The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/mxge/if_mxge.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /******************************************************************************
    2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3 
    4 Copyright (c) 2006-2013, Myricom Inc.
    5 All rights reserved.
    6 
    7 Redistribution and use in source and binary forms, with or without
    8 modification, are permitted provided that the following conditions are met:
    9 
   10  1. Redistributions of source code must retain the above copyright notice,
   11     this list of conditions and the following disclaimer.
   12 
   13  2. Neither the name of the Myricom Inc, nor the names of its
   14     contributors may be used to endorse or promote products derived from
   15     this software without specific prior written permission.
   16 
   17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   27 POSSIBILITY OF SUCH DAMAGE.
   28 
   29 ***************************************************************************/
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include <sys/param.h>
   35 #include <sys/systm.h>
   36 #include <sys/linker.h>
   37 #include <sys/firmware.h>
   38 #include <sys/endian.h>
   39 #include <sys/sockio.h>
   40 #include <sys/mbuf.h>
   41 #include <sys/malloc.h>
   42 #include <sys/kdb.h>
   43 #include <sys/kernel.h>
   44 #include <sys/lock.h>
   45 #include <sys/module.h>
   46 #include <sys/socket.h>
   47 #include <sys/sysctl.h>
   48 #include <sys/sx.h>
   49 #include <sys/taskqueue.h>
   50 #include <contrib/zlib/zlib.h>
   51 #include <dev/zlib/zcalloc.h>
   52 
   53 #include <net/if.h>
   54 #include <net/if_var.h>
   55 #include <net/if_arp.h>
   56 #include <net/ethernet.h>
   57 #include <net/if_dl.h>
   58 #include <net/if_media.h>
   59 
   60 #include <net/bpf.h>
   61 
   62 #include <net/if_types.h>
   63 #include <net/if_vlan_var.h>
   64 
   65 #include <netinet/in_systm.h>
   66 #include <netinet/in.h>
   67 #include <netinet/ip.h>
   68 #include <netinet/ip6.h>
   69 #include <netinet/tcp.h>
   70 #include <netinet/tcp_lro.h>
   71 #include <netinet6/ip6_var.h>
   72 
   73 #include <machine/bus.h>
   74 #include <machine/in_cksum.h>
   75 #include <machine/resource.h>
   76 #include <sys/bus.h>
   77 #include <sys/rman.h>
   78 #include <sys/smp.h>
   79 
   80 #include <dev/pci/pcireg.h>
   81 #include <dev/pci/pcivar.h>
   82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
   83 
   84 #include <vm/vm.h>              /* for pmap_mapdev() */
   85 #include <vm/pmap.h>
   86 
   87 #if defined(__i386) || defined(__amd64)
   88 #include <machine/specialreg.h>
   89 #endif
   90 
   91 #include <dev/mxge/mxge_mcp.h>
   92 #include <dev/mxge/mcp_gen_header.h>
   93 /*#define MXGE_FAKE_IFP*/
   94 #include <dev/mxge/if_mxge_var.h>
   95 #ifdef IFNET_BUF_RING
   96 #include <sys/buf_ring.h>
   97 #endif
   98 
   99 #include "opt_inet.h"
  100 #include "opt_inet6.h"
  101 
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;	/* allow mxge_enable_nvidia_ecrc() to run */
static int mxge_force_firmware = 0;	/* 0: probe; 1: force aligned fw; other non-zero: force unaligned */
static int mxge_intr_coal_delay = 30;	/* interrupt coalescing delay (units not visible in this chunk — confirm at attach) */
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;		/* enables extra device_printf() diagnostics */
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
/*
 * Firmware image names: the "eth" images assume aligned PCIe
 * completions, the "ethp" images work around unaligned completions
 * (see the tx_boundary discussion below); the "rss" variants are
 * presumably the multi-slice builds — confirm against mxge_open().
 */
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

/* forward declarations for the newbus device methods */
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
  /* Device interface */
  DEVMETHOD(device_probe, mxge_probe),
  DEVMETHOD(device_attach, mxge_attach),
  DEVMETHOD(device_detach, mxge_detach),
  DEVMETHOD(device_shutdown, mxge_shutdown),

  DEVMETHOD_END
};

static driver_t mxge_driver =
{
  "mxge",		/* driver name */
  mxge_methods,
  sizeof(mxge_softc_t),	/* size of per-instance softc */
};

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, 0, 0);
/* load ordering: firmware(9) and zlib must be available before us */
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

/* forward declarations for routines referenced before their definitions */
static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
  154 
  155 static int
  156 mxge_probe(device_t dev)
  157 {
  158         int rev;
  159 
  160         if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
  161             ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
  162              (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
  163                 rev = pci_get_revid(dev);
  164                 switch (rev) {
  165                 case MXGE_PCI_REV_Z8E:
  166                         device_set_desc(dev, "Myri10G-PCIE-8A");
  167                         break;
  168                 case MXGE_PCI_REV_Z8ES:
  169                         device_set_desc(dev, "Myri10G-PCIE-8B");
  170                         break;
  171                 default:
  172                         device_set_desc(dev, "Myri10G-PCIE-8??");
  173                         device_printf(dev, "Unrecognized rev %d NIC\n",
  174                                       rev);
  175                         break;  
  176                 }
  177                 return 0;
  178         }
  179         return ENXIO;
  180 }
  181 
  182 static void
  183 mxge_enable_wc(mxge_softc_t *sc)
  184 {
  185 #if defined(__i386) || defined(__amd64)
  186         vm_offset_t len;
  187         int err;
  188 
  189         sc->wc = 1;
  190         len = rman_get_size(sc->mem_res);
  191         err = pmap_change_attr((vm_offset_t) sc->sram,
  192                                len, PAT_WRITE_COMBINING);
  193         if (err != 0) {
  194                 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
  195                               err);
  196                 sc->wc = 0;
  197         }
  198 #endif          
  199 }
  200 
  201 /* callback to get our DMA address */
  202 static void
  203 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
  204                          int error)
  205 {
  206         if (error == 0) {
  207                 *(bus_addr_t *) arg = segs->ds_addr;
  208         }
  209 }
  210 
/*
 * Allocate a DMA-able region of 'bytes' bytes with the given alignment:
 * create a tag, allocate zeroed coherent memory, and load the map so
 * that dma->bus_addr holds the device-visible address.  Returns 0 or a
 * bus_dma error; on failure all partially-acquired resources are
 * released via the goto-cleanup chain.
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
		   bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	/*
	 * A 4KB-aligned region larger than 4KB must be allowed to
	 * cross 4KB boundaries (boundary = 0); otherwise constrain
	 * each segment to a single 4KB window.
	 */
	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO),  &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory; the callback stores the address in dma->bus_addr */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
  270 
/*
 * Release a region obtained from mxge_dma_alloc(): unload the map,
 * free the memory, then destroy the tag (reverse order of acquisition).
 */
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
  278 
  279 /*
  280  * The eeprom strings on the lanaiX have the format
  281  * SN=x\0
  282  * MAC=x:x:x:x:x:x\0
  283  * PC=text\0
  284  */
  285 
  286 static int
  287 mxge_parse_strings(mxge_softc_t *sc)
  288 {
  289         char *ptr;
  290         int i, found_mac, found_sn2;
  291         char *endptr;
  292 
  293         ptr = sc->eeprom_strings;
  294         found_mac = 0;
  295         found_sn2 = 0;
  296         while (*ptr != '\0') {
  297                 if (strncmp(ptr, "MAC=", 4) == 0) {
  298                         ptr += 4;
  299                         for (i = 0;;) {
  300                                 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
  301                                 if (endptr - ptr != 2)
  302                                         goto abort;
  303                                 ptr = endptr;
  304                                 if (++i == 6)
  305                                         break;
  306                                 if (*ptr++ != ':')
  307                                         goto abort;
  308                         }
  309                         found_mac = 1;
  310                 } else if (strncmp(ptr, "PC=", 3) == 0) {
  311                         ptr += 3;
  312                         strlcpy(sc->product_code_string, ptr,
  313                             sizeof(sc->product_code_string));
  314                 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
  315                         ptr += 3;
  316                         strlcpy(sc->serial_number_string, ptr,
  317                             sizeof(sc->serial_number_string));
  318                 } else if (strncmp(ptr, "SN2=", 4) == 0) {
  319                         /* SN2 takes precedence over SN */
  320                         ptr += 4;
  321                         found_sn2 = 1;
  322                         strlcpy(sc->serial_number_string, ptr,
  323                             sizeof(sc->serial_number_string));
  324                 }
  325                 while (*ptr++ != '\0') {}
  326         }
  327 
  328         if (found_mac)
  329                 return 0;
  330 
  331  abort:
  332         device_printf(sc->dev, "failed to parse eeprom_strings\n");
  333 
  334         return ENXIO;
  335 }
  336 
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
/*
 * Try to turn on ECRC generation in the upstream Nvidia (CK804/MCP55)
 * bridge, which forces aligned PCIe completions (see the tx_boundary
 * discussion elsewhere in this file).  The control bit lives at
 * extended config offset 0x178, which is reached by mapping the
 * chipset's memory-mapped config window directly with pmap_mapdev()
 * rather than through normal config cycles.  Best-effort: every
 * failure path simply returns.
 */
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	/* grandparent of our device == the bridge above our PCI bus */
	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)	/* Nvidia bridges only */
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	/* collect the bridge's bus/slot/function and expected IDs */
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	/* offset of this function's config page inside the window */
	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it by checking that the
	   IDs read through the mapping match the ones read via config
	   cycles above */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev(va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev(va, PAGE_SIZE);
		return;
	}
	/* set the ECRC-enable bit and release the temporary mapping */
	*ptr32 = val | 0x40;
	pmap_unmapdev(va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
/* Non-x86 stub: the Nvidia workaround only applies to x86/amd64 hosts. */
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
  468 
  469 static int
  470 mxge_dma_test(mxge_softc_t *sc, int test_type)
  471 {
  472         mxge_cmd_t cmd;
  473         bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
  474         int status;
  475         uint32_t len;
  476         char *test = " ";
  477 
  478         /* Run a small DMA test.
  479          * The magic multipliers to the length tell the firmware
  480          * to do DMA read, write, or read+write tests.  The
  481          * results are returned in cmd.data0.  The upper 16
  482          * bits of the return is the number of transfers completed.
  483          * The lower 16 bits is the time in 0.5us ticks that the
  484          * transfers took to complete.
  485          */
  486 
  487         len = sc->tx_boundary;
  488 
  489         cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
  490         cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
  491         cmd.data2 = len * 0x10000;
  492         status = mxge_send_cmd(sc, test_type, &cmd);
  493         if (status != 0) {
  494                 test = "read";
  495                 goto abort;
  496         }
  497         sc->read_dma = ((cmd.data0>>16) * len * 2) /
  498                 (cmd.data0 & 0xffff);
  499         cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
  500         cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
  501         cmd.data2 = len * 0x1;
  502         status = mxge_send_cmd(sc, test_type, &cmd);
  503         if (status != 0) {
  504                 test = "write";
  505                 goto abort;
  506         }
  507         sc->write_dma = ((cmd.data0>>16) * len * 2) /
  508                 (cmd.data0 & 0xffff);
  509 
  510         cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
  511         cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
  512         cmd.data2 = len * 0x10001;
  513         status = mxge_send_cmd(sc, test_type, &cmd);
  514         if (status != 0) {
  515                 test = "read/write";
  516                 goto abort;
  517         }
  518         sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
  519                 (cmd.data0 & 0xffff);
  520 
  521 abort:
  522         if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
  523                 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
  524                               test, status);
  525 
  526         return status;
  527 }
  528 
  529 /*
  530  * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
  531  * when the PCI-E Completion packets are aligned on an 8-byte
  532  * boundary.  Some PCI-E chip sets always align Completion packets; on
  533  * the ones that do not, the alignment can be enforced by enabling
  534  * ECRC generation (if supported).
  535  *
  536  * When PCI-E Completion packets are not aligned, it is actually more
  537  * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
  538  *
  539  * If the driver can neither enable ECRC nor verify that it has
  540  * already been enabled, then it must use a firmware image which works
  541  * around unaligned completion packets (ethp_z8e.dat), and it should
  542  * also ensure that it never gives the device a Read-DMA which is
  543  * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
  544  * enabled, then the driver should use the aligned (eth_z8e.dat)
  545  * firmware image, and set tx_boundary to 4KB.
  546  */
  547 
  548 static int
  549 mxge_firmware_probe(mxge_softc_t *sc)
  550 {
  551         device_t dev = sc->dev;
  552         int reg, status;
  553         uint16_t pectl;
  554 
  555         sc->tx_boundary = 4096;
  556         /*
  557          * Verify the max read request size was set to 4KB
  558          * before trying the test with 4KB.
  559          */
  560         if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
  561                 pectl = pci_read_config(dev, reg + 0x8, 2);
  562                 if ((pectl & (5 << 12)) != (5 << 12)) {
  563                         device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
  564                                       pectl);
  565                         sc->tx_boundary = 2048;
  566                 }
  567         }
  568 
  569         /*
  570          * load the optimized firmware (which assumes aligned PCIe
  571          * completions) in order to see if it works on this host.
  572          */
  573         sc->fw_name = mxge_fw_aligned;
  574         status = mxge_load_firmware(sc, 1);
  575         if (status != 0) {
  576                 return status;
  577         }
  578 
  579         /*
  580          * Enable ECRC if possible
  581          */
  582         mxge_enable_nvidia_ecrc(sc);
  583 
  584         /*
  585          * Run a DMA test which watches for unaligned completions and
  586          * aborts on the first one seen.  Not required on Z8ES or newer.
  587          */
  588         if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
  589                 return 0;
  590         status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
  591         if (status == 0)
  592                 return 0; /* keep the aligned firmware */
  593 
  594         if (status != E2BIG)
  595                 device_printf(dev, "DMA test failed: %d\n", status);
  596         if (status == ENOSYS)
  597                 device_printf(dev, "Falling back to ethp! "
  598                               "Please install up to date fw\n");
  599         return status;
  600 }
  601 
  602 static int
  603 mxge_select_firmware(mxge_softc_t *sc)
  604 {
  605         int aligned = 0;
  606         int force_firmware = mxge_force_firmware;
  607 
  608         if (sc->throttle)
  609                 force_firmware = sc->throttle;
  610 
  611         if (force_firmware != 0) {
  612                 if (force_firmware == 1)
  613                         aligned = 1;
  614                 else
  615                         aligned = 0;
  616                 if (mxge_verbose)
  617                         device_printf(sc->dev,
  618                                       "Assuming %s completions (forced)\n",
  619                                       aligned ? "aligned" : "unaligned");
  620                 goto abort;
  621         }
  622 
  623         /* if the PCIe link width is 4 or less, we can use the aligned
  624            firmware and skip any checks */
  625         if (sc->link_width != 0 && sc->link_width <= 4) {
  626                 device_printf(sc->dev,
  627                               "PCIe x%d Link, expect reduced performance\n",
  628                               sc->link_width);
  629                 aligned = 1;
  630                 goto abort;
  631         }
  632 
  633         if (0 == mxge_firmware_probe(sc))
  634                 return 0;
  635 
  636 abort:
  637         if (aligned) {
  638                 sc->fw_name = mxge_fw_aligned;
  639                 sc->tx_boundary = 4096;
  640         } else {
  641                 sc->fw_name = mxge_fw_unaligned;
  642                 sc->tx_boundary = 2048;
  643         }
  644         return (mxge_load_firmware(sc, 0));
  645 }
  646 
  647 static int
  648 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
  649 {
  650 
  651         if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
  652                 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
  653                               be32toh(hdr->mcp_type));
  654                 return EIO;
  655         }
  656 
  657         /* save firmware version for sysctl */
  658         strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
  659         if (mxge_verbose)
  660                 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
  661 
  662         sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
  663                &sc->fw_ver_minor, &sc->fw_ver_tiny);
  664 
  665         if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
  666               && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
  667                 device_printf(sc->dev, "Found firmware version %s\n",
  668                               sc->fw_version);
  669                 device_printf(sc->dev, "Driver needs %d.%d\n",
  670                               MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
  671                 return EINVAL;
  672         }
  673         return 0;
  674 
  675 }
  676 
  677 static int
  678 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
  679 {
  680         z_stream zs;
  681         char *inflate_buffer;
  682         const struct firmware *fw;
  683         const mcp_gen_header_t *hdr;
  684         unsigned hdr_offset;
  685         int status;
  686         unsigned int i;
  687         size_t fw_len;
  688 
  689         fw = firmware_get(sc->fw_name);
  690         if (fw == NULL) {
  691                 device_printf(sc->dev, "Could not find firmware image %s\n",
  692                               sc->fw_name);
  693                 return ENOENT;
  694         }
  695 
  696         /* setup zlib and decompress f/w */
  697         bzero(&zs, sizeof (zs));
  698         zs.zalloc = zcalloc_nowait;
  699         zs.zfree = zcfree;
  700         status = inflateInit(&zs);
  701         if (status != Z_OK) {
  702                 status = EIO;
  703                 goto abort_with_fw;
  704         }
  705 
  706         /* the uncompressed size is stored as the firmware version,
  707            which would otherwise go unused */
  708         fw_len = (size_t) fw->version;
  709         inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
  710         if (inflate_buffer == NULL)
  711                 goto abort_with_zs;
  712         zs.avail_in = fw->datasize;
  713         zs.next_in = __DECONST(char *, fw->data);
  714         zs.avail_out = fw_len;
  715         zs.next_out = inflate_buffer;
  716         status = inflate(&zs, Z_FINISH);
  717         if (status != Z_STREAM_END) {
  718                 device_printf(sc->dev, "zlib %d\n", status);
  719                 status = EIO;
  720                 goto abort_with_buffer;
  721         }
  722 
  723         /* check id */
  724         hdr_offset = htobe32(*(const uint32_t *)
  725                              (inflate_buffer + MCP_HEADER_PTR_OFFSET));
  726         if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
  727                 device_printf(sc->dev, "Bad firmware file");
  728                 status = EIO;
  729                 goto abort_with_buffer;
  730         }
  731         hdr = (const void*)(inflate_buffer + hdr_offset);
  732 
  733         status = mxge_validate_firmware(sc, hdr);
  734         if (status != 0)
  735                 goto abort_with_buffer;
  736 
  737         /* Copy the inflated firmware to NIC SRAM. */
  738         for (i = 0; i < fw_len; i += 256) {
  739                 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
  740                               inflate_buffer + i,
  741                               min(256U, (unsigned)(fw_len - i)));
  742                 wmb();
  743                 (void)*sc->sram;
  744                 wmb();
  745         }
  746 
  747         *limit = fw_len;
  748         status = 0;
  749 abort_with_buffer:
  750         free(inflate_buffer, M_TEMP);
  751 abort_with_zs:
  752         inflateEnd(&zs);
  753 abort_with_fw:
  754         firmware_put(fw, FIRMWARE_UNLOAD);
  755         return status;
  756 }
  757 
  758 /*
  759  * Enable or disable periodic RDMAs from the host to make certain
  760  * chipsets resend dropped PCIe messages
  761  */
  762 
/*
 * Post the BOOT_DUMMY_RDMA command to the firmware's boot handoff area
 * to turn the periodic dummy RDMAs on (enable != 0) or off.  Waits up
 * to ~21ms for the firmware to acknowledge by writing -1 to the
 * command confirmation word; logs a warning on timeout.
 */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	/* align the command buffer to an 8-byte boundary */
	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);			/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	/* PIO the command into SRAM, then poll for the acknowledgement */
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
  813 
  814 static int
  815 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
  816 {
  817         mcp_cmd_t *buf;
  818         char buf_bytes[sizeof(*buf) + 8];
  819         volatile mcp_cmd_response_t *response = sc->cmd;
  820         volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
  821         uint32_t dma_low, dma_high;
  822         int err, sleep_total = 0;
  823 
  824         /* ensure buf is aligned to 8 bytes */
  825         buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);
  826 
  827         buf->data0 = htobe32(data->data0);
  828         buf->data1 = htobe32(data->data1);
  829         buf->data2 = htobe32(data->data2);
  830         buf->cmd = htobe32(cmd);
  831         dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
  832         dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
  833 
  834         buf->response_addr.low = htobe32(dma_low);
  835         buf->response_addr.high = htobe32(dma_high);
  836         mtx_lock(&sc->cmd_mtx);
  837         response->result = 0xffffffff;
  838         wmb();
  839         mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
  840 
  841         /* wait up to 20ms */
  842         err = EAGAIN;
  843         for (sleep_total = 0; sleep_total <  20; sleep_total++) {
  844                 bus_dmamap_sync(sc->cmd_dma.dmat,
  845                                 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
  846                 wmb();
  847                 switch (be32toh(response->result)) {
  848                 case 0:
  849                         data->data0 = be32toh(response->data);
  850                         err = 0;
  851                         break;
  852                 case 0xffffffff:
  853                         DELAY(1000);
  854                         break;
  855                 case MXGEFW_CMD_UNKNOWN:
  856                         err = ENOSYS;
  857                         break;
  858                 case MXGEFW_CMD_ERROR_UNALIGNED:
  859                         err = E2BIG;
  860                         break;
  861                 case MXGEFW_CMD_ERROR_BUSY:
  862                         err = EBUSY;
  863                         break;
  864                 case MXGEFW_CMD_ERROR_I2C_ABSENT:
  865                         err = ENXIO;
  866                         break;
  867                 default:
  868                         device_printf(sc->dev,
  869                                       "mxge: command %d "
  870                                       "failed, result = %d\n",
  871                                       cmd, be32toh(response->result));
  872                         err = ENXIO;
  873                         break;
  874                 }
  875                 if (err != EAGAIN)
  876                         break;
  877         }
  878         if (err == EAGAIN)
  879                 device_printf(sc->dev, "mxge: command %d timed out"
  880                               "result = %d\n",
  881                               cmd, be32toh(response->result));
  882         mtx_unlock(&sc->cmd_mtx);
  883         return err;
  884 }
  885 
/*
 * Adopt the firmware image already running on the NIC rather than
 * loading a new one: locate the MCP header in SRAM via the pointer at
 * MCP_HEADER_PTR_OFFSET, copy it to host memory, validate it, and
 * record whether this revision needs the rx filter workaround.
 * Returns 0 on success or an errno value (EIO, ENOMEM, or whatever
 * mxge_validate_firmware() reports).
 */
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	/* NOTE(review): the value read from SRAM is big-endian;
	 * htobe32() and be32toh() are the same involution, so this
	 * converts correctly despite the name */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	/* the header pointer must be 32-bit aligned and the whole
	 * header must lie inside SRAM */
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	/* byte-wise bus-space read: SRAM may not tolerate wider access */
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
  934 
  935 static int
  936 mxge_load_firmware(mxge_softc_t *sc, int adopt)
  937 {
  938         volatile uint32_t *confirm;
  939         volatile char *submit;
  940         char buf_bytes[72];
  941         uint32_t *buf, size, dma_low, dma_high;
  942         int status, i;
  943 
  944         buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);
  945 
  946         size = sc->sram_size;
  947         status = mxge_load_firmware_helper(sc, &size);
  948         if (status) {
  949                 if (!adopt)
  950                         return status;
  951                 /* Try to use the currently running firmware, if
  952                    it is new enough */
  953                 status = mxge_adopt_running_firmware(sc);
  954                 if (status) {
  955                         device_printf(sc->dev,
  956                                       "failed to adopt running firmware\n");
  957                         return status;
  958                 }
  959                 device_printf(sc->dev,
  960                               "Successfully adopted running firmware\n");
  961                 if (sc->tx_boundary == 4096) {
  962                         device_printf(sc->dev,
  963                                 "Using firmware currently running on NIC"
  964                                  ".  For optimal\n");
  965                         device_printf(sc->dev,
  966                                  "performance consider loading optimized "
  967                                  "firmware\n");
  968                 }
  969                 sc->fw_name = mxge_fw_unaligned;
  970                 sc->tx_boundary = 2048;
  971                 return 0;
  972         }
  973         /* clear confirmation addr */
  974         confirm = (volatile uint32_t *)sc->cmd;
  975         *confirm = 0;
  976         wmb();
  977         /* send a reload command to the bootstrap MCP, and wait for the
  978            response in the confirmation address.  The firmware should
  979            write a -1 there to indicate it is alive and well
  980         */
  981 
  982         dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
  983         dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
  984 
  985         buf[0] = htobe32(dma_high);     /* confirm addr MSW */
  986         buf[1] = htobe32(dma_low);      /* confirm addr LSW */
  987         buf[2] = htobe32(0xffffffff);   /* confirm data */
  988 
  989         /* FIX: All newest firmware should un-protect the bottom of
  990            the sram before handoff. However, the very first interfaces
  991            do not. Therefore the handoff copy must skip the first 8 bytes
  992         */
  993                                         /* where the code starts*/
  994         buf[3] = htobe32(MXGE_FW_OFFSET + 8);
  995         buf[4] = htobe32(size - 8);     /* length of code */
  996         buf[5] = htobe32(8);            /* where to copy to */
  997         buf[6] = htobe32(0);            /* where to jump to */
  998 
  999         submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
 1000         mxge_pio_copy(submit, buf, 64);
 1001         wmb();
 1002         DELAY(1000);
 1003         wmb();
 1004         i = 0;
 1005         while (*confirm != 0xffffffff && i < 20) {
 1006                 DELAY(1000*10);
 1007                 i++;
 1008                 bus_dmamap_sync(sc->cmd_dma.dmat,
 1009                                 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
 1010         }
 1011         if (*confirm != 0xffffffff) {
 1012                 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
 1013                         confirm, *confirm);
 1014                 
 1015                 return ENXIO;
 1016         }
 1017         return 0;
 1018 }
 1019 
 1020 static int
 1021 mxge_update_mac_address(mxge_softc_t *sc)
 1022 {
 1023         mxge_cmd_t cmd;
 1024         uint8_t *addr = sc->mac_addr;
 1025         int status;
 1026 
 1027         cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
 1028                      | (addr[2] << 8) | addr[3]);
 1029 
 1030         cmd.data1 = ((addr[4] << 8) | (addr[5]));
 1031 
 1032         status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
 1033         return status;
 1034 }
 1035 
 1036 static int
 1037 mxge_change_pause(mxge_softc_t *sc, int pause)
 1038 {       
 1039         mxge_cmd_t cmd;
 1040         int status;
 1041 
 1042         if (pause)
 1043                 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
 1044                                        &cmd);
 1045         else
 1046                 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
 1047                                        &cmd);
 1048 
 1049         if (status) {
 1050                 device_printf(sc->dev, "Failed to set flow control mode\n");
 1051                 return ENXIO;
 1052         }
 1053         sc->pause = pause;
 1054         return 0;
 1055 }
 1056 
 1057 static void
 1058 mxge_change_promisc(mxge_softc_t *sc, int promisc)
 1059 {       
 1060         mxge_cmd_t cmd;
 1061         int status;
 1062 
 1063         if (mxge_always_promisc)
 1064                 promisc = 1;
 1065 
 1066         if (promisc)
 1067                 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
 1068                                        &cmd);
 1069         else
 1070                 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
 1071                                        &cmd);
 1072 
 1073         if (status) {
 1074                 device_printf(sc->dev, "Failed to set promisc mode\n");
 1075         }
 1076 }
 1077 
/* Context handed to mxge_add_maddr() by if_foreach_llmaddr() while
 * (re)programming the NIC multicast filter. */
struct mxge_add_maddr_ctx {
	mxge_softc_t *sc;	/* device softc, for mxge_send_cmd() */
	int error;		/* first JOIN_MULTICAST_GROUP error, or 0 */
};
 1082 
 1083 static u_int
 1084 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
 1085 {
 1086         struct mxge_add_maddr_ctx *ctx = arg;
 1087         mxge_cmd_t cmd;
 1088 
 1089         if (ctx->error != 0)
 1090                 return (0);
 1091         bcopy(LLADDR(sdl), &cmd.data0, 4);
 1092         bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
 1093         cmd.data0 = htonl(cmd.data0);
 1094         cmd.data1 = htonl(cmd.data1);
 1095 
 1096         ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
 1097 
 1098         return (1);
 1099 }
 1100 
 1101 static void
 1102 mxge_set_multicast_list(mxge_softc_t *sc)
 1103 {
 1104         struct mxge_add_maddr_ctx ctx;
 1105         struct ifnet *ifp = sc->ifp;
 1106         mxge_cmd_t cmd;
 1107         int err;
 1108 
 1109         /* This firmware is known to not support multicast */
 1110         if (!sc->fw_multicast_support)
 1111                 return;
 1112 
 1113         /* Disable multicast filtering while we play with the lists*/
 1114         err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
 1115         if (err != 0) {
 1116                 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
 1117                        " error status: %d\n", err);
 1118                 return;
 1119         }
 1120 
 1121         if (sc->adopted_rx_filter_bug)
 1122                 return;
 1123 
 1124         if (ifp->if_flags & IFF_ALLMULTI)
 1125                 /* request to disable multicast filtering, so quit here */
 1126                 return;
 1127 
 1128         /* Flush all the filters */
 1129 
 1130         err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
 1131         if (err != 0) {
 1132                 device_printf(sc->dev,
 1133                               "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
 1134                               ", error status: %d\n", err);
 1135                 return;
 1136         }
 1137 
 1138         /* Walk the multicast list, and add each address */
 1139         ctx.sc = sc;
 1140         ctx.error = 0;
 1141         if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
 1142         if (ctx.error != 0) {
 1143                 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
 1144                     "error status:" "%d\t", ctx.error);
 1145                 /* abort, leaving multicast filtering off */
 1146                 return;
 1147         }
 1148 
 1149         /* Enable multicast filtering */
 1150         err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
 1151         if (err != 0) {
 1152                 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
 1153                        ", error status: %d\n", err);
 1154         }
 1155 }
 1156 
 1157 static int
 1158 mxge_max_mtu(mxge_softc_t *sc)
 1159 {
 1160         mxge_cmd_t cmd;
 1161         int status;
 1162 
 1163         if (MJUMPAGESIZE - MXGEFW_PAD >  MXGEFW_MAX_MTU)
 1164                 return  MXGEFW_MAX_MTU - MXGEFW_PAD;
 1165 
 1166         /* try to set nbufs to see if it we can
 1167            use virtually contiguous jumbos */
 1168         cmd.data0 = 0;
 1169         status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
 1170                                &cmd);
 1171         if (status == 0)
 1172                 return  MXGEFW_MAX_MTU - MXGEFW_PAD;
 1173 
 1174         /* otherwise, we're limited to MJUMPAGESIZE */
 1175         return MJUMPAGESIZE - MXGEFW_PAD;
 1176 }
 1177 
/*
 * Reset the NIC and re-synchronize driver/firmware shared state:
 * issue MXGEFW_CMD_RESET, re-enable dummy RDMAs, size and DMA-map the
 * interrupt queues, fetch the interrupt coalescing/ack/deassert SRAM
 * offsets, run a DMA benchmark, zero all per-slice counters, and
 * reapply the MAC address, promiscuous, pause, multicast and throttle
 * settings.  The firmware command ordering below is mandatory (see
 * the comment about MXGEFW_CMD_GET_MAX_RSS_QUEUES).
 * Returns 0 or an errno value from a failing firmware command.
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
					   &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
					   &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			/* give firmware the bus address of each
			   slice's receive-done ring */
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	/* the errors of the offset queries below are OR-ed together
	   and checked once afterwards */
	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc,  MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	/* the NIC reads the coalescing delay from SRAM in big-endian */
	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	/* reapply the current administrative settings */
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}
 1315 
 1316 static int
 1317 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
 1318 {
 1319         mxge_cmd_t cmd;
 1320         mxge_softc_t *sc;
 1321         int err;
 1322         unsigned int throttle;
 1323 
 1324         sc = arg1;
 1325         throttle = sc->throttle;
 1326         err = sysctl_handle_int(oidp, &throttle, arg2, req);
 1327         if (err != 0) {
 1328                 return err;
 1329         }
 1330 
 1331         if (throttle == sc->throttle)
 1332                 return 0;
 1333 
 1334         if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
 1335                 return EINVAL;
 1336 
 1337         mtx_lock(&sc->driver_mtx);
 1338         cmd.data0 = throttle;
 1339         err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
 1340         if (err == 0)
 1341                 sc->throttle = throttle;
 1342         mtx_unlock(&sc->driver_mtx);    
 1343         return err;
 1344 }
 1345 
 1346 static int
 1347 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
 1348 {
 1349         mxge_softc_t *sc;
 1350         unsigned int intr_coal_delay;
 1351         int err;
 1352 
 1353         sc = arg1;
 1354         intr_coal_delay = sc->intr_coal_delay;
 1355         err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
 1356         if (err != 0) {
 1357                 return err;
 1358         }
 1359         if (intr_coal_delay == sc->intr_coal_delay)
 1360                 return 0;
 1361 
 1362         if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
 1363                 return EINVAL;
 1364 
 1365         mtx_lock(&sc->driver_mtx);
 1366         *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
 1367         sc->intr_coal_delay = intr_coal_delay;
 1368 
 1369         mtx_unlock(&sc->driver_mtx);
 1370         return err;
 1371 }
 1372 
 1373 static int
 1374 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
 1375 {
 1376         mxge_softc_t *sc;
 1377         unsigned int enabled;
 1378         int err;
 1379 
 1380         sc = arg1;
 1381         enabled = sc->pause;
 1382         err = sysctl_handle_int(oidp, &enabled, arg2, req);
 1383         if (err != 0) {
 1384                 return err;
 1385         }
 1386         if (enabled == sc->pause)
 1387                 return 0;
 1388 
 1389         mtx_lock(&sc->driver_mtx);
 1390         err = mxge_change_pause(sc, enabled);
 1391         mtx_unlock(&sc->driver_mtx);
 1392         return err;
 1393 }
 1394 
 1395 static int
 1396 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
 1397 {
 1398         int err;
 1399 
 1400         if (arg1 == NULL)
 1401                 return EFAULT;
 1402         arg2 = be32toh(*(int *)arg1);
 1403         arg1 = NULL;
 1404         err = sysctl_handle_int(oidp, arg1, arg2, req);
 1405 
 1406         return err;
 1407 }
 1408 
 1409 static void
 1410 mxge_rem_sysctls(mxge_softc_t *sc)
 1411 {
 1412         struct mxge_slice_state *ss;
 1413         int slice;
 1414 
 1415         if (sc->slice_sysctl_tree == NULL)
 1416                 return;
 1417 
 1418         for (slice = 0; slice < sc->num_slices; slice++) {
 1419                 ss = &sc->ss[slice];
 1420                 if (ss == NULL || ss->sysctl_tree == NULL)
 1421                         continue;
 1422                 sysctl_ctx_free(&ss->sysctl_ctx);
 1423                 ss->sysctl_tree = NULL;
 1424         }
 1425         sysctl_ctx_free(&sc->slice_sysctl_ctx);
 1426         sc->slice_sysctl_tree = NULL;
 1427 }
 1428 
 1429 static void
 1430 mxge_add_sysctls(mxge_softc_t *sc)
 1431 {
 1432         struct sysctl_ctx_list *ctx;
 1433         struct sysctl_oid_list *children;
 1434         mcp_irq_data_t *fw;
 1435         struct mxge_slice_state *ss;
 1436         int slice;
 1437         char slice_num[8];
 1438 
 1439         ctx = device_get_sysctl_ctx(sc->dev);
 1440         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 1441         fw = sc->ss[0].fw_stats;
 1442 
 1443         /* random information */
 1444         SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
 1445                        "firmware_version",
 1446                        CTLFLAG_RD, sc->fw_version,
 1447                        0, "firmware version");
 1448         SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
 1449                        "serial_number",
 1450                        CTLFLAG_RD, sc->serial_number_string,
 1451                        0, "serial number");
 1452         SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
 1453                        "product_code",
 1454                        CTLFLAG_RD, sc->product_code_string,
 1455                        0, "product_code");
 1456         SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1457                        "pcie_link_width",
 1458                        CTLFLAG_RD, &sc->link_width,
 1459                        0, "tx_boundary");
 1460         SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1461                        "tx_boundary",
 1462                        CTLFLAG_RD, &sc->tx_boundary,
 1463                        0, "tx_boundary");
 1464         SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1465                        "write_combine",
 1466                        CTLFLAG_RD, &sc->wc,
 1467                        0, "write combining PIO?");
 1468         SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1469                        "read_dma_MBs",
 1470                        CTLFLAG_RD, &sc->read_dma,
 1471                        0, "DMA Read speed in MB/s");
 1472         SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1473                        "write_dma_MBs",
 1474                        CTLFLAG_RD, &sc->write_dma,
 1475                        0, "DMA Write speed in MB/s");
 1476         SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1477                        "read_write_dma_MBs",
 1478                        CTLFLAG_RD, &sc->read_write_dma,
 1479                        0, "DMA concurrent Read/Write speed in MB/s");
 1480         SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1481                        "watchdog_resets",
 1482                        CTLFLAG_RD, &sc->watchdog_resets,
 1483                        0, "Number of times NIC was reset");
 1484 
 1485         /* performance related tunables */
 1486         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1487             "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 1488             sc, 0, mxge_change_intr_coal, "I",
 1489             "interrupt coalescing delay in usecs");
 1490 
 1491         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1492             "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 1493             mxge_change_throttle, "I", "transmit throttling");
 1494 
 1495         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1496             "flow_control_enabled",
 1497             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 1498             mxge_change_flow_control, "I",
 1499             "interrupt coalescing delay in usecs");
 1500 
 1501         SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1502                        "deassert_wait",
 1503                        CTLFLAG_RW, &mxge_deassert_wait,
 1504                        0, "Wait for IRQ line to go low in ihandler");
 1505 
 1506         /* stats block from firmware is in network byte order.
 1507            Need to swap it */
 1508         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1509             "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1510             &fw->link_up, 0, mxge_handle_be32, "I", "link up");
 1511         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1512             "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1513             &fw->rdma_tags_available, 0, mxge_handle_be32, "I",
 1514             "rdma_tags_available");
 1515         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1516             "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1517             &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I",
 1518             "dropped_bad_crc32");
 1519         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1520             "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1521             &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy");
 1522         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1523             "dropped_link_error_or_filtered",
 1524             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1525             &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I",
 1526             "dropped_link_error_or_filtered");
 1527         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1528             "dropped_link_overflow",
 1529             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1530             &fw->dropped_link_overflow, 0, mxge_handle_be32, "I",
 1531             "dropped_link_overflow");
 1532         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1533             "dropped_multicast_filtered",
 1534             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1535             &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I",
 1536             "dropped_multicast_filtered");
 1537         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1538             "dropped_no_big_buffer",
 1539             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1540             &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I",
 1541             "dropped_no_big_buffer");
 1542         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1543             "dropped_no_small_buffer",
 1544             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1545             &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I",
 1546             "dropped_no_small_buffer");
 1547         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1548             "dropped_overrun",
 1549             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1550             &fw->dropped_overrun, 0, mxge_handle_be32, "I",
 1551             "dropped_overrun");
 1552         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1553             "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1554             &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause");
 1555         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1556             "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1557             &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt");
 1558 
 1559         SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
 1560             "dropped_unicast_filtered",
 1561             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1562             &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I",
 1563             "dropped_unicast_filtered");
 1564 
 1565         /* verbose printing? */
 1566         SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1567                        "verbose",
 1568                        CTLFLAG_RW, &mxge_verbose,
 1569                        0, "verbose printing");
 1570 
 1571         /* add counters exported for debugging from all slices */
 1572         sysctl_ctx_init(&sc->slice_sysctl_ctx);
 1573         sc->slice_sysctl_tree =
 1574                 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
 1575                     "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
 1576 
 1577         for (slice = 0; slice < sc->num_slices; slice++) {
 1578                 ss = &sc->ss[slice];
 1579                 sysctl_ctx_init(&ss->sysctl_ctx);
 1580                 ctx = &ss->sysctl_ctx;
 1581                 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
 1582                 sprintf(slice_num, "%d", slice);
 1583                 ss->sysctl_tree =
 1584                         SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
 1585                             CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
 1586                 children = SYSCTL_CHILDREN(ss->sysctl_tree);
 1587                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1588                                "rx_small_cnt",
 1589                                CTLFLAG_RD, &ss->rx_small.cnt,
 1590                                0, "rx_small_cnt");
 1591                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1592                                "rx_big_cnt",
 1593                                CTLFLAG_RD, &ss->rx_big.cnt,
 1594                                0, "rx_small_cnt");
 1595                 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
 1596                                "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
 1597                                0, "number of lro merge queues flushed");
 1598 
 1599                 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
 1600                                "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
 1601                                0, "number of bad csums preventing LRO");
 1602 
 1603                 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
 1604                                "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
 1605                                0, "number of frames appended to lro merge"
 1606                                "queues");
 1607 
 1608 #ifndef IFNET_BUF_RING
 1609                 /* only transmit from slice 0 for now */
 1610                 if (slice > 0)
 1611                         continue;
 1612 #endif
 1613                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1614                                "tx_req",
 1615                                CTLFLAG_RD, &ss->tx.req,
 1616                                0, "tx_req");
 1617 
 1618                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1619                                "tx_done",
 1620                                CTLFLAG_RD, &ss->tx.done,
 1621                                0, "tx_done");
 1622                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1623                                "tx_pkt_done",
 1624                                CTLFLAG_RD, &ss->tx.pkt_done,
 1625                                0, "tx_done");
 1626                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1627                                "tx_stall",
 1628                                CTLFLAG_RD, &ss->tx.stall,
 1629                                0, "tx_stall");
 1630                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1631                                "tx_wake",
 1632                                CTLFLAG_RD, &ss->tx.wake,
 1633                                0, "tx_wake");
 1634                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1635                                "tx_defrag",
 1636                                CTLFLAG_RD, &ss->tx.defrag,
 1637                                0, "tx_defrag");
 1638                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1639                                "tx_queue_active",
 1640                                CTLFLAG_RD, &ss->tx.queue_active,
 1641                                0, "tx_queue_active");
 1642                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1643                                "tx_activate",
 1644                                CTLFLAG_RD, &ss->tx.activate,
 1645                                0, "tx_activate");
 1646                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
 1647                                "tx_deactivate",
 1648                                CTLFLAG_RD, &ss->tx.deactivate,
 1649                                0, "tx_deactivate");
 1650         }
 1651 }
 1652 
 1653 /* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 1654    backwards one at a time and handle ring wraps */
 1655 
 1656 static inline void
 1657 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
 1658                             mcp_kreq_ether_send_t *src, int cnt)
 1659 {
 1660         int idx, starting_slot;
 1661         starting_slot = tx->req;
 1662         while (cnt > 1) {
 1663                 cnt--;
 1664                 idx = (starting_slot + cnt) & tx->mask;
 1665                 mxge_pio_copy(&tx->lanai[idx],
 1666                               &src[cnt], sizeof(*src));
 1667                 wmb();
 1668         }
 1669 }
 1670 
 1671 /*
 1672  * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 1673  * at most 32 bytes at a time, so as to avoid involving the software
 1674  * pio handler in the nic.   We re-write the first segment's flags
 1675  * to mark them valid only after writing the entire chain
 1676  */
 1677 
static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		  int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/*
	 * Stash the first descriptor's flags and clear them in the
	 * source copy; the chain is made valid only at the very end by
	 * rewriting the flags word, so the NIC cannot start on a
	 * half-written request.
	 */
	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* contiguous case: burst-copy two descriptors (32 bytes)
		   at a time */
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints =  *src_ints;
	tx->req += cnt;
	wmb();
}
 1726 
/*
 * Parse an outbound frame far enough to locate its L3 (and, for TSO,
 * TCP) headers, recording offsets/lengths in *pi for the checksum and
 * TSO offload setup.  Header bytes that are not contiguous in the
 * first mbuf are copied into ss->scratch so they can be read safely.
 * Returns 0 on success, or EINVAL for frames the NIC cannot offload.
 */
static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
    struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	/* account for an optional 802.1Q tag when locating the IP header */
	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
			    ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		/* for TSO the TCP header must be readable as well */
		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
			    ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		/* walk any IPv6 extension headers to find the transport */
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		/* firmware limits how much IPv6 header it will parse */
		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}
 1806 
 1807 #if IFCAP_TSO4
 1808 
/*
 * Build and submit the chain of send descriptors for a TSO packet.
 * The busdma segments in tx->seg_list are split into per-MSS pieces,
 * marking segmentation "cuts" with MXGEFW_FLAGS_TSO_CHOP so the
 * firmware can emit individual frames.  On descriptor overflow the
 * mapping is unloaded and the mbuf dropped.
 */
static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;	/* rate-limits the overflow warning to one */

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
			    m->m_pkthdr.len - cksum_offset,
			    IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
			    pi->ip->ip_dst.s_addr,
			    htons(IPPROTO_TCP + (m->m_pkthdr.len -
				    cksum_offset)));
#endif
		}
		/* write the pseudo-header sum back into the packet */
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
		    cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one ore more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = 	htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			/* retroactively fix up the previous run's RDMA count */
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				/* branch-free flag arithmetic: chop and
				   next_is_first are 0 or 1 */
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			    }

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	/* final retroactive RDMA count for the last run */
	(req-rdma_count)->rdma_count = rdma_count;

	/* mark every trailing descriptor back to the last cut as LAST */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}
 1991 
 1992 #endif /* IFCAP_TSO4 */
 1993 
 1994 #ifdef MXGE_NEW_VLAN_API
 1995 /*
 1996  * We reproduce the software vlan tag insertion from
 1997  * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 1998  * vlan tag insertion. We need to advertise this in order to have the
 1999  * vlan interface respect our csum offload flags.
 2000  */
 2001 static struct mbuf *
 2002 mxge_vlan_tag_insert(struct mbuf *m)
 2003 {
 2004         struct ether_vlan_header *evl;
 2005 
 2006         M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
 2007         if (__predict_false(m == NULL))
 2008                 return NULL;
 2009         if (m->m_len < sizeof(*evl)) {
 2010                 m = m_pullup(m, sizeof(*evl));
 2011                 if (__predict_false(m == NULL))
 2012                         return NULL;
 2013         }
 2014         /*
 2015          * Transform the Ethernet header into an Ethernet header
 2016          * with 802.1Q encapsulation.
 2017          */
 2018         evl = mtod(m, struct ether_vlan_header *);
 2019         bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
 2020               (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
 2021         evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
 2022         evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
 2023         m->m_flags &= ~M_VLANTAG;
 2024         return m;
 2025 }
 2026 #endif /* MXGE_NEW_VLAN_API */
 2027 
/*
 * Map an outbound mbuf chain for DMA and hand it to the NIC as a list
 * of send descriptors.  Handles software VLAN tag insertion, checksum
 * offload setup, runt padding, and dispatches TSO frames to
 * mxge_encap_tso().  On any failure the mbuf is freed and the slice's
 * output-error counter is bumped.
 */
static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	struct mxge_pkt_info pi = {0,0,0,0};
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	mxge_tx_ring_t *tx;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	sc = ss->sc;
	tx = &ss->tx;

#ifdef MXGE_NEW_VLAN_API
	/* insert the tag in software so "hardware" vlan tagging works */
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop_without_m;
	}
#endif
	/* any offload request needs the header offsets from mxge_parse_tx */
	if (m->m_pkthdr.csum_flags &
	    (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
		if (mxge_parse_tx(ss, m, &pi))
			goto drop;
	}

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, &pi);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags &
	    (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		cksum_offset = pi.ip_off + pi.ip_hlen;
		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		/* cksum_offset walks down as segments consume the header */
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	m_freem(m);
drop_without_m:
	ss->oerrors++;
	return;
}
 2190 
 2191 #ifdef IFNET_BUF_RING
 2192 static void
 2193 mxge_qflush(struct ifnet *ifp)
 2194 {
 2195         mxge_softc_t *sc = ifp->if_softc;
 2196         mxge_tx_ring_t *tx;
 2197         struct mbuf *m;
 2198         int slice;
 2199 
 2200         for (slice = 0; slice < sc->num_slices; slice++) {
 2201                 tx = &sc->ss[slice].tx;
 2202                 mtx_lock(&tx->mtx);
 2203                 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
 2204                         m_freem(m);
 2205                 mtx_unlock(&tx->mtx);
 2206         }
 2207         if_qflush(ifp);
 2208 }
 2209 
 2210 static inline void
 2211 mxge_start_locked(struct mxge_slice_state *ss)
 2212 {
 2213         mxge_softc_t *sc;
 2214         struct mbuf *m;
 2215         struct ifnet *ifp;
 2216         mxge_tx_ring_t *tx;
 2217 
 2218         sc = ss->sc;
 2219         ifp = sc->ifp;
 2220         tx = &ss->tx;
 2221 
 2222         while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
 2223                 m = drbr_dequeue(ifp, tx->br);
 2224                 if (m == NULL) {
 2225                         return;
 2226                 }
 2227                 /* let BPF see it */
 2228                 BPF_MTAP(ifp, m);
 2229 
 2230                 /* give it to the nic */
 2231                 mxge_encap(ss, m);
 2232         }
 2233         /* ran out of transmit slots */
 2234         if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
 2235             && (!drbr_empty(ifp, tx->br))) {
 2236                 ss->if_drv_flags |= IFF_DRV_OACTIVE;
 2237                 tx->stall++;
 2238         }
 2239 }
 2240 
 2241 static int
 2242 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
 2243 {
 2244         mxge_softc_t *sc;
 2245         struct ifnet *ifp;
 2246         mxge_tx_ring_t *tx;
 2247         int err;
 2248 
 2249         sc = ss->sc;
 2250         ifp = sc->ifp;
 2251         tx = &ss->tx;
 2252 
 2253         if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
 2254             IFF_DRV_RUNNING) {
 2255                 err = drbr_enqueue(ifp, tx->br, m);
 2256                 return (err);
 2257         }
 2258 
 2259         if (!drbr_needs_enqueue(ifp, tx->br) &&
 2260             ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
 2261                 /* let BPF see it */
 2262                 BPF_MTAP(ifp, m);
 2263                 /* give it to the nic */
 2264                 mxge_encap(ss, m);
 2265         } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
 2266                 return (err);
 2267         }
 2268         if (!drbr_empty(ifp, tx->br))
 2269                 mxge_start_locked(ss);
 2270         return (0);
 2271 }
 2272 
 2273 static int
 2274 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
 2275 {
 2276         mxge_softc_t *sc = ifp->if_softc;
 2277         struct mxge_slice_state *ss;
 2278         mxge_tx_ring_t *tx;
 2279         int err = 0;
 2280         int slice;
 2281 
 2282         slice = m->m_pkthdr.flowid;
 2283         slice &= (sc->num_slices - 1);  /* num_slices always power of 2 */
 2284 
 2285         ss = &sc->ss[slice];
 2286         tx = &ss->tx;
 2287 
 2288         if (mtx_trylock(&tx->mtx)) {
 2289                 err = mxge_transmit_locked(ss, m);
 2290                 mtx_unlock(&tx->mtx);
 2291         } else {
 2292                 err = drbr_enqueue(ifp, tx->br, m);
 2293         }
 2294 
 2295         return (err);
 2296 }
 2297 
 2298 #else
 2299 
 2300 static inline void
 2301 mxge_start_locked(struct mxge_slice_state *ss)
 2302 {
 2303         mxge_softc_t *sc;
 2304         struct mbuf *m;
 2305         struct ifnet *ifp;
 2306         mxge_tx_ring_t *tx;
 2307 
 2308         sc = ss->sc;
 2309         ifp = sc->ifp;
 2310         tx = &ss->tx;
 2311         while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
 2312                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
 2313                 if (m == NULL) {
 2314                         return;
 2315                 }
 2316                 /* let BPF see it */
 2317                 BPF_MTAP(ifp, m);
 2318 
 2319                 /* give it to the nic */
 2320                 mxge_encap(ss, m);
 2321         }
 2322         /* ran out of transmit slots */
 2323         if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
 2324                 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 2325                 tx->stall++;
 2326         }
 2327 }
 2328 #endif
 2329 static void
 2330 mxge_start(struct ifnet *ifp)
 2331 {
 2332         mxge_softc_t *sc = ifp->if_softc;
 2333         struct mxge_slice_state *ss;
 2334 
 2335         /* only use the first slice for now */
 2336         ss = &sc->ss[0];
 2337         mtx_lock(&ss->tx.mtx);
 2338         mxge_start_locked(ss);
 2339         mtx_unlock(&ss->tx.mtx);                
 2340 }
 2341 
 2342 /*
 2343  * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 2344  * at most 32 bytes at a time, so as to avoid involving the software
 2345  * pio handler in the nic.   We re-write the first segment's low
 2346  * DMA address to mark it valid only after we write the entire chunk
 2347  * in a burst
 2348  */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/*
	 * Stash the first descriptor's low DMA address and replace it
	 * with an all-ones marker so the NIC ignores the chunk until
	 * the whole burst has been written.
	 */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	/* copy the 8 descriptors as two 32-byte PIO bursts */
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	/* restore the real address; writing it to dst validates the chunk */
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}
 2365 
 2366 static int
 2367 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
 2368 {
 2369         bus_dma_segment_t seg;
 2370         struct mbuf *m;
 2371         mxge_rx_ring_t *rx = &ss->rx_small;
 2372         int cnt, err;
 2373 
 2374         m = m_gethdr(M_NOWAIT, MT_DATA);
 2375         if (m == NULL) {
 2376                 rx->alloc_fail++;
 2377                 err = ENOBUFS;
 2378                 goto done;
 2379         }
 2380         m->m_len = MHLEN;
 2381         err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
 2382                                       &seg, &cnt, BUS_DMA_NOWAIT);
 2383         if (err != 0) {
 2384                 m_free(m);
 2385                 goto done;
 2386         }
 2387         rx->info[idx].m = m;
 2388         rx->shadow[idx].addr_low =
 2389                 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
 2390         rx->shadow[idx].addr_high =
 2391                 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
 2392 
 2393 done:
 2394         if ((idx & 7) == 7)
 2395                 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
 2396         return err;
 2397 }
 2398 
 2399 static int
 2400 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
 2401 {
 2402         bus_dma_segment_t seg[3];
 2403         struct mbuf *m;
 2404         mxge_rx_ring_t *rx = &ss->rx_big;
 2405         int cnt, err, i;
 2406 
 2407         m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
 2408         if (m == NULL) {
 2409                 rx->alloc_fail++;
 2410                 err = ENOBUFS;
 2411                 goto done;
 2412         }
 2413         m->m_len = rx->mlen;
 2414         err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
 2415                                       seg, &cnt, BUS_DMA_NOWAIT);
 2416         if (err != 0) {
 2417                 m_free(m);
 2418                 goto done;
 2419         }
 2420         rx->info[idx].m = m;
 2421         rx->shadow[idx].addr_low =
 2422                 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
 2423         rx->shadow[idx].addr_high =
 2424                 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
 2425 
 2426 #if MXGE_VIRT_JUMBOS
 2427         for (i = 1; i < cnt; i++) {
 2428                 rx->shadow[idx + i].addr_low =
 2429                         htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
 2430                 rx->shadow[idx + i].addr_high =
 2431                         htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
 2432        }
 2433 #endif
 2434 
 2435 done:
 2436        for (i = 0; i < rx->nbufs; i++) {
 2437                 if ((idx & 7) == 7) {
 2438                         mxge_submit_8rx(&rx->lanai[idx - 7],
 2439                                         &rx->shadow[idx - 7]);
 2440                 }
 2441                 idx++;
 2442         }
 2443         return err;
 2444 }
 2445 
 2446 #ifdef INET6
 2447 
 2448 static uint16_t
 2449 mxge_csum_generic(uint16_t *raw, int len)
 2450 {
 2451         uint32_t csum;
 2452 
 2453         csum = 0;
 2454         while (len > 0) {
 2455                 csum += *raw;
 2456                 raw++;
 2457                 len -= 2;
 2458         }
 2459         csum = (csum >> 16) + (csum & 0xffff);
 2460         csum = (csum >> 16) + (csum & 0xffff);
 2461         return (uint16_t)csum;
 2462 }
 2463 
/*
 * Validate the firmware's partial checksum for a received IPv6
 * frame.  @p points at the IPv6 header (just past the Ethernet
 * header), @csum is the 16-bit one's-complement sum the NIC computed
 * over the frame past the Ethernet header.  Returns 0 when the
 * TCP/UDP checksum verifies, non-zero otherwise (the caller treats
 * any non-zero value as "do not trust").
 */
static inline uint16_t
mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
{
	uint32_t partial;
	int nxt, cksum_offset;
	struct ip6_hdr *ip6 = p;
	uint16_t c;

	nxt = ip6->ip6_nxt;
	cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
	if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
		/* walk extension headers to find the upper-layer protocol */
		cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
					   IPPROTO_IPV6, &nxt);
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return (1);
	}

	/*
	 * IPv6 headers do not contain a checksum, and hence
	 * do not checksum to zero, so they don't "fall out"
	 * of the partial checksum calculation like IPv4
	 * headers do.  We need to fix the partial checksum by
	 * subtracting the checksum of the IPv6 header.
	 */

	partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
				    ETHER_HDR_LEN);
	/* one's-complement subtraction of the header sum, carry folded */
	csum += ~partial;
	csum +=  (csum < ~partial);
	csum = (csum >> 16) + (csum & 0xFFFF);
	csum = (csum >> 16) + (csum & 0xFFFF);
	/* fold in the pseudo-header; invert so 0 means "checksum good" */
	c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
			     csum);
	c ^= 0xffff;
	return (c);
}
 2500 #endif /* INET6 */
 2501 /*
 2502  *  Myri10GE hardware checksums are not valid if the sender
 2503  *  padded the frame with non-zero padding.  This is because
 2504  *  the firmware just does a simple 16-bit 1s complement
 2505  *  checksum across the entire frame, excluding the first 14
 2506  *  bytes.  It is best to simply to check the checksum and
 2507  *  tell the stack about it only if the checksum is good
 2508  */
 2509 
/*
 * Verify the firmware's receive checksum (@csum) for the frame in
 * @m.  Returns 0 when the checksum is valid and the stack may be
 * told so; non-zero when it must be ignored (non-IP frame,
 * non-TCP/UDP payload, or the relevant RX csum capability is off).
 */
static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
#ifdef INET
	struct ip *ip;
#endif
#if defined(INET) || defined(INET6)
	int cap = m->m_pkthdr.rcvif->if_capenable;
#endif
	uint16_t c, etype;

	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		if ((cap & IFCAP_RXCSUM) == 0)
			return (1);
		ip = (struct ip *)(eh + 1);
		if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
			return (1);
		/*
		 * combine the NIC's partial sum with the IPv4
		 * pseudo-header and invert; c == 0 means valid
		 */
		c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
			      htonl(ntohs(csum) + ntohs(ip->ip_len) -
				    (ip->ip_hl << 2) + ip->ip_p));
		c ^= 0xffff;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		if ((cap & IFCAP_RXCSUM_IPV6) == 0)
			return (1);
		c = mxge_rx_csum6((eh + 1), m, csum);
		break;
#endif
	default:
		/* unknown ethertype: checksum cannot be used */
		c = 1;
	}
	return (c);
}
 2550 
/*
 * Strip the 802.1q encapsulation from a received frame: record the
 * tag in the mbuf packet header and adjust the firmware's partial
 * checksum (*csum) so it no longer covers the four removed bytes.
 */
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	/* one's-complement subtraction of the 4 VLAN bytes, carry folded */
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	(*csum) += ~partial;
	(*csum) +=  ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API	
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
#else
	{
		struct m_tag *mtag;
		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
				   M_NOWAIT);
		/* allocation failed: leave the 802.1q header in place */
		if (mtag == NULL)
			return;
		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
		m_tag_prepend(m, mtag);
	}

#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}
 2604 
 2605 static inline void
 2606 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
 2607                  uint32_t csum, int lro)
 2608 {
 2609         mxge_softc_t *sc;
 2610         struct ifnet *ifp;
 2611         struct mbuf *m;
 2612         struct ether_header *eh;
 2613         mxge_rx_ring_t *rx;
 2614         bus_dmamap_t old_map;
 2615         int idx;
 2616 
 2617         sc = ss->sc;
 2618         ifp = sc->ifp;
 2619         rx = &ss->rx_big;
 2620         idx = rx->cnt & rx->mask;
 2621         rx->cnt += rx->nbufs;
 2622         /* save a pointer to the received mbuf */
 2623         m = rx->info[idx].m;
 2624         /* try to replace the received mbuf */
 2625         if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
 2626                 /* drop the frame -- the old mbuf is re-cycled */
 2627                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 2628                 return;
 2629         }
 2630 
 2631         /* unmap the received buffer */
 2632         old_map = rx->info[idx].map;
 2633         bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
 2634         bus_dmamap_unload(rx->dmat, old_map);
 2635 
 2636         /* swap the bus_dmamap_t's */
 2637         rx->info[idx].map = rx->extra_map;
 2638         rx->extra_map = old_map;
 2639 
 2640         /* mcp implicitly skips 1st 2 bytes so that packet is properly
 2641          * aligned */
 2642         m->m_data += MXGEFW_PAD;
 2643 
 2644         m->m_pkthdr.rcvif = ifp;
 2645         m->m_len = m->m_pkthdr.len = len;
 2646         ss->ipackets++;
 2647         eh = mtod(m, struct ether_header *);
 2648         if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 2649                 mxge_vlan_tag_remove(m, &csum);
 2650         }
 2651         /* flowid only valid if RSS hashing is enabled */
 2652         if (sc->num_slices > 1) {
 2653                 m->m_pkthdr.flowid = (ss - sc->ss);
 2654                 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 2655         }
 2656         /* if the checksum is valid, mark it in the mbuf header */
 2657         if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
 2658             (0 == mxge_rx_csum(m, csum))) {
 2659                 /* Tell the stack that the  checksum is good */
 2660                 m->m_pkthdr.csum_data = 0xffff;
 2661                 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
 2662                         CSUM_DATA_VALID;
 2663 
 2664 #if defined(INET) || defined (INET6)
 2665                 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
 2666                         return;
 2667 #endif
 2668         }
 2669         /* pass the frame up the stack */
 2670         (*ifp->if_input)(ifp, m);
 2671 }
 2672 
 2673 static inline void
 2674 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
 2675                    uint32_t csum, int lro)
 2676 {
 2677         mxge_softc_t *sc;
 2678         struct ifnet *ifp;
 2679         struct ether_header *eh;
 2680         struct mbuf *m;
 2681         mxge_rx_ring_t *rx;
 2682         bus_dmamap_t old_map;
 2683         int idx;
 2684 
 2685         sc = ss->sc;
 2686         ifp = sc->ifp;
 2687         rx = &ss->rx_small;
 2688         idx = rx->cnt & rx->mask;
 2689         rx->cnt++;
 2690         /* save a pointer to the received mbuf */
 2691         m = rx->info[idx].m;
 2692         /* try to replace the received mbuf */
 2693         if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
 2694                 /* drop the frame -- the old mbuf is re-cycled */
 2695                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 2696                 return;
 2697         }
 2698 
 2699         /* unmap the received buffer */
 2700         old_map = rx->info[idx].map;
 2701         bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
 2702         bus_dmamap_unload(rx->dmat, old_map);
 2703 
 2704         /* swap the bus_dmamap_t's */
 2705         rx->info[idx].map = rx->extra_map;
 2706         rx->extra_map = old_map;
 2707 
 2708         /* mcp implicitly skips 1st 2 bytes so that packet is properly
 2709          * aligned */
 2710         m->m_data += MXGEFW_PAD;
 2711 
 2712         m->m_pkthdr.rcvif = ifp;
 2713         m->m_len = m->m_pkthdr.len = len;
 2714         ss->ipackets++;
 2715         eh = mtod(m, struct ether_header *);
 2716         if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 2717                 mxge_vlan_tag_remove(m, &csum);
 2718         }
 2719         /* flowid only valid if RSS hashing is enabled */
 2720         if (sc->num_slices > 1) {
 2721                 m->m_pkthdr.flowid = (ss - sc->ss);
 2722                 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 2723         }
 2724         /* if the checksum is valid, mark it in the mbuf header */
 2725         if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
 2726             (0 == mxge_rx_csum(m, csum))) {
 2727                 /* Tell the stack that the  checksum is good */
 2728                 m->m_pkthdr.csum_data = 0xffff;
 2729                 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
 2730                         CSUM_DATA_VALID;
 2731 
 2732 #if defined(INET) || defined (INET6)
 2733                 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
 2734                         return;
 2735 #endif
 2736         }
 2737         /* pass the frame up the stack */
 2738         (*ifp->if_input)(ifp, m);
 2739 }
 2740 
 2741 static inline void
 2742 mxge_clean_rx_done(struct mxge_slice_state *ss)
 2743 {
 2744         mxge_rx_done_t *rx_done = &ss->rx_done;
 2745         int limit = 0;
 2746         uint16_t length;
 2747         uint16_t checksum;
 2748         int lro;
 2749 
 2750         lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
 2751         while (rx_done->entry[rx_done->idx].length != 0) {
 2752                 length = ntohs(rx_done->entry[rx_done->idx].length);
 2753                 rx_done->entry[rx_done->idx].length = 0;
 2754                 checksum = rx_done->entry[rx_done->idx].checksum;
 2755                 if (length <= (MHLEN - MXGEFW_PAD))
 2756                         mxge_rx_done_small(ss, length, checksum, lro);
 2757                 else
 2758                         mxge_rx_done_big(ss, length, checksum, lro);
 2759                 rx_done->cnt++;
 2760                 rx_done->idx = rx_done->cnt & rx_done->mask;
 2761 
 2762                 /* limit potential for livelock */
 2763                 if (__predict_false(++limit > rx_done->mask / 2))
 2764                         break;
 2765         }
 2766 #if defined(INET)  || defined (INET6)
 2767         tcp_lro_flush_all(&ss->lc);
 2768 #endif
 2769 }
 2770 
/*
 * Reclaim transmit descriptors the firmware reports complete (up to
 * firmware packet index @mcp_idx): unload DMA maps, free mbufs,
 * update per-slice stats, and restart transmission if the ring had
 * stalled with IFF_DRV_OACTIVE set.
 */
static inline void
mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
{
	struct ifnet *ifp __unused;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx;
	int *flags;

	tx = &ss->tx;
	ifp = ss->sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ss->obytes += m->m_pkthdr.len;
			if (m->m_flags & M_MCAST)
				ss->omcasts++;
			ss->opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		/* a set flag on a descriptor advances the packet count */
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
	   its OK to send packets */
#ifdef IFNET_BUF_RING
	flags = &ss->if_drv_flags;
#else
	flags = &ifp->if_drv_flags;
#endif
	mtx_lock(&ss->tx.mtx);
	/* wake the queue once less than 1/4 of the ring is in use */
	if ((*flags) & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		*(flags) &= ~IFF_DRV_OACTIVE;
		ss->tx.wake++;
		mxge_start_locked(ss);
	}
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
		/* let the NIC stop polling this queue, since there
		 * are no more transmits pending */
		if (tx->req == tx->done) {
			*tx->send_stop = 1;
			tx->queue_active = 0;
			tx->deactivate++;
			wmb();
		}
	}
#endif
	mtx_unlock(&ss->tx.mtx);

}
 2834 
/*
 * XFP module technology bits mapped to ifmedia types.  A zero
 * ifmedia value means FreeBSD has no media word for that
 * technology.  Entry 0 is matched as a full byte and later entries
 * as individual bits (see mxge_media_probe()).
 */
static struct mxge_media_type mxge_xfp_media_types[] =
{
	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
	{IFM_10G_SR,	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR,	(1 << 6),	"10GBASE-LR"},
	{0,		(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{0,		(1 << 3),	"10GBASE-SW"},
	{0,		(1 << 2),	"10GBASE-LW"},
	{0,		(1 << 1),	"10GBASE-EW"},
	{0,		(1 << 0),	"Reserved"}
};
/*
 * SFP+ module technology bits mapped to ifmedia types.  Entry 0
 * (Twinax, bitmask 0) matches when no technology bit is set; later
 * entries are matched as individual bits (see mxge_media_probe()).
 */
static struct mxge_media_type mxge_sfp_media_types[] =
{
	{IFM_10G_TWINAX,      0,	"10GBASE-Twinax"},
	{0,		(1 << 7),	"Reserved"},
	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR,	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"},
	{IFM_10G_TWINAX,(1 << 0),	"10GBASE-Twinax"}
};
 2856 
 2857 static void
 2858 mxge_media_set(mxge_softc_t *sc, int media_type)
 2859 {
 2860 
 2861         ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
 2862                     0, NULL);
 2863         ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
 2864         sc->current_media = media_type;
 2865         sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
 2866 }
 2867 
 2868 static void
 2869 mxge_media_init(mxge_softc_t *sc)
 2870 {
 2871         char *ptr;
 2872         int i;
 2873 
 2874         ifmedia_removeall(&sc->media);
 2875         mxge_media_set(sc, IFM_AUTO);
 2876 
 2877         /*
 2878          * parse the product code to deterimine the interface type
 2879          * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
 2880          * after the 3rd dash in the driver's cached copy of the
 2881          * EEPROM's product code string.
 2882          */
 2883         ptr = sc->product_code_string;
 2884         if (ptr == NULL) {
 2885                 device_printf(sc->dev, "Missing product code\n");
 2886                 return;
 2887         }
 2888 
 2889         for (i = 0; i < 3; i++, ptr++) {
 2890                 ptr = strchr(ptr, '-');
 2891                 if (ptr == NULL) {
 2892                         device_printf(sc->dev,
 2893                                       "only %d dashes in PC?!?\n", i);
 2894                         return;
 2895                 }
 2896         }
 2897         if (*ptr == 'C' || *(ptr +1) == 'C') {
 2898                 /* -C is CX4 */
 2899                 sc->connector = MXGE_CX4;
 2900                 mxge_media_set(sc, IFM_10G_CX4);
 2901         } else if (*ptr == 'Q') {
 2902                 /* -Q is Quad Ribbon Fiber */
 2903                 sc->connector = MXGE_QRF;
 2904                 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
 2905                 /* FreeBSD has no media type for Quad ribbon fiber */
 2906         } else if (*ptr == 'R') {
 2907                 /* -R is XFP */
 2908                 sc->connector = MXGE_XFP;
 2909         } else if (*ptr == 'S' || *(ptr +1) == 'S') {
 2910                 /* -S or -2S is SFP+ */
 2911                 sc->connector = MXGE_SFP;
 2912         } else {
 2913                 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
 2914         }
 2915 }
 2916 
 2917 /*
 2918  * Determine the media type for a NIC.  Some XFPs will identify
 2919  * themselves only when their link is up, so this is initiated via a
 2920  * link up interrupt.  However, this can potentially take up to
 2921  * several milliseconds, so it is run via the watchdog routine, rather
 2922  * than in the interrupt handler itself.
 2923  */
static void
mxge_media_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *cage_type;

	struct mxge_media_type *mxge_media_types = NULL;
	int i, err, ms, mxge_media_type_entries;
	uint32_t byte;

	sc->need_media_probe = 0;

	/* pick the module table and the I2C byte to read by cage type */
	if (sc->connector == MXGE_XFP) {
		/* -R is XFP */
		mxge_media_types = mxge_xfp_media_types;
		mxge_media_type_entries =
			nitems(mxge_xfp_media_types);
		byte = MXGE_XFP_COMPLIANCE_BYTE;
		cage_type = "XFP";
	} else  if (sc->connector == MXGE_SFP) {
		/* -S or -2S is SFP+ */
		mxge_media_types = mxge_sfp_media_types;
		mxge_media_type_entries =
			nitems(mxge_sfp_media_types);
		cage_type = "SFP+";
		byte = 3;
	} else {
		/* nothing to do; media type cannot change */
		return;
	}

	/*
	 * At this point we know the NIC has an XFP cage, so now we
	 * try to determine what is in the cage by using the
	 * firmware's XFP I2C commands to read the XFP 10GbE compilance
	 * register.  We read just one byte, which may take over
	 * a millisecond
	 */

	cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
	cmd.data1 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
	if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
		device_printf(sc->dev, "failed to read XFP\n");
	}
	if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
		device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
	}
	if (err != MXGEFW_CMD_OK) {
		return;
	}

	/* now we wait for the data to be cached */
	cmd.data0 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	/* poll up to ~50ms while the firmware caches the byte */
	for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
		DELAY(1000);
		cmd.data0 = byte;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	}
	if (err != MXGEFW_CMD_OK) {
		device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
			      cage_type, err, ms);
		return;
	}

	/* table entry 0 is matched as a full byte, the rest as bits */
	if (cmd.data0 == mxge_media_types[0].bitmask) {
		if (mxge_verbose)
			device_printf(sc->dev, "%s:%s\n", cage_type,
				      mxge_media_types[0].name);
		if (sc->current_media != mxge_media_types[0].flag) {
			mxge_media_init(sc);
			mxge_media_set(sc, mxge_media_types[0].flag);
		}
		return;
	}
	for (i = 1; i < mxge_media_type_entries; i++) {
		if (cmd.data0 & mxge_media_types[i].bitmask) {
			if (mxge_verbose)
				device_printf(sc->dev, "%s:%s\n",
					      cage_type,
					      mxge_media_types[i].name);

			if (sc->current_media != mxge_media_types[i].flag) {
				mxge_media_init(sc);
				mxge_media_set(sc, mxge_media_types[i].flag);
			}
			return;
		}
	}
	if (mxge_verbose)
		device_printf(sc->dev, "%s media 0x%x unknown\n",
			      cage_type, cmd.data0);

	return;
}
 3020 
/*
 * Per-slice interrupt handler: reclaim transmit completions and
 * receive events until the firmware marks the interrupt handled,
 * update link/error state (first slice only), then return the irq
 * claim credits to the NIC.
 */
static void
mxge_intr(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_done;
	uint32_t send_done_count;
	uint8_t valid;

#ifndef IFNET_BUF_RING
	/* an interrupt on a non-zero slice is implicitly valid
	   since MSI-X irqs are not shared */
	if (ss != sc->ss) {
		mxge_clean_rx_done(ss);
		*ss->irq_claim = be32toh(3);
		return;
	}
#endif

	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	if (sc->legacy_irq) {
		/* lower legacy IRQ  */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			if (send_done_count != tx->pkt_done)
				mxge_tx_done(ss, (int)send_done_count);
			mxge_clean_rx_done(ss);
			send_done_count = be32toh(stats->send_done_count);
		}
		if (sc->legacy_irq && mxge_deassert_wait)
			wmb();
	} while (*((volatile uint8_t *) &stats->valid));

	/* fw link & error stats meaningful only on the first slice */
	if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
			/* have the watchdog re-probe the media/cage type */
			sc->need_media_probe = 1;
		}
		if (sc->rdma_tags_available !=
		    be32toh(stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}

		if (stats->link_down) {
			sc->down_cnt += stats->link_down;
			sc->link_state = 0;
			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
		}
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}
 3108 
 3109 static void
 3110 mxge_init(void *arg)
 3111 {
 3112         mxge_softc_t *sc = arg;
 3113         struct ifnet *ifp = sc->ifp;
 3114 
 3115         mtx_lock(&sc->driver_mtx);
 3116         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 3117                 (void) mxge_open(sc);
 3118         mtx_unlock(&sc->driver_mtx);
 3119 }
 3120 
 3121 static void
 3122 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
 3123 {
 3124         int i;
 3125 
 3126 #if defined(INET) || defined(INET6)
 3127         tcp_lro_free(&ss->lc);
 3128 #endif
 3129         for (i = 0; i <= ss->rx_big.mask; i++) {
 3130                 if (ss->rx_big.info[i].m == NULL)
 3131                         continue;
 3132                 bus_dmamap_unload(ss->rx_big.dmat,
 3133                                   ss->rx_big.info[i].map);
 3134                 m_freem(ss->rx_big.info[i].m);
 3135                 ss->rx_big.info[i].m = NULL;
 3136         }
 3137 
 3138         for (i = 0; i <= ss->rx_small.mask; i++) {
 3139                 if (ss->rx_small.info[i].m == NULL)
 3140                         continue;
 3141                 bus_dmamap_unload(ss->rx_small.dmat,
 3142                                   ss->rx_small.info[i].map);
 3143                 m_freem(ss->rx_small.info[i].m);
 3144                 ss->rx_small.info[i].m = NULL;
 3145         }
 3146 
 3147         /* transmit ring used only on the first slice */
 3148         if (ss->tx.info == NULL)
 3149                 return;
 3150 
 3151         for (i = 0; i <= ss->tx.mask; i++) {
 3152                 ss->tx.info[i].flag = 0;
 3153                 if (ss->tx.info[i].m == NULL)
 3154                         continue;
 3155                 bus_dmamap_unload(ss->tx.dmat,
 3156                                   ss->tx.info[i].map);
 3157                 m_freem(ss->tx.info[i].m);
 3158                 ss->tx.info[i].m = NULL;
 3159         }
 3160 }
 3161 
 3162 static void
 3163 mxge_free_mbufs(mxge_softc_t *sc)
 3164 {
 3165         int slice;
 3166 
 3167         for (slice = 0; slice < sc->num_slices; slice++)
 3168                 mxge_free_slice_mbufs(&sc->ss[slice]);
 3169 }
 3170 
 3171 static void
 3172 mxge_free_slice_rings(struct mxge_slice_state *ss)
 3173 {
 3174         int i;
 3175 
 3176         if (ss->rx_done.entry != NULL)
 3177                 mxge_dma_free(&ss->rx_done.dma);
 3178         ss->rx_done.entry = NULL;
 3179 
 3180         if (ss->tx.req_bytes != NULL)
 3181                 free(ss->tx.req_bytes, M_DEVBUF);
 3182         ss->tx.req_bytes = NULL;
 3183 
 3184         if (ss->tx.seg_list != NULL)
 3185                 free(ss->tx.seg_list, M_DEVBUF);
 3186         ss->tx.seg_list = NULL;
 3187 
 3188         if (ss->rx_small.shadow != NULL)
 3189                 free(ss->rx_small.shadow, M_DEVBUF);
 3190         ss->rx_small.shadow = NULL;
 3191 
 3192         if (ss->rx_big.shadow != NULL)
 3193                 free(ss->rx_big.shadow, M_DEVBUF);
 3194         ss->rx_big.shadow = NULL;
 3195 
 3196         if (ss->tx.info != NULL) {
 3197                 if (ss->tx.dmat != NULL) {
 3198                         for (i = 0; i <= ss->tx.mask; i++) {
 3199                                 bus_dmamap_destroy(ss->tx.dmat,
 3200                                                    ss->tx.info[i].map);
 3201                         }
 3202                         bus_dma_tag_destroy(ss->tx.dmat);
 3203                 }
 3204                 free(ss->tx.info, M_DEVBUF);
 3205         }
 3206         ss->tx.info = NULL;
 3207 
 3208         if (ss->rx_small.info != NULL) {
 3209                 if (ss->rx_small.dmat != NULL) {
 3210                         for (i = 0; i <= ss->rx_small.mask; i++) {
 3211                                 bus_dmamap_destroy(ss->rx_small.dmat,
 3212                                                    ss->rx_small.info[i].map);
 3213                         }
 3214                         bus_dmamap_destroy(ss->rx_small.dmat,
 3215                                            ss->rx_small.extra_map);
 3216                         bus_dma_tag_destroy(ss->rx_small.dmat);
 3217                 }
 3218                 free(ss->rx_small.info, M_DEVBUF);
 3219         }
 3220         ss->rx_small.info = NULL;
 3221 
 3222         if (ss->rx_big.info != NULL) {
 3223                 if (ss->rx_big.dmat != NULL) {
 3224                         for (i = 0; i <= ss->rx_big.mask; i++) {
 3225                                 bus_dmamap_destroy(ss->rx_big.dmat,
 3226                                                    ss->rx_big.info[i].map);
 3227                         }
 3228                         bus_dmamap_destroy(ss->rx_big.dmat,
 3229                                            ss->rx_big.extra_map);
 3230                         bus_dma_tag_destroy(ss->rx_big.dmat);
 3231                 }
 3232                 free(ss->rx_big.info, M_DEVBUF);
 3233         }
 3234         ss->rx_big.info = NULL;
 3235 }
 3236 
 3237 static void
 3238 mxge_free_rings(mxge_softc_t *sc)
 3239 {
 3240         int slice;
 3241 
 3242         for (slice = 0; slice < sc->num_slices; slice++)
 3243                 mxge_free_slice_rings(&sc->ss[slice]);
 3244 }
 3245 
/*
 * Allocate the host-side ring state for one slice: rx shadow and
 * info rings, busdma tags plus a map per rx slot (and one spare
 * "extra" map per rx ring), and -- on slices that transmit -- the
 * tx request copy block, segment list, info ring, tag, and per-slot
 * maps.
 *
 * Returns 0 or a bus_dma/malloc error code.  On failure, already-
 * allocated pieces are NOT freed here; the caller (mxge_alloc_rings)
 * cleans up via mxge_free_rings().
 */
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
		       int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	/* allocate per-slice receive resources */

	/* ring sizes are powers of two, so size - 1 works as an index mask */
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	/* the completion ring receives events from both rx rings */
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		return err;
	}

	/* big-rx tag: segment geometry depends on whether jumbo frames
	 * are assembled from multiple page-sized buffers */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
#if MXGE_VIRT_JUMBOS
				 4096,			/* boundary */
#else
				 0,			/* boundary */
#endif
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
#if MXGE_VIRT_JUMBOS
				 3,			/* num segs */
				 4096,			/* maxsegsize*/
#else
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize*/
#endif
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		return err;
	}
	/* one dmamap per small-rx slot */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
					&ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
				      err);
			return err;
		}
	}
	/* spare map, used when refilling a slot */
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
				&ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		return err;
	}

	/* one dmamap per big-rx slot, plus a spare */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
					&ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
				&ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		return err;
	}

	/* now allocate TX resources */

#ifndef IFNET_BUF_RING
	/* only use a single TX ring for now */
	if (ss != ss->sc->ss)
		return 0;
#endif

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);

	/* allocate the tx request copy block */
	/* 8 extra bytes allow the 8-byte alignment fixup below; the
	 * +4 entries give mxge_submit_req slack past max_desc */
	bytes = 8 +
		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (mcp_kreq_ether_send_t *)
		((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);

	/* allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx_boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 ss->tx.max_desc - 2,	/* num segs */
				 sc->tx_boundary,	/* maxsegsz */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		return err;
	}

	/* now use these tags to setup dmamaps for each slot
	   in the ring */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat, 0,
					&ss->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  tx dmamap\n",
				      err);
			return err;
		}
	}
	return 0;

}
 3414 
/*
 * Query the firmware for the tx ring size, size the ifnet send
 * queue to match, and allocate the host-side ring state of every
 * slice.  On any failure all slice rings are freed before the
 * error is returned.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int err, slice;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
		goto abort;
	}

	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	/* rx_ring_size is presumably cached earlier in the driver --
	 * e.g. at reset time -- TODO confirm against full source */
	rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
	/* limit the ifnet send queue to what the tx ring can hold */
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_READY(&sc->ifp->if_snd);

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_alloc_slice_rings(&sc->ss[slice],
					     rx_ring_entries,
					     tx_ring_entries);
		if (err != 0)
			goto abort;
	}
	return 0;

abort:
	mxge_free_rings(sc);
	return err;

}
 3451 
 3452 static void
 3453 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
 3454 {
 3455         int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
 3456 
 3457         if (bufsize < MCLBYTES) {
 3458                 /* easy, everything fits in a single buffer */
 3459                 *big_buf_size = MCLBYTES;
 3460                 *cl_size = MCLBYTES;
 3461                 *nbufs = 1;
 3462                 return;
 3463         }
 3464 
 3465         if (bufsize < MJUMPAGESIZE) {
 3466                 /* still easy, everything still fits in a single buffer */
 3467                 *big_buf_size = MJUMPAGESIZE;
 3468                 *cl_size = MJUMPAGESIZE;
 3469                 *nbufs = 1;
 3470                 return;
 3471         }
 3472 #if MXGE_VIRT_JUMBOS
 3473         /* now we need to use virtually contiguous buffers */
 3474         *cl_size = MJUM9BYTES;
 3475         *big_buf_size = 4096;
 3476         *nbufs = mtu / 4096 + 1;
 3477         /* needs to be a power of two, so round up */
 3478         if (*nbufs == 3)
 3479                 *nbufs = 4;
 3480 #else
 3481         *cl_size = MJUM9BYTES;
 3482         *big_buf_size = MJUM9BYTES;
 3483         *nbufs = 1;
 3484 #endif
 3485 }
 3486 
/*
 * Program one slice with its ring locations in NIC SRAM and stock
 * its receive rings with mbufs.
 *
 * Returns 0 on success, EIO if the firmware will not report the
 * ring offsets, or ENOMEM if the rx rings cannot be fully stocked.
 */
static int
mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
	mxge_softc_t *sc;
	mxge_cmd_t cmd;
	bus_dmamap_t map;
	int err, i, slice;

	sc = ss->sc;
	slice = ss - sc->ss;		/* slice index from pointer offset */

#if defined(INET) || defined(INET6)
	/* NOTE(review): tcp_lro_init() failure is deliberately ignored here */
	(void)tcp_lro_init(&ss->lc);
#endif
	ss->lc.ifp = sc->ifp;

	/* get the lanai pointers to the send and receive rings */

	err = 0;
#ifndef IFNET_BUF_RING
	/* We currently only send from the first slice */
	if (slice == 0) {
#endif
		cmd.data0 = slice;
		err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
		ss->tx.lanai =
			(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
		/* per-slice send_go/send_stop registers, 64 bytes apart */
		ss->tx.send_go = (volatile uint32_t *)
			(sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
		ss->tx.send_stop = (volatile uint32_t *)
		(sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
#ifndef IFNET_BUF_RING
	}
#endif
	/* errors from the three GET_*_OFFSET commands are OR-ed together
	 * and checked once below */
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	/* pre-fill the big shadow ring with all-ones (invalid) addresses */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	/* largest receive frame: MTU + ethernet/vlan headers + fw pad */
	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
	/* each allocated cluster covers nbufs consecutive ring slots */
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}
 3566 
/*
 * Bring the interface fully up: reset the NIC, program the RSS
 * indirection table when multiple slices are in use, tell the
 * firmware the MTU and buffer geometry, point it at the per-slice
 * stats blocks, open every slice, then start the firmware.
 *
 * Called with sc->driver_mtx held (see mxge_init/mxge_ioctl).
 * Returns 0 or an errno; on failure all ring mbufs are freed.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		/* cmd.data0 returns the table's offset in NIC SRAM */
		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}

	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		/* NOTE(review): "alway" below is a typo for "always";
		 * left unchanged because the message is runtime output */
		device_printf(sc->dev,
			      "Failed to set alway-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	for (slice = 0;
#ifdef IFNET_BUF_RING
	     slice < sc->num_slices;
#else
	     slice < 1;
#endif
	     slice++) {
		ss = &sc->ss[slice];
		cmd.data0 =
			MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data1 =
			MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
		/* slice number is encoded in the upper 16 bits of data2 */
		cmd.data2 = sizeof(struct mcp_irq_data);
		cmd.data2 |= (slice << 16);
		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	}

	if (err != 0) {
		/* fall back to the obsolete stats DMA, pointing the
		 * firmware at just the send_done_count field */
		bus = sc->ss->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
		if (err != 0) {
			device_printf(sc->dev, "couldn't open slice %d\n",
				      slice);
			goto abort;
		}
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
#ifdef IFNET_BUF_RING
	/* mirror the drv flags into each slice's local copy */
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags |= IFF_DRV_RUNNING;
		ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
	}
#endif
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;

abort:
	mxge_free_mbufs(sc);

	return err;
}
 3711 
/*
 * Stop the interface.  Unless "down" is nonzero (firmware already
 * halted or being reset, as in the watchdog path), send
 * ETHERNET_DOWN and wait for the resulting down interrupt, which
 * the interrupt handler reports by bumping sc->down_cnt.  Always
 * frees the ring mbufs and returns 0.
 */
static int
mxge_close(mxge_softc_t *sc, int down)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;
#ifdef IFNET_BUF_RING
	struct mxge_slice_state *ss;
	int slice;
#endif

#ifdef IFNET_BUF_RING
	/* clear RUNNING in each slice's mirror of the drv flags */
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags &= ~IFF_DRV_RUNNING;
	}
#endif
	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	if (!down) {
		old_down_cnt = sc->down_cnt;
		/* make the flag updates visible before the down command */
		wmb();
		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
		if (err) {
			device_printf(sc->dev,
				      "Couldn't bring down link\n");
		}
		if (old_down_cnt == sc->down_cnt) {
			/* wait for down irq */
			DELAY(10 * sc->intr_coal_delay);
		}
		/* re-read down_cnt after the barrier */
		wmb();
		if (old_down_cnt == sc->down_cnt) {
			device_printf(sc->dev, "never got down irq\n");
		}
	}
	mxge_free_mbufs(sc);

	return 0;
}
 3750 
/*
 * (Re)program our PCI config space: record the negotiated PCIe
 * link width, set the PCIe max read request size to 4KB (saving
 * the device-control value so a watchdog reset can restore it),
 * and enable bus mastering.
 */
static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB*/
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		/* Link Status register at cap + 0x12; negotiated link
		 * width is in bits 9:4 (PCIe spec) */
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		if (sc->pectl == 0) {
			/* Device Control register at cap + 0x8: set max
			 * read request size (bits 14:12) to 5 = 4096 B */
			pectl = pci_read_config(dev, reg + 0x8, 2);
			pectl = (pectl & ~0x7000) | (5 << 12);
			pci_write_config(dev, reg + 0x8, pectl, 2);
			sc->pectl = pectl;
		} else {
			/* restore saved pectl after watchdog reset */
			pci_write_config(dev, reg + 0x8, sc->pectl, 2);
		}
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
}
 3777 
/*
 * Read the NIC's reboot-status dword through the Myricom
 * vendor-specific PCI capability's indirect read window.
 * Returns (uint32_t)-1 if the capability cannot be found.
 */
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	/* NOTE(review): pci_find_cap(9) takes an int *; passing &vs
	 * (uint32_t *) assumes identical representation -- confirm
	 * against this branch's pci.h */
	if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	/* the value appears in the data window at vs + 0x14 */
	return (pci_read_config(dev, vs + 0x14, 4));
}
 3796 
/*
 * Attempt to recover from a watchdog-detected hang.  If the NIC
 * rebooted (busmaster bit lost), quiesce transmit, close the
 * interface, restore PCI config space, reload firmware, and
 * reopen.  On success the tick callout is re-armed; otherwise the
 * failure is logged and the device is left down.
 *
 * Runs from mxge_watchdog_task() with sc->driver_mtx held.
 */
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	struct pci_devinfo *dinfo;
	struct mxge_slice_state *ss;
	int err, running, s, num_tx_slices = 1;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
		if (running) {
			/*
			 * quiesce NIC so that TX routines will not try to
			 * xmit after restoration of BAR
			 */

			/* Mark the link as down */
			if (sc->link_state) {
				sc->link_state = 0;
				if_link_state_change(sc->ifp,
						     LINK_STATE_DOWN);
			}
#ifdef IFNET_BUF_RING
			num_tx_slices = sc->num_slices;
#endif
			/* grab all TX locks to ensure no tx  */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
				mtx_lock(&ss->tx.mtx);
			}
			/* down == 1: firmware is gone, skip the
			 * ETHERNET_DOWN handshake */
			mxge_close(sc, 1);
		}
		/* restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* reload f/w */
		err = mxge_load_firmware(sc, 0);
		if (err) {
			device_printf(sc->dev,
				      "Unable to re-load f/w\n");
		}
		if (running) {
			if (!err)
				err = mxge_open(sc);
			/* release all TX locks */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
#ifdef IFNET_BUF_RING
				/* restart any tx queued during the reset */
				mxge_start_locked(ss);
#endif
				mtx_unlock(&ss->tx.mtx);
			}
		}
		sc->watchdog_resets++;
	} else {
		device_printf(sc->dev,
			      "NIC did not reboot, not resetting\n");
		err = 0;
	}
	if (err) {
		device_printf(sc->dev, "watchdog reset failed\n");
	} else {
		/* dying == 2 marks a reset in progress; clear it and
		 * re-arm the periodic tick */
		if (sc->dying == 2)
			sc->dying = 0;
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
}
 3897 
 3898 static void
 3899 mxge_watchdog_task(void *arg, int pending)
 3900 {
 3901         mxge_softc_t *sc = arg;
 3902 
 3903         mtx_lock(&sc->driver_mtx);
 3904         mxge_watchdog_reset(sc);
 3905         mtx_unlock(&sc->driver_mtx);
 3906 }
 3907 
 3908 static void
 3909 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
 3910 {
 3911         tx = &sc->ss[slice].tx;
 3912         device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
 3913         device_printf(sc->dev,
 3914                       "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
 3915                       tx->req, tx->done, tx->queue_active);
 3916         device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
 3917                               tx->activate, tx->deactivate);
 3918         device_printf(sc->dev, "pkt_done=%d fw=%d\n",
 3919                       tx->pkt_done,
 3920                       be32toh(sc->ss->fw_stats->send_done_count));
 3921 }
 3922 
 3923 static int
 3924 mxge_watchdog(mxge_softc_t *sc)
 3925 {
 3926         mxge_tx_ring_t *tx;
 3927         uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
 3928         int i, err = 0;
 3929 
 3930         /* see if we have outstanding transmits, which
 3931            have been pending for more than mxge_ticks */
 3932         for (i = 0;
 3933 #ifdef IFNET_BUF_RING
 3934              (i < sc->num_slices) && (err == 0);
 3935 #else
 3936              (i < 1) && (err == 0);
 3937 #endif
 3938              i++) {
 3939                 tx = &sc->ss[i].tx;             
 3940                 if (tx->req != tx->done &&
 3941                     tx->watchdog_req != tx->watchdog_done &&
 3942                     tx->done == tx->watchdog_done) {
 3943                         /* check for pause blocking before resetting */
 3944                         if (tx->watchdog_rx_pause == rx_pause) {
 3945                                 mxge_warn_stuck(sc, tx, i);
 3946                                 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
 3947                                 return (ENXIO);
 3948                         }
 3949                         else
 3950                                 device_printf(sc->dev, "Flow control blocking "
 3951                                               "xmits, check link partner\n");
 3952                 }
 3953 
 3954                 tx->watchdog_req = tx->req;
 3955                 tx->watchdog_done = tx->done;
 3956                 tx->watchdog_rx_pause = rx_pause;
 3957         }
 3958 
 3959         if (sc->need_media_probe)
 3960                 mxge_media_probe(sc);
 3961         return (err);
 3962 }
 3963 
 3964 static uint64_t
 3965 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
 3966 {
 3967         struct mxge_softc *sc;
 3968         uint64_t rv;
 3969 
 3970         sc = if_getsoftc(ifp);
 3971         rv = 0;
 3972 
 3973         switch (cnt) {
 3974         case IFCOUNTER_IPACKETS:
 3975                 for (int s = 0; s < sc->num_slices; s++)
 3976                         rv += sc->ss[s].ipackets;
 3977                 return (rv);
 3978         case IFCOUNTER_OPACKETS:
 3979                 for (int s = 0; s < sc->num_slices; s++)
 3980                         rv += sc->ss[s].opackets;
 3981                 return (rv);
 3982         case IFCOUNTER_OERRORS:
 3983                 for (int s = 0; s < sc->num_slices; s++)
 3984                         rv += sc->ss[s].oerrors;
 3985                 return (rv);
 3986 #ifdef IFNET_BUF_RING
 3987         case IFCOUNTER_OBYTES:
 3988                 for (int s = 0; s < sc->num_slices; s++)
 3989                         rv += sc->ss[s].obytes;
 3990                 return (rv);
 3991         case IFCOUNTER_OMCASTS:
 3992                 for (int s = 0; s < sc->num_slices; s++)
 3993                         rv += sc->ss[s].omcasts;
 3994                 return (rv);
 3995         case IFCOUNTER_OQDROPS:
 3996                 for (int s = 0; s < sc->num_slices; s++)
 3997                         rv += sc->ss[s].tx.br->br_drops;
 3998                 return (rv);
 3999 #endif
 4000         default:
 4001                 return (if_get_counter_default(ifp, cnt));
 4002         }
 4003 }
 4004 
 4005 static void
 4006 mxge_tick(void *arg)
 4007 {
 4008         mxge_softc_t *sc = arg;
 4009         u_long pkts = 0;
 4010         int err = 0;
 4011         int running, ticks;
 4012         uint16_t cmd;
 4013 
 4014         ticks = mxge_ticks;
 4015         running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
 4016         if (running) {
 4017                 if (!sc->watchdog_countdown) {
 4018                         err = mxge_watchdog(sc);
 4019                         sc->watchdog_countdown = 4;
 4020                 }
 4021                 sc->watchdog_countdown--;
 4022         }
 4023         if (pkts == 0) {
 4024                 /* ensure NIC did not suffer h/w fault while idle */
 4025                 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);                
 4026                 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
 4027                         sc->dying = 2;
 4028                         taskqueue_enqueue(sc->tq, &sc->watchdog_task);
 4029                         err = ENXIO;
 4030                 }
 4031                 /* look less often if NIC is idle */
 4032                 ticks *= 4;
 4033         }
 4034 
 4035         if (err == 0)
 4036                 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
 4037 
 4038 }
 4039 
 4040 static int
 4041 mxge_media_change(struct ifnet *ifp)
 4042 {
 4043         return EINVAL;
 4044 }
 4045 
 4046 static int
 4047 mxge_change_mtu(mxge_softc_t *sc, int mtu)
 4048 {
 4049         struct ifnet *ifp = sc->ifp;
 4050         int real_mtu, old_mtu;
 4051         int err = 0;
 4052 
 4053         real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 4054         if ((real_mtu > sc->max_mtu) || real_mtu < 60)
 4055                 return EINVAL;
 4056         mtx_lock(&sc->driver_mtx);
 4057         old_mtu = ifp->if_mtu;
 4058         ifp->if_mtu = mtu;
 4059         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 4060                 mxge_close(sc, 0);
 4061                 err = mxge_open(sc);
 4062                 if (err != 0) {
 4063                         ifp->if_mtu = old_mtu;
 4064                         mxge_close(sc, 0);
 4065                         (void) mxge_open(sc);
 4066                 }
 4067         }
 4068         mtx_unlock(&sc->driver_mtx);
 4069         return err;
 4070 }       
 4071 
 4072 static void
 4073 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
 4074 {
 4075         mxge_softc_t *sc = ifp->if_softc;
 4076 
 4077         if (sc == NULL)
 4078                 return;
 4079         ifmr->ifm_status = IFM_AVALID;
 4080         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
 4081         ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
 4082         ifmr->ifm_active |= sc->current_media;
 4083 }
 4084 
 4085 static int
 4086 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
 4087 {
 4088         mxge_cmd_t cmd;
 4089         uint32_t i2c_args;
 4090         int i, ms, err;
 4091 
 4092         if (i2c->dev_addr != 0xA0 &&
 4093             i2c->dev_addr != 0xA2)
 4094                 return (EINVAL);
 4095         if (i2c->len > sizeof(i2c->data))
 4096                 return (EINVAL);
 4097 
 4098         for (i = 0; i < i2c->len; i++) {
 4099                 i2c_args = i2c->dev_addr << 0x8;
 4100                 i2c_args |= i2c->offset + i;
 4101                 cmd.data0 = 0;   /* just fetch 1 byte, not all 256 */
 4102                 cmd.data1 = i2c_args;
 4103                 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
 4104 
 4105                 if (err != MXGEFW_CMD_OK)
 4106                         return (EIO);
 4107                 /* now we wait for the data to be cached */
 4108                 cmd.data0 = i2c_args & 0xff;
 4109                 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
 4110                 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
 4111                         cmd.data0 = i2c_args & 0xff;
 4112                         err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
 4113                         if (err == EBUSY)
 4114                                 DELAY(1000);
 4115                 }
 4116                 if (err != MXGEFW_CMD_OK)
 4117                         return (EIO);
 4118                 i2c->data[i] = cmd.data0;
 4119         }
 4120         return (0);
 4121 }
 4122 
 4123 static int
 4124 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 4125 {
 4126         mxge_softc_t *sc = ifp->if_softc;
 4127         struct ifreq *ifr = (struct ifreq *)data;
 4128         struct ifi2creq i2c;
 4129         int err, mask;
 4130 
 4131         err = 0;
 4132         switch (command) {
 4133         case SIOCSIFMTU:
 4134                 err = mxge_change_mtu(sc, ifr->ifr_mtu);
 4135                 break;
 4136 
 4137         case SIOCSIFFLAGS:
 4138                 mtx_lock(&sc->driver_mtx);
 4139                 if (sc->dying) {
 4140                         mtx_unlock(&sc->driver_mtx);
 4141                         return EINVAL;
 4142                 }
 4143                 if (ifp->if_flags & IFF_UP) {
 4144                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 4145                                 err = mxge_open(sc);
 4146                         } else {
 4147                                 /* take care of promis can allmulti
 4148                                    flag chages */
 4149                                 mxge_change_promisc(sc,
 4150                                                     ifp->if_flags & IFF_PROMISC);
 4151                                 mxge_set_multicast_list(sc);
 4152                         }
 4153                 } else {
 4154                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 4155                                 mxge_close(sc, 0);
 4156                         }
 4157                 }
 4158                 mtx_unlock(&sc->driver_mtx);
 4159                 break;
 4160 
 4161         case SIOCADDMULTI:
 4162         case SIOCDELMULTI:
 4163                 mtx_lock(&sc->driver_mtx);
 4164                 if (sc->dying) {
 4165                         mtx_unlock(&sc->driver_mtx);
 4166                         return (EINVAL);
 4167                 }
 4168                 mxge_set_multicast_list(sc);
 4169                 mtx_unlock(&sc->driver_mtx);
 4170                 break;
 4171 
 4172         case SIOCSIFCAP:
 4173                 mtx_lock(&sc->driver_mtx);
 4174                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 4175                 if (mask & IFCAP_TXCSUM) {
 4176                         if (IFCAP_TXCSUM & ifp->if_capenable) {
 4177                                 mask &= ~IFCAP_TSO4;
 4178                                 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
 4179                                 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
 4180                         } else {
 4181                                 ifp->if_capenable |= IFCAP_TXCSUM;
 4182                                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
 4183                         }
 4184                 }
 4185                 if (mask & IFCAP_RXCSUM) {
 4186                         if (IFCAP_RXCSUM & ifp->if_capenable) {
 4187                                 ifp->if_capenable &= ~IFCAP_RXCSUM;
 4188                         } else {
 4189                                 ifp->if_capenable |= IFCAP_RXCSUM;
 4190                         }
 4191                 }
 4192                 if (mask & IFCAP_TSO4) {
 4193                         if (IFCAP_TSO4 & ifp->if_capenable) {
 4194                                 ifp->if_capenable &= ~IFCAP_TSO4;
 4195                         } else if (IFCAP_TXCSUM & ifp->if_capenable) {
 4196                                 ifp->if_capenable |= IFCAP_TSO4;
 4197                                 ifp->if_hwassist |= CSUM_TSO;
 4198                         } else {
 4199                                 printf("mxge requires tx checksum offload"
 4200                                        " be enabled to use TSO\n");
 4201                                 err = EINVAL;
 4202                         }
 4203                 }
 4204 #if IFCAP_TSO6
 4205                 if (mask & IFCAP_TXCSUM_IPV6) {
 4206                         if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
 4207                                 mask &= ~IFCAP_TSO6;
 4208                                 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
 4209                                                        | IFCAP_TSO6);
 4210                                 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
 4211                                                       | CSUM_UDP);
 4212                         } else {
 4213                                 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
 4214                                 ifp->if_hwassist |= (CSUM_TCP_IPV6
 4215                                                      | CSUM_UDP_IPV6);
 4216                         }
 4217                 }
 4218                 if (mask & IFCAP_RXCSUM_IPV6) {
 4219                         if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
 4220                                 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
 4221                         } else {
 4222                                 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
 4223                         }
 4224                 }
 4225                 if (mask & IFCAP_TSO6) {
 4226                         if (IFCAP_TSO6 & ifp->if_capenable) {
 4227                                 ifp->if_capenable &= ~IFCAP_TSO6;
 4228                         } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
 4229                                 ifp->if_capenable |= IFCAP_TSO6;
 4230                                 ifp->if_hwassist |= CSUM_TSO;
 4231                         } else {
 4232                                 printf("mxge requires tx checksum offload"
 4233                                        " be enabled to use TSO\n");
 4234                                 err = EINVAL;
 4235                         }
 4236                 }
 4237 #endif /*IFCAP_TSO6 */
 4238 
 4239                 if (mask & IFCAP_LRO)
 4240                         ifp->if_capenable ^= IFCAP_LRO;
 4241                 if (mask & IFCAP_VLAN_HWTAGGING)
 4242                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 4243                 if (mask & IFCAP_VLAN_HWTSO)
 4244                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
 4245 
 4246                 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
 4247                     !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
 4248                         ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
 4249 
 4250                 mtx_unlock(&sc->driver_mtx);
 4251                 VLAN_CAPABILITIES(ifp);
 4252 
 4253                 break;
 4254 
 4255         case SIOCGIFMEDIA:
 4256                 mtx_lock(&sc->driver_mtx);
 4257                 if (sc->dying) {
 4258                         mtx_unlock(&sc->driver_mtx);
 4259                         return (EINVAL);
 4260                 }
 4261                 mxge_media_probe(sc);
 4262                 mtx_unlock(&sc->driver_mtx);
 4263                 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
 4264                                     &sc->media, command);
 4265                 break;
 4266 
 4267         case SIOCGI2C:
 4268                 if (sc->connector != MXGE_XFP &&
 4269                     sc->connector != MXGE_SFP) {
 4270                         err = ENXIO;
 4271                         break;
 4272                 }
 4273                 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
 4274                 if (err != 0)
 4275                         break;
 4276                 mtx_lock(&sc->driver_mtx);
 4277                 if (sc->dying) {
 4278                         mtx_unlock(&sc->driver_mtx);
 4279                         return (EINVAL);
 4280                 }
 4281                 err = mxge_fetch_i2c(sc, &i2c);
 4282                 mtx_unlock(&sc->driver_mtx);
 4283                 if (err == 0)
 4284                         err = copyout(&i2c, ifr_data_get_ptr(ifr),
 4285                             sizeof(i2c));
 4286                 break;
 4287         default:
 4288                 err = ether_ioctl(ifp, command, data);
 4289                 break;
 4290         }
 4291         return err;
 4292 }
 4293 
 4294 static void
 4295 mxge_fetch_tunables(mxge_softc_t *sc)
 4296 {
 4297 
 4298         TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
 4299         TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
 4300                           &mxge_flow_control);
 4301         TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
 4302                           &mxge_intr_coal_delay);       
 4303         TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
 4304                           &mxge_nvidia_ecrc_enable);    
 4305         TUNABLE_INT_FETCH("hw.mxge.force_firmware",
 4306                           &mxge_force_firmware);        
 4307         TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
 4308                           &mxge_deassert_wait); 
 4309         TUNABLE_INT_FETCH("hw.mxge.verbose",
 4310                           &mxge_verbose);       
 4311         TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
 4312         TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
 4313         TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
 4314         TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
 4315         TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
 4316         TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
 4317 
 4318         if (bootverbose)
 4319                 mxge_verbose = 1;
 4320         if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
 4321                 mxge_intr_coal_delay = 30;
 4322         if (mxge_ticks == 0)
 4323                 mxge_ticks = hz / 2;
 4324         sc->pause = mxge_flow_control;
 4325         if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
 4326             || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
 4327                 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
 4328         }
 4329         if (mxge_initial_mtu > ETHERMTU_JUMBO ||
 4330             mxge_initial_mtu < ETHER_MIN_LEN)
 4331                 mxge_initial_mtu = ETHERMTU_JUMBO;
 4332 
 4333         if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
 4334                 mxge_throttle = MXGE_MAX_THROTTLE;
 4335         if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
 4336                 mxge_throttle = MXGE_MIN_THROTTLE;
 4337         sc->throttle = mxge_throttle;
 4338 }
 4339 
 4340 static void
 4341 mxge_free_slices(mxge_softc_t *sc)
 4342 {
 4343         struct mxge_slice_state *ss;
 4344         int i;
 4345 
 4346         if (sc->ss == NULL)
 4347                 return;
 4348 
 4349         for (i = 0; i < sc->num_slices; i++) {
 4350                 ss = &sc->ss[i];
 4351                 if (ss->fw_stats != NULL) {
 4352                         mxge_dma_free(&ss->fw_stats_dma);
 4353                         ss->fw_stats = NULL;
 4354 #ifdef IFNET_BUF_RING
 4355                         if (ss->tx.br != NULL) {
 4356                                 drbr_free(ss->tx.br, M_DEVBUF);
 4357                                 ss->tx.br = NULL;
 4358                         }
 4359 #endif
 4360                         mtx_destroy(&ss->tx.mtx);
 4361                 }
 4362                 if (ss->rx_done.entry != NULL) {
 4363                         mxge_dma_free(&ss->rx_done.dma);
 4364                         ss->rx_done.entry = NULL;
 4365                 }
 4366         }
 4367         free(sc->ss, M_DEVBUF);
 4368         sc->ss = NULL;
 4369 }
 4370 
 4371 static int
 4372 mxge_alloc_slices(mxge_softc_t *sc)
 4373 {
 4374         mxge_cmd_t cmd;
 4375         struct mxge_slice_state *ss;
 4376         size_t bytes;
 4377         int err, i, max_intr_slots;
 4378 
 4379         err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
 4380         if (err != 0) {
 4381                 device_printf(sc->dev, "Cannot determine rx ring size\n");
 4382                 return err;
 4383         }
 4384         sc->rx_ring_size = cmd.data0;
 4385         max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
 4386 
 4387         bytes = sizeof (*sc->ss) * sc->num_slices;
 4388         sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
 4389         if (sc->ss == NULL)
 4390                 return (ENOMEM);
 4391         for (i = 0; i < sc->num_slices; i++) {
 4392                 ss = &sc->ss[i];
 4393 
 4394                 ss->sc = sc;
 4395 
 4396                 /* allocate per-slice rx interrupt queues */
 4397                 
 4398                 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
 4399                 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
 4400                 if (err != 0)
 4401                         goto abort;
 4402                 ss->rx_done.entry = ss->rx_done.dma.addr;
 4403                 bzero(ss->rx_done.entry, bytes);
 4404 
 4405                 /*
 4406                  * allocate the per-slice firmware stats; stats
 4407                  * (including tx) are used used only on the first
 4408                  * slice for now
 4409                  */
 4410 #ifndef IFNET_BUF_RING
 4411                 if (i > 0)
 4412                         continue;
 4413 #endif
 4414 
 4415                 bytes = sizeof (*ss->fw_stats);
 4416                 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
 4417                                      sizeof (*ss->fw_stats), 64);
 4418                 if (err != 0)
 4419                         goto abort;
 4420                 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
 4421                 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
 4422                          "%s:tx(%d)", device_get_nameunit(sc->dev), i);
 4423                 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
 4424 #ifdef IFNET_BUF_RING
 4425                 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
 4426                                            &ss->tx.mtx);
 4427 #endif
 4428         }
 4429 
 4430         return (0);
 4431 
 4432 abort:
 4433         mxge_free_slices(sc);
 4434         return (ENOMEM);
 4435 }
 4436 
/*
 * Decide how many slices (RSS receive queues) this device will use.
 * Defaults to a single slice.  Multiple slices require: the tunable to
 * allow it, an SMP system, at least 2 MSI-X vectors, and the
 * slice-aware (RSS) firmware to load and answer the probe commands.
 * On any firmware failure, the original firmware is reloaded and the
 * driver falls back to one slice.
 */
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 *  don't enable multiple slices if they are not enabled,
	 *  or if this is not an SMP system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware see what it supports */
	old_fw = sc->fw_name;
	/* pick the RSS variant matching the current alignment choice */
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	/* two completion slots per rx descriptor (big + small rings) */
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	/* cannot use more slices than MSI-X vectors */
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	/* restore the non-RSS firmware; ignore reload errors here */
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
 4528 
 4529 static int
 4530 mxge_add_msix_irqs(mxge_softc_t *sc)
 4531 {
 4532         size_t bytes;
 4533         int count, err, i, rid;
 4534 
 4535         rid = PCIR_BAR(2);
 4536         sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
 4537                                                     &rid, RF_ACTIVE);
 4538 
 4539         if (sc->msix_table_res == NULL) {
 4540                 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
 4541                 return ENXIO;
 4542         }
 4543 
 4544         count = sc->num_slices;
 4545         err = pci_alloc_msix(sc->dev, &count);
 4546         if (err != 0) {
 4547                 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
 4548                               "err = %d \n", sc->num_slices, err);
 4549                 goto abort_with_msix_table;
 4550         }
 4551         if (count < sc->num_slices) {
 4552                 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
 4553                               count, sc->num_slices);
 4554                 device_printf(sc->dev,
 4555                               "Try setting hw.mxge.max_slices to %d\n",
 4556                               count);
 4557                 err = ENOSPC;
 4558                 goto abort_with_msix;
 4559         }
 4560         bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
 4561         sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
 4562         if (sc->msix_irq_res == NULL) {
 4563                 err = ENOMEM;
 4564                 goto abort_with_msix;
 4565         }
 4566 
 4567         for (i = 0; i < sc->num_slices; i++) {
 4568                 rid = i + 1;
 4569                 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
 4570                                                           SYS_RES_IRQ,
 4571                                                           &rid, RF_ACTIVE);
 4572                 if (sc->msix_irq_res[i] == NULL) {
 4573                         device_printf(sc->dev, "couldn't allocate IRQ res"
 4574                                       " for message %d\n", i);
 4575                         err = ENXIO;
 4576                         goto abort_with_res;
 4577                 }
 4578         }
 4579 
 4580         bytes = sizeof (*sc->msix_ih) * sc->num_slices;
 4581         sc->msix_ih =  malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
 4582 
 4583         for (i = 0; i < sc->num_slices; i++) {
 4584                 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
 4585                                      INTR_TYPE_NET | INTR_MPSAFE,
 4586 #if __FreeBSD_version > 700030
 4587                                      NULL,
 4588 #endif
 4589                                      mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
 4590                 if (err != 0) {
 4591                         device_printf(sc->dev, "couldn't setup intr for "
 4592                                       "message %d\n", i);
 4593                         goto abort_with_intr;
 4594                 }
 4595                 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
 4596                                   sc->msix_ih[i], "s%d", i);
 4597         }
 4598 
 4599         if (mxge_verbose) {
 4600                 device_printf(sc->dev, "using %d msix IRQs:",
 4601                               sc->num_slices);
 4602                 for (i = 0; i < sc->num_slices; i++)
 4603                         printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
 4604                 printf("\n");
 4605         }
 4606         return (0);
 4607 
 4608 abort_with_intr:
 4609         for (i = 0; i < sc->num_slices; i++) {
 4610                 if (sc->msix_ih[i] != NULL) {
 4611                         bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
 4612                                           sc->msix_ih[i]);
 4613                         sc->msix_ih[i] = NULL;
 4614                 }
 4615         }
 4616         free(sc->msix_ih, M_DEVBUF);
 4617 
 4618 abort_with_res:
 4619         for (i = 0; i < sc->num_slices; i++) {
 4620                 rid = i + 1;
 4621                 if (sc->msix_irq_res[i] != NULL)
 4622                         bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
 4623                                              sc->msix_irq_res[i]);
 4624                 sc->msix_irq_res[i] = NULL;
 4625         }
 4626         free(sc->msix_irq_res, M_DEVBUF);
 4627 
 4628 abort_with_msix:
 4629         pci_release_msi(sc->dev);
 4630 
 4631 abort_with_msix_table:
 4632         bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
 4633                              sc->msix_table_res);
 4634 
 4635         return err;
 4636 }
 4637 
 4638 static int
 4639 mxge_add_single_irq(mxge_softc_t *sc)
 4640 {
 4641         int count, err, rid;
 4642 
 4643         count = pci_msi_count(sc->dev);
 4644         if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
 4645                 rid = 1;
 4646         } else {
 4647                 rid = 0;
 4648                 sc->legacy_irq = 1;
 4649         }
 4650         sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
 4651                                              RF_SHAREABLE | RF_ACTIVE);
 4652         if (sc->irq_res == NULL) {
 4653                 device_printf(sc->dev, "could not alloc interrupt\n");
 4654                 return ENXIO;
 4655         }
 4656         if (mxge_verbose)
 4657                 device_printf(sc->dev, "using %s irq %jd\n",
 4658                               sc->legacy_irq ? "INTx" : "MSI",
 4659                               rman_get_start(sc->irq_res));
 4660         err = bus_setup_intr(sc->dev, sc->irq_res,
 4661                              INTR_TYPE_NET | INTR_MPSAFE,
 4662 #if __FreeBSD_version > 700030
 4663                              NULL,
 4664 #endif
 4665                              mxge_intr, &sc->ss[0], &sc->ih);
 4666         if (err != 0) {
 4667                 bus_release_resource(sc->dev, SYS_RES_IRQ,
 4668                                      sc->legacy_irq ? 0 : 1, sc->irq_res);
 4669                 if (!sc->legacy_irq)
 4670                         pci_release_msi(sc->dev);
 4671         }
 4672         return err;
 4673 }
 4674 
 4675 static void
 4676 mxge_rem_msix_irqs(mxge_softc_t *sc)
 4677 {
 4678         int i, rid;
 4679 
 4680         for (i = 0; i < sc->num_slices; i++) {
 4681                 if (sc->msix_ih[i] != NULL) {
 4682                         bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
 4683                                           sc->msix_ih[i]);
 4684                         sc->msix_ih[i] = NULL;
 4685                 }
 4686         }
 4687         free(sc->msix_ih, M_DEVBUF);
 4688 
 4689         for (i = 0; i < sc->num_slices; i++) {
 4690                 rid = i + 1;
 4691                 if (sc->msix_irq_res[i] != NULL)
 4692                         bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
 4693                                              sc->msix_irq_res[i]);
 4694                 sc->msix_irq_res[i] = NULL;
 4695         }
 4696         free(sc->msix_irq_res, M_DEVBUF);
 4697 
 4698         bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
 4699                              sc->msix_table_res);
 4700 
 4701         pci_release_msi(sc->dev);
 4702         return;
 4703 }
 4704 
 4705 static void
 4706 mxge_rem_single_irq(mxge_softc_t *sc)
 4707 {
 4708         bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
 4709         bus_release_resource(sc->dev, SYS_RES_IRQ,
 4710                              sc->legacy_irq ? 0 : 1, sc->irq_res);
 4711         if (!sc->legacy_irq)
 4712                 pci_release_msi(sc->dev);
 4713 }
 4714 
 4715 static void
 4716 mxge_rem_irq(mxge_softc_t *sc)
 4717 {
 4718         if (sc->num_slices > 1)
 4719                 mxge_rem_msix_irqs(sc);
 4720         else
 4721                 mxge_rem_single_irq(sc);
 4722 }
 4723 
 4724 static int
 4725 mxge_add_irq(mxge_softc_t *sc)
 4726 {
 4727         int err;
 4728 
 4729         if (sc->num_slices > 1)
 4730                 err = mxge_add_msix_irqs(sc);
 4731         else
 4732                 err = mxge_add_single_irq(sc);
 4733 
 4734         if (0 && err == 0 && sc->num_slices > 1) {
 4735                 mxge_rem_msix_irqs(sc);
 4736                 err = mxge_add_msix_irqs(sc);
 4737         }
 4738         return err;
 4739 }
 4740 
/*
 * Device attach routine: take the adapter from PCI probe to a fully
 * registered ethernet interface.  Resources are acquired in a fixed
 * order; on any failure the goto chain at the bottom unwinds exactly
 * the resources acquired so far, in reverse order.
 */
static int
mxge_attach(device_t dev)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	/* Private taskqueue used to serialize watchdog handling. */
	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
	sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
				  taskqueue_thread_enqueue, &sc->tq);
	/* NOTE(review): with M_WAITOK this NULL check looks purely
	   defensive — taskqueue_create should not fail; confirm. */
	if (sc->tq == NULL) {
		err = ENOMEM;
		goto abort_with_nothing;
	}

	/* Parent DMA tag from which all per-ring DMA tags derive. */
	err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_tq;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/* Per-device mutexes: one for firmware commands, one for the
	   driver state / callout. */
	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	/* Tick callout runs with driver_mtx held. */
	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
					     RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	/* Usable SRAM: 2MB BAR minus reserved regions and a 0x100 pad. */
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %jd\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	/* Scratch buffer used to benchmark DMA during firmware select. */
	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	/* Advertise interface capabilities to the network stack. */
	ifp->if_baudrate = IF_Gbps(10);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
		IFCAP_RXCSUM_IPV6;
#if defined(INET) || defined(INET6)
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;

	/* Only FW 1.4.32 and newer can do TSO over vlans */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 32)
		ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
#endif
	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
	/* check to see if f/w supports TSO for IPv6 */
	if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
		/* NOTE(review): CSUM_TCP_IPV6 is a compile-time macro,
		   so this inner test appears to be always true when the
		   macro is nonzero — verify intent. */
		if (CSUM_TCP_IPV6)
			ifp->if_capabilities |= IFCAP_TSO6;
		sc->max_tso6_hlen = min(cmd.data0,
					sizeof (sc->ss[0].scratch));
	}
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	ifp->if_get_counter = mxge_get_counter;
	ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc;
	ifp->if_hw_tsomaxsegsize = IP_MAXPACKET;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_media_init(sc);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	/* Start the taskqueue thread and kick off the periodic tick. */
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
				device_get_nameunit(sc->dev));
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	return 0;

	/* Error unwind: each label releases the resources acquired
	   before the corresponding failure point, in reverse order. */
abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_tq:
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
abort_with_nothing:
	return err;
}
 4966 
/*
 * Device detach: refuse while vlan interfaces are still attached,
 * then stop the interface and release all resources in roughly the
 * reverse order of mxge_attach().  Returns 0 on success, EBUSY if
 * vlans are active.
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	/* Vlan interfaces reference this device; detach them first. */
	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	sc->dying = 1;	/* prevents the tick/watchdog from re-arming */
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc, 0);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	/* Stop the watchdog taskqueue before tearing anything down. */
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	/* Tell the firmware to quiesce its RDMA engine. */
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}
 5006 
/*
 * System shutdown hook: nothing to do for this device; always
 * reports success.
 */
static int
mxge_shutdown(device_t dev)
{
	return 0;
}
 5012 
 5013 /*
 5014   This file uses Myri10GE driver indentation.
 5015 
 5016   Local Variables:
 5017   c-file-style:"linux"
 5018   tab-width:8
 5019   End:
 5020 */

Cache object: 2c2681b437bedc4053de58c98a24755e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.