FreeBSD/Linux Kernel Cross Reference
sys/dev/virtio/network/if_vtnet.c

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice unmodified, this list of conditions, and the following
   12  *    disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 /* Driver for VirtIO network devices. */
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include <sys/param.h>
   35 #include <sys/eventhandler.h>
   36 #include <sys/systm.h>
   37 #include <sys/kernel.h>
   38 #include <sys/sockio.h>
   39 #include <sys/malloc.h>
   40 #include <sys/mbuf.h>
   41 #include <sys/module.h>
   42 #include <sys/msan.h>
   43 #include <sys/socket.h>
   44 #include <sys/sysctl.h>
   45 #include <sys/random.h>
   46 #include <sys/sglist.h>
   47 #include <sys/lock.h>
   48 #include <sys/mutex.h>
   49 #include <sys/taskqueue.h>
   50 #include <sys/smp.h>
   51 #include <machine/smp.h>
   52 
   53 #include <vm/uma.h>
   54 
   55 #include <net/debugnet.h>
   56 #include <net/ethernet.h>
   57 #include <net/pfil.h>
   58 #include <net/if.h>
   59 #include <net/if_var.h>
   60 #include <net/if_arp.h>
   61 #include <net/if_dl.h>
   62 #include <net/if_types.h>
   63 #include <net/if_media.h>
   64 #include <net/if_vlan_var.h>
   65 
   66 #include <net/bpf.h>
   67 
   68 #include <netinet/in_systm.h>
   69 #include <netinet/in.h>
   70 #include <netinet/ip.h>
   71 #include <netinet/ip6.h>
   72 #include <netinet6/ip6_var.h>
   73 #include <netinet/udp.h>
   74 #include <netinet/tcp.h>
   75 #include <netinet/tcp_lro.h>
   76 
   77 #include <machine/bus.h>
   78 #include <machine/resource.h>
   79 #include <sys/bus.h>
   80 #include <sys/rman.h>
   81 
   82 #include <dev/virtio/virtio.h>
   83 #include <dev/virtio/virtqueue.h>
   84 #include <dev/virtio/network/virtio_net.h>
   85 #include <dev/virtio/network/if_vtnetvar.h>
   86 #include "virtio_if.h"
   87 
   88 #include "opt_inet.h"
   89 #include "opt_inet6.h"
   90 
   91 #if defined(INET) || defined(INET6)
   92 #include <machine/in_cksum.h>
   93 #endif
   94 
   95 static int      vtnet_modevent(module_t, int, void *);
   96 
   97 static int      vtnet_probe(device_t);
   98 static int      vtnet_attach(device_t);
   99 static int      vtnet_detach(device_t);
  100 static int      vtnet_suspend(device_t);
  101 static int      vtnet_resume(device_t);
  102 static int      vtnet_shutdown(device_t);
  103 static int      vtnet_attach_completed(device_t);
  104 static int      vtnet_config_change(device_t);
  105 
  106 static int      vtnet_negotiate_features(struct vtnet_softc *);
  107 static int      vtnet_setup_features(struct vtnet_softc *);
  108 static int      vtnet_init_rxq(struct vtnet_softc *, int);
  109 static int      vtnet_init_txq(struct vtnet_softc *, int);
  110 static int      vtnet_alloc_rxtx_queues(struct vtnet_softc *);
  111 static void     vtnet_free_rxtx_queues(struct vtnet_softc *);
  112 static int      vtnet_alloc_rx_filters(struct vtnet_softc *);
  113 static void     vtnet_free_rx_filters(struct vtnet_softc *);
  114 static int      vtnet_alloc_virtqueues(struct vtnet_softc *);
  115 static int      vtnet_alloc_interface(struct vtnet_softc *);
  116 static int      vtnet_setup_interface(struct vtnet_softc *);
  117 static int      vtnet_ioctl_mtu(struct vtnet_softc *, u_int);
  118 static int      vtnet_ioctl_ifflags(struct vtnet_softc *);
  119 static int      vtnet_ioctl_multi(struct vtnet_softc *);
  120 static int      vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
  121 static int      vtnet_ioctl(if_t, u_long, caddr_t);
  122 static uint64_t vtnet_get_counter(if_t, ift_counter);
  123 
  124 static int      vtnet_rxq_populate(struct vtnet_rxq *);
  125 static void     vtnet_rxq_free_mbufs(struct vtnet_rxq *);
  126 static struct mbuf *
  127                 vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
  128 static int      vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
  129                     struct mbuf *, int);
  130 static int      vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
  131 static int      vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
  132 static int      vtnet_rxq_new_buf(struct vtnet_rxq *);
  133 static int      vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
  134                      uint16_t, int, struct virtio_net_hdr *);
  135 static int      vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
  136                      uint16_t, int, struct virtio_net_hdr *);
  137 static int      vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
  138                      struct virtio_net_hdr *);
  139 static void     vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
  140 static void     vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
  141 static int      vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
  142 static void     vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
  143                     struct virtio_net_hdr *);
  144 static int      vtnet_rxq_eof(struct vtnet_rxq *);
  145 static void     vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
  146 static void     vtnet_rx_vq_intr(void *);
  147 static void     vtnet_rxq_tq_intr(void *, int);
  148 
  149 static int      vtnet_txq_intr_threshold(struct vtnet_txq *);
  150 static int      vtnet_txq_below_threshold(struct vtnet_txq *);
  151 static int      vtnet_txq_notify(struct vtnet_txq *);
  152 static void     vtnet_txq_free_mbufs(struct vtnet_txq *);
  153 static int      vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
  154                     int *, int *, int *);
  155 static int      vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
  156                     int, struct virtio_net_hdr *);
  157 static struct mbuf *
  158                 vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
  159                     struct virtio_net_hdr *);
  160 static int      vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
  161                     struct vtnet_tx_header *);
  162 static int      vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
  163 #ifdef VTNET_LEGACY_TX
  164 static void     vtnet_start_locked(struct vtnet_txq *, if_t);
  165 static void     vtnet_start(if_t);
  166 #else
  167 static int      vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
  168 static int      vtnet_txq_mq_start(if_t, struct mbuf *);
  169 static void     vtnet_txq_tq_deferred(void *, int);
  170 #endif
  171 static void     vtnet_txq_start(struct vtnet_txq *);
  172 static void     vtnet_txq_tq_intr(void *, int);
  173 static int      vtnet_txq_eof(struct vtnet_txq *);
  174 static void     vtnet_tx_vq_intr(void *);
  175 static void     vtnet_tx_start_all(struct vtnet_softc *);
  176 
  177 #ifndef VTNET_LEGACY_TX
  178 static void     vtnet_qflush(if_t);
  179 #endif
  180 
  181 static int      vtnet_watchdog(struct vtnet_txq *);
  182 static void     vtnet_accum_stats(struct vtnet_softc *,
  183                     struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
  184 static void     vtnet_tick(void *);
  185 
  186 static void     vtnet_start_taskqueues(struct vtnet_softc *);
  187 static void     vtnet_free_taskqueues(struct vtnet_softc *);
  188 static void     vtnet_drain_taskqueues(struct vtnet_softc *);
  189 
  190 static void     vtnet_drain_rxtx_queues(struct vtnet_softc *);
  191 static void     vtnet_stop_rendezvous(struct vtnet_softc *);
  192 static void     vtnet_stop(struct vtnet_softc *);
  193 static int      vtnet_virtio_reinit(struct vtnet_softc *);
  194 static void     vtnet_init_rx_filters(struct vtnet_softc *);
  195 static int      vtnet_init_rx_queues(struct vtnet_softc *);
  196 static int      vtnet_init_tx_queues(struct vtnet_softc *);
  197 static int      vtnet_init_rxtx_queues(struct vtnet_softc *);
  198 static void     vtnet_set_active_vq_pairs(struct vtnet_softc *);
  199 static void     vtnet_update_rx_offloads(struct vtnet_softc *);
  200 static int      vtnet_reinit(struct vtnet_softc *);
  201 static void     vtnet_init_locked(struct vtnet_softc *, int);
  202 static void     vtnet_init(void *);
  203 
  204 static void     vtnet_free_ctrl_vq(struct vtnet_softc *);
  205 static void     vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
  206                     struct sglist *, int, int);
  207 static int      vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
  208 static int      vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
  209 static int      vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
  210 static int      vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool);
  211 static int      vtnet_set_promisc(struct vtnet_softc *, bool);
  212 static int      vtnet_set_allmulti(struct vtnet_softc *, bool);
  213 static void     vtnet_rx_filter(struct vtnet_softc *);
  214 static void     vtnet_rx_filter_mac(struct vtnet_softc *);
  215 static int      vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
  216 static void     vtnet_rx_filter_vlan(struct vtnet_softc *);
  217 static void     vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
  218 static void     vtnet_register_vlan(void *, if_t, uint16_t);
  219 static void     vtnet_unregister_vlan(void *, if_t, uint16_t);
  220 
  221 static void     vtnet_update_speed_duplex(struct vtnet_softc *);
  222 static int      vtnet_is_link_up(struct vtnet_softc *);
  223 static void     vtnet_update_link_status(struct vtnet_softc *);
  224 static int      vtnet_ifmedia_upd(if_t);
  225 static void     vtnet_ifmedia_sts(if_t, struct ifmediareq *);
  226 static void     vtnet_get_macaddr(struct vtnet_softc *);
  227 static void     vtnet_set_macaddr(struct vtnet_softc *);
  228 static void     vtnet_attached_set_macaddr(struct vtnet_softc *);
  229 static void     vtnet_vlan_tag_remove(struct mbuf *);
  230 static void     vtnet_set_rx_process_limit(struct vtnet_softc *);
  231 
  232 static void     vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
  233                     struct sysctl_oid_list *, struct vtnet_rxq *);
  234 static void     vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
  235                     struct sysctl_oid_list *, struct vtnet_txq *);
  236 static void     vtnet_setup_queue_sysctl(struct vtnet_softc *);
  237 static void     vtnet_load_tunables(struct vtnet_softc *);
  238 static void     vtnet_setup_sysctl(struct vtnet_softc *);
  239 
  240 static int      vtnet_rxq_enable_intr(struct vtnet_rxq *);
  241 static void     vtnet_rxq_disable_intr(struct vtnet_rxq *);
  242 static int      vtnet_txq_enable_intr(struct vtnet_txq *);
  243 static void     vtnet_txq_disable_intr(struct vtnet_txq *);
  244 static void     vtnet_enable_rx_interrupts(struct vtnet_softc *);
  245 static void     vtnet_enable_tx_interrupts(struct vtnet_softc *);
  246 static void     vtnet_enable_interrupts(struct vtnet_softc *);
  247 static void     vtnet_disable_rx_interrupts(struct vtnet_softc *);
  248 static void     vtnet_disable_tx_interrupts(struct vtnet_softc *);
  249 static void     vtnet_disable_interrupts(struct vtnet_softc *);
  250 
  251 static int      vtnet_tunable_int(struct vtnet_softc *, const char *, int);
  252 
  253 DEBUGNET_DEFINE(vtnet);
  254 
  255 #define vtnet_htog16(_sc, _val) virtio_htog16(vtnet_modern(_sc), _val)
  256 #define vtnet_htog32(_sc, _val) virtio_htog32(vtnet_modern(_sc), _val)
  257 #define vtnet_htog64(_sc, _val) virtio_htog64(vtnet_modern(_sc), _val)
  258 #define vtnet_gtoh16(_sc, _val) virtio_gtoh16(vtnet_modern(_sc), _val)
  259 #define vtnet_gtoh32(_sc, _val) virtio_gtoh32(vtnet_modern(_sc), _val)
  260 #define vtnet_gtoh64(_sc, _val) virtio_gtoh64(vtnet_modern(_sc), _val)
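       /*
        * The conversion macros above follow the VirtIO endianness rules:
        * modern (VIRTIO_F_VERSION_1) devices use little-endian config and
        * header fields, while legacy devices use guest-native byte order,
        * so "htog"/"gtoh" (host-to-guest / guest-to-host) only apply a
        * little-endian conversion when vtnet_modern(sc) is true.
        */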
  261 
  262 /* Tunables. */
  263 static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  264     "VirtIO Net driver parameters");
  265 
  266 static int vtnet_csum_disable = 0;
  267 SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
  268     &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
  269 
  270 static int vtnet_fixup_needs_csum = 0;
  271 SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
  272     &vtnet_fixup_needs_csum, 0,
  273     "Calculate valid checksum for NEEDS_CSUM packets");
  274 
  275 static int vtnet_tso_disable = 0;
  276 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
  277     &vtnet_tso_disable, 0, "Disables TSO");
  278 
  279 static int vtnet_lro_disable = 0;
  280 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
  281     &vtnet_lro_disable, 0, "Disables hardware LRO");
  282 
  283 static int vtnet_mq_disable = 0;
  284 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
  285     &vtnet_mq_disable, 0, "Disables multiqueue support");
  286 
  287 static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
  288 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
  289     &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");
  290 
  291 static int vtnet_tso_maxlen = IP_MAXPACKET;
  292 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
  293     &vtnet_tso_maxlen, 0, "TSO burst limit");
  294 
  295 static int vtnet_rx_process_limit = 1024;
  296 SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
  297     &vtnet_rx_process_limit, 0,
  298     "Number of RX segments processed in one pass");
  299 
  300 static int vtnet_lro_entry_count = 128;
  301 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
  302     &vtnet_lro_entry_count, 0, "Software LRO entry count");
  303 
   304 /* A nonzero depth enables sorted LRO, using an mbuf queue of that depth. */
  305 static int vtnet_lro_mbufq_depth = 0;
  306 SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
  307     &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");
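       /*
        * The knobs above are CTLFLAG_RDTUN, so they are loader tunables read
        * once at boot or module load; illustrative settings in
        * /boot/loader.conf would be, e.g.:
        *
        *   hw.vtnet.csum_disable=1
        *   hw.vtnet.mq_max_pairs=4
        *
        * vtnet_tunable_int() below uses these globals as the defaults, and
        * the values remain visible read-only through sysctl(8).
        */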
  308 
  309 static uma_zone_t vtnet_tx_header_zone;
  310 
  311 static struct virtio_feature_desc vtnet_feature_desc[] = {
  312         { VIRTIO_NET_F_CSUM,                    "TxChecksum"            },
  313         { VIRTIO_NET_F_GUEST_CSUM,              "RxChecksum"            },
  314         { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,     "CtrlRxOffloads"        },
  315         { VIRTIO_NET_F_MAC,                     "MAC"                   },
  316         { VIRTIO_NET_F_GSO,                     "TxGSO"                 },
  317         { VIRTIO_NET_F_GUEST_TSO4,              "RxLROv4"               },
  318         { VIRTIO_NET_F_GUEST_TSO6,              "RxLROv6"               },
  319         { VIRTIO_NET_F_GUEST_ECN,               "RxLROECN"              },
  320         { VIRTIO_NET_F_GUEST_UFO,               "RxUFO"                 },
  321         { VIRTIO_NET_F_HOST_TSO4,               "TxTSOv4"               },
  322         { VIRTIO_NET_F_HOST_TSO6,               "TxTSOv6"               },
  323         { VIRTIO_NET_F_HOST_ECN,                "TxTSOECN"              },
  324         { VIRTIO_NET_F_HOST_UFO,                "TxUFO"                 },
  325         { VIRTIO_NET_F_MRG_RXBUF,               "MrgRxBuf"              },
  326         { VIRTIO_NET_F_STATUS,                  "Status"                },
  327         { VIRTIO_NET_F_CTRL_VQ,                 "CtrlVq"                },
  328         { VIRTIO_NET_F_CTRL_RX,                 "CtrlRxMode"            },
  329         { VIRTIO_NET_F_CTRL_VLAN,               "CtrlVLANFilter"        },
  330         { VIRTIO_NET_F_CTRL_RX_EXTRA,           "CtrlRxModeExtra"       },
  331         { VIRTIO_NET_F_GUEST_ANNOUNCE,          "GuestAnnounce"         },
  332         { VIRTIO_NET_F_MQ,                      "Multiqueue"            },
  333         { VIRTIO_NET_F_CTRL_MAC_ADDR,           "CtrlMacAddr"           },
  334         { VIRTIO_NET_F_SPEED_DUPLEX,            "SpeedDuplex"           },
  335 
  336         { 0, NULL }
  337 };
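       /*
        * This table only supplies human-readable names: vtnet_attach() hands
        * it to virtio_set_feature_desc() so the negotiated feature bits are
        * reported with these labels in the boot-time device messages rather
        * than as raw bit values.
        */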
  338 
  339 static device_method_t vtnet_methods[] = {
  340         /* Device methods. */
  341         DEVMETHOD(device_probe,                 vtnet_probe),
  342         DEVMETHOD(device_attach,                vtnet_attach),
  343         DEVMETHOD(device_detach,                vtnet_detach),
  344         DEVMETHOD(device_suspend,               vtnet_suspend),
  345         DEVMETHOD(device_resume,                vtnet_resume),
  346         DEVMETHOD(device_shutdown,              vtnet_shutdown),
  347 
  348         /* VirtIO methods. */
  349         DEVMETHOD(virtio_attach_completed,      vtnet_attach_completed),
  350         DEVMETHOD(virtio_config_change,         vtnet_config_change),
  351 
  352         DEVMETHOD_END
  353 };
  354 
  355 #ifdef DEV_NETMAP
  356 #include <dev/netmap/if_vtnet_netmap.h>
  357 #endif
  358 
  359 static driver_t vtnet_driver = {
  360     .name = "vtnet",
  361     .methods = vtnet_methods,
  362     .size = sizeof(struct vtnet_softc)
  363 };
  364 VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL);
  365 MODULE_VERSION(vtnet, 1);
  366 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
  367 #ifdef DEV_NETMAP
  368 MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
  369 #endif
  370 
  371 VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
  372 
  373 static int
  374 vtnet_modevent(module_t mod __unused, int type, void *unused __unused)
  375 {
  376         int error = 0;
  377         static int loaded = 0;
  378 
  379         switch (type) {
  380         case MOD_LOAD:
  381                 if (loaded++ == 0) {
  382                         vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
  383                                 sizeof(struct vtnet_tx_header),
  384                                 NULL, NULL, NULL, NULL, 0, 0);
  385 #ifdef DEBUGNET
  386                         /*
  387                          * We need to allocate from this zone in the transmit path, so ensure
  388                          * that we have at least one item per header available.
   389                          * XXX: add a separate zone like we do for mbufs? Otherwise we
   390                          * may allocate buckets.
  391                          */
  392                         uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
  393                         uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
  394 #endif
  395                 }
  396                 break;
  397         case MOD_QUIESCE:
  398                 if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
  399                         error = EBUSY;
  400                 break;
  401         case MOD_UNLOAD:
  402                 if (--loaded == 0) {
  403                         uma_zdestroy(vtnet_tx_header_zone);
  404                         vtnet_tx_header_zone = NULL;
  405                 }
  406                 break;
  407         case MOD_SHUTDOWN:
  408                 break;
  409         default:
  410                 error = EOPNOTSUPP;
  411                 break;
  412         }
  413 
  414         return (error);
  415 }
  416 
  417 static int
  418 vtnet_probe(device_t dev)
  419 {
  420         return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
  421 }
  422 
  423 static int
  424 vtnet_attach(device_t dev)
  425 {
  426         struct vtnet_softc *sc;
  427         int error;
  428 
  429         sc = device_get_softc(dev);
  430         sc->vtnet_dev = dev;
  431         virtio_set_feature_desc(dev, vtnet_feature_desc);
  432 
  433         VTNET_CORE_LOCK_INIT(sc);
  434         callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
  435         vtnet_load_tunables(sc);
  436 
  437         error = vtnet_alloc_interface(sc);
  438         if (error) {
  439                 device_printf(dev, "cannot allocate interface\n");
  440                 goto fail;
  441         }
  442 
  443         vtnet_setup_sysctl(sc);
  444 
  445         error = vtnet_setup_features(sc);
  446         if (error) {
  447                 device_printf(dev, "cannot setup features\n");
  448                 goto fail;
  449         }
  450 
  451         error = vtnet_alloc_rx_filters(sc);
  452         if (error) {
  453                 device_printf(dev, "cannot allocate Rx filters\n");
  454                 goto fail;
  455         }
  456 
  457         error = vtnet_alloc_rxtx_queues(sc);
  458         if (error) {
  459                 device_printf(dev, "cannot allocate queues\n");
  460                 goto fail;
  461         }
  462 
  463         error = vtnet_alloc_virtqueues(sc);
  464         if (error) {
  465                 device_printf(dev, "cannot allocate virtqueues\n");
  466                 goto fail;
  467         }
  468 
  469         error = vtnet_setup_interface(sc);
  470         if (error) {
  471                 device_printf(dev, "cannot setup interface\n");
  472                 goto fail;
  473         }
  474 
  475         error = virtio_setup_intr(dev, INTR_TYPE_NET);
  476         if (error) {
  477                 device_printf(dev, "cannot setup interrupts\n");
  478                 ether_ifdetach(sc->vtnet_ifp);
  479                 goto fail;
  480         }
  481 
  482 #ifdef DEV_NETMAP
  483         vtnet_netmap_attach(sc);
  484 #endif
  485         vtnet_start_taskqueues(sc);
  486 
  487 fail:
  488         if (error)
  489                 vtnet_detach(dev);
  490 
  491         return (error);
  492 }
  493 
  494 static int
  495 vtnet_detach(device_t dev)
  496 {
  497         struct vtnet_softc *sc;
  498         if_t ifp;
  499 
  500         sc = device_get_softc(dev);
  501         ifp = sc->vtnet_ifp;
  502 
  503         if (device_is_attached(dev)) {
  504                 VTNET_CORE_LOCK(sc);
  505                 vtnet_stop(sc);
  506                 VTNET_CORE_UNLOCK(sc);
  507 
  508                 callout_drain(&sc->vtnet_tick_ch);
  509                 vtnet_drain_taskqueues(sc);
  510 
  511                 ether_ifdetach(ifp);
  512         }
  513 
  514 #ifdef DEV_NETMAP
  515         netmap_detach(ifp);
  516 #endif
  517 
  518         if (sc->vtnet_pfil != NULL) {
  519                 pfil_head_unregister(sc->vtnet_pfil);
  520                 sc->vtnet_pfil = NULL;
  521         }
  522 
  523         vtnet_free_taskqueues(sc);
  524 
  525         if (sc->vtnet_vlan_attach != NULL) {
  526                 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
  527                 sc->vtnet_vlan_attach = NULL;
  528         }
  529         if (sc->vtnet_vlan_detach != NULL) {
  530                 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
  531                 sc->vtnet_vlan_detach = NULL;
  532         }
  533 
  534         ifmedia_removeall(&sc->vtnet_media);
  535 
  536         if (ifp != NULL) {
  537                 if_free(ifp);
  538                 sc->vtnet_ifp = NULL;
  539         }
  540 
  541         vtnet_free_rxtx_queues(sc);
  542         vtnet_free_rx_filters(sc);
  543 
  544         if (sc->vtnet_ctrl_vq != NULL)
  545                 vtnet_free_ctrl_vq(sc);
  546 
  547         VTNET_CORE_LOCK_DESTROY(sc);
  548 
  549         return (0);
  550 }
  551 
  552 static int
  553 vtnet_suspend(device_t dev)
  554 {
  555         struct vtnet_softc *sc;
  556 
  557         sc = device_get_softc(dev);
  558 
  559         VTNET_CORE_LOCK(sc);
  560         vtnet_stop(sc);
  561         sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
  562         VTNET_CORE_UNLOCK(sc);
  563 
  564         return (0);
  565 }
  566 
  567 static int
  568 vtnet_resume(device_t dev)
  569 {
  570         struct vtnet_softc *sc;
  571         if_t ifp;
  572 
  573         sc = device_get_softc(dev);
  574         ifp = sc->vtnet_ifp;
  575 
  576         VTNET_CORE_LOCK(sc);
  577         if (if_getflags(ifp) & IFF_UP)
  578                 vtnet_init_locked(sc, 0);
  579         sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
  580         VTNET_CORE_UNLOCK(sc);
  581 
  582         return (0);
  583 }
  584 
  585 static int
  586 vtnet_shutdown(device_t dev)
  587 {
  588         /*
  589          * Suspend already does all of what we need to
  590          * do here; we just never expect to be resumed.
  591          */
  592         return (vtnet_suspend(dev));
  593 }
  594 
  595 static int
  596 vtnet_attach_completed(device_t dev)
  597 {
  598         struct vtnet_softc *sc;
  599 
  600         sc = device_get_softc(dev);
  601 
  602         VTNET_CORE_LOCK(sc);
  603         vtnet_attached_set_macaddr(sc);
  604         VTNET_CORE_UNLOCK(sc);
  605 
  606         return (0);
  607 }
  608 
  609 static int
  610 vtnet_config_change(device_t dev)
  611 {
  612         struct vtnet_softc *sc;
  613 
  614         sc = device_get_softc(dev);
  615 
  616         VTNET_CORE_LOCK(sc);
  617         vtnet_update_link_status(sc);
  618         if (sc->vtnet_link_active != 0)
  619                 vtnet_tx_start_all(sc);
  620         VTNET_CORE_UNLOCK(sc);
  621 
  622         return (0);
  623 }
  624 
  625 static int
  626 vtnet_negotiate_features(struct vtnet_softc *sc)
  627 {
  628         device_t dev;
  629         uint64_t features, negotiated_features;
  630         int no_csum;
  631 
  632         dev = sc->vtnet_dev;
  633         features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
  634             VTNET_LEGACY_FEATURES;
  635 
  636         /*
  637          * TSO and LRO are only available when their corresponding checksum
  638          * offload feature is also negotiated.
  639          */
  640         no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
  641         if (no_csum)
  642                 features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
  643         if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
  644                 features &= ~VTNET_TSO_FEATURES;
  645         if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
  646                 features &= ~VTNET_LRO_FEATURES;
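               /*
                * Roughly: the TSO features are the transmit-side GSO/HOST_TSO*
                * bits, which the specification ties to VIRTIO_NET_F_CSUM, and
                * the LRO features are the receive-side GUEST_TSO* bits, tied
                * to VIRTIO_NET_F_GUEST_CSUM; hence the no_csum dependency
                * above.
                */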
  647 
  648 #ifndef VTNET_LEGACY_TX
  649         if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
  650                 features &= ~VIRTIO_NET_F_MQ;
  651 #else
  652         features &= ~VIRTIO_NET_F_MQ;
  653 #endif
  654 
  655         negotiated_features = virtio_negotiate_features(dev, features);
  656 
  657         if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
  658                 uint16_t mtu;
  659 
  660                 mtu = virtio_read_dev_config_2(dev,
  661                     offsetof(struct virtio_net_config, mtu));
  662                 if (mtu < VTNET_MIN_MTU /* || mtu > VTNET_MAX_MTU */) {
  663                         device_printf(dev, "Invalid MTU value: %d. "
  664                             "MTU feature disabled.\n", mtu);
  665                         features &= ~VIRTIO_NET_F_MTU;
  666                         negotiated_features =
  667                             virtio_negotiate_features(dev, features);
  668                 }
  669         }
  670 
  671         if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
  672                 uint16_t npairs;
  673 
  674                 npairs = virtio_read_dev_config_2(dev,
  675                     offsetof(struct virtio_net_config, max_virtqueue_pairs));
  676                 if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
  677                     npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
  678                         device_printf(dev, "Invalid max_virtqueue_pairs value: "
  679                             "%d. Multiqueue feature disabled.\n", npairs);
  680                         features &= ~VIRTIO_NET_F_MQ;
  681                         negotiated_features =
  682                             virtio_negotiate_features(dev, features);
  683                 }
  684         }
  685 
  686         if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
  687             virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
  688                 /*
  689                  * LRO without mergeable buffers requires special care. This
  690                  * is not ideal because every receive buffer must be large
  691                  * enough to hold the maximum TCP packet, the Ethernet header,
   692                  * and the virtio-net header. This requires up to 34 descriptors with
  693                  * MCLBYTES clusters. If we do not have indirect descriptors,
  694                  * LRO is disabled since the virtqueue will not contain very
  695                  * many receive buffers.
  696                  */
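                       /*
                        * Rough arithmetic behind the figure above: a maximum
                        * sized LRO frame is about IP_MAXPACKET (65535) bytes
                        * plus the Ethernet header, needing ceil(65553 / 2048)
                        * = 33 MCLBYTES cluster segments, plus one more for
                        * the separately described virtio-net header, for a
                        * 34-descriptor chain.
                        */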
  697                 if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
  698                         device_printf(dev,
  699                             "Host LRO disabled since both mergeable buffers "
  700                             "and indirect descriptors were not negotiated\n");
  701                         features &= ~VTNET_LRO_FEATURES;
  702                         negotiated_features =
  703                             virtio_negotiate_features(dev, features);
  704                 } else
  705                         sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
  706         }
  707 
  708         sc->vtnet_features = negotiated_features;
  709         sc->vtnet_negotiated_features = negotiated_features;
  710 
  711         return (virtio_finalize_features(dev));
  712 }
  713 
  714 static int
  715 vtnet_setup_features(struct vtnet_softc *sc)
  716 {
  717         device_t dev;
  718         int error;
  719 
  720         dev = sc->vtnet_dev;
  721 
  722         error = vtnet_negotiate_features(sc);
  723         if (error)
  724                 return (error);
  725 
  726         if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
  727                 sc->vtnet_flags |= VTNET_FLAG_MODERN;
  728         if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
  729                 sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
  730         if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
  731                 sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
  732 
  733         if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
  734                 /* This feature should always be negotiated. */
  735                 sc->vtnet_flags |= VTNET_FLAG_MAC;
  736         }
  737 
  738         if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
  739                 sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
  740                     offsetof(struct virtio_net_config, mtu));
  741         } else
  742                 sc->vtnet_max_mtu = VTNET_MAX_MTU;
  743 
  744         if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
  745                 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
  746                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
  747         } else if (vtnet_modern(sc)) {
  748                 /* This is identical to the mergeable header. */
  749                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
  750         } else
  751                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
  752 
  753         if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
  754                 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
  755         else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
  756                 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
  757         else
  758                 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
  759 
  760         /*
  761          * Favor "hardware" LRO if negotiated, but support software LRO as
  762          * a fallback; there is usually little benefit (or worse) with both.
  763          */
  764         if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 &&
  765             virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0)
  766                 sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
  767 
  768         if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
  769             virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
  770             virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
  771                 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
  772         else
  773                 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
  774 
  775         sc->vtnet_req_vq_pairs = 1;
  776         sc->vtnet_max_vq_pairs = 1;
  777 
  778         if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
  779                 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
  780 
  781                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
  782                         sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
  783                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
  784                         sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
  785                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
  786                         sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
  787 
  788                 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
  789                         sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
  790                             offsetof(struct virtio_net_config,
  791                             max_virtqueue_pairs));
  792                 }
  793         }
  794 
  795         if (sc->vtnet_max_vq_pairs > 1) {
  796                 int req;
  797 
  798                 /*
  799                  * Limit the maximum number of requested queue pairs to the
  800                  * number of CPUs and the configured maximum.
  801                  */
  802                 req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
  803                 if (req < 0)
  804                         req = 1;
  805                 if (req == 0)
  806                         req = mp_ncpus;
  807                 if (req > sc->vtnet_max_vq_pairs)
  808                         req = sc->vtnet_max_vq_pairs;
  809                 if (req > mp_ncpus)
  810                         req = mp_ncpus;
  811                 if (req > 1) {
  812                         sc->vtnet_req_vq_pairs = req;
  813                         sc->vtnet_flags |= VTNET_FLAG_MQ;
  814                 }
  815         }
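               /*
                * Illustrative example: with a device advertising 8 virtqueue
                * pairs, a 4-CPU guest, and the mq_max_pairs tunable set to 0
                * (meaning "use the CPU count"), req becomes mp_ncpus (4),
                * passes both clamps, and the driver requests 4 pairs with
                * VTNET_FLAG_MQ set.
                */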
  816 
  817         return (0);
  818 }
  819 
  820 static int
  821 vtnet_init_rxq(struct vtnet_softc *sc, int id)
  822 {
  823         struct vtnet_rxq *rxq;
  824 
  825         rxq = &sc->vtnet_rxqs[id];
  826 
  827         snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
  828             device_get_nameunit(sc->vtnet_dev), id);
  829         mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
  830 
  831         rxq->vtnrx_sc = sc;
  832         rxq->vtnrx_id = id;
  833 
  834         rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
  835         if (rxq->vtnrx_sg == NULL)
  836                 return (ENOMEM);
  837 
  838 #if defined(INET) || defined(INET6)
  839         if (vtnet_software_lro(sc)) {
  840                 if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp,
  841                     sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0)
  842                         return (ENOMEM);
  843         }
  844 #endif
  845 
  846         NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
  847         rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
  848             taskqueue_thread_enqueue, &rxq->vtnrx_tq);
  849 
  850         return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
  851 }
  852 
  853 static int
  854 vtnet_init_txq(struct vtnet_softc *sc, int id)
  855 {
  856         struct vtnet_txq *txq;
  857 
  858         txq = &sc->vtnet_txqs[id];
  859 
  860         snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
  861             device_get_nameunit(sc->vtnet_dev), id);
  862         mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
  863 
  864         txq->vtntx_sc = sc;
  865         txq->vtntx_id = id;
  866 
  867         txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
  868         if (txq->vtntx_sg == NULL)
  869                 return (ENOMEM);
  870 
  871 #ifndef VTNET_LEGACY_TX
  872         txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
  873             M_NOWAIT, &txq->vtntx_mtx);
  874         if (txq->vtntx_br == NULL)
  875                 return (ENOMEM);
  876 
  877         TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
  878 #endif
  879         TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
  880         txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
  881             taskqueue_thread_enqueue, &txq->vtntx_tq);
  882         if (txq->vtntx_tq == NULL)
  883                 return (ENOMEM);
  884 
  885         return (0);
  886 }
  887 
  888 static int
  889 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
  890 {
  891         int i, npairs, error;
  892 
  893         npairs = sc->vtnet_max_vq_pairs;
  894 
  895         sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
  896             M_NOWAIT | M_ZERO);
  897         sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
  898             M_NOWAIT | M_ZERO);
  899         if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
  900                 return (ENOMEM);
  901 
  902         for (i = 0; i < npairs; i++) {
  903                 error = vtnet_init_rxq(sc, i);
  904                 if (error)
  905                         return (error);
  906                 error = vtnet_init_txq(sc, i);
  907                 if (error)
  908                         return (error);
  909         }
  910 
  911         vtnet_set_rx_process_limit(sc);
  912         vtnet_setup_queue_sysctl(sc);
  913 
  914         return (0);
  915 }
  916 
  917 static void
  918 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
  919 {
  920 
  921         rxq->vtnrx_sc = NULL;
  922         rxq->vtnrx_id = -1;
  923 
  924 #if defined(INET) || defined(INET6)
  925         tcp_lro_free(&rxq->vtnrx_lro);
  926 #endif
  927 
  928         if (rxq->vtnrx_sg != NULL) {
  929                 sglist_free(rxq->vtnrx_sg);
  930                 rxq->vtnrx_sg = NULL;
  931         }
  932 
  933         if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
  934                 mtx_destroy(&rxq->vtnrx_mtx);
  935 }
  936 
  937 static void
  938 vtnet_destroy_txq(struct vtnet_txq *txq)
  939 {
  940 
  941         txq->vtntx_sc = NULL;
  942         txq->vtntx_id = -1;
  943 
  944         if (txq->vtntx_sg != NULL) {
  945                 sglist_free(txq->vtntx_sg);
  946                 txq->vtntx_sg = NULL;
  947         }
  948 
  949 #ifndef VTNET_LEGACY_TX
  950         if (txq->vtntx_br != NULL) {
  951                 buf_ring_free(txq->vtntx_br, M_DEVBUF);
  952                 txq->vtntx_br = NULL;
  953         }
  954 #endif
  955 
  956         if (mtx_initialized(&txq->vtntx_mtx) != 0)
  957                 mtx_destroy(&txq->vtntx_mtx);
  958 }
  959 
  960 static void
  961 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
  962 {
  963         int i;
  964 
  965         if (sc->vtnet_rxqs != NULL) {
  966                 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
  967                         vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
  968                 free(sc->vtnet_rxqs, M_DEVBUF);
  969                 sc->vtnet_rxqs = NULL;
  970         }
  971 
  972         if (sc->vtnet_txqs != NULL) {
  973                 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
  974                         vtnet_destroy_txq(&sc->vtnet_txqs[i]);
  975                 free(sc->vtnet_txqs, M_DEVBUF);
  976                 sc->vtnet_txqs = NULL;
  977         }
  978 }
  979 
  980 static int
  981 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
  982 {
  983 
  984         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
  985                 sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
  986                     M_DEVBUF, M_NOWAIT | M_ZERO);
  987                 if (sc->vtnet_mac_filter == NULL)
  988                         return (ENOMEM);
  989         }
  990 
  991         if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
  992                 sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
  993                     VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
  994                 if (sc->vtnet_vlan_filter == NULL)
  995                         return (ENOMEM);
  996         }
  997 
  998         return (0);
  999 }
 1000 
 1001 static void
 1002 vtnet_free_rx_filters(struct vtnet_softc *sc)
 1003 {
 1004 
 1005         if (sc->vtnet_mac_filter != NULL) {
 1006                 free(sc->vtnet_mac_filter, M_DEVBUF);
 1007                 sc->vtnet_mac_filter = NULL;
 1008         }
 1009 
 1010         if (sc->vtnet_vlan_filter != NULL) {
 1011                 free(sc->vtnet_vlan_filter, M_DEVBUF);
 1012                 sc->vtnet_vlan_filter = NULL;
 1013         }
 1014 }
 1015 
 1016 static int
 1017 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
 1018 {
 1019         device_t dev;
 1020         struct vq_alloc_info *info;
 1021         struct vtnet_rxq *rxq;
 1022         struct vtnet_txq *txq;
 1023         int i, idx, flags, nvqs, error;
 1024 
 1025         dev = sc->vtnet_dev;
 1026         flags = 0;
 1027 
 1028         nvqs = sc->vtnet_max_vq_pairs * 2;
 1029         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
 1030                 nvqs++;
 1031 
 1032         info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
 1033         if (info == NULL)
 1034                 return (ENOMEM);
 1035 
 1036         for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) {
 1037                 rxq = &sc->vtnet_rxqs[i];
 1038                 VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
 1039                     vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
 1040                     "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
 1041 
 1042                 txq = &sc->vtnet_txqs[i];
 1043                 VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
 1044                     vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
 1045                     "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
 1046         }
 1047 
 1048         /* These queues will not be used so allocate the minimum resources. */
 1049         for (/**/; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
 1050                 rxq = &sc->vtnet_rxqs[i];
 1051                 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq,
 1052                     "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
 1053 
 1054                 txq = &sc->vtnet_txqs[i];
 1055                 VQ_ALLOC_INFO_INIT(&info[idx+1], 0, NULL, txq, &txq->vtntx_vq,
 1056                     "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
 1057         }
 1058 
 1059         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
 1060                 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
 1061                     &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
 1062         }
 1063 
 1064         /*
 1065          * TODO: Enable interrupt binding if this is multiqueue. This will
 1066          * only matter when per-virtqueue MSIX is available.
 1067          */
 1068         if (sc->vtnet_flags & VTNET_FLAG_MQ)
 1069                 flags |= 0;
 1070 
 1071         error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
 1072         free(info, M_TEMP);
 1073 
 1074         return (error);
 1075 }
 1076 
 1077 static int
 1078 vtnet_alloc_interface(struct vtnet_softc *sc)
 1079 {
 1080         device_t dev;
 1081         if_t ifp;
 1082 
 1083         dev = sc->vtnet_dev;
 1084 
 1085         ifp = if_alloc(IFT_ETHER);
 1086         if (ifp == NULL)
 1087                 return (ENOMEM);
 1088 
 1089         sc->vtnet_ifp = ifp;
 1090         if_setsoftc(ifp, sc);
 1091         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 1092 
 1093         return (0);
 1094 }
 1095 
 1096 static int
 1097 vtnet_setup_interface(struct vtnet_softc *sc)
 1098 {
 1099         device_t dev;
 1100         struct pfil_head_args pa;
 1101         if_t ifp;
 1102 
 1103         dev = sc->vtnet_dev;
 1104         ifp = sc->vtnet_ifp;
 1105 
 1106         if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
 1107             IFF_KNOWSEPOCH);
 1108         if_setbaudrate(ifp, IF_Gbps(10));
 1109         if_setinitfn(ifp, vtnet_init);
 1110         if_setioctlfn(ifp, vtnet_ioctl);
 1111         if_setgetcounterfn(ifp, vtnet_get_counter);
 1112 #ifndef VTNET_LEGACY_TX
 1113         if_settransmitfn(ifp, vtnet_txq_mq_start);
 1114         if_setqflushfn(ifp, vtnet_qflush);
 1115 #else
 1116         struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
 1117         if_setstartfn(ifp, vtnet_start);
 1118         if_setsendqlen(ifp, virtqueue_size(vq) - 1);
 1119         if_setsendqready(ifp);
 1120 #endif
 1121 
 1122         vtnet_get_macaddr(sc);
 1123 
 1124         if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
 1125                 if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
 1126 
 1127         ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
 1128         ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
 1129         ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);
 1130 
 1131         if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
 1132                 int gso;
 1133 
 1134                 if_setcapabilitiesbit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0);
 1135 
 1136                 gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
 1137                 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
 1138                         if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
 1139                 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
 1140                         if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
 1141                 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
 1142                         sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
 1143 
 1144                 if (if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) {
 1145                         int tso_maxlen;
 1146 
 1147                         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
 1148 
 1149                         tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
 1150                             vtnet_tso_maxlen);
 1151                         if_sethwtsomax(ifp, tso_maxlen -
 1152                             (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
 1153                         if_sethwtsomaxsegcount(ifp, sc->vtnet_tx_nsegs - 1);
 1154                         if_sethwtsomaxsegsize(ifp, PAGE_SIZE);
 1155                 }
 1156         }
 1157 
 1158         if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
 1159                 if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0);
 1160 #ifdef notyet
 1161                 /* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
 1162                 if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0);
 1163 #endif
 1164 
 1165                 if (vtnet_tunable_int(sc, "fixup_needs_csum",
 1166                     vtnet_fixup_needs_csum) != 0)
 1167                         sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;
 1168 
 1169                 /* Support either "hardware" or software LRO. */
 1170                 if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
 1171         }
 1172 
 1173         if (if_getcapabilities(ifp) & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
 1174                 /*
 1175                  * VirtIO does not support VLAN tagging, but we can fake
 1176                  * it by inserting and removing the 802.1Q header during
 1177                  * transmit and receive. We are then able to do checksum
 1178                  * offloading of VLAN frames.
 1179                  */
 1180                 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
 1181         }
 1182 
 1183         if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
 1184                 if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
 1185         if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
 1186 
 1187         /*
 1188          * Capabilities after here are not enabled by default.
 1189          */
 1190         if_setcapenable(ifp, if_getcapabilities(ifp));
 1191 
 1192         if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
 1193                 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
 1194 
 1195                 sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 1196                     vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
 1197                 sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 1198                     vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
 1199         }
 1200 
 1201         ether_ifattach(ifp, sc->vtnet_hwaddr);
 1202 
 1203         /* Tell the upper layer(s) we support long frames. */
 1204         if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
 1205 
 1206         DEBUGNET_SET(ifp, vtnet);
 1207 
 1208         pa.pa_version = PFIL_VERSION;
 1209         pa.pa_flags = PFIL_IN;
 1210         pa.pa_type = PFIL_TYPE_ETHERNET;
 1211         pa.pa_headname = if_name(ifp);
 1212         sc->vtnet_pfil = pfil_head_register(&pa);
 1213 
 1214         return (0);
 1215 }
 1216 
 1217 static int
 1218 vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
 1219 {
 1220         int framesz;
 1221 
 1222         if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
 1223                 return (MJUMPAGESIZE);
 1224         else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
 1225                 return (MCLBYTES);
 1226 
 1227         /*
 1228          * Try to scale the receive mbuf cluster size from the MTU. We
 1229          * could also use the VQ size to influence the selected size,
 1230          * but that would only matter for very small queues.
 1231          */
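               /*
                * For example (illustrative, assuming 4 KB pages): with the
                * modern 12-byte header plus an 18-byte Ethernet/VLAN header,
                * a 1500-byte MTU gives framesz = 1530 and selects MCLBYTES
                * (2048), while a 9000-byte MTU gives framesz = 9030 and
                * selects MJUM9BYTES (9216).
                */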
 1232         if (vtnet_modern(sc)) {
 1233                 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
 1234                 framesz = sizeof(struct virtio_net_hdr_v1);
 1235         } else
 1236                 framesz = sizeof(struct vtnet_rx_header);
 1237         framesz += sizeof(struct ether_vlan_header) + mtu;
 1238 
 1239         if (framesz <= MCLBYTES)
 1240                 return (MCLBYTES);
 1241         else if (framesz <= MJUMPAGESIZE)
 1242                 return (MJUMPAGESIZE);
 1243         else if (framesz <= MJUM9BYTES)
 1244                 return (MJUM9BYTES);
 1245 
 1246         /* Sane default; avoid 16KB clusters. */
 1247         return (MCLBYTES);
 1248 }
 1249 
 1250 static int
 1251 vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu)
 1252 {
 1253         if_t ifp;
 1254         int clustersz;
 1255 
 1256         ifp = sc->vtnet_ifp;
 1257         VTNET_CORE_LOCK_ASSERT(sc);
 1258 
 1259         if (if_getmtu(ifp) == mtu)
 1260                 return (0);
 1261         else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
 1262                 return (EINVAL);
 1263 
 1264         if_setmtu(ifp, mtu);
 1265         clustersz = vtnet_rx_cluster_size(sc, mtu);
 1266 
 1267         if (clustersz != sc->vtnet_rx_clustersz &&
 1268             if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 1269                 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
 1270                 vtnet_init_locked(sc, 0);
 1271         }
 1272 
 1273         return (0);
 1274 }
 1275 
 1276 static int
 1277 vtnet_ioctl_ifflags(struct vtnet_softc *sc)
 1278 {
 1279         if_t ifp;
 1280         int drv_running;
 1281 
 1282         ifp = sc->vtnet_ifp;
 1283         drv_running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0;
 1284 
 1285         VTNET_CORE_LOCK_ASSERT(sc);
 1286 
 1287         if ((if_getflags(ifp) & IFF_UP) == 0) {
 1288                 if (drv_running)
 1289                         vtnet_stop(sc);
 1290                 goto out;
 1291         }
 1292 
 1293         if (!drv_running) {
 1294                 vtnet_init_locked(sc, 0);
 1295                 goto out;
 1296         }
 1297 
 1298         if ((if_getflags(ifp) ^ sc->vtnet_if_flags) &
 1299             (IFF_PROMISC | IFF_ALLMULTI)) {
 1300                 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
 1301                         vtnet_rx_filter(sc);
 1302                 else {
 1303                         if ((if_getflags(ifp) ^ sc->vtnet_if_flags) & IFF_ALLMULTI)
 1304                                 return (ENOTSUP);
 1305                         if_setflagbits(ifp, IFF_PROMISC, 0);
 1306                 }
 1307         }
 1308 
 1309 out:
 1310         sc->vtnet_if_flags = if_getflags(ifp);
 1311         return (0);
 1312 }
 1313 
 1314 static int
 1315 vtnet_ioctl_multi(struct vtnet_softc *sc)
 1316 {
 1317         if_t ifp;
 1318 
 1319         ifp = sc->vtnet_ifp;
 1320 
 1321         VTNET_CORE_LOCK_ASSERT(sc);
 1322 
 1323         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
 1324             if_getdrvflags(ifp) & IFF_DRV_RUNNING)
 1325                 vtnet_rx_filter_mac(sc);
 1326 
 1327         return (0);
 1328 }
 1329 
 1330 static int
 1331 vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr)
 1332 {
 1333         if_t ifp;
 1334         int mask, reinit, update;
 1335 
 1336         ifp = sc->vtnet_ifp;
 1337         mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^ if_getcapenable(ifp);
 1338         reinit = update = 0;
 1339 
 1340         VTNET_CORE_LOCK_ASSERT(sc);
 1341 
 1342         if (mask & IFCAP_TXCSUM)
 1343                 if_togglecapenable(ifp, IFCAP_TXCSUM);
 1344         if (mask & IFCAP_TXCSUM_IPV6)
 1345                 if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
 1346         if (mask & IFCAP_TSO4)
 1347                 if_togglecapenable(ifp, IFCAP_TSO4);
 1348         if (mask & IFCAP_TSO6)
 1349                 if_togglecapenable(ifp, IFCAP_TSO6);
 1350 
 1351         if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) {
 1352                 /*
 1353                  * These Rx features require the negotiated features to
 1354                  * be updated. Avoid a full reinit if possible.
 1355                  */
 1356                 if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
 1357                         update = 1;
 1358                 else
 1359                         reinit = 1;
 1360 
 1361                 /* BMV: Avoid needless renegotiation for just software LRO. */
 1362                 if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) ==
 1363                     IFCAP_LRO && vtnet_software_lro(sc))
 1364                         reinit = update = 0;
 1365 
 1366                 if (mask & IFCAP_RXCSUM)
 1367                         if_togglecapenable(ifp, IFCAP_RXCSUM);
 1368                 if (mask & IFCAP_RXCSUM_IPV6)
 1369                         if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
 1370                 if (mask & IFCAP_LRO)
 1371                         if_togglecapenable(ifp, IFCAP_LRO);
 1372 
 1373                 /*
 1374                  * VirtIO does not distinguish between IPv4 and IPv6 checksums
 1375                  * so treat them as a pair. Guest TSO (LRO) requires receive
 1376                  * checksums.
 1377                  */
 1378                 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
 1379                         if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
 1380 #ifdef notyet
 1381                         if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
 1382 #endif
 1383                 } else
 1384                         if_setcapenablebit(ifp, 0,
 1385                             (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO));
 1386         }
 1387 
 1388         if (mask & IFCAP_VLAN_HWFILTER) {
 1389                 /* These Rx features require renegotiation. */
 1390                 reinit = 1;
 1391 
 1392                 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
 1394         }
 1395 
 1396         if (mask & IFCAP_VLAN_HWTSO)
 1397                 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
 1398         if (mask & IFCAP_VLAN_HWTAGGING)
 1399                 if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
 1400 
 1401         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 1402                 if (reinit) {
 1403                         if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
 1404                         vtnet_init_locked(sc, 0);
 1405                 } else if (update)
 1406                         vtnet_update_rx_offloads(sc);
 1407         }
 1408 
 1409         return (0);
 1410 }
 1411 
 1412 static int
 1413 vtnet_ioctl(if_t ifp, u_long cmd, caddr_t data)
 1414 {
 1415         struct vtnet_softc *sc;
 1416         struct ifreq *ifr;
 1417         int error;
 1418 
 1419         sc = if_getsoftc(ifp);
 1420         ifr = (struct ifreq *) data;
 1421         error = 0;
 1422 
 1423         switch (cmd) {
 1424         case SIOCSIFMTU:
 1425                 VTNET_CORE_LOCK(sc);
 1426                 error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
 1427                 VTNET_CORE_UNLOCK(sc);
 1428                 break;
 1429 
 1430         case SIOCSIFFLAGS:
 1431                 VTNET_CORE_LOCK(sc);
 1432                 error = vtnet_ioctl_ifflags(sc);
 1433                 VTNET_CORE_UNLOCK(sc);
 1434                 break;
 1435 
 1436         case SIOCADDMULTI:
 1437         case SIOCDELMULTI:
 1438                 VTNET_CORE_LOCK(sc);
 1439                 error = vtnet_ioctl_multi(sc);
 1440                 VTNET_CORE_UNLOCK(sc);
 1441                 break;
 1442 
 1443         case SIOCSIFMEDIA:
 1444         case SIOCGIFMEDIA:
 1445                 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
 1446                 break;
 1447 
 1448         case SIOCSIFCAP:
 1449                 VTNET_CORE_LOCK(sc);
 1450                 error = vtnet_ioctl_ifcap(sc, ifr);
 1451                 VTNET_CORE_UNLOCK(sc);
 1452                 VLAN_CAPABILITIES(ifp);
 1453                 break;
 1454 
 1455         default:
 1456                 error = ether_ioctl(ifp, cmd, data);
 1457                 break;
 1458         }
 1459 
 1460         VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
 1461 
 1462         return (error);
 1463 }
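
The SIOCSIFMTU, SIOCSIFFLAGS, and SIOCSIFCAP cases above are reached from userland through the socket ioctl interface. A minimal userspace sketch of exercising the SIOCSIFMTU path follows; the interface name "vtnet0" and the 9000-byte MTU are illustrative assumptions, and whether a jumbo MTU is accepted depends on the vtnet_max_mtu negotiated at attach time.

	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <sys/sockio.h>
	#include <net/if.h>
	#include <err.h>
	#include <string.h>
	#include <unistd.h>

	int
	main(void)
	{
		struct ifreq ifr;
		int s;

		if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
			err(1, "socket");

		memset(&ifr, 0, sizeof(ifr));
		strlcpy(ifr.ifr_name, "vtnet0", sizeof(ifr.ifr_name));
		ifr.ifr_mtu = 9000;

		/* Dispatched to vtnet_ioctl() -> vtnet_ioctl_mtu(). */
		if (ioctl(s, SIOCSIFMTU, &ifr) < 0)
			err(1, "SIOCSIFMTU");

		close(s);
		return (0);
	}
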
 1464 
 1465 static int
 1466 vtnet_rxq_populate(struct vtnet_rxq *rxq)
 1467 {
 1468         struct virtqueue *vq;
 1469         int nbufs, error;
 1470 
 1471 #ifdef DEV_NETMAP
 1472         error = vtnet_netmap_rxq_populate(rxq);
 1473         if (error >= 0)
 1474                 return (error);
 1475 #endif  /* DEV_NETMAP */
 1476 
 1477         vq = rxq->vtnrx_vq;
 1478         error = ENOSPC;
 1479 
 1480         for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
 1481                 error = vtnet_rxq_new_buf(rxq);
 1482                 if (error)
 1483                         break;
 1484         }
 1485 
 1486         if (nbufs > 0) {
 1487                 virtqueue_notify(vq);
 1488                 /*
 1489                  * EMSGSIZE signifies that the virtqueue did not have enough
 1490                  * entries available to hold the last mbuf. This is not
 1491                  * an error.
 1492                  */
 1493                 if (error == EMSGSIZE)
 1494                         error = 0;
 1495         }
 1496 
 1497         return (error);
 1498 }
 1499 
 1500 static void
 1501 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
 1502 {
 1503         struct virtqueue *vq;
 1504         struct mbuf *m;
 1505         int last;
 1506 #ifdef DEV_NETMAP
 1507         struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
 1508                                                         rxq->vtnrx_id, NR_RX);
 1509 #else  /* !DEV_NETMAP */
 1510         void *kring = NULL;
 1511 #endif /* !DEV_NETMAP */
 1512 
 1513         vq = rxq->vtnrx_vq;
 1514         last = 0;
 1515 
 1516         while ((m = virtqueue_drain(vq, &last)) != NULL) {
 1517                 if (kring == NULL)
 1518                         m_freem(m);
 1519         }
 1520 
 1521         KASSERT(virtqueue_empty(vq),
 1522             ("%s: mbufs remaining in rx queue %p", __func__, rxq));
 1523 }
 1524 
 1525 static struct mbuf *
 1526 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
 1527 {
 1528         struct mbuf *m_head, *m_tail, *m;
 1529         int i, size;
 1530 
 1531         m_head = NULL;
 1532         size = sc->vtnet_rx_clustersz;
 1533 
 1534         KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
 1535             ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
 1536 
 1537         for (i = 0; i < nbufs; i++) {
 1538                 m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
 1539                 if (m == NULL) {
 1540                         sc->vtnet_stats.mbuf_alloc_failed++;
 1541                         m_freem(m_head);
 1542                         return (NULL);
 1543                 }
 1544 
 1545                 m->m_len = size;
 1546                 if (m_head != NULL) {
 1547                         m_tail->m_next = m;
 1548                         m_tail = m;
 1549                 } else
 1550                         m_head = m_tail = m;
 1551         }
 1552 
 1553         if (m_tailp != NULL)
 1554                 *m_tailp = m_tail;
 1555 
 1556         return (m_head);
 1557 }
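
When LRO_NOMRG is negotiated, each receive slot is a chain of equal-size clusters large enough for the largest frame the host may deliver. A rough sketch of how the chain length relates to the cluster size follows; the 65550-byte maximum frame and 2048-byte clusters are illustrative values, not taken from this file.

	/* Not driver code: how many clusters one LRO_NOMRG receive slot needs. */
	static int
	example_lro_nomrg_chain_len(int max_frame_size, int clustersz)
	{
		/* Round up: (65550 + 2048 - 1) / 2048 == 33 clusters. */
		return ((max_frame_size + clustersz - 1) / clustersz);
	}
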
 1558 
 1559 /*
 1560  * Slow path for when LRO without mergeable buffers is negotiated.
 1561  */
 1562 static int
 1563 vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
 1564     int len0)
 1565 {
 1566         struct vtnet_softc *sc;
 1567         struct mbuf *m, *m_prev, *m_new, *m_tail;
 1568         int len, clustersz, nreplace, error;
 1569 
 1570         sc = rxq->vtnrx_sc;
 1571         clustersz = sc->vtnet_rx_clustersz;
 1572 
 1573         m_prev = NULL;
 1574         m_tail = NULL;
 1575         nreplace = 0;
 1576 
 1577         m = m0;
 1578         len = len0;
 1579 
 1580         /*
 1581          * Since these mbuf chains are so large, avoid allocating a complete
 1582          * replacement when the received frame did not consume the entire
 1583          * chain. Unused mbufs are moved to the tail of the replacement mbuf.
 1584          */
 1585         while (len > 0) {
 1586                 if (m == NULL) {
 1587                         sc->vtnet_stats.rx_frame_too_large++;
 1588                         return (EMSGSIZE);
 1589                 }
 1590 
 1591                 /*
 1592                  * Every mbuf should have the expected cluster size since that
 1593                  * is also used to allocate the replacements.
 1594                  */
 1595                 KASSERT(m->m_len == clustersz,
 1596                     ("%s: mbuf size %d not expected cluster size %d", __func__,
 1597                     m->m_len, clustersz));
 1598 
 1599                 m->m_len = MIN(m->m_len, len);
 1600                 len -= m->m_len;
 1601 
 1602                 m_prev = m;
 1603                 m = m->m_next;
 1604                 nreplace++;
 1605         }
 1606 
 1607         KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
 1608             ("%s: invalid replacement mbuf count %d max %d", __func__,
 1609             nreplace, sc->vtnet_rx_nmbufs));
 1610 
 1611         m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
 1612         if (m_new == NULL) {
 1613                 m_prev->m_len = clustersz;
 1614                 return (ENOBUFS);
 1615         }
 1616 
 1617         /*
 1618          * Move any unused mbufs from the received mbuf chain onto the
 1619          * end of the replacement chain.
 1620          */
 1621         if (m_prev->m_next != NULL) {
 1622                 m_tail->m_next = m_prev->m_next;
 1623                 m_prev->m_next = NULL;
 1624         }
 1625 
 1626         error = vtnet_rxq_enqueue_buf(rxq, m_new);
 1627         if (error) {
 1628                 /*
 1629                  * The replacement is suppose to be an copy of the one
 1630                  * dequeued so this is a very unexpected error.
 1631                  *
 1632                  * Restore the m0 chain to the original state if it was
 1633                  * modified so we can then discard it.
 1634                  */
 1635                 if (m_tail->m_next != NULL) {
 1636                         m_prev->m_next = m_tail->m_next;
 1637                         m_tail->m_next = NULL;
 1638                 }
 1639                 m_prev->m_len = clustersz;
 1640                 sc->vtnet_stats.rx_enq_replacement_failed++;
 1641                 m_freem(m_new);
 1642         }
 1643 
 1644         return (error);
 1645 }
 1646 
 1647 static int
 1648 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
 1649 {
 1650         struct vtnet_softc *sc;
 1651         struct mbuf *m_new;
 1652         int error;
 1653 
 1654         sc = rxq->vtnrx_sc;
 1655 
 1656         if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
 1657                 return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
 1658 
 1659         MPASS(m->m_next == NULL);
 1660         if (m->m_len < len)
 1661                 return (EMSGSIZE);
 1662 
 1663         m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
 1664         if (m_new == NULL)
 1665                 return (ENOBUFS);
 1666 
 1667         error = vtnet_rxq_enqueue_buf(rxq, m_new);
 1668         if (error) {
 1669                 sc->vtnet_stats.rx_enq_replacement_failed++;
 1670                 m_freem(m_new);
 1671         } else
 1672                 m->m_len = len;
 1673 
 1674         return (error);
 1675 }
 1676 
 1677 static int
 1678 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
 1679 {
 1680         struct vtnet_softc *sc;
 1681         struct sglist *sg;
 1682         int header_inlined, error;
 1683 
 1684         sc = rxq->vtnrx_sc;
 1685         sg = rxq->vtnrx_sg;
 1686 
 1687         KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
 1688             ("%s: mbuf chain without LRO_NOMRG", __func__));
 1689         VTNET_RXQ_LOCK_ASSERT(rxq);
 1690 
 1691         sglist_reset(sg);
 1692         header_inlined = vtnet_modern(sc) ||
 1693             (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
 1694 
 1695         if (header_inlined)
 1696                 error = sglist_append_mbuf(sg, m);
 1697         else {
 1698                 struct vtnet_rx_header *rxhdr =
 1699                     mtod(m, struct vtnet_rx_header *);
 1700                 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
 1701 
 1702                 /* Append the header and remaining mbuf data. */
 1703                 error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
 1704                 if (error)
 1705                         return (error);
 1706                 error = sglist_append(sg, &rxhdr[1],
 1707                     m->m_len - sizeof(struct vtnet_rx_header));
 1708                 if (error)
 1709                         return (error);
 1710 
 1711                 if (m->m_next != NULL)
 1712                         error = sglist_append_mbuf(sg, m->m_next);
 1713         }
 1714 
 1715         if (error)
 1716                 return (error);
 1717 
 1718         return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
 1719 }
 1720 
 1721 static int
 1722 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
 1723 {
 1724         struct vtnet_softc *sc;
 1725         struct mbuf *m;
 1726         int error;
 1727 
 1728         sc = rxq->vtnrx_sc;
 1729 
 1730         m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
 1731         if (m == NULL)
 1732                 return (ENOBUFS);
 1733 
 1734         error = vtnet_rxq_enqueue_buf(rxq, m);
 1735         if (error)
 1736                 m_freem(m);
 1737 
 1738         return (error);
 1739 }
 1740 
 1741 static int
 1742 vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
 1743     int hoff, struct virtio_net_hdr *hdr)
 1744 {
 1745         struct vtnet_softc *sc;
 1746         int error;
 1747 
 1748         sc = rxq->vtnrx_sc;
 1749 
 1750         /*
 1751          * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
 1752          * not have an analogous CSUM flag. The checksum has been validated,
 1753          * but is incomplete (TCP/UDP pseudo header).
 1754          *
 1755          * The packet is likely from another VM on the same host that itself
 1756          * performed checksum offloading, so Tx/Rx is basically a memcpy and
 1757          * the checksum has little value.
 1758          *
 1759          * Default to receiving the packet as-is for performance reasons, but
 1760          * this can cause issues if the packet is to be forwarded because it
 1761          * does not contain a valid checksum. This patch may be helpful:
 1762          * https://reviews.freebsd.org/D6611. In the meantime, have the driver
 1763          * compute the checksum if requested.
 1764          *
 1765          * BMV: Need to add a CSUM_PARTIAL flag?
 1766          */
 1767         if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
 1768                 error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
 1769                 return (error);
 1770         }
 1771 
 1772         /*
 1773          * Compute the checksum in the driver so the packet will contain a
 1774          * valid checksum. The checksum is at csum_offset from csum_start.
 1775          */
 1776         switch (etype) {
 1777 #if defined(INET) || defined(INET6)
 1778         case ETHERTYPE_IP:
 1779         case ETHERTYPE_IPV6: {
 1780                 int csum_off, csum_end;
 1781                 uint16_t csum;
 1782 
 1783                 csum_off = hdr->csum_start + hdr->csum_offset;
 1784                 csum_end = csum_off + sizeof(uint16_t);
 1785 
 1786                 /* Assume checksum will be in the first mbuf. */
 1787                 if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
 1788                         return (1);
 1789 
 1790                 /*
 1791                  * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
 1792                  * checksum and write it at the specified offset. We could
 1793                  * try to verify the packet: csum_start should probably
 1794                  * correspond to the start of the TCP/UDP header.
 1795                  *
 1796                  * BMV: Need to properly handle UDP with zero checksum. Is
 1797                  * the IPv4 header checksum implicitly validated?
 1798                  */
 1799                 csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
 1800                 *(uint16_t *)(mtodo(m, csum_off)) = csum;
 1801                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 1802                 m->m_pkthdr.csum_data = 0xFFFF;
 1803                 break;
 1804         }
 1805 #endif
 1806         default:
 1807                 sc->vtnet_stats.rx_csum_bad_ethtype++;
 1808                 return (1);
 1809         }
 1810 
 1811         return (0);
 1812 }
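
Concretely, csum_start is the offset of the L4 header within the frame and csum_offset is the offset of the checksum field within that header. A worked example under assumed values (untagged Ethernet, IPv4 without options, TCP) is sketched below; it is not specific to this driver.

	/*
	 * csum_start  = 14 (Ethernet) + 20 (IPv4)       = 34
	 * csum_offset = offsetof(struct tcphdr, th_sum) = 16
	 *
	 * The 16-bit checksum therefore sits at byte 34 + 16 = 50 of the
	 * frame, and in_cksum_skip(m, m->m_pkthdr.len, 34) produces the
	 * value written there.
	 */
	static inline int
	example_csum_field_byte(const struct virtio_net_hdr *hdr)
	{
		return (hdr->csum_start + hdr->csum_offset);	/* e.g. 50 */
	}
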
 1813 
 1814 static int
 1815 vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
 1816     uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused)
 1817 {
 1818 #if 0
 1819         struct vtnet_softc *sc;
 1820 #endif
 1821         int protocol;
 1822 
 1823 #if 0
 1824         sc = rxq->vtnrx_sc;
 1825 #endif
 1826 
 1827         switch (etype) {
 1828 #if defined(INET)
 1829         case ETHERTYPE_IP:
 1830                 if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
 1831                         protocol = IPPROTO_DONE;
 1832                 else {
 1833                         struct ip *ip = (struct ip *)(m->m_data + hoff);
 1834                         protocol = ip->ip_p;
 1835                 }
 1836                 break;
 1837 #endif
 1838 #if defined(INET6)
 1839         case ETHERTYPE_IPV6:
 1840                 if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
 1841                     || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
 1842                         protocol = IPPROTO_DONE;
 1843                 break;
 1844 #endif
 1845         default:
 1846                 protocol = IPPROTO_DONE;
 1847                 break;
 1848         }
 1849 
 1850         switch (protocol) {
 1851         case IPPROTO_TCP:
 1852         case IPPROTO_UDP:
 1853                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 1854                 m->m_pkthdr.csum_data = 0xFFFF;
 1855                 break;
 1856         default:
 1857                 /*
 1858                  * FreeBSD does not support checksum offloading of this
 1859                  * protocol. Let the stack re-verify the checksum later
 1860                  * if the protocol is supported.
 1861                  */
 1862 #if 0
 1863                 if_printf(sc->vtnet_ifp,
 1864                     "%s: checksum offload of unsupported protocol "
 1865                     "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
 1866                     __func__, etype, protocol, hdr->csum_start,
 1867                     hdr->csum_offset);
 1868 #endif
 1869                 break;
 1870         }
 1871 
 1872         return (0);
 1873 }
 1874 
 1875 static int
 1876 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
 1877     struct virtio_net_hdr *hdr)
 1878 {
 1879         const struct ether_header *eh;
 1880         int hoff;
 1881         uint16_t etype;
 1882 
 1883         eh = mtod(m, const struct ether_header *);
 1884         etype = ntohs(eh->ether_type);
 1885         if (etype == ETHERTYPE_VLAN) {
 1886                 /* TODO BMV: Handle QinQ. */
 1887                 const struct ether_vlan_header *evh =
 1888                     mtod(m, const struct ether_vlan_header *);
 1889                 etype = ntohs(evh->evl_proto);
 1890                 hoff = sizeof(struct ether_vlan_header);
 1891         } else
 1892                 hoff = sizeof(struct ether_header);
 1893 
 1894         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
 1895                 return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
 1896         else /* VIRTIO_NET_HDR_F_DATA_VALID */
 1897                 return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
 1898 }
 1899 
 1900 static void
 1901 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
 1902 {
 1903         struct mbuf *m;
 1904 
 1905         while (--nbufs > 0) {
 1906                 m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
 1907                 if (m == NULL)
 1908                         break;
 1909                 vtnet_rxq_discard_buf(rxq, m);
 1910         }
 1911 }
 1912 
 1913 static void
 1914 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
 1915 {
 1916         int error __diagused;
 1917 
 1918         /*
 1919          * Requeue the discarded mbuf. This should always be successful
 1920          * since it was just dequeued.
 1921          */
 1922         error = vtnet_rxq_enqueue_buf(rxq, m);
 1923         KASSERT(error == 0,
 1924             ("%s: cannot requeue discarded mbuf %d", __func__, error));
 1925 }
 1926 
 1927 static int
 1928 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
 1929 {
 1930         struct vtnet_softc *sc;
 1931         struct virtqueue *vq;
 1932         struct mbuf *m_tail;
 1933 
 1934         sc = rxq->vtnrx_sc;
 1935         vq = rxq->vtnrx_vq;
 1936         m_tail = m_head;
 1937 
 1938         while (--nbufs > 0) {
 1939                 struct mbuf *m;
 1940                 uint32_t len;
 1941 
 1942                 m = virtqueue_dequeue(vq, &len);
 1943                 if (m == NULL) {
 1944                         rxq->vtnrx_stats.vrxs_ierrors++;
 1945                         goto fail;
 1946                 }
 1947 
 1948                 if (vtnet_rxq_new_buf(rxq) != 0) {
 1949                         rxq->vtnrx_stats.vrxs_iqdrops++;
 1950                         vtnet_rxq_discard_buf(rxq, m);
 1951                         if (nbufs > 1)
 1952                                 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
 1953                         goto fail;
 1954                 }
 1955 
 1956                 if (m->m_len < len)
 1957                         len = m->m_len;
 1958 
 1959                 m->m_len = len;
 1960                 m->m_flags &= ~M_PKTHDR;
 1961 
 1962                 m_head->m_pkthdr.len += len;
 1963                 m_tail->m_next = m;
 1964                 m_tail = m;
 1965         }
 1966 
 1967         return (0);
 1968 
 1969 fail:
 1970         sc->vtnet_stats.rx_mergeable_failed++;
 1971         m_freem(m_head);
 1972 
 1973         return (1);
 1974 }
 1975 
 1976 #if defined(INET) || defined(INET6)
 1977 static int
 1978 vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m)
 1979 {
 1980         struct lro_ctrl *lro;
 1981 
 1982         lro = &rxq->vtnrx_lro;
 1983 
 1984         if (lro->lro_mbuf_max != 0) {
 1985                 tcp_lro_queue_mbuf(lro, m);
 1986                 return (0);
 1987         }
 1988 
 1989         return (tcp_lro_rx(lro, m, 0));
 1990 }
 1991 #endif
 1992 
 1993 static void
 1994 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
 1995     struct virtio_net_hdr *hdr)
 1996 {
 1997         struct vtnet_softc *sc;
 1998         if_t ifp;
 1999 
 2000         sc = rxq->vtnrx_sc;
 2001         ifp = sc->vtnet_ifp;
 2002 
 2003         if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
 2004                 struct ether_header *eh = mtod(m, struct ether_header *);
 2005                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 2006                         vtnet_vlan_tag_remove(m);
 2007                         /*
 2008                          * With the 802.1Q header removed, update the
 2009                          * checksum starting location accordingly.
 2010                          */
 2011                         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
 2012                                 hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
 2013                 }
 2014         }
 2015 
 2016         m->m_pkthdr.flowid = rxq->vtnrx_id;
 2017         M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 2018 
 2019         if (hdr->flags &
 2020             (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
 2021                 if (vtnet_rxq_csum(rxq, m, hdr) == 0)
 2022                         rxq->vtnrx_stats.vrxs_csum++;
 2023                 else
 2024                         rxq->vtnrx_stats.vrxs_csum_failed++;
 2025         }
 2026 
 2027         if (hdr->gso_size != 0) {
 2028                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 2029                 case VIRTIO_NET_HDR_GSO_TCPV4:
 2030                 case VIRTIO_NET_HDR_GSO_TCPV6:
 2031                         m->m_pkthdr.lro_nsegs =
 2032                             howmany(m->m_pkthdr.len, hdr->gso_size);
 2033                         rxq->vtnrx_stats.vrxs_host_lro++;
 2034                         break;
 2035                 }
 2036         }
 2037 
 2038         rxq->vtnrx_stats.vrxs_ipackets++;
 2039         rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
 2040 
 2041 #if defined(INET) || defined(INET6)
 2042         if (vtnet_software_lro(sc) && if_getcapenable(ifp) & IFCAP_LRO) {
 2043                 if (vtnet_lro_rx(rxq, m) == 0)
 2044                         return;
 2045         }
 2046 #endif
 2047 
 2048         if_input(ifp, m);
 2049 }
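
The lro_nsegs computation above divides the aggregated length by the announced segment size, rounding up, so the stack can account for the original wire packets. For example, a 65535-byte host-LRO aggregate with a gso_size of 1448 counts as 46 segments; the numbers are assumptions for illustration.

	/* Equivalent of howmany(pktlen, gso_size): round up. */
	static inline uint32_t
	example_lro_nsegs(uint32_t pktlen, uint16_t gso_size)
	{
		/* e.g. (65535 + 1448 - 1) / 1448 == 46 */
		return ((pktlen + gso_size - 1) / gso_size);
	}
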
 2050 
 2051 static int
 2052 vtnet_rxq_eof(struct vtnet_rxq *rxq)
 2053 {
 2054         struct virtio_net_hdr lhdr, *hdr;
 2055         struct vtnet_softc *sc;
 2056         if_t ifp;
 2057         struct virtqueue *vq;
 2058         int deq, count;
 2059 
 2060         sc = rxq->vtnrx_sc;
 2061         vq = rxq->vtnrx_vq;
 2062         ifp = sc->vtnet_ifp;
 2063         deq = 0;
 2064         count = sc->vtnet_rx_process_limit;
 2065 
 2066         VTNET_RXQ_LOCK_ASSERT(rxq);
 2067 
 2068         while (count-- > 0) {
 2069                 struct mbuf *m;
 2070                 uint32_t len, nbufs, adjsz;
 2071 
 2072                 m = virtqueue_dequeue(vq, &len);
 2073                 if (m == NULL)
 2074                         break;
 2075                 deq++;
 2076 
 2077                 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
 2078                         rxq->vtnrx_stats.vrxs_ierrors++;
 2079                         vtnet_rxq_discard_buf(rxq, m);
 2080                         continue;
 2081                 }
 2082 
 2083                 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
 2084                         struct virtio_net_hdr_mrg_rxbuf *mhdr =
 2085                             mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
 2086                         kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED);
 2087                         nbufs = vtnet_htog16(sc, mhdr->num_buffers);
 2088                         adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 2089                 } else if (vtnet_modern(sc)) {
 2090                         nbufs = 1; /* num_buffers is always 1 */
 2091                         adjsz = sizeof(struct virtio_net_hdr_v1);
 2092                 } else {
 2093                         nbufs = 1;
 2094                         adjsz = sizeof(struct vtnet_rx_header);
 2095                         /*
 2096                          * Account for our gap between the header and start of
 2097                          * data to keep the segments separated.
 2098                          */
 2099                         len += VTNET_RX_HEADER_PAD;
 2100                 }
 2101 
 2102                 if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
 2103                         rxq->vtnrx_stats.vrxs_iqdrops++;
 2104                         vtnet_rxq_discard_buf(rxq, m);
 2105                         if (nbufs > 1)
 2106                                 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
 2107                         continue;
 2108                 }
 2109 
 2110                 m->m_pkthdr.len = len;
 2111                 m->m_pkthdr.rcvif = ifp;
 2112                 m->m_pkthdr.csum_flags = 0;
 2113 
 2114                 if (nbufs > 1) {
 2115                         /* Dequeue the rest of the chain. */
 2116                         if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
 2117                                 continue;
 2118                 }
 2119 
 2120                 kmsan_mark_mbuf(m, KMSAN_STATE_INITED);
 2121 
 2122                 /*
 2123                  * Save an endian-swapped version of the header before it is
 2124                  * stripped. The header is always at the start of the mbuf
 2125                  * data. num_buffers was already saved (and is not needed here),
 2126                  * so use the standard header.
 2127                  */
 2128                 hdr = mtod(m, struct virtio_net_hdr *);
 2129                 lhdr.flags = hdr->flags;
 2130                 lhdr.gso_type = hdr->gso_type;
 2131                 lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
 2132                 lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
 2133                 lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
 2134                 lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
 2135                 m_adj(m, adjsz);
 2136 
 2137                 if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
 2138                         pfil_return_t pfil;
 2139 
 2140                         pfil = pfil_run_hooks(sc->vtnet_pfil, &m, ifp, PFIL_IN,
 2141                             NULL);
 2142                         switch (pfil) {
 2143                         case PFIL_REALLOCED:
 2144                                 m = pfil_mem2mbuf(m->m_data);
 2145                                 break;
 2146                         case PFIL_DROPPED:
 2147                         case PFIL_CONSUMED:
 2148                                 continue;
 2149                         default:
 2150                                 KASSERT(pfil == PFIL_PASS,
 2151                                     ("Filter returned %d!", pfil));
 2152                         }
 2153                 }
 2154 
 2155                 vtnet_rxq_input(rxq, m, &lhdr);
 2156         }
 2157 
 2158         if (deq > 0) {
 2159 #if defined(INET) || defined(INET6)
 2160                 if (vtnet_software_lro(sc))
 2161                         tcp_lro_flush_all(&rxq->vtnrx_lro);
 2162 #endif
 2163                 virtqueue_notify(vq);
 2164         }
 2165 
 2166         return (count > 0 ? 0 : EAGAIN);
 2167 }
 2168 
 2169 static void
 2170 vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries)
 2171 {
 2172         struct vtnet_softc *sc;
 2173         if_t ifp;
 2174         u_int more;
 2175 #ifdef DEV_NETMAP
 2176         int nmirq;
 2177 #endif /* DEV_NETMAP */
 2178 
 2179         sc = rxq->vtnrx_sc;
 2180         ifp = sc->vtnet_ifp;
 2181 
 2182         if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
 2183                 /*
 2184                  * Ignore this interrupt. Either this is a spurious interrupt
 2185                  * or multiqueue without per-VQ MSIX, so every queue needs to
 2186                  * be polled (a brain-dead configuration we could try harder
 2187                  * to avoid).
 2188                  */
 2189                 vtnet_rxq_disable_intr(rxq);
 2190                 return;
 2191         }
 2192 
 2193         VTNET_RXQ_LOCK(rxq);
 2194 
 2195 #ifdef DEV_NETMAP
 2196         /*
 2197          * We call netmap_rx_irq() under lock to prevent concurrent calls.
 2198          * This is not necessary to serialize the access to the RX vq, but
 2199          * rather to avoid races that may happen if this interface is
 2200          * attached to a VALE switch, which would cause received packets
 2201          * to stall in the RX queue (nm_kr_tryget() could find the kring
 2202          * busy when called from netmap_bwrap_intr_notify()).
 2203          */
 2204         nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
 2205         if (nmirq != NM_IRQ_PASS) {
 2206                 VTNET_RXQ_UNLOCK(rxq);
 2207                 if (nmirq == NM_IRQ_RESCHED) {
 2208                         taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
 2209                 }
 2210                 return;
 2211         }
 2212 #endif /* DEV_NETMAP */
 2213 
 2214 again:
 2215         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
 2216                 VTNET_RXQ_UNLOCK(rxq);
 2217                 return;
 2218         }
 2219 
 2220         more = vtnet_rxq_eof(rxq);
 2221         if (more || vtnet_rxq_enable_intr(rxq) != 0) {
 2222                 if (!more)
 2223                         vtnet_rxq_disable_intr(rxq);
 2224                 /*
 2225                  * This is an occasional condition or race (when !more),
 2226                  * so retry a few times before scheduling the taskqueue.
 2227                  */
 2228                 if (tries-- > 0)
 2229                         goto again;
 2230 
 2231                 rxq->vtnrx_stats.vrxs_rescheduled++;
 2232                 VTNET_RXQ_UNLOCK(rxq);
 2233                 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
 2234         } else
 2235                 VTNET_RXQ_UNLOCK(rxq);
 2236 }
 2237 
 2238 static void
 2239 vtnet_rx_vq_intr(void *xrxq)
 2240 {
 2241         struct vtnet_rxq *rxq;
 2242 
 2243         rxq = xrxq;
 2244         vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES);
 2245 }
 2246 
 2247 static void
 2248 vtnet_rxq_tq_intr(void *xrxq, int pending __unused)
 2249 {
 2250         struct vtnet_rxq *rxq;
 2251 
 2252         rxq = xrxq;
 2253         vtnet_rx_vq_process(rxq, 0);
 2254 }
 2255 
 2256 static int
 2257 vtnet_txq_intr_threshold(struct vtnet_txq *txq)
 2258 {
 2259         struct vtnet_softc *sc;
 2260         int threshold;
 2261 
 2262         sc = txq->vtntx_sc;
 2263 
 2264         /*
 2265          * The Tx interrupt is disabled until the queue free count falls
 2266          * below our threshold. Completed frames are drained from the Tx
 2267          * virtqueue before transmitting new frames and in the watchdog
 2268          * callout, so the frequency of Tx interrupts is greatly reduced,
 2269          * at the cost of not freeing mbufs as quickly as they otherwise
 2270          * would be.
 2271          */
 2272         threshold = virtqueue_size(txq->vtntx_vq) / 4;
 2273 
 2274         /*
 2275          * Without indirect descriptors, leave enough room for the most
 2276          * segments we handle.
 2277          */
 2278         if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
 2279             threshold < sc->vtnet_tx_nsegs)
 2280                 threshold = sc->vtnet_tx_nsegs;
 2281 
 2282         return (threshold);
 2283 }
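
As a worked example, a 256-entry Tx virtqueue yields a threshold of 64 free descriptors; without indirect descriptors the threshold is raised to at least vtnet_tx_nsegs so a maximum-sized frame can still be enqueued without taking an interrupt. The standalone sketch below restates the calculation with illustrative parameters.

	static int
	example_tx_intr_threshold(int vq_size, int tx_nsegs, bool indirect)
	{
		int threshold;

		threshold = vq_size / 4;	/* e.g. 256 / 4 == 64 */
		if (!indirect && threshold < tx_nsegs)
			threshold = tx_nsegs;
		return (threshold);
	}
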
 2284 
 2285 static int
 2286 vtnet_txq_below_threshold(struct vtnet_txq *txq)
 2287 {
 2288         struct virtqueue *vq;
 2289 
 2290         vq = txq->vtntx_vq;
 2291 
 2292         return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold);
 2293 }
 2294 
 2295 static int
 2296 vtnet_txq_notify(struct vtnet_txq *txq)
 2297 {
 2298         struct virtqueue *vq;
 2299 
 2300         vq = txq->vtntx_vq;
 2301 
 2302         txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
 2303         virtqueue_notify(vq);
 2304 
 2305         if (vtnet_txq_enable_intr(txq) == 0)
 2306                 return (0);
 2307 
 2308         /*
 2309          * Drain frames that were completed since last checked. If this
 2310          * causes the queue to go above the threshold, the caller should
 2311          * continue transmitting.
 2312          */
 2313         if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
 2314                 virtqueue_disable_intr(vq);
 2315                 return (1);
 2316         }
 2317 
 2318         return (0);
 2319 }
 2320 
 2321 static void
 2322 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
 2323 {
 2324         struct virtqueue *vq;
 2325         struct vtnet_tx_header *txhdr;
 2326         int last;
 2327 #ifdef DEV_NETMAP
 2328         struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp),
 2329                                                         txq->vtntx_id, NR_TX);
 2330 #else  /* !DEV_NETMAP */
 2331         void *kring = NULL;
 2332 #endif /* !DEV_NETMAP */
 2333 
 2334         vq = txq->vtntx_vq;
 2335         last = 0;
 2336 
 2337         while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
 2338                 if (kring == NULL) {
 2339                         m_freem(txhdr->vth_mbuf);
 2340                         uma_zfree(vtnet_tx_header_zone, txhdr);
 2341                 }
 2342         }
 2343 
 2344         KASSERT(virtqueue_empty(vq),
 2345             ("%s: mbufs remaining in tx queue %p", __func__, txq));
 2346 }
 2347 
 2348 /*
 2349  * BMV: This can go away once we finally have offsets in the mbuf header.
 2350  */
 2351 static int
 2352 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
 2353     int *proto, int *start)
 2354 {
 2355         struct vtnet_softc *sc;
 2356         struct ether_vlan_header *evh;
 2357 #if defined(INET) || defined(INET6)
 2358         int offset;
 2359 #endif
 2360 
 2361         sc = txq->vtntx_sc;
 2362 
 2363         evh = mtod(m, struct ether_vlan_header *);
 2364         if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 2365                 /* BMV: We should handle nested VLAN tags too. */
 2366                 *etype = ntohs(evh->evl_proto);
 2367 #if defined(INET) || defined(INET6)
 2368                 offset = sizeof(struct ether_vlan_header);
 2369 #endif
 2370         } else {
 2371                 *etype = ntohs(evh->evl_encap_proto);
 2372 #if defined(INET) || defined(INET6)
 2373                 offset = sizeof(struct ether_header);
 2374 #endif
 2375         }
 2376 
 2377         switch (*etype) {
 2378 #if defined(INET)
 2379         case ETHERTYPE_IP: {
 2380                 struct ip *ip, iphdr;
 2381                 if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
 2382                         m_copydata(m, offset, sizeof(struct ip),
 2383                             (caddr_t) &iphdr);
 2384                         ip = &iphdr;
 2385                 } else
 2386                         ip = (struct ip *)(m->m_data + offset);
 2387                 *proto = ip->ip_p;
 2388                 *start = offset + (ip->ip_hl << 2);
 2389                 break;
 2390         }
 2391 #endif
 2392 #if defined(INET6)
 2393         case ETHERTYPE_IPV6:
 2394                 *proto = -1;
 2395                 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
 2396                 /* Assert the network stack sent us a valid packet. */
 2397                 KASSERT(*start > offset,
 2398                     ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
 2399                     *start, offset, *proto));
 2400                 break;
 2401 #endif
 2402         default:
 2403                 sc->vtnet_stats.tx_csum_unknown_ethtype++;
 2404                 return (EINVAL);
 2405         }
 2406 
 2407         return (0);
 2408 }
 2409 
 2410 static int
 2411 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
 2412     int offset, struct virtio_net_hdr *hdr)
 2413 {
 2414         static struct timeval lastecn;
 2415         static int curecn;
 2416         struct vtnet_softc *sc;
 2417         struct tcphdr *tcp, tcphdr;
 2418 
 2419         sc = txq->vtntx_sc;
 2420 
 2421         if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
 2422                 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
 2423                 tcp = &tcphdr;
 2424         } else
 2425                 tcp = (struct tcphdr *)(m->m_data + offset);
 2426 
 2427         hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
 2428         hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
 2429         hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
 2430             VIRTIO_NET_HDR_GSO_TCPV6;
 2431 
 2432         if (__predict_false(tcp->th_flags & TH_CWR)) {
 2433                 /*
 2434                  * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In
 2435                  * FreeBSD, ECN support is not on a per-interface basis,
 2436                  * but globally via the net.inet.tcp.ecn.enable sysctl
 2437                  * knob. The default is off.
 2438                  */
 2439                 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
 2440                         if (ppsratecheck(&lastecn, &curecn, 1))
 2441                                 if_printf(sc->vtnet_ifp,
 2442                                     "TSO with ECN not negotiated with host\n");
 2443                         return (ENOTSUP);
 2444                 }
 2445                 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
 2446         }
 2447 
 2448         txq->vtntx_stats.vtxs_tso++;
 2449 
 2450         return (0);
 2451 }
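
To put numbers on the TSO header: for an untagged TCP/IPv4 frame with no IP or TCP options and a 1460-byte MSS (illustrative values), offset is 34 and th_off is 5, so hdr_len becomes 34 + 20 = 54 and gso_size 1460; the host then resegments the payload into 1460-byte pieces behind 54-byte headers. The simplified sketch below omits the guest-endian conversion (vtnet_gtoh16) performed above.

	static inline void
	example_fill_tso_hdr(struct virtio_net_hdr *hdr, uint16_t l4_off,
	    uint16_t tcp_hlen, uint16_t mss)
	{
		hdr->hdr_len = l4_off + tcp_hlen;	/* e.g. 34 + 20 == 54 */
		hdr->gso_size = mss;			/* e.g. 1460 */
		hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
	}
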
 2452 
 2453 static struct mbuf *
 2454 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
 2455     struct virtio_net_hdr *hdr)
 2456 {
 2457         struct vtnet_softc *sc;
 2458         int flags, etype, csum_start, proto, error;
 2459 
 2460         sc = txq->vtntx_sc;
 2461         flags = m->m_pkthdr.csum_flags;
 2462 
 2463         error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
 2464         if (error)
 2465                 goto drop;
 2466 
 2467         if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) {
 2468                 /* Sanity check the parsed mbuf matches the offload flags. */
 2469                 if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
 2470                     etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
 2471                     && etype != ETHERTYPE_IPV6))) {
 2472                         sc->vtnet_stats.tx_csum_proto_mismatch++;
 2473                         goto drop;
 2474                 }
 2475 
 2476                 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
 2477                 hdr->csum_start = vtnet_gtoh16(sc, csum_start);
 2478                 hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
 2479                 txq->vtntx_stats.vtxs_csum++;
 2480         }
 2481 
 2482         if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
 2483                 /*
 2484                  * Sanity check that the parsed mbuf IP protocol is TCP;
 2485                  * VirtIO TSO requires the checksum offloading above.
 2486                  */
 2487                 if (__predict_false(proto != IPPROTO_TCP)) {
 2488                         sc->vtnet_stats.tx_tso_not_tcp++;
 2489                         goto drop;
 2490                 } else if (__predict_false((hdr->flags &
 2491                     VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
 2492                         sc->vtnet_stats.tx_tso_without_csum++;
 2493                         goto drop;
 2494                 }
 2495 
 2496                 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
 2497                 if (error)
 2498                         goto drop;
 2499         }
 2500 
 2501         return (m);
 2502 
 2503 drop:
 2504         m_freem(m);
 2505         return (NULL);
 2506 }
 2507 
 2508 static int
 2509 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
 2510     struct vtnet_tx_header *txhdr)
 2511 {
 2512         struct vtnet_softc *sc;
 2513         struct virtqueue *vq;
 2514         struct sglist *sg;
 2515         struct mbuf *m;
 2516         int error;
 2517 
 2518         sc = txq->vtntx_sc;
 2519         vq = txq->vtntx_vq;
 2520         sg = txq->vtntx_sg;
 2521         m = *m_head;
 2522 
 2523         sglist_reset(sg);
 2524         error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
 2525         if (error != 0 || sg->sg_nseg != 1) {
 2526                 KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
 2527                     __func__, error, sg->sg_nseg));
 2528                 goto fail;
 2529         }
 2530 
 2531         error = sglist_append_mbuf(sg, m);
 2532         if (error) {
 2533                 m = m_defrag(m, M_NOWAIT);
 2534                 if (m == NULL)
 2535                         goto fail;
 2536 
 2537                 *m_head = m;
 2538                 sc->vtnet_stats.tx_defragged++;
 2539 
 2540                 error = sglist_append_mbuf(sg, m);
 2541                 if (error)
 2542                         goto fail;
 2543         }
 2544 
 2545         txhdr->vth_mbuf = m;
 2546         error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
 2547 
 2548         return (error);
 2549 
 2550 fail:
 2551         sc->vtnet_stats.tx_defrag_failed++;
 2552         m_freem(*m_head);
 2553         *m_head = NULL;
 2554 
 2555         return (ENOBUFS);
 2556 }
 2557 
 2558 static int
 2559 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
 2560 {
 2561         struct vtnet_tx_header *txhdr;
 2562         struct virtio_net_hdr *hdr;
 2563         struct mbuf *m;
 2564         int error;
 2565 
 2566         m = *m_head;
 2567         M_ASSERTPKTHDR(m);
 2568 
 2569         txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
 2570         if (txhdr == NULL) {
 2571                 m_freem(m);
 2572                 *m_head = NULL;
 2573                 return (ENOMEM);
 2574         }
 2575 
 2576         /*
 2577          * Always use the non-mergeable header, regardless of whether mergeable
 2578          * headers were negotiated, because for transmit num_buffers is always zero.
 2579          * The vtnet_hdr_size is used to enqueue the correctly sized header segment.
 2580          */
 2581         hdr = &txhdr->vth_uhdr.hdr;
 2582 
 2583         if (m->m_flags & M_VLANTAG) {
 2584                 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
 2585                 if ((*m_head = m) == NULL) {
 2586                         error = ENOBUFS;
 2587                         goto fail;
 2588                 }
 2589                 m->m_flags &= ~M_VLANTAG;
 2590         }
 2591 
 2592         if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
 2593                 m = vtnet_txq_offload(txq, m, hdr);
 2594                 if ((*m_head = m) == NULL) {
 2595                         error = ENOBUFS;
 2596                         goto fail;
 2597                 }
 2598         }
 2599 
 2600         error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
 2601 fail:
 2602         if (error)
 2603                 uma_zfree(vtnet_tx_header_zone, txhdr);
 2604 
 2605         return (error);
 2606 }
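
The header sizes involved come from the VirtIO specification: the legacy header is 10 bytes and the mergeable/modern (v1) header appends a 16-bit num_buffers field for 12 bytes. Because num_buffers is always zero on transmit, the driver fills the legacy layout and enqueues vtnet_hdr_size bytes of it. The compile-time checks below merely illustrate those size relationships.

	_Static_assert(sizeof(struct virtio_net_hdr) == 10,
	    "legacy virtio-net header is 10 bytes");
	_Static_assert(sizeof(struct virtio_net_hdr_mrg_rxbuf) == 12,
	    "mergeable header appends a 16-bit num_buffers field");
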
 2607 
 2608 #ifdef VTNET_LEGACY_TX
 2609 
 2610 static void
 2611 vtnet_start_locked(struct vtnet_txq *txq, if_t ifp)
 2612 {
 2613         struct vtnet_softc *sc;
 2614         struct virtqueue *vq;
 2615         struct mbuf *m0;
 2616         int tries, enq;
 2617 
 2618         sc = txq->vtntx_sc;
 2619         vq = txq->vtntx_vq;
 2620         tries = 0;
 2621 
 2622         VTNET_TXQ_LOCK_ASSERT(txq);
 2623 
 2624         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
 2625             sc->vtnet_link_active == 0)
 2626                 return;
 2627 
 2628         vtnet_txq_eof(txq);
 2629 
 2630 again:
 2631         enq = 0;
 2632 
 2633         while (!if_sendq_empty(ifp)) {
 2634                 if (virtqueue_full(vq))
 2635                         break;
 2636 
 2637                 m0 = if_dequeue(ifp);
 2638                 if (m0 == NULL)
 2639                         break;
 2640 
 2641                 if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
 2642                         if (m0 != NULL)
 2643                                 if_sendq_prepend(ifp, m0);
 2644                         break;
 2645                 }
 2646 
 2647                 enq++;
 2648                 ETHER_BPF_MTAP(ifp, m0);
 2649         }
 2650 
 2651         if (enq > 0 && vtnet_txq_notify(txq) != 0) {
 2652                 if (tries++ < VTNET_NOTIFY_RETRIES)
 2653                         goto again;
 2654 
 2655                 txq->vtntx_stats.vtxs_rescheduled++;
 2656                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
 2657         }
 2658 }
 2659 
 2660 static void
 2661 vtnet_start(if_t ifp)
 2662 {
 2663         struct vtnet_softc *sc;
 2664         struct vtnet_txq *txq;
 2665 
 2666         sc = if_getsoftc(ifp);
 2667         txq = &sc->vtnet_txqs[0];
 2668 
 2669         VTNET_TXQ_LOCK(txq);
 2670         vtnet_start_locked(txq, ifp);
 2671         VTNET_TXQ_UNLOCK(txq);
 2672 }
 2673 
 2674 #else /* !VTNET_LEGACY_TX */
 2675 
 2676 static int
 2677 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
 2678 {
 2679         struct vtnet_softc *sc;
 2680         struct virtqueue *vq;
 2681         struct buf_ring *br;
 2682         if_t ifp;
 2683         int enq, tries, error;
 2684 
 2685         sc = txq->vtntx_sc;
 2686         vq = txq->vtntx_vq;
 2687         br = txq->vtntx_br;
 2688         ifp = sc->vtnet_ifp;
 2689         tries = 0;
 2690         error = 0;
 2691 
 2692         VTNET_TXQ_LOCK_ASSERT(txq);
 2693 
 2694         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
 2695             sc->vtnet_link_active == 0) {
 2696                 if (m != NULL)
 2697                         error = drbr_enqueue(ifp, br, m);
 2698                 return (error);
 2699         }
 2700 
 2701         if (m != NULL) {
 2702                 error = drbr_enqueue(ifp, br, m);
 2703                 if (error)
 2704                         return (error);
 2705         }
 2706 
 2707         vtnet_txq_eof(txq);
 2708 
 2709 again:
 2710         enq = 0;
 2711 
 2712         while ((m = drbr_peek(ifp, br)) != NULL) {
 2713                 if (virtqueue_full(vq)) {
 2714                         drbr_putback(ifp, br, m);
 2715                         break;
 2716                 }
 2717 
 2718                 if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
 2719                         if (m != NULL)
 2720                                 drbr_putback(ifp, br, m);
 2721                         else
 2722                                 drbr_advance(ifp, br);
 2723                         break;
 2724                 }
 2725                 drbr_advance(ifp, br);
 2726 
 2727                 enq++;
 2728                 ETHER_BPF_MTAP(ifp, m);
 2729         }
 2730 
 2731         if (enq > 0 && vtnet_txq_notify(txq) != 0) {
 2732                 if (tries++ < VTNET_NOTIFY_RETRIES)
 2733                         goto again;
 2734 
 2735                 txq->vtntx_stats.vtxs_rescheduled++;
 2736                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
 2737         }
 2738 
 2739         return (0);
 2740 }
 2741 
 2742 static int
 2743 vtnet_txq_mq_start(if_t ifp, struct mbuf *m)
 2744 {
 2745         struct vtnet_softc *sc;
 2746         struct vtnet_txq *txq;
 2747         int i, npairs, error;
 2748 
 2749         sc = if_getsoftc(ifp);
 2750         npairs = sc->vtnet_act_vq_pairs;
 2751 
 2752         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 2753                 i = m->m_pkthdr.flowid % npairs;
 2754         else
 2755                 i = curcpu % npairs;
 2756 
 2757         txq = &sc->vtnet_txqs[i];
 2758 
 2759         if (VTNET_TXQ_TRYLOCK(txq) != 0) {
 2760                 error = vtnet_txq_mq_start_locked(txq, m);
 2761                 VTNET_TXQ_UNLOCK(txq);
 2762         } else {
 2763                 error = drbr_enqueue(ifp, txq->vtntx_br, m);
 2764                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
 2765         }
 2766 
 2767         return (error);
 2768 }
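
Queue selection pins a flow to a single Tx queue so its packets are not reordered: hashed mbufs use flowid modulo the number of active pairs (e.g. flowid 1447 with 4 pairs maps to queue 3), while unhashed traffic falls back to the current CPU. If the chosen queue's lock is contended, the mbuf is parked on the buf_ring and drained later by the deferred task. A compact, illustrative restatement of the selection rule:

	static inline int
	example_select_txq(uint32_t flowid, bool hashed, int cpu, int npairs)
	{
		/* e.g. hashed, flowid 1447, npairs 4 -> 1447 % 4 == 3 */
		return ((hashed ? flowid : (uint32_t)cpu) % npairs);
	}
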
 2769 
 2770 static void
 2771 vtnet_txq_tq_deferred(void *xtxq, int pending __unused)
 2772 {
 2773         struct vtnet_softc *sc;
 2774         struct vtnet_txq *txq;
 2775 
 2776         txq = xtxq;
 2777         sc = txq->vtntx_sc;
 2778 
 2779         VTNET_TXQ_LOCK(txq);
 2780         if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
 2781                 vtnet_txq_mq_start_locked(txq, NULL);
 2782         VTNET_TXQ_UNLOCK(txq);
 2783 }
 2784 
 2785 #endif /* VTNET_LEGACY_TX */
 2786 
 2787 static void
 2788 vtnet_txq_start(struct vtnet_txq *txq)
 2789 {
 2790         struct vtnet_softc *sc;
 2791         if_t ifp;
 2792 
 2793         sc = txq->vtntx_sc;
 2794         ifp = sc->vtnet_ifp;
 2795 
 2796 #ifdef VTNET_LEGACY_TX
 2797         if (!if_sendq_empty(ifp))
 2798                 vtnet_start_locked(txq, ifp);
 2799 #else
 2800         if (!drbr_empty(ifp, txq->vtntx_br))
 2801                 vtnet_txq_mq_start_locked(txq, NULL);
 2802 #endif
 2803 }
 2804 
 2805 static void
 2806 vtnet_txq_tq_intr(void *xtxq, int pending __unused)
 2807 {
 2808         struct vtnet_softc *sc;
 2809         struct vtnet_txq *txq;
 2810         if_t ifp;
 2811 
 2812         txq = xtxq;
 2813         sc = txq->vtntx_sc;
 2814         ifp = sc->vtnet_ifp;
 2815 
 2816         VTNET_TXQ_LOCK(txq);
 2817 
 2818         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
 2819                 VTNET_TXQ_UNLOCK(txq);
 2820                 return;
 2821         }
 2822 
 2823         vtnet_txq_eof(txq);
 2824         vtnet_txq_start(txq);
 2825 
 2826         VTNET_TXQ_UNLOCK(txq);
 2827 }
 2828 
 2829 static int
 2830 vtnet_txq_eof(struct vtnet_txq *txq)
 2831 {
 2832         struct virtqueue *vq;
 2833         struct vtnet_tx_header *txhdr;
 2834         struct mbuf *m;
 2835         int deq;
 2836 
 2837         vq = txq->vtntx_vq;
 2838         deq = 0;
 2839         VTNET_TXQ_LOCK_ASSERT(txq);
 2840 
 2841         while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
 2842                 m = txhdr->vth_mbuf;
 2843                 deq++;
 2844 
 2845                 txq->vtntx_stats.vtxs_opackets++;
 2846                 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
 2847                 if (m->m_flags & M_MCAST)
 2848                         txq->vtntx_stats.vtxs_omcasts++;
 2849 
 2850                 m_freem(m);
 2851                 uma_zfree(vtnet_tx_header_zone, txhdr);
 2852         }
 2853 
 2854         if (virtqueue_empty(vq))
 2855                 txq->vtntx_watchdog = 0;
 2856 
 2857         return (deq);
 2858 }
 2859 
 2860 static void
 2861 vtnet_tx_vq_intr(void *xtxq)
 2862 {
 2863         struct vtnet_softc *sc;
 2864         struct vtnet_txq *txq;
 2865         if_t ifp;
 2866 
 2867         txq = xtxq;
 2868         sc = txq->vtntx_sc;
 2869         ifp = sc->vtnet_ifp;
 2870 
 2871         if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
 2872                 /*
 2873                  * Ignore this interrupt. Either this is a spurious interrupt
 2874                  * or multiqueue without per-VQ MSIX, so every queue needs to
 2875                  * be polled (a brain-dead configuration we could try harder
 2876                  * to avoid).
 2877                  */
 2878                 vtnet_txq_disable_intr(txq);
 2879                 return;
 2880         }
 2881 
 2882 #ifdef DEV_NETMAP
 2883         if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
 2884                 return;
 2885 #endif /* DEV_NETMAP */
 2886 
 2887         VTNET_TXQ_LOCK(txq);
 2888 
 2889         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
 2890                 VTNET_TXQ_UNLOCK(txq);
 2891                 return;
 2892         }
 2893 
 2894         vtnet_txq_eof(txq);
 2895         vtnet_txq_start(txq);
 2896 
 2897         VTNET_TXQ_UNLOCK(txq);
 2898 }
 2899 
 2900 static void
 2901 vtnet_tx_start_all(struct vtnet_softc *sc)
 2902 {
 2903         struct vtnet_txq *txq;
 2904         int i;
 2905 
 2906         VTNET_CORE_LOCK_ASSERT(sc);
 2907 
 2908         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
 2909                 txq = &sc->vtnet_txqs[i];
 2910 
 2911                 VTNET_TXQ_LOCK(txq);
 2912                 vtnet_txq_start(txq);
 2913                 VTNET_TXQ_UNLOCK(txq);
 2914         }
 2915 }
 2916 
 2917 #ifndef VTNET_LEGACY_TX
 2918 static void
 2919 vtnet_qflush(if_t ifp)
 2920 {
 2921         struct vtnet_softc *sc;
 2922         struct vtnet_txq *txq;
 2923         struct mbuf *m;
 2924         int i;
 2925 
 2926         sc = if_getsoftc(ifp);
 2927 
 2928         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
 2929                 txq = &sc->vtnet_txqs[i];
 2930 
 2931                 VTNET_TXQ_LOCK(txq);
 2932                 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
 2933                         m_freem(m);
 2934                 VTNET_TXQ_UNLOCK(txq);
 2935         }
 2936 
 2937         if_qflush(ifp);
 2938 }
 2939 #endif
 2940 
 2941 static int
 2942 vtnet_watchdog(struct vtnet_txq *txq)
 2943 {
 2944         if_t ifp;
 2945 
 2946         ifp = txq->vtntx_sc->vtnet_ifp;
 2947 
 2948         VTNET_TXQ_LOCK(txq);
 2949         if (txq->vtntx_watchdog == 1) {
 2950                 /*
 2951                  * Only drain completed frames if the watchdog is about to
 2952                  * expire. If any frames were drained, there may be enough
 2953                  * free descriptors now available to transmit queued frames.
 2954                  * In that case, the timer will immediately be decremented
 2955                  * below, but the timeout is generous enough that this
 2956                  * should not be a problem.
 2957                  */
 2958                 if (vtnet_txq_eof(txq) != 0)
 2959                         vtnet_txq_start(txq);
 2960         }
 2961 
 2962         if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
 2963                 VTNET_TXQ_UNLOCK(txq);
 2964                 return (0);
 2965         }
 2966         VTNET_TXQ_UNLOCK(txq);
 2967 
 2968         if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
 2969         return (1);
 2970 }
 2971 
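/*
 * Sum the per-queue receive and transmit statistics across all queue
 * pairs into the caller-supplied accumulators.
 */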
 2972 static void
 2973 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
 2974     struct vtnet_txq_stats *txacc)
 2975 {
 2976 
 2977         bzero(rxacc, sizeof(struct vtnet_rxq_stats));
 2978         bzero(txacc, sizeof(struct vtnet_txq_stats));
 2979 
 2980         for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
 2981                 struct vtnet_rxq_stats *rxst;
 2982                 struct vtnet_txq_stats *txst;
 2983 
 2984                 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
 2985                 rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
 2986                 rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
 2987                 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
                      rxacc->vrxs_ierrors += rxst->vrxs_ierrors;
 2988                 rxacc->vrxs_csum += rxst->vrxs_csum;
 2989                 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
 2990                 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
 2991 
 2992                 txst = &sc->vtnet_txqs[i].vtntx_stats;
 2993                 txacc->vtxs_opackets += txst->vtxs_opackets;
 2994                 txacc->vtxs_obytes += txst->vtxs_obytes;
 2995                 txacc->vtxs_csum += txst->vtxs_csum;
 2996                 txacc->vtxs_tso += txst->vtxs_tso;
 2997                 txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
 2998         }
 2999 }
 3000 
 3001 static uint64_t
 3002 vtnet_get_counter(if_t ifp, ift_counter cnt)
 3003 {
 3004         struct vtnet_softc *sc;
 3005         struct vtnet_rxq_stats rxaccum;
 3006         struct vtnet_txq_stats txaccum;
 3007 
 3008         sc = if_getsoftc(ifp);
 3009         vtnet_accum_stats(sc, &rxaccum, &txaccum);
 3010 
 3011         switch (cnt) {
 3012         case IFCOUNTER_IPACKETS:
 3013                 return (rxaccum.vrxs_ipackets);
 3014         case IFCOUNTER_IQDROPS:
 3015                 return (rxaccum.vrxs_iqdrops);
 3016         case IFCOUNTER_IERRORS:
 3017                 return (rxaccum.vrxs_ierrors);
 3018         case IFCOUNTER_OPACKETS:
 3019                 return (txaccum.vtxs_opackets);
 3020 #ifndef VTNET_LEGACY_TX
 3021         case IFCOUNTER_OBYTES:
 3022                 return (txaccum.vtxs_obytes);
 3023         case IFCOUNTER_OMCASTS:
 3024                 return (txaccum.vtxs_omcasts);
 3025 #endif
 3026         default:
 3027                 return (if_get_counter_default(ifp, cnt));
 3028         }
 3029 }
 3030 
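/*
 * Periodic callout, rescheduled every hz ticks while the interface is
 * running. It runs the transmit watchdog on each active queue pair and
 * reinitializes the interface if any queue timed out.
 */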
 3031 static void
 3032 vtnet_tick(void *xsc)
 3033 {
 3034         struct vtnet_softc *sc;
 3035         if_t ifp;
 3036         int i, timedout;
 3037 
 3038         sc = xsc;
 3039         ifp = sc->vtnet_ifp;
 3040         timedout = 0;
 3041 
 3042         VTNET_CORE_LOCK_ASSERT(sc);
 3043 
 3044         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
 3045                 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
 3046 
 3047         if (timedout != 0) {
 3048                 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
 3049                 vtnet_init_locked(sc, 0);
 3050         } else
 3051                 callout_schedule(&sc->vtnet_tick_ch, hz);
 3052 }
 3053 
 3054 static void
 3055 vtnet_start_taskqueues(struct vtnet_softc *sc)
 3056 {
 3057         device_t dev;
 3058         struct vtnet_rxq *rxq;
 3059         struct vtnet_txq *txq;
 3060         int i, error;
 3061 
 3062         dev = sc->vtnet_dev;
 3063 
 3064         /*
 3065          * Errors here are very difficult to recover from - we cannot
 3066          * easily fail because, if this happens during boot, freeing any
 3067          * successfully started taskqueues would hang since the scheduler
 3068          * is not yet running.
 3069          *
 3070          * Most drivers simply ignore the return value - it can only fail
 3071          * with ENOMEM, so an error is not likely.
 3072          */
 3073         for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
 3074                 rxq = &sc->vtnet_rxqs[i];
 3075                 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
 3076                     "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
 3077                 if (error) {
 3078                         device_printf(dev, "failed to start rx taskq %d\n",
 3079                             rxq->vtnrx_id);
 3080                 }
 3081 
 3082                 txq = &sc->vtnet_txqs[i];
 3083                 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
 3084                     "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
 3085                 if (error) {
 3086                         device_printf(dev, "failed to start tx taskq %d\n",
 3087                             txq->vtntx_id);
 3088                 }
 3089         }
 3090 }
 3091 
 3092 static void
 3093 vtnet_free_taskqueues(struct vtnet_softc *sc)
 3094 {
 3095         struct vtnet_rxq *rxq;
 3096         struct vtnet_txq *txq;
 3097         int i;
 3098 
 3099         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
 3100                 rxq = &sc->vtnet_rxqs[i];
 3101                 if (rxq->vtnrx_tq != NULL) {
 3102                         taskqueue_free(rxq->vtnrx_tq);
 3103                         rxq->vtnrx_tq = NULL;
 3104                 }
 3105 
 3106                 txq = &sc->vtnet_txqs[i];
 3107                 if (txq->vtntx_tq != NULL) {
 3108                         taskqueue_free(txq->vtntx_tq);
 3109                         txq->vtntx_tq = NULL;
 3110                 }
 3111         }
 3112 }
 3113 
 3114 static void
 3115 vtnet_drain_taskqueues(struct vtnet_softc *sc)
 3116 {
 3117         struct vtnet_rxq *rxq;
 3118         struct vtnet_txq *txq;
 3119         int i;
 3120 
 3121         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
 3122                 rxq = &sc->vtnet_rxqs[i];
 3123                 if (rxq->vtnrx_tq != NULL)
 3124                         taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
 3125 
 3126                 txq = &sc->vtnet_txqs[i];
 3127                 if (txq->vtntx_tq != NULL) {
 3128                         taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
 3129 #ifndef VTNET_LEGACY_TX
 3130                         taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
 3131 #endif
 3132                 }
 3133         }
 3134 }
 3135 
 3136 static void
 3137 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
 3138 {
 3139         struct vtnet_rxq *rxq;
 3140         struct vtnet_txq *txq;
 3141         int i;
 3142 
 3143         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
 3144                 rxq = &sc->vtnet_rxqs[i];
 3145                 vtnet_rxq_free_mbufs(rxq);
 3146 
 3147                 txq = &sc->vtnet_txqs[i];
 3148                 vtnet_txq_free_mbufs(txq);
 3149         }
 3150 }
 3151 
 3152 static void
 3153 vtnet_stop_rendezvous(struct vtnet_softc *sc)
 3154 {
 3155         struct vtnet_rxq *rxq;
 3156         struct vtnet_txq *txq;
 3157         int i;
 3158 
 3159         VTNET_CORE_LOCK_ASSERT(sc);
 3160 
 3161         /*
 3162          * Lock and unlock the per-queue mutex so we know the stopped
 3163          * state is visible. Doing only the active queues should be
 3164          * sufficient, but it does not cost much extra to do all the
 3165          * queues.
 3166          */
 3167         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
 3168                 rxq = &sc->vtnet_rxqs[i];
 3169                 VTNET_RXQ_LOCK(rxq);
 3170                 VTNET_RXQ_UNLOCK(rxq);
 3171 
 3172                 txq = &sc->vtnet_txqs[i];
 3173                 VTNET_TXQ_LOCK(txq);
 3174                 VTNET_TXQ_UNLOCK(txq);
 3175         }
 3176 }
 3177 
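/*
 * Bring the interface down: clear IFF_DRV_RUNNING, stop the tick
 * callout, disable interrupts, reset the device, and free any mbufs
 * still held by the receive and transmit queues. Called with the core
 * lock held.
 */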
 3178 static void
 3179 vtnet_stop(struct vtnet_softc *sc)
 3180 {
 3181         device_t dev;
 3182         if_t ifp;
 3183 
 3184         dev = sc->vtnet_dev;
 3185         ifp = sc->vtnet_ifp;
 3186 
 3187         VTNET_CORE_LOCK_ASSERT(sc);
 3188 
 3189         if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
 3190         sc->vtnet_link_active = 0;
 3191         callout_stop(&sc->vtnet_tick_ch);
 3192 
 3193         /* Only advisory. */
 3194         vtnet_disable_interrupts(sc);
 3195 
 3196 #ifdef DEV_NETMAP
 3197         /* Stop any pending txsync/rxsync and disable them. */
 3198         netmap_disable_all_rings(ifp);
 3199 #endif /* DEV_NETMAP */
 3200 
 3201         /*
 3202          * Stop the host adapter. This resets it to the pre-initialized
 3203          * state. It will not generate any interrupts until after it is
 3204          * reinitialized.
 3205          */
 3206         virtio_stop(dev);
 3207         vtnet_stop_rendezvous(sc);
 3208 
 3209         vtnet_drain_rxtx_queues(sc);
 3210         sc->vtnet_act_vq_pairs = 1;
 3211 }
 3212 
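/*
 * Renegotiate features with the host after a device reset, dropping
 * any receive-side features (checksum, LRO, VLAN filtering) that are
 * disabled in the interface capabilities.
 */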
 3213 static int
 3214 vtnet_virtio_reinit(struct vtnet_softc *sc)
 3215 {
 3216         device_t dev;
 3217         if_t ifp;
 3218         uint64_t features;
 3219         int error;
 3220 
 3221         dev = sc->vtnet_dev;
 3222         ifp = sc->vtnet_ifp;
 3223         features = sc->vtnet_negotiated_features;
 3224 
 3225         /*
 3226          * Re-negotiate with the host, removing any disabled receive
 3227          * features. Transmit features are disabled only on our side
 3228          * via if_capenable and if_hwassist.
 3229          */
 3230 
 3231         if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
 3232                 features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES);
 3233 
 3234         if ((if_getcapenable(ifp) & IFCAP_LRO) == 0)
 3235                 features &= ~VTNET_LRO_FEATURES;
 3236 
 3237         if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0)
 3238                 features &= ~VIRTIO_NET_F_CTRL_VLAN;
 3239 
 3240         error = virtio_reinit(dev, features);
 3241         if (error) {
 3242                 device_printf(dev, "virtio reinit error %d\n", error);
 3243                 return (error);
 3244         }
 3245 
 3246         sc->vtnet_features = features;
 3247         virtio_reinit_complete(dev);
 3248 
 3249         return (0);
 3250 }
 3251 
 3252 static void
 3253 vtnet_init_rx_filters(struct vtnet_softc *sc)
 3254 {
 3255         if_t ifp;
 3256 
 3257         ifp = sc->vtnet_ifp;
 3258 
 3259         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
 3260                 vtnet_rx_filter(sc);
 3261                 vtnet_rx_filter_mac(sc);
 3262         }
 3263 
 3264         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
 3265                 vtnet_rx_filter_vlan(sc);
 3266 }
 3267 
 3268 static int
 3269 vtnet_init_rx_queues(struct vtnet_softc *sc)
 3270 {
 3271         device_t dev;
 3272         if_t ifp;
 3273         struct vtnet_rxq *rxq;
 3274         int i, clustersz, error;
 3275 
 3276         dev = sc->vtnet_dev;
 3277         ifp = sc->vtnet_ifp;
 3278 
 3279         clustersz = vtnet_rx_cluster_size(sc, if_getmtu(ifp));
 3280         sc->vtnet_rx_clustersz = clustersz;
 3281 
 3282         if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
 3283                 sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
 3284                     VTNET_MAX_RX_SIZE, clustersz);
 3285                 KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
 3286                     ("%s: too many rx mbufs %d for %d segments", __func__,
 3287                     sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
 3288         } else
 3289                 sc->vtnet_rx_nmbufs = 1;
 3290 
 3291         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
 3292                 rxq = &sc->vtnet_rxqs[i];
 3293 
 3294                 /* Hold the lock to satisfy asserts. */
 3295                 VTNET_RXQ_LOCK(rxq);
 3296                 error = vtnet_rxq_populate(rxq);
 3297                 VTNET_RXQ_UNLOCK(rxq);
 3298 
 3299                 if (error) {
 3300                         device_printf(dev, "cannot populate Rx queue %d\n", i);
 3301                         return (error);
 3302                 }
 3303         }
 3304 
 3305         return (0);
 3306 }
 3307 
 3308 static int
 3309 vtnet_init_tx_queues(struct vtnet_softc *sc)
 3310 {
 3311         struct vtnet_txq *txq;
 3312         int i;
 3313 
 3314         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
 3315                 txq = &sc->vtnet_txqs[i];
 3316                 txq->vtntx_watchdog = 0;
 3317                 txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq);
 3318 #ifdef DEV_NETMAP
 3319                 netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0);
 3320 #endif /* DEV_NETMAP */
 3321         }
 3322 
 3323         return (0);
 3324 }
 3325 
 3326 static int
 3327 vtnet_init_rxtx_queues(struct vtnet_softc *sc)
 3328 {
 3329         int error;
 3330 
 3331         error = vtnet_init_rx_queues(sc);
 3332         if (error)
 3333                 return (error);
 3334 
 3335         error = vtnet_init_tx_queues(sc);
 3336         if (error)
 3337                 return (error);
 3338 
 3339         return (0);
 3340 }
 3341 
 3342 static void
 3343 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
 3344 {
 3345         device_t dev;
 3346         int npairs;
 3347 
 3348         dev = sc->vtnet_dev;
 3349 
 3350         if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) {
 3351                 sc->vtnet_act_vq_pairs = 1;
 3352                 return;
 3353         }
 3354 
 3355         npairs = sc->vtnet_req_vq_pairs;
 3356 
 3357         if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
 3358                 device_printf(dev, "cannot set active queue pairs to %d, "
 3359                     "falling back to 1 queue pair\n", npairs);
 3360                 npairs = 1;
 3361         }
 3362 
 3363         sc->vtnet_act_vq_pairs = npairs;
 3364 }
 3365 
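/*
 * Toggle the guest (receive) offloads at runtime through the control
 * virtqueue, based on the currently enabled interface capabilities.
 * If the command fails and the interface is running, it is
 * reinitialized so a full renegotiation takes place instead.
 */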
 3366 static void
 3367 vtnet_update_rx_offloads(struct vtnet_softc *sc)
 3368 {
 3369         if_t ifp;
 3370         uint64_t features;
 3371         int error;
 3372 
 3373         ifp = sc->vtnet_ifp;
 3374         features = sc->vtnet_features;
 3375 
 3376         VTNET_CORE_LOCK_ASSERT(sc);
 3377 
 3378         if (if_getcapabilities(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
 3379                 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
 3380                         features |= VIRTIO_NET_F_GUEST_CSUM;
 3381                 else
 3382                         features &= ~VIRTIO_NET_F_GUEST_CSUM;
 3383         }
 3384 
 3385         if (if_getcapabilities(ifp) & IFCAP_LRO && !vtnet_software_lro(sc)) {
 3386                 if (if_getcapenable(ifp) & IFCAP_LRO)
 3387                         features |= VTNET_LRO_FEATURES;
 3388                 else
 3389                         features &= ~VTNET_LRO_FEATURES;
 3390         }
 3391 
 3392         error = vtnet_ctrl_guest_offloads(sc,
 3393             features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 |
 3394                         VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN  |
 3395                         VIRTIO_NET_F_GUEST_UFO));
 3396         if (error) {
 3397                 device_printf(sc->vtnet_dev,
 3398                     "%s: cannot update Rx features\n", __func__);
 3399                 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 3400                         if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
 3401                         vtnet_init_locked(sc, 0);
 3402                 }
 3403         } else
 3404                 sc->vtnet_features = features;
 3405 }
 3406 
 3407 static int
 3408 vtnet_reinit(struct vtnet_softc *sc)
 3409 {
 3410         if_t ifp;
 3411         int error;
 3412 
 3413         ifp = sc->vtnet_ifp;
 3414 
 3415         bcopy(if_getlladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
 3416 
 3417         error = vtnet_virtio_reinit(sc);
 3418         if (error)
 3419                 return (error);
 3420 
 3421         vtnet_set_macaddr(sc);
 3422         vtnet_set_active_vq_pairs(sc);
 3423 
 3424         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
 3425                 vtnet_init_rx_filters(sc);
 3426 
 3427         if_sethwassist(ifp, 0);
 3428         if (if_getcapenable(ifp) & IFCAP_TXCSUM)
 3429                 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD, 0);
 3430         if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
 3431                 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD_IPV6, 0);
 3432         if (if_getcapenable(ifp) & IFCAP_TSO4)
 3433                 if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
 3434         if (if_getcapenable(ifp) & IFCAP_TSO6)
 3435                 if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
 3436 
 3437         error = vtnet_init_rxtx_queues(sc);
 3438         if (error)
 3439                 return (error);
 3440 
 3441         return (0);
 3442 }
 3443 
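/*
 * (Re)initialize the interface: stop it if it is already running,
 * renegotiate with the host, program the MAC address and receive
 * filters, repopulate the queues, and finally mark it running and
 * restart the tick callout. Called with the core lock held.
 */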
 3444 static void
 3445 vtnet_init_locked(struct vtnet_softc *sc, int init_mode)
 3446 {
 3447         if_t ifp;
 3448 
 3449         ifp = sc->vtnet_ifp;
 3450 
 3451         VTNET_CORE_LOCK_ASSERT(sc);
 3452 
 3453         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
 3454                 return;
 3455 
 3456         vtnet_stop(sc);
 3457 
 3458 #ifdef DEV_NETMAP
 3459         /* Once stopped we can update the netmap flags, if necessary. */
 3460         switch (init_mode) {
 3461         case VTNET_INIT_NETMAP_ENTER:
 3462                 nm_set_native_flags(NA(ifp));
 3463                 break;
 3464         case VTNET_INIT_NETMAP_EXIT:
 3465                 nm_clear_native_flags(NA(ifp));
 3466                 break;
 3467         }
 3468 #endif /* DEV_NETMAP */
 3469 
 3470         if (vtnet_reinit(sc) != 0) {
 3471                 vtnet_stop(sc);
 3472                 return;
 3473         }
 3474 
 3475         if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
 3476         vtnet_update_link_status(sc);
 3477         vtnet_enable_interrupts(sc);
 3478         callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
 3479 
 3480 #ifdef DEV_NETMAP
 3481         /* Re-enable txsync/rxsync. */
 3482         netmap_enable_all_rings(ifp);
 3483 #endif /* DEV_NETMAP */
 3484 }
 3485 
 3486 static void
 3487 vtnet_init(void *xsc)
 3488 {
 3489         struct vtnet_softc *sc;
 3490 
 3491         sc = xsc;
 3492 
 3493         VTNET_CORE_LOCK(sc);
 3494         vtnet_init_locked(sc, 0);
 3495         VTNET_CORE_UNLOCK(sc);
 3496 }
 3497 
 3498 static void
 3499 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
 3500 {
 3501 
 3502         /*
 3503          * The control virtqueue is only polled and therefore it should
 3504          * already be empty.
 3505          */
 3506         KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
 3507             ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq));
 3508 }
 3509 
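/*
 * Execute a command on the control virtqueue. Each command below
 * follows the same pattern: a readable header (plus optional payload)
 * and a single writable ack byte are gathered into an sglist, the
 * request is enqueued and the host notified, and the queue is then
 * polled until the host has processed it.
 */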
 3510 static void
 3511 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
 3512     struct sglist *sg, int readable, int writable)
 3513 {
 3514         struct virtqueue *vq;
 3515 
 3516         vq = sc->vtnet_ctrl_vq;
 3517 
 3518         MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ);
 3519         VTNET_CORE_LOCK_ASSERT(sc);
 3520 
 3521         if (!virtqueue_empty(vq))
 3522                 return;
 3523 
 3524         /*
 3525          * Poll for the response; the command has likely already completed
 3526          * by the time the notify returns.
 3527          */
 3528         if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0)  {
 3529                 virtqueue_notify(vq);
 3530                 virtqueue_poll(vq, NULL);
 3531         }
 3532 }
 3533 
 3534 static int
 3535 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
 3536 {
 3537         struct sglist_seg segs[3];
 3538         struct sglist sg;
 3539         struct {
 3540                 struct virtio_net_ctrl_hdr hdr __aligned(2);
 3541                 uint8_t pad1;
 3542                 uint8_t addr[ETHER_ADDR_LEN] __aligned(8);
 3543                 uint8_t pad2;
 3544                 uint8_t ack;
 3545         } s;
 3546         int error;
 3547 
 3548         error = 0;
 3549         MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC);
 3550 
 3551         s.hdr.class = VIRTIO_NET_CTRL_MAC;
 3552         s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
 3553         bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN);
 3554         s.ack = VIRTIO_NET_ERR;
 3555 
 3556         sglist_init(&sg, nitems(segs), segs);
 3557         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
 3558         error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN);
 3559         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
 3560         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
 3561 
 3562         if (error == 0)
 3563                 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
 3564 
 3565         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
 3566 }
 3567 
 3568 static int
 3569 vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads)
 3570 {
 3571         struct sglist_seg segs[3];
 3572         struct sglist sg;
 3573         struct {
 3574                 struct virtio_net_ctrl_hdr hdr __aligned(2);
 3575                 uint8_t pad1;
 3576                 uint64_t offloads __aligned(8);
 3577                 uint8_t pad2;
 3578                 uint8_t ack;
 3579         } s;
 3580         int error;
 3581 
 3582         error = 0;
 3583         MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
 3584 
 3585         s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
 3586         s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
 3587         s.offloads = vtnet_gtoh64(sc, offloads);
 3588         s.ack = VIRTIO_NET_ERR;
 3589 
 3590         sglist_init(&sg, nitems(segs), segs);
 3591         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
 3592         error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t));
 3593         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
 3594         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
 3595 
 3596         if (error == 0)
 3597                 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
 3598 
 3599         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
 3600 }
 3601 
 3602 static int
 3603 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
 3604 {
 3605         struct sglist_seg segs[3];
 3606         struct sglist sg;
 3607         struct {
 3608                 struct virtio_net_ctrl_hdr hdr __aligned(2);
 3609                 uint8_t pad1;
 3610                 struct virtio_net_ctrl_mq mq __aligned(2);
 3611                 uint8_t pad2;
 3612                 uint8_t ack;
 3613         } s;
 3614         int error;
 3615 
 3616         error = 0;
 3617         MPASS(sc->vtnet_flags & VTNET_FLAG_MQ);
 3618 
 3619         s.hdr.class = VIRTIO_NET_CTRL_MQ;
 3620         s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
 3621         s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs);
 3622         s.ack = VIRTIO_NET_ERR;
 3623 
 3624         sglist_init(&sg, nitems(segs), segs);
 3625         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
 3626         error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
 3627         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
 3628         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
 3629 
 3630         if (error == 0)
 3631                 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
 3632 
 3633         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
 3634 }
 3635 
 3636 static int
 3637 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on)
 3638 {
 3639         struct sglist_seg segs[3];
 3640         struct sglist sg;
 3641         struct {
 3642                 struct virtio_net_ctrl_hdr hdr __aligned(2);
 3643                 uint8_t pad1;
 3644                 uint8_t onoff;
 3645                 uint8_t pad2;
 3646                 uint8_t ack;
 3647         } s;
 3648         int error;
 3649 
 3650         error = 0;
 3651         MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
 3652 
 3653         s.hdr.class = VIRTIO_NET_CTRL_RX;
 3654         s.hdr.cmd = cmd;
 3655         s.onoff = on;
 3656         s.ack = VIRTIO_NET_ERR;
 3657 
 3658         sglist_init(&sg, nitems(segs), segs);
 3659         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
 3660         error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
 3661         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
 3662         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
 3663 
 3664         if (error == 0)
 3665                 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
 3666 
 3667         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
 3668 }
 3669 
 3670 static int
 3671 vtnet_set_promisc(struct vtnet_softc *sc, bool on)
 3672 {
 3673         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
 3674 }
 3675 
 3676 static int
 3677 vtnet_set_allmulti(struct vtnet_softc *sc, bool on)
 3678 {
 3679         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
 3680 }
 3681 
 3682 static void
 3683 vtnet_rx_filter(struct vtnet_softc *sc)
 3684 {
 3685         device_t dev;
 3686         if_t ifp;
 3687 
 3688         dev = sc->vtnet_dev;
 3689         ifp = sc->vtnet_ifp;
 3690 
 3691         VTNET_CORE_LOCK_ASSERT(sc);
 3692 
 3693         if (vtnet_set_promisc(sc, if_getflags(ifp) & IFF_PROMISC) != 0) {
 3694                 device_printf(dev, "cannot %s promiscuous mode\n",
 3695                     if_getflags(ifp) & IFF_PROMISC ? "enable" : "disable");
 3696         }
 3697 
 3698         if (vtnet_set_allmulti(sc, if_getflags(ifp) & IFF_ALLMULTI) != 0) {
 3699                 device_printf(dev, "cannot %s all-multicast mode\n",
 3700                     if_getflags(ifp) & IFF_ALLMULTI ? "enable" : "disable");
 3701         }
 3702 }
 3703 
 3704 static u_int
 3705 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
 3706 {
 3707         struct vtnet_softc *sc = arg;
 3708 
 3709         if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
 3710                 return (0);
 3711 
 3712         if (ucnt < VTNET_MAX_MAC_ENTRIES)
 3713                 bcopy(LLADDR(sdl),
 3714                     &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
 3715                     ETHER_ADDR_LEN);
 3716 
 3717         return (1);
 3718 }
 3719 
 3720 static u_int
 3721 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
 3722 {
 3723         struct vtnet_mac_filter *filter = arg;
 3724 
 3725         if (mcnt < VTNET_MAX_MAC_ENTRIES)
 3726                 bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
 3727                     ETHER_ADDR_LEN);
 3728 
 3729         return (1);
 3730 }
 3731 
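/*
 * Program the host's unicast and multicast MAC filter tables from the
 * interface's current address lists. If either list exceeds
 * VTNET_MAX_MAC_ENTRIES, fall back to promiscuous or all-multicast
 * mode, respectively.
 */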
 3732 static void
 3733 vtnet_rx_filter_mac(struct vtnet_softc *sc)
 3734 {
 3735         struct virtio_net_ctrl_hdr hdr __aligned(2);
 3736         struct vtnet_mac_filter *filter;
 3737         struct sglist_seg segs[4];
 3738         struct sglist sg;
 3739         if_t ifp;
 3740         bool promisc, allmulti;
 3741         u_int ucnt, mcnt;
 3742         int error;
 3743         uint8_t ack;
 3744 
 3745         ifp = sc->vtnet_ifp;
 3746         filter = sc->vtnet_mac_filter;
 3747         error = 0;
 3748 
 3749         MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
 3750         VTNET_CORE_LOCK_ASSERT(sc);
 3751 
 3752         /* Unicast MAC addresses: */
 3753         ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
 3754         promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
 3755 
 3756         if (promisc) {
 3757                 ucnt = 0;
 3758                 if_printf(ifp, "more than %d MAC addresses assigned, "
 3759                     "falling back to promiscuous mode\n",
 3760                     VTNET_MAX_MAC_ENTRIES);
 3761         }
 3762 
 3763         /* Multicast MAC addresses: */
 3764         mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
 3765         allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
 3766 
 3767         if (allmulti) {
 3768                 mcnt = 0;
 3769                 if_printf(ifp, "more than %d multicast MAC addresses "
 3770                     "assigned, falling back to all-multicast mode\n",
 3771                     VTNET_MAX_MAC_ENTRIES);
 3772         }
 3773 
 3774         if (promisc && allmulti)
 3775                 goto out;
 3776 
 3777         filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt);
 3778         filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt);
 3779 
 3780         hdr.class = VIRTIO_NET_CTRL_MAC;
 3781         hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
 3782         ack = VIRTIO_NET_ERR;
 3783 
 3784         sglist_init(&sg, nitems(segs), segs);
 3785         error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
 3786         error |= sglist_append(&sg, &filter->vmf_unicast,
 3787             sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN);
 3788         error |= sglist_append(&sg, &filter->vmf_multicast,
 3789             sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN);
 3790         error |= sglist_append(&sg, &ack, sizeof(uint8_t));
 3791         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
 3792 
 3793         if (error == 0)
 3794                 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
 3795         if (ack != VIRTIO_NET_OK)
 3796                 if_printf(ifp, "error setting host MAC filter table\n");
 3797 
 3798 out:
 3799         if (promisc != 0 && vtnet_set_promisc(sc, true) != 0)
 3800                 if_printf(ifp, "cannot enable promiscuous mode\n");
 3801         if (allmulti != 0 && vtnet_set_allmulti(sc, true) != 0)
 3802                 if_printf(ifp, "cannot enable all-multicast mode\n");
 3803 }
 3804 
 3805 static int
 3806 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
 3807 {
 3808         struct sglist_seg segs[3];
 3809         struct sglist sg;
 3810         struct {
 3811                 struct virtio_net_ctrl_hdr hdr __aligned(2);
 3812                 uint8_t pad1;
 3813                 uint16_t tag __aligned(2);
 3814                 uint8_t pad2;
 3815                 uint8_t ack;
 3816         } s;
 3817         int error;
 3818 
 3819         error = 0;
 3820         MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
 3821 
 3822         s.hdr.class = VIRTIO_NET_CTRL_VLAN;
 3823         s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
 3824         s.tag = vtnet_gtoh16(sc, tag);
 3825         s.ack = VIRTIO_NET_ERR;
 3826 
 3827         sglist_init(&sg, nitems(segs), segs);
 3828         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
 3829         error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
 3830         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
 3831         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
 3832 
 3833         if (error == 0)
 3834                 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
 3835 
 3836         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
 3837 }
 3838 
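/*
 * Reprogram the host VLAN filter from the locally maintained bitmap.
 * Each set bit corresponds to a configured VLAN ID: for word i and
 * bit b, the tag is 32 * i + b (e.g. word 3, bit 5 is VLAN 101).
 */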
 3839 static void
 3840 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
 3841 {
 3842         int i, bit;
 3843         uint32_t w;
 3844         uint16_t tag;
 3845 
 3846         MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
 3847         VTNET_CORE_LOCK_ASSERT(sc);
 3848 
 3849         /* Enable the filter for each configured VLAN. */
 3850         for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
 3851                 w = sc->vtnet_vlan_filter[i];
 3852 
 3853                 while ((bit = ffs(w) - 1) != -1) {
 3854                         w &= ~(1 << bit);
 3855                         tag = sizeof(w) * CHAR_BIT * i + bit;
 3856 
 3857                         if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
 3858                                 device_printf(sc->vtnet_dev,
 3859                                     "cannot enable VLAN %d filter\n", tag);
 3860                         }
 3861                 }
 3862         }
 3863 }
 3864 
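/*
 * Add or remove a VLAN tag in the local bitmap and, when hardware VLAN
 * filtering is enabled and the interface is running, push the change
 * to the host filter table. The tag is split into a word index
 * (tag >> 5) and a bit position (tag & 0x1F), so VLAN 101 lands in
 * word 3, bit 5.
 */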
 3865 static void
 3866 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
 3867 {
 3868         if_t ifp;
 3869         int idx, bit;
 3870 
 3871         ifp = sc->vtnet_ifp;
 3872         idx = (tag >> 5) & 0x7F;
 3873         bit = tag & 0x1F;
 3874 
 3875         if (tag == 0 || tag > 4095)
 3876                 return;
 3877 
 3878         VTNET_CORE_LOCK(sc);
 3879 
 3880         if (add)
 3881                 sc->vtnet_vlan_filter[idx] |= (1 << bit);
 3882         else
 3883                 sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
 3884 
 3885         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER &&
 3886             if_getdrvflags(ifp) & IFF_DRV_RUNNING &&
 3887             vtnet_exec_vlan_filter(sc, add, tag) != 0) {
 3888                 device_printf(sc->vtnet_dev,
 3889                     "cannot %s VLAN %d %s the host filter table\n",
 3890                     add ? "add" : "remove", tag, add ? "to" : "from");
 3891         }
 3892 
 3893         VTNET_CORE_UNLOCK(sc);
 3894 }
 3895 
 3896 static void
 3897 vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag)
 3898 {
 3899 
 3900         if (if_getsoftc(ifp) != arg)
 3901                 return;
 3902 
 3903         vtnet_update_vlan_filter(arg, 1, tag);
 3904 }
 3905 
 3906 static void
 3907 vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag)
 3908 {
 3909 
 3910         if (if_getsoftc(ifp) != arg)
 3911                 return;
 3912 
 3913         vtnet_update_vlan_filter(arg, 0, tag);
 3914 }
 3915 
 3916 static void
 3917 vtnet_update_speed_duplex(struct vtnet_softc *sc)
 3918 {
 3919         if_t ifp;
 3920         uint32_t speed;
 3921 
 3922         ifp = sc->vtnet_ifp;
 3923 
 3924         if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0)
 3925                 return;
 3926 
 3927         /* BMV: Ignore duplex. */
 3928         speed = virtio_read_dev_config_4(sc->vtnet_dev,
 3929             offsetof(struct virtio_net_config, speed));
 3930         if (speed != UINT32_MAX)
 3931                 if_setbaudrate(ifp, IF_Mbps(speed));
 3932 }
 3933 
 3934 static int
 3935 vtnet_is_link_up(struct vtnet_softc *sc)
 3936 {
 3937         uint16_t status;
 3938 
 3939         if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0)
 3940                 return (1);
 3941 
 3942         status = virtio_read_dev_config_2(sc->vtnet_dev,
 3943             offsetof(struct virtio_net_config, status));
 3944 
 3945         return ((status & VIRTIO_NET_S_LINK_UP) != 0);
 3946 }
 3947 
 3948 static void
 3949 vtnet_update_link_status(struct vtnet_softc *sc)
 3950 {
 3951         if_t ifp;
 3952         int link;
 3953 
 3954         ifp = sc->vtnet_ifp;
 3955         VTNET_CORE_LOCK_ASSERT(sc);
 3956         link = vtnet_is_link_up(sc);
 3957 
 3958         /* Notify if the link status has changed. */
 3959         if (link != 0 && sc->vtnet_link_active == 0) {
 3960                 vtnet_update_speed_duplex(sc);
 3961                 sc->vtnet_link_active = 1;
 3962                 if_link_state_change(ifp, LINK_STATE_UP);
 3963         } else if (link == 0 && sc->vtnet_link_active != 0) {
 3964                 sc->vtnet_link_active = 0;
 3965                 if_link_state_change(ifp, LINK_STATE_DOWN);
 3966         }
 3967 }
 3968 
 3969 static int
 3970 vtnet_ifmedia_upd(if_t ifp __unused)
 3971 {
 3972         return (EOPNOTSUPP);
 3973 }
 3974 
 3975 static void
 3976 vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
 3977 {
 3978         struct vtnet_softc *sc;
 3979 
 3980         sc = if_getsoftc(ifp);
 3981 
 3982         ifmr->ifm_status = IFM_AVALID;
 3983         ifmr->ifm_active = IFM_ETHER;
 3984 
 3985         VTNET_CORE_LOCK(sc);
 3986         if (vtnet_is_link_up(sc) != 0) {
 3987                 ifmr->ifm_status |= IFM_ACTIVE;
 3988                 ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
 3989         } else
 3990                 ifmr->ifm_active |= IFM_NONE;
 3991         VTNET_CORE_UNLOCK(sc);
 3992 }
 3993 
 3994 static void
 3995 vtnet_get_macaddr(struct vtnet_softc *sc)
 3996 {
 3997 
 3998         if (sc->vtnet_flags & VTNET_FLAG_MAC) {
 3999                 virtio_read_device_config_array(sc->vtnet_dev,
 4000                     offsetof(struct virtio_net_config, mac),
 4001                     &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN);
 4002         } else {
 4003                 /* Generate a random locally administered unicast address. */
 4004                 sc->vtnet_hwaddr[0] = 0xB2;
 4005                 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
 4006         }
 4007 }
 4008 
 4009 static void
 4010 vtnet_set_macaddr(struct vtnet_softc *sc)
 4011 {
 4012         device_t dev;
 4013         int error;
 4014 
 4015         dev = sc->vtnet_dev;
 4016 
 4017         if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
 4018                 error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr);
 4019                 if (error)
 4020                         device_printf(dev, "unable to set MAC address\n");
 4021                 return;
 4022         }
 4023 
 4024         /* MAC in config is read-only in modern VirtIO. */
 4025         if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) {
 4026                 for (int i = 0; i < ETHER_ADDR_LEN; i++) {
 4027                         virtio_write_dev_config_1(dev,
 4028                             offsetof(struct virtio_net_config, mac) + i,
 4029                             sc->vtnet_hwaddr[i]);
 4030                 }
 4031         }
 4032 }
 4033 
 4034 static void
 4035 vtnet_attached_set_macaddr(struct vtnet_softc *sc)
 4036 {
 4037 
 4038         /* Assign MAC address if it was generated. */
 4039         if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0)
 4040                 vtnet_set_macaddr(sc);
 4041 }
 4042 
 4043 static void
 4044 vtnet_vlan_tag_remove(struct mbuf *m)
 4045 {
 4046         struct ether_vlan_header *evh;
 4047 
 4048         evh = mtod(m, struct ether_vlan_header *);
 4049         m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
 4050         m->m_flags |= M_VLANTAG;
 4051 
 4052         /* Strip the 802.1Q header. */
 4053         bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
 4054             ETHER_HDR_LEN - ETHER_TYPE_LEN);
 4055         m_adj(m, ETHER_VLAN_ENCAP_LEN);
 4056 }
 4057 
 4058 static void
 4059 vtnet_set_rx_process_limit(struct vtnet_softc *sc)
 4060 {
 4061         int limit;
 4062 
 4063         limit = vtnet_tunable_int(sc, "rx_process_limit",
 4064             vtnet_rx_process_limit);
 4065         if (limit < 0)
 4066                 limit = INT_MAX;
 4067         sc->vtnet_rx_process_limit = limit;
 4068 }
 4069 
 4070 static void
 4071 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
 4072     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
 4073 {
 4074         struct sysctl_oid *node;
 4075         struct sysctl_oid_list *list;
 4076         struct vtnet_rxq_stats *stats;
 4077         char namebuf[16];
 4078 
 4079         snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
 4080         node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 4081             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
 4082         list = SYSCTL_CHILDREN(node);
 4083 
 4084         stats = &rxq->vtnrx_stats;
 4085 
 4086         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
 4087             &stats->vrxs_ipackets, "Receive packets");
 4088         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
 4089             &stats->vrxs_ibytes, "Receive bytes");
 4090         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
 4091             &stats->vrxs_iqdrops, "Receive drops");
 4092         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
 4093             &stats->vrxs_ierrors, "Receive errors");
 4094         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
 4095             &stats->vrxs_csum, "Receive checksum offloaded");
 4096         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
 4097             &stats->vrxs_csum_failed, "Receive checksum offload failed");
 4098         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD,
 4099             &stats->vrxs_host_lro, "Receive host segmentation offloaded");
 4100         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
 4101             &stats->vrxs_rescheduled,
 4102             "Receive interrupt handler rescheduled");
 4103 }
 4104 
 4105 static void
 4106 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
 4107     struct sysctl_oid_list *child, struct vtnet_txq *txq)
 4108 {
 4109         struct sysctl_oid *node;
 4110         struct sysctl_oid_list *list;
 4111         struct vtnet_txq_stats *stats;
 4112         char namebuf[16];
 4113 
 4114         snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
 4115         node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 4116             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
 4117         list = SYSCTL_CHILDREN(node);
 4118 
 4119         stats = &txq->vtntx_stats;
 4120 
 4121         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
 4122             &stats->vtxs_opackets, "Transmit packets");
 4123         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
 4124             &stats->vtxs_obytes, "Transmit bytes");
 4125         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
 4126             &stats->vtxs_omcasts, "Transmit multicasts");
 4127         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
 4128             &stats->vtxs_csum, "Transmit checksum offloaded");
 4129         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
 4130             &stats->vtxs_tso, "Transmit TCP segmentation offloaded");
 4131         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
 4132             &stats->vtxs_rescheduled,
 4133             "Transmit interrupt handler rescheduled");
 4134 }
 4135 
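/*
 * Create a per-queue sysctl subtree for every requested queue pair
 * under the device's sysctl tree, so the counters are typically
 * visible as, e.g., dev.vtnet.0.rxq0.ipackets and
 * dev.vtnet.0.txq0.opackets.
 */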
 4136 static void
 4137 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
 4138 {
 4139         device_t dev;
 4140         struct sysctl_ctx_list *ctx;
 4141         struct sysctl_oid *tree;
 4142         struct sysctl_oid_list *child;
 4143         int i;
 4144 
 4145         dev = sc->vtnet_dev;
 4146         ctx = device_get_sysctl_ctx(dev);
 4147         tree = device_get_sysctl_tree(dev);
 4148         child = SYSCTL_CHILDREN(tree);
 4149 
 4150         for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
 4151                 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
 4152                 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
 4153         }
 4154 }
 4155 
 4156 static void
 4157 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
 4158     struct sysctl_oid_list *child, struct vtnet_softc *sc)
 4159 {
 4160         struct vtnet_statistics *stats;
 4161         struct vtnet_rxq_stats rxaccum;
 4162         struct vtnet_txq_stats txaccum;
 4163 
 4164         vtnet_accum_stats(sc, &rxaccum, &txaccum);
 4165 
 4166         stats = &sc->vtnet_stats;
 4167         stats->rx_csum_offloaded = rxaccum.vrxs_csum;
 4168         stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
 4169         stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
 4170         stats->tx_csum_offloaded = txaccum.vtxs_csum;
 4171         stats->tx_tso_offloaded = txaccum.vtxs_tso;
 4172         stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
 4173 
 4174         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
 4175             CTLFLAG_RD, &stats->mbuf_alloc_failed,
 4176             "Mbuf cluster allocation failures");
 4177 
 4178         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
 4179             CTLFLAG_RD, &stats->rx_frame_too_large,
 4180             "Received frame larger than the mbuf chain");
 4181         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
 4182             CTLFLAG_RD, &stats->rx_enq_replacement_failed,
 4183             "Enqueuing the replacement receive mbuf failed");
 4184         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
 4185             CTLFLAG_RD, &stats->rx_mergeable_failed,
 4186             "Mergeable buffers receive failures");
 4187         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
 4188             CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
 4189             "Received checksum offloaded buffer with unsupported "
 4190             "Ethernet type");
 4191         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
 4192             CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
 4193             "Received checksum offloaded buffer with incorrect IP protocol");
 4194         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
 4195             CTLFLAG_RD, &stats->rx_csum_bad_offset,
 4196             "Received checksum offloaded buffer with incorrect offset");
 4197         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
 4198             CTLFLAG_RD, &stats->rx_csum_bad_proto,
 4199             "Received checksum offloaded buffer with incorrect protocol");
 4200         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
 4201             CTLFLAG_RD, &stats->rx_csum_failed,
 4202             "Received buffer checksum offload failed");
 4203         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
 4204             CTLFLAG_RD, &stats->rx_csum_offloaded,
 4205             "Received buffer checksum offload succeeded");
 4206         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
 4207             CTLFLAG_RD, &stats->rx_task_rescheduled,
 4208             "Times the receive interrupt task rescheduled itself");
 4209 
 4210         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
 4211             CTLFLAG_RD, &stats->tx_csum_unknown_ethtype,
 4212             "Aborted transmit of checksum offloaded buffer with unknown "
 4213             "Ethernet type");
 4214         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
 4215             CTLFLAG_RD, &stats->tx_csum_proto_mismatch,
 4216             "Aborted transmit of checksum offloaded buffer because mismatched "
 4217             "protocols");
 4218         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
 4219             CTLFLAG_RD, &stats->tx_tso_not_tcp,
 4220             "Aborted transmit of TSO buffer with non TCP protocol");
 4221         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
 4222             CTLFLAG_RD, &stats->tx_tso_without_csum,
 4223             "Aborted transmit of TSO buffer without TCP checksum offload");
 4224         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
 4225             CTLFLAG_RD, &stats->tx_defragged,
 4226             "Transmit mbufs defragged");
 4227         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
 4228             CTLFLAG_RD, &stats->tx_defrag_failed,
 4229             "Aborted transmit of buffer because defrag failed");
 4230         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
 4231             CTLFLAG_RD, &stats->tx_csum_offloaded,
 4232             "Offloaded checksum of transmitted buffer");
 4233         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
 4234             CTLFLAG_RD, &stats->tx_tso_offloaded,
 4235             "Segmentation offload of transmitted buffer");
 4236         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
 4237             CTLFLAG_RD, &stats->tx_task_rescheduled,
 4238             "Times the transmit interrupt task rescheduled itself");
 4239 }
 4240 
 4241 static void
 4242 vtnet_setup_sysctl(struct vtnet_softc *sc)
 4243 {
 4244         device_t dev;
 4245         struct sysctl_ctx_list *ctx;
 4246         struct sysctl_oid *tree;
 4247         struct sysctl_oid_list *child;
 4248 
 4249         dev = sc->vtnet_dev;
 4250         ctx = device_get_sysctl_ctx(dev);
 4251         tree = device_get_sysctl_tree(dev);
 4252         child = SYSCTL_CHILDREN(tree);
 4253 
 4254         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
 4255             CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
 4256             "Number of maximum supported virtqueue pairs");
 4257         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs",
 4258             CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0,
 4259             "Number of requested virtqueue pairs");
 4260         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
 4261             CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
 4262             "Number of active virtqueue pairs");
 4263 
 4264         vtnet_setup_stat_sysctl(ctx, child, sc);
 4265 }
 4266 
 4267 static void
 4268 vtnet_load_tunables(struct vtnet_softc *sc)
 4269 {
 4270 
 4271         sc->vtnet_lro_entry_count = vtnet_tunable_int(sc,
 4272             "lro_entry_count", vtnet_lro_entry_count);
 4273         if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES)
 4274                 sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES;
 4275 
 4276         sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc,
 4277             "lro_mbufq_depth", vtnet_lro_mbufq_depth);
 4278 }
 4279 
 4280 static int
 4281 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
 4282 {
 4283 
 4284         return (virtqueue_enable_intr(rxq->vtnrx_vq));
 4285 }
 4286 
 4287 static void
 4288 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
 4289 {
 4290 
 4291         virtqueue_disable_intr(rxq->vtnrx_vq);
 4292 }
 4293 
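/*
 * Arm the transmit completion interrupt (in postponed form) only while
 * the queue's free descriptor count is below its interrupt threshold;
 * otherwise leave the interrupt disabled.
 */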
 4294 static int
 4295 vtnet_txq_enable_intr(struct vtnet_txq *txq)
 4296 {
 4297         struct virtqueue *vq;
 4298 
 4299         vq = txq->vtntx_vq;
 4300 
 4301         if (vtnet_txq_below_threshold(txq) != 0)
 4302                 return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
 4303 
 4304         /*
 4305          * The free count is above our threshold. Keep the Tx interrupt
 4306          * disabled until the queue is fuller.
 4307          */
 4308         return (0);
 4309 }
 4310 
 4311 static void
 4312 vtnet_txq_disable_intr(struct vtnet_txq *txq)
 4313 {
 4314 
 4315         virtqueue_disable_intr(txq->vtntx_vq);
 4316 }
 4317 
 4318 static void
 4319 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
 4320 {
 4321         struct vtnet_rxq *rxq;
 4322         int i;
 4323 
 4324         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
 4325                 rxq = &sc->vtnet_rxqs[i];
 4326                 if (vtnet_rxq_enable_intr(rxq) != 0)
 4327                         taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
 4328         }
 4329 }
 4330 
 4331 static void
 4332 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
 4333 {
 4334         int i;
 4335 
 4336         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
 4337                 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
 4338 }
 4339 
 4340 static void
 4341 vtnet_enable_interrupts(struct vtnet_softc *sc)
 4342 {
 4343 
 4344         vtnet_enable_rx_interrupts(sc);
 4345         vtnet_enable_tx_interrupts(sc);
 4346 }
 4347 
 4348 static void
 4349 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
 4350 {
 4351         int i;
 4352 
 4353         for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
 4354                 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
 4355 }
 4356 
 4357 static void
 4358 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
 4359 {
 4360         int i;
 4361 
 4362         for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
 4363                 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
 4364 }
 4365 
 4366 static void
 4367 vtnet_disable_interrupts(struct vtnet_softc *sc)
 4368 {
 4369 
 4370         vtnet_disable_rx_interrupts(sc);
 4371         vtnet_disable_tx_interrupts(sc);
 4372 }
 4373 
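/*
 * Fetch a per-device tunable, falling back to the supplied default.
 * The knobs live under hw.vtnet.<unit>; for example, setting
 * hw.vtnet.0.rx_process_limit="256" in loader.conf(5) would override
 * the receive processing limit for the first device.
 */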
 4374 static int
 4375 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
 4376 {
 4377         char path[64];
 4378 
 4379         snprintf(path, sizeof(path),
 4380             "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
 4381         TUNABLE_INT_FETCH(path, &def);
 4382 
 4383         return (def);
 4384 }
 4385 
 4386 #ifdef DEBUGNET
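/*
 * debugnet (netdump/netgdb) support. These hooks run in a restricted
 * context where interrupts and taskqueues cannot be relied upon, so
 * transmit uses queue 0 directly and completions are polled.
 */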
 4387 static void
 4388 vtnet_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize)
 4389 {
 4390         struct vtnet_softc *sc;
 4391 
 4392         sc = if_getsoftc(ifp);
 4393 
 4394         VTNET_CORE_LOCK(sc);
 4395         *nrxr = sc->vtnet_req_vq_pairs;
 4396         *ncl = DEBUGNET_MAX_IN_FLIGHT;
 4397         *clsize = sc->vtnet_rx_clustersz;
 4398         VTNET_CORE_UNLOCK(sc);
 4399 }
 4400 
 4401 static void
 4402 vtnet_debugnet_event(if_t ifp, enum debugnet_ev event)
 4403 {
 4404         struct vtnet_softc *sc;
 4405         static bool sw_lro_enabled = false;
 4406 
 4407         /*
 4408          * Disable software LRO, since it would require entering the network
 4409          * epoch when calling vtnet_rxq_eof() in vtnet_debugnet_poll().
 4410          */
 4411         sc = if_getsoftc(ifp);
 4412         switch (event) {
 4413         case DEBUGNET_START:
 4414                 sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0;
 4415                 if (sw_lro_enabled)
 4416                         sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO;
 4417                 break;
 4418         case DEBUGNET_END:
 4419                 if (sw_lro_enabled)
 4420                         sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
 4421                 break;
 4422         }
 4423 }
 4424 
 4425 static int
 4426 vtnet_debugnet_transmit(if_t ifp, struct mbuf *m)
 4427 {
 4428         struct vtnet_softc *sc;
 4429         struct vtnet_txq *txq;
 4430         int error;
 4431 
 4432         sc = if_getsoftc(ifp);
 4433         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 4434             IFF_DRV_RUNNING)
 4435                 return (EBUSY);
 4436 
 4437         txq = &sc->vtnet_txqs[0];
 4438         error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
 4439         if (error == 0)
 4440                 (void)vtnet_txq_notify(txq);
 4441         return (error);
 4442 }
 4443 
 4444 static int
 4445 vtnet_debugnet_poll(if_t ifp, int count)
 4446 {
 4447         struct vtnet_softc *sc;
 4448         int i;
 4449 
 4450         sc = if_getsoftc(ifp);
 4451         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 4452             IFF_DRV_RUNNING)
 4453                 return (EBUSY);
 4454 
 4455         (void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
 4456         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
 4457                 (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
 4458         return (0);
 4459 }
 4460 #endif /* DEBUGNET */
