
FreeBSD/Linux Kernel Cross Reference
sys/dev/xen/netback/netback.c


/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009-2011 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
 *          Alan Somers         (Spectra Logic Corporation)
 *          John Suykerbuyk     (Spectra Logic Corporation)
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/**
 * \file netback.c
 *
 * \brief Device driver supporting the vending of network access
 *        from this FreeBSD domain to other domains.
 */
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/bus.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/ip_icmp.h>
#include <netinet/udp.h>
#include <machine/in_cksum.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

#include <machine/_inttypes.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <contrib/xen/io/netif.h>
#include <xen/xenbus/xenbusvar.h>

/*--------------------------- Compile-time Tunables --------------------------*/

/*---------------------------------- Macros ----------------------------------*/
/**
 * Custom malloc type for all driver allocations.
 */
static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data");

#define XNB_SG  1       /* netback driver supports feature-sg */
#define XNB_GSO_TCPV4 0 /* netback driver supports feature-gso-tcpv4 */
#define XNB_RX_COPY 1   /* netback driver supports feature-rx-copy */
#define XNB_RX_FLIP 0   /* netback driver does not support feature-rx-flip */

#undef XNB_DEBUG
#define XNB_DEBUG /* hardcode on during development */

#ifdef XNB_DEBUG
#define DPRINTF(fmt, args...) \
        printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#endif
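
/*
 * Example (illustrative): with XNB_DEBUG defined, a call such as
 *
 *      DPRINTF("copied %d bytes\n", 42);
 *
 * made at line 1440 of xnb_intr() would print
 *
 *      xnb(xnb_intr:1440): copied 42 bytes
 *
 * since the macro prefixes every message with the enclosing function
 * name and source line number.
 */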

/* Default length for stack-allocated grant tables */
#define GNTTAB_LEN      (64)

/* Features supported by all backends.  TSO and LRO can be negotiated */
#define XNB_CSUM_FEATURES       (CSUM_TCP | CSUM_UDP)

#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

/**
 * Two-argument version of the standard macro.  The second argument is a
 * tentative value of req_cons.
 */
#define RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({                     \
        unsigned int req = (_r)->sring->req_prod - cons;                \
        unsigned int rsp = RING_SIZE(_r) -                              \
            (cons - (_r)->rsp_prod_pvt);                                \
        req < rsp ? req : rsp;                                          \
})
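
/*
 * Example (illustrative sketch, not a call site in this driver): a consumer
 * can probe with a tentative index before committing it to req_cons, e.g.:
 *
 *      RING_IDX cons = txb->req_cons;
 *      while (RING_HAS_UNCONSUMED_REQUESTS_2(txb, cons)) {
 *              netif_tx_request_t *req = RING_GET_REQUEST(txb, cons);
 *              ... decide whether to accept the request ...
 *              cons++;
 *      }
 *
 * The macro yields the smaller of "requests published but not yet consumed"
 * and "free response slots", just like the one-argument form, but evaluated
 * at the hypothetical index instead of (_r)->req_cons.
 */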

#define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT)
#define virt_to_offset(x) ((x) & (PAGE_SIZE - 1))
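
/*
 * Example (illustrative): these macros split a kernel virtual address into
 * the machine frame number and intra-page offset that a grant-copy
 * descriptor wants.  Assuming 4 KiB pages, a buffer whose VA ends in 0x123
 * and whose physical address is 0x2345123 would yield
 *
 *      virt_to_mfn(va)    == 0x2345  (physical address >> PAGE_SHIFT)
 *      virt_to_offset(va) == 0x123   (low PAGE_SIZE - 1 bits of the VA)
 */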

/**
 * Predefined array type of grant table copy descriptors.  Used to pass around
 * statically allocated memory structures.
 */
typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN];
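
/*
 * Note (illustrative): as a C array type, a gnttab_copy_table parameter
 * decays to a "struct gnttab_copy *" pointing at the caller's storage, so
 * a call like xnb_recv(txb, otherend, &mbufc, ifp, xnb->tx_gnttab) hands
 * over the preallocated table in the softc without copying GNTTAB_LEN
 * descriptors.
 */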

/*--------------------------- Forward Declarations ---------------------------*/
struct xnb_softc;
struct xnb_pkt;

static void     xnb_attach_failed(struct xnb_softc *xnb,
                                  int err, const char *fmt, ...)
                                  __printflike(3,4);
static int      xnb_shutdown(struct xnb_softc *xnb);
static int      create_netdev(device_t dev);
static int      xnb_detach(device_t dev);
static int      xnb_ifmedia_upd(struct ifnet *ifp);
static void     xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
static void     xnb_intr(void *arg);
static int      xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend,
                         const struct mbuf *mbufc, gnttab_copy_table gnttab);
static int      xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend,
                         struct mbuf **mbufc, struct ifnet *ifnet,
                         gnttab_copy_table gnttab);
static int      xnb_ring2pkt(struct xnb_pkt *pkt,
                             const netif_tx_back_ring_t *tx_ring,
                             RING_IDX start);
static void     xnb_txpkt2rsp(const struct xnb_pkt *pkt,
                              netif_tx_back_ring_t *ring, int error);
static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp);
static int      xnb_txpkt2gnttab(const struct xnb_pkt *pkt,
                                 struct mbuf *mbufc,
                                 gnttab_copy_table gnttab,
                                 const netif_tx_back_ring_t *txb,
                                 domid_t otherend_id);
static void     xnb_update_mbufc(struct mbuf *mbufc,
                                 const gnttab_copy_table gnttab, int n_entries);
static int      xnb_mbufc2pkt(const struct mbuf *mbufc,
                              struct xnb_pkt *pkt,
                              RING_IDX start, int space);
static int      xnb_rxpkt2gnttab(const struct xnb_pkt *pkt,
                                 const struct mbuf *mbufc,
                                 gnttab_copy_table gnttab,
                                 const netif_rx_back_ring_t *rxb,
                                 domid_t otherend_id);
static int      xnb_rxpkt2rsp(const struct xnb_pkt *pkt,
                              const gnttab_copy_table gnttab, int n_entries,
                              netif_rx_back_ring_t *ring);
static void     xnb_stop(struct xnb_softc *);
static int      xnb_ioctl(struct ifnet *, u_long, caddr_t);
static void     xnb_start_locked(struct ifnet *);
static void     xnb_start(struct ifnet *);
static void     xnb_ifinit_locked(struct xnb_softc *);
static void     xnb_ifinit(void *);
#ifdef XNB_DEBUG
static int      xnb_unit_test_main(SYSCTL_HANDLER_ARGS);
static int      xnb_dump_rings(SYSCTL_HANDLER_ARGS);
#endif
#if defined(INET) || defined(INET6)
static void     xnb_add_mbuf_cksum(struct mbuf *mbufc);
#endif
/*------------------------------ Data Structures -----------------------------*/

/**
 * Representation of a xennet packet.  Simplified version of a packet as
 * stored in the Xen tx ring.  Applicable to both RX and TX packets.
 */
struct xnb_pkt {
        /**
         * Array index of the first data-bearing (i.e., not extra-info) entry
         * for this packet.
         */
        RING_IDX        car;

        /**
         * Array index of the second data-bearing entry for this packet.
         * Invalid if the packet has only one data-bearing entry.  If the
         * packet has more than two data-bearing entries, then the second
         * through the last will be sequential modulo the ring size.
         */
        RING_IDX        cdr;

        /**
         * Optional extra info.  Only valid if flags contains
         * NETTXF_extra_info.  Note that extra.type will always be
         * XEN_NETIF_EXTRA_TYPE_GSO.  Currently, no known netfront or netback
         * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_*.
         */
        netif_extra_info_t extra;

        /** Size of the entire packet in bytes. */
        uint16_t        size;

        /** The size of the first entry's data in bytes. */
        uint16_t        car_size;

        /**
         * Either NETTXF_ or NETRXF_ flags.  Note that the flag values are
         * not the same for TX and RX packets.
         */
        uint16_t        flags;

        /**
         * The number of valid data-bearing entries (either netif_tx_request's
         * or netif_rx_response's) in the packet.  If this is 0, it means the
         * entire packet is invalid.
         */
        uint16_t        list_len;

        /** There was an error processing the packet. */
        uint8_t         error;
};
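
/*
 * Example (illustrative): for a packet that spans three tx ring entries
 * starting at ring index 10, the fields would be
 *
 *      car = 10, cdr = 11, list_len = 3
 *
 * Entries 11 and 12 follow sequentially (modulo the ring size), so only the
 * head ("car") and the start of the rest of the list ("cdr") need to be
 * recorded, Lisp-style.
 */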

/** xnb_pkt method: initialize it */
static inline void
xnb_pkt_initialize(struct xnb_pkt *pxnb)
{
        bzero(pxnb, sizeof(*pxnb));
}

/** xnb_pkt method: mark the packet as valid */
static inline void
xnb_pkt_validate(struct xnb_pkt *pxnb)
{
        pxnb->error = 0;
}

/** xnb_pkt method: mark the packet as invalid */
static inline void
xnb_pkt_invalidate(struct xnb_pkt *pxnb)
{
        pxnb->error = 1;
}

/** xnb_pkt method: check whether the packet is valid */
static inline int
xnb_pkt_is_valid(const struct xnb_pkt *pxnb)
{
        return (!pxnb->error);
}

#ifdef XNB_DEBUG
/** xnb_pkt method: print the packet's contents in human-readable format */
static void __unused
xnb_dump_pkt(const struct xnb_pkt *pkt)
{
        if (pkt == NULL) {
                DPRINTF("Was passed a null pointer.\n");
                return;
        }
        DPRINTF("pkt address= %p\n", pkt);
        DPRINTF("pkt->size=%d\n", pkt->size);
        DPRINTF("pkt->car_size=%d\n", pkt->car_size);
        DPRINTF("pkt->flags=0x%04x\n", pkt->flags);
        DPRINTF("pkt->list_len=%d\n", pkt->list_len);
        /* DPRINTF("pkt->extra");       TODO */
        DPRINTF("pkt->car=%d\n", pkt->car);
        DPRINTF("pkt->cdr=%d\n", pkt->cdr);
        DPRINTF("pkt->error=%d\n", pkt->error);
}
#endif /* XNB_DEBUG */

static void
xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq)
{
        if (txreq != NULL) {
                DPRINTF("netif_tx_request index =%u\n", idx);
                DPRINTF("netif_tx_request.gref  =%u\n", txreq->gref);
                DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset);
                DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags);
                DPRINTF("netif_tx_request.id    =%hu\n", txreq->id);
                DPRINTF("netif_tx_request.size  =%hu\n", txreq->size);
        }
}

/**
 * \brief Configuration data for a shared memory request ring
 *        used to communicate with the front-end client of this
 *        driver.
 */
struct xnb_ring_config {
        /**
         * Runtime structures for ring access.  Unfortunately, TX and RX rings
         * use different data structures, and that cannot be changed since it
         * is part of the interdomain protocol.
         */
        union {
                netif_rx_back_ring_t      rx_ring;
                netif_tx_back_ring_t      tx_ring;
        } back_ring;

        /**
         * The device bus address returned by the hypervisor when
         * mapping the ring and required to unmap it when a connection
         * is torn down.
         */
        uint64_t        bus_addr;

        /** The pseudo-physical address where ring memory is mapped. */
        uint64_t        gnt_addr;

        /** KVA address where ring memory is mapped. */
        vm_offset_t     va;

        /**
         * Grant table handles, one per ring page, returned by the
         * hypervisor upon mapping of the ring and required to
         * unmap it when a connection is torn down.
         */
        grant_handle_t  handle;

        /** The number of ring pages mapped for the current connection. */
        unsigned        ring_pages;

        /**
         * The grant references, one per ring page, supplied by the
         * front-end, allowing us to reference the ring pages in the
         * front-end's domain and to map these pages into our own domain.
         */
        grant_ref_t     ring_ref;
};

/**
 * Per-instance connection state flags.
 */
typedef enum
{
        /** Communication with the front-end has been established. */
        XNBF_RING_CONNECTED    = 0x01,

        /**
         * Front-end requests exist in the ring and are waiting for
         * xnb_xen_req objects to free up.
         */
        XNBF_RESOURCE_SHORTAGE = 0x02,

        /** Connection teardown has started. */
        XNBF_SHUTDOWN          = 0x04,

        /** A thread is already performing shutdown processing. */
        XNBF_IN_SHUTDOWN       = 0x08
} xnb_flag_t;

/**
 * Types of rings.  Used for array indices and to identify a ring's control
 * data structure type.
 */
typedef enum {
        XNB_RING_TYPE_TX = 0,   /* ID of TX rings, used for array indices */
        XNB_RING_TYPE_RX = 1,   /* ID of RX rings, used for array indices */
        XNB_NUM_RING_TYPES
} xnb_ring_type_t;

/**
 * Per-instance configuration data.
 */
struct xnb_softc {
        /** NewBus device corresponding to this instance. */
        device_t                dev;

        /* Media related fields */

        /** Generic network media state */
        struct ifmedia          sc_media;

        /** Media carrier info */
        struct ifnet            *xnb_ifp;

        /** Our own private carrier state */
        unsigned carrier;

        /** Device MAC Address */
        uint8_t                 mac[ETHER_ADDR_LEN];

        /* Xen related fields */

        /**
         * \brief The netif protocol abi in effect.
         *
         * There are situations where the back and front ends can
         * have a different, native abi (e.g. intel x86_64 and
         * 32bit x86 domains on the same machine).  The back-end
         * always accommodates the front-end's native abi.  That
         * value is pulled from the XenStore and recorded here.
         */
        int                     abi;

        /**
         * Name of the bridge to which this VIF is connected, if any.
         * This field is dynamically allocated by xenbus and must be free()ed
         * when no longer needed.
         */
        char                    *bridge;

        /** The interrupt-driven event channel used to signal ring events. */
        evtchn_port_t           evtchn;

        /** Xen device handle. */
        long                    handle;

        /** Handle to the communication ring event channel. */
        xen_intr_handle_t       xen_intr_handle;

        /**
         * \brief Cached value of the front-end's domain id.
         *
         * This value is used once for each mapped page in
         * a transaction.  We cache it to avoid incurring the
         * cost of an ivar access every time it is needed.
         */
        domid_t                 otherend_id;

        /**
         * Undocumented frontend feature.  Has something to do with
         * scatter/gather IO.
         */
        uint8_t                 can_sg;
        /** Undocumented frontend feature */
        uint8_t                 gso;
        /** Undocumented frontend feature */
        uint8_t                 gso_prefix;
        /** Can checksum TCP/UDP over IPv4 */
        uint8_t                 ip_csum;

        /* Implementation related fields */
        /**
         * Preallocated grant table copy descriptor for RX operations.
         * Access must be protected by rx_lock.
         */
        gnttab_copy_table       rx_gnttab;

        /**
         * Preallocated grant table copy descriptor for TX operations.
         * Access must be protected by tx_lock.
         */
        gnttab_copy_table       tx_gnttab;

        /**
         * Resource representing allocated physical address space
         * associated with our per-instance kva region.
         */
        struct resource         *pseudo_phys_res;

        /** Resource id for allocated physical address space. */
        int                     pseudo_phys_res_id;

        /** Ring mapping and interrupt configuration data. */
        struct xnb_ring_config  ring_configs[XNB_NUM_RING_TYPES];

        /**
         * Global pool of kva used for mapping remote domain ring
         * and I/O transaction data.
         */
        vm_offset_t             kva;

        /** Pseudo-physical address corresponding to kva. */
        uint64_t                gnt_base_addr;

        /** Various configuration and state bit flags. */
        xnb_flag_t              flags;

        /** Mutex protecting per-instance data in the receive path. */
        struct mtx              rx_lock;

        /** Mutex protecting per-instance data in the softc structure. */
        struct mtx              sc_lock;

        /** Mutex protecting per-instance data in the transmit path. */
        struct mtx              tx_lock;

        /** The size of the global kva pool. */
        int                     kva_size;

        /** Name of the interface */
        char                    if_name[IFNAMSIZ];
};

/*---------------------------- Debugging functions ---------------------------*/
#ifdef XNB_DEBUG
static void __unused
xnb_dump_gnttab_copy(const struct gnttab_copy *entry)
{
        if (entry == NULL) {
                printf("NULL grant table pointer\n");
                return;
        }

        if (entry->flags & GNTCOPY_dest_gref)
                printf("gnttab dest ref=\t%u\n", entry->dest.u.ref);
        else
                printf("gnttab dest gmfn=\t%"PRI_xen_pfn"\n",
                       entry->dest.u.gmfn);
        printf("gnttab dest offset=\t%hu\n", entry->dest.offset);
        printf("gnttab dest domid=\t%hu\n", entry->dest.domid);
        if (entry->flags & GNTCOPY_source_gref)
                printf("gnttab source ref=\t%u\n", entry->source.u.ref);
        else
                printf("gnttab source gmfn=\t%"PRI_xen_pfn"\n",
                       entry->source.u.gmfn);
        printf("gnttab source offset=\t%hu\n", entry->source.offset);
        printf("gnttab source domid=\t%hu\n", entry->source.domid);
        printf("gnttab len=\t%hu\n", entry->len);
        printf("gnttab flags=\t%hu\n", entry->flags);
        printf("gnttab status=\t%hd\n", entry->status);
}

static int
xnb_dump_rings(SYSCTL_HANDLER_ARGS)
{
        static char results[720];
        struct xnb_softc const *xnb = (struct xnb_softc *)arg1;
        netif_rx_back_ring_t const *rxb =
                &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring;
        netif_tx_back_ring_t const *txb =
                &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring;

        /* empty the result strings */
        results[0] = 0;

        if (!txb || !txb->sring || !rxb || !rxb->sring)
                return (SYSCTL_OUT(req, results, strnlen(results, 720)));

        snprintf(results, 720,
            "\n\t%35s %18s\n"           /* TX, RX */
            "\t%16s %18d %18d\n"        /* req_cons */
            "\t%16s %18d %18d\n"        /* nr_ents */
            "\t%16s %18d %18d\n"        /* rsp_prod_pvt */
            "\t%16s %18p %18p\n"        /* sring */
            "\t%16s %18d %18d\n"        /* req_prod */
            "\t%16s %18d %18d\n"        /* req_event */
            "\t%16s %18d %18d\n"        /* rsp_prod */
            "\t%16s %18d %18d\n",       /* rsp_event */
            "TX", "RX",
            "req_cons", txb->req_cons, rxb->req_cons,
            "nr_ents", txb->nr_ents, rxb->nr_ents,
            "rsp_prod_pvt", txb->rsp_prod_pvt, rxb->rsp_prod_pvt,
            "sring", txb->sring, rxb->sring,
            "sring->req_prod", txb->sring->req_prod, rxb->sring->req_prod,
            "sring->req_event", txb->sring->req_event, rxb->sring->req_event,
            "sring->rsp_prod", txb->sring->rsp_prod, rxb->sring->rsp_prod,
            "sring->rsp_event", txb->sring->rsp_event, rxb->sring->rsp_event);

        return (SYSCTL_OUT(req, results, strnlen(results, 720)));
}

static void __unused
xnb_dump_mbuf(const struct mbuf *m)
{
        int len;
        uint8_t *d;

        if (m == NULL)
                return;

        printf("xnb_dump_mbuf:\n");
        if (m->m_flags & M_PKTHDR) {
                printf("    flowid=%10d, csum_flags=%#8x, csum_data=%#8x, "
                       "tso_segsz=%5hd\n",
                       m->m_pkthdr.flowid, (int)m->m_pkthdr.csum_flags,
                       m->m_pkthdr.csum_data, m->m_pkthdr.tso_segsz);
                printf("    rcvif=%16p,  len=%19d\n",
                       m->m_pkthdr.rcvif, m->m_pkthdr.len);
        }
        printf("    m_next=%16p, m_nextpk=%16p, m_data=%16p\n",
               m->m_next, m->m_nextpkt, m->m_data);
        printf("    m_len=%17d, m_flags=%#15x, m_type=%18u\n",
               m->m_len, m->m_flags, m->m_type);

        len = m->m_len;
        d = mtod(m, uint8_t *);
        while (len > 0) {
                int i;

                printf("                ");
                for (i = 0; (i < 16) && (len > 0); i++, len--) {
                        printf("%02hhx ", *(d++));
                }
                printf("\n");
        }
}
#endif /* XNB_DEBUG */

/*------------------------ Inter-Domain Communication ------------------------*/
/**
 * Free dynamically allocated KVA or pseudo-physical address allocations.
 *
 * \param xnb  Per-instance xnb configuration structure.
 */
static void
xnb_free_communication_mem(struct xnb_softc *xnb)
{
        if (xnb->kva != 0) {
                if (xnb->pseudo_phys_res != NULL) {
                        xenmem_free(xnb->dev, xnb->pseudo_phys_res_id,
                            xnb->pseudo_phys_res);
                        xnb->pseudo_phys_res = NULL;
                }
        }
        xnb->kva = 0;
        xnb->gnt_base_addr = 0;
}

/**
 * Cleanup all inter-domain communication mechanisms.
 *
 * \param xnb  Per-instance xnb configuration structure.
 */
static int
xnb_disconnect(struct xnb_softc *xnb)
{
        struct gnttab_unmap_grant_ref gnts[XNB_NUM_RING_TYPES];
        int error __diagused;
        int i;

        if (xnb->xen_intr_handle != NULL)
                xen_intr_unbind(&xnb->xen_intr_handle);

        /*
         * We may still have another thread currently processing requests.  We
         * must acquire the rx and tx locks to make sure those threads are done,
         * but we can release those locks as soon as we acquire them, because no
         * more interrupts will be arriving.
         */
        mtx_lock(&xnb->tx_lock);
        mtx_unlock(&xnb->tx_lock);
        mtx_lock(&xnb->rx_lock);
        mtx_unlock(&xnb->rx_lock);

        mtx_lock(&xnb->sc_lock);
        /* Free malloc'd softc member variables */
        if (xnb->bridge != NULL) {
                free(xnb->bridge, M_XENSTORE);
                xnb->bridge = NULL;
        }

        /* All request processing has stopped, so unmap the rings */
        for (i = 0; i < XNB_NUM_RING_TYPES; i++) {
                gnts[i].host_addr = xnb->ring_configs[i].gnt_addr;
                gnts[i].dev_bus_addr = xnb->ring_configs[i].bus_addr;
                gnts[i].handle = xnb->ring_configs[i].handle;
        }
        error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, gnts,
                                          XNB_NUM_RING_TYPES);
        KASSERT(error == 0, ("Grant table unmap op failed (%d)", error));

        xnb_free_communication_mem(xnb);
        /*
         * Zero the ring config structs because the pointers, handles, and
         * grant refs contained therein are no longer valid.
         */
        bzero(&xnb->ring_configs[XNB_RING_TYPE_TX],
            sizeof(struct xnb_ring_config));
        bzero(&xnb->ring_configs[XNB_RING_TYPE_RX],
            sizeof(struct xnb_ring_config));

        xnb->flags &= ~XNBF_RING_CONNECTED;
        mtx_unlock(&xnb->sc_lock);

        return (0);
}

/**
 * Map a single shared memory ring into domain-local address space and
 * initialize its control structure.
 *
 * \param xnb           Per-instance xnb configuration structure.
 * \param ring_type     Array index of this ring in the xnb's array of rings.
 * \return              An errno.
 */
static int
xnb_connect_ring(struct xnb_softc *xnb, xnb_ring_type_t ring_type)
{
        struct gnttab_map_grant_ref gnt;
        struct xnb_ring_config *ring = &xnb->ring_configs[ring_type];
        int error;

        /* TX ring type = 0, RX = 1 */
        ring->va = xnb->kva + ring_type * PAGE_SIZE;
        ring->gnt_addr = xnb->gnt_base_addr + ring_type * PAGE_SIZE;

        gnt.host_addr = ring->gnt_addr;
        gnt.flags     = GNTMAP_host_map;
        gnt.ref       = ring->ring_ref;
        gnt.dom       = xnb->otherend_id;

        error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &gnt, 1);
        if (error != 0)
                panic("netback: Ring page grant table op failed (%d)", error);

        if (gnt.status != 0) {
                ring->va = 0;
                error = EACCES;
                xenbus_dev_fatal(xnb->dev, error,
                                 "Ring shared page mapping failed. "
                                 "Status %d.", gnt.status);
        } else {
                ring->handle = gnt.handle;
                ring->bus_addr = gnt.dev_bus_addr;

                if (ring_type == XNB_RING_TYPE_TX) {
                        BACK_RING_INIT(&ring->back_ring.tx_ring,
                            (netif_tx_sring_t *)ring->va,
                            ring->ring_pages * PAGE_SIZE);
                } else if (ring_type == XNB_RING_TYPE_RX) {
                        BACK_RING_INIT(&ring->back_ring.rx_ring,
                            (netif_rx_sring_t *)ring->va,
                            ring->ring_pages * PAGE_SIZE);
                } else {
                        xenbus_dev_fatal(xnb->dev, error,
                                 "Unknown ring type %d", ring_type);
                }
        }

        return (error);
}
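
/*
 * Note (illustrative): BACK_RING_INIT() from the Xen ring headers resets the
 * backend's private indices (req_cons and rsp_prod_pvt) to zero and points
 * the control structure at the freshly mapped shared ring, so a connecting
 * front-end always starts from a consistent, empty ring.
 */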

/**
 * Setup the shared memory rings and bind an interrupt to the event channel
 * used to notify us of ring changes.
 *
 * \param xnb  Per-instance xnb configuration structure.
 */
static int
xnb_connect_comms(struct xnb_softc *xnb)
{
        int     error;
        xnb_ring_type_t i;

        if ((xnb->flags & XNBF_RING_CONNECTED) != 0)
                return (0);

        /*
         * The KVA for our rings comes from the region of KVA allocated
         * by xnb_alloc_communication_mem().
         */
        for (i = 0; i < XNB_NUM_RING_TYPES; i++) {
                error = xnb_connect_ring(xnb, i);
                if (error != 0)
                        return (error);
        }

        xnb->flags |= XNBF_RING_CONNECTED;

        error = xen_intr_bind_remote_port(xnb->dev,
                                          xnb->otherend_id,
                                          xnb->evtchn,
                                          /*filter*/NULL,
                                          xnb_intr, /*arg*/xnb,
                                          INTR_TYPE_NET | INTR_MPSAFE,
                                          &xnb->xen_intr_handle);
        if (error != 0) {
                (void)xnb_disconnect(xnb);
                xenbus_dev_fatal(xnb->dev, error, "binding event channel");
                return (error);
        }

        DPRINTF("rings connected!\n");

        return (0);
}

/**
 * Size KVA and pseudo-physical address allocations based on negotiated
 * values for the size and number of I/O requests, and the size of our
 * communication ring.
 *
 * \param xnb  Per-instance xnb configuration structure.
 *
 * These address spaces are used to dynamically map pages in the
 * front-end's domain into our own.
 */
static int
xnb_alloc_communication_mem(struct xnb_softc *xnb)
{
        xnb_ring_type_t i;

        xnb->kva_size = 0;
        for (i = 0; i < XNB_NUM_RING_TYPES; i++) {
                xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE;
        }

        /*
         * Reserve a range of pseudo-physical memory that we can map
         * into kva.  These pages will only be backed by machine
         * pages ("real memory") during the lifetime of front-end requests
         * via grant table operations.  We will map the netif tx and rx rings
         * into this space.
         */
        xnb->pseudo_phys_res_id = 0;
        xnb->pseudo_phys_res = xenmem_alloc(xnb->dev, &xnb->pseudo_phys_res_id,
            xnb->kva_size);
        if (xnb->pseudo_phys_res == NULL) {
                xnb->kva = 0;
                return (ENOMEM);
        }
        xnb->kva = (vm_offset_t)rman_get_virtual(xnb->pseudo_phys_res);
        xnb->gnt_base_addr = rman_get_start(xnb->pseudo_phys_res);
        return (0);
}
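
/*
 * Layout sketch (illustrative): with the default of one page per ring, the
 * allocated region looks like
 *
 *      kva + 0 * PAGE_SIZE  ->  TX shared ring (XNB_RING_TYPE_TX == 0)
 *      kva + 1 * PAGE_SIZE  ->  RX shared ring (XNB_RING_TYPE_RX == 1)
 *
 * which is exactly the addressing used by xnb_connect_ring().
 */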

/**
 * Collect information from the XenStore related to our device and its
 * frontend.
 *
 * \param xnb  Per-instance xnb configuration structure.
 */
static int
xnb_collect_xenstore_info(struct xnb_softc *xnb)
{
        /**
         * \todo Linux collects the following info.  We should collect most
         * of this, too:
         * "feature-rx-notify"
         */
        const char *otherend_path;
        const char *our_path;
        int err;
        unsigned int rx_copy, bridge_len;
        uint8_t no_csum_offload;

        otherend_path = xenbus_get_otherend_path(xnb->dev);
        our_path = xenbus_get_node(xnb->dev);

        /* Collect the critical communication parameters */
        err = xs_gather(XST_NIL, otherend_path,
            "tx-ring-ref", "%l" PRIu32,
                &xnb->ring_configs[XNB_RING_TYPE_TX].ring_ref,
            "rx-ring-ref", "%l" PRIu32,
                &xnb->ring_configs[XNB_RING_TYPE_RX].ring_ref,
            "event-channel", "%" PRIu32, &xnb->evtchn,
            NULL);
        if (err != 0) {
                xenbus_dev_fatal(xnb->dev, err,
                                 "Unable to retrieve ring information from "
                                 "frontend %s.  Unable to connect.",
                                 otherend_path);
                return (err);
        }

        /* Collect the handle from xenstore */
        err = xs_scanf(XST_NIL, our_path, "handle", NULL, "%li", &xnb->handle);
        if (err != 0) {
                xenbus_dev_fatal(xnb->dev, err,
                    "Error reading handle from frontend %s.  "
                    "Unable to connect.", otherend_path);
        }

        /*
         * Collect the bridge name, if any.  We do not need bridge_len; we
         * just throw it away.
         */
        err = xs_read(XST_NIL, our_path, "bridge", &bridge_len,
                      (void **)&xnb->bridge);
        if (err != 0)
                xnb->bridge = NULL;

        /*
         * Does the frontend request that we use rx copy?  If not, return an
         * error because this driver only supports rx copy.
         */
        err = xs_scanf(XST_NIL, otherend_path, "request-rx-copy", NULL,
                       "%" PRIu32, &rx_copy);
        if (err == ENOENT) {
                err = 0;
                rx_copy = 0;
        }
        if (err < 0) {
                xenbus_dev_fatal(xnb->dev, err, "reading %s/request-rx-copy",
                                 otherend_path);
                return (err);
        }
        /**
         * \todo: figure out the exact meaning of this feature, and when
         * the frontend will set it to true.  It should be set to true
         * at some point
         */
/*        if (!rx_copy)*/
/*          return EOPNOTSUPP;*/

        /** \todo Collect the rx notify feature */

        /* Collect the feature-sg. */
        if (xs_scanf(XST_NIL, otherend_path, "feature-sg", NULL,
                     "%hhu", &xnb->can_sg) < 0)
                xnb->can_sg = 0;

        /* Collect remaining frontend features */
        if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4", NULL,
                     "%hhu", &xnb->gso) < 0)
                xnb->gso = 0;

        if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4-prefix", NULL,
                     "%hhu", &xnb->gso_prefix) < 0)
                xnb->gso_prefix = 0;

        if (xs_scanf(XST_NIL, otherend_path, "feature-no-csum-offload", NULL,
                     "%hhu", &no_csum_offload) < 0)
                no_csum_offload = 0;
        xnb->ip_csum = (no_csum_offload == 0);

        return (0);
}
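
/*
 * Example (illustrative; paths and values are assumptions that vary by
 * toolstack and front-end): the keys read above might look like this in the
 * XenStore, where <fdomid> is the front-end's domain id and <handle> is the
 * vif index:
 *
 *      /local/domain/<fdomid>/device/vif/<handle>/tx-ring-ref = "8"
 *      /local/domain/<fdomid>/device/vif/<handle>/rx-ring-ref = "9"
 *      /local/domain/<fdomid>/device/vif/<handle>/event-channel = "17"
 *      /local/domain/<fdomid>/device/vif/<handle>/request-rx-copy = "1"
 *      /local/domain/<fdomid>/device/vif/<handle>/feature-sg = "1"
 */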

/**
 * Supply information about the physical device to the frontend
 * via XenBus.
 *
 * \param xnb  Per-instance xnb configuration structure.
 */
static int
xnb_publish_backend_info(struct xnb_softc *xnb)
{
        struct xs_transaction xst;
        const char *our_path;
        int error;

        our_path = xenbus_get_node(xnb->dev);

        do {
                error = xs_transaction_start(&xst);
                if (error != 0) {
                        xenbus_dev_fatal(xnb->dev, error,
                                         "Error publishing backend info "
                                         "(start transaction)");
                        break;
                }

                error = xs_printf(xst, our_path, "feature-sg",
                                  "%d", XNB_SG);
                if (error != 0)
                        break;

                error = xs_printf(xst, our_path, "feature-gso-tcpv4",
                                  "%d", XNB_GSO_TCPV4);
                if (error != 0)
                        break;

                error = xs_printf(xst, our_path, "feature-rx-copy",
                                  "%d", XNB_RX_COPY);
                if (error != 0)
                        break;

                error = xs_printf(xst, our_path, "feature-rx-flip",
                                  "%d", XNB_RX_FLIP);
                if (error != 0)
                        break;

                error = xs_transaction_end(xst, 0);
                if (error != 0 && error != EAGAIN) {
                        xenbus_dev_fatal(xnb->dev, error, "ending transaction");
                        break;
                }
        } while (error == EAGAIN);

        return (error);
}
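
/*
 * Given the compile-time tunables at the top of this file, the transaction
 * above publishes the following keys under the backend's XenStore node:
 *
 *      feature-sg        = "1"         (XNB_SG)
 *      feature-gso-tcpv4 = "0"         (XNB_GSO_TCPV4)
 *      feature-rx-copy   = "1"         (XNB_RX_COPY)
 *      feature-rx-flip   = "0"         (XNB_RX_FLIP)
 */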

/**
 * Connect to our netfront peer now that it has completed publishing
 * its configuration into the XenStore.
 *
 * \param xnb  Per-instance xnb configuration structure.
 */
static void
xnb_connect(struct xnb_softc *xnb)
{
        int     error;

        if (xenbus_get_state(xnb->dev) == XenbusStateConnected)
                return;

        if (xnb_collect_xenstore_info(xnb) != 0)
                return;

        xnb->flags &= ~XNBF_SHUTDOWN;

        /* Read front end configuration. */

        /* Allocate resources whose size depends on front-end configuration. */
        error = xnb_alloc_communication_mem(xnb);
        if (error != 0) {
                xenbus_dev_fatal(xnb->dev, error,
                                 "Unable to allocate communication memory");
                return;
        }

        /*
         * Connect communication channel.
         */
        error = xnb_connect_comms(xnb);
        if (error != 0) {
                /* Specific errors are reported by xnb_connect_comms(). */
                return;
        }
        xnb->carrier = 1;

        /* Ready for I/O. */
        xenbus_set_state(xnb->dev, XenbusStateConnected);
}

/*-------------------------- Device Teardown Support -------------------------*/
/**
 * Perform device shutdown functions.
 *
 * \param xnb  Per-instance xnb configuration structure.
 *
 * Mark this instance as shutting down, wait for any active requests
 * to drain, disconnect from the front-end, and notify any waiters (e.g.
 * a thread invoking our detach method) that detach can now proceed.
 */
static int
xnb_shutdown(struct xnb_softc *xnb)
{
        /*
         * Due to the need to drop our mutex during some
         * xenbus operations, it is possible for two threads
         * to attempt to close out shutdown processing at
         * the same time.  Tell the caller that hits this
         * race to try back later.
         */
        if ((xnb->flags & XNBF_IN_SHUTDOWN) != 0)
                return (EAGAIN);

        xnb->flags |= XNBF_SHUTDOWN;

        xnb->flags |= XNBF_IN_SHUTDOWN;

        mtx_unlock(&xnb->sc_lock);
        /* Free the network interface */
        xnb->carrier = 0;
        if (xnb->xnb_ifp != NULL) {
                ether_ifdetach(xnb->xnb_ifp);
                if_free(xnb->xnb_ifp);
                xnb->xnb_ifp = NULL;
        }

        xnb_disconnect(xnb);

        if (xenbus_get_state(xnb->dev) < XenbusStateClosing)
                xenbus_set_state(xnb->dev, XenbusStateClosing);
        mtx_lock(&xnb->sc_lock);

        xnb->flags &= ~XNBF_IN_SHUTDOWN;

        /* Indicate to xnb_detach() that it is safe to proceed. */
        wakeup(xnb);

        return (0);
}

/**
 * Report an attach time error to the console and Xen, and cleanup
 * this instance by forcing immediate detach processing.
 *
 * \param xnb  Per-instance xnb configuration structure.
 * \param err  Errno describing the error.
 * \param fmt  Printf style format and arguments.
 */
static void
xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...)
{
        va_list ap;
        va_list ap_hotplug;

        va_start(ap, fmt);
        va_copy(ap_hotplug, ap);
        xs_vprintf(XST_NIL, xenbus_get_node(xnb->dev),
                  "hotplug-error", fmt, ap_hotplug);
        va_end(ap_hotplug);
        (void)xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
                  "hotplug-status", "error");

        xenbus_dev_vfatal(xnb->dev, err, fmt, ap);
        va_end(ap);

        (void)xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "online", "");
        xnb_detach(xnb->dev);
}

/*---------------------------- NewBus Entrypoints ----------------------------*/
/**
 * Inspect a XenBus device and claim it if it is of the appropriate type.
 *
 * \param dev  NewBus device object representing a candidate XenBus device.
 *
 * \return  0 for success, errno codes for failure.
 */
static int
xnb_probe(device_t dev)
{
        if (!strcmp(xenbus_get_type(dev), "vif")) {
                DPRINTF("Claiming device %d, %s\n", device_get_unit(dev),
                    devclass_get_name(device_get_devclass(dev)));
                device_set_desc(dev, "Backend Virtual Network Device");
                device_quiet(dev);
                return (0);
        }
        return (ENXIO);
}

/**
 * Setup sysctl variables to control various Network Back parameters.
 *
 * \param xnb  Xen Net Back softc.
 */
static void
xnb_setup_sysctl(struct xnb_softc *xnb)
{
        struct sysctl_ctx_list *sysctl_ctx = NULL;
        struct sysctl_oid      *sysctl_tree = NULL;

        sysctl_ctx = device_get_sysctl_ctx(xnb->dev);
        if (sysctl_ctx == NULL)
                return;

        sysctl_tree = device_get_sysctl_tree(xnb->dev);
        if (sysctl_tree == NULL)
                return;

#ifdef XNB_DEBUG
        SYSCTL_ADD_PROC(sysctl_ctx,
                        SYSCTL_CHILDREN(sysctl_tree),
                        OID_AUTO,
                        "unit_test_results",
                        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
                        xnb,
                        0,
                        xnb_unit_test_main,
                        "A",
                        "Results of builtin unit tests");

        SYSCTL_ADD_PROC(sysctl_ctx,
                        SYSCTL_CHILDREN(sysctl_tree),
                        OID_AUTO,
                        "dump_rings",
                        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
                        xnb,
                        0,
                        xnb_dump_rings,
                        "A",
                        "Xennet Back Rings");
#endif /* XNB_DEBUG */
}

/**
 * Create a network device.
 *
 * \param dev  NewBus device object representing this instance.
 */
int
create_netdev(device_t dev)
{
        struct ifnet *ifp;
        struct xnb_softc *xnb;
        int err = 0;
        uint32_t handle;

        xnb = device_get_softc(dev);
        mtx_init(&xnb->sc_lock, "xnb_softc", "xen netback softc lock", MTX_DEF);
        mtx_init(&xnb->tx_lock, "xnb_tx", "xen netback tx lock", MTX_DEF);
        mtx_init(&xnb->rx_lock, "xnb_rx", "xen netback rx lock", MTX_DEF);

        xnb->dev = dev;

        ifmedia_init(&xnb->sc_media, 0, xnb_ifmedia_upd, xnb_ifmedia_sts);
        ifmedia_add(&xnb->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL);
        ifmedia_set(&xnb->sc_media, IFM_ETHER|IFM_MANUAL);

        /*
         * Set the MAC address to a dummy value (00:00:00:00:00:00).  If the
         * MAC address of the host-facing interface were the same as that of
         * the guest-facing one (the value found in the XenStore), the bridge
         * would stop delivering packets to us: it would see that the
         * destination address of the packet matches its own interface, and
         * so it would expect the packet to have already been delivered
         * locally (and just drop it).
         */
        bzero(&xnb->mac[0], sizeof(xnb->mac));

        /*
         * The interface will be named using the following nomenclature:
         *
         * xnb<domid>.<handle>
         *
         * where <handle> is the order of the interface as referred to by
         * the guest.
         */
        err = xs_scanf(XST_NIL, xenbus_get_node(xnb->dev), "handle", NULL,
                       "%" PRIu32, &handle);
        if (err != 0)
                return (err);
        snprintf(xnb->if_name, IFNAMSIZ, "xnb%" PRIu16 ".%" PRIu32,
            xenbus_get_otherend_id(dev), handle);

        if (err == 0) {
                /* Set up ifnet structure */
                ifp = xnb->xnb_ifp = if_alloc(IFT_ETHER);
                ifp->if_softc = xnb;
                if_initname(ifp, xnb->if_name, IF_DUNIT_NONE);
                ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
                ifp->if_ioctl = xnb_ioctl;
                ifp->if_start = xnb_start;
                ifp->if_init = xnb_ifinit;
                ifp->if_mtu = ETHERMTU;
                ifp->if_snd.ifq_maxlen = NET_RX_RING_SIZE - 1;

                ifp->if_hwassist = XNB_CSUM_FEATURES;
                ifp->if_capabilities = IFCAP_HWCSUM;
                ifp->if_capenable = IFCAP_HWCSUM;

                ether_ifattach(ifp, xnb->mac);
                xnb->carrier = 0;
        }

        return (err);
}
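
/*
 * Example (illustrative): for a front-end in domain 5 whose XenStore
 * "handle" is 0, the snprintf() above produces an interface named "xnb5.0".
 */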

/**
 * Attach to a XenBus device that has been claimed by our probe routine.
 *
 * \param dev  NewBus device object representing this Xen Net Back instance.
 *
 * \return  0 for success, errno codes for failure.
 */
static int
xnb_attach(device_t dev)
{
        struct xnb_softc *xnb;
        int     error;
        xnb_ring_type_t i;

        error = create_netdev(dev);
        if (error != 0) {
                xenbus_dev_fatal(dev, error, "creating netdev");
                return (error);
        }

        DPRINTF("Attaching to %s\n", xenbus_get_node(dev));

        /*
         * Basic initialization.
         * After this block it is safe to call xnb_detach()
         * to clean up any allocated data for this instance.
         */
        xnb = device_get_softc(dev);
        xnb->otherend_id = xenbus_get_otherend_id(dev);
        for (i = 0; i < XNB_NUM_RING_TYPES; i++) {
                xnb->ring_configs[i].ring_pages = 1;
        }

        /*
         * Setup sysctl variables.
         */
        xnb_setup_sysctl(xnb);

        /* Update hot-plug status to satisfy xend. */
        error = xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
                          "hotplug-status", "connected");
        if (error != 0) {
                xnb_attach_failed(xnb, error, "writing %s/hotplug-status",
                                  xenbus_get_node(xnb->dev));
                return (error);
        }

        if ((error = xnb_publish_backend_info(xnb)) != 0) {
                /*
                 * If we can't publish our data, we cannot participate
                 * in this connection, and waiting for a front-end state
                 * change will not help the situation.
                 */
                xnb_attach_failed(xnb, error,
                    "Publishing backend status for %s",
                    xenbus_get_node(xnb->dev));
                return (error);
        }

        /* Tell the front end that we are ready to connect. */
        xenbus_set_state(dev, XenbusStateInitWait);

        return (0);
}

/**
 * Detach from a net back device instance.
 *
 * \param dev  NewBus device object representing this Xen Net Back instance.
 *
 * \return  0 for success, errno codes for failure.
 *
 * \note A net back device may be detached at any time in its life-cycle,
 *       including part way through the attach process.  For this reason,
 *       initialization order and the initialization state checks in this
 *       routine must be carefully coupled so that attach time failures
 *       are gracefully handled.
 */
static int
xnb_detach(device_t dev)
{
        struct xnb_softc *xnb;

        DPRINTF("\n");

        xnb = device_get_softc(dev);
        mtx_lock(&xnb->sc_lock);
        while (xnb_shutdown(xnb) == EAGAIN) {
                msleep(xnb, &xnb->sc_lock, /*wakeup prio unchanged*/0,
                       "xnb_shutdown", 0);
        }
        mtx_unlock(&xnb->sc_lock);
        DPRINTF("\n");

        mtx_destroy(&xnb->tx_lock);
        mtx_destroy(&xnb->rx_lock);
        mtx_destroy(&xnb->sc_lock);
        return (0);
}

/**
 * Prepare this net back device for suspension of this VM.
 *
 * \param dev  NewBus device object representing this Xen Net Back instance.
 *
 * \return  0 for success, errno codes for failure.
 */
static int
xnb_suspend(device_t dev)
{
        return (0);
}

/**
 * Perform any processing required to recover from a suspended state.
 *
 * \param dev  NewBus device object representing this Xen Net Back instance.
 *
 * \return  0 for success, errno codes for failure.
 */
static int
xnb_resume(device_t dev)
{
        return (0);
}

/**
 * Handle state changes expressed via the XenStore by our front-end peer.
 *
 * \param dev             NewBus device object representing this Xen
 *                        Net Back instance.
 * \param frontend_state  The new state of the front-end.
 */
 1382 static void
 1383 xnb_frontend_changed(device_t dev, XenbusState frontend_state)
 1384 {
 1385         struct xnb_softc *xnb;
 1386 
 1387         xnb = device_get_softc(dev);
 1388 
 1389         DPRINTF("frontend_state=%s, xnb_state=%s\n",
 1390                 xenbus_strstate(frontend_state),
 1391                 xenbus_strstate(xenbus_get_state(xnb->dev)));
 1392 
 1393         switch (frontend_state) {
 1394         case XenbusStateInitialising:
 1395         case XenbusStateInitialised:
 1396                 break;
 1397         case XenbusStateConnected:
 1398                 xnb_connect(xnb);
 1399                 break;
 1400         case XenbusStateClosing:
 1401         case XenbusStateClosed:
 1402                 mtx_lock(&xnb->sc_lock);
 1403                 xnb_shutdown(xnb);
 1404                 mtx_unlock(&xnb->sc_lock);
 1405                 if (frontend_state == XenbusStateClosed)
 1406                         xenbus_set_state(xnb->dev, XenbusStateClosed);
 1407                 break;
 1408         default:
 1409                 xenbus_dev_fatal(xnb->dev, EINVAL, "saw state %d at frontend",
 1410                                  frontend_state);
 1411                 break;
 1412         }
 1413 }
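
/*
 * Editorial summary (not in the original source) of the state handling
 * above, assuming the usual netfront behaviour:
 *
 *      frontend state              backend reaction
 *      Initialising/Initialised    none; keep waiting in InitWait
 *      Connected                   xnb_connect() maps rings, binds evtchn
 *      Closing/Closed              xnb_shutdown(); mirror Closed when seen
 *      anything else               xenbus_dev_fatal(EINVAL)
 */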
 1414 
 1415 /*---------------------------- Request Processing ----------------------------*/
 1416 /**
 1417  * Interrupt handler bound to the shared ring's event channel.
 1418  * Entry point for the xennet transmit path in netback.
 1419  * Transfers packets from the Xen ring to the host's generic networking stack.
 1420  *
 1421  * \param arg  Callback argument registered during event channel
 1422  *             binding - the xnb_softc for this instance.
 1423  */
 1424 static void
 1425 xnb_intr(void *arg)
 1426 {
 1427         struct xnb_softc *xnb;
 1428         struct ifnet *ifp;
 1429         netif_tx_back_ring_t *txb;
 1430         RING_IDX req_prod_local;
 1431 
 1432         xnb = (struct xnb_softc *)arg;
 1433         ifp = xnb->xnb_ifp;
 1434         txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring;
 1435 
 1436         mtx_lock(&xnb->tx_lock);
 1437         do {
 1438                 int notify;
 1439                 req_prod_local = txb->sring->req_prod;
 1440                 xen_rmb();
 1441 
 1442                 for (;;) {
 1443                         struct mbuf *mbufc;
 1444                         int err;
 1445 
 1446                         err = xnb_recv(txb, xnb->otherend_id, &mbufc, ifp,
 1447                                        xnb->tx_gnttab);
 1448                         if (err || (mbufc == NULL))
 1449                                 break;
 1450 
 1451                         /* Send the packet to the generic network stack */
 1452                         (*xnb->xnb_ifp->if_input)(xnb->xnb_ifp, mbufc);
 1453                 }
 1454 
 1455                 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify);
 1456                 if (notify != 0)
 1457                         xen_intr_signal(xnb->xen_intr_handle);
 1458 
 1459                 txb->sring->req_event = txb->req_cons + 1;
 1460                 xen_mb();
 1461         } while (txb->sring->req_prod != req_prod_local);
 1462         mtx_unlock(&xnb->tx_lock);
 1463 
 1464         xnb_start(ifp);
 1465 }
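
/*
 * Editorial note (not in the original source): the do/while above uses
 * the standard Xen ring re-check idiom.  After draining the ring it
 * re-arms notifications by setting req_event = req_cons + 1, then
 * re-reads sring->req_prod.  If the frontend produced a request in the
 * window between the drain and the re-arm, req_prod will have moved and
 * the loop makes another pass instead of waiting for an event that the
 * frontend may never send.
 */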
 1466 
 1467 /**
 1468  * Build a struct xnb_pkt based on netif_tx_requests from a netif tx ring.
 1469  * Will read exactly 0 or 1 packets from the ring; never a partial packet.
 1470  * \param[out]  pkt     The returned packet.  If there is an error building
 1471  *                      the packet, pkt.list_len will be set to 0.
 1472  * \param[in]   tx_ring Pointer to the Ring that is the input to this function
 1473  * \param[in]   start   The ring index of the first potential request
 1474  * \return              The number of requests consumed to build this packet
 1475  */
 1476 static int
 1477 xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring,
 1478              RING_IDX start)
 1479 {
 1480         /*
 1481          * Outline:
 1482          * 1) Initialize pkt
 1483          * 2) Read the first request of the packet
 1484          * 3) Read the extras
 1485          * 4) Set cdr
 1486          * 5) Loop on the remainder of the packet
 1487          * 6) Finalize pkt (stuff like car_size and list_len)
 1488          */
 1489         int idx = start;
 1490         int discard = 0;        /* whether to discard the packet */
 1491         int more_data = 0;      /* more requests remain past the last one */
 1492         uint16_t cdr_size = 0;  /* accumulated size of requests 2 through n */
 1493 
 1494         xnb_pkt_initialize(pkt);
 1495 
 1496         /* Read the first request */
 1497         if (RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) {
 1498                 netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx);
 1499                 pkt->size = tx->size;
 1500                 pkt->flags = tx->flags & ~NETTXF_more_data;
 1501                 more_data = tx->flags & NETTXF_more_data;
 1502                 pkt->list_len++;
 1503                 pkt->car = idx;
 1504                 idx++;
 1505         }
 1506 
 1507         /* Read the extra info */
 1508         if ((pkt->flags & NETTXF_extra_info) &&
 1509             RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) {
 1510                 netif_extra_info_t *ext =
 1511                     (netif_extra_info_t*) RING_GET_REQUEST(tx_ring, idx);
 1512                 pkt->extra.type = ext->type;
 1513                 switch (pkt->extra.type) {
 1514                         case XEN_NETIF_EXTRA_TYPE_GSO:
 1515                                 pkt->extra.u.gso = ext->u.gso;
 1516                                 break;
 1517                         default:
 1518                                 /*
 1519                                  * The reference Linux netfront driver will
 1520                                  * never set any other extra.type.  So we don't
 1521                                  * know what to do with it.  Let's print an
 1522                                  * error, then consume and discard the packet
 1523                                  */
 1524                                 printf("xnb(%s:%d): Unknown extra info type %d."
 1525                                        "  Discarding packet\n",
 1526                                        __func__, __LINE__, pkt->extra.type);
 1527                                 xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring,
 1528                                     start));
 1529                                 xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring,
 1530                                     idx));
 1531                                 discard = 1;
 1532                                 break;
 1533                 }
 1534 
 1535                 pkt->extra.flags = ext->flags;
 1536                 if (ext->flags & XEN_NETIF_EXTRA_FLAG_MORE) {
 1537                         /*
 1538                          * The reference linux netfront driver never sets this
 1539                          * flag (nor does any other known netfront).  So we
 1540                          * will discard the packet.
 1541                          */
 1542                         printf("xnb(%s:%d): Request sets "
 1543                             "XEN_NETIF_EXTRA_FLAG_MORE, but we can't handle "
 1544                             "that\n", __func__, __LINE__);
 1545                         xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start));
 1546                         xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx));
 1547                         discard = 1;
 1548                 }
 1549 
 1550                 idx++;
 1551         }
 1552 
 1553         /* Set cdr.  If there is no more data, cdr is invalid */
 1554         pkt->cdr = idx;
 1555 
 1556         /* Loop on remainder of packet */
 1557         while (more_data && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) {
 1558                 netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx);
 1559                 pkt->list_len++;
 1560                 cdr_size += tx->size;
 1561                 if (tx->flags & ~NETTXF_more_data) {
 1562                         /* There should be no other flags set at this point */
 1563                         printf("xnb(%s:%d): Request sets unknown flags %d "
 1564                             "after the 1st request in the packet.\n",
 1565                             __func__, __LINE__, tx->flags);
 1566                         xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start));
 1567                         xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx));
 1568                 }
 1569 
 1570                 more_data = tx->flags & NETTXF_more_data;
 1571                 idx++;
 1572         }
 1573 
 1574         /* Finalize packet */
 1575         if (more_data != 0) {
 1576                 /* The ring ran out of requests before finishing the packet */
 1577                 xnb_pkt_invalidate(pkt);
 1578                 idx = start;    /* tell caller that we consumed no requests */
 1579         } else {
 1580                 /* Calculate car_size */
 1581                 pkt->car_size = pkt->size - cdr_size;
 1582         }
 1583         if (discard != 0) {
 1584                 xnb_pkt_invalidate(pkt);
 1585         }
 1586 
 1587         return idx - start;
 1588 }
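
/*
 * Worked example (editorial, with hypothetical values): a packet made of
 * two data requests plus a GSO extra, starting at ring index 10, yields
 *      pkt->car      = 10                  (first data request)
 *      pkt->cdr      = 12                  (slot 11 held the extra info)
 *      pkt->list_len = 2                   (extras are not counted)
 *      pkt->size     = tx[10].size         (total length, per convention)
 *      pkt->car_size = pkt->size - tx[12].size
 * and the return value is 3, the number of ring slots consumed.
 */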
 1589 
 1590 /**
 1591  * Respond to all the requests that constituted pkt.  Builds the responses and
 1592  * writes them to the ring, but doesn't push them to the shared ring.
 1593  * \param[in] pkt       the packet that needs a response
 1594  * \param[in] error     true if there was an error handling the packet, such
 1595  *                      as in the hypervisor copy op or mbuf allocation
 1596  * \param[out] ring     Responses go here
 1597  */
 1598 static void
 1599 xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring,
 1600               int error)
 1601 {
 1602         /*
 1603          * Outline:
 1604          * 1) Respond to the first request
 1605          * 2) Respond to the extra info request
 1606          * 3) Loop through every remaining request in the packet, generating
 1607          *    responses that copy those requests' ids and set the status
 1608          *    appropriately.
 1609          */
 1610         netif_tx_request_t *tx;
 1611         netif_tx_response_t *rsp;
 1612         int i;
 1613         uint16_t status;
 1614 
 1615         status = (xnb_pkt_is_valid(pkt) == 0) || error ?
 1616                 NETIF_RSP_ERROR : NETIF_RSP_OKAY;
 1617         KASSERT((pkt->list_len == 0) || (ring->rsp_prod_pvt == pkt->car),
 1618             ("Cannot respond to ring requests out of order"));
 1619 
 1620         if (pkt->list_len >= 1) {
 1621                 uint16_t id;
 1622                 tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt);
 1623                 id = tx->id;
 1624                 rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
 1625                 rsp->id = id;
 1626                 rsp->status = status;
 1627                 ring->rsp_prod_pvt++;
 1628 
 1629                 if (pkt->flags & NETTXF_extra_info) {
 1630                         rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
 1631                         rsp->status = NETIF_RSP_NULL;
 1632                         ring->rsp_prod_pvt++;
 1633                 }
 1634         }
 1635 
 1636         for (i=0; i < pkt->list_len - 1; i++) {
 1637                 uint16_t id;
 1638                 tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt);
 1639                 id = tx->id;
 1640                 rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
 1641                 rsp->id = id;
 1642                 rsp->status = status;
 1643                 ring->rsp_prod_pvt++;
 1644         }
 1645 }
 1646 
 1647 /**
 1648  * Create an mbuf chain to represent a packet.  Initializes all of the headers
 1649  * in the mbuf chain, but does not copy the data.  The returned chain must be
 1650  * freed with m_freem() when no longer needed.
 1651  * \param[in]   pkt     A packet to model the mbuf chain after
 1652  * \return      A newly allocated mbuf chain, possibly with clusters attached.
 1653  *              NULL on failure
 1654  */
 1655 static struct mbuf*
 1656 xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp)
 1657 {
 1658         /**
 1659          * \todo consider using a memory pool for mbufs instead of
 1660          * reallocating them for every packet
 1661          */
 1662         /** \todo handle extra data */
 1663         struct mbuf *m;
 1664 
 1665         m = m_getm(NULL, pkt->size, M_NOWAIT, MT_DATA);
 1666 
 1667         if (m != NULL) {
 1668                 m->m_pkthdr.rcvif = ifp;
 1669                 if (pkt->flags & NETTXF_data_validated) {
 1670                         /*
 1671                          * We lie to the host OS and always tell it that the
 1672                          * checksums are ok, because the packet is unlikely to
 1673                          * get corrupted going across domains.
 1674                          */
 1675                         m->m_pkthdr.csum_flags = (
 1676                                 CSUM_IP_CHECKED |
 1677                                 CSUM_IP_VALID   |
 1678                                 CSUM_DATA_VALID |
 1679                                 CSUM_PSEUDO_HDR
 1680                                 );
 1681                         m->m_pkthdr.csum_data = 0xffff;
 1682                 }
 1683         }
 1684         return m;
 1685 }
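
/*
 * Editorial note (not in the original source): m_getm(NULL, len,
 * M_NOWAIT, MT_DATA) allocates a fresh chain of mbufs, attaching
 * clusters where that is cheaper, with at least len bytes of storage.
 * Only the packet header is initialized here; the payload bytes are
 * filled in later by the grant copy that xnb_txpkt2gnttab() describes.
 */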
 1686 
 1687 /**
 1688  * Build a gnttab_copy table that can be used to copy data from a pkt
 1689  * to an mbufc.  Does not actually perform the copy.  Always uses gref's on
 1690  * the packet side.
 1691  * \param[in]   pkt     pkt's associated requests form the src for
 1692  *                      the copy operation
 1693  * \param[in]   mbufc   mbufc's storage forms the dest for the copy operation
 1694  * \param[out]  gnttab  Storage for the returned grant table
 1695  * \param[in]   txb     Pointer to the backend ring structure
 1696  * \param[in]   otherend_id     The domain ID of the other end of the copy
 1697  * \return              The number of gnttab entries filled
 1698  */
 1699 static int
 1700 xnb_txpkt2gnttab(const struct xnb_pkt *pkt, struct mbuf *mbufc,
 1701                  gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb,
 1702                  domid_t otherend_id)
 1703 {
 1704 
 1705         struct mbuf *mbuf = mbufc;/* current mbuf within the chain */
 1706         int gnt_idx = 0;                /* index into grant table */
 1707         RING_IDX r_idx = pkt->car;      /* index into tx ring buffer */
 1708         int r_ofs = 0;  /* offset of next data within tx request's data area */
 1709         int m_ofs = 0;  /* offset of next data within mbuf's data area */
 1710         /* size in bytes that still needs to be represented in the table */
 1711         uint16_t size_remaining = pkt->size;
 1712 
 1713         while (size_remaining > 0) {
 1714                 const netif_tx_request_t *txq = RING_GET_REQUEST(txb, r_idx);
 1715                 const size_t mbuf_space = M_TRAILINGSPACE(mbuf) - m_ofs;
 1716                 const size_t req_size =
 1717                         r_idx == pkt->car ? pkt->car_size : txq->size;
 1718                 const size_t pkt_space = req_size - r_ofs;
 1719                 /*
 1720                  * space is the largest amount of data that can be copied in the
 1721                  * grant table's next entry
 1722                  */
 1723                 const size_t space = MIN(pkt_space, mbuf_space);
 1724 
 1725                 /* TODO: handle this error condition without panicking */
 1726                 KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short"));
 1727 
 1728                 gnttab[gnt_idx].source.u.ref = txq->gref;
 1729                 gnttab[gnt_idx].source.domid = otherend_id;
 1730                 gnttab[gnt_idx].source.offset = txq->offset + r_ofs;
 1731                 gnttab[gnt_idx].dest.u.gmfn = virt_to_mfn(
 1732                     mtod(mbuf, vm_offset_t) + m_ofs);
 1733                 gnttab[gnt_idx].dest.offset = virt_to_offset(
 1734                     mtod(mbuf, vm_offset_t) + m_ofs);
 1735                 gnttab[gnt_idx].dest.domid = DOMID_SELF;
 1736                 gnttab[gnt_idx].len = space;
 1737                 gnttab[gnt_idx].flags = GNTCOPY_source_gref;
 1738 
 1739                 gnt_idx++;
 1740                 r_ofs += space;
 1741                 m_ofs += space;
 1742                 size_remaining -= space;
 1743                 if (req_size - r_ofs <= 0) {
 1744                         /* Must move to the next tx request */
 1745                         r_ofs = 0;
 1746                         r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1;
 1747                 }
 1748                 if (M_TRAILINGSPACE(mbuf) - m_ofs <= 0) {
 1749                         /* Must move to the next mbuf */
 1750                         m_ofs = 0;
 1751                         mbuf = mbuf->m_next;
 1752                 }
 1753         }
 1754 
 1755         return gnt_idx;
 1756 }
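
/*
 * Editorial example (hypothetical sizes): copying a 3000-byte,
 * single-request packet into a chain whose first mbuf has a 2048-byte
 * cluster produces two table entries:
 *      gnttab[0].len = 2048    fills the first mbuf
 *      gnttab[1].len =  952    remainder goes into mbuf->m_next
 * Every entry names the frontend's gref as the source and a DOMID_SELF
 * machine frame as the destination, so a single GNTTABOP_copy hypercall
 * later performs the entire transfer.
 */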
 1757 
 1758 /**
 1759  * Check the status of the grant copy operations, and update the mbufs'
 1760  * various non-data fields to reflect the data present.
 1761  * \param[in,out] mbufc mbuf chain to update.  The chain must be valid and of
 1762  *                      the correct length, and data should already be present
 1763  * \param[in] gnttab    A grant table for a just completed copy op
 1764  * \param[in] n_entries The number of valid entries in the grant table
 1765  */
 1766 static void
 1767 xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab,
 1768                  int n_entries)
 1769 {
 1770         struct mbuf *mbuf = mbufc;
 1771         int i;
 1772         size_t total_size = 0;
 1773 
 1774         for (i = 0; i < n_entries; i++) {
 1775                 KASSERT(gnttab[i].status == GNTST_okay,
 1776                     ("Some gnttab_copy entry had error status %hd\n",
 1777                     gnttab[i].status));
 1778 
 1779                 mbuf->m_len += gnttab[i].len;
 1780                 total_size += gnttab[i].len;
 1781                 if (M_TRAILINGSPACE(mbuf) <= 0) {
 1782                         mbuf = mbuf->m_next;
 1783                 }
 1784         }
 1785         mbufc->m_pkthdr.len = total_size;
 1786 
 1787 #if defined(INET) || defined(INET6)
 1788         xnb_add_mbuf_cksum(mbufc);
 1789 #endif
 1790 }
 1791 
 1792 /**
 1793  * Dequeue at most one packet from the shared ring
 1794  * \param[in,out] txb   Netif tx ring.  A packet will be removed from it, and
 1795  *                      its private indices will be updated.  But the indices
 1796  *                      will not be pushed to the shared ring.
 1797  * \param[in] otherend  Domain ID of the other end of the ring
 1798  * \param[out] mbufc    The assembled mbuf chain, ready to send to the generic
 1799  *                      networking stack
 1800  * \param[in] ifnet     Interface to which the packet will be sent
 1801  * \param[in,out] gnttab Pointer to enough memory for a grant table.  We make
 1802  *                      this a function parameter so that we will take less
 1803  *                      stack space.
 1804  * \return              0 on success, or an errno value on failure
 1805  */
 1806 static int
 1807 xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc,
 1808          struct ifnet *ifnet, gnttab_copy_table gnttab)
 1809 {
 1810         struct xnb_pkt pkt;
 1811         /* number of tx requests consumed to build the last packet */
 1812         int num_consumed;
 1813         int nr_ents;
 1814 
 1815         *mbufc = NULL;
 1816         num_consumed = xnb_ring2pkt(&pkt, txb, txb->req_cons);
 1817         if (num_consumed == 0)
 1818                 return 0;       /* Nothing to receive */
 1819 
 1820         /* update statistics independent of errors */
 1821         if_inc_counter(ifnet, IFCOUNTER_IPACKETS, 1);
 1822 
 1823         /*
 1824          * If we got here, then 1 or more requests were consumed, but the packet
 1825          * is not necessarily valid.
 1826          */
 1827         if (xnb_pkt_is_valid(&pkt) == 0) {
 1828                 /* got a garbage packet, respond and drop it */
 1829                 xnb_txpkt2rsp(&pkt, txb, 1);
 1830                 txb->req_cons += num_consumed;
 1831                 DPRINTF("xnb_recv: garbage packet, num_consumed=%d\n",
 1832                                 num_consumed);
 1833                 if_inc_counter(ifnet, IFCOUNTER_IERRORS, 1);
 1834                 return EINVAL;
 1835         }
 1836 
 1837         *mbufc = xnb_pkt2mbufc(&pkt, ifnet);
 1838 
 1839         if (*mbufc == NULL) {
 1840                 /*
 1841                  * Couldn't allocate mbufs.  Respond and drop the packet.  Do
 1842                  * not consume the requests
 1843                  */
 1844                 xnb_txpkt2rsp(&pkt, txb, 1);
 1845                 DPRINTF("xnb_recv: Couldn't allocate mbufs, num_consumed=%d\n",
 1846                     num_consumed);
 1847                 if_inc_counter(ifnet, IFCOUNTER_IQDROPS, 1);
 1848                 return ENOMEM;
 1849         }
 1850 
 1851         nr_ents = xnb_txpkt2gnttab(&pkt, *mbufc, gnttab, txb, otherend);
 1852 
 1853         if (nr_ents > 0) {
 1854                 int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
 1855                     gnttab, nr_ents);
 1856                 KASSERT(hv_ret == 0,
 1857                     ("HYPERVISOR_grant_table_op returned %d\n", hv_ret));
 1858                 xnb_update_mbufc(*mbufc, gnttab, nr_ents);
 1859         }
 1860 
 1861         xnb_txpkt2rsp(&pkt, txb, 0);
 1862         txb->req_cons += num_consumed;
 1863         return 0;
 1864 }
 1865 
 1866 /**
 1867  * Create an xnb_pkt based on the contents of an mbuf chain.
 1868  * \param[in] mbufc     mbuf chain to transform into a packet
 1869  * \param[out] pkt      Storage for the newly generated xnb_pkt
 1870  * \param[in] start     The ring index of the first available slot in the rx
 1871  *                      ring
 1872  * \param[in] space     The number of free slots in the rx ring
 1873  * \retval 0            Success
 1874  * \retval EINVAL       mbufc was corrupt or not convertible into a pkt
 1875  * \retval EAGAIN       There was not enough space in the ring to queue the
 1876  *                      packet
 1877  */
 1878 static int
 1879 xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt,
 1880               RING_IDX start, int space)
 1881 {
 1882 
 1883         int retval = 0;
 1884 
 1885         if ((mbufc == NULL) ||
 1886             ((mbufc->m_flags & M_PKTHDR) == 0) ||
 1887             (mbufc->m_pkthdr.len == 0)) {
 1888                 xnb_pkt_invalidate(pkt);
 1889                 retval = EINVAL;
 1890         } else {
 1891                 int slots_required;
 1892 
 1893                 xnb_pkt_validate(pkt);
 1894                 pkt->flags = 0;
 1895                 pkt->size = mbufc->m_pkthdr.len;
 1896                 pkt->car = start;
 1897                 pkt->car_size = mbufc->m_len;
 1898 
 1899                 if (mbufc->m_pkthdr.csum_flags & CSUM_TSO) {
 1900                         pkt->flags |= NETRXF_extra_info;
 1901                         pkt->extra.u.gso.size = mbufc->m_pkthdr.tso_segsz;
 1902                         pkt->extra.u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
 1903                         pkt->extra.u.gso.pad = 0;
 1904                         pkt->extra.u.gso.features = 0;
 1905                         pkt->extra.type = XEN_NETIF_EXTRA_TYPE_GSO;
 1906                         pkt->extra.flags = 0;
 1907                         pkt->cdr = start + 2;
 1908                 } else {
 1909                         pkt->cdr = start + 1;
 1910                 }
 1911                 if (mbufc->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_DELAY_DATA)) {
 1912                         pkt->flags |=
 1913                             (NETRXF_csum_blank | NETRXF_data_validated);
 1914                 }
 1915 
 1916                 /*
 1917                  * Each ring response can have up to PAGE_SIZE of data.
 1918                  * Assume that we can defragment the mbuf chain efficiently
 1919                  * into responses so that each response but the last uses all
 1920                  * PAGE_SIZE bytes.
 1921                  */
 1922                 pkt->list_len = howmany(pkt->size, PAGE_SIZE);
 1923 
 1924                 if (pkt->list_len > 1) {
 1925                         pkt->flags |= NETRXF_more_data;
 1926                 }
 1927 
 1928                 slots_required = pkt->list_len +
 1929                         (pkt->flags & NETRXF_extra_info ? 1 : 0);
 1930                 if (slots_required > space) {
 1931                         xnb_pkt_invalidate(pkt);
 1932                         retval = EAGAIN;
 1933                 }
 1934         }
 1935 
 1936         return retval;
 1937 }
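
/*
 * Editorial example (assuming 4 KB pages): a 6000-byte chain with
 * CSUM_TSO set, starting at ring slot 100, produces
 *      pkt->list_len = howmany(6000, 4096) = 2 data responses,
 *      pkt->flags    = NETRXF_extra_info | NETRXF_more_data |
 *                      NETRXF_csum_blank | NETRXF_data_validated,
 *      pkt->car = 100, pkt->cdr = 102 (slot 101 carries the GSO extra),
 * so slots_required is 2 + 1 = 3 and EAGAIN is returned if space < 3.
 */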
 1938 
 1939 /**
 1940  * Build a gnttab_copy table that can be used to copy data from an mbuf chain
 1941  * to the frontend's shared buffers.  Does not actually perform the copy.
 1942  * Always uses gref's on the other end's side.
 1943  * \param[in]   pkt     pkt's associated responses form the dest for the copy
 1944  *                      operation
 1945  * \param[in]   mbufc   The source for the copy operation
 1946  * \param[out]  gnttab  Storage for the returned grant table
 1947  * \param[in]   rxb     Pointer to the backend ring structure
 1948  * \param[in]   otherend_id     The domain ID of the other end of the copy
 1949  * \return              The number of gnttab entries filled
 1950  */
 1951 static int
 1952 xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc,
 1953                  gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb,
 1954                  domid_t otherend_id)
 1955 {
 1956 
 1957         const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */
 1958         int gnt_idx = 0;                /* index into grant table */
 1959         RING_IDX r_idx = pkt->car;      /* index into rx ring buffer */
 1960         int r_ofs = 0;  /* offset of next data within rx request's data area */
 1961         int m_ofs = 0;  /* offset of next data within mbuf's data area */
 1962         /* size in bytes that still needs to be represented in the table */
 1963         uint16_t size_remaining;
 1964 
 1965         size_remaining = (xnb_pkt_is_valid(pkt) != 0) ? pkt->size : 0;
 1966 
 1967         while (size_remaining > 0) {
 1968                 const netif_rx_request_t *rxq = RING_GET_REQUEST(rxb, r_idx);
 1969                 const size_t mbuf_space = mbuf->m_len - m_ofs;
 1970                 /* Xen shared pages have an implied size of PAGE_SIZE */
 1971                 const size_t req_size = PAGE_SIZE;
 1972                 const size_t pkt_space = req_size - r_ofs;
 1973                 /*
 1974                  * space is the largest amount of data that can be copied in the
 1975                  * grant table's next entry
 1976                  */
 1977                 const size_t space = MIN(pkt_space, mbuf_space);
 1978 
 1979                 /* TODO: handle this error condition without panicking */
 1980                 KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short"));
 1981 
 1982                 gnttab[gnt_idx].dest.u.ref = rxq->gref;
 1983                 gnttab[gnt_idx].dest.domid = otherend_id;
 1984                 gnttab[gnt_idx].dest.offset = r_ofs;
 1985                 gnttab[gnt_idx].source.u.gmfn = virt_to_mfn(
 1986                     mtod(mbuf, vm_offset_t) + m_ofs);
 1987                 gnttab[gnt_idx].source.offset = virt_to_offset(
 1988                     mtod(mbuf, vm_offset_t) + m_ofs);
 1989                 gnttab[gnt_idx].source.domid = DOMID_SELF;
 1990                 gnttab[gnt_idx].len = space;
 1991                 gnttab[gnt_idx].flags = GNTCOPY_dest_gref;
 1992 
 1993                 gnt_idx++;
 1994 
 1995                 r_ofs += space;
 1996                 m_ofs += space;
 1997                 size_remaining -= space;
 1998                 if (req_size - r_ofs <= 0) {
 1999                         /* Must move to the next rx request */
 2000                         r_ofs = 0;
 2001                         r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1;
 2002                 }
 2003                 if (mbuf->m_len - m_ofs <= 0) {
 2004                         /* Must move to the next mbuf */
 2005                         m_ofs = 0;
 2006                         mbuf = mbuf->m_next;
 2007                 }
 2008         }
 2009 
 2010         return gnt_idx;
 2011 }
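
/*
 * Editorial note (not in the original source): this is the mirror image
 * of xnb_txpkt2gnttab().  The mbuf chain is now the copy source and the
 * frontend's grefs are the destination, and because every rx request
 * implicitly offers a full PAGE_SIZE buffer, only mbuf boundaries and
 * page boundaries can split an entry.
 */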
 2012 
 2013 /**
 2014  * Generates responses for all the requests that constituted pkt.  Builds
 2015  * responses and writes them to the ring, but doesn't push the shared ring
 2016  * indices.
 2017  * \param[in] pkt       the packet that needs a response
 2018  * \param[in] gnttab    The grant copy table corresponding to this packet.
 2019  *                      Used to determine how many rsp->netif_rx_response_t's to
 2020  *                      generate.
 2021  * \param[in] n_entries Number of relevant entries in the grant table
 2022  * \param[out] ring     Responses go here
 2023  * \return              The number of RX requests that were consumed to generate
 2024  *                      the responses
 2025  */
 2026 static int
 2027 xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab,
 2028               int n_entries, netif_rx_back_ring_t *ring)
 2029 {
 2030         /*
 2031          * This code makes the following assumptions:
 2032          *      * All entries in gnttab set GNTCOPY_dest_gref
 2033          *      * The entries in gnttab are grouped by their grefs: any two
 2034          *         entries with the same gref must be adjacent
 2035          */
 2036         int error = 0;
 2037         int gnt_idx, i;
 2038         int n_responses = 0;
 2039         grant_ref_t last_gref = GRANT_REF_INVALID;
 2040         RING_IDX r_idx;
 2041 
 2042         KASSERT(gnttab != NULL, ("Received a null granttable copy"));
 2043 
 2044         /*
 2045          * In the event of an error, we only need to send one response to the
 2046          * netfront.  In that case, we mustn't write any data to the responses
 2047          * after the one we send.  So we must loop all the way through gnttab
 2048          * looking for errors before we generate any responses.
 2049          *
 2050          * Since we're looping through the grant table anyway, we'll count the
 2051          * number of different grefs in it, which will tell us how many
 2052          * responses to generate
 2053          */
 2054         for (gnt_idx = 0; gnt_idx < n_entries; gnt_idx++) {
 2055                 int16_t status = gnttab[gnt_idx].status;
 2056                 if (status != GNTST_okay) {
 2057                         DPRINTF(
 2058                             "Got error %d for hypervisor gnttab_copy status\n",
 2059                             status);
 2060                         error = 1;
 2061                         break;
 2062                 }
 2063                 if (gnttab[gnt_idx].dest.u.ref != last_gref) {
 2064                         n_responses++;
 2065                         last_gref = gnttab[gnt_idx].dest.u.ref;
 2066                 }
 2067         }
 2068 
 2069         if (error != 0) {
 2070                 uint16_t id;
 2071                 netif_rx_response_t *rsp;
 2072                 
 2073                 id = RING_GET_REQUEST(ring, ring->rsp_prod_pvt)->id;
 2074                 rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
 2075                 rsp->id = id;
 2076                 rsp->status = NETIF_RSP_ERROR;
 2077                 n_responses = 1;
 2078         } else {
 2079                 const int has_extra = pkt->flags & NETRXF_extra_info;
 2080                 gnt_idx = 0;
 2081                 if (has_extra != 0)
 2082                         n_responses++;
 2083 
 2084                 for (i = 0; i < n_responses; i++) {
 2085                         netif_rx_request_t rxq;
 2086                         netif_rx_response_t *rsp;
 2087 
 2088                         r_idx = ring->rsp_prod_pvt + i;
 2089                         /*
 2090                          * We copy the structure of rxq instead of making a
 2091                          * pointer because it shares the same memory as rsp.
 2092                          */
 2093                         rxq = *(RING_GET_REQUEST(ring, r_idx));
 2094                         rsp = RING_GET_RESPONSE(ring, r_idx);
 2095                         if (has_extra && (i == 1)) {
 2096                                 netif_extra_info_t *ext =
 2097                                         (netif_extra_info_t*)rsp;
 2098                                 ext->type = XEN_NETIF_EXTRA_TYPE_GSO;
 2099                                 ext->flags = 0;
 2100                                 ext->u.gso.size = pkt->extra.u.gso.size;
 2101                                 ext->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
 2102                                 ext->u.gso.pad = 0;
 2103                                 ext->u.gso.features = 0;
 2104                         } else {
 2105                                 rsp->id = rxq.id;
 2107                                 rsp->offset = 0;
 2108                                 rsp->flags = 0;
 2109                                 if (i < pkt->list_len - 1)
 2110                                         rsp->flags |= NETRXF_more_data;
 2111                                 if ((i == 0) && has_extra)
 2112                                         rsp->flags |= NETRXF_extra_info;
 2113                                 if ((i == 0) &&
 2114                                         (pkt->flags & NETRXF_data_validated)) {
 2115                                         rsp->flags |= NETRXF_data_validated;
 2116                                         rsp->flags |= NETRXF_csum_blank;
 2117                                 }
 2118                                 rsp->status = 0;
 2119                                 for (; gnttab[gnt_idx].dest.u.ref == rxq.gref;
 2120                                     gnt_idx++) {
 2121                                         rsp->status += gnttab[gnt_idx].len;
 2122                                 }
 2123                         }
 2124                 }
 2125         }
 2126 
 2127         ring->req_cons += n_responses;
 2128         ring->rsp_prod_pvt += n_responses;
 2129         return n_responses;
 2130 }
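
/*
 * Editorial example: for a pkt with NETRXF_extra_info and list_len == 2
 * the loop above emits three responses:
 *      slot 0: data, flags = NETRXF_extra_info | NETRXF_more_data,
 *              status = bytes copied into the first gref
 *      slot 1: a netif_extra_info_t carrying the GSO metadata
 *      slot 2: final data response, flags = 0, status = remaining bytes
 * If any gnttab entry failed, a single NETIF_RSP_ERROR response is
 * generated instead.
 */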
 2131 
 2132 #if defined(INET) || defined(INET6)
 2133 /**
 2134  * Add IP, TCP, and/or UDP checksums to every mbuf in a chain.  The first mbuf
 2135  * in the chain must start with a struct ether_header.
 2136  *
 2137  * XXX This function will perform incorrectly on UDP packets that are split up
 2138  * into multiple ethernet frames.
 2139  */
 2140 static void
 2141 xnb_add_mbuf_cksum(struct mbuf *mbufc)
 2142 {
 2143         struct ether_header *eh;
 2144         struct ip *iph;
 2145         uint16_t ether_type;
 2146 
 2147         eh = mtod(mbufc, struct ether_header*);
 2148         ether_type = ntohs(eh->ether_type);
 2149         if (ether_type != ETHERTYPE_IP) {
 2150                 /* Nothing to calculate */
 2151                 return;
 2152         }
 2153 
 2154         iph = (struct ip*)(eh + 1);
 2155         if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) {
 2156                 iph->ip_sum = 0;
 2157                 iph->ip_sum = in_cksum_hdr(iph);
 2158         }
 2159 
 2160         switch (iph->ip_p) {
 2161         case IPPROTO_TCP:
 2162                 if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) {
 2163                         size_t tcplen = ntohs(iph->ip_len) - sizeof(struct ip);
 2164                         struct tcphdr *th = (struct tcphdr*)(iph + 1);
 2165                         th->th_sum = in_pseudo(iph->ip_src.s_addr,
 2166                             iph->ip_dst.s_addr, htons(IPPROTO_TCP + tcplen));
 2167                         th->th_sum = in_cksum_skip(mbufc,
 2168                             sizeof(struct ether_header) + ntohs(iph->ip_len),
 2169                             sizeof(struct ether_header) + (iph->ip_hl << 2));
 2170                 }
 2171                 break;
 2172         case IPPROTO_UDP:
 2173                 if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) {
 2174                         size_t udplen = ntohs(iph->ip_len) - sizeof(struct ip);
 2175                         struct udphdr *uh = (struct udphdr*)(iph + 1);
 2176                         uh->uh_sum = in_pseudo(iph->ip_src.s_addr,
 2177                             iph->ip_dst.s_addr, htons(IPPROTO_UDP + udplen));
 2178                         uh->uh_sum = in_cksum_skip(mbufc,
 2179                             sizeof(struct ether_header) + ntohs(iph->ip_len),
 2180                             sizeof(struct ether_header) + (iph->ip_hl << 2));
 2181                 }
 2182                 break;
 2183         default:
 2184                 break;
 2185         }
 2186 }
 2187 #endif /* INET || INET6 */
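
/*
 * Editorial sketch (not in the original source) of the checksum math
 * above, for the TCP case: in_pseudo() folds the source address, the
 * destination address, and htons(IPPROTO_TCP + tcplen) into a partial
 * one's-complement sum that is seeded into th_sum, and in_cksum_skip()
 * then sums the frame from the end of the IP header onward.  Because
 * the seeded th_sum participates in that sum, the value written back
 * is the correct transport checksum.
 */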
 2188 
 2189 static void
 2190 xnb_stop(struct xnb_softc *xnb)
 2191 {
 2192         struct ifnet *ifp;
 2193 
 2194         mtx_assert(&xnb->sc_lock, MA_OWNED);
 2195         ifp = xnb->xnb_ifp;
 2196         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 2197         if_link_state_change(ifp, LINK_STATE_DOWN);
 2198 }
 2199 
 2200 static int
 2201 xnb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 2202 {
 2203         struct xnb_softc *xnb = ifp->if_softc;
 2204         struct ifreq *ifr = (struct ifreq*) data;
 2205 #ifdef INET
 2206         struct ifaddr *ifa = (struct ifaddr*)data;
 2207 #endif
 2208         int error = 0;
 2209 
 2210         switch (cmd) {
 2211                 case SIOCSIFFLAGS:
 2212                         mtx_lock(&xnb->sc_lock);
 2213                         if (ifp->if_flags & IFF_UP) {
 2214                                 xnb_ifinit_locked(xnb);
 2215                         } else {
 2216                                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 2217                                         xnb_stop(xnb);
 2218                                 }
 2219                         }
 2220                         /*
 2221                          * Note: netfront sets a variable named xn_if_flags
 2222                          * here, but that variable is never read
 2223                          */
 2224                         mtx_unlock(&xnb->sc_lock);
 2225                         break;
 2226                 case SIOCSIFADDR:
 2227 #ifdef INET
 2228                         mtx_lock(&xnb->sc_lock);
 2229                         if (ifa->ifa_addr->sa_family == AF_INET) {
 2230                                 ifp->if_flags |= IFF_UP;
 2231                                 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 2232                                         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING |
 2233                                                         IFF_DRV_OACTIVE);
 2234                                         if_link_state_change(ifp,
 2235                                                         LINK_STATE_DOWN);
 2236                                         ifp->if_drv_flags |= IFF_DRV_RUNNING;
 2237                                         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 2238                                         if_link_state_change(ifp,
 2239                                             LINK_STATE_UP);
 2240                                 }
 2241                                 arp_ifinit(ifp, ifa);
 2242                                 mtx_unlock(&xnb->sc_lock);
 2243                         } else {
 2244                                 mtx_unlock(&xnb->sc_lock);
 2245 #endif
 2246                                 error = ether_ioctl(ifp, cmd, data);
 2247 #ifdef INET
 2248                         }
 2249 #endif
 2250                         break;
 2251                 case SIOCSIFCAP:
 2252                         mtx_lock(&xnb->sc_lock);
 2253                         if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
 2254                                 ifp->if_capenable |= IFCAP_TXCSUM;
 2255                                 ifp->if_hwassist |= XNB_CSUM_FEATURES;
 2256                         } else {
 2257                                 ifp->if_capenable &= ~(IFCAP_TXCSUM);
 2258                                 ifp->if_hwassist &= ~(XNB_CSUM_FEATURES);
 2259                         }
 2260                         if ((ifr->ifr_reqcap & IFCAP_RXCSUM)) {
 2261                                 ifp->if_capenable |= IFCAP_RXCSUM;
 2262                         } else {
 2263                                 ifp->if_capenable &= ~(IFCAP_RXCSUM);
 2264                         }
 2265                         /*
 2266                          * TODO enable TSO4 and LRO once we no longer need
 2267                          * to calculate checksums in software
 2268                          */
 2269 #if 0
 2270                         if (ifr->ifr_reqcap & IFCAP_TSO4) {
 2271                                 if (!(IFCAP_TXCSUM & ifp->if_capenable)) {
 2272                                         printf("xnb: Xen netif requires that "
 2273                                                 "TXCSUM be enabled in order "
 2274                                                 "to use TSO4\n");
 2275                                         error = EINVAL;
 2276                                 } else {
 2277                                         ifp->if_capenable |= IFCAP_TSO4;
 2278                                         ifp->if_hwassist |= CSUM_TSO;
 2279                                 }
 2280                         } else {
 2281                                 ifp->if_capenable &= ~(IFCAP_TSO4);
 2282                                 ifp->if_hwassist &= ~(CSUM_TSO);
 2283                         }
 2284                         if (ifr->ifr_reqcap & IFCAP_LRO) {
 2285                                 ifp->if_capenable |= IFCAP_LRO;
 2286                         } else {
 2287                                 ifp->if_capenable &= ~(IFCAP_LRO);
 2288                         }
 2289 #endif
 2290                         mtx_unlock(&xnb->sc_lock);
 2291                         break;
 2292                 case SIOCSIFMTU:
 2293                         ifp->if_mtu = ifr->ifr_mtu;
 2294                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 2295                         xnb_ifinit(xnb);
 2296                         break;
 2297                 case SIOCADDMULTI:
 2298                 case SIOCDELMULTI:
 2299                         break;
 2300                 case SIOCSIFMEDIA:
 2301                 case SIOCGIFMEDIA:
 2302                         error = ifmedia_ioctl(ifp, ifr, &xnb->sc_media, cmd);
 2303                         break;
 2304                 default:
 2305                         error = ether_ioctl(ifp, cmd, data);
 2306                         break;
 2307         }
 2308         return (error);
 2309 }
 2310 
 2311 static void
 2312 xnb_start_locked(struct ifnet *ifp)
 2313 {
 2314         netif_rx_back_ring_t *rxb;
 2315         struct xnb_softc *xnb;
 2316         struct mbuf *mbufc;
 2317         RING_IDX req_prod_local;
 2318 
 2319         xnb = ifp->if_softc;
 2320         rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring;
 2321 
 2322         if (!xnb->carrier)
 2323                 return;
 2324 
 2325         do {
 2326                 int out_of_space = 0;
 2327                 int notify;
 2328                 req_prod_local = rxb->sring->req_prod;
 2329                 xen_rmb();
 2330                 for (;;) {
 2331                         int error;
 2332 
 2333                         IF_DEQUEUE(&ifp->if_snd, mbufc);
 2334                         if (mbufc == NULL)
 2335                                 break;
 2336                         error = xnb_send(rxb, xnb->otherend_id, mbufc,
 2337                                          xnb->rx_gnttab);
 2338                         switch (error) {
 2339                                 case EAGAIN:
 2340                                         /*
 2341                                          * Insufficient space in the ring.
 2342                                          * Requeue pkt and send when space is
 2343                                          * available.
 2344                                          */
 2345                                         IF_PREPEND(&ifp->if_snd, mbufc);
 2346                                         /*
 2347                                          * Perhaps the frontend missed an IRQ
 2348                                          * and went to sleep.  Notify it to wake
 2349                                          * it up.
 2350                                          */
 2351                                         out_of_space = 1;
 2352                                         break;
 2353 
 2354                                 case EINVAL:
 2355                                         /* OS gave a corrupt packet.  Drop it. */
 2356                                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2357                                         /* FALLTHROUGH */
 2358                                 default:
 2359                                         /* Send succeeded, or packet had error.
 2360                                          * Free the packet */
 2361                                         if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 2362                                         if (mbufc)
 2363                                                 m_freem(mbufc);
 2364                                         break;
 2365                         }
 2366                         if (out_of_space != 0)
 2367                                 break;
 2368                 }
 2369 
 2370                 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify);
 2371                 if ((notify != 0) || (out_of_space != 0))
 2372                         xen_intr_signal(xnb->xen_intr_handle);
 2373                 rxb->sring->req_event = req_prod_local + 1;
 2374                 xen_mb();
 2375         } while (rxb->sring->req_prod != req_prod_local);
 2376 }
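
/*
 * Editorial note (not in the original source): signalling the event
 * channel on out_of_space covers a frontend that stopped posting rx
 * buffers because it missed an earlier event; the kick prompts it to
 * consume responses and refill the ring, after which the requeued mbuf
 * can be sent on a later pass through this function.
 */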
 2377 
 2378 /**
 2379  * Sends one packet to the ring.  Blocks until the packet is on the ring.
 2380  * \param[in,out] ring  The packet will be pushed onto this ring, but the
 2381  *                      otherend will not be notified.
 2382  * \param[in]   otherend The domain ID of the other end of the connection
 2383  * \param[in]   mbufc   Contains one packet to send.  Caller must free
 2384  * \param[in,out] gnttab Pointer to enough memory for a grant table.  We make
 2385  *                      this a function parameter so that we will take less
 2386  *                      stack space.
 2387  * \retval 0            Success
 2388  * \retval EAGAIN       The ring did not have enough space for the packet;
 2389  *                      the ring has not been modified
 2390  * \retval EINVAL       mbufc was corrupt or not convertible into a pkt
 2390  */
 2391 static int
 2392 xnb_send(netif_rx_back_ring_t *ring, domid_t otherend, const struct mbuf *mbufc,
 2393          gnttab_copy_table gnttab)
 2394 {
 2395         struct xnb_pkt pkt;
 2396         int error, n_entries;
 2397         RING_IDX space;
 2398 
 2399         space = ring->sring->req_prod - ring->req_cons;
 2400         error = xnb_mbufc2pkt(mbufc, &pkt, ring->rsp_prod_pvt, space);
 2401         if (error != 0)
 2402                 return error;
 2403         n_entries = xnb_rxpkt2gnttab(&pkt, mbufc, gnttab, ring, otherend);
 2404         if (n_entries != 0) {
 2405                 int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
 2406                     gnttab, n_entries);
 2407                 KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n",
 2408                     hv_ret));
 2409         }
 2410 
 2411         xnb_rxpkt2rsp(&pkt, gnttab, n_entries, ring);
 2412 
 2413         return 0;
 2414 }
 2415 
 2416 static void
 2417 xnb_start(struct ifnet *ifp)
 2418 {
 2419         struct xnb_softc *xnb;
 2420 
 2421         xnb = ifp->if_softc;
 2422         mtx_lock(&xnb->rx_lock);
 2423         xnb_start_locked(ifp);
 2424         mtx_unlock(&xnb->rx_lock);
 2425 }
 2426 
 2427 /* equivalent of network_open() in Linux */
 2428 static void
 2429 xnb_ifinit_locked(struct xnb_softc *xnb)
 2430 {
 2431         struct ifnet *ifp;
 2432 
 2433         ifp = xnb->xnb_ifp;
 2434 
 2435         mtx_assert(&xnb->sc_lock, MA_OWNED);
 2436 
 2437         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 2438                 return;
 2439 
 2440         xnb_stop(xnb);
 2441 
 2442         ifp->if_drv_flags |= IFF_DRV_RUNNING;
 2443         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 2444         if_link_state_change(ifp, LINK_STATE_UP);
 2445 }
 2446 
 2447 static void
 2448 xnb_ifinit(void *xsc)
 2449 {
 2450         struct xnb_softc *xnb = xsc;
 2451 
 2452         mtx_lock(&xnb->sc_lock);
 2453         xnb_ifinit_locked(xnb);
 2454         mtx_unlock(&xnb->sc_lock);
 2455 }
 2456 
 2457 /**
 2458  * Callback used by the generic networking code when the user requests a
 2459  * media change.  Since we don't have physical media, there is nothing to do.
 2460  */
 2461 static int
 2462 xnb_ifmedia_upd(struct ifnet *ifp)
 2463 {
 2464         return (0);
 2465 }
 2466 
 2467 /**
 2468  * Callback used by the generic networking code to ask us what our carrier
 2469  * state is.  Since we don't have a physical carrier, this is very simple.
 2470  */
 2471 static void
 2472 xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 2473 {
 2474         ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE;
 2475         ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
 2476 }
 2477 
 2478 /*---------------------------- NewBus Registration ---------------------------*/
 2479 static device_method_t xnb_methods[] = {
 2480         /* Device interface */
 2481         DEVMETHOD(device_probe,         xnb_probe),
 2482         DEVMETHOD(device_attach,        xnb_attach),
 2483         DEVMETHOD(device_detach,        xnb_detach),
 2484         DEVMETHOD(device_shutdown,      bus_generic_shutdown),
 2485         DEVMETHOD(device_suspend,       xnb_suspend),
 2486         DEVMETHOD(device_resume,        xnb_resume),
 2487 
 2488         /* Xenbus interface */
 2489         DEVMETHOD(xenbus_otherend_changed, xnb_frontend_changed),
 2490         { 0, 0 }
 2491 };
 2492 
 2493 static driver_t xnb_driver = {
 2494         "xnb",
 2495         xnb_methods,
 2496         sizeof(struct xnb_softc),
 2497 };
 2498 
 2499 DRIVER_MODULE(xnb, xenbusb_back, xnb_driver, 0, 0);
 2500 
 2501 /*-------------------------- Unit Tests -------------------------------------*/
 2502 #ifdef XNB_DEBUG
 2503 #include "netback_unit_tests.c"
 2504 #endif
