FreeBSD/Linux Kernel Cross Reference
sys/dev/netmap/netmap_kern.h


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo
    5  * Copyright (C) 2013-2016 Universita` di Pisa
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  *   1. Redistributions of source code must retain the above copyright
   12  *      notice, this list of conditions and the following disclaimer.
   13  *   2. Redistributions in binary form must reproduce the above copyright
   14  *      notice, this list of conditions and the following disclaimer in the
   15  *      documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 /*
   31  * $FreeBSD$
   32  *
   33  * This header contains the definitions of constants and function
   34  * prototypes used only in kernel space.
   35  */
   36 
   37 #ifndef _NET_NETMAP_KERN_H_
   38 #define _NET_NETMAP_KERN_H_
   39 
   40 #if defined(linux)
   41 
   42 #if defined(CONFIG_NETMAP_EXTMEM)
   43 #define WITH_EXTMEM
   44 #endif
   45 #if  defined(CONFIG_NETMAP_VALE)
   46 #define WITH_VALE
   47 #endif
   48 #if defined(CONFIG_NETMAP_PIPE)
   49 #define WITH_PIPES
   50 #endif
   51 #if defined(CONFIG_NETMAP_MONITOR)
   52 #define WITH_MONITOR
   53 #endif
   54 #if defined(CONFIG_NETMAP_GENERIC)
   55 #define WITH_GENERIC
   56 #endif
   57 #if defined(CONFIG_NETMAP_PTNETMAP)
   58 #define WITH_PTNETMAP
   59 #endif
   60 #if defined(CONFIG_NETMAP_SINK)
   61 #define WITH_SINK
   62 #endif
   63 #if defined(CONFIG_NETMAP_NULL)
   64 #define WITH_NMNULL
   65 #endif
   66 
   67 #elif defined (_WIN32)
   68 #define WITH_VALE       // comment out to disable VALE support
   69 #define WITH_PIPES
   70 #define WITH_MONITOR
   71 #define WITH_GENERIC
   72 #define WITH_NMNULL
   73 
   74 #else   /* neither linux nor windows */
   75 #define WITH_VALE       // comment out to disable VALE support
   76 #define WITH_PIPES
   77 #define WITH_MONITOR
   78 #define WITH_GENERIC
   79 #define WITH_EXTMEM
   80 #define WITH_NMNULL
   81 #endif
   82 
   83 #if defined(__FreeBSD__)
   84 #include <sys/selinfo.h>
   85 
   86 #define likely(x)       __builtin_expect((long)!!(x), 1L)
   87 #define unlikely(x)     __builtin_expect((long)!!(x), 0L)
   88 #define __user
   89 
   90 #define NM_LOCK_T       struct mtx      /* low level spinlock, used to protect queues */
   91 
   92 #define NM_MTX_T        struct sx       /* OS-specific mutex (sleepable) */
   93 #define NM_MTX_INIT(m)          sx_init(&(m), #m)
   94 #define NM_MTX_DESTROY(m)       sx_destroy(&(m))
   95 #define NM_MTX_LOCK(m)          sx_xlock(&(m))
   96 #define NM_MTX_SPINLOCK(m)      while (!sx_try_xlock(&(m))) ;
   97 #define NM_MTX_UNLOCK(m)        sx_xunlock(&(m))
   98 #define NM_MTX_ASSERT(m)        sx_assert(&(m), SA_XLOCKED)
   99 
  100 #define NM_SELINFO_T    struct nm_selinfo
  101 #define NM_SELRECORD_T  struct thread
  102 #define MBUF_LEN(m)     ((m)->m_pkthdr.len)
  103 #define MBUF_TXQ(m)     ((m)->m_pkthdr.flowid)
  104 #define MBUF_TRANSMIT(na, ifp, m)       ((na)->if_transmit(ifp, m))
  105 #define GEN_TX_MBUF_IFP(m)      ((m)->m_pkthdr.rcvif)
  106 
  107 #define NM_ATOMIC_T     volatile int /* required by atomic/bitops.h */
  108 /* atomic operations */
  109 #include <machine/atomic.h>
  110 #define NM_ATOMIC_TEST_AND_SET(p)       (!atomic_cmpset_acq_int((p), 0, 1))
  111 #define NM_ATOMIC_CLEAR(p)              atomic_store_rel_int((p), 0)
  112 
  113 #define WNA(_ifp)       (_ifp)->if_netmap
  114 
  115 struct netmap_adapter *netmap_getna(if_t ifp);
  116 
  117 #define MBUF_REFCNT(m)          ((m)->m_ext.ext_count)
  118 #define SET_MBUF_REFCNT(m, x)   (m)->m_ext.ext_count = x
  119 
  120 #define MBUF_QUEUED(m)          1
  121 
  122 struct nm_selinfo {
  123         /* Support for select(2) and poll(2). */
  124         struct selinfo si;
  125         /* Support for kqueue(9). See comments in netmap_freebsd.c */
  126         struct taskqueue *ntfytq;
  127         struct task ntfytask;
  128         struct mtx m;
  129         char mtxname[32];
  130         int kqueue_users;
  131 };
  132 
  133 
  134 struct hrtimer {
  135     /* Not used in FreeBSD. */
  136 };
  137 
  138 #define NM_BNS_GET(b)
  139 #define NM_BNS_PUT(b)
  140 
  141 #elif defined (linux)
  142 
  143 #define NM_LOCK_T       safe_spinlock_t // see bsd_glue.h
  144 #define NM_SELINFO_T    wait_queue_head_t
  145 #define MBUF_LEN(m)     ((m)->len)
  146 #define MBUF_TRANSMIT(na, ifp, m)                                                       \
  147         ({                                                                              \
  148                 /* Avoid infinite recursion with generic. */                            \
  149                 m->priority = NM_MAGIC_PRIORITY_TX;                                     \
  150                 (((struct net_device_ops *)(na)->if_transmit)->ndo_start_xmit(m, ifp)); \
  151                 0;                                                                      \
  152         })
  153 
  154 /* See explanation in nm_os_generic_xmit_frame. */
  155 #define GEN_TX_MBUF_IFP(m)      ((struct ifnet *)skb_shinfo(m)->destructor_arg)
  156 
  157 #define NM_ATOMIC_T     volatile long unsigned int
  158 
  159 #define NM_MTX_T        struct mutex    /* OS-specific sleepable lock */
  160 #define NM_MTX_INIT(m)  mutex_init(&(m))
  161 #define NM_MTX_DESTROY(m)       do { (void)(m); } while (0)
  162 #define NM_MTX_LOCK(m)          mutex_lock(&(m))
  163 #define NM_MTX_UNLOCK(m)        mutex_unlock(&(m))
  164 #define NM_MTX_ASSERT(m)        mutex_is_locked(&(m))
  165 
  166 #ifndef DEV_NETMAP
  167 #define DEV_NETMAP
  168 #endif /* DEV_NETMAP */
  169 
  170 #elif defined (__APPLE__)
  171 
  172 #warning apple support is incomplete.
  173 #define likely(x)       __builtin_expect(!!(x), 1)
  174 #define unlikely(x)     __builtin_expect(!!(x), 0)
  175 #define NM_LOCK_T       IOLock *
  176 #define NM_SELINFO_T    struct selinfo
  177 #define MBUF_LEN(m)     ((m)->m_pkthdr.len)
  178 
  179 #elif defined (_WIN32)
  180 #include "../../../WINDOWS/win_glue.h"
  181 
  182 #define NM_SELRECORD_T          IO_STACK_LOCATION
  183 #define NM_SELINFO_T            win_SELINFO             // see win_glue.h
  184 #define NM_LOCK_T               win_spinlock_t  // see win_glue.h
  185 #define NM_MTX_T                KGUARDED_MUTEX  /* OS-specific mutex (sleepable) */
  186 
  187 #define NM_MTX_INIT(m)          KeInitializeGuardedMutex(&m);
  188 #define NM_MTX_DESTROY(m)       do { (void)(m); } while (0)
  189 #define NM_MTX_LOCK(m)          KeAcquireGuardedMutex(&(m))
  190 #define NM_MTX_UNLOCK(m)        KeReleaseGuardedMutex(&(m))
  191 #define NM_MTX_ASSERT(m)        assert(&m.Count>0)
  192 
  193 //These linknames are for the NDIS driver
  194 #define NETMAP_NDIS_LINKNAME_STRING             L"\\DosDevices\\NMAPNDIS"
  195 #define NETMAP_NDIS_NTDEVICE_STRING             L"\\Device\\NMAPNDIS"
  196 
  197 //Definition of internal driver-to-driver ioctl codes
  198 #define NETMAP_KERNEL_XCHANGE_POINTERS          _IO('i', 180)
  199 #define NETMAP_KERNEL_SEND_SHUTDOWN_SIGNAL      _IO_direct('i', 195)
  200 
  201 struct hrtimer {
  202         KTIMER timer;
  203         BOOLEAN active;
  204         KDPC deferred_proc;
  205 };
  206 
  207 /* MSVC does not have likely/unlikely support */
  208 #ifdef _MSC_VER
  209 #define likely(x)       (x)
  210 #define unlikely(x)     (x)
  211 #else
  212 #define likely(x)       __builtin_expect((long)!!(x), 1L)
  213 #define unlikely(x)     __builtin_expect((long)!!(x), 0L)
  214 #endif //_MSC_VER
  215 
  216 #else
  217 
  218 #error unsupported platform
  219 
  220 #endif /* end - platform-specific code */
  221 
  222 #ifndef _WIN32 /* support for emulated sysctl */
  223 #define SYSBEGIN(x)
  224 #define SYSEND
  225 #endif /* _WIN32 */
  226 
  227 #define NM_ACCESS_ONCE(x)       (*(volatile __typeof__(x) *)&(x))
  228 
  229 #define NMG_LOCK_T              NM_MTX_T
  230 #define NMG_LOCK_INIT()         NM_MTX_INIT(netmap_global_lock)
  231 #define NMG_LOCK_DESTROY()      NM_MTX_DESTROY(netmap_global_lock)
  232 #define NMG_LOCK()              NM_MTX_LOCK(netmap_global_lock)
  233 #define NMG_UNLOCK()            NM_MTX_UNLOCK(netmap_global_lock)
  234 #define NMG_LOCK_ASSERT()       NM_MTX_ASSERT(netmap_global_lock)
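
/*
 * Example usage (illustrative sketch, not part of this header):
 * configuration paths serialize on the global lock, e.g.
 *
 *        NMG_LOCK();
 *        error = some_config_op(na);   // some_config_op() is hypothetical
 *        NMG_UNLOCK();
 */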
  235 
  236 #if defined(__FreeBSD__)
  237 #define nm_prerr_int    printf
  238 #define nm_prinf_int    printf
  239 #elif defined (_WIN32)
  240 #define nm_prerr_int    DbgPrint
  241 #define nm_prinf_int    DbgPrint
  242 #elif defined(linux)
  243 #define nm_prerr_int(fmt, arg...)    printk(KERN_ERR fmt, ##arg)
  244 #define nm_prinf_int(fmt, arg...)    printk(KERN_INFO fmt, ##arg)
  245 #endif
  246 
  247 #define nm_prinf(format, ...)                                   \
  248         do {                                                    \
  249                 struct timeval __xxts;                          \
  250                 microtime(&__xxts);                             \
  251                 nm_prinf_int("%03d.%06d [%4d] %-25s " format "\n",\
  252                 (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
  253                 __LINE__, __FUNCTION__, ##__VA_ARGS__);         \
  254         } while (0)
  255 
  256 #define nm_prerr(format, ...)                                   \
  257         do {                                                    \
  258                 struct timeval __xxts;                          \
  259                 microtime(&__xxts);                             \
  260                 nm_prerr_int("%03d.%06d [%4d] %-25s " format "\n",\
  261                 (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
  262                 __LINE__, __FUNCTION__, ##__VA_ARGS__);         \
  263         } while (0)
  264 
  265 /* Disabled printf (used to be nm_prdis). */
  266 #define nm_prdis(format, ...)
  267 
  268 /* Rate limited, lps indicates how many per second. */
  269 #define nm_prlim(lps, format, ...)                              \
  270         do {                                                    \
  271                 static int t0, __cnt;                           \
  272                 if (t0 != time_second) {                        \
  273                         t0 = time_second;                       \
  274                         __cnt = 0;                              \
  275                 }                                               \
  276                 if (__cnt++ < lps)                              \
  277                         nm_prinf(format, ##__VA_ARGS__);        \
  278         } while (0)
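
/*
 * Example usage (illustrative): cap a hot-path diagnostic at 10
 * messages per second, e.g.
 *
 *        nm_prlim(10, "ring %s is full", kring->name);
 */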
  279 
  280 struct netmap_adapter;
  281 struct nm_bdg_fwd;
  282 struct nm_bridge;
  283 struct netmap_priv_d;
  284 struct nm_bdg_args;
  285 
  286 /* os-specific NM_SELINFO_T initialization/destruction functions */
  287 int nm_os_selinfo_init(NM_SELINFO_T *, const char *name);
  288 void nm_os_selinfo_uninit(NM_SELINFO_T *);
  289 
  290 const char *nm_dump_buf(char *p, int len, int lim, char *dst);
  291 
  292 void nm_os_selwakeup(NM_SELINFO_T *si);
  293 void nm_os_selrecord(NM_SELRECORD_T *sr, NM_SELINFO_T *si);
  294 
  295 int nm_os_ifnet_init(void);
  296 void nm_os_ifnet_fini(void);
  297 void nm_os_ifnet_lock(void);
  298 void nm_os_ifnet_unlock(void);
  299 
  300 unsigned nm_os_ifnet_mtu(struct ifnet *ifp);
  301 
  302 void nm_os_get_module(void);
  303 void nm_os_put_module(void);
  304 
  305 void netmap_make_zombie(struct ifnet *);
  306 void netmap_undo_zombie(struct ifnet *);
  307 
  308 /* os independent alloc/realloc/free */
  309 void *nm_os_malloc(size_t);
  310 void *nm_os_vmalloc(size_t);
  311 void *nm_os_realloc(void *, size_t new_size, size_t old_size);
  312 void nm_os_free(void *);
  313 void nm_os_vfree(void *);
  314 
  315 /* os specific attach/detach enter/exit-netmap-mode routines */
  316 void nm_os_onattach(struct ifnet *);
  317 void nm_os_ondetach(struct ifnet *);
  318 void nm_os_onenter(struct ifnet *);
  319 void nm_os_onexit(struct ifnet *);
  320 
  321 /* passes a packet up to the host stack.
  322  * If the packet is sent (or dropped) immediately it returns NULL,
  323  * otherwise it links the packet to prev and returns m.
  324  * In this case, a final call with m=NULL and prev != NULL will send up
  325  * the entire chain to the host stack.
  326  */
  327 void *nm_os_send_up(struct ifnet *, struct mbuf *m, struct mbuf *prev);
  328 
  329 int nm_os_mbuf_has_seg_offld(struct mbuf *m);
  330 int nm_os_mbuf_has_csum_offld(struct mbuf *m);
  331 
  332 #include "netmap_mbq.h"
  333 
  334 extern NMG_LOCK_T       netmap_global_lock;
  335 
  336 enum txrx { NR_RX = 0, NR_TX = 1, NR_TXRX };
  337 
  338 static __inline const char*
  339 nm_txrx2str(enum txrx t)
  340 {
  341         return (t == NR_RX ? "RX" : "TX");
  342 }
  343 
  344 static __inline enum txrx
  345 nm_txrx_swap(enum txrx t)
  346 {
  347         return (t == NR_RX ? NR_TX : NR_RX);
  348 }
  349 
  350 #define for_rx_tx(t)    for ((t) = 0; (t) < NR_TXRX; (t)++)
  351 
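/*
 * Example usage (illustrative sketch; "na" stands for some adapter):
 * visit both directions, e.g.
 *
 *        enum txrx t;
 *
 *        for_rx_tx(t)
 *                nm_prinf("%s rings: %u", nm_txrx2str(t),
 *                    nma_get_nrings(na, t));   // helper defined below
 */
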
  352 #ifdef WITH_MONITOR
  353 struct netmap_zmon_list {
  354         struct netmap_kring *next;
  355         struct netmap_kring *prev;
  356 };
  357 #endif /* WITH_MONITOR */
  358 
  359 /*
  360  * private, kernel view of a ring. Keeps track of the status of
  361  * a ring across system calls.
  362  *
  363  *      nr_hwcur        index of the next buffer to refill.
  364  *                      It corresponds to ring->head
  365  *                      at the time the system call returns.
  366  *
  367  *      nr_hwtail       index of the first buffer owned by the kernel.
  368  *                      On RX, hwcur->hwtail are receive buffers
  369  *                      not yet released. hwcur is advanced following
  370  *                      ring->head, hwtail is advanced on incoming packets,
  371  *                      and a wakeup is generated when hwtail passes ring->cur
  372  *                      On TX, hwcur->rcur have been filled by the sender
  373  *                      but not sent yet to the NIC; rcur->hwtail are available
  374  *                      for new transmissions, and hwtail->hwcur-1 are pending
  375  *                      transmissions not yet acknowledged.
  376  *
  377  * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots.
  378  * This is so that, on a reset, buffers owned by userspace are not
  379  * modified by the kernel. In particular:
  380  * RX rings: the next empty buffer (hwtail + hwofs) coincides with
  381  *      the next empty buffer as known by the hardware (next_to_check or so).
  382  * TX rings: hwcur + hwofs coincides with next_to_send
  383  *
  384  * The following fields are used to implement lock-free copy of packets
  385  * from input to output ports in VALE switch:
  386  *      nkr_hwlease     buffer after the last one being copied.
  387  *                      A writer in nm_bdg_flush reserves N buffers
  388  *                      from nr_hwlease, advances it, then does the
  389  *                      copy outside the lock.
  390  *                      In RX rings (used for VALE ports),
  391  *                      nkr_hwtail <= nkr_hwlease < nkr_hwcur+N-1
  392  *                      In TX rings (used for NIC or host stack ports)
  393  *                      nkr_hwcur <= nkr_hwlease < nkr_hwtail
  394  *      nkr_leases      array of nkr_num_slots where writers can report
  395  *                      completion of their block. NR_NOSLOT (~0) indicates
  396  *                      that the writer has not finished yet
  397  *      nkr_lease_idx   index of next free slot in nr_leases, to be assigned
  398  *
  399  * The kring is manipulated by txsync/rxsync and the generic netmap functions.
  400  *
  401  * Concurrent rxsync or txsync on the same ring are prevented
  402  * by nm_kr_(try)lock() which in turn uses nr_busy. This is all we need
  403  * for NIC rings, and for TX rings attached to the host stack.
  404  *
  405  * RX rings attached to the host stack use an mbq (rx_queue) on both
  406  * rxsync_from_host() and netmap_transmit(). The mbq is protected
  407  * by its internal lock.
  408  *
  409  * RX rings attached to the VALE switch are accessed by both senders
  410  * and receivers. They are protected by the q_lock on the RX ring.
  411  */
  412 struct netmap_kring {
  413         struct netmap_ring      *ring;
  414 
  415         uint32_t        nr_hwcur;  /* should be nr_hwhead */
  416         uint32_t        nr_hwtail;
  417 
  418         /*
  419          * Copies of values in user rings, so we do not need to look
  420          * at the ring (which could be modified). These are set in the
  421          * *sync_prologue()/finalize() routines.
  422          */
  423         uint32_t        rhead;
  424         uint32_t        rcur;
  425         uint32_t        rtail;
  426 
  427         uint32_t        nr_kflags;      /* private driver flags */
  428 #define NKR_PENDINTR    0x1             // Pending interrupt.
  429 #define NKR_EXCLUSIVE   0x2             /* exclusive binding */
  430 #define NKR_FORWARD     0x4             /* (host ring only) there are
  431                                            packets to forward
  432                                          */
  433 #define NKR_NEEDRING    0x8             /* ring needed even if users==0
  434                                          * (used internally by pipes and
  435                                          *  by ptnetmap host ports)
  436                                          */
  437 #define NKR_NOINTR      0x10            /* don't use interrupts on this ring */
  438 #define NKR_FAKERING    0x20            /* don't allocate/free buffers */
  439 
  440         uint32_t        nr_mode;
  441         uint32_t        nr_pending_mode;
  442 #define NKR_NETMAP_OFF  0x0
  443 #define NKR_NETMAP_ON   0x1
  444 
  445         uint32_t        nkr_num_slots;
  446 
  447         /*
  448          * On a NIC reset, the NIC ring indexes may be reset but the
  449          * indexes in the netmap rings remain the same. nkr_hwofs
  450          * keeps track of the offset between the two.
  451          *
  452          * Moreover, during reset, we can restore only the subset of
  453          * the NIC ring that corresponds to the kernel-owned part of
  454          * the netmap ring. The rest of the slots must be restored
  455          * by the *sync routines when the user releases more slots.
  456          * The nkr_to_refill field keeps track of the number of slots
  457          * that still need to be restored.
  458          */
  459         int32_t         nkr_hwofs;
  460         int32_t         nkr_to_refill;
  461 
  462         /* last_reclaim is an opaque marker to help reduce the frequency
  463          * of operations such as reclaiming tx buffers. A possible use
  464          * is to set it to ticks and do the reclaim only once per tick.
  465          */
  466         uint64_t        last_reclaim;
  467 
  468 
  469         NM_SELINFO_T    si;             /* poll/select wait queue */
  470         NM_LOCK_T       q_lock;         /* protects kring and ring. */
  471         NM_ATOMIC_T     nr_busy;        /* prevent concurrent syscalls */
  472 
  473         /* the adapter that owns this kring */
  474         struct netmap_adapter *na;
  475 
  476         /* the adapter that wants to be notified when this kring has
  477          * new slots available. This is usually the same as the above,
  478          * but wrappers may let it point to themselves
  479          */
  480         struct netmap_adapter *notify_na;
  481 
  482         /* The following fields are for VALE switch support */
  483         struct nm_bdg_fwd *nkr_ft;
  484         uint32_t        *nkr_leases;
  485 #define NR_NOSLOT       ((uint32_t)~0)  /* used in nkr_*lease* */
  486         uint32_t        nkr_hwlease;
  487         uint32_t        nkr_lease_idx;
  488 
  489         /* while nkr_stopped is set, no new [tr]xsync operations can
  490          * be started on this kring.
  491          * This is used by netmap_disable_all_rings()
  492          * to find a synchronization point where critical data
  493          * structures pointed to by the kring can be added or removed
  494          */
  495         volatile int nkr_stopped;
  496 
  497         /* Support for adapters without native netmap support.
  498          * On tx rings we preallocate an array of tx buffers
  499          * (same size as the netmap ring), on rx rings we
  500          * store incoming mbufs in a queue that is drained by
  501          * a rxsync.
  502          */
  503         struct mbuf     **tx_pool;
  504         struct mbuf     *tx_event;      /* TX event used as a notification */
  505         NM_LOCK_T       tx_event_lock;  /* protects the tx_event mbuf */
  506         struct mbq      rx_queue;       /* intercepted rx mbufs. */
  507 
  508         uint32_t        users;          /* existing bindings for this ring */
  509 
  510         uint32_t        ring_id;        /* kring identifier */
  511         enum txrx       tx;             /* kind of ring (tx or rx) */
  512         char name[64];                  /* diagnostic */
  513 
  514         /* [tx]sync callback for this kring.
  515          * The default nm_kring_create callback (netmap_krings_create)
  516          * sets the nm_sync callback of each hardware tx(rx) kring to
  517          * the corresponding nm_txsync(nm_rxsync) taken from the
  518          * netmap_adapter; moreover, it sets the sync callback
  519          * of the host tx(rx) ring to netmap_txsync_to_host
  520          * (netmap_rxsync_from_host).
  521          *
  522          * Overrides: the above configuration is not changed by
  523          * any of the nm_krings_create callbacks.
  524          */
  525         int (*nm_sync)(struct netmap_kring *kring, int flags);
  526         int (*nm_notify)(struct netmap_kring *kring, int flags);
  527 
  528 #ifdef WITH_PIPES
  529         struct netmap_kring *pipe;      /* if this is a pipe ring,
  530                                          * pointer to the other end
  531                                          */
  532         uint32_t pipe_tail;             /* hwtail updated by the other end */
  533 #endif /* WITH_PIPES */
  534 
  535         /* mask for the offset-related part of the ptr field in the slots */
  536         uint64_t offset_mask;
  537         /* maximum user-specified offset, as stipulated at bind time.
  538          * Larger offset requests will be silently capped to offset_max.
  539          */
  540         uint64_t offset_max;
  541         /* minimum gap between two consecutive offsets into the same
  542          * buffer, as stipulated at bind time. This is used to choose
  543          * the hwbuf_len, but is not otherwise checked for compliance
  544          * at runtime.
  545          */
  546         uint64_t offset_gap;
  547 
  548         /* size of hardware buffer. This may be less than the size of
  549          * the netmap buffers because of non-zero offsets, or because
  550          * the netmap buffer size exceeds the capability of the hardware.
  551          */
  552         uint64_t hwbuf_len;
  553 
  554         /* required alignment (in bytes) for the buffers used by this ring.
  555          * Netmap buffers are aligned to cachelines, which should suffice
  556          * for most NICs. If the user is passing offsets, though, we need
  557          * to check that the resulting buf address complies with any
  558          * alignment restriction.
  559          */
  560         uint64_t buf_align;
  561 
  562         /* hardware specific logic for the selection of the hwbuf_len */
  563         int (*nm_bufcfg)(struct netmap_kring *kring, uint64_t target);
  564 
  565         int (*save_notify)(struct netmap_kring *kring, int flags);
  566 
  567 #ifdef WITH_MONITOR
  568         /* array of krings that are monitoring this kring */
  569         struct netmap_kring **monitors;
  570         uint32_t max_monitors; /* current size of the monitors array */
  571         uint32_t n_monitors;    /* next unused entry in the monitor array */
  572         uint32_t mon_pos[NR_TXRX]; /* index of this ring in the monitored ring array */
  573         uint32_t mon_tail;  /* last seen slot on rx */
  574 
  575         /* circular list of zero-copy monitors */
  576         struct netmap_zmon_list zmon_list[NR_TXRX];
  577 
  578         /*
  579          * Monitors work by intercepting the sync and notify callbacks of the
  580          * monitored krings. This is implemented by replacing the pointers
  581          * above and saving the previous ones in mon_* pointers below
  582          */
  583         int (*mon_sync)(struct netmap_kring *kring, int flags);
  584         int (*mon_notify)(struct netmap_kring *kring, int flags);
  585 
  586 #endif
  587 }
  588 #ifdef _WIN32
  589 __declspec(align(64));
  590 #else
  591 __attribute__((__aligned__(64)));
  592 #endif
  593 
  594 /* return 1 iff the kring needs to be turned on */
  595 static inline int
  596 nm_kring_pending_on(struct netmap_kring *kring)
  597 {
  598         return kring->nr_pending_mode == NKR_NETMAP_ON &&
  599                kring->nr_mode == NKR_NETMAP_OFF;
  600 }
  601 
  602 /* return 1 iff the kring needs to be turned off */
  603 static inline int
  604 nm_kring_pending_off(struct netmap_kring *kring)
  605 {
  606         return kring->nr_pending_mode == NKR_NETMAP_OFF &&
  607                kring->nr_mode == NKR_NETMAP_ON;
  608 }
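
/*
 * Example usage (illustrative sketch): a driver's nm_register()
 * callback typically commits the pending mode of each kring, e.g.
 *
 *        if (nm_kring_pending_on(kring)) {
 *                ... enable the ring in the hardware ...
 *                kring->nr_mode = NKR_NETMAP_ON;
 *        } else if (nm_kring_pending_off(kring)) {
 *                ... disable the ring in the hardware ...
 *                kring->nr_mode = NKR_NETMAP_OFF;
 *        }
 */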
  609 
  610 /* return the next index, with wraparound */
  611 static inline uint32_t
  612 nm_next(uint32_t i, uint32_t lim)
  613 {
  614         return unlikely(i == lim) ? 0 : i + 1;
  615 }
  616 
  617 
  618 /* return the previous index, with wraparound */
  619 static inline uint32_t
  620 nm_prev(uint32_t i, uint32_t lim)
  621 {
  622         return unlikely(i == 0) ? lim : i - 1;
  623 }
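
/*
 * Example usage (illustrative sketch): walking a ring with wraparound,
 * as a txsync implementation would, e.g.
 *
 *        u_int nm_i = kring->nr_hwcur;
 *        u_int lim = kring->nkr_num_slots - 1;
 *
 *        while (nm_i != kring->rhead) {
 *                struct netmap_slot *slot = &kring->ring->slot[nm_i];
 *                ... program the NIC from *slot ...
 *                nm_i = nm_next(nm_i, lim);
 *        }
 *        kring->nr_hwcur = kring->rhead;
 */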
  624 
  625 
  626 /*
  627  *
  628  * Here is the layout for the Rx and Tx rings.
  629 
  630        RxRING                            TxRING
  631 
  632       +-----------------+            +-----------------+
  633       |                 |            |                 |
  634       |      free       |            |      free       |
  635       +-----------------+            +-----------------+
  636 head->| owned by user   |<-hwcur     | not sent to nic |<-hwcur
  637       |                 |            | yet             |
  638       +-----------------+            |                 |
  639  cur->| available to    |            |                 |
  640       | user, not read  |            +-----------------+
  641       | yet             |       cur->| (being          |
  642       |                 |            |  prepared)      |
  643       |                 |            |                 |
  644       +-----------------+            +     ------      +
  645 tail->|                 |<-hwtail    |                 |<-hwlease
  646       | (being          | ...        |                 | ...
  647       |  prepared)      | ...        |                 | ...
  648       +-----------------+ ...        |                 | ...
  649       |                 |<-hwlease   +-----------------+
  650       |                 |      tail->|                 |<-hwtail
  651       |                 |            |                 |
  652       |                 |            |                 |
  653       |                 |            |                 |
  654       +-----------------+            +-----------------+
  655 
  656  * The cur/tail (user view) and hwcur/hwtail (kernel view)
  657  * are used in the normal operation of the card.
  658  *
  659  * When a ring is the output of a switch port (Rx ring for
  660  * a VALE port, Tx ring for the host stack or NIC), slots
  661  * are reserved in blocks through 'hwlease' which points
  662  * to the next unused slot.
  663  * On an Rx ring, hwlease is always after hwtail,
  664  * and completions cause hwtail to advance.
  665  * On a Tx ring, hwlease is always between cur and hwtail,
  666  * and completions cause cur to advance.
  667  *
  668  * nm_kr_space() returns the maximum number of slots that
  669  * can be assigned.
  670  * nm_kr_lease() reserves the required number of buffers,
  671  *    advances nkr_hwlease and also returns an entry in
  672  *    a circular array where completions should be reported.
  673  */
  674 
  675 struct lut_entry;
  676 #ifdef __FreeBSD__
  677 #define plut_entry lut_entry
  678 #endif
  679 
  680 struct netmap_lut {
  681         struct lut_entry *lut;
  682         struct plut_entry *plut;
  683         uint32_t objtotal;      /* max buffer index */
  684         uint32_t objsize;       /* buffer size */
  685 };
  686 
  687 struct netmap_vp_adapter; // forward
  688 struct nm_bridge;
  689 
  690 /* Struct to be filled by nm_config callbacks. */
  691 struct nm_config_info {
  692         unsigned num_tx_rings;
  693         unsigned num_rx_rings;
  694         unsigned num_tx_descs;
  695         unsigned num_rx_descs;
  696         unsigned rx_buf_maxsize;
  697 };
  698 
  699 /*
  700  * default type for the magic field.
  701  * May be overridden in glue code.
  702  */
  703 #ifndef NM_OS_MAGIC
  704 #define NM_OS_MAGIC uint32_t
  705 #endif /* !NM_OS_MAGIC */
  706 
  707 /*
  708  * The "struct netmap_adapter" extends the "struct adapter"
  709  * (or equivalent) device descriptor.
  710  * It contains all base fields needed to support netmap operation.
  711  * There are in fact different types of netmap adapters
  712  * (native, generic, VALE switch...) so a netmap_adapter is
  713  * just the first field in the derived type.
  714  */
  715 struct netmap_adapter {
  716         /*
  717          * On linux we do not have a good way to tell if an interface
  718          * is netmap-capable. So we always use the following trick:
  719          * NA(ifp) points here, and the first entry (which hopefully
  720          * always exists and is at least 32 bits) contains a magic
  721          * value which we can use to detect that the interface is good.
  722          */
  723         NM_OS_MAGIC magic;
  724         uint32_t na_flags;      /* enabled, and other flags */
  725 #define NAF_SKIP_INTR   1       /* use the regular interrupt handler.
  726                                  * useful during initialization
  727                                  */
  728 #define NAF_SW_ONLY     2       /* forward packets only to sw adapter */
  729 #define NAF_BDG_MAYSLEEP 4      /* the bridge is allowed to sleep when
  730                                  * forwarding packets coming from this
  731                                  * interface
  732                                  */
  733 #define NAF_MEM_OWNER   8       /* the adapter uses its own memory area
  734                                  * that cannot be changed
  735                                  */
  736 #define NAF_NATIVE      16      /* the adapter is native.
  737                                  * Virtual ports (non persistent vale ports,
  738                                  * pipes, monitors...) should never use
  739                                  * this flag.
  740                                  */
  741 #define NAF_NETMAP_ON   32      /* netmap is active (either native or
  742                                  * emulated). Where possible (e.g. FreeBSD)
  743                                  * IFCAP_NETMAP also mirrors this flag.
  744                                  */
  745 #define NAF_HOST_RINGS  64      /* the adapter supports the host rings */
  746 #define NAF_FORCE_NATIVE 128    /* the adapter is always NATIVE */
  747 /* free */
  748 #define NAF_MOREFRAG    512     /* the adapter supports NS_MOREFRAG */
  749 #define NAF_OFFSETS     1024    /* the adapter supports the slot offsets */
  750 #define NAF_HOST_ALL    2048    /* the adapter wants as many host rings as hw */
  751 #define NAF_ZOMBIE      (1U<<30) /* the nic driver has been unloaded */
  752 #define NAF_BUSY        (1U<<31) /* the adapter is used internally and
  753                                   * cannot be registered from userspace
  754                                   */
  755         int active_fds; /* number of user-space descriptors using this
  756                          interface, which is equal to the number of
  757                          struct netmap_if objs in the mapped region. */
  758 
  759         u_int num_rx_rings; /* number of adapter receive rings */
  760         u_int num_tx_rings; /* number of adapter transmit rings */
  761         u_int num_host_rx_rings; /* number of host receive rings */
  762         u_int num_host_tx_rings; /* number of host transmit rings */
  763 
  764         u_int num_tx_desc;  /* number of descriptors in each queue */
  765         u_int num_rx_desc;
  766 
  767         /* tx_rings and rx_rings are private but allocated as a
  768          * contiguous chunk of memory. Each array has N+K entries,
  769          * N for the hardware rings and K for the host rings.
  770          */
  771         struct netmap_kring **tx_rings; /* array of TX rings. */
  772         struct netmap_kring **rx_rings; /* array of RX rings. */
  773 
  774         void *tailroom;                /* space below the rings array */
  775                                        /* (used for leases) */
  776 
  777 
  778         NM_SELINFO_T si[NR_TXRX];       /* global wait queues */
  779 
  780         /* count users of the global wait queues */
  781         int si_users[NR_TXRX];
  782 
  783         void *pdev; /* used to store pci device */
  784 
  785         /* copy of if_qflush and if_transmit pointers, to intercept
  786          * packets from the network stack when netmap is active.
  787          */
  788         int     (*if_transmit)(struct ifnet *, struct mbuf *);
  789 
  790         /* copy of if_input for netmap_send_up() */
  791         void     (*if_input)(struct ifnet *, struct mbuf *);
  792 
  793         /* Back reference to the parent ifnet struct. Used for
  794          * hardware ports (emulated netmap included). */
  795         struct ifnet *ifp; /* adapter is ifp->if_softc */
  796 
  797         /*---- callbacks for this netmap adapter -----*/
  798         /*
  799          * nm_dtor() is the cleanup routine called when destroying
  800          *      the adapter.
  801          *      Called with NMG_LOCK held.
  802          *
  803          * nm_register() is called on NIOCREGIF and close() to enter
  804          *      or exit netmap mode on the NIC
  805          *      Called with NMG_LOCK held.
  806          *
  807          * nm_txsync() pushes packets to the underlying hw/switch
  808          *
  809          * nm_rxsync() collects packets from the underlying hw/switch
  810          *
  811          * nm_config() returns configuration information from the OS
  812          *      Called with NMG_LOCK held.
  813          *
  814          * nm_bufcfg()
  815          *      the purpose of this callback is to fill the kring->hwbuf_len
  816          *      (l) and kring->buf_align fields. The l value is most important
  817          *      for RX rings, where we want to disallow writes outside of the
  818          *      netmap buffer. The l value must be computed taking into account
  819          *      the stipulated max_offset (o), possibly increased if there are
  820          *      alignment constraints, the maxframe (m), if known, and the
  821          *      current NETMAP_BUF_SIZE (b) of the memory region used by the
  822          *      adapter. We want the largest supported l such that o + l <= b.
  823          *      If m is known to be <= b - o, the callback may also choose the
  824          *      largest l <= m, ignoring the offset.  The buf_align field is
  825          *      most important for TX rings when there are offsets.  The user
  826          *      will see this value in the ring->buf_align field.  Misaligned
  827          *      offsets will cause the corresponding packets to be silently
  828          *      dropped.
  829          *
  830          * nm_krings_create() create and init the tx_rings and
  831          *      rx_rings arrays of kring structures. In particular,
  832          *      set the nm_sync callbacks for each ring.
  833          *      There is no need to also allocate the corresponding
  834          *      netmap_rings, since netmap_mem_rings_create() will always
  835          *      be called to provide the missing ones.
  836          *      Called with NMG_LOCK held.
  837          *
  838          * nm_krings_delete() cleanup and delete the tx_rings and rx_rings
  839          *      arrays
  840          *      Called with NMG_LOCK held.
  841          *
  842          * nm_notify() is used to act after data have become available
  843          *      (or the stopped state of the ring has changed)
  844          *      For hw devices this is typically a selwakeup(),
  845          *      but for NIC/host ports attached to a switch (or vice-versa)
  846          *      we also need to invoke the 'txsync' code downstream.
  847          *      This callback pointer is actually used only to initialize
  848          *      kring->nm_notify.
  849          *      Return values are the same as for netmap_rx_irq().
  850          */
  851         void (*nm_dtor)(struct netmap_adapter *);
  852 
  853         int (*nm_register)(struct netmap_adapter *, int onoff);
  854         void (*nm_intr)(struct netmap_adapter *, int onoff);
  855 
  856         int (*nm_txsync)(struct netmap_kring *kring, int flags);
  857         int (*nm_rxsync)(struct netmap_kring *kring, int flags);
  858         int (*nm_notify)(struct netmap_kring *kring, int flags);
  859         int (*nm_bufcfg)(struct netmap_kring *kring, uint64_t target);
  860 #define NAF_FORCE_READ      1
  861 #define NAF_FORCE_RECLAIM   2
  862 #define NAF_CAN_FORWARD_DOWN 4
  863         /* return configuration information */
  864         int (*nm_config)(struct netmap_adapter *, struct nm_config_info *info);
  865         int (*nm_krings_create)(struct netmap_adapter *);
  866         void (*nm_krings_delete)(struct netmap_adapter *);
  867         /*
  868          * nm_bdg_attach() initializes the na_vp field to point
  869          *      to an adapter that can be attached to a VALE switch. If the
  870          *      current adapter is already a VALE port, na_vp is simply a cast;
  871          *      otherwise, na_vp points to a netmap_bwrap_adapter.
  872          *      If applicable, this callback also initializes na_hostvp,
  873          *      that can be used to connect the adapter host rings to the
  874          *      switch.
  875          *      Called with NMG_LOCK held.
  876          *
  877          * nm_bdg_ctl() is called on the actual attach/detach
  878          *      to/from the switch, to perform adapter-specific
  879          *      initializations.
  880          *      Called with NMG_LOCK held.
  881          */
  882         int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *,
  883                         struct nm_bridge *);
  884         int (*nm_bdg_ctl)(struct nmreq_header *, struct netmap_adapter *);
  885 
  886         /* adapter used to attach this adapter to a VALE switch (if any) */
  887         struct netmap_vp_adapter *na_vp;
  888         /* adapter used to attach the host rings of this adapter
  889          * to a VALE switch (if any) */
  890         struct netmap_vp_adapter *na_hostvp;
  891 
  892         /* standard refcount to control the lifetime of the adapter
  893          * (it should be equal to the lifetime of the corresponding ifp)
  894          */
  895         int na_refcount;
  896 
  897         /* memory allocator (opaque)
  898          * We also cache a pointer to the lut_entry for translating
  899          * buffer addresses, the total number of buffers and the buffer size.
  900          */
  901         struct netmap_mem_d *nm_mem;
  902         struct netmap_mem_d *nm_mem_prev;
  903         struct netmap_lut na_lut;
  904 
  905         /* additional information attached to this adapter
  906          * by other netmap subsystems. Currently used by
  907          * bwrap, LINUX/v1000 and ptnetmap
  908          */
  909         void *na_private;
  910 
  911         /* array of pipes that have this adapter as a parent */
  912         struct netmap_pipe_adapter **na_pipes;
  913         int na_next_pipe;       /* next free slot in the array */
  914         int na_max_pipes;       /* size of the array */
  915 
  916         /* Offset of ethernet header for each packet. */
  917         u_int virt_hdr_len;
  918 
  919         /* Max number of bytes that the NIC can store in the buffer
  920          * referenced by each RX descriptor. This translates to the maximum
  921          * bytes that a single netmap slot can reference. Larger packets
  922          * require NS_MOREFRAG support. */
  923         unsigned rx_buf_maxsize;
  924 
  925         char name[NETMAP_REQ_IFNAMSIZ]; /* used at least by pipes */
  926 
  927 #ifdef WITH_MONITOR
  928         unsigned long   monitor_id;     /* debugging */
  929 #endif
  930 };
  931 
  932 static __inline u_int
  933 nma_get_ndesc(struct netmap_adapter *na, enum txrx t)
  934 {
  935         return (t == NR_TX ? na->num_tx_desc : na->num_rx_desc);
  936 }
  937 
  938 static __inline void
  939 nma_set_ndesc(struct netmap_adapter *na, enum txrx t, u_int v)
  940 {
  941         if (t == NR_TX)
  942                 na->num_tx_desc = v;
  943         else
  944                 na->num_rx_desc = v;
  945 }
  946 
  947 static __inline u_int
  948 nma_get_nrings(struct netmap_adapter *na, enum txrx t)
  949 {
  950         return (t == NR_TX ? na->num_tx_rings : na->num_rx_rings);
  951 }
  952 
  953 static __inline u_int
  954 nma_get_host_nrings(struct netmap_adapter *na, enum txrx t)
  955 {
  956         return (t == NR_TX ? na->num_host_tx_rings : na->num_host_rx_rings);
  957 }
  958 
  959 static __inline void
  960 nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
  961 {
  962         if (t == NR_TX)
  963                 na->num_tx_rings = v;
  964         else
  965                 na->num_rx_rings = v;
  966 }
  967 
  968 static __inline void
  969 nma_set_host_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
  970 {
  971         if (t == NR_TX)
  972                 na->num_host_tx_rings = v;
  973         else
  974                 na->num_host_rx_rings = v;
  975 }
  976 
  977 static __inline struct netmap_kring**
  978 NMR(struct netmap_adapter *na, enum txrx t)
  979 {
  980         return (t == NR_TX ? na->tx_rings : na->rx_rings);
  981 }
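
/*
 * Example usage (illustrative sketch): iterate over all krings of an
 * adapter, host rings included, e.g.
 *
 *        enum txrx t;
 *        u_int i;
 *
 *        for_rx_tx(t) {
 *                for (i = 0; i < netmap_real_rings(na, t); i++) {
 *                        struct netmap_kring *kring = NMR(na, t)[i];
 *                        ... use kring ...
 *                }
 *        }
 *
 * (netmap_real_rings() is defined further below.)
 */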
  982 
  983 int nma_intr_enable(struct netmap_adapter *na, int onoff);
  984 
  985 /*
  986  * If the NIC is owned by the kernel
  987  * (i.e., bridge), neither another bridge nor user can use it;
  988  * if the NIC is owned by a user, only users can share it.
  989  * Evaluation must be done under NMG_LOCK().
  990  */
  991 #define NETMAP_OWNED_BY_KERN(na)        ((na)->na_flags & NAF_BUSY)
  992 #define NETMAP_OWNED_BY_ANY(na) \
  993         (NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0))
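
/*
 * Example usage (illustrative sketch, evaluated under NMG_LOCK()):
 * refuse to hand a busy port to userspace, e.g.
 *
 *        if (NETMAP_OWNED_BY_KERN(na))
 *                return (EBUSY);
 */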
  994 
  995 /*
  996  * derived netmap adapters for various types of ports
  997  */
  998 struct netmap_vp_adapter {      /* VALE software port */
  999         struct netmap_adapter up;
 1000 
 1001         /*
 1002          * Bridge support:
 1003          *
 1004          * bdg_port is the port number used in the bridge;
 1005          * na_bdg points to the bridge this NA is attached to.
 1006          */
 1007         int bdg_port;
 1008         struct nm_bridge *na_bdg;
 1009         int retry;
 1010         int autodelete; /* remove the ifp on last reference */
 1011 
 1012         /* Maximum Frame Size, used in bdg_mismatch_datapath() */
 1013         u_int mfs;
 1014         /* Last source MAC on this port */
 1015         uint64_t last_smac;
 1016 };
 1017 
 1018 
 1019 struct netmap_hw_adapter {      /* physical device */
 1020         struct netmap_adapter up;
 1021 
 1022 #ifdef linux
 1023         struct net_device_ops nm_ndo;
 1024         struct ethtool_ops    nm_eto;
 1025 #endif
 1026         const struct ethtool_ops*   save_ethtool;
 1027 
 1028         int (*nm_hw_register)(struct netmap_adapter *, int onoff);
 1029 };
 1030 
 1031 #ifdef WITH_GENERIC
 1032 /* Mitigation support. */
 1033 struct nm_generic_mit {
 1034         struct hrtimer mit_timer;
 1035         int mit_pending;
 1036         int mit_ring_idx;  /* index of the ring being mitigated */
 1037         struct netmap_adapter *mit_na;  /* backpointer */
 1038 };
 1039 
 1040 struct netmap_generic_adapter { /* emulated device */
 1041         struct netmap_hw_adapter up;
 1042 
 1043         /* Pointer to a previously used netmap adapter. */
 1044         struct netmap_adapter *prev;
 1045 
 1046         /* Emulated netmap adapters support:
 1047          *  - save_if_input saves the if_input hook (FreeBSD);
 1048          *  - mit implements rx interrupt mitigation;
 1049          */
 1050         void (*save_if_input)(struct ifnet *, struct mbuf *);
 1051 
 1052         struct nm_generic_mit *mit;
 1053 #ifdef linux
 1054         netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
 1055 #endif
 1056         /* Is the adapter able to use multiple RX slots to scatter
 1057          * each packet pushed up by the driver? */
 1058         int rxsg;
 1059 
 1060         /* Is the transmission path controlled by a netmap-aware
 1061          * device queue (i.e. qdisc on linux)? */
 1062         int txqdisc;
 1063 };
 1064 #endif  /* WITH_GENERIC */
 1065 
 1066 static __inline u_int
 1067 netmap_real_rings(struct netmap_adapter *na, enum txrx t)
 1068 {
 1069         return nma_get_nrings(na, t) +
 1070                 !!(na->na_flags & NAF_HOST_RINGS) * nma_get_host_nrings(na, t);
 1071 }
 1072 
 1073 /* account for fake rings */
 1074 static __inline u_int
 1075 netmap_all_rings(struct netmap_adapter *na, enum txrx t)
 1076 {
 1077         return max(nma_get_nrings(na, t) + 1, netmap_real_rings(na, t));
 1078 }
 1079 
 1080 int netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
 1081                 struct nm_bridge *);
 1082 struct nm_bdg_polling_state;
 1083 /*
 1084  * Bridge wrapper for non VALE ports attached to a VALE switch.
 1085  *
 1086  * The real device must already have its own netmap adapter (hwna).
 1087  * The bridge wrapper and the hwna adapter share the same set of
 1088  * netmap rings and buffers, but they have two separate sets of
 1089  * krings descriptors, with tx/rx meanings swapped:
 1090  *
 1091  *                                  netmap
 1092  *           bwrap     krings       rings      krings      hwna
 1093  *         +------+   +------+     +-----+    +------+   +------+
 1094  *         |tx_rings->|      |\   /|     |----|      |<-tx_rings|
 1095  *         |      |   +------+ \ / +-----+    +------+   |      |
 1096  *         |      |             X                        |      |
 1097  *         |      |            / \                       |      |
 1098  *         |      |   +------+/   \+-----+    +------+   |      |
 1099  *         |rx_rings->|      |     |     |----|      |<-rx_rings|
 1100  *         |      |   +------+     +-----+    +------+   |      |
 1101  *         +------+                                      +------+
 1102  *
 1103  * - packets coming from the bridge go to the bwrap rx rings,
 1104  *   which are also the hwna tx rings.  The bwrap notify callback
 1105  *   will then complete the hwna tx (see netmap_bwrap_notify).
 1106  *
 1107  * - packets coming from the outside go to the hwna rx rings,
 1108  *   which are also the bwrap tx rings.  The (overwritten) hwna
 1109  *   notify method will then complete the bridge tx
 1110  *   (see netmap_bwrap_intr_notify).
 1111  *
 1112  *   The bridge wrapper may optionally connect the hwna 'host' rings
 1113  *   to the bridge. This is done by using a second port in the
 1114  *   bridge and connecting it to the 'host' netmap_vp_adapter
 1115  *   contained in the netmap_bwrap_adapter. The bwrap host adapter
 1116  *   cross-links the hwna host rings in the same way as shown above.
 1117  *
 1118  * - packets coming from the bridge and directed to the host stack
 1119  *   are handled by the bwrap host notify callback
 1120  *   (see netmap_bwrap_host_notify)
 1121  *
 1122  * - packets coming from the host stack are still handled by the
 1123  *   overwritten hwna notify callback (netmap_bwrap_intr_notify),
 1124  *   but are diverted to the host adapter depending on the ring number.
 1125  *
 1126  */
 1127 struct netmap_bwrap_adapter {
 1128         struct netmap_vp_adapter up;
 1129         struct netmap_vp_adapter host;  /* for host rings */
 1130         struct netmap_adapter *hwna;    /* the underlying device */
 1131 
 1132         /*
 1133          * When we attach a physical interface to the bridge, we
 1134          * allow the controlling process to terminate, so we need
 1135          * a place to store the netmap_priv_d data structure.
 1136          * This is only done when physical interfaces
 1137          * are attached to a bridge.
 1138          */
 1139         struct netmap_priv_d *na_kpriv;
 1140         struct nm_bdg_polling_state *na_polling_state;
 1141         /* we overwrite the hwna->na_vp pointer, so we save
 1142          * here its original value, to be restored at detach
 1143          */
 1144         struct netmap_vp_adapter *saved_na_vp;
 1145         int (*nm_intr_notify)(struct netmap_kring *kring, int flags);
 1146 };
 1147 int nm_is_bwrap(struct netmap_adapter *na);
 1148 int nm_bdg_polling(struct nmreq_header *hdr);
 1149 
 1150 int netmap_bdg_attach(struct nmreq_header *hdr, void *auth_token);
 1151 int netmap_bdg_detach(struct nmreq_header *hdr, void *auth_token);
 1152 #ifdef WITH_VALE
 1153 int netmap_vale_list(struct nmreq_header *hdr);
 1154 int netmap_vi_create(struct nmreq_header *hdr, int);
 1155 int nm_vi_create(struct nmreq_header *);
 1156 int nm_vi_destroy(const char *name);
 1157 #else /* !WITH_VALE */
 1158 #define netmap_vi_create(hdr, a) (EOPNOTSUPP)
 1159 #endif /* WITH_VALE */
 1160 
 1161 #ifdef WITH_PIPES
 1162 
 1163 #define NM_MAXPIPES     64      /* max number of pipes per adapter */
 1164 
 1165 struct netmap_pipe_adapter {
 1166         /* pipe identifier is up.name */
 1167         struct netmap_adapter up;
 1168 
 1169 #define NM_PIPE_ROLE_MASTER     0x1
 1170 #define NM_PIPE_ROLE_SLAVE      0x2
 1171         int role;       /* either NM_PIPE_ROLE_MASTER or NM_PIPE_ROLE_SLAVE */
 1172 
 1173         struct netmap_adapter *parent; /* adapter that owns the memory */
 1174         struct netmap_pipe_adapter *peer; /* the other end of the pipe */
 1175         int peer_ref;           /* 1 iff we are holding a ref to the peer */
 1176         struct ifnet *parent_ifp;       /* maybe null */
 1177 
 1178         u_int parent_slot; /* index in the parent pipe array */
 1179 };
 1180 
 1181 #endif /* WITH_PIPES */
 1182 
 1183 #ifdef WITH_NMNULL
 1184 struct netmap_null_adapter {
 1185         struct netmap_adapter up;
 1186 };
 1187 #endif /* WITH_NMNULL */
 1188 
 1189 
 1190 /* return slots reserved to rx clients; used in drivers */
 1191 static inline uint32_t
 1192 nm_kr_rxspace(struct netmap_kring *k)
 1193 {
 1194         int space = k->nr_hwtail - k->nr_hwcur;
 1195         if (space < 0)
 1196                 space += k->nkr_num_slots;
 1197         nm_prdis("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail);
 1198 
 1199         return space;
 1200 }
 1201 
 1202 /* return slots reserved to tx clients */
 1203 #define nm_kr_txspace(_k) nm_kr_rxspace(_k)
 1204 
 1205 
 1206 /* True if no space in the tx ring, only valid after txsync_prologue */
 1207 static inline int
 1208 nm_kr_txempty(struct netmap_kring *kring)
 1209 {
 1210         return kring->rhead == kring->nr_hwtail;
 1211 }
 1212 
 1213 /* True if no more completed slots in the rx ring, only valid after
 1214  * rxsync_prologue */
 1215 #define nm_kr_rxempty(_k)       nm_kr_txempty(_k)
 1216 
 1217 /* True if the application needs to wait for more space on the ring
 1218  * (more received packets or more free tx slots).
 1219  * Only valid after *xsync_prologue. */
 1220 static inline int
 1221 nm_kr_wouldblock(struct netmap_kring *kring)
 1222 {
 1223         return kring->rcur == kring->nr_hwtail;
 1224 }
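
/*
 * Example usage (illustrative sketch; revents/want_rx are hypothetical
 * poll-state variables): after an rxsync, a poll() backend can decide
 * whether to report readiness, e.g.
 *
 *        kring->nm_sync(kring, 0);
 *        if (!nm_kr_wouldblock(kring))
 *                revents |= want_rx;
 */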
 1225 
 1226 /*
 1227  * protect against multiple threads using the same ring.
 1228  * also check that the ring has not been stopped or locked
 1229  */
 1230 #define NM_KR_BUSY      1       /* some other thread is syncing the ring */
 1231 #define NM_KR_STOPPED   2       /* unbounded stop (ifconfig down or driver unload) */
 1232 #define NM_KR_LOCKED    3       /* bounded, brief stop for mutual exclusion */
 1233 
 1234 
 1235 /* release the previously acquired right to use the *sync() methods of the ring */
 1236 static __inline void nm_kr_put(struct netmap_kring *kr)
 1237 {
 1238         NM_ATOMIC_CLEAR(&kr->nr_busy);
 1239 }
 1240 
 1241 
 1242 /* true if the ifp that backed the adapter has disappeared (e.g., the
 1243  * driver has been unloaded)
 1244  */
 1245 static inline int nm_iszombie(struct netmap_adapter *na);
 1246 
 1247 /* try to obtain exclusive right to issue the *sync() operations on the ring.
 1248  * The right is obtained and must be later relinquished via nm_kr_put() if and
 1249  * only if nm_kr_tryget() returns 0.
 1250  * If can_sleep is 1 there are only two other possible outcomes:
 1251  * - the function returns NM_KR_BUSY
 1252  * - the function returns NM_KR_STOPPED and sets the POLLERR bit in *perr
 1253  *   (if non-null)
 1254  * In both cases the caller will typically skip the ring, possibly collecting
 1255  * errors along the way.
 1256  * If the calling context does not allow sleeping, the caller must pass 0 in can_sleep.
 1257  * In the latter case, the function may also return NM_KR_LOCKED and leave *perr
 1258  * untouched: ideally, the caller should try again at a later time.
 1259  */
 1260 static __inline int nm_kr_tryget(struct netmap_kring *kr, int can_sleep, int *perr)
 1261 {
 1262         int busy = 1, stopped;
 1263         /* check a first time without taking the lock
 1264          * to avoid starvation for nm_kr_get()
 1265          */
 1266 retry:
 1267         stopped = kr->nkr_stopped;
 1268         if (unlikely(stopped)) {
 1269                 goto stop;
 1270         }
 1271         busy = NM_ATOMIC_TEST_AND_SET(&kr->nr_busy);
 1272         /* we should not return NM_KR_BUSY if the ring was
 1273          * actually stopped, so check another time after
 1274          * the barrier provided by the atomic operation
 1275          */
 1276         stopped = kr->nkr_stopped;
 1277         if (unlikely(stopped)) {
 1278                 goto stop;
 1279         }
 1280 
 1281         if (unlikely(nm_iszombie(kr->na))) {
 1282                 stopped = NM_KR_STOPPED;
 1283                 goto stop;
 1284         }
 1285 
 1286         return unlikely(busy) ? NM_KR_BUSY : 0;
 1287 
 1288 stop:
 1289         if (!busy)
 1290                 nm_kr_put(kr);
 1291         if (stopped == NM_KR_STOPPED) {
 1292 /* if POLLERR is defined we want to use it to simplify netmap_poll().
 1293  * Otherwise, any non-zero value will do.
 1294  */
 1295 #ifdef POLLERR
 1296 #define NM_POLLERR POLLERR
 1297 #else
 1298 #define NM_POLLERR 1
 1299 #endif /* POLLERR */
 1300                 if (perr)
 1301                         *perr |= NM_POLLERR;
 1302 #undef NM_POLLERR
 1303         } else if (can_sleep) {
 1304                 tsleep(kr, 0, "NM_KR_TRYGET", 4);
 1305                 goto retry;
 1306         }
 1307         return stopped;
 1308 }
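/*
 * Example (illustrative sketch, not part of the original file): the typical
 * nm_kr_tryget()/nm_kr_put() pairing around a *sync() call. 'na', 'i' and
 * 'revents' are assumed names from the calling context.
 */
#if 0
	struct netmap_kring *kring = na->tx_rings[i];
	int revents = 0;

	if (nm_kr_tryget(kring, 1 /* can_sleep */, &revents) == 0) {
		kring->nm_sync(kring, 0);	/* exclusive right held here */
		nm_kr_put(kring);		/* relinquish the ring */
	}
	/* on a non-zero return, skip the ring; POLLERR may be set in revents */
#endif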
 1309 
 1310 /* put the ring in the 'stopped' state and wait for the current user (if any) to
 1311  * notice. stopped must be either NM_KR_STOPPED or NM_KR_LOCKED
 1312  */
 1313 static __inline void nm_kr_stop(struct netmap_kring *kr, int stopped)
 1314 {
 1315         kr->nkr_stopped = stopped;
 1316         while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
 1317                 tsleep(kr, 0, "NM_KR_GET", 4);
 1318 }
 1319 
 1320 /* restart a ring after a stop */
 1321 static __inline void nm_kr_start(struct netmap_kring *kr)
 1322 {
 1323         kr->nkr_stopped = 0;
 1324         nm_kr_put(kr);
 1325 }
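/*
 * Example (illustrative sketch, not part of the original file): a bounded,
 * brief stop of a ring around an update of its state, following the
 * nm_kr_stop()/nm_kr_start() pairing above.
 */
#if 0
	nm_kr_stop(kring, NM_KR_LOCKED);  /* wait for the current user, if any */
	/* ... update ring state here ... */
	nm_kr_start(kring);               /* clear the stop and release */
#endif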
 1326 
 1327 
 1328 /*
 1329  * The following functions are used by individual drivers to
 1330  * support netmap operation.
 1331  *
 1332  * netmap_attach() initializes a struct netmap_adapter, allocating the
 1333  *      struct netmap_ring's and the struct selinfo.
 1334  *
 1335  * netmap_detach() frees the memory allocated by netmap_attach().
 1336  *
 1337  * netmap_transmit() replaces the if_transmit routine of the interface,
 1338  *      and is used to intercept packets coming from the stack.
 1339  *
 1340  * netmap_load_map/netmap_reload_map are helper routines to set/reset
 1341  *      the dmamap for a packet buffer
 1342  *
 1343  * netmap_reset() is a helper routine to be called in the hw driver
 1344  *      when reinitializing a ring. It should not be called by
 1345  *      virtual ports (vale, pipes, monitor)
 1346  */
 1347 int netmap_attach(struct netmap_adapter *);
 1348 int netmap_attach_ext(struct netmap_adapter *, size_t size, int override_reg);
 1349 void netmap_detach(struct ifnet *);
 1350 int netmap_transmit(struct ifnet *, struct mbuf *);
 1351 struct netmap_slot *netmap_reset(struct netmap_adapter *na,
 1352         enum txrx tx, u_int n, u_int new_cur);
 1353 int netmap_ring_reinit(struct netmap_kring *);
 1354 int netmap_rings_config_get(struct netmap_adapter *, struct nm_config_info *);
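/*
 * Example (illustrative sketch, not part of the original file): how a NIC
 * driver typically fills a netmap_adapter template and calls netmap_attach()
 * at device-attach time. 'sc', 'foo_txsync', 'foo_rxsync' and 'foo_reg' are
 * hypothetical driver names.
 */
#if 0
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = sc->ifp;
	na.num_tx_desc = sc->num_tx_desc;
	na.num_rx_desc = sc->num_rx_desc;
	na.num_tx_rings = na.num_rx_rings = 1;
	na.nm_txsync = foo_txsync;	/* driver-provided callbacks */
	na.nm_rxsync = foo_rxsync;
	na.nm_register = foo_reg;
	netmap_attach(&na);		/* the template is copied internally */
#endif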
 1355 
 1356 /* Return codes for netmap_*x_irq. */
 1357 enum {
 1358         /* Driver should do normal interrupt processing, e.g. because
 1359          * the interface is not in netmap mode. */
 1360         NM_IRQ_PASS = 0,
 1361         /* Port is in netmap mode, and the interrupt work has been
 1362          * completed. The driver does not have to notify netmap
 1363          * again before the next interrupt. */
 1364         NM_IRQ_COMPLETED = -1,
 1365         /* Port is in netmap mode, but the interrupt work has not been
 1366          * completed. The driver has to make sure netmap will be
 1367          * notified again soon, even if no more interrupts come (e.g.
 1368          * on Linux the driver should not call napi_complete()). */
 1369         NM_IRQ_RESCHED = -2,
 1370 };
 1371 
 1372 /* default functions to handle rx/tx interrupts */
 1373 int netmap_rx_irq(struct ifnet *, u_int, u_int *);
 1374 #define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
 1375 int netmap_common_irq(struct netmap_adapter *, u_int, u_int *work_done);
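/*
 * Example (illustrative sketch, not part of the original file): handling
 * the netmap_rx_irq() return codes, as documented in the enum above, inside
 * a driver interrupt routine. 'ifp' and 'ring_nr' come from the (assumed)
 * driver context.
 */
#if 0
	u_int work_done = 0;

	switch (netmap_rx_irq(ifp, ring_nr, &work_done)) {
	case NM_IRQ_COMPLETED:
		return;		/* netmap mode, nothing left to do */
	case NM_IRQ_RESCHED:
		/* netmap mode, must guarantee a new notification soon */
		return;
	case NM_IRQ_PASS:
	default:
		break;		/* not in netmap mode: normal processing */
	}
#endif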
 1376 
 1377 
 1378 #ifdef WITH_VALE
 1379 /* functions used by external modules to interface with VALE */
 1380 #define netmap_vp_to_ifp(_vp)   ((_vp)->up.ifp)
 1381 #define netmap_ifp_to_vp(_ifp)  (NA(_ifp)->na_vp)
 1382 #define netmap_ifp_to_host_vp(_ifp) (NA(_ifp)->na_hostvp)
 1383 #define netmap_bdg_idx(_vp)     ((_vp)->bdg_port)
 1384 const char *netmap_bdg_name(struct netmap_vp_adapter *);
 1385 #else /* !WITH_VALE */
 1386 #define netmap_vp_to_ifp(_vp)   NULL
 1387 #define netmap_ifp_to_vp(_ifp)  NULL
 1388 #define netmap_ifp_to_host_vp(_ifp) NULL
 1389 #define netmap_bdg_idx(_vp)     -1
 1390 #endif /* WITH_VALE */
 1391 
 1392 static inline int
 1393 nm_netmap_on(struct netmap_adapter *na)
 1394 {
 1395         return na && na->na_flags & NAF_NETMAP_ON;
 1396 }
 1397 
 1398 static inline int
 1399 nm_native_on(struct netmap_adapter *na)
 1400 {
 1401         return nm_netmap_on(na) && (na->na_flags & NAF_NATIVE);
 1402 }
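/*
 * Example (illustrative sketch, not part of the original file): drivers
 * commonly use these predicates to bypass their regular datapath when the
 * interface is running in native netmap mode.
 */
#if 0
	if (nm_native_on(NA(ifp)))
		return;	/* rings are driven by netmap txsync/rxsync */
#endif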
 1403 
 1404 static inline struct netmap_kring *
 1405 netmap_kring_on(struct netmap_adapter *na, u_int q, enum txrx t)
 1406 {
 1407         struct netmap_kring *kring = NULL;
 1408 
 1409         if (!nm_native_on(na))
 1410                 return NULL;
 1411 
 1412         if (t == NR_RX && q < na->num_rx_rings)
 1413                 kring = na->rx_rings[q];
 1414         else if (t == NR_TX && q < na->num_tx_rings)
 1415                 kring = na->tx_rings[q];
 1416         else
 1417                 return NULL;
 1418 
 1419         return (kring->nr_mode == NKR_NETMAP_ON) ? kring : NULL;
 1420 }
 1421 
 1422 static inline int
 1423 nm_iszombie(struct netmap_adapter *na)
 1424 {
 1425         return na == NULL || (na->na_flags & NAF_ZOMBIE);
 1426 }
 1427 
 1428 void nm_set_native_flags(struct netmap_adapter *);
 1429 void nm_clear_native_flags(struct netmap_adapter *);
 1430 
 1431 void netmap_krings_mode_commit(struct netmap_adapter *na, int onoff);
 1432 
 1433 /*
 1434  * nm_*sync_prologue() functions are used in ioctl/poll and ptnetmap
 1435  * kthreads.
 1436  * We need the netmap_ring* parameter because in ptnetmap it is decoupled
 1437  * from the host kring.
 1438  * The user-space ring pointers (head/cur/tail) are shared through
 1439  * CSB between host and guest.
 1440  */
 1441 
 1442 /*
 1443  * Validates parameters in the ring/kring and returns a value for head.
 1444  * If any error, returns ring_size to force a reinit.
 1445  */
 1446 uint32_t nm_txsync_prologue(struct netmap_kring *, struct netmap_ring *);
 1447 
 1448 
 1449 /*
 1450  * Validates parameters in the ring/kring and returns a value for head.
 1451  * If any error, returns ring_size to force a reinit.
 1452  */
 1453 uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *);
 1454 
 1455 
 1456 /* check/fix address and len in tx rings */
 1457 #if 1 /* debug version */
 1458 #define NM_CHECK_ADDR_LEN(_na, _a, _l)  do {                            \
 1459         if (_a == NETMAP_BUF_BASE(_na) || _l > NETMAP_BUF_SIZE(_na)) {  \
 1460                 nm_prlim(5, "bad addr/len ring %d slot %d idx %d len %d",       \
 1461                         kring->ring_id, nm_i, slot->buf_idx, len);      \
 1462                 if (_l > NETMAP_BUF_SIZE(_na))                          \
 1463                         _l = NETMAP_BUF_SIZE(_na);                      \
 1464         } } while (0)
 1465 #else /* no debug version */
 1466 #define NM_CHECK_ADDR_LEN(_na, _a, _l)  do {                            \
 1467                 if (_l > NETMAP_BUF_SIZE(_na))                          \
 1468                         _l = NETMAP_BUF_SIZE(_na);                      \
 1469         } while (0)
 1470 #endif
 1471 
 1472 #define NM_CHECK_ADDR_LEN_OFF(na_, l_, o_) do {                         \
 1473         if ((l_) + (o_) < (l_) ||                                       \
 1474             (l_) + (o_) > NETMAP_BUF_SIZE(na_)) {                       \
 1475                 (l_) = NETMAP_BUF_SIZE(na_) - (o_);                     \
 1476         } } while (0)
 1477 
 1478 
 1479 /*---------------------------------------------------------------*/
 1480 /*
 1481  * Support routines used by netmap subsystems
 1482  * (native drivers, VALE, generic, pipes, monitors, ...)
 1483  */
 1484 
 1485 
 1486 /* common routine for all functions that create a netmap adapter. It performs
 1487  * two main tasks:
 1488  * - if the na points to an ifp, mark the ifp as netmap capable
 1489  *   using na as its native adapter;
 1490  * - provide defaults for the setup callbacks and the memory allocator
 1491  */
 1492 int netmap_attach_common(struct netmap_adapter *);
 1493 /* fill priv->np_[tr]xq{first,last} using the ringid and flags information
 1494  * coming from a struct nmreq_register
 1495  */
 1496 int netmap_interp_ringid(struct netmap_priv_d *priv, struct nmreq_header *hdr);
 1497 /* update the ring parameters (number and size of tx and rx rings).
 1498  * It calls the nm_config callback, if available.
 1499  */
 1500 int netmap_update_config(struct netmap_adapter *na);
 1501 /* create and initialize the common fields of the krings array,
 1502  * using the information that must already be available in the na.
 1503  * tailroom can be used to request the allocation of additional
 1504  * tailroom bytes after the krings array. This is used by
 1505  * netmap_vp_adapter's (i.e., VALE ports) to make room for
 1506  * leasing-related data structures
 1507  */
 1508 int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
 1509 /* deletes the kring array of the adapter. The array must have
 1510  * been created using netmap_krings_create
 1511  */
 1512 void netmap_krings_delete(struct netmap_adapter *na);
 1513 
 1514 int netmap_hw_krings_create(struct netmap_adapter *na);
 1515 void netmap_hw_krings_delete(struct netmap_adapter *na);
 1516 
 1517 /* set the stopped/enabled status of a ring.
 1518  * When stopping, it also waits for all current activity on the ring to
 1519  * terminate. The status change is then notified using the na nm_notify
 1520  * callback.
 1521  */
 1522 void netmap_set_ring(struct netmap_adapter *, u_int ring_id, enum txrx, int stopped);
 1523 /* set the stopped/enabled status of all rings of the adapter. */
 1524 void netmap_set_all_rings(struct netmap_adapter *, int stopped);
 1525 /* convenience wrappers for netmap_set_all_rings */
 1526 void netmap_disable_all_rings(struct ifnet *);
 1527 void netmap_enable_all_rings(struct ifnet *);
 1528 
 1529 int netmap_buf_size_validate(const struct netmap_adapter *na, unsigned mtu);
 1530 int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
 1531                 struct nmreq_header *);
 1532 void netmap_do_unregif(struct netmap_priv_d *priv);
 1533 
 1534 u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
 1535 int netmap_get_na(struct nmreq_header *hdr, struct netmap_adapter **na,
 1536                 struct ifnet **ifp, struct netmap_mem_d *nmd, int create);
 1537 void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp);
 1538 int netmap_get_hw_na(struct ifnet *ifp,
 1539                 struct netmap_mem_d *nmd, struct netmap_adapter **na);
 1540 void netmap_mem_restore(struct netmap_adapter *na);
 1541 
 1542 #ifdef WITH_VALE
 1543 uint32_t netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
 1544                 struct netmap_vp_adapter *, void *private_data);
 1545 
 1546 /* these are redefined in case of no VALE support */
 1547 int netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
 1548                 struct netmap_mem_d *nmd, int create);
 1549 void *netmap_vale_create(const char *bdg_name, int *return_status);
 1550 int netmap_vale_destroy(const char *bdg_name, void *auth_token);
 1551 
 1552 extern unsigned int vale_max_bridges;
 1553 
 1554 #else /* !WITH_VALE */
 1555 #define netmap_vale_learning(_1, _2, _3, _4)    0
 1556 #define netmap_get_vale_na(_1, _2, _3, _4)      0
 1557 #define netmap_vale_create(_1, _2)      NULL
 1558 #define netmap_vale_destroy(_1, _2)     0
 1559 #define vale_max_bridges                1
 1560 #endif /* !WITH_VALE */
 1561 
 1562 #ifdef WITH_PIPES
 1563 /* max number of pipes per device */
 1564 #define NM_MAXPIPES     64      /* XXX this should probably be a sysctl */
 1565 void netmap_pipe_dealloc(struct netmap_adapter *);
 1566 int netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
 1567                         struct netmap_mem_d *nmd, int create);
 1568 #else /* !WITH_PIPES */
 1569 #define NM_MAXPIPES     0
 1570 #define netmap_pipe_alloc(_1, _2)       0
 1571 #define netmap_pipe_dealloc(_1)
 1572 #define netmap_get_pipe_na(hdr, _2, _3, _4)     \
 1573         ((strchr(hdr->nr_name, '{') != NULL || strchr(hdr->nr_name, '}') != NULL) ? EOPNOTSUPP : 0)
 1574 #endif
 1575 
 1576 #ifdef WITH_MONITOR
 1577 int netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
 1578                 struct netmap_mem_d *nmd, int create);
 1579 void netmap_monitor_stop(struct netmap_adapter *na);
 1580 #else
 1581 #define netmap_get_monitor_na(hdr, _2, _3, _4) \
 1582         (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
 1583 #endif
 1584 
 1585 #ifdef WITH_NMNULL
 1586 int netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na,
 1587                 struct netmap_mem_d *nmd, int create);
 1588 #else /* !WITH_NMNULL */
 1589 #define netmap_get_null_na(hdr, _2, _3, _4) \
 1590         (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
 1591 #endif /* WITH_NMNULL */
 1592 
 1593 #ifdef CONFIG_NET_NS
 1594 struct net *netmap_bns_get(void);
 1595 void netmap_bns_put(struct net *);
 1596 void netmap_bns_getbridges(struct nm_bridge **, u_int *);
 1597 #else
 1598 extern struct nm_bridge *nm_bridges;
 1599 #define netmap_bns_get()
 1600 #define netmap_bns_put(_1)
 1601 #define netmap_bns_getbridges(b, n) \
 1602         do { *b = nm_bridges; *n = vale_max_bridges; } while (0)
 1603 #endif
 1604 
 1605 /* Various prototypes */
 1606 int netmap_poll(struct netmap_priv_d *, int events, NM_SELRECORD_T *td);
 1607 int netmap_init(void);
 1608 void netmap_fini(void);
 1609 int netmap_get_memory(struct netmap_priv_d* p);
 1610 void netmap_dtor(void *data);
 1611 
 1612 int netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
 1613                 struct thread *, int nr_body_is_user);
 1614 int netmap_ioctl_legacy(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
 1615                         struct thread *td);
 1616 size_t nmreq_size_by_type(uint16_t nr_reqtype);
 1617 
 1618 /* netmap_adapter creation/destruction */
 1619 
 1620 // #define NM_DEBUG_PUTGET 1
 1621 
 1622 #ifdef NM_DEBUG_PUTGET
 1623 
 1624 #define NM_DBG(f) __##f
 1625 
 1626 void __netmap_adapter_get(struct netmap_adapter *na);
 1627 
 1628 #define netmap_adapter_get(na)                          \
 1629         do {                                            \
 1630                 struct netmap_adapter *__na = na;       \
 1631                 __netmap_adapter_get(__na);             \
 1632                 nm_prinf("getting %p:%s -> %d", __na, (__na)->name, (__na)->na_refcount);       \
 1633         } while (0)
 1634 
 1635 int __netmap_adapter_put(struct netmap_adapter *na);
 1636 
 1637 #define netmap_adapter_put(na)                          \
 1638         ({                                              \
 1639                 struct netmap_adapter *__na = na;       \
 1640                 if (__na == NULL)                       \
 1641                         nm_prinf("putting NULL");       \
 1642                 else                                    \
 1643                         nm_prinf("putting %p:%s -> %d", __na, (__na)->name, (__na)->na_refcount - 1);   \
 1644                 __netmap_adapter_put(__na);     \
 1645         })
 1646 
 1647 #else /* !NM_DEBUG_PUTGET */
 1648 
 1649 #define NM_DBG(f) f
 1650 void netmap_adapter_get(struct netmap_adapter *na);
 1651 int netmap_adapter_put(struct netmap_adapter *na);
 1652 
 1653 #endif /* !NM_DEBUG_PUTGET */
 1654 
 1655 
 1656 /*
 1657  * module variables
 1658  */
 1659 #define NETMAP_BUF_BASE(_na)    ((_na)->na_lut.lut[0].vaddr)
 1660 #define NETMAP_BUF_SIZE(_na)    ((_na)->na_lut.objsize)
 1661 extern int netmap_no_pendintr;
 1662 extern int netmap_verbose;
 1663 #ifdef CONFIG_NETMAP_DEBUG
 1664 extern int netmap_debug;                /* for debugging */
 1665 #else /* !CONFIG_NETMAP_DEBUG */
 1666 #define netmap_debug (0)
 1667 #endif /* !CONFIG_NETMAP_DEBUG */
 1668 enum {                                  /* debug flags */
 1669         NM_DEBUG_ON = 1,                /* generic debug messages */
 1670         NM_DEBUG_HOST = 0x2,            /* debug host stack */
 1671         NM_DEBUG_RXSYNC = 0x10,         /* debug on rxsync/txsync */
 1672         NM_DEBUG_TXSYNC = 0x20,
 1673         NM_DEBUG_RXINTR = 0x100,        /* debug on rx/tx intr (driver) */
 1674         NM_DEBUG_TXINTR = 0x200,
 1675         NM_DEBUG_NIC_RXSYNC = 0x1000,   /* debug on NIC rxsync/txsync */
 1676         NM_DEBUG_NIC_TXSYNC = 0x2000,
 1677         NM_DEBUG_MEM = 0x4000,          /* verbose memory allocations/deallocations */
 1678         NM_DEBUG_VALE = 0x8000,         /* debug messages from the VALE switch */
 1679         NM_DEBUG_BDG = NM_DEBUG_VALE,
 1680 };
 1681 
 1682 extern int netmap_txsync_retry;
 1683 extern int netmap_generic_hwcsum;
 1684 extern int netmap_generic_mit;
 1685 extern int netmap_generic_ringsize;
 1686 extern int netmap_generic_rings;
 1687 #ifdef linux
 1688 extern int netmap_generic_txqdisc;
 1689 #endif
 1690 
 1691 /*
 1692  * NA returns a pointer to the struct netmap_adapter from the ifp.
 1693  * WNA is os-specific and must be defined in glue code.
 1694  */
 1695 #define NA(_ifp)        ((struct netmap_adapter *)WNA(_ifp))
 1696 
 1697 /*
 1698  * we provide a default implementation of NM_ATTACH_NA/NM_DETACH_NA
 1699  * based on the WNA field.
 1700  * Glue code may override this by defining its own NM_ATTACH_NA
 1701  */
 1702 #ifndef NM_ATTACH_NA
 1703 /*
 1704  * On old versions of FreeBSD, NA(ifp) is a pspare. On linux we
 1705  * overload another pointer in the netdev.
 1706  *
 1707  * We check if NA(ifp) is set and its first element has a related
 1708  * magic value. The capenable is within the struct netmap_adapter.
 1709  */
 1710 #define NETMAP_MAGIC    0x52697a7a
 1711 
 1712 #define NM_NA_VALID(ifp)        (NA(ifp) &&             \
 1713         ((uint32_t)(uintptr_t)NA(ifp) ^ NA(ifp)->magic) == NETMAP_MAGIC )
 1714 
 1715 #define NM_ATTACH_NA(ifp, na) do {                                      \
 1716         WNA(ifp) = na;                                                  \
 1717         if (NA(ifp))                                                    \
 1718                 NA(ifp)->magic =                                        \
 1719                         ((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC;  \
 1720 } while(0)
 1721 #define NM_RESTORE_NA(ifp, na)  WNA(ifp) = na;
 1722 
 1723 #define NM_DETACH_NA(ifp)       do { WNA(ifp) = NULL; } while (0)
 1724 #define NM_NA_CLASH(ifp)        (NA(ifp) && !NM_NA_VALID(ifp))
 1725 #endif /* !NM_ATTACH_NA */
 1726 
 1727 
 1728 #define NM_IS_NATIVE(ifp)       (NM_NA_VALID(ifp) && NA(ifp)->nm_dtor == netmap_hw_dtor)
 1729 
 1730 #if defined(__FreeBSD__)
 1731 
 1732 /* Returns the IOMMU group id of the device, used by the memory allocator
 1733  * to detect devices in different IOMMU domains. Unsupported on FreeBSD: -1. */
 1734 #define nm_iommu_group_id(dev) (-1)
 1735 
 1736 /* Callback invoked by the dma machinery after a successful dmamap_load */
 1737 static void netmap_dmamap_cb(__unused void *arg,
 1738     __unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
 1739 {
 1740 }
 1741 
 1742 /* bus_dmamap_load wrapper: call aforementioned function if map != NULL.
 1743  * XXX can we do it without a callback ?
 1744  */
 1745 static inline int
 1746 netmap_load_map(struct netmap_adapter *na,
 1747         bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
 1748 {
 1749         if (map)
 1750                 bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
 1751                     netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
 1752         return 0;
 1753 }
 1754 
 1755 static inline void
 1756 netmap_unload_map(struct netmap_adapter *na,
 1757         bus_dma_tag_t tag, bus_dmamap_t map)
 1758 {
 1759         if (map)
 1760                 bus_dmamap_unload(tag, map);
 1761 }
 1762 
 1763 #define netmap_sync_map(na, tag, map, sz, t)
 1764 
 1765 /* update the map when a buffer changes. */
 1766 static inline void
 1767 netmap_reload_map(struct netmap_adapter *na,
 1768         bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
 1769 {
 1770         if (map) {
 1771                 bus_dmamap_unload(tag, map);
 1772                 bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
 1773                     netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
 1774         }
 1775 }
 1776 
 1777 #elif defined(_WIN32)
 1778 
 1779 #else /* linux */
 1780 
 1781 int nm_iommu_group_id(bus_dma_tag_t dev);
 1782 #include <linux/dma-mapping.h>
 1783 
 1784 /*
 1785  * on linux we need
 1786  *      dma_map_single(&pdev->dev, virt_addr, len, direction)
 1787  *      dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction)
 1788  */
 1789 #if 0
 1790         struct e1000_buffer *buffer_info =  &tx_ring->buffer_info[l];
 1791         /* set time_stamp *before* dma to help avoid a possible race */
 1792         buffer_info->time_stamp = jiffies;
 1793         buffer_info->mapped_as_page = false;
 1794         buffer_info->length = len;
 1795         //buffer_info->next_to_watch = l;
 1796         /* reload dma map */
 1797         dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
 1798                         NETMAP_BUF_SIZE, DMA_TO_DEVICE);
 1799         buffer_info->dma = dma_map_single(&adapter->pdev->dev,
 1800                         addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE);
 1801 
 1802         if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
 1803                 nm_prerr("dma mapping error");
 1804                 /* goto dma_error; See e1000_put_txbuf() */
 1805                 /* XXX reset */
 1806         }
 1807         tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX
 1808 
 1809 #endif
 1810 
 1811 static inline int
 1812 netmap_load_map(struct netmap_adapter *na,
 1813         bus_dma_tag_t tag, bus_dmamap_t map, void *buf, u_int size)
 1814 {
 1815         if (map) {
 1816                 *map = dma_map_single(na->pdev, buf, size,
 1817                                       DMA_BIDIRECTIONAL);
 1818                 if (dma_mapping_error(na->pdev, *map)) {
 1819                         *map = 0;
 1820                         return ENOMEM;
 1821                 }
 1822         }
 1823         return 0;
 1824 }
 1825 
 1826 static inline void
 1827 netmap_unload_map(struct netmap_adapter *na,
 1828         bus_dma_tag_t tag, bus_dmamap_t map, u_int sz)
 1829 {
 1830         if (*map) {
 1831                 dma_unmap_single(na->pdev, *map, sz,
 1832                                  DMA_BIDIRECTIONAL);
 1833         }
 1834 }
 1835 
 1836 #ifdef NETMAP_LINUX_HAVE_DMASYNC
 1837 static inline void
 1838 netmap_sync_map_cpu(struct netmap_adapter *na,
 1839         bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t)
 1840 {
 1841         if (*map) {
 1842                 dma_sync_single_for_cpu(na->pdev, *map, sz,
 1843                         (t == NR_TX ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
 1844         }
 1845 }
 1846 
 1847 static inline void
 1848 netmap_sync_map_dev(struct netmap_adapter *na,
 1849         bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t)
 1850 {
 1851         if (*map) {
 1852                 dma_sync_single_for_device(na->pdev, *map, sz,
 1853                         (t == NR_TX ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
 1854         }
 1855 }
 1856 
 1857 static inline void
 1858 netmap_reload_map(struct netmap_adapter *na,
 1859         bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
 1860 {
 1861         u_int sz = NETMAP_BUF_SIZE(na);
 1862 
 1863         if (*map) {
 1864                 dma_unmap_single(na->pdev, *map, sz,
 1865                                 DMA_BIDIRECTIONAL);
 1866         }
 1867 
 1868         *map = dma_map_single(na->pdev, buf, sz,
 1869                                 DMA_BIDIRECTIONAL);
 1870 }
 1871 #else /* !NETMAP_LINUX_HAVE_DMASYNC */
 1872 #define netmap_sync_map_cpu(na, tag, map, sz, t)
 1873 #define netmap_sync_map_dev(na, tag, map, sz, t)
 1874 #endif /* NETMAP_LINUX_HAVE_DMASYNC */
 1875 
 1876 #endif /* linux */
 1877 
 1878 
 1879 /*
 1880  * functions to map NIC to KRING indexes (n2k) and vice versa (k2n)
 1881  */
 1882 static inline int
 1883 netmap_idx_n2k(struct netmap_kring *kr, int idx)
 1884 {
 1885         int n = kr->nkr_num_slots;
 1886 
 1887         if (likely(kr->nkr_hwofs == 0)) {
 1888                 return idx;
 1889         }
 1890 
 1891         idx += kr->nkr_hwofs;
 1892         if (idx < 0)
 1893                 return idx + n;
 1894         else if (idx < n)
 1895                 return idx;
 1896         else
 1897                 return idx - n;
 1898 }
 1899 
 1900 
 1901 static inline int
 1902 netmap_idx_k2n(struct netmap_kring *kr, int idx)
 1903 {
 1904         int n = kr->nkr_num_slots;
 1905 
 1906         if (likely(kr->nkr_hwofs == 0)) {
 1907                 return idx;
 1908         }
 1909 
 1910         idx -= kr->nkr_hwofs;
 1911         if (idx < 0)
 1912                 return idx + n;
 1913         else if (idx < n)
 1914                 return idx;
 1915         else
 1916                 return idx - n;
 1917 }
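/*
 * Example (illustrative sketch, not part of the original file): with
 * nkr_num_slots = 256 and nkr_hwofs = 4, NIC slot 254 maps to kring slot
 * (254 + 4) - 256 = 2, and netmap_idx_k2n() inverts the mapping:
 * (2 - 4) + 256 = 254.
 */
#if 0
	int k = netmap_idx_n2k(kring, 254);	/* -> 2 */
	int n = netmap_idx_k2n(kring, k);	/* -> 254 */
#endif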
 1918 
 1919 
 1920 /* Entries of the look-up table. */
 1921 #ifdef __FreeBSD__
 1922 struct lut_entry {
 1923         void *vaddr;            /* virtual address. */
 1924         vm_paddr_t paddr;       /* physical address. */
 1925 };
 1926 #else /* linux & _WIN32 */
 1927 /* dma-mapping in linux can assign a buffer a different address
 1928  * depending on the device, so we need to have a separate
 1929  * physical-address look-up table for each na.
 1930  * We can still share the vaddrs, though, therefore we split
 1931  * the lut_entry structure.
 1932  */
 1933 struct lut_entry {
 1934         void *vaddr;            /* virtual address. */
 1935 };
 1936 
 1937 struct plut_entry {
 1938         vm_paddr_t paddr;       /* physical address. */
 1939 };
 1940 #endif /* linux & _WIN32 */
 1941 
 1942 struct netmap_obj_pool;
 1943 
 1944 /* alignment for netmap buffers */
 1945 #define NM_BUF_ALIGN    64
 1946 
 1947 /*
 1948  * NMB returns the virtual address of a buffer (buffer 0 on a bad index);
 1949  * PNMB also fills in the physical address.
 1950  */
 1951 static inline void *
 1952 NMB(struct netmap_adapter *na, struct netmap_slot *slot)
 1953 {
 1954         struct lut_entry *lut = na->na_lut.lut;
 1955         uint32_t i = slot->buf_idx;
 1956         return (unlikely(i >= na->na_lut.objtotal)) ?
 1957                 lut[0].vaddr : lut[i].vaddr;
 1958 }
 1959 
 1960 static inline void *
 1961 PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
 1962 {
 1963         uint32_t i = slot->buf_idx;
 1964         struct lut_entry *lut = na->na_lut.lut;
 1965         struct plut_entry *plut = na->na_lut.plut;
 1966         void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr;
 1967 
 1968 #ifdef _WIN32
 1969         *pp = (i >= na->na_lut.objtotal) ? (uint64_t)plut[0].paddr.QuadPart : (uint64_t)plut[i].paddr.QuadPart;
 1970 #else
 1971         *pp = (i >= na->na_lut.objtotal) ? plut[0].paddr : plut[i].paddr;
 1972 #endif
 1973         return ret;
 1974 }
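/*
 * Example (illustrative sketch, not part of the original file): resolving a
 * slot to its buffer inside a txsync loop; 'paddr' is what a driver would
 * program into the NIC descriptor. 'ring' and 'nm_i' are assumed loop
 * variables from the surrounding context.
 */
#if 0
	struct netmap_slot *slot = &ring->slot[nm_i];
	uint64_t paddr;
	void *addr = PNMB(na, slot, &paddr);
	u_int len = slot->len;

	NM_CHECK_ADDR_LEN(na, addr, len);	/* clamp out-of-range lengths */
#endif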
 1975 
 1976 static inline void
 1977 nm_write_offset(struct netmap_kring *kring,
 1978                 struct netmap_slot *slot, uint64_t offset)
 1979 {
 1980         slot->ptr = (slot->ptr & ~kring->offset_mask) |
 1981                 (offset & kring->offset_mask);
 1982 }
 1983 
 1984 static inline uint64_t
 1985 nm_get_offset(struct netmap_kring *kring, struct netmap_slot *slot)
 1986 {
 1987         uint64_t offset = (slot->ptr & kring->offset_mask);
 1988         if (unlikely(offset > kring->offset_max))
 1989                 offset = kring->offset_max;
 1990         return offset;
 1991 }
 1992 
 1993 static inline void *
 1994 NMB_O(struct netmap_kring *kring, struct netmap_slot *slot)
 1995 {
 1996         void *addr = NMB(kring->na, slot);
 1997         return (char *)addr + nm_get_offset(kring, slot);
 1998 }
 1999 
 2000 static inline void *
 2001 PNMB_O(struct netmap_kring *kring, struct netmap_slot *slot, uint64_t *pp)
 2002 {
 2003         void *addr = PNMB(kring->na, slot, pp);
 2004         uint64_t offset = nm_get_offset(kring, slot);
 2005         addr = (char *)addr + offset;
 2006         *pp += offset;
 2007         return addr;
 2008 }
 2009 
 2010 
 2011 /*
 2012  * Structure associated to each netmap file descriptor.
 2013  * It is created on open and left unbound (np_nifp == NULL).
 2014  * A successful NIOCREGIF will set np_nifp and the first few fields;
 2015  * this is protected by a global lock (NMG_LOCK) due to low contention.
 2016  *
 2017  * np_refs counts the number of references to the structure: one for the fd,
 2018  * plus (on FreeBSD) one for each active mmap which we track ourselves
 2019  * (linux automatically tracks them, but FreeBSD does not).
 2020  * np_refs is protected by NMG_LOCK.
 2021  *
 2022  * Read access to the structure is lock free, because np_nifp, once set,
 2023  * can only go back to NULL when nobody is using the entry anymore. Readers
 2024  * must check that np_nifp != NULL before using the other fields.
 2025  */
 2026 struct netmap_priv_d {
 2027         struct netmap_if * volatile np_nifp;    /* netmap if descriptor. */
 2028 
 2029         struct netmap_adapter   *np_na;
 2030         struct ifnet    *np_ifp;
 2031         uint32_t        np_flags;       /* from the ioctl */
 2032         u_int           np_qfirst[NR_TXRX],
 2033                         np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
 2034         uint16_t        np_txpoll;
 2035         uint16_t        np_kloop_state; /* use with NMG_LOCK held */
 2036 #define NM_SYNC_KLOOP_RUNNING   (1 << 0)
 2037 #define NM_SYNC_KLOOP_STOPPING  (1 << 1)
 2038         int             np_sync_flags; /* to be passed to nm_sync */
 2039 
 2040         int             np_refs;        /* use with NMG_LOCK held */
 2041 
 2042         /* pointers to the selinfo to be used for selrecord.
 2043          * Either the local or the global one depending on the
 2044          * number of rings.
 2045          */
 2046         NM_SELINFO_T *np_si[NR_TXRX];
 2047 
 2048         /* In the optional CSB mode, the user must specify the start address
 2049          * of two arrays of Communication Status Block (CSB) entries, for the
 2050          * two directions (application write / kernel read, and kernel write /
 2051          * application read).
 2052          * The number of entries must agree with the number of rings bound to
 2053          * the netmap file descriptor. The entries corresponding to the TX
 2054          * rings are laid out before the ones corresponding to the RX rings.
 2055          *
 2056          * Array of CSB entries for application --> kernel communication
 2057          * (N entries). */
 2058         struct nm_csb_atok      *np_csb_atok_base;
 2059         /* Array of CSB entries for kernel --> application communication
 2060          * (N entries). */
 2061         struct nm_csb_ktoa      *np_csb_ktoa_base;
 2062 
 2063 #ifdef linux
 2064         struct file     *np_filp;  /* used by sync kloop */
 2065 #endif /* linux */
 2066 };
 2067 
 2068 struct netmap_priv_d *netmap_priv_new(void);
 2069 void netmap_priv_delete(struct netmap_priv_d *);
 2070 
 2071 static inline int nm_kring_pending(struct netmap_priv_d *np)
 2072 {
 2073         struct netmap_adapter *na = np->np_na;
 2074         enum txrx t;
 2075         int i;
 2076 
 2077         for_rx_tx(t) {
 2078                 for (i = np->np_qfirst[t]; i < np->np_qlast[t]; i++) {
 2079                         struct netmap_kring *kring = NMR(na, t)[i];
 2080                         if (kring->nr_mode != kring->nr_pending_mode) {
 2081                                 return 1;
 2082                         }
 2083                 }
 2084         }
 2085         return 0;
 2086 }
 2087 
 2088 /* call with NMG_LOCK held */
 2089 static __inline int
 2090 nm_si_user(struct netmap_priv_d *priv, enum txrx t)
 2091 {
 2092         return (priv->np_na != NULL &&
 2093                 (priv->np_qlast[t] - priv->np_qfirst[t] > 1));
 2094 }
 2095 
 2096 #ifdef WITH_PIPES
 2097 int netmap_pipe_txsync(struct netmap_kring *txkring, int flags);
 2098 int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags);
 2099 int netmap_pipe_krings_create_both(struct netmap_adapter *na,
 2100                                   struct netmap_adapter *ona);
 2101 void netmap_pipe_krings_delete_both(struct netmap_adapter *na,
 2102                                     struct netmap_adapter *ona);
 2103 int netmap_pipe_reg_both(struct netmap_adapter *na,
 2104                          struct netmap_adapter *ona);
 2105 #endif /* WITH_PIPES */
 2106 
 2107 #ifdef WITH_MONITOR
 2108 
 2109 struct netmap_monitor_adapter {
 2110         struct netmap_adapter up;
 2111 
 2112         struct netmap_priv_d priv;
 2113         uint32_t flags;
 2114 };
 2115 
 2116 #endif /* WITH_MONITOR */
 2117 
 2118 
 2119 #ifdef WITH_GENERIC
 2120 /*
 2121  * generic netmap emulation for devices that do not have
 2122  * native netmap support.
 2123  */
 2124 int generic_netmap_attach(struct ifnet *ifp);
 2125 int generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
 2126 
 2127 int nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept);
 2128 int nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept);
 2129 
 2130 int na_is_generic(struct netmap_adapter *na);
 2131 
 2132 /*
 2133  * the generic transmit routine is passed a structure to optionally
 2134  * build a queue of descriptors, in an OS-specific way.
 2135  * The payload is at addr, if non-null, and the routine should send or queue
 2136  * the packet, returning 0 if successful, 1 on failure.
 2137  *
 2138  * At the end, if head is non-null, there will be an additional call
 2139  * to the function with addr = NULL; this should tell the OS-specific
 2140  * routine to send the queue and free any resources. Failure is ignored.
 2141  */
 2142 struct nm_os_gen_arg {
 2143         struct ifnet *ifp;
 2144         void *m;        /* os-specific mbuf-like object */
 2145         void *head, *tail; /* tailq, if the OS-specific routine needs to build one */
 2146         void *addr;     /* payload of current packet */
 2147         u_int len;      /* packet length */
 2148         u_int ring_nr;  /* transmit ring index */
 2149         u_int qevent;   /* in txqdisc mode, place an event on this mbuf */
 2150 };
 2151 
 2152 int nm_os_generic_xmit_frame(struct nm_os_gen_arg *);
 2153 int nm_os_generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
 2154 void nm_os_generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
 2155 void nm_os_generic_set_features(struct netmap_generic_adapter *gna);
 2156 
 2157 static inline struct ifnet*
 2158 netmap_generic_getifp(struct netmap_generic_adapter *gna)
 2159 {
 2160         if (gna->prev)
 2161             return gna->prev->ifp;
 2162 
 2163         return gna->up.up.ifp;
 2164 }
 2165 
 2166 void netmap_generic_irq(struct netmap_adapter *na, u_int q, u_int *work_done);
 2167 
 2168 //#define RATE_GENERIC  /* Enables communication statistics for generic. */
 2169 #ifdef RATE_GENERIC
 2170 void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi);
 2171 #else
 2172 #define generic_rate(txp, txs, txi, rxp, rxs, rxi)
 2173 #endif
 2174 
 2175 /*
 2176  * netmap_mitigation API. This is used by the generic adapter
 2177  * to reduce the number of interrupt requests and selwakeup() calls
 2178  * issued to clients on incoming packets.
 2179  */
 2180 void nm_os_mitigation_init(struct nm_generic_mit *mit, int idx,
 2181                                 struct netmap_adapter *na);
 2182 void nm_os_mitigation_start(struct nm_generic_mit *mit);
 2183 void nm_os_mitigation_restart(struct nm_generic_mit *mit);
 2184 int nm_os_mitigation_active(struct nm_generic_mit *mit);
 2185 void nm_os_mitigation_cleanup(struct nm_generic_mit *mit);
 2186 #else /* !WITH_GENERIC */
 2187 #define generic_netmap_attach(ifp)      (EOPNOTSUPP)
 2188 #define na_is_generic(na)               (0)
 2189 #endif /* WITH_GENERIC */
 2190 
 2191 /* Shared declarations for the VALE switch. */
 2192 
 2193 /*
 2194  * Each transmit queue accumulates a batch of packets into
 2195  * a structure before forwarding. Packets to the same
 2196  * destination are put in a list using ft_next as a link field.
 2197  * ft_frags and ft_next are valid only on the first fragment.
 2198  */
 2199 struct nm_bdg_fwd {     /* forwarding entry for a bridge */
 2200         void *ft_buf;           /* netmap or indirect buffer */
 2201         uint8_t ft_frags;       /* how many fragments (only on 1st frag) */
 2202         uint16_t ft_offset;     /* dst port (unused) */
 2203         uint16_t ft_flags;      /* flags, e.g. indirect */
 2204         uint16_t ft_len;        /* src fragment len */
 2205         uint16_t ft_next;       /* next packet to same destination */
 2206 };
 2207 
 2208 /* struct 'virtio_net_hdr' from linux. */
 2209 struct nm_vnet_hdr {
 2210 #define VIRTIO_NET_HDR_F_NEEDS_CSUM     1       /* Use csum_start, csum_offset */
 2211 #define VIRTIO_NET_HDR_F_DATA_VALID    2        /* Csum is valid */
 2212     uint8_t flags;
 2213 #define VIRTIO_NET_HDR_GSO_NONE         0       /* Not a GSO frame */
 2214 #define VIRTIO_NET_HDR_GSO_TCPV4        1       /* GSO frame, IPv4 TCP (TSO) */
 2215 #define VIRTIO_NET_HDR_GSO_UDP          3       /* GSO frame, IPv4 UDP (UFO) */
 2216 #define VIRTIO_NET_HDR_GSO_TCPV6        4       /* GSO frame, IPv6 TCP */
 2217 #define VIRTIO_NET_HDR_GSO_ECN          0x80    /* TCP has ECN set */
 2218     uint8_t gso_type;
 2219     uint16_t hdr_len;
 2220     uint16_t gso_size;
 2221     uint16_t csum_start;
 2222     uint16_t csum_offset;
 2223 };
 2224 
 2225 #define WORST_CASE_GSO_HEADER   (14+40+60)  /* Ethernet + IPv6 + max TCP */
 2226 
 2227 /* Private definitions for IPv4, IPv6, UDP and TCP headers. */
 2228 
 2229 struct nm_iphdr {
 2230         uint8_t         version_ihl;
 2231         uint8_t         tos;
 2232         uint16_t        tot_len;
 2233         uint16_t        id;
 2234         uint16_t        frag_off;
 2235         uint8_t         ttl;
 2236         uint8_t         protocol;
 2237         uint16_t        check;
 2238         uint32_t        saddr;
 2239         uint32_t        daddr;
 2240         /* The options start here. */
 2241 };
 2242 
 2243 struct nm_tcphdr {
 2244         uint16_t        source;
 2245         uint16_t        dest;
 2246         uint32_t        seq;
 2247         uint32_t        ack_seq;
 2248         uint8_t         doff;  /* Data offset + Reserved */
 2249         uint8_t         flags;
 2250         uint16_t        window;
 2251         uint16_t        check;
 2252         uint16_t        urg_ptr;
 2253 };
 2254 
 2255 struct nm_udphdr {
 2256         uint16_t        source;
 2257         uint16_t        dest;
 2258         uint16_t        len;
 2259         uint16_t        check;
 2260 };
 2261 
 2262 struct nm_ipv6hdr {
 2263         uint8_t         priority_version;
 2264         uint8_t         flow_lbl[3];
 2265 
 2266         uint16_t        payload_len;
 2267         uint8_t         nexthdr;
 2268         uint8_t         hop_limit;
 2269 
 2270         uint8_t         saddr[16];
 2271         uint8_t         daddr[16];
 2272 };
 2273 
 2274 /* Type used to store a checksum (in host byte order) that hasn't been
 2275  * folded yet.
 2276  */
 2277 #define rawsum_t uint32_t
 2278 
 2279 rawsum_t nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum);
 2280 uint16_t nm_os_csum_ipv4(struct nm_iphdr *iph);
 2281 void nm_os_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
 2282                       size_t datalen, uint16_t *check);
 2283 void nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
 2284                       size_t datalen, uint16_t *check);
 2285 uint16_t nm_os_csum_fold(rawsum_t cur_sum);
 2286 
 2287 void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
 2288                            struct netmap_vp_adapter *dst_na,
 2289                            const struct nm_bdg_fwd *ft_p,
 2290                            struct netmap_ring *dst_ring,
 2291                            u_int *j, u_int lim, u_int *howmany);
 2292 
 2293 /* persistent virtual port routines */
 2294 int nm_os_vi_persist(const char *, struct ifnet **);
 2295 void nm_os_vi_detach(struct ifnet *);
 2296 void nm_os_vi_init_index(void);
 2297 
 2298 /*
 2299  * kernel thread routines
 2300  */
 2301 struct nm_kctx; /* OS-specific kernel context - opaque */
 2302 typedef void (*nm_kctx_worker_fn_t)(void *data);
 2303 
 2304 /* kthread configuration */
 2305 struct nm_kctx_cfg {
 2306         long                    type;           /* kthread type/identifier */
 2307         nm_kctx_worker_fn_t     worker_fn;      /* worker function */
 2308         void                    *worker_private;/* worker parameter */
 2309         int                     attach_user;    /* attach kthread to user process */
 2310 };
 2311 /* kthread creation and control routines */
 2312 struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg,
 2313                                         void *opaque);
 2314 int nm_os_kctx_worker_start(struct nm_kctx *);
 2315 void nm_os_kctx_worker_stop(struct nm_kctx *);
 2316 void nm_os_kctx_destroy(struct nm_kctx *);
 2317 void nm_os_kctx_worker_setaff(struct nm_kctx *, int);
 2318 u_int nm_os_ncpus(void);
 2319 
 2320 int netmap_sync_kloop(struct netmap_priv_d *priv,
 2321                       struct nmreq_header *hdr);
 2322 int netmap_sync_kloop_stop(struct netmap_priv_d *priv);
 2323 
 2324 #ifdef WITH_PTNETMAP
 2325 /* ptnetmap guest routines */
 2326 
 2327 /*
 2328  * ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver
 2329  */
 2330 struct ptnetmap_memdev;
 2331 int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **,
 2332                           uint64_t *);
 2333 void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
 2334 uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int);
 2335 
 2336 /*
 2337  * netmap adapter for guest ptnetmap ports
 2338  */
 2339 struct netmap_pt_guest_adapter {
 2340         /* The netmap adapter to be used by netmap applications.
 2341          * This field must be the first, to allow upcast. */
 2342         struct netmap_hw_adapter hwup;
 2343 
 2344         /* The netmap adapter to be used by the driver. */
 2345         struct netmap_hw_adapter dr;
 2346 
 2347         /* Reference counter to track users of backend netmap port: the
 2348          * network stack and netmap clients.
 2349          * Used to decide when we need (de)allocate krings/rings and
 2350          * start (stop) ptnetmap kthreads. */
 2351         int backend_users;
 2352 
 2353 };
 2354 
 2355 int netmap_pt_guest_attach(struct netmap_adapter *na,
 2356                         unsigned int nifp_offset,
 2357                         unsigned int memid);
 2358 bool netmap_pt_guest_txsync(struct nm_csb_atok *atok,
 2359                         struct nm_csb_ktoa *ktoa,
 2360                         struct netmap_kring *kring, int flags);
 2361 bool netmap_pt_guest_rxsync(struct nm_csb_atok *atok,
 2362                         struct nm_csb_ktoa *ktoa,
 2363                         struct netmap_kring *kring, int flags);
 2364 int ptnet_nm_krings_create(struct netmap_adapter *na);
 2365 void ptnet_nm_krings_delete(struct netmap_adapter *na);
 2366 void ptnet_nm_dtor(struct netmap_adapter *na);
 2367 
 2368 /* Helper function wrapping nm_sync_kloop_appl_read(). */
 2369 static inline void
 2370 ptnet_sync_tail(struct nm_csb_ktoa *ktoa, struct netmap_kring *kring)
 2371 {
 2372         struct netmap_ring *ring = kring->ring;
 2373 
 2374         /* Update hwcur and hwtail as known by the host. */
 2375         nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur);
 2376 
 2377         /* nm_sync_finalize */
 2378         ring->tail = kring->rtail = kring->nr_hwtail;
 2379 }
 2380 #endif /* WITH_PTNETMAP */
 2381 
 2382 #ifdef __FreeBSD__
 2383 /*
 2384  * FreeBSD mbuf allocator/deallocator in emulation mode:
 2385  *
 2386  * We allocate mbufs with m_gethdr(), since the mbuf header is needed
 2387  * by the driver. We also attach custom, externally-provided storage,
 2388  * which in this case is a netmap buffer. When calling m_extadd(), however,
 2389  * we pass a NULL address, since the real address (and length) will be
 2390  * filled in by nm_os_generic_xmit_frame() right before calling
 2391  * if_transmit().
 2392  *
 2393  * The dtor function does nothing; however, we need it because mb_free_ext()
 2394  * has a KASSERT() checking that the mbuf dtor function is not NULL.
 2395  */
 2396 
 2397 static void void_mbuf_dtor(struct mbuf *m) { }
 2398 
 2399 #define SET_MBUF_DESTRUCTOR(m, fn)      do {            \
 2400         (m)->m_ext.ext_free = (fn != NULL) ?            \
 2401             (void *)fn : (void *)void_mbuf_dtor;        \
 2402 } while (0)
 2403 
 2404 static inline struct mbuf *
 2405 nm_os_get_mbuf(struct ifnet *ifp, int len)
 2406 {
 2407         struct mbuf *m;
 2408 
 2409         (void)ifp;
 2410         (void)len;
 2411 
 2412         m = m_gethdr(M_NOWAIT, MT_DATA);
 2413         if (m == NULL) {
 2414                 return m;
 2415         }
 2416 
 2417         m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor,
 2418                  NULL, NULL, 0, EXT_NET_DRV);
 2419 
 2420         return m;
 2421 }
 2422 
 2423 #endif /* __FreeBSD__ */
 2424 
 2425 struct nmreq_option * nmreq_getoption(struct nmreq_header *, uint16_t);
 2426 
 2427 int netmap_init_bridges(void);
 2428 void netmap_uninit_bridges(void);
 2429 
 2430 /* Functions to read and write CSB fields from the kernel. */
 2431 #if defined (linux)
 2432 #define CSB_READ(csb, field, r) (get_user(r, &csb->field))
 2433 #define CSB_WRITE(csb, field, v) (put_user(v, &csb->field))
 2434 #else  /* ! linux */
 2435 #define CSB_READ(csb, field, r) (r = fuword32(&csb->field))
 2436 #define CSB_WRITE(csb, field, v) (suword32(&csb->field, v))
 2437 #endif /* ! linux */
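/*
 * Example (illustrative sketch, not part of the original file): reading the
 * application-written 'head' from an atok entry and publishing a new
 * 'hwtail' through the ktoa entry. 'atok', 'ktoa' and 'kring' are assumed
 * to come from the sync kloop context.
 */
#if 0
	uint32_t head;

	CSB_READ(atok, head, head);	/* head = atok->head */
	CSB_WRITE(ktoa, hwtail, kring->nr_hwtail);
#endif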
 2438 
 2439 /* some macros that may not be defined */
 2440 #ifndef ETH_HLEN
 2441 #define ETH_HLEN 14     /* two 6-byte MAC addresses + 2-byte ethertype */
 2442 #endif
 2443 #ifndef ETH_FCS_LEN
 2444 #define ETH_FCS_LEN 4
 2445 #endif
 2446 #ifndef VLAN_HLEN
 2447 #define VLAN_HLEN 4
 2448 #endif
 2449 
 2450 #endif /* _NET_NETMAP_KERN_H_ */
