The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/if_vxlan.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
    3  * All rights reserved.
    4  * Copyright (c) 2020, Chelsio Communications.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice unmodified, this list of conditions, and the following
   11  *    disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   26  */
   27 
   28 #include "opt_inet.h"
   29 #include "opt_inet6.h"
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include <sys/param.h>
   35 #include <sys/eventhandler.h>
   36 #include <sys/kernel.h>
   37 #include <sys/lock.h>
   38 #include <sys/hash.h>
   39 #include <sys/malloc.h>
   40 #include <sys/mbuf.h>
   41 #include <sys/module.h>
   42 #include <sys/refcount.h>
   43 #include <sys/rmlock.h>
   44 #include <sys/priv.h>
   45 #include <sys/proc.h>
   46 #include <sys/queue.h>
   47 #include <sys/sbuf.h>
   48 #include <sys/socket.h>
   49 #include <sys/socketvar.h>
   50 #include <sys/sockio.h>
   51 #include <sys/sysctl.h>
   52 #include <sys/systm.h>
   53 
   54 #include <net/bpf.h>
   55 #include <net/ethernet.h>
   56 #include <net/if.h>
   57 #include <net/if_var.h>
   58 #include <net/if_private.h>
   59 #include <net/if_clone.h>
   60 #include <net/if_dl.h>
   61 #include <net/if_media.h>
   62 #include <net/if_types.h>
   63 #include <net/if_vxlan.h>
   64 #include <net/netisr.h>
   65 #include <net/route.h>
   66 #include <net/route/nhop.h>
   67 
   68 #include <netinet/in.h>
   69 #include <netinet/in_systm.h>
   70 #include <netinet/in_var.h>
   71 #include <netinet/in_pcb.h>
   72 #include <netinet/ip.h>
   73 #include <netinet/ip6.h>
   74 #include <netinet/ip_var.h>
   75 #include <netinet/udp.h>
   76 #include <netinet/udp_var.h>
   77 #include <netinet/in_fib.h>
   78 #include <netinet6/in6_fib.h>
   79 
   80 #include <netinet6/ip6_var.h>
   81 #include <netinet6/scope6_var.h>
   82 
   83 struct vxlan_softc;
   84 LIST_HEAD(vxlan_softc_head, vxlan_softc);
   85 
   86 struct sx vxlan_sx;
   87 SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock");
   88 
   /*
    * One slot of a vxlan_socket's multicast table (the vxlso_mc[] array,
    * VXLAN_SO_MC_MAX_GROUPS slots per socket).
    *
    * NOTE(review): the field meanings below are inferred from the names and
    * from the vxlan_socket_mc_*() prototypes only -- confirm against
    * vxlan_socket_mc_add_group().
    */
   89 struct vxlan_socket_mc_info {
              /* presumably the source (local) address used for the membership */
   90         union vxlan_sockaddr             vxlsomc_saddr;
              /* presumably the multicast group address */
   91         union vxlan_sockaddr             vxlsomc_gaddr;
              /* presumably the index of the interface the group was joined on */
   92         int                              vxlsomc_ifidx;
              /* presumably a use count of softcs sharing this membership */
   93         int                              vxlsomc_users;
   94 };
   95 
   96 /*
   97  * The maximum MTU of encapsulated ethernet frame within IPv4/UDP packet.
   98  */
   99 #define VXLAN_MAX_MTU   (IP_MAXPACKET - \
  100                 60 /* Maximum IPv4 header len */ - \
  101                 sizeof(struct udphdr) - \
  102                 sizeof(struct vxlan_header) - \
  103                 ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN)
      /* Capability bits combined for the interface; where they are applied is not visible here. */
  104 #define VXLAN_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU)
  105 
      /* Number of vxlan_socket_mc_info slots in each vxlan_socket (vxlso_mc[]). */
  106 #define VXLAN_SO_MC_MAX_GROUPS          32
  107 
      /*
       * Each vxlan_socket keeps VXLAN_SO_VNI_HASH_SIZE (1 << 6 = 64) lists of
       * softcs in vxlso_vni_hash[]; VXLAN_SO_VNI_HASH() reduces a VNI to a
       * bucket index by taking it modulo the table size.
       */
  108 #define VXLAN_SO_VNI_HASH_SHIFT         6
  109 #define VXLAN_SO_VNI_HASH_SIZE          (1 << VXLAN_SO_VNI_HASH_SHIFT)
  110 #define VXLAN_SO_VNI_HASH(_vni)         ((_vni) % VXLAN_SO_VNI_HASH_SIZE)
  111 
  /*
   * State for one socket that may be shared by several vxlan interfaces
   * (softcs).  NOTE(review): sharing semantics are inferred from the
   * reference count and the per-VNI softc lists -- confirm against
   * vxlan_socket_create()/vxlan_socket_release().
   */
  112 struct vxlan_socket {
              /* The underlying socket. */
  113         struct socket                   *vxlso_sock;
              /* rmlock taken through the VXLAN_SO_[RW]LOCK/UNLOCK macros. */
  114         struct rmlock                    vxlso_lock;
              /* Reference count managed by VXLAN_SO_ACQUIRE/VXLAN_SO_RELEASE. */
  115         u_int                            vxlso_refcnt;
              /* presumably the local address the socket is bound to -- confirm */
  116         union vxlan_sockaddr             vxlso_laddr;
              /* List linkage; presumably for the global vxlan_socket_list. */
  117         LIST_ENTRY(vxlan_socket)         vxlso_entry;
              /* Softc lists, VXLAN_SO_VNI_HASH_SIZE buckets (see VXLAN_SO_VNI_HASH). */
  118         struct vxlan_softc_head          vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
              /* Fixed-size table of multicast group slots. */
  119         struct vxlan_socket_mc_info      vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
  120 };
  121 
  /*
   * Wrappers around the per-socket rmlock (vxlso_lock).  The read-side
   * macros take a struct rm_priotracker pointer (_p) that is passed straight
   * through to rm_rlock()/rm_runlock().
   */
  122 #define VXLAN_SO_RLOCK(_vso, _p)        rm_rlock(&(_vso)->vxlso_lock, (_p))
  123 #define VXLAN_SO_RUNLOCK(_vso, _p)      rm_runlock(&(_vso)->vxlso_lock, (_p))
  124 #define VXLAN_SO_WLOCK(_vso)            rm_wlock(&(_vso)->vxlso_lock)
  125 #define VXLAN_SO_WUNLOCK(_vso)          rm_wunlock(&(_vso)->vxlso_lock)
      /* Assert the socket lock is held in any mode (RA_LOCKED). */
  126 #define VXLAN_SO_LOCK_ASSERT(_vso) \
  127     rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
      /* Assert the socket lock is held for writing (RA_WLOCKED). */
  128 #define VXLAN_SO_LOCK_WASSERT(_vso) \
  129     rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)
  130 
      /* Take / drop a reference on the socket's vxlso_refcnt. */
  131 #define VXLAN_SO_ACQUIRE(_vso)          refcount_acquire(&(_vso)->vxlso_refcnt)
  132 #define VXLAN_SO_RELEASE(_vso)          refcount_release(&(_vso)->vxlso_refcnt)
  133 
  /*
   * One forwarding table entry: maps a MAC address to the remote endpoint
   * address stored in vxlfe_raddr.
   */
  134 struct vxlan_ftable_entry {
              /* Linkage in one of the softc's vxl_ftable[] hash buckets. */
  135         LIST_ENTRY(vxlan_ftable_entry)   vxlfe_hash;
              /* VXLAN_FE_FLAG_* bits (dynamic vs. static entry). */
  136         uint16_t                         vxlfe_flags;
              /* The Ethernet address this entry describes. */
  137         uint8_t                          vxlfe_mac[ETHER_ADDR_LEN];
              /* Remote endpoint; rewritten when a dynamic entry moves. */
  138         union vxlan_sockaddr             vxlfe_raddr;
              /*
               * time_uptime value at which a dynamic entry becomes eligible
               * for removal by vxlan_ftable_expire(); refreshed to
               * time_uptime + vxl_ftable_timeout on each update.
               */
  139         time_t                           vxlfe_expire;
  140 };
  141 
  /*
   * vxlfe_flags bits.  Entries created by vxlan_ftable_update_locked() are
   * marked DYNAMIC; only DYNAMIC entries are aged out by
   * vxlan_ftable_expire() or removed by a non-"all" vxlan_ftable_flush().
   */
  142 #define VXLAN_FE_FLAG_DYNAMIC           0x01
  143 #define VXLAN_FE_FLAG_STATIC            0x02
  144 
      /* Non-zero when the entry has VXLAN_FE_FLAG_DYNAMIC set. */
  145 #define VXLAN_FE_IS_DYNAMIC(_fe) \
  146     ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)
  147 
      /*
       * The per-softc forwarding table has VXLAN_SC_FTABLE_SIZE (1 << 9 = 512)
       * buckets.  VXLAN_SC_FTABLE_HASH() picks the bucket for a MAC address by
       * reducing vxlan_mac_hash() modulo the table size.
       */
  148 #define VXLAN_SC_FTABLE_SHIFT           9
  149 #define VXLAN_SC_FTABLE_SIZE            (1 << VXLAN_SC_FTABLE_SHIFT)
  150 #define VXLAN_SC_FTABLE_MASK            (VXLAN_SC_FTABLE_SIZE - 1)
  151 #define VXLAN_SC_FTABLE_HASH(_sc, _mac) \
  152     (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)
  153 
  154 LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
  155 
  /* Per-interface counters kept in the softc (vxl_stats). */
  156 struct vxlan_statistics {
              /* Bumped when a new entry is refused because vxl_ftable_cnt >= vxl_ftable_max. */
  157         uint32_t        ftable_nospace;
              /*
               * Bumped each time vxlan_ftable_update_locked() drops the read
               * lock and reacquires the lock for writing.
               */
  158         uint32_t        ftable_lock_upgrade_failed;
              /* NOTE(review): presumably offload counters for the tx/rx paths -- not used in the visible code. */
  159         counter_u64_t   txcsum;
  160         counter_u64_t   tso;
  161         counter_u64_t   rxcsum;
  162 };
  163 
  /*
   * Per-interface driver state for one vxlan interface.
   *
   * vxl_lock (an rmlock) is taken through the VXLAN_[RW]LOCK macros and
   * vxl_refcnt is managed by VXLAN_ACQUIRE/VXLAN_RELEASE.
   */
  164 struct vxlan_softc {
  165         struct ifnet                    *vxl_ifp;
  166         int                              vxl_reqcap;
  167         u_int                            vxl_fibnum;
              /* Socket this interface is attached to (see struct vxlan_socket). */
  168         struct vxlan_socket             *vxl_sock;
  169         uint32_t                         vxl_vni;
  170         union vxlan_sockaddr             vxl_src_addr;
              /*
               * Default destination.  Its port is also the port stored in
               * entries learned by vxlan_ftable_learn().
               */
  171         union vxlan_sockaddr             vxl_dst_addr;
              /* VXLAN_FLAG_* bits defined below. */
  172         uint32_t                         vxl_flags;
  173 #define VXLAN_FLAG_INIT         0x0001
  174 #define VXLAN_FLAG_TEARDOWN     0x0002
  175 #define VXLAN_FLAG_LEARN        0x0004
  176 #define VXLAN_FLAG_USER_MTU     0x0008
  177 
  178         uint32_t                         vxl_port_hash_key;
  179         uint16_t                         vxl_min_port;
  180         uint16_t                         vxl_max_port;
  181         uint8_t                          vxl_ttl;
  182 
  183         /* Lookup table from MAC address to forwarding entry. */
              /* Current entry count; checked against vxl_ftable_max before a new entry is added. */
  184         uint32_t                         vxl_ftable_cnt;
  185         uint32_t                         vxl_ftable_max;
              /* Lifetime added to time_uptime when an entry is refreshed. */
  186         uint32_t                         vxl_ftable_timeout;
              /* Set from arc4random() in vxlan_ftable_init(). */
  187         uint32_t                         vxl_ftable_hash_key;
              /* Array of VXLAN_SC_FTABLE_SIZE list heads allocated in vxlan_ftable_init(). */
  188         struct vxlan_ftable_head        *vxl_ftable;
  189 
  190         /* Derived from vxl_dst_addr. */
  191         struct vxlan_ftable_entry        vxl_default_fe;
  192 
  193         struct ip_moptions              *vxl_im4o;
  194         struct ip6_moptions             *vxl_im6o;
  195 
  196         struct rmlock                    vxl_lock;
  197         volatile u_int                   vxl_refcnt;
  198 
  199         int                              vxl_unit;
  200         int                              vxl_vso_mc_index;
  201         struct vxlan_statistics          vxl_stats;
  202         struct sysctl_oid               *vxl_sysctl_node;
  203         struct sysctl_ctx_list           vxl_sysctl_ctx;
  204         struct callout                   vxl_callout;
  205         struct ether_addr                vxl_hwaddr;
  206         int                              vxl_mc_ifindex;
  207         struct ifnet                    *vxl_mc_ifp;
  208         struct ifmedia                   vxl_media;
  209         char                             vxl_mc_ifname[IFNAMSIZ];
              /* NOTE(review): presumably linkage in a socket's vxlso_vni_hash[] bucket -- confirm. */
  210         LIST_ENTRY(vxlan_softc)          vxl_entry;
  211         LIST_ENTRY(vxlan_softc)          vxl_ifdetach_list;
  212 
  213         /* For rate limiting errors on the tx fast path. */
  214         struct timeval err_time;
  215         int err_pps;
  216 };
  217 
  /*
   * Wrappers around the per-interface rmlock (vxl_lock).  The read-side
   * macros take a struct rm_priotracker pointer (_p) that is passed straight
   * through to rm_rlock()/rm_runlock().
   */
  218 #define VXLAN_RLOCK(_sc, _p)    rm_rlock(&(_sc)->vxl_lock, (_p))
  219 #define VXLAN_RUNLOCK(_sc, _p)  rm_runlock(&(_sc)->vxl_lock, (_p))
  220 #define VXLAN_WLOCK(_sc)        rm_wlock(&(_sc)->vxl_lock)
  221 #define VXLAN_WUNLOCK(_sc)      rm_wunlock(&(_sc)->vxl_lock)
  222 #define VXLAN_LOCK_WOWNED(_sc)  rm_wowned(&(_sc)->vxl_lock)
  223 #define VXLAN_LOCK_ASSERT(_sc)  rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
  224 #define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
      /*
       * Release vxl_lock in whichever mode it is currently held: write-unlock
       * when rm_wowned() reports write ownership, otherwise read-unlock using
       * tracker _p.  This is for callers such as vxlan_ftable_learn(), whose
       * read lock may have been exchanged for a write lock by
       * vxlan_ftable_update_locked().
       */
  225 #define VXLAN_UNLOCK(_sc, _p) do {              \
  226     if (VXLAN_LOCK_WOWNED(_sc))                 \
  227         VXLAN_WUNLOCK(_sc);                     \
  228     else                                        \
  229         VXLAN_RUNLOCK(_sc, _p);                 \
  230 } while (0)
  231 
      /* Take / drop a reference on the softc's vxl_refcnt. */
  232 #define VXLAN_ACQUIRE(_sc)      refcount_acquire(&(_sc)->vxl_refcnt)
  233 #define VXLAN_RELEASE(_sc)      refcount_release(&(_sc)->vxl_refcnt)
  234 
  235 #define satoconstsin(sa)        ((const struct sockaddr_in *)(sa))
  236 #define satoconstsin6(sa)       ((const struct sockaddr_in6 *)(sa))
  237 
  /*
   * A UDP header immediately followed by the VXLAN header, declared
   * __packed so no padding is inserted between or after the two members.
   */
  238 struct vxlanudphdr {
  239         struct udphdr           vxlh_udp;
  240         struct vxlan_header     vxlh_hdr;
  241 } __packed;
  242 
  243 static int      vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
  244 static void     vxlan_ftable_init(struct vxlan_softc *);
  245 static void     vxlan_ftable_fini(struct vxlan_softc *);
  246 static void     vxlan_ftable_flush(struct vxlan_softc *, int);
  247 static void     vxlan_ftable_expire(struct vxlan_softc *);
  248 static int      vxlan_ftable_update_locked(struct vxlan_softc *,
  249                     const union vxlan_sockaddr *, const uint8_t *,
  250                     struct rm_priotracker *);
  251 static int      vxlan_ftable_learn(struct vxlan_softc *,
  252                     const struct sockaddr *, const uint8_t *);
  253 static int      vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);
  254 
  255 static struct vxlan_ftable_entry *
  256                 vxlan_ftable_entry_alloc(void);
  257 static void     vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
  258 static void     vxlan_ftable_entry_init(struct vxlan_softc *,
  259                     struct vxlan_ftable_entry *, const uint8_t *,
  260                     const struct sockaddr *, uint32_t);
  261 static void     vxlan_ftable_entry_destroy(struct vxlan_softc *,
  262                     struct vxlan_ftable_entry *);
  263 static int      vxlan_ftable_entry_insert(struct vxlan_softc *,
  264                     struct vxlan_ftable_entry *);
  265 static struct vxlan_ftable_entry *
  266                 vxlan_ftable_entry_lookup(struct vxlan_softc *,
  267                     const uint8_t *);
  268 static void     vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
  269                     struct sbuf *);
  270 
  271 static struct vxlan_socket *
  272                 vxlan_socket_alloc(const union vxlan_sockaddr *);
  273 static void     vxlan_socket_destroy(struct vxlan_socket *);
  274 static void     vxlan_socket_release(struct vxlan_socket *);
  275 static struct vxlan_socket *
  276                 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
  277 static void     vxlan_socket_insert(struct vxlan_socket *);
  278 static int      vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
  279 static int      vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
  280 static int      vxlan_socket_create(struct ifnet *, int,
  281                     const union vxlan_sockaddr *, struct vxlan_socket **);
  282 static void     vxlan_socket_ifdetach(struct vxlan_socket *,
  283                     struct ifnet *, struct vxlan_softc_head *);
  284 
  285 static struct vxlan_socket *
  286                 vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
  287 static int      vxlan_sockaddr_mc_info_match(
  288                     const struct vxlan_socket_mc_info *,
  289                     const union vxlan_sockaddr *,
  290                     const union vxlan_sockaddr *, int);
  291 static int      vxlan_socket_mc_join_group(struct vxlan_socket *,
  292                     const union vxlan_sockaddr *, const union vxlan_sockaddr *,
  293                     int *, union vxlan_sockaddr *);
  294 static int      vxlan_socket_mc_leave_group(struct vxlan_socket *,
  295                     const union vxlan_sockaddr *,
  296                     const union vxlan_sockaddr *, int);
  297 static int      vxlan_socket_mc_add_group(struct vxlan_socket *,
  298                     const union vxlan_sockaddr *, const union vxlan_sockaddr *,
  299                     int, int *);
  300 static void     vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
  301                     int);
  302 
  303 static struct vxlan_softc *
  304                 vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
  305                     uint32_t);
  306 static struct vxlan_softc *
  307                 vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
  308 static int      vxlan_socket_insert_softc(struct vxlan_socket *,
  309                     struct vxlan_softc *);
  310 static void     vxlan_socket_remove_softc(struct vxlan_socket *,
  311                     struct vxlan_softc *);
  312 
  313 static struct ifnet *
  314                 vxlan_multicast_if_ref(struct vxlan_softc *, int);
  315 static void     vxlan_free_multicast(struct vxlan_softc *);
  316 static int      vxlan_setup_multicast_interface(struct vxlan_softc *);
  317 
  318 static int      vxlan_setup_multicast(struct vxlan_softc *);
  319 static int      vxlan_setup_socket(struct vxlan_softc *);
  320 #ifdef INET6
  321 static void     vxlan_setup_zero_checksum_port(struct vxlan_softc *);
  322 #endif
  323 static void     vxlan_setup_interface_hdrlen(struct vxlan_softc *);
  324 static int      vxlan_valid_init_config(struct vxlan_softc *);
  325 static void     vxlan_init_wait(struct vxlan_softc *);
  326 static void     vxlan_init_complete(struct vxlan_softc *);
  327 static void     vxlan_init(void *);
  328 static void     vxlan_release(struct vxlan_softc *);
  329 static void     vxlan_teardown_wait(struct vxlan_softc *);
  330 static void     vxlan_teardown_complete(struct vxlan_softc *);
  331 static void     vxlan_teardown_locked(struct vxlan_softc *);
  332 static void     vxlan_teardown(struct vxlan_softc *);
  333 static void     vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
  334                     struct vxlan_softc_head *);
  335 static void     vxlan_timer(void *);
  336 
  337 static int      vxlan_ctrl_get_config(struct vxlan_softc *, void *);
  338 static int      vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
  339 static int      vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
  340 static int      vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
  341 static int      vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
  342 static int      vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
  343 static int      vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
  344 static int      vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
  345 static int      vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
  346 static int      vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
  347 static int      vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
  348 static int      vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
  349 static int      vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
  350 static int      vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
  351 static int      vxlan_ctrl_flush(struct vxlan_softc *, void *);
  352 static int      vxlan_ioctl_drvspec(struct vxlan_softc *,
  353                     struct ifdrv *, int);
  354 static int      vxlan_ioctl_ifflags(struct vxlan_softc *);
  355 static int      vxlan_ioctl(struct ifnet *, u_long, caddr_t);
  356 
  357 #if defined(INET) || defined(INET6)
  358 static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
  359 static void     vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
  360                     int, uint16_t, uint16_t);
  361 #endif
  362 static int      vxlan_encap4(struct vxlan_softc *,
  363                     const union vxlan_sockaddr *, struct mbuf *);
  364 static int      vxlan_encap6(struct vxlan_softc *,
  365                     const union vxlan_sockaddr *, struct mbuf *);
  366 static int      vxlan_transmit(struct ifnet *, struct mbuf *);
  367 static void     vxlan_qflush(struct ifnet *);
  368 static bool     vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
  369                     const struct sockaddr *, void *);
  370 static int      vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
  371                     const struct sockaddr *);
  372 
  373 static int      vxlan_stats_alloc(struct vxlan_softc *);
  374 static void     vxlan_stats_free(struct vxlan_softc *);
  375 static void     vxlan_set_default_config(struct vxlan_softc *);
  376 static int      vxlan_set_user_config(struct vxlan_softc *,
  377                      struct ifvxlanparam *);
  378 static int      vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int);
  379 static void     vxlan_set_hwcaps(struct vxlan_softc *);
  380 static int      vxlan_clone_create(struct if_clone *, char *, size_t,
  381                     struct ifc_data *, struct ifnet **);
  382 static int      vxlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);
  383 
  384 static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
  385 static int      vxlan_media_change(struct ifnet *);
  386 static void     vxlan_media_status(struct ifnet *, struct ifmediareq *);
  387 
  388 static int      vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
  389                     const struct sockaddr *);
  390 static void     vxlan_sockaddr_copy(union vxlan_sockaddr *,
  391                     const struct sockaddr *);
  392 static int      vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
  393                     const struct sockaddr *);
  394 static void     vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
  395                     const struct sockaddr *);
  396 static int      vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
  397 static int      vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
  398 static int      vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
  399 static int      vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *);
  400 
  401 static int      vxlan_can_change_config(struct vxlan_softc *);
  402 static int      vxlan_check_vni(uint32_t);
  403 static int      vxlan_check_ttl(int);
  404 static int      vxlan_check_ftable_timeout(uint32_t);
  405 static int      vxlan_check_ftable_max(uint32_t);
  406 
  407 static void     vxlan_sysctl_setup(struct vxlan_softc *);
  408 static void     vxlan_sysctl_destroy(struct vxlan_softc *);
  409 static int      vxlan_tunable_int(struct vxlan_softc *, const char *, int);
  410 
  411 static void     vxlan_ifdetach_event(void *, struct ifnet *);
  412 static void     vxlan_load(void);
  413 static void     vxlan_unload(void);
  414 static int      vxlan_modevent(module_t, int, void *);
  415 
  416 static const char vxlan_name[] = "vxlan";
  417 static MALLOC_DEFINE(M_VXLAN, vxlan_name,
  418     "Virtual eXtensible LAN Interface");
  419 static struct if_clone *vxlan_cloner;
  420 
  421 static struct mtx vxlan_list_mtx;
  422 #define VXLAN_LIST_LOCK()       mtx_lock(&vxlan_list_mtx)
  423 #define VXLAN_LIST_UNLOCK()     mtx_unlock(&vxlan_list_mtx)
  424 
  425 static LIST_HEAD(, vxlan_socket) vxlan_socket_list;
  426 
  427 static eventhandler_tag vxlan_ifdetach_event_tag;
  428 
  429 SYSCTL_DECL(_net_link);
  430 SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  431     "Virtual eXtensible Local Area Network");
  432 
  433 static int vxlan_legacy_port = 0;
  434 TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
  435 static int vxlan_reuse_port = 0;
  436 TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);
  437 
  438 /* Default maximum number of addresses in the forwarding table. */
  439 #ifndef VXLAN_FTABLE_MAX
  440 #define VXLAN_FTABLE_MAX        2000
  441 #endif
  442 
  443 /* Timeout (in seconds) of addresses learned in the forwarding table. */
  444 #ifndef VXLAN_FTABLE_TIMEOUT
  445 #define VXLAN_FTABLE_TIMEOUT    (20 * 60)
  446 #endif
  447 
  448 /*
  449  * Maximum timeout (in seconds) of addresses learned in the forwarding
  450  * table.
  451  */
  452 #ifndef VXLAN_FTABLE_MAX_TIMEOUT
  453 #define VXLAN_FTABLE_MAX_TIMEOUT        (60 * 60 * 24)
  454 #endif
  455 
  456 /* Number of seconds between pruning attempts of the forwarding table. */
  457 #ifndef VXLAN_FTABLE_PRUNE
  458 #define VXLAN_FTABLE_PRUNE      (5 * 60)
  459 #endif
  460 
  461 static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
  462 
  /*
   * Descriptor for one driver-specific control command; instances live in
   * vxlan_control_table[], indexed by VXLAN_CMD_* value.
   */
  463 struct vxlan_control {
              /* Handler, called with the softc and a pointer to the command argument. */
  464         int     (*vxlc_func)(struct vxlan_softc *, void *);
              /* Size in bytes of the argument structure (e.g. sizeof(struct ifvxlancmd)). */
  465         int     vxlc_argsize;
              /*
               * VXLAN_CTRL_FLAG_* bits.  NOTE(review): the consumer
               * (vxlan_ioctl_drvspec) is not visible here; presumably COPYIN and
               * COPYOUT select the direction the argument is copied to/from
               * userland and SUSER requires a privilege check -- confirm.
               */
  466         int     vxlc_flags;
  467 #define VXLAN_CTRL_FLAG_COPYIN  0x01
  468 #define VXLAN_CTRL_FLAG_COPYOUT 0x02
  469 #define VXLAN_CTRL_FLAG_SUSER   0x04
  470 };
  471 
  472 static const struct vxlan_control vxlan_control_table[] = {
  473         [VXLAN_CMD_GET_CONFIG] =
  474             {   vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
  475                 VXLAN_CTRL_FLAG_COPYOUT
  476             },
  477 
  478         [VXLAN_CMD_SET_VNI] =
  479             {   vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
  480                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  481             },
  482 
  483         [VXLAN_CMD_SET_LOCAL_ADDR] =
  484             {   vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
  485                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  486             },
  487 
  488         [VXLAN_CMD_SET_REMOTE_ADDR] =
  489             {   vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
  490                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  491             },
  492 
  493         [VXLAN_CMD_SET_LOCAL_PORT] =
  494             {   vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
  495                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  496             },
  497 
  498         [VXLAN_CMD_SET_REMOTE_PORT] =
  499             {   vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
  500                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  501             },
  502 
  503         [VXLAN_CMD_SET_PORT_RANGE] =
  504             {   vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
  505                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  506             },
  507 
  508         [VXLAN_CMD_SET_FTABLE_TIMEOUT] =
  509             {   vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
  510                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  511             },
  512 
  513         [VXLAN_CMD_SET_FTABLE_MAX] =
  514             {   vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
  515                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  516             },
  517 
  518         [VXLAN_CMD_SET_MULTICAST_IF] =
  519             {   vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
  520                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  521             },
  522 
  523         [VXLAN_CMD_SET_TTL] =
  524             {   vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
  525                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  526             },
  527 
  528         [VXLAN_CMD_SET_LEARN] =
  529             {   vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
  530                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  531             },
  532 
  533         [VXLAN_CMD_FTABLE_ENTRY_ADD] =
  534             {   vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
  535                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  536             },
  537 
  538         [VXLAN_CMD_FTABLE_ENTRY_REM] =
  539             {   vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
  540                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  541             },
  542 
  543         [VXLAN_CMD_FLUSH] =
  544             {   vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
  545                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  546             },
  547 };
  548 
  549 static const int vxlan_control_table_size = nitems(vxlan_control_table);
  550 
  551 static int
  552 vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
  553 {
  554         int i, d;
  555 
  556         for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++)
  557                 d = ((int)a[i]) - ((int)b[i]);
  558 
  559         return (d);
  560 }
  561 
  562 static void
  563 vxlan_ftable_init(struct vxlan_softc *sc)
  564 {
  565         int i;
  566 
  567         sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
  568             VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
  569 
  570         for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
  571                 LIST_INIT(&sc->vxl_ftable[i]);
  572         sc->vxl_ftable_hash_key = arc4random();
  573 }
  574 
  575 static void
  576 vxlan_ftable_fini(struct vxlan_softc *sc)
  577 {
  578         int i;
  579 
  580         for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
  581                 KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
  582                     ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
  583         }
  584         MPASS(sc->vxl_ftable_cnt == 0);
  585 
  586         free(sc->vxl_ftable, M_VXLAN);
  587         sc->vxl_ftable = NULL;
  588 }
  589 
  590 static void
  591 vxlan_ftable_flush(struct vxlan_softc *sc, int all)
  592 {
  593         struct vxlan_ftable_entry *fe, *tfe;
  594         int i;
  595 
  596         for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
  597                 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
  598                         if (all || VXLAN_FE_IS_DYNAMIC(fe))
  599                                 vxlan_ftable_entry_destroy(sc, fe);
  600                 }
  601         }
  602 }
  603 
  604 static void
  605 vxlan_ftable_expire(struct vxlan_softc *sc)
  606 {
  607         struct vxlan_ftable_entry *fe, *tfe;
  608         int i;
  609 
  610         VXLAN_LOCK_WASSERT(sc);
  611 
  612         for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
  613                 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
  614                         if (VXLAN_FE_IS_DYNAMIC(fe) &&
  615                             time_uptime >= fe->vxlfe_expire)
  616                                 vxlan_ftable_entry_destroy(sc, fe);
  617                 }
  618         }
  619 }
  620 
  621 static int
  622 vxlan_ftable_update_locked(struct vxlan_softc *sc,
  623     const union vxlan_sockaddr *vxlsa, const uint8_t *mac,
  624     struct rm_priotracker *tracker)
  625 {
  626         struct vxlan_ftable_entry *fe;
  627         int error __unused;
  628 
  629         VXLAN_LOCK_ASSERT(sc);
  630 
  631 again:
  632         /*
  633          * A forwarding entry for this MAC address might already exist. If
  634          * so, update it, otherwise create a new one. We may have to upgrade
  635          * the lock if we have to change or create an entry.
  636          */
  637         fe = vxlan_ftable_entry_lookup(sc, mac);
  638         if (fe != NULL) {
  639                 fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
  640 
  641                 if (!VXLAN_FE_IS_DYNAMIC(fe) ||
  642                     vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa))
  643                         return (0);
  644                 if (!VXLAN_LOCK_WOWNED(sc)) {
  645                         VXLAN_RUNLOCK(sc, tracker);
  646                         VXLAN_WLOCK(sc);
  647                         sc->vxl_stats.ftable_lock_upgrade_failed++;
  648                         goto again;
  649                 }
  650                 vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa);
  651                 return (0);
  652         }
  653 
  654         if (!VXLAN_LOCK_WOWNED(sc)) {
  655                 VXLAN_RUNLOCK(sc, tracker);
  656                 VXLAN_WLOCK(sc);
  657                 sc->vxl_stats.ftable_lock_upgrade_failed++;
  658                 goto again;
  659         }
  660 
  661         if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
  662                 sc->vxl_stats.ftable_nospace++;
  663                 return (ENOSPC);
  664         }
  665 
  666         fe = vxlan_ftable_entry_alloc();
  667         if (fe == NULL)
  668                 return (ENOMEM);
  669 
  670         vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC);
  671 
  672         /* The prior lookup failed, so the insert should not. */
  673         error = vxlan_ftable_entry_insert(sc, fe);
  674         MPASS(error == 0);
  675 
  676         return (0);
  677 }
  678 
  679 static int
  680 vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa,
  681     const uint8_t *mac)
  682 {
  683         struct rm_priotracker tracker;
  684         union vxlan_sockaddr vxlsa;
  685         int error;
  686 
  687         /*
  688          * The source port may be randomly selected by the remote host, so
  689          * use the port of the default destination address.
  690          */
  691         vxlan_sockaddr_copy(&vxlsa, sa);
  692         vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
  693 
  694         if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
  695                 error = vxlan_sockaddr_in6_embedscope(&vxlsa);
  696                 if (error)
  697                         return (error);
  698         }
  699 
  700         VXLAN_RLOCK(sc, &tracker);
  701         error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker);
  702         VXLAN_UNLOCK(sc, &tracker);
  703 
  704         return (error);
  705 }
  706 
  707 static int
  708 vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
  709 {
  710         struct rm_priotracker tracker;
  711         struct sbuf sb;
  712         struct vxlan_softc *sc;
  713         struct vxlan_ftable_entry *fe;
  714         size_t size;
  715         int i, error;
  716 
  717         /*
  718          * This is mostly intended for debugging during development. It is
  719          * not practical to dump an entire large table this way.
  720          */
  721 
  722         sc = arg1;
  723         size = PAGE_SIZE;       /* Calculate later. */
  724 
  725         sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
  726         sbuf_putc(&sb, '\n');
  727 
  728         VXLAN_RLOCK(sc, &tracker);
  729         for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
  730                 LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
  731                         if (sbuf_error(&sb) != 0)
  732                                 break;
  733                         vxlan_ftable_entry_dump(fe, &sb);
  734                 }
  735         }
  736         VXLAN_RUNLOCK(sc, &tracker);
  737 
  738         if (sbuf_len(&sb) == 1)
  739                 sbuf_setpos(&sb, 0);
  740 
  741         sbuf_finish(&sb);
  742         error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
  743         sbuf_delete(&sb);
  744 
  745         return (error);
  746 }
  747 
  748 static struct vxlan_ftable_entry *
  749 vxlan_ftable_entry_alloc(void)
  750 {
  751         struct vxlan_ftable_entry *fe;
  752 
  753         fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
  754 
  755         return (fe);
  756 }
  757 
  758 static void
  759 vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
  760 {
  761 
  762         free(fe, M_VXLAN);
  763 }
  764 
  765 static void
  766 vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
  767     const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
  768 {
  769 
  770         fe->vxlfe_flags = flags;
  771         fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
  772         memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
  773         vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
  774 }
  775 
  776 static void
  777 vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
  778     struct vxlan_ftable_entry *fe)
  779 {
  780 
  781         sc->vxl_ftable_cnt--;
  782         LIST_REMOVE(fe, vxlfe_hash);
  783         vxlan_ftable_entry_free(fe);
  784 }
  785 
  786 static int
  787 vxlan_ftable_entry_insert(struct vxlan_softc *sc,
  788     struct vxlan_ftable_entry *fe)
  789 {
  790         struct vxlan_ftable_entry *lfe;
  791         uint32_t hash;
  792         int dir;
  793 
  794         VXLAN_LOCK_WASSERT(sc);
  795         hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
  796 
  797         lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
  798         if (lfe == NULL) {
  799                 LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
  800                 goto out;
  801         }
  802 
  803         do {
  804                 dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
  805                 if (dir == 0)
  806                         return (EEXIST);
  807                 if (dir > 0) {
  808                         LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
  809                         goto out;
  810                 } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
  811                         LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
  812                         goto out;
  813                 } else
  814                         lfe = LIST_NEXT(lfe, vxlfe_hash);
  815         } while (lfe != NULL);
  816 
  817 out:
  818         sc->vxl_ftable_cnt++;
  819 
  820         return (0);
  821 }
  822 
  823 static struct vxlan_ftable_entry *
  824 vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
  825 {
  826         struct vxlan_ftable_entry *fe;
  827         uint32_t hash;
  828         int dir;
  829 
  830         VXLAN_LOCK_ASSERT(sc);
  831         hash = VXLAN_SC_FTABLE_HASH(sc, mac);
  832 
  833         LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
  834                 dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
  835                 if (dir == 0)
  836                         return (fe);
  837                 if (dir > 0)
  838                         break;
  839         }
  840 
  841         return (NULL);
  842 }
  843 
  844 static void
  845 vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
  846 {
  847         char buf[64];
  848         const union vxlan_sockaddr *sa;
  849         const void *addr;
  850         int i, len, af, width;
  851 
  852         sa = &fe->vxlfe_raddr;
  853         af = sa->sa.sa_family;
  854         len = sbuf_len(sb);
  855 
  856         sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
  857             fe->vxlfe_flags);
  858 
  859         for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
  860                 sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
  861         sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
  862 
  863         if (af == AF_INET) {
  864                 addr = &sa->in4.sin_addr;
  865                 width = INET_ADDRSTRLEN - 1;
  866         } else {
  867                 addr = &sa->in6.sin6_addr;
  868                 width = INET6_ADDRSTRLEN - 1;
  869         }
  870         inet_ntop(af, addr, buf, sizeof(buf));
  871         sbuf_printf(sb, "%*s ", width, buf);
  872 
  873         sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
  874 
  875         sbuf_putc(sb, '\n');
  876 
  877         /* Truncate a partial line. */
  878         if (sbuf_error(sb) != 0)
  879                 sbuf_setpos(sb, len);
  880 }
  881 
  882 static struct vxlan_socket *
  883 vxlan_socket_alloc(const union vxlan_sockaddr *sa)
  884 {
  885         struct vxlan_socket *vso;
  886         int i;
  887 
  888         vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
  889         rm_init(&vso->vxlso_lock, "vxlansorm");
  890         refcount_init(&vso->vxlso_refcnt, 0);
  891         for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
  892                 LIST_INIT(&vso->vxlso_vni_hash[i]);
  893         vso->vxlso_laddr = *sa;
  894 
  895         return (vso);
  896 }
  897 
  898 static void
  899 vxlan_socket_destroy(struct vxlan_socket *vso)
  900 {
  901         struct socket *so;
  902 #ifdef INVARIANTS
  903         int i;
  904         struct vxlan_socket_mc_info *mc;
  905 
  906         for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
  907                 mc = &vso->vxlso_mc[i];
  908                 KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
  909                     ("%s: socket %p mc[%d] still has address",
  910                      __func__, vso, i));
  911         }
  912 
  913         for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
  914                 KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
  915                     ("%s: socket %p vni_hash[%d] not empty",
  916                      __func__, vso, i));
  917         }
  918 #endif
  919         so = vso->vxlso_sock;
  920         if (so != NULL) {
  921                 vso->vxlso_sock = NULL;
  922                 soclose(so);
  923         }
  924 
  925         rm_destroy(&vso->vxlso_lock);
  926         free(vso, M_VXLAN);
  927 }
  928 
  929 static void
  930 vxlan_socket_release(struct vxlan_socket *vso)
  931 {
  932         int destroy;
  933 
  934         VXLAN_LIST_LOCK();
  935         destroy = VXLAN_SO_RELEASE(vso);
  936         if (destroy != 0)
  937                 LIST_REMOVE(vso, vxlso_entry);
  938         VXLAN_LIST_UNLOCK();
  939 
  940         if (destroy != 0)
  941                 vxlan_socket_destroy(vso);
  942 }
  943 
  944 static struct vxlan_socket *
  945 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
  946 {
  947         struct vxlan_socket *vso;
  948 
  949         VXLAN_LIST_LOCK();
  950         LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
  951                 if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
  952                         VXLAN_SO_ACQUIRE(vso);
  953                         break;
  954                 }
  955         }
  956         VXLAN_LIST_UNLOCK();
  957 
  958         return (vso);
  959 }
  960 
  961 static void
  962 vxlan_socket_insert(struct vxlan_socket *vso)
  963 {
  964 
  965         VXLAN_LIST_LOCK();
  966         VXLAN_SO_ACQUIRE(vso);
  967         LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
  968         VXLAN_LIST_UNLOCK();
  969 }
  970 
  971 static int
  972 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
  973 {
  974         struct thread *td;
  975         int error;
  976 
  977         td = curthread;
  978 
  979         error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
  980             SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
  981         if (error) {
  982                 if_printf(ifp, "cannot create socket: %d\n", error);
  983                 return (error);
  984         }
  985 
  986         error = udp_set_kernel_tunneling(vso->vxlso_sock,
  987             vxlan_rcv_udp_packet, NULL, vso);
  988         if (error) {
  989                 if_printf(ifp, "cannot set tunneling function: %d\n", error);
  990                 return (error);
  991         }
  992 
  993         if (vxlan_reuse_port != 0) {
  994                 struct sockopt sopt;
  995                 int val = 1;
  996 
  997                 bzero(&sopt, sizeof(sopt));
  998                 sopt.sopt_dir = SOPT_SET;
  999                 sopt.sopt_level = IPPROTO_IP;
 1000                 sopt.sopt_name = SO_REUSEPORT;
 1001                 sopt.sopt_val = &val;
 1002                 sopt.sopt_valsize = sizeof(val);
 1003                 error = sosetopt(vso->vxlso_sock, &sopt);
 1004                 if (error) {
 1005                         if_printf(ifp,
 1006                             "cannot set REUSEADDR socket opt: %d\n", error);
 1007                         return (error);
 1008                 }
 1009         }
 1010 
 1011         return (0);
 1012 }
 1013 
 1014 static int
 1015 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
 1016 {
 1017         union vxlan_sockaddr laddr;
 1018         struct thread *td;
 1019         int error;
 1020 
 1021         td = curthread;
 1022         laddr = vso->vxlso_laddr;
 1023 
 1024         error = sobind(vso->vxlso_sock, &laddr.sa, td);
 1025         if (error) {
 1026                 if (error != EADDRINUSE)
 1027                         if_printf(ifp, "cannot bind socket: %d\n", error);
 1028                 return (error);
 1029         }
 1030 
 1031         return (0);
 1032 }
 1033 
 1034 static int
 1035 vxlan_socket_create(struct ifnet *ifp, int multicast,
 1036     const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
 1037 {
 1038         union vxlan_sockaddr laddr;
 1039         struct vxlan_socket *vso;
 1040         int error;
 1041 
 1042         laddr = *saddr;
 1043 
 1044         /*
 1045          * If this socket will be multicast, then only the local port
 1046          * must be specified when binding.
 1047          */
 1048         if (multicast != 0) {
 1049                 if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
 1050                         laddr.in4.sin_addr.s_addr = INADDR_ANY;
 1051 #ifdef INET6
 1052                 else
 1053                         laddr.in6.sin6_addr = in6addr_any;
 1054 #endif
 1055         }
 1056 
 1057         vso = vxlan_socket_alloc(&laddr);
 1058         if (vso == NULL)
 1059                 return (ENOMEM);
 1060 
 1061         error = vxlan_socket_init(vso, ifp);
 1062         if (error)
 1063                 goto fail;
 1064 
 1065         error = vxlan_socket_bind(vso, ifp);
 1066         if (error)
 1067                 goto fail;
 1068 
 1069         /*
 1070          * There is a small window between the bind completing and
 1071          * inserting the socket, so that a concurrent create may fail.
 1072          * Let's not worry about that for now.
 1073          */
 1074         vxlan_socket_insert(vso);
 1075         *vsop = vso;
 1076 
 1077         return (0);
 1078 
 1079 fail:
 1080         vxlan_socket_destroy(vso);
 1081 
 1082         return (error);
 1083 }
 1084 
 1085 static void
 1086 vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
 1087     struct vxlan_softc_head *list)
 1088 {
 1089         struct rm_priotracker tracker;
 1090         struct vxlan_softc *sc;
 1091         int i;
 1092 
 1093         VXLAN_SO_RLOCK(vso, &tracker);
 1094         for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
 1095                 LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
 1096                         vxlan_ifdetach(sc, ifp, list);
 1097         }
 1098         VXLAN_SO_RUNLOCK(vso, &tracker);
 1099 }
 1100 
 1101 static struct vxlan_socket *
 1102 vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
 1103 {
 1104         union vxlan_sockaddr laddr;
 1105         struct vxlan_socket *vso;
 1106 
 1107         laddr = *vxlsa;
 1108 
 1109         if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
 1110                 laddr.in4.sin_addr.s_addr = INADDR_ANY;
 1111 #ifdef INET6
 1112         else
 1113                 laddr.in6.sin6_addr = in6addr_any;
 1114 #endif
 1115 
 1116         vso = vxlan_socket_lookup(&laddr);
 1117 
 1118         return (vso);
 1119 }
 1120 
 1121 static int
 1122 vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
 1123     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
 1124     int ifidx)
 1125 {
 1126 
 1127         if (!vxlan_sockaddr_in_any(local) &&
 1128             !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
 1129                 return (0);
 1130         if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
 1131                 return (0);
 1132         if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
 1133                 return (0);
 1134 
 1135         return (1);
 1136 }
 1137 
 1138 static int
 1139 vxlan_socket_mc_join_group(struct vxlan_socket *vso,
 1140     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
 1141     int *ifidx, union vxlan_sockaddr *source)
 1142 {
 1143         struct sockopt sopt;
 1144         int error;
 1145 
 1146         *source = *local;
 1147 
 1148         if (VXLAN_SOCKADDR_IS_IPV4(group)) {
 1149                 struct ip_mreq mreq;
 1150 
 1151                 mreq.imr_multiaddr = group->in4.sin_addr;
 1152                 mreq.imr_interface = local->in4.sin_addr;
 1153 
 1154                 bzero(&sopt, sizeof(sopt));
 1155                 sopt.sopt_dir = SOPT_SET;
 1156                 sopt.sopt_level = IPPROTO_IP;
 1157                 sopt.sopt_name = IP_ADD_MEMBERSHIP;
 1158                 sopt.sopt_val = &mreq;
 1159                 sopt.sopt_valsize = sizeof(mreq);
 1160                 error = sosetopt(vso->vxlso_sock, &sopt);
 1161                 if (error)
 1162                         return (error);
 1163 
 1164                 /*
 1165                  * BMV: Ideally, there would be a formal way for us to get
 1166                  * the local interface that was selected based on the
 1167                  * imr_interface address. We could then update *ifidx so
 1168                  * vxlan_sockaddr_mc_info_match() would return a match for
 1169                  * later creates that explicitly set the multicast interface.
 1170                  *
 1171                  * If we really need to, we can of course look in the INP's
 1172                  * membership list:
 1173                  *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
 1174                  *         imo_head[]->imf_inm->inm_ifp
 1175                  * similarly to imo_match_group().
 1176                  */
 1177                 source->in4.sin_addr = local->in4.sin_addr;
 1178 
 1179         } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
 1180                 struct ipv6_mreq mreq;
 1181 
 1182                 mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
 1183                 mreq.ipv6mr_interface = *ifidx;
 1184 
 1185                 bzero(&sopt, sizeof(sopt));
 1186                 sopt.sopt_dir = SOPT_SET;
 1187                 sopt.sopt_level = IPPROTO_IPV6;
 1188                 sopt.sopt_name = IPV6_JOIN_GROUP;
 1189                 sopt.sopt_val = &mreq;
 1190                 sopt.sopt_valsize = sizeof(mreq);
 1191                 error = sosetopt(vso->vxlso_sock, &sopt);
 1192                 if (error)
 1193                         return (error);
 1194 
 1195                 /*
 1196                  * BMV: As with IPv4, we would really like to know what
 1197                  * interface in6p_lookup_mcast_ifp() selected.
 1198                  */
 1199         } else
 1200                 error = EAFNOSUPPORT;
 1201 
 1202         return (error);
 1203 }
 1204 
 1205 static int
 1206 vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
 1207     const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
 1208     int ifidx)
 1209 {
 1210         struct sockopt sopt;
 1211         int error;
 1212 
 1213         bzero(&sopt, sizeof(sopt));
 1214         sopt.sopt_dir = SOPT_SET;
 1215 
 1216         if (VXLAN_SOCKADDR_IS_IPV4(group)) {
 1217                 struct ip_mreq mreq;
 1218 
 1219                 mreq.imr_multiaddr = group->in4.sin_addr;
 1220                 mreq.imr_interface = source->in4.sin_addr;
 1221 
 1222                 sopt.sopt_level = IPPROTO_IP;
 1223                 sopt.sopt_name = IP_DROP_MEMBERSHIP;
 1224                 sopt.sopt_val = &mreq;
 1225                 sopt.sopt_valsize = sizeof(mreq);
 1226                 error = sosetopt(vso->vxlso_sock, &sopt);
 1227 
 1228         } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
 1229                 struct ipv6_mreq mreq;
 1230 
 1231                 mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
 1232                 mreq.ipv6mr_interface = ifidx;
 1233 
 1234                 sopt.sopt_level = IPPROTO_IPV6;
 1235                 sopt.sopt_name = IPV6_LEAVE_GROUP;
 1236                 sopt.sopt_val = &mreq;
 1237                 sopt.sopt_valsize = sizeof(mreq);
 1238                 error = sosetopt(vso->vxlso_sock, &sopt);
 1239 
 1240         } else
 1241                 error = EAFNOSUPPORT;
 1242 
 1243         return (error);
 1244 }
 1245 
 1246 static int
 1247 vxlan_socket_mc_add_group(struct vxlan_socket *vso,
 1248     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
 1249     int ifidx, int *idx)
 1250 {
 1251         union vxlan_sockaddr source;
 1252         struct vxlan_socket_mc_info *mc;
 1253         int i, empty, error;
 1254 
 1255         /*
 1256          * Within a socket, the same multicast group may be used by multiple
 1257          * interfaces, each with a different network identifier. But a socket
 1258          * may only join a multicast group once, so keep track of the users
 1259          * here.
 1260          */
 1261 
 1262         VXLAN_SO_WLOCK(vso);
 1263         for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
 1264                 mc = &vso->vxlso_mc[i];
 1265 
 1266                 if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
 1267                         empty++;
 1268                         continue;
 1269                 }
 1270 
 1271                 if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
 1272                         goto out;
 1273         }
 1274         VXLAN_SO_WUNLOCK(vso);
 1275 
 1276         if (empty == 0)
 1277                 return (ENOSPC);
 1278 
 1279         error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
 1280         if (error)
 1281                 return (error);
 1282 
 1283         VXLAN_SO_WLOCK(vso);
 1284         for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
 1285                 mc = &vso->vxlso_mc[i];
 1286 
 1287                 if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
 1288                         vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
 1289                         vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
 1290                         mc->vxlsomc_ifidx = ifidx;
 1291                         goto out;
 1292                 }
 1293         }
 1294         VXLAN_SO_WUNLOCK(vso);
 1295 
 1296         error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
 1297         MPASS(error == 0);
 1298 
 1299         return (ENOSPC);
 1300 
 1301 out:
 1302         mc->vxlsomc_users++;
 1303         VXLAN_SO_WUNLOCK(vso);
 1304 
 1305         *idx = i;
 1306 
 1307         return (0);
 1308 }
 1309 
 1310 static void
 1311 vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
 1312 {
 1313         union vxlan_sockaddr group, source;
 1314         struct vxlan_socket_mc_info *mc;
 1315         int ifidx, leave;
 1316 
 1317         KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
 1318             ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
 1319 
 1320         leave = 0;
 1321         mc = &vso->vxlso_mc[idx];
 1322 
 1323         VXLAN_SO_WLOCK(vso);
 1324         mc->vxlsomc_users--;
 1325         if (mc->vxlsomc_users == 0) {
 1326                 group = mc->vxlsomc_gaddr;
 1327                 source = mc->vxlsomc_saddr;
 1328                 ifidx = mc->vxlsomc_ifidx;
 1329                 bzero(mc, sizeof(*mc));
 1330                 leave = 1;
 1331         }
 1332         VXLAN_SO_WUNLOCK(vso);
 1333 
 1334         if (leave != 0) {
 1335                 /*
 1336                  * Our socket's membership in this group may have already
 1337                  * been removed if we joined through an interface that's
 1338                  * been detached.
 1339                  */
 1340                 vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
 1341         }
 1342 }
 1343 
 1344 static struct vxlan_softc *
 1345 vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
 1346 {
 1347         struct vxlan_softc *sc;
 1348         uint32_t hash;
 1349 
 1350         VXLAN_SO_LOCK_ASSERT(vso);
 1351         hash = VXLAN_SO_VNI_HASH(vni);
 1352 
 1353         LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
 1354                 if (sc->vxl_vni == vni) {
 1355                         VXLAN_ACQUIRE(sc);
 1356                         break;
 1357                 }
 1358         }
 1359 
 1360         return (sc);
 1361 }
 1362 
 1363 static struct vxlan_softc *
 1364 vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
 1365 {
 1366         struct rm_priotracker tracker;
 1367         struct vxlan_softc *sc;
 1368 
 1369         VXLAN_SO_RLOCK(vso, &tracker);
 1370         sc = vxlan_socket_lookup_softc_locked(vso, vni);
 1371         VXLAN_SO_RUNLOCK(vso, &tracker);
 1372 
 1373         return (sc);
 1374 }
 1375 
 1376 static int
 1377 vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
 1378 {
 1379         struct vxlan_softc *tsc;
 1380         uint32_t vni, hash;
 1381 
 1382         vni = sc->vxl_vni;
 1383         hash = VXLAN_SO_VNI_HASH(vni);
 1384 
 1385         VXLAN_SO_WLOCK(vso);
 1386         tsc = vxlan_socket_lookup_softc_locked(vso, vni);
 1387         if (tsc != NULL) {
 1388                 VXLAN_SO_WUNLOCK(vso);
 1389                 vxlan_release(tsc);
 1390                 return (EEXIST);
 1391         }
 1392 
 1393         VXLAN_ACQUIRE(sc);
 1394         LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
 1395         VXLAN_SO_WUNLOCK(vso);
 1396 
 1397         return (0);
 1398 }
 1399 
 1400 static void
 1401 vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
 1402 {
 1403 
 1404         VXLAN_SO_WLOCK(vso);
 1405         LIST_REMOVE(sc, vxl_entry);
 1406         VXLAN_SO_WUNLOCK(vso);
 1407 
 1408         vxlan_release(sc);
 1409 }
 1410 
 1411 static struct ifnet *
 1412 vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
 1413 {
 1414         struct ifnet *ifp;
 1415 
 1416         VXLAN_LOCK_ASSERT(sc);
 1417 
 1418         if (ipv4 && sc->vxl_im4o != NULL)
 1419                 ifp = sc->vxl_im4o->imo_multicast_ifp;
 1420         else if (!ipv4 && sc->vxl_im6o != NULL)
 1421                 ifp = sc->vxl_im6o->im6o_multicast_ifp;
 1422         else
 1423                 ifp = NULL;
 1424 
 1425         if (ifp != NULL)
 1426                 if_ref(ifp);
 1427 
 1428         return (ifp);
 1429 }
 1430 
 1431 static void
 1432 vxlan_free_multicast(struct vxlan_softc *sc)
 1433 {
 1434 
 1435         if (sc->vxl_mc_ifp != NULL) {
 1436                 if_rele(sc->vxl_mc_ifp);
 1437                 sc->vxl_mc_ifp = NULL;
 1438                 sc->vxl_mc_ifindex = 0;
 1439         }
 1440 
 1441         if (sc->vxl_im4o != NULL) {
 1442                 free(sc->vxl_im4o, M_VXLAN);
 1443                 sc->vxl_im4o = NULL;
 1444         }
 1445 
 1446         if (sc->vxl_im6o != NULL) {
 1447                 free(sc->vxl_im6o, M_VXLAN);
 1448                 sc->vxl_im6o = NULL;
 1449         }
 1450 }
 1451 
 1452 static int
 1453 vxlan_setup_multicast_interface(struct vxlan_softc *sc)
 1454 {
 1455         struct ifnet *ifp;
 1456 
 1457         ifp = ifunit_ref(sc->vxl_mc_ifname);
 1458         if (ifp == NULL) {
 1459                 if_printf(sc->vxl_ifp, "multicast interface %s does "
 1460                     "not exist\n", sc->vxl_mc_ifname);
 1461                 return (ENOENT);
 1462         }
 1463 
 1464         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 1465                 if_printf(sc->vxl_ifp, "interface %s does not support "
 1466                      "multicast\n", sc->vxl_mc_ifname);
 1467                 if_rele(ifp);
 1468                 return (ENOTSUP);
 1469         }
 1470 
 1471         sc->vxl_mc_ifp = ifp;
 1472         sc->vxl_mc_ifindex = ifp->if_index;
 1473 
 1474         return (0);
 1475 }
 1476 
 1477 static int
 1478 vxlan_setup_multicast(struct vxlan_softc *sc)
 1479 {
 1480         const union vxlan_sockaddr *group;
 1481         int error;
 1482 
 1483         group = &sc->vxl_dst_addr;
 1484         error = 0;
 1485 
 1486         if (sc->vxl_mc_ifname[0] != '\0') {
 1487                 error = vxlan_setup_multicast_interface(sc);
 1488                 if (error)
 1489                         return (error);
 1490         }
 1491 
 1492         /*
 1493          * Initialize an multicast options structure that is sufficiently
 1494          * populated for use in the respective IP output routine. This
 1495          * structure is typically stored in the socket, but our sockets
 1496          * may be shared among multiple interfaces.
 1497          */
 1498         if (VXLAN_SOCKADDR_IS_IPV4(group)) {
 1499                 sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
 1500                     M_ZERO | M_WAITOK);
 1501                 sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
 1502                 sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
 1503                 sc->vxl_im4o->imo_multicast_vif = -1;
 1504         } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
 1505                 sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
 1506                     M_ZERO | M_WAITOK);
 1507                 sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
 1508                 sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
 1509         }
 1510 
 1511         return (error);
 1512 }
 1513 
 1514 static int
 1515 vxlan_setup_socket(struct vxlan_softc *sc)
 1516 {
 1517         struct vxlan_socket *vso;
 1518         struct ifnet *ifp;
 1519         union vxlan_sockaddr *saddr, *daddr;
 1520         int multicast, error;
 1521 
 1522         vso = NULL;
 1523         ifp = sc->vxl_ifp;
 1524         saddr = &sc->vxl_src_addr;
 1525         daddr = &sc->vxl_dst_addr;
 1526 
 1527         multicast = vxlan_sockaddr_in_multicast(daddr);
 1528         MPASS(multicast != -1);
 1529         sc->vxl_vso_mc_index = -1;
 1530 
 1531         /*
 1532          * Try to create the socket. If that fails, attempt to use an
 1533          * existing socket.
 1534          */
 1535         error = vxlan_socket_create(ifp, multicast, saddr, &vso);
 1536         if (error) {
 1537                 if (multicast != 0)
 1538                         vso = vxlan_socket_mc_lookup(saddr);
 1539                 else
 1540                         vso = vxlan_socket_lookup(saddr);
 1541 
 1542                 if (vso == NULL) {
 1543                         if_printf(ifp, "cannot create socket (error: %d), "
 1544                             "and no existing socket found\n", error);
 1545                         goto out;
 1546                 }
 1547         }
 1548 
 1549         if (multicast != 0) {
 1550                 error = vxlan_setup_multicast(sc);
 1551                 if (error)
 1552                         goto out;
 1553 
 1554                 error = vxlan_socket_mc_add_group(vso, daddr, saddr,
 1555                     sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
 1556                 if (error)
 1557                         goto out;
 1558         }
 1559 
 1560         sc->vxl_sock = vso;
 1561         error = vxlan_socket_insert_softc(vso, sc);
 1562         if (error) {
 1563                 sc->vxl_sock = NULL;
 1564                 if_printf(ifp, "network identifier %d already exists in "
 1565                     "this socket\n", sc->vxl_vni);
 1566                 goto out;
 1567         }
 1568 
 1569         return (0);
 1570 
 1571 out:
 1572         if (vso != NULL) {
 1573                 if (sc->vxl_vso_mc_index != -1) {
 1574                         vxlan_socket_mc_release_group_by_idx(vso,
 1575                             sc->vxl_vso_mc_index);
 1576                         sc->vxl_vso_mc_index = -1;
 1577                 }
 1578                 if (multicast != 0)
 1579                         vxlan_free_multicast(sc);
 1580                 vxlan_socket_release(vso);
 1581         }
 1582 
 1583         return (error);
 1584 }
 1585 
 1586 #ifdef INET6
 1587 static void
 1588 vxlan_setup_zero_checksum_port(struct vxlan_softc *sc)
 1589 {
 1590 
 1591         if (!VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_src_addr))
 1592                 return;
 1593 
 1594         MPASS(sc->vxl_src_addr.in6.sin6_port != 0);
 1595         MPASS(sc->vxl_dst_addr.in6.sin6_port != 0);
 1596 
 1597         if (sc->vxl_src_addr.in6.sin6_port != sc->vxl_dst_addr.in6.sin6_port) {
 1598                 if_printf(sc->vxl_ifp, "port %d in src address does not match "
 1599                     "port %d in dst address, rfc6935_port (%d) not updated.\n",
 1600                     ntohs(sc->vxl_src_addr.in6.sin6_port),
 1601                     ntohs(sc->vxl_dst_addr.in6.sin6_port),
 1602                     V_zero_checksum_port);
 1603                 return;
 1604         }
 1605 
 1606         if (V_zero_checksum_port != 0) {
 1607                 if (V_zero_checksum_port !=
 1608                     ntohs(sc->vxl_src_addr.in6.sin6_port)) {
 1609                         if_printf(sc->vxl_ifp, "rfc6935_port is already set to "
 1610                             "%d, cannot set it to %d.\n", V_zero_checksum_port,
 1611                             ntohs(sc->vxl_src_addr.in6.sin6_port));
 1612                 }
 1613                 return;
 1614         }
 1615 
 1616         V_zero_checksum_port = ntohs(sc->vxl_src_addr.in6.sin6_port);
 1617         if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n",
 1618             V_zero_checksum_port);
 1619 }
 1620 #endif
 1621 
 1622 static void
 1623 vxlan_setup_interface_hdrlen(struct vxlan_softc *sc)
 1624 {
 1625         struct ifnet *ifp;
 1626 
 1627         VXLAN_LOCK_WASSERT(sc);
 1628 
 1629         ifp = sc->vxl_ifp;
 1630         ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
 1631 
 1632         if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
 1633                 ifp->if_hdrlen += sizeof(struct ip);
 1634         else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
 1635                 ifp->if_hdrlen += sizeof(struct ip6_hdr);
 1636 
 1637         if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0)
 1638                 ifp->if_mtu = ETHERMTU - ifp->if_hdrlen;
 1639 }
 1640 
 1641 static int
 1642 vxlan_valid_init_config(struct vxlan_softc *sc)
 1643 {
 1644         const char *reason;
 1645 
 1646         if (vxlan_check_vni(sc->vxl_vni) != 0) {
 1647                 reason = "invalid virtual network identifier specified";
 1648                 goto fail;
 1649         }
 1650 
 1651         if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
 1652                 reason = "source address type is not supported";
 1653                 goto fail;
 1654         }
 1655 
 1656         if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
 1657                 reason = "destination address type is not supported";
 1658                 goto fail;
 1659         }
 1660 
 1661         if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
 1662                 reason = "no valid destination address specified";
 1663                 goto fail;
 1664         }
 1665 
 1666         if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
 1667             sc->vxl_mc_ifname[0] != '\0') {
 1668                 reason = "can only specify interface with a group address";
 1669                 goto fail;
 1670         }
 1671 
 1672         if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
 1673                 if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
 1674                     VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
 1675                         reason = "source and destination address must both "
 1676                             "be either IPv4 or IPv6";
 1677                         goto fail;
 1678                 }
 1679         }
 1680 
 1681         if (sc->vxl_src_addr.in4.sin_port == 0) {
 1682                 reason = "local port not specified";
 1683                 goto fail;
 1684         }
 1685 
 1686         if (sc->vxl_dst_addr.in4.sin_port == 0) {
 1687                 reason = "remote port not specified";
 1688                 goto fail;
 1689         }
 1690 
 1691         return (0);
 1692 
 1693 fail:
 1694         if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
 1695         return (EINVAL);
 1696 }
 1697 
 1698 static void
 1699 vxlan_init_wait(struct vxlan_softc *sc)
 1700 {
 1701 
 1702         VXLAN_LOCK_WASSERT(sc);
 1703         while (sc->vxl_flags & VXLAN_FLAG_INIT)
 1704                 rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
 1705 }
 1706 
 1707 static void
 1708 vxlan_init_complete(struct vxlan_softc *sc)
 1709 {
 1710 
 1711         VXLAN_WLOCK(sc);
 1712         sc->vxl_flags &= ~VXLAN_FLAG_INIT;
 1713         wakeup(sc);
 1714         VXLAN_WUNLOCK(sc);
 1715 }
 1716 
 1717 static void
 1718 vxlan_init(void *xsc)
 1719 {
 1720         static const uint8_t empty_mac[ETHER_ADDR_LEN];
 1721         struct vxlan_softc *sc;
 1722         struct ifnet *ifp;
 1723 
 1724         sc = xsc;
 1725         ifp = sc->vxl_ifp;
 1726 
 1727         sx_xlock(&vxlan_sx);
 1728         VXLAN_WLOCK(sc);
 1729         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 1730                 VXLAN_WUNLOCK(sc);
 1731                 sx_xunlock(&vxlan_sx);
 1732                 return;
 1733         }
 1734         sc->vxl_flags |= VXLAN_FLAG_INIT;
 1735         VXLAN_WUNLOCK(sc);
 1736 
 1737         if (vxlan_valid_init_config(sc) != 0)
 1738                 goto out;
 1739 
 1740         if (vxlan_setup_socket(sc) != 0)
 1741                 goto out;
 1742 
 1743 #ifdef INET6
 1744         vxlan_setup_zero_checksum_port(sc);
 1745 #endif
 1746 
 1747         /* Initialize the default forwarding entry. */
 1748         vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
 1749             &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
 1750 
 1751         VXLAN_WLOCK(sc);
 1752         ifp->if_drv_flags |= IFF_DRV_RUNNING;
 1753         callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
 1754             vxlan_timer, sc);
 1755         VXLAN_WUNLOCK(sc);
 1756 
 1757         if_link_state_change(ifp, LINK_STATE_UP);
 1758 
 1759         EVENTHANDLER_INVOKE(vxlan_start, ifp, sc->vxl_src_addr.in4.sin_family,
 1760             ntohs(sc->vxl_src_addr.in4.sin_port));
 1761 out:
 1762         vxlan_init_complete(sc);
 1763         sx_xunlock(&vxlan_sx);
 1764 }
 1765 
 1766 static void
 1767 vxlan_release(struct vxlan_softc *sc)
 1768 {
 1769 
 1770         /*
 1771          * The softc may be destroyed as soon as we release our reference,
 1772          * so we cannot serialize the wakeup with the softc lock. We use a
 1773          * timeout in our sleeps so a missed wakeup is unfortunate but not
 1774          * fatal.
 1775          */
 1776         if (VXLAN_RELEASE(sc) != 0)
 1777                 wakeup(sc);
 1778 }
 1779 
 1780 static void
 1781 vxlan_teardown_wait(struct vxlan_softc *sc)
 1782 {
 1783 
 1784         VXLAN_LOCK_WASSERT(sc);
 1785         while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
 1786                 rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
 1787 }
 1788 
 1789 static void
 1790 vxlan_teardown_complete(struct vxlan_softc *sc)
 1791 {
 1792 
 1793         VXLAN_WLOCK(sc);
 1794         sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
 1795         wakeup(sc);
 1796         VXLAN_WUNLOCK(sc);
 1797 }
 1798 
 1799 static void
 1800 vxlan_teardown_locked(struct vxlan_softc *sc)
 1801 {
 1802         struct ifnet *ifp;
 1803         struct vxlan_socket *vso;
 1804 
 1805         sx_assert(&vxlan_sx, SA_XLOCKED);
 1806         VXLAN_LOCK_WASSERT(sc);
 1807         MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
 1808 
 1809         ifp = sc->vxl_ifp;
 1810         ifp->if_flags &= ~IFF_UP;
 1811         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 1812         callout_stop(&sc->vxl_callout);
 1813         vso = sc->vxl_sock;
 1814         sc->vxl_sock = NULL;
 1815 
 1816         VXLAN_WUNLOCK(sc);
 1817         if_link_state_change(ifp, LINK_STATE_DOWN);
 1818         EVENTHANDLER_INVOKE(vxlan_stop, ifp, sc->vxl_src_addr.in4.sin_family,
 1819             ntohs(sc->vxl_src_addr.in4.sin_port));
 1820 
 1821         if (vso != NULL) {
 1822                 vxlan_socket_remove_softc(vso, sc);
 1823 
 1824                 if (sc->vxl_vso_mc_index != -1) {
 1825                         vxlan_socket_mc_release_group_by_idx(vso,
 1826                             sc->vxl_vso_mc_index);
 1827                         sc->vxl_vso_mc_index = -1;
 1828                 }
 1829         }
 1830 
 1831         VXLAN_WLOCK(sc);
 1832         while (sc->vxl_refcnt != 0)
 1833                 rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
 1834         VXLAN_WUNLOCK(sc);
 1835 
 1836         callout_drain(&sc->vxl_callout);
 1837 
 1838         vxlan_free_multicast(sc);
 1839         if (vso != NULL)
 1840                 vxlan_socket_release(vso);
 1841 
 1842         vxlan_teardown_complete(sc);
 1843 }
 1844 
 1845 static void
 1846 vxlan_teardown(struct vxlan_softc *sc)
 1847 {
 1848 
 1849         sx_xlock(&vxlan_sx);
 1850         VXLAN_WLOCK(sc);
 1851         if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
 1852                 vxlan_teardown_wait(sc);
 1853                 VXLAN_WUNLOCK(sc);
 1854                 sx_xunlock(&vxlan_sx);
 1855                 return;
 1856         }
 1857 
 1858         sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
 1859         vxlan_teardown_locked(sc);
 1860         sx_xunlock(&vxlan_sx);
 1861 }
 1862 
 1863 static void
 1864 vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
 1865     struct vxlan_softc_head *list)
 1866 {
 1867 
 1868         VXLAN_WLOCK(sc);
 1869 
 1870         if (sc->vxl_mc_ifp != ifp)
 1871                 goto out;
 1872         if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
 1873                 goto out;
 1874 
 1875         sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
 1876         LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
 1877 
 1878 out:
 1879         VXLAN_WUNLOCK(sc);
 1880 }
 1881 
 1882 static void
 1883 vxlan_timer(void *xsc)
 1884 {
 1885         struct vxlan_softc *sc;
 1886 
 1887         sc = xsc;
 1888         VXLAN_LOCK_WASSERT(sc);
 1889 
 1890         vxlan_ftable_expire(sc);
 1891         callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
 1892 }
 1893 
 1894 static int
 1895 vxlan_ioctl_ifflags(struct vxlan_softc *sc)
 1896 {
 1897         struct ifnet *ifp;
 1898 
 1899         ifp = sc->vxl_ifp;
 1900 
 1901         if (ifp->if_flags & IFF_UP) {
 1902                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 1903                         vxlan_init(sc);
 1904         } else {
 1905                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 1906                         vxlan_teardown(sc);
 1907         }
 1908 
 1909         return (0);
 1910 }
 1911 
 1912 static int
 1913 vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
 1914 {
 1915         struct rm_priotracker tracker;
 1916         struct ifvxlancfg *cfg;
 1917 
 1918         cfg = arg;
 1919         bzero(cfg, sizeof(*cfg));
 1920 
 1921         VXLAN_RLOCK(sc, &tracker);
 1922         cfg->vxlc_vni = sc->vxl_vni;
 1923         memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
 1924             sizeof(union vxlan_sockaddr));
 1925         memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
 1926             sizeof(union vxlan_sockaddr));
 1927         cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
 1928         cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
 1929         cfg->vxlc_ftable_max = sc->vxl_ftable_max;
 1930         cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
 1931         cfg->vxlc_port_min = sc->vxl_min_port;
 1932         cfg->vxlc_port_max = sc->vxl_max_port;
 1933         cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
 1934         cfg->vxlc_ttl = sc->vxl_ttl;
 1935         VXLAN_RUNLOCK(sc, &tracker);
 1936 
 1937 #ifdef INET6
 1938         if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
 1939                 sa6_recoverscope(&cfg->vxlc_local_sa.in6);
 1940         if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
 1941                 sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
 1942 #endif
 1943 
 1944         return (0);
 1945 }
 1946 
 1947 static int
 1948 vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
 1949 {
 1950         struct ifvxlancmd *cmd;
 1951         int error;
 1952 
 1953         cmd = arg;
 1954 
 1955         if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
 1956                 return (EINVAL);
 1957 
 1958         VXLAN_WLOCK(sc);
 1959         if (vxlan_can_change_config(sc)) {
 1960                 sc->vxl_vni = cmd->vxlcmd_vni;
 1961                 error = 0;
 1962         } else
 1963                 error = EBUSY;
 1964         VXLAN_WUNLOCK(sc);
 1965 
 1966         return (error);
 1967 }
 1968 
 1969 static int
 1970 vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
 1971 {
 1972         struct ifvxlancmd *cmd;
 1973         union vxlan_sockaddr *vxlsa;
 1974         int error;
 1975 
 1976         cmd = arg;
 1977         vxlsa = &cmd->vxlcmd_sa;
 1978 
 1979         if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
 1980                 return (EINVAL);
 1981         if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
 1982                 return (EINVAL);
 1983         if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
 1984                 error = vxlan_sockaddr_in6_embedscope(vxlsa);
 1985                 if (error)
 1986                         return (error);
 1987         }
 1988 
 1989         VXLAN_WLOCK(sc);
 1990         if (vxlan_can_change_config(sc)) {
 1991                 vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
 1992                 vxlan_set_hwcaps(sc);
 1993                 error = 0;
 1994         } else
 1995                 error = EBUSY;
 1996         VXLAN_WUNLOCK(sc);
 1997 
 1998         return (error);
 1999 }
 2000 
 2001 static int
 2002 vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
 2003 {
 2004         struct ifvxlancmd *cmd;
 2005         union vxlan_sockaddr *vxlsa;
 2006         int error;
 2007 
 2008         cmd = arg;
 2009         vxlsa = &cmd->vxlcmd_sa;
 2010 
 2011         if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
 2012                 return (EINVAL);
 2013         if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
 2014                 error = vxlan_sockaddr_in6_embedscope(vxlsa);
 2015                 if (error)
 2016                         return (error);
 2017         }
 2018 
 2019         VXLAN_WLOCK(sc);
 2020         if (vxlan_can_change_config(sc)) {
 2021                 vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
 2022                 vxlan_setup_interface_hdrlen(sc);
 2023                 error = 0;
 2024         } else
 2025                 error = EBUSY;
 2026         VXLAN_WUNLOCK(sc);
 2027 
 2028         return (error);
 2029 }
 2030 
 2031 static int
 2032 vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
 2033 {
 2034         struct ifvxlancmd *cmd;
 2035         int error;
 2036 
 2037         cmd = arg;
 2038 
 2039         if (cmd->vxlcmd_port == 0)
 2040                 return (EINVAL);
 2041 
 2042         VXLAN_WLOCK(sc);
 2043         if (vxlan_can_change_config(sc)) {
 2044                 sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
 2045                 error = 0;
 2046         } else
 2047                 error = EBUSY;
 2048         VXLAN_WUNLOCK(sc);
 2049 
 2050         return (error);
 2051 }
 2052 
 2053 static int
 2054 vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
 2055 {
 2056         struct ifvxlancmd *cmd;
 2057         int error;
 2058 
 2059         cmd = arg;
 2060 
 2061         if (cmd->vxlcmd_port == 0)
 2062                 return (EINVAL);
 2063 
 2064         VXLAN_WLOCK(sc);
 2065         if (vxlan_can_change_config(sc)) {
 2066                 sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
 2067                 error = 0;
 2068         } else
 2069                 error = EBUSY;
 2070         VXLAN_WUNLOCK(sc);
 2071 
 2072         return (error);
 2073 }
 2074 
 2075 static int
 2076 vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
 2077 {
 2078         struct ifvxlancmd *cmd;
 2079         uint16_t min, max;
 2080         int error;
 2081 
 2082         cmd = arg;
 2083         min = cmd->vxlcmd_port_min;
 2084         max = cmd->vxlcmd_port_max;
 2085 
 2086         if (max < min)
 2087                 return (EINVAL);
 2088 
 2089         VXLAN_WLOCK(sc);
 2090         if (vxlan_can_change_config(sc)) {
 2091                 sc->vxl_min_port = min;
 2092                 sc->vxl_max_port = max;
 2093                 error = 0;
 2094         } else
 2095                 error = EBUSY;
 2096         VXLAN_WUNLOCK(sc);
 2097 
 2098         return (error);
 2099 }
 2100 
 2101 static int
 2102 vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
 2103 {
 2104         struct ifvxlancmd *cmd;
 2105         int error;
 2106 
 2107         cmd = arg;
 2108 
 2109         VXLAN_WLOCK(sc);
 2110         if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
 2111                 sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
 2112                 error = 0;
 2113         } else
 2114                 error = EINVAL;
 2115         VXLAN_WUNLOCK(sc);
 2116 
 2117         return (error);
 2118 }
 2119 
 2120 static int
 2121 vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
 2122 {
 2123         struct ifvxlancmd *cmd;
 2124         int error;
 2125 
 2126         cmd = arg;
 2127 
 2128         VXLAN_WLOCK(sc);
 2129         if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
 2130                 sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
 2131                 error = 0;
 2132         } else
 2133                 error = EINVAL;
 2134         VXLAN_WUNLOCK(sc);
 2135 
 2136         return (error);
 2137 }
 2138 
 2139 static int
 2140 vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
 2141 {
 2142         struct ifvxlancmd *cmd;
 2143         int error;
 2144 
 2145         cmd = arg;
 2146 
 2147         VXLAN_WLOCK(sc);
 2148         if (vxlan_can_change_config(sc)) {
 2149                 strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
 2150                 vxlan_set_hwcaps(sc);
 2151                 error = 0;
 2152         } else
 2153                 error = EBUSY;
 2154         VXLAN_WUNLOCK(sc);
 2155 
 2156         return (error);
 2157 }
 2158 
 2159 static int
 2160 vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
 2161 {
 2162         struct ifvxlancmd *cmd;
 2163         int error;
 2164 
 2165         cmd = arg;
 2166 
 2167         VXLAN_WLOCK(sc);
 2168         if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
 2169                 sc->vxl_ttl = cmd->vxlcmd_ttl;
 2170                 if (sc->vxl_im4o != NULL)
 2171                         sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
 2172                 if (sc->vxl_im6o != NULL)
 2173                         sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
 2174                 error = 0;
 2175         } else
 2176                 error = EINVAL;
 2177         VXLAN_WUNLOCK(sc);
 2178 
 2179         return (error);
 2180 }
 2181 
 2182 static int
 2183 vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
 2184 {
 2185         struct ifvxlancmd *cmd;
 2186 
 2187         cmd = arg;
 2188 
 2189         VXLAN_WLOCK(sc);
 2190         if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
 2191                 sc->vxl_flags |= VXLAN_FLAG_LEARN;
 2192         else
 2193                 sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
 2194         VXLAN_WUNLOCK(sc);
 2195 
 2196         return (0);
 2197 }
 2198 
 2199 static int
 2200 vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
 2201 {
 2202         union vxlan_sockaddr vxlsa;
 2203         struct ifvxlancmd *cmd;
 2204         struct vxlan_ftable_entry *fe;
 2205         int error;
 2206 
 2207         cmd = arg;
 2208         vxlsa = cmd->vxlcmd_sa;
 2209 
 2210         if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
 2211                 return (EINVAL);
 2212         if (vxlan_sockaddr_in_any(&vxlsa) != 0)
 2213                 return (EINVAL);
 2214         if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
 2215                 return (EINVAL);
 2216         /* BMV: We could support both IPv4 and IPv6 later. */
 2217         if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
 2218                 return (EAFNOSUPPORT);
 2219 
 2220         if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
 2221                 error = vxlan_sockaddr_in6_embedscope(&vxlsa);
 2222                 if (error)
 2223                         return (error);
 2224         }
 2225 
 2226         fe = vxlan_ftable_entry_alloc();
 2227         if (fe == NULL)
 2228                 return (ENOMEM);
 2229 
 2230         if (vxlsa.in4.sin_port == 0)
 2231                 vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
 2232 
 2233         vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
 2234             VXLAN_FE_FLAG_STATIC);
 2235 
 2236         VXLAN_WLOCK(sc);
 2237         error = vxlan_ftable_entry_insert(sc, fe);
 2238         VXLAN_WUNLOCK(sc);
 2239 
 2240         if (error)
 2241                 vxlan_ftable_entry_free(fe);
 2242 
 2243         return (error);
 2244 }
 2245 
 2246 static int
 2247 vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
 2248 {
 2249         struct ifvxlancmd *cmd;
 2250         struct vxlan_ftable_entry *fe;
 2251         int error;
 2252 
 2253         cmd = arg;
 2254 
 2255         VXLAN_WLOCK(sc);
 2256         fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
 2257         if (fe != NULL) {
 2258                 vxlan_ftable_entry_destroy(sc, fe);
 2259                 error = 0;
 2260         } else
 2261                 error = ENOENT;
 2262         VXLAN_WUNLOCK(sc);
 2263 
 2264         return (error);
 2265 }
 2266 
 2267 static int
 2268 vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
 2269 {
 2270         struct ifvxlancmd *cmd;
 2271         int all;
 2272 
 2273         cmd = arg;
 2274         all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
 2275 
 2276         VXLAN_WLOCK(sc);
 2277         vxlan_ftable_flush(sc, all);
 2278         VXLAN_WUNLOCK(sc);
 2279 
 2280         return (0);
 2281 }
 2282 
 2283 static int
 2284 vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
 2285 {
 2286         const struct vxlan_control *vc;
 2287         union {
 2288                 struct ifvxlancfg       cfg;
 2289                 struct ifvxlancmd       cmd;
 2290         } args;
 2291         int out, error;
 2292 
 2293         if (ifd->ifd_cmd >= vxlan_control_table_size)
 2294                 return (EINVAL);
 2295 
 2296         bzero(&args, sizeof(args));
 2297         vc = &vxlan_control_table[ifd->ifd_cmd];
 2298         out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
 2299 
 2300         if ((get != 0 && out == 0) || (get == 0 && out != 0))
 2301                 return (EINVAL);
 2302 
 2303         if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
 2304                 error = priv_check(curthread, PRIV_NET_VXLAN);
 2305                 if (error)
 2306                         return (error);
 2307         }
 2308 
 2309         if (ifd->ifd_len != vc->vxlc_argsize ||
 2310             ifd->ifd_len > sizeof(args))
 2311                 return (EINVAL);
 2312 
 2313         if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
 2314                 error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
 2315                 if (error)
 2316                         return (error);
 2317         }
 2318 
 2319         error = vc->vxlc_func(sc, &args);
 2320         if (error)
 2321                 return (error);
 2322 
 2323         if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
 2324                 error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
 2325                 if (error)
 2326                         return (error);
 2327         }
 2328 
 2329         return (0);
 2330 }
 2331 
 2332 static int
 2333 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 2334 {
 2335         struct rm_priotracker tracker;
 2336         struct vxlan_softc *sc;
 2337         struct ifreq *ifr;
 2338         struct ifdrv *ifd;
 2339         int error;
 2340 
 2341         sc = ifp->if_softc;
 2342         ifr = (struct ifreq *) data;
 2343         ifd = (struct ifdrv *) data;
 2344 
 2345         error = 0;
 2346 
 2347         switch (cmd) {
 2348         case SIOCADDMULTI:
 2349         case SIOCDELMULTI:
 2350                 break;
 2351 
 2352         case SIOCGDRVSPEC:
 2353         case SIOCSDRVSPEC:
 2354                 error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
 2355                 break;
 2356 
 2357         case SIOCSIFFLAGS:
 2358                 error = vxlan_ioctl_ifflags(sc);
 2359                 break;
 2360 
 2361         case SIOCSIFMEDIA:
 2362         case SIOCGIFMEDIA:
 2363                 error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd);
 2364                 break;
 2365 
 2366         case SIOCSIFMTU:
 2367                 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU) {
 2368                         error = EINVAL;
 2369                 } else {
 2370                         VXLAN_WLOCK(sc);
 2371                         ifp->if_mtu = ifr->ifr_mtu;
 2372                         sc->vxl_flags |= VXLAN_FLAG_USER_MTU;
 2373                         VXLAN_WUNLOCK(sc);
 2374                 }
 2375                 break;
 2376 
 2377         case SIOCSIFCAP:
 2378                 VXLAN_WLOCK(sc);
 2379                 error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap);
 2380                 if (error == 0)
 2381                         vxlan_set_hwcaps(sc);
 2382                 VXLAN_WUNLOCK(sc);
 2383                 break;
 2384 
 2385         case SIOCGTUNFIB:
 2386                 VXLAN_RLOCK(sc, &tracker);
 2387                 ifr->ifr_fib = sc->vxl_fibnum;
 2388                 VXLAN_RUNLOCK(sc, &tracker);
 2389                 break;
 2390 
 2391         case SIOCSTUNFIB:
 2392                 if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0)
 2393                         break;
 2394 
 2395                 if (ifr->ifr_fib >= rt_numfibs)
 2396                         error = EINVAL;
 2397                 else {
 2398                         VXLAN_WLOCK(sc);
 2399                         sc->vxl_fibnum = ifr->ifr_fib;
 2400                         VXLAN_WUNLOCK(sc);
 2401                 }
 2402                 break;
 2403 
 2404         default:
 2405                 error = ether_ioctl(ifp, cmd, data);
 2406                 break;
 2407         }
 2408 
 2409         return (error);
 2410 }
 2411 
 2412 #if defined(INET) || defined(INET6)
 2413 static uint16_t
 2414 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
 2415 {
 2416         int range;
 2417         uint32_t hash;
 2418 
 2419         range = sc->vxl_max_port - sc->vxl_min_port + 1;
 2420 
 2421         if (M_HASHTYPE_ISHASH(m))
 2422                 hash = m->m_pkthdr.flowid;
 2423         else
 2424                 hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
 2425                     sc->vxl_port_hash_key);
 2426 
 2427         return (sc->vxl_min_port + (hash % range));
 2428 }
 2429 
 2430 static void
 2431 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
 2432     uint16_t srcport, uint16_t dstport)
 2433 {
 2434         struct vxlanudphdr *hdr;
 2435         struct udphdr *udph;
 2436         struct vxlan_header *vxh;
 2437         int len;
 2438 
 2439         len = m->m_pkthdr.len - ipoff;
 2440         MPASS(len >= sizeof(struct vxlanudphdr));
 2441         hdr = mtodo(m, ipoff);
 2442 
 2443         udph = &hdr->vxlh_udp;
 2444         udph->uh_sport = srcport;
 2445         udph->uh_dport = dstport;
 2446         udph->uh_ulen = htons(len);
 2447         udph->uh_sum = 0;
 2448 
 2449         vxh = &hdr->vxlh_hdr;
 2450         vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
 2451         vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
 2452 }
 2453 #endif
 2454 
 2455 #if defined(INET6) || defined(INET)
 2456 /*
 2457  * Return the CSUM_INNER_* equivalent of CSUM_* caps.
 2458  */
 2459 static uint32_t
 2460 csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap)
 2461 {
 2462         uint32_t csum_flags = encap;
 2463         const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP;
 2464 
 2465         /*
 2466          * csum_flags can request either v4 or v6 offload but not both.
 2467          * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO)
 2468          * so those bits are no good to detect the IP version.  Other bits are
 2469          * always set with CSUM_TSO and we use those to figure out the IP
 2470          * version.
 2471          */
 2472         if (csum_flags_in & v4) {
 2473                 if (csum_flags_in & CSUM_IP)
 2474                         csum_flags |= CSUM_INNER_IP;
 2475                 if (csum_flags_in & CSUM_IP_UDP)
 2476                         csum_flags |= CSUM_INNER_IP_UDP;
 2477                 if (csum_flags_in & CSUM_IP_TCP)
 2478                         csum_flags |= CSUM_INNER_IP_TCP;
 2479                 if (csum_flags_in & CSUM_IP_TSO)
 2480                         csum_flags |= CSUM_INNER_IP_TSO;
 2481         } else {
 2482 #ifdef INVARIANTS
 2483                 const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP;
 2484 
 2485                 MPASS((csum_flags_in & v6) != 0);
 2486 #endif
 2487                 if (csum_flags_in & CSUM_IP6_UDP)
 2488                         csum_flags |= CSUM_INNER_IP6_UDP;
 2489                 if (csum_flags_in & CSUM_IP6_TCP)
 2490                         csum_flags |= CSUM_INNER_IP6_TCP;
 2491                 if (csum_flags_in & CSUM_IP6_TSO)
 2492                         csum_flags |= CSUM_INNER_IP6_TSO;
 2493         }
 2494 
 2495         return (csum_flags);
 2496 }
 2497 #endif
 2498 
 2499 static int
 2500 vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
 2501     struct mbuf *m)
 2502 {
 2503 #ifdef INET
 2504         struct ifnet *ifp;
 2505         struct ip *ip;
 2506         struct in_addr srcaddr, dstaddr;
 2507         uint16_t srcport, dstport;
 2508         int plen, mcast, error;
 2509         struct route route, *ro;
 2510         struct sockaddr_in *sin;
 2511         uint32_t csum_flags;
 2512 
 2513         NET_EPOCH_ASSERT();
 2514 
 2515         ifp = sc->vxl_ifp;
 2516         srcaddr = sc->vxl_src_addr.in4.sin_addr;
 2517         srcport = vxlan_pick_source_port(sc, m);
 2518         dstaddr = fvxlsa->in4.sin_addr;
 2519         dstport = fvxlsa->in4.sin_port;
 2520 
 2521         plen = m->m_pkthdr.len;
 2522         M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
 2523             M_NOWAIT);
 2524         if (m == NULL) {
 2525                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2526                 return (ENOBUFS);
 2527         }
 2528 
 2529         ip = mtod(m, struct ip *);
 2530         ip->ip_tos = 0;
 2531         ip->ip_len = htons(m->m_pkthdr.len);
 2532         ip->ip_off = 0;
 2533         ip->ip_ttl = sc->vxl_ttl;
 2534         ip->ip_p = IPPROTO_UDP;
 2535         ip->ip_sum = 0;
 2536         ip->ip_src = srcaddr;
 2537         ip->ip_dst = dstaddr;
 2538 
 2539         vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);
 2540 
 2541         mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 2542         m->m_flags &= ~(M_MCAST | M_BCAST);
 2543 
 2544         m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
 2545         if (m->m_pkthdr.csum_flags != 0) {
 2546                 /*
 2547                  * HW checksum (L3 and/or L4) or TSO has been requested.  Look
 2548                  * up the ifnet for the outbound route and verify that the
 2549                  * outbound ifnet can perform the requested operation on the
 2550                  * inner frame.
 2551                  */
 2552                 bzero(&route, sizeof(route));
 2553                 ro = &route;
 2554                 sin = (struct sockaddr_in *)&ro->ro_dst;
 2555                 sin->sin_family = AF_INET;
 2556                 sin->sin_len = sizeof(*sin);
 2557                 sin->sin_addr = ip->ip_dst;
 2558                 ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE,
 2559                     0);
 2560                 if (ro->ro_nh == NULL) {
 2561                         m_freem(m);
 2562                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2563                         return (EHOSTUNREACH);
 2564                 }
 2565 
 2566                 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
 2567                     CSUM_ENCAP_VXLAN);
 2568                 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
 2569                     csum_flags) {
 2570                         if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
 2571                                 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;
 2572 
 2573                                 if_printf(ifp, "interface %s is missing hwcaps "
 2574                                     "0x%08x, csum_flags 0x%08x -> 0x%08x, "
 2575                                     "hwassist 0x%08x\n", nh_ifp->if_xname,
 2576                                     csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
 2577                                     m->m_pkthdr.csum_flags, csum_flags,
 2578                                     (uint32_t)nh_ifp->if_hwassist);
 2579                         }
 2580                         m_freem(m);
 2581                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2582                         return (ENXIO);
 2583                 }
 2584                 m->m_pkthdr.csum_flags = csum_flags;
 2585                 if (csum_flags &
 2586                     (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
 2587                     CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
 2588                         counter_u64_add(sc->vxl_stats.txcsum, 1);
 2589                         if (csum_flags & CSUM_INNER_TSO)
 2590                                 counter_u64_add(sc->vxl_stats.tso, 1);
 2591                 }
 2592         } else
 2593                 ro = NULL;
 2594         error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL);
 2595         if (error == 0) {
 2596                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 2597                 if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
 2598                 if (mcast != 0)
 2599                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
 2600         } else
 2601                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2602 
 2603         return (error);
 2604 #else
 2605         m_freem(m);
 2606         return (ENOTSUP);
 2607 #endif
 2608 }
 2609 
 2610 static int
 2611 vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
 2612     struct mbuf *m)
 2613 {
 2614 #ifdef INET6
 2615         struct ifnet *ifp;
 2616         struct ip6_hdr *ip6;
 2617         const struct in6_addr *srcaddr, *dstaddr;
 2618         uint16_t srcport, dstport;
 2619         int plen, mcast, error;
 2620         struct route_in6 route, *ro;
 2621         struct sockaddr_in6 *sin6;
 2622         uint32_t csum_flags;
 2623 
 2624         NET_EPOCH_ASSERT();
 2625 
 2626         ifp = sc->vxl_ifp;
 2627         srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
 2628         srcport = vxlan_pick_source_port(sc, m);
 2629         dstaddr = &fvxlsa->in6.sin6_addr;
 2630         dstport = fvxlsa->in6.sin6_port;
 2631 
 2632         plen = m->m_pkthdr.len;
 2633         M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
 2634             M_NOWAIT);
 2635         if (m == NULL) {
 2636                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2637                 return (ENOBUFS);
 2638         }
 2639 
 2640         ip6 = mtod(m, struct ip6_hdr *);
 2641         ip6->ip6_flow = 0;              /* BMV: Keep in forwarding entry? */
 2642         ip6->ip6_vfc = IPV6_VERSION;
 2643         ip6->ip6_plen = 0;
 2644         ip6->ip6_nxt = IPPROTO_UDP;
 2645         ip6->ip6_hlim = sc->vxl_ttl;
 2646         ip6->ip6_src = *srcaddr;
 2647         ip6->ip6_dst = *dstaddr;
 2648 
 2649         vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);
 2650 
 2651         mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 2652         m->m_flags &= ~(M_MCAST | M_BCAST);
 2653 
 2654         ro = NULL;
 2655         m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
 2656         if (m->m_pkthdr.csum_flags != 0) {
 2657                 /*
 2658                  * HW checksum (L3 and/or L4) or TSO has been requested.  Look
 2659                  * up the ifnet for the outbound route and verify that the
 2660                  * outbound ifnet can perform the requested operation on the
 2661                  * inner frame.
 2662                  */
 2663                 bzero(&route, sizeof(route));
 2664                 ro = &route;
 2665                 sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
 2666                 sin6->sin6_family = AF_INET6;
 2667                 sin6->sin6_len = sizeof(*sin6);
 2668                 sin6->sin6_addr = ip6->ip6_dst;
 2669                 ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0,
 2670                     NHR_NONE, 0);
 2671                 if (ro->ro_nh == NULL) {
 2672                         m_freem(m);
 2673                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2674                         return (EHOSTUNREACH);
 2675                 }
 2676 
 2677                 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
 2678                     CSUM_ENCAP_VXLAN);
 2679                 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
 2680                     csum_flags) {
 2681                         if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
 2682                                 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;
 2683 
 2684                                 if_printf(ifp, "interface %s is missing hwcaps "
 2685                                     "0x%08x, csum_flags 0x%08x -> 0x%08x, "
 2686                                     "hwassist 0x%08x\n", nh_ifp->if_xname,
 2687                                     csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
 2688                                     m->m_pkthdr.csum_flags, csum_flags,
 2689                                     (uint32_t)nh_ifp->if_hwassist);
 2690                         }
 2691                         m_freem(m);
 2692                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2693                         return (ENXIO);
 2694                 }
 2695                 m->m_pkthdr.csum_flags = csum_flags;
 2696                 if (csum_flags &
 2697                     (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
 2698                     CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
 2699                         counter_u64_add(sc->vxl_stats.txcsum, 1);
 2700                         if (csum_flags & CSUM_INNER_TSO)
 2701                                 counter_u64_add(sc->vxl_stats.tso, 1);
 2702                 }
 2703         } else if (ntohs(dstport) != V_zero_checksum_port) {
 2704                 struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
 2705 
 2706                 hdr->uh_sum = in6_cksum_pseudo(ip6,
 2707                     m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
 2708                 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
 2709                 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 2710         }
 2711         error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL);
 2712         if (error == 0) {
 2713                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 2714                 if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
 2715                 if (mcast != 0)
 2716                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
 2717         } else
 2718                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2719 
 2720         return (error);
 2721 #else
 2722         m_freem(m);
 2723         return (ENOTSUP);
 2724 #endif
 2725 }
 2726 
 2727 static int
 2728 vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
 2729 {
 2730         struct rm_priotracker tracker;
 2731         union vxlan_sockaddr vxlsa;
 2732         struct vxlan_softc *sc;
 2733         struct vxlan_ftable_entry *fe;
 2734         struct ifnet *mcifp;
 2735         struct ether_header *eh;
 2736         int ipv4, error;
 2737 
 2738         sc = ifp->if_softc;
 2739         eh = mtod(m, struct ether_header *);
 2740         fe = NULL;
 2741         mcifp = NULL;
 2742 
 2743         ETHER_BPF_MTAP(ifp, m);
 2744 
 2745         VXLAN_RLOCK(sc, &tracker);
 2746         M_SETFIB(m, sc->vxl_fibnum);
 2747         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 2748                 VXLAN_RUNLOCK(sc, &tracker);
 2749                 m_freem(m);
 2750                 return (ENETDOWN);
 2751         }
 2752 
 2753         if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
 2754                 fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
 2755         if (fe == NULL)
 2756                 fe = &sc->vxl_default_fe;
 2757         vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);
 2758 
 2759         ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
 2760         if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
 2761                 mcifp = vxlan_multicast_if_ref(sc, ipv4);
 2762 
 2763         VXLAN_ACQUIRE(sc);
 2764         VXLAN_RUNLOCK(sc, &tracker);
 2765 
 2766         if (ipv4 != 0)
 2767                 error = vxlan_encap4(sc, &vxlsa, m);
 2768         else
 2769                 error = vxlan_encap6(sc, &vxlsa, m);
 2770 
 2771         vxlan_release(sc);
 2772         if (mcifp != NULL)
 2773                 if_rele(mcifp);
 2774 
 2775         return (error);
 2776 }
 2777 
 2778 static void
 2779 vxlan_qflush(struct ifnet *ifp __unused)
 2780 {
 2781 }
 2782 
 2783 static bool
 2784 vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
 2785     const struct sockaddr *srcsa, void *xvso)
 2786 {
 2787         struct vxlan_socket *vso;
 2788         struct vxlan_header *vxh, vxlanhdr;
 2789         uint32_t vni;
 2790         int error __unused;
 2791 
 2792         M_ASSERTPKTHDR(m);
 2793         vso = xvso;
 2794         offset += sizeof(struct udphdr);
 2795 
 2796         if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
 2797                 goto out;
 2798 
 2799         if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
 2800                 m_copydata(m, offset, sizeof(struct vxlan_header),
 2801                     (caddr_t) &vxlanhdr);
 2802                 vxh = &vxlanhdr;
 2803         } else
 2804                 vxh = mtodo(m, offset);
 2805 
 2806         /*
 2807          * Drop if there is a reserved bit set in either the flags or VNI
 2808          * fields of the header. This goes against the specification, but
 2809          * a bit set may indicate an unsupported new feature. This matches
 2810          * the behavior of the Linux implementation.
 2811          */
 2812         if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
 2813             vxh->vxlh_vni & ~VXLAN_VNI_MASK)
 2814                 goto out;
 2815 
 2816         vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;
 2817 
 2818         /* Adjust to the start of the inner Ethernet frame. */
 2819         m_adj_decap(m, offset + sizeof(struct vxlan_header));
 2820 
 2821         error = vxlan_input(vso, vni, &m, srcsa);
 2822         MPASS(error != 0 || m == NULL);
 2823 
 2824 out:
 2825         if (m != NULL)
 2826                 m_freem(m);
 2827 
 2828         return (true);
 2829 }
 2830 
 2831 static int
 2832 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
 2833     const struct sockaddr *sa)
 2834 {
 2835         struct vxlan_softc *sc;
 2836         struct ifnet *ifp;
 2837         struct mbuf *m;
 2838         struct ether_header *eh;
 2839         int error;
 2840 
 2841         m = *m0;
 2842 
 2843         if (m->m_pkthdr.len < ETHER_HDR_LEN)
 2844                 return (EINVAL);
 2845 
 2846         sc = vxlan_socket_lookup_softc(vso, vni);
 2847         if (sc == NULL)
 2848                 return (ENOENT);
 2849 
 2850         ifp = sc->vxl_ifp;
 2851         if (m->m_len < ETHER_HDR_LEN &&
 2852             (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
 2853                 *m0 = NULL;
 2854                 error = ENOBUFS;
 2855                 goto out;
 2856         }
 2857         eh = mtod(m, struct ether_header *);
 2858 
 2859         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 2860                 error = ENETDOWN;
 2861                 goto out;
 2862         } else if (ifp == m->m_pkthdr.rcvif) {
 2863                 /* XXX Does not catch more complex loops. */
 2864                 error = EDEADLK;
 2865                 goto out;
 2866         }
 2867 
 2868         if (sc->vxl_flags & VXLAN_FLAG_LEARN)
 2869                 vxlan_ftable_learn(sc, sa, eh->ether_shost);
 2870 
 2871         m_clrprotoflags(m);
 2872         m->m_pkthdr.rcvif = ifp;
 2873         M_SETFIB(m, ifp->if_fib);
 2874         if (((ifp->if_capenable & IFCAP_RXCSUM &&
 2875             m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) ||
 2876             (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
 2877             !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) {
 2878                 uint32_t csum_flags = 0;
 2879 
 2880                 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)
 2881                         csum_flags |= CSUM_L3_CALC;
 2882                 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID)
 2883                         csum_flags |= CSUM_L3_VALID;
 2884                 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC)
 2885                         csum_flags |= CSUM_L4_CALC;
 2886                 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID)
 2887                         csum_flags |= CSUM_L4_VALID;
 2888                 m->m_pkthdr.csum_flags = csum_flags;
 2889                 counter_u64_add(sc->vxl_stats.rxcsum, 1);
 2890         } else {
 2891                 /* clear everything */
 2892                 m->m_pkthdr.csum_flags = 0;
 2893                 m->m_pkthdr.csum_data = 0;
 2894         }
 2895 
 2896         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 2897         (*ifp->if_input)(ifp, m);
 2898         *m0 = NULL;
 2899         error = 0;
 2900 
 2901 out:
 2902         vxlan_release(sc);
 2903         return (error);
 2904 }
 2905 
 2906 static int
 2907 vxlan_stats_alloc(struct vxlan_softc *sc)
 2908 {
 2909         struct vxlan_statistics *stats = &sc->vxl_stats;
 2910 
 2911         stats->txcsum = counter_u64_alloc(M_WAITOK);
 2912         if (stats->txcsum == NULL)
 2913                 goto failed;
 2914 
 2915         stats->tso = counter_u64_alloc(M_WAITOK);
 2916         if (stats->tso == NULL)
 2917                 goto failed;
 2918 
 2919         stats->rxcsum = counter_u64_alloc(M_WAITOK);
 2920         if (stats->rxcsum == NULL)
 2921                 goto failed;
 2922 
 2923         return (0);
 2924 failed:
 2925         vxlan_stats_free(sc);
 2926         return (ENOMEM);
 2927 }
 2928 
 2929 static void
 2930 vxlan_stats_free(struct vxlan_softc *sc)
 2931 {
 2932         struct vxlan_statistics *stats = &sc->vxl_stats;
 2933 
 2934         if (stats->txcsum != NULL) {
 2935                 counter_u64_free(stats->txcsum);
 2936                 stats->txcsum = NULL;
 2937         }
 2938         if (stats->tso != NULL) {
 2939                 counter_u64_free(stats->tso);
 2940                 stats->tso = NULL;
 2941         }
 2942         if (stats->rxcsum != NULL) {
 2943                 counter_u64_free(stats->rxcsum);
 2944                 stats->rxcsum = NULL;
 2945         }
 2946 }
 2947 
 2948 static void
 2949 vxlan_set_default_config(struct vxlan_softc *sc)
 2950 {
 2951 
 2952         sc->vxl_flags |= VXLAN_FLAG_LEARN;
 2953 
 2954         sc->vxl_vni = VXLAN_VNI_MAX;
 2955         sc->vxl_ttl = IPDEFTTL;
 2956 
 2957         if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
 2958                 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
 2959                 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
 2960         } else {
 2961                 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
 2962                 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
 2963         }
 2964 
 2965         sc->vxl_min_port = V_ipport_firstauto;
 2966         sc->vxl_max_port = V_ipport_lastauto;
 2967 
 2968         sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
 2969         sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
 2970 }
 2971 
 2972 static int
 2973 vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
 2974 {
 2975 
 2976 #ifndef INET
 2977         if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
 2978             VXLAN_PARAM_WITH_REMOTE_ADDR4))
 2979                 return (EAFNOSUPPORT);
 2980 #endif
 2981 
 2982 #ifndef INET6
 2983         if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
 2984             VXLAN_PARAM_WITH_REMOTE_ADDR6))
 2985                 return (EAFNOSUPPORT);
 2986 #else
 2987         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
 2988                 int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa);
 2989                 if (error)
 2990                         return (error);
 2991         }
 2992         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
 2993                 int error = vxlan_sockaddr_in6_embedscope(
 2994                    &vxlp->vxlp_remote_sa);
 2995                 if (error)
 2996                         return (error);
 2997         }
 2998 #endif
 2999 
 3000         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
 3001                 if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
 3002                         sc->vxl_vni = vxlp->vxlp_vni;
 3003         }
 3004 
 3005         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
 3006                 sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
 3007                 sc->vxl_src_addr.in4.sin_family = AF_INET;
 3008                 sc->vxl_src_addr.in4.sin_addr =
 3009                     vxlp->vxlp_local_sa.in4.sin_addr;
 3010         } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
 3011                 sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
 3012                 sc->vxl_src_addr.in6.sin6_family = AF_INET6;
 3013                 sc->vxl_src_addr.in6.sin6_addr =
 3014                     vxlp->vxlp_local_sa.in6.sin6_addr;
 3015         }
 3016 
 3017         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
 3018                 sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
 3019                 sc->vxl_dst_addr.in4.sin_family = AF_INET;
 3020                 sc->vxl_dst_addr.in4.sin_addr =
 3021                     vxlp->vxlp_remote_sa.in4.sin_addr;
 3022         } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
 3023                 sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
 3024                 sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
 3025                 sc->vxl_dst_addr.in6.sin6_addr =
 3026                     vxlp->vxlp_remote_sa.in6.sin6_addr;
 3027         }
 3028 
 3029         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
 3030                 sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
 3031         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
 3032                 sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);
 3033 
 3034         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
 3035                 if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
 3036                         sc->vxl_min_port = vxlp->vxlp_min_port;
 3037                         sc->vxl_max_port = vxlp->vxlp_max_port;
 3038                 }
 3039         }
 3040 
 3041         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
 3042                 strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);
 3043 
 3044         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
 3045                 if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
 3046                         sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
 3047         }
 3048 
 3049         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
 3050                 if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
 3051                         sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
 3052         }
 3053 
 3054         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
 3055                 if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
 3056                         sc->vxl_ttl = vxlp->vxlp_ttl;
 3057         }
 3058 
 3059         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
 3060                 if (vxlp->vxlp_learn == 0)
 3061                         sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
 3062         }
 3063 
 3064         return (0);
 3065 }
 3066 
 3067 static int
 3068 vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap)
 3069 {
 3070         int mask = reqcap ^ ifp->if_capenable;
 3071 
 3072         /* Disable TSO if tx checksums are disabled. */
 3073         if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) &&
 3074             reqcap & IFCAP_TSO4) {
 3075                 reqcap &= ~IFCAP_TSO4;
 3076                 if_printf(ifp, "tso4 disabled due to -txcsum.\n");
 3077         }
 3078         if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) &&
 3079             reqcap & IFCAP_TSO6) {
 3080                 reqcap &= ~IFCAP_TSO6;
 3081                 if_printf(ifp, "tso6 disabled due to -txcsum6.\n");
 3082         }
 3083 
 3084         /* Do not enable TSO if tx checksums are disabled. */
 3085         if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 &&
 3086             !(reqcap & IFCAP_TXCSUM)) {
 3087                 if_printf(ifp, "enable txcsum first.\n");
 3088                 return (EAGAIN);
 3089         }
 3090         if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 &&
 3091             !(reqcap & IFCAP_TXCSUM_IPV6)) {
 3092                 if_printf(ifp, "enable txcsum6 first.\n");
 3093                 return (EAGAIN);
 3094         }
 3095 
 3096         sc->vxl_reqcap = reqcap;
 3097         return (0);
 3098 }
 3099 
 3100 /*
 3101  * A VXLAN interface inherits the capabilities of the vxlandev or the interface
 3102  * hosting the vxlanlocal address.
 3103  */
 3104 static void
 3105 vxlan_set_hwcaps(struct vxlan_softc *sc)
 3106 {
 3107         struct epoch_tracker et;
 3108         struct ifnet *p;
 3109         struct ifaddr *ifa;
 3110         u_long hwa;
 3111         int cap, ena;
 3112         bool rel;
 3113         struct ifnet *ifp = sc->vxl_ifp;
 3114 
 3115         /* reset caps */
 3116         ifp->if_capabilities &= VXLAN_BASIC_IFCAPS;
 3117         ifp->if_capenable &= VXLAN_BASIC_IFCAPS;
 3118         ifp->if_hwassist = 0;
 3119 
 3120         NET_EPOCH_ENTER(et);
 3121         CURVNET_SET(ifp->if_vnet);
 3122 
 3123         rel = false;
 3124         p = NULL;
 3125         if (sc->vxl_mc_ifname[0] != '\0') {
 3126                 rel = true;
 3127                 p = ifunit_ref(sc->vxl_mc_ifname);
 3128         } else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
 3129                 if (sc->vxl_src_addr.sa.sa_family == AF_INET) {
 3130                         struct sockaddr_in in4 = sc->vxl_src_addr.in4;
 3131 
 3132                         in4.sin_port = 0;
 3133                         ifa = ifa_ifwithaddr((struct sockaddr *)&in4);
 3134                         if (ifa != NULL)
 3135                                 p = ifa->ifa_ifp;
 3136                 } else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) {
 3137                         struct sockaddr_in6 in6 = sc->vxl_src_addr.in6;
 3138 
 3139                         in6.sin6_port = 0;
 3140                         ifa = ifa_ifwithaddr((struct sockaddr *)&in6);
 3141                         if (ifa != NULL)
 3142                                 p = ifa->ifa_ifp;
 3143                 }
 3144         }
 3145         if (p == NULL)
 3146                 goto done;
 3147 
 3148         cap = ena = hwa = 0;
 3149 
 3150         /* checksum offload */
 3151         if (p->if_capabilities & IFCAP_VXLAN_HWCSUM)
 3152                 cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
 3153         if (p->if_capenable & IFCAP_VXLAN_HWCSUM) {
 3154                 ena |= sc->vxl_reqcap & p->if_capenable &
 3155                     (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
 3156                 if (ena & IFCAP_TXCSUM) {
 3157                         if (p->if_hwassist & CSUM_INNER_IP)
 3158                                 hwa |= CSUM_IP;
 3159                         if (p->if_hwassist & CSUM_INNER_IP_UDP)
 3160                                 hwa |= CSUM_IP_UDP;
 3161                         if (p->if_hwassist & CSUM_INNER_IP_TCP)
 3162                                 hwa |= CSUM_IP_TCP;
 3163                 }
 3164                 if (ena & IFCAP_TXCSUM_IPV6) {
 3165                         if (p->if_hwassist & CSUM_INNER_IP6_UDP)
 3166                                 hwa |= CSUM_IP6_UDP;
 3167                         if (p->if_hwassist & CSUM_INNER_IP6_TCP)
 3168                                 hwa |= CSUM_IP6_TCP;
 3169                 }
 3170         }
 3171 
 3172         /* hardware TSO */
 3173         if (p->if_capabilities & IFCAP_VXLAN_HWTSO) {
 3174                 cap |= p->if_capabilities & IFCAP_TSO;
 3175                 if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen)
 3176                         ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen;
 3177                 else
 3178                         ifp->if_hw_tsomax = p->if_hw_tsomax;
 3179                 /* XXX: tsomaxsegcount decrement is cxgbe specific  */
 3180                 ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1;
 3181                 ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize;
 3182         }
 3183         if (p->if_capenable & IFCAP_VXLAN_HWTSO) {
 3184                 ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO;
 3185                 if (ena & IFCAP_TSO) {
 3186                         if (p->if_hwassist & CSUM_INNER_IP_TSO)
 3187                                 hwa |= CSUM_IP_TSO;
 3188                         if (p->if_hwassist & CSUM_INNER_IP6_TSO)
 3189                                 hwa |= CSUM_IP6_TSO;
 3190                 }
 3191         }
 3192 
 3193         ifp->if_capabilities |= cap;
 3194         ifp->if_capenable |= ena;
 3195         ifp->if_hwassist |= hwa;
 3196         if (rel)
 3197                 if_rele(p);
 3198 done:
 3199         CURVNET_RESTORE();
 3200         NET_EPOCH_EXIT(et);
 3201 }
 3202 
 3203 static int
 3204 vxlan_clone_create(struct if_clone *ifc, char *name, size_t len,
 3205     struct ifc_data *ifd, struct ifnet **ifpp)
 3206 {
 3207         struct vxlan_softc *sc;
 3208         struct ifnet *ifp;
 3209         struct ifvxlanparam vxlp;
 3210         int error;
 3211 
 3212         sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
 3213         sc->vxl_unit = ifd->unit;
 3214         sc->vxl_fibnum = curthread->td_proc->p_fibnum;
 3215         vxlan_set_default_config(sc);
 3216         error = vxlan_stats_alloc(sc);
 3217         if (error != 0)
 3218                 goto fail;
 3219 
 3220         if (ifd->params != NULL) {
 3221                 error = ifc_copyin(ifd, &vxlp, sizeof(vxlp));
 3222                 if (error)
 3223                         goto fail;
 3224 
 3225                 error = vxlan_set_user_config(sc, &vxlp);
 3226                 if (error)
 3227                         goto fail;
 3228         }
 3229 
 3230         ifp = if_alloc(IFT_ETHER);
 3231         if (ifp == NULL) {
 3232                 error = ENOSPC;
 3233                 goto fail;
 3234         }
 3235 
 3236         sc->vxl_ifp = ifp;
 3237         rm_init(&sc->vxl_lock, "vxlanrm");
 3238         callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
 3239         sc->vxl_port_hash_key = arc4random();
 3240         vxlan_ftable_init(sc);
 3241 
 3242         vxlan_sysctl_setup(sc);
 3243 
 3244         ifp->if_softc = sc;
 3245         if_initname(ifp, vxlan_name, ifd->unit);
 3246         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 3247         ifp->if_init = vxlan_init;
 3248         ifp->if_ioctl = vxlan_ioctl;
 3249         ifp->if_transmit = vxlan_transmit;
 3250         ifp->if_qflush = vxlan_qflush;
 3251         ifp->if_capabilities = VXLAN_BASIC_IFCAPS;
 3252         ifp->if_capenable = VXLAN_BASIC_IFCAPS;
 3253         sc->vxl_reqcap = -1;
 3254         vxlan_set_hwcaps(sc);
 3255 
 3256         ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status);
 3257         ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL);
 3258         ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO);
 3259 
 3260         ether_gen_addr(ifp, &sc->vxl_hwaddr);
 3261         ether_ifattach(ifp, sc->vxl_hwaddr.octet);
 3262 
 3263         ifp->if_baudrate = 0;
 3264 
 3265         VXLAN_WLOCK(sc);
 3266         vxlan_setup_interface_hdrlen(sc);
 3267         VXLAN_WUNLOCK(sc);
 3268         *ifpp = ifp;
 3269 
 3270         return (0);
 3271 
 3272 fail:
 3273         free(sc, M_VXLAN);
 3274         return (error);
 3275 }
 3276 
 3277 static int
 3278 vxlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
 3279 {
 3280         struct vxlan_softc *sc;
 3281 
 3282         sc = ifp->if_softc;
 3283 
 3284         vxlan_teardown(sc);
 3285 
 3286         vxlan_ftable_flush(sc, 1);
 3287 
 3288         ether_ifdetach(ifp);
 3289         if_free(ifp);
 3290         ifmedia_removeall(&sc->vxl_media);
 3291 
 3292         vxlan_ftable_fini(sc);
 3293 
 3294         vxlan_sysctl_destroy(sc);
 3295         rm_destroy(&sc->vxl_lock);
 3296         vxlan_stats_free(sc);
 3297         free(sc, M_VXLAN);
 3298 
 3299         return (0);
 3300 }
 3301 
 3302 /* BMV: Taken from if_bridge. */
 3303 static uint32_t
 3304 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
 3305 {
 3306         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
 3307 
 3308         b += addr[5] << 8;
 3309         b += addr[4];
 3310         a += addr[3] << 24;
 3311         a += addr[2] << 16;
 3312         a += addr[1] << 8;
 3313         a += addr[0];
 3314 
 3315 /*
 3316  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 3317  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 3318  */
 3319 #define mix(a, b, c)                                                    \
 3320 do {                                                                    \
 3321         a -= b; a -= c; a ^= (c >> 13);                                 \
 3322         b -= c; b -= a; b ^= (a << 8);                                  \
 3323         c -= a; c -= b; c ^= (b >> 13);                                 \
 3324         a -= b; a -= c; a ^= (c >> 12);                                 \
 3325         b -= c; b -= a; b ^= (a << 16);                                 \
 3326         c -= a; c -= b; c ^= (b >> 5);                                  \
 3327         a -= b; a -= c; a ^= (c >> 3);                                  \
 3328         b -= c; b -= a; b ^= (a << 10);                                 \
 3329         c -= a; c -= b; c ^= (b >> 15);                                 \
 3330 } while (0)
 3331 
 3332         mix(a, b, c);
 3333 
 3334 #undef mix
 3335 
 3336         return (c);
 3337 }
 3338 
 3339 static int
 3340 vxlan_media_change(struct ifnet *ifp)
 3341 {
 3342 
 3343         /* Ignore. */
 3344         return (0);
 3345 }
 3346 
 3347 static void
 3348 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
 3349 {
 3350 
 3351         ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
 3352         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
 3353 }
 3354 
 3355 static int
 3356 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
 3357     const struct sockaddr *sa)
 3358 {
 3359 
 3360         return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
 3361 }
 3362 
 3363 static void
 3364 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
 3365     const struct sockaddr *sa)
 3366 {
 3367 
 3368         MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
 3369         bzero(vxladdr, sizeof(*vxladdr));
 3370 
 3371         if (sa->sa_family == AF_INET) {
 3372                 vxladdr->in4 = *satoconstsin(sa);
 3373                 vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
 3374         } else if (sa->sa_family == AF_INET6) {
 3375                 vxladdr->in6 = *satoconstsin6(sa);
 3376                 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
 3377         }
 3378 }
 3379 
 3380 static int
 3381 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
 3382     const struct sockaddr *sa)
 3383 {
 3384         int equal;
 3385 
 3386         if (sa->sa_family == AF_INET) {
 3387                 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
 3388                 equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr;
 3389         } else if (sa->sa_family == AF_INET6) {
 3390                 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
 3391                 equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr);
 3392         } else
 3393                 equal = 0;
 3394 
 3395         return (equal);
 3396 }
 3397 
 3398 static void
 3399 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
 3400     const struct sockaddr *sa)
 3401 {
 3402 
 3403         MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
 3404 
 3405         if (sa->sa_family == AF_INET) {
 3406                 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
 3407                 vxladdr->in4.sin_family = AF_INET;
 3408                 vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
 3409                 vxladdr->in4.sin_addr = *in4;
 3410         } else if (sa->sa_family == AF_INET6) {
 3411                 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
 3412                 vxladdr->in6.sin6_family = AF_INET6;
 3413                 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
 3414                 vxladdr->in6.sin6_addr = *in6;
 3415         }
 3416 }
 3417 
 3418 static int
 3419 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
 3420 {
 3421         const struct sockaddr *sa;
 3422         int supported;
 3423 
 3424         sa = &vxladdr->sa;
 3425         supported = 0;
 3426 
 3427         if (sa->sa_family == AF_UNSPEC && unspec != 0) {
 3428                 supported = 1;
 3429         } else if (sa->sa_family == AF_INET) {
 3430 #ifdef INET
 3431                 supported = 1;
 3432 #endif
 3433         } else if (sa->sa_family == AF_INET6) {
 3434 #ifdef INET6
 3435                 supported = 1;
 3436 #endif
 3437         }
 3438 
 3439         return (supported);
 3440 }
 3441 
 3442 static int
 3443 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
 3444 {
 3445         const struct sockaddr *sa;
 3446         int any;
 3447 
 3448         sa = &vxladdr->sa;
 3449 
 3450         if (sa->sa_family == AF_INET) {
 3451                 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
 3452                 any = in4->s_addr == INADDR_ANY;
 3453         } else if (sa->sa_family == AF_INET6) {
 3454                 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
 3455                 any = IN6_IS_ADDR_UNSPECIFIED(in6);
 3456         } else
 3457                 any = -1;
 3458 
 3459         return (any);
 3460 }
 3461 
 3462 static int
 3463 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
 3464 {
 3465         const struct sockaddr *sa;
 3466         int mc;
 3467 
 3468         sa = &vxladdr->sa;
 3469 
 3470         if (sa->sa_family == AF_INET) {
 3471                 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
 3472                 mc = IN_MULTICAST(ntohl(in4->s_addr));
 3473         } else if (sa->sa_family == AF_INET6) {
 3474                 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
 3475                 mc = IN6_IS_ADDR_MULTICAST(in6);
 3476         } else
 3477                 mc = -1;
 3478 
 3479         return (mc);
 3480 }
 3481 
 3482 static int
 3483 vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr)
 3484 {
 3485         int error;
 3486 
 3487         MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr));
 3488 #ifdef INET6
 3489         error = sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone);
 3490 #else
 3491         error = EAFNOSUPPORT;
 3492 #endif
 3493 
 3494         return (error);
 3495 }
 3496 
 3497 static int
 3498 vxlan_can_change_config(struct vxlan_softc *sc)
 3499 {
 3500         struct ifnet *ifp;
 3501 
 3502         ifp = sc->vxl_ifp;
 3503         VXLAN_LOCK_ASSERT(sc);
 3504 
 3505         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 3506                 return (0);
 3507         if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
 3508                 return (0);
 3509 
 3510         return (1);
 3511 }
 3512 
 3513 static int
 3514 vxlan_check_vni(uint32_t vni)
 3515 {
 3516 
 3517         return (vni >= VXLAN_VNI_MAX);
 3518 }
 3519 
 3520 static int
 3521 vxlan_check_ttl(int ttl)
 3522 {
 3523 
 3524         return (ttl > MAXTTL);
 3525 }
 3526 
 3527 static int
 3528 vxlan_check_ftable_timeout(uint32_t timeout)
 3529 {
 3530 
 3531         return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
 3532 }
 3533 
 3534 static int
 3535 vxlan_check_ftable_max(uint32_t max)
 3536 {
 3537 
 3538         return (max > VXLAN_FTABLE_MAX);
 3539 }
 3540 
 3541 static void
 3542 vxlan_sysctl_setup(struct vxlan_softc *sc)
 3543 {
 3544         struct sysctl_ctx_list *ctx;
 3545         struct sysctl_oid *node;
 3546         struct vxlan_statistics *stats;
 3547         char namebuf[8];
 3548 
 3549         ctx = &sc->vxl_sysctl_ctx;
 3550         stats = &sc->vxl_stats;
 3551         snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
 3552 
 3553         sysctl_ctx_init(ctx);
 3554         sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
 3555             SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
 3556             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
 3557 
 3558         node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
 3559             OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
 3560         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
 3561             CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
 3562             "Number of entries in forwarding table");
 3563         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
 3564              CTLFLAG_RD, &sc->vxl_ftable_max, 0,
 3565             "Maximum number of entries allowed in forwarding table");
 3566         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
 3567             CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
 3568             "Number of seconds between prunes of the forwarding table");
 3569         SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
 3570             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
 3571             sc, 0, vxlan_ftable_sysctl_dump, "A",
 3572             "Dump the forwarding table entries");
 3573 
 3574         node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
 3575             OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
 3576         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
 3577             "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
 3578             "Fowarding table reached maximum entries");
 3579         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
 3580             "ftable_lock_upgrade_failed", CTLFLAG_RD,
 3581             &stats->ftable_lock_upgrade_failed, 0,
 3582             "Forwarding table update required lock upgrade");
 3583 
 3584         SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum",
 3585             CTLFLAG_RD, &stats->txcsum,
 3586             "# of times hardware assisted with tx checksum");
 3587         SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso",
 3588             CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO");
 3589         SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum",
 3590             CTLFLAG_RD, &stats->rxcsum,
 3591             "# of times hardware assisted with rx checksum");
 3592 }
 3593 
 3594 static void
 3595 vxlan_sysctl_destroy(struct vxlan_softc *sc)
 3596 {
 3597 
 3598         sysctl_ctx_free(&sc->vxl_sysctl_ctx);
 3599         sc->vxl_sysctl_node = NULL;
 3600 }
 3601 
 3602 static int
 3603 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
 3604 {
 3605         char path[64];
 3606 
 3607         snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
 3608             sc->vxl_unit, knob);
 3609         TUNABLE_INT_FETCH(path, &def);
 3610 
 3611         return (def);
 3612 }
 3613 
 3614 static void
 3615 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
 3616 {
 3617         struct vxlan_softc_head list;
 3618         struct vxlan_socket *vso;
 3619         struct vxlan_softc *sc, *tsc;
 3620 
 3621         LIST_INIT(&list);
 3622 
 3623         if (ifp->if_flags & IFF_RENAMING)
 3624                 return;
 3625         if ((ifp->if_flags & IFF_MULTICAST) == 0)
 3626                 return;
 3627 
 3628         VXLAN_LIST_LOCK();
 3629         LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
 3630                 vxlan_socket_ifdetach(vso, ifp, &list);
 3631         VXLAN_LIST_UNLOCK();
 3632 
 3633         LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
 3634                 LIST_REMOVE(sc, vxl_ifdetach_list);
 3635 
 3636                 sx_xlock(&vxlan_sx);
 3637                 VXLAN_WLOCK(sc);
 3638                 if (sc->vxl_flags & VXLAN_FLAG_INIT)
 3639                         vxlan_init_wait(sc);
 3640                 vxlan_teardown_locked(sc);
 3641                 sx_xunlock(&vxlan_sx);
 3642         }
 3643 }
 3644 
 3645 static void
 3646 vxlan_load(void)
 3647 {
 3648 
 3649         mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
 3650         LIST_INIT(&vxlan_socket_list);
 3651         vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
 3652             vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
 3653 
 3654         struct if_clone_addreq req = {
 3655                 .create_f = vxlan_clone_create,
 3656                 .destroy_f = vxlan_clone_destroy,
 3657                 .flags = IFC_F_AUTOUNIT,
 3658         };
 3659         vxlan_cloner = ifc_attach_cloner(vxlan_name, &req);
 3660 }
 3661 
 3662 static void
 3663 vxlan_unload(void)
 3664 {
 3665 
 3666         EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 3667             vxlan_ifdetach_event_tag);
 3668         ifc_detach_cloner(vxlan_cloner);
 3669         mtx_destroy(&vxlan_list_mtx);
 3670         MPASS(LIST_EMPTY(&vxlan_socket_list));
 3671 }
 3672 
 3673 static int
 3674 vxlan_modevent(module_t mod, int type, void *unused)
 3675 {
 3676         int error;
 3677 
 3678         error = 0;
 3679 
 3680         switch (type) {
 3681         case MOD_LOAD:
 3682                 vxlan_load();
 3683                 break;
 3684         case MOD_UNLOAD:
 3685                 vxlan_unload();
 3686                 break;
 3687         default:
 3688                 error = ENOTSUP;
 3689                 break;
 3690         }
 3691 
 3692         return (error);
 3693 }
 3694 
 3695 static moduledata_t vxlan_mod = {
 3696         "if_vxlan",
 3697         vxlan_modevent,
 3698         0
 3699 };
 3700 
 3701 DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 3702 MODULE_VERSION(if_vxlan, 1);

Cache object: 3819c1ccaa3edd665f9d909166c7050e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.