The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/if_vxlan.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice unmodified, this list of conditions, and the following
   10  *    disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  */
   26 
   27 #include "opt_inet.h"
   28 #include "opt_inet6.h"
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/11.2/sys/net/if_vxlan.c 327927 2018-01-13 17:21:45Z bryanv $");
   32 
   33 #include <sys/param.h>
   34 #include <sys/eventhandler.h>
   35 #include <sys/kernel.h>
   36 #include <sys/lock.h>
   37 #include <sys/hash.h>
   38 #include <sys/malloc.h>
   39 #include <sys/mbuf.h>
   40 #include <sys/module.h>
   41 #include <sys/refcount.h>
   42 #include <sys/rmlock.h>
   43 #include <sys/priv.h>
   44 #include <sys/proc.h>
   45 #include <sys/queue.h>
   46 #include <sys/sbuf.h>
   47 #include <sys/socket.h>
   48 #include <sys/socketvar.h>
   49 #include <sys/sockio.h>
   50 #include <sys/sysctl.h>
   51 #include <sys/systm.h>
   52 
   53 #include <net/bpf.h>
   54 #include <net/ethernet.h>
   55 #include <net/if.h>
   56 #include <net/if_var.h>
   57 #include <net/if_clone.h>
   58 #include <net/if_dl.h>
   59 #include <net/if_media.h>
   60 #include <net/if_types.h>
   61 #include <net/if_vxlan.h>
   62 #include <net/netisr.h>
   63 
   64 #include <netinet/in.h>
   65 #include <netinet/in_systm.h>
   66 #include <netinet/in_var.h>
   67 #include <netinet/in_pcb.h>
   68 #include <netinet/ip.h>
   69 #include <netinet/ip6.h>
   70 #include <netinet/ip_var.h>
   71 #include <netinet/udp.h>
   72 #include <netinet/udp_var.h>
   73 
   74 #include <netinet6/ip6_var.h>
   75 #include <netinet6/scope6_var.h>
   76 
   77 struct vxlan_softc;
   78 LIST_HEAD(vxlan_softc_head, vxlan_softc);        /* Head type for lists of struct vxlan_softc. */
   79 
   80 struct vxlan_socket_mc_info {
              /*
               * Element type of the vxlso_mc[] array in struct vxlan_socket.
               * NOTE(review): the field descriptions below are inferred from
               * the names only; the code that fills them in is not visible
               * here - confirm against the vxlan_socket_mc_*() functions.
               */
   81         union vxlan_sockaddr             vxlsomc_saddr;  /* presumably the source address */
   82         union vxlan_sockaddr             vxlsomc_gaddr;  /* presumably the multicast group address */
   83         int                              vxlsomc_ifidx;  /* presumably an interface index */
   84         int                              vxlsomc_users;  /* presumably the number of users of this slot */
   85 };
   86 
   87 #define VXLAN_SO_MC_MAX_GROUPS          32      /* Length of vxlso_mc[] in struct vxlan_socket. */
   88 
      /*
       * Per-socket VNI hash table: 1 << 6 == 64 buckets, which is the length
       * of vxlso_vni_hash[] in struct vxlan_socket. VXLAN_SO_VNI_HASH() reduces
       * a VNI to a bucket index by taking it modulo the table size.
       */
   89 #define VXLAN_SO_VNI_HASH_SHIFT         6
   90 #define VXLAN_SO_VNI_HASH_SIZE          (1 << VXLAN_SO_VNI_HASH_SHIFT)
   91 #define VXLAN_SO_VNI_HASH(_vni)         ((_vni) % VXLAN_SO_VNI_HASH_SIZE)
   92 
   93 struct vxlan_socket {
              /*
               * State kept per socket. vxlso_lock is manipulated through the
               * VXLAN_SO_*LOCK macros and vxlso_refcnt through
               * VXLAN_SO_ACQUIRE/VXLAN_SO_RELEASE.
               */
   94         struct socket                   *vxlso_sock;
   95         struct rmlock                    vxlso_lock;
   96         u_int                            vxlso_refcnt;
   97         union vxlan_sockaddr             vxlso_laddr;    /* presumably the local address - TODO confirm */
   98         LIST_ENTRY(vxlan_socket)         vxlso_entry;    /* list linkage; presumably for vxlan_socket_list */
   99         struct vxlan_softc_head          vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE]; /* softc list heads, one per hash bucket */
  100         struct vxlan_socket_mc_info      vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
  101 };
  102 
  103 #define VXLAN_SO_RLOCK(_vso, _p)        rm_rlock(&(_vso)->vxlso_lock, (_p))     /* _p is handed to rm_rlock() as its second argument */
  104 #define VXLAN_SO_RUNLOCK(_vso, _p)      rm_runlock(&(_vso)->vxlso_lock, (_p))
  105 #define VXLAN_SO_WLOCK(_vso)            rm_wlock(&(_vso)->vxlso_lock)
  106 #define VXLAN_SO_WUNLOCK(_vso)          rm_wunlock(&(_vso)->vxlso_lock)
      /* Assert that the socket lock is held (RA_LOCKED) or write-held (RA_WLOCKED). */
  107 #define VXLAN_SO_LOCK_ASSERT(_vso) \
  108     rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
  109 #define VXLAN_SO_LOCK_WASSERT(_vso) \
  110     rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)
  111 
      /* Reference counting on vxlso_refcnt; both evaluate to the refcount_*() result. */
  112 #define VXLAN_SO_ACQUIRE(_vso)          refcount_acquire(&(_vso)->vxlso_refcnt)
  113 #define VXLAN_SO_RELEASE(_vso)          refcount_release(&(_vso)->vxlso_refcnt)
  114 
  115 struct vxlan_ftable_entry {
              /* One forwarding table entry: a MAC address and the remote address recorded for it. */
  116         LIST_ENTRY(vxlan_ftable_entry)   vxlfe_hash;     /* linkage within a vxl_ftable[] bucket */
  117         uint16_t                         vxlfe_flags;    /* VXLAN_FE_FLAG_* bits */
  118         uint8_t                          vxlfe_mac[ETHER_ADDR_LEN];
  119         union vxlan_sockaddr             vxlfe_raddr;    /* address copied in by vxlan_ftable_entry_init() */
  120         time_t                           vxlfe_expire;   /* time_uptime value at which a dynamic entry expires */
  121 };
  122 
  123 #define VXLAN_FE_FLAG_DYNAMIC           0x01    /* set on entries created by vxlan_ftable_update_locked() */
  124 #define VXLAN_FE_FLAG_STATIC            0x02
  125 
      /* Non-zero when the entry has VXLAN_FE_FLAG_DYNAMIC set. */
  126 #define VXLAN_FE_IS_DYNAMIC(_fe) \
  127     ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)
  128 
      /*
       * Forwarding table geometry: 1 << 9 == 512 buckets. The bucket index is
       * vxlan_mac_hash() taken modulo the table size.
       */
  129 #define VXLAN_SC_FTABLE_SHIFT           9
  130 #define VXLAN_SC_FTABLE_SIZE            (1 << VXLAN_SC_FTABLE_SHIFT)
  131 #define VXLAN_SC_FTABLE_MASK            (VXLAN_SC_FTABLE_SIZE - 1)
  132 #define VXLAN_SC_FTABLE_HASH(_sc, _mac) \
  133     (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)
  134 
  135 LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);        /* bucket head type used for vxl_ftable[] */
  136 
  137 struct vxlan_statistics {
              /* Counters bumped by vxlan_ftable_update_locked(). */
  138         uint32_t        ftable_nospace;                 /* new entry refused: vxl_ftable_cnt >= vxl_ftable_max */
  139         uint32_t        ftable_lock_upgrade_failed;     /* read lock dropped and write lock taken before retrying */
  140 };
  141 
  142 struct vxlan_softc {
              /*
               * Per-interface state. vxl_lock is taken through the VXLAN_*LOCK
               * macros and vxl_refcnt through VXLAN_ACQUIRE/VXLAN_RELEASE.
               */
  143         struct ifnet                    *vxl_ifp;
  144         struct vxlan_socket             *vxl_sock;
  145         uint32_t                         vxl_vni;
  146         union vxlan_sockaddr             vxl_src_addr;
  147         union vxlan_sockaddr             vxl_dst_addr;   /* its port is applied to learned addresses in vxlan_ftable_learn() */
  148         uint32_t                         vxl_flags;      /* VXLAN_FLAG_* bits below */
  149 #define VXLAN_FLAG_INIT         0x0001
  150 #define VXLAN_FLAG_TEARDOWN     0x0002
  151 #define VXLAN_FLAG_LEARN        0x0004
  152 
  153         uint32_t                         vxl_port_hash_key;
  154         uint16_t                         vxl_min_port;
  155         uint16_t                         vxl_max_port;
  156         uint8_t                          vxl_ttl;
  157 
  158         /* Lookup table from MAC address to forwarding entry. */
  159         uint32_t                         vxl_ftable_cnt;         /* decremented by vxlan_ftable_entry_destroy() */
  160         uint32_t                         vxl_ftable_max;         /* new dynamic entries are refused once cnt >= max */
  161         uint32_t                         vxl_ftable_timeout;     /* added to time_uptime to form vxlfe_expire */
  162         uint32_t                         vxl_ftable_hash_key;    /* set from arc4random() in vxlan_ftable_init() */
  163         struct vxlan_ftable_head        *vxl_ftable;             /* VXLAN_SC_FTABLE_SIZE buckets from vxlan_ftable_init() */
  164 
  165         /* Derived from vxl_dst_addr. */
  166         struct vxlan_ftable_entry        vxl_default_fe;
  167 
  168         struct ip_moptions              *vxl_im4o;
  169         struct ip6_moptions             *vxl_im6o;
  170 
  171         struct rmlock                    vxl_lock;
  172         volatile u_int                   vxl_refcnt;
  173 
  174         int                              vxl_unit;
  175         int                              vxl_vso_mc_index;
  176         struct vxlan_statistics          vxl_stats;
  177         struct sysctl_oid               *vxl_sysctl_node;
  178         struct sysctl_ctx_list           vxl_sysctl_ctx;
  179         struct callout                   vxl_callout;
  180         uint8_t                          vxl_hwaddr[ETHER_ADDR_LEN];
  181         int                              vxl_mc_ifindex;
  182         struct ifnet                    *vxl_mc_ifp;
  183         struct ifmedia                   vxl_media;
  184         char                             vxl_mc_ifname[IFNAMSIZ];
  185         LIST_ENTRY(vxlan_softc)          vxl_entry;
  186         LIST_ENTRY(vxlan_softc)          vxl_ifdetach_list;
  187 };
  188 
  189 #define VXLAN_RLOCK(_sc, _p)    rm_rlock(&(_sc)->vxl_lock, (_p))        /* _p is handed to rm_rlock() as its second argument */
  190 #define VXLAN_RUNLOCK(_sc, _p)  rm_runlock(&(_sc)->vxl_lock, (_p))
  191 #define VXLAN_WLOCK(_sc)        rm_wlock(&(_sc)->vxl_lock)
  192 #define VXLAN_WUNLOCK(_sc)      rm_wunlock(&(_sc)->vxl_lock)
  193 #define VXLAN_LOCK_WOWNED(_sc)  rm_wowned(&(_sc)->vxl_lock)
  194 #define VXLAN_LOCK_ASSERT(_sc)  rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
  195 #define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
      /*
       * Release vxl_lock in whichever mode it is held: the write unlock when
       * VXLAN_LOCK_WOWNED() is true, otherwise the read unlock using tracker _p.
       * Needed after vxlan_ftable_update_locked(), which may trade the caller's
       * read lock for the write lock.
       */
  196 #define VXLAN_UNLOCK(_sc, _p) do {              \
  197     if (VXLAN_LOCK_WOWNED(_sc))                 \
  198         VXLAN_WUNLOCK(_sc);                     \
  199     else                                        \
  200         VXLAN_RUNLOCK(_sc, _p);                 \
  201 } while (0)
  202 
      /* Reference counting on vxl_refcnt; both evaluate to the refcount_*() result. */
  203 #define VXLAN_ACQUIRE(_sc)      refcount_acquire(&(_sc)->vxl_refcnt)
  204 #define VXLAN_RELEASE(_sc)      refcount_release(&(_sc)->vxl_refcnt)
  205 
      /* Cast a sockaddr pointer to a const IPv4 / IPv6 sockaddr pointer. */
  206 #define satoconstsin(sa)        ((const struct sockaddr_in *)(sa))
  207 #define satoconstsin6(sa)       ((const struct sockaddr_in6 *)(sa))
  208 
  209 struct vxlanudphdr {
              /* A UDP header followed by a VXLAN header; __packed on the struct. */
  210         struct udphdr           vxlh_udp;
  211         struct vxlan_header     vxlh_hdr;
  212 } __packed;
  213 
  214 static int      vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
  215 static void     vxlan_ftable_init(struct vxlan_softc *);
  216 static void     vxlan_ftable_fini(struct vxlan_softc *);
  217 static void     vxlan_ftable_flush(struct vxlan_softc *, int);
  218 static void     vxlan_ftable_expire(struct vxlan_softc *);
  219 static int      vxlan_ftable_update_locked(struct vxlan_softc *,
  220                     const union vxlan_sockaddr *, const uint8_t *,
  221                     struct rm_priotracker *);
  222 static int      vxlan_ftable_learn(struct vxlan_softc *,
  223                     const struct sockaddr *, const uint8_t *);
  224 static int      vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);
  225 
  226 static struct vxlan_ftable_entry *
  227                 vxlan_ftable_entry_alloc(void);
  228 static void     vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
  229 static void     vxlan_ftable_entry_init(struct vxlan_softc *,
  230                     struct vxlan_ftable_entry *, const uint8_t *,
  231                     const struct sockaddr *, uint32_t);
  232 static void     vxlan_ftable_entry_destroy(struct vxlan_softc *,
  233                     struct vxlan_ftable_entry *);
  234 static int      vxlan_ftable_entry_insert(struct vxlan_softc *,
  235                     struct vxlan_ftable_entry *);
  236 static struct vxlan_ftable_entry *
  237                 vxlan_ftable_entry_lookup(struct vxlan_softc *,
  238                     const uint8_t *);
  239 static void     vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
  240                     struct sbuf *);
  241 
  242 static struct vxlan_socket *
  243                 vxlan_socket_alloc(const union vxlan_sockaddr *);
  244 static void     vxlan_socket_destroy(struct vxlan_socket *);
  245 static void     vxlan_socket_release(struct vxlan_socket *);
  246 static struct vxlan_socket *
  247                 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
  248 static void     vxlan_socket_insert(struct vxlan_socket *);
  249 static int      vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
  250 static int      vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
  251 static int      vxlan_socket_create(struct ifnet *, int,
  252                     const union vxlan_sockaddr *, struct vxlan_socket **);
  253 static void     vxlan_socket_ifdetach(struct vxlan_socket *,
  254                     struct ifnet *, struct vxlan_softc_head *);
  255 
  256 static struct vxlan_socket *
  257                 vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
  258 static int      vxlan_sockaddr_mc_info_match(
  259                     const struct vxlan_socket_mc_info *,
  260                     const union vxlan_sockaddr *,
  261                     const union vxlan_sockaddr *, int);
  262 static int      vxlan_socket_mc_join_group(struct vxlan_socket *,
  263                     const union vxlan_sockaddr *, const union vxlan_sockaddr *,
  264                     int *, union vxlan_sockaddr *);
  265 static int      vxlan_socket_mc_leave_group(struct vxlan_socket *,
  266                     const union vxlan_sockaddr *,
  267                     const union vxlan_sockaddr *, int);
  268 static int      vxlan_socket_mc_add_group(struct vxlan_socket *,
  269                     const union vxlan_sockaddr *, const union vxlan_sockaddr *,
  270                     int, int *);
  271 static void     vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
  272                     int);
  273 
  274 static struct vxlan_softc *
  275                 vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
  276                     uint32_t);
  277 static struct vxlan_softc *
  278                 vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
  279 static int      vxlan_socket_insert_softc(struct vxlan_socket *,
  280                     struct vxlan_softc *);
  281 static void     vxlan_socket_remove_softc(struct vxlan_socket *,
  282                     struct vxlan_softc *);
  283 
  284 static struct ifnet *
  285                 vxlan_multicast_if_ref(struct vxlan_softc *, int);
  286 static void     vxlan_free_multicast(struct vxlan_softc *);
  287 static int      vxlan_setup_multicast_interface(struct vxlan_softc *);
  288 
  289 static int      vxlan_setup_multicast(struct vxlan_softc *);
  290 static int      vxlan_setup_socket(struct vxlan_softc *);
  291 static void     vxlan_setup_interface(struct vxlan_softc *);
  292 static int      vxlan_valid_init_config(struct vxlan_softc *);
  293 static void     vxlan_init_wait(struct vxlan_softc *);
  294 static void     vxlan_init_complete(struct vxlan_softc *);
  295 static void     vxlan_init(void *);
  296 static void     vxlan_release(struct vxlan_softc *);
  297 static void     vxlan_teardown_wait(struct vxlan_softc *);
  298 static void     vxlan_teardown_complete(struct vxlan_softc *);
  299 static void     vxlan_teardown_locked(struct vxlan_softc *);
  300 static void     vxlan_teardown(struct vxlan_softc *);
  301 static void     vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
  302                     struct vxlan_softc_head *);
  303 static void     vxlan_timer(void *);
  304 
  305 static int      vxlan_ctrl_get_config(struct vxlan_softc *, void *);
  306 static int      vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
  307 static int      vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
  308 static int      vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
  309 static int      vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
  310 static int      vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
  311 static int      vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
  312 static int      vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
  313 static int      vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
  314 static int      vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
  315 static int      vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
  316 static int      vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
  317 static int      vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
  318 static int      vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
  319 static int      vxlan_ctrl_flush(struct vxlan_softc *, void *);
  320 static int      vxlan_ioctl_drvspec(struct vxlan_softc *,
  321                     struct ifdrv *, int);
  322 static int      vxlan_ioctl_ifflags(struct vxlan_softc *);
  323 static int      vxlan_ioctl(struct ifnet *, u_long, caddr_t);
  324 
  325 #if defined(INET) || defined(INET6)
  326 static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
  327 static void     vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
  328                     int, uint16_t, uint16_t);
  329 #endif
  330 static int      vxlan_encap4(struct vxlan_softc *,
  331                     const union vxlan_sockaddr *, struct mbuf *);
  332 static int      vxlan_encap6(struct vxlan_softc *,
  333                     const union vxlan_sockaddr *, struct mbuf *);
  334 static int      vxlan_transmit(struct ifnet *, struct mbuf *);
  335 static void     vxlan_qflush(struct ifnet *);
  336 static void     vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
  337                     const struct sockaddr *, void *);
  338 static int      vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
  339                     const struct sockaddr *);
  340 
  341 static void     vxlan_set_default_config(struct vxlan_softc *);
  342 static int      vxlan_set_user_config(struct vxlan_softc *,
  343                      struct ifvxlanparam *);
  344 static int      vxlan_clone_create(struct if_clone *, int, caddr_t);
  345 static void     vxlan_clone_destroy(struct ifnet *);
  346 
  347 static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
  348 static void     vxlan_fakeaddr(struct vxlan_softc *);
  349 static int      vxlan_media_change(struct ifnet *);
  350 static void     vxlan_media_status(struct ifnet *, struct ifmediareq *);
  351 
  352 static int      vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
  353                     const struct sockaddr *);
  354 static void     vxlan_sockaddr_copy(union vxlan_sockaddr *,
  355                     const struct sockaddr *);
  356 static int      vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
  357                     const struct sockaddr *);
  358 static void     vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
  359                     const struct sockaddr *);
  360 static int      vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
  361 static int      vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
  362 static int      vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
  363 static int      vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *);
  364 
  365 static int      vxlan_can_change_config(struct vxlan_softc *);
  366 static int      vxlan_check_vni(uint32_t);
  367 static int      vxlan_check_ttl(int);
  368 static int      vxlan_check_ftable_timeout(uint32_t);
  369 static int      vxlan_check_ftable_max(uint32_t);
  370 
  371 static void     vxlan_sysctl_setup(struct vxlan_softc *);
  372 static void     vxlan_sysctl_destroy(struct vxlan_softc *);
  373 static int      vxlan_tunable_int(struct vxlan_softc *, const char *, int);
  374 
  375 static void     vxlan_ifdetach_event(void *, struct ifnet *);
  376 static void     vxlan_load(void);
  377 static void     vxlan_unload(void);
  378 static int      vxlan_modevent(module_t, int, void *);
  379 
  380 static const char vxlan_name[] = "vxlan";        /* also passed to MALLOC_DEFINE() below */
  381 static MALLOC_DEFINE(M_VXLAN, vxlan_name,
  382     "Virtual eXtensible LAN Interface");
  383 static struct if_clone *vxlan_cloner;
  384 
      /* NOTE(review): presumably guards vxlan_socket_list - confirm at the lock's users. */
  385 static struct mtx vxlan_list_mtx;
  386 #define VXLAN_LIST_LOCK()       mtx_lock(&vxlan_list_mtx)
  387 #define VXLAN_LIST_UNLOCK()     mtx_unlock(&vxlan_list_mtx)
  388 
  389 static LIST_HEAD(, vxlan_socket) vxlan_socket_list;
  390 
  391 static eventhandler_tag vxlan_ifdetach_event_tag;
  392 
      /* The net.link.vxlan sysctl node. */
  393 SYSCTL_DECL(_net_link);
  394 SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW, 0,
  395     "Virtual eXtensible Local Area Network");
  396 
      /* Tunables registered with TUNABLE_INT(); both default to 0. */
  397 static int vxlan_legacy_port = 0;
  398 TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
  399 static int vxlan_reuse_port = 0;
  400 TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);
  401 
  402 /* Default maximum number of addresses in the forwarding table. */
  403 #ifndef VXLAN_FTABLE_MAX
  404 #define VXLAN_FTABLE_MAX        2000
  405 #endif
  406 
  407 /* Timeout (in seconds) of addresses learned in the forwarding table. */
  408 #ifndef VXLAN_FTABLE_TIMEOUT
  409 #define VXLAN_FTABLE_TIMEOUT    (20 * 60)
  410 #endif
  411 
  412 /*
  413  * Maximum timeout (in seconds) of addresses learned in the forwarding
  414  * table.
  415  */
  416 #ifndef VXLAN_FTABLE_MAX_TIMEOUT
  417 #define VXLAN_FTABLE_MAX_TIMEOUT        (60 * 60 * 24)
  418 #endif
  419 
  420 /* Number of seconds between pruning attempts of the forwarding table. */
  421 #ifndef VXLAN_FTABLE_PRUNE
  422 #define VXLAN_FTABLE_PRUNE      (5 * 60)
  423 #endif
  424 
      /*
       * Each default above can be overridden at build time because it is only
       * defined when not already defined. The prune period starts out as
       * VXLAN_FTABLE_PRUNE.
       */
  425 static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
  426 
  427 struct vxlan_control {
              /*
               * Element type of vxlan_control_table[]: a handler, the size of
               * its argument structure, and VXLAN_CTRL_FLAG_* bits.
               * NOTE(review): the flag names suggest copy-in / copy-out of the
               * argument and a privilege check; the code that interprets them
               * is outside this view - confirm there.
               */
  428         int     (*vxlc_func)(struct vxlan_softc *, void *);
  429         int     vxlc_argsize;
  430         int     vxlc_flags;
  431 #define VXLAN_CTRL_FLAG_COPYIN  0x01
  432 #define VXLAN_CTRL_FLAG_COPYOUT 0x02
  433 #define VXLAN_CTRL_FLAG_SUSER   0x04
  434 };
  435 
  436 static const struct vxlan_control vxlan_control_table[] = {
              /*
               * Indexed by VXLAN_CMD_* through designated initializers.
               * VXLAN_CMD_GET_CONFIG is the only entry that uses
               * struct ifvxlancfg with VXLAN_CTRL_FLAG_COPYOUT and no
               * VXLAN_CTRL_FLAG_SUSER; every other entry takes a
               * struct ifvxlancmd with COPYIN | SUSER.
               */
  437         [VXLAN_CMD_GET_CONFIG] =
  438             {   vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
  439                 VXLAN_CTRL_FLAG_COPYOUT
  440             },
  441 
  442         [VXLAN_CMD_SET_VNI] =
  443             {   vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
  444                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  445             },
  446 
  447         [VXLAN_CMD_SET_LOCAL_ADDR] =
  448             {   vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
  449                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  450             },
  451 
  452         [VXLAN_CMD_SET_REMOTE_ADDR] =
  453             {   vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
  454                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  455             },
  456 
  457         [VXLAN_CMD_SET_LOCAL_PORT] =
  458             {   vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
  459                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  460             },
  461 
  462         [VXLAN_CMD_SET_REMOTE_PORT] =
  463             {   vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
  464                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  465             },
  466 
  467         [VXLAN_CMD_SET_PORT_RANGE] =
  468             {   vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
  469                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  470             },
  471 
  472         [VXLAN_CMD_SET_FTABLE_TIMEOUT] =
  473             {   vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
  474                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  475             },
  476 
  477         [VXLAN_CMD_SET_FTABLE_MAX] =
  478             {   vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
  479                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  480             },
  481 
  482         [VXLAN_CMD_SET_MULTICAST_IF] =
  483             {   vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
  484                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  485             },
  486 
  487         [VXLAN_CMD_SET_TTL] =
  488             {   vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
  489                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  490             },
  491 
  492         [VXLAN_CMD_SET_LEARN] =
  493             {   vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
  494                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  495             },
  496 
  497         [VXLAN_CMD_FTABLE_ENTRY_ADD] =
  498             {   vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
  499                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  500             },
  501 
  502         [VXLAN_CMD_FTABLE_ENTRY_REM] =
  503             {   vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
  504                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  505             },
  506 
  507         [VXLAN_CMD_FLUSH] =
  508             {   vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
  509                 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
  510             },
  511 };
  512 
      /* Number of slots in vxlan_control_table[], computed with nitems(). */
  513 static const int vxlan_control_table_size = nitems(vxlan_control_table);
  514 
  515 static int
  516 vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
  517 {
  518         int i, d;
  519 
  520         for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++)
  521                 d = ((int)a[i]) - ((int)b[i]);
  522 
  523         return (d);
  524 }
  525 
  526 static void
  527 vxlan_ftable_init(struct vxlan_softc *sc)
  528 {
  529         int i;
  530 
  531         sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
  532             VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
  533 
  534         for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
  535                 LIST_INIT(&sc->vxl_ftable[i]);
  536         sc->vxl_ftable_hash_key = arc4random();
  537 }
  538 
  539 static void
  540 vxlan_ftable_fini(struct vxlan_softc *sc)
  541 {
  542         int i;
  543 
  544         for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
  545                 KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
  546                     ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
  547         }
  548         MPASS(sc->vxl_ftable_cnt == 0);
  549 
  550         free(sc->vxl_ftable, M_VXLAN);
  551         sc->vxl_ftable = NULL;
  552 }
  553 
  554 static void
  555 vxlan_ftable_flush(struct vxlan_softc *sc, int all)
  556 {
  557         struct vxlan_ftable_entry *fe, *tfe;
  558         int i;
  559 
  560         for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
  561                 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
  562                         if (all || VXLAN_FE_IS_DYNAMIC(fe))
  563                                 vxlan_ftable_entry_destroy(sc, fe);
  564                 }
  565         }
  566 }
  567 
  568 static void
  569 vxlan_ftable_expire(struct vxlan_softc *sc)
  570 {
  571         struct vxlan_ftable_entry *fe, *tfe;
  572         int i;
  573 
  574         VXLAN_LOCK_WASSERT(sc);
  575 
  576         for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
  577                 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
  578                         if (VXLAN_FE_IS_DYNAMIC(fe) &&
  579                             time_uptime >= fe->vxlfe_expire)
  580                                 vxlan_ftable_entry_destroy(sc, fe);
  581                 }
  582         }
  583 }
  584 
  585 static int
  586 vxlan_ftable_update_locked(struct vxlan_softc *sc,
  587     const union vxlan_sockaddr *vxlsa, const uint8_t *mac,
  588     struct rm_priotracker *tracker)
  589 {
  590         struct vxlan_ftable_entry *fe;
  591         int error;
  592 
              /*
               * Record `vxlsa` as the remote address for `mac`, refreshing an
               * existing entry or creating a new dynamic one.
               *
               * Called with the softc lock held in either mode. If a change
               * is needed and the write lock is not owned, the read lock is
               * released through `tracker`, the write lock is taken, and the
               * lookup is redone. The function can therefore return holding
               * the write lock even though it was entered with the read lock;
               * the caller must unlock accordingly (see VXLAN_UNLOCK).
               *
               * Returns 0 on success or when nothing had to change, ENOSPC
               * when the table already holds vxl_ftable_max entries, and
               * ENOMEM when a new entry could not be allocated.
               */
  593         VXLAN_LOCK_ASSERT(sc);
  594 
  595 again:
  596         /*
  597          * A forwarding entry for this MAC address might already exist. If
  598          * so, update it, otherwise create a new one. We may have to upgrade
  599          * the lock if we have to change or create an entry.
  600          */
  601         fe = vxlan_ftable_entry_lookup(sc, mac);
  602         if (fe != NULL) {
                      /* NOTE(review): this store also happens when only the read lock is held. */
  603                 fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
  604 
                      /* Non-dynamic entries are never re-pointed; an unchanged address needs no work. */
  605                 if (!VXLAN_FE_IS_DYNAMIC(fe) ||
  606                     vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa))
  607                         return (0);
  608                 if (!VXLAN_LOCK_WOWNED(sc)) {
                              /*
                               * The lock is fully released between the two
                               * calls, so `fe` is looked up again under the
                               * write lock rather than reused.
                               */
  609                         VXLAN_RUNLOCK(sc, tracker);
  610                         VXLAN_WLOCK(sc);
  611                         sc->vxl_stats.ftable_lock_upgrade_failed++;
  612                         goto again;
  613                 }
  614                 vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa);
  615                 return (0);
  616         }
  617 
              /* No entry yet: inserting one requires the write lock. */
  618         if (!VXLAN_LOCK_WOWNED(sc)) {
  619                 VXLAN_RUNLOCK(sc, tracker);
  620                 VXLAN_WLOCK(sc);
  621                 sc->vxl_stats.ftable_lock_upgrade_failed++;
  622                 goto again;
  623         }
  624 
  625         if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
  626                 sc->vxl_stats.ftable_nospace++;
  627                 return (ENOSPC);
  628         }
  629 
  630         fe = vxlan_ftable_entry_alloc();
  631         if (fe == NULL)
  632                 return (ENOMEM);
  633 
  634         vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC);
  635 
  636         /* The prior lookup failed, so the insert should not. */
  637         error = vxlan_ftable_entry_insert(sc, fe);
  638         MPASS(error == 0);
  639 
  640         return (0);
  641 }
  642 
  643 static int
  644 vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa,
  645     const uint8_t *mac)
  646 {
  647         struct rm_priotracker tracker;
  648         union vxlan_sockaddr vxlsa;
  649         int error;
  650 
  651         /*
  652          * The source port may be randomly selected by the remote host, so
  653          * use the port of the default destination address.
  654          */
  655         vxlan_sockaddr_copy(&vxlsa, sa);
  656         vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
  657 
  658         if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
  659                 error = vxlan_sockaddr_in6_embedscope(&vxlsa);
  660                 if (error)
  661                         return (error);
  662         }
  663 
  664         VXLAN_RLOCK(sc, &tracker);
  665         error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker);
  666         VXLAN_UNLOCK(sc, &tracker);
  667 
  668         return (error);
  669 }
  670 
  671 static int
  672 vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
  673 {
  674         struct rm_priotracker tracker;
  675         struct sbuf sb;
  676         struct vxlan_softc *sc;
  677         struct vxlan_ftable_entry *fe;
  678         size_t size;
  679         int i, error;
  680 
  681         /*
  682          * This is mostly intended for debugging during development. It is
  683          * not practical to dump an entire large table this way.
  684          */
  685 
  686         sc = arg1;
  687         size = PAGE_SIZE;       /* Calculate later. */
  688 
  689         sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
  690         sbuf_putc(&sb, '\n');
  691 
  692         VXLAN_RLOCK(sc, &tracker);
  693         for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
  694                 LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
  695                         if (sbuf_error(&sb) != 0)
  696                                 break;
  697                         vxlan_ftable_entry_dump(fe, &sb);
  698                 }
  699         }
  700         VXLAN_RUNLOCK(sc, &tracker);
  701 
  702         if (sbuf_len(&sb) == 1)
  703                 sbuf_setpos(&sb, 0);
  704 
  705         sbuf_finish(&sb);
  706         error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
  707         sbuf_delete(&sb);
  708 
  709         return (error);
  710 }
  711 
  712 static struct vxlan_ftable_entry *
  713 vxlan_ftable_entry_alloc(void)
  714 {
  715         struct vxlan_ftable_entry *fe;
  716 
  717         fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
  718 
  719         return (fe);
  720 }
  721 
  722 static void
  723 vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
  724 {
  725 
  726         free(fe, M_VXLAN);
  727 }
  728 
  729 static void
  730 vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
  731     const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
  732 {
  733 
  734         fe->vxlfe_flags = flags;
  735         fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
  736         memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
  737         vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
  738 }
  739 
  740 static void
  741 vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
  742     struct vxlan_ftable_entry *fe)
  743 {
  744 
  745         sc->vxl_ftable_cnt--;
  746         LIST_REMOVE(fe, vxlfe_hash);
  747         vxlan_ftable_entry_free(fe);
  748 }
  749 
  750 static int
  751 vxlan_ftable_entry_insert(struct vxlan_softc *sc,
  752     struct vxlan_ftable_entry *fe)
  753 {
  754         struct vxlan_ftable_entry *lfe;
  755         uint32_t hash;
  756         int dir;
  757 
  758         VXLAN_LOCK_WASSERT(sc);
  759         hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
  760 
  761         lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
  762         if (lfe == NULL) {
  763                 LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
  764                 goto out;
  765         }
  766 
  767         do {
  768                 dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
  769                 if (dir == 0)
  770                         return (EEXIST);
  771                 if (dir > 0) {
  772                         LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
  773                         goto out;
  774                 } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
  775                         LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
  776                         goto out;
  777                 } else
  778                         lfe = LIST_NEXT(lfe, vxlfe_hash);
  779         } while (lfe != NULL);
  780 
  781 out:
  782         sc->vxl_ftable_cnt++;
  783 
  784         return (0);
  785 }
  786 
  787 static struct vxlan_ftable_entry *
  788 vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
  789 {
  790         struct vxlan_ftable_entry *fe;
  791         uint32_t hash;
  792         int dir;
  793 
  794         VXLAN_LOCK_ASSERT(sc);
  795         hash = VXLAN_SC_FTABLE_HASH(sc, mac);
  796 
  797         LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
  798                 dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
  799                 if (dir == 0)
  800                         return (fe);
  801                 if (dir > 0)
  802                         break;
  803         }
  804 
  805         return (NULL);
  806 }
  807 
  808 static void
  809 vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
  810 {
  811         char buf[64];
  812         const union vxlan_sockaddr *sa;
  813         const void *addr;
  814         int i, len, af, width;
  815 
  816         sa = &fe->vxlfe_raddr;
  817         af = sa->sa.sa_family;
  818         len = sbuf_len(sb);
  819 
  820         sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
  821             fe->vxlfe_flags);
  822 
  823         for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
  824                 sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
  825         sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
  826 
  827         if (af == AF_INET) {
  828                 addr = &sa->in4.sin_addr;
  829                 width = INET_ADDRSTRLEN - 1;
  830         } else {
  831                 addr = &sa->in6.sin6_addr;
  832                 width = INET6_ADDRSTRLEN - 1;
  833         }
  834         inet_ntop(af, addr, buf, sizeof(buf));
  835         sbuf_printf(sb, "%*s ", width, buf);
  836 
  837         sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
  838 
  839         sbuf_putc(sb, '\n');
  840 
  841         /* Truncate a partial line. */
  842         if (sbuf_error(sb) != 0)
  843                 sbuf_setpos(sb, len);
  844 }
  845 
  846 static struct vxlan_socket *
  847 vxlan_socket_alloc(const union vxlan_sockaddr *sa)
  848 {
  849         struct vxlan_socket *vso;
  850         int i;
  851 
  852         vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
  853         rm_init(&vso->vxlso_lock, "vxlansorm");
  854         refcount_init(&vso->vxlso_refcnt, 0);
  855         for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
  856                 LIST_INIT(&vso->vxlso_vni_hash[i]);
  857         vso->vxlso_laddr = *sa;
  858 
  859         return (vso);
  860 }
  861 
  862 static void
  863 vxlan_socket_destroy(struct vxlan_socket *vso)
  864 {
  865         struct socket *so;
  866         struct vxlan_socket_mc_info *mc;
  867         int i;
  868 
  869         for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
  870                 mc = &vso->vxlso_mc[i];
  871                 KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
  872                     ("%s: socket %p mc[%d] still has address",
  873                      __func__, vso, i));
  874         }
  875 
  876         for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
  877                 KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
  878                     ("%s: socket %p vni_hash[%d] not empty",
  879                      __func__, vso, i));
  880         }
  881 
  882         so = vso->vxlso_sock;
  883         if (so != NULL) {
  884                 vso->vxlso_sock = NULL;
  885                 soclose(so);
  886         }
  887 
  888         rm_destroy(&vso->vxlso_lock);
  889         free(vso, M_VXLAN);
  890 }
  891 
  892 static void
  893 vxlan_socket_release(struct vxlan_socket *vso)
  894 {
  895         int destroy;
  896 
  897         VXLAN_LIST_LOCK();
  898         destroy = VXLAN_SO_RELEASE(vso);
  899         if (destroy != 0)
  900                 LIST_REMOVE(vso, vxlso_entry);
  901         VXLAN_LIST_UNLOCK();
  902 
  903         if (destroy != 0)
  904                 vxlan_socket_destroy(vso);
  905 }
  906 
  907 static struct vxlan_socket *
  908 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
  909 {
  910         struct vxlan_socket *vso;
  911 
  912         VXLAN_LIST_LOCK();
  913         LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
  914                 if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
  915                         VXLAN_SO_ACQUIRE(vso);
  916                         break;
  917                 }
  918         }
  919         VXLAN_LIST_UNLOCK();
  920 
  921         return (vso);
  922 }
  923 
  924 static void
  925 vxlan_socket_insert(struct vxlan_socket *vso)
  926 {
  927 
  928         VXLAN_LIST_LOCK();
  929         VXLAN_SO_ACQUIRE(vso);
  930         LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
  931         VXLAN_LIST_UNLOCK();
  932 }
  933 
  934 static int
  935 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
  936 {
  937         struct thread *td;
  938         int error;
  939 
  940         td = curthread;
  941 
  942         error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
  943             SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
  944         if (error) {
  945                 if_printf(ifp, "cannot create socket: %d\n", error);
  946                 return (error);
  947         }
  948 
  949         error = udp_set_kernel_tunneling(vso->vxlso_sock,
  950             vxlan_rcv_udp_packet, NULL, vso);
  951         if (error) {
  952                 if_printf(ifp, "cannot set tunneling function: %d\n", error);
  953                 return (error);
  954         }
  955 
  956         if (vxlan_reuse_port != 0) {
  957                 struct sockopt sopt;
  958                 int val = 1;
  959 
  960                 bzero(&sopt, sizeof(sopt));
  961                 sopt.sopt_dir = SOPT_SET;
  962                 sopt.sopt_level = IPPROTO_IP;
  963                 sopt.sopt_name = SO_REUSEPORT;
  964                 sopt.sopt_val = &val;
  965                 sopt.sopt_valsize = sizeof(val);
  966                 error = sosetopt(vso->vxlso_sock, &sopt);
  967                 if (error) {
  968                         if_printf(ifp,
  969                             "cannot set REUSEADDR socket opt: %d\n", error);
  970                         return (error);
  971                 }
  972         }
  973 
  974         return (0);
  975 }
  976 
  977 static int
  978 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
  979 {
  980         union vxlan_sockaddr laddr;
  981         struct thread *td;
  982         int error;
  983 
  984         td = curthread;
  985         laddr = vso->vxlso_laddr;
  986 
  987         error = sobind(vso->vxlso_sock, &laddr.sa, td);
  988         if (error) {
  989                 if (error != EADDRINUSE)
  990                         if_printf(ifp, "cannot bind socket: %d\n", error);
  991                 return (error);
  992         }
  993 
  994         return (0);
  995 }
  996 
  997 static int
  998 vxlan_socket_create(struct ifnet *ifp, int multicast,
  999     const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
 1000 {
 1001         union vxlan_sockaddr laddr;
 1002         struct vxlan_socket *vso;
 1003         int error;
 1004 
 1005         laddr = *saddr;
 1006 
 1007         /*
 1008          * If this socket will be multicast, then only the local port
 1009          * must be specified when binding.
 1010          */
 1011         if (multicast != 0) {
 1012                 if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
 1013                         laddr.in4.sin_addr.s_addr = INADDR_ANY;
 1014 #ifdef INET6
 1015                 else
 1016                         laddr.in6.sin6_addr = in6addr_any;
 1017 #endif
 1018         }
 1019 
 1020         vso = vxlan_socket_alloc(&laddr);
 1021         if (vso == NULL)
 1022                 return (ENOMEM);
 1023 
 1024         error = vxlan_socket_init(vso, ifp);
 1025         if (error)
 1026                 goto fail;
 1027 
 1028         error = vxlan_socket_bind(vso, ifp);
 1029         if (error)
 1030                 goto fail;
 1031 
 1032         /*
 1033          * There is a small window between the bind completing and
 1034          * inserting the socket, so that a concurrent create may fail.
 1035          * Let's not worry about that for now.
 1036          */
 1037         vxlan_socket_insert(vso);
 1038         *vsop = vso;
 1039 
 1040         return (0);
 1041 
 1042 fail:
 1043         vxlan_socket_destroy(vso);
 1044 
 1045         return (error);
 1046 }
 1047 
 1048 static void
 1049 vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
 1050     struct vxlan_softc_head *list)
 1051 {
 1052         struct rm_priotracker tracker;
 1053         struct vxlan_softc *sc;
 1054         int i;
 1055 
 1056         VXLAN_SO_RLOCK(vso, &tracker);
 1057         for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
 1058                 LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
 1059                         vxlan_ifdetach(sc, ifp, list);
 1060         }
 1061         VXLAN_SO_RUNLOCK(vso, &tracker);
 1062 }
 1063 
 1064 static struct vxlan_socket *
 1065 vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
 1066 {
 1067         union vxlan_sockaddr laddr;
 1068         struct vxlan_socket *vso;
 1069 
 1070         laddr = *vxlsa;
 1071 
 1072         if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
 1073                 laddr.in4.sin_addr.s_addr = INADDR_ANY;
 1074 #ifdef INET6
 1075         else
 1076                 laddr.in6.sin6_addr = in6addr_any;
 1077 #endif
 1078 
 1079         vso = vxlan_socket_lookup(&laddr);
 1080 
 1081         return (vso);
 1082 }
 1083 
 1084 static int
 1085 vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
 1086     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
 1087     int ifidx)
 1088 {
 1089 
 1090         if (!vxlan_sockaddr_in_any(local) &&
 1091             !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
 1092                 return (0);
 1093         if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
 1094                 return (0);
 1095         if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
 1096                 return (0);
 1097 
 1098         return (1);
 1099 }
 1100 
 1101 static int
 1102 vxlan_socket_mc_join_group(struct vxlan_socket *vso,
 1103     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
 1104     int *ifidx, union vxlan_sockaddr *source)
 1105 {
 1106         struct sockopt sopt;
 1107         int error;
 1108 
 1109         *source = *local;
 1110 
 1111         if (VXLAN_SOCKADDR_IS_IPV4(group)) {
 1112                 struct ip_mreq mreq;
 1113 
 1114                 mreq.imr_multiaddr = group->in4.sin_addr;
 1115                 mreq.imr_interface = local->in4.sin_addr;
 1116 
 1117                 bzero(&sopt, sizeof(sopt));
 1118                 sopt.sopt_dir = SOPT_SET;
 1119                 sopt.sopt_level = IPPROTO_IP;
 1120                 sopt.sopt_name = IP_ADD_MEMBERSHIP;
 1121                 sopt.sopt_val = &mreq;
 1122                 sopt.sopt_valsize = sizeof(mreq);
 1123                 error = sosetopt(vso->vxlso_sock, &sopt);
 1124                 if (error)
 1125                         return (error);
 1126 
 1127                 /*
 1128                  * BMV: Ideally, there would be a formal way for us to get
 1129                  * the local interface that was selected based on the
 1130                  * imr_interface address. We could then update *ifidx so
 1131                  * vxlan_sockaddr_mc_info_match() would return a match for
 1132                  * later creates that explicitly set the multicast interface.
 1133                  *
 1134                  * If we really need to, we can of course look in the INP's
 1135                  * membership list:
 1136                  *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
 1137                  *         imo_membership[]->inm_ifp
 1138                  * similarly to imo_match_group().
 1139                  */
 1140                 source->in4.sin_addr = local->in4.sin_addr;
 1141 
 1142         } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
 1143                 struct ipv6_mreq mreq;
 1144 
 1145                 mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
 1146                 mreq.ipv6mr_interface = *ifidx;
 1147 
 1148                 bzero(&sopt, sizeof(sopt));
 1149                 sopt.sopt_dir = SOPT_SET;
 1150                 sopt.sopt_level = IPPROTO_IPV6;
 1151                 sopt.sopt_name = IPV6_JOIN_GROUP;
 1152                 sopt.sopt_val = &mreq;
 1153                 sopt.sopt_valsize = sizeof(mreq);
 1154                 error = sosetopt(vso->vxlso_sock, &sopt);
 1155                 if (error)
 1156                         return (error);
 1157 
 1158                 /*
 1159                  * BMV: As with IPv4, we would really like to know what
 1160                  * interface in6p_lookup_mcast_ifp() selected.
 1161                  */
 1162         } else
 1163                 error = EAFNOSUPPORT;
 1164 
 1165         return (error);
 1166 }
 1167 
 1168 static int
 1169 vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
 1170     const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
 1171     int ifidx)
 1172 {
 1173         struct sockopt sopt;
 1174         int error;
 1175 
 1176         bzero(&sopt, sizeof(sopt));
 1177         sopt.sopt_dir = SOPT_SET;
 1178 
 1179         if (VXLAN_SOCKADDR_IS_IPV4(group)) {
 1180                 struct ip_mreq mreq;
 1181 
 1182                 mreq.imr_multiaddr = group->in4.sin_addr;
 1183                 mreq.imr_interface = source->in4.sin_addr;
 1184 
 1185                 sopt.sopt_level = IPPROTO_IP;
 1186                 sopt.sopt_name = IP_DROP_MEMBERSHIP;
 1187                 sopt.sopt_val = &mreq;
 1188                 sopt.sopt_valsize = sizeof(mreq);
 1189                 error = sosetopt(vso->vxlso_sock, &sopt);
 1190 
 1191         } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
 1192                 struct ipv6_mreq mreq;
 1193 
 1194                 mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
 1195                 mreq.ipv6mr_interface = ifidx;
 1196 
 1197                 sopt.sopt_level = IPPROTO_IPV6;
 1198                 sopt.sopt_name = IPV6_LEAVE_GROUP;
 1199                 sopt.sopt_val = &mreq;
 1200                 sopt.sopt_valsize = sizeof(mreq);
 1201                 error = sosetopt(vso->vxlso_sock, &sopt);
 1202 
 1203         } else
 1204                 error = EAFNOSUPPORT;
 1205 
 1206         return (error);
 1207 }
 1208 
 1209 static int
 1210 vxlan_socket_mc_add_group(struct vxlan_socket *vso,
 1211     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
 1212     int ifidx, int *idx)
 1213 {
 1214         union vxlan_sockaddr source;
 1215         struct vxlan_socket_mc_info *mc;
 1216         int i, empty, error;
 1217 
 1218         /*
 1219          * Within a socket, the same multicast group may be used by multiple
 1220          * interfaces, each with a different network identifier. But a socket
 1221          * may only join a multicast group once, so keep track of the users
 1222          * here.
 1223          */
 1224 
 1225         VXLAN_SO_WLOCK(vso);
 1226         for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
 1227                 mc = &vso->vxlso_mc[i];
 1228 
 1229                 if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
 1230                         empty++;
 1231                         continue;
 1232                 }
 1233 
 1234                 if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
 1235                         goto out;
 1236         }
 1237         VXLAN_SO_WUNLOCK(vso);
 1238 
 1239         if (empty == 0)
 1240                 return (ENOSPC);
 1241 
 1242         error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
 1243         if (error)
 1244                 return (error);
 1245 
 1246         VXLAN_SO_WLOCK(vso);
 1247         for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
 1248                 mc = &vso->vxlso_mc[i];
 1249 
 1250                 if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
 1251                         vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
 1252                         vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
 1253                         mc->vxlsomc_ifidx = ifidx;
 1254                         goto out;
 1255                 }
 1256         }
 1257         VXLAN_SO_WUNLOCK(vso);
 1258 
 1259         error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
 1260         MPASS(error == 0);
 1261 
 1262         return (ENOSPC);
 1263 
 1264 out:
 1265         mc->vxlsomc_users++;
 1266         VXLAN_SO_WUNLOCK(vso);
 1267 
 1268         *idx = i;
 1269 
 1270         return (0);
 1271 }
 1272 
 1273 static void
 1274 vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
 1275 {
 1276         union vxlan_sockaddr group, source;
 1277         struct vxlan_socket_mc_info *mc;
 1278         int ifidx, leave;
 1279 
 1280         KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
 1281             ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
 1282 
 1283         leave = 0;
 1284         mc = &vso->vxlso_mc[idx];
 1285 
 1286         VXLAN_SO_WLOCK(vso);
 1287         mc->vxlsomc_users--;
 1288         if (mc->vxlsomc_users == 0) {
 1289                 group = mc->vxlsomc_gaddr;
 1290                 source = mc->vxlsomc_saddr;
 1291                 ifidx = mc->vxlsomc_ifidx;
 1292                 bzero(mc, sizeof(*mc));
 1293                 leave = 1;
 1294         }
 1295         VXLAN_SO_WUNLOCK(vso);
 1296 
 1297         if (leave != 0) {
 1298                 /*
 1299                  * Our socket's membership in this group may have already
 1300                  * been removed if we joined through an interface that's
 1301                  * been detached.
 1302                  */
 1303                 vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
 1304         }
 1305 }
 1306 
 1307 static struct vxlan_softc *
 1308 vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
 1309 {
 1310         struct vxlan_softc *sc;
 1311         uint32_t hash;
 1312 
 1313         VXLAN_SO_LOCK_ASSERT(vso);
 1314         hash = VXLAN_SO_VNI_HASH(vni);
 1315 
 1316         LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
 1317                 if (sc->vxl_vni == vni) {
 1318                         VXLAN_ACQUIRE(sc);
 1319                         break;
 1320                 }
 1321         }
 1322 
 1323         return (sc);
 1324 }
 1325 
 1326 static struct vxlan_softc *
 1327 vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
 1328 {
 1329         struct rm_priotracker tracker;
 1330         struct vxlan_softc *sc;
 1331 
 1332         VXLAN_SO_RLOCK(vso, &tracker);
 1333         sc = vxlan_socket_lookup_softc_locked(vso, vni);
 1334         VXLAN_SO_RUNLOCK(vso, &tracker);
 1335 
 1336         return (sc);
 1337 }
 1338 
 1339 static int
 1340 vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
 1341 {
 1342         struct vxlan_softc *tsc;
 1343         uint32_t vni, hash;
 1344 
 1345         vni = sc->vxl_vni;
 1346         hash = VXLAN_SO_VNI_HASH(vni);
 1347 
 1348         VXLAN_SO_WLOCK(vso);
 1349         tsc = vxlan_socket_lookup_softc_locked(vso, vni);
 1350         if (tsc != NULL) {
 1351                 VXLAN_SO_WUNLOCK(vso);
 1352                 vxlan_release(tsc);
 1353                 return (EEXIST);
 1354         }
 1355 
 1356         VXLAN_ACQUIRE(sc);
 1357         LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
 1358         VXLAN_SO_WUNLOCK(vso);
 1359 
 1360         return (0);
 1361 }
 1362 
 1363 static void
 1364 vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
 1365 {
 1366 
 1367         VXLAN_SO_WLOCK(vso);
 1368         LIST_REMOVE(sc, vxl_entry);
 1369         VXLAN_SO_WUNLOCK(vso);
 1370 
 1371         vxlan_release(sc);
 1372 }
 1373 
 1374 static struct ifnet *
 1375 vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
 1376 {
 1377         struct ifnet *ifp;
 1378 
 1379         VXLAN_LOCK_ASSERT(sc);
 1380 
 1381         if (ipv4 && sc->vxl_im4o != NULL)
 1382                 ifp = sc->vxl_im4o->imo_multicast_ifp;
 1383         else if (!ipv4 && sc->vxl_im6o != NULL)
 1384                 ifp = sc->vxl_im6o->im6o_multicast_ifp;
 1385         else
 1386                 ifp = NULL;
 1387 
 1388         if (ifp != NULL)
 1389                 if_ref(ifp);
 1390 
 1391         return (ifp);
 1392 }
 1393 
 1394 static void
 1395 vxlan_free_multicast(struct vxlan_softc *sc)
 1396 {
 1397 
 1398         if (sc->vxl_mc_ifp != NULL) {
 1399                 if_rele(sc->vxl_mc_ifp);
 1400                 sc->vxl_mc_ifp = NULL;
 1401                 sc->vxl_mc_ifindex = 0;
 1402         }
 1403 
 1404         if (sc->vxl_im4o != NULL) {
 1405                 free(sc->vxl_im4o, M_VXLAN);
 1406                 sc->vxl_im4o = NULL;
 1407         }
 1408 
 1409         if (sc->vxl_im6o != NULL) {
 1410                 free(sc->vxl_im6o, M_VXLAN);
 1411                 sc->vxl_im6o = NULL;
 1412         }
 1413 }
 1414 
 1415 static int
 1416 vxlan_setup_multicast_interface(struct vxlan_softc *sc)
 1417 {
 1418         struct ifnet *ifp;
 1419 
 1420         ifp = ifunit_ref(sc->vxl_mc_ifname);
 1421         if (ifp == NULL) {
 1422                 if_printf(sc->vxl_ifp, "multicast interface %s does "
 1423                     "not exist\n", sc->vxl_mc_ifname);
 1424                 return (ENOENT);
 1425         }
 1426 
 1427         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 1428                 if_printf(sc->vxl_ifp, "interface %s does not support "
 1429                      "multicast\n", sc->vxl_mc_ifname);
 1430                 if_rele(ifp);
 1431                 return (ENOTSUP);
 1432         }
 1433 
 1434         sc->vxl_mc_ifp = ifp;
 1435         sc->vxl_mc_ifindex = ifp->if_index;
 1436 
 1437         return (0);
 1438 }
 1439 
 1440 static int
 1441 vxlan_setup_multicast(struct vxlan_softc *sc)
 1442 {
 1443         const union vxlan_sockaddr *group;
 1444         int error;
 1445 
 1446         group = &sc->vxl_dst_addr;
 1447         error = 0;
 1448 
 1449         if (sc->vxl_mc_ifname[0] != '\0') {
 1450                 error = vxlan_setup_multicast_interface(sc);
 1451                 if (error)
 1452                         return (error);
 1453         }
 1454 
 1455         /*
 1456          * Initialize an multicast options structure that is sufficiently
 1457          * populated for use in the respective IP output routine. This
 1458          * structure is typically stored in the socket, but our sockets
 1459          * may be shared among multiple interfaces.
 1460          */
 1461         if (VXLAN_SOCKADDR_IS_IPV4(group)) {
 1462                 sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
 1463                     M_ZERO | M_WAITOK);
 1464                 sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
 1465                 sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
 1466                 sc->vxl_im4o->imo_multicast_vif = -1;
 1467         } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
 1468                 sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
 1469                     M_ZERO | M_WAITOK);
 1470                 sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
 1471                 sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
 1472         }
 1473 
 1474         return (error);
 1475 }
 1476 
 1477 static int
 1478 vxlan_setup_socket(struct vxlan_softc *sc)
 1479 {
 1480         struct vxlan_socket *vso;
 1481         struct ifnet *ifp;
 1482         union vxlan_sockaddr *saddr, *daddr;
 1483         int multicast, error;
 1484 
 1485         vso = NULL;
 1486         ifp = sc->vxl_ifp;
 1487         saddr = &sc->vxl_src_addr;
 1488         daddr = &sc->vxl_dst_addr;
 1489 
 1490         multicast = vxlan_sockaddr_in_multicast(daddr);
 1491         MPASS(multicast != -1);
 1492         sc->vxl_vso_mc_index = -1;
 1493 
 1494         /*
 1495          * Try to create the socket. If that fails, attempt to use an
 1496          * existing socket.
 1497          */
 1498         error = vxlan_socket_create(ifp, multicast, saddr, &vso);
 1499         if (error) {
 1500                 if (multicast != 0)
 1501                         vso = vxlan_socket_mc_lookup(saddr);
 1502                 else
 1503                         vso = vxlan_socket_lookup(saddr);
 1504 
 1505                 if (vso == NULL) {
 1506                         if_printf(ifp, "cannot create socket (error: %d), "
 1507                             "and no existing socket found\n", error);
 1508                         goto out;
 1509                 }
 1510         }
 1511 
 1512         if (multicast != 0) {
 1513                 error = vxlan_setup_multicast(sc);
 1514                 if (error)
 1515                         goto out;
 1516 
 1517                 error = vxlan_socket_mc_add_group(vso, daddr, saddr,
 1518                     sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
 1519                 if (error)
 1520                         goto out;
 1521         }
 1522 
 1523         sc->vxl_sock = vso;
 1524         error = vxlan_socket_insert_softc(vso, sc);
 1525         if (error) {
 1526                 sc->vxl_sock = NULL;
 1527                 if_printf(ifp, "network identifier %d already exists in "
 1528                     "this socket\n", sc->vxl_vni);
 1529                 goto out;
 1530         }
 1531 
 1532         return (0);
 1533 
 1534 out:
 1535         if (vso != NULL) {
 1536                 if (sc->vxl_vso_mc_index != -1) {
 1537                         vxlan_socket_mc_release_group_by_idx(vso,
 1538                             sc->vxl_vso_mc_index);
 1539                         sc->vxl_vso_mc_index = -1;
 1540                 }
 1541                 if (multicast != 0)
 1542                         vxlan_free_multicast(sc);
 1543                 vxlan_socket_release(vso);
 1544         }
 1545 
 1546         return (error);
 1547 }
 1548 
 1549 static void
 1550 vxlan_setup_interface(struct vxlan_softc *sc)
 1551 {
 1552         struct ifnet *ifp;
 1553 
 1554         ifp = sc->vxl_ifp;
 1555         ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
 1556 
 1557         if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
 1558                 ifp->if_hdrlen += sizeof(struct ip);
 1559         else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
 1560                 ifp->if_hdrlen += sizeof(struct ip6_hdr);
 1561 }
 1562 
 1563 static int
 1564 vxlan_valid_init_config(struct vxlan_softc *sc)
 1565 {
 1566         const char *reason;
 1567 
 1568         if (vxlan_check_vni(sc->vxl_vni) != 0) {
 1569                 reason = "invalid virtual network identifier specified";
 1570                 goto fail;
 1571         }
 1572 
 1573         if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
 1574                 reason = "source address type is not supported";
 1575                 goto fail;
 1576         }
 1577 
 1578         if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
 1579                 reason = "destination address type is not supported";
 1580                 goto fail;
 1581         }
 1582 
 1583         if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
 1584                 reason = "no valid destination address specified";
 1585                 goto fail;
 1586         }
 1587 
 1588         if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
 1589             sc->vxl_mc_ifname[0] != '\0') {
 1590                 reason = "can only specify interface with a group address";
 1591                 goto fail;
 1592         }
 1593 
 1594         if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
 1595                 if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
 1596                     VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
 1597                         reason = "source and destination address must both "
 1598                             "be either IPv4 or IPv6";
 1599                         goto fail;
 1600                 }
 1601         }
 1602 
 1603         if (sc->vxl_src_addr.in4.sin_port == 0) {
 1604                 reason = "local port not specified";
 1605                 goto fail;
 1606         }
 1607 
 1608         if (sc->vxl_dst_addr.in4.sin_port == 0) {
 1609                 reason = "remote port not specified";
 1610                 goto fail;
 1611         }
 1612 
 1613         return (0);
 1614 
 1615 fail:
 1616         if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
 1617         return (EINVAL);
 1618 }
 1619 
 1620 static void
 1621 vxlan_init_wait(struct vxlan_softc *sc)
 1622 {
 1623 
 1624         VXLAN_LOCK_WASSERT(sc);
 1625         while (sc->vxl_flags & VXLAN_FLAG_INIT)
 1626                 rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
 1627 }
 1628 
 1629 static void
 1630 vxlan_init_complete(struct vxlan_softc *sc)
 1631 {
 1632 
 1633         VXLAN_WLOCK(sc);
 1634         sc->vxl_flags &= ~VXLAN_FLAG_INIT;
 1635         wakeup(sc);
 1636         VXLAN_WUNLOCK(sc);
 1637 }
 1638 
 1639 static void
 1640 vxlan_init(void *xsc)
 1641 {
 1642         static const uint8_t empty_mac[ETHER_ADDR_LEN];
 1643         struct vxlan_softc *sc;
 1644         struct ifnet *ifp;
 1645 
 1646         sc = xsc;
 1647         ifp = sc->vxl_ifp;
 1648 
 1649         VXLAN_WLOCK(sc);
 1650         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 1651                 VXLAN_WUNLOCK(sc);
 1652                 return;
 1653         }
 1654         sc->vxl_flags |= VXLAN_FLAG_INIT;
 1655         VXLAN_WUNLOCK(sc);
 1656 
 1657         if (vxlan_valid_init_config(sc) != 0)
 1658                 goto out;
 1659 
 1660         vxlan_setup_interface(sc);
 1661 
 1662         if (vxlan_setup_socket(sc) != 0)
 1663                 goto out;
 1664 
 1665         /* Initialize the default forwarding entry. */
 1666         vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
 1667             &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
 1668 
 1669         VXLAN_WLOCK(sc);
 1670         ifp->if_drv_flags |= IFF_DRV_RUNNING;
 1671         callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
 1672             vxlan_timer, sc);
 1673         VXLAN_WUNLOCK(sc);
 1674 
 1675         if_link_state_change(ifp, LINK_STATE_UP);
 1676 out:
 1677         vxlan_init_complete(sc);
 1678 }
 1679 
 1680 static void
 1681 vxlan_release(struct vxlan_softc *sc)
 1682 {
 1683 
 1684         /*
 1685          * The softc may be destroyed as soon as we release our reference,
 1686          * so we cannot serialize the wakeup with the softc lock. We use a
 1687          * timeout in our sleeps so a missed wakeup is unfortunate but not
 1688          * fatal.
 1689          */
 1690         if (VXLAN_RELEASE(sc) != 0)
 1691                 wakeup(sc);
 1692 }
 1693 
 1694 static void
 1695 vxlan_teardown_wait(struct vxlan_softc *sc)
 1696 {
 1697 
 1698         VXLAN_LOCK_WASSERT(sc);
 1699         while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
 1700                 rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
 1701 }
 1702 
 1703 static void
 1704 vxlan_teardown_complete(struct vxlan_softc *sc)
 1705 {
 1706 
 1707         VXLAN_WLOCK(sc);
 1708         sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
 1709         wakeup(sc);
 1710         VXLAN_WUNLOCK(sc);
 1711 }
 1712 
 1713 static void
 1714 vxlan_teardown_locked(struct vxlan_softc *sc)
 1715 {
 1716         struct ifnet *ifp;
 1717         struct vxlan_socket *vso;
 1718 
 1719         ifp = sc->vxl_ifp;
 1720 
 1721         VXLAN_LOCK_WASSERT(sc);
 1722         MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
 1723 
 1724         ifp->if_flags &= ~IFF_UP;
 1725         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 1726         callout_stop(&sc->vxl_callout);
 1727         vso = sc->vxl_sock;
 1728         sc->vxl_sock = NULL;
 1729 
 1730         VXLAN_WUNLOCK(sc);
 1731         if_link_state_change(ifp, LINK_STATE_DOWN);
 1732 
 1733         if (vso != NULL) {
 1734                 vxlan_socket_remove_softc(vso, sc);
 1735 
 1736                 if (sc->vxl_vso_mc_index != -1) {
 1737                         vxlan_socket_mc_release_group_by_idx(vso,
 1738                             sc->vxl_vso_mc_index);
 1739                         sc->vxl_vso_mc_index = -1;
 1740                 }
 1741         }
 1742 
 1743         VXLAN_WLOCK(sc);
 1744         while (sc->vxl_refcnt != 0)
 1745                 rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
 1746         VXLAN_WUNLOCK(sc);
 1747 
 1748         callout_drain(&sc->vxl_callout);
 1749 
 1750         vxlan_free_multicast(sc);
 1751         if (vso != NULL)
 1752                 vxlan_socket_release(vso);
 1753 
 1754         vxlan_teardown_complete(sc);
 1755 }
 1756 
 1757 static void
 1758 vxlan_teardown(struct vxlan_softc *sc)
 1759 {
 1760 
 1761         VXLAN_WLOCK(sc);
 1762         if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
 1763                 vxlan_teardown_wait(sc);
 1764                 VXLAN_WUNLOCK(sc);
 1765                 return;
 1766         }
 1767 
 1768         sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
 1769         vxlan_teardown_locked(sc);
 1770 }
 1771 
 1772 static void
 1773 vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
 1774     struct vxlan_softc_head *list)
 1775 {
 1776 
 1777         VXLAN_WLOCK(sc);
 1778 
 1779         if (sc->vxl_mc_ifp != ifp)
 1780                 goto out;
 1781         if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
 1782                 goto out;
 1783 
 1784         sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
 1785         LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
 1786 
 1787 out:
 1788         VXLAN_WUNLOCK(sc);
 1789 }
 1790 
 1791 static void
 1792 vxlan_timer(void *xsc)
 1793 {
 1794         struct vxlan_softc *sc;
 1795 
 1796         sc = xsc;
 1797         VXLAN_LOCK_WASSERT(sc);
 1798 
 1799         vxlan_ftable_expire(sc);
 1800         callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
 1801 }
 1802 
 1803 static int
 1804 vxlan_ioctl_ifflags(struct vxlan_softc *sc)
 1805 {
 1806         struct ifnet *ifp;
 1807 
 1808         ifp = sc->vxl_ifp;
 1809 
 1810         if (ifp->if_flags & IFF_UP) {
 1811                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 1812                         vxlan_init(sc);
 1813         } else {
 1814                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 1815                         vxlan_teardown(sc);
 1816         }
 1817 
 1818         return (0);
 1819 }
 1820 
 1821 static int
 1822 vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
 1823 {
 1824         struct rm_priotracker tracker;
 1825         struct ifvxlancfg *cfg;
 1826 
 1827         cfg = arg;
 1828         bzero(cfg, sizeof(*cfg));
 1829 
 1830         VXLAN_RLOCK(sc, &tracker);
 1831         cfg->vxlc_vni = sc->vxl_vni;
 1832         memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
 1833             sizeof(union vxlan_sockaddr));
 1834         memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
 1835             sizeof(union vxlan_sockaddr));
 1836         cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
 1837         cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
 1838         cfg->vxlc_ftable_max = sc->vxl_ftable_max;
 1839         cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
 1840         cfg->vxlc_port_min = sc->vxl_min_port;
 1841         cfg->vxlc_port_max = sc->vxl_max_port;
 1842         cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
 1843         cfg->vxlc_ttl = sc->vxl_ttl;
 1844         VXLAN_RUNLOCK(sc, &tracker);
 1845 
 1846 #ifdef INET6
 1847         if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
 1848                 sa6_recoverscope(&cfg->vxlc_local_sa.in6);
 1849         if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
 1850                 sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
 1851 #endif
 1852 
 1853         return (0);
 1854 }
 1855 
 1856 static int
 1857 vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
 1858 {
 1859         struct ifvxlancmd *cmd;
 1860         int error;
 1861 
 1862         cmd = arg;
 1863 
 1864         if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
 1865                 return (EINVAL);
 1866 
 1867         VXLAN_WLOCK(sc);
 1868         if (vxlan_can_change_config(sc)) {
 1869                 sc->vxl_vni = cmd->vxlcmd_vni;
 1870                 error = 0;
 1871         } else
 1872                 error = EBUSY;
 1873         VXLAN_WUNLOCK(sc);
 1874 
 1875         return (error);
 1876 }
 1877 
 1878 static int
 1879 vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
 1880 {
 1881         struct ifvxlancmd *cmd;
 1882         union vxlan_sockaddr *vxlsa;
 1883         int error;
 1884 
 1885         cmd = arg;
 1886         vxlsa = &cmd->vxlcmd_sa;
 1887 
 1888         if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
 1889                 return (EINVAL);
 1890         if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
 1891                 return (EINVAL);
 1892         if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
 1893                 error = vxlan_sockaddr_in6_embedscope(vxlsa);
 1894                 if (error)
 1895                         return (error);
 1896         }
 1897 
 1898         VXLAN_WLOCK(sc);
 1899         if (vxlan_can_change_config(sc)) {
 1900                 vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
 1901                 error = 0;
 1902         } else
 1903                 error = EBUSY;
 1904         VXLAN_WUNLOCK(sc);
 1905 
 1906         return (error);
 1907 }
 1908 
 1909 static int
 1910 vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
 1911 {
 1912         struct ifvxlancmd *cmd;
 1913         union vxlan_sockaddr *vxlsa;
 1914         int error;
 1915 
 1916         cmd = arg;
 1917         vxlsa = &cmd->vxlcmd_sa;
 1918 
 1919         if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
 1920                 return (EINVAL);
 1921         if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
 1922                 error = vxlan_sockaddr_in6_embedscope(vxlsa);
 1923                 if (error)
 1924                         return (error);
 1925         }
 1926 
 1927         VXLAN_WLOCK(sc);
 1928         if (vxlan_can_change_config(sc)) {
 1929                 vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
 1930                 error = 0;
 1931         } else
 1932                 error = EBUSY;
 1933         VXLAN_WUNLOCK(sc);
 1934 
 1935         return (error);
 1936 }
 1937 
 1938 static int
 1939 vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
 1940 {
 1941         struct ifvxlancmd *cmd;
 1942         int error;
 1943 
 1944         cmd = arg;
 1945 
 1946         if (cmd->vxlcmd_port == 0)
 1947                 return (EINVAL);
 1948 
 1949         VXLAN_WLOCK(sc);
 1950         if (vxlan_can_change_config(sc)) {
 1951                 sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
 1952                 error = 0;
 1953         } else
 1954                 error = EBUSY;
 1955         VXLAN_WUNLOCK(sc);
 1956 
 1957         return (error);
 1958 }
 1959 
 1960 static int
 1961 vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
 1962 {
 1963         struct ifvxlancmd *cmd;
 1964         int error;
 1965 
 1966         cmd = arg;
 1967 
 1968         if (cmd->vxlcmd_port == 0)
 1969                 return (EINVAL);
 1970 
 1971         VXLAN_WLOCK(sc);
 1972         if (vxlan_can_change_config(sc)) {
 1973                 sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
 1974                 error = 0;
 1975         } else
 1976                 error = EBUSY;
 1977         VXLAN_WUNLOCK(sc);
 1978 
 1979         return (error);
 1980 }
 1981 
 1982 static int
 1983 vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
 1984 {
 1985         struct ifvxlancmd *cmd;
 1986         uint16_t min, max;
 1987         int error;
 1988 
 1989         cmd = arg;
 1990         min = cmd->vxlcmd_port_min;
 1991         max = cmd->vxlcmd_port_max;
 1992 
 1993         if (max < min)
 1994                 return (EINVAL);
 1995 
 1996         VXLAN_WLOCK(sc);
 1997         if (vxlan_can_change_config(sc)) {
 1998                 sc->vxl_min_port = min;
 1999                 sc->vxl_max_port = max;
 2000                 error = 0;
 2001         } else
 2002                 error = EBUSY;
 2003         VXLAN_WUNLOCK(sc);
 2004 
 2005         return (error);
 2006 }
 2007 
 2008 static int
 2009 vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
 2010 {
 2011         struct ifvxlancmd *cmd;
 2012         int error;
 2013 
 2014         cmd = arg;
 2015 
 2016         VXLAN_WLOCK(sc);
 2017         if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
 2018                 sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
 2019                 error = 0;
 2020         } else
 2021                 error = EINVAL;
 2022         VXLAN_WUNLOCK(sc);
 2023 
 2024         return (error);
 2025 }
 2026 
 2027 static int
 2028 vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
 2029 {
 2030         struct ifvxlancmd *cmd;
 2031         int error;
 2032 
 2033         cmd = arg;
 2034 
 2035         VXLAN_WLOCK(sc);
 2036         if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
 2037                 sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
 2038                 error = 0;
 2039         } else
 2040                 error = EINVAL;
 2041         VXLAN_WUNLOCK(sc);
 2042 
 2043         return (error);
 2044 }
 2045 
 2046 static int
 2047 vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
 2048 {
 2049         struct ifvxlancmd *cmd;
 2050         int error;
 2051 
 2052         cmd = arg;
 2053 
 2054         VXLAN_WLOCK(sc);
 2055         if (vxlan_can_change_config(sc)) {
 2056                 strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
 2057                 error = 0;
 2058         } else
 2059                 error = EBUSY;
 2060         VXLAN_WUNLOCK(sc);
 2061 
 2062         return (error);
 2063 }
 2064 
 2065 static int
 2066 vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
 2067 {
 2068         struct ifvxlancmd *cmd;
 2069         int error;
 2070 
 2071         cmd = arg;
 2072 
 2073         VXLAN_WLOCK(sc);
 2074         if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
 2075                 sc->vxl_ttl = cmd->vxlcmd_ttl;
 2076                 if (sc->vxl_im4o != NULL)
 2077                         sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
 2078                 if (sc->vxl_im6o != NULL)
 2079                         sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
 2080                 error = 0;
 2081         } else
 2082                 error = EINVAL;
 2083         VXLAN_WUNLOCK(sc);
 2084 
 2085         return (error);
 2086 }
 2087 
 2088 static int
 2089 vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
 2090 {
 2091         struct ifvxlancmd *cmd;
 2092 
 2093         cmd = arg;
 2094 
 2095         VXLAN_WLOCK(sc);
 2096         if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
 2097                 sc->vxl_flags |= VXLAN_FLAG_LEARN;
 2098         else
 2099                 sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
 2100         VXLAN_WUNLOCK(sc);
 2101 
 2102         return (0);
 2103 }
 2104 
 2105 static int
 2106 vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
 2107 {
 2108         union vxlan_sockaddr vxlsa;
 2109         struct ifvxlancmd *cmd;
 2110         struct vxlan_ftable_entry *fe;
 2111         int error;
 2112 
 2113         cmd = arg;
 2114         vxlsa = cmd->vxlcmd_sa;
 2115 
 2116         if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
 2117                 return (EINVAL);
 2118         if (vxlan_sockaddr_in_any(&vxlsa) != 0)
 2119                 return (EINVAL);
 2120         if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
 2121                 return (EINVAL);
 2122         /* BMV: We could support both IPv4 and IPv6 later. */
 2123         if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
 2124                 return (EAFNOSUPPORT);
 2125 
 2126         if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
 2127                 error = vxlan_sockaddr_in6_embedscope(&vxlsa);
 2128                 if (error)
 2129                         return (error);
 2130         }
 2131 
 2132         fe = vxlan_ftable_entry_alloc();
 2133         if (fe == NULL)
 2134                 return (ENOMEM);
 2135 
 2136         if (vxlsa.in4.sin_port == 0)
 2137                 vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
 2138 
 2139         vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
 2140             VXLAN_FE_FLAG_STATIC);
 2141 
 2142         VXLAN_WLOCK(sc);
 2143         error = vxlan_ftable_entry_insert(sc, fe);
 2144         VXLAN_WUNLOCK(sc);
 2145 
 2146         if (error)
 2147                 vxlan_ftable_entry_free(fe);
 2148 
 2149         return (error);
 2150 }
 2151 
 2152 static int
 2153 vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
 2154 {
 2155         struct ifvxlancmd *cmd;
 2156         struct vxlan_ftable_entry *fe;
 2157         int error;
 2158 
 2159         cmd = arg;
 2160 
 2161         VXLAN_WLOCK(sc);
 2162         fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
 2163         if (fe != NULL) {
 2164                 vxlan_ftable_entry_destroy(sc, fe);
 2165                 error = 0;
 2166         } else
 2167                 error = ENOENT;
 2168         VXLAN_WUNLOCK(sc);
 2169 
 2170         return (error);
 2171 }
 2172 
 2173 static int
 2174 vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
 2175 {
 2176         struct ifvxlancmd *cmd;
 2177         int all;
 2178 
 2179         cmd = arg;
 2180         all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
 2181 
 2182         VXLAN_WLOCK(sc);
 2183         vxlan_ftable_flush(sc, all);
 2184         VXLAN_WUNLOCK(sc);
 2185 
 2186         return (0);
 2187 }
 2188 
 2189 static int
 2190 vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
 2191 {
 2192         const struct vxlan_control *vc;
 2193         union {
 2194                 struct ifvxlancfg       cfg;
 2195                 struct ifvxlancmd       cmd;
 2196         } args;
 2197         int out, error;
 2198 
 2199         if (ifd->ifd_cmd >= vxlan_control_table_size)
 2200                 return (EINVAL);
 2201 
 2202         bzero(&args, sizeof(args));
 2203         vc = &vxlan_control_table[ifd->ifd_cmd];
 2204         out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
 2205 
 2206         if ((get != 0 && out == 0) || (get == 0 && out != 0))
 2207                 return (EINVAL);
 2208 
 2209         if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
 2210                 error = priv_check(curthread, PRIV_NET_VXLAN);
 2211                 if (error)
 2212                         return (error);
 2213         }
 2214 
 2215         if (ifd->ifd_len != vc->vxlc_argsize ||
 2216             ifd->ifd_len > sizeof(args))
 2217                 return (EINVAL);
 2218 
 2219         if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
 2220                 error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
 2221                 if (error)
 2222                         return (error);
 2223         }
 2224 
 2225         error = vc->vxlc_func(sc, &args);
 2226         if (error)
 2227                 return (error);
 2228 
 2229         if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
 2230                 error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
 2231                 if (error)
 2232                         return (error);
 2233         }
 2234 
 2235         return (0);
 2236 }
 2237 
 2238 static int
 2239 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 2240 {
 2241         struct vxlan_softc *sc;
 2242         struct ifreq *ifr;
 2243         struct ifdrv *ifd;
 2244         int error;
 2245 
 2246         sc = ifp->if_softc;
 2247         ifr = (struct ifreq *) data;
 2248         ifd = (struct ifdrv *) data;
 2249 
 2250         switch (cmd) {
 2251         case SIOCADDMULTI:
 2252         case SIOCDELMULTI:
 2253                 error = 0;
 2254                 break;
 2255 
 2256         case SIOCGDRVSPEC:
 2257         case SIOCSDRVSPEC:
 2258                 error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
 2259                 break;
 2260 
 2261         case SIOCSIFFLAGS:
 2262                 error = vxlan_ioctl_ifflags(sc);
 2263                 break;
 2264 
 2265         case SIOCSIFMEDIA:
 2266         case SIOCGIFMEDIA:
 2267                 error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd);
 2268                 break;
 2269 
 2270         default:
 2271                 error = ether_ioctl(ifp, cmd, data);
 2272                 break;
 2273         }
 2274 
 2275         return (error);
 2276 }
 2277 
 2278 #if defined(INET) || defined(INET6)
 2279 static uint16_t
 2280 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
 2281 {
 2282         int range;
 2283         uint32_t hash;
 2284 
 2285         range = sc->vxl_max_port - sc->vxl_min_port + 1;
 2286 
 2287         if (M_HASHTYPE_ISHASH(m))
 2288                 hash = m->m_pkthdr.flowid;
 2289         else
 2290                 hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
 2291                     sc->vxl_port_hash_key);
 2292 
 2293         return (sc->vxl_min_port + (hash % range));
 2294 }
 2295 
 2296 static void
 2297 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
 2298     uint16_t srcport, uint16_t dstport)
 2299 {
 2300         struct vxlanudphdr *hdr;
 2301         struct udphdr *udph;
 2302         struct vxlan_header *vxh;
 2303         int len;
 2304 
 2305         len = m->m_pkthdr.len - ipoff;
 2306         MPASS(len >= sizeof(struct vxlanudphdr));
 2307         hdr = mtodo(m, ipoff);
 2308 
 2309         udph = &hdr->vxlh_udp;
 2310         udph->uh_sport = srcport;
 2311         udph->uh_dport = dstport;
 2312         udph->uh_ulen = htons(len);
 2313         udph->uh_sum = 0;
 2314 
 2315         vxh = &hdr->vxlh_hdr;
 2316         vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
 2317         vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
 2318 }
 2319 #endif
 2320 
 2321 static int
 2322 vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
 2323     struct mbuf *m)
 2324 {
 2325 #ifdef INET
 2326         struct ifnet *ifp;
 2327         struct ip *ip;
 2328         struct in_addr srcaddr, dstaddr;
 2329         uint16_t srcport, dstport;
 2330         int len, mcast, error;
 2331 
 2332         ifp = sc->vxl_ifp;
 2333         srcaddr = sc->vxl_src_addr.in4.sin_addr;
 2334         srcport = vxlan_pick_source_port(sc, m);
 2335         dstaddr = fvxlsa->in4.sin_addr;
 2336         dstport = fvxlsa->in4.sin_port;
 2337 
 2338         M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
 2339             M_NOWAIT);
 2340         if (m == NULL) {
 2341                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2342                 return (ENOBUFS);
 2343         }
 2344 
 2345         len = m->m_pkthdr.len;
 2346 
 2347         ip = mtod(m, struct ip *);
 2348         ip->ip_tos = 0;
 2349         ip->ip_len = htons(len);
 2350         ip->ip_off = 0;
 2351         ip->ip_ttl = sc->vxl_ttl;
 2352         ip->ip_p = IPPROTO_UDP;
 2353         ip->ip_sum = 0;
 2354         ip->ip_src = srcaddr;
 2355         ip->ip_dst = dstaddr;
 2356 
 2357         vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);
 2358 
 2359         mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 2360         m->m_flags &= ~(M_MCAST | M_BCAST);
 2361 
 2362         error = ip_output(m, NULL, NULL, 0, sc->vxl_im4o, NULL);
 2363         if (error == 0) {
 2364                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 2365                 if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
 2366                 if (mcast != 0)
 2367                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
 2368         } else
 2369                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2370 
 2371         return (error);
 2372 #else
 2373         m_freem(m);
 2374         return (ENOTSUP);
 2375 #endif
 2376 }
 2377 
 2378 static int
 2379 vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
 2380     struct mbuf *m)
 2381 {
 2382 #ifdef INET6
 2383         struct ifnet *ifp;
 2384         struct ip6_hdr *ip6;
 2385         const struct in6_addr *srcaddr, *dstaddr;
 2386         uint16_t srcport, dstport;
 2387         int len, mcast, error;
 2388 
 2389         ifp = sc->vxl_ifp;
 2390         srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
 2391         srcport = vxlan_pick_source_port(sc, m);
 2392         dstaddr = &fvxlsa->in6.sin6_addr;
 2393         dstport = fvxlsa->in6.sin6_port;
 2394 
 2395         M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
 2396             M_NOWAIT);
 2397         if (m == NULL) {
 2398                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2399                 return (ENOBUFS);
 2400         }
 2401 
 2402         len = m->m_pkthdr.len;
 2403 
 2404         ip6 = mtod(m, struct ip6_hdr *);
 2405         ip6->ip6_flow = 0;              /* BMV: Keep in forwarding entry? */
 2406         ip6->ip6_vfc = IPV6_VERSION;
 2407         ip6->ip6_plen = 0;
 2408         ip6->ip6_nxt = IPPROTO_UDP;
 2409         ip6->ip6_hlim = sc->vxl_ttl;
 2410         ip6->ip6_src = *srcaddr;
 2411         ip6->ip6_dst = *dstaddr;
 2412 
 2413         vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);
 2414 
 2415         /*
 2416          * XXX BMV We need support for RFC6935 before we can send and
 2417          * receive IPv6 UDP packets with a zero checksum.
 2418          */
 2419         {
 2420                 struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
 2421                 hdr->uh_sum = in6_cksum_pseudo(ip6,
 2422                     m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
 2423                 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
 2424                 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 2425         }
 2426 
 2427         mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 2428         m->m_flags &= ~(M_MCAST | M_BCAST);
 2429 
 2430         error = ip6_output(m, NULL, NULL, 0, sc->vxl_im6o, NULL, NULL);
 2431         if (error == 0) {
 2432                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 2433                 if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
 2434                 if (mcast != 0)
 2435                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
 2436         } else
 2437                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2438 
 2439         return (error);
 2440 #else
 2441         m_freem(m);
 2442         return (ENOTSUP);
 2443 #endif
 2444 }
 2445 
 2446 static int
 2447 vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
 2448 {
 2449         struct rm_priotracker tracker;
 2450         union vxlan_sockaddr vxlsa;
 2451         struct vxlan_softc *sc;
 2452         struct vxlan_ftable_entry *fe;
 2453         struct ifnet *mcifp;
 2454         struct ether_header *eh;
 2455         int ipv4, error;
 2456 
 2457         sc = ifp->if_softc;
 2458         eh = mtod(m, struct ether_header *);
 2459         fe = NULL;
 2460         mcifp = NULL;
 2461 
 2462         ETHER_BPF_MTAP(ifp, m);
 2463 
 2464         VXLAN_RLOCK(sc, &tracker);
 2465         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 2466                 VXLAN_RUNLOCK(sc, &tracker);
 2467                 m_freem(m);
 2468                 return (ENETDOWN);
 2469         }
 2470 
 2471         if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
 2472                 fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
 2473         if (fe == NULL)
 2474                 fe = &sc->vxl_default_fe;
 2475         vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);
 2476 
 2477         ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
 2478         if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
 2479                 mcifp = vxlan_multicast_if_ref(sc, ipv4);
 2480 
 2481         VXLAN_ACQUIRE(sc);
 2482         VXLAN_RUNLOCK(sc, &tracker);
 2483 
 2484         if (ipv4 != 0)
 2485                 error = vxlan_encap4(sc, &vxlsa, m);
 2486         else
 2487                 error = vxlan_encap6(sc, &vxlsa, m);
 2488 
 2489         vxlan_release(sc);
 2490         if (mcifp != NULL)
 2491                 if_rele(mcifp);
 2492 
 2493         return (error);
 2494 }
 2495 
 2496 static void
 2497 vxlan_qflush(struct ifnet *ifp __unused)
 2498 {
 2499 }
 2500 
 2501 static void
 2502 vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
 2503     const struct sockaddr *srcsa, void *xvso)
 2504 {
 2505         struct vxlan_socket *vso;
 2506         struct vxlan_header *vxh, vxlanhdr;
 2507         uint32_t vni;
 2508         int error;
 2509 
 2510         M_ASSERTPKTHDR(m);
 2511         vso = xvso;
 2512         offset += sizeof(struct udphdr);
 2513 
 2514         if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
 2515                 goto out;
 2516 
 2517         if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
 2518                 m_copydata(m, offset, sizeof(struct vxlan_header),
 2519                     (caddr_t) &vxlanhdr);
 2520                 vxh = &vxlanhdr;
 2521         } else
 2522                 vxh = mtodo(m, offset);
 2523 
 2524         /*
 2525          * Drop if there is a reserved bit set in either the flags or VNI
 2526          * fields of the header. This goes against the specification, but
 2527          * a bit set may indicate an unsupported new feature. This matches
 2528          * the behavior of the Linux implementation.
 2529          */
 2530         if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
 2531             vxh->vxlh_vni & ~htonl(VXLAN_VNI_MASK))
 2532                 goto out;
 2533 
 2534         vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;
 2535         /* Adjust to the start of the inner Ethernet frame. */
 2536         m_adj(m, offset + sizeof(struct vxlan_header));
 2537 
 2538         error = vxlan_input(vso, vni, &m, srcsa);
 2539         MPASS(error != 0 || m == NULL);
 2540 
 2541 out:
 2542         if (m != NULL)
 2543                 m_freem(m);
 2544 }
 2545 
 2546 static int
 2547 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
 2548     const struct sockaddr *sa)
 2549 {
 2550         struct vxlan_softc *sc;
 2551         struct ifnet *ifp;
 2552         struct mbuf *m;
 2553         struct ether_header *eh;
 2554         int error;
 2555 
 2556         sc = vxlan_socket_lookup_softc(vso, vni);
 2557         if (sc == NULL)
 2558                 return (ENOENT);
 2559 
 2560         ifp = sc->vxl_ifp;
 2561         m = *m0;
 2562         eh = mtod(m, struct ether_header *);
 2563 
 2564         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 2565                 error = ENETDOWN;
 2566                 goto out;
 2567         } else if (ifp == m->m_pkthdr.rcvif) {
 2568                 /* XXX Does not catch more complex loops. */
 2569                 error = EDEADLK;
 2570                 goto out;
 2571         }
 2572 
 2573         if (sc->vxl_flags & VXLAN_FLAG_LEARN)
 2574                 vxlan_ftable_learn(sc, sa, eh->ether_shost);
 2575 
 2576         m_clrprotoflags(m);
 2577         m->m_pkthdr.rcvif = ifp;
 2578         M_SETFIB(m, ifp->if_fib);
 2579 
 2580         error = netisr_queue_src(NETISR_ETHER, 0, m);
 2581         *m0 = NULL;
 2582 
 2583 out:
 2584         vxlan_release(sc);
 2585         return (error);
 2586 }
 2587 
 /*
  * Fill a freshly allocated softc with the driver's default configuration:
  * learning enabled, default TTL, default UDP ports, the system's automatic
  * port range, and the default forwarding-table limits.
  */
 2588 static void
 2589 vxlan_set_default_config(struct vxlan_softc *sc)
 2590 {
 2591 
 2592         sc->vxl_flags |= VXLAN_FLAG_LEARN;
 2593 
              /*
               * VXLAN_VNI_MAX is a value vxlan_check_vni() rejects, so this
               * presumably acts as a "no VNI configured yet" sentinel --
               * confirm against the init path.
               */
 2594         sc->vxl_vni = VXLAN_VNI_MAX;
 2595         sc->vxl_ttl = IPDEFTTL;
 2596 
              /*
               * The per-unit "legacy_port" tunable (defaulting to
               * vxlan_legacy_port) selects VXLAN_LEGACY_PORT when non-zero,
               * otherwise VXLAN_PORT.  The port is stored through the in4
               * view of the address union.
               */
 2597         if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
 2598                 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
 2599                 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
 2600         } else {
 2601                 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
 2602                 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
 2603         }
 2604 
 2605         sc->vxl_min_port = V_ipport_firstauto;
 2606         sc->vxl_max_port = V_ipport_lastauto;
 2607 
 2608         sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
 2609         sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
 2610 }
 2611 
 /*
  * Overlay the caller-supplied creation parameters in 'vxlp' on top of the
  * configuration already present in 'sc'.  Only fields whose
  * VXLAN_PARAM_WITH_* bit is set in vxlp->vxlp_with are considered.
  *
  * Returns 0 on success, EAFNOSUPPORT when an address of a family that is
  * not compiled in was supplied, or the error from embedding the IPv6
  * scope.  Values rejected by the vxlan_check_*() helpers, and a port range
  * with min > max, are silently ignored and the existing setting is kept.
  */
 2612 static int
 2613 vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
 2614 {
 2615 
 2616 #ifndef INET
 2617         if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
 2618             VXLAN_PARAM_WITH_REMOTE_ADDR4))
 2619                 return (EAFNOSUPPORT);
 2620 #endif
 2621 
 2622 #ifndef INET6
 2623         if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
 2624             VXLAN_PARAM_WITH_REMOTE_ADDR6))
 2625                 return (EAFNOSUPPORT);
 2626 #else
              /*
               * The scope is embedded into the caller's copy (vxlp) in place,
               * before the addresses are copied into the softc below.
               * NOTE(review): vxlan_sockaddr_in6_embedscope() asserts the
               * sockaddr is IPv6; the family in vxlp is not checked here.
               */
 2627         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
 2628                 int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa);
 2629                 if (error)
 2630                         return (error);
 2631         }
 2632         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
 2633                 int error = vxlan_sockaddr_in6_embedscope(
 2634                    &vxlp->vxlp_remote_sa);
 2635                 if (error)
 2636                         return (error);
 2637         }
 2638 #endif
 2639 
 2640         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
 2641                 if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
 2642                         sc->vxl_vni = vxlp->vxlp_vni;
 2643         }
 2644 
              /* If both an IPv4 and an IPv6 local address are flagged, IPv4 wins. */
 2645         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
 2646                 sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
 2647                 sc->vxl_src_addr.in4.sin_family = AF_INET;
 2648                 sc->vxl_src_addr.in4.sin_addr =
 2649                     vxlp->vxlp_local_sa.in4.sin_addr;
 2650         } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
 2651                 sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
 2652                 sc->vxl_src_addr.in6.sin6_family = AF_INET6;
 2653                 sc->vxl_src_addr.in6.sin6_addr =
 2654                     vxlp->vxlp_local_sa.in6.sin6_addr;
 2655         }
 2656 
              /* Same precedence for the remote address: IPv4 before IPv6. */
 2657         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
 2658                 sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
 2659                 sc->vxl_dst_addr.in4.sin_family = AF_INET;
 2660                 sc->vxl_dst_addr.in4.sin_addr =
 2661                     vxlp->vxlp_remote_sa.in4.sin_addr;
 2662         } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
 2663                 sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
 2664                 sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
 2665                 sc->vxl_dst_addr.in6.sin6_addr =
 2666                     vxlp->vxlp_remote_sa.in6.sin6_addr;
 2667         }
 2668 
              /*
               * Ports are always written through the in4 member, whatever the
               * address family; presumably sin_port and sin6_port overlay each
               * other in the union -- confirm against union vxlan_sockaddr.
               */
 2669         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
 2670                 sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
 2671         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
 2672                 sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);
 2673 
 2674         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
 2675                 if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
 2676                         sc->vxl_min_port = vxlp->vxlp_min_port;
 2677                         sc->vxl_max_port = vxlp->vxlp_max_port;
 2678                 }
 2679         }
 2680 
 2681         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
 2682                 strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);
 2683 
 2684         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
 2685                 if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
 2686                         sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
 2687         }
 2688 
 2689         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
 2690                 if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
 2691                         sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
 2692         }
 2693 
 2694         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
 2695                 if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
 2696                         sc->vxl_ttl = vxlp->vxlp_ttl;
 2697         }
 2698 
              /*
               * Learning can only be switched off here; a non-zero vxlp_learn
               * leaves the flag as it already is.
               */
 2699         if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
 2700                 if (vxlp->vxlp_learn == 0)
 2701                         sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
 2702         }
 2703 
 2704         return (0);
 2705 }
 2706 
 /*
  * Cloner "create" callback: allocate and configure a new vxlan softc for
  * 'unit', optionally applying user parameters copied in from 'params',
  * then allocate the ifnet and attach it as an Ethernet interface.
  *
  * Returns 0 on success; otherwise the copyin()/vxlan_set_user_config()
  * error, or ENOSPC when if_alloc() fails.  'ifc' is not used in this body.
  */
 2707 static int
 2708 vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 2709 {
 2710         struct vxlan_softc *sc;
 2711         struct ifnet *ifp;
 2712         struct ifvxlanparam vxlp;
 2713         int error;
 2714 
 2715         sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
 2716         sc->vxl_unit = unit;
 2717         vxlan_set_default_config(sc);
 2718 
              /* User parameters are optional; defaults stand when absent. */
 2719         if (params != 0) {
 2720                 error = copyin(params, &vxlp, sizeof(vxlp));
 2721                 if (error)
 2722                         goto fail;
 2723 
 2724                 error = vxlan_set_user_config(sc, &vxlp);
 2725                 if (error)
 2726                         goto fail;
 2727         }
 2728 
 2729         ifp = if_alloc(IFT_ETHER);
 2730         if (ifp == NULL) {
 2731                 error = ENOSPC;
 2732                 goto fail;
 2733         }
 2734 
              /*
               * Past this point there are no failure exits, so the 'fail'
               * label below only ever has the softc itself to release.
               */
 2735         sc->vxl_ifp = ifp;
 2736         rm_init(&sc->vxl_lock, "vxlanrm");
 2737         callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
 2738         sc->vxl_port_hash_key = arc4random();
 2739         vxlan_ftable_init(sc);
 2740 
 2741         vxlan_sysctl_setup(sc);
 2742 
 2743         ifp->if_softc = sc;
 2744         if_initname(ifp, vxlan_name, unit);
 2745         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 2746         ifp->if_init = vxlan_init;
 2747         ifp->if_ioctl = vxlan_ioctl;
 2748         ifp->if_transmit = vxlan_transmit;
 2749         ifp->if_qflush = vxlan_qflush;
 2750         ifp->if_capabilities |= IFCAP_LINKSTATE;
 2751         ifp->if_capenable |= IFCAP_LINKSTATE;
 2752 
 2753         ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status);
 2754         ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL);
 2755         ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO);
 2756 
              /* Generate a random MAC and attach with it. */
 2757         vxlan_fakeaddr(sc);
 2758         ether_ifattach(ifp, sc->vxl_hwaddr);
 2759 
              /* Overridden after ether_ifattach(), which presumably set them. */
 2760         ifp->if_baudrate = 0;
 2761         ifp->if_hdrlen = 0;
 2762 
 2763         return (0);
 2764 
 2765 fail:
 2766         free(sc, M_VXLAN);
 2767         return (error);
 2768 }
 2769 
 /*
  * Cloner "destroy" callback: tear the interface down, flush its forwarding
  * table, detach and free the ifnet, then release the remaining per-softc
  * resources (media list, forwarding table, sysctl tree, lock) and the
  * softc itself.
  */
 2770 static void
 2771 vxlan_clone_destroy(struct ifnet *ifp)
 2772 {
 2773         struct vxlan_softc *sc;
 2774 
 2775         sc = ifp->if_softc;
 2776 
 2777         vxlan_teardown(sc);
 2778 
              /* Second argument presumably requests a flush of all entries -- confirm. */
 2779         vxlan_ftable_flush(sc, 1);
 2780 
 2781         ether_ifdetach(ifp);
 2782         if_free(ifp);
              /* 'ifp' must not be used from here on; only 'sc' is touched below. */
 2783         ifmedia_removeall(&sc->vxl_media);
 2784 
 2785         vxlan_ftable_fini(sc);
 2786 
 2787         vxlan_sysctl_destroy(sc);
 2788         rm_destroy(&sc->vxl_lock);
 2789         free(sc, M_VXLAN);
 2790 }
 2791 
 /*
  * Hash a 6-byte Ethernet address into a 32-bit value, keyed by the
  * per-softc sc->vxl_ftable_hash_key.  The address bytes are packed into
  * the 'a' and 'b' accumulators and run through one round of the Jenkins
  * mix; the resulting 'c' is returned.
  *
  * Each byte is widened to uint32_t before shifting: a uint8_t operand is
  * promoted to (signed) int, and shifting a value >= 0x80 left by 24 would
  * shift into the sign bit, which is undefined behaviour.
  */
 2792 /* BMV: Taken from if_bridge. */
 2793 static uint32_t
 2794 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
 2795 {
 2796         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
 2797 
 2798         b += (uint32_t)addr[5] << 8;
 2799         b += addr[4];
 2800         a += (uint32_t)addr[3] << 24;
 2801         a += (uint32_t)addr[2] << 16;
 2802         a += (uint32_t)addr[1] << 8;
 2803         a += addr[0];
 2804 
 2805 /*
 2806  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 2807  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 2808  */
 2809 #define mix(a, b, c)                                                    \
 2810 do {                                                                    \
 2811         a -= b; a -= c; a ^= (c >> 13);                                 \
 2812         b -= c; b -= a; b ^= (a << 8);                                  \
 2813         c -= a; c -= b; c ^= (b >> 13);                                 \
 2814         a -= b; a -= c; a ^= (c >> 12);                                 \
 2815         b -= c; b -= a; b ^= (a << 16);                                 \
 2816         c -= a; c -= b; c ^= (b >> 5);                                  \
 2817         a -= b; a -= c; a ^= (c >> 3);                                  \
 2818         b -= c; b -= a; b ^= (a << 10);                                 \
 2819         c -= a; c -= b; c ^= (b >> 15);                                 \
 2820 } while (0)
 2821 
 2822         mix(a, b, c);
 2823 
 2824 #undef mix
 2825 
 2826         return (c);
 2827 }
 2828 
 /*
  * Fill sc->vxl_hwaddr with a random Ethernet address for the interface.
  */
 2829 static void
 2830 vxlan_fakeaddr(struct vxlan_softc *sc)
 2831 {
 2832 
 2833         /*
 2834          * Generate a non-multicast, locally administered address.
 2835          *
 2836          * BMV: Should we use the FreeBSD OUI range instead?
 2837          */
 2838         arc4rand(sc->vxl_hwaddr, ETHER_ADDR_LEN, 1);
              /* Clear bit 0 (non-multicast) and set bit 1 (locally administered). */
 2839         sc->vxl_hwaddr[0] &= ~1;
 2840         sc->vxl_hwaddr[0] |= 2;
 2841 }
 2842 
 /*
  * ifmedia change callback (registered via ifmedia_init() in
  * vxlan_clone_create()).  Media changes are accepted and ignored;
  * always returns 0.
  */
 2843 static int
 2844 vxlan_media_change(struct ifnet *ifp)
 2845 {
 2846 
 2847         /* Ignore. */
 2848         return (0);
 2849 }
 2850 
 /*
  * ifmedia status callback: unconditionally report the link as valid and
  * active, full-duplex Ethernet.  'ifp' is not consulted.
  */
 2851 static void
 2852 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
 2853 {
 2854 
 2855         ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
 2856         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
 2857 }
 2858 
 /*
  * Byte-wise compare 'sa' against the stored address over the stored
  * address's sa_len bytes.  Returns the bcmp() result, i.e. 0 when the two
  * are identical and non-zero otherwise.  The length of 'sa' itself is not
  * checked here.
  */
 2859 static int
 2860 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
 2861     const struct sockaddr *sa)
 2862 {
 2863 
 2864         return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
 2865 }
 2866 
 /*
  * Replace *vxladdr with a full copy of 'sa' (an AF_INET or AF_INET6
  * sockaddr).  The destination is zeroed first and its length field is
  * forced to the size of the matching sockaddr structure.
  */
 2867 static void
 2868 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
 2869     const struct sockaddr *sa)
 2870 {
 2871 
 2872         MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
 2873         bzero(vxladdr, sizeof(*vxladdr));
 2874 
 2875         if (sa->sa_family == AF_INET) {
 2876                 vxladdr->in4 = *satoconstsin(sa);
 2877                 vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
 2878         } else if (sa->sa_family == AF_INET6) {
 2879                 vxladdr->in6 = *satoconstsin6(sa);
 2880                 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
 2881         }
 2882 }
 2883 
 /*
  * Compare only the network address (not port or other fields) of 'sa'
  * with the stored address, interpreting the stored address by the family
  * of 'sa'.  Returns non-zero when the addresses match, 0 when they differ
  * or when 'sa' is neither AF_INET nor AF_INET6.
  */
 2884 static int
 2885 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
 2886     const struct sockaddr *sa)
 2887 {
 2888         int equal;
 2889 
 2890         if (sa->sa_family == AF_INET) {
 2891                 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
 2892                 equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr;
 2893         } else if (sa->sa_family == AF_INET6) {
 2894                 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
 2895                 equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr);
 2896         } else
 2897                 equal = 0;
 2898 
 2899         return (equal);
 2900 }
 2901 
 /*
  * Copy only the family, length and network address from 'sa' into
  * *vxladdr.  Unlike vxlan_sockaddr_copy(), the destination is not zeroed,
  * so any other fields already stored there (e.g. the port) are left as
  * they were.
  */
 2902 static void
 2903 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
 2904     const struct sockaddr *sa)
 2905 {
 2906 
 2907         MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
 2908 
 2909         if (sa->sa_family == AF_INET) {
 2910                 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
 2911                 vxladdr->in4.sin_family = AF_INET;
 2912                 vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
 2913                 vxladdr->in4.sin_addr = *in4;
 2914         } else if (sa->sa_family == AF_INET6) {
 2915                 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
 2916                 vxladdr->in6.sin6_family = AF_INET6;
 2917                 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
 2918                 vxladdr->in6.sin6_addr = *in6;
 2919         }
 2920 }
 2921 
 /*
  * Report whether the address family of *vxladdr can be used by this
  * kernel: AF_INET needs INET compiled in, AF_INET6 needs INET6.
  * AF_UNSPEC is accepted only when 'unspec' is non-zero.
  * Returns 1 if supported, 0 otherwise.
  */
 2922 static int
 2923 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
 2924 {
 2925         const struct sockaddr *sa;
 2926         int supported;
 2927 
 2928         sa = &vxladdr->sa;
 2929         supported = 0;
 2930 
 2931         if (sa->sa_family == AF_UNSPEC && unspec != 0) {
 2932                 supported = 1;
 2933         } else if (sa->sa_family == AF_INET) {
 2934 #ifdef INET
 2935                 supported = 1;
 2936 #endif
 2937         } else if (sa->sa_family == AF_INET6) {
 2938 #ifdef INET6
 2939                 supported = 1;
 2940 #endif
 2941         }
 2942 
 2943         return (supported);
 2944 }
 2945 
 /*
  * Test whether the stored address is the wildcard ("any") address of its
  * family.  Returns non-zero for INADDR_ANY / the IPv6 unspecified address,
  * 0 for any other IPv4/IPv6 address, and -1 when the family is neither
  * AF_INET nor AF_INET6.  Note that -1 is also "true" in a plain boolean
  * test, so callers must handle the unknown-family case explicitly.
  */
 2946 static int
 2947 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
 2948 {
 2949         const struct sockaddr *sa;
 2950         int any;
 2951 
 2952         sa = &vxladdr->sa;
 2953 
 2954         if (sa->sa_family == AF_INET) {
 2955                 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
 2956                 any = in4->s_addr == INADDR_ANY;
 2957         } else if (sa->sa_family == AF_INET6) {
 2958                 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
 2959                 any = IN6_IS_ADDR_UNSPECIFIED(in6);
 2960         } else
 2961                 any = -1;
 2962 
 2963         return (any);
 2964 }
 2965 
 /*
  * Test whether the stored address is a multicast address.  Returns
  * non-zero for an IPv4/IPv6 multicast address, 0 for any other IPv4/IPv6
  * address, and -1 when the family is neither AF_INET nor AF_INET6 (which
  * is also "true" in a plain boolean test).
  */
 2966 static int
 2967 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
 2968 {
 2969         const struct sockaddr *sa;
 2970         int mc;
 2971 
 2972         sa = &vxladdr->sa;
 2973 
 2974         if (sa->sa_family == AF_INET) {
 2975                 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
                      /* IN_MULTICAST() is given the address in host byte order. */
 2976                 mc = IN_MULTICAST(ntohl(in4->s_addr));
 2977         } else if (sa->sa_family == AF_INET6) {
 2978                 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
 2979                 mc = IN6_IS_ADDR_MULTICAST(in6);
 2980         } else
 2981                 mc = -1;
 2982 
 2983         return (mc);
 2984 }
 2985 
 /*
  * Embed the scope into an IPv6 address in place via sa6_embedscope().
  * The caller must pass an IPv6 sockaddr (asserted with MPASS).  Returns
  * the sa6_embedscope() result, or EAFNOSUPPORT when the kernel is built
  * without INET6.
  */
 2986 static int
 2987 vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr)
 2988 {
 2989         int error;
 2990 
 2991         MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr));
 2992 #ifdef INET6
 2993         error = sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone);
 2994 #else
 2995         error = EAFNOSUPPORT;
 2996 #endif
 2997 
 2998         return (error);
 2999 }
 3000 
 /*
  * Decide whether the configuration may be modified right now.  Must be
  * called with the softc lock held (asserted).  Returns 1 when the
  * interface is not running and is neither initializing nor tearing down;
  * 0 otherwise.
  */
 3001 static int
 3002 vxlan_can_change_config(struct vxlan_softc *sc)
 3003 {
 3004         struct ifnet *ifp;
 3005 
 3006         ifp = sc->vxl_ifp;
 3007         VXLAN_LOCK_ASSERT(sc);
 3008 
 3009         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 3010                 return (0);
 3011         if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
 3012                 return (0);
 3013 
 3014         return (1);
 3015 }
 3016 
 /*
  * Validate a VNI.  Returns 0 when 'vni' is acceptable and non-zero when it
  * is out of range (vni >= VXLAN_VNI_MAX); callers test the result
  * against 0.
  */
 3017 static int
 3018 vxlan_check_vni(uint32_t vni)
 3019 {
 3020 
 3021         return (vni >= VXLAN_VNI_MAX);
 3022 }
 3023 
 /*
  * Validate a TTL.  Returns 0 when 'ttl' is acceptable and non-zero when it
  * exceeds MAXTTL.  No lower bound is checked here.
  */
 3024 static int
 3025 vxlan_check_ttl(int ttl)
 3026 {
 3027 
 3028         return (ttl > MAXTTL);
 3029 }
 3030 
 /*
  * Validate a forwarding-table timeout.  Returns 0 when acceptable and
  * non-zero when it exceeds VXLAN_FTABLE_MAX_TIMEOUT.
  */
 3031 static int
 3032 vxlan_check_ftable_timeout(uint32_t timeout)
 3033 {
 3034 
 3035         return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
 3036 }
 3037 
 /*
  * Validate a forwarding-table size limit.  Returns 0 when acceptable and
  * non-zero when it exceeds VXLAN_FTABLE_MAX.
  */
 3038 static int
 3039 vxlan_check_ftable_max(uint32_t max)
 3040 {
 3041 
 3042         return (max > VXLAN_FTABLE_MAX);
 3043 }
 3044 
 /*
  * Create the per-interface sysctl subtree net.link.vxlan.<unit> with two
  * child nodes: "ftable" (count, max, timeout, and a "dump" handler) and
  * "stats" (forwarding-table counters).  All entries are read-only.  The
  * OIDs are registered in sc->vxl_sysctl_ctx so they can be released
  * together by vxlan_sysctl_destroy().
  */
 3045 static void
 3046 vxlan_sysctl_setup(struct vxlan_softc *sc)
 3047 {
 3048         struct sysctl_ctx_list *ctx;
 3049         struct sysctl_oid *node;
 3050         struct vxlan_statistics *stats;
 3051         char namebuf[8];
 3052 
 3053         ctx = &sc->vxl_sysctl_ctx;
 3054         stats = &sc->vxl_stats;
              /*
               * The node is named after the unit number.  snprintf() bounds
               * the write; a unit wider than 7 digits would be truncated.
               */
 3055         snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
 3056 
 3057         sysctl_ctx_init(ctx);
 3058         sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
 3059             SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
 3060             CTLFLAG_RD, NULL, "");
 3061 
              /* net.link.vxlan.<unit>.ftable.* */
 3062         node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
 3063             OID_AUTO, "ftable", CTLFLAG_RD, NULL, "");
 3064         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
 3065             CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
 3066             "Number of entries in fowarding table");
 3067         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
 3068              CTLFLAG_RD, &sc->vxl_ftable_max, 0,
 3069             "Maximum number of entries allowed in fowarding table");
 3070         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
 3071             CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
 3072             "Number of seconds between prunes of the forwarding table");
 3073         SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
 3074             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
 3075             sc, 0, vxlan_ftable_sysctl_dump, "A",
 3076             "Dump the forwarding table entries");
 3077 
              /* net.link.vxlan.<unit>.stats.* */
 3078         node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
 3079             OID_AUTO, "stats", CTLFLAG_RD, NULL, "");
 3080         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
 3081             "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
 3082             "Fowarding table reached maximum entries");
 3083         SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
 3084             "ftable_lock_upgrade_failed", CTLFLAG_RD,
 3085             &stats->ftable_lock_upgrade_failed, 0,
 3086             "Forwarding table update required lock upgrade");
 3087 }
 3088 
 /*
  * Release every sysctl OID registered in the softc's sysctl context and
  * clear the cached root node pointer.
  */
 3089 static void
 3090 vxlan_sysctl_destroy(struct vxlan_softc *sc)
 3091 {
 3092 
 3093         sysctl_ctx_free(&sc->vxl_sysctl_ctx);
 3094         sc->vxl_sysctl_node = NULL;
 3095 }
 3096 
 /*
  * Look up the integer tunable "net.link.vxlan.<unit>.<knob>".  'def' is
  * passed to TUNABLE_INT_FETCH() as the destination and then returned, so
  * the caller's default comes back unless the fetch overwrote it.
  */
 3097 static int
 3098 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
 3099 {
 3100         char path[64];
 3101 
 3102         snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
 3103             sc->vxl_unit, knob);
 3104         TUNABLE_INT_FETCH(path, &def);
 3105 
 3106         return (def);
 3107 }
 3108 
 /*
  * ifnet_departure_event handler (registered in vxlan_load()).  When a
  * multicast-capable interface goes away (and is not merely being
  * renamed), collect the vxlan softcs affected on every vxlan socket and
  * tear each of them down.
  */
 3109 static void
 3110 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
 3111 {
 3112         struct vxlan_softc_head list;
 3113         struct vxlan_socket *vso;
 3114         struct vxlan_softc *sc, *tsc;
 3115 
 3116         LIST_INIT(&list);
 3117 
 3118         if (ifp->if_flags & IFF_RENAMING)
 3119                 return;
 3120         if ((ifp->if_flags & IFF_MULTICAST) == 0)
 3121                 return;
 3122 
              /*
               * Gather the affected softcs onto a private list under the
               * global list lock; the teardown itself runs after the lock
               * has been dropped.
               */
 3123         VXLAN_LIST_LOCK();
 3124         LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
 3125                 vxlan_socket_ifdetach(vso, ifp, &list);
 3126         VXLAN_LIST_UNLOCK();
 3127 
 3128         LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
 3129                 LIST_REMOVE(sc, vxl_ifdetach_list);
 3130 
                      /*
                       * NOTE(review): no unlock is visible in this function;
                       * presumably vxlan_teardown_locked() releases the write
                       * lock taken here -- confirm.
                       */
 3131                 VXLAN_WLOCK(sc);
 3132                 if (sc->vxl_flags & VXLAN_FLAG_INIT)
 3133                         vxlan_init_wait(sc);
 3134                 vxlan_teardown_locked(sc);
 3135         }
 3136 }
 3137 
 /*
  * Module load: initialise the global socket list and its mutex, hook the
  * interface-departure event, and register the "vxlan" interface cloner.
  */
 3138 static void
 3139 vxlan_load(void)
 3140 {
 3141 
 3142         mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
 3143         LIST_INIT(&vxlan_socket_list);
 3144         vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
 3145             vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
 3146         vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create,
 3147             vxlan_clone_destroy, 0);
 3148 }
 3149 
 /*
  * Module unload: undo vxlan_load() -- unhook the departure event, detach
  * the cloner, and destroy the list mutex.  The socket list is expected to
  * be empty by then (asserted).
  */
 3150 static void
 3151 vxlan_unload(void)
 3152 {
 3153 
 3154         EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 3155             vxlan_ifdetach_event_tag);
 3156         if_clone_detach(vxlan_cloner);
 3157         mtx_destroy(&vxlan_list_mtx);
 3158         MPASS(LIST_EMPTY(&vxlan_socket_list));
 3159 }
 3160 
 /*
  * Module event handler.  Dispatches MOD_LOAD and MOD_UNLOAD to
  * vxlan_load() / vxlan_unload() and returns 0; any other event type
  * yields ENOTSUP.
  */
 3161 static int
 3162 vxlan_modevent(module_t mod, int type, void *unused)
 3163 {
 3164         int error;
 3165 
 3166         error = 0;
 3167 
 3168         switch (type) {
 3169         case MOD_LOAD:
 3170                 vxlan_load();
 3171                 break;
 3172         case MOD_UNLOAD:
 3173                 vxlan_unload();
 3174                 break;
 3175         default:
 3176                 error = ENOTSUP;
 3177                 break;
 3178         }
 3179 
 3180         return (error);
 3181 }
 3182 
 /*
  * Module glue: name, event handler and (unused) private data, declared as
  * the "if_vxlan" module at version 1.
  */
 3183 static moduledata_t vxlan_mod = {
 3184         "if_vxlan",
 3185         vxlan_modevent,
 3186         0
 3187 };
 3188 
 3189 DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 3190 MODULE_VERSION(if_vxlan, 1);

Cache object: b0343bc5bcc941a8c2ca9208a3646fad


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.