The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/if_ethersubr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)if_ethersubr.c      8.1 (Berkeley) 6/10/93
   32  * $FreeBSD$
   33  */
   34 
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 #include "opt_netgraph.h"
   38 #include "opt_mbuf_profiling.h"
   39 #include "opt_rss.h"
   40 
   41 #include <sys/param.h>
   42 #include <sys/systm.h>
   43 #include <sys/devctl.h>
   44 #include <sys/eventhandler.h>
   45 #include <sys/jail.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/malloc.h>
   49 #include <sys/mbuf.h>
   50 #include <sys/module.h>
   51 #include <sys/msan.h>
   52 #include <sys/proc.h>
   53 #include <sys/priv.h>
   54 #include <sys/random.h>
   55 #include <sys/socket.h>
   56 #include <sys/sockio.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/uuid.h>
   59 
   60 #include <net/ieee_oui.h>
   61 #include <net/if.h>
   62 #include <net/if_var.h>
   63 #include <net/if_private.h>
   64 #include <net/if_arp.h>
   65 #include <net/netisr.h>
   66 #include <net/route.h>
   67 #include <net/if_llc.h>
   68 #include <net/if_dl.h>
   69 #include <net/if_types.h>
   70 #include <net/bpf.h>
   71 #include <net/ethernet.h>
   72 #include <net/if_bridgevar.h>
   73 #include <net/if_vlan_var.h>
   74 #include <net/if_llatbl.h>
   75 #include <net/pfil.h>
   76 #include <net/rss_config.h>
   77 #include <net/vnet.h>
   78 
   79 #include <netpfil/pf/pf_mtag.h>
   80 
   81 #if defined(INET) || defined(INET6)
   82 #include <netinet/in.h>
   83 #include <netinet/in_var.h>
   84 #include <netinet/if_ether.h>
   85 #include <netinet/ip_carp.h>
   86 #include <netinet/ip_var.h>
   87 #endif
   88 #ifdef INET6
   89 #include <netinet6/nd6.h>
   90 #endif
   91 #include <security/mac/mac_framework.h>
   92 
   93 #include <crypto/sha1.h>
   94 
   95 #ifdef CTASSERT
   96 CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
   97 CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
   98 #endif
   99 
  100 VNET_DEFINE(pfil_head_t, link_pfil_head);       /* Packet filter hooks */
  101 
  102 /* netgraph node hooks for ng_ether(4) */
  103 void    (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
  104 void    (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
  105 int     (*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
  106 void    (*ng_ether_attach_p)(struct ifnet *ifp);
  107 void    (*ng_ether_detach_p)(struct ifnet *ifp);
  108 
  109 void    (*vlan_input_p)(struct ifnet *, struct mbuf *);
  110 
  111 /* if_bridge(4) support */
  112 void    (*bridge_dn_p)(struct mbuf *, struct ifnet *);
  113 
  114 /* if_lagg(4) support */
  115 struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); 
  116 
  117 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
  118                         { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
  119 
  120 static  int ether_resolvemulti(struct ifnet *, struct sockaddr **,
  121                 struct sockaddr *);
  122 static  int ether_requestencap(struct ifnet *, struct if_encap_req *);
  123 
  124 #define senderr(e) do { error = (e); goto bad;} while (0)
  125 
  126 static void
  127 update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
  128 {
  129         int csum_flags = 0;
  130 
  131         if (src->m_pkthdr.csum_flags & CSUM_IP)
  132                 csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
  133         if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
  134                 csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
  135         if (src->m_pkthdr.csum_flags & CSUM_SCTP)
  136                 csum_flags |= CSUM_SCTP_VALID;
  137         dst->m_pkthdr.csum_flags |= csum_flags;
  138         if (csum_flags & CSUM_DATA_VALID)
  139                 dst->m_pkthdr.csum_data = 0xffff;
  140 }
  141 
  142 /*
  143  * Handle link-layer encapsulation requests.
  144  */
  145 static int
  146 ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
  147 {
  148         struct ether_header *eh;
  149         struct arphdr *ah;
  150         uint16_t etype;
  151         const u_char *lladdr;
  152 
  153         if (req->rtype != IFENCAP_LL)
  154                 return (EOPNOTSUPP);
  155 
  156         if (req->bufsize < ETHER_HDR_LEN)
  157                 return (ENOMEM);
  158 
  159         eh = (struct ether_header *)req->buf;
  160         lladdr = req->lladdr;
  161         req->lladdr_off = 0;
  162 
  163         switch (req->family) {
  164         case AF_INET:
  165                 etype = htons(ETHERTYPE_IP);
  166                 break;
  167         case AF_INET6:
  168                 etype = htons(ETHERTYPE_IPV6);
  169                 break;
  170         case AF_ARP:
  171                 ah = (struct arphdr *)req->hdata;
  172                 ah->ar_hrd = htons(ARPHRD_ETHER);
  173 
  174                 switch(ntohs(ah->ar_op)) {
  175                 case ARPOP_REVREQUEST:
  176                 case ARPOP_REVREPLY:
  177                         etype = htons(ETHERTYPE_REVARP);
  178                         break;
  179                 case ARPOP_REQUEST:
  180                 case ARPOP_REPLY:
  181                 default:
  182                         etype = htons(ETHERTYPE_ARP);
  183                         break;
  184                 }
  185 
  186                 if (req->flags & IFENCAP_FLAG_BROADCAST)
  187                         lladdr = ifp->if_broadcastaddr;
  188                 break;
  189         default:
  190                 return (EAFNOSUPPORT);
  191         }
  192 
  193         memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
  194         memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN);
  195         memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
  196         req->bufsize = sizeof(struct ether_header);
  197 
  198         return (0);
  199 }
  200 
  201 static int
  202 ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
  203         const struct sockaddr *dst, struct route *ro, u_char *phdr,
  204         uint32_t *pflags, struct llentry **plle)
  205 {
  206         uint32_t lleflags = 0;
  207         int error = 0;
  208 #if defined(INET) || defined(INET6)
  209         struct ether_header *eh = (struct ether_header *)phdr;
  210         uint16_t etype;
  211 #endif
  212 
  213         if (plle)
  214                 *plle = NULL;
  215 
  216         switch (dst->sa_family) {
  217 #ifdef INET
  218         case AF_INET:
  219                 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
  220                         error = arpresolve(ifp, 0, m, dst, phdr, &lleflags,
  221                             plle);
  222                 else {
  223                         if (m->m_flags & M_BCAST)
  224                                 memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
  225                                     ETHER_ADDR_LEN);
  226                         else {
  227                                 const struct in_addr *a;
  228                                 a = &(((const struct sockaddr_in *)dst)->sin_addr);
  229                                 ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost);
  230                         }
  231                         etype = htons(ETHERTYPE_IP);
  232                         memcpy(&eh->ether_type, &etype, sizeof(etype));
  233                         memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
  234                 }
  235                 break;
  236 #endif
  237 #ifdef INET6
  238         case AF_INET6:
  239                 if ((m->m_flags & M_MCAST) == 0) {
  240                         int af = RO_GET_FAMILY(ro, dst);
  241                         error = nd6_resolve(ifp, LLE_SF(af, 0), m, dst, phdr,
  242                             &lleflags, plle);
  243                 } else {
  244                         const struct in6_addr *a6;
  245                         a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
  246                         ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
  247                         etype = htons(ETHERTYPE_IPV6);
  248                         memcpy(&eh->ether_type, &etype, sizeof(etype));
  249                         memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
  250                 }
  251                 break;
  252 #endif
  253         default:
  254                 if_printf(ifp, "can't handle af%d\n", dst->sa_family);
  255                 if (m != NULL)
  256                         m_freem(m);
  257                 return (EAFNOSUPPORT);
  258         }
  259 
  260         if (error == EHOSTDOWN) {
  261                 if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
  262                         error = EHOSTUNREACH;
  263         }
  264 
  265         if (error != 0)
  266                 return (error);
  267 
  268         *pflags = RT_MAY_LOOP;
  269         if (lleflags & LLE_IFADDR)
  270                 *pflags |= RT_L2_ME;
  271 
  272         return (0);
  273 }
  274 
  275 /*
  276  * Ethernet output routine.
  277  * Encapsulate a packet of type family for the local net.
  278  * Use trailer local net encapsulation if enough data in first
  279  * packet leaves a multiple of 512 bytes of data in remainder.
  280  */
  281 int
  282 ether_output(struct ifnet *ifp, struct mbuf *m,
  283         const struct sockaddr *dst, struct route *ro)
  284 {
  285         int error = 0;
  286         char linkhdr[ETHER_HDR_LEN], *phdr;
  287         struct ether_header *eh;
  288         struct pf_mtag *t;
  289         bool loop_copy;
  290         int hlen;       /* link layer header length */
  291         uint32_t pflags;
  292         struct llentry *lle = NULL;
  293         int addref = 0;
  294 
  295         phdr = NULL;
  296         pflags = 0;
  297         if (ro != NULL) {
  298                 /* XXX BPF uses ro_prepend */
  299                 if (ro->ro_prepend != NULL) {
  300                         phdr = ro->ro_prepend;
  301                         hlen = ro->ro_plen;
  302                 } else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
  303                         if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
  304                                 lle = ro->ro_lle;
  305                                 if (lle != NULL &&
  306                                     (lle->la_flags & LLE_VALID) == 0) {
  307                                         LLE_FREE(lle);
  308                                         lle = NULL;     /* redundant */
  309                                         ro->ro_lle = NULL;
  310                                 }
  311                                 if (lle == NULL) {
  312                                         /* if we lookup, keep cache */
  313                                         addref = 1;
  314                                 } else
  315                                         /*
  316                                          * Notify LLE code that
  317                                          * the entry was used
  318                                          * by datapath.
  319                                          */
  320                                         llentry_provide_feedback(lle);
  321                         }
  322                         if (lle != NULL) {
  323                                 phdr = lle->r_linkdata;
  324                                 hlen = lle->r_hdrlen;
  325                                 pflags = lle->r_flags;
  326                         }
  327                 }
  328         }
  329 
  330 #ifdef MAC
  331         error = mac_ifnet_check_transmit(ifp, m);
  332         if (error)
  333                 senderr(error);
  334 #endif
  335 
  336         M_PROFILE(m);
  337         if (ifp->if_flags & IFF_MONITOR)
  338                 senderr(ENETDOWN);
  339         if (!((ifp->if_flags & IFF_UP) &&
  340             (ifp->if_drv_flags & IFF_DRV_RUNNING)))
  341                 senderr(ENETDOWN);
  342 
  343         if (phdr == NULL) {
  344                 /* No prepend data supplied. Try to calculate ourselves. */
  345                 phdr = linkhdr;
  346                 hlen = ETHER_HDR_LEN;
  347                 error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
  348                     addref ? &lle : NULL);
  349                 if (addref && lle != NULL)
  350                         ro->ro_lle = lle;
  351                 if (error != 0)
  352                         return (error == EWOULDBLOCK ? 0 : error);
  353         }
  354 
  355         if ((pflags & RT_L2_ME) != 0) {
  356                 update_mbuf_csumflags(m, m);
  357                 return (if_simloop(ifp, m, RO_GET_FAMILY(ro, dst), 0));
  358         }
  359         loop_copy = (pflags & RT_MAY_LOOP) != 0;
  360 
  361         /*
  362          * Add local net header.  If no space in first mbuf,
  363          * allocate another.
  364          *
  365          * Note that we do prepend regardless of RT_HAS_HEADER flag.
  366          * This is done because BPF code shifts m_data pointer
  367          * to the end of ethernet header prior to calling if_output().
  368          */
  369         M_PREPEND(m, hlen, M_NOWAIT);
  370         if (m == NULL)
  371                 senderr(ENOBUFS);
  372         if ((pflags & RT_HAS_HEADER) == 0) {
  373                 eh = mtod(m, struct ether_header *);
  374                 memcpy(eh, phdr, hlen);
  375         }
  376 
  377         /*
  378          * If a simplex interface, and the packet is being sent to our
  379          * Ethernet address or a broadcast address, loopback a copy.
  380          * XXX To make a simplex device behave exactly like a duplex
  381          * device, we should copy in the case of sending to our own
  382          * ethernet address (thus letting the original actually appear
  383          * on the wire). However, we don't do that here for security
  384          * reasons and compatibility with the original behavior.
  385          */
  386         if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
  387             ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
  388                 struct mbuf *n;
  389 
  390                 /*
  391                  * Because if_simloop() modifies the packet, we need a
  392                  * writable copy through m_dup() instead of a readonly
  393                  * one as m_copy[m] would give us. The alternative would
  394                  * be to modify if_simloop() to handle the readonly mbuf,
  395                  * but performancewise it is mostly equivalent (trading
  396                  * extra data copying vs. extra locking).
  397                  *
  398                  * XXX This is a local workaround.  A number of less
  399                  * often used kernel parts suffer from the same bug.
  400                  * See PR kern/105943 for a proposed general solution.
  401                  */
  402                 if ((n = m_dup(m, M_NOWAIT)) != NULL) {
  403                         update_mbuf_csumflags(m, n);
  404                         (void)if_simloop(ifp, n, RO_GET_FAMILY(ro, dst), hlen);
  405                 } else
  406                         if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
  407         }
  408 
  409        /*
  410         * Bridges require special output handling.
  411         */
  412         if (ifp->if_bridge) {
  413                 BRIDGE_OUTPUT(ifp, m, error);
  414                 return (error);
  415         }
  416 
  417 #if defined(INET) || defined(INET6)
  418         if (ifp->if_carp &&
  419             (error = (*carp_output_p)(ifp, m, dst)))
  420                 goto bad;
  421 #endif
  422 
  423         /* Handle ng_ether(4) processing, if any */
  424         if (ifp->if_l2com != NULL) {
  425                 KASSERT(ng_ether_output_p != NULL,
  426                     ("ng_ether_output_p is NULL"));
  427                 if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
  428 bad:                    if (m != NULL)
  429                                 m_freem(m);
  430                         return (error);
  431                 }
  432                 if (m == NULL)
  433                         return (0);
  434         }
  435 
  436         /* Continue with link-layer output */
  437         return ether_output_frame(ifp, m);
  438 }
  439 
  440 static bool
  441 ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp)
  442 {
  443         struct ether_8021q_tag qtag;
  444         struct ether_header *eh;
  445 
  446         eh = mtod(*mp, struct ether_header *);
  447         if (ntohs(eh->ether_type) == ETHERTYPE_VLAN ||
  448             ntohs(eh->ether_type) == ETHERTYPE_QINQ)
  449                 return (true);
  450 
  451         qtag.vid = 0;
  452         qtag.pcp = pcp;
  453         qtag.proto = ETHERTYPE_VLAN;
  454         if (ether_8021q_frame(mp, ifp, ifp, &qtag))
  455                 return (true);
  456         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
  457         return (false);
  458 }
  459 
  460 /*
  461  * Ethernet link layer output routine to send a raw frame to the device.
  462  *
  463  * This assumes that the 14 byte Ethernet header is present and contiguous
  464  * in the first mbuf (if BRIDGE'ing).
  465  */
  466 int
  467 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
  468 {
  469         uint8_t pcp;
  470 
  471         pcp = ifp->if_pcp;
  472         if (pcp != IFNET_PCP_NONE && ifp->if_type != IFT_L2VLAN &&
  473             !ether_set_pcp(&m, ifp, pcp))
  474                 return (0);
  475 
  476         if (PFIL_HOOKED_OUT(V_link_pfil_head))
  477                 switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT,
  478                     NULL)) {
  479                 case PFIL_DROPPED:
  480                         return (EACCES);
  481                 case PFIL_CONSUMED:
  482                         return (0);
  483                 }
  484 
  485 #ifdef EXPERIMENTAL
  486 #if defined(INET6) && defined(INET)
  487         /* draft-ietf-6man-ipv6only-flag */
  488         /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */
  489         if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) {
  490                 struct ether_header *eh;
  491 
  492                 eh = mtod(m, struct ether_header *);
  493                 switch (ntohs(eh->ether_type)) {
  494                 case ETHERTYPE_IP:
  495                 case ETHERTYPE_ARP:
  496                 case ETHERTYPE_REVARP:
  497                         m_freem(m);
  498                         return (EAFNOSUPPORT);
  499                         /* NOTREACHED */
  500                         break;
  501                 };
  502         }
  503 #endif
  504 #endif
  505 
  506         /*
  507          * Queue message on interface, update output statistics if successful,
  508          * and start output if interface not yet active.
  509          *
  510          * If KMSAN is enabled, use it to verify that the data does not contain
  511          * any uninitialized bytes.
  512          */
  513         kmsan_check_mbuf(m, "ether_output");
  514         return ((ifp->if_transmit)(ifp, m));
  515 }
  516 
  517 /*
  518  * Process a received Ethernet packet; the packet is in the
  519  * mbuf chain m with the ethernet header at the front.
  520  */
  521 static void
  522 ether_input_internal(struct ifnet *ifp, struct mbuf *m)
  523 {
  524         struct ether_header *eh;
  525         u_short etype;
  526 
  527         if ((ifp->if_flags & IFF_UP) == 0) {
  528                 m_freem(m);
  529                 return;
  530         }
  531 #ifdef DIAGNOSTIC
  532         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
  533                 if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
  534                 m_freem(m);
  535                 return;
  536         }
  537 #endif
  538         if (m->m_len < ETHER_HDR_LEN) {
  539                 /* XXX maybe should pullup? */
  540                 if_printf(ifp, "discard frame w/o leading ethernet "
  541                                 "header (len %u pkt len %u)\n",
  542                                 m->m_len, m->m_pkthdr.len);
  543                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
  544                 m_freem(m);
  545                 return;
  546         }
  547         eh = mtod(m, struct ether_header *);
  548         etype = ntohs(eh->ether_type);
  549         random_harvest_queue_ether(m, sizeof(*m));
  550 
  551 #ifdef EXPERIMENTAL
  552 #if defined(INET6) && defined(INET)
  553         /* draft-ietf-6man-ipv6only-flag */
  554         /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */
  555         if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) {
  556                 switch (etype) {
  557                 case ETHERTYPE_IP:
  558                 case ETHERTYPE_ARP:
  559                 case ETHERTYPE_REVARP:
  560                         m_freem(m);
  561                         return;
  562                         /* NOTREACHED */
  563                         break;
  564                 };
  565         }
  566 #endif
  567 #endif
  568 
  569         CURVNET_SET_QUIET(ifp->if_vnet);
  570 
  571         if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
  572                 if (ETHER_IS_BROADCAST(eh->ether_dhost))
  573                         m->m_flags |= M_BCAST;
  574                 else
  575                         m->m_flags |= M_MCAST;
  576                 if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
  577         }
  578 
  579 #ifdef MAC
  580         /*
  581          * Tag the mbuf with an appropriate MAC label before any other
  582          * consumers can get to it.
  583          */
  584         mac_ifnet_create_mbuf(ifp, m);
  585 #endif
  586 
  587         /*
  588          * Give bpf a chance at the packet.
  589          */
  590         ETHER_BPF_MTAP(ifp, m);
  591 
  592         /*
  593          * If the CRC is still on the packet, trim it off. We do this once
  594          * and once only in case we are re-entered. Nothing else on the
  595          * Ethernet receive path expects to see the FCS.
  596          */
  597         if (m->m_flags & M_HASFCS) {
  598                 m_adj(m, -ETHER_CRC_LEN);
  599                 m->m_flags &= ~M_HASFCS;
  600         }
  601 
  602         if (!(ifp->if_capenable & IFCAP_HWSTATS))
  603                 if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
  604 
  605         /* Allow monitor mode to claim this frame, after stats are updated. */
  606         if (ifp->if_flags & IFF_MONITOR) {
  607                 m_freem(m);
  608                 CURVNET_RESTORE();
  609                 return;
  610         }
  611 
  612         /* Handle input from a lagg(4) port */
  613         if (ifp->if_type == IFT_IEEE8023ADLAG) {
  614                 KASSERT(lagg_input_ethernet_p != NULL,
  615                     ("%s: if_lagg not loaded!", __func__));
  616                 m = (*lagg_input_ethernet_p)(ifp, m);
  617                 if (m != NULL)
  618                         ifp = m->m_pkthdr.rcvif;
  619                 else {
  620                         CURVNET_RESTORE();
  621                         return;
  622                 }
  623         }
  624 
  625         /*
  626          * If the hardware did not process an 802.1Q tag, do this now,
  627          * to allow 802.1P priority frames to be passed to the main input
  628          * path correctly.
  629          */
  630         if ((m->m_flags & M_VLANTAG) == 0 &&
  631             ((etype == ETHERTYPE_VLAN) || (etype == ETHERTYPE_QINQ))) {
  632                 struct ether_vlan_header *evl;
  633 
  634                 if (m->m_len < sizeof(*evl) &&
  635                     (m = m_pullup(m, sizeof(*evl))) == NULL) {
  636 #ifdef DIAGNOSTIC
  637                         if_printf(ifp, "cannot pullup VLAN header\n");
  638 #endif
  639                         if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
  640                         CURVNET_RESTORE();
  641                         return;
  642                 }
  643 
  644                 evl = mtod(m, struct ether_vlan_header *);
  645                 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
  646                 m->m_flags |= M_VLANTAG;
  647 
  648                 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
  649                     ETHER_HDR_LEN - ETHER_TYPE_LEN);
  650                 m_adj(m, ETHER_VLAN_ENCAP_LEN);
  651                 eh = mtod(m, struct ether_header *);
  652         }
  653 
  654         M_SETFIB(m, ifp->if_fib);
  655 
  656         /* Allow ng_ether(4) to claim this frame. */
  657         if (ifp->if_l2com != NULL) {
  658                 KASSERT(ng_ether_input_p != NULL,
  659                     ("%s: ng_ether_input_p is NULL", __func__));
  660                 m->m_flags &= ~M_PROMISC;
  661                 (*ng_ether_input_p)(ifp, &m);
  662                 if (m == NULL) {
  663                         CURVNET_RESTORE();
  664                         return;
  665                 }
  666                 eh = mtod(m, struct ether_header *);
  667         }
  668 
  669         /*
  670          * Allow if_bridge(4) to claim this frame.
  671          * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
  672          * and the frame should be delivered locally.
  673          */
  674         if (ifp->if_bridge != NULL) {
  675                 m->m_flags &= ~M_PROMISC;
  676                 BRIDGE_INPUT(ifp, m);
  677                 if (m == NULL) {
  678                         CURVNET_RESTORE();
  679                         return;
  680                 }
  681                 eh = mtod(m, struct ether_header *);
  682         }
  683 
  684 #if defined(INET) || defined(INET6)
  685         /*
  686          * Clear M_PROMISC on frame so that carp(4) will see it when the
  687          * mbuf flows up to Layer 3.
  688          * FreeBSD's implementation of carp(4) uses the inprotosw
  689          * to dispatch IPPROTO_CARP. carp(4) also allocates its own
  690          * Ethernet addresses of the form 00:00:5e:00:01:xx, which
  691          * is outside the scope of the M_PROMISC test below.
  692          * TODO: Maintain a hash table of ethernet addresses other than
  693          * ether_dhost which may be active on this ifp.
  694          */
  695         if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) {
  696                 m->m_flags &= ~M_PROMISC;
  697         } else
  698 #endif
  699         {
  700                 /*
  701                  * If the frame received was not for our MAC address, set the
  702                  * M_PROMISC flag on the mbuf chain. The frame may need to
  703                  * be seen by the rest of the Ethernet input path in case of
  704                  * re-entry (e.g. bridge, vlan, netgraph) but should not be
  705                  * seen by upper protocol layers.
  706                  */
  707                 if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
  708                     bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
  709                         m->m_flags |= M_PROMISC;
  710         }
  711 
  712         ether_demux(ifp, m);
  713         CURVNET_RESTORE();
  714 }
  715 
  716 /*
  717  * Ethernet input dispatch; by default, direct dispatch here regardless of
  718  * global configuration.  However, if RSS is enabled, hook up RSS affinity
  719  * so that when deferred or hybrid dispatch is enabled, we can redistribute
  720  * load based on RSS.
  721  *
  722  * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
  723  * not it had already done work distribution via multi-queue.  Then we could
  724  * direct dispatch in the event load balancing was already complete and
  725  * handle the case of interfaces with different capabilities better.
  726  *
  727  * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
  728  * at multiple layers?
  729  *
  730  * XXXRW: For now, enable all this only if RSS is compiled in, although it
  731  * works fine without RSS.  Need to characterise the performance overhead
  732  * of the detour through the netisr code in the event the result is always
  733  * direct dispatch.
  734  */
  735 static void
  736 ether_nh_input(struct mbuf *m)
  737 {
  738 
  739         M_ASSERTPKTHDR(m);
  740         KASSERT(m->m_pkthdr.rcvif != NULL,
  741             ("%s: NULL interface pointer", __func__));
  742         ether_input_internal(m->m_pkthdr.rcvif, m);
  743 }
  744 
  745 static struct netisr_handler    ether_nh = {
  746         .nh_name = "ether",
  747         .nh_handler = ether_nh_input,
  748         .nh_proto = NETISR_ETHER,
  749 #ifdef RSS
  750         .nh_policy = NETISR_POLICY_CPU,
  751         .nh_dispatch = NETISR_DISPATCH_DIRECT,
  752         .nh_m2cpuid = rss_m2cpuid,
  753 #else
  754         .nh_policy = NETISR_POLICY_SOURCE,
  755         .nh_dispatch = NETISR_DISPATCH_DIRECT,
  756 #endif
  757 };
  758 
/*
 * Boot-time initialization: register the Ethernet netisr handler.
 * Run once via SYSINIT at SI_SUB_INIT_IF; the argument is unused.
 */
static void
ether_init(__unused void *arg)
{

        netisr_register(&ether_nh);
}
SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
  766 
/*
 * Per-vnet initialization: register the link-layer pfil head (input and
 * output, Ethernet type) and store it in V_link_pfil_head.  When VIMAGE
 * is compiled in, also register the Ethernet netisr handler for the vnet.
 */
static void
vnet_ether_init(__unused void *arg)
{
        struct pfil_head_args args;

        args.pa_version = PFIL_VERSION;
        args.pa_flags = PFIL_IN | PFIL_OUT;
        args.pa_type = PFIL_TYPE_ETHERNET;
        args.pa_headname = PFIL_ETHER_NAME;
        V_link_pfil_head = pfil_head_register(&args);

#ifdef VIMAGE
        netisr_register_vnet(&ether_nh);
#endif
}
VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
    vnet_ether_init, NULL);
  784 
#ifdef VIMAGE
/*
 * Per-vnet teardown, part 1: unregister the link-layer pfil head that
 * vnet_ether_init() stored in V_link_pfil_head.  Hooked at
 * SI_SUB_PROTO_PFIL.
 */
static void
vnet_ether_pfil_destroy(__unused void *arg)
{

        pfil_head_unregister(V_link_pfil_head);
}
VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
    vnet_ether_pfil_destroy, NULL);

/*
 * Per-vnet teardown, part 2: unregister the Ethernet netisr handler for
 * this vnet.  Hooked at SI_SUB_PROTO_IF, mirroring vnet_ether_init().
 */
static void
vnet_ether_destroy(__unused void *arg)
{

        netisr_unregister_vnet(&ether_nh);
}
VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
    vnet_ether_destroy, NULL);
#endif
  804 
  805 static void
  806 ether_input(struct ifnet *ifp, struct mbuf *m)
  807 {
  808         struct epoch_tracker et;
  809         struct mbuf *mn;
  810         bool needs_epoch;
  811 
  812         needs_epoch = !(ifp->if_flags & IFF_KNOWSEPOCH);
  813 
  814         /*
  815          * The drivers are allowed to pass in a chain of packets linked with
  816          * m_nextpkt. We split them up into separate packets here and pass
  817          * them up. This allows the drivers to amortize the receive lock.
  818          */
  819         CURVNET_SET_QUIET(ifp->if_vnet);
  820         if (__predict_false(needs_epoch))
  821                 NET_EPOCH_ENTER(et);
  822         while (m) {
  823                 mn = m->m_nextpkt;
  824                 m->m_nextpkt = NULL;
  825 
  826                 /*
  827                  * We will rely on rcvif being set properly in the deferred
  828                  * context, so assert it is correct here.
  829                  */
  830                 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
  831                 KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
  832                     "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
  833                 netisr_dispatch(NETISR_ETHER, m);
  834                 m = mn;
  835         }
  836         if (__predict_false(needs_epoch))
  837                 NET_EPOCH_EXIT(et);
  838         CURVNET_RESTORE();
  839 }
  840 
  841 /*
  842  * Upper layer processing for a received Ethernet packet.
  843  */
/*
 * ifp: interface the frame is being processed for (must not be NULL).
 * m:   the frame, beginning with its Ethernet header; it is always
 *      consumed (dispatched, handed to another layer, or freed).
 * Must be called inside the network epoch.
 */
void
ether_demux(struct ifnet *ifp, struct mbuf *m)
{
        struct ether_header *eh;
        int i, isr;
        u_short ether_type;

        NET_EPOCH_ASSERT();
        KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));

        /* Do not grab PROMISC frames in case we are re-entered. */
        if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) {
                i = pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_IN, NULL);
                /* Stop if a hook returned non-zero or left no mbuf. */
                if (i != 0 || m == NULL)
                        return;
        }

        /* Read the header only now: the hooks may have replaced m. */
        eh = mtod(m, struct ether_header *);
        ether_type = ntohs(eh->ether_type);

        /*
         * If this frame has a VLAN tag other than 0, call vlan_input()
         * if its module is loaded. Otherwise, drop.
         */
        if ((m->m_flags & M_VLANTAG) &&
            EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
                if (ifp->if_vlantrunk == NULL) {
                        /* No VLAN trunk on this interface: count and drop. */
                        if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
                        m_freem(m);
                        return;
                }
                KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
                    __func__));
                /* Clear before possibly re-entering ether_input(). */
                m->m_flags &= ~M_PROMISC;
                (*vlan_input_p)(ifp, m);
                return;
        }

        /*
         * Pass promiscuously received frames to the upper layer if the user
         * requested this by setting IFF_PPROMISC. Otherwise, drop them.
         */
        if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
                m_freem(m);
                return;
        }

        /*
         * Reset layer specific mbuf flags to avoid confusing upper layers.
         */
        m->m_flags &= ~M_VLANTAG;
        m_clrprotoflags(m);

        /*
         * Dispatch frame to upper layer.
         */
        switch (ether_type) {
#ifdef INET
        case ETHERTYPE_IP:
                isr = NETISR_IP;
                break;

        case ETHERTYPE_ARP:
                if (ifp->if_flags & IFF_NOARP) {
                        /* Discard packet if ARP is disabled on interface */
                        m_freem(m);
                        return;
                }
                isr = NETISR_ARP;
                break;
#endif
#ifdef INET6
        case ETHERTYPE_IPV6:
                isr = NETISR_IPV6;
                break;
#endif
        default:
                /* EtherType with no handler compiled into this switch. */
                goto discard;
        }

        /* Strip off Ethernet header. */
        m_adj(m, ETHER_HDR_LEN);

        netisr_dispatch(isr, m);
        return;

discard:
        /*
         * Packet is to be discarded.  If netgraph is present,
         * hand the packet to it for last chance processing;
         * otherwise dispose of it.
         */
        if (ifp->if_l2com != NULL) {
                KASSERT(ng_ether_input_orphan_p != NULL,
                    ("ng_ether_input_orphan_p is NULL"));
                (*ng_ether_input_orphan_p)(ifp, m);
                return;
        }
        m_freem(m);
}
  945 
  946 /*
  947  * Convert Ethernet address to printable (loggable) representation.
  948  * This routine is for compatibility; it's better to just use
  949  *
  950  *      printf("%6D", <pointer to address>, ":");
  951  *
  952  * since there's no static buffer involved.
  953  */
  954 char *
  955 ether_sprintf(const u_char *ap)
  956 {
  957         static char etherbuf[18];
  958         snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":");
  959         return (etherbuf);
  960 }
  961 
  962 /*
  963  * Perform common duties while attaching to interface list
  964  */
/*
 * ifp: interface being attached; Ethernet defaults (address and header
 *      length, MTU, output/input/resolvemulti/requestencap methods) are
 *      filled in around the call to if_attach().
 * lla: link-level (MAC) address, if_addrlen bytes, copied into the
 *      interface's link-level sockaddr and, if present, if_hw_addr.
 */
void
ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
{
        int i;
        struct ifaddr *ifa;
        struct sockaddr_dl *sdl;

        ifp->if_addrlen = ETHER_ADDR_LEN;
        ifp->if_hdrlen = ETHER_HDR_LEN;
        ifp->if_mtu = ETHERMTU;
        if_attach(ifp);
        ifp->if_output = ether_output;
        ifp->if_input = ether_input;
        ifp->if_resolvemulti = ether_resolvemulti;
        ifp->if_requestencap = ether_requestencap;
#ifdef VIMAGE
        ifp->if_reassign = ether_reassign;
#endif
        /* Keep a baudrate the driver already set; otherwise assume 10 Mbps. */
        if (ifp->if_baudrate == 0)
                ifp->if_baudrate = IF_Mbps(10);         /* just a default */
        ifp->if_broadcastaddr = etherbroadcastaddr;

        /* Record the MAC address in the interface's link-level sockaddr. */
        ifa = ifp->if_addr;
        KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
        sdl = (struct sockaddr_dl *)ifa->ifa_addr;
        sdl->sdl_type = IFT_ETHER;
        sdl->sdl_alen = ifp->if_addrlen;
        bcopy(lla, LLADDR(sdl), ifp->if_addrlen);

        if (ifp->if_hw_addr != NULL)
                bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen);

        bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
        /* Attach to netgraph only when its attach hook has been installed. */
        if (ng_ether_attach_p != NULL)
                (*ng_ether_attach_p)(ifp);

        /* Announce Ethernet MAC address if non-zero. */
        for (i = 0; i < ifp->if_addrlen; i++)
                if (lla[i] != 0)
                        break; 
        if (i != ifp->if_addrlen)
                if_printf(ifp, "Ethernet address: %6D\n", lla, ":");

        uuid_ether_add(LLADDR(sdl));

        /* All necessary bits are set up; announce it now. */
        EVENTHANDLER_INVOKE(ether_ifattach_event, ifp);
        /* The devctl notification is sent only from the default vnet. */
        if (IS_DEFAULT_VNET(curvnet))
                devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL);
}
 1015 
 1016 /*
 1017  * Perform common duties while detaching an Ethernet interface
 1018  */
 1019 void
 1020 ether_ifdetach(struct ifnet *ifp)
 1021 {
 1022         struct sockaddr_dl *sdl;
 1023 
 1024         sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);
 1025         uuid_ether_del(LLADDR(sdl));
 1026 
 1027         if (ifp->if_l2com != NULL) {
 1028                 KASSERT(ng_ether_detach_p != NULL,
 1029                     ("ng_ether_detach_p is NULL"));
 1030                 (*ng_ether_detach_p)(ifp);
 1031         }
 1032 
 1033         bpfdetach(ifp);
 1034         if_detach(ifp);
 1035 }
 1036 
 1037 #ifdef VIMAGE
 1038 void
 1039 ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
 1040 {
 1041 
 1042         if (ifp->if_l2com != NULL) {
 1043                 KASSERT(ng_ether_detach_p != NULL,
 1044                     ("ng_ether_detach_p is NULL"));
 1045                 (*ng_ether_detach_p)(ifp);
 1046         }
 1047 
 1048         if (ng_ether_attach_p != NULL) {
 1049                 CURVNET_SET_QUIET(new_vnet);
 1050                 (*ng_ether_attach_p)(ifp);
 1051                 CURVNET_RESTORE();
 1052         }
 1053 }
 1054 #endif
 1055 
/* Sysctl node "ether" under _net_link, numbered IFT_ETHER. */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Ethernet");
 1059 
 1060 #if 0
 1061 /*
 1062  * This is for reference.  We have a table-driven version
 1063  * of the little-endian crc32 generator, which is faster
 1064  * than the double-loop.
 1065  */
 1066 uint32_t
 1067 ether_crc32_le(const uint8_t *buf, size_t len)
 1068 {
 1069         size_t i;
 1070         uint32_t crc;
 1071         int bit;
 1072         uint8_t data;
 1073 
 1074         crc = 0xffffffff;       /* initial value */
 1075 
 1076         for (i = 0; i < len; i++) {
 1077                 for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
 1078                         carry = (crc ^ data) & 1;
 1079                         crc >>= 1;
 1080                         if (carry)
 1081                                 crc = (crc ^ ETHER_CRC_POLY_LE);
 1082                 }
 1083         }
 1084 
 1085         return (crc);
 1086 }
 1087 #else
 1088 uint32_t
 1089 ether_crc32_le(const uint8_t *buf, size_t len)
 1090 {
 1091         static const uint32_t crctab[] = {
 1092                 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
 1093                 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
 1094                 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
 1095                 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
 1096         };
 1097         size_t i;
 1098         uint32_t crc;
 1099 
 1100         crc = 0xffffffff;       /* initial value */
 1101 
 1102         for (i = 0; i < len; i++) {
 1103                 crc ^= buf[i];
 1104                 crc = (crc >> 4) ^ crctab[crc & 0xf];
 1105                 crc = (crc >> 4) ^ crctab[crc & 0xf];
 1106         }
 1107 
 1108         return (crc);
 1109 }
 1110 #endif
 1111 
 1112 uint32_t
 1113 ether_crc32_be(const uint8_t *buf, size_t len)
 1114 {
 1115         size_t i;
 1116         uint32_t crc, carry;
 1117         int bit;
 1118         uint8_t data;
 1119 
 1120         crc = 0xffffffff;       /* initial value */
 1121 
 1122         for (i = 0; i < len; i++) {
 1123                 for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
 1124                         carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01);
 1125                         crc <<= 1;
 1126                         if (carry)
 1127                                 crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
 1128                 }
 1129         }
 1130 
 1131         return (crc);
 1132 }
 1133 
/*
 * Common ioctl handling for Ethernet interfaces.
 *
 * ifp:     interface the request applies to.
 * command: ioctl request code.
 * data:    request argument; read as a struct ifaddr for SIOCSIFADDR and
 *          as a struct ifreq for the other requests.
 *
 * Returns 0 on success, EINVAL for a rejected value or an unhandled
 * command, or the priv_check() error for SIOCSLANPCP.
 */
int
ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct ifaddr *ifa = (struct ifaddr *) data;
        struct ifreq *ifr = (struct ifreq *) data;
        int error = 0;

        switch (command) {
        case SIOCSIFADDR:
                /* Setting an address marks the interface up and runs if_init. */
                ifp->if_flags |= IFF_UP;

                switch (ifa->ifa_addr->sa_family) {
#ifdef INET
                case AF_INET:
                        ifp->if_init(ifp->if_softc);    /* before arpwhohas */
                        arp_ifinit(ifp, ifa);
                        break;
#endif
                default:
                        ifp->if_init(ifp->if_softc);
                        break;
                }
                break;

        case SIOCGIFADDR:
                /* Return the link-level address in ifr_addr.sa_data. */
                bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
                    ETHER_ADDR_LEN);
                break;

        case SIOCSIFMTU:
                /*
                 * Set the interface MTU.
                 * NOTE(review): only the upper bound (ETHERMTU) is checked
                 * here; presumably the caller validates the lower bound --
                 * confirm.
                 */
                if (ifr->ifr_mtu > ETHERMTU) {
                        error = EINVAL;
                } else {
                        ifp->if_mtu = ifr->ifr_mtu;
                }
                break;

        case SIOCSLANPCP:
                error = priv_check(curthread, PRIV_NET_SETLANPCP);
                if (error != 0)
                        break;
                /* Values above 7 are rejected, except IFNET_PCP_NONE. */
                if (ifr->ifr_lan_pcp > 7 &&
                    ifr->ifr_lan_pcp != IFNET_PCP_NONE) {
                        error = EINVAL;
                } else {
                        ifp->if_pcp = ifr->ifr_lan_pcp;
                        /* broadcast event about PCP change */
                        EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP);
                }
                break;

        case SIOCGLANPCP:
                ifr->ifr_lan_pcp = ifp->if_pcp;
                break;

        default:
                error = EINVAL;                 /* XXX netbsd has ENOTTY??? */
                break;
        }
        return (error);
}
 1198 
 1199 static int
 1200 ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
 1201         struct sockaddr *sa)
 1202 {
 1203         struct sockaddr_dl *sdl;
 1204 #ifdef INET
 1205         struct sockaddr_in *sin;
 1206 #endif
 1207 #ifdef INET6
 1208         struct sockaddr_in6 *sin6;
 1209 #endif
 1210         u_char *e_addr;
 1211 
 1212         switch(sa->sa_family) {
 1213         case AF_LINK:
 1214                 /*
 1215                  * No mapping needed. Just check that it's a valid MC address.
 1216                  */
 1217                 sdl = (struct sockaddr_dl *)sa;
 1218                 e_addr = LLADDR(sdl);
 1219                 if (!ETHER_IS_MULTICAST(e_addr))
 1220                         return EADDRNOTAVAIL;
 1221                 *llsa = NULL;
 1222                 return 0;
 1223 
 1224 #ifdef INET
 1225         case AF_INET:
 1226                 sin = (struct sockaddr_in *)sa;
 1227                 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 1228                         return EADDRNOTAVAIL;
 1229                 sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
 1230                 sdl->sdl_alen = ETHER_ADDR_LEN;
 1231                 e_addr = LLADDR(sdl);
 1232                 ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
 1233                 *llsa = (struct sockaddr *)sdl;
 1234                 return 0;
 1235 #endif
 1236 #ifdef INET6
 1237         case AF_INET6:
 1238                 sin6 = (struct sockaddr_in6 *)sa;
 1239                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 1240                         /*
 1241                          * An IP6 address of 0 means listen to all
 1242                          * of the Ethernet multicast address used for IP6.
 1243                          * (This is used for multicast routers.)
 1244                          */
 1245                         ifp->if_flags |= IFF_ALLMULTI;
 1246                         *llsa = NULL;
 1247                         return 0;
 1248                 }
 1249                 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 1250                         return EADDRNOTAVAIL;
 1251                 sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
 1252                 sdl->sdl_alen = ETHER_ADDR_LEN;
 1253                 e_addr = LLADDR(sdl);
 1254                 ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
 1255                 *llsa = (struct sockaddr *)sdl;
 1256                 return 0;
 1257 #endif
 1258 
 1259         default:
 1260                 /*
 1261                  * Well, the text isn't quite right, but it's the name
 1262                  * that counts...
 1263                  */
 1264                 return EAFNOSUPPORT;
 1265         }
 1266 }
 1267 
/* Module record for "ether"; only the name is set here. */
static moduledata_t ether_mod = {
        .name = "ether",
};
 1271 
/*
 * Tap a frame whose VLAN tag is carried in the mbuf packet header
 * (M_VLANTAG) to BPF, presenting it with the 802.1Q header inline.
 *
 * bp:   BPF interface to tap to.
 * m:    frame starting with a plain Ethernet header; it is adjusted in
 *       place for the duration of the call and restored before returning.
 * data: optional extra header to place in front of the frame, or NULL.
 * dlen: length of data in bytes.
 */
void
ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
{
        struct ether_vlan_header vlan;
        struct mbuf mv, mb;

        KASSERT((m->m_flags & M_VLANTAG) != 0,
            ("%s: vlan information not present", __func__));
        KASSERT(m->m_len >= sizeof(struct ether_header),
            ("%s: mbuf not large enough for header", __func__));
        /*
         * Build the 802.1Q header on the stack: copy the plain Ethernet
         * header, move its EtherType into evl_proto, and fill in the
         * VLAN EtherType and the tag from the packet header.
         */
        bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
        vlan.evl_proto = vlan.evl_encap_proto;
        vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
        vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
        /* Temporarily hide the original Ethernet header from the tap. */
        m->m_len -= sizeof(struct ether_header);
        m->m_data += sizeof(struct ether_header);
        /*
         * If a data link has been supplied by the caller, then we will need to
         * re-create a stack allocated mbuf chain with the following structure:
         *
         * (1) mbuf #1 will contain the supplied data link
         * (2) mbuf #2 will contain the vlan header
         * (3) mbuf #3 will contain the original mbuf's packet data
         *
         * Otherwise, submit the packet and vlan header via bpf_mtap2().
         */
        if (data != NULL) {
                mv.m_next = m;
                mv.m_data = (caddr_t)&vlan;
                mv.m_len = sizeof(vlan);
                mb.m_next = &mv;
                mb.m_data = data;
                mb.m_len = dlen;
                bpf_mtap(bp, &mb);
        } else
                bpf_mtap2(bp, &vlan, sizeof(vlan), m);
        /* Restore the mbuf so the caller sees the original frame again. */
        m->m_len += sizeof(struct ether_header);
        m->m_data -= sizeof(struct ether_header);
}
 1311 
 1312 struct mbuf *
 1313 ether_vlanencap_proto(struct mbuf *m, uint16_t tag, uint16_t proto)
 1314 {
 1315         struct ether_vlan_header *evl;
 1316 
 1317         M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
 1318         if (m == NULL)
 1319                 return (NULL);
 1320         /* M_PREPEND takes care of m_len, m_pkthdr.len for us */
 1321 
 1322         if (m->m_len < sizeof(*evl)) {
 1323                 m = m_pullup(m, sizeof(*evl));
 1324                 if (m == NULL)
 1325                         return (NULL);
 1326         }
 1327 
 1328         /*
 1329          * Transform the Ethernet header into an Ethernet header
 1330          * with 802.1Q encapsulation.
 1331          */
 1332         evl = mtod(m, struct ether_vlan_header *);
 1333         bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
 1334             (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
 1335         evl->evl_encap_proto = htons(proto);
 1336         evl->evl_tag = htons(tag);
 1337         return (m);
 1338 }
 1339 
/* Sysctl nodes "vlan" under _net_link and "link" under _net_link_vlan. */
static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IEEE 802.1Q VLAN");
static SYSCTL_NODE(_net_link_vlan, PF_LINK, link,
    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "for consistency");

/*
 * Per-vnet knob "soft_pad": when non-zero, ether_8021q_frame() pads short
 * frames to the minimum size before tagging them.
 */
VNET_DEFINE_STATIC(int, soft_pad);
#define V_soft_pad      VNET(soft_pad)
SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(soft_pad), 0,
    "pad short frames before tagging");

/*
 * For now, make preserving PCP via an mbuf tag optional, as it increases
 * per-packet memory allocations and frees.  In the future, it would be
 * preferable to reuse ether_vtag for this, or similar.
 */
VNET_DEFINE(int, vlan_mtag_pcp) = 0;
#define V_vlan_mtag_pcp VNET(vlan_mtag_pcp)
SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(vlan_mtag_pcp), 0,
    "Retain VLAN PCP information as packets are passed up the stack");
 1362 
 1363 bool
 1364 ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
 1365     struct ether_8021q_tag *qtag)
 1366 {
 1367         struct m_tag *mtag;
 1368         int n;
 1369         uint16_t tag;
 1370         static const char pad[8];       /* just zeros */
 1371 
 1372         /*
 1373          * Pad the frame to the minimum size allowed if told to.
 1374          * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
 1375          * paragraph C.4.4.3.b.  It can help to work around buggy
 1376          * bridges that violate paragraph C.4.4.3.a from the same
 1377          * document, i.e., fail to pad short frames after untagging.
 1378          * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
 1379          * untagging it will produce a 62-byte frame, which is a runt
 1380          * and requires padding.  There are VLAN-enabled network
 1381          * devices that just discard such runts instead or mishandle
 1382          * them somehow.
 1383          */
 1384         if (V_soft_pad && p->if_type == IFT_ETHER) {
 1385                 for (n = ETHERMIN + ETHER_HDR_LEN - (*mp)->m_pkthdr.len;
 1386                      n > 0; n -= sizeof(pad)) {
 1387                         if (!m_append(*mp, min(n, sizeof(pad)), pad))
 1388                                 break;
 1389                 }
 1390                 if (n > 0) {
 1391                         m_freem(*mp);
 1392                         *mp = NULL;
 1393                         if_printf(ife, "cannot pad short frame");
 1394                         return (false);
 1395                 }
 1396         }
 1397 
 1398         /*
 1399          * If PCP is set in mbuf, use it
 1400          */
 1401         if ((*mp)->m_flags & M_VLANTAG) {
 1402                 qtag->pcp = EVL_PRIOFTAG((*mp)->m_pkthdr.ether_vtag);
 1403         }
 1404 
 1405         /*
 1406          * If underlying interface can do VLAN tag insertion itself,
 1407          * just pass the packet along. However, we need some way to
 1408          * tell the interface where the packet came from so that it
 1409          * knows how to find the VLAN tag to use, so we attach a
 1410          * packet tag that holds it.
 1411          */
 1412         if (V_vlan_mtag_pcp && (mtag = m_tag_locate(*mp, MTAG_8021Q,
 1413             MTAG_8021Q_PCP_OUT, NULL)) != NULL)
 1414                 tag = EVL_MAKETAG(qtag->vid, *(uint8_t *)(mtag + 1), 0);
 1415         else
 1416                 tag = EVL_MAKETAG(qtag->vid, qtag->pcp, 0);
 1417         if ((p->if_capenable & IFCAP_VLAN_HWTAGGING) &&
 1418             (qtag->proto == ETHERTYPE_VLAN)) {
 1419                 (*mp)->m_pkthdr.ether_vtag = tag;
 1420                 (*mp)->m_flags |= M_VLANTAG;
 1421         } else {
 1422                 *mp = ether_vlanencap_proto(*mp, tag, qtag->proto);
 1423                 if (*mp == NULL) {
 1424                         if_printf(ife, "unable to prepend 802.1Q header");
 1425                         return (false);
 1426                 }
 1427         }
 1428         return (true);
 1429 }
 1430 
 1431 /*
 1432  * Allocate an address from the FreeBSD Foundation OUI.  This uses a
 1433  * cryptographic hash function on the containing jail's name, UUID and the
 1434  * interface name to attempt to provide a unique but stable address.
 1435  * Pseudo-interfaces which require a MAC address should use this function to
 1436  * allocate non-locally-administered addresses.
 1437  */
 1438 void
 1439 ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr)
 1440 {
 1441         SHA1_CTX ctx;
 1442         char *buf;
 1443         char uuid[HOSTUUIDLEN + 1];
 1444         uint64_t addr;
 1445         int i, sz;
 1446         char digest[SHA1_RESULTLEN];
 1447         char jailname[MAXHOSTNAMELEN];
 1448 
 1449         getcredhostuuid(curthread->td_ucred, uuid, sizeof(uuid));
 1450         if (strncmp(uuid, DEFAULT_HOSTUUID, sizeof(uuid)) == 0) {
 1451                 /* Fall back to a random mac address. */
 1452                 goto rando;
 1453         }
 1454 
 1455         /* If each (vnet) jail would also have a unique hostuuid this would not
 1456          * be necessary. */
 1457         getjailname(curthread->td_ucred, jailname, sizeof(jailname));
 1458         sz = asprintf(&buf, M_TEMP, "%s-%s-%s", uuid, if_name(ifp),
 1459             jailname);
 1460         if (sz < 0) {
 1461                 /* Fall back to a random mac address. */
 1462                 goto rando;
 1463         }
 1464 
 1465         SHA1Init(&ctx);
 1466         SHA1Update(&ctx, buf, sz);
 1467         SHA1Final(digest, &ctx);
 1468         free(buf, M_TEMP);
 1469 
 1470         addr = ((digest[0] << 16) | (digest[1] << 8) | digest[2]) &
 1471             OUI_FREEBSD_GENERATED_MASK;
 1472         addr = OUI_FREEBSD(addr);
 1473         for (i = 0; i < ETHER_ADDR_LEN; ++i) {
 1474                 hwaddr->octet[i] = addr >> ((ETHER_ADDR_LEN - i - 1) * 8) &
 1475                     0xFF;
 1476         }
 1477 
 1478         return;
 1479 rando:
 1480         arc4rand(hwaddr, sizeof(*hwaddr), 0);
 1481         /* Unicast */
 1482         hwaddr->octet[0] &= 0xFE;
 1483         /* Locally administered. */
 1484         hwaddr->octet[0] |= 0x02;
 1485 }
 1486 
/* Declare the "ether" module (version 1), using ether_mod at SI_SUB_INIT_IF. */
DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
MODULE_VERSION(ether, 1);

Cache object: d29fa6a5ebf049bcb59bdc572814f03b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.