The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/if_ethersubr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)if_ethersubr.c      8.1 (Berkeley) 6/10/93
   32  * $FreeBSD$
   33  */
   34 
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 #include "opt_netgraph.h"
   38 #include "opt_mbuf_profiling.h"
   39 #include "opt_rss.h"
   40 
   41 #include <sys/param.h>
   42 #include <sys/systm.h>
   43 #include <sys/bus.h>
   44 #include <sys/eventhandler.h>
   45 #include <sys/jail.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/malloc.h>
   49 #include <sys/module.h>
   50 #include <sys/mbuf.h>
   51 #include <sys/proc.h>
   52 #include <sys/priv.h>
   53 #include <sys/random.h>
   54 #include <sys/socket.h>
   55 #include <sys/sockio.h>
   56 #include <sys/sysctl.h>
   57 #include <sys/uuid.h>
   58 
   59 #include <net/ieee_oui.h>
   60 #include <net/if.h>
   61 #include <net/if_var.h>
   62 #include <net/if_arp.h>
   63 #include <net/netisr.h>
   64 #include <net/route.h>
   65 #include <net/if_llc.h>
   66 #include <net/if_dl.h>
   67 #include <net/if_types.h>
   68 #include <net/bpf.h>
   69 #include <net/ethernet.h>
   70 #include <net/if_bridgevar.h>
   71 #include <net/if_vlan_var.h>
   72 #include <net/if_llatbl.h>
   73 #include <net/pfil.h>
   74 #include <net/rss_config.h>
   75 #include <net/vnet.h>
   76 
   77 #include <netpfil/pf/pf_mtag.h>
   78 
   79 #if defined(INET) || defined(INET6)
   80 #include <netinet/in.h>
   81 #include <netinet/in_var.h>
   82 #include <netinet/if_ether.h>
   83 #include <netinet/ip_carp.h>
   84 #include <netinet/ip_var.h>
   85 #endif
   86 #ifdef INET6
   87 #include <netinet6/nd6.h>
   88 #endif
   89 #include <security/mac/mac_framework.h>
   90 
   91 #include <crypto/sha1.h>
   92 
   93 #ifdef CTASSERT
      /*
       * Compile-time layout checks: struct ether_header must be exactly two
       * link-layer addresses plus two more bytes, and struct ether_addr must
       * be exactly one link-layer address.
       */
   94 CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
   95 CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
   96 #endif
   97 
   98 VNET_DEFINE(struct pfil_head, link_pfil_hook);  /* Packet filter hooks */
   99 
  100 /* netgraph node hooks for ng_ether(4) */
      /*
       * These are function pointers, not functions.  In this file
       * ng_ether_input_p and ng_ether_output_p are only invoked when
       * ifp->if_l2com is non-NULL, and each call site first KASSERTs that
       * the pointer has been set.
       */
  101 void    (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
  102 void    (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
  103 int     (*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
  104 void    (*ng_ether_attach_p)(struct ifnet *ifp);
  105 void    (*ng_ether_detach_p)(struct ifnet *ifp);
  106 
      /*
       * VLAN input hook.  ether_demux() asserts that it is non-NULL before
       * dealing with a VLAN-tagged frame on an interface that has a trunk.
       */
  107 void    (*vlan_input_p)(struct ifnet *, struct mbuf *);
  108 
  109 /* if_bridge(4) support */
  110 void    (*bridge_dn_p)(struct mbuf *, struct ifnet *);
  111 
  112 /* if_lagg(4) support */
      /*
       * Called from ether_input_internal() for interfaces whose if_type is
       * IFT_IEEE8023ADLAG.  A NULL return ends processing of the frame; a
       * non-NULL return is the mbuf to continue with.
       */
  113 struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); 
  114 
  115 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
  116                         { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };  /* all bits set */
  117 
      /* Forward declarations of file-local helpers. */
  118 static  int ether_resolvemulti(struct ifnet *, struct sockaddr **,
  119                 struct sockaddr *);
  120 static  int ether_requestencap(struct ifnet *, struct if_encap_req *);
  121 
  122 
      /*
       * Store the error code e in the enclosing function's local variable
       * "error" and jump to its "bad" label.  Only usable inside a function
       * that defines both.
       */
  123 #define senderr(e) do { error = (e); goto bad;} while (0)
  124 
  125 static void
  126 update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
  127 {
              /*
               * Derive checksum flags for dst from the checksum flags found
               * on src.  ether_output() calls this right before handing a
               * packet to if_simloop(), either with src == dst or with dst
               * being a duplicate of src.
               *
               *   CSUM_IP         on src -> CSUM_IP_CHECKED | CSUM_IP_VALID
               *   CSUM_DELAY_DATA on src -> CSUM_DATA_VALID | CSUM_PSEUDO_HDR
               *   CSUM_SCTP       on src -> CSUM_SCTP_VALID
               *
               * The derived bits are OR-ed into dst; nothing already set on
               * dst is cleared.
               */
  128         int csum_flags = 0;
  129 
  130         if (src->m_pkthdr.csum_flags & CSUM_IP)
  131                 csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
  132         if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
  133                 csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
  134         if (src->m_pkthdr.csum_flags & CSUM_SCTP)
  135                 csum_flags |= CSUM_SCTP_VALID;
  136         dst->m_pkthdr.csum_flags |= csum_flags;
              /* csum_data is only written when CSUM_DATA_VALID was derived. */
  137         if (csum_flags & CSUM_DATA_VALID)
  138                 dst->m_pkthdr.csum_data = 0xffff;
  139 }
  140 
  141 /*
  142  * Handle link-layer encapsulation requests.
       *
       * Builds an Ethernet header in req->buf for the address family in
       * req->family.  The destination is req->lladdr, except for AF_ARP
       * requests carrying IFENCAP_FLAG_BROADCAST, which use the interface
       * broadcast address.  The source is always IF_LLADDR(ifp).
       *
       * Returns:
       *   EOPNOTSUPP    req->rtype is not IFENCAP_LL
       *   ENOMEM        req->bufsize is smaller than ETHER_HDR_LEN
       *   EAFNOSUPPORT  req->family is not AF_INET, AF_INET6 or AF_ARP
       *   0             header written; req->bufsize is set to
       *                 sizeof(struct ether_header)
       *
       * req->lladdr_off is set to 0 before the family is examined, so it is
       * written even when EAFNOSUPPORT is returned.
  143  */
  144 static int
  145 ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
  146 {
  147         struct ether_header *eh;
  148         struct arphdr *ah;
  149         uint16_t etype;
  150         const u_char *lladdr;
  151 
  152         if (req->rtype != IFENCAP_LL)
  153                 return (EOPNOTSUPP);
  154 
  155         if (req->bufsize < ETHER_HDR_LEN)
  156                 return (ENOMEM);
  157 
  158         eh = (struct ether_header *)req->buf;
  159         lladdr = req->lladdr;
  160         req->lladdr_off = 0;
  161 
              /* etype is kept in network byte order throughout. */
  162         switch (req->family) {
  163         case AF_INET:
  164                 etype = htons(ETHERTYPE_IP);
  165                 break;
  166         case AF_INET6:
  167                 etype = htons(ETHERTYPE_IPV6);
  168                 break;
  169         case AF_ARP:
                      /* Also stamps the hardware type into the ARP header. */
  170                 ah = (struct arphdr *)req->hdata;
  171                 ah->ar_hrd = htons(ARPHRD_ETHER);
  172 
                      /* Reverse-ARP operations get their own ethertype. */
  173                 switch(ntohs(ah->ar_op)) {
  174                 case ARPOP_REVREQUEST:
  175                 case ARPOP_REVREPLY:
  176                         etype = htons(ETHERTYPE_REVARP);
  177                         break;
  178                 case ARPOP_REQUEST:
  179                 case ARPOP_REPLY:
  180                 default:
  181                         etype = htons(ETHERTYPE_ARP);
  182                         break;
  183                 }
  184 
  185                 if (req->flags & IFENCAP_FLAG_BROADCAST)
  186                         lladdr = ifp->if_broadcastaddr;
  187                 break;
  188         default:
  189                 return (EAFNOSUPPORT);
  190         }
  191 
  192         memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
  193         memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN);
  194         memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
  195         req->bufsize = sizeof(struct ether_header);
  196 
  197         return (0);
  198 }
  199 
  200 
  201 static int
  202 ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
  203         const struct sockaddr *dst, struct route *ro, u_char *phdr,
  204         uint32_t *pflags, struct llentry **plle)
  205 {
              /*
               * Produce the Ethernet header for sending m to dst and store
               * it at phdr.
               *
               * AF_INET / AF_INET6 packets without M_BCAST/M_MCAST (M_MCAST
               * only, for AF_INET6) are handed to arpresolve() /
               * nd6_resolve(), which receive phdr, &lleflags and plle.  For
               * broadcast and multicast packets the header is built here.
               *
               * Returns 0 on success, with *pflags set to RT_MAY_LOOP, plus
               * RT_L2_ME when the resolver reported LLE_IFADDR.  On any
               * error *pflags is left untouched.  For an unsupported address
               * family the packet is freed and EAFNOSUPPORT is returned.
               *
               * If plle is non-NULL, *plle is reset to NULL on entry.
               */
  206         struct ether_header *eh;
  207         uint32_t lleflags = 0;
  208         int error = 0;
  209 #if defined(INET) || defined(INET6)
  210         uint16_t etype;
  211 #endif
  212 
  213         if (plle)
  214                 *plle = NULL;
  215         eh = (struct ether_header *)phdr;
  216 
  217         switch (dst->sa_family) {
  218 #ifdef INET
  219         case AF_INET:
  220                 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
  221                         error = arpresolve(ifp, 0, m, dst, phdr, &lleflags,
  222                             plle);
  223                 else {
  224                         if (m->m_flags & M_BCAST)
  225                                 memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
  226                                     ETHER_ADDR_LEN);
  227                         else {
  228                                 const struct in_addr *a;
  229                                 a = &(((const struct sockaddr_in *)dst)->sin_addr);
  230                                 ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost);
  231                         }
  232                         etype = htons(ETHERTYPE_IP);
  233                         memcpy(&eh->ether_type, &etype, sizeof(etype));
  234                         memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
  235                 }
  236                 break;
  237 #endif
  238 #ifdef INET6
  239         case AF_INET6:
  240                 if ((m->m_flags & M_MCAST) == 0)
  241                         error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags,
  242                             plle);
  243                 else {
  244                         const struct in6_addr *a6;
  245                         a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
  246                         ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
  247                         etype = htons(ETHERTYPE_IPV6);
  248                         memcpy(&eh->ether_type, &etype, sizeof(etype));
  249                         memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
  250                 }
  251                 break;
  252 #endif
  253         default:
  254                 if_printf(ifp, "can't handle af%d\n", dst->sa_family);
  255                 if (m != NULL)
  256                         m_freem(m);
  257                 return (EAFNOSUPPORT);
  258         }
  259 
              /*
               * When the route goes through a gateway (RT_HAS_GW), report
               * EHOSTDOWN from the resolver as EHOSTUNREACH instead.
               */
  260         if (error == EHOSTDOWN) {
  261                 if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
  262                         error = EHOSTUNREACH;
  263         }
  264 
  265         if (error != 0)
  266                 return (error);
  267 
  268         *pflags = RT_MAY_LOOP;
  269         if (lleflags & LLE_IFADDR)
  270                 *pflags |= RT_L2_ME;
  271 
  272         return (0);
  273 }
  274 
  275 /*
  276  * Ethernet output routine.
  277  * Encapsulate a packet of type family for the local net.
  278  * NOTE(review): the historical remark about "trailer" encapsulation that
  279  * used to sit here describes nothing in the code below.
       *
       * The link-layer header comes from, in order of preference:
       *   1. ro->ro_prepend / ro->ro_plen, when the route supplies them;
       *   2. a cached link-layer entry in ro->ro_lle (unicast only, and only
       *      with RT_LLE_CACHE set on the route);
       *   3. ether_resolve_addr(), writing into the on-stack linkhdr buffer.
       *
       * After the header is prepended the packet passes, in this order,
       * through: an optional loopback copy of broadcasts on IFF_SIMPLEX
       * interfaces, if_bridge (which ends processing here), carp, ng_ether,
       * and finally ether_output_frame().
       *
       * Returns 0 or an errno value.  EWOULDBLOCK from address resolution is
       * reported to the caller as 0.  Every senderr() in this function jumps
       * to the "bad" label, which frees m if it is non-NULL.
  280  */
  281 int
  282 ether_output(struct ifnet *ifp, struct mbuf *m,
  283         const struct sockaddr *dst, struct route *ro)
  284 {
  285         int error = 0;
  286         char linkhdr[ETHER_HDR_LEN], *phdr;
  287         struct ether_header *eh;
  288         struct pf_mtag *t;
  289         bool loop_copy;
  290         int hlen;       /* link layer header length */
  291         uint32_t pflags;
  292         struct llentry *lle = NULL;
  293         int addref = 0;
  294 
  295         phdr = NULL;
  296         pflags = 0;
  297         if (ro != NULL) {
  298                 /* XXX BPF uses ro_prepend */
  299                 if (ro->ro_prepend != NULL) {
  300                         phdr = ro->ro_prepend;
  301                         hlen = ro->ro_plen;
  302                 } else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
  303                         if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
  304                                 lle = ro->ro_lle;
                                      /* Drop a cached entry that is no longer valid. */
  305                                 if (lle != NULL &&
  306                                     (lle->la_flags & LLE_VALID) == 0) {
  307                                         LLE_FREE(lle);
  308                                         lle = NULL;     /* redundant */
  309                                         ro->ro_lle = NULL;
  310                                 }
  311                                 if (lle == NULL) {
  312                                         /* if we lookup, keep cache */
  313                                         addref = 1;
  314                                 } else
  315                                         /*
  316                                          * Notify LLE code that
  317                                          * the entry was used
  318                                          * by datapath.
  319                                          */
  320                                         llentry_mark_used(lle);
  321                         }
  322                         if (lle != NULL) {
  323                                 phdr = lle->r_linkdata;
  324                                 hlen = lle->r_hdrlen;
  325                                 pflags = lle->r_flags;
  326                         }
  327                 }
  328         }
  329 
  330 #ifdef MAC
  331         error = mac_ifnet_check_transmit(ifp, m);
  332         if (error)
  333                 senderr(error);
  334 #endif
  335 
  336         M_PROFILE(m);
              /* Refuse to transmit in monitor mode or when not up and running. */
  337         if (ifp->if_flags & IFF_MONITOR)
  338                 senderr(ENETDOWN);
  339         if (!((ifp->if_flags & IFF_UP) &&
  340             (ifp->if_drv_flags & IFF_DRV_RUNNING)))
  341                 senderr(ENETDOWN);
  342 
  343         if (phdr == NULL) {
  344                 /* No prepend data supplied. Try to calculate ourselves. */
  345                 phdr = linkhdr;
  346                 hlen = ETHER_HDR_LEN;
  347                 error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
  348                     addref ? &lle : NULL);
                      /* Remember a freshly looked-up entry in the route cache. */
  349                 if (addref && lle != NULL)
  350                         ro->ro_lle = lle;
                      /*
                       * NOTE(review): this return does not go through "bad",
                       * so m is not freed here.  Presumably the resolver has
                       * already queued or freed it -- confirm against
                       * arpresolve()/nd6_resolve().
                       */
  351                 if (error != 0)
  352                         return (error == EWOULDBLOCK ? 0 : error);
  353         }
  354 
              /* Destination is one of our own addresses: loop back only. */
  355         if ((pflags & RT_L2_ME) != 0) {
  356                 update_mbuf_csumflags(m, m);
  357                 return (if_simloop(ifp, m, dst->sa_family, 0));
  358         }
  359         loop_copy = (pflags & RT_MAY_LOOP) != 0;
  360 
  361         /*
  362          * Add local net header.  If no space in first mbuf,
  363          * allocate another.
  364          *
  365          * Note that we do prepend regardless of RT_HAS_HEADER flag.
  366          * This is done because BPF code shifts m_data pointer
  367          * to the end of ethernet header prior to calling if_output().
  368          */
  369         M_PREPEND(m, hlen, M_NOWAIT);
  370         if (m == NULL)
  371                 senderr(ENOBUFS);
  372         if ((pflags & RT_HAS_HEADER) == 0) {
  373                 eh = mtod(m, struct ether_header *);
  374                 memcpy(eh, phdr, hlen);
  375         }
  376 
  377         /*
  378          * If a simplex interface, and the packet is being sent to our
  379          * Ethernet address or a broadcast address, loopback a copy.
  380          * XXX To make a simplex device behave exactly like a duplex
  381          * device, we should copy in the case of sending to our own
  382          * ethernet address (thus letting the original actually appear
  383          * on the wire). However, we don't do that here for security
  384          * reasons and compatibility with the original behavior.
  385          */
  386         if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
  387             ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
  388                 struct mbuf *n;
  389 
  390                 /*
  391                  * Because if_simloop() modifies the packet, we need a
  392                  * writable copy through m_dup() instead of a readonly
  393                  * one as m_copy[m] would give us. The alternative would
  394                  * be to modify if_simloop() to handle the readonly mbuf,
  395                  * but performancewise it is mostly equivalent (trading
  396                  * extra data copying vs. extra locking).
  397                  *
  398                  * XXX This is a local workaround.  A number of less
  399                  * often used kernel parts suffer from the same bug.
  400                  * See PR kern/105943 for a proposed general solution.
  401                  */
  402                 if ((n = m_dup(m, M_NOWAIT)) != NULL) {
  403                         update_mbuf_csumflags(m, n);
  404                         (void)if_simloop(ifp, n, dst->sa_family, hlen);
  405                 } else
  406                         if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
  407         }
  408 
  409        /*
  410         * Bridges require special output handling.
  411         */
  412         if (ifp->if_bridge) {
  413                 BRIDGE_OUTPUT(ifp, m, error);
  414                 return (error);
  415         }
  416 
  417 #if defined(INET) || defined(INET6)
  418         if (ifp->if_carp &&
  419             (error = (*carp_output_p)(ifp, m, dst)))
  420                 goto bad;
  421 #endif
  422 
  423         /* Handle ng_ether(4) processing, if any */
  424         if (ifp->if_l2com != NULL) {
  425                 KASSERT(ng_ether_output_p != NULL,
  426                     ("ng_ether_output_p is NULL"));
  427                 if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
                              /*
                               * Shared error exit.  The label lives inside
                               * this block but is the target of every
                               * senderr()/goto bad above, including paths
                               * where if_l2com is NULL.
                               */
  428 bad:                    if (m != NULL)
  429                                 m_freem(m);
  430                         return (error);
  431                 }
                      /* The hook took the packet; nothing left to send. */
  432                 if (m == NULL)
  433                         return (0);
  434         }
  435 
  436         /* Continue with link-layer output */
  437         return ether_output_frame(ifp, m);
  438 }
  439 
  440 static bool
  441 ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp)
  442 {
              /*
               * Make sure the frame in *mp carries an 802.1Q header.
               *
               * Returns true when the frame's ether_type is already
               * ETHERTYPE_VLAN (the frame is then left untouched and pcp is
               * not applied) or when ether_8021q_frame() succeeds.
               * Otherwise counts an output error on ifp and returns false.
               *
               * NOTE(review): on the false path nothing here frees *mp, and
               * the caller returns without freeing either; presumably
               * ether_8021q_frame() disposes of the mbuf on failure --
               * confirm.
               */
  443         struct ether_header *eh;
  444 
  445         eh = mtod(*mp, struct ether_header *);
  446         if (ntohs(eh->ether_type) == ETHERTYPE_VLAN ||
  447             ether_8021q_frame(mp, ifp, ifp, 0, pcp))
  448                 return (true);
  449         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
  450         return (false);
  451 }
  452 
  453 /*
  454  * Ethernet link layer output routine to send a raw frame to the device.
  455  *
  456  * This assumes that the 14 byte Ethernet header is present and contiguous
  457  * in the first mbuf (if BRIDGE'ing).
       *
       * Steps, in order:
       *   1. If the interface has a priority code point configured
       *      (if_pcp != IFNET_PCP_NONE) and is not itself of type IFT_L2VLAN,
       *      tag the frame via ether_set_pcp().
       *   2. Run the outbound link-layer pfil hooks, if any are installed.
       *   3. Hand the frame to ifp->if_transmit.
       *
       * Returns:
       *   0        ether_set_pcp() failed, or a pfil hook consumed the frame
       *            (m became NULL)
       *   EACCES   a pfil hook returned an error (whatever its value)
       *   other    whatever ifp->if_transmit returns
  458  */
  459 int
  460 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
  461 {
  462         int error;
  463         uint8_t pcp;
  464 
  465         pcp = ifp->if_pcp;
  466         if (pcp != IFNET_PCP_NONE && ifp->if_type != IFT_L2VLAN &&
  467             !ether_set_pcp(&m, ifp, pcp))
  468                 return (0);
  469 
  470         if (PFIL_HOOKED(&V_link_pfil_hook)) {
  471                 error = pfil_run_hooks(&V_link_pfil_hook, &m, ifp,
  472                     PFIL_OUT, 0, NULL);
  473                 if (error != 0)
  474                         return (EACCES);
  475 
  476                 if (m == NULL)
  477                         return (0);
  478         }
  479 
  480         /*
  481          * Queue message on interface, update output statistics if
  482          * successful, and start output if interface not yet active.
  483          */
  484         return ((ifp->if_transmit)(ifp, m));
  485 }
  486 
  487 /*
  488  * Process a received Ethernet packet; the packet is in the
  489  * mbuf chain m with the ethernet header at the front.
       *
       * Order of processing: sanity checks, broadcast/multicast flagging,
       * MAC labelling, BPF tap, FCS trimming, input byte accounting, monitor
       * mode, lagg, in-software 802.1Q tag extraction, FIB assignment,
       * ng_ether, if_bridge, carp / M_PROMISC marking, and finally
       * ether_demux().
       *
       * Every early return after CURVNET_SET_QUIET() is paired with
       * CURVNET_RESTORE(); the returns before it do not need one.
  490  */
  491 static void
  492 ether_input_internal(struct ifnet *ifp, struct mbuf *m)
  493 {
  494         struct ether_header *eh;
  495         u_short etype;
  496 
              /* Frames arriving on an interface that is not up are dropped. */
  497         if ((ifp->if_flags & IFF_UP) == 0) {
  498                 m_freem(m);
  499                 return;
  500         }
  501 #ifdef DIAGNOSTIC
  502         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
  503                 if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
  504                 m_freem(m);
  505                 return;
  506         }
  507 #endif
              /* The whole Ethernet header must sit in the first mbuf. */
  508         if (m->m_len < ETHER_HDR_LEN) {
  509                 /* XXX maybe should pullup? */
  510                 if_printf(ifp, "discard frame w/o leading ethernet "
  511                                 "header (len %u pkt len %u)\n",
  512                                 m->m_len, m->m_pkthdr.len);
  513                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
  514                 m_freem(m);
  515                 return;
  516         }
  517         eh = mtod(m, struct ether_header *);
  518         etype = ntohs(eh->ether_type);
  519         random_harvest_queue_ether(m, sizeof(*m));
  520 
  521         CURVNET_SET_QUIET(ifp->if_vnet);
  522 
              /* Broadcast is tested first; it also satisfies the multicast test. */
  523         if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
  524                 if (ETHER_IS_BROADCAST(eh->ether_dhost))
  525                         m->m_flags |= M_BCAST;
  526                 else
  527                         m->m_flags |= M_MCAST;
  528                 if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
  529         }
  530 
  531 #ifdef MAC
  532         /*
  533          * Tag the mbuf with an appropriate MAC label before any other
  534          * consumers can get to it.
  535          */
  536         mac_ifnet_create_mbuf(ifp, m);
  537 #endif
  538 
  539         /*
  540          * Give bpf a chance at the packet.
  541          */
  542         ETHER_BPF_MTAP(ifp, m);
  543 
  544         /*
  545          * If the CRC is still on the packet, trim it off. We do this once
  546          * and once only in case we are re-entered. Nothing else on the
  547          * Ethernet receive path expects to see the FCS.
  548          */
  549         if (m->m_flags & M_HASFCS) {
  550                 m_adj(m, -ETHER_CRC_LEN);
  551                 m->m_flags &= ~M_HASFCS;
  552         }
  553 
              /* Count input bytes here unless IFCAP_HWSTATS is enabled. */
  554         if (!(ifp->if_capenable & IFCAP_HWSTATS))
  555                 if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
  556 
  557         /* Allow monitor mode to claim this frame, after stats are updated. */
  558         if (ifp->if_flags & IFF_MONITOR) {
  559                 m_freem(m);
  560                 CURVNET_RESTORE();
  561                 return;
  562         }
  563 
  564         /* Handle input from a lagg(4) port */
  565         if (ifp->if_type == IFT_IEEE8023ADLAG) {
  566                 KASSERT(lagg_input_ethernet_p != NULL,
  567                     ("%s: if_lagg not loaded!", __func__));
                      /*
                       * NOTE(review): eh and etype were read before this call
                       * and are not reloaded here.  This assumes the hook
                       * hands back the same header data -- confirm.
                       */
  568                 m = (*lagg_input_ethernet_p)(ifp, m);
  569                 if (m != NULL)
  570                         ifp = m->m_pkthdr.rcvif;
  571                 else {
  572                         CURVNET_RESTORE();
  573                         return;
  574                 }
  575         }
  576 
  577         /*
  578          * If the hardware did not process an 802.1Q tag, do this now,
  579          * to allow 802.1P priority frames to be passed to the main input
  580          * path correctly.
  581          * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels.
  582          */
  583         if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) {
  584                 struct ether_vlan_header *evl;
  585 
  586                 if (m->m_len < sizeof(*evl) &&
  587                     (m = m_pullup(m, sizeof(*evl))) == NULL) {
  588 #ifdef DIAGNOSTIC
  589                         if_printf(ifp, "cannot pullup VLAN header\n");
  590 #endif
  591                         if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
  592                         CURVNET_RESTORE();
  593                         return;
  594                 }
  595 
  596                 evl = mtod(m, struct ether_vlan_header *);
  597                 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
  598                 m->m_flags |= M_VLANTAG;
  599 
                      /*
                       * Remove the tag in place: copy the leading
                       * ETHER_HDR_LEN - ETHER_TYPE_LEN bytes (the two
                       * addresses) forward by ETHER_VLAN_ENCAP_LEN, then
                       * drop that many bytes from the front of the mbuf.
                       */
  600                 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
  601                     ETHER_HDR_LEN - ETHER_TYPE_LEN);
  602                 m_adj(m, ETHER_VLAN_ENCAP_LEN);
  603                 eh = mtod(m, struct ether_header *);
  604         }
  605 
  606         M_SETFIB(m, ifp->if_fib);
  607 
  608         /* Allow ng_ether(4) to claim this frame. */
  609         if (ifp->if_l2com != NULL) {
  610                 KASSERT(ng_ether_input_p != NULL,
  611                     ("%s: ng_ether_input_p is NULL", __func__));
  612                 m->m_flags &= ~M_PROMISC;
  613                 (*ng_ether_input_p)(ifp, &m);
  614                 if (m == NULL) {
  615                         CURVNET_RESTORE();
  616                         return;
  617                 }
                      /* The hook may have replaced m; reload the header pointer. */
  618                 eh = mtod(m, struct ether_header *);
  619         }
  620 
  621         /*
  622          * Allow if_bridge(4) to claim this frame.
  623          * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
  624          * and the frame should be delivered locally.
  625          */
  626         if (ifp->if_bridge != NULL) {
  627                 m->m_flags &= ~M_PROMISC;
  628                 BRIDGE_INPUT(ifp, m);
  629                 if (m == NULL) {
  630                         CURVNET_RESTORE();
  631                         return;
  632                 }
  633                 eh = mtod(m, struct ether_header *);
  634         }
  635 
  636 #if defined(INET) || defined(INET6)
  637         /*
  638          * Clear M_PROMISC on frame so that carp(4) will see it when the
  639          * mbuf flows up to Layer 3.
  640          * FreeBSD's implementation of carp(4) uses the inprotosw
  641          * to dispatch IPPROTO_CARP. carp(4) also allocates its own
  642          * Ethernet addresses of the form 00:00:5e:00:01:xx, which
  643          * is outside the scope of the M_PROMISC test below.
  644          * TODO: Maintain a hash table of ethernet addresses other than
  645          * ether_dhost which may be active on this ifp.
  646          */
  647         if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) {
  648                 m->m_flags &= ~M_PROMISC;
  649         } else
  650 #endif
  651         {
  652                 /*
  653                  * If the frame received was not for our MAC address, set the
  654                  * M_PROMISC flag on the mbuf chain. The frame may need to
  655                  * be seen by the rest of the Ethernet input path in case of
  656                  * re-entry (e.g. bridge, vlan, netgraph) but should not be
  657                  * seen by upper protocol layers.
  658                  */
  659                 if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
  660                     bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
  661                         m->m_flags |= M_PROMISC;
  662         }
  663 
  664         ether_demux(ifp, m);
  665         CURVNET_RESTORE();
  666 }
  667 
  668 /*
  669  * Ethernet input dispatch; by default, direct dispatch here regardless of
  670  * global configuration.  However, if RSS is enabled, hook up RSS affinity
  671  * so that when deferred or hybrid dispatch is enabled, we can redistribute
  672  * load based on RSS.
  673  *
  674  * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
  675  * not it had already done work distribution via multi-queue.  Then we could
  676  * direct dispatch in the event load balancing was already complete and
  677  * handle the case of interfaces with different capabilities better.
  678  *
  679  * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
  680  * at multiple layers?
  681  *
  682  * XXXRW: For now, enable all this only if RSS is compiled in, although it
  683  * works fine without RSS.  Need to characterise the performance overhead
  684  * of the detour through the netisr code in the event the result is always
  685  * direct dispatch.
       *
       * ether_nh_input() itself is the handler installed in ether_nh: it
       * asserts that m has a packet header with a non-NULL rcvif and then
       * processes the frame on that interface.
  686  */
  687 static void
  688 ether_nh_input(struct mbuf *m)
  689 {
  690 
  691         M_ASSERTPKTHDR(m);
  692         KASSERT(m->m_pkthdr.rcvif != NULL,
  693             ("%s: NULL interface pointer", __func__));
  694         ether_input_internal(m->m_pkthdr.rcvif, m);
  695 }
  696 
  697 static struct netisr_handler    ether_nh = {
              /*
               * netisr registration record for NETISR_ETHER.  Dispatch is
               * direct in both configurations; only the policy differs, and
               * the RSS build additionally supplies rss_m2cpuid.
               */
  698         .nh_name = "ether",
  699         .nh_handler = ether_nh_input,
  700         .nh_proto = NETISR_ETHER,
  701 #ifdef RSS
  702         .nh_policy = NETISR_POLICY_CPU,
  703         .nh_dispatch = NETISR_DISPATCH_DIRECT,
  704         .nh_m2cpuid = rss_m2cpuid,
  705 #else
  706         .nh_policy = NETISR_POLICY_SOURCE,
  707         .nh_dispatch = NETISR_DISPATCH_DIRECT,
  708 #endif
  709 };
  710 
  711 static void
  712 ether_init(__unused void *arg)
  713 {
  714 
              /* Register the Ethernet handler with netisr; arg is unused. */
  715         netisr_register(&ether_nh);
  716 }
      /* Run ether_init() at SI_SUB_INIT_IF. */
  717 SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
  718 
  719 static void
  720 vnet_ether_init(__unused void *arg)
  721 {
              /*
               * Set up the link-layer pfil head (type PFIL_TYPE_AF, family
               * AF_LINK) and register it.  A registration failure is only
               * reported with printf(); initialisation carries on.  With
               * VIMAGE, the Ethernet netisr handler is also registered for
               * this vnet.
               */
  722         int i;
  723 
  724         /* Initialize packet filter hooks. */
  725         V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
  726         V_link_pfil_hook.ph_af = AF_LINK;
  727         if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
  728                 printf("%s: WARNING: unable to register pfil link hook, "
  729                         "error %d\n", __func__, i);
  730 #ifdef VIMAGE
  731         netisr_register_vnet(&ether_nh);
  732 #endif
  733 }
      /* Run vnet_ether_init() at SI_SUB_PROTO_IF. */
  734 VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
  735     vnet_ether_init, NULL);
  736  
  737 #ifdef VIMAGE
      /*
       * Teardown counterparts of vnet_ether_init(), compiled only with
       * VIMAGE.  The pfil head is unregistered at SI_SUB_PROTO_PFIL and the
       * netisr handler at SI_SUB_PROTO_IF.
       */
  738 static void
  739 vnet_ether_pfil_destroy(__unused void *arg)
  740 {
  741         int i;
  742 
              /* A failure to unregister is only reported, not acted upon. */
  743         if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0)
  744                 printf("%s: WARNING: unable to unregister pfil link hook, "
  745                         "error %d\n", __func__, i);
  746 }
  747 VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
  748     vnet_ether_pfil_destroy, NULL);
  749 
  750 static void
  751 vnet_ether_destroy(__unused void *arg)
  752 {
  753 
              /* Undo the netisr_register_vnet() done in vnet_ether_init(). */
  754         netisr_unregister_vnet(&ether_nh);
  755 }
  756 VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
  757     vnet_ether_destroy, NULL);
  758 #endif
  759 
  760 
  761 
  762 static void
  763 ether_input(struct ifnet *ifp, struct mbuf *m)
  764 {
  765 
              /*
               * Input entry point: takes one packet or a chain of packets
               * linked through m_nextpkt, unlinks each one and hands it to
               * netisr under NETISR_ETHER with ifp's vnet set as current.
               * A NULL m is a no-op.
               */
  766         struct mbuf *mn;
  767 
  768         /*
  769          * The drivers are allowed to pass in a chain of packets linked with
  770          * m_nextpkt. We split them up into separate packets here and pass
  771          * them up. This allows the drivers to amortize the receive lock.
  772          */
  773         while (m) {
                      /* Save the successor first; m belongs to netisr after dispatch. */
  774                 mn = m->m_nextpkt;
  775                 m->m_nextpkt = NULL;
  776 
  777                 /*
  778                  * We will rely on rcvif being set properly in the deferred context,
  779                  * so assert it is correct here.
  780                  */
  781                 KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
  782                     "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
  783                 CURVNET_SET_QUIET(ifp->if_vnet);
  784                 netisr_dispatch(NETISR_ETHER, m);
  785                 CURVNET_RESTORE();
  786                 m = mn;
  787         }
  788 }
  789 
  790 /*
  791  * Upper layer processing for a received Ethernet packet.
  792  */
  793 void
  794 ether_demux(struct ifnet *ifp, struct mbuf *m)
  795 {
  796         struct ether_header *eh;
  797         int i, isr;
  798         u_short ether_type;
  799 
  800         KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
  801 
  802         /* Do not grab PROMISC frames in case we are re-entered. */
  803         if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
  804                 i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, 0,
  805                     NULL);
  806 
  807                 if (i != 0 || m == NULL)
  808                         return;
  809         }
  810 
  811         eh = mtod(m, struct ether_header *);
  812         ether_type = ntohs(eh->ether_type);
  813 
  814         /*
  815          * If this frame has a VLAN tag other than 0, call vlan_input()
  816          * if its module is loaded. Otherwise, drop.
  817          */
  818         if ((m->m_flags & M_VLANTAG) &&
  819             EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
  820                 if (ifp->if_vlantrunk == NULL) {
  821                         if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
  822                         m_freem(m);
  823                         return;
  824                 }
  825                 KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
  826                     __func__));
  827                 /* Clear before possibly re-entering ether_input(). */
  828                 m->m_flags &= ~M_PROMISC;
  829                 (*vlan_input_p)(ifp, m);
  830                 return;
  831         }
  832 
  833         /*
  834          * Pass promiscuously received frames to the upper layer if the user
  835          * requested this by setting IFF_PPROMISC. Otherwise, drop them.
  836          */
  837         if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
  838                 m_freem(m);
  839                 return;
  840         }
  841 
  842         /*
  843          * Reset layer specific mbuf flags to avoid confusing upper layers.
  844          * Strip off Ethernet header.
  845          */
  846         m->m_flags &= ~M_VLANTAG;
  847         m_clrprotoflags(m);
  848         m_adj(m, ETHER_HDR_LEN);
  849 
  850         /*
  851          * Dispatch frame to upper layer.
  852          */
  853         switch (ether_type) {
  854 #ifdef INET
  855         case ETHERTYPE_IP:
  856                 isr = NETISR_IP;
  857                 break;
  858 
  859         case ETHERTYPE_ARP:
  860                 if (ifp->if_flags & IFF_NOARP) {
  861                         /* Discard packet if ARP is disabled on interface */
  862                         m_freem(m);
  863                         return;
  864                 }
  865                 isr = NETISR_ARP;
  866                 break;
  867 #endif
  868 #ifdef INET6
  869         case ETHERTYPE_IPV6:
  870                 isr = NETISR_IPV6;
  871                 break;
  872 #endif
  873         default:
  874                 goto discard;
  875         }
  876         netisr_dispatch(isr, m);
  877         return;
  878 
  879 discard:
  880         /*
  881          * Packet is to be discarded.  If netgraph is present,
  882          * hand the packet to it for last chance processing;
  883          * otherwise dispose of it.
  884          */
  885         if (ifp->if_l2com != NULL) {
  886                 KASSERT(ng_ether_input_orphan_p != NULL,
  887                     ("ng_ether_input_orphan_p is NULL"));
  888                 /*
  889                  * Put back the ethernet header so netgraph has a
  890                  * consistent view of inbound packets.
  891                  */
  892                 M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
  893                 (*ng_ether_input_orphan_p)(ifp, m);
  894                 return;
  895         }
  896         m_freem(m);
  897 }
  898 
  899 /*
  900  * Convert Ethernet address to printable (loggable) representation.
  901  * This routine is for compatibility; it's better to just use
  902  *
  903  *      printf("%6D", <pointer to address>, ":");
  904  *
  905  * since there's no static buffer involved.
  906  */
  907 char *
  908 ether_sprintf(const u_char *ap)
  909 {
  910         static char etherbuf[18];
  911         snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":");
  912         return (etherbuf);
  913 }
  914 
  915 /*
  916  * Perform common duties while attaching to interface list
  917  */
  918 void
  919 ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
  920 {
  921         int i;
  922         struct ifaddr *ifa;
  923         struct sockaddr_dl *sdl;
  924 
  925         ifp->if_addrlen = ETHER_ADDR_LEN;
  926         ifp->if_hdrlen = ETHER_HDR_LEN;
  927         ifp->if_mtu = ETHERMTU;
  928         if_attach(ifp);
  929         ifp->if_output = ether_output;
  930         ifp->if_input = ether_input;
  931         ifp->if_resolvemulti = ether_resolvemulti;
  932         ifp->if_requestencap = ether_requestencap;
  933 #ifdef VIMAGE
  934         ifp->if_reassign = ether_reassign;
  935 #endif
  936         if (ifp->if_baudrate == 0)
  937                 ifp->if_baudrate = IF_Mbps(10);         /* just a default */
  938         ifp->if_broadcastaddr = etherbroadcastaddr;
  939 
  940         ifa = ifp->if_addr;
  941         KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
  942         sdl = (struct sockaddr_dl *)ifa->ifa_addr;
  943         sdl->sdl_type = IFT_ETHER;
  944         sdl->sdl_alen = ifp->if_addrlen;
  945         bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
  946 
  947         if (ifp->if_hw_addr != NULL)
  948                 bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen);
  949 
  950         bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
  951         if (ng_ether_attach_p != NULL)
  952                 (*ng_ether_attach_p)(ifp);
  953 
  954         /* Announce Ethernet MAC address if non-zero. */
  955         for (i = 0; i < ifp->if_addrlen; i++)
  956                 if (lla[i] != 0)
  957                         break; 
  958         if (i != ifp->if_addrlen)
  959                 if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
  960 
  961         uuid_ether_add(LLADDR(sdl));
  962 
  963         /* Add necessary bits are setup; announce it now. */
  964         EVENTHANDLER_INVOKE(ether_ifattach_event, ifp);
  965         if (IS_DEFAULT_VNET(curvnet))
  966                 devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL);
  967 }
  968 
  969 /*
  970  * Perform common duties while detaching an Ethernet interface
  971  */
  972 void
  973 ether_ifdetach(struct ifnet *ifp)
  974 {
  975         struct sockaddr_dl *sdl;
  976 
  977         sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);
  978         uuid_ether_del(LLADDR(sdl));
  979 
  980         if (ifp->if_l2com != NULL) {
  981                 KASSERT(ng_ether_detach_p != NULL,
  982                     ("ng_ether_detach_p is NULL"));
  983                 (*ng_ether_detach_p)(ifp);
  984         }
  985 
  986         bpfdetach(ifp);
  987         if_detach(ifp);
  988 }
  989 
  990 #ifdef VIMAGE
  991 void
  992 ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
  993 {
  994 
  995         if (ifp->if_l2com != NULL) {
  996                 KASSERT(ng_ether_detach_p != NULL,
  997                     ("ng_ether_detach_p is NULL"));
  998                 (*ng_ether_detach_p)(ifp);
  999         }
 1000 
 1001         if (ng_ether_attach_p != NULL) {
 1002                 CURVNET_SET_QUIET(new_vnet);
 1003                 (*ng_ether_attach_p)(ifp);
 1004                 CURVNET_RESTORE();
 1005         }
 1006 }
 1007 #endif
 1008 
 1009 SYSCTL_DECL(_net_link);
      /* Read-write sysctl node "ether" under _net_link, numbered IFT_ETHER. */
 1010 SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
 1011 
 1012 #if 0
 1013 /*
 1014  * This is for reference.  We have a table-driven version
 1015  * of the little-endian crc32 generator, which is faster
 1016  * than the double-loop.
 1017  */
 1018 uint32_t
 1019 ether_crc32_le(const uint8_t *buf, size_t len)
 1020 {
 1021         size_t i;
 1022         uint32_t crc;
 1023         int bit;
 1024         uint8_t data;
 1025 
 1026         crc = 0xffffffff;       /* initial value */
 1027 
 1028         for (i = 0; i < len; i++) {
 1029                 for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
 1030                         carry = (crc ^ data) & 1;
 1031                         crc >>= 1;
 1032                         if (carry)
 1033                                 crc = (crc ^ ETHER_CRC_POLY_LE);
 1034                 }
 1035         }
 1036 
 1037         return (crc);
 1038 }
 1039 #else
 1040 uint32_t
 1041 ether_crc32_le(const uint8_t *buf, size_t len)
 1042 {
 1043         static const uint32_t crctab[] = {
 1044                 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
 1045                 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
 1046                 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
 1047                 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
 1048         };
 1049         size_t i;
 1050         uint32_t crc;
 1051 
 1052         crc = 0xffffffff;       /* initial value */
 1053 
 1054         for (i = 0; i < len; i++) {
 1055                 crc ^= buf[i];
 1056                 crc = (crc >> 4) ^ crctab[crc & 0xf];
 1057                 crc = (crc >> 4) ^ crctab[crc & 0xf];
 1058         }
 1059 
 1060         return (crc);
 1061 }
 1062 #endif
 1063 
 1064 uint32_t
 1065 ether_crc32_be(const uint8_t *buf, size_t len)
 1066 {
 1067         size_t i;
 1068         uint32_t crc, carry;
 1069         int bit;
 1070         uint8_t data;
 1071 
 1072         crc = 0xffffffff;       /* initial value */
 1073 
 1074         for (i = 0; i < len; i++) {
 1075                 for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
 1076                         carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01);
 1077                         crc <<= 1;
 1078                         if (carry)
 1079                                 crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
 1080                 }
 1081         }
 1082 
 1083         return (crc);
 1084 }
 1085 
 1086 int
 1087 ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 1088 {
 1089         struct ifaddr *ifa = (struct ifaddr *) data;
 1090         struct ifreq *ifr = (struct ifreq *) data;
 1091         int error = 0;
 1092 
 1093         switch (command) {
 1094         case SIOCSIFADDR:
 1095                 ifp->if_flags |= IFF_UP;
 1096 
 1097                 switch (ifa->ifa_addr->sa_family) {
 1098 #ifdef INET
 1099                 case AF_INET:
 1100                         ifp->if_init(ifp->if_softc);    /* before arpwhohas */
 1101                         arp_ifinit(ifp, ifa);
 1102                         break;
 1103 #endif
 1104                 default:
 1105                         ifp->if_init(ifp->if_softc);
 1106                         break;
 1107                 }
 1108                 break;
 1109 
 1110         case SIOCGIFADDR:
 1111                 bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
 1112                     ETHER_ADDR_LEN);
 1113                 break;
 1114 
 1115         case SIOCSIFMTU:
 1116                 /*
 1117                  * Set the interface MTU.
 1118                  */
 1119                 if (ifr->ifr_mtu > ETHERMTU) {
 1120                         error = EINVAL;
 1121                 } else {
 1122                         ifp->if_mtu = ifr->ifr_mtu;
 1123                 }
 1124                 break;
 1125 
 1126         case SIOCSLANPCP:
 1127                 error = priv_check(curthread, PRIV_NET_SETLANPCP);
 1128                 if (error != 0)
 1129                         break;
 1130                 if (ifr->ifr_lan_pcp > 7 &&
 1131                     ifr->ifr_lan_pcp != IFNET_PCP_NONE) {
 1132                         error = EINVAL;
 1133                 } else {
 1134                         ifp->if_pcp = ifr->ifr_lan_pcp;
 1135                         /* broadcast event about PCP change */
 1136                         EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP);
 1137                 }
 1138                 break;
 1139 
 1140         case SIOCGLANPCP:
 1141                 ifr->ifr_lan_pcp = ifp->if_pcp;
 1142                 break;
 1143 
 1144         default:
 1145                 error = EINVAL;                 /* XXX netbsd has ENOTTY??? */
 1146                 break;
 1147         }
 1148         return (error);
 1149 }
 1150 
 1151 static int
 1152 ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
 1153         struct sockaddr *sa)
 1154 {
 1155         struct sockaddr_dl *sdl;
 1156 #ifdef INET
 1157         struct sockaddr_in *sin;
 1158 #endif
 1159 #ifdef INET6
 1160         struct sockaddr_in6 *sin6;
 1161 #endif
 1162         u_char *e_addr;
 1163 
 1164         switch(sa->sa_family) {
 1165         case AF_LINK:
 1166                 /*
 1167                  * No mapping needed. Just check that it's a valid MC address.
 1168                  */
 1169                 sdl = (struct sockaddr_dl *)sa;
 1170                 e_addr = LLADDR(sdl);
 1171                 if (!ETHER_IS_MULTICAST(e_addr))
 1172                         return EADDRNOTAVAIL;
 1173                 *llsa = NULL;
 1174                 return 0;
 1175 
 1176 #ifdef INET
 1177         case AF_INET:
 1178                 sin = (struct sockaddr_in *)sa;
 1179                 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 1180                         return EADDRNOTAVAIL;
 1181                 sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
 1182                 sdl->sdl_alen = ETHER_ADDR_LEN;
 1183                 e_addr = LLADDR(sdl);
 1184                 ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
 1185                 *llsa = (struct sockaddr *)sdl;
 1186                 return 0;
 1187 #endif
 1188 #ifdef INET6
 1189         case AF_INET6:
 1190                 sin6 = (struct sockaddr_in6 *)sa;
 1191                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 1192                         /*
 1193                          * An IP6 address of 0 means listen to all
 1194                          * of the Ethernet multicast address used for IP6.
 1195                          * (This is used for multicast routers.)
 1196                          */
 1197                         ifp->if_flags |= IFF_ALLMULTI;
 1198                         *llsa = NULL;
 1199                         return 0;
 1200                 }
 1201                 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 1202                         return EADDRNOTAVAIL;
 1203                 sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
 1204                 sdl->sdl_alen = ETHER_ADDR_LEN;
 1205                 e_addr = LLADDR(sdl);
 1206                 ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
 1207                 *llsa = (struct sockaddr *)sdl;
 1208                 return 0;
 1209 #endif
 1210 
 1211         default:
 1212                 /*
 1213                  * Well, the text isn't quite right, but it's the name
 1214                  * that counts...
 1215                  */
 1216                 return EAFNOSUPPORT;
 1217         }
 1218 }
 1219 
 1220 static moduledata_t ether_mod = {
              /* Only the name is given; the remaining members are left zeroed. */
 1221         .name = "ether",
 1222 };
 1223 
 1224 void
 1225 ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
 1226 {
 1227         struct ether_vlan_header vlan;
 1228         struct mbuf mv, mb;
 1229 
 1230         KASSERT((m->m_flags & M_VLANTAG) != 0,
 1231             ("%s: vlan information not present", __func__));
 1232         KASSERT(m->m_len >= sizeof(struct ether_header),
 1233             ("%s: mbuf not large enough for header", __func__));
 1234         bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
 1235         vlan.evl_proto = vlan.evl_encap_proto;
 1236         vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
 1237         vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
 1238         m->m_len -= sizeof(struct ether_header);
 1239         m->m_data += sizeof(struct ether_header);
 1240         /*
 1241          * If a data link has been supplied by the caller, then we will need to
 1242          * re-create a stack allocated mbuf chain with the following structure:
 1243          *
 1244          * (1) mbuf #1 will contain the supplied data link
 1245          * (2) mbuf #2 will contain the vlan header
 1246          * (3) mbuf #3 will contain the original mbuf's packet data
 1247          *
 1248          * Otherwise, submit the packet and vlan header via bpf_mtap2().
 1249          */
 1250         if (data != NULL) {
 1251                 mv.m_next = m;
 1252                 mv.m_data = (caddr_t)&vlan;
 1253                 mv.m_len = sizeof(vlan);
 1254                 mb.m_next = &mv;
 1255                 mb.m_data = data;
 1256                 mb.m_len = dlen;
 1257                 bpf_mtap(bp, &mb);
 1258         } else
 1259                 bpf_mtap2(bp, &vlan, sizeof(vlan), m);
 1260         m->m_len += sizeof(struct ether_header);
 1261         m->m_data -= sizeof(struct ether_header);
 1262 }
 1263 
 1264 struct mbuf *
 1265 ether_vlanencap(struct mbuf *m, uint16_t tag)
 1266 {
 1267         struct ether_vlan_header *evl;
 1268 
 1269         M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
 1270         if (m == NULL)
 1271                 return (NULL);
 1272         /* M_PREPEND takes care of m_len, m_pkthdr.len for us */
 1273 
 1274         if (m->m_len < sizeof(*evl)) {
 1275                 m = m_pullup(m, sizeof(*evl));
 1276                 if (m == NULL)
 1277                         return (NULL);
 1278         }
 1279 
 1280         /*
 1281          * Transform the Ethernet header into an Ethernet header
 1282          * with 802.1Q encapsulation.
 1283          */
 1284         evl = mtod(m, struct ether_vlan_header *);
 1285         bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
 1286             (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
 1287         evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
 1288         evl->evl_tag = htons(tag);
 1289         return (m);
 1290 }
 1291 
 1292 static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
 1293     "IEEE 802.1Q VLAN");
 1294 static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
 1295     "for consistency");
 1296 
      /*
       * Per-vnet switch, exported read-write as "soft_pad" under
       * _net_link_vlan.  When non-zero, ether_8021q_frame() pads short
       * frames up to the minimum Ethernet size before tagging them.
       */
 1297 VNET_DEFINE_STATIC(int, soft_pad);
 1298 #define V_soft_pad      VNET(soft_pad)
 1299 SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
 1300     &VNET_NAME(soft_pad), 0,
 1301     "pad short frames before tagging");
 1302 
 1303 /*
 1304  * For now, make preserving PCP via an mbuf tag optional, as it increases
 1305  * per-packet memory allocations and frees.  In the future, it would be
 1306  * preferable to reuse ether_vtag for this, or similar.
       *
       * Off (0) by default and exported read-write as "mtag_pcp" under
       * _net_link_vlan.  When non-zero, ether_8021q_frame() takes the
       * priority from an MTAG_8021Q_PCP_OUT mbuf tag if the packet has one.
 1307  */
 1308 int vlan_mtag_pcp = 0;
 1309 SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW,
 1310     &vlan_mtag_pcp, 0,
 1311     "Retain VLAN PCP information as packets are passed up the stack");
 1312 
 1313 bool
 1314 ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
 1315     uint16_t vid, uint8_t pcp)
 1316 {
 1317         struct m_tag *mtag;
 1318         int n;
 1319         uint16_t tag;
 1320         static const char pad[8];       /* just zeros */
 1321 
 1322         /*
 1323          * Pad the frame to the minimum size allowed if told to.
 1324          * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
 1325          * paragraph C.4.4.3.b.  It can help to work around buggy
 1326          * bridges that violate paragraph C.4.4.3.a from the same
 1327          * document, i.e., fail to pad short frames after untagging.
 1328          * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
 1329          * untagging it will produce a 62-byte frame, which is a runt
 1330          * and requires padding.  There are VLAN-enabled network
 1331          * devices that just discard such runts instead or mishandle
 1332          * them somehow.
 1333          */
 1334         if (V_soft_pad && p->if_type == IFT_ETHER) {
 1335                 for (n = ETHERMIN + ETHER_HDR_LEN - (*mp)->m_pkthdr.len;
 1336                      n > 0; n -= sizeof(pad)) {
 1337                         if (!m_append(*mp, min(n, sizeof(pad)), pad))
 1338                                 break;
 1339                 }
 1340                 if (n > 0) {
 1341                         m_freem(*mp);
 1342                         *mp = NULL;
 1343                         if_printf(ife, "cannot pad short frame");
 1344                         return (false);
 1345                 }
 1346         }
 1347 
 1348         /*
 1349          * If PCP is set in mbuf, use it
 1350          */
 1351         if ((*mp)->m_flags & M_VLANTAG) {
 1352                 pcp = EVL_PRIOFTAG((*mp)->m_pkthdr.ether_vtag);
 1353         }
 1354 
 1355         /*
 1356          * If underlying interface can do VLAN tag insertion itself,
 1357          * just pass the packet along. However, we need some way to
 1358          * tell the interface where the packet came from so that it
 1359          * knows how to find the VLAN tag to use, so we attach a
 1360          * packet tag that holds it.
 1361          */
 1362         if (vlan_mtag_pcp && (mtag = m_tag_locate(*mp, MTAG_8021Q,
 1363             MTAG_8021Q_PCP_OUT, NULL)) != NULL)
 1364                 tag = EVL_MAKETAG(vid, *(uint8_t *)(mtag + 1), 0);
 1365         else
 1366                 tag = EVL_MAKETAG(vid, pcp, 0);
 1367         if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
 1368                 (*mp)->m_pkthdr.ether_vtag = tag;
 1369                 (*mp)->m_flags |= M_VLANTAG;
 1370         } else {
 1371                 *mp = ether_vlanencap(*mp, tag);
 1372                 if (*mp == NULL) {
 1373                         if_printf(ife, "unable to prepend 802.1Q header");
 1374                         return (false);
 1375                 }
 1376         }
 1377         return (true);
 1378 }
 1379 
 1380 /*
 1381  * Allocate an address from the FreeBSD Foundation OUI.  This uses a
 1382  * cryptographic hash function on the containing jail's name, UUID and the
 1383  * interface name to attempt to provide a unique but stable address.
 1384  * Pseudo-interfaces which require a MAC address should use this function to
 1385  * allocate non-locally-administered addresses.
 1386  */
 1387 void
 1388 ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr)
 1389 {
 1390         SHA1_CTX ctx;
 1391         char *buf;
 1392         char uuid[HOSTUUIDLEN + 1];
 1393         uint64_t addr;
 1394         int i, sz;
 1395         char digest[SHA1_RESULTLEN];
 1396         char jailname[MAXHOSTNAMELEN];
 1397 
 1398         getcredhostuuid(curthread->td_ucred, uuid, sizeof(uuid));
 1399         if (strncmp(uuid, DEFAULT_HOSTUUID, sizeof(uuid)) == 0) {
 1400                 /* Fall back to a random mac address. */
 1401                 goto rando;
 1402         }
 1403 
 1404         /* If each (vnet) jail would also have a unique hostuuid this would not
 1405          * be necessary. */
 1406         getjailname(curthread->td_ucred, jailname, sizeof(jailname));
 1407         sz = asprintf(&buf, M_TEMP, "%s-%s-%s", uuid, if_name(ifp),
 1408             jailname);
 1409         if (sz < 0) {
 1410                 /* Fall back to a random mac address. */
 1411                 goto rando;
 1412         }
 1413 
 1414         SHA1Init(&ctx);
 1415         SHA1Update(&ctx, buf, sz);
 1416         SHA1Final(digest, &ctx);
 1417         free(buf, M_TEMP);
 1418 
 1419         addr = ((digest[0] << 16) | (digest[1] << 8) | digest[2]) &
 1420             OUI_FREEBSD_GENERATED_MASK;
 1421         addr = OUI_FREEBSD(addr);
 1422         for (i = 0; i < ETHER_ADDR_LEN; ++i) {
 1423                 hwaddr->octet[i] = addr >> ((ETHER_ADDR_LEN - i - 1) * 8) &
 1424                     0xFF;
 1425         }
 1426 
 1427         return;
 1428 rando:
 1429         arc4rand(hwaddr, sizeof(*hwaddr), 0);
 1430         /* Unicast */
 1431         hwaddr->octet[0] &= 0xFE;
 1432         /* Locally administered. */
 1433         hwaddr->octet[0] |= 0x02;
 1434 }
 1435 
 1436 DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
      /* Advertise version 1 of the "ether" module. */
 1437 MODULE_VERSION(ether, 1);

Cache object: 58787b16be45f4fd9cfd5761ed8183f8


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.