1 /*-
2 * Copyright (c) 1982, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93
30 * $FreeBSD: releng/11.2/sys/net/if_ethersubr.c 333101 2018-04-30 08:13:30Z hselasky $
31 */
32
33 #include "opt_inet.h"
34 #include "opt_inet6.h"
35 #include "opt_netgraph.h"
36 #include "opt_mbuf_profiling.h"
37 #include "opt_rss.h"
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/bus.h>
42 #include <sys/eventhandler.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/module.h>
47 #include <sys/mbuf.h>
48 #include <sys/priv.h>
49 #include <sys/random.h>
50 #include <sys/socket.h>
51 #include <sys/sockio.h>
52 #include <sys/sysctl.h>
53 #include <sys/uuid.h>
54
55 #include <net/if.h>
56 #include <net/if_var.h>
57 #include <net/if_arp.h>
58 #include <net/netisr.h>
59 #include <net/route.h>
60 #include <net/if_llc.h>
61 #include <net/if_dl.h>
62 #include <net/if_types.h>
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if_bridgevar.h>
66 #include <net/if_vlan_var.h>
67 #include <net/if_llatbl.h>
68 #include <net/pfil.h>
69 #include <net/rss_config.h>
70 #include <net/vnet.h>
71
72 #include <netpfil/pf/pf_mtag.h>
73
74 #if defined(INET) || defined(INET6)
75 #include <netinet/in.h>
76 #include <netinet/in_var.h>
77 #include <netinet/if_ether.h>
78 #include <netinet/ip_carp.h>
79 #include <netinet/ip_var.h>
80 #endif
81 #ifdef INET6
82 #include <netinet6/nd6.h>
83 #endif
84 #include <security/mac/mac_framework.h>
85
/*
 * Compile-time layout checks: an Ethernet header must be exactly two
 * addresses plus a 2-byte field, and an address exactly ETHER_ADDR_LEN bytes.
 */
#ifdef CTASSERT
CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
#endif

VNET_DEFINE(struct pfil_head, link_pfil_hook);	/* Packet filter hooks */

/*
 * netgraph node hooks for ng_ether(4).  These pointers are defined here
 * without initializers; the code in this file calls through them only for
 * interfaces whose if_l2com is set (or after an explicit NULL check).
 */
void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
void	(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
int	(*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
void	(*ng_ether_attach_p)(struct ifnet *ifp);
void	(*ng_ether_detach_p)(struct ifnet *ifp);

/* Input hook called by ether_demux() for frames carrying a non-zero VLAN id. */
void	(*vlan_input_p)(struct ifnet *, struct mbuf *);

/* if_bridge(4) support */
struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
int	(*bridge_output_p)(struct ifnet *, struct mbuf *,
		struct sockaddr *, struct rtentry *);
void	(*bridge_dn_p)(struct mbuf *, struct ifnet *);

/* if_lagg(4) support */
struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);

/* The all-ones Ethernet broadcast address. */
static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

/* Forward declarations for the ifnet methods installed by ether_ifattach(). */
static	int ether_resolvemulti(struct ifnet *, struct sockaddr **,
		struct sockaddr *);
#ifdef VIMAGE
static	void ether_reassign(struct ifnet *, struct vnet *, char *);
#endif
static	int ether_requestencap(struct ifnet *, struct if_encap_req *);

/* True when addr equals the broadcast address byte for byte. */
#define	ETHER_IS_BROADCAST(addr) \
	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)

/*
 * Record an error and jump to the caller's cleanup code.  The enclosing
 * function must provide a local `error' variable and a `bad' label.
 */
#define senderr(e) do { error = (e); goto bad;} while (0)
125
126 static void
127 update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
128 {
129 int csum_flags = 0;
130
131 if (src->m_pkthdr.csum_flags & CSUM_IP)
132 csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
133 if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
134 csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
135 if (src->m_pkthdr.csum_flags & CSUM_SCTP)
136 csum_flags |= CSUM_SCTP_VALID;
137 dst->m_pkthdr.csum_flags |= csum_flags;
138 if (csum_flags & CSUM_DATA_VALID)
139 dst->m_pkthdr.csum_data = 0xffff;
140 }
141
142 /*
143 * Handle link-layer encapsulation requests.
144 */
145 static int
146 ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
147 {
148 struct ether_header *eh;
149 struct arphdr *ah;
150 uint16_t etype;
151 const u_char *lladdr;
152
153 if (req->rtype != IFENCAP_LL)
154 return (EOPNOTSUPP);
155
156 if (req->bufsize < ETHER_HDR_LEN)
157 return (ENOMEM);
158
159 eh = (struct ether_header *)req->buf;
160 lladdr = req->lladdr;
161 req->lladdr_off = 0;
162
163 switch (req->family) {
164 case AF_INET:
165 etype = htons(ETHERTYPE_IP);
166 break;
167 case AF_INET6:
168 etype = htons(ETHERTYPE_IPV6);
169 break;
170 case AF_ARP:
171 ah = (struct arphdr *)req->hdata;
172 ah->ar_hrd = htons(ARPHRD_ETHER);
173
174 switch(ntohs(ah->ar_op)) {
175 case ARPOP_REVREQUEST:
176 case ARPOP_REVREPLY:
177 etype = htons(ETHERTYPE_REVARP);
178 break;
179 case ARPOP_REQUEST:
180 case ARPOP_REPLY:
181 default:
182 etype = htons(ETHERTYPE_ARP);
183 break;
184 }
185
186 if (req->flags & IFENCAP_FLAG_BROADCAST)
187 lladdr = ifp->if_broadcastaddr;
188 break;
189 default:
190 return (EAFNOSUPPORT);
191 }
192
193 memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
194 memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN);
195 memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
196 req->bufsize = sizeof(struct ether_header);
197
198 return (0);
199 }
200
201
202 static int
203 ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
204 const struct sockaddr *dst, struct route *ro, u_char *phdr,
205 uint32_t *pflags, struct llentry **plle)
206 {
207 struct ether_header *eh;
208 uint32_t lleflags = 0;
209 int error = 0;
210 #if defined(INET) || defined(INET6)
211 uint16_t etype;
212 #endif
213
214 if (plle)
215 *plle = NULL;
216 eh = (struct ether_header *)phdr;
217
218 switch (dst->sa_family) {
219 #ifdef INET
220 case AF_INET:
221 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
222 error = arpresolve(ifp, 0, m, dst, phdr, &lleflags,
223 plle);
224 else {
225 if (m->m_flags & M_BCAST)
226 memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
227 ETHER_ADDR_LEN);
228 else {
229 const struct in_addr *a;
230 a = &(((const struct sockaddr_in *)dst)->sin_addr);
231 ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost);
232 }
233 etype = htons(ETHERTYPE_IP);
234 memcpy(&eh->ether_type, &etype, sizeof(etype));
235 memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
236 }
237 break;
238 #endif
239 #ifdef INET6
240 case AF_INET6:
241 if ((m->m_flags & M_MCAST) == 0)
242 error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags,
243 plle);
244 else {
245 const struct in6_addr *a6;
246 a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
247 ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
248 etype = htons(ETHERTYPE_IPV6);
249 memcpy(&eh->ether_type, &etype, sizeof(etype));
250 memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
251 }
252 break;
253 #endif
254 default:
255 if_printf(ifp, "can't handle af%d\n", dst->sa_family);
256 if (m != NULL)
257 m_freem(m);
258 return (EAFNOSUPPORT);
259 }
260
261 if (error == EHOSTDOWN) {
262 if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
263 error = EHOSTUNREACH;
264 }
265
266 if (error != 0)
267 return (error);
268
269 *pflags = RT_MAY_LOOP;
270 if (lleflags & LLE_IFADDR)
271 *pflags |= RT_L2_ME;
272
273 return (0);
274 }
275
/*
 * Ethernet output routine.
 * Encapsulate a packet of type family for the local net.
 *
 * The link-layer header comes from, in order of preference: a caller-supplied
 * ro->ro_prepend, a cached and still valid ro->ro_lle (unicast only), or
 * ether_resolve_addr().  No trailer encapsulation is performed by the code
 * below.
 *
 * Returns 0 or an errno value.  Error exits taken through senderr() free
 * the mbuf at the `bad' label; the early return after ether_resolve_addr()
 * does not free it here (NOTE(review): presumably the resolver has already
 * consumed the mbuf on that path -- confirm against arpresolve/nd6_resolve).
 */
int
ether_output(struct ifnet *ifp, struct mbuf *m,
	const struct sockaddr *dst, struct route *ro)
{
	int error = 0;
	char linkhdr[ETHER_HDR_LEN], *phdr;
	struct ether_header *eh;
	struct pf_mtag *t;
	int loop_copy = 1;
	int hlen;	/* link layer header length */
	uint32_t pflags;
	struct llentry *lle = NULL;
	struct rtentry *rt0 = NULL;
	int addref = 0;

	/*
	 * phdr stays NULL unless a ready-made header is found below; hlen is
	 * assigned on every path that sets phdr.
	 */
	phdr = NULL;
	pflags = 0;
	if (ro != NULL) {
		/* XXX BPF uses ro_prepend */
		if (ro->ro_prepend != NULL) {
			phdr = ro->ro_prepend;
			hlen = ro->ro_plen;
		} else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
			if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
				lle = ro->ro_lle;
				/* Drop a cached entry that is no longer valid. */
				if (lle != NULL &&
				    (lle->la_flags & LLE_VALID) == 0) {
					LLE_FREE(lle);
					lle = NULL;	/* redundant */
					ro->ro_lle = NULL;
				}
				if (lle == NULL) {
					/* if we lookup, keep cache */
					addref = 1;
				}
			}
			if (lle != NULL) {
				phdr = lle->r_linkdata;
				hlen = lle->r_hdrlen;
				pflags = lle->r_flags;
			}
		}
		rt0 = ro->ro_rt;
	}

#ifdef MAC
	error = mac_ifnet_check_transmit(ifp, m);
	if (error)
		senderr(error);
#endif

	M_PROFILE(m);
	/* Monitor-mode and down/not-running interfaces do not transmit. */
	if (ifp->if_flags & IFF_MONITOR)
		senderr(ENETDOWN);
	if (!((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
		senderr(ENETDOWN);

	if (phdr == NULL) {
		/* No prepend data supplied. Try to calculate ourselves. */
		phdr = linkhdr;
		hlen = ETHER_HDR_LEN;
		error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
		    addref ? &lle : NULL);
		/* Remember the entry handed back so the route caches it. */
		if (addref && lle != NULL)
			ro->ro_lle = lle;
		/* EWOULDBLOCK is reported to the caller as success. */
		if (error != 0)
			return (error == EWOULDBLOCK ? 0 : error);
	}

	/* Destination is one of our own addresses: loop the packet back. */
	if ((pflags & RT_L2_ME) != 0) {
		update_mbuf_csumflags(m, m);
		return (if_simloop(ifp, m, dst->sa_family, 0));
	}
	loop_copy = pflags & RT_MAY_LOOP;

	/*
	 * Add local net header. If no space in first mbuf,
	 * allocate another.
	 *
	 * Note that we do prepend regardless of RT_HAS_HEADER flag.
	 * This is done because BPF code shifts m_data pointer
	 * to the end of ethernet header prior to calling if_output().
	 */
	M_PREPEND(m, hlen, M_NOWAIT);
	if (m == NULL)
		senderr(ENOBUFS);
	/* With RT_HAS_HEADER set the header bytes are left as they are. */
	if ((pflags & RT_HAS_HEADER) == 0) {
		eh = mtod(m, struct ether_header *);
		memcpy(eh, phdr, hlen);
	}

	/*
	 * If a simplex interface, and the packet is being sent to our
	 * Ethernet address or a broadcast address, loopback a copy.
	 * XXX To make a simplex device behave exactly like a duplex
	 * device, we should copy in the case of sending to our own
	 * ethernet address (thus letting the original actually appear
	 * on the wire). However, we don't do that here for security
	 * reasons and compatibility with the original behavior.
	 */
	if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
	    ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
		struct mbuf *n;

		/*
		 * Because if_simloop() modifies the packet, we need a
		 * writable copy through m_dup() instead of a readonly
		 * one as m_copy[m] would give us. The alternative would
		 * be to modify if_simloop() to handle the readonly mbuf,
		 * but performancewise it is mostly equivalent (trading
		 * extra data copying vs. extra locking).
		 *
		 * XXX This is a local workaround.  A number of less
		 * often used kernel parts suffer from the same bug.
		 * See PR kern/105943 for a proposed general solution.
		 */
		if ((n = m_dup(m, M_NOWAIT)) != NULL) {
			update_mbuf_csumflags(m, n);
			(void)if_simloop(ifp, n, dst->sa_family, hlen);
		} else
			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
	}

	/*
	 * Bridges require special output handling.
	 */
	if (ifp->if_bridge) {
		BRIDGE_OUTPUT(ifp, m, error);
		return (error);
	}

#if defined(INET) || defined(INET6)
	if (ifp->if_carp &&
	    (error = (*carp_output_p)(ifp, m, dst)))
		goto bad;
#endif

	/* Handle ng_ether(4) processing, if any */
	if (ifp->if_l2com != NULL) {
		KASSERT(ng_ether_output_p != NULL,
		    ("ng_ether_output_p is NULL"));
		if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
			/*
			 * Shared error exit: every senderr()/goto bad in
			 * this function lands here, frees whatever mbuf is
			 * still held and returns the recorded error.
			 */
bad:			if (m != NULL)
				m_freem(m);
			return (error);
		}
		/* The hook left no mbuf behind, so nothing remains to send. */
		if (m == NULL)
			return (0);
	}

	/* Continue with link-layer output */
	return ether_output_frame(ifp, m);
}
436
437 static bool
438 ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp)
439 {
440 struct ether_header *eh;
441
442 eh = mtod(*mp, struct ether_header *);
443 if (ntohs(eh->ether_type) == ETHERTYPE_VLAN ||
444 ether_8021q_frame(mp, ifp, ifp, 0, pcp))
445 return (true);
446 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
447 return (false);
448 }
449
450 /*
451 * Ethernet link layer output routine to send a raw frame to the device.
452 *
453 * This assumes that the 14 byte Ethernet header is present and contiguous
454 * in the first mbuf (if BRIDGE'ing).
455 */
456 int
457 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
458 {
459 int error;
460 uint8_t pcp;
461
462 pcp = ifp->if_pcp;
463 if (pcp != IFNET_PCP_NONE && !ether_set_pcp(&m, ifp, pcp))
464 return (0);
465
466 if (PFIL_HOOKED(&V_link_pfil_hook)) {
467 error = pfil_run_hooks(&V_link_pfil_hook, &m, ifp,
468 PFIL_OUT, 0, NULL);
469 if (error != 0)
470 return (EACCES);
471
472 if (m == NULL)
473 return (0);
474 }
475
476 /*
477 * Queue message on interface, update output statistics if
478 * successful, and start output if interface not yet active.
479 */
480 return ((ifp->if_transmit)(ifp, m));
481 }
482
/*
 * Process a received Ethernet packet; the packet is in the
 * mbuf chain m with the ethernet header at the front.
 *
 * The frame is classified (broadcast/multicast), tapped by bpf, stripped of
 * any trailing FCS, counted, and then offered in turn to lagg(4), the
 * software 802.1Q decapsulation, ng_ether(4) and if_bridge(4) before being
 * handed to ether_demux().  Every exit either frees the mbuf here or passes
 * it to a consumer.  Exits taken after CURVNET_SET_QUIET() call
 * CURVNET_RESTORE() first; the earlier ones do not need to.
 */
static void
ether_input_internal(struct ifnet *ifp, struct mbuf *m)
{
	struct ether_header *eh;
	u_short etype;

	/* Frames arriving on an interface that is not up are dropped. */
	if ((ifp->if_flags & IFF_UP) == 0) {
		m_freem(m);
		return;
	}
#ifdef DIAGNOSTIC
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
		m_freem(m);
		return;
	}
#endif
	/* The whole Ethernet header must sit in the first mbuf. */
	if (m->m_len < ETHER_HDR_LEN) {
		/* XXX maybe should pullup? */
		if_printf(ifp, "discard frame w/o leading ethernet "
				"header (len %u pkt len %u)\n",
				m->m_len, m->m_pkthdr.len);
		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		m_freem(m);
		return;
	}
	eh = mtod(m, struct ether_header *);
	/*
	 * etype is captured once here; it is consulted only by the software
	 * VLAN check further down and is not refreshed afterwards.
	 */
	etype = ntohs(eh->ether_type);
	random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_ETHER);

	CURVNET_SET_QUIET(ifp->if_vnet);

	/* Broadcast is a special case of multicast; both are counted below. */
	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
		if (ETHER_IS_BROADCAST(eh->ether_dhost))
			m->m_flags |= M_BCAST;
		else
			m->m_flags |= M_MCAST;
		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
	}

#ifdef MAC
	/*
	 * Tag the mbuf with an appropriate MAC label before any other
	 * consumers can get to it.
	 */
	mac_ifnet_create_mbuf(ifp, m);
#endif

	/*
	 * Give bpf a chance at the packet.
	 */
	ETHER_BPF_MTAP(ifp, m);

	/*
	 * If the CRC is still on the packet, trim it off. We do this once
	 * and once only in case we are re-entered. Nothing else on the
	 * Ethernet receive path expects to see the FCS.
	 */
	if (m->m_flags & M_HASFCS) {
		m_adj(m, -ETHER_CRC_LEN);
		m->m_flags &= ~M_HASFCS;
	}

	/* Byte counts are kept here unless IFCAP_HWSTATS is enabled. */
	if (!(ifp->if_capenable & IFCAP_HWSTATS))
		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);

	/* Allow monitor mode to claim this frame, after stats are updated. */
	if (ifp->if_flags & IFF_MONITOR) {
		m_freem(m);
		CURVNET_RESTORE();
		return;
	}

	/* Handle input from a lagg(4) port */
	if (ifp->if_type == IFT_IEEE8023ADLAG) {
		KASSERT(lagg_input_p != NULL,
		    ("%s: if_lagg not loaded!", __func__));
		m = (*lagg_input_p)(ifp, m);
		/* Continue on the interface recorded in the returned mbuf. */
		if (m != NULL)
			ifp = m->m_pkthdr.rcvif;
		else {
			CURVNET_RESTORE();
			return;
		}
	}

	/*
	 * If the hardware did not process an 802.1Q tag, do this now,
	 * to allow 802.1P priority frames to be passed to the main input
	 * path correctly.
	 * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels.
	 */
	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) {
		struct ether_vlan_header *evl;

		if (m->m_len < sizeof(*evl) &&
		    (m = m_pullup(m, sizeof(*evl))) == NULL) {
#ifdef DIAGNOSTIC
			if_printf(ifp, "cannot pullup VLAN header\n");
#endif
			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
			CURVNET_RESTORE();
			return;
		}

		/* Move the tag out of the frame and into the packet header. */
		evl = mtod(m, struct ether_vlan_header *);
		m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
		m->m_flags |= M_VLANTAG;

		/*
		 * Slide the two MAC addresses forward over the 802.1Q
		 * encapsulation, then trim the now unused leading bytes.
		 */
		bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
		    ETHER_HDR_LEN - ETHER_TYPE_LEN);
		m_adj(m, ETHER_VLAN_ENCAP_LEN);
		eh = mtod(m, struct ether_header *);
	}

	M_SETFIB(m, ifp->if_fib);

	/* Allow ng_ether(4) to claim this frame. */
	if (ifp->if_l2com != NULL) {
		KASSERT(ng_ether_input_p != NULL,
		    ("%s: ng_ether_input_p is NULL", __func__));
		m->m_flags &= ~M_PROMISC;
		(*ng_ether_input_p)(ifp, &m);
		if (m == NULL) {
			CURVNET_RESTORE();
			return;
		}
		/* The hook may have replaced the mbuf; re-read the header. */
		eh = mtod(m, struct ether_header *);
	}

	/*
	 * Allow if_bridge(4) to claim this frame.
	 * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
	 * and the frame should be delivered locally.
	 */
	if (ifp->if_bridge != NULL) {
		m->m_flags &= ~M_PROMISC;
		BRIDGE_INPUT(ifp, m);
		if (m == NULL) {
			CURVNET_RESTORE();
			return;
		}
		eh = mtod(m, struct ether_header *);
	}

#if defined(INET) || defined(INET6)
	/*
	 * Clear M_PROMISC on frame so that carp(4) will see it when the
	 * mbuf flows up to Layer 3.
	 * FreeBSD's implementation of carp(4) uses the inprotosw
	 * to dispatch IPPROTO_CARP. carp(4) also allocates its own
	 * Ethernet addresses of the form 00:00:5e:00:01:xx, which
	 * is outside the scope of the M_PROMISC test below.
	 * TODO: Maintain a hash table of ethernet addresses other than
	 * ether_dhost which may be active on this ifp.
	 */
	if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) {
		m->m_flags &= ~M_PROMISC;
	} else
#endif
	{
		/*
		 * If the frame received was not for our MAC address, set the
		 * M_PROMISC flag on the mbuf chain. The frame may need to
		 * be seen by the rest of the Ethernet input path in case of
		 * re-entry (e.g. bridge, vlan, netgraph) but should not be
		 * seen by upper protocol layers.
		 */
		if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
		    bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
			m->m_flags |= M_PROMISC;
	}

	ether_demux(ifp, m);
	CURVNET_RESTORE();
}
663
664 /*
665 * Ethernet input dispatch; by default, direct dispatch here regardless of
666 * global configuration. However, if RSS is enabled, hook up RSS affinity
667 * so that when deferred or hybrid dispatch is enabled, we can redistribute
668 * load based on RSS.
669 *
670 * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
671 * not it had already done work distribution via multi-queue. Then we could
672 * direct dispatch in the event load balancing was already complete and
673 * handle the case of interfaces with different capabilities better.
674 *
675 * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
676 * at multiple layers?
677 *
678 * XXXRW: For now, enable all this only if RSS is compiled in, although it
679 * works fine without RSS. Need to characterise the performance overhead
680 * of the detour through the netisr code in the event the result is always
681 * direct dispatch.
682 */
683 static void
684 ether_nh_input(struct mbuf *m)
685 {
686
687 M_ASSERTPKTHDR(m);
688 KASSERT(m->m_pkthdr.rcvif != NULL,
689 ("%s: NULL interface pointer", __func__));
690 ether_input_internal(m->m_pkthdr.rcvif, m);
691 }
692
693 static struct netisr_handler ether_nh = {
694 .nh_name = "ether",
695 .nh_handler = ether_nh_input,
696 .nh_proto = NETISR_ETHER,
697 #ifdef RSS
698 .nh_policy = NETISR_POLICY_CPU,
699 .nh_dispatch = NETISR_DISPATCH_DIRECT,
700 .nh_m2cpuid = rss_m2cpuid,
701 #else
702 .nh_policy = NETISR_POLICY_SOURCE,
703 .nh_dispatch = NETISR_DISPATCH_DIRECT,
704 #endif
705 };
706
707 static void
708 ether_init(__unused void *arg)
709 {
710
711 netisr_register(ðer_nh);
712 }
713 SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
714
715 static void
716 vnet_ether_init(__unused void *arg)
717 {
718 int i;
719
720 /* Initialize packet filter hooks. */
721 V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
722 V_link_pfil_hook.ph_af = AF_LINK;
723 if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
724 printf("%s: WARNING: unable to register pfil link hook, "
725 "error %d\n", __func__, i);
726 #ifdef VIMAGE
727 netisr_register_vnet(ðer_nh);
728 #endif
729 }
730 VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
731 vnet_ether_init, NULL);
732
733 #ifdef VIMAGE
734 static void
735 vnet_ether_pfil_destroy(__unused void *arg)
736 {
737 int i;
738
739 if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0)
740 printf("%s: WARNING: unable to unregister pfil link hook, "
741 "error %d\n", __func__, i);
742 }
743 VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
744 vnet_ether_pfil_destroy, NULL);
745
746 static void
747 vnet_ether_destroy(__unused void *arg)
748 {
749
750 netisr_unregister_vnet(ðer_nh);
751 }
752 VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
753 vnet_ether_destroy, NULL);
754 #endif
755
756
757
758 static void
759 ether_input(struct ifnet *ifp, struct mbuf *m)
760 {
761
762 struct mbuf *mn;
763
764 /*
765 * The drivers are allowed to pass in a chain of packets linked with
766 * m_nextpkt. We split them up into separate packets here and pass
767 * them up. This allows the drivers to amortize the receive lock.
768 */
769 while (m) {
770 mn = m->m_nextpkt;
771 m->m_nextpkt = NULL;
772
773 /*
774 * We will rely on rcvif being set properly in the deferred context,
775 * so assert it is correct here.
776 */
777 KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
778 "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
779 CURVNET_SET_QUIET(ifp->if_vnet);
780 netisr_dispatch(NETISR_ETHER, m);
781 CURVNET_RESTORE();
782 m = mn;
783 }
784 }
785
786 /*
787 * Upper layer processing for a received Ethernet packet.
788 */
789 void
790 ether_demux(struct ifnet *ifp, struct mbuf *m)
791 {
792 struct ether_header *eh;
793 int i, isr;
794 u_short ether_type;
795
796 KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
797
798 /* Do not grab PROMISC frames in case we are re-entered. */
799 if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
800 i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, 0,
801 NULL);
802
803 if (i != 0 || m == NULL)
804 return;
805 }
806
807 eh = mtod(m, struct ether_header *);
808 ether_type = ntohs(eh->ether_type);
809
810 /*
811 * If this frame has a VLAN tag other than 0, call vlan_input()
812 * if its module is loaded. Otherwise, drop.
813 */
814 if ((m->m_flags & M_VLANTAG) &&
815 EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
816 if (ifp->if_vlantrunk == NULL) {
817 if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
818 m_freem(m);
819 return;
820 }
821 KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
822 __func__));
823 /* Clear before possibly re-entering ether_input(). */
824 m->m_flags &= ~M_PROMISC;
825 (*vlan_input_p)(ifp, m);
826 return;
827 }
828
829 /*
830 * Pass promiscuously received frames to the upper layer if the user
831 * requested this by setting IFF_PPROMISC. Otherwise, drop them.
832 */
833 if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
834 m_freem(m);
835 return;
836 }
837
838 /*
839 * Reset layer specific mbuf flags to avoid confusing upper layers.
840 * Strip off Ethernet header.
841 */
842 m->m_flags &= ~M_VLANTAG;
843 m_clrprotoflags(m);
844 m_adj(m, ETHER_HDR_LEN);
845
846 /*
847 * Dispatch frame to upper layer.
848 */
849 switch (ether_type) {
850 #ifdef INET
851 case ETHERTYPE_IP:
852 isr = NETISR_IP;
853 break;
854
855 case ETHERTYPE_ARP:
856 if (ifp->if_flags & IFF_NOARP) {
857 /* Discard packet if ARP is disabled on interface */
858 m_freem(m);
859 return;
860 }
861 isr = NETISR_ARP;
862 break;
863 #endif
864 #ifdef INET6
865 case ETHERTYPE_IPV6:
866 isr = NETISR_IPV6;
867 break;
868 #endif
869 default:
870 goto discard;
871 }
872 netisr_dispatch(isr, m);
873 return;
874
875 discard:
876 /*
877 * Packet is to be discarded. If netgraph is present,
878 * hand the packet to it for last chance processing;
879 * otherwise dispose of it.
880 */
881 if (ifp->if_l2com != NULL) {
882 KASSERT(ng_ether_input_orphan_p != NULL,
883 ("ng_ether_input_orphan_p is NULL"));
884 /*
885 * Put back the ethernet header so netgraph has a
886 * consistent view of inbound packets.
887 */
888 M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
889 (*ng_ether_input_orphan_p)(ifp, m);
890 return;
891 }
892 m_freem(m);
893 }
894
895 /*
896 * Convert Ethernet address to printable (loggable) representation.
897 * This routine is for compatibility; it's better to just use
898 *
899 * printf("%6D", <pointer to address>, ":");
900 *
901 * since there's no static buffer involved.
902 */
903 char *
904 ether_sprintf(const u_char *ap)
905 {
906 static char etherbuf[18];
907 snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":");
908 return (etherbuf);
909 }
910
911 /*
912 * Perform common duties while attaching to interface list
913 */
914 void
915 ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
916 {
917 int i;
918 struct ifaddr *ifa;
919 struct sockaddr_dl *sdl;
920
921 ifp->if_addrlen = ETHER_ADDR_LEN;
922 ifp->if_hdrlen = ETHER_HDR_LEN;
923 if_attach(ifp);
924 ifp->if_mtu = ETHERMTU;
925 ifp->if_output = ether_output;
926 ifp->if_input = ether_input;
927 ifp->if_resolvemulti = ether_resolvemulti;
928 ifp->if_requestencap = ether_requestencap;
929 #ifdef VIMAGE
930 ifp->if_reassign = ether_reassign;
931 #endif
932 if (ifp->if_baudrate == 0)
933 ifp->if_baudrate = IF_Mbps(10); /* just a default */
934 ifp->if_broadcastaddr = etherbroadcastaddr;
935
936 ifa = ifp->if_addr;
937 KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
938 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
939 sdl->sdl_type = IFT_ETHER;
940 sdl->sdl_alen = ifp->if_addrlen;
941 bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
942
943 if (ifp->if_hw_addr != NULL)
944 bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen);
945
946 bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
947 if (ng_ether_attach_p != NULL)
948 (*ng_ether_attach_p)(ifp);
949
950 /* Announce Ethernet MAC address if non-zero. */
951 for (i = 0; i < ifp->if_addrlen; i++)
952 if (lla[i] != 0)
953 break;
954 if (i != ifp->if_addrlen)
955 if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
956
957 uuid_ether_add(LLADDR(sdl));
958
959 /* Add necessary bits are setup; announce it now. */
960 EVENTHANDLER_INVOKE(ether_ifattach_event, ifp);
961 if (IS_DEFAULT_VNET(curvnet))
962 devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL);
963 }
964
965 /*
966 * Perform common duties while detaching an Ethernet interface
967 */
968 void
969 ether_ifdetach(struct ifnet *ifp)
970 {
971 struct sockaddr_dl *sdl;
972
973 sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);
974 uuid_ether_del(LLADDR(sdl));
975
976 if (ifp->if_l2com != NULL) {
977 KASSERT(ng_ether_detach_p != NULL,
978 ("ng_ether_detach_p is NULL"));
979 (*ng_ether_detach_p)(ifp);
980 }
981
982 bpfdetach(ifp);
983 if_detach(ifp);
984 }
985
986 #ifdef VIMAGE
987 void
988 ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
989 {
990
991 if (ifp->if_l2com != NULL) {
992 KASSERT(ng_ether_detach_p != NULL,
993 ("ng_ether_detach_p is NULL"));
994 (*ng_ether_detach_p)(ifp);
995 }
996
997 if (ng_ether_attach_p != NULL) {
998 CURVNET_SET_QUIET(new_vnet);
999 (*ng_ether_attach_p)(ifp);
1000 CURVNET_RESTORE();
1001 }
1002 }
1003 #endif
1004
/*
 * Sysctl tree: declare the existing _net_link parent and create the
 * `ether' node beneath it, numbered IFT_ETHER and described as "Ethernet".
 */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
1007
#if 0
/*
 * This is for reference.  We have a table-driven version
 * of the little-endian crc32 generator, which is faster
 * than the double-loop.
 */
uint32_t
ether_crc32_le(const uint8_t *buf, size_t len)
{
	size_t i;
	uint32_t crc, carry;
	int bit;
	uint8_t data;

	crc = 0xffffffff;	/* initial value */

	for (i = 0; i < len; i++) {
		for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
			carry = (crc ^ data) & 1;
			crc >>= 1;
			if (carry)
				crc = (crc ^ ETHER_CRC_POLY_LE);
		}
	}

	return (crc);
}
#else
/*
 * Little-endian (bit-reflected) CRC-32 over len bytes at buf.
 *
 * The register starts at 0xffffffff and the result is returned without a
 * final inversion.  Each byte is folded in as two 4-bit steps through a
 * 16-entry lookup table.  A zero-length input returns the initial value.
 */
uint32_t
ether_crc32_le(const uint8_t *buf, size_t len)
{
	static const uint32_t crctab[] = {
		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
	};
	size_t i;
	uint32_t crc;

	crc = 0xffffffff;	/* initial value */

	for (i = 0; i < len; i++) {
		crc ^= buf[i];
		/* Low nibble first, then high nibble. */
		crc = (crc >> 4) ^ crctab[crc & 0xf];
		crc = (crc >> 4) ^ crctab[crc & 0xf];
	}

	return (crc);
}
#endif
1059
1060 uint32_t
1061 ether_crc32_be(const uint8_t *buf, size_t len)
1062 {
1063 size_t i;
1064 uint32_t crc, carry;
1065 int bit;
1066 uint8_t data;
1067
1068 crc = 0xffffffff; /* initial value */
1069
1070 for (i = 0; i < len; i++) {
1071 for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
1072 carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01);
1073 crc <<= 1;
1074 if (carry)
1075 crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
1076 }
1077 }
1078
1079 return (crc);
1080 }
1081
1082 int
1083 ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1084 {
1085 struct ifaddr *ifa = (struct ifaddr *) data;
1086 struct ifreq *ifr = (struct ifreq *) data;
1087 int error = 0;
1088
1089 switch (command) {
1090 case SIOCSIFADDR:
1091 ifp->if_flags |= IFF_UP;
1092
1093 switch (ifa->ifa_addr->sa_family) {
1094 #ifdef INET
1095 case AF_INET:
1096 ifp->if_init(ifp->if_softc); /* before arpwhohas */
1097 arp_ifinit(ifp, ifa);
1098 break;
1099 #endif
1100 default:
1101 ifp->if_init(ifp->if_softc);
1102 break;
1103 }
1104 break;
1105
1106 case SIOCGIFADDR:
1107 bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
1108 ETHER_ADDR_LEN);
1109 break;
1110
1111 case SIOCSIFMTU:
1112 /*
1113 * Set the interface MTU.
1114 */
1115 if (ifr->ifr_mtu > ETHERMTU) {
1116 error = EINVAL;
1117 } else {
1118 ifp->if_mtu = ifr->ifr_mtu;
1119 }
1120 break;
1121
1122 case SIOCSLANPCP:
1123 error = priv_check(curthread, PRIV_NET_SETLANPCP);
1124 if (error != 0)
1125 break;
1126 if (ifr->ifr_lan_pcp > 7 &&
1127 ifr->ifr_lan_pcp != IFNET_PCP_NONE) {
1128 error = EINVAL;
1129 } else {
1130 ifp->if_pcp = ifr->ifr_lan_pcp;
1131 /* broadcast event about PCP change */
1132 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP);
1133 }
1134 break;
1135
1136 case SIOCGLANPCP:
1137 ifr->ifr_lan_pcp = ifp->if_pcp;
1138 break;
1139
1140 default:
1141 error = EINVAL; /* XXX netbsd has ENOTTY??? */
1142 break;
1143 }
1144 return (error);
1145 }
1146
1147 static int
1148 ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
1149 struct sockaddr *sa)
1150 {
1151 struct sockaddr_dl *sdl;
1152 #ifdef INET
1153 struct sockaddr_in *sin;
1154 #endif
1155 #ifdef INET6
1156 struct sockaddr_in6 *sin6;
1157 #endif
1158 u_char *e_addr;
1159
1160 switch(sa->sa_family) {
1161 case AF_LINK:
1162 /*
1163 * No mapping needed. Just check that it's a valid MC address.
1164 */
1165 sdl = (struct sockaddr_dl *)sa;
1166 e_addr = LLADDR(sdl);
1167 if (!ETHER_IS_MULTICAST(e_addr))
1168 return EADDRNOTAVAIL;
1169 *llsa = NULL;
1170 return 0;
1171
1172 #ifdef INET
1173 case AF_INET:
1174 sin = (struct sockaddr_in *)sa;
1175 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
1176 return EADDRNOTAVAIL;
1177 sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
1178 sdl->sdl_alen = ETHER_ADDR_LEN;
1179 e_addr = LLADDR(sdl);
1180 ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
1181 *llsa = (struct sockaddr *)sdl;
1182 return 0;
1183 #endif
1184 #ifdef INET6
1185 case AF_INET6:
1186 sin6 = (struct sockaddr_in6 *)sa;
1187 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1188 /*
1189 * An IP6 address of 0 means listen to all
1190 * of the Ethernet multicast address used for IP6.
1191 * (This is used for multicast routers.)
1192 */
1193 ifp->if_flags |= IFF_ALLMULTI;
1194 *llsa = NULL;
1195 return 0;
1196 }
1197 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
1198 return EADDRNOTAVAIL;
1199 sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
1200 sdl->sdl_alen = ETHER_ADDR_LEN;
1201 e_addr = LLADDR(sdl);
1202 ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
1203 *llsa = (struct sockaddr *)sdl;
1204 return 0;
1205 #endif
1206
1207 default:
1208 /*
1209 * Well, the text isn't quite right, but it's the name
1210 * that counts...
1211 */
1212 return EAFNOSUPPORT;
1213 }
1214 }
1215
1216 static moduledata_t ether_mod = {
1217 .name = "ether",
1218 };
1219
1220 void
1221 ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
1222 {
1223 struct ether_vlan_header vlan;
1224 struct mbuf mv, mb;
1225
1226 KASSERT((m->m_flags & M_VLANTAG) != 0,
1227 ("%s: vlan information not present", __func__));
1228 KASSERT(m->m_len >= sizeof(struct ether_header),
1229 ("%s: mbuf not large enough for header", __func__));
1230 bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
1231 vlan.evl_proto = vlan.evl_encap_proto;
1232 vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
1233 vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
1234 m->m_len -= sizeof(struct ether_header);
1235 m->m_data += sizeof(struct ether_header);
1236 /*
1237 * If a data link has been supplied by the caller, then we will need to
1238 * re-create a stack allocated mbuf chain with the following structure:
1239 *
1240 * (1) mbuf #1 will contain the supplied data link
1241 * (2) mbuf #2 will contain the vlan header
1242 * (3) mbuf #3 will contain the original mbuf's packet data
1243 *
1244 * Otherwise, submit the packet and vlan header via bpf_mtap2().
1245 */
1246 if (data != NULL) {
1247 mv.m_next = m;
1248 mv.m_data = (caddr_t)&vlan;
1249 mv.m_len = sizeof(vlan);
1250 mb.m_next = &mv;
1251 mb.m_data = data;
1252 mb.m_len = dlen;
1253 bpf_mtap(bp, &mb);
1254 } else
1255 bpf_mtap2(bp, &vlan, sizeof(vlan), m);
1256 m->m_len += sizeof(struct ether_header);
1257 m->m_data -= sizeof(struct ether_header);
1258 }
1259
1260 struct mbuf *
1261 ether_vlanencap(struct mbuf *m, uint16_t tag)
1262 {
1263 struct ether_vlan_header *evl;
1264
1265 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
1266 if (m == NULL)
1267 return (NULL);
1268 /* M_PREPEND takes care of m_len, m_pkthdr.len for us */
1269
1270 if (m->m_len < sizeof(*evl)) {
1271 m = m_pullup(m, sizeof(*evl));
1272 if (m == NULL)
1273 return (NULL);
1274 }
1275
1276 /*
1277 * Transform the Ethernet header into an Ethernet header
1278 * with 802.1Q encapsulation.
1279 */
1280 evl = mtod(m, struct ether_vlan_header *);
1281 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
1282 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1283 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1284 evl->evl_tag = htons(tag);
1285 return (m);
1286 }
1287
/*
 * sysctl nodes: "vlan" under _net_link and "link" under _net_link_vlan.
 * The knobs below are attached to _net_link_vlan.
 */
static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
    "IEEE 802.1Q VLAN");
static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
    "for consistency");

/*
 * Per-VNET read/write knob "soft_pad".  ether_8021q_frame() checks it to
 * decide whether short frames are padded before tagging.
 */
static VNET_DEFINE(int, soft_pad);
#define V_soft_pad VNET(soft_pad)
SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(soft_pad), 0,
    "pad short frames before tagging");

/*
 * For now, make preserving PCP via an mbuf tag optional, as it increases
 * per-packet memory allocations and frees. In the future, it would be
 * preferable to reuse ether_vtag for this, or similar.
 *
 * Off (0) by default; exported read/write as the "mtag_pcp" sysctl.
 */
int vlan_mtag_pcp = 0;
SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW,
    &vlan_mtag_pcp, 0,
    "Retain VLAN PCP information as packets are passed up the stack");
1308
1309 bool
1310 ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
1311 uint16_t vid, uint8_t pcp)
1312 {
1313 struct m_tag *mtag;
1314 int n;
1315 uint16_t tag;
1316 static const char pad[8]; /* just zeros */
1317
1318 /*
1319 * Pad the frame to the minimum size allowed if told to.
1320 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
1321 * paragraph C.4.4.3.b. It can help to work around buggy
1322 * bridges that violate paragraph C.4.4.3.a from the same
1323 * document, i.e., fail to pad short frames after untagging.
1324 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
1325 * untagging it will produce a 62-byte frame, which is a runt
1326 * and requires padding. There are VLAN-enabled network
1327 * devices that just discard such runts instead or mishandle
1328 * them somehow.
1329 */
1330 if (V_soft_pad && p->if_type == IFT_ETHER) {
1331 for (n = ETHERMIN + ETHER_HDR_LEN - (*mp)->m_pkthdr.len;
1332 n > 0; n -= sizeof(pad)) {
1333 if (!m_append(*mp, min(n, sizeof(pad)), pad))
1334 break;
1335 }
1336 if (n > 0) {
1337 m_freem(*mp);
1338 *mp = NULL;
1339 if_printf(ife, "cannot pad short frame");
1340 return (false);
1341 }
1342 }
1343
1344 /*
1345 * If underlying interface can do VLAN tag insertion itself,
1346 * just pass the packet along. However, we need some way to
1347 * tell the interface where the packet came from so that it
1348 * knows how to find the VLAN tag to use, so we attach a
1349 * packet tag that holds it.
1350 */
1351 if (vlan_mtag_pcp && (mtag = m_tag_locate(*mp, MTAG_8021Q,
1352 MTAG_8021Q_PCP_OUT, NULL)) != NULL)
1353 tag = EVL_MAKETAG(vid, *(uint8_t *)(mtag + 1), 0);
1354 else
1355 tag = EVL_MAKETAG(vid, pcp, 0);
1356 if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1357 (*mp)->m_pkthdr.ether_vtag = tag;
1358 (*mp)->m_flags |= M_VLANTAG;
1359 } else {
1360 *mp = ether_vlanencap(*mp, tag);
1361 if (*mp == NULL) {
1362 if_printf(ife, "unable to prepend 802.1Q header");
1363 return (false);
1364 }
1365 }
1366 return (true);
1367 }
1368
/*
 * Declare the "ether" module, described by ether_mod, at SI_SUB_INIT_IF
 * with SI_ORDER_ANY, and record its version as 1.
 */
DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
MODULE_VERSION(ether, 1);
Cache object: ae49d88e133e1cfc8f12e30bf4214e4c
|