1 /*-
2 * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26 #include "opt_inet.h"
27 #include "opt_inet6.h"
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/devctl.h>
35 #include <sys/eventhandler.h>
36 #include <sys/kernel.h>
37 #include <sys/mbuf.h>
38 #include <sys/module.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
41
42 #include <net/bpf.h>
43 #include <net/ethernet.h>
44 #include <net/infiniband.h>
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/if_private.h>
48 #include <net/if_dl.h>
49 #include <net/if_media.h>
50 #include <net/if_lagg.h>
51 #include <net/if_llatbl.h>
52 #include <net/if_types.h>
53 #include <net/netisr.h>
54 #include <net/route.h>
55 #include <netinet/if_ether.h>
56 #include <netinet/in.h>
57 #include <netinet/ip6.h>
58 #include <netinet6/in6_var.h>
59 #include <netinet6/nd6.h>
60
61 #include <security/mac/mac_framework.h>
62
/*
 * if_lagg(4) support.
 *
 * Input hook called by infiniband_input() for frames received on an
 * interface of type IFT_INFINIBANDLAG.  The hook takes the receiving
 * interface and the mbuf and returns the mbuf to continue processing
 * with, or NULL when there is nothing left to process.  The input path
 * asserts that the pointer is non-NULL before calling it.
 * NOTE(review): presumably installed by the if_lagg module at load
 * time -- confirm against if_lagg.
 */
struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
65
66 #ifdef INET
/*
 * Map an IPv4 multicast group onto a 20-byte IPoIB link-layer
 * multicast address.
 *
 * addr      - IPv4 group address in network byte order.
 * broadcast - link-layer broadcast address of the interface; the scope
 *             nibble (byte 5) and bytes 8-9 are inherited from it.
 * buf       - output buffer, at least 20 bytes.
 *
 * Layout written to buf:
 *   [0]      zero
 *   [1..3]   0xff 0xff 0xff
 *   [4..5]   0xff, 0x10 | scope
 *   [6..7]   0x40 0x1b (IPv4 signature)
 *   [8..9]   copied from the broadcast address
 *   [10..15] zero
 *   [16..19] low 28 bits of the group address
 *
 * Only the low 28 bits of the group are significant; the upper four
 * bits of byte 16 must be zero so that the resulting address matches
 * the one computed by other IPoIB implementations for the same group.
 */
static inline void
infiniband_ipv4_multicast_map(uint32_t addr,
    const uint8_t *broadcast, uint8_t *buf)
{
	uint8_t scope;

	addr = ntohl(addr);
	scope = broadcast[5] & 0xF;

	buf[0] = 0;
	buf[1] = 0xff;
	buf[2] = 0xff;
	buf[3] = 0xff;
	buf[4] = 0xff;
	buf[5] = 0x10 | scope;
	buf[6] = 0x40;
	buf[7] = 0x1b;
	buf[8] = broadcast[8];
	buf[9] = broadcast[9];
	buf[10] = 0;
	buf[11] = 0;
	buf[12] = 0;
	buf[13] = 0;
	buf[14] = 0;
	buf[15] = 0;
	/* Strip the class D prefix: keep only the 28-bit group ID. */
	buf[16] = (addr >> 24) & 0x0f;
	buf[17] = (addr >> 16) & 0xff;
	buf[18] = (addr >> 8) & 0xff;
	buf[19] = addr & 0xff;
}
97 #endif
98
99 #ifdef INET6
/*
 * Map an IPv6 multicast group onto a 20-byte IPoIB link-layer
 * multicast address.
 *
 * addr      - IPv6 group address; its last 10 bytes are used.
 * broadcast - link-layer broadcast address of the interface; the scope
 *             nibble (byte 5) and bytes 8-9 are inherited from it.
 * buf       - output buffer, at least 20 bytes.
 */
static inline void
infiniband_ipv6_multicast_map(const struct in6_addr *addr,
    const uint8_t *broadcast, uint8_t *buf)
{
	static const uint8_t lead[5] = { 0x00, 0xff, 0xff, 0xff, 0xff };
	const uint8_t scope_nibble = broadcast[5] & 0xF;

	/* Fixed leading bytes. */
	memcpy(buf, lead, sizeof(lead));

	/* Scope taken from the broadcast address, then IPv6 signature. */
	buf[5] = 0x10 | scope_nibble;
	buf[6] = 0x60;
	buf[7] = 0x1b;

	/* Bytes 8-9 come straight from the broadcast address. */
	buf[8] = broadcast[8];
	buf[9] = broadcast[9];

	/* Low 80 bits of the IPv6 group address. */
	memcpy(&buf[10], &addr->s6_addr[6], 10);
}
120 #endif
121
122 /*
123 * This is for clients that have an infiniband_header in the mbuf.
124 */
125 void
126 infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
127 {
128 struct infiniband_header *ibh;
129 struct ether_header eh;
130
131 if (mb->m_len < sizeof(*ibh))
132 return;
133
134 ibh = mtod(mb, struct infiniband_header *);
135 eh.ether_type = ibh->ib_protocol;
136 memset(eh.ether_shost, 0, ETHER_ADDR_LEN);
137 memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN);
138 mb->m_data += sizeof(*ibh);
139 mb->m_len -= sizeof(*ibh);
140 mb->m_pkthdr.len -= sizeof(*ibh);
141 bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
142 mb->m_data -= sizeof(*ibh);
143 mb->m_len += sizeof(*ibh);
144 mb->m_pkthdr.len += sizeof(*ibh);
145 }
146
147 static void
148 update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
149 {
150 int csum_flags = 0;
151
152 if (src->m_pkthdr.csum_flags & CSUM_IP)
153 csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
154 if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
155 csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
156 if (src->m_pkthdr.csum_flags & CSUM_SCTP)
157 csum_flags |= CSUM_SCTP_VALID;
158 dst->m_pkthdr.csum_flags |= csum_flags;
159 if (csum_flags & CSUM_DATA_VALID)
160 dst->m_pkthdr.csum_data = 0xffff;
161 }
162
163 /*
164 * Handle link-layer encapsulation requests.
165 */
166 static int
167 infiniband_requestencap(struct ifnet *ifp, struct if_encap_req *req)
168 {
169 struct infiniband_header *ih;
170 struct arphdr *ah;
171 uint16_t etype;
172 const uint8_t *lladdr;
173
174 if (req->rtype != IFENCAP_LL)
175 return (EOPNOTSUPP);
176
177 if (req->bufsize < INFINIBAND_HDR_LEN)
178 return (ENOMEM);
179
180 ih = (struct infiniband_header *)req->buf;
181 lladdr = req->lladdr;
182 req->lladdr_off = 0;
183
184 switch (req->family) {
185 case AF_INET:
186 etype = htons(ETHERTYPE_IP);
187 break;
188 case AF_INET6:
189 etype = htons(ETHERTYPE_IPV6);
190 break;
191 case AF_ARP:
192 ah = (struct arphdr *)req->hdata;
193 ah->ar_hrd = htons(ARPHRD_INFINIBAND);
194
195 switch (ntohs(ah->ar_op)) {
196 case ARPOP_REVREQUEST:
197 case ARPOP_REVREPLY:
198 etype = htons(ETHERTYPE_REVARP);
199 break;
200 case ARPOP_REQUEST:
201 case ARPOP_REPLY:
202 default:
203 etype = htons(ETHERTYPE_ARP);
204 break;
205 }
206
207 if (req->flags & IFENCAP_FLAG_BROADCAST)
208 lladdr = ifp->if_broadcastaddr;
209 break;
210 default:
211 return (EAFNOSUPPORT);
212 }
213
214 ih->ib_protocol = etype;
215 ih->ib_reserved = 0;
216 memcpy(ih->ib_hwaddr, lladdr, INFINIBAND_ADDR_LEN);
217 req->bufsize = sizeof(struct infiniband_header);
218
219 return (0);
220 }
221
222 static int
223 infiniband_resolve_addr(struct ifnet *ifp, struct mbuf *m,
224 const struct sockaddr *dst, struct route *ro, uint8_t *phdr,
225 uint32_t *pflags, struct llentry **plle)
226 {
227 #if defined(INET) || defined(INET6)
228 struct infiniband_header *ih = (struct infiniband_header *)phdr;
229 #endif
230 uint32_t lleflags = 0;
231 int error = 0;
232
233 if (plle)
234 *plle = NULL;
235
236 switch (dst->sa_family) {
237 #ifdef INET
238 case AF_INET:
239 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
240 error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle);
241 } else {
242 if (m->m_flags & M_BCAST) {
243 memcpy(ih->ib_hwaddr, ifp->if_broadcastaddr,
244 INFINIBAND_ADDR_LEN);
245 } else {
246 infiniband_ipv4_multicast_map(
247 ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
248 ifp->if_broadcastaddr, ih->ib_hwaddr);
249 }
250 ih->ib_protocol = htons(ETHERTYPE_IP);
251 ih->ib_reserved = 0;
252 }
253 break;
254 #endif
255 #ifdef INET6
256 case AF_INET6:
257 if ((m->m_flags & M_MCAST) == 0) {
258 int af = RO_GET_FAMILY(ro, dst);
259 error = nd6_resolve(ifp, LLE_SF(af, 0), m, dst, phdr,
260 &lleflags, plle);
261 } else {
262 infiniband_ipv6_multicast_map(
263 &((const struct sockaddr_in6 *)dst)->sin6_addr,
264 ifp->if_broadcastaddr, ih->ib_hwaddr);
265 ih->ib_protocol = htons(ETHERTYPE_IPV6);
266 ih->ib_reserved = 0;
267 }
268 break;
269 #endif
270 default:
271 if_printf(ifp, "can't handle af%d\n", dst->sa_family);
272 if (m != NULL)
273 m_freem(m);
274 return (EAFNOSUPPORT);
275 }
276
277 if (error == EHOSTDOWN) {
278 if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
279 error = EHOSTUNREACH;
280 }
281
282 if (error != 0)
283 return (error);
284
285 *pflags = RT_MAY_LOOP;
286 if (lleflags & LLE_IFADDR)
287 *pflags |= RT_L2_ME;
288
289 return (0);
290 }
291
292 /*
293 * Infiniband output routine.
294 */
295 static int
296 infiniband_output(struct ifnet *ifp, struct mbuf *m,
297 const struct sockaddr *dst, struct route *ro)
298 {
299 uint8_t linkhdr[INFINIBAND_HDR_LEN];
300 uint8_t *phdr;
301 struct llentry *lle = NULL;
302 struct infiniband_header *ih;
303 int error = 0;
304 int hlen; /* link layer header length */
305 uint32_t pflags;
306 bool addref;
307
308 NET_EPOCH_ASSERT();
309
310 addref = false;
311 phdr = NULL;
312 pflags = 0;
313 if (ro != NULL) {
314 /* XXX BPF uses ro_prepend */
315 if (ro->ro_prepend != NULL) {
316 phdr = ro->ro_prepend;
317 hlen = ro->ro_plen;
318 } else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
319 if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
320 lle = ro->ro_lle;
321 if (lle != NULL &&
322 (lle->la_flags & LLE_VALID) == 0) {
323 LLE_FREE(lle);
324 lle = NULL; /* redundant */
325 ro->ro_lle = NULL;
326 }
327 if (lle == NULL) {
328 /* if we lookup, keep cache */
329 addref = 1;
330 } else
331 /*
332 * Notify LLE code that
333 * the entry was used
334 * by datapath.
335 */
336 llentry_provide_feedback(lle);
337 }
338 if (lle != NULL) {
339 phdr = lle->r_linkdata;
340 hlen = lle->r_hdrlen;
341 pflags = lle->r_flags;
342 }
343 }
344 }
345
346 #ifdef MAC
347 error = mac_ifnet_check_transmit(ifp, m);
348 if (error)
349 goto bad;
350 #endif
351
352 M_PROFILE(m);
353 if (ifp->if_flags & IFF_MONITOR) {
354 error = ENETDOWN;
355 goto bad;
356 }
357 if (!((ifp->if_flags & IFF_UP) &&
358 (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
359 error = ENETDOWN;
360 goto bad;
361 }
362
363 if (phdr == NULL) {
364 /* No prepend data supplied. Try to calculate ourselves. */
365 phdr = linkhdr;
366 hlen = INFINIBAND_HDR_LEN;
367 error = infiniband_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
368 addref ? &lle : NULL);
369 if (addref && lle != NULL)
370 ro->ro_lle = lle;
371 if (error != 0)
372 return (error == EWOULDBLOCK ? 0 : error);
373 }
374
375 if ((pflags & RT_L2_ME) != 0) {
376 update_mbuf_csumflags(m, m);
377 return (if_simloop(ifp, m, RO_GET_FAMILY(ro, dst), 0));
378 }
379
380 /*
381 * Add local infiniband header. If no space in first mbuf,
382 * allocate another.
383 */
384 M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
385 if (m == NULL) {
386 error = ENOBUFS;
387 goto bad;
388 }
389 if ((pflags & RT_HAS_HEADER) == 0) {
390 ih = mtod(m, struct infiniband_header *);
391 memcpy(ih, phdr, hlen);
392 }
393
394 /*
395 * Queue message on interface, update output statistics if
396 * successful, and start output if interface not yet active.
397 */
398 return (ifp->if_transmit(ifp, m));
399 bad:
400 if (m != NULL)
401 m_freem(m);
402 return (error);
403 }
404
405 /*
406 * Process a received Infiniband packet.
407 */
408 static void
409 infiniband_input(struct ifnet *ifp, struct mbuf *m)
410 {
411 struct infiniband_header *ibh;
412 struct epoch_tracker et;
413 int isr;
414
415 CURVNET_SET_QUIET(ifp->if_vnet);
416
417 if ((ifp->if_flags & IFF_UP) == 0) {
418 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
419 m_freem(m);
420 goto done;
421 }
422
423 ibh = mtod(m, struct infiniband_header *);
424
425 /*
426 * Reset layer specific mbuf flags to avoid confusing upper
427 * layers:
428 */
429 m->m_flags &= ~M_VLANTAG;
430 m_clrprotoflags(m);
431
432 if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) {
433 if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr,
434 ifp->if_addrlen) == 0)
435 m->m_flags |= M_BCAST;
436 else
437 m->m_flags |= M_MCAST;
438 if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
439 }
440
441 /* Let BPF have it before we strip the header. */
442 INFINIBAND_BPF_MTAP(ifp, m);
443
444 /* Allow monitor mode to claim this frame, after stats are updated. */
445 if (ifp->if_flags & IFF_MONITOR) {
446 m_freem(m);
447 goto done;
448 }
449
450 /* Direct packet to correct FIB based on interface config. */
451 M_SETFIB(m, ifp->if_fib);
452
453 /* Handle input from a lagg<N> port */
454 if (ifp->if_type == IFT_INFINIBANDLAG) {
455 KASSERT(lagg_input_infiniband_p != NULL,
456 ("%s: if_lagg not loaded!", __func__));
457 m = (*lagg_input_infiniband_p)(ifp, m);
458 if (__predict_false(m == NULL))
459 goto done;
460 ifp = m->m_pkthdr.rcvif;
461 }
462
463 /*
464 * Dispatch frame to upper layer.
465 */
466 switch (ibh->ib_protocol) {
467 #ifdef INET
468 case htons(ETHERTYPE_IP):
469 isr = NETISR_IP;
470 break;
471
472 case htons(ETHERTYPE_ARP):
473 if (ifp->if_flags & IFF_NOARP) {
474 /* Discard packet if ARP is disabled on interface */
475 m_freem(m);
476 goto done;
477 }
478 isr = NETISR_ARP;
479 break;
480 #endif
481 #ifdef INET6
482 case htons(ETHERTYPE_IPV6):
483 isr = NETISR_IPV6;
484 break;
485 #endif
486 default:
487 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
488 m_freem(m);
489 goto done;
490 }
491
492 /* Strip off the Infiniband header. */
493 m_adj(m, INFINIBAND_HDR_LEN);
494
495 #ifdef MAC
496 /*
497 * Tag the mbuf with an appropriate MAC label before any other
498 * consumers can get to it.
499 */
500 mac_ifnet_create_mbuf(ifp, m);
501 #endif
502 /* Allow monitor mode to claim this frame, after stats are updated. */
503 NET_EPOCH_ENTER(et);
504 netisr_dispatch(isr, m);
505 NET_EPOCH_EXIT(et);
506 done:
507 CURVNET_RESTORE();
508 }
509
510 static int
511 infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
512 struct sockaddr *sa)
513 {
514 struct sockaddr_dl *sdl;
515 #ifdef INET
516 struct sockaddr_in *sin;
517 #endif
518 #ifdef INET6
519 struct sockaddr_in6 *sin6;
520 #endif
521 uint8_t *e_addr;
522
523 switch (sa->sa_family) {
524 case AF_LINK:
525 /*
526 * No mapping needed. Just check that it's a valid MC address.
527 */
528 sdl = (struct sockaddr_dl *)sa;
529 e_addr = LLADDR(sdl);
530 if (!INFINIBAND_IS_MULTICAST(e_addr))
531 return (EADDRNOTAVAIL);
532 *llsa = NULL;
533 return 0;
534
535 #ifdef INET
536 case AF_INET:
537 sin = (struct sockaddr_in *)sa;
538 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
539 return (EADDRNOTAVAIL);
540 sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
541 sdl->sdl_alen = INFINIBAND_ADDR_LEN;
542 e_addr = LLADDR(sdl);
543 infiniband_ipv4_multicast_map(
544 sin->sin_addr.s_addr, ifp->if_broadcastaddr, e_addr);
545 *llsa = (struct sockaddr *)sdl;
546 return (0);
547 #endif
548 #ifdef INET6
549 case AF_INET6:
550 sin6 = (struct sockaddr_in6 *)sa;
551 /*
552 * An IP6 address of 0 means listen to all of the
553 * multicast address used for IP6. This has no meaning
554 * in infiniband.
555 */
556 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
557 return (EADDRNOTAVAIL);
558 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
559 return (EADDRNOTAVAIL);
560 sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
561 sdl->sdl_alen = INFINIBAND_ADDR_LEN;
562 e_addr = LLADDR(sdl);
563 infiniband_ipv6_multicast_map(
564 &sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
565 *llsa = (struct sockaddr *)sdl;
566 return (0);
567 #endif
568 default:
569 return (EAFNOSUPPORT);
570 }
571 }
572
573 void
574 infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
575 {
576 struct sockaddr_dl *sdl;
577 struct ifaddr *ifa;
578 int i;
579
580 ifp->if_addrlen = INFINIBAND_ADDR_LEN;
581 ifp->if_hdrlen = INFINIBAND_HDR_LEN;
582 ifp->if_mtu = INFINIBAND_MTU;
583 if_attach(ifp);
584 ifp->if_output = infiniband_output;
585 ifp->if_input = infiniband_input;
586 ifp->if_resolvemulti = infiniband_resolvemulti;
587 ifp->if_requestencap = infiniband_requestencap;
588
589 if (ifp->if_baudrate == 0)
590 ifp->if_baudrate = IF_Gbps(10); /* default value */
591 if (llb != NULL)
592 ifp->if_broadcastaddr = llb;
593
594 ifa = ifp->if_addr;
595 KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
596 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
597 sdl->sdl_type = IFT_INFINIBAND;
598 sdl->sdl_alen = ifp->if_addrlen;
599
600 if (lla != NULL) {
601 memcpy(LLADDR(sdl), lla, ifp->if_addrlen);
602
603 if (ifp->if_hw_addr != NULL)
604 memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen);
605 } else {
606 lla = LLADDR(sdl);
607 }
608
609 /* Attach ethernet compatible network device */
610 bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
611
612 /* Announce Infiniband MAC address if non-zero. */
613 for (i = 0; i < ifp->if_addrlen; i++)
614 if (lla[i] != 0)
615 break;
616 if (i != ifp->if_addrlen)
617 if_printf(ifp, "Infiniband address: %20D\n", lla, ":");
618
619 /* Add necessary bits are setup; announce it now. */
620 EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp);
621
622 if (IS_DEFAULT_VNET(curvnet))
623 devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL);
624 }
625
/*
 * Perform common duties while detaching an Infiniband interface:
 * detach the BPF tap first, then detach the interface itself.
 */
void
infiniband_ifdetach(struct ifnet *ifp)
{

	bpfdetach(ifp);
	if_detach(ifp);
}
635
636 static int
637 infiniband_modevent(module_t mod, int type, void *data)
638 {
639 switch (type) {
640 case MOD_LOAD:
641 case MOD_UNLOAD:
642 return (0);
643 default:
644 return (EOPNOTSUPP);
645 }
646 }
647
648 static moduledata_t infiniband_mod = {
649 .name = "if_infiniband",
650 .evhand = &infiniband_modevent,
651 };
652
653 DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
654 MODULE_VERSION(if_infiniband, 1);
Cache object: 047d98e1574d7213b76d407e2473b30e
|