1 /*-
2 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
3 * Copyright (C) 2013 Intel Corporation
4 * Copyright (C) 2015 EMC Corporation
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
 *
 * This module contains a driver for a simulated Ethernet device, using
 * the underlying NTB Transport device.
35 *
36 * NOTE: Much of the code in this module is shared with Linux. Any patches may
37 * be picked up and redistributed in Linux with a dual GPL/BSD license.
38 */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42
43 #include <sys/param.h>
44 #include <sys/kernel.h>
45 #include <sys/systm.h>
46 #include <sys/buf_ring.h>
47 #include <sys/bus.h>
48 #include <sys/ktr.h>
49 #include <sys/limits.h>
50 #include <sys/module.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/sysctl.h>
54 #include <sys/taskqueue.h>
55
56 #include <net/if.h>
57 #include <net/if_media.h>
58 #include <net/if_types.h>
59 #include <net/if_media.h>
60 #include <net/if_var.h>
61 #include <net/bpf.h>
62 #include <net/ethernet.h>
63
64 #include <machine/bus.h>
65
66 #include "../ntb_transport.h"
67
/* KTR trace class used by the CTRn() trace points in this driver. */
#define	KTR_NTB		KTR_SPARE3

/* The single media word registered with, and reported through, ifmedia. */
#define	NTB_MEDIATYPE	(IFM_ETHER | IFM_AUTO | IFM_FDX)

/* hwassist bits switched together with IFCAP_TXCSUM in ntb_ioctl(). */
#define	NTB_CSUM_FEATURES						\
	(CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)

/* hwassist bits switched together with IFCAP_TXCSUM_IPV6 in ntb_ioctl(). */
#define	NTB_CSUM_FEATURES6						\
	(CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)

/*
 * Checksum flags stamped onto received IPv4/IPv6 frames by
 * ntb_net_rx_handler() when receive checksum offload is enabled.
 */
#define	NTB_CSUM_SET							\
	(CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 |			\
	    CSUM_PSEUDO_HDR |						\
	    CSUM_IP_CHECKED | CSUM_IP_VALID |				\
	    CSUM_SCTP_VALID)
77
78 static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
79 "if_ntb");
80
81 static unsigned g_if_ntb_num_queues = UINT_MAX;
82 SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
83 &g_if_ntb_num_queues, 0, "Number of queues per interface");
84
85 struct ntb_net_queue {
86 struct ntb_net_ctx *sc;
87 if_t ifp;
88 struct ntb_transport_qp *qp;
89 struct buf_ring *br;
90 struct task tx_task;
91 struct taskqueue *tx_tq;
92 struct mtx tx_lock;
93 struct callout queue_full;
94 };
95
96 struct ntb_net_ctx {
97 if_t ifp;
98 struct ifmedia media;
99 u_char eaddr[ETHER_ADDR_LEN];
100 int num_queues;
101 struct ntb_net_queue *queues;
102 int mtu;
103 };
104
105 static int ntb_net_probe(device_t dev);
106 static int ntb_net_attach(device_t dev);
107 static int ntb_net_detach(device_t dev);
108 static void ntb_net_init(void *arg);
109 static int ntb_ifmedia_upd(struct ifnet *);
110 static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
111 static int ntb_ioctl(if_t ifp, u_long command, caddr_t data);
112 static int ntb_transmit(if_t ifp, struct mbuf *m);
113 static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
114 void *data, int len);
115 static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
116 void *data, int len);
117 static void ntb_net_event_handler(void *data, enum ntb_link_event status);
118 static void ntb_handle_tx(void *arg, int pending);
119 static void ntb_qp_full(void *arg);
120 static void ntb_qflush(if_t ifp);
121 static void create_random_local_eui48(u_char *eaddr);
122
123 static int
124 ntb_net_probe(device_t dev)
125 {
126
127 device_set_desc(dev, "NTB Network Interface");
128 return (0);
129 }
130
131 static int
132 ntb_net_attach(device_t dev)
133 {
134 struct ntb_net_ctx *sc = device_get_softc(dev);
135 struct ntb_net_queue *q;
136 if_t ifp;
137 struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
138 ntb_net_tx_handler, ntb_net_event_handler };
139 int i;
140
141 ifp = sc->ifp = if_gethandle(IFT_ETHER);
142 if (ifp == NULL) {
143 printf("ntb: Cannot allocate ifnet structure\n");
144 return (ENOMEM);
145 }
146 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
147 if_setdev(ifp, dev);
148
149 sc->num_queues = min(g_if_ntb_num_queues,
150 ntb_transport_queue_count(dev));
151 sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
152 M_DEVBUF, M_WAITOK | M_ZERO);
153 sc->mtu = INT_MAX;
154 for (i = 0; i < sc->num_queues; i++) {
155 q = &sc->queues[i];
156 q->sc = sc;
157 q->ifp = ifp;
158 q->qp = ntb_transport_create_queue(dev, i, &handlers, q);
159 if (q->qp == NULL)
160 break;
161 sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
162 mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
163 q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
164 TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
165 q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
166 taskqueue_thread_enqueue, &q->tx_tq);
167 taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
168 device_get_nameunit(dev), i);
169 callout_init(&q->queue_full, 1);
170 }
171 sc->num_queues = i;
172 device_printf(dev, "%d queue(s)\n", sc->num_queues);
173
174 if_setinitfn(ifp, ntb_net_init);
175 if_setsoftc(ifp, sc);
176 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
177 if_setioctlfn(ifp, ntb_ioctl);
178 if_settransmitfn(ifp, ntb_transmit);
179 if_setqflushfn(ifp, ntb_qflush);
180 create_random_local_eui48(sc->eaddr);
181 ether_ifattach(ifp, sc->eaddr);
182 if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
183 IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
184 if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
185 if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN);
186
187 ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
188 ntb_ifmedia_sts);
189 ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
190 ifmedia_set(&sc->media, NTB_MEDIATYPE);
191
192 for (i = 0; i < sc->num_queues; i++)
193 ntb_transport_link_up(sc->queues[i].qp);
194 return (0);
195 }
196
197 static int
198 ntb_net_detach(device_t dev)
199 {
200 struct ntb_net_ctx *sc = device_get_softc(dev);
201 struct ntb_net_queue *q;
202 int i;
203
204 for (i = 0; i < sc->num_queues; i++)
205 ntb_transport_link_down(sc->queues[i].qp);
206 ether_ifdetach(sc->ifp);
207 if_free(sc->ifp);
208 ifmedia_removeall(&sc->media);
209 for (i = 0; i < sc->num_queues; i++) {
210 q = &sc->queues[i];
211 ntb_transport_free_queue(q->qp);
212 buf_ring_free(q->br, M_DEVBUF);
213 callout_drain(&q->queue_full);
214 taskqueue_drain_all(q->tx_tq);
215 mtx_destroy(&q->tx_lock);
216 }
217 free(sc->queues, M_DEVBUF);
218 return (0);
219 }
220
221 /* Network device interface */
222
223 static void
224 ntb_net_init(void *arg)
225 {
226 struct ntb_net_ctx *sc = arg;
227 if_t ifp = sc->ifp;
228
229 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
230 if_setbaudrate(ifp, ntb_transport_link_speed(sc->queues[0].qp));
231 if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
232 LINK_STATE_UP : LINK_STATE_DOWN);
233 }
234
235 static int
236 ntb_ioctl(if_t ifp, u_long command, caddr_t data)
237 {
238 struct ntb_net_ctx *sc = if_getsoftc(ifp);
239 struct ifreq *ifr = (struct ifreq *)data;
240 int error = 0;
241
242 switch (command) {
243 case SIOCSIFFLAGS:
244 case SIOCADDMULTI:
245 case SIOCDELMULTI:
246 break;
247
248 case SIOCSIFMTU:
249 {
250 if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
251 error = EINVAL;
252 break;
253 }
254
255 if_setmtu(ifp, ifr->ifr_mtu);
256 break;
257 }
258
259 case SIOCSIFMEDIA:
260 case SIOCGIFMEDIA:
261 error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
262 break;
263
264 case SIOCSIFCAP:
265 if (ifr->ifr_reqcap & IFCAP_RXCSUM)
266 if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
267 else
268 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
269 if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
270 if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
271 if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0);
272 } else {
273 if_setcapenablebit(ifp, 0, IFCAP_TXCSUM);
274 if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES);
275 }
276 if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
277 if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
278 else
279 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
280 if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
281 if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
282 if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0);
283 } else {
284 if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6);
285 if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6);
286 }
287 break;
288
289 default:
290 error = ether_ioctl(ifp, command, data);
291 break;
292 }
293
294 return (error);
295 }
296
297 static int
298 ntb_ifmedia_upd(struct ifnet *ifp)
299 {
300 struct ntb_net_ctx *sc = if_getsoftc(ifp);
301 struct ifmedia *ifm = &sc->media;
302
303 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
304 return (EINVAL);
305
306 return (0);
307 }
308
309 static void
310 ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
311 {
312 struct ntb_net_ctx *sc = if_getsoftc(ifp);
313
314 ifmr->ifm_status = IFM_AVALID;
315 ifmr->ifm_active = NTB_MEDIATYPE;
316 if (ntb_transport_link_query(sc->queues[0].qp))
317 ifmr->ifm_status |= IFM_ACTIVE;
318 }
319
320 static void
321 ntb_transmit_locked(struct ntb_net_queue *q)
322 {
323 if_t ifp = q->ifp;
324 struct mbuf *m;
325 int rc, len;
326 short mflags;
327
328 CTR0(KTR_NTB, "TX: ntb_transmit_locked");
329 while ((m = drbr_peek(ifp, q->br)) != NULL) {
330 CTR1(KTR_NTB, "TX: start mbuf %p", m);
331 if_etherbpfmtap(ifp, m);
332 len = m->m_pkthdr.len;
333 mflags = m->m_flags;
334 rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
335 if (rc != 0) {
336 CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
337 if (rc == EAGAIN) {
338 drbr_putback(ifp, q->br, m);
339 callout_reset_sbt(&q->queue_full,
340 SBT_1MS / 4, SBT_1MS / 4,
341 ntb_qp_full, q, 0);
342 } else {
343 m_freem(m);
344 drbr_advance(ifp, q->br);
345 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
346 }
347 break;
348 }
349 drbr_advance(ifp, q->br);
350 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
351 if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
352 if (mflags & M_MCAST)
353 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
354 }
355 }
356
357 static int
358 ntb_transmit(if_t ifp, struct mbuf *m)
359 {
360 struct ntb_net_ctx *sc = if_getsoftc(ifp);
361 struct ntb_net_queue *q;
362 int error, i;
363
364 CTR0(KTR_NTB, "TX: ntb_transmit");
365 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
366 i = m->m_pkthdr.flowid % sc->num_queues;
367 else
368 i = curcpu % sc->num_queues;
369 q = &sc->queues[i];
370
371 error = drbr_enqueue(ifp, q->br, m);
372 if (error)
373 return (error);
374
375 if (mtx_trylock(&q->tx_lock)) {
376 ntb_transmit_locked(q);
377 mtx_unlock(&q->tx_lock);
378 } else
379 taskqueue_enqueue(q->tx_tq, &q->tx_task);
380 return (0);
381 }
382
383 static void
384 ntb_handle_tx(void *arg, int pending)
385 {
386 struct ntb_net_queue *q = arg;
387
388 mtx_lock(&q->tx_lock);
389 ntb_transmit_locked(q);
390 mtx_unlock(&q->tx_lock);
391 }
392
393 static void
394 ntb_qp_full(void *arg)
395 {
396 struct ntb_net_queue *q = arg;
397
398 CTR0(KTR_NTB, "TX: qp_full callout");
399 if (ntb_transport_tx_free_entry(q->qp) > 0)
400 taskqueue_enqueue(q->tx_tq, &q->tx_task);
401 else
402 callout_schedule_sbt(&q->queue_full,
403 SBT_1MS / 4, SBT_1MS / 4, 0);
404 }
405
406 static void
407 ntb_qflush(if_t ifp)
408 {
409 struct ntb_net_ctx *sc = if_getsoftc(ifp);
410 struct ntb_net_queue *q;
411 struct mbuf *m;
412 int i;
413
414 for (i = 0; i < sc->num_queues; i++) {
415 q = &sc->queues[i];
416 mtx_lock(&q->tx_lock);
417 while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
418 m_freem(m);
419 mtx_unlock(&q->tx_lock);
420 }
421 if_qflush(ifp);
422 }
423
424 /* Network Device Callbacks */
425 static void
426 ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
427 int len)
428 {
429
430 m_freem(data);
431 CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
432 }
433
434 static void
435 ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
436 int len)
437 {
438 struct ntb_net_queue *q = qp_data;
439 struct ntb_net_ctx *sc = q->sc;
440 struct mbuf *m = data;
441 if_t ifp = q->ifp;
442 uint16_t proto;
443
444 CTR1(KTR_NTB, "RX: rx handler (%d)", len);
445 if (len < 0) {
446 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
447 return;
448 }
449
450 m->m_pkthdr.rcvif = ifp;
451 if (sc->num_queues > 1) {
452 m->m_pkthdr.flowid = q - sc->queues;
453 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
454 }
455 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
456 m_copydata(m, 12, 2, (void *)&proto);
457 switch (ntohs(proto)) {
458 case ETHERTYPE_IP:
459 if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
460 m->m_pkthdr.csum_data = 0xffff;
461 m->m_pkthdr.csum_flags = NTB_CSUM_SET;
462 }
463 break;
464 case ETHERTYPE_IPV6:
465 if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) {
466 m->m_pkthdr.csum_data = 0xffff;
467 m->m_pkthdr.csum_flags = NTB_CSUM_SET;
468 }
469 break;
470 }
471 }
472 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
473 if_input(ifp, m);
474 }
475
476 static void
477 ntb_net_event_handler(void *data, enum ntb_link_event status)
478 {
479 struct ntb_net_queue *q = data;
480
481 if_setbaudrate(q->ifp, ntb_transport_link_speed(q->qp));
482 if_link_state_change(q->ifp, (status == NTB_LINK_UP) ? LINK_STATE_UP :
483 LINK_STATE_DOWN);
484 }
485
486 /* Helper functions */
487 /* TODO: This too should really be part of the kernel */
488 #define EUI48_MULTICAST 1 << 0
489 #define EUI48_LOCALLY_ADMINISTERED 1 << 1
490 static void
491 create_random_local_eui48(u_char *eaddr)
492 {
493 static uint8_t counter = 0;
494
495 eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
496 arc4rand(&eaddr[1], 4, 0);
497 eaddr[5] = counter++;
498 }
499
500 static device_method_t ntb_net_methods[] = {
501 /* Device interface */
502 DEVMETHOD(device_probe, ntb_net_probe),
503 DEVMETHOD(device_attach, ntb_net_attach),
504 DEVMETHOD(device_detach, ntb_net_detach),
505 DEVMETHOD_END
506 };
507
508 static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
509 sizeof(struct ntb_net_ctx));
510 DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, NULL, NULL);
511 MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
512 MODULE_VERSION(if_ntb, 1);
Cache object: 596b35180b1d6136e529885fe5c8b8be
|