FreeBSD/Linux Kernel Cross Reference
sys/net/if_epair.c
1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2008 The FreeBSD Foundation
5 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org>
6 * All rights reserved.
7 *
8 * This software was developed by CK Software GmbH under sponsorship
9 * from the FreeBSD Foundation.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * A pair of virtual back-to-back connected ethernet like interfaces
35 * (``two interfaces with a virtual cross-over cable'').
36 *
37 * This is mostly intended to be used to provide connectivity between
38 * different virtual network stack instances.
39 */
40 /*
41 * Things to re-think once we have more experience:
42 * - ifp->if_reassign function once we can test with vimage. Depending on
43 * how if_vmove() is going to be improved.
44 * - Real random etheraddrs that are checked to be uniquish; we would need
45 * to re-do them in case we move the interface between network stacks
46 * in a private if_reassign function.
 * In case we bridge to a real interface/network or between independent
48 * epairs on multiple stacks/machines, we may need this.
49 * For now let the user handle that case.
50 */
51
52 #include <sys/cdefs.h>
53 __FBSDID("$FreeBSD$");
54
55 #include <sys/param.h>
56 #include <sys/hash.h>
57 #include <sys/jail.h>
58 #include <sys/kernel.h>
59 #include <sys/libkern.h>
60 #include <sys/malloc.h>
61 #include <sys/mbuf.h>
62 #include <sys/module.h>
63 #include <sys/proc.h>
64 #include <sys/refcount.h>
65 #include <sys/queue.h>
66 #include <sys/smp.h>
67 #include <sys/socket.h>
68 #include <sys/sockio.h>
69 #include <sys/sysctl.h>
70 #include <sys/types.h>
71
72 #include <net/bpf.h>
73 #include <net/ethernet.h>
74 #include <net/if.h>
75 #include <net/if_var.h>
76 #include <net/if_clone.h>
77 #include <net/if_media.h>
78 #include <net/if_var.h>
79 #include <net/if_types.h>
80 #include <net/netisr.h>
81 #include <net/vnet.h>
82
83 SYSCTL_DECL(_net_link);
84 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
85
86 #ifdef EPAIR_DEBUG
87 static int epair_debug = 0;
88 SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW,
89 &epair_debug, 0, "if_epair(4) debugging.");
90 #define DPRINTF(fmt, arg...) \
91 if (epair_debug) \
92 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg)
93 #else
94 #define DPRINTF(fmt, arg...)
95 #endif
96
97 static void epair_nh_sintr(struct mbuf *);
98 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *);
99 static void epair_nh_drainedcpu(u_int);
100
101 static void epair_start_locked(struct ifnet *);
102 static int epair_media_change(struct ifnet *);
103 static void epair_media_status(struct ifnet *, struct ifmediareq *);
104
105 static int epair_clone_match(struct if_clone *, const char *);
106 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
107 static int epair_clone_destroy(struct if_clone *, struct ifnet *);
108
109 static const char epairname[] = "epair";
110 static unsigned int next_index = 0;
111
112 /* Netisr related definitions and sysctl. */
113 static struct netisr_handler epair_nh = {
114 .nh_name = epairname,
115 .nh_proto = NETISR_EPAIR,
116 .nh_policy = NETISR_POLICY_CPU,
117 .nh_handler = epair_nh_sintr,
118 .nh_m2cpuid = epair_nh_m2cpuid,
119 .nh_drainedcpu = epair_nh_drainedcpu,
120 };
121
122 static int
123 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
124 {
125 int error, qlimit;
126
127 netisr_getqlimit(&epair_nh, &qlimit);
128 error = sysctl_handle_int(oidp, &qlimit, 0, req);
129 if (error || !req->newptr)
130 return (error);
131 if (qlimit < 1)
132 return (EINVAL);
133 return (netisr_setqlimit(&epair_nh, qlimit));
134 }
135 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
136 0, 0, sysctl_epair_netisr_maxqlen, "I",
137 "Maximum if_epair(4) netisr \"hw\" queue length");
138
139 struct epair_softc {
140 struct ifnet *ifp; /* This ifp. */
141 struct ifnet *oifp; /* other ifp of pair. */
142 struct ifmedia media; /* Media config (fake). */
143 u_int refcount; /* # of mbufs in flight. */
144 u_int cpuid; /* CPU ID assigned upon creation. */
145 void (*if_qflush)(struct ifnet *);
146 /* Original if_qflush routine. */
147 };
148
149 /*
150 * Per-CPU list of ifps with data in the ifq that needs to be flushed
151 * to the netisr ``hw'' queue before we allow any further direct queuing
152 * to the ``hw'' queue.
153 */
154 struct epair_ifp_drain {
155 STAILQ_ENTRY(epair_ifp_drain) ifp_next;
156 struct ifnet *ifp;
157 };
158 STAILQ_HEAD(eid_list, epair_ifp_drain);
159
160 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \
161 "if_epair", NULL, MTX_DEF)
162 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx)
163 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \
164 MA_OWNED)
165 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx)
166 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx)
167
168 #ifdef INVARIANTS
169 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v))
170 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r))
171 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r))
172 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p)
173 #else
174 #define EPAIR_REFCOUNT_INIT(r, v)
175 #define EPAIR_REFCOUNT_AQUIRE(r)
176 #define EPAIR_REFCOUNT_RELEASE(r)
177 #define EPAIR_REFCOUNT_ASSERT(a, p)
178 #endif
179
180 static MALLOC_DEFINE(M_EPAIR, epairname,
181 "Pair of virtual cross-over connected Ethernet-like interfaces");
182
183 VNET_DEFINE_STATIC(struct if_clone *, epair_cloner);
184 #define V_epair_cloner VNET(epair_cloner)
185
186 /*
187 * DPCPU area and functions.
188 */
189 struct epair_dpcpu {
190 struct mtx if_epair_mtx; /* Per-CPU locking. */
191 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */
192 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with
193 * data in the ifq. */
194 };
195 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu);
196
/*
 * Strip all non-persistent mbuf tags from a packet before it is handed
 * over to the peer interface, so stale per-hop metadata does not cross
 * the virtual link.
 */
static void
epair_clear_mbuf(struct mbuf *m)
{
	m_tag_delete_nonpersistent(m);
}
202
203 static void
204 epair_dpcpu_init(void)
205 {
206 struct epair_dpcpu *epair_dpcpu;
207 struct eid_list *s;
208 u_int cpuid;
209
210 CPU_FOREACH(cpuid) {
211 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
212
213 /* Initialize per-cpu lock. */
214 EPAIR_LOCK_INIT(epair_dpcpu);
215
216 /* Driver flags are per-cpu as are our netisr "hw" queues. */
217 epair_dpcpu->epair_drv_flags = 0;
218
219 /*
220 * Initialize per-cpu drain list.
221 * Manually do what STAILQ_HEAD_INITIALIZER would do.
222 */
223 s = &epair_dpcpu->epair_ifp_drain_list;
224 s->stqh_first = NULL;
225 s->stqh_last = &s->stqh_first;
226 }
227 }
228
229 static void
230 epair_dpcpu_detach(void)
231 {
232 struct epair_dpcpu *epair_dpcpu;
233 u_int cpuid;
234
235 CPU_FOREACH(cpuid) {
236 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
237
238 /* Destroy per-cpu lock. */
239 EPAIR_LOCK_DESTROY(epair_dpcpu);
240 }
241 }
242
243 /*
244 * Helper functions.
245 */
246 static u_int
247 cpuid_from_ifp(struct ifnet *ifp)
248 {
249 struct epair_softc *sc;
250
251 if (ifp == NULL)
252 return (0);
253 sc = ifp->if_softc;
254
255 return (sc->cpuid);
256 }
257
258 /*
259 * Netisr handler functions.
260 */
261 static void
262 epair_nh_sintr(struct mbuf *m)
263 {
264 struct ifnet *ifp;
265 struct epair_softc *sc __unused;
266
267 ifp = m->m_pkthdr.rcvif;
268 (*ifp->if_input)(ifp, m);
269 sc = ifp->if_softc;
270 EPAIR_REFCOUNT_RELEASE(&sc->refcount);
271 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
272 ("%s: ifp=%p sc->refcount not >= 1: %d",
273 __func__, ifp, sc->refcount));
274 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount);
275 }
276
277 static struct mbuf *
278 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
279 {
280
281 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif);
282
283 return (m);
284 }
285
/*
 * netisr callback invoked once the "hw" queue of the given CPU has been
 * drained: restart every interface parked on this CPU's drain list and
 * drop an entry (plus its refcount) once that interface's ifq is empty
 * again.  Stops early if the "hw" queue overflows while draining.
 */
static void
epair_nh_drainedcpu(u_int cpuid)
{
	struct epair_dpcpu *epair_dpcpu;
	struct epair_ifp_drain *elm, *tvar;
	struct ifnet *ifp;

	epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
	EPAIR_LOCK(epair_dpcpu);
	/*
	 * Assume our "hw" queue and possibly ifq will be emptied
	 * again. In case we will overflow the "hw" queue while
	 * draining, epair_start_locked will set IFF_DRV_OACTIVE
	 * again and we will stop and return.
	 */
	STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list,
	    ifp_next, tvar) {
		ifp = elm->ifp;
		/* Clear OACTIVE optimistically so the restart can queue. */
		epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE;
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		epair_start_locked(ifp);

		IFQ_LOCK(&ifp->if_snd);
		if (IFQ_IS_EMPTY(&ifp->if_snd)) {
			struct epair_softc *sc __unused;

			STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list,
			    elm, epair_ifp_drain, ifp_next);
			/* The cached ifp goes off the list. */
			sc = ifp->if_softc;
			EPAIR_REFCOUNT_RELEASE(&sc->refcount);
			EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
			    ("%s: ifp=%p sc->refcount not >= 1: %d",
			    __func__, ifp, sc->refcount));
			free(elm, M_EPAIR);
		}
		IFQ_UNLOCK(&ifp->if_snd);

		if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) {
			/* Our "hw"q overflew again. */
			epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
			DPRINTF("hw queue length overflow at %u\n",
			    epair_nh.nh_qlimit);
			break;
		}
	}
	EPAIR_UNLOCK(epair_dpcpu);
}
334
335 /*
336 * Network interface (`if') related functions.
337 */
338 static void
339 epair_remove_ifp_from_draining(struct ifnet *ifp)
340 {
341 struct epair_dpcpu *epair_dpcpu;
342 struct epair_ifp_drain *elm, *tvar;
343 u_int cpuid;
344
345 CPU_FOREACH(cpuid) {
346 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
347 EPAIR_LOCK(epair_dpcpu);
348 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list,
349 ifp_next, tvar) {
350 if (ifp == elm->ifp) {
351 struct epair_softc *sc __unused;
352
353 STAILQ_REMOVE(
354 &epair_dpcpu->epair_ifp_drain_list, elm,
355 epair_ifp_drain, ifp_next);
356 /* The cached ifp goes off the list. */
357 sc = ifp->if_softc;
358 EPAIR_REFCOUNT_RELEASE(&sc->refcount);
359 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
360 ("%s: ifp=%p sc->refcount not >= 1: %d",
361 __func__, ifp, sc->refcount));
362 free(elm, M_EPAIR);
363 }
364 }
365 EPAIR_UNLOCK(epair_dpcpu);
366 }
367 }
368
369 static int
370 epair_add_ifp_for_draining(struct ifnet *ifp)
371 {
372 struct epair_dpcpu *epair_dpcpu;
373 struct epair_softc *sc;
374 struct epair_ifp_drain *elm = NULL;
375
376 sc = ifp->if_softc;
377 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
378 EPAIR_LOCK_ASSERT(epair_dpcpu);
379 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next)
380 if (elm->ifp == ifp)
381 break;
382 /* If the ifp is there already, return success. */
383 if (elm != NULL)
384 return (0);
385
386 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO);
387 if (elm == NULL)
388 return (ENOMEM);
389
390 elm->ifp = ifp;
391 /* Add a reference for the ifp pointer on the list. */
392 EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
393 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next);
394
395 return (0);
396 }
397
/*
 * Drain ifp's send queue, forwarding each packet to the peer interface
 * through the netisr.  Called with the per-CPU epair lock held, either
 * from epair_start() or from the drain callback.  On netisr overflow the
 * pair is marked OACTIVE and parked for later draining.
 */
static void
epair_start_locked(struct ifnet *ifp)
{
	struct epair_dpcpu *epair_dpcpu;
	struct mbuf *m;
	struct epair_softc *sc;
	struct ifnet *oifp;
	int error;

	DPRINTF("ifp=%p\n", ifp);
	sc = ifp->if_softc;
	epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
	EPAIR_LOCK_ASSERT(epair_dpcpu);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;
	if ((ifp->if_flags & IFF_UP) == 0)
		return;

	/*
	 * We get packets here from ether_output via if_handoff()
	 * and need to put them into the input queue of the oifp
	 * and call oifp->if_input() via netisr/epair_sintr().
	 */
	oifp = sc->oifp;
	/* NB: from here on sc refers to the PEER's softc (refcounting). */
	sc = oifp->if_softc;
	for (;;) {
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;
		BPF_MTAP(ifp, m);

		/*
		 * In case the outgoing interface is not usable,
		 * drop the packet.
		 */
		if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
		    (oifp->if_flags & IFF_UP) ==0) {
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			m_freem(m);
			continue;
		}
		DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);

		epair_clear_mbuf(m);

		/*
		 * Add a reference so the interface cannot go while the
		 * packet is in transit as we rely on rcvif to stay valid.
		 */
		EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
		m->m_pkthdr.rcvif = oifp;
		CURVNET_SET_QUIET(oifp->if_vnet);
		error = netisr_queue(NETISR_EPAIR, m);
		CURVNET_RESTORE();
		if (!error) {
			if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
			/* Someone else received the packet. */
			if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
		} else {
			/* The packet was freed already. */
			epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			(void) epair_add_ifp_for_draining(ifp);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			EPAIR_REFCOUNT_RELEASE(&sc->refcount);
			EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
			    ("%s: ifp=%p sc->refcount not >= 1: %d",
			    __func__, oifp, sc->refcount));
		}
	}
}
470
471 static void
472 epair_start(struct ifnet *ifp)
473 {
474 struct epair_dpcpu *epair_dpcpu;
475
476 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu);
477 EPAIR_LOCK(epair_dpcpu);
478 epair_start_locked(ifp);
479 EPAIR_UNLOCK(epair_dpcpu);
480 }
481
/*
 * Transmit one packet to the peer interface via the netisr, bypassing
 * the interface enqueue/dequeue machinery.  Handles the ALTQ fallback,
 * the "hw"-queue-full fallback onto the ifq, and the error path where
 * the netisr already freed the packet.  Called with the per-CPU epair
 * lock held.  Returns 0 on success or an errno on failure.
 */
static int
epair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
{
	struct epair_dpcpu *epair_dpcpu;
	struct epair_softc *sc;
	struct ifnet *oifp;
	int error, len;
	short mflags;

	DPRINTF("ifp=%p m=%p\n", ifp, m);
	sc = ifp->if_softc;
	epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
	EPAIR_LOCK_ASSERT(epair_dpcpu);

	if (m == NULL)
		return (0);

	/*
	 * We are not going to use the interface en/dequeue mechanism
	 * on the TX side. We are called from ether_output_frame()
	 * and will put the packet into the incoming queue of the
	 * other interface of our pair via the netsir.
	 */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		m_freem(m);
		return (ENXIO);
	}
	if ((ifp->if_flags & IFF_UP) == 0) {
		m_freem(m);
		return (ENETDOWN);
	}

	BPF_MTAP(ifp, m);

	/*
	 * In case the outgoing interface is not usable,
	 * drop the packet.
	 */
	oifp = sc->oifp;
	if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    (oifp->if_flags & IFF_UP) ==0) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		m_freem(m);
		return (0);
	}
	/* Remember length/flags; stats are updated after the handoff. */
	len = m->m_pkthdr.len;
	mflags = m->m_flags;
	DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);

#ifdef ALTQ
	/* Support ALTQ via the classic if_start() path. */
	IF_LOCK(&ifp->if_snd);
	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
		ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
		if (error)
			if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
		IF_UNLOCK(&ifp->if_snd);
		if (!error) {
			if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
			if (mflags & (M_BCAST|M_MCAST))
				if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);

			if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0)
				epair_start_locked(ifp);
			else
				(void)epair_add_ifp_for_draining(ifp);
		}
		return (error);
	}
	IF_UNLOCK(&ifp->if_snd);
#endif

	if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) {
		/*
		 * Our hardware queue is full, try to fall back
		 * queuing to the ifq but do not call ifp->if_start.
		 * Either we are lucky or the packet is gone.
		 */
		IFQ_ENQUEUE(&ifp->if_snd, m, error);
		if (!error)
			(void)epair_add_ifp_for_draining(ifp);
		return (error);
	}

	epair_clear_mbuf(m);

	/* NB: sc now points at the PEER's softc for refcounting. */
	sc = oifp->if_softc;
	/*
	 * Add a reference so the interface cannot go while the
	 * packet is in transit as we rely on rcvif to stay valid.
	 */
	EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
	m->m_pkthdr.rcvif = oifp;
	CURVNET_SET_QUIET(oifp->if_vnet);
	error = netisr_queue(NETISR_EPAIR, m);
	CURVNET_RESTORE();
	if (!error) {
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		/*
		 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics,
		 * but as we bypass all this we have to duplicate
		 * the logic another time.
		 */
		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
		if (mflags & (M_BCAST|M_MCAST))
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		/* Someone else received the packet. */
		if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
	} else {
		/* The packet was freed already. */
		epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		EPAIR_REFCOUNT_RELEASE(&sc->refcount);
		EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
		    ("%s: ifp=%p sc->refcount not >= 1: %d",
		    __func__, oifp, sc->refcount));
	}

	return (error);
}
603
604 static int
605 epair_transmit(struct ifnet *ifp, struct mbuf *m)
606 {
607 struct epair_dpcpu *epair_dpcpu;
608 int error;
609
610 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu);
611 EPAIR_LOCK(epair_dpcpu);
612 error = epair_transmit_locked(ifp, m);
613 EPAIR_UNLOCK(epair_dpcpu);
614 return (error);
615 }
616
617 static void
618 epair_qflush(struct ifnet *ifp)
619 {
620 struct epair_softc *sc;
621
622 sc = ifp->if_softc;
623 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n",
624 __func__, ifp, sc));
625 /*
626 * Remove this ifp from all backpointer lists. The interface will not
627 * usable for flushing anyway nor should it have anything to flush
628 * after if_qflush().
629 */
630 epair_remove_ifp_from_draining(ifp);
631
632 if (sc->if_qflush)
633 sc->if_qflush(ifp);
634 }
635
636 static int
637 epair_media_change(struct ifnet *ifp __unused)
638 {
639
640 /* Do nothing. */
641 return (0);
642 }
643
644 static void
645 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr)
646 {
647
648 imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
649 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX;
650 }
651
652 static int
653 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
654 {
655 struct epair_softc *sc;
656 struct ifreq *ifr;
657 int error;
658
659 ifr = (struct ifreq *)data;
660 switch (cmd) {
661 case SIOCSIFFLAGS:
662 case SIOCADDMULTI:
663 case SIOCDELMULTI:
664 error = 0;
665 break;
666
667 case SIOCSIFMEDIA:
668 case SIOCGIFMEDIA:
669 sc = ifp->if_softc;
670 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd);
671 break;
672
673 case SIOCSIFMTU:
674 /* We basically allow all kinds of MTUs. */
675 ifp->if_mtu = ifr->ifr_mtu;
676 error = 0;
677 break;
678
679 default:
680 /* Let the common ethernet handler process this. */
681 error = ether_ioctl(ifp, cmd, data);
682 break;
683 }
684
685 return (error);
686 }
687
/* if_init handler: an epair needs no (re)initialization. */
static void
epair_init(void *dummy __unused)
{
}
692
693
694 /*
695 * Interface cloning functions.
696 * We use our private ones so that we can create/destroy our secondary
697 * device along with the primary one.
698 */
699 static int
700 epair_clone_match(struct if_clone *ifc, const char *name)
701 {
702 const char *cp;
703
704 DPRINTF("name='%s'\n", name);
705
706 /*
707 * Our base name is epair.
708 * Our interfaces will be named epair<n>[ab].
709 * So accept anything of the following list:
710 * - epair
711 * - epair<n>
712 * but not the epair<n>[ab] versions.
713 */
714 if (strncmp(epairname, name, sizeof(epairname)-1) != 0)
715 return (0);
716
717 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) {
718 if (*cp < '' || *cp > '9')
719 return (0);
720 }
721
722 return (1);
723 }
724
725 static void
726 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb)
727 {
728 struct ifnet *ifp;
729 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
730
731 ifp = scb->ifp;
732 /* Copy epairNa etheraddr and change the last byte. */
733 memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
734 eaddr[5] = 0x0b;
735 ether_ifattach(ifp, eaddr);
736
737 if_clone_addif(ifc, ifp);
738 }
739
740 static int
741 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
742 {
743 struct epair_softc *sca, *scb;
744 struct ifnet *ifp;
745 char *dp;
746 int error, unit, wildcard;
747 uint64_t hostid;
748 uint32_t key[3];
749 uint32_t hash;
750 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
751
752 /* Try to see if a special unit was requested. */
753 error = ifc_name2unit(name, &unit);
754 if (error != 0)
755 return (error);
756 wildcard = (unit < 0);
757
758 error = ifc_alloc_unit(ifc, &unit);
759 if (error != 0)
760 return (error);
761
762 /*
763 * If no unit had been given, we need to adjust the ifName.
764 * Also make sure there is space for our extra [ab] suffix.
765 */
766 for (dp = name; *dp != '\0'; dp++);
767 if (wildcard) {
768 error = snprintf(dp, len - (dp - name), "%d", unit);
769 if (error > len - (dp - name) - 1) {
770 /* ifName too long. */
771 ifc_free_unit(ifc, unit);
772 return (ENOSPC);
773 }
774 dp += error;
775 }
776 if (len - (dp - name) - 1 < 1) {
777 /* No space left for our [ab] suffix. */
778 ifc_free_unit(ifc, unit);
779 return (ENOSPC);
780 }
781 *dp = 'b';
782 /* Must not change dp so we can replace 'a' by 'b' later. */
783 *(dp+1) = '\0';
784
785 /* Check if 'a' and 'b' interfaces already exist. */
786 if (ifunit(name) != NULL)
787 return (EEXIST);
788 *dp = 'a';
789 if (ifunit(name) != NULL)
790 return (EEXIST);
791
792 /* Allocate memory for both [ab] interfaces */
793 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
794 EPAIR_REFCOUNT_INIT(&sca->refcount, 1);
795 sca->ifp = if_alloc(IFT_ETHER);
796 if (sca->ifp == NULL) {
797 free(sca, M_EPAIR);
798 ifc_free_unit(ifc, unit);
799 return (ENOSPC);
800 }
801
802 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
803 EPAIR_REFCOUNT_INIT(&scb->refcount, 1);
804 scb->ifp = if_alloc(IFT_ETHER);
805 if (scb->ifp == NULL) {
806 free(scb, M_EPAIR);
807 if_free(sca->ifp);
808 free(sca, M_EPAIR);
809 ifc_free_unit(ifc, unit);
810 return (ENOSPC);
811 }
812
813 /*
814 * Cross-reference the interfaces so we will be able to free both.
815 */
816 sca->oifp = scb->ifp;
817 scb->oifp = sca->ifp;
818
819 /*
820 * Calculate the cpuid for netisr queueing based on the
821 * ifIndex of the interfaces. As long as we cannot configure
822 * this or use cpuset information easily we cannot guarantee
823 * cache locality but we can at least allow parallelism.
824 */
825 sca->cpuid =
826 netisr_get_cpuid(sca->ifp->if_index);
827 scb->cpuid =
828 netisr_get_cpuid(scb->ifp->if_index);
829
830 /* Initialise pseudo media types. */
831 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
832 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
833 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
834 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
835 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
836 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
837
838 /* Finish initialization of interface <n>a. */
839 ifp = sca->ifp;
840 ifp->if_softc = sca;
841 strlcpy(ifp->if_xname, name, IFNAMSIZ);
842 ifp->if_dname = epairname;
843 ifp->if_dunit = unit;
844 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
845 ifp->if_capabilities = IFCAP_VLAN_MTU;
846 ifp->if_capenable = IFCAP_VLAN_MTU;
847 ifp->if_start = epair_start;
848 ifp->if_ioctl = epair_ioctl;
849 ifp->if_init = epair_init;
850 if_setsendqlen(ifp, ifqmaxlen);
851 if_setsendqready(ifp);
852
853 /*
854 * Calculate the etheraddr hashing the hostid and the
855 * interface index. The result would be hopefully unique.
856 * Note that the "a" component of an epair instance may get moved
857 * to a different VNET after creation. In that case its index
858 * will be freed and the index can get reused by new epair instance.
859 * Make sure we do not create same etheraddr again.
860 */
861 getcredhostid(curthread->td_ucred, (unsigned long *)&hostid);
862 if (hostid == 0)
863 arc4rand(&hostid, sizeof(hostid), 0);
864
865 if (ifp->if_index > next_index)
866 next_index = ifp->if_index;
867 else
868 next_index++;
869
870 key[0] = (uint32_t)next_index;
871 key[1] = (uint32_t)(hostid & 0xffffffff);
872 key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff);
873 hash = jenkins_hash32(key, 3, 0);
874
875 eaddr[0] = 0x02;
876 memcpy(&eaddr[1], &hash, 4);
877 eaddr[5] = 0x0a;
878 ether_ifattach(ifp, eaddr);
879 sca->if_qflush = ifp->if_qflush;
880 ifp->if_qflush = epair_qflush;
881 ifp->if_transmit = epair_transmit;
882 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */
883
884 /* Swap the name and finish initialization of interface <n>b. */
885 *dp = 'b';
886
887 ifp = scb->ifp;
888 ifp->if_softc = scb;
889 strlcpy(ifp->if_xname, name, IFNAMSIZ);
890 ifp->if_dname = epairname;
891 ifp->if_dunit = unit;
892 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
893 ifp->if_capabilities = IFCAP_VLAN_MTU;
894 ifp->if_capenable = IFCAP_VLAN_MTU;
895 ifp->if_start = epair_start;
896 ifp->if_ioctl = epair_ioctl;
897 ifp->if_init = epair_init;
898 if_setsendqlen(ifp, ifqmaxlen);
899 if_setsendqready(ifp);
900 /* We need to play some tricks here for the second interface. */
901 strlcpy(name, epairname, len);
902
903 /* Correctly set the name for the cloner list. */
904 strlcpy(name, scb->ifp->if_xname, len);
905 epair_clone_add(ifc, scb);
906
907 scb->if_qflush = ifp->if_qflush;
908 ifp->if_qflush = epair_qflush;
909 ifp->if_transmit = epair_transmit;
910 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */
911
912 /*
913 * Restore name to <n>a as the ifp for this will go into the
914 * cloner list for the initial call.
915 */
916 strlcpy(name, sca->ifp->if_xname, len);
917 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
918
919 /* Tell the world, that we are ready to rock. */
920 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
921 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
922 if_link_state_change(sca->ifp, LINK_STATE_UP);
923 if_link_state_change(scb->ifp, LINK_STATE_UP);
924
925 return (0);
926 }
927
/*
 * Destroy a pair.  Called with either half; the other half ("b" or "a")
 * is torn down first via a recursive if_clone_destroyif() call, which is
 * recognized (and short-circuited) by the NULL softc check below.  The
 * peer may live in a different vnet, hence the CURVNET dance.
 */
static int
epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
{
	struct ifnet *oifp;
	struct epair_softc *sca, *scb;
	int unit, error;

	DPRINTF("ifp=%p\n", ifp);

	/*
	 * In case we called into if_clone_destroyif() ourselves
	 * again to remove the second interface, the softc will be
	 * NULL. In that case do not do anything but return success.
	 */
	if (ifp->if_softc == NULL)
		return (0);

	unit = ifp->if_dunit;
	sca = ifp->if_softc;
	oifp = sca->oifp;
	scb = oifp->if_softc;

	/* Take both halves down before detaching anything. */
	DPRINTF("ifp=%p oifp=%p\n", ifp, oifp);
	if_link_state_change(ifp, LINK_STATE_DOWN);
	if_link_state_change(oifp, LINK_STATE_DOWN);
	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	oifp->if_drv_flags &= ~IFF_DRV_RUNNING;

	/*
	 * Get rid of our second half. As the other of the two
	 * interfaces may reside in a different vnet, we need to
	 * switch before freeing them.
	 */
	CURVNET_SET_QUIET(oifp->if_vnet);
	ether_ifdetach(oifp);
	/*
	 * Wait for all packets to be dispatched to if_input.
	 * The numbers can only go down as the interface is
	 * detached so there is no need to use atomics.
	 */
	DPRINTF("scb refcnt=%u\n", scb->refcount);
	EPAIR_REFCOUNT_ASSERT(scb->refcount == 1,
	    ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount));
	/* Clearing if_softc marks the recursive destroy call as a no-op. */
	oifp->if_softc = NULL;
	error = if_clone_destroyif(ifc, oifp);
	if (error)
		panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
		    __func__, error);
	if_free(oifp);
	ifmedia_removeall(&scb->media);
	free(scb, M_EPAIR);
	CURVNET_RESTORE();

	ether_ifdetach(ifp);
	/*
	 * Wait for all packets to be dispatched to if_input.
	 */
	DPRINTF("sca refcnt=%u\n", sca->refcount);
	EPAIR_REFCOUNT_ASSERT(sca->refcount == 1,
	    ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount));
	if_free(ifp);
	ifmedia_removeall(&sca->media);
	free(sca, M_EPAIR);
	ifc_free_unit(ifc, unit);

	return (0);
}
995
/*
 * Per-vnet initialization: register the epair cloner (and, with VIMAGE,
 * the per-vnet netisr) when a network stack instance comes up.
 */
static void
vnet_epair_init(const void *unused __unused)
{

	V_epair_cloner = if_clone_advanced(epairname, 0,
	    epair_clone_match, epair_clone_create, epair_clone_destroy);
#ifdef VIMAGE
	netisr_register_vnet(&epair_nh);
#endif
}
VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
    vnet_epair_init, NULL);
1008
/*
 * Per-vnet teardown: mirror of vnet_epair_init() — unregister the
 * per-vnet netisr first, then detach the cloner.
 */
static void
vnet_epair_uninit(const void *unused __unused)
{

#ifdef VIMAGE
	netisr_unregister_vnet(&epair_nh);
#endif
	if_clone_detach(V_epair_cloner);
}
VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
    vnet_epair_uninit, NULL);
1020
/*
 * Global module teardown: unregister the netisr and destroy the per-CPU
 * DPCPU state.  Runs via SYSUNINIT on module unload (see epair_modevent,
 * whose MOD_UNLOAD case defers to this).
 */
static void
epair_uninit(const void *unused __unused)
{
	netisr_unregister(&epair_nh);
	epair_dpcpu_detach();
	if (bootverbose)
		printf("%s unloaded.\n", epairname);
}
SYSUNINIT(epair_uninit, SI_SUB_INIT_IF, SI_ORDER_MIDDLE,
    epair_uninit, NULL);
1031
1032 static int
1033 epair_modevent(module_t mod, int type, void *data)
1034 {
1035 int qlimit;
1036
1037 switch (type) {
1038 case MOD_LOAD:
1039 /* For now limit us to one global mutex and one inq. */
1040 epair_dpcpu_init();
1041 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */
1042 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit))
1043 epair_nh.nh_qlimit = qlimit;
1044 netisr_register(&epair_nh);
1045 if (bootverbose)
1046 printf("%s initialized.\n", epairname);
1047 break;
1048 case MOD_UNLOAD:
1049 /* Handled in epair_uninit() */
1050 break;
1051 default:
1052 return (EOPNOTSUPP);
1053 }
1054 return (0);
1055 }
1056
/* Module glue: register if_epair with the pseudo-device subsystem. */
static moduledata_t epair_mod = {
	"if_epair",
	epair_modevent,
	0
};

DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
MODULE_VERSION(if_epair, 1);
Cache object: f7f152194e46d24600d36e5f3201b6d2
|