1 /*
2 * Copyright (C) 2013-2016 Universita` di Pisa
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27
28 /*
29 * This module implements the VALE switch for netmap
30
31 --- VALE SWITCH ---
32
33 NMG_LOCK() serializes all modifications to switches and ports.
34 A switch cannot be deleted until all ports are gone.
35
36 For each switch, an SX lock (RWlock on linux) protects
37 deletion of ports. When configuring or deleting a new port, the
38 lock is acquired in exclusive mode (after holding NMG_LOCK).
39 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
 The lock is held throughout the entire forwarding cycle,
 during which the thread may incur a page fault.
 Hence it is important that sleepable shared locks are used.

 On the rx ring, the per-port lock is grabbed initially to reserve
 a number of slots in the ring, then the lock is released,
46 packets are copied from source to destination, and then
47 the lock is acquired again and the receive ring is updated.
48 (A similar thing is done on the tx ring for NIC and host stack
49 ports attached to the switch)
50
51 */
52
53 /*
54 * OS-specific code that is used only within this file.
55 * Other OS-specific code that must be accessed by drivers
56 * is present in netmap_kern.h
57 */
58
59 #if defined(__FreeBSD__)
60 #include <sys/cdefs.h> /* prerequisite */
61 __FBSDID("$FreeBSD$");
62
63 #include <sys/types.h>
64 #include <sys/errno.h>
65 #include <sys/param.h> /* defines used in kernel.h */
66 #include <sys/kernel.h> /* types used in module initialization */
67 #include <sys/conf.h> /* cdevsw struct, UID, GID */
68 #include <sys/sockio.h>
69 #include <sys/socketvar.h> /* struct socket */
70 #include <sys/malloc.h>
71 #include <sys/poll.h>
72 #include <sys/rwlock.h>
73 #include <sys/socket.h> /* sockaddrs */
74 #include <sys/selinfo.h>
75 #include <sys/sysctl.h>
76 #include <net/if.h>
77 #include <net/if_var.h>
78 #include <net/bpf.h> /* BIOCIMMEDIATE */
79 #include <machine/bus.h> /* bus_dmamap_* */
80 #include <sys/endian.h>
81 #include <sys/refcount.h>
82 #include <sys/smp.h>
83
84
85 #elif defined(linux)
86
87 #include "bsd_glue.h"
88
89 #elif defined(__APPLE__)
90
91 #warning OSX support is only partial
92 #include "osx_glue.h"
93
94 #elif defined(_WIN32)
95 #include "win_glue.h"
96
97 #else
98
99 #error Unsupported platform
100
101 #endif /* unsupported */
102
103 /*
104 * common headers
105 */
106
107 #include <net/netmap.h>
108 #include <dev/netmap/netmap_kern.h>
109 #include <dev/netmap/netmap_mem2.h>
110
111 #include <dev/netmap/netmap_bdg.h>
112
113 const char*
114 netmap_bdg_name(struct netmap_vp_adapter *vp)
115 {
116 struct nm_bridge *b = vp->na_bdg;
117 if (b == NULL)
118 return NULL;
119 return b->bdg_basename;
120 }
121
122
#ifndef CONFIG_NET_NS
/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
/* Global bridge table used when per-namespace state is not compiled in;
 * retrieved through netmap_bns_getbridges().
 */
struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
131
132
/* Return nonzero if 'c' is a character allowed in a bridge/port
 * identifier: ASCII letters, decimal digits, or underscore.
 */
static int
nm_is_id_char(const char c)
{
	return (c >= 'a' && c <= 'z') ||
	       (c >= 'A' && c <= 'Z') ||
	       (c >= '0' && c <= '9') ||	/* fixed: was '' (empty char constant) */
	       (c == '_');
}
141
142 /* Validate the name of a bdg port and return the
143 * position of the ":" character. */
144 static int
145 nm_bdg_name_validate(const char *name, size_t prefixlen)
146 {
147 int colon_pos = -1;
148 int i;
149
150 if (!name || strlen(name) < prefixlen) {
151 return -1;
152 }
153
154 for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
155 if (name[i] == ':') {
156 colon_pos = i;
157 break;
158 } else if (!nm_is_id_char(name[i])) {
159 return -1;
160 }
161 }
162
163 if (strlen(name) - colon_pos > IFNAMSIZ) {
164 /* interface name too long */
165 return -1;
166 }
167
168 return colon_pos;
169 }
170
171 /*
172 * locate a bridge among the existing ones.
173 * MUST BE CALLED WITH NMG_LOCK()
174 *
175 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
176 * We assume that this is called with a name of at least NM_NAME chars.
177 */
178 struct nm_bridge *
179 nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
180 {
181 int i, namelen;
182 struct nm_bridge *b = NULL, *bridges;
183 u_int num_bridges;
184
185 NMG_LOCK_ASSERT();
186
187 netmap_bns_getbridges(&bridges, &num_bridges);
188
189 namelen = nm_bdg_name_validate(name,
190 (ops != NULL ? strlen(ops->name) : 0));
191 if (namelen < 0) {
192 nm_prerr("invalid bridge name %s", name ? name : NULL);
193 return NULL;
194 }
195
196 /* lookup the name, remember empty slot if there is one */
197 for (i = 0; i < num_bridges; i++) {
198 struct nm_bridge *x = bridges + i;
199
200 if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
201 if (create && b == NULL)
202 b = x; /* record empty slot */
203 } else if (x->bdg_namelen != namelen) {
204 continue;
205 } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
206 nm_prdis("found '%.*s' at %d", namelen, name, i);
207 b = x;
208 break;
209 }
210 }
211 if (i == num_bridges && b) { /* name not found, can create entry */
212 /* initialize the bridge */
213 nm_prdis("create new bridge %s with ports %d", b->bdg_basename,
214 b->bdg_active_ports);
215 b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
216 if (b->ht == NULL) {
217 nm_prerr("failed to allocate hash table");
218 return NULL;
219 }
220 strncpy(b->bdg_basename, name, namelen);
221 b->bdg_namelen = namelen;
222 b->bdg_active_ports = 0;
223 for (i = 0; i < NM_BDG_MAXPORTS; i++)
224 b->bdg_port_index[i] = i;
225 /* set the default function */
226 b->bdg_ops = b->bdg_saved_ops = *ops;
227 b->private_data = b->ht;
228 b->bdg_flags = 0;
229 NM_BNS_GET(b);
230 }
231 return b;
232 }
233
234
235 int
236 netmap_bdg_free(struct nm_bridge *b)
237 {
238 if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
239 return EBUSY;
240 }
241
242 nm_prdis("marking bridge %s as free", b->bdg_basename);
243 nm_os_free(b->ht);
244 memset(&b->bdg_ops, 0, sizeof(b->bdg_ops));
245 memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops));
246 b->bdg_flags = 0;
247 NM_BNS_PUT(b);
248 return 0;
249 }
250
251 /* Called by external kernel modules (e.g., Openvswitch).
252 * to modify the private data previously given to regops().
253 * 'name' may be just bridge's name (including ':' if it
254 * is not just NM_BDG_NAME).
255 * Called without NMG_LOCK.
256 */
257 int
258 netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
259 void *callback_data, void *auth_token)
260 {
261 void *private_data = NULL;
262 struct nm_bridge *b;
263 int error = 0;
264
265 NMG_LOCK();
266 b = nm_find_bridge(name, 0 /* don't create */, NULL);
267 if (!b) {
268 error = EINVAL;
269 goto unlock_update_priv;
270 }
271 if (!nm_bdg_valid_auth_token(b, auth_token)) {
272 error = EACCES;
273 goto unlock_update_priv;
274 }
275 BDG_WLOCK(b);
276 private_data = callback(b->private_data, callback_data, &error);
277 b->private_data = private_data;
278 BDG_WUNLOCK(b);
279
280 unlock_update_priv:
281 NMG_UNLOCK();
282 return error;
283 }
284
285
286
/* remove from bridge b the ports in slots hw and sw
 * (sw can be -1 if not needed)
 */
void
netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
	int s_hw = hw, s_sw = sw;	/* keep the original slot numbers */
	int i, lim =b->bdg_active_ports;
	uint32_t *tmp = b->tmp_bdg_port_index;

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	*/

	if (netmap_debug & NM_DEBUG_BDG)
		nm_prinf("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
	/* compact the active range [0, lim) by swapping each matched
	 * slot with the last active entry; hw/sw are set to -1 once found
	 */
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			nm_prdis("detach hw %d at %d", hw, i);
			lim--; /* point to last active port */
			tmp[i] = tmp[lim]; /* swap with i */
			tmp[lim] = hw;	/* now this is inactive */
			hw = -1;	/* found, stop looking for it */
		} else if (sw >= 0 && tmp[i] == sw) {
			nm_prdis("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;	/* found, stop looking for it */
		} else {
			i++;	/* only advance when no swap happened */
		}
	}
	if (hw >= 0 || sw >= 0) {
		/* requested slots were not in the active list */
		nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw);
	}

	BDG_WLOCK(b);
	if (b->bdg_ops.dtor)
		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
	b->bdg_ports[s_hw] = NULL;
	if (s_sw >= 0) {
		b->bdg_ports[s_sw] = NULL;
	}
	/* publish the compacted index array and the new count */
	memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	nm_prdis("now %d active ports", lim);
	/* releases the bridge if this was the last port */
	netmap_bdg_free(b);
}
348
349
350 /* nm_bdg_ctl callback for VALE ports */
351 int
352 netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
353 {
354 struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
355 struct nm_bridge *b = vpna->na_bdg;
356
357 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
358 return 0; /* nothing to do */
359 }
360 if (b) {
361 netmap_set_all_rings(na, 0 /* disable */);
362 netmap_bdg_detach_common(b, vpna->bdg_port, -1);
363 vpna->na_bdg = NULL;
364 netmap_set_all_rings(na, 1 /* enable */);
365 }
366 /* I have took reference just for attach */
367 netmap_adapter_put(na);
368 return 0;
369 }
370
371 int
372 netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
373 struct nm_bridge *b)
374 {
375 return NM_NEED_BWRAP;
376 }
377
378 /* Try to get a reference to a netmap adapter attached to a VALE switch.
379 * If the adapter is found (or is created), this function returns 0, a
380 * non NULL pointer is returned into *na, and the caller holds a
381 * reference to the adapter.
382 * If an adapter is not found, then no reference is grabbed and the
383 * function returns an error code, or 0 if there is just a VALE prefix
384 * mismatch. Therefore the caller holds a reference when
385 * (*na != NULL && return == 0).
386 */
387 int
388 netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
389 struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
390 {
391 char *nr_name = hdr->nr_name;
392 const char *ifname;
393 struct ifnet *ifp = NULL;
394 int error = 0;
395 struct netmap_vp_adapter *vpna, *hostna = NULL;
396 struct nm_bridge *b;
397 uint32_t i, j;
398 uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
399 int needed;
400
401 *na = NULL; /* default return value */
402
403 /* first try to see if this is a bridge port. */
404 NMG_LOCK_ASSERT();
405 if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
406 return 0; /* no error, but no VALE prefix */
407 }
408
409 b = nm_find_bridge(nr_name, create, ops);
410 if (b == NULL) {
411 nm_prdis("no bridges available for '%s'", nr_name);
412 return (create ? ENOMEM : ENXIO);
413 }
414 if (strlen(nr_name) < b->bdg_namelen) /* impossible */
415 panic("x");
416
417 /* Now we are sure that name starts with the bridge's name,
418 * lookup the port in the bridge. We need to scan the entire
419 * list. It is not important to hold a WLOCK on the bridge
420 * during the search because NMG_LOCK already guarantees
421 * that there are no other possible writers.
422 */
423
424 /* lookup in the local list of ports */
425 for (j = 0; j < b->bdg_active_ports; j++) {
426 i = b->bdg_port_index[j];
427 vpna = b->bdg_ports[i];
428 nm_prdis("checking %s", vpna->up.name);
429 if (!strcmp(vpna->up.name, nr_name)) {
430 netmap_adapter_get(&vpna->up);
431 nm_prdis("found existing if %s refs %d", nr_name)
432 *na = &vpna->up;
433 return 0;
434 }
435 }
436 /* not found, should we create it? */
437 if (!create)
438 return ENXIO;
439 /* yes we should, see if we have space to attach entries */
440 needed = 2; /* in some cases we only need 1 */
441 if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
442 nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports);
443 return ENOMEM;
444 }
445 /* record the next two ports available, but do not allocate yet */
446 cand = b->bdg_port_index[b->bdg_active_ports];
447 cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
448 nm_prdis("+++ bridge %s port %s used %d avail %d %d",
449 b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
450
451 /*
452 * try see if there is a matching NIC with this name
453 * (after the bridge's name)
454 */
455 ifname = nr_name + b->bdg_namelen + 1;
456 ifp = ifunit_ref(ifname);
457 if (!ifp) {
458 /* Create an ephemeral virtual port.
459 * This block contains all the ephemeral-specific logic.
460 */
461
462 if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
463 error = EINVAL;
464 goto out;
465 }
466
467 /* bdg_netmap_attach creates a struct netmap_adapter */
468 error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna);
469 if (error) {
470 if (netmap_debug & NM_DEBUG_BDG)
471 nm_prerr("error %d", error);
472 goto out;
473 }
474 /* shortcut - we can skip get_hw_na(),
475 * ownership check and nm_bdg_attach()
476 */
477
478 } else {
479 struct netmap_adapter *hw;
480
481 /* the vale:nic syntax is only valid for some commands */
482 switch (hdr->nr_reqtype) {
483 case NETMAP_REQ_VALE_ATTACH:
484 case NETMAP_REQ_VALE_DETACH:
485 case NETMAP_REQ_VALE_POLLING_ENABLE:
486 case NETMAP_REQ_VALE_POLLING_DISABLE:
487 break; /* ok */
488 default:
489 error = EINVAL;
490 goto out;
491 }
492
493 error = netmap_get_hw_na(ifp, nmd, &hw);
494 if (error || hw == NULL)
495 goto out;
496
497 /* host adapter might not be created */
498 error = hw->nm_bdg_attach(nr_name, hw, b);
499 if (error == NM_NEED_BWRAP) {
500 error = b->bdg_ops.bwrap_attach(nr_name, hw);
501 }
502 if (error)
503 goto out;
504 vpna = hw->na_vp;
505 hostna = hw->na_hostvp;
506 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
507 /* Check if we need to skip the host rings. */
508 struct nmreq_vale_attach *areq =
509 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
510 if (areq->reg.nr_mode != NR_REG_NIC_SW) {
511 hostna = NULL;
512 }
513 }
514 }
515
516 BDG_WLOCK(b);
517 vpna->bdg_port = cand;
518 nm_prdis("NIC %p to bridge port %d", vpna, cand);
519 /* bind the port to the bridge (virtual ports are not active) */
520 b->bdg_ports[cand] = vpna;
521 vpna->na_bdg = b;
522 b->bdg_active_ports++;
523 if (hostna != NULL) {
524 /* also bind the host stack to the bridge */
525 b->bdg_ports[cand2] = hostna;
526 hostna->bdg_port = cand2;
527 hostna->na_bdg = b;
528 b->bdg_active_ports++;
529 nm_prdis("host %p to bridge port %d", hostna, cand2);
530 }
531 nm_prdis("if %s refs %d", ifname, vpna->up.na_refcount);
532 BDG_WUNLOCK(b);
533 *na = &vpna->up;
534 netmap_adapter_get(*na);
535
536 out:
537 if (ifp)
538 if_rele(ifp);
539
540 return error;
541 }
542
/* Process NETMAP_REQ_VALE_ATTACH.
 * On success, the port index is returned to userspace through
 * req->port_index. Returns EBUSY if the port already exists or
 * is owned, EACCES on auth failure, EINVAL on a bad request.
 */
int
netmap_bdg_attach(struct nmreq_header *hdr, void *auth_token)
{
	struct nmreq_vale_attach *req =
		(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
	struct netmap_vp_adapter * vpna;
	struct netmap_adapter *na = NULL;
	struct netmap_mem_d *nmd = NULL;
	struct nm_bridge *b = NULL;
	int error;

	NMG_LOCK();
	/* permission check for modified bridges */
	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_exit;
	}

	/* if the request names a memory allocator, take a reference to it */
	if (req->reg.nr_mem_id) {
		nmd = netmap_mem_find(req->reg.nr_mem_id);
		if (nmd == NULL) {
			error = EINVAL;
			goto unlock_exit;
		}
	}

	/* check for existing one */
	error = netmap_get_vale_na(hdr, &na, nmd, 0);
	if (na) {
		/* port already exists: attach must fail */
		error = EBUSY;
		goto unref_exit;
	}
	error = netmap_get_vale_na(hdr, &na,
		nmd, 1 /* create if not exists */);
	if (error) { /* no device */
		goto unlock_exit;
	}

	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto unlock_exit;
	}

	if (NETMAP_OWNED_BY_ANY(na)) {
		error = EBUSY;
		goto unref_exit;
	}

	if (na->nm_bdg_ctl) {
		/* nop for VALE ports. The bwrap needs to put the hwna
		 * in netmap mode (see netmap_bwrap_bdg_ctl)
		 */
		error = na->nm_bdg_ctl(hdr, na);
		if (error)
			goto unref_exit;
		nm_prdis("registered %s to netmap-mode", na->name);
	}
	vpna = (struct netmap_vp_adapter *)na;
	/* report the assigned bridge port back to userspace */
	req->port_index = vpna->bdg_port;

	if (nmd)
		netmap_mem_put(nmd);

	NMG_UNLOCK();
	return 0;

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	if (nmd)
		netmap_mem_put(nmd);

	NMG_UNLOCK();
	return error;
}
621
622
623 int
624 nm_is_bwrap(struct netmap_adapter *na)
625 {
626 return na->nm_register == netmap_bwrap_reg;
627 }
628
/* Process NETMAP_REQ_VALE_DETACH.
 * Thin wrapper: take NMG_LOCK and delegate to the locked variant.
 */
int
netmap_bdg_detach(struct nmreq_header *hdr, void *auth_token)
{
	int ret;

	NMG_LOCK();
	ret = netmap_bdg_detach_locked(hdr, auth_token);
	NMG_UNLOCK();

	return ret;
}
641
/* Core of NETMAP_REQ_VALE_DETACH; caller holds NMG_LOCK. */
int
netmap_bdg_detach_locked(struct nmreq_header *hdr, void *auth_token)
{
	struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
	struct netmap_vp_adapter *vpna;
	struct netmap_adapter *na;
	struct nm_bridge *b = NULL;
	int error;

	/* permission check for modified bridges */
	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto error_exit;
	}

	error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
	if (error) { /* no device, or another bridge or user owns the device */
		goto error_exit;
	}

	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto error_exit;
	} else if (nm_is_bwrap(na) &&
		((struct netmap_bwrap_adapter *)na)->na_polling_state) {
		/* Don't detach a NIC with polling */
		error = EBUSY;
		goto unref_exit;
	}

	vpna = (struct netmap_vp_adapter *)na;
	if (na->na_vp != vpna) {
		/* trying to detach first attach of VALE persistent port attached
		 * to 2 bridges
		 */
		error = EBUSY;
		goto unref_exit;
	}
	/* report the detached port index back to userspace */
	nmreq_det->port_index = vpna->bdg_port;

	if (na->nm_bdg_ctl) {
		/* remove the port from bridge. The bwrap
		 * also needs to put the hwna in normal mode
		 */
		error = na->nm_bdg_ctl(hdr, na);
	}

unref_exit:
	/* drop the reference taken by netmap_get_vale_na() */
	netmap_adapter_put(na);
error_exit:
	return error;

}
696
697
struct nm_bdg_polling_state;
/* Per-kthread polling context: the sub-range of rx rings
 * [qfirst, qlast) handled by one kthread.
 */
struct
nm_bdg_kthread {
	struct nm_kctx *nmk;	/* kernel context running netmap_bwrap_polling() */
	u_int qfirst;		/* first ring polled by this kthread */
	u_int qlast;		/* one past the last ring polled */
	struct nm_bdg_polling_state *bps;	/* back pointer to shared state */
};

/* Polling state of a bwrap adapter. */
struct nm_bdg_polling_state {
	bool configured;	/* kthreads have been created */
	bool stopped;		/* kthreads are not currently running */
	struct netmap_bwrap_adapter *bna;	/* adapter being polled */
	uint32_t mode;		/* NETMAP_POLLING_MODE_{SINGLE,MULTI}_CPU */
	u_int qfirst;		/* overall first ring to poll */
	u_int qlast;		/* overall one past the last ring */
	u_int cpu_from;		/* first CPU used for kthread affinity */
	u_int ncpus;		/* number of polling kthreads */
	struct nm_bdg_kthread *kthreads;	/* array of ncpus contexts */
};
718
719 static void
720 netmap_bwrap_polling(void *data)
721 {
722 struct nm_bdg_kthread *nbk = data;
723 struct netmap_bwrap_adapter *bna;
724 u_int qfirst, qlast, i;
725 struct netmap_kring **kring0, *kring;
726
727 if (!nbk)
728 return;
729 qfirst = nbk->qfirst;
730 qlast = nbk->qlast;
731 bna = nbk->bps->bna;
732 kring0 = NMR(bna->hwna, NR_RX);
733
734 for (i = qfirst; i < qlast; i++) {
735 kring = kring0[i];
736 kring->nm_notify(kring, 0);
737 }
738 }
739
740 static int
741 nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
742 {
743 struct nm_kctx_cfg kcfg;
744 int i, j;
745
746 bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
747 if (bps->kthreads == NULL)
748 return ENOMEM;
749
750 bzero(&kcfg, sizeof(kcfg));
751 kcfg.worker_fn = netmap_bwrap_polling;
752 for (i = 0; i < bps->ncpus; i++) {
753 struct nm_bdg_kthread *t = bps->kthreads + i;
754 int all = (bps->ncpus == 1 &&
755 bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
756 int affinity = bps->cpu_from + i;
757
758 t->bps = bps;
759 t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
760 t->qlast = all ? bps->qlast : t->qfirst + 1;
761 if (netmap_verbose)
762 nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
763 t->qlast);
764
765 kcfg.type = i;
766 kcfg.worker_private = t;
767 t->nmk = nm_os_kctx_create(&kcfg, NULL);
768 if (t->nmk == NULL) {
769 goto cleanup;
770 }
771 nm_os_kctx_worker_setaff(t->nmk, affinity);
772 }
773 return 0;
774
775 cleanup:
776 for (j = 0; j < i; j++) {
777 struct nm_bdg_kthread *t = bps->kthreads + i;
778 nm_os_kctx_destroy(t->nmk);
779 }
780 nm_os_free(bps->kthreads);
781 return EFAULT;
782 }
783
784 /* A variant of ptnetmap_start_kthreads() */
785 static int
786 nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
787 {
788 int error, i, j;
789
790 if (!bps) {
791 nm_prerr("polling is not configured");
792 return EFAULT;
793 }
794 bps->stopped = false;
795
796 for (i = 0; i < bps->ncpus; i++) {
797 struct nm_bdg_kthread *t = bps->kthreads + i;
798 error = nm_os_kctx_worker_start(t->nmk);
799 if (error) {
800 nm_prerr("error in nm_kthread_start(): %d", error);
801 goto cleanup;
802 }
803 }
804 return 0;
805
806 cleanup:
807 for (j = 0; j < i; j++) {
808 struct nm_bdg_kthread *t = bps->kthreads + i;
809 nm_os_kctx_worker_stop(t->nmk);
810 }
811 bps->stopped = true;
812 return error;
813 }
814
815 static void
816 nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
817 {
818 int i;
819
820 if (!bps)
821 return;
822
823 for (i = 0; i < bps->ncpus; i++) {
824 struct nm_bdg_kthread *t = bps->kthreads + i;
825 nm_os_kctx_worker_stop(t->nmk);
826 nm_os_kctx_destroy(t->nmk);
827 }
828 bps->stopped = true;
829 }
830
831 static int
832 get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
833 struct nm_bdg_polling_state *bps)
834 {
835 unsigned int avail_cpus, core_from;
836 unsigned int qfirst, qlast;
837 uint32_t i = req->nr_first_cpu_id;
838 uint32_t req_cpus = req->nr_num_polling_cpus;
839
840 avail_cpus = nm_os_ncpus();
841
842 if (req_cpus == 0) {
843 nm_prerr("req_cpus must be > 0");
844 return EINVAL;
845 } else if (req_cpus >= avail_cpus) {
846 nm_prerr("Cannot use all the CPUs in the system");
847 return EINVAL;
848 }
849
850 if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
851 /* Use a separate core for each ring. If nr_num_polling_cpus>1
852 * more consecutive rings are polled.
853 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
854 * ring 2 and 3 are polled by core 2 and 3, respectively. */
855 if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
856 nm_prerr("Rings %u-%u not in range (have %d rings)",
857 i, i + req_cpus, nma_get_nrings(na, NR_RX));
858 return EINVAL;
859 }
860 qfirst = i;
861 qlast = qfirst + req_cpus;
862 core_from = qfirst;
863
864 } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
865 /* Poll all the rings using a core specified by nr_first_cpu_id.
866 * the number of cores must be 1. */
867 if (req_cpus != 1) {
868 nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
869 "(was %d)", req_cpus);
870 return EINVAL;
871 }
872 qfirst = 0;
873 qlast = nma_get_nrings(na, NR_RX);
874 core_from = i;
875 } else {
876 nm_prerr("Invalid polling mode");
877 return EINVAL;
878 }
879
880 bps->mode = req->nr_mode;
881 bps->qfirst = qfirst;
882 bps->qlast = qlast;
883 bps->cpu_from = core_from;
884 bps->ncpus = req_cpus;
885 nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u",
886 req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
887 "MULTI" : "SINGLE",
888 qfirst, qlast, core_from, req_cpus);
889 return 0;
890 }
891
/* Enable polling mode on a bwrap adapter: allocate the state,
 * validate the request, create and start the kthreads, and
 * disable NIC interrupts.
 */
static int
nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
{
	struct nm_bdg_polling_state *bps;
	struct netmap_bwrap_adapter *bna;
	int error;

	bna = (struct netmap_bwrap_adapter *)na;
	if (bna->na_polling_state) {	/* already in polling mode */
		nm_prerr("ERROR adapter already in polling mode");
		return EFAULT;
	}

	bps = nm_os_malloc(sizeof(*bps));
	if (!bps)
		return ENOMEM;
	bps->configured = false;
	bps->stopped = true;

	/* parse and validate the request into 'bps' */
	if (get_polling_cfg(req, na, bps)) {
		nm_os_free(bps);
		return EINVAL;
	}

	if (nm_bdg_create_kthreads(bps)) {
		nm_os_free(bps);
		return EFAULT;
	}

	bps->configured = true;
	bna->na_polling_state = bps;
	bps->bna = bna;

	/* disable interrupts if possible */
	nma_intr_enable(bna->hwna, 0);
	/* start kthread now */
	error = nm_bdg_polling_start_kthreads(bps);
	if (error) {
		/* roll back: free the state and re-enable interrupts.
		 * NOTE(review): the kthread contexts are freed but not
		 * destroyed with nm_os_kctx_destroy() here — possible
		 * leak on this path; confirm.
		 */
		nm_prerr("ERROR nm_bdg_polling_start_kthread()");
		nm_os_free(bps->kthreads);
		nm_os_free(bps);
		bna->na_polling_state = NULL;
		nma_intr_enable(bna->hwna, 1);
	}
	return error;
}
938
939 static int
940 nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
941 {
942 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
943 struct nm_bdg_polling_state *bps;
944
945 if (!bna->na_polling_state) {
946 nm_prerr("ERROR adapter is not in polling mode");
947 return EFAULT;
948 }
949 bps = bna->na_polling_state;
950 nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
951 bps->configured = false;
952 nm_os_free(bps);
953 bna->na_polling_state = NULL;
954 /* re-enable interrupts */
955 nma_intr_enable(bna->hwna, 1);
956 return 0;
957 }
958
959 int
960 nm_bdg_polling(struct nmreq_header *hdr)
961 {
962 struct nmreq_vale_polling *req =
963 (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
964 struct netmap_adapter *na = NULL;
965 int error = 0;
966
967 NMG_LOCK();
968 error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
969 if (na && !error) {
970 if (!nm_is_bwrap(na)) {
971 error = EOPNOTSUPP;
972 } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
973 error = nm_bdg_ctl_polling_start(req, na);
974 if (!error)
975 netmap_adapter_get(na);
976 } else {
977 error = nm_bdg_ctl_polling_stop(na);
978 if (!error)
979 netmap_adapter_put(na);
980 }
981 netmap_adapter_put(na);
982 } else if (!na && !error) {
983 /* Not VALE port. */
984 error = EINVAL;
985 }
986 NMG_UNLOCK();
987
988 return error;
989 }
990
/* Called by external kernel modules (e.g., Openvswitch).
 * to set configure/lookup/dtor functions of a VALE instance.
 * Register callbacks to the given bridge. 'name' may be just
 * bridge's name (including ':' if it is not just NM_BDG_NAME).
 *
 * Called without NMG_LOCK.
 */
int
netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
{
	struct nm_bridge *b;
	int error = 0;

	NMG_LOCK();
	b = nm_find_bridge(name, 0 /* don't create */, NULL);
	if (!b) {
		error = ENXIO;
		goto unlock_regops;
	}
	if (!nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_regops;
	}

	BDG_WLOCK(b);
	if (!bdg_ops) {
		/* resetting the bridge: clear the forwarding table and
		 * restore the ops saved when the bridge was created
		 */
		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
		b->bdg_ops = b->bdg_saved_ops;
		b->private_data = b->ht;
	} else {
		/* modifying the bridge: install the caller's private data
		 * and override only the callbacks it actually provides
		 */
		b->private_data = private_data;
#define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m
		nm_bdg_override(lookup);
		nm_bdg_override(config);
		nm_bdg_override(dtor);
		nm_bdg_override(vp_create);
		nm_bdg_override(bwrap_attach);
#undef nm_bdg_override

	}
	BDG_WUNLOCK(b);

unlock_regops:
	NMG_UNLOCK();
	return error;
}
1040
1041
/* Invoke the config callback of a bridge, if one is installed.
 * Returns EINVAL if the bridge does not exist or has no config op.
 */
int
netmap_bdg_config(struct nm_ifreq *nr)
{
	struct nm_bridge *b;
	int error = EINVAL;

	NMG_LOCK();
	b = nm_find_bridge(nr->nifr_name, 0, NULL);
	if (!b) {
		NMG_UNLOCK();
		return error;
	}
	NMG_UNLOCK();
	/* Don't call config() with NMG_LOCK() held */
	/* NOTE(review): 'b' is dereferenced after NMG_UNLOCK(); this
	 * appears to rely on bridges not being deleted while ports are
	 * attached (see file header comment) — confirm.
	 */
	BDG_RLOCK(b);
	if (b->bdg_ops.config != NULL)
		error = b->bdg_ops.config(nr);
	BDG_RUNLOCK(b);
	return error;
}
1062
1063
1064 /* nm_register callback for VALE ports */
1065 int
1066 netmap_vp_reg(struct netmap_adapter *na, int onoff)
1067 {
1068 struct netmap_vp_adapter *vpna =
1069 (struct netmap_vp_adapter*)na;
1070
1071 /* persistent ports may be put in netmap mode
1072 * before being attached to a bridge
1073 */
1074 if (vpna->na_bdg)
1075 BDG_WLOCK(vpna->na_bdg);
1076 if (onoff) {
1077 netmap_krings_mode_commit(na, onoff);
1078 if (na->active_fds == 0)
1079 na->na_flags |= NAF_NETMAP_ON;
1080 /* XXX on FreeBSD, persistent VALE ports should also
1081 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
1082 */
1083 } else {
1084 if (na->active_fds == 0)
1085 na->na_flags &= ~NAF_NETMAP_ON;
1086 netmap_krings_mode_commit(na, onoff);
1087 }
1088 if (vpna->na_bdg)
1089 BDG_WUNLOCK(vpna->na_bdg);
1090 return 0;
1091 }
1092
1093
/* rxsync code used by VALE ports nm_rxsync callback and also
 * internally by the bwrap
 */
static int
netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;	/* last valid slot index */
	u_int head = kring->rhead;
	int n;

	/* out-of-range head from userspace: reinit the ring */
	if (head > lim) {
		nm_prerr("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released. */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* consistency check, but nothing really important here */
		for (n = 0; likely(nm_i != head); n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			void *addr = NMB(na, slot);

			/* presumably NMB() maps invalid buffer indexes to the
			 * base buffer — confirm against netmap_kern.h
			 */
			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
				nm_prerr("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}

	n = 0;
done:
	return n;
}
1137
1138 /*
1139 * nm_rxsync callback for VALE ports
1140 * user process reading from a VALE switch.
1141 * Already protected against concurrent calls from userspace,
1142 * but we must acquire the queue's lock to protect against
1143 * writers on the same queue.
1144 */
1145 int
1146 netmap_vp_rxsync(struct netmap_kring *kring, int flags)
1147 {
1148 int n;
1149
1150 mtx_lock(&kring->q_lock);
1151 n = netmap_vp_rxsync_locked(kring, flags);
1152 mtx_unlock(&kring->q_lock);
1153 return n;
1154 }
1155
1156 int
1157 netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
1158 struct netmap_bdg_ops *ops)
1159 {
1160 return ops->bwrap_attach(nr_name, hwna);
1161 }
1162
1163
1164 /* Bridge wrapper code (bwrap).
1165 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
1166 * VALE switch.
1167 * The main task is to swap the meaning of tx and rx rings to match the
1168 * expectations of the VALE switch code (see nm_bdg_flush).
1169 *
1170 * The bwrap works by interposing a netmap_bwrap_adapter between the
1171 * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
1173 * translates all callbacks to what the hwna expects.
1174 *
1175 * Note that we have to intercept callbacks coming from two sides:
1176 *
1177 * - callbacks coming from the netmap module are intercepted by
1178 * passing around the netmap_bwrap_adapter instead of the hwna
1179 *
1180 * - callbacks coming from outside of the netmap module only know
1181 * about the hwna. This, however, only happens in interrupt
1182 * handlers, where only the hwna->nm_notify callback is called.
1183 * What the bwrap does is to overwrite the hwna->nm_notify callback
1184 * with its own netmap_bwrap_intr_notify.
1185 * XXX This assumes that the hwna->nm_notify callback was the
1186 * standard netmap_notify(), as it is the case for nic adapters.
1187 * Any additional action performed by hwna->nm_notify will not be
1188 * performed by netmap_bwrap_intr_notify.
1189 *
1190 * Additionally, the bwrap can optionally attach the host rings pair
1191 * of the wrapped adapter to a different port of the switch.
1192 */
1193
1194
/* nm_dtor callback for bwraps.
 * Undoes the work of netmap_bwrap_attach_common(): releases the
 * host-adapter memory reference, detaches the port(s) from the bridge,
 * restores the hwna fields hijacked at attach time and finally drops
 * the reference on the wrapped hwna.
 */
static void
netmap_bwrap_dtor(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct nm_bridge *b = bna->up.na_bdg,
		*bh = bna->host.na_bdg;

	/* release the memory allocator reference taken for the
	 * embedded host adapter, if any */
	if (bna->host.up.nm_mem)
		netmap_mem_put(bna->host.up.nm_mem);

	/* detach the NIC port and, if it was attached, the host
	 * port from the switch (-1 means "no host port") */
	if (b) {
		netmap_bdg_detach_common(b, bna->up.bdg_port,
			    (bh ? bna->host.bdg_port : -1));
	}

	nm_prdis("na %p", na);
	na->ifp = NULL;
	bna->host.up.ifp = NULL;
	/* restore the fields we overwrote on the hwna at attach time */
	hwna->na_vp = bna->saved_na_vp;
	hwna->na_hostvp = NULL;
	hwna->na_private = NULL;
	hwna->na_flags &= ~NAF_BUSY; /* the NIC can be used again */
	netmap_adapter_put(hwna);

}
1221
1222
1223 /*
1224 * Intr callback for NICs connected to a bridge.
1225 * Simply ignore tx interrupts (maybe we could try to recover space ?)
1226 * and pass received packets from nic to the bridge.
1227 *
1228 * XXX TODO check locking: this is called from the interrupt
1229 * handler so we should make sure that the interface is not
1230 * disconnected while passing down an interrupt.
1231 *
1232 * Note, no user process can access this NIC or the host stack.
1233 * The only part of the ring that is significant are the slots,
1234 * and head/cur/tail are set from the kring as needed
1235 * (part as a receive ring, part as a transmit ring).
1236 *
1237 * callback that overwrites the hwna notify callback.
1238 * Packets come from the outside or from the host stack and are put on an
1239 * hwna rx ring.
1240 * The bridge wrapper then sends the packets through the bridge.
1241 */
int
netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;		/* the wrapped hwna */
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_kring *bkring;
	struct netmap_vp_adapter *vpna = &bna->up;
	u_int ring_nr = kring->ring_id;
	int ret = NM_IRQ_COMPLETED;
	int error;

	if (netmap_debug & NM_DEBUG_RXINTR)
		nm_prinf("%s %s 0x%x", na->name, kring->name, flags);

	/* the bridge-port tx kring paired with this hwna rx kring */
	bkring = vpna->up.tx_rings[ring_nr];

	/* make sure the ring is not disabled */
	if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
		return EIO;
	}

	if (netmap_debug & NM_DEBUG_RXINTR)
		nm_prinf("%s head %d cur %d tail %d", na->name,
			kring->rhead, kring->rcur, kring->rtail);

	/* simulate a user wakeup on the rx ring
	 * fetch packets that have arrived.
	 */
	error = kring->nm_sync(kring, 0);
	if (error)
		goto put_out;
	if (kring->nr_hwcur == kring->nr_hwtail) {
		/* spurious interrupt: nothing new arrived */
		if (netmap_verbose)
			nm_prlim(1, "interrupt with no packets on %s",
				kring->name);
		goto put_out;
	}

	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
	 * to push all packets out.
	 */
	bkring->rhead = bkring->rcur = kring->nr_hwtail;

	/* txsync on the bridge port: this forwards through the switch */
	bkring->nm_sync(bkring, flags);

	/* mark all buffers as released on this ring */
	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
	/* another call to actually release the buffers */
	error = kring->nm_sync(kring, 0);

	/* The second rxsync may have further advanced hwtail. If this happens,
	 * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
	if (kring->rcur != kring->nr_hwtail) {
		ret = NM_IRQ_RESCHED;
	}
put_out:
	nm_kr_put(kring);

	return error ? error : ret;
}
1303
1304
/* nm_register callback for bwrap.
 * Propagates the memory lut and the pending ring modes down to the
 * wrapped hwna, forwards the register request, and (de)installs the
 * interrupt-notify interception on the hwna rx rings.
 */
int
netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_vp_adapter *hostna = &bna->host;
	int error, i;
	enum txrx t;

	nm_prdis("%s %s", na->name, onoff ? "on" : "off");

	if (onoff) {
		/* netmap_do_regif has been called on the bwrap na.
		 * We need to pass the information about the
		 * memory allocator down to the hwna before
		 * putting it in netmap mode
		 */
		hwna->na_lut = na->na_lut;

		if (hostna->na_bdg) {
			/* if the host rings have been attached to switch,
			 * we need to copy the memory allocator information
			 * in the hostna also
			 */
			hostna->up.na_lut = na->na_lut;
		}

	}

	/* pass down the pending ring state information
	 * (remember that tx and rx are swapped across the wrapper) */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(na, t); i++) {
			NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
				NMR(na, t)[i]->nr_pending_mode;
		}
	}

	/* forward the request to the hwna */
	error = hwna->nm_register(hwna, onoff);
	if (error)
		return error;

	/* copy up the current ring state information */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(na, t); i++) {
			struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
			NMR(na, t)[i]->nr_mode = kring->nr_mode;
		}
	}

	/* impersonate a netmap_vp_adapter */
	netmap_vp_reg(na, onoff);
	if (hostna->na_bdg)
		netmap_vp_reg(&hostna->up, onoff);

	if (onoff) {
		u_int i;
		/* intercept the hwna nm_notify callback on the hw rings */
		for (i = 0; i < hwna->num_rx_rings; i++) {
			hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
			hwna->rx_rings[i]->nm_notify = bna->nm_intr_notify;
		}
		i = hwna->num_rx_rings; /* for safety */
		/* save the host ring notify unconditionally */
		for (; i < netmap_real_rings(hwna, NR_RX); i++) {
			hwna->rx_rings[i]->save_notify =
				hwna->rx_rings[i]->nm_notify;
			if (hostna->na_bdg) {
				/* also intercept the host ring notify */
				hwna->rx_rings[i]->nm_notify =
					netmap_bwrap_intr_notify;
				na->tx_rings[i]->nm_sync = na->nm_txsync;
			}
		}
		if (na->active_fds == 0)
			na->na_flags |= NAF_NETMAP_ON;
	} else {
		u_int i;

		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;

		/* reset all notify callbacks (including host ring) */
		for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
			hwna->rx_rings[i]->nm_notify =
				hwna->rx_rings[i]->save_notify;
			hwna->rx_rings[i]->save_notify = NULL;
		}
		/* drop the lut information copied down at register time */
		hwna->na_lut.lut = NULL;
		hwna->na_lut.plut = NULL;
		hwna->na_lut.objtotal = 0;
		hwna->na_lut.objsize = 0;

		/* reset the number of host rings to default */
		for_rx_tx(t) {
			nma_set_host_nrings(hwna, t, 1);
		}

	}

	return 0;
}
1409
/* nm_config callback for bwrap.
 * Refreshes the hwna configuration and reports it back with the tx/rx
 * meanings swapped, then sizes the optional host rings.
 */
static int
netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	int error;

	/* cache the lut in the embedded host adapter */
	error = netmap_mem_get_lut(hwna->nm_mem, &bna->host.up.na_lut);
	if (error)
		return error;

	/* Forward the request to the hwna. It may happen that nobody
	 * registered hwna yet, so netmap_mem_get_lut() may have not
	 * been called yet. */
	error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
	if (error)
		return error;
	netmap_update_config(hwna);
	/* swap the results and propagate: the bwrap tx side maps to
	 * the hwna rx side and viceversa */
	info->num_tx_rings = hwna->num_rx_rings;
	info->num_tx_descs = hwna->num_rx_desc;
	info->num_rx_rings = hwna->num_tx_rings;
	info->num_rx_descs = hwna->num_tx_desc;
	info->rx_buf_maxsize = hwna->rx_buf_maxsize;

	if (na->na_flags & NAF_HOST_RINGS) {
		struct netmap_adapter *hostna = &bna->host.up;
		enum txrx t;

		/* limit the number of host rings to that of hw */
		if (na->na_flags & NAF_HOST_ALL) {
			hostna->num_tx_rings = nma_get_nrings(hwna, NR_RX);
			hostna->num_rx_rings = nma_get_nrings(hwna, NR_TX);
		} else {
			/* clamp to [1, nrings], defaulting to 1 */
			nm_bound_var(&hostna->num_tx_rings, 1, 1,
				nma_get_nrings(hwna, NR_TX), NULL);
			nm_bound_var(&hostna->num_rx_rings, 1, 1,
				nma_get_nrings(hwna, NR_RX), NULL);
		}
		for_rx_tx(t) {
			enum txrx r = nm_txrx_swap(t);
			u_int nr = nma_get_nrings(hostna, t);

			nma_set_host_nrings(na, t, nr);
			/* make sure the hwna has at least as many
			 * host rings as we need */
			if (nma_get_host_nrings(hwna, t) < nr) {
				nma_set_host_nrings(hwna, t, nr);
			}
			/* slots per host ring come from the (swapped)
			 * hw side */
			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
		}
	}

	return 0;
}
1466
1467 /* nm_bufcfg callback for bwrap */
1468 static int
1469 netmap_bwrap_bufcfg(struct netmap_kring *kring, uint64_t target)
1470 {
1471 struct netmap_adapter *na = kring->na;
1472 struct netmap_bwrap_adapter *bna =
1473 (struct netmap_bwrap_adapter *)na;
1474 struct netmap_adapter *hwna = bna->hwna;
1475 struct netmap_kring *hwkring;
1476 enum txrx r;
1477 int error;
1478
1479 /* we need the hw kring that corresponds to the bwrap one:
1480 * remember that rx and tx are swapped
1481 */
1482 r = nm_txrx_swap(kring->tx);
1483 hwkring = NMR(hwna, r)[kring->ring_id];
1484
1485 /* copy down the offset information, forward the request
1486 * and copy up the results
1487 */
1488 hwkring->offset_mask = kring->offset_mask;
1489 hwkring->offset_max = kring->offset_max;
1490 hwkring->offset_gap = kring->offset_gap;
1491
1492 error = hwkring->nm_bufcfg(hwkring, target);
1493 if (error)
1494 return error;
1495
1496 kring->hwbuf_len = hwkring->hwbuf_len;
1497 kring->buf_align = hwkring->buf_align;
1498
1499 return 0;
1500 }
1501
/* nm_krings_create callback for bwrap.
 * The bwrap krings share the netmap rings of the hwna krings, with
 * tx and rx swapped, so the hwna krings are created first and then
 * cross-linked.
 */
int
netmap_bwrap_krings_create_common(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_adapter *hostna = &bna->host.up;
	int i, error = 0;
	enum txrx t;

	/* also create the hwna krings */
	error = hwna->nm_krings_create(hwna);
	if (error) {
		return error;
	}

	/* increment the usage counter for all the hwna krings */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users++;
			/* this to prevent deletion of the rings through
			 * our krings, instead of through the hwna ones */
			NMR(na, t)[i]->nr_kflags |= NKR_NEEDRING;
		}
	}

	/* now create the actual rings */
	error = netmap_mem_rings_create(hwna);
	if (error) {
		goto err_dec_users;
	}

	/* cross-link the netmap rings
	 * The original number of rings comes from hwna,
	 * rx rings on one side equals tx rings on the other.
	 */
	for_rx_tx(t) {
		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
		for (i = 0; i < netmap_all_rings(hwna, r); i++) {
			NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
			NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
		}
	}

	if (na->na_flags & NAF_HOST_RINGS) {
		/* the hostna rings are the host rings of the bwrap.
		 * The corresponding krings must point back to the
		 * hostna
		 */
		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
		for_rx_tx(t) {
			for (i = 0; i < nma_get_nrings(hostna, t); i++) {
				NMR(hostna, t)[i]->na = hostna;
			}
		}
	}

	return 0;

err_dec_users:
	/* roll back the refcounting done above, then destroy the
	 * hwna krings we created */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users--;
			NMR(na, t)[i]->users--;
		}
	}
	hwna->nm_krings_delete(hwna);
	return error;
}
1573
1574
1575 void
1576 netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
1577 {
1578 struct netmap_bwrap_adapter *bna =
1579 (struct netmap_bwrap_adapter *)na;
1580 struct netmap_adapter *hwna = bna->hwna;
1581 enum txrx t;
1582 int i;
1583
1584 nm_prdis("%s", na->name);
1585
1586 /* decrement the usage counter for all the hwna krings */
1587 for_rx_tx(t) {
1588 for (i = 0; i < netmap_all_rings(hwna, t); i++) {
1589 NMR(hwna, t)[i]->users--;
1590 NMR(na, t)[i]->users--;
1591 }
1592 }
1593
1594 /* delete any netmap rings that are no longer needed */
1595 netmap_mem_rings_delete(hwna);
1596 hwna->nm_krings_delete(hwna);
1597 }
1598
1599
/* notify method for the bridge-->hwna direction.
 * Packets forwarded by the switch have landed on the bwrap rx kring;
 * push them out through the paired hwna tx kring and then release
 * the consumed slots with a second rxsync.
 */
int
netmap_bwrap_notify(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_adapter *hwna = bna->hwna;
	u_int ring_n = kring->ring_id;
	u_int lim = kring->nkr_num_slots - 1;
	struct netmap_kring *hw_kring;
	int error;

	/* NOTE(review): the NULL guards below are moot since kring and
	 * na were already dereferenced above; harmless because nm_prdis
	 * normally compiles to nothing. */
	nm_prdis("%s: na %s hwna %s",
		(kring ? kring->name : "NULL!"),
		(na ? na->name : "NULL!"),
		(hwna ? hwna->name : "NULL!"));
	hw_kring = hwna->tx_rings[ring_n];

	if (nm_kr_tryget(hw_kring, 0, NULL)) {
		return ENXIO;
	}

	/* first step: simulate a user wakeup on the rx ring */
	netmap_vp_rxsync(kring, flags);
	nm_prdis("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		na->name, ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		kring->rhead, kring->rcur, kring->rtail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
	/* second step: the new packets are sent on the tx ring
	 * (which is actually the same ring)
	 */
	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
	error = hw_kring->nm_sync(hw_kring, flags);
	if (error)
		goto put_out;

	/* third step: now we are back the rx ring */
	/* claim ownership on all hw owned bufs */
	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */

	/* fourth step: the user goes to sleep again, causing another rxsync */
	netmap_vp_rxsync(kring, flags);
	nm_prdis("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		na->name, ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		kring->rhead, kring->rcur, kring->rtail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
put_out:
	nm_kr_put(hw_kring);

	return error ? error : NM_IRQ_COMPLETED;
}
1653
1654
1655 /* nm_bdg_ctl callback for the bwrap.
1656 * Called on bridge-attach and detach, as an effect of valectl -[ahd].
1657 * On attach, it needs to provide a fake netmap_priv_d structure and
1658 * perform a netmap_do_regif() on the bwrap. This will put both the
1659 * bwrap and the hwna in netmap mode, with the netmap rings shared
 * and cross linked. Moreover, it will start intercepting interrupts
1661 * directed to hwna.
1662 */
static int
netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
	struct netmap_priv_d *npriv;
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	int error = 0;

	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
		/* attach path */
		struct nmreq_vale_attach *req =
			(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
		if (req->reg.nr_ringid != 0 ||
			(req->reg.nr_mode != NR_REG_ALL_NIC &&
			 req->reg.nr_mode != NR_REG_NIC_SW)) {
			/* We only support attaching all the NIC rings
			 * and/or the host stack. */
			return EINVAL;
		}
		if (NETMAP_OWNED_BY_ANY(na)) {
			return EBUSY;
		}
		if (bna->na_kpriv) {
			/* already attached: nothing to do */
			return 0;
		}
		/* fake priv, standing in for a userspace file descriptor */
		npriv = netmap_priv_new();
		if (npriv == NULL)
			return ENOMEM;
		npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
		error = netmap_do_regif(npriv, na, hdr);
		if (error) {
			netmap_priv_delete(npriv);
			netmap_mem_restore(bna->hwna);
			return error;
		}
		bna->na_kpriv = npriv;	/* keep it for the detach path */
		na->na_flags |= NAF_BUSY;
	} else {
		/* detach path */
		if (na->active_fds == 0) /* not registered */
			return EINVAL;
		/* deleting the priv undoes the regif done above */
		netmap_priv_delete(bna->na_kpriv);
		bna->na_kpriv = NULL;
		na->na_flags &= ~NAF_BUSY;
		netmap_mem_restore(bna->hwna);
	}

	return error;
}
1710
1711 /* attach a bridge wrapper to the 'real' device */
1712 int
1713 netmap_bwrap_attach_common(struct netmap_adapter *na,
1714 struct netmap_adapter *hwna)
1715 {
1716 struct netmap_bwrap_adapter *bna;
1717 struct netmap_adapter *hostna = NULL;
1718 int error = 0;
1719 enum txrx t;
1720
1721 /* make sure the NIC is not already in use */
1722 if (NETMAP_OWNED_BY_ANY(hwna)) {
1723 nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name);
1724 return EBUSY;
1725 }
1726
1727 bna = (struct netmap_bwrap_adapter *)na;
1728 /* make bwrap ifp point to the real ifp */
1729 na->ifp = hwna->ifp;
1730 if_ref(na->ifp);
1731 na->na_private = bna;
1732 /* fill the ring data for the bwrap adapter with rx/tx meanings
1733 * swapped. The real cross-linking will be done during register,
1734 * when all the krings will have been created.
1735 */
1736 for_rx_tx(t) {
1737 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
1738 nma_set_nrings(na, t, nma_get_nrings(hwna, r));
1739 nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
1740 }
1741 na->nm_dtor = netmap_bwrap_dtor;
1742 na->nm_config = netmap_bwrap_config;
1743 na->nm_bufcfg = netmap_bwrap_bufcfg;
1744 na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
1745 na->pdev = hwna->pdev;
1746 na->nm_mem = netmap_mem_get(hwna->nm_mem);
1747 na->virt_hdr_len = hwna->virt_hdr_len;
1748 na->rx_buf_maxsize = hwna->rx_buf_maxsize;
1749
1750 bna->hwna = hwna;
1751 netmap_adapter_get(hwna);
1752 hwna->na_private = bna; /* weak reference */
1753 bna->saved_na_vp = hwna->na_vp;
1754 hwna->na_vp = &bna->up;
1755 bna->up.up.na_vp = &(bna->up);
1756
1757 if (hwna->na_flags & NAF_HOST_RINGS) {
1758 if (hwna->na_flags & NAF_SW_ONLY)
1759 na->na_flags |= NAF_SW_ONLY;
1760 na->na_flags |= NAF_HOST_RINGS;
1761 hostna = &bna->host.up;
1762
1763 snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
1764 hostna->ifp = hwna->ifp;
1765 // hostna->nm_txsync = netmap_bwrap_host_txsync;
1766 // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1767 hostna->nm_mem = netmap_mem_get(na->nm_mem);
1768 hostna->na_private = bna;
1769 hostna->na_vp = &bna->up;
1770 na->na_hostvp = hwna->na_hostvp =
1771 hostna->na_hostvp = &bna->host;
1772 hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
1773 hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
1774 /* bwrap_config() will determine the number of host rings */
1775 }
1776 if (hwna->na_flags & NAF_MOREFRAG)
1777 na->na_flags |= NAF_MOREFRAG;
1778
1779 nm_prdis("%s<->%s txr %d txd %d rxr %d rxd %d",
1780 na->name, ifp->if_xname,
1781 na->num_tx_rings, na->num_tx_desc,
1782 na->num_rx_rings, na->num_rx_desc);
1783
1784 error = netmap_attach_common(na);
1785 if (error) {
1786 goto err_put;
1787 }
1788 hwna->na_flags |= NAF_BUSY;
1789 return 0;
1790
1791 err_put:
1792 hwna->na_vp = hwna->na_hostvp = NULL;
1793 netmap_adapter_put(hwna);
1794 return error;
1795
1796 }
1797
1798 struct nm_bridge *
1799 netmap_init_bridges2(u_int n)
1800 {
1801 int i;
1802 struct nm_bridge *b;
1803
1804 b = nm_os_malloc(sizeof(struct nm_bridge) * n);
1805 if (b == NULL)
1806 return NULL;
1807 for (i = 0; i < n; i++)
1808 BDG_RWINIT(&b[i]);
1809 return b;
1810 }
1811
1812 void
1813 netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
1814 {
1815 int i;
1816
1817 if (b == NULL)
1818 return;
1819
1820 for (i = 0; i < n; i++)
1821 BDG_RWDESTROY(&b[i]);
1822 nm_os_free(b);
1823 }
1824
/* Module-init hook: create the global bridge array.
 * With CONFIG_NET_NS (linux network namespaces) bridges are
 * per-namespace and managed by the bns code instead.
 * Returns 0 on success or ENOMEM.
 */
int
netmap_init_bridges(void)
{
#ifdef CONFIG_NET_NS
	return netmap_bns_register();
#else
	nm_bridges = netmap_init_bridges2(vale_max_bridges);
	if (nm_bridges == NULL)
		return ENOMEM;
	return 0;
#endif
}
1837
/* Module-fini hook: release the global bridge array
 * (or the per-namespace state when CONFIG_NET_NS is set).
 */
void
netmap_uninit_bridges(void)
{
#ifdef CONFIG_NET_NS
	netmap_bns_unregister();
#else
	netmap_uninit_bridges2(nm_bridges, vale_max_bridges);
#endif
}
Cache object: 1d72653116fc082f828f00f27fe9b9be
|