/*
 * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD: releng/8.4/sys/dev/netmap/ixgbe_netmap.h 231717 2012-02-14 22:49:34Z luigi $
 * $Id: ixgbe_netmap.h 9802 2011-12-02 18:42:37Z luigi $
 *
 * netmap modifications for ixgbe
 *
 * This file is meant to be a reference on how to implement
 * netmap support for a network driver.
 * It contains only static or inline functions used by a single
 * driver; to avoid code replication it is simply #include'd near
 * the beginning of the standard driver source.
 */
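
/*
 * As a sketch of how the inclusion typically looks from the driver
 * side (the exact location and guard are the driver's choice, not
 * mandated by this file), ixgbe.c would carry something like:
 *
 *	#ifdef DEV_NETMAP
 *	#include <dev/netmap/ixgbe_netmap.h>
 *	#endif
 *
 * so that the functions below are compiled in only when the kernel
 * is built with netmap support.
 */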

#include <net/netmap.h>
#include <sys/selinfo.h>
/*
 * Some drivers may need the following headers. Others
 * already include them by default.

#include <vm/vm.h>
#include <vm/pmap.h>

 */

#include <dev/netmap/netmap_kern.h>

/*
 * prototypes for the new API calls that are used by the
 * *_netmap_attach() routine.
 */
static int	ixgbe_netmap_reg(struct ifnet *, int onoff);
static int	ixgbe_netmap_txsync(struct ifnet *, u_int, int);
static int	ixgbe_netmap_rxsync(struct ifnet *, u_int, int);
static void	ixgbe_netmap_lock_wrapper(struct ifnet *, int, u_int);

/*
 * The attach routine, called near the end of ixgbe_attach(),
 * fills the parameters for netmap_attach() and calls it.
 * It cannot fail: in the worst case (such as lack of memory)
 * netmap mode will be disabled and the driver will only
 * operate in standard mode.
 */
static void
ixgbe_netmap_attach(struct adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = adapter->ifp;
	na.separate_locks = 1;	/* this card has separate rx/tx locks */
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_txsync = ixgbe_netmap_txsync;
	na.nm_rxsync = ixgbe_netmap_rxsync;
	na.nm_lock = ixgbe_netmap_lock_wrapper;
	na.nm_register = ixgbe_netmap_reg;
	netmap_attach(&na, adapter->num_queues);
}
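
/*
 * For reference, the call site in the driver would look roughly like
 * the (hypothetical) sketch below, near the end of ixgbe_attach();
 * the driver is free to arrange it otherwise:
 *
 *	static int
 *	ixgbe_attach(device_t dev)
 *	{
 *		...
 *	#ifdef DEV_NETMAP
 *		ixgbe_netmap_attach(adapter);
 *	#endif
 *		...
 *	}
 */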


/*
 * wrapper to export locks to the generic netmap code.
 */
static void
ixgbe_netmap_lock_wrapper(struct ifnet *_a, int what, u_int queueid)
{
	struct adapter *adapter = _a->if_softc;

	ASSERT(queueid < adapter->num_queues);
	switch (what) {
	case NETMAP_CORE_LOCK:
		IXGBE_CORE_LOCK(adapter);
		break;
	case NETMAP_CORE_UNLOCK:
		IXGBE_CORE_UNLOCK(adapter);
		break;
	case NETMAP_TX_LOCK:
		IXGBE_TX_LOCK(&adapter->tx_rings[queueid]);
		break;
	case NETMAP_TX_UNLOCK:
		IXGBE_TX_UNLOCK(&adapter->tx_rings[queueid]);
		break;
	case NETMAP_RX_LOCK:
		IXGBE_RX_LOCK(&adapter->rx_rings[queueid]);
		break;
	case NETMAP_RX_UNLOCK:
		IXGBE_RX_UNLOCK(&adapter->rx_rings[queueid]);
		break;
	}
}


/*
 * Netmap register/unregister. We are already under core lock.
 * Only called on the first register or the last unregister.
 */
static int
ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
{
	struct adapter *adapter = ifp->if_softc;
	struct netmap_adapter *na = NA(ifp);
	int error = 0;

	if (!na)	/* probably, netmap_attach() failed */
		return EINVAL;

	ixgbe_disable_intr(adapter);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	if (onoff) {	/* enable netmap mode */
		ifp->if_capenable |= IFCAP_NETMAP;

		/* save if_transmit and replace with our routine */
		na->if_transmit = ifp->if_transmit;
		ifp->if_transmit = netmap_start;
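		/*
		 * From now on, netmap_start() (part of the netmap core)
		 * diverts packets coming from the host stack to the
		 * netmap host ring, where a netmap client can pick them
		 * up, instead of handing them to the NIC directly.
		 * (A note added here for reference; see netmap_kern.h
		 * for the authoritative description.)
		 */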

		/*
		 * reinitialize the adapter, now with netmap flag set,
		 * so the rings will be set accordingly.
		 */
		ixgbe_init_locked(adapter);
		if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
			error = ENOMEM;
			goto fail;
		}
	} else {	/* reset normal mode (explicit request or netmap failed) */
fail:
		/* restore if_transmit */
		ifp->if_transmit = na->if_transmit;
		ifp->if_capenable &= ~IFCAP_NETMAP;
		/* initialize the card, this time in standard mode */
		ixgbe_init_locked(adapter);	/* also enables intr */
	}
	return (error);
}


/*
 * Reconcile kernel and user view of the transmit ring.
 * This routine might be called frequently so it must be efficient.
 *
 * Userspace has filled tx slots up to ring->cur (excluded).
 * The last unused slot previously known to the kernel was kring->nr_hwcur,
 * and the last interrupt reported kring->nr_hwavail slots available.
 *
 * This function runs under lock (acquired from the caller or internally).
 * It must first update ring->avail to what the kernel knows,
 * subtract the newly used slots (ring->cur - kring->nr_hwcur)
 * from both avail and nr_hwavail, and set kring->nr_hwcur = ring->cur,
 * issuing a dmamap_sync on all slots.
 *
 * Since the ring comes from userspace, its content must be read only once,
 * and validated before being used to update the kernel's structures.
 * (This is also true for every use of the ring in the kernel.)
 *
 * ring->avail is never used, only checked for bogus values.
 *
 * do_lock is set iff the function is called from the ioctl handler.
 * In this case, grab a lock around the body, and also reclaim transmitted
 * buffers irrespective of interrupt mitigation.
 */
static int
ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = &adapter->tx_rings[ring_nr];
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_kring *kring = &na->tx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;

	/*
	 * ixgbe can generate an interrupt on every tx packet, but that
	 * seems very expensive, so we interrupt once every half ring,
	 * or when requested with NS_REPORT.
	 */
	int report_frequency = kring->nkr_num_slots >> 1;
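	/*
	 * For example, on a (hypothetical) 1024-slot ring the code below
	 * sets IXGBE_TXD_CMD_RS on the descriptors at slots 0 and 512,
	 * so the NIC reports completions at most twice per traversal of
	 * the ring, unless a slot explicitly asks via NS_REPORT.
	 */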

	if (do_lock)
		IXGBE_TX_LOCK(txr);
	/* take a copy of ring->cur now, and never read it again */
	k = ring->cur;
	l = k - kring->nr_hwcur;
	if (l < 0)
		l += lim + 1;
	/* if cur is invalid, reinitialize the ring. */
	if (k > lim || l > kring->nr_hwavail) {
		if (do_lock)
			IXGBE_TX_UNLOCK(txr);
		return netmap_ring_reinit(kring);
	}

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/*
	 * Process new packets to send. j is the current index in the
	 * netmap ring, l is the corresponding index in the NIC ring.
	 * The two numbers differ because upon a *_init() we reset
	 * the NIC ring but leave the netmap ring unchanged.
	 * For the transmit ring, we have
	 *
	 *	j = kring->nr_hwcur
	 *	l = IXGBE_TDT (not tracked in the driver)
	 * and
	 *	j == (l + kring->nkr_hwofs) % ring_size
	 *
	 * In this driver kring->nkr_hwofs >= 0, but for other
	 * drivers it might be negative as well.
	 */
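	/*
	 * As a concrete (made-up) example: with a 1024-slot ring and
	 * nkr_hwofs == 4, NIC slot l == 1020 corresponds to netmap slot
	 * j == (1020 + 4) % 1024 == 0. The modulo is what the wraparound
	 * adjustments below implement without a division.
	 */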
	j = kring->nr_hwcur;
	if (j != k) {	/* we have new packets to send */
		l = j - kring->nkr_hwofs;
		if (l < 0)	/* wraparound */
			l += lim + 1;

		while (j != k) {
			/*
			 * Collect per-slot info.
			 * Note that txbuf and curr are indexed by l.
			 *
			 * In this driver we collect the buffer address
			 * (using the PNMB() macro) because we always
			 * need to rewrite it into the NIC ring.
			 * Many other drivers preserve the address, so
			 * we only need to access it if NS_BUF_CHANGED
			 * is set.
			 */
			struct netmap_slot *slot = &ring->slot[j];
			struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[l];
			union ixgbe_adv_tx_desc *curr = &txr->tx_base[l];
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);
			/* XXX type for flags and len ? */
			int flags = ((slot->flags & NS_REPORT) ||
				j == 0 || j == report_frequency) ?
					IXGBE_TXD_CMD_RS : 0;
			int len = slot->len;

			/*
			 * Quick check for valid addr and len.
			 * NMB() returns netmap_buffer_base for invalid
			 * buffer indexes (but the address is still a
			 * valid one to be used in a ring). slot->len is
			 * unsigned so no need to check for negative values.
			 */
			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
ring_reset:
				if (do_lock)
					IXGBE_TX_UNLOCK(txr);
				return netmap_ring_reinit(kring);
			}

			slot->flags &= ~NS_REPORT;
			/*
			 * Fill the slot in the NIC ring.
			 * In this driver we need to rewrite the buffer
			 * address in the NIC ring. Other drivers do not
			 * need this.
			 */
			curr->read.buffer_addr = htole64(paddr);
			curr->read.olinfo_status = htole32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
			curr->read.cmd_type_len =
			    htole32(txr->txd_cmd | len |
				(IXGBE_ADVTXD_DTYP_DATA |
				    IXGBE_ADVTXD_DCMD_DEXT |
				    IXGBE_ADVTXD_DCMD_IFCS |
				    IXGBE_TXD_CMD_EOP | flags) );
			/*
			 * If the buffer has changed, unload and reload
			 * the map (we rewrote the physical address in
			 * the NIC slot above in any case).
			 */
			if (slot->flags & NS_BUF_CHANGED) {
				netmap_reload_map(txr->txtag, txbuf->map, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}

			/* make sure changes to the buffer are synced */
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_PREWRITE);
			j = (j == lim) ? 0 : j + 1;
			l = (l == lim) ? 0 : l + 1;
			n++;
		}
		kring->nr_hwcur = k;	/* the saved ring->cur */

		/* decrease avail by number of sent packets */
		kring->nr_hwavail -= n;

		/* synchronize the NIC ring */
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/* (re)start the transmitter up to slot l (excluded) */
		IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), l);
	}

	/*
	 * Reclaim buffers for completed transmissions.
	 * Because this is expensive (we read a NIC register etc.)
	 * we only do it in specific cases (see below).
	 * In all cases kring->nr_kflags indicates which slot will be
	 * checked upon a tx interrupt (nkr_num_slots means none).
	 */
	if (do_lock) {
		j = 1;	/* forced reclaim, ignore interrupts */
		kring->nr_kflags = kring->nkr_num_slots;
	} else if (kring->nr_hwavail > 0) {
		j = 0;	/* buffers still available: no reclaim, ignore intr. */
		kring->nr_kflags = kring->nkr_num_slots;
	} else {
		/*
		 * no buffers available, locate a slot for which we request
		 * ReportStatus (approximately half ring after next_to_clean)
		 * and record it in kring->nr_kflags.
		 * If the slot has DD set, do the reclaim looking at TDH,
		 * otherwise we go to sleep (in netmap_poll()) and will be
		 * woken up when slot nr_kflags will be ready.
		 */
		struct ixgbe_legacy_tx_desc *txd =
		    (struct ixgbe_legacy_tx_desc *)txr->tx_base;

		j = txr->next_to_clean + kring->nkr_num_slots/2;
		if (j >= kring->nkr_num_slots)
			j -= kring->nkr_num_slots;
		/* round to the closest slot with DD set */
		j = (j < kring->nkr_num_slots / 4 || j >= kring->nkr_num_slots*3/4) ?
			0 : report_frequency;
		kring->nr_kflags = j;	/* the slot to check */
		j = txd[j].upper.fields.status & IXGBE_TXD_STAT_DD;
	}
	if (j) {
		int delta;

		/*
		 * Record completed transmissions.
		 * We (re)use the driver's txr->next_to_clean to keep
		 * track of the most recently completed transmission.
		 *
		 * The datasheet discourages the use of TDH to find out the
		 * number of sent packets. We should rather check the DD
		 * status bit in a packet descriptor. However, we only set
		 * the "report status" bit for some descriptors (a kind of
		 * interrupt mitigation), so we can only check on those.
		 * For the time being we use TDH, as we do it infrequently
		 * enough not to pose performance problems.
		 */
		l = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
		if (l >= kring->nkr_num_slots) {	/* XXX can happen */
			D("TDH wrap %d", l);
			l -= kring->nkr_num_slots;
		}
		delta = l - txr->next_to_clean;
		if (delta) {
			/* some tx completed, increment avail */
			if (delta < 0)
				delta += kring->nkr_num_slots;
			txr->next_to_clean = l;
			kring->nr_hwavail += delta;
			if (kring->nr_hwavail > lim)
				goto ring_reset;
		}
	}
	/* update avail to what the kernel knows */
	ring->avail = kring->nr_hwavail;

	if (do_lock)
		IXGBE_TX_UNLOCK(txr);
	return 0;
}


/*
 * Reconcile kernel and user view of the receive ring.
 * As with txsync, this routine must be efficient and
 * avoid races in accessing the shared regions.
 *
 * When called, userspace has read data from slots kring->nr_hwcur
 * up to ring->cur (excluded).
 *
 * The last interrupt reported kring->nr_hwavail slots available
 * after kring->nr_hwcur.
 * We must subtract the newly consumed slots (cur - nr_hwcur)
 * from nr_hwavail, make the descriptors available for the next reads,
 * and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail.
 *
 * do_lock has a special meaning: please refer to txsync.
 */
static int
ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
{
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	int j, k, l, n, lim = kring->nkr_num_slots - 1;
	int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;

	k = ring->cur;	/* cache and check value, same as in txsync */
	n = k - kring->nr_hwcur;
	if (n < 0)
		n += lim + 1;
	if (k > lim || n > kring->nr_hwavail)	/* userspace is cheating */
		return netmap_ring_reinit(kring);

	if (do_lock)
		IXGBE_RX_LOCK(rxr);
	/* XXX check sync modes */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part, import newly received packets into the netmap ring.
	 *
	 * j is the index of the next free slot in the netmap ring,
	 * and l is the index of the next received packet in the NIC ring,
	 * and they may differ in case if_init() has been called while
	 * in netmap mode. For the receive ring we have
	 *
	 *	j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
	 *	l = rxr->next_to_check
	 * and
	 *	j == (l + kring->nkr_hwofs) % ring_size
	 *
	 * rxr->next_to_check is set to 0 on a ring reinit
	 */
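	/*
	 * E.g. (again a made-up 1024-slot ring with nkr_hwofs == 4): if
	 * the next packet to check sits in NIC slot l == 10, it will be
	 * imported into netmap slot j == (10 + 4) % 1024 == 14.
	 */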
	l = rxr->next_to_check;
	j = rxr->next_to_check + kring->nkr_hwofs;
	if (j > lim)
		j -= lim + 1;

	if (netmap_no_pendintr || force_update) {
		for (n = 0; ; n++) {
			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
			uint32_t staterr = le32toh(curr->wb.upper.status_error);

			if ((staterr & IXGBE_RXD_STAT_DD) == 0)
				break;
			ring->slot[j].len = le16toh(curr->wb.upper.length);
			bus_dmamap_sync(rxr->ptag,
			    rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
			j = (j == lim) ? 0 : j + 1;
			l = (l == lim) ? 0 : l + 1;
		}
		if (n) {	/* update the state variables */
			rxr->next_to_check = l;
			kring->nr_hwavail += n;
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	/*
	 * Skip past packets that userspace has already processed
	 * (from kring->nr_hwcur to ring->cur excluded), and make
	 * the buffers available for reception.
	 * As usual j is the index in the netmap ring, l is the index
	 * in the NIC ring, and j == (l + kring->nkr_hwofs) % ring_size
	 */
	j = kring->nr_hwcur;
	if (j != k) {	/* userspace has read some packets. */
		n = 0;
		l = kring->nr_hwcur - kring->nkr_hwofs;
		if (l < 0)
			l += lim + 1;
		while (j != k) {
			/*
			 * Collect per-slot info, with similar validations
			 * and flag handling as in the txsync code.
			 *
			 * NOTE curr and rxbuf are indexed by l.
			 * Also, this driver needs to update the physical
			 * address in the NIC ring, but other drivers
			 * may not have this requirement.
			 */
			struct netmap_slot *slot = &ring->slot[j];
			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
			struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[l];
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			if (addr == netmap_buffer_base)	/* bad buf */
				goto ring_reset;

			curr->wb.upper.status_error = 0;
			curr->read.pkt_addr = htole64(paddr);
			if (slot->flags & NS_BUF_CHANGED) {
				netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}

			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);

			j = (j == lim) ? 0 : j + 1;
			l = (l == lim) ? 0 : l + 1;
			n++;
		}
		kring->nr_hwavail -= n;
		kring->nr_hwcur = k;
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/*
		 * IMPORTANT: we must leave one free slot in the ring,
		 * so move l back by one unit: if RDT were to catch up
		 * with RDH, the NIC could not tell a full ring from an
		 * empty one.
		 */
		l = (l == 0) ? lim : l - 1;
		IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), l);
	}
	/* tell userspace that there are new packets */
	ring->avail = kring->nr_hwavail;

	if (do_lock)
		IXGBE_RX_UNLOCK(rxr);
	return 0;

ring_reset:
	if (do_lock)
		IXGBE_RX_UNLOCK(rxr);
	return netmap_ring_reinit(kring);
}
/* end of file */