FreeBSD/Linux Kernel Cross Reference
sys/dev/sfxge/sfxge_rx.c


    1 /*-
    2  * Copyright (c) 2010-2011 Solarflare Communications, Inc.
    3  * All rights reserved.
    4  *
    5  * This software was developed in part by Philip Paeps under contract for
    6  * Solarflare Communications, Inc.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/9.2/sys/dev/sfxge/sfxge_rx.c 227569 2011-11-16 17:11:13Z philip $");
   32 
   33 #include <sys/types.h>
   34 #include <sys/mbuf.h>
   35 #include <sys/smp.h>
   36 #include <sys/socket.h>
   37 #include <sys/sysctl.h>
   38 #include <sys/limits.h>
   39 
   40 #include <net/ethernet.h>
   41 #include <net/if.h>
   42 #include <net/if_vlan_var.h>
   43 
   44 #include <netinet/in.h>
   45 #include <netinet/ip.h>
   46 #include <netinet/ip6.h>
   47 #include <netinet/tcp.h>
   48 
   49 #include <machine/in_cksum.h>
   50 
   51 #include "common/efx.h"
   52 
   53 
   54 #include "sfxge.h"
   55 #include "sfxge_rx.h"
   56 
   57 #define RX_REFILL_THRESHOLD (EFX_RXQ_LIMIT(SFXGE_NDESCS) * 9 / 10)
   58 #define RX_REFILL_THRESHOLD_2 (RX_REFILL_THRESHOLD / 2)
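      /* The receive queue is topped up from sfxge_rx_qcomplete() whenever its
       * fill level (added - completed) drops below RX_REFILL_THRESHOLD, i.e.
       * below 90% of the queue limit.
       */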
   59 
   60 /* Size of the LRO hash table.  Must be a power of 2.  A larger table
   61  * means we can accelerate a larger number of streams.
   62  */
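      /* The power-of-2 requirement allows the table to be indexed with a
       * simple mask: sfxge_lro_init() sets conns_mask = lro_table_size - 1,
       * and buckets are then selected as (conn_hash & conns_mask).
       */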
   63 static unsigned lro_table_size = 128;
   64 
   65 /* Maximum length of a hash chain.  If chains get too long then the lookup
   66  * time increases and may exceed the benefit of LRO.
   67  */
   68 static unsigned lro_chain_max = 20;
   69 
   70 /* Maximum time (in ticks) that a connection can be idle before its LRO
   71  * state is discarded.
   72  */
   73 static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
   74 
   75 /* Number of packets with payload that must arrive in-order before a
   76  * connection is eligible for LRO.  The idea is we should avoid coalescing
   77  * segments when the sender is in slow-start because reducing the ACK rate
   78  * can damage performance.
   79  */
   80 static int lro_slow_start_packets = 2000;
   81 
   82 /* Number of packets with payload that must arrive in-order following loss
   83  * before a connection is eligible for LRO.  The idea is we should avoid
   84  * coalescing segments when the sender is recovering from loss, because
   85  * reducing the ACK rate can damage performance.
   86  */
   87 static int lro_loss_packets = 20;
   88 
   89 /* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
   90 #define SFXGE_LRO_L2_ID_VLAN 0x4000
   91 #define SFXGE_LRO_L2_ID_IPV6 0x8000
   92 #define SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
   93 #define SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))
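      /* A connection's l2_id therefore packs the 12-bit VLAN ID (if any) into
       * the low bits, with 0x4000 marking VLAN encapsulation and 0x8000
       * marking TCP/IPv6; both flag bits lie above EVL_VLID_MASK (0x0fff), so
       * they cannot clash with a real VLAN ID.
       */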
   94 
   95 /* Compare IPv6 addresses, avoiding conditional branches */
   96 static __inline unsigned long ipv6_addr_cmp(const struct in6_addr *left,
   97                                             const struct in6_addr *right)
   98 {
   99 #if LONG_BIT == 64
  100         const uint64_t *left64 = (const uint64_t *)left;
  101         const uint64_t *right64 = (const uint64_t *)right;
  102         return (left64[0] - right64[0]) | (left64[1] - right64[1]);
  103 #else
  104         return (left->s6_addr32[0] - right->s6_addr32[0]) |
  105                (left->s6_addr32[1] - right->s6_addr32[1]) |
  106                (left->s6_addr32[2] - right->s6_addr32[2]) |
  107                (left->s6_addr32[3] - right->s6_addr32[3]);
  108 #endif
  109 }
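      /* The result is zero if and only if the two addresses are equal, which
       * is all the callers need: sfxge_lro() ORs the results for the source
       * and destination addresses into a single test instead of branching on
       * each comparison separately.
       */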
  110 
  111 void
  112 sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
  113 {
  114 
  115         rxq->flush_state = SFXGE_FLUSH_DONE;
  116 }
  117 
  118 void
  119 sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
  120 {
  121 
  122         rxq->flush_state = SFXGE_FLUSH_FAILED;
  123 }
  124 
  125 static uint8_t toep_key[] = {
  126         0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
  127         0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
  128         0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
  129         0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
  130         0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
  131 };
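      /* This 40-byte Toeplitz hash key is programmed into the adapter by
       * sfxge_rx_start() via efx_rx_scale_toeplitz_ipv4_key_set(), so the
       * hardware hash read back through EFX_RX_HASH_VALUE() is computed with
       * a known key.  (It appears to match the well-known example key from
       * Microsoft's RSS documentation.)
       */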
  132 
  133 static void
  134 sfxge_rx_post_refill(void *arg)
  135 {
  136         struct sfxge_rxq *rxq = arg;
  137         struct sfxge_softc *sc;
  138         unsigned int index;
  139         struct sfxge_evq *evq;
  140         uint16_t magic;
  141 
  142         sc = rxq->sc;
  143         index = rxq->index;
  144         evq = sc->evq[index];
  145 
  146         magic = SFXGE_MAGIC_RX_QREFILL | index;
  147 
  148         /* This is guaranteed due to the start/stop order of rx and ev */
  149         KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
  150             ("evq not started"));
  151         KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
  152             ("rxq not started"));
  153         efx_ev_qpost(evq->common, magic);
  154 }
  155 
  156 static void
  157 sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
  158 {
  159         /* Initially retry after 100 ms, but back off in case of
  160          * repeated failures as we probably have to wait for the
  161          * administrator to raise the pool limit. */
  162         if (retrying)
  163                 rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
  164         else
  165                 rxq->refill_delay = hz / 10;
  166 
  167         callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
  168                              sfxge_rx_post_refill, rxq);
  169 }
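      /* The resulting retry schedule is hz / 10 (100 ms) for the first
       * attempt, then 200 ms, 400 ms, 800 ms, ... on repeated failures,
       * capped at 10 * hz (10 seconds).
       */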
  170 
  171 static inline struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
  172 {
  173         struct mb_args args;
  174         struct mbuf *m;
  175 
  176         /* Allocate mbuf structure */
  177         args.flags = M_PKTHDR;
  178         args.type = MT_DATA;
  179         m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_DONTWAIT);
  180 
  181         /* Allocate (and attach) packet buffer */
  182         if (m && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_DONTWAIT)) {
  183                 uma_zfree(zone_mbuf, m);
  184                 m = NULL;
  185         }
  186 
  187         return m;
  188 }
  189 
  190 #define SFXGE_REFILL_BATCH  64
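      /* sfxge_rx_qfill() below gathers DMA addresses into a local array and
       * posts descriptors to the common code in batches of
       * SFXGE_REFILL_BATCH, flushing any final partial batch after the loop
       * and then ringing the doorbell once with efx_rx_qpush().
       */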
  191 
  192 static void
  193 sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
  194 {
  195         struct sfxge_softc *sc;
  196         unsigned int index;
  197         struct sfxge_evq *evq;
  198         unsigned int batch;
  199         unsigned int rxfill;
  200         unsigned int mblksize;
  201         int ntodo;
  202         efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
  203 
  204         sc = rxq->sc;
  205         index = rxq->index;
  206         evq = sc->evq[index];
  207 
  208         prefetch_read_many(sc->enp);
  209         prefetch_read_many(rxq->common);
  210 
  211         mtx_assert(&evq->lock, MA_OWNED);
  212 
  213         if (rxq->init_state != SFXGE_RXQ_STARTED)
  214                 return;
  215 
  216         rxfill = rxq->added - rxq->completed;
  217         KASSERT(rxfill <= EFX_RXQ_LIMIT(SFXGE_NDESCS),
  218             ("rxfill > EFX_RXQ_LIMIT(SFXGE_NDESCS)"));
  219         ntodo = min(EFX_RXQ_LIMIT(SFXGE_NDESCS) - rxfill, target);
  220         KASSERT(ntodo <= EFX_RXQ_LIMIT(SFXGE_NDESCS),
   221             ("ntodo > EFX_RXQ_LIMIT(SFXGE_NDESCS)"));
  222 
  223         if (ntodo == 0)
  224                 return;
  225 
  226         batch = 0;
  227         mblksize = sc->rx_buffer_size;
  228         while (ntodo-- > 0) {
  229                 unsigned int id;
  230                 struct sfxge_rx_sw_desc *rx_desc;
  231                 bus_dma_segment_t seg;
  232                 struct mbuf *m;
  233 
  234                 id = (rxq->added + batch) & (SFXGE_NDESCS - 1);
  235                 rx_desc = &rxq->queue[id];
  236                 KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));
  237 
  238                 rx_desc->flags = EFX_DISCARD;
  239                 m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
  240                 if (m == NULL)
  241                         break;
  242                 sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
  243                 addr[batch++] = seg.ds_addr;
  244 
  245                 if (batch == SFXGE_REFILL_BATCH) {
  246                         efx_rx_qpost(rxq->common, addr, mblksize, batch,
  247                             rxq->completed, rxq->added);
  248                         rxq->added += batch;
  249                         batch = 0;
  250                 }
  251         }
  252 
  253         if (ntodo != 0)
  254                 sfxge_rx_schedule_refill(rxq, retrying);
  255 
  256         if (batch != 0) {
  257                 efx_rx_qpost(rxq->common, addr, mblksize, batch,
  258                     rxq->completed, rxq->added);
  259                 rxq->added += batch;
  260         }
  261 
  262         /* Make the descriptors visible to the hardware */
  263         bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
  264                         BUS_DMASYNC_PREWRITE);
  265 
  266         efx_rx_qpush(rxq->common, rxq->added);
  267 }
  268 
  269 void
  270 sfxge_rx_qrefill(struct sfxge_rxq *rxq)
  271 {
  272 
  273         if (rxq->init_state != SFXGE_RXQ_STARTED)
  274                 return;
  275 
  276         /* Make sure the queue is full */
  277         sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(SFXGE_NDESCS), B_TRUE);
  278 }
  279 
  280 static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
  281 {
  282         struct ifnet *ifp = sc->ifnet;
  283 
  284         m->m_pkthdr.rcvif = ifp;
  285         m->m_pkthdr.header = m->m_data;
  286         m->m_pkthdr.csum_data = 0xffff;
  287         ifp->if_input(ifp, m);
  288 }
  289 
  290 static void
  291 sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
  292 {
  293         struct mbuf *m = rx_desc->mbuf;
  294         int csum_flags;
  295 
  296         /* Convert checksum flags */
  297         csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ?
  298                 (CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
  299         if (rx_desc->flags & EFX_CKSUM_TCPUDP)
  300                 csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  301 
  302 #ifdef SFXGE_HAVE_MQ
  303         /* The hash covers a 4-tuple for TCP only */
  304         if (rx_desc->flags & EFX_PKT_TCP) {
  305                 m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
  306                                                        mtod(m, uint8_t *));
  307                 m->m_flags |= M_FLOWID;
  308         }
  309 #endif
  310         m->m_data += sc->rx_prefix_size;
  311         m->m_len = rx_desc->size - sc->rx_prefix_size;
  312         m->m_pkthdr.len = m->m_len;
  313         m->m_pkthdr.csum_flags = csum_flags;
  314         __sfxge_rx_deliver(sc, rx_desc->mbuf);
  315 
  316         rx_desc->flags = EFX_DISCARD;
  317         rx_desc->mbuf = NULL;
  318 }
  319 
  320 static void
  321 sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
  322 {
  323         struct sfxge_softc *sc = st->sc;
  324         struct mbuf *m = c->mbuf;
  325         struct tcphdr *c_th;
  326         int csum_flags;
  327 
  328         KASSERT(m, ("no mbuf to deliver"));
  329 
  330         ++st->n_bursts;
  331 
  332         /* Finish off packet munging and recalculate IP header checksum. */
  333         if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
  334                 struct ip *iph = c->nh;
  335                 iph->ip_len = htons(iph->ip_len);
  336                 iph->ip_sum = 0;
  337                 iph->ip_sum = in_cksum_hdr(iph);
  338                 c_th = (struct tcphdr *)(iph + 1);
  339                 csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
  340                               CSUM_IP_CHECKED | CSUM_IP_VALID);
  341         } else {
  342                 struct ip6_hdr *iph = c->nh;
  343                 iph->ip6_plen = htons(iph->ip6_plen);
  344                 c_th = (struct tcphdr *)(iph + 1);
  345                 csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  346         }
  347 
  348         c_th->th_win = c->th_last->th_win;
  349         c_th->th_ack = c->th_last->th_ack;
  350         if (c_th->th_off == c->th_last->th_off) {
  351                 /* Copy TCP options (take care to avoid going negative). */
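                      /* With the usual 12-byte timestamp option th_off is 8
                       * 32-bit words, so optlen below works out to
                       * (8 - 5) * 4 = 12 bytes; a bare 20-byte header
                       * (th_off == 5) copies nothing.
                       */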
  352                 int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
  353                 memcpy(c_th + 1, c->th_last + 1, optlen);
  354         }
  355 
  356 #ifdef SFXGE_HAVE_MQ
  357         m->m_pkthdr.flowid = c->conn_hash;
  358         m->m_flags |= M_FLOWID;
  359 #endif
  360         m->m_pkthdr.csum_flags = csum_flags;
  361         __sfxge_rx_deliver(sc, m);
  362 
  363         c->mbuf = NULL;
  364         c->delivered = 1;
  365 }
  366 
  367 /* Drop the given connection, and add it to the free list. */
  368 static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
  369 {
  370         unsigned bucket;
  371 
  372         KASSERT(!c->mbuf, ("found orphaned mbuf"));
  373 
  374         if (c->next_buf.mbuf) {
  375                 sfxge_rx_deliver(rxq->sc, &c->next_buf);
  376                 LIST_REMOVE(c, active_link);
  377         }
  378 
  379         bucket = c->conn_hash & rxq->lro.conns_mask;
  380         KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
  381         --rxq->lro.conns_n[bucket];
  382         TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
  383         TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
  384 }
  385 
  386 /* Stop tracking connections that have gone idle in order to keep hash
  387  * chains short.
  388  */
  389 static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
  390 {
  391         struct sfxge_lro_conn *c;
  392         unsigned i;
  393 
  394         KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
  395                 ("found active connections"));
  396 
  397         rxq->lro.last_purge_ticks = now;
  398         for (i = 0; i <= rxq->lro.conns_mask; ++i) {
  399                 if (TAILQ_EMPTY(&rxq->lro.conns[i]))
  400                         continue;
  401 
  402                 c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
  403                 if (now - c->last_pkt_ticks > lro_idle_ticks) {
  404                         ++rxq->lro.n_drop_idle;
  405                         sfxge_lro_drop(rxq, c);
  406                 }
  407         }
  408 }
  409 
  410 static void
  411 sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
  412                 struct mbuf *mbuf, struct tcphdr *th)
  413 {
  414         struct tcphdr *c_th;
  415 
  416         /* Tack the new mbuf onto the chain. */
  417         KASSERT(!mbuf->m_next, ("mbuf already chained"));
  418         c->mbuf_tail->m_next = mbuf;
  419         c->mbuf_tail = mbuf;
  420 
  421         /* Increase length appropriately */
  422         c->mbuf->m_pkthdr.len += mbuf->m_len;
  423 
  424         /* Update the connection state flags */
  425         if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
  426                 struct ip *iph = c->nh;
  427                 iph->ip_len += mbuf->m_len;
  428                 c_th = (struct tcphdr *)(iph + 1);
  429         } else {
  430                 struct ip6_hdr *iph = c->nh;
  431                 iph->ip6_plen += mbuf->m_len;
  432                 c_th = (struct tcphdr *)(iph + 1);
  433         }
  434         c_th->th_flags |= (th->th_flags & TH_PUSH);
  435         c->th_last = th;
  436         ++st->n_merges;
  437 
  438         /* Pass packet up now if another segment could overflow the IP
  439          * length.
  440          */
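              /* 65536 is the limit imposed by the 16-bit IP total-length
               * field; the 9200-byte margin appears to be a conservative
               * allowance for one more jumbo-sized segment, so treat it as a
               * tuning constant rather than a protocol requirement.
               */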
  441         if (c->mbuf->m_pkthdr.len > 65536 - 9200)
  442                 sfxge_lro_deliver(st, c);
  443 }
  444 
  445 static void
  446 sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
  447                 struct mbuf *mbuf, void *nh, struct tcphdr *th)
  448 {
  449         /* Start the chain */
  450         c->mbuf = mbuf;
  451         c->mbuf_tail = c->mbuf;
  452         c->nh = nh;
  453         c->th_last = th;
  454 
  455         mbuf->m_pkthdr.len = mbuf->m_len;
  456 
  457         /* Mangle header fields for later processing */
  458         if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
  459                 struct ip *iph = nh;
  460                 iph->ip_len = ntohs(iph->ip_len);
  461         } else {
  462                 struct ip6_hdr *iph = nh;
  463                 iph->ip6_plen = ntohs(iph->ip6_plen);
  464         }
  465 }
  466 
  467 /* Try to merge or otherwise hold or deliver (as appropriate) the
  468  * packet buffered for this connection (c->next_buf).  Return a flag
  469  * indicating whether the connection is still active for LRO purposes.
  470  */
  471 static int
  472 sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
  473 {
  474         struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
  475         char *eh = c->next_eh;
  476         int data_length, hdr_length, dont_merge;
  477         unsigned th_seq, pkt_length;
  478         struct tcphdr *th;
  479         unsigned now;
  480 
  481         if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
  482                 struct ip *iph = c->next_nh;
  483                 th = (struct tcphdr *)(iph + 1);
  484                 pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
  485         } else {
  486                 struct ip6_hdr *iph = c->next_nh;
  487                 th = (struct tcphdr *)(iph + 1);
  488                 pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
  489         }
  490 
  491         hdr_length = (char *) th + th->th_off * 4 - eh;
  492         data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
  493                        hdr_length);
  494         th_seq = ntohl(th->th_seq);
  495         dont_merge = ((data_length <= 0)
  496                       | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));
  497 
  498         /* Check for options other than aligned timestamp. */
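              /* With TCPOPT_NOP == 1, TCPOPT_TIMESTAMP == 8 and
               * TCPOLEN_TIMESTAMP == 10, the test below matches the byte
               * sequence 01 01 08 0a: two NOP pads followed by a timestamp
               * option (kind 8, length 10), the standard aligned layout.
               */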
  499         if (th->th_off != 5) {
  500                 const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
  501                 if (th->th_off == 8 &&
  502                     opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
  503                                         (TCPOPT_NOP << 16) |
  504                                         (TCPOPT_TIMESTAMP << 8) |
  505                                         TCPOLEN_TIMESTAMP)) {
  506                         /* timestamp option -- okay */
  507                 } else {
  508                         dont_merge = 1;
  509                 }
  510         }
  511 
  512         if (__predict_false(th_seq != c->next_seq)) {
  513                 /* Out-of-order, so start counting again. */
  514                 if (c->mbuf)
  515                         sfxge_lro_deliver(&rxq->lro, c);
  516                 c->n_in_order_pkts -= lro_loss_packets;
  517                 c->next_seq = th_seq + data_length;
  518                 ++rxq->lro.n_misorder;
  519                 goto deliver_buf_out;
  520         }
  521         c->next_seq = th_seq + data_length;
  522 
  523         now = ticks;
  524         if (now - c->last_pkt_ticks > lro_idle_ticks) {
  525                 ++rxq->lro.n_drop_idle;
  526                 if (c->mbuf)
  527                         sfxge_lro_deliver(&rxq->lro, c);
  528                 sfxge_lro_drop(rxq, c);
  529                 return 0;
  530         }
  531         c->last_pkt_ticks = ticks;
  532 
  533         if (c->n_in_order_pkts < lro_slow_start_packets) {
  534                 /* May be in slow-start, so don't merge. */
  535                 ++rxq->lro.n_slow_start;
  536                 ++c->n_in_order_pkts;
  537                 goto deliver_buf_out;
  538         }
  539 
  540         if (__predict_false(dont_merge)) {
  541                 if (c->mbuf)
  542                         sfxge_lro_deliver(&rxq->lro, c);
  543                 if (th->th_flags & (TH_FIN | TH_RST)) {
  544                         ++rxq->lro.n_drop_closed;
  545                         sfxge_lro_drop(rxq, c);
  546                         return 0;
  547                 }
  548                 goto deliver_buf_out;
  549         }
  550 
  551         rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;
  552 
  553         if (__predict_true(c->mbuf != NULL)) {
  554                 /* Remove headers and any padding */
  555                 rx_buf->mbuf->m_data += hdr_length;
  556                 rx_buf->mbuf->m_len = data_length;
  557 
  558                 sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
  559         } else {
  560                 /* Remove any padding */
  561                 rx_buf->mbuf->m_len = pkt_length;
  562 
  563                 sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
  564         }
  565 
  566         rx_buf->mbuf = NULL;
  567         return 1;
  568 
  569  deliver_buf_out:
  570         sfxge_rx_deliver(rxq->sc, rx_buf);
  571         return 1;
  572 }
  573 
  574 static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
  575                                uint16_t l2_id, void *nh, struct tcphdr *th)
  576 {
  577         unsigned bucket = conn_hash & st->conns_mask;
  578         struct sfxge_lro_conn *c;
  579 
  580         if (st->conns_n[bucket] >= lro_chain_max) {
  581                 ++st->n_too_many;
  582                 return;
  583         }
  584 
  585         if (!TAILQ_EMPTY(&st->free_conns)) {
  586                 c = TAILQ_FIRST(&st->free_conns);
  587                 TAILQ_REMOVE(&st->free_conns, c, link);
  588         } else {
  589                 c = malloc(sizeof(*c), M_SFXGE, M_DONTWAIT);
  590                 if (c == NULL)
  591                         return;
  592                 c->mbuf = NULL;
  593                 c->next_buf.mbuf = NULL;
  594         }
  595 
  596         /* Create the connection tracking data */
  597         ++st->conns_n[bucket];
  598         TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
  599         c->l2_id = l2_id;
  600         c->conn_hash = conn_hash;
  601         c->source = th->th_sport;
  602         c->dest = th->th_dport;
  603         c->n_in_order_pkts = 0;
  604         c->last_pkt_ticks = *(volatile int *)&ticks;
  605         c->delivered = 0;
  606         ++st->n_new_stream;
  607         /* NB. We don't initialise c->next_seq, and it doesn't matter what
  608          * value it has.  Most likely the next packet received for this
  609          * connection will not match -- no harm done.
  610          */
  611 }
  612 
  613 /* Process mbuf and decide whether to dispatch it to the stack now or
  614  * later.
  615  */
  616 static void
  617 sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
  618 {
  619         struct sfxge_softc *sc = rxq->sc;
  620         struct mbuf *m = rx_buf->mbuf;
  621         struct ether_header *eh;
  622         struct sfxge_lro_conn *c;
  623         uint16_t l2_id;
  624         uint16_t l3_proto;
  625         void *nh;
  626         struct tcphdr *th;
  627         uint32_t conn_hash;
  628         unsigned bucket;
  629 
  630         /* Get the hardware hash */
  631         conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
  632                                       mtod(m, uint8_t *));
  633 
  634         eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
  635         if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
  636                 struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
  637                 l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
  638                         SFXGE_LRO_L2_ID_VLAN;
  639                 l3_proto = veh->evl_proto;
  640                 nh = veh + 1;
  641         } else {
  642                 l2_id = 0;
  643                 l3_proto = eh->ether_type;
  644                 nh = eh + 1;
  645         }
  646 
  647         /* Check whether this is a suitable packet (unfragmented
  648          * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
  649          * length, and compute a hash if necessary.  If not, return.
  650          */
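              /* The IPv4 test below is written branch-free: each
               * subexpression is zero exactly when its requirement is met
               * (protocol is TCP, header carries no IP options, packet is
               * unfragmented), so OR-ing them yields a single conditional
               * branch.  The connection lookup further down uses the same
               * idiom.
               */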
  651         if (l3_proto == htons(ETHERTYPE_IP)) {
  652                 struct ip *iph = nh;
  653                 if ((iph->ip_p - IPPROTO_TCP) |
  654                     (iph->ip_hl - (sizeof(*iph) >> 2u)) |
  655                     (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
  656                         goto deliver_now;
  657                 th = (struct tcphdr *)(iph + 1);
  658         } else if (l3_proto == htons(ETHERTYPE_IPV6)) {
  659                 struct ip6_hdr *iph = nh;
  660                 if (iph->ip6_nxt != IPPROTO_TCP)
  661                         goto deliver_now;
  662                 l2_id |= SFXGE_LRO_L2_ID_IPV6;
  663                 th = (struct tcphdr *)(iph + 1);
  664         } else {
  665                 goto deliver_now;
  666         }
  667 
  668         bucket = conn_hash & rxq->lro.conns_mask;
  669 
  670         TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
  671                 if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
  672                         continue;
  673                 if ((c->source - th->th_sport) | (c->dest - th->th_dport))
  674                         continue;
  675                 if (c->mbuf) {
  676                         if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
  677                                 struct ip *c_iph, *iph = nh;
  678                                 c_iph = c->nh;
  679                                 if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
  680                                     (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
  681                                         continue;
  682                         } else {
  683                                 struct ip6_hdr *c_iph, *iph = nh;
  684                                 c_iph = c->nh;
  685                                 if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
  686                                     ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
  687                                         continue;
  688                         }
  689                 }
  690 
  691                 /* Re-insert at head of list to reduce lookup time. */
  692                 TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
  693                 TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);
  694 
  695                 if (c->next_buf.mbuf) {
  696                         if (!sfxge_lro_try_merge(rxq, c))
  697                                 goto deliver_now;
  698                 } else {
  699                         LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
  700                             active_link);
  701                 }
  702                 c->next_buf = *rx_buf;
  703                 c->next_eh = eh;
  704                 c->next_nh = nh;
  705 
  706                 rx_buf->mbuf = NULL;
  707                 rx_buf->flags = EFX_DISCARD;
  708                 return;
  709         }
  710 
  711         sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
  712  deliver_now:
  713         sfxge_rx_deliver(sc, rx_buf);
  714 }
  715 
  716 static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
  717 {
  718         struct sfxge_lro_state *st = &rxq->lro;
  719         struct sfxge_lro_conn *c;
  720         unsigned t;
  721 
  722         while (!LIST_EMPTY(&st->active_conns)) {
  723                 c = LIST_FIRST(&st->active_conns);
  724                 if (!c->delivered && c->mbuf)
  725                         sfxge_lro_deliver(st, c);
  726                 if (sfxge_lro_try_merge(rxq, c)) {
  727                         if (c->mbuf)
  728                                 sfxge_lro_deliver(st, c);
  729                         LIST_REMOVE(c, active_link);
  730                 }
  731                 c->delivered = 0;
  732         }
  733 
  734         t = *(volatile int *)&ticks;
  735         if (__predict_false(t != st->last_purge_ticks))
  736                 sfxge_lro_purge_idle(rxq, t);
  737 }
  738 
  739 void
  740 sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
  741 {
  742         struct sfxge_softc *sc = rxq->sc;
  743         int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
  744         unsigned int index;
  745         struct sfxge_evq *evq;
  746         unsigned int completed;
  747         unsigned int level;
  748         struct mbuf *m;
  749         struct sfxge_rx_sw_desc *prev = NULL;
  750 
  751         index = rxq->index;
  752         evq = sc->evq[index];
  753 
  754         mtx_assert(&evq->lock, MA_OWNED);
  755 
  756         completed = rxq->completed;
  757         while (completed != rxq->pending) {
  758                 unsigned int id;
  759                 struct sfxge_rx_sw_desc *rx_desc;
  760 
  761                 id = completed++ & (SFXGE_NDESCS - 1);
  762                 rx_desc = &rxq->queue[id];
  763                 m = rx_desc->mbuf;
  764 
  765                 if (rxq->init_state != SFXGE_RXQ_STARTED)
  766                         goto discard;
  767 
  768                 if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
  769                         goto discard;
  770 
  771                 prefetch_read_many(mtod(m, caddr_t));
  772 
  773                 /* Check for loopback packets */
  774                 if (!(rx_desc->flags & EFX_PKT_IPV4) &&
  775                     !(rx_desc->flags & EFX_PKT_IPV6)) {
  776                         struct ether_header *etherhp;
  777 
  778                         /*LINTED*/
  779                         etherhp = mtod(m, struct ether_header *);
  780 
  781                         if (etherhp->ether_type ==
  782                             htons(SFXGE_ETHERTYPE_LOOPBACK)) {
  783                                 EFSYS_PROBE(loopback);
  784 
  785                                 rxq->loopback++;
  786                                 goto discard;
  787                         }
  788                 }
  789 
  790                 /* Pass packet up the stack or into LRO (pipelined) */
  791                 if (prev != NULL) {
  792                         if (lro_enabled)
  793                                 sfxge_lro(rxq, prev);
  794                         else
  795                                 sfxge_rx_deliver(sc, prev);
  796                 }
  797                 prev = rx_desc;
  798                 continue;
  799 
  800 discard:
  801                 /* Return the packet to the pool */
  802                 m_free(m);
  803                 rx_desc->mbuf = NULL;
  804         }
  805         rxq->completed = completed;
  806 
  807         level = rxq->added - rxq->completed;
  808 
  809         /* Pass last packet up the stack or into LRO */
  810         if (prev != NULL) {
  811                 if (lro_enabled)
  812                         sfxge_lro(rxq, prev);
  813                 else
  814                         sfxge_rx_deliver(sc, prev);
  815         }
  816 
  817         /*
  818          * If there are any pending flows and this is the end of the
  819          * poll then they must be completed.
  820          */
  821         if (eop)
  822                 sfxge_lro_end_of_burst(rxq);
  823 
  824         /* Top up the queue if necessary */
  825         if (level < RX_REFILL_THRESHOLD)
  826                 sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(SFXGE_NDESCS), B_FALSE);
  827 }
  828 
  829 static void
  830 sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
  831 {
  832         struct sfxge_rxq *rxq;
  833         struct sfxge_evq *evq;
  834         unsigned int count;
  835 
  836         rxq = sc->rxq[index];
  837         evq = sc->evq[index];
  838 
  839         mtx_lock(&evq->lock);
  840         
  841         KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
  842             ("rxq not started"));
  843 
  844         rxq->init_state = SFXGE_RXQ_INITIALIZED;
  845 
  846         callout_stop(&rxq->refill_callout);
  847 
  848 again:
  849         rxq->flush_state = SFXGE_FLUSH_PENDING;
  850 
  851         /* Flush the receive queue */
  852         efx_rx_qflush(rxq->common);
  853 
  854         mtx_unlock(&evq->lock);
  855 
  856         count = 0;
  857         do {
  858                 /* Spin for 100 ms */
  859                 DELAY(100000);
  860 
  861                 if (rxq->flush_state != SFXGE_FLUSH_PENDING)
  862                         break;
  863 
  864         } while (++count < 20);
  865 
  866         mtx_lock(&evq->lock);
  867 
  868         if (rxq->flush_state == SFXGE_FLUSH_FAILED)
  869                 goto again;
  870 
  871         rxq->flush_state = SFXGE_FLUSH_DONE;
  872 
  873         rxq->pending = rxq->added;
  874         sfxge_rx_qcomplete(rxq, B_TRUE);
  875 
  876         KASSERT(rxq->completed == rxq->pending,
  877             ("rxq->completed != rxq->pending"));
  878 
  879         rxq->added = 0;
  880         rxq->pending = 0;
  881         rxq->completed = 0;
  882         rxq->loopback = 0;
  883 
  884         /* Destroy the common code receive queue. */
  885         efx_rx_qdestroy(rxq->common);   
  886 
  887         efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
  888             EFX_RXQ_NBUFS(SFXGE_NDESCS));
  889 
  890         mtx_unlock(&evq->lock);
  891 }
  892 
  893 static int
  894 sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
  895 {
  896         struct sfxge_rxq *rxq;
  897         efsys_mem_t *esmp;
  898         struct sfxge_evq *evq;
  899         int rc;
  900 
  901         rxq = sc->rxq[index];
  902         esmp = &rxq->mem;
  903         evq = sc->evq[index];
  904 
  905         KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
  906             ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
  907         KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
  908             ("evq->init_state != SFXGE_EVQ_STARTED"));
  909 
  910         /* Program the buffer table. */
  911         if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
  912             EFX_RXQ_NBUFS(SFXGE_NDESCS))) != 0)
  913                 return rc;
  914 
  915         /* Create the common code receive queue. */
  916         if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
  917             esmp, SFXGE_NDESCS, rxq->buf_base_id, evq->common,
  918             &rxq->common)) != 0)
  919                 goto fail;
  920 
  921         mtx_lock(&evq->lock);
  922 
  923         /* Enable the receive queue. */
  924         efx_rx_qenable(rxq->common);
  925 
  926         rxq->init_state = SFXGE_RXQ_STARTED;
  927 
  928         /* Try to fill the queue from the pool. */
  929         sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(SFXGE_NDESCS), B_FALSE);
  930 
  931         mtx_unlock(&evq->lock);
  932 
  933         return (0);
  934 
  935 fail:
  936         efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
  937             EFX_RXQ_NBUFS(SFXGE_NDESCS));
  938         return rc;
  939 }
  940 
  941 void
  942 sfxge_rx_stop(struct sfxge_softc *sc)
  943 {
  944         struct sfxge_intr *intr;
  945         int index;
  946 
  947         intr = &sc->intr;
  948 
  949         /* Stop the receive queue(s) */
  950         index = intr->n_alloc;
  951         while (--index >= 0)
  952                 sfxge_rx_qstop(sc, index);
  953 
  954         sc->rx_prefix_size = 0;
  955         sc->rx_buffer_size = 0;
  956 
  957         efx_rx_fini(sc->enp);
  958 }
  959 
  960 int
  961 sfxge_rx_start(struct sfxge_softc *sc)
  962 {
  963         struct sfxge_intr *intr;
  964         int index;
  965         int rc;
  966 
  967         intr = &sc->intr;
  968 
  969         /* Initialize the common code receive module. */
  970         if ((rc = efx_rx_init(sc->enp)) != 0)
  971                 return (rc);
  972 
  973         /* Calculate the receive packet buffer size. */
  974         sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
  975         sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
  976                               sc->rx_prefix_size);
  977 
  978         /* Select zone for packet buffers */
  979         if (sc->rx_buffer_size <= MCLBYTES)
  980                 sc->rx_buffer_zone = zone_clust;
  981         else if (sc->rx_buffer_size <= MJUMPAGESIZE)
  982                 sc->rx_buffer_zone = zone_jumbop;
  983         else if (sc->rx_buffer_size <= MJUM9BYTES)
  984                 sc->rx_buffer_zone = zone_jumbo9;
  985         else
  986                 sc->rx_buffer_zone = zone_jumbo16;
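              /* The cluster sizes behind these zones are machine-dependent
               * but are typically 2 KB (MCLBYTES), PAGE_SIZE (MJUMPAGESIZE),
               * 9 KB (MJUM9BYTES) and 16 KB for zone_jumbo16, so a standard
               * 1500-byte MTU uses regular clusters and only jumbo MTUs need
               * the larger zones.
               */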
  987 
  988         /*
  989          * Set up the scale table.  Enable all hash types and hash insertion.
  990          */
  991         for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
  992                 sc->rx_indir_table[index] = index % sc->intr.n_alloc;
  993         if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
  994                                        SFXGE_RX_SCALE_MAX)) != 0)
  995                 goto fail;
  996         (void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
  997             (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
  998             (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);
  999 
 1000         if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
 1001             sizeof(toep_key))) != 0)
 1002                 goto fail;
 1003 
 1004         /* Start the receive queue(s). */
 1005         for (index = 0; index < intr->n_alloc; index++) {
 1006                 if ((rc = sfxge_rx_qstart(sc, index)) != 0)
 1007                         goto fail2;
 1008         }
 1009 
 1010         return (0);
 1011 
 1012 fail2:
 1013         while (--index >= 0)
 1014                 sfxge_rx_qstop(sc, index);
 1015 
 1016 fail:
 1017         efx_rx_fini(sc->enp);
 1018 
 1019         return (rc);
 1020 }
 1021 
 1022 static void sfxge_lro_init(struct sfxge_rxq *rxq)
 1023 {
 1024         struct sfxge_lro_state *st = &rxq->lro;
 1025         unsigned i;
 1026 
 1027         st->conns_mask = lro_table_size - 1;
 1028         KASSERT(!((st->conns_mask + 1) & st->conns_mask),
 1029                 ("lro_table_size must be a power of 2"));
 1030         st->sc = rxq->sc;
 1031         st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
 1032                            M_SFXGE, M_WAITOK);
 1033         st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
 1034                              M_SFXGE, M_WAITOK);
 1035         for (i = 0; i <= st->conns_mask; ++i) {
 1036                 TAILQ_INIT(&st->conns[i]);
 1037                 st->conns_n[i] = 0;
 1038         }
 1039         LIST_INIT(&st->active_conns);
 1040         TAILQ_INIT(&st->free_conns);
 1041 }
 1042 
 1043 static void sfxge_lro_fini(struct sfxge_rxq *rxq)
 1044 {
 1045         struct sfxge_lro_state *st = &rxq->lro;
 1046         struct sfxge_lro_conn *c;
 1047         unsigned i;
 1048 
 1049         /* Return cleanly if sfxge_lro_init() has not been called. */
 1050         if (st->conns == NULL)
 1051                 return;
 1052 
 1053         KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));
 1054 
 1055         for (i = 0; i <= st->conns_mask; ++i) {
 1056                 while (!TAILQ_EMPTY(&st->conns[i])) {
 1057                         c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
 1058                         sfxge_lro_drop(rxq, c);
 1059                 }
 1060         }
 1061 
 1062         while (!TAILQ_EMPTY(&st->free_conns)) {
 1063                 c = TAILQ_FIRST(&st->free_conns);
 1064                 TAILQ_REMOVE(&st->free_conns, c, link);
 1065                 KASSERT(!c->mbuf, ("found orphaned mbuf"));
 1066                 free(c, M_SFXGE);
 1067         }
 1068 
 1069         free(st->conns_n, M_SFXGE);
 1070         free(st->conns, M_SFXGE);
 1071         st->conns = NULL;
 1072 }
 1073 
 1074 static void
 1075 sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
 1076 {
 1077         struct sfxge_rxq *rxq;
 1078 
 1079         rxq = sc->rxq[index];
 1080 
 1081         KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
 1082             ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
 1083 
 1084         /* Free the context array and the flow table. */
 1085         free(rxq->queue, M_SFXGE);
 1086         sfxge_lro_fini(rxq);
 1087 
 1088         /* Release DMA memory. */
 1089         sfxge_dma_free(&rxq->mem);
 1090 
 1091         sc->rxq[index] = NULL;
 1092 
 1093         free(rxq, M_SFXGE);
 1094 }
 1095 
 1096 static int
 1097 sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
 1098 {
 1099         struct sfxge_rxq *rxq;
 1100         struct sfxge_evq *evq;
 1101         efsys_mem_t *esmp;
 1102         int rc;
 1103 
 1104         KASSERT(index < sc->intr.n_alloc, ("index >= %d", sc->intr.n_alloc));
 1105 
 1106         rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
 1107         rxq->sc = sc;
 1108         rxq->index = index;
 1109 
 1110         sc->rxq[index] = rxq;
 1111         esmp = &rxq->mem;
 1112 
 1113         evq = sc->evq[index];
 1114 
 1115         /* Allocate and zero DMA space. */
 1116         if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(SFXGE_NDESCS), esmp)) != 0)
 1117                 return (rc);
 1118         (void)memset(esmp->esm_base, 0, EFX_RXQ_SIZE(SFXGE_NDESCS));
 1119 
 1120         /* Allocate buffer table entries. */
 1121         sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(SFXGE_NDESCS),
 1122                                  &rxq->buf_base_id);
 1123 
 1124         /* Allocate the context array and the flow table. */
 1125         rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * SFXGE_NDESCS,
 1126             M_SFXGE, M_WAITOK | M_ZERO);
 1127         sfxge_lro_init(rxq);
 1128 
 1129         callout_init(&rxq->refill_callout, B_TRUE);
 1130 
 1131         rxq->init_state = SFXGE_RXQ_INITIALIZED;
 1132 
 1133         return (0);
 1134 }
 1135 
 1136 static const struct {
 1137         const char *name;
 1138         size_t offset;
 1139 } sfxge_rx_stats[] = {
 1140 #define SFXGE_RX_STAT(name, member) \
 1141         { #name, offsetof(struct sfxge_rxq, member) }
 1142         SFXGE_RX_STAT(lro_merges, lro.n_merges),
 1143         SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
 1144         SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
 1145         SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
 1146         SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
 1147         SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
 1148         SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
 1149         SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
 1150 };
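      /* For example, SFXGE_RX_STAT(lro_merges, lro.n_merges) expands to
       * { "lro_merges", offsetof(struct sfxge_rxq, lro.n_merges) }; the
       * sysctl handler below uses the stored offset to sum the counter
       * across all receive queues.
       */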
 1151 
 1152 static int
 1153 sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
 1154 {
 1155         struct sfxge_softc *sc = arg1;
 1156         unsigned int id = arg2;
 1157         unsigned int sum, index;
 1158 
 1159         /* Sum across all RX queues */
 1160         sum = 0;
 1161         for (index = 0; index < sc->intr.n_alloc; index++)
 1162                 sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
 1163                                          sfxge_rx_stats[id].offset);
 1164 
 1165         return SYSCTL_OUT(req, &sum, sizeof(sum));
 1166 }
 1167 
 1168 static void
 1169 sfxge_rx_stat_init(struct sfxge_softc *sc)
 1170 {
 1171         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
 1172         struct sysctl_oid_list *stat_list;
 1173         unsigned int id;
 1174 
 1175         stat_list = SYSCTL_CHILDREN(sc->stats_node);
 1176 
 1177         for (id = 0;
 1178              id < sizeof(sfxge_rx_stats) / sizeof(sfxge_rx_stats[0]);
 1179              id++) {
 1180                 SYSCTL_ADD_PROC(
 1181                         ctx, stat_list,
 1182                         OID_AUTO, sfxge_rx_stats[id].name,
 1183                         CTLTYPE_UINT|CTLFLAG_RD,
 1184                         sc, id, sfxge_rx_stat_handler, "IU",
 1185                         "");
 1186         }
 1187 }
 1188 
 1189 void
 1190 sfxge_rx_fini(struct sfxge_softc *sc)
 1191 {
 1192         struct sfxge_intr *intr;
 1193         int index;
 1194 
 1195         intr = &sc->intr;
 1196 
 1197         index = intr->n_alloc;
 1198         while (--index >= 0)
 1199                 sfxge_rx_qfini(sc, index);
 1200 }
 1201 
 1202 int
 1203 sfxge_rx_init(struct sfxge_softc *sc)
 1204 {
 1205         struct sfxge_intr *intr;
 1206         int index;
 1207         int rc;
 1208 
 1209         if (lro_idle_ticks == 0)
 1210                 lro_idle_ticks = hz / 10 + 1; /* 100 ms */
 1211 
 1212         intr = &sc->intr;
 1213 
 1214         KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
 1215             ("intr->state != SFXGE_INTR_INITIALIZED"));
 1216 
 1217         /* Initialize the receive queue(s) - one per interrupt. */
 1218         for (index = 0; index < intr->n_alloc; index++) {
 1219                 if ((rc = sfxge_rx_qinit(sc, index)) != 0)
 1220                         goto fail;
 1221         }
 1222 
 1223         sfxge_rx_stat_init(sc);
 1224 
 1225         return (0);
 1226 
 1227 fail:
 1228         /* Tear down the receive queue(s). */
 1229         while (--index >= 0)
 1230                 sfxge_rx_qfini(sc, index);
 1231 
 1232         return (rc);
 1233 }
