FreeBSD/Linux Kernel Cross Reference
sys/dev/mlx4/mlx4_en/mlx4_en_tx.c

    1 /*
    2  * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
    3  *
    4  * This software is available to you under a choice of one of two
    5  * licenses.  You may choose to be licensed under the terms of the GNU
    6  * General Public License (GPL) Version 2, available from the file
    7  * COPYING in the main directory of this source tree, or the
    8  * OpenIB.org BSD license below:
    9  *
   10  *     Redistribution and use in source and binary forms, with or
   11  *     without modification, are permitted provided that the following
   12  *     conditions are met:
   13  *
   14  *      - Redistributions of source code must retain the above
   15  *        copyright notice, this list of conditions and the following
   16  *        disclaimer.
   17  *
   18  *      - Redistributions in binary form must reproduce the above
   19  *        copyright notice, this list of conditions and the following
   20  *        disclaimer in the documentation and/or other materials
   21  *        provided with the distribution.
   22  *
   23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   30  * SOFTWARE.
   31  *
   32  */
   33 
   34 #define LINUXKPI_PARAM_PREFIX mlx4_
   35 
   36 #include <linux/page.h>
   37 #include <dev/mlx4/cq.h>
   38 #include <linux/slab.h>
   39 #include <dev/mlx4/qp.h>
   40 #include <linux/if_vlan.h>
   41 #include <linux/vmalloc.h>
   42 #include <linux/moduleparam.h>
   43 
   44 #include <netinet/in_systm.h>
   45 #include <netinet/in.h>
   46 #include <netinet/if_ether.h>
   47 #include <netinet/ip.h>
   48 #include <netinet/ip6.h>
   49 #include <netinet/tcp.h>
   50 #include <netinet/tcp_lro.h>
   51 #include <netinet/udp.h>
   52 
   53 #include "en.h"
   54 
   55 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
   56                            struct mlx4_en_tx_ring **pring, u32 size,
   57                            u16 stride, int node, int queue_idx)
   58 {
   59         struct mlx4_en_dev *mdev = priv->mdev;
   60         struct mlx4_en_tx_ring *ring;
   61         uint32_t x;
   62         int tmp;
   63         int err;
   64 
   65         ring = kzalloc_node(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL, node);
   66         if (!ring) {
   67                 ring = kzalloc(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL);
   68                 if (!ring) {
   69                         en_err(priv, "Failed allocating TX ring\n");
   70                         return -ENOMEM;
   71                 }
   72         }
   73 
   74         /* Create DMA descriptor TAG */
   75         if ((err = -bus_dma_tag_create(
   76             bus_get_dma_tag(mdev->pdev->dev.bsddev),
   77             1,                                  /* any alignment */
   78             0,                                  /* no boundary */
   79             BUS_SPACE_MAXADDR,                  /* lowaddr */
   80             BUS_SPACE_MAXADDR,                  /* highaddr */
   81             NULL, NULL,                         /* filter, filterarg */
   82             MLX4_EN_TX_MAX_PAYLOAD_SIZE,        /* maxsize */
   83             MLX4_EN_TX_MAX_MBUF_FRAGS,          /* nsegments */
   84             MLX4_EN_TX_MAX_MBUF_SIZE,           /* maxsegsize */
   85             0,                                  /* flags */
   86             NULL, NULL,                         /* lockfunc, lockfuncarg */
   87             &ring->dma_tag)))
   88                 goto done;
   89 
   90         ring->size = size;
   91         ring->size_mask = size - 1;
   92         ring->stride = stride;
   93         ring->inline_thold = MAX(MIN_PKT_LEN, MIN(priv->prof->inline_thold, MAX_INLINE));
   94         mtx_init(&ring->tx_lock.m, "mlx4 tx", NULL, MTX_DEF);
   95         mtx_init(&ring->comp_lock.m, "mlx4 comp", NULL, MTX_DEF);
   96 
   97         tmp = size * sizeof(struct mlx4_en_tx_info);
   98         ring->tx_info = kzalloc_node(tmp, GFP_KERNEL, node);
   99         if (!ring->tx_info) {
  100                 ring->tx_info = kzalloc(tmp, GFP_KERNEL);
  101                 if (!ring->tx_info) {
  102                         err = -ENOMEM;
  103                         goto err_ring;
  104                 }
  105         }
  106 
  107         /* Create DMA descriptor MAPs */
  108         for (x = 0; x != size; x++) {
  109                 err = -bus_dmamap_create(ring->dma_tag, 0,
  110                     &ring->tx_info[x].dma_map);
  111                 if (err != 0) {
  112                         while (x--) {
  113                                 bus_dmamap_destroy(ring->dma_tag,
  114                                     ring->tx_info[x].dma_map);
  115                         }
  116                         goto err_info;
  117                 }
  118         }
  119 
  120         en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
  121                  ring->tx_info, tmp);
  122 
  123         ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
  124 
  125         /* Allocate HW buffers on provided NUMA node */
  126         err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
  127                                  2 * PAGE_SIZE);
  128         if (err) {
  129                 en_err(priv, "Failed allocating hwq resources\n");
  130                 goto err_dma_map;
  131         }
  132 
  133         err = mlx4_en_map_buffer(&ring->wqres.buf);
  134         if (err) {
  135                 en_err(priv, "Failed to map TX buffer\n");
  136                 goto err_hwq_res;
  137         }
  138 
  139         ring->buf = ring->wqres.buf.direct.buf;
  140 
  141         en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d "
  142                "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
  143                ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
  144 
  145         err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
  146                                     MLX4_RESERVE_ETH_BF_QP);
  147         if (err) {
  148                 en_err(priv, "failed reserving qp for TX ring\n");
  149                 goto err_map;
  150         }
  151 
  152         err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
  153         if (err) {
  154                 en_err(priv, "Failed allocating qp %d\n", ring->qpn);
  155                 goto err_reserve;
  156         }
  157         ring->qp.event = mlx4_en_sqp_event;
  158 
  159         err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
  160         if (err) {
   161                 en_dbg(DRV, priv, "working without blueflame (%d)\n", err);
  162                 ring->bf.uar = &mdev->priv_uar;
  163                 ring->bf.uar->map = mdev->uar_map;
  164                 ring->bf_enabled = false;
  165         } else
  166                 ring->bf_enabled = true;
  167         ring->queue_index = queue_idx;
  168 
  169         *pring = ring;
  170         return 0;
  171 
  172 err_reserve:
  173         mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
  174 err_map:
  175         mlx4_en_unmap_buffer(&ring->wqres.buf);
  176 err_hwq_res:
  177         mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
  178 err_dma_map:
  179         for (x = 0; x != size; x++)
  180                 bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map);
  181 err_info:
  182         vfree(ring->tx_info);
  183 err_ring:
  184         bus_dma_tag_destroy(ring->dma_tag);
  185 done:
  186         kfree(ring);
  187         return err;
  188 }
  189 
  190 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
  191                              struct mlx4_en_tx_ring **pring)
  192 {
  193         struct mlx4_en_dev *mdev = priv->mdev;
  194         struct mlx4_en_tx_ring *ring = *pring;
  195         uint32_t x;
  196         en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);
  197 
  198         if (ring->bf_enabled)
  199                 mlx4_bf_free(mdev->dev, &ring->bf);
  200         mlx4_qp_remove(mdev->dev, &ring->qp);
  201         mlx4_qp_free(mdev->dev, &ring->qp);
  202         mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
  203         mlx4_en_unmap_buffer(&ring->wqres.buf);
  204         mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
  205         for (x = 0; x != ring->size; x++)
  206                 bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map);
  207         vfree(ring->tx_info);
  208         mtx_destroy(&ring->tx_lock.m);
  209         mtx_destroy(&ring->comp_lock.m);
  210         bus_dma_tag_destroy(ring->dma_tag);
  211         kfree(ring);
  212         *pring = NULL;
  213 }
  214 
  215 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
  216                              struct mlx4_en_tx_ring *ring,
  217                              int cq, int user_prio)
  218 {
  219         struct mlx4_en_dev *mdev = priv->mdev;
  220         int err;
  221 
  222         ring->cqn = cq;
  223         ring->prod = 0;
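              /*
               * The consumer starts at -1 with last_nr_txbb set to 1:
               * once the "last polled descriptor" is skipped (see
               * mlx4_en_free_tx_buf()), cons catches up with prod and
               * a freshly activated ring reads as empty.
               */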
  224         ring->cons = 0xffffffff;
  225         ring->last_nr_txbb = 1;
  226         ring->poll_cnt = 0;
  227         memset(ring->buf, 0, ring->buf_size);
  228         ring->watchdog_time = 0;
  229 
  230         ring->qp_state = MLX4_QP_STATE_RST;
  231         ring->doorbell_qpn = ring->qp.qpn << 8;
  232 
  233         mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
  234                                 ring->cqn, user_prio, &ring->context);
  235         if (ring->bf_enabled)
  236                 ring->context.usr_page = cpu_to_be32(ring->bf.uar->index);
  237 
  238         err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
  239                                &ring->qp, &ring->qp_state);
  240         return err;
  241 }
  242 
  243 void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
  244                                 struct mlx4_en_tx_ring *ring)
  245 {
  246         struct mlx4_en_dev *mdev = priv->mdev;
  247 
  248         mlx4_qp_modify(mdev->dev, NULL, ring->qp_state,
  249                        MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
  250 }
  251 
  252 static volatile struct mlx4_wqe_data_seg *
  253 mlx4_en_store_inline_lso_data(volatile struct mlx4_wqe_data_seg *dseg,
  254     struct mbuf *mb, int len, __be32 owner_bit)
  255 {
  256         uint8_t *inl = __DEVOLATILE(uint8_t *, dseg);
  257 
  258         /* copy data into place */
  259         m_copydata(mb, 0, len, inl + 4);
  260         dseg += DIV_ROUND_UP(4 + len, DS_SIZE_ALIGNMENT);
  261         return (dseg);
  262 }
  263 
  264 static void
  265 mlx4_en_store_inline_lso_header(volatile struct mlx4_wqe_data_seg *dseg,
  266     int len, __be32 owner_bit)
  267 {
  268 }
  269 
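      /*
       * Stamp every STAMP_STRIDE bytes of a reclaimed descriptor's
       * TXBBs with STAMP_VAL and the given ownership value, so that
       * the ownership pattern of freed entries stays consistent with
       * the current pass over the ring and stale contents are not
       * mistaken for a descriptor still owned by the hardware.
       */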
  270 static void
  271 mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
  272     struct mlx4_en_tx_ring *ring, u32 index, u8 owner)
  273 {
  274         struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
  275         struct mlx4_en_tx_desc *tx_desc = (struct mlx4_en_tx_desc *)
  276             (ring->buf + (index * TXBB_SIZE));
  277         volatile __be32 *ptr = (__be32 *)tx_desc;
  278         const __be32 stamp = cpu_to_be32(STAMP_VAL |
  279             ((u32)owner << STAMP_SHIFT));
  280         u32 i;
  281 
  282         /* Stamp the freed descriptor */
  283         for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
  284                 *ptr = stamp;
  285                 ptr += STAMP_DWORDS;
  286         }
  287 }
  288 
  289 static u32
  290 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
  291     struct mlx4_en_tx_ring *ring, u32 index)
  292 {
  293         struct mlx4_en_tx_info *tx_info;
  294         struct mbuf *mb;
  295 
  296         tx_info = &ring->tx_info[index];
  297         mb = tx_info->mb;
  298 
  299         if (mb == NULL)
  300                 goto done;
  301 
  302         bus_dmamap_sync(ring->dma_tag, tx_info->dma_map,
  303             BUS_DMASYNC_POSTWRITE);
  304         bus_dmamap_unload(ring->dma_tag, tx_info->dma_map);
  305 
  306         m_freem(mb);
  307 done:
  308         return (tx_info->nr_txbb);
  309 }
  310 
  311 int mlx4_en_free_tx_buf(struct ifnet *dev, struct mlx4_en_tx_ring *ring)
  312 {
  313         struct mlx4_en_priv *priv = mlx4_netdev_priv(dev);
  314         int cnt = 0;
  315 
  316         /* Skip last polled descriptor */
  317         ring->cons += ring->last_nr_txbb;
  318         en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
  319                  ring->cons, ring->prod);
  320 
  321         if ((u32) (ring->prod - ring->cons) > ring->size) {
  322                 en_warn(priv, "Tx consumer passed producer!\n");
  323                 return 0;
  324         }
  325 
  326         while (ring->cons != ring->prod) {
  327                 ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
  328                     ring->cons & ring->size_mask);
  329                 ring->cons += ring->last_nr_txbb;
  330                 cnt++;
  331         }
  332 
  333         if (cnt)
  334                 en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);
  335 
  336         return cnt;
  337 }
  338 
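      /*
       * The ring is treated as full when fewer than HEADROOM plus two
       * maximum-sized WQEs worth of TXBBs separate the producer from
       * the consumer, which leaves slack for the wraparound padding
       * that mlx4_en_xmit() may append to a descriptor.
       */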
  339 static bool
  340 mlx4_en_tx_ring_is_full(struct mlx4_en_tx_ring *ring)
  341 {
  342         int wqs;
  343         wqs = ring->size - (ring->prod - ring->cons);
  344         return (wqs < (HEADROOM + (2 * MLX4_EN_TX_WQE_MAX_WQEBBS)));
  345 }
  346 
  347 static int mlx4_en_process_tx_cq(struct ifnet *dev,
  348                                  struct mlx4_en_cq *cq)
  349 {
  350         struct mlx4_en_priv *priv = mlx4_netdev_priv(dev);
  351         struct mlx4_cq *mcq = &cq->mcq;
  352         struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
  353         struct mlx4_cqe *cqe;
  354         u16 index;
  355         u16 new_index, ring_index, stamp_index;
  356         u32 txbbs_skipped = 0;
  357         u32 txbbs_stamp = 0;
  358         u32 cons_index = mcq->cons_index;
  359         int size = cq->size;
  360         u32 size_mask = ring->size_mask;
  361         struct mlx4_cqe *buf = cq->buf;
  362         int factor = priv->cqe_factor;
  363 
  364         if (!priv->port_up)
  365                 return 0;
  366 
  367         index = cons_index & size_mask;
  368         cqe = &buf[(index << factor) + factor];
  369         ring_index = ring->cons & size_mask;
  370         stamp_index = ring_index;
  371 
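              /*
               * CQE ownership: the hardware toggles the owner bit each
               * time it wraps the CQ, so a CQE belongs to software
               * when that bit matches the parity of the consumer index
               * (cons_index & size); the XNOR() test below checks this
               * before each entry is processed.
               */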
  372         /* Process all completed CQEs */
  373         while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
  374                         cons_index & size)) {
  375                 /*
  376                  * make sure we read the CQE after we read the
  377                  * ownership bit
  378                  */
  379                 rmb();
  380 
  381                 if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
  382                              MLX4_CQE_OPCODE_ERROR)) {
   383                         en_err(priv, "CQE completed in error - vendor syndrome: 0x%x syndrome: 0x%x\n",
  384                                ((struct mlx4_err_cqe *)cqe)->
  385                                        vendor_err_syndrome,
  386                                ((struct mlx4_err_cqe *)cqe)->syndrome);
  387                 }
  388 
  389                 /* Skip over last polled CQE */
  390                 new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
  391 
  392                 do {
  393                         txbbs_skipped += ring->last_nr_txbb;
  394                         ring_index = (ring_index + ring->last_nr_txbb) & size_mask;
  395                         /* free next descriptor */
  396                         ring->last_nr_txbb = mlx4_en_free_tx_desc(
  397                             priv, ring, ring_index);
  398                         mlx4_en_stamp_wqe(priv, ring, stamp_index,
  399                                           !!((ring->cons + txbbs_stamp) &
  400                                                 ring->size));
  401                         stamp_index = ring_index;
  402                         txbbs_stamp = txbbs_skipped;
  403                 } while (ring_index != new_index);
  404 
  405                 ++cons_index;
  406                 index = cons_index & size_mask;
  407                 cqe = &buf[(index << factor) + factor];
  408         }
  409 
  410 
  411         /*
  412          * To prevent CQ overflow we first update CQ consumer and only then
  413          * the ring consumer.
  414          */
  415         mcq->cons_index = cons_index;
  416         mlx4_cq_set_ci(mcq);
  417         wmb();
  418         ring->cons += txbbs_skipped;
  419 
  420         return (0);
  421 }
  422 
  423 void mlx4_en_tx_irq(struct mlx4_cq *mcq)
  424 {
  425         struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
  426         struct mlx4_en_priv *priv = mlx4_netdev_priv(cq->dev);
  427         struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
  428 
  429         if (priv->port_up == 0 || !spin_trylock(&ring->comp_lock))
  430                 return;
  431         mlx4_en_process_tx_cq(cq->dev, cq);
  432         mod_timer(&cq->timer, jiffies + 1);
  433         spin_unlock(&ring->comp_lock);
  434 }
  435 
  436 void mlx4_en_poll_tx_cq(unsigned long data)
  437 {
  438         struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data;
  439         struct mlx4_en_priv *priv = mlx4_netdev_priv(cq->dev);
  440         struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
  441         u32 inflight;
  442 
  443         INC_PERF_COUNTER(priv->pstats.tx_poll);
  444 
  445         if (priv->port_up == 0)
  446                 return;
  447         if (!spin_trylock(&ring->comp_lock)) {
  448                 mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
  449                 return;
  450         }
  451         mlx4_en_process_tx_cq(cq->dev, cq);
  452         inflight = (u32) (ring->prod - ring->cons - ring->last_nr_txbb);
  453 
  454         /* If there are still packets in flight and the timer has not already
  455          * been scheduled by the Tx routine then schedule it here to guarantee
  456          * completion processing of these packets */
  457         if (inflight && priv->port_up)
  458                 mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
  459 
  460         spin_unlock(&ring->comp_lock);
  461 }
  462 
  463 static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind)
  464 {
  465         struct mlx4_en_cq *cq = priv->tx_cq[tx_ind];
  466         struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind];
  467 
  468         if (priv->port_up == 0)
  469                 return;
  470 
  471         /* If we don't have a pending timer, set one up to catch our recent
   472          * post in case the interface becomes idle */
  473         if (!timer_pending(&cq->timer))
  474                 mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
  475 
   476         /* Poll the CQ every MLX4_EN_TX_POLL_MODER packets */
  477         if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0)
  478                 if (spin_trylock(&ring->comp_lock)) {
  479                         mlx4_en_process_tx_cq(priv->dev, cq);
  480                         spin_unlock(&ring->comp_lock);
  481                 }
  482 }
  483 
  484 static u16
  485 mlx4_en_get_inline_hdr_size(struct mlx4_en_tx_ring *ring, struct mbuf *mb)
  486 {
  487         u16 retval;
  488 
  489         /* only copy from first fragment, if possible */
  490         retval = MIN(ring->inline_thold, mb->m_len);
  491 
  492         /* check for too little data */
  493         if (unlikely(retval < MIN_PKT_LEN))
  494                 retval = MIN(ring->inline_thold, mb->m_pkthdr.len);
  495         return (retval);
  496 }
  497 
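      /*
       * For LSO, compute the combined length of the Ethernet (plus
       * optional VLAN), IPv4 or IPv6 and TCP headers.  Returns 0 when
       * the packet is not TCP or the headers are not contiguous in the
       * first mbuf.
       */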
  498 static int
  499 mlx4_en_get_header_size(struct mbuf *mb)
  500 {
  501         struct ether_vlan_header *eh;
  502         struct tcphdr *th;
  503         struct ip *ip;
  504         int ip_hlen, tcp_hlen;
  505         struct ip6_hdr *ip6;
  506         uint16_t eth_type;
  507         int eth_hdr_len;
  508 
  509         eh = mtod(mb, struct ether_vlan_header *);
  510         if (mb->m_len < ETHER_HDR_LEN)
  511                 return (0);
  512         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
  513                 eth_type = ntohs(eh->evl_proto);
  514                 eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
  515         } else {
  516                 eth_type = ntohs(eh->evl_encap_proto);
  517                 eth_hdr_len = ETHER_HDR_LEN;
  518         }
  519         if (mb->m_len < eth_hdr_len)
  520                 return (0);
  521         switch (eth_type) {
  522         case ETHERTYPE_IP:
  523                 ip = (struct ip *)(mb->m_data + eth_hdr_len);
  524                 if (mb->m_len < eth_hdr_len + sizeof(*ip))
  525                         return (0);
  526                 if (ip->ip_p != IPPROTO_TCP)
  527                         return (0);
  528                 ip_hlen = ip->ip_hl << 2;
  529                 eth_hdr_len += ip_hlen;
  530                 break;
  531         case ETHERTYPE_IPV6:
  532                 ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
  533                 if (mb->m_len < eth_hdr_len + sizeof(*ip6))
  534                         return (0);
  535                 if (ip6->ip6_nxt != IPPROTO_TCP)
  536                         return (0);
  537                 eth_hdr_len += sizeof(*ip6);
  538                 break;
  539         default:
  540                 return (0);
  541         }
  542         if (mb->m_len < eth_hdr_len + sizeof(*th))
  543                 return (0);
  544         th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
  545         tcp_hlen = th->th_off << 2;
  546         eth_hdr_len += tcp_hlen;
  547         if (mb->m_len < eth_hdr_len)
  548                 return (0);
  549         return (eth_hdr_len);
  550 }
  551 
  552 static volatile struct mlx4_wqe_data_seg *
  553 mlx4_en_store_inline_data(volatile struct mlx4_wqe_data_seg *dseg,
  554     struct mbuf *mb, int len, __be32 owner_bit)
  555 {
  556         uint8_t *inl = __DEVOLATILE(uint8_t *, dseg);
  557         const int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - 4;
  558 
  559         if (unlikely(len < MIN_PKT_LEN)) {
  560                 m_copydata(mb, 0, len, inl + 4);
  561                 memset(inl + 4 + len, 0, MIN_PKT_LEN - len);
  562                 dseg += DIV_ROUND_UP(4 + MIN_PKT_LEN, DS_SIZE_ALIGNMENT);
  563         } else if (len <= spc) {
  564                 m_copydata(mb, 0, len, inl + 4);
  565                 dseg += DIV_ROUND_UP(4 + len, DS_SIZE_ALIGNMENT);
  566         } else {
  567                 m_copydata(mb, 0, spc, inl + 4);
  568                 m_copydata(mb, spc, len - spc, inl + 8 + spc);
  569                 dseg += DIV_ROUND_UP(8 + len, DS_SIZE_ALIGNMENT);
  570         }
  571         return (dseg);
  572 }
  573 
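      /*
       * Write the byte count(s) of the inline segment; bit 31 marks a
       * segment as inline.  The packet data was already copied by
       * mlx4_en_store_inline_data().  When the data is split across
       * two chunks, the second chunk's count is written before the
       * first with a write barrier in between, so the hardware never
       * sees a byte count ahead of the data it describes.
       */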
  574 static void
  575 mlx4_en_store_inline_header(volatile struct mlx4_wqe_data_seg *dseg,
  576     int len, __be32 owner_bit)
  577 {
  578         uint8_t *inl = __DEVOLATILE(uint8_t *, dseg);
  579         const int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - 4;
  580 
  581         if (unlikely(len < MIN_PKT_LEN)) {
  582                 *(volatile uint32_t *)inl =
  583                     SET_BYTE_COUNT((1U << 31) | MIN_PKT_LEN);
  584         } else if (len <= spc) {
  585                 *(volatile uint32_t *)inl =
  586                     SET_BYTE_COUNT((1U << 31) | len);
  587         } else {
  588                 *(volatile uint32_t *)(inl + 4 + spc) =
  589                     SET_BYTE_COUNT((1U << 31) | (len - spc));
  590                 wmb();
  591                 *(volatile uint32_t *)inl =
  592                     SET_BYTE_COUNT((1U << 31) | spc);
  593         }
  594 }
  595 
  596 static uint32_t hashrandom;
  597 static void hashrandom_init(void *arg)
  598 {
  599         /*
  600          * It is assumed that the random subsystem has been
  601          * initialized when this function is called:
  602          */
  603         hashrandom = m_ether_tcpip_hash_init();
  604 }
  605 SYSINIT(hashrandom_init, SI_SUB_RANDOM, SI_ORDER_ANY, &hashrandom_init, NULL);
  606 
  607 u16 mlx4_en_select_queue(struct ifnet *dev, struct mbuf *mb)
  608 {
  609         struct mlx4_en_priv *priv = mlx4_netdev_priv(dev);
  610         u32 rings_p_up = priv->num_tx_rings_p_up;
  611         u32 up = 0;
  612         u32 queue_index;
  613 
  614 #if (MLX4_EN_NUM_UP > 1)
  615         /* Obtain VLAN information if present */
  616         if (mb->m_flags & M_VLANTAG) {
  617                 u32 vlan_tag = mb->m_pkthdr.ether_vtag;
  618                 up = (vlan_tag >> 13) % MLX4_EN_NUM_UP;
  619         }
  620 #endif
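              /*
               * Hash the L3 and L4 headers so that packets of the same
               * flow always map to the same ring within the selected
               * priority group.
               */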
  621         queue_index = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 | MBUF_HASHFLAG_L4, mb, hashrandom);
  622 
  623         return ((queue_index % rings_p_up) + (up * rings_p_up));
  624 }
  625 
  626 static void mlx4_bf_copy(void __iomem *dst, volatile unsigned long *src, unsigned bytecnt)
  627 {
  628         __iowrite64_copy(dst, __DEVOLATILE(void *, src), bytecnt / 8);
  629 }
  630 
  631 int mlx4_en_xmit(struct mlx4_en_priv *priv, int tx_ind, struct mbuf **mbp)
  632 {
  633         enum {
  634                 DS_FACT = TXBB_SIZE / DS_SIZE_ALIGNMENT,
  635                 CTRL_FLAGS = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
  636                     MLX4_WQE_CTRL_SOLICITED),
  637         };
  638         bus_dma_segment_t segs[MLX4_EN_TX_MAX_MBUF_FRAGS];
  639         volatile struct mlx4_wqe_data_seg *dseg;
  640         volatile struct mlx4_wqe_data_seg *dseg_inline;
  641         volatile struct mlx4_en_tx_desc *tx_desc;
  642         struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind];
  643         struct ifnet *ifp = priv->dev;
  644         struct mlx4_en_tx_info *tx_info;
  645         struct mbuf *mb = *mbp;
  646         struct mbuf *m;
  647         __be32 owner_bit;
  648         int nr_segs;
  649         int pad;
  650         int err;
  651         u32 bf_size;
  652         u32 bf_prod;
  653         u32 opcode;
  654         u16 index;
  655         u16 ds_cnt;
  656         u16 ihs;
  657 
  658         if (unlikely(!priv->port_up)) {
  659                 err = EINVAL;
  660                 goto tx_drop;
  661         }
  662 
  663         /* check if TX ring is full */
  664         if (unlikely(mlx4_en_tx_ring_is_full(ring))) {
  665                 /* Use interrupts to find out when queue opened */
  666                 mlx4_en_arm_cq(priv, priv->tx_cq[tx_ind]);
  667                 return (ENOBUFS);
  668         }
  669 
  670         /* sanity check we are not wrapping around */
  671         KASSERT(((~ring->prod) & ring->size_mask) >=
  672             (MLX4_EN_TX_WQE_MAX_WQEBBS - 1), ("Wrapping around TX ring"));
  673 
  674         /* Track current inflight packets for performance analysis */
  675         AVG_PERF_COUNTER(priv->pstats.inflight_avg,
  676                          (u32) (ring->prod - ring->cons - 1));
  677 
  678         /* Track current mbuf packet header length */
  679         AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, mb->m_pkthdr.len);
  680 
  681         /* Grab an index and try to transmit packet */
  682         owner_bit = (ring->prod & ring->size) ?
  683                 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0;
  684         index = ring->prod & ring->size_mask;
  685         tx_desc = (volatile struct mlx4_en_tx_desc *)
  686             (ring->buf + index * TXBB_SIZE);
  687         tx_info = &ring->tx_info[index];
  688         dseg = &tx_desc->data;
  689 
  690         /* send a copy of the frame to the BPF listener, if any */
  691         if (ifp != NULL && ifp->if_bpf != NULL)
  692                 ETHER_BPF_MTAP(ifp, mb);
  693 
  694         /* get default flags */
  695         tx_desc->ctrl.srcrb_flags = CTRL_FLAGS;
  696 
  697         if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO))
  698                 tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM);
  699 
  700         if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP |
  701             CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
  702                 tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM);
  703 
  704         /* do statistics */
  705         if (likely(tx_desc->ctrl.srcrb_flags != CTRL_FLAGS)) {
  706                 priv->port_stats.tx_chksum_offload++;
  707                 ring->tx_csum++;
  708         }
  709 
  710         /* check for VLAN tag */
  711         if (mb->m_flags & M_VLANTAG) {
  712                 tx_desc->ctrl.vlan_tag = cpu_to_be16(mb->m_pkthdr.ether_vtag);
  713                 tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN;
  714         } else {
  715                 tx_desc->ctrl.vlan_tag = 0;
  716                 tx_desc->ctrl.ins_vlan = 0;
  717         }
  718 
  719         if (unlikely(mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback)) {
  720                 /*
  721                  * Copy destination MAC address to WQE. This allows
  722                  * loopback in eSwitch, so that VFs and PF can
  723                  * communicate with each other:
  724                  */
  725                 m_copydata(mb, 0, 2, __DEVOLATILE(void *, &tx_desc->ctrl.srcrb_flags16[0]));
  726                 m_copydata(mb, 2, 4, __DEVOLATILE(void *, &tx_desc->ctrl.imm));
  727         } else {
  728                 /* clear immediate field */
  729                 tx_desc->ctrl.imm = 0;
  730         }
  731 
  732         /* Handle LSO (TSO) packets */
  733         if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
  734                 u32 payload_len;
  735                 u32 mss = mb->m_pkthdr.tso_segsz;
  736                 u32 num_pkts;
  737 
  738                 opcode = cpu_to_be32(MLX4_OPCODE_LSO | MLX4_WQE_CTRL_RR) |
  739                     owner_bit;
  740                 ihs = mlx4_en_get_header_size(mb);
  741                 if (unlikely(ihs > MAX_INLINE)) {
  742                         ring->oversized_packets++;
  743                         err = EINVAL;
  744                         goto tx_drop;
  745                 }
  746                 tx_desc->lso.mss_hdr_size = cpu_to_be32((mss << 16) | ihs);
  747                 payload_len = mb->m_pkthdr.len - ihs;
  748                 if (unlikely(payload_len == 0))
  749                         num_pkts = 1;
  750                 else
  751                         num_pkts = DIV_ROUND_UP(payload_len, mss);
  752                 ring->bytes += payload_len + (num_pkts * ihs);
  753                 ring->packets += num_pkts;
  754                 ring->tso_packets++;
  755                 /* store pointer to inline header */
  756                 dseg_inline = dseg;
  757                 /* copy data inline */
  758                 dseg = mlx4_en_store_inline_lso_data(dseg,
  759                     mb, ihs, owner_bit);
  760         } else {
  761                 opcode = cpu_to_be32(MLX4_OPCODE_SEND) |
  762                     owner_bit;
  763                 ihs = mlx4_en_get_inline_hdr_size(ring, mb);
   764                 ring->bytes += max_t(unsigned int,
  765                     mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
  766                 ring->packets++;
  767                 /* store pointer to inline header */
  768                 dseg_inline = dseg;
  769                 /* copy data inline */
  770                 dseg = mlx4_en_store_inline_data(dseg,
  771                     mb, ihs, owner_bit);
  772         }
  773         m_adj(mb, ihs);
  774 
  775         err = bus_dmamap_load_mbuf_sg(ring->dma_tag, tx_info->dma_map,
  776             mb, segs, &nr_segs, BUS_DMA_NOWAIT);
  777         if (unlikely(err == EFBIG)) {
  778                 /* Too many mbuf fragments */
  779                 ring->defrag_attempts++;
  780                 m = m_defrag(mb, M_NOWAIT);
  781                 if (m == NULL) {
  782                         ring->oversized_packets++;
  783                         goto tx_drop;
  784                 }
  785                 mb = m;
  786                 /* Try again */
  787                 err = bus_dmamap_load_mbuf_sg(ring->dma_tag, tx_info->dma_map,
  788                     mb, segs, &nr_segs, BUS_DMA_NOWAIT);
  789         }
  790         /* catch errors */
  791         if (unlikely(err != 0)) {
  792                 ring->oversized_packets++;
  793                 goto tx_drop;
  794         }
  795         /* If there were no errors and we didn't load anything, don't sync. */
  796         if (nr_segs != 0) {
  797                 /* make sure all mbuf data is written to RAM */
  798                 bus_dmamap_sync(ring->dma_tag, tx_info->dma_map,
  799                     BUS_DMASYNC_PREWRITE);
  800         } else {
  801                 /* All data was inlined, free the mbuf. */
  802                 bus_dmamap_unload(ring->dma_tag, tx_info->dma_map);
  803                 m_freem(mb);
  804                 mb = NULL;
  805         }
  806 
  807         /* compute number of DS needed */
  808         ds_cnt = (dseg - ((volatile struct mlx4_wqe_data_seg *)tx_desc)) + nr_segs;
  809 
  810         /*
  811          * Check if the next request can wrap around and fill the end
  812          * of the current request with zero immediate data:
  813          */
  814         pad = DIV_ROUND_UP(ds_cnt, DS_FACT);
  815         pad = (~(ring->prod + pad)) & ring->size_mask;
  816 
  817         if (unlikely(pad < (MLX4_EN_TX_WQE_MAX_WQEBBS - 1))) {
  818                 /*
  819                  * Compute the least number of DS blocks we need to
  820                  * pad in order to achieve a TX ring wraparound:
  821                  */
  822                 pad = (DS_FACT * (pad + 1));
  823         } else {
  824                 /*
  825                  * The hardware will automatically jump to the next
  826                  * TXBB. No need for padding.
  827                  */
  828                 pad = 0;
  829         }
  830 
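              /*
               * Worked example, assuming the usual TXBB_SIZE of 64
               * bytes and 16-byte data segments (DS_FACT == 4): a WQE
               * with ds_cnt = 6 occupies two TXBBs.  If only one TXBB
               * then remains before the end of the ring, the branch
               * above sets pad to 4, so the zero-length segments
               * written further below fill the ring tail and the next
               * WQE starts again at index zero.
               */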
  831         /* compute total number of DS blocks */
  832         ds_cnt += pad;
  833         /*
  834          * When modifying this code, please ensure that the following
  835          * computation is always less than or equal to 0x3F:
  836          *
  837          * ((MLX4_EN_TX_WQE_MAX_WQEBBS - 1) * DS_FACT) +
  838          * (MLX4_EN_TX_WQE_MAX_WQEBBS * DS_FACT)
  839          *
  840          * Else the "ds_cnt" variable can become too big.
  841          */
  842         tx_desc->ctrl.fence_size = (ds_cnt & 0x3f);
  843 
  844         /* store pointer to mbuf */
  845         tx_info->mb = mb;
  846         tx_info->nr_txbb = DIV_ROUND_UP(ds_cnt, DS_FACT);
  847         bf_size = ds_cnt * DS_SIZE_ALIGNMENT;
  848         bf_prod = ring->prod;
  849 
  850         /* compute end of "dseg" array */
  851         dseg += nr_segs + pad;
  852 
  853         /* pad using zero immediate dseg */
  854         while (pad--) {
  855                 dseg--;
  856                 dseg->addr = 0;
  857                 dseg->lkey = 0;
  858                 wmb();
  859                 dseg->byte_count = SET_BYTE_COUNT((1U << 31)|0);
  860         }
  861 
  862         /* fill segment list */
  863         while (nr_segs--) {
  864                 if (unlikely(segs[nr_segs].ds_len == 0)) {
  865                         dseg--;
  866                         dseg->addr = 0;
  867                         dseg->lkey = 0;
  868                         wmb();
  869                         dseg->byte_count = SET_BYTE_COUNT((1U << 31)|0);
  870                 } else {
  871                         dseg--;
  872                         dseg->addr = cpu_to_be64((uint64_t)segs[nr_segs].ds_addr);
  873                         dseg->lkey = cpu_to_be32(priv->mdev->mr.key);
  874                         wmb();
  875                         dseg->byte_count = SET_BYTE_COUNT((uint32_t)segs[nr_segs].ds_len);
  876                 }
  877         }
  878 
  879         wmb();
  880 
  881         /* write owner bits in reverse order */
  882         if ((opcode & cpu_to_be32(0x1F)) == cpu_to_be32(MLX4_OPCODE_LSO))
  883                 mlx4_en_store_inline_lso_header(dseg_inline, ihs, owner_bit);
  884         else
  885                 mlx4_en_store_inline_header(dseg_inline, ihs, owner_bit);
  886 
  887         /* update producer counter */
  888         ring->prod += tx_info->nr_txbb;
  889 
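              /*
               * Ring the doorbell.  When BlueFlame is enabled, the
               * descriptor is small enough (bf_size <= MAX_BF) and no
               * VLAN tag is being inserted, the whole WQE is copied
               * through the BlueFlame register, which also serves as
               * the doorbell.  Otherwise ownership is handed to the
               * hardware first and the regular send doorbell is
               * written through the UAR page.
               */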
  890         if (ring->bf_enabled && bf_size <= MAX_BF &&
  891             (tx_desc->ctrl.ins_vlan != MLX4_WQE_CTRL_INS_CVLAN)) {
  892 
  893                 /* store doorbell number */
  894                 *(volatile __be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn);
  895 
  896                 /* or in producer number for this WQE */
  897                 opcode |= cpu_to_be32((bf_prod & 0xffff) << 8);
  898 
  899                 /*
  900                  * Ensure the new descriptor hits memory before
  901                  * setting ownership of this descriptor to HW:
  902                  */
  903                 wmb();
  904                 tx_desc->ctrl.owner_opcode = opcode;
  905                 wmb();
  906                 mlx4_bf_copy(((u8 *)ring->bf.reg) + ring->bf.offset,
  907                      (volatile unsigned long *) &tx_desc->ctrl, bf_size);
  908                 wmb();
  909                 ring->bf.offset ^= ring->bf.buf_size;
  910         } else {
  911                 /*
  912                  * Ensure the new descriptor hits memory before
  913                  * setting ownership of this descriptor to HW:
  914                  */
  915                 wmb();
  916                 tx_desc->ctrl.owner_opcode = opcode;
  917                 wmb();
  918                 writel(cpu_to_be32(ring->doorbell_qpn),
  919                     ((u8 *)ring->bf.uar->map) + MLX4_SEND_DOORBELL);
  920         }
  921 
  922         return (0);
  923 tx_drop:
  924         *mbp = NULL;
  925         m_freem(mb);
  926         return (err);
  927 }
  928 
  929 static int
  930 mlx4_en_transmit_locked(struct ifnet *ifp, int tx_ind, struct mbuf *mb)
  931 {
  932         struct mlx4_en_priv *priv = mlx4_netdev_priv(ifp);
  933         struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind];
  934         int err = 0;
  935 
  936         if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
  937             READ_ONCE(priv->port_up) == 0)) {
  938                 m_freem(mb);
  939                 return (ENETDOWN);
  940         }
  941 
  942         if (mlx4_en_xmit(priv, tx_ind, &mb) != 0) {
  943                 /* NOTE: m_freem() is NULL safe */
  944                 m_freem(mb);
  945                 err = ENOBUFS;
  946                 if (ring->watchdog_time == 0)
  947                         ring->watchdog_time = ticks + MLX4_EN_WATCHDOG_TIMEOUT;
  948         } else {
  949                 ring->watchdog_time = 0;
  950         }
  951         return (err);
  952 }
  953 
  954 int
  955 mlx4_en_transmit(struct ifnet *dev, struct mbuf *m)
  956 {
  957         struct mlx4_en_priv *priv = mlx4_netdev_priv(dev);
  958         struct mlx4_en_tx_ring *ring;
  959         int i, err = 0;
  960 
  961         if (priv->port_up == 0) {
  962                 m_freem(m);
  963                 return (ENETDOWN);
  964         }
  965 
  966         /* Compute which queue to use */
   967         /* use the mbuf's flow ID when it already carries a valid hash */
   968         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
   969                 i = (m->m_pkthdr.flowid % 128) % priv->tx_ring_num;
   970         } else {
   971                 i = mlx4_en_select_queue(dev, m);
   972         }
  973 
  974         ring = priv->tx_ring[i];
  975 
  976         spin_lock(&ring->tx_lock);
  977 
  978         err = mlx4_en_transmit_locked(dev, i, m);
  979         spin_unlock(&ring->tx_lock);
  980 
  981         /* Poll CQ here */
  982         mlx4_en_xmit_poll(priv, i);
  983 
  984 #if __FreeBSD_version >= 1100000
  985         if (unlikely(err != 0))
  986                 if_inc_counter(dev, IFCOUNTER_IQDROPS, 1);
  987 #endif
  988         return (err);
  989 }
  990 
  991 /*
  992  * Flush ring buffers.
  993  */
  994 void
  995 mlx4_en_qflush(struct ifnet *dev)
  996 {
  997         struct mlx4_en_priv *priv = mlx4_netdev_priv(dev);
  998 
  999         if (priv->port_up == 0)
 1000                 return;
 1001 
 1002         if_qflush(dev);
 1003 }
