FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgbe/t4_l2t.c

/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>
#include <netinet/in.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "t4_l2t.h"

/*
 * Module locking notes:  There is an RW lock protecting the L2 table as a
 * whole plus a mutex per L2T entry.  Entry lookups and allocations happen
 * under the protection of the table lock; individual entry changes happen
 * while holding that entry's mutex.  The table lock nests outside the
 * entry locks.  Allocations of new entries take the table lock as writers so
 * no other lookups can happen while allocating new entries.  Entry updates
 * take the table lock as readers so multiple entries can be updated in
 * parallel.  An L2T entry is dropped by decrementing its reference count;
 * this can happen in parallel with entry allocation, but no entry can change
 * state or increment its ref count during allocation, as both of those
 * operations perform lookups.
 *
 * Note: We do not take references to ifnets in this module because both
 * the TOE and the sockets already hold references to the interfaces and the
 * lifetime of an L2T entry is fully contained in the lifetime of the TOE.
 */
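
/*
 * Editor's sketch (not part of the original file): a minimal illustration of
 * the lock ordering described above for an in-place entry update.  The
 * function name and the choice of field being updated are hypothetical; the
 * point is that the table lock is taken first (as a reader for updates) and
 * the entry's own mutex nests inside it.
 */
static void __unused
l2t_example_update_dmac(struct l2t_data *d, struct l2t_entry *e,
    const uint8_t *new_dmac)
{

        rw_rlock(&d->lock);                     /* table lock, read mode */
        mtx_lock(&e->lock);                     /* entry lock nests inside */
        memcpy(e->dmac, new_dmac, ETHER_ADDR_LEN);
        mtx_unlock(&e->lock);
        rw_runlock(&d->lock);
}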

/*
 * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
 */
struct l2t_entry *
t4_alloc_l2e(struct l2t_data *d)
{
        struct l2t_entry *end, *e, **p;

        rw_assert(&d->lock, RA_WLOCKED);

        if (!atomic_load_acq_int(&d->nfree))
                return (NULL);

        /* there's definitely a free entry */
        for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e)
                if (atomic_load_acq_int(&e->refcnt) == 0)
                        goto found;

        for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e)
                continue;
found:
        d->rover = e + 1;
        atomic_subtract_int(&d->nfree, 1);

        /*
         * The entry we found may be an inactive entry that is
         * presently in the hash table.  We need to remove it.
         */
        if (e->state < L2T_STATE_SWITCHING) {
                for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
                        if (*p == e) {
                                *p = e->next;
                                e->next = NULL;
                                break;
                        }
                }
        }

        e->state = L2T_STATE_UNUSED;
        return (e);
}

static struct l2t_entry *
find_or_alloc_l2e(struct l2t_data *d, uint16_t vlan, uint8_t port, uint8_t *dmac)
{
        struct l2t_entry *end, *e, **p;
        struct l2t_entry *first_free = NULL;

        for (e = &d->l2tab[0], end = &d->l2tab[d->l2t_size]; e != end; ++e) {
                if (atomic_load_acq_int(&e->refcnt) == 0) {
                        if (!first_free)
                                first_free = e;
                } else if (e->state == L2T_STATE_SWITCHING &&
                    memcmp(e->dmac, dmac, ETHER_ADDR_LEN) == 0 &&
                    e->vlan == vlan && e->lport == port)
                        return (e);     /* Found existing entry that matches. */
        }

        if (first_free == NULL)
                return (NULL);  /* No match and no room for a new entry. */

        /*
         * The entry we found may be an inactive entry that is
         * presently in the hash table.  We need to remove it.
         */
        e = first_free;
        if (e->state < L2T_STATE_SWITCHING) {
                for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
                        if (*p == e) {
                                *p = e->next;
                                e->next = NULL;
                                break;
                        }
                }
        }
        e->state = L2T_STATE_UNUSED;
        return (e);
}

static void
mk_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync, int reply,
    void *dst)
{
        struct cpl_l2t_write_req *req;
        int idx;

        req = dst;
        idx = e->idx + sc->vres.l2t.start;
        INIT_TP_WR(req, 0);
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx |
            V_SYNC_WR(sync) | V_TID_QID(e->iqid)));
        req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!reply));
        req->l2t_idx = htons(idx);
        req->vlan = htons(e->vlan);
        memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
}

/*
 * Write an L2T entry.  Must be called with the entry locked.
 * The write may be synchronous or asynchronous.
 */
int
t4_write_l2e(struct l2t_entry *e, int sync)
{
        struct sge_wrq *wrq;
        struct adapter *sc;
        struct wrq_cookie cookie;
        struct cpl_l2t_write_req *req;

        mtx_assert(&e->lock, MA_OWNED);
        MPASS(e->wrq != NULL);

        wrq = e->wrq;
        sc = wrq->adapter;

        req = start_wrq_wr(wrq, howmany(sizeof(*req), 16), &cookie);
        if (req == NULL)
                return (ENOMEM);

        mk_write_l2e(sc, e, sync, sync, req);

        commit_wrq_wr(wrq, req, &cookie);

        if (sync && e->state != L2T_STATE_SWITCHING)
                e->state = L2T_STATE_SYNC_WRITE;

        return (0);
}
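
/*
 * Editor's sketch (not part of the original file): typical caller-side use
 * of t4_write_l2e().  The entry mutex must be held across the call; the
 * wrapper name below is hypothetical.
 */
static int __unused
l2t_example_sync_write(struct l2t_entry *e)
{
        int rc;

        mtx_lock(&e->lock);
        rc = t4_write_l2e(e, 1);        /* 1 = request a synchronous write */
        mtx_unlock(&e->lock);
        return (rc);
}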

/*
 * Allocate an L2T entry for use by a TLS connection.  These entries are
 * associated with a specific VLAN and destination MAC that never changes.
 * However, multiple TLS connections might share a single entry.
 *
 * If a new L2T entry is allocated, a work request to initialize it is
 * written to 'txq' and 'ndesc' will be set to 1.  Otherwise, 'ndesc'
 * will be set to 0.
 *
 * To avoid races, separate L2T entries are reserved for individual
 * queues since the L2T entry update is written to a txq just prior to
 * TLS work requests that will depend on it being written.
 */
struct l2t_entry *
t4_l2t_alloc_tls(struct adapter *sc, struct sge_txq *txq, void *dst,
    int *ndesc, uint16_t vlan, uint8_t port, uint8_t *eth_addr)
{
        struct l2t_data *d;
        struct l2t_entry *e;
        int i;

        TXQ_LOCK_ASSERT_OWNED(txq);

        d = sc->l2t;
        *ndesc = 0;

        rw_rlock(&d->lock);

        /* First, try to find an existing entry. */
        for (i = 0; i < d->l2t_size; i++) {
                e = &d->l2tab[i];
                if (e->state != L2T_STATE_TLS)
                        continue;
                if (e->vlan == vlan && e->lport == port &&
                    e->wrq == (struct sge_wrq *)txq &&
                    memcmp(e->dmac, eth_addr, ETHER_ADDR_LEN) == 0) {
                        if (atomic_fetchadd_int(&e->refcnt, 1) == 0) {
                                /*
                                 * This entry wasn't held but is still
                                 * valid, so decrement nfree.
                                 */
                                atomic_subtract_int(&d->nfree, 1);
                        }
                        KASSERT(e->refcnt > 0,
                            ("%s: refcount overflow", __func__));
                        rw_runlock(&d->lock);
                        return (e);
                }
        }

        /*
         * Don't bother rechecking if the upgrade fails since the txq is
         * already locked.
         */
        if (!rw_try_upgrade(&d->lock)) {
                rw_runlock(&d->lock);
                rw_wlock(&d->lock);
        }

        /* Match not found, allocate a new entry. */
        e = t4_alloc_l2e(d);
        if (e == NULL) {
                rw_wunlock(&d->lock);
                return (e);
        }

        /* Initialize the entry. */
        e->state = L2T_STATE_TLS;
        e->vlan = vlan;
        e->lport = port;
        e->iqid = sc->sge.fwq.abs_id;
        e->wrq = (struct sge_wrq *)txq;
        memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
        atomic_store_rel_int(&e->refcnt, 1);
        rw_wunlock(&d->lock);

        /* Write out the work request. */
        *ndesc = howmany(sizeof(struct cpl_l2t_write_req), EQ_ESIZE);
        MPASS(*ndesc == 1);
        mk_write_l2e(sc, e, 1, 0, dst);

        return (e);
}

/*
 * Allocate an L2T entry for use by a switching rule.  Such entries need to
 * be freed explicitly, and while busy they are not on any hash chain, so
 * normal address resolution updates do not see them.
 */
struct l2t_entry *
t4_l2t_alloc_switching(struct adapter *sc, uint16_t vlan, uint8_t port,
    uint8_t *eth_addr)
{
        struct l2t_data *d = sc->l2t;
        struct l2t_entry *e;
        int rc;

        rw_wlock(&d->lock);
        e = find_or_alloc_l2e(d, vlan, port, eth_addr);
        if (e) {
                if (atomic_load_acq_int(&e->refcnt) == 0) {
                        mtx_lock(&e->lock);    /* avoid race with t4_l2t_free */
                        e->wrq = &sc->sge.ctrlq[0];
                        e->iqid = sc->sge.fwq.abs_id;
                        e->state = L2T_STATE_SWITCHING;
                        e->vlan = vlan;
                        e->lport = port;
                        memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
                        atomic_store_rel_int(&e->refcnt, 1);
                        atomic_subtract_int(&d->nfree, 1);
                        rc = t4_write_l2e(e, 0);
                        mtx_unlock(&e->lock);
                        if (rc != 0)
                                e = NULL;
                } else {
                        MPASS(e->vlan == vlan);
                        MPASS(e->lport == port);
                        atomic_add_int(&e->refcnt, 1);
                }
        }
        rw_wunlock(&d->lock);
        return (e);
}
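
/*
 * Editor's sketch (not part of the original file): a hypothetical caller of
 * t4_l2t_alloc_switching() showing the explicit-free requirement mentioned
 * above.  It assumes t4_l2t_release() (declared in t4_l2t.h) is the routine
 * that drops the reference taken here; the function name below is made up.
 */
static void __unused
l2t_example_switching_rule(struct adapter *sc, uint16_t vlan, uint8_t port,
    uint8_t *dmac)
{
        struct l2t_entry *e;

        e = t4_l2t_alloc_switching(sc, vlan, port, dmac);
        if (e == NULL)
                return;         /* no free entry, or the L2T write failed */

        /* ... program filters/rules that reference e->idx ... */

        t4_l2t_release(e);      /* explicit free when the rule is removed */
}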

int
t4_init_l2t(struct adapter *sc, int flags)
{
        int i, l2t_size;
        struct l2t_data *d;

        l2t_size = sc->vres.l2t.size;
        if (l2t_size < 2)       /* At least 1 bucket for IP and 1 for IPv6 */
                return (EINVAL);

        d = malloc(sizeof(*d) + l2t_size * sizeof (struct l2t_entry), M_CXGBE,
            M_ZERO | flags);
        if (!d)
                return (ENOMEM);

        d->l2t_size = l2t_size;
        d->rover = d->l2tab;
        atomic_store_rel_int(&d->nfree, l2t_size);
        rw_init(&d->lock, "L2T");

        for (i = 0; i < l2t_size; i++) {
                struct l2t_entry *e = &d->l2tab[i];

                e->idx = i;
                e->state = L2T_STATE_UNUSED;
                mtx_init(&e->lock, "L2T_E", NULL, MTX_DEF);
                STAILQ_INIT(&e->wr_list);
                atomic_store_rel_int(&e->refcnt, 0);
        }

        sc->l2t = d;

        return (0);
}

int
t4_free_l2t(struct l2t_data *d)
{
        int i;

        for (i = 0; i < d->l2t_size; i++)
                mtx_destroy(&d->l2tab[i].lock);
        rw_destroy(&d->lock);
        free(d, M_CXGBE);

        return (0);
}
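
/*
 * Editor's sketch (not part of the original file): the intended attach/detach
 * pairing of t4_init_l2t() and t4_free_l2t().  The 'flags' argument is passed
 * straight to malloc(9), so M_WAITOK or M_NOWAIT are the sensible choices.
 * The wrapper names are hypothetical.
 */
static int __unused
l2t_example_attach(struct adapter *sc)
{

        return (t4_init_l2t(sc, M_NOWAIT));     /* or M_WAITOK if sleeping is OK */
}

static void __unused
l2t_example_detach(struct adapter *sc)
{

        if (sc->l2t != NULL) {
                t4_free_l2t(sc->l2t);
                sc->l2t = NULL;
        }
}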

int
do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
        const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
        unsigned int tid = GET_TID(rpl);
        unsigned int idx = tid % L2T_SIZE;

        if (__predict_false(rpl->status != CPL_ERR_NONE)) {
                log(LOG_ERR,
                    "Unexpected L2T_WRITE_RPL (%u) for entry at hw_idx %u\n",
                    rpl->status, idx);
                return (EINVAL);
        }

        return (0);
}
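
/*
 * Editor's note (not part of the original file): do_l2t_write_rpl() is the
 * CPL handler for the firmware's reply to a synchronous L2T write.  Below is
 * a sketch of how such a handler would be hooked up, assuming the driver's
 * t4_register_cpl_handler() routine; the wrapper name is hypothetical.
 */
static void __unused
l2t_example_register_handler(void)
{

        t4_register_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
}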

static inline unsigned int
vlan_prio(const struct l2t_entry *e)
{
        return e->vlan >> 13;
}

static char
l2e_state(const struct l2t_entry *e)
{
        switch (e->state) {
        case L2T_STATE_VALID: return 'V';  /* valid, fast-path entry */
        case L2T_STATE_STALE: return 'S';  /* needs revalidation, but usable */
        case L2T_STATE_SYNC_WRITE: return 'W';
        case L2T_STATE_RESOLVING: return STAILQ_EMPTY(&e->wr_list) ? 'R' : 'A';
        case L2T_STATE_SWITCHING: return 'X';
        case L2T_STATE_TLS: return 'T';
        default: return 'U';
        }
}

int
sysctl_l2t(SYSCTL_HANDLER_ARGS)
{
        struct adapter *sc = arg1;
        struct l2t_data *l2t = sc->l2t;
        struct l2t_entry *e;
        struct sbuf *sb;
        int rc, i, header = 0;
        char ip[INET6_ADDRSTRLEN];

        if (l2t == NULL)
                return (ENXIO);

        rc = sysctl_wire_old_buffer(req, 0);
        if (rc != 0)
                return (rc);

        sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
        if (sb == NULL)
                return (ENOMEM);

        e = &l2t->l2tab[0];
        for (i = 0; i < l2t->l2t_size; i++, e++) {
                mtx_lock(&e->lock);
                if (e->state == L2T_STATE_UNUSED)
                        goto skip;

                if (header == 0) {
                        sbuf_printf(sb, " Idx IP address      "
                            "Ethernet address  VLAN/P LP State Users Port");
                        header = 1;
                }
                if (e->state >= L2T_STATE_SWITCHING)
                        ip[0] = 0;
                else {
                        inet_ntop(e->ipv6 ? AF_INET6 : AF_INET, &e->addr[0],
                            &ip[0], sizeof(ip));
                }

                /*
                 * XXX: IPv6 addresses may not align properly in the output.
                 */
                sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
                           " %u %2u   %c   %5u %s",
                           e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
                           e->dmac[3], e->dmac[4], e->dmac[5],
                           e->vlan & 0xfff, vlan_prio(e), e->lport,
                           l2e_state(e), atomic_load_acq_int(&e->refcnt),
                           e->ifp ? e->ifp->if_xname : "-");
skip:
                mtx_unlock(&e->lock);
        }

        rc = sbuf_finish(sb);
        sbuf_delete(sb);

        return (rc);
}
