The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2020 Alexander V. Chernikov
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 #include "opt_inet6.h"
   31 
   32 #include <sys/param.h>
   33 #include <sys/kernel.h>
   34 #include <sys/lock.h>
   35 #include <sys/rmlock.h>
   36 #include <sys/malloc.h>
   37 #include <sys/module.h>
   38 #include <sys/kernel.h>
   39 #include <sys/socket.h>
   40 #include <sys/sysctl.h>
   41 #include <sys/syslog.h>
   42 #include <net/vnet.h>
   43 
   44 #include <net/if.h>
   45 #include <net/if_var.h>
   46 
   47 #include <netinet/in.h>
   48 #include <netinet/ip.h>
   49 #include <netinet/ip6.h>
   50 #include <netinet6/ip6_var.h>
   51 #include <netinet6/in6_fib.h>
   52 
   53 #include <net/route.h>
   54 #include <net/route/nhop.h>
   55 #include <net/route/route_ctl.h>
   56 #include <net/route/fib_algo.h>
   57 #define RTDEBUG
   58 
   59 #include "rte_lpm6.h"
   60 
   61 #define LPM6_MIN_TBL8   8               /* 2 pages of memory */
   62 #define LPM6_MAX_TBL8   65536 * 16      /* 256M */
   63 
   64 struct fib_algo_calldata {
   65         void *lookup;
   66         void *arg;
   67 };
   68 
   69 struct dpdk_lpm6_data {
   70         struct rte_lpm6 *lpm6;
   71         uint64_t routes_added;
   72         uint64_t routes_failed;
   73         uint32_t number_tbl8s;
   74         uint32_t fibnum;
   75         uint8_t hit_tables;
   76         struct fib_data *fd;
   77 };
   78 
   79 static struct nhop_object *
   80 lookup_ptr_ll(const struct rte_lpm6 *lpm6, const struct in6_addr *dst6,
   81     uint32_t scopeid)
   82 {
   83         const struct rte_lpm6_external *rte_ext;
   84 
   85         rte_ext = (const struct rte_lpm6_external *)lpm6;
   86 
   87         return (fib6_radix_lookup_nh(rte_ext->fibnum, dst6, scopeid));
   88 }
   89 
   90 /*
   91  * Main datapath routing
   92  */
   93 static struct nhop_object *
   94 lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
   95 {
   96         const struct rte_lpm6 *lpm6;
   97         const struct rte_lpm6_external *rte_ext;
   98         const struct in6_addr *addr6;
   99         uint32_t nhidx = 0;
  100         int ret;
  101 
  102         lpm6 = (const struct rte_lpm6 *)algo_data;
  103         addr6 = key.addr6;
  104         rte_ext = (const struct rte_lpm6_external *)lpm6;
  105 
  106         if (!IN6_IS_SCOPE_LINKLOCAL(addr6)) {
  107                 ret = rte_lpm6_lookup(lpm6, (const uint8_t *)addr6, &nhidx);
  108                 if (ret == 0) {
  109                         /* Success! */
  110                         return (rte_ext->nh_idx[nhidx]);
  111                 } else {
  112                         /* Not found. Check default route */
  113                         if (rte_ext->default_idx > 0)
  114                                 return (rte_ext->nh_idx[rte_ext->default_idx]);
  115                         else
  116                                 return (NULL);
  117                 }
  118         } else {
  119                 /* LL */
  120                 return (lookup_ptr_ll(lpm6, addr6, scopeid));
  121         }
  122 }
  123 
  124 static uint8_t
  125 rte6_get_pref(const struct rib_rtable_info *rinfo)
  126 {
  127 
  128         if (rinfo->num_prefixes < 10)
  129                 return (1);
  130         else if (rinfo->num_prefixes < 1000)
  131                 return (rinfo->num_prefixes / 10);
  132         else if (rinfo->num_prefixes < 100000)
  133                 return (100 + rinfo->num_prefixes / 667);
  134         else
  135                 return (250);
  136 }
  137 
  138 static enum flm_op_result
  139 handle_default_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc)
  140 {
  141         struct rte_lpm6_external *rte_ext;
  142         rte_ext = (struct rte_lpm6_external *)dd->lpm6;
  143 
  144         if (rc->rc_cmd != RTM_DELETE) {
  145                 /* Reference new */
  146                 uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
  147 
  148                 if (nhidx == 0)
  149                         return (FLM_REBUILD);
  150                 rte_ext->default_idx = nhidx;
  151         } else {
  152                 /* No default route */
  153                 rte_ext->default_idx = 0;
  154         }
  155 
  156         return (FLM_SUCCESS);
  157 }
  158 
  159 static enum flm_op_result
  160 handle_ll_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc,
  161     const struct in6_addr addr6, int plen, uint32_t scopeid)
  162 {
  163 
  164         return (FLM_SUCCESS);
  165 }
  166 
  167 static struct rte_lpm6_rule *
  168 pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6, int plen,
  169     int *pplen, uint32_t *pnhop_idx, char *buffer)
  170 {
  171         struct rte_lpm6_rule *lsp_rule = NULL;
  172         struct rtentry *rt;
  173 
  174         *pnhop_idx = 0;
  175         *pplen = 0;
  176 
  177         rt = rt_get_inet6_parent(dd->fibnum, addr6, plen);
  178         /* plen = 0 means default route and it's out of scope */
  179         if (rt != NULL) {
  180                 uint32_t nhop_idx, scopeid;
  181                 struct in6_addr new_addr6;
  182                 rt_get_inet6_prefix_plen(rt, &new_addr6, &plen, &scopeid);
  183                 if (plen > 0) {
  184                         nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt));
  185                         lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhop_idx);
  186                         *pnhop_idx = nhop_idx;
  187                         *pplen = plen;
  188                 }
  189         }
  190 
  191         return (lsp_rule);
  192 }
  193 
  194 static enum flm_op_result
  195 handle_gu_change(struct dpdk_lpm6_data *dd, const struct rib_cmd_info *rc,
  196     const struct in6_addr *addr6, int plen)
  197 {
  198         int ret;
  199         char abuf[INET6_ADDRSTRLEN];
  200         inet_ntop(AF_INET6, addr6, abuf, sizeof(abuf));
  201 
  202         /* So we get sin6, plen and nhidx */
  203         if (rc->rc_cmd != RTM_DELETE) {
  204                 /*
  205                  * Addition or change. Save nhop in the internal table
  206                  * and get index.
  207                  */
  208                 uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
  209                 if (nhidx == 0) {
  210                         FIB_PRINTF(LOG_INFO, dd->fd, "nhop limit reached, need rebuild");
  211                         return (FLM_REBUILD);
  212                 }
  213 
  214                 ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6,
  215                                    plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0);
  216                 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
  217                     (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
  218                     abuf, plen,
  219                     rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
  220                     nhidx, ret);
  221         } else {
  222                 /*
  223                  * Need to lookup parent. Assume deletion happened already
  224                  */
  225                 char buffer[RTE_LPM6_RULE_SIZE];
  226                 struct rte_lpm6_rule *lsp_rule = NULL;
  227                 int parent_plen;
  228                 uint32_t parent_nhop_idx;
  229                 lsp_rule = pack_parent_rule(dd, addr6, plen, &parent_plen,
  230                     &parent_nhop_idx, buffer);
  231 
  232                 ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, lsp_rule);
  233                 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d -> /%d nhop %u -> %u ret: %d",
  234                     "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
  235                     parent_nhop_idx, ret);
  236         }
  237 
  238         if (ret != 0) {
  239                 FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret);
  240                 if (ret == -ENOSPC)
  241                         return (FLM_REBUILD);
  242                 return (FLM_ERROR);
  243         }
  244         return (FLM_SUCCESS);
  245 }
  246 
  247 static enum flm_op_result
  248 handle_any_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc)
  249 {
  250         enum flm_op_result ret;
  251         struct in6_addr addr6;
  252         uint32_t scopeid;
  253         int plen;
  254 
  255         rt_get_inet6_prefix_plen(rc->rc_rt, &addr6, &plen, &scopeid);
  256 
  257         if (IN6_IS_SCOPE_LINKLOCAL(&addr6))
  258                 ret = handle_ll_change(dd, rc, addr6, plen, scopeid);
  259         else if (plen == 0)
  260                 ret = handle_default_change(dd, rc);
  261         else
  262                 ret = handle_gu_change(dd, rc, &addr6, plen);
  263 
  264         if (ret != 0)
  265                 FIB_PRINTF(LOG_INFO, dd->fd, "error handling route");
  266         return (ret);
  267 }
  268 
  269 static enum flm_op_result
  270 handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
  271     void *_data)
  272 {
  273         struct dpdk_lpm6_data *dd;
  274 
  275         dd = (struct dpdk_lpm6_data *)_data;
  276 
  277         return (handle_any_change(dd, rc));
  278 }
  279 
  280 static void
  281 destroy_dd(struct dpdk_lpm6_data *dd)
  282 {
  283 
  284         FIB_PRINTF(LOG_INFO, dd->fd, "destroy dd %p", dd);
  285         if (dd->lpm6 != NULL)
  286                 rte_lpm6_free(dd->lpm6);
  287         free(dd, M_TEMP);
  288 }
  289 
  290 static void
  291 destroy_table(void *_data)
  292 {
  293 
  294         destroy_dd((struct dpdk_lpm6_data *)_data);
  295 }
  296 
  297 static enum flm_op_result
  298 add_route_cb(struct rtentry *rt, void *_data)
  299 {
  300         struct dpdk_lpm6_data *dd = (struct dpdk_lpm6_data *)_data;
  301         struct in6_addr addr6;
  302         struct nhop_object *nh;
  303         uint32_t scopeid;
  304         int plen;
  305         int ret;
  306 
  307         rt_get_inet6_prefix_plen(rt, &addr6, &plen, &scopeid);
  308         nh = rt_get_raw_nhop(rt);
  309 
  310         if (IN6_IS_SCOPE_LINKLOCAL(&addr6)) {
  311 
  312                 /*
  313                  * We don't operate on LL directly, however
  314                  * reference them to maintain guarantee on
  315                  * ability to refcount nhops in epoch.
  316                  */
  317                 fib_get_nhop_idx(dd->fd, nh);
  318                 return (FLM_SUCCESS);
  319         }
  320 
  321         char abuf[INET6_ADDRSTRLEN];
  322         inet_ntop(AF_INET6, &addr6, abuf, sizeof(abuf));
  323         FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen);
  324 
  325         if (plen == 0) {
  326                 struct rib_cmd_info rc = {
  327                         .rc_cmd = RTM_ADD,
  328                         .rc_nh_new = nh,
  329                 };
  330 
  331                 FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route");
  332                 return (handle_default_change(dd, &rc));
  333         }
  334 
  335         uint32_t nhidx = fib_get_nhop_idx(dd->fd, nh);
  336         if (nhidx == 0) {
  337                 FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index");
  338                 return (FLM_REBUILD);
  339         }
  340         ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)&addr6, plen, nhidx, 1);
  341         FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d",
  342             dd->lpm6, abuf, plen, nhidx, ret);
  343 
  344         if (ret != 0) {
  345                 FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm6_add() returned %d", ret);
  346                 if (ret == -ENOSPC) {
  347                         dd->hit_tables = 1;
  348                         return (FLM_REBUILD);
  349                 }
  350                 dd->routes_failed++;
  351                 return (FLM_ERROR);
  352         } else
  353                 dd->routes_added++;
  354 
  355         return (FLM_SUCCESS);
  356 }
  357 
  358 static enum flm_op_result
  359 check_dump_success(void *_data, struct fib_dp *dp)
  360 {
  361         struct dpdk_lpm6_data *dd;
  362 
  363         dd = (struct dpdk_lpm6_data *)_data;
  364 
  365         FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. added: %zu failed: %zu",
  366             dd->routes_added, dd->routes_failed);
  367         if (dd->hit_tables || dd->routes_failed > 0)
  368                 return (FLM_REBUILD);
  369 
  370         FIB_PRINTF(LOG_INFO, dd->fd,
  371             "DPDK lookup engine synced with IPv6 RIB id %u, %zu routes",
  372             dd->fibnum, dd->routes_added);
  373 
  374         dp->f = lookup_ptr;
  375         dp->arg = dd->lpm6;
  376 
  377         return (FLM_SUCCESS);
  378 }
  379 
  380 static void
  381 estimate_scale(const struct dpdk_lpm6_data *dd_src, struct dpdk_lpm6_data *dd)
  382 {
  383 
  384         /* XXX: update at 75% capacity */
  385         if (dd_src->hit_tables)
  386                 dd->number_tbl8s = dd_src->number_tbl8s * 2;
  387         else
  388                 dd->number_tbl8s = dd_src->number_tbl8s;
  389 
  390         /* TODO: look into the appropriate RIB to adjust */
  391 }
  392 
  393 static struct dpdk_lpm6_data *
  394 build_table(struct dpdk_lpm6_data *dd_prev, struct fib_data *fd)
  395 {
  396         struct dpdk_lpm6_data *dd;
  397         struct rte_lpm6 *lpm6;
  398 
  399         dd = malloc(sizeof(struct dpdk_lpm6_data), M_TEMP, M_NOWAIT | M_ZERO);
  400         if (dd == NULL) {
  401                 FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure");
  402                 return (NULL);
  403         }
  404         dd->fibnum = dd_prev->fibnum;
  405         dd->fd = fd;
  406 
  407         estimate_scale(dd_prev, dd);
  408 
  409         struct rte_lpm6_config cfg = {.number_tbl8s = dd->number_tbl8s};
  410         lpm6 = rte_lpm6_create("test", 0, &cfg);
  411         if (lpm6 == NULL) {
  412                 FIB_PRINTF(LOG_INFO, fd, "unable to create lpm6");
  413                 free(dd, M_TEMP);
  414                 return (NULL);
  415         }
  416         dd->lpm6 = lpm6;
  417         struct rte_lpm6_external *ext = (struct rte_lpm6_external *)lpm6;
  418         ext->nh_idx = fib_get_nhop_array(dd->fd);
  419 
  420         FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s);
  421 
  422         return (dd);
  423 }
  424 
  425 static enum flm_op_result
  426 init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data)
  427 {
  428         struct dpdk_lpm6_data *dd, dd_base;
  429 
  430         if (_old_data == NULL) {
  431                 bzero(&dd_base, sizeof(struct dpdk_lpm6_data));
  432                 dd_base.fibnum = fibnum;
  433                 /* TODO: get rib statistics */
  434                 dd_base.number_tbl8s = LPM6_MIN_TBL8;
  435                 dd = &dd_base;
  436         } else {
  437                 FIB_PRINTF(LOG_INFO, fd, "Starting with old data");
  438                 dd = (struct dpdk_lpm6_data *)_old_data;
  439         }
  440 
  441         /* Guaranteed to be in epoch */
  442         dd = build_table(dd, fd);
  443         if (dd == NULL) {
  444                 FIB_PRINTF(LOG_INFO, fd, "table creation failed");
  445                 return (FLM_REBUILD);
  446         }
  447 
  448         *data = dd;
  449         return (FLM_SUCCESS);
  450 }
  451 
  452 static struct fib_lookup_module dpdk_lpm6 = {
  453         .flm_name = "dpdk_lpm6",
  454         .flm_family = AF_INET6,
  455         .flm_init_cb = init_table,
  456         .flm_destroy_cb = destroy_table,
  457         .flm_dump_rib_item_cb = add_route_cb,
  458         .flm_dump_end_cb = check_dump_success,
  459         .flm_change_rib_item_cb = handle_rtable_change_cb,
  460         .flm_get_pref = rte6_get_pref,
  461 };
  462 
  463 static int
  464 lpm6_modevent(module_t mod, int type, void *unused)
  465 {
  466         int error = 0;
  467 
  468         switch (type) {
  469         case MOD_LOAD:
  470                 fib_module_register(&dpdk_lpm6);
  471                 break;
  472         case MOD_UNLOAD:
  473                 error = fib_module_unregister(&dpdk_lpm6);
  474                 break;
  475         default:
  476                 error = EOPNOTSUPP;
  477                 break;
  478         }
  479         return (error);
  480 }
  481 
  482 static moduledata_t lpm6mod = {
  483         "dpdk_lpm6",
  484         lpm6_modevent,
  485         0
  486 };
  487 
  488 DECLARE_MODULE(lpm6mod, lpm6mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
  489 MODULE_VERSION(lpm6mod, 1);

Cache object: 50ddc2d1b7f8cf09fdfce720782b2af9


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.