The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/dpdk_rte_lpm/dpdk_lpm.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2020 Alexander V. Chernikov
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 #include "opt_inet.h"
   31 
   32 #include <sys/param.h>
   33 #include <sys/kernel.h>
   34 #include <sys/lock.h>
   35 #include <sys/rmlock.h>
   36 #include <sys/malloc.h>
   37 #include <sys/module.h>
   38 #include <sys/kernel.h>
   39 #include <sys/socket.h>
   40 #include <sys/sysctl.h>
   41 #include <sys/syslog.h>
   42 #include <net/vnet.h>
   43 
   44 #include <net/if.h>
   45 #include <net/if_var.h>
   46 
   47 #include <netinet/in.h>
   48 #include <netinet/in_fib.h>
   49 #include <netinet/ip.h>
   50 
   51 #include <net/route.h>
   52 #include <net/route/nhop.h>
   53 #include <net/route/route_ctl.h>
   54 #include <net/route/fib_algo.h>
   55 
   56 #include "rte_shim.h"
   57 #include "rte_lpm.h"
   58 
   59 #define LPM_MIN_TBL8    8               /* 2 pages of memory */
   60 #define LPM_MAX_TBL8    65536 * 16      /* 256M */
   61 
   62 MALLOC_DECLARE(M_RTABLE);
   63 
   64 struct dpdk_lpm_data {
   65         struct rte_lpm *lpm;
   66         uint64_t routes_added;
   67         uint64_t routes_failed;
   68         uint32_t number_tbl8s;
   69         uint32_t fibnum;
   70         uint8_t hit_tables;
   71         uint8_t hit_records;
   72         struct fib_data *fd;
   73 };
   74 
   75 /*
   76  * Main datapath routing
   77  */
   78 static struct nhop_object *
   79 lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
   80 {
   81         struct rte_lpm *lpm;
   82         const struct rte_lpm_external *rte_ext;
   83         uint32_t nhidx = 0;
   84         int ret;
   85 
   86         lpm = (struct rte_lpm *)algo_data;
   87         rte_ext = (const struct rte_lpm_external *)lpm;
   88 
   89         ret = rte_lpm_lookup(lpm, ntohl(key.addr4.s_addr), &nhidx);
   90         if (ret == 0) {
   91                 /* Success! */
   92                 return (rte_ext->nh_idx[nhidx]);
   93         } else {
   94                 /* Not found. Check default route */
   95                 return (rte_ext->nh_idx[rte_ext->default_idx]);
   96         }
   97 
   98         return (NULL);
   99 }
  100 
  101 static uint8_t
  102 rte_get_pref(const struct rib_rtable_info *rinfo)
  103 {
  104 
  105         if (rinfo->num_prefixes < 10)
  106                 return (1);
  107         else if (rinfo->num_prefixes < 1000)
  108                 return (rinfo->num_prefixes / 10);
  109         else if (rinfo->num_prefixes < 500000)
  110                 return (100 + rinfo->num_prefixes / 3334);
  111         else
  112                 return (250);
  113 }
  114 
  115 static enum flm_op_result
  116 handle_default_change(struct dpdk_lpm_data *dd, struct rib_cmd_info *rc)
  117 {
  118         struct rte_lpm_external *rte_ext;
  119         rte_ext = (struct rte_lpm_external *)dd->lpm;
  120 
  121         if (rc->rc_cmd != RTM_DELETE) {
  122                 /* Reference new */
  123                 uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
  124 
  125                 if (nhidx == 0)
  126                         return (FLM_REBUILD);
  127                 rte_ext->default_idx = nhidx;
  128         } else {
  129                 /* No default route */
  130                 rte_ext->default_idx = 0;
  131         }
  132 
  133         return (FLM_SUCCESS);
  134 }
  135 
  136 static void
  137 get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, int plen,
  138     uint8_t *pplen, uint32_t *nhop_idx)
  139 {
  140         struct rtentry *rt;
  141 
  142         rt = rt_get_inet_parent(dd->fibnum, addr, plen);
  143         if (rt != NULL) {
  144                 struct in_addr addr4;
  145                 uint32_t scopeid;
  146                 int parent_plen;
  147 
  148                 rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
  149                 if (parent_plen > 0) {
  150                         *pplen = parent_plen;
  151                         *nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt));
  152                         return;
  153                 }
  154         }
  155 
  156         *nhop_idx = 0;
  157         *pplen = 0;
  158 }
  159 
  160 static enum flm_op_result
  161 handle_gu_change(struct dpdk_lpm_data *dd, const struct rib_cmd_info *rc,
  162     const struct in_addr addr, int plen)
  163 {
  164         uint32_t nhidx = 0;
  165         int ret;
  166         char abuf[INET_ADDRSTRLEN];
  167         uint32_t ip;
  168 
  169         ip = ntohl(addr.s_addr);
  170         inet_ntop(AF_INET, &addr, abuf, sizeof(abuf));
  171 
  172         /* So we get sin, plen and nhidx */
  173         if (rc->rc_cmd != RTM_DELETE) {
  174                 /*
  175                  * Addition or change. Save nhop in the internal table
  176                  * and get index.
  177                  */
  178                 nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
  179                 if (nhidx == 0) {
  180                         FIB_PRINTF(LOG_INFO, dd->fd, "nhop limit reached, need rebuild");
  181                         return (FLM_REBUILD);
  182                 }
  183 
  184                 ret = rte_lpm_add(dd->lpm, ip, plen, nhidx);
  185                 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
  186                     (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
  187                     abuf, plen,
  188                     rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
  189                     nhidx, ret);
  190         } else {
  191                 /*
  192                  * Need to lookup parent. Assume deletion happened already
  193                  */
  194                 uint8_t parent_plen;
  195                 uint32_t parent_nhop_idx;
  196                 get_parent_rule(dd, addr, plen, &parent_plen, &parent_nhop_idx);
  197 
  198                 ret = rte_lpm_delete(dd->lpm, ip, plen, parent_plen, parent_nhop_idx);
  199                 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d -> /%d nhop %u -> %u ret: %d",
  200                     "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
  201                     parent_nhop_idx, ret);
  202         }
  203 
  204         if (ret != 0) {
  205                 FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret);
  206                 if (ret == -ENOSPC)
  207                         return (FLM_REBUILD);
  208                 return (FLM_ERROR);
  209         }
  210         return (FLM_SUCCESS);
  211 }
  212 
  213 static enum flm_op_result
  214 handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
  215     void *_data)
  216 {
  217         struct dpdk_lpm_data *dd;
  218         enum flm_op_result ret;
  219         struct in_addr addr4;
  220         uint32_t scopeid;
  221         int plen;
  222 
  223         dd = (struct dpdk_lpm_data *)_data;
  224         rt_get_inet_prefix_plen(rc->rc_rt, &addr4, &plen, &scopeid);
  225 
  226         if (plen != 0)
  227                 ret = handle_gu_change(dd, rc, addr4, plen);
  228         else
  229                 ret = handle_default_change(dd, rc);
  230 
  231         if (ret != 0)
  232                 FIB_PRINTF(LOG_INFO, dd->fd, "error handling route");
  233         return (ret);
  234 }
  235 
  236 static void
  237 destroy_table(void *_data)
  238 {
  239         struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data;
  240 
  241         if (dd->lpm != NULL)
  242                 rte_lpm_free(dd->lpm);
  243         free(dd, M_RTABLE);
  244 }
  245 
  246 static enum flm_op_result
  247 add_route_cb(struct rtentry *rt, void *_data)
  248 {
  249         struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data;
  250         struct nhop_object *nh;
  251         int plen, ret;
  252         struct in_addr addr4;
  253         uint32_t scopeid;
  254 
  255         nh = rt_get_raw_nhop(rt);
  256         rt_get_inet_prefix_plen(rt, &addr4, &plen, &scopeid);
  257 
  258         char abuf[INET_ADDRSTRLEN];
  259         inet_ntop(AF_INET, &addr4, abuf, sizeof(abuf));
  260 
  261         FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen);
  262 
  263         if (plen == 0) {
  264                 struct rib_cmd_info rc = {
  265                         .rc_cmd = RTM_ADD,
  266                         .rc_nh_new = nh,
  267                 };
  268 
  269                 FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route");
  270                 return (handle_default_change(dd, &rc));
  271         }
  272 
  273         uint32_t nhidx = fib_get_nhop_idx(dd->fd, nh);
  274         if (nhidx == 0) {
  275                 FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index");
  276                 return (FLM_REBUILD);
  277         }
  278         ret = rte_lpm_add(dd->lpm, ntohl(addr4.s_addr), plen, nhidx);
  279         FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d",
  280             dd->lpm, abuf, plen, nhidx, ret);
  281 
  282         if (ret != 0) {
  283                 FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm_add() returned %d", ret);
  284                 if (ret == -ENOSPC) {
  285                         dd->hit_tables = 1;
  286                         return (FLM_REBUILD);
  287                 }
  288                 dd->routes_failed++;
  289                 return (FLM_ERROR);
  290         } else
  291                 dd->routes_added++;
  292 
  293         return (FLM_SUCCESS);
  294 }
  295 
  296 static enum flm_op_result
  297 check_dump_success(void *_data, struct fib_dp *dp)
  298 {
  299         struct dpdk_lpm_data *dd;
  300 
  301         dd = (struct dpdk_lpm_data *)_data;
  302 
  303         FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. added: %zu failed: %zu",
  304             dd->routes_added, dd->routes_failed);
  305         if (dd->hit_tables || dd->routes_failed > 0)
  306                 return (FLM_REBUILD);
  307 
  308         FIB_PRINTF(LOG_INFO, dd->fd,
  309             "DPDK lookup engine synced with IPv4 RIB id %u, %zu routes",
  310             dd->fibnum, dd->routes_added);
  311 
  312         dp->f = lookup_ptr;
  313         dp->arg = dd->lpm;
  314 
  315         return (FLM_SUCCESS);
  316 }
  317 
  318 static void
  319 estimate_scale(const struct dpdk_lpm_data *dd_src, struct dpdk_lpm_data *dd)
  320 {
  321 
  322         /* XXX: update at 75% capacity */
  323         if (dd_src->hit_tables)
  324                 dd->number_tbl8s = dd_src->number_tbl8s * 2;
  325         else
  326                 dd->number_tbl8s = dd_src->number_tbl8s;
  327 
  328         /* TODO: look into the appropriate RIB to adjust */
  329 }
  330 
  331 static struct dpdk_lpm_data *
  332 build_table(struct dpdk_lpm_data *dd_prev, struct fib_data *fd)
  333 {
  334         struct dpdk_lpm_data *dd;
  335         struct rte_lpm *lpm;
  336 
  337         dd = malloc(sizeof(struct dpdk_lpm_data), M_RTABLE, M_NOWAIT | M_ZERO);
  338         if (dd == NULL) {
  339                 FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure");
  340                 return (NULL);
  341         }
  342         dd->fibnum = dd_prev->fibnum;
  343         dd->fd = fd;
  344 
  345         estimate_scale(dd_prev, dd);
  346 
  347         struct rte_lpm_config cfg = {.number_tbl8s = dd->number_tbl8s};
  348         lpm = rte_lpm_create("test", 0, &cfg);
  349         if (lpm == NULL) {
  350                 FIB_PRINTF(LOG_INFO, fd, "unable to create lpm");
  351                 free(dd, M_RTABLE);
  352                 return (NULL);
  353         }
  354         dd->lpm = lpm;
  355         struct rte_lpm_external *ext = (struct rte_lpm_external *)lpm;
  356         ext->nh_idx = fib_get_nhop_array(dd->fd);
  357 
  358         FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s);
  359 
  360         return (dd);
  361 }
  362 
  363 static enum flm_op_result
  364 init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data)
  365 {
  366         struct dpdk_lpm_data *dd, dd_base;
  367 
  368         if (_old_data == NULL) {
  369                 bzero(&dd_base, sizeof(struct dpdk_lpm_data));
  370                 dd_base.fibnum = fibnum;
  371                 /* TODO: get rib statistics */
  372                 dd_base.number_tbl8s = LPM_MIN_TBL8;
  373                 dd = &dd_base;
  374         } else {
  375                 FIB_PRINTF(LOG_DEBUG, fd, "Starting with old data");
  376                 dd = (struct dpdk_lpm_data *)_old_data;
  377         }
  378 
  379         /* Guaranteed to be in epoch */
  380         dd = build_table(dd, fd);
  381         if (dd == NULL) {
  382                 FIB_PRINTF(LOG_NOTICE, fd, "table creation failed");
  383                 return (FLM_REBUILD);
  384         }
  385 
  386         *data = dd;
  387         return (FLM_SUCCESS);
  388 }
  389 
  390 static struct fib_lookup_module dpdk_lpm4 = {
  391         .flm_name = "dpdk_lpm4",
  392         .flm_family = AF_INET,
  393         .flm_init_cb = init_table,
  394         .flm_destroy_cb = destroy_table,
  395         .flm_dump_rib_item_cb = add_route_cb,
  396         .flm_dump_end_cb = check_dump_success,
  397         .flm_change_rib_item_cb = handle_rtable_change_cb,
  398         .flm_get_pref = rte_get_pref,
  399 };
  400 
  401 static int
  402 lpm4_modevent(module_t mod, int type, void *unused)
  403 {
  404         int error = 0;
  405 
  406         switch (type) {
  407         case MOD_LOAD:
  408                 fib_module_register(&dpdk_lpm4);
  409                 break;
  410         case MOD_UNLOAD:
  411                 error = fib_module_unregister(&dpdk_lpm4);
  412                 break;
  413         default:
  414                 error = EOPNOTSUPP;
  415                 break;
  416         }
  417         return (error);
  418 }
  419 
  420 static moduledata_t lpm4mod = {
  421         "dpdk_lpm4",
  422         lpm4_modevent,
  423         0
  424 };
  425 
  426 DECLARE_MODULE(lpm4mod, lpm4mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
  427 MODULE_VERSION(lpm4mod, 1);

Cache object: 2394b6d6ded58bf33bf1a579b903bb3e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.