The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/flowtable.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /**************************************************************************
    2 
    3 Copyright (c) 2008-2010, BitGravity Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the BitGravity Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15 
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include "opt_route.h"
   31 #include "opt_mpath.h"
   32 #include "opt_ddb.h"
   33 #include "opt_inet.h"
   34 #include "opt_inet6.h"
   35 
   36 #include <sys/cdefs.h>
   37 __FBSDID("$FreeBSD: releng/9.1/sys/net/flowtable.c 232292 2012-02-29 09:47:26Z bz $");
   38 
   39 #include <sys/param.h>  
   40 #include <sys/types.h>
   41 #include <sys/bitstring.h>
   42 #include <sys/condvar.h>
   43 #include <sys/callout.h>
   44 #include <sys/kernel.h>  
   45 #include <sys/kthread.h>
   46 #include <sys/limits.h>
   47 #include <sys/malloc.h>
   48 #include <sys/mbuf.h>
   49 #include <sys/proc.h>
   50 #include <sys/sbuf.h>
   51 #include <sys/sched.h>
   52 #include <sys/smp.h>
   53 #include <sys/socket.h>
   54 #include <sys/syslog.h>
   55 #include <sys/sysctl.h>
   56 
   57 #include <net/if.h>
   58 #include <net/if_llatbl.h>
   59 #include <net/if_var.h>
   60 #include <net/route.h> 
   61 #include <net/flowtable.h>
   62 #include <net/vnet.h>
   63 
   64 #include <netinet/in.h>
   65 #include <netinet/in_systm.h>
   66 #include <netinet/in_var.h>
   67 #include <netinet/if_ether.h>
   68 #include <netinet/ip.h>
   69 #ifdef INET6
   70 #include <netinet/ip6.h>
   71 #endif
   72 #include <netinet/tcp.h>
   73 #include <netinet/udp.h>
   74 #include <netinet/sctp.h>
   75 
   76 #include <libkern/jenkins.h>
   77 #include <ddb/ddb.h>
   78 
   79 struct ipv4_tuple {
   80         uint16_t        ip_sport;       /* source port */
   81         uint16_t        ip_dport;       /* destination port */
   82         in_addr_t       ip_saddr;       /* source address */
   83         in_addr_t       ip_daddr;       /* destination address */
   84 };
   85 
   86 union ipv4_flow {
   87         struct ipv4_tuple ipf_ipt;
   88         uint32_t        ipf_key[3];
   89 };
   90 
   91 struct ipv6_tuple {
   92         uint16_t        ip_sport;       /* source port */
   93         uint16_t        ip_dport;       /* destination port */
   94         struct in6_addr ip_saddr;       /* source address */
   95         struct in6_addr ip_daddr;       /* destination address */
   96 };
   97 
   98 union ipv6_flow {
   99         struct ipv6_tuple ipf_ipt;
  100         uint32_t        ipf_key[9];
  101 };
  102 
  103 struct flentry {
  104         volatile uint32_t       f_fhash;        /* hash flowing forward */
  105         uint16_t                f_flags;        /* flow flags */
  106         uint8_t                 f_pad;          
  107         uint8_t                 f_proto;        /* protocol */
  108         uint32_t                f_fibnum;       /* fib index */
  109         uint32_t                f_uptime;       /* uptime at last access */
  110         struct flentry          *f_next;        /* pointer to collision entry */
  111         volatile struct rtentry *f_rt;          /* rtentry for flow */
  112         volatile struct llentry *f_lle;         /* llentry for flow */
  113 };
  114 
  115 struct flentry_v4 {
  116         struct flentry  fl_entry;
  117         union ipv4_flow fl_flow;
  118 };
  119 
  120 struct flentry_v6 {
  121         struct flentry  fl_entry;
  122         union ipv6_flow fl_flow;
  123 };
  124 
  125 #define fl_fhash        fl_entry.fl_fhash
  126 #define fl_flags        fl_entry.fl_flags
  127 #define fl_proto        fl_entry.fl_proto
  128 #define fl_uptime       fl_entry.fl_uptime
  129 #define fl_rt           fl_entry.fl_rt
  130 #define fl_lle          fl_entry.fl_lle
  131 
  132 #define SECS_PER_HOUR           3600
  133 #define SECS_PER_DAY            (24*SECS_PER_HOUR)
  134 
  135 #define SYN_IDLE                300
  136 #define UDP_IDLE                300
  137 #define FIN_WAIT_IDLE           600
  138 #define TCP_IDLE                SECS_PER_DAY
  139 
  140 
  141 typedef void fl_lock_t(struct flowtable *, uint32_t);
  142 typedef void fl_rtalloc_t(struct route *, uint32_t, u_int);
  143 
  144 union flentryp {
  145         struct flentry          **global;
  146         struct flentry          **pcpu[MAXCPU];
  147 };
  148 
  149 struct flowtable_stats {
  150         uint64_t        ft_collisions;
  151         uint64_t        ft_allocated;
  152         uint64_t        ft_misses;
  153         uint64_t        ft_max_depth;
  154         uint64_t        ft_free_checks;
  155         uint64_t        ft_frees;
  156         uint64_t        ft_hits;
  157         uint64_t        ft_lookups;
  158 } __aligned(CACHE_LINE_SIZE);
  159 
  160 struct flowtable {
  161         struct  flowtable_stats ft_stats[MAXCPU];
  162         int             ft_size;
  163         int             ft_lock_count;
  164         uint32_t        ft_flags;
  165         char            *ft_name;
  166         fl_lock_t       *ft_lock;
  167         fl_lock_t       *ft_unlock;
  168         fl_rtalloc_t    *ft_rtalloc;
  169         /*
  170          * XXX need to pad out 
  171          */ 
  172         struct mtx      *ft_locks;
  173         union flentryp  ft_table;
  174         bitstr_t        *ft_masks[MAXCPU];
  175         bitstr_t        *ft_tmpmask;
  176         struct flowtable *ft_next;
  177 
  178         uint32_t        ft_count __aligned(CACHE_LINE_SIZE);
  179         uint32_t        ft_udp_idle __aligned(CACHE_LINE_SIZE);
  180         uint32_t        ft_fin_wait_idle;
  181         uint32_t        ft_syn_idle;
  182         uint32_t        ft_tcp_idle;
  183         boolean_t       ft_full;
  184 } __aligned(CACHE_LINE_SIZE);
  185 
  186 static struct proc *flowcleanerproc;
  187 static VNET_DEFINE(struct flowtable *, flow_list_head);
  188 static VNET_DEFINE(uint32_t, flow_hashjitter);
  189 static VNET_DEFINE(uma_zone_t, flow_ipv4_zone);
  190 static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
  191 
  192 #define V_flow_list_head        VNET(flow_list_head)
  193 #define V_flow_hashjitter       VNET(flow_hashjitter)
  194 #define V_flow_ipv4_zone        VNET(flow_ipv4_zone)
  195 #define V_flow_ipv6_zone        VNET(flow_ipv6_zone)
  196 
  197 
  198 static struct cv        flowclean_f_cv;
  199 static struct cv        flowclean_c_cv;
  200 static struct mtx       flowclean_lock;
  201 static uint32_t         flowclean_cycles;
  202 static uint32_t         flowclean_freq;
  203 
  204 #ifdef FLOWTABLE_DEBUG
  205 #define FLDPRINTF(ft, flags, fmt, ...)          \
  206 do {                                            \
  207         if ((ft)->ft_flags & (flags))           \
  208                 printf((fmt), __VA_ARGS__);     \
  209 } while (0);                                    \
  210 
  211 #else
  212 #define FLDPRINTF(ft, flags, fmt, ...)
  213 
  214 #endif
  215 
  216 
  217 /*
  218  * TODO:
  219  * - Make flowtable stats per-cpu, aggregated at sysctl call time,
  220  *   to avoid extra cache evictions caused by incrementing a shared
  221  *   counter
  222  * - add sysctls to resize && flush flow tables 
  223  * - Add per flowtable sysctls for statistics and configuring timeouts
  224  * - add saturation counter to rtentry to support per-packet load-balancing
  225  *   add flag to indicate round-robin flow, add list lookup from head
  226      for flows
  227  * - add sysctl / device node / syscall to support exporting and importing
  228  *   of flows with flag to indicate that a flow was imported so should
  229  *   not be considered for auto-cleaning
  230  * - support explicit connection state (currently only ad-hoc for DSR)
  231  * - idetach() cleanup for options VIMAGE builds.
  232  */
  233 VNET_DEFINE(int, flowtable_enable) = 1;
  234 static VNET_DEFINE(int, flowtable_debug);
  235 static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
  236 static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
  237 static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
  238 static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
  239 static VNET_DEFINE(int, flowtable_nmbflows);
  240 static VNET_DEFINE(int, flowtable_ready) = 0;
  241 
  242 #define V_flowtable_enable              VNET(flowtable_enable)
  243 #define V_flowtable_debug               VNET(flowtable_debug)
  244 #define V_flowtable_syn_expire          VNET(flowtable_syn_expire)
  245 #define V_flowtable_udp_expire          VNET(flowtable_udp_expire)
  246 #define V_flowtable_fin_wait_expire     VNET(flowtable_fin_wait_expire)
  247 #define V_flowtable_tcp_expire          VNET(flowtable_tcp_expire)
  248 #define V_flowtable_nmbflows            VNET(flowtable_nmbflows)
  249 #define V_flowtable_ready               VNET(flowtable_ready)
  250 
  251 SYSCTL_NODE(_net_inet, OID_AUTO, flowtable, CTLFLAG_RD, NULL, "flowtable");
  252 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, debug, CTLFLAG_RW,
  253     &VNET_NAME(flowtable_debug), 0, "print debug info.");
  254 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, enable, CTLFLAG_RW,
  255     &VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");
  256 
  257 /*
  258  * XXX This does not end up updating timeouts at runtime
  259  * and only reflects the value for the last table added :-/
  260  */
  261 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, syn_expire, CTLFLAG_RW,
  262     &VNET_NAME(flowtable_syn_expire), 0,
  263     "seconds after which to remove syn allocated flow.");
  264 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, udp_expire, CTLFLAG_RW,
  265     &VNET_NAME(flowtable_udp_expire), 0,
  266     "seconds after which to remove flow allocated to UDP.");
  267 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, fin_wait_expire, CTLFLAG_RW,
  268     &VNET_NAME(flowtable_fin_wait_expire), 0,
  269     "seconds after which to remove a flow in FIN_WAIT.");
  270 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, tcp_expire, CTLFLAG_RW,
  271     &VNET_NAME(flowtable_tcp_expire), 0,
  272     "seconds after which to remove flow allocated to a TCP connection.");
  273 
  274 
  275 /*
  276  * Maximum number of flows that can be allocated of a given type.
  277  *
  278  * The table is allocated at boot time (for the pure caching case
  279  * there is no reason why this could not be changed at runtime)
  280  * and thus (currently) needs to be set with a tunable.
  281  */
  282 static int
  283 sysctl_nmbflows(SYSCTL_HANDLER_ARGS)
  284 {
  285         int error, newnmbflows;
  286 
  287         newnmbflows = V_flowtable_nmbflows;
  288         error = sysctl_handle_int(oidp, &newnmbflows, 0, req); 
  289         if (error == 0 && req->newptr) {
  290                 if (newnmbflows > V_flowtable_nmbflows) {
  291                         V_flowtable_nmbflows = newnmbflows;
  292                         uma_zone_set_max(V_flow_ipv4_zone,
  293                             V_flowtable_nmbflows);
  294                         uma_zone_set_max(V_flow_ipv6_zone,
  295                             V_flowtable_nmbflows);
  296                 } else
  297                         error = EINVAL;
  298         }
  299         return (error);
  300 }
  301 SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
  302     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_nmbflows, "IU",
  303     "Maximum number of flows allowed");
  304 
  305 
  306 
  307 #define FS_PRINT(sb, field)     sbuf_printf((sb), "\t%s: %jd\n", #field, fs->ft_##field)
  308 
  309 static void
  310 fs_print(struct sbuf *sb, struct flowtable_stats *fs)
  311 {
  312 
  313         FS_PRINT(sb, collisions);
  314         FS_PRINT(sb, allocated);
  315         FS_PRINT(sb, misses);
  316         FS_PRINT(sb, max_depth);
  317         FS_PRINT(sb, free_checks);
  318         FS_PRINT(sb, frees);
  319         FS_PRINT(sb, hits);
  320         FS_PRINT(sb, lookups);
  321 }
  322 
  323 static void
  324 flowtable_show_stats(struct sbuf *sb, struct flowtable *ft)
  325 {
  326         int i;
  327         struct flowtable_stats fs, *pfs;
  328 
  329         if (ft->ft_flags & FL_PCPU) {
  330                 bzero(&fs, sizeof(fs));
  331                 pfs = &fs;
  332                 CPU_FOREACH(i) {
  333                         pfs->ft_collisions  += ft->ft_stats[i].ft_collisions;
  334                         pfs->ft_allocated   += ft->ft_stats[i].ft_allocated;
  335                         pfs->ft_misses      += ft->ft_stats[i].ft_misses;
  336                         pfs->ft_free_checks += ft->ft_stats[i].ft_free_checks;
  337                         pfs->ft_frees       += ft->ft_stats[i].ft_frees;
  338                         pfs->ft_hits        += ft->ft_stats[i].ft_hits;
  339                         pfs->ft_lookups     += ft->ft_stats[i].ft_lookups;
  340                         if (ft->ft_stats[i].ft_max_depth > pfs->ft_max_depth)
  341                                 pfs->ft_max_depth = ft->ft_stats[i].ft_max_depth;
  342                 }
  343         } else {
  344                 pfs = &ft->ft_stats[0];
  345         }
  346         fs_print(sb, pfs);
  347 }
  348 
  349 static int
  350 sysctl_flowtable_stats(SYSCTL_HANDLER_ARGS)
  351 {
  352         struct flowtable *ft;
  353         struct sbuf *sb;
  354         int error;
  355 
  356         sb = sbuf_new(NULL, NULL, 64*1024, SBUF_FIXEDLEN);
  357 
  358         ft = V_flow_list_head;
  359         while (ft != NULL) {
  360                 sbuf_printf(sb, "\ntable name: %s\n", ft->ft_name);
  361                 flowtable_show_stats(sb, ft);
  362                 ft = ft->ft_next;
  363         }
  364         sbuf_finish(sb);
  365         error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
  366         sbuf_delete(sb);
  367 
  368         return (error);
  369 }
  370 SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
  371     NULL, 0, sysctl_flowtable_stats, "A", "flowtable statistics");
  372 
  373 
  374 #ifndef RADIX_MPATH
  375 static void
  376 rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
  377 {
  378 
  379         rtalloc_ign_fib(ro, 0, fibnum);
  380 }
  381 #endif
  382 
  383 static void
  384 flowtable_global_lock(struct flowtable *table, uint32_t hash)
  385 {       
  386         int lock_index = (hash)&(table->ft_lock_count - 1);
  387 
  388         mtx_lock(&table->ft_locks[lock_index]);
  389 }
  390 
  391 static void
  392 flowtable_global_unlock(struct flowtable *table, uint32_t hash)
  393 {       
  394         int lock_index = (hash)&(table->ft_lock_count - 1);
  395 
  396         mtx_unlock(&table->ft_locks[lock_index]);
  397 }
  398 
  399 static void
  400 flowtable_pcpu_lock(struct flowtable *table, uint32_t hash)
  401 {
  402 
  403         critical_enter();
  404 }
  405 
  406 static void
  407 flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
  408 {
  409 
  410         critical_exit();
  411 }
  412 
  413 #define FL_ENTRY_INDEX(table, hash)((hash) % (table)->ft_size)
  414 #define FL_ENTRY(table, hash) *flowtable_entry((table), (hash))
  415 #define FL_ENTRY_LOCK(table, hash)  (table)->ft_lock((table), (hash))
  416 #define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
  417 
  418 #define FL_STALE        (1<<8)
  419 #define FL_OVERWRITE    (1<<10)
  420 
  421 void
  422 flow_invalidate(struct flentry *fle)
  423 {
  424 
  425         fle->f_flags |= FL_STALE;
  426 }
  427 
  428 static __inline int
  429 proto_to_flags(uint8_t proto)
  430 {
  431         int flag;
  432 
  433         switch (proto) {
  434         case IPPROTO_TCP:
  435                 flag = FL_TCP;
  436                 break;
  437         case IPPROTO_SCTP:
  438                 flag = FL_SCTP;
  439                 break;          
  440         case IPPROTO_UDP:
  441                 flag = FL_UDP;
  442                 break;
  443         default:
  444                 flag = 0;
  445                 break;
  446         }
  447 
  448         return (flag);
  449 }
  450 
  451 static __inline int
  452 flags_to_proto(int flags)
  453 {
  454         int proto, protoflags;
  455 
  456         protoflags = flags & (FL_TCP|FL_SCTP|FL_UDP);
  457         switch (protoflags) {
  458         case FL_TCP:
  459                 proto = IPPROTO_TCP;
  460                 break;
  461         case FL_SCTP:
  462                 proto = IPPROTO_SCTP;
  463                 break;
  464         case FL_UDP:
  465                 proto = IPPROTO_UDP;
  466                 break;
  467         default:
  468                 proto = 0;
  469                 break;
  470         }
  471         return (proto);
  472 }
  473 
  474 #ifdef INET
  475 #ifdef FLOWTABLE_DEBUG
  476 static void
  477 ipv4_flow_print_tuple(int flags, int proto, struct sockaddr_in *ssin,
  478     struct sockaddr_in *dsin)
  479 {
  480         char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
  481 
  482         if (flags & FL_HASH_ALL) {
  483                 inet_ntoa_r(ssin->sin_addr, saddr);
  484                 inet_ntoa_r(dsin->sin_addr, daddr);
  485                 printf("proto=%d %s:%d->%s:%d\n",
  486                     proto, saddr, ntohs(ssin->sin_port), daddr,
  487                     ntohs(dsin->sin_port));
  488         } else {
  489                 inet_ntoa_r(*(struct in_addr *) &dsin->sin_addr, daddr);
  490                 printf("proto=%d %s\n", proto, daddr);
  491         }
  492 
  493 }
  494 #endif
  495 
  496 static int
  497 ipv4_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
  498     struct sockaddr_in *ssin, struct sockaddr_in *dsin, uint16_t *flags)
  499 {
  500         struct ip *ip;
  501         uint8_t proto;
  502         int iphlen;
  503         struct tcphdr *th;
  504         struct udphdr *uh;
  505         struct sctphdr *sh;
  506         uint16_t sport, dport;
  507 
  508         proto = sport = dport = 0;
  509         ip = mtod(m, struct ip *);
  510         dsin->sin_family = AF_INET;
  511         dsin->sin_len = sizeof(*dsin);
  512         dsin->sin_addr = ip->ip_dst;
  513         ssin->sin_family = AF_INET;
  514         ssin->sin_len = sizeof(*ssin);
  515         ssin->sin_addr = ip->ip_src;    
  516 
  517         proto = ip->ip_p;
  518         if ((*flags & FL_HASH_ALL) == 0) {
  519                 FLDPRINTF(ft, FL_DEBUG_ALL, "skip port check flags=0x%x ",
  520                     *flags);
  521                 goto skipports;
  522         }
  523 
  524         iphlen = ip->ip_hl << 2; /* XXX options? */
  525 
  526         switch (proto) {
  527         case IPPROTO_TCP:
  528                 th = (struct tcphdr *)((caddr_t)ip + iphlen);
  529                 sport = th->th_sport;
  530                 dport = th->th_dport;
  531                 if ((*flags & FL_HASH_ALL) &&
  532                     (th->th_flags & (TH_RST|TH_FIN)))
  533                         *flags |= FL_STALE;
  534         break;
  535         case IPPROTO_UDP:
  536                 uh = (struct udphdr *)((caddr_t)ip + iphlen);
  537                 sport = uh->uh_sport;
  538                 dport = uh->uh_dport;
  539         break;
  540         case IPPROTO_SCTP:
  541                 sh = (struct sctphdr *)((caddr_t)ip + iphlen);
  542                 sport = sh->src_port;
  543                 dport = sh->dest_port;
  544         break;
  545         default:
  546                 FLDPRINTF(ft, FL_DEBUG_ALL, "proto=0x%x not supported\n", proto);
  547                 return (ENOTSUP);
  548                 /* no port - hence not a protocol we care about */
  549                 break;
  550         
  551         }
  552 
  553 skipports:
  554         *flags |= proto_to_flags(proto);
  555         ssin->sin_port = sport;
  556         dsin->sin_port = dport;
  557         return (0);
  558 }
  559 
  560 static uint32_t
  561 ipv4_flow_lookup_hash_internal(
  562         struct sockaddr_in *ssin, struct sockaddr_in *dsin, 
  563             uint32_t *key, uint16_t flags)
  564 {
  565         uint16_t sport, dport;
  566         uint8_t proto;
  567         int offset = 0;
  568 
  569         if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
  570                 return (0);
  571         proto = flags_to_proto(flags);
  572         sport = dport = key[2] = key[1] = key[0] = 0;
  573         if ((ssin != NULL) && (flags & FL_HASH_ALL)) {
  574                 key[1] = ssin->sin_addr.s_addr;
  575                 sport = ssin->sin_port;
  576         }
  577         if (dsin != NULL) {
  578                 key[2] = dsin->sin_addr.s_addr;
  579                 dport = dsin->sin_port;
  580         }
  581         if (flags & FL_HASH_ALL) {
  582                 ((uint16_t *)key)[0] = sport;
  583                 ((uint16_t *)key)[1] = dport; 
  584         } else
  585                 offset = V_flow_hashjitter + proto;
  586 
  587         return (jenkins_hashword(key, 3, offset));
  588 }
  589 
  590 static struct flentry *
  591 flowtable_lookup_mbuf4(struct flowtable *ft, struct mbuf *m)
  592 {
  593         struct sockaddr_storage ssa, dsa;
  594         uint16_t flags;
  595         struct sockaddr_in *dsin, *ssin;
  596 
  597         dsin = (struct sockaddr_in *)&dsa;
  598         ssin = (struct sockaddr_in *)&ssa;
  599         bzero(dsin, sizeof(*dsin));
  600         bzero(ssin, sizeof(*ssin));
  601         flags = ft->ft_flags;
  602         if (ipv4_mbuf_demarshal(ft, m, ssin, dsin, &flags) != 0)
  603                 return (NULL);
  604 
  605         return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
  606 }
  607 
  608 void
  609 flow_to_route(struct flentry *fle, struct route *ro)
  610 {
  611         uint32_t *hashkey = NULL;
  612         struct sockaddr_in *sin;
  613 
  614         sin = (struct sockaddr_in *)&ro->ro_dst;
  615         sin->sin_family = AF_INET;
  616         sin->sin_len = sizeof(*sin);
  617         hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
  618         sin->sin_addr.s_addr = hashkey[2];
  619         ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
  620         ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
  621 }
  622 #endif /* INET */
  623 
  624 #ifdef INET6
  625 /*
  626  * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
  627  * then it sets p to point at the offset "len" in the mbuf. WARNING: the
  628  * pointer might become stale after other pullups (but we never use it
  629  * this way).
  630  */
  631 #define PULLUP_TO(_len, p, T)                                           \
  632 do {                                                                    \
  633         int x = (_len) + sizeof(T);                                     \
  634         if ((m)->m_len < x) {                                           \
  635                 goto receive_failed;                                    \
  636         }                                                               \
  637         p = (mtod(m, char *) + (_len));                                 \
  638 } while (0)
  639 
  640 #define TCP(p)          ((struct tcphdr *)(p))
  641 #define SCTP(p)         ((struct sctphdr *)(p))
  642 #define UDP(p)          ((struct udphdr *)(p))
  643 
  644 static int
  645 ipv6_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
  646     struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, uint16_t *flags)
  647 {
  648         struct ip6_hdr *ip6;
  649         uint8_t proto;
  650         int hlen;
  651         uint16_t src_port, dst_port;
  652         u_short offset;
  653         void *ulp;
  654 
  655         offset = hlen = src_port = dst_port = 0;
  656         ulp = NULL;
  657         ip6 = mtod(m, struct ip6_hdr *);
  658         hlen = sizeof(struct ip6_hdr);
  659         proto = ip6->ip6_nxt;
  660 
  661         if ((*flags & FL_HASH_ALL) == 0)
  662                 goto skipports;
  663 
  664         while (ulp == NULL) {
  665                 switch (proto) {
  666                 case IPPROTO_ICMPV6:
  667                 case IPPROTO_OSPFIGP:
  668                 case IPPROTO_PIM:
  669                 case IPPROTO_CARP:
  670                 case IPPROTO_ESP:
  671                 case IPPROTO_NONE:
  672                         ulp = ip6;
  673                         break;
  674                 case IPPROTO_TCP:
  675                         PULLUP_TO(hlen, ulp, struct tcphdr);
  676                         dst_port = TCP(ulp)->th_dport;
  677                         src_port = TCP(ulp)->th_sport;
  678                         if ((*flags & FL_HASH_ALL) &&
  679                             (TCP(ulp)->th_flags & (TH_RST|TH_FIN)))
  680                                 *flags |= FL_STALE;
  681                         break;
  682                 case IPPROTO_SCTP:
  683                         PULLUP_TO(hlen, ulp, struct sctphdr);
  684                         src_port = SCTP(ulp)->src_port;
  685                         dst_port = SCTP(ulp)->dest_port;
  686                         break;
  687                 case IPPROTO_UDP:
  688                         PULLUP_TO(hlen, ulp, struct udphdr);
  689                         dst_port = UDP(ulp)->uh_dport;
  690                         src_port = UDP(ulp)->uh_sport;
  691                         break;
  692                 case IPPROTO_HOPOPTS:   /* RFC 2460 */
  693                         PULLUP_TO(hlen, ulp, struct ip6_hbh);
  694                         hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
  695                         proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
  696                         ulp = NULL;
  697                         break;
  698                 case IPPROTO_ROUTING:   /* RFC 2460 */
  699                         PULLUP_TO(hlen, ulp, struct ip6_rthdr); 
  700                         hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
  701                         proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
  702                         ulp = NULL;
  703                         break;
  704                 case IPPROTO_FRAGMENT:  /* RFC 2460 */
  705                         PULLUP_TO(hlen, ulp, struct ip6_frag);
  706                         hlen += sizeof (struct ip6_frag);
  707                         proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
  708                         offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
  709                             IP6F_OFF_MASK;
  710                         ulp = NULL;
  711                         break;
  712                 case IPPROTO_DSTOPTS:   /* RFC 2460 */
  713                         PULLUP_TO(hlen, ulp, struct ip6_hbh);
  714                         hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
  715                         proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
  716                         ulp = NULL;
  717                         break;
  718                 case IPPROTO_AH:        /* RFC 2402 */
  719                         PULLUP_TO(hlen, ulp, struct ip6_ext);
  720                         hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
  721                         proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
  722                         ulp = NULL;
  723                         break;
  724                 default:
  725                         PULLUP_TO(hlen, ulp, struct ip6_ext);
  726                         break;
  727                 }
  728         }
  729 
  730         if (src_port == 0) {
  731         receive_failed:
  732                 return (ENOTSUP);
  733         }
  734 
  735 skipports:
  736         dsin6->sin6_family = AF_INET6;
  737         dsin6->sin6_len = sizeof(*dsin6);
  738         dsin6->sin6_port = dst_port;
  739         memcpy(&dsin6->sin6_addr, &ip6->ip6_dst, sizeof(struct in6_addr));
  740 
  741         ssin6->sin6_family = AF_INET6;
  742         ssin6->sin6_len = sizeof(*ssin6);
  743         ssin6->sin6_port = src_port;
  744         memcpy(&ssin6->sin6_addr, &ip6->ip6_src, sizeof(struct in6_addr));
  745         *flags |= proto_to_flags(proto);
  746 
  747         return (0);
  748 }
  749 
  750 #define zero_key(key)           \
  751 do {                            \
  752         key[0] = 0;             \
  753         key[1] = 0;             \
  754         key[2] = 0;             \
  755         key[3] = 0;             \
  756         key[4] = 0;             \
  757         key[5] = 0;             \
  758         key[6] = 0;             \
  759         key[7] = 0;             \
  760         key[8] = 0;             \
  761 } while (0)
  762         
  763 static uint32_t
  764 ipv6_flow_lookup_hash_internal(
  765         struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, 
  766             uint32_t *key, uint16_t flags)
  767 {
  768         uint16_t sport, dport;
  769         uint8_t proto;
  770         int offset = 0;
  771 
  772         if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
  773                 return (0);
  774 
  775         proto = flags_to_proto(flags);
  776         zero_key(key);
  777         sport = dport = 0;
  778         if (dsin6 != NULL) {
  779                 memcpy(&key[1], &dsin6->sin6_addr, sizeof(struct in6_addr));
  780                 dport = dsin6->sin6_port;
  781         }
  782         if ((ssin6 != NULL) && (flags & FL_HASH_ALL)) {
  783                 memcpy(&key[5], &ssin6->sin6_addr, sizeof(struct in6_addr));
  784                 sport = ssin6->sin6_port;
  785         }
  786         if (flags & FL_HASH_ALL) {
  787                 ((uint16_t *)key)[0] = sport;
  788                 ((uint16_t *)key)[1] = dport; 
  789         } else
  790                 offset = V_flow_hashjitter + proto;
  791 
  792         return (jenkins_hashword(key, 9, offset));
  793 }
  794 
  795 static struct flentry *
  796 flowtable_lookup_mbuf6(struct flowtable *ft, struct mbuf *m)
  797 {
  798         struct sockaddr_storage ssa, dsa;
  799         struct sockaddr_in6 *dsin6, *ssin6;     
  800         uint16_t flags;
  801 
  802         dsin6 = (struct sockaddr_in6 *)&dsa;
  803         ssin6 = (struct sockaddr_in6 *)&ssa;
  804         bzero(dsin6, sizeof(*dsin6));
  805         bzero(ssin6, sizeof(*ssin6));
  806         flags = ft->ft_flags;
  807         
  808         if (ipv6_mbuf_demarshal(ft, m, ssin6, dsin6, &flags) != 0)
  809                 return (NULL);
  810 
  811         return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
  812 }
  813 
  814 void
  815 flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
  816 {
  817         uint32_t *hashkey = NULL;
  818         struct sockaddr_in6 *sin6;
  819 
  820         sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
  821 
  822         sin6->sin6_family = AF_INET6;
  823         sin6->sin6_len = sizeof(*sin6);
  824         hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
  825         memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
  826         ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
  827         ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
  828 
  829 }
  830 #endif /* INET6 */
  831 
  832 static bitstr_t *
  833 flowtable_mask(struct flowtable *ft)
  834 {
  835         bitstr_t *mask;
  836 
  837         if (ft->ft_flags & FL_PCPU)
  838                 mask = ft->ft_masks[curcpu];
  839         else
  840                 mask = ft->ft_masks[0];
  841 
  842         return (mask);
  843 }
  844 
  845 static struct flentry **
  846 flowtable_entry(struct flowtable *ft, uint32_t hash)
  847 {
  848         struct flentry **fle;
  849         int index = (hash % ft->ft_size);
  850 
  851         if (ft->ft_flags & FL_PCPU) {
  852                 KASSERT(&ft->ft_table.pcpu[curcpu][0] != NULL, ("pcpu not set"));
  853                 fle = &ft->ft_table.pcpu[curcpu][index];
  854         } else {
  855                 KASSERT(&ft->ft_table.global[0] != NULL, ("global not set"));
  856                 fle = &ft->ft_table.global[index];
  857         }
  858         
  859         return (fle);
  860 }
  861 
  862 static int
  863 flow_stale(struct flowtable *ft, struct flentry *fle)
  864 {
  865         time_t idle_time;
  866 
  867         if ((fle->f_fhash == 0)
  868             || ((fle->f_rt->rt_flags & RTF_HOST) &&
  869                 ((fle->f_rt->rt_flags & (RTF_UP))
  870                     != (RTF_UP)))
  871             || (fle->f_rt->rt_ifp == NULL)
  872             || !RT_LINK_IS_UP(fle->f_rt->rt_ifp))
  873                 return (1);
  874 
  875         idle_time = time_uptime - fle->f_uptime;
  876 
  877         if ((fle->f_flags & FL_STALE) ||
  878             ((fle->f_flags & (TH_SYN|TH_ACK|TH_FIN)) == 0
  879                 && (idle_time > ft->ft_udp_idle)) ||
  880             ((fle->f_flags & TH_FIN)
  881                 && (idle_time > ft->ft_fin_wait_idle)) ||
  882             ((fle->f_flags & (TH_SYN|TH_ACK)) == TH_SYN
  883                 && (idle_time > ft->ft_syn_idle)) ||
  884             ((fle->f_flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)
  885                 && (idle_time > ft->ft_tcp_idle)) ||
  886             ((fle->f_rt->rt_flags & RTF_UP) == 0 || 
  887                 (fle->f_rt->rt_ifp == NULL)))
  888                 return (1);
  889 
  890         return (0);
  891 }
  892 
  893 static void
  894 flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
  895 {
  896         uint32_t *hashkey;
  897         int i, nwords;
  898 
  899         if (fle->f_flags & FL_IPV6) {
  900                 nwords = 9;
  901                 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
  902         } else {
  903                 nwords = 3;
  904                 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
  905         }
  906         
  907         for (i = 0; i < nwords; i++) 
  908                 hashkey[i] = key[i];
  909 }
  910 
  911 static struct flentry *
  912 flow_alloc(struct flowtable *ft)
  913 {
  914         struct flentry *newfle;
  915         uma_zone_t zone;
  916 
  917         newfle = NULL;
  918         zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
  919 
  920         newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO);
  921         if (newfle != NULL)
  922                 atomic_add_int(&ft->ft_count, 1);
  923         return (newfle);
  924 }
  925 
  926 static void
  927 flow_free(struct flentry *fle, struct flowtable *ft)
  928 {
  929         uma_zone_t zone;
  930 
  931         zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
  932         atomic_add_int(&ft->ft_count, -1);
  933         uma_zfree(zone, fle);
  934 }
  935 
  936 static int
  937 flow_full(struct flowtable *ft)
  938 {
  939         boolean_t full;
  940         uint32_t count;
  941         
  942         full = ft->ft_full;
  943         count = ft->ft_count;
  944 
  945         if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3))))
  946                 ft->ft_full = FALSE;
  947         else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5))))
  948                 ft->ft_full = TRUE;
  949         
  950         if (full && !ft->ft_full) {
  951                 flowclean_freq = 4*hz;
  952                 if ((ft->ft_flags & FL_HASH_ALL) == 0)
  953                         ft->ft_udp_idle = ft->ft_fin_wait_idle =
  954                             ft->ft_syn_idle = ft->ft_tcp_idle = 5;
  955                 cv_broadcast(&flowclean_c_cv);
  956         } else if (!full && ft->ft_full) {
  957                 flowclean_freq = 20*hz;
  958                 if ((ft->ft_flags & FL_HASH_ALL) == 0)
  959                         ft->ft_udp_idle = ft->ft_fin_wait_idle =
  960                             ft->ft_syn_idle = ft->ft_tcp_idle = 30;
  961         }
  962 
  963         return (ft->ft_full);
  964 }
  965 
  966 static int
  967 flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
  968     uint32_t fibnum, struct route *ro, uint16_t flags)
  969 {
  970         struct flentry *fle, *fletail, *newfle, **flep;
  971         struct flowtable_stats *fs = &ft->ft_stats[curcpu];
  972         int depth;
  973         bitstr_t *mask;
  974         uint8_t proto;
  975 
  976         newfle = flow_alloc(ft);
  977         if (newfle == NULL)
  978                 return (ENOMEM);
  979 
  980         newfle->f_flags |= (flags & FL_IPV6);
  981         proto = flags_to_proto(flags);
  982 
  983         FL_ENTRY_LOCK(ft, hash);
  984         mask = flowtable_mask(ft);
  985         flep = flowtable_entry(ft, hash);
  986         fletail = fle = *flep;
  987 
  988         if (fle == NULL) {
  989                 bit_set(mask, FL_ENTRY_INDEX(ft, hash));
  990                 *flep = fle = newfle;
  991                 goto skip;
  992         } 
  993         
  994         depth = 0;
  995         fs->ft_collisions++;
  996         /*
  997          * find end of list and make sure that we were not
  998          * preempted by another thread handling this flow
  999          */
 1000         while (fle != NULL) {
 1001                 if (fle->f_fhash == hash && !flow_stale(ft, fle)) {
 1002                         /*
 1003                          * there was either a hash collision
 1004                          * or we lost a race to insert
 1005                          */
 1006                         FL_ENTRY_UNLOCK(ft, hash);
 1007                         flow_free(newfle, ft);
 1008                         
 1009                         if (flags & FL_OVERWRITE) 
 1010                                 goto skip;
 1011                         return (EEXIST);
 1012                 }
 1013                 /*
 1014                  * re-visit this double condition XXX
 1015                  */
 1016                 if (fletail->f_next != NULL)
 1017                         fletail = fle->f_next;
 1018 
 1019                 depth++;
 1020                 fle = fle->f_next;
 1021         } 
 1022 
 1023         if (depth > fs->ft_max_depth)
 1024                 fs->ft_max_depth = depth;
 1025         fletail->f_next = newfle;
 1026         fle = newfle;
 1027 skip:
 1028         flowtable_set_hashkey(fle, key);
 1029 
 1030         fle->f_proto = proto;
 1031         fle->f_rt = ro->ro_rt;
 1032         fle->f_lle = ro->ro_lle;
 1033         fle->f_fhash = hash;
 1034         fle->f_fibnum = fibnum;
 1035         fle->f_uptime = time_uptime;
 1036         FL_ENTRY_UNLOCK(ft, hash);
 1037         return (0);
 1038 }
 1039 
 1040 int
 1041 kern_flowtable_insert(struct flowtable *ft,
 1042     struct sockaddr_storage *ssa, struct sockaddr_storage *dsa,
 1043     struct route *ro, uint32_t fibnum, int flags)
 1044 {
 1045         uint32_t key[9], hash;
 1046 
 1047         flags = (ft->ft_flags | flags | FL_OVERWRITE);
 1048         hash = 0;
 1049 
 1050 #ifdef INET
 1051         if (ssa->ss_family == AF_INET) 
 1052                 hash = ipv4_flow_lookup_hash_internal((struct sockaddr_in *)ssa,
 1053                     (struct sockaddr_in *)dsa, key, flags);
 1054 #endif
 1055 #ifdef INET6
 1056         if (ssa->ss_family == AF_INET6) 
 1057                 hash = ipv6_flow_lookup_hash_internal((struct sockaddr_in6 *)ssa,
 1058                     (struct sockaddr_in6 *)dsa, key, flags);
 1059 #endif  
 1060         if (ro->ro_rt == NULL || ro->ro_lle == NULL)
 1061                 return (EINVAL);
 1062 
 1063         FLDPRINTF(ft, FL_DEBUG,
 1064             "kern_flowtable_insert: key=%x:%x:%x hash=%x fibnum=%d flags=%x\n",
 1065             key[0], key[1], key[2], hash, fibnum, flags);
 1066         return (flowtable_insert(ft, hash, key, fibnum, ro, flags));
 1067 }
 1068 
 1069 static int
 1070 flowtable_key_equal(struct flentry *fle, uint32_t *key)
 1071 {
 1072         uint32_t *hashkey;
 1073         int i, nwords;
 1074 
 1075         if (fle->f_flags & FL_IPV6) {
 1076                 nwords = 9;
 1077                 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
 1078         } else {
 1079                 nwords = 3;
 1080                 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
 1081         }
 1082 
 1083         for (i = 0; i < nwords; i++) 
 1084                 if (hashkey[i] != key[i])
 1085                         return (0);
 1086 
 1087         return (1);
 1088 }
 1089 
 1090 struct flentry *
 1091 flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af)
 1092 {
 1093         struct flentry *fle = NULL;
 1094 
 1095 #ifdef INET
 1096         if (af == AF_INET)
 1097                 fle = flowtable_lookup_mbuf4(ft, m);
 1098 #endif
 1099 #ifdef INET6
 1100         if (af == AF_INET6)
 1101                 fle = flowtable_lookup_mbuf6(ft, m);
 1102 #endif  
 1103         if (fle != NULL && m != NULL && (m->m_flags & M_FLOWID) == 0) {
 1104                 m->m_flags |= M_FLOWID;
 1105                 m->m_pkthdr.flowid = fle->f_fhash;
 1106         }
 1107         return (fle);
 1108 }
 1109         
 1110 struct flentry *
 1111 flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
 1112     struct sockaddr_storage *dsa, uint32_t fibnum, int flags)
 1113 {
 1114         uint32_t key[9], hash;
 1115         struct flentry *fle;
 1116         struct flowtable_stats *fs = &ft->ft_stats[curcpu];
 1117         uint8_t proto = 0;
 1118         int error = 0;
 1119         struct rtentry *rt;
 1120         struct llentry *lle;
 1121         struct route sro, *ro;
 1122         struct route_in6 sro6;
 1123 
 1124         sro.ro_rt = sro6.ro_rt = NULL;
 1125         sro.ro_lle = sro6.ro_lle = NULL;
 1126         ro = NULL;
 1127         hash = 0;
 1128         flags |= ft->ft_flags;
 1129         proto = flags_to_proto(flags);
 1130 #ifdef INET
 1131         if (ssa->ss_family == AF_INET) {
 1132                 struct sockaddr_in *ssin, *dsin;
 1133 
 1134                 ro = &sro;
 1135                 memcpy(&ro->ro_dst, dsa, sizeof(struct sockaddr_in));
 1136                 /*
 1137                  * The harvested source and destination addresses
 1138                  * may contain port information if the packet is 
 1139                  * from a transport protocol (e.g. TCP/UDP). The 
 1140                  * port field must be cleared before performing 
 1141                  * a route lookup.
 1142                  */
 1143                 ((struct sockaddr_in *)&ro->ro_dst)->sin_port = 0;
 1144                 dsin = (struct sockaddr_in *)dsa;
 1145                 ssin = (struct sockaddr_in *)ssa;
 1146                 if ((dsin->sin_addr.s_addr == ssin->sin_addr.s_addr) ||
 1147                     (ntohl(dsin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 1148                     (ntohl(ssin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
 1149                         return (NULL);
 1150 
 1151                 hash = ipv4_flow_lookup_hash_internal(ssin, dsin, key, flags);
 1152         }
 1153 #endif
 1154 #ifdef INET6
 1155         if (ssa->ss_family == AF_INET6) {
 1156                 struct sockaddr_in6 *ssin6, *dsin6;
 1157 
 1158                 ro = (struct route *)&sro6;
 1159                 memcpy(&sro6.ro_dst, dsa,
 1160                     sizeof(struct sockaddr_in6));
 1161                 ((struct sockaddr_in6 *)&ro->ro_dst)->sin6_port = 0;
 1162                 dsin6 = (struct sockaddr_in6 *)dsa;
 1163                 ssin6 = (struct sockaddr_in6 *)ssa;
 1164 
 1165                 flags |= FL_IPV6;
 1166                 hash = ipv6_flow_lookup_hash_internal(ssin6, dsin6, key, flags);
 1167         }
 1168 #endif
 1169         /*
 1170          * Ports are zero and this isn't a transmit cache
 1171          * - thus not a protocol for which we need to keep 
 1172          * state
 1173          * FL_HASH_ALL => key[0] != 0 for TCP || UDP || SCTP
 1174          */
 1175         if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_ALL)))
 1176                 return (NULL);
 1177 
 1178         fs->ft_lookups++;
 1179         FL_ENTRY_LOCK(ft, hash);
 1180         if ((fle = FL_ENTRY(ft, hash)) == NULL) {
 1181                 FL_ENTRY_UNLOCK(ft, hash);
 1182                 goto uncached;
 1183         }
 1184 keycheck:       
 1185         rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
 1186         lle = __DEVOLATILE(struct llentry *, fle->f_lle);
 1187         if ((rt != NULL)
 1188             && lle != NULL
 1189             && fle->f_fhash == hash
 1190             && flowtable_key_equal(fle, key)
 1191             && (proto == fle->f_proto)
 1192             && (fibnum == fle->f_fibnum)
 1193             && (rt->rt_flags & RTF_UP)
 1194             && (rt->rt_ifp != NULL)
 1195             && (lle->la_flags & LLE_VALID)) {
 1196                 fs->ft_hits++;
 1197                 fle->f_uptime = time_uptime;
 1198                 fle->f_flags |= flags;
 1199                 FL_ENTRY_UNLOCK(ft, hash);
 1200                 return (fle);
 1201         } else if (fle->f_next != NULL) {
 1202                 fle = fle->f_next;
 1203                 goto keycheck;
 1204         }
 1205         FL_ENTRY_UNLOCK(ft, hash);
 1206 uncached:
 1207         if (flags & FL_NOAUTO || flow_full(ft))
 1208                 return (NULL);
 1209 
 1210         fs->ft_misses++;
 1211         /*
 1212          * This bit of code ends up locking the
 1213          * same route 3 times (just like ip_output + ether_output)
 1214          * - at lookup
 1215          * - in rt_check when called by arpresolve
 1216          * - dropping the refcount for the rtentry
 1217          *
 1218          * This could be consolidated to one if we wrote a variant
 1219          * of arpresolve with an rt_check variant that expected to
 1220          * receive the route locked
 1221          */
 1222 
 1223 #ifdef INVARIANTS
 1224         if ((ro->ro_dst.sa_family != AF_INET) &&
 1225             (ro->ro_dst.sa_family != AF_INET6))
 1226                 panic("sa_family == %d\n", ro->ro_dst.sa_family);
 1227 #endif
 1228 
 1229         ft->ft_rtalloc(ro, hash, fibnum);
 1230         if (ro->ro_rt == NULL) 
 1231                 error = ENETUNREACH;
 1232         else {
 1233                 struct llentry *lle = NULL;
 1234                 struct sockaddr_storage *l3addr;
 1235                 struct rtentry *rt = ro->ro_rt;
 1236                 struct ifnet *ifp = rt->rt_ifp;
 1237 
 1238                 if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
 1239                         RTFREE(rt);
 1240                         ro->ro_rt = NULL;
 1241                         return (NULL);
 1242                 }
 1243 #ifdef INET6
 1244                 if (ssa->ss_family == AF_INET6) {
 1245                         struct sockaddr_in6 *dsin6;
 1246 
 1247                         dsin6 = (struct sockaddr_in6 *)dsa;                     
 1248                         if (in6_localaddr(&dsin6->sin6_addr)) {
 1249                                 RTFREE(rt);
 1250                                 ro->ro_rt = NULL;
 1251                                 return (NULL);                          
 1252                         }
 1253 
 1254                         if (rt->rt_flags & RTF_GATEWAY)
 1255                                 l3addr = (struct sockaddr_storage *)rt->rt_gateway;
 1256                         
 1257                         else
 1258                                 l3addr = (struct sockaddr_storage *)&ro->ro_dst;
 1259                         llentry_update(&lle, LLTABLE6(ifp), l3addr, ifp);
 1260                 }
 1261 #endif  
 1262 #ifdef INET
 1263                 if (ssa->ss_family == AF_INET) {
 1264                         if (rt->rt_flags & RTF_GATEWAY)
 1265                                 l3addr = (struct sockaddr_storage *)rt->rt_gateway;
 1266                         else
 1267                                 l3addr = (struct sockaddr_storage *)&ro->ro_dst;
 1268                         llentry_update(&lle, LLTABLE(ifp), l3addr, ifp);        
 1269                 }
 1270                         
 1271 #endif
 1272                 ro->ro_lle = lle;
 1273 
 1274                 if (lle == NULL) {
 1275                         RTFREE(rt);
 1276                         ro->ro_rt = NULL;
 1277                         return (NULL);
 1278                 }
 1279                 error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
 1280 
 1281                 if (error) {
 1282                         RTFREE(rt);
 1283                         LLE_FREE(lle);
 1284                         ro->ro_rt = NULL;
 1285                         ro->ro_lle = NULL;
 1286                 }
 1287         } 
 1288 
 1289         return ((error) ? NULL : fle);
 1290 }
 1291 
 1292 /*
 1293  * used by the bit_alloc macro
 1294  */
 1295 #define calloc(count, size) malloc((count)*(size), M_DEVBUF, M_WAITOK|M_ZERO)
 1296         
 1297 struct flowtable *
 1298 flowtable_alloc(char *name, int nentry, int flags)
 1299 {
 1300         struct flowtable *ft, *fttail;
 1301         int i;
 1302 
 1303         if (V_flow_hashjitter == 0)
 1304                 V_flow_hashjitter = arc4random();
 1305 
 1306         KASSERT(nentry > 0, ("nentry must be > 0, is %d\n", nentry));
 1307 
 1308         ft = malloc(sizeof(struct flowtable),
 1309             M_RTABLE, M_WAITOK | M_ZERO);
 1310 
 1311         ft->ft_name = name;
 1312         ft->ft_flags = flags;
 1313         ft->ft_size = nentry;
 1314 #ifdef RADIX_MPATH
 1315         ft->ft_rtalloc = rtalloc_mpath_fib;
 1316 #else
 1317         ft->ft_rtalloc = rtalloc_ign_wrapper;
 1318 #endif
 1319         if (flags & FL_PCPU) {
 1320                 ft->ft_lock = flowtable_pcpu_lock;
 1321                 ft->ft_unlock = flowtable_pcpu_unlock;
 1322 
 1323                 for (i = 0; i <= mp_maxid; i++) {
 1324                         ft->ft_table.pcpu[i] =
 1325                             malloc(nentry*sizeof(struct flentry *),
 1326                                 M_RTABLE, M_WAITOK | M_ZERO);
 1327                         ft->ft_masks[i] = bit_alloc(nentry);
 1328                 }
 1329         } else {
 1330                 ft->ft_lock_count = 2*(powerof2(mp_maxid + 1) ? (mp_maxid + 1):
 1331                     (fls(mp_maxid + 1) << 1));
 1332                 
 1333                 ft->ft_lock = flowtable_global_lock;
 1334                 ft->ft_unlock = flowtable_global_unlock;
 1335                 ft->ft_table.global =
 1336                             malloc(nentry*sizeof(struct flentry *),
 1337                                 M_RTABLE, M_WAITOK | M_ZERO);
 1338                 ft->ft_locks = malloc(ft->ft_lock_count*sizeof(struct mtx),
 1339                                 M_RTABLE, M_WAITOK | M_ZERO);
 1340                 for (i = 0; i < ft->ft_lock_count; i++)
 1341                         mtx_init(&ft->ft_locks[i], "flow", NULL, MTX_DEF|MTX_DUPOK);
 1342 
 1343                 ft->ft_masks[0] = bit_alloc(nentry);
 1344         }
 1345         ft->ft_tmpmask = bit_alloc(nentry);
 1346 
 1347         /*
 1348          * In the local transmit case the table truly is 
 1349          * just a cache - so everything is eligible for
 1350          * replacement after 5s of non-use
 1351          */
 1352         if (flags & FL_HASH_ALL) {
 1353                 ft->ft_udp_idle = V_flowtable_udp_expire;
 1354                 ft->ft_syn_idle = V_flowtable_syn_expire;
 1355                 ft->ft_fin_wait_idle = V_flowtable_fin_wait_expire;
 1356                 ft->ft_tcp_idle = V_flowtable_fin_wait_expire;
 1357         } else {
 1358                 ft->ft_udp_idle = ft->ft_fin_wait_idle =
 1359                     ft->ft_syn_idle = ft->ft_tcp_idle = 30;
 1360                 
 1361         }
 1362 
 1363         /*
 1364          * hook in to the cleaner list
 1365          */
 1366         if (V_flow_list_head == NULL)
 1367                 V_flow_list_head = ft;
 1368         else {
 1369                 fttail = V_flow_list_head;
 1370                 while (fttail->ft_next != NULL)
 1371                         fttail = fttail->ft_next;
 1372                 fttail->ft_next = ft;
 1373         }
 1374 
 1375         return (ft);
 1376 }
 1377 
 1378 /*
 1379  * The rest of the code is devoted to garbage collection of expired entries.
 1380  * It is a new addition made necessary by the switch to dynamically allocating
 1381  * flow tables.
 1382  * 
 1383  */
 1384 static void
 1385 fle_free(struct flentry *fle, struct flowtable *ft)
 1386 {
 1387         struct rtentry *rt;
 1388         struct llentry *lle;
 1389 
 1390         rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
 1391         lle = __DEVOLATILE(struct llentry *, fle->f_lle);
 1392         if (rt != NULL)
 1393                 RTFREE(rt);
 1394         if (lle != NULL)
 1395                 LLE_FREE(lle);
 1396         flow_free(fle, ft);
 1397 }
 1398 
 1399 static void
 1400 flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
 1401 {
 1402         int curbit = 0, count;
 1403         struct flentry *fle,  **flehead, *fleprev;
 1404         struct flentry *flefreehead, *flefreetail, *fletmp;
 1405         bitstr_t *mask, *tmpmask;
 1406         struct flowtable_stats *fs = &ft->ft_stats[curcpu];
 1407 
 1408         flefreehead = flefreetail = NULL;
 1409         mask = flowtable_mask(ft);
 1410         tmpmask = ft->ft_tmpmask;
 1411         memcpy(tmpmask, mask, ft->ft_size/8);
 1412         /*
 1413          * XXX Note to self, bit_ffs operates at the byte level
 1414          * and thus adds gratuitous overhead
 1415          */
 1416         bit_ffs(tmpmask, ft->ft_size, &curbit);
 1417         while (curbit != -1) {
 1418                 if (curbit >= ft->ft_size || curbit < -1) {
 1419                         log(LOG_ALERT,
 1420                             "warning: bad curbit value %d \n",
 1421                             curbit);
 1422                         break;
 1423                 }
 1424 
 1425                 FL_ENTRY_LOCK(ft, curbit);
 1426                 flehead = flowtable_entry(ft, curbit);
 1427                 fle = fleprev = *flehead;
 1428 
 1429                 fs->ft_free_checks++;
 1430 #ifdef DIAGNOSTIC
 1431                 if (fle == NULL && curbit > 0) {
 1432                         log(LOG_ALERT,
 1433                             "warning bit=%d set, but no fle found\n",
 1434                             curbit);
 1435                 }
 1436 #endif          
 1437                 while (fle != NULL) {
 1438                         if (rt != NULL) {
 1439                                 if (__DEVOLATILE(struct rtentry *, fle->f_rt) != rt) {
 1440                                         fleprev = fle;
 1441                                         fle = fle->f_next;
 1442                                         continue;
 1443                                 }
 1444                         } else if (!flow_stale(ft, fle)) {
 1445                                 fleprev = fle;
 1446                                 fle = fle->f_next;
 1447                                 continue;
 1448                         }
 1449                         /*
 1450                          * delete head of the list
 1451                          */
 1452                         if (fleprev == *flehead) {
 1453                                 fletmp = fleprev;
 1454                                 if (fle == fleprev) {
 1455                                         fleprev = *flehead = fle->f_next;
 1456                                 } else
 1457                                         fleprev = *flehead = fle;
 1458                                 fle = fle->f_next;
 1459                         } else {
 1460                                 /*
 1461                                  * don't advance fleprev
 1462                                  */
 1463                                 fletmp = fle;
 1464                                 fleprev->f_next = fle->f_next;
 1465                                 fle = fleprev->f_next;
 1466                         }
 1467 
 1468                         if (flefreehead == NULL)
 1469                                 flefreehead = flefreetail = fletmp;
 1470                         else {
 1471                                 flefreetail->f_next = fletmp;
 1472                                 flefreetail = fletmp;
 1473                         }
 1474                         fletmp->f_next = NULL;
 1475                 }
 1476                 if (*flehead == NULL)
 1477                         bit_clear(mask, curbit);
 1478                 FL_ENTRY_UNLOCK(ft, curbit);
 1479                 bit_clear(tmpmask, curbit);
 1480                 bit_ffs(tmpmask, ft->ft_size, &curbit);
 1481         }
 1482         count = 0;
 1483         while ((fle = flefreehead) != NULL) {
 1484                 flefreehead = fle->f_next;
 1485                 count++;
 1486                 fs->ft_frees++;
 1487                 fle_free(fle, ft);
 1488         }
 1489         if (V_flowtable_debug && count)
 1490                 log(LOG_DEBUG, "freed %d flow entries\n", count);
 1491 }
 1492 
 1493 void
 1494 flowtable_route_flush(struct flowtable *ft, struct rtentry *rt)
 1495 {
 1496         int i;
 1497 
 1498         if (ft->ft_flags & FL_PCPU) {
 1499                 CPU_FOREACH(i) {
 1500                         if (smp_started == 1) {
 1501                                 thread_lock(curthread);
 1502                                 sched_bind(curthread, i);
 1503                                 thread_unlock(curthread);
 1504                         }
 1505 
 1506                         flowtable_free_stale(ft, rt);
 1507 
 1508                         if (smp_started == 1) {
 1509                                 thread_lock(curthread);
 1510                                 sched_unbind(curthread);
 1511                                 thread_unlock(curthread);
 1512                         }
 1513                 }
 1514         } else {
 1515                 flowtable_free_stale(ft, rt);
 1516         }
 1517 }
 1518 
 1519 static void
 1520 flowtable_clean_vnet(void)
 1521 {
 1522         struct flowtable *ft;
 1523         int i;
 1524 
 1525         ft = V_flow_list_head;
 1526         while (ft != NULL) {
 1527                 if (ft->ft_flags & FL_PCPU) {
 1528                         CPU_FOREACH(i) {
 1529                                 if (smp_started == 1) {
 1530                                         thread_lock(curthread);
 1531                                         sched_bind(curthread, i);
 1532                                         thread_unlock(curthread);
 1533                                 }
 1534 
 1535                                 flowtable_free_stale(ft, NULL);
 1536 
 1537                                 if (smp_started == 1) {
 1538                                         thread_lock(curthread);
 1539                                         sched_unbind(curthread);
 1540                                         thread_unlock(curthread);
 1541                                 }
 1542                         }
 1543                 } else {
 1544                         flowtable_free_stale(ft, NULL);
 1545                 }
 1546                 ft = ft->ft_next;
 1547         }
 1548 }
 1549 
 1550 static void
 1551 flowtable_cleaner(void)
 1552 {
 1553         VNET_ITERATOR_DECL(vnet_iter);
 1554         struct thread *td;
 1555 
 1556         if (bootverbose)
 1557                 log(LOG_INFO, "flowtable cleaner started\n");
 1558         td = curthread;
 1559         while (1) {
 1560                 VNET_LIST_RLOCK();
 1561                 VNET_FOREACH(vnet_iter) {
 1562                         CURVNET_SET(vnet_iter);
 1563                         flowtable_clean_vnet();
 1564                         CURVNET_RESTORE();
 1565                 }
 1566                 VNET_LIST_RUNLOCK();
 1567 
 1568                 /*
 1569                  * The 10 second interval between cleaning checks
 1570                  * is arbitrary
 1571                  */
 1572                 mtx_lock(&flowclean_lock);
 1573                 thread_lock(td);
 1574                 sched_prio(td, PPAUSE);
 1575                 thread_unlock(td);
 1576                 flowclean_cycles++;
 1577                 cv_broadcast(&flowclean_f_cv);
 1578                 cv_timedwait(&flowclean_c_cv, &flowclean_lock, flowclean_freq);
 1579                 mtx_unlock(&flowclean_lock);
 1580         }
 1581 }
 1582 
 1583 static void
 1584 flowtable_flush(void *unused __unused)
 1585 {
 1586         uint64_t start;
 1587 
 1588         mtx_lock(&flowclean_lock);
 1589         start = flowclean_cycles;
 1590         while (start == flowclean_cycles) {
 1591                 cv_broadcast(&flowclean_c_cv);
 1592                 cv_wait(&flowclean_f_cv, &flowclean_lock);
 1593         }
 1594         mtx_unlock(&flowclean_lock);
 1595 }
 1596 
 1597 static struct kproc_desc flow_kp = {
 1598         "flowcleaner",
 1599         flowtable_cleaner,
 1600         &flowcleanerproc
 1601 };
 1602 SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
 1603 
 1604 static void
 1605 flowtable_init_vnet(const void *unused __unused)
 1606 {
 1607 
 1608         V_flowtable_nmbflows = 1024 + maxusers * 64 * mp_ncpus;
 1609         V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
 1610             NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
 1611         V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
 1612             NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);    
 1613         uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
 1614         uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
 1615         V_flowtable_ready = 1;
 1616 }
 1617 VNET_SYSINIT(flowtable_init_vnet, SI_SUB_SMP, SI_ORDER_ANY,
 1618     flowtable_init_vnet, NULL);
 1619 
 1620 static void
 1621 flowtable_init(const void *unused __unused)
 1622 {
 1623 
 1624         cv_init(&flowclean_c_cv, "c_flowcleanwait");
 1625         cv_init(&flowclean_f_cv, "f_flowcleanwait");
 1626         mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
 1627         EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
 1628             EVENTHANDLER_PRI_ANY);
 1629         flowclean_freq = 20*hz;
 1630 }
 1631 SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST,
 1632     flowtable_init, NULL);
 1633 
 1634 
 1635 #ifdef VIMAGE
 1636 static void
 1637 flowtable_uninit(const void *unused __unused)
 1638 {
 1639 
 1640         V_flowtable_ready = 0;
 1641         uma_zdestroy(V_flow_ipv4_zone);
 1642         uma_zdestroy(V_flow_ipv6_zone);
 1643 }
 1644 
 1645 VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
 1646     flowtable_uninit, NULL);
 1647 #endif
 1648 
 1649 #ifdef DDB
 1650 static uint32_t *
 1651 flowtable_get_hashkey(struct flentry *fle)
 1652 {
 1653         uint32_t *hashkey;
 1654 
 1655         if (fle->f_flags & FL_IPV6)
 1656                 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
 1657         else
 1658                 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
 1659 
 1660         return (hashkey);
 1661 }
 1662 
 1663 static bitstr_t *
 1664 flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
 1665 {
 1666         bitstr_t *mask;
 1667 
 1668         if (ft->ft_flags & FL_PCPU)
 1669                 mask = ft->ft_masks[cpuid];
 1670         else
 1671                 mask = ft->ft_masks[0];
 1672 
 1673         return (mask);
 1674 }
 1675 
 1676 static struct flentry **
 1677 flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
 1678 {
 1679         struct flentry **fle;
 1680         int index = (hash % ft->ft_size);
 1681 
 1682         if (ft->ft_flags & FL_PCPU) {
 1683                 fle = &ft->ft_table.pcpu[cpuid][index];
 1684         } else {
 1685                 fle = &ft->ft_table.global[index];
 1686         }
 1687         
 1688         return (fle);
 1689 }
 1690 
 1691 static void
 1692 flow_show(struct flowtable *ft, struct flentry *fle)
 1693 {
 1694         int idle_time;
 1695         int rt_valid, ifp_valid;
 1696         uint16_t sport, dport;
 1697         uint32_t *hashkey;
 1698         char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
 1699         volatile struct rtentry *rt;
 1700         struct ifnet *ifp = NULL;
 1701 
 1702         idle_time = (int)(time_uptime - fle->f_uptime);
 1703         rt = fle->f_rt;
 1704         rt_valid = rt != NULL;
 1705         if (rt_valid) 
 1706                 ifp = rt->rt_ifp;
 1707         ifp_valid = ifp != NULL;
 1708         hashkey = flowtable_get_hashkey(fle);
 1709         if (fle->f_flags & FL_IPV6)
 1710                 goto skipaddr;
 1711 
 1712         inet_ntoa_r(*(struct in_addr *) &hashkey[2], daddr);
 1713         if (ft->ft_flags & FL_HASH_ALL) {
 1714                 inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);            
 1715                 sport = ntohs(((uint16_t *)hashkey)[0]);
 1716                 dport = ntohs(((uint16_t *)hashkey)[1]);
 1717                 db_printf("%s:%d->%s:%d",
 1718                     saddr, sport, daddr,
 1719                     dport);
 1720         } else 
 1721                 db_printf("%s ", daddr);
 1722     
 1723 skipaddr:
 1724         if (fle->f_flags & FL_STALE)
 1725                 db_printf(" FL_STALE ");
 1726         if (fle->f_flags & FL_TCP)
 1727                 db_printf(" FL_TCP ");
 1728         if (fle->f_flags & FL_UDP)
 1729                 db_printf(" FL_UDP ");
 1730         if (rt_valid) {
 1731                 if (rt->rt_flags & RTF_UP)
 1732                         db_printf(" RTF_UP ");
 1733         }
 1734         if (ifp_valid) {
 1735                 if (ifp->if_flags & IFF_LOOPBACK)
 1736                         db_printf(" IFF_LOOPBACK ");
 1737                 if (ifp->if_flags & IFF_UP)
 1738                         db_printf(" IFF_UP ");          
 1739                 if (ifp->if_flags & IFF_POINTOPOINT)
 1740                         db_printf(" IFF_POINTOPOINT ");         
 1741         }
 1742         if (fle->f_flags & FL_IPV6)
 1743                 db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
 1744                     hashkey[0], hashkey[1], hashkey[2],
 1745                     hashkey[3], hashkey[4], hashkey[5],
 1746                     hashkey[6], hashkey[7], hashkey[8]);
 1747         else
 1748                 db_printf("\n\tkey=%08x:%08x:%08x ",
 1749                     hashkey[0], hashkey[1], hashkey[2]);
 1750         db_printf("hash=%08x idle_time=%03d"
 1751             "\n\tfibnum=%02d rt=%p",
 1752             fle->f_fhash, idle_time, fle->f_fibnum, fle->f_rt);
 1753         db_printf("\n");
 1754 }
 1755 
 1756 static void
 1757 flowtable_show(struct flowtable *ft, int cpuid)
 1758 {
 1759         int curbit = 0;
 1760         struct flentry *fle,  **flehead;
 1761         bitstr_t *mask, *tmpmask;
 1762 
 1763         if (cpuid != -1)
 1764                 db_printf("cpu: %d\n", cpuid);
 1765         mask = flowtable_mask_pcpu(ft, cpuid);
 1766         tmpmask = ft->ft_tmpmask;
 1767         memcpy(tmpmask, mask, ft->ft_size/8);
 1768         /*
 1769          * XXX Note to self, bit_ffs operates at the byte level
 1770          * and thus adds gratuitous overhead
 1771          */
 1772         bit_ffs(tmpmask, ft->ft_size, &curbit);
 1773         while (curbit != -1) {
 1774                 if (curbit >= ft->ft_size || curbit < -1) {
 1775                         db_printf("warning: bad curbit value %d \n",
 1776                             curbit);
 1777                         break;
 1778                 }
 1779 
 1780                 flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
 1781                 fle = *flehead;
 1782 
 1783                 while (fle != NULL) {   
 1784                         flow_show(ft, fle);
 1785                         fle = fle->f_next;
 1786                         continue;
 1787                 }
 1788                 bit_clear(tmpmask, curbit);
 1789                 bit_ffs(tmpmask, ft->ft_size, &curbit);
 1790         }
 1791 }
 1792 
 1793 static void
 1794 flowtable_show_vnet(void)
 1795 {
 1796         struct flowtable *ft;
 1797         int i;
 1798 
 1799         ft = V_flow_list_head;
 1800         while (ft != NULL) {
 1801                 printf("name: %s\n", ft->ft_name);
 1802                 if (ft->ft_flags & FL_PCPU) {
 1803                         CPU_FOREACH(i) {
 1804                                 flowtable_show(ft, i);
 1805                         }
 1806                 } else {
 1807                         flowtable_show(ft, -1);
 1808                 }
 1809                 ft = ft->ft_next;
 1810         }
 1811 }
 1812 
 1813 DB_SHOW_COMMAND(flowtables, db_show_flowtables)
 1814 {
 1815         VNET_ITERATOR_DECL(vnet_iter);
 1816 
 1817         VNET_FOREACH(vnet_iter) {
 1818                 CURVNET_SET(vnet_iter);
 1819 #ifdef VIMAGE
 1820                 db_printf("vnet %p\n", vnet_iter);
 1821 #endif
 1822                 flowtable_show_vnet();
 1823                 CURVNET_RESTORE();
 1824         }
 1825 }
 1826 #endif

Cache object: 3fcbf877752016ea23644fe7ba4c2b4a


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.