The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/flowtable.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /**************************************************************************
    2 
    3 Copyright (c) 2008-2010, BitGravity Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the BitGravity Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15 
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include "opt_route.h"
   31 #include "opt_mpath.h"
   32 #include "opt_ddb.h"
   33 #include "opt_inet.h"
   34 #include "opt_inet6.h"
   35 
   36 #include <sys/cdefs.h>
   37 __FBSDID("$FreeBSD: releng/8.4/sys/net/flowtable.c 232552 2012-03-05 17:33:01Z bz $");
   38 
   39 #include <sys/param.h>  
   40 #include <sys/types.h>
   41 #include <sys/bitstring.h>
   42 #include <sys/condvar.h>
   43 #include <sys/callout.h>
   44 #include <sys/kernel.h>  
   45 #include <sys/kthread.h>
   46 #include <sys/limits.h>
   47 #include <sys/malloc.h>
   48 #include <sys/mbuf.h>
   49 #include <sys/proc.h>
   50 #include <sys/sbuf.h>
   51 #include <sys/sched.h>
   52 #include <sys/smp.h>
   53 #include <sys/socket.h>
   54 #include <sys/syslog.h>
   55 #include <sys/sysctl.h>
   56 
   57 #include <net/if.h>
   58 #include <net/if_llatbl.h>
   59 #include <net/if_var.h>
   60 #include <net/route.h> 
   61 #include <net/flowtable.h>
   62 #include <net/vnet.h>
   63 
   64 #include <netinet/in.h>
   65 #include <netinet/in_systm.h>
   66 #include <netinet/in_var.h>
   67 #include <netinet/if_ether.h>
   68 #include <netinet/ip.h>
   69 #ifdef INET6
   70 #include <netinet/ip6.h>
   71 #endif
   72 #include <netinet/tcp.h>
   73 #include <netinet/udp.h>
   74 #include <netinet/sctp.h>
   75 
   76 #include <libkern/jenkins.h>
   77 #include <ddb/ddb.h>
   78 
/*
 * Identity of an IPv4 flow: the two transport ports followed by the
 * source and destination addresses.  It is overlaid by the 32-bit key
 * words of union ipv4_flow.
 */
struct ipv4_tuple {
        uint16_t        ip_sport;       /* source port */
        uint16_t        ip_dport;       /* destination port */
        in_addr_t       ip_saddr;       /* source address */
        in_addr_t       ip_daddr;       /* destination address */
};
   85 
/*
 * An IPv4 flow viewed either as a tuple or as three 32-bit key words.
 * flow_to_route() reads ipf_key[2] back as the destination address.
 */
union ipv4_flow {
        struct ipv4_tuple ipf_ipt;
        uint32_t        ipf_key[3];
};
   90 
/*
 * Identity of an IPv6 flow: the two transport ports followed by the
 * source and destination addresses.  It is overlaid by the 32-bit key
 * words of union ipv6_flow.
 */
struct ipv6_tuple {
        uint16_t        ip_sport;       /* source port */
        uint16_t        ip_dport;       /* destination port */
        struct in6_addr ip_saddr;       /* source address */
        struct in6_addr ip_daddr;       /* destination address */
};
   97 
/*
 * An IPv6 flow viewed either as a tuple or as nine 32-bit key words
 * (one word of ports plus two 128-bit addresses).
 */
union ipv6_flow {
        struct ipv6_tuple ipf_ipt;
        uint32_t        ipf_key[9];
};
  102 
/*
 * Address-family independent part of a flow entry.  It is the first
 * member of struct flentry_v4 and struct flentry_v6, which append the
 * family-specific key.
 */
struct flentry {
        volatile uint32_t       f_fhash;        /* hash flowing forward */
        uint16_t                f_flags;        /* flow flags */
        uint8_t                 f_pad;          /* not referenced in this part of the file */
        uint8_t                 f_proto;        /* protocol */
        uint32_t                f_fibnum;       /* fib index */
        uint32_t                f_uptime;       /* uptime at last access */
        struct flentry          *f_next;        /* pointer to collision entry */
        volatile struct rtentry *f_rt;          /* rtentry for flow */
        volatile struct llentry *f_lle;         /* llentry for flow */
};
  114 
/*
 * IPv4 flow entry: the common header followed by the IPv4 key.
 * flow_to_route() casts a struct flentry pointer to this type.
 */
struct flentry_v4 {
        struct flentry  fl_entry;
        union ipv4_flow fl_flow;
};
  119 
/*
 * IPv6 flow entry: the common header followed by the IPv6 key.
 * flow_to_route_in6() casts a struct flentry pointer to this type.
 */
struct flentry_v6 {
        struct flentry  fl_entry;
        union ipv6_flow fl_flow;
};
  124 
  125 #define fl_fhash        fl_entry.fl_fhash
  126 #define fl_flags        fl_entry.fl_flags
  127 #define fl_proto        fl_entry.fl_proto
  128 #define fl_uptime       fl_entry.fl_uptime
  129 #define fl_rt           fl_entry.fl_rt
  130 #define fl_lle          fl_entry.fl_lle
  131 
/* Time unit helpers, in seconds. */
#define SECS_PER_HOUR           3600
#define SECS_PER_DAY            (24*SECS_PER_HOUR)

/*
 * Default idle periods, in seconds.  They seed the flowtable_*_expire
 * tunables defined later in this file.
 */
#define SYN_IDLE                300
#define UDP_IDLE                300
#define FIN_WAIT_IDLE           600
#define TCP_IDLE                SECS_PER_DAY
  139 
  140 
/*
 * Signature of the per-table lock and unlock hooks; they receive the
 * table and the hash of the flow being accessed.
 */
typedef void fl_lock_t(struct flowtable *, uint32_t);
/* Signature of the route lookup hook: route, flow hash, fib number. */
typedef void fl_rtalloc_t(struct route *, uint32_t, u_int);
  143 
/*
 * Storage for a table's entry pointer array: either one array, or one
 * array per CPU slot.
 * NOTE(review): presumably selected by FL_PCPU in ft_flags -- confirm
 * against flowtable_entry().
 */
union flentryp {
        struct flentry          **global;
        struct flentry          **pcpu[MAXCPU];
};
  148 
/*
 * Flow table counters.  struct flowtable keeps one instance per CPU
 * slot; flowtable_show_stats() sums them (taking the maximum for
 * ft_max_depth) for FL_PCPU tables.  Each instance is aligned to a
 * cache line.
 */
struct flowtable_stats {
        uint64_t        ft_collisions;
        uint64_t        ft_allocated;
        uint64_t        ft_misses;
        uint64_t        ft_max_depth;
        uint64_t        ft_free_checks;
        uint64_t        ft_frees;
        uint64_t        ft_hits;
        uint64_t        ft_lookups;
} __aligned(CACHE_LINE_SIZE);
  159 
/*
 * One flow table.  Tables are chained through ft_next; the head of the
 * chain is V_flow_list_head.
 */
struct flowtable {
        /* Counters; slot 0 is the only one reported for non-FL_PCPU tables. */
        struct  flowtable_stats ft_stats[MAXCPU];
        /* Modulus applied to a hash to pick an entry slot. */
        int             ft_size;
        /* Number of mutexes in ft_locks; used as a mask, so presumably a power of two. */
        int             ft_lock_count;
        /* FL_* flags; also the initial flags for mbuf lookups. */
        uint32_t        ft_flags;
        /* Name printed by the stats sysctl. */
        char            *ft_name;
        /* Lock and unlock hooks invoked through FL_ENTRY_LOCK/FL_ENTRY_UNLOCK. */
        fl_lock_t       *ft_lock;
        fl_lock_t       *ft_unlock;
        fl_rtalloc_t    *ft_rtalloc;
        /*
         * XXX need to pad out 
         */ 
        /* Mutex array indexed by (hash & (ft_lock_count - 1)). */
        struct mtx      *ft_locks;
        union flentryp  ft_table;
        /* Bit masks; flowtable_mask() picks [curcpu] for FL_PCPU, else [0]. */
        bitstr_t        *ft_masks[MAXCPU];
        bitstr_t        *ft_tmpmask;
        /* Next table in the list walked by the stats sysctl. */
        struct flowtable *ft_next;

        uint32_t        ft_count __aligned(CACHE_LINE_SIZE);
        uint32_t        ft_udp_idle __aligned(CACHE_LINE_SIZE);
        uint32_t        ft_fin_wait_idle;
        uint32_t        ft_syn_idle;
        uint32_t        ft_tcp_idle;
        boolean_t       ft_full;
} __aligned(CACHE_LINE_SIZE);
  185 
/*
 * File-scope state.  The VNET_DEFINE()d objects are reached through the
 * V_ accessor macros below.
 */
static struct proc *flowcleanerproc;
/* Head of the list of flow tables (chained through ft_next). */
static VNET_DEFINE(struct flowtable *, flow_list_head);
/* Added to the hash seed when only the destination is hashed. */
static VNET_DEFINE(uint32_t, flow_hashjitter);
/* UMA zones whose limits are raised by the nmbflows sysctl. */
static VNET_DEFINE(uma_zone_t, flow_ipv4_zone);
static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);

#define V_flow_list_head        VNET(flow_list_head)
#define V_flow_hashjitter       VNET(flow_hashjitter)
#define V_flow_ipv4_zone        VNET(flow_ipv4_zone)
#define V_flow_ipv6_zone        VNET(flow_ipv6_zone)


/* Flow cleaner state; the users are outside this part of the file. */
static struct cv        flowclean_f_cv;
static struct cv        flowclean_c_cv;
static struct mtx       flowclean_lock;
static uint32_t         flowclean_cycles;
static uint32_t         flowclean_freq;
  203 
  204 #ifdef FLOWTABLE_DEBUG
  205 #define FLDPRINTF(ft, flags, fmt, ...)          \
  206 do {                                            \
  207         if ((ft)->ft_flags & (flags))           \
  208                 printf((fmt), __VA_ARGS__);     \
  209 } while (0);                                    \
  210 
  211 #else
  212 #define FLDPRINTF(ft, flags, fmt, ...)
  213 
  214 #endif
  215 
  216 
/*
 * TODO:
 * - Make flowtable stats per-cpu, aggregated at sysctl call time,
 *   to avoid extra cache evictions caused by incrementing a shared
 *   counter
 * - add sysctls to resize && flush flow tables 
 * - Add per flowtable sysctls for statistics and configuring timeouts
 * - add saturation counter to rtentry to support per-packet load-balancing
 *   add flag to indicate round-robin flow, add list lookup from head
 *   for flows
 * - add sysctl / device node / syscall to support exporting and importing
 *   of flows with flag to indicate that a flow was imported so should
 *   not be considered for auto-cleaning
 * - support explicit connection state (currently only ad-hoc for DSR)
 * - idetach() cleanup for options VIMAGE builds.
 */
/* Per-vnet tunables; flowtable_enable is the only one with external linkage. */
VNET_DEFINE(int, flowtable_enable) = 1;
static VNET_DEFINE(int, flowtable_debug);
static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
static VNET_DEFINE(int, flowtable_nmbflows);
/* The hash functions return 0 while this or flowtable_enable is 0. */
static VNET_DEFINE(int, flowtable_ready) = 0;

#define V_flowtable_enable              VNET(flowtable_enable)
#define V_flowtable_debug               VNET(flowtable_debug)
#define V_flowtable_syn_expire          VNET(flowtable_syn_expire)
#define V_flowtable_udp_expire          VNET(flowtable_udp_expire)
#define V_flowtable_fin_wait_expire     VNET(flowtable_fin_wait_expire)
#define V_flowtable_tcp_expire          VNET(flowtable_tcp_expire)
#define V_flowtable_nmbflows            VNET(flowtable_nmbflows)
#define V_flowtable_ready               VNET(flowtable_ready)

/* net.inet.flowtable sysctl tree. */
SYSCTL_NODE(_net_inet, OID_AUTO, flowtable, CTLFLAG_RD, NULL, "flowtable");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, debug, CTLFLAG_RW,
    &VNET_NAME(flowtable_debug), 0, "print debug info.");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, enable, CTLFLAG_RW,
    &VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");

/*
 * XXX This does not end up updating timeouts at runtime
 * and only reflects the value for the last table added :-/
 */
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, syn_expire, CTLFLAG_RW,
    &VNET_NAME(flowtable_syn_expire), 0,
    "seconds after which to remove syn allocated flow.");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, udp_expire, CTLFLAG_RW,
    &VNET_NAME(flowtable_udp_expire), 0,
    "seconds after which to remove flow allocated to UDP.");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, fin_wait_expire, CTLFLAG_RW,
    &VNET_NAME(flowtable_fin_wait_expire), 0,
    "seconds after which to remove a flow in FIN_WAIT.");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, tcp_expire, CTLFLAG_RW,
    &VNET_NAME(flowtable_tcp_expire), 0,
    "seconds after which to remove flow allocated to a TCP connection.");
  273 
  274 
  275 /*
  276  * Maximum number of flows that can be allocated of a given type.
  277  *
  278  * The table is allocated at boot time (for the pure caching case
  279  * there is no reason why this could not be changed at runtime)
  280  * and thus (currently) needs to be set with a tunable.
  281  */
  282 static int
  283 sysctl_nmbflows(SYSCTL_HANDLER_ARGS)
  284 {
  285         int error, newnmbflows;
  286 
  287         newnmbflows = V_flowtable_nmbflows;
  288         error = sysctl_handle_int(oidp, &newnmbflows, 0, req); 
  289         if (error == 0 && req->newptr) {
  290                 if (newnmbflows > V_flowtable_nmbflows) {
  291                         V_flowtable_nmbflows = newnmbflows;
  292                         uma_zone_set_max(V_flow_ipv4_zone,
  293                             V_flowtable_nmbflows);
  294                         uma_zone_set_max(V_flow_ipv6_zone,
  295                             V_flowtable_nmbflows);
  296                 } else
  297                         error = EINVAL;
  298         }
  299         return (error);
  300 }
  301 SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
  302     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_nmbflows, "IU",
  303     "Maximum number of flows allowed");
  304 
  305 
  306 
  307 #define FS_PRINT(sb, field)     sbuf_printf((sb), "\t%s: %jd\n", #field, fs->ft_##field)
  308 
  309 static void
  310 fs_print(struct sbuf *sb, struct flowtable_stats *fs)
  311 {
  312 
  313         FS_PRINT(sb, collisions);
  314         FS_PRINT(sb, allocated);
  315         FS_PRINT(sb, misses);
  316         FS_PRINT(sb, max_depth);
  317         FS_PRINT(sb, free_checks);
  318         FS_PRINT(sb, frees);
  319         FS_PRINT(sb, hits);
  320         FS_PRINT(sb, lookups);
  321 }
  322 
  323 static void
  324 flowtable_show_stats(struct sbuf *sb, struct flowtable *ft)
  325 {
  326         int i;
  327         struct flowtable_stats fs, *pfs;
  328 
  329         if (ft->ft_flags & FL_PCPU) {
  330                 bzero(&fs, sizeof(fs));
  331                 pfs = &fs;
  332                 CPU_FOREACH(i) {
  333                         pfs->ft_collisions  += ft->ft_stats[i].ft_collisions;
  334                         pfs->ft_allocated   += ft->ft_stats[i].ft_allocated;
  335                         pfs->ft_misses      += ft->ft_stats[i].ft_misses;
  336                         pfs->ft_free_checks += ft->ft_stats[i].ft_free_checks;
  337                         pfs->ft_frees       += ft->ft_stats[i].ft_frees;
  338                         pfs->ft_hits        += ft->ft_stats[i].ft_hits;
  339                         pfs->ft_lookups     += ft->ft_stats[i].ft_lookups;
  340                         if (ft->ft_stats[i].ft_max_depth > pfs->ft_max_depth)
  341                                 pfs->ft_max_depth = ft->ft_stats[i].ft_max_depth;
  342                 }
  343         } else {
  344                 pfs = &ft->ft_stats[0];
  345         }
  346         fs_print(sb, pfs);
  347 }
  348 
  349 static int
  350 sysctl_flowtable_stats(SYSCTL_HANDLER_ARGS)
  351 {
  352         struct flowtable *ft;
  353         struct sbuf *sb;
  354         int error;
  355 
  356         sb = sbuf_new(NULL, NULL, 64*1024, SBUF_FIXEDLEN);
  357 
  358         ft = V_flow_list_head;
  359         while (ft != NULL) {
  360                 sbuf_printf(sb, "\ntable name: %s\n", ft->ft_name);
  361                 flowtable_show_stats(sb, ft);
  362                 ft = ft->ft_next;
  363         }
  364         sbuf_finish(sb);
  365         error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
  366         sbuf_delete(sb);
  367 
  368         return (error);
  369 }
  370 SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
  371     NULL, 0, sysctl_flowtable_stats, "A", "flowtable statistics");
  372 
  373 
  374 #ifndef RADIX_MPATH
  375 static void
  376 rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
  377 {
  378 
  379         rtalloc_ign_fib(ro, 0, fibnum);
  380 }
  381 #endif
  382 
  383 static void
  384 flowtable_global_lock(struct flowtable *table, uint32_t hash)
  385 {       
  386         int lock_index = (hash)&(table->ft_lock_count - 1);
  387 
  388         mtx_lock(&table->ft_locks[lock_index]);
  389 }
  390 
  391 static void
  392 flowtable_global_unlock(struct flowtable *table, uint32_t hash)
  393 {       
  394         int lock_index = (hash)&(table->ft_lock_count - 1);
  395 
  396         mtx_unlock(&table->ft_locks[lock_index]);
  397 }
  398 
  399 static void
  400 flowtable_pcpu_lock(struct flowtable *table, uint32_t hash)
  401 {
  402 
  403         critical_enter();
  404 }
  405 
  406 static void
  407 flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
  408 {
  409 
  410         critical_exit();
  411 }
  412 
  413 #define FL_ENTRY_INDEX(table, hash)((hash) % (table)->ft_size)
  414 #define FL_ENTRY(table, hash) *flowtable_entry((table), (hash))
  415 #define FL_ENTRY_LOCK(table, hash)  (table)->ft_lock((table), (hash))
  416 #define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
  417 
/* Flow flag bits private to this file. */
#define FL_STALE        (1<<8)  /* set by flow_invalidate() and on TCP RST/FIN */
#define FL_IPV6         (1<<9)
#define FL_OVERWRITE    (1<<10)
  421 
  422 void
  423 flow_invalidate(struct flentry *fle)
  424 {
  425 
  426         fle->f_flags |= FL_STALE;
  427 }
  428 
  429 static __inline int
  430 proto_to_flags(uint8_t proto)
  431 {
  432         int flag;
  433 
  434         switch (proto) {
  435         case IPPROTO_TCP:
  436                 flag = FL_TCP;
  437                 break;
  438         case IPPROTO_SCTP:
  439                 flag = FL_SCTP;
  440                 break;          
  441         case IPPROTO_UDP:
  442                 flag = FL_UDP;
  443                 break;
  444         default:
  445                 flag = 0;
  446                 break;
  447         }
  448 
  449         return (flag);
  450 }
  451 
  452 static __inline int
  453 flags_to_proto(int flags)
  454 {
  455         int proto, protoflags;
  456 
  457         protoflags = flags & (FL_TCP|FL_SCTP|FL_UDP);
  458         switch (protoflags) {
  459         case FL_TCP:
  460                 proto = IPPROTO_TCP;
  461                 break;
  462         case FL_SCTP:
  463                 proto = IPPROTO_SCTP;
  464                 break;
  465         case FL_UDP:
  466                 proto = IPPROTO_UDP;
  467                 break;
  468         default:
  469                 proto = 0;
  470                 break;
  471         }
  472         return (proto);
  473 }
  474 
  475 #ifdef INET
  476 #ifdef FLOWTABLE_DEBUG
  477 static void
  478 ipv4_flow_print_tuple(int flags, int proto, struct sockaddr_in *ssin,
  479     struct sockaddr_in *dsin)
  480 {
  481         char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
  482 
  483         if (flags & FL_HASH_ALL) {
  484                 inet_ntoa_r(ssin->sin_addr, saddr);
  485                 inet_ntoa_r(dsin->sin_addr, daddr);
  486                 printf("proto=%d %s:%d->%s:%d\n",
  487                     proto, saddr, ntohs(ssin->sin_port), daddr,
  488                     ntohs(dsin->sin_port));
  489         } else {
  490                 inet_ntoa_r(*(struct in_addr *) &dsin->sin_addr, daddr);
  491                 printf("proto=%d %s\n", proto, daddr);
  492         }
  493 
  494 }
  495 #endif
  496 
  497 static int
  498 ipv4_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
  499     struct sockaddr_in *ssin, struct sockaddr_in *dsin, uint16_t *flags)
  500 {
  501         struct ip *ip;
  502         uint8_t proto;
  503         int iphlen;
  504         struct tcphdr *th;
  505         struct udphdr *uh;
  506         struct sctphdr *sh;
  507         uint16_t sport, dport;
  508 
  509         proto = sport = dport = 0;
  510         ip = mtod(m, struct ip *);
  511         dsin->sin_family = AF_INET;
  512         dsin->sin_len = sizeof(*dsin);
  513         dsin->sin_addr = ip->ip_dst;
  514         ssin->sin_family = AF_INET;
  515         ssin->sin_len = sizeof(*ssin);
  516         ssin->sin_addr = ip->ip_src;    
  517 
  518         proto = ip->ip_p;
  519         if ((*flags & FL_HASH_ALL) == 0) {
  520                 FLDPRINTF(ft, FL_DEBUG_ALL, "skip port check flags=0x%x ",
  521                     *flags);
  522                 goto skipports;
  523         }
  524 
  525         iphlen = ip->ip_hl << 2; /* XXX options? */
  526 
  527         switch (proto) {
  528         case IPPROTO_TCP:
  529                 th = (struct tcphdr *)((caddr_t)ip + iphlen);
  530                 sport = th->th_sport;
  531                 dport = th->th_dport;
  532                 if ((*flags & FL_HASH_ALL) &&
  533                     (th->th_flags & (TH_RST|TH_FIN)))
  534                         *flags |= FL_STALE;
  535         break;
  536         case IPPROTO_UDP:
  537                 uh = (struct udphdr *)((caddr_t)ip + iphlen);
  538                 sport = uh->uh_sport;
  539                 dport = uh->uh_dport;
  540         break;
  541         case IPPROTO_SCTP:
  542                 sh = (struct sctphdr *)((caddr_t)ip + iphlen);
  543                 sport = sh->src_port;
  544                 dport = sh->dest_port;
  545         break;
  546         default:
  547                 FLDPRINTF(ft, FL_DEBUG_ALL, "proto=0x%x not supported\n", proto);
  548                 return (ENOTSUP);
  549                 /* no port - hence not a protocol we care about */
  550                 break;
  551         
  552         }
  553 
  554 skipports:
  555         *flags |= proto_to_flags(proto);
  556         ssin->sin_port = sport;
  557         dsin->sin_port = dport;
  558         return (0);
  559 }
  560 
  561 static uint32_t
  562 ipv4_flow_lookup_hash_internal(
  563         struct sockaddr_in *ssin, struct sockaddr_in *dsin, 
  564             uint32_t *key, uint16_t flags)
  565 {
  566         uint16_t sport, dport;
  567         uint8_t proto;
  568         int offset = 0;
  569 
  570         if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
  571                 return (0);
  572         proto = flags_to_proto(flags);
  573         sport = dport = key[2] = key[1] = key[0] = 0;
  574         if ((ssin != NULL) && (flags & FL_HASH_ALL)) {
  575                 key[1] = ssin->sin_addr.s_addr;
  576                 sport = ssin->sin_port;
  577         }
  578         if (dsin != NULL) {
  579                 key[2] = dsin->sin_addr.s_addr;
  580                 dport = dsin->sin_port;
  581         }
  582         if (flags & FL_HASH_ALL) {
  583                 ((uint16_t *)key)[0] = sport;
  584                 ((uint16_t *)key)[1] = dport; 
  585         } else
  586                 offset = V_flow_hashjitter + proto;
  587 
  588         return (jenkins_hashword(key, 3, offset));
  589 }
  590 
  591 static struct flentry *
  592 flowtable_lookup_mbuf4(struct flowtable *ft, struct mbuf *m)
  593 {
  594         struct sockaddr_storage ssa, dsa;
  595         uint16_t flags;
  596         struct sockaddr_in *dsin, *ssin;
  597 
  598         dsin = (struct sockaddr_in *)&dsa;
  599         ssin = (struct sockaddr_in *)&ssa;
  600         bzero(dsin, sizeof(*dsin));
  601         bzero(ssin, sizeof(*ssin));
  602         flags = ft->ft_flags;
  603         if (ipv4_mbuf_demarshal(ft, m, ssin, dsin, &flags) != 0)
  604                 return (NULL);
  605 
  606         return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
  607 }
  608 
  609 void
  610 flow_to_route(struct flentry *fle, struct route *ro)
  611 {
  612         uint32_t *hashkey = NULL;
  613         struct sockaddr_in *sin;
  614 
  615         sin = (struct sockaddr_in *)&ro->ro_dst;
  616         sin->sin_family = AF_INET;
  617         sin->sin_len = sizeof(*sin);
  618         hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
  619         sin->sin_addr.s_addr = hashkey[2];
  620         ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
  621         ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
  622 }
  623 #endif /* INET */
  624 
  625 #ifdef INET6
/*
 * PULLUP_TO(len, p, T) checks that the first mbuf holds at least
 * len + sizeof(T) bytes and then sets p to point at offset "len" in
 * that mbuf.  Despite its name it never pulls data up: when the bytes
 * are not already contiguous it jumps to the caller's "receive_failed"
 * label.
 *
 * The macro uses the caller's variable "m" (the mbuf) implicitly and
 * declares a local "x", so it must be used as a statement inside a
 * function that provides both "m" and the label.
 */
#define PULLUP_TO(_len, p, T)                                           \
do {                                                                    \
        int x = (_len) + sizeof(T);                                     \
        if ((m)->m_len < x) {                                           \
                goto receive_failed;                                    \
        }                                                               \
        p = (mtod(m, char *) + (_len));                                 \
} while (0)

/* View an upper-layer header pointer as a specific transport header. */
#define TCP(p)          ((struct tcphdr *)(p))
#define SCTP(p)         ((struct sctphdr *)(p))
#define UDP(p)          ((struct udphdr *)(p))
  644 
  645 static int
  646 ipv6_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
  647     struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, uint16_t *flags)
  648 {
  649         struct ip6_hdr *ip6;
  650         uint8_t proto;
  651         int hlen;
  652         uint16_t src_port, dst_port;
  653         u_short offset;
  654         void *ulp;
  655 
  656         offset = hlen = src_port = dst_port = 0;
  657         ulp = NULL;
  658         ip6 = mtod(m, struct ip6_hdr *);
  659         hlen = sizeof(struct ip6_hdr);
  660         proto = ip6->ip6_nxt;
  661 
  662         if ((*flags & FL_HASH_ALL) == 0)
  663                 goto skipports;
  664 
  665         while (ulp == NULL) {
  666                 switch (proto) {
  667                 case IPPROTO_ICMPV6:
  668                 case IPPROTO_OSPFIGP:
  669                 case IPPROTO_PIM:
  670                 case IPPROTO_CARP:
  671                 case IPPROTO_ESP:
  672                 case IPPROTO_NONE:
  673                         ulp = ip6;
  674                         break;
  675                 case IPPROTO_TCP:
  676                         PULLUP_TO(hlen, ulp, struct tcphdr);
  677                         dst_port = TCP(ulp)->th_dport;
  678                         src_port = TCP(ulp)->th_sport;
  679                         if ((*flags & FL_HASH_ALL) &&
  680                             (TCP(ulp)->th_flags & (TH_RST|TH_FIN)))
  681                                 *flags |= FL_STALE;
  682                         break;
  683                 case IPPROTO_SCTP:
  684                         PULLUP_TO(hlen, ulp, struct sctphdr);
  685                         src_port = SCTP(ulp)->src_port;
  686                         dst_port = SCTP(ulp)->dest_port;
  687                         break;
  688                 case IPPROTO_UDP:
  689                         PULLUP_TO(hlen, ulp, struct udphdr);
  690                         dst_port = UDP(ulp)->uh_dport;
  691                         src_port = UDP(ulp)->uh_sport;
  692                         break;
  693                 case IPPROTO_HOPOPTS:   /* RFC 2460 */
  694                         PULLUP_TO(hlen, ulp, struct ip6_hbh);
  695                         hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
  696                         proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
  697                         ulp = NULL;
  698                         break;
  699                 case IPPROTO_ROUTING:   /* RFC 2460 */
  700                         PULLUP_TO(hlen, ulp, struct ip6_rthdr); 
  701                         hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
  702                         proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
  703                         ulp = NULL;
  704                         break;
  705                 case IPPROTO_FRAGMENT:  /* RFC 2460 */
  706                         PULLUP_TO(hlen, ulp, struct ip6_frag);
  707                         hlen += sizeof (struct ip6_frag);
  708                         proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
  709                         offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
  710                             IP6F_OFF_MASK;
  711                         ulp = NULL;
  712                         break;
  713                 case IPPROTO_DSTOPTS:   /* RFC 2460 */
  714                         PULLUP_TO(hlen, ulp, struct ip6_hbh);
  715                         hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
  716                         proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
  717                         ulp = NULL;
  718                         break;
  719                 case IPPROTO_AH:        /* RFC 2402 */
  720                         PULLUP_TO(hlen, ulp, struct ip6_ext);
  721                         hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
  722                         proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
  723                         ulp = NULL;
  724                         break;
  725                 default:
  726                         PULLUP_TO(hlen, ulp, struct ip6_ext);
  727                         break;
  728                 }
  729         }
  730 
  731         if (src_port == 0) {
  732         receive_failed:
  733                 return (ENOTSUP);
  734         }
  735 
  736 skipports:
  737         dsin6->sin6_family = AF_INET6;
  738         dsin6->sin6_len = sizeof(*dsin6);
  739         dsin6->sin6_port = dst_port;
  740         memcpy(&dsin6->sin6_addr, &ip6->ip6_dst, sizeof(struct in6_addr));
  741 
  742         ssin6->sin6_family = AF_INET6;
  743         ssin6->sin6_len = sizeof(*ssin6);
  744         ssin6->sin6_port = src_port;
  745         memcpy(&ssin6->sin6_addr, &ip6->ip6_src, sizeof(struct in6_addr));
  746         *flags |= proto_to_flags(proto);
  747 
  748         return (0);
  749 }
  750 
  751 #define zero_key(key)           \
  752 do {                            \
  753         key[0] = 0;             \
  754         key[1] = 0;             \
  755         key[2] = 0;             \
  756         key[3] = 0;             \
  757         key[4] = 0;             \
  758         key[5] = 0;             \
  759         key[6] = 0;             \
  760         key[7] = 0;             \
  761         key[8] = 0;             \
  762 } while (0)
  763         
  764 static uint32_t
  765 ipv6_flow_lookup_hash_internal(
  766         struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, 
  767             uint32_t *key, uint16_t flags)
  768 {
  769         uint16_t sport, dport;
  770         uint8_t proto;
  771         int offset = 0;
  772 
  773         if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
  774                 return (0);
  775 
  776         proto = flags_to_proto(flags);
  777         zero_key(key);
  778         sport = dport = 0;
  779         if (dsin6 != NULL) {
  780                 memcpy(&key[1], &dsin6->sin6_addr, sizeof(struct in6_addr));
  781                 dport = dsin6->sin6_port;
  782         }
  783         if ((ssin6 != NULL) && (flags & FL_HASH_ALL)) {
  784                 memcpy(&key[5], &ssin6->sin6_addr, sizeof(struct in6_addr));
  785                 sport = ssin6->sin6_port;
  786         }
  787         if (flags & FL_HASH_ALL) {
  788                 ((uint16_t *)key)[0] = sport;
  789                 ((uint16_t *)key)[1] = dport; 
  790         } else
  791                 offset = V_flow_hashjitter + proto;
  792 
  793         return (jenkins_hashword(key, 9, offset));
  794 }
  795 
  796 static struct flentry *
  797 flowtable_lookup_mbuf6(struct flowtable *ft, struct mbuf *m)
  798 {
  799         struct sockaddr_storage ssa, dsa;
  800         struct sockaddr_in6 *dsin6, *ssin6;     
  801         uint16_t flags;
  802 
  803         dsin6 = (struct sockaddr_in6 *)&dsa;
  804         ssin6 = (struct sockaddr_in6 *)&ssa;
  805         bzero(dsin6, sizeof(*dsin6));
  806         bzero(ssin6, sizeof(*ssin6));
  807         flags = ft->ft_flags;
  808         
  809         if (ipv6_mbuf_demarshal(ft, m, ssin6, dsin6, &flags) != 0)
  810                 return (NULL);
  811 
  812         return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
  813 }
  814 
  815 void
  816 flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
  817 {
  818         uint32_t *hashkey = NULL;
  819         struct sockaddr_in6 *sin6;
  820 
  821         sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
  822 
  823         sin6->sin6_family = AF_INET6;
  824         sin6->sin6_len = sizeof(*sin6);
  825         hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
  826         memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
  827         ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
  828         ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
  829 
  830 }
  831 #endif /* INET6 */
  832 
  833 static bitstr_t *
  834 flowtable_mask(struct flowtable *ft)
  835 {
  836         bitstr_t *mask;
  837 
  838         if (ft->ft_flags & FL_PCPU)
  839                 mask = ft->ft_masks[curcpu];
  840         else
  841                 mask = ft->ft_masks[0];
  842 
  843         return (mask);
  844 }
  845 
  846 static struct flentry **
  847 flowtable_entry(struct flowtable *ft, uint32_t hash)
  848 {
  849         struct flentry **fle;
  850         int index = (hash % ft->ft_size);
  851 
  852         if (ft->ft_flags & FL_PCPU) {
  853                 KASSERT(&ft->ft_table.pcpu[curcpu][0] != NULL, ("pcpu not set"));
  854                 fle = &ft->ft_table.pcpu[curcpu][index];
  855         } else {
  856                 KASSERT(&ft->ft_table.global[0] != NULL, ("global not set"));
  857                 fle = &ft->ft_table.global[index];
  858         }
  859         
  860         return (fle);
  861 }
  862 
  863 static int
  864 flow_stale(struct flowtable *ft, struct flentry *fle)
  865 {
  866         time_t idle_time;
  867 
  868         if ((fle->f_fhash == 0)
  869             || ((fle->f_rt->rt_flags & RTF_HOST) &&
  870                 ((fle->f_rt->rt_flags & (RTF_UP))
  871                     != (RTF_UP)))
  872             || (fle->f_rt->rt_ifp == NULL)
  873             || !RT_LINK_IS_UP(fle->f_rt->rt_ifp))
  874                 return (1);
  875 
  876         idle_time = time_uptime - fle->f_uptime;
  877 
  878         if ((fle->f_flags & FL_STALE) ||
  879             ((fle->f_flags & (TH_SYN|TH_ACK|TH_FIN)) == 0
  880                 && (idle_time > ft->ft_udp_idle)) ||
  881             ((fle->f_flags & TH_FIN)
  882                 && (idle_time > ft->ft_fin_wait_idle)) ||
  883             ((fle->f_flags & (TH_SYN|TH_ACK)) == TH_SYN
  884                 && (idle_time > ft->ft_syn_idle)) ||
  885             ((fle->f_flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)
  886                 && (idle_time > ft->ft_tcp_idle)) ||
  887             ((fle->f_rt->rt_flags & RTF_UP) == 0 || 
  888                 (fle->f_rt->rt_ifp == NULL)))
  889                 return (1);
  890 
  891         return (0);
  892 }
  893 
  894 static void
  895 flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
  896 {
  897         uint32_t *hashkey;
  898         int i, nwords;
  899 
  900         if (fle->f_flags & FL_IPV6) {
  901                 nwords = 9;
  902                 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
  903         } else {
  904                 nwords = 3;
  905                 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
  906         }
  907         
  908         for (i = 0; i < nwords; i++) 
  909                 hashkey[i] = key[i];
  910 }
  911 
  912 static struct flentry *
  913 flow_alloc(struct flowtable *ft)
  914 {
  915         struct flentry *newfle;
  916         uma_zone_t zone;
  917 
  918         newfle = NULL;
  919         zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
  920 
  921         newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO);
  922         if (newfle != NULL)
  923                 atomic_add_int(&ft->ft_count, 1);
  924         return (newfle);
  925 }
  926 
  927 static void
  928 flow_free(struct flentry *fle, struct flowtable *ft)
  929 {
  930         uma_zone_t zone;
  931 
  932         zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
  933         atomic_add_int(&ft->ft_count, -1);
  934         uma_zfree(zone, fle);
  935 }
  936 
  937 static int
  938 flow_full(struct flowtable *ft)
  939 {
  940         boolean_t full;
  941         uint32_t count;
  942         
  943         full = ft->ft_full;
  944         count = ft->ft_count;
  945 
  946         if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3))))
  947                 ft->ft_full = FALSE;
  948         else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5))))
  949                 ft->ft_full = TRUE;
  950         
  951         if (full && !ft->ft_full) {
  952                 flowclean_freq = 4*hz;
  953                 if ((ft->ft_flags & FL_HASH_ALL) == 0)
  954                         ft->ft_udp_idle = ft->ft_fin_wait_idle =
  955                             ft->ft_syn_idle = ft->ft_tcp_idle = 5;
  956                 cv_broadcast(&flowclean_c_cv);
  957         } else if (!full && ft->ft_full) {
  958                 flowclean_freq = 20*hz;
  959                 if ((ft->ft_flags & FL_HASH_ALL) == 0)
  960                         ft->ft_udp_idle = ft->ft_fin_wait_idle =
  961                             ft->ft_syn_idle = ft->ft_tcp_idle = 30;
  962         }
  963 
  964         return (ft->ft_full);
  965 }
  966 
  967 static int
  968 flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
  969     uint32_t fibnum, struct route *ro, uint16_t flags)
  970 {
  971         struct flentry *fle, *fletail, *newfle, **flep;
  972         struct flowtable_stats *fs = &ft->ft_stats[curcpu];
  973         int depth;
  974         bitstr_t *mask;
  975         uint8_t proto;
  976 
  977         newfle = flow_alloc(ft);
  978         if (newfle == NULL)
  979                 return (ENOMEM);
  980 
  981         newfle->f_flags |= (flags & FL_IPV6);
  982         proto = flags_to_proto(flags);
  983 
  984         FL_ENTRY_LOCK(ft, hash);
  985         mask = flowtable_mask(ft);
  986         flep = flowtable_entry(ft, hash);
  987         fletail = fle = *flep;
  988 
  989         if (fle == NULL) {
  990                 bit_set(mask, FL_ENTRY_INDEX(ft, hash));
  991                 *flep = fle = newfle;
  992                 goto skip;
  993         } 
  994         
  995         depth = 0;
  996         fs->ft_collisions++;
  997         /*
  998          * find end of list and make sure that we were not
  999          * preempted by another thread handling this flow
 1000          */
 1001         while (fle != NULL) {
 1002                 if (fle->f_fhash == hash && !flow_stale(ft, fle)) {
 1003                         /*
 1004                          * there was either a hash collision
 1005                          * or we lost a race to insert
 1006                          */
 1007                         FL_ENTRY_UNLOCK(ft, hash);
 1008                         flow_free(newfle, ft);
 1009                         
 1010                         if (flags & FL_OVERWRITE) 
 1011                                 goto skip;
 1012                         return (EEXIST);
 1013                 }
 1014                 /*
 1015                  * re-visit this double condition XXX
 1016                  */
 1017                 if (fletail->f_next != NULL)
 1018                         fletail = fle->f_next;
 1019 
 1020                 depth++;
 1021                 fle = fle->f_next;
 1022         } 
 1023 
 1024         if (depth > fs->ft_max_depth)
 1025                 fs->ft_max_depth = depth;
 1026         fletail->f_next = newfle;
 1027         fle = newfle;
 1028 skip:
 1029         flowtable_set_hashkey(fle, key);
 1030 
 1031         fle->f_proto = proto;
 1032         fle->f_rt = ro->ro_rt;
 1033         fle->f_lle = ro->ro_lle;
 1034         fle->f_fhash = hash;
 1035         fle->f_fibnum = fibnum;
 1036         fle->f_uptime = time_uptime;
 1037         FL_ENTRY_UNLOCK(ft, hash);
 1038         return (0);
 1039 }
 1040 
 1041 int
 1042 kern_flowtable_insert(struct flowtable *ft,
 1043     struct sockaddr_storage *ssa, struct sockaddr_storage *dsa,
 1044     struct route *ro, uint32_t fibnum, int flags)
 1045 {
 1046         uint32_t key[9], hash;
 1047 
 1048         flags = (ft->ft_flags | flags | FL_OVERWRITE);
 1049         hash = 0;
 1050 
 1051 #ifdef INET
 1052         if (ssa->ss_family == AF_INET) 
 1053                 hash = ipv4_flow_lookup_hash_internal((struct sockaddr_in *)ssa,
 1054                     (struct sockaddr_in *)dsa, key, flags);
 1055 #endif
 1056 #ifdef INET6
 1057         if (ssa->ss_family == AF_INET6) 
 1058                 hash = ipv6_flow_lookup_hash_internal((struct sockaddr_in6 *)ssa,
 1059                     (struct sockaddr_in6 *)dsa, key, flags);
 1060 #endif  
 1061         if (ro->ro_rt == NULL || ro->ro_lle == NULL)
 1062                 return (EINVAL);
 1063 
 1064         FLDPRINTF(ft, FL_DEBUG,
 1065             "kern_flowtable_insert: key=%x:%x:%x hash=%x fibnum=%d flags=%x\n",
 1066             key[0], key[1], key[2], hash, fibnum, flags);
 1067         return (flowtable_insert(ft, hash, key, fibnum, ro, flags));
 1068 }
 1069 
 1070 static int
 1071 flowtable_key_equal(struct flentry *fle, uint32_t *key)
 1072 {
 1073         uint32_t *hashkey;
 1074         int i, nwords;
 1075 
 1076         if (fle->f_flags & FL_IPV6) {
 1077                 nwords = 9;
 1078                 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
 1079         } else {
 1080                 nwords = 3;
 1081                 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
 1082         }
 1083 
 1084         for (i = 0; i < nwords; i++) 
 1085                 if (hashkey[i] != key[i])
 1086                         return (0);
 1087 
 1088         return (1);
 1089 }
 1090 
 1091 struct flentry *
 1092 flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af)
 1093 {
 1094         struct flentry *fle = NULL;
 1095 
 1096 #ifdef INET
 1097         if (af == AF_INET)
 1098                 fle = flowtable_lookup_mbuf4(ft, m);
 1099 #endif
 1100 #ifdef INET6
 1101         if (af == AF_INET6)
 1102                 fle = flowtable_lookup_mbuf6(ft, m);
 1103 #endif  
 1104         if (fle != NULL && m != NULL && (m->m_flags & M_FLOWID) == 0) {
 1105                 m->m_flags |= M_FLOWID;
 1106                 m->m_pkthdr.flowid = fle->f_fhash;
 1107         }
 1108         return (fle);
 1109 }
 1110         
 1111 struct flentry *
 1112 flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
 1113     struct sockaddr_storage *dsa, uint32_t fibnum, int flags)
 1114 {
 1115         uint32_t key[9], hash;
 1116         struct flentry *fle;
 1117         struct flowtable_stats *fs = &ft->ft_stats[curcpu];
 1118         uint8_t proto = 0;
 1119         int error = 0;
 1120         struct rtentry *rt;
 1121         struct llentry *lle;
 1122         struct route sro, *ro;
 1123         struct route_in6 sro6;
 1124 
 1125         sro.ro_rt = sro6.ro_rt = NULL;
 1126         sro.ro_lle = sro6.ro_lle = NULL;
 1127         ro = NULL;
 1128         hash = 0;
 1129         flags |= ft->ft_flags;
 1130         proto = flags_to_proto(flags);
 1131 #ifdef INET
 1132         if (ssa->ss_family == AF_INET) {
 1133                 struct sockaddr_in *ssin, *dsin;
 1134 
 1135                 ro = &sro;
 1136                 memcpy(&ro->ro_dst, dsa, sizeof(struct sockaddr_in));
 1137                 /*
 1138                  * The harvested source and destination addresses
 1139                  * may contain port information if the packet is 
 1140                  * from a transport protocol (e.g. TCP/UDP). The 
 1141                  * port field must be cleared before performing 
 1142                  * a route lookup.
 1143                  */
 1144                 ((struct sockaddr_in *)&ro->ro_dst)->sin_port = 0;
 1145                 dsin = (struct sockaddr_in *)dsa;
 1146                 ssin = (struct sockaddr_in *)ssa;
 1147                 if ((dsin->sin_addr.s_addr == ssin->sin_addr.s_addr) ||
 1148                     (ntohl(dsin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 1149                     (ntohl(ssin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
 1150                         return (NULL);
 1151 
 1152                 hash = ipv4_flow_lookup_hash_internal(ssin, dsin, key, flags);
 1153         }
 1154 #endif
 1155 #ifdef INET6
 1156         if (ssa->ss_family == AF_INET6) {
 1157                 struct sockaddr_in6 *ssin6, *dsin6;
 1158 
 1159                 ro = (struct route *)&sro6;
 1160                 memcpy(&sro6.ro_dst, dsa,
 1161                     sizeof(struct sockaddr_in6));
 1162                 ((struct sockaddr_in6 *)&ro->ro_dst)->sin6_port = 0;
 1163                 dsin6 = (struct sockaddr_in6 *)dsa;
 1164                 ssin6 = (struct sockaddr_in6 *)ssa;
 1165 
 1166                 flags |= FL_IPV6;
 1167                 hash = ipv6_flow_lookup_hash_internal(ssin6, dsin6, key, flags);
 1168         }
 1169 #endif
 1170         /*
 1171          * Ports are zero and this isn't a transmit cache
 1172          * - thus not a protocol for which we need to keep 
 1173          * state
 1174          * FL_HASH_ALL => key[0] != 0 for TCP || UDP || SCTP
 1175          */
 1176         if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_ALL)))
 1177                 return (NULL);
 1178 
 1179         fs->ft_lookups++;
 1180         FL_ENTRY_LOCK(ft, hash);
 1181         if ((fle = FL_ENTRY(ft, hash)) == NULL) {
 1182                 FL_ENTRY_UNLOCK(ft, hash);
 1183                 goto uncached;
 1184         }
 1185 keycheck:       
 1186         rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
 1187         lle = __DEVOLATILE(struct llentry *, fle->f_lle);
 1188         if ((rt != NULL)
 1189             && lle != NULL
 1190             && fle->f_fhash == hash
 1191             && flowtable_key_equal(fle, key)
 1192             && (proto == fle->f_proto)
 1193             && (fibnum == fle->f_fibnum)
 1194             && (rt->rt_flags & RTF_UP)
 1195             && (rt->rt_ifp != NULL)
 1196             && (lle->la_flags & LLE_VALID)) {
 1197                 fs->ft_hits++;
 1198                 fle->f_uptime = time_uptime;
 1199                 fle->f_flags |= flags;
 1200                 FL_ENTRY_UNLOCK(ft, hash);
 1201                 return (fle);
 1202         } else if (fle->f_next != NULL) {
 1203                 fle = fle->f_next;
 1204                 goto keycheck;
 1205         }
 1206         FL_ENTRY_UNLOCK(ft, hash);
 1207 uncached:
 1208         if (flags & FL_NOAUTO || flow_full(ft))
 1209                 return (NULL);
 1210 
 1211         fs->ft_misses++;
 1212         /*
 1213          * This bit of code ends up locking the
 1214          * same route 3 times (just like ip_output + ether_output)
 1215          * - at lookup
 1216          * - in rt_check when called by arpresolve
 1217          * - dropping the refcount for the rtentry
 1218          *
 1219          * This could be consolidated to one if we wrote a variant
 1220          * of arpresolve with an rt_check variant that expected to
 1221          * receive the route locked
 1222          */
 1223 
 1224 #ifdef INVARIANTS
 1225         if ((ro->ro_dst.sa_family != AF_INET) &&
 1226             (ro->ro_dst.sa_family != AF_INET6))
 1227                 panic("sa_family == %d\n", ro->ro_dst.sa_family);
 1228 #endif
 1229 
 1230         ft->ft_rtalloc(ro, hash, fibnum);
 1231         if (ro->ro_rt == NULL) 
 1232                 error = ENETUNREACH;
 1233         else {
 1234                 struct llentry *lle = NULL;
 1235                 struct sockaddr_storage *l3addr;
 1236                 struct rtentry *rt = ro->ro_rt;
 1237                 struct ifnet *ifp = rt->rt_ifp;
 1238 
 1239                 if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
 1240                         RTFREE(rt);
 1241                         ro->ro_rt = NULL;
 1242                         return (NULL);
 1243                 }
 1244 #ifdef INET6
 1245                 if (ssa->ss_family == AF_INET6) {
 1246                         struct sockaddr_in6 *dsin6;
 1247 
 1248                         dsin6 = (struct sockaddr_in6 *)dsa;                     
 1249                         if (in6_localaddr(&dsin6->sin6_addr)) {
 1250                                 RTFREE(rt);
 1251                                 ro->ro_rt = NULL;
 1252                                 return (NULL);                          
 1253                         }
 1254 
 1255                         if (rt->rt_flags & RTF_GATEWAY)
 1256                                 l3addr = (struct sockaddr_storage *)rt->rt_gateway;
 1257                         
 1258                         else
 1259                                 l3addr = (struct sockaddr_storage *)&ro->ro_dst;
 1260                         llentry_update(&lle, LLTABLE6(ifp), l3addr, ifp);
 1261                 }
 1262 #endif  
 1263 #ifdef INET
 1264                 if (ssa->ss_family == AF_INET) {
 1265                         if (rt->rt_flags & RTF_GATEWAY)
 1266                                 l3addr = (struct sockaddr_storage *)rt->rt_gateway;
 1267                         else
 1268                                 l3addr = (struct sockaddr_storage *)&ro->ro_dst;
 1269                         llentry_update(&lle, LLTABLE(ifp), l3addr, ifp);        
 1270                 }
 1271                         
 1272 #endif
 1273                 ro->ro_lle = lle;
 1274 
 1275                 if (lle == NULL) {
 1276                         RTFREE(rt);
 1277                         ro->ro_rt = NULL;
 1278                         return (NULL);
 1279                 }
 1280                 error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
 1281 
 1282                 if (error) {
 1283                         RTFREE(rt);
 1284                         LLE_FREE(lle);
 1285                         ro->ro_rt = NULL;
 1286                         ro->ro_lle = NULL;
 1287                 }
 1288         } 
 1289 
 1290         return ((error) ? NULL : fle);
 1291 }
 1292 
 1293 /*
 1294  * used by the bit_alloc macro
 1295  */
 1296 #define calloc(count, size) malloc((count)*(size), M_DEVBUF, M_WAITOK|M_ZERO)
 1297         
 1298 struct flowtable *
 1299 flowtable_alloc(char *name, int nentry, int flags)
 1300 {
 1301         struct flowtable *ft, *fttail;
 1302         int i;
 1303 
 1304         if (V_flow_hashjitter == 0)
 1305                 V_flow_hashjitter = arc4random();
 1306 
 1307         KASSERT(nentry > 0, ("nentry must be > 0, is %d\n", nentry));
 1308 
 1309         ft = malloc(sizeof(struct flowtable),
 1310             M_RTABLE, M_WAITOK | M_ZERO);
 1311 
 1312         ft->ft_name = name;
 1313         ft->ft_flags = flags;
 1314         ft->ft_size = nentry;
 1315 #ifdef RADIX_MPATH
 1316         ft->ft_rtalloc = rtalloc_mpath_fib;
 1317 #else
 1318         ft->ft_rtalloc = rtalloc_ign_wrapper;
 1319 #endif
 1320         if (flags & FL_PCPU) {
 1321                 ft->ft_lock = flowtable_pcpu_lock;
 1322                 ft->ft_unlock = flowtable_pcpu_unlock;
 1323 
 1324                 for (i = 0; i <= mp_maxid; i++) {
 1325                         ft->ft_table.pcpu[i] =
 1326                             malloc(nentry*sizeof(struct flentry *),
 1327                                 M_RTABLE, M_WAITOK | M_ZERO);
 1328                         ft->ft_masks[i] = bit_alloc(nentry);
 1329                 }
 1330         } else {
 1331                 ft->ft_lock_count = 2*(powerof2(mp_maxid + 1) ? (mp_maxid + 1):
 1332                     (fls(mp_maxid + 1) << 1));
 1333                 
 1334                 ft->ft_lock = flowtable_global_lock;
 1335                 ft->ft_unlock = flowtable_global_unlock;
 1336                 ft->ft_table.global =
 1337                             malloc(nentry*sizeof(struct flentry *),
 1338                                 M_RTABLE, M_WAITOK | M_ZERO);
 1339                 ft->ft_locks = malloc(ft->ft_lock_count*sizeof(struct mtx),
 1340                                 M_RTABLE, M_WAITOK | M_ZERO);
 1341                 for (i = 0; i < ft->ft_lock_count; i++)
 1342                         mtx_init(&ft->ft_locks[i], "flow", NULL, MTX_DEF|MTX_DUPOK);
 1343 
 1344                 ft->ft_masks[0] = bit_alloc(nentry);
 1345         }
 1346         ft->ft_tmpmask = bit_alloc(nentry);
 1347 
 1348         /*
 1349          * In the local transmit case the table truly is 
 1350          * just a cache - so everything is eligible for
 1351          * replacement after 5s of non-use
 1352          */
 1353         if (flags & FL_HASH_ALL) {
 1354                 ft->ft_udp_idle = V_flowtable_udp_expire;
 1355                 ft->ft_syn_idle = V_flowtable_syn_expire;
 1356                 ft->ft_fin_wait_idle = V_flowtable_fin_wait_expire;
 1357                 ft->ft_tcp_idle = V_flowtable_fin_wait_expire;
 1358         } else {
 1359                 ft->ft_udp_idle = ft->ft_fin_wait_idle =
 1360                     ft->ft_syn_idle = ft->ft_tcp_idle = 30;
 1361                 
 1362         }
 1363 
 1364         /*
 1365          * hook in to the cleaner list
 1366          */
 1367         if (V_flow_list_head == NULL)
 1368                 V_flow_list_head = ft;
 1369         else {
 1370                 fttail = V_flow_list_head;
 1371                 while (fttail->ft_next != NULL)
 1372                         fttail = fttail->ft_next;
 1373                 fttail->ft_next = ft;
 1374         }
 1375 
 1376         return (ft);
 1377 }
 1378 
 1379 /*
 1380  * The rest of the code is devoted to garbage collection of expired entries.
  1381  * It is a new addition made necessary by the switch to dynamically allocating
 1382  * flow tables.
 1383  * 
 1384  */
 1385 static void
 1386 fle_free(struct flentry *fle, struct flowtable *ft)
 1387 {
 1388         struct rtentry *rt;
 1389         struct llentry *lle;
 1390 
 1391         rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
 1392         lle = __DEVOLATILE(struct llentry *, fle->f_lle);
 1393         RTFREE(rt);
 1394         LLE_FREE(lle);
 1395         flow_free(fle, ft);
 1396 }
 1397 
 1398 static void
 1399 flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
 1400 {
 1401         int curbit = 0, count;
 1402         struct flentry *fle,  **flehead, *fleprev;
 1403         struct flentry *flefreehead, *flefreetail, *fletmp;
 1404         bitstr_t *mask, *tmpmask;
 1405         struct flowtable_stats *fs = &ft->ft_stats[curcpu];
 1406 
 1407         flefreehead = flefreetail = NULL;
 1408         mask = flowtable_mask(ft);
 1409         tmpmask = ft->ft_tmpmask;
 1410         memcpy(tmpmask, mask, ft->ft_size/8);
 1411         /*
 1412          * XXX Note to self, bit_ffs operates at the byte level
 1413          * and thus adds gratuitous overhead
 1414          */
 1415         bit_ffs(tmpmask, ft->ft_size, &curbit);
 1416         while (curbit != -1) {
 1417                 if (curbit >= ft->ft_size || curbit < -1) {
 1418                         log(LOG_ALERT,
 1419                             "warning: bad curbit value %d \n",
 1420                             curbit);
 1421                         break;
 1422                 }
 1423 
 1424                 FL_ENTRY_LOCK(ft, curbit);
 1425                 flehead = flowtable_entry(ft, curbit);
 1426                 fle = fleprev = *flehead;
 1427 
 1428                 fs->ft_free_checks++;
 1429 #ifdef DIAGNOSTIC
 1430                 if (fle == NULL && curbit > 0) {
 1431                         log(LOG_ALERT,
 1432                             "warning bit=%d set, but no fle found\n",
 1433                             curbit);
 1434                 }
 1435 #endif          
 1436                 while (fle != NULL) {
 1437                         if (rt != NULL) {
 1438                                 if (__DEVOLATILE(struct rtentry *, fle->f_rt) != rt) {
 1439                                         fleprev = fle;
 1440                                         fle = fle->f_next;
 1441                                         continue;
 1442                                 }
 1443                         } else if (!flow_stale(ft, fle)) {
 1444                                 fleprev = fle;
 1445                                 fle = fle->f_next;
 1446                                 continue;
 1447                         }
 1448                         /*
 1449                          * delete head of the list
 1450                          */
 1451                         if (fleprev == *flehead) {
 1452                                 fletmp = fleprev;
 1453                                 if (fle == fleprev) {
 1454                                         fleprev = *flehead = fle->f_next;
 1455                                 } else
 1456                                         fleprev = *flehead = fle;
 1457                                 fle = fle->f_next;
 1458                         } else {
 1459                                 /*
 1460                                  * don't advance fleprev
 1461                                  */
 1462                                 fletmp = fle;
 1463                                 fleprev->f_next = fle->f_next;
 1464                                 fle = fleprev->f_next;
 1465                         }
 1466 
 1467                         if (flefreehead == NULL)
 1468                                 flefreehead = flefreetail = fletmp;
 1469                         else {
 1470                                 flefreetail->f_next = fletmp;
 1471                                 flefreetail = fletmp;
 1472                         }
 1473                         fletmp->f_next = NULL;
 1474                 }
 1475                 if (*flehead == NULL)
 1476                         bit_clear(mask, curbit);
 1477                 FL_ENTRY_UNLOCK(ft, curbit);
 1478                 bit_clear(tmpmask, curbit);
 1479                 bit_ffs(tmpmask, ft->ft_size, &curbit);
 1480         }
 1481         count = 0;
 1482         while ((fle = flefreehead) != NULL) {
 1483                 flefreehead = fle->f_next;
 1484                 count++;
 1485                 fs->ft_frees++;
 1486                 fle_free(fle, ft);
 1487         }
 1488         if (V_flowtable_debug && count)
 1489                 log(LOG_DEBUG, "freed %d flow entries\n", count);
 1490 }
 1491 
 1492 void
 1493 flowtable_route_flush(struct flowtable *ft, struct rtentry *rt)
 1494 {
 1495         int i;
 1496 
 1497         if (ft->ft_flags & FL_PCPU) {
 1498                 CPU_FOREACH(i) {
 1499                         if (smp_started == 1) {
 1500                                 thread_lock(curthread);
 1501                                 sched_bind(curthread, i);
 1502                                 thread_unlock(curthread);
 1503                         }
 1504 
 1505                         flowtable_free_stale(ft, rt);
 1506 
 1507                         if (smp_started == 1) {
 1508                                 thread_lock(curthread);
 1509                                 sched_unbind(curthread);
 1510                                 thread_unlock(curthread);
 1511                         }
 1512                 }
 1513         } else {
 1514                 flowtable_free_stale(ft, rt);
 1515         }
 1516 }
 1517 
 1518 static void
 1519 flowtable_clean_vnet(void)
 1520 {
 1521         struct flowtable *ft;
 1522         int i;
 1523 
 1524         ft = V_flow_list_head;
 1525         while (ft != NULL) {
 1526                 if (ft->ft_flags & FL_PCPU) {
 1527                         CPU_FOREACH(i) {
 1528                                 if (smp_started == 1) {
 1529                                         thread_lock(curthread);
 1530                                         sched_bind(curthread, i);
 1531                                         thread_unlock(curthread);
 1532                                 }
 1533 
 1534                                 flowtable_free_stale(ft, NULL);
 1535 
 1536                                 if (smp_started == 1) {
 1537                                         thread_lock(curthread);
 1538                                         sched_unbind(curthread);
 1539                                         thread_unlock(curthread);
 1540                                 }
 1541                         }
 1542                 } else {
 1543                         flowtable_free_stale(ft, NULL);
 1544                 }
 1545                 ft = ft->ft_next;
 1546         }
 1547 }
 1548 
 1549 static void
 1550 flowtable_cleaner(void)
 1551 {
 1552         VNET_ITERATOR_DECL(vnet_iter);
 1553         struct thread *td;
 1554 
 1555         if (bootverbose)
 1556                 log(LOG_INFO, "flowtable cleaner started\n");
 1557         td = curthread;
 1558         while (1) {
 1559                 VNET_LIST_RLOCK();
 1560                 VNET_FOREACH(vnet_iter) {
 1561                         CURVNET_SET(vnet_iter);
 1562                         flowtable_clean_vnet();
 1563                         CURVNET_RESTORE();
 1564                 }
 1565                 VNET_LIST_RUNLOCK();
 1566 
 1567                 /*
 1568                  * The 10 second interval between cleaning checks
 1569                  * is arbitrary
 1570                  */
 1571                 mtx_lock(&flowclean_lock);
 1572                 thread_lock(td);
 1573                 sched_prio(td, PPAUSE);
 1574                 thread_unlock(td);
 1575                 flowclean_cycles++;
 1576                 cv_broadcast(&flowclean_f_cv);
 1577                 cv_timedwait(&flowclean_c_cv, &flowclean_lock, flowclean_freq);
 1578                 mtx_unlock(&flowclean_lock);
 1579         }
 1580 }
 1581 
 1582 static void
 1583 flowtable_flush(void *unused __unused)
 1584 {
 1585         uint64_t start;
 1586 
 1587         mtx_lock(&flowclean_lock);
 1588         start = flowclean_cycles;
 1589         while (start == flowclean_cycles) {
 1590                 cv_broadcast(&flowclean_c_cv);
 1591                 cv_wait(&flowclean_f_cv, &flowclean_lock);
 1592         }
 1593         mtx_unlock(&flowclean_lock);
 1594 }
 1595 
 1596 static struct kproc_desc flow_kp = {
 1597         "flowcleaner",
 1598         flowtable_cleaner,
 1599         &flowcleanerproc
 1600 };
 1601 SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
 1602 
 1603 static void
 1604 flowtable_init_vnet(const void *unused __unused)
 1605 {
 1606 
 1607         V_flowtable_nmbflows = 1024 + maxusers * 64 * mp_ncpus;
 1608         V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
 1609             NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
 1610         V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
 1611             NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);    
 1612         uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
 1613         uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
 1614         V_flowtable_ready = 1;
 1615 }
 1616 VNET_SYSINIT(flowtable_init_vnet, SI_SUB_SMP, SI_ORDER_ANY,
 1617     flowtable_init_vnet, NULL);
 1618 
 1619 static void
 1620 flowtable_init(const void *unused __unused)
 1621 {
 1622 
 1623         cv_init(&flowclean_c_cv, "c_flowcleanwait");
 1624         cv_init(&flowclean_f_cv, "f_flowcleanwait");
 1625         mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
 1626         EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
 1627             EVENTHANDLER_PRI_ANY);
 1628         flowclean_freq = 20*hz;
 1629 }
 1630 SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST,
 1631     flowtable_init, NULL);
 1632 
 1633 
 1634 #ifdef VIMAGE
 1635 static void
 1636 flowtable_uninit(const void *unused __unused)
 1637 {
 1638 
 1639         V_flowtable_ready = 0;
 1640         uma_zdestroy(V_flow_ipv4_zone);
 1641         uma_zdestroy(V_flow_ipv6_zone);
 1642 }
 1643 
 1644 VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
 1645     flowtable_uninit, NULL);
 1646 #endif
 1647 
 1648 #ifdef DDB
 1649 static uint32_t *
 1650 flowtable_get_hashkey(struct flentry *fle)
 1651 {
 1652         uint32_t *hashkey;
 1653 
 1654         if (fle->f_flags & FL_IPV6)
 1655                 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
 1656         else
 1657                 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
 1658 
 1659         return (hashkey);
 1660 }
 1661 
 1662 static bitstr_t *
 1663 flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
 1664 {
 1665         bitstr_t *mask;
 1666 
 1667         if (ft->ft_flags & FL_PCPU)
 1668                 mask = ft->ft_masks[cpuid];
 1669         else
 1670                 mask = ft->ft_masks[0];
 1671 
 1672         return (mask);
 1673 }
 1674 
 1675 static struct flentry **
 1676 flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
 1677 {
 1678         struct flentry **fle;
 1679         int index = (hash % ft->ft_size);
 1680 
 1681         if (ft->ft_flags & FL_PCPU) {
 1682                 fle = &ft->ft_table.pcpu[cpuid][index];
 1683         } else {
 1684                 fle = &ft->ft_table.global[index];
 1685         }
 1686         
 1687         return (fle);
 1688 }
 1689 
 1690 static void
 1691 flow_show(struct flowtable *ft, struct flentry *fle)
 1692 {
 1693         int idle_time;
 1694         int rt_valid, ifp_valid;
 1695         uint16_t sport, dport;
 1696         uint32_t *hashkey;
 1697         char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
 1698         volatile struct rtentry *rt;
 1699         struct ifnet *ifp = NULL;
 1700 
 1701         idle_time = (int)(time_uptime - fle->f_uptime);
 1702         rt = fle->f_rt;
 1703         rt_valid = rt != NULL;
 1704         if (rt_valid) 
 1705                 ifp = rt->rt_ifp;
 1706         ifp_valid = ifp != NULL;
 1707         hashkey = flowtable_get_hashkey(fle);
 1708         if (fle->f_flags & FL_IPV6)
 1709                 goto skipaddr;
 1710 
 1711         inet_ntoa_r(*(struct in_addr *) &hashkey[2], daddr);
 1712         if (ft->ft_flags & FL_HASH_ALL) {
 1713                 inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);            
 1714                 sport = ntohs(((uint16_t *)hashkey)[0]);
 1715                 dport = ntohs(((uint16_t *)hashkey)[1]);
 1716                 db_printf("%s:%d->%s:%d",
 1717                     saddr, sport, daddr,
 1718                     dport);
 1719         } else 
 1720                 db_printf("%s ", daddr);
 1721     
 1722 skipaddr:
 1723         if (fle->f_flags & FL_STALE)
 1724                 db_printf(" FL_STALE ");
 1725         if (fle->f_flags & FL_TCP)
 1726                 db_printf(" FL_TCP ");
 1727         if (fle->f_flags & FL_UDP)
 1728                 db_printf(" FL_UDP ");
 1729         if (rt_valid) {
 1730                 if (rt->rt_flags & RTF_UP)
 1731                         db_printf(" RTF_UP ");
 1732         }
 1733         if (ifp_valid) {
 1734                 if (ifp->if_flags & IFF_LOOPBACK)
 1735                         db_printf(" IFF_LOOPBACK ");
 1736                 if (ifp->if_flags & IFF_UP)
 1737                         db_printf(" IFF_UP ");          
 1738                 if (ifp->if_flags & IFF_POINTOPOINT)
 1739                         db_printf(" IFF_POINTOPOINT ");         
 1740         }
 1741         if (fle->f_flags & FL_IPV6)
 1742                 db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
 1743                     hashkey[0], hashkey[1], hashkey[2],
 1744                     hashkey[3], hashkey[4], hashkey[5],
 1745                     hashkey[6], hashkey[7], hashkey[8]);
 1746         else
 1747                 db_printf("\n\tkey=%08x:%08x:%08x ",
 1748                     hashkey[0], hashkey[1], hashkey[2]);
 1749         db_printf("hash=%08x idle_time=%03d"
 1750             "\n\tfibnum=%02d rt=%p",
 1751             fle->f_fhash, idle_time, fle->f_fibnum, fle->f_rt);
 1752         db_printf("\n");
 1753 }
 1754 
 1755 static void
 1756 flowtable_show(struct flowtable *ft, int cpuid)
 1757 {
 1758         int curbit = 0;
 1759         struct flentry *fle,  **flehead;
 1760         bitstr_t *mask, *tmpmask;
 1761 
 1762         if (cpuid != -1)
 1763                 db_printf("cpu: %d\n", cpuid);
 1764         mask = flowtable_mask_pcpu(ft, cpuid);
 1765         tmpmask = ft->ft_tmpmask;
 1766         memcpy(tmpmask, mask, ft->ft_size/8);
 1767         /*
 1768          * XXX Note to self, bit_ffs operates at the byte level
 1769          * and thus adds gratuitous overhead
 1770          */
 1771         bit_ffs(tmpmask, ft->ft_size, &curbit);
 1772         while (curbit != -1) {
 1773                 if (curbit >= ft->ft_size || curbit < -1) {
 1774                         db_printf("warning: bad curbit value %d \n",
 1775                             curbit);
 1776                         break;
 1777                 }
 1778 
 1779                 flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
 1780                 fle = *flehead;
 1781 
 1782                 while (fle != NULL) {   
 1783                         flow_show(ft, fle);
 1784                         fle = fle->f_next;
 1785                         continue;
 1786                 }
 1787                 bit_clear(tmpmask, curbit);
 1788                 bit_ffs(tmpmask, ft->ft_size, &curbit);
 1789         }
 1790 }
 1791 
 1792 static void
 1793 flowtable_show_vnet(void)
 1794 {
 1795         struct flowtable *ft;
 1796         int i;
 1797 
 1798         ft = V_flow_list_head;
 1799         while (ft != NULL) {
 1800                 printf("name: %s\n", ft->ft_name);
 1801                 if (ft->ft_flags & FL_PCPU) {
 1802                         CPU_FOREACH(i) {
 1803                                 flowtable_show(ft, i);
 1804                         }
 1805                 } else {
 1806                         flowtable_show(ft, -1);
 1807                 }
 1808                 ft = ft->ft_next;
 1809         }
 1810 }
 1811 
/*
 * DDB "show flowtables" command: iterate over every vnet and dump its
 * flowtables.  On VIMAGE kernels each vnet's pointer is printed first.
 */
DB_SHOW_COMMAND(flowtables, db_show_flowtables)
{
        VNET_ITERATOR_DECL(vnet_iter);

        VNET_FOREACH(vnet_iter) {
                /* Switch to this vnet so the V_ globals resolve to it. */
                CURVNET_SET(vnet_iter);
#ifdef VIMAGE
                db_printf("vnet %p\n", vnet_iter);
#endif
                flowtable_show_vnet();
                CURVNET_RESTORE();
        }
}
 1825 #endif

Cache object: eea60e264fe7b99a092f6958e245b174


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.