The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/flowtable.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /**************************************************************************
    2 
    3 Copyright (c) 2008-2010, BitGravity Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the BitGravity Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15 
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include "opt_route.h"
   31 #include "opt_mpath.h"
   32 #include "opt_ddb.h"
   33 #include "opt_inet.h"
   34 #include "opt_inet6.h"
   35 
   36 #include <sys/cdefs.h>
   37 __FBSDID("$FreeBSD$");
   38 
   39 #include <sys/param.h>  
   40 #include <sys/types.h>
   41 #include <sys/bitstring.h>
   42 #include <sys/condvar.h>
   43 #include <sys/callout.h>
   44 #include <sys/kernel.h>  
   45 #include <sys/kthread.h>
   46 #include <sys/limits.h>
   47 #include <sys/malloc.h>
   48 #include <sys/mbuf.h>
   49 #include <sys/proc.h>
   50 #include <sys/sbuf.h>
   51 #include <sys/sched.h>
   52 #include <sys/smp.h>
   53 #include <sys/socket.h>
   54 #include <sys/syslog.h>
   55 #include <sys/sysctl.h>
   56 
   57 #include <net/if.h>
   58 #include <net/if_llatbl.h>
   59 #include <net/if_var.h>
   60 #include <net/route.h> 
   61 #include <net/flowtable.h>
   62 #include <net/vnet.h>
   63 
   64 #include <netinet/in.h>
   65 #include <netinet/in_systm.h>
   66 #include <netinet/in_var.h>
   67 #include <netinet/if_ether.h>
   68 #include <netinet/ip.h>
   69 #ifdef INET6
   70 #include <netinet/ip6.h>
   71 #endif
   72 #include <netinet/tcp.h>
   73 #include <netinet/udp.h>
   74 #include <netinet/sctp.h>
   75 
   76 #include <libkern/jenkins.h>
   77 #include <ddb/ddb.h>
   78 
/*
 * IPv4 flow identity: the two transport ports followed by the source
 * and destination addresses.  Overlaid on the 32-bit key words by
 * union ipv4_flow.
 */
struct ipv4_tuple {
        uint16_t        ip_sport;       /* source port */
        uint16_t        ip_dport;       /* destination port */
        in_addr_t       ip_saddr;       /* source address */
        in_addr_t       ip_daddr;       /* destination address */
};
   85 
/*
 * An IPv4 flow, viewed either as a tuple or as three 32-bit key words.
 * flow_to_route() reads key word 2 as the destination address.
 */
union ipv4_flow {
        struct ipv4_tuple ipf_ipt;
        uint32_t        ipf_key[3];
};
   90 
/*
 * IPv6 flow identity: the two transport ports followed by the source
 * and destination addresses.  Overlaid on the 32-bit key words by
 * union ipv6_flow.
 *
 * NOTE(review): ipv6_flow_lookup_hash_internal() writes the destination
 * address at key words 1..4 and the source address at words 5..8, which
 * is the reverse of the member order declared here -- confirm which
 * layout is intended before reading a key through this struct.
 */
struct ipv6_tuple {
        uint16_t        ip_sport;       /* source port */
        uint16_t        ip_dport;       /* destination port */
        struct in6_addr ip_saddr;       /* source address */
        struct in6_addr ip_daddr;       /* destination address */
};
   97 
/*
 * An IPv6 flow, viewed either as a tuple or as nine 32-bit key words
 * (one word of ports plus two 16-byte addresses).
 */
union ipv6_flow {
        struct ipv6_tuple ipf_ipt;
        uint32_t        ipf_key[9];
};
  102 
/*
 * Address-family independent part of a cached flow.  It is embedded as
 * the first member of struct flentry_v4 and struct flentry_v6, which
 * append the family-specific key.
 */
struct flentry {
        volatile uint32_t       f_fhash;        /* hash flowing forward */
        uint16_t                f_flags;        /* flow flags */
        uint8_t                 f_pad;          /* not referenced in the visible code; presumably padding */
        uint8_t                 f_proto;        /* protocol */
        uint32_t                f_fibnum;       /* fib index */
        uint32_t                f_uptime;       /* uptime at last access */
        struct flentry          *f_next;        /* pointer to collision entry */
        volatile struct rtentry *f_rt;          /* rtentry for flow */
        volatile struct llentry *f_lle;         /* llentry for flow */
};
  114 
/*
 * IPv4 flow entry: the common header followed by the IPv4 key.
 * flow_to_route() casts a struct flentry pointer to this type, so
 * fl_entry must remain the first member.
 */
struct flentry_v4 {
        struct flentry  fl_entry;
        union ipv4_flow fl_flow;
};
  119 
/*
 * IPv6 flow entry: the common header followed by the IPv6 key.
 * flow_to_route_in6() casts a struct flentry pointer to this type, so
 * fl_entry must remain the first member.
 */
struct flentry_v6 {
        struct flentry  fl_entry;
        union ipv6_flow fl_flow;
};
  124 
  125 #define fl_fhash        fl_entry.fl_fhash
  126 #define fl_flags        fl_entry.fl_flags
  127 #define fl_proto        fl_entry.fl_proto
  128 #define fl_uptime       fl_entry.fl_uptime
  129 #define fl_rt           fl_entry.fl_rt
  130 #define fl_lle          fl_entry.fl_lle
  131 
/* Time unit helpers, in seconds. */
#define SECS_PER_HOUR           3600
#define SECS_PER_DAY            (24*SECS_PER_HOUR)

/*
 * Default idle periods, in seconds.  They seed the flowtable_*_expire
 * tunables defined further down.
 */
#define SYN_IDLE                300
#define UDP_IDLE                300
#define FIN_WAIT_IDLE           600
#define TCP_IDLE                SECS_PER_DAY
  139 
  140 
/*
 * Signature of the bucket lock/unlock hooks stored in ft_lock and
 * ft_unlock: they receive the table and the flow hash.
 */
typedef void fl_lock_t(struct flowtable *, uint32_t);
/*
 * Signature of the route allocation hook stored in ft_rtalloc: route,
 * flow hash, fib number (see rtalloc_ign_wrapper()).
 */
typedef void fl_rtalloc_t(struct route *, uint32_t, u_int);
  143 
/*
 * Storage for the bucket array(s): either one array shared by all CPUs
 * or one array per CPU slot.
 */
union flentryp {
        struct flentry          **global;
        struct flentry          **pcpu[MAXCPU];
};
  148 
/*
 * Event counters for one flowtable.  struct flowtable keeps an array
 * of these, one per CPU slot; each element is aligned to a cache line.
 * All fields are printed by fs_print().
 */
struct flowtable_stats {
        uint64_t        ft_collisions;
        uint64_t        ft_allocated;
        uint64_t        ft_misses;
        uint64_t        ft_max_depth;   /* reported as a maximum, not a sum, across CPUs */
        uint64_t        ft_free_checks;
        uint64_t        ft_frees;
        uint64_t        ft_hits;
        uint64_t        ft_lookups;
} __aligned(CACHE_LINE_SIZE);
  159 
/*
 * One flow cache instance.  Instances are chained through ft_next
 * starting at V_flow_list_head.
 */
struct flowtable {
        /* Counters; only slot 0 is reported unless FL_PCPU is set. */
        struct  flowtable_stats ft_stats[MAXCPU];
        int             ft_size;        /* modulus used by FL_ENTRY_INDEX() */
        int             ft_lock_count;  /* (count - 1) is used as a mask over the hash */
        uint32_t        ft_flags;       /* FL_* table flags (FL_PCPU, FL_HASH_ALL, ...) */
        char            *ft_name;       /* name shown by the stats sysctl */
        fl_lock_t       *ft_lock;       /* invoked by FL_ENTRY_LOCK() */
        fl_lock_t       *ft_unlock;     /* invoked by FL_ENTRY_UNLOCK() */
        fl_rtalloc_t    *ft_rtalloc;    /* route allocation hook */
        /*
         * XXX need to pad out 
         */ 
        struct mtx      *ft_locks;      /* mutex array used by the global lock hooks */
        union flentryp  ft_table;       /* bucket array(s) */
        bitstr_t        *ft_masks[MAXCPU]; /* see flowtable_mask(): slot per CPU, or slot 0 */
        bitstr_t        *ft_tmpmask;
        struct flowtable *ft_next;      /* next table on the list */

        uint32_t        ft_count __aligned(CACHE_LINE_SIZE);
        /* NOTE(review): presumably per-table copies of the *_expire tunables -- confirm. */
        uint32_t        ft_udp_idle __aligned(CACHE_LINE_SIZE);
        uint32_t        ft_fin_wait_idle;
        uint32_t        ft_syn_idle;
        uint32_t        ft_tcp_idle;
        boolean_t       ft_full;
} __aligned(CACHE_LINE_SIZE);
  185 
/* NOTE(review): presumably the kernel process running the flow cleaner -- confirm. */
static struct proc *flowcleanerproc;
/* Per-vnet state: head of the table list, hash jitter and the UMA zones. */
static VNET_DEFINE(struct flowtable *, flow_list_head);
static VNET_DEFINE(uint32_t, flow_hashjitter);
static VNET_DEFINE(uma_zone_t, flow_ipv4_zone);
static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);

/* Accessors for the per-vnet variables above. */
#define V_flow_list_head        VNET(flow_list_head)
#define V_flow_hashjitter       VNET(flow_hashjitter)
#define V_flow_ipv4_zone        VNET(flow_ipv4_zone)
#define V_flow_ipv6_zone        VNET(flow_ipv6_zone)
  196 
  197 
/*
 * State shared with the flow cleaner.
 * NOTE(review): none of these are used in the portion of the file
 * reviewed here; the names suggest two condition variables and a mutex
 * for cleaner hand-off plus a cycle counter and frequency -- confirm
 * against the cleaner code.
 */
static struct cv        flowclean_f_cv;
static struct cv        flowclean_c_cv;
static struct mtx       flowclean_lock;
static uint32_t         flowclean_cycles;
static uint32_t         flowclean_freq;
  203 
  204 #ifdef FLOWTABLE_DEBUG
  205 #define FLDPRINTF(ft, flags, fmt, ...)          \
  206 do {                                            \
  207         if ((ft)->ft_flags & (flags))           \
  208                 printf((fmt), __VA_ARGS__);     \
  209 } while (0);                                    \
  210 
  211 #else
  212 #define FLDPRINTF(ft, flags, fmt, ...)
  213 
  214 #endif
  215 
  216 
  217 /*
  218  * TODO:
  219  * - Make flowtable stats per-cpu, aggregated at sysctl call time,
  220  *   to avoid extra cache evictions caused by incrementing a shared
  221  *   counter
  222  * - add sysctls to resize && flush flow tables 
  223  * - Add per flowtable sysctls for statistics and configuring timeouts
  224  * - add saturation counter to rtentry to support per-packet load-balancing
  225  *   add flag to indicate round-robin flow, add list lookup from head
 *   for flows
  227  * - add sysctl / device node / syscall to support exporting and importing
  228  *   of flows with flag to indicate that a flow was imported so should
  229  *   not be considered for auto-cleaning
  230  * - support explicit connection state (currently only ad-hoc for DSR)
  231  * - idetach() cleanup for options VIMAGE builds.
  232  */
/*
 * Per-vnet tunables.  flowtable_enable is the only one with external
 * linkage; the expire values default to the *_IDLE constants.
 */
VNET_DEFINE(int, flowtable_enable) = 1;
static VNET_DEFINE(int, flowtable_debug);
static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
static VNET_DEFINE(int, flowtable_nmbflows);
/* The hash routines return 0 while this (or flowtable_enable) is zero. */
static VNET_DEFINE(int, flowtable_ready) = 0;

/* Accessors for the per-vnet tunables above. */
#define V_flowtable_enable              VNET(flowtable_enable)
#define V_flowtable_debug               VNET(flowtable_debug)
#define V_flowtable_syn_expire          VNET(flowtable_syn_expire)
#define V_flowtable_udp_expire          VNET(flowtable_udp_expire)
#define V_flowtable_fin_wait_expire     VNET(flowtable_fin_wait_expire)
#define V_flowtable_tcp_expire          VNET(flowtable_tcp_expire)
#define V_flowtable_nmbflows            VNET(flowtable_nmbflows)
#define V_flowtable_ready               VNET(flowtable_ready)
  250 
/* net.inet.flowtable sysctl subtree and its plain integer knobs. */
static SYSCTL_NODE(_net_inet, OID_AUTO, flowtable, CTLFLAG_RD, NULL,
    "flowtable");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, debug, CTLFLAG_RW,
    &VNET_NAME(flowtable_debug), 0, "print debug info.");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, enable, CTLFLAG_RW,
    &VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");

/*
 * XXX This does not end up updating timeouts at runtime
 * and only reflects the value for the last table added :-/
 */
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, syn_expire, CTLFLAG_RW,
    &VNET_NAME(flowtable_syn_expire), 0,
    "seconds after which to remove syn allocated flow.");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, udp_expire, CTLFLAG_RW,
    &VNET_NAME(flowtable_udp_expire), 0,
    "seconds after which to remove flow allocated to UDP.");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, fin_wait_expire, CTLFLAG_RW,
    &VNET_NAME(flowtable_fin_wait_expire), 0,
    "seconds after which to remove a flow in FIN_WAIT.");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, tcp_expire, CTLFLAG_RW,
    &VNET_NAME(flowtable_tcp_expire), 0,
    "seconds after which to remove flow allocated to a TCP connection.");
  274 
  275 
  276 /*
  277  * Maximum number of flows that can be allocated of a given type.
  278  *
  279  * The table is allocated at boot time (for the pure caching case
  280  * there is no reason why this could not be changed at runtime)
  281  * and thus (currently) needs to be set with a tunable.
  282  */
  283 static int
  284 sysctl_nmbflows(SYSCTL_HANDLER_ARGS)
  285 {
  286         int error, newnmbflows;
  287 
  288         newnmbflows = V_flowtable_nmbflows;
  289         error = sysctl_handle_int(oidp, &newnmbflows, 0, req); 
  290         if (error == 0 && req->newptr) {
  291                 if (newnmbflows > V_flowtable_nmbflows) {
  292                         V_flowtable_nmbflows = newnmbflows;
  293                         uma_zone_set_max(V_flow_ipv4_zone,
  294                             V_flowtable_nmbflows);
  295                         uma_zone_set_max(V_flow_ipv6_zone,
  296                             V_flowtable_nmbflows);
  297                 } else
  298                         error = EINVAL;
  299         }
  300         return (error);
  301 }
  302 SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
  303     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_nmbflows, "IU",
  304     "Maximum number of flows allowed");
  305 
  306 
  307 
  308 #define FS_PRINT(sb, field)     sbuf_printf((sb), "\t%s: %jd\n", #field, fs->ft_##field)
  309 
  310 static void
  311 fs_print(struct sbuf *sb, struct flowtable_stats *fs)
  312 {
  313 
  314         FS_PRINT(sb, collisions);
  315         FS_PRINT(sb, allocated);
  316         FS_PRINT(sb, misses);
  317         FS_PRINT(sb, max_depth);
  318         FS_PRINT(sb, free_checks);
  319         FS_PRINT(sb, frees);
  320         FS_PRINT(sb, hits);
  321         FS_PRINT(sb, lookups);
  322 }
  323 
  324 static void
  325 flowtable_show_stats(struct sbuf *sb, struct flowtable *ft)
  326 {
  327         int i;
  328         struct flowtable_stats fs, *pfs;
  329 
  330         if (ft->ft_flags & FL_PCPU) {
  331                 bzero(&fs, sizeof(fs));
  332                 pfs = &fs;
  333                 CPU_FOREACH(i) {
  334                         pfs->ft_collisions  += ft->ft_stats[i].ft_collisions;
  335                         pfs->ft_allocated   += ft->ft_stats[i].ft_allocated;
  336                         pfs->ft_misses      += ft->ft_stats[i].ft_misses;
  337                         pfs->ft_free_checks += ft->ft_stats[i].ft_free_checks;
  338                         pfs->ft_frees       += ft->ft_stats[i].ft_frees;
  339                         pfs->ft_hits        += ft->ft_stats[i].ft_hits;
  340                         pfs->ft_lookups     += ft->ft_stats[i].ft_lookups;
  341                         if (ft->ft_stats[i].ft_max_depth > pfs->ft_max_depth)
  342                                 pfs->ft_max_depth = ft->ft_stats[i].ft_max_depth;
  343                 }
  344         } else {
  345                 pfs = &ft->ft_stats[0];
  346         }
  347         fs_print(sb, pfs);
  348 }
  349 
  350 static int
  351 sysctl_flowtable_stats(SYSCTL_HANDLER_ARGS)
  352 {
  353         struct flowtable *ft;
  354         struct sbuf *sb;
  355         int error;
  356 
  357         sb = sbuf_new(NULL, NULL, 64*1024, SBUF_FIXEDLEN);
  358 
  359         ft = V_flow_list_head;
  360         while (ft != NULL) {
  361                 sbuf_printf(sb, "\ntable name: %s\n", ft->ft_name);
  362                 flowtable_show_stats(sb, ft);
  363                 ft = ft->ft_next;
  364         }
  365         sbuf_finish(sb);
  366         error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
  367         sbuf_delete(sb);
  368 
  369         return (error);
  370 }
  371 SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
  372     NULL, 0, sysctl_flowtable_stats, "A", "flowtable statistics");
  373 
  374 
#ifndef RADIX_MPATH
/*
 * Adapter with the fl_rtalloc_t signature, compiled only when
 * RADIX_MPATH is not defined: forwards to rtalloc_ign_fib() with an
 * ignore mask of 0.  The flow hash argument is unused.
 */
static void
rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
{

        rtalloc_ign_fib(ro, 0, fibnum);
}
#endif
  383 
  384 static void
  385 flowtable_global_lock(struct flowtable *table, uint32_t hash)
  386 {       
  387         int lock_index = (hash)&(table->ft_lock_count - 1);
  388 
  389         mtx_lock(&table->ft_locks[lock_index]);
  390 }
  391 
  392 static void
  393 flowtable_global_unlock(struct flowtable *table, uint32_t hash)
  394 {       
  395         int lock_index = (hash)&(table->ft_lock_count - 1);
  396 
  397         mtx_unlock(&table->ft_locks[lock_index]);
  398 }
  399 
/*
 * Lock hook with the fl_lock_t signature that takes no mutex: it only
 * enters a critical section.  Both arguments are unused.
 */
static void
flowtable_pcpu_lock(struct flowtable *table, uint32_t hash)
{

        critical_enter();
}
  406 
/*
 * Unlock hook paired with flowtable_pcpu_lock(): leaves the critical
 * section.  Both arguments are unused.
 */
static void
flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
{

        critical_exit();
}
  413 
  414 #define FL_ENTRY_INDEX(table, hash)((hash) % (table)->ft_size)
  415 #define FL_ENTRY(table, hash) *flowtable_entry((table), (hash))
  416 #define FL_ENTRY_LOCK(table, hash)  (table)->ft_lock((table), (hash))
  417 #define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
  418 
  419 #define FL_STALE        (1<<8)
  420 #define FL_OVERWRITE    (1<<10)
  421 
/*
 * Mark a flow entry stale by setting FL_STALE in its flags.  The entry
 * is not freed or unlinked here.
 */
void
flow_invalidate(struct flentry *fle)
{

        fle->f_flags |= FL_STALE;
}
  428 
  429 static __inline int
  430 proto_to_flags(uint8_t proto)
  431 {
  432         int flag;
  433 
  434         switch (proto) {
  435         case IPPROTO_TCP:
  436                 flag = FL_TCP;
  437                 break;
  438         case IPPROTO_SCTP:
  439                 flag = FL_SCTP;
  440                 break;          
  441         case IPPROTO_UDP:
  442                 flag = FL_UDP;
  443                 break;
  444         default:
  445                 flag = 0;
  446                 break;
  447         }
  448 
  449         return (flag);
  450 }
  451 
  452 static __inline int
  453 flags_to_proto(int flags)
  454 {
  455         int proto, protoflags;
  456 
  457         protoflags = flags & (FL_TCP|FL_SCTP|FL_UDP);
  458         switch (protoflags) {
  459         case FL_TCP:
  460                 proto = IPPROTO_TCP;
  461                 break;
  462         case FL_SCTP:
  463                 proto = IPPROTO_SCTP;
  464                 break;
  465         case FL_UDP:
  466                 proto = IPPROTO_UDP;
  467                 break;
  468         default:
  469                 proto = 0;
  470                 break;
  471         }
  472         return (proto);
  473 }
  474 
  475 #ifdef INET
  476 #ifdef FLOWTABLE_DEBUG
  477 static void
  478 ipv4_flow_print_tuple(int flags, int proto, struct sockaddr_in *ssin,
  479     struct sockaddr_in *dsin)
  480 {
  481         char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
  482 
  483         if (flags & FL_HASH_ALL) {
  484                 inet_ntoa_r(ssin->sin_addr, saddr);
  485                 inet_ntoa_r(dsin->sin_addr, daddr);
  486                 printf("proto=%d %s:%d->%s:%d\n",
  487                     proto, saddr, ntohs(ssin->sin_port), daddr,
  488                     ntohs(dsin->sin_port));
  489         } else {
  490                 inet_ntoa_r(*(struct in_addr *) &dsin->sin_addr, daddr);
  491                 printf("proto=%d %s\n", proto, daddr);
  492         }
  493 
  494 }
  495 #endif
  496 
  497 static int
  498 ipv4_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
  499     struct sockaddr_in *ssin, struct sockaddr_in *dsin, uint16_t *flags)
  500 {
  501         struct ip *ip;
  502         uint8_t proto;
  503         int iphlen;
  504         struct tcphdr *th;
  505         struct udphdr *uh;
  506         struct sctphdr *sh;
  507         uint16_t sport, dport;
  508 
  509         proto = sport = dport = 0;
  510         ip = mtod(m, struct ip *);
  511         dsin->sin_family = AF_INET;
  512         dsin->sin_len = sizeof(*dsin);
  513         dsin->sin_addr = ip->ip_dst;
  514         ssin->sin_family = AF_INET;
  515         ssin->sin_len = sizeof(*ssin);
  516         ssin->sin_addr = ip->ip_src;    
  517 
  518         proto = ip->ip_p;
  519         if ((*flags & FL_HASH_ALL) == 0) {
  520                 FLDPRINTF(ft, FL_DEBUG_ALL, "skip port check flags=0x%x ",
  521                     *flags);
  522                 goto skipports;
  523         }
  524 
  525         iphlen = ip->ip_hl << 2; /* XXX options? */
  526 
  527         switch (proto) {
  528         case IPPROTO_TCP:
  529                 th = (struct tcphdr *)((caddr_t)ip + iphlen);
  530                 sport = th->th_sport;
  531                 dport = th->th_dport;
  532                 if ((*flags & FL_HASH_ALL) &&
  533                     (th->th_flags & (TH_RST|TH_FIN)))
  534                         *flags |= FL_STALE;
  535         break;
  536         case IPPROTO_UDP:
  537                 uh = (struct udphdr *)((caddr_t)ip + iphlen);
  538                 sport = uh->uh_sport;
  539                 dport = uh->uh_dport;
  540         break;
  541         case IPPROTO_SCTP:
  542                 sh = (struct sctphdr *)((caddr_t)ip + iphlen);
  543                 sport = sh->src_port;
  544                 dport = sh->dest_port;
  545         break;
  546         default:
  547                 FLDPRINTF(ft, FL_DEBUG_ALL, "proto=0x%x not supported\n", proto);
  548                 return (ENOTSUP);
  549                 /* no port - hence not a protocol we care about */
  550                 break;
  551         
  552         }
  553 
  554 skipports:
  555         *flags |= proto_to_flags(proto);
  556         ssin->sin_port = sport;
  557         dsin->sin_port = dport;
  558         return (0);
  559 }
  560 
  561 static uint32_t
  562 ipv4_flow_lookup_hash_internal(
  563         struct sockaddr_in *ssin, struct sockaddr_in *dsin, 
  564             uint32_t *key, uint16_t flags)
  565 {
  566         uint16_t sport, dport;
  567         uint8_t proto;
  568         int offset = 0;
  569 
  570         if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
  571                 return (0);
  572         proto = flags_to_proto(flags);
  573         sport = dport = key[2] = key[1] = key[0] = 0;
  574         if ((ssin != NULL) && (flags & FL_HASH_ALL)) {
  575                 key[1] = ssin->sin_addr.s_addr;
  576                 sport = ssin->sin_port;
  577         }
  578         if (dsin != NULL) {
  579                 key[2] = dsin->sin_addr.s_addr;
  580                 dport = dsin->sin_port;
  581         }
  582         if (flags & FL_HASH_ALL) {
  583                 ((uint16_t *)key)[0] = sport;
  584                 ((uint16_t *)key)[1] = dport; 
  585         } else
  586                 offset = V_flow_hashjitter + proto;
  587 
  588         return (jenkins_hashword(key, 3, offset));
  589 }
  590 
  591 static struct flentry *
  592 flowtable_lookup_mbuf4(struct flowtable *ft, struct mbuf *m)
  593 {
  594         struct sockaddr_storage ssa, dsa;
  595         uint16_t flags;
  596         struct sockaddr_in *dsin, *ssin;
  597 
  598         dsin = (struct sockaddr_in *)&dsa;
  599         ssin = (struct sockaddr_in *)&ssa;
  600         bzero(dsin, sizeof(*dsin));
  601         bzero(ssin, sizeof(*ssin));
  602         flags = ft->ft_flags;
  603         if (ipv4_mbuf_demarshal(ft, m, ssin, dsin, &flags) != 0)
  604                 return (NULL);
  605 
  606         return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
  607 }
  608 
  609 void
  610 flow_to_route(struct flentry *fle, struct route *ro)
  611 {
  612         uint32_t *hashkey = NULL;
  613         struct sockaddr_in *sin;
  614 
  615         sin = (struct sockaddr_in *)&ro->ro_dst;
  616         sin->sin_family = AF_INET;
  617         sin->sin_len = sizeof(*sin);
  618         hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
  619         sin->sin_addr.s_addr = hashkey[2];
  620         ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
  621         ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
  622         ro->ro_flags |= RT_NORTREF;
  623 }
  624 #endif /* INET */
  625 
  626 #ifdef INET6
/*
 * PULLUP_TO(len, p, T) checks that the first mbuf holds at least
 * len + sizeof(T) bytes and then sets p to point at offset "len" in
 * the mbuf data.  Despite its name it never pulls data up: when the
 * first mbuf is too short it jumps to the caller's "receive_failed"
 * label instead.
 *
 * The macro uses the caller's variable "m" implicitly and declares a
 * local "x" inside its own block.
 */
#define PULLUP_TO(_len, p, T)                                           \
do {                                                                    \
        int x = (_len) + sizeof(T);                                     \
        if ((m)->m_len < x) {                                           \
                goto receive_failed;                                    \
        }                                                               \
        p = (mtod(m, char *) + (_len));                                 \
} while (0)
  641 
/* Cast helpers: view an upper-layer header pointer as TCP, SCTP or UDP. */
#define TCP(p)          ((struct tcphdr *)(p))
#define SCTP(p)         ((struct sctphdr *)(p))
#define UDP(p)          ((struct udphdr *)(p))
  645 
  646 static int
  647 ipv6_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
  648     struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, uint16_t *flags)
  649 {
  650         struct ip6_hdr *ip6;
  651         uint8_t proto;
  652         int hlen;
  653         uint16_t src_port, dst_port;
  654         u_short offset;
  655         void *ulp;
  656 
  657         offset = hlen = src_port = dst_port = 0;
  658         ulp = NULL;
  659         ip6 = mtod(m, struct ip6_hdr *);
  660         hlen = sizeof(struct ip6_hdr);
  661         proto = ip6->ip6_nxt;
  662 
  663         if ((*flags & FL_HASH_ALL) == 0)
  664                 goto skipports;
  665 
  666         while (ulp == NULL) {
  667                 switch (proto) {
  668                 case IPPROTO_ICMPV6:
  669                 case IPPROTO_OSPFIGP:
  670                 case IPPROTO_PIM:
  671                 case IPPROTO_CARP:
  672                 case IPPROTO_ESP:
  673                 case IPPROTO_NONE:
  674                         ulp = ip6;
  675                         break;
  676                 case IPPROTO_TCP:
  677                         PULLUP_TO(hlen, ulp, struct tcphdr);
  678                         dst_port = TCP(ulp)->th_dport;
  679                         src_port = TCP(ulp)->th_sport;
  680                         if ((*flags & FL_HASH_ALL) &&
  681                             (TCP(ulp)->th_flags & (TH_RST|TH_FIN)))
  682                                 *flags |= FL_STALE;
  683                         break;
  684                 case IPPROTO_SCTP:
  685                         PULLUP_TO(hlen, ulp, struct sctphdr);
  686                         src_port = SCTP(ulp)->src_port;
  687                         dst_port = SCTP(ulp)->dest_port;
  688                         break;
  689                 case IPPROTO_UDP:
  690                         PULLUP_TO(hlen, ulp, struct udphdr);
  691                         dst_port = UDP(ulp)->uh_dport;
  692                         src_port = UDP(ulp)->uh_sport;
  693                         break;
  694                 case IPPROTO_HOPOPTS:   /* RFC 2460 */
  695                         PULLUP_TO(hlen, ulp, struct ip6_hbh);
  696                         hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
  697                         proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
  698                         ulp = NULL;
  699                         break;
  700                 case IPPROTO_ROUTING:   /* RFC 2460 */
  701                         PULLUP_TO(hlen, ulp, struct ip6_rthdr); 
  702                         hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
  703                         proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
  704                         ulp = NULL;
  705                         break;
  706                 case IPPROTO_FRAGMENT:  /* RFC 2460 */
  707                         PULLUP_TO(hlen, ulp, struct ip6_frag);
  708                         hlen += sizeof (struct ip6_frag);
  709                         proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
  710                         offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
  711                             IP6F_OFF_MASK;
  712                         ulp = NULL;
  713                         break;
  714                 case IPPROTO_DSTOPTS:   /* RFC 2460 */
  715                         PULLUP_TO(hlen, ulp, struct ip6_hbh);
  716                         hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
  717                         proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
  718                         ulp = NULL;
  719                         break;
  720                 case IPPROTO_AH:        /* RFC 2402 */
  721                         PULLUP_TO(hlen, ulp, struct ip6_ext);
  722                         hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
  723                         proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
  724                         ulp = NULL;
  725                         break;
  726                 default:
  727                         PULLUP_TO(hlen, ulp, struct ip6_ext);
  728                         break;
  729                 }
  730         }
  731 
  732         if (src_port == 0) {
  733         receive_failed:
  734                 return (ENOTSUP);
  735         }
  736 
  737 skipports:
  738         dsin6->sin6_family = AF_INET6;
  739         dsin6->sin6_len = sizeof(*dsin6);
  740         dsin6->sin6_port = dst_port;
  741         memcpy(&dsin6->sin6_addr, &ip6->ip6_dst, sizeof(struct in6_addr));
  742 
  743         ssin6->sin6_family = AF_INET6;
  744         ssin6->sin6_len = sizeof(*ssin6);
  745         ssin6->sin6_port = src_port;
  746         memcpy(&ssin6->sin6_addr, &ip6->ip6_src, sizeof(struct in6_addr));
  747         *flags |= proto_to_flags(proto);
  748 
  749         return (0);
  750 }
  751 
  752 #define zero_key(key)           \
  753 do {                            \
  754         key[0] = 0;             \
  755         key[1] = 0;             \
  756         key[2] = 0;             \
  757         key[3] = 0;             \
  758         key[4] = 0;             \
  759         key[5] = 0;             \
  760         key[6] = 0;             \
  761         key[7] = 0;             \
  762         key[8] = 0;             \
  763 } while (0)
  764         
  765 static uint32_t
  766 ipv6_flow_lookup_hash_internal(
  767         struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, 
  768             uint32_t *key, uint16_t flags)
  769 {
  770         uint16_t sport, dport;
  771         uint8_t proto;
  772         int offset = 0;
  773 
  774         if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
  775                 return (0);
  776 
  777         proto = flags_to_proto(flags);
  778         zero_key(key);
  779         sport = dport = 0;
  780         if (dsin6 != NULL) {
  781                 memcpy(&key[1], &dsin6->sin6_addr, sizeof(struct in6_addr));
  782                 dport = dsin6->sin6_port;
  783         }
  784         if ((ssin6 != NULL) && (flags & FL_HASH_ALL)) {
  785                 memcpy(&key[5], &ssin6->sin6_addr, sizeof(struct in6_addr));
  786                 sport = ssin6->sin6_port;
  787         }
  788         if (flags & FL_HASH_ALL) {
  789                 ((uint16_t *)key)[0] = sport;
  790                 ((uint16_t *)key)[1] = dport; 
  791         } else
  792                 offset = V_flow_hashjitter + proto;
  793 
  794         return (jenkins_hashword(key, 9, offset));
  795 }
  796 
  797 static struct flentry *
  798 flowtable_lookup_mbuf6(struct flowtable *ft, struct mbuf *m)
  799 {
  800         struct sockaddr_storage ssa, dsa;
  801         struct sockaddr_in6 *dsin6, *ssin6;     
  802         uint16_t flags;
  803 
  804         dsin6 = (struct sockaddr_in6 *)&dsa;
  805         ssin6 = (struct sockaddr_in6 *)&ssa;
  806         bzero(dsin6, sizeof(*dsin6));
  807         bzero(ssin6, sizeof(*ssin6));
  808         flags = ft->ft_flags;
  809         
  810         if (ipv6_mbuf_demarshal(ft, m, ssin6, dsin6, &flags) != 0)
  811                 return (NULL);
  812 
  813         return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
  814 }
  815 
  816 void
  817 flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
  818 {
  819         uint32_t *hashkey = NULL;
  820         struct sockaddr_in6 *sin6;
  821 
  822         sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
  823 
  824         sin6->sin6_family = AF_INET6;
  825         sin6->sin6_len = sizeof(*sin6);
  826         hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
  827         memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
  828         ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
  829         ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
  830         ro->ro_flags |= RT_NORTREF;
  831 }
  832 #endif /* INET6 */
  833 
  834 static bitstr_t *
  835 flowtable_mask(struct flowtable *ft)
  836 {
  837         bitstr_t *mask;
  838 
  839         if (ft->ft_flags & FL_PCPU)
  840                 mask = ft->ft_masks[curcpu];
  841         else
  842                 mask = ft->ft_masks[0];
  843 
  844         return (mask);
  845 }
  846 
  847 static struct flentry **
  848 flowtable_entry(struct flowtable *ft, uint32_t hash)
  849 {
  850         struct flentry **fle;
  851         int index = (hash % ft->ft_size);
  852 
  853         if (ft->ft_flags & FL_PCPU) {
  854                 KASSERT(&ft->ft_table.pcpu[curcpu][0] != NULL, ("pcpu not set"));
  855                 fle = &ft->ft_table.pcpu[curcpu][index];
  856         } else {
  857                 KASSERT(&ft->ft_table.global[0] != NULL, ("global not set"));
  858                 fle = &ft->ft_table.global[index];
  859         }
  860         
  861         return (fle);
  862 }
  863 
  864 static int
  865 flow_stale(struct flowtable *ft, struct flentry *fle)
  866 {
  867         time_t idle_time;
  868 
  869         if ((fle->f_fhash == 0)
  870             || ((fle->f_rt->rt_flags & RTF_HOST) &&
  871                 ((fle->f_rt->rt_flags & (RTF_UP))
  872                     != (RTF_UP)))
  873             || (fle->f_rt->rt_ifp == NULL)
  874             || !RT_LINK_IS_UP(fle->f_rt->rt_ifp))
  875                 return (1);
  876 
  877         idle_time = time_uptime - fle->f_uptime;
  878 
  879         if ((fle->f_flags & FL_STALE) ||
  880             ((fle->f_flags & (TH_SYN|TH_ACK|TH_FIN)) == 0
  881                 && (idle_time > ft->ft_udp_idle)) ||
  882             ((fle->f_flags & TH_FIN)
  883                 && (idle_time > ft->ft_fin_wait_idle)) ||
  884             ((fle->f_flags & (TH_SYN|TH_ACK)) == TH_SYN
  885                 && (idle_time > ft->ft_syn_idle)) ||
  886             ((fle->f_flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)
  887                 && (idle_time > ft->ft_tcp_idle)) ||
  888             ((fle->f_rt->rt_flags & RTF_UP) == 0 || 
  889                 (fle->f_rt->rt_ifp == NULL)))
  890                 return (1);
  891 
  892         return (0);
  893 }
  894 
  895 static void
  896 flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
  897 {
  898         uint32_t *hashkey;
  899         int i, nwords;
  900 
  901         if (fle->f_flags & FL_IPV6) {
  902                 nwords = 9;
  903                 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
  904         } else {
  905                 nwords = 3;
  906                 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
  907         }
  908         
  909         for (i = 0; i < nwords; i++) 
  910                 hashkey[i] = key[i];
  911 }
  912 
  913 static struct flentry *
  914 flow_alloc(struct flowtable *ft)
  915 {
  916         struct flentry *newfle;
  917         uma_zone_t zone;
  918 
  919         newfle = NULL;
  920         zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
  921 
  922         newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO);
  923         if (newfle != NULL)
  924                 atomic_add_int(&ft->ft_count, 1);
  925         return (newfle);
  926 }
  927 
  928 static void
  929 flow_free(struct flentry *fle, struct flowtable *ft)
  930 {
  931         uma_zone_t zone;
  932 
  933         zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
  934         atomic_add_int(&ft->ft_count, -1);
  935         uma_zfree(zone, fle);
  936 }
  937 
  938 static int
  939 flow_full(struct flowtable *ft)
  940 {
  941         boolean_t full;
  942         uint32_t count;
  943         
  944         full = ft->ft_full;
  945         count = ft->ft_count;
  946 
  947         if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3))))
  948                 ft->ft_full = FALSE;
  949         else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5))))
  950                 ft->ft_full = TRUE;
  951         
  952         if (full && !ft->ft_full) {
  953                 flowclean_freq = 4*hz;
  954                 if ((ft->ft_flags & FL_HASH_ALL) == 0)
  955                         ft->ft_udp_idle = ft->ft_fin_wait_idle =
  956                             ft->ft_syn_idle = ft->ft_tcp_idle = 5;
  957                 cv_broadcast(&flowclean_c_cv);
  958         } else if (!full && ft->ft_full) {
  959                 flowclean_freq = 20*hz;
  960                 if ((ft->ft_flags & FL_HASH_ALL) == 0)
  961                         ft->ft_udp_idle = ft->ft_fin_wait_idle =
  962                             ft->ft_syn_idle = ft->ft_tcp_idle = 30;
  963         }
  964 
  965         return (ft->ft_full);
  966 }
  967 
  968 static int
  969 flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
  970     uint32_t fibnum, struct route *ro, uint16_t flags)
  971 {
  972         struct flentry *fle, *fletail, *newfle, **flep;
  973         struct flowtable_stats *fs = &ft->ft_stats[curcpu];
  974         int depth;
  975         bitstr_t *mask;
  976         uint8_t proto;
  977 
  978         newfle = flow_alloc(ft);
  979         if (newfle == NULL)
  980                 return (ENOMEM);
  981 
  982         newfle->f_flags |= (flags & FL_IPV6);
  983         proto = flags_to_proto(flags);
  984 
  985         FL_ENTRY_LOCK(ft, hash);
  986         mask = flowtable_mask(ft);
  987         flep = flowtable_entry(ft, hash);
  988         fletail = fle = *flep;
  989 
  990         if (fle == NULL) {
  991                 bit_set(mask, FL_ENTRY_INDEX(ft, hash));
  992                 *flep = fle = newfle;
  993                 goto skip;
  994         } 
  995         
  996         depth = 0;
  997         fs->ft_collisions++;
  998         /*
  999          * find end of list and make sure that we were not
 1000          * preempted by another thread handling this flow
 1001          */
 1002         while (fle != NULL) {
 1003                 if (fle->f_fhash == hash && !flow_stale(ft, fle)) {
 1004                         /*
 1005                          * there was either a hash collision
 1006                          * or we lost a race to insert
 1007                          */
 1008                         FL_ENTRY_UNLOCK(ft, hash);
 1009                         flow_free(newfle, ft);
 1010                         
 1011                         if (flags & FL_OVERWRITE) 
 1012                                 goto skip;
 1013                         return (EEXIST);
 1014                 }
 1015                 /*
 1016                  * re-visit this double condition XXX
 1017                  */
 1018                 if (fletail->f_next != NULL)
 1019                         fletail = fle->f_next;
 1020 
 1021                 depth++;
 1022                 fle = fle->f_next;
 1023         } 
 1024 
 1025         if (depth > fs->ft_max_depth)
 1026                 fs->ft_max_depth = depth;
 1027         fletail->f_next = newfle;
 1028         fle = newfle;
 1029 skip:
 1030         flowtable_set_hashkey(fle, key);
 1031 
 1032         fle->f_proto = proto;
 1033         fle->f_rt = ro->ro_rt;
 1034         fle->f_lle = ro->ro_lle;
 1035         fle->f_fhash = hash;
 1036         fle->f_fibnum = fibnum;
 1037         fle->f_uptime = time_uptime;
 1038         FL_ENTRY_UNLOCK(ft, hash);
 1039         return (0);
 1040 }
 1041 
 1042 int
 1043 kern_flowtable_insert(struct flowtable *ft,
 1044     struct sockaddr_storage *ssa, struct sockaddr_storage *dsa,
 1045     struct route *ro, uint32_t fibnum, int flags)
 1046 {
 1047         uint32_t key[9], hash;
 1048 
 1049         flags = (ft->ft_flags | flags | FL_OVERWRITE);
 1050         hash = 0;
 1051 
 1052 #ifdef INET
 1053         if (ssa->ss_family == AF_INET) 
 1054                 hash = ipv4_flow_lookup_hash_internal((struct sockaddr_in *)ssa,
 1055                     (struct sockaddr_in *)dsa, key, flags);
 1056 #endif
 1057 #ifdef INET6
 1058         if (ssa->ss_family == AF_INET6) 
 1059                 hash = ipv6_flow_lookup_hash_internal((struct sockaddr_in6 *)ssa,
 1060                     (struct sockaddr_in6 *)dsa, key, flags);
 1061 #endif  
 1062         if (ro->ro_rt == NULL || ro->ro_lle == NULL)
 1063                 return (EINVAL);
 1064 
 1065         FLDPRINTF(ft, FL_DEBUG,
 1066             "kern_flowtable_insert: key=%x:%x:%x hash=%x fibnum=%d flags=%x\n",
 1067             key[0], key[1], key[2], hash, fibnum, flags);
 1068         return (flowtable_insert(ft, hash, key, fibnum, ro, flags));
 1069 }
 1070 
 1071 static int
 1072 flowtable_key_equal(struct flentry *fle, uint32_t *key)
 1073 {
 1074         uint32_t *hashkey;
 1075         int i, nwords;
 1076 
 1077         if (fle->f_flags & FL_IPV6) {
 1078                 nwords = 9;
 1079                 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
 1080         } else {
 1081                 nwords = 3;
 1082                 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
 1083         }
 1084 
 1085         for (i = 0; i < nwords; i++) 
 1086                 if (hashkey[i] != key[i])
 1087                         return (0);
 1088 
 1089         return (1);
 1090 }
 1091 
 1092 struct flentry *
 1093 flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af)
 1094 {
 1095         struct flentry *fle = NULL;
 1096 
 1097 #ifdef INET
 1098         if (af == AF_INET)
 1099                 fle = flowtable_lookup_mbuf4(ft, m);
 1100 #endif
 1101 #ifdef INET6
 1102         if (af == AF_INET6)
 1103                 fle = flowtable_lookup_mbuf6(ft, m);
 1104 #endif  
 1105         if (fle != NULL && m != NULL && (m->m_flags & M_FLOWID) == 0) {
 1106                 m->m_flags |= M_FLOWID;
 1107                 m->m_pkthdr.flowid = fle->f_fhash;
 1108         }
 1109         return (fle);
 1110 }
 1111         
 1112 struct flentry *
 1113 flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
 1114     struct sockaddr_storage *dsa, uint32_t fibnum, int flags)
 1115 {
 1116         uint32_t key[9], hash;
 1117         struct flentry *fle;
 1118         struct flowtable_stats *fs = &ft->ft_stats[curcpu];
 1119         uint8_t proto = 0;
 1120         int error = 0;
 1121         struct rtentry *rt;
 1122         struct llentry *lle;
 1123         struct route sro, *ro;
 1124         struct route_in6 sro6;
 1125 
 1126         sro.ro_rt = sro6.ro_rt = NULL;
 1127         sro.ro_lle = sro6.ro_lle = NULL;
 1128         ro = NULL;
 1129         hash = 0;
 1130         flags |= ft->ft_flags;
 1131         proto = flags_to_proto(flags);
 1132 #ifdef INET
 1133         if (ssa->ss_family == AF_INET) {
 1134                 struct sockaddr_in *ssin, *dsin;
 1135 
 1136                 ro = &sro;
 1137                 memcpy(&ro->ro_dst, dsa, sizeof(struct sockaddr_in));
 1138                 /*
 1139                  * The harvested source and destination addresses
 1140                  * may contain port information if the packet is 
 1141                  * from a transport protocol (e.g. TCP/UDP). The 
 1142                  * port field must be cleared before performing 
 1143                  * a route lookup.
 1144                  */
 1145                 ((struct sockaddr_in *)&ro->ro_dst)->sin_port = 0;
 1146                 dsin = (struct sockaddr_in *)dsa;
 1147                 ssin = (struct sockaddr_in *)ssa;
 1148                 if ((dsin->sin_addr.s_addr == ssin->sin_addr.s_addr) ||
 1149                     (ntohl(dsin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 1150                     (ntohl(ssin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
 1151                         return (NULL);
 1152 
 1153                 hash = ipv4_flow_lookup_hash_internal(ssin, dsin, key, flags);
 1154         }
 1155 #endif
 1156 #ifdef INET6
 1157         if (ssa->ss_family == AF_INET6) {
 1158                 struct sockaddr_in6 *ssin6, *dsin6;
 1159 
 1160                 ro = (struct route *)&sro6;
 1161                 memcpy(&sro6.ro_dst, dsa,
 1162                     sizeof(struct sockaddr_in6));
 1163                 ((struct sockaddr_in6 *)&ro->ro_dst)->sin6_port = 0;
 1164                 dsin6 = (struct sockaddr_in6 *)dsa;
 1165                 ssin6 = (struct sockaddr_in6 *)ssa;
 1166 
 1167                 flags |= FL_IPV6;
 1168                 hash = ipv6_flow_lookup_hash_internal(ssin6, dsin6, key, flags);
 1169         }
 1170 #endif
 1171         /*
 1172          * Ports are zero and this isn't a transmit cache
 1173          * - thus not a protocol for which we need to keep 
 1174          * state
 1175          * FL_HASH_ALL => key[0] != 0 for TCP || UDP || SCTP
 1176          */
 1177         if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_ALL)))
 1178                 return (NULL);
 1179 
 1180         fs->ft_lookups++;
 1181         FL_ENTRY_LOCK(ft, hash);
 1182         if ((fle = FL_ENTRY(ft, hash)) == NULL) {
 1183                 FL_ENTRY_UNLOCK(ft, hash);
 1184                 goto uncached;
 1185         }
 1186 keycheck:       
 1187         rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
 1188         lle = __DEVOLATILE(struct llentry *, fle->f_lle);
 1189         if ((rt != NULL)
 1190             && lle != NULL
 1191             && fle->f_fhash == hash
 1192             && flowtable_key_equal(fle, key)
 1193             && (proto == fle->f_proto)
 1194             && (fibnum == fle->f_fibnum)
 1195             && (rt->rt_flags & RTF_UP)
 1196             && (rt->rt_ifp != NULL)
 1197             && (lle->la_flags & LLE_VALID)) {
 1198                 fs->ft_hits++;
 1199                 fle->f_uptime = time_uptime;
 1200                 fle->f_flags |= flags;
 1201                 FL_ENTRY_UNLOCK(ft, hash);
 1202                 return (fle);
 1203         } else if (fle->f_next != NULL) {
 1204                 fle = fle->f_next;
 1205                 goto keycheck;
 1206         }
 1207         FL_ENTRY_UNLOCK(ft, hash);
 1208 uncached:
 1209         if (flags & FL_NOAUTO || flow_full(ft))
 1210                 return (NULL);
 1211 
 1212         fs->ft_misses++;
 1213         /*
 1214          * This bit of code ends up locking the
 1215          * same route 3 times (just like ip_output + ether_output)
 1216          * - at lookup
 1217          * - in rt_check when called by arpresolve
 1218          * - dropping the refcount for the rtentry
 1219          *
 1220          * This could be consolidated to one if we wrote a variant
 1221          * of arpresolve with an rt_check variant that expected to
 1222          * receive the route locked
 1223          */
 1224 
 1225 #ifdef INVARIANTS
 1226         if ((ro->ro_dst.sa_family != AF_INET) &&
 1227             (ro->ro_dst.sa_family != AF_INET6))
 1228                 panic("sa_family == %d\n", ro->ro_dst.sa_family);
 1229 #endif
 1230 
 1231         ft->ft_rtalloc(ro, hash, fibnum);
 1232         if (ro->ro_rt == NULL) 
 1233                 error = ENETUNREACH;
 1234         else {
 1235                 struct llentry *lle = NULL;
 1236                 struct sockaddr_storage *l3addr;
 1237                 struct rtentry *rt = ro->ro_rt;
 1238                 struct ifnet *ifp = rt->rt_ifp;
 1239 
 1240                 if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
 1241                         RTFREE(rt);
 1242                         ro->ro_rt = NULL;
 1243                         return (NULL);
 1244                 }
 1245 #ifdef INET6
 1246                 if (ssa->ss_family == AF_INET6) {
 1247                         struct sockaddr_in6 *dsin6;
 1248 
 1249                         dsin6 = (struct sockaddr_in6 *)dsa;                     
 1250                         if (in6_localaddr(&dsin6->sin6_addr)) {
 1251                                 RTFREE(rt);
 1252                                 ro->ro_rt = NULL;
 1253                                 return (NULL);                          
 1254                         }
 1255 
 1256                         if (rt->rt_flags & RTF_GATEWAY)
 1257                                 l3addr = (struct sockaddr_storage *)rt->rt_gateway;
 1258                         
 1259                         else
 1260                                 l3addr = (struct sockaddr_storage *)&ro->ro_dst;
 1261                         lle = llentry_alloc(ifp, LLTABLE6(ifp), l3addr);
 1262                 }
 1263 #endif  
 1264 #ifdef INET
 1265                 if (ssa->ss_family == AF_INET) {
 1266                         if (rt->rt_flags & RTF_GATEWAY)
 1267                                 l3addr = (struct sockaddr_storage *)rt->rt_gateway;
 1268                         else
 1269                                 l3addr = (struct sockaddr_storage *)&ro->ro_dst;
 1270                         lle = llentry_alloc(ifp, LLTABLE(ifp), l3addr); 
 1271                 }
 1272                         
 1273 #endif
 1274                 ro->ro_lle = lle;
 1275 
 1276                 if (lle == NULL) {
 1277                         RTFREE(rt);
 1278                         ro->ro_rt = NULL;
 1279                         return (NULL);
 1280                 }
 1281                 error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
 1282 
 1283                 if (error) {
 1284                         RTFREE(rt);
 1285                         LLE_FREE(lle);
 1286                         ro->ro_rt = NULL;
 1287                         ro->ro_lle = NULL;
 1288                 }
 1289         } 
 1290 
 1291         return ((error) ? NULL : fle);
 1292 }
 1293 
 1294 /*
 1295  * used by the bit_alloc macro
 1296  */
 1297 #define calloc(count, size) malloc((count)*(size), M_DEVBUF, M_WAITOK|M_ZERO)
 1298         
 1299 struct flowtable *
 1300 flowtable_alloc(char *name, int nentry, int flags)
 1301 {
 1302         struct flowtable *ft, *fttail;
 1303         int i;
 1304 
 1305         if (V_flow_hashjitter == 0)
 1306                 V_flow_hashjitter = arc4random();
 1307 
 1308         KASSERT(nentry > 0, ("nentry must be > 0, is %d\n", nentry));
 1309 
 1310         ft = malloc(sizeof(struct flowtable),
 1311             M_RTABLE, M_WAITOK | M_ZERO);
 1312 
 1313         ft->ft_name = name;
 1314         ft->ft_flags = flags;
 1315         ft->ft_size = nentry;
 1316 #ifdef RADIX_MPATH
 1317         ft->ft_rtalloc = rtalloc_mpath_fib;
 1318 #else
 1319         ft->ft_rtalloc = rtalloc_ign_wrapper;
 1320 #endif
 1321         if (flags & FL_PCPU) {
 1322                 ft->ft_lock = flowtable_pcpu_lock;
 1323                 ft->ft_unlock = flowtable_pcpu_unlock;
 1324 
 1325                 for (i = 0; i <= mp_maxid; i++) {
 1326                         ft->ft_table.pcpu[i] =
 1327                             malloc(nentry*sizeof(struct flentry *),
 1328                                 M_RTABLE, M_WAITOK | M_ZERO);
 1329                         ft->ft_masks[i] = bit_alloc(nentry);
 1330                 }
 1331         } else {
 1332                 ft->ft_lock_count = 2*(powerof2(mp_maxid + 1) ? (mp_maxid + 1):
 1333                     (fls(mp_maxid + 1) << 1));
 1334                 
 1335                 ft->ft_lock = flowtable_global_lock;
 1336                 ft->ft_unlock = flowtable_global_unlock;
 1337                 ft->ft_table.global =
 1338                             malloc(nentry*sizeof(struct flentry *),
 1339                                 M_RTABLE, M_WAITOK | M_ZERO);
 1340                 ft->ft_locks = malloc(ft->ft_lock_count*sizeof(struct mtx),
 1341                                 M_RTABLE, M_WAITOK | M_ZERO);
 1342                 for (i = 0; i < ft->ft_lock_count; i++)
 1343                         mtx_init(&ft->ft_locks[i], "flow", NULL, MTX_DEF|MTX_DUPOK);
 1344 
 1345                 ft->ft_masks[0] = bit_alloc(nentry);
 1346         }
 1347         ft->ft_tmpmask = bit_alloc(nentry);
 1348 
 1349         /*
 1350          * In the local transmit case the table truly is 
 1351          * just a cache - so everything is eligible for
 1352          * replacement after 5s of non-use
 1353          */
 1354         if (flags & FL_HASH_ALL) {
 1355                 ft->ft_udp_idle = V_flowtable_udp_expire;
 1356                 ft->ft_syn_idle = V_flowtable_syn_expire;
 1357                 ft->ft_fin_wait_idle = V_flowtable_fin_wait_expire;
 1358                 ft->ft_tcp_idle = V_flowtable_fin_wait_expire;
 1359         } else {
 1360                 ft->ft_udp_idle = ft->ft_fin_wait_idle =
 1361                     ft->ft_syn_idle = ft->ft_tcp_idle = 30;
 1362                 
 1363         }
 1364 
 1365         /*
 1366          * hook in to the cleaner list
 1367          */
 1368         if (V_flow_list_head == NULL)
 1369                 V_flow_list_head = ft;
 1370         else {
 1371                 fttail = V_flow_list_head;
 1372                 while (fttail->ft_next != NULL)
 1373                         fttail = fttail->ft_next;
 1374                 fttail->ft_next = ft;
 1375         }
 1376 
 1377         return (ft);
 1378 }
 1379 
/*
 * The rest of the code is devoted to garbage collection of expired entries.
 * It is a new addition made necessary by the switch to dynamically allocating
 * flow tables.
 */
 1386 static void
 1387 fle_free(struct flentry *fle, struct flowtable *ft)
 1388 {
 1389         struct rtentry *rt;
 1390         struct llentry *lle;
 1391 
 1392         rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
 1393         lle = __DEVOLATILE(struct llentry *, fle->f_lle);
 1394         if (rt != NULL)
 1395                 RTFREE(rt);
 1396         if (lle != NULL)
 1397                 LLE_FREE(lle);
 1398         flow_free(fle, ft);
 1399 }
 1400 
 1401 static void
 1402 flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
 1403 {
 1404         int curbit = 0, count;
 1405         struct flentry *fle,  **flehead, *fleprev;
 1406         struct flentry *flefreehead, *flefreetail, *fletmp;
 1407         bitstr_t *mask, *tmpmask;
 1408         struct flowtable_stats *fs = &ft->ft_stats[curcpu];
 1409 
 1410         flefreehead = flefreetail = NULL;
 1411         mask = flowtable_mask(ft);
 1412         tmpmask = ft->ft_tmpmask;
 1413         memcpy(tmpmask, mask, ft->ft_size/8);
 1414         /*
 1415          * XXX Note to self, bit_ffs operates at the byte level
 1416          * and thus adds gratuitous overhead
 1417          */
 1418         bit_ffs(tmpmask, ft->ft_size, &curbit);
 1419         while (curbit != -1) {
 1420                 if (curbit >= ft->ft_size || curbit < -1) {
 1421                         log(LOG_ALERT,
 1422                             "warning: bad curbit value %d \n",
 1423                             curbit);
 1424                         break;
 1425                 }
 1426 
 1427                 FL_ENTRY_LOCK(ft, curbit);
 1428                 flehead = flowtable_entry(ft, curbit);
 1429                 fle = fleprev = *flehead;
 1430 
 1431                 fs->ft_free_checks++;
 1432 #ifdef DIAGNOSTIC
 1433                 if (fle == NULL && curbit > 0) {
 1434                         log(LOG_ALERT,
 1435                             "warning bit=%d set, but no fle found\n",
 1436                             curbit);
 1437                 }
 1438 #endif          
 1439                 while (fle != NULL) {
 1440                         if (rt != NULL) {
 1441                                 if (__DEVOLATILE(struct rtentry *, fle->f_rt) != rt) {
 1442                                         fleprev = fle;
 1443                                         fle = fle->f_next;
 1444                                         continue;
 1445                                 }
 1446                         } else if (!flow_stale(ft, fle)) {
 1447                                 fleprev = fle;
 1448                                 fle = fle->f_next;
 1449                                 continue;
 1450                         }
 1451                         /*
 1452                          * delete head of the list
 1453                          */
 1454                         if (fleprev == *flehead) {
 1455                                 fletmp = fleprev;
 1456                                 if (fle == fleprev) {
 1457                                         fleprev = *flehead = fle->f_next;
 1458                                 } else
 1459                                         fleprev = *flehead = fle;
 1460                                 fle = fle->f_next;
 1461                         } else {
 1462                                 /*
 1463                                  * don't advance fleprev
 1464                                  */
 1465                                 fletmp = fle;
 1466                                 fleprev->f_next = fle->f_next;
 1467                                 fle = fleprev->f_next;
 1468                         }
 1469 
 1470                         if (flefreehead == NULL)
 1471                                 flefreehead = flefreetail = fletmp;
 1472                         else {
 1473                                 flefreetail->f_next = fletmp;
 1474                                 flefreetail = fletmp;
 1475                         }
 1476                         fletmp->f_next = NULL;
 1477                 }
 1478                 if (*flehead == NULL)
 1479                         bit_clear(mask, curbit);
 1480                 FL_ENTRY_UNLOCK(ft, curbit);
 1481                 bit_clear(tmpmask, curbit);
 1482                 bit_ffs(tmpmask, ft->ft_size, &curbit);
 1483         }
 1484         count = 0;
 1485         while ((fle = flefreehead) != NULL) {
 1486                 flefreehead = fle->f_next;
 1487                 count++;
 1488                 fs->ft_frees++;
 1489                 fle_free(fle, ft);
 1490         }
 1491         if (V_flowtable_debug && count)
 1492                 log(LOG_DEBUG, "freed %d flow entries\n", count);
 1493 }
 1494 
 1495 void
 1496 flowtable_route_flush(struct flowtable *ft, struct rtentry *rt)
 1497 {
 1498         int i;
 1499 
 1500         if (ft->ft_flags & FL_PCPU) {
 1501                 CPU_FOREACH(i) {
 1502                         if (smp_started == 1) {
 1503                                 thread_lock(curthread);
 1504                                 sched_bind(curthread, i);
 1505                                 thread_unlock(curthread);
 1506                         }
 1507 
 1508                         flowtable_free_stale(ft, rt);
 1509 
 1510                         if (smp_started == 1) {
 1511                                 thread_lock(curthread);
 1512                                 sched_unbind(curthread);
 1513                                 thread_unlock(curthread);
 1514                         }
 1515                 }
 1516         } else {
 1517                 flowtable_free_stale(ft, rt);
 1518         }
 1519 }
 1520 
 1521 static void
 1522 flowtable_clean_vnet(void)
 1523 {
 1524         struct flowtable *ft;
 1525         int i;
 1526 
 1527         ft = V_flow_list_head;
 1528         while (ft != NULL) {
 1529                 if (ft->ft_flags & FL_PCPU) {
 1530                         CPU_FOREACH(i) {
 1531                                 if (smp_started == 1) {
 1532                                         thread_lock(curthread);
 1533                                         sched_bind(curthread, i);
 1534                                         thread_unlock(curthread);
 1535                                 }
 1536 
 1537                                 flowtable_free_stale(ft, NULL);
 1538 
 1539                                 if (smp_started == 1) {
 1540                                         thread_lock(curthread);
 1541                                         sched_unbind(curthread);
 1542                                         thread_unlock(curthread);
 1543                                 }
 1544                         }
 1545                 } else {
 1546                         flowtable_free_stale(ft, NULL);
 1547                 }
 1548                 ft = ft->ft_next;
 1549         }
 1550 }
 1551 
 1552 static void
 1553 flowtable_cleaner(void)
 1554 {
 1555         VNET_ITERATOR_DECL(vnet_iter);
 1556         struct thread *td;
 1557 
 1558         if (bootverbose)
 1559                 log(LOG_INFO, "flowtable cleaner started\n");
 1560         td = curthread;
 1561         while (1) {
 1562                 VNET_LIST_RLOCK();
 1563                 VNET_FOREACH(vnet_iter) {
 1564                         CURVNET_SET(vnet_iter);
 1565                         flowtable_clean_vnet();
 1566                         CURVNET_RESTORE();
 1567                 }
 1568                 VNET_LIST_RUNLOCK();
 1569 
 1570                 /*
 1571                  * The 10 second interval between cleaning checks
 1572                  * is arbitrary
 1573                  */
 1574                 mtx_lock(&flowclean_lock);
 1575                 thread_lock(td);
 1576                 sched_prio(td, PPAUSE);
 1577                 thread_unlock(td);
 1578                 flowclean_cycles++;
 1579                 cv_broadcast(&flowclean_f_cv);
 1580                 cv_timedwait(&flowclean_c_cv, &flowclean_lock, flowclean_freq);
 1581                 mtx_unlock(&flowclean_lock);
 1582         }
 1583 }
 1584 
 1585 static void
 1586 flowtable_flush(void *unused __unused)
 1587 {
 1588         uint64_t start;
 1589 
 1590         mtx_lock(&flowclean_lock);
 1591         start = flowclean_cycles;
 1592         while (start == flowclean_cycles) {
 1593                 cv_broadcast(&flowclean_c_cv);
 1594                 cv_wait(&flowclean_f_cv, &flowclean_lock);
 1595         }
 1596         mtx_unlock(&flowclean_lock);
 1597 }
 1598 
/*
 * Descriptor for the "flowcleaner" kernel process, whose body is
 * flowtable_cleaner().  It is passed to kproc_start() by the SYSINIT
 * below.
 * NOTE(review): presumably flowcleanerproc receives the created
 * process - confirm against struct kproc_desc.
 */
static struct kproc_desc flow_kp = {
        "flowcleaner",
        flowtable_cleaner,
        &flowcleanerproc
};
SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
 1605 
 1606 static void
 1607 flowtable_init_vnet(const void *unused __unused)
 1608 {
 1609 
 1610         V_flowtable_nmbflows = 1024 + maxusers * 64 * mp_ncpus;
 1611         V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
 1612             NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
 1613         V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
 1614             NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);    
 1615         uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
 1616         uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
 1617         V_flowtable_ready = 1;
 1618 }
 1619 VNET_SYSINIT(flowtable_init_vnet, SI_SUB_SMP, SI_ORDER_ANY,
 1620     flowtable_init_vnet, NULL);
 1621 
 1622 static void
 1623 flowtable_init(const void *unused __unused)
 1624 {
 1625 
 1626         cv_init(&flowclean_c_cv, "c_flowcleanwait");
 1627         cv_init(&flowclean_f_cv, "f_flowcleanwait");
 1628         mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
 1629         EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
 1630             EVENTHANDLER_PRI_ANY);
 1631         flowclean_freq = 20*hz;
 1632 }
 1633 SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST,
 1634     flowtable_init, NULL);
 1635 
 1636 
 1637 #ifdef VIMAGE
 1638 static void
 1639 flowtable_uninit(const void *unused __unused)
 1640 {
 1641 
 1642         V_flowtable_ready = 0;
 1643         uma_zdestroy(V_flow_ipv4_zone);
 1644         uma_zdestroy(V_flow_ipv6_zone);
 1645 }
 1646 
 1647 VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
 1648     flowtable_uninit, NULL);
 1649 #endif
 1650 
 1651 #ifdef DDB
 1652 static uint32_t *
 1653 flowtable_get_hashkey(struct flentry *fle)
 1654 {
 1655         uint32_t *hashkey;
 1656 
 1657         if (fle->f_flags & FL_IPV6)
 1658                 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
 1659         else
 1660                 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
 1661 
 1662         return (hashkey);
 1663 }
 1664 
 1665 static bitstr_t *
 1666 flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
 1667 {
 1668         bitstr_t *mask;
 1669 
 1670         if (ft->ft_flags & FL_PCPU)
 1671                 mask = ft->ft_masks[cpuid];
 1672         else
 1673                 mask = ft->ft_masks[0];
 1674 
 1675         return (mask);
 1676 }
 1677 
 1678 static struct flentry **
 1679 flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
 1680 {
 1681         struct flentry **fle;
 1682         int index = (hash % ft->ft_size);
 1683 
 1684         if (ft->ft_flags & FL_PCPU) {
 1685                 fle = &ft->ft_table.pcpu[cpuid][index];
 1686         } else {
 1687                 fle = &ft->ft_table.global[index];
 1688         }
 1689         
 1690         return (fle);
 1691 }
 1692 
 1693 static void
 1694 flow_show(struct flowtable *ft, struct flentry *fle)
 1695 {
 1696         int idle_time;
 1697         int rt_valid, ifp_valid;
 1698         uint16_t sport, dport;
 1699         uint32_t *hashkey;
 1700         char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
 1701         volatile struct rtentry *rt;
 1702         struct ifnet *ifp = NULL;
 1703 
 1704         idle_time = (int)(time_uptime - fle->f_uptime);
 1705         rt = fle->f_rt;
 1706         rt_valid = rt != NULL;
 1707         if (rt_valid) 
 1708                 ifp = rt->rt_ifp;
 1709         ifp_valid = ifp != NULL;
 1710         hashkey = flowtable_get_hashkey(fle);
 1711         if (fle->f_flags & FL_IPV6)
 1712                 goto skipaddr;
 1713 
 1714         inet_ntoa_r(*(struct in_addr *) &hashkey[2], daddr);
 1715         if (ft->ft_flags & FL_HASH_ALL) {
 1716                 inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);            
 1717                 sport = ntohs(((uint16_t *)hashkey)[0]);
 1718                 dport = ntohs(((uint16_t *)hashkey)[1]);
 1719                 db_printf("%s:%d->%s:%d",
 1720                     saddr, sport, daddr,
 1721                     dport);
 1722         } else 
 1723                 db_printf("%s ", daddr);
 1724     
 1725 skipaddr:
 1726         if (fle->f_flags & FL_STALE)
 1727                 db_printf(" FL_STALE ");
 1728         if (fle->f_flags & FL_TCP)
 1729                 db_printf(" FL_TCP ");
 1730         if (fle->f_flags & FL_UDP)
 1731                 db_printf(" FL_UDP ");
 1732         if (rt_valid) {
 1733                 if (rt->rt_flags & RTF_UP)
 1734                         db_printf(" RTF_UP ");
 1735         }
 1736         if (ifp_valid) {
 1737                 if (ifp->if_flags & IFF_LOOPBACK)
 1738                         db_printf(" IFF_LOOPBACK ");
 1739                 if (ifp->if_flags & IFF_UP)
 1740                         db_printf(" IFF_UP ");          
 1741                 if (ifp->if_flags & IFF_POINTOPOINT)
 1742                         db_printf(" IFF_POINTOPOINT ");         
 1743         }
 1744         if (fle->f_flags & FL_IPV6)
 1745                 db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
 1746                     hashkey[0], hashkey[1], hashkey[2],
 1747                     hashkey[3], hashkey[4], hashkey[5],
 1748                     hashkey[6], hashkey[7], hashkey[8]);
 1749         else
 1750                 db_printf("\n\tkey=%08x:%08x:%08x ",
 1751                     hashkey[0], hashkey[1], hashkey[2]);
 1752         db_printf("hash=%08x idle_time=%03d"
 1753             "\n\tfibnum=%02d rt=%p",
 1754             fle->f_fhash, idle_time, fle->f_fibnum, fle->f_rt);
 1755         db_printf("\n");
 1756 }
 1757 
 1758 static void
 1759 flowtable_show(struct flowtable *ft, int cpuid)
 1760 {
 1761         int curbit = 0;
 1762         struct flentry *fle,  **flehead;
 1763         bitstr_t *mask, *tmpmask;
 1764 
 1765         if (cpuid != -1)
 1766                 db_printf("cpu: %d\n", cpuid);
 1767         mask = flowtable_mask_pcpu(ft, cpuid);
 1768         tmpmask = ft->ft_tmpmask;
 1769         memcpy(tmpmask, mask, ft->ft_size/8);
 1770         /*
 1771          * XXX Note to self, bit_ffs operates at the byte level
 1772          * and thus adds gratuitous overhead
 1773          */
 1774         bit_ffs(tmpmask, ft->ft_size, &curbit);
 1775         while (curbit != -1) {
 1776                 if (curbit >= ft->ft_size || curbit < -1) {
 1777                         db_printf("warning: bad curbit value %d \n",
 1778                             curbit);
 1779                         break;
 1780                 }
 1781 
 1782                 flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
 1783                 fle = *flehead;
 1784 
 1785                 while (fle != NULL) {   
 1786                         flow_show(ft, fle);
 1787                         fle = fle->f_next;
 1788                         continue;
 1789                 }
 1790                 bit_clear(tmpmask, curbit);
 1791                 bit_ffs(tmpmask, ft->ft_size, &curbit);
 1792         }
 1793 }
 1794 
 1795 static void
 1796 flowtable_show_vnet(void)
 1797 {
 1798         struct flowtable *ft;
 1799         int i;
 1800 
 1801         ft = V_flow_list_head;
 1802         while (ft != NULL) {
 1803                 printf("name: %s\n", ft->ft_name);
 1804                 if (ft->ft_flags & FL_PCPU) {
 1805                         CPU_FOREACH(i) {
 1806                                 flowtable_show(ft, i);
 1807                         }
 1808                 } else {
 1809                         flowtable_show(ft, -1);
 1810                 }
 1811                 ft = ft->ft_next;
 1812         }
 1813 }
 1814 
 1815 DB_SHOW_COMMAND(flowtables, db_show_flowtables)
 1816 {
 1817         VNET_ITERATOR_DECL(vnet_iter);
 1818 
 1819         VNET_FOREACH(vnet_iter) {
 1820                 CURVNET_SET(vnet_iter);
 1821 #ifdef VIMAGE
 1822                 db_printf("vnet %p\n", vnet_iter);
 1823 #endif
 1824                 flowtable_show_vnet();
 1825                 CURVNET_RESTORE();
 1826         }
 1827 }
 1828 #endif

Cache object: 26c7d7324815e67cfdbff6b3e349baf1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.