The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_flow.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: ip_flow.c,v 1.36 2006/10/06 03:20:47 mrg Exp $ */
    2 
    3 /*-
    4  * Copyright (c) 1998 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by the 3am Software Foundry ("3am").  It was developed by Matt Thomas.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the NetBSD
   21  *      Foundation, Inc. and its contributors.
   22  * 4. Neither the name of The NetBSD Foundation nor the names of its
   23  *    contributors may be used to endorse or promote products derived
   24  *    from this software without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36  * POSSIBILITY OF SUCH DAMAGE.
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.36 2006/10/06 03:20:47 mrg Exp $");
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/malloc.h>
   45 #include <sys/mbuf.h>
   46 #include <sys/domain.h>
   47 #include <sys/protosw.h>
   48 #include <sys/socket.h>
   49 #include <sys/socketvar.h>
   50 #include <sys/errno.h>
   51 #include <sys/time.h>
   52 #include <sys/kernel.h>
   53 #include <sys/pool.h>
   54 #include <sys/sysctl.h>
   55 
   56 #include <net/if.h>
   57 #include <net/if_dl.h>
   58 #include <net/route.h>
   59 #include <net/pfil.h>
   60 
   61 #include <netinet/in.h>
   62 #include <netinet/in_systm.h>
   63 #include <netinet/ip.h>
   64 #include <netinet/in_pcb.h>
   65 #include <netinet/in_var.h>
   66 #include <netinet/ip_var.h>
   67 
   68 POOL_INIT(ipflow_pool, sizeof(struct ipflow), 0, 0, 0, "ipflowpl", NULL);
   69 
   70 LIST_HEAD(ipflowhead, ipflow);
   71 
   72 #define IPFLOW_TIMER            (5 * PR_SLOWHZ)
   73 #define IPFLOW_HASHSIZE         (1 << IPFLOW_HASHBITS)
   74 
   75 static struct ipflowhead ipflowtable[IPFLOW_HASHSIZE];
   76 static struct ipflowhead ipflowlist;
   77 static int ipflow_inuse;
   78 
   79 #define IPFLOW_INSERT(bucket, ipf) \
   80 do { \
   81         LIST_INSERT_HEAD((bucket), (ipf), ipf_hash); \
   82         LIST_INSERT_HEAD(&ipflowlist, (ipf), ipf_list); \
   83 } while (/*CONSTCOND*/ 0)
   84 
   85 #define IPFLOW_REMOVE(ipf) \
   86 do { \
   87         LIST_REMOVE((ipf), ipf_hash); \
   88         LIST_REMOVE((ipf), ipf_list); \
   89 } while (/*CONSTCOND*/ 0)
   90 
   91 #ifndef IPFLOW_MAX
   92 #define IPFLOW_MAX              256
   93 #endif
   94 int ip_maxflows = IPFLOW_MAX;
   95 
   96 static unsigned
   97 ipflow_hash(struct in_addr dst, struct in_addr src, unsigned tos)
   98 {
   99         unsigned hash = tos;
  100         int idx;
  101         for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS)
  102                 hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx);
  103         return hash & (IPFLOW_HASHSIZE-1);
  104 }
  105 
  106 static struct ipflow *
  107 ipflow_lookup(const struct ip *ip)
  108 {
  109         unsigned hash;
  110         struct ipflow *ipf;
  111 
  112         hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
  113 
  114         LIST_FOREACH(ipf, &ipflowtable[hash], ipf_hash) {
  115                 if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr
  116                     && ip->ip_src.s_addr == ipf->ipf_src.s_addr
  117                     && ip->ip_tos == ipf->ipf_tos)
  118                         break;
  119         }
  120         return ipf;
  121 }
  122 
  123 void
  124 ipflow_init(void)
  125 {
  126         int i;
  127 
  128         LIST_INIT(&ipflowlist);
  129         for (i = 0; i < IPFLOW_HASHSIZE; i++)
  130                 LIST_INIT(&ipflowtable[i]);
  131 }
  132 
  133 int
  134 ipflow_fastforward(struct mbuf *m)
  135 {
  136         struct ip *ip, ip_store;
  137         struct ipflow *ipf;
  138         struct rtentry *rt;
  139         struct sockaddr *dst;
  140         int error;
  141         int iplen;
  142 
  143         /*
  144          * Are we forwarding packets?  Big enough for an IP packet?
  145          */
  146         if (!ipforwarding || ipflow_inuse == 0 || m->m_len < sizeof(struct ip))
  147                 return 0;
  148 
  149         /*
  150          * Was packet received as a link-level multicast or broadcast?
  151          * If so, don't try to fast forward..
  152          */
  153         if ((m->m_flags & (M_BCAST|M_MCAST)) != 0)
  154                 return 0;
  155 
  156         /*
  157          * IP header with no option and valid version and length
  158          */
  159         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)))
  160                 ip = mtod(m, struct ip *);
  161         else {
  162                 memcpy(&ip_store, mtod(m, caddr_t), sizeof(ip_store));
  163                 ip = &ip_store;
  164         }
  165         iplen = ntohs(ip->ip_len);
  166         if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) ||
  167             iplen < sizeof(struct ip) || iplen > m->m_pkthdr.len)
  168                 return 0;
  169         /*
  170          * Find a flow.
  171          */
  172         if ((ipf = ipflow_lookup(ip)) == NULL)
  173                 return 0;
  174 
  175         /*
  176          * Verify the IP header checksum.
  177          */
  178         switch (m->m_pkthdr.csum_flags &
  179                 ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
  180                  M_CSUM_IPv4_BAD)) {
  181         case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
  182                 return (0);
  183 
  184         case M_CSUM_IPv4:
  185                 /* Checksum was okay. */
  186                 break;
  187 
  188         default:
  189                 /* Must compute it ourselves. */
  190                 if (in_cksum(m, sizeof(struct ip)) != 0)
  191                         return (0);
  192                 break;
  193         }
  194 
  195         /*
  196          * Route and interface still up?
  197          */
  198         rt = ipf->ipf_ro.ro_rt;
  199         if ((rt->rt_flags & RTF_UP) == 0 ||
  200             (rt->rt_ifp->if_flags & IFF_UP) == 0)
  201                 return 0;
  202 
  203         /*
  204          * Packet size OK?  TTL?
  205          */
  206         if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC)
  207                 return 0;
  208 
  209         /*
  210          * Clear any in-bound checksum flags for this packet.
  211          */
  212         m->m_pkthdr.csum_flags = 0;
  213 
  214         /*
  215          * Everything checks out and so we can forward this packet.
  216          * Modify the TTL and incrementally change the checksum.
  217          *
  218          * This method of adding the checksum works on either endian CPU.
  219          * If htons() is inlined, all the arithmetic is folded; otherwise
  220          * the htons()s are combined by CSE due to the const attribute.
  221          *
  222          * Don't bother using HW checksumming here -- the incremental
  223          * update is pretty fast.
  224          */
  225         ip->ip_ttl -= IPTTLDEC;
  226         if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8))
  227                 ip->ip_sum -= ~htons(IPTTLDEC << 8);
  228         else
  229                 ip->ip_sum += htons(IPTTLDEC << 8);
  230 
  231         /*
  232          * Done modifying the header; copy it back, if necessary.
  233          */
  234         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0)
  235                 memcpy(mtod(m, caddr_t), &ip_store, sizeof(ip_store));
  236 
  237         /*
  238          * Trim the packet in case it's too long..
  239          */
  240         if (m->m_pkthdr.len > iplen) {
  241                 if (m->m_len == m->m_pkthdr.len) {
  242                         m->m_len = iplen;
  243                         m->m_pkthdr.len = iplen;
  244                 } else
  245                         m_adj(m, iplen - m->m_pkthdr.len);
  246         }
  247 
  248         /*
  249          * Send the packet on it's way.  All we can get back is ENOBUFS
  250          */
  251         ipf->ipf_uses++;
  252         PRT_SLOW_ARM(ipf->ipf_timer, IPFLOW_TIMER);
  253 
  254         if (rt->rt_flags & RTF_GATEWAY)
  255                 dst = rt->rt_gateway;
  256         else
  257                 dst = &ipf->ipf_ro.ro_dst;
  258 
  259         if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) {
  260                 if (error == ENOBUFS)
  261                         ipf->ipf_dropped++;
  262                 else
  263                         ipf->ipf_errors++;
  264         }
  265         return 1;
  266 }
  267 
  268 static void
  269 ipflow_addstats(struct ipflow *ipf)
  270 {
  271         ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
  272         ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped;
  273         ipstat.ips_total += ipf->ipf_uses;
  274         ipstat.ips_forward += ipf->ipf_uses;
  275         ipstat.ips_fastforward += ipf->ipf_uses;
  276 }
  277 
  278 static void
  279 ipflow_free(struct ipflow *ipf)
  280 {
  281         int s;
  282         /*
  283          * Remove the flow from the hash table (at elevated IPL).
  284          * Once it's off the list, we can deal with it at normal
  285          * network IPL.
  286          */
  287         s = splnet();
  288         IPFLOW_REMOVE(ipf);
  289         splx(s);
  290         ipflow_addstats(ipf);
  291         RTFREE(ipf->ipf_ro.ro_rt);
  292         ipflow_inuse--;
  293         s = splnet();
  294         pool_put(&ipflow_pool, ipf);
  295         splx(s);
  296 }
  297 
  298 struct ipflow *
  299 ipflow_reap(int just_one)
  300 {
  301         while (just_one || ipflow_inuse > ip_maxflows) {
  302                 struct ipflow *ipf, *maybe_ipf = NULL;
  303                 int s;
  304 
  305                 ipf = LIST_FIRST(&ipflowlist);
  306                 while (ipf != NULL) {
  307                         /*
  308                          * If this no longer points to a valid route
  309                          * reclaim it.
  310                          */
  311                         if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0)
  312                                 goto done;
  313                         /*
  314                          * choose the one that's been least recently
  315                          * used or has had the least uses in the
  316                          * last 1.5 intervals.
  317                          */
  318                         if (maybe_ipf == NULL ||
  319                             ipf->ipf_timer < maybe_ipf->ipf_timer ||
  320                             (ipf->ipf_timer == maybe_ipf->ipf_timer &&
  321                              ipf->ipf_last_uses + ipf->ipf_uses <
  322                                  maybe_ipf->ipf_last_uses +
  323                                  maybe_ipf->ipf_uses))
  324                                 maybe_ipf = ipf;
  325                         ipf = LIST_NEXT(ipf, ipf_list);
  326                 }
  327                 ipf = maybe_ipf;
  328             done:
  329                 /*
  330                  * Remove the entry from the flow table.
  331                  */
  332                 s = splnet();
  333                 IPFLOW_REMOVE(ipf);
  334                 splx(s);
  335                 ipflow_addstats(ipf);
  336                 RTFREE(ipf->ipf_ro.ro_rt);
  337                 if (just_one)
  338                         return ipf;
  339                 pool_put(&ipflow_pool, ipf);
  340                 ipflow_inuse--;
  341         }
  342         return NULL;
  343 }
  344 
  345 void
  346 ipflow_slowtimo(void)
  347 {
  348         struct ipflow *ipf, *next_ipf;
  349 
  350         for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
  351                 next_ipf = LIST_NEXT(ipf, ipf_list);
  352                 if (PRT_SLOW_ISEXPIRED(ipf->ipf_timer)) {
  353                         ipflow_free(ipf);
  354                 } else {
  355                         ipf->ipf_last_uses = ipf->ipf_uses;
  356                         ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
  357                         ipstat.ips_total += ipf->ipf_uses;
  358                         ipstat.ips_forward += ipf->ipf_uses;
  359                         ipstat.ips_fastforward += ipf->ipf_uses;
  360                         ipf->ipf_uses = 0;
  361                 }
  362         }
  363 }
  364 
  365 void
  366 ipflow_create(const struct route *ro, struct mbuf *m)
  367 {
  368         const struct ip *const ip = mtod(m, struct ip *);
  369         struct ipflow *ipf;
  370         unsigned hash;
  371         int s;
  372 
  373         /*
  374          * Don't create cache entries for ICMP messages.
  375          */
  376         if (ip_maxflows == 0 || ip->ip_p == IPPROTO_ICMP)
  377                 return;
  378         /*
  379          * See if an existing flow struct exists.  If so remove it from it's
  380          * list and free the old route.  If not, try to malloc a new one
  381          * (if we aren't at our limit).
  382          */
  383         ipf = ipflow_lookup(ip);
  384         if (ipf == NULL) {
  385                 if (ipflow_inuse >= ip_maxflows) {
  386                         ipf = ipflow_reap(1);
  387                 } else {
  388                         s = splnet();
  389                         ipf = pool_get(&ipflow_pool, PR_NOWAIT);
  390                         splx(s);
  391                         if (ipf == NULL)
  392                                 return;
  393                         ipflow_inuse++;
  394                 }
  395                 bzero((caddr_t) ipf, sizeof(*ipf));
  396         } else {
  397                 s = splnet();
  398                 IPFLOW_REMOVE(ipf);
  399                 splx(s);
  400                 ipflow_addstats(ipf);
  401                 RTFREE(ipf->ipf_ro.ro_rt);
  402                 ipf->ipf_uses = ipf->ipf_last_uses = 0;
  403                 ipf->ipf_errors = ipf->ipf_dropped = 0;
  404         }
  405 
  406         /*
  407          * Fill in the updated information.
  408          */
  409         ipf->ipf_ro = *ro;
  410         ro->ro_rt->rt_refcnt++;
  411         ipf->ipf_dst = ip->ip_dst;
  412         ipf->ipf_src = ip->ip_src;
  413         ipf->ipf_tos = ip->ip_tos;
  414         PRT_SLOW_ARM(ipf->ipf_timer, IPFLOW_TIMER);
  415         ipf->ipf_start = time_uptime;
  416         /*
  417          * Insert into the approriate bucket of the flow table.
  418          */
  419         hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
  420         s = splnet();
  421         IPFLOW_INSERT(&ipflowtable[hash], ipf);
  422         splx(s);
  423 }
  424 
  425 void
  426 ipflow_invalidate_all(void)
  427 {
  428         struct ipflow *ipf, *next_ipf;
  429         int s;
  430 
  431         s = splnet();
  432         for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
  433                 next_ipf = LIST_NEXT(ipf, ipf_list);
  434                 ipflow_free(ipf);
  435         }
  436         splx(s);
  437 }

Cache object: 2a86633d5f15ffad5efad51779008fdf


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.