The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet6/in6_rmx.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $FreeBSD: src/sys/netinet6/in6_rmx.c,v 1.1.2.4 2004/10/06 02:35:17 suz Exp $    */
    2 /*      $DragonFly: src/sys/netinet6/in6_rmx.c,v 1.15 2006/12/22 23:57:53 swildner Exp $        */
    3 /*      $KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $        */
    4 
    5 /*
    6  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    7  * All rights reserved.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. Neither the name of the project nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  */
   33 
   34 /*
   35  * Copyright 1994, 1995 Massachusetts Institute of Technology
   36  *
   37  * Permission to use, copy, modify, and distribute this software and
   38  * its documentation for any purpose and without fee is hereby
   39  * granted, provided that both the above copyright notice and this
   40  * permission notice appear in all copies, that both the above
   41  * copyright notice and this permission notice appear in all
   42  * supporting documentation, and that the name of M.I.T. not be used
   43  * in advertising or publicity pertaining to distribution of the
   44  * software without specific, written prior permission.  M.I.T. makes
   45  * no representations about the suitability of this software for any
   46  * purpose.  It is provided "as is" without express or implied
   47  * warranty.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
   50  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
   51  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
   52  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
   53  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   54  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   55  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   56  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   57  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   58  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   59  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   60  * SUCH DAMAGE.
   61  *
   62  */
   63 
   64 /*
   65  * This code does two things necessary for the enhanced TCP metrics to
   66  * function in a useful manner:
   67  *  1) It marks all non-host routes as `cloning', thus ensuring that
   68  *     every actual reference to such a route actually gets turned
   69  *     into a reference to a host route to the specific destination
   70  *     requested.
   71  *  2) When such routes lose all their references, it arranges for them
   72  *     to be deleted in some random collection of circumstances, so that
   73  *     a large quantity of stale routing data is not kept in kernel memory
   74  *     indefinitely.  See in6_rtqtimo() below for the exact mechanism.
   75  */
   76 
   77 #include <sys/param.h>
   78 #include <sys/systm.h>
   79 #include <sys/kernel.h>
   80 #include <sys/sysctl.h>
   81 #include <sys/queue.h>
   82 #include <sys/socket.h>
   83 #include <sys/socketvar.h>
   84 #include <sys/mbuf.h>
   85 #include <sys/syslog.h>
   86 #include <sys/globaldata.h>
   87 #include <sys/thread2.h>
   88 
   89 #include <net/if.h>
   90 #include <net/route.h>
   91 #include <netinet/in.h>
   92 #include <netinet/ip_var.h>
   93 #include <netinet/in_var.h>
   94 
   95 #include <netinet/ip6.h>
   96 #include <netinet6/ip6_var.h>
   97 
   98 #include <netinet/icmp6.h>
   99 
  100 #include <netinet/tcp.h>
  101 #include <netinet/tcp_seq.h>
  102 #include <netinet/tcp_timer.h>
  103 #include <netinet/tcp_var.h>
  104 
  105 static struct callout   in6_rtqtimo_ch[MAXCPU];
  106 static struct callout   in6_mtutimo_ch[MAXCPU];
  107 
  108 extern int      in6_inithead (void **head, int off);
  109 
  110 #define RTPRF_OURS              RTF_PROTO3      /* set on routes we manage */
  111 
  112 /*
  113  * Do what we need to do when inserting a route.
  114  */
  115 static struct radix_node *
  116 in6_addroute(char *key, char *mask, struct radix_node_head *head,
  117              struct radix_node *treenodes)
  118 {
  119         struct rtentry *rt = (struct rtentry *)treenodes;
  120         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
  121         struct radix_node *ret;
  122 
  123         /*
  124          * For IPv6, all unicast non-host routes are automatically cloning.
  125          */
  126         if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
  127                 rt->rt_flags |= RTF_MULTICAST;
  128 
  129         if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
  130                 rt->rt_flags |= RTF_PRCLONING;
  131         }
  132 
  133         /*
  134          * A little bit of help for both IPv6 output and input:
  135          *   For local addresses, we make sure that RTF_LOCAL is set,
  136          *   with the thought that this might one day be used to speed up
  137          *   ip_input().
  138          *
  139          * We also mark routes to multicast addresses as such, because
  140          * it's easy to do and might be useful (but this is much more
  141          * dubious since it's so easy to inspect the address).  (This
  142          * is done above.)
  143          *
  144          * XXX
  145          * should elaborate the code.
  146          */
  147         if (rt->rt_flags & RTF_HOST) {
  148                 if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
  149                                         ->sin6_addr,
  150                                        &sin6->sin6_addr)) {
  151                         rt->rt_flags |= RTF_LOCAL;
  152                 }
  153         }
  154 
  155         if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
  156             rt->rt_ifp != NULL)
  157                 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
  158 
  159         ret = rn_addroute(key, mask, head, treenodes);
  160         if (ret == NULL && rt->rt_flags & RTF_HOST) {
  161                 struct rtentry *rt2;
  162 
  163                 /*
  164                  * We are trying to add a host route, but can't.
  165                  * Find out if it is because of an
  166                  * ARP entry and delete it if so.
  167                  */
  168                 rt2 = rtpurelookup((struct sockaddr *)sin6);
  169                 if (rt2 != NULL) {
  170                         --rt2->rt_refcnt;
  171                         if (rt2->rt_flags & RTF_LLINFO &&
  172                             rt2->rt_flags & RTF_HOST &&
  173                             rt2->rt_gateway &&
  174                             rt2->rt_gateway->sa_family == AF_LINK) {
  175                                 rtrequest(RTM_DELETE, rt_key(rt2),
  176                                           rt2->rt_gateway, rt_mask(rt2),
  177                                           rt2->rt_flags, NULL);
  178                                 ret = rn_addroute(key, mask, head, treenodes);
  179                         }
  180                 }
  181         } else if (ret == NULL && rt->rt_flags & RTF_CLONING) {
  182                 struct rtentry *rt2;
  183 
  184                 /*
  185                  * We are trying to add a net route, but can't.
  186                  * The following case should be allowed, so we'll make a
  187                  * special check for this:
  188                  *      Two IPv6 addresses with the same prefix is assigned
  189                  *      to a single interrface.
  190                  *      # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
  191                  *      # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
  192                  *      In this case, (*1) and (*2) want to add the same
  193                  *      net route entry, 3ffe:0501:: -> if0.
  194                  *      This case should not raise an error.
  195                  */
  196                 rt2 = rtpurelookup((struct sockaddr *)sin6);
  197                 if (rt2 != NULL) {
  198                         if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY))
  199                                         == RTF_CLONING &&
  200                             rt2->rt_gateway &&
  201                             rt2->rt_gateway->sa_family == AF_LINK &&
  202                             rt2->rt_ifp == rt->rt_ifp) {
  203                                 ret = rt2->rt_nodes;
  204                         }
  205                         --rt2->rt_refcnt;
  206                 }
  207         }
  208         return ret;
  209 }
  210 
  211 /*
  212  * This code is the inverse of in6_clsroute: on first reference, if we
  213  * were managing the route, stop doing so and set the expiration timer
  214  * back off again.
  215  */
  216 static struct radix_node *
  217 in6_matchroute(char *key, struct radix_node_head *head)
  218 {
  219         struct radix_node *rn = rn_match(key, head);
  220         struct rtentry *rt = (struct rtentry *)rn;
  221 
  222         if (rt != NULL && rt->rt_refcnt == 0) { /* this is first reference */
  223                 if (rt->rt_flags & RTPRF_OURS) {
  224                         rt->rt_flags &= ~RTPRF_OURS;
  225                         rt->rt_rmx.rmx_expire = 0;
  226                 }
  227         }
  228         return rn;
  229 }
  230 
  231 SYSCTL_DECL(_net_inet6_ip6);
  232 
  233 static int rtq_reallyold = 60*60;
  234         /* one hour is ``really old'' */
  235 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire,
  236     CTLFLAG_RW, &rtq_reallyold , 0, "Default expiration time on cloned routes");
  237                                 
  238 static int rtq_minreallyold = 10;
  239         /* never automatically crank down to less */
  240 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW,
  241     &rtq_minreallyold , 0, "Minimum time to attempt to hold onto cloned routes");
  242                                 
  243 static int rtq_toomany = 128;
  244         /* 128 cached routes is ``too many'' */
  245 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache,
  246     CTLFLAG_RW, &rtq_toomany , 0, "Upper limit on cloned routes");
  247                                 
  248 
  249 /*
  250  * On last reference drop, mark the route as belong to us so that it can be
  251  * timed out.
  252  */
  253 static void
  254 in6_clsroute(struct radix_node *rn, struct radix_node_head *head)
  255 {
  256         struct rtentry *rt = (struct rtentry *)rn;
  257 
  258         if (!(rt->rt_flags & RTF_UP))
  259                 return;         /* prophylactic measures */
  260 
  261         if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
  262                 return;
  263 
  264         if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED)
  265                 return;
  266 
  267         /*
  268          * As requested by David Greenman:
  269          * If rtq_reallyold is 0, just delete the route without
  270          * waiting for a timeout cycle to kill it.
  271          */
  272         if (rtq_reallyold != 0) {
  273                 rt->rt_flags |= RTPRF_OURS;
  274                 rt->rt_rmx.rmx_expire = time_uptime + rtq_reallyold;
  275         } else {
  276                 /*
  277                  * Remove route from the radix tree, but defer deallocation
  278                  * until we return to rtfree().
  279                  */
  280                 rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt),
  281                           rt->rt_flags, &rt);
  282         }
  283 }
  284 
  285 struct rtqk_arg {
  286         struct radix_node_head *rnh;
  287         int mode;
  288         int updating;
  289         int draining;
  290         int killed;
  291         int found;
  292         time_t nextstop;
  293 };
  294 
  295 /*
  296  * Get rid of old routes.  When draining, this deletes everything, even when
  297  * the timeout is not expired yet.  When updating, this makes sure that
  298  * nothing has a timeout longer than the current value of rtq_reallyold.
  299  */
  300 static int
  301 in6_rtqkill(struct radix_node *rn, void *rock)
  302 {
  303         struct rtqk_arg *ap = rock;
  304         struct rtentry *rt = (struct rtentry *)rn;
  305         int err;
  306 
  307         if (rt->rt_flags & RTPRF_OURS) {
  308                 ap->found++;
  309 
  310                 if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
  311                         if (rt->rt_refcnt > 0)
  312                                 panic("rtqkill route really not free");
  313 
  314                         err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
  315                                         rt_mask(rt), rt->rt_flags, NULL);
  316                         if (err)
  317                                 log(LOG_WARNING, "in6_rtqkill: error %d", err);
  318                         else
  319                                 ap->killed++;
  320                 } else {
  321                         if (ap->updating &&
  322                             (rt->rt_rmx.rmx_expire - time_uptime >
  323                              rtq_reallyold)) {
  324                                 rt->rt_rmx.rmx_expire =
  325                                     time_uptime + rtq_reallyold;
  326                         }
  327                         ap->nextstop = lmin(ap->nextstop,
  328                                             rt->rt_rmx.rmx_expire);
  329                 }
  330         }
  331 
  332         return 0;
  333 }
  334 
  335 #define RTQ_TIMEOUT     60*10   /* run no less than once every ten minutes */
  336 static int rtq_timeout = RTQ_TIMEOUT;
  337 
  338 static void
  339 in6_rtqtimo(void *rock)
  340 {
  341         struct radix_node_head *rnh = rock;
  342         struct rtqk_arg arg;
  343         struct timeval atv;
  344         static time_t last_adjusted_timeout = 0;
  345 
  346         arg.found = arg.killed = 0;
  347         arg.rnh = rnh;
  348         arg.nextstop = time_uptime + rtq_timeout;
  349         arg.draining = arg.updating = 0;
  350         crit_enter();
  351         rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
  352         crit_exit();
  353 
  354         /*
  355          * Attempt to be somewhat dynamic about this:
  356          * If there are ``too many'' routes sitting around taking up space,
  357          * then crank down the timeout, and see if we can't make some more
  358          * go away.  However, we make sure that we will never adjust more
  359          * than once in rtq_timeout seconds, to keep from cranking down too
  360          * hard.
  361          */
  362         if ((arg.found - arg.killed > rtq_toomany)
  363            && (int)(time_uptime - last_adjusted_timeout) >= rtq_timeout
  364            && rtq_reallyold > rtq_minreallyold) {
  365                 rtq_reallyold = 2*rtq_reallyold / 3;
  366                 if (rtq_reallyold < rtq_minreallyold) {
  367                         rtq_reallyold = rtq_minreallyold;
  368                 }
  369 
  370                 last_adjusted_timeout = time_uptime;
  371 #ifdef DIAGNOSTIC
  372                 log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d",
  373                     rtq_reallyold);
  374 #endif
  375                 arg.found = arg.killed = 0;
  376                 arg.updating = 1;
  377                 crit_enter();
  378                 rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
  379                 crit_exit();
  380         }
  381 
  382         atv.tv_usec = 0;
  383         atv.tv_sec = arg.nextstop - time_uptime;
  384         if ((int)atv.tv_sec < 1) {              /* time shift safety */
  385                 atv.tv_sec = 1;
  386                 arg.nextstop = time_uptime + atv.tv_sec;
  387         }
  388         if ((int)atv.tv_sec > rtq_timeout) {    /* time shift safety */
  389                 atv.tv_sec = rtq_timeout;
  390                 arg.nextstop = time_uptime + atv.tv_sec;
  391         }
  392         callout_reset(&in6_rtqtimo_ch[mycpuid], tvtohz_high(&atv),
  393                       in6_rtqtimo, rock);
  394 }
  395 
  396 /*
  397  * Age old PMTUs.
  398  */
  399 struct mtuex_arg {
  400         struct radix_node_head *rnh;
  401         time_t nextstop;
  402 };
  403 
  404 static int
  405 in6_mtuexpire(struct radix_node *rn, void *rock)
  406 {
  407         struct rtentry *rt = (struct rtentry *)rn;
  408         struct mtuex_arg *ap = rock;
  409 
  410         /* sanity */
  411         if (!rt)
  412                 panic("rt == NULL in in6_mtuexpire");
  413 
  414         if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
  415                 if (rt->rt_rmx.rmx_expire <= time_uptime) {
  416                         rt->rt_flags |= RTF_PROBEMTU;
  417                 } else {
  418                         ap->nextstop = lmin(ap->nextstop,
  419                                         rt->rt_rmx.rmx_expire);
  420                 }
  421         }
  422 
  423         return 0;
  424 }
  425 
  426 #define MTUTIMO_DEFAULT (60*1)
  427 
  428 static void
  429 in6_mtutimo(void *rock)
  430 {
  431         struct radix_node_head *rnh = rock;
  432         struct mtuex_arg arg;
  433         struct timeval atv;
  434 
  435         arg.rnh = rnh;
  436         arg.nextstop = time_uptime + MTUTIMO_DEFAULT;
  437         crit_enter();
  438         rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
  439         crit_exit();
  440 
  441         atv.tv_usec = 0;
  442         atv.tv_sec = arg.nextstop - time_uptime;
  443         if ((int)atv.tv_sec < 1) {              /* time shift safety */
  444                 atv.tv_sec = 1;
  445                 arg.nextstop = time_uptime + atv.tv_sec;
  446         }
  447         if ((int)atv.tv_sec > rtq_timeout) {    /* time shift safety */
  448                 atv.tv_sec = rtq_timeout;
  449                 arg.nextstop = time_uptime + atv.tv_sec;
  450         }
  451         callout_reset(&in6_mtutimo_ch[mycpuid], tvtohz_high(&atv),
  452                       in6_mtutimo, rock);
  453 }
  454 
  455 #if 0
  456 void
  457 in6_rtqdrain(void)
  458 {
  459         struct radix_node_head *rnh = rt_tables[mycpuid][AF_INET6];
  460         struct rtqk_arg arg;
  461 
  462         arg.found = arg.killed = 0;
  463         arg.rnh = rnh;
  464         arg.nextstop = 0;
  465         arg.draining = 1;
  466         arg.updating = 0;
  467         crit_enter();
  468         rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
  469         crit_exit();
  470 }
  471 #endif
  472 
  473 /*
  474  * Initialize our routing tree.
  475  */
  476 int
  477 in6_inithead(void **head, int off)
  478 {
  479         struct radix_node_head *rnh;
  480 
  481         if (!rn_inithead(head, rn_cpumaskhead(mycpuid), off))
  482                 return 0;
  483 
  484         if (head != (void **)&rt_tables[mycpuid][AF_INET6]) /* BOGUS! */
  485                 return 1;       /* only do this for the real routing table */
  486 
  487         rnh = *head;
  488         rnh->rnh_addaddr = in6_addroute;
  489         rnh->rnh_matchaddr = in6_matchroute;
  490         rnh->rnh_close = in6_clsroute;
  491         callout_init(&in6_mtutimo_ch[mycpuid]);
  492         callout_init(&in6_rtqtimo_ch[mycpuid]);
  493         in6_rtqtimo(rnh);       /* kick off timeout first time */
  494         in6_mtutimo(rnh);       /* kick off timeout first time */
  495         return 1;
  496 }

Cache object: 839317648f51c41124235d515667a24e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.