FreeBSD/Linux Kernel Cross Reference
sys/netinet/in_rmx.c


    1 /*
    2  * Copyright 1994, 1995 Massachusetts Institute of Technology
    3  *
    4  * Permission to use, copy, modify, and distribute this software and
    5  * its documentation for any purpose and without fee is hereby
    6  * granted, provided that both the above copyright notice and this
    7  * permission notice appear in all copies, that both the above
    8  * copyright notice and this permission notice appear in all
    9  * supporting documentation, and that the name of M.I.T. not be used
   10  * in advertising or publicity pertaining to distribution of the
   11  * software without specific, written prior permission.  M.I.T. makes
   12  * no representations about the suitability of this software for any
   13  * purpose.  It is provided "as is" without express or implied
   14  * warranty.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
   17  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
   18  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
   19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
   20  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   23  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  * $FreeBSD$
   30  */
   31 
   32 /*
   33  * This code does two things necessary for the enhanced TCP metrics to
   34  * function in a useful manner:
   35  *  1) It marks all non-host routes as `cloning', thus ensuring that
   36  *     every reference to such a route actually gets turned
   37  *     into a reference to a host route to the specific destination
   38  *     requested.
   39  *  2) When such routes lose all their references, it arranges for them
   40  *     to be deleted after they have gone unused for a while, so that
   41  *     a large quantity of stale routing data is not kept in kernel memory
   42  *     indefinitely.  See in_rtqtimo() below for the exact mechanism.
   43  */
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/kernel.h>
   48 #include <sys/sysctl.h>
   49 #include <sys/socket.h>
   50 #include <sys/mbuf.h>
   51 #include <sys/syslog.h>
   52 
   53 #include <net/if.h>
   54 #include <net/route.h>
   55 #include <netinet/in.h>
   56 #include <netinet/in_var.h>
   57 
   58 extern int      in_inithead __P((void **head, int off));
   59 
   60 #define RTPRF_OURS              RTF_PROTO3      /* set on routes we manage */
   61 
   62 /*
   63  * Do what we need to do when inserting a route.
   64  */
   65 static struct radix_node *
   66 in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
   67             struct radix_node *treenodes)
   68 {
   69         struct rtentry *rt = (struct rtentry *)treenodes;
   70         struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
   71         struct radix_node *ret;
   72 
   73         /*
   74          * For IP, all unicast non-host routes are automatically cloning.
   75          */
   76         if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
   77                 rt->rt_flags |= RTF_MULTICAST;
   78 
   79         if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
   80                 rt->rt_flags |= RTF_PRCLONING;
   81         }
   82 
   83         /*
   84          * A little bit of help for both IP output and input:
   85          *   For host routes, we make sure that RTF_BROADCAST
   86          *   is set for anything that looks like a broadcast address.
   87          *   This way, we can avoid an expensive call to in_broadcast()
   88          *   in ip_output() most of the time (because the route passed
   89          *   to ip_output() is almost always a host route).
   90          *
   91          *   We also do the same for local addresses, with the thought
   92          *   that this might one day be used to speed up ip_input().
   93          *
   94          * We also mark routes to multicast addresses as such, because
   95          * it's easy to do and might be useful (but this is much more
   96          * dubious since it's so easy to inspect the address).  (This
   97          * is done above.)
   98          */
   99         if (rt->rt_flags & RTF_HOST) {
  100                 if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
  101                         rt->rt_flags |= RTF_BROADCAST;
  102                 } else {
  103 #define satosin(sa) ((struct sockaddr_in *)sa)
  104                         if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr
  105                             == sin->sin_addr.s_addr)
  106                                 rt->rt_flags |= RTF_LOCAL;
  107 #undef satosin
  108                 }
  109         }
  110 
  111         if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) 
  112             && rt->rt_ifp)
  113                 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
  114 
  115         ret = rn_addroute(v_arg, n_arg, head, treenodes);
  116         if (ret == NULL && rt->rt_flags & RTF_HOST) {
  117                 struct rtentry *rt2;
  118                 /*
  119                  * We are trying to add a host route, but can't.
  120                  * Find out if it is because of an
  121                  * ARP entry and delete it if so.
  122                  */
  123                 rt2 = rtalloc1((struct sockaddr *)sin, 0,
  124                                 RTF_CLONING | RTF_PRCLONING);
  125                 if (rt2) {
  126                         if (rt2->rt_flags & RTF_LLINFO &&
  127                                 rt2->rt_flags & RTF_HOST &&
  128                                 rt2->rt_gateway &&
  129                                 rt2->rt_gateway->sa_family == AF_LINK) {
  130                                 rtrequest(RTM_DELETE,
  131                                           (struct sockaddr *)rt_key(rt2),
  132                                           rt2->rt_gateway,
  133                                           rt_mask(rt2), rt2->rt_flags, 0);
  134                                 ret = rn_addroute(v_arg, n_arg, head,
  135                                         treenodes);
  136                         }
  137                         RTFREE(rt2);
  138                 }
  139         }
  140         return ret;
  141 }
  142 
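/*
 * Illustrative sketch (not part of the original file, hence kept under
 * "#ifdef notdef"): how a hypothetical caller's lookup interacts with the
 * cloning behaviour installed by in_addroute() above.  The function name
 * and the overall flow are assumptions made for illustration; rtalloc1(),
 * RTFREE() and the rt_rmx metrics are the interfaces this file itself
 * relies on.
 */
#ifdef notdef
static void
in_rmx_lookup_example(struct in_addr dst)
{
        struct sockaddr_in sin;
        struct rtentry *rt;

        bzero(&sin, sizeof(sin));
        sin.sin_family = AF_INET;
        sin.sin_len = sizeof(sin);
        sin.sin_addr = dst;

        /*
         * Because in_addroute() marks unicast network routes
         * RTF_PRCLONING, a lookup for a specific destination returns a
         * cloned host route whose rt_rmx metrics are private to that
         * destination (this is where TCP caches its per-peer values).
         */
        rt = rtalloc1((struct sockaddr *)&sin, 1, 0);
        if (rt == NULL)
                return;

        /* ... the caller would use rt->rt_rmx here ... */

        /*
         * Dropping the last reference invokes rnh_close (in_clsroute()
         * below), which arms the rtq_reallyold expiration timer on the
         * cloned route.
         */
        RTFREE(rt);
}
#endif /* notdef */
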
  143 /*
  144  * This code is the inverse of in_clsroute: on first reference, if we
  145  * were managing the route, stop doing so and turn the expiration
  146  * timer off again.
  147  */
  148 static struct radix_node *
  149 in_matroute(void *v_arg, struct radix_node_head *head)
  150 {
  151         struct radix_node *rn = rn_match(v_arg, head);
  152         struct rtentry *rt = (struct rtentry *)rn;
  153 
  154         if(rt && rt->rt_refcnt == 0) { /* this is first reference */
  155                 if(rt->rt_flags & RTPRF_OURS) {
  156                         rt->rt_flags &= ~RTPRF_OURS;
  157                         rt->rt_rmx.rmx_expire = 0;
  158                 }
  159         }
  160         return rn;
  161 }
  162 
  163 static int rtq_reallyold = 60*60;
  164         /* one hour is ``really old'' */
  165 SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire,
  166         CTLFLAG_RW, &rtq_reallyold , 0, "");
  167                                    
  168 static int rtq_minreallyold = 10;
  169         /* never automatically crank down to less */
  170 SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire,
  171         CTLFLAG_RW, &rtq_minreallyold , 0, "");
  172                                    
  173 static int rtq_toomany = 128;
  174         /* 128 cached routes is ``too many'' */
  175 SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache,
  176         CTLFLAG_RW, &rtq_toomany , 0, "");
  177                                    
  178 
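/*
 * Usage note (added, not part of the original file): the SYSCTL_INT()
 * declarations above export these knobs as net.inet.ip.rtexpire,
 * net.inet.ip.rtminexpire and net.inet.ip.rtmaxcache, so they can be
 * inspected and tuned from userland with sysctl(8), e.g.:
 *
 *      sysctl -w net.inet.ip.rtexpire=300    # age out cloned routes faster
 *      sysctl -w net.inet.ip.rtmaxcache=256  # tolerate more cached routes
 *
 * The values shown are arbitrary examples.
 */
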
  179 /*
  180  * On last reference drop, mark the route as belonging to us so that it can be
  181  * timed out.
  182  */
  183 static void
  184 in_clsroute(struct radix_node *rn, struct radix_node_head *head)
  185 {
  186         struct rtentry *rt = (struct rtentry *)rn;
  187 
  188         if(!(rt->rt_flags & RTF_UP))
  189                 return;         /* prophylactic measures */
  190 
  191         if((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
  192                 return;
  193 
  194         if((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS))
  195            != RTF_WASCLONED)
  196                 return;
  197 
  198         /*
  199          * As requested by David Greenman:
  200          * If rtq_reallyold is 0, just delete the route without
  201          * waiting for a timeout cycle to kill it.
  202          */
  203         if(rtq_reallyold != 0) {
  204                 rt->rt_flags |= RTPRF_OURS;
  205                 rt->rt_rmx.rmx_expire = time_second + rtq_reallyold;
  206         } else {
  207                 rtrequest(RTM_DELETE,
  208                           (struct sockaddr *)rt_key(rt),
  209                           rt->rt_gateway, rt_mask(rt),
  210                           rt->rt_flags, 0);
  211         }
  212 }
  213 
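/*
 * Summary of the lifecycle implied by the routines above (added comment,
 * not part of the original file): a cloned host route whose reference
 * count drops to zero is tagged RTPRF_OURS by in_clsroute() and given an
 * rmx_expire of time_second + rtq_reallyold.  If it is looked up again
 * before that deadline, in_matroute() clears the tag and the deadline.
 * Otherwise the periodic in_rtqtimo()/in_rtqkill() walk below deletes it
 * once the deadline has passed.
 */
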
  214 struct rtqk_arg {
  215         struct radix_node_head *rnh;
  216         int draining;
  217         int killed;
  218         int found;
  219         int updating;
  220         time_t nextstop;
  221 };
  222 
  223 /*
  224  * Get rid of old routes.  When draining, this deletes everything, even when
  225  * the timeout is not expired yet.  When updating, this makes sure that
  226  * nothing has a timeout longer than the current value of rtq_reallyold.
  227  */
  228 static int
  229 in_rtqkill(struct radix_node *rn, void *rock)
  230 {
  231         struct rtqk_arg *ap = rock;
  232         struct rtentry *rt = (struct rtentry *)rn;
  233         int err;
  234 
  235         if(rt->rt_flags & RTPRF_OURS) {
  236                 ap->found++;
  237 
  238                 if(ap->draining || rt->rt_rmx.rmx_expire <= time_second) {
  239                         if(rt->rt_refcnt > 0)
  240                                 panic("rtqkill route really not free");
  241 
  242                         err = rtrequest(RTM_DELETE,
  243                                         (struct sockaddr *)rt_key(rt),
  244                                         rt->rt_gateway, rt_mask(rt),
  245                                         rt->rt_flags, 0);
  246                         if(err) {
  247                                 log(LOG_WARNING, "in_rtqkill: error %d\n", err);
  248                         } else {
  249                                 ap->killed++;
  250                         }
  251                 } else {
  252                         if(ap->updating
  253                            && (rt->rt_rmx.rmx_expire - time_second
  254                                > rtq_reallyold)) {
  255                                 rt->rt_rmx.rmx_expire = time_second
  256                                         + rtq_reallyold;
  257                         }
  258                         ap->nextstop = lmin(ap->nextstop,
  259                                             rt->rt_rmx.rmx_expire);
  260                 }
  261         }
  262 
  263         return 0;
  264 }
  265 
  266 #define RTQ_TIMEOUT     60*10   /* run no less than once every ten minutes */
  267 static int rtq_timeout = RTQ_TIMEOUT;
  268 
  269 static void
  270 in_rtqtimo(void *rock)
  271 {
  272         struct radix_node_head *rnh = rock;
  273         struct rtqk_arg arg;
  274         struct timeval atv;
  275         static time_t last_adjusted_timeout = 0;
  276         int s;
  277 
  278         arg.found = arg.killed = 0;
  279         arg.rnh = rnh;
  280         arg.nextstop = time_second + rtq_timeout;
  281         arg.draining = arg.updating = 0;
  282         s = splnet();
  283         rnh->rnh_walktree(rnh, in_rtqkill, &arg);
  284         splx(s);
  285 
  286         /*
  287          * Attempt to be somewhat dynamic about this:
  288          * If there are ``too many'' routes sitting around taking up space,
  289          * then crank down the timeout, and see if we can't make some more
  290          * go away.  However, we make sure that we will never adjust more
  291          * than once in rtq_timeout seconds, to keep from cranking down too
  292          * hard.
  293          */
  294         if((arg.found - arg.killed > rtq_toomany)
  295            && (time_second - last_adjusted_timeout >= rtq_timeout)
  296            && rtq_reallyold > rtq_minreallyold) {
  297                 rtq_reallyold = 2*rtq_reallyold / 3;
  298                 if(rtq_reallyold < rtq_minreallyold) {
  299                         rtq_reallyold = rtq_minreallyold;
  300                 }
  301 
  302                 last_adjusted_timeout = time_second;
  303 #ifdef DIAGNOSTIC
  304                 log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
  305                     rtq_reallyold);
  306 #endif
  307                 arg.found = arg.killed = 0;
  308                 arg.updating = 1;
  309                 s = splnet();
  310                 rnh->rnh_walktree(rnh, in_rtqkill, &arg);
  311                 splx(s);
  312         }
  313 
  314         atv.tv_usec = 0;
  315         atv.tv_sec = arg.nextstop - time_second;
  316         timeout(in_rtqtimo, rock, tvtohz(&atv));
  317 }
  318 
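/*
 * Worked example (added, not part of the original file): with the defaults
 * above, rtq_reallyold starts at 3600 seconds.  Each time a timer run finds
 * more than rtq_toomany (128) surviving cloned routes, and at most once
 * every rtq_timeout (600) seconds, it is scaled by 2/3:
 * 3600 -> 2400 -> 1600 -> 1066 -> 710 -> ... and never drops below
 * rtq_minreallyold (10).  Nothing raises it again automatically; it stays
 * at the reduced value until changed via sysctl.
 */
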
  319 void
  320 in_rtqdrain(void)
  321 {
  322         struct radix_node_head *rnh = rt_tables[AF_INET];
  323         struct rtqk_arg arg;
  324         int s;
  325         arg.found = arg.killed = 0;
  326         arg.rnh = rnh;
  327         arg.nextstop = 0;
  328         arg.draining = 1;
  329         arg.updating = 0;
  330         s = splnet();
  331         rnh->rnh_walktree(rnh, in_rtqkill, &arg);
  332         splx(s);
  333 }
  334 
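/*
 * Note (added, not part of the original file): in_rtqdrain() ignores
 * expiration times entirely (arg.draining = 1), so every unreferenced
 * cloned route is deleted immediately.  It is meant to be called when the
 * system is short of memory; in FreeBSD of this vintage that happens via
 * the protocol drain hook (ip_drain()), though the call site is outside
 * this file.
 */
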
  335 /*
  336  * Initialize our routing tree.
  337  */
  338 int
  339 in_inithead(void **head, int off)
  340 {
  341         struct radix_node_head *rnh;
  342 
  343         if(!rn_inithead(head, off))
  344                 return 0;
  345 
  346         if(head != (void **)&rt_tables[AF_INET]) /* BOGUS! */
  347                 return 1;       /* only do this for the real routing table */
  348 
  349         rnh = *head;
  350         rnh->rnh_addaddr = in_addroute;
  351         rnh->rnh_matchaddr = in_matroute;
  352         rnh->rnh_close = in_clsroute;
  353         in_rtqtimo(rnh);        /* kick off timeout first time */
  354         return 1;
  355 }
  356 
  357 
  358 /*
  359  * This zaps old routes when the interface goes down.
  360  * Currently it doesn't delete static routes; there are
  361  * arguments one could make for both behaviors.  For the moment,
  362  * we will adopt the Principle of Least Surprise and leave them
  363  * alone (with the knowledge that this will not be enough for some
  364  * people).  The ones we really want to get rid of are things like ARP
  365  * entries, since the user might down the interface, walk over to a completely
  366  * different network, and plug back in.
  367  */
  368 struct in_ifadown_arg {
  369         struct radix_node_head *rnh;
  370         struct ifaddr *ifa;
  371 };
  372 
  373 static int
  374 in_ifadownkill(struct radix_node *rn, void *xap)
  375 {
  376         struct in_ifadown_arg *ap = xap;
  377         struct rtentry *rt = (struct rtentry *)rn;
  378         int err;
  379 
  380         if (rt->rt_ifa == ap->ifa && !(rt->rt_flags & RTF_STATIC)) {
  381                 /*
  382                  * We need to disable the automatic prune that happens
  383                  * in this case in rtrequest() because it will blow
  384  * away the pointers that rn_walktree() needs in order to
  385  * continue our descent.  We will end up deleting all
  386                  * the routes that rtrequest() would have in any case,
  387                  * so that behavior is not needed there.
  388                  */
  389                 rt->rt_flags &= ~RTF_PRCLONING;
  390                 err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt),
  391                                 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
  392                 if (err) {
  393                         log(LOG_WARNING, "in_ifadownkill: error %d\n", err);
  394                 }
  395         }
  396         return 0;
  397 }
  398 
  399 int
  400 in_ifadown(struct ifaddr *ifa)
  401 {
  402         struct in_ifadown_arg arg;
  403         struct radix_node_head *rnh;
  404 
  405         if (ifa->ifa_addr->sa_family != AF_INET)
  406                 return 1;
  407 
  408         arg.rnh = rnh = rt_tables[AF_INET];
  409         arg.ifa = ifa;
  410         rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
  411         ifa->ifa_flags &= ~IFA_ROUTE;
  412         return 0;
  413 }
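
/*
 * Illustrative sketch (hypothetical caller, not part of the original file,
 * hence under "#ifdef notdef"): code reacting to an interface going down
 * could walk that interface's address list and call in_ifadown() for each
 * AF_INET address, purging cloned host routes and ARP entries that point
 * at the interface while leaving static routes alone, as described in the
 * comment above in_ifadownkill().
 */
#ifdef notdef
static void
in_rmx_ifdown_example(struct ifnet *ifp)
{
        struct ifaddr *ifa;

        for (ifa = ifp->if_addrhead.tqh_first; ifa != NULL;
             ifa = ifa->ifa_link.tqe_next) {
                if (ifa->ifa_addr->sa_family == AF_INET)
                        (void) in_ifadown(ifa);
        }
}
#endif /* notdef */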



This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.