The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/route.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1980, 1986, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)route.c     8.3.1.1 (Berkeley) 2/23/95
   30  * $FreeBSD: releng/8.1/sys/net/route.c 209524 2010-06-25 21:26:34Z qingli $
   31  */
   32 /************************************************************************
   33  * Note: In this file a 'fib' is a "forwarding information base"        *
   34  * Which is the new name for an in kernel routing (next hop) table.     *
   35  ***********************************************************************/
   36 
   37 #include "opt_inet.h"
   38 #include "opt_route.h"
   39 #include "opt_mrouting.h"
   40 #include "opt_mpath.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/syslog.h>
   45 #include <sys/malloc.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/socket.h>
   48 #include <sys/sysctl.h>
   49 #include <sys/syslog.h>
   50 #include <sys/sysproto.h>
   51 #include <sys/proc.h>
   52 #include <sys/domain.h>
   53 #include <sys/kernel.h>
   54 
   55 #include <net/if.h>
   56 #include <net/if_dl.h>
   57 #include <net/route.h>
   58 #include <net/vnet.h>
   59 #include <net/flowtable.h>
   60 
   61 #ifdef RADIX_MPATH
   62 #include <net/radix_mpath.h>
   63 #endif
   64 
   65 #include <netinet/in.h>
   66 #include <netinet/ip_mroute.h>
   67 
   68 #include <vm/uma.h>
   69 
   70 u_int rt_numfibs = RT_NUMFIBS;
   71 SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
   72 /*
   73  * Allow the boot code to allow LESS than RT_MAXFIBS to be used.
   74  * We can't do more because storage is statically allocated for now.
   75  * (for compatibility reasons.. this will change).
   76  */
   77 TUNABLE_INT("net.fibs", &rt_numfibs);
   78 
   79 /*
   80  * By default add routes to all fibs for new interfaces.
   81  * Once this is set to 0 then only allocate routes on interface
   82  * changes for the FIB of the caller when adding a new set of addresses
   83  * to an interface.  XXX this is a shotgun approach to a problem that needs
   84  * a more fine grained solution.. that will come.
   85  */
   86 u_int rt_add_addr_allfibs = 1;
   87 SYSCTL_INT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
   88     &rt_add_addr_allfibs, 0, "");
   89 TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);
   90 
   91 VNET_DEFINE(struct rtstat, rtstat);
   92 #define V_rtstat        VNET(rtstat)
   93 
   94 VNET_DEFINE(struct radix_node_head *, rt_tables);
   95 #define V_rt_tables     VNET(rt_tables)
   96 
   97 VNET_DEFINE(int, rttrash);              /* routes not in table but not freed */
   98 #define V_rttrash       VNET(rttrash)
   99 
  100 
  101 /* compare two sockaddr structures */
  102 #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
  103 
  104 /*
  105  * Convert a 'struct radix_node *' to a 'struct rtentry *'.
  106  * The operation can be done safely (in this code) because a
  107  * 'struct rtentry' starts with two 'struct radix_node''s, the first
  108  * one representing leaf nodes in the routing tree, which is
  109  * what the code in radix.c passes us as a 'struct radix_node'.
  110  *
  111  * But because there are a lot of assumptions in this conversion,
  112  * do not cast explicitly, but always use the macro below.
  113  */
  114 #define RNTORT(p)       ((struct rtentry *)(p))
  115 
  116 static VNET_DEFINE(uma_zone_t, rtzone);         /* Routing table UMA zone. */
  117 #define V_rtzone        VNET(rtzone)
  118 
  119 #if 0
  120 /* default fib for tunnels to use */
  121 u_int tunnel_fib = 0;
  122 SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, "");
  123 #endif
  124 
  125 /*
  126  * handler for net.my_fibnum
  127  */
  128 static int
  129 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
  130 {
  131         int fibnum;
  132         int error;
  133  
  134         fibnum = curthread->td_proc->p_fibnum;
  135         error = sysctl_handle_int(oidp, &fibnum, 0, req);
  136         return (error);
  137 }
  138 
  139 SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
  140             NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
  141 
  142 static __inline struct radix_node_head **
  143 rt_tables_get_rnh_ptr(int table, int fam)
  144 {
  145         struct radix_node_head **rnh;
  146 
  147         KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.",
  148             __func__));
  149         KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.",
  150             __func__));
  151 
  152         /* rnh is [fib=0][af=0]. */
  153         rnh = (struct radix_node_head **)V_rt_tables;
  154         /* Get the offset to the requested table and fam. */
  155         rnh += table * (AF_MAX+1) + fam;
  156 
  157         return (rnh);
  158 }
  159 
  160 struct radix_node_head *
  161 rt_tables_get_rnh(int table, int fam)
  162 {
  163 
  164         return (*rt_tables_get_rnh_ptr(table, fam));
  165 }
  166 
  167 /*
  168  * route initialization must occur before ip6_init2(), which happenas at
  169  * SI_ORDER_MIDDLE.
  170  */
  171 static void
  172 route_init(void)
  173 {
  174         struct domain *dom;
  175         int max_keylen = 0;
  176 
  177         /* whack the tunable ints into  line. */
  178         if (rt_numfibs > RT_MAXFIBS)
  179                 rt_numfibs = RT_MAXFIBS;
  180         if (rt_numfibs == 0)
  181                 rt_numfibs = 1;
  182 
  183         for (dom = domains; dom; dom = dom->dom_next)
  184                 if (dom->dom_maxrtkey > max_keylen)
  185                         max_keylen = dom->dom_maxrtkey;
  186 
  187         rn_init(max_keylen);    /* init all zeroes, all ones, mask table */
  188 }
  189 SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
  190 
  191 static void
  192 vnet_route_init(const void *unused __unused)
  193 {
  194         struct domain *dom;
  195         struct radix_node_head **rnh;
  196         int table;
  197         int fam;
  198 
  199         V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
  200             sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO);
  201 
  202         V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
  203             NULL, NULL, UMA_ALIGN_PTR, 0);
  204         for (dom = domains; dom; dom = dom->dom_next) {
  205                 if (dom->dom_rtattach)  {
  206                         for  (table = 0; table < rt_numfibs; table++) {
  207                                 if ( (fam = dom->dom_family) == AF_INET ||
  208                                     table == 0) {
  209                                         /* for now only AF_INET has > 1 table */
  210                                         /* XXX MRT 
  211                                          * rtattach will be also called
  212                                          * from vfs_export.c but the
  213                                          * offset will be 0
  214                                          * (only for AF_INET and AF_INET6
  215                                          * which don't need it anyhow)
  216                                          */
  217                                         rnh = rt_tables_get_rnh_ptr(table, fam);
  218                                         if (rnh == NULL)
  219                                                 panic("%s: rnh NULL", __func__);
  220                                         dom->dom_rtattach((void **)rnh,
  221                                             dom->dom_rtoffset);
  222                                 } else {
  223                                         break;
  224                                 }
  225                         }
  226                 }
  227         }
  228 }
  229 VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
  230     vnet_route_init, 0);
  231 
  232 #ifdef VIMAGE
  233 static void
  234 vnet_route_uninit(const void *unused __unused)
  235 {
  236         int table;
  237         int fam;
  238         struct domain *dom;
  239         struct radix_node_head **rnh;
  240 
  241         for (dom = domains; dom; dom = dom->dom_next) {
  242                 if (dom->dom_rtdetach) {
  243                         for (table = 0; table < rt_numfibs; table++) {
  244                                 if ( (fam = dom->dom_family) == AF_INET ||
  245                                     table == 0) {
  246                                         /* For now only AF_INET has > 1 tbl. */
  247                                         rnh = rt_tables_get_rnh_ptr(table, fam);
  248                                         if (rnh == NULL)
  249                                                 panic("%s: rnh NULL", __func__);
  250                                         dom->dom_rtdetach((void **)rnh,
  251                                             dom->dom_rtoffset);
  252                                 } else {
  253                                         break;
  254                                 }
  255                         }
  256                 }
  257         }
  258 }
  259 VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
  260     vnet_route_uninit, 0);
  261 #endif
  262 
  263 #ifndef _SYS_SYSPROTO_H_
  264 struct setfib_args {
  265         int     fibnum;
  266 };
  267 #endif
  268 int
  269 setfib(struct thread *td, struct setfib_args *uap)
  270 {
  271         if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
  272                 return EINVAL;
  273         td->td_proc->p_fibnum = uap->fibnum;
  274         return (0);
  275 }
  276 
  277 /*
  278  * Packet routing routines.
  279  */
  280 void
  281 rtalloc(struct route *ro)
  282 {
  283         rtalloc_ign_fib(ro, 0UL, 0);
  284 }
  285 
  286 void
  287 rtalloc_fib(struct route *ro, u_int fibnum)
  288 {
  289         rtalloc_ign_fib(ro, 0UL, fibnum);
  290 }
  291 
  292 void
  293 rtalloc_ign(struct route *ro, u_long ignore)
  294 {
  295         struct rtentry *rt;
  296 
  297         if ((rt = ro->ro_rt) != NULL) {
  298                 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
  299                         return;
  300                 RTFREE(rt);
  301                 ro->ro_rt = NULL;
  302         }
  303         ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0);
  304         if (ro->ro_rt)
  305                 RT_UNLOCK(ro->ro_rt);
  306 }
  307 
  308 void
  309 rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
  310 {
  311         struct rtentry *rt;
  312 
  313         if ((rt = ro->ro_rt) != NULL) {
  314                 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
  315                         return;
  316                 RTFREE(rt);
  317                 ro->ro_rt = NULL;
  318         }
  319         ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
  320         if (ro->ro_rt)
  321                 RT_UNLOCK(ro->ro_rt);
  322 }
  323 
  324 /*
  325  * Look up the route that matches the address given
  326  * Or, at least try.. Create a cloned route if needed.
  327  *
  328  * The returned route, if any, is locked.
  329  */
  330 struct rtentry *
  331 rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
  332 {
  333         return (rtalloc1_fib(dst, report, ignflags, 0));
  334 }
  335 
  336 struct rtentry *
  337 rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
  338                     u_int fibnum)
  339 {
  340         struct radix_node_head *rnh;
  341         struct rtentry *rt;
  342         struct radix_node *rn;
  343         struct rtentry *newrt;
  344         struct rt_addrinfo info;
  345         int err = 0, msgtype = RTM_MISS;
  346         int needlock;
  347 
  348         KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
  349         if (dst->sa_family != AF_INET)  /* Only INET supports > 1 fib now */
  350                 fibnum = 0;
  351         rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
  352         newrt = NULL;
  353         /*
  354          * Look up the address in the table for that Address Family
  355          */
  356         if (rnh == NULL) {
  357                 V_rtstat.rts_unreach++;
  358                 goto miss;
  359         }
  360         needlock = !(ignflags & RTF_RNH_LOCKED);
  361         if (needlock)
  362                 RADIX_NODE_HEAD_RLOCK(rnh);
  363 #ifdef INVARIANTS       
  364         else
  365                 RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
  366 #endif
  367         rn = rnh->rnh_matchaddr(dst, rnh);
  368         if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
  369                 newrt = rt = RNTORT(rn);
  370                 RT_LOCK(newrt);
  371                 RT_ADDREF(newrt);
  372                 if (needlock)
  373                         RADIX_NODE_HEAD_RUNLOCK(rnh);
  374                 goto done;
  375 
  376         } else if (needlock)
  377                 RADIX_NODE_HEAD_RUNLOCK(rnh);
  378         
  379         /*
  380          * Either we hit the root or couldn't find any match,
  381          * Which basically means
  382          * "caint get there frm here"
  383          */
  384         V_rtstat.rts_unreach++;
  385 miss:
  386         if (report) {
  387                 /*
  388                  * If required, report the failure to the supervising
  389                  * Authorities.
  390                  * For a delete, this is not an error. (report == 0)
  391                  */
  392                 bzero(&info, sizeof(info));
  393                 info.rti_info[RTAX_DST] = dst;
  394                 rt_missmsg(msgtype, &info, 0, err);
  395         }       
  396 done:
  397         if (newrt)
  398                 RT_LOCK_ASSERT(newrt);
  399         return (newrt);
  400 }
  401 
  402 /*
  403  * Remove a reference count from an rtentry.
  404  * If the count gets low enough, take it out of the routing table
  405  */
  406 void
  407 rtfree(struct rtentry *rt)
  408 {
  409         struct radix_node_head *rnh;
  410 
  411         KASSERT(rt != NULL,("%s: NULL rt", __func__));
  412         rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
  413         KASSERT(rnh != NULL,("%s: NULL rnh", __func__));
  414 
  415         RT_LOCK_ASSERT(rt);
  416 
  417         /*
  418          * The callers should use RTFREE_LOCKED() or RTFREE(), so
  419          * we should come here exactly with the last reference.
  420          */
  421         RT_REMREF(rt);
  422         if (rt->rt_refcnt > 0) {
  423                 log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt);
  424                 goto done;
  425         }
  426 
  427         /*
  428          * On last reference give the "close method" a chance
  429          * to cleanup private state.  This also permits (for
  430          * IPv4 and IPv6) a chance to decide if the routing table
  431          * entry should be purged immediately or at a later time.
  432          * When an immediate purge is to happen the close routine
  433          * typically calls rtexpunge which clears the RTF_UP flag
  434          * on the entry so that the code below reclaims the storage.
  435          */
  436         if (rt->rt_refcnt == 0 && rnh->rnh_close)
  437                 rnh->rnh_close((struct radix_node *)rt, rnh);
  438 
  439         /*
  440          * If we are no longer "up" (and ref == 0)
  441          * then we can free the resources associated
  442          * with the route.
  443          */
  444         if ((rt->rt_flags & RTF_UP) == 0) {
  445                 if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
  446                         panic("rtfree 2");
  447                 /*
  448                  * the rtentry must have been removed from the routing table
  449                  * so it is represented in rttrash.. remove that now.
  450                  */
  451                 V_rttrash--;
  452 #ifdef  DIAGNOSTIC
  453                 if (rt->rt_refcnt < 0) {
  454                         printf("rtfree: %p not freed (neg refs)\n", rt);
  455                         goto done;
  456                 }
  457 #endif
  458                 /*
  459                  * release references on items we hold them on..
  460                  * e.g other routes and ifaddrs.
  461                  */
  462                 if (rt->rt_ifa)
  463                         ifa_free(rt->rt_ifa);
  464                 /*
  465                  * The key is separatly alloc'd so free it (see rt_setgate()).
  466                  * This also frees the gateway, as they are always malloc'd
  467                  * together.
  468                  */
  469                 Free(rt_key(rt));
  470 
  471                 /*
  472                  * and the rtentry itself of course
  473                  */
  474                 RT_LOCK_DESTROY(rt);
  475                 uma_zfree(V_rtzone, rt);
  476                 return;
  477         }
  478 done:
  479         RT_UNLOCK(rt);
  480 }
  481 
  482 
  483 /*
  484  * Force a routing table entry to the specified
  485  * destination to go through the given gateway.
  486  * Normally called as a result of a routing redirect
  487  * message from the network layer.
  488  */
  489 void
  490 rtredirect(struct sockaddr *dst,
  491         struct sockaddr *gateway,
  492         struct sockaddr *netmask,
  493         int flags,
  494         struct sockaddr *src)
  495 {
  496         rtredirect_fib(dst, gateway, netmask, flags, src, 0);
  497 }
  498 
  499 void
  500 rtredirect_fib(struct sockaddr *dst,
  501         struct sockaddr *gateway,
  502         struct sockaddr *netmask,
  503         int flags,
  504         struct sockaddr *src,
  505         u_int fibnum)
  506 {
  507         struct rtentry *rt, *rt0 = NULL;
  508         int error = 0;
  509         short *stat = NULL;
  510         struct rt_addrinfo info;
  511         struct ifaddr *ifa;
  512         struct radix_node_head *rnh;
  513 
  514         ifa = NULL;
  515         rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
  516         if (rnh == NULL) {
  517                 error = EAFNOSUPPORT;
  518                 goto out;
  519         }
  520 
  521         /* verify the gateway is directly reachable */
  522         if ((ifa = ifa_ifwithnet(gateway, 0)) == NULL) {
  523                 error = ENETUNREACH;
  524                 goto out;
  525         }
  526         rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */
  527         /*
  528          * If the redirect isn't from our current router for this dst,
  529          * it's either old or wrong.  If it redirects us to ourselves,
  530          * we have a routing loop, perhaps as a result of an interface
  531          * going down recently.
  532          */
  533         if (!(flags & RTF_DONE) && rt &&
  534              (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
  535                 error = EINVAL;
  536         else if (ifa_ifwithaddr_check(gateway))
  537                 error = EHOSTUNREACH;
  538         if (error)
  539                 goto done;
  540         /*
  541          * Create a new entry if we just got back a wildcard entry
  542          * or the the lookup failed.  This is necessary for hosts
  543          * which use routing redirects generated by smart gateways
  544          * to dynamically build the routing tables.
  545          */
  546         if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
  547                 goto create;
  548         /*
  549          * Don't listen to the redirect if it's
  550          * for a route to an interface.
  551          */
  552         if (rt->rt_flags & RTF_GATEWAY) {
  553                 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
  554                         /*
  555                          * Changing from route to net => route to host.
  556                          * Create new route, rather than smashing route to net.
  557                          */
  558                 create:
  559                         rt0 = rt;
  560                         rt = NULL;
  561                 
  562                         flags |=  RTF_GATEWAY | RTF_DYNAMIC;
  563                         bzero((caddr_t)&info, sizeof(info));
  564                         info.rti_info[RTAX_DST] = dst;
  565                         info.rti_info[RTAX_GATEWAY] = gateway;
  566                         info.rti_info[RTAX_NETMASK] = netmask;
  567                         info.rti_ifa = ifa;
  568                         info.rti_flags = flags;
  569                         if (rt0 != NULL)
  570                                 RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */
  571                         error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
  572                         if (rt != NULL) {
  573                                 RT_LOCK(rt);
  574                                 if (rt0 != NULL)
  575                                         EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
  576                                 flags = rt->rt_flags;
  577                         }
  578                         if (rt0 != NULL)
  579                                 RTFREE(rt0);
  580                         
  581                         stat = &V_rtstat.rts_dynamic;
  582                 } else {
  583                         struct rtentry *gwrt;
  584 
  585                         /*
  586                          * Smash the current notion of the gateway to
  587                          * this destination.  Should check about netmask!!!
  588                          */
  589                         rt->rt_flags |= RTF_MODIFIED;
  590                         flags |= RTF_MODIFIED;
  591                         stat = &V_rtstat.rts_newgateway;
  592                         /*
  593                          * add the key and gateway (in one malloc'd chunk).
  594                          */
  595                         RT_UNLOCK(rt);
  596                         RADIX_NODE_HEAD_LOCK(rnh);
  597                         RT_LOCK(rt);
  598                         rt_setgate(rt, rt_key(rt), gateway);
  599                         gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED);
  600                         RADIX_NODE_HEAD_UNLOCK(rnh);
  601                         EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
  602                         RTFREE_LOCKED(gwrt);
  603                 }
  604         } else
  605                 error = EHOSTUNREACH;
  606 done:
  607         if (rt)
  608                 RTFREE_LOCKED(rt);
  609 out:
  610         if (error)
  611                 V_rtstat.rts_badredirect++;
  612         else if (stat != NULL)
  613                 (*stat)++;
  614         bzero((caddr_t)&info, sizeof(info));
  615         info.rti_info[RTAX_DST] = dst;
  616         info.rti_info[RTAX_GATEWAY] = gateway;
  617         info.rti_info[RTAX_NETMASK] = netmask;
  618         info.rti_info[RTAX_AUTHOR] = src;
  619         rt_missmsg(RTM_REDIRECT, &info, flags, error);
  620         if (ifa != NULL)
  621                 ifa_free(ifa);
  622 }
  623 
  624 int
  625 rtioctl(u_long req, caddr_t data)
  626 {
  627         return (rtioctl_fib(req, data, 0));
  628 }
  629 
  630 /*
  631  * Routing table ioctl interface.
  632  */
  633 int
  634 rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
  635 {
  636 
  637         /*
  638          * If more ioctl commands are added here, make sure the proper
  639          * super-user checks are being performed because it is possible for
  640          * prison-root to make it this far if raw sockets have been enabled
  641          * in jails.
  642          */
  643 #ifdef INET
  644         /* Multicast goop, grrr... */
  645         return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
  646 #else /* INET */
  647         return ENXIO;
  648 #endif /* INET */
  649 }
  650 
  651 /*
  652  * For both ifa_ifwithroute() routines, 'ifa' is returned referenced.
  653  */
  654 struct ifaddr *
  655 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
  656 {
  657         return (ifa_ifwithroute_fib(flags, dst, gateway, 0));
  658 }
  659 
  660 struct ifaddr *
  661 ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
  662                                 u_int fibnum)
  663 {
  664         register struct ifaddr *ifa;
  665         int not_found = 0;
  666 
  667         if ((flags & RTF_GATEWAY) == 0) {
  668                 /*
  669                  * If we are adding a route to an interface,
  670                  * and the interface is a pt to pt link
  671                  * we should search for the destination
  672                  * as our clue to the interface.  Otherwise
  673                  * we can use the local address.
  674                  */
  675                 ifa = NULL;
  676                 if (flags & RTF_HOST)
  677                         ifa = ifa_ifwithdstaddr(dst);
  678                 if (ifa == NULL)
  679                         ifa = ifa_ifwithaddr(gateway);
  680         } else {
  681                 /*
  682                  * If we are adding a route to a remote net
  683                  * or host, the gateway may still be on the
  684                  * other end of a pt to pt link.
  685                  */
  686                 ifa = ifa_ifwithdstaddr(gateway);
  687         }
  688         if (ifa == NULL)
  689                 ifa = ifa_ifwithnet(gateway, 0);
  690         if (ifa == NULL) {
  691                 struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum);
  692                 if (rt == NULL)
  693                         return (NULL);
  694                 /*
  695                  * dismiss a gateway that is reachable only
  696                  * through the default router
  697                  */
  698                 switch (gateway->sa_family) {
  699                 case AF_INET:
  700                         if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY)
  701                                 not_found = 1;
  702                         break;
  703                 case AF_INET6:
  704                         if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr))
  705                                 not_found = 1;
  706                         break;
  707                 default:
  708                         break;
  709                 }
  710                 if (!not_found && rt->rt_ifa != NULL) {
  711                         ifa = rt->rt_ifa;
  712                         ifa_ref(ifa);
  713                 }
  714                 RT_REMREF(rt);
  715                 RT_UNLOCK(rt);
  716                 if (not_found || ifa == NULL)
  717                         return (NULL);
  718         }
  719         if (ifa->ifa_addr->sa_family != dst->sa_family) {
  720                 struct ifaddr *oifa = ifa;
  721                 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
  722                 if (ifa == NULL)
  723                         ifa = oifa;
  724                 else
  725                         ifa_free(oifa);
  726         }
  727         return (ifa);
  728 }
  729 
  730 /*
  731  * Do appropriate manipulations of a routing tree given
  732  * all the bits of info needed
  733  */
  734 int
  735 rtrequest(int req,
  736         struct sockaddr *dst,
  737         struct sockaddr *gateway,
  738         struct sockaddr *netmask,
  739         int flags,
  740         struct rtentry **ret_nrt)
  741 {
  742         return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0));
  743 }
  744 
  745 int
  746 rtrequest_fib(int req,
  747         struct sockaddr *dst,
  748         struct sockaddr *gateway,
  749         struct sockaddr *netmask,
  750         int flags,
  751         struct rtentry **ret_nrt,
  752         u_int fibnum)
  753 {
  754         struct rt_addrinfo info;
  755 
  756         if (dst->sa_len == 0)
  757                 return(EINVAL);
  758 
  759         bzero((caddr_t)&info, sizeof(info));
  760         info.rti_flags = flags;
  761         info.rti_info[RTAX_DST] = dst;
  762         info.rti_info[RTAX_GATEWAY] = gateway;
  763         info.rti_info[RTAX_NETMASK] = netmask;
  764         return rtrequest1_fib(req, &info, ret_nrt, fibnum);
  765 }
  766 
  767 /*
  768  * These (questionable) definitions of apparent local variables apply
  769  * to the next two functions.  XXXXXX!!!
  770  */
  771 #define dst     info->rti_info[RTAX_DST]
  772 #define gateway info->rti_info[RTAX_GATEWAY]
  773 #define netmask info->rti_info[RTAX_NETMASK]
  774 #define ifaaddr info->rti_info[RTAX_IFA]
  775 #define ifpaddr info->rti_info[RTAX_IFP]
  776 #define flags   info->rti_flags
  777 
  778 int
  779 rt_getifa(struct rt_addrinfo *info)
  780 {
  781         return (rt_getifa_fib(info, 0));
  782 }
  783 
  784 /*
  785  * Look up rt_addrinfo for a specific fib.  Note that if rti_ifa is defined,
  786  * it will be referenced so the caller must free it.
  787  */
  788 int
  789 rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
  790 {
  791         struct ifaddr *ifa;
  792         int error = 0;
  793 
  794         /*
  795          * ifp may be specified by sockaddr_dl
  796          * when protocol address is ambiguous.
  797          */
  798         if (info->rti_ifp == NULL && ifpaddr != NULL &&
  799             ifpaddr->sa_family == AF_LINK &&
  800             (ifa = ifa_ifwithnet(ifpaddr, 0)) != NULL) {
  801                 info->rti_ifp = ifa->ifa_ifp;
  802                 ifa_free(ifa);
  803         }
  804         if (info->rti_ifa == NULL && ifaaddr != NULL)
  805                 info->rti_ifa = ifa_ifwithaddr(ifaaddr);
  806         if (info->rti_ifa == NULL) {
  807                 struct sockaddr *sa;
  808 
  809                 sa = ifaaddr != NULL ? ifaaddr :
  810                     (gateway != NULL ? gateway : dst);
  811                 if (sa != NULL && info->rti_ifp != NULL)
  812                         info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
  813                 else if (dst != NULL && gateway != NULL)
  814                         info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway,
  815                                                         fibnum);
  816                 else if (sa != NULL)
  817                         info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa,
  818                                                         fibnum);
  819         }
  820         if ((ifa = info->rti_ifa) != NULL) {
  821                 if (info->rti_ifp == NULL)
  822                         info->rti_ifp = ifa->ifa_ifp;
  823         } else
  824                 error = ENETUNREACH;
  825         return (error);
  826 }
  827 
  828 /*
  829  * Expunges references to a route that's about to be reclaimed.
  830  * The route must be locked.
  831  */
  832 int
  833 rtexpunge(struct rtentry *rt)
  834 {
  835 #if !defined(RADIX_MPATH)
  836         struct radix_node *rn;
  837 #else
  838         struct rt_addrinfo info;
  839         int fib;
  840         struct rtentry *rt0;
  841 #endif
  842         struct radix_node_head *rnh;
  843         struct ifaddr *ifa;
  844         int error = 0;
  845 
  846         /*
  847          * Find the correct routing tree to use for this Address Family
  848          */
  849         rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
  850         RT_LOCK_ASSERT(rt);
  851         if (rnh == NULL)
  852                 return (EAFNOSUPPORT);
  853         RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
  854 
  855 #ifdef RADIX_MPATH
  856         fib = rt->rt_fibnum;
  857         bzero(&info, sizeof(info));
  858         info.rti_ifp = rt->rt_ifp;
  859         info.rti_flags = RTF_RNH_LOCKED;
  860         info.rti_info[RTAX_DST] = rt_key(rt);
  861         info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr;
  862 
  863         RT_UNLOCK(rt);
  864         error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib);
  865 
  866         if (error == 0 && rt0 != NULL) {
  867                 rt = rt0;
  868                 RT_LOCK(rt);
  869         } else if (error != 0) {
  870                 RT_LOCK(rt);
  871                 return (error);
  872         }
  873 #else
  874         /*
  875          * Remove the item from the tree; it should be there,
  876          * but when callers invoke us blindly it may not (sigh).
  877          */
  878         rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
  879         if (rn == NULL) {
  880                 error = ESRCH;
  881                 goto bad;
  882         }
  883         KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
  884                 ("unexpected flags 0x%x", rn->rn_flags));
  885         KASSERT(rt == RNTORT(rn),
  886                 ("lookup mismatch, rt %p rn %p", rt, rn));
  887 #endif /* RADIX_MPATH */
  888 
  889         rt->rt_flags &= ~RTF_UP;
  890 
  891         /*
  892          * Give the protocol a chance to keep things in sync.
  893          */
  894         if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
  895                 struct rt_addrinfo info;
  896 
  897                 bzero((caddr_t)&info, sizeof(info));
  898                 info.rti_flags = rt->rt_flags;
  899                 info.rti_info[RTAX_DST] = rt_key(rt);
  900                 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
  901                 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
  902                 ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
  903         }
  904 
  905         /*
  906          * one more rtentry floating around that is not
  907          * linked to the routing table.
  908          */
  909         V_rttrash++;
  910 #if !defined(RADIX_MPATH)
  911 bad:
  912 #endif
  913         return (error);
  914 }
  915 
  916 #ifdef RADIX_MPATH
  917 static int
  918 rn_mpath_update(int req, struct rt_addrinfo *info,
  919     struct radix_node_head *rnh, struct rtentry **ret_nrt)
  920 {
  921         /*
  922          * if we got multipath routes, we require users to specify
  923          * a matching RTAX_GATEWAY.
  924          */
  925         struct rtentry *rt, *rto = NULL;
  926         register struct radix_node *rn;
  927         int error = 0;
  928 
  929         rn = rnh->rnh_matchaddr(dst, rnh);
  930         if (rn == NULL)
  931                 return (ESRCH);
  932         rto = rt = RNTORT(rn);
  933         rt = rt_mpath_matchgate(rt, gateway);
  934         if (rt == NULL)
  935                 return (ESRCH);
  936         /*
  937          * this is the first entry in the chain
  938          */
  939         if (rto == rt) {
  940                 rn = rn_mpath_next((struct radix_node *)rt);
  941                 /*
  942                  * there is another entry, now it's active
  943                  */
  944                 if (rn) {
  945                         rto = RNTORT(rn);
  946                         RT_LOCK(rto);
  947                         rto->rt_flags |= RTF_UP;
  948                         RT_UNLOCK(rto);
  949                 } else if (rt->rt_flags & RTF_GATEWAY) {
  950                         /*
  951                          * For gateway routes, we need to 
  952                          * make sure that we we are deleting
  953                          * the correct gateway. 
  954                          * rt_mpath_matchgate() does not 
  955                          * check the case when there is only
  956                          * one route in the chain.  
  957                          */
  958                         if (gateway &&
  959                             (rt->rt_gateway->sa_len != gateway->sa_len ||
  960                                 memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
  961                                 error = ESRCH;
  962                         else {
  963                                 /*
  964                                  * remove from tree before returning it
  965                                  * to the caller
  966                                  */
  967                                 rn = rnh->rnh_deladdr(dst, netmask, rnh);
  968                                 KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
  969                                 goto gwdelete;
  970                         }
  971                         
  972                 }
  973                 /*
  974                  * use the normal delete code to remove
  975                  * the first entry
  976                  */
  977                 if (req != RTM_DELETE) 
  978                         goto nondelete;
  979 
  980                 error = ENOENT;
  981                 goto done;
  982         }
  983                 
  984         /*
  985          * if the entry is 2nd and on up
  986          */
  987         if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
  988                 panic ("rtrequest1: rt_mpath_deldup");
  989 gwdelete:
  990         RT_LOCK(rt);
  991         RT_ADDREF(rt);
  992         if (req == RTM_DELETE) {
  993                 rt->rt_flags &= ~RTF_UP;
  994                 /*
  995                  * One more rtentry floating around that is not
  996                  * linked to the routing table. rttrash will be decremented
  997                  * when RTFREE(rt) is eventually called.
  998                  */
  999                 V_rttrash++;
 1000         }
 1001         
 1002 nondelete:
 1003         if (req != RTM_DELETE)
 1004                 panic("unrecognized request %d", req);
 1005         
 1006 
 1007         /*
 1008          * If the caller wants it, then it can have it,
 1009          * but it's up to it to free the rtentry as we won't be
 1010          * doing it.
 1011          */
 1012         if (ret_nrt) {
 1013                 *ret_nrt = rt;
 1014                 RT_UNLOCK(rt);
 1015         } else
 1016                 RTFREE_LOCKED(rt);
 1017 done:
 1018         return (error);
 1019 }
 1020 #endif
 1021 
 1022 int
 1023 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 1024                                 u_int fibnum)
 1025 {
 1026         int error = 0, needlock = 0;
 1027         register struct rtentry *rt;
 1028 #ifdef FLOWTABLE
 1029         register struct rtentry *rt0;
 1030 #endif
 1031         register struct radix_node *rn;
 1032         register struct radix_node_head *rnh;
 1033         struct ifaddr *ifa;
 1034         struct sockaddr *ndst;
 1035 #define senderr(x) { error = x ; goto bad; }
 1036 
 1037         KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
 1038         if (dst->sa_family != AF_INET)  /* Only INET supports > 1 fib now */
 1039                 fibnum = 0;
 1040         /*
 1041          * Find the correct routing tree to use for this Address Family
 1042          */
 1043         rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
 1044         if (rnh == NULL)
 1045                 return (EAFNOSUPPORT);
 1046         needlock = ((flags & RTF_RNH_LOCKED) == 0);
 1047         flags &= ~RTF_RNH_LOCKED;
 1048         if (needlock)
 1049                 RADIX_NODE_HEAD_LOCK(rnh);
 1050         else
 1051                 RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
 1052         /*
 1053          * If we are adding a host route then we don't want to put
 1054          * a netmask in the tree, nor do we want to clone it.
 1055          */
 1056         if (flags & RTF_HOST)
 1057                 netmask = NULL;
 1058 
 1059         switch (req) {
 1060         case RTM_DELETE:
 1061 #ifdef RADIX_MPATH
 1062                 if (rn_mpath_capable(rnh)) {
 1063                         error = rn_mpath_update(req, info, rnh, ret_nrt);
 1064                         /*
 1065                          * "bad" holds true for the success case
 1066                          * as well
 1067                          */
 1068                         if (error != ENOENT)
 1069                                 goto bad;
 1070                         error = 0;
 1071                 }
 1072 #endif
 1073                 /*
 1074                  * Remove the item from the tree and return it.
 1075                  * Complain if it is not there and do no more processing.
 1076                  */
 1077                 rn = rnh->rnh_deladdr(dst, netmask, rnh);
 1078                 if (rn == NULL)
 1079                         senderr(ESRCH);
 1080                 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 1081                         panic ("rtrequest delete");
 1082                 rt = RNTORT(rn);
 1083                 RT_LOCK(rt);
 1084                 RT_ADDREF(rt);
 1085                 rt->rt_flags &= ~RTF_UP;
 1086 
 1087                 /*
 1088                  * give the protocol a chance to keep things in sync.
 1089                  */
 1090                 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
 1091                         ifa->ifa_rtrequest(RTM_DELETE, rt, info);
 1092 
 1093                 /*
 1094                  * One more rtentry floating around that is not
 1095                  * linked to the routing table. rttrash will be decremented
 1096                  * when RTFREE(rt) is eventually called.
 1097                  */
 1098                 V_rttrash++;
 1099 
 1100                 /*
 1101                  * If the caller wants it, then it can have it,
 1102                  * but it's up to it to free the rtentry as we won't be
 1103                  * doing it.
 1104                  */
 1105                 if (ret_nrt) {
 1106                         *ret_nrt = rt;
 1107                         RT_UNLOCK(rt);
 1108                 } else
 1109                         RTFREE_LOCKED(rt);
 1110                 break;
 1111         case RTM_RESOLVE:
 1112                 /*
 1113                  * resolve was only used for route cloning
 1114                  * here for compat
 1115                  */
 1116                 break;
 1117         case RTM_ADD:
 1118                 if ((flags & RTF_GATEWAY) && !gateway)
 1119                         senderr(EINVAL);
 1120                 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 
 1121                     (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
 1122                         senderr(EINVAL);
 1123 
 1124                 if (info->rti_ifa == NULL) {
 1125                         error = rt_getifa_fib(info, fibnum);
 1126                         if (error)
 1127                                 senderr(error);
 1128                 } else
 1129                         ifa_ref(info->rti_ifa);
 1130                 ifa = info->rti_ifa;
 1131                 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
 1132                 if (rt == NULL) {
 1133                         if (ifa != NULL)
 1134                                 ifa_free(ifa);
 1135                         senderr(ENOBUFS);
 1136                 }
 1137                 RT_LOCK_INIT(rt);
 1138                 rt->rt_flags = RTF_UP | flags;
 1139                 rt->rt_fibnum = fibnum;
 1140                 /*
 1141                  * Add the gateway. Possibly re-malloc-ing the storage for it
 1142                  * 
 1143                  */
 1144                 RT_LOCK(rt);
 1145                 if ((error = rt_setgate(rt, dst, gateway)) != 0) {
 1146                         RT_LOCK_DESTROY(rt);
 1147                         if (ifa != NULL)
 1148                                 ifa_free(ifa);
 1149                         uma_zfree(V_rtzone, rt);
 1150                         senderr(error);
 1151                 }
 1152 
 1153                 /*
 1154                  * point to the (possibly newly malloc'd) dest address.
 1155                  */
 1156                 ndst = (struct sockaddr *)rt_key(rt);
 1157 
 1158                 /*
 1159                  * make sure it contains the value we want (masked if needed).
 1160                  */
 1161                 if (netmask) {
 1162                         rt_maskedcopy(dst, ndst, netmask);
 1163                 } else
 1164                         bcopy(dst, ndst, dst->sa_len);
 1165 
 1166                 /*
 1167                  * We use the ifa reference returned by rt_getifa_fib().
 1168                  * This moved from below so that rnh->rnh_addaddr() can
 1169                  * examine the ifa and  ifa->ifa_ifp if it so desires.
 1170                  */
 1171                 rt->rt_ifa = ifa;
 1172                 rt->rt_ifp = ifa->ifa_ifp;
 1173                 rt->rt_rmx.rmx_weight = 1;
 1174 
 1175 #ifdef RADIX_MPATH
 1176                 /* do not permit exactly the same dst/mask/gw pair */
 1177                 if (rn_mpath_capable(rnh) &&
 1178                         rt_mpath_conflict(rnh, rt, netmask)) {
 1179                         if (rt->rt_ifa) {
 1180                                 ifa_free(rt->rt_ifa);
 1181                         }
 1182                         Free(rt_key(rt));
 1183                         RT_LOCK_DESTROY(rt);
 1184                         uma_zfree(V_rtzone, rt);
 1185                         senderr(EEXIST);
 1186                 }
 1187 #endif
 1188 
 1189 #ifdef FLOWTABLE
 1190                 rt0 = NULL;
 1191                 /* XXX
 1192                  * "flow-table" only support IPv4 at the moment.
 1193                  */
 1194 #ifdef INET
 1195                 if (dst->sa_family == AF_INET) {
 1196                         rn = rnh->rnh_matchaddr(dst, rnh);
 1197                         if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
 1198                                 struct sockaddr *mask;
 1199                                 u_char *m, *n;
 1200                                 int len;
 1201                                 
 1202                                 /*
 1203                                  * compare mask to see if the new route is
 1204                                  * more specific than the existing one
 1205                                  */
 1206                                 rt0 = RNTORT(rn);
 1207                                 RT_LOCK(rt0);
 1208                                 RT_ADDREF(rt0);
 1209                                 RT_UNLOCK(rt0);
 1210                                 /*
 1211                                  * A host route is already present, so 
 1212                                  * leave the flow-table entries as is.
 1213                                  */
 1214                                 if (rt0->rt_flags & RTF_HOST) {
 1215                                         RTFREE(rt0);
 1216                                         rt0 = NULL;
 1217                                 } else if (!(flags & RTF_HOST) && netmask) {
 1218                                         mask = rt_mask(rt0);
 1219                                         len = mask->sa_len;
 1220                                         m = (u_char *)mask;
 1221                                         n = (u_char *)netmask;
 1222                                         while (len-- > 0) {
 1223                                                 if (*n != *m)
 1224                                                         break;
 1225                                                 n++;
 1226                                                 m++;
 1227                                         }
 1228                                         if (len == 0 || (*n < *m)) {
 1229                                                 RTFREE(rt0);
 1230                                                 rt0 = NULL;
 1231                                         }
 1232                                 }
 1233                         }
 1234                 }
 1235 #endif
 1236 #endif
 1237 
 1238                 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
 1239                 rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
 1240                 /*
 1241                  * If it still failed to go into the tree,
 1242                  * then un-make it (this should be a function)
 1243                  */
 1244                 if (rn == NULL) {
 1245                         if (rt->rt_ifa)
 1246                                 ifa_free(rt->rt_ifa);
 1247                         Free(rt_key(rt));
 1248                         RT_LOCK_DESTROY(rt);
 1249                         uma_zfree(V_rtzone, rt);
 1250 #ifdef FLOWTABLE
 1251                         if (rt0 != NULL)
 1252                                 RTFREE(rt0);
 1253 #endif
 1254                         senderr(EEXIST);
 1255                 } 
 1256 #ifdef FLOWTABLE
 1257                 else if (rt0 != NULL) {
 1258 #ifdef INET
 1259                         flowtable_route_flush(V_ip_ft, rt0);
 1260 #endif
 1261                         RTFREE(rt0);
 1262                 }
 1263 #endif
 1264 
 1265                 /*
 1266                  * If this protocol has something to add to this then
 1267                  * allow it to do that as well.
 1268                  */
 1269                 if (ifa->ifa_rtrequest)
 1270                         ifa->ifa_rtrequest(req, rt, info);
 1271 
 1272                 /*
 1273                  * actually return a resultant rtentry and
 1274                  * give the caller a single reference.
 1275                  */
 1276                 if (ret_nrt) {
 1277                         *ret_nrt = rt;
 1278                         RT_ADDREF(rt);
 1279                 }
 1280                 RT_UNLOCK(rt);
 1281                 break;
 1282         default:
 1283                 error = EOPNOTSUPP;
 1284         }
 1285 bad:
 1286         if (needlock)
 1287                 RADIX_NODE_HEAD_UNLOCK(rnh);
 1288         return (error);
 1289 #undef senderr
 1290 }
 1291 
 1292 #undef dst
 1293 #undef gateway
 1294 #undef netmask
 1295 #undef ifaaddr
 1296 #undef ifpaddr
 1297 #undef flags
 1298 
 1299 int
 1300 rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 1301 {
 1302         /* XXX dst may be overwritten, can we move this to below */
 1303         int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
 1304 #ifdef INVARIANTS
 1305         struct radix_node_head *rnh;
 1306 
 1307         rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family);
 1308 #endif
 1309 
 1310         RT_LOCK_ASSERT(rt);
 1311         RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
 1312         
 1313         /*
 1314          * Prepare to store the gateway in rt->rt_gateway.
 1315          * Both dst and gateway are stored one after the other in the same
 1316          * malloc'd chunk. If we have room, we can reuse the old buffer,
 1317          * rt_gateway already points to the right place.
 1318          * Otherwise, malloc a new block and update the 'dst' address.
 1319          */
 1320         if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
 1321                 caddr_t new;
 1322 
 1323                 R_Malloc(new, caddr_t, dlen + glen);
 1324                 if (new == NULL)
 1325                         return ENOBUFS;
 1326                 /*
 1327                  * XXX note, we copy from *dst and not *rt_key(rt) because
 1328                  * rt_setgate() can be called to initialize a newly
 1329                  * allocated route entry, in which case rt_key(rt) == NULL
 1330                  * (and also rt->rt_gateway == NULL).
 1331                  * Free()/free() handle a NULL argument just fine.
 1332                  */
 1333                 bcopy(dst, new, dlen);
 1334                 Free(rt_key(rt));       /* free old block, if any */
 1335                 rt_key(rt) = (struct sockaddr *)new;
 1336                 rt->rt_gateway = (struct sockaddr *)(new + dlen);
 1337         }
 1338 
 1339         /*
 1340          * Copy the new gateway value into the memory chunk.
 1341          */
 1342         bcopy(gate, rt->rt_gateway, glen);
 1343 
 1344         return (0);
 1345 }
 1346 
 1347 void
 1348 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
 1349 {
 1350         register u_char *cp1 = (u_char *)src;
 1351         register u_char *cp2 = (u_char *)dst;
 1352         register u_char *cp3 = (u_char *)netmask;
 1353         u_char *cplim = cp2 + *cp3;
 1354         u_char *cplim2 = cp2 + *cp1;
 1355 
 1356         *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
 1357         cp3 += 2;
 1358         if (cplim > cplim2)
 1359                 cplim = cplim2;
 1360         while (cp2 < cplim)
 1361                 *cp2++ = *cp1++ & *cp3++;
 1362         if (cp2 < cplim2)
 1363                 bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
 1364 }
 1365 
 1366 /*
 1367  * Set up a routing table entry, normally
 1368  * for an interface.
 1369  */
 1370 #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
 1371 static inline  int
 1372 rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
 1373 {
 1374         struct sockaddr *dst;
 1375         struct sockaddr *netmask;
 1376         struct rtentry *rt = NULL;
 1377         struct rt_addrinfo info;
 1378         int error = 0;
 1379         int startfib, endfib;
 1380         char tempbuf[_SOCKADDR_TMPSIZE];
 1381         int didwork = 0;
 1382         int a_failure = 0;
 1383         static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
 1384 
 1385         if (flags & RTF_HOST) {
 1386                 dst = ifa->ifa_dstaddr;
 1387                 netmask = NULL;
 1388         } else {
 1389                 dst = ifa->ifa_addr;
 1390                 netmask = ifa->ifa_netmask;
 1391         }
 1392         if ( dst->sa_family != AF_INET)
 1393                 fibnum = 0;
 1394         if (fibnum == -1) {
 1395                 if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
 1396                         startfib = endfib = curthread->td_proc->p_fibnum;
 1397                 } else {
 1398                         startfib = 0;
 1399                         endfib = rt_numfibs - 1;
 1400                 }
 1401         } else {
 1402                 KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
 1403                 startfib = fibnum;
 1404                 endfib = fibnum;
 1405         }
 1406         if (dst->sa_len == 0)
 1407                 return(EINVAL);
 1408 
 1409         /*
 1410          * If it's a delete, check that if it exists,
 1411          * it's on the correct interface or we might scrub
 1412          * a route to another ifa which would
 1413          * be confusing at best and possibly worse.
 1414          */
 1415         if (cmd == RTM_DELETE) {
 1416                 /*
 1417                  * It's a delete, so it should already exist..
 1418                  * If it's a net, mask off the host bits
 1419                  * (Assuming we have a mask)
 1420                  * XXX this is kinda inet specific..
 1421                  */
 1422                 if (netmask != NULL) {
 1423                         rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
 1424                         dst = (struct sockaddr *)tempbuf;
 1425                 }
 1426         }
 1427         /*
 1428          * Now go through all the requested tables (fibs) and do the
 1429          * requested action. Realistically, this will either be fib 0
 1430          * for protocols that don't do multiple tables or all the
 1431          * tables for those that do. XXX For this version only AF_INET.
 1432          * When that changes code should be refactored to protocol
 1433          * independent parts and protocol dependent parts.
 1434          */
 1435         for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
 1436                 if (cmd == RTM_DELETE) {
 1437                         struct radix_node_head *rnh;
 1438                         struct radix_node *rn;
 1439                         /*
 1440                          * Look up an rtentry that is in the routing tree and
 1441                          * contains the correct info.
 1442                          */
 1443                         rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
 1444                         if (rnh == NULL)
 1445                                 /* this table doesn't exist but others might */
 1446                                 continue;
 1447                         RADIX_NODE_HEAD_LOCK(rnh);
 1448 #ifdef RADIX_MPATH
 1449                         if (rn_mpath_capable(rnh)) {
 1450 
 1451                                 rn = rnh->rnh_matchaddr(dst, rnh);
 1452                                 if (rn == NULL) 
 1453                                         error = ESRCH;
 1454                                 else {
 1455                                         rt = RNTORT(rn);
 1456                                         /*
 1457                                          * for interface route the
 1458                                          * rt->rt_gateway is sockaddr_intf
 1459                                          * for cloning ARP entries, so
 1460                                          * rt_mpath_matchgate must use the
 1461                                          * interface address
 1462                                          */
 1463                                         rt = rt_mpath_matchgate(rt,
 1464                                             ifa->ifa_addr);
 1465                                         if (!rt) 
 1466                                                 error = ESRCH;
 1467                                 }
 1468                         }
 1469                         else
 1470 #endif
 1471                         rn = rnh->rnh_lookup(dst, netmask, rnh);
 1472                         error = (rn == NULL ||
 1473                             (rn->rn_flags & RNF_ROOT) ||
 1474                             RNTORT(rn)->rt_ifa != ifa ||
 1475                             !sa_equal((struct sockaddr *)rn->rn_key, dst));
 1476                         RADIX_NODE_HEAD_UNLOCK(rnh);
 1477                         if (error) {
 1478                                 /* this is only an error if bad on ALL tables */
 1479                                 continue;
 1480                         }
 1481                 }
 1482                 /*
 1483                  * Do the actual request
 1484                  */
 1485                 bzero((caddr_t)&info, sizeof(info));
 1486                 info.rti_ifa = ifa;
 1487                 info.rti_flags = flags | ifa->ifa_flags;
 1488                 info.rti_info[RTAX_DST] = dst;
 1489                 /* 
 1490                  * doing this for compatibility reasons
 1491                  */
 1492                 if (cmd == RTM_ADD)
 1493                         info.rti_info[RTAX_GATEWAY] =
 1494                             (struct sockaddr *)&null_sdl;
 1495                 else
 1496                         info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
 1497                 info.rti_info[RTAX_NETMASK] = netmask;
 1498                 error = rtrequest1_fib(cmd, &info, &rt, fibnum);
 1499                 if (error == 0 && rt != NULL) {
 1500                         /*
 1501                          * notify any listening routing agents of the change
 1502                          */
 1503                         RT_LOCK(rt);
 1504 #ifdef RADIX_MPATH
 1505                         /*
 1506                          * in case address alias finds the first address
 1507                          * e.g. ifconfig bge0 192.103.54.246/24
 1508                          * e.g. ifconfig bge0 192.103.54.247/24
 1509                          * the address set in the route is 192.103.54.246
 1510                          * so we need to replace it with 192.103.54.247
 1511                          */
 1512                         if (memcmp(rt->rt_ifa->ifa_addr,
 1513                             ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
 1514                                 ifa_free(rt->rt_ifa);
 1515                                 ifa_ref(ifa);
 1516                                 rt->rt_ifp = ifa->ifa_ifp;
 1517                                 rt->rt_ifa = ifa;
 1518                         }
 1519 #endif
 1520                         /* 
 1521                          * doing this for compatibility reasons
 1522                          */
 1523                         if (cmd == RTM_ADD) {
 1524                             ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type  =
 1525                                 rt->rt_ifp->if_type;
 1526                             ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
 1527                                 rt->rt_ifp->if_index;
 1528                         }
 1529                         RT_ADDREF(rt);
 1530                         RT_UNLOCK(rt);
 1531                         rt_newaddrmsg(cmd, ifa, error, rt);
 1532                         RT_LOCK(rt);
 1533                         RT_REMREF(rt);
 1534                         if (cmd == RTM_DELETE) {
 1535                                 /*
 1536                                  * If we are deleting, and we found an entry,
 1537                                  * then it's been removed from the tree..
 1538                                  * now throw it away.
 1539                                  */
 1540                                 RTFREE_LOCKED(rt);
 1541                         } else {
 1542                                 if (cmd == RTM_ADD) {
 1543                                         /*
 1544                                          * We just wanted to add it..
 1545                                          * we don't actually need a reference.
 1546                                          */
 1547                                         RT_REMREF(rt);
 1548                                 }
 1549                                 RT_UNLOCK(rt);
 1550                         }
 1551                         didwork = 1;
 1552                 }
 1553                 if (error)
 1554                         a_failure = error;
 1555         }
 1556         if (cmd == RTM_DELETE) {
 1557                 if (didwork) {
 1558                         error = 0;
 1559                 } else {
 1560                         /* we only give an error if it wasn't in any table */
 1561                         error = ((flags & RTF_HOST) ?
 1562                             EHOSTUNREACH : ENETUNREACH);
 1563                 }
 1564         } else {
 1565                 if (a_failure) {
 1566                         /* return an error if any of them failed */
 1567                         error = a_failure;
 1568                 }
 1569         }
 1570         return (error);
 1571 }
 1572 
 1573 /* special one for inet internal use. may not use. */
 1574 int
 1575 rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
 1576 {
 1577         return (rtinit1(ifa, cmd, flags, -1));
 1578 }
 1579 
 1580 /*
 1581  * Set up a routing table entry, normally
 1582  * for an interface.
 1583  */
 1584 int
 1585 rtinit(struct ifaddr *ifa, int cmd, int flags)
 1586 {
 1587         struct sockaddr *dst;
 1588         int fib = 0;
 1589 
 1590         if (flags & RTF_HOST) {
 1591                 dst = ifa->ifa_dstaddr;
 1592         } else {
 1593                 dst = ifa->ifa_addr;
 1594         }
 1595 
 1596         if (dst->sa_family == AF_INET)
 1597                 fib = -1;
 1598         return (rtinit1(ifa, cmd, flags, fib));
 1599 }

Cache object: 06115e597f70a35eb1dbf904e6501f68


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.