The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/route.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: route.c,v 1.236 2022/12/22 13:54:57 riastradh Exp $    */
    2 
    3 /*-
    4  * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30  * POSSIBILITY OF SUCH DAMAGE.
   31  */
   32 
   33 /*
   34  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
   35  * All rights reserved.
   36  *
   37  * Redistribution and use in source and binary forms, with or without
   38  * modification, are permitted provided that the following conditions
   39  * are met:
   40  * 1. Redistributions of source code must retain the above copyright
   41  *    notice, this list of conditions and the following disclaimer.
   42  * 2. Redistributions in binary form must reproduce the above copyright
   43  *    notice, this list of conditions and the following disclaimer in the
   44  *    documentation and/or other materials provided with the distribution.
   45  * 3. Neither the name of the project nor the names of its contributors
   46  *    may be used to endorse or promote products derived from this software
   47  *    without specific prior written permission.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   59  * SUCH DAMAGE.
   60  */
   61 
   62 /*
   63  * Copyright (c) 1980, 1986, 1991, 1993
   64  *      The Regents of the University of California.  All rights reserved.
   65  *
   66  * Redistribution and use in source and binary forms, with or without
   67  * modification, are permitted provided that the following conditions
   68  * are met:
   69  * 1. Redistributions of source code must retain the above copyright
   70  *    notice, this list of conditions and the following disclaimer.
   71  * 2. Redistributions in binary form must reproduce the above copyright
   72  *    notice, this list of conditions and the following disclaimer in the
   73  *    documentation and/or other materials provided with the distribution.
   74  * 3. Neither the name of the University nor the names of its contributors
   75  *    may be used to endorse or promote products derived from this software
   76  *    without specific prior written permission.
   77  *
   78  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   79  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   80  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   81  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   82  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   83  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   84  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   86  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   87  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   88  * SUCH DAMAGE.
   89  *
   90  *      @(#)route.c     8.3 (Berkeley) 1/9/95
   91  */
   92 
   93 #ifdef _KERNEL_OPT
   94 #include "opt_inet.h"
   95 #include "opt_route.h"
   96 #include "opt_net_mpsafe.h"
   97 #endif
   98 
   99 #include <sys/cdefs.h>
  100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.236 2022/12/22 13:54:57 riastradh Exp $");
  101 
  102 #include <sys/param.h>
  103 #ifdef RTFLUSH_DEBUG
  104 #include <sys/sysctl.h>
  105 #endif
  106 #include <sys/systm.h>
  107 #include <sys/callout.h>
  108 #include <sys/proc.h>
  109 #include <sys/mbuf.h>
  110 #include <sys/socket.h>
  111 #include <sys/socketvar.h>
  112 #include <sys/domain.h>
  113 #include <sys/kernel.h>
  114 #include <sys/ioctl.h>
  115 #include <sys/pool.h>
  116 #include <sys/kauth.h>
  117 #include <sys/workqueue.h>
  118 #include <sys/syslog.h>
  119 #include <sys/rwlock.h>
  120 #include <sys/mutex.h>
  121 #include <sys/cpu.h>
  122 #include <sys/kmem.h>
  123 
  124 #include <net/if.h>
  125 #include <net/if_dl.h>
  126 #include <net/route.h>
  127 #if defined(INET) || defined(INET6)
  128 #include <net/if_llatbl.h>
  129 #endif
  130 
  131 #include <netinet/in.h>
  132 #include <netinet/in_var.h>
  133 
  /* Route flag bits collected under the name PRESERVED_RTF. */
  #define PRESERVED_RTF   (RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_DONE | RTF_MASK)

  /*
   * rtcache_debug() tests the _rtcache_debug switch in RTFLUSH_DEBUG
   * kernels and is the constant 0 everywhere else.
   */
  #ifdef RTFLUSH_DEBUG
  #define rtcache_debug() __predict_false(_rtcache_debug)
  #else /* !RTFLUSH_DEBUG */
  #define rtcache_debug() 0
  #endif /* RTFLUSH_DEBUG */

  /*
   * RT_DEBUG-only diagnostics.  RT_REFCNT_TRACE() prints the caller's
   * function, line and the entry's reference count; dlog() forwards to
   * log().  Both expand to empty statements when RT_DEBUG is not defined.
   */
  #ifdef RT_DEBUG
  #define RT_REFCNT_TRACE(rt)     printf("%s:%d: rt=%p refcnt=%d\n", \
                                      __func__, __LINE__, (rt), (rt)->rt_refcnt)
  #define dlog(level, fmt, args...)       log(level, fmt, ##args)
  #else /* !RT_DEBUG */
  #define RT_REFCNT_TRACE(rt)     do {} while (0)
  #define dlog(level, fmt, args...)       do {} while (0)
  #endif /* RT_DEBUG */
  154 
  155 struct rtstat           rtstat;
  156 
  157 static int              rttrash;        /* routes not in table but not freed */
  158 
  159 static struct pool      rtentry_pool;
  160 static struct pool      rttimer_pool;
  161 
  162 static struct callout   rt_timer_ch; /* callout for rt_timer_timer() */
  163 static struct workqueue *rt_timer_wq;
  164 static struct work      rt_timer_wk;
  165 
  166 static void     rt_timer_init(void);
  167 static void     rt_timer_queue_remove_all(struct rttimer_queue *);
  168 static void     rt_timer_remove_all(struct rtentry *);
  169 static void     rt_timer_timer(void *);
  170 
  171 /*
  172  * Locking notes:
  173  * - The routing table is protected by a global rwlock
  174  *   - API: RT_RLOCK and friends
  175  * - rtcaches are NOT protected by the framework
  176  *   - Callers must guarantee a rtcache isn't accessed simultaneously
  177  *   - How the constraint is guaranteed in the wild
  178  *     - Protect a rtcache by a mutex (e.g., inp_route)
  179  *     - Make rtcache per-CPU and allow only accesses from softint
  180  *       (e.g., ipforward_rt_percpu)
  181  * - References to a rtentry are managed by reference counting and psref
  182  *   - Reference counting is used for temporal reference when a rtentry
  183  *     is fetched from the routing table
  184  *   - psref is used for temporal reference when a rtentry is fetched
  185  *     from a rtcache
  186  *     - struct route (rtcache) has struct psref, so we cannot obtain
  187  *       a reference twice on the same struct route
  188  *   - Before destroying or updating a rtentry, we have to wait until
  189  *     all references are released (see below for details)
  190  *   - APIs
  191  *     - An obtained rtentry via rtalloc1 or rtrequest* must be
  192  *       unreferenced by rt_unref
  193  *     - An obtained rtentry via rtcache_* must be unreferenced by
  194  *       rtcache_unref
  195  *   - TODO: once we get a lockless routing table, we should use only
  196  *           psref for rtentries
  197  * - rtentry destruction
  198  *   - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
  199  *   - If a caller of rtrequest grabs a reference of a rtentry, the caller
  200  *     has a responsibility to destroy the rtentry by itself by calling
  201  *     rt_free
  202  *     - If not, rtrequest itself does that
  203  *   - If rt_free is called in softint, the actual destruction routine is
  204  *     deferred to a workqueue
  205  * - rtentry update
  206  *   - When updating a rtentry, RTF_UPDATING flag is set
  207  *   - If a rtentry is set RTF_UPDATING, fetching the rtentry from
  208  *     the routing table or a rtcache results in either of the following
  209  *     cases:
  210  *     - if the caller runs in softint, the caller fails to fetch
  211  *     - otherwise, the caller waits for the update to complete and retries
  212  *       the fetch (which will probably succeed the second time)
  213  * - rtcache invalidation
  214  *   - There is a global generation counter that is incremented when
  215  *     any routes have been added or deleted
  216  *   - When a rtcache caches a rtentry into itself, it also stores
  217  *     a snapshot of the generation counter
  218  *   - If the snapshot equals the global counter, the cache is valid,
  219  *     otherwise the cache is invalidated
  220  */
  221 
  222 /*
  223  * Global lock for the routing table.
  224  */
  225 static krwlock_t                rt_lock __cacheline_aligned;
  226 #ifdef NET_MPSAFE
  227 #define RT_RLOCK()              rw_enter(&rt_lock, RW_READER)
  228 #define RT_WLOCK()              rw_enter(&rt_lock, RW_WRITER)
  229 #define RT_UNLOCK()             rw_exit(&rt_lock)
  230 #define RT_WLOCKED()            rw_write_held(&rt_lock)
  231 #define RT_ASSERT_WLOCK()       KASSERT(rw_write_held(&rt_lock))
  232 #else
  233 #define RT_RLOCK()              do {} while (0)
  234 #define RT_WLOCK()              do {} while (0)
  235 #define RT_UNLOCK()             do {} while (0)
  236 #define RT_WLOCKED()            true
  237 #define RT_ASSERT_WLOCK()       do {} while (0)
  238 #endif
  239 
  240 static uint64_t rtcache_generation;
  241 
  242 /*
  243  * mutex and cv that are used to wait for references to a rtentry left
  244  * before updating the rtentry.
  245  */
  246 static struct {
  247         kmutex_t                lock;
  248         kcondvar_t              cv;
  249         bool                    ongoing;
  250         const struct lwp        *lwp;
  251 } rt_update_global __cacheline_aligned;
  252 
  253 /*
  254  * A workqueue and stuff that are used to defer the destruction routine
  255  * of rtentries.
  256  */
  257 static struct {
  258         struct workqueue        *wq;
  259         struct work             wk;
  260         kmutex_t                lock;
  261         SLIST_HEAD(, rtentry)   queue;
  262         bool                    enqueued;
  263 } rt_free_global __cacheline_aligned;
  264 
  265 /* psref for rtentry */
  266 static struct psref_class *rt_psref_class __read_mostly;
  267 
  268 #ifdef RTFLUSH_DEBUG
  269 static int _rtcache_debug = 0;
  270 #endif /* RTFLUSH_DEBUG */
  271 
  272 static kauth_listener_t route_listener;
  273 
  /*
   * Forward declarations of file-local helpers.
   */

  /* Messaging and address helpers. */
  static int rtdeletemsg(struct rtentry *);
  static void rt_maskedcopy(const struct sockaddr *,
      struct sockaddr *, const struct sockaddr *);

  /* Route cache. */
  static void rtcache_invalidate(void);
  static void rtcache_ref(struct rtentry *, struct route *);

  /* Lookup and reference counting. */
  static void rt_ref(struct rtentry *rt);
  static struct rtentry *
      rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok,
      bool wlock);

  /* Interface / interface-address resolution. */
  static struct ifaddr *rt_getifa(struct rt_addrinfo *, struct psref *);
  static struct ifnet *rt_getifp(struct rt_addrinfo *, struct psref *);
  static struct ifaddr *ifa_ifwithroute_psref(int, const struct sockaddr *,
      const struct sockaddr *, struct psref *);

  /* Waiting for updates and outstanding references. */
  #ifdef NET_MPSAFE
  static void rt_update_wait(void);
  #endif
  static bool rt_wait_ok(void);
  static void rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt);
  static void rt_wait_psref(struct rtentry *rt);

  /* Kernel debugger support. */
  #ifdef DDB
  static void db_print_sa(const struct sockaddr *);
  static void db_print_ifa(struct ifaddr *);
  static int db_show_rtentry(struct rtentry *, void *);
  #endif
  306 
  #ifdef RTFLUSH_DEBUG
  static void sysctl_net_rtcache_setup(struct sysctllog **);

  /*
   * Create the "rtcache" sysctl node under CTL_NET and, below it, a
   * read-write integer leaf "debug" backed by _rtcache_debug.  A failure
   * of either sysctl_createv() call is silently ignored.
   */
  static void
  sysctl_net_rtcache_setup(struct sysctllog **clog)
  {
          const struct sysctlnode *node;
          int error;

          error = sysctl_createv(clog, 0, NULL, &node, CTLFLAG_PERMANENT,
              CTLTYPE_NODE,
              "rtcache", SYSCTL_DESCR("Route cache related settings"),
              NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
          if (error != 0)
                  return;

          /* Nothing follows this call, so its result needs no handling. */
          (void)sysctl_createv(clog, 0, &node, &node,
              CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
              "debug", SYSCTL_DESCR("Debug route caches"),
              NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL);
  }
  #endif /* RTFLUSH_DEBUG */
  326 
  327 static inline void
  328 rt_destroy(struct rtentry *rt)
  329 {
  330         if (rt->_rt_key != NULL)
  331                 sockaddr_free(rt->_rt_key);
  332         if (rt->rt_gateway != NULL)
  333                 sockaddr_free(rt->rt_gateway);
  334         if (rt_gettag(rt) != NULL)
  335                 sockaddr_free(rt_gettag(rt));
  336         rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
  337 }
  338 
  339 static inline const struct sockaddr *
  340 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
  341 {
  342         if (rt->_rt_key == key)
  343                 goto out;
  344 
  345         if (rt->_rt_key != NULL)
  346                 sockaddr_free(rt->_rt_key);
  347         rt->_rt_key = sockaddr_dup(key, flags);
  348 out:
  349         rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
  350         return rt->_rt_key;
  351 }
  352 
  353 struct ifaddr *
  354 rt_get_ifa(struct rtentry *rt)
  355 {
  356         struct ifaddr *ifa;
  357 
  358         ifa = rt->rt_ifa;
  359         if (ifa->ifa_getifa == NULL)
  360                 return ifa;
  361 #if 0
  362         else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
  363                 return ifa;
  364 #endif
  365         else {
  366                 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
  367                 if (ifa == NULL)
  368                         return NULL;
  369                 rt_replace_ifa(rt, ifa);
  370                 return ifa;
  371         }
  372 }
  373 
  374 static void
  375 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
  376 {
  377         rt->rt_ifa = ifa;
  378         if (ifa->ifa_seqno != NULL)
  379                 rt->rt_ifa_seqno = *ifa->ifa_seqno;
  380 }
  381 
  382 /*
  383  * Is this route the connected route for the ifa?
  384  */
  385 static int
  386 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
  387 {
  388         const struct sockaddr *key, *dst, *odst;
  389         struct sockaddr_storage maskeddst;
  390 
  391         key = rt_getkey(rt);
  392         dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
  393         if (dst == NULL ||
  394             dst->sa_family != key->sa_family ||
  395             dst->sa_len != key->sa_len)
  396                 return 0;
  397         if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
  398                 odst = dst;
  399                 dst = (struct sockaddr *)&maskeddst;
  400                 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
  401                     ifa->ifa_netmask);
  402         }
  403         return (memcmp(dst, key, dst->sa_len) == 0);
  404 }
  405 
  406 void
  407 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
  408 {
  409         struct ifaddr *old;
  410 
  411         if (rt->rt_ifa == ifa)
  412                 return;
  413 
  414         if (rt->rt_ifa != ifa &&
  415             rt->rt_ifa->ifa_flags & IFA_ROUTE &&
  416             rt_ifa_connected(rt, rt->rt_ifa))
  417         {
  418                 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
  419                     "replace deleted IFA_ROUTE\n",
  420                     (void *)rt->_rt_key, (void *)rt->rt_ifa);
  421                 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
  422                 if (rt_ifa_connected(rt, ifa)) {
  423                         RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
  424                             "replace added IFA_ROUTE\n",
  425                             (void *)rt->_rt_key, (void *)ifa);
  426                         ifa->ifa_flags |= IFA_ROUTE;
  427                 }
  428         }
  429 
  430         ifaref(ifa);
  431         old = rt->rt_ifa;
  432         rt_set_ifa1(rt, ifa);
  433         ifafree(old);
  434 }
  435 
  /*
   * Attach `ifa' to `rt', taking a reference on the ifaddr first.  Any
   * ifaddr previously stored in the route is not released here.
   */
  static void
  rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
  {

          ifaref(ifa);
          rt_set_ifa1(rt, ifa);
  }
  442 
  443 static int
  444 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
  445     void *arg0, void *arg1, void *arg2, void *arg3)
  446 {
  447         struct rt_msghdr *rtm;
  448         int result;
  449 
  450         result = KAUTH_RESULT_DEFER;
  451         rtm = arg1;
  452 
  453         if (action != KAUTH_NETWORK_ROUTE)
  454                 return result;
  455 
  456         if (rtm->rtm_type == RTM_GET)
  457                 result = KAUTH_RESULT_ALLOW;
  458 
  459         return result;
  460 }
  461 
  462 static void rt_free_work(struct work *, void *);
  463 
  464 void
  465 rt_init(void)
  466 {
  467         int error;
  468 
  469 #ifdef RTFLUSH_DEBUG
  470         sysctl_net_rtcache_setup(NULL);
  471 #endif
  472 
  473         mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
  474         SLIST_INIT(&rt_free_global.queue);
  475         rt_free_global.enqueued = false;
  476 
  477         rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET);
  478 
  479         error = workqueue_create(&rt_free_global.wq, "rt_free",
  480             rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
  481         if (error)
  482                 panic("%s: workqueue_create failed (%d)\n", __func__, error);
  483 
  484         mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
  485         cv_init(&rt_update_global.cv, "rt_update");
  486 
  487         pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
  488             NULL, IPL_SOFTNET);
  489         pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
  490             NULL, IPL_SOFTNET);
  491 
  492         rn_init();      /* initialize all zeroes, all ones, mask table */
  493         rtbl_init();
  494 
  495         route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
  496             route_listener_cb, NULL);
  497 }
  498 
  499 static void
  500 rtcache_invalidate(void)
  501 {
  502 
  503         RT_ASSERT_WLOCK();
  504 
  505         if (rtcache_debug())
  506                 printf("%s: enter\n", __func__);
  507 
  508         rtcache_generation++;
  509 }
  510 
  #ifdef RT_DEBUG
  /*
   * Log a one-line description of a route at LOG_DEBUG: its address,
   * destination key, gateway, flags and interface name.  Missing key,
   * gateway or interface are printed as "(NULL)".
   */
  static void
  dump_rt(const struct rtentry *rt)
  {
          char buf[512];

          log(LOG_DEBUG, "rt: ");
          log(LOG_DEBUG, "p=%p ", rt);
          if (rt->_rt_key == NULL) {
                  log(LOG_DEBUG, "dst=(NULL) ");
          } else {
                  sockaddr_format(rt->_rt_key, buf, sizeof(buf));
                  log(LOG_DEBUG, "dst=%s ", buf);
          }
          if (rt->rt_gateway == NULL) {
                  log(LOG_DEBUG, "gw=(NULL) ");
          } else {
                  /* Format the gateway itself, not the destination key. */
                  sockaddr_format(rt->rt_gateway, buf, sizeof(buf));
                  log(LOG_DEBUG, "gw=%s ", buf);
          }
          log(LOG_DEBUG, "flags=%x ", rt->rt_flags);
          if (rt->rt_ifp == NULL) {
                  log(LOG_DEBUG, "if=(NULL) ");
          } else {
                  log(LOG_DEBUG, "if=%s ", rt->rt_ifp->if_xname);
          }
          log(LOG_DEBUG, "\n");
  }
  #endif /* RT_DEBUG */
  540 
  541 /*
  542  * Packet routing routines. If success, refcnt of a returned rtentry
  543  * will be incremented. The caller has to rtfree it by itself.
  544  */
  545 struct rtentry *
  546 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok,
  547     bool wlock)
  548 {
  549         rtbl_t *rtbl;
  550         struct rtentry *rt;
  551         int s;
  552 
  553 #ifdef NET_MPSAFE
  554 retry:
  555 #endif
  556         s = splsoftnet();
  557         rtbl = rt_gettable(dst->sa_family);
  558         if (rtbl == NULL)
  559                 goto miss;
  560 
  561         rt = rt_matchaddr(rtbl, dst);
  562         if (rt == NULL)
  563                 goto miss;
  564 
  565         if (!ISSET(rt->rt_flags, RTF_UP))
  566                 goto miss;
  567 
  568 #ifdef NET_MPSAFE
  569         if (ISSET(rt->rt_flags, RTF_UPDATING) &&
  570             /* XXX updater should be always able to acquire */
  571             curlwp != rt_update_global.lwp) {
  572                 if (!wait_ok || !rt_wait_ok())
  573                         goto miss;
  574                 RT_UNLOCK();
  575                 splx(s);
  576 
  577                 /* We can wait until the update is complete */
  578                 rt_update_wait();
  579 
  580                 if (wlock)
  581                         RT_WLOCK();
  582                 else
  583                         RT_RLOCK();
  584                 goto retry;
  585         }
  586 #endif /* NET_MPSAFE */
  587 
  588         rt_ref(rt);
  589         RT_REFCNT_TRACE(rt);
  590 
  591         splx(s);
  592         return rt;
  593 miss:
  594         rtstat.rts_unreach++;
  595         if (report) {
  596                 struct rt_addrinfo info;
  597 
  598                 memset(&info, 0, sizeof(info));
  599                 info.rti_info[RTAX_DST] = dst;
  600                 rt_missmsg(RTM_MISS, &info, 0, 0);
  601         }
  602         splx(s);
  603         return NULL;
  604 }
  605 
  /*
   * Look up the route for `dst' under the routing table read lock.
   * Returns a referenced rtentry, or NULL on a miss; `report' is handed
   * through to rtalloc1_locked().
   */
  struct rtentry *
  rtalloc1(const struct sockaddr *dst, int report)
  {
          struct rtentry *found;

          RT_RLOCK();
          found = rtalloc1_locked(dst, report, true, false);
          RT_UNLOCK();

          return found;
  }
  617 
  618 static void
  619 rt_ref(struct rtentry *rt)
  620 {
  621 
  622         KASSERTMSG(rt->rt_refcnt >= 0, "rt_refcnt=%d", rt->rt_refcnt);
  623         atomic_inc_uint(&rt->rt_refcnt);
  624 }
  625 
  626 void
  627 rt_unref(struct rtentry *rt)
  628 {
  629 
  630         KASSERT(rt != NULL);
  631         KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt);
  632 
  633         atomic_dec_uint(&rt->rt_refcnt);
  634         if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) {
  635                 mutex_enter(&rt_free_global.lock);
  636                 cv_broadcast(&rt->rt_cv);
  637                 mutex_exit(&rt_free_global.lock);
  638         }
  639 }
  640 
  /*
   * May the caller sleep waiting for a route update or for references to
   * drain?  Currently the answer is always "no".
   */
  static bool
  rt_wait_ok(void)
  {

          /*
           * The original test was !cpu_softintr_p().  That is not enough:
           * the caller may hold a lock (probably the softnet lock) that a
           * softint is waiting for, so sleeping here could deadlock.  See
           * https://gnats.netbsd.org/56844 for details.  Until the locking
           * paths are sorted out, waiting is disabled altogether and work
           * is always deferred to the workqueue.
           */
          KASSERT(!cpu_intr_p());

          return false;
  }
  657 
  658 void
  659 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt)
  660 {
  661         mutex_enter(&rt_free_global.lock);
  662         while (rt->rt_refcnt > cnt) {
  663                 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n",
  664                     __func__, title, rt->rt_refcnt);
  665                 cv_wait(&rt->rt_cv, &rt_free_global.lock);
  666                 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n",
  667                     __func__, title, rt->rt_refcnt);
  668         }
  669         mutex_exit(&rt_free_global.lock);
  670 }
  671 
  672 void
  673 rt_wait_psref(struct rtentry *rt)
  674 {
  675 
  676         psref_target_destroy(&rt->rt_psref, rt_psref_class);
  677         psref_target_init(&rt->rt_psref, rt_psref_class);
  678 }
  679 
  680 static void
  681 _rt_free(struct rtentry *rt)
  682 {
  683         struct ifaddr *ifa;
  684 
  685         /*
  686          * Need to avoid a deadlock on rt_wait_refcnt of update
  687          * and a conflict on psref_target_destroy of update.
  688          */
  689 #ifdef NET_MPSAFE
  690         rt_update_wait();
  691 #endif
  692 
  693         RT_REFCNT_TRACE(rt);
  694         KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt);
  695         rt_wait_refcnt("free", rt, 0);
  696 #ifdef NET_MPSAFE
  697         psref_target_destroy(&rt->rt_psref, rt_psref_class);
  698 #endif
  699 
  700         rt_assert_inactive(rt);
  701         rttrash--;
  702         ifa = rt->rt_ifa;
  703         rt->rt_ifa = NULL;
  704         ifafree(ifa);
  705         rt->rt_ifp = NULL;
  706         cv_destroy(&rt->rt_cv);
  707         rt_destroy(rt);
  708         pool_put(&rtentry_pool, rt);
  709 }
  710 
  711 static void
  712 rt_free_work(struct work *wk, void *arg)
  713 {
  714 
  715         for (;;) {
  716                 struct rtentry *rt;
  717 
  718                 mutex_enter(&rt_free_global.lock);
  719                 if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) {
  720                         rt_free_global.enqueued = false;
  721                         mutex_exit(&rt_free_global.lock);
  722                         return;
  723                 }
  724                 SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free);
  725                 mutex_exit(&rt_free_global.lock);
  726                 atomic_dec_uint(&rt->rt_refcnt);
  727                 _rt_free(rt);
  728         }
  729 }
  730 
  731 void
  732 rt_free(struct rtentry *rt)
  733 {
  734 
  735         KASSERTMSG(rt->rt_refcnt > 0, "rt_refcnt=%d", rt->rt_refcnt);
  736         if (rt_wait_ok()) {
  737                 atomic_dec_uint(&rt->rt_refcnt);
  738                 _rt_free(rt);
  739                 return;
  740         }
  741 
  742         mutex_enter(&rt_free_global.lock);
  743         /* No need to add a reference here. */
  744         SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free);
  745         if (!rt_free_global.enqueued) {
  746                 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL);
  747                 rt_free_global.enqueued = true;
  748         }
  749         mutex_exit(&rt_free_global.lock);
  750 }
  751 
  #ifdef NET_MPSAFE
  /*
   * Sleep until no route update is in progress, i.e. until
   * rt_update_global.ongoing is false.
   */
  static void
  rt_update_wait(void)
  {

          mutex_enter(&rt_update_global.lock);
          for (; rt_update_global.ongoing; ) {
                  dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp);

                  cv_wait(&rt_update_global.cv, &rt_update_global.lock);

                  dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp);
          }
          mutex_exit(&rt_update_global.lock);
  }
  #endif /* NET_MPSAFE */
  766 
  767 int
  768 rt_update_prepare(struct rtentry *rt)
  769 {
  770 
  771         dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);
  772 
  773         RT_WLOCK();
  774         /* If the entry is being destroyed, don't proceed the update. */
  775         if (!ISSET(rt->rt_flags, RTF_UP)) {
  776                 RT_UNLOCK();
  777                 return ESRCH;
  778         }
  779         rt->rt_flags |= RTF_UPDATING;
  780         RT_UNLOCK();
  781 
  782         mutex_enter(&rt_update_global.lock);
  783         while (rt_update_global.ongoing) {
  784                 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
  785                     __func__, rt, curlwp);
  786                 cv_wait(&rt_update_global.cv, &rt_update_global.lock);
  787                 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
  788                     __func__, rt, curlwp);
  789         }
  790         rt_update_global.ongoing = true;
  791         /* XXX need it to avoid rt_update_wait by updater itself. */
  792         rt_update_global.lwp = curlwp;
  793         mutex_exit(&rt_update_global.lock);
  794 
  795         rt_wait_refcnt("update", rt, 1);
  796         rt_wait_psref(rt);
  797 
  798         return 0;
  799 }
  800 
  801 void
  802 rt_update_finish(struct rtentry *rt)
  803 {
  804 
  805         RT_WLOCK();
  806         rt->rt_flags &= ~RTF_UPDATING;
  807         RT_UNLOCK();
  808 
  809         mutex_enter(&rt_update_global.lock);
  810         rt_update_global.ongoing = false;
  811         rt_update_global.lwp = NULL;
  812         cv_broadcast(&rt_update_global.cv);
  813         mutex_exit(&rt_update_global.lock);
  814 
  815         dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
  816 }
  817 
  818 /*
  819  * Force a routing table entry to the specified
  820  * destination to go through the given gateway.
  821  * Normally called as a result of a routing redirect
  822  * message from the network layer.
  823  *
  824  * N.B.: must be called at splsoftnet
  825  */
  826 void
  827 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
  828         const struct sockaddr *netmask, int flags, const struct sockaddr *src,
  829         struct rtentry **rtp)
  830 {
  831         struct rtentry *rt;
  832         int error = 0;
  833         uint64_t *stat = NULL;
  834         struct rt_addrinfo info;
  835         struct ifaddr *ifa;
  836         struct psref psref;
  837 
  838         /* verify the gateway is directly reachable */
  839         if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
  840                 error = ENETUNREACH;
  841                 goto out;
  842         }
  843         rt = rtalloc1(dst, 0);
  844         /*
  845          * If the redirect isn't from our current router for this dst,
  846          * it's either old or wrong.  If it redirects us to ourselves,
  847          * we have a routing loop, perhaps as a result of an interface
  848          * going down recently.
  849          */
  850         if (!(flags & RTF_DONE) && rt &&
  851              (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
  852                 error = EINVAL;
  853         else {
  854                 int s = pserialize_read_enter();
  855                 struct ifaddr *_ifa;
  856 
  857                 _ifa = ifa_ifwithaddr(gateway);
  858                 if (_ifa != NULL)
  859                         error = EHOSTUNREACH;
  860                 pserialize_read_exit(s);
  861         }
  862         if (error)
  863                 goto done;
  864         /*
  865          * Create a new entry if we just got back a wildcard entry
  866          * or the lookup failed.  This is necessary for hosts
  867          * which use routing redirects generated by smart gateways
  868          * to dynamically build the routing tables.
  869          */
  870         if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
  871                 goto create;
  872         /*
  873          * Don't listen to the redirect if it's
  874          * for a route to an interface.
  875          */
  876         if (rt->rt_flags & RTF_GATEWAY) {
  877                 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
  878                         /*
  879                          * Changing from route to net => route to host.
  880                          * Create new route, rather than smashing route to net.
  881                          */
  882                 create:
  883                         if (rt != NULL)
  884                                 rt_unref(rt);
  885                         flags |=  RTF_GATEWAY | RTF_DYNAMIC;
  886                         memset(&info, 0, sizeof(info));
  887                         info.rti_info[RTAX_DST] = dst;
  888                         info.rti_info[RTAX_GATEWAY] = gateway;
  889                         info.rti_info[RTAX_NETMASK] = netmask;
  890                         info.rti_ifa = ifa;
  891                         info.rti_flags = flags;
  892                         rt = NULL;
  893                         error = rtrequest1(RTM_ADD, &info, &rt);
  894                         if (rt != NULL)
  895                                 flags = rt->rt_flags;
  896                         if (error == 0)
  897                                 rt_newmsg_dynamic(RTM_ADD, rt);
  898                         stat = &rtstat.rts_dynamic;
  899                 } else {
  900                         /*
  901                          * Smash the current notion of the gateway to
  902                          * this destination.  Should check about netmask!!!
  903                          */
  904 #ifdef NET_MPSAFE
  905                         KASSERT(!cpu_softintr_p());
  906 
  907                         error = rt_update_prepare(rt);
  908                         if (error == 0) {
  909 #endif
  910                                 RT_WLOCK();
  911                                 error = rt_setgate(rt, gateway);
  912                                 if (error == 0) {
  913                                         rt->rt_flags |= RTF_MODIFIED;
  914                                         flags |= RTF_MODIFIED;
  915                                 }
  916                                 RT_UNLOCK();
  917 #ifdef NET_MPSAFE
  918                                 rt_update_finish(rt);
  919                         } else {
  920                                 /*
  921                                  * If error != 0, the rtentry is being
  922                                  * destroyed, so doing nothing doesn't
  923                                  * matter.
  924                                  */
  925                         }
  926 #endif
  927                         stat = &rtstat.rts_newgateway;
  928                 }
  929         } else
  930                 error = EHOSTUNREACH;
  931 done:
  932         if (rt) {
  933                 if (rtp != NULL && !error)
  934                         *rtp = rt;
  935                 else
  936                         rt_unref(rt);
  937         }
  938 out:
  939         if (error)
  940                 rtstat.rts_badredirect++;
  941         else if (stat != NULL)
  942                 (*stat)++;
  943         memset(&info, 0, sizeof(info));
  944         info.rti_info[RTAX_DST] = dst;
  945         info.rti_info[RTAX_GATEWAY] = gateway;
  946         info.rti_info[RTAX_NETMASK] = netmask;
  947         info.rti_info[RTAX_AUTHOR] = src;
  948         rt_missmsg(RTM_REDIRECT, &info, flags, error);
  949         ifa_release(ifa, &psref);
  950 }
  951 
  952 /*
  953  * Delete a route and generate a message.
  954  * It doesn't free a passed rt.
  955  */
  956 static int
  957 rtdeletemsg(struct rtentry *rt)
  958 {
  959         int error;
  960         struct rt_addrinfo info;
  961         struct rtentry *retrt;
  962 
  963         /*
  964          * Request the new route so that the entry is not actually
  965          * deleted.  That will allow the information being reported to
  966          * be accurate (and consistent with route_output()).
  967          */
  968         memset(&info, 0, sizeof(info));
  969         info.rti_info[RTAX_DST] = rt_getkey(rt);
  970         info.rti_info[RTAX_NETMASK] = rt_mask(rt);
  971         info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
  972         info.rti_flags = rt->rt_flags;
  973         error = rtrequest1(RTM_DELETE, &info, &retrt);
  974 
  975         rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
  976 
  977         return error;
  978 }
  979 
  980 static struct ifaddr *
  981 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
  982     const struct sockaddr *gateway, struct psref *psref)
  983 {
  984         struct ifaddr *ifa = NULL;
  985 
  986         if ((flags & RTF_GATEWAY) == 0) {
  987                 /*
  988                  * If we are adding a route to an interface,
  989                  * and the interface is a pt to pt link
  990                  * we should search for the destination
  991                  * as our clue to the interface.  Otherwise
  992                  * we can use the local address.
  993                  */
  994                 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
  995                         ifa = ifa_ifwithdstaddr_psref(dst, psref);
  996                 if (ifa == NULL)
  997                         ifa = ifa_ifwithaddr_psref(gateway, psref);
  998         } else {
  999                 /*
 1000                  * If we are adding a route to a remote net
 1001                  * or host, the gateway may still be on the
 1002                  * other end of a pt to pt link.
 1003                  */
 1004                 ifa = ifa_ifwithdstaddr_psref(gateway, psref);
 1005         }
 1006         if (ifa == NULL)
 1007                 ifa = ifa_ifwithnet_psref(gateway, psref);
 1008         if (ifa == NULL) {
 1009                 int s;
 1010                 struct rtentry *rt;
 1011 
 1012                 rt = rtalloc1_locked(gateway, 0, true, true);
 1013                 if (rt == NULL)
 1014                         return NULL;
 1015                 if (rt->rt_flags & RTF_GATEWAY) {
 1016                         rt_unref(rt);
 1017                         return NULL;
 1018                 }
 1019                 /*
 1020                  * Just in case. May not need to do this workaround.
 1021                  * Revisit when working on rtentry MP-ification.
 1022                  */
 1023                 s = pserialize_read_enter();
 1024                 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
 1025                         if (ifa == rt->rt_ifa)
 1026                                 break;
 1027                 }
 1028                 if (ifa != NULL)
 1029                         ifa_acquire(ifa, psref);
 1030                 pserialize_read_exit(s);
 1031                 rt_unref(rt);
 1032                 if (ifa == NULL)
 1033                         return NULL;
 1034         }
 1035         if (ifa->ifa_addr->sa_family != dst->sa_family) {
 1036                 struct ifaddr *nifa;
 1037                 int s;
 1038 
 1039                 s = pserialize_read_enter();
 1040                 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
 1041                 if (nifa != NULL) {
 1042                         ifa_release(ifa, psref);
 1043                         ifa_acquire(nifa, psref);
 1044                         ifa = nifa;
 1045                 }
 1046                 pserialize_read_exit(s);
 1047         }
 1048         return ifa;
 1049 }
 1050 
 1051 /*
 1052  * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 1053  * The caller has to rtfree it by itself.
 1054  */
 1055 int
 1056 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
 1057         const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
 1058 {
 1059         struct rt_addrinfo info;
 1060 
 1061         memset(&info, 0, sizeof(info));
 1062         info.rti_flags = flags;
 1063         info.rti_info[RTAX_DST] = dst;
 1064         info.rti_info[RTAX_GATEWAY] = gateway;
 1065         info.rti_info[RTAX_NETMASK] = netmask;
 1066         return rtrequest1(req, &info, ret_nrt);
 1067 }
 1068 
 1069 static struct ifnet *
 1070 rt_getifp(struct rt_addrinfo *info, struct psref *psref)
 1071 {
 1072         const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
 1073 
 1074         if (info->rti_ifp != NULL)
 1075                 return NULL;
 1076         /*
 1077          * ifp may be specified by sockaddr_dl when protocol address
 1078          * is ambiguous
 1079          */
 1080         if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
 1081                 struct ifaddr *ifa;
 1082                 int s = pserialize_read_enter();
 1083 
 1084                 ifa = ifa_ifwithnet(ifpaddr);
 1085                 if (ifa != NULL)
 1086                         info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
 1087                             psref);
 1088                 pserialize_read_exit(s);
 1089         }
 1090 
 1091         return info->rti_ifp;
 1092 }
 1093 
 1094 static struct ifaddr *
 1095 rt_getifa(struct rt_addrinfo *info, struct psref *psref)
 1096 {
 1097         struct ifaddr *ifa = NULL;
 1098         const struct sockaddr *dst = info->rti_info[RTAX_DST];
 1099         const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
 1100         const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
 1101         int flags = info->rti_flags;
 1102         const struct sockaddr *sa;
 1103 
 1104         if (info->rti_ifa == NULL && ifaaddr != NULL) {
 1105                 ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
 1106                 if (ifa != NULL)
 1107                         goto got;
 1108         }
 1109 
 1110         sa = ifaaddr != NULL ? ifaaddr :
 1111             (gateway != NULL ? gateway : dst);
 1112         if (sa != NULL && info->rti_ifp != NULL)
 1113                 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
 1114         else if (dst != NULL && gateway != NULL)
 1115                 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
 1116         else if (sa != NULL)
 1117                 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
 1118         if (ifa == NULL)
 1119                 return NULL;
 1120 got:
 1121         if (ifa->ifa_getifa != NULL) {
 1122                 /* FIXME ifa_getifa is NOMPSAFE */
 1123                 ifa = (*ifa->ifa_getifa)(ifa, dst);
 1124                 if (ifa == NULL)
 1125                         return NULL;
 1126                 ifa_acquire(ifa, psref);
 1127         }
 1128         info->rti_ifa = ifa;
 1129         if (info->rti_ifp == NULL)
 1130                 info->rti_ifp = ifa->ifa_ifp;
 1131         return ifa;
 1132 }
 1133 
 1134 /*
 1135  * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 1136  * The caller has to rtfree it by itself.
 1137  */
 1138 int
 1139 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
 1140 {
 1141         int s = splsoftnet(), ss;
 1142         int error = 0, rc;
 1143         struct rtentry *rt;
 1144         rtbl_t *rtbl;
 1145         struct ifaddr *ifa = NULL;
 1146         struct sockaddr_storage maskeddst;
 1147         const struct sockaddr *dst = info->rti_info[RTAX_DST];
 1148         const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
 1149         const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
 1150         int flags = info->rti_flags;
 1151         struct psref psref_ifp, psref_ifa;
 1152         int bound = 0;
 1153         struct ifnet *ifp = NULL;
 1154         bool need_to_release_ifa = true;
 1155         bool need_unlock = true;
 1156 #define senderr(x) { error = x ; goto bad; }
 1157 
 1158         RT_WLOCK();
 1159 
 1160         bound = curlwp_bind();
 1161         if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
 1162                 senderr(ESRCH);
 1163         if (flags & RTF_HOST)
 1164                 netmask = NULL;
 1165         switch (req) {
 1166         case RTM_DELETE:
 1167                 if (netmask) {
 1168                         rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
 1169                             netmask);
 1170                         dst = (struct sockaddr *)&maskeddst;
 1171                 }
 1172                 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
 1173                         senderr(ESRCH);
 1174                 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
 1175                         senderr(ESRCH);
 1176                 rt->rt_flags &= ~RTF_UP;
 1177                 ifa = rt->rt_ifa;
 1178                 if (ifa->ifa_flags & IFA_ROUTE &&
 1179                     rt_ifa_connected(rt, ifa)) {
 1180                         RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
 1181                             "deleted IFA_ROUTE\n",
 1182                             (void *)rt->_rt_key, (void *)ifa);
 1183                         ifa->ifa_flags &= ~IFA_ROUTE;
 1184                 }
 1185                 if (ifa->ifa_rtrequest)
 1186                         ifa->ifa_rtrequest(RTM_DELETE, rt, info);
 1187                 ifa = NULL;
 1188                 rttrash++;
 1189                 if (ret_nrt) {
 1190                         *ret_nrt = rt;
 1191                         rt_ref(rt);
 1192                         RT_REFCNT_TRACE(rt);
 1193                 }
 1194                 rtcache_invalidate();
 1195                 RT_UNLOCK();
 1196                 need_unlock = false;
 1197                 rt_timer_remove_all(rt);
 1198 #if defined(INET) || defined(INET6)
 1199                 if (netmask != NULL)
 1200                         lltable_prefix_free(dst->sa_family, dst, netmask, 0);
 1201 #endif
 1202                 if (ret_nrt == NULL) {
 1203                         /* Adjust the refcount */
 1204                         rt_ref(rt);
 1205                         RT_REFCNT_TRACE(rt);
 1206                         rt_free(rt);
 1207                 }
 1208                 break;
 1209 
 1210         case RTM_ADD:
 1211                 if (info->rti_ifa == NULL) {
 1212                         ifp = rt_getifp(info, &psref_ifp);
 1213                         ifa = rt_getifa(info, &psref_ifa);
 1214                         if (ifa == NULL)
 1215                                 senderr(ENETUNREACH);
 1216                 } else {
 1217                         /* Caller should have a reference of ifa */
 1218                         ifa = info->rti_ifa;
 1219                         need_to_release_ifa = false;
 1220                 }
 1221                 rt = pool_get(&rtentry_pool, PR_NOWAIT);
 1222                 if (rt == NULL)
 1223                         senderr(ENOBUFS);
 1224                 memset(rt, 0, sizeof(*rt));
 1225                 rt->rt_flags = RTF_UP | (flags & ~RTF_DONTCHANGEIFA);
 1226                 LIST_INIT(&rt->rt_timer);
 1227 
 1228                 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
 1229                 if (netmask) {
 1230                         rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
 1231                             netmask);
 1232                         rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
 1233                 } else {
 1234                         rt_setkey(rt, dst, M_NOWAIT);
 1235                 }
 1236                 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
 1237                 if (rt_getkey(rt) == NULL ||
 1238                     rt_setgate(rt, gateway) != 0) {
 1239                         pool_put(&rtentry_pool, rt);
 1240                         senderr(ENOBUFS);
 1241                 }
 1242 
 1243                 rt_set_ifa(rt, ifa);
 1244                 if (info->rti_info[RTAX_TAG] != NULL) {
 1245                         const struct sockaddr *tag;
 1246                         tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
 1247                         if (tag == NULL)
 1248                                 senderr(ENOBUFS);
 1249                 }
 1250                 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
 1251 
 1252                 ss = pserialize_read_enter();
 1253                 if (info->rti_info[RTAX_IFP] != NULL) {
 1254                         struct ifaddr *ifa2;
 1255                         ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
 1256                         if (ifa2 != NULL)
 1257                                 rt->rt_ifp = ifa2->ifa_ifp;
 1258                         else
 1259                                 rt->rt_ifp = ifa->ifa_ifp;
 1260                 } else
 1261                         rt->rt_ifp = ifa->ifa_ifp;
 1262                 pserialize_read_exit(ss);
 1263                 cv_init(&rt->rt_cv, "rtentry");
 1264                 psref_target_init(&rt->rt_psref, rt_psref_class);
 1265 
 1266                 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
 1267                 rc = rt_addaddr(rtbl, rt, netmask);
 1268                 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
 1269                 if (rc != 0) {
 1270                         ifafree(ifa); /* for rt_set_ifa above */
 1271                         cv_destroy(&rt->rt_cv);
 1272                         rt_destroy(rt);
 1273                         pool_put(&rtentry_pool, rt);
 1274                         senderr(rc);
 1275                 }
 1276                 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
 1277                 if (ifa->ifa_rtrequest)
 1278                         ifa->ifa_rtrequest(req, rt, info);
 1279                 if (need_to_release_ifa)
 1280                         ifa_release(ifa, &psref_ifa);
 1281                 ifa = NULL;
 1282                 if_put(ifp, &psref_ifp);
 1283                 ifp = NULL;
 1284                 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
 1285                 if (ret_nrt) {
 1286                         *ret_nrt = rt;
 1287                         rt_ref(rt);
 1288                         RT_REFCNT_TRACE(rt);
 1289                 }
 1290                 rtcache_invalidate();
 1291                 RT_UNLOCK();
 1292                 need_unlock = false;
 1293                 break;
 1294         case RTM_GET:
 1295                 if (netmask != NULL) {
 1296                         rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
 1297                             netmask);
 1298                         dst = (struct sockaddr *)&maskeddst;
 1299                 }
 1300                 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
 1301                         senderr(ESRCH);
 1302                 if (ret_nrt != NULL) {
 1303                         *ret_nrt = rt;
 1304                         rt_ref(rt);
 1305                         RT_REFCNT_TRACE(rt);
 1306                 }
 1307                 break;
 1308         }
 1309 bad:
 1310         if (need_to_release_ifa)
 1311                 ifa_release(ifa, &psref_ifa);
 1312         if_put(ifp, &psref_ifp);
 1313         curlwp_bindx(bound);
 1314         if (need_unlock)
 1315                 RT_UNLOCK();
 1316         splx(s);
 1317         return error;
 1318 }
 1319 
 1320 int
 1321 rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
 1322 {
 1323         struct sockaddr *new, *old;
 1324 
 1325         KASSERT(RT_WLOCKED());
 1326         KASSERT(rt->_rt_key != NULL);
 1327         RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
 1328 
 1329         new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
 1330         if (new == NULL)
 1331                 return ENOMEM;
 1332 
 1333         old = rt->rt_gateway;
 1334         rt->rt_gateway = new;
 1335         if (old != NULL)
 1336                 sockaddr_free(old);
 1337 
 1338         KASSERT(rt->_rt_key != NULL);
 1339         RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
 1340 
 1341         if (rt->rt_flags & RTF_GATEWAY) {
 1342                 struct rtentry *gwrt;
 1343 
 1344                 gwrt = rtalloc1_locked(gate, 1, false, true);
 1345                 /*
 1346                  * If we switched gateways, grab the MTU from the new
 1347                  * gateway route if the current MTU, if the current MTU is
 1348                  * greater than the MTU of gateway.
 1349                  * Note that, if the MTU of gateway is 0, we will reset the
 1350                  * MTU of the route to run PMTUD again from scratch. XXX
 1351                  */
 1352                 if (gwrt != NULL) {
 1353                         KASSERT(gwrt->_rt_key != NULL);
 1354                         RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
 1355                         if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
 1356                             rt->rt_rmx.rmx_mtu &&
 1357                             rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
 1358                                 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
 1359                         }
 1360                         rt_unref(gwrt);
 1361                 }
 1362         }
 1363         KASSERT(rt->_rt_key != NULL);
 1364         RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
 1365         return 0;
 1366 }
 1367 
 1368 static struct ifaddr *
 1369 rt_update_get_ifa(const struct rt_addrinfo *info, const struct rtentry *rt,
 1370     struct ifnet **ifp, struct psref *psref_ifp, struct psref *psref)
 1371 {
 1372         struct ifaddr *ifa = NULL;
 1373 
 1374         *ifp = NULL;
 1375         if (info->rti_info[RTAX_IFP] != NULL) {
 1376                 ifa = ifa_ifwithnet_psref(info->rti_info[RTAX_IFP], psref);
 1377                 if (ifa == NULL)
 1378                         goto next;
 1379                 if (ifa->ifa_ifp->if_flags & IFF_UNNUMBERED) {
 1380                         ifa_release(ifa, psref);
 1381                         ifa = NULL;
 1382                         goto next;
 1383                 }
 1384                 *ifp = ifa->ifa_ifp;
 1385                 if_acquire(*ifp, psref_ifp);
 1386                 if (info->rti_info[RTAX_IFA] == NULL &&
 1387                     info->rti_info[RTAX_GATEWAY] == NULL)
 1388                         goto out;
 1389                 ifa_release(ifa, psref);
 1390                 if (info->rti_info[RTAX_IFA] == NULL) {
 1391                         /* route change <dst> <gw> -ifp <if> */
 1392                         ifa = ifaof_ifpforaddr_psref(
 1393                             info->rti_info[RTAX_GATEWAY], *ifp, psref);
 1394                 } else {
 1395                         /* route change <dst> -ifp <if> -ifa <addr> */
 1396                         ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA],
 1397                             psref);
 1398                         if (ifa != NULL)
 1399                                 goto out;
 1400                         ifa = ifaof_ifpforaddr_psref(info->rti_info[RTAX_IFA],
 1401                             *ifp, psref);
 1402                 }
 1403                 goto out;
 1404         }
 1405 next:
 1406         if (info->rti_info[RTAX_IFA] != NULL) {
 1407                 /* route change <dst> <gw> -ifa <addr> */
 1408                 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], psref);
 1409                 if (ifa != NULL)
 1410                         goto out;
 1411         }
 1412         if (info->rti_info[RTAX_GATEWAY] != NULL) {
 1413                 /* route change <dst> <gw> */
 1414                 ifa = ifa_ifwithroute_psref(rt->rt_flags, rt_getkey(rt),
 1415                     info->rti_info[RTAX_GATEWAY], psref);
 1416         }
 1417 out:
 1418         if (ifa != NULL && *ifp == NULL) {
 1419                 *ifp = ifa->ifa_ifp;
 1420                 if_acquire(*ifp, psref_ifp);
 1421         }
 1422         if (ifa == NULL && *ifp != NULL) {
 1423                 if_put(*ifp, psref_ifp);
 1424                 *ifp = NULL;
 1425         }
 1426         return ifa;
 1427 }
 1428 
 1429 int
 1430 rt_update(struct rtentry *rt, struct rt_addrinfo *info, void *rtm)
 1431 {
 1432         int error = 0;
 1433         struct ifnet *ifp = NULL, *new_ifp = NULL;
 1434         struct ifaddr *ifa = NULL, *new_ifa;
 1435         struct psref psref_ifa, psref_new_ifa, psref_ifp, psref_new_ifp;
 1436         bool newgw, ifp_changed = false;
 1437 
 1438         RT_WLOCK();
 1439         /*
 1440          * New gateway could require new ifaddr, ifp;
 1441          * flags may also be different; ifp may be specified
 1442          * by ll sockaddr when protocol address is ambiguous
 1443          */
 1444         newgw = info->rti_info[RTAX_GATEWAY] != NULL &&
 1445             sockaddr_cmp(info->rti_info[RTAX_GATEWAY], rt->rt_gateway) != 0;
 1446 
 1447         if (newgw || info->rti_info[RTAX_IFP] != NULL ||
 1448             info->rti_info[RTAX_IFA] != NULL) {
 1449                 ifp = rt_getifp(info, &psref_ifp);
 1450                 /* info refers ifp so we need to keep a reference */
 1451                 ifa = rt_getifa(info, &psref_ifa);
 1452                 if (ifa == NULL) {
 1453                         error = ENETUNREACH;
 1454                         goto out;
 1455                 }
 1456         }
 1457         if (newgw) {
 1458                 error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]);
 1459                 if (error != 0)
 1460                         goto out;
 1461         }
 1462         if (info->rti_info[RTAX_TAG]) {
 1463                 const struct sockaddr *tag;
 1464                 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
 1465                 if (tag == NULL) {
 1466                         error = ENOBUFS;
 1467                         goto out;
 1468                 }
 1469         }
 1470         /*
 1471          * New gateway could require new ifaddr, ifp;
 1472          * flags may also be different; ifp may be specified
 1473          * by ll sockaddr when protocol address is ambiguous
 1474          */
 1475         new_ifa = rt_update_get_ifa(info, rt, &new_ifp, &psref_new_ifp,
 1476             &psref_new_ifa);
 1477         if (new_ifa != NULL) {
 1478                 ifa_release(ifa, &psref_ifa);
 1479                 ifa = new_ifa;
 1480         }
 1481         if (ifa) {
 1482                 struct ifaddr *oifa = rt->rt_ifa;
 1483                 if (oifa != ifa && !ifa_is_destroying(ifa) &&
 1484                     new_ifp != NULL && !if_is_deactivated(new_ifp)) {
 1485                         if (oifa && oifa->ifa_rtrequest)
 1486                                 oifa->ifa_rtrequest(RTM_DELETE, rt, info);
 1487                         rt_replace_ifa(rt, ifa);
 1488                         rt->rt_ifp = new_ifp;
 1489                         ifp_changed = true;
 1490                 }
 1491                 if (new_ifa == NULL)
 1492                         ifa_release(ifa, &psref_ifa);
 1493                 /* To avoid ifa_release below */
 1494                 ifa = NULL;
 1495         }
 1496         ifa_release(new_ifa, &psref_new_ifa);
 1497         if (new_ifp && rt->rt_ifp != new_ifp && !if_is_deactivated(new_ifp)) {
 1498                 rt->rt_ifp = new_ifp;
 1499                 ifp_changed = true;
 1500         }
 1501         rt_setmetrics(rtm, rt);
 1502         if (rt->rt_flags != info->rti_flags) {
 1503                 rt->rt_flags = (info->rti_flags & ~PRESERVED_RTF) |
 1504                     (rt->rt_flags & PRESERVED_RTF);
 1505         }
 1506         if (rt->rt_ifa->ifa_rtrequest)
 1507                 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info);
 1508 #if defined(INET) || defined(INET6)
 1509         if (ifp_changed && rt_mask(rt) != NULL)
 1510                 lltable_prefix_free(rt_getkey(rt)->sa_family, rt_getkey(rt),
 1511                     rt_mask(rt), 0);
 1512 #else
 1513         (void)ifp_changed; /* XXX gcc */
 1514 #endif
 1515 out:
 1516         ifa_release(ifa, &psref_ifa);
 1517         if_put(new_ifp, &psref_new_ifp);
 1518         if_put(ifp, &psref_ifp);
 1519 
 1520         RT_UNLOCK();
 1521 
 1522         return error;
 1523 }
 1524 
 1525 static void
 1526 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
 1527         const struct sockaddr *netmask)
 1528 {
 1529         const char *netmaskp = &netmask->sa_data[0],
 1530                    *srcp = &src->sa_data[0];
 1531         char *dstp = &dst->sa_data[0];
 1532         const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
 1533         const char *srcend = (char *)dst + src->sa_len;
 1534 
 1535         dst->sa_len = src->sa_len;
 1536         dst->sa_family = src->sa_family;
 1537 
 1538         while (dstp < maskend)
 1539                 *dstp++ = *srcp++ & *netmaskp++;
 1540         if (dstp < srcend)
 1541                 memset(dstp, 0, (size_t)(srcend - dstp));
 1542 }
 1543 
 1544 /*
 1545  * Inform the routing socket of a route change.
 1546  */
 1547 void
 1548 rt_newmsg(const int cmd, const struct rtentry *rt)
 1549 {
 1550         struct rt_addrinfo info;
 1551 
 1552         memset((void *)&info, 0, sizeof(info));
 1553         info.rti_info[RTAX_DST] = rt_getkey(rt);
 1554         info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 1555         info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 1556         if (rt->rt_ifp) {
 1557                 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
 1558                 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 1559         }
 1560 
 1561         rt_missmsg(cmd, &info, rt->rt_flags, 0);
 1562 }
 1563 
 1564 /*
 1565  * Inform the routing socket of a route change for RTF_DYNAMIC.
 1566  */
 1567 void
 1568 rt_newmsg_dynamic(const int cmd, const struct rtentry *rt)
 1569 {
 1570         struct rt_addrinfo info;
 1571         struct sockaddr *gateway = rt->rt_gateway;
 1572 
 1573         if (gateway == NULL)
 1574                 return;
 1575 
 1576         switch(gateway->sa_family) {
 1577 #ifdef INET
 1578         case AF_INET: {
 1579                 extern bool icmp_dynamic_rt_msg;
 1580                 if (!icmp_dynamic_rt_msg)
 1581                         return;
 1582                 break;
 1583         }
 1584 #endif
 1585 #ifdef INET6
 1586         case AF_INET6: {
 1587                 extern bool icmp6_dynamic_rt_msg;
 1588                 if (!icmp6_dynamic_rt_msg)
 1589                         return;
 1590                 break;
 1591         }
 1592 #endif
 1593         default:
 1594                 return;
 1595         }
 1596 
 1597         memset((void *)&info, 0, sizeof(info));
 1598         info.rti_info[RTAX_DST] = rt_getkey(rt);
 1599         info.rti_info[RTAX_GATEWAY] = gateway;
 1600         info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 1601         if (rt->rt_ifp) {
 1602                 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
 1603                 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 1604         }
 1605 
 1606         rt_missmsg(cmd, &info, rt->rt_flags, 0);
 1607 }
 1608 
 1609 /*
 1610  * Set up or tear down a routing table entry, normally
 1611  * for an interface.
 1612  */
 1613 int
 1614 rtinit(struct ifaddr *ifa, int cmd, int flags)
 1615 {
 1616         struct rtentry *rt;
 1617         struct sockaddr *dst, *odst;
 1618         struct sockaddr_storage maskeddst;
 1619         struct rtentry *nrt = NULL;
 1620         int error;
 1621         struct rt_addrinfo info;
 1622 
 1623         dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
 1624         if (cmd == RTM_DELETE) {
 1625                 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
 1626                         /* Delete subnet route for this interface */
 1627                         odst = dst;
 1628                         dst = (struct sockaddr *)&maskeddst;
 1629                         rt_maskedcopy(odst, dst, ifa->ifa_netmask);
 1630                 }
 1631                 if ((rt = rtalloc1(dst, 0)) != NULL) {
 1632                         if (rt->rt_ifa != ifa) {
 1633                                 rt_unref(rt);
 1634                                 return (flags & RTF_HOST) ? EHOSTUNREACH
 1635                                                         : ENETUNREACH;
 1636                         }
 1637                         rt_unref(rt);
 1638                 }
 1639         }
 1640         memset(&info, 0, sizeof(info));
 1641         info.rti_ifa = ifa;
 1642         info.rti_flags = flags | ifa->ifa_flags | RTF_DONTCHANGEIFA;
 1643         info.rti_info[RTAX_DST] = dst;
 1644         info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
 1645 
 1646         /*
 1647          * XXX here, it seems that we are assuming that ifa_netmask is NULL
 1648          * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
 1649          * variable) when RTF_HOST is 1.  still not sure if i can safely
 1650          * change it to meet bsdi4 behavior.
 1651          */
 1652         if (cmd != RTM_LLINFO_UPD)
 1653                 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
 1654         error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
 1655             &nrt);
 1656         if (error != 0)
 1657                 return error;
 1658 
 1659         rt = nrt;
 1660         RT_REFCNT_TRACE(rt);
 1661         switch (cmd) {
 1662         case RTM_DELETE:
 1663                 rt_newmsg(cmd, rt);
 1664                 rt_free(rt);
 1665                 break;
 1666         case RTM_LLINFO_UPD:
 1667                 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
 1668                         ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
 1669                 rt_newmsg(RTM_CHANGE, rt);
 1670                 rt_unref(rt);
 1671                 break;
 1672         case RTM_ADD:
 1673                 KASSERT(rt->rt_ifa == ifa);
 1674                 rt_newmsg(cmd, rt);
 1675                 rt_unref(rt);
 1676                 RT_REFCNT_TRACE(rt);
 1677                 break;
 1678         }
 1679         return error;
 1680 }
 1681 
 1682 /*
 1683  * Create a local route entry for the address.
 1684  * Announce the addition of the address and the route to the routing socket.
 1685  */
 1686 int
 1687 rt_ifa_addlocal(struct ifaddr *ifa)
 1688 {
 1689         struct rtentry *rt;
 1690         int e;
 1691 
 1692         /* If there is no loopback entry, allocate one. */
 1693         rt = rtalloc1(ifa->ifa_addr, 0);
 1694 #ifdef RT_DEBUG
 1695         if (rt != NULL)
 1696                 dump_rt(rt);
 1697 #endif
 1698         if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
 1699             (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
 1700         {
 1701                 struct rt_addrinfo info;
 1702                 struct rtentry *nrt;
 1703 
 1704                 memset(&info, 0, sizeof(info));
 1705                 info.rti_flags = RTF_HOST | RTF_LOCAL | RTF_DONTCHANGEIFA;
 1706                 info.rti_info[RTAX_DST] = ifa->ifa_addr;
 1707                 info.rti_info[RTAX_GATEWAY] =
 1708                     (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
 1709                 info.rti_ifa = ifa;
 1710                 nrt = NULL;
 1711                 e = rtrequest1(RTM_ADD, &info, &nrt);
 1712                 rt_addrmsg_rt(RTM_ADD, ifa, e, nrt);
 1713                 if (nrt != NULL) {
 1714                         KASSERT(nrt->rt_ifa == ifa);
 1715 #ifdef RT_DEBUG
 1716                         dump_rt(nrt);
 1717 #endif
 1718                         rt_unref(nrt);
 1719                         RT_REFCNT_TRACE(nrt);
 1720                 }
 1721         } else {
 1722                 e = 0;
 1723                 rt_addrmsg(RTM_NEWADDR, ifa);
 1724         }
 1725         if (rt != NULL)
 1726                 rt_unref(rt);
 1727         return e;
 1728 }
 1729 
 1730 /*
 1731  * Remove the local route entry for the address.
 1732  * Announce the removal of the address and the route to the routing socket.
 1733  */
 1734 int
 1735 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
 1736 {
 1737         struct rtentry *rt;
 1738         int e = 0;
 1739 
 1740         rt = rtalloc1(ifa->ifa_addr, 0);
 1741 
 1742         /*
 1743          * Before deleting, check if a corresponding loopbacked
 1744          * host route surely exists.  With this check, we can avoid
 1745          * deleting an interface direct route whose destination is
 1746          * the same as the address being removed.  This can happen
 1747          * when removing a subnet-router anycast address on an
 1748          * interface attached to a shared medium.
 1749          */
 1750         if (rt != NULL &&
 1751             (rt->rt_flags & RTF_HOST) &&
 1752             (rt->rt_ifp->if_flags & IFF_LOOPBACK))
 1753         {
 1754                 /* If we cannot replace the route's ifaddr with the equivalent
 1755                  * ifaddr of another interface, I believe it is safest to
 1756                  * delete the route.
 1757                  */
 1758                 if (alt_ifa == NULL) {
 1759                         e = rtdeletemsg(rt);
 1760                         if (e == 0) {
 1761                                 rt_unref(rt);
 1762                                 rt_free(rt);
 1763                                 rt = NULL;
 1764                         }
 1765                         rt_addrmsg(RTM_DELADDR, ifa);
 1766                 } else {
 1767 #ifdef NET_MPSAFE
 1768                         int error = rt_update_prepare(rt);
 1769                         if (error == 0) {
 1770                                 rt_replace_ifa(rt, alt_ifa);
 1771                                 rt_update_finish(rt);
 1772                         } else {
 1773                                 /*
 1774                                  * If error != 0, the rtentry is being
 1775                                  * destroyed, so doing nothing doesn't
 1776                                  * matter.
 1777                                  */
 1778                         }
 1779 #else
 1780                         rt_replace_ifa(rt, alt_ifa);
 1781 #endif
 1782                         rt_newmsg(RTM_CHANGE, rt);
 1783                 }
 1784         } else
 1785                 rt_addrmsg(RTM_DELADDR, ifa);
 1786         if (rt != NULL)
 1787                 rt_unref(rt);
 1788         return e;
 1789 }
 1790 
/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */

/* Head of the list of all route timer queues. */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
/* Non-zero once rt_timer_init() has run. */
static int rt_init_done = 0;

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on rt_timer_init() being run from rt_init before
 * various protocol initializations are done.  Therefore, we make sure
 * that it is run when the first queue is added (see
 * rt_timer_queue_create())...
 */

/* Forward declaration: passed to workqueue_create() in rt_timer_init(). */
static void rt_timer_work(struct work *, void *);
 1811 
 1812 static void
 1813 rt_timer_init(void)
 1814 {
 1815         int error;
 1816 
 1817         assert(rt_init_done == 0);
 1818 
 1819         /* XXX should be in rt_init */
 1820         rw_init(&rt_lock);
 1821 
 1822         LIST_INIT(&rttimer_queue_head);
 1823         callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
 1824         error = workqueue_create(&rt_timer_wq, "rt_timer",
 1825             rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
 1826         if (error)
 1827                 panic("%s: workqueue_create failed (%d)\n", __func__, error);
 1828         callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
 1829         rt_init_done = 1;
 1830 }
 1831 
 1832 struct rttimer_queue *
 1833 rt_timer_queue_create(u_int timeout)
 1834 {
 1835         struct rttimer_queue *rtq;
 1836 
 1837         if (rt_init_done == 0)
 1838                 rt_timer_init();
 1839 
 1840         R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
 1841         if (rtq == NULL)
 1842                 return NULL;
 1843         memset(rtq, 0, sizeof(*rtq));
 1844 
 1845         rtq->rtq_timeout = timeout;
 1846         TAILQ_INIT(&rtq->rtq_head);
 1847         RT_WLOCK();
 1848         LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
 1849         RT_UNLOCK();
 1850 
 1851         return rtq;
 1852 }
 1853 
 1854 void
 1855 rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
 1856 {
 1857 
 1858         rtq->rtq_timeout = timeout;
 1859 }
 1860 
 1861 static void
 1862 rt_timer_queue_remove_all(struct rttimer_queue *rtq)
 1863 {
 1864         struct rttimer *r;
 1865 
 1866         RT_ASSERT_WLOCK();
 1867 
 1868         while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
 1869                 LIST_REMOVE(r, rtt_link);
 1870                 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
 1871                 rt_ref(r->rtt_rt); /* XXX */
 1872                 RT_REFCNT_TRACE(r->rtt_rt);
 1873                 RT_UNLOCK();
 1874                 (*r->rtt_func)(r->rtt_rt, r);
 1875                 pool_put(&rttimer_pool, r);
 1876                 RT_WLOCK();
 1877                 if (rtq->rtq_count > 0)
 1878                         rtq->rtq_count--;
 1879                 else
 1880                         printf("rt_timer_queue_remove_all: "
 1881                             "rtq_count reached 0\n");
 1882         }
 1883 }
 1884 
 1885 void
 1886 rt_timer_queue_destroy(struct rttimer_queue *rtq)
 1887 {
 1888 
 1889         RT_WLOCK();
 1890         rt_timer_queue_remove_all(rtq);
 1891         LIST_REMOVE(rtq, rtq_link);
 1892         RT_UNLOCK();
 1893 
 1894         /*
 1895          * Caller is responsible for freeing the rttimer_queue structure.
 1896          */
 1897 }
 1898 
 1899 unsigned long
 1900 rt_timer_count(struct rttimer_queue *rtq)
 1901 {
 1902         return rtq->rtq_count;
 1903 }
 1904 
 1905 static void
 1906 rt_timer_remove_all(struct rtentry *rt)
 1907 {
 1908         struct rttimer *r;
 1909 
 1910         RT_WLOCK();
 1911         while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
 1912                 LIST_REMOVE(r, rtt_link);
 1913                 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
 1914                 if (r->rtt_queue->rtq_count > 0)
 1915                         r->rtt_queue->rtq_count--;
 1916                 else
 1917                         printf("rt_timer_remove_all: rtq_count reached 0\n");
 1918                 pool_put(&rttimer_pool, r);
 1919         }
 1920         RT_UNLOCK();
 1921 }
 1922 
 1923 int
 1924 rt_timer_add(struct rtentry *rt,
 1925         void (*func)(struct rtentry *, struct rttimer *),
 1926         struct rttimer_queue *queue)
 1927 {
 1928         struct rttimer *r;
 1929 
 1930         KASSERT(func != NULL);
 1931         RT_WLOCK();
 1932         /*
 1933          * If there's already a timer with this action, destroy it before
 1934          * we add a new one.
 1935          */
 1936         LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
 1937                 if (r->rtt_func == func)
 1938                         break;
 1939         }
 1940         if (r != NULL) {
 1941                 LIST_REMOVE(r, rtt_link);
 1942                 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
 1943                 if (r->rtt_queue->rtq_count > 0)
 1944                         r->rtt_queue->rtq_count--;
 1945                 else
 1946                         printf("rt_timer_add: rtq_count reached 0\n");
 1947         } else {
 1948                 r = pool_get(&rttimer_pool, PR_NOWAIT);
 1949                 if (r == NULL) {
 1950                         RT_UNLOCK();
 1951                         return ENOBUFS;
 1952                 }
 1953         }
 1954 
 1955         memset(r, 0, sizeof(*r));
 1956 
 1957         r->rtt_rt = rt;
 1958         r->rtt_time = time_uptime;
 1959         r->rtt_func = func;
 1960         r->rtt_queue = queue;
 1961         LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
 1962         TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
 1963         r->rtt_queue->rtq_count++;
 1964 
 1965         RT_UNLOCK();
 1966 
 1967         return 0;
 1968 }
 1969 
 1970 static void
 1971 rt_timer_work(struct work *wk, void *arg)
 1972 {
 1973         struct rttimer_queue *rtq;
 1974         struct rttimer *r;
 1975 
 1976         RT_WLOCK();
 1977         LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
 1978                 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
 1979                     (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
 1980                         LIST_REMOVE(r, rtt_link);
 1981                         TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
 1982                         /*
 1983                          * Take a reference to avoid the rtentry is freed
 1984                          * accidentally after RT_UNLOCK.  The callback
 1985                          * (rtt_func) must rt_unref it by itself.
 1986                          */
 1987                         rt_ref(r->rtt_rt);
 1988                         RT_REFCNT_TRACE(r->rtt_rt);
 1989                         RT_UNLOCK();
 1990                         (*r->rtt_func)(r->rtt_rt, r);
 1991                         pool_put(&rttimer_pool, r);
 1992                         RT_WLOCK();
 1993                         if (rtq->rtq_count > 0)
 1994                                 rtq->rtq_count--;
 1995                         else
 1996                                 printf("rt_timer_timer: rtq_count reached 0\n");
 1997                 }
 1998         }
 1999         RT_UNLOCK();
 2000 
 2001         callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
 2002 }
 2003 
 2004 static void
 2005 rt_timer_timer(void *arg)
 2006 {
 2007 
 2008         workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
 2009 }
 2010 
 2011 static struct rtentry *
 2012 _rtcache_init(struct route *ro, int flag)
 2013 {
 2014         struct rtentry *rt;
 2015 
 2016         rtcache_invariants(ro);
 2017         KASSERT(ro->_ro_rt == NULL);
 2018 
 2019         if (rtcache_getdst(ro) == NULL)
 2020                 return NULL;
 2021         rt = rtalloc1(rtcache_getdst(ro), flag);
 2022         if (rt != NULL) {
 2023                 RT_RLOCK();
 2024                 if (ISSET(rt->rt_flags, RTF_UP)) {
 2025                         ro->_ro_rt = rt;
 2026                         ro->ro_rtcache_generation = rtcache_generation;
 2027                         rtcache_ref(rt, ro);
 2028                 }
 2029                 RT_UNLOCK();
 2030                 rt_unref(rt);
 2031         }
 2032 
 2033         rtcache_invariants(ro);
 2034         return ro->_ro_rt;
 2035 }
 2036 
 2037 struct rtentry *
 2038 rtcache_init(struct route *ro)
 2039 {
 2040 
 2041         return _rtcache_init(ro, 1);
 2042 }
 2043 
 2044 struct rtentry *
 2045 rtcache_init_noclone(struct route *ro)
 2046 {
 2047 
 2048         return _rtcache_init(ro, 0);
 2049 }
 2050 
 2051 struct rtentry *
 2052 rtcache_update(struct route *ro, int clone)
 2053 {
 2054 
 2055         ro->_ro_rt = NULL;
 2056         return _rtcache_init(ro, clone);
 2057 }
 2058 
 2059 void
 2060 rtcache_copy(struct route *new_ro, struct route *old_ro)
 2061 {
 2062         struct rtentry *rt;
 2063         int ret;
 2064 
 2065         KASSERT(new_ro != old_ro);
 2066         rtcache_invariants(new_ro);
 2067         rtcache_invariants(old_ro);
 2068 
 2069         rt = rtcache_validate(old_ro);
 2070 
 2071         if (rtcache_getdst(old_ro) == NULL)
 2072                 goto out;
 2073         ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
 2074         if (ret != 0)
 2075                 goto out;
 2076 
 2077         RT_RLOCK();
 2078         new_ro->_ro_rt = rt;
 2079         new_ro->ro_rtcache_generation = rtcache_generation;
 2080         RT_UNLOCK();
 2081         rtcache_invariants(new_ro);
 2082 out:
 2083         rtcache_unref(rt, old_ro);
 2084         return;
 2085 }
 2086 
 2087 #if defined(RT_DEBUG) && defined(NET_MPSAFE)
 2088 static void
 2089 rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
 2090 {
 2091         char dst[64];
 2092 
 2093         sockaddr_format(ro->ro_sa, dst, 64);
 2094         printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
 2095             cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
 2096 }
 2097 #define RTCACHE_PSREF_TRACE(rt, ro)     rtcache_trace(__func__, (rt), (ro))
 2098 #else
 2099 #define RTCACHE_PSREF_TRACE(rt, ro)     do {} while (0)
 2100 #endif
 2101 
 2102 static void
 2103 rtcache_ref(struct rtentry *rt, struct route *ro)
 2104 {
 2105 
 2106         KASSERT(rt != NULL);
 2107 
 2108 #ifdef NET_MPSAFE
 2109         RTCACHE_PSREF_TRACE(rt, ro);
 2110         ro->ro_bound = curlwp_bind();
 2111         /* XXX Use a real caller's address */
 2112         PSREF_DEBUG_FILL_RETURN_ADDRESS(&ro->ro_psref);
 2113         psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
 2114 #endif
 2115 }
 2116 
 2117 void
 2118 rtcache_unref(struct rtentry *rt, struct route *ro)
 2119 {
 2120 
 2121         if (rt == NULL)
 2122                 return;
 2123 
 2124 #ifdef NET_MPSAFE
 2125         psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
 2126         curlwp_bindx(ro->ro_bound);
 2127         RTCACHE_PSREF_TRACE(rt, ro);
 2128 #endif
 2129 }
 2130 
 2131 struct rtentry *
 2132 rtcache_validate(struct route *ro)
 2133 {
 2134         struct rtentry *rt = NULL;
 2135 
 2136 #ifdef NET_MPSAFE
 2137 retry:
 2138 #endif
 2139         rtcache_invariants(ro);
 2140         RT_RLOCK();
 2141         if (ro->ro_rtcache_generation != rtcache_generation) {
 2142                 /* The cache is invalidated */
 2143                 rt = NULL;
 2144                 goto out;
 2145         }
 2146 
 2147         rt = ro->_ro_rt;
 2148         if (rt == NULL)
 2149                 goto out;
 2150 
 2151         if ((rt->rt_flags & RTF_UP) == 0) {
 2152                 rt = NULL;
 2153                 goto out;
 2154         }
 2155 #ifdef NET_MPSAFE
 2156         if (ISSET(rt->rt_flags, RTF_UPDATING)) {
 2157                 if (rt_wait_ok()) {
 2158                         RT_UNLOCK();
 2159 
 2160                         /* We can wait until the update is complete */
 2161                         rt_update_wait();
 2162                         goto retry;
 2163                 } else {
 2164                         rt = NULL;
 2165                 }
 2166         } else
 2167 #endif
 2168                 rtcache_ref(rt, ro);
 2169 out:
 2170         RT_UNLOCK();
 2171         return rt;
 2172 }
 2173 
 2174 struct rtentry *
 2175 rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
 2176     int clone, int *hitp)
 2177 {
 2178         const struct sockaddr *odst;
 2179         struct rtentry *rt = NULL;
 2180 
 2181         odst = rtcache_getdst(ro);
 2182         if (odst == NULL)
 2183                 goto miss;
 2184 
 2185         if (sockaddr_cmp(odst, dst) != 0) {
 2186                 rtcache_free(ro);
 2187                 goto miss;
 2188         }
 2189 
 2190         rt = rtcache_validate(ro);
 2191         if (rt == NULL) {
 2192                 ro->_ro_rt = NULL;
 2193                 goto miss;
 2194         }
 2195 
 2196         rtcache_invariants(ro);
 2197 
 2198         if (hitp != NULL)
 2199                 *hitp = 1;
 2200         return rt;
 2201 miss:
 2202         if (hitp != NULL)
 2203                 *hitp = 0;
 2204         if (rtcache_setdst(ro, dst) == 0)
 2205                 rt = _rtcache_init(ro, clone);
 2206 
 2207         rtcache_invariants(ro);
 2208 
 2209         return rt;
 2210 }
 2211 
 2212 void
 2213 rtcache_free(struct route *ro)
 2214 {
 2215 
 2216         ro->_ro_rt = NULL;
 2217         if (ro->ro_sa != NULL) {
 2218                 sockaddr_free(ro->ro_sa);
 2219                 ro->ro_sa = NULL;
 2220         }
 2221         rtcache_invariants(ro);
 2222 }
 2223 
 2224 int
 2225 rtcache_setdst(struct route *ro, const struct sockaddr *sa)
 2226 {
 2227         KASSERT(sa != NULL);
 2228 
 2229         rtcache_invariants(ro);
 2230         if (ro->ro_sa != NULL) {
 2231                 if (ro->ro_sa->sa_family == sa->sa_family) {
 2232                         ro->_ro_rt = NULL;
 2233                         sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
 2234                         rtcache_invariants(ro);
 2235                         return 0;
 2236                 }
 2237                 /* free ro_sa, wrong family */
 2238                 rtcache_free(ro);
 2239         }
 2240 
 2241         KASSERT(ro->_ro_rt == NULL);
 2242 
 2243         if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
 2244                 rtcache_invariants(ro);
 2245                 return ENOMEM;
 2246         }
 2247         rtcache_invariants(ro);
 2248         return 0;
 2249 }
 2250 
 2251 static void
 2252 rtcache_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
 2253 {
 2254         struct route **rop = p;
 2255 
 2256         /*
 2257          * We can't have struct route as percpu data because it can be destroyed
 2258          * over a memory enlargement processing of percpu.
 2259          */
 2260         *rop = kmem_zalloc(sizeof(**rop), KM_SLEEP);
 2261 }
 2262 
 2263 percpu_t *
 2264 rtcache_percpu_alloc(void)
 2265 {
 2266 
 2267         return percpu_create(sizeof(struct route *),
 2268             rtcache_percpu_init_cpu, NULL, NULL);
 2269 }
 2270 
 2271 const struct sockaddr *
 2272 rt_settag(struct rtentry *rt, const struct sockaddr *tag)
 2273 {
 2274         if (rt->rt_tag != tag) {
 2275                 if (rt->rt_tag != NULL)
 2276                         sockaddr_free(rt->rt_tag);
 2277                 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
 2278         }
 2279         return rt->rt_tag;
 2280 }
 2281 
 2282 struct sockaddr *
 2283 rt_gettag(const struct rtentry *rt)
 2284 {
 2285         return rt->rt_tag;
 2286 }
 2287 
 2288 int
 2289 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
 2290 {
 2291 
 2292         if ((rt->rt_flags & RTF_REJECT) != 0) {
 2293                 /* Mimic looutput */
 2294                 if (ifp->if_flags & IFF_LOOPBACK)
 2295                         return (rt->rt_flags & RTF_HOST) ?
 2296                             EHOSTUNREACH : ENETUNREACH;
 2297                 else if (rt->rt_rmx.rmx_expire == 0 ||
 2298                     time_uptime < rt->rt_rmx.rmx_expire)
 2299                         return (rt->rt_flags & RTF_GATEWAY) ?
 2300                             EHOSTUNREACH : EHOSTDOWN;
 2301         }
 2302 
 2303         return 0;
 2304 }
 2305 
 2306 void
 2307 rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
 2308     void *v, bool notify)
 2309 {
 2310 
 2311         for (;;) {
 2312                 int s;
 2313                 int error;
 2314                 struct rtentry *rt, *retrt = NULL;
 2315 
 2316                 RT_RLOCK();
 2317                 s = splsoftnet();
 2318                 rt = rtbl_search_matched_entry(family, f, v);
 2319                 if (rt == NULL) {
 2320                         splx(s);
 2321                         RT_UNLOCK();
 2322                         return;
 2323                 }
 2324                 rt_ref(rt);
 2325                 RT_REFCNT_TRACE(rt);
 2326                 splx(s);
 2327                 RT_UNLOCK();
 2328 
 2329                 error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
 2330                     rt_mask(rt), rt->rt_flags, &retrt);
 2331                 if (error == 0) {
 2332                         KASSERT(retrt == rt);
 2333                         KASSERT((retrt->rt_flags & RTF_UP) == 0);
 2334                         if (notify)
 2335                                 rt_newmsg(RTM_DELETE, retrt);
 2336                         retrt->rt_ifp = NULL;
 2337                         rt_unref(rt);
 2338                         RT_REFCNT_TRACE(rt);
 2339                         rt_free(retrt);
 2340                 } else if (error == ESRCH) {
 2341                         /* Someone deleted the entry already. */
 2342                         rt_unref(rt);
 2343                         RT_REFCNT_TRACE(rt);
 2344                 } else {
 2345                         log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
 2346                             "error = %d\n", rt->rt_ifp->if_xname, rt, error);
 2347                         /* XXX how to treat this case? */
 2348                 }
 2349         }
 2350 }
 2351 
 2352 static int
 2353 rt_walktree_locked(sa_family_t family, int (*f)(struct rtentry *, void *),
 2354     void *v)
 2355 {
 2356 
 2357         return rtbl_walktree(family, f, v);
 2358 }
 2359 
 2360 void
 2361 rt_replace_ifa_matched_entries(sa_family_t family,
 2362     int (*f)(struct rtentry *, void *), void *v, struct ifaddr *ifa)
 2363 {
 2364 
 2365         for (;;) {
 2366                 int s;
 2367 #ifdef NET_MPSAFE
 2368                 int error;
 2369 #endif
 2370                 struct rtentry *rt;
 2371 
 2372                 RT_RLOCK();
 2373                 s = splsoftnet();
 2374                 rt = rtbl_search_matched_entry(family, f, v);
 2375                 if (rt == NULL) {
 2376                         splx(s);
 2377                         RT_UNLOCK();
 2378                         return;
 2379                 }
 2380                 rt_ref(rt);
 2381                 RT_REFCNT_TRACE(rt);
 2382                 splx(s);
 2383                 RT_UNLOCK();
 2384 
 2385 #ifdef NET_MPSAFE
 2386                 error = rt_update_prepare(rt);
 2387                 if (error == 0) {
 2388                         rt_replace_ifa(rt, ifa);
 2389                         rt_update_finish(rt);
 2390                         rt_newmsg(RTM_CHANGE, rt);
 2391                 } else {
 2392                         /*
 2393                          * If error != 0, the rtentry is being
 2394                          * destroyed, so doing nothing doesn't
 2395                          * matter.
 2396                          */
 2397                 }
 2398 #else
 2399                 rt_replace_ifa(rt, ifa);
 2400                 rt_newmsg(RTM_CHANGE, rt);
 2401 #endif
 2402                 rt_unref(rt);
 2403                 RT_REFCNT_TRACE(rt);
 2404         }
 2405 }
 2406 
 2407 int
 2408 rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
 2409 {
 2410         int error;
 2411 
 2412         RT_RLOCK();
 2413         error = rt_walktree_locked(family, f, v);
 2414         RT_UNLOCK();
 2415 
 2416         return error;
 2417 }
 2418 
 2419 #ifdef DDB
 2420 
 2421 #include <machine/db_machdep.h>
 2422 #include <ddb/db_interface.h>
 2423 #include <ddb/db_output.h>
 2424 
 2425 #define rt_expire rt_rmx.rmx_expire
 2426 
 2427 static void
 2428 db_print_sa(const struct sockaddr *sa)
 2429 {
 2430         int len;
 2431         const u_char *p;
 2432 
 2433         if (sa == NULL) {
 2434                 db_printf("[NULL]");
 2435                 return;
 2436         }
 2437 
 2438         p = (const u_char *)sa;
 2439         len = sa->sa_len;
 2440         db_printf("[");
 2441         while (len > 0) {
 2442                 db_printf("%d", *p);
 2443                 p++; len--;
 2444                 if (len) db_printf(",");
 2445         }
 2446         db_printf("]\n");
 2447 }
 2448 
 2449 static void
 2450 db_print_ifa(struct ifaddr *ifa)
 2451 {
 2452         if (ifa == NULL)
 2453                 return;
 2454         db_printf("  ifa_addr=");
 2455         db_print_sa(ifa->ifa_addr);
 2456         db_printf("  ifa_dsta=");
 2457         db_print_sa(ifa->ifa_dstaddr);
 2458         db_printf("  ifa_mask=");
 2459         db_print_sa(ifa->ifa_netmask);
 2460         db_printf("  flags=0x%x,refcnt=%d,metric=%d\n",
 2461                           ifa->ifa_flags,
 2462                           ifa->ifa_refcnt,
 2463                           ifa->ifa_metric);
 2464 }
 2465 
 2466 /*
 2467  * Function to pass to rt_walktree().
 2468  * Return non-zero error to abort walk.
 2469  */
 2470 static int
 2471 db_show_rtentry(struct rtentry *rt, void *w)
 2472 {
 2473         db_printf("rtentry=%p", rt);
 2474 
 2475         db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
 2476                           rt->rt_flags, rt->rt_refcnt,
 2477                           rt->rt_use, (uint64_t)rt->rt_expire);
 2478 
 2479         db_printf(" key="); db_print_sa(rt_getkey(rt));
 2480         db_printf(" mask="); db_print_sa(rt_mask(rt));
 2481         db_printf(" gw="); db_print_sa(rt->rt_gateway);
 2482 
 2483         db_printf(" ifp=%p ", rt->rt_ifp);
 2484         if (rt->rt_ifp)
 2485                 db_printf("(%s)", rt->rt_ifp->if_xname);
 2486         else
 2487                 db_printf("(NULL)");
 2488 
 2489         db_printf(" ifa=%p\n", rt->rt_ifa);
 2490         db_print_ifa(rt->rt_ifa);
 2491 
 2492         db_printf(" gwroute=%p llinfo=%p\n",
 2493                           rt->rt_gwroute, rt->rt_llinfo);
 2494 
 2495         return 0;
 2496 }
 2497 
 2498 /*
 2499  * Function to print all the route trees.
 2500  * Use this from ddb:  "show routes"
 2501  */
 2502 void
 2503 db_show_routes(db_expr_t addr, bool have_addr,
 2504     db_expr_t count, const char *modif)
 2505 {
 2506 
 2507         /* Taking RT_LOCK will fail if LOCKDEBUG is enabled. */
 2508         rt_walktree_locked(AF_INET, db_show_rtentry, NULL);
 2509 }
 2510 #endif

Cache object: 57f1d44d27451cea5ffe51061d0d42e4


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.