The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/rtsock.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1988, 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)rtsock.c    8.7 (Berkeley) 10/12/95
   32  * $FreeBSD$
   33  */
   34 #include "opt_ddb.h"
   35 #include "opt_route.h"
   36 #include "opt_inet.h"
   37 #include "opt_inet6.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/jail.h>
   41 #include <sys/kernel.h>
   42 #include <sys/eventhandler.h>
   43 #include <sys/domain.h>
   44 #include <sys/lock.h>
   45 #include <sys/malloc.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/priv.h>
   48 #include <sys/proc.h>
   49 #include <sys/protosw.h>
   50 #include <sys/rmlock.h>
   51 #include <sys/rwlock.h>
   52 #include <sys/signalvar.h>
   53 #include <sys/socket.h>
   54 #include <sys/socketvar.h>
   55 #include <sys/sysctl.h>
   56 #include <sys/systm.h>
   57 
   58 #include <net/if.h>
   59 #include <net/if_var.h>
   60 #include <net/if_private.h>
   61 #include <net/if_dl.h>
   62 #include <net/if_llatbl.h>
   63 #include <net/if_types.h>
   64 #include <net/netisr.h>
   65 #include <net/route.h>
   66 #include <net/route/route_ctl.h>
   67 #include <net/route/route_var.h>
   68 #include <net/vnet.h>
   69 
   70 #include <netinet/in.h>
   71 #include <netinet/if_ether.h>
   72 #include <netinet/ip_carp.h>
   73 #ifdef INET6
   74 #include <netinet6/in6_var.h>
   75 #include <netinet6/ip6_var.h>
   76 #include <netinet6/scope6_var.h>
   77 #endif
   78 #include <net/route/nhop.h>
   79 
   80 #define DEBUG_MOD_NAME  rtsock
   81 #define DEBUG_MAX_LEVEL LOG_DEBUG
   82 #include <net/route/route_debug.h>
   83 _DECLARE_DEBUG(LOG_INFO);
   84 
   85 #ifdef COMPAT_FREEBSD32
   86 #include <sys/mount.h>
   87 #include <compat/freebsd32/freebsd32.h>
   88 
      /*
       * Interface message header layout compiled only under COMPAT_FREEBSD32.
       * NOTE(review): presumably mirrors struct if_msghdr as laid out for
       * 32-bit consumers -- confirm against net/if.h before changing a field.
       */
   89 struct if_msghdr32 {
   90         uint16_t ifm_msglen;
   91         uint8_t ifm_version;
   92         uint8_t ifm_type;
   93         int32_t ifm_addrs;
   94         int32_t ifm_flags;
   95         uint16_t ifm_index;
   96         uint16_t _ifm_spare1;
   97         struct  if_data ifm_data;
   98 };
   99 
      /*
       * Extended ("l") interface message header layout compiled only under
       * COMPAT_FREEBSD32.  Compared with struct if_msghdr32 it additionally
       * carries ifm_len, ifm_data_off and a second spare field ahead of
       * ifm_data.
       * NOTE(review): presumably mirrors struct if_msghdrl for 32-bit
       * consumers -- confirm against net/if.h.
       */
  100 struct if_msghdrl32 {
  101         uint16_t ifm_msglen;
  102         uint8_t ifm_version;
  103         uint8_t ifm_type;
  104         int32_t ifm_addrs;
  105         int32_t ifm_flags;
  106         uint16_t ifm_index;
  107         uint16_t _ifm_spare1;
  108         uint16_t ifm_len;
  109         uint16_t ifm_data_off;
  110         uint32_t _ifm_spare2;
  111         struct  if_data ifm_data;
  112 };
  113 
      /*
       * Extended ("l") interface address message header layout compiled only
       * under COMPAT_FREEBSD32.
       * NOTE(review): presumably mirrors struct ifa_msghdrl for 32-bit
       * consumers -- confirm against net/if.h.
       */
  114 struct ifa_msghdrl32 {
  115         uint16_t ifam_msglen;
  116         uint8_t ifam_version;
  117         uint8_t ifam_type;
  118         int32_t ifam_addrs;
  119         int32_t ifam_flags;
  120         uint16_t ifam_index;
  121         uint16_t _ifam_spare1;
  122         uint16_t ifam_len;
  123         uint16_t ifam_data_off;
  124         int32_t ifam_metric;
  125         struct  if_data ifam_data;
  126 };
  127 
      /*
       * Storage size of sockaddr @sa: sa_len rounded up to the next multiple
       * of sizeof(int).  A sockaddr whose sa_len is 0 still occupies
       * sizeof(int) bytes.  Note that @sa is evaluated more than once.
       */
  128 #define SA_SIZE32(sa)                                           \
  129     (  (((struct sockaddr *)(sa))->sa_len == 0) ?               \
  130         sizeof(int)             :                               \
  131         1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(int) - 1) ) )
  132 
  133 #endif /* COMPAT_FREEBSD32 */
  134 
      /*
       * Descriptor for a contiguous scratch buffer: where it starts, how much
       * of it is already in use and how large it is in total.
       */
  135 struct linear_buffer {
  136         char            *base;  /* Base allocated memory pointer */
  137         uint32_t        offset; /* Currently used offset */
  138         uint32_t        size;   /* Total buffer size */
  139 };
  140 #define SCRATCH_BUFFER_SIZE     1024
  141 
  142 #define RTS_PID_LOG(_l, _fmt, ...)      RT_LOG_##_l(_l, "PID %d: " _fmt, curproc ? curproc->p_pid : 0, ## __VA_ARGS__)
  143 
  144 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
  145 
  146 /* NB: these are not modified */
  147 static struct   sockaddr route_src = { 2, PF_ROUTE, };
  148 static struct   sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
  149 
  150 /* These are external hooks for CARP. */
  151 int     (*carp_get_vhid_p)(struct ifaddr *);
  152 
  153 /*
  154  * Used by rtsock callback code to decide whether to filter the update
  155  * notification to a socket bound to a particular FIB.
  156  */
  157 #define RTS_FILTER_FIB  M_PROTO8
  158 /*
  159  * Used to store address family of the notification.
  160  */
  161 #define m_rtsock_family m_pkthdr.PH_loc.eight[0]
  162 
      /*
       * Routing control block: one per attached routing socket, linked on
       * V_route_cb.cblist (see rts_attach()/rts_detach()).
       */
  163 struct rcb {
  164         LIST_ENTRY(rcb) list;           /* linkage on V_route_cb.cblist */
  165         struct socket   *rcb_socket;    /* socket that owns this block */
  166         sa_family_t     rcb_family;     /* protocol given at attach; AF_UNSPEC matches every family in rts_input() */
  167 };
  168 
      /*
       * Per-vnet bookkeeping for attached routing sockets: the list of
       * control blocks plus counters maintained by rts_attach()/rts_detach()
       * while holding the rtsock mutex.
       */
  169 typedef struct {
  170         LIST_HEAD(, rcb)        cblist;
  171         int     ip_count;       /* attached w/ AF_INET */
  172         int     ip6_count;      /* attached w/ AF_INET6 */
  173         int     any_count;      /* total attached */
  174 } route_cb_t;
  175 VNET_DEFINE_STATIC(route_cb_t, route_cb);
  176 #define V_route_cb VNET(route_cb)
  177 
  178 struct mtx rtsock_mtx;
  179 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
  180 
  181 #define RTSOCK_LOCK()   mtx_lock(&rtsock_mtx)
  182 #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
  183 #define RTSOCK_LOCK_ASSERT()    mtx_assert(&rtsock_mtx, MA_OWNED)
  184 
  185 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
  186 
      /*
       * Argument bundle handed to the sysctl dump helpers declared below
       * (sysctl_dumpnhop(), sysctl_iflist(), sysctl_ifmalist()) and to
       * rtsock_msg_buffer().
       * NOTE(review): the uses of the individual fields are outside this
       * part of the file; presumably w_tmem/w_tmemsize describe a temporary
       * message buffer and w_req the originating sysctl request -- confirm.
       */
  187 struct walkarg {
  188         int     family;
  189         int     w_tmemsize;
  190         int     w_op, w_arg;
  191         caddr_t w_tmem;
  192         struct sysctl_req *w_req;
  193         struct sockaddr *dst;
  194         struct sockaddr *mask;
  195 };
  196 
  197 static void     rts_input(struct mbuf *m);
  198 static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo);
  199 static int      rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
  200                         struct walkarg *w, int *plen);
  201 static int      rt_xaddrs(caddr_t cp, caddr_t cplim,
  202                         struct rt_addrinfo *rtinfo);
  203 static int      cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb);
  204 static int      sysctl_dumpentry(struct rtentry *rt, void *vw);
  205 static int      sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh,
  206                         uint32_t weight, struct walkarg *w);
  207 static int      sysctl_iflist(int af, struct walkarg *w);
  208 static int      sysctl_ifmalist(int af, struct walkarg *w);
  209 static void     rt_getmetrics(const struct rtentry *rt,
  210                         const struct nhop_object *nh, struct rt_metrics *out);
  211 static void     rt_dispatch(struct mbuf *, sa_family_t);
  212 static void     rt_ifannouncemsg(struct ifnet *ifp, int what);
  213 static int      handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
  214                         struct rt_msghdr *rtm, struct rib_cmd_info *rc);
  215 static int      update_rtm_from_rc(struct rt_addrinfo *info,
  216                         struct rt_msghdr **prtm, int alloc_len,
  217                         struct rib_cmd_info *rc, struct nhop_object *nh);
  218 static void     send_rtm_reply(struct socket *so, struct rt_msghdr *rtm,
  219                         struct mbuf *m, sa_family_t saf, u_int fibnum,
  220                         int rtm_errno);
  221 static bool     can_export_rte(struct ucred *td_ucred, bool rt_is_host,
  222                         const struct sockaddr *rt_dst);
  223 static void     rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc);
  224 static void     rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask);
  225 
      /*
       * netisr handler descriptor for routing socket messages: mbufs queued
       * for NETISR_ROUTE are delivered to rts_input().  Registered from
       * vnet_rts_init().
       */
  226 static struct netisr_handler rtsock_nh = {
  227         .nh_name = "rtsock",
  228         .nh_handler = rts_input,
  229         .nh_proto = NETISR_ROUTE,
  230         .nh_policy = NETISR_POLICY_SOURCE,
  231 };
  232 
  233 static int
  234 sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
  235 {
  236         int error, qlimit;
  237 
  238         netisr_getqlimit(&rtsock_nh, &qlimit);
  239         error = sysctl_handle_int(oidp, &qlimit, 0, req);
  240         if (error || !req->newptr)
  241                 return (error);
  242         if (qlimit < 1)
  243                 return (EINVAL);
  244         return (netisr_setqlimit(&rtsock_nh, qlimit));
  245 }
  246 SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen,
  247     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
  248     0, 0, sysctl_route_netisr_maxqlen, "I",
  249     "maximum routing socket dispatch queue length");
  250 
  251 static void
  252 vnet_rts_init(void)
  253 {
  254         int tmp;
  255 
  256         if (IS_DEFAULT_VNET(curvnet)) {
  257                 if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
  258                         rtsock_nh.nh_qlimit = tmp;
  259                 netisr_register(&rtsock_nh);
  260         }
  261 #ifdef VIMAGE
  262          else
  263                 netisr_register_vnet(&rtsock_nh);
  264 #endif
  265 }
  266 VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
  267     vnet_rts_init, 0);
  268 
  269 #ifdef VIMAGE
  270 static void
  271 vnet_rts_uninit(void)
  272 {
  273 
  274         netisr_unregister_vnet(&rtsock_nh);
  275 }
  276 VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
  277     vnet_rts_uninit, 0);
  278 #endif
  279 
  280 static void
  281 report_route_event(const struct rib_cmd_info *rc, void *_cbdata)
  282 {
  283         uint32_t fibnum = (uint32_t)(uintptr_t)_cbdata;
  284         struct nhop_object *nh;
  285 
  286         nh = rc->rc_cmd == RTM_DELETE ? rc->rc_nh_old : rc->rc_nh_new;
  287         rt_routemsg(rc->rc_cmd, rc->rc_rt, nh, fibnum);
  288 }
  289 
  290 static void
  291 rts_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
  292 {
  293 #ifdef ROUTE_MPATH
  294         if ((rc->rc_nh_new && NH_IS_NHGRP(rc->rc_nh_new)) ||
  295             (rc->rc_nh_old && NH_IS_NHGRP(rc->rc_nh_old))) {
  296                 rib_decompose_notification(rc, report_route_event,
  297                     (void *)(uintptr_t)fibnum);
  298         } else
  299 #endif
  300                 report_route_event(rc, (void *)(uintptr_t)fibnum);
  301 }
      /*
       * Callback table published via rtsock_callback_p by rtsock_init():
       * route changes go to rts_handle_route_event(), interface messages to
       * rtsock_ifmsg().
       */
  302 static struct rtbridge rtsbridge = {
  303         .route_f = rts_handle_route_event,
  304         .ifmsg_f = rtsock_ifmsg,
  305 };
  306 static struct rtbridge *rtsbridge_orig_p;
  307 
      /*
       * Forwards a route change for @fibnum to the route_f hook of
       * netlink_callback_p.
       * NOTE(review): the pointer is dereferenced without a NULL check;
       * presumably it always references a valid bridge -- confirm where it
       * is defined.
       */
  308 static void
  309 rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc)
  310 {
  311         netlink_callback_p->route_f(fibnum, rc);
  312 }
  313 
  314 static void
  315 rtsock_init(void)
  316 {
  317         rtsbridge_orig_p = rtsock_callback_p;
  318         rtsock_callback_p = &rtsbridge;
  319 }
  320 SYSINIT(rtsock_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtsock_init, NULL);
  321 
  322 static void
  323 rts_handle_ifnet_arrival(void *arg __unused, struct ifnet *ifp)
  324 {
  325         rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
  326 }
  327 EVENTHANDLER_DEFINE(ifnet_arrival_event, rts_handle_ifnet_arrival, NULL, 0);
  328 
  329 static void
  330 rts_handle_ifnet_departure(void *arg __unused, struct ifnet *ifp)
  331 {
  332         rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
  333 }
  334 EVENTHANDLER_DEFINE(ifnet_departure_event, rts_handle_ifnet_departure, NULL, 0);
  335 
  336 static void
  337 rts_append_data(struct socket *so, struct mbuf *m)
  338 {
  339 
  340         if (sbappendaddr(&so->so_rcv, &route_src, m, NULL) == 0) {
  341                 soroverflow(so);
  342                 m_freem(m);
  343         } else
  344                 sorwakeup(so);
  345 }
  346 
  347 static void
  348 rts_input(struct mbuf *m)
  349 {
  350         struct rcb *rcb;
  351         struct socket *last;
  352 
  353         last = NULL;
  354         RTSOCK_LOCK();
  355         LIST_FOREACH(rcb, &V_route_cb.cblist, list) {
  356                 if (rcb->rcb_family != AF_UNSPEC &&
  357                     rcb->rcb_family != m->m_rtsock_family)
  358                         continue;
  359                 if ((m->m_flags & RTS_FILTER_FIB) &&
  360                     M_GETFIB(m) != rcb->rcb_socket->so_fibnum)
  361                         continue;
  362                 if (last != NULL) {
  363                         struct mbuf *n;
  364 
  365                         n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
  366                         if (n != NULL)
  367                                 rts_append_data(last, n);
  368                 }
  369                 last = rcb->rcb_socket;
  370         }
  371         if (last != NULL)
  372                 rts_append_data(last, m);
  373         else
  374                 m_freem(m);
  375         RTSOCK_UNLOCK();
  376 }
  377 
  378 static void
  379 rts_close(struct socket *so)
  380 {
  381 
  382         soisdisconnected(so);
  383 }
  384 
  385 static SYSCTL_NODE(_net, OID_AUTO, rtsock, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  386     "Routing socket infrastructure");
  387 static u_long rts_sendspace = 8192;
  388 SYSCTL_ULONG(_net_rtsock, OID_AUTO, sendspace, CTLFLAG_RW, &rts_sendspace, 0,
  389     "Default routing socket send space");
  390 static u_long rts_recvspace = 8192;
  391 SYSCTL_ULONG(_net_rtsock, OID_AUTO, recvspace, CTLFLAG_RW, &rts_recvspace, 0,
  392     "Default routing socket receive space");
  393 
  394 static int
  395 rts_attach(struct socket *so, int proto, struct thread *td)
  396 {
  397         struct rcb *rcb;
  398         int error;
  399 
  400         error = soreserve(so, rts_sendspace, rts_recvspace);
  401         if (error)
  402                 return (error);
  403 
  404         rcb = malloc(sizeof(*rcb), M_PCB, M_WAITOK);
  405         rcb->rcb_socket = so;
  406         rcb->rcb_family = proto;
  407 
  408         so->so_pcb = rcb;
  409         so->so_fibnum = td->td_proc->p_fibnum;
  410         so->so_options |= SO_USELOOPBACK;
  411 
  412         RTSOCK_LOCK();
  413         LIST_INSERT_HEAD(&V_route_cb.cblist, rcb, list);
  414         switch (proto) {
  415         case AF_INET:
  416                 V_route_cb.ip_count++;
  417                 break;
  418         case AF_INET6:
  419                 V_route_cb.ip6_count++;
  420                 break;
  421         }
  422         V_route_cb.any_count++;
  423         RTSOCK_UNLOCK();
  424         soisconnected(so);
  425 
  426         return (0);
  427 }
  428 
  429 static void
  430 rts_detach(struct socket *so)
  431 {
  432         struct rcb *rcb = so->so_pcb;
  433 
  434         RTSOCK_LOCK();
  435         LIST_REMOVE(rcb, list);
  436         switch(rcb->rcb_family) {
  437         case AF_INET:
  438                 V_route_cb.ip_count--;
  439                 break;
  440         case AF_INET6:
  441                 V_route_cb.ip6_count--;
  442                 break;
  443         }
  444         V_route_cb.any_count--;
  445         RTSOCK_UNLOCK();
  446         free(rcb, M_PCB);
  447         so->so_pcb = NULL;
  448 }
  449 
  450 static int
  451 rts_disconnect(struct socket *so)
  452 {
  453 
  454         return (ENOTCONN);
  455 }
  456 
  457 static int
  458 rts_shutdown(struct socket *so)
  459 {
  460 
  461         socantsendmore(so);
  462         return (0);
  463 }
  464 
  465 #ifndef _SOCKADDR_UNION_DEFINED
  466 #define _SOCKADDR_UNION_DEFINED
  467 /*
  468  * The union of all possible address formats we handle.
  469  */
      /*
       * Caller-provided storage large enough for any of the listed address
       * formats; rtm_get_jailed() and fill_blackholeinfo() build an address
       * in it and leave a pointer to it in the rt_addrinfo they fill.
       */
  470 union sockaddr_union {
  471         struct sockaddr         sa;
  472         struct sockaddr_in      sin;
  473         struct sockaddr_in6     sin6;
  474 };
  475 #endif /* _SOCKADDR_UNION_DEFINED */
  476 
      /*
       * Selects the address stored in @info->rti_info[RTAX_IFA] for a caller
       * whose credential is @cred.
       *
       * Order of preference:
       *  1. the nexthop's own interface address, if prison_if() accepts it;
       *  2. the first address on @ifp of the destination's family that
       *     prison_check_ip4()/prison_check_ip6() accepts;
       *  3. whatever prison_get_ip4()/prison_get_ip6() yields, seeded with
       *     the nexthop's interface address.
       *
       * For cases 2 and 3 the address is built inside @saun and RTAX_IFA is
       * pointed at it, so @saun must stay valid for as long as @info is used.
       *
       * Returns 0 on success.  Returns ESRCH when no address can be provided,
       * when the destination family is neither AF_INET nor AF_INET6 (or
       * support for it is compiled out), or when sa6_recoverscope() fails.
       */
  477 static int
  478 rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
  479     struct nhop_object *nh, union sockaddr_union *saun, struct ucred *cred)
  480 {
  481 #if defined(INET) || defined(INET6)
  482         struct epoch_tracker et;
  483 #endif
  484 
  485         /* First, see if the returned address is part of the jail. */
  486         if (prison_if(cred, nh->nh_ifa->ifa_addr) == 0) {
  487                 info->rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
  488                 return (0);
  489         }
  490 
  491         switch (info->rti_info[RTAX_DST]->sa_family) {
  492 #ifdef INET
  493         case AF_INET:
  494         {
  495                 struct in_addr ia;
  496                 struct ifaddr *ifa;
  497                 int found;
  498 
  499                 found = 0;
  500                 /*
  501                  * Try to find an address on the given outgoing interface
  502                  * that belongs to the jail.
  503                  */
  504                 NET_EPOCH_ENTER(et);
  505                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  506                         struct sockaddr *sa;
  507                         sa = ifa->ifa_addr;
  508                         if (sa->sa_family != AF_INET)
  509                                 continue;
  510                         ia = ((struct sockaddr_in *)sa)->sin_addr;
  511                         if (prison_check_ip4(cred, &ia) == 0) {
  512                                 found = 1;
  513                                 break;
  514                         }
  515                 }
  516                 NET_EPOCH_EXIT(et);
  517                 if (!found) {
  518                         /*
  519                          * As a last resort return the 'default' jail address.
  520                          */
  521                         ia = ((struct sockaddr_in *)nh->nh_ifa->ifa_addr)->
  522                             sin_addr;
  523                         if (prison_get_ip4(cred, &ia) != 0)
  524                                 return (ESRCH);
  525                 }
  526                 bzero(&saun->sin, sizeof(struct sockaddr_in));
  527                 saun->sin.sin_len = sizeof(struct sockaddr_in);
  528                 saun->sin.sin_family = AF_INET;
  529                 saun->sin.sin_addr.s_addr = ia.s_addr;
  530                 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
  531                 break;
  532         }
  533 #endif
  534 #ifdef INET6
  535         case AF_INET6:
  536         {
  537                 struct in6_addr ia6;
  538                 struct ifaddr *ifa;
  539                 int found;
  540 
  541                 found = 0;
  542                 /*
  543                  * Try to find an address on the given outgoing interface
  544                  * that belongs to the jail.
  545                  */
  546                 NET_EPOCH_ENTER(et);
  547                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  548                         struct sockaddr *sa;
  549                         sa = ifa->ifa_addr;
  550                         if (sa->sa_family != AF_INET6)
  551                                 continue;
  552                         bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
  553                             &ia6, sizeof(struct in6_addr));
  554                         if (prison_check_ip6(cred, &ia6) == 0) {
  555                                 found = 1;
  556                                 break;
  557                         }
  558                 }
  559                 NET_EPOCH_EXIT(et);
  560                 if (!found) {
  561                         /*
  562                          * As a last resort return the 'default' jail address.
  563                          */
  564                         ia6 = ((struct sockaddr_in6 *)nh->nh_ifa->ifa_addr)->
  565                             sin6_addr;
  566                         if (prison_get_ip6(cred, &ia6) != 0)
  567                                 return (ESRCH);
  568                 }
  569                 bzero(&saun->sin6, sizeof(struct sockaddr_in6));
  570                 saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
  571                 saun->sin6.sin6_family = AF_INET6;
  572                 bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
  573                 if (sa6_recoverscope(&saun->sin6) != 0)
  574                         return (ESRCH);
  575                 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
  576                 break;
  577         }
  578 #endif
  579         default:
  580                 return (ESRCH);
  581         }
  582         return (0);
  583 }
  584 
  585 static int
  586 fill_blackholeinfo(struct rt_addrinfo *info, union sockaddr_union *saun)
  587 {
  588         struct ifaddr *ifa;
  589         sa_family_t saf;
  590 
  591         if (V_loif == NULL) {
  592                 RTS_PID_LOG(LOG_INFO, "Unable to add blackhole/reject nhop without loopback");
  593                 return (ENOTSUP);
  594         }
  595         info->rti_ifp = V_loif;
  596 
  597         saf = info->rti_info[RTAX_DST]->sa_family;
  598 
  599         CK_STAILQ_FOREACH(ifa, &info->rti_ifp->if_addrhead, ifa_link) {
  600                 if (ifa->ifa_addr->sa_family == saf) {
  601                         info->rti_ifa = ifa;
  602                         break;
  603                 }
  604         }
  605         if (info->rti_ifa == NULL) {
  606                 RTS_PID_LOG(LOG_INFO, "Unable to find ifa for blackhole/reject nhop");
  607                 return (ENOTSUP);
  608         }
  609 
  610         bzero(saun, sizeof(union sockaddr_union));
  611         switch (saf) {
  612 #ifdef INET
  613         case AF_INET:
  614                 saun->sin.sin_family = AF_INET;
  615                 saun->sin.sin_len = sizeof(struct sockaddr_in);
  616                 saun->sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  617                 break;
  618 #endif
  619 #ifdef INET6
  620         case AF_INET6:
  621                 saun->sin6.sin6_family = AF_INET6;
  622                 saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
  623                 saun->sin6.sin6_addr = in6addr_loopback;
  624                 break;
  625 #endif
  626         default:
  627                 RTS_PID_LOG(LOG_INFO, "unsupported family: %d", saf);
  628                 return (ENOTSUP);
  629         }
  630         info->rti_info[RTAX_GATEWAY] = &saun->sa;
  631         info->rti_flags |= RTF_GATEWAY;
  632 
  633         return (0);
  634 }
  635 
  636 /*
  637  * Fills in @info based on userland-provided @rtm message.
  638  *
  639  * Returns 0 on success.
  640  */
  641 static int
  642 fill_addrinfo(struct rt_msghdr *rtm, int len, struct linear_buffer *lb, u_int fibnum,
  643     struct rt_addrinfo *info)
  644 {
  645         int error;
  646 
  647         rtm->rtm_pid = curproc->p_pid;
  648         info->rti_addrs = rtm->rtm_addrs;
  649 
  650         info->rti_mflags = rtm->rtm_inits;
  651         info->rti_rmx = &rtm->rtm_rmx;
  652 
  653         /*
  654          * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
  655          * link-local address because rtrequest requires addresses with
  656          * embedded scope id.
  657          */
  658         if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, info))
  659                 return (EINVAL);
  660 
  661         info->rti_flags = rtm->rtm_flags;
  662         error = cleanup_xaddrs(info, lb);
  663         if (error != 0)
  664                 return (error);
  665         /*
  666          * Verify that the caller has the appropriate privilege; RTM_GET
  667          * is the only operation the non-superuser is allowed.
  668          */
  669         if (rtm->rtm_type != RTM_GET) {
  670                 error = priv_check(curthread, PRIV_NET_ROUTE);
  671                 if (error != 0)
  672                         return (error);
  673         }
  674 
  675         /*
  676          * The given gateway address may be an interface address.
  677          * For example, issuing a "route change" command on a route
  678          * entry that was created from a tunnel, and the gateway
  679          * address given is the local end point. In this case the 
  680          * RTF_GATEWAY flag must be cleared or the destination will
  681          * not be reachable even though there is no error message.
  682          */
  683         if (info->rti_info[RTAX_GATEWAY] != NULL &&
  684             info->rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
  685                 struct nhop_object *nh;
  686 
  687                 /* 
  688                  * A host route through the loopback interface is 
  689                  * installed for each interface adddress. In pre 8.0
  690                  * releases the interface address of a PPP link type
  691                  * is not reachable locally. This behavior is fixed as 
  692                  * part of the new L2/L3 redesign and rewrite work. The
  693                  * signature of this interface address route is the
  694                  * AF_LINK sa_family type of the gateway, and the
  695                  * rt_ifp has the IFF_LOOPBACK flag set.
  696                  */
  697                 nh = rib_lookup(fibnum, info->rti_info[RTAX_GATEWAY], NHR_NONE, 0);
  698                 if (nh != NULL && nh->gw_sa.sa_family == AF_LINK &&
  699                     nh->nh_ifp->if_flags & IFF_LOOPBACK) {
  700                                 info->rti_flags &= ~RTF_GATEWAY;
  701                                 info->rti_flags |= RTF_GWFLAG_COMPAT;
  702                 }
  703         }
  704 
  705         return (0);
  706 }
  707 
  708 static struct nhop_object *
  709 select_nhop(struct nhop_object *nh, const struct sockaddr *gw)
  710 {
  711         if (!NH_IS_NHGRP(nh))
  712                 return (nh);
  713 #ifdef ROUTE_MPATH
  714         const struct weightened_nhop *wn;
  715         uint32_t num_nhops;
  716         wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
  717         if (gw == NULL)
  718                 return (wn[0].nh);
  719         for (int i = 0; i < num_nhops; i++) {
  720                 if (match_nhop_gw(wn[i].nh, gw))
  721                         return (wn[i].nh);
  722         }
  723 #endif
  724         return (NULL);
  725 }
  726 
  727 /*
  728  * Handles RTM_GET message from routing socket, returning matching rt.
  729  *
  730  * Returns:
  731  * 0 on success, with locked and referenced matching rt in @rt_nrt
  732  * errno of failure
  733  */
  734 static int
  735 handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
  736     struct rt_msghdr *rtm, struct rib_cmd_info *rc)
  737 {
  738         RIB_RLOCK_TRACKER;
  739         struct rib_head *rnh;
  740         struct nhop_object *nh;
  741         sa_family_t saf;
  742 
  743         saf = info->rti_info[RTAX_DST]->sa_family;
  744 
  745         rnh = rt_tables_get_rnh(fibnum, saf);
  746         if (rnh == NULL)
  747                 return (EAFNOSUPPORT);
  748 
  749         RIB_RLOCK(rnh);
  750 
  751         /*
  752          * By (implicit) convention host route (one without netmask)
  753          * means longest-prefix-match request and the route with netmask
  754          * means exact-match lookup.
  755          * As cleanup_xaddrs() cleans up info flags&addrs for the /32,/128
  756          * prefixes, use original data to check for the netmask presence.
  757          */
  758         if ((rtm->rtm_addrs & RTA_NETMASK) == 0) {
  759                 /*
  760                  * Provide longest prefix match for
  761                  * address lookup (no mask).
  762                  * 'route -n get addr'
  763                  */
  764                 rc->rc_rt = (struct rtentry *) rnh->rnh_matchaddr(
  765                     info->rti_info[RTAX_DST], &rnh->head);
  766         } else
  767                 rc->rc_rt = (struct rtentry *) rnh->rnh_lookup(
  768                     info->rti_info[RTAX_DST],
  769                     info->rti_info[RTAX_NETMASK], &rnh->head);
  770 
  771         if (rc->rc_rt == NULL) {
  772                 RIB_RUNLOCK(rnh);
  773                 return (ESRCH);
  774         }
  775 
  776         nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]);
  777         if (nh == NULL) {
  778                 RIB_RUNLOCK(rnh);
  779                 return (ESRCH);
  780         }
  781         /*
  782          * If performing proxied L2 entry insertion, and
  783          * the actual PPP host entry is found, perform
  784          * another search to retrieve the prefix route of
  785          * the local end point of the PPP link.
  786          * TODO: move this logic to userland.
  787          */
  788         if (rtm->rtm_flags & RTF_ANNOUNCE) {
  789                 struct sockaddr_storage laddr;
  790 
  791                 if (nh->nh_ifp != NULL &&
  792                     nh->nh_ifp->if_type == IFT_PROPVIRTUAL) {
  793                         struct ifaddr *ifa;
  794 
  795                         ifa = ifa_ifwithnet(info->rti_info[RTAX_DST], 1,
  796                                         RT_ALL_FIBS);
  797                         if (ifa != NULL)
  798                                 rt_maskedcopy(ifa->ifa_addr,
  799                                               (struct sockaddr *)&laddr,
  800                                               ifa->ifa_netmask);
  801                 } else
  802                         rt_maskedcopy(nh->nh_ifa->ifa_addr,
  803                                       (struct sockaddr *)&laddr,
  804                                       nh->nh_ifa->ifa_netmask);
  805                 /* 
  806                  * refactor rt and no lock operation necessary
  807                  */
  808                 rc->rc_rt = (struct rtentry *)rnh->rnh_matchaddr(
  809                     (struct sockaddr *)&laddr, &rnh->head);
  810                 if (rc->rc_rt == NULL) {
  811                         RIB_RUNLOCK(rnh);
  812                         return (ESRCH);
  813                 }
  814                 nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]);
  815                 if (nh == NULL) {
  816                         RIB_RUNLOCK(rnh);
  817                         return (ESRCH);
  818                 }
  819         }
  820         rc->rc_nh_new = nh;
  821         rc->rc_nh_weight = rc->rc_rt->rt_weight;
  822         RIB_RUNLOCK(rnh);
  823 
  824         return (0);
  825 }
  826 
  827 static void
  828 init_sockaddrs_family(int family, struct sockaddr *dst, struct sockaddr *mask)
  829 {
  830 #ifdef INET
  831         if (family == AF_INET) {
  832                 struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;
  833                 struct sockaddr_in *mask4 = (struct sockaddr_in *)mask;
  834 
  835                 bzero(dst4, sizeof(struct sockaddr_in));
  836                 bzero(mask4, sizeof(struct sockaddr_in));
  837 
  838                 dst4->sin_family = AF_INET;
  839                 dst4->sin_len = sizeof(struct sockaddr_in);
  840                 mask4->sin_family = AF_INET;
  841                 mask4->sin_len = sizeof(struct sockaddr_in);
  842         }
  843 #endif
  844 #ifdef INET6
  845         if (family == AF_INET6) {
  846                 struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst;
  847                 struct sockaddr_in6 *mask6 = (struct sockaddr_in6 *)mask;
  848 
  849                 bzero(dst6, sizeof(struct sockaddr_in6));
  850                 bzero(mask6, sizeof(struct sockaddr_in6));
  851 
  852                 dst6->sin6_family = AF_INET6;
  853                 dst6->sin6_len = sizeof(struct sockaddr_in6);
  854                 mask6->sin6_family = AF_INET6;
  855                 mask6->sin6_len = sizeof(struct sockaddr_in6);
  856         }
  857 #endif
  858 }
  859 
  860 static void
  861 export_rtaddrs(const struct rtentry *rt, struct sockaddr *dst,
  862     struct sockaddr *mask)
  863 {
  864 #ifdef INET
  865         if (dst->sa_family == AF_INET) {
  866                 struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;
  867                 struct sockaddr_in *mask4 = (struct sockaddr_in *)mask;
  868                 uint32_t scopeid = 0;
  869                 rt_get_inet_prefix_pmask(rt, &dst4->sin_addr, &mask4->sin_addr,
  870                     &scopeid);
  871                 return;
  872         }
  873 #endif
  874 #ifdef INET6
  875         if (dst->sa_family == AF_INET6) {
  876                 struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst;
  877                 struct sockaddr_in6 *mask6 = (struct sockaddr_in6 *)mask;
  878                 uint32_t scopeid = 0;
  879                 rt_get_inet6_prefix_pmask(rt, &dst6->sin6_addr,
  880                     &mask6->sin6_addr, &scopeid);
  881                 dst6->sin6_scope_id = scopeid;
  882                 return;
  883         }
  884 #endif
  885 }
  886 
  887 static int
  888 update_rtm_from_info(struct rt_addrinfo *info, struct rt_msghdr **prtm,
  889     int alloc_len)
  890 {
  891         struct rt_msghdr *rtm, *orig_rtm = NULL;
  892         struct walkarg w;
  893         int len;
  894 
  895         rtm = *prtm;
  896         /* Check if we need to realloc storage */
  897         rtsock_msg_buffer(rtm->rtm_type, info, NULL, &len);
  898         if (len > alloc_len) {
  899                 struct rt_msghdr *tmp_rtm;
  900 
  901                 tmp_rtm = malloc(len, M_TEMP, M_NOWAIT);
  902                 if (tmp_rtm == NULL)
  903                         return (ENOBUFS);
  904                 bcopy(rtm, tmp_rtm, rtm->rtm_msglen);
  905                 orig_rtm = rtm;
  906                 rtm = tmp_rtm;
  907                 alloc_len = len;
  908 
  909                 /*
  910                  * Delay freeing original rtm as info contains
  911                  * data referencing it.
  912                  */
  913         }
  914 
  915         w.w_tmem = (caddr_t)rtm;
  916         w.w_tmemsize = alloc_len;
  917         rtsock_msg_buffer(rtm->rtm_type, info, &w, &len);
  918         rtm->rtm_addrs = info->rti_addrs;
  919 
  920         if (orig_rtm != NULL)
  921                 free(orig_rtm, M_TEMP);
  922         *prtm = rtm;
  923         return (0);
  924 }
  925 
  926 
  927 /*
  928  * Update sockaddrs, flags, etc in @prtm based on @rc data.
  929  * rtm can be reallocated.
  930  *
  931  * Returns 0 on success, along with pointer to (potentially reallocated)
  932  *  rtm.
  933  *
  934  */
  935 static int
  936 update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm,
  937     int alloc_len, struct rib_cmd_info *rc, struct nhop_object *nh)
  938 {
  939         union sockaddr_union saun;
  940         struct rt_msghdr *rtm;
  941         struct ifnet *ifp;
  942         int error;
  943 
  944         rtm = *prtm;
  945         union sockaddr_union sa_dst, sa_mask;
  946         int family = info->rti_info[RTAX_DST]->sa_family;
  947         init_sockaddrs_family(family, &sa_dst.sa, &sa_mask.sa);
  948         export_rtaddrs(rc->rc_rt, &sa_dst.sa, &sa_mask.sa);
  949 
  950         info->rti_info[RTAX_DST] = &sa_dst.sa;
  951         info->rti_info[RTAX_NETMASK] = rt_is_host(rc->rc_rt) ? NULL : &sa_mask.sa;
  952         info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
  953         info->rti_info[RTAX_GENMASK] = 0;
  954         ifp = nh->nh_ifp;
  955         if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
  956                 if (ifp) {
  957                         info->rti_info[RTAX_IFP] =
  958                             ifp->if_addr->ifa_addr;
  959                         error = rtm_get_jailed(info, ifp, nh,
  960                             &saun, curthread->td_ucred);
  961                         if (error != 0)
  962                                 return (error);
  963                         if (ifp->if_flags & IFF_POINTOPOINT)
  964                                 info->rti_info[RTAX_BRD] =
  965                                     nh->nh_ifa->ifa_dstaddr;
  966                         rtm->rtm_index = ifp->if_index;
  967                 } else {
  968                         info->rti_info[RTAX_IFP] = NULL;
  969                         info->rti_info[RTAX_IFA] = NULL;
  970                 }
  971         } else if (ifp != NULL)
  972                 rtm->rtm_index = ifp->if_index;
  973 
  974         if ((error = update_rtm_from_info(info, prtm, alloc_len)) != 0)
  975                 return (error);
  976 
  977         rtm = *prtm;
  978         rtm->rtm_flags = rc->rc_rt->rte_flags | nhop_get_rtflags(nh);
  979         if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
  980                 rtm->rtm_flags = RTF_GATEWAY | 
  981                         (rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
  982         rt_getmetrics(rc->rc_rt, nh, &rtm->rtm_rmx);
  983         rtm->rtm_rmx.rmx_weight = rc->rc_nh_weight;
  984 
  985         return (0);
  986 }
  987 
  988 #ifdef ROUTE_MPATH
  989 static void
  990 save_del_notification(const struct rib_cmd_info *rc, void *_cbdata)
  991 {
  992         struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata;
  993 
  994         if (rc->rc_cmd == RTM_DELETE)
  995                 *rc_new = *rc;
  996 }
  997 
  998 static void
  999 save_add_notification(const struct rib_cmd_info *rc, void *_cbdata)
 1000 {
 1001         struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata;
 1002 
 1003         if (rc->rc_cmd == RTM_ADD)
 1004                 *rc_new = *rc;
 1005 }
 1006 #endif
 1007 
 1008 #if defined(INET6) || defined(INET)
 1009 static struct sockaddr *
 1010 alloc_sockaddr_aligned(struct linear_buffer *lb, int len)
 1011 {
 1012         len = roundup2(len, sizeof(uint64_t));
 1013         if (lb->offset + len > lb->size)
 1014                 return (NULL);
 1015         struct sockaddr *sa = (struct sockaddr *)(lb->base + lb->offset);
 1016         lb->offset += len;
 1017         return (sa);
 1018 }
 1019 #endif
 1020 
 1021 static int
 1022 rts_send(struct socket *so, int flags, struct mbuf *m,
 1023     struct sockaddr *nam, struct mbuf *control, struct thread *td)
 1024 {
 1025         struct rt_msghdr *rtm = NULL;
 1026         struct rt_addrinfo info;
 1027         struct epoch_tracker et;
 1028 #ifdef INET6
 1029         struct sockaddr_storage ss;
 1030         struct sockaddr_in6 *sin6;
 1031         int i, rti_need_deembed = 0;
 1032 #endif
 1033         int alloc_len = 0, len, error = 0, fibnum;
 1034         sa_family_t saf = AF_UNSPEC;
 1035         struct rib_cmd_info rc;
 1036         struct nhop_object *nh;
 1037 
 1038         if ((flags & PRUS_OOB) || control != NULL) {
 1039                 m_freem(m);
 1040                 if (control != NULL)
 1041                         m_freem(control);
 1042                 return (EOPNOTSUPP);
 1043         }
 1044 
 1045         fibnum = so->so_fibnum;
 1046 #define senderr(e) { error = e; goto flush;}
 1047         if (m == NULL || ((m->m_len < sizeof(long)) &&
 1048                        (m = m_pullup(m, sizeof(long))) == NULL))
 1049                 return (ENOBUFS);
 1050         if ((m->m_flags & M_PKTHDR) == 0)
 1051                 panic("route_output");
 1052         NET_EPOCH_ENTER(et);
 1053         len = m->m_pkthdr.len;
 1054         if (len < sizeof(*rtm) ||
 1055             len != mtod(m, struct rt_msghdr *)->rtm_msglen)
 1056                 senderr(EINVAL);
 1057 
 1058         /*
 1059          * Most of current messages are in range 200-240 bytes,
 1060          * minimize possible re-allocation on reply using larger size
 1061          * buffer aligned on 1k boundaty.
 1062          */
 1063         alloc_len = roundup2(len, 1024);
 1064         int total_len = alloc_len + SCRATCH_BUFFER_SIZE;
 1065         if ((rtm = malloc(total_len, M_TEMP, M_NOWAIT)) == NULL)
 1066                 senderr(ENOBUFS);
 1067 
 1068         m_copydata(m, 0, len, (caddr_t)rtm);
 1069         bzero(&info, sizeof(info));
 1070         nh = NULL;
 1071         struct linear_buffer lb = {
 1072                 .base = (char *)rtm + alloc_len,
 1073                 .size = SCRATCH_BUFFER_SIZE,
 1074         };
 1075 
 1076         if (rtm->rtm_version != RTM_VERSION) {
 1077                 /* Do not touch message since format is unknown */
 1078                 free(rtm, M_TEMP);
 1079                 rtm = NULL;
 1080                 senderr(EPROTONOSUPPORT);
 1081         }
 1082 
 1083         /*
 1084          * Starting from here, it is possible
 1085          * to alter original message and insert
 1086          * caller PID and error value.
 1087          */
 1088 
 1089         if ((error = fill_addrinfo(rtm, len, &lb, fibnum, &info)) != 0) {
 1090                 senderr(error);
 1091         }
 1092         /* fill_addringo() embeds scope into IPv6 addresses */
 1093 #ifdef INET6
 1094         rti_need_deembed = 1;
 1095 #endif
 1096 
 1097         saf = info.rti_info[RTAX_DST]->sa_family;
 1098 
 1099         /* support for new ARP code */
 1100         if (rtm->rtm_flags & RTF_LLDATA) {
 1101                 error = lla_rt_output(rtm, &info);
 1102                 goto flush;
 1103         }
 1104 
 1105         union sockaddr_union gw_saun;
 1106         int blackhole_flags = rtm->rtm_flags & (RTF_BLACKHOLE|RTF_REJECT);
 1107         if (blackhole_flags != 0) {
 1108                 if (blackhole_flags != (RTF_BLACKHOLE | RTF_REJECT))
 1109                         error = fill_blackholeinfo(&info, &gw_saun);
 1110                 else {
 1111                         RTS_PID_LOG(LOG_DEBUG, "both BLACKHOLE and REJECT flags specifiied");
 1112                         error = EINVAL;
 1113                 }
 1114                 if (error != 0)
 1115                         senderr(error);
 1116         }
 1117 
 1118         switch (rtm->rtm_type) {
 1119         case RTM_ADD:
 1120         case RTM_CHANGE:
 1121                 if (rtm->rtm_type == RTM_ADD) {
 1122                         if (info.rti_info[RTAX_GATEWAY] == NULL) {
 1123                                 RTS_PID_LOG(LOG_DEBUG, "RTM_ADD w/o gateway");
 1124                                 senderr(EINVAL);
 1125                         }
 1126                 }
 1127                 error = rib_action(fibnum, rtm->rtm_type, &info, &rc);
 1128                 if (error == 0) {
 1129                         rtsock_notify_event(fibnum, &rc);
 1130 #ifdef ROUTE_MPATH
 1131                         if (NH_IS_NHGRP(rc.rc_nh_new) ||
 1132                             (rc.rc_nh_old && NH_IS_NHGRP(rc.rc_nh_old))) {
 1133                                 struct rib_cmd_info rc_simple = {};
 1134                                 rib_decompose_notification(&rc,
 1135                                     save_add_notification, (void *)&rc_simple);
 1136                                 rc = rc_simple;
 1137                         }
 1138 #endif
 1139                         /* nh MAY be empty if RTM_CHANGE request is no-op */
 1140                         nh = rc.rc_nh_new;
 1141                         if (nh != NULL) {
 1142                                 rtm->rtm_index = nh->nh_ifp->if_index;
 1143                                 rtm->rtm_flags = rc.rc_rt->rte_flags | nhop_get_rtflags(nh);
 1144                         }
 1145                 }
 1146                 break;
 1147 
 1148         case RTM_DELETE:
 1149                 error = rib_action(fibnum, RTM_DELETE, &info, &rc);
 1150                 if (error == 0) {
 1151                         rtsock_notify_event(fibnum, &rc);
 1152 #ifdef ROUTE_MPATH
 1153                         if (NH_IS_NHGRP(rc.rc_nh_old) ||
 1154                             (rc.rc_nh_new && NH_IS_NHGRP(rc.rc_nh_new))) {
 1155                                 struct rib_cmd_info rc_simple = {};
 1156                                 rib_decompose_notification(&rc,
 1157                                     save_del_notification, (void *)&rc_simple);
 1158                                 rc = rc_simple;
 1159                         }
 1160 #endif
 1161                         nh = rc.rc_nh_old;
 1162                 }
 1163                 break;
 1164 
 1165         case RTM_GET:
 1166                 error = handle_rtm_get(&info, fibnum, rtm, &rc);
 1167                 if (error != 0)
 1168                         senderr(error);
 1169                 nh = rc.rc_nh_new;
 1170 
 1171                 if (!can_export_rte(curthread->td_ucred,
 1172                     info.rti_info[RTAX_NETMASK] == NULL,
 1173                     info.rti_info[RTAX_DST])) {
 1174                         senderr(ESRCH);
 1175                 }
 1176                 break;
 1177 
 1178         default:
 1179                 senderr(EOPNOTSUPP);
 1180         }
 1181 
 1182         if (error == 0 && nh != NULL) {
 1183                 error = update_rtm_from_rc(&info, &rtm, alloc_len, &rc, nh);
 1184                 /*
 1185                  * Note that some sockaddr pointers may have changed to
 1186                  * point to memory outsize @rtm. Some may be pointing
 1187                  * to the on-stack variables.
 1188                  * Given that, any pointer in @info CANNOT BE USED.
 1189                  */
 1190 
 1191                 /*
 1192                  * scopeid deembedding has been performed while
 1193                  * writing updated rtm in rtsock_msg_buffer().
 1194                  * With that in mind, skip deembedding procedure below.
 1195                  */
 1196 #ifdef INET6
 1197                 rti_need_deembed = 0;
 1198 #endif
 1199         }
 1200 
 1201 flush:
 1202         NET_EPOCH_EXIT(et);
 1203 
 1204 #ifdef INET6
 1205         if (rtm != NULL) {
 1206                 if (rti_need_deembed) {
 1207                         /* sin6_scope_id is recovered before sending rtm. */
 1208                         sin6 = (struct sockaddr_in6 *)&ss;
 1209                         for (i = 0; i < RTAX_MAX; i++) {
 1210                                 if (info.rti_info[i] == NULL)
 1211                                         continue;
 1212                                 if (info.rti_info[i]->sa_family != AF_INET6)
 1213                                         continue;
 1214                                 bcopy(info.rti_info[i], sin6, sizeof(*sin6));
 1215                                 if (sa6_recoverscope(sin6) == 0)
 1216                                         bcopy(sin6, info.rti_info[i],
 1217                                                     sizeof(*sin6));
 1218                         }
 1219                         if (update_rtm_from_info(&info, &rtm, alloc_len) != 0) {
 1220                                 if (error != 0)
 1221                                         error = ENOBUFS;
 1222                         }
 1223                 }
 1224         }
 1225 #endif
 1226         send_rtm_reply(so, rtm, m, saf, fibnum, error);
 1227 
 1228         return (error);
 1229 }
 1230 
 1231 /*
 1232  * Sends the prepared reply message in @rtm to all rtsock clients.
 1233  * Frees @m and @rtm.
 1234  *
 1235  */
 1236 static void
 1237 send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m,
 1238     sa_family_t saf, u_int fibnum, int rtm_errno)
 1239 {
 1240         struct rcb *rcb = NULL;
 1241 
 1242         /*
 1243          * Check to see if we don't want our own messages.
 1244          */
 1245         if ((so->so_options & SO_USELOOPBACK) == 0) {
 1246                 if (V_route_cb.any_count <= 1) {
 1247                         if (rtm != NULL)
 1248                                 free(rtm, M_TEMP);
 1249                         m_freem(m);
 1250                         return;
 1251                 }
 1252                 /* There is another listener, so construct message */
 1253                 rcb = so->so_pcb;
 1254         }
 1255 
 1256         if (rtm != NULL) {
 1257                 if (rtm_errno!= 0)
 1258                         rtm->rtm_errno = rtm_errno;
 1259                 else
 1260                         rtm->rtm_flags |= RTF_DONE;
 1261 
 1262                 m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
 1263                 if (m->m_pkthdr.len < rtm->rtm_msglen) {
 1264                         m_freem(m);
 1265                         m = NULL;
 1266                 } else if (m->m_pkthdr.len > rtm->rtm_msglen)
 1267                         m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
 1268 
 1269                 free(rtm, M_TEMP);
 1270         }
 1271         if (m != NULL) {
 1272                 M_SETFIB(m, fibnum);
 1273                 m->m_flags |= RTS_FILTER_FIB;
 1274                 if (rcb) {
 1275                         /*
 1276                          * XXX insure we don't get a copy by
 1277                          * invalidating our protocol
 1278                          */
 1279                         sa_family_t family = rcb->rcb_family;
 1280                         rcb->rcb_family = AF_UNSPEC;
 1281                         rt_dispatch(m, saf);
 1282                         rcb->rcb_family = family;
 1283                 } else
 1284                         rt_dispatch(m, saf);
 1285         }
 1286 }
 1287 
 1288 static void
 1289 rt_getmetrics(const struct rtentry *rt, const struct nhop_object *nh,
 1290     struct rt_metrics *out)
 1291 {
 1292 
 1293         bzero(out, sizeof(*out));
 1294         out->rmx_mtu = nh->nh_mtu;
 1295         out->rmx_weight = rt->rt_weight;
 1296         out->rmx_nhidx = nhop_get_idx(nh);
 1297         /* Kernel -> userland timebase conversion. */
 1298         out->rmx_expire = nhop_get_expire(nh) ?
 1299             nhop_get_expire(nh) - time_uptime + time_second : 0;
 1300 }
 1301 
 1302 /*
 1303  * Extract the addresses of the passed sockaddrs.
 1304  * Do a little sanity checking so as to avoid bad memory references.
 1305  * This data is derived straight from userland.
 1306  */
 1307 static int
 1308 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
 1309 {
 1310         struct sockaddr *sa;
 1311         int i;
 1312 
 1313         for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
 1314                 if ((rtinfo->rti_addrs & (1 << i)) == 0)
 1315                         continue;
 1316                 sa = (struct sockaddr *)cp;
 1317                 /*
 1318                  * It won't fit.
 1319                  */
 1320                 if (cp + sa->sa_len > cplim) {
 1321                         RTS_PID_LOG(LOG_DEBUG, "sa_len too big for sa type %d", i);
 1322                         return (EINVAL);
 1323                 }
 1324                 /*
 1325                  * there are no more.. quit now
 1326                  * If there are more bits, they are in error.
 1327                  * I've seen this. route(1) can evidently generate these. 
 1328                  * This causes kernel to core dump.
 1329                  * for compatibility, If we see this, point to a safe address.
 1330                  */
 1331                 if (sa->sa_len == 0) {
 1332                         rtinfo->rti_info[i] = &sa_zero;
 1333                         return (0); /* should be EINVAL but for compat */
 1334                 }
 1335                 /* accept it */
 1336 #ifdef INET6
 1337                 if (sa->sa_family == AF_INET6)
 1338                         sa6_embedscope((struct sockaddr_in6 *)sa,
 1339                             V_ip6_use_defzone);
 1340 #endif
 1341                 rtinfo->rti_info[i] = sa;
 1342                 cp += SA_SIZE(sa);
 1343         }
 1344         return (0);
 1345 }
 1346 
 1347 #ifdef INET
 1348 static inline void
 1349 fill_sockaddr_inet(struct sockaddr_in *sin, struct in_addr addr)
 1350 {
 1351 
 1352         const struct sockaddr_in nsin = {
 1353                 .sin_family = AF_INET,
 1354                 .sin_len = sizeof(struct sockaddr_in),
 1355                 .sin_addr = addr,
 1356         };
 1357         *sin = nsin;
 1358 }
 1359 #endif
 1360 
 1361 #ifdef INET6
 1362 static inline void
 1363 fill_sockaddr_inet6(struct sockaddr_in6 *sin6, const struct in6_addr *addr6,
 1364     uint32_t scopeid)
 1365 {
 1366 
 1367         const struct sockaddr_in6 nsin6 = {
 1368                 .sin6_family = AF_INET6,
 1369                 .sin6_len = sizeof(struct sockaddr_in6),
 1370                 .sin6_addr = *addr6,
 1371                 .sin6_scope_id = scopeid,
 1372         };
 1373         *sin6 = nsin6;
 1374 }
 1375 #endif
 1376 
 1377 #if defined(INET6) || defined(INET)
 1378 /*
 1379  * Checks if gateway is suitable for lltable operations.
 1380  * Lltable code requires AF_LINK gateway with ifindex
 1381  *  and mac address specified.
 1382  * Returns 0 on success.
 1383  */
 1384 static int
 1385 cleanup_xaddrs_lladdr(struct rt_addrinfo *info)
 1386 {
 1387         struct sockaddr_dl *sdl = (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
 1388 
 1389         if (sdl->sdl_family != AF_LINK)
 1390                 return (EINVAL);
 1391 
 1392         if (sdl->sdl_index == 0) {
 1393                 RTS_PID_LOG(LOG_DEBUG, "AF_LINK gateway w/o ifindex");
 1394                 return (EINVAL);
 1395         }
 1396 
 1397         if (offsetof(struct sockaddr_dl, sdl_data) + sdl->sdl_nlen + sdl->sdl_alen > sdl->sdl_len) {
 1398                 RTS_PID_LOG(LOG_DEBUG, "AF_LINK gw: sdl_nlen/sdl_alen too large");
 1399                 return (EINVAL);
 1400         }
 1401 
 1402         return (0);
 1403 }
 1404 
 1405 static int
 1406 cleanup_xaddrs_gateway(struct rt_addrinfo *info, struct linear_buffer *lb)
 1407 {
 1408         struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
 1409         struct sockaddr *sa;
 1410 
 1411         if (info->rti_flags & RTF_LLDATA)
 1412                 return (cleanup_xaddrs_lladdr(info));
 1413 
 1414         switch (gw->sa_family) {
 1415 #ifdef INET
 1416         case AF_INET:
 1417                 {
 1418                         struct sockaddr_in *gw_sin = (struct sockaddr_in *)gw;
 1419 
 1420                         /* Ensure reads do not go beyoud SA boundary */
 1421                         if (SA_SIZE(gw) < offsetof(struct sockaddr_in, sin_zero)) {
 1422                                 RTS_PID_LOG(LOG_DEBUG, "gateway sin_len too small: %d",
 1423                                     gw->sa_len);
 1424                                 return (EINVAL);
 1425                         }
 1426                         sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_in));
 1427                         if (sa == NULL)
 1428                                 return (ENOBUFS);
 1429                         fill_sockaddr_inet((struct sockaddr_in *)sa, gw_sin->sin_addr);
 1430                         info->rti_info[RTAX_GATEWAY] = sa;
 1431                 }
 1432                 break;
 1433 #endif
 1434 #ifdef INET6
 1435         case AF_INET6:
 1436                 {
 1437                         struct sockaddr_in6 *gw_sin6 = (struct sockaddr_in6 *)gw;
 1438                         if (gw_sin6->sin6_len < sizeof(struct sockaddr_in6)) {
 1439                                 RTS_PID_LOG(LOG_DEBUG, "gateway sin6_len too small: %d",
 1440                                     gw->sa_len);
 1441                                 return (EINVAL);
 1442                         }
 1443                         fill_sockaddr_inet6(gw_sin6, &gw_sin6->sin6_addr, 0);
 1444                         break;
 1445                 }
 1446 #endif
 1447         case AF_LINK:
 1448                 {
 1449                         struct sockaddr_dl *gw_sdl;
 1450 
 1451                         size_t sdl_min_len = offsetof(struct sockaddr_dl, sdl_data);
 1452                         gw_sdl = (struct sockaddr_dl *)gw;
 1453                         if (gw_sdl->sdl_len < sdl_min_len) {
 1454                                 RTS_PID_LOG(LOG_DEBUG, "gateway sdl_len too small: %d",
 1455                                     gw_sdl->sdl_len);
 1456                                 return (EINVAL);
 1457                         }
 1458                         sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_dl_short));
 1459                         if (sa == NULL)
 1460                                 return (ENOBUFS);
 1461 
 1462                         const struct sockaddr_dl_short sdl = {
 1463                                 .sdl_family = AF_LINK,
 1464                                 .sdl_len = sizeof(struct sockaddr_dl_short),
 1465                                 .sdl_index = gw_sdl->sdl_index,
 1466                         };
 1467                         *((struct sockaddr_dl_short *)sa) = sdl;
 1468                         info->rti_info[RTAX_GATEWAY] = sa;
 1469                         break;
 1470                 }
 1471         }
 1472 
 1473         return (0);
 1474 }
 1475 #endif
 1476 
 1477 static void
 1478 remove_netmask(struct rt_addrinfo *info)
 1479 {
 1480         info->rti_info[RTAX_NETMASK] = NULL;
 1481         info->rti_flags |= RTF_HOST;
 1482         info->rti_addrs &= ~RTA_NETMASK;
 1483 }
 1484 
 1485 #ifdef INET
 1486 static int
 1487 cleanup_xaddrs_inet(struct rt_addrinfo *info, struct linear_buffer *lb)
 1488 {
 1489         struct sockaddr_in *dst_sa, *mask_sa;
 1490         const int sa_len = sizeof(struct sockaddr_in);
 1491         struct in_addr dst, mask;
 1492 
 1493         /* Check & fixup dst/netmask combination first */
 1494         dst_sa = (struct sockaddr_in *)info->rti_info[RTAX_DST];
 1495         mask_sa = (struct sockaddr_in *)info->rti_info[RTAX_NETMASK];
 1496 
 1497         /* Ensure reads do not go beyound the buffer size */
 1498         if (SA_SIZE(dst_sa) < offsetof(struct sockaddr_in, sin_zero)) {
 1499                 RTS_PID_LOG(LOG_DEBUG, "prefix dst sin_len too small: %d",
 1500                     dst_sa->sin_len);
 1501                 return (EINVAL);
 1502         }
 1503 
 1504         if ((mask_sa != NULL) && mask_sa->sin_len < sizeof(struct sockaddr_in)) {
 1505                 /*
 1506                  * Some older routing software encode mask length into the
 1507                  * sin_len, thus resulting in "truncated" sockaddr.
 1508                  */
 1509                 int len = mask_sa->sin_len - offsetof(struct sockaddr_in, sin_addr);
 1510                 if (len >= 0) {
 1511                         mask.s_addr = 0;
 1512                         if (len > sizeof(struct in_addr))
 1513                                 len = sizeof(struct in_addr);
 1514                         memcpy(&mask, &mask_sa->sin_addr, len);
 1515                 } else {
 1516                         RTS_PID_LOG(LOG_DEBUG, "prefix mask sin_len too small: %d",
 1517                             mask_sa->sin_len);
 1518                         return (EINVAL);
 1519                 }
 1520         } else
 1521                 mask.s_addr = mask_sa ? mask_sa->sin_addr.s_addr : INADDR_BROADCAST;
 1522 
 1523         dst.s_addr = htonl(ntohl(dst_sa->sin_addr.s_addr) & ntohl(mask.s_addr));
 1524 
 1525         /* Construct new "clean" dst/mask sockaddresses */
 1526         if ((dst_sa = (struct sockaddr_in *)alloc_sockaddr_aligned(lb, sa_len)) == NULL)
 1527                 return (ENOBUFS);
 1528         fill_sockaddr_inet(dst_sa, dst);
 1529         info->rti_info[RTAX_DST] = (struct sockaddr *)dst_sa;
 1530 
 1531         if (mask.s_addr != INADDR_BROADCAST) {
 1532                 if ((mask_sa = (struct sockaddr_in *)alloc_sockaddr_aligned(lb, sa_len)) == NULL)
 1533                         return (ENOBUFS);
 1534                 fill_sockaddr_inet(mask_sa, mask);
 1535                 info->rti_info[RTAX_NETMASK] = (struct sockaddr *)mask_sa;
 1536                 info->rti_flags &= ~RTF_HOST;
 1537         } else
 1538                 remove_netmask(info);
 1539 
 1540         /* Check gateway */
 1541         if (info->rti_info[RTAX_GATEWAY] != NULL)
 1542                 return (cleanup_xaddrs_gateway(info, lb));
 1543 
 1544         return (0);
 1545 }
 1546 #endif
 1547 
 1548 #ifdef INET6
 1549 static int
 1550 cleanup_xaddrs_inet6(struct rt_addrinfo *info, struct linear_buffer *lb)
 1551 {
 1552         struct sockaddr *sa;
 1553         struct sockaddr_in6 *dst_sa, *mask_sa;
 1554         struct in6_addr mask, *dst;
 1555         const int sa_len = sizeof(struct sockaddr_in6);
 1556 
 1557         /* Check & fixup dst/netmask combination first */
 1558         dst_sa = (struct sockaddr_in6 *)info->rti_info[RTAX_DST];
 1559         mask_sa = (struct sockaddr_in6 *)info->rti_info[RTAX_NETMASK];
 1560 
 1561         if (dst_sa->sin6_len < sizeof(struct sockaddr_in6)) {
 1562                 RTS_PID_LOG(LOG_DEBUG, "prefix dst sin6_len too small: %d",
 1563                     dst_sa->sin6_len);
 1564                 return (EINVAL);
 1565         }
 1566 
 1567         if (mask_sa && mask_sa->sin6_len < sizeof(struct sockaddr_in6)) {
 1568                 /*
 1569                  * Some older routing software encode mask length into the
 1570                  * sin6_len, thus resulting in "truncated" sockaddr.
 1571                  */
 1572                 int len = mask_sa->sin6_len - offsetof(struct sockaddr_in6, sin6_addr);
 1573                 if (len >= 0) {
 1574                         bzero(&mask, sizeof(mask));
 1575                         if (len > sizeof(struct in6_addr))
 1576                                 len = sizeof(struct in6_addr);
 1577                         memcpy(&mask, &mask_sa->sin6_addr, len);
 1578                 } else {
 1579                         RTS_PID_LOG(LOG_DEBUG, "rtsock: prefix mask sin6_len too small: %d",
 1580                             mask_sa->sin6_len);
 1581                         return (EINVAL);
 1582                 }
 1583         } else
 1584                 mask = mask_sa ? mask_sa->sin6_addr : in6mask128;
 1585 
 1586         dst = &dst_sa->sin6_addr;
 1587         IN6_MASK_ADDR(dst, &mask);
 1588 
 1589         if ((sa = alloc_sockaddr_aligned(lb, sa_len)) == NULL)
 1590                 return (ENOBUFS);
 1591         fill_sockaddr_inet6((struct sockaddr_in6 *)sa, dst, 0);
 1592         info->rti_info[RTAX_DST] = sa;
 1593 
 1594         if (!IN6_ARE_ADDR_EQUAL(&mask, &in6mask128)) {
 1595                 if ((sa = alloc_sockaddr_aligned(lb, sa_len)) == NULL)
 1596                         return (ENOBUFS);
 1597                 fill_sockaddr_inet6((struct sockaddr_in6 *)sa, &mask, 0);
 1598                 info->rti_info[RTAX_NETMASK] = sa;
 1599                 info->rti_flags &= ~RTF_HOST;
 1600         } else
 1601                 remove_netmask(info);
 1602 
 1603         /* Check gateway */
 1604         if (info->rti_info[RTAX_GATEWAY] != NULL)
 1605                 return (cleanup_xaddrs_gateway(info, lb));
 1606 
 1607         return (0);
 1608 }
 1609 #endif
 1610 
 1611 static int
 1612 cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb)
 1613 {
 1614         int error = EAFNOSUPPORT;
 1615 
 1616         if (info->rti_info[RTAX_DST] == NULL) {
 1617                 RTS_PID_LOG(LOG_DEBUG, "prefix dst is not set");
 1618                 return (EINVAL);
 1619         }
 1620 
 1621         if (info->rti_flags & RTF_LLDATA) {
 1622                 /*
 1623                  * arp(8)/ndp(8) sends RTA_NETMASK for the associated
 1624                  * prefix along with the actual address in RTA_DST.
 1625                  * Remove netmask to avoid unnecessary address masking.
 1626                  */
 1627                 remove_netmask(info);
 1628         }
 1629 
 1630         switch (info->rti_info[RTAX_DST]->sa_family) {
 1631 #ifdef INET
 1632         case AF_INET:
 1633                 error = cleanup_xaddrs_inet(info, lb);
 1634                 break;
 1635 #endif
 1636 #ifdef INET6
 1637         case AF_INET6:
 1638                 error = cleanup_xaddrs_inet6(info, lb);
 1639                 break;
 1640 #endif
 1641         }
 1642 
 1643         return (error);
 1644 }
 1645 
 1646 /*
 1647  * Fill in @dmask with valid netmask leaving original @smask
 1648  * intact. Mostly used with radix netmasks.
 1649  */
 1650 struct sockaddr *
 1651 rtsock_fix_netmask(const struct sockaddr *dst, const struct sockaddr *smask,
 1652     struct sockaddr_storage *dmask)
 1653 {
 1654         if (dst == NULL || smask == NULL)
 1655                 return (NULL);
 1656 
 1657         memset(dmask, 0, dst->sa_len);
 1658         memcpy(dmask, smask, smask->sa_len);
 1659         dmask->ss_len = dst->sa_len;
 1660         dmask->ss_family = dst->sa_family;
 1661 
 1662         return ((struct sockaddr *)dmask);
 1663 }
 1664 
 1665 /*
 1666  * Writes information related to @rtinfo object to newly-allocated mbuf.
 1667  * Assumes MCLBYTES is enough to construct any message.
 1668  * Used for OS notifications of vaious events (if/ifa announces,etc)
 1669  *
 1670  * Returns allocated mbuf or NULL on failure.
 1671  */
 1672 static struct mbuf *
 1673 rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
 1674 {
 1675         struct sockaddr_storage ss;
 1676         struct rt_msghdr *rtm;
 1677         struct mbuf *m;
 1678         int i;
 1679         struct sockaddr *sa;
 1680 #ifdef INET6
 1681         struct sockaddr_in6 *sin6;
 1682 #endif
 1683         int len, dlen;
 1684 
 1685         switch (type) {
 1686         case RTM_DELADDR:
 1687         case RTM_NEWADDR:
 1688                 len = sizeof(struct ifa_msghdr);
 1689                 break;
 1690 
 1691         case RTM_DELMADDR:
 1692         case RTM_NEWMADDR:
 1693                 len = sizeof(struct ifma_msghdr);
 1694                 break;
 1695 
 1696         case RTM_IFINFO:
 1697                 len = sizeof(struct if_msghdr);
 1698                 break;
 1699 
 1700         case RTM_IFANNOUNCE:
 1701         case RTM_IEEE80211:
 1702                 len = sizeof(struct if_announcemsghdr);
 1703                 break;
 1704 
 1705         default:
 1706                 len = sizeof(struct rt_msghdr);
 1707         }
 1708 
 1709         /* XXXGL: can we use MJUMPAGESIZE cluster here? */
 1710         KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
 1711         if (len > MHLEN)
 1712                 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 1713         else
 1714                 m = m_gethdr(M_NOWAIT, MT_DATA);
 1715         if (m == NULL)
 1716                 return (m);
 1717 
 1718         m->m_pkthdr.len = m->m_len = len;
 1719         rtm = mtod(m, struct rt_msghdr *);
 1720         bzero((caddr_t)rtm, len);
 1721         for (i = 0; i < RTAX_MAX; i++) {
 1722                 if ((sa = rtinfo->rti_info[i]) == NULL)
 1723                         continue;
 1724                 rtinfo->rti_addrs |= (1 << i);
 1725 
 1726                 dlen = SA_SIZE(sa);
 1727                 KASSERT(dlen <= sizeof(ss),
 1728                     ("%s: sockaddr size overflow", __func__));
 1729                 bzero(&ss, sizeof(ss));
 1730                 bcopy(sa, &ss, sa->sa_len);
 1731                 sa = (struct sockaddr *)&ss;
 1732 #ifdef INET6
 1733                 if (sa->sa_family == AF_INET6) {
 1734                         sin6 = (struct sockaddr_in6 *)sa;
 1735                         (void)sa6_recoverscope(sin6);
 1736                 }
 1737 #endif
 1738                 m_copyback(m, len, dlen, (caddr_t)sa);
 1739                 len += dlen;
 1740         }
 1741         if (m->m_pkthdr.len != len) {
 1742                 m_freem(m);
 1743                 return (NULL);
 1744         }
 1745         rtm->rtm_msglen = len;
 1746         rtm->rtm_version = RTM_VERSION;
 1747         rtm->rtm_type = type;
 1748         return (m);
 1749 }
 1750 
 1751 /*
 1752  * Writes information related to @rtinfo object to preallocated buffer.
 1753  * Stores needed size in @plen. If @w is NULL, calculates size without
 1754  * writing.
 1755  * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
 1756  *
 1757  * Returns 0 on success.
 1758  *
 1759  */
 1760 static int
 1761 rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
 1762 {
 1763         struct sockaddr_storage ss;
 1764         int len, buflen = 0, dlen, i;
 1765         caddr_t cp = NULL;
 1766         struct rt_msghdr *rtm = NULL;
 1767 #ifdef INET6
 1768         struct sockaddr_in6 *sin6;
 1769 #endif
 1770 #ifdef COMPAT_FREEBSD32
 1771         bool compat32 = false;
 1772 #endif
 1773 
 1774         switch (type) {
 1775         case RTM_DELADDR:
 1776         case RTM_NEWADDR:
 1777                 if (w != NULL && w->w_op == NET_RT_IFLISTL) {
 1778 #ifdef COMPAT_FREEBSD32
 1779                         if (w->w_req->flags & SCTL_MASK32) {
 1780                                 len = sizeof(struct ifa_msghdrl32);
 1781                                 compat32 = true;
 1782                         } else
 1783 #endif
 1784                                 len = sizeof(struct ifa_msghdrl);
 1785                 } else
 1786                         len = sizeof(struct ifa_msghdr);
 1787                 break;
 1788 
 1789         case RTM_IFINFO:
 1790 #ifdef COMPAT_FREEBSD32
 1791                 if (w != NULL && w->w_req->flags & SCTL_MASK32) {
 1792                         if (w->w_op == NET_RT_IFLISTL)
 1793                                 len = sizeof(struct if_msghdrl32);
 1794                         else
 1795                                 len = sizeof(struct if_msghdr32);
 1796                         compat32 = true;
 1797                         break;
 1798                 }
 1799 #endif
 1800                 if (w != NULL && w->w_op == NET_RT_IFLISTL)
 1801                         len = sizeof(struct if_msghdrl);
 1802                 else
 1803                         len = sizeof(struct if_msghdr);
 1804                 break;
 1805 
 1806         case RTM_NEWMADDR:
 1807                 len = sizeof(struct ifma_msghdr);
 1808                 break;
 1809 
 1810         default:
 1811                 len = sizeof(struct rt_msghdr);
 1812         }
 1813 
 1814         if (w != NULL) {
 1815                 rtm = (struct rt_msghdr *)w->w_tmem;
 1816                 buflen = w->w_tmemsize - len;
 1817                 cp = (caddr_t)w->w_tmem + len;
 1818         }
 1819 
 1820         rtinfo->rti_addrs = 0;
 1821         for (i = 0; i < RTAX_MAX; i++) {
 1822                 struct sockaddr *sa;
 1823 
 1824                 if ((sa = rtinfo->rti_info[i]) == NULL)
 1825                         continue;
 1826                 rtinfo->rti_addrs |= (1 << i);
 1827 #ifdef COMPAT_FREEBSD32
 1828                 if (compat32)
 1829                         dlen = SA_SIZE32(sa);
 1830                 else
 1831 #endif
 1832                         dlen = SA_SIZE(sa);
 1833                 if (cp != NULL && buflen >= dlen) {
 1834                         KASSERT(dlen <= sizeof(ss),
 1835                             ("%s: sockaddr size overflow", __func__));
 1836                         bzero(&ss, sizeof(ss));
 1837                         bcopy(sa, &ss, sa->sa_len);
 1838                         sa = (struct sockaddr *)&ss;
 1839 #ifdef INET6
 1840                         if (sa->sa_family == AF_INET6) {
 1841                                 sin6 = (struct sockaddr_in6 *)sa;
 1842                                 (void)sa6_recoverscope(sin6);
 1843                         }
 1844 #endif
 1845                         bcopy((caddr_t)sa, cp, (unsigned)dlen);
 1846                         cp += dlen;
 1847                         buflen -= dlen;
 1848                 } else if (cp != NULL) {
 1849                         /*
 1850                          * Buffer too small. Count needed size
 1851                          * and return with error.
 1852                          */
 1853                         cp = NULL;
 1854                 }
 1855 
 1856                 len += dlen;
 1857         }
 1858 
 1859         if (cp != NULL) {
 1860                 dlen = ALIGN(len) - len;
 1861                 if (buflen < dlen)
 1862                         cp = NULL;
 1863                 else {
 1864                         bzero(cp, dlen);
 1865                         cp += dlen;
 1866                         buflen -= dlen;
 1867                 }
 1868         }
 1869         len = ALIGN(len);
 1870 
 1871         if (cp != NULL) {
 1872                 /* fill header iff buffer is large enough */
 1873                 rtm->rtm_version = RTM_VERSION;
 1874                 rtm->rtm_type = type;
 1875                 rtm->rtm_msglen = len;
 1876         }
 1877 
 1878         *plen = len;
 1879 
 1880         if (w != NULL && cp == NULL)
 1881                 return (ENOBUFS);
 1882 
 1883         return (0);
 1884 }
 1885 
 1886 /*
 1887  * This routine is called to generate a message from the routing
 1888  * socket indicating that a redirect has occurred, a routing lookup
 1889  * has failed, or that a protocol has detected timeouts to a particular
 1890  * destination.
 1891  */
 1892 void
 1893 rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
 1894     int fibnum)
 1895 {
 1896         struct rt_msghdr *rtm;
 1897         struct mbuf *m;
 1898         struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
 1899 
 1900         if (V_route_cb.any_count == 0)
 1901                 return;
 1902         m = rtsock_msg_mbuf(type, rtinfo);
 1903         if (m == NULL)
 1904                 return;
 1905 
 1906         if (fibnum != RT_ALL_FIBS) {
 1907                 KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
 1908                     "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
 1909                 M_SETFIB(m, fibnum);
 1910                 m->m_flags |= RTS_FILTER_FIB;
 1911         }
 1912 
 1913         rtm = mtod(m, struct rt_msghdr *);
 1914         rtm->rtm_flags = RTF_DONE | flags;
 1915         rtm->rtm_errno = error;
 1916         rtm->rtm_addrs = rtinfo->rti_addrs;
 1917         rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
 1918 }
 1919 
 1920 void
 1921 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
 1922 {
 1923 
 1924         rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS);
 1925 }
 1926 
 1927 /*
 1928  * This routine is called to generate a message from the routing
 1929  * socket indicating that the status of a network interface has changed.
 1930  */
 1931 static void
 1932 rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask __unused)
 1933 {
 1934         struct if_msghdr *ifm;
 1935         struct mbuf *m;
 1936         struct rt_addrinfo info;
 1937 
 1938         if (V_route_cb.any_count == 0)
 1939                 return;
 1940         bzero((caddr_t)&info, sizeof(info));
 1941         m = rtsock_msg_mbuf(RTM_IFINFO, &info);
 1942         if (m == NULL)
 1943                 return;
 1944         ifm = mtod(m, struct if_msghdr *);
 1945         ifm->ifm_index = ifp->if_index;
 1946         ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 1947         if_data_copy(ifp, &ifm->ifm_data);
 1948         ifm->ifm_addrs = 0;
 1949         rt_dispatch(m, AF_UNSPEC);
 1950 }
 1951 
 1952 /*
 1953  * Announce interface address arrival/withdraw.
 1954  * Please do not call directly, use rt_addrmsg().
 1955  * Assume input data to be valid.
 1956  * Returns 0 on success.
 1957  */
 1958 int
 1959 rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
 1960 {
 1961         struct rt_addrinfo info;
 1962         struct sockaddr *sa;
 1963         int ncmd;
 1964         struct mbuf *m;
 1965         struct ifa_msghdr *ifam;
 1966         struct ifnet *ifp = ifa->ifa_ifp;
 1967         struct sockaddr_storage ss;
 1968 
 1969         if (V_route_cb.any_count == 0)
 1970                 return (0);
 1971 
 1972         ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
 1973 
 1974         bzero((caddr_t)&info, sizeof(info));
 1975         info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
 1976         info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
 1977         info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
 1978             info.rti_info[RTAX_IFA], ifa->ifa_netmask, &ss);
 1979         info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 1980         if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL)
 1981                 return (ENOBUFS);
 1982         ifam = mtod(m, struct ifa_msghdr *);
 1983         ifam->ifam_index = ifp->if_index;
 1984         ifam->ifam_metric = ifa->ifa_ifp->if_metric;
 1985         ifam->ifam_flags = ifa->ifa_flags;
 1986         ifam->ifam_addrs = info.rti_addrs;
 1987 
 1988         if (fibnum != RT_ALL_FIBS) {
 1989                 M_SETFIB(m, fibnum);
 1990                 m->m_flags |= RTS_FILTER_FIB;
 1991         }
 1992 
 1993         rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
 1994 
 1995         return (0);
 1996 }
 1997 
 1998 /*
 1999  * Announce route addition/removal to rtsock based on @rt data.
 2000  * Callers are advives to use rt_routemsg() instead of using this
 2001  *  function directly.
 2002  * Assume @rt data is consistent.
 2003  *
 2004  * Returns 0 on success.
 2005  */
 2006 int
 2007 rtsock_routemsg(int cmd, struct rtentry *rt, struct nhop_object *nh,
 2008     int fibnum)
 2009 {
 2010         union sockaddr_union dst, mask;
 2011         struct rt_addrinfo info;
 2012 
 2013         if (V_route_cb.any_count == 0)
 2014                 return (0);
 2015 
 2016         int family = rt_get_family(rt);
 2017         init_sockaddrs_family(family, &dst.sa, &mask.sa);
 2018         export_rtaddrs(rt, &dst.sa, &mask.sa);
 2019 
 2020         bzero((caddr_t)&info, sizeof(info));
 2021         info.rti_info[RTAX_DST] = &dst.sa;
 2022         info.rti_info[RTAX_NETMASK] = &mask.sa;
 2023         info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
 2024         info.rti_flags = rt->rte_flags | nhop_get_rtflags(nh);
 2025         info.rti_ifp = nh->nh_ifp;
 2026 
 2027         return (rtsock_routemsg_info(cmd, &info, fibnum));
 2028 }
 2029 
 2030 int
 2031 rtsock_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum)
 2032 {
 2033         struct rt_msghdr *rtm;
 2034         struct sockaddr *sa;
 2035         struct mbuf *m;
 2036 
 2037         if (V_route_cb.any_count == 0)
 2038                 return (0);
 2039 
 2040         if (info->rti_flags & RTF_HOST)
 2041                 info->rti_info[RTAX_NETMASK] = NULL;
 2042 
 2043         m = rtsock_msg_mbuf(cmd, info);
 2044         if (m == NULL)
 2045                 return (ENOBUFS);
 2046 
 2047         if (fibnum != RT_ALL_FIBS) {
 2048                 KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
 2049                     "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
 2050                 M_SETFIB(m, fibnum);
 2051                 m->m_flags |= RTS_FILTER_FIB;
 2052         }
 2053 
 2054         rtm = mtod(m, struct rt_msghdr *);
 2055         rtm->rtm_addrs = info->rti_addrs;
 2056         if (info->rti_ifp != NULL)
 2057                 rtm->rtm_index = info->rti_ifp->if_index;
 2058         /* Add RTF_DONE to indicate command 'completion' required by API */
 2059         info->rti_flags |= RTF_DONE;
 2060         /* Reported routes has to be up */
 2061         if (cmd == RTM_ADD || cmd == RTM_CHANGE)
 2062                 info->rti_flags |= RTF_UP;
 2063         rtm->rtm_flags = info->rti_flags;
 2064 
 2065         sa = info->rti_info[RTAX_DST];
 2066         rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
 2067 
 2068         return (0);
 2069 }
 2070 
 2071 /*
 2072  * This is the analogue to the rt_newaddrmsg which performs the same
 2073  * function but for multicast group memberhips.  This is easier since
 2074  * there is no route state to worry about.
 2075  */
 2076 void
 2077 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
 2078 {
 2079         struct rt_addrinfo info;
 2080         struct mbuf *m = NULL;
 2081         struct ifnet *ifp = ifma->ifma_ifp;
 2082         struct ifma_msghdr *ifmam;
 2083 
 2084         if (V_route_cb.any_count == 0)
 2085                 return;
 2086 
 2087         bzero((caddr_t)&info, sizeof(info));
 2088         info.rti_info[RTAX_IFA] = ifma->ifma_addr;
 2089         if (ifp && ifp->if_addr)
 2090                 info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
 2091         else
 2092                 info.rti_info[RTAX_IFP] = NULL;
 2093         /*
 2094          * If a link-layer address is present, present it as a ``gateway''
 2095          * (similarly to how ARP entries, e.g., are presented).
 2096          */
 2097         info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
 2098         m = rtsock_msg_mbuf(cmd, &info);
 2099         if (m == NULL)
 2100                 return;
 2101         ifmam = mtod(m, struct ifma_msghdr *);
 2102         KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
 2103             __func__));
 2104         ifmam->ifmam_index = ifp->if_index;
 2105         ifmam->ifmam_addrs = info.rti_addrs;
 2106         rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
 2107 }
 2108 
 2109 static struct mbuf *
 2110 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
 2111         struct rt_addrinfo *info)
 2112 {
 2113         struct if_announcemsghdr *ifan;
 2114         struct mbuf *m;
 2115 
 2116         if (V_route_cb.any_count == 0)
 2117                 return NULL;
 2118         bzero((caddr_t)info, sizeof(*info));
 2119         m = rtsock_msg_mbuf(type, info);
 2120         if (m != NULL) {
 2121                 ifan = mtod(m, struct if_announcemsghdr *);
 2122                 ifan->ifan_index = ifp->if_index;
 2123                 strlcpy(ifan->ifan_name, ifp->if_xname,
 2124                         sizeof(ifan->ifan_name));
 2125                 ifan->ifan_what = what;
 2126         }
 2127         return m;
 2128 }
 2129 
 2130 /*
 2131  * This is called to generate routing socket messages indicating
 2132  * IEEE80211 wireless events.
 2133  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
 2134  */
 2135 void
 2136 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
 2137 {
 2138         struct mbuf *m;
 2139         struct rt_addrinfo info;
 2140 
 2141         m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
 2142         if (m != NULL) {
 2143                 /*
 2144                  * Append the ieee80211 data.  Try to stick it in the
 2145                  * mbuf containing the ifannounce msg; otherwise allocate
 2146                  * a new mbuf and append.
 2147                  *
 2148                  * NB: we assume m is a single mbuf.
 2149                  */
 2150                 if (data_len > M_TRAILINGSPACE(m)) {
 2151                         struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
 2152                         if (n == NULL) {
 2153                                 m_freem(m);
 2154                                 return;
 2155                         }
 2156                         bcopy(data, mtod(n, void *), data_len);
 2157                         n->m_len = data_len;
 2158                         m->m_next = n;
 2159                 } else if (data_len > 0) {
 2160                         bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
 2161                         m->m_len += data_len;
 2162                 }
 2163                 if (m->m_flags & M_PKTHDR)
 2164                         m->m_pkthdr.len += data_len;
 2165                 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
 2166                 rt_dispatch(m, AF_UNSPEC);
 2167         }
 2168 }
 2169 
 2170 /*
 2171  * This is called to generate routing socket messages indicating
 2172  * network interface arrival and departure.
 2173  */
 2174 static void
 2175 rt_ifannouncemsg(struct ifnet *ifp, int what)
 2176 {
 2177         struct mbuf *m;
 2178         struct rt_addrinfo info;
 2179 
 2180         m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
 2181         if (m != NULL)
 2182                 rt_dispatch(m, AF_UNSPEC);
 2183 }
 2184 
 2185 static void
 2186 rt_dispatch(struct mbuf *m, sa_family_t saf)
 2187 {
 2188 
 2189         M_ASSERTPKTHDR(m);
 2190 
 2191         m->m_rtsock_family = saf;
 2192         if (V_loif)
 2193                 m->m_pkthdr.rcvif = V_loif;
 2194         else {
 2195                 m_freem(m);
 2196                 return;
 2197         }
 2198         netisr_queue(NETISR_ROUTE, m);  /* mbuf is free'd on failure. */
 2199 }
 2200 
 2201 /*
 2202  * Checks if rte can be exported w.r.t jails/vnets.
 2203  *
 2204  * Returns true if it can, false otherwise.
 2205  */
 2206 static bool
 2207 can_export_rte(struct ucred *td_ucred, bool rt_is_host,
 2208     const struct sockaddr *rt_dst)
 2209 {
 2210 
 2211         if ((!rt_is_host) ? jailed_without_vnet(td_ucred)
 2212             : prison_if(td_ucred, rt_dst) != 0)
 2213                 return (false);
 2214         return (true);
 2215 }
 2216 
 2217 
 2218 /*
 2219  * This is used in dumping the kernel table via sysctl().
 2220  */
 2221 static int
 2222 sysctl_dumpentry(struct rtentry *rt, void *vw)
 2223 {
 2224         struct walkarg *w = vw;
 2225         struct nhop_object *nh;
 2226 
 2227         NET_EPOCH_ASSERT();
 2228 
 2229         export_rtaddrs(rt, w->dst, w->mask);
 2230         if (!can_export_rte(w->w_req->td->td_ucred, rt_is_host(rt), w->dst))
 2231                 return (0);
 2232         nh = rt_get_raw_nhop(rt);
 2233 #ifdef ROUTE_MPATH
 2234         if (NH_IS_NHGRP(nh)) {
 2235                 const struct weightened_nhop *wn;
 2236                 uint32_t num_nhops;
 2237                 int error;
 2238                 wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
 2239                 for (int i = 0; i < num_nhops; i++) {
 2240                         error = sysctl_dumpnhop(rt, wn[i].nh, wn[i].weight, w);
 2241                         if (error != 0)
 2242                                 return (error);
 2243                 }
 2244         } else
 2245 #endif
 2246                 sysctl_dumpnhop(rt, nh, rt->rt_weight, w);
 2247 
 2248         return (0);
 2249 }
 2250 
 2251 
 2252 static int
 2253 sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh, uint32_t weight,
 2254     struct walkarg *w)
 2255 {
 2256         struct rt_addrinfo info;
 2257         int error = 0, size;
 2258         uint32_t rtflags;
 2259 
 2260         rtflags = nhop_get_rtflags(nh);
 2261 
 2262         if (w->w_op == NET_RT_FLAGS && !(rtflags & w->w_arg))
 2263                 return (0);
 2264 
 2265         bzero((caddr_t)&info, sizeof(info));
 2266         info.rti_info[RTAX_DST] = w->dst;
 2267         info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
 2268         info.rti_info[RTAX_NETMASK] = (rtflags & RTF_HOST) ? NULL : w->mask;
 2269         info.rti_info[RTAX_GENMASK] = 0;
 2270         if (nh->nh_ifp && !(nh->nh_ifp->if_flags & IFF_DYING)) {
 2271                 info.rti_info[RTAX_IFP] = nh->nh_ifp->if_addr->ifa_addr;
 2272                 info.rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
 2273                 if (nh->nh_ifp->if_flags & IFF_POINTOPOINT)
 2274                         info.rti_info[RTAX_BRD] = nh->nh_ifa->ifa_dstaddr;
 2275         }
 2276         if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
 2277                 return (error);
 2278         if (w->w_req && w->w_tmem) {
 2279                 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
 2280 
 2281                 bzero(&rtm->rtm_index,
 2282                     sizeof(*rtm) - offsetof(struct rt_msghdr, rtm_index));
 2283 
 2284                 /*
 2285                  * rte flags may consist of RTF_HOST (duplicated in nhop rtflags)
 2286                  * and RTF_UP (if entry is linked, which is always true here).
 2287                  * Given that, use nhop rtflags & add RTF_UP.
 2288                  */
 2289                 rtm->rtm_flags = rtflags | RTF_UP;
 2290                 if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
 2291                         rtm->rtm_flags = RTF_GATEWAY | 
 2292                                 (rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
 2293                 rt_getmetrics(rt, nh, &rtm->rtm_rmx);
 2294                 rtm->rtm_rmx.rmx_weight = weight;
 2295                 rtm->rtm_index = nh->nh_ifp->if_index;
 2296                 rtm->rtm_addrs = info.rti_addrs;
 2297                 error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
 2298                 return (error);
 2299         }
 2300         return (error);
 2301 }
 2302 
 2303 static int
 2304 sysctl_iflist_ifml(struct ifnet *ifp, const struct if_data *src_ifd,
 2305     struct rt_addrinfo *info, struct walkarg *w, int len)
 2306 {
 2307         struct if_msghdrl *ifm;
 2308         struct if_data *ifd;
 2309 
 2310         ifm = (struct if_msghdrl *)w->w_tmem;
 2311 
 2312 #ifdef COMPAT_FREEBSD32
 2313         if (w->w_req->flags & SCTL_MASK32) {
 2314                 struct if_msghdrl32 *ifm32;
 2315 
 2316                 ifm32 = (struct if_msghdrl32 *)ifm;
 2317                 ifm32->ifm_addrs = info->rti_addrs;
 2318                 ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 2319                 ifm32->ifm_index = ifp->if_index;
 2320                 ifm32->_ifm_spare1 = 0;
 2321                 ifm32->ifm_len = sizeof(*ifm32);
 2322                 ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
 2323                 ifm32->_ifm_spare2 = 0;
 2324                 ifd = &ifm32->ifm_data;
 2325         } else
 2326 #endif
 2327         {
 2328                 ifm->ifm_addrs = info->rti_addrs;
 2329                 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 2330                 ifm->ifm_index = ifp->if_index;
 2331                 ifm->_ifm_spare1 = 0;
 2332                 ifm->ifm_len = sizeof(*ifm);
 2333                 ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
 2334                 ifm->_ifm_spare2 = 0;
 2335                 ifd = &ifm->ifm_data;
 2336         }
 2337 
 2338         memcpy(ifd, src_ifd, sizeof(*ifd));
 2339 
 2340         return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
 2341 }
 2342 
 2343 static int
 2344 sysctl_iflist_ifm(struct ifnet *ifp, const struct if_data *src_ifd,
 2345     struct rt_addrinfo *info, struct walkarg *w, int len)
 2346 {
 2347         struct if_msghdr *ifm;
 2348         struct if_data *ifd;
 2349 
 2350         ifm = (struct if_msghdr *)w->w_tmem;
 2351 
 2352 #ifdef COMPAT_FREEBSD32
 2353         if (w->w_req->flags & SCTL_MASK32) {
 2354                 struct if_msghdr32 *ifm32;
 2355 
 2356                 ifm32 = (struct if_msghdr32 *)ifm;
 2357                 ifm32->ifm_addrs = info->rti_addrs;
 2358                 ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 2359                 ifm32->ifm_index = ifp->if_index;
 2360                 ifm32->_ifm_spare1 = 0;
 2361                 ifd = &ifm32->ifm_data;
 2362         } else
 2363 #endif
 2364         {
 2365                 ifm->ifm_addrs = info->rti_addrs;
 2366                 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 2367                 ifm->ifm_index = ifp->if_index;
 2368                 ifm->_ifm_spare1 = 0;
 2369                 ifd = &ifm->ifm_data;
 2370         }
 2371 
 2372         memcpy(ifd, src_ifd, sizeof(*ifd));
 2373 
 2374         return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
 2375 }
 2376 
 /*
  * Finish the extended per-address record held in w->w_tmem and copy it
  * out to the sysctl requester.
  *
  * sysctl_iflist() calls this for NET_RT_IFLISTL requests immediately after
  * rtsock_msg_buffer(RTM_NEWADDR, ...).  Only the fields assigned below are
  * written here; any other part of the record (message type, trailing
  * sockaddrs, ...) is presumably laid out by rtsock_msg_buffer() -- confirm
  * against that function.
  *
  *   ifa   address being reported: source of the flags, of the owning
  *         interface's index and metric, and of the traffic counters.
  *   info  supplies the rti_addrs bitmask stored in ifam_addrs.
  *   w     walk state: w_tmem is the staging buffer, w_req the request.
  *   len   number of bytes of w_tmem handed to SYSCTL_OUT().
  *
  * Returns whatever SYSCTL_OUT() returns.
  */
 2377 static int
 2378 sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
 2379     struct walkarg *w, int len)
 2380 {
 2381         struct ifa_msghdrl *ifam;
 2382         struct if_data *ifd;
 2383 
 2384         ifam = (struct ifa_msghdrl *)w->w_tmem;
 2385 
 2386 #ifdef COMPAT_FREEBSD32
              /*
               * Request flagged SCTL_MASK32: write the same set of fields
               * through the ifa_msghdrl32 view of the buffer, so that
               * ifam_len, ifam_data_off and the if_data location follow
               * that structure's layout.
               */
 2387         if (w->w_req->flags & SCTL_MASK32) {
 2388                 struct ifa_msghdrl32 *ifam32;
 2389 
 2390                 ifam32 = (struct ifa_msghdrl32 *)ifam;
 2391                 ifam32->ifam_addrs = info->rti_addrs;
 2392                 ifam32->ifam_flags = ifa->ifa_flags;
 2393                 ifam32->ifam_index = ifa->ifa_ifp->if_index;
 2394                 ifam32->_ifam_spare1 = 0;
 2395                 ifam32->ifam_len = sizeof(*ifam32);
 2396                 ifam32->ifam_data_off =
 2397                     offsetof(struct ifa_msghdrl32, ifam_data);
 2398                 ifam32->ifam_metric = ifa->ifa_ifp->if_metric;
 2399                 ifd = &ifam32->ifam_data;
 2400         } else
 2401 #endif
 2402         {
 2403                 ifam->ifam_addrs = info->rti_addrs;
 2404                 ifam->ifam_flags = ifa->ifa_flags;
 2405                 ifam->ifam_index = ifa->ifa_ifp->if_index;
 2406                 ifam->_ifam_spare1 = 0;
 2407                 ifam->ifam_len = sizeof(*ifam);
 2408                 ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
 2409                 ifam->ifam_metric = ifa->ifa_ifp->if_metric;
 2410                 ifd = &ifam->ifam_data;
 2411         }
 2412 
              /*
               * Start from an all-zero if_data: only the length, the four
               * per-address counters and (optionally) the vhid are reported.
               */
 2413         bzero(ifd, sizeof(*ifd));
 2414         ifd->ifi_datalen = sizeof(struct if_data);
 2415         ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
 2416         ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
 2417         ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
 2418         ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
 2419 
 2420         /* Fixup if_data carp(4) vhid. */
              /* The hook pointer is NULL-checked; ifi_vhid stays 0 otherwise. */
 2421         if (carp_get_vhid_p != NULL)
 2422                 ifd->ifi_vhid = (*carp_get_vhid_p)(ifa);
 2423 
 2424         return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
 2425 }
 2426 
 /*
  * Finish the compact per-address record (struct ifa_msghdr) held in
  * w->w_tmem and copy it out to the sysctl requester.
  *
  * sysctl_iflist() uses this for every address when the operation is not
  * NET_RT_IFLISTL.  Unlike sysctl_iflist_ifaml() no if_data is filled in;
  * only the address bitmask, the address flags and the owning interface's
  * index and metric are stored.
  *
  *   ifa   address being reported.
  *   info  supplies the rti_addrs bitmask.
  *   w     walk state (w_tmem staging buffer, w_req request).
  *   len   number of bytes of w_tmem handed to SYSCTL_OUT().
  *
  * Returns whatever SYSCTL_OUT() returns.
  */
 2427 static int
 2428 sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
 2429     struct walkarg *w, int len)
 2430 {
 2431         struct ifa_msghdr *ifam;
 2432 
 2433         ifam = (struct ifa_msghdr *)w->w_tmem;
 2434         ifam->ifam_addrs = info->rti_addrs;
 2435         ifam->ifam_flags = ifa->ifa_flags;
 2436         ifam->ifam_index = ifa->ifa_ifp->if_index;
 2437         ifam->_ifam_spare1 = 0;
 2438         ifam->ifam_metric = ifa->ifa_ifp->if_metric;
 2439 
 2440         return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
 2441 }
 2442 
 /*
  * Produce the interface list for NET_RT_IFLIST / NET_RT_IFLISTL.
  *
  * For every interface on V_ifnet (or only the one whose if_index equals
  * w->w_arg when that is non-zero) an RTM_IFINFO record is built, followed
  * by one RTM_NEWADDR record for each address that comes after ifp->if_addr
  * on the address list and passes the 'af' and prison_if() filters.
  *
  *   af   address family to report; 0 reports every family.
  *   w    walk state; w_op selects the compact or the extended ("L")
  *        record layout, w_req/w_tmem control whether data is copied out.
  *
  * Returns 0 on success or the first error returned by
  * rtsock_msg_buffer() or one of the record emitters.
  */
 2443 static int
 2444 sysctl_iflist(int af, struct walkarg *w)
 2445 {
 2446         struct ifnet *ifp;
 2447         struct ifaddr *ifa;
 2448         struct if_data ifd;
 2449         struct rt_addrinfo info;
 2450         int len, error = 0;
 2451         struct sockaddr_storage ss;
 2452 
 2453         bzero((caddr_t)&info, sizeof(info));
 2454         bzero(&ifd, sizeof(ifd));
 2455         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 2456                 if (w->w_arg && w->w_arg != ifp->if_index)
 2457                         continue;
 2458                 if_data_copy(ifp, &ifd);
                      /*
                       * NOTE(review): ifp->if_addr is dereferenced without a
                       * NULL check here, while sysctl_ifmalist() tolerates a
                       * NULL if_addr -- confirm which invariant holds.
                       */
 2459                 ifa = ifp->if_addr;
 2460                 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 2461                 error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
 2462                 if (error != 0)
 2463                         goto done;
                      /* RTAX_IFP is only part of the RTM_IFINFO record. */
 2464                 info.rti_info[RTAX_IFP] = NULL;
 2465                 if (w->w_req && w->w_tmem) {
 2466                         if (w->w_op == NET_RT_IFLISTL)
 2467                                 error = sysctl_iflist_ifml(ifp, &ifd, &info, w,
 2468                                     len);
 2469                         else
 2470                                 error = sysctl_iflist_ifm(ifp, &ifd, &info, w,
 2471                                     len);
 2472                         if (error)
 2473                                 goto done;
 2474                 }
                      /*
                       * Walk the addresses that follow if_addr; if_addr itself
                       * was already used for RTAX_IFP above and is not
                       * reported again as an address.
                       */
 2475                 while ((ifa = CK_STAILQ_NEXT(ifa, ifa_link)) != NULL) {
 2476                         if (af && af != ifa->ifa_addr->sa_family)
 2477                                 continue;
                              /*
                               * Skip addresses for which prison_if() returns
                               * non-zero for the requesting credential.
                               * NOTE(review): w->w_req is dereferenced
                               * unconditionally here although it is
                               * NULL-tested further down.
                               */
 2478                         if (prison_if(w->w_req->td->td_ucred,
 2479                             ifa->ifa_addr) != 0)
 2480                                 continue;
 2481                         info.rti_info[RTAX_IFA] = ifa->ifa_addr;
                              /* 'ss' is scratch space handed to rtsock_fix_netmask(). */
 2482                         info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
 2483                             ifa->ifa_addr, ifa->ifa_netmask, &ss);
 2484                         info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 2485                         error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
 2486                         if (error != 0)
 2487                                 goto done;
 2488                         if (w->w_req && w->w_tmem) {
 2489                                 if (w->w_op == NET_RT_IFLISTL)
 2490                                         error = sysctl_iflist_ifaml(ifa, &info,
 2491                                             w, len);
 2492                                 else
 2493                                         error = sysctl_iflist_ifam(ifa, &info,
 2494                                             w, len);
 2495                                 if (error)
 2496                                         goto done;
 2497                         }
 2498                 }
                      /* Clear the per-address slots before the next interface. */
 2499                 info.rti_info[RTAX_IFA] = NULL;
 2500                 info.rti_info[RTAX_NETMASK] = NULL;
 2501                 info.rti_info[RTAX_BRD] = NULL;
 2502         }
 2503 done:
 2504         return (error);
 2505 }
 2506 
 /*
  * Produce the multicast membership list for NET_RT_IFMALIST.
  *
  * For every interface on V_ifnet (or only the one whose if_index equals
  * w->w_arg when that is non-zero) one RTM_NEWMADDR record is built per
  * entry of ifp->if_multiaddrs that passes the 'af' and prison_if()
  * filters.  The caller must already be inside the network epoch
  * (asserted below).
  *
  *   af   address family to report; 0 reports every family.
  *   w    walk state; records are copied out only when both w_req and
  *        w_tmem are set.
  *
  * Returns 0 on success or the first error from rtsock_msg_buffer() or
  * SYSCTL_OUT().
  */
 2507 static int
 2508 sysctl_ifmalist(int af, struct walkarg *w)
 2509 {
 2510         struct rt_addrinfo info;
 2511         struct ifaddr *ifa;
 2512         struct ifmultiaddr *ifma;
 2513         struct ifnet *ifp;
 2514         int error, len;
 2515 
 2516         NET_EPOCH_ASSERT();
 2517 
 2518         error = 0;
 2519         bzero((caddr_t)&info, sizeof(info));
 2520 
 2521         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 2522                 if (w->w_arg && w->w_arg != ifp->if_index)
 2523                         continue;
                      /* RTAX_IFP is left NULL when the interface has no if_addr. */
 2524                 ifa = ifp->if_addr;
 2525                 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
 2526                 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 2527                         if (af && af != ifma->ifma_addr->sa_family)
 2528                                 continue;
 2529                         if (prison_if(w->w_req->td->td_ucred,
 2530                             ifma->ifma_addr) != 0)
 2531                                 continue;
 2532                         info.rti_info[RTAX_IFA] = ifma->ifma_addr;
                              /*
                               * ifma_lladdr is reported as the gateway only
                               * for memberships that are not themselves
                               * AF_LINK.
                               */
 2533                         info.rti_info[RTAX_GATEWAY] =
 2534                             (ifma->ifma_addr->sa_family != AF_LINK) ?
 2535                             ifma->ifma_lladdr : NULL;
 2536                         error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
 2537                         if (error != 0)
 2538                                 break;
 2539                         if (w->w_req && w->w_tmem) {
 2540                                 struct ifma_msghdr *ifmam;
 2541 
                                      /* Patch the header fields, then copy out. */
 2542                                 ifmam = (struct ifma_msghdr *)w->w_tmem;
 2543                                 ifmam->ifmam_index = ifma->ifma_ifp->if_index;
 2544                                 ifmam->ifmam_flags = 0;
 2545                                 ifmam->ifmam_addrs = info.rti_addrs;
 2546                                 ifmam->_ifmam_spare1 = 0;
 2547                                 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
 2548                                 if (error != 0)
 2549                                         break;
 2550                         }
 2551                 }
                      /* The inner 'break' only leaves the membership loop. */
 2552                 if (error != 0)
 2553                         break;
 2554         }
 2555         return (error);
 2556 }
 2557 
 /*
  * Walk one routing table and feed each entry to sysctl_dumpentry().
  *
  *   fibnum  FIB number passed through to rib_walk().
  *   family  address family of the table; also recorded in w->family.
  *   w       walk state handed to the callback.
  *
  * w->dst and w->mask are pointed at this function's own stack storage,
  * so they are only valid while rib_walk() is running and must not be
  * used after this function returns.  The function returns void: nothing
  * from the walk is propagated to the caller through a return value.
  */
 2558 static void
 2559 rtable_sysctl_dump(uint32_t fibnum, int family, struct walkarg *w)
 2560 {
 2561         union sockaddr_union sa_dst, sa_mask;
 2562 
 2563         w->family = family;
 2564         w->dst = (struct sockaddr *)&sa_dst;
 2565         w->mask = (struct sockaddr *)&sa_mask;
 2566 
              /* Prepare both scratch sockaddrs for 'family' before the walk. */
 2567         init_sockaddrs_family(family, w->dst, w->mask);
 2568 
 2569         rib_walk(fibnum, family, false, sysctl_dumpentry, w);
 2570 }
 2571 
 /*
  * sysctl handler behind the "routetable" node: dumps routing tables,
  * nexthops, nexthop groups, interface/address lists and multicast
  * memberships, depending on the requested operation.
  *
  * After the first name component is skipped, the remaining components
  * are interpreted as:
  *
  *   name[0]  address family (0 selects all families for table dumps)
  *   name[1]  operation (NET_RT_DUMP, NET_RT_FLAGS, NET_RT_NHOP,
  *            NET_RT_NHGRP, NET_RT_IFLIST, NET_RT_IFLISTL, NET_RT_IFMALIST)
  *   name[2]  operation argument (stored in w_arg)
  *   name[3]  optional FIB number; accepted only for NET_RT_DUMP,
  *            NET_RT_NHOP and NET_RT_NHGRP
  *
  * Returns 0 on success, otherwise an errno value: EINVAL for a short
  * name, a bad address family or a bad FIB; EPERM when the request
  * carries new data (req->newptr set); EISDIR/ENOTDIR for a wrong number
  * of name components; EAFNOSUPPORT when the requested table does not
  * exist; or the error returned by the selected dump routine.
  *
  * Notes for maintainers:
  *  - name[0] is narrowed to u_char before it is compared with AF_MAX.
  *  - An operation that matches no case below leaves 'error' at the value
  *    returned by sysctl_wire_old_buffer(), i.e. 0.
  */
 2572 static int
 2573 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
 2574 {
 2575         struct epoch_tracker et;
 2576         int     *name = (int *)arg1;
 2577         u_int   namelen = arg2;
 2578         struct rib_head *rnh = NULL; /* silence compiler. */
 2579         int     i, lim, error = EINVAL;
 2580         int     fib = 0;
 2581         u_char  af;
 2582         struct  walkarg w;
 2583 
 2584         if (namelen < 3)
 2585                 return (EINVAL);
 2586 
              /* Skip the first name component; see the layout described above. */
 2587         name++;
 2588         namelen--;
 2589         if (req->newptr)
 2590                 return (EPERM);
 2591         if (name[1] == NET_RT_DUMP || name[1] == NET_RT_NHOP || name[1] == NET_RT_NHGRP) {
                      /*
                       * FIB selection: no fourth component, or RT_ALL_FIBS,
                       * means the FIB of the requesting process.
                       */
 2592                 if (namelen == 3)
 2593                         fib = req->td->td_proc->p_fibnum;
 2594                 else if (namelen == 4)
 2595                         fib = (name[3] == RT_ALL_FIBS) ?
 2596                             req->td->td_proc->p_fibnum : name[3];
 2597                 else
 2598                         return ((namelen < 3) ? EISDIR : ENOTDIR);
 2599                 if (fib < 0 || fib >= rt_numfibs)
 2600                         return (EINVAL);
 2601         } else if (namelen != 3)
 2602                 return ((namelen < 3) ? EISDIR : ENOTDIR);
 2603         af = name[0];
 2604         if (af > AF_MAX)
 2605                 return (EINVAL);
 2606         bzero(&w, sizeof(w));
 2607         w.w_op = name[1];
 2608         w.w_arg = name[2];
 2609         w.w_req = req;
 2610 
 2611         error = sysctl_wire_old_buffer(req, 0);
 2612         if (error)
 2613                 return (error);
 2614 
 2615         /*
 2616          * Allocate reply buffer in advance.
 2617          * All rtsock messages have a maximum length of u_short.
 2618          */
 2619         w.w_tmemsize = 65536;
 2620         w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
 2621 
              /*
               * The blocking allocation above is done before the epoch
               * section is entered; every dump below runs inside it.
               */
 2622         NET_EPOCH_ENTER(et);
 2623         switch (w.w_op) {
 2624         case NET_RT_DUMP:
 2625         case NET_RT_FLAGS:
 2626                 if (af == 0) {                  /* dump all tables */
 2627                         i = 1;
 2628                         lim = AF_MAX;
 2629                 } else                          /* dump only one table */
 2630                         i = lim = af;
 2631 
 2632                 /*
 2633                  * take care of llinfo entries, the caller must
 2634                  * specify an AF
 2635                  */
 2636                 if (w.w_op == NET_RT_FLAGS &&
 2637                     (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
 2638                         if (af != 0)
 2639                                 error = lltable_sysctl_dumparp(af, w.w_req);
 2640                         else
 2641                                 error = EINVAL;
 2642                         break;
 2643                 }
 2644                 /*
 2645                  * take care of routing entries
 2646                  */
                      /*
                       * rtable_sysctl_dump() returns void, so inside this loop
                       * 'error' can only become EAFNOSUPPORT, and only when a
                       * single family was requested and its table is missing.
                       */
 2647                 for (error = 0; error == 0 && i <= lim; i++) {
 2648                         rnh = rt_tables_get_rnh(fib, i);
 2649                         if (rnh != NULL) {
 2650                                 rtable_sysctl_dump(fib, i, &w);
 2651                         } else if (af != 0)
 2652                                 error = EAFNOSUPPORT;
 2653                 }
 2654                 break;
 2655         case NET_RT_NHOP:
 2656         case NET_RT_NHGRP:
 2657                 /* Allow dumping one specific af/fib at a time */
 2658                 if (namelen < 4) {
 2659                         error = EINVAL;
 2660                         break;
 2661                 }
                      /*
                       * The FIB must be given explicitly here.  Valid FIB
                       * numbers are 0 .. rt_numfibs - 1, so the upper bound is
                       * exclusive, matching the check done before the switch.
                       */
 2662                 fib = name[3];
 2663                 if (fib < 0 || fib >= rt_numfibs) {
 2664                         error = EINVAL;
 2665                         break;
 2666                 }
 2667                 rnh = rt_tables_get_rnh(fib, af);
 2668                 if (rnh == NULL) {
 2669                         error = EAFNOSUPPORT;
 2670                         break;
 2671                 }
 2672                 if (w.w_op == NET_RT_NHOP)
 2673                         error = nhops_dump_sysctl(rnh, w.w_req);
 2674                 else
 2675 #ifdef ROUTE_MPATH
 2676                         error = nhgrp_dump_sysctl(rnh, w.w_req);
 2677 #else
 2678                         error = ENOTSUP;
 2679 #endif
 2680                 break;
 2681         case NET_RT_IFLIST:
 2682         case NET_RT_IFLISTL:
 2683                 error = sysctl_iflist(af, &w);
 2684                 break;
 2685 
 2686         case NET_RT_IFMALIST:
 2687                 error = sysctl_ifmalist(af, &w);
 2688                 break;
 2689         }
 2690         NET_EPOCH_EXIT(et);
 2691 
 2692         free(w.w_tmem, M_TEMP);
 2693         return (error);
 2694 }
 2695 
 /*
  * Declare the "routetable" sysctl node under _net, numbered PF_ROUTE,
  * flagged read-only (CTLFLAG_RD) and MP-safe (CTLFLAG_MPSAFE), with
  * sysctl_rtsock() as its handler.
  */
 2696 static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_MPSAFE,
 2697     sysctl_rtsock, "Return route tables and interface/address lists");
 2698 
 2699 /*
 2700  * Definitions of protocols supported in the ROUTE domain.
 2701  */
 2702 
 2703 static struct domain routedomain;               /* or at least forward */
 2704 
      /*
       * Protocol switch entry for the route domain: a raw socket type with
       * the PR_ATOMIC and PR_ADDR flags.  Note that both pr_abort and
       * pr_close are served by rts_close().
       */
 2705 static struct protosw routesw = {
 2706         .pr_type =              SOCK_RAW,
 2707         .pr_flags =             PR_ATOMIC|PR_ADDR,
 2708         .pr_abort =             rts_close,
 2709         .pr_attach =            rts_attach,
 2710         .pr_detach =            rts_detach,
 2711         .pr_send =              rts_send,
 2712         .pr_shutdown =          rts_shutdown,
 2713         .pr_disconnect =        rts_disconnect,
 2714         .pr_close =             rts_close,
 2715 };
 2716 
 /*
  * The PF_ROUTE domain, named "route"; routesw is its only protocol
  * switch entry (dom_nprotosw is 1).
  */
 2717 static struct domain routedomain = {
 2718         .dom_family =           PF_ROUTE,
 2719         .dom_name =             "route",
 2720         .dom_nprotosw =         1,
 2721         .dom_protosw =          { &routesw },
 2722 };
 2723 
      /* Presumably registers routedomain with the domain framework -- confirm. */
 2724 DOMAIN_SET(route);

Cache object: 721e4c534aa1b0742a513b6a1f5524cf


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.