The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet6/nd6.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. Neither the name of the project nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: releng/8.2/sys/netinet6/nd6.c 216359 2010-12-10 15:37:54Z bz $");
   34 
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/callout.h>
   41 #include <sys/malloc.h>
   42 #include <sys/mbuf.h>
   43 #include <sys/socket.h>
   44 #include <sys/sockio.h>
   45 #include <sys/time.h>
   46 #include <sys/kernel.h>
   47 #include <sys/protosw.h>
   48 #include <sys/errno.h>
   49 #include <sys/syslog.h>
   50 #include <sys/lock.h>
   51 #include <sys/rwlock.h>
   52 #include <sys/queue.h>
   53 #include <sys/sysctl.h>
   54 
   55 #include <net/if.h>
   56 #include <net/if_arc.h>
   57 #include <net/if_dl.h>
   58 #include <net/if_types.h>
   59 #include <net/iso88025.h>
   60 #include <net/fddi.h>
   61 #include <net/route.h>
   62 #include <net/vnet.h>
   63 
   64 #include <netinet/in.h>
   65 #include <net/if_llatbl.h>
   66 #define L3_ADDR_SIN6(le)        ((struct sockaddr_in6 *) L3_ADDR(le))
   67 #include <netinet/if_ether.h>
   68 #include <netinet6/in6_var.h>
   69 #include <netinet/ip6.h>
   70 #include <netinet6/ip6_var.h>
   71 #include <netinet6/scope6_var.h>
   72 #include <netinet6/nd6.h>
   73 #include <netinet/icmp6.h>
   74 
   75 #include <sys/limits.h>
   76 
   77 #include <security/mac/mac_framework.h>
   78 
   79 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
   80 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
   81 
   82 #define SIN6(s) ((struct sockaddr_in6 *)s)
   83 
   84 /* timer values */
   85 VNET_DEFINE(int, nd6_prune)     = 1;    /* walk list every 1 seconds */
   86 VNET_DEFINE(int, nd6_delay)     = 5;    /* delay first probe time 5 second */
   87 VNET_DEFINE(int, nd6_umaxtries) = 3;    /* maximum unicast query */
   88 VNET_DEFINE(int, nd6_mmaxtries) = 3;    /* maximum multicast query */
   89 VNET_DEFINE(int, nd6_useloopback) = 1;  /* use loopback interface for
   90                                          * local traffic */
   91 VNET_DEFINE(int, nd6_gctimer)   = (60 * 60 * 24); /* 1 day: garbage
   92                                          * collection timer */
   93 
   94 /* preventing too many loops in ND option parsing */
   95 static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */
   96 
   97 VNET_DEFINE(int, nd6_maxnudhint) = 0;   /* max # of subsequent upper
   98                                          * layer hints */
   99 static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved
  100                                          * ND entries */
  101 #define V_nd6_maxndopt                  VNET(nd6_maxndopt)
  102 #define V_nd6_maxqueuelen               VNET(nd6_maxqueuelen)
  103 
  104 #ifdef ND6_DEBUG
  105 VNET_DEFINE(int, nd6_debug) = 1;
  106 #else
  107 VNET_DEFINE(int, nd6_debug) = 0;
  108 #endif
  109 
  110 /* for debugging? */
  111 #if 0
  112 static int nd6_inuse, nd6_allocated;
  113 #endif
  114 
  115 VNET_DEFINE(struct nd_drhead, nd_defrouter);
  116 VNET_DEFINE(struct nd_prhead, nd_prefix);
  117 
  118 VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
  119 #define V_nd6_recalc_reachtm_interval   VNET(nd6_recalc_reachtm_interval)
  120 
  121 static struct sockaddr_in6 all1_sa;
  122 
  123 static int nd6_is_new_addr_neighbor __P((struct sockaddr_in6 *,
  124         struct ifnet *));
  125 static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
  126 static void nd6_slowtimo(void *);
  127 static int regen_tmpaddr(struct in6_ifaddr *);
  128 static struct llentry *nd6_free(struct llentry *, int);
  129 static void nd6_llinfo_timer(void *);
  130 static void clear_llinfo_pqueue(struct llentry *);
  131 
  132 static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
  133 #define V_nd6_slowtimo_ch               VNET(nd6_slowtimo_ch)
  134 
  135 VNET_DEFINE(struct callout, nd6_timer_ch);
  136 
  137 void
  138 nd6_init(void)
  139 {
  140         int i;
  141 
  142         LIST_INIT(&V_nd_prefix);
  143 
  144         all1_sa.sin6_family = AF_INET6;
  145         all1_sa.sin6_len = sizeof(struct sockaddr_in6);
  146         for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
  147                 all1_sa.sin6_addr.s6_addr[i] = 0xff;
  148 
  149         /* initialization of the default router list */
  150         TAILQ_INIT(&V_nd_defrouter);
  151 
  152         /* start timer */
  153         callout_init(&V_nd6_slowtimo_ch, 0);
  154         callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
  155             nd6_slowtimo, curvnet);
  156 }
  157 
  158 #ifdef VIMAGE
  159 void
  160 nd6_destroy()
  161 {
  162 
  163         callout_drain(&V_nd6_slowtimo_ch);
  164         callout_drain(&V_nd6_timer_ch);
  165 }
  166 #endif
  167 
  168 struct nd_ifinfo *
  169 nd6_ifattach(struct ifnet *ifp)
  170 {
  171         struct nd_ifinfo *nd;
  172 
  173         nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK);
  174         bzero(nd, sizeof(*nd));
  175 
  176         nd->initialized = 1;
  177 
  178         nd->chlim = IPV6_DEFHLIM;
  179         nd->basereachable = REACHABLE_TIME;
  180         nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
  181         nd->retrans = RETRANS_TIMER;
  182         /*
  183          * Note that the default value of ip6_accept_rtadv is 0, which means
  184          * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV
  185          * here.
  186          */
  187         nd->flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV);
  188 
  189         /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
  190         nd6_setmtu0(ifp, nd);
  191 
  192         return nd;
  193 }
  194 
  195 void
  196 nd6_ifdetach(struct nd_ifinfo *nd)
  197 {
  198 
  199         free(nd, M_IP6NDP);
  200 }
  201 
  202 /*
  203  * Reset ND level link MTU. This function is called when the physical MTU
  204  * changes, which means we might have to adjust the ND level MTU.
  205  */
  206 void
  207 nd6_setmtu(struct ifnet *ifp)
  208 {
  209 
  210         nd6_setmtu0(ifp, ND_IFINFO(ifp));
  211 }
  212 
  213 /* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
  214 void
  215 nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
  216 {
  217         u_int32_t omaxmtu;
  218 
  219         omaxmtu = ndi->maxmtu;
  220 
  221         switch (ifp->if_type) {
  222         case IFT_ARCNET:
  223                 ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */
  224                 break;
  225         case IFT_FDDI:
  226                 ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */
  227                 break;
  228         case IFT_ISO88025:
  229                  ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
  230                  break;
  231         default:
  232                 ndi->maxmtu = ifp->if_mtu;
  233                 break;
  234         }
  235 
  236         /*
  237          * Decreasing the interface MTU under IPV6 minimum MTU may cause
  238          * undesirable situation.  We thus notify the operator of the change
  239          * explicitly.  The check for omaxmtu is necessary to restrict the
  240          * log to the case of changing the MTU, not initializing it.
  241          */
  242         if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
  243                 log(LOG_NOTICE, "nd6_setmtu0: "
  244                     "new link MTU on %s (%lu) is too small for IPv6\n",
  245                     if_name(ifp), (unsigned long)ndi->maxmtu);
  246         }
  247 
  248         if (ndi->maxmtu > V_in6_maxmtu)
  249                 in6_setmaxmtu(); /* check all interfaces just in case */
  250 
  251 }
  252 
  253 void
  254 nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
  255 {
  256 
  257         bzero(ndopts, sizeof(*ndopts));
  258         ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
  259         ndopts->nd_opts_last
  260                 = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
  261 
  262         if (icmp6len == 0) {
  263                 ndopts->nd_opts_done = 1;
  264                 ndopts->nd_opts_search = NULL;
  265         }
  266 }
  267 
  268 /*
  269  * Take one ND option.
  270  */
  271 struct nd_opt_hdr *
  272 nd6_option(union nd_opts *ndopts)
  273 {
  274         struct nd_opt_hdr *nd_opt;
  275         int olen;
  276 
  277         if (ndopts == NULL)
  278                 panic("ndopts == NULL in nd6_option");
  279         if (ndopts->nd_opts_last == NULL)
  280                 panic("uninitialized ndopts in nd6_option");
  281         if (ndopts->nd_opts_search == NULL)
  282                 return NULL;
  283         if (ndopts->nd_opts_done)
  284                 return NULL;
  285 
  286         nd_opt = ndopts->nd_opts_search;
  287 
  288         /* make sure nd_opt_len is inside the buffer */
  289         if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
  290                 bzero(ndopts, sizeof(*ndopts));
  291                 return NULL;
  292         }
  293 
  294         olen = nd_opt->nd_opt_len << 3;
  295         if (olen == 0) {
  296                 /*
  297                  * Message validation requires that all included
  298                  * options have a length that is greater than zero.
  299                  */
  300                 bzero(ndopts, sizeof(*ndopts));
  301                 return NULL;
  302         }
  303 
  304         ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
  305         if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
  306                 /* option overruns the end of buffer, invalid */
  307                 bzero(ndopts, sizeof(*ndopts));
  308                 return NULL;
  309         } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
  310                 /* reached the end of options chain */
  311                 ndopts->nd_opts_done = 1;
  312                 ndopts->nd_opts_search = NULL;
  313         }
  314         return nd_opt;
  315 }
  316 
  317 /*
  318  * Parse multiple ND options.
  319  * This function is much easier to use, for ND routines that do not need
  320  * multiple options of the same type.
  321  */
  322 int
  323 nd6_options(union nd_opts *ndopts)
  324 {
  325         struct nd_opt_hdr *nd_opt;
  326         int i = 0;
  327 
  328         if (ndopts == NULL)
  329                 panic("ndopts == NULL in nd6_options");
  330         if (ndopts->nd_opts_last == NULL)
  331                 panic("uninitialized ndopts in nd6_options");
  332         if (ndopts->nd_opts_search == NULL)
  333                 return 0;
  334 
  335         while (1) {
  336                 nd_opt = nd6_option(ndopts);
  337                 if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
  338                         /*
  339                          * Message validation requires that all included
  340                          * options have a length that is greater than zero.
  341                          */
  342                         ICMP6STAT_INC(icp6s_nd_badopt);
  343                         bzero(ndopts, sizeof(*ndopts));
  344                         return -1;
  345                 }
  346 
  347                 if (nd_opt == NULL)
  348                         goto skip1;
  349 
  350                 switch (nd_opt->nd_opt_type) {
  351                 case ND_OPT_SOURCE_LINKADDR:
  352                 case ND_OPT_TARGET_LINKADDR:
  353                 case ND_OPT_MTU:
  354                 case ND_OPT_REDIRECTED_HEADER:
  355                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
  356                                 nd6log((LOG_INFO,
  357                                     "duplicated ND6 option found (type=%d)\n",
  358                                     nd_opt->nd_opt_type));
  359                                 /* XXX bark? */
  360                         } else {
  361                                 ndopts->nd_opt_array[nd_opt->nd_opt_type]
  362                                         = nd_opt;
  363                         }
  364                         break;
  365                 case ND_OPT_PREFIX_INFORMATION:
  366                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
  367                                 ndopts->nd_opt_array[nd_opt->nd_opt_type]
  368                                         = nd_opt;
  369                         }
  370                         ndopts->nd_opts_pi_end =
  371                                 (struct nd_opt_prefix_info *)nd_opt;
  372                         break;
  373                 default:
  374                         /*
  375                          * Unknown options must be silently ignored,
  376                          * to accomodate future extension to the protocol.
  377                          */
  378                         nd6log((LOG_DEBUG,
  379                             "nd6_options: unsupported option %d - "
  380                             "option ignored\n", nd_opt->nd_opt_type));
  381                 }
  382 
  383 skip1:
  384                 i++;
  385                 if (i > V_nd6_maxndopt) {
  386                         ICMP6STAT_INC(icp6s_nd_toomanyopt);
  387                         nd6log((LOG_INFO, "too many loop in nd opt\n"));
  388                         break;
  389                 }
  390 
  391                 if (ndopts->nd_opts_done)
  392                         break;
  393         }
  394 
  395         return 0;
  396 }
  397 
  398 /*
  399  * ND6 timer routine to handle ND6 entries
  400  */
  401 void
  402 nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
  403 {
  404         int canceled;
  405 
  406         LLE_WLOCK_ASSERT(ln);
  407 
  408         if (tick < 0) {
  409                 ln->la_expire = 0;
  410                 ln->ln_ntick = 0;
  411                 canceled = callout_stop(&ln->ln_timer_ch);
  412         } else {
  413                 ln->la_expire = time_second + tick / hz;
  414                 LLE_ADDREF(ln);
  415                 if (tick > INT_MAX) {
  416                         ln->ln_ntick = tick - INT_MAX;
  417                         canceled = callout_reset(&ln->ln_timer_ch, INT_MAX,
  418                             nd6_llinfo_timer, ln);
  419                 } else {
  420                         ln->ln_ntick = 0;
  421                         canceled = callout_reset(&ln->ln_timer_ch, tick,
  422                             nd6_llinfo_timer, ln);
  423                 }
  424         }
  425         if (canceled)
  426                 LLE_REMREF(ln);
  427 }
  428 
  429 void
  430 nd6_llinfo_settimer(struct llentry *ln, long tick)
  431 {
  432 
  433         LLE_WLOCK(ln);
  434         nd6_llinfo_settimer_locked(ln, tick);
  435         LLE_WUNLOCK(ln);
  436 }
  437 
  438 static void
  439 nd6_llinfo_timer(void *arg)
  440 {
  441         struct llentry *ln;
  442         struct in6_addr *dst;
  443         struct ifnet *ifp;
  444         struct nd_ifinfo *ndi = NULL;
  445 
  446         KASSERT(arg != NULL, ("%s: arg NULL", __func__));
  447         ln = (struct llentry *)arg;
  448         LLE_WLOCK_ASSERT(ln);
  449         ifp = ln->lle_tbl->llt_ifp;
  450 
  451         CURVNET_SET(ifp->if_vnet);
  452 
  453         if (ln->ln_ntick > 0) {
  454                 if (ln->ln_ntick > INT_MAX) {
  455                         ln->ln_ntick -= INT_MAX;
  456                         nd6_llinfo_settimer_locked(ln, INT_MAX);
  457                 } else {
  458                         ln->ln_ntick = 0;
  459                         nd6_llinfo_settimer_locked(ln, ln->ln_ntick);
  460                 }
  461                 goto done;
  462         }
  463 
  464         ndi = ND_IFINFO(ifp);
  465         dst = &L3_ADDR_SIN6(ln)->sin6_addr;
  466         if (ln->la_flags & LLE_STATIC) {
  467                 goto done;
  468         }
  469 
  470         if (ln->la_flags & LLE_DELETED) {
  471                 (void)nd6_free(ln, 0);
  472                 ln = NULL;
  473                 goto done;
  474         }
  475 
  476         switch (ln->ln_state) {
  477         case ND6_LLINFO_INCOMPLETE:
  478                 if (ln->la_asked < V_nd6_mmaxtries) {
  479                         ln->la_asked++;
  480                         nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
  481                         LLE_WUNLOCK(ln);
  482                         nd6_ns_output(ifp, NULL, dst, ln, 0);
  483                         LLE_WLOCK(ln);
  484                 } else {
  485                         struct mbuf *m = ln->la_hold;
  486                         if (m) {
  487                                 struct mbuf *m0;
  488 
  489                                 /*
  490                                  * assuming every packet in la_hold has the
  491                                  * same IP header.  Send error after unlock.
  492                                  */
  493                                 m0 = m->m_nextpkt;
  494                                 m->m_nextpkt = NULL;
  495                                 ln->la_hold = m0;
  496                                 clear_llinfo_pqueue(ln);
  497                         }
  498                         (void)nd6_free(ln, 0);
  499                         ln = NULL;
  500                         if (m != NULL)
  501                                 icmp6_error2(m, ICMP6_DST_UNREACH,
  502                                     ICMP6_DST_UNREACH_ADDR, 0, ifp);
  503                 }
  504                 break;
  505         case ND6_LLINFO_REACHABLE:
  506                 if (!ND6_LLINFO_PERMANENT(ln)) {
  507                         ln->ln_state = ND6_LLINFO_STALE;
  508                         nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
  509                 }
  510                 break;
  511 
  512         case ND6_LLINFO_STALE:
  513                 /* Garbage Collection(RFC 2461 5.3) */
  514                 if (!ND6_LLINFO_PERMANENT(ln)) {
  515                         (void)nd6_free(ln, 1);
  516                         ln = NULL;
  517                 }
  518                 break;
  519 
  520         case ND6_LLINFO_DELAY:
  521                 if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
  522                         /* We need NUD */
  523                         ln->la_asked = 1;
  524                         ln->ln_state = ND6_LLINFO_PROBE;
  525                         nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
  526                         LLE_WUNLOCK(ln);
  527                         nd6_ns_output(ifp, dst, dst, ln, 0);
  528                         LLE_WLOCK(ln);
  529                 } else {
  530                         ln->ln_state = ND6_LLINFO_STALE; /* XXX */
  531                         nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
  532                 }
  533                 break;
  534         case ND6_LLINFO_PROBE:
  535                 if (ln->la_asked < V_nd6_umaxtries) {
  536                         ln->la_asked++;
  537                         nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
  538                         LLE_WUNLOCK(ln);
  539                         nd6_ns_output(ifp, dst, dst, ln, 0);
  540                         LLE_WLOCK(ln);
  541                 } else {
  542                         (void)nd6_free(ln, 0);
  543                         ln = NULL;
  544                 }
  545                 break;
  546         default:
  547                 panic("%s: paths in a dark night can be confusing: %d",
  548                     __func__, ln->ln_state);
  549         }
  550 done:
  551         if (ln != NULL)
  552                 LLE_FREE_LOCKED(ln);
  553         CURVNET_RESTORE();
  554 }
  555 
  556 
  557 /*
  558  * ND6 timer routine to expire default route list and prefix list
  559  */
  560 void
  561 nd6_timer(void *arg)
  562 {
  563         CURVNET_SET((struct vnet *) arg);
  564         int s;
  565         struct nd_defrouter *dr;
  566         struct nd_prefix *pr;
  567         struct in6_ifaddr *ia6, *nia6;
  568         struct in6_addrlifetime *lt6;
  569 
  570         callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
  571             nd6_timer, curvnet);
  572 
  573         /* expire default router list */
  574         s = splnet();
  575         dr = TAILQ_FIRST(&V_nd_defrouter);
  576         while (dr) {
  577                 if (dr->expire && dr->expire < time_second) {
  578                         struct nd_defrouter *t;
  579                         t = TAILQ_NEXT(dr, dr_entry);
  580                         defrtrlist_del(dr);
  581                         dr = t;
  582                 } else {
  583                         dr = TAILQ_NEXT(dr, dr_entry);
  584                 }
  585         }
  586 
  587         /*
  588          * expire interface addresses.
  589          * in the past the loop was inside prefix expiry processing.
  590          * However, from a stricter speci-confrmance standpoint, we should
  591          * rather separate address lifetimes and prefix lifetimes.
  592          *
  593          * XXXRW: in6_ifaddrhead locking.
  594          */
  595   addrloop:
  596         TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) {
  597                 /* check address lifetime */
  598                 lt6 = &ia6->ia6_lifetime;
  599                 if (IFA6_IS_INVALID(ia6)) {
  600                         int regen = 0;
  601 
  602                         /*
  603                          * If the expiring address is temporary, try
  604                          * regenerating a new one.  This would be useful when
  605                          * we suspended a laptop PC, then turned it on after a
  606                          * period that could invalidate all temporary
  607                          * addresses.  Although we may have to restart the
  608                          * loop (see below), it must be after purging the
  609                          * address.  Otherwise, we'd see an infinite loop of
  610                          * regeneration.
  611                          */
  612                         if (V_ip6_use_tempaddr &&
  613                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
  614                                 if (regen_tmpaddr(ia6) == 0)
  615                                         regen = 1;
  616                         }
  617 
  618                         in6_purgeaddr(&ia6->ia_ifa);
  619 
  620                         if (regen)
  621                                 goto addrloop; /* XXX: see below */
  622                 } else if (IFA6_IS_DEPRECATED(ia6)) {
  623                         int oldflags = ia6->ia6_flags;
  624 
  625                         ia6->ia6_flags |= IN6_IFF_DEPRECATED;
  626 
  627                         /*
  628                          * If a temporary address has just become deprecated,
  629                          * regenerate a new one if possible.
  630                          */
  631                         if (V_ip6_use_tempaddr &&
  632                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
  633                             (oldflags & IN6_IFF_DEPRECATED) == 0) {
  634 
  635                                 if (regen_tmpaddr(ia6) == 0) {
  636                                         /*
  637                                          * A new temporary address is
  638                                          * generated.
  639                                          * XXX: this means the address chain
  640                                          * has changed while we are still in
  641                                          * the loop.  Although the change
  642                                          * would not cause disaster (because
  643                                          * it's not a deletion, but an
  644                                          * addition,) we'd rather restart the
  645                                          * loop just for safety.  Or does this
  646                                          * significantly reduce performance??
  647                                          */
  648                                         goto addrloop;
  649                                 }
  650                         }
  651                 } else {
  652                         /*
  653                          * A new RA might have made a deprecated address
  654                          * preferred.
  655                          */
  656                         ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
  657                 }
  658         }
  659 
  660         /* expire prefix list */
  661         pr = V_nd_prefix.lh_first;
  662         while (pr) {
  663                 /*
  664                  * check prefix lifetime.
  665                  * since pltime is just for autoconf, pltime processing for
  666                  * prefix is not necessary.
  667                  */
  668                 if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
  669                     time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
  670                         struct nd_prefix *t;
  671                         t = pr->ndpr_next;
  672 
  673                         /*
  674                          * address expiration and prefix expiration are
  675                          * separate.  NEVER perform in6_purgeaddr here.
  676                          */
  677 
  678                         prelist_remove(pr);
  679                         pr = t;
  680                 } else
  681                         pr = pr->ndpr_next;
  682         }
  683         splx(s);
  684         CURVNET_RESTORE();
  685 }
  686 
  687 /*
  688  * ia6 - deprecated/invalidated temporary address
  689  */
  690 static int
  691 regen_tmpaddr(struct in6_ifaddr *ia6)
  692 {
  693         struct ifaddr *ifa;
  694         struct ifnet *ifp;
  695         struct in6_ifaddr *public_ifa6 = NULL;
  696 
  697         ifp = ia6->ia_ifa.ifa_ifp;
  698         IF_ADDR_LOCK(ifp);
  699         TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  700                 struct in6_ifaddr *it6;
  701 
  702                 if (ifa->ifa_addr->sa_family != AF_INET6)
  703                         continue;
  704 
  705                 it6 = (struct in6_ifaddr *)ifa;
  706 
  707                 /* ignore no autoconf addresses. */
  708                 if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
  709                         continue;
  710 
  711                 /* ignore autoconf addresses with different prefixes. */
  712                 if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
  713                         continue;
  714 
  715                 /*
  716                  * Now we are looking at an autoconf address with the same
  717                  * prefix as ours.  If the address is temporary and is still
  718                  * preferred, do not create another one.  It would be rare, but
  719                  * could happen, for example, when we resume a laptop PC after
  720                  * a long period.
  721                  */
  722                 if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
  723                     !IFA6_IS_DEPRECATED(it6)) {
  724                         public_ifa6 = NULL;
  725                         break;
  726                 }
  727 
  728                 /*
  729                  * This is a public autoconf address that has the same prefix
  730                  * as ours.  If it is preferred, keep it.  We can't break the
  731                  * loop here, because there may be a still-preferred temporary
  732                  * address with the prefix.
  733                  */
  734                 if (!IFA6_IS_DEPRECATED(it6))
  735                     public_ifa6 = it6;
  736 
  737                 if (public_ifa6 != NULL)
  738                         ifa_ref(&public_ifa6->ia_ifa);
  739         }
  740         IF_ADDR_UNLOCK(ifp);
  741 
  742         if (public_ifa6 != NULL) {
  743                 int e;
  744 
  745                 if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) {
  746                         ifa_free(&public_ifa6->ia_ifa);
  747                         log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
  748                             " tmp addr,errno=%d\n", e);
  749                         return (-1);
  750                 }
  751                 ifa_free(&public_ifa6->ia_ifa);
  752                 return (0);
  753         }
  754 
  755         return (-1);
  756 }
  757 
  758 /*
  759  * Nuke neighbor cache/prefix/default router management table, right before
  760  * ifp goes away.
  761  */
  762 void
  763 nd6_purge(struct ifnet *ifp)
  764 {
  765         struct nd_defrouter *dr, *ndr;
  766         struct nd_prefix *pr, *npr;
  767 
  768         /*
  769          * Nuke default router list entries toward ifp.
  770          * We defer removal of default router list entries that is installed
  771          * in the routing table, in order to keep additional side effects as
  772          * small as possible.
  773          */
  774         for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
  775                 ndr = TAILQ_NEXT(dr, dr_entry);
  776                 if (dr->installed)
  777                         continue;
  778 
  779                 if (dr->ifp == ifp)
  780                         defrtrlist_del(dr);
  781         }
  782 
  783         for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
  784                 ndr = TAILQ_NEXT(dr, dr_entry);
  785                 if (!dr->installed)
  786                         continue;
  787 
  788                 if (dr->ifp == ifp)
  789                         defrtrlist_del(dr);
  790         }
  791 
  792         /* Nuke prefix list entries toward ifp */
  793         for (pr = V_nd_prefix.lh_first; pr; pr = npr) {
  794                 npr = pr->ndpr_next;
  795                 if (pr->ndpr_ifp == ifp) {
  796                         /*
  797                          * Because if_detach() does *not* release prefixes
  798                          * while purging addresses the reference count will
  799                          * still be above zero. We therefore reset it to
  800                          * make sure that the prefix really gets purged.
  801                          */
  802                         pr->ndpr_refcnt = 0;
  803 
  804                         /*
  805                          * Previously, pr->ndpr_addr is removed as well,
  806                          * but I strongly believe we don't have to do it.
  807                          * nd6_purge() is only called from in6_ifdetach(),
  808                          * which removes all the associated interface addresses
  809                          * by itself.
  810                          * (jinmei@kame.net 20010129)
  811                          */
  812                         prelist_remove(pr);
  813                 }
  814         }
  815 
  816         /* cancel default outgoing interface setting */
  817         if (V_nd6_defifindex == ifp->if_index)
  818                 nd6_setdefaultiface(0);
  819 
  820         if (!V_ip6_forwarding && V_ip6_accept_rtadv) { /* XXX: too restrictive? */
  821                 /* refresh default router list
  822                  *
  823                  * 
  824                  */
  825                 defrouter_select();
  826 
  827         }
  828 
  829         /* XXXXX
  830          * We do not nuke the neighbor cache entries here any more
  831          * because the neighbor cache is kept in if_afdata[AF_INET6].
  832          * nd6_purge() is invoked by in6_ifdetach() which is called
  833          * from if_detach() where everything gets purged. So let
  834          * in6_domifdetach() do the actual L2 table purging work.
  835          */
  836 }
  837 
  838 /* 
  839  * the caller acquires and releases the lock on the lltbls
  840  * Returns the llentry locked
  841  */
  842 struct llentry *
  843 nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
  844 {
  845         struct sockaddr_in6 sin6;
  846         struct llentry *ln;
  847         int llflags;
  848         
  849         bzero(&sin6, sizeof(sin6));
  850         sin6.sin6_len = sizeof(struct sockaddr_in6);
  851         sin6.sin6_family = AF_INET6;
  852         sin6.sin6_addr = *addr6;
  853 
  854         IF_AFDATA_LOCK_ASSERT(ifp);
  855 
  856         llflags = 0;
  857         if (flags & ND6_CREATE)
  858             llflags |= LLE_CREATE;
  859         if (flags & ND6_EXCLUSIVE)
  860             llflags |= LLE_EXCLUSIVE;   
  861         
  862         ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6);
  863         if ((ln != NULL) && (llflags & LLE_CREATE))
  864                 ln->ln_state = ND6_LLINFO_NOSTATE;
  865         
  866         return (ln);
  867 }
  868 
  869 /*
  870  * Test whether a given IPv6 address is a neighbor or not, ignoring
  871  * the actual neighbor cache.  The neighbor cache is ignored in order
  872  * to not reenter the routing code from within itself.
  873  */
  874 static int
  875 nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
  876 {
  877         struct nd_prefix *pr;
  878         struct ifaddr *dstaddr;
  879 
  880         /*
  881          * A link-local address is always a neighbor.
  882          * XXX: a link does not necessarily specify a single interface.
  883          */
  884         if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
  885                 struct sockaddr_in6 sin6_copy;
  886                 u_int32_t zone;
  887 
  888                 /*
  889                  * We need sin6_copy since sa6_recoverscope() may modify the
  890                  * content (XXX).
  891                  */
  892                 sin6_copy = *addr;
  893                 if (sa6_recoverscope(&sin6_copy))
  894                         return (0); /* XXX: should be impossible */
  895                 if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
  896                         return (0);
  897                 if (sin6_copy.sin6_scope_id == zone)
  898                         return (1);
  899                 else
  900                         return (0);
  901         }
  902 
  903         /*
  904          * If the address matches one of our addresses,
  905          * it should be a neighbor.
  906          * If the address matches one of our on-link prefixes, it should be a
  907          * neighbor.
  908          */
  909         for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
  910                 if (pr->ndpr_ifp != ifp)
  911                         continue;
  912 
  913                 if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
  914                         struct rtentry *rt;
  915                         rt = rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 0, 0);
  916                         if (rt == NULL)
  917                                 continue;
  918                         /*
  919                          * This is the case where multiple interfaces
  920                          * have the same prefix, but only one is installed 
  921                          * into the routing table and that prefix entry
  922                          * is not the one being examined here. In the case
  923                          * where RADIX_MPATH is enabled, multiple route
  924                          * entries (of the same rt_key value) will be 
  925                          * installed because the interface addresses all
  926                          * differ.
  927                          */
  928                         if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
  929                                &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) {
  930                                 RTFREE_LOCKED(rt);
  931                                 continue;
  932                         }
  933                         RTFREE_LOCKED(rt);
  934                 }
  935 
  936                 if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
  937                     &addr->sin6_addr, &pr->ndpr_mask))
  938                         return (1);
  939         }
  940 
  941         /*
  942          * If the address is assigned on the node of the other side of
  943          * a p2p interface, the address should be a neighbor.
  944          */
  945         dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr);
  946         if (dstaddr != NULL) {
  947                 if (dstaddr->ifa_ifp == ifp) {
  948                         ifa_free(dstaddr);
  949                         return (1);
  950                 }
  951                 ifa_free(dstaddr);
  952         }
  953 
  954         /*
  955          * If the default router list is empty, all addresses are regarded
  956          * as on-link, and thus, as a neighbor.
  957          * XXX: we restrict the condition to hosts, because routers usually do
  958          * not have the "default router list".
  959          */
  960         if (!V_ip6_forwarding && TAILQ_FIRST(&V_nd_defrouter) == NULL &&
  961             V_nd6_defifindex == ifp->if_index) {
  962                 return (1);
  963         }
  964 
  965         return (0);
  966 }
  967 
  968 
  969 /*
  970  * Detect if a given IPv6 address identifies a neighbor on a given link.
  971  * XXX: should take care of the destination of a p2p link?
  972  */
  973 int
  974 nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
  975 {
  976         struct llentry *lle;
  977         int rc = 0;
  978 
  979         IF_AFDATA_UNLOCK_ASSERT(ifp);
  980         if (nd6_is_new_addr_neighbor(addr, ifp))
  981                 return (1);
  982 
  983         /*
  984          * Even if the address matches none of our addresses, it might be
  985          * in the neighbor cache.
  986          */
  987         IF_AFDATA_LOCK(ifp);
  988         if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) {
  989                 LLE_RUNLOCK(lle);
  990                 rc = 1;
  991         }
  992         IF_AFDATA_UNLOCK(ifp);
  993         return (rc);
  994 }
  995 
  996 /*
  997  * Free an nd6 llinfo entry.
  998  * Since the function would cause significant changes in the kernel, DO NOT
  999  * make it global, unless you have a strong reason for the change, and are sure
 1000  * that the change is safe.
 1001  */
 1002 static struct llentry *
 1003 nd6_free(struct llentry *ln, int gc)
 1004 {
 1005         struct llentry *next;
 1006         struct nd_defrouter *dr;
 1007         struct ifnet *ifp;
 1008 
 1009         LLE_WLOCK_ASSERT(ln);
 1010 
 1011         /*
 1012          * we used to have pfctlinput(PRC_HOSTDEAD) here.
 1013          * even though it is not harmful, it was not really necessary.
 1014          */
 1015 
 1016         /* cancel timer */
 1017         nd6_llinfo_settimer_locked(ln, -1);
 1018 
 1019         ifp = ln->lle_tbl->llt_ifp;
 1020 
 1021         if (!V_ip6_forwarding) {
 1022 
 1023                 dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
 1024 
 1025                 if (dr != NULL && dr->expire &&
 1026                     ln->ln_state == ND6_LLINFO_STALE && gc) {
 1027                         /*
 1028                          * If the reason for the deletion is just garbage
 1029                          * collection, and the neighbor is an active default
 1030                          * router, do not delete it.  Instead, reset the GC
 1031                          * timer using the router's lifetime.
 1032                          * Simply deleting the entry would affect default
 1033                          * router selection, which is not necessarily a good
 1034                          * thing, especially when we're using router preference
 1035                          * values.
 1036                          * XXX: the check for ln_state would be redundant,
 1037                          *      but we intentionally keep it just in case.
 1038                          */
 1039                         if (dr->expire > time_second)
 1040                                 nd6_llinfo_settimer_locked(ln,
 1041                                     (dr->expire - time_second) * hz);
 1042                         else
 1043                                 nd6_llinfo_settimer_locked(ln,
 1044                                     (long)V_nd6_gctimer * hz);
 1045 
 1046                         next = LIST_NEXT(ln, lle_next);
 1047                         LLE_REMREF(ln);
 1048                         LLE_WUNLOCK(ln);
 1049                         return (next);
 1050                 }
 1051 
 1052                 if (dr) {
 1053                         /*
 1054                          * Unreachablity of a router might affect the default
 1055                          * router selection and on-link detection of advertised
 1056                          * prefixes.
 1057                          */
 1058 
 1059                         /*
 1060                          * Temporarily fake the state to choose a new default
 1061                          * router and to perform on-link determination of
 1062                          * prefixes correctly.
 1063                          * Below the state will be set correctly,
 1064                          * or the entry itself will be deleted.
 1065                          */
 1066                         ln->ln_state = ND6_LLINFO_INCOMPLETE;
 1067                 }
 1068 
 1069                 if (ln->ln_router || dr) {
 1070 
 1071                         /*
 1072                          * We need to unlock to avoid a LOR with rt6_flush() with the
 1073                          * rnh and for the calls to pfxlist_onlink_check() and
 1074                          * defrouter_select() in the block further down for calls
 1075                          * into nd6_lookup().  We still hold a ref.
 1076                          */
 1077                         LLE_WUNLOCK(ln);
 1078 
 1079                         /*
 1080                          * rt6_flush must be called whether or not the neighbor
 1081                          * is in the Default Router List.
 1082                          * See a corresponding comment in nd6_na_input().
 1083                          */
 1084                         rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
 1085                 }
 1086 
 1087                 if (dr) {
 1088                         /*
 1089                          * Since defrouter_select() does not affect the
 1090                          * on-link determination and MIP6 needs the check
 1091                          * before the default router selection, we perform
 1092                          * the check now.
 1093                          */
 1094                         pfxlist_onlink_check();
 1095 
 1096                         /*
 1097                          * Refresh default router list.
 1098                          */
 1099                         defrouter_select();
 1100                 }
 1101 
 1102                 if (ln->ln_router || dr)
 1103                         LLE_WLOCK(ln);
 1104         }
 1105 
 1106         /*
 1107          * Before deleting the entry, remember the next entry as the
 1108          * return value.  We need this because pfxlist_onlink_check() above
 1109          * might have freed other entries (particularly the old next entry) as
 1110          * a side effect (XXX).
 1111          */
 1112         next = LIST_NEXT(ln, lle_next);
 1113 
 1114         /*
 1115          * Save to unlock. We still hold an extra reference and will not
 1116          * free(9) in llentry_free() if someone else holds one as well.
 1117          */
 1118         LLE_WUNLOCK(ln);
 1119         IF_AFDATA_LOCK(ifp);
 1120         LLE_WLOCK(ln);
 1121         LLE_REMREF(ln);
 1122         llentry_free(ln);
 1123         IF_AFDATA_UNLOCK(ifp);
 1124 
 1125         return (next);
 1126 }
 1127 
 1128 /*
 1129  * Upper-layer reachability hint for Neighbor Unreachability Detection.
 1130  *
 1131  * XXX cost-effective methods?
 1132  */
 1133 void
 1134 nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force)
 1135 {
 1136         struct llentry *ln;
 1137         struct ifnet *ifp;
 1138 
 1139         if ((dst6 == NULL) || (rt == NULL))
 1140                 return;
 1141 
 1142         ifp = rt->rt_ifp;
 1143         IF_AFDATA_LOCK(ifp);
 1144         ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL);
 1145         IF_AFDATA_UNLOCK(ifp);
 1146         if (ln == NULL)
 1147                 return;
 1148 
 1149         if (ln->ln_state < ND6_LLINFO_REACHABLE)
 1150                 goto done;
 1151 
 1152         /*
 1153          * if we get upper-layer reachability confirmation many times,
 1154          * it is possible we have false information.
 1155          */
 1156         if (!force) {
 1157                 ln->ln_byhint++;
 1158                 if (ln->ln_byhint > V_nd6_maxnudhint) {
 1159                         goto done;
 1160                 }
 1161         }
 1162 
 1163         ln->ln_state = ND6_LLINFO_REACHABLE;
 1164         if (!ND6_LLINFO_PERMANENT(ln)) {
 1165                 nd6_llinfo_settimer_locked(ln,
 1166                     (long)ND_IFINFO(rt->rt_ifp)->reachable * hz);
 1167         }
 1168 done:
 1169         LLE_WUNLOCK(ln);
 1170 }
 1171 
 1172 
 1173 int
 1174 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 1175 {
 1176         struct in6_drlist *drl = (struct in6_drlist *)data;
 1177         struct in6_oprlist *oprl = (struct in6_oprlist *)data;
 1178         struct in6_ndireq *ndi = (struct in6_ndireq *)data;
 1179         struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
 1180         struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
 1181         struct nd_defrouter *dr;
 1182         struct nd_prefix *pr;
 1183         int i = 0, error = 0;
 1184         int s;
 1185 
 1186         switch (cmd) {
 1187         case SIOCGDRLST_IN6:
 1188                 /*
 1189                  * obsolete API, use sysctl under net.inet6.icmp6
 1190                  */
 1191                 bzero(drl, sizeof(*drl));
 1192                 s = splnet();
 1193                 dr = TAILQ_FIRST(&V_nd_defrouter);
 1194                 while (dr && i < DRLSTSIZ) {
 1195                         drl->defrouter[i].rtaddr = dr->rtaddr;
 1196                         in6_clearscope(&drl->defrouter[i].rtaddr);
 1197 
 1198                         drl->defrouter[i].flags = dr->flags;
 1199                         drl->defrouter[i].rtlifetime = dr->rtlifetime;
 1200                         drl->defrouter[i].expire = dr->expire;
 1201                         drl->defrouter[i].if_index = dr->ifp->if_index;
 1202                         i++;
 1203                         dr = TAILQ_NEXT(dr, dr_entry);
 1204                 }
 1205                 splx(s);
 1206                 break;
 1207         case SIOCGPRLST_IN6:
 1208                 /*
 1209                  * obsolete API, use sysctl under net.inet6.icmp6
 1210                  *
 1211                  * XXX the structure in6_prlist was changed in backward-
 1212                  * incompatible manner.  in6_oprlist is used for SIOCGPRLST_IN6,
 1213                  * in6_prlist is used for nd6_sysctl() - fill_prlist().
 1214                  */
 1215                 /*
 1216                  * XXX meaning of fields, especialy "raflags", is very
 1217                  * differnet between RA prefix list and RR/static prefix list.
 1218                  * how about separating ioctls into two?
 1219                  */
 1220                 bzero(oprl, sizeof(*oprl));
 1221                 s = splnet();
 1222                 pr = V_nd_prefix.lh_first;
 1223                 while (pr && i < PRLSTSIZ) {
 1224                         struct nd_pfxrouter *pfr;
 1225                         int j;
 1226 
 1227                         oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
 1228                         oprl->prefix[i].raflags = pr->ndpr_raf;
 1229                         oprl->prefix[i].prefixlen = pr->ndpr_plen;
 1230                         oprl->prefix[i].vltime = pr->ndpr_vltime;
 1231                         oprl->prefix[i].pltime = pr->ndpr_pltime;
 1232                         oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
 1233                         if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
 1234                                 oprl->prefix[i].expire = 0;
 1235                         else {
 1236                                 time_t maxexpire;
 1237 
 1238                                 /* XXX: we assume time_t is signed. */
 1239                                 maxexpire = (-1) &
 1240                                     ~((time_t)1 <<
 1241                                     ((sizeof(maxexpire) * 8) - 1));
 1242                                 if (pr->ndpr_vltime <
 1243                                     maxexpire - pr->ndpr_lastupdate) {
 1244                                         oprl->prefix[i].expire =
 1245                                             pr->ndpr_lastupdate +
 1246                                             pr->ndpr_vltime;
 1247                                 } else
 1248                                         oprl->prefix[i].expire = maxexpire;
 1249                         }
 1250 
 1251                         pfr = pr->ndpr_advrtrs.lh_first;
 1252                         j = 0;
 1253                         while (pfr) {
 1254                                 if (j < DRLSTSIZ) {
 1255 #define RTRADDR oprl->prefix[i].advrtr[j]
 1256                                         RTRADDR = pfr->router->rtaddr;
 1257                                         in6_clearscope(&RTRADDR);
 1258 #undef RTRADDR
 1259                                 }
 1260                                 j++;
 1261                                 pfr = pfr->pfr_next;
 1262                         }
 1263                         oprl->prefix[i].advrtrs = j;
 1264                         oprl->prefix[i].origin = PR_ORIG_RA;
 1265 
 1266                         i++;
 1267                         pr = pr->ndpr_next;
 1268                 }
 1269                 splx(s);
 1270 
 1271                 break;
 1272         case OSIOCGIFINFO_IN6:
 1273 #define ND      ndi->ndi
 1274                 /* XXX: old ndp(8) assumes a positive value for linkmtu. */
 1275                 bzero(&ND, sizeof(ND));
 1276                 ND.linkmtu = IN6_LINKMTU(ifp);
 1277                 ND.maxmtu = ND_IFINFO(ifp)->maxmtu;
 1278                 ND.basereachable = ND_IFINFO(ifp)->basereachable;
 1279                 ND.reachable = ND_IFINFO(ifp)->reachable;
 1280                 ND.retrans = ND_IFINFO(ifp)->retrans;
 1281                 ND.flags = ND_IFINFO(ifp)->flags;
 1282                 ND.recalctm = ND_IFINFO(ifp)->recalctm;
 1283                 ND.chlim = ND_IFINFO(ifp)->chlim;
 1284                 break;
 1285         case SIOCGIFINFO_IN6:
 1286                 ND = *ND_IFINFO(ifp);
 1287                 break;
 1288         case SIOCSIFINFO_IN6:
 1289                 /*
 1290                  * used to change host variables from userland.
 1291                  * intented for a use on router to reflect RA configurations.
 1292                  */
 1293                 /* 0 means 'unspecified' */
 1294                 if (ND.linkmtu != 0) {
 1295                         if (ND.linkmtu < IPV6_MMTU ||
 1296                             ND.linkmtu > IN6_LINKMTU(ifp)) {
 1297                                 error = EINVAL;
 1298                                 break;
 1299                         }
 1300                         ND_IFINFO(ifp)->linkmtu = ND.linkmtu;
 1301                 }
 1302 
 1303                 if (ND.basereachable != 0) {
 1304                         int obasereachable = ND_IFINFO(ifp)->basereachable;
 1305 
 1306                         ND_IFINFO(ifp)->basereachable = ND.basereachable;
 1307                         if (ND.basereachable != obasereachable)
 1308                                 ND_IFINFO(ifp)->reachable =
 1309                                     ND_COMPUTE_RTIME(ND.basereachable);
 1310                 }
 1311                 if (ND.retrans != 0)
 1312                         ND_IFINFO(ifp)->retrans = ND.retrans;
 1313                 if (ND.chlim != 0)
 1314                         ND_IFINFO(ifp)->chlim = ND.chlim;
 1315                 /* FALLTHROUGH */
 1316         case SIOCSIFINFO_FLAGS:
 1317                 ND_IFINFO(ifp)->flags = ND.flags;
 1318                 break;
 1319 #undef ND
 1320         case SIOCSNDFLUSH_IN6:  /* XXX: the ioctl name is confusing... */
 1321                 /* sync kernel routing table with the default router list */
 1322                 defrouter_reset();
 1323                 defrouter_select();
 1324                 break;
 1325         case SIOCSPFXFLUSH_IN6:
 1326         {
 1327                 /* flush all the prefix advertised by routers */
 1328                 struct nd_prefix *pr, *next;
 1329 
 1330                 s = splnet();
 1331                 for (pr = V_nd_prefix.lh_first; pr; pr = next) {
 1332                         struct in6_ifaddr *ia, *ia_next;
 1333 
 1334                         next = pr->ndpr_next;
 1335 
 1336                         if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 1337                                 continue; /* XXX */
 1338 
 1339                         /* do we really have to remove addresses as well? */
 1340                         /* XXXRW: in6_ifaddrhead locking. */
 1341                         TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link,
 1342                             ia_next) {
 1343                                 if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 1344                                         continue;
 1345 
 1346                                 if (ia->ia6_ndpr == pr)
 1347                                         in6_purgeaddr(&ia->ia_ifa);
 1348                         }
 1349                         prelist_remove(pr);
 1350                 }
 1351                 splx(s);
 1352                 break;
 1353         }
 1354         case SIOCSRTRFLUSH_IN6:
 1355         {
 1356                 /* flush all the default routers */
 1357                 struct nd_defrouter *dr, *next;
 1358 
 1359                 s = splnet();
 1360                 defrouter_reset();
 1361                 for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = next) {
 1362                         next = TAILQ_NEXT(dr, dr_entry);
 1363                         defrtrlist_del(dr);
 1364                 }
 1365                 defrouter_select();
 1366                 splx(s);
 1367                 break;
 1368         }
 1369         case SIOCGNBRINFO_IN6:
 1370         {
 1371                 struct llentry *ln;
 1372                 struct in6_addr nb_addr = nbi->addr; /* make local for safety */
 1373 
 1374                 if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
 1375                         return (error);
 1376 
 1377                 IF_AFDATA_LOCK(ifp);
 1378                 ln = nd6_lookup(&nb_addr, 0, ifp);
 1379                 IF_AFDATA_UNLOCK(ifp);
 1380 
 1381                 if (ln == NULL) {
 1382                         error = EINVAL;
 1383                         break;
 1384                 }
 1385                 nbi->state = ln->ln_state;
 1386                 nbi->asked = ln->la_asked;
 1387                 nbi->isrouter = ln->ln_router;
 1388                 nbi->expire = ln->la_expire;
 1389                 LLE_RUNLOCK(ln);
 1390                 break;
 1391         }
 1392         case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */
 1393                 ndif->ifindex = V_nd6_defifindex;
 1394                 break;
 1395         case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */
 1396                 return (nd6_setdefaultiface(ndif->ifindex));
 1397         }
 1398         return (error);
 1399 }
 1400 
 1401 /*
 1402  * Create neighbor cache entry and cache link-layer address,
 1403  * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
 1404  *
 1405  * type - ICMP6 type
 1406  * code - type dependent information
 1407  *
 1408  * XXXXX
 1409  *  The caller of this function already acquired the ndp 
 1410  *  cache table lock because the cache entry is returned.
 1411  */
 1412 struct llentry *
 1413 nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
 1414     int lladdrlen, int type, int code)
 1415 {
 1416         struct llentry *ln = NULL;
 1417         int is_newentry;
 1418         int do_update;
 1419         int olladdr;
 1420         int llchange;
 1421         int flags;
 1422         int newstate = 0;
 1423         uint16_t router = 0;
 1424         struct sockaddr_in6 sin6;
 1425         struct mbuf *chain = NULL;
 1426         int static_route = 0;
 1427 
 1428         IF_AFDATA_UNLOCK_ASSERT(ifp);
 1429 
 1430         if (ifp == NULL)
 1431                 panic("ifp == NULL in nd6_cache_lladdr");
 1432         if (from == NULL)
 1433                 panic("from == NULL in nd6_cache_lladdr");
 1434 
 1435         /* nothing must be updated for unspecified address */
 1436         if (IN6_IS_ADDR_UNSPECIFIED(from))
 1437                 return NULL;
 1438 
 1439         /*
 1440          * Validation about ifp->if_addrlen and lladdrlen must be done in
 1441          * the caller.
 1442          *
 1443          * XXX If the link does not have link-layer adderss, what should
 1444          * we do? (ifp->if_addrlen == 0)
 1445          * Spec says nothing in sections for RA, RS and NA.  There's small
 1446          * description on it in NS section (RFC 2461 7.2.3).
 1447          */
 1448         flags = lladdr ? ND6_EXCLUSIVE : 0;
 1449         IF_AFDATA_LOCK(ifp);
 1450         ln = nd6_lookup(from, flags, ifp);
 1451 
 1452         if (ln == NULL) {
 1453                 flags |= ND6_EXCLUSIVE;
 1454                 ln = nd6_lookup(from, flags | ND6_CREATE, ifp);
 1455                 IF_AFDATA_UNLOCK(ifp);
 1456                 is_newentry = 1;
 1457         } else {
 1458                 IF_AFDATA_UNLOCK(ifp);          
 1459                 /* do nothing if static ndp is set */
 1460                 if (ln->la_flags & LLE_STATIC) {
 1461                         static_route = 1;
 1462                         goto done;
 1463                 }
 1464                 is_newentry = 0;
 1465         }
 1466         if (ln == NULL)
 1467                 return (NULL);
 1468 
 1469         olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
 1470         if (olladdr && lladdr) {
 1471                 llchange = bcmp(lladdr, &ln->ll_addr,
 1472                     ifp->if_addrlen);
 1473         } else
 1474                 llchange = 0;
 1475 
 1476         /*
 1477          * newentry olladdr  lladdr  llchange   (*=record)
 1478          *      0       n       n       --      (1)
 1479          *      0       y       n       --      (2)
 1480          *      0       n       y       --      (3) * STALE
 1481          *      0       y       y       n       (4) *
 1482          *      0       y       y       y       (5) * STALE
 1483          *      1       --      n       --      (6)   NOSTATE(= PASSIVE)
 1484          *      1       --      y       --      (7) * STALE
 1485          */
 1486 
 1487         if (lladdr) {           /* (3-5) and (7) */
 1488                 /*
 1489                  * Record source link-layer address
 1490                  * XXX is it dependent to ifp->if_type?
 1491                  */
 1492                 bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
 1493                 ln->la_flags |= LLE_VALID;
 1494         }
 1495 
 1496         if (!is_newentry) {
 1497                 if ((!olladdr && lladdr != NULL) ||     /* (3) */
 1498                     (olladdr && lladdr != NULL && llchange)) {  /* (5) */
 1499                         do_update = 1;
 1500                         newstate = ND6_LLINFO_STALE;
 1501                 } else                                  /* (1-2,4) */
 1502                         do_update = 0;
 1503         } else {
 1504                 do_update = 1;
 1505                 if (lladdr == NULL)                     /* (6) */
 1506                         newstate = ND6_LLINFO_NOSTATE;
 1507                 else                                    /* (7) */
 1508                         newstate = ND6_LLINFO_STALE;
 1509         }
 1510 
 1511         if (do_update) {
 1512                 /*
 1513                  * Update the state of the neighbor cache.
 1514                  */
 1515                 ln->ln_state = newstate;
 1516 
 1517                 if (ln->ln_state == ND6_LLINFO_STALE) {
 1518                         /*
 1519                          * XXX: since nd6_output() below will cause
 1520                          * state transition to DELAY and reset the timer,
 1521                          * we must set the timer now, although it is actually
 1522                          * meaningless.
 1523                          */
 1524                         nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
 1525 
 1526                         if (ln->la_hold) {
 1527                                 struct mbuf *m_hold, *m_hold_next;
 1528 
 1529                                 /*
 1530                                  * reset the la_hold in advance, to explicitly
 1531                                  * prevent a la_hold lookup in nd6_output()
 1532                                  * (wouldn't happen, though...)
 1533                                  */
 1534                                 for (m_hold = ln->la_hold, ln->la_hold = NULL;
 1535                                     m_hold; m_hold = m_hold_next) {
 1536                                         m_hold_next = m_hold->m_nextpkt;
 1537                                         m_hold->m_nextpkt = NULL;
 1538 
 1539                                         /*
 1540                                          * we assume ifp is not a p2p here, so
 1541                                          * just set the 2nd argument as the
 1542                                          * 1st one.
 1543                                          */
 1544                                         nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
 1545                                 }
 1546                                 /*
 1547                                  * If we have mbufs in the chain we need to do
 1548                                  * deferred transmit. Copy the address from the
 1549                                  * llentry before dropping the lock down below.
 1550                                  */
 1551                                 if (chain != NULL)
 1552                                         memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
 1553                         }
 1554                 } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
 1555                         /* probe right away */
 1556                         nd6_llinfo_settimer_locked((void *)ln, 0);
 1557                 }
 1558         }
 1559 
 1560         /*
 1561          * ICMP6 type dependent behavior.
 1562          *
 1563          * NS: clear IsRouter if new entry
 1564          * RS: clear IsRouter
 1565          * RA: set IsRouter if there's lladdr
 1566          * redir: clear IsRouter if new entry
 1567          *
 1568          * RA case, (1):
 1569          * The spec says that we must set IsRouter in the following cases:
 1570          * - If lladdr exist, set IsRouter.  This means (3-5) and (7).
 1571          * - If it is old entry (!newentry), set IsRouter.  This means (1-5).
 1572          * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
 1573          * A question arises for (1) case.  (1) case has no lladdr in the
 1574          * neighbor cache, this is similar to (6).
 1575          * This case is rare but we figured that we MUST NOT set IsRouter.
 1576          *
 1577          * newentry olladdr  lladdr  llchange       NS  RS  RA  redir
 1578          *                                                      D R
 1579          *      0       n       n       --      (1)     c   ?     s
 1580          *      0       y       n       --      (2)     c   s     s
 1581          *      0       n       y       --      (3)     c   s     s
 1582          *      0       y       y       n       (4)     c   s     s
 1583          *      0       y       y       y       (5)     c   s     s
 1584          *      1       --      n       --      (6) c   c       c s
 1585          *      1       --      y       --      (7) c   c   s   c s
 1586          *
 1587          *                                      (c=clear s=set)
 1588          */
 1589         switch (type & 0xff) {
 1590         case ND_NEIGHBOR_SOLICIT:
 1591                 /*
 1592                  * New entry must have is_router flag cleared.
 1593                  */
 1594                 if (is_newentry)        /* (6-7) */
 1595                         ln->ln_router = 0;
 1596                 break;
 1597         case ND_REDIRECT:
 1598                 /*
 1599                  * If the icmp is a redirect to a better router, always set the
 1600                  * is_router flag.  Otherwise, if the entry is newly created,
 1601                  * clear the flag.  [RFC 2461, sec 8.3]
 1602                  */
 1603                 if (code == ND_REDIRECT_ROUTER)
 1604                         ln->ln_router = 1;
 1605                 else if (is_newentry) /* (6-7) */
 1606                         ln->ln_router = 0;
 1607                 break;
 1608         case ND_ROUTER_SOLICIT:
 1609                 /*
 1610                  * is_router flag must always be cleared.
 1611                  */
 1612                 ln->ln_router = 0;
 1613                 break;
 1614         case ND_ROUTER_ADVERT:
 1615                 /*
 1616                  * Mark an entry with lladdr as a router.
 1617                  */
 1618                 if ((!is_newentry && (olladdr || lladdr)) ||    /* (2-5) */
 1619                     (is_newentry && lladdr)) {                  /* (7) */
 1620                         ln->ln_router = 1;
 1621                 }
 1622                 break;
 1623         }
 1624 
 1625         if (ln != NULL) {
 1626                 static_route = (ln->la_flags & LLE_STATIC);
 1627                 router = ln->ln_router;
 1628 
 1629                 if (flags & ND6_EXCLUSIVE)
 1630                         LLE_WUNLOCK(ln);
 1631                 else
 1632                         LLE_RUNLOCK(ln);
 1633                 if (static_route)
 1634                         ln = NULL;
 1635         }
 1636         if (chain)
 1637                 nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
 1638         
 1639         /*
 1640          * When the link-layer address of a router changes, select the
 1641          * best router again.  In particular, when the neighbor entry is newly
 1642          * created, it might affect the selection policy.
 1643          * Question: can we restrict the first condition to the "is_newentry"
 1644          * case?
 1645          * XXX: when we hear an RA from a new router with the link-layer
 1646          * address option, defrouter_select() is called twice, since
 1647          * defrtrlist_update called the function as well.  However, I believe
 1648          * we can compromise the overhead, since it only happens the first
 1649          * time.
 1650          * XXX: although defrouter_select() should not have a bad effect
 1651          * for those are not autoconfigured hosts, we explicitly avoid such
 1652          * cases for safety.
 1653          */
 1654         if (do_update && router && !V_ip6_forwarding && V_ip6_accept_rtadv) {
 1655                 /*
 1656                  * guaranteed recursion
 1657                  */
 1658                 defrouter_select();
 1659         }
 1660         
 1661         return (ln);
 1662 done:   
 1663         if (ln != NULL) {
 1664                 if (flags & ND6_EXCLUSIVE)
 1665                         LLE_WUNLOCK(ln);
 1666                 else
 1667                         LLE_RUNLOCK(ln);
 1668                 if (static_route)
 1669                         ln = NULL;
 1670         }
 1671         return (ln);
 1672 }
 1673 
 1674 static void
 1675 nd6_slowtimo(void *arg)
 1676 {
              /*
               * Periodic ND6 housekeeping callout.  'arg' is the vnet to run
               * in.  Each invocation re-arms itself ND6_SLOWTIMER_INTERVAL * hz
               * ticks ahead and then ages the per-interface countdown that
               * forces a fresh randomized reachable time.
               */
 1677         CURVNET_SET((struct vnet *) arg);
 1678         struct nd_ifinfo *nd6if;
 1679         struct ifnet *ifp;
 1680 
              /* Re-arm first so the callout keeps running whatever happens below. */
 1681         callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
 1682             nd6_slowtimo, curvnet);
              /* The interface list is walked with the ifnet read lock held. */
 1683         IFNET_RLOCK_NOSLEEP();
 1684         for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
 1685             ifp = TAILQ_NEXT(ifp, if_list)) {
 1686                 nd6if = ND_IFINFO(ifp);
                      /*
                       * A zero basereachable means the interface has not been
                       * initialized yet; otherwise count recalctm down by one
                       * interval and act once it reaches zero or below.
                       */
 1687                 if (nd6if->basereachable && /* already initialized */
 1688                     (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
 1689                         /*
 1690                          * Since reachable time rarely changes by router
 1691                          * advertisements, we SHOULD insure that a new random
 1692                          * value gets recomputed at least once every few hours.
 1693                          * (RFC 2461, 6.3.4)
 1694                          */
 1695                         nd6if->recalctm = V_nd6_recalc_reachtm_interval;
 1696                         nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
 1697                 }
 1698         }
 1699         IFNET_RUNLOCK_NOSLEEP();
 1700         CURVNET_RESTORE();
 1701 }
 1702 
 1703 int
 1704 nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
 1705     struct sockaddr_in6 *dst, struct rtentry *rt0)
 1706 {
              /*
               * Convenience wrapper around nd6_output_lle() for callers that
               * hold no neighbor cache entry: both the llentry and the
               * deferred-transmit chain arguments are passed as NULL, and the
               * result of nd6_output_lle() is returned unchanged.
               */
 1707 
 1708         return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL));
 1709 }
 1710 
 1711 
 1712 /*
 1713  * Note that I'm not enforcing any global serialization of
 1714  * lle state or la_asked changes here, as the logic is too
 1715  * complicated to avoid having to always acquire an exclusive
 1716  * lock.
 1717  * KMM
 1718  *
 1719  */
 1720 #define senderr(e) { error = (e); goto bad;}
 1721 
 1722 int
 1723 nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
 1724     struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle,
 1725         struct mbuf **chain)
 1726 {
 1727         struct mbuf *m = m0;
 1728         struct llentry *ln = lle;
 1729         int error = 0;
 1730         int flags = 0;
 1731 
 1732 #ifdef INVARIANTS
 1733         if (lle != NULL) {
 1734                 
 1735                 LLE_WLOCK_ASSERT(lle);
 1736 
 1737                 KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed"));
 1738         }
 1739 #endif
 1740         if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
 1741                 goto sendpkt;
 1742 
 1743         if (nd6_need_cache(ifp) == 0)
 1744                 goto sendpkt;
 1745 
 1746         /*
 1747          * next hop determination.  This routine is derived from ether_output.
 1748          */
 1749 
 1750         /*
 1751          * Address resolution or Neighbor Unreachability Detection
 1752          * for the next hop.
 1753          * At this point, the destination of the packet must be a unicast
 1754          * or an anycast address(i.e. not a multicast).
 1755          */
 1756 
 1757         flags = ((m != NULL) || (lle != NULL)) ? LLE_EXCLUSIVE : 0;
 1758         if (ln == NULL) {
 1759         retry:
 1760                 IF_AFDATA_LOCK(ifp);
 1761                 ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst);
 1762                 IF_AFDATA_UNLOCK(ifp);
 1763                 if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
 1764                         /*
 1765                          * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
 1766                          * the condition below is not very efficient.  But we believe
 1767                          * it is tolerable, because this should be a rare case.
 1768                          */
 1769                         flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0);
 1770                         IF_AFDATA_LOCK(ifp);
 1771                         ln = nd6_lookup(&dst->sin6_addr, flags, ifp);
 1772                         IF_AFDATA_UNLOCK(ifp);
 1773                 }
 1774         } 
 1775         if (ln == NULL) {
 1776                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
 1777                     !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
 1778                         char ip6buf[INET6_ADDRSTRLEN];
 1779                         log(LOG_DEBUG,
 1780                             "nd6_output: can't allocate llinfo for %s "
 1781                             "(ln=%p)\n",
 1782                             ip6_sprintf(ip6buf, &dst->sin6_addr), ln);
 1783                         senderr(EIO);   /* XXX: good error? */
 1784                 }
 1785                 goto sendpkt;   /* send anyway */
 1786         }
 1787 
 1788         /* We don't have to do link-layer address resolution on a p2p link. */
 1789         if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
 1790             ln->ln_state < ND6_LLINFO_REACHABLE) {
 1791                 if ((flags & LLE_EXCLUSIVE) == 0) {
 1792                         flags |= LLE_EXCLUSIVE;
 1793                         goto retry;
 1794                 }
 1795                 ln->ln_state = ND6_LLINFO_STALE;
 1796                 nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
 1797         }
 1798 
 1799         /*
 1800          * The first time we send a packet to a neighbor whose entry is
 1801          * STALE, we have to change the state to DELAY and a sets a timer to
 1802          * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
 1803          * neighbor unreachability detection on expiration.
 1804          * (RFC 2461 7.3.3)
 1805          */
 1806         if (ln->ln_state == ND6_LLINFO_STALE) {
 1807                 if ((flags & LLE_EXCLUSIVE) == 0) {
 1808                         flags |= LLE_EXCLUSIVE;
 1809                         LLE_RUNLOCK(ln);
 1810                         goto retry;
 1811                 }
 1812                 ln->la_asked = 0;
 1813                 ln->ln_state = ND6_LLINFO_DELAY;
 1814                 nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz);
 1815         }
 1816 
 1817         /*
 1818          * If the neighbor cache entry has a state other than INCOMPLETE
 1819          * (i.e. its link-layer address is already resolved), just
 1820          * send the packet.
 1821          */
 1822         if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
 1823                 goto sendpkt;
 1824 
 1825         /*
 1826          * There is a neighbor cache entry, but no ethernet address
 1827          * response yet.  Append this latest packet to the end of the
 1828          * packet queue in the mbuf, unless the number of the packet
 1829          * does not exceed nd6_maxqueuelen.  When it exceeds nd6_maxqueuelen,
 1830          * the oldest packet in the queue will be removed.
 1831          */
 1832         if (ln->ln_state == ND6_LLINFO_NOSTATE)
 1833                 ln->ln_state = ND6_LLINFO_INCOMPLETE;
 1834 
 1835         if ((flags & LLE_EXCLUSIVE) == 0) {
 1836                 flags |= LLE_EXCLUSIVE;
 1837                 LLE_RUNLOCK(ln);
 1838                 goto retry;
 1839         }
 1840 
 1841         LLE_WLOCK_ASSERT(ln);
 1842 
 1843         if (ln->la_hold) {
 1844                 struct mbuf *m_hold;
 1845                 int i;
 1846                 
 1847                 i = 0;
 1848                 for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) {
 1849                         i++;
 1850                         if (m_hold->m_nextpkt == NULL) {
 1851                                 m_hold->m_nextpkt = m;
 1852                                 break;
 1853                         }
 1854                 }
 1855                 while (i >= V_nd6_maxqueuelen) {
 1856                         m_hold = ln->la_hold;
 1857                         ln->la_hold = ln->la_hold->m_nextpkt;
 1858                         m_freem(m_hold);
 1859                         i--;
 1860                 }
 1861         } else {
 1862                 ln->la_hold = m;
 1863         }
 1864 
 1865         /*
 1866          * If there has been no NS for the neighbor after entering the
 1867          * INCOMPLETE state, send the first solicitation.
 1868          */
 1869         if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
 1870                 ln->la_asked++;
 1871                 
 1872                 nd6_llinfo_settimer_locked(ln,
 1873                     (long)ND_IFINFO(ifp)->retrans * hz / 1000);
 1874                 LLE_WUNLOCK(ln);
 1875                 nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
 1876                 if (lle != NULL && ln == lle)
 1877                         LLE_WLOCK(lle);
 1878 
 1879         } else if (lle == NULL || ln != lle) {
 1880                 /*
 1881                  * We did the lookup (no lle arg) so we
 1882                  * need to do the unlock here.
 1883                  */
 1884                 LLE_WUNLOCK(ln);
 1885         }
 1886 
 1887         return (0);
 1888 
 1889   sendpkt:
 1890         /* discard the packet if IPv6 operation is disabled on the interface */
 1891         if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
 1892                 error = ENETDOWN; /* better error? */
 1893                 goto bad;
 1894         }
 1895         /*
 1896          * ln is valid and the caller did not pass in 
 1897          * an llentry
 1898          */
 1899         if ((ln != NULL) && (lle == NULL)) {
 1900                 if (flags & LLE_EXCLUSIVE)
 1901                         LLE_WUNLOCK(ln);
 1902                 else
 1903                         LLE_RUNLOCK(ln);
 1904         }
 1905 
 1906 #ifdef MAC
 1907         mac_netinet6_nd6_send(ifp, m);
 1908 #endif
 1909         /*
 1910          * We were passed in a pointer to an lle with the lock held 
 1911          * this means that we can't call if_output as we will
 1912          * recurse on the lle lock - so what we do is we create
 1913          * a list of mbufs to send and transmit them in the caller
 1914          * after the lock is dropped
 1915          */
 1916         if (lle != NULL) {
 1917                 if (*chain == NULL)
 1918                         *chain = m;
 1919                 else {
 1920                         struct mbuf *m = *chain;
 1921 
 1922                         /*
 1923                          * append mbuf to end of deferred chain
 1924                          */
 1925                         while (m->m_nextpkt != NULL)
 1926                                 m = m->m_nextpkt;
 1927                         m->m_nextpkt = m;
 1928                 }
 1929                 return (error);
 1930         }
 1931         if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 1932                 return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
 1933                     NULL));
 1934         }
 1935         error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL);
 1936         return (error);
 1937 
 1938   bad:
 1939         /*
 1940          * ln is valid and the caller did not pass in 
 1941          * an llentry
 1942          */
 1943         if ((ln != NULL) && (lle == NULL)) {
 1944                 if (flags & LLE_EXCLUSIVE)
 1945                         LLE_WUNLOCK(ln);
 1946                 else
 1947                         LLE_RUNLOCK(ln);
 1948         }
 1949         if (m)
 1950                 m_freem(m);
 1951         return (error);
 1952 }
 1953 #undef senderr
 1954 
 1955 
 1956 int
 1957 nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
 1958     struct sockaddr_in6 *dst, struct route *ro)
 1959 {
              /*
               * Transmit every packet on 'chain' (linked through m_nextpkt) to
               * 'dst' using ifp's if_output routine.  When ifp is a loopback
               * interface the packets are attributed to origifp, matching the
               * direct-send path of nd6_output_lle().  Returns the result of
               * the last if_output call, or 0 for an empty chain.
               */
 1960         struct mbuf *m, *m_head;
 1961         struct ifnet *outifp;
 1962         int error = 0;
 1963 
 1964         m_head = chain;
 1965         if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 1966                 outifp = origifp;
 1967         else
 1968                 outifp = ifp;
 1969 
 1970         while (m_head) {
 1971                 m = m_head;
 1972                 m_head = m_head->m_nextpkt;
                      /*
                       * Detach the packet from the chain before handing it
                       * over, so the output path never sees a stale link to
                       * packets that are sent separately below.
                       */
                      m->m_nextpkt = NULL;
 1973                 error = (*ifp->if_output)(outifp, m, (struct sockaddr *)dst, ro);
 1974         }
 1975 
 1976         /*
 1977          * XXX
 1978          * note that intermediate errors are blindly ignored - but this is
 1979          * the same convention as used with nd6_output when called by
 1980          * nd6_cache_lladdr
 1981          */
 1982         return (error);
 1983 }
 1984 
 1985 
 1986 int
 1987 nd6_need_cache(struct ifnet *ifp)
 1988 {
 1989         /*
 1990          * Report whether a neighbor cache is kept for this interface:
 1991          * returns 1 for the interface types listed in the switch below
              * (ARCnet, Ethernet, FDDI, IEEE 1394, GIF, PPP, tunnel, bridge,
              * PROPVIRTUAL, plus L2VLAN, IEEE 802.11 and CARP when those
              * types are defined), and 0 for every other type.
 1992          *
 1993          * RFC2893 says:
 1994          * - unidirectional tunnels needs no ND
 1995          */
 1996         switch (ifp->if_type) {
 1997         case IFT_ARCNET:
 1998         case IFT_ETHER:
 1999         case IFT_FDDI:
 2000         case IFT_IEEE1394:
 2001 #ifdef IFT_L2VLAN
 2002         case IFT_L2VLAN:
 2003 #endif
 2004 #ifdef IFT_IEEE80211
 2005         case IFT_IEEE80211:
 2006 #endif
 2007 #ifdef IFT_CARP
 2008         case IFT_CARP:
 2009 #endif
 2010         case IFT_GIF:           /* XXX need more cases? */
 2011         case IFT_PPP:
 2012         case IFT_TUNNEL:
 2013         case IFT_BRIDGE:
 2014         case IFT_PROPVIRTUAL:
 2015                 return (1);
 2016         default:
 2017                 return (0);
 2018         }
 2019 }
 2020 
 2021 /*
       * Fill 'desten' with the link-layer destination address to use for
       * packet 'm' sent to 'dst' on 'ifp'.
       *
       * Multicast packets (M_MCAST) get an address derived from the interface
       * type; unsupported types free the packet and return EAFNOSUPPORT.
       * Unicast destinations are looked up in the interface's IPv6 lltable;
       * if no entry exists or the entry is not LLE_VALID, the packet is freed
       * and 1 is returned.  On success 0 is returned and, for the unicast
       * case, *lle is set to the entry that was used (*lle is NULL otherwise).
       *
 2022  * the callers of this function need to be re-worked to drop
 2023  * the lle lock, drop here for now
 2024  */
 2025 int
 2026 nd6_storelladdr(struct ifnet *ifp, struct mbuf *m,
 2027     struct sockaddr *dst, u_char *desten, struct llentry **lle)
 2028 {
 2029         struct llentry *ln;
 2030 
 2031         *lle = NULL;
 2032         IF_AFDATA_UNLOCK_ASSERT(ifp);
 2033         if (m->m_flags & M_MCAST) {
 2034                 int i;
 2035 
 2036                 switch (ifp->if_type) {
 2037                 case IFT_ETHER:
 2038                 case IFT_FDDI:
 2039 #ifdef IFT_L2VLAN
 2040                 case IFT_L2VLAN:
 2041 #endif
 2042 #ifdef IFT_IEEE80211
 2043                 case IFT_IEEE80211:
 2044 #endif
 2045                 case IFT_BRIDGE:
 2046                 case IFT_ISO88025:
                              /* Map the IPv6 group address to a link-layer one. */
 2047                         ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
 2048                                                  desten);
 2049                         return (0);
 2050                 case IFT_IEEE1394:
 2051                         /*
 2052                          * netbsd can use if_broadcastaddr, but we don't do so
 2053                          * to reduce # of ifdef.
 2054                          */
                              /* Set every bit of the if_addrlen-byte address. */
 2055                         for (i = 0; i < ifp->if_addrlen; i++)
 2056                                 desten[i] = ~0;
 2057                         return (0);
 2058                 case IFT_ARCNET:
 2059                         *desten = 0;
 2060                         return (0);
 2061                 default:
 2062                         m_freem(m);
 2063                         return (EAFNOSUPPORT);
 2064                 }
 2065         }
 2066 
 2067 
 2068         /*
 2069          * the entry should have been created in nd6_store_lladdr
              * NOTE(review): the name above is this function's own name,
              * misspelled; presumably the entry is created earlier on the
              * output path -- confirm which routine is meant.
 2070          */
 2071         IF_AFDATA_LOCK(ifp);
 2072         ln = lla_lookup(LLTABLE6(ifp), 0, dst);
 2073         IF_AFDATA_UNLOCK(ifp);
 2074         if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) {
 2075                 if (ln != NULL)
 2076                         LLE_RUNLOCK(ln);
 2077                 /* this could happen, if we could not allocate memory */
 2078                 m_freem(m);
                      /* NOTE(review): 1 is not a named errno value here. */
 2079                 return (1);
 2080         }
 2081 
 2082         bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
 2083         *lle = ln;
 2084         LLE_RUNLOCK(ln);
 2085         /*
 2086          * A *small* use after free race exists here
              * (the entry pointer is returned through *lle after its lock
              * has been released)
 2087          */
 2088         return (0);
 2089 }
 2090 
 2091 static void 
 2092 clear_llinfo_pqueue(struct llentry *ln)
 2093 {
              /*
               * Free every packet held on the entry's la_hold queue (linked
               * through m_nextpkt) and leave the queue empty.
               */
 2094         struct mbuf *m_hold, *m_hold_next;
 2095 
 2096         for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) {
                      /* Save the successor before the packet is freed. */
 2097                 m_hold_next = m_hold->m_nextpkt;
 2098                 m_hold->m_nextpkt = NULL;
 2099                 m_freem(m_hold);
 2100         }
 2101 
 2102         ln->la_hold = NULL;
 2103         return;
 2104 }
 2105 
 2106 static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
 2107 static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
 2108 #ifdef SYSCTL_DECL
 2109 SYSCTL_DECL(_net_inet6_icmp6);
 2110 #endif
      /*
       * Nodes under _net_inet6_icmp6: two read-only nodes (CTLFLAG_RD) served
       * by the handlers declared above, and the read-write integer
       * nd6_maxqueuelen.
       */
 2111 SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
 2112         CTLFLAG_RD, nd6_sysctl_drlist, "");
 2113 SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
 2114         CTLFLAG_RD, nd6_sysctl_prlist, "");
 2115 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
 2116         CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
 2117 
 2118 static int
 2119 nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
 2120 {
              /*
               * Sysctl handler that exports the default router list: one
               * struct in6_defrouter is built in 'buf' and copied out with
               * SYSCTL_OUT for each entry on V_nd_defrouter.  Attempts to
               * write return EPERM.  Returns 0, or the first error from
               * sa6_recoverscope() or SYSCTL_OUT().
               */
 2121         int error;
 2122         char buf[1024] __aligned(4);
 2123         struct in6_defrouter *d, *de;
 2124         struct nd_defrouter *dr;
 2125 
              /* A non-NULL newptr means the caller tried to set the node. */
 2126         if (req->newptr)
 2127                 return EPERM;
 2128         error = 0;
 2129 
 2130         for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
 2131              dr = TAILQ_NEXT(dr, dr_entry)) {
 2132                 d = (struct in6_defrouter *)buf;
 2133                 de = (struct in6_defrouter *)(buf + sizeof(buf));
 2134 
                      /* Sanity check that one record fits in the buffer. */
 2135                 if (d + 1 <= de) {
 2136                         bzero(d, sizeof(*d));
 2137                         d->rtaddr.sin6_family = AF_INET6;
 2138                         d->rtaddr.sin6_len = sizeof(d->rtaddr);
 2139                         d->rtaddr.sin6_addr = dr->rtaddr;
 2140                         error = sa6_recoverscope(&d->rtaddr);
 2141                         if (error != 0)
 2142                                 return (error);
 2143                         d->flags = dr->flags;
 2144                         d->rtlifetime = dr->rtlifetime;
 2145                         d->expire = dr->expire;
 2146                         d->if_index = dr->ifp->if_index;
 2147                 } else
 2148                         panic("buffer too short");
 2149 
 2150                 error = SYSCTL_OUT(req, buf, sizeof(*d));
 2151                 if (error)
 2152                         break;
 2153         }
 2154 
 2155         return (error);
 2156 }
 2157 
 2158 static int
 2159 nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
 2160 {
              /*
               * Sysctl handler that exports the prefix list.  For each prefix
               * on V_nd_prefix a struct in6_prefix is built in 'buf',
               * immediately followed by one struct sockaddr_in6 per
               * advertising router, and the record is copied out with
               * SYSCTL_OUT.  Attempts to write return EPERM.  Returns 0 or
               * the first error from SYSCTL_OUT().
               *
               * Only the routers that fit in 'buf' are exported, and
               * p->advrtrs reports exactly that many, so the copied length
               * never exceeds the buffer.
               */
 2161         int error;
 2162         char buf[1024] __aligned(4);
 2163         struct in6_prefix *p, *pe;
 2164         struct nd_prefix *pr;
 2165         char ip6buf[INET6_ADDRSTRLEN];
 2166 
 2167         if (req->newptr)
 2168                 return EPERM;
 2169         error = 0;
 2170 
 2171         for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 2172                 u_short advrtrs;
 2173                 size_t advance;
 2174                 struct sockaddr_in6 *sin6, *s6;
 2175                 struct nd_pfxrouter *pfr;
 2176 
 2177                 p = (struct in6_prefix *)buf;
 2178                 pe = (struct in6_prefix *)(buf + sizeof(buf));
 2179 
 2180                 if (p + 1 <= pe) {
 2181                         bzero(p, sizeof(*p));
                              /* The router addresses start right after *p. */
 2182                         sin6 = (struct sockaddr_in6 *)(p + 1);
 2183 
 2184                         p->prefix = pr->ndpr_prefix;
 2185                         if (sa6_recoverscope(&p->prefix)) {
 2186                                 log(LOG_ERR,
 2187                                     "scope error in prefix list (%s)\n",
 2188                                     ip6_sprintf(ip6buf, &p->prefix.sin6_addr));
 2189                                 /* XXX: press on... */
 2190                         }
 2191                         p->raflags = pr->ndpr_raf;
 2192                         p->prefixlen = pr->ndpr_plen;
 2193                         p->vltime = pr->ndpr_vltime;
 2194                         p->pltime = pr->ndpr_pltime;
 2195                         p->if_index = pr->ndpr_ifp->if_index;
 2196                         if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
 2197                                 p->expire = 0;
 2198                         else {
 2199                                 time_t maxexpire;
 2200 
 2201                                 /* XXX: we assume time_t is signed. */
                                      /* Largest positive time_t: all bits but the top. */
 2202                                 maxexpire = (-1) &
 2203                                     ~((time_t)1 <<
 2204                                     ((sizeof(maxexpire) * 8) - 1));
                                      /* Clamp instead of overflowing the sum. */
 2205                                 if (pr->ndpr_vltime <
 2206                                     maxexpire - pr->ndpr_lastupdate) {
 2207                                     p->expire = pr->ndpr_lastupdate +
 2208                                         pr->ndpr_vltime;
 2209                                 } else
 2210                                         p->expire = maxexpire;
 2211                         }
 2212                         p->refcnt = pr->ndpr_refcnt;
 2213                         p->flags = pr->ndpr_stateflags;
 2214                         p->origin = PR_ORIG_RA;
 2215                         advrtrs = 0;
 2216                         for (pfr = pr->ndpr_advrtrs.lh_first; pfr;
 2217                              pfr = pfr->pfr_next) {
                                      /*
                                       * Stop at the first router that does
                                       * not fit.  Counting it anyway would
                                       * make 'advance' larger than buf and
                                       * copy out memory past its end.
                                       */
 2218                                 if ((void *)&sin6[advrtrs + 1] > (void *)pe) {
 2219                                         break;
 2221                                 }
 2222                                 s6 = &sin6[advrtrs];
 2223                                 bzero(s6, sizeof(*s6));
 2224                                 s6->sin6_family = AF_INET6;
 2225                                 s6->sin6_len = sizeof(*sin6);
 2226                                 s6->sin6_addr = pfr->router->rtaddr;
 2227                                 if (sa6_recoverscope(s6)) {
 2228                                         log(LOG_ERR,
 2229                                             "scope error in "
 2230                                             "prefix list (%s)\n",
 2231                                             ip6_sprintf(ip6buf,
 2232                                                     &pfr->router->rtaddr));
 2233                                 }
 2234                                 advrtrs++;
 2235                         }
 2236                         p->advrtrs = advrtrs;
 2237                 } else
 2238                         panic("buffer too short");
 2239 
                      /* Header plus the router addresses actually stored. */
 2240                 advance = sizeof(*p) + sizeof(*sin6) * advrtrs;
 2241                 error = SYSCTL_OUT(req, buf, advance);
 2242                 if (error)
 2243                         break;
 2244         }
 2245 
 2246         return (error);
 2247 }

Cache object: 0ac9799ebe32874ff7341a2a3e311d75


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.