The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet6/nd6.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. Neither the name of the project nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: releng/8.1/sys/netinet6/nd6.c 207695 2010-05-06 06:44:19Z bz $");
   34 
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/callout.h>
   41 #include <sys/malloc.h>
   42 #include <sys/mbuf.h>
   43 #include <sys/socket.h>
   44 #include <sys/sockio.h>
   45 #include <sys/time.h>
   46 #include <sys/kernel.h>
   47 #include <sys/protosw.h>
   48 #include <sys/errno.h>
   49 #include <sys/syslog.h>
   50 #include <sys/lock.h>
   51 #include <sys/rwlock.h>
   52 #include <sys/queue.h>
   53 #include <sys/sysctl.h>
   54 
   55 #include <net/if.h>
   56 #include <net/if_arc.h>
   57 #include <net/if_dl.h>
   58 #include <net/if_types.h>
   59 #include <net/iso88025.h>
   60 #include <net/fddi.h>
   61 #include <net/route.h>
   62 #include <net/vnet.h>
   63 
   64 #include <netinet/in.h>
   65 #include <net/if_llatbl.h>
   66 #define L3_ADDR_SIN6(le)        ((struct sockaddr_in6 *) L3_ADDR(le))
   67 #include <netinet/if_ether.h>
   68 #include <netinet6/in6_var.h>
   69 #include <netinet/ip6.h>
   70 #include <netinet6/ip6_var.h>
   71 #include <netinet6/scope6_var.h>
   72 #include <netinet6/nd6.h>
   73 #include <netinet/icmp6.h>
   74 
   75 #include <sys/limits.h>
   76 
   77 #include <security/mac/mac_framework.h>
   78 
   79 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
   80 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
   81 
   82 #define SIN6(s) ((struct sockaddr_in6 *)s)
   83 
   84 /* timer values */
   85 VNET_DEFINE(int, nd6_prune)     = 1;    /* walk list every 1 seconds */
   86 VNET_DEFINE(int, nd6_delay)     = 5;    /* delay first probe time 5 second */
   87 VNET_DEFINE(int, nd6_umaxtries) = 3;    /* maximum unicast query */
   88 VNET_DEFINE(int, nd6_mmaxtries) = 3;    /* maximum multicast query */
   89 VNET_DEFINE(int, nd6_useloopback) = 1;  /* use loopback interface for
   90                                          * local traffic */
   91 VNET_DEFINE(int, nd6_gctimer)   = (60 * 60 * 24); /* 1 day: garbage
   92                                          * collection timer */
   93 
   94 /* preventing too many loops in ND option parsing */
   95 static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */
   96 
   97 VNET_DEFINE(int, nd6_maxnudhint) = 0;   /* max # of subsequent upper
   98                                          * layer hints */
   99 static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved
  100                                          * ND entries */
  101 #define V_nd6_maxndopt                  VNET(nd6_maxndopt)
  102 #define V_nd6_maxqueuelen               VNET(nd6_maxqueuelen)
  103 
  104 #ifdef ND6_DEBUG
  105 VNET_DEFINE(int, nd6_debug) = 1;
  106 #else
  107 VNET_DEFINE(int, nd6_debug) = 0;
  108 #endif
  109 
  110 /* for debugging? */
  111 #if 0
  112 static int nd6_inuse, nd6_allocated;
  113 #endif
  114 
  115 VNET_DEFINE(struct nd_drhead, nd_defrouter);
  116 VNET_DEFINE(struct nd_prhead, nd_prefix);
  117 
  118 VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
  119 #define V_nd6_recalc_reachtm_interval   VNET(nd6_recalc_reachtm_interval)
  120 
  121 static struct sockaddr_in6 all1_sa;
  122 
  123 static int nd6_is_new_addr_neighbor __P((struct sockaddr_in6 *,
  124         struct ifnet *));
  125 static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
  126 static void nd6_slowtimo(void *);
  127 static int regen_tmpaddr(struct in6_ifaddr *);
  128 static struct llentry *nd6_free(struct llentry *, int);
  129 static void nd6_llinfo_timer(void *);
  130 static void clear_llinfo_pqueue(struct llentry *);
  131 
  132 static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
  133 #define V_nd6_slowtimo_ch               VNET(nd6_slowtimo_ch)
  134 
  135 VNET_DEFINE(struct callout, nd6_timer_ch);
  136 
  137 void
  138 nd6_init(void)
  139 {
  140         int i;
  141 
  142         LIST_INIT(&V_nd_prefix);
  143 
  144         all1_sa.sin6_family = AF_INET6;
  145         all1_sa.sin6_len = sizeof(struct sockaddr_in6);
  146         for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
  147                 all1_sa.sin6_addr.s6_addr[i] = 0xff;
  148 
  149         /* initialization of the default router list */
  150         TAILQ_INIT(&V_nd_defrouter);
  151 
  152         /* start timer */
  153         callout_init(&V_nd6_slowtimo_ch, 0);
  154         callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
  155             nd6_slowtimo, curvnet);
  156 }
  157 
  158 #ifdef VIMAGE
  159 void
  160 nd6_destroy()
  161 {
  162 
  163         callout_drain(&V_nd6_slowtimo_ch);
  164         callout_drain(&V_nd6_timer_ch);
  165 }
  166 #endif
  167 
  168 struct nd_ifinfo *
  169 nd6_ifattach(struct ifnet *ifp)
  170 {
  171         struct nd_ifinfo *nd;
  172 
  173         nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK);
  174         bzero(nd, sizeof(*nd));
  175 
  176         nd->initialized = 1;
  177 
  178         nd->chlim = IPV6_DEFHLIM;
  179         nd->basereachable = REACHABLE_TIME;
  180         nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
  181         nd->retrans = RETRANS_TIMER;
  182         /*
  183          * Note that the default value of ip6_accept_rtadv is 0, which means
  184          * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV
  185          * here.
  186          */
  187         nd->flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV);
  188 
  189         /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
  190         nd6_setmtu0(ifp, nd);
  191 
  192         return nd;
  193 }
  194 
  195 void
  196 nd6_ifdetach(struct nd_ifinfo *nd)
  197 {
  198 
  199         free(nd, M_IP6NDP);
  200 }
  201 
  /*
   * Recompute the ND-level link MTU of ifp.  Called when the physical MTU
   * changes, since the ND-level value may then need adjusting as well.
   */
  void
  nd6_setmtu(struct ifnet *ifp)
  {
          struct nd_ifinfo *ndi;

          /* Look up the interface's ND info and delegate to the worker. */
          ndi = ND_IFINFO(ifp);
          nd6_setmtu0(ifp, ndi);
  }
  212 
  213 /* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
  214 void
  215 nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
  216 {
  217         u_int32_t omaxmtu;
  218 
  219         omaxmtu = ndi->maxmtu;
  220 
  221         switch (ifp->if_type) {
  222         case IFT_ARCNET:
  223                 ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */
  224                 break;
  225         case IFT_FDDI:
  226                 ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */
  227                 break;
  228         case IFT_ISO88025:
  229                  ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
  230                  break;
  231         default:
  232                 ndi->maxmtu = ifp->if_mtu;
  233                 break;
  234         }
  235 
  236         /*
  237          * Decreasing the interface MTU under IPV6 minimum MTU may cause
  238          * undesirable situation.  We thus notify the operator of the change
  239          * explicitly.  The check for omaxmtu is necessary to restrict the
  240          * log to the case of changing the MTU, not initializing it.
  241          */
  242         if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
  243                 log(LOG_NOTICE, "nd6_setmtu0: "
  244                     "new link MTU on %s (%lu) is too small for IPv6\n",
  245                     if_name(ifp), (unsigned long)ndi->maxmtu);
  246         }
  247 
  248         if (ndi->maxmtu > V_in6_maxmtu)
  249                 in6_setmaxmtu(); /* check all interfaces just in case */
  250 
  251 }
  252 
  253 void
  254 nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
  255 {
  256 
  257         bzero(ndopts, sizeof(*ndopts));
  258         ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
  259         ndopts->nd_opts_last
  260                 = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
  261 
  262         if (icmp6len == 0) {
  263                 ndopts->nd_opts_done = 1;
  264                 ndopts->nd_opts_search = NULL;
  265         }
  266 }
  267 
  268 /*
  269  * Take one ND option.
  270  */
  271 struct nd_opt_hdr *
  272 nd6_option(union nd_opts *ndopts)
  273 {
  274         struct nd_opt_hdr *nd_opt;
  275         int olen;
  276 
  277         if (ndopts == NULL)
  278                 panic("ndopts == NULL in nd6_option");
  279         if (ndopts->nd_opts_last == NULL)
  280                 panic("uninitialized ndopts in nd6_option");
  281         if (ndopts->nd_opts_search == NULL)
  282                 return NULL;
  283         if (ndopts->nd_opts_done)
  284                 return NULL;
  285 
  286         nd_opt = ndopts->nd_opts_search;
  287 
  288         /* make sure nd_opt_len is inside the buffer */
  289         if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
  290                 bzero(ndopts, sizeof(*ndopts));
  291                 return NULL;
  292         }
  293 
  294         olen = nd_opt->nd_opt_len << 3;
  295         if (olen == 0) {
  296                 /*
  297                  * Message validation requires that all included
  298                  * options have a length that is greater than zero.
  299                  */
  300                 bzero(ndopts, sizeof(*ndopts));
  301                 return NULL;
  302         }
  303 
  304         ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
  305         if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
  306                 /* option overruns the end of buffer, invalid */
  307                 bzero(ndopts, sizeof(*ndopts));
  308                 return NULL;
  309         } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
  310                 /* reached the end of options chain */
  311                 ndopts->nd_opts_done = 1;
  312                 ndopts->nd_opts_search = NULL;
  313         }
  314         return nd_opt;
  315 }
  316 
  317 /*
  318  * Parse multiple ND options.
  319  * This function is much easier to use, for ND routines that do not need
  320  * multiple options of the same type.
  321  */
  322 int
  323 nd6_options(union nd_opts *ndopts)
  324 {
  325         struct nd_opt_hdr *nd_opt;
  326         int i = 0;
  327 
  328         if (ndopts == NULL)
  329                 panic("ndopts == NULL in nd6_options");
  330         if (ndopts->nd_opts_last == NULL)
  331                 panic("uninitialized ndopts in nd6_options");
  332         if (ndopts->nd_opts_search == NULL)
  333                 return 0;
  334 
  335         while (1) {
  336                 nd_opt = nd6_option(ndopts);
  337                 if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
  338                         /*
  339                          * Message validation requires that all included
  340                          * options have a length that is greater than zero.
  341                          */
  342                         ICMP6STAT_INC(icp6s_nd_badopt);
  343                         bzero(ndopts, sizeof(*ndopts));
  344                         return -1;
  345                 }
  346 
  347                 if (nd_opt == NULL)
  348                         goto skip1;
  349 
  350                 switch (nd_opt->nd_opt_type) {
  351                 case ND_OPT_SOURCE_LINKADDR:
  352                 case ND_OPT_TARGET_LINKADDR:
  353                 case ND_OPT_MTU:
  354                 case ND_OPT_REDIRECTED_HEADER:
  355                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
  356                                 nd6log((LOG_INFO,
  357                                     "duplicated ND6 option found (type=%d)\n",
  358                                     nd_opt->nd_opt_type));
  359                                 /* XXX bark? */
  360                         } else {
  361                                 ndopts->nd_opt_array[nd_opt->nd_opt_type]
  362                                         = nd_opt;
  363                         }
  364                         break;
  365                 case ND_OPT_PREFIX_INFORMATION:
  366                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
  367                                 ndopts->nd_opt_array[nd_opt->nd_opt_type]
  368                                         = nd_opt;
  369                         }
  370                         ndopts->nd_opts_pi_end =
  371                                 (struct nd_opt_prefix_info *)nd_opt;
  372                         break;
  373                 default:
  374                         /*
  375                          * Unknown options must be silently ignored,
  376                          * to accomodate future extension to the protocol.
  377                          */
  378                         nd6log((LOG_DEBUG,
  379                             "nd6_options: unsupported option %d - "
  380                             "option ignored\n", nd_opt->nd_opt_type));
  381                 }
  382 
  383 skip1:
  384                 i++;
  385                 if (i > V_nd6_maxndopt) {
  386                         ICMP6STAT_INC(icp6s_nd_toomanyopt);
  387                         nd6log((LOG_INFO, "too many loop in nd opt\n"));
  388                         break;
  389                 }
  390 
  391                 if (ndopts->nd_opts_done)
  392                         break;
  393         }
  394 
  395         return 0;
  396 }
  397 
  398 /*
  399  * ND6 timer routine to handle ND6 entries
  400  */
  401 void
  402 nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
  403 {
  404         int canceled;
  405 
  406         if (tick < 0) {
  407                 ln->la_expire = 0;
  408                 ln->ln_ntick = 0;
  409                 canceled = callout_stop(&ln->ln_timer_ch);
  410         } else {
  411                 ln->la_expire = time_second + tick / hz;
  412                 LLE_ADDREF(ln);
  413                 if (tick > INT_MAX) {
  414                         ln->ln_ntick = tick - INT_MAX;
  415                         canceled = callout_reset(&ln->ln_timer_ch, INT_MAX,
  416                             nd6_llinfo_timer, ln);
  417                 } else {
  418                         ln->ln_ntick = 0;
  419                         canceled = callout_reset(&ln->ln_timer_ch, tick,
  420                             nd6_llinfo_timer, ln);
  421                 }
  422         }
  423         if (canceled)
  424                 LLE_REMREF(ln);
  425 }
  426 
  /*
   * Locking wrapper: take the write lock on ln, set its timer through
   * nd6_llinfo_settimer_locked(), and release the lock.
   */
  void
  nd6_llinfo_settimer(struct llentry *ln, long tick)
  {
          LLE_WLOCK(ln);
          nd6_llinfo_settimer_locked(ln, tick);
          LLE_WUNLOCK(ln);
  }
  435 
  436 static void
  437 nd6_llinfo_timer(void *arg)
  438 {
  439         struct llentry *ln;
  440         struct in6_addr *dst;
  441         struct ifnet *ifp;
  442         struct nd_ifinfo *ndi = NULL;
  443 
  444         ln = (struct llentry *)arg;
  445         if (ln == NULL) {
  446                 panic("%s: NULL entry!\n", __func__);
  447                 return;
  448         }
  449 
  450         if ((ifp = ((ln->lle_tbl != NULL) ? ln->lle_tbl->llt_ifp : NULL)) == NULL)
  451                 panic("ln ifp == NULL");
  452 
  453         CURVNET_SET(ifp->if_vnet);
  454 
  455         if (ln->ln_ntick > 0) {
  456                 if (ln->ln_ntick > INT_MAX) {
  457                         ln->ln_ntick -= INT_MAX;
  458                         nd6_llinfo_settimer(ln, INT_MAX);
  459                 } else {
  460                         ln->ln_ntick = 0;
  461                         nd6_llinfo_settimer(ln, ln->ln_ntick);
  462                 }
  463                 goto done;
  464         }
  465 
  466         ndi = ND_IFINFO(ifp);
  467         dst = &L3_ADDR_SIN6(ln)->sin6_addr;
  468         if (ln->la_flags & LLE_STATIC) {
  469                 goto done;
  470         }
  471 
  472         if (ln->la_flags & LLE_DELETED) {
  473                 (void)nd6_free(ln, 0);
  474                 ln = NULL;
  475                 goto done;
  476         }
  477 
  478         switch (ln->ln_state) {
  479         case ND6_LLINFO_INCOMPLETE:
  480                 if (ln->la_asked < V_nd6_mmaxtries) {
  481                         ln->la_asked++;
  482                         nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
  483                         nd6_ns_output(ifp, NULL, dst, ln, 0);
  484                 } else {
  485                         struct mbuf *m = ln->la_hold;
  486                         if (m) {
  487                                 struct mbuf *m0;
  488 
  489                                 /*
  490                                  * assuming every packet in la_hold has the
  491                                  * same IP header
  492                                  */
  493                                 m0 = m->m_nextpkt;
  494                                 m->m_nextpkt = NULL;
  495                                 icmp6_error2(m, ICMP6_DST_UNREACH,
  496                                     ICMP6_DST_UNREACH_ADDR, 0, ifp);
  497 
  498                                 ln->la_hold = m0;
  499                                 clear_llinfo_pqueue(ln);
  500                         }
  501                         (void)nd6_free(ln, 0);
  502                         ln = NULL;
  503                 }
  504                 break;
  505         case ND6_LLINFO_REACHABLE:
  506                 if (!ND6_LLINFO_PERMANENT(ln)) {
  507                         ln->ln_state = ND6_LLINFO_STALE;
  508                         nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
  509                 }
  510                 break;
  511 
  512         case ND6_LLINFO_STALE:
  513                 /* Garbage Collection(RFC 2461 5.3) */
  514                 if (!ND6_LLINFO_PERMANENT(ln)) {
  515                         (void)nd6_free(ln, 1);
  516                         ln = NULL;
  517                 }
  518                 break;
  519 
  520         case ND6_LLINFO_DELAY:
  521                 if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
  522                         /* We need NUD */
  523                         ln->la_asked = 1;
  524                         ln->ln_state = ND6_LLINFO_PROBE;
  525                         nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
  526                         nd6_ns_output(ifp, dst, dst, ln, 0);
  527                 } else {
  528                         ln->ln_state = ND6_LLINFO_STALE; /* XXX */
  529                         nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
  530                 }
  531                 break;
  532         case ND6_LLINFO_PROBE:
  533                 if (ln->la_asked < V_nd6_umaxtries) {
  534                         ln->la_asked++;
  535                         nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
  536                         nd6_ns_output(ifp, dst, dst, ln, 0);
  537                 } else {
  538                         (void)nd6_free(ln, 0);
  539                         ln = NULL;
  540                 }
  541                 break;
  542         }
  543 done:
  544         if (ln != NULL)
  545                 LLE_FREE(ln);
  546         CURVNET_RESTORE();
  547 }
  548 
  549 
  550 /*
  551  * ND6 timer routine to expire default route list and prefix list
  552  */
  553 void
  554 nd6_timer(void *arg)
  555 {
  556         CURVNET_SET((struct vnet *) arg);
  557         int s;
  558         struct nd_defrouter *dr;
  559         struct nd_prefix *pr;
  560         struct in6_ifaddr *ia6, *nia6;
  561         struct in6_addrlifetime *lt6;
  562 
  563         callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
  564             nd6_timer, curvnet);
  565 
  566         /* expire default router list */
  567         s = splnet();
  568         dr = TAILQ_FIRST(&V_nd_defrouter);
  569         while (dr) {
  570                 if (dr->expire && dr->expire < time_second) {
  571                         struct nd_defrouter *t;
  572                         t = TAILQ_NEXT(dr, dr_entry);
  573                         defrtrlist_del(dr);
  574                         dr = t;
  575                 } else {
  576                         dr = TAILQ_NEXT(dr, dr_entry);
  577                 }
  578         }
  579 
  580         /*
  581          * expire interface addresses.
  582          * in the past the loop was inside prefix expiry processing.
  583          * However, from a stricter speci-confrmance standpoint, we should
  584          * rather separate address lifetimes and prefix lifetimes.
  585          *
  586          * XXXRW: in6_ifaddrhead locking.
  587          */
  588   addrloop:
  589         TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) {
  590                 /* check address lifetime */
  591                 lt6 = &ia6->ia6_lifetime;
  592                 if (IFA6_IS_INVALID(ia6)) {
  593                         int regen = 0;
  594 
  595                         /*
  596                          * If the expiring address is temporary, try
  597                          * regenerating a new one.  This would be useful when
  598                          * we suspended a laptop PC, then turned it on after a
  599                          * period that could invalidate all temporary
  600                          * addresses.  Although we may have to restart the
  601                          * loop (see below), it must be after purging the
  602                          * address.  Otherwise, we'd see an infinite loop of
  603                          * regeneration.
  604                          */
  605                         if (V_ip6_use_tempaddr &&
  606                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
  607                                 if (regen_tmpaddr(ia6) == 0)
  608                                         regen = 1;
  609                         }
  610 
  611                         in6_purgeaddr(&ia6->ia_ifa);
  612 
  613                         if (regen)
  614                                 goto addrloop; /* XXX: see below */
  615                 } else if (IFA6_IS_DEPRECATED(ia6)) {
  616                         int oldflags = ia6->ia6_flags;
  617 
  618                         ia6->ia6_flags |= IN6_IFF_DEPRECATED;
  619 
  620                         /*
  621                          * If a temporary address has just become deprecated,
  622                          * regenerate a new one if possible.
  623                          */
  624                         if (V_ip6_use_tempaddr &&
  625                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
  626                             (oldflags & IN6_IFF_DEPRECATED) == 0) {
  627 
  628                                 if (regen_tmpaddr(ia6) == 0) {
  629                                         /*
  630                                          * A new temporary address is
  631                                          * generated.
  632                                          * XXX: this means the address chain
  633                                          * has changed while we are still in
  634                                          * the loop.  Although the change
  635                                          * would not cause disaster (because
  636                                          * it's not a deletion, but an
  637                                          * addition,) we'd rather restart the
  638                                          * loop just for safety.  Or does this
  639                                          * significantly reduce performance??
  640                                          */
  641                                         goto addrloop;
  642                                 }
  643                         }
  644                 } else {
  645                         /*
  646                          * A new RA might have made a deprecated address
  647                          * preferred.
  648                          */
  649                         ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
  650                 }
  651         }
  652 
  653         /* expire prefix list */
  654         pr = V_nd_prefix.lh_first;
  655         while (pr) {
  656                 /*
  657                  * check prefix lifetime.
  658                  * since pltime is just for autoconf, pltime processing for
  659                  * prefix is not necessary.
  660                  */
  661                 if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
  662                     time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
  663                         struct nd_prefix *t;
  664                         t = pr->ndpr_next;
  665 
  666                         /*
  667                          * address expiration and prefix expiration are
  668                          * separate.  NEVER perform in6_purgeaddr here.
  669                          */
  670 
  671                         prelist_remove(pr);
  672                         pr = t;
  673                 } else
  674                         pr = pr->ndpr_next;
  675         }
  676         splx(s);
  677         CURVNET_RESTORE();
  678 }
  679 
  680 /*
  681  * ia6 - deprecated/invalidated temporary address
  682  */
  683 static int
  684 regen_tmpaddr(struct in6_ifaddr *ia6)
  685 {
  686         struct ifaddr *ifa;
  687         struct ifnet *ifp;
  688         struct in6_ifaddr *public_ifa6 = NULL;
  689 
  690         ifp = ia6->ia_ifa.ifa_ifp;
  691         IF_ADDR_LOCK(ifp);
  692         TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  693                 struct in6_ifaddr *it6;
  694 
  695                 if (ifa->ifa_addr->sa_family != AF_INET6)
  696                         continue;
  697 
  698                 it6 = (struct in6_ifaddr *)ifa;
  699 
  700                 /* ignore no autoconf addresses. */
  701                 if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
  702                         continue;
  703 
  704                 /* ignore autoconf addresses with different prefixes. */
  705                 if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
  706                         continue;
  707 
  708                 /*
  709                  * Now we are looking at an autoconf address with the same
  710                  * prefix as ours.  If the address is temporary and is still
  711                  * preferred, do not create another one.  It would be rare, but
  712                  * could happen, for example, when we resume a laptop PC after
  713                  * a long period.
  714                  */
  715                 if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
  716                     !IFA6_IS_DEPRECATED(it6)) {
  717                         public_ifa6 = NULL;
  718                         break;
  719                 }
  720 
  721                 /*
  722                  * This is a public autoconf address that has the same prefix
  723                  * as ours.  If it is preferred, keep it.  We can't break the
  724                  * loop here, because there may be a still-preferred temporary
  725                  * address with the prefix.
  726                  */
  727                 if (!IFA6_IS_DEPRECATED(it6))
  728                     public_ifa6 = it6;
  729 
  730                 if (public_ifa6 != NULL)
  731                         ifa_ref(&public_ifa6->ia_ifa);
  732         }
  733         IF_ADDR_UNLOCK(ifp);
  734 
  735         if (public_ifa6 != NULL) {
  736                 int e;
  737 
  738                 if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) {
  739                         ifa_free(&public_ifa6->ia_ifa);
  740                         log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
  741                             " tmp addr,errno=%d\n", e);
  742                         return (-1);
  743                 }
  744                 ifa_free(&public_ifa6->ia_ifa);
  745                 return (0);
  746         }
  747 
  748         return (-1);
  749 }
  750 
  751 /*
  752  * Nuke neighbor cache/prefix/default router management table, right before
  753  * ifp goes away.
  754  */
  755 void
  756 nd6_purge(struct ifnet *ifp)
  757 {
  758         struct nd_defrouter *dr, *ndr;
  759         struct nd_prefix *pr, *npr;
  760 
  761         /*
  762          * Nuke default router list entries toward ifp.
  763          * We defer removal of default router list entries that is installed
  764          * in the routing table, in order to keep additional side effects as
  765          * small as possible.
  766          */
  767         for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
  768                 ndr = TAILQ_NEXT(dr, dr_entry);
  769                 if (dr->installed)
  770                         continue;
  771 
  772                 if (dr->ifp == ifp)
  773                         defrtrlist_del(dr);
  774         }
  775 
  776         for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
  777                 ndr = TAILQ_NEXT(dr, dr_entry);
  778                 if (!dr->installed)
  779                         continue;
  780 
  781                 if (dr->ifp == ifp)
  782                         defrtrlist_del(dr);
  783         }
  784 
  785         /* Nuke prefix list entries toward ifp */
  786         for (pr = V_nd_prefix.lh_first; pr; pr = npr) {
  787                 npr = pr->ndpr_next;
  788                 if (pr->ndpr_ifp == ifp) {
  789                         /*
  790                          * Because if_detach() does *not* release prefixes
  791                          * while purging addresses the reference count will
  792                          * still be above zero. We therefore reset it to
  793                          * make sure that the prefix really gets purged.
  794                          */
  795                         pr->ndpr_refcnt = 0;
  796 
  797                         /*
  798                          * Previously, pr->ndpr_addr is removed as well,
  799                          * but I strongly believe we don't have to do it.
  800                          * nd6_purge() is only called from in6_ifdetach(),
  801                          * which removes all the associated interface addresses
  802                          * by itself.
  803                          * (jinmei@kame.net 20010129)
  804                          */
  805                         prelist_remove(pr);
  806                 }
  807         }
  808 
  809         /* cancel default outgoing interface setting */
  810         if (V_nd6_defifindex == ifp->if_index)
  811                 nd6_setdefaultiface(0);
  812 
  813         if (!V_ip6_forwarding && V_ip6_accept_rtadv) { /* XXX: too restrictive? */
  814                 /* refresh default router list
  815                  *
  816                  * 
  817                  */
  818                 defrouter_select();
  819 
  820         }
  821 
  822         /* XXXXX
  823          * We do not nuke the neighbor cache entries here any more
  824          * because the neighbor cache is kept in if_afdata[AF_INET6].
  825          * nd6_purge() is invoked by in6_ifdetach() which is called
  826          * from if_detach() where everything gets purged. So let
  827          * in6_domifdetach() do the actual L2 table purging work.
  828          */
  829 }
  830 
  831 /* 
  832  * the caller acquires and releases the lock on the lltbls
  833  * Returns the llentry locked
  834  */
  835 struct llentry *
  836 nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
  837 {
  838         struct sockaddr_in6 sin6;
  839         struct llentry *ln;
  840         int llflags = 0;
  841         
  842         bzero(&sin6, sizeof(sin6));
  843         sin6.sin6_len = sizeof(struct sockaddr_in6);
  844         sin6.sin6_family = AF_INET6;
  845         sin6.sin6_addr = *addr6;
  846 
  847         IF_AFDATA_LOCK_ASSERT(ifp);
  848 
  849         if (flags & ND6_CREATE)
  850             llflags |= LLE_CREATE;
  851         if (flags & ND6_EXCLUSIVE)
  852             llflags |= LLE_EXCLUSIVE;   
  853         
  854         ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6);
  855         if ((ln != NULL) && (flags & LLE_CREATE)) {
  856                 ln->ln_state = ND6_LLINFO_NOSTATE;
  857                 callout_init(&ln->ln_timer_ch, 0);
  858         }
  859         
  860         return (ln);
  861 }
  862 
  863 /*
  864  * Test whether a given IPv6 address is a neighbor or not, ignoring
  865  * the actual neighbor cache.  The neighbor cache is ignored in order
  866  * to not reenter the routing code from within itself.
  867  */
  868 static int
  869 nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
  870 {
  871         struct nd_prefix *pr;
  872         struct ifaddr *dstaddr;
  873 
  874         /*
  875          * A link-local address is always a neighbor.
  876          * XXX: a link does not necessarily specify a single interface.
  877          */
  878         if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
  879                 struct sockaddr_in6 sin6_copy;
  880                 u_int32_t zone;
  881 
  882                 /*
  883                  * We need sin6_copy since sa6_recoverscope() may modify the
  884                  * content (XXX).
  885                  */
  886                 sin6_copy = *addr;
  887                 if (sa6_recoverscope(&sin6_copy))
  888                         return (0); /* XXX: should be impossible */
  889                 if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
  890                         return (0);
  891                 if (sin6_copy.sin6_scope_id == zone)
  892                         return (1);
  893                 else
  894                         return (0);
  895         }
  896 
  897         /*
  898          * If the address matches one of our addresses,
  899          * it should be a neighbor.
  900          * If the address matches one of our on-link prefixes, it should be a
  901          * neighbor.
  902          */
  903         for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
  904                 if (pr->ndpr_ifp != ifp)
  905                         continue;
  906 
  907                 if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
  908                         struct rtentry *rt;
  909                         rt = rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 0, 0);
  910                         if (rt == NULL)
  911                                 continue;
  912                         /*
  913                          * This is the case where multiple interfaces
  914                          * have the same prefix, but only one is installed 
  915                          * into the routing table and that prefix entry
  916                          * is not the one being examined here. In the case
  917                          * where RADIX_MPATH is enabled, multiple route
  918                          * entries (of the same rt_key value) will be 
  919                          * installed because the interface addresses all
  920                          * differ.
  921                          */
  922                         if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
  923                                &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) {
  924                                 RTFREE_LOCKED(rt);
  925                                 continue;
  926                         }
  927                         RTFREE_LOCKED(rt);
  928                 }
  929 
  930                 if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
  931                     &addr->sin6_addr, &pr->ndpr_mask))
  932                         return (1);
  933         }
  934 
  935         /*
  936          * If the address is assigned on the node of the other side of
  937          * a p2p interface, the address should be a neighbor.
  938          */
  939         dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr);
  940         if (dstaddr != NULL) {
  941                 if (dstaddr->ifa_ifp == ifp) {
  942                         ifa_free(dstaddr);
  943                         return (1);
  944                 }
  945                 ifa_free(dstaddr);
  946         }
  947 
  948         /*
  949          * If the default router list is empty, all addresses are regarded
  950          * as on-link, and thus, as a neighbor.
  951          * XXX: we restrict the condition to hosts, because routers usually do
  952          * not have the "default router list".
  953          */
  954         if (!V_ip6_forwarding && TAILQ_FIRST(&V_nd_defrouter) == NULL &&
  955             V_nd6_defifindex == ifp->if_index) {
  956                 return (1);
  957         }
  958 
  959         return (0);
  960 }
  961 
  962 
  963 /*
  964  * Detect if a given IPv6 address identifies a neighbor on a given link.
  965  * XXX: should take care of the destination of a p2p link?
  966  */
  967 int
  968 nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
  969 {
  970         struct llentry *lle;
  971         int rc = 0;
  972 
  973         IF_AFDATA_UNLOCK_ASSERT(ifp);
  974         if (nd6_is_new_addr_neighbor(addr, ifp))
  975                 return (1);
  976 
  977         /*
  978          * Even if the address matches none of our addresses, it might be
  979          * in the neighbor cache.
  980          */
  981         IF_AFDATA_LOCK(ifp);
  982         if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) {
  983                 LLE_RUNLOCK(lle);
  984                 rc = 1;
  985         }
  986         IF_AFDATA_UNLOCK(ifp);
  987         return (rc);
  988 }
  989 
  990 /*
  991  * Free an nd6 llinfo entry.
  992  * Since the function would cause significant changes in the kernel, DO NOT
  993  * make it global, unless you have a strong reason for the change, and are sure
  994  * that the change is safe.
  995  */
  996 static struct llentry *
  997 nd6_free(struct llentry *ln, int gc)
  998 {
  999         struct llentry *next;
 1000         struct nd_defrouter *dr;
 1001         struct ifnet *ifp=NULL;
 1002 
 1003         /*
 1004          * we used to have pfctlinput(PRC_HOSTDEAD) here.
 1005          * even though it is not harmful, it was not really necessary.
 1006          */
 1007 
 1008         /* cancel timer */
 1009         nd6_llinfo_settimer(ln, -1);
 1010 
 1011         if (!V_ip6_forwarding) {
 1012                 int s;
 1013                 s = splnet();
 1014                 dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp);
 1015 
 1016                 if (dr != NULL && dr->expire &&
 1017                     ln->ln_state == ND6_LLINFO_STALE && gc) {
 1018                         /*
 1019                          * If the reason for the deletion is just garbage
 1020                          * collection, and the neighbor is an active default
 1021                          * router, do not delete it.  Instead, reset the GC
 1022                          * timer using the router's lifetime.
 1023                          * Simply deleting the entry would affect default
 1024                          * router selection, which is not necessarily a good
 1025                          * thing, especially when we're using router preference
 1026                          * values.
 1027                          * XXX: the check for ln_state would be redundant,
 1028                          *      but we intentionally keep it just in case.
 1029                          */
 1030                         if (dr->expire > time_second)
 1031                                 nd6_llinfo_settimer(ln,
 1032                                     (dr->expire - time_second) * hz);
 1033                         else
 1034                                 nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
 1035                         splx(s);
 1036                         LLE_WLOCK(ln);
 1037                         LLE_REMREF(ln);
 1038                         LLE_WUNLOCK(ln);
 1039                         return (LIST_NEXT(ln, lle_next));
 1040                 }
 1041 
 1042                 if (ln->ln_router || dr) {
 1043                         /*
 1044                          * rt6_flush must be called whether or not the neighbor
 1045                          * is in the Default Router List.
 1046                          * See a corresponding comment in nd6_na_input().
 1047                          */
 1048                         rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp);
 1049                 }
 1050 
 1051                 if (dr) {
 1052                         /*
 1053                          * Unreachablity of a router might affect the default
 1054                          * router selection and on-link detection of advertised
 1055                          * prefixes.
 1056                          */
 1057 
 1058                         /*
 1059                          * Temporarily fake the state to choose a new default
 1060                          * router and to perform on-link determination of
 1061                          * prefixes correctly.
 1062                          * Below the state will be set correctly,
 1063                          * or the entry itself will be deleted.
 1064                          */
 1065                         ln->ln_state = ND6_LLINFO_INCOMPLETE;
 1066 
 1067                         /*
 1068                          * Since defrouter_select() does not affect the
 1069                          * on-link determination and MIP6 needs the check
 1070                          * before the default router selection, we perform
 1071                          * the check now.
 1072                          */
 1073                         pfxlist_onlink_check();
 1074 
 1075                         /*
 1076                          * refresh default router list
 1077                          */
 1078                         defrouter_select();
 1079                 }
 1080                 splx(s);
 1081         }
 1082 
 1083         /*
 1084          * Before deleting the entry, remember the next entry as the
 1085          * return value.  We need this because pfxlist_onlink_check() above
 1086          * might have freed other entries (particularly the old next entry) as
 1087          * a side effect (XXX).
 1088          */
 1089         next = LIST_NEXT(ln, lle_next);
 1090 
 1091         ifp = ln->lle_tbl->llt_ifp;
 1092         IF_AFDATA_LOCK(ifp);
 1093         LLE_WLOCK(ln);
 1094         LLE_REMREF(ln);
 1095         llentry_free(ln);
 1096         IF_AFDATA_UNLOCK(ifp);
 1097 
 1098         return (next);
 1099 }
 1100 
 1101 /*
 1102  * Upper-layer reachability hint for Neighbor Unreachability Detection.
 1103  *
 1104  * XXX cost-effective methods?
 1105  */
 1106 void
 1107 nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force)
 1108 {
 1109         struct llentry *ln;
 1110         struct ifnet *ifp;
 1111 
 1112         if ((dst6 == NULL) || (rt == NULL))
 1113                 return;
 1114 
 1115         ifp = rt->rt_ifp;
 1116         IF_AFDATA_LOCK(ifp);
 1117         ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL);
 1118         IF_AFDATA_UNLOCK(ifp);
 1119         if (ln == NULL)
 1120                 return;
 1121 
 1122         if (ln->ln_state < ND6_LLINFO_REACHABLE)
 1123                 goto done;
 1124 
 1125         /*
 1126          * if we get upper-layer reachability confirmation many times,
 1127          * it is possible we have false information.
 1128          */
 1129         if (!force) {
 1130                 ln->ln_byhint++;
 1131                 if (ln->ln_byhint > V_nd6_maxnudhint) {
 1132                         goto done;
 1133                 }
 1134         }
 1135 
 1136         ln->ln_state = ND6_LLINFO_REACHABLE;
 1137         if (!ND6_LLINFO_PERMANENT(ln)) {
 1138                 nd6_llinfo_settimer_locked(ln,
 1139                     (long)ND_IFINFO(rt->rt_ifp)->reachable * hz);
 1140         }
 1141 done:
 1142         LLE_WUNLOCK(ln);
 1143 }
 1144 
 1145 
 1146 int
 1147 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 1148 {
 1149         struct in6_drlist *drl = (struct in6_drlist *)data;
 1150         struct in6_oprlist *oprl = (struct in6_oprlist *)data;
 1151         struct in6_ndireq *ndi = (struct in6_ndireq *)data;
 1152         struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
 1153         struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
 1154         struct nd_defrouter *dr;
 1155         struct nd_prefix *pr;
 1156         int i = 0, error = 0;
 1157         int s;
 1158 
 1159         switch (cmd) {
 1160         case SIOCGDRLST_IN6:
 1161                 /*
 1162                  * obsolete API, use sysctl under net.inet6.icmp6
 1163                  */
 1164                 bzero(drl, sizeof(*drl));
 1165                 s = splnet();
 1166                 dr = TAILQ_FIRST(&V_nd_defrouter);
 1167                 while (dr && i < DRLSTSIZ) {
 1168                         drl->defrouter[i].rtaddr = dr->rtaddr;
 1169                         in6_clearscope(&drl->defrouter[i].rtaddr);
 1170 
 1171                         drl->defrouter[i].flags = dr->flags;
 1172                         drl->defrouter[i].rtlifetime = dr->rtlifetime;
 1173                         drl->defrouter[i].expire = dr->expire;
 1174                         drl->defrouter[i].if_index = dr->ifp->if_index;
 1175                         i++;
 1176                         dr = TAILQ_NEXT(dr, dr_entry);
 1177                 }
 1178                 splx(s);
 1179                 break;
 1180         case SIOCGPRLST_IN6:
 1181                 /*
 1182                  * obsolete API, use sysctl under net.inet6.icmp6
 1183                  *
 1184                  * XXX the structure in6_prlist was changed in backward-
 1185                  * incompatible manner.  in6_oprlist is used for SIOCGPRLST_IN6,
 1186                  * in6_prlist is used for nd6_sysctl() - fill_prlist().
 1187                  */
 1188                 /*
 1189                  * XXX meaning of fields, especialy "raflags", is very
 1190                  * differnet between RA prefix list and RR/static prefix list.
 1191                  * how about separating ioctls into two?
 1192                  */
 1193                 bzero(oprl, sizeof(*oprl));
 1194                 s = splnet();
 1195                 pr = V_nd_prefix.lh_first;
 1196                 while (pr && i < PRLSTSIZ) {
 1197                         struct nd_pfxrouter *pfr;
 1198                         int j;
 1199 
 1200                         oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
 1201                         oprl->prefix[i].raflags = pr->ndpr_raf;
 1202                         oprl->prefix[i].prefixlen = pr->ndpr_plen;
 1203                         oprl->prefix[i].vltime = pr->ndpr_vltime;
 1204                         oprl->prefix[i].pltime = pr->ndpr_pltime;
 1205                         oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
 1206                         if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
 1207                                 oprl->prefix[i].expire = 0;
 1208                         else {
 1209                                 time_t maxexpire;
 1210 
 1211                                 /* XXX: we assume time_t is signed. */
 1212                                 maxexpire = (-1) &
 1213                                     ~((time_t)1 <<
 1214                                     ((sizeof(maxexpire) * 8) - 1));
 1215                                 if (pr->ndpr_vltime <
 1216                                     maxexpire - pr->ndpr_lastupdate) {
 1217                                         oprl->prefix[i].expire =
 1218                                             pr->ndpr_lastupdate +
 1219                                             pr->ndpr_vltime;
 1220                                 } else
 1221                                         oprl->prefix[i].expire = maxexpire;
 1222                         }
 1223 
 1224                         pfr = pr->ndpr_advrtrs.lh_first;
 1225                         j = 0;
 1226                         while (pfr) {
 1227                                 if (j < DRLSTSIZ) {
 1228 #define RTRADDR oprl->prefix[i].advrtr[j]
 1229                                         RTRADDR = pfr->router->rtaddr;
 1230                                         in6_clearscope(&RTRADDR);
 1231 #undef RTRADDR
 1232                                 }
 1233                                 j++;
 1234                                 pfr = pfr->pfr_next;
 1235                         }
 1236                         oprl->prefix[i].advrtrs = j;
 1237                         oprl->prefix[i].origin = PR_ORIG_RA;
 1238 
 1239                         i++;
 1240                         pr = pr->ndpr_next;
 1241                 }
 1242                 splx(s);
 1243 
 1244                 break;
 1245         case OSIOCGIFINFO_IN6:
 1246 #define ND      ndi->ndi
 1247                 /* XXX: old ndp(8) assumes a positive value for linkmtu. */
 1248                 bzero(&ND, sizeof(ND));
 1249                 ND.linkmtu = IN6_LINKMTU(ifp);
 1250                 ND.maxmtu = ND_IFINFO(ifp)->maxmtu;
 1251                 ND.basereachable = ND_IFINFO(ifp)->basereachable;
 1252                 ND.reachable = ND_IFINFO(ifp)->reachable;
 1253                 ND.retrans = ND_IFINFO(ifp)->retrans;
 1254                 ND.flags = ND_IFINFO(ifp)->flags;
 1255                 ND.recalctm = ND_IFINFO(ifp)->recalctm;
 1256                 ND.chlim = ND_IFINFO(ifp)->chlim;
 1257                 break;
 1258         case SIOCGIFINFO_IN6:
 1259                 ND = *ND_IFINFO(ifp);
 1260                 break;
 1261         case SIOCSIFINFO_IN6:
 1262                 /*
 1263                  * used to change host variables from userland.
 1264                  * intented for a use on router to reflect RA configurations.
 1265                  */
 1266                 /* 0 means 'unspecified' */
 1267                 if (ND.linkmtu != 0) {
 1268                         if (ND.linkmtu < IPV6_MMTU ||
 1269                             ND.linkmtu > IN6_LINKMTU(ifp)) {
 1270                                 error = EINVAL;
 1271                                 break;
 1272                         }
 1273                         ND_IFINFO(ifp)->linkmtu = ND.linkmtu;
 1274                 }
 1275 
 1276                 if (ND.basereachable != 0) {
 1277                         int obasereachable = ND_IFINFO(ifp)->basereachable;
 1278 
 1279                         ND_IFINFO(ifp)->basereachable = ND.basereachable;
 1280                         if (ND.basereachable != obasereachable)
 1281                                 ND_IFINFO(ifp)->reachable =
 1282                                     ND_COMPUTE_RTIME(ND.basereachable);
 1283                 }
 1284                 if (ND.retrans != 0)
 1285                         ND_IFINFO(ifp)->retrans = ND.retrans;
 1286                 if (ND.chlim != 0)
 1287                         ND_IFINFO(ifp)->chlim = ND.chlim;
 1288                 /* FALLTHROUGH */
 1289         case SIOCSIFINFO_FLAGS:
 1290                 ND_IFINFO(ifp)->flags = ND.flags;
 1291                 break;
 1292 #undef ND
 1293         case SIOCSNDFLUSH_IN6:  /* XXX: the ioctl name is confusing... */
 1294                 /* sync kernel routing table with the default router list */
 1295                 defrouter_reset();
 1296                 defrouter_select();
 1297                 break;
 1298         case SIOCSPFXFLUSH_IN6:
 1299         {
 1300                 /* flush all the prefix advertised by routers */
 1301                 struct nd_prefix *pr, *next;
 1302 
 1303                 s = splnet();
 1304                 for (pr = V_nd_prefix.lh_first; pr; pr = next) {
 1305                         struct in6_ifaddr *ia, *ia_next;
 1306 
 1307                         next = pr->ndpr_next;
 1308 
 1309                         if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 1310                                 continue; /* XXX */
 1311 
 1312                         /* do we really have to remove addresses as well? */
 1313                         /* XXXRW: in6_ifaddrhead locking. */
 1314                         TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link,
 1315                             ia_next) {
 1316                                 if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 1317                                         continue;
 1318 
 1319                                 if (ia->ia6_ndpr == pr)
 1320                                         in6_purgeaddr(&ia->ia_ifa);
 1321                         }
 1322                         prelist_remove(pr);
 1323                 }
 1324                 splx(s);
 1325                 break;
 1326         }
 1327         case SIOCSRTRFLUSH_IN6:
 1328         {
 1329                 /* flush all the default routers */
 1330                 struct nd_defrouter *dr, *next;
 1331 
 1332                 s = splnet();
 1333                 defrouter_reset();
 1334                 for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = next) {
 1335                         next = TAILQ_NEXT(dr, dr_entry);
 1336                         defrtrlist_del(dr);
 1337                 }
 1338                 defrouter_select();
 1339                 splx(s);
 1340                 break;
 1341         }
 1342         case SIOCGNBRINFO_IN6:
 1343         {
 1344                 struct llentry *ln;
 1345                 struct in6_addr nb_addr = nbi->addr; /* make local for safety */
 1346 
 1347                 if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
 1348                         return (error);
 1349 
 1350                 IF_AFDATA_LOCK(ifp);
 1351                 ln = nd6_lookup(&nb_addr, 0, ifp);
 1352                 IF_AFDATA_UNLOCK(ifp);
 1353 
 1354                 if (ln == NULL) {
 1355                         error = EINVAL;
 1356                         break;
 1357                 }
 1358                 nbi->state = ln->ln_state;
 1359                 nbi->asked = ln->la_asked;
 1360                 nbi->isrouter = ln->ln_router;
 1361                 nbi->expire = ln->la_expire;
 1362                 LLE_RUNLOCK(ln);
 1363                 break;
 1364         }
 1365         case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */
 1366                 ndif->ifindex = V_nd6_defifindex;
 1367                 break;
 1368         case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */
 1369                 return (nd6_setdefaultiface(ndif->ifindex));
 1370         }
 1371         return (error);
 1372 }
 1373 
 1374 /*
 1375  * Create neighbor cache entry and cache link-layer address,
 1376  * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
 1377  *
 1378  * type - ICMP6 type
 1379  * code - type dependent information
 1380  *
 1381  * XXXXX
 1382  *  The caller of this function already acquired the ndp 
 1383  *  cache table lock because the cache entry is returned.
 1384  */
 1385 struct llentry *
 1386 nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
 1387     int lladdrlen, int type, int code)
 1388 {
 1389         struct llentry *ln = NULL;
 1390         int is_newentry;
 1391         int do_update;
 1392         int olladdr;
 1393         int llchange;
 1394         int flags = 0;
 1395         int newstate = 0;
 1396         uint16_t router = 0;
 1397         struct sockaddr_in6 sin6;
 1398         struct mbuf *chain = NULL;
 1399         int static_route = 0;
 1400 
 1401         IF_AFDATA_UNLOCK_ASSERT(ifp);
 1402 
 1403         if (ifp == NULL)
 1404                 panic("ifp == NULL in nd6_cache_lladdr");
 1405         if (from == NULL)
 1406                 panic("from == NULL in nd6_cache_lladdr");
 1407 
 1408         /* nothing must be updated for unspecified address */
 1409         if (IN6_IS_ADDR_UNSPECIFIED(from))
 1410                 return NULL;
 1411 
 1412         /*
 1413          * Validation about ifp->if_addrlen and lladdrlen must be done in
 1414          * the caller.
 1415          *
 1416          * XXX If the link does not have link-layer adderss, what should
 1417          * we do? (ifp->if_addrlen == 0)
 1418          * Spec says nothing in sections for RA, RS and NA.  There's small
 1419          * description on it in NS section (RFC 2461 7.2.3).
 1420          */
 1421         flags |= lladdr ? ND6_EXCLUSIVE : 0;
 1422         IF_AFDATA_LOCK(ifp);
 1423         ln = nd6_lookup(from, flags, ifp);
 1424 
 1425         if (ln == NULL) {
 1426                 flags |= LLE_EXCLUSIVE;
 1427                 ln = nd6_lookup(from, flags |ND6_CREATE, ifp);
 1428                 IF_AFDATA_UNLOCK(ifp);
 1429                 is_newentry = 1;
 1430         } else {
 1431                 IF_AFDATA_UNLOCK(ifp);          
 1432                 /* do nothing if static ndp is set */
 1433                 if (ln->la_flags & LLE_STATIC) {
 1434                         static_route = 1;
 1435                         goto done;
 1436                 }
 1437                 is_newentry = 0;
 1438         }
 1439         if (ln == NULL)
 1440                 return (NULL);
 1441 
 1442         olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
 1443         if (olladdr && lladdr) {
 1444                 llchange = bcmp(lladdr, &ln->ll_addr,
 1445                     ifp->if_addrlen);
 1446         } else
 1447                 llchange = 0;
 1448 
 1449         /*
 1450          * newentry olladdr  lladdr  llchange   (*=record)
 1451          *      0       n       n       --      (1)
 1452          *      0       y       n       --      (2)
 1453          *      0       n       y       --      (3) * STALE
 1454          *      0       y       y       n       (4) *
 1455          *      0       y       y       y       (5) * STALE
 1456          *      1       --      n       --      (6)   NOSTATE(= PASSIVE)
 1457          *      1       --      y       --      (7) * STALE
 1458          */
 1459 
 1460         if (lladdr) {           /* (3-5) and (7) */
 1461                 /*
 1462                  * Record source link-layer address
 1463                  * XXX is it dependent to ifp->if_type?
 1464                  */
 1465                 bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
 1466                 ln->la_flags |= LLE_VALID;
 1467         }
 1468 
 1469         if (!is_newentry) {
 1470                 if ((!olladdr && lladdr != NULL) ||     /* (3) */
 1471                     (olladdr && lladdr != NULL && llchange)) {  /* (5) */
 1472                         do_update = 1;
 1473                         newstate = ND6_LLINFO_STALE;
 1474                 } else                                  /* (1-2,4) */
 1475                         do_update = 0;
 1476         } else {
 1477                 do_update = 1;
 1478                 if (lladdr == NULL)                     /* (6) */
 1479                         newstate = ND6_LLINFO_NOSTATE;
 1480                 else                                    /* (7) */
 1481                         newstate = ND6_LLINFO_STALE;
 1482         }
 1483 
 1484         if (do_update) {
 1485                 /*
 1486                  * Update the state of the neighbor cache.
 1487                  */
 1488                 ln->ln_state = newstate;
 1489 
 1490                 if (ln->ln_state == ND6_LLINFO_STALE) {
 1491                         /*
 1492                          * XXX: since nd6_output() below will cause
 1493                          * state tansition to DELAY and reset the timer,
 1494                          * we must set the timer now, although it is actually
 1495                          * meaningless.
 1496                          */
 1497                         nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
 1498 
 1499                         if (ln->la_hold) {
 1500                                 struct mbuf *m_hold, *m_hold_next;
 1501 
 1502                                 /*
 1503                                  * reset the la_hold in advance, to explicitly
 1504                                  * prevent a la_hold lookup in nd6_output()
 1505                                  * (wouldn't happen, though...)
 1506                                  */
 1507                                 for (m_hold = ln->la_hold, ln->la_hold = NULL;
 1508                                     m_hold; m_hold = m_hold_next) {
 1509                                         m_hold_next = m_hold->m_nextpkt;
 1510                                         m_hold->m_nextpkt = NULL;
 1511 
 1512                                         /*
 1513                                          * we assume ifp is not a p2p here, so
 1514                                          * just set the 2nd argument as the
 1515                                          * 1st one.
 1516                                          */
 1517                                         nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
 1518                                 }
 1519                                 /*
 1520                                  * If we have mbufs in the chain we need to do
 1521                                  * deferred transmit. Copy the address from the
 1522                                  * llentry before dropping the lock down below.
 1523                                  */
 1524                                 if (chain != NULL)
 1525                                         memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
 1526                         }
 1527                 } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
 1528                         /* probe right away */
 1529                         nd6_llinfo_settimer_locked((void *)ln, 0);
 1530                 }
 1531         }
 1532 
 1533         /*
 1534          * ICMP6 type dependent behavior.
 1535          *
 1536          * NS: clear IsRouter if new entry
 1537          * RS: clear IsRouter
 1538          * RA: set IsRouter if there's lladdr
 1539          * redir: clear IsRouter if new entry
 1540          *
 1541          * RA case, (1):
 1542          * The spec says that we must set IsRouter in the following cases:
 1543          * - If lladdr exist, set IsRouter.  This means (1-5).
 1544          * - If it is old entry (!newentry), set IsRouter.  This means (7).
 1545          * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
 1546          * A quetion arises for (1) case.  (1) case has no lladdr in the
 1547          * neighbor cache, this is similar to (6).
 1548          * This case is rare but we figured that we MUST NOT set IsRouter.
 1549          *
 1550          * newentry olladdr  lladdr  llchange       NS  RS  RA  redir
 1551          *                                                      D R
 1552          *      0       n       n       --      (1)     c   ?     s
 1553          *      0       y       n       --      (2)     c   s     s
 1554          *      0       n       y       --      (3)     c   s     s
 1555          *      0       y       y       n       (4)     c   s     s
 1556          *      0       y       y       y       (5)     c   s     s
 1557          *      1       --      n       --      (6) c   c       c s
 1558          *      1       --      y       --      (7) c   c   s   c s
 1559          *
 1560          *                                      (c=clear s=set)
 1561          */
 1562         switch (type & 0xff) {
 1563         case ND_NEIGHBOR_SOLICIT:
 1564                 /*
 1565                  * New entry must have is_router flag cleared.
 1566                  */
 1567                 if (is_newentry)        /* (6-7) */
 1568                         ln->ln_router = 0;
 1569                 break;
 1570         case ND_REDIRECT:
 1571                 /*
 1572                  * If the icmp is a redirect to a better router, always set the
 1573                  * is_router flag.  Otherwise, if the entry is newly created,
 1574                  * clear the flag.  [RFC 2461, sec 8.3]
 1575                  */
 1576                 if (code == ND_REDIRECT_ROUTER)
 1577                         ln->ln_router = 1;
 1578                 else if (is_newentry) /* (6-7) */
 1579                         ln->ln_router = 0;
 1580                 break;
 1581         case ND_ROUTER_SOLICIT:
 1582                 /*
 1583                  * is_router flag must always be cleared.
 1584                  */
 1585                 ln->ln_router = 0;
 1586                 break;
 1587         case ND_ROUTER_ADVERT:
 1588                 /*
 1589                  * Mark an entry with lladdr as a router.
 1590                  */
 1591                 if ((!is_newentry && (olladdr || lladdr)) ||    /* (2-5) */
 1592                     (is_newentry && lladdr)) {                  /* (7) */
 1593                         ln->ln_router = 1;
 1594                 }
 1595                 break;
 1596         }
 1597 
 1598         if (ln != NULL) {
 1599                 static_route = (ln->la_flags & LLE_STATIC);
 1600                 router = ln->ln_router;
 1601 
 1602                 if (flags & ND6_EXCLUSIVE)
 1603                         LLE_WUNLOCK(ln);
 1604                 else
 1605                         LLE_RUNLOCK(ln);
 1606                 if (static_route)
 1607                         ln = NULL;
 1608         }
 1609         if (chain)
 1610                 nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
 1611         
 1612         /*
 1613          * When the link-layer address of a router changes, select the
 1614          * best router again.  In particular, when the neighbor entry is newly
 1615          * created, it might affect the selection policy.
 1616          * Question: can we restrict the first condition to the "is_newentry"
 1617          * case?
 1618          * XXX: when we hear an RA from a new router with the link-layer
 1619          * address option, defrouter_select() is called twice, since
 1620          * defrtrlist_update called the function as well.  However, I believe
 1621          * we can compromise the overhead, since it only happens the first
 1622          * time.
 1623          * XXX: although defrouter_select() should not have a bad effect
 1624          * for those are not autoconfigured hosts, we explicitly avoid such
 1625          * cases for safety.
 1626          */
 1627         if (do_update && router && !V_ip6_forwarding && V_ip6_accept_rtadv) {
 1628                 /*
 1629                  * guaranteed recursion
 1630                  */
 1631                 defrouter_select();
 1632         }
 1633         
 1634         return (ln);
 1635 done:   
 1636         if (ln != NULL) {
 1637                 if (flags & ND6_EXCLUSIVE)
 1638                         LLE_WUNLOCK(ln);
 1639                 else
 1640                         LLE_RUNLOCK(ln);
 1641                 if (static_route)
 1642                         ln = NULL;
 1643         }
 1644         return (ln);
 1645 }
 1646 
 1647 static void
 1648 nd6_slowtimo(void *arg)
 1649 {
 1650         CURVNET_SET((struct vnet *) arg);
 1651         struct nd_ifinfo *nd6if;
 1652         struct ifnet *ifp;
 1653 
 1654         callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
 1655             nd6_slowtimo, curvnet);
 1656         IFNET_RLOCK_NOSLEEP();
 1657         for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
 1658             ifp = TAILQ_NEXT(ifp, if_list)) {
 1659                 nd6if = ND_IFINFO(ifp);
 1660                 if (nd6if->basereachable && /* already initialized */
 1661                     (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
 1662                         /*
 1663                          * Since reachable time rarely changes by router
 1664                          * advertisements, we SHOULD insure that a new random
 1665                          * value gets recomputed at least once every few hours.
 1666                          * (RFC 2461, 6.3.4)
 1667                          */
 1668                         nd6if->recalctm = V_nd6_recalc_reachtm_interval;
 1669                         nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
 1670                 }
 1671         }
 1672         IFNET_RUNLOCK_NOSLEEP();
 1673         CURVNET_RESTORE();
 1674 }
 1675 
 1676 int
 1677 nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
 1678     struct sockaddr_in6 *dst, struct rtentry *rt0)
 1679 {
 1680 
 1681         return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL));
 1682 }
 1683 
 1684 
 1685 /*
 1686  * Note that I'm not enforcing any global serialization
 1687  * lle state or asked changes here as the logic is too
 1688  * complicated to avoid having to always acquire an exclusive
 1689  * lock
 1690  * KMM
 1691  *
 1692  */
 1693 #define senderr(e) { error = (e); goto bad;}
 1694 
 1695 int
 1696 nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
 1697     struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle,
 1698         struct mbuf **chain)
 1699 {
 1700         struct mbuf *m = m0;
 1701         struct llentry *ln = lle;
 1702         int error = 0;
 1703         int flags = 0;
 1704 
 1705 #ifdef INVARIANTS
 1706         if (lle != NULL) {
 1707                 
 1708                 LLE_WLOCK_ASSERT(lle);
 1709 
 1710                 KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed"));
 1711         }
 1712 #endif
 1713         if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
 1714                 goto sendpkt;
 1715 
 1716         if (nd6_need_cache(ifp) == 0)
 1717                 goto sendpkt;
 1718 
 1719         /*
 1720          * next hop determination.  This routine is derived from ether_output.
 1721          */
 1722 
 1723         /*
 1724          * Address resolution or Neighbor Unreachability Detection
 1725          * for the next hop.
 1726          * At this point, the destination of the packet must be a unicast
 1727          * or an anycast address(i.e. not a multicast).
 1728          */
 1729 
 1730         flags = ((m != NULL) || (lle != NULL)) ? LLE_EXCLUSIVE : 0;
 1731         if (ln == NULL) {
 1732         retry:
 1733                 IF_AFDATA_LOCK(ifp);
 1734                 ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst);
 1735                 IF_AFDATA_UNLOCK(ifp);
 1736                 if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
 1737                         /*
 1738                          * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
 1739                          * the condition below is not very efficient.  But we believe
 1740                          * it is tolerable, because this should be a rare case.
 1741                          */
 1742                         flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0);
 1743                         IF_AFDATA_LOCK(ifp);
 1744                         ln = nd6_lookup(&dst->sin6_addr, flags, ifp);
 1745                         IF_AFDATA_UNLOCK(ifp);
 1746                 }
 1747         } 
 1748         if (ln == NULL) {
 1749                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
 1750                     !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
 1751                         char ip6buf[INET6_ADDRSTRLEN];
 1752                         log(LOG_DEBUG,
 1753                             "nd6_output: can't allocate llinfo for %s "
 1754                             "(ln=%p)\n",
 1755                             ip6_sprintf(ip6buf, &dst->sin6_addr), ln);
 1756                         senderr(EIO);   /* XXX: good error? */
 1757                 }
 1758                 goto sendpkt;   /* send anyway */
 1759         }
 1760 
 1761         /* We don't have to do link-layer address resolution on a p2p link. */
 1762         if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
 1763             ln->ln_state < ND6_LLINFO_REACHABLE) {
 1764                 if ((flags & LLE_EXCLUSIVE) == 0) {
 1765                         flags |= LLE_EXCLUSIVE;
 1766                         goto retry;
 1767                 }
 1768                 ln->ln_state = ND6_LLINFO_STALE;
 1769                 nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
 1770         }
 1771 
 1772         /*
 1773          * The first time we send a packet to a neighbor whose entry is
 1774          * STALE, we have to change the state to DELAY and a sets a timer to
 1775          * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
 1776          * neighbor unreachability detection on expiration.
 1777          * (RFC 2461 7.3.3)
 1778          */
 1779         if (ln->ln_state == ND6_LLINFO_STALE) {
 1780                 if ((flags & LLE_EXCLUSIVE) == 0) {
 1781                         flags |= LLE_EXCLUSIVE;
 1782                         LLE_RUNLOCK(ln);
 1783                         goto retry;
 1784                 }
 1785                 ln->la_asked = 0;
 1786                 ln->ln_state = ND6_LLINFO_DELAY;
 1787                 nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz);
 1788         }
 1789 
 1790         /*
 1791          * If the neighbor cache entry has a state other than INCOMPLETE
 1792          * (i.e. its link-layer address is already resolved), just
 1793          * send the packet.
 1794          */
 1795         if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
 1796                 goto sendpkt;
 1797 
 1798         /*
 1799          * There is a neighbor cache entry, but no ethernet address
 1800          * response yet.  Append this latest packet to the end of the
 1801          * packet queue in the mbuf, unless the number of the packet
 1802          * does not exceed nd6_maxqueuelen.  When it exceeds nd6_maxqueuelen,
 1803          * the oldest packet in the queue will be removed.
 1804          */
 1805         if (ln->ln_state == ND6_LLINFO_NOSTATE)
 1806                 ln->ln_state = ND6_LLINFO_INCOMPLETE;
 1807 
 1808         if ((flags & LLE_EXCLUSIVE) == 0) {
 1809                 flags |= LLE_EXCLUSIVE;
 1810                 LLE_RUNLOCK(ln);
 1811                 goto retry;
 1812         }
 1813         if (ln->la_hold) {
 1814                 struct mbuf *m_hold;
 1815                 int i;
 1816                 
 1817                 i = 0;
 1818                 for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) {
 1819                         i++;
 1820                         if (m_hold->m_nextpkt == NULL) {
 1821                                 m_hold->m_nextpkt = m;
 1822                                 break;
 1823                         }
 1824                 }
 1825                 while (i >= V_nd6_maxqueuelen) {
 1826                         m_hold = ln->la_hold;
 1827                         ln->la_hold = ln->la_hold->m_nextpkt;
 1828                         m_freem(m_hold);
 1829                         i--;
 1830                 }
 1831         } else {
 1832                 ln->la_hold = m;
 1833         }
 1834         /*
 1835          * We did the lookup (no lle arg) so we
 1836          * need to do the unlock here
 1837          */
 1838         if (lle == NULL) {
 1839                 if (flags & LLE_EXCLUSIVE)
 1840                         LLE_WUNLOCK(ln);
 1841                 else
 1842                         LLE_RUNLOCK(ln);
 1843         }
 1844         
 1845         /*
 1846          * If there has been no NS for the neighbor after entering the
 1847          * INCOMPLETE state, send the first solicitation.
 1848          */
 1849         if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
 1850                 ln->la_asked++;
 1851                 
 1852                 nd6_llinfo_settimer(ln,
 1853                     (long)ND_IFINFO(ifp)->retrans * hz / 1000);
 1854                 nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
 1855         }
 1856         return (0);
 1857 
 1858   sendpkt:
 1859         /* discard the packet if IPv6 operation is disabled on the interface */
 1860         if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
 1861                 error = ENETDOWN; /* better error? */
 1862                 goto bad;
 1863         }
 1864         /*
 1865          * ln is valid and the caller did not pass in 
 1866          * an llentry
 1867          */
 1868         if ((ln != NULL) && (lle == NULL)) {
 1869                 if (flags & LLE_EXCLUSIVE)
 1870                         LLE_WUNLOCK(ln);
 1871                 else
 1872                         LLE_RUNLOCK(ln);
 1873         }
 1874 
 1875 #ifdef MAC
 1876         mac_netinet6_nd6_send(ifp, m);
 1877 #endif
 1878         /*
 1879          * We were passed in a pointer to an lle with the lock held 
 1880          * this means that we can't call if_output as we will
 1881          * recurse on the lle lock - so what we do is we create
 1882          * a list of mbufs to send and transmit them in the caller
 1883          * after the lock is dropped
 1884          */
 1885         if (lle != NULL) {
 1886                 if (*chain == NULL)
 1887                         *chain = m;
 1888                 else {
 1889                         struct mbuf *m = *chain;
 1890 
 1891                         /*
 1892                          * append mbuf to end of deferred chain
 1893                          */
 1894                         while (m->m_nextpkt != NULL)
 1895                                 m = m->m_nextpkt;
 1896                         m->m_nextpkt = m;
 1897                 }
 1898                 return (error);
 1899         }
 1900         if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 1901                 return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
 1902                     NULL));
 1903         }
 1904         error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL);
 1905         return (error);
 1906 
 1907   bad:
 1908         /*
 1909          * ln is valid and the caller did not pass in 
 1910          * an llentry
 1911          */
 1912         if ((ln != NULL) && (lle == NULL)) {
 1913                 if (flags & LLE_EXCLUSIVE)
 1914                         LLE_WUNLOCK(ln);
 1915                 else
 1916                         LLE_RUNLOCK(ln);
 1917         }
 1918         if (m)
 1919                 m_freem(m);
 1920         return (error);
 1921 }
 1922 #undef senderr
 1923 
 1924 
 1925 int
 1926 nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
 1927     struct sockaddr_in6 *dst, struct route *ro)
 1928 {
 1929         struct mbuf *m, *m_head;
 1930         struct ifnet *outifp;
 1931         int error = 0;
 1932 
 1933         m_head = chain;
 1934         if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 1935                 outifp = origifp;
 1936         else
 1937                 outifp = ifp;
 1938         
 1939         while (m_head) {
 1940                 m = m_head;
 1941                 m_head = m_head->m_nextpkt;
 1942                 error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro);                         
 1943         }
 1944 
 1945         /*
 1946          * XXX
 1947          * note that intermediate errors are blindly ignored - but this is 
 1948          * the same convention as used with nd6_output when called by
 1949          * nd6_cache_lladdr
 1950          */
 1951         return (error);
 1952 }       
 1953 
 1954 
 1955 int
 1956 nd6_need_cache(struct ifnet *ifp)
 1957 {
 1958         /*
 1959          * XXX: we currently do not make neighbor cache on any interface
 1960          * other than ARCnet, Ethernet, FDDI and GIF.
 1961          *
 1962          * RFC2893 says:
 1963          * - unidirectional tunnels needs no ND
 1964          */
 1965         switch (ifp->if_type) {
 1966         case IFT_ARCNET:
 1967         case IFT_ETHER:
 1968         case IFT_FDDI:
 1969         case IFT_IEEE1394:
 1970 #ifdef IFT_L2VLAN
 1971         case IFT_L2VLAN:
 1972 #endif
 1973 #ifdef IFT_IEEE80211
 1974         case IFT_IEEE80211:
 1975 #endif
 1976 #ifdef IFT_CARP
 1977         case IFT_CARP:
 1978 #endif
 1979         case IFT_GIF:           /* XXX need more cases? */
 1980         case IFT_PPP:
 1981         case IFT_TUNNEL:
 1982         case IFT_BRIDGE:
 1983         case IFT_PROPVIRTUAL:
 1984                 return (1);
 1985         default:
 1986                 return (0);
 1987         }
 1988 }
 1989 
 1990 /*
 1991  * the callers of this function need to be re-worked to drop
 1992  * the lle lock, drop here for now
 1993  */
 1994 int
 1995 nd6_storelladdr(struct ifnet *ifp, struct mbuf *m,
 1996     struct sockaddr *dst, u_char *desten, struct llentry **lle)
 1997 {
 1998         struct llentry *ln;
 1999 
 2000         *lle = NULL;
 2001         IF_AFDATA_UNLOCK_ASSERT(ifp);
 2002         if (m->m_flags & M_MCAST) {
 2003                 int i;
 2004 
 2005                 switch (ifp->if_type) {
 2006                 case IFT_ETHER:
 2007                 case IFT_FDDI:
 2008 #ifdef IFT_L2VLAN
 2009                 case IFT_L2VLAN:
 2010 #endif
 2011 #ifdef IFT_IEEE80211
 2012                 case IFT_IEEE80211:
 2013 #endif
 2014                 case IFT_BRIDGE:
 2015                 case IFT_ISO88025:
 2016                         ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
 2017                                                  desten);
 2018                         return (0);
 2019                 case IFT_IEEE1394:
 2020                         /*
 2021                          * netbsd can use if_broadcastaddr, but we don't do so
 2022                          * to reduce # of ifdef.
 2023                          */
 2024                         for (i = 0; i < ifp->if_addrlen; i++)
 2025                                 desten[i] = ~0;
 2026                         return (0);
 2027                 case IFT_ARCNET:
 2028                         *desten = 0;
 2029                         return (0);
 2030                 default:
 2031                         m_freem(m);
 2032                         return (EAFNOSUPPORT);
 2033                 }
 2034         }
 2035 
 2036 
 2037         /*
 2038          * the entry should have been created in nd6_store_lladdr
 2039          */
 2040         IF_AFDATA_LOCK(ifp);
 2041         ln = lla_lookup(LLTABLE6(ifp), 0, dst);
 2042         IF_AFDATA_UNLOCK(ifp);
 2043         if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) {
 2044                 if (ln != NULL)
 2045                         LLE_RUNLOCK(ln);
 2046                 /* this could happen, if we could not allocate memory */
 2047                 m_freem(m);
 2048                 return (1);
 2049         }
 2050 
 2051         bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
 2052         *lle = ln;
 2053         LLE_RUNLOCK(ln);
 2054         /*
 2055          * A *small* use after free race exists here
 2056          */
 2057         return (0);
 2058 }
 2059 
 2060 static void 
 2061 clear_llinfo_pqueue(struct llentry *ln)
 2062 {
 2063         struct mbuf *m_hold, *m_hold_next;
 2064 
 2065         for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) {
 2066                 m_hold_next = m_hold->m_nextpkt;
 2067                 m_hold->m_nextpkt = NULL;
 2068                 m_freem(m_hold);
 2069         }
 2070 
 2071         ln->la_hold = NULL;
 2072         return;
 2073 }
 2074 
 2075 static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
 2076 static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
 2077 #ifdef SYSCTL_DECL
 2078 SYSCTL_DECL(_net_inet6_icmp6);
 2079 #endif
 2080 SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
 2081         CTLFLAG_RD, nd6_sysctl_drlist, "");
 2082 SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
 2083         CTLFLAG_RD, nd6_sysctl_prlist, "");
 2084 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
 2085         CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
 2086 
 2087 static int
 2088 nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
 2089 {
 2090         int error;
 2091         char buf[1024] __aligned(4);
 2092         struct in6_defrouter *d, *de;
 2093         struct nd_defrouter *dr;
 2094 
 2095         if (req->newptr)
 2096                 return EPERM;
 2097         error = 0;
 2098 
 2099         for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
 2100              dr = TAILQ_NEXT(dr, dr_entry)) {
 2101                 d = (struct in6_defrouter *)buf;
 2102                 de = (struct in6_defrouter *)(buf + sizeof(buf));
 2103 
 2104                 if (d + 1 <= de) {
 2105                         bzero(d, sizeof(*d));
 2106                         d->rtaddr.sin6_family = AF_INET6;
 2107                         d->rtaddr.sin6_len = sizeof(d->rtaddr);
 2108                         d->rtaddr.sin6_addr = dr->rtaddr;
 2109                         error = sa6_recoverscope(&d->rtaddr);
 2110                         if (error != 0)
 2111                                 return (error);
 2112                         d->flags = dr->flags;
 2113                         d->rtlifetime = dr->rtlifetime;
 2114                         d->expire = dr->expire;
 2115                         d->if_index = dr->ifp->if_index;
 2116                 } else
 2117                         panic("buffer too short");
 2118 
 2119                 error = SYSCTL_OUT(req, buf, sizeof(*d));
 2120                 if (error)
 2121                         break;
 2122         }
 2123 
 2124         return (error);
 2125 }
 2126 
 2127 static int
 2128 nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
 2129 {
 2130         int error;
 2131         char buf[1024] __aligned(4);
 2132         struct in6_prefix *p, *pe;
 2133         struct nd_prefix *pr;
 2134         char ip6buf[INET6_ADDRSTRLEN];
 2135 
 2136         if (req->newptr)
 2137                 return EPERM;
 2138         error = 0;
 2139 
 2140         for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 2141                 u_short advrtrs;
 2142                 size_t advance;
 2143                 struct sockaddr_in6 *sin6, *s6;
 2144                 struct nd_pfxrouter *pfr;
 2145 
 2146                 p = (struct in6_prefix *)buf;
 2147                 pe = (struct in6_prefix *)(buf + sizeof(buf));
 2148 
 2149                 if (p + 1 <= pe) {
 2150                         bzero(p, sizeof(*p));
 2151                         sin6 = (struct sockaddr_in6 *)(p + 1);
 2152 
 2153                         p->prefix = pr->ndpr_prefix;
 2154                         if (sa6_recoverscope(&p->prefix)) {
 2155                                 log(LOG_ERR,
 2156                                     "scope error in prefix list (%s)\n",
 2157                                     ip6_sprintf(ip6buf, &p->prefix.sin6_addr));
 2158                                 /* XXX: press on... */
 2159                         }
 2160                         p->raflags = pr->ndpr_raf;
 2161                         p->prefixlen = pr->ndpr_plen;
 2162                         p->vltime = pr->ndpr_vltime;
 2163                         p->pltime = pr->ndpr_pltime;
 2164                         p->if_index = pr->ndpr_ifp->if_index;
 2165                         if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
 2166                                 p->expire = 0;
 2167                         else {
 2168                                 time_t maxexpire;
 2169 
 2170                                 /* XXX: we assume time_t is signed. */
 2171                                 maxexpire = (-1) &
 2172                                     ~((time_t)1 <<
 2173                                     ((sizeof(maxexpire) * 8) - 1));
 2174                                 if (pr->ndpr_vltime <
 2175                                     maxexpire - pr->ndpr_lastupdate) {
 2176                                     p->expire = pr->ndpr_lastupdate +
 2177                                         pr->ndpr_vltime;
 2178                                 } else
 2179                                         p->expire = maxexpire;
 2180                         }
 2181                         p->refcnt = pr->ndpr_refcnt;
 2182                         p->flags = pr->ndpr_stateflags;
 2183                         p->origin = PR_ORIG_RA;
 2184                         advrtrs = 0;
 2185                         for (pfr = pr->ndpr_advrtrs.lh_first; pfr;
 2186                              pfr = pfr->pfr_next) {
 2187                                 if ((void *)&sin6[advrtrs + 1] > (void *)pe) {
 2188                                         advrtrs++;
 2189                                         continue;
 2190                                 }
 2191                                 s6 = &sin6[advrtrs];
 2192                                 bzero(s6, sizeof(*s6));
 2193                                 s6->sin6_family = AF_INET6;
 2194                                 s6->sin6_len = sizeof(*sin6);
 2195                                 s6->sin6_addr = pfr->router->rtaddr;
 2196                                 if (sa6_recoverscope(s6)) {
 2197                                         log(LOG_ERR,
 2198                                             "scope error in "
 2199                                             "prefix list (%s)\n",
 2200                                             ip6_sprintf(ip6buf,
 2201                                                     &pfr->router->rtaddr));
 2202                                 }
 2203                                 advrtrs++;
 2204                         }
 2205                         p->advrtrs = advrtrs;
 2206                 } else
 2207                         panic("buffer too short");
 2208 
 2209                 advance = sizeof(*p) + sizeof(*sin6) * advrtrs;
 2210                 error = SYSCTL_OUT(req, buf, advance);
 2211                 if (error)
 2212                         break;
 2213         }
 2214 
 2215         return (error);
 2216 }

Cache object: bf1526ed6ea266442e3bd26cd170ab63


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.