The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet6/nd6.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. Neither the name of the project nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: releng/8.3/sys/netinet6/nd6.c 230770 2012-01-30 12:13:50Z pluknet $");
   34 
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/callout.h>
   41 #include <sys/malloc.h>
   42 #include <sys/mbuf.h>
   43 #include <sys/socket.h>
   44 #include <sys/sockio.h>
   45 #include <sys/time.h>
   46 #include <sys/kernel.h>
   47 #include <sys/protosw.h>
   48 #include <sys/errno.h>
   49 #include <sys/syslog.h>
   50 #include <sys/lock.h>
   51 #include <sys/rwlock.h>
   52 #include <sys/queue.h>
   53 #include <sys/sysctl.h>
   54 
   55 #include <net/if.h>
   56 #include <net/if_arc.h>
   57 #include <net/if_dl.h>
   58 #include <net/if_types.h>
   59 #include <net/iso88025.h>
   60 #include <net/fddi.h>
   61 #include <net/route.h>
   62 #include <net/vnet.h>
   63 
   64 #include <netinet/in.h>
   65 #include <net/if_llatbl.h>
   66 #define L3_ADDR_SIN6(le)        ((struct sockaddr_in6 *) L3_ADDR(le))
   67 #include <netinet/if_ether.h>
   68 #include <netinet6/in6_var.h>
   69 #include <netinet/ip6.h>
   70 #include <netinet6/ip6_var.h>
   71 #include <netinet6/scope6_var.h>
   72 #include <netinet6/nd6.h>
   73 #include <netinet/icmp6.h>
   74 
   75 #include <sys/limits.h>
   76 
   77 #include <security/mac/mac_framework.h>
   78 
   79 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
   80 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
   81 
   82 #define SIN6(s) ((struct sockaddr_in6 *)s)
   83 
   84 /* timer values */
   85 VNET_DEFINE(int, nd6_prune)     = 1;    /* nd6_timer() period: 1 second */
   86 VNET_DEFINE(int, nd6_delay)     = 5;    /* delay first probe time: 5 seconds */
   87 VNET_DEFINE(int, nd6_umaxtries) = 3;    /* maximum unicast queries (PROBE state) */
   88 VNET_DEFINE(int, nd6_mmaxtries) = 3;    /* maximum multicast queries (INCOMPLETE state) */
   89 VNET_DEFINE(int, nd6_useloopback) = 1;  /* use loopback interface for
   90                                          * local traffic */
   91 VNET_DEFINE(int, nd6_gctimer)   = (60 * 60 * 24); /* 1 day: garbage
   92                                          * collection timer */
   93 
   94 /* preventing too many loops in ND option parsing */
   95 static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */
   96 
   97 VNET_DEFINE(int, nd6_maxnudhint) = 0;   /* max # of subsequent upper
   98                                          * layer hints */
   99 static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved
  100                                          * ND entries */
  101 #define V_nd6_maxndopt                  VNET(nd6_maxndopt)
  102 #define V_nd6_maxqueuelen               VNET(nd6_maxqueuelen)
  103 
  104 #ifdef ND6_DEBUG
  105 VNET_DEFINE(int, nd6_debug) = 1;
  106 #else
  107 VNET_DEFINE(int, nd6_debug) = 0;
  108 #endif
  109 
  110 /* for debugging? */
  111 #if 0
  112 static int nd6_inuse, nd6_allocated;
  113 #endif
  114 
  115 VNET_DEFINE(struct nd_drhead, nd_defrouter);
  116 VNET_DEFINE(struct nd_prhead, nd_prefix);
  117 
  118 VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
  119 #define V_nd6_recalc_reachtm_interval   VNET(nd6_recalc_reachtm_interval)
  120 
  121 static struct sockaddr_in6 all1_sa;
  122 
  123 static int nd6_is_new_addr_neighbor __P((struct sockaddr_in6 *,
  124         struct ifnet *));
  125 static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
  126 static void nd6_slowtimo(void *);
  127 static int regen_tmpaddr(struct in6_ifaddr *);
  128 static struct llentry *nd6_free(struct llentry *, int);
  129 static void nd6_llinfo_timer(void *);
  130 static void clear_llinfo_pqueue(struct llentry *);
  131 
  132 static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
  133 #define V_nd6_slowtimo_ch               VNET(nd6_slowtimo_ch)
  134 
  135 VNET_DEFINE(struct callout, nd6_timer_ch);
  136 
  137 void
  138 nd6_init(void)
  139 {
  140         int i;
  141 
  142         LIST_INIT(&V_nd_prefix);
  143 
  144         all1_sa.sin6_family = AF_INET6;
  145         all1_sa.sin6_len = sizeof(struct sockaddr_in6);
  146         for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
  147                 all1_sa.sin6_addr.s6_addr[i] = 0xff;
  148 
  149         /* initialization of the default router list */
  150         TAILQ_INIT(&V_nd_defrouter);
  151 
  152         /* start timer */
  153         callout_init(&V_nd6_slowtimo_ch, 0);
  154         callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
  155             nd6_slowtimo, curvnet);
  156 }
  157 
  158 #ifdef VIMAGE
  159 void
  160 nd6_destroy()
  161 {
  162 
  163         callout_drain(&V_nd6_slowtimo_ch);
  164         callout_drain(&V_nd6_timer_ch);
  165 }
  166 #endif
  167 
  168 struct nd_ifinfo *
  169 nd6_ifattach(struct ifnet *ifp)
  170 {
  171         struct nd_ifinfo *nd;
  172 
  173         nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK);
  174         bzero(nd, sizeof(*nd));
  175 
  176         nd->initialized = 1;
  177 
  178         nd->chlim = IPV6_DEFHLIM;
  179         nd->basereachable = REACHABLE_TIME;
  180         nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
  181         nd->retrans = RETRANS_TIMER;
  182         /*
  183          * Note that the default value of ip6_accept_rtadv is 0, which means
  184          * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV
  185          * here.
  186          */
  187         nd->flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV);
  188 
  189         /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
  190         nd6_setmtu0(ifp, nd);
  191 
  192         return nd;
  193 }
  194 
  195 void
  196 nd6_ifdetach(struct nd_ifinfo *nd)
  197 {
  198 
  199         free(nd, M_IP6NDP);
  200 }
  201 
  202 /*
  203  * Reset ND level link MTU. This function is called when the physical MTU
  204  * changes, which means we might have to adjust the ND level MTU.
  205  */
  206 void
  207 nd6_setmtu(struct ifnet *ifp)
  208 {
  209 
  210         nd6_setmtu0(ifp, ND_IFINFO(ifp));
  211 }
  212 
  213 /* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
  214 void
  215 nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
  216 {
  217         u_int32_t omaxmtu;
  218 
  219         omaxmtu = ndi->maxmtu;
  220 
  221         switch (ifp->if_type) {
  222         case IFT_ARCNET:
  223                 ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */
  224                 break;
  225         case IFT_FDDI:
  226                 ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */
  227                 break;
  228         case IFT_ISO88025:
  229                  ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
  230                  break;
  231         default:
  232                 ndi->maxmtu = ifp->if_mtu;
  233                 break;
  234         }
  235 
  236         /*
  237          * Decreasing the interface MTU under IPV6 minimum MTU may cause
  238          * undesirable situation.  We thus notify the operator of the change
  239          * explicitly.  The check for omaxmtu is necessary to restrict the
  240          * log to the case of changing the MTU, not initializing it.
  241          */
  242         if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
  243                 log(LOG_NOTICE, "nd6_setmtu0: "
  244                     "new link MTU on %s (%lu) is too small for IPv6\n",
  245                     if_name(ifp), (unsigned long)ndi->maxmtu);
  246         }
  247 
  248         if (ndi->maxmtu > V_in6_maxmtu)
  249                 in6_setmaxmtu(); /* check all interfaces just in case */
  250 
  251 }
  252 
  253 void
  254 nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
  255 {
  256 
  257         bzero(ndopts, sizeof(*ndopts));
  258         ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
  259         ndopts->nd_opts_last
  260                 = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
  261 
  262         if (icmp6len == 0) {
  263                 ndopts->nd_opts_done = 1;
  264                 ndopts->nd_opts_search = NULL;
  265         }
  266 }
  267 
  268 /*
  269  * Take one ND option.
  270  */
  271 struct nd_opt_hdr *
  272 nd6_option(union nd_opts *ndopts)
  273 {
  274         struct nd_opt_hdr *nd_opt;
  275         int olen;
  276 
  277         if (ndopts == NULL)
  278                 panic("ndopts == NULL in nd6_option");
  279         if (ndopts->nd_opts_last == NULL)
  280                 panic("uninitialized ndopts in nd6_option");
  281         if (ndopts->nd_opts_search == NULL)
  282                 return NULL;
  283         if (ndopts->nd_opts_done)
  284                 return NULL;
  285 
  286         nd_opt = ndopts->nd_opts_search;
  287 
  288         /* make sure nd_opt_len is inside the buffer */
  289         if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
  290                 bzero(ndopts, sizeof(*ndopts));
  291                 return NULL;
  292         }
  293 
  294         olen = nd_opt->nd_opt_len << 3;
  295         if (olen == 0) {
  296                 /*
  297                  * Message validation requires that all included
  298                  * options have a length that is greater than zero.
  299                  */
  300                 bzero(ndopts, sizeof(*ndopts));
  301                 return NULL;
  302         }
  303 
  304         ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
  305         if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
  306                 /* option overruns the end of buffer, invalid */
  307                 bzero(ndopts, sizeof(*ndopts));
  308                 return NULL;
  309         } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
  310                 /* reached the end of options chain */
  311                 ndopts->nd_opts_done = 1;
  312                 ndopts->nd_opts_search = NULL;
  313         }
  314         return nd_opt;
  315 }
  316 
  317 /*
  318  * Parse multiple ND options.
  319  * This function is much easier to use, for ND routines that do not need
  320  * multiple options of the same type.
  321  */
  322 int
  323 nd6_options(union nd_opts *ndopts)
  324 {
  325         struct nd_opt_hdr *nd_opt;
  326         int i = 0;
  327 
  328         if (ndopts == NULL)
  329                 panic("ndopts == NULL in nd6_options");
  330         if (ndopts->nd_opts_last == NULL)
  331                 panic("uninitialized ndopts in nd6_options");
  332         if (ndopts->nd_opts_search == NULL)
  333                 return 0;
  334 
  335         while (1) {
  336                 nd_opt = nd6_option(ndopts);
  337                 if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
  338                         /*
  339                          * Message validation requires that all included
  340                          * options have a length that is greater than zero.
  341                          */
  342                         ICMP6STAT_INC(icp6s_nd_badopt);
  343                         bzero(ndopts, sizeof(*ndopts));
  344                         return -1;
  345                 }
  346 
  347                 if (nd_opt == NULL)
  348                         goto skip1;
  349 
  350                 switch (nd_opt->nd_opt_type) {
  351                 case ND_OPT_SOURCE_LINKADDR:
  352                 case ND_OPT_TARGET_LINKADDR:
  353                 case ND_OPT_MTU:
  354                 case ND_OPT_REDIRECTED_HEADER:
  355                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
  356                                 nd6log((LOG_INFO,
  357                                     "duplicated ND6 option found (type=%d)\n",
  358                                     nd_opt->nd_opt_type));
  359                                 /* XXX bark? */
  360                         } else {
  361                                 ndopts->nd_opt_array[nd_opt->nd_opt_type]
  362                                         = nd_opt;
  363                         }
  364                         break;
  365                 case ND_OPT_PREFIX_INFORMATION:
  366                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
  367                                 ndopts->nd_opt_array[nd_opt->nd_opt_type]
  368                                         = nd_opt;
  369                         }
  370                         ndopts->nd_opts_pi_end =
  371                                 (struct nd_opt_prefix_info *)nd_opt;
  372                         break;
  373                 default:
  374                         /*
  375                          * Unknown options must be silently ignored,
  376                          * to accomodate future extension to the protocol.
  377                          */
  378                         nd6log((LOG_DEBUG,
  379                             "nd6_options: unsupported option %d - "
  380                             "option ignored\n", nd_opt->nd_opt_type));
  381                 }
  382 
  383 skip1:
  384                 i++;
  385                 if (i > V_nd6_maxndopt) {
  386                         ICMP6STAT_INC(icp6s_nd_toomanyopt);
  387                         nd6log((LOG_INFO, "too many loop in nd opt\n"));
  388                         break;
  389                 }
  390 
  391                 if (ndopts->nd_opts_done)
  392                         break;
  393         }
  394 
  395         return 0;
  396 }
  397 
  398 /*
  399  * ND6 timer routine to handle ND6 entries
  400  */
  401 void
  402 nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
  403 {
  404         int canceled;
  405 
  406         LLE_WLOCK_ASSERT(ln);
  407 
  408         if (tick < 0) {
  409                 ln->la_expire = 0;
  410                 ln->ln_ntick = 0;
  411                 canceled = callout_stop(&ln->ln_timer_ch);
  412         } else {
  413                 ln->la_expire = time_second + tick / hz;
  414                 LLE_ADDREF(ln);
  415                 if (tick > INT_MAX) {
  416                         ln->ln_ntick = tick - INT_MAX;
  417                         canceled = callout_reset(&ln->ln_timer_ch, INT_MAX,
  418                             nd6_llinfo_timer, ln);
  419                 } else {
  420                         ln->ln_ntick = 0;
  421                         canceled = callout_reset(&ln->ln_timer_ch, tick,
  422                             nd6_llinfo_timer, ln);
  423                 }
  424         }
  425         if (canceled)
  426                 LLE_REMREF(ln);
  427 }
  428 
  429 void
  430 nd6_llinfo_settimer(struct llentry *ln, long tick)
  431 {
  432 
  433         LLE_WLOCK(ln);
  434         nd6_llinfo_settimer_locked(ln, tick);
  435         LLE_WUNLOCK(ln);
  436 }
  437 
  438 static void
  439 nd6_llinfo_timer(void *arg)
  440 {
  441         struct llentry *ln;
  442         struct in6_addr *dst;
  443         struct ifnet *ifp;
  444         struct nd_ifinfo *ndi = NULL;
  445 
  446         KASSERT(arg != NULL, ("%s: arg NULL", __func__));
  447         ln = (struct llentry *)arg;
  448         LLE_WLOCK_ASSERT(ln);
  449         ifp = ln->lle_tbl->llt_ifp;
  450 
  451         CURVNET_SET(ifp->if_vnet);
  452 
  453         if (ln->ln_ntick > 0) {
  454                 if (ln->ln_ntick > INT_MAX) {
  455                         ln->ln_ntick -= INT_MAX;
  456                         nd6_llinfo_settimer_locked(ln, INT_MAX);
  457                 } else {
  458                         ln->ln_ntick = 0;
  459                         nd6_llinfo_settimer_locked(ln, ln->ln_ntick);
  460                 }
  461                 goto done;
  462         }
  463 
  464         ndi = ND_IFINFO(ifp);
  465         dst = &L3_ADDR_SIN6(ln)->sin6_addr;
  466         if (ln->la_flags & LLE_STATIC) {
  467                 goto done;
  468         }
  469 
  470         if (ln->la_flags & LLE_DELETED) {
  471                 (void)nd6_free(ln, 0);
  472                 ln = NULL;
  473                 goto done;
  474         }
  475 
  476         switch (ln->ln_state) {
  477         case ND6_LLINFO_INCOMPLETE:
  478                 if (ln->la_asked < V_nd6_mmaxtries) {
  479                         ln->la_asked++;
  480                         nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
  481                         LLE_WUNLOCK(ln);
  482                         nd6_ns_output(ifp, NULL, dst, ln, 0);
  483                         LLE_WLOCK(ln);
  484                 } else {
  485                         struct mbuf *m = ln->la_hold;
  486                         if (m) {
  487                                 struct mbuf *m0;
  488 
  489                                 /*
  490                                  * assuming every packet in la_hold has the
  491                                  * same IP header.  Send error after unlock.
  492                                  */
  493                                 m0 = m->m_nextpkt;
  494                                 m->m_nextpkt = NULL;
  495                                 ln->la_hold = m0;
  496                                 clear_llinfo_pqueue(ln);
  497                         }
  498                         (void)nd6_free(ln, 0);
  499                         ln = NULL;
  500                         if (m != NULL)
  501                                 icmp6_error2(m, ICMP6_DST_UNREACH,
  502                                     ICMP6_DST_UNREACH_ADDR, 0, ifp);
  503                 }
  504                 break;
  505         case ND6_LLINFO_REACHABLE:
  506                 if (!ND6_LLINFO_PERMANENT(ln)) {
  507                         ln->ln_state = ND6_LLINFO_STALE;
  508                         nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
  509                 }
  510                 break;
  511 
  512         case ND6_LLINFO_STALE:
  513                 /* Garbage Collection(RFC 2461 5.3) */
  514                 if (!ND6_LLINFO_PERMANENT(ln)) {
  515                         (void)nd6_free(ln, 1);
  516                         ln = NULL;
  517                 }
  518                 break;
  519 
  520         case ND6_LLINFO_DELAY:
  521                 if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
  522                         /* We need NUD */
  523                         ln->la_asked = 1;
  524                         ln->ln_state = ND6_LLINFO_PROBE;
  525                         nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
  526                         LLE_WUNLOCK(ln);
  527                         nd6_ns_output(ifp, dst, dst, ln, 0);
  528                         LLE_WLOCK(ln);
  529                 } else {
  530                         ln->ln_state = ND6_LLINFO_STALE; /* XXX */
  531                         nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
  532                 }
  533                 break;
  534         case ND6_LLINFO_PROBE:
  535                 if (ln->la_asked < V_nd6_umaxtries) {
  536                         ln->la_asked++;
  537                         nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
  538                         LLE_WUNLOCK(ln);
  539                         nd6_ns_output(ifp, dst, dst, ln, 0);
  540                         LLE_WLOCK(ln);
  541                 } else {
  542                         (void)nd6_free(ln, 0);
  543                         ln = NULL;
  544                 }
  545                 break;
  546         default:
  547                 panic("%s: paths in a dark night can be confusing: %d",
  548                     __func__, ln->ln_state);
  549         }
  550 done:
  551         if (ln != NULL)
  552                 LLE_FREE_LOCKED(ln);
  553         CURVNET_RESTORE();
  554 }
  555 
  556 
  557 /*
  558  * ND6 timer routine to expire default route list and prefix list
  559  */
  560 void
  561 nd6_timer(void *arg)
  562 {
  563         CURVNET_SET((struct vnet *) arg);
  564         int s;
  565         struct nd_defrouter *dr;
  566         struct nd_prefix *pr;
  567         struct in6_ifaddr *ia6, *nia6;
  568 
  569         callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
  570             nd6_timer, curvnet);
  571 
  572         /* expire default router list */
  573         s = splnet();
  574         dr = TAILQ_FIRST(&V_nd_defrouter);
  575         while (dr) {
  576                 if (dr->expire && dr->expire < time_second) {
  577                         struct nd_defrouter *t;
  578                         t = TAILQ_NEXT(dr, dr_entry);
  579                         defrtrlist_del(dr);
  580                         dr = t;
  581                 } else {
  582                         dr = TAILQ_NEXT(dr, dr_entry);
  583                 }
  584         }
  585 
  586         /*
  587          * expire interface addresses.
  588          * in the past the loop was inside prefix expiry processing.
  589          * However, from a stricter speci-confrmance standpoint, we should
  590          * rather separate address lifetimes and prefix lifetimes.
  591          *
  592          * XXXRW: in6_ifaddrhead locking.
  593          */
  594   addrloop:
  595         TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) {
  596                 /* check address lifetime */
  597                 if (IFA6_IS_INVALID(ia6)) {
  598                         int regen = 0;
  599 
  600                         /*
  601                          * If the expiring address is temporary, try
  602                          * regenerating a new one.  This would be useful when
  603                          * we suspended a laptop PC, then turned it on after a
  604                          * period that could invalidate all temporary
  605                          * addresses.  Although we may have to restart the
  606                          * loop (see below), it must be after purging the
  607                          * address.  Otherwise, we'd see an infinite loop of
  608                          * regeneration.
  609                          */
  610                         if (V_ip6_use_tempaddr &&
  611                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
  612                                 if (regen_tmpaddr(ia6) == 0)
  613                                         regen = 1;
  614                         }
  615 
  616                         in6_purgeaddr(&ia6->ia_ifa);
  617 
  618                         if (regen)
  619                                 goto addrloop; /* XXX: see below */
  620                 } else if (IFA6_IS_DEPRECATED(ia6)) {
  621                         int oldflags = ia6->ia6_flags;
  622 
  623                         ia6->ia6_flags |= IN6_IFF_DEPRECATED;
  624 
  625                         /*
  626                          * If a temporary address has just become deprecated,
  627                          * regenerate a new one if possible.
  628                          */
  629                         if (V_ip6_use_tempaddr &&
  630                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
  631                             (oldflags & IN6_IFF_DEPRECATED) == 0) {
  632 
  633                                 if (regen_tmpaddr(ia6) == 0) {
  634                                         /*
  635                                          * A new temporary address is
  636                                          * generated.
  637                                          * XXX: this means the address chain
  638                                          * has changed while we are still in
  639                                          * the loop.  Although the change
  640                                          * would not cause disaster (because
  641                                          * it's not a deletion, but an
  642                                          * addition,) we'd rather restart the
  643                                          * loop just for safety.  Or does this
  644                                          * significantly reduce performance??
  645                                          */
  646                                         goto addrloop;
  647                                 }
  648                         }
  649                 } else {
  650                         /*
  651                          * A new RA might have made a deprecated address
  652                          * preferred.
  653                          */
  654                         ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
  655                 }
  656         }
  657 
  658         /* expire prefix list */
  659         pr = V_nd_prefix.lh_first;
  660         while (pr) {
  661                 /*
  662                  * check prefix lifetime.
  663                  * since pltime is just for autoconf, pltime processing for
  664                  * prefix is not necessary.
  665                  */
  666                 if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
  667                     time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
  668                         struct nd_prefix *t;
  669                         t = pr->ndpr_next;
  670 
  671                         /*
  672                          * address expiration and prefix expiration are
  673                          * separate.  NEVER perform in6_purgeaddr here.
  674                          */
  675 
  676                         prelist_remove(pr);
  677                         pr = t;
  678                 } else
  679                         pr = pr->ndpr_next;
  680         }
  681         splx(s);
  682         CURVNET_RESTORE();
  683 }
  684 
  685 /*
  686  * ia6 - deprecated/invalidated temporary address
  687  */
  688 static int
  689 regen_tmpaddr(struct in6_ifaddr *ia6)
  690 {
  691         struct ifaddr *ifa;
  692         struct ifnet *ifp;
  693         struct in6_ifaddr *public_ifa6 = NULL;
  694 
  695         ifp = ia6->ia_ifa.ifa_ifp;
  696         IF_ADDR_LOCK(ifp);
  697         TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  698                 struct in6_ifaddr *it6;
  699 
  700                 if (ifa->ifa_addr->sa_family != AF_INET6)
  701                         continue;
  702 
  703                 it6 = (struct in6_ifaddr *)ifa;
  704 
  705                 /* ignore no autoconf addresses. */
  706                 if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
  707                         continue;
  708 
  709                 /* ignore autoconf addresses with different prefixes. */
  710                 if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
  711                         continue;
  712 
  713                 /*
  714                  * Now we are looking at an autoconf address with the same
  715                  * prefix as ours.  If the address is temporary and is still
  716                  * preferred, do not create another one.  It would be rare, but
  717                  * could happen, for example, when we resume a laptop PC after
  718                  * a long period.
  719                  */
  720                 if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
  721                     !IFA6_IS_DEPRECATED(it6)) {
  722                         public_ifa6 = NULL;
  723                         break;
  724                 }
  725 
  726                 /*
  727                  * This is a public autoconf address that has the same prefix
  728                  * as ours.  If it is preferred, keep it.  We can't break the
  729                  * loop here, because there may be a still-preferred temporary
  730                  * address with the prefix.
  731                  */
  732                 if (!IFA6_IS_DEPRECATED(it6))
  733                     public_ifa6 = it6;
  734 
  735                 if (public_ifa6 != NULL)
  736                         ifa_ref(&public_ifa6->ia_ifa);
  737         }
  738         IF_ADDR_UNLOCK(ifp);
  739 
  740         if (public_ifa6 != NULL) {
  741                 int e;
  742 
  743                 if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) {
  744                         ifa_free(&public_ifa6->ia_ifa);
  745                         log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
  746                             " tmp addr,errno=%d\n", e);
  747                         return (-1);
  748                 }
  749                 ifa_free(&public_ifa6->ia_ifa);
  750                 return (0);
  751         }
  752 
  753         return (-1);
  754 }
  755 
  756 /*
  757  * Nuke neighbor cache/prefix/default router management table, right before
  758  * ifp goes away.
  759  */
  760 void
  761 nd6_purge(struct ifnet *ifp)
  762 {
  763         struct nd_defrouter *dr, *ndr;
  764         struct nd_prefix *pr, *npr;
  765 
  766         /*
  767          * Nuke default router list entries toward ifp.
  768          * We defer removal of default router list entries that is installed
  769          * in the routing table, in order to keep additional side effects as
  770          * small as possible.
  771          */
  772         for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
  773                 ndr = TAILQ_NEXT(dr, dr_entry);
  774                 if (dr->installed)
  775                         continue;
  776 
  777                 if (dr->ifp == ifp)
  778                         defrtrlist_del(dr);
  779         }
  780 
  781         for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
  782                 ndr = TAILQ_NEXT(dr, dr_entry);
  783                 if (!dr->installed)
  784                         continue;
  785 
  786                 if (dr->ifp == ifp)
  787                         defrtrlist_del(dr);
  788         }
  789 
  790         /* Nuke prefix list entries toward ifp */
  791         for (pr = V_nd_prefix.lh_first; pr; pr = npr) {
  792                 npr = pr->ndpr_next;
  793                 if (pr->ndpr_ifp == ifp) {
  794                         /*
  795                          * Because if_detach() does *not* release prefixes
  796                          * while purging addresses the reference count will
  797                          * still be above zero. We therefore reset it to
  798                          * make sure that the prefix really gets purged.
  799                          */
  800                         pr->ndpr_refcnt = 0;
  801 
  802                         /*
  803                          * Previously, pr->ndpr_addr is removed as well,
  804                          * but I strongly believe we don't have to do it.
  805                          * nd6_purge() is only called from in6_ifdetach(),
  806                          * which removes all the associated interface addresses
  807                          * by itself.
  808                          * (jinmei@kame.net 20010129)
  809                          */
  810                         prelist_remove(pr);
  811                 }
  812         }
  813 
  814         /* cancel default outgoing interface setting */
  815         if (V_nd6_defifindex == ifp->if_index)
  816                 nd6_setdefaultiface(0);
  817 
  818         if (!V_ip6_forwarding && V_ip6_accept_rtadv) { /* XXX: too restrictive? */
  819                 /* refresh default router list
  820                  *
  821                  * 
  822                  */
  823                 defrouter_select();
  824 
  825         }
  826 
  827         /* XXXXX
  828          * We do not nuke the neighbor cache entries here any more
  829          * because the neighbor cache is kept in if_afdata[AF_INET6].
  830          * nd6_purge() is invoked by in6_ifdetach() which is called
  831          * from if_detach() where everything gets purged. So let
  832          * in6_domifdetach() do the actual L2 table purging work.
  833          */
  834 }
  835 
  836 /* 
  837  * the caller acquires and releases the lock on the lltbls
  838  * Returns the llentry locked
  839  */
  840 struct llentry *
  841 nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
  842 {
  843         struct sockaddr_in6 sin6;
  844         struct llentry *ln;
  845         int llflags;
  846         
  847         bzero(&sin6, sizeof(sin6));
  848         sin6.sin6_len = sizeof(struct sockaddr_in6);
  849         sin6.sin6_family = AF_INET6;
  850         sin6.sin6_addr = *addr6;
  851 
  852         IF_AFDATA_LOCK_ASSERT(ifp);
  853 
  854         llflags = 0;
  855         if (flags & ND6_CREATE)
  856             llflags |= LLE_CREATE;
  857         if (flags & ND6_EXCLUSIVE)
  858             llflags |= LLE_EXCLUSIVE;   
  859         
  860         ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6);
  861         if ((ln != NULL) && (llflags & LLE_CREATE))
  862                 ln->ln_state = ND6_LLINFO_NOSTATE;
  863         
  864         return (ln);
  865 }
  866 
  867 /*
  868  * Test whether a given IPv6 address is a neighbor or not, ignoring
  869  * the actual neighbor cache.  The neighbor cache is ignored in order
  870  * to not reenter the routing code from within itself.
  871  */
  872 static int
  873 nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
  874 {
  875         struct nd_prefix *pr;
  876         struct ifaddr *dstaddr;
  877 
  878         /*
  879          * A link-local address is always a neighbor.
  880          * XXX: a link does not necessarily specify a single interface.
  881          */
  882         if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
  883                 struct sockaddr_in6 sin6_copy;
  884                 u_int32_t zone;
  885 
  886                 /*
  887                  * We need sin6_copy since sa6_recoverscope() may modify the
  888                  * content (XXX).
  889                  */
  890                 sin6_copy = *addr;
  891                 if (sa6_recoverscope(&sin6_copy))
  892                         return (0); /* XXX: should be impossible */
  893                 if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
  894                         return (0);
  895                 if (sin6_copy.sin6_scope_id == zone)
  896                         return (1);
  897                 else
  898                         return (0);
  899         }
  900 
  901         /*
  902          * If the address matches one of our addresses,
  903          * it should be a neighbor.
  904          * If the address matches one of our on-link prefixes, it should be a
  905          * neighbor.
  906          */
  907         for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
  908                 if (pr->ndpr_ifp != ifp)
  909                         continue;
  910 
  911                 if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
  912                         struct rtentry *rt;
  913                         rt = rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 0, 0);
  914                         if (rt == NULL)
  915                                 continue;
  916                         /*
  917                          * This is the case where multiple interfaces
  918                          * have the same prefix, but only one is installed 
  919                          * into the routing table and that prefix entry
  920                          * is not the one being examined here. In the case
  921                          * where RADIX_MPATH is enabled, multiple route
  922                          * entries (of the same rt_key value) will be 
  923                          * installed because the interface addresses all
  924                          * differ.
  925                          */
  926                         if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
  927                                &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) {
  928                                 RTFREE_LOCKED(rt);
  929                                 continue;
  930                         }
  931                         RTFREE_LOCKED(rt);
  932                 }
  933 
  934                 if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
  935                     &addr->sin6_addr, &pr->ndpr_mask))
  936                         return (1);
  937         }
  938 
  939         /*
  940          * If the address is assigned on the node of the other side of
  941          * a p2p interface, the address should be a neighbor.
  942          */
  943         dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr);
  944         if (dstaddr != NULL) {
  945                 if (dstaddr->ifa_ifp == ifp) {
  946                         ifa_free(dstaddr);
  947                         return (1);
  948                 }
  949                 ifa_free(dstaddr);
  950         }
  951 
  952         /*
  953          * If the default router list is empty, all addresses are regarded
  954          * as on-link, and thus, as a neighbor.
  955          * XXX: we restrict the condition to hosts, because routers usually do
  956          * not have the "default router list".
  957          */
  958         if (!V_ip6_forwarding && TAILQ_FIRST(&V_nd_defrouter) == NULL &&
  959             V_nd6_defifindex == ifp->if_index) {
  960                 return (1);
  961         }
  962 
  963         return (0);
  964 }
  965 
  966 
  967 /*
  968  * Detect if a given IPv6 address identifies a neighbor on a given link.
  969  * XXX: should take care of the destination of a p2p link?
  970  */
  971 int
  972 nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
  973 {
  974         struct llentry *lle;
  975         int rc = 0;
  976 
  977         IF_AFDATA_UNLOCK_ASSERT(ifp);
  978         if (nd6_is_new_addr_neighbor(addr, ifp))
  979                 return (1);
  980 
  981         /*
  982          * Even if the address matches none of our addresses, it might be
  983          * in the neighbor cache.
  984          */
  985         IF_AFDATA_LOCK(ifp);
  986         if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) {
  987                 LLE_RUNLOCK(lle);
  988                 rc = 1;
  989         }
  990         IF_AFDATA_UNLOCK(ifp);
  991         return (rc);
  992 }
  993 
  994 /*
  995  * Free an nd6 llinfo entry.
  996  * Since the function would cause significant changes in the kernel, DO NOT
  997  * make it global, unless you have a strong reason for the change, and are sure
  998  * that the change is safe.
  999  */
 1000 static struct llentry *
 1001 nd6_free(struct llentry *ln, int gc)
 1002 {
 1003         struct llentry *next;
 1004         struct nd_defrouter *dr;
 1005         struct ifnet *ifp;
 1006 
 1007         LLE_WLOCK_ASSERT(ln);
 1008 
 1009         /*
 1010          * we used to have pfctlinput(PRC_HOSTDEAD) here.
 1011          * even though it is not harmful, it was not really necessary.
 1012          */
 1013 
 1014         /* cancel timer */
 1015         nd6_llinfo_settimer_locked(ln, -1);
 1016 
 1017         ifp = ln->lle_tbl->llt_ifp;
 1018 
 1019         if (!V_ip6_forwarding) {
 1020 
 1021                 dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
 1022 
 1023                 if (dr != NULL && dr->expire &&
 1024                     ln->ln_state == ND6_LLINFO_STALE && gc) {
 1025                         /*
 1026                          * If the reason for the deletion is just garbage
 1027                          * collection, and the neighbor is an active default
 1028                          * router, do not delete it.  Instead, reset the GC
 1029                          * timer using the router's lifetime.
 1030                          * Simply deleting the entry would affect default
 1031                          * router selection, which is not necessarily a good
 1032                          * thing, especially when we're using router preference
 1033                          * values.
 1034                          * XXX: the check for ln_state would be redundant,
 1035                          *      but we intentionally keep it just in case.
 1036                          */
 1037                         if (dr->expire > time_second)
 1038                                 nd6_llinfo_settimer_locked(ln,
 1039                                     (dr->expire - time_second) * hz);
 1040                         else
 1041                                 nd6_llinfo_settimer_locked(ln,
 1042                                     (long)V_nd6_gctimer * hz);
 1043 
 1044                         next = LIST_NEXT(ln, lle_next);
 1045                         LLE_REMREF(ln);
 1046                         LLE_WUNLOCK(ln);
 1047                         return (next);
 1048                 }
 1049 
 1050                 if (dr) {
 1051                         /*
 1052                          * Unreachablity of a router might affect the default
 1053                          * router selection and on-link detection of advertised
 1054                          * prefixes.
 1055                          */
 1056 
 1057                         /*
 1058                          * Temporarily fake the state to choose a new default
 1059                          * router and to perform on-link determination of
 1060                          * prefixes correctly.
 1061                          * Below the state will be set correctly,
 1062                          * or the entry itself will be deleted.
 1063                          */
 1064                         ln->ln_state = ND6_LLINFO_INCOMPLETE;
 1065                 }
 1066 
 1067                 if (ln->ln_router || dr) {
 1068 
 1069                         /*
 1070                          * We need to unlock to avoid a LOR with rt6_flush() with the
 1071                          * rnh and for the calls to pfxlist_onlink_check() and
 1072                          * defrouter_select() in the block further down for calls
 1073                          * into nd6_lookup().  We still hold a ref.
 1074                          */
 1075                         LLE_WUNLOCK(ln);
 1076 
 1077                         /*
 1078                          * rt6_flush must be called whether or not the neighbor
 1079                          * is in the Default Router List.
 1080                          * See a corresponding comment in nd6_na_input().
 1081                          */
 1082                         rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
 1083                 }
 1084 
 1085                 if (dr) {
 1086                         /*
 1087                          * Since defrouter_select() does not affect the
 1088                          * on-link determination and MIP6 needs the check
 1089                          * before the default router selection, we perform
 1090                          * the check now.
 1091                          */
 1092                         pfxlist_onlink_check();
 1093 
 1094                         /*
 1095                          * Refresh default router list.
 1096                          */
 1097                         defrouter_select();
 1098                 }
 1099 
 1100                 if (ln->ln_router || dr)
 1101                         LLE_WLOCK(ln);
 1102         }
 1103 
 1104         /*
 1105          * Before deleting the entry, remember the next entry as the
 1106          * return value.  We need this because pfxlist_onlink_check() above
 1107          * might have freed other entries (particularly the old next entry) as
 1108          * a side effect (XXX).
 1109          */
 1110         next = LIST_NEXT(ln, lle_next);
 1111 
 1112         /*
 1113          * Save to unlock. We still hold an extra reference and will not
 1114          * free(9) in llentry_free() if someone else holds one as well.
 1115          */
 1116         LLE_WUNLOCK(ln);
 1117         IF_AFDATA_LOCK(ifp);
 1118         LLE_WLOCK(ln);
 1119         LLE_REMREF(ln);
 1120         llentry_free(ln);
 1121         IF_AFDATA_UNLOCK(ifp);
 1122 
 1123         return (next);
 1124 }
 1125 
 1126 /*
 1127  * Upper-layer reachability hint for Neighbor Unreachability Detection.
 1128  *
 1129  * XXX cost-effective methods?
 1130  */
 1131 void
 1132 nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force)
 1133 {
 1134         struct llentry *ln;
 1135         struct ifnet *ifp;
 1136 
 1137         if ((dst6 == NULL) || (rt == NULL))
 1138                 return;
 1139 
 1140         ifp = rt->rt_ifp;
 1141         IF_AFDATA_LOCK(ifp);
 1142         ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL);
 1143         IF_AFDATA_UNLOCK(ifp);
 1144         if (ln == NULL)
 1145                 return;
 1146 
 1147         if (ln->ln_state < ND6_LLINFO_REACHABLE)
 1148                 goto done;
 1149 
 1150         /*
 1151          * if we get upper-layer reachability confirmation many times,
 1152          * it is possible we have false information.
 1153          */
 1154         if (!force) {
 1155                 ln->ln_byhint++;
 1156                 if (ln->ln_byhint > V_nd6_maxnudhint) {
 1157                         goto done;
 1158                 }
 1159         }
 1160 
 1161         ln->ln_state = ND6_LLINFO_REACHABLE;
 1162         if (!ND6_LLINFO_PERMANENT(ln)) {
 1163                 nd6_llinfo_settimer_locked(ln,
 1164                     (long)ND_IFINFO(rt->rt_ifp)->reachable * hz);
 1165         }
 1166 done:
 1167         LLE_WUNLOCK(ln);
 1168 }
 1169 
 1170 
 1171 /*
 1172  * Rejuvenate this function for routing operations related
 1173  * processing.
 1174  */
 1175 void
 1176 nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
 1177 {
 1178         struct sockaddr_in6 *gateway = (struct sockaddr_in6 *)rt->rt_gateway;
 1179         struct nd_defrouter *dr;
 1180         struct ifnet *ifp = rt->rt_ifp;
 1181 
 1182         RT_LOCK_ASSERT(rt);
 1183 
 1184         switch (req) {
 1185         case RTM_ADD:
 1186                 break;
 1187 
 1188         case RTM_DELETE:
 1189                 if (!ifp)
 1190                         return;
 1191                 /*
 1192                  * Only indirect routes are interesting.
 1193                  */
 1194                 if ((rt->rt_flags & RTF_GATEWAY) == 0)
 1195                         return;
 1196                 /*
 1197                  * check for default route
 1198                  */
 1199                 if (IN6_ARE_ADDR_EQUAL(&in6addr_any, 
 1200                                        &SIN6(rt_key(rt))->sin6_addr)) {
 1201 
 1202                         dr = defrouter_lookup(&gateway->sin6_addr, ifp);
 1203                         if (dr != NULL)
 1204                                 dr->installed = 0;
 1205                 }
 1206                 break;
 1207         }
 1208 }
 1209 
 1210 
 1211 int
 1212 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 1213 {
 1214         struct in6_drlist *drl = (struct in6_drlist *)data;
 1215         struct in6_oprlist *oprl = (struct in6_oprlist *)data;
 1216         struct in6_ndireq *ndi = (struct in6_ndireq *)data;
 1217         struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
 1218         struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
 1219         struct nd_defrouter *dr;
 1220         struct nd_prefix *pr;
 1221         int i = 0, error = 0;
 1222         int s;
 1223 
 1224         switch (cmd) {
 1225         case SIOCGDRLST_IN6:
 1226                 /*
 1227                  * obsolete API, use sysctl under net.inet6.icmp6
 1228                  */
 1229                 bzero(drl, sizeof(*drl));
 1230                 s = splnet();
 1231                 dr = TAILQ_FIRST(&V_nd_defrouter);
 1232                 while (dr && i < DRLSTSIZ) {
 1233                         drl->defrouter[i].rtaddr = dr->rtaddr;
 1234                         in6_clearscope(&drl->defrouter[i].rtaddr);
 1235 
 1236                         drl->defrouter[i].flags = dr->flags;
 1237                         drl->defrouter[i].rtlifetime = dr->rtlifetime;
 1238                         drl->defrouter[i].expire = dr->expire;
 1239                         drl->defrouter[i].if_index = dr->ifp->if_index;
 1240                         i++;
 1241                         dr = TAILQ_NEXT(dr, dr_entry);
 1242                 }
 1243                 splx(s);
 1244                 break;
 1245         case SIOCGPRLST_IN6:
 1246                 /*
 1247                  * obsolete API, use sysctl under net.inet6.icmp6
 1248                  *
 1249                  * XXX the structure in6_prlist was changed in backward-
 1250                  * incompatible manner.  in6_oprlist is used for SIOCGPRLST_IN6,
 1251                  * in6_prlist is used for nd6_sysctl() - fill_prlist().
 1252                  */
 1253                 /*
 1254                  * XXX meaning of fields, especialy "raflags", is very
 1255                  * differnet between RA prefix list and RR/static prefix list.
 1256                  * how about separating ioctls into two?
 1257                  */
 1258                 bzero(oprl, sizeof(*oprl));
 1259                 s = splnet();
 1260                 pr = V_nd_prefix.lh_first;
 1261                 while (pr && i < PRLSTSIZ) {
 1262                         struct nd_pfxrouter *pfr;
 1263                         int j;
 1264 
 1265                         oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
 1266                         oprl->prefix[i].raflags = pr->ndpr_raf;
 1267                         oprl->prefix[i].prefixlen = pr->ndpr_plen;
 1268                         oprl->prefix[i].vltime = pr->ndpr_vltime;
 1269                         oprl->prefix[i].pltime = pr->ndpr_pltime;
 1270                         oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
 1271                         if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
 1272                                 oprl->prefix[i].expire = 0;
 1273                         else {
 1274                                 time_t maxexpire;
 1275 
 1276                                 /* XXX: we assume time_t is signed. */
 1277                                 maxexpire = (-1) &
 1278                                     ~((time_t)1 <<
 1279                                     ((sizeof(maxexpire) * 8) - 1));
 1280                                 if (pr->ndpr_vltime <
 1281                                     maxexpire - pr->ndpr_lastupdate) {
 1282                                         oprl->prefix[i].expire =
 1283                                             pr->ndpr_lastupdate +
 1284                                             pr->ndpr_vltime;
 1285                                 } else
 1286                                         oprl->prefix[i].expire = maxexpire;
 1287                         }
 1288 
 1289                         pfr = pr->ndpr_advrtrs.lh_first;
 1290                         j = 0;
 1291                         while (pfr) {
 1292                                 if (j < DRLSTSIZ) {
 1293 #define RTRADDR oprl->prefix[i].advrtr[j]
 1294                                         RTRADDR = pfr->router->rtaddr;
 1295                                         in6_clearscope(&RTRADDR);
 1296 #undef RTRADDR
 1297                                 }
 1298                                 j++;
 1299                                 pfr = pfr->pfr_next;
 1300                         }
 1301                         oprl->prefix[i].advrtrs = j;
 1302                         oprl->prefix[i].origin = PR_ORIG_RA;
 1303 
 1304                         i++;
 1305                         pr = pr->ndpr_next;
 1306                 }
 1307                 splx(s);
 1308 
 1309                 break;
 1310         case OSIOCGIFINFO_IN6:
 1311 #define ND      ndi->ndi
 1312                 /* XXX: old ndp(8) assumes a positive value for linkmtu. */
 1313                 bzero(&ND, sizeof(ND));
 1314                 ND.linkmtu = IN6_LINKMTU(ifp);
 1315                 ND.maxmtu = ND_IFINFO(ifp)->maxmtu;
 1316                 ND.basereachable = ND_IFINFO(ifp)->basereachable;
 1317                 ND.reachable = ND_IFINFO(ifp)->reachable;
 1318                 ND.retrans = ND_IFINFO(ifp)->retrans;
 1319                 ND.flags = ND_IFINFO(ifp)->flags;
 1320                 ND.recalctm = ND_IFINFO(ifp)->recalctm;
 1321                 ND.chlim = ND_IFINFO(ifp)->chlim;
 1322                 break;
 1323         case SIOCGIFINFO_IN6:
 1324                 ND = *ND_IFINFO(ifp);
 1325                 break;
 1326         case SIOCSIFINFO_IN6:
 1327                 /*
 1328                  * used to change host variables from userland.
 1329                  * intented for a use on router to reflect RA configurations.
 1330                  */
 1331                 /* 0 means 'unspecified' */
 1332                 if (ND.linkmtu != 0) {
 1333                         if (ND.linkmtu < IPV6_MMTU ||
 1334                             ND.linkmtu > IN6_LINKMTU(ifp)) {
 1335                                 error = EINVAL;
 1336                                 break;
 1337                         }
 1338                         ND_IFINFO(ifp)->linkmtu = ND.linkmtu;
 1339                 }
 1340 
 1341                 if (ND.basereachable != 0) {
 1342                         int obasereachable = ND_IFINFO(ifp)->basereachable;
 1343 
 1344                         ND_IFINFO(ifp)->basereachable = ND.basereachable;
 1345                         if (ND.basereachable != obasereachable)
 1346                                 ND_IFINFO(ifp)->reachable =
 1347                                     ND_COMPUTE_RTIME(ND.basereachable);
 1348                 }
 1349                 if (ND.retrans != 0)
 1350                         ND_IFINFO(ifp)->retrans = ND.retrans;
 1351                 if (ND.chlim != 0)
 1352                         ND_IFINFO(ifp)->chlim = ND.chlim;
 1353                 /* FALLTHROUGH */
 1354         case SIOCSIFINFO_FLAGS:
 1355                 ND_IFINFO(ifp)->flags = ND.flags;
 1356                 break;
 1357 #undef ND
 1358         case SIOCSNDFLUSH_IN6:  /* XXX: the ioctl name is confusing... */
 1359                 /* sync kernel routing table with the default router list */
 1360                 defrouter_reset();
 1361                 defrouter_select();
 1362                 break;
 1363         case SIOCSPFXFLUSH_IN6:
 1364         {
 1365                 /* flush all the prefix advertised by routers */
 1366                 struct nd_prefix *pr, *next;
 1367 
 1368                 s = splnet();
 1369                 for (pr = V_nd_prefix.lh_first; pr; pr = next) {
 1370                         struct in6_ifaddr *ia, *ia_next;
 1371 
 1372                         next = pr->ndpr_next;
 1373 
 1374                         if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 1375                                 continue; /* XXX */
 1376 
 1377                         /* do we really have to remove addresses as well? */
 1378                         /* XXXRW: in6_ifaddrhead locking. */
 1379                         TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link,
 1380                             ia_next) {
 1381                                 if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 1382                                         continue;
 1383 
 1384                                 if (ia->ia6_ndpr == pr)
 1385                                         in6_purgeaddr(&ia->ia_ifa);
 1386                         }
 1387                         prelist_remove(pr);
 1388                 }
 1389                 splx(s);
 1390                 break;
 1391         }
 1392         case SIOCSRTRFLUSH_IN6:
 1393         {
 1394                 /* flush all the default routers */
 1395                 struct nd_defrouter *dr, *next;
 1396 
 1397                 s = splnet();
 1398                 defrouter_reset();
 1399                 for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = next) {
 1400                         next = TAILQ_NEXT(dr, dr_entry);
 1401                         defrtrlist_del(dr);
 1402                 }
 1403                 defrouter_select();
 1404                 splx(s);
 1405                 break;
 1406         }
 1407         case SIOCGNBRINFO_IN6:
 1408         {
 1409                 struct llentry *ln;
 1410                 struct in6_addr nb_addr = nbi->addr; /* make local for safety */
 1411 
 1412                 if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
 1413                         return (error);
 1414 
 1415                 IF_AFDATA_LOCK(ifp);
 1416                 ln = nd6_lookup(&nb_addr, 0, ifp);
 1417                 IF_AFDATA_UNLOCK(ifp);
 1418 
 1419                 if (ln == NULL) {
 1420                         error = EINVAL;
 1421                         break;
 1422                 }
 1423                 nbi->state = ln->ln_state;
 1424                 nbi->asked = ln->la_asked;
 1425                 nbi->isrouter = ln->ln_router;
 1426                 nbi->expire = ln->la_expire;
 1427                 LLE_RUNLOCK(ln);
 1428                 break;
 1429         }
 1430         case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */
 1431                 ndif->ifindex = V_nd6_defifindex;
 1432                 break;
 1433         case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */
 1434                 return (nd6_setdefaultiface(ndif->ifindex));
 1435         }
 1436         return (error);
 1437 }
 1438 
 1439 /*
 1440  * Create neighbor cache entry and cache link-layer address,
 1441  * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
 1442  *
 1443  * type - ICMP6 type
 1444  * code - type dependent information
 1445  *
 1446  * XXXXX
 1447  *  The caller of this function already acquired the ndp 
 1448  *  cache table lock because the cache entry is returned.
 1449  */
 1450 struct llentry *
 1451 nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
 1452     int lladdrlen, int type, int code)
 1453 {
 1454         struct llentry *ln = NULL;
 1455         int is_newentry;
 1456         int do_update;
 1457         int olladdr;
 1458         int llchange;
 1459         int flags;
 1460         int newstate = 0;
 1461         uint16_t router = 0;
 1462         struct sockaddr_in6 sin6;
 1463         struct mbuf *chain = NULL;
 1464         int static_route = 0;
 1465 
 1466         IF_AFDATA_UNLOCK_ASSERT(ifp);
 1467 
 1468         if (ifp == NULL)
 1469                 panic("ifp == NULL in nd6_cache_lladdr");
 1470         if (from == NULL)
 1471                 panic("from == NULL in nd6_cache_lladdr");
 1472 
 1473         /* nothing must be updated for unspecified address */
 1474         if (IN6_IS_ADDR_UNSPECIFIED(from))
 1475                 return NULL;
 1476 
 1477         /*
 1478          * Validation about ifp->if_addrlen and lladdrlen must be done in
 1479          * the caller.
 1480          *
 1481          * XXX If the link does not have link-layer adderss, what should
 1482          * we do? (ifp->if_addrlen == 0)
 1483          * Spec says nothing in sections for RA, RS and NA.  There's small
 1484          * description on it in NS section (RFC 2461 7.2.3).
 1485          */
 1486         flags = lladdr ? ND6_EXCLUSIVE : 0;
 1487         IF_AFDATA_LOCK(ifp);
 1488         ln = nd6_lookup(from, flags, ifp);
 1489 
 1490         if (ln == NULL) {
 1491                 flags |= ND6_EXCLUSIVE;
 1492                 ln = nd6_lookup(from, flags | ND6_CREATE, ifp);
 1493                 IF_AFDATA_UNLOCK(ifp);
 1494                 is_newentry = 1;
 1495         } else {
 1496                 IF_AFDATA_UNLOCK(ifp);          
 1497                 /* do nothing if static ndp is set */
 1498                 if (ln->la_flags & LLE_STATIC) {
 1499                         static_route = 1;
 1500                         goto done;
 1501                 }
 1502                 is_newentry = 0;
 1503         }
 1504         if (ln == NULL)
 1505                 return (NULL);
 1506 
 1507         olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
 1508         if (olladdr && lladdr) {
 1509                 llchange = bcmp(lladdr, &ln->ll_addr,
 1510                     ifp->if_addrlen);
 1511         } else
 1512                 llchange = 0;
 1513 
 1514         /*
 1515          * newentry olladdr  lladdr  llchange   (*=record)
 1516          *      0       n       n       --      (1)
 1517          *      0       y       n       --      (2)
 1518          *      0       n       y       --      (3) * STALE
 1519          *      0       y       y       n       (4) *
 1520          *      0       y       y       y       (5) * STALE
 1521          *      1       --      n       --      (6)   NOSTATE(= PASSIVE)
 1522          *      1       --      y       --      (7) * STALE
 1523          */
 1524 
 1525         if (lladdr) {           /* (3-5) and (7) */
 1526                 /*
 1527                  * Record source link-layer address
 1528                  * XXX is it dependent to ifp->if_type?
 1529                  */
 1530                 bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
 1531                 ln->la_flags |= LLE_VALID;
 1532         }
 1533 
 1534         if (!is_newentry) {
 1535                 if ((!olladdr && lladdr != NULL) ||     /* (3) */
 1536                     (olladdr && lladdr != NULL && llchange)) {  /* (5) */
 1537                         do_update = 1;
 1538                         newstate = ND6_LLINFO_STALE;
 1539                 } else                                  /* (1-2,4) */
 1540                         do_update = 0;
 1541         } else {
 1542                 do_update = 1;
 1543                 if (lladdr == NULL)                     /* (6) */
 1544                         newstate = ND6_LLINFO_NOSTATE;
 1545                 else                                    /* (7) */
 1546                         newstate = ND6_LLINFO_STALE;
 1547         }
 1548 
 1549         if (do_update) {
 1550                 /*
 1551                  * Update the state of the neighbor cache.
 1552                  */
 1553                 ln->ln_state = newstate;
 1554 
 1555                 if (ln->ln_state == ND6_LLINFO_STALE) {
 1556                         /*
 1557                          * XXX: since nd6_output() below will cause
 1558                          * state tansition to DELAY and reset the timer,
 1559                          * we must set the timer now, although it is actually
 1560                          * meaningless.
 1561                          */
 1562                         nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
 1563 
 1564                         if (ln->la_hold) {
 1565                                 struct mbuf *m_hold, *m_hold_next;
 1566 
 1567                                 /*
 1568                                  * reset the la_hold in advance, to explicitly
 1569                                  * prevent a la_hold lookup in nd6_output()
 1570                                  * (wouldn't happen, though...)
 1571                                  */
 1572                                 for (m_hold = ln->la_hold, ln->la_hold = NULL;
 1573                                     m_hold; m_hold = m_hold_next) {
 1574                                         m_hold_next = m_hold->m_nextpkt;
 1575                                         m_hold->m_nextpkt = NULL;
 1576 
 1577                                         /*
 1578                                          * we assume ifp is not a p2p here, so
 1579                                          * just set the 2nd argument as the
 1580                                          * 1st one.
 1581                                          */
 1582                                         nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
 1583                                 }
 1584                                 /*
 1585                                  * If we have mbufs in the chain we need to do
 1586                                  * deferred transmit. Copy the address from the
 1587                                  * llentry before dropping the lock down below.
 1588                                  */
 1589                                 if (chain != NULL)
 1590                                         memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
 1591                         }
 1592                 } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
 1593                         /* probe right away */
 1594                         nd6_llinfo_settimer_locked((void *)ln, 0);
 1595                 }
 1596         }
 1597 
 1598         /*
 1599          * ICMP6 type dependent behavior.
 1600          *
 1601          * NS: clear IsRouter if new entry
 1602          * RS: clear IsRouter
 1603          * RA: set IsRouter if there's lladdr
 1604          * redir: clear IsRouter if new entry
 1605          *
 1606          * RA case, (1):
 1607          * The spec says that we must set IsRouter in the following cases:
 1608          * - If lladdr exist, set IsRouter.  This means (1-5).
 1609          * - If it is old entry (!newentry), set IsRouter.  This means (7).
 1610          * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
 1611          * A quetion arises for (1) case.  (1) case has no lladdr in the
 1612          * neighbor cache, this is similar to (6).
 1613          * This case is rare but we figured that we MUST NOT set IsRouter.
 1614          *
 1615          * newentry olladdr  lladdr  llchange       NS  RS  RA  redir
 1616          *                                                      D R
 1617          *      0       n       n       --      (1)     c   ?     s
 1618          *      0       y       n       --      (2)     c   s     s
 1619          *      0       n       y       --      (3)     c   s     s
 1620          *      0       y       y       n       (4)     c   s     s
 1621          *      0       y       y       y       (5)     c   s     s
 1622          *      1       --      n       --      (6) c   c       c s
 1623          *      1       --      y       --      (7) c   c   s   c s
 1624          *
 1625          *                                      (c=clear s=set)
 1626          */
 1627         switch (type & 0xff) {
 1628         case ND_NEIGHBOR_SOLICIT:
 1629                 /*
 1630                  * New entry must have is_router flag cleared.
 1631                  */
 1632                 if (is_newentry)        /* (6-7) */
 1633                         ln->ln_router = 0;
 1634                 break;
 1635         case ND_REDIRECT:
 1636                 /*
 1637                  * If the icmp is a redirect to a better router, always set the
 1638                  * is_router flag.  Otherwise, if the entry is newly created,
 1639                  * clear the flag.  [RFC 2461, sec 8.3]
 1640                  */
 1641                 if (code == ND_REDIRECT_ROUTER)
 1642                         ln->ln_router = 1;
 1643                 else if (is_newentry) /* (6-7) */
 1644                         ln->ln_router = 0;
 1645                 break;
 1646         case ND_ROUTER_SOLICIT:
 1647                 /*
 1648                  * is_router flag must always be cleared.
 1649                  */
 1650                 ln->ln_router = 0;
 1651                 break;
 1652         case ND_ROUTER_ADVERT:
 1653                 /*
 1654                  * Mark an entry with lladdr as a router.
 1655                  */
 1656                 if ((!is_newentry && (olladdr || lladdr)) ||    /* (2-5) */
 1657                     (is_newentry && lladdr)) {                  /* (7) */
 1658                         ln->ln_router = 1;
 1659                 }
 1660                 break;
 1661         }
 1662 
 1663         if (ln != NULL) {
 1664                 static_route = (ln->la_flags & LLE_STATIC);
 1665                 router = ln->ln_router;
 1666 
 1667                 if (flags & ND6_EXCLUSIVE)
 1668                         LLE_WUNLOCK(ln);
 1669                 else
 1670                         LLE_RUNLOCK(ln);
 1671                 if (static_route)
 1672                         ln = NULL;
 1673         }
 1674         if (chain)
 1675                 nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
 1676         
 1677         /*
 1678          * When the link-layer address of a router changes, select the
 1679          * best router again.  In particular, when the neighbor entry is newly
 1680          * created, it might affect the selection policy.
 1681          * Question: can we restrict the first condition to the "is_newentry"
 1682          * case?
 1683          * XXX: when we hear an RA from a new router with the link-layer
 1684          * address option, defrouter_select() is called twice, since
 1685          * defrtrlist_update called the function as well.  However, I believe
 1686          * we can compromise the overhead, since it only happens the first
 1687          * time.
 1688          * XXX: although defrouter_select() should not have a bad effect
 1689          * for those are not autoconfigured hosts, we explicitly avoid such
 1690          * cases for safety.
 1691          */
 1692         if (do_update && router && !V_ip6_forwarding && V_ip6_accept_rtadv) {
 1693                 /*
 1694                  * guaranteed recursion
 1695                  */
 1696                 defrouter_select();
 1697         }
 1698         
 1699         return (ln);
 1700 done:   
 1701         if (ln != NULL) {
 1702                 if (flags & ND6_EXCLUSIVE)
 1703                         LLE_WUNLOCK(ln);
 1704                 else
 1705                         LLE_RUNLOCK(ln);
 1706                 if (static_route)
 1707                         ln = NULL;
 1708         }
 1709         return (ln);
 1710 }
 1711 
 1712 static void
 1713 nd6_slowtimo(void *arg)
 1714 {
 1715         CURVNET_SET((struct vnet *) arg);
 1716         struct nd_ifinfo *nd6if;
 1717         struct ifnet *ifp;
 1718 
 1719         callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
 1720             nd6_slowtimo, curvnet);
 1721         IFNET_RLOCK_NOSLEEP();
 1722         for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
 1723             ifp = TAILQ_NEXT(ifp, if_list)) {
 1724                 nd6if = ND_IFINFO(ifp);
 1725                 if (nd6if->basereachable && /* already initialized */
 1726                     (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
 1727                         /*
 1728                          * Since reachable time rarely changes by router
 1729                          * advertisements, we SHOULD insure that a new random
 1730                          * value gets recomputed at least once every few hours.
 1731                          * (RFC 2461, 6.3.4)
 1732                          */
 1733                         nd6if->recalctm = V_nd6_recalc_reachtm_interval;
 1734                         nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
 1735                 }
 1736         }
 1737         IFNET_RUNLOCK_NOSLEEP();
 1738         CURVNET_RESTORE();
 1739 }
 1740 
 1741 int
 1742 nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
 1743     struct sockaddr_in6 *dst, struct rtentry *rt0)
 1744 {
 1745 
 1746         return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL));
 1747 }
 1748 
 1749 
 1750 /*
 1751  * Note that I'm not enforcing any global serialization
 1752  * lle state or asked changes here as the logic is too
 1753  * complicated to avoid having to always acquire an exclusive
 1754  * lock
 1755  * KMM
 1756  *
 1757  */
 1758 #define senderr(e) { error = (e); goto bad;}
 1759 
 1760 int
 1761 nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
 1762     struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle,
 1763         struct mbuf **chain)
 1764 {
 1765         struct mbuf *m = m0;
 1766         struct llentry *ln = lle;
 1767         int error = 0;
 1768         int flags = 0;
 1769 
 1770 #ifdef INVARIANTS
 1771         if (lle != NULL) {
 1772                 
 1773                 LLE_WLOCK_ASSERT(lle);
 1774 
 1775                 KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed"));
 1776         }
 1777 #endif
 1778         if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
 1779                 goto sendpkt;
 1780 
 1781         if (nd6_need_cache(ifp) == 0)
 1782                 goto sendpkt;
 1783 
 1784         /*
 1785          * next hop determination.  This routine is derived from ether_output.
 1786          */
 1787 
 1788         /*
 1789          * Address resolution or Neighbor Unreachability Detection
 1790          * for the next hop.
 1791          * At this point, the destination of the packet must be a unicast
 1792          * or an anycast address(i.e. not a multicast).
 1793          */
 1794 
 1795         flags = ((m != NULL) || (lle != NULL)) ? LLE_EXCLUSIVE : 0;
 1796         if (ln == NULL) {
 1797         retry:
 1798                 IF_AFDATA_LOCK(ifp);
 1799                 ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst);
 1800                 IF_AFDATA_UNLOCK(ifp);
 1801                 if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
 1802                         /*
 1803                          * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
 1804                          * the condition below is not very efficient.  But we believe
 1805                          * it is tolerable, because this should be a rare case.
 1806                          */
 1807                         flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0);
 1808                         IF_AFDATA_LOCK(ifp);
 1809                         ln = nd6_lookup(&dst->sin6_addr, flags, ifp);
 1810                         IF_AFDATA_UNLOCK(ifp);
 1811                 }
 1812         } 
 1813         if (ln == NULL) {
 1814                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
 1815                     !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
 1816                         char ip6buf[INET6_ADDRSTRLEN];
 1817                         log(LOG_DEBUG,
 1818                             "nd6_output: can't allocate llinfo for %s "
 1819                             "(ln=%p)\n",
 1820                             ip6_sprintf(ip6buf, &dst->sin6_addr), ln);
 1821                         senderr(EIO);   /* XXX: good error? */
 1822                 }
 1823                 goto sendpkt;   /* send anyway */
 1824         }
 1825 
 1826         /* We don't have to do link-layer address resolution on a p2p link. */
 1827         if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
 1828             ln->ln_state < ND6_LLINFO_REACHABLE) {
 1829                 if ((flags & LLE_EXCLUSIVE) == 0) {
 1830                         flags |= LLE_EXCLUSIVE;
 1831                         goto retry;
 1832                 }
 1833                 ln->ln_state = ND6_LLINFO_STALE;
 1834                 nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
 1835         }
 1836 
 1837         /*
 1838          * The first time we send a packet to a neighbor whose entry is
 1839          * STALE, we have to change the state to DELAY and a sets a timer to
 1840          * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
 1841          * neighbor unreachability detection on expiration.
 1842          * (RFC 2461 7.3.3)
 1843          */
 1844         if (ln->ln_state == ND6_LLINFO_STALE) {
 1845                 if ((flags & LLE_EXCLUSIVE) == 0) {
 1846                         flags |= LLE_EXCLUSIVE;
 1847                         LLE_RUNLOCK(ln);
 1848                         goto retry;
 1849                 }
 1850                 ln->la_asked = 0;
 1851                 ln->ln_state = ND6_LLINFO_DELAY;
 1852                 nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz);
 1853         }
 1854 
 1855         /*
 1856          * If the neighbor cache entry has a state other than INCOMPLETE
 1857          * (i.e. its link-layer address is already resolved), just
 1858          * send the packet.
 1859          */
 1860         if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
 1861                 goto sendpkt;
 1862 
 1863         /*
 1864          * There is a neighbor cache entry, but no ethernet address
 1865          * response yet.  Append this latest packet to the end of the
 1866          * packet queue in the mbuf, unless the number of the packet
 1867          * does not exceed nd6_maxqueuelen.  When it exceeds nd6_maxqueuelen,
 1868          * the oldest packet in the queue will be removed.
 1869          */
 1870         if (ln->ln_state == ND6_LLINFO_NOSTATE)
 1871                 ln->ln_state = ND6_LLINFO_INCOMPLETE;
 1872 
 1873         if ((flags & LLE_EXCLUSIVE) == 0) {
 1874                 flags |= LLE_EXCLUSIVE;
 1875                 LLE_RUNLOCK(ln);
 1876                 goto retry;
 1877         }
 1878 
 1879         LLE_WLOCK_ASSERT(ln);
 1880 
 1881         if (ln->la_hold) {
 1882                 struct mbuf *m_hold;
 1883                 int i;
 1884                 
 1885                 i = 0;
 1886                 for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) {
 1887                         i++;
 1888                         if (m_hold->m_nextpkt == NULL) {
 1889                                 m_hold->m_nextpkt = m;
 1890                                 break;
 1891                         }
 1892                 }
 1893                 while (i >= V_nd6_maxqueuelen) {
 1894                         m_hold = ln->la_hold;
 1895                         ln->la_hold = ln->la_hold->m_nextpkt;
 1896                         m_freem(m_hold);
 1897                         i--;
 1898                 }
 1899         } else {
 1900                 ln->la_hold = m;
 1901         }
 1902 
 1903         /*
 1904          * If there has been no NS for the neighbor after entering the
 1905          * INCOMPLETE state, send the first solicitation.
 1906          */
 1907         if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
 1908                 ln->la_asked++;
 1909                 
 1910                 nd6_llinfo_settimer_locked(ln,
 1911                     (long)ND_IFINFO(ifp)->retrans * hz / 1000);
 1912                 LLE_WUNLOCK(ln);
 1913                 nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
 1914                 if (lle != NULL && ln == lle)
 1915                         LLE_WLOCK(lle);
 1916 
 1917         } else if (lle == NULL || ln != lle) {
 1918                 /*
 1919                  * We did the lookup (no lle arg) so we
 1920                  * need to do the unlock here.
 1921                  */
 1922                 LLE_WUNLOCK(ln);
 1923         }
 1924 
 1925         return (0);
 1926 
 1927   sendpkt:
 1928         /* discard the packet if IPv6 operation is disabled on the interface */
 1929         if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
 1930                 error = ENETDOWN; /* better error? */
 1931                 goto bad;
 1932         }
 1933         /*
 1934          * ln is valid and the caller did not pass in 
 1935          * an llentry
 1936          */
 1937         if ((ln != NULL) && (lle == NULL)) {
 1938                 if (flags & LLE_EXCLUSIVE)
 1939                         LLE_WUNLOCK(ln);
 1940                 else
 1941                         LLE_RUNLOCK(ln);
 1942         }
 1943 
 1944 #ifdef MAC
 1945         mac_netinet6_nd6_send(ifp, m);
 1946 #endif
 1947         /*
 1948          * We were passed in a pointer to an lle with the lock held 
 1949          * this means that we can't call if_output as we will
 1950          * recurse on the lle lock - so what we do is we create
 1951          * a list of mbufs to send and transmit them in the caller
 1952          * after the lock is dropped
 1953          */
 1954         if (lle != NULL) {
 1955                 if (*chain == NULL)
 1956                         *chain = m;
 1957                 else {
 1958                         struct mbuf *mb;
 1959 
 1960                         /*
 1961                          * append mbuf to end of deferred chain
 1962                          */
 1963                         mb = *chain;
 1964                         while (mb->m_nextpkt != NULL)
 1965                                 mb = mb->m_nextpkt;
 1966                         mb->m_nextpkt = m;
 1967                 }
 1968                 return (error);
 1969         }
 1970         if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 1971                 return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
 1972                     NULL));
 1973         }
 1974         error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL);
 1975         return (error);
 1976 
 1977   bad:
 1978         /*
 1979          * ln is valid and the caller did not pass in 
 1980          * an llentry
 1981          */
 1982         if ((ln != NULL) && (lle == NULL)) {
 1983                 if (flags & LLE_EXCLUSIVE)
 1984                         LLE_WUNLOCK(ln);
 1985                 else
 1986                         LLE_RUNLOCK(ln);
 1987         }
 1988         if (m)
 1989                 m_freem(m);
 1990         return (error);
 1991 }
 1992 #undef senderr
 1993 
 1994 
 1995 int
 1996 nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
 1997     struct sockaddr_in6 *dst, struct route *ro)
 1998 {
 1999         struct mbuf *m, *m_head;
 2000         struct ifnet *outifp;
 2001         int error = 0;
 2002 
 2003         m_head = chain;
 2004         if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 2005                 outifp = origifp;
 2006         else
 2007                 outifp = ifp;
 2008         
 2009         while (m_head) {
 2010                 m = m_head;
 2011                 m_head = m_head->m_nextpkt;
 2012                 error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro);                         
 2013         }
 2014 
 2015         /*
 2016          * XXX
 2017          * note that intermediate errors are blindly ignored - but this is 
 2018          * the same convention as used with nd6_output when called by
 2019          * nd6_cache_lladdr
 2020          */
 2021         return (error);
 2022 }       
 2023 
 2024 
 2025 int
 2026 nd6_need_cache(struct ifnet *ifp)
 2027 {
 2028         /*
 2029          * XXX: we currently do not make neighbor cache on any interface
 2030          * other than ARCnet, Ethernet, FDDI and GIF.
 2031          *
 2032          * RFC2893 says:
 2033          * - unidirectional tunnels needs no ND
 2034          */
 2035         switch (ifp->if_type) {
 2036         case IFT_ARCNET:
 2037         case IFT_ETHER:
 2038         case IFT_FDDI:
 2039         case IFT_IEEE1394:
 2040 #ifdef IFT_L2VLAN
 2041         case IFT_L2VLAN:
 2042 #endif
 2043 #ifdef IFT_IEEE80211
 2044         case IFT_IEEE80211:
 2045 #endif
 2046 #ifdef IFT_CARP
 2047         case IFT_CARP:
 2048 #endif
 2049         case IFT_GIF:           /* XXX need more cases? */
 2050         case IFT_PPP:
 2051         case IFT_TUNNEL:
 2052         case IFT_BRIDGE:
 2053         case IFT_PROPVIRTUAL:
 2054                 return (1);
 2055         default:
 2056                 return (0);
 2057         }
 2058 }
 2059 
 2060 /*
 2061  * the callers of this function need to be re-worked to drop
 2062  * the lle lock, drop here for now
 2063  */
 2064 int
 2065 nd6_storelladdr(struct ifnet *ifp, struct mbuf *m,
 2066     struct sockaddr *dst, u_char *desten, struct llentry **lle)
 2067 {
 2068         struct llentry *ln;
 2069 
 2070         *lle = NULL;
 2071         IF_AFDATA_UNLOCK_ASSERT(ifp);
 2072         if (m->m_flags & M_MCAST) {
 2073                 int i;
 2074 
 2075                 switch (ifp->if_type) {
 2076                 case IFT_ETHER:
 2077                 case IFT_FDDI:
 2078 #ifdef IFT_L2VLAN
 2079                 case IFT_L2VLAN:
 2080 #endif
 2081 #ifdef IFT_IEEE80211
 2082                 case IFT_IEEE80211:
 2083 #endif
 2084                 case IFT_BRIDGE:
 2085                 case IFT_ISO88025:
 2086                         ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
 2087                                                  desten);
 2088                         return (0);
 2089                 case IFT_IEEE1394:
 2090                         /*
 2091                          * netbsd can use if_broadcastaddr, but we don't do so
 2092                          * to reduce # of ifdef.
 2093                          */
 2094                         for (i = 0; i < ifp->if_addrlen; i++)
 2095                                 desten[i] = ~0;
 2096                         return (0);
 2097                 case IFT_ARCNET:
 2098                         *desten = 0;
 2099                         return (0);
 2100                 default:
 2101                         m_freem(m);
 2102                         return (EAFNOSUPPORT);
 2103                 }
 2104         }
 2105 
 2106 
 2107         /*
 2108          * the entry should have been created in nd6_store_lladdr
 2109          */
 2110         IF_AFDATA_LOCK(ifp);
 2111         ln = lla_lookup(LLTABLE6(ifp), 0, dst);
 2112         IF_AFDATA_UNLOCK(ifp);
 2113         if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) {
 2114                 if (ln != NULL)
 2115                         LLE_RUNLOCK(ln);
 2116                 /* this could happen, if we could not allocate memory */
 2117                 m_freem(m);
 2118                 return (1);
 2119         }
 2120 
 2121         bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
 2122         *lle = ln;
 2123         LLE_RUNLOCK(ln);
 2124         /*
 2125          * A *small* use after free race exists here
 2126          */
 2127         return (0);
 2128 }
 2129 
 2130 static void 
 2131 clear_llinfo_pqueue(struct llentry *ln)
 2132 {
 2133         struct mbuf *m_hold, *m_hold_next;
 2134 
 2135         for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) {
 2136                 m_hold_next = m_hold->m_nextpkt;
 2137                 m_hold->m_nextpkt = NULL;
 2138                 m_freem(m_hold);
 2139         }
 2140 
 2141         ln->la_hold = NULL;
 2142         return;
 2143 }
 2144 
 2145 static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
 2146 static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
 2147 #ifdef SYSCTL_DECL
 2148 SYSCTL_DECL(_net_inet6_icmp6);
 2149 #endif
 2150 SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
 2151         CTLFLAG_RD, nd6_sysctl_drlist, "");
 2152 SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
 2153         CTLFLAG_RD, nd6_sysctl_prlist, "");
 2154 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
 2155         CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
 2156 
 2157 static int
 2158 nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
 2159 {
 2160         int error;
 2161         char buf[1024] __aligned(4);
 2162         struct in6_defrouter *d, *de;
 2163         struct nd_defrouter *dr;
 2164 
 2165         if (req->newptr)
 2166                 return EPERM;
 2167         error = 0;
 2168 
 2169         for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
 2170              dr = TAILQ_NEXT(dr, dr_entry)) {
 2171                 d = (struct in6_defrouter *)buf;
 2172                 de = (struct in6_defrouter *)(buf + sizeof(buf));
 2173 
 2174                 if (d + 1 <= de) {
 2175                         bzero(d, sizeof(*d));
 2176                         d->rtaddr.sin6_family = AF_INET6;
 2177                         d->rtaddr.sin6_len = sizeof(d->rtaddr);
 2178                         d->rtaddr.sin6_addr = dr->rtaddr;
 2179                         error = sa6_recoverscope(&d->rtaddr);
 2180                         if (error != 0)
 2181                                 return (error);
 2182                         d->flags = dr->flags;
 2183                         d->rtlifetime = dr->rtlifetime;
 2184                         d->expire = dr->expire;
 2185                         d->if_index = dr->ifp->if_index;
 2186                 } else
 2187                         panic("buffer too short");
 2188 
 2189                 error = SYSCTL_OUT(req, buf, sizeof(*d));
 2190                 if (error)
 2191                         break;
 2192         }
 2193 
 2194         return (error);
 2195 }
 2196 
 2197 static int
 2198 nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
 2199 {
 2200         int error;
 2201         char buf[1024] __aligned(4);
 2202         struct in6_prefix *p, *pe;
 2203         struct nd_prefix *pr;
 2204         char ip6buf[INET6_ADDRSTRLEN];
 2205 
 2206         if (req->newptr)
 2207                 return EPERM;
 2208         error = 0;
 2209 
 2210         for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 2211                 u_short advrtrs;
 2212                 size_t advance;
 2213                 struct sockaddr_in6 *sin6, *s6;
 2214                 struct nd_pfxrouter *pfr;
 2215 
 2216                 p = (struct in6_prefix *)buf;
 2217                 pe = (struct in6_prefix *)(buf + sizeof(buf));
 2218 
 2219                 if (p + 1 <= pe) {
 2220                         bzero(p, sizeof(*p));
 2221                         sin6 = (struct sockaddr_in6 *)(p + 1);
 2222 
 2223                         p->prefix = pr->ndpr_prefix;
 2224                         if (sa6_recoverscope(&p->prefix)) {
 2225                                 log(LOG_ERR,
 2226                                     "scope error in prefix list (%s)\n",
 2227                                     ip6_sprintf(ip6buf, &p->prefix.sin6_addr));
 2228                                 /* XXX: press on... */
 2229                         }
 2230                         p->raflags = pr->ndpr_raf;
 2231                         p->prefixlen = pr->ndpr_plen;
 2232                         p->vltime = pr->ndpr_vltime;
 2233                         p->pltime = pr->ndpr_pltime;
 2234                         p->if_index = pr->ndpr_ifp->if_index;
 2235                         if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
 2236                                 p->expire = 0;
 2237                         else {
 2238                                 time_t maxexpire;
 2239 
 2240                                 /* XXX: we assume time_t is signed. */
 2241                                 maxexpire = (-1) &
 2242                                     ~((time_t)1 <<
 2243                                     ((sizeof(maxexpire) * 8) - 1));
 2244                                 if (pr->ndpr_vltime <
 2245                                     maxexpire - pr->ndpr_lastupdate) {
 2246                                     p->expire = pr->ndpr_lastupdate +
 2247                                         pr->ndpr_vltime;
 2248                                 } else
 2249                                         p->expire = maxexpire;
 2250                         }
 2251                         p->refcnt = pr->ndpr_refcnt;
 2252                         p->flags = pr->ndpr_stateflags;
 2253                         p->origin = PR_ORIG_RA;
 2254                         advrtrs = 0;
 2255                         for (pfr = pr->ndpr_advrtrs.lh_first; pfr;
 2256                              pfr = pfr->pfr_next) {
 2257                                 if ((void *)&sin6[advrtrs + 1] > (void *)pe) {
 2258                                         advrtrs++;
 2259                                         continue;
 2260                                 }
 2261                                 s6 = &sin6[advrtrs];
 2262                                 bzero(s6, sizeof(*s6));
 2263                                 s6->sin6_family = AF_INET6;
 2264                                 s6->sin6_len = sizeof(*sin6);
 2265                                 s6->sin6_addr = pfr->router->rtaddr;
 2266                                 if (sa6_recoverscope(s6)) {
 2267                                         log(LOG_ERR,
 2268                                             "scope error in "
 2269                                             "prefix list (%s)\n",
 2270                                             ip6_sprintf(ip6buf,
 2271                                                     &pfr->router->rtaddr));
 2272                                 }
 2273                                 advrtrs++;
 2274                         }
 2275                         p->advrtrs = advrtrs;
 2276                 } else
 2277                         panic("buffer too short");
 2278 
 2279                 advance = sizeof(*p) + sizeof(*sin6) * advrtrs;
 2280                 error = SYSCTL_OUT(req, buf, advance);
 2281                 if (error)
 2282                         break;
 2283         }
 2284 
 2285         return (error);
 2286 }

Cache object: a3413e95e182e154334f547e6a4167be


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.