[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet6/in6_src.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*-
  2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  3  * All rights reserved.
  4  *
  5  * Redistribution and use in source and binary forms, with or without
  6  * modification, are permitted provided that the following conditions
  7  * are met:
  8  * 1. Redistributions of source code must retain the above copyright
  9  *    notice, this list of conditions and the following disclaimer.
 10  * 2. Redistributions in binary form must reproduce the above copyright
 11  *    notice, this list of conditions and the following disclaimer in the
 12  *    documentation and/or other materials provided with the distribution.
 13  * 3. Neither the name of the project nor the names of its contributors
 14  *    may be used to endorse or promote products derived from this software
 15  *    without specific prior written permission.
 16  *
 17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 27  * SUCH DAMAGE.
 28  *
 29  *      $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $
 30  */
 31 
 32 /*-
 33  * Copyright (c) 1982, 1986, 1991, 1993
 34  *      The Regents of the University of California.  All rights reserved.
 35  *
 36  * Redistribution and use in source and binary forms, with or without
 37  * modification, are permitted provided that the following conditions
 38  * are met:
 39  * 1. Redistributions of source code must retain the above copyright
 40  *    notice, this list of conditions and the following disclaimer.
 41  * 2. Redistributions in binary form must reproduce the above copyright
 42  *    notice, this list of conditions and the following disclaimer in the
 43  *    documentation and/or other materials provided with the distribution.
 44  * 4. Neither the name of the University nor the names of its contributors
 45  *    may be used to endorse or promote products derived from this software
 46  *    without specific prior written permission.
 47  *
 48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 58  * SUCH DAMAGE.
 59  *
 60  *      @(#)in_pcb.c    8.2 (Berkeley) 1/4/94
 61  */
 62 
 63 #include <sys/cdefs.h>
 64 __FBSDID("$FreeBSD: src/sys/netinet6/in6_src.c,v 1.62 2008/12/02 21:37:28 bz Exp $");
 65 
 66 #include "opt_inet.h"
 67 #include "opt_inet6.h"
 68 #include "opt_mpath.h"
 69 
 70 #include <sys/param.h>
 71 #include <sys/systm.h>
 72 #include <sys/lock.h>
 73 #include <sys/malloc.h>
 74 #include <sys/mbuf.h>
 75 #include <sys/priv.h>
 76 #include <sys/protosw.h>
 77 #include <sys/socket.h>
 78 #include <sys/socketvar.h>
 79 #include <sys/sockio.h>
 80 #include <sys/sysctl.h>
 81 #include <sys/errno.h>
 82 #include <sys/time.h>
 83 #include <sys/jail.h>
 84 #include <sys/kernel.h>
 85 #include <sys/sx.h>
 86 #include <sys/vimage.h>
 87 
 88 #include <net/if.h>
 89 #include <net/route.h>
 90 #ifdef RADIX_MPATH
 91 #include <net/radix_mpath.h>
 92 #endif
 93 
 94 #include <netinet/in.h>
 95 #include <netinet/in_var.h>
 96 #include <netinet/in_systm.h>
 97 #include <netinet/ip.h>
 98 #include <netinet/in_pcb.h>
 99 #include <netinet/ip_var.h>
100 #include <netinet/udp.h>
101 #include <netinet/udp_var.h>
102 #include <netinet/vinet.h>
103 
104 #include <netinet6/in6_var.h>
105 #include <netinet/ip6.h>
106 #include <netinet6/in6_pcb.h>
107 #include <netinet6/ip6_var.h>
108 #include <netinet6/scope6_var.h>
109 #include <netinet6/nd6.h>
110 #include <netinet6/vinet6.h>
111 
112 static struct mtx addrsel_lock;
113 #define ADDRSEL_LOCK_INIT()     mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF)
114 #define ADDRSEL_LOCK()          mtx_lock(&addrsel_lock)
115 #define ADDRSEL_UNLOCK()        mtx_unlock(&addrsel_lock)
116 #define ADDRSEL_LOCK_ASSERT()   mtx_assert(&addrsel_lock, MA_OWNED)
117 
118 static struct sx addrsel_sxlock;
119 #define ADDRSEL_SXLOCK_INIT()   sx_init(&addrsel_sxlock, "addrsel_sxlock")
120 #define ADDRSEL_SLOCK()         sx_slock(&addrsel_sxlock)
121 #define ADDRSEL_SUNLOCK()       sx_sunlock(&addrsel_sxlock)
122 #define ADDRSEL_XLOCK()         sx_xlock(&addrsel_sxlock)
123 #define ADDRSEL_XUNLOCK()       sx_xunlock(&addrsel_sxlock)
124 
125 #define ADDR_LABEL_NOTAPP (-1)
126 
127 #ifdef VIMAGE_GLOBALS
128 struct in6_addrpolicy defaultaddrpolicy;
129 int ip6_prefer_tempaddr;
130 #endif
131 
132 static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *,
133         struct ip6_moptions *, struct route_in6 *, struct ifnet **,
134         struct rtentry **, int, int));
135 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *,
136         struct ip6_moptions *, struct route_in6 *ro, struct ifnet **));
137 
138 static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);
139 
140 static void init_policy_queue(void);
141 static int add_addrsel_policyent(struct in6_addrpolicy *);
142 static int delete_addrsel_policyent(struct in6_addrpolicy *);
143 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *),
144                                     void *));
145 static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
146 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
147 
148 /*
149  * Return an IPv6 address, which is the most appropriate for a given
150  * destination and user specified options.
151  * If necessary, this function looks up the routing table and returns
152  * an entry to the caller for later use.
153  */
/*
 * Control-transfer helpers for the candidate loop of in6_selectsrc().
 *
 * Each helper first records that selection rule 'r' made the decision and
 * then jumps to a label that the loop body has to provide:
 *
 *	REPLACE(r)	goto replace
 *	NEXT(r)		goto next
 *	BREAK(r)	goto out
 *
 * The transfers are gotos because the helpers are wrapped in
 * do { } while (0); a 'continue' or 'break' written here would act on that
 * wrapper and not on the caller's loop.
 */

/*
 * Bump the per-rule counter in V_ip6stat.ip6s_sources_rule[].  Rule numbers
 * that do not fit in the array are silently ignored.
 */
#define IP6S_SOURCES_RULE_HIT(r) do {					\
	if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) /			\
	    sizeof(V_ip6stat.ip6s_sources_rule[0]))			\
		V_ip6stat.ip6s_sources_rule[(r)]++;			\
} while (0)

#define REPLACE(r) do {							\
	IP6S_SOURCES_RULE_HIT(r);					\
	goto replace;							\
} while (0)

#define NEXT(r) do {							\
	IP6S_SOURCES_RULE_HIT(r);					\
	goto next;							\
} while (0)

#define BREAK(r) do {							\
	IP6S_SOURCES_RULE_HIT(r);					\
	goto out;							\
} while (0)
180 
181 struct in6_addr *
182 in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
183     struct inpcb *inp, struct route_in6 *ro, struct ucred *cred,
184     struct ifnet **ifpp, int *errorp)
185 {
186         INIT_VNET_INET6(curvnet);
187         struct in6_addr dst;
188         struct ifnet *ifp = NULL;
189         struct in6_ifaddr *ia = NULL, *ia_best = NULL;
190         struct in6_pktinfo *pi = NULL;
191         int dst_scope = -1, best_scope = -1, best_matchlen = -1;
192         struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
193         u_int32_t odstzone;
194         int prefer_tempaddr;
195         struct ip6_moptions *mopts;
196 
197         dst = dstsock->sin6_addr; /* make a copy for local operation */
198         *errorp = 0;
199         if (ifpp)
200                 *ifpp = NULL;
201 
202         if (inp != NULL) {
203                 INP_LOCK_ASSERT(inp);
204                 mopts = inp->in6p_moptions;
205         } else {
206                 mopts = NULL;
207         }
208 
209         /*
210          * If the source address is explicitly specified by the caller,
211          * check if the requested source address is indeed a unicast address
212          * assigned to the node, and can be used as the packet's source
213          * address.  If everything is okay, use the address as source.
214          */
215         if (opts && (pi = opts->ip6po_pktinfo) &&
216             !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
217                 struct sockaddr_in6 srcsock;
218                 struct in6_ifaddr *ia6;
219 
220                 /* get the outgoing interface */
221                 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp))
222                     != 0) {
223                         return (NULL);
224                 }
225 
226                 /*
227                  * determine the appropriate zone id of the source based on
228                  * the zone of the destination and the outgoing interface.
229                  * If the specified address is ambiguous wrt the scope zone,
230                  * the interface must be specified; otherwise, ifa_ifwithaddr()
231                  * will fail matching the address.
232                  */
233                 bzero(&srcsock, sizeof(srcsock));
234                 srcsock.sin6_family = AF_INET6;
235                 srcsock.sin6_len = sizeof(srcsock);
236                 srcsock.sin6_addr = pi->ipi6_addr;
237                 if (ifp) {
238                         *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
239                         if (*errorp != 0)
240                                 return (NULL);
241                 }
242                 if (cred != NULL && prison_local_ip6(cred, &srcsock.sin6_addr,
243                     (inp != NULL && (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) {
244                         *errorp = EADDRNOTAVAIL;
245                         return (NULL);
246                 }
247 
248                 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock));
249                 if (ia6 == NULL ||
250                     (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
251                         *errorp = EADDRNOTAVAIL;
252                         return (NULL);
253                 }
254                 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
255                 if (ifpp)
256                         *ifpp = ifp;
257                 return (&ia6->ia_addr.sin6_addr);
258         }
259 
260         /*
261          * Otherwise, if the socket has already bound the source, just use it.
262          */
263         if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
264                 if (cred != NULL && prison_local_ip6(cred, &inp->in6p_laddr,
265                     ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) {
266                         *errorp = EADDRNOTAVAIL;
267                         return (NULL);
268                 }
269                 return (&inp->in6p_laddr);
270         }
271 
272         /*
273          * If the address is not specified, choose the best one based on
274          * the outgoing interface and the destination address.
275          */
276         /* get the outgoing interface */
277         if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0)
278                 return (NULL);
279 
280 #ifdef DIAGNOSTIC
281         if (ifp == NULL)        /* this should not happen */
282                 panic("in6_selectsrc: NULL ifp");
283 #endif
284         *errorp = in6_setscope(&dst, ifp, &odstzone);
285         if (*errorp != 0)
286                 return (NULL);
287 
288         for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
289                 int new_scope = -1, new_matchlen = -1;
290                 struct in6_addrpolicy *new_policy = NULL;
291                 u_int32_t srczone, osrczone, dstzone;
292                 struct in6_addr src;
293                 struct ifnet *ifp1 = ia->ia_ifp;
294 
295                 /*
296                  * We'll never take an address that breaks the scope zone
297                  * of the destination.  We also skip an address if its zone
298                  * does not contain the outgoing interface.
299                  * XXX: we should probably use sin6_scope_id here.
300                  */
301                 if (in6_setscope(&dst, ifp1, &dstzone) ||
302                     odstzone != dstzone) {
303                         continue;
304                 }
305                 src = ia->ia_addr.sin6_addr;
306                 if (in6_setscope(&src, ifp, &osrczone) ||
307                     in6_setscope(&src, ifp1, &srczone) ||
308                     osrczone != srczone) {
309                         continue;
310                 }
311 
312                 /* avoid unusable addresses */
313                 if ((ia->ia6_flags &
314                      (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
315                                 continue;
316                 }
317                 if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
318                         continue;
319 
320                 if (cred != NULL &&
321                     prison_local_ip6(cred, &ia->ia_addr.sin6_addr,
322                         (inp != NULL &&
323                         (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0)
324                         continue;
325 
326                 /* Rule 1: Prefer same address */
327                 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) {
328                         ia_best = ia;
329                         BREAK(1); /* there should be no better candidate */
330                 }
331 
332                 if (ia_best == NULL)
333                         REPLACE(0);
334 
335                 /* Rule 2: Prefer appropriate scope */
336                 if (dst_scope < 0)
337                         dst_scope = in6_addrscope(&dst);
338                 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
339                 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
340                         if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
341                                 REPLACE(2);
342                         NEXT(2);
343                 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
344                         if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
345                                 NEXT(2);
346                         REPLACE(2);
347                 }
348 
349                 /*
350                  * Rule 3: Avoid deprecated addresses.  Note that the case of
351                  * !ip6_use_deprecated is already rejected above.
352                  */
353                 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
354                         NEXT(3);
355                 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
356                         REPLACE(3);
357 
358                 /* Rule 4: Prefer home addresses */
359                 /*
360                  * XXX: This is a TODO.  We should probably merge the MIP6
361                  * case above.
362                  */
363 
364                 /* Rule 5: Prefer outgoing interface */
365                 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
366                         NEXT(5);
367                 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
368                         REPLACE(5);
369 
370                 /*
371                  * Rule 6: Prefer matching label
372                  * Note that best_policy should be non-NULL here.
373                  */
374                 if (dst_policy == NULL)
375                         dst_policy = lookup_addrsel_policy(dstsock);
376                 if (dst_policy->label != ADDR_LABEL_NOTAPP) {
377                         new_policy = lookup_addrsel_policy(&ia->ia_addr);
378                         if (dst_policy->label == best_policy->label &&
379                             dst_policy->label != new_policy->label)
380                                 NEXT(6);
381                         if (dst_policy->label != best_policy->label &&
382                             dst_policy->label == new_policy->label)
383                                 REPLACE(6);
384                 }
385 
386                 /*
387                  * Rule 7: Prefer public addresses.
388                  * We allow users to reverse the logic by configuring
389                  * a sysctl variable, so that privacy conscious users can
390                  * always prefer temporary addresses.
391                  */
392                 if (opts == NULL ||
393                     opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
394                         prefer_tempaddr = V_ip6_prefer_tempaddr;
395                 } else if (opts->ip6po_prefer_tempaddr ==
396                     IP6PO_TEMPADDR_NOTPREFER) {
397                         prefer_tempaddr = 0;
398                 } else
399                         prefer_tempaddr = 1;
400                 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
401                     (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
402                         if (prefer_tempaddr)
403                                 REPLACE(7);
404                         else
405                                 NEXT(7);
406                 }
407                 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
408                     !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
409                         if (prefer_tempaddr)
410                                 NEXT(7);
411                         else
412                                 REPLACE(7);
413                 }
414 
415                 /*
416                  * Rule 8: prefer addresses on alive interfaces.
417                  * This is a KAME specific rule.
418                  */
419                 if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
420                     !(ia->ia_ifp->if_flags & IFF_UP))
421                         NEXT(8);
422                 if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
423                     (ia->ia_ifp->if_flags & IFF_UP))
424                         REPLACE(8);
425 
426                 /*
427                  * Rule 14: Use longest matching prefix.
428                  * Note: in the address selection draft, this rule is
429                  * documented as "Rule 8".  However, since it is also
430                  * documented that this rule can be overridden, we assign
431                  * a large number so that it is easy to assign smaller numbers
432                  * to more preferred rules.
433                  */
434                 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
435                 if (best_matchlen < new_matchlen)
436                         REPLACE(14);
437                 if (new_matchlen < best_matchlen)
438                         NEXT(14);
439 
440                 /* Rule 15 is reserved. */
441 
442                 /*
443                  * Last resort: just keep the current candidate.
444                  * Or, do we need more rules?
445                  */
446                 continue;
447 
448           replace:
449                 ia_best = ia;
450                 best_scope = (new_scope >= 0 ? new_scope :
451                               in6_addrscope(&ia_best->ia_addr.sin6_addr));
452                 best_policy = (new_policy ? new_policy :
453                                lookup_addrsel_policy(&ia_best->ia_addr));
454                 best_matchlen = (new_matchlen >= 0 ? new_matchlen :
455                                  in6_matchlen(&ia_best->ia_addr.sin6_addr,
456                                               &dst));
457 
458           next:
459                 continue;
460 
461           out:
462                 break;
463         }
464 
465         if ((ia = ia_best) == NULL) {
466                 *errorp = EADDRNOTAVAIL;
467                 return (NULL);
468         }
469 
470         if (ifpp)
471                 *ifpp = ifp;
472 
473         return (&ia->ia_addr.sin6_addr);
474 }
475 
476 /*
477  * clone - meaningful only for bsdi and freebsd
478  */
479 static int
480 selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
481     struct ip6_moptions *mopts, struct route_in6 *ro,
482     struct ifnet **retifp, struct rtentry **retrt, int clone,
483     int norouteok)
484 {
485         INIT_VNET_INET6(curvnet);
486         int error = 0;
487         struct ifnet *ifp = NULL;
488         struct rtentry *rt = NULL;
489         struct sockaddr_in6 *sin6_next;
490         struct in6_pktinfo *pi = NULL;
491         struct in6_addr *dst = &dstsock->sin6_addr;
492 #if 0
493         char ip6buf[INET6_ADDRSTRLEN];
494 
495         if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
496             dstsock->sin6_addr.s6_addr32[1] == 0 &&
497             !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
498                 printf("in6_selectroute: strange destination %s\n",
499                        ip6_sprintf(ip6buf, &dstsock->sin6_addr));
500         } else {
501                 printf("in6_selectroute: destination = %s%%%d\n",
502                        ip6_sprintf(ip6buf, &dstsock->sin6_addr),
503                        dstsock->sin6_scope_id); /* for debug */
504         }
505 #endif
506 
507         /* If the caller specify the outgoing interface explicitly, use it. */
508         if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
509                 /* XXX boundary check is assumed to be already done. */
510                 ifp = ifnet_byindex(pi->ipi6_ifindex);
511                 if (ifp != NULL &&
512                     (norouteok || retrt == NULL ||
513                     IN6_IS_ADDR_MULTICAST(dst))) {
514                         /*
515                          * we do not have to check or get the route for
516                          * multicast.
517                          */
518                         goto done;
519                 } else
520                         goto getroute;
521         }
522 
523         /*
524          * If the destination address is a multicast address and the outgoing
525          * interface for the address is specified by the caller, use it.
526          */
527         if (IN6_IS_ADDR_MULTICAST(dst) &&
528             mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
529                 goto done; /* we do not need a route for multicast. */
530         }
531 
532   getroute:
533         /*
534          * If the next hop address for the packet is specified by the caller,
535          * use it as the gateway.
536          */
537         if (opts && opts->ip6po_nexthop) {
538                 struct route_in6 *ron;
539 
540                 sin6_next = satosin6(opts->ip6po_nexthop);
541 
542                 /* at this moment, we only support AF_INET6 next hops */
543                 if (sin6_next->sin6_family != AF_INET6) {
544                         error = EAFNOSUPPORT; /* or should we proceed? */
545                         goto done;
546                 }
547 
548                 /*
549                  * If the next hop is an IPv6 address, then the node identified
550                  * by that address must be a neighbor of the sending host.
551                  */
552                 ron = &opts->ip6po_nextroute;
553                 if ((ron->ro_rt &&
554                      (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
555                      (RTF_UP | RTF_LLINFO)) ||
556                     !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
557                     &sin6_next->sin6_addr)) {
558                         if (ron->ro_rt) {
559                                 RTFREE(ron->ro_rt);
560                                 ron->ro_rt = NULL;
561                         }
562                         *satosin6(&ron->ro_dst) = *sin6_next;
563                 }
564                 if (ron->ro_rt == NULL) {
565                         rtalloc((struct route *)ron); /* multi path case? */
566                         if (ron->ro_rt == NULL ||
567                             !(ron->ro_rt->rt_flags & RTF_LLINFO)) {
568                                 if (ron->ro_rt) {
569                                         RTFREE(ron->ro_rt);
570                                         ron->ro_rt = NULL;
571                                 }
572                                 error = EHOSTUNREACH;
573                                 goto done;
574                         }
575                 }
576                 rt = ron->ro_rt;
577                 ifp = rt->rt_ifp;
578 
579                 /*
580                  * When cloning is required, try to allocate a route to the
581                  * destination so that the caller can store path MTU
582                  * information.
583                  */
584                 if (!clone)
585                         goto done;
586         }
587 
588         /*
589          * Use a cached route if it exists and is valid, else try to allocate
590          * a new one.  Note that we should check the address family of the
591          * cached destination, in case of sharing the cache with IPv4.
592          */
593         if (ro) {
594                 if (ro->ro_rt &&
595                     (!(ro->ro_rt->rt_flags & RTF_UP) ||
596                      ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 ||
597                      !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
598                      dst))) {
599                         RTFREE(ro->ro_rt);
600                         ro->ro_rt = (struct rtentry *)NULL;
601                 }
602                 if (ro->ro_rt == (struct rtentry *)NULL) {
603                         struct sockaddr_in6 *sa6;
604 
605                         /* No route yet, so try to acquire one */
606                         bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
607                         sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
608                         *sa6 = *dstsock;
609                         sa6->sin6_scope_id = 0;
610 
611                         if (clone) {
612 #ifdef RADIX_MPATH
613                                 rtalloc_mpath((struct route *)ro,
614                                     ntohl(sa6->sin6_addr.s6_addr32[3]));
615 #else
616                                 rtalloc((struct route *)ro);
617 #endif
618                         } else {
619                                 ro->ro_rt = rtalloc1(&((struct route *)ro)
620                                                      ->ro_dst, 0, 0UL);
621                                 if (ro->ro_rt)
622                                         RT_UNLOCK(ro->ro_rt);
623                         }
624                 }
625 
626                 /*
627                  * do not care about the result if we have the nexthop
628                  * explicitly specified.
629                  */
630                 if (opts && opts->ip6po_nexthop)
631                         goto done;
632 
633                 if (ro->ro_rt) {
634                         ifp = ro->ro_rt->rt_ifp;
635 
636                         if (ifp == NULL) { /* can this really happen? */
637                                 RTFREE(ro->ro_rt);
638                                 ro->ro_rt = NULL;
639                         }
640                 }
641                 if (ro->ro_rt == NULL)
642                         error = EHOSTUNREACH;
643                 rt = ro->ro_rt;
644 
645                 /*
646                  * Check if the outgoing interface conflicts with
647                  * the interface specified by ipi6_ifindex (if specified).
648                  * Note that loopback interface is always okay.
649                  * (this may happen when we are sending a packet to one of
650                  *  our own addresses.)
651                  */
652                 if (ifp && opts && opts->ip6po_pktinfo &&
653                     opts->ip6po_pktinfo->ipi6_ifindex) {
654                         if (!(ifp->if_flags & IFF_LOOPBACK) &&
655                             ifp->if_index !=
656                             opts->ip6po_pktinfo->ipi6_ifindex) {
657                                 error = EHOSTUNREACH;
658                                 goto done;
659                         }
660                 }
661         }
662 
663   done:
664         if (ifp == NULL && rt == NULL) {
665                 /*
666                  * This can happen if the caller did not pass a cached route
667                  * nor any other hints.  We treat this case an error.
668                  */
669                 error = EHOSTUNREACH;
670         }
671         if (error == EHOSTUNREACH)
672                 V_ip6stat.ip6s_noroute++;
673 
674         if (retifp != NULL)
675                 *retifp = ifp;
676         if (retrt != NULL)
677                 *retrt = rt;    /* rt may be NULL */
678 
679         return (error);
680 }
681 
682 static int
683 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
684     struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp)
685 {
686         int error;
687         struct route_in6 sro;
688         struct rtentry *rt = NULL;
689 
690         if (ro == NULL) {
691                 bzero(&sro, sizeof(sro));
692                 ro = &sro;
693         }
694 
695         if ((error = selectroute(dstsock, opts, mopts, ro, retifp,
696                                      &rt, 0, 1)) != 0) {
697                 if (ro == &sro && rt && rt == sro.ro_rt)
698                         RTFREE(rt);
699                 return (error);
700         }
701 
702         /*
703          * do not use a rejected or black hole route.
704          * XXX: this check should be done in the L2 output routine.
705          * However, if we skipped this check here, we'd see the following
706          * scenario:
707          * - install a rejected route for a scoped address prefix
708          *   (like fe80::/10)
709          * - send a packet to a destination that matches the scoped prefix,
710          *   with ambiguity about the scope zone.
711          * - pick the outgoing interface from the route, and disambiguate the
712          *   scope zone with the interface.
713          * - ip6_output() would try to get another route with the "new"
714          *   destination, which may be valid.
715          * - we'd see no error on output.
716          * Although this may not be very harmful, it should still be confusing.
717          * We thus reject the case here.
718          */
719         if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
720                 int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
721 
722                 if (ro == &sro && rt && rt == sro.ro_rt)
723                         RTFREE(rt);
724                 return (flags);
725         }
726 
727         /*
728          * Adjust the "outgoing" interface.  If we're going to loop the packet
729          * back to ourselves, the ifp would be the loopback interface.
730          * However, we'd rather know the interface associated to the
731          * destination address (which should probably be one of our own
732          * addresses.)
733          */
734         if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp)
735                 *retifp = rt->rt_ifa->ifa_ifp;
736 
737         if (ro == &sro && rt && rt == sro.ro_rt)
738                 RTFREE(rt);
739         return (0);
740 }
741 
/*
 * Public entry point for route selection: forwards every argument to
 * selectroute(), passing 0 as its final argument.
 *
 * clone - meaningful only for bsdi and freebsd
 */
int
in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
    struct ip6_moptions *mopts, struct route_in6 *ro,
    struct ifnet **retifp, struct rtentry **retrt, int clone)
{
        int error;

        error = selectroute(dstsock, opts, mopts, ro, retifp, retrt,
            clone, 0);
        return (error);
}
754 
755 /*
756  * Default hop limit selection. The precedence is as follows:
757  * 1. Hoplimit value specified via ioctl.
758  * 2. (If the outgoing interface is detected) the current
759  *     hop limit of the interface specified by router advertisement.
760  * 3. The system default hoplimit.
761  */
762 int
763 in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp)
764 {
765         INIT_VNET_INET6(curvnet);
766 
767         if (in6p && in6p->in6p_hops >= 0)
768                 return (in6p->in6p_hops);
769         else if (ifp)
770                 return (ND_IFINFO(ifp)->chlim);
771         else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
772                 struct route_in6 ro6;
773                 struct ifnet *lifp;
774 
775                 bzero(&ro6, sizeof(ro6));
776                 ro6.ro_dst.sin6_family = AF_INET6;
777                 ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
778                 ro6.ro_dst.sin6_addr = in6p->in6p_faddr;
779                 rtalloc((struct route *)&ro6);
780                 if (ro6.ro_rt) {
781                         lifp = ro6.ro_rt->rt_ifp;
782                         RTFREE(ro6.ro_rt);
783                         if (lifp)
784                                 return (ND_IFINFO(lifp)->chlim);
785                 } else
786                         return (V_ip6_defhlim);
787         }
788         return (V_ip6_defhlim);
789 }
790 
791 /*
792  * XXX: this is borrowed from in6_pcbbind(). If possible, we should
793  * share this function by all *bsd*...
794  */
795 int
796 in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
797 {
798         INIT_VNET_INET(curvnet);
799         struct socket *so = inp->inp_socket;
800         u_int16_t lport = 0, first, last, *lastport;
801         int count, error = 0, wild = 0, dorandom;
802         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
803 
804         INP_INFO_WLOCK_ASSERT(pcbinfo);
805         INP_WLOCK_ASSERT(inp);
806 
807         if (prison_local_ip6(cred, laddr,
808             ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0)
809                 return(EINVAL);
810 
811         /* XXX: this is redundant when called from in6_pcbbind */
812         if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
813                 wild = INPLOOKUP_WILDCARD;
814 
815         inp->inp_flags |= INP_ANONPORT;
816 
817         if (inp->inp_flags & INP_HIGHPORT) {
818                 first = V_ipport_hifirstauto;   /* sysctl */
819                 last  = V_ipport_hilastauto;
820                 lastport = &pcbinfo->ipi_lasthi;
821         } else if (inp->inp_flags & INP_LOWPORT) {
822                 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
823                 if (error)
824                         return error;
825                 first = V_ipport_lowfirstauto;  /* 1023 */
826                 last  = V_ipport_lowlastauto;   /* 600 */
827                 lastport = &pcbinfo->ipi_lastlow;
828         } else {
829                 first = V_ipport_firstauto;     /* sysctl */
830                 last  = V_ipport_lastauto;
831                 lastport = &pcbinfo->ipi_lastport;
832         }
833 
834         /*
835          * For UDP, use random port allocation as long as the user
836          * allows it.  For TCP (and as of yet unknown) connections,
837          * use random port allocation only if the user allows it AND
838          * ipport_tick() allows it.
839          */
840         if (V_ipport_randomized &&
841             (!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
842                 dorandom = 1;
843         else
844                 dorandom = 0;
845         /*
846          * It makes no sense to do random port allocation if
847          * we have the only port available.
848          */
849         if (first == last)
850                 dorandom = 0;
851         /* Make sure to not include UDP packets in the count. */
852         if (pcbinfo != &V_udbinfo)
853                 V_ipport_tcpallocs++;
854 
855         /*
856          * Instead of having two loops further down counting up or down
857          * make sure that first is always <= last and go with only one
858          * code path implementing all logic.
859          */
860         if (first > last) {
861                 u_int16_t aux;
862 
863                 aux = first;
864                 first = last;
865                 last = aux;
866         }
867 
868         if (dorandom)
869                 *lastport = first + (arc4random() % (last - first));
870 
871         count = last - first;
872 
873         do {
874                 if (count-- < 0) {      /* completely used? */
875                         /* Undo an address bind that may have occurred. */
876                         inp->in6p_laddr = in6addr_any;
877                         return (EADDRNOTAVAIL);
878                 }
879                 ++*lastport;
880                 if (*lastport < first || *lastport > last)
881                         *lastport = first;
882                 lport = htons(*lastport);
883         } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr,
884             lport, wild, cred));
885 
886         inp->inp_lport = lport;
887         if (in_pcbinshash(inp) != 0) {
888                 inp->in6p_laddr = in6addr_any;
889                 inp->inp_lport = 0;
890                 return (EAGAIN);
891         }
892 
893         return (0);
894 }
895 
896 void
897 addrsel_policy_init(void)
898 {
899         ADDRSEL_LOCK_INIT();
900         ADDRSEL_SXLOCK_INIT();
901         INIT_VNET_INET6(curvnet);
902 
903         V_ip6_prefer_tempaddr = 0;
904 
905         init_policy_queue();
906 
907         /* initialize the "last resort" policy */
908         bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy));
909         V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
910 }
911 
912 static struct in6_addrpolicy *
913 lookup_addrsel_policy(struct sockaddr_in6 *key)
914 {
915         INIT_VNET_INET6(curvnet);
916         struct in6_addrpolicy *match = NULL;
917 
918         ADDRSEL_LOCK();
919         match = match_addrsel_policy(key);
920 
921         if (match == NULL)
922                 match = &V_defaultaddrpolicy;
923         else
924                 match->use++;
925         ADDRSEL_UNLOCK();
926 
927         return (match);
928 }
929 
930 /*
931  * Subroutines to manage the address selection policy table via sysctl.
932  */
933 struct walkarg {
934         struct sysctl_req *w_req;
935 };
936 
937 static int in6_src_sysctl(SYSCTL_HANDLER_ARGS);
938 SYSCTL_DECL(_net_inet6_ip6);
939 SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
940         CTLFLAG_RD, in6_src_sysctl, "");
941 
942 static int
943 in6_src_sysctl(SYSCTL_HANDLER_ARGS)
944 {
945         struct walkarg w;
946 
947         if (req->newptr)
948                 return EPERM;
949 
950         bzero(&w, sizeof(w));
951         w.w_req = req;
952 
953         return (walk_addrsel_policy(dump_addrsel_policyent, &w));
954 }
955 
956 int
957 in6_src_ioctl(u_long cmd, caddr_t data)
958 {
959         int i;
960         struct in6_addrpolicy ent0;
961 
962         if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
963                 return (EOPNOTSUPP); /* check for safety */
964 
965         ent0 = *(struct in6_addrpolicy *)data;
966 
967         if (ent0.label == ADDR_LABEL_NOTAPP)
968                 return (EINVAL);
969         /* check if the prefix mask is consecutive. */
970         if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
971                 return (EINVAL);
972         /* clear trailing garbages (if any) of the prefix address. */
973         for (i = 0; i < 4; i++) {
974                 ent0.addr.sin6_addr.s6_addr32[i] &=
975                         ent0.addrmask.sin6_addr.s6_addr32[i];
976         }
977         ent0.use = 0;
978 
979         switch (cmd) {
980         case SIOCAADDRCTL_POLICY:
981                 return (add_addrsel_policyent(&ent0));
982         case SIOCDADDRCTL_POLICY:
983                 return (delete_addrsel_policyent(&ent0));
984         }
985 
986         return (0);             /* XXX: compromise compilers */
987 }