The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet6/ip6_output.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: ip6_output.c,v 1.135.2.1 2009/04/20 22:56:04 snj Exp $ */
    2 /*      $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $    */
    3 
    4 /*
    5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the project nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  */
   32 
   33 /*
   34  * Copyright (c) 1982, 1986, 1988, 1990, 1993
   35  *      The Regents of the University of California.  All rights reserved.
   36  *
   37  * Redistribution and use in source and binary forms, with or without
   38  * modification, are permitted provided that the following conditions
   39  * are met:
   40  * 1. Redistributions of source code must retain the above copyright
   41  *    notice, this list of conditions and the following disclaimer.
   42  * 2. Redistributions in binary form must reproduce the above copyright
   43  *    notice, this list of conditions and the following disclaimer in the
   44  *    documentation and/or other materials provided with the distribution.
   45  * 3. Neither the name of the University nor the names of its contributors
   46  *    may be used to endorse or promote products derived from this software
   47  *    without specific prior written permission.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   59  * SUCH DAMAGE.
   60  *
   61  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
   62  */
   63 
   64 #include <sys/cdefs.h>
   65 __KERNEL_RCSID(0, "$NetBSD: ip6_output.c,v 1.135.2.1 2009/04/20 22:56:04 snj Exp $");
   66 
   67 #include "opt_inet.h"
   68 #include "opt_inet6.h"
   69 #include "opt_ipsec.h"
   70 #include "opt_pfil_hooks.h"
   71 
   72 #include <sys/param.h>
   73 #include <sys/malloc.h>
   74 #include <sys/mbuf.h>
   75 #include <sys/errno.h>
   76 #include <sys/protosw.h>
   77 #include <sys/socket.h>
   78 #include <sys/socketvar.h>
   79 #include <sys/systm.h>
   80 #include <sys/proc.h>
   81 #include <sys/kauth.h>
   82 
   83 #include <net/if.h>
   84 #include <net/route.h>
   85 #ifdef PFIL_HOOKS
   86 #include <net/pfil.h>
   87 #endif
   88 
   89 #include <netinet/in.h>
   90 #include <netinet/in_var.h>
   91 #include <netinet/ip6.h>
   92 #include <netinet/icmp6.h>
   93 #include <netinet/in_offload.h>
   94 #include <netinet6/in6_offload.h>
   95 #include <netinet6/ip6_var.h>
   96 #include <netinet6/ip6_private.h>
   97 #include <netinet6/in6_pcb.h>
   98 #include <netinet6/nd6.h>
   99 #include <netinet6/ip6protosw.h>
  100 #include <netinet6/scope6_var.h>
  101 
  102 #ifdef IPSEC
  103 #include <netinet6/ipsec.h>
  104 #include <netinet6/ipsec_private.h>
  105 #include <netkey/key.h>
  106 #endif /* IPSEC */
  107 
  108 #ifdef FAST_IPSEC
  109 #include <netipsec/ipsec.h>
  110 #include <netipsec/ipsec6.h>
  111 #include <netipsec/key.h>
  112 #include <netipsec/xform.h>
  113 #endif
  114 
  115 
  116 #include <net/net_osdep.h>
  117 
  118 #ifdef PFIL_HOOKS
  119 extern struct pfil_head inet6_pfil_hook;        /* XXX */
  120 #endif
  121 
  122 struct ip6_exthdrs {                /* mbufs for each piece of an outgoing header chain; a slot is NULL when that header is absent */
  123         struct mbuf *ip6e_ip6;      /* mbuf holding the IPv6 header itself */
  124         struct mbuf *ip6e_hbh;      /* Hop-by-Hop options header */
  125         struct mbuf *ip6e_dest1;    /* Destination options header (1st part) */
  126         struct mbuf *ip6e_rthdr;    /* Routing header */
  127         struct mbuf *ip6e_dest2;    /* Destination options header (2nd part) */
  128 };
  129 
  130 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
  131         int, int);
  132 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
  133 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
  134         int, int, int);
  135 static int ip6_setmoptions(const struct sockopt *, struct ip6_moptions **);
  136 static int ip6_getmoptions(struct sockopt *, struct ip6_moptions *);
  137 static int ip6_copyexthdr(struct mbuf **, void *, int); /* invoked by MAKE_EXTHDR in ip6_output() with (mbuf slot, ext header, header length in bytes) */
  138 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
  139         struct ip6_frag **);
  140 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); /* called by ip6_output() when the payload length exceeds IPV6_MAXPACKET */
  141 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); /* called by ip6_output() to separate the IPv6 header from the payload; caller then reads ip6e_ip6 */
  142 static int ip6_getpmtu(struct route *, struct route *, struct ifnet *,
  143     const struct in6_addr *, u_long *, int *);
  144 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
  145 
  146 #ifdef RFC2292
  147 static int ip6_pcbopts(struct ip6_pktopts **, struct socket *, struct sockopt *);
  148 #endif
  149 
  150 #define IN6_NEED_CHECKSUM(ifp, csum_flags) \
  151         (__predict_true(((ifp)->if_flags & IFF_LOOPBACK) == 0 || /* true for any non-loopback interface */ \
  152         (((csum_flags) & M_CSUM_UDPv6) != 0 && udp_do_loopback_cksum) || /* or UDPv6 flagged and udp_do_loopback_cksum set */ \
  153         (((csum_flags) & M_CSUM_TCPv6) != 0 && tcp_do_loopback_cksum))) /* or TCPv6 flagged and tcp_do_loopback_cksum set */
  154 
  155 /*
  156  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
  157  * header (with pri, len, nxt, hlim, src, dst).
  158  * This function may modify ver and hlim only.
  159  * The mbuf chain containing the packet will be freed.
  160  * The mbuf opt, if present, will not be freed.
  161  *
  162  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
  163  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
  164  * which is rt_rmx.rmx_mtu.
  165  */
  166 int
  167 ip6_output(
  168     struct mbuf *m0,
  169     struct ip6_pktopts *opt,
  170     struct route *ro,
  171     int flags,
  172     struct ip6_moptions *im6o,
  173     struct socket *so,
  174     struct ifnet **ifpp         /* XXX: just for statistics */
  175 )
  176 {
  177         struct ip6_hdr *ip6, *mhip6;
  178         struct ifnet *ifp, *origifp;
  179         struct mbuf *m = m0;
  180         int hlen, tlen, len, off;
  181         bool tso;
  182         struct route ip6route;
  183         struct rtentry *rt = NULL;
  184         const struct sockaddr_in6 *dst = NULL;
  185         struct sockaddr_in6 src_sa, dst_sa;
  186         int error = 0;
  187         struct in6_ifaddr *ia = NULL;
  188         u_long mtu;
  189         int alwaysfrag, dontfrag;
  190         u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
  191         struct ip6_exthdrs exthdrs;
  192         struct in6_addr finaldst, src0, dst0;
  193         u_int32_t zone;
  194         struct route *ro_pmtu = NULL;
  195         int hdrsplit = 0;
  196         int needipsec = 0;
  197 #ifdef IPSEC
  198         int needipsectun = 0;
  199         struct secpolicy *sp = NULL;
  200 
  201         ip6 = mtod(m, struct ip6_hdr *);
  202 #endif /* IPSEC */
  203 #ifdef FAST_IPSEC
  204         struct secpolicy *sp = NULL;
  205         int s;
  206 #endif
  207 
  208         memset(&ip6route, 0, sizeof(ip6route));
  209 
  210 #ifdef  DIAGNOSTIC
  211         if ((m->m_flags & M_PKTHDR) == 0)
  212                 panic("ip6_output: no HDR");
  213 
  214         if ((m->m_pkthdr.csum_flags &
  215             (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TSOv4)) != 0) {
  216                 panic("ip6_output: IPv4 checksum offload flags: %d",
  217                     m->m_pkthdr.csum_flags);
  218         }
  219 
  220         if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) ==
  221             (M_CSUM_TCPv6|M_CSUM_UDPv6)) {
  222                 panic("ip6_output: conflicting checksum offload flags: %d",
  223                     m->m_pkthdr.csum_flags);
  224         }
  225 #endif
  226 
  227         M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data, sizeof(struct ip6_hdr));
  228 
  229 #define MAKE_EXTHDR(hp, mp)                                             \
  230     do {                                                                \
  231         if (hp) {                                                       \
  232                 struct ip6_ext *eh = (struct ip6_ext *)(hp);            \
  233                 error = ip6_copyexthdr((mp), (void *)(hp),              \
  234                     ((eh)->ip6e_len + 1) << 3);                         \
  235                 if (error)                                              \
  236                         goto freehdrs;                                  \
  237         }                                                               \
  238     } while (/*CONSTCOND*/ 0)
  239 
  240         bzero(&exthdrs, sizeof(exthdrs));
  241         if (opt) {
  242                 /* Hop-by-Hop options header */
  243                 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
  244                 /* Destination options header(1st part) */
  245                 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
  246                 /* Routing header */
  247                 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
  248                 /* Destination options header(2nd part) */
  249                 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
  250         }
  251 
  252 #ifdef IPSEC
  253         if ((flags & IPV6_FORWARDING) != 0) {
  254                 needipsec = 0;
  255                 goto skippolicycheck;
  256         }
  257 
  258         /* get a security policy for this packet */
  259         if (so == NULL)
  260                 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
  261         else {
  262                 if (IPSEC_PCB_SKIP_IPSEC(sotoinpcb_hdr(so)->inph_sp,
  263                                          IPSEC_DIR_OUTBOUND)) {
  264                         needipsec = 0;
  265                         goto skippolicycheck;
  266                 }
  267                 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
  268         }
  269 
  270         if (sp == NULL) {
  271                 IPSEC6_STATINC(IPSEC_STAT_OUT_INVAL);
  272                 goto freehdrs;
  273         }
  274 
  275         error = 0;
  276 
  277         /* check policy */
  278         switch (sp->policy) {
  279         case IPSEC_POLICY_DISCARD:
  280                 /*
  281                  * This packet is just discarded.
  282                  */
  283                 IPSEC6_STATINC(IPSEC_STAT_OUT_POLVIO);
  284                 goto freehdrs;
  285 
  286         case IPSEC_POLICY_BYPASS:
  287         case IPSEC_POLICY_NONE:
  288                 /* no need to do IPsec. */
  289                 needipsec = 0;
  290                 break;
  291 
  292         case IPSEC_POLICY_IPSEC:
  293                 if (sp->req == NULL) {
  294                         /* XXX should be panic ? */
  295                         printf("ip6_output: No IPsec request specified.\n");
  296                         error = EINVAL;
  297                         goto freehdrs;
  298                 }
  299                 needipsec = 1;
  300                 break;
  301 
  302         case IPSEC_POLICY_ENTRUST:
  303         default:
  304                 printf("ip6_output: Invalid policy found. %d\n", sp->policy);
  305         }
  306 
  307   skippolicycheck:;
  308 #endif /* IPSEC */
  309 
  310         /*
  311          * Calculate the total length of the extension header chain.
  312          * Keep the length of the unfragmentable part for fragmentation.
  313          */
  314         optlen = 0;
  315         if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
  316         if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
  317         if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
  318         unfragpartlen = optlen + sizeof(struct ip6_hdr);
  319         /* NOTE: we don't add AH/ESP length here. do that later. */
  320         if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
  321 
  322 #ifdef FAST_IPSEC
  323         /* Check the security policy (SP) for the packet */
  324     
  325         /* XXX For the moment, we don't support packets with extended action */
  326         if (optlen !=0)
  327                 goto freehdrs;
  328 
  329         sp = ipsec6_check_policy(m,so,flags,&needipsec,&error);
  330         if (error != 0) {
  331                 /*
  332                  * Hack: -EINVAL is used to signal that a packet
  333                  * should be silently discarded.  This is typically
  334                  * because we asked key management for an SA and
  335                  * it was delayed (e.g. kicked up to IKE).
  336                  */
  337         if (error == -EINVAL) 
  338                 error = 0;
  339         goto freehdrs;
  340     }
  341 #endif /* FAST_IPSEC */
  342 
  343 
  344         if (needipsec &&
  345             (m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
  346                 in6_delayed_cksum(m);
  347                 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
  348         }
  349 
  350 
  351         /*
  352          * If we need IPsec, or there is at least one extension header,
  353          * separate IP6 header from the payload.
  354          */
  355         if ((needipsec || optlen) && !hdrsplit) {
  356                 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
  357                         m = NULL;
  358                         goto freehdrs;
  359                 }
  360                 m = exthdrs.ip6e_ip6;
  361                 hdrsplit++;
  362         }
  363 
  364         /* adjust pointer */
  365         ip6 = mtod(m, struct ip6_hdr *);
  366 
  367         /* adjust mbuf packet header length */
  368         m->m_pkthdr.len += optlen;
  369         plen = m->m_pkthdr.len - sizeof(*ip6);
  370 
  371         /* If this is a jumbo payload, insert a jumbo payload option. */
  372         if (plen > IPV6_MAXPACKET) {
  373                 if (!hdrsplit) {
  374                         if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
  375                                 m = NULL;
  376                                 goto freehdrs;
  377                         }
  378                         m = exthdrs.ip6e_ip6;
  379                         hdrsplit++;
  380                 }
  381                 /* adjust pointer */
  382                 ip6 = mtod(m, struct ip6_hdr *);
  383                 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
  384                         goto freehdrs;
  385                 optlen += 8; /* XXX JUMBOOPTLEN */
  386                 ip6->ip6_plen = 0;
  387         } else
  388                 ip6->ip6_plen = htons(plen);
  389 
  390         /*
  391          * Concatenate headers and fill in next header fields.
  392          * Here we have, on "m"
  393          *      IPv6 payload
  394          * and we insert headers accordingly.  Finally, we should be getting:
  395          *      IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
  396          *
  397          * during the header composing process, "m" points to IPv6 header.
  398          * "mprev" points to an extension header prior to esp.
  399          */
  400         {
  401                 u_char *nexthdrp = &ip6->ip6_nxt;
  402                 struct mbuf *mprev = m;
  403 
  404                 /*
  405                  * we treat dest2 specially.  this makes IPsec processing
  406                  * much easier.  the goal here is to make mprev point the
  407                  * mbuf prior to dest2.
  408                  *
  409                  * result: IPv6 dest2 payload
  410                  * m and mprev will point to IPv6 header.
  411                  */
  412                 if (exthdrs.ip6e_dest2) {
  413                         if (!hdrsplit)
  414                                 panic("assumption failed: hdr not split");
  415                         exthdrs.ip6e_dest2->m_next = m->m_next;
  416                         m->m_next = exthdrs.ip6e_dest2;
  417                         *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
  418                         ip6->ip6_nxt = IPPROTO_DSTOPTS;
  419                 }
  420 
  421 #define MAKE_CHAIN(m, mp, p, i)\
  422     do {\
  423         if (m) {\
  424                 if (!hdrsplit) \
  425                         panic("assumption failed: hdr not split"); \
  426                 *mtod((m), u_char *) = *(p);\
  427                 *(p) = (i);\
  428                 p = mtod((m), u_char *);\
  429                 (m)->m_next = (mp)->m_next;\
  430                 (mp)->m_next = (m);\
  431                 (mp) = (m);\
  432         }\
  433     } while (/*CONSTCOND*/ 0)
  434                 /*
  435                  * result: IPv6 hbh dest1 rthdr dest2 payload
  436                  * m will point to IPv6 header.  mprev will point to the
  437                  * extension header prior to dest2 (rthdr in the above case).
  438                  */
  439                 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
  440                 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
  441                     IPPROTO_DSTOPTS);
  442                 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
  443                     IPPROTO_ROUTING);
  444 
  445                 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data,
  446                     sizeof(struct ip6_hdr) + optlen);
  447 
  448 #ifdef IPSEC
  449                 if (!needipsec)
  450                         goto skip_ipsec2;
  451 
  452                 /*
  453                  * pointers after IPsec headers are not valid any more.
  454                  * other pointers need a great care too.
  455                  * (IPsec routines should not mangle mbufs prior to AH/ESP)
  456                  */
  457                 exthdrs.ip6e_dest2 = NULL;
  458 
  459             {
  460                 struct ip6_rthdr *rh = NULL;
  461                 int segleft_org = 0;
  462                 struct ipsec_output_state state;
  463 
  464                 if (exthdrs.ip6e_rthdr) {
  465                         rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
  466                         segleft_org = rh->ip6r_segleft;
  467                         rh->ip6r_segleft = 0;
  468                 }
  469 
  470                 bzero(&state, sizeof(state));
  471                 state.m = m;
  472                 error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
  473                     &needipsectun);
  474                 m = state.m;
  475                 if (error) {
  476                         rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
  477                         /* mbuf is already reclaimed in ipsec6_output_trans. */
  478                         m = NULL;
  479                         switch (error) {
  480                         case EHOSTUNREACH:
  481                         case ENETUNREACH:
  482                         case EMSGSIZE:
  483                         case ENOBUFS:
  484                         case ENOMEM:
  485                                 break;
  486                         default:
  487                                 printf("ip6_output (ipsec): error code %d\n", error);
  488                                 /* FALLTHROUGH */
  489                         case ENOENT:
  490                                 /* don't show these error codes to the user */
  491                                 error = 0;
  492                                 break;
  493                         }
  494                         goto bad;
  495                 }
  496                 if (exthdrs.ip6e_rthdr) {
  497                         /* ah6_output doesn't modify mbuf chain */
  498                         rh->ip6r_segleft = segleft_org;
  499                 }
  500             }
  501 skip_ipsec2:;
  502 #endif
  503         }
  504 
  505         /*
  506          * If there is a routing header, replace destination address field
  507          * with the first hop of the routing header.
  508          */
  509         if (exthdrs.ip6e_rthdr) {
  510                 struct ip6_rthdr *rh;
  511                 struct ip6_rthdr0 *rh0;
  512                 struct in6_addr *addr;
  513                 struct sockaddr_in6 sa;
  514 
  515                 rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
  516                     struct ip6_rthdr *));
  517                 finaldst = ip6->ip6_dst;
  518                 switch (rh->ip6r_type) {
  519                 case IPV6_RTHDR_TYPE_0:
  520                          rh0 = (struct ip6_rthdr0 *)rh;
  521                          addr = (struct in6_addr *)(rh0 + 1);
  522 
  523                          /*
  524                           * construct a sockaddr_in6 form of
  525                           * the first hop.
  526                           *
  527                           * XXX: we may not have enough
  528                           * information about its scope zone;
  529                           * there is no standard API to pass
  530                           * the information from the
  531                           * application.
  532                           */
  533                          sockaddr_in6_init(&sa, addr, 0, 0, 0);
  534                          if ((error = sa6_embedscope(&sa,
  535                              ip6_use_defzone)) != 0) {
  536                                  goto bad;
  537                          }
  538                          ip6->ip6_dst = sa.sin6_addr;
  539                          (void)memmove(&addr[0], &addr[1],
  540                              sizeof(struct in6_addr) *
  541                              (rh0->ip6r0_segleft - 1));
  542                          addr[rh0->ip6r0_segleft - 1] = finaldst;
  543                          /* XXX */
  544                          in6_clearscope(addr + rh0->ip6r0_segleft - 1);
  545                          break;
  546                 default:        /* is it possible? */
  547                          error = EINVAL;
  548                          goto bad;
  549                 }
  550         }
  551 
  552         /* Source address validation */
  553         if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
  554             (flags & IPV6_UNSPECSRC) == 0) {
  555                 error = EOPNOTSUPP;
  556                 IP6_STATINC(IP6_STAT_BADSCOPE);
  557                 goto bad;
  558         }
  559         if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
  560                 error = EOPNOTSUPP;
  561                 IP6_STATINC(IP6_STAT_BADSCOPE);
  562                 goto bad;
  563         }
  564 
  565         IP6_STATINC(IP6_STAT_LOCALOUT);
  566 
  567         /*
  568          * Route packet.
  569          */
  570         /* initialize cached route */
  571         if (ro == NULL) {
  572                 ro = &ip6route;
  573         }
  574         ro_pmtu = ro;
  575         if (opt && opt->ip6po_rthdr)
  576                 ro = &opt->ip6po_route;
  577 
  578         /*
  579          * if specified, try to fill in the traffic class field.
  580          * do not override if a non-zero value is already set.
  581          * we check the diffserv field and the ecn field separately.
  582          */
  583         if (opt && opt->ip6po_tclass >= 0) {
  584                 int mask = 0;
  585 
  586                 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
  587                         mask |= 0xfc;
  588                 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
  589                         mask |= 0x03;
  590                 if (mask != 0)
  591                         ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
  592         }
  593 
  594         /* fill in or override the hop limit field, if necessary. */
  595         if (opt && opt->ip6po_hlim != -1)
  596                 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
  597         else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
  598                 if (im6o != NULL)
  599                         ip6->ip6_hlim = im6o->im6o_multicast_hlim;
  600                 else
  601                         ip6->ip6_hlim = ip6_defmcasthlim;
  602         }
  603 
  604 #ifdef IPSEC
  605         if (needipsec && needipsectun) {
  606                 struct ipsec_output_state state;
  607 
  608                 /*
  609                  * All the extension headers will become inaccessible
  610                  * (since they can be encrypted).
  611                  * Don't panic, we need no more updates to extension headers
  612                  * on inner IPv6 packet (since they are now encapsulated).
  613                  *
  614                  * IPv6 [ESP|AH] IPv6 [extension headers] payload
  615                  */
  616                 bzero(&exthdrs, sizeof(exthdrs));
  617                 exthdrs.ip6e_ip6 = m;
  618 
  619                 bzero(&state, sizeof(state));
  620                 state.m = m;
  621                 state.ro = ro;
  622                 state.dst = rtcache_getdst(ro);
  623 
  624                 error = ipsec6_output_tunnel(&state, sp, flags);
  625 
  626                 m = state.m;
  627                 ro_pmtu = ro = state.ro;
  628                 dst = satocsin6(state.dst);
  629                 if (error) {
  630                         /* mbuf is already reclaimed in ipsec6_output_tunnel. */
  631                         m0 = m = NULL;
  632                         m = NULL;
  633                         switch (error) {
  634                         case EHOSTUNREACH:
  635                         case ENETUNREACH:
  636                         case EMSGSIZE:
  637                         case ENOBUFS:
  638                         case ENOMEM:
  639                                 break;
  640                         default:
  641                                 printf("ip6_output (ipsec): error code %d\n", error);
  642                                 /* FALLTHROUGH */
  643                         case ENOENT:
  644                                 /* don't show these error codes to the user */
  645                                 error = 0;
  646                                 break;
  647                         }
  648                         goto bad;
  649                 }
  650 
  651                 exthdrs.ip6e_ip6 = m;
  652         }
  653 #endif /* IPSEC */
  654 #ifdef FAST_IPSEC
  655         if (needipsec) {
  656                 s = splsoftnet();
  657                 error = ipsec6_process_packet(m,sp->req);
  658 
  659                 /*
  660                  * Preserve KAME behaviour: ENOENT can be returned
  661                  * when an SA acquire is in progress.  Don't propagate
  662                  * this to user-level; it confuses applications.
  663                  * XXX this will go away when the SADB is redone.
  664                  */
  665                 if (error == ENOENT)
  666                         error = 0;
  667                 splx(s);
  668                 goto done;
  669         }
  670 #endif /* FAST_IPSEC */    
  671 
  672 
  673 
  674         /* adjust pointer */
  675         ip6 = mtod(m, struct ip6_hdr *);
  676 
  677         sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
  678         if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
  679             &ifp, &rt, 0)) != 0) {
  680                 if (ifp != NULL)
  681                         in6_ifstat_inc(ifp, ifs6_out_discard);
  682                 goto bad;
  683         }
  684         if (rt == NULL) {
  685                 /*
  686                  * If in6_selectroute() does not return a route entry,
  687                  * dst may not have been updated.
  688                  */
  689                 rtcache_setdst(ro, sin6tosa(&dst_sa));
  690         }
  691 
  692         /*
  693          * then rt (for unicast) and ifp must be non-NULL valid values.
  694          */
  695         if ((flags & IPV6_FORWARDING) == 0) {
  696                 /* XXX: the FORWARDING flag can be set for mrouting. */
  697                 in6_ifstat_inc(ifp, ifs6_out_request);
  698         }
  699         if (rt != NULL) {
  700                 ia = (struct in6_ifaddr *)(rt->rt_ifa);
  701                 rt->rt_use++;
  702         }
  703 
  704         /*
  705          * The outgoing interface must be in the zone of source and
  706          * destination addresses.  We should use ia_ifp to support the
  707          * case of sending packets to an address of our own.
  708          */
  709         if (ia != NULL && ia->ia_ifp)
  710                 origifp = ia->ia_ifp;
  711         else
  712                 origifp = ifp;
  713 
  714         src0 = ip6->ip6_src;
  715         if (in6_setscope(&src0, origifp, &zone))
  716                 goto badscope;
  717         sockaddr_in6_init(&src_sa, &ip6->ip6_src, 0, 0, 0);
  718         if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
  719                 goto badscope;
  720 
  721         dst0 = ip6->ip6_dst;
  722         if (in6_setscope(&dst0, origifp, &zone))
  723                 goto badscope;
  724         /* re-initialize to be sure */
  725         sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
  726         if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id)
  727                 goto badscope;
  728 
  729         /* scope check is done. */
  730 
  731         if (rt == NULL || IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
  732                 if (dst == NULL)
  733                         dst = satocsin6(rtcache_getdst(ro));
  734                 KASSERT(dst != NULL);
  735         } else if (opt && rtcache_validate(&opt->ip6po_nextroute) != NULL) {
  736                 /*
  737                  * The nexthop is explicitly specified by the
  738                  * application.  We assume the next hop is an IPv6
  739                  * address.
  740                  */
  741                 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
  742         } else if ((rt->rt_flags & RTF_GATEWAY))
  743                 dst = (struct sockaddr_in6 *)rt->rt_gateway;
  744         else if (dst == NULL)
  745                 dst = satocsin6(rtcache_getdst(ro));
  746 
  747         /*
  748          * XXXXXX: original code follows:
  749          */
  750         if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
  751                 m->m_flags &= ~(M_BCAST | M_MCAST);     /* just in case */
  752         else {
  753                 struct  in6_multi *in6m;
  754 
  755                 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
  756 
  757                 in6_ifstat_inc(ifp, ifs6_out_mcast);
  758 
  759                 /*
  760                  * Confirm that the outgoing interface supports multicast.
  761                  */
  762                 if (!(ifp->if_flags & IFF_MULTICAST)) {
  763                         IP6_STATINC(IP6_STAT_NOROUTE);
  764                         in6_ifstat_inc(ifp, ifs6_out_discard);
  765                         error = ENETUNREACH;
  766                         goto bad;
  767                 }
  768 
  769                 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
  770                 if (in6m != NULL &&
  771                    (im6o == NULL || im6o->im6o_multicast_loop)) {
  772                         /*
  773                          * If we belong to the destination multicast group
  774                          * on the outgoing interface, and the caller did not
  775                          * forbid loopback, loop back a copy.
  776                          */
  777                         KASSERT(dst != NULL);
  778                         ip6_mloopback(ifp, m, dst);
  779                 } else {
  780                         /*
  781                          * If we are acting as a multicast router, perform
  782                          * multicast forwarding as if the packet had just
  783                          * arrived on the interface to which we are about
  784                          * to send.  The multicast forwarding function
  785                          * recursively calls this function, using the
  786                          * IPV6_FORWARDING flag to prevent infinite recursion.
  787                          *
  788                          * Multicasts that are looped back by ip6_mloopback(),
  789                          * above, will be forwarded by the ip6_input() routine,
  790                          * if necessary.
  791                          */
  792                         if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
  793                                 if (ip6_mforward(ip6, ifp, m) != 0) {
  794                                         m_freem(m);
  795                                         goto done;
  796                                 }
  797                         }
  798                 }
  799                 /*
  800                  * Multicasts with a hoplimit of zero may be looped back,
  801                  * above, but must not be transmitted on a network.
  802                  * Also, multicasts addressed to the loopback interface
  803                  * are not sent -- the above call to ip6_mloopback() will
  804                  * loop back a copy if this host actually belongs to the
  805                  * destination group on the loopback interface.
  806                  */
  807                 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
  808                     IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
  809                         m_freem(m);
  810                         goto done;
  811                 }
  812         }
  813 
  814         /*
   815          * Fill the outgoing interface to tell the upper layer
  816          * to increment per-interface statistics.
  817          */
  818         if (ifpp)
  819                 *ifpp = ifp;
  820 
  821         /* Determine path MTU. */
  822         if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
  823             &alwaysfrag)) != 0)
  824                 goto bad;
  825 #ifdef IPSEC
  826         if (needipsectun)
  827                 mtu = IPV6_MMTU;
  828 #endif
  829 
  830         /*
  831          * The caller of this function may specify to use the minimum MTU
  832          * in some cases.
  833          * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
  834          * setting.  The logic is a bit complicated; by default, unicast
  835          * packets will follow path MTU while multicast packets will be sent at
  836          * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
  837          * including unicast ones will be sent at the minimum MTU.  Multicast
  838          * packets will always be sent at the minimum MTU unless
  839          * IP6PO_MINMTU_DISABLE is explicitly specified.
  840          * See RFC 3542 for more details.
  841          */
  842         if (mtu > IPV6_MMTU) {
  843                 if ((flags & IPV6_MINMTU))
  844                         mtu = IPV6_MMTU;
  845                 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
  846                         mtu = IPV6_MMTU;
  847                 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
  848                          (opt == NULL ||
  849                           opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
  850                         mtu = IPV6_MMTU;
  851                 }
  852         }
  853 
  854         /*
  855          * clear embedded scope identifiers if necessary.
  856          * in6_clearscope will touch the addresses only when necessary.
  857          */
  858         in6_clearscope(&ip6->ip6_src);
  859         in6_clearscope(&ip6->ip6_dst);
  860 
  861         /*
  862          * If the outgoing packet contains a hop-by-hop options header,
  863          * it must be examined and processed even by the source node.
  864          * (RFC 2460, section 4.)
  865          */
  866         if (exthdrs.ip6e_hbh) {
  867                 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
  868                 u_int32_t dummy1; /* XXX unused */
  869                 u_int32_t dummy2; /* XXX unused */
  870 
  871                 /*
  872                  *  XXX: if we have to send an ICMPv6 error to the sender,
  873                  *       we need the M_LOOP flag since icmp6_error() expects
  874                  *       the IPv6 and the hop-by-hop options header are
  875                  *       continuous unless the flag is set.
  876                  */
  877                 m->m_flags |= M_LOOP;
  878                 m->m_pkthdr.rcvif = ifp;
  879                 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
  880                     ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
  881                     &dummy1, &dummy2) < 0) {
  882                         /* m was already freed at this point */
  883                         error = EINVAL;/* better error? */
  884                         goto done;
  885                 }
  886                 m->m_flags &= ~M_LOOP; /* XXX */
  887                 m->m_pkthdr.rcvif = NULL;
  888         }
  889 
  890 #ifdef PFIL_HOOKS
  891         /*
  892          * Run through list of hooks for output packets.
  893          */
  894         if ((error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
  895                 goto done;
  896         if (m == NULL)
  897                 goto done;
  898         ip6 = mtod(m, struct ip6_hdr *);
  899 #endif /* PFIL_HOOKS */
  900         /*
  901          * Send the packet to the outgoing interface.
  902          * If necessary, do IPv6 fragmentation before sending.
  903          *
  904          * the logic here is rather complex:
  905          * 1: normal case (dontfrag == 0, alwaysfrag == 0)
  906          * 1-a: send as is if tlen <= path mtu
  907          * 1-b: fragment if tlen > path mtu
  908          *
  909          * 2: if user asks us not to fragment (dontfrag == 1)
  910          * 2-a: send as is if tlen <= interface mtu
  911          * 2-b: error if tlen > interface mtu
  912          *
  913          * 3: if we always need to attach fragment header (alwaysfrag == 1)
  914          *      always fragment
  915          *
  916          * 4: if dontfrag == 1 && alwaysfrag == 1
  917          *      error, as we cannot handle this conflicting request
  918          */
  919         tlen = m->m_pkthdr.len;
  920         tso = (m->m_pkthdr.csum_flags & M_CSUM_TSOv6) != 0;
  921         if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
  922                 dontfrag = 1;
  923         else
  924                 dontfrag = 0;
  925 
  926         if (dontfrag && alwaysfrag) {   /* case 4 */
  927                 /* conflicting request - can't transmit */
  928                 error = EMSGSIZE;
  929                 goto bad;
  930         }
  931         if (dontfrag && (!tso && tlen > IN6_LINKMTU(ifp))) {    /* case 2-b */
  932                 /*
  933                  * Even if the DONTFRAG option is specified, we cannot send the
  934                  * packet when the data length is larger than the MTU of the
  935                  * outgoing interface.
  936                  * Notify the error by sending IPV6_PATHMTU ancillary data as
  937                  * well as returning an error code (the latter is not described
  938                  * in the API spec.)
  939                  */
  940                 u_int32_t mtu32;
  941                 struct ip6ctlparam ip6cp;
  942 
  943                 mtu32 = (u_int32_t)mtu;
  944                 bzero(&ip6cp, sizeof(ip6cp));
  945                 ip6cp.ip6c_cmdarg = (void *)&mtu32;
  946                 pfctlinput2(PRC_MSGSIZE,
  947                     rtcache_getdst(ro_pmtu), &ip6cp);
  948 
  949                 error = EMSGSIZE;
  950                 goto bad;
  951         }
  952 
  953         /*
  954          * transmit packet without fragmentation
  955          */
  956         if (dontfrag || (!alwaysfrag && (tlen <= mtu || tso))) {
  957                 /* case 1-a and 2-a */
  958                 struct in6_ifaddr *ia6;
  959                 int sw_csum;
  960 
  961                 ip6 = mtod(m, struct ip6_hdr *);
  962                 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
  963                 if (ia6) {
  964                         /* Record statistics for this interface address. */
  965                         ia6->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len;
  966                 }
  967 #ifdef IPSEC
  968                 /* clean ipsec history once it goes out of the node */
  969                 ipsec_delaux(m);
  970 #endif
  971 
  972                 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
  973                 if ((sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
  974                         if (IN6_NEED_CHECKSUM(ifp,
  975                             sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
  976                                 in6_delayed_cksum(m);
  977                         }
  978                         m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
  979                 }
  980 
  981                 KASSERT(dst != NULL);
  982                 if (__predict_true(!tso ||
  983                     (ifp->if_capenable & IFCAP_TSOv6) != 0)) {
  984                         error = nd6_output(ifp, origifp, m, dst, rt);
  985                 } else {
  986                         error = ip6_tso_output(ifp, origifp, m, dst, rt);
  987                 }
  988                 goto done;
  989         }
  990 
  991         if (tso) {
  992                 error = EINVAL; /* XXX */
  993                 goto bad;
  994         }
  995 
  996         /*
  997          * try to fragment the packet.  case 1-b and 3
  998          */
  999         if (mtu < IPV6_MMTU) {
 1000                 /* path MTU cannot be less than IPV6_MMTU */
 1001                 error = EMSGSIZE;
 1002                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
 1003                 goto bad;
 1004         } else if (ip6->ip6_plen == 0) {
 1005                 /* jumbo payload cannot be fragmented */
 1006                 error = EMSGSIZE;
 1007                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
 1008                 goto bad;
 1009         } else {
 1010                 struct mbuf **mnext, *m_frgpart;
 1011                 struct ip6_frag *ip6f;
 1012                 u_int32_t id = htonl(ip6_randomid());
 1013                 u_char nextproto;
 1014 #if 0                           /* see below */
 1015                 struct ip6ctlparam ip6cp;
 1016                 u_int32_t mtu32;
 1017 #endif
 1018 
 1019                 /*
 1020                  * Too large for the destination or interface;
 1021                  * fragment if possible.
 1022                  * Must be able to put at least 8 bytes per fragment.
 1023                  */
 1024                 hlen = unfragpartlen;
 1025                 if (mtu > IPV6_MAXPACKET)
 1026                         mtu = IPV6_MAXPACKET;
 1027 
 1028 #if 0
 1029                 /*
 1030                  * It is believed this code is a leftover from the
 1031                  * development of the IPV6_RECVPATHMTU sockopt and
 1032                  * associated work to implement RFC3542.
 1033                  * It's not entirely clear what the intent of the API
 1034                  * is at this point, so disable this code for now.
 1035                  * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
 1036                  * will send notifications if the application requests.
 1037                  */
 1038 
 1039                 /* Notify a proper path MTU to applications. */
 1040                 mtu32 = (u_int32_t)mtu;
 1041                 bzero(&ip6cp, sizeof(ip6cp));
 1042                 ip6cp.ip6c_cmdarg = (void *)&mtu32;
 1043                 pfctlinput2(PRC_MSGSIZE,
 1044                     rtcache_getdst(ro_pmtu), &ip6cp);
 1045 #endif
 1046 
 1047                 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
 1048                 if (len < 8) {
 1049                         error = EMSGSIZE;
 1050                         in6_ifstat_inc(ifp, ifs6_out_fragfail);
 1051                         goto bad;
 1052                 }
 1053 
 1054                 mnext = &m->m_nextpkt;
 1055 
 1056                 /*
 1057                  * Change the next header field of the last header in the
 1058                  * unfragmentable part.
 1059                  */
 1060                 if (exthdrs.ip6e_rthdr) {
 1061                         nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
 1062                         *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
 1063                 } else if (exthdrs.ip6e_dest1) {
 1064                         nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
 1065                         *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
 1066                 } else if (exthdrs.ip6e_hbh) {
 1067                         nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
 1068                         *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
 1069                 } else {
 1070                         nextproto = ip6->ip6_nxt;
 1071                         ip6->ip6_nxt = IPPROTO_FRAGMENT;
 1072                 }
 1073 
 1074                 if ((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6))
 1075                     != 0) {
 1076                         if (IN6_NEED_CHECKSUM(ifp,
 1077                             m->m_pkthdr.csum_flags &
 1078                             (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
 1079                                 in6_delayed_cksum(m);
 1080                         }
 1081                         m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
 1082                 }
 1083 
 1084                 /*
 1085                  * Loop through length of segment after first fragment,
 1086                  * make new header and copy data of each part and link onto
 1087                  * chain.
 1088                  */
 1089                 m0 = m;
 1090                 for (off = hlen; off < tlen; off += len) {
 1091                         struct mbuf *mlast;
 1092 
 1093                         MGETHDR(m, M_DONTWAIT, MT_HEADER);
 1094                         if (!m) {
 1095                                 error = ENOBUFS;
 1096                                 IP6_STATINC(IP6_STAT_ODROPPED);
 1097                                 goto sendorfree;
 1098                         }
 1099                         m->m_pkthdr.rcvif = NULL;
 1100                         m->m_flags = m0->m_flags & M_COPYFLAGS;
 1101                         *mnext = m;
 1102                         mnext = &m->m_nextpkt;
 1103                         m->m_data += max_linkhdr;
 1104                         mhip6 = mtod(m, struct ip6_hdr *);
 1105                         *mhip6 = *ip6;
 1106                         m->m_len = sizeof(*mhip6);
 1107                         error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
 1108                         if (error) {
 1109                                 IP6_STATINC(IP6_STAT_ODROPPED);
 1110                                 goto sendorfree;
 1111                         }
 1112                         ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
 1113                         if (off + len >= tlen)
 1114                                 len = tlen - off;
 1115                         else
 1116                                 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
 1117                         mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
 1118                             sizeof(*ip6f) - sizeof(struct ip6_hdr)));
 1119                         if ((m_frgpart = m_copy(m0, off, len)) == 0) {
 1120                                 error = ENOBUFS;
 1121                                 IP6_STATINC(IP6_STAT_ODROPPED);
 1122                                 goto sendorfree;
 1123                         }
 1124                         for (mlast = m; mlast->m_next; mlast = mlast->m_next)
 1125                                 ;
 1126                         mlast->m_next = m_frgpart;
 1127                         m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
 1128                         m->m_pkthdr.rcvif = (struct ifnet *)0;
 1129                         ip6f->ip6f_reserved = 0;
 1130                         ip6f->ip6f_ident = id;
 1131                         ip6f->ip6f_nxt = nextproto;
 1132                         IP6_STATINC(IP6_STAT_OFRAGMENTS);
 1133                         in6_ifstat_inc(ifp, ifs6_out_fragcreat);
 1134                 }
 1135 
 1136                 in6_ifstat_inc(ifp, ifs6_out_fragok);
 1137         }
 1138 
 1139         /*
 1140          * Remove leading garbages.
 1141          */
 1142 sendorfree:
 1143         m = m0->m_nextpkt;
 1144         m0->m_nextpkt = 0;
 1145         m_freem(m0);
 1146         for (m0 = m; m; m = m0) {
 1147                 m0 = m->m_nextpkt;
 1148                 m->m_nextpkt = 0;
 1149                 if (error == 0) {
 1150                         struct in6_ifaddr *ia6;
 1151                         ip6 = mtod(m, struct ip6_hdr *);
 1152                         ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
 1153                         if (ia6) {
 1154                                 /*
 1155                                  * Record statistics for this interface
 1156                                  * address.
 1157                                  */
 1158                                 ia6->ia_ifa.ifa_data.ifad_outbytes +=
 1159                                     m->m_pkthdr.len;
 1160                         }
 1161 #ifdef IPSEC
 1162                         /* clean ipsec history once it goes out of the node */
 1163                         ipsec_delaux(m);
 1164 #endif
 1165                         KASSERT(dst != NULL);
 1166                         error = nd6_output(ifp, origifp, m, dst, rt);
 1167                 } else
 1168                         m_freem(m);
 1169         }
 1170 
 1171         if (error == 0)
 1172                 IP6_STATINC(IP6_STAT_FRAGMENTED);
 1173 
 1174 done:
 1175         rtcache_free(&ip6route);
 1176 
 1177 #ifdef IPSEC
 1178         if (sp != NULL)
 1179                 key_freesp(sp);
 1180 #endif /* IPSEC */
 1181 #ifdef FAST_IPSEC
 1182         if (sp != NULL)
 1183                 KEY_FREESP(&sp);
 1184 #endif /* FAST_IPSEC */
 1185 
 1186 
 1187         return (error);
 1188 
 1189 freehdrs:
 1190         m_freem(exthdrs.ip6e_hbh);      /* m_freem will check if mbuf is 0 */
 1191         m_freem(exthdrs.ip6e_dest1);
 1192         m_freem(exthdrs.ip6e_rthdr);
 1193         m_freem(exthdrs.ip6e_dest2);
 1194         /* FALLTHROUGH */
 1195 bad:
 1196         m_freem(m);
 1197         goto done;
 1198 badscope:
 1199         IP6_STATINC(IP6_STAT_BADSCOPE);
 1200         in6_ifstat_inc(origifp, ifs6_out_discard);
 1201         if (error == 0)
 1202                 error = EHOSTUNREACH; /* XXX */
 1203         goto bad;
 1204 }
 1205 
 1206 static int
 1207 ip6_copyexthdr(struct mbuf **mp, void *hdr, int hlen)
 1208 {
 1209         struct mbuf *m;
 1210 
 1211         if (hlen > MCLBYTES)
 1212                 return (ENOBUFS); /* XXX */
 1213 
 1214         MGET(m, M_DONTWAIT, MT_DATA);
 1215         if (!m)
 1216                 return (ENOBUFS);
 1217 
 1218         if (hlen > MLEN) {
 1219                 MCLGET(m, M_DONTWAIT);
 1220                 if ((m->m_flags & M_EXT) == 0) {
 1221                         m_free(m);
 1222                         return (ENOBUFS);
 1223                 }
 1224         }
 1225         m->m_len = hlen;
 1226         if (hdr)
 1227                 bcopy(hdr, mtod(m, void *), hlen);
 1228 
 1229         *mp = m;
 1230         return (0);
 1231 }
 1232 
 1233 /*
 1234  * Process a delayed payload checksum calculation.
 1235  */
 1236 void
 1237 in6_delayed_cksum(struct mbuf *m)
 1238 {
 1239         uint16_t csum, offset;
 1240 
 1241         KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
 1242         KASSERT((~m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
 1243         KASSERT((m->m_pkthdr.csum_flags
 1244             & (M_CSUM_UDPv4|M_CSUM_TCPv4|M_CSUM_TSOv4)) == 0);
 1245 
 1246         offset = M_CSUM_DATA_IPv6_HL(m->m_pkthdr.csum_data);
 1247         csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
 1248         if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv6) != 0) {
 1249                 csum = 0xffff;
 1250         }
 1251 
 1252         offset += M_CSUM_DATA_IPv6_OFFSET(m->m_pkthdr.csum_data);
 1253         if ((offset + sizeof(csum)) > m->m_len) {
 1254                 m_copyback(m, offset, sizeof(csum), &csum);
 1255         } else {
 1256                 *(uint16_t *)(mtod(m, char *) + offset) = csum;
 1257         }
 1258 }
 1259 
 1260 /*
 1261  * Insert jumbo payload option.
 1262  */
 1263 static int
 1264 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
 1265 {
 1266         struct mbuf *mopt;
 1267         u_int8_t *optbuf;
 1268         u_int32_t v;
 1269 
 1270 #define JUMBOOPTLEN     8       /* length of jumbo payload option and padding */
 1271 
 1272         /*
 1273          * If there is no hop-by-hop options header, allocate new one.
 1274          * If there is one but it doesn't have enough space to store the
 1275          * jumbo payload option, allocate a cluster to store the whole options.
 1276          * Otherwise, use it to store the options.
 1277          */
 1278         if (exthdrs->ip6e_hbh == 0) {
 1279                 MGET(mopt, M_DONTWAIT, MT_DATA);
 1280                 if (mopt == 0)
 1281                         return (ENOBUFS);
 1282                 mopt->m_len = JUMBOOPTLEN;
 1283                 optbuf = mtod(mopt, u_int8_t *);
 1284                 optbuf[1] = 0;  /* = ((JUMBOOPTLEN) >> 3) - 1 */
 1285                 exthdrs->ip6e_hbh = mopt;
 1286         } else {
 1287                 struct ip6_hbh *hbh;
 1288 
 1289                 mopt = exthdrs->ip6e_hbh;
 1290                 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
 1291                         /*
 1292                          * XXX assumption:
 1293                          * - exthdrs->ip6e_hbh is not referenced from places
 1294                          *   other than exthdrs.
 1295                          * - exthdrs->ip6e_hbh is not an mbuf chain.
 1296                          */
 1297                         int oldoptlen = mopt->m_len;
 1298                         struct mbuf *n;
 1299 
 1300                         /*
 1301                          * XXX: give up if the whole (new) hbh header does
 1302                          * not fit even in an mbuf cluster.
 1303                          */
 1304                         if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
 1305                                 return (ENOBUFS);
 1306 
 1307                         /*
 1308                          * As a consequence, we must always prepare a cluster
 1309                          * at this point.
 1310                          */
 1311                         MGET(n, M_DONTWAIT, MT_DATA);
 1312                         if (n) {
 1313                                 MCLGET(n, M_DONTWAIT);
 1314                                 if ((n->m_flags & M_EXT) == 0) {
 1315                                         m_freem(n);
 1316                                         n = NULL;
 1317                                 }
 1318                         }
 1319                         if (!n)
 1320                                 return (ENOBUFS);
 1321                         n->m_len = oldoptlen + JUMBOOPTLEN;
 1322                         bcopy(mtod(mopt, void *), mtod(n, void *),
 1323                             oldoptlen);
 1324                         optbuf = mtod(n, u_int8_t *) + oldoptlen;
 1325                         m_freem(mopt);
 1326                         mopt = exthdrs->ip6e_hbh = n;
 1327                 } else {
 1328                         optbuf = mtod(mopt, u_int8_t *) + mopt->m_len;
 1329                         mopt->m_len += JUMBOOPTLEN;
 1330                 }
 1331                 optbuf[0] = IP6OPT_PADN;
 1332                 optbuf[1] = 0;
 1333 
 1334                 /*
 1335                  * Adjust the header length according to the pad and
 1336                  * the jumbo payload option.
 1337                  */
 1338                 hbh = mtod(mopt, struct ip6_hbh *);
 1339                 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
 1340         }
 1341 
 1342         /* fill in the option. */
 1343         optbuf[2] = IP6OPT_JUMBO;
 1344         optbuf[3] = 4;
 1345         v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
 1346         bcopy(&v, &optbuf[4], sizeof(u_int32_t));
 1347 
 1348         /* finally, adjust the packet header length */
 1349         exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
 1350 
 1351         return (0);
 1352 #undef JUMBOOPTLEN
 1353 }
 1354 
 1355 /*
 1356  * Insert fragment header and copy unfragmentable header portions.
 1357  */
 1358 static int
 1359 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, 
 1360         struct ip6_frag **frghdrp)
 1361 {
 1362         struct mbuf *n, *mlast;
 1363 
 1364         if (hlen > sizeof(struct ip6_hdr)) {
 1365                 n = m_copym(m0, sizeof(struct ip6_hdr),
 1366                     hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
 1367                 if (n == 0)
 1368                         return (ENOBUFS);
 1369                 m->m_next = n;
 1370         } else
 1371                 n = m;
 1372 
 1373         /* Search for the last mbuf of unfragmentable part. */
 1374         for (mlast = n; mlast->m_next; mlast = mlast->m_next)
 1375                 ;
 1376 
 1377         if ((mlast->m_flags & M_EXT) == 0 &&
 1378             M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
 1379                 /* use the trailing space of the last mbuf for the fragment hdr */
 1380                 *frghdrp = (struct ip6_frag *)(mtod(mlast, char *) +
 1381                     mlast->m_len);
 1382                 mlast->m_len += sizeof(struct ip6_frag);
 1383                 m->m_pkthdr.len += sizeof(struct ip6_frag);
 1384         } else {
 1385                 /* allocate a new mbuf for the fragment header */
 1386                 struct mbuf *mfrg;
 1387 
 1388                 MGET(mfrg, M_DONTWAIT, MT_DATA);
 1389                 if (mfrg == 0)
 1390                         return (ENOBUFS);
 1391                 mfrg->m_len = sizeof(struct ip6_frag);
 1392                 *frghdrp = mtod(mfrg, struct ip6_frag *);
 1393                 mlast->m_next = mfrg;
 1394         }
 1395 
 1396         return (0);
 1397 }
 1398 
 1399 static int
 1400 ip6_getpmtu(struct route *ro_pmtu, struct route *ro, struct ifnet *ifp,
 1401     const struct in6_addr *dst, u_long *mtup, int *alwaysfragp)
 1402 {
 1403         struct rtentry *rt;
 1404         u_int32_t mtu = 0;
 1405         int alwaysfrag = 0;
 1406         int error = 0;
 1407 
 1408         if (ro_pmtu != ro) {
 1409                 union {
 1410                         struct sockaddr         dst;
 1411                         struct sockaddr_in6     dst6;
 1412                 } u;
 1413 
 1414                 /* The first hop and the final destination may differ. */
 1415                 sockaddr_in6_init(&u.dst6, dst, 0, 0, 0);
 1416                 rt = rtcache_lookup(ro_pmtu, &u.dst);
 1417         } else
 1418                 rt = rtcache_validate(ro_pmtu);
 1419         if (rt != NULL) {
 1420                 u_int32_t ifmtu;
 1421 
 1422                 if (ifp == NULL)
 1423                         ifp = rt->rt_ifp;
 1424                 ifmtu = IN6_LINKMTU(ifp);
 1425                 mtu = rt->rt_rmx.rmx_mtu;
 1426                 if (mtu == 0)
 1427                         mtu = ifmtu;
 1428                 else if (mtu < IPV6_MMTU) {
 1429                         /*
 1430                          * RFC2460 section 5, last paragraph:
 1431                          * if we record ICMPv6 too big message with
 1432                          * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
 1433                          * or smaller, with fragment header attached.
 1434                          * (fragment header is needed regardless from the
 1435                          * packet size, for translators to identify packets)
 1436                          */
 1437                         alwaysfrag = 1;
 1438                         mtu = IPV6_MMTU;
 1439                 } else if (mtu > ifmtu) {
 1440                         /*
 1441                          * The MTU on the route is larger than the MTU on
 1442                          * the interface!  This shouldn't happen, unless the
 1443                          * MTU of the interface has been changed after the
 1444                          * interface was brought up.  Change the MTU in the
 1445                          * route to match the interface MTU (as long as the
 1446                          * field isn't locked).
 1447                          */
 1448                         mtu = ifmtu;
 1449                         if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
 1450                                 rt->rt_rmx.rmx_mtu = mtu;
 1451                 }
 1452         } else if (ifp) {
 1453                 mtu = IN6_LINKMTU(ifp);
 1454         } else
 1455                 error = EHOSTUNREACH; /* XXX */
 1456 
 1457         *mtup = mtu;
 1458         if (alwaysfragp)
 1459                 *alwaysfragp = alwaysfrag;
 1460         return (error);
 1461 }
 1462 
 1463 /*
 1464  * IP6 socket option processing.
 1465  */
 1466 int
 1467 ip6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
 1468 {
 1469         int privileged, optdatalen, uproto;
 1470         void *optdata;
 1471         struct in6pcb *in6p = sotoin6pcb(so);
 1472         int error, optval;
 1473         struct lwp *l = curlwp; /* XXX */
 1474         int level, optname;
 1475 
 1476         KASSERT(sopt != NULL);
 1477 
 1478         level = sopt->sopt_level;
 1479         optname = sopt->sopt_name;
 1480 
 1481         error = optval = 0;
 1482         privileged = (l == 0 || kauth_authorize_generic(l->l_cred,
 1483             KAUTH_GENERIC_ISSUSER, NULL)) ? 0 : 1;
 1484         uproto = (int)so->so_proto->pr_protocol;
 1485 
 1486         if (level != IPPROTO_IPV6) {
 1487                 return ENOPROTOOPT;
 1488         }
 1489         switch (op) {
 1490         case PRCO_SETOPT:
 1491                 switch (optname) {
 1492 #ifdef RFC2292
 1493                 case IPV6_2292PKTOPTIONS:
 1494                         error = ip6_pcbopts(&in6p->in6p_outputopts, so, sopt);
 1495                         break;
 1496 #endif
 1497 
 1498                 /*
 1499                  * Use of some Hop-by-Hop options or some
 1500                  * Destination options, might require special
 1501                  * privilege.  That is, normal applications
 1502                  * (without special privilege) might be forbidden
 1503                  * from setting certain options in outgoing packets,
 1504                  * and might never see certain options in received
 1505                  * packets. [RFC 2292 Section 6]
 1506                  * KAME specific note:
 1507                  *  KAME prevents non-privileged users from sending or
 1508                  *  receiving ANY hbh/dst options in order to avoid
 1509                  *  overhead of parsing options in the kernel.
 1510                  */
 1511                 case IPV6_RECVHOPOPTS:
 1512                 case IPV6_RECVDSTOPTS:
 1513                 case IPV6_RECVRTHDRDSTOPTS:
 1514                         if (!privileged) {
 1515                                 error = EPERM;
 1516                                 break;
 1517                         }
 1518                         /* FALLTHROUGH */
 1519                 case IPV6_UNICAST_HOPS:
 1520                 case IPV6_HOPLIMIT:
 1521                 case IPV6_FAITH:
 1522 
 1523                 case IPV6_RECVPKTINFO:
 1524                 case IPV6_RECVHOPLIMIT:
 1525                 case IPV6_RECVRTHDR:
 1526                 case IPV6_RECVPATHMTU:
 1527                 case IPV6_RECVTCLASS:
 1528                 case IPV6_V6ONLY:
 1529                         error = sockopt_getint(sopt, &optval);
 1530                         if (error)
 1531                                 break;
 1532                         switch (optname) {
 1533                         case IPV6_UNICAST_HOPS:
 1534                                 if (optval < -1 || optval >= 256)
 1535                                         error = EINVAL;
 1536                                 else {
 1537                                         /* -1 = kernel default */
 1538                                         in6p->in6p_hops = optval;
 1539                                 }
 1540                                 break;
 1541 #define OPTSET(bit) \
 1542 do { \
 1543 if (optval) \
 1544         in6p->in6p_flags |= (bit); \
 1545 else \
 1546         in6p->in6p_flags &= ~(bit); \
 1547 } while (/*CONSTCOND*/ 0)
 1548 
 1549 #ifdef RFC2292
 1550 #define OPTSET2292(bit)                         \
 1551 do {                                            \
 1552 in6p->in6p_flags |= IN6P_RFC2292;       \
 1553 if (optval)                             \
 1554         in6p->in6p_flags |= (bit);      \
 1555 else                                    \
 1556         in6p->in6p_flags &= ~(bit);     \
 1557 } while (/*CONSTCOND*/ 0)
 1558 #endif
 1559 
 1560 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
 1561 
 1562                         case IPV6_RECVPKTINFO:
 1563 #ifdef RFC2292
 1564                                 /* cannot mix with RFC2292 */
 1565                                 if (OPTBIT(IN6P_RFC2292)) {
 1566                                         error = EINVAL;
 1567                                         break;
 1568                                 }
 1569 #endif
 1570                                 OPTSET(IN6P_PKTINFO);
 1571                                 break;
 1572 
 1573                         case IPV6_HOPLIMIT:
 1574                         {
 1575                                 struct ip6_pktopts **optp;
 1576 
 1577 #ifdef RFC2292
 1578                                 /* cannot mix with RFC2292 */
 1579                                 if (OPTBIT(IN6P_RFC2292)) {
 1580                                         error = EINVAL;
 1581                                         break;
 1582                                 }
 1583 #endif
 1584                                 optp = &in6p->in6p_outputopts;
 1585                                 error = ip6_pcbopt(IPV6_HOPLIMIT,
 1586                                                    (u_char *)&optval,
 1587                                                    sizeof(optval),
 1588                                                    optp,
 1589                                                    privileged, uproto);
 1590                                 break;
 1591                         }
 1592 
 1593                         case IPV6_RECVHOPLIMIT:
 1594 #ifdef RFC2292
 1595                                 /* cannot mix with RFC2292 */
 1596                                 if (OPTBIT(IN6P_RFC2292)) {
 1597                                         error = EINVAL;
 1598                                         break;
 1599                                 }
 1600 #endif
 1601                                 OPTSET(IN6P_HOPLIMIT);
 1602                                 break;
 1603 
 1604                         case IPV6_RECVHOPOPTS:
 1605 #ifdef RFC2292
 1606                                 /* cannot mix with RFC2292 */
 1607                                 if (OPTBIT(IN6P_RFC2292)) {
 1608                                         error = EINVAL;
 1609                                         break;
 1610                                 }
 1611 #endif
 1612                                 OPTSET(IN6P_HOPOPTS);
 1613                                 break;
 1614 
 1615                         case IPV6_RECVDSTOPTS:
 1616 #ifdef RFC2292
 1617                                 /* cannot mix with RFC2292 */
 1618                                 if (OPTBIT(IN6P_RFC2292)) {
 1619                                         error = EINVAL;
 1620                                         break;
 1621                                 }
 1622 #endif
 1623                                 OPTSET(IN6P_DSTOPTS);
 1624                                 break;
 1625 
 1626                         case IPV6_RECVRTHDRDSTOPTS:
 1627 #ifdef RFC2292
 1628                                 /* cannot mix with RFC2292 */
 1629                                 if (OPTBIT(IN6P_RFC2292)) {
 1630                                         error = EINVAL;
 1631                                         break;
 1632                                 }
 1633 #endif
 1634                                 OPTSET(IN6P_RTHDRDSTOPTS);
 1635                                 break;
 1636 
 1637                         case IPV6_RECVRTHDR:
 1638 #ifdef RFC2292
 1639                                 /* cannot mix with RFC2292 */
 1640                                 if (OPTBIT(IN6P_RFC2292)) {
 1641                                         error = EINVAL;
 1642                                         break;
 1643                                 }
 1644 #endif
 1645                                 OPTSET(IN6P_RTHDR);
 1646                                 break;
 1647 
 1648                         case IPV6_FAITH:
 1649                                 OPTSET(IN6P_FAITH);
 1650                                 break;
 1651 
 1652                         case IPV6_RECVPATHMTU:
 1653                                 /*
 1654                                  * We ignore this option for TCP
 1655                                  * sockets.
 1656                                  * (RFC3542 leaves this case
 1657                                  * unspecified.)
 1658                                  */
 1659                                 if (uproto != IPPROTO_TCP)
 1660                                         OPTSET(IN6P_MTU);
 1661                                 break;
 1662 
 1663                         case IPV6_V6ONLY:
 1664                                 /*
 1665                                  * make setsockopt(IPV6_V6ONLY)
 1666                                  * available only prior to bind(2).
 1667                                  * see ipng mailing list, Jun 22 2001.
 1668                                  */
 1669                                 if (in6p->in6p_lport ||
 1670                                     !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
 1671                                         error = EINVAL;
 1672                                         break;
 1673                                 }
 1674 #ifdef INET6_BINDV6ONLY
 1675                                 if (!optval)
 1676                                         error = EINVAL;
 1677 #else
 1678                                 OPTSET(IN6P_IPV6_V6ONLY);
 1679 #endif
 1680                                 break;
 1681                         case IPV6_RECVTCLASS:
 1682 #ifdef RFC2292
 1683                                 /* cannot mix with RFC2292 XXX */
 1684                                 if (OPTBIT(IN6P_RFC2292)) {
 1685                                         error = EINVAL;
 1686                                         break;
 1687                                 }
 1688 #endif
 1689                                 OPTSET(IN6P_TCLASS);
 1690                                 break;
 1691 
 1692                         }
 1693                         break;
 1694 
 1695                 case IPV6_OTCLASS:
 1696                 {
 1697                         struct ip6_pktopts **optp;
 1698                         u_int8_t tclass;
 1699 
 1700                         error = sockopt_get(sopt, &tclass, sizeof(tclass));
 1701                         if (error)
 1702                                 break;
 1703                         optp = &in6p->in6p_outputopts;
 1704                         error = ip6_pcbopt(optname,
 1705                                            (u_char *)&tclass,
 1706                                            sizeof(tclass),
 1707                                            optp,
 1708                                            privileged, uproto);
 1709                         break;
 1710                 }
 1711 
 1712                 case IPV6_TCLASS:
 1713                 case IPV6_DONTFRAG:
 1714                 case IPV6_USE_MIN_MTU:
 1715                         error = sockopt_getint(sopt, &optval);
 1716                         if (error)
 1717                                 break;
 1718                         {
 1719                                 struct ip6_pktopts **optp;
 1720                                 optp = &in6p->in6p_outputopts;
 1721                                 error = ip6_pcbopt(optname,
 1722                                                    (u_char *)&optval,
 1723                                                    sizeof(optval),
 1724                                                    optp,
 1725                                                    privileged, uproto);
 1726                                 break;
 1727                         }
 1728 
 1729 #ifdef RFC2292
 1730                 case IPV6_2292PKTINFO:
 1731                 case IPV6_2292HOPLIMIT:
 1732                 case IPV6_2292HOPOPTS:
 1733                 case IPV6_2292DSTOPTS:
 1734                 case IPV6_2292RTHDR:
 1735                         /* RFC 2292 */
 1736                         error = sockopt_getint(sopt, &optval);
 1737                         if (error)
 1738                                 break;
 1739 
 1740                         switch (optname) {
 1741                         case IPV6_2292PKTINFO:
 1742                                 OPTSET2292(IN6P_PKTINFO);
 1743                                 break;
 1744                         case IPV6_2292HOPLIMIT:
 1745                                 OPTSET2292(IN6P_HOPLIMIT);
 1746                                 break;
 1747                         case IPV6_2292HOPOPTS:
 1748                                 /*
 1749                                  * Check super-user privilege.
 1750                                  * See comments for IPV6_RECVHOPOPTS.
 1751                                  */
 1752                                 if (!privileged)
 1753                                         return (EPERM);
 1754                                 OPTSET2292(IN6P_HOPOPTS);
 1755                                 break;
 1756                         case IPV6_2292DSTOPTS:
 1757                                 if (!privileged)
 1758                                         return (EPERM);
 1759                                 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
 1760                                 break;
 1761                         case IPV6_2292RTHDR:
 1762                                 OPTSET2292(IN6P_RTHDR);
 1763                                 break;
 1764                         }
 1765                         break;
 1766 #endif
 1767                 case IPV6_PKTINFO:
 1768                 case IPV6_HOPOPTS:
 1769                 case IPV6_RTHDR:
 1770                 case IPV6_DSTOPTS:
 1771                 case IPV6_RTHDRDSTOPTS:
 1772                 case IPV6_NEXTHOP: {
 1773                         /* new advanced API (RFC3542) */
 1774                         void *optbuf;
 1775                         int optbuflen;
 1776                         struct ip6_pktopts **optp;
 1777 
 1778 #ifdef RFC2292
 1779                         /* cannot mix with RFC2292 */
 1780                         if (OPTBIT(IN6P_RFC2292)) {
 1781                                 error = EINVAL;
 1782                                 break;
 1783                         }
 1784 #endif
 1785 
 1786                         optbuflen = sopt->sopt_size;
 1787                         optbuf = malloc(optbuflen, M_IP6OPT, M_NOWAIT);
 1788                         if (optbuf == NULL) {
 1789                                 error = ENOBUFS;
 1790                                 break;
 1791                         }
 1792 
 1793                         sockopt_get(sopt, optbuf, optbuflen);
 1794                         optp = &in6p->in6p_outputopts;
 1795                         error = ip6_pcbopt(optname, optbuf, optbuflen,
 1796                             optp, privileged, uproto);
 1797                         break;
 1798                         }
 1799 #undef OPTSET
 1800 
 1801                 case IPV6_MULTICAST_IF:
 1802                 case IPV6_MULTICAST_HOPS:
 1803                 case IPV6_MULTICAST_LOOP:
 1804                 case IPV6_JOIN_GROUP:
 1805                 case IPV6_LEAVE_GROUP:
 1806                         error = ip6_setmoptions(sopt, &in6p->in6p_moptions);
 1807                         break;
 1808 
 1809                 case IPV6_PORTRANGE:
 1810                         error = sockopt_getint(sopt, &optval);
 1811                         if (error)
 1812                                 break;
 1813 
 1814                         switch (optval) {
 1815                         case IPV6_PORTRANGE_DEFAULT:
 1816                                 in6p->in6p_flags &= ~(IN6P_LOWPORT);
 1817                                 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
 1818                                 break;
 1819 
 1820                         case IPV6_PORTRANGE_HIGH:
 1821                                 in6p->in6p_flags &= ~(IN6P_LOWPORT);
 1822                                 in6p->in6p_flags |= IN6P_HIGHPORT;
 1823                                 break;
 1824 
 1825                         case IPV6_PORTRANGE_LOW:
 1826                                 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
 1827                                 in6p->in6p_flags |= IN6P_LOWPORT;
 1828                                 break;
 1829 
 1830                         default:
 1831                                 error = EINVAL;
 1832                                 break;
 1833                         }
 1834                         break;
 1835 
 1836 
 1837 #if defined(IPSEC) || defined(FAST_IPSEC)
 1838                 case IPV6_IPSEC_POLICY:
 1839                         error = ipsec6_set_policy(in6p, optname,
 1840                             sopt->sopt_data, sopt->sopt_size, privileged);
 1841                         break;
 1842 #endif /* IPSEC */
 1843 
 1844                 default:
 1845                         error = ENOPROTOOPT;
 1846                         break;
 1847                 }
 1848                 break;
 1849 
 1850         case PRCO_GETOPT:
 1851                 switch (optname) {
 1852 #ifdef RFC2292
 1853                 case IPV6_2292PKTOPTIONS:
 1854                         /*
 1855                          * RFC3542 (effectively) deprecated the
 1856                          * semantics of the 2292-style pktoptions.
 1857                          * Since it was not reliable in nature (i.e.,
 1858                          * applications had to expect the lack of some
 1859                          * information after all), it would make sense
 1860                          * to simplify this part by always returning
 1861                          * empty data.
 1862                          */
 1863                         break;
 1864 #endif
 1865 
 1866                 case IPV6_RECVHOPOPTS:
 1867                 case IPV6_RECVDSTOPTS:
 1868                 case IPV6_RECVRTHDRDSTOPTS:
 1869                 case IPV6_UNICAST_HOPS:
 1870                 case IPV6_RECVPKTINFO:
 1871                 case IPV6_RECVHOPLIMIT:
 1872                 case IPV6_RECVRTHDR:
 1873                 case IPV6_RECVPATHMTU:
 1874 
 1875                 case IPV6_FAITH:
 1876                 case IPV6_V6ONLY:
 1877                 case IPV6_PORTRANGE:
 1878                 case IPV6_RECVTCLASS:
 1879                         switch (optname) {
 1880 
 1881                         case IPV6_RECVHOPOPTS:
 1882                                 optval = OPTBIT(IN6P_HOPOPTS);
 1883                                 break;
 1884 
 1885                         case IPV6_RECVDSTOPTS:
 1886                                 optval = OPTBIT(IN6P_DSTOPTS);
 1887                                 break;
 1888 
 1889                         case IPV6_RECVRTHDRDSTOPTS:
 1890                                 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
 1891                                 break;
 1892 
 1893                         case IPV6_UNICAST_HOPS:
 1894                                 optval = in6p->in6p_hops;
 1895                                 break;
 1896 
 1897                         case IPV6_RECVPKTINFO:
 1898                                 optval = OPTBIT(IN6P_PKTINFO);
 1899                                 break;
 1900 
 1901                         case IPV6_RECVHOPLIMIT:
 1902                                 optval = OPTBIT(IN6P_HOPLIMIT);
 1903                                 break;
 1904 
 1905                         case IPV6_RECVRTHDR:
 1906                                 optval = OPTBIT(IN6P_RTHDR);
 1907                                 break;
 1908 
 1909                         case IPV6_RECVPATHMTU:
 1910                                 optval = OPTBIT(IN6P_MTU);
 1911                                 break;
 1912 
 1913                         case IPV6_FAITH:
 1914                                 optval = OPTBIT(IN6P_FAITH);
 1915                                 break;
 1916 
 1917                         case IPV6_V6ONLY:
 1918                                 optval = OPTBIT(IN6P_IPV6_V6ONLY);
 1919                                 break;
 1920 
 1921                         case IPV6_PORTRANGE:
 1922                             {
 1923                                 int flags;
 1924                                 flags = in6p->in6p_flags;
 1925                                 if (flags & IN6P_HIGHPORT)
 1926                                         optval = IPV6_PORTRANGE_HIGH;
 1927                                 else if (flags & IN6P_LOWPORT)
 1928                                         optval = IPV6_PORTRANGE_LOW;
 1929                                 else
 1930                                         optval = 0;
 1931                                 break;
 1932                             }
 1933                         case IPV6_RECVTCLASS:
 1934                                 optval = OPTBIT(IN6P_TCLASS);
 1935                                 break;
 1936 
 1937                         }
 1938                         if (error)
 1939                                 break;
 1940                         error = sockopt_setint(sopt, optval);
 1941                         break;
 1942 
 1943                 case IPV6_PATHMTU:
 1944                     {
 1945                         u_long pmtu = 0;
 1946                         struct ip6_mtuinfo mtuinfo;
 1947                         struct route *ro = &in6p->in6p_route;
 1948 
 1949                         if (!(so->so_state & SS_ISCONNECTED))
 1950                                 return (ENOTCONN);
 1951                         /*
 1952                          * XXX: we dot not consider the case of source
 1953                          * routing, or optional information to specify
 1954                          * the outgoing interface.
 1955                          */
 1956                         error = ip6_getpmtu(ro, NULL, NULL,
 1957                             &in6p->in6p_faddr, &pmtu, NULL);
 1958                         if (error)
 1959                                 break;
 1960                         if (pmtu > IPV6_MAXPACKET)
 1961                                 pmtu = IPV6_MAXPACKET;
 1962 
 1963                         memset(&mtuinfo, 0, sizeof(mtuinfo));
 1964                         mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
 1965                         optdata = (void *)&mtuinfo;
 1966                         optdatalen = sizeof(mtuinfo);
 1967                         if (optdatalen > MCLBYTES)
 1968                                 return (EMSGSIZE); /* XXX */
 1969                         error = sockopt_set(sopt, optdata, optdatalen);
 1970                         break;
 1971                     }
 1972 
 1973 #ifdef RFC2292
 1974                 case IPV6_2292PKTINFO:
 1975                 case IPV6_2292HOPLIMIT:
 1976                 case IPV6_2292HOPOPTS:
 1977                 case IPV6_2292RTHDR:
 1978                 case IPV6_2292DSTOPTS:
 1979                         switch (optname) {
 1980                         case IPV6_2292PKTINFO:
 1981                                 optval = OPTBIT(IN6P_PKTINFO);
 1982                                 break;
 1983                         case IPV6_2292HOPLIMIT:
 1984                                 optval = OPTBIT(IN6P_HOPLIMIT);
 1985                                 break;
 1986                         case IPV6_2292HOPOPTS:
 1987                                 optval = OPTBIT(IN6P_HOPOPTS);
 1988                                 break;
 1989                         case IPV6_2292RTHDR:
 1990                                 optval = OPTBIT(IN6P_RTHDR);
 1991                                 break;
 1992                         case IPV6_2292DSTOPTS:
 1993                                 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
 1994                                 break;
 1995                         }
 1996                         error = sockopt_setint(sopt, optval);
 1997                         break;
 1998 #endif
 1999                 case IPV6_PKTINFO:
 2000                 case IPV6_HOPOPTS:
 2001                 case IPV6_RTHDR:
 2002                 case IPV6_DSTOPTS:
 2003                 case IPV6_RTHDRDSTOPTS:
 2004                 case IPV6_NEXTHOP:
 2005                 case IPV6_OTCLASS:
 2006                 case IPV6_TCLASS:
 2007                 case IPV6_DONTFRAG:
 2008                 case IPV6_USE_MIN_MTU:
 2009                         error = ip6_getpcbopt(in6p->in6p_outputopts,
 2010                             optname, sopt);
 2011                         break;
 2012 
 2013                 case IPV6_MULTICAST_IF:
 2014                 case IPV6_MULTICAST_HOPS:
 2015                 case IPV6_MULTICAST_LOOP:
 2016                 case IPV6_JOIN_GROUP:
 2017                 case IPV6_LEAVE_GROUP:
 2018                         error = ip6_getmoptions(sopt, in6p->in6p_moptions);
 2019                         break;
 2020 
 2021 #if defined(IPSEC) || defined(FAST_IPSEC)
 2022                 case IPV6_IPSEC_POLICY:
 2023                     {
 2024                         struct mbuf *m = NULL;
 2025 
 2026                         /* XXX this will return EINVAL as sopt is empty */
 2027                         error = ipsec6_get_policy(in6p, sopt->sopt_data,
 2028                             sopt->sopt_size, &m);
 2029                         if (!error)
 2030                                 error = sockopt_setmbuf(sopt, m);
 2031 
 2032                         break;
 2033                     }
 2034 #endif /* IPSEC */
 2035 
 2036                 default:
 2037                         error = ENOPROTOOPT;
 2038                         break;
 2039                 }
 2040                 break;
 2041         }
 2042         return (error);
 2043 }
 2044 
 2045 int
 2046 ip6_raw_ctloutput(int op, struct socket *so, struct sockopt *sopt)
 2047 {
 2048         int error = 0, optval;
 2049         const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
 2050         struct in6pcb *in6p = sotoin6pcb(so);
 2051         int level, optname;
 2052 
 2053         KASSERT(sopt != NULL);
 2054 
 2055         level = sopt->sopt_level;
 2056         optname = sopt->sopt_name;
 2057 
 2058         if (level != IPPROTO_IPV6) {
 2059                 return ENOPROTOOPT;
 2060         }
 2061 
 2062         switch (optname) {
 2063         case IPV6_CHECKSUM:
 2064                 /*
 2065                  * For ICMPv6 sockets, no modification allowed for checksum
 2066                  * offset, permit "no change" values to help existing apps.
 2067                  *
 2068                  * XXX RFC3542 says: "An attempt to set IPV6_CHECKSUM
 2069                  * for an ICMPv6 socket will fail."  The current
 2070                  * behavior does not meet RFC3542.
 2071                  */
 2072                 switch (op) {
 2073                 case PRCO_SETOPT:
 2074                         error = sockopt_getint(sopt, &optval);
 2075                         if (error)
 2076                                 break;
 2077                         if ((optval % 2) != 0) {
 2078                                 /* the API assumes even offset values */
 2079                                 error = EINVAL;
 2080                         } else if (so->so_proto->pr_protocol ==
 2081                             IPPROTO_ICMPV6) {
 2082                                 if (optval != icmp6off)
 2083                                         error = EINVAL;
 2084                         } else
 2085                                 in6p->in6p_cksum = optval;
 2086                         break;
 2087 
 2088                 case PRCO_GETOPT:
 2089                         if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
 2090                                 optval = icmp6off;
 2091                         else
 2092                                 optval = in6p->in6p_cksum;
 2093 
 2094                         error = sockopt_setint(sopt, optval);
 2095                         break;
 2096 
 2097                 default:
 2098                         error = EINVAL;
 2099                         break;
 2100                 }
 2101                 break;
 2102 
 2103         default:
 2104                 error = ENOPROTOOPT;
 2105                 break;
 2106         }
 2107 
 2108         return (error);
 2109 }
 2110 
 2111 #ifdef RFC2292
 2112 /*
 2113  * Set up IP6 options in pcb for insertion in output packets or
 2114  * specifying behavior of outgoing packets.
 2115  */
 2116 static int
 2117 ip6_pcbopts(struct ip6_pktopts **pktopt, struct socket *so,
 2118     struct sockopt *sopt)
 2119 {
 2120         struct ip6_pktopts *opt = *pktopt;
 2121         struct mbuf *m;
 2122         int error = 0;
 2123         struct lwp *l = curlwp; /* XXX */
 2124         int priv = 0;
 2125 
 2126         /* turn off any old options. */
 2127         if (opt) {
 2128 #ifdef DIAGNOSTIC
 2129             if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
 2130                 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
 2131                 opt->ip6po_rhinfo.ip6po_rhi_rthdr)
 2132                     printf("ip6_pcbopts: all specified options are cleared.\n");
 2133 #endif
 2134                 ip6_clearpktopts(opt, -1);
 2135         } else {
 2136                 opt = malloc(sizeof(*opt), M_IP6OPT, M_NOWAIT);
 2137                 if (opt == NULL)
 2138                         return (ENOBUFS);
 2139         }
 2140         *pktopt = NULL;
 2141 
 2142         if (sopt == NULL || sopt->sopt_size == 0) {
 2143                 /*
 2144                  * Only turning off any previous options, regardless of
 2145                  * whether the opt is just created or given.
 2146                  */
 2147                 free(opt, M_IP6OPT);
 2148                 return (0);
 2149         }
 2150 
 2151         /*  set options specified by user. */
 2152         if (l && !kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
 2153             NULL))
 2154                 priv = 1;
 2155 
 2156         m = sockopt_getmbuf(sopt);
 2157         if (m == NULL) {
 2158                 free(opt, M_IP6OPT);
 2159                 return (ENOBUFS);
 2160         }
 2161 
 2162         error = ip6_setpktopts(m, opt, NULL, priv, so->so_proto->pr_protocol);
 2163         m_freem(m);
 2164         if (error != 0) {
 2165                 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
 2166                 free(opt, M_IP6OPT);
 2167                 return (error);
 2168         }
 2169         *pktopt = opt;
 2170         return (0);
 2171 }
 2172 #endif
 2173 
 2174 /*
 2175  * initialize ip6_pktopts.  beware that there are non-zero default values in
 2176  * the struct.
 2177  */
 2178 void
 2179 ip6_initpktopts(struct ip6_pktopts *opt)
 2180 {
 2181 
 2182         memset(opt, 0, sizeof(*opt));
 2183         opt->ip6po_hlim = -1;   /* -1 means default hop limit */
 2184         opt->ip6po_tclass = -1; /* -1 means default traffic class */
 2185         opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
 2186 }
 2187 
 2188 #define sin6tosa(sin6)  ((struct sockaddr *)(sin6)) /* XXX */
 2189 static int
 2190 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
 2191     int priv, int uproto)
 2192 {
 2193         struct ip6_pktopts *opt;
 2194 
 2195         if (*pktopt == NULL) {
 2196                 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
 2197                     M_NOWAIT);
 2198                 if (*pktopt == NULL)
 2199                         return (ENOBUFS);
 2200 
 2201                 ip6_initpktopts(*pktopt);
 2202         }
 2203         opt = *pktopt;
 2204 
 2205         return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto));
 2206 }
 2207 
 2208 static int
 2209 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
 2210 {
 2211         void *optdata = NULL;
 2212         int optdatalen = 0;
 2213         struct ip6_ext *ip6e;
 2214         int error = 0;
 2215         struct in6_pktinfo null_pktinfo;
 2216         int deftclass = 0, on;
 2217         int defminmtu = IP6PO_MINMTU_MCASTONLY;
 2218 
 2219         switch (optname) {
 2220         case IPV6_PKTINFO:
 2221                 if (pktopt && pktopt->ip6po_pktinfo)
 2222                         optdata = (void *)pktopt->ip6po_pktinfo;
 2223                 else {
 2224                         /* XXX: we don't have to do this every time... */
 2225                         memset(&null_pktinfo, 0, sizeof(null_pktinfo));
 2226                         optdata = (void *)&null_pktinfo;
 2227                 }
 2228                 optdatalen = sizeof(struct in6_pktinfo);
 2229                 break;
 2230         case IPV6_OTCLASS:
 2231                 /* XXX */
 2232                 return (EINVAL);
 2233         case IPV6_TCLASS:
 2234                 if (pktopt && pktopt->ip6po_tclass >= 0)
 2235                         optdata = (void *)&pktopt->ip6po_tclass;
 2236                 else
 2237                         optdata = (void *)&deftclass;
 2238                 optdatalen = sizeof(int);
 2239                 break;
 2240         case IPV6_HOPOPTS:
 2241                 if (pktopt && pktopt->ip6po_hbh) {
 2242                         optdata = (void *)pktopt->ip6po_hbh;
 2243                         ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
 2244                         optdatalen = (ip6e->ip6e_len + 1) << 3;
 2245                 }
 2246                 break;
 2247         case IPV6_RTHDR:
 2248                 if (pktopt && pktopt->ip6po_rthdr) {
 2249                         optdata = (void *)pktopt->ip6po_rthdr;
 2250                         ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
 2251                         optdatalen = (ip6e->ip6e_len + 1) << 3;
 2252                 }
 2253                 break;
 2254         case IPV6_RTHDRDSTOPTS:
 2255                 if (pktopt && pktopt->ip6po_dest1) {
 2256                         optdata = (void *)pktopt->ip6po_dest1;
 2257                         ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
 2258                         optdatalen = (ip6e->ip6e_len + 1) << 3;
 2259                 }
 2260                 break;
 2261         case IPV6_DSTOPTS:
 2262                 if (pktopt && pktopt->ip6po_dest2) {
 2263                         optdata = (void *)pktopt->ip6po_dest2;
 2264                         ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
 2265                         optdatalen = (ip6e->ip6e_len + 1) << 3;
 2266                 }
 2267                 break;
 2268         case IPV6_NEXTHOP:
 2269                 if (pktopt && pktopt->ip6po_nexthop) {
 2270                         optdata = (void *)pktopt->ip6po_nexthop;
 2271                         optdatalen = pktopt->ip6po_nexthop->sa_len;
 2272                 }
 2273                 break;
 2274         case IPV6_USE_MIN_MTU:
 2275                 if (pktopt)
 2276                         optdata = (void *)&pktopt->ip6po_minmtu;
 2277                 else
 2278                         optdata = (void *)&defminmtu;
 2279                 optdatalen = sizeof(int);
 2280                 break;
 2281         case IPV6_DONTFRAG:
 2282                 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
 2283                         on = 1;
 2284                 else
 2285                         on = 0;
 2286                 optdata = (void *)&on;
 2287                 optdatalen = sizeof(on);
 2288                 break;
 2289         default:                /* should not happen */
 2290 #ifdef DIAGNOSTIC
 2291                 panic("ip6_getpcbopt: unexpected option\n");
 2292 #endif
 2293                 return (ENOPROTOOPT);
 2294         }
 2295 
 2296         error = sockopt_set(sopt, optdata, optdatalen);
 2297 
 2298         return (error);
 2299 }
 2300 
 2301 void
 2302 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
 2303 {
 2304         if (optname == -1 || optname == IPV6_PKTINFO) {
 2305                 if (pktopt->ip6po_pktinfo)
 2306                         free(pktopt->ip6po_pktinfo, M_IP6OPT);
 2307                 pktopt->ip6po_pktinfo = NULL;
 2308         }
 2309         if (optname == -1 || optname == IPV6_HOPLIMIT)
 2310                 pktopt->ip6po_hlim = -1;
 2311         if (optname == -1 || optname == IPV6_TCLASS)
 2312                 pktopt->ip6po_tclass = -1;
 2313         if (optname == -1 || optname == IPV6_NEXTHOP) {
 2314                 rtcache_free(&pktopt->ip6po_nextroute);
 2315                 if (pktopt->ip6po_nexthop)
 2316                         free(pktopt->ip6po_nexthop, M_IP6OPT);
 2317                 pktopt->ip6po_nexthop = NULL;
 2318         }
 2319         if (optname == -1 || optname == IPV6_HOPOPTS) {
 2320                 if (pktopt->ip6po_hbh)
 2321                         free(pktopt->ip6po_hbh, M_IP6OPT);
 2322                 pktopt->ip6po_hbh = NULL;
 2323         }
 2324         if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
 2325                 if (pktopt->ip6po_dest1)
 2326                         free(pktopt->ip6po_dest1, M_IP6OPT);
 2327                 pktopt->ip6po_dest1 = NULL;
 2328         }
 2329         if (optname == -1 || optname == IPV6_RTHDR) {
 2330                 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
 2331                         free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
 2332                 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
 2333                 rtcache_free(&pktopt->ip6po_route);
 2334         }
 2335         if (optname == -1 || optname == IPV6_DSTOPTS) {
 2336                 if (pktopt->ip6po_dest2)
 2337                         free(pktopt->ip6po_dest2, M_IP6OPT);
 2338                 pktopt->ip6po_dest2 = NULL;
 2339         }
 2340 }
 2341 
 2342 #define PKTOPT_EXTHDRCPY(type)                                  \
 2343 do {                                                            \
 2344         if (src->type) {                                        \
 2345                 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
 2346                 dst->type = malloc(hlen, M_IP6OPT, canwait);    \
 2347                 if (dst->type == NULL && canwait == M_NOWAIT)   \
 2348                         goto bad;                               \
 2349                 memcpy(dst->type, src->type, hlen);             \
 2350         }                                                       \
 2351 } while (/*CONSTCOND*/ 0)
 2352 
 2353 static int
 2354 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
 2355 {
 2356         dst->ip6po_hlim = src->ip6po_hlim;
 2357         dst->ip6po_tclass = src->ip6po_tclass;
 2358         dst->ip6po_flags = src->ip6po_flags;
 2359         if (src->ip6po_pktinfo) {
 2360                 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
 2361                     M_IP6OPT, canwait);
 2362                 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
 2363                         goto bad;
 2364                 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
 2365         }
 2366         if (src->ip6po_nexthop) {
 2367                 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
 2368                     M_IP6OPT, canwait);
 2369                 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
 2370                         goto bad;
 2371                 memcpy(dst->ip6po_nexthop, src->ip6po_nexthop,
 2372                     src->ip6po_nexthop->sa_len);
 2373         }
 2374         PKTOPT_EXTHDRCPY(ip6po_hbh);
 2375         PKTOPT_EXTHDRCPY(ip6po_dest1);
 2376         PKTOPT_EXTHDRCPY(ip6po_dest2);
 2377         PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
 2378         return (0);
 2379 
 2380   bad:
 2381         if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
 2382         if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
 2383         if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
 2384         if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
 2385         if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
 2386         if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
 2387 
 2388         return (ENOBUFS);
 2389 }
 2390 #undef PKTOPT_EXTHDRCPY
 2391 
 2392 struct ip6_pktopts *
 2393 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
 2394 {
 2395         int error;
 2396         struct ip6_pktopts *dst;
 2397 
 2398         dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
 2399         if (dst == NULL && canwait == M_NOWAIT)
 2400                 return (NULL);
 2401         ip6_initpktopts(dst);
 2402 
 2403         if ((error = copypktopts(dst, src, canwait)) != 0) {
 2404                 free(dst, M_IP6OPT);
 2405                 return (NULL);
 2406         }
 2407 
 2408         return (dst);
 2409 }
 2410 
 2411 void
 2412 ip6_freepcbopts(struct ip6_pktopts *pktopt)
 2413 {
 2414         if (pktopt == NULL)
 2415                 return;
 2416 
 2417         ip6_clearpktopts(pktopt, -1);
 2418 
 2419         free(pktopt, M_IP6OPT);
 2420 }
 2421 
 2422 /*
 2423  * Set the IP6 multicast options in response to user setsockopt().
 2424  */
 2425 static int
 2426 ip6_setmoptions(const struct sockopt *sopt, struct ip6_moptions **im6op)
 2427 {
 2428         int error = 0;
 2429         u_int loop, ifindex;
 2430         struct ipv6_mreq mreq;
 2431         struct ifnet *ifp;
 2432         struct ip6_moptions *im6o = *im6op;
 2433         struct route ro;
 2434         struct in6_multi_mship *imm;
 2435         struct lwp *l = curlwp; /* XXX */
 2436 
 2437         if (im6o == NULL) {
 2438                 /*
 2439                  * No multicast option buffer attached to the pcb;
 2440                  * allocate one and initialize to default values.
 2441                  */
 2442                 im6o = malloc(sizeof(*im6o), M_IPMOPTS, M_NOWAIT);
 2443                 if (im6o == NULL)
 2444                         return (ENOBUFS);
 2445 
 2446                 *im6op = im6o;
 2447                 im6o->im6o_multicast_ifp = NULL;
 2448                 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
 2449                 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
 2450                 LIST_INIT(&im6o->im6o_memberships);
 2451         }
 2452 
 2453         switch (sopt->sopt_name) {
 2454 
 2455         case IPV6_MULTICAST_IF:
 2456                 /*
 2457                  * Select the interface for outgoing multicast packets.
 2458                  */
 2459                 error = sockopt_get(sopt, &ifindex, sizeof(ifindex));
 2460                 if (error != 0)
 2461                         break;
 2462 
 2463                 if (ifindex != 0) {
 2464                         if (if_indexlim <= ifindex || !ifindex2ifnet[ifindex]) {
 2465                                 error = ENXIO;  /* XXX EINVAL? */
 2466                                 break;
 2467                         }
 2468                         ifp = ifindex2ifnet[ifindex];
 2469                         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 2470                                 error = EADDRNOTAVAIL;
 2471                                 break;
 2472                         }
 2473                 } else
 2474                         ifp = NULL;
 2475                 im6o->im6o_multicast_ifp = ifp;
 2476                 break;
 2477 
 2478         case IPV6_MULTICAST_HOPS:
 2479             {
 2480                 /*
 2481                  * Set the IP6 hoplimit for outgoing multicast packets.
 2482                  */
 2483                 int optval;
 2484 
 2485                 error = sockopt_getint(sopt, &optval);
 2486                 if (error != 0)
 2487                         break;
 2488 
 2489                 if (optval < -1 || optval >= 256)
 2490                         error = EINVAL;
 2491                 else if (optval == -1)
 2492                         im6o->im6o_multicast_hlim = ip6_defmcasthlim;
 2493                 else
 2494                         im6o->im6o_multicast_hlim = optval;
 2495                 break;
 2496             }
 2497 
 2498         case IPV6_MULTICAST_LOOP:
 2499                 /*
 2500                  * Set the loopback flag for outgoing multicast packets.
 2501                  * Must be zero or one.
 2502                  */
 2503                 error = sockopt_get(sopt, &loop, sizeof(loop));
 2504                 if (error != 0)
 2505                         break;
 2506                 if (loop > 1) {
 2507                         error = EINVAL;
 2508                         break;
 2509                 }
 2510                 im6o->im6o_multicast_loop = loop;
 2511                 break;
 2512 
 2513         case IPV6_JOIN_GROUP:
 2514                 /*
 2515                  * Add a multicast group membership.
 2516                  * Group must be a valid IP6 multicast address.
 2517                  */
 2518                 error = sockopt_get(sopt, &mreq, sizeof(mreq));
 2519                 if (error != 0)
 2520                         break;
 2521 
 2522                 if (IN6_IS_ADDR_UNSPECIFIED(&mreq.ipv6mr_multiaddr)) {
 2523                         /*
 2524                          * We use the unspecified address to specify to accept
 2525                          * all multicast addresses. Only super user is allowed
 2526                          * to do this.
 2527                          */
 2528                         if (kauth_authorize_generic(l->l_cred,
 2529                             KAUTH_GENERIC_ISSUSER, NULL))
 2530                         {
 2531                                 error = EACCES;
 2532                                 break;
 2533                         }
 2534                 } else if (!IN6_IS_ADDR_MULTICAST(&mreq.ipv6mr_multiaddr)) {
 2535                         error = EINVAL;
 2536                         break;
 2537                 }
 2538 
 2539                 /*
 2540                  * If no interface was explicitly specified, choose an
 2541                  * appropriate one according to the given multicast address.
 2542                  */
 2543                 if (mreq.ipv6mr_interface == 0) {
 2544                         struct rtentry *rt;
 2545                         union {
 2546                                 struct sockaddr         dst;
 2547                                 struct sockaddr_in6     dst6;
 2548                         } u;
 2549 
 2550                         /*
 2551                          * Look up the routing table for the
 2552                          * address, and choose the outgoing interface.
 2553                          *   XXX: is it a good approach?
 2554                          */
 2555                         memset(&ro, 0, sizeof(ro));
 2556                         sockaddr_in6_init(&u.dst6, &mreq.ipv6mr_multiaddr, 0,
 2557                             0, 0);
 2558                         rtcache_setdst(&ro, &u.dst);
 2559                         ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp
 2560                                                                 : NULL;
 2561                         rtcache_free(&ro);
 2562                 } else {
 2563                         /*
 2564                          * If the interface is specified, validate it.
 2565                          */
 2566                         if (if_indexlim <= mreq.ipv6mr_interface ||
 2567                             !ifindex2ifnet[mreq.ipv6mr_interface]) {
 2568                                 error = ENXIO;  /* XXX EINVAL? */
 2569                                 break;
 2570                         }
 2571                         ifp = ifindex2ifnet[mreq.ipv6mr_interface];
 2572                 }
 2573 
 2574                 /*
 2575                  * See if we found an interface, and confirm that it
 2576                  * supports multicast
 2577                  */
 2578                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 2579                         error = EADDRNOTAVAIL;
 2580                         break;
 2581                 }
 2582 
 2583                 if (in6_setscope(&mreq.ipv6mr_multiaddr, ifp, NULL)) {
 2584                         error = EADDRNOTAVAIL; /* XXX: should not happen */
 2585                         break;
 2586                 }
 2587 
 2588                 /*
 2589                  * See if the membership already exists.
 2590                  */
 2591                 for (imm = im6o->im6o_memberships.lh_first;
 2592                      imm != NULL; imm = imm->i6mm_chain.le_next)
 2593                         if (imm->i6mm_maddr->in6m_ifp == ifp &&
 2594                             IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
 2595                             &mreq.ipv6mr_multiaddr))
 2596                                 break;
 2597                 if (imm != NULL) {
 2598                         error = EADDRINUSE;
 2599                         break;
 2600                 }
 2601                 /*
 2602                  * Everything looks good; add a new record to the multicast
 2603                  * address list for the given interface.
 2604                  */
 2605                 imm = in6_joingroup(ifp, &mreq.ipv6mr_multiaddr, &error, 0);
 2606                 if (imm == NULL)
 2607                         break;
 2608                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
 2609                 break;
 2610 
 2611         case IPV6_LEAVE_GROUP:
 2612                 /*
 2613                  * Drop a multicast group membership.
 2614                  * Group must be a valid IP6 multicast address.
 2615                  */
 2616                 error = sockopt_get(sopt, &mreq, sizeof(mreq));
 2617                 if (error != 0)
 2618                         break;
 2619 
 2620                 /*
 2621                  * If an interface address was specified, get a pointer
 2622                  * to its ifnet structure.
 2623                  */
 2624                 if (mreq.ipv6mr_interface != 0) {
 2625                         if (if_indexlim <= mreq.ipv6mr_interface ||
 2626                             !ifindex2ifnet[mreq.ipv6mr_interface]) {
 2627                                 error = ENXIO;  /* XXX EINVAL? */
 2628                                 break;
 2629                         }
 2630                         ifp = ifindex2ifnet[mreq.ipv6mr_interface];
 2631                 } else
 2632                         ifp = NULL;
 2633 
 2634                 /* Fill in the scope zone ID */
 2635                 if (ifp) {
 2636                         if (in6_setscope(&mreq.ipv6mr_multiaddr, ifp, NULL)) {
 2637                                 /* XXX: should not happen */
 2638                                 error = EADDRNOTAVAIL;
 2639                                 break;
 2640                         }
 2641                 } else if (mreq.ipv6mr_interface != 0) {
 2642                         /*
 2643                          * XXX: This case would happens when the (positive)
 2644                          * index is in the valid range, but the corresponding
 2645                          * interface has been detached dynamically.  The above
 2646                          * check probably avoids such case to happen here, but
 2647                          * we check it explicitly for safety.
 2648                          */
 2649                         error = EADDRNOTAVAIL;
 2650                         break;      
 2651                 } else {        /* ipv6mr_interface == 0 */
 2652                         struct sockaddr_in6 sa6_mc;
 2653 
 2654                         /*
 2655                          * The API spec says as follows:
 2656                          *  If the interface index is specified as 0, the
 2657                          *  system may choose a multicast group membership to
 2658                          *  drop by matching the multicast address only.
 2659                          * On the other hand, we cannot disambiguate the scope
 2660                          * zone unless an interface is provided.  Thus, we
 2661                          * check if there's ambiguity with the default scope
 2662                          * zone as the last resort.
 2663                          */
 2664                         sockaddr_in6_init(&sa6_mc, &mreq.ipv6mr_multiaddr,
 2665                             0, 0, 0);
 2666                         error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
 2667                         if (error != 0)
 2668                                 break;
 2669                         mreq.ipv6mr_multiaddr = sa6_mc.sin6_addr;
 2670                 }
 2671 
 2672                 /*
 2673                  * Find the membership in the membership list.
 2674                  */
 2675                 for (imm = im6o->im6o_memberships.lh_first;
 2676                      imm != NULL; imm = imm->i6mm_chain.le_next) {
 2677                         if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
 2678                             IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
 2679                             &mreq.ipv6mr_multiaddr))
 2680                                 break;
 2681                 }
 2682                 if (imm == NULL) {
 2683                         /* Unable to resolve interface */
 2684                         error = EADDRNOTAVAIL;
 2685                         break;
 2686                 }
 2687                 /*
 2688                  * Give up the multicast address record to which the
 2689                  * membership points.
 2690                  */
 2691                 LIST_REMOVE(imm, i6mm_chain);
 2692                 in6_leavegroup(imm);
 2693                 break;
 2694 
 2695         default:
 2696                 error = EOPNOTSUPP;
 2697                 break;
 2698         }
 2699 
 2700         /*
 2701          * If all options have default values, no need to keep the mbuf.
 2702          */
 2703         if (im6o->im6o_multicast_ifp == NULL &&
 2704             im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
 2705             im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
 2706             im6o->im6o_memberships.lh_first == NULL) {
 2707                 free(*im6op, M_IPMOPTS);
 2708                 *im6op = NULL;
 2709         }
 2710 
 2711         return (error);
 2712 }
 2713 
 2714 /*
 2715  * Return the IP6 multicast options in response to user getsockopt().
 2716  */
 2717 static int
 2718 ip6_getmoptions(struct sockopt *sopt, struct ip6_moptions *im6o)
 2719 {
 2720         u_int optval;
 2721         int error;
 2722 
 2723         switch (sopt->sopt_name) {
 2724         case IPV6_MULTICAST_IF:
 2725                 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
 2726                         optval = 0;
 2727                 else
 2728                         optval = im6o->im6o_multicast_ifp->if_index;
 2729 
 2730                 error = sockopt_set(sopt, &optval, sizeof(optval));
 2731                 break;
 2732 
 2733         case IPV6_MULTICAST_HOPS:
 2734                 if (im6o == NULL)
 2735                         optval = ip6_defmcasthlim;
 2736                 else
 2737                         optval = im6o->im6o_multicast_hlim;
 2738 
 2739                 error = sockopt_set(sopt, &optval, sizeof(optval));
 2740                 break;
 2741 
 2742         case IPV6_MULTICAST_LOOP:
 2743                 if (im6o == NULL)
 2744                         optval = ip6_defmcasthlim;
 2745                 else
 2746                         optval = im6o->im6o_multicast_loop;
 2747 
 2748                 error = sockopt_set(sopt, &optval, sizeof(optval));
 2749                 break;
 2750 
 2751         default:
 2752                 error = EOPNOTSUPP;
 2753         }
 2754 
 2755         return (error);
 2756 }
 2757 
 2758 /*
 2759  * Discard the IP6 multicast options.
 2760  */
 2761 void
 2762 ip6_freemoptions(struct ip6_moptions *im6o)
 2763 {
 2764         struct in6_multi_mship *imm;
 2765 
 2766         if (im6o == NULL)
 2767                 return;
 2768 
 2769         while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
 2770                 LIST_REMOVE(imm, i6mm_chain);
 2771                 in6_leavegroup(imm);
 2772         }
 2773         free(im6o, M_IPMOPTS);
 2774 }
 2775 
 2776 /*
 2777  * Set IPv6 outgoing packet options based on advanced API.
 2778  */
 2779 int
 2780 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, 
 2781         struct ip6_pktopts *stickyopt, int priv, int uproto)
 2782 {
 2783         struct cmsghdr *cm = 0;
 2784 
 2785         if (control == NULL || opt == NULL)
 2786                 return (EINVAL);
 2787 
 2788         ip6_initpktopts(opt);
 2789         if (stickyopt) {
 2790                 int error;
 2791 
 2792                 /*
 2793                  * If stickyopt is provided, make a local copy of the options
 2794                  * for this particular packet, then override them by ancillary
 2795                  * objects.
 2796                  * XXX: copypktopts() does not copy the cached route to a next
 2797                  * hop (if any).  This is not very good in terms of efficiency,
 2798                  * but we can allow this since this option should be rarely
 2799                  * used.
 2800                  */
 2801                 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
 2802                         return (error);
 2803         }
 2804 
 2805         /*
 2806          * XXX: Currently, we assume all the optional information is stored
 2807          * in a single mbuf.
 2808          */
 2809         if (control->m_next)
 2810                 return (EINVAL);
 2811 
 2812         /* XXX if cm->cmsg_len is not aligned, control->m_len can become <0 */
 2813         for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
 2814             control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
 2815                 int error;
 2816 
 2817                 if (control->m_len < CMSG_LEN(0))
 2818                         return (EINVAL);
 2819 
 2820                 cm = mtod(control, struct cmsghdr *);
 2821                 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
 2822                         return (EINVAL);
 2823                 if (cm->cmsg_level != IPPROTO_IPV6)
 2824                         continue;
 2825 
 2826                 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
 2827                     cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto);
 2828                 if (error)
 2829                         return (error);
 2830         }
 2831 
 2832         return (0);
 2833 }
 2834 
 2835 /*
 2836  * Set a particular packet option, as a sticky option or an ancillary data
 2837  * item.  "len" can be 0 only when it's a sticky option.
 2838  * We have 4 cases of combination of "sticky" and "cmsg":
 2839  * "sticky=0, cmsg=0": impossible
 2840  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
 2841  * "sticky=1, cmsg=0": RFC3542 socket option
 2842  * "sticky=1, cmsg=1": RFC2292 socket option
 2843  */
 2844 static int
 2845 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
 2846     int priv, int sticky, int cmsg, int uproto)
 2847 {
 2848         int minmtupolicy;
 2849 
 2850         if (!sticky && !cmsg) {
 2851 #ifdef DIAGNOSTIC
 2852                 printf("ip6_setpktopt: impossible case\n");
 2853 #endif
 2854                 return (EINVAL);
 2855         }
 2856 
 2857         /*
 2858          * IPV6_2292xxx is for backward compatibility to RFC2292, and should
 2859          * not be specified in the context of RFC3542.  Conversely,
 2860          * RFC3542 types should not be specified in the context of RFC2292.
 2861          */
 2862         if (!cmsg) {
 2863                 switch (optname) {
 2864                 case IPV6_2292PKTINFO:
 2865                 case IPV6_2292HOPLIMIT:
 2866                 case IPV6_2292NEXTHOP:
 2867                 case IPV6_2292HOPOPTS:
 2868                 case IPV6_2292DSTOPTS:
 2869                 case IPV6_2292RTHDR:
 2870                 case IPV6_2292PKTOPTIONS:
 2871                         return (ENOPROTOOPT);
 2872                 }
 2873         }
 2874         if (sticky && cmsg) {
 2875                 switch (optname) {
 2876                 case IPV6_PKTINFO:
 2877                 case IPV6_HOPLIMIT:
 2878                 case IPV6_NEXTHOP:
 2879                 case IPV6_HOPOPTS:
 2880                 case IPV6_DSTOPTS:
 2881                 case IPV6_RTHDRDSTOPTS:
 2882                 case IPV6_RTHDR:
 2883                 case IPV6_USE_MIN_MTU:
 2884                 case IPV6_DONTFRAG:
 2885                 case IPV6_OTCLASS:
 2886                 case IPV6_TCLASS:
 2887                         return (ENOPROTOOPT);
 2888                 }
 2889         }
 2890 
 2891         switch (optname) {
 2892 #ifdef RFC2292
 2893         case IPV6_2292PKTINFO:
 2894 #endif
 2895         case IPV6_PKTINFO:
 2896         {
 2897                 struct ifnet *ifp = NULL;
 2898                 struct in6_pktinfo *pktinfo;
 2899 
 2900                 if (len != sizeof(struct in6_pktinfo))
 2901                         return (EINVAL);
 2902 
 2903                 pktinfo = (struct in6_pktinfo *)buf;
 2904 
 2905                 /*
 2906                  * An application can clear any sticky IPV6_PKTINFO option by
 2907                  * doing a "regular" setsockopt with ipi6_addr being
 2908                  * in6addr_any and ipi6_ifindex being zero.
 2909                  * [RFC 3542, Section 6]
 2910                  */
 2911                 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
 2912                     pktinfo->ipi6_ifindex == 0 &&
 2913                     IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
 2914                         ip6_clearpktopts(opt, optname);
 2915                         break;
 2916                 }
 2917 
 2918                 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
 2919                     sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
 2920                         return (EINVAL);
 2921                 }
 2922 
 2923                 /* validate the interface index if specified. */
 2924                 if (pktinfo->ipi6_ifindex >= if_indexlim) {
 2925                          return (ENXIO);
 2926                 }
 2927                 if (pktinfo->ipi6_ifindex) {
 2928                         ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
 2929                         if (ifp == NULL)
 2930                                 return (ENXIO);
 2931                 }
 2932 
 2933                 /*
 2934                  * We store the address anyway, and let in6_selectsrc()
 2935                  * validate the specified address.  This is because ipi6_addr
 2936                  * may not have enough information about its scope zone, and
 2937                  * we may need additional information (such as outgoing
 2938                  * interface or the scope zone of a destination address) to
 2939                  * disambiguate the scope.
 2940                  * XXX: the delay of the validation may confuse the
 2941                  * application when it is used as a sticky option.
 2942                  */
 2943                 if (opt->ip6po_pktinfo == NULL) {
 2944                         opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
 2945                             M_IP6OPT, M_NOWAIT);
 2946                         if (opt->ip6po_pktinfo == NULL)
 2947                                 return (ENOBUFS);
 2948                 }
 2949                 memcpy(opt->ip6po_pktinfo, pktinfo, sizeof(*pktinfo));
 2950                 break;
 2951         }
 2952 
 2953 #ifdef RFC2292
 2954         case IPV6_2292HOPLIMIT:
 2955 #endif
 2956         case IPV6_HOPLIMIT:
 2957         {
 2958                 int *hlimp;
 2959 
 2960                 /*
 2961                  * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
 2962                  * to simplify the ordering among hoplimit options.
 2963                  */
 2964                 if (optname == IPV6_HOPLIMIT && sticky)
 2965                         return (ENOPROTOOPT);
 2966 
 2967                 if (len != sizeof(int))
 2968                         return (EINVAL);
 2969                 hlimp = (int *)buf;
 2970                 if (*hlimp < -1 || *hlimp > 255)
 2971                         return (EINVAL);
 2972 
 2973                 opt->ip6po_hlim = *hlimp;
 2974                 break;
 2975         }
 2976 
 2977         case IPV6_OTCLASS:
 2978                 if (len != sizeof(u_int8_t))
 2979                         return (EINVAL);
 2980 
 2981                 opt->ip6po_tclass = *(u_int8_t *)buf;
 2982                 break;
 2983 
 2984         case IPV6_TCLASS:
 2985         {
 2986                 int tclass;
 2987 
 2988                 if (len != sizeof(int))
 2989                         return (EINVAL);
 2990                 tclass = *(int *)buf;
 2991                 if (tclass < -1 || tclass > 255)
 2992                         return (EINVAL);
 2993 
 2994                 opt->ip6po_tclass = tclass;
 2995                 break;
 2996         }
 2997 
 2998 #ifdef RFC2292
 2999         case IPV6_2292NEXTHOP:
 3000 #endif
 3001         case IPV6_NEXTHOP:
 3002                 if (!priv)
 3003                         return (EPERM);
 3004 
 3005                 if (len == 0) { /* just remove the option */
 3006                         ip6_clearpktopts(opt, IPV6_NEXTHOP);
 3007                         break;
 3008                 }
 3009 
 3010                 /* check if cmsg_len is large enough for sa_len */
 3011                 if (len < sizeof(struct sockaddr) || len < *buf)
 3012                         return (EINVAL);
 3013 
 3014                 switch (((struct sockaddr *)buf)->sa_family) {
 3015                 case AF_INET6:
 3016                 {
 3017                         struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
 3018                         int error;
 3019 
 3020                         if (sa6->sin6_len != sizeof(struct sockaddr_in6))
 3021                                 return (EINVAL);
 3022 
 3023                         if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
 3024                             IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
 3025                                 return (EINVAL);
 3026                         }
 3027                         if ((error = sa6_embedscope(sa6, ip6_use_defzone))
 3028                             != 0) {
 3029                                 return (error);
 3030                         }
 3031                         break;
 3032                 }
 3033                 case AF_LINK:   /* eventually be supported? */
 3034                 default:
 3035                         return (EAFNOSUPPORT);
 3036                 }
 3037 
 3038                 /* turn off the previous option, then set the new option. */
 3039                 ip6_clearpktopts(opt, IPV6_NEXTHOP);
 3040                 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
 3041                 if (opt->ip6po_nexthop == NULL)
 3042                         return (ENOBUFS);
 3043                 memcpy(opt->ip6po_nexthop, buf, *buf);
 3044                 break;
 3045 
 3046 #ifdef RFC2292
 3047         case IPV6_2292HOPOPTS:
 3048 #endif
 3049         case IPV6_HOPOPTS:
 3050         {
 3051                 struct ip6_hbh *hbh;
 3052                 int hbhlen;
 3053 
 3054                 /*
 3055                  * XXX: We don't allow a non-privileged user to set ANY HbH
 3056                  * options, since per-option restriction has too much
 3057                  * overhead.
 3058                  */
 3059                 if (!priv)
 3060                         return (EPERM);
 3061 
 3062                 if (len == 0) {
 3063                         ip6_clearpktopts(opt, IPV6_HOPOPTS);
 3064                         break;  /* just remove the option */
 3065                 }
 3066 
 3067                 /* message length validation */
 3068                 if (len < sizeof(struct ip6_hbh))
 3069                         return (EINVAL);
 3070                 hbh = (struct ip6_hbh *)buf;
 3071                 hbhlen = (hbh->ip6h_len + 1) << 3;
 3072                 if (len != hbhlen)
 3073                         return (EINVAL);
 3074 
 3075                 /* turn off the previous option, then set the new option. */
 3076                 ip6_clearpktopts(opt, IPV6_HOPOPTS);
 3077                 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
 3078                 if (opt->ip6po_hbh == NULL)
 3079                         return (ENOBUFS);
 3080                 memcpy(opt->ip6po_hbh, hbh, hbhlen);
 3081 
 3082                 break;
 3083         }
 3084 
 3085 #ifdef RFC2292
 3086         case IPV6_2292DSTOPTS:
 3087 #endif
 3088         case IPV6_DSTOPTS:
 3089         case IPV6_RTHDRDSTOPTS:
 3090         {
 3091                 struct ip6_dest *dest, **newdest = NULL;
 3092                 int destlen;
 3093 
 3094                 if (!priv)      /* XXX: see the comment for IPV6_HOPOPTS */
 3095                         return (EPERM);
 3096 
 3097                 if (len == 0) {
 3098                         ip6_clearpktopts(opt, optname);
 3099                         break;  /* just remove the option */
 3100                 }
 3101 
 3102                 /* message length validation */
 3103                 if (len < sizeof(struct ip6_dest))
 3104                         return (EINVAL);
 3105                 dest = (struct ip6_dest *)buf;
 3106                 destlen = (dest->ip6d_len + 1) << 3;
 3107                 if (len != destlen)
 3108                         return (EINVAL);
 3109                 /*
 3110                  * Determine the position that the destination options header
 3111                  * should be inserted; before or after the routing header.
 3112                  */
 3113                 switch (optname) {
 3114                 case IPV6_2292DSTOPTS:
 3115                         /*
 3116                          * The old advanced API is ambiguous on this point.
 3117                          * Our approach is to determine the position based
 3118                          * according to the existence of a routing header.
 3119                          * Note, however, that this depends on the order of the
 3120                          * extension headers in the ancillary data; the 1st
 3121                          * part of the destination options header must appear
 3122                          * before the routing header in the ancillary data,
 3123                          * too.
 3124                          * RFC3542 solved the ambiguity by introducing
 3125                          * separate ancillary data or option types.
 3126                          */
 3127                         if (opt->ip6po_rthdr == NULL)
 3128                                 newdest = &opt->ip6po_dest1;
 3129                         else
 3130                                 newdest = &opt->ip6po_dest2;
 3131                         break;
 3132                 case IPV6_RTHDRDSTOPTS:
 3133                         newdest = &opt->ip6po_dest1;
 3134                         break;
 3135                 case IPV6_DSTOPTS:
 3136                         newdest = &opt->ip6po_dest2;
 3137                         break;
 3138                 }
 3139 
 3140                 /* turn off the previous option, then set the new option. */
 3141                 ip6_clearpktopts(opt, optname);
 3142                 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
 3143                 if (*newdest == NULL)
 3144                         return (ENOBUFS);
 3145                 memcpy(*newdest, dest, destlen);
 3146 
 3147                 break;
 3148         }
 3149 
 3150 #ifdef RFC2292
 3151         case IPV6_2292RTHDR:
 3152 #endif
 3153         case IPV6_RTHDR:
 3154         {
 3155                 struct ip6_rthdr *rth;
 3156                 int rthlen;
 3157 
 3158                 if (len == 0) {
 3159                         ip6_clearpktopts(opt, IPV6_RTHDR);
 3160                         break;  /* just remove the option */
 3161                 }
 3162 
 3163                 /* message length validation */
 3164                 if (len < sizeof(struct ip6_rthdr))
 3165                         return (EINVAL);
 3166                 rth = (struct ip6_rthdr *)buf;
 3167                 rthlen = (rth->ip6r_len + 1) << 3;
 3168                 if (len != rthlen)
 3169                         return (EINVAL);
 3170                 switch (rth->ip6r_type) {
 3171                 case IPV6_RTHDR_TYPE_0:
 3172                         if (rth->ip6r_len == 0) /* must contain one addr */
 3173                                 return (EINVAL);
 3174                         if (rth->ip6r_len % 2) /* length must be even */
 3175                                 return (EINVAL);
 3176                         if (rth->ip6r_len / 2 != rth->ip6r_segleft)
 3177                                 return (EINVAL);
 3178                         break;
 3179                 default:
 3180                         return (EINVAL);        /* not supported */
 3181                 }
 3182                 /* turn off the previous option */
 3183                 ip6_clearpktopts(opt, IPV6_RTHDR);
 3184                 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
 3185                 if (opt->ip6po_rthdr == NULL)
 3186                         return (ENOBUFS);
 3187                 memcpy(opt->ip6po_rthdr, rth, rthlen);
 3188                 break;
 3189         }
 3190 
 3191         case IPV6_USE_MIN_MTU:
 3192                 if (len != sizeof(int))
 3193                         return (EINVAL);
 3194                 minmtupolicy = *(int *)buf;
 3195                 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
 3196                     minmtupolicy != IP6PO_MINMTU_DISABLE &&
 3197                     minmtupolicy != IP6PO_MINMTU_ALL) {
 3198                         return (EINVAL);
 3199                 }
 3200                 opt->ip6po_minmtu = minmtupolicy;
 3201                 break;
 3202 
 3203         case IPV6_DONTFRAG:
 3204                 if (len != sizeof(int))
 3205                         return (EINVAL);
 3206 
 3207                 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
 3208                         /*
 3209                          * we ignore this option for TCP sockets.
 3210                          * (RFC3542 leaves this case unspecified.)
 3211                          */
 3212                         opt->ip6po_flags &= ~IP6PO_DONTFRAG;
 3213                 } else
 3214                         opt->ip6po_flags |= IP6PO_DONTFRAG;
 3215                 break;
 3216 
 3217         default:
 3218                 return (ENOPROTOOPT);
 3219         } /* end of switch */
 3220 
 3221         return (0);
 3222 }
 3223 
 3224 /*
 3225  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
 3226  * packet to the input queue of a specified interface.  Note that this
 3227  * calls the output routine of the loopback "driver", but with an interface
 3228  * pointer that might NOT be lo0ifp -- easier than replicating that code here.
 3229  */
 3230 void
 3231 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, 
 3232         const struct sockaddr_in6 *dst)
 3233 {
 3234         struct mbuf *copym;
 3235         struct ip6_hdr *ip6;
 3236 
 3237         copym = m_copy(m, 0, M_COPYALL);
 3238         if (copym == NULL)
 3239                 return;
 3240 
 3241         /*
 3242          * Make sure to deep-copy IPv6 header portion in case the data
 3243          * is in an mbuf cluster, so that we can safely override the IPv6
 3244          * header portion later.
 3245          */
 3246         if ((copym->m_flags & M_EXT) != 0 ||
 3247             copym->m_len < sizeof(struct ip6_hdr)) {
 3248                 copym = m_pullup(copym, sizeof(struct ip6_hdr));
 3249                 if (copym == NULL)
 3250                         return;
 3251         }
 3252 
 3253 #ifdef DIAGNOSTIC
 3254         if (copym->m_len < sizeof(*ip6)) {
 3255                 m_freem(copym);
 3256                 return;
 3257         }
 3258 #endif
 3259 
 3260         ip6 = mtod(copym, struct ip6_hdr *);
 3261         /*
 3262          * clear embedded scope identifiers if necessary.
 3263          * in6_clearscope will touch the addresses only when necessary.
 3264          */
 3265         in6_clearscope(&ip6->ip6_src);
 3266         in6_clearscope(&ip6->ip6_dst);
 3267 
 3268         (void)looutput(ifp, copym, (const struct sockaddr *)dst, NULL);
 3269 }
 3270 
 3271 /*
 3272  * Chop IPv6 header off from the payload.
 3273  */
 3274 static int
 3275 ip6_splithdr(struct mbuf *m,  struct ip6_exthdrs *exthdrs)
 3276 {
 3277         struct mbuf *mh;
 3278         struct ip6_hdr *ip6;
 3279 
 3280         ip6 = mtod(m, struct ip6_hdr *);
 3281         if (m->m_len > sizeof(*ip6)) {
 3282                 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
 3283                 if (mh == 0) {
 3284                         m_freem(m);
 3285                         return ENOBUFS;
 3286                 }
 3287                 M_MOVE_PKTHDR(mh, m);
 3288                 MH_ALIGN(mh, sizeof(*ip6));
 3289                 m->m_len -= sizeof(*ip6);
 3290                 m->m_data += sizeof(*ip6);
 3291                 mh->m_next = m;
 3292                 m = mh;
 3293                 m->m_len = sizeof(*ip6);
 3294                 bcopy((void *)ip6, mtod(m, void *), sizeof(*ip6));
 3295         }
 3296         exthdrs->ip6e_ip6 = m;
 3297         return 0;
 3298 }
 3299 
 3300 /*
 3301  * Compute IPv6 extension header length.
 3302  */
 3303 int
 3304 ip6_optlen(struct in6pcb *in6p)
 3305 {
 3306         int len;
 3307 
 3308         if (!in6p->in6p_outputopts)
 3309                 return 0;
 3310 
 3311         len = 0;
 3312 #define elen(x) \
 3313     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
 3314 
 3315         len += elen(in6p->in6p_outputopts->ip6po_hbh);
 3316         len += elen(in6p->in6p_outputopts->ip6po_dest1);
 3317         len += elen(in6p->in6p_outputopts->ip6po_rthdr);
 3318         len += elen(in6p->in6p_outputopts->ip6po_dest2);
 3319         return len;
 3320 #undef elen
 3321 }

Cache object: 2857f88382af400f610ebc061124e9c3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.