The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_output.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: ip_output.c,v 1.324 2022/11/21 09:51:13 knakahara Exp $        */
    2 
    3 /*
    4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the project nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  */
   31 
   32 /*
   33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
   34  * All rights reserved.
   35  *
   36  * This code is derived from software contributed to The NetBSD Foundation
   37  * by Public Access Networks Corporation ("Panix").  It was developed under
   38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
   39  *
   40  * Redistribution and use in source and binary forms, with or without
   41  * modification, are permitted provided that the following conditions
   42  * are met:
   43  * 1. Redistributions of source code must retain the above copyright
   44  *    notice, this list of conditions and the following disclaimer.
   45  * 2. Redistributions in binary form must reproduce the above copyright
   46  *    notice, this list of conditions and the following disclaimer in the
   47  *    documentation and/or other materials provided with the distribution.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   59  * POSSIBILITY OF SUCH DAMAGE.
   60  */
   61 
   62 /*
   63  * Copyright (c) 1982, 1986, 1988, 1990, 1993
   64  *      The Regents of the University of California.  All rights reserved.
   65  *
   66  * Redistribution and use in source and binary forms, with or without
   67  * modification, are permitted provided that the following conditions
   68  * are met:
   69  * 1. Redistributions of source code must retain the above copyright
   70  *    notice, this list of conditions and the following disclaimer.
   71  * 2. Redistributions in binary form must reproduce the above copyright
   72  *    notice, this list of conditions and the following disclaimer in the
   73  *    documentation and/or other materials provided with the distribution.
   74  * 3. Neither the name of the University nor the names of its contributors
   75  *    may be used to endorse or promote products derived from this software
   76  *    without specific prior written permission.
   77  *
   78  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   79  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   80  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   81  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   82  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   83  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   84  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   86  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   87  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   88  * SUCH DAMAGE.
   89  *
   90  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
   91  */
   92 
   93 #include <sys/cdefs.h>
   94 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.324 2022/11/21 09:51:13 knakahara Exp $");
   95 
   96 #ifdef _KERNEL_OPT
   97 #include "opt_inet.h"
   98 #include "opt_ipsec.h"
   99 #include "opt_mrouting.h"
  100 #include "opt_net_mpsafe.h"
  101 #include "opt_mpls.h"
  102 #endif
  103 
  104 #include "arp.h"
  105 
  106 #include <sys/param.h>
  107 #include <sys/kmem.h>
  108 #include <sys/mbuf.h>
  109 #include <sys/socket.h>
  110 #include <sys/socketvar.h>
  111 #include <sys/kauth.h>
  112 #include <sys/systm.h>
  113 #include <sys/syslog.h>
  114 
  115 #include <net/if.h>
  116 #include <net/if_types.h>
  117 #include <net/route.h>
  118 #include <net/pfil.h>
  119 
  120 #include <netinet/in.h>
  121 #include <netinet/in_systm.h>
  122 #include <netinet/ip.h>
  123 #include <netinet/in_pcb.h>
  124 #include <netinet/in_var.h>
  125 #include <netinet/ip_var.h>
  126 #include <netinet/ip_private.h>
  127 #include <netinet/in_offload.h>
  128 #include <netinet/portalgo.h>
  129 #include <netinet/udp.h>
  130 #include <netinet/udp_var.h>
  131 
  132 #ifdef INET6
  133 #include <netinet6/ip6_var.h>
  134 #endif
  135 
  136 #ifdef MROUTING
  137 #include <netinet/ip_mroute.h>
  138 #endif
  139 
  140 #ifdef IPSEC
  141 #include <netipsec/ipsec.h>
  142 #include <netipsec/key.h>
  143 #endif
  144 
  145 #ifdef MPLS
  146 #include <netmpls/mpls.h>
  147 #include <netmpls/mpls_var.h>
  148 #endif
  149 
  150 static int ip_pcbopts(struct inpcb *, const struct sockopt *);
      /*
       * ip_output() continues with the mbuf returned by
       * ip_insertoptions(m, opt, &len) and uses the value stored through the
       * int pointer as the IP header length.
       */
  151 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
  152 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
      /*
       * ip_output() calls ip_mloopback(ifp, m, &udst.sin) to loop back a copy
       * of a packet sent to a group this host belongs to on the outgoing
       * interface.
       */
  153 static void ip_mloopback(struct ifnet *, struct mbuf *,
  154     const struct sockaddr_in *);
      /*
       * ip_output() refuses to send from the address on any nonzero return:
       * a return of 1 is turned into error 0 (the packet is dropped without
       * reporting a failure), any other nonzero value becomes EADDRNOTAVAIL.
       */
  155 static int ip_ifaddrvalid(const struct in_ifaddr *);
  156 
      /* Hook head that ip_output() passes to pfil_run_hooks() with PFIL_OUT. */
  157 extern pfil_head_t *inet_pfil_hook;                     /* XXX */
  158 
      /*
       * NOTE(review): not read directly anywhere in this part of the file;
       * presumably consulted by IN_NEED_CHECKSUM() when deciding whether
       * loopback traffic is checksummed -- confirm against the macro.
       */
  159 int ip_do_loopback_cksum = 0;
  160 
  161 /*
  162  * Attach a PACKET_TAG_MPLS tag to a unicast packet leaving an IFT_ETHER
  163  * interface via an AF_MPLS-tagged route, unless the label is implicit null.
  164  * Returns 0, or ENOMEM if no tag could be allocated; no-op without MPLS.
  165  */
  166 static int
  167 ip_mark_mpls(struct ifnet * const ifp, struct mbuf * const m,
  168     const struct rtentry *rt)
  169 {
  170 #ifdef MPLS
  171         union mpls_shim hdr;
  172         struct m_tag *tag;
  173 
  174         if (rt == NULL || rt_gettag(rt) == NULL ||
  175             rt_gettag(rt)->sa_family != AF_MPLS)
  176                 return 0;
  177         if ((m->m_flags & (M_MCAST | M_BCAST)) != 0 ||
  178             ifp->if_type != IFT_ETHER)
  179                 return 0;
  180 
  181         hdr.s_addr = MPLS_GETSADDR(rt);
  182         if (hdr.shim.label == MPLS_LABEL_IMPLNULL)
  183                 return 0;
  184         /* XXX interim way of telling ether_output the packet is MPLS. */
  185         tag = m_tag_get(PACKET_TAG_MPLS, sizeof(int) /* dummy */, M_NOWAIT);
  186         if (tag == NULL)
  187                 return ENOMEM;
  188         m_tag_prepend(m, tag);
  189 #endif
  190         return 0;
  191 }
  192 
  193 /*
  194  * Send an IP packet to a host through ifp.  If the route's reject check
  195  * or the MPLS marking fails, m is freed and that error is returned;
  196  * otherwise the result of if_output_lock() is returned.
  197  */
  198 int
  199 ip_if_output(struct ifnet * const ifp, struct mbuf * const m,
  200     const struct sockaddr * const dst, const struct rtentry *rt)
  201 {
  202         int rc;
  203 
  204         /* Count and drop packets refused by the route's reject check. */
  205         if (rt != NULL && (rc = rt_check_reject_route(rt, ifp)) != 0) {
  206                 IP_STATINC(IP_STAT_RTREJECT);
  207                 goto drop;
  208         }
  209 
  210         rc = ip_mark_mpls(ifp, m, rt);
  211         if (rc != 0)
  212                 goto drop;
  213 
  214         return if_output_lock(ifp, ifp, m, dst, rt);
  215 
  216 drop:
  217         /* The packet was not handed to the interface, so free it here. */
  218         m_freem(m);
  219         return rc;
  220 }
  221 
  222 /*
  223  * IP output.  The packet in mbuf chain m contains a skeletal IP
  224  * header (with len, off, ttl, proto, tos, src, dst).
  225  * The mbuf chain containing the packet will be freed.
  226  * The mbuf opt, if present, will not be freed.
  227  */
  228 int
  229 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
  230     struct ip_moptions *imo, struct inpcb *inp)
  231 {
  232         struct rtentry *rt;
  233         struct ip *ip;
  234         struct ifnet *ifp, *mifp = NULL;
  235         struct mbuf *m = m0;
  236         int len, hlen, error = 0;
  237         struct route iproute;
  238         const struct sockaddr_in *dst;
  239         struct in_ifaddr *ia = NULL;
  240         struct ifaddr *ifa;
  241         int isbroadcast;
  242         int sw_csum;
  243         u_long mtu;
  244         bool natt_frag = false;
  245         bool rtmtu_nolock;
  246         union {
  247                 struct sockaddr         sa;
  248                 struct sockaddr_in      sin;
  249         } udst, usrc;
  250         struct sockaddr *rdst = &udst.sa;       /* real IP destination, as
  251                                                  * opposed to the nexthop
  252                                                  */
  253         struct psref psref, psref_ia;
  254         int bound;
  255         bool bind_need_restore = false;
  256         const struct sockaddr *sa;
  257 
  258         len = 0;
  259 
  260         MCLAIM(m, &ip_tx_mowner);
  261 
  262         KASSERT((m->m_flags & M_PKTHDR) != 0);
  263         KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) == 0);
  264         KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) !=
  265             (M_CSUM_TCPv4|M_CSUM_UDPv4));
  266         KASSERT(m->m_len >= sizeof(struct ip));
  267 
  268         hlen = sizeof(struct ip);
  269         if (opt) {
  270                 m = ip_insertoptions(m, opt, &len);
  271                 hlen = len;
  272         }
  273         ip = mtod(m, struct ip *);
  274 
  275         /*
  276          * Fill in IP header.
  277          */
  278         if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
  279                 ip->ip_v = IPVERSION;
  280                 ip->ip_off = htons(0);
  281                 /* ip->ip_id filled in after we find out source ia */
  282                 ip->ip_hl = hlen >> 2;
  283                 IP_STATINC(IP_STAT_LOCALOUT);
  284         } else {
  285                 hlen = ip->ip_hl << 2;
  286         }
  287 
  288         /*
  289          * Route packet.
  290          */
  291         if (ro == NULL) {
  292                 memset(&iproute, 0, sizeof(iproute));
  293                 ro = &iproute;
  294         }
  295         sockaddr_in_init(&udst.sin, &ip->ip_dst, 0);
  296         dst = satocsin(rtcache_getdst(ro));
  297 
  298         /*
  299          * If there is a cached route, check that it is to the same
  300          * destination and is still up.  If not, free it and try again.
  301          * The address family should also be checked in case of sharing
  302          * the cache with IPv6.
  303          */
  304         if (dst && (dst->sin_family != AF_INET ||
  305             !in_hosteq(dst->sin_addr, ip->ip_dst)))
  306                 rtcache_free(ro);
  307 
  308         /* XXX must be before rtcache operations */
  309         bound = curlwp_bind();
  310         bind_need_restore = true;
  311 
  312         if ((rt = rtcache_validate(ro)) == NULL &&
  313             (rt = rtcache_update(ro, 1)) == NULL) {
  314                 dst = &udst.sin;
  315                 error = rtcache_setdst(ro, &udst.sa);
  316                 if (error != 0) {
  317                         IP_STATINC(IP_STAT_ODROPPED);
  318                         goto bad;
  319                 }
  320         }
  321 
  322         /*
  323          * If routing to interface only, short circuit routing lookup.
  324          */
  325         if (flags & IP_ROUTETOIF) {
  326                 ifa = ifa_ifwithladdr_psref(sintocsa(dst), &psref_ia);
  327                 if (ifa == NULL) {
  328                         IP_STATINC(IP_STAT_NOROUTE);
  329                         error = ENETUNREACH;
  330                         goto bad;
  331                 }
  332                 /* ia is already referenced by psref_ia */
  333                 ia = ifatoia(ifa);
  334 
  335                 ifp = ia->ia_ifp;
  336                 mtu = ifp->if_mtu;
  337                 ip->ip_ttl = 1;
  338                 isbroadcast = in_broadcast(dst->sin_addr, ifp);
  339         } else if (((IN_MULTICAST(ip->ip_dst.s_addr) ||
  340             ip->ip_dst.s_addr == INADDR_BROADCAST) ||
  341             (flags & IP_ROUTETOIFINDEX)) &&
  342             imo != NULL && imo->imo_multicast_if_index != 0) {
  343                 ifp = mifp = if_get_byindex(imo->imo_multicast_if_index, &psref);
  344                 if (ifp == NULL) {
  345                         IP_STATINC(IP_STAT_NOROUTE);
  346                         error = ENETUNREACH;
  347                         goto bad;
  348                 }
  349                 mtu = ifp->if_mtu;
  350                 ia = in_get_ia_from_ifp_psref(ifp, &psref_ia);
  351                 if (ia == NULL) {
  352                         IP_STATINC(IP_STAT_IFNOADDR);
  353                         error = EADDRNOTAVAIL;
  354                         goto bad;
  355                 }
  356                 if (IN_MULTICAST(ip->ip_dst.s_addr) ||
  357                     ip->ip_dst.s_addr == INADDR_BROADCAST) {
  358                         isbroadcast = 0;
  359                 } else {
  360                         /* IP_ROUTETOIFINDEX */
  361                         isbroadcast = in_broadcast(dst->sin_addr, ifp);
  362                         if ((isbroadcast == 0) && ((ifp->if_flags &
  363                             (IFF_LOOPBACK | IFF_POINTOPOINT)) == 0) &&
  364                             (in_direct(dst->sin_addr, ifp) == 0)) {
  365                                 /* gateway address required */
  366                                 if (rt == NULL)
  367                                         rt = rtcache_init(ro);
  368                                 if (rt == NULL || rt->rt_ifp != ifp) {
  369                                         IP_STATINC(IP_STAT_NOROUTE);
  370                                         error = EHOSTUNREACH;
  371                                         goto bad;
  372                                 }
  373                                 rt->rt_use++;
  374                                 if (rt->rt_flags & RTF_GATEWAY)
  375                                         dst = satosin(rt->rt_gateway);
  376                                 if (rt->rt_flags & RTF_HOST)
  377                                         isbroadcast =
  378                                             rt->rt_flags & RTF_BROADCAST;
  379                         }
  380                 }
  381         } else {
  382                 if (rt == NULL)
  383                         rt = rtcache_init(ro);
  384                 if (rt == NULL) {
  385                         IP_STATINC(IP_STAT_NOROUTE);
  386                         error = EHOSTUNREACH;
  387                         goto bad;
  388                 }
  389                 if (ifa_is_destroying(rt->rt_ifa)) {
  390                         rtcache_unref(rt, ro);
  391                         rt = NULL;
  392                         IP_STATINC(IP_STAT_NOROUTE);
  393                         error = EHOSTUNREACH;
  394                         goto bad;
  395                 }
  396                 ifa_acquire(rt->rt_ifa, &psref_ia);
  397                 ia = ifatoia(rt->rt_ifa);
  398                 ifp = rt->rt_ifp;
  399                 if ((mtu = rt->rt_rmx.rmx_mtu) == 0)
  400                         mtu = ifp->if_mtu;
  401                 rt->rt_use++;
  402                 if (rt->rt_flags & RTF_GATEWAY)
  403                         dst = satosin(rt->rt_gateway);
  404                 if (rt->rt_flags & RTF_HOST)
  405                         isbroadcast = rt->rt_flags & RTF_BROADCAST;
  406                 else
  407                         isbroadcast = in_broadcast(dst->sin_addr, ifp);
  408         }
  409         rtmtu_nolock = rt && (rt->rt_rmx.rmx_locks & RTV_MTU) == 0;
  410 
  411         if (IN_MULTICAST(ip->ip_dst.s_addr) ||
  412             (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
  413                 bool inmgroup;
  414 
  415                 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
  416                     M_BCAST : M_MCAST;
  417                 /*
  418                  * See if the caller provided any multicast options
  419                  */
  420                 if (imo != NULL)
  421                         ip->ip_ttl = imo->imo_multicast_ttl;
  422                 else
  423                         ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
  424 
  425                 /*
  426                  * if we don't know the outgoing ifp yet, we can't generate
  427                  * output
  428                  */
  429                 if (!ifp) {
  430                         IP_STATINC(IP_STAT_NOROUTE);
  431                         error = ENETUNREACH;
  432                         goto bad;
  433                 }
  434 
  435                 /*
  436                  * If the packet is multicast or broadcast, confirm that
  437                  * the outgoing interface can transmit it.
  438                  */
  439                 if (((m->m_flags & M_MCAST) &&
  440                      (ifp->if_flags & IFF_MULTICAST) == 0) ||
  441                     ((m->m_flags & M_BCAST) &&
  442                      (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0))  {
  443                         IP_STATINC(IP_STAT_NOROUTE);
  444                         error = ENETUNREACH;
  445                         goto bad;
  446                 }
  447                 /*
  448                  * If source address not specified yet, use an address
  449                  * of outgoing interface.
  450                  */
  451                 if (in_nullhost(ip->ip_src)) {
  452                         struct in_ifaddr *xia;
  453                         struct ifaddr *xifa;
  454                         struct psref _psref;
  455 
  456                         xia = in_get_ia_from_ifp_psref(ifp, &_psref);
  457                         if (!xia) {
  458                                 IP_STATINC(IP_STAT_IFNOADDR);
  459                                 error = EADDRNOTAVAIL;
  460                                 goto bad;
  461                         }
  462                         xifa = &xia->ia_ifa;
  463                         if (xifa->ifa_getifa != NULL) {
  464                                 ia4_release(xia, &_psref);
  465                                 /* FIXME ifa_getifa is NOMPSAFE */
  466                                 xia = ifatoia((*xifa->ifa_getifa)(xifa, rdst));
  467                                 if (xia == NULL) {
  468                                         IP_STATINC(IP_STAT_IFNOADDR);
  469                                         error = EADDRNOTAVAIL;
  470                                         goto bad;
  471                                 }
  472                                 ia4_acquire(xia, &_psref);
  473                         }
  474                         ip->ip_src = xia->ia_addr.sin_addr;
  475                         ia4_release(xia, &_psref);
  476                 }
  477 
  478                 inmgroup = in_multi_group(ip->ip_dst, ifp, flags);
  479                 if (inmgroup && (imo == NULL || imo->imo_multicast_loop)) {
  480                         /*
  481                          * If we belong to the destination multicast group
  482                          * on the outgoing interface, and the caller did not
  483                          * forbid loopback, loop back a copy.
  484                          */
  485                         ip_mloopback(ifp, m, &udst.sin);
  486                 }
  487 #ifdef MROUTING
  488                 else {
  489                         /*
  490                          * If we are acting as a multicast router, perform
  491                          * multicast forwarding as if the packet had just
  492                          * arrived on the interface to which we are about
  493                          * to send.  The multicast forwarding function
  494                          * recursively calls this function, using the
  495                          * IP_FORWARDING flag to prevent infinite recursion.
  496                          *
  497                          * Multicasts that are looped back by ip_mloopback(),
  498                          * above, will be forwarded by the ip_input() routine,
  499                          * if necessary.
  500                          */
  501                         extern struct socket *ip_mrouter;
  502 
  503                         if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
  504                                 if (ip_mforward(m, ifp) != 0) {
  505                                         m_freem(m);
  506                                         goto done;
  507                                 }
  508                         }
  509                 }
  510 #endif
  511                 /*
  512                  * Multicasts with a time-to-live of zero may be looped-
  513                  * back, above, but must not be transmitted on a network.
  514                  * Also, multicasts addressed to the loopback interface
  515                  * are not sent -- the above call to ip_mloopback() will
  516                  * loop back a copy if this host actually belongs to the
  517                  * destination group on the loopback interface.
  518                  */
  519                 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
  520                         IP_STATINC(IP_STAT_ODROPPED);
  521                         m_freem(m);
  522                         goto done;
  523                 }
  524                 goto sendit;
  525         }
  526 
  527         /*
  528          * If source address not specified yet, use address
  529          * of outgoing interface.
  530          */
  531         if (in_nullhost(ip->ip_src)) {
  532                 struct ifaddr *xifa;
  533 
  534                 /* If rt_ifa is AF_LINK, ia can be NULL. */
  535                 if (ia == NULL) {
  536                         KASSERTMSG(rt->rt_ifa->ifa_addr->sa_family == AF_LINK,
  537                             "sa_family=%d", rt->rt_ifa->ifa_addr->sa_family);
  538                         IP_STATINC(IP_STAT_NOROUTE);
  539                         error = EHOSTUNREACH;
  540                         goto bad;
  541                 }
  542 
  543                 xifa = &ia->ia_ifa;
  544                 if (xifa->ifa_getifa != NULL) {
  545                         ia4_release(ia, &psref_ia);
  546                         /* FIXME ifa_getifa is NOMPSAFE */
  547                         ia = ifatoia((*xifa->ifa_getifa)(xifa, rdst));
  548                         if (ia == NULL) {
  549                                 error = EADDRNOTAVAIL;
  550                                 goto bad;
  551                         }
  552                         ia4_acquire(ia, &psref_ia);
  553                 }
  554                 ip->ip_src = ia->ia_addr.sin_addr;
  555         }
  556 
  557         /*
  558          * Packets with Class-D address as source are not valid per
  559          * RFC1112.
  560          */
  561         if (IN_MULTICAST(ip->ip_src.s_addr)) {
  562                 IP_STATINC(IP_STAT_ODROPPED);
  563                 error = EADDRNOTAVAIL;
  564                 goto bad;
  565         }
  566 
  567         /*
  568          * Look for broadcast address and verify user is allowed to
  569          * send such a packet.
  570          */
  571         if (isbroadcast) {
  572                 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
  573                         IP_STATINC(IP_STAT_BCASTDENIED);
  574                         error = EADDRNOTAVAIL;
  575                         goto bad;
  576                 }
  577                 if ((flags & IP_ALLOWBROADCAST) == 0) {
  578                         IP_STATINC(IP_STAT_BCASTDENIED);
  579                         error = EACCES;
  580                         goto bad;
  581                 }
  582                 /* don't allow broadcast messages to be fragmented */
  583                 if (ntohs(ip->ip_len) > ifp->if_mtu) {
  584                         IP_STATINC(IP_STAT_BCASTDENIED);
  585                         error = EMSGSIZE;
  586                         goto bad;
  587                 }
  588                 m->m_flags |= M_BCAST;
  589         } else
  590                 m->m_flags &= ~M_BCAST;
  591 
  592 sendit:
  593         if ((flags & (IP_FORWARDING|IP_NOIPNEWID)) == 0) {
  594                 /* If rt_ifa is AF_LINK, ia can be NULL. */
  595                 if (ia == NULL) {
  596                         KASSERTMSG(rt->rt_ifa->ifa_addr->sa_family == AF_LINK,
  597                             "sa_family=%d", rt->rt_ifa->ifa_addr->sa_family);
  598                         IP_STATINC(IP_STAT_NOROUTE);
  599                         error = EHOSTUNREACH;
  600                         goto bad;
  601                 }
  602 
  603                 if (m->m_pkthdr.len < IP_MINFRAGSIZE) {
  604                         ip->ip_id = 0;
  605                 } else if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
  606                         ip->ip_id = ip_newid(ia);
  607                 } else {
  608                         /*
  609                          * TSO capable interfaces (typically?) increment
  610                          * ip_id for each segment.
  611                          * "allocate" enough ids here to increase the chance
  612                          * for them to be unique.
  613                          *
  614                          * note that the following calculation is not
  615                          * needed to be precise.  wasting some ip_id is fine.
  616                          */
  617 
  618                         unsigned int segsz = m->m_pkthdr.segsz;
  619                         unsigned int datasz = ntohs(ip->ip_len) - hlen;
  620                         unsigned int num = howmany(datasz, segsz);
  621 
  622                         ip->ip_id = ip_newid_range(ia, num);
  623                 }
  624         }
  625         if (ia != NULL) {
  626                 ia4_release(ia, &psref_ia);
  627                 ia = NULL;
  628         }
  629 
  630         /*
  631          * If we're doing Path MTU Discovery, we need to set DF unless
  632          * the route's MTU is locked.
  633          */
  634         if ((flags & IP_MTUDISC) != 0 && rtmtu_nolock) {
  635                 ip->ip_off |= htons(IP_DF);
  636         }
  637 
  638 #ifdef IPSEC
  639         if (ipsec_used) {
  640                 bool ipsec_done = false;
  641                 bool count_drop = false;
  642 
  643                 /* Perform IPsec processing, if any. */
  644                 error = ipsec4_output(m, inp, flags, &mtu, &natt_frag,
  645                     &ipsec_done, &count_drop);
  646                 if (count_drop)
  647                         IP_STATINC(IP_STAT_IPSECDROP_OUT);
  648                 if (error || ipsec_done)
  649                         goto done;
  650         }
  651 
  652         if (!ipsec_used || !natt_frag)
  653 #endif
  654         {
  655                 /*
  656                  * Run through list of hooks for output packets.
  657                  */
  658                 error = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_OUT);
  659                 if (error || m == NULL) {
  660                         IP_STATINC(IP_STAT_PFILDROP_OUT);
  661                         goto done;
  662                 }
  663         }
  664 
  665         ip = mtod(m, struct ip *);
  666         hlen = ip->ip_hl << 2;
  667 
  668         m->m_pkthdr.csum_data |= hlen << 16;
  669 
  670         /*
  671          * search for the source address structure to
  672          * maintain output statistics, and verify address
  673          * validity
  674          */
  675         KASSERT(ia == NULL);
  676         sockaddr_in_init(&usrc.sin, &ip->ip_src, 0);
  677         ifa = ifaof_ifpforaddr_psref(&usrc.sa, ifp, &psref_ia);
  678         if (ifa != NULL)
  679                 ia = ifatoia(ifa);
  680 
  681         /*
  682          * Ensure we only send from a valid address.
  683          * A NULL address is valid because the packet could be
  684          * generated from a packet filter.
  685          */
  686         if (ia != NULL && (flags & IP_FORWARDING) == 0 &&
  687             (error = ip_ifaddrvalid(ia)) != 0)
  688         {
  689                 ARPLOG(LOG_ERR,
  690                     "refusing to send from invalid address %s (pid %d)\n",
  691                     ARPLOGADDR(&ip->ip_src), curproc->p_pid);
  692                 IP_STATINC(IP_STAT_ODROPPED);
  693                 if (error == 1)
  694                         /*
  695                          * Address exists, but is tentative or detached.
  696                          * We can't send from it because it's invalid,
  697                          * so we drop the packet.
  698                          */
  699                         error = 0;
  700                 else
  701                         error = EADDRNOTAVAIL;
  702                 goto bad;
  703         }
  704 
  705         /* Maybe skip checksums on loopback interfaces. */
  706         if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) {
  707                 m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
  708         }
  709         sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
  710 
  711         /* Need to fragment the packet */
  712         if (ntohs(ip->ip_len) > mtu &&
  713             (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
  714                 goto fragment;
  715         }
  716 
  717 #if IFA_STATS
  718         if (ia)
  719                 ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len);
  720 #endif
  721         /*
  722          * Always initialize the sum to 0!  Some HW assisted
  723          * checksumming requires this.
  724          */
  725         ip->ip_sum = 0;
  726 
  727         if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
  728                 /*
  729                  * Perform any checksums that the hardware can't do
  730                  * for us.
  731                  *
  732                  * XXX Does any hardware require the {th,uh}_sum
  733                  * XXX fields to be 0?
  734                  */
  735                 if (sw_csum & M_CSUM_IPv4) {
  736                         KASSERT(IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4));
  737                         ip->ip_sum = in_cksum(m, hlen);
  738                         m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
  739                 }
  740                 if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  741                         if (IN_NEED_CHECKSUM(ifp,
  742                             sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
  743                                 in_undefer_cksum_tcpudp(m);
  744                         }
  745                         m->m_pkthdr.csum_flags &=
  746                             ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
  747                 }
  748         }
  749 
  750         sa = (m->m_flags & M_MCAST) ? sintocsa(rdst) : sintocsa(dst);
  751 
  752         /* Send it */
  753         if (__predict_false(sw_csum & M_CSUM_TSOv4)) {
  754                 /*
  755                  * TSO4 is required by a packet, but disabled for
  756                  * the interface.
  757                  */
  758                 error = ip_tso_output(ifp, m, sa, rt);
  759         } else
  760                 error = ip_if_output(ifp, m, sa, rt);
  761         goto done;
  762 
  763 fragment:
  764         /*
  765          * We can't use HW checksumming if we're about to fragment the packet.
  766          *
  767          * XXX Some hardware can do this.
  768          */
  769         if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  770                 if (IN_NEED_CHECKSUM(ifp,
  771                     m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
  772                         in_undefer_cksum_tcpudp(m);
  773                 }
  774                 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
  775         }
  776 
  777         /*
  778          * Too large for interface; fragment if possible.
  779          * Must be able to put at least 8 bytes per fragment.
  780          */
  781         if (ntohs(ip->ip_off) & IP_DF) {
  782                 if (flags & IP_RETURNMTU) {
  783                         KASSERT(inp != NULL);
  784                         in4p_errormtu(inp) = mtu;
  785                 }
  786                 error = EMSGSIZE;
  787                 IP_STATINC(IP_STAT_CANTFRAG);
  788                 goto bad;
  789         }
  790 
  791         error = ip_fragment(m, ifp, mtu);
  792         if (error) {
  793                 m = NULL;
  794                 goto bad;
  795         }
  796 
  797         for (; m; m = m0) {
  798                 m0 = m->m_nextpkt;
  799                 m->m_nextpkt = NULL;
  800                 if (error) {
  801                         m_freem(m);
  802                         continue;
  803                 }
  804 #if IFA_STATS
  805                 if (ia)
  806                         ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len);
  807 #endif
  808                 /*
  809                  * If we get there, the packet has not been handled by
  810                  * IPsec whereas it should have. Now that it has been
  811                  * fragmented, re-inject it in ip_output so that IPsec
  812                  * processing can occur.
  813                  */
  814                 if (natt_frag) {
  815                         error = ip_output(m, opt, NULL,
  816                             flags | IP_RAWOUTPUT | IP_NOIPNEWID,
  817                             imo, inp);
  818                 } else {
  819                         KASSERT((m->m_pkthdr.csum_flags &
  820                             (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0);
  821                         error = ip_if_output(ifp, m, (m->m_flags & M_MCAST) ?
  822                             sintocsa(rdst) : sintocsa(dst), rt);
  823                 }
  824         }
  825         if (error == 0) {
  826                 IP_STATINC(IP_STAT_FRAGMENTED);
  827         }
  828 
  829 done:
  830         ia4_release(ia, &psref_ia);
  831         rtcache_unref(rt, ro);
  832         if (ro == &iproute) {
  833                 rtcache_free(&iproute);
  834         }
  835         if (mifp != NULL) {
  836                 if_put(mifp, &psref);
  837         }
  838         if (bind_need_restore)
  839                 curlwp_bindx(bound);
  840         return error;
  841 
  842 bad:
  843         m_freem(m);
  844         goto done;
  845 }
  846 
  /*
   * ip_fragment: split the IPv4 packet in m into fragments sized for mtu.
   *
   * The IP header is read from the start of m.  ifp may be NULL; when it is
   * not, the bits in its if_csum_flags_tx are masked out of the packet's
   * checksum flags before deciding whether header checksums are computed
   * here with in_cksum(), and its send queue is checked for room.
   *
   * On success (return 0) m has been trimmed to become the first fragment
   * and the newly built fragments are linked after it through m_nextpkt.
   * On failure every mbuf, including the original, is freed and the
   * function returns EMSGSIZE (mtu leaves fewer than 8 data bytes per
   * fragment) or ENOBUFS (an mbuf could not be obtained, or the send queue
   * of ifp does not have room for the new fragments).
   */
  847 int
  848 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu)
  849 {
  850         struct ip *ip, *mhip;
  851         struct mbuf *m0;
  852         int len, hlen, off;
  853         int mhlen, firstlen;
  854         struct mbuf **mnext;
  855         int sw_csum = m->m_pkthdr.csum_flags;
  856         int fragments = 0;
  857         int error = 0;
  858         int ipoff, ipflg;
  859 
  860         ip = mtod(m, struct ip *);
  861         hlen = ip->ip_hl << 2;
  862 
  863         /* Preserve the offset and flags. */
  864         ipoff = ntohs(ip->ip_off) & IP_OFFMASK;
  865         ipflg = ntohs(ip->ip_off) & (IP_RF|IP_DF|IP_MF);
  866 
  867         if (ifp != NULL)
  868                 sw_csum &= ~ifp->if_csum_flags_tx;
  869 
              /*
               * Data bytes carried by each fragment, rounded down to a
               * multiple of 8 because fragment offsets are stored in units
               * of 8 bytes (see the ">> 3" below).
               */
  870         len = (mtu - hlen) &~ 7;
  871         if (len < 8) {
  872                 IP_STATINC(IP_STAT_CANTFRAG);
  873                 m_freem(m);
  874                 return EMSGSIZE;
  875         }
  876 
  877         firstlen = len;
  878         mnext = &m->m_nextpkt;
  879 
  880         /*
  881          * Loop through length of segment after first fragment,
  882          * make new header and copy data of each part and link onto chain.
  883          */
  884         m0 = m;
  885         mhlen = sizeof(struct ip);
  886         for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
  887                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
  888                 if (m == NULL) {
  889                         error = ENOBUFS;
  890                         IP_STATINC(IP_STAT_ODROPPED);
  891                         goto sendorfree;
  892                 }
  893                 MCLAIM(m, m0->m_owner);
  894 
                      /*
                       * Link the new mbuf onto the chain right away so that
                       * the cleanup at sendorfree also frees it if a later
                       * step in this iteration fails.
                       */
  895                 *mnext = m;
  896                 mnext = &m->m_nextpkt;
  897 
  898                 m->m_data += max_linkhdr;
  899                 mhip = mtod(m, struct ip *);
  900                 *mhip = *ip;
  901 
  902                 /* we must inherit the flags */
  903                 m->m_flags |= m0->m_flags & M_COPYFLAGS;
  904 
                      /*
                       * The original header carries options: copy the ones
                       * ip_optcopy() keeps and size this header accordingly.
                       */
  905                 if (hlen > sizeof(struct ip)) {
  906                         mhlen = ip_optcopy(ip, mhip) + sizeof(struct ip);
  907                         mhip->ip_hl = mhlen >> 2;
  908                 }
  909                 m->m_len = mhlen;
  910 
                      /*
                       * Offset of this fragment's data in 8-byte units, added
                       * to the offset the original packet already had, plus
                       * the original flags.  The last fragment gets the
                       * remaining byte count; all others also get IP_MF.
                       */
  911                 mhip->ip_off = ((off - hlen) >> 3) + ipoff;
  912                 mhip->ip_off |= ipflg;
  913                 if (off + len >= ntohs(ip->ip_len))
  914                         len = ntohs(ip->ip_len) - off;
  915                 else
  916                         mhip->ip_off |= IP_MF;
  917                 HTONS(mhip->ip_off);
  918 
  919                 mhip->ip_len = htons((u_int16_t)(len + mhlen));
  920                 m->m_next = m_copym(m0, off, len, M_DONTWAIT);
  921                 if (m->m_next == NULL) {
  922                         error = ENOBUFS;
  923                         IP_STATINC(IP_STAT_ODROPPED);
  924                         goto sendorfree;
  925                 }
  926 
  927                 m->m_pkthdr.len = mhlen + len;
  928                 m_reset_rcvif(m);
  929 
  930                 mhip->ip_sum = 0;
  931                 KASSERT((m->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0);
  932                 if (sw_csum & M_CSUM_IPv4) {
  933                         mhip->ip_sum = in_cksum(m, mhlen);
  934                 } else {
  935                         /*
  936                          * checksum is hw-offloaded or not necessary.
  937                          */
  938                         m->m_pkthdr.csum_flags |=
  939                             m0->m_pkthdr.csum_flags & M_CSUM_IPv4;
  940                         m->m_pkthdr.csum_data |= mhlen << 16;
  941                         KASSERT(!(ifp != NULL &&
  942                             IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) ||
  943                             (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0);
  944                 }
  945                 IP_STATINC(IP_STAT_OFRAGMENTS);
                      /* Counts only fragments built here, not the first one. */
  946                 fragments++;
  947         }
  948 
  949         /*
  950          * Update first fragment by trimming what's been copied out
  951          * and updating header, then send each fragment (in order).
  952          */
              /*
               * NOTE(review): the m_adj() count is negative only when ip_len
               * exceeds hlen + firstlen, i.e. when the loop above ran at
               * least once -- confirm callers only pass oversized packets.
               */
  953         m = m0;
  954         m_adj(m, hlen + firstlen - ntohs(ip->ip_len));
  955         m->m_pkthdr.len = hlen + firstlen;
  956         ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
  957         ip->ip_off |= htons(IP_MF);
  958         ip->ip_sum = 0;
  959         if (sw_csum & M_CSUM_IPv4) {
  960                 ip->ip_sum = in_cksum(m, hlen);
  961                 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
  962         } else {
  963                 /*
  964                  * checksum is hw-offloaded or not necessary.
  965                  */
  966                 KASSERT(!(ifp != NULL && IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) ||
  967                     (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0);
  968                 KASSERT(M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data) >=
  969                     sizeof(struct ip));
  970         }
  971 
  972 sendorfree:
  973         /*
  974          * If there is no room for all the fragments, don't queue
  975          * any of them.
  976          */
              /*
               * The queue check only raises ENOBUFS (and bumps the drop
               * counters) when no earlier error was recorded.
               */
  977         if (ifp != NULL) {
  978                 IFQ_LOCK(&ifp->if_snd);
  979                 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments &&
  980                     error == 0) {
  981                         error = ENOBUFS;
  982                         IP_STATINC(IP_STAT_ODROPPED);
  983                         IFQ_INC_DROPS(&ifp->if_snd);
  984                 }
  985                 IFQ_UNLOCK(&ifp->if_snd);
  986         }
              /* On any error release the whole chain, starting at m0. */
  987         if (error) {
  988                 for (m = m0; m; m = m0) {
  989                         m0 = m->m_nextpkt;
  990                         m->m_nextpkt = NULL;
  991                         m_freem(m);
  992                 }
  993         }
  994 
  995         return error;
  996 }
  997 
  998 /*
  999  * Determine the maximum length of the options to be inserted;
 1000  * we would far rather allocate too much space rather than too little.
 1001  */
 1002 u_int
 1003 ip_optlen(struct inpcb *inp)
 1004 {
 1005         struct mbuf *m = inp->inp_options;
 1006 
 1007         if (m && m->m_len > offsetof(struct ipoption, ipopt_dst)) {
 1008                 return (m->m_len - offsetof(struct ipoption, ipopt_dst));
 1009         }
 1010         return 0;
 1011 }
 1012 
 1013 /*
 1014  * Insert IP options into preformed packet.
 1015  * Adjust IP destination as required for IP source routing,
 1016  * as indicated by a non-zero in_addr at the start of the options.
 1017  */
 1018 static struct mbuf *
 1019 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
 1020 {
 1021         struct ipoption *p = mtod(opt, struct ipoption *);
 1022         struct mbuf *n;
 1023         struct ip *ip = mtod(m, struct ip *);
 1024         unsigned optlen;
 1025 
 1026         optlen = opt->m_len - sizeof(p->ipopt_dst);
 1027         KASSERT(optlen % 4 == 0);
 1028         if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
 1029                 return m;               /* XXX should fail */
 1030         if (!in_nullhost(p->ipopt_dst))
 1031                 ip->ip_dst = p->ipopt_dst;
 1032         if (M_READONLY(m) || M_LEADINGSPACE(m) < optlen) {
 1033                 MGETHDR(n, M_DONTWAIT, MT_HEADER);
 1034                 if (n == NULL)
 1035                         return m;
 1036                 MCLAIM(n, m->m_owner);
 1037                 m_move_pkthdr(n, m);
 1038                 m->m_len -= sizeof(struct ip);
 1039                 m->m_data += sizeof(struct ip);
 1040                 n->m_next = m;
 1041                 n->m_len = optlen + sizeof(struct ip);
 1042                 n->m_data += max_linkhdr;
 1043                 memcpy(mtod(n, void *), ip, sizeof(struct ip));
 1044                 m = n;
 1045         } else {
 1046                 m->m_data -= optlen;
 1047                 m->m_len += optlen;
 1048                 memmove(mtod(m, void *), ip, sizeof(struct ip));
 1049         }
 1050         m->m_pkthdr.len += optlen;
 1051         ip = mtod(m, struct ip *);
 1052         memcpy(ip + 1, p->ipopt_list, optlen);
 1053         *phlen = sizeof(struct ip) + optlen;
 1054         ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
 1055         return m;
 1056 }
 1057 
 1058 /*
 1059  * Copy options from ipsrc to ipdst, omitting those not copied during
 1060  * fragmentation.
 1061  */
 1062 int
 1063 ip_optcopy(struct ip *ipsrc, struct ip *ipdst)
 1064 {
 1065         u_char *cp, *dp;
 1066         int opt, optlen, cnt;
 1067 
 1068         cp = (u_char *)(ipsrc + 1);
 1069         dp = (u_char *)(ipdst + 1);
 1070         cnt = (ipsrc->ip_hl << 2) - sizeof(struct ip);
 1071         for (; cnt > 0; cnt -= optlen, cp += optlen) {
 1072                 opt = cp[0];
 1073                 if (opt == IPOPT_EOL)
 1074                         break;
 1075                 if (opt == IPOPT_NOP) {
 1076                         /* Preserve for IP mcast tunnel's LSRR alignment. */
 1077                         *dp++ = IPOPT_NOP;
 1078                         optlen = 1;
 1079                         continue;
 1080                 }
 1081 
 1082                 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp));
 1083                 optlen = cp[IPOPT_OLEN];
 1084                 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen < cnt);
 1085 
 1086                 /* Invalid lengths should have been caught by ip_dooptions. */
 1087                 if (optlen > cnt)
 1088                         optlen = cnt;
 1089                 if (IPOPT_COPIED(opt)) {
 1090                         bcopy((void *)cp, (void *)dp, (unsigned)optlen);
 1091                         dp += optlen;
 1092                 }
 1093         }
 1094 
 1095         for (optlen = dp - (u_char *)(ipdst+1); optlen & 0x3; optlen++) {
 1096                 *dp++ = IPOPT_EOL;
 1097         }
 1098 
 1099         return optlen;
 1100 }
 1101 
 1102 /*
 1103  * IP socket option processing.
       *
       * Handles PRCO_SETOPT and PRCO_GETOPT requests at level IPPROTO_IP for
       * socket so, whose lock must be held (asserted below).  A request at any
       * other level returns ENOPROTOOPT, except SOL_SOCKET/SO_NOHEADER which
       * returns 0.  Option names that are not handled return ENOPROTOOPT.
       *
       * Flag-style options are applied to a local copy of inp_flags
       * (inpflags); that copy is stored back into the inpcb only when the
       * request finished without error.
       *
       * Returns 0 on success or an errno value.
 1104  */
 1105 int
 1106 ip_ctloutput(int op, struct socket *so, struct sockopt *sopt)
 1107 {
 1108         struct inpcb *inp = sotoinpcb(so);
 1109         struct ip *ip = &in4p_ip(inp);
 1110         int inpflags = inp->inp_flags;
 1111         int optval = 0, error = 0;
 1112         struct in_pktinfo pktinfo;
 1113 
 1114         KASSERT(solocked(so));
 1115 
 1116         if (sopt->sopt_level != IPPROTO_IP) {
 1117                 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER)
 1118                         return 0;
 1119                 return ENOPROTOOPT;
 1120         }
 1121 
 1122         switch (op) {
 1123         case PRCO_SETOPT:
 1124                 switch (sopt->sopt_name) {
 1125                 case IP_OPTIONS:
 1126 #ifdef notyet
 1127                 case IP_RETOPTS:
 1128 #endif
 1129                         error = ip_pcbopts(inp, sopt);
 1130                         break;
 1131 
                      /*
                       * Options taking a single int: fetch the value once,
                       * then dispatch again on the option name.
                       */
 1132                 case IP_TOS:
 1133                 case IP_TTL:
 1134                 case IP_MINTTL:
 1135                 case IP_RECVOPTS:
 1136                 case IP_RECVRETOPTS:
 1137                 case IP_RECVDSTADDR:
 1138                 case IP_RECVIF:
 1139                 case IP_RECVPKTINFO:
 1140                 case IP_RECVTTL:
 1141                 case IP_BINDANY:
 1142                         error = sockopt_getint(sopt, &optval);
 1143                         if (error)
 1144                                 break;
 1145 
 1146                         switch (sopt->sopt_name) {
 1147                         case IP_TOS:
 1148                                 ip->ip_tos = optval;
 1149                                 break;
 1150 
 1151                         case IP_TTL:
 1152                                 ip->ip_ttl = optval;
 1153                                 break;
 1154 
 1155                         case IP_MINTTL:
 1156                                 if (optval > 0 && optval <= MAXTTL)
 1157                                         in4p_ip_minttl(inp) = optval;
 1158                                 else
 1159                                         error = EINVAL;
 1160                                 break;
                              /*
                               * OPTSET: set "bit" in the working copy inpflags
                               * when optval is non-zero, clear it otherwise.
                               */
 1161 #define OPTSET(bit) \
 1162         if (optval) \
 1163                 inpflags |= bit; \
 1164         else \
 1165                 inpflags &= ~bit;
 1166 
 1167                         case IP_RECVOPTS:
 1168                                 OPTSET(INP_RECVOPTS);
 1169                                 break;
 1170 
 1171                         case IP_RECVPKTINFO:
 1172                                 OPTSET(INP_RECVPKTINFO);
 1173                                 break;
 1174 
 1175                         case IP_RECVRETOPTS:
 1176                                 OPTSET(INP_RECVRETOPTS);
 1177                                 break;
 1178 
 1179                         case IP_RECVDSTADDR:
 1180                                 OPTSET(INP_RECVDSTADDR);
 1181                                 break;
 1182 
 1183                         case IP_RECVIF:
 1184                                 OPTSET(INP_RECVIF);
 1185                                 break;
 1186 
 1187                         case IP_RECVTTL:
 1188                                 OPTSET(INP_RECVTTL);
 1189                                 break;
 1190 
                              /* The flag changes only if kauth allows it. */
 1191                         case IP_BINDANY:
 1192                                 error = kauth_authorize_network(
 1193                                     kauth_cred_get(), KAUTH_NETWORK_BIND,
 1194                                     KAUTH_REQ_NETWORK_BIND_ANYADDR, so,
 1195                                     NULL, NULL);
 1196                                 if (error == 0) {
 1197                                         OPTSET(INP_BINDANY);
 1198                                 }
 1199                                 break;
 1200                         }
 1201                         break;
                      /*
                       * IP_PKTINFO has two forms.  If sockopt_getint()
                       * succeeds the value toggles INP_RECVPKTINFO.
                       * Otherwise the data is read as a struct in_pktinfo
                       * that sets the preferred source address: ipi_addr
                       * itself when ipi_ifindex is 0, else the address
                       * returned by in_get_ia_from_ifp() for that interface.
                       */
 1202                 case IP_PKTINFO:
 1203                         error = sockopt_getint(sopt, &optval);
 1204                         if (!error) {
 1205                                 /* Linux compatibility */
 1206                                 OPTSET(INP_RECVPKTINFO);
 1207                                 break;
 1208                         }
 1209                         error = sockopt_get(sopt, &pktinfo, sizeof(pktinfo));
 1210                         if (error)
 1211                                 break;
 1212 
 1213                         if (pktinfo.ipi_ifindex == 0) {
 1214                                 in4p_prefsrcip(inp) = pktinfo.ipi_addr;
 1215                                 break;
 1216                         }
 1217 
 1218                         /* Solaris compatibility */
 1219                         struct ifnet *ifp;
 1220                         struct in_ifaddr *ia;
 1221                         int s;
 1222 
 1223                         /* pick up primary address */
 1224                         s = pserialize_read_enter();
 1225                         ifp = if_byindex(pktinfo.ipi_ifindex);
 1226                         if (ifp == NULL) {
 1227                                 pserialize_read_exit(s);
 1228                                 error = EADDRNOTAVAIL;
 1229                                 break;
 1230                         }
 1231                         ia = in_get_ia_from_ifp(ifp);
 1232                         if (ia == NULL) {
 1233                                 pserialize_read_exit(s);
 1234                                 error = EADDRNOTAVAIL;
 1235                                 break;
 1236                         }
 1237                         in4p_prefsrcip(inp) = IA_SIN(ia)->sin_addr;
 1238                         pserialize_read_exit(s);
 1239                         break;
                      /* Not reached: every path above already breaks. */
 1240                 break;
 1241 #undef OPTSET
 1242 
 1243                 case IP_MULTICAST_IF:
 1244                 case IP_MULTICAST_TTL:
 1245                 case IP_MULTICAST_LOOP:
 1246                 case IP_ADD_MEMBERSHIP:
 1247                 case IP_DROP_MEMBERSHIP:
 1248                         error = ip_setmoptions(&inp->inp_moptions, sopt);
 1249                         break;
 1250 
 1251                 case IP_PORTRANGE:
 1252                         error = sockopt_getint(sopt, &optval);
 1253                         if (error)
 1254                                 break;
 1255 
 1256                         switch (optval) {
 1257                         case IP_PORTRANGE_DEFAULT:
 1258                         case IP_PORTRANGE_HIGH:
 1259                                 inpflags &= ~(INP_LOWPORT);
 1260                                 break;
 1261 
 1262                         case IP_PORTRANGE_LOW:
 1263                                 inpflags |= INP_LOWPORT;
 1264                                 break;
 1265 
 1266                         default:
 1267                                 error = EINVAL;
 1268                                 break;
 1269                         }
 1270                         break;
 1271 
 1272                 case IP_PORTALGO:
 1273                         error = sockopt_getint(sopt, &optval);
 1274                         if (error)
 1275                                 break;
 1276 
 1277                         error = portalgo_algo_index_select(inp, optval);
 1278                         break;
 1279 
 1280 #if defined(IPSEC)
 1281                 case IP_IPSEC_POLICY:
 1282                         if (ipsec_enabled) {
 1283                                 error = ipsec_set_policy(inp,
 1284                                     sopt->sopt_data, sopt->sopt_size,
 1285                                     curlwp->l_cred);
 1286                         } else 
 1287                                 error = ENOPROTOOPT;
 1288                         break;
 1289 #endif /* IPSEC */
 1290 
 1291                 default:
 1292                         error = ENOPROTOOPT;
 1293                         break;
 1294                 }
 1295                 break;
 1296 
 1297         case PRCO_GETOPT:
 1298                 switch (sopt->sopt_name) {
                      /*
                       * Returns a copy of the stored option mbuf.  When no
                       * options are stored nothing is written to sopt and
                       * error stays 0.
                       */
 1299                 case IP_OPTIONS:
 1300                 case IP_RETOPTS: {
 1301                         struct mbuf *mopts = inp->inp_options;
 1302 
 1303                         if (mopts) {
 1304                                 struct mbuf *m;
 1305 
 1306                                 m = m_copym(mopts, 0, M_COPYALL, M_DONTWAIT);
 1307                                 if (m == NULL) {
 1308                                         error = ENOBUFS;
 1309                                         break;
 1310                                 }
 1311                                 error = sockopt_setmbuf(sopt, m);
 1312                         }
 1313                         break;
 1314                 }
                      /*
                       * Options reported as a single int: compute optval per
                       * option name, then store it with sockopt_setint().
                       */
 1315                 case IP_TOS:
 1316                 case IP_TTL:
 1317                 case IP_MINTTL:
 1318                 case IP_RECVOPTS:
 1319                 case IP_RECVRETOPTS:
 1320                 case IP_RECVDSTADDR:
 1321                 case IP_RECVIF:
 1322                 case IP_RECVPKTINFO:
 1323                 case IP_RECVTTL:
 1324                 case IP_ERRORMTU:
 1325                 case IP_BINDANY:
 1326                         switch (sopt->sopt_name) {
 1327                         case IP_TOS:
 1328                                 optval = ip->ip_tos;
 1329                                 break;
 1330 
 1331                         case IP_TTL:
 1332                                 optval = ip->ip_ttl;
 1333                                 break;
 1334 
 1335                         case IP_MINTTL:
 1336                                 optval = in4p_ip_minttl(inp);
 1337                                 break;
 1338 
 1339                         case IP_ERRORMTU:
 1340                                 optval = in4p_errormtu(inp);
 1341                                 break;
 1342 
                              /* OPTBIT: 1 if "bit" is set in inpflags, else 0. */
 1343 #define OPTBIT(bit)     (inpflags & bit ? 1 : 0)
 1344 
 1345                         case IP_RECVOPTS:
 1346                                 optval = OPTBIT(INP_RECVOPTS);
 1347                                 break;
 1348 
 1349                         case IP_RECVPKTINFO:
 1350                                 optval = OPTBIT(INP_RECVPKTINFO);
 1351                                 break;
 1352 
 1353                         case IP_RECVRETOPTS:
 1354                                 optval = OPTBIT(INP_RECVRETOPTS);
 1355                                 break;
 1356 
 1357                         case IP_RECVDSTADDR:
 1358                                 optval = OPTBIT(INP_RECVDSTADDR);
 1359                                 break;
 1360 
 1361                         case IP_RECVIF:
 1362                                 optval = OPTBIT(INP_RECVIF);
 1363                                 break;
 1364 
 1365                         case IP_RECVTTL:
 1366                                 optval = OPTBIT(INP_RECVTTL);
 1367                                 break;
 1368 
 1369                         case IP_BINDANY:
 1370                                 optval = OPTBIT(INP_BINDANY);
 1371                                 break;
 1372                         }
 1373                         error = sockopt_setint(sopt, optval);
 1374                         break;
 1375 
                      /*
                       * The reply format is chosen from the size recorded in
                       * sopt: the INP_RECVPKTINFO flag as an int, or a struct
                       * in_pktinfo carrying the preferred source address with
                       * ipi_ifindex set to 0.
                       */
 1376                 case IP_PKTINFO:
 1377                         switch (sopt->sopt_size) {
 1378                         case sizeof(int):
 1379                                 /* Linux compatibility */
 1380                                 optval = OPTBIT(INP_RECVPKTINFO);
 1381                                 error = sockopt_setint(sopt, optval);
 1382                                 break;
 1383                         case sizeof(struct in_pktinfo):
 1384                                 /* Solaris compatibility */
 1385                                 pktinfo.ipi_ifindex = 0;
 1386                                 pktinfo.ipi_addr = in4p_prefsrcip(inp);
 1387                                 error = sockopt_set(sopt, &pktinfo,
 1388                                     sizeof(pktinfo));
 1389                                 break;
 1390                         default:
 1391                                 /*
 1392                                  * While size is stuck at 0, and, later, if
 1393                                  * the caller doesn't use an exactly sized
 1394                                  * recipient for the data, default to Linux
 1395                                  * compatibility
 1396                                  */
 1397                                 optval = OPTBIT(INP_RECVPKTINFO);
 1398                                 error = sockopt_setint(sopt, optval);
 1399                                 break;
 1400                         }
 1401                         break;
 1402 
 1403 #if 0   /* defined(IPSEC) */
 1404                 case IP_IPSEC_POLICY:
 1405                 {
 1406                         struct mbuf *m = NULL;
 1407 
 1408                         /* XXX this will return EINVAL as sopt is empty */
 1409                         error = ipsec_get_policy(inp, sopt->sopt_data,
 1410                             sopt->sopt_size, &m);
 1411                         if (error == 0)
 1412                                 error = sockopt_setmbuf(sopt, m);
 1413                         break;
 1414                 }
 1415 #endif /*IPSEC*/
 1416 
 1417                 case IP_MULTICAST_IF:
 1418                 case IP_MULTICAST_TTL:
 1419                 case IP_MULTICAST_LOOP:
 1420                 case IP_ADD_MEMBERSHIP:
 1421                 case IP_DROP_MEMBERSHIP:
 1422                         error = ip_getmoptions(inp->inp_moptions, sopt);
 1423                         break;
 1424 
                      /* Only LOW and DEFAULT are ever reported. */
 1425                 case IP_PORTRANGE:
 1426                         if (inpflags & INP_LOWPORT)
 1427                                 optval = IP_PORTRANGE_LOW;
 1428                         else
 1429                                 optval = IP_PORTRANGE_DEFAULT;
 1430                         error = sockopt_setint(sopt, optval);
 1431                         break;
 1432 
 1433                 case IP_PORTALGO:
 1434                         optval = inp->inp_portalgo;
 1435                         error = sockopt_setint(sopt, optval);
 1436                         break;
 1437 
 1438                 default:
 1439                         error = ENOPROTOOPT;
 1440                         break;
 1441                 }
 1442                 break;
 1443         }
 1444 
              /* Commit the working copy of the flag word only on success. */
 1445         if (!error) {
 1446                 inp->inp_flags = inpflags;
 1447         }
 1448         return error;
 1449 }
 1450 
 1451 static int
 1452 ip_pktinfo_prepare(const struct inpcb *inp, const struct in_pktinfo *pktinfo,
 1453     struct ip_pktopts *pktopts, int *flags, kauth_cred_t cred)
 1454 {
 1455         struct ip_moptions *imo;
 1456         int error = 0;
 1457         bool addrset = false;
 1458 
 1459         if (!in_nullhost(pktinfo->ipi_addr)) {
 1460                 pktopts->ippo_laddr.sin_addr = pktinfo->ipi_addr;
 1461                 /* EADDRNOTAVAIL? */
 1462                 error = inpcb_bindableaddr(inp, &pktopts->ippo_laddr, cred);
 1463                 if (error != 0)
 1464                         return error;
 1465                 addrset = true;
 1466         }
 1467 
 1468         if (pktinfo->ipi_ifindex != 0) {
 1469                 if (!addrset) {
 1470                         struct ifnet *ifp;
 1471                         struct in_ifaddr *ia;
 1472                         int s;
 1473 
 1474                         /* pick up primary address */
 1475                         s = pserialize_read_enter();
 1476                         ifp = if_byindex(pktinfo->ipi_ifindex);
 1477                         if (ifp == NULL) {
 1478                                 pserialize_read_exit(s);
 1479                                 return EADDRNOTAVAIL;
 1480                         }
 1481                         ia = in_get_ia_from_ifp(ifp);
 1482                         if (ia == NULL) {
 1483                                 pserialize_read_exit(s);
 1484                                 return EADDRNOTAVAIL;
 1485                         }
 1486                         pktopts->ippo_laddr.sin_addr = IA_SIN(ia)->sin_addr;
 1487                         pserialize_read_exit(s);
 1488                 }
 1489 
 1490                 /*
 1491                  * If specified ipi_ifindex,
 1492                  * use copied or locally initialized ip_moptions.
 1493                  * Original ip_moptions must not be modified.
 1494                  */
 1495                 imo = &pktopts->ippo_imobuf;    /* local buf in pktopts */
 1496                 if (pktopts->ippo_imo != NULL) {
 1497                         memcpy(imo, pktopts->ippo_imo, sizeof(*imo));
 1498                 } else {
 1499                         memset(imo, 0, sizeof(*imo));
 1500                         imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 1501                         imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
 1502                 }
 1503                 imo->imo_multicast_if_index = pktinfo->ipi_ifindex;
 1504                 pktopts->ippo_imo = imo;
 1505                 *flags |= IP_ROUTETOIFINDEX;
 1506         }
 1507         return error;
 1508 }
 1509 
 1510 /*
 1511  * Set up IP outgoing packet options. Even if control is NULL,
 1512  * pktopts->ippo_laddr and pktopts->ippo_imo are set and used.
 1513  */
 1514 int
 1515 ip_setpktopts(struct mbuf *control, struct ip_pktopts *pktopts, int *flags,
 1516     struct inpcb *inp, kauth_cred_t cred)
 1517 {
 1518         struct cmsghdr *cm;
 1519         struct in_pktinfo pktinfo;
 1520         int error;
 1521 
 1522         pktopts->ippo_imo = inp->inp_moptions;
 1523 
 1524         struct in_addr *ia = in_nullhost(in4p_prefsrcip(inp)) ? &in4p_laddr(inp) :
 1525             &in4p_prefsrcip(inp);
 1526         sockaddr_in_init(&pktopts->ippo_laddr, ia, 0);
 1527 
 1528         if (control == NULL)
 1529                 return 0;
 1530 
 1531         /*
 1532          * XXX: Currently, we assume all the optional information is
 1533          * stored in a single mbuf.
 1534          */
 1535         if (control->m_next)
 1536                 return EINVAL;
 1537 
 1538         for (; control->m_len > 0;
 1539             control->m_data += CMSG_ALIGN(cm->cmsg_len),
 1540             control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
 1541                 cm = mtod(control, struct cmsghdr *);
 1542                 if ((control->m_len < sizeof(*cm)) ||
 1543                     (cm->cmsg_len == 0) ||
 1544                     (cm->cmsg_len > control->m_len)) {
 1545                         return EINVAL;
 1546                 }
 1547                 if (cm->cmsg_level != IPPROTO_IP)
 1548                         continue;
 1549 
 1550                 switch (cm->cmsg_type) {
 1551                 case IP_PKTINFO:
 1552                         if (cm->cmsg_len != CMSG_LEN(sizeof(pktinfo)))
 1553                                 return EINVAL;
 1554                         memcpy(&pktinfo, CMSG_DATA(cm), sizeof(pktinfo));
 1555                         error = ip_pktinfo_prepare(inp, &pktinfo, pktopts,
 1556                             flags, cred);
 1557                         if (error)
 1558                                 return error;
 1559                         break;
 1560                 case IP_SENDSRCADDR: /* FreeBSD compatibility */
 1561                         if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_addr)))
 1562                                 return EINVAL;
 1563                         pktinfo.ipi_ifindex = 0;
 1564                         pktinfo.ipi_addr =
 1565                             ((struct in_pktinfo *)CMSG_DATA(cm))->ipi_addr;
 1566                         error = ip_pktinfo_prepare(inp, &pktinfo, pktopts,
 1567                             flags, cred);
 1568                         if (error)
 1569                                 return error;
 1570                         break;
 1571                 default:
 1572                         return ENOPROTOOPT;
 1573                 }
 1574         }
 1575         return 0;
 1576 }
 1577 
 1578 /*
 1579  * Set up IP options in pcb for insertion in output packets.
 1580  * Store in mbuf with pointer in pcbopt, adding pseudo-option
 1581  * with destination address if source routed.
 1582  */
 1583 static int
 1584 ip_pcbopts(struct inpcb *inp, const struct sockopt *sopt)
 1585 {
 1586         struct mbuf *m;
 1587         const u_char *cp;
 1588         u_char *dp;
 1589         int cnt;
 1590 
 1591         KASSERT(inp_locked(inp));
 1592 
 1593         /* Turn off any old options. */
 1594         if (inp->inp_options) {
 1595                 m_free(inp->inp_options);
 1596         }
 1597         inp->inp_options = NULL;
 1598         if ((cnt = sopt->sopt_size) == 0) {
 1599                 /* Only turning off any previous options. */
 1600                 return 0;
 1601         }
 1602         cp = sopt->sopt_data;
 1603 
 1604         if (cnt % 4) {
 1605                 /* Must be 4-byte aligned, because there's no padding. */
 1606                 return EINVAL;
 1607         }
 1608 
 1609         m = m_get(M_DONTWAIT, MT_SOOPTS);
 1610         if (m == NULL)
 1611                 return ENOBUFS;
 1612 
 1613         dp = mtod(m, u_char *);
 1614         memset(dp, 0, sizeof(struct in_addr));
 1615         dp += sizeof(struct in_addr);
 1616         m->m_len = sizeof(struct in_addr);
 1617 
 1618         /*
 1619          * IP option list according to RFC791. Each option is of the form
 1620          *
 1621          *      [optval] [olen] [(olen - 2) data bytes]
 1622          *
 1623          * We validate the list and copy options to an mbuf for prepending
 1624          * to data packets. The IP first-hop destination address will be
 1625          * stored before actual options and is zero if unset.
 1626          */
 1627         while (cnt > 0) {
 1628                 uint8_t optval, olen, offset;
 1629 
 1630                 optval = cp[IPOPT_OPTVAL];
 1631 
 1632                 if (optval == IPOPT_EOL || optval == IPOPT_NOP) {
 1633                         olen = 1;
 1634                 } else {
 1635                         if (cnt < IPOPT_OLEN + 1)
 1636                                 goto bad;
 1637 
 1638                         olen = cp[IPOPT_OLEN];
 1639                         if (olen < IPOPT_OLEN + 1 || olen > cnt)
 1640                                 goto bad;
 1641                 }
 1642 
 1643                 if (optval == IPOPT_LSRR || optval == IPOPT_SSRR) {
 1644                         /*
 1645                          * user process specifies route as:
 1646                          *      ->A->B->C->D
 1647                          * D must be our final destination (but we can't
 1648                          * check that since we may not have connected yet).
 1649                          * A is first hop destination, which doesn't appear in
 1650                          * actual IP option, but is stored before the options.
 1651                          */
 1652                         if (olen < IPOPT_OFFSET + 1 + sizeof(struct in_addr))
 1653                                 goto bad;
 1654 
 1655                         offset = cp[IPOPT_OFFSET];
 1656                         memcpy(mtod(m, u_char *), cp + IPOPT_OFFSET + 1,
 1657                             sizeof(struct in_addr));
 1658 
 1659                         cp += sizeof(struct in_addr);
 1660                         cnt -= sizeof(struct in_addr);
 1661                         olen -= sizeof(struct in_addr);
 1662 
 1663                         if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr))
 1664                                 goto bad;
 1665 
 1666                         memcpy(dp, cp, olen);
 1667                         dp[IPOPT_OPTVAL] = optval;
 1668                         dp[IPOPT_OLEN] = olen;
 1669                         dp[IPOPT_OFFSET] = offset;
 1670                         break;
 1671                 } else {
 1672                         if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr))
 1673                                 goto bad;
 1674 
 1675                         memcpy(dp, cp, olen);
 1676                         break;
 1677                 }
 1678 
 1679                 dp += olen;
 1680                 m->m_len += olen;
 1681 
 1682                 if (optval == IPOPT_EOL)
 1683                         break;
 1684 
 1685                 cp += olen;
 1686                 cnt -= olen;
 1687         }
 1688 
 1689         inp->inp_options = m;
 1690         return 0;
 1691 
 1692 bad:
 1693         (void)m_free(m);
 1694         return EINVAL;
 1695 }
 1696 
 1697 /*
 1698  * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
 1699  * Must be called in a pserialize critical section.
 1700  */
 1701 static struct ifnet *
 1702 ip_multicast_if(struct in_addr *a, int *ifindexp)
 1703 {
 1704         int ifindex;
 1705         struct ifnet *ifp = NULL;
 1706         struct in_ifaddr *ia;
 1707 
 1708         if (ifindexp)
 1709                 *ifindexp = 0;
 1710         if (ntohl(a->s_addr) >> 24 == 0) {
 1711                 ifindex = ntohl(a->s_addr) & 0xffffff;
 1712                 ifp = if_byindex(ifindex);
 1713                 if (!ifp)
 1714                         return NULL;
 1715                 if (ifindexp)
 1716                         *ifindexp = ifindex;
 1717         } else {
 1718                 IN_ADDRHASH_READER_FOREACH(ia, a->s_addr) {
 1719                         if (in_hosteq(ia->ia_addr.sin_addr, *a) &&
 1720                             (ia->ia_ifp->if_flags & IFF_MULTICAST) != 0) {
 1721                                 ifp = ia->ia_ifp;
 1722                                 if (if_is_deactivated(ifp))
 1723                                         ifp = NULL;
 1724                                 break;
 1725                         }
 1726                 }
 1727         }
 1728         return ifp;
 1729 }
 1730 
 1731 static int
 1732 ip_getoptval(const struct sockopt *sopt, u_int8_t *val, u_int maxval)
 1733 {
 1734         u_int tval;
 1735         u_char cval;
 1736         int error;
 1737 
 1738         if (sopt == NULL)
 1739                 return EINVAL;
 1740 
 1741         switch (sopt->sopt_size) {
 1742         case sizeof(u_char):
 1743                 error = sockopt_get(sopt, &cval, sizeof(u_char));
 1744                 tval = cval;
 1745                 break;
 1746 
 1747         case sizeof(u_int):
 1748                 error = sockopt_get(sopt, &tval, sizeof(u_int));
 1749                 break;
 1750 
 1751         default:
 1752                 error = EINVAL;
 1753         }
 1754 
 1755         if (error)
 1756                 return error;
 1757 
 1758         if (tval > maxval)
 1759                 return EINVAL;
 1760 
 1761         *val = tval;
 1762         return 0;
 1763 }
 1764 
 1765 static int
 1766 ip_get_membership(const struct sockopt *sopt, struct ifnet **ifp,
 1767     struct psref *psref, struct in_addr *ia, bool add)
 1768 {
 1769         int error;
 1770         struct ip_mreq mreq;
 1771 
 1772         error = sockopt_get(sopt, &mreq, sizeof(mreq));
 1773         if (error)
 1774                 return error;
 1775 
 1776         if (!IN_MULTICAST(mreq.imr_multiaddr.s_addr))
 1777                 return EINVAL;
 1778 
 1779         memcpy(ia, &mreq.imr_multiaddr, sizeof(*ia));
 1780 
 1781         if (in_nullhost(mreq.imr_interface)) {
 1782                 union {
 1783                         struct sockaddr         dst;
 1784                         struct sockaddr_in      dst4;
 1785                 } u;
 1786                 struct route ro;
 1787 
 1788                 if (!add) {
 1789                         *ifp = NULL;
 1790                         return 0;
 1791                 }
 1792                 /*
 1793                  * If no interface address was provided, use the interface of
 1794                  * the route to the given multicast address.
 1795                  */
 1796                 struct rtentry *rt;
 1797                 memset(&ro, 0, sizeof(ro));
 1798 
 1799                 sockaddr_in_init(&u.dst4, ia, 0);
 1800                 error = rtcache_setdst(&ro, &u.dst);
 1801                 if (error != 0)
 1802                         return error;
 1803                 *ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp : NULL;
 1804                 if (*ifp != NULL) {
 1805                         if (if_is_deactivated(*ifp))
 1806                                 *ifp = NULL;
 1807                         else
 1808                                 if_acquire(*ifp, psref);
 1809                 }
 1810                 rtcache_unref(rt, &ro);
 1811                 rtcache_free(&ro);
 1812         } else {
 1813                 int s = pserialize_read_enter();
 1814                 *ifp = ip_multicast_if(&mreq.imr_interface, NULL);
 1815                 if (!add && *ifp == NULL) {
 1816                         pserialize_read_exit(s);
 1817                         return EADDRNOTAVAIL;
 1818                 }
 1819                 if (*ifp != NULL) {
 1820                         if (if_is_deactivated(*ifp))
 1821                                 *ifp = NULL;
 1822                         else
 1823                                 if_acquire(*ifp, psref);
 1824                 }
 1825                 pserialize_read_exit(s);
 1826         }
 1827         return 0;
 1828 }
 1829 
 1830 /*
 1831  * Add a multicast group membership.
 1832  * Group must be a valid IP multicast address.
 1833  */
 1834 static int
 1835 ip_add_membership(struct ip_moptions *imo, const struct sockopt *sopt)
 1836 {
 1837         struct ifnet *ifp = NULL;       // XXX: gcc [ppc]
 1838         struct in_addr ia;
 1839         int i, error, bound;
 1840         struct psref psref;
 1841 
 1842         /* imo is protected by solock or referenced only by the caller */
 1843 
 1844         bound = curlwp_bind();
 1845         if (sopt->sopt_size == sizeof(struct ip_mreq))
 1846                 error = ip_get_membership(sopt, &ifp, &psref, &ia, true);
 1847         else {
 1848 #ifdef INET6
 1849                 error = ip6_get_membership(sopt, &ifp, &psref, &ia, sizeof(ia));
 1850 #else
 1851                 error = EINVAL;
 1852 #endif
 1853         }
 1854 
 1855         if (error)
 1856                 goto out;
 1857 
 1858         /*
 1859          * See if we found an interface, and confirm that it
 1860          * supports multicast.
 1861          */
 1862         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 1863                 error = EADDRNOTAVAIL;
 1864                 goto out;
 1865         }
 1866 
 1867         /*
 1868          * See if the membership already exists or if all the
 1869          * membership slots are full.
 1870          */
 1871         for (i = 0; i < imo->imo_num_memberships; ++i) {
 1872                 if (imo->imo_membership[i]->inm_ifp == ifp &&
 1873                     in_hosteq(imo->imo_membership[i]->inm_addr, ia))
 1874                         break;
 1875         }
 1876         if (i < imo->imo_num_memberships) {
 1877                 error = EADDRINUSE;
 1878                 goto out;
 1879         }
 1880 
 1881         if (i == IP_MAX_MEMBERSHIPS) {
 1882                 error = ETOOMANYREFS;
 1883                 goto out;
 1884         }
 1885 
 1886         /*
 1887          * Everything looks good; add a new record to the multicast
 1888          * address list for the given interface.
 1889          */
 1890         imo->imo_membership[i] = in_addmulti(&ia, ifp);
 1891         if (imo->imo_membership[i] == NULL) {
 1892                 error = ENOBUFS;
 1893                 goto out;
 1894         }
 1895 
 1896         ++imo->imo_num_memberships;
 1897         error = 0;
 1898 out:
 1899         if_put(ifp, &psref);
 1900         curlwp_bindx(bound);
 1901         return error;
 1902 }
 1903 
 1904 /*
 1905  * Drop a multicast group membership.
 1906  * Group must be a valid IP multicast address.
 1907  */
 1908 static int
 1909 ip_drop_membership(struct ip_moptions *imo, const struct sockopt *sopt)
 1910 {
 1911         struct in_addr ia = { .s_addr = 0 };    // XXX: gcc [ppc]
 1912         struct ifnet *ifp = NULL;               // XXX: gcc [ppc]
 1913         int i, error, bound;
 1914         struct psref psref;
 1915 
 1916         /* imo is protected by solock or referenced only by the caller */
 1917 
 1918         bound = curlwp_bind();
 1919         if (sopt->sopt_size == sizeof(struct ip_mreq))
 1920                 error = ip_get_membership(sopt, &ifp, &psref, &ia, false);
 1921         else {
 1922 #ifdef INET6
 1923                 error = ip6_get_membership(sopt, &ifp, &psref, &ia, sizeof(ia));
 1924 #else
 1925                 error = EINVAL;
 1926 #endif
 1927         }
 1928 
 1929         if (error)
 1930                 goto out;
 1931 
 1932         /*
 1933          * Find the membership in the membership array.
 1934          */
 1935         for (i = 0; i < imo->imo_num_memberships; ++i) {
 1936                 if ((ifp == NULL ||
 1937                      imo->imo_membership[i]->inm_ifp == ifp) &&
 1938                     in_hosteq(imo->imo_membership[i]->inm_addr, ia))
 1939                         break;
 1940         }
 1941         if (i == imo->imo_num_memberships) {
 1942                 error = EADDRNOTAVAIL;
 1943                 goto out;
 1944         }
 1945 
 1946         /*
 1947          * Give up the multicast address record to which the
 1948          * membership points.
 1949          */
 1950         in_delmulti(imo->imo_membership[i]);
 1951 
 1952         /*
 1953          * Remove the gap in the membership array.
 1954          */
 1955         for (++i; i < imo->imo_num_memberships; ++i)
 1956                 imo->imo_membership[i-1] = imo->imo_membership[i];
 1957         --imo->imo_num_memberships;
 1958         error = 0;
 1959 out:
 1960         if_put(ifp, &psref);
 1961         curlwp_bindx(bound);
 1962         return error;
 1963 }
 1964 
 1965 /*
 1966  * Set the IP multicast options in response to user setsockopt().
 1967  */
 1968 int
 1969 ip_setmoptions(struct ip_moptions **pimo, const struct sockopt *sopt)
 1970 {
 1971         struct ip_moptions *imo = *pimo;
 1972         struct in_addr addr;
 1973         struct ifnet *ifp;
 1974         int ifindex, error = 0;
 1975 
 1976         /* The passed imo isn't NULL, it should be protected by solock */
 1977 
 1978         if (!imo) {
 1979                 /*
 1980                  * No multicast option buffer attached to the pcb;
 1981                  * allocate one and initialize to default values.
 1982                  */
 1983                 imo = kmem_intr_alloc(sizeof(*imo), KM_NOSLEEP);
 1984                 if (imo == NULL)
 1985                         return ENOBUFS;
 1986 
 1987                 imo->imo_multicast_if_index = 0;
 1988                 imo->imo_multicast_addr.s_addr = INADDR_ANY;
 1989                 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 1990                 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
 1991                 imo->imo_num_memberships = 0;
 1992                 *pimo = imo;
 1993         }
 1994 
 1995         switch (sopt->sopt_name) {
 1996         case IP_MULTICAST_IF: {
 1997                 int s;
 1998                 /*
 1999                  * Select the interface for outgoing multicast packets.
 2000                  */
 2001                 error = sockopt_get(sopt, &addr, sizeof(addr));
 2002                 if (error)
 2003                         break;
 2004 
 2005                 /*
 2006                  * INADDR_ANY is used to remove a previous selection.
 2007                  * When no interface is selected, a default one is
 2008                  * chosen every time a multicast packet is sent.
 2009                  */
 2010                 if (in_nullhost(addr)) {
 2011                         imo->imo_multicast_if_index = 0;
 2012                         break;
 2013                 }
 2014                 /*
 2015                  * The selected interface is identified by its local
 2016                  * IP address.  Find the interface and confirm that
 2017                  * it supports multicasting.
 2018                  */
 2019                 s = pserialize_read_enter();
 2020                 ifp = ip_multicast_if(&addr, &ifindex);
 2021                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 2022                         pserialize_read_exit(s);
 2023                         error = EADDRNOTAVAIL;
 2024                         break;
 2025                 }
 2026                 imo->imo_multicast_if_index = ifp->if_index;
 2027                 pserialize_read_exit(s);
 2028                 if (ifindex)
 2029                         imo->imo_multicast_addr = addr;
 2030                 else
 2031                         imo->imo_multicast_addr.s_addr = INADDR_ANY;
 2032                 break;
 2033             }
 2034 
 2035         case IP_MULTICAST_TTL:
 2036                 /*
 2037                  * Set the IP time-to-live for outgoing multicast packets.
 2038                  */
 2039                 error = ip_getoptval(sopt, &imo->imo_multicast_ttl, MAXTTL);
 2040                 break;
 2041 
 2042         case IP_MULTICAST_LOOP:
 2043                 /*
 2044                  * Set the loopback flag for outgoing multicast packets.
 2045                  * Must be zero or one.
 2046                  */
 2047                 error = ip_getoptval(sopt, &imo->imo_multicast_loop, 1);
 2048                 break;
 2049 
 2050         case IP_ADD_MEMBERSHIP: /* IPV6_JOIN_GROUP */
 2051                 error = ip_add_membership(imo, sopt);
 2052                 break;
 2053 
 2054         case IP_DROP_MEMBERSHIP: /* IPV6_LEAVE_GROUP */
 2055                 error = ip_drop_membership(imo, sopt);
 2056                 break;
 2057 
 2058         default:
 2059                 error = EOPNOTSUPP;
 2060                 break;
 2061         }
 2062 
 2063         /*
 2064          * If all options have default values, no need to keep the mbuf.
 2065          */
 2066         if (imo->imo_multicast_if_index == 0 &&
 2067             imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
 2068             imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
 2069             imo->imo_num_memberships == 0) {
 2070                 kmem_intr_free(imo, sizeof(*imo));
 2071                 *pimo = NULL;
 2072         }
 2073 
 2074         return error;
 2075 }
 2076 
 2077 /*
 2078  * Return the IP multicast options in response to user getsockopt().
 2079  */
 2080 int
 2081 ip_getmoptions(struct ip_moptions *imo, struct sockopt *sopt)
 2082 {
 2083         struct in_addr addr;
 2084         uint8_t optval;
 2085         int error = 0;
 2086 
 2087         /* imo is protected by solock or referenced only by the caller */
 2088 
 2089         switch (sopt->sopt_name) {
 2090         case IP_MULTICAST_IF:
 2091                 if (imo == NULL || imo->imo_multicast_if_index == 0)
 2092                         addr = zeroin_addr;
 2093                 else if (imo->imo_multicast_addr.s_addr) {
 2094                         /* return the value user has set */
 2095                         addr = imo->imo_multicast_addr;
 2096                 } else {
 2097                         struct ifnet *ifp;
 2098                         struct in_ifaddr *ia = NULL;
 2099                         int s = pserialize_read_enter();
 2100 
 2101                         ifp = if_byindex(imo->imo_multicast_if_index);
 2102                         if (ifp != NULL) {
 2103                                 ia = in_get_ia_from_ifp(ifp);
 2104                         }
 2105                         addr = ia ? ia->ia_addr.sin_addr : zeroin_addr;
 2106                         pserialize_read_exit(s);
 2107                 }
 2108                 error = sockopt_set(sopt, &addr, sizeof(addr));
 2109                 break;
 2110 
 2111         case IP_MULTICAST_TTL:
 2112                 optval = imo ? imo->imo_multicast_ttl
 2113                     : IP_DEFAULT_MULTICAST_TTL;
 2114 
 2115                 error = sockopt_set(sopt, &optval, sizeof(optval));
 2116                 break;
 2117 
 2118         case IP_MULTICAST_LOOP:
 2119                 optval = imo ? imo->imo_multicast_loop
 2120                     : IP_DEFAULT_MULTICAST_LOOP;
 2121 
 2122                 error = sockopt_set(sopt, &optval, sizeof(optval));
 2123                 break;
 2124 
 2125         default:
 2126                 error = EOPNOTSUPP;
 2127         }
 2128 
 2129         return error;
 2130 }
 2131 
 2132 /*
 2133  * Discard the IP multicast options.
 2134  */
 2135 void
 2136 ip_freemoptions(struct ip_moptions *imo)
 2137 {
 2138         int i;
 2139 
 2140         /* The owner of imo (inp) should be protected by solock */
 2141 
 2142         if (imo != NULL) {
 2143                 for (i = 0; i < imo->imo_num_memberships; ++i) {
 2144                         struct in_multi *inm = imo->imo_membership[i];
 2145                         in_delmulti(inm);
 2146                         /* ifp should not leave thanks to solock */
 2147                 }
 2148 
 2149                 kmem_intr_free(imo, sizeof(*imo));
 2150         }
 2151 }
 2152 
 2153 /*
 2154  * Routine called from ip_output() to loop back a copy of an IP multicast
 2155  * packet to the input queue of a specified interface.  Note that this
 2156  * calls the output routine of the loopback "driver", but with an interface
 2157  * pointer that might NOT be lo0ifp -- easier than replicating that code here.
 2158  */
 2159 static void
 2160 ip_mloopback(struct ifnet *ifp, struct mbuf *m, const struct sockaddr_in *dst)
 2161 {
 2162         struct ip *ip;
 2163         struct mbuf *copym;
 2164 
 2165         copym = m_copypacket(m, M_DONTWAIT);
 2166         if (copym != NULL &&
 2167             (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip)))
 2168                 copym = m_pullup(copym, sizeof(struct ip));
 2169         if (copym == NULL)
 2170                 return;
 2171         /*
 2172          * We don't bother to fragment if the IP length is greater
 2173          * than the interface's MTU.  Can this possibly matter?
 2174          */
 2175         ip = mtod(copym, struct ip *);
 2176 
 2177         if (copym->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
 2178                 in_undefer_cksum_tcpudp(copym);
 2179                 copym->m_pkthdr.csum_flags &=
 2180                     ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
 2181         }
 2182 
 2183         ip->ip_sum = 0;
 2184         ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
 2185         KERNEL_LOCK_UNLESS_NET_MPSAFE();
 2186         (void)looutput(ifp, copym, sintocsa(dst), NULL);
 2187         KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
 2188 }
 2189 
 2190 /*
 2191  * Ensure sending address is valid.
 2192  * Returns 0 on success, -1 if an error should be sent back or 1
 2193  * if the packet could be dropped without error (protocol dependent).
 2194  */
 2195 static int
 2196 ip_ifaddrvalid(const struct in_ifaddr *ia)
 2197 {
 2198 
 2199         if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
 2200                 return 0;
 2201 
 2202         if (ia->ia4_flags & IN_IFF_DUPLICATED)
 2203                 return -1;
 2204         else if (ia->ia4_flags & (IN_IFF_TENTATIVE | IN_IFF_DETACHED))
 2205                 return 1;
 2206 
 2207         return 0;
 2208 }

Cache object: e790d0061b4666af10e0c0be301098d2


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.