The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_output.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: ip_output.c,v 1.167.2.2 2007/03/28 20:46:13 jdc Exp $  */
    2 
    3 /*
    4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the project nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  */
   31 
   32 /*-
   33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
   34  * All rights reserved.
   35  *
   36  * This code is derived from software contributed to The NetBSD Foundation
   37  * by Public Access Networks Corporation ("Panix").  It was developed under
   38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
   39  *
   40  * Redistribution and use in source and binary forms, with or without
   41  * modification, are permitted provided that the following conditions
   42  * are met:
   43  * 1. Redistributions of source code must retain the above copyright
   44  *    notice, this list of conditions and the following disclaimer.
   45  * 2. Redistributions in binary form must reproduce the above copyright
   46  *    notice, this list of conditions and the following disclaimer in the
   47  *    documentation and/or other materials provided with the distribution.
   48  * 3. All advertising materials mentioning features or use of this software
   49  *    must display the following acknowledgement:
   50  *      This product includes software developed by the NetBSD
   51  *      Foundation, Inc. and its contributors.
   52  * 4. Neither the name of The NetBSD Foundation nor the names of its
   53  *    contributors may be used to endorse or promote products derived
   54  *    from this software without specific prior written permission.
   55  *
   56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   66  * POSSIBILITY OF SUCH DAMAGE.
   67  */
   68 
   69 /*
   70  * Copyright (c) 1982, 1986, 1988, 1990, 1993
   71  *      The Regents of the University of California.  All rights reserved.
   72  *
   73  * Redistribution and use in source and binary forms, with or without
   74  * modification, are permitted provided that the following conditions
   75  * are met:
   76  * 1. Redistributions of source code must retain the above copyright
   77  *    notice, this list of conditions and the following disclaimer.
   78  * 2. Redistributions in binary form must reproduce the above copyright
   79  *    notice, this list of conditions and the following disclaimer in the
   80  *    documentation and/or other materials provided with the distribution.
   81  * 3. Neither the name of the University nor the names of its contributors
   82  *    may be used to endorse or promote products derived from this software
   83  *    without specific prior written permission.
   84  *
   85  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   86  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   87  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   88  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   89  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   90  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   91  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   92  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   93  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   94  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   95  * SUCH DAMAGE.
   96  *
   97  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
   98  */
   99 
  100 #include <sys/cdefs.h>
  101 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.167.2.2 2007/03/28 20:46:13 jdc Exp $");
  102 
  103 #include "opt_pfil_hooks.h"
  104 #include "opt_inet.h"
  105 #include "opt_ipsec.h"
  106 #include "opt_mrouting.h"
  107 
  108 #include <sys/param.h>
  109 #include <sys/malloc.h>
  110 #include <sys/mbuf.h>
  111 #include <sys/errno.h>
  112 #include <sys/protosw.h>
  113 #include <sys/socket.h>
  114 #include <sys/socketvar.h>
  115 #include <sys/kauth.h>
  116 #ifdef FAST_IPSEC
  117 #include <sys/domain.h>
  118 #endif
  119 #include <sys/systm.h>
  120 #include <sys/proc.h>
  121 
  122 #include <net/if.h>
  123 #include <net/route.h>
  124 #include <net/pfil.h>
  125 
  126 #include <netinet/in.h>
  127 #include <netinet/in_systm.h>
  128 #include <netinet/ip.h>
  129 #include <netinet/in_pcb.h>
  130 #include <netinet/in_var.h>
  131 #include <netinet/ip_var.h>
  132 #include <netinet/in_offload.h>
  133 
  134 #ifdef MROUTING
  135 #include <netinet/ip_mroute.h>
  136 #endif
  137 
  138 #include <machine/stdarg.h>
  139 
  140 #ifdef IPSEC
  141 #include <netinet6/ipsec.h>
  142 #include <netkey/key.h>
  143 #include <netkey/key_debug.h>
  144 #endif /*IPSEC*/
  145 
  146 #ifdef FAST_IPSEC
  147 #include <netipsec/ipsec.h>
  148 #include <netipsec/key.h>
  149 #include <netipsec/xform.h>
  150 #endif  /* FAST_IPSEC*/
  151 
  152 #ifdef IPSEC_NAT_T
  153 #include <netinet/udp.h>
  154 #endif
  155 
  156 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
  157 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
  158 static void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *);
  159 static int ip_getoptval(struct mbuf *, u_int8_t *, u_int);
  160 
  161 #ifdef PFIL_HOOKS
  162 extern struct pfil_head inet_pfil_hook;                 /* XXX */
  163 #endif
  164 
  165 int     ip_do_loopback_cksum = 0;   /* nonzero: IN_NEED_CHECKSUM honours M_CSUM_IPv4 on loopback interfaces */
  166 /* True if ifp is not a loopback interface, or if csum_flags has M_CSUM_UDPv4/TCPv4/IPv4 set and the matching {udp,tcp,ip}_do_loopback_cksum is nonzero. */
  167 #define IN_NEED_CHECKSUM(ifp, csum_flags) \
  168         (__predict_true(((ifp)->if_flags & IFF_LOOPBACK) == 0 || \
  169         (((csum_flags) & M_CSUM_UDPv4) != 0 && udp_do_loopback_cksum) || \
  170         (((csum_flags) & M_CSUM_TCPv4) != 0 && tcp_do_loopback_cksum) || \
  171         (((csum_flags) & M_CSUM_IPv4) != 0 && ip_do_loopback_cksum)))
  172 
  173 /*
  174  * IP output.  The packet in mbuf chain m contains a skeletal IP
  175  * header (with len, off, ttl, proto, tos, src, dst).
  176  * The mbuf chain containing the packet will be freed.
  177  * The mbuf opt, if present, will not be freed.
  178  */
  179 int
  180 ip_output(struct mbuf *m0, ...)
  181 {
  182         struct ip *ip;
  183         struct ifnet *ifp;
  184         struct mbuf *m = m0;
  185         int hlen = sizeof (struct ip);
  186         int len, error = 0;
  187         struct route iproute;
  188         struct sockaddr_in *dst;
  189         struct in_ifaddr *ia;
  190         struct ifaddr *xifa;
  191         struct mbuf *opt;
  192         struct route *ro;
  193         int flags, sw_csum;
  194         int *mtu_p;
  195         u_long mtu;
  196         struct ip_moptions *imo;
  197         struct socket *so;
  198         va_list ap;
  199 #ifdef IPSEC_NAT_T
  200         int natt_frag = 0;
  201 #endif
  202 #ifdef IPSEC
  203         struct secpolicy *sp = NULL;
  204 #endif /*IPSEC*/
  205 #ifdef FAST_IPSEC
  206         struct inpcb *inp;
  207         struct m_tag *mtag;
  208         struct secpolicy *sp = NULL;
  209         struct tdb_ident *tdbi;
  210         int s;
  211 #endif
  212         u_int16_t ip_len;
  213 
  214         len = 0;
  215         va_start(ap, m0);
  216         opt = va_arg(ap, struct mbuf *);
  217         ro = va_arg(ap, struct route *);
  218         flags = va_arg(ap, int);
  219         imo = va_arg(ap, struct ip_moptions *);
  220         so = va_arg(ap, struct socket *);
  221         if (flags & IP_RETURNMTU)
  222                 mtu_p = va_arg(ap, int *);
  223         else
  224                 mtu_p = NULL;
  225         va_end(ap);
  226 
  227         MCLAIM(m, &ip_tx_mowner);
  228 #ifdef FAST_IPSEC
  229         if (so != NULL && so->so_proto->pr_domain->dom_family == AF_INET)
  230                 inp = (struct inpcb *)so->so_pcb;
  231         else
  232                 inp = NULL;
  233 #endif /* FAST_IPSEC */
  234 
  235 #ifdef  DIAGNOSTIC
  236         if ((m->m_flags & M_PKTHDR) == 0)
  237                 panic("ip_output: no HDR");
  238 
  239         if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) != 0) {
  240                 panic("ip_output: IPv6 checksum offload flags: %d",
  241                     m->m_pkthdr.csum_flags);
  242         }
  243 
  244         if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) ==
  245             (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  246                 panic("ip_output: conflicting checksum offload flags: %d",
  247                     m->m_pkthdr.csum_flags);
  248         }
  249 #endif
  250         if (opt) {
  251                 m = ip_insertoptions(m, opt, &len);
  252                 if (len >= sizeof(struct ip))
  253                         hlen = len;
  254         }
  255         ip = mtod(m, struct ip *);
  256         /*
  257          * Fill in IP header.
  258          */
  259         if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
  260                 ip->ip_v = IPVERSION;
  261                 ip->ip_off = htons(0);
  262                 if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
  263                         ip->ip_id = ip_newid();
  264                 } else {
  265 
  266                         /*
  267                          * TSO capable interfaces (typically?) increment
  268                          * ip_id for each segment.
  269                          * "allocate" enough ids here to increase the chance
  270                          * for them to be unique.
  271                          *
  272                          * note that the following calculation does not
  273                          * need to be precise.  wasting some ip_id is fine.
  274                          */
  275 
  276                         unsigned int segsz = m->m_pkthdr.segsz;
  277                         unsigned int datasz = ntohs(ip->ip_len) - hlen;
  278                         unsigned int num = howmany(datasz, segsz);
  279 
  280                         ip->ip_id = ip_newid_range(num);
  281                 }
  282                 ip->ip_hl = hlen >> 2;
  283                 ipstat.ips_localout++;
  284         } else {
  285                 hlen = ip->ip_hl << 2;
  286         }
  287         /*
  288          * Route packet.
  289          */
  290         bzero(&iproute, sizeof(iproute));
  291         if (ro == NULL)
  292                 ro = &iproute;
  293         dst = satosin(&ro->ro_dst);
  294         /*
  295          * If there is a cached route,
  296          * check that it is to the same destination
  297          * and is still up.  If not, free it and try again.
  298          * The address family should also be checked in case of sharing the
  299          * cache with IPv6.
  300          */
  301         if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
  302             dst->sin_family != AF_INET ||
  303             !in_hosteq(dst->sin_addr, ip->ip_dst))) {
  304                 RTFREE(ro->ro_rt);
  305                 ro->ro_rt = (struct rtentry *)0;
  306         }
  307         if (ro->ro_rt == 0) {
  308                 bzero(dst, sizeof(*dst));
  309                 dst->sin_family = AF_INET;
  310                 dst->sin_len = sizeof(*dst);
  311                 dst->sin_addr = ip->ip_dst;
  312         }
  313         /*
  314          * If routing to interface only,
  315          * short circuit routing lookup.
  316          */
  317         if (flags & IP_ROUTETOIF) {
  318                 if ((ia = ifatoia(ifa_ifwithladdr(sintosa(dst)))) == 0) {
  319                         ipstat.ips_noroute++;
  320                         error = ENETUNREACH;
  321                         goto bad;
  322                 }
  323                 ifp = ia->ia_ifp;
  324                 mtu = ifp->if_mtu;
  325                 ip->ip_ttl = 1;
  326         } else if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
  327             ip->ip_dst.s_addr == INADDR_BROADCAST) &&
  328             imo != NULL && imo->imo_multicast_ifp != NULL) {
  329                 ifp = imo->imo_multicast_ifp;
  330                 mtu = ifp->if_mtu;
  331                 IFP_TO_IA(ifp, ia);
  332         } else {
  333                 if (ro->ro_rt == 0)
  334                         rtalloc(ro);
  335                 if (ro->ro_rt == 0) {
  336                         ipstat.ips_noroute++;
  337                         error = EHOSTUNREACH;
  338                         goto bad;
  339                 }
  340                 ia = ifatoia(ro->ro_rt->rt_ifa);
  341                 ifp = ro->ro_rt->rt_ifp;
  342                 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
  343                         mtu = ifp->if_mtu;
  344                 ro->ro_rt->rt_use++;
  345                 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
  346                         dst = satosin(ro->ro_rt->rt_gateway);
  347         }
  348         if (IN_MULTICAST(ip->ip_dst.s_addr) ||
  349             (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
  350                 struct in_multi *inm;
  351 
  352                 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
  353                         M_BCAST : M_MCAST;
  354                 /*
  355                  * IP destination address is multicast.  Make sure "dst"
  356                  * still points to the address in "ro".  (It may have been
  357                  * changed to point to a gateway address, above.)
  358                  */
  359                 dst = satosin(&ro->ro_dst);
  360                 /*
  361                  * See if the caller provided any multicast options
  362                  */
  363                 if (imo != NULL)
  364                         ip->ip_ttl = imo->imo_multicast_ttl;
  365                 else
  366                         ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
  367 
  368                 /*
  369                  * if we don't know the outgoing ifp yet, we can't generate
  370                  * output
  371                  */
  372                 if (!ifp) {
  373                         ipstat.ips_noroute++;
  374                         error = ENETUNREACH;
  375                         goto bad;
  376                 }
  377 
  378                 /*
  379                  * If the packet is multicast or broadcast, confirm that
  380                  * the outgoing interface can transmit it.
  381                  */
  382                 if (((m->m_flags & M_MCAST) &&
  383                      (ifp->if_flags & IFF_MULTICAST) == 0) ||
  384                     ((m->m_flags & M_BCAST) &&
  385                      (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0))  {
  386                         ipstat.ips_noroute++;
  387                         error = ENETUNREACH;
  388                         goto bad;
  389                 }
  390                 /*
  391                  * If source address not specified yet, use an address
  392                  * of outgoing interface.
  393                  */
  394                 if (in_nullhost(ip->ip_src)) {
  395                         struct in_ifaddr *xia;
  396 
  397                         IFP_TO_IA(ifp, xia);
  398                         if (!xia) {
  399                                 error = EADDRNOTAVAIL;
  400                                 goto bad;
  401                         }
  402                         xifa = &xia->ia_ifa;
  403                         if (xifa->ifa_getifa != NULL) {
  404                                 xia = ifatoia((*xifa->ifa_getifa)(xifa,
  405                                     &ro->ro_dst));
  406                         }
  407                         ip->ip_src = xia->ia_addr.sin_addr;
  408                 }
  409 
  410                 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
  411                 if (inm != NULL &&
  412                    (imo == NULL || imo->imo_multicast_loop)) {
  413                         /*
  414                          * If we belong to the destination multicast group
  415                          * on the outgoing interface, and the caller did not
  416                          * forbid loopback, loop back a copy.
  417                          */
  418                         ip_mloopback(ifp, m, dst);
  419                 }
  420 #ifdef MROUTING
  421                 else {
  422                         /*
  423                          * If we are acting as a multicast router, perform
  424                          * multicast forwarding as if the packet had just
  425                          * arrived on the interface to which we are about
  426                          * to send.  The multicast forwarding function
  427                          * recursively calls this function, using the
  428                          * IP_FORWARDING flag to prevent infinite recursion.
  429                          *
  430                          * Multicasts that are looped back by ip_mloopback(),
  431                          * above, will be forwarded by the ip_input() routine,
  432                          * if necessary.
  433                          */
  434                         extern struct socket *ip_mrouter;
  435 
  436                         if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
  437                                 if (ip_mforward(m, ifp) != 0) {
  438                                         m_freem(m);
  439                                         goto done;
  440                                 }
  441                         }
  442                 }
  443 #endif
  444                 /*
  445                  * Multicasts with a time-to-live of zero may be looped-
  446                  * back, above, but must not be transmitted on a network.
  447                  * Also, multicasts addressed to the loopback interface
  448                  * are not sent -- the above call to ip_mloopback() will
  449                  * loop back a copy if this host actually belongs to the
  450                  * destination group on the loopback interface.
  451                  */
  452                 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
  453                         m_freem(m);
  454                         goto done;
  455                 }
  456 
  457                 goto sendit;
  458         }
  459         /*
  460          * If source address not specified yet, use address
  461          * of outgoing interface.
  462          */
  463         if (in_nullhost(ip->ip_src)) {
  464                 xifa = &ia->ia_ifa;
  465                 if (xifa->ifa_getifa != NULL)
  466                         ia = ifatoia((*xifa->ifa_getifa)(xifa, &ro->ro_dst));
  467                 ip->ip_src = ia->ia_addr.sin_addr;
  468         }
  469 
  470         /*
  471          * packets with Class-D address as source are not valid per
  472          * RFC 1112
  473          */
  474         if (IN_MULTICAST(ip->ip_src.s_addr)) {
  475                 ipstat.ips_odropped++;
  476                 error = EADDRNOTAVAIL;
  477                 goto bad;
  478         }
  479 
  480         /*
  481          * Look for broadcast address
  482          * and verify user is allowed to send
  483          * such a packet.
  484          */
  485         if (in_broadcast(dst->sin_addr, ifp)) {
  486                 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
  487                         error = EADDRNOTAVAIL;
  488                         goto bad;
  489                 }
  490                 if ((flags & IP_ALLOWBROADCAST) == 0) {
  491                         error = EACCES;
  492                         goto bad;
  493                 }
  494                 /* don't allow broadcast messages to be fragmented */
  495                 if (ntohs(ip->ip_len) > ifp->if_mtu) {
  496                         error = EMSGSIZE;
  497                         goto bad;
  498                 }
  499                 m->m_flags |= M_BCAST;
  500         } else
  501                 m->m_flags &= ~M_BCAST;
  502 
  503 sendit:
  504         /*
  505          * If we're doing Path MTU Discovery, we need to set DF unless
  506          * the route's MTU is locked.
  507          */
  508         if ((flags & IP_MTUDISC) != 0 && ro->ro_rt != NULL &&
  509             (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
  510                 ip->ip_off |= htons(IP_DF);
  511 
  512         /* Remember the current ip_len */
  513         ip_len = ntohs(ip->ip_len);
  514 
  515 #ifdef IPSEC
  516         /* get SP for this packet */
  517         if (so == NULL)
  518                 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
  519                     flags, &error);
  520         else {
  521                 if (IPSEC_PCB_SKIP_IPSEC(sotoinpcb_hdr(so)->inph_sp,
  522                                          IPSEC_DIR_OUTBOUND))
  523                         goto skip_ipsec;
  524                 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
  525         }
  526 
  527         if (sp == NULL) {
  528                 ipsecstat.out_inval++;
  529                 goto bad;
  530         }
  531 
  532         error = 0;
  533 
  534         /* check policy */
  535         switch (sp->policy) {
  536         case IPSEC_POLICY_DISCARD:
  537                 /*
  538                  * This packet is just discarded.
  539                  */
  540                 ipsecstat.out_polvio++;
  541                 goto bad;
  542 
  543         case IPSEC_POLICY_BYPASS:
  544         case IPSEC_POLICY_NONE:
  545                 /* no need to do IPsec. */
  546                 goto skip_ipsec;
  547 
  548         case IPSEC_POLICY_IPSEC:
  549                 if (sp->req == NULL) {
  550                         /* XXX should be panic ? */
  551                         printf("ip_output: No IPsec request specified.\n");
  552                         error = EINVAL;
  553                         goto bad;
  554                 }
  555                 break;
  556 
  557         case IPSEC_POLICY_ENTRUST:
  558         default:
  559                 printf("ip_output: Invalid policy found. %d\n", sp->policy);
  560         }
  561 
  562 #ifdef IPSEC_NAT_T
  563         /*
  564          * NAT-T ESP fragmentation: don't do IPSec processing now,
  565          * we'll do it on each fragmented packet.
  566          */
  567         if (sp->req->sav &&
  568             ((sp->req->sav->natt_type & UDP_ENCAP_ESPINUDP) ||
  569              (sp->req->sav->natt_type & UDP_ENCAP_ESPINUDP_NON_IKE))) {
  570                 if (ntohs(ip->ip_len) > sp->req->sav->esp_frag) {
  571                         natt_frag = 1;
  572                         mtu = sp->req->sav->esp_frag;
  573                         goto skip_ipsec;
  574                 }
  575         }
  576 #endif /* IPSEC_NAT_T */
  577 
  578         /*
  579          * ipsec4_output() expects ip_len and ip_off in network
  580          * order.  They have been set to network order above.
  581          */
  582 
  583     {
  584         struct ipsec_output_state state;
  585         bzero(&state, sizeof(state));
  586         state.m = m;
  587         if (flags & IP_ROUTETOIF) {
  588                 state.ro = &iproute;
  589                 bzero(&iproute, sizeof(iproute));
  590         } else
  591                 state.ro = ro;
  592         state.dst = (struct sockaddr *)dst;
  593 
  594         /*
  595          * We can't defer the checksum of payload data if
  596          * we're about to encrypt/authenticate it.
  597          *
  598          * XXX When we support crypto offloading functions of
  599          * XXX network interfaces, we need to reconsider this,
  600          * XXX since it's likely that they'll support checksumming,
  601          * XXX as well.
  602          */
  603         if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  604                 in_delayed_cksum(m);
  605                 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
  606         }
  607 
  608         error = ipsec4_output(&state, sp, flags);
  609 
  610         m = state.m;
  611         if (flags & IP_ROUTETOIF) {
  612                 /*
  613                  * if we have tunnel mode SA, we may need to ignore
  614                  * IP_ROUTETOIF.
  615                  */
  616                 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
  617                         flags &= ~IP_ROUTETOIF;
  618                         ro = state.ro;
  619                 }
  620         } else
  621                 ro = state.ro;
  622         dst = (struct sockaddr_in *)state.dst;
  623         if (error) {
  624                 /* mbuf is already reclaimed in ipsec4_output. */
  625                 m0 = NULL;
  626                 switch (error) {
  627                 case EHOSTUNREACH:
  628                 case ENETUNREACH:
  629                 case EMSGSIZE:
  630                 case ENOBUFS:
  631                 case ENOMEM:
  632                         break;
  633                 default:
  634                         printf("ip4_output (ipsec): error code %d\n", error);
  635                         /*fall through*/
  636                 case ENOENT:
  637                         /* don't show these error codes to the user */
  638                         error = 0;
  639                         break;
  640                 }
  641                 goto bad;
  642         }
  643 
  644         /* be sure to update variables that are affected by ipsec4_output() */
  645         ip = mtod(m, struct ip *);
  646         hlen = ip->ip_hl << 2;
  647         ip_len = ntohs(ip->ip_len);
  648 
  649         if (ro->ro_rt == NULL) {
  650                 if ((flags & IP_ROUTETOIF) == 0) {
  651                         printf("ip_output: "
  652                                 "can't update route after IPsec processing\n");
  653                         error = EHOSTUNREACH;   /*XXX*/
  654                         goto bad;
  655                 }
  656         } else {
  657                 /* nobody uses ia beyond here */
  658                 if (state.encap) {
  659                         ifp = ro->ro_rt->rt_ifp;
  660                         if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
  661                                 mtu = ifp->if_mtu;
  662                 }
  663         }
  664     }
  665 skip_ipsec:
  666 #endif /*IPSEC*/
  667 #ifdef FAST_IPSEC
  668         /*
  669          * Check the security policy (SP) for the packet and, if
  670          * required, do IPsec-related processing.  There are two
  671          * cases here; the first time a packet is sent through
  672          * it will be untagged and handled by ipsec4_checkpolicy.
  673          * If the packet is resubmitted to ip_output (e.g. after
  674          * AH, ESP, etc. processing), there will be a tag to bypass
  675          * the lookup and related policy checking.
  676          */
  677         mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
  678         s = splsoftnet();
  679         if (mtag != NULL) {
  680                 tdbi = (struct tdb_ident *)(mtag + 1);
  681                 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
  682                 if (sp == NULL)
  683                         error = -EINVAL;        /* force silent drop */
  684                 m_tag_delete(m, mtag);
  685         } else {
  686                 if (inp != NULL &&
  687                     IPSEC_PCB_SKIP_IPSEC(inp->inp_sp, IPSEC_DIR_OUTBOUND))
  688                         goto spd_done;
  689                 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
  690                                         &error, inp);
  691         }
  692         /*
  693          * There are four return cases:
  694          *    sp != NULL                    apply IPsec policy
  695          *    sp == NULL, error == 0        no IPsec handling needed
  696          *    sp == NULL, error == -EINVAL  discard packet w/o error
  697          *    sp == NULL, error != 0        discard packet, report error
  698          */
  699         if (sp != NULL) {
  700 #ifdef IPSEC_NAT_T
  701                 /*
  702                  * NAT-T ESP fragmentation: don't do IPSec processing now,
  703                  * we'll do it on each fragmented packet.
  704                  */
  705                 if (sp->req->sav &&
  706                     ((sp->req->sav->natt_type & UDP_ENCAP_ESPINUDP) ||
  707                      (sp->req->sav->natt_type & UDP_ENCAP_ESPINUDP_NON_IKE))) {
  708                         if (ntohs(ip->ip_len) > sp->req->sav->esp_frag) {
  709                                 natt_frag = 1;
  710                                 mtu = sp->req->sav->esp_frag;
  711                                 goto spd_done;
  712                         }
  713                 }
  714 #endif /* IPSEC_NAT_T */
  715                 /* Loop detection, check if ipsec processing already done */
  716                 IPSEC_ASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
  717                 for (mtag = m_tag_first(m); mtag != NULL;
  718                      mtag = m_tag_next(m, mtag)) {
  719 #ifdef MTAG_ABI_COMPAT
  720                         if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
  721                                 continue;
  722 #endif
  723                         if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
  724                             mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
  725                                 continue;
  726                         /*
  727                          * Check if policy has an SA associated with it.
  728                          * This can happen when an SP has yet to acquire
  729                          * an SA; e.g. on first reference.  If it occurs,
  730                          * then we let ipsec4_process_packet do its thing.
  731                          */
  732                         if (sp->req->sav == NULL)
  733                                 break;
  734                         tdbi = (struct tdb_ident *)(mtag + 1);
  735                         if (tdbi->spi == sp->req->sav->spi &&
  736                             tdbi->proto == sp->req->sav->sah->saidx.proto &&
  737                             bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
  738                                  sizeof (union sockaddr_union)) == 0) {
  739                                 /*
  740                                  * No IPsec processing is needed, free
  741                                  * reference to SP.
  742                                  *
  743                                  * NB: null pointer to avoid free at
  744                                  *     done: below.
  745                                  */
  746                                 KEY_FREESP(&sp), sp = NULL;
  747                                 splx(s);
  748                                 goto spd_done;
  749                         }
  750                 }
  751 
  752                 /*
  753                  * Do delayed checksums now because we send before
  754                  * this is done in the normal processing path.
  755                  */
  756                 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  757                         in_delayed_cksum(m);
  758                         m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
  759                 }
  760 
  761 #ifdef __FreeBSD__
  762                 ip->ip_len = htons(ip->ip_len);
  763                 ip->ip_off = htons(ip->ip_off);
  764 #endif
  765 
  766                 /* NB: callee frees mbuf */
  767                 error = ipsec4_process_packet(m, sp->req, flags, 0);
  768                 /*
  769                  * Preserve KAME behaviour: ENOENT can be returned
  770                  * when an SA acquire is in progress.  Don't propagate
  771                  * this to user-level; it confuses applications.
  772                  *
  773                  * XXX this will go away when the SADB is redone.
  774                  */
  775                 if (error == ENOENT)
  776                         error = 0;
  777                 splx(s);
  778                 goto done;
  779         } else {
  780                 splx(s);
  781 
  782                 if (error != 0) {
  783                         /*
  784                          * Hack: -EINVAL is used to signal that a packet
  785                          * should be silently discarded.  This is typically
  786                          * because we asked key management for an SA and
  787                          * it was delayed (e.g. kicked up to IKE).
  788                          */
  789                         if (error == -EINVAL)
  790                                 error = 0;
  791                         goto bad;
  792                 } else {
  793                         /* No IPsec processing for this packet. */
  794                 }
  795 #ifdef notyet
  796                 /*
  797                  * If deferred crypto processing is needed, check that
  798                  * the interface supports it.
  799                  */
  800                 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
  801                 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
  802                         /* notify IPsec to do its own crypto */
  803                         ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
  804                         error = EHOSTUNREACH;
  805                         goto bad;
  806                 }
  807 #endif
  808         }
  809 spd_done:
  810 #endif /* FAST_IPSEC */
  811 
  812 #ifdef PFIL_HOOKS
  813         /*
  814          * Run through list of hooks for output packets.
  815          */
  816         if ((error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
  817                 goto done;
  818         if (m == NULL)
  819                 goto done;
  820 
  821         ip = mtod(m, struct ip *);
  822         hlen = ip->ip_hl << 2;
  823         ip_len = ntohs(ip->ip_len);
  824 #endif /* PFIL_HOOKS */
  825 
  826         m->m_pkthdr.csum_data |= hlen << 16;
  827 
  828 #if IFA_STATS
  829         /*
  830          * search for the source address structure to
  831          * maintain output statistics.
  832          */
  833         INADDR_TO_IA(ip->ip_src, ia);
  834 #endif
  835 
  836         /* Maybe skip checksums on loopback interfaces. */
  837         if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) {
  838                 m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
  839         }
  840         sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
  841         /*
  842          * If small enough for mtu of path, or if using TCP segmentation
  843          * offload, can just send directly.
  844          */
  845         if (ip_len <= mtu ||
  846             (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) != 0) {
  847 #if IFA_STATS
  848                 if (ia)
  849                         ia->ia_ifa.ifa_data.ifad_outbytes += ip_len;
  850 #endif
  851                 /*
  852                  * Always initialize the sum to 0!  Some HW assisted
  853                  * checksumming requires this.
  854                  */
  855                 ip->ip_sum = 0;
  856 
  857                 if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
  858                         /*
  859                          * Perform any checksums that the hardware can't do
  860                          * for us.
  861                          *
  862                          * XXX Does any hardware require the {th,uh}_sum
  863                          * XXX fields to be 0?
  864                          */
  865                         if (sw_csum & M_CSUM_IPv4) {
  866                                 KASSERT(IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4));
  867                                 ip->ip_sum = in_cksum(m, hlen);
  868                                 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
  869                         }
  870                         if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  871                                 if (IN_NEED_CHECKSUM(ifp,
  872                                     sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
  873                                         in_delayed_cksum(m);
  874                                 }
  875                                 m->m_pkthdr.csum_flags &=
  876                                     ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
  877                         }
  878                 }
  879 
  880 #ifdef IPSEC
  881                 /* clean ipsec history once it goes out of the node */
  882                 ipsec_delaux(m);
  883 #endif
  884 
  885                 if (__predict_true(
  886                     (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0 ||
  887                     (ifp->if_capenable & IFCAP_TSOv4) != 0)) {
  888                         error =
  889                             (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
  890                 } else {
  891                         error =
  892                             ip_tso_output(ifp, m, sintosa(dst), ro->ro_rt);
  893                 }
  894                 goto done;
  895         }
  896 
  897         /*
  898          * We can't use HW checksumming if we're about to
  899          * fragment the packet.
  900          *
  901          * XXX Some hardware can do this.
  902          */
  903         if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  904                 if (IN_NEED_CHECKSUM(ifp,
  905                     m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
  906                         in_delayed_cksum(m);
  907                 }
  908                 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
  909         }
  910 
  911         /*
  912          * Too large for interface; fragment if possible.
  913          * Must be able to put at least 8 bytes per fragment.
  914          */
  915         if (ntohs(ip->ip_off) & IP_DF) {
  916                 if (flags & IP_RETURNMTU)
  917                         *mtu_p = mtu;
  918                 error = EMSGSIZE;
  919                 ipstat.ips_cantfrag++;
  920                 goto bad;
  921         }
  922 
  923         error = ip_fragment(m, ifp, mtu);
  924         if (error) {
  925                 m = NULL;
  926                 goto bad;
  927         }
  928 
  929         for (; m; m = m0) {
  930                 m0 = m->m_nextpkt;
  931                 m->m_nextpkt = 0;
  932                 if (error == 0) {
  933 #if IFA_STATS
  934                         if (ia)
  935                                 ia->ia_ifa.ifa_data.ifad_outbytes +=
  936                                     ntohs(ip->ip_len);
  937 #endif
  938 #ifdef IPSEC
  939                         /* clean ipsec history once it goes out of the node */
  940                         ipsec_delaux(m);
  941 #endif /* IPSEC */
  942 
  943 #ifdef IPSEC_NAT_T
  944                         /*
  945                          * If we get there, the packet has not been handled by
  946                          * IPSec whereas it should have. Now that it has been
  947                          * fragmented, re-inject it in ip_output so that IPsec
  948                          * processing can occur.
  949                          */
  950                         if (natt_frag) {
  951                                 error = ip_output(m, opt,
  952                                     ro, flags, imo, so, mtu_p);
  953                         } else
  954 #endif /* IPSEC_NAT_T */
  955                         {
  956                                 KASSERT((m->m_pkthdr.csum_flags &
  957                                     (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0);
  958                                 error = (*ifp->if_output)(ifp, m, sintosa(dst),
  959                                     ro->ro_rt);
  960                         }
  961                 } else
  962                         m_freem(m);
  963         }
  964 
  965         if (error == 0)
  966                 ipstat.ips_fragmented++;
  967 done:
  968         if (iproute.ro_rt != NULL)
  969                 RTFREE(iproute.ro_rt);
  970 
  971 #ifdef IPSEC
  972         if (sp != NULL) {
  973                 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
  974                         printf("DP ip_output call free SP:%p\n", sp));
  975                 key_freesp(sp);
  976         }
  977 #endif /* IPSEC */
  978 #ifdef FAST_IPSEC
  979         if (sp != NULL)
  980                 KEY_FREESP(&sp);
  981 #endif /* FAST_IPSEC */
  982 
  983         return (error);
  984 bad:
  985         m_freem(m);
  986         goto done;
  987 }
  988 
  989 int
  990 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu)
  991 {
  992         struct ip *ip, *mhip;
  993         struct mbuf *m0;
  994         int len, hlen, off;
  995         int mhlen, firstlen;
  996         struct mbuf **mnext;
  997         int sw_csum = m->m_pkthdr.csum_flags;
  998         int fragments = 0;
  999         int s;
 1000         int error = 0;
 1001 
 1002         ip = mtod(m, struct ip *);
 1003         hlen = ip->ip_hl << 2;
 1004         if (ifp != NULL)
 1005                 sw_csum &= ~ifp->if_csum_flags_tx;
 1006 
 1007         len = (mtu - hlen) &~ 7;
 1008         if (len < 8) {
 1009                 m_freem(m);
 1010                 return (EMSGSIZE);
 1011         }
 1012 
 1013         firstlen = len;
 1014         mnext = &m->m_nextpkt;
 1015 
 1016         /*
 1017          * Loop through length of segment after first fragment,
 1018          * make new header and copy data of each part and link onto chain.
 1019          */
 1020         m0 = m;
 1021         mhlen = sizeof (struct ip);
 1022         for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
 1023                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
 1024                 if (m == 0) {
 1025                         error = ENOBUFS;
 1026                         ipstat.ips_odropped++;
 1027                         goto sendorfree;
 1028                 }
 1029                 MCLAIM(m, m0->m_owner);
 1030                 *mnext = m;
 1031                 mnext = &m->m_nextpkt;
 1032                 m->m_data += max_linkhdr;
 1033                 mhip = mtod(m, struct ip *);
 1034                 *mhip = *ip;
 1035                 /* we must inherit MCAST and BCAST flags */
 1036                 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
 1037                 if (hlen > sizeof (struct ip)) {
 1038                         mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 1039                         mhip->ip_hl = mhlen >> 2;
 1040                 }
 1041                 m->m_len = mhlen;
 1042                 mhip->ip_off = ((off - hlen) >> 3) +
 1043                     (ntohs(ip->ip_off) & ~IP_MF);
 1044                 if (ip->ip_off & htons(IP_MF))
 1045                         mhip->ip_off |= IP_MF;
 1046                 if (off + len >= ntohs(ip->ip_len))
 1047                         len = ntohs(ip->ip_len) - off;
 1048                 else
 1049                         mhip->ip_off |= IP_MF;
 1050                 HTONS(mhip->ip_off);
 1051                 mhip->ip_len = htons((u_int16_t)(len + mhlen));
 1052                 m->m_next = m_copy(m0, off, len);
 1053                 if (m->m_next == 0) {
 1054                         error = ENOBUFS;        /* ??? */
 1055                         ipstat.ips_odropped++;
 1056                         goto sendorfree;
 1057                 }
 1058                 m->m_pkthdr.len = mhlen + len;
 1059                 m->m_pkthdr.rcvif = (struct ifnet *)0;
 1060                 mhip->ip_sum = 0;
 1061                 if (sw_csum & M_CSUM_IPv4) {
 1062                         mhip->ip_sum = in_cksum(m, mhlen);
 1063                         KASSERT((m->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0);
 1064                 } else {
 1065                         m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
 1066                         m->m_pkthdr.csum_data |= mhlen << 16;
 1067                 }
 1068                 ipstat.ips_ofragments++;
 1069                 fragments++;
 1070         }
 1071         /*
 1072          * Update first fragment by trimming what's been copied out
 1073          * and updating header, then send each fragment (in order).
 1074          */
 1075         m = m0;
 1076         m_adj(m, hlen + firstlen - ntohs(ip->ip_len));
 1077         m->m_pkthdr.len = hlen + firstlen;
 1078         ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
 1079         ip->ip_off |= htons(IP_MF);
 1080         ip->ip_sum = 0;
 1081         if (sw_csum & M_CSUM_IPv4) {
 1082                 ip->ip_sum = in_cksum(m, hlen);
 1083                 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
 1084         } else {
 1085                 KASSERT(m->m_pkthdr.csum_flags & M_CSUM_IPv4);
 1086                 KASSERT(M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data) >=
 1087                         sizeof(struct ip));
 1088         }
 1089 sendorfree:
 1090         /*
 1091          * If there is no room for all the fragments, don't queue
 1092          * any of them.
 1093          */
 1094         if (ifp != NULL) {
 1095                 s = splnet();
 1096                 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments &&
 1097                     error == 0) {
 1098                         error = ENOBUFS;
 1099                         ipstat.ips_odropped++;
 1100                         IFQ_INC_DROPS(&ifp->if_snd);
 1101                 }
 1102                 splx(s);
 1103         }
 1104         if (error) {
 1105                 for (m = m0; m; m = m0) {
 1106                         m0 = m->m_nextpkt;
 1107                         m->m_nextpkt = NULL;
 1108                         m_freem(m);
 1109                 }
 1110         }
 1111         return (error);
 1112 }
 1113 
 1114 /*
 1115  * Process a delayed payload checksum calculation.
 1116  */
 1117 void
 1118 in_delayed_cksum(struct mbuf *m)
 1119 {
 1120         struct ip *ip;
 1121         u_int16_t csum, offset;
 1122 
 1123         ip = mtod(m, struct ip *);
 1124         offset = ip->ip_hl << 2;
 1125         csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset);
 1126         if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0)
 1127                 csum = 0xffff;
 1128 
 1129         offset += M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data);
 1130 
 1131         if ((offset + sizeof(u_int16_t)) > m->m_len) {
 1132                 /* This happen when ip options were inserted
 1133                 printf("in_delayed_cksum: pullup len %d off %d proto %d\n",
 1134                     m->m_len, offset, ip->ip_p);
 1135                  */
 1136                 m_copyback(m, offset, sizeof(csum), (caddr_t) &csum);
 1137         } else
 1138                 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
 1139 }
 1140 
 1141 /*
 1142  * Determine the maximum length of the options to be inserted;
 1143  * we would far rather allocate too much space rather than too little.
 1144  */
 1145 
 1146 u_int
 1147 ip_optlen(struct inpcb *inp)
 1148 {
 1149         struct mbuf *m = inp->inp_options;
 1150 
 1151         if (m && m->m_len > offsetof(struct ipoption, ipopt_dst))
 1152                 return (m->m_len - offsetof(struct ipoption, ipopt_dst));
 1153         else
 1154                 return 0;
 1155 }
 1156 
 1157 
 1158 /*
 1159  * Insert IP options into preformed packet.
 1160  * Adjust IP destination as required for IP source routing,
 1161  * as indicated by a non-zero in_addr at the start of the options.
 1162  */
 1163 static struct mbuf *
 1164 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
 1165 {
 1166         struct ipoption *p = mtod(opt, struct ipoption *);
 1167         struct mbuf *n;
 1168         struct ip *ip = mtod(m, struct ip *);
 1169         unsigned optlen;
 1170 
 1171         optlen = opt->m_len - sizeof(p->ipopt_dst);
 1172         if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
 1173                 return (m);             /* XXX should fail */
 1174         if (!in_nullhost(p->ipopt_dst))
 1175                 ip->ip_dst = p->ipopt_dst;
 1176         if (M_READONLY(m) || M_LEADINGSPACE(m) < optlen) {
 1177                 MGETHDR(n, M_DONTWAIT, MT_HEADER);
 1178                 if (n == 0)
 1179                         return (m);
 1180                 MCLAIM(n, m->m_owner);
 1181                 M_MOVE_PKTHDR(n, m);
 1182                 m->m_len -= sizeof(struct ip);
 1183                 m->m_data += sizeof(struct ip);
 1184                 n->m_next = m;
 1185                 m = n;
 1186                 m->m_len = optlen + sizeof(struct ip);
 1187                 m->m_data += max_linkhdr;
 1188                 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
 1189         } else {
 1190                 m->m_data -= optlen;
 1191                 m->m_len += optlen;
 1192                 memmove(mtod(m, caddr_t), ip, sizeof(struct ip));
 1193         }
 1194         m->m_pkthdr.len += optlen;
 1195         ip = mtod(m, struct ip *);
 1196         bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
 1197         *phlen = sizeof(struct ip) + optlen;
 1198         ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
 1199         return (m);
 1200 }
 1201 
 1202 /*
 1203  * Copy options from ip to jp,
 1204  * omitting those not copied during fragmentation.
 1205  */
 1206 int
 1207 ip_optcopy(struct ip *ip, struct ip *jp)
 1208 {
 1209         u_char *cp, *dp;
 1210         int opt, optlen, cnt;
 1211 
 1212         cp = (u_char *)(ip + 1);
 1213         dp = (u_char *)(jp + 1);
 1214         cnt = (ip->ip_hl << 2) - sizeof (struct ip);
 1215         for (; cnt > 0; cnt -= optlen, cp += optlen) {
 1216                 opt = cp[0];
 1217                 if (opt == IPOPT_EOL)
 1218                         break;
 1219                 if (opt == IPOPT_NOP) {
 1220                         /* Preserve for IP mcast tunnel's LSRR alignment. */
 1221                         *dp++ = IPOPT_NOP;
 1222                         optlen = 1;
 1223                         continue;
 1224                 }
 1225 #ifdef DIAGNOSTIC
 1226                 if (cnt < IPOPT_OLEN + sizeof(*cp))
 1227                         panic("malformed IPv4 option passed to ip_optcopy");
 1228 #endif
 1229                 optlen = cp[IPOPT_OLEN];
 1230 #ifdef DIAGNOSTIC
 1231                 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
 1232                         panic("malformed IPv4 option passed to ip_optcopy");
 1233 #endif
 1234                 /* bogus lengths should have been caught by ip_dooptions */
 1235                 if (optlen > cnt)
 1236                         optlen = cnt;
 1237                 if (IPOPT_COPIED(opt)) {
 1238                         bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
 1239                         dp += optlen;
 1240                 }
 1241         }
 1242         for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
 1243                 *dp++ = IPOPT_EOL;
 1244         return (optlen);
 1245 }
 1246 
 1247 /*
 1248  * IP socket option processing.
 1249  */
 1250 int
 1251 ip_ctloutput(int op, struct socket *so, int level, int optname,
 1252     struct mbuf **mp)
 1253 {
 1254         struct inpcb *inp = sotoinpcb(so);
 1255         struct mbuf *m = *mp;
 1256         int optval = 0;
 1257         int error = 0;
 1258 #if defined(IPSEC) || defined(FAST_IPSEC)
 1259         struct lwp *l = curlwp; /*XXX*/
 1260 #endif
 1261 
 1262         if (level != IPPROTO_IP) {
 1263                 error = EINVAL;
 1264                 if (op == PRCO_SETOPT && *mp)
 1265                         (void) m_free(*mp);
 1266         } else switch (op) {
 1267 
 1268         case PRCO_SETOPT:
 1269                 switch (optname) {
 1270                 case IP_OPTIONS:
 1271 #ifdef notyet
 1272                 case IP_RETOPTS:
 1273                         return (ip_pcbopts(optname, &inp->inp_options, m));
 1274 #else
 1275                         return (ip_pcbopts(&inp->inp_options, m));
 1276 #endif
 1277 
 1278                 case IP_TOS:
 1279                 case IP_TTL:
 1280                 case IP_RECVOPTS:
 1281                 case IP_RECVRETOPTS:
 1282                 case IP_RECVDSTADDR:
 1283                 case IP_RECVIF:
 1284                         if (m == NULL || m->m_len != sizeof(int))
 1285                                 error = EINVAL;
 1286                         else {
 1287                                 optval = *mtod(m, int *);
 1288                                 switch (optname) {
 1289 
 1290                                 case IP_TOS:
 1291                                         inp->inp_ip.ip_tos = optval;
 1292                                         break;
 1293 
 1294                                 case IP_TTL:
 1295                                         inp->inp_ip.ip_ttl = optval;
 1296                                         break;
 1297 #define OPTSET(bit) \
 1298         if (optval) \
 1299                 inp->inp_flags |= bit; \
 1300         else \
 1301                 inp->inp_flags &= ~bit;
 1302 
 1303                                 case IP_RECVOPTS:
 1304                                         OPTSET(INP_RECVOPTS);
 1305                                         break;
 1306 
 1307                                 case IP_RECVRETOPTS:
 1308                                         OPTSET(INP_RECVRETOPTS);
 1309                                         break;
 1310 
 1311                                 case IP_RECVDSTADDR:
 1312                                         OPTSET(INP_RECVDSTADDR);
 1313                                         break;
 1314 
 1315                                 case IP_RECVIF:
 1316                                         OPTSET(INP_RECVIF);
 1317                                         break;
 1318                                 }
 1319                         }
 1320                         break;
 1321 #undef OPTSET
 1322 
 1323                 case IP_MULTICAST_IF:
 1324                 case IP_MULTICAST_TTL:
 1325                 case IP_MULTICAST_LOOP:
 1326                 case IP_ADD_MEMBERSHIP:
 1327                 case IP_DROP_MEMBERSHIP:
 1328                         error = ip_setmoptions(optname, &inp->inp_moptions, m);
 1329                         break;
 1330 
 1331                 case IP_PORTRANGE:
 1332                         if (m == 0 || m->m_len != sizeof(int))
 1333                                 error = EINVAL;
 1334                         else {
 1335                                 optval = *mtod(m, int *);
 1336 
 1337                                 switch (optval) {
 1338 
 1339                                 case IP_PORTRANGE_DEFAULT:
 1340                                 case IP_PORTRANGE_HIGH:
 1341                                         inp->inp_flags &= ~(INP_LOWPORT);
 1342                                         break;
 1343 
 1344                                 case IP_PORTRANGE_LOW:
 1345                                         inp->inp_flags |= INP_LOWPORT;
 1346                                         break;
 1347 
 1348                                 default:
 1349                                         error = EINVAL;
 1350                                         break;
 1351                                 }
 1352                         }
 1353                         break;
 1354 
 1355 #if defined(IPSEC) || defined(FAST_IPSEC)
 1356                 case IP_IPSEC_POLICY:
 1357                 {
 1358                         caddr_t req = NULL;
 1359                         size_t len = 0;
 1360                         int priv = 0;
 1361 
 1362 #ifdef __NetBSD__
 1363                         if (l == 0 || kauth_authorize_generic(l->l_cred,
 1364                             KAUTH_GENERIC_ISSUSER, &l->l_acflag))
 1365                                 priv = 0;
 1366                         else
 1367                                 priv = 1;
 1368 #else
 1369                         priv = (in6p->in6p_socket->so_state & SS_PRIV);
 1370 #endif
 1371                         if (m) {
 1372                                 req = mtod(m, caddr_t);
 1373                                 len = m->m_len;
 1374                         }
 1375                         error = ipsec4_set_policy(inp, optname, req, len, priv);
 1376                         break;
 1377                     }
 1378 #endif /*IPSEC*/
 1379 
 1380                 default:
 1381                         error = ENOPROTOOPT;
 1382                         break;
 1383                 }
 1384                 if (m)
 1385                         (void)m_free(m);
 1386                 break;
 1387 
 1388         case PRCO_GETOPT:
 1389                 switch (optname) {
 1390                 case IP_OPTIONS:
 1391                 case IP_RETOPTS:
 1392                         *mp = m = m_get(M_WAIT, MT_SOOPTS);
 1393                         MCLAIM(m, so->so_mowner);
 1394                         if (inp->inp_options) {
 1395                                 m->m_len = inp->inp_options->m_len;
 1396                                 bcopy(mtod(inp->inp_options, caddr_t),
 1397                                     mtod(m, caddr_t), (unsigned)m->m_len);
 1398                         } else
 1399                                 m->m_len = 0;
 1400                         break;
 1401 
 1402                 case IP_TOS:
 1403                 case IP_TTL:
 1404                 case IP_RECVOPTS:
 1405                 case IP_RECVRETOPTS:
 1406                 case IP_RECVDSTADDR:
 1407                 case IP_RECVIF:
 1408                 case IP_ERRORMTU:
 1409                         *mp = m = m_get(M_WAIT, MT_SOOPTS);
 1410                         MCLAIM(m, so->so_mowner);
 1411                         m->m_len = sizeof(int);
 1412                         switch (optname) {
 1413 
 1414                         case IP_TOS:
 1415                                 optval = inp->inp_ip.ip_tos;
 1416                                 break;
 1417 
 1418                         case IP_TTL:
 1419                                 optval = inp->inp_ip.ip_ttl;
 1420                                 break;
 1421 
 1422                         case IP_ERRORMTU:
 1423                                 optval = inp->inp_errormtu;
 1424                                 break;
 1425 
 1426 #define OPTBIT(bit)     (inp->inp_flags & bit ? 1 : 0)
 1427 
 1428                         case IP_RECVOPTS:
 1429                                 optval = OPTBIT(INP_RECVOPTS);
 1430                                 break;
 1431 
 1432                         case IP_RECVRETOPTS:
 1433                                 optval = OPTBIT(INP_RECVRETOPTS);
 1434                                 break;
 1435 
 1436                         case IP_RECVDSTADDR:
 1437                                 optval = OPTBIT(INP_RECVDSTADDR);
 1438                                 break;
 1439 
 1440                         case IP_RECVIF:
 1441                                 optval = OPTBIT(INP_RECVIF);
 1442                                 break;
 1443                         }
 1444                         *mtod(m, int *) = optval;
 1445                         break;
 1446 
 1447 #if 0   /* defined(IPSEC) || defined(FAST_IPSEC) */
 1448                 /* XXX: code broken */
 1449                 case IP_IPSEC_POLICY:
 1450                 {
 1451                         caddr_t req = NULL;
 1452                         size_t len = 0;
 1453 
 1454                         if (m) {
 1455                                 req = mtod(m, caddr_t);
 1456                                 len = m->m_len;
 1457                         }
 1458                         error = ipsec4_get_policy(inp, req, len, mp);
 1459                         break;
 1460                 }
 1461 #endif /*IPSEC*/
 1462 
 1463                 case IP_MULTICAST_IF:
 1464                 case IP_MULTICAST_TTL:
 1465                 case IP_MULTICAST_LOOP:
 1466                 case IP_ADD_MEMBERSHIP:
 1467                 case IP_DROP_MEMBERSHIP:
 1468                         error = ip_getmoptions(optname, inp->inp_moptions, mp);
 1469                         if (*mp)
 1470                                 MCLAIM(*mp, so->so_mowner);
 1471                         break;
 1472 
 1473                 case IP_PORTRANGE:
 1474                         *mp = m = m_get(M_WAIT, MT_SOOPTS);
 1475                         MCLAIM(m, so->so_mowner);
 1476                         m->m_len = sizeof(int);
 1477 
 1478                         if (inp->inp_flags & INP_LOWPORT)
 1479                                 optval = IP_PORTRANGE_LOW;
 1480                         else
 1481                                 optval = IP_PORTRANGE_DEFAULT;
 1482 
 1483                         *mtod(m, int *) = optval;
 1484                         break;
 1485 
 1486                 default:
 1487                         error = ENOPROTOOPT;
 1488                         break;
 1489                 }
 1490                 break;
 1491         }
 1492         return (error);
 1493 }
 1494 
 1495 /*
 1496  * Set up IP options in pcb for insertion in output packets.
 1497  * Store in mbuf with pointer in pcbopt, adding pseudo-option
 1498  * with destination address if source routed.
 1499  */
 1500 int
 1501 #ifdef notyet
 1502 ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m)
 1503 #else
 1504 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m)
 1505 #endif
 1506 {
 1507         int cnt, optlen;
 1508         u_char *cp;
 1509         u_char opt;
 1510 
 1511         /* turn off any old options */
 1512         if (*pcbopt)
 1513                 (void)m_free(*pcbopt);
 1514         *pcbopt = 0;
 1515         if (m == (struct mbuf *)0 || m->m_len == 0) {
 1516                 /*
 1517                  * Only turning off any previous options.
 1518                  */
 1519                 if (m)
 1520                         (void)m_free(m);
 1521                 return (0);
 1522         }
 1523 
 1524 #ifndef __vax__
 1525         if (m->m_len % sizeof(int32_t))
 1526                 goto bad;
 1527 #endif
 1528         /*
 1529          * IP first-hop destination address will be stored before
 1530          * actual options; move other options back
 1531          * and clear it when none present.
 1532          */
 1533         if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
 1534                 goto bad;
 1535         cnt = m->m_len;
 1536         m->m_len += sizeof(struct in_addr);
 1537         cp = mtod(m, u_char *) + sizeof(struct in_addr);
 1538         memmove(cp, mtod(m, caddr_t), (unsigned)cnt);
 1539         bzero(mtod(m, caddr_t), sizeof(struct in_addr));
 1540 
 1541         for (; cnt > 0; cnt -= optlen, cp += optlen) {
 1542                 opt = cp[IPOPT_OPTVAL];
 1543                 if (opt == IPOPT_EOL)
 1544                         break;
 1545                 if (opt == IPOPT_NOP)
 1546                         optlen = 1;
 1547                 else {
 1548                         if (cnt < IPOPT_OLEN + sizeof(*cp))
 1549                                 goto bad;
 1550                         optlen = cp[IPOPT_OLEN];
 1551                         if (optlen < IPOPT_OLEN  + sizeof(*cp) || optlen > cnt)
 1552                                 goto bad;
 1553                 }
 1554                 switch (opt) {
 1555 
 1556                 default:
 1557                         break;
 1558 
 1559                 case IPOPT_LSRR:
 1560                 case IPOPT_SSRR:
 1561                         /*
 1562                          * user process specifies route as:
 1563                          *      ->A->B->C->D
 1564                          * D must be our final destination (but we can't
 1565                          * check that since we may not have connected yet).
 1566                          * A is first hop destination, which doesn't appear in
 1567                          * actual IP option, but is stored before the options.
 1568                          */
 1569                         if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
 1570                                 goto bad;
 1571                         m->m_len -= sizeof(struct in_addr);
 1572                         cnt -= sizeof(struct in_addr);
 1573                         optlen -= sizeof(struct in_addr);
 1574                         cp[IPOPT_OLEN] = optlen;
 1575                         /*
 1576                          * Move first hop before start of options.
 1577                          */
 1578                         bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
 1579                             sizeof(struct in_addr));
 1580                         /*
 1581                          * Then copy rest of options back
 1582                          * to close up the deleted entry.
 1583                          */
 1584                         (void)memmove(&cp[IPOPT_OFFSET+1],
 1585                             &cp[IPOPT_OFFSET+1] + sizeof(struct in_addr),
 1586                             (unsigned)cnt - (IPOPT_MINOFF - 1));
 1587                         break;
 1588                 }
 1589         }
 1590         if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
 1591                 goto bad;
 1592         *pcbopt = m;
 1593         return (0);
 1594 
 1595 bad:
 1596         (void)m_free(m);
 1597         return (EINVAL);
 1598 }
 1599 
 1600 /*
 1601  * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
 1602  */
 1603 static struct ifnet *
 1604 ip_multicast_if(struct in_addr *a, int *ifindexp)
 1605 {
 1606         int ifindex;
 1607         struct ifnet *ifp = NULL;
 1608         struct in_ifaddr *ia;
 1609 
 1610         if (ifindexp)
 1611                 *ifindexp = 0;
 1612         if (ntohl(a->s_addr) >> 24 == 0) {
 1613                 ifindex = ntohl(a->s_addr) & 0xffffff;
 1614                 if (ifindex < 0 || if_indexlim <= ifindex)
 1615                         return NULL;
 1616                 ifp = ifindex2ifnet[ifindex];
 1617                 if (!ifp)
 1618                         return NULL;
 1619                 if (ifindexp)
 1620                         *ifindexp = ifindex;
 1621         } else {
 1622                 LIST_FOREACH(ia, &IN_IFADDR_HASH(a->s_addr), ia_hash) {
 1623                         if (in_hosteq(ia->ia_addr.sin_addr, *a) &&
 1624                             (ia->ia_ifp->if_flags & IFF_MULTICAST) != 0) {
 1625                                 ifp = ia->ia_ifp;
 1626                                 break;
 1627                         }
 1628                 }
 1629         }
 1630         return ifp;
 1631 }
 1632 
 1633 static int
 1634 ip_getoptval(struct mbuf *m, u_int8_t *val, u_int maxval)
 1635 {
 1636         u_int tval;
 1637 
 1638         if (m == NULL)
 1639                 return EINVAL;
 1640 
 1641         switch (m->m_len) {
 1642         case sizeof(u_char):
 1643                 tval = *(mtod(m, u_char *));
 1644                 break;
 1645         case sizeof(u_int):
 1646                 tval = *(mtod(m, u_int *));
 1647                 break;
 1648         default:
 1649                 return EINVAL;
 1650         }
 1651 
 1652         if (tval > maxval)
 1653                 return EINVAL;
 1654 
 1655         *val = tval;
 1656         return 0;
 1657 }
 1658 
 1659 /*
 1660  * Set the IP multicast options in response to user setsockopt().
 1661  */
 1662 int
 1663 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m)
 1664 {
 1665         int error = 0;
 1666         int i;
 1667         struct in_addr addr;
 1668         struct ip_mreq *mreq;
 1669         struct ifnet *ifp;
 1670         struct ip_moptions *imo = *imop;
 1671         struct route ro;
 1672         struct sockaddr_in *dst;
 1673         int ifindex;
 1674 
 1675         if (imo == NULL) {
 1676                 /*
 1677                  * No multicast option buffer attached to the pcb;
 1678                  * allocate one and initialize to default values.
 1679                  */
 1680                 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
 1681                     M_WAITOK);
 1682 
 1683                 if (imo == NULL)
 1684                         return (ENOBUFS);
 1685                 *imop = imo;
 1686                 imo->imo_multicast_ifp = NULL;
 1687                 imo->imo_multicast_addr.s_addr = INADDR_ANY;
 1688                 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 1689                 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
 1690                 imo->imo_num_memberships = 0;
 1691         }
 1692 
 1693         switch (optname) {
 1694 
 1695         case IP_MULTICAST_IF:
 1696                 /*
 1697                  * Select the interface for outgoing multicast packets.
 1698                  */
 1699                 if (m == NULL || m->m_len != sizeof(struct in_addr)) {
 1700                         error = EINVAL;
 1701                         break;
 1702                 }
 1703                 addr = *(mtod(m, struct in_addr *));
 1704                 /*
 1705                  * INADDR_ANY is used to remove a previous selection.
 1706                  * When no interface is selected, a default one is
 1707                  * chosen every time a multicast packet is sent.
 1708                  */
 1709                 if (in_nullhost(addr)) {
 1710                         imo->imo_multicast_ifp = NULL;
 1711                         break;
 1712                 }
 1713                 /*
 1714                  * The selected interface is identified by its local
 1715                  * IP address.  Find the interface and confirm that
 1716                  * it supports multicasting.
 1717                  */
 1718                 ifp = ip_multicast_if(&addr, &ifindex);
 1719                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 1720                         error = EADDRNOTAVAIL;
 1721                         break;
 1722                 }
 1723                 imo->imo_multicast_ifp = ifp;
 1724                 if (ifindex)
 1725                         imo->imo_multicast_addr = addr;
 1726                 else
 1727                         imo->imo_multicast_addr.s_addr = INADDR_ANY;
 1728                 break;
 1729 
 1730         case IP_MULTICAST_TTL:
 1731                 /*
 1732                  * Set the IP time-to-live for outgoing multicast packets.
 1733                  */
 1734                 error = ip_getoptval(m, &imo->imo_multicast_ttl, MAXTTL);
 1735                 break;
 1736 
 1737         case IP_MULTICAST_LOOP:
 1738                 /*
 1739                  * Set the loopback flag for outgoing multicast packets.
 1740                  * Must be zero or one.
 1741                  */
 1742                 error = ip_getoptval(m, &imo->imo_multicast_loop, 1);
 1743                 break;
 1744 
 1745         case IP_ADD_MEMBERSHIP:
 1746                 /*
 1747                  * Add a multicast group membership.
 1748                  * Group must be a valid IP multicast address.
 1749                  */
 1750                 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
 1751                         error = EINVAL;
 1752                         break;
 1753                 }
 1754                 mreq = mtod(m, struct ip_mreq *);
 1755                 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
 1756                         error = EINVAL;
 1757                         break;
 1758                 }
 1759                 /*
 1760                  * If no interface address was provided, use the interface of
 1761                  * the route to the given multicast address.
 1762                  */
 1763                 if (in_nullhost(mreq->imr_interface)) {
 1764                         bzero((caddr_t)&ro, sizeof(ro));
 1765                         ro.ro_rt = NULL;
 1766                         dst = satosin(&ro.ro_dst);
 1767                         dst->sin_len = sizeof(*dst);
 1768                         dst->sin_family = AF_INET;
 1769                         dst->sin_addr = mreq->imr_multiaddr;
 1770                         rtalloc(&ro);
 1771                         if (ro.ro_rt == NULL) {
 1772                                 error = EADDRNOTAVAIL;
 1773                                 break;
 1774                         }
 1775                         ifp = ro.ro_rt->rt_ifp;
 1776                         rtfree(ro.ro_rt);
 1777                 } else {
 1778                         ifp = ip_multicast_if(&mreq->imr_interface, NULL);
 1779                 }
 1780                 /*
 1781                  * See if we found an interface, and confirm that it
 1782                  * supports multicast.
 1783                  */
 1784                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 1785                         error = EADDRNOTAVAIL;
 1786                         break;
 1787                 }
 1788                 /*
 1789                  * See if the membership already exists or if all the
 1790                  * membership slots are full.
 1791                  */
 1792                 for (i = 0; i < imo->imo_num_memberships; ++i) {
 1793                         if (imo->imo_membership[i]->inm_ifp == ifp &&
 1794                             in_hosteq(imo->imo_membership[i]->inm_addr,
 1795                                       mreq->imr_multiaddr))
 1796                                 break;
 1797                 }
 1798                 if (i < imo->imo_num_memberships) {
 1799                         error = EADDRINUSE;
 1800                         break;
 1801                 }
 1802                 if (i == IP_MAX_MEMBERSHIPS) {
 1803                         error = ETOOMANYREFS;
 1804                         break;
 1805                 }
 1806                 /*
 1807                  * Everything looks good; add a new record to the multicast
 1808                  * address list for the given interface.
 1809                  */
 1810                 if ((imo->imo_membership[i] =
 1811                     in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
 1812                         error = ENOBUFS;
 1813                         break;
 1814                 }
 1815                 ++imo->imo_num_memberships;
 1816                 break;
 1817 
 1818         case IP_DROP_MEMBERSHIP:
 1819                 /*
 1820                  * Drop a multicast group membership.
 1821                  * Group must be a valid IP multicast address.
 1822                  */
 1823                 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
 1824                         error = EINVAL;
 1825                         break;
 1826                 }
 1827                 mreq = mtod(m, struct ip_mreq *);
 1828                 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
 1829                         error = EINVAL;
 1830                         break;
 1831                 }
 1832                 /*
 1833                  * If an interface address was specified, get a pointer
 1834                  * to its ifnet structure.
 1835                  */
 1836                 if (in_nullhost(mreq->imr_interface))
 1837                         ifp = NULL;
 1838                 else {
 1839                         ifp = ip_multicast_if(&mreq->imr_interface, NULL);
 1840                         if (ifp == NULL) {
 1841                                 error = EADDRNOTAVAIL;
 1842                                 break;
 1843                         }
 1844                 }
 1845                 /*
 1846                  * Find the membership in the membership array.
 1847                  */
 1848                 for (i = 0; i < imo->imo_num_memberships; ++i) {
 1849                         if ((ifp == NULL ||
 1850                              imo->imo_membership[i]->inm_ifp == ifp) &&
 1851                              in_hosteq(imo->imo_membership[i]->inm_addr,
 1852                                        mreq->imr_multiaddr))
 1853                                 break;
 1854                 }
 1855                 if (i == imo->imo_num_memberships) {
 1856                         error = EADDRNOTAVAIL;
 1857                         break;
 1858                 }
 1859                 /*
 1860                  * Give up the multicast address record to which the
 1861                  * membership points.
 1862                  */
 1863                 in_delmulti(imo->imo_membership[i]);
 1864                 /*
 1865                  * Remove the gap in the membership array.
 1866                  */
 1867                 for (++i; i < imo->imo_num_memberships; ++i)
 1868                         imo->imo_membership[i-1] = imo->imo_membership[i];
 1869                 --imo->imo_num_memberships;
 1870                 break;
 1871 
 1872         default:
 1873                 error = EOPNOTSUPP;
 1874                 break;
 1875         }
 1876 
 1877         /*
 1878          * If all options have default values, no need to keep the mbuf.
 1879          */
 1880         if (imo->imo_multicast_ifp == NULL &&
 1881             imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
 1882             imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
 1883             imo->imo_num_memberships == 0) {
 1884                 free(*imop, M_IPMOPTS);
 1885                 *imop = NULL;
 1886         }
 1887 
 1888         return (error);
 1889 }
 1890 
 1891 /*
 1892  * Return the IP multicast options in response to user getsockopt().
 1893  */
 1894 int
 1895 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf **mp)
 1896 {
 1897         u_char *ttl;
 1898         u_char *loop;
 1899         struct in_addr *addr;
 1900         struct in_ifaddr *ia;
 1901 
 1902         *mp = m_get(M_WAIT, MT_SOOPTS);
 1903 
 1904         switch (optname) {
 1905 
 1906         case IP_MULTICAST_IF:
 1907                 addr = mtod(*mp, struct in_addr *);
 1908                 (*mp)->m_len = sizeof(struct in_addr);
 1909                 if (imo == NULL || imo->imo_multicast_ifp == NULL)
 1910                         *addr = zeroin_addr;
 1911                 else if (imo->imo_multicast_addr.s_addr) {
 1912                         /* return the value user has set */
 1913                         *addr = imo->imo_multicast_addr;
 1914                 } else {
 1915                         IFP_TO_IA(imo->imo_multicast_ifp, ia);
 1916                         *addr = ia ? ia->ia_addr.sin_addr : zeroin_addr;
 1917                 }
 1918                 return (0);
 1919 
 1920         case IP_MULTICAST_TTL:
 1921                 ttl = mtod(*mp, u_char *);
 1922                 (*mp)->m_len = 1;
 1923                 *ttl = imo ? imo->imo_multicast_ttl
 1924                            : IP_DEFAULT_MULTICAST_TTL;
 1925                 return (0);
 1926 
 1927         case IP_MULTICAST_LOOP:
 1928                 loop = mtod(*mp, u_char *);
 1929                 (*mp)->m_len = 1;
 1930                 *loop = imo ? imo->imo_multicast_loop
 1931                             : IP_DEFAULT_MULTICAST_LOOP;
 1932                 return (0);
 1933 
 1934         default:
 1935                 return (EOPNOTSUPP);
 1936         }
 1937 }
 1938 
 1939 /*
 1940  * Discard the IP multicast options.
 1941  */
 1942 void
 1943 ip_freemoptions(struct ip_moptions *imo)
 1944 {
 1945         int i;
 1946 
 1947         if (imo != NULL) {
 1948                 for (i = 0; i < imo->imo_num_memberships; ++i)
 1949                         in_delmulti(imo->imo_membership[i]);
 1950                 free(imo, M_IPMOPTS);
 1951         }
 1952 }
 1953 
 1954 /*
 1955  * Routine called from ip_output() to loop back a copy of an IP multicast
 1956  * packet to the input queue of a specified interface.  Note that this
 1957  * calls the output routine of the loopback "driver", but with an interface
 1958  * pointer that might NOT be lo0ifp -- easier than replicating that code here.
 1959  */
 1960 static void
 1961 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst)
 1962 {
 1963         struct ip *ip;
 1964         struct mbuf *copym;
 1965 
 1966         copym = m_copy(m, 0, M_COPYALL);
 1967         if (copym != NULL
 1968          && (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip)))
 1969                 copym = m_pullup(copym, sizeof(struct ip));
 1970         if (copym != NULL) {
 1971                 /*
 1972                  * We don't bother to fragment if the IP length is greater
 1973                  * than the interface's MTU.  Can this possibly matter?
 1974                  */
 1975                 ip = mtod(copym, struct ip *);
 1976 
 1977                 if (copym->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
 1978                         in_delayed_cksum(copym);
 1979                         copym->m_pkthdr.csum_flags &=
 1980                             ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
 1981                 }
 1982 
 1983                 ip->ip_sum = 0;
 1984                 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
 1985                 (void) looutput(ifp, copym, sintosa(dst), NULL);
 1986         }
 1987 }

Cache object: 296faa85805041344b435412d00a2220


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.