The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_output.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: ip_output.c,v 1.149.2.5 2007/03/31 14:15:43 bouyer Exp $       */
    2 
    3 /*
    4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the project nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  */
   31 
   32 /*-
   33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
   34  * All rights reserved.
   35  *
   36  * This code is derived from software contributed to The NetBSD Foundation
   37  * by Public Access Networks Corporation ("Panix").  It was developed under
   38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
   39  *
   40  * Redistribution and use in source and binary forms, with or without
   41  * modification, are permitted provided that the following conditions
   42  * are met:
   43  * 1. Redistributions of source code must retain the above copyright
   44  *    notice, this list of conditions and the following disclaimer.
   45  * 2. Redistributions in binary form must reproduce the above copyright
   46  *    notice, this list of conditions and the following disclaimer in the
   47  *    documentation and/or other materials provided with the distribution.
   48  * 3. All advertising materials mentioning features or use of this software
   49  *    must display the following acknowledgement:
   50  *      This product includes software developed by the NetBSD
   51  *      Foundation, Inc. and its contributors.
   52  * 4. Neither the name of The NetBSD Foundation nor the names of its
   53  *    contributors may be used to endorse or promote products derived
   54  *    from this software without specific prior written permission.
   55  *
   56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   66  * POSSIBILITY OF SUCH DAMAGE.
   67  */
   68 
   69 /*
   70  * Copyright (c) 1982, 1986, 1988, 1990, 1993
   71  *      The Regents of the University of California.  All rights reserved.
   72  *
   73  * Redistribution and use in source and binary forms, with or without
   74  * modification, are permitted provided that the following conditions
   75  * are met:
   76  * 1. Redistributions of source code must retain the above copyright
   77  *    notice, this list of conditions and the following disclaimer.
   78  * 2. Redistributions in binary form must reproduce the above copyright
   79  *    notice, this list of conditions and the following disclaimer in the
   80  *    documentation and/or other materials provided with the distribution.
   81  * 3. Neither the name of the University nor the names of its contributors
   82  *    may be used to endorse or promote products derived from this software
   83  *    without specific prior written permission.
   84  *
   85  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   86  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   87  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   88  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   89  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   90  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   91  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   92  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   93  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   94  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   95  * SUCH DAMAGE.
   96  *
   97  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
   98  */
   99 
  100 #include <sys/cdefs.h>
  101 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.149.2.5 2007/03/31 14:15:43 bouyer Exp $");
  102 
  103 #include "opt_pfil_hooks.h"
  104 #include "opt_inet.h"
  105 #include "opt_ipsec.h"
  106 #include "opt_mrouting.h"
  107 
  108 #include <sys/param.h>
  109 #include <sys/malloc.h>
  110 #include <sys/mbuf.h>
  111 #include <sys/errno.h>
  112 #include <sys/protosw.h>
  113 #include <sys/socket.h>
  114 #include <sys/socketvar.h>
  115 #ifdef FAST_IPSEC
  116 #include <sys/domain.h>
  117 #endif
  118 #include <sys/systm.h>
  119 #include <sys/proc.h>
  120 
  121 #include <net/if.h>
  122 #include <net/route.h>
  123 #include <net/pfil.h>
  124 
  125 #include <netinet/in.h>
  126 #include <netinet/in_systm.h>
  127 #include <netinet/ip.h>
  128 #include <netinet/in_pcb.h>
  129 #include <netinet/in_var.h>
  130 #include <netinet/ip_var.h>
  131 
  132 #ifdef MROUTING
  133 #include <netinet/ip_mroute.h>
  134 #endif
  135 
  136 #include <machine/stdarg.h>
  137 
  138 #ifdef IPSEC
  139 #include <netinet6/ipsec.h>
  140 #include <netkey/key.h>
  141 #include <netkey/key_debug.h>
  142 #ifdef IPSEC_NAT_T
  143 #include <netinet/udp.h>
  144 #endif
  145 #endif /*IPSEC*/
  146 
  147 #ifdef FAST_IPSEC
  148 #include <netipsec/ipsec.h>
  149 #include <netipsec/key.h>
  150 #include <netipsec/xform.h>
  151 #endif  /* FAST_IPSEC*/
  152 
  153 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);  /* ip_output() calls this as (m, opt, &len), keeps the returned mbuf as the packet and uses len as the header length when len >= sizeof(struct ip) */
  154 static struct ifnet *ip_multicast_if(struct in_addr *, int *);  /* NOTE(review): no caller in the visible part of this file; presumably resolves an address to an interface -- confirm */
  155 static void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *);  /* ip_output() calls this as (ifp, m, dst) to loop back a copy of a multicast packet */
  156 static int ip_getoptval(struct mbuf *, u_int8_t *, u_int);  /* NOTE(review): no caller in the visible part of this file -- confirm purpose against its definition */
  157 
  158 #ifdef PFIL_HOOKS  /* only declared when packet-filter hooks are configured in */
  159 extern struct pfil_head inet_pfil_hook;                 /* XXX: declared directly in this .c file rather than via a header */
  160 #endif
  161 
  162 int     udp_do_loopback_cksum = 0;      /* non-zero: IN_NEED_CHECKSUM() is true for M_CSUM_UDPv4 packets even on a loopback interface; defaults to 0 */
  163 int     tcp_do_loopback_cksum = 0;      /* same, for packets flagged M_CSUM_TCPv4 */
  164 int     ip_do_loopback_cksum = 0;       /* same, for packets flagged M_CSUM_IPv4 */
  165 
  166 #define IN_NEED_CHECKSUM(ifp, csum_flags) /* non-zero when any one of the four tests below holds */ \
  167         (__predict_true(((ifp)->if_flags & IFF_LOOPBACK) == 0 || /* 1: ifp is not a loopback interface */ \
  168         (((csum_flags) & M_CSUM_UDPv4) != 0 && udp_do_loopback_cksum) || /* 2: UDPv4 flag set and the UDP knob is on */ \
  169         (((csum_flags) & M_CSUM_TCPv4) != 0 && tcp_do_loopback_cksum) || /* 3: TCPv4 flag set and the TCP knob is on */ \
  170         (((csum_flags) & M_CSUM_IPv4) != 0 && ip_do_loopback_cksum))) /* 4: IPv4 flag set and the IP knob is on; __predict_true wraps the whole expression */
  171 
  172 /*
  173  * IP output.  The packet in mbuf chain m contains a skeletal IP
  174  * header (with len, off, ttl, proto, tos, src, dst).
  175  * The mbuf chain containing the packet will be freed.
  176  * The mbuf opt, if present, will not be freed.
  177  */
  178 int
  179 ip_output(struct mbuf *m0, ...)
  180 {
  181         struct ip *ip;
  182         struct ifnet *ifp;
  183         struct mbuf *m = m0;
  184         int hlen = sizeof (struct ip);
  185         int len, error = 0;
  186         struct route iproute;
  187         struct sockaddr_in *dst;
  188         struct in_ifaddr *ia;
  189         struct mbuf *opt;
  190         struct route *ro;
  191         int flags, sw_csum;
  192         int *mtu_p;
  193         u_long mtu;
  194         struct ip_moptions *imo;
  195         struct socket *so;
  196         va_list ap;
  197 #ifdef IPSEC
  198         struct secpolicy *sp = NULL;
  199 #ifdef IPSEC_NAT_T
  200         int natt_frag = 0;
  201 #endif
  202 #endif /*IPSEC*/
  203 #ifdef FAST_IPSEC
  204         struct inpcb *inp;
  205         struct m_tag *mtag;
  206         struct secpolicy *sp = NULL;
  207         struct tdb_ident *tdbi;
  208         int s;
  209 #endif
  210         u_int16_t ip_len;
  211 
  212         len = 0;
  213         va_start(ap, m0);
  214         opt = va_arg(ap, struct mbuf *);
  215         ro = va_arg(ap, struct route *);
  216         flags = va_arg(ap, int);
  217         imo = va_arg(ap, struct ip_moptions *);
  218         so = va_arg(ap, struct socket *);
  219         if (flags & IP_RETURNMTU)
  220                 mtu_p = va_arg(ap, int *);
  221         else
  222                 mtu_p = NULL;
  223         va_end(ap);
  224 
  225         MCLAIM(m, &ip_tx_mowner);
  226 #ifdef FAST_IPSEC
  227         if (so != NULL && so->so_proto->pr_domain->dom_family == AF_INET)
  228                 inp = (struct inpcb *)so->so_pcb;
  229         else
  230                 inp = NULL;
  231 #endif /* FAST_IPSEC */
  232 
  233 #ifdef  DIAGNOSTIC
  234         if ((m->m_flags & M_PKTHDR) == 0)
  235                 panic("ip_output no HDR");
  236 #endif
  237         if (opt) {
  238                 m = ip_insertoptions(m, opt, &len);
  239                 if (len >= sizeof(struct ip))
  240                         hlen = len;
  241         }
  242         ip = mtod(m, struct ip *);
  243         /*
  244          * Fill in IP header.
  245          */
  246         if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
  247                 ip->ip_v = IPVERSION;
  248                 ip->ip_off = htons(0);
  249                 if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
  250                         ip->ip_id = ip_newid();
  251                 } else {
  252 
  253                         /*
  254                          * TSO capable interfaces (typically?) increment
  255                          * ip_id for each segment.
  256                          * "allocate" enough ids here to increase the chance
  257                          * for them to be unique.
  258                          *
  259                          * note that the following calculation does not
  260                          * need to be precise.  wasting some ip_ids is fine.
  261                          */
  262 
  263                         unsigned int segsz = m->m_pkthdr.segsz;
  264                         unsigned int datasz = ntohs(ip->ip_len) - hlen;
  265                         unsigned int num = howmany(datasz, segsz);
  266 
  267                         ip->ip_id = ip_newid_range(num);
  268                 }
  269                 ip->ip_hl = hlen >> 2;
  270                 ipstat.ips_localout++;
  271         } else {
  272                 hlen = ip->ip_hl << 2;
  273         }
  274         /*
  275          * Route packet.
  276          */
  277         bzero(&iproute, sizeof(iproute));
  278         if (ro == NULL)
  279                 ro = &iproute;
  280         dst = satosin(&ro->ro_dst);
  281         /*
  282          * If there is a cached route,
  283          * check that it is to the same destination
  284          * and is still up.  If not, free it and try again.
  285          * The address family should also be checked in case of sharing the
  286          * cache with IPv6.
  287          */
  288         if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
  289             dst->sin_family != AF_INET ||
  290             !in_hosteq(dst->sin_addr, ip->ip_dst))) {
  291                 RTFREE(ro->ro_rt);
  292                 ro->ro_rt = (struct rtentry *)0;
  293         }
  294         if (ro->ro_rt == 0) {
  295                 bzero(dst, sizeof(*dst));
  296                 dst->sin_family = AF_INET;
  297                 dst->sin_len = sizeof(*dst);
  298                 dst->sin_addr = ip->ip_dst;
  299         }
  300         /*
  301          * If routing to interface only,
  302          * short circuit routing lookup.
  303          */
  304         if (flags & IP_ROUTETOIF) {
  305                 if ((ia = ifatoia(ifa_ifwithladdr(sintosa(dst)))) == 0) {
  306                         ipstat.ips_noroute++;
  307                         error = ENETUNREACH;
  308                         goto bad;
  309                 }
  310                 ifp = ia->ia_ifp;
  311                 mtu = ifp->if_mtu;
  312                 ip->ip_ttl = 1;
  313         } else if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
  314             ip->ip_dst.s_addr == INADDR_BROADCAST) &&
  315             imo != NULL && imo->imo_multicast_ifp != NULL) {
  316                 ifp = imo->imo_multicast_ifp;
  317                 mtu = ifp->if_mtu;
  318                 IFP_TO_IA(ifp, ia);
  319         } else {
  320                 if (ro->ro_rt == 0)
  321                         rtalloc(ro);
  322                 if (ro->ro_rt == 0) {
  323                         ipstat.ips_noroute++;
  324                         error = EHOSTUNREACH;
  325                         goto bad;
  326                 }
  327                 ia = ifatoia(ro->ro_rt->rt_ifa);
  328                 ifp = ro->ro_rt->rt_ifp;
  329                 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
  330                         mtu = ifp->if_mtu;
  331                 ro->ro_rt->rt_use++;
  332                 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
  333                         dst = satosin(ro->ro_rt->rt_gateway);
  334         }
  335         if (IN_MULTICAST(ip->ip_dst.s_addr) ||
  336             (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
  337                 struct in_multi *inm;
  338 
  339                 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
  340                         M_BCAST : M_MCAST;
  341                 /*
  342                  * IP destination address is multicast.  Make sure "dst"
  343                  * still points to the address in "ro".  (It may have been
  344                  * changed to point to a gateway address, above.)
  345                  */
  346                 dst = satosin(&ro->ro_dst);
  347                 /*
  348                  * See if the caller provided any multicast options
  349                  */
  350                 if (imo != NULL)
  351                         ip->ip_ttl = imo->imo_multicast_ttl;
  352                 else
  353                         ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
  354 
  355                 /*
  356                  * if we don't know the outgoing ifp yet, we can't generate
  357                  * output
  358                  */
  359                 if (!ifp) {
  360                         ipstat.ips_noroute++;
  361                         error = ENETUNREACH;
  362                         goto bad;
  363                 }
  364 
  365                 /*
  366                  * If the packet is multicast or broadcast, confirm that
  367                  * the outgoing interface can transmit it.
  368                  */
  369                 if (((m->m_flags & M_MCAST) &&
  370                      (ifp->if_flags & IFF_MULTICAST) == 0) ||
  371                     ((m->m_flags & M_BCAST) &&
  372                      (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0))  {
  373                         ipstat.ips_noroute++;
  374                         error = ENETUNREACH;
  375                         goto bad;
  376                 }
  377                 /*
  378                  * If source address not specified yet, use an address
  379                  * of outgoing interface.
  380                  */
  381                 if (in_nullhost(ip->ip_src)) {
  382                         struct in_ifaddr *ia;
  383 
  384                         IFP_TO_IA(ifp, ia);
  385                         if (!ia) {
  386                                 error = EADDRNOTAVAIL;
  387                                 goto bad;
  388                         }
  389                         ip->ip_src = ia->ia_addr.sin_addr;
  390                 }
  391 
  392                 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
  393                 if (inm != NULL &&
  394                    (imo == NULL || imo->imo_multicast_loop)) {
  395                         /*
  396                          * If we belong to the destination multicast group
  397                          * on the outgoing interface, and the caller did not
  398                          * forbid loopback, loop back a copy.
  399                          */
  400                         ip_mloopback(ifp, m, dst);
  401                 }
  402 #ifdef MROUTING
  403                 else {
  404                         /*
  405                          * If we are acting as a multicast router, perform
  406                          * multicast forwarding as if the packet had just
  407                          * arrived on the interface to which we are about
  408                          * to send.  The multicast forwarding function
  409                          * recursively calls this function, using the
  410                          * IP_FORWARDING flag to prevent infinite recursion.
  411                          *
  412                          * Multicasts that are looped back by ip_mloopback(),
  413                          * above, will be forwarded by the ip_input() routine,
  414                          * if necessary.
  415                          */
  416                         extern struct socket *ip_mrouter;
  417 
  418                         if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
  419                                 if (ip_mforward(m, ifp) != 0) {
  420                                         m_freem(m);
  421                                         goto done;
  422                                 }
  423                         }
  424                 }
  425 #endif
  426                 /*
  427                  * Multicasts with a time-to-live of zero may be looped-
  428                  * back, above, but must not be transmitted on a network.
  429                  * Also, multicasts addressed to the loopback interface
  430                  * are not sent -- the above call to ip_mloopback() will
  431                  * loop back a copy if this host actually belongs to the
  432                  * destination group on the loopback interface.
  433                  */
  434                 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
  435                         m_freem(m);
  436                         goto done;
  437                 }
  438 
  439                 goto sendit;
  440         }
  441 #ifndef notdef
  442         /*
  443          * If source address not specified yet, use address
  444          * of outgoing interface.
  445          */
  446         if (in_nullhost(ip->ip_src))
  447                 ip->ip_src = ia->ia_addr.sin_addr;
  448 #endif
  449 
  450         /*
  451          * packets with Class-D address as source are not valid per
  452          * RFC 1112
  453          */
  454         if (IN_MULTICAST(ip->ip_src.s_addr)) {
  455                 ipstat.ips_odropped++;
  456                 error = EADDRNOTAVAIL;
  457                 goto bad;
  458         }
  459 
  460         /*
  461          * Look for broadcast address
  462          * and verify user is allowed to send
  463          * such a packet.
  464          */
  465         if (in_broadcast(dst->sin_addr, ifp)) {
  466                 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
  467                         error = EADDRNOTAVAIL;
  468                         goto bad;
  469                 }
  470                 if ((flags & IP_ALLOWBROADCAST) == 0) {
  471                         error = EACCES;
  472                         goto bad;
  473                 }
  474                 /* don't allow broadcast messages to be fragmented */
  475                 if (ntohs(ip->ip_len) > ifp->if_mtu) {
  476                         error = EMSGSIZE;
  477                         goto bad;
  478                 }
  479                 m->m_flags |= M_BCAST;
  480         } else
  481                 m->m_flags &= ~M_BCAST;
  482 
  483 sendit:
  484         /*
  485          * If we're doing Path MTU Discovery, we need to set DF unless
  486          * the route's MTU is locked.
  487          */
  488         if ((flags & IP_MTUDISC) != 0 && ro->ro_rt != NULL &&
  489             (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
  490                 ip->ip_off |= htons(IP_DF);
  491 
  492         /* Remember the current ip_len */
  493         ip_len = ntohs(ip->ip_len);
  494 
  495 #ifdef IPSEC
  496         /* get SP for this packet */
  497         if (so == NULL)
  498                 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
  499                     flags, &error);
  500         else {
  501                 if (IPSEC_PCB_SKIP_IPSEC(sotoinpcb_hdr(so)->inph_sp,
  502                                          IPSEC_DIR_OUTBOUND))
  503                         goto skip_ipsec;
  504                 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
  505         }
  506 
  507         if (sp == NULL) {
  508                 ipsecstat.out_inval++;
  509                 goto bad;
  510         }
  511 
  512         error = 0;
  513 
  514         /* check policy */
  515         switch (sp->policy) {
  516         case IPSEC_POLICY_DISCARD:
  517                 /*
  518                  * This packet is just discarded.
  519                  */
  520                 ipsecstat.out_polvio++;
  521                 goto bad;
  522 
  523         case IPSEC_POLICY_BYPASS:
  524         case IPSEC_POLICY_NONE:
  525                 /* no need to do IPsec. */
  526                 goto skip_ipsec;
  527 
  528         case IPSEC_POLICY_IPSEC:
  529                 if (sp->req == NULL) {
  530                         /* XXX should be panic ? */
  531                         printf("ip_output: No IPsec request specified.\n");
  532                         error = EINVAL;
  533                         goto bad;
  534                 }
  535                 break;
  536 
  537         case IPSEC_POLICY_ENTRUST:
  538         default:
  539                 printf("ip_output: Invalid policy found. %d\n", sp->policy);
  540         }
  541 
  542 #ifdef IPSEC_NAT_T
  543         /*
  544          * NAT-T ESP fragmentation: don't do IPSec processing now,
  545          * we'll do it on each fragmented packet.
  546          */
  547         if (sp->req->sav &&
  548             ((sp->req->sav->natt_type & UDP_ENCAP_ESPINUDP) ||
  549              (sp->req->sav->natt_type & UDP_ENCAP_ESPINUDP_NON_IKE))) {
  550                 if (ntohs(ip->ip_len) > sp->req->sav->esp_frag) {
  551                         natt_frag = 1;
  552                         mtu = sp->req->sav->esp_frag;
  553                         goto skip_ipsec;
  554                 }
  555         }
  556 #endif /* IPSEC_NAT_T */
  557 
  558         /*
  559          * ipsec4_output() expects ip_len and ip_off in network
  560          * order.  They have been set to network order above.
  561          */
  562 
  563     {
  564         struct ipsec_output_state state;
  565         bzero(&state, sizeof(state));
  566         state.m = m;
  567         if (flags & IP_ROUTETOIF) {
  568                 state.ro = &iproute;
  569                 bzero(&iproute, sizeof(iproute));
  570         } else
  571                 state.ro = ro;
  572         state.dst = (struct sockaddr *)dst;
  573 
  574         /*
  575          * We can't defer the checksum of payload data if
  576          * we're about to encrypt/authenticate it.
  577          *
  578          * XXX When we support crypto offloading functions of
  579          * XXX network interfaces, we need to reconsider this,
  580          * XXX since it's likely that they'll support checksumming,
  581          * XXX as well.
  582          */
  583         if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  584                 in_delayed_cksum(m);
  585                 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
  586         }
  587 
  588         error = ipsec4_output(&state, sp, flags);
  589 
  590         m = state.m;
  591         if (flags & IP_ROUTETOIF) {
  592                 /*
  593                  * if we have tunnel mode SA, we may need to ignore
  594                  * IP_ROUTETOIF.
  595                  */
  596                 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
  597                         flags &= ~IP_ROUTETOIF;
  598                         ro = state.ro;
  599                 }
  600         } else
  601                 ro = state.ro;
  602         dst = (struct sockaddr_in *)state.dst;
  603         if (error) {
  604                 /* mbuf is already reclaimed in ipsec4_output. */
  605                 m0 = NULL;
  606                 switch (error) {
  607                 case EHOSTUNREACH:
  608                 case ENETUNREACH:
  609                 case EMSGSIZE:
  610                 case ENOBUFS:
  611                 case ENOMEM:
  612                         break;
  613                 default:
  614                         printf("ip4_output (ipsec): error code %d\n", error);
  615                         /*fall through*/
  616                 case ENOENT:
  617                         /* don't show these error codes to the user */
  618                         error = 0;
  619                         break;
  620                 }
  621                 goto bad;
  622         }
  623 
  624         /* be sure to update variables that are affected by ipsec4_output() */
  625         ip = mtod(m, struct ip *);
  626         hlen = ip->ip_hl << 2;
  627         ip_len = ntohs(ip->ip_len);
  628 
  629         if (ro->ro_rt == NULL) {
  630                 if ((flags & IP_ROUTETOIF) == 0) {
  631                         printf("ip_output: "
  632                                 "can't update route after IPsec processing\n");
  633                         error = EHOSTUNREACH;   /*XXX*/
  634                         goto bad;
  635                 }
  636         } else {
  637                 /* nobody uses ia beyond here */
  638                 if (state.encap) {
  639                         ifp = ro->ro_rt->rt_ifp;
  640                         if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
  641                                 mtu = ifp->if_mtu;
  642                 }
  643         }
  644     }
  645 skip_ipsec:
  646 #endif /*IPSEC*/
  647 #ifdef FAST_IPSEC
  648         /*
  649          * Check the security policy (SP) for the packet and, if
  650          * required, do IPsec-related processing.  There are two
  651          * cases here; the first time a packet is sent through
  652          * it will be untagged and handled by ipsec4_checkpolicy.
  653          * If the packet is resubmitted to ip_output (e.g. after
  654          * AH, ESP, etc. processing), there will be a tag to bypass
  655          * the lookup and related policy checking.
  656          */
  657         mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
  658         s = splsoftnet();
  659         if (mtag != NULL) {
  660                 tdbi = (struct tdb_ident *)(mtag + 1);
  661                 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
  662                 if (sp == NULL)
  663                         error = -EINVAL;        /* force silent drop */
  664                 m_tag_delete(m, mtag);
  665         } else {
  666                 if (inp != NULL &&
  667                     IPSEC_PCB_SKIP_IPSEC(inp->inp_sp, IPSEC_DIR_OUTBOUND))
  668                         goto spd_done;
  669                 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
  670                                         &error, inp);
  671         }
  672         /*
  673          * There are four return cases:
  674          *    sp != NULL                    apply IPsec policy
  675          *    sp == NULL, error == 0        no IPsec handling needed
  676          *    sp == NULL, error == -EINVAL  discard packet w/o error
  677          *    sp == NULL, error != 0        discard packet, report error
  678          */
  679         if (sp != NULL) {
  680                 /* Loop detection, check if ipsec processing already done */
  681                 IPSEC_ASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
  682                 for (mtag = m_tag_first(m); mtag != NULL;
  683                      mtag = m_tag_next(m, mtag)) {
  684 #ifdef MTAG_ABI_COMPAT
  685                         if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
  686                                 continue;
  687 #endif
  688                         if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
  689                             mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
  690                                 continue;
  691                         /*
  692                          * Check if policy has an SA associated with it.
  693                          * This can happen when an SP has yet to acquire
  694                          * an SA; e.g. on first reference.  If it occurs,
  695                          * then we let ipsec4_process_packet do its thing.
  696                          */
  697                         if (sp->req->sav == NULL)
  698                                 break;
  699                         tdbi = (struct tdb_ident *)(mtag + 1);
  700                         if (tdbi->spi == sp->req->sav->spi &&
  701                             tdbi->proto == sp->req->sav->sah->saidx.proto &&
  702                             bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
  703                                  sizeof (union sockaddr_union)) == 0) {
  704                                 /*
  705                                  * No IPsec processing is needed, free
  706                                  * reference to SP.
  707                                  *
  708                                  * NB: null pointer to avoid free at
  709                                  *     done: below.
  710                                  */
  711                                 KEY_FREESP(&sp), sp = NULL;
  712                                 splx(s);
  713                                 goto spd_done;
  714                         }
  715                 }
  716 
  717                 /*
  718                  * Do delayed checksums now because we send before
  719                  * this is done in the normal processing path.
  720                  */
  721                 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  722                         in_delayed_cksum(m);
  723                         m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
  724                 }
  725 
  726 #ifdef __FreeBSD__
  727                 ip->ip_len = htons(ip->ip_len);
  728                 ip->ip_off = htons(ip->ip_off);
  729 #endif
  730 
  731                 /* NB: callee frees mbuf */
  732                 error = ipsec4_process_packet(m, sp->req, flags, 0);
  733                 /*
  734                  * Preserve KAME behaviour: ENOENT can be returned
  735                  * when an SA acquire is in progress.  Don't propagate
  736                  * this to user-level; it confuses applications.
  737                  *
  738                  * XXX this will go away when the SADB is redone.
  739                  */
  740                 if (error == ENOENT)
  741                         error = 0;
  742                 splx(s);
  743                 goto done;
  744         } else {
  745                 splx(s);
  746 
  747                 if (error != 0) {
  748                         /*
  749                          * Hack: -EINVAL is used to signal that a packet
  750                          * should be silently discarded.  This is typically
  751                          * because we asked key management for an SA and
  752                          * it was delayed (e.g. kicked up to IKE).
  753                          */
  754                         if (error == -EINVAL)
  755                                 error = 0;
  756                         goto bad;
  757                 } else {
  758                         /* No IPsec processing for this packet. */
  759                 }
  760 #ifdef notyet
  761                 /*
  762                  * If deferred crypto processing is needed, check that
  763                  * the interface supports it.
  764                  */
  765                 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
  766                 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
  767                         /* notify IPsec to do its own crypto */
  768                         ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
  769                         error = EHOSTUNREACH;
  770                         goto bad;
  771                 }
  772 #endif
  773         }
  774 spd_done:
  775 #endif /* FAST_IPSEC */
  776 
  777 #ifdef PFIL_HOOKS
  778         /*
  779          * Run through list of hooks for output packets.
  780          */
  781         if ((error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
  782                 goto done;
  783         if (m == NULL)
  784                 goto done;
  785 
  786         ip = mtod(m, struct ip *);
  787         hlen = ip->ip_hl << 2;
  788         ip_len = ntohs(ip->ip_len);
  789 #endif /* PFIL_HOOKS */
  790 
  791         m->m_pkthdr.csum_data |= hlen << 16;
  792 
  793 #if IFA_STATS
  794         /*
  795          * search for the source address structure to
  796          * maintain output statistics.
  797          */
  798         INADDR_TO_IA(ip->ip_src, ia);
  799 #endif
  800 
  801         /* Maybe skip checksums on loopback interfaces. */
  802         if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) {
  803                 m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
  804         }
  805         sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
  806         /*
  807          * If small enough for mtu of path, or if using TCP segmentation
  808          * offload, can just send directly.
  809          */
  810         if (ip_len <= mtu ||
  811             (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) != 0) {
  812 #if IFA_STATS
  813                 if (ia)
  814                         ia->ia_ifa.ifa_data.ifad_outbytes += ip_len;
  815 #endif
  816                 /*
  817                  * Always initialize the sum to 0!  Some HW assisted
  818                  * checksumming requires this.
  819                  */
  820                 ip->ip_sum = 0;
  821 
  822                 if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
  823                         /*
  824                          * Perform any checksums that the hardware can't do
  825                          * for us.
  826                          *
  827                          * XXX Does any hardware require the {th,uh}_sum
  828                          * XXX fields to be 0?
  829                          */
  830                         if (sw_csum & M_CSUM_IPv4) {
  831                                 KASSERT(IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4));
  832                                 ip->ip_sum = in_cksum(m, hlen);
  833                                 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
  834                         }
  835                         if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  836                                 if (IN_NEED_CHECKSUM(ifp,
  837                                     sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
  838                                         in_delayed_cksum(m);
  839                                 }
  840                                 m->m_pkthdr.csum_flags &=
  841                                     ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
  842                         }
  843                 }
  844 
  845 #ifdef IPSEC
  846                 /* clean ipsec history once it goes out of the node */
  847                 ipsec_delaux(m);
  848 #endif
  849                 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
  850                 goto done;
  851         }
  852 
  853         /*
  854          * We can't use HW checksumming if we're about
  855          * to fragment the packet.
  856          *
  857          * XXX Some hardware can do this.
  858          */
  859         if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
  860                 if (IN_NEED_CHECKSUM(ifp,
  861                     m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
  862                         in_delayed_cksum(m);
  863                 }
  864                 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
  865         }
  866 
  867         /*
  868          * Too large for interface; fragment if possible.
  869          * Must be able to put at least 8 bytes per fragment.
  870          */
  871         if (ntohs(ip->ip_off) & IP_DF) {
  872                 if (flags & IP_RETURNMTU)
  873                         *mtu_p = mtu;
  874                 error = EMSGSIZE;
  875                 ipstat.ips_cantfrag++;
  876                 goto bad;
  877         }
  878 
  879         error = ip_fragment(m, ifp, mtu);
  880         if (error) {
  881                 m = NULL;
  882                 goto bad;
  883         }
  884 
  885         for (; m; m = m0) {
  886                 m0 = m->m_nextpkt;
  887                 m->m_nextpkt = 0;
  888                 if (error == 0) {
  889 #if IFA_STATS
  890                         if (ia)
  891                                 ia->ia_ifa.ifa_data.ifad_outbytes +=
  892                                     ntohs(ip->ip_len);
  893 #endif
  894 #ifdef IPSEC
  895                         /* clean ipsec history once it goes out of the node */
  896                         ipsec_delaux(m);
  897 
  898 #ifdef IPSEC_NAT_T
  899                         /*
  900                          * If we get here, the packet has not been handled by
  901                          * IPSec whereas it should have. Now that it has been
  902                          * fragmented, re-inject it in ip_output so that IPsec
  903                          * processing can occur.
  904                          */
  905                         if (natt_frag) {
  906                                 error = ip_output(m, opt,
  907                                     ro, flags, imo, so, mtu_p);
  908                         } else
  909 #endif /* IPSEC_NAT_T */
  910 #endif /* IPSEC */
  911                         {
  912                                 KASSERT((m->m_pkthdr.csum_flags &
  913                                     (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0);
  914                                 error = (*ifp->if_output)(ifp, m, sintosa(dst),
  915                                     ro->ro_rt);
  916                         }
  917                 } else
  918                         m_freem(m);
  919         }
  920 
  921         if (error == 0)
  922                 ipstat.ips_fragmented++;
  923 done:
  924         if (iproute.ro_rt != NULL)
  925                 RTFREE(iproute.ro_rt);
  926 
  927 #ifdef IPSEC
  928         if (sp != NULL) {
  929                 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
  930                         printf("DP ip_output call free SP:%p\n", sp));
  931                 key_freesp(sp);
  932         }
  933 #endif /* IPSEC */
  934 #ifdef FAST_IPSEC
  935         if (sp != NULL)
  936                 KEY_FREESP(&sp);
  937 #endif /* FAST_IPSEC */
  938 
  939         return (error);
  940 bad:
  941         m_freem(m);
  942         goto done;
  943 }
  944 
/*
 * Split the IPv4 packet held in m into fragments of at most mtu bytes.
 *
 * m    packet to fragment; its IP header is at the start of the mbuf
 *      data and ip_len / ip_off are read with ntohs().
 * ifp  outgoing interface, or NULL.  When non-NULL its transmit
 *      checksum capabilities and send-queue room are consulted.
 * mtu  size limit for each fragment, header included.
 *
 * On success returns 0; the original mbuf has been trimmed into the
 * first fragment and the remaining fragments are linked to it through
 * m_nextpkt.  On failure every mbuf, including m, has been freed and
 * the return value is EMSGSIZE (fewer than 8 payload bytes would fit)
 * or ENOBUFS (an allocation failed, or the send queue has no room for
 * all of the fragments).
 */
int
ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu)
{
        struct ip *ip, *mhip;
        struct mbuf *m0;
        int len, hlen, off;
        int mhlen, firstlen;
        struct mbuf **mnext;
        /* checksum work requested for this packet ... */
        int sw_csum = m->m_pkthdr.csum_flags;
        int fragments = 0;
        int s;
        int error = 0;

        ip = mtod(m, struct ip *);
        hlen = ip->ip_hl << 2;
        /* ... minus whatever the interface does itself on transmit */
        if (ifp != NULL)
                sw_csum &= ~ifp->if_csum_flags_tx;

        /* payload bytes per fragment, rounded down to a multiple of 8 */
        len = (mtu - hlen) &~ 7;
        if (len < 8) {
                m_freem(m);
                return (EMSGSIZE);
        }

        firstlen = len;
        mnext = &m->m_nextpkt;

        /*
         * Loop through length of segment after first fragment,
         * make new header and copy data of each part and link onto chain.
         */
        m0 = m;
        mhlen = sizeof (struct ip);
        for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
                MGETHDR(m, M_DONTWAIT, MT_HEADER);
                if (m == 0) {
                        error = ENOBUFS;
                        ipstat.ips_odropped++;
                        goto sendorfree;
                }
                MCLAIM(m, m0->m_owner);
                /* append to the chain first so the error path frees it */
                *mnext = m;
                mnext = &m->m_nextpkt;
                m->m_data += max_linkhdr;
                mhip = mtod(m, struct ip *);
                *mhip = *ip;
                /* we must inherit MCAST and BCAST flags */
                m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
                if (hlen > sizeof (struct ip)) {
                        /* options present: copy them via ip_optcopy() */
                        mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
                        mhip->ip_hl = mhlen >> 2;
                }
                m->m_len = mhlen;
                /*
                 * Fragment offset in 8-byte units, added to the original
                 * offset field with its IP_MF bit masked out; built in
                 * host order and swapped by HTONS() below.
                 */
                mhip->ip_off = ((off - hlen) >> 3) +
                    (ntohs(ip->ip_off) & ~IP_MF);
                /* original already had IP_MF set: keep it on every piece */
                if (ip->ip_off & htons(IP_MF))
                        mhip->ip_off |= IP_MF;
                /* last piece carries the remainder; others get IP_MF */
                if (off + len >= ntohs(ip->ip_len))
                        len = ntohs(ip->ip_len) - off;
                else
                        mhip->ip_off |= IP_MF;
                HTONS(mhip->ip_off);
                mhip->ip_len = htons((u_int16_t)(len + mhlen));
                m->m_next = m_copy(m0, off, len);
                if (m->m_next == 0) {
                        error = ENOBUFS;        /* ??? */
                        ipstat.ips_odropped++;
                        goto sendorfree;
                }
                m->m_pkthdr.len = mhlen + len;
                m->m_pkthdr.rcvif = (struct ifnet *)0;
                mhip->ip_sum = 0;
                if (sw_csum & M_CSUM_IPv4) {
                        /* header checksum has to be done in software */
                        mhip->ip_sum = in_cksum(m, mhlen);
                        KASSERT((m->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0);
                } else {
                        /*
                         * Leave the header checksum pending and record the
                         * header length in the upper 16 bits of csum_data.
                         */
                        m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
                        m->m_pkthdr.csum_data |= mhlen << 16;
                }
                ipstat.ips_ofragments++;
                fragments++;
        }
        /*
         * Update first fragment by trimming what's been copied out
         * and updating header.  Sending the fragments (in order) is
         * left to the caller.
         */
        m = m0;
        /* the count is negative here; see m_adj() in mbuf(9) */
        m_adj(m, hlen + firstlen - ntohs(ip->ip_len));
        m->m_pkthdr.len = hlen + firstlen;
        ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
        ip->ip_off |= htons(IP_MF);
        ip->ip_sum = 0;
        if (sw_csum & M_CSUM_IPv4) {
                ip->ip_sum = in_cksum(m, hlen);
                m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
        } else {
                KASSERT(m->m_pkthdr.csum_flags & M_CSUM_IPv4);
                KASSERT(M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data) >=
                        sizeof(struct ip));
        }
sendorfree:
        /*
         * If there is no room for all the fragments, don't queue
         * any of them.
         */
        if (ifp != NULL) {
                s = splnet();
                if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments &&
                    error == 0) {
                        error = ENOBUFS;
                        ipstat.ips_odropped++;
                        IFQ_INC_DROPS(&ifp->if_snd);
                }
                splx(s);
        }
        if (error) {
                /* release the whole chain, starting from the original mbuf */
                for (m = m0; m; m = m0) {
                        m0 = m->m_nextpkt;
                        m->m_nextpkt = NULL;
                        m_freem(m);
                }
        }
        return (error);
}
 1069 
 1070 /*
 1071  * Process a delayed payload checksum calculation.
 1072  */
 1073 void
 1074 in_delayed_cksum(struct mbuf *m)
 1075 {
 1076         struct ip *ip;
 1077         u_int16_t csum, offset;
 1078 
 1079         ip = mtod(m, struct ip *);
 1080         offset = ip->ip_hl << 2;
 1081         csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset);
 1082         if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0)
 1083                 csum = 0xffff;
 1084 
 1085         offset += M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data);
 1086 
 1087         if ((offset + sizeof(u_int16_t)) > m->m_len) {
 1088                 /* This happen when ip options were inserted
 1089                 printf("in_delayed_cksum: pullup len %d off %d proto %d\n",
 1090                     m->m_len, offset, ip->ip_p);
 1091                  */
 1092                 m_copyback(m, offset, sizeof(csum), (caddr_t) &csum);
 1093         } else
 1094                 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
 1095 }
 1096 
 1097 /*
 1098  * Determine the maximum length of the options to be inserted;
 1099  * we would far rather allocate too much space rather than too little.
 1100  */
 1101 
 1102 u_int
 1103 ip_optlen(struct inpcb *inp)
 1104 {
 1105         struct mbuf *m = inp->inp_options;
 1106 
 1107         if (m && m->m_len > offsetof(struct ipoption, ipopt_dst))
 1108                 return (m->m_len - offsetof(struct ipoption, ipopt_dst));
 1109         else
 1110                 return 0;
 1111 }
 1112 
 1113 
 1114 /*
 1115  * Insert IP options into preformed packet.
 1116  * Adjust IP destination as required for IP source routing,
 1117  * as indicated by a non-zero in_addr at the start of the options.
 1118  */
 1119 static struct mbuf *
 1120 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
 1121 {
 1122         struct ipoption *p = mtod(opt, struct ipoption *);
 1123         struct mbuf *n;
 1124         struct ip *ip = mtod(m, struct ip *);
 1125         unsigned optlen;
 1126 
 1127         optlen = opt->m_len - sizeof(p->ipopt_dst);
 1128         if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
 1129                 return (m);             /* XXX should fail */
 1130         if (!in_nullhost(p->ipopt_dst))
 1131                 ip->ip_dst = p->ipopt_dst;
 1132         if (M_READONLY(m) || M_LEADINGSPACE(m) < optlen) {
 1133                 MGETHDR(n, M_DONTWAIT, MT_HEADER);
 1134                 if (n == 0)
 1135                         return (m);
 1136                 MCLAIM(n, m->m_owner);
 1137                 M_COPY_PKTHDR(n, m);
 1138                 m_tag_delete_chain(m, NULL);
 1139                 m->m_flags &= ~M_PKTHDR;
 1140                 m->m_len -= sizeof(struct ip);
 1141                 m->m_data += sizeof(struct ip);
 1142                 n->m_next = m;
 1143                 m = n;
 1144                 m->m_len = optlen + sizeof(struct ip);
 1145                 m->m_data += max_linkhdr;
 1146                 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
 1147         } else {
 1148                 m->m_data -= optlen;
 1149                 m->m_len += optlen;
 1150                 memmove(mtod(m, caddr_t), ip, sizeof(struct ip));
 1151         }
 1152         m->m_pkthdr.len += optlen;
 1153         ip = mtod(m, struct ip *);
 1154         bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
 1155         *phlen = sizeof(struct ip) + optlen;
 1156         ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
 1157         return (m);
 1158 }
 1159 
 1160 /*
 1161  * Copy options from ip to jp,
 1162  * omitting those not copied during fragmentation.
 1163  */
 1164 int
 1165 ip_optcopy(struct ip *ip, struct ip *jp)
 1166 {
 1167         u_char *cp, *dp;
 1168         int opt, optlen, cnt;
 1169 
 1170         cp = (u_char *)(ip + 1);
 1171         dp = (u_char *)(jp + 1);
 1172         cnt = (ip->ip_hl << 2) - sizeof (struct ip);
 1173         for (; cnt > 0; cnt -= optlen, cp += optlen) {
 1174                 opt = cp[0];
 1175                 if (opt == IPOPT_EOL)
 1176                         break;
 1177                 if (opt == IPOPT_NOP) {
 1178                         /* Preserve for IP mcast tunnel's LSRR alignment. */
 1179                         *dp++ = IPOPT_NOP;
 1180                         optlen = 1;
 1181                         continue;
 1182                 }
 1183 #ifdef DIAGNOSTIC
 1184                 if (cnt < IPOPT_OLEN + sizeof(*cp))
 1185                         panic("malformed IPv4 option passed to ip_optcopy");
 1186 #endif
 1187                 optlen = cp[IPOPT_OLEN];
 1188 #ifdef DIAGNOSTIC
 1189                 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
 1190                         panic("malformed IPv4 option passed to ip_optcopy");
 1191 #endif
 1192                 /* bogus lengths should have been caught by ip_dooptions */
 1193                 if (optlen > cnt)
 1194                         optlen = cnt;
 1195                 if (IPOPT_COPIED(opt)) {
 1196                         bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
 1197                         dp += optlen;
 1198                 }
 1199         }
 1200         for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
 1201                 *dp++ = IPOPT_EOL;
 1202         return (optlen);
 1203 }
 1204 
/*
 * IP socket option processing.
 *
 * op       PRCO_SETOPT or PRCO_GETOPT.
 * so       socket whose inpcb is read or modified.
 * level    must be IPPROTO_IP; any other level yields EINVAL.
 * optname  the IP_* option being set or fetched.
 * mp       for PRCO_SETOPT, *mp holds the option value; for
 *          PRCO_GETOPT, *mp receives an mbuf holding the result.
 *
 * Returns 0 on success, EINVAL for a bad level or a value mbuf of the
 * wrong size, ENOPROTOOPT for an unknown option, or the error handed
 * back by the helper the request was delegated to.
 *
 * For PRCO_SETOPT the value mbuf is freed before returning, except for
 * IP_OPTIONS, where it is passed to ip_pcbopts() and that function's
 * result is returned directly.
 */
int
ip_ctloutput(int op, struct socket *so, int level, int optname,
    struct mbuf **mp)
{
        struct inpcb *inp = sotoinpcb(so);
        struct mbuf *m = *mp;
        int optval = 0;
        int error = 0;
#if defined(IPSEC) || defined(FAST_IPSEC)
        struct proc *p = curproc;       /*XXX*/
#endif

        if (level != IPPROTO_IP) {
                error = EINVAL;
                /* a set request still owns its value mbuf: release it */
                if (op == PRCO_SETOPT && *mp)
                        (void) m_free(*mp);
        } else switch (op) {

        case PRCO_SETOPT:
                switch (optname) {
                case IP_OPTIONS:
#ifdef notyet
                case IP_RETOPTS:
                        return (ip_pcbopts(optname, &inp->inp_options, m));
#else
                        return (ip_pcbopts(&inp->inp_options, m));
#endif

                /* options whose value is a single int */
                case IP_TOS:
                case IP_TTL:
                case IP_RECVOPTS:
                case IP_RECVRETOPTS:
                case IP_RECVDSTADDR:
                case IP_RECVIF:
                        if (m == NULL || m->m_len != sizeof(int))
                                error = EINVAL;
                        else {
                                optval = *mtod(m, int *);
                                switch (optname) {

                                case IP_TOS:
                                        inp->inp_ip.ip_tos = optval;
                                        break;

                                case IP_TTL:
                                        inp->inp_ip.ip_ttl = optval;
                                        break;
/* Set or clear a flag bit in inp_flags depending on optval. */
#define OPTSET(bit) \
        if (optval) \
                inp->inp_flags |= bit; \
        else \
                inp->inp_flags &= ~bit;

                                case IP_RECVOPTS:
                                        OPTSET(INP_RECVOPTS);
                                        break;

                                case IP_RECVRETOPTS:
                                        OPTSET(INP_RECVRETOPTS);
                                        break;

                                case IP_RECVDSTADDR:
                                        OPTSET(INP_RECVDSTADDR);
                                        break;

                                case IP_RECVIF:
                                        OPTSET(INP_RECVIF);
                                        break;
                                }
                        }
                        break;
#undef OPTSET

                /* multicast options are handled by ip_setmoptions() */
                case IP_MULTICAST_IF:
                case IP_MULTICAST_TTL:
                case IP_MULTICAST_LOOP:
                case IP_ADD_MEMBERSHIP:
                case IP_DROP_MEMBERSHIP:
                        error = ip_setmoptions(optname, &inp->inp_moptions, m);
                        break;

                case IP_PORTRANGE:
                        if (m == 0 || m->m_len != sizeof(int))
                                error = EINVAL;
                        else {
                                optval = *mtod(m, int *);

                                switch (optval) {

                                case IP_PORTRANGE_DEFAULT:
                                case IP_PORTRANGE_HIGH:
                                        inp->inp_flags &= ~(INP_LOWPORT);
                                        break;

                                case IP_PORTRANGE_LOW:
                                        inp->inp_flags |= INP_LOWPORT;
                                        break;

                                default:
                                        error = EINVAL;
                                        break;
                                }
                        }
                        break;

#if defined(IPSEC) || defined(FAST_IPSEC)
                case IP_IPSEC_POLICY:
                {
                        caddr_t req = NULL;
                        size_t len = 0;
                        int priv = 0;

#ifdef __NetBSD__
                        /* priv is 1 only when suser() returns 0 for p */
                        if (p == 0 || suser(p->p_ucred, &p->p_acflag))
                                priv = 0;
                        else
                                priv = 1;
#else
                        /*
                         * NOTE(review): no in6p is declared in this
                         * function; this branch looks like it cannot
                         * compile as written -- confirm before enabling.
                         */
                        priv = (in6p->in6p_socket->so_state & SS_PRIV);
#endif
                        if (m) {
                                req = mtod(m, caddr_t);
                                len = m->m_len;
                        }
                        error = ipsec4_set_policy(inp, optname, req, len, priv);
                        break;
                    }
#endif /*IPSEC*/

                default:
                        error = ENOPROTOOPT;
                        break;
                }
                /* every arm that reaches here is done with the value mbuf */
                if (m)
                        (void)m_free(m);
                break;

        case PRCO_GETOPT:
                switch (optname) {
                case IP_OPTIONS:
                case IP_RETOPTS:
                        /* hand back a copy of the stored options mbuf data */
                        *mp = m = m_get(M_WAIT, MT_SOOPTS);
                        MCLAIM(m, so->so_mowner);
                        if (inp->inp_options) {
                                m->m_len = inp->inp_options->m_len;
                                bcopy(mtod(inp->inp_options, caddr_t),
                                    mtod(m, caddr_t), (unsigned)m->m_len);
                        } else
                                m->m_len = 0;
                        break;

                /* options reported as a single int */
                case IP_TOS:
                case IP_TTL:
                case IP_RECVOPTS:
                case IP_RECVRETOPTS:
                case IP_RECVDSTADDR:
                case IP_RECVIF:
                case IP_ERRORMTU:
                        *mp = m = m_get(M_WAIT, MT_SOOPTS);
                        MCLAIM(m, so->so_mowner);
                        m->m_len = sizeof(int);
                        switch (optname) {

                        case IP_TOS:
                                optval = inp->inp_ip.ip_tos;
                                break;

                        case IP_TTL:
                                optval = inp->inp_ip.ip_ttl;
                                break;

                        case IP_ERRORMTU:
                                optval = inp->inp_errormtu;
                                break;

/* 1 if the given flag bit is set in inp_flags, else 0 */
#define OPTBIT(bit)     (inp->inp_flags & bit ? 1 : 0)

                        case IP_RECVOPTS:
                                optval = OPTBIT(INP_RECVOPTS);
                                break;

                        case IP_RECVRETOPTS:
                                optval = OPTBIT(INP_RECVRETOPTS);
                                break;

                        case IP_RECVDSTADDR:
                                optval = OPTBIT(INP_RECVDSTADDR);
                                break;

                        case IP_RECVIF:
                                optval = OPTBIT(INP_RECVIF);
                                break;
                        }
                        *mtod(m, int *) = optval;
                        break;

#if 0   /* defined(IPSEC) || defined(FAST_IPSEC) */
                /* XXX: code broken */
                case IP_IPSEC_POLICY:
                {
                        caddr_t req = NULL;
                        size_t len = 0;

                        if (m) {
                                req = mtod(m, caddr_t);
                                len = m->m_len;
                        }
                        error = ipsec4_get_policy(inp, req, len, mp);
                        break;
                }
#endif /*IPSEC*/

                /* multicast options are fetched by ip_getmoptions() */
                case IP_MULTICAST_IF:
                case IP_MULTICAST_TTL:
                case IP_MULTICAST_LOOP:
                case IP_ADD_MEMBERSHIP:
                case IP_DROP_MEMBERSHIP:
                        error = ip_getmoptions(optname, inp->inp_moptions, mp);
                        if (*mp)
                                MCLAIM(*mp, so->so_mowner);
                        break;

                case IP_PORTRANGE:
                        *mp = m = m_get(M_WAIT, MT_SOOPTS);
                        MCLAIM(m, so->so_mowner);
                        m->m_len = sizeof(int);

                        /* only the low-port flag is stored; HIGH reads back as DEFAULT */
                        if (inp->inp_flags & INP_LOWPORT)
                                optval = IP_PORTRANGE_LOW;
                        else
                                optval = IP_PORTRANGE_DEFAULT;

                        *mtod(m, int *) = optval;
                        break;

                default:
                        error = ENOPROTOOPT;
                        break;
                }
                break;
        }
        return (error);
}
 1451 
 1452 /*
 1453  * Set up IP options in pcb for insertion in output packets.
 1454  * Store in mbuf with pointer in pcbopt, adding pseudo-option
 1455  * with destination address if source routed.
 1456  */
 1457 int
 1458 #ifdef notyet
 1459 ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m)
 1460 #else
 1461 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m)
 1462 #endif
 1463 {
 1464         int cnt, optlen;
 1465         u_char *cp;
 1466         u_char opt;
 1467 
 1468         /* turn off any old options */
 1469         if (*pcbopt)
 1470                 (void)m_free(*pcbopt);
 1471         *pcbopt = 0;
 1472         if (m == (struct mbuf *)0 || m->m_len == 0) {
 1473                 /*
 1474                  * Only turning off any previous options.
 1475                  */
 1476                 if (m)
 1477                         (void)m_free(m);
 1478                 return (0);
 1479         }
 1480 
 1481 #ifndef __vax__
 1482         if (m->m_len % sizeof(int32_t))
 1483                 goto bad;
 1484 #endif
 1485         /*
 1486          * IP first-hop destination address will be stored before
 1487          * actual options; move other options back
 1488          * and clear it when none present.
 1489          */
 1490         if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
 1491                 goto bad;
 1492         cnt = m->m_len;
 1493         m->m_len += sizeof(struct in_addr);
 1494         cp = mtod(m, u_char *) + sizeof(struct in_addr);
 1495         memmove(cp, mtod(m, caddr_t), (unsigned)cnt);
 1496         bzero(mtod(m, caddr_t), sizeof(struct in_addr));
 1497 
 1498         for (; cnt > 0; cnt -= optlen, cp += optlen) {
 1499                 opt = cp[IPOPT_OPTVAL];
 1500                 if (opt == IPOPT_EOL)
 1501                         break;
 1502                 if (opt == IPOPT_NOP)
 1503                         optlen = 1;
 1504                 else {
 1505                         if (cnt < IPOPT_OLEN + sizeof(*cp))
 1506                                 goto bad;
 1507                         optlen = cp[IPOPT_OLEN];
 1508                         if (optlen < IPOPT_OLEN  + sizeof(*cp) || optlen > cnt)
 1509                                 goto bad;
 1510                 }
 1511                 switch (opt) {
 1512 
 1513                 default:
 1514                         break;
 1515 
 1516                 case IPOPT_LSRR:
 1517                 case IPOPT_SSRR:
 1518                         /*
 1519                          * user process specifies route as:
 1520                          *      ->A->B->C->D
 1521                          * D must be our final destination (but we can't
 1522                          * check that since we may not have connected yet).
 1523                          * A is first hop destination, which doesn't appear in
 1524                          * actual IP option, but is stored before the options.
 1525                          */
 1526                         if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
 1527                                 goto bad;
 1528                         m->m_len -= sizeof(struct in_addr);
 1529                         cnt -= sizeof(struct in_addr);
 1530                         optlen -= sizeof(struct in_addr);
 1531                         cp[IPOPT_OLEN] = optlen;
 1532                         /*
 1533                          * Move first hop before start of options.
 1534                          */
 1535                         bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
 1536                             sizeof(struct in_addr));
 1537                         /*
 1538                          * Then copy rest of options back
 1539                          * to close up the deleted entry.
 1540                          */
 1541                         (void)memmove(&cp[IPOPT_OFFSET+1],
 1542                             &cp[IPOPT_OFFSET+1] + sizeof(struct in_addr),
 1543                             (unsigned)cnt - (IPOPT_MINOFF - 1));
 1544                         break;
 1545                 }
 1546         }
 1547         if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
 1548                 goto bad;
 1549         *pcbopt = m;
 1550         return (0);
 1551 
 1552 bad:
 1553         (void)m_free(m);
 1554         return (EINVAL);
 1555 }
 1556 
 1557 /*
 1558  * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
 1559  */
 1560 static struct ifnet *
 1561 ip_multicast_if(struct in_addr *a, int *ifindexp)
 1562 {
 1563         int ifindex;
 1564         struct ifnet *ifp = NULL;
 1565         struct in_ifaddr *ia;
 1566 
 1567         if (ifindexp)
 1568                 *ifindexp = 0;
 1569         if (ntohl(a->s_addr) >> 24 == 0) {
 1570                 ifindex = ntohl(a->s_addr) & 0xffffff;
 1571                 if (ifindex < 0 || if_indexlim <= ifindex)
 1572                         return NULL;
 1573                 ifp = ifindex2ifnet[ifindex];
 1574                 if (!ifp)
 1575                         return NULL;
 1576                 if (ifindexp)
 1577                         *ifindexp = ifindex;
 1578         } else {
 1579                 LIST_FOREACH(ia, &IN_IFADDR_HASH(a->s_addr), ia_hash) {
 1580                         if (in_hosteq(ia->ia_addr.sin_addr, *a) &&
 1581                             (ia->ia_ifp->if_flags & IFF_MULTICAST) != 0) {
 1582                                 ifp = ia->ia_ifp;
 1583                                 break;
 1584                         }
 1585                 }
 1586         }
 1587         return ifp;
 1588 }
 1589 
 1590 static int
 1591 ip_getoptval(struct mbuf *m, u_int8_t *val, u_int maxval)
 1592 {
 1593         u_int tval;
 1594 
 1595         if (m == NULL)
 1596                 return EINVAL;
 1597 
 1598         switch (m->m_len) {
 1599         case sizeof(u_char):
 1600                 tval = *(mtod(m, u_char *));
 1601                 break;
 1602         case sizeof(u_int):
 1603                 tval = *(mtod(m, u_int *));
 1604                 break;
 1605         default:
 1606                 return EINVAL;
 1607         }
 1608 
 1609         if (tval > maxval)
 1610                 return EINVAL;
 1611 
 1612         *val = tval;
 1613         return 0;
 1614 }
 1615 
 1616 /*
 1617  * Set the IP multicast options in response to user setsockopt().
 1618  */
 1619 int
 1620 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m)
 1621 {
 1622         int error = 0;
 1623         int i;
 1624         struct in_addr addr;
 1625         struct ip_mreq *mreq;
 1626         struct ifnet *ifp;
 1627         struct ip_moptions *imo = *imop;
 1628         struct route ro;
 1629         struct sockaddr_in *dst;
 1630         int ifindex;
 1631 
 1632         if (imo == NULL) {
 1633                 /*
 1634                  * No multicast option buffer attached to the pcb;
 1635                  * allocate one and initialize to default values.
 1636                  */
 1637                 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
 1638                     M_WAITOK);
 1639 
 1640                 if (imo == NULL)
 1641                         return (ENOBUFS);
 1642                 *imop = imo;
 1643                 imo->imo_multicast_ifp = NULL;
 1644                 imo->imo_multicast_addr.s_addr = INADDR_ANY;
 1645                 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 1646                 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
 1647                 imo->imo_num_memberships = 0;
 1648         }
 1649 
 1650         switch (optname) {
 1651 
 1652         case IP_MULTICAST_IF:
 1653                 /*
 1654                  * Select the interface for outgoing multicast packets.
 1655                  */
 1656                 if (m == NULL || m->m_len != sizeof(struct in_addr)) {
 1657                         error = EINVAL;
 1658                         break;
 1659                 }
 1660                 addr = *(mtod(m, struct in_addr *));
 1661                 /*
 1662                  * INADDR_ANY is used to remove a previous selection.
 1663                  * When no interface is selected, a default one is
 1664                  * chosen every time a multicast packet is sent.
 1665                  */
 1666                 if (in_nullhost(addr)) {
 1667                         imo->imo_multicast_ifp = NULL;
 1668                         break;
 1669                 }
 1670                 /*
 1671                  * The selected interface is identified by its local
 1672                  * IP address.  Find the interface and confirm that
 1673                  * it supports multicasting.
 1674                  */
 1675                 ifp = ip_multicast_if(&addr, &ifindex);
 1676                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 1677                         error = EADDRNOTAVAIL;
 1678                         break;
 1679                 }
 1680                 imo->imo_multicast_ifp = ifp;
 1681                 if (ifindex)
 1682                         imo->imo_multicast_addr = addr;
 1683                 else
 1684                         imo->imo_multicast_addr.s_addr = INADDR_ANY;
 1685                 break;
 1686 
 1687         case IP_MULTICAST_TTL:
 1688                 /*
 1689                  * Set the IP time-to-live for outgoing multicast packets.
 1690                  */
 1691                 error = ip_getoptval(m, &imo->imo_multicast_ttl, MAXTTL);
 1692                 break;
 1693 
 1694         case IP_MULTICAST_LOOP:
 1695                 /*
 1696                  * Set the loopback flag for outgoing multicast packets.
 1697                  * Must be zero or one.
 1698                  */
 1699                 error = ip_getoptval(m, &imo->imo_multicast_loop, 1);
 1700                 break;
 1701 
 1702         case IP_ADD_MEMBERSHIP:
 1703                 /*
 1704                  * Add a multicast group membership.
 1705                  * Group must be a valid IP multicast address.
 1706                  */
 1707                 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
 1708                         error = EINVAL;
 1709                         break;
 1710                 }
 1711                 mreq = mtod(m, struct ip_mreq *);
 1712                 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
 1713                         error = EINVAL;
 1714                         break;
 1715                 }
 1716                 /*
 1717                  * If no interface address was provided, use the interface of
 1718                  * the route to the given multicast address.
 1719                  */
 1720                 if (in_nullhost(mreq->imr_interface)) {
 1721                         bzero((caddr_t)&ro, sizeof(ro));
 1722                         ro.ro_rt = NULL;
 1723                         dst = satosin(&ro.ro_dst);
 1724                         dst->sin_len = sizeof(*dst);
 1725                         dst->sin_family = AF_INET;
 1726                         dst->sin_addr = mreq->imr_multiaddr;
 1727                         rtalloc(&ro);
 1728                         if (ro.ro_rt == NULL) {
 1729                                 error = EADDRNOTAVAIL;
 1730                                 break;
 1731                         }
 1732                         ifp = ro.ro_rt->rt_ifp;
 1733                         rtfree(ro.ro_rt);
 1734                 } else {
 1735                         ifp = ip_multicast_if(&mreq->imr_interface, NULL);
 1736                 }
 1737                 /*
 1738                  * See if we found an interface, and confirm that it
 1739                  * supports multicast.
 1740                  */
 1741                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 1742                         error = EADDRNOTAVAIL;
 1743                         break;
 1744                 }
 1745                 /*
 1746                  * See if the membership already exists or if all the
 1747                  * membership slots are full.
 1748                  */
 1749                 for (i = 0; i < imo->imo_num_memberships; ++i) {
 1750                         if (imo->imo_membership[i]->inm_ifp == ifp &&
 1751                             in_hosteq(imo->imo_membership[i]->inm_addr,
 1752                                       mreq->imr_multiaddr))
 1753                                 break;
 1754                 }
 1755                 if (i < imo->imo_num_memberships) {
 1756                         error = EADDRINUSE;
 1757                         break;
 1758                 }
 1759                 if (i == IP_MAX_MEMBERSHIPS) {
 1760                         error = ETOOMANYREFS;
 1761                         break;
 1762                 }
 1763                 /*
 1764                  * Everything looks good; add a new record to the multicast
 1765                  * address list for the given interface.
 1766                  */
 1767                 if ((imo->imo_membership[i] =
 1768                     in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
 1769                         error = ENOBUFS;
 1770                         break;
 1771                 }
 1772                 ++imo->imo_num_memberships;
 1773                 break;
 1774 
 1775         case IP_DROP_MEMBERSHIP:
 1776                 /*
 1777                  * Drop a multicast group membership.
 1778                  * Group must be a valid IP multicast address.
 1779                  */
 1780                 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
 1781                         error = EINVAL;
 1782                         break;
 1783                 }
 1784                 mreq = mtod(m, struct ip_mreq *);
 1785                 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
 1786                         error = EINVAL;
 1787                         break;
 1788                 }
 1789                 /*
 1790                  * If an interface address was specified, get a pointer
 1791                  * to its ifnet structure.
 1792                  */
 1793                 if (in_nullhost(mreq->imr_interface))
 1794                         ifp = NULL;
 1795                 else {
 1796                         ifp = ip_multicast_if(&mreq->imr_interface, NULL);
 1797                         if (ifp == NULL) {
 1798                                 error = EADDRNOTAVAIL;
 1799                                 break;
 1800                         }
 1801                 }
 1802                 /*
 1803                  * Find the membership in the membership array.
 1804                  */
 1805                 for (i = 0; i < imo->imo_num_memberships; ++i) {
 1806                         if ((ifp == NULL ||
 1807                              imo->imo_membership[i]->inm_ifp == ifp) &&
 1808                              in_hosteq(imo->imo_membership[i]->inm_addr,
 1809                                        mreq->imr_multiaddr))
 1810                                 break;
 1811                 }
 1812                 if (i == imo->imo_num_memberships) {
 1813                         error = EADDRNOTAVAIL;
 1814                         break;
 1815                 }
 1816                 /*
 1817                  * Give up the multicast address record to which the
 1818                  * membership points.
 1819                  */
 1820                 in_delmulti(imo->imo_membership[i]);
 1821                 /*
 1822                  * Remove the gap in the membership array.
 1823                  */
 1824                 for (++i; i < imo->imo_num_memberships; ++i)
 1825                         imo->imo_membership[i-1] = imo->imo_membership[i];
 1826                 --imo->imo_num_memberships;
 1827                 break;
 1828 
 1829         default:
 1830                 error = EOPNOTSUPP;
 1831                 break;
 1832         }
 1833 
 1834         /*
 1835          * If all options have default values, no need to keep the mbuf.
 1836          */
 1837         if (imo->imo_multicast_ifp == NULL &&
 1838             imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
 1839             imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
 1840             imo->imo_num_memberships == 0) {
 1841                 free(*imop, M_IPMOPTS);
 1842                 *imop = NULL;
 1843         }
 1844 
 1845         return (error);
 1846 }
 1847 
 1848 /*
 1849  * Return the IP multicast options in response to user getsockopt().
 1850  */
 1851 int
 1852 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf **mp)
 1853 {
 1854         u_char *ttl;
 1855         u_char *loop;
 1856         struct in_addr *addr;
 1857         struct in_ifaddr *ia;
 1858 
 1859         *mp = m_get(M_WAIT, MT_SOOPTS);
 1860 
 1861         switch (optname) {
 1862 
 1863         case IP_MULTICAST_IF:
 1864                 addr = mtod(*mp, struct in_addr *);
 1865                 (*mp)->m_len = sizeof(struct in_addr);
 1866                 if (imo == NULL || imo->imo_multicast_ifp == NULL)
 1867                         *addr = zeroin_addr;
 1868                 else if (imo->imo_multicast_addr.s_addr) {
 1869                         /* return the value user has set */
 1870                         *addr = imo->imo_multicast_addr;
 1871                 } else {
 1872                         IFP_TO_IA(imo->imo_multicast_ifp, ia);
 1873                         *addr = ia ? ia->ia_addr.sin_addr : zeroin_addr;
 1874                 }
 1875                 return (0);
 1876 
 1877         case IP_MULTICAST_TTL:
 1878                 ttl = mtod(*mp, u_char *);
 1879                 (*mp)->m_len = 1;
 1880                 *ttl = imo ? imo->imo_multicast_ttl
 1881                            : IP_DEFAULT_MULTICAST_TTL;
 1882                 return (0);
 1883 
 1884         case IP_MULTICAST_LOOP:
 1885                 loop = mtod(*mp, u_char *);
 1886                 (*mp)->m_len = 1;
 1887                 *loop = imo ? imo->imo_multicast_loop
 1888                             : IP_DEFAULT_MULTICAST_LOOP;
 1889                 return (0);
 1890 
 1891         default:
 1892                 return (EOPNOTSUPP);
 1893         }
 1894 }
 1895 
 1896 /*
 1897  * Discard the IP multicast options.
 1898  */
 1899 void
 1900 ip_freemoptions(struct ip_moptions *imo)
 1901 {
 1902         int i;
 1903 
 1904         if (imo != NULL) {
 1905                 for (i = 0; i < imo->imo_num_memberships; ++i)
 1906                         in_delmulti(imo->imo_membership[i]);
 1907                 free(imo, M_IPMOPTS);
 1908         }
 1909 }
 1910 
 1911 /*
 1912  * Routine called from ip_output() to loop back a copy of an IP multicast
 1913  * packet to the input queue of a specified interface.  Note that this
 1914  * calls the output routine of the loopback "driver", but with an interface
 1915  * pointer that might NOT be lo0ifp -- easier than replicating that code here.
 1916  */
 1917 static void
 1918 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst)
 1919 {
 1920         struct ip *ip;
 1921         struct mbuf *copym;
 1922 
 1923         copym = m_copy(m, 0, M_COPYALL);
 1924         if (copym != NULL
 1925          && (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip)))
 1926                 copym = m_pullup(copym, sizeof(struct ip));
 1927         if (copym != NULL) {
 1928                 /*
 1929                  * We don't bother to fragment if the IP length is greater
 1930                  * than the interface's MTU.  Can this possibly matter?
 1931                  */
 1932                 ip = mtod(copym, struct ip *);
 1933 
 1934                 if (copym->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
 1935                         in_delayed_cksum(copym);
 1936                         copym->m_pkthdr.csum_flags &=
 1937                             ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
 1938                 }
 1939 
 1940                 ip->ip_sum = 0;
 1941                 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
 1942                 (void) looutput(ifp, copym, sintosa(dst), NULL);
 1943         }
 1944 }

Cache object: 4007455cba1060b40fe33569517a5078


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.