The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_output.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $OpenBSD: ip_output.c,v 1.382 2022/08/12 17:04:16 bluhm Exp $   */
    2 /*      $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $  */
    3 
    4 /*
    5  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
   33  */
   34 
   35 #include "pf.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/mbuf.h>
   40 #include <sys/protosw.h>
   41 #include <sys/socket.h>
   42 #include <sys/socketvar.h>
   43 #include <sys/proc.h>
   44 #include <sys/kernel.h>
   45 
   46 #include <net/if.h>
   47 #include <net/if_var.h>
   48 #include <net/if_enc.h>
   49 #include <net/route.h>
   50 
   51 #include <netinet/in.h>
   52 #include <netinet/ip.h>
   53 #include <netinet/in_pcb.h>
   54 #include <netinet/in_var.h>
   55 #include <netinet/ip_var.h>
   56 #include <netinet/ip_icmp.h>
   57 #include <netinet/tcp.h>
   58 #include <netinet/udp.h>
   59 #include <netinet/tcp_timer.h>
   60 #include <netinet/tcp_var.h>
   61 #include <netinet/udp_var.h>
   62 
   63 #if NPF > 0
   64 #include <net/pfvar.h>
   65 #endif
   66 
/*
 * DPRINTF(fmt, args...): debug trace helper, defined only when IPSEC is
 * configured.  With ENCDEBUG it prints the calling function's name, a
 * colon, the formatted message and a newline, but only while the
 * encdebug variable is non-zero.  Without ENCDEBUG it expands to an
 * empty statement, so its arguments are not evaluated.
 */
   67 #ifdef IPSEC
   68 #ifdef ENCDEBUG
   69 #define DPRINTF(fmt, args...)                                           \
   70         do {                                                            \
   71                 if (encdebug)                                           \
   72                         printf("%s: " fmt "\n", __func__, ## args);     \
   73         } while (0)
   74 #else
   75 #define DPRINTF(fmt, args...)                                           \
   76         do { } while (0)
   77 #endif
   78 #endif /* IPSEC */
   79 
/*
 * Forward declarations for routines referenced in this file.
 * ip_mloopback(), in_ifcap_cksum() and the ip_output_ipsec_*() helpers
 * are called from ip_output() and ip_fragment() below.
 */
   80 int ip_pcbopts(struct mbuf **, struct mbuf *);
   81 int ip_multicast_if(struct ip_mreqn *, u_int, unsigned int *);
   82 int ip_setmoptions(int, struct ip_moptions **, struct mbuf *, u_int);
   83 void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *);
   84 static __inline u_int16_t __attribute__((__unused__))
   85     in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t);
   86 void in_delayed_cksum(struct mbuf *);
   87 int in_ifcap_cksum(struct mbuf *, struct ifnet *, int);
   88 
/*
 * IPsec output helpers.  The prototypes are unconditional, but the
 * definitions further down are compiled only when IPSEC is defined.
 */
   89 int ip_output_ipsec_lookup(struct mbuf *m, int hlen, struct inpcb *inp,
   90     struct tdb **, int ipsecflowinfo);
   91 void ip_output_ipsec_pmtu_update(struct tdb *, struct route *, struct in_addr,
   92     int, int);
   93 int ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, int);
   94 
   95 /*
   96  * IP output.  The packet in mbuf chain m contains a skeletal IP
   97  * header (with len, off, ttl, proto, tos, src, dst).
   98  * The mbuf chain containing the packet will be freed.
   99  * The mbuf opt, if present, will not be freed.
  100  */
  101 int
  102 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
  103     struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo)
  104 {
  105         struct ip *ip;
  106         struct ifnet *ifp = NULL;
  107         struct mbuf_list fml;
  108         int hlen = sizeof (struct ip);
  109         int error = 0;
  110         struct route iproute;
  111         struct sockaddr_in *dst;
  112         struct tdb *tdb = NULL;
  113         u_long mtu;
  114 #if NPF > 0
  115         u_int orig_rtableid;
  116 #endif
  117 
  118         NET_ASSERT_LOCKED();
  119 
  120 #ifdef IPSEC
  121         if (inp && (inp->inp_flags & INP_IPV6) != 0)
  122                 panic("ip_output: IPv6 pcb is passed");
  123 #endif /* IPSEC */
  124 
  125 #ifdef  DIAGNOSTIC
  126         if ((m->m_flags & M_PKTHDR) == 0)
  127                 panic("ip_output no HDR");
  128 #endif
  129         if (opt)
  130                 m = ip_insertoptions(m, opt, &hlen);
  131 
  132         ip = mtod(m, struct ip *);
  133 
  134         /*
  135          * Fill in IP header.
  136          */
  137         if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
  138                 ip->ip_v = IPVERSION;
  139                 ip->ip_off &= htons(IP_DF);
  140                 ip->ip_id = htons(ip_randomid());
  141                 ip->ip_hl = hlen >> 2;
  142                 ipstat_inc(ips_localout);
  143         } else {
  144                 hlen = ip->ip_hl << 2;
  145         }
  146 
  147         /*
  148          * We should not send traffic to 0/8 say both Stevens and RFCs
  149          * 5735 section 3 and 1122 sections 3.2.1.3 and 3.3.6.
  150          */
  151         if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == 0) {
  152                 error = ENETUNREACH;
  153                 goto bad;
  154         }
  155 
  156 #if NPF > 0
  157         orig_rtableid = m->m_pkthdr.ph_rtableid;
  158 reroute:
  159 #endif
  160 
  161         /*
  162          * Do a route lookup now in case we need the source address to
  163          * do an SPD lookup in IPsec; for most packets, the source address
  164          * is set at a higher level protocol. ICMPs and other packets
  165          * though (e.g., traceroute) have a source address of zeroes.
  166          */
  167         if (ro == NULL) {
  168                 ro = &iproute;
  169                 memset(ro, 0, sizeof(*ro));
  170         }
  171 
  172         dst = satosin(&ro->ro_dst);
  173 
  174         /*
  175          * If there is a cached route, check that it is to the same
  176          * destination and is still up.  If not, free it and try again.
  177          */
  178         if (!rtisvalid(ro->ro_rt) ||
  179             dst->sin_addr.s_addr != ip->ip_dst.s_addr ||
  180             ro->ro_tableid != m->m_pkthdr.ph_rtableid) {
  181                 rtfree(ro->ro_rt);
  182                 ro->ro_rt = NULL;
  183         }
  184 
  185         if (ro->ro_rt == NULL) {
  186                 dst->sin_family = AF_INET;
  187                 dst->sin_len = sizeof(*dst);
  188                 dst->sin_addr = ip->ip_dst;
  189                 ro->ro_tableid = m->m_pkthdr.ph_rtableid;
  190         }
  191 
  192         if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
  193             (ip->ip_dst.s_addr == INADDR_BROADCAST)) &&
  194             imo != NULL && (ifp = if_get(imo->imo_ifidx)) != NULL) {
  195 
  196                 mtu = ifp->if_mtu;
  197                 if (ip->ip_src.s_addr == INADDR_ANY) {
  198                         struct in_ifaddr *ia;
  199 
  200                         IFP_TO_IA(ifp, ia);
  201                         if (ia != NULL)
  202                                 ip->ip_src = ia->ia_addr.sin_addr;
  203                 }
  204         } else {
  205                 struct in_ifaddr *ia;
  206 
  207                 if (ro->ro_rt == NULL)
  208                         ro->ro_rt = rtalloc_mpath(&ro->ro_dst,
  209                             &ip->ip_src.s_addr, ro->ro_tableid);
  210 
  211                 if (ro->ro_rt == NULL) {
  212                         ipstat_inc(ips_noroute);
  213                         error = EHOSTUNREACH;
  214                         goto bad;
  215                 }
  216 
  217                 ia = ifatoia(ro->ro_rt->rt_ifa);
  218                 if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL))
  219                         ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid));
  220                 else
  221                         ifp = if_get(ro->ro_rt->rt_ifidx);
  222                 /*
  223                  * We aren't using rtisvalid() here because the UP/DOWN state
  224                  * machine is broken with some Ethernet drivers like em(4).
  225                  * As a result we might try to use an invalid cached route
  226                  * entry while an interface is being detached.
  227                  */
  228                 if (ifp == NULL) {
  229                         ipstat_inc(ips_noroute);
  230                         error = EHOSTUNREACH;
  231                         goto bad;
  232                 }
  233                 if ((mtu = ro->ro_rt->rt_mtu) == 0)
  234                         mtu = ifp->if_mtu;
  235 
  236                 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
  237                         dst = satosin(ro->ro_rt->rt_gateway);
  238 
  239                 /* Set the source IP address */
  240                 if (ip->ip_src.s_addr == INADDR_ANY && ia)
  241                         ip->ip_src = ia->ia_addr.sin_addr;
  242         }
  243 
  244 #ifdef IPSEC
  245         if (ipsec_in_use || inp != NULL) {
  246                 /* Do we have any pending SAs to apply ? */
  247                 error = ip_output_ipsec_lookup(m, hlen, inp, &tdb,
  248                     ipsecflowinfo);
  249                 if (error) {
  250                         /* Should silently drop packet */
  251                         if (error == -EINVAL)
  252                                 error = 0;
  253                         goto bad;
  254                 }
  255                 if (tdb != NULL) {
  256                         /*
  257                          * If it needs TCP/UDP hardware-checksumming, do the
  258                          * computation now.
  259                          */
  260                         in_proto_cksum_out(m, NULL);
  261                 }
  262         }
  263 #endif /* IPSEC */
  264 
  265         if (IN_MULTICAST(ip->ip_dst.s_addr) ||
  266             (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
  267 
  268                 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
  269                         M_BCAST : M_MCAST;
  270 
  271                 /*
  272                  * IP destination address is multicast.  Make sure "dst"
  273                  * still points to the address in "ro".  (It may have been
  274                  * changed to point to a gateway address, above.)
  275                  */
  276                 dst = satosin(&ro->ro_dst);
  277 
  278                 /*
  279                  * See if the caller provided any multicast options
  280                  */
  281                 if (imo != NULL)
  282                         ip->ip_ttl = imo->imo_ttl;
  283                 else
  284                         ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
  285 
  286                 /*
  287                  * if we don't know the outgoing ifp yet, we can't generate
  288                  * output
  289                  */
  290                 if (!ifp) {
  291                         ipstat_inc(ips_noroute);
  292                         error = EHOSTUNREACH;
  293                         goto bad;
  294                 }
  295 
  296                 /*
  297                  * Confirm that the outgoing interface supports multicast,
  298                  * but only if the packet actually is going out on that
  299                  * interface (i.e., no IPsec is applied).
  300                  */
  301                 if ((((m->m_flags & M_MCAST) &&
  302                       (ifp->if_flags & IFF_MULTICAST) == 0) ||
  303                      ((m->m_flags & M_BCAST) &&
  304                       (ifp->if_flags & IFF_BROADCAST) == 0)) && (tdb == NULL)) {
  305                         ipstat_inc(ips_noroute);
  306                         error = ENETUNREACH;
  307                         goto bad;
  308                 }
  309 
  310                 /*
  311                  * If source address not specified yet, use address
  312                  * of outgoing interface.
  313                  */
  314                 if (ip->ip_src.s_addr == INADDR_ANY) {
  315                         struct in_ifaddr *ia;
  316 
  317                         IFP_TO_IA(ifp, ia);
  318                         if (ia != NULL)
  319                                 ip->ip_src = ia->ia_addr.sin_addr;
  320                 }
  321 
  322                 if ((imo == NULL || imo->imo_loop) &&
  323                     in_hasmulti(&ip->ip_dst, ifp)) {
  324                         /*
  325                          * If we belong to the destination multicast group
  326                          * on the outgoing interface, and the caller did not
  327                          * forbid loopback, loop back a copy.
  328                          * Can't defer TCP/UDP checksumming, do the
  329                          * computation now.
  330                          */
  331                         in_proto_cksum_out(m, NULL);
  332                         ip_mloopback(ifp, m, dst);
  333                 }
  334 #ifdef MROUTING
  335                 else {
  336                         /*
  337                          * If we are acting as a multicast router, perform
  338                          * multicast forwarding as if the packet had just
  339                          * arrived on the interface to which we are about
  340                          * to send.  The multicast forwarding function
  341                          * recursively calls this function, using the
  342                          * IP_FORWARDING flag to prevent infinite recursion.
  343                          *
  344                          * Multicasts that are looped back by ip_mloopback(),
  345                          * above, will be forwarded by the ip_input() routine,
  346                          * if necessary.
  347                          */
  348                         if (ipmforwarding && ip_mrouter[ifp->if_rdomain] &&
  349                             (flags & IP_FORWARDING) == 0) {
  350                                 int rv;
  351 
  352                                 KERNEL_LOCK();
  353                                 rv = ip_mforward(m, ifp);
  354                                 KERNEL_UNLOCK();
  355                                 if (rv != 0)
  356                                         goto bad;
  357                         }
  358                 }
  359 #endif
  360                 /*
  361                  * Multicasts with a time-to-live of zero may be looped-
  362                  * back, above, but must not be transmitted on a network.
  363                  * Also, multicasts addressed to the loopback interface
  364                  * are not sent -- the above call to ip_mloopback() will
  365                  * loop back a copy if this host actually belongs to the
  366                  * destination group on the loopback interface.
  367                  */
  368                 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0)
  369                         goto bad;
  370 
  371                 goto sendit;
  372         }
  373 
  374         /*
  375          * Look for broadcast address and verify user is allowed to send
  376          * such a packet; if the packet is going in an IPsec tunnel, skip
  377          * this check.
  378          */
  379         if ((tdb == NULL) && ((dst->sin_addr.s_addr == INADDR_BROADCAST) ||
  380             (ro && ro->ro_rt && ISSET(ro->ro_rt->rt_flags, RTF_BROADCAST)))) {
  381                 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
  382                         error = EADDRNOTAVAIL;
  383                         goto bad;
  384                 }
  385                 if ((flags & IP_ALLOWBROADCAST) == 0) {
  386                         error = EACCES;
  387                         goto bad;
  388                 }
  389 
  390                 /* Don't allow broadcast messages to be fragmented */
  391                 if (ntohs(ip->ip_len) > ifp->if_mtu) {
  392                         error = EMSGSIZE;
  393                         goto bad;
  394                 }
  395                 m->m_flags |= M_BCAST;
  396         } else
  397                 m->m_flags &= ~M_BCAST;
  398 
  399 sendit:
  400         /*
  401          * If we're doing Path MTU discovery, we need to set DF unless
  402          * the route's MTU is locked.
  403          */
  404         if ((flags & IP_MTUDISC) && ro && ro->ro_rt &&
  405             (ro->ro_rt->rt_locks & RTV_MTU) == 0)
  406                 ip->ip_off |= htons(IP_DF);
  407 
  408 #ifdef IPSEC
  409         /*
  410          * Check if the packet needs encapsulation.
  411          */
  412         if (tdb != NULL) {
  413                 /* Callee frees mbuf */
  414                 error = ip_output_ipsec_send(tdb, m, ro,
  415                     (flags & IP_FORWARDING) ? 1 : 0);
  416                 goto done;
  417         }
  418 #endif /* IPSEC */
  419 
  420         /*
  421          * Packet filter
  422          */
  423 #if NPF > 0
  424         if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT,
  425             ifp, &m) != PF_PASS) {
  426                 error = EACCES;
  427                 goto bad;
  428         }
  429         if (m == NULL)
  430                 goto done;
  431         ip = mtod(m, struct ip *);
  432         hlen = ip->ip_hl << 2;
  433         if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) ==
  434             (PF_TAG_REROUTE | PF_TAG_GENERATED))
  435                 /* already rerun the route lookup, go on */
  436                 m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE);
  437         else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) {
  438                 /* tag as generated to skip over pf_test on rerun */
  439                 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
  440                 ro = NULL;
  441                 if_put(ifp); /* drop reference since target changed */
  442                 ifp = NULL;
  443                 goto reroute;
  444         }
  445 #endif
  446         in_proto_cksum_out(m, ifp);
  447 
  448 #ifdef IPSEC
  449         if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) &&
  450             (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) {
  451                 error = EHOSTUNREACH;
  452                 goto bad;
  453         }
  454 #endif
  455 
  456         /*
  457          * If small enough for interface, can just send directly.
  458          */
  459         if (ntohs(ip->ip_len) <= mtu) {
  460                 ip->ip_sum = 0;
  461                 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4))
  462                         m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
  463                 else {
  464                         ipstat_inc(ips_outswcsum);
  465                         ip->ip_sum = in_cksum(m, hlen);
  466                 }
  467 
  468                 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt);
  469                 goto done;
  470         }
  471 
  472         /*
  473          * Too large for interface; fragment if possible.
  474          * Must be able to put at least 8 bytes per fragment.
  475          */
  476         if (ip->ip_off & htons(IP_DF)) {
  477 #ifdef IPSEC
  478                 if (ip_mtudisc)
  479                         ipsec_adjust_mtu(m, ifp->if_mtu);
  480 #endif
  481                 error = EMSGSIZE;
  482 #if NPF > 0
  483                 /* pf changed routing table, use orig rtable for path MTU */
  484                 if (ro->ro_tableid != orig_rtableid) {
  485                         rtfree(ro->ro_rt);
  486                         ro->ro_tableid = orig_rtableid;
  487                         ro->ro_rt = icmp_mtudisc_clone(
  488                             satosin(&ro->ro_dst)->sin_addr, ro->ro_tableid, 0);
  489                 }
  490 #endif
  491                 /*
  492                  * This case can happen if the user changed the MTU
  493                  * of an interface after enabling IP on it.  Because
  494                  * most netifs don't keep track of routes pointing to
  495                  * them, there is no way for one to update all its
  496                  * routes when the MTU is changed.
  497                  */
  498                 if (rtisvalid(ro->ro_rt) &&
  499                     ISSET(ro->ro_rt->rt_flags, RTF_HOST) &&
  500                     !(ro->ro_rt->rt_locks & RTV_MTU) &&
  501                     (ro->ro_rt->rt_mtu > ifp->if_mtu)) {
  502                         ro->ro_rt->rt_mtu = ifp->if_mtu;
  503                 }
  504                 ipstat_inc(ips_cantfrag);
  505                 goto bad;
  506         }
  507 
  508         error = ip_fragment(m, &fml, ifp, mtu);
  509         if (error)
  510                 goto done;
  511 
  512         while ((m = ml_dequeue(&fml)) != NULL) {
  513                 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt);
  514                 if (error)
  515                         break;
  516         }
  517         if (error)
  518                 ml_purge(&fml);
  519         else
  520                 ipstat_inc(ips_fragmented);
  521 
  522 done:
  523         if (ro == &iproute && ro->ro_rt)
  524                 rtfree(ro->ro_rt);
  525         if_put(ifp);
  526 #ifdef IPSEC
  527         tdb_unref(tdb);
  528 #endif /* IPSEC */
  529         return (error);
  530 
  531 bad:
  532         m_freem(m);
  533         goto done;
  534 }
  535 
  536 #ifdef IPSEC
  537 int
  538 ip_output_ipsec_lookup(struct mbuf *m, int hlen, struct inpcb *inp,
  539     struct tdb **tdbout, int ipsecflowinfo)
  540 {
  541         struct m_tag *mtag;
  542         struct tdb_ident *tdbi;
  543         struct tdb *tdb;
  544         struct ipsec_ids *ids = NULL;
  545         int error;
  546 
  547         /* Do we have any pending SAs to apply ? */
  548         if (ipsecflowinfo)
  549                 ids = ipsp_ids_lookup(ipsecflowinfo);
  550         error = ipsp_spd_lookup(m, AF_INET, hlen, IPSP_DIRECTION_OUT,
  551             NULL, inp, &tdb, ids);
  552         ipsp_ids_free(ids);
  553         if (error || tdb == NULL) {
  554                 *tdbout = NULL;
  555                 return error;
  556         }
  557         /* Loop detection */
  558         for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) {
  559                 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE)
  560                         continue;
  561                 tdbi = (struct tdb_ident *)(mtag + 1);
  562                 if (tdbi->spi == tdb->tdb_spi &&
  563                     tdbi->proto == tdb->tdb_sproto &&
  564                     tdbi->rdomain == tdb->tdb_rdomain &&
  565                     !memcmp(&tdbi->dst, &tdb->tdb_dst,
  566                     sizeof(union sockaddr_union))) {
  567                         /* no IPsec needed */
  568                         tdb_unref(tdb);
  569                         *tdbout = NULL;
  570                         return 0;
  571                 }
  572         }
  573         *tdbout = tdb;
  574         return 0;
  575 }
  576 
  577 void
  578 ip_output_ipsec_pmtu_update(struct tdb *tdb, struct route *ro,
  579     struct in_addr dst, int rtableid, int transportmode)
  580 {
  581         struct rtentry *rt = NULL;
  582         int rt_mtucloned = 0;
  583 
  584         /* Find a host route to store the mtu in */
  585         if (ro != NULL)
  586                 rt = ro->ro_rt;
  587         /* but don't add a PMTU route for transport mode SAs */
  588         if (transportmode)
  589                 rt = NULL;
  590         else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
  591                 rt = icmp_mtudisc_clone(dst, rtableid, 1);
  592                 rt_mtucloned = 1;
  593         }
  594         DPRINTF("spi %08x mtu %d rt %p cloned %d",
  595             ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
  596         if (rt != NULL) {
  597                 rt->rt_mtu = tdb->tdb_mtu;
  598                 if (ro != NULL && ro->ro_rt != NULL) {
  599                         rtfree(ro->ro_rt);
  600                         ro->ro_rt = rtalloc(&ro->ro_dst, RT_RESOLVE, rtableid);
  601                 }
  602                 if (rt_mtucloned)
  603                         rtfree(rt);
  604         }
  605 }
  606 
  607 int
  608 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, int fwd)
  609 {
  610 #if NPF > 0
  611         struct ifnet *encif;
  612 #endif
  613         struct ip *ip;
  614         struct in_addr dst;
  615         int error, rtableid;
  616 
  617 #if NPF > 0
  618         /*
  619          * Packet filter
  620          */
  621         if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL ||
  622             pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) {
  623                 m_freem(m);
  624                 return EACCES;
  625         }
  626         if (m == NULL)
  627                 return 0;
  628         /*
  629          * PF_TAG_REROUTE handling or not...
  630          * Packet is entering IPsec so the routing is
  631          * already overruled by the IPsec policy.
  632          * Until now the change was not reconsidered.
  633          * What's the behaviour?
  634          */
  635         in_proto_cksum_out(m, encif);
  636 #endif
  637 
  638         /* Check if we are allowed to fragment */
  639         ip = mtod(m, struct ip *);
  640         dst = ip->ip_dst;
  641         rtableid = m->m_pkthdr.ph_rtableid;
  642         if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu &&
  643             ntohs(ip->ip_len) > tdb->tdb_mtu &&
  644             tdb->tdb_mtutimeout > gettime()) {
  645                 int transportmode;
  646 
  647                 transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) &&
  648                     (tdb->tdb_dst.sin.sin_addr.s_addr == dst.s_addr);
  649                 ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid,
  650                     transportmode);
  651                 ipsec_adjust_mtu(m, tdb->tdb_mtu);
  652                 m_freem(m);
  653                 return EMSGSIZE;
  654         }
  655         /* propagate IP_DF for v4-over-v6 */
  656         if (ip_mtudisc && ip->ip_off & htons(IP_DF))
  657                 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
  658 
  659         /*
  660          * Clear these -- they'll be set in the recursive invocation
  661          * as needed.
  662          */
  663         m->m_flags &= ~(M_MCAST | M_BCAST);
  664 
  665         /* Callee frees mbuf */
  666         KERNEL_LOCK();
  667         error = ipsp_process_packet(m, tdb, AF_INET, 0);
  668         KERNEL_UNLOCK();
  669         if (error) {
  670                 ipsecstat_inc(ipsec_odrops);
  671                 tdbstat_inc(tdb, tdb_odrops);
  672         }
  673         if (ip_mtudisc && error == EMSGSIZE)
  674                 ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid, 0);
  675         return error;
  676 }
  677 #endif /* IPSEC */
  678 
  679 int
  680 ip_fragment(struct mbuf *m0, struct mbuf_list *fml, struct ifnet *ifp,
  681     u_long mtu)
  682 {
  683         struct mbuf *m;
  684         struct ip *ip;
  685         int firstlen, hlen, tlen, len, off;
  686         int error;
  687 
  688         ml_init(fml);
  689         ml_enqueue(fml, m0);
  690 
  691         ip = mtod(m0, struct ip *);
  692         hlen = ip->ip_hl << 2;
  693         tlen = m0->m_pkthdr.len;
  694         len = (mtu - hlen) &~ 7;
  695         if (len < 8) {
  696                 error = EMSGSIZE;
  697                 goto bad;
  698         }
  699         firstlen = len;
  700 
  701         /*
  702          * If we are doing fragmentation, we can't defer TCP/UDP
  703          * checksumming; compute the checksum and clear the flag.
  704          */
  705         in_proto_cksum_out(m0, NULL);
  706 
  707         /*
  708          * Loop through length of segment after first fragment,
  709          * make new header and copy data of each part and link onto chain.
  710          */
  711         for (off = hlen + firstlen; off < tlen; off += len) {
  712                 struct ip *mhip;
  713                 int mhlen;
  714 
  715                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
  716                 if (m == NULL) {
  717                         error = ENOBUFS;
  718                         goto bad;
  719                 }
  720                 ml_enqueue(fml, m);
  721 
  722                 if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0)
  723                         goto bad;
  724                 m->m_data += max_linkhdr;
  725                 mhip = mtod(m, struct ip *);
  726                 *mhip = *ip;
  727                 if (hlen > sizeof(struct ip)) {
  728                         mhlen = ip_optcopy(ip, mhip) + sizeof(struct ip);
  729                         mhip->ip_hl = mhlen >> 2;
  730                 } else
  731                         mhlen = sizeof(struct ip);
  732                 m->m_len = mhlen;
  733 
  734                 mhip->ip_off = ((off - hlen) >> 3) +
  735                     (ntohs(ip->ip_off) & ~IP_MF);
  736                 if (ip->ip_off & htons(IP_MF))
  737                         mhip->ip_off |= IP_MF;
  738                 if (off + len >= tlen)
  739                         len = tlen - off;
  740                 else
  741                         mhip->ip_off |= IP_MF;
  742                 mhip->ip_off = htons(mhip->ip_off);
  743 
  744                 m->m_pkthdr.len = mhlen + len;
  745                 mhip->ip_len = htons(m->m_pkthdr.len);
  746                 m->m_next = m_copym(m0, off, len, M_NOWAIT);
  747                 if (m->m_next == NULL) {
  748                         error = ENOBUFS;
  749                         goto bad;
  750                 }
  751 
  752                 mhip->ip_sum = 0;
  753                 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4))
  754                         m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
  755                 else {
  756                         ipstat_inc(ips_outswcsum);
  757                         mhip->ip_sum = in_cksum(m, mhlen);
  758                 }
  759         }
  760 
  761         /*
  762          * Update first fragment by trimming what's been copied out
  763          * and updating header, then send each fragment (in order).
  764          */
  765         m = m0;
  766         m_adj(m, hlen + firstlen - tlen);
  767         ip->ip_off |= htons(IP_MF);
  768         ip->ip_len = htons(m->m_pkthdr.len);
  769 
  770         ip->ip_sum = 0;
  771         if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4))
  772                 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
  773         else {
  774                 ipstat_inc(ips_outswcsum);
  775                 ip->ip_sum = in_cksum(m, hlen);
  776         }
  777 
  778         ipstat_add(ips_ofragments, ml_len(fml));
  779         return (0);
  780 
  781 bad:
  782         ipstat_inc(ips_odropped);
  783         ml_purge(fml);
  784         return (error);
  785 }
  786 
  787 /*
  788  * Insert IP options into preformed packet.
  789  * Adjust IP destination as required for IP source routing,
  790  * as indicated by a non-zero in_addr at the start of the options.
  791  */
  792 struct mbuf *
  793 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
  794 {
  795         struct ipoption *p = mtod(opt, struct ipoption *);
  796         struct mbuf *n;
  797         struct ip *ip = mtod(m, struct ip *);
  798         unsigned int optlen;
  799 
  800         optlen = opt->m_len - sizeof(p->ipopt_dst);
  801         if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
  802                 return (m);             /* XXX should fail */
  803 
  804         /* check if options will fit to IP header */
  805         if ((optlen + sizeof(struct ip)) > (0x0f << 2)) {
  806                 *phlen = sizeof(struct ip);
  807                 return (m);
  808         }
  809 
  810         if (p->ipopt_dst.s_addr)
  811                 ip->ip_dst = p->ipopt_dst;
  812         if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
  813                 MGETHDR(n, M_DONTWAIT, MT_HEADER);
  814                 if (n == NULL)
  815                         return (m);
  816                 M_MOVE_HDR(n, m);
  817                 n->m_pkthdr.len += optlen;
  818                 m->m_len -= sizeof(struct ip);
  819                 m->m_data += sizeof(struct ip);
  820                 n->m_next = m;
  821                 m = n;
  822                 m->m_len = optlen + sizeof(struct ip);
  823                 m->m_data += max_linkhdr;
  824                 memcpy(mtod(m, caddr_t), ip, sizeof(struct ip));
  825         } else {
  826                 m->m_data -= optlen;
  827                 m->m_len += optlen;
  828                 m->m_pkthdr.len += optlen;
  829                 memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip));
  830         }
  831         ip = mtod(m, struct ip *);
  832         memcpy(ip + 1, p->ipopt_list, optlen);
  833         *phlen = sizeof(struct ip) + optlen;
  834         ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
  835         return (m);
  836 }
  837 
  838 /*
  839  * Copy options from ip to jp,
  840  * omitting those not copied during fragmentation.
  841  */
  842 int
  843 ip_optcopy(struct ip *ip, struct ip *jp)
  844 {
  845         u_char *cp, *dp;
  846         int opt, optlen, cnt;
  847 
  848         cp = (u_char *)(ip + 1);
  849         dp = (u_char *)(jp + 1);
  850         cnt = (ip->ip_hl << 2) - sizeof (struct ip);
  851         for (; cnt > 0; cnt -= optlen, cp += optlen) {
  852                 opt = cp[0];
  853                 if (opt == IPOPT_EOL)
  854                         break;
  855                 if (opt == IPOPT_NOP) {
  856                         /* Preserve for IP mcast tunnel's LSRR alignment. */
  857                         *dp++ = IPOPT_NOP;
  858                         optlen = 1;
  859                         continue;
  860                 }
  861 #ifdef DIAGNOSTIC
  862                 if (cnt < IPOPT_OLEN + sizeof(*cp))
  863                         panic("malformed IPv4 option passed to ip_optcopy");
  864 #endif
  865                 optlen = cp[IPOPT_OLEN];
  866 #ifdef DIAGNOSTIC
  867                 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
  868                         panic("malformed IPv4 option passed to ip_optcopy");
  869 #endif
  870                 /* bogus lengths should have been caught by ip_dooptions */
  871                 if (optlen > cnt)
  872                         optlen = cnt;
  873                 if (IPOPT_COPIED(opt)) {
  874                         memcpy(dp, cp, optlen);
  875                         dp += optlen;
  876                 }
  877         }
  878         for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
  879                 *dp++ = IPOPT_EOL;
  880         return (optlen);
  881 }
  882 
  883 /*
  884  * IP socket option processing.
  885  */
  886 int
  887 ip_ctloutput(int op, struct socket *so, int level, int optname,
  888     struct mbuf *m)
  889 {
  890         struct inpcb *inp = sotoinpcb(so);
  891         int optval = 0;
  892         struct proc *p = curproc; /* XXX */
  893         int error = 0;
  894         u_int rtableid, rtid = 0;
  895 
  896         if (level != IPPROTO_IP)
  897                 return (EINVAL);
  898 
  899         rtableid = p->p_p->ps_rtableid;
  900 
  901         switch (op) {
  902         case PRCO_SETOPT:
  903                 switch (optname) {
  904                 case IP_OPTIONS:
  905                         return (ip_pcbopts(&inp->inp_options, m));
  906 
  907                 case IP_TOS:
  908                 case IP_TTL:
  909                 case IP_MINTTL:
  910                 case IP_RECVOPTS:
  911                 case IP_RECVRETOPTS:
  912                 case IP_RECVDSTADDR:
  913                 case IP_RECVIF:
  914                 case IP_RECVTTL:
  915                 case IP_RECVDSTPORT:
  916                 case IP_RECVRTABLE:
  917                 case IP_IPSECFLOWINFO:
  918                         if (m == NULL || m->m_len != sizeof(int))
  919                                 error = EINVAL;
  920                         else {
  921                                 optval = *mtod(m, int *);
  922                                 switch (optname) {
  923 
  924                                 case IP_TOS:
  925                                         inp->inp_ip.ip_tos = optval;
  926                                         break;
  927 
  928                                 case IP_TTL:
  929                                         if (optval > 0 && optval <= MAXTTL)
  930                                                 inp->inp_ip.ip_ttl = optval;
  931                                         else if (optval == -1)
  932                                                 inp->inp_ip.ip_ttl = ip_defttl;
  933                                         else
  934                                                 error = EINVAL;
  935                                         break;
  936 
  937                                 case IP_MINTTL:
  938                                         if (optval >= 0 && optval <= MAXTTL)
  939                                                 inp->inp_ip_minttl = optval;
  940                                         else
  941                                                 error = EINVAL;
  942                                         break;
  943 #define OPTSET(bit) \
  944         if (optval) \
  945                 inp->inp_flags |= bit; \
  946         else \
  947                 inp->inp_flags &= ~bit;
  948 
  949                                 case IP_RECVOPTS:
  950                                         OPTSET(INP_RECVOPTS);
  951                                         break;
  952 
  953                                 case IP_RECVRETOPTS:
  954                                         OPTSET(INP_RECVRETOPTS);
  955                                         break;
  956 
  957                                 case IP_RECVDSTADDR:
  958                                         OPTSET(INP_RECVDSTADDR);
  959                                         break;
  960                                 case IP_RECVIF:
  961                                         OPTSET(INP_RECVIF);
  962                                         break;
  963                                 case IP_RECVTTL:
  964                                         OPTSET(INP_RECVTTL);
  965                                         break;
  966                                 case IP_RECVDSTPORT:
  967                                         OPTSET(INP_RECVDSTPORT);
  968                                         break;
  969                                 case IP_RECVRTABLE:
  970                                         OPTSET(INP_RECVRTABLE);
  971                                         break;
  972                                 case IP_IPSECFLOWINFO:
  973                                         OPTSET(INP_IPSECFLOWINFO);
  974                                         break;
  975                                 }
  976                         }
  977                         break;
  978 #undef OPTSET
  979 
  980                 case IP_MULTICAST_IF:
  981                 case IP_MULTICAST_TTL:
  982                 case IP_MULTICAST_LOOP:
  983                 case IP_ADD_MEMBERSHIP:
  984                 case IP_DROP_MEMBERSHIP:
  985                         error = ip_setmoptions(optname, &inp->inp_moptions, m,
  986                             inp->inp_rtableid);
  987                         break;
  988 
  989                 case IP_PORTRANGE:
  990                         if (m == NULL || m->m_len != sizeof(int))
  991                                 error = EINVAL;
  992                         else {
  993                                 optval = *mtod(m, int *);
  994 
  995                                 switch (optval) {
  996 
  997                                 case IP_PORTRANGE_DEFAULT:
  998                                         inp->inp_flags &= ~(INP_LOWPORT);
  999                                         inp->inp_flags &= ~(INP_HIGHPORT);
 1000                                         break;
 1001 
 1002                                 case IP_PORTRANGE_HIGH:
 1003                                         inp->inp_flags &= ~(INP_LOWPORT);
 1004                                         inp->inp_flags |= INP_HIGHPORT;
 1005                                         break;
 1006 
 1007                                 case IP_PORTRANGE_LOW:
 1008                                         inp->inp_flags &= ~(INP_HIGHPORT);
 1009                                         inp->inp_flags |= INP_LOWPORT;
 1010                                         break;
 1011 
 1012                                 default:
 1013 
 1014                                         error = EINVAL;
 1015                                         break;
 1016                                 }
 1017                         }
 1018                         break;
 1019                 case IP_AUTH_LEVEL:
 1020                 case IP_ESP_TRANS_LEVEL:
 1021                 case IP_ESP_NETWORK_LEVEL:
 1022                 case IP_IPCOMP_LEVEL:
 1023 #ifndef IPSEC
 1024                         error = EOPNOTSUPP;
 1025 #else
 1026                         if (m == NULL || m->m_len != sizeof(int)) {
 1027                                 error = EINVAL;
 1028                                 break;
 1029                         }
 1030                         optval = *mtod(m, int *);
 1031 
 1032                         if (optval < IPSEC_LEVEL_BYPASS ||
 1033                             optval > IPSEC_LEVEL_UNIQUE) {
 1034                                 error = EINVAL;
 1035                                 break;
 1036                         }
 1037 
 1038                         switch (optname) {
 1039                         case IP_AUTH_LEVEL:
 1040                                 if (optval < IPSEC_AUTH_LEVEL_DEFAULT &&
 1041                                     suser(p)) {
 1042                                         error = EACCES;
 1043                                         break;
 1044                                 }
 1045                                 inp->inp_seclevel[SL_AUTH] = optval;
 1046                                 break;
 1047 
 1048                         case IP_ESP_TRANS_LEVEL:
 1049                                 if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT &&
 1050                                     suser(p)) {
 1051                                         error = EACCES;
 1052                                         break;
 1053                                 }
 1054                                 inp->inp_seclevel[SL_ESP_TRANS] = optval;
 1055                                 break;
 1056 
 1057                         case IP_ESP_NETWORK_LEVEL:
 1058                                 if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT &&
 1059                                     suser(p)) {
 1060                                         error = EACCES;
 1061                                         break;
 1062                                 }
 1063                                 inp->inp_seclevel[SL_ESP_NETWORK] = optval;
 1064                                 break;
 1065                         case IP_IPCOMP_LEVEL:
 1066                                 if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT &&
 1067                                     suser(p)) {
 1068                                         error = EACCES;
 1069                                         break;
 1070                                 }
 1071                                 inp->inp_seclevel[SL_IPCOMP] = optval;
 1072                                 break;
 1073                         }
 1074 #endif
 1075                         break;
 1076 
 1077                 case IP_IPSEC_LOCAL_ID:
 1078                 case IP_IPSEC_REMOTE_ID:
 1079                         error = EOPNOTSUPP;
 1080                         break;
 1081                 case SO_RTABLE:
 1082                         if (m == NULL || m->m_len < sizeof(u_int)) {
 1083                                 error = EINVAL;
 1084                                 break;
 1085                         }
 1086                         rtid = *mtod(m, u_int *);
 1087                         if (inp->inp_rtableid == rtid)
 1088                                 break;
 1089                         /* needs privileges to switch when already set */
 1090                         if (rtableid != rtid && rtableid != 0 &&
 1091                             (error = suser(p)) != 0)
 1092                                 break;
 1093                         /* table must exist */
 1094                         if (!rtable_exists(rtid)) {
 1095                                 error = EINVAL;
 1096                                 break;
 1097                         }
 1098                         if (inp->inp_lport) {
 1099                                 error = EBUSY;
 1100                                 break;
 1101                         }
 1102                         inp->inp_rtableid = rtid;
 1103                         in_pcbrehash(inp);
 1104                         break;
 1105                 case IP_PIPEX:
 1106                         if (m != NULL && m->m_len == sizeof(int))
 1107                                 inp->inp_pipex = *mtod(m, int *);
 1108                         else
 1109                                 error = EINVAL;
 1110                         break;
 1111 
 1112                 default:
 1113                         error = ENOPROTOOPT;
 1114                         break;
 1115                 }
 1116                 break;
 1117 
 1118         case PRCO_GETOPT:
 1119                 switch (optname) {
 1120                 case IP_OPTIONS:
 1121                 case IP_RETOPTS:
 1122                         if (inp->inp_options) {
 1123                                 m->m_len = inp->inp_options->m_len;
 1124                                 memcpy(mtod(m, caddr_t),
 1125                                     mtod(inp->inp_options, caddr_t), m->m_len);
 1126                         } else
 1127                                 m->m_len = 0;
 1128                         break;
 1129 
 1130                 case IP_TOS:
 1131                 case IP_TTL:
 1132                 case IP_MINTTL:
 1133                 case IP_RECVOPTS:
 1134                 case IP_RECVRETOPTS:
 1135                 case IP_RECVDSTADDR:
 1136                 case IP_RECVIF:
 1137                 case IP_RECVTTL:
 1138                 case IP_RECVDSTPORT:
 1139                 case IP_RECVRTABLE:
 1140                 case IP_IPSECFLOWINFO:
 1141                 case IP_IPDEFTTL:
 1142                         m->m_len = sizeof(int);
 1143                         switch (optname) {
 1144 
 1145                         case IP_TOS:
 1146                                 optval = inp->inp_ip.ip_tos;
 1147                                 break;
 1148 
 1149                         case IP_TTL:
 1150                                 optval = inp->inp_ip.ip_ttl;
 1151                                 break;
 1152 
 1153                         case IP_MINTTL:
 1154                                 optval = inp->inp_ip_minttl;
 1155                                 break;
 1156 
 1157                         case IP_IPDEFTTL:
 1158                                 optval = ip_defttl;
 1159                                 break;
 1160 
 1161 #define OPTBIT(bit)     (inp->inp_flags & bit ? 1 : 0)
 1162 
 1163                         case IP_RECVOPTS:
 1164                                 optval = OPTBIT(INP_RECVOPTS);
 1165                                 break;
 1166 
 1167                         case IP_RECVRETOPTS:
 1168                                 optval = OPTBIT(INP_RECVRETOPTS);
 1169                                 break;
 1170 
 1171                         case IP_RECVDSTADDR:
 1172                                 optval = OPTBIT(INP_RECVDSTADDR);
 1173                                 break;
 1174                         case IP_RECVIF:
 1175                                 optval = OPTBIT(INP_RECVIF);
 1176                                 break;
 1177                         case IP_RECVTTL:
 1178                                 optval = OPTBIT(INP_RECVTTL);
 1179                                 break;
 1180                         case IP_RECVDSTPORT:
 1181                                 optval = OPTBIT(INP_RECVDSTPORT);
 1182                                 break;
 1183                         case IP_RECVRTABLE:
 1184                                 optval = OPTBIT(INP_RECVRTABLE);
 1185                                 break;
 1186                         case IP_IPSECFLOWINFO:
 1187                                 optval = OPTBIT(INP_IPSECFLOWINFO);
 1188                                 break;
 1189                         }
 1190                         *mtod(m, int *) = optval;
 1191                         break;
 1192 
 1193                 case IP_MULTICAST_IF:
 1194                 case IP_MULTICAST_TTL:
 1195                 case IP_MULTICAST_LOOP:
 1196                 case IP_ADD_MEMBERSHIP:
 1197                 case IP_DROP_MEMBERSHIP:
 1198                         error = ip_getmoptions(optname, inp->inp_moptions, m);
 1199                         break;
 1200 
 1201                 case IP_PORTRANGE:
 1202                         m->m_len = sizeof(int);
 1203 
 1204                         if (inp->inp_flags & INP_HIGHPORT)
 1205                                 optval = IP_PORTRANGE_HIGH;
 1206                         else if (inp->inp_flags & INP_LOWPORT)
 1207                                 optval = IP_PORTRANGE_LOW;
 1208                         else
 1209                                 optval = 0;
 1210 
 1211                         *mtod(m, int *) = optval;
 1212                         break;
 1213 
 1214                 case IP_AUTH_LEVEL:
 1215                 case IP_ESP_TRANS_LEVEL:
 1216                 case IP_ESP_NETWORK_LEVEL:
 1217                 case IP_IPCOMP_LEVEL:
 1218 #ifndef IPSEC
 1219                         m->m_len = sizeof(int);
 1220                         *mtod(m, int *) = IPSEC_LEVEL_NONE;
 1221 #else
 1222                         m->m_len = sizeof(int);
 1223                         switch (optname) {
 1224                         case IP_AUTH_LEVEL:
 1225                                 optval = inp->inp_seclevel[SL_AUTH];
 1226                                 break;
 1227 
 1228                         case IP_ESP_TRANS_LEVEL:
 1229                                 optval = inp->inp_seclevel[SL_ESP_TRANS];
 1230                                 break;
 1231 
 1232                         case IP_ESP_NETWORK_LEVEL:
 1233                                 optval = inp->inp_seclevel[SL_ESP_NETWORK];
 1234                                 break;
 1235                         case IP_IPCOMP_LEVEL:
 1236                                 optval = inp->inp_seclevel[SL_IPCOMP];
 1237                                 break;
 1238                         }
 1239                         *mtod(m, int *) = optval;
 1240 #endif
 1241                         break;
 1242                 case IP_IPSEC_LOCAL_ID:
 1243                 case IP_IPSEC_REMOTE_ID:
 1244                         error = EOPNOTSUPP;
 1245                         break;
 1246                 case SO_RTABLE:
 1247                         m->m_len = sizeof(u_int);
 1248                         *mtod(m, u_int *) = inp->inp_rtableid;
 1249                         break;
 1250                 case IP_PIPEX:
 1251                         m->m_len = sizeof(int);
 1252                         *mtod(m, int *) = inp->inp_pipex;
 1253                         break;
 1254                 default:
 1255                         error = ENOPROTOOPT;
 1256                         break;
 1257                 }
 1258                 break;
 1259         }
 1260         return (error);
 1261 }
 1262 
 1263 /*
 1264  * Set up IP options in pcb for insertion in output packets.
 1265  * Store in mbuf with pointer in pcbopt, adding pseudo-option
 1266  * with destination address if source routed.
 1267  */
 1268 int
 1269 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m)
 1270 {
 1271         struct mbuf *n;
 1272         struct ipoption *p;
 1273         int cnt, off, optlen;
 1274         u_char *cp;
 1275         u_char opt;
 1276 
 1277         /* turn off any old options */
 1278         m_freem(*pcbopt);
 1279         *pcbopt = NULL;
 1280         if (m == NULL || m->m_len == 0) {
 1281                 /*
 1282                  * Only turning off any previous options.
 1283                  */
 1284                 return (0);
 1285         }
 1286 
 1287         if (m->m_len % sizeof(int32_t) ||
 1288             m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
 1289                 return (EINVAL);
 1290 
 1291         /* Don't sleep because NET_LOCK() is hold. */
 1292         if ((n = m_get(M_NOWAIT, MT_SOOPTS)) == NULL)
 1293                 return (ENOBUFS);
 1294         p = mtod(n, struct ipoption *);
 1295         memset(p, 0, sizeof (*p));      /* 0 = IPOPT_EOL, needed for padding */
 1296         n->m_len = sizeof(struct in_addr);
 1297 
 1298         off = 0;
 1299         cnt = m->m_len;
 1300         cp = mtod(m, u_char *);
 1301 
 1302         while (cnt > 0) {
 1303                 opt = cp[IPOPT_OPTVAL];
 1304 
 1305                 if (opt == IPOPT_NOP || opt == IPOPT_EOL) {
 1306                         optlen = 1;
 1307                 } else {
 1308                         if (cnt < IPOPT_OLEN + sizeof(*cp))
 1309                                 goto bad;
 1310                         optlen = cp[IPOPT_OLEN];
 1311                         if (optlen < IPOPT_OLEN  + sizeof(*cp) || optlen > cnt)
 1312                                 goto bad;
 1313                 }
 1314                 switch (opt) {
 1315                 default:
 1316                         memcpy(p->ipopt_list + off, cp, optlen);
 1317                         break;
 1318 
 1319                 case IPOPT_LSRR:
 1320                 case IPOPT_SSRR:
 1321                         /*
 1322                          * user process specifies route as:
 1323                          *      ->A->B->C->D
 1324                          * D must be our final destination (but we can't
 1325                          * check that since we may not have connected yet).
 1326                          * A is first hop destination, which doesn't appear in
 1327                          * actual IP option, but is stored before the options.
 1328                          */
 1329                         if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
 1330                                 goto bad;
 1331 
 1332                         /*
 1333                          * Optlen is smaller because first address is popped.
 1334                          * Cnt and cp will be adjusted a bit later to reflect
 1335                          * this.
 1336                          */
 1337                         optlen -= sizeof(struct in_addr);
 1338                         p->ipopt_list[off + IPOPT_OPTVAL] = opt;
 1339                         p->ipopt_list[off + IPOPT_OLEN] = optlen;
 1340 
 1341                         /*
 1342                          * Move first hop before start of options.
 1343                          */
 1344                         memcpy(&p->ipopt_dst, cp + IPOPT_OFFSET,
 1345                             sizeof(struct in_addr));
 1346                         cp += sizeof(struct in_addr);
 1347                         cnt -= sizeof(struct in_addr);
 1348                         /*
 1349                          * Then copy rest of options
 1350                          */
 1351                         memcpy(p->ipopt_list + off + IPOPT_OFFSET,
 1352                             cp + IPOPT_OFFSET, optlen - IPOPT_OFFSET);
 1353                         break;
 1354                 }
 1355                 off += optlen;
 1356                 cp += optlen;
 1357                 cnt -= optlen;
 1358 
 1359                 if (opt == IPOPT_EOL)
 1360                         break;
 1361         }
 1362         /* pad options to next word, since p was zeroed just adjust off */
 1363         off = (off + sizeof(int32_t) - 1) & ~(sizeof(int32_t) - 1);
 1364         n->m_len += off;
 1365         if (n->m_len > sizeof(*p)) {
 1366  bad:
 1367                 m_freem(n);
 1368                 return (EINVAL);
 1369         }
 1370 
 1371         *pcbopt = n;
 1372         return (0);
 1373 }
 1374 
 1375 /*
 1376  * Lookup the interface based on the information in the ip_mreqn struct.
 1377  */
 1378 int
 1379 ip_multicast_if(struct ip_mreqn *mreq, u_int rtableid, unsigned int *ifidx)
 1380 {
 1381         struct sockaddr_in sin;
 1382         struct rtentry *rt;
 1383 
 1384         /*
 1385          * In case userland provides the imr_ifindex use this as interface.
 1386          * If no interface address was provided, use the interface of
 1387          * the route to the given multicast address.
 1388          */
 1389         if (mreq->imr_ifindex != 0) {
 1390                 *ifidx = mreq->imr_ifindex;
 1391         } else if (mreq->imr_address.s_addr == INADDR_ANY) {
 1392                 memset(&sin, 0, sizeof(sin));
 1393                 sin.sin_len = sizeof(sin);
 1394                 sin.sin_family = AF_INET;
 1395                 sin.sin_addr = mreq->imr_multiaddr;
 1396                 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid);
 1397                 if (!rtisvalid(rt)) {
 1398                         rtfree(rt);
 1399                         return EADDRNOTAVAIL;
 1400                 }
 1401                 *ifidx = rt->rt_ifidx;
 1402                 rtfree(rt);
 1403         } else {
 1404                 memset(&sin, 0, sizeof(sin));
 1405                 sin.sin_len = sizeof(sin);
 1406                 sin.sin_family = AF_INET;
 1407                 sin.sin_addr = mreq->imr_address;
 1408                 rt = rtalloc(sintosa(&sin), 0, rtableid);
 1409                 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
 1410                         rtfree(rt);
 1411                         return EADDRNOTAVAIL;
 1412                 }
 1413                 *ifidx = rt->rt_ifidx;
 1414                 rtfree(rt);
 1415         }
 1416 
 1417         return 0;
 1418 }
 1419 
 1420 /*
 1421  * Set the IP multicast options in response to user setsockopt().
 1422  */
 1423 int
 1424 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m,
 1425     u_int rtableid)
 1426 {
 1427         struct in_addr addr;
 1428         struct in_ifaddr *ia;
 1429         struct ip_mreqn mreqn;
 1430         struct ifnet *ifp = NULL;
 1431         struct ip_moptions *imo = *imop;
 1432         struct in_multi **immp;
 1433         struct sockaddr_in sin;
 1434         unsigned int ifidx;
 1435         int i, error = 0;
 1436         u_char loop;
 1437 
 1438         if (imo == NULL) {
 1439                 /*
 1440                  * No multicast option buffer attached to the pcb;
 1441                  * allocate one and initialize to default values.
 1442                  */
 1443                 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK|M_ZERO);
 1444                 immp = mallocarray(IP_MIN_MEMBERSHIPS, sizeof(*immp), M_IPMOPTS,
 1445                     M_WAITOK|M_ZERO);
 1446                 *imop = imo;
 1447                 imo->imo_ifidx = 0;
 1448                 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL;
 1449                 imo->imo_loop = IP_DEFAULT_MULTICAST_LOOP;
 1450                 imo->imo_num_memberships = 0;
 1451                 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
 1452                 imo->imo_membership = immp;
 1453         }
 1454 
 1455         switch (optname) {
 1456 
 1457         case IP_MULTICAST_IF:
 1458                 /*
 1459                  * Select the interface for outgoing multicast packets.
 1460                  */
 1461                 if (m == NULL) {
 1462                         error = EINVAL;
 1463                         break;
 1464                 }
 1465                 if (m->m_len == sizeof(struct in_addr)) {
 1466                         addr = *(mtod(m, struct in_addr *));
 1467                 } else if (m->m_len == sizeof(struct ip_mreq) ||
 1468                     m->m_len == sizeof(struct ip_mreqn)) {
 1469                         memset(&mreqn, 0, sizeof(mreqn));
 1470                         memcpy(&mreqn, mtod(m, void *), m->m_len);
 1471 
 1472                         /*
 1473                          * If an interface index is given use this
 1474                          * index to set the imo_ifidx but check first
 1475                          * that the interface actually exists.
 1476                          * In the other case just set the addr to
 1477                          * the imr_address and fall through to the
 1478                          * regular code.
 1479                          */
 1480                         if (mreqn.imr_ifindex != 0) {
 1481                                 ifp = if_get(mreqn.imr_ifindex);
 1482                                 if (ifp == NULL ||
 1483                                     ifp->if_rdomain != rtable_l2(rtableid)) {
 1484                                         error = EADDRNOTAVAIL;
 1485                                         if_put(ifp);
 1486                                         break;
 1487                                 }
 1488                                 imo->imo_ifidx = ifp->if_index;
 1489                                 if_put(ifp);
 1490                                 break;
 1491                         } else
 1492                                 addr = mreqn.imr_address;
 1493                 } else {
 1494                         error = EINVAL;
 1495                         break;
 1496                 }
 1497                 /*
 1498                  * INADDR_ANY is used to remove a previous selection.
 1499                  * When no interface is selected, a default one is
 1500                  * chosen every time a multicast packet is sent.
 1501                  */
 1502                 if (addr.s_addr == INADDR_ANY) {
 1503                         imo->imo_ifidx = 0;
 1504                         break;
 1505                 }
 1506                 /*
 1507                  * The selected interface is identified by its local
 1508                  * IP address.  Find the interface and confirm that
 1509                  * it supports multicasting.
 1510                  */
 1511                 memset(&sin, 0, sizeof(sin));
 1512                 sin.sin_len = sizeof(sin);
 1513                 sin.sin_family = AF_INET;
 1514                 sin.sin_addr = addr;
 1515                 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid));
 1516                 if (ia == NULL ||
 1517                     (ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) {
 1518                         error = EADDRNOTAVAIL;
 1519                         break;
 1520                 }
 1521                 imo->imo_ifidx = ia->ia_ifp->if_index;
 1522                 break;
 1523 
 1524         case IP_MULTICAST_TTL:
 1525                 /*
 1526                  * Set the IP time-to-live for outgoing multicast packets.
 1527                  */
 1528                 if (m == NULL || m->m_len != 1) {
 1529                         error = EINVAL;
 1530                         break;
 1531                 }
 1532                 imo->imo_ttl = *(mtod(m, u_char *));
 1533                 break;
 1534 
 1535         case IP_MULTICAST_LOOP:
 1536                 /*
 1537                  * Set the loopback flag for outgoing multicast packets.
 1538                  * Must be zero or one.
 1539                  */
 1540                 if (m == NULL || m->m_len != 1 ||
 1541                    (loop = *(mtod(m, u_char *))) > 1) {
 1542                         error = EINVAL;
 1543                         break;
 1544                 }
 1545                 imo->imo_loop = loop;
 1546                 break;
 1547 
 1548         case IP_ADD_MEMBERSHIP:
 1549                 /*
 1550                  * Add a multicast group membership.
 1551                  * Group must be a valid IP multicast address.
 1552                  */
 1553                 if (m == NULL || !(m->m_len == sizeof(struct ip_mreq) ||
 1554                     m->m_len == sizeof(struct ip_mreqn))) {
 1555                         error = EINVAL;
 1556                         break;
 1557                 }
 1558                 memset(&mreqn, 0, sizeof(mreqn));
 1559                 memcpy(&mreqn, mtod(m, void *), m->m_len);
 1560                 if (!IN_MULTICAST(mreqn.imr_multiaddr.s_addr)) {
 1561                         error = EINVAL;
 1562                         break;
 1563                 }
 1564 
 1565                 error = ip_multicast_if(&mreqn, rtableid, &ifidx);
 1566                 if (error)
 1567                         break;
 1568 
 1569                 /*
 1570                  * See if we found an interface, and confirm that it
 1571                  * supports multicast.
 1572                  */
 1573                 ifp = if_get(ifidx);
 1574                 if (ifp == NULL || ifp->if_rdomain != rtable_l2(rtableid) ||
 1575                     (ifp->if_flags & IFF_MULTICAST) == 0) {
 1576                         error = EADDRNOTAVAIL;
 1577                         if_put(ifp);
 1578                         break;
 1579                 }
 1580 
 1581                 /*
 1582                  * See if the membership already exists or if all the
 1583                  * membership slots are full.
 1584                  */
 1585                 for (i = 0; i < imo->imo_num_memberships; ++i) {
 1586                         if (imo->imo_membership[i]->inm_ifidx == ifidx &&
 1587                             imo->imo_membership[i]->inm_addr.s_addr
 1588                                                 == mreqn.imr_multiaddr.s_addr)
 1589                                 break;
 1590                 }
 1591                 if (i < imo->imo_num_memberships) {
 1592                         error = EADDRINUSE;
 1593                         if_put(ifp);
 1594                         break;
 1595                 }
 1596                 if (imo->imo_num_memberships == imo->imo_max_memberships) {
 1597                         struct in_multi **nmships, **omships;
 1598                         size_t newmax;
 1599                         /*
 1600                          * Resize the vector to next power-of-two minus 1. If
 1601                          * the size would exceed the maximum then we know we've
 1602                          * really run out of entries. Otherwise, we reallocate
 1603                          * the vector.
 1604                          */
 1605                         nmships = NULL;
 1606                         omships = imo->imo_membership;
 1607                         newmax = ((imo->imo_max_memberships + 1) * 2) - 1;
 1608                         if (newmax <= IP_MAX_MEMBERSHIPS) {
 1609                                 nmships = mallocarray(newmax, sizeof(*nmships),
 1610                                     M_IPMOPTS, M_NOWAIT|M_ZERO);
 1611                                 if (nmships != NULL) {
 1612                                         memcpy(nmships, omships,
 1613                                             sizeof(*omships) *
 1614                                             imo->imo_max_memberships);
 1615                                         free(omships, M_IPMOPTS,
 1616                                             sizeof(*omships) *
 1617                                             imo->imo_max_memberships);
 1618                                         imo->imo_membership = nmships;
 1619                                         imo->imo_max_memberships = newmax;
 1620                                 }
 1621                         }
 1622                         if (nmships == NULL) {
 1623                                 error = ENOBUFS;
 1624                                 if_put(ifp);
 1625                                 break;
 1626                         }
 1627                 }
 1628                 /*
 1629                  * Everything looks good; add a new record to the multicast
 1630                  * address list for the given interface.
 1631                  */
 1632                 if ((imo->imo_membership[i] =
 1633                     in_addmulti(&mreqn.imr_multiaddr, ifp)) == NULL) {
 1634                         error = ENOBUFS;
 1635                         if_put(ifp);
 1636                         break;
 1637                 }
 1638                 ++imo->imo_num_memberships;
 1639                 if_put(ifp);
 1640                 break;
 1641 
 1642         case IP_DROP_MEMBERSHIP:
 1643                 /*
 1644                  * Drop a multicast group membership.
 1645                  * Group must be a valid IP multicast address.
 1646                  */
 1647                 if (m == NULL || !(m->m_len == sizeof(struct ip_mreq) ||
 1648                     m->m_len == sizeof(struct ip_mreqn))) {
 1649                         error = EINVAL;
 1650                         break;
 1651                 }
 1652                 memset(&mreqn, 0, sizeof(mreqn));
 1653                 memcpy(&mreqn, mtod(m, void *), m->m_len);
 1654                 if (!IN_MULTICAST(mreqn.imr_multiaddr.s_addr)) {
 1655                         error = EINVAL;
 1656                         break;
 1657                 }
 1658 
 1659                 /*
 1660                  * If an interface address was specified, get a pointer
 1661                  * to its ifnet structure.
 1662                  */
 1663                 error = ip_multicast_if(&mreqn, rtableid, &ifidx);
 1664                 if (error)
 1665                         break;
 1666 
 1667                 /*
 1668                  * Find the membership in the membership array.
 1669                  */
 1670                 for (i = 0; i < imo->imo_num_memberships; ++i) {
 1671                         if ((ifidx == 0 ||
 1672                             imo->imo_membership[i]->inm_ifidx == ifidx) &&
 1673                              imo->imo_membership[i]->inm_addr.s_addr ==
 1674                              mreqn.imr_multiaddr.s_addr)
 1675                                 break;
 1676                 }
 1677                 if (i == imo->imo_num_memberships) {
 1678                         error = EADDRNOTAVAIL;
 1679                         break;
 1680                 }
 1681                 /*
 1682                  * Give up the multicast address record to which the
 1683                  * membership points.
 1684                  */
 1685                 in_delmulti(imo->imo_membership[i]);
 1686                 /*
 1687                  * Remove the gap in the membership array.
 1688                  */
 1689                 for (++i; i < imo->imo_num_memberships; ++i)
 1690                         imo->imo_membership[i-1] = imo->imo_membership[i];
 1691                 --imo->imo_num_memberships;
 1692                 break;
 1693 
 1694         default:
 1695                 error = EOPNOTSUPP;
 1696                 break;
 1697         }
 1698 
 1699         /*
 1700          * If all options have default values, no need to keep the data.
 1701          */
 1702         if (imo->imo_ifidx == 0 &&
 1703             imo->imo_ttl == IP_DEFAULT_MULTICAST_TTL &&
 1704             imo->imo_loop == IP_DEFAULT_MULTICAST_LOOP &&
 1705             imo->imo_num_memberships == 0) {
 1706                 free(imo->imo_membership , M_IPMOPTS,
 1707                     imo->imo_max_memberships * sizeof(struct in_multi *));
 1708                 free(*imop, M_IPMOPTS, sizeof(**imop));
 1709                 *imop = NULL;
 1710         }
 1711 
 1712         return (error);
 1713 }
 1714 
 1715 /*
 1716  * Return the IP multicast options in response to user getsockopt().
 1717  */
 1718 int
 1719 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf *m)
 1720 {
 1721         u_char *ttl;
 1722         u_char *loop;
 1723         struct in_addr *addr;
 1724         struct in_ifaddr *ia;
 1725         struct ifnet *ifp;
 1726 
 1727         switch (optname) {
 1728 
 1729         case IP_MULTICAST_IF:
 1730                 addr = mtod(m, struct in_addr *);
 1731                 m->m_len = sizeof(struct in_addr);
 1732                 if (imo == NULL || (ifp = if_get(imo->imo_ifidx)) == NULL)
 1733                         addr->s_addr = INADDR_ANY;
 1734                 else {
 1735                         IFP_TO_IA(ifp, ia);
 1736                         addr->s_addr = (ia == NULL) ? INADDR_ANY
 1737                                         : ia->ia_addr.sin_addr.s_addr;
 1738                         if_put(ifp);
 1739                 }
 1740                 return (0);
 1741 
 1742         case IP_MULTICAST_TTL:
 1743                 ttl = mtod(m, u_char *);
 1744                 m->m_len = 1;
 1745                 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
 1746                                      : imo->imo_ttl;
 1747                 return (0);
 1748 
 1749         case IP_MULTICAST_LOOP:
 1750                 loop = mtod(m, u_char *);
 1751                 m->m_len = 1;
 1752                 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
 1753                                       : imo->imo_loop;
 1754                 return (0);
 1755 
 1756         default:
 1757                 return (EOPNOTSUPP);
 1758         }
 1759 }
 1760 
 1761 /*
 1762  * Discard the IP multicast options.
 1763  */
 1764 void
 1765 ip_freemoptions(struct ip_moptions *imo)
 1766 {
 1767         int i;
 1768 
 1769         if (imo != NULL) {
 1770                 for (i = 0; i < imo->imo_num_memberships; ++i)
 1771                         in_delmulti(imo->imo_membership[i]);
 1772                 free(imo->imo_membership, M_IPMOPTS,
 1773                     imo->imo_max_memberships * sizeof(struct in_multi *));
 1774                 free(imo, M_IPMOPTS, sizeof(*imo));
 1775         }
 1776 }
 1777 
 1778 /*
 1779  * Routine called from ip_output() to loop back a copy of an IP multicast
 1780  * packet to the input queue of a specified interface.
 1781  */
 1782 void
 1783 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst)
 1784 {
 1785         struct ip *ip;
 1786         struct mbuf *copym;
 1787 
 1788         copym = m_dup_pkt(m, max_linkhdr, M_DONTWAIT);
 1789         if (copym != NULL) {
 1790                 /*
 1791                  * We don't bother to fragment if the IP length is greater
 1792                  * than the interface's MTU.  Can this possibly matter?
 1793                  */
 1794                 ip = mtod(copym, struct ip *);
 1795                 ip->ip_sum = 0;
 1796                 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
 1797                 if_input_local(ifp, copym, dst->sin_family);
 1798         }
 1799 }
 1800 
 1801 /*
 1802  *      Compute significant parts of the IPv4 checksum pseudo-header
 1803  *      for use in a delayed TCP/UDP checksum calculation.
 1804  */
 1805 static __inline u_int16_t __attribute__((__unused__))
 1806 in_cksum_phdr(u_int32_t src, u_int32_t dst, u_int32_t lenproto)
 1807 {
 1808         u_int32_t sum;
 1809 
 1810         sum = lenproto +
 1811               (u_int16_t)(src >> 16) +
 1812               (u_int16_t)(src /*& 0xffff*/) +
 1813               (u_int16_t)(dst >> 16) +
 1814               (u_int16_t)(dst /*& 0xffff*/);
 1815 
 1816         sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/);
 1817 
 1818         if (sum > 0xffff)
 1819                 sum -= 0xffff;
 1820 
 1821         return (sum);
 1822 }
 1823 
 1824 /*
 1825  * Process a delayed payload checksum calculation.
 1826  */
 1827 void
 1828 in_delayed_cksum(struct mbuf *m)
 1829 {
 1830         struct ip *ip;
 1831         u_int16_t csum, offset;
 1832 
 1833         ip = mtod(m, struct ip *);
 1834         offset = ip->ip_hl << 2;
 1835         csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset);
 1836         if (csum == 0 && ip->ip_p == IPPROTO_UDP)
 1837                 csum = 0xffff;
 1838 
 1839         switch (ip->ip_p) {
 1840         case IPPROTO_TCP:
 1841                 offset += offsetof(struct tcphdr, th_sum);
 1842                 break;
 1843 
 1844         case IPPROTO_UDP:
 1845                 offset += offsetof(struct udphdr, uh_sum);
 1846                 break;
 1847 
 1848         case IPPROTO_ICMP:
 1849                 offset += offsetof(struct icmp, icmp_cksum);
 1850                 break;
 1851 
 1852         default:
 1853                 return;
 1854         }
 1855 
 1856         if ((offset + sizeof(u_int16_t)) > m->m_len)
 1857                 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT);
 1858         else
 1859                 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
 1860 }
 1861 
 1862 void
 1863 in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp)
 1864 {
 1865         struct ip *ip = mtod(m, struct ip *);
 1866 
 1867         /* some hw and in_delayed_cksum need the pseudo header cksum */
 1868         if (m->m_pkthdr.csum_flags &
 1869             (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) {
 1870                 u_int16_t csum = 0, offset;
 1871 
 1872                 offset = ip->ip_hl << 2;
 1873                 if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
 1874                         csum = in_cksum_phdr(ip->ip_src.s_addr,
 1875                             ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) -
 1876                             offset + ip->ip_p));
 1877                 if (ip->ip_p == IPPROTO_TCP)
 1878                         offset += offsetof(struct tcphdr, th_sum);
 1879                 else if (ip->ip_p == IPPROTO_UDP)
 1880                         offset += offsetof(struct udphdr, uh_sum);
 1881                 else if (ip->ip_p == IPPROTO_ICMP)
 1882                         offset += offsetof(struct icmp, icmp_cksum);
 1883                 if ((offset + sizeof(u_int16_t)) > m->m_len)
 1884                         m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT);
 1885                 else
 1886                         *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
 1887         }
 1888 
 1889         if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
 1890                 if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_TCPv4) ||
 1891                     ip->ip_hl != 5) {
 1892                         tcpstat_inc(tcps_outswcsum);
 1893                         in_delayed_cksum(m);
 1894                         m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
 1895                 }
 1896         } else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
 1897                 if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_UDPv4) ||
 1898                     ip->ip_hl != 5) {
 1899                         udpstat_inc(udps_outswcsum);
 1900                         in_delayed_cksum(m);
 1901                         m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */
 1902                 }
 1903         } else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) {
 1904                 in_delayed_cksum(m);
 1905                 m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */
 1906         }
 1907 }
 1908 
 1909 int
 1910 in_ifcap_cksum(struct mbuf *m, struct ifnet *ifp, int ifcap)
 1911 {
 1912         if ((ifp == NULL) ||
 1913             !ISSET(ifp->if_capabilities, ifcap) ||
 1914             (ifp->if_bridgeidx != 0))
 1915                 return (0);
 1916         /*
 1917          * Simplex interface sends packet back without hardware cksum.
 1918          * Keep this check in sync with the condition where ether_resolve()
 1919          * calls if_input_local().
 1920          */
 1921         if (ISSET(m->m_flags, M_BCAST) &&
 1922             ISSET(ifp->if_flags, IFF_SIMPLEX) &&
 1923             !m->m_pkthdr.pf.routed)
 1924                 return (0);
 1925         return (1);
 1926 }

Cache object: ad4acf3e2d1d152308066bc93ce7e0b0


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.