The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_output.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include "opt_inet.h"
   38 #include "opt_ratelimit.h"
   39 #include "opt_ipsec.h"
   40 #include "opt_mbuf_stress_test.h"
   41 #include "opt_mpath.h"
   42 #include "opt_route.h"
   43 #include "opt_sctp.h"
   44 #include "opt_rss.h"
   45 
   46 #include <sys/param.h>
   47 #include <sys/systm.h>
   48 #include <sys/kernel.h>
   49 #include <sys/lock.h>
   50 #include <sys/malloc.h>
   51 #include <sys/mbuf.h>
   52 #include <sys/priv.h>
   53 #include <sys/proc.h>
   54 #include <sys/protosw.h>
   55 #include <sys/rmlock.h>
   56 #include <sys/sdt.h>
   57 #include <sys/socket.h>
   58 #include <sys/socketvar.h>
   59 #include <sys/sysctl.h>
   60 #include <sys/ucred.h>
   61 
   62 #include <net/if.h>
   63 #include <net/if_var.h>
   64 #include <net/if_vlan_var.h>
   65 #include <net/if_llatbl.h>
   66 #include <net/ethernet.h>
   67 #include <net/netisr.h>
   68 #include <net/pfil.h>
   69 #include <net/route.h>
   70 #ifdef RADIX_MPATH
   71 #include <net/radix_mpath.h>
   72 #endif
   73 #include <net/rss_config.h>
   74 #include <net/vnet.h>
   75 
   76 #include <netinet/in.h>
   77 #include <netinet/in_kdtrace.h>
   78 #include <netinet/in_systm.h>
   79 #include <netinet/ip.h>
   80 #include <netinet/in_pcb.h>
   81 #include <netinet/in_rss.h>
   82 #include <netinet/in_var.h>
   83 #include <netinet/ip_var.h>
   84 #include <netinet/ip_options.h>
   85 
   86 #include <netinet/udp.h>
   87 #include <netinet/udp_var.h>
   88 
   89 #if defined(SCTP) || defined(SCTP_SUPPORT)
   90 #include <netinet/sctp.h>
   91 #include <netinet/sctp_crc32.h>
   92 #endif
   93 
   94 #include <netipsec/ipsec_support.h>
   95 
   96 #include <machine/in_cksum.h>
   97 
   98 #include <security/mac/mac_framework.h>
   99 
      /*
       * Stress-test knob, compiled in only with MBUF_STRESS_TEST.  It is
       * exported as a read-write sysctl under the _net_inet_ip node.  When it
       * is non-zero, ip_output() passes any unfragmented packet whose
       * m_pkthdr.len exceeds this value through m_fragment() before handing
       * it to the interface.  A value of 0 leaves packets untouched.
       */
  100 #ifdef MBUF_STRESS_TEST
  101 static int mbuf_frag_size = 0;
  102 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
  103         &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
  104 #endif
  105 
      /*
       * Forward declaration.  ip_output() calls this with the outgoing
       * interface, the packet and the IP header length when a multicast
       * datagram is to be looped back.
       */
  106 static void     ip_mloopback(struct ifnet *, const struct mbuf *, int);
  107 
  108 
      /*
       * Defined elsewhere.  ip_output() consults it to decide whether to loop
       * a multicast datagram back when the caller supplied no multicast
       * options (imo == NULL).
       */
  109 extern int in_mcast_loop;
      /* Protocol switch table, defined elsewhere. */
  110 extern  struct protosw inetsw[];
  111 
  112 static inline int
  113 ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp,
  114     struct sockaddr_in *dst, int *fibnum, int *error)
  115 {
  116         struct m_tag *fwd_tag = NULL;
  117         struct mbuf *m;
  118         struct in_addr odst;
  119         struct ip *ip;
  120 
  121         m = *mp;
  122         ip = mtod(m, struct ip *);
  123 
  124         /* Run through list of hooks for output packets. */
  125         odst.s_addr = ip->ip_dst.s_addr;
  126         *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, 0, inp);
  127         m = *mp;
  128         if ((*error) != 0 || m == NULL)
  129                 return 1; /* Finished */
  130 
  131         ip = mtod(m, struct ip *);
  132 
  133         /* See if destination IP address was changed by packet filter. */
  134         if (odst.s_addr != ip->ip_dst.s_addr) {
  135                 m->m_flags |= M_SKIP_FIREWALL;
  136                 /* If destination is now ourself drop to ip_input(). */
  137                 if (in_localip(ip->ip_dst)) {
  138                         m->m_flags |= M_FASTFWD_OURS;
  139                         if (m->m_pkthdr.rcvif == NULL)
  140                                 m->m_pkthdr.rcvif = V_loif;
  141                         if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
  142                                 m->m_pkthdr.csum_flags |=
  143                                         CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  144                                 m->m_pkthdr.csum_data = 0xffff;
  145                         }
  146                         m->m_pkthdr.csum_flags |=
  147                                 CSUM_IP_CHECKED | CSUM_IP_VALID;
  148 #if defined(SCTP) || defined(SCTP_SUPPORT)
  149                         if (m->m_pkthdr.csum_flags & CSUM_SCTP)
  150                                 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
  151 #endif
  152                         *error = netisr_queue(NETISR_IP, m);
  153                         return 1; /* Finished */
  154                 }
  155 
  156                 bzero(dst, sizeof(*dst));
  157                 dst->sin_family = AF_INET;
  158                 dst->sin_len = sizeof(*dst);
  159                 dst->sin_addr = ip->ip_dst;
  160 
  161                 return -1; /* Reloop */
  162         }
  163         /* See if fib was changed by packet filter. */
  164         if ((*fibnum) != M_GETFIB(m)) {
  165                 m->m_flags |= M_SKIP_FIREWALL;
  166                 *fibnum = M_GETFIB(m);
  167                 return -1; /* Reloop for FIB change */
  168         }
  169 
  170         /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
  171         if (m->m_flags & M_FASTFWD_OURS) {
  172                 if (m->m_pkthdr.rcvif == NULL)
  173                         m->m_pkthdr.rcvif = V_loif;
  174                 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
  175                         m->m_pkthdr.csum_flags |=
  176                                 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  177                         m->m_pkthdr.csum_data = 0xffff;
  178                 }
  179 #if defined(SCTP) || defined(SCTP_SUPPORT)
  180                 if (m->m_pkthdr.csum_flags & CSUM_SCTP)
  181                         m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
  182 #endif
  183                 m->m_pkthdr.csum_flags |=
  184                         CSUM_IP_CHECKED | CSUM_IP_VALID;
  185 
  186                 *error = netisr_queue(NETISR_IP, m);
  187                 return 1; /* Finished */
  188         }
  189         /* Or forward to some other address? */
  190         if ((m->m_flags & M_IP_NEXTHOP) &&
  191             ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
  192                 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
  193                 m->m_flags |= M_SKIP_FIREWALL;
  194                 m->m_flags &= ~M_IP_NEXTHOP;
  195                 m_tag_delete(m, fwd_tag);
  196 
  197                 return -1; /* Reloop for CHANGE of dst */
  198         }
  199 
  200         return 0;
  201 }
  202 
  203 /*
  204  * IP output.  The packet in mbuf chain m contains a skeletal IP
  205  * header (with len, off, ttl, proto, tos, src, dst).
  206  * The mbuf chain containing the packet will be freed.
  207  * The mbuf opt, if present, will not be freed.
  208  * If route ro is present and has ro_rt initialized, the route lookup is
  209  * skipped and ro->ro_rt is used. If ro is present but ro->ro_rt is NULL,
  210  * the result of the route lookup is stored in ro->ro_rt.
  211  *
  212  * In the IP forwarding case, the packet will arrive with options already
  213  * inserted, so must have a NULL opt pointer.
  214  */
  215 int
  216 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
  217     struct ip_moptions *imo, struct inpcb *inp)
  218 {
  219         struct rm_priotracker in_ifa_tracker;
  220         struct ip *ip;
  221         struct ifnet *ifp = NULL;       /* keep compiler happy */
  222         struct mbuf *m0;
  223         int hlen = sizeof (struct ip);
  224         int mtu;
  225         int error = 0;
  226         int vlan_pcp = -1;
  227         struct sockaddr_in *dst;
  228         const struct sockaddr_in *gw;
  229         struct in_ifaddr *ia;
  230         int isbroadcast;
  231         uint16_t ip_len, ip_off;
  232         struct route iproute;
  233         struct rtentry *rte;    /* cache for ro->ro_rt */
  234         uint32_t fibnum;
  235 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  236         int no_route_but_check_spd = 0;
  237 #endif
  238         M_ASSERTPKTHDR(m);
  239 
  240         if (inp != NULL) {
  241                 INP_LOCK_ASSERT(inp);
  242                 M_SETFIB(m, inp->inp_inc.inc_fibnum);
  243                 if ((flags & IP_NODEFAULTFLOWID) == 0) {
  244                         m->m_pkthdr.flowid = inp->inp_flowid;
  245                         M_HASHTYPE_SET(m, inp->inp_flowtype);
  246                 }
  247                 if ((inp->inp_flags2 & INP_2PCP_SET) != 0)
  248                         vlan_pcp = (inp->inp_flags2 & INP_2PCP_MASK) >>
  249                             INP_2PCP_SHIFT;
  250         }
  251 
  252         if (ro == NULL) {
  253                 ro = &iproute;
  254                 bzero(ro, sizeof (*ro));
  255         }
  256 
  257         if (opt) {
  258                 int len = 0;
  259                 m = ip_insertoptions(m, opt, &len);
  260                 if (len != 0)
  261                         hlen = len; /* ip->ip_hl is updated above */
  262         }
  263         ip = mtod(m, struct ip *);
  264         ip_len = ntohs(ip->ip_len);
  265         ip_off = ntohs(ip->ip_off);
  266 
  267         if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
  268                 ip->ip_v = IPVERSION;
  269                 ip->ip_hl = hlen >> 2;
  270                 ip_fillid(ip);
  271         } else {
  272                 /* Header already set, fetch hlen from there */
  273                 hlen = ip->ip_hl << 2;
  274         }
  275         if ((flags & IP_FORWARDING) == 0)
  276                 IPSTAT_INC(ips_localout);
  277 
  278         /*
  279          * dst/gw handling:
  280          *
  281          * dst can be rewritten but always points to &ro->ro_dst.
  282          * gw is readonly but can point either to dst OR rt_gateway,
  283          * therefore we need restore gw if we're redoing lookup.
  284          */
  285         gw = dst = (struct sockaddr_in *)&ro->ro_dst;
  286         fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
  287         rte = ro->ro_rt;
  288         if (rte == NULL) {
  289                 bzero(dst, sizeof(*dst));
  290                 dst->sin_family = AF_INET;
  291                 dst->sin_len = sizeof(*dst);
  292                 dst->sin_addr = ip->ip_dst;
  293         }
  294         NET_EPOCH_ENTER();
  295 again:
  296         /*
  297          * Validate route against routing table additions;
  298          * a better/more specific route might have been added.
  299          */
  300         if (inp)
  301                 RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
  302         /*
  303          * If there is a cached route,
  304          * check that it is to the same destination
  305          * and is still up.  If not, free it and try again.
  306          * The address family should also be checked in case of sharing the
  307          * cache with IPv6.
  308          * Also check whether routing cache needs invalidation.
  309          */
  310         rte = ro->ro_rt;
  311         if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
  312                     rte->rt_ifp == NULL ||
  313                     !RT_LINK_IS_UP(rte->rt_ifp) ||
  314                           dst->sin_family != AF_INET ||
  315                           dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
  316                 RO_INVALIDATE_CACHE(ro);
  317                 rte = NULL;
  318         }
  319         ia = NULL;
  320         /*
  321          * If routing to interface only, short circuit routing lookup.
  322          * The use of an all-ones broadcast address implies this; an
  323          * interface is specified by the broadcast address of an interface,
  324          * or the destination address of a ptp interface.
  325          */
  326         if (flags & IP_SENDONES) {
  327                 if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst),
  328                                                       M_GETFIB(m)))) == NULL &&
  329                     (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
  330                                                     M_GETFIB(m)))) == NULL) {
  331                         IPSTAT_INC(ips_noroute);
  332                         error = ENETUNREACH;
  333                         goto bad;
  334                 }
  335                 ip->ip_dst.s_addr = INADDR_BROADCAST;
  336                 dst->sin_addr = ip->ip_dst;
  337                 ifp = ia->ia_ifp;
  338                 ip->ip_ttl = 1;
  339                 isbroadcast = 1;
  340         } else if (flags & IP_ROUTETOIF) {
  341                 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
  342                                                     M_GETFIB(m)))) == NULL &&
  343                     (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0,
  344                                                 M_GETFIB(m)))) == NULL) {
  345                         IPSTAT_INC(ips_noroute);
  346                         error = ENETUNREACH;
  347                         goto bad;
  348                 }
  349                 ifp = ia->ia_ifp;
  350                 ip->ip_ttl = 1;
  351                 isbroadcast = ifp->if_flags & IFF_BROADCAST ?
  352                     in_ifaddr_broadcast(dst->sin_addr, ia) : 0;
  353         } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
  354             imo != NULL && imo->imo_multicast_ifp != NULL) {
  355                 /*
  356                  * Bypass the normal routing lookup for multicast
  357                  * packets if the interface is specified.
  358                  */
  359                 ifp = imo->imo_multicast_ifp;
  360                 IFP_TO_IA(ifp, ia, &in_ifa_tracker);
  361                 isbroadcast = 0;        /* fool gcc */
  362         } else {
  363                 /*
  364                  * We want to do any cloning requested by the link layer,
  365                  * as this is probably required in all cases for correct
  366                  * operation (as it is for ARP).
  367                  */
  368                 if (rte == NULL) {
  369 #ifdef RADIX_MPATH
  370                         rtalloc_mpath_fib(ro,
  371                             ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
  372                             fibnum);
  373 #else
  374                         in_rtalloc_ign(ro, 0, fibnum);
  375 #endif
  376                         rte = ro->ro_rt;
  377                 }
  378                 if (rte == NULL ||
  379                     (rte->rt_flags & RTF_UP) == 0 ||
  380                     rte->rt_ifp == NULL ||
  381                     !RT_LINK_IS_UP(rte->rt_ifp)) {
  382 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  383                         /*
  384                          * There is no route for this packet, but it is
  385                          * possible that a matching SPD entry exists.
  386                          */
  387                         no_route_but_check_spd = 1;
  388                         mtu = 0; /* Silence GCC warning. */
  389                         goto sendit;
  390 #endif
  391                         IPSTAT_INC(ips_noroute);
  392                         error = EHOSTUNREACH;
  393                         goto bad;
  394                 }
  395                 ia = ifatoia(rte->rt_ifa);
  396                 ifp = rte->rt_ifp;
  397                 counter_u64_add(rte->rt_pksent, 1);
  398                 rt_update_ro_flags(ro);
  399                 if (rte->rt_flags & RTF_GATEWAY)
  400                         gw = (struct sockaddr_in *)rte->rt_gateway;
  401                 if (rte->rt_flags & RTF_HOST)
  402                         isbroadcast = (rte->rt_flags & RTF_BROADCAST);
  403                 else if (ifp->if_flags & IFF_BROADCAST)
  404                         isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
  405                 else
  406                         isbroadcast = 0;
  407         }
  408 
  409         /*
  410          * Calculate MTU.  If we have a route that is up, use that,
  411          * otherwise use the interface's MTU.
  412          */
  413         if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
  414                 mtu = rte->rt_mtu;
  415         else
  416                 mtu = ifp->if_mtu;
  417         /* Catch a possible divide by zero later. */
  418         KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
  419             __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
  420 
  421         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
  422                 m->m_flags |= M_MCAST;
  423                 /*
  424                  * IP destination address is multicast.  Make sure "gw"
  425                  * still points to the address in "ro".  (It may have been
  426                  * changed to point to a gateway address, above.)
  427                  */
  428                 gw = dst;
  429                 /*
  430                  * See if the caller provided any multicast options
  431                  */
  432                 if (imo != NULL) {
  433                         ip->ip_ttl = imo->imo_multicast_ttl;
  434                         if (imo->imo_multicast_vif != -1)
  435                                 ip->ip_src.s_addr =
  436                                     ip_mcast_src ?
  437                                     ip_mcast_src(imo->imo_multicast_vif) :
  438                                     INADDR_ANY;
  439                 } else
  440                         ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
  441                 /*
  442                  * Confirm that the outgoing interface supports multicast.
  443                  */
  444                 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
  445                         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
  446                                 IPSTAT_INC(ips_noroute);
  447                                 error = ENETUNREACH;
  448                                 goto bad;
  449                         }
  450                 }
  451                 /*
  452                  * If source address not specified yet, use address
  453                  * of outgoing interface.
  454                  */
  455                 if (ip->ip_src.s_addr == INADDR_ANY) {
  456                         /* Interface may have no addresses. */
  457                         if (ia != NULL)
  458                                 ip->ip_src = IA_SIN(ia)->sin_addr;
  459                 }
  460 
  461                 if ((imo == NULL && in_mcast_loop) ||
  462                     (imo && imo->imo_multicast_loop)) {
  463                         /*
  464                          * Loop back multicast datagram if not expressly
  465                          * forbidden to do so, even if we are not a member
  466                          * of the group; ip_input() will filter it later,
  467                          * thus deferring a hash lookup and mutex acquisition
  468                          * at the expense of a cheap copy using m_copym().
  469                          */
  470                         ip_mloopback(ifp, m, hlen);
  471                 } else {
  472                         /*
  473                          * If we are acting as a multicast router, perform
  474                          * multicast forwarding as if the packet had just
  475                          * arrived on the interface to which we are about
  476                          * to send.  The multicast forwarding function
  477                          * recursively calls this function, using the
  478                          * IP_FORWARDING flag to prevent infinite recursion.
  479                          *
  480                          * Multicasts that are looped back by ip_mloopback(),
  481                          * above, will be forwarded by the ip_input() routine,
  482                          * if necessary.
  483                          */
  484                         if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
  485                                 /*
  486                                  * If rsvp daemon is not running, do not
  487                                  * set ip_moptions. This ensures that the packet
  488                                  * is multicast and not just sent down one link
  489                                  * as prescribed by rsvpd.
  490                                  */
  491                                 if (!V_rsvp_on)
  492                                         imo = NULL;
  493                                 if (ip_mforward &&
  494                                     ip_mforward(ip, ifp, m, imo) != 0) {
  495                                         m_freem(m);
  496                                         goto done;
  497                                 }
  498                         }
  499                 }
  500 
  501                 /*
  502                  * Multicasts with a time-to-live of zero may be looped-
  503                  * back, above, but must not be transmitted on a network.
  504                  * Also, multicasts addressed to the loopback interface
  505                  * are not sent -- the above call to ip_mloopback() will
  506                  * loop back a copy. ip_input() will drop the copy if
  507                  * this host does not belong to the destination group on
  508                  * the loopback interface.
  509                  */
  510                 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
  511                         m_freem(m);
  512                         goto done;
  513                 }
  514 
  515                 goto sendit;
  516         }
  517 
  518         /*
  519          * If the source address is not specified yet, use the address
  520          * of the outoing interface.
  521          */
  522         if (ip->ip_src.s_addr == INADDR_ANY) {
  523                 /* Interface may have no addresses. */
  524                 if (ia != NULL) {
  525                         ip->ip_src = IA_SIN(ia)->sin_addr;
  526                 }
  527         }
  528 
  529         /*
  530          * Look for broadcast address and
  531          * verify user is allowed to send
  532          * such a packet.
  533          */
  534         if (isbroadcast) {
  535                 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
  536                         error = EADDRNOTAVAIL;
  537                         goto bad;
  538                 }
  539                 if ((flags & IP_ALLOWBROADCAST) == 0) {
  540                         error = EACCES;
  541                         goto bad;
  542                 }
  543                 /* don't allow broadcast messages to be fragmented */
  544                 if (ip_len > mtu) {
  545                         error = EMSGSIZE;
  546                         goto bad;
  547                 }
  548                 m->m_flags |= M_BCAST;
  549         } else {
  550                 m->m_flags &= ~M_BCAST;
  551         }
  552 
  553 sendit:
  554 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  555         if (IPSEC_ENABLED(ipv4)) {
  556                 if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) {
  557                         if (error == EINPROGRESS)
  558                                 error = 0;
  559                         goto done;
  560                 }
  561         }
  562         /*
  563          * Check if there was a route for this packet; return error if not.
  564          */
  565         if (no_route_but_check_spd) {
  566                 IPSTAT_INC(ips_noroute);
  567                 error = EHOSTUNREACH;
  568                 goto bad;
  569         }
  570         /* Update variables that are affected by ipsec4_output(). */
  571         ip = mtod(m, struct ip *);
  572         hlen = ip->ip_hl << 2;
  573 #endif /* IPSEC */
  574 
  575         /* Jump over all PFIL processing if hooks are not active. */
  576         if (PFIL_HOOKED(&V_inet_pfil_hook)) {
  577                 switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) {
  578                 case 1: /* Finished */
  579                         goto done;
  580 
  581                 case 0: /* Continue normally */
  582                         ip = mtod(m, struct ip *);
  583                         break;
  584 
  585                 case -1: /* Need to try again */
  586                         /* Reset everything for a new round */
  587                         RO_RTFREE(ro);
  588                         ro->ro_prepend = NULL;
  589                         rte = NULL;
  590                         gw = dst;
  591                         ip = mtod(m, struct ip *);
  592                         goto again;
  593 
  594                 }
  595         }
  596 
  597         if (vlan_pcp > -1)
  598                 EVL_APPLY_PRI(m, vlan_pcp);
  599 
  600         /* IN_LOOPBACK must not appear on the wire - RFC1122. */
  601         if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
  602             IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
  603                 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
  604                         IPSTAT_INC(ips_badaddr);
  605                         error = EADDRNOTAVAIL;
  606                         goto bad;
  607                 }
  608         }
  609 
  610         m->m_pkthdr.csum_flags |= CSUM_IP;
  611         if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
  612                 in_delayed_cksum(m);
  613                 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
  614         }
  615 #if defined(SCTP) || defined(SCTP_SUPPORT)
  616         if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
  617                 sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
  618                 m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
  619         }
  620 #endif
  621 
  622         /*
  623          * If small enough for interface, or the interface will take
  624          * care of the fragmentation for us, we can just send directly.
  625          */
  626         if (ip_len <= mtu ||
  627             (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
  628                 ip->ip_sum = 0;
  629                 if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
  630                         ip->ip_sum = in_cksum(m, hlen);
  631                         m->m_pkthdr.csum_flags &= ~CSUM_IP;
  632                 }
  633 
  634                 /*
  635                  * Record statistics for this interface address.
  636                  * With CSUM_TSO the byte/packet count will be slightly
  637                  * incorrect because we count the IP+TCP headers only
  638                  * once instead of for every generated packet.
  639                  */
  640                 if (!(flags & IP_FORWARDING) && ia) {
  641                         if (m->m_pkthdr.csum_flags & CSUM_TSO)
  642                                 counter_u64_add(ia->ia_ifa.ifa_opackets,
  643                                     m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
  644                         else
  645                                 counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
  646 
  647                         counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len);
  648                 }
  649 #ifdef MBUF_STRESS_TEST
  650                 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
  651                         m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
  652 #endif
  653                 /*
  654                  * Reset layer specific mbuf flags
  655                  * to avoid confusing lower layers.
  656                  */
  657                 m_clrprotoflags(m);
  658                 IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
  659 #ifdef RATELIMIT
  660                 if (inp != NULL) {
  661                         if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
  662                                 in_pcboutput_txrtlmt(inp, ifp, m);
  663                         /* stamp send tag on mbuf */
  664                         m->m_pkthdr.snd_tag = inp->inp_snd_tag;
  665                         m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
  666                 } else {
  667                         m->m_pkthdr.snd_tag = NULL;
  668                 }
  669 #endif
  670                 error = (*ifp->if_output)(ifp, m,
  671                     (const struct sockaddr *)gw, ro);
  672 #ifdef RATELIMIT
  673                 /* check for route change */
  674                 if (error == EAGAIN)
  675                         in_pcboutput_eagain(inp);
  676 #endif
  677                 goto done;
  678         }
  679 
  680         /* Balk when DF bit is set or the interface didn't support TSO. */
  681         if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
  682                 error = EMSGSIZE;
  683                 IPSTAT_INC(ips_cantfrag);
  684                 goto bad;
  685         }
  686 
  687         /*
  688          * Too large for interface; fragment if possible. If successful,
  689          * on return, m will point to a list of packets to be sent.
  690          */
  691         error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
  692         if (error)
  693                 goto bad;
  694         for (; m; m = m0) {
  695                 m0 = m->m_nextpkt;
  696                 m->m_nextpkt = 0;
  697                 if (error == 0) {
  698                         /* Record statistics for this interface address. */
  699                         if (ia != NULL) {
  700                                 counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
  701                                 counter_u64_add(ia->ia_ifa.ifa_obytes,
  702                                     m->m_pkthdr.len);
  703                         }
  704                         /*
  705                          * Reset layer specific mbuf flags
  706                          * to avoid confusing upper layers.
  707                          */
  708                         m_clrprotoflags(m);
  709 
  710                         IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp,
  711                             mtod(m, struct ip *), NULL);
  712 #ifdef RATELIMIT
  713                         if (inp != NULL) {
  714                                 if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
  715                                         in_pcboutput_txrtlmt(inp, ifp, m);
  716                                 /* stamp send tag on mbuf */
  717                                 m->m_pkthdr.snd_tag = inp->inp_snd_tag;
  718                                 m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
  719                         } else {
  720                                 m->m_pkthdr.snd_tag = NULL;
  721                         }
  722 #endif
  723                         error = (*ifp->if_output)(ifp, m,
  724                             (const struct sockaddr *)gw, ro);
  725 #ifdef RATELIMIT
  726                         /* check for route change */
  727                         if (error == EAGAIN)
  728                                 in_pcboutput_eagain(inp);
  729 #endif
  730                 } else
  731                         m_freem(m);
  732         }
  733 
  734         if (error == 0)
  735                 IPSTAT_INC(ips_fragmented);
  736 
  737 done:
  738         if (ro == &iproute)
  739                 RO_RTFREE(ro);
  740         else if (rte == NULL)
  741                 /*
  742                  * If the caller supplied a route but somehow the reference
  743                  * to it has been released need to prevent the caller
  744                  * calling RTFREE on it again.
  745                  */
  746                 ro->ro_rt = NULL;
  747         NET_EPOCH_EXIT();
  748         return (error);
  749  bad:
  750         m_freem(m);
  751         goto done;
  752 }
  753 
  754 /*
  755  * Create a chain of fragments which fit the given mtu. m_frag points to the
  756  * mbuf to be fragmented; on return it points to the chain with the fragments.
  757  * Return 0 if no error. If error, m_frag may contain a partially built
  758  * chain of fragments that should be freed by the caller.
  759  *
  760  * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
  761  */
  762 int
  763 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
  764     u_long if_hwassist_flags)
  765 {
  766         int error = 0;
  767         int hlen = ip->ip_hl << 2;
  768         int len = (mtu - hlen) & ~7;    /* size of payload in each fragment */
  769         int off;
  770         struct mbuf *m0 = *m_frag;      /* the original packet          */
  771         int firstlen;
  772         struct mbuf **mnext;
  773         int nfrags;
  774         uint16_t ip_len, ip_off;
  775 
  776         ip_len = ntohs(ip->ip_len);
  777         ip_off = ntohs(ip->ip_off);
  778 
  779         if (ip_off & IP_DF) {   /* Fragmentation not allowed */
  780                 IPSTAT_INC(ips_cantfrag);
  781                 return EMSGSIZE;
  782         }
  783 
  784         /*
  785          * Must be able to put at least 8 bytes per fragment.
  786          */
  787         if (len < 8)
  788                 return EMSGSIZE;
  789 
  790         /*
  791          * If the interface will not calculate checksums on
  792          * fragmented packets, then do it here.
  793          */
  794         if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
  795                 in_delayed_cksum(m0);
  796                 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
  797         }
  798 #if defined(SCTP) || defined(SCTP_SUPPORT)
  799         if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
  800                 sctp_delayed_cksum(m0, hlen);
  801                 m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
  802         }
  803 #endif
  804         if (len > PAGE_SIZE) {
  805                 /*
  806                  * Fragment large datagrams such that each segment
  807                  * contains a multiple of PAGE_SIZE amount of data,
  808                  * plus headers. This enables a receiver to perform
  809                  * page-flipping zero-copy optimizations.
  810                  *
  811                  * XXX When does this help given that sender and receiver
  812                  * could have different page sizes, and also mtu could
  813                  * be less than the receiver's page size ?
  814                  */
  815                 int newlen;
  816 
  817                 off = MIN(mtu, m0->m_pkthdr.len);
  818 
  819                 /*
  820                  * firstlen (off - hlen) must be aligned on an
  821                  * 8-byte boundary
  822                  */
  823                 if (off < hlen)
  824                         goto smart_frag_failure;
  825                 off = ((off - hlen) & ~7) + hlen;
  826                 newlen = (~PAGE_MASK) & mtu;
  827                 if ((newlen + sizeof (struct ip)) > mtu) {
  828                         /* we failed, go back the default */
  829 smart_frag_failure:
  830                         newlen = len;
  831                         off = hlen + len;
  832                 }
  833                 len = newlen;
  834 
  835         } else {
  836                 off = hlen + len;
  837         }
  838 
  839         firstlen = off - hlen;
  840         mnext = &m0->m_nextpkt;         /* pointer to next packet */
  841 
  842         /*
  843          * Loop through length of segment after first fragment,
  844          * make new header and copy data of each part and link onto chain.
  845          * Here, m0 is the original packet, m is the fragment being created.
  846          * The fragments are linked off the m_nextpkt of the original
  847          * packet, which after processing serves as the first fragment.
  848          */
  849         for (nfrags = 1; off < ip_len; off += len, nfrags++) {
  850                 struct ip *mhip;        /* ip header on the fragment */
  851                 struct mbuf *m;
  852                 int mhlen = sizeof (struct ip);
  853 
  854                 m = m_gethdr(M_NOWAIT, MT_DATA);
  855                 if (m == NULL) {
  856                         error = ENOBUFS;
  857                         IPSTAT_INC(ips_odropped);
  858                         goto done;
  859                 }
  860                 /*
  861                  * Make sure the complete packet header gets copied
  862                  * from the originating mbuf to the newly created
  863                  * mbuf. This also ensures that existing firewall
  864                  * classification(s), VLAN tags and so on get copied
  865                  * to the resulting fragmented packet(s):
  866                  */
  867                 if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
  868                         m_free(m);
  869                         error = ENOBUFS;
  870                         IPSTAT_INC(ips_odropped);
  871                         goto done;
  872                 }
  873                 /*
  874                  * In the first mbuf, leave room for the link header, then
  875                  * copy the original IP header including options. The payload
  876                  * goes into an additional mbuf chain returned by m_copym().
  877                  */
  878                 m->m_data += max_linkhdr;
  879                 mhip = mtod(m, struct ip *);
  880                 *mhip = *ip;
  881                 if (hlen > sizeof (struct ip)) {
  882                         mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
  883                         mhip->ip_v = IPVERSION;
  884                         mhip->ip_hl = mhlen >> 2;
  885                 }
  886                 m->m_len = mhlen;
  887                 /* XXX do we need to add ip_off below ? */
  888                 mhip->ip_off = ((off - hlen) >> 3) + ip_off;
  889                 if (off + len >= ip_len)
  890                         len = ip_len - off;
  891                 else
  892                         mhip->ip_off |= IP_MF;
  893                 mhip->ip_len = htons((u_short)(len + mhlen));
  894                 m->m_next = m_copym(m0, off, len, M_NOWAIT);
  895                 if (m->m_next == NULL) {        /* copy failed */
  896                         m_free(m);
  897                         error = ENOBUFS;        /* ??? */
  898                         IPSTAT_INC(ips_odropped);
  899                         goto done;
  900                 }
  901                 m->m_pkthdr.len = mhlen + len;
  902 #ifdef MAC
  903                 mac_netinet_fragment(m0, m);
  904 #endif
  905                 mhip->ip_off = htons(mhip->ip_off);
  906                 mhip->ip_sum = 0;
  907                 if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
  908                         mhip->ip_sum = in_cksum(m, mhlen);
  909                         m->m_pkthdr.csum_flags &= ~CSUM_IP;
  910                 }
  911                 *mnext = m;
  912                 mnext = &m->m_nextpkt;
  913         }
  914         IPSTAT_ADD(ips_ofragments, nfrags);
  915 
  916         /*
  917          * Update first fragment by trimming what's been copied out
  918          * and updating header.
  919          */
  920         m_adj(m0, hlen + firstlen - ip_len);
  921         m0->m_pkthdr.len = hlen + firstlen;
  922         ip->ip_len = htons((u_short)m0->m_pkthdr.len);
  923         ip->ip_off = htons(ip_off | IP_MF);
  924         ip->ip_sum = 0;
  925         if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
  926                 ip->ip_sum = in_cksum(m0, hlen);
  927                 m0->m_pkthdr.csum_flags &= ~CSUM_IP;
  928         }
  929 
  930 done:
  931         *m_frag = m0;
  932         return error;
  933 }
  934 
  935 void
  936 in_delayed_cksum(struct mbuf *m)
  937 {
  938         struct ip *ip;
  939         struct udphdr *uh;
  940         uint16_t cklen, csum, offset;
  941 
  942         ip = mtod(m, struct ip *);
  943         offset = ip->ip_hl << 2 ;
  944 
  945         if (m->m_pkthdr.csum_flags & CSUM_UDP) {
  946                 /* if udp header is not in the first mbuf copy udplen */
  947                 if (offset + sizeof(struct udphdr) > m->m_len) {
  948                         m_copydata(m, offset + offsetof(struct udphdr,
  949                             uh_ulen), sizeof(cklen), (caddr_t)&cklen);
  950                         cklen = ntohs(cklen);
  951                 } else {
  952                         uh = (struct udphdr *)mtodo(m, offset);
  953                         cklen = ntohs(uh->uh_ulen);
  954                 }
  955                 csum = in_cksum_skip(m, cklen + offset, offset);
  956                 if (csum == 0)
  957                         csum = 0xffff;
  958         } else {
  959                 cklen = ntohs(ip->ip_len);
  960                 csum = in_cksum_skip(m, cklen, offset);
  961         }
  962         offset += m->m_pkthdr.csum_data;        /* checksum offset */
  963 
  964         if (offset + sizeof(csum) > m->m_len)
  965                 m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
  966         else
  967                 *(u_short *)mtodo(m, offset) = csum;
  968 }
  969 
  970 /*
  971  * IP socket option processing.
  972  */
  973 int
  974 ip_ctloutput(struct socket *so, struct sockopt *sopt)
  975 {
  976         struct  inpcb *inp = sotoinpcb(so);
  977         int     error, optval;
  978 #ifdef  RSS
  979         uint32_t rss_bucket;
  980         int retval;
  981 #endif
  982 
  983         error = optval = 0;
  984         if (sopt->sopt_level != IPPROTO_IP) {
  985                 error = EINVAL;
  986 
  987                 if (sopt->sopt_level == SOL_SOCKET &&
  988                     sopt->sopt_dir == SOPT_SET) {
  989                         switch (sopt->sopt_name) {
  990                         case SO_REUSEADDR:
  991                                 INP_WLOCK(inp);
  992                                 if ((so->so_options & SO_REUSEADDR) != 0)
  993                                         inp->inp_flags2 |= INP_REUSEADDR;
  994                                 else
  995                                         inp->inp_flags2 &= ~INP_REUSEADDR;
  996                                 INP_WUNLOCK(inp);
  997                                 error = 0;
  998                                 break;
  999                         case SO_REUSEPORT:
 1000                                 INP_WLOCK(inp);
 1001                                 if ((so->so_options & SO_REUSEPORT) != 0)
 1002                                         inp->inp_flags2 |= INP_REUSEPORT;
 1003                                 else
 1004                                         inp->inp_flags2 &= ~INP_REUSEPORT;
 1005                                 INP_WUNLOCK(inp);
 1006                                 error = 0;
 1007                                 break;
 1008                         case SO_REUSEPORT_LB:
 1009                                 INP_WLOCK(inp);
 1010                                 if ((so->so_options & SO_REUSEPORT_LB) != 0)
 1011                                         inp->inp_flags2 |= INP_REUSEPORT_LB;
 1012                                 else
 1013                                         inp->inp_flags2 &= ~INP_REUSEPORT_LB;
 1014                                 INP_WUNLOCK(inp);
 1015                                 error = 0;
 1016                                 break;
 1017                         case SO_SETFIB:
 1018                                 INP_WLOCK(inp);
 1019                                 inp->inp_inc.inc_fibnum = so->so_fibnum;
 1020                                 INP_WUNLOCK(inp);
 1021                                 error = 0;
 1022                                 break;
 1023                         case SO_MAX_PACING_RATE:
 1024 #ifdef RATELIMIT
 1025                                 INP_WLOCK(inp);
 1026                                 inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
 1027                                 INP_WUNLOCK(inp);
 1028                                 error = 0;
 1029 #else
 1030                                 error = EOPNOTSUPP;
 1031 #endif
 1032                                 break;
 1033                         default:
 1034                                 break;
 1035                         }
 1036                 }
 1037                 return (error);
 1038         }
 1039 
 1040         switch (sopt->sopt_dir) {
 1041         case SOPT_SET:
 1042                 switch (sopt->sopt_name) {
 1043                 case IP_OPTIONS:
 1044 #ifdef notyet
 1045                 case IP_RETOPTS:
 1046 #endif
 1047                 {
 1048                         struct mbuf *m;
 1049                         if (sopt->sopt_valsize > MLEN) {
 1050                                 error = EMSGSIZE;
 1051                                 break;
 1052                         }
 1053                         m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
 1054                         if (m == NULL) {
 1055                                 error = ENOBUFS;
 1056                                 break;
 1057                         }
 1058                         m->m_len = sopt->sopt_valsize;
 1059                         error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
 1060                                             m->m_len);
 1061                         if (error) {
 1062                                 m_free(m);
 1063                                 break;
 1064                         }
 1065                         INP_WLOCK(inp);
 1066                         error = ip_pcbopts(inp, sopt->sopt_name, m);
 1067                         INP_WUNLOCK(inp);
 1068                         return (error);
 1069                 }
 1070 
 1071                 case IP_BINDANY:
 1072                         if (sopt->sopt_td != NULL) {
 1073                                 error = priv_check(sopt->sopt_td,
 1074                                     PRIV_NETINET_BINDANY);
 1075                                 if (error)
 1076                                         break;
 1077                         }
 1078                         /* FALLTHROUGH */
 1079                 case IP_BINDMULTI:
 1080 #ifdef  RSS
 1081                 case IP_RSS_LISTEN_BUCKET:
 1082 #endif
 1083                 case IP_TOS:
 1084                 case IP_TTL:
 1085                 case IP_MINTTL:
 1086                 case IP_RECVOPTS:
 1087                 case IP_RECVRETOPTS:
 1088                 case IP_ORIGDSTADDR:
 1089                 case IP_RECVDSTADDR:
 1090                 case IP_RECVTTL:
 1091                 case IP_RECVIF:
 1092                 case IP_ONESBCAST:
 1093                 case IP_DONTFRAG:
 1094                 case IP_RECVTOS:
 1095                 case IP_RECVFLOWID:
 1096 #ifdef  RSS
 1097                 case IP_RECVRSSBUCKETID:
 1098 #endif
 1099                 case IP_VLAN_PCP:
 1100                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1101                                             sizeof optval);
 1102                         if (error)
 1103                                 break;
 1104 
 1105                         switch (sopt->sopt_name) {
 1106                         case IP_TOS:
 1107                                 inp->inp_ip_tos = optval;
 1108                                 break;
 1109 
 1110                         case IP_TTL:
 1111                                 inp->inp_ip_ttl = optval;
 1112                                 break;
 1113 
 1114                         case IP_MINTTL:
 1115                                 if (optval >= 0 && optval <= MAXTTL)
 1116                                         inp->inp_ip_minttl = optval;
 1117                                 else
 1118                                         error = EINVAL;
 1119                                 break;
 1120 
 1121 #define OPTSET(bit) do {                                                \
 1122         INP_WLOCK(inp);                                                 \
 1123         if (optval)                                                     \
 1124                 inp->inp_flags |= bit;                                  \
 1125         else                                                            \
 1126                 inp->inp_flags &= ~bit;                                 \
 1127         INP_WUNLOCK(inp);                                               \
 1128 } while (0)
 1129 
 1130 #define OPTSET2(bit, val) do {                                          \
 1131         INP_WLOCK(inp);                                                 \
 1132         if (val)                                                        \
 1133                 inp->inp_flags2 |= bit;                                 \
 1134         else                                                            \
 1135                 inp->inp_flags2 &= ~bit;                                \
 1136         INP_WUNLOCK(inp);                                               \
 1137 } while (0)
 1138 
 1139                         case IP_RECVOPTS:
 1140                                 OPTSET(INP_RECVOPTS);
 1141                                 break;
 1142 
 1143                         case IP_RECVRETOPTS:
 1144                                 OPTSET(INP_RECVRETOPTS);
 1145                                 break;
 1146 
 1147                         case IP_RECVDSTADDR:
 1148                                 OPTSET(INP_RECVDSTADDR);
 1149                                 break;
 1150 
 1151                         case IP_ORIGDSTADDR:
 1152                                 OPTSET2(INP_ORIGDSTADDR, optval);
 1153                                 break;
 1154 
 1155                         case IP_RECVTTL:
 1156                                 OPTSET(INP_RECVTTL);
 1157                                 break;
 1158 
 1159                         case IP_RECVIF:
 1160                                 OPTSET(INP_RECVIF);
 1161                                 break;
 1162 
 1163                         case IP_ONESBCAST:
 1164                                 OPTSET(INP_ONESBCAST);
 1165                                 break;
 1166                         case IP_DONTFRAG:
 1167                                 OPTSET(INP_DONTFRAG);
 1168                                 break;
 1169                         case IP_BINDANY:
 1170                                 OPTSET(INP_BINDANY);
 1171                                 break;
 1172                         case IP_RECVTOS:
 1173                                 OPTSET(INP_RECVTOS);
 1174                                 break;
 1175                         case IP_BINDMULTI:
 1176                                 OPTSET2(INP_BINDMULTI, optval);
 1177                                 break;
 1178                         case IP_RECVFLOWID:
 1179                                 OPTSET2(INP_RECVFLOWID, optval);
 1180                                 break;
 1181 #ifdef  RSS
 1182                         case IP_RSS_LISTEN_BUCKET:
 1183                                 if ((optval >= 0) &&
 1184                                     (optval < rss_getnumbuckets())) {
 1185                                         inp->inp_rss_listen_bucket = optval;
 1186                                         OPTSET2(INP_RSS_BUCKET_SET, 1);
 1187                                 } else {
 1188                                         error = EINVAL;
 1189                                 }
 1190                                 break;
 1191                         case IP_RECVRSSBUCKETID:
 1192                                 OPTSET2(INP_RECVRSSBUCKETID, optval);
 1193                                 break;
 1194 #endif
 1195                         case IP_VLAN_PCP:
 1196                                 if ((optval >= -1) && (optval <=
 1197                                     (INP_2PCP_MASK >> INP_2PCP_SHIFT))) {
 1198                                         if (optval == -1) {
 1199                                                 INP_WLOCK(inp);
 1200                                                 inp->inp_flags2 &=
 1201                                                     ~(INP_2PCP_SET |
 1202                                                       INP_2PCP_MASK);
 1203                                                 INP_WUNLOCK(inp);
 1204                                         } else {
 1205                                                 INP_WLOCK(inp);
 1206                                                 inp->inp_flags2 |=
 1207                                                     INP_2PCP_SET;
 1208                                                 inp->inp_flags2 &=
 1209                                                     ~INP_2PCP_MASK;
 1210                                                 inp->inp_flags2 |=
 1211                                                     optval << INP_2PCP_SHIFT;
 1212                                                 INP_WUNLOCK(inp);
 1213                                         }
 1214                                 } else
 1215                                         error = EINVAL;
 1216                                 break;
 1217                         }
 1218                         break;
 1219 #undef OPTSET
 1220 #undef OPTSET2
 1221 
 1222                 /*
 1223                  * Multicast socket options are processed by the in_mcast
 1224                  * module.
 1225                  */
 1226                 case IP_MULTICAST_IF:
 1227                 case IP_MULTICAST_VIF:
 1228                 case IP_MULTICAST_TTL:
 1229                 case IP_MULTICAST_LOOP:
 1230                 case IP_ADD_MEMBERSHIP:
 1231                 case IP_DROP_MEMBERSHIP:
 1232                 case IP_ADD_SOURCE_MEMBERSHIP:
 1233                 case IP_DROP_SOURCE_MEMBERSHIP:
 1234                 case IP_BLOCK_SOURCE:
 1235                 case IP_UNBLOCK_SOURCE:
 1236                 case IP_MSFILTER:
 1237                 case MCAST_JOIN_GROUP:
 1238                 case MCAST_LEAVE_GROUP:
 1239                 case MCAST_JOIN_SOURCE_GROUP:
 1240                 case MCAST_LEAVE_SOURCE_GROUP:
 1241                 case MCAST_BLOCK_SOURCE:
 1242                 case MCAST_UNBLOCK_SOURCE:
 1243                         error = inp_setmoptions(inp, sopt);
 1244                         break;
 1245 
 1246                 case IP_PORTRANGE:
 1247                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1248                                             sizeof optval);
 1249                         if (error)
 1250                                 break;
 1251 
 1252                         INP_WLOCK(inp);
 1253                         switch (optval) {
 1254                         case IP_PORTRANGE_DEFAULT:
 1255                                 inp->inp_flags &= ~(INP_LOWPORT);
 1256                                 inp->inp_flags &= ~(INP_HIGHPORT);
 1257                                 break;
 1258 
 1259                         case IP_PORTRANGE_HIGH:
 1260                                 inp->inp_flags &= ~(INP_LOWPORT);
 1261                                 inp->inp_flags |= INP_HIGHPORT;
 1262                                 break;
 1263 
 1264                         case IP_PORTRANGE_LOW:
 1265                                 inp->inp_flags &= ~(INP_HIGHPORT);
 1266                                 inp->inp_flags |= INP_LOWPORT;
 1267                                 break;
 1268 
 1269                         default:
 1270                                 error = EINVAL;
 1271                                 break;
 1272                         }
 1273                         INP_WUNLOCK(inp);
 1274                         break;
 1275 
 1276 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 1277                 case IP_IPSEC_POLICY:
 1278                         if (IPSEC_ENABLED(ipv4)) {
 1279                                 error = IPSEC_PCBCTL(ipv4, inp, sopt);
 1280                                 break;
 1281                         }
 1282                         /* FALLTHROUGH */
 1283 #endif /* IPSEC */
 1284 
 1285                 default:
 1286                         error = ENOPROTOOPT;
 1287                         break;
 1288                 }
 1289                 break;
 1290 
 1291         case SOPT_GET:
 1292                 switch (sopt->sopt_name) {
 1293                 case IP_OPTIONS:
 1294                 case IP_RETOPTS:
 1295                         INP_RLOCK(inp);
 1296                         if (inp->inp_options) {
 1297                                 struct mbuf *options;
 1298 
 1299                                 options = m_copym(inp->inp_options, 0,
 1300                                     M_COPYALL, M_NOWAIT);
 1301                                 INP_RUNLOCK(inp);
 1302                                 if (options != NULL) {
 1303                                         error = sooptcopyout(sopt,
 1304                                                              mtod(options, char *),
 1305                                                              options->m_len);
 1306                                         m_freem(options);
 1307                                 } else
 1308                                         error = ENOMEM;
 1309                         } else {
 1310                                 INP_RUNLOCK(inp);
 1311                                 sopt->sopt_valsize = 0;
 1312                         }
 1313                         break;
 1314 
 1315                 case IP_TOS:
 1316                 case IP_TTL:
 1317                 case IP_MINTTL:
 1318                 case IP_RECVOPTS:
 1319                 case IP_RECVRETOPTS:
 1320                 case IP_ORIGDSTADDR:
 1321                 case IP_RECVDSTADDR:
 1322                 case IP_RECVTTL:
 1323                 case IP_RECVIF:
 1324                 case IP_PORTRANGE:
 1325                 case IP_ONESBCAST:
 1326                 case IP_DONTFRAG:
 1327                 case IP_BINDANY:
 1328                 case IP_RECVTOS:
 1329                 case IP_BINDMULTI:
 1330                 case IP_FLOWID:
 1331                 case IP_FLOWTYPE:
 1332                 case IP_RECVFLOWID:
 1333 #ifdef  RSS
 1334                 case IP_RSSBUCKETID:
 1335                 case IP_RECVRSSBUCKETID:
 1336 #endif
 1337                 case IP_VLAN_PCP:
 1338                         switch (sopt->sopt_name) {
 1339 
 1340                         case IP_TOS:
 1341                                 optval = inp->inp_ip_tos;
 1342                                 break;
 1343 
 1344                         case IP_TTL:
 1345                                 optval = inp->inp_ip_ttl;
 1346                                 break;
 1347 
 1348                         case IP_MINTTL:
 1349                                 optval = inp->inp_ip_minttl;
 1350                                 break;
 1351 
 1352 #define OPTBIT(bit)     (inp->inp_flags & bit ? 1 : 0)
 1353 #define OPTBIT2(bit)    (inp->inp_flags2 & bit ? 1 : 0)
 1354 
 1355                         case IP_RECVOPTS:
 1356                                 optval = OPTBIT(INP_RECVOPTS);
 1357                                 break;
 1358 
 1359                         case IP_RECVRETOPTS:
 1360                                 optval = OPTBIT(INP_RECVRETOPTS);
 1361                                 break;
 1362 
 1363                         case IP_RECVDSTADDR:
 1364                                 optval = OPTBIT(INP_RECVDSTADDR);
 1365                                 break;
 1366 
 1367                         case IP_ORIGDSTADDR:
 1368                                 optval = OPTBIT2(INP_ORIGDSTADDR);
 1369                                 break;
 1370 
 1371                         case IP_RECVTTL:
 1372                                 optval = OPTBIT(INP_RECVTTL);
 1373                                 break;
 1374 
 1375                         case IP_RECVIF:
 1376                                 optval = OPTBIT(INP_RECVIF);
 1377                                 break;
 1378 
 1379                         case IP_PORTRANGE:
 1380                                 if (inp->inp_flags & INP_HIGHPORT)
 1381                                         optval = IP_PORTRANGE_HIGH;
 1382                                 else if (inp->inp_flags & INP_LOWPORT)
 1383                                         optval = IP_PORTRANGE_LOW;
 1384                                 else
 1385                                         optval = 0;
 1386                                 break;
 1387 
 1388                         case IP_ONESBCAST:
 1389                                 optval = OPTBIT(INP_ONESBCAST);
 1390                                 break;
 1391                         case IP_DONTFRAG:
 1392                                 optval = OPTBIT(INP_DONTFRAG);
 1393                                 break;
 1394                         case IP_BINDANY:
 1395                                 optval = OPTBIT(INP_BINDANY);
 1396                                 break;
 1397                         case IP_RECVTOS:
 1398                                 optval = OPTBIT(INP_RECVTOS);
 1399                                 break;
 1400                         case IP_FLOWID:
 1401                                 optval = inp->inp_flowid;
 1402                                 break;
 1403                         case IP_FLOWTYPE:
 1404                                 optval = inp->inp_flowtype;
 1405                                 break;
 1406                         case IP_RECVFLOWID:
 1407                                 optval = OPTBIT2(INP_RECVFLOWID);
 1408                                 break;
 1409 #ifdef  RSS
 1410                         case IP_RSSBUCKETID:
 1411                                 retval = rss_hash2bucket(inp->inp_flowid,
 1412                                     inp->inp_flowtype,
 1413                                     &rss_bucket);
 1414                                 if (retval == 0)
 1415                                         optval = rss_bucket;
 1416                                 else
 1417                                         error = EINVAL;
 1418                                 break;
 1419                         case IP_RECVRSSBUCKETID:
 1420                                 optval = OPTBIT2(INP_RECVRSSBUCKETID);
 1421                                 break;
 1422 #endif
 1423                         case IP_BINDMULTI:
 1424                                 optval = OPTBIT2(INP_BINDMULTI);
 1425                                 break;
 1426                         case IP_VLAN_PCP:
 1427                                 if (OPTBIT2(INP_2PCP_SET)) {
 1428                                         optval = (inp->inp_flags2 &
 1429                                             INP_2PCP_MASK) >> INP_2PCP_SHIFT;
 1430                                 } else {
 1431                                         optval = -1;
 1432                                 }
 1433                                 break;
 1434                         }
 1435                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1436                         break;
 1437 
 1438                 /*
 1439                  * Multicast socket options are processed by the in_mcast
 1440                  * module.
 1441                  */
 1442                 case IP_MULTICAST_IF:
 1443                 case IP_MULTICAST_VIF:
 1444                 case IP_MULTICAST_TTL:
 1445                 case IP_MULTICAST_LOOP:
 1446                 case IP_MSFILTER:
 1447                         error = inp_getmoptions(inp, sopt);
 1448                         break;
 1449 
 1450 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 1451                 case IP_IPSEC_POLICY:
 1452                         if (IPSEC_ENABLED(ipv4)) {
 1453                                 error = IPSEC_PCBCTL(ipv4, inp, sopt);
 1454                                 break;
 1455                         }
 1456                         /* FALLTHROUGH */
 1457 #endif /* IPSEC */
 1458 
 1459                 default:
 1460                         error = ENOPROTOOPT;
 1461                         break;
 1462                 }
 1463                 break;
 1464         }
 1465         return (error);
 1466 }
 1467 
 1468 /*
 1469  * Routine called from ip_output() to loop back a copy of an IP multicast
 1470  * packet to the input queue of a specified interface.  Note that this
 1471  * calls the output routine of the loopback "driver", but with an interface
 1472  * pointer that might NOT be a loopback interface -- evil, but easier than
 1473  * replicating that code here.
 1474  */
 1475 static void
 1476 ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen)
 1477 {
 1478         struct ip *ip;
 1479         struct mbuf *copym;
 1480 
 1481         /*
 1482          * Make a deep copy of the packet because we're going to
 1483          * modify the pack in order to generate checksums.
 1484          */
 1485         copym = m_dup(m, M_NOWAIT);
 1486         if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen))
 1487                 copym = m_pullup(copym, hlen);
 1488         if (copym != NULL) {
 1489                 /* If needed, compute the checksum and mark it as valid. */
 1490                 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 1491                         in_delayed_cksum(copym);
 1492                         copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 1493                         copym->m_pkthdr.csum_flags |=
 1494                             CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 1495                         copym->m_pkthdr.csum_data = 0xffff;
 1496                 }
 1497                 /*
 1498                  * We don't bother to fragment if the IP length is greater
 1499                  * than the interface's MTU.  Can this possibly matter?
 1500                  */
 1501                 ip = mtod(copym, struct ip *);
 1502                 ip->ip_sum = 0;
 1503                 ip->ip_sum = in_cksum(copym, hlen);
 1504                 if_simloop(ifp, copym, AF_INET, 0);
 1505         }
 1506 }

Cache object: 011a8b83addfc58b594115378c7576fb


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.