The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_output.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD$");
   34 
   35 #include "opt_inet.h"
   36 #include "opt_ipsec.h"
   37 #include "opt_mbuf_stress_test.h"
   38 #include "opt_mpath.h"
   39 #include "opt_route.h"
   40 #include "opt_sctp.h"
   41 #include "opt_rss.h"
   42 
   43 #include <sys/param.h>
   44 #include <sys/systm.h>
   45 #include <sys/kernel.h>
   46 #include <sys/lock.h>
   47 #include <sys/malloc.h>
   48 #include <sys/mbuf.h>
   49 #include <sys/priv.h>
   50 #include <sys/proc.h>
   51 #include <sys/protosw.h>
   52 #include <sys/rmlock.h>
   53 #include <sys/sdt.h>
   54 #include <sys/socket.h>
   55 #include <sys/socketvar.h>
   56 #include <sys/sysctl.h>
   57 #include <sys/ucred.h>
   58 
   59 #include <net/if.h>
   60 #include <net/if_var.h>
   61 #include <net/if_llatbl.h>
   62 #include <net/netisr.h>
   63 #include <net/pfil.h>
   64 #include <net/route.h>
   65 #include <net/flowtable.h>
   66 #ifdef RADIX_MPATH
   67 #include <net/radix_mpath.h>
   68 #endif
   69 #include <net/rss_config.h>
   70 #include <net/vnet.h>
   71 
   72 #include <netinet/in.h>
   73 #include <netinet/in_kdtrace.h>
   74 #include <netinet/in_systm.h>
   75 #include <netinet/ip.h>
   76 #include <netinet/in_pcb.h>
   77 #include <netinet/in_rss.h>
   78 #include <netinet/in_var.h>
   79 #include <netinet/ip_var.h>
   80 #include <netinet/ip_options.h>
   81 #ifdef SCTP
   82 #include <netinet/sctp.h>
   83 #include <netinet/sctp_crc32.h>
   84 #endif
   85 
   86 #include <netipsec/ipsec_support.h>
   87 
   88 #include <machine/in_cksum.h>
   89 
   90 #include <security/mac/mac_framework.h>
   91 
   92 #ifdef MBUF_STRESS_TEST
      /*
       * Stress-test knob: when non-zero, ip_output() passes any packet it is
       * about to send unfragmented, and whose m_pkthdr.len exceeds this value,
       * through m_fragment() first.
       */
   93 static int mbuf_frag_size = 0;
   94 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
   95         &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
   96 #endif
   97 
      /* Called by ip_output() to loop back a copy of an outgoing multicast packet. */
   98 static void     ip_mloopback(struct ifnet *, const struct mbuf *, int);
   99 
  100 
      /* Consulted by ip_output() for multicast sends made without ip_moptions. */
  101 extern int in_mcast_loop;
      /* NOTE(review): inetsw is not referenced in the part of the file visible here. */
  102 extern  struct protosw inetsw[];
  103 
      /*
       * Run the IPv4 output packet-filter hooks on *mp and tell ip_output()
       * how to proceed.
       *
       * mp      in/out packet; re-read from *mp after the hooks have run.
       * ifp     outgoing interface, handed to the hooks.
       * inp     PCB, handed to the hooks unchanged.
       * dst     destination sockaddr; rewritten here when the filter changed
       *         the IP destination or attached a PACKET_TAG_IPFORWARD tag.
       * fibnum  in/out FIB number; updated when it differs from M_GETFIB(m).
       * error   out; result of pfil_run_hooks() or of netisr_queue().
       *
       * Returns 1 when ip_output() has nothing more to do with the packet (the
       * hooks returned an error or left no mbuf, or the packet was queued to
       * NETISR_IP), -1 when the caller must redo its lookup with the updated
       * dst/fibnum, and 0 to continue normally.
       */
  104 static inline int
  105 ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp,
  106     struct sockaddr_in *dst, int *fibnum, int *error)
  107 {
  108         struct m_tag *fwd_tag = NULL;
  109         struct mbuf *m;
  110         struct in_addr odst;
  111         struct ip *ip;
  112 
  113         m = *mp;
  114         ip = mtod(m, struct ip *);
  115 
  116         /* Run through list of hooks for output packets. */
  117         odst.s_addr = ip->ip_dst.s_addr;
  118         *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, 0, inp);
  119         m = *mp;
  120         if ((*error) != 0 || m == NULL)
  121                 return 1; /* Finished */
  122 
              /* Re-derive the header pointer from the mbuf returned by the hooks. */
  123         ip = mtod(m, struct ip *);
  124 
  125         /* See if destination IP address was changed by packet filter. */
  126         if (odst.s_addr != ip->ip_dst.s_addr) {
  127                 m->m_flags |= M_SKIP_FIREWALL;
  128                 /* If destination is now ourself drop to ip_input(). */
  129                 if (in_localip(ip->ip_dst)) {
  130                         m->m_flags |= M_FASTFWD_OURS;
  131                         if (m->m_pkthdr.rcvif == NULL)
  132                                 m->m_pkthdr.rcvif = V_loif;
                              /* Checksum was deferred: flag it as already valid. */
  133                         if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
  134                                 m->m_pkthdr.csum_flags |=
  135                                         CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  136                                 m->m_pkthdr.csum_data = 0xffff;
  137                         }
  138                         m->m_pkthdr.csum_flags |=
  139                                 CSUM_IP_CHECKED | CSUM_IP_VALID;
  140 #ifdef SCTP
  141                         if (m->m_pkthdr.csum_flags & CSUM_SCTP)
  142                                 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
  143 #endif
  144                         *error = netisr_queue(NETISR_IP, m);
  145                         return 1; /* Finished */
  146                 }
  147 
                      /* New non-local destination: rebuild dst for a fresh lookup. */
  148                 bzero(dst, sizeof(*dst));
  149                 dst->sin_family = AF_INET;
  150                 dst->sin_len = sizeof(*dst);
  151                 dst->sin_addr = ip->ip_dst;
  152 
  153                 return -1; /* Reloop */
  154         }
  155         /* See if fib was changed by packet filter. */
  156         if ((*fibnum) != M_GETFIB(m)) {
  157                 m->m_flags |= M_SKIP_FIREWALL;
  158                 *fibnum = M_GETFIB(m);
  159                 return -1; /* Reloop for FIB change */
  160         }
  161 
  162         /* See if local, if yes, send it to netisr with M_FASTFWD_OURS. */
  163         if (m->m_flags & M_FASTFWD_OURS) {
  164                 if (m->m_pkthdr.rcvif == NULL)
  165                         m->m_pkthdr.rcvif = V_loif;
  166                 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
  167                         m->m_pkthdr.csum_flags |=
  168                                 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  169                         m->m_pkthdr.csum_data = 0xffff;
  170                 }
  171 #ifdef SCTP
  172                 if (m->m_pkthdr.csum_flags & CSUM_SCTP)
  173                         m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
  174 #endif
  175                 m->m_pkthdr.csum_flags |=
  176                         CSUM_IP_CHECKED | CSUM_IP_VALID;
  177 
  178                 *error = netisr_queue(NETISR_IP, m);
  179                 return 1; /* Finished */
  180         }
  181         /* Or forward to some other address? */
  182         if ((m->m_flags & M_IP_NEXTHOP) &&
  183             ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
                      /* The next-hop sockaddr_in is read from just past the tag header. */
  184                 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
  185                 m->m_flags |= M_SKIP_FIREWALL;
  186                 m->m_flags &= ~M_IP_NEXTHOP;
  187                 m_tag_delete(m, fwd_tag);
  188 
  189                 return -1; /* Reloop for CHANGE of dst */
  190         }
  191 
  192         return 0;
  193 }
  194 
  195 /*
  196  * IP output.  The packet in mbuf chain m contains a skeletal IP
  197  * header (with len, off, ttl, proto, tos, src, dst).
  198  * The mbuf chain containing the packet will be freed.
  199  * The mbuf opt, if present, will not be freed.
  200  * If route ro is present and has ro_rt initialized, route lookup would be
  201  * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
  202  * then result of route lookup is stored in ro->ro_rt.
  203  *
  204  * In the IP forwarding case, the packet will arrive with options already
  205  * inserted, so must have a NULL opt pointer.
       *
       * Parameters:
       *   m      packet to send; must carry a packet header (M_ASSERTPKTHDR).
       *   opt    optional IP options, inserted with ip_insertoptions(); may be
       *          NULL.
       *   ro     optional route cache; when NULL a zeroed on-stack route is
       *          used and released before returning.
       *   flags  IP_* output flags; this function tests IP_FORWARDING,
       *          IP_RAWOUTPUT, IP_SENDONES, IP_ROUTETOIF, IP_ALLOWBROADCAST
       *          and IP_NODEFAULTFLOWID.
       *   imo    optional multicast options (interface, ttl, vif, loop); may
       *          be NULL.
       *   inp    optional PCB; when non-NULL it must be locked and supplies
       *          the FIB number and, unless IP_NODEFAULTFLOWID is set, the
       *          flow id and hash type.
       *
       * Returns 0 on success or an errno value.  ENETUNREACH, EHOSTUNREACH,
       * EADDRNOTAVAIL, EACCES and EMSGSIZE are set directly here; errors from
       * IPSEC_OUTPUT() (except EINPROGRESS, which is mapped to 0), from
       * ip_output_pfil(), from ip_fragment() and from the interface's
       * if_output routine are passed back as well.
  206  */
  207 int
  208 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
  209     struct ip_moptions *imo, struct inpcb *inp)
  210 {
  211         struct rm_priotracker in_ifa_tracker;
  212         struct ip *ip;
  213         struct ifnet *ifp = NULL;       /* keep compiler happy */
  214         struct mbuf *m0;
  215         int hlen = sizeof (struct ip);
  216         int mtu;
  217         int error = 0;
  218         struct sockaddr_in *dst;
  219         const struct sockaddr_in *gw;
  220         struct in_ifaddr *ia;
  221         int isbroadcast;
  222         uint16_t ip_len, ip_off;
  223         struct route iproute;
  224         struct rtentry *rte;    /* cache for ro->ro_rt */
  225         uint32_t fibnum;
  226         int have_ia_ref;
  227 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  228         int no_route_but_check_spd = 0;
  229 #endif
  230         M_ASSERTPKTHDR(m);
  231 
              /* Inherit FIB and (optionally) flow identification from the PCB. */
  232         if (inp != NULL) {
  233                 INP_LOCK_ASSERT(inp);
  234                 M_SETFIB(m, inp->inp_inc.inc_fibnum);
  235                 if ((flags & IP_NODEFAULTFLOWID) == 0) {
  236                         m->m_pkthdr.flowid = inp->inp_flowid;
  237                         M_HASHTYPE_SET(m, inp->inp_flowtype);
  238                 }
  239         }
  240 
              /* No caller-supplied route cache: use a zeroed local one (freed at done:). */
  241         if (ro == NULL) {
  242                 ro = &iproute;
  243                 bzero(ro, sizeof (*ro));
  244         }
  245 
  246 #ifdef FLOWTABLE
  247         if (ro->ro_rt == NULL)
  248                 (void )flowtable_lookup(AF_INET, m, ro);
  249 #endif
  250 
  251         if (opt) {
  252                 int len = 0;
  253                 m = ip_insertoptions(m, opt, &len);
  254                 if (len != 0)
  255                         hlen = len; /* ip->ip_hl is updated above */
  256         }
  257         ip = mtod(m, struct ip *);
  258         ip_len = ntohs(ip->ip_len);
  259         ip_off = ntohs(ip->ip_off);
  260 
  261         if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
  262                 ip->ip_v = IPVERSION;
  263                 ip->ip_hl = hlen >> 2;
  264                 ip_fillid(ip);
  265         } else {
  266                 /* Header already set, fetch hlen from there */
  267                 hlen = ip->ip_hl << 2;
  268         }
  269         if ((flags & IP_FORWARDING) == 0)
  270                 IPSTAT_INC(ips_localout);
  271 
  272         /*
  273          * dst/gw handling:
  274          *
  275          * dst can be rewritten but always points to &ro->ro_dst.
  276          * gw is readonly but can point either to dst OR rt_gateway,
  277          * therefore we need to restore gw if we're redoing lookup.
  278          */
  279         gw = dst = (struct sockaddr_in *)&ro->ro_dst;
  280         fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
  281         rte = ro->ro_rt;
  282         if (rte == NULL) {
  283                 bzero(dst, sizeof(*dst));
  284                 dst->sin_family = AF_INET;
  285                 dst->sin_len = sizeof(*dst);
  286                 dst->sin_addr = ip->ip_dst;
  287         }
      /* Re-entry point after ip_output_pfil() asks for a new lookup (see case -1). */
  288 again:
  289         /*
  290          * Validate route against routing table additions;
  291          * a better/more specific route might have been added.
  292          */
  293         if (inp)
  294                 RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
  295         /*
  296          * If there is a cached route,
  297          * check that it is to the same destination
  298          * and is still up.  If not, free it and try again.
  299          * The address family should also be checked in case of sharing the
  300          * cache with IPv6.
  301          * Also check whether routing cache needs invalidation.
  302          */
  303         rte = ro->ro_rt;
  304         if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
  305                     rte->rt_ifp == NULL ||
  306                     !RT_LINK_IS_UP(rte->rt_ifp) ||
  307                           dst->sin_family != AF_INET ||
  308                           dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
  309                 RTFREE(rte);
  310                 rte = ro->ro_rt = (struct rtentry *)NULL;
  311                 if (ro->ro_lle)
  312                         LLE_FREE(ro->ro_lle);   /* zeros ro_lle */
  313                 ro->ro_lle = (struct llentry *)NULL;
  314         }
  315         ia = NULL;
  316         have_ia_ref = 0;
  317         /*
  318          * If routing to interface only, short circuit routing lookup.
  319          * The use of an all-ones broadcast address implies this; an
  320          * interface is specified by the broadcast address of an interface,
  321          * or the destination address of a ptp interface.
  322          */
  323         if (flags & IP_SENDONES) {
  324                 if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst),
  325                                                       M_GETFIB(m)))) == NULL &&
  326                     (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
  327                                                     M_GETFIB(m)))) == NULL) {
  328                         IPSTAT_INC(ips_noroute);
  329                         error = ENETUNREACH;
  330                         goto bad;
  331                 }
  332                 have_ia_ref = 1;
  333                 ip->ip_dst.s_addr = INADDR_BROADCAST;
  334                 dst->sin_addr = ip->ip_dst;
  335                 ifp = ia->ia_ifp;
  336                 ip->ip_ttl = 1;
  337                 isbroadcast = 1;
  338         } else if (flags & IP_ROUTETOIF) {
  339                 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
  340                                                     M_GETFIB(m)))) == NULL &&
  341                     (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0,
  342                                                 M_GETFIB(m)))) == NULL) {
  343                         IPSTAT_INC(ips_noroute);
  344                         error = ENETUNREACH;
  345                         goto bad;
  346                 }
  347                 have_ia_ref = 1;
  348                 ifp = ia->ia_ifp;
  349                 ip->ip_ttl = 1;
  350                 isbroadcast = in_broadcast(dst->sin_addr, ifp);
  351         } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
  352             imo != NULL && imo->imo_multicast_ifp != NULL) {
  353                 /*
  354                  * Bypass the normal routing lookup for multicast
  355                  * packets if the interface is specified.
  356                  */
  357                 ifp = imo->imo_multicast_ifp;
  358                 IFP_TO_IA(ifp, ia, &in_ifa_tracker);
  359                 if (ia)
  360                         have_ia_ref = 1;
  361                 isbroadcast = 0;        /* fool gcc */
  362         } else {
  363                 /*
  364                  * We want to do any cloning requested by the link layer,
  365                  * as this is probably required in all cases for correct
  366                  * operation (as it is for ARP).
  367                  */
  368                 if (rte == NULL) {
  369 #ifdef RADIX_MPATH
  370                         rtalloc_mpath_fib(ro,
  371                             ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
  372                             fibnum);
  373 #else
  374                         in_rtalloc_ign(ro, 0, fibnum);
  375 #endif
  376                         rte = ro->ro_rt;
  377                 }
  378                 if (rte == NULL ||
  379                     (rte->rt_flags & RTF_UP) == 0 ||
  380                     rte->rt_ifp == NULL ||
  381                     !RT_LINK_IS_UP(rte->rt_ifp)) {
  382 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  383                         /*
  384                          * There is no route for this packet, but it is
  385                          * possible that a matching SPD entry exists.
  386                          */
  387                         no_route_but_check_spd = 1;
  388                         mtu = 0; /* Silence GCC warning. */
  389                         goto sendit;
  390 #endif
  391                         IPSTAT_INC(ips_noroute);
  392                         error = EHOSTUNREACH;
  393                         goto bad;
  394                 }
                      /* Note: ia is taken from the route here and have_ia_ref stays 0. */
  395                 ia = ifatoia(rte->rt_ifa);
  396                 ifp = rte->rt_ifp;
  397                 counter_u64_add(rte->rt_pksent, 1);
  398                 rt_update_ro_flags(ro);
  399                 if (rte->rt_flags & RTF_GATEWAY)
  400                         gw = (struct sockaddr_in *)rte->rt_gateway;
  401                 if (rte->rt_flags & RTF_HOST)
  402                         isbroadcast = (rte->rt_flags & RTF_BROADCAST);
  403                 else
  404                         isbroadcast = in_broadcast(gw->sin_addr, ifp);
  405         }
  406 
  407         /*
  408          * Calculate MTU.  If we have a route that is up, use that,
  409          * otherwise use the interface's MTU.
  410          */
  411         if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
  412                 mtu = rte->rt_mtu;
  413         else
  414                 mtu = ifp->if_mtu;
  415         /* Catch a possible divide by zero later. */
  416         KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
  417             __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
  418 
  419         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
  420                 m->m_flags |= M_MCAST;
  421                 /*
  422                  * IP destination address is multicast.  Make sure "gw"
  423                  * still points to the address in "ro".  (It may have been
  424                  * changed to point to a gateway address, above.)
  425                  */
  426                 gw = dst;
  427                 /*
  428                  * See if the caller provided any multicast options
  429                  */
  430                 if (imo != NULL) {
  431                         ip->ip_ttl = imo->imo_multicast_ttl;
  432                         if (imo->imo_multicast_vif != -1)
  433                                 ip->ip_src.s_addr =
  434                                     ip_mcast_src ?
  435                                     ip_mcast_src(imo->imo_multicast_vif) :
  436                                     INADDR_ANY;
  437                 } else
  438                         ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
  439                 /*
  440                  * Confirm that the outgoing interface supports multicast.
  441                  */
  442                 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
  443                         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
  444                                 IPSTAT_INC(ips_noroute);
  445                                 error = ENETUNREACH;
  446                                 goto bad;
  447                         }
  448                 }
  449                 /*
  450                  * If source address not specified yet, use address
  451                  * of outgoing interface.
  452                  */
  453                 if (ip->ip_src.s_addr == INADDR_ANY) {
  454                         /* Interface may have no addresses. */
  455                         if (ia != NULL)
  456                                 ip->ip_src = IA_SIN(ia)->sin_addr;
  457                 }
  458 
  459                 if ((imo == NULL && in_mcast_loop) ||
  460                     (imo && imo->imo_multicast_loop)) {
  461                         /*
  462                          * Loop back multicast datagram if not expressly
  463                          * forbidden to do so, even if we are not a member
  464                          * of the group; ip_input() will filter it later,
  465                          * thus deferring a hash lookup and mutex acquisition
  466                          * at the expense of a cheap copy using m_copym().
  467                          */
  468                         ip_mloopback(ifp, m, hlen);
  469                 } else {
  470                         /*
  471                          * If we are acting as a multicast router, perform
  472                          * multicast forwarding as if the packet had just
  473                          * arrived on the interface to which we are about
  474                          * to send.  The multicast forwarding function
  475                          * recursively calls this function, using the
  476                          * IP_FORWARDING flag to prevent infinite recursion.
  477                          *
  478                          * Multicasts that are looped back by ip_mloopback(),
  479                          * above, will be forwarded by the ip_input() routine,
  480                          * if necessary.
  481                          */
  482                         if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
  483                                 /*
  484                                  * If rsvp daemon is not running, do not
  485                                  * set ip_moptions. This ensures that the packet
  486                                  * is multicast and not just sent down one link
  487                                  * as prescribed by rsvpd.
  488                                  */
  489                                 if (!V_rsvp_on)
  490                                         imo = NULL;
  491                                 if (ip_mforward &&
  492                                     ip_mforward(ip, ifp, m, imo) != 0) {
  493                                         m_freem(m);
  494                                         goto done;
  495                                 }
  496                         }
  497                 }
  498 
  499                 /*
  500                  * Multicasts with a time-to-live of zero may be looped-
  501                  * back, above, but must not be transmitted on a network.
  502                  * Also, multicasts addressed to the loopback interface
  503                  * are not sent -- the above call to ip_mloopback() will
  504                  * loop back a copy. ip_input() will drop the copy if
  505                  * this host does not belong to the destination group on
  506                  * the loopback interface.
  507                  */
  508                 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
  509                         m_freem(m);
  510                         goto done;
  511                 }
  512 
  513                 goto sendit;
  514         }
  515 
  516         /*
  517          * If the source address is not specified yet, use the address
  518          * of the outgoing interface.
  519          */
  520         if (ip->ip_src.s_addr == INADDR_ANY) {
  521                 /* Interface may have no addresses. */
  522                 if (ia != NULL) {
  523                         ip->ip_src = IA_SIN(ia)->sin_addr;
  524                 }
  525         }
  526 
  527         /*
  528          * Look for broadcast address and
  529          * verify user is allowed to send
  530          * such a packet.
  531          */
  532         if (isbroadcast) {
  533                 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
  534                         error = EADDRNOTAVAIL;
  535                         goto bad;
  536                 }
  537                 if ((flags & IP_ALLOWBROADCAST) == 0) {
  538                         error = EACCES;
  539                         goto bad;
  540                 }
  541                 /* don't allow broadcast messages to be fragmented */
  542                 if (ip_len > mtu) {
  543                         error = EMSGSIZE;
  544                         goto bad;
  545                 }
  546                 m->m_flags |= M_BCAST;
  547         } else {
  548                 m->m_flags &= ~M_BCAST;
  549         }
  550 
  551 sendit:
  552 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  553         if (IPSEC_ENABLED(ipv4)) {
  554                 if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) {
                              /* EINPROGRESS is reported to the caller as success. */
  555                         if (error == EINPROGRESS)
  556                                 error = 0;
  557                         goto done;
  558                 }
  559         }
  560         /*
  561          * Check if there was a route for this packet; return error if not.
  562          */
  563         if (no_route_but_check_spd) {
  564                 IPSTAT_INC(ips_noroute);
  565                 error = EHOSTUNREACH;
  566                 goto bad;
  567         }
  568         /* Update variables that are affected by IPSEC_OUTPUT(). */
  569         ip = mtod(m, struct ip *);
  570         hlen = ip->ip_hl << 2;
  571 #endif /* IPSEC */
  572 
  573         /* Jump over all PFIL processing if hooks are not active. */
  574         if (PFIL_HOOKED(&V_inet_pfil_hook)) {
  575                 switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) {
  576                 case 1: /* Finished */
  577                         goto done;
  578 
  579                 case 0: /* Continue normally */
  580                         ip = mtod(m, struct ip *);
  581                         break;
  582 
  583                 case -1: /* Need to try again */
  584                         /* Reset everything for a new round */
  585                         RO_RTFREE(ro);
  586                         if (have_ia_ref)
  587                                 ifa_free(&ia->ia_ifa);
  588                         ro->ro_prepend = NULL;
  589                         rte = NULL;
  590                         gw = dst;
  591                         ip = mtod(m, struct ip *);
  592                         goto again;
  593 
  594                 }
  595         }
  596 
  597         /* 127/8 must not appear on wire - RFC1122. */
  598         if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
  599             (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
  600                 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
  601                         IPSTAT_INC(ips_badaddr);
  602                         error = EADDRNOTAVAIL;
  603                         goto bad;
  604                 }
  605         }
  606 
              /* Do in software any deferred checksum not covered by if_hwassist. */
  607         m->m_pkthdr.csum_flags |= CSUM_IP;
  608         if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
  609                 in_delayed_cksum(m);
  610                 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
  611         }
  612 #ifdef SCTP
  613         if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
  614                 sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
  615                 m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
  616         }
  617 #endif
  618 
  619         /*
  620          * If small enough for interface, or the interface will take
  621          * care of the fragmentation for us, we can just send directly.
  622          */
  623         if (ip_len <= mtu ||
  624             (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
  625                 ip->ip_sum = 0;
  626                 if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
  627                         ip->ip_sum = in_cksum(m, hlen);
  628                         m->m_pkthdr.csum_flags &= ~CSUM_IP;
  629                 }
  630 
  631                 /*
  632                  * Record statistics for this interface address.
  633                  * With CSUM_TSO the byte/packet count will be slightly
  634                  * incorrect because we count the IP+TCP headers only
  635                  * once instead of for every generated packet.
  636                  */
  637                 if (!(flags & IP_FORWARDING) && ia) {
  638                         if (m->m_pkthdr.csum_flags & CSUM_TSO)
  639                                 counter_u64_add(ia->ia_ifa.ifa_opackets,
  640                                     m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
  641                         else
  642                                 counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
  643 
  644                         counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len);
  645                 }
  646 #ifdef MBUF_STRESS_TEST
  647                 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
  648                         m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
  649 #endif
  650                 /*
  651                  * Reset layer specific mbuf flags
  652                  * to avoid confusing lower layers.
  653                  */
  654                 m_clrprotoflags(m);
  655                 IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
  656                 error = (*ifp->if_output)(ifp, m,
  657                     (const struct sockaddr *)gw, ro);
  658                 goto done;
  659         }
  660 
  661         /* Balk when DF bit is set or the interface didn't support TSO. */
  662         if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
  663                 error = EMSGSIZE;
  664                 IPSTAT_INC(ips_cantfrag);
  665                 goto bad;
  666         }
  667 
  668         /*
  669          * Too large for interface; fragment if possible. If successful,
  670          * on return, m will point to a list of packets to be sent.
  671          */
  672         error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
  673         if (error)
  674                 goto bad;
              /*
               * Send the fragments in order; once an if_output call fails, the
               * remaining fragments are freed instead of being sent.
               */
  675         for (; m; m = m0) {
  676                 m0 = m->m_nextpkt;
  677                 m->m_nextpkt = 0;
  678                 if (error == 0) {
  679                         /* Record statistics for this interface address. */
  680                         if (ia != NULL) {
  681                                 counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
  682                                 counter_u64_add(ia->ia_ifa.ifa_obytes,
  683                                     m->m_pkthdr.len);
  684                         }
  685                         /*
  686                          * Reset layer specific mbuf flags
  687                          * to avoid confusing lower layers.
  688                          */
  689                         m_clrprotoflags(m);
  690 
  691                         IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp,
  692                             mtod(m, struct ip *), NULL);
  693                         error = (*ifp->if_output)(ifp, m,
  694                             (const struct sockaddr *)gw, ro);
  695                 } else
  696                         m_freem(m);
  697         }
  698 
  699         if (error == 0)
  700                 IPSTAT_INC(ips_fragmented);
  701 
  702 done:
  703         /*
  704          * Release the route if using our private route, or if
  705          * (with flowtable) we don't have our own reference.
  706          */
  707         if (ro == &iproute || ro->ro_flags & RT_NORTREF)
  708                 RO_RTFREE(ro);
  709         else if (rte == NULL)
  710                 /*
  711                  * If the caller supplied a route but somehow the reference
  712                  * to it has been released need to prevent the caller
  713                  * calling RTFREE on it again.
  714                  */
  715                 ro->ro_rt = NULL;
  716         if (have_ia_ref)
  717                 ifa_free(&ia->ia_ifa);
  718         return (error);
      /* Error exit: free the packet, then share the cleanup at done:. */
  719 bad:
  720         m_freem(m);
  721         goto done;
  722 }
  723 
  724 /*
  725  * Create a chain of fragments which fit the given mtu. m_frag points to the
  726  * mbuf to be fragmented; on return it points to the chain with the fragments.
  727  * Return 0 if no error. If error, m_frag may contain a partially built
  728  * chain of fragments that should be freed by the caller.
  729  *
  730  * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
  731  */
  732 int
  733 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
  734     u_long if_hwassist_flags)
  735 {
  736         int error = 0;
  737         int hlen = ip->ip_hl << 2;
  738         int len = (mtu - hlen) & ~7;    /* size of payload in each fragment */
  739         int off;
  740         struct mbuf *m0 = *m_frag;      /* the original packet          */
  741         int firstlen;
  742         struct mbuf **mnext;
  743         int nfrags;
  744         uint16_t ip_len, ip_off;
  745 
  746         ip_len = ntohs(ip->ip_len);
  747         ip_off = ntohs(ip->ip_off);
  748 
  749         if (ip_off & IP_DF) {   /* Fragmentation not allowed */
  750                 IPSTAT_INC(ips_cantfrag);
  751                 return EMSGSIZE;
  752         }
  753 
  754         /*
  755          * Must be able to put at least 8 bytes per fragment.
  756          */
  757         if (len < 8)
  758                 return EMSGSIZE;
  759 
  760         /*
  761          * If the interface will not calculate checksums on
  762          * fragmented packets, then do it here.
  763          */
  764         if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
  765                 in_delayed_cksum(m0);
  766                 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
  767         }
  768 #ifdef SCTP
  769         if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
  770                 sctp_delayed_cksum(m0, hlen);
  771                 m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
  772         }
  773 #endif
  774         if (len > PAGE_SIZE) {
  775                 /*
  776                  * Fragment large datagrams such that each segment
  777                  * contains a multiple of PAGE_SIZE amount of data,
  778                  * plus headers. This enables a receiver to perform
  779                  * page-flipping zero-copy optimizations.
  780                  *
  781                  * XXX When does this help given that sender and receiver
  782                  * could have different page sizes, and also mtu could
  783                  * be less than the receiver's page size ?
  784                  */
  785                 int newlen;
  786 
  787                 off = MIN(mtu, m0->m_pkthdr.len);
  788 
  789                 /*
  790                  * firstlen (off - hlen) must be aligned on an
  791                  * 8-byte boundary
  792                  */
  793                 if (off < hlen)
  794                         goto smart_frag_failure;
  795                 off = ((off - hlen) & ~7) + hlen;
  796                 newlen = (~PAGE_MASK) & mtu;
  797                 if ((newlen + sizeof (struct ip)) > mtu) {
  798                         /* we failed, go back the default */
  799 smart_frag_failure:
  800                         newlen = len;
  801                         off = hlen + len;
  802                 }
  803                 len = newlen;
  804 
  805         } else {
  806                 off = hlen + len;
  807         }
  808 
  809         firstlen = off - hlen;
  810         mnext = &m0->m_nextpkt;         /* pointer to next packet */
  811 
  812         /*
  813          * Loop through length of segment after first fragment,
  814          * make new header and copy data of each part and link onto chain.
  815          * Here, m0 is the original packet, m is the fragment being created.
  816          * The fragments are linked off the m_nextpkt of the original
  817          * packet, which after processing serves as the first fragment.
  818          */
  819         for (nfrags = 1; off < ip_len; off += len, nfrags++) {
  820                 struct ip *mhip;        /* ip header on the fragment */
  821                 struct mbuf *m;
  822                 int mhlen = sizeof (struct ip);
  823 
  824                 m = m_gethdr(M_NOWAIT, MT_DATA);
  825                 if (m == NULL) {
  826                         error = ENOBUFS;
  827                         IPSTAT_INC(ips_odropped);
  828                         goto done;
  829                 }
  830                 /*
  831                  * Make sure the complete packet header gets copied
  832                  * from the originating mbuf to the newly created
  833                  * mbuf. This also ensures that existing firewall
  834                  * classification(s), VLAN tags and so on get copied
  835                  * to the resulting fragmented packet(s):
  836                  */
  837                 if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
  838                         m_free(m);
  839                         error = ENOBUFS;
  840                         IPSTAT_INC(ips_odropped);
  841                         goto done;
  842                 }
  843                 /*
  844                  * In the first mbuf, leave room for the link header, then
  845                  * copy the original IP header including options. The payload
  846                  * goes into an additional mbuf chain returned by m_copym().
  847                  */
  848                 m->m_data += max_linkhdr;
  849                 mhip = mtod(m, struct ip *);
  850                 *mhip = *ip;
  851                 if (hlen > sizeof (struct ip)) {
  852                         mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
  853                         mhip->ip_v = IPVERSION;
  854                         mhip->ip_hl = mhlen >> 2;
  855                 }
  856                 m->m_len = mhlen;
  857                 /* XXX do we need to add ip_off below ? */
  858                 mhip->ip_off = ((off - hlen) >> 3) + ip_off;
  859                 if (off + len >= ip_len)
  860                         len = ip_len - off;
  861                 else
  862                         mhip->ip_off |= IP_MF;
  863                 mhip->ip_len = htons((u_short)(len + mhlen));
  864                 m->m_next = m_copym(m0, off, len, M_NOWAIT);
  865                 if (m->m_next == NULL) {        /* copy failed */
  866                         m_free(m);
  867                         error = ENOBUFS;        /* ??? */
  868                         IPSTAT_INC(ips_odropped);
  869                         goto done;
  870                 }
  871                 m->m_pkthdr.len = mhlen + len;
  872 #ifdef MAC
  873                 mac_netinet_fragment(m0, m);
  874 #endif
  875                 mhip->ip_off = htons(mhip->ip_off);
  876                 mhip->ip_sum = 0;
  877                 if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
  878                         mhip->ip_sum = in_cksum(m, mhlen);
  879                         m->m_pkthdr.csum_flags &= ~CSUM_IP;
  880                 }
  881                 *mnext = m;
  882                 mnext = &m->m_nextpkt;
  883         }
  884         IPSTAT_ADD(ips_ofragments, nfrags);
  885 
  886         /*
  887          * Update first fragment by trimming what's been copied out
  888          * and updating header.
  889          */
  890         m_adj(m0, hlen + firstlen - ip_len);
  891         m0->m_pkthdr.len = hlen + firstlen;
  892         ip->ip_len = htons((u_short)m0->m_pkthdr.len);
  893         ip->ip_off = htons(ip_off | IP_MF);
  894         ip->ip_sum = 0;
  895         if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
  896                 ip->ip_sum = in_cksum(m0, hlen);
  897                 m0->m_pkthdr.csum_flags &= ~CSUM_IP;
  898         }
  899 
  900 done:
  901         *m_frag = m0;
  902         return error;
  903 }
  904 
  905 void
  906 in_delayed_cksum(struct mbuf *m)
  907 {
  908         struct ip *ip;
  909         uint16_t csum, offset, ip_len;
  910 
  911         ip = mtod(m, struct ip *);
  912         offset = ip->ip_hl << 2 ;
  913         ip_len = ntohs(ip->ip_len);
  914         csum = in_cksum_skip(m, ip_len, offset);
  915         if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
  916                 csum = 0xffff;
  917         offset += m->m_pkthdr.csum_data;        /* checksum offset */
  918 
  919         /* find the mbuf in the chain where the checksum starts*/
  920         while ((m != NULL) && (offset >= m->m_len)) {
  921                 offset -= m->m_len;
  922                 m = m->m_next;
  923         }
  924         KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain."));
  925         KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs."));
  926         *(u_short *)(m->m_data + offset) = csum;
  927 }
  928 
  929 /*
  930  * IP socket option processing.
  931  */
  932 int
  933 ip_ctloutput(struct socket *so, struct sockopt *sopt)
  934 {
  935         struct  inpcb *inp = sotoinpcb(so);
  936         int     error, optval;
  937 #ifdef  RSS
  938         uint32_t rss_bucket;
  939         int retval;
  940 #endif
  941 
  942         error = optval = 0;
  943         if (sopt->sopt_level != IPPROTO_IP) {
  944                 error = EINVAL;
  945 
  946                 if (sopt->sopt_level == SOL_SOCKET &&
  947                     sopt->sopt_dir == SOPT_SET) {
  948                         switch (sopt->sopt_name) {
  949                         case SO_REUSEADDR:
  950                                 INP_WLOCK(inp);
  951                                 if ((so->so_options & SO_REUSEADDR) != 0)
  952                                         inp->inp_flags2 |= INP_REUSEADDR;
  953                                 else
  954                                         inp->inp_flags2 &= ~INP_REUSEADDR;
  955                                 INP_WUNLOCK(inp);
  956                                 error = 0;
  957                                 break;
  958                         case SO_REUSEPORT:
  959                                 INP_WLOCK(inp);
  960                                 if ((so->so_options & SO_REUSEPORT) != 0)
  961                                         inp->inp_flags2 |= INP_REUSEPORT;
  962                                 else
  963                                         inp->inp_flags2 &= ~INP_REUSEPORT;
  964                                 INP_WUNLOCK(inp);
  965                                 error = 0;
  966                                 break;
  967                         case SO_SETFIB:
  968                                 INP_WLOCK(inp);
  969                                 inp->inp_inc.inc_fibnum = so->so_fibnum;
  970                                 INP_WUNLOCK(inp);
  971                                 error = 0;
  972                                 break;
  973                         default:
  974                                 break;
  975                         }
  976                 }
  977                 return (error);
  978         }
  979 
  980         switch (sopt->sopt_dir) {
  981         case SOPT_SET:
  982                 switch (sopt->sopt_name) {
  983                 case IP_OPTIONS:
  984 #ifdef notyet
  985                 case IP_RETOPTS:
  986 #endif
  987                 {
  988                         struct mbuf *m;
  989                         if (sopt->sopt_valsize > MLEN) {
  990                                 error = EMSGSIZE;
  991                                 break;
  992                         }
  993                         m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
  994                         if (m == NULL) {
  995                                 error = ENOBUFS;
  996                                 break;
  997                         }
  998                         m->m_len = sopt->sopt_valsize;
  999                         error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
 1000                                             m->m_len);
 1001                         if (error) {
 1002                                 m_free(m);
 1003                                 break;
 1004                         }
 1005                         INP_WLOCK(inp);
 1006                         error = ip_pcbopts(inp, sopt->sopt_name, m);
 1007                         INP_WUNLOCK(inp);
 1008                         return (error);
 1009                 }
 1010 
 1011                 case IP_BINDANY:
 1012                         if (sopt->sopt_td != NULL) {
 1013                                 error = priv_check(sopt->sopt_td,
 1014                                     PRIV_NETINET_BINDANY);
 1015                                 if (error)
 1016                                         break;
 1017                         }
 1018                         /* FALLTHROUGH */
 1019                 case IP_BINDMULTI:
 1020 #ifdef  RSS
 1021                 case IP_RSS_LISTEN_BUCKET:
 1022 #endif
 1023                 case IP_TOS:
 1024                 case IP_TTL:
 1025                 case IP_MINTTL:
 1026                 case IP_RECVOPTS:
 1027                 case IP_RECVRETOPTS:
 1028                 case IP_RECVDSTADDR:
 1029                 case IP_RECVTTL:
 1030                 case IP_RECVIF:
 1031                 case IP_ONESBCAST:
 1032                 case IP_DONTFRAG:
 1033                 case IP_RECVTOS:
 1034                 case IP_RECVFLOWID:
 1035 #ifdef  RSS
 1036                 case IP_RECVRSSBUCKETID:
 1037 #endif
 1038                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1039                                             sizeof optval);
 1040                         if (error)
 1041                                 break;
 1042 
 1043                         switch (sopt->sopt_name) {
 1044                         case IP_TOS:
 1045                                 inp->inp_ip_tos = optval;
 1046                                 break;
 1047 
 1048                         case IP_TTL:
 1049                                 inp->inp_ip_ttl = optval;
 1050                                 break;
 1051 
 1052                         case IP_MINTTL:
 1053                                 if (optval >= 0 && optval <= MAXTTL)
 1054                                         inp->inp_ip_minttl = optval;
 1055                                 else
 1056                                         error = EINVAL;
 1057                                 break;
 1058 
 1059 #define OPTSET(bit) do {                                                \
 1060         INP_WLOCK(inp);                                                 \
 1061         if (optval)                                                     \
 1062                 inp->inp_flags |= bit;                                  \
 1063         else                                                            \
 1064                 inp->inp_flags &= ~bit;                                 \
 1065         INP_WUNLOCK(inp);                                               \
 1066 } while (0)
 1067 
 1068 #define OPTSET2(bit, val) do {                                          \
 1069         INP_WLOCK(inp);                                                 \
 1070         if (val)                                                        \
 1071                 inp->inp_flags2 |= bit;                                 \
 1072         else                                                            \
 1073                 inp->inp_flags2 &= ~bit;                                \
 1074         INP_WUNLOCK(inp);                                               \
 1075 } while (0)
 1076 
 1077                         case IP_RECVOPTS:
 1078                                 OPTSET(INP_RECVOPTS);
 1079                                 break;
 1080 
 1081                         case IP_RECVRETOPTS:
 1082                                 OPTSET(INP_RECVRETOPTS);
 1083                                 break;
 1084 
 1085                         case IP_RECVDSTADDR:
 1086                                 OPTSET(INP_RECVDSTADDR);
 1087                                 break;
 1088 
 1089                         case IP_RECVTTL:
 1090                                 OPTSET(INP_RECVTTL);
 1091                                 break;
 1092 
 1093                         case IP_RECVIF:
 1094                                 OPTSET(INP_RECVIF);
 1095                                 break;
 1096 
 1097                         case IP_ONESBCAST:
 1098                                 OPTSET(INP_ONESBCAST);
 1099                                 break;
 1100                         case IP_DONTFRAG:
 1101                                 OPTSET(INP_DONTFRAG);
 1102                                 break;
 1103                         case IP_BINDANY:
 1104                                 OPTSET(INP_BINDANY);
 1105                                 break;
 1106                         case IP_RECVTOS:
 1107                                 OPTSET(INP_RECVTOS);
 1108                                 break;
 1109                         case IP_BINDMULTI:
 1110                                 OPTSET2(INP_BINDMULTI, optval);
 1111                                 break;
 1112                         case IP_RECVFLOWID:
 1113                                 OPTSET2(INP_RECVFLOWID, optval);
 1114                                 break;
 1115 #ifdef  RSS
 1116                         case IP_RSS_LISTEN_BUCKET:
 1117                                 if ((optval >= 0) &&
 1118                                     (optval < rss_getnumbuckets())) {
 1119                                         inp->inp_rss_listen_bucket = optval;
 1120                                         OPTSET2(INP_RSS_BUCKET_SET, 1);
 1121                                 } else {
 1122                                         error = EINVAL;
 1123                                 }
 1124                                 break;
 1125                         case IP_RECVRSSBUCKETID:
 1126                                 OPTSET2(INP_RECVRSSBUCKETID, optval);
 1127                                 break;
 1128 #endif
 1129                         }
 1130                         break;
 1131 #undef OPTSET
 1132 #undef OPTSET2
 1133 
 1134                 /*
 1135                  * Multicast socket options are processed by the in_mcast
 1136                  * module.
 1137                  */
 1138                 case IP_MULTICAST_IF:
 1139                 case IP_MULTICAST_VIF:
 1140                 case IP_MULTICAST_TTL:
 1141                 case IP_MULTICAST_LOOP:
 1142                 case IP_ADD_MEMBERSHIP:
 1143                 case IP_DROP_MEMBERSHIP:
 1144                 case IP_ADD_SOURCE_MEMBERSHIP:
 1145                 case IP_DROP_SOURCE_MEMBERSHIP:
 1146                 case IP_BLOCK_SOURCE:
 1147                 case IP_UNBLOCK_SOURCE:
 1148                 case IP_MSFILTER:
 1149                 case MCAST_JOIN_GROUP:
 1150                 case MCAST_LEAVE_GROUP:
 1151                 case MCAST_JOIN_SOURCE_GROUP:
 1152                 case MCAST_LEAVE_SOURCE_GROUP:
 1153                 case MCAST_BLOCK_SOURCE:
 1154                 case MCAST_UNBLOCK_SOURCE:
 1155                         error = inp_setmoptions(inp, sopt);
 1156                         break;
 1157 
 1158                 case IP_PORTRANGE:
 1159                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1160                                             sizeof optval);
 1161                         if (error)
 1162                                 break;
 1163 
 1164                         INP_WLOCK(inp);
 1165                         switch (optval) {
 1166                         case IP_PORTRANGE_DEFAULT:
 1167                                 inp->inp_flags &= ~(INP_LOWPORT);
 1168                                 inp->inp_flags &= ~(INP_HIGHPORT);
 1169                                 break;
 1170 
 1171                         case IP_PORTRANGE_HIGH:
 1172                                 inp->inp_flags &= ~(INP_LOWPORT);
 1173                                 inp->inp_flags |= INP_HIGHPORT;
 1174                                 break;
 1175 
 1176                         case IP_PORTRANGE_LOW:
 1177                                 inp->inp_flags &= ~(INP_HIGHPORT);
 1178                                 inp->inp_flags |= INP_LOWPORT;
 1179                                 break;
 1180 
 1181                         default:
 1182                                 error = EINVAL;
 1183                                 break;
 1184                         }
 1185                         INP_WUNLOCK(inp);
 1186                         break;
 1187 
 1188 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 1189                 case IP_IPSEC_POLICY:
 1190                         if (IPSEC_ENABLED(ipv4)) {
 1191                                 error = IPSEC_PCBCTL(ipv4, inp, sopt);
 1192                                 break;
 1193                         }
 1194                         /* FALLTHROUGH */
 1195 #endif /* IPSEC */
 1196 
 1197                 default:
 1198                         error = ENOPROTOOPT;
 1199                         break;
 1200                 }
 1201                 break;
 1202 
 1203         case SOPT_GET:
 1204                 switch (sopt->sopt_name) {
 1205                 case IP_OPTIONS:
 1206                 case IP_RETOPTS:
 1207                         if (inp->inp_options)
 1208                                 error = sooptcopyout(sopt,
 1209                                                      mtod(inp->inp_options,
 1210                                                           char *),
 1211                                                      inp->inp_options->m_len);
 1212                         else
 1213                                 sopt->sopt_valsize = 0;
 1214                         break;
 1215 
 1216                 case IP_TOS:
 1217                 case IP_TTL:
 1218                 case IP_MINTTL:
 1219                 case IP_RECVOPTS:
 1220                 case IP_RECVRETOPTS:
 1221                 case IP_RECVDSTADDR:
 1222                 case IP_RECVTTL:
 1223                 case IP_RECVIF:
 1224                 case IP_PORTRANGE:
 1225                 case IP_ONESBCAST:
 1226                 case IP_DONTFRAG:
 1227                 case IP_BINDANY:
 1228                 case IP_RECVTOS:
 1229                 case IP_BINDMULTI:
 1230                 case IP_FLOWID:
 1231                 case IP_FLOWTYPE:
 1232                 case IP_RECVFLOWID:
 1233 #ifdef  RSS
 1234                 case IP_RSSBUCKETID:
 1235                 case IP_RECVRSSBUCKETID:
 1236 #endif
 1237                         switch (sopt->sopt_name) {
 1238 
 1239                         case IP_TOS:
 1240                                 optval = inp->inp_ip_tos;
 1241                                 break;
 1242 
 1243                         case IP_TTL:
 1244                                 optval = inp->inp_ip_ttl;
 1245                                 break;
 1246 
 1247                         case IP_MINTTL:
 1248                                 optval = inp->inp_ip_minttl;
 1249                                 break;
 1250 
 1251 #define OPTBIT(bit)     (inp->inp_flags & bit ? 1 : 0)
 1252 #define OPTBIT2(bit)    (inp->inp_flags2 & bit ? 1 : 0)
 1253 
 1254                         case IP_RECVOPTS:
 1255                                 optval = OPTBIT(INP_RECVOPTS);
 1256                                 break;
 1257 
 1258                         case IP_RECVRETOPTS:
 1259                                 optval = OPTBIT(INP_RECVRETOPTS);
 1260                                 break;
 1261 
 1262                         case IP_RECVDSTADDR:
 1263                                 optval = OPTBIT(INP_RECVDSTADDR);
 1264                                 break;
 1265 
 1266                         case IP_RECVTTL:
 1267                                 optval = OPTBIT(INP_RECVTTL);
 1268                                 break;
 1269 
 1270                         case IP_RECVIF:
 1271                                 optval = OPTBIT(INP_RECVIF);
 1272                                 break;
 1273 
 1274                         case IP_PORTRANGE:
 1275                                 if (inp->inp_flags & INP_HIGHPORT)
 1276                                         optval = IP_PORTRANGE_HIGH;
 1277                                 else if (inp->inp_flags & INP_LOWPORT)
 1278                                         optval = IP_PORTRANGE_LOW;
 1279                                 else
 1280                                         optval = 0;
 1281                                 break;
 1282 
 1283                         case IP_ONESBCAST:
 1284                                 optval = OPTBIT(INP_ONESBCAST);
 1285                                 break;
 1286                         case IP_DONTFRAG:
 1287                                 optval = OPTBIT(INP_DONTFRAG);
 1288                                 break;
 1289                         case IP_BINDANY:
 1290                                 optval = OPTBIT(INP_BINDANY);
 1291                                 break;
 1292                         case IP_RECVTOS:
 1293                                 optval = OPTBIT(INP_RECVTOS);
 1294                                 break;
 1295                         case IP_FLOWID:
 1296                                 optval = inp->inp_flowid;
 1297                                 break;
 1298                         case IP_FLOWTYPE:
 1299                                 optval = inp->inp_flowtype;
 1300                                 break;
 1301                         case IP_RECVFLOWID:
 1302                                 optval = OPTBIT2(INP_RECVFLOWID);
 1303                                 break;
 1304 #ifdef  RSS
 1305                         case IP_RSSBUCKETID:
 1306                                 retval = rss_hash2bucket(inp->inp_flowid,
 1307                                     inp->inp_flowtype,
 1308                                     &rss_bucket);
 1309                                 if (retval == 0)
 1310                                         optval = rss_bucket;
 1311                                 else
 1312                                         error = EINVAL;
 1313                                 break;
 1314                         case IP_RECVRSSBUCKETID:
 1315                                 optval = OPTBIT2(INP_RECVRSSBUCKETID);
 1316                                 break;
 1317 #endif
 1318                         case IP_BINDMULTI:
 1319                                 optval = OPTBIT2(INP_BINDMULTI);
 1320                                 break;
 1321                         }
 1322                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1323                         break;
 1324 
 1325                 /*
 1326                  * Multicast socket options are processed by the in_mcast
 1327                  * module.
 1328                  */
 1329                 case IP_MULTICAST_IF:
 1330                 case IP_MULTICAST_VIF:
 1331                 case IP_MULTICAST_TTL:
 1332                 case IP_MULTICAST_LOOP:
 1333                 case IP_MSFILTER:
 1334                         error = inp_getmoptions(inp, sopt);
 1335                         break;
 1336 
 1337 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 1338                 case IP_IPSEC_POLICY:
 1339                         if (IPSEC_ENABLED(ipv4)) {
 1340                                 error = IPSEC_PCBCTL(ipv4, inp, sopt);
 1341                                 break;
 1342                         }
 1343                         /* FALLTHROUGH */
 1344 #endif /* IPSEC */
 1345 
 1346                 default:
 1347                         error = ENOPROTOOPT;
 1348                         break;
 1349                 }
 1350                 break;
 1351         }
 1352         return (error);
 1353 }
 1354 
 1355 /*
 1356  * Routine called from ip_output() to loop back a copy of an IP multicast
 1357  * packet to the input queue of a specified interface.  Note that this
 1358  * calls the output routine of the loopback "driver", but with an interface
 1359  * pointer that might NOT be a loopback interface -- evil, but easier than
 1360  * replicating that code here.
 1361  */
 1362 static void
 1363 ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen)
 1364 {
 1365         struct ip *ip;
 1366         struct mbuf *copym;
 1367 
 1368         /*
 1369          * Make a deep copy of the packet because we're going to
 1370          * modify the pack in order to generate checksums.
 1371          */
 1372         copym = m_dup(m, M_NOWAIT);
 1373         if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen))
 1374                 copym = m_pullup(copym, hlen);
 1375         if (copym != NULL) {
 1376                 /* If needed, compute the checksum and mark it as valid. */
 1377                 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 1378                         in_delayed_cksum(copym);
 1379                         copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 1380                         copym->m_pkthdr.csum_flags |=
 1381                             CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 1382                         copym->m_pkthdr.csum_data = 0xffff;
 1383                 }
 1384                 /*
 1385                  * We don't bother to fragment if the IP length is greater
 1386                  * than the interface's MTU.  Can this possibly matter?
 1387                  */
 1388                 ip = mtod(copym, struct ip *);
 1389                 ip->ip_sum = 0;
 1390                 ip->ip_sum = in_cksum(copym, hlen);
 1391                 if_simloop(ifp, copym, AF_INET, 0);
 1392         }
 1393 }

Cache object: 59a4130406cea9d81992717cec5970b5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.