The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_input.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)ip_input.c  8.2 (Berkeley) 1/4/94
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: releng/11.0/sys/netinet/ip_input.c 302054 2016-06-21 13:48:49Z bz $");
   34 
   35 #include "opt_bootp.h"
   36 #include "opt_ipstealth.h"
   37 #include "opt_ipsec.h"
   38 #include "opt_route.h"
   39 #include "opt_rss.h"
   40 
   41 #include <sys/param.h>
   42 #include <sys/systm.h>
   43 #include <sys/hhook.h>
   44 #include <sys/mbuf.h>
   45 #include <sys/malloc.h>
   46 #include <sys/domain.h>
   47 #include <sys/protosw.h>
   48 #include <sys/socket.h>
   49 #include <sys/time.h>
   50 #include <sys/kernel.h>
   51 #include <sys/lock.h>
   52 #include <sys/rmlock.h>
   53 #include <sys/rwlock.h>
   54 #include <sys/sdt.h>
   55 #include <sys/syslog.h>
   56 #include <sys/sysctl.h>
   57 
   58 #include <net/pfil.h>
   59 #include <net/if.h>
   60 #include <net/if_types.h>
   61 #include <net/if_var.h>
   62 #include <net/if_dl.h>
   63 #include <net/route.h>
   64 #include <net/netisr.h>
   65 #include <net/rss_config.h>
   66 #include <net/vnet.h>
   67 
   68 #include <netinet/in.h>
   69 #include <netinet/in_kdtrace.h>
   70 #include <netinet/in_systm.h>
   71 #include <netinet/in_var.h>
   72 #include <netinet/ip.h>
   73 #include <netinet/in_pcb.h>
   74 #include <netinet/ip_var.h>
   75 #include <netinet/ip_fw.h>
   76 #include <netinet/ip_icmp.h>
   77 #include <netinet/ip_options.h>
   78 #include <machine/in_cksum.h>
   79 #include <netinet/ip_carp.h>
   80 #ifdef IPSEC
   81 #include <netinet/ip_ipsec.h>
   82 #include <netipsec/ipsec.h>
   83 #include <netipsec/key.h>
   84 #endif /* IPSEC */
   85 #include <netinet/in_rss.h>
   86 
   87 #include <sys/socketvar.h>
   88 
   89 #include <security/mac/mac_framework.h>
   90 
/*
 * Where the build provides CTASSERT, verify at compile time that struct ip
 * is exactly 20 bytes.  ip_input() uses sizeof(struct ip) as the minimum
 * acceptable header length.
 */
#ifdef CTASSERT
CTASSERT(sizeof(struct ip) == 20);
#endif
   94 
/*
 * IP reassembly functions are defined in ip_reass.c.
 * ipreass_init() is called from ip_init(), ipreass_slowtimo() from
 * ip_slowtimo() and, on VIMAGE kernels, ipreass_destroy() from ip_destroy().
 */
extern void ipreass_init(void);
extern void ipreass_drain(void);
extern void ipreass_slowtimo(void);
#ifdef VIMAGE
extern void ipreass_destroy(void);
#endif
  102 
/*
 * rmlock registered under the name "in_ifaddr_lock" through RM_SYSINIT.
 * NOTE(review): presumably guards the in_ifaddr list and hash table defined
 * further down; the matching lock calls in ip_input() are commented out.
 */
struct rmlock in_ifaddr_lock;
RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
  105 
/*
 * Per-vnet flag; when non-zero ip_input() claims every packet whose
 * protocol is IPPROTO_RSVP as locally destined.
 */
VNET_DEFINE(int, rsvp_on);

/*
 * Per-vnet forwarding switch, exported read/write as the "forwarding" node.
 * ip_input() drops packets that are not for this host while it is 0.
 */
VNET_DEFINE(int, ipforwarding);
SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipforwarding), 0,
    "Enable IP forwarding between interfaces");

/*
 * Per-vnet switch for sending IP redirects, initialized to 1 and exported
 * read/write as the "redirect" node.  Not consulted by the code in this
 * part of the file.
 */
static VNET_DEFINE(int, ipsendredirects) = 1;	/* XXX */
#define	V_ipsendredirects	VNET(ipsendredirects)
SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipsendredirects), 0,
    "Enable sending IP redirects");
  118 
/*
 * Per-vnet switch, exported read/write as the "check_interface" node, that
 * makes ip_input() accept a unicast packet for a local address only when it
 * arrived on the interface owning that address.  ip_input() applies the
 * check only when forwarding is off, the receive interface is neither
 * loopback nor carp-enabled, and the destination was not rewritten by a
 * packet filter.
 *
 * XXX - Setting ip_checkinterface mostly implements the receive side of
 * the Strong ES model described in RFC 1122, but since the routing table
 * and transmit implementation do not implement the Strong ES model,
 * setting this to 1 results in an odd hybrid.
 *
 * XXX - ip_checkinterface currently must be disabled if you use ipnat
 * to translate the destination address to another local interface.
 *
 * XXX - ip_checkinterface must be disabled if you add IP aliases
 * to the loopback interface instead of the interface where the
 * packets for those addresses are received.
 */
static VNET_DEFINE(int, ip_checkinterface);
#define	V_ip_checkinterface	VNET(ip_checkinterface)
SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ip_checkinterface), 0,
    "Verify packet arrives on correct interface");
  137 
/*
 * Per-vnet IPv4 packet filter hook head.  Registered in ip_init() and run
 * over inbound packets (PFIL_IN) by ip_input().
 */
VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */
  139 
/*
 * netisr handler description for IPv4 input: packets queued under
 * NETISR_IP are delivered to ip_input().  With RSS compiled in, the CPU is
 * picked by rss_soft_m2cpuid_v4 under the CPU policy with hybrid dispatch;
 * otherwise the flow policy is used.  Registered in ip_init().
 */
static struct netisr_handler ip_nh = {
	.nh_name = "ip",
	.nh_handler = ip_input,
	.nh_proto = NETISR_IP,
#ifdef	RSS
	.nh_m2cpuid = rss_soft_m2cpuid_v4,
	.nh_policy = NETISR_POLICY_CPU,
	.nh_dispatch = NETISR_DISPATCH_HYBRID,
#else
	.nh_policy = NETISR_POLICY_FLOW,
#endif
};
  152 
#ifdef	RSS
/*
 * netisr handler description for the RSS-only NETISR_IP_DIRECT protocol,
 * whose packets are delivered to ip_direct_input().
 *
 * Directly dispatched frames are currently assumed
 * to have a flowid already calculated.
 *
 * There should probably be an assertion here that the frame
 * actually carries valid flow details.
 */
static struct netisr_handler ip_direct_nh = {
	.nh_name = "ip_direct",
	.nh_handler = ip_direct_input,
	.nh_proto = NETISR_IP_DIRECT,
	.nh_m2cpuid = rss_soft_m2cpuid_v4,
	.nh_policy = NETISR_POLICY_CPU,
	.nh_dispatch = NETISR_DISPATCH_HYBRID,
};
#endif
  170 
/* The inet domain and its protocol switch table are defined elsewhere. */
extern	struct domain inetdomain;
extern	struct protosw inetsw[];
/*
 * Maps an IP protocol number to an index into inetsw[].  Filled in by
 * ip_init(): every slot defaults to the raw IP entry and is overridden for
 * the protocols present in the inet domain.
 */
u_char	ip_protox[IPPROTO_MAX];
VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
  177 
/*
 * Optional "mtu" node, compiled only when IPCTL_DEFMTU is defined.
 * NOTE(review): ip_mtu is not declared anywhere in the visible part of this
 * file - confirm it exists before enabling this option.
 */
#ifdef IPCTL_DEFMTU
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
    &ip_mtu, 0, "Default MTU");
#endif
  182 
/*
 * Per-vnet stealth switch, present only in IPSTEALTH kernels and exported
 * read/write as the "stealth" node.  When set, ip_input() processes only
 * non-routing options, and only once a packet is known to be for this host.
 */
#ifdef IPSTEALTH
VNET_DEFINE(int, ipstealth);
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipstealth), 0,
    "IP stealth mode, no TTL decrementation on forwarding");
#endif
  189 
/*
 * IP statistics are stored in the "array" of counter(9)s, one per field of
 * struct ipstat.  The set is defined per vnet, set up by a SYSINIT, exported
 * through the "stats" sysctl node and, on VIMAGE kernels, torn down again
 * by a SYSUNINIT.
 */
VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
VNET_PCPUSTAT_SYSINIT(ipstat);
SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
    "IP statistics (struct ipstat, netinet/ip_var.h)");

#ifdef VIMAGE
VNET_PCPUSTAT_SYSUNINIT(ipstat);
#endif /* VIMAGE */
  201 
  202 /*
  203  * Kernel module interface for updating ipstat.  The argument is an index
  204  * into ipstat treated as an array.
  205  */
  206 void
  207 kmod_ipstat_inc(int statnum)
  208 {
  209 
  210         counter_u64_add(VNET(ipstat)[statnum], 1);
  211 }
  212 
  213 void
  214 kmod_ipstat_dec(int statnum)
  215 {
  216 
  217         counter_u64_add(VNET(ipstat)[statnum], -1);
  218 }
  219 
  220 static int
  221 sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
  222 {
  223         int error, qlimit;
  224 
  225         netisr_getqlimit(&ip_nh, &qlimit);
  226         error = sysctl_handle_int(oidp, &qlimit, 0, req);
  227         if (error || !req->newptr)
  228                 return (error);
  229         if (qlimit < 1)
  230                 return (EINVAL);
  231         return (netisr_setqlimit(&ip_nh, qlimit));
  232 }
  233 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
  234     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
  235     "Maximum size of the IP input queue");
  236 
  237 static int
  238 sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
  239 {
  240         u_int64_t qdrops_long;
  241         int error, qdrops;
  242 
  243         netisr_getqdrops(&ip_nh, &qdrops_long);
  244         qdrops = qdrops_long;
  245         error = sysctl_handle_int(oidp, &qdrops, 0, req);
  246         if (error || !req->newptr)
  247                 return (error);
  248         if (qdrops != 0)
  249                 return (EINVAL);
  250         netisr_clearqdrops(&ip_nh);
  251         return (0);
  252 }
  253 
  254 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
  255     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
  256     "Number of packets dropped from the IP input queue");
  257 
  258 #ifdef  RSS
  259 static int
  260 sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
  261 {
  262         int error, qlimit;
  263 
  264         netisr_getqlimit(&ip_direct_nh, &qlimit);
  265         error = sysctl_handle_int(oidp, &qlimit, 0, req);
  266         if (error || !req->newptr)
  267                 return (error);
  268         if (qlimit < 1)
  269                 return (EINVAL);
  270         return (netisr_setqlimit(&ip_direct_nh, qlimit));
  271 }
  272 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen,
  273     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I",
  274     "Maximum size of the IP direct input queue");
  275 
  276 static int
  277 sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
  278 {
  279         u_int64_t qdrops_long;
  280         int error, qdrops;
  281 
  282         netisr_getqdrops(&ip_direct_nh, &qdrops_long);
  283         qdrops = qdrops_long;
  284         error = sysctl_handle_int(oidp, &qdrops, 0, req);
  285         if (error || !req->newptr)
  286                 return (error);
  287         if (qdrops != 0)
  288                 return (EINVAL);
  289         netisr_clearqdrops(&ip_direct_nh);
  290         return (0);
  291 }
  292 
  293 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops,
  294     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I",
  295     "Number of packets dropped from the IP direct input queue");
  296 #endif  /* RSS */
  297 
  298 /*
  299  * IP initialization: fill in IP protocol switch table.
  300  * All protocols not implemented in kernel go to raw IP protocol handler.
  301  */
  302 void
  303 ip_init(void)
  304 {
  305         struct protosw *pr;
  306         int i;
  307 
  308         TAILQ_INIT(&V_in_ifaddrhead);
  309         V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
  310 
  311         /* Initialize IP reassembly queue. */
  312         ipreass_init();
  313 
  314         /* Initialize packet filter hooks. */
  315         V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
  316         V_inet_pfil_hook.ph_af = AF_INET;
  317         if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
  318                 printf("%s: WARNING: unable to register pfil hook, "
  319                         "error %d\n", __func__, i);
  320 
  321         if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
  322             &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
  323             HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
  324                 printf("%s: WARNING: unable to register input helper hook\n",
  325                     __func__);
  326         if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
  327             &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
  328             HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
  329                 printf("%s: WARNING: unable to register output helper hook\n",
  330                     __func__);
  331 
  332         /* Skip initialization of globals for non-default instances. */
  333 #ifdef VIMAGE
  334         if (!IS_DEFAULT_VNET(curvnet)) {
  335                 netisr_register_vnet(&ip_nh);
  336 #ifdef  RSS
  337                 netisr_register_vnet(&ip_direct_nh);
  338 #endif
  339                 return;
  340         }
  341 #endif
  342 
  343         pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
  344         if (pr == NULL)
  345                 panic("ip_init: PF_INET not found");
  346 
  347         /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
  348         for (i = 0; i < IPPROTO_MAX; i++)
  349                 ip_protox[i] = pr - inetsw;
  350         /*
  351          * Cycle through IP protocols and put them into the appropriate place
  352          * in ip_protox[].
  353          */
  354         for (pr = inetdomain.dom_protosw;
  355             pr < inetdomain.dom_protoswNPROTOSW; pr++)
  356                 if (pr->pr_domain->dom_family == PF_INET &&
  357                     pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
  358                         /* Be careful to only index valid IP protocols. */
  359                         if (pr->pr_protocol < IPPROTO_MAX)
  360                                 ip_protox[pr->pr_protocol] = pr - inetsw;
  361                 }
  362 
  363         netisr_register(&ip_nh);
  364 #ifdef  RSS
  365         netisr_register(&ip_direct_nh);
  366 #endif
  367 }
  368 
  369 #ifdef VIMAGE
  370 static void
  371 ip_destroy(void *unused __unused)
  372 {
  373         struct ifnet *ifp;
  374         int error;
  375 
  376 #ifdef  RSS
  377         netisr_unregister_vnet(&ip_direct_nh);
  378 #endif
  379         netisr_unregister_vnet(&ip_nh);
  380 
  381         if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
  382                 printf("%s: WARNING: unable to unregister pfil hook, "
  383                     "error %d\n", __func__, error);
  384 
  385         error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
  386         if (error != 0) {
  387                 printf("%s: WARNING: unable to deregister input helper hook "
  388                     "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
  389                     "error %d returned\n", __func__, error);
  390         }
  391         error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
  392         if (error != 0) {
  393                 printf("%s: WARNING: unable to deregister output helper hook "
  394                     "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
  395                     "error %d returned\n", __func__, error);
  396         }
  397 
  398         /* Remove the IPv4 addresses from all interfaces. */
  399         in_ifscrub_all();
  400 
  401         /* Make sure the IPv4 routes are gone as well. */
  402         IFNET_RLOCK();
  403         TAILQ_FOREACH(ifp, &V_ifnet, if_link)
  404                 rt_flushifroutes_af(ifp, AF_INET);
  405         IFNET_RUNLOCK();
  406 
  407         /* Destroy IP reassembly queue. */
  408         ipreass_destroy();
  409 
  410         /* Cleanup in_ifaddr hash table; should be empty. */
  411         hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
  412 }
  413 
  414 VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
  415 #endif
  416 
  417 #ifdef  RSS
  418 /*
  419  * IP direct input routine.
  420  *
  421  * This is called when reinjecting completed fragments where
  422  * all of the previous checking and book-keeping has been done.
  423  */
  424 void
  425 ip_direct_input(struct mbuf *m)
  426 {
  427         struct ip *ip;
  428         int hlen;
  429 
  430         ip = mtod(m, struct ip *);
  431         hlen = ip->ip_hl << 2;
  432 
  433         IPSTAT_INC(ips_delivered);
  434         (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
  435         return;
  436 }
  437 #endif
  438 
  439 /*
  440  * Ip input routine.  Checksum and byte swap header.  If fragmented
  441  * try to reassemble.  Process options.  Pass to next level.
  442  */
  443 void
  444 ip_input(struct mbuf *m)
  445 {
  446         struct ip *ip = NULL;
  447         struct in_ifaddr *ia = NULL;
  448         struct ifaddr *ifa;
  449         struct ifnet *ifp;
  450         int    checkif, hlen = 0;
  451         uint16_t sum, ip_len;
  452         int dchg = 0;                           /* dest changed after fw */
  453         struct in_addr odst;                    /* original dst address */
  454 
  455         M_ASSERTPKTHDR(m);
  456 
  457         if (m->m_flags & M_FASTFWD_OURS) {
  458                 m->m_flags &= ~M_FASTFWD_OURS;
  459                 /* Set up some basics that will be used later. */
  460                 ip = mtod(m, struct ip *);
  461                 hlen = ip->ip_hl << 2;
  462                 ip_len = ntohs(ip->ip_len);
  463                 goto ours;
  464         }
  465 
  466         IPSTAT_INC(ips_total);
  467 
  468         if (m->m_pkthdr.len < sizeof(struct ip))
  469                 goto tooshort;
  470 
  471         if (m->m_len < sizeof (struct ip) &&
  472             (m = m_pullup(m, sizeof (struct ip))) == NULL) {
  473                 IPSTAT_INC(ips_toosmall);
  474                 return;
  475         }
  476         ip = mtod(m, struct ip *);
  477 
  478         if (ip->ip_v != IPVERSION) {
  479                 IPSTAT_INC(ips_badvers);
  480                 goto bad;
  481         }
  482 
  483         hlen = ip->ip_hl << 2;
  484         if (hlen < sizeof(struct ip)) { /* minimum header length */
  485                 IPSTAT_INC(ips_badhlen);
  486                 goto bad;
  487         }
  488         if (hlen > m->m_len) {
  489                 if ((m = m_pullup(m, hlen)) == NULL) {
  490                         IPSTAT_INC(ips_badhlen);
  491                         return;
  492                 }
  493                 ip = mtod(m, struct ip *);
  494         }
  495 
  496         IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
  497 
  498         /* 127/8 must not appear on wire - RFC1122 */
  499         ifp = m->m_pkthdr.rcvif;
  500         if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
  501             (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
  502                 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
  503                         IPSTAT_INC(ips_badaddr);
  504                         goto bad;
  505                 }
  506         }
  507 
  508         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
  509                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
  510         } else {
  511                 if (hlen == sizeof(struct ip)) {
  512                         sum = in_cksum_hdr(ip);
  513                 } else {
  514                         sum = in_cksum(m, hlen);
  515                 }
  516         }
  517         if (sum) {
  518                 IPSTAT_INC(ips_badsum);
  519                 goto bad;
  520         }
  521 
  522 #ifdef ALTQ
  523         if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
  524                 /* packet is dropped by traffic conditioner */
  525                 return;
  526 #endif
  527 
  528         ip_len = ntohs(ip->ip_len);
  529         if (ip_len < hlen) {
  530                 IPSTAT_INC(ips_badlen);
  531                 goto bad;
  532         }
  533 
  534         /*
  535          * Check that the amount of data in the buffers
  536          * is as at least much as the IP header would have us expect.
  537          * Trim mbufs if longer than we expect.
  538          * Drop packet if shorter than we expect.
  539          */
  540         if (m->m_pkthdr.len < ip_len) {
  541 tooshort:
  542                 IPSTAT_INC(ips_tooshort);
  543                 goto bad;
  544         }
  545         if (m->m_pkthdr.len > ip_len) {
  546                 if (m->m_len == m->m_pkthdr.len) {
  547                         m->m_len = ip_len;
  548                         m->m_pkthdr.len = ip_len;
  549                 } else
  550                         m_adj(m, ip_len - m->m_pkthdr.len);
  551         }
  552 
  553         /* Try to forward the packet, but if we fail continue */
  554 #ifdef IPSEC
  555         /* For now we do not handle IPSEC in tryforward. */
  556         if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) &&
  557             (V_ipforwarding == 1))
  558                 if (ip_tryforward(m) == NULL)
  559                         return;
  560         /*
  561          * Bypass packet filtering for packets previously handled by IPsec.
  562          */
  563         if (ip_ipsec_filtertunnel(m))
  564                 goto passin;
  565 #else
  566         if (V_ipforwarding == 1)
  567                 if (ip_tryforward(m) == NULL)
  568                         return;
  569 #endif /* IPSEC */
  570 
  571         /*
  572          * Run through list of hooks for input packets.
  573          *
  574          * NB: Beware of the destination address changing (e.g.
  575          *     by NAT rewriting).  When this happens, tell
  576          *     ip_forward to do the right thing.
  577          */
  578 
  579         /* Jump over all PFIL processing if hooks are not active. */
  580         if (!PFIL_HOOKED(&V_inet_pfil_hook))
  581                 goto passin;
  582 
  583         odst = ip->ip_dst;
  584         if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
  585                 return;
  586         if (m == NULL)                  /* consumed by filter */
  587                 return;
  588 
  589         ip = mtod(m, struct ip *);
  590         dchg = (odst.s_addr != ip->ip_dst.s_addr);
  591         ifp = m->m_pkthdr.rcvif;
  592 
  593         if (m->m_flags & M_FASTFWD_OURS) {
  594                 m->m_flags &= ~M_FASTFWD_OURS;
  595                 goto ours;
  596         }
  597         if (m->m_flags & M_IP_NEXTHOP) {
  598                 if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
  599                         /*
  600                          * Directly ship the packet on.  This allows
  601                          * forwarding packets originally destined to us
  602                          * to some other directly connected host.
  603                          */
  604                         ip_forward(m, 1);
  605                         return;
  606                 }
  607         }
  608 passin:
  609 
  610         /*
  611          * Process options and, if not destined for us,
  612          * ship it on.  ip_dooptions returns 1 when an
  613          * error was detected (causing an icmp message
  614          * to be sent and the original packet to be freed).
  615          */
  616         if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
  617                 return;
  618 
  619         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
  620          * matter if it is destined to another node, or whether it is 
  621          * a multicast one, RSVP wants it! and prevents it from being forwarded
  622          * anywhere else. Also checks if the rsvp daemon is running before
  623          * grabbing the packet.
  624          */
  625         if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 
  626                 goto ours;
  627 
  628         /*
  629          * Check our list of addresses, to see if the packet is for us.
  630          * If we don't have any addresses, assume any unicast packet
  631          * we receive might be for us (and let the upper layers deal
  632          * with it).
  633          */
  634         if (TAILQ_EMPTY(&V_in_ifaddrhead) &&
  635             (m->m_flags & (M_MCAST|M_BCAST)) == 0)
  636                 goto ours;
  637 
  638         /*
  639          * Enable a consistency check between the destination address
  640          * and the arrival interface for a unicast packet (the RFC 1122
  641          * strong ES model) if IP forwarding is disabled and the packet
  642          * is not locally generated and the packet is not subject to
  643          * 'ipfw fwd'.
  644          *
  645          * XXX - Checking also should be disabled if the destination
  646          * address is ipnat'ed to a different interface.
  647          *
  648          * XXX - Checking is incompatible with IP aliases added
  649          * to the loopback interface instead of the interface where
  650          * the packets are received.
  651          *
  652          * XXX - This is the case for carp vhost IPs as well so we
  653          * insert a workaround. If the packet got here, we already
  654          * checked with carp_iamatch() and carp_forus().
  655          */
  656         checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 
  657             ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
  658             ifp->if_carp == NULL && (dchg == 0);
  659 
  660         /*
  661          * Check for exact addresses in the hash bucket.
  662          */
  663         /* IN_IFADDR_RLOCK(); */
  664         LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
  665                 /*
  666                  * If the address matches, verify that the packet
  667                  * arrived via the correct interface if checking is
  668                  * enabled.
  669                  */
  670                 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 
  671                     (!checkif || ia->ia_ifp == ifp)) {
  672                         counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
  673                         counter_u64_add(ia->ia_ifa.ifa_ibytes,
  674                             m->m_pkthdr.len);
  675                         /* IN_IFADDR_RUNLOCK(); */
  676                         goto ours;
  677                 }
  678         }
  679         /* IN_IFADDR_RUNLOCK(); */
  680 
  681         /*
  682          * Check for broadcast addresses.
  683          *
  684          * Only accept broadcast packets that arrive via the matching
  685          * interface.  Reception of forwarded directed broadcasts would
  686          * be handled via ip_forward() and ether_output() with the loopback
  687          * into the stack for SIMPLEX interfaces handled by ether_output().
  688          */
  689         if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
  690                 IF_ADDR_RLOCK(ifp);
  691                 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  692                         if (ifa->ifa_addr->sa_family != AF_INET)
  693                                 continue;
  694                         ia = ifatoia(ifa);
  695                         if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
  696                             ip->ip_dst.s_addr) {
  697                                 counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
  698                                 counter_u64_add(ia->ia_ifa.ifa_ibytes,
  699                                     m->m_pkthdr.len);
  700                                 IF_ADDR_RUNLOCK(ifp);
  701                                 goto ours;
  702                         }
  703 #ifdef BOOTP_COMPAT
  704                         if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
  705                                 counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
  706                                 counter_u64_add(ia->ia_ifa.ifa_ibytes,
  707                                     m->m_pkthdr.len);
  708                                 IF_ADDR_RUNLOCK(ifp);
  709                                 goto ours;
  710                         }
  711 #endif
  712                 }
  713                 IF_ADDR_RUNLOCK(ifp);
  714                 ia = NULL;
  715         }
  716         /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
  717         if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
  718                 IPSTAT_INC(ips_cantforward);
  719                 m_freem(m);
  720                 return;
  721         }
  722         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
  723                 if (V_ip_mrouter) {
  724                         /*
  725                          * If we are acting as a multicast router, all
  726                          * incoming multicast packets are passed to the
  727                          * kernel-level multicast forwarding function.
  728                          * The packet is returned (relatively) intact; if
  729                          * ip_mforward() returns a non-zero value, the packet
  730                          * must be discarded, else it may be accepted below.
  731                          */
  732                         if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
  733                                 IPSTAT_INC(ips_cantforward);
  734                                 m_freem(m);
  735                                 return;
  736                         }
  737 
  738                         /*
  739                          * The process-level routing daemon needs to receive
  740                          * all multicast IGMP packets, whether or not this
  741                          * host belongs to their destination groups.
  742                          */
  743                         if (ip->ip_p == IPPROTO_IGMP)
  744                                 goto ours;
  745                         IPSTAT_INC(ips_forward);
  746                 }
  747                 /*
  748                  * Assume the packet is for us, to avoid prematurely taking
  749                  * a lock on the in_multi hash. Protocols must perform
  750                  * their own filtering and update statistics accordingly.
  751                  */
  752                 goto ours;
  753         }
  754         if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
  755                 goto ours;
  756         if (ip->ip_dst.s_addr == INADDR_ANY)
  757                 goto ours;
  758 
  759         /*
  760          * Not for us; forward if possible and desirable.
  761          */
  762         if (V_ipforwarding == 0) {
  763                 IPSTAT_INC(ips_cantforward);
  764                 m_freem(m);
  765         } else {
  766                 ip_forward(m, dchg);
  767         }
  768         return;
  769 
  770 ours:
  771 #ifdef IPSTEALTH
  772         /*
  773          * IPSTEALTH: Process non-routing options only
  774          * if the packet is destined for us.
  775          */
  776         if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
  777                 return;
  778 #endif /* IPSTEALTH */
  779 
  780         /*
  781          * Attempt reassembly; if it succeeds, proceed.
  782          * ip_reass() will return a different mbuf.
  783          */
  784         if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
  785                 /* XXXGL: shouldn't we save & set m_flags? */
  786                 m = ip_reass(m);
  787                 if (m == NULL)
  788                         return;
  789                 ip = mtod(m, struct ip *);
  790                 /* Get the header length of the reassembled packet */
  791                 hlen = ip->ip_hl << 2;
  792         }
  793 
  794 #ifdef IPSEC
  795         /*
  796          * enforce IPsec policy checking if we are seeing last header.
  797          * note that we do not visit this with protocols with pcb layer
  798          * code - like udp/tcp/raw ip.
  799          */
  800         if (ip_ipsec_input(m, ip->ip_p) != 0)
  801                 goto bad;
  802 #endif /* IPSEC */
  803 
  804         /*
  805          * Switch out to protocol's input routine.
  806          */
  807         IPSTAT_INC(ips_delivered);
  808 
  809         (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
  810         return;
  811 bad:
  812         m_freem(m);
  813 }
  814 
  815 /*
  816  * IP timer processing;
  817  * if a timer expires on a reassembly
  818  * queue, discard it.
  819  */
  820 void
  821 ip_slowtimo(void)
  822 {
  823         VNET_ITERATOR_DECL(vnet_iter);
  824 
  825         VNET_LIST_RLOCK_NOSLEEP();
  826         VNET_FOREACH(vnet_iter) {
  827                 CURVNET_SET(vnet_iter);
  828                 ipreass_slowtimo();
  829                 CURVNET_RESTORE();
  830         }
  831         VNET_LIST_RUNLOCK_NOSLEEP();
  832 }
  833 
  834 void
  835 ip_drain(void)
  836 {
  837         VNET_ITERATOR_DECL(vnet_iter);
  838 
  839         VNET_LIST_RLOCK_NOSLEEP();
  840         VNET_FOREACH(vnet_iter) {
  841                 CURVNET_SET(vnet_iter);
  842                 ipreass_drain();
  843                 CURVNET_RESTORE();
  844         }
  845         VNET_LIST_RUNLOCK_NOSLEEP();
  846 }
  847 
  848 /*
  849  * The protocol to be inserted into ip_protox[] must be already registered
  850  * in inetsw[], either statically or through pf_proto_register().
  851  */
  852 int
  853 ipproto_register(short ipproto)
  854 {
  855         struct protosw *pr;
  856 
  857         /* Sanity checks. */
  858         if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
  859                 return (EPROTONOSUPPORT);
  860 
  861         /*
  862          * The protocol slot must not be occupied by another protocol
  863          * already.  An index pointing to IPPROTO_RAW is unused.
  864          */
  865         pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
  866         if (pr == NULL)
  867                 return (EPFNOSUPPORT);
  868         if (ip_protox[ipproto] != pr - inetsw)  /* IPPROTO_RAW */
  869                 return (EEXIST);
  870 
  871         /* Find the protocol position in inetsw[] and set the index. */
  872         for (pr = inetdomain.dom_protosw;
  873              pr < inetdomain.dom_protoswNPROTOSW; pr++) {
  874                 if (pr->pr_domain->dom_family == PF_INET &&
  875                     pr->pr_protocol && pr->pr_protocol == ipproto) {
  876                         ip_protox[pr->pr_protocol] = pr - inetsw;
  877                         return (0);
  878                 }
  879         }
  880         return (EPROTONOSUPPORT);
  881 }
  882 
  883 int
  884 ipproto_unregister(short ipproto)
  885 {
  886         struct protosw *pr;
  887 
  888         /* Sanity checks. */
  889         if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
  890                 return (EPROTONOSUPPORT);
  891 
  892         /* Check if the protocol was indeed registered. */
  893         pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
  894         if (pr == NULL)
  895                 return (EPFNOSUPPORT);
  896         if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
  897                 return (ENOENT);
  898 
  899         /* Reset the protocol slot to IPPROTO_RAW. */
  900         ip_protox[ipproto] = pr - inetsw;
  901         return (0);
  902 }
  903 
  904 u_char inetctlerrmap[PRC_NCMDS] = {
  905         0,              0,              0,              0,
  906         0,              EMSGSIZE,       EHOSTDOWN,      EHOSTUNREACH,
  907         EHOSTUNREACH,   EHOSTUNREACH,   ECONNREFUSED,   ECONNREFUSED,
  908         EMSGSIZE,       EHOSTUNREACH,   0,              0,
  909         0,              0,              EHOSTUNREACH,   0,
  910         ENOPROTOOPT,    ECONNREFUSED
  911 };
  912 
  913 /*
  914  * Forward a packet.  If some error occurs return the sender
  915  * an icmp packet.  Note we can't always generate a meaningful
  916  * icmp message because icmp doesn't have a large enough repertoire
  917  * of codes and types.
  918  *
  919  * If not forwarding, just drop the packet.  This could be confusing
  920  * if ipforwarding was zero but some routing protocol was advancing
  921  * us as a gateway to somewhere.  However, we must let the routing
  922  * protocol deal with that.
  923  *
  924  * The srcrt parameter indicates whether the packet is being forwarded
  925  * via a source route.
  926  */
  927 void
  928 ip_forward(struct mbuf *m, int srcrt)
  929 {
  930         struct ip *ip = mtod(m, struct ip *);
  931         struct in_ifaddr *ia;
  932         struct mbuf *mcopy;
  933         struct sockaddr_in *sin;
  934         struct in_addr dest;
  935         struct route ro;
  936         int error, type = 0, code = 0, mtu = 0;
  937 
  938         if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
  939                 IPSTAT_INC(ips_cantforward);
  940                 m_freem(m);
  941                 return;
  942         }
  943 #ifdef IPSEC
  944         if (ip_ipsec_fwd(m) != 0) {
  945                 IPSTAT_INC(ips_cantforward);
  946                 m_freem(m);
  947                 return;
  948         }
  949 #endif /* IPSEC */
  950 #ifdef IPSTEALTH
  951         if (!V_ipstealth) {
  952 #endif
  953                 if (ip->ip_ttl <= IPTTLDEC) {
  954                         icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
  955                             0, 0);
  956                         return;
  957                 }
  958 #ifdef IPSTEALTH
  959         }
  960 #endif
  961 
  962         bzero(&ro, sizeof(ro));
  963         sin = (struct sockaddr_in *)&ro.ro_dst;
  964         sin->sin_family = AF_INET;
  965         sin->sin_len = sizeof(*sin);
  966         sin->sin_addr = ip->ip_dst;
  967 #ifdef RADIX_MPATH
  968         rtalloc_mpath_fib(&ro,
  969             ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
  970             M_GETFIB(m));
  971 #else
  972         in_rtalloc_ign(&ro, 0, M_GETFIB(m));
  973 #endif
  974         if (ro.ro_rt != NULL) {
  975                 ia = ifatoia(ro.ro_rt->rt_ifa);
  976                 ifa_ref(&ia->ia_ifa);
  977         } else
  978                 ia = NULL;
  979 #ifndef IPSEC
  980         /*
  981          * 'ia' may be NULL if there is no route for this destination.
  982          * In case of IPsec, Don't discard it just yet, but pass it to
  983          * ip_output in case of outgoing IPsec policy.
  984          */
  985         if (!srcrt && ia == NULL) {
  986                 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
  987                 RO_RTFREE(&ro);
  988                 return;
  989         }
  990 #endif
  991 
  992         /*
  993          * Save the IP header and at most 8 bytes of the payload,
  994          * in case we need to generate an ICMP message to the src.
  995          *
  996          * XXX this can be optimized a lot by saving the data in a local
  997          * buffer on the stack (72 bytes at most), and only allocating the
  998          * mbuf if really necessary. The vast majority of the packets
  999          * are forwarded without having to send an ICMP back (either
 1000          * because unnecessary, or because rate limited), so we are
 1001          * really we are wasting a lot of work here.
 1002          *
 1003          * We don't use m_copy() because it might return a reference
 1004          * to a shared cluster. Both this function and ip_output()
 1005          * assume exclusive access to the IP header in `m', so any
 1006          * data in a cluster may change before we reach icmp_error().
 1007          */
 1008         mcopy = m_gethdr(M_NOWAIT, m->m_type);
 1009         if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
 1010                 /*
 1011                  * It's probably ok if the pkthdr dup fails (because
 1012                  * the deep copy of the tag chain failed), but for now
 1013                  * be conservative and just discard the copy since
 1014                  * code below may some day want the tags.
 1015                  */
 1016                 m_free(mcopy);
 1017                 mcopy = NULL;
 1018         }
 1019         if (mcopy != NULL) {
 1020                 mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
 1021                 mcopy->m_pkthdr.len = mcopy->m_len;
 1022                 m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
 1023         }
 1024 
 1025 #ifdef IPSTEALTH
 1026         if (!V_ipstealth) {
 1027 #endif
 1028                 ip->ip_ttl -= IPTTLDEC;
 1029 #ifdef IPSTEALTH
 1030         }
 1031 #endif
 1032 
 1033         /*
 1034          * If forwarding packet using same interface that it came in on,
 1035          * perhaps should send a redirect to sender to shortcut a hop.
 1036          * Only send redirect if source is sending directly to us,
 1037          * and if packet was not source routed (or has any options).
 1038          * Also, don't send redirect if forwarding using a default route
 1039          * or a route modified by a redirect.
 1040          */
 1041         dest.s_addr = 0;
 1042         if (!srcrt && V_ipsendredirects &&
 1043             ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
 1044                 struct rtentry *rt;
 1045 
 1046                 rt = ro.ro_rt;
 1047 
 1048                 if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
 1049                     satosin(rt_key(rt))->sin_addr.s_addr != 0) {
 1050 #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
 1051                         u_long src = ntohl(ip->ip_src.s_addr);
 1052 
 1053                         if (RTA(rt) &&
 1054                             (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
 1055                                 if (rt->rt_flags & RTF_GATEWAY)
 1056                                         dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
 1057                                 else
 1058                                         dest.s_addr = ip->ip_dst.s_addr;
 1059                                 /* Router requirements says to only send host redirects */
 1060                                 type = ICMP_REDIRECT;
 1061                                 code = ICMP_REDIRECT_HOST;
 1062                         }
 1063                 }
 1064         }
 1065 
 1066         error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
 1067 
 1068         if (error == EMSGSIZE && ro.ro_rt)
 1069                 mtu = ro.ro_rt->rt_mtu;
 1070         RO_RTFREE(&ro);
 1071 
 1072         if (error)
 1073                 IPSTAT_INC(ips_cantforward);
 1074         else {
 1075                 IPSTAT_INC(ips_forward);
 1076                 if (type)
 1077                         IPSTAT_INC(ips_redirectsent);
 1078                 else {
 1079                         if (mcopy)
 1080                                 m_freem(mcopy);
 1081                         if (ia != NULL)
 1082                                 ifa_free(&ia->ia_ifa);
 1083                         return;
 1084                 }
 1085         }
 1086         if (mcopy == NULL) {
 1087                 if (ia != NULL)
 1088                         ifa_free(&ia->ia_ifa);
 1089                 return;
 1090         }
 1091 
 1092         switch (error) {
 1093 
 1094         case 0:                         /* forwarded, but need redirect */
 1095                 /* type, code set above */
 1096                 break;
 1097 
 1098         case ENETUNREACH:
 1099         case EHOSTUNREACH:
 1100         case ENETDOWN:
 1101         case EHOSTDOWN:
 1102         default:
 1103                 type = ICMP_UNREACH;
 1104                 code = ICMP_UNREACH_HOST;
 1105                 break;
 1106 
 1107         case EMSGSIZE:
 1108                 type = ICMP_UNREACH;
 1109                 code = ICMP_UNREACH_NEEDFRAG;
 1110 
 1111 #ifdef IPSEC
 1112                 /* 
 1113                  * If IPsec is configured for this path,
 1114                  * override any possibly mtu value set by ip_output.
 1115                  */ 
 1116                 mtu = ip_ipsec_mtu(mcopy, mtu);
 1117 #endif /* IPSEC */
 1118                 /*
 1119                  * If the MTU was set before make sure we are below the
 1120                  * interface MTU.
 1121                  * If the MTU wasn't set before use the interface mtu or
 1122                  * fall back to the next smaller mtu step compared to the
 1123                  * current packet size.
 1124                  */
 1125                 if (mtu != 0) {
 1126                         if (ia != NULL)
 1127                                 mtu = min(mtu, ia->ia_ifp->if_mtu);
 1128                 } else {
 1129                         if (ia != NULL)
 1130                                 mtu = ia->ia_ifp->if_mtu;
 1131                         else
 1132                                 mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
 1133                 }
 1134                 IPSTAT_INC(ips_cantfrag);
 1135                 break;
 1136 
 1137         case ENOBUFS:
 1138         case EACCES:                    /* ipfw denied packet */
 1139                 m_freem(mcopy);
 1140                 if (ia != NULL)
 1141                         ifa_free(&ia->ia_ifa);
 1142                 return;
 1143         }
 1144         if (ia != NULL)
 1145                 ifa_free(&ia->ia_ifa);
 1146         icmp_error(mcopy, type, code, dest.s_addr, mtu);
 1147 }
 1148 
 1149 void
 1150 ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
 1151     struct mbuf *m)
 1152 {
 1153 
 1154         if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
 1155                 struct bintime bt;
 1156 
 1157                 bintime(&bt);
 1158                 if (inp->inp_socket->so_options & SO_BINTIME) {
 1159                         *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
 1160                             SCM_BINTIME, SOL_SOCKET);
 1161                         if (*mp)
 1162                                 mp = &(*mp)->m_next;
 1163                 }
 1164                 if (inp->inp_socket->so_options & SO_TIMESTAMP) {
 1165                         struct timeval tv;
 1166 
 1167                         bintime2timeval(&bt, &tv);
 1168                         *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
 1169                             SCM_TIMESTAMP, SOL_SOCKET);
 1170                         if (*mp)
 1171                                 mp = &(*mp)->m_next;
 1172                 }
 1173         }
 1174         if (inp->inp_flags & INP_RECVDSTADDR) {
 1175                 *mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
 1176                     sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
 1177                 if (*mp)
 1178                         mp = &(*mp)->m_next;
 1179         }
 1180         if (inp->inp_flags & INP_RECVTTL) {
 1181                 *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
 1182                     sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
 1183                 if (*mp)
 1184                         mp = &(*mp)->m_next;
 1185         }
 1186 #ifdef notyet
 1187         /* XXX
 1188          * Moving these out of udp_input() made them even more broken
 1189          * than they already were.
 1190          */
 1191         /* options were tossed already */
 1192         if (inp->inp_flags & INP_RECVOPTS) {
 1193                 *mp = sbcreatecontrol((caddr_t)opts_deleted_above,
 1194                     sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
 1195                 if (*mp)
 1196                         mp = &(*mp)->m_next;
 1197         }
 1198         /* ip_srcroute doesn't do what we want here, need to fix */
 1199         if (inp->inp_flags & INP_RECVRETOPTS) {
 1200                 *mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
 1201                     sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
 1202                 if (*mp)
 1203                         mp = &(*mp)->m_next;
 1204         }
 1205 #endif
 1206         if (inp->inp_flags & INP_RECVIF) {
 1207                 struct ifnet *ifp;
 1208                 struct sdlbuf {
 1209                         struct sockaddr_dl sdl;
 1210                         u_char  pad[32];
 1211                 } sdlbuf;
 1212                 struct sockaddr_dl *sdp;
 1213                 struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
 1214 
 1215                 if ((ifp = m->m_pkthdr.rcvif) &&
 1216                     ifp->if_index && ifp->if_index <= V_if_index) {
 1217                         sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
 1218                         /*
 1219                          * Change our mind and don't try copy.
 1220                          */
 1221                         if (sdp->sdl_family != AF_LINK ||
 1222                             sdp->sdl_len > sizeof(sdlbuf)) {
 1223                                 goto makedummy;
 1224                         }
 1225                         bcopy(sdp, sdl2, sdp->sdl_len);
 1226                 } else {
 1227 makedummy:      
 1228                         sdl2->sdl_len =
 1229                             offsetof(struct sockaddr_dl, sdl_data[0]);
 1230                         sdl2->sdl_family = AF_LINK;
 1231                         sdl2->sdl_index = 0;
 1232                         sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
 1233                 }
 1234                 *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
 1235                     IP_RECVIF, IPPROTO_IP);
 1236                 if (*mp)
 1237                         mp = &(*mp)->m_next;
 1238         }
 1239         if (inp->inp_flags & INP_RECVTOS) {
 1240                 *mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
 1241                     sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
 1242                 if (*mp)
 1243                         mp = &(*mp)->m_next;
 1244         }
 1245 
 1246         if (inp->inp_flags2 & INP_RECVFLOWID) {
 1247                 uint32_t flowid, flow_type;
 1248 
 1249                 flowid = m->m_pkthdr.flowid;
 1250                 flow_type = M_HASHTYPE_GET(m);
 1251 
 1252                 /*
 1253                  * XXX should handle the failure of one or the
 1254                  * other - don't populate both?
 1255                  */
 1256                 *mp = sbcreatecontrol((caddr_t) &flowid,
 1257                     sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
 1258                 if (*mp)
 1259                         mp = &(*mp)->m_next;
 1260                 *mp = sbcreatecontrol((caddr_t) &flow_type,
 1261                     sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
 1262                 if (*mp)
 1263                         mp = &(*mp)->m_next;
 1264         }
 1265 
 1266 #ifdef  RSS
 1267         if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
 1268                 uint32_t flowid, flow_type;
 1269                 uint32_t rss_bucketid;
 1270 
 1271                 flowid = m->m_pkthdr.flowid;
 1272                 flow_type = M_HASHTYPE_GET(m);
 1273 
 1274                 if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
 1275                         *mp = sbcreatecontrol((caddr_t) &rss_bucketid,
 1276                            sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
 1277                         if (*mp)
 1278                                 mp = &(*mp)->m_next;
 1279                 }
 1280         }
 1281 #endif
 1282 }
 1283 
 1284 /*
 1285  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
 1286  * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
 1287  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
 1288  * compiled.
 1289  */
 1290 static VNET_DEFINE(int, ip_rsvp_on);
 1291 VNET_DEFINE(struct socket *, ip_rsvpd);
 1292 
 1293 #define V_ip_rsvp_on            VNET(ip_rsvp_on)
 1294 
 1295 int
 1296 ip_rsvp_init(struct socket *so)
 1297 {
 1298 
 1299         if (so->so_type != SOCK_RAW ||
 1300             so->so_proto->pr_protocol != IPPROTO_RSVP)
 1301                 return EOPNOTSUPP;
 1302 
 1303         if (V_ip_rsvpd != NULL)
 1304                 return EADDRINUSE;
 1305 
 1306         V_ip_rsvpd = so;
 1307         /*
 1308          * This may seem silly, but we need to be sure we don't over-increment
 1309          * the RSVP counter, in case something slips up.
 1310          */
 1311         if (!V_ip_rsvp_on) {
 1312                 V_ip_rsvp_on = 1;
 1313                 V_rsvp_on++;
 1314         }
 1315 
 1316         return 0;
 1317 }
 1318 
 1319 int
 1320 ip_rsvp_done(void)
 1321 {
 1322 
 1323         V_ip_rsvpd = NULL;
 1324         /*
 1325          * This may seem silly, but we need to be sure we don't over-decrement
 1326          * the RSVP counter, in case something slips up.
 1327          */
 1328         if (V_ip_rsvp_on) {
 1329                 V_ip_rsvp_on = 0;
 1330                 V_rsvp_on--;
 1331         }
 1332         return 0;
 1333 }
 1334 
 1335 int
 1336 rsvp_input(struct mbuf **mp, int *offp, int proto)
 1337 {
 1338         struct mbuf *m;
 1339 
 1340         m = *mp;
 1341         *mp = NULL;
 1342 
 1343         if (rsvp_input_p) { /* call the real one if loaded */
 1344                 *mp = m;
 1345                 rsvp_input_p(mp, offp, proto);
 1346                 return (IPPROTO_DONE);
 1347         }
 1348 
 1349         /* Can still get packets with rsvp_on = 0 if there is a local member
 1350          * of the group to which the RSVP packet is addressed.  But in this
 1351          * case we want to throw the packet away.
 1352          */
 1353         
 1354         if (!V_rsvp_on) {
 1355                 m_freem(m);
 1356                 return (IPPROTO_DONE);
 1357         }
 1358 
 1359         if (V_ip_rsvpd != NULL) { 
 1360                 *mp = m;
 1361                 rip_input(mp, offp, proto);
 1362                 return (IPPROTO_DONE);
 1363         }
 1364         /* Drop the packet */
 1365         m_freem(m);
 1366         return (IPPROTO_DONE);
 1367 }

Cache object: 1bfd0b51c8d35a868d80bc92dfb29e43


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.