The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_input.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $OpenBSD: ip_input.c,v 1.381 2022/08/29 14:43:56 bluhm Exp $    */
    2 /*      $NetBSD: ip_input.c,v 1.30 1996/03/16 23:53:58 christos Exp $   */
    3 
    4 /*
    5  * Copyright (c) 1982, 1986, 1988, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)ip_input.c  8.2 (Berkeley) 1/4/94
   33  */
   34 
   35 #include "pf.h"
   36 #include "carp.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/mbuf.h>
   41 #include <sys/domain.h>
   42 #include <sys/mutex.h>
   43 #include <sys/protosw.h>
   44 #include <sys/socket.h>
   45 #include <sys/socketvar.h>
   46 #include <sys/sysctl.h>
   47 #include <sys/pool.h>
   48 #include <sys/task.h>
   49 
   50 #include <net/if.h>
   51 #include <net/if_var.h>
   52 #include <net/if_dl.h>
   53 #include <net/route.h>
   54 #include <net/netisr.h>
   55 
   56 #include <netinet/in.h>
   57 #include <netinet/in_systm.h>
   58 #include <netinet/if_ether.h>
   59 #include <netinet/ip.h>
   60 #include <netinet/in_pcb.h>
   61 #include <netinet/in_var.h>
   62 #include <netinet/ip_var.h>
   63 #include <netinet/ip_icmp.h>
   64 #include <net/if_types.h>
   65 
   66 #ifdef INET6
   67 #include <netinet6/ip6_var.h>
   68 #endif
   69 
   70 #if NPF > 0
   71 #include <net/pfvar.h>
   72 #endif
   73 
   74 #ifdef MROUTING
   75 #include <netinet/ip_mroute.h>
   76 #endif
   77 
   78 #ifdef IPSEC
   79 #include <netinet/ip_ipsp.h>
   80 #endif /* IPSEC */
   81 
   82 #if NCARP > 0
   83 #include <netinet/ip_carp.h>
   84 #endif
   85 
   86 /*
       * Values controllable via sysctl.  The variables that appear in
       * ipctl_vars[] below are paired there with the bounds of their
       * accepted range.  The others (ip_dosourceroute, ip_mtudisc,
       * ip_mtudisc_timeout) are not in that table; presumably they are
       * handled by a dedicated sysctl handler outside this excerpt.
       */
   87 int     ipforwarding = 0;       /* 0: ip_input_if() drops packets not for us */
   88 int     ipmforwarding = 0;      /* gates ip_mforward() in ip_input_if() */
   89 int     ipmultipath = 0;
   90 int     ipsendredirects = 1;
   91 int     ip_dosourceroute = 0;
   92 int     ip_defttl = IPDEFTTL;
   93 int     ip_mtudisc = 1;
   94 int     ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
   95 int     ip_directedbcast = 0;   /* consulted by in_ouraddr() */
   96 
   97 /* Protects `ipq' and `ip_frags'. */
   98 struct mutex    ipq_mutex = MUTEX_INITIALIZER(IPL_SOFTNET);
   99 
  100 /* IP reassembly queue: list head of struct ipq, searched by ip_fragcheck(). */
  101 LIST_HEAD(, ipq) ipq;
  102 
  103 /*
       * Keep track of memory used for reassembly.  ip_fragcheck() increments
       * ip_frags for every fragment entry it allocates and refuses (after
       * calling ip_flush()) to go beyond ip_maxqueue.
       */
  104 int     ip_maxqueue = 300;
  105 int     ip_frags = 0;
  106 
      /*
       * Table of integer sysctl variables.  Each entry names the IPCTL_*
       * identifier, the variable it refers to and what appear to be the
       * minimum and maximum accepted values (SYSCTL_INT_READONLY presumably
       * expands to a bound pair marking the entry read-only -- confirm
       * against <sys/sysctl.h>).  The consumer of this table is not part of
       * this excerpt.
       */
  107 const struct sysctl_bounded_args ipctl_vars[] = {
  108 #ifdef MROUTING
  109         { IPCTL_MRTPROTO, &ip_mrtproto, SYSCTL_INT_READONLY },
  110 #endif
  111         { IPCTL_FORWARDING, &ipforwarding, 0, 2 },
  112         { IPCTL_SENDREDIRECTS, &ipsendredirects, 0, 1 },
  113         { IPCTL_DEFTTL, &ip_defttl, 0, 255 },
  114         { IPCTL_DIRECTEDBCAST, &ip_directedbcast, 0, 1 },
  115         { IPCTL_IPPORT_FIRSTAUTO, &ipport_firstauto, 0, 65535 },
  116         { IPCTL_IPPORT_LASTAUTO, &ipport_lastauto, 0, 65535 },
  117         { IPCTL_IPPORT_HIFIRSTAUTO, &ipport_hifirstauto, 0, 65535 },
  118         { IPCTL_IPPORT_HILASTAUTO, &ipport_hilastauto, 0, 65535 },
  119         { IPCTL_IPPORT_MAXQUEUE, &ip_maxqueue, 0, 10000 },
  120         { IPCTL_MFORWARDING, &ipmforwarding, 0, 1 },
  121         { IPCTL_MULTIPATH, &ipmultipath, 0, 1 },
  122         { IPCTL_ARPTIMEOUT, &arpt_keep, 0, INT_MAX },
  123         { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
  124 };
  125 
  126 struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
      /*
       * ipintrq (above) holds packets awaiting local delivery: ip_ours()
       * enqueues onto it and ipintr() dequeues from it.
       */
  127 
      /*
       * Pools set up by ip_init(): ipqent_pool for struct ipqent (one is
       * taken per fragment in ip_fragcheck()), ipq_pool for struct ipq.
       */
  128 struct pool ipqent_pool;
  129 struct pool ipq_pool;
  130 
      /* Statistics counters, allocated in ip_init() via counters_alloc(). */
  131 struct cpumem *ipcounters;
  132 
  133 int ip_sysctl_ipstat(void *, size_t *, void *);
  134 
      /*
       * Mbuf queues initialized in ip_init(); they are handed as argument
       * to the ipsend_task / ipsendraw_task tasks declared below.
       */
  135 static struct mbuf_queue        ipsend_mq;
  136 static struct mbuf_queue        ipsendraw_mq;
  137 
  138 extern struct niqueue           arpinq;
  139 
      /* Input path helpers; ip_ours() and ip_fragcheck() are defined below. */
  140 int     ip_ours(struct mbuf **, int *, int, int);
  141 int     ip_dooptions(struct mbuf *, struct ifnet *);
  142 int     in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
  143 
      /* Fragment reassembly helpers. */
  144 int             ip_fragcheck(struct mbuf **, int *);
  145 struct mbuf *   ip_reass(struct ipqent *, struct ipq *);
  146 void            ip_freef(struct ipq *);
  147 void            ip_flush(void);
  148 
      /* Tasks binding each dispatch function to its mbuf queue. */
  149 static void ip_send_dispatch(void *);
  150 static void ip_sendraw_dispatch(void *);
  151 static struct task ipsend_task = TASK_INITIALIZER(ip_send_dispatch, &ipsend_mq);
  152 static struct task ipsendraw_task =
  153         TASK_INITIALIZER(ip_sendraw_dispatch, &ipsendraw_mq);
  154 
  155 /*
  156  * Saved copy of the IP source-route option of an incoming packet.
  157  * It is kept in case a protocol wants to respond over the same route
  158  * when the packet got here using IP source routing.  This allows
  159  * connection establishment and maintenance when the remote end is on
  160  * a network that is not known to us.
  161  */
  162 struct ip_srcrt {
  163         int             isr_nhops;                 /* number of hops */
  164         struct in_addr  isr_dst;                   /* final destination */
  165         char            isr_nop;                   /* one NOP to align */
  166         char            isr_hdr[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN & OFFSET */
              /* room for MAX_IPOPTLEN / sizeof(struct in_addr) addresses */
  167         struct in_addr  isr_routes[MAX_IPOPTLEN/sizeof(struct in_addr)];
  168 };
  169 
      /*
       * Defined outside this excerpt; presumably records a source route
       * into a struct ip_srcrt -- confirm at the definition.
       */
  170 void save_rte(struct mbuf *, u_char *, struct in_addr);
  171 
  172 /*
  173  * IP initialization: fill in IP protocol switch table.
  174  * All protocols not implemented in kernel go to raw IP protocol handler.
       *
       * Besides building ip_protox[], this allocates the statistics
       * counters, sets up the two reassembly pools and the reassembly list,
       * loads the default "bad dynamic" and "root only" port sets,
       * initializes the two send queues and finally calls arpinit() plus,
       * when configured, ipsec_init() and the multicast route timer setup.
       * Panics if no raw IP protocol switch entry can be found.
  175  */
  176 void
  177 ip_init(void)
  178 {
  179         const struct protosw *pr;
  180         int i;
              /* Default port lists; the loops below stop at the first 0 entry. */
  181         const u_int16_t defbaddynamicports_tcp[] = DEFBADDYNAMICPORTS_TCP;
  182         const u_int16_t defbaddynamicports_udp[] = DEFBADDYNAMICPORTS_UDP;
  183         const u_int16_t defrootonlyports_tcp[] = DEFROOTONLYPORTS_TCP;
  184         const u_int16_t defrootonlyports_udp[] = DEFROOTONLYPORTS_UDP;
  185 
  186         ipcounters = counters_alloc(ips_ncounters);
  187 
  188         pool_init(&ipqent_pool, sizeof(struct ipqent), 0,
  189             IPL_SOFTNET, 0, "ipqe",  NULL);
  190         pool_init(&ipq_pool, sizeof(struct ipq), 0,
  191             IPL_SOFTNET, 0, "ipq", NULL);
  192 
              /*
               * ip_protox[] maps an IP protocol number to an index into
               * inetsw[].  Start with every slot pointing at raw IP ...
               */
  193         pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
  194         if (pr == NULL)
  195                 panic("ip_init");
  196         for (i = 0; i < IPPROTO_MAX; i++)
  197                 ip_protox[i] = pr - inetsw;
              /*
               * ... then override the slots of the protocols the inet domain
               * really implements (non-zero, non-raw, below IPPROTO_MAX).
               */
  198         for (pr = inetdomain.dom_protosw;
  199             pr < inetdomain.dom_protoswNPROTOSW; pr++)
  200                 if (pr->pr_domain->dom_family == PF_INET &&
  201                     pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW &&
  202                     pr->pr_protocol < IPPROTO_MAX)
  203                         ip_protox[pr->pr_protocol] = pr - inetsw;
              /* Empty fragment reassembly list. */
  204         LIST_INIT(&ipq);
  205 
  206         /* Fill in list of ports not to allocate dynamically. */
  207         memset(&baddynamicports, 0, sizeof(baddynamicports));
  208         for (i = 0; defbaddynamicports_tcp[i] != 0; i++)
  209                 DP_SET(baddynamicports.tcp, defbaddynamicports_tcp[i]);
  210         for (i = 0; defbaddynamicports_udp[i] != 0; i++)
  211                 DP_SET(baddynamicports.udp, defbaddynamicports_udp[i]);
  212 
  213         /* Fill in list of ports only root can bind to. */
  214         memset(&rootonlyports, 0, sizeof(rootonlyports));
  215         for (i = 0; defrootonlyports_tcp[i] != 0; i++)
  216                 DP_SET(rootonlyports.tcp, defrootonlyports_tcp[i]);
  217         for (i = 0; defrootonlyports_udp[i] != 0; i++)
  218                 DP_SET(rootonlyports.udp, defrootonlyports_udp[i]);
  219 
              /* Queues served by ipsend_task and ipsendraw_task. */
  220         mq_init(&ipsend_mq, 64, IPL_SOFTNET);
  221         mq_init(&ipsendraw_mq, 64, IPL_SOFTNET);
  222 
  223         arpinit();
  224 #ifdef IPSEC
  225         ipsec_init();
  226 #endif
  227 #ifdef MROUTING
  228         rt_timer_queue_init(&ip_mrouterq, MCAST_EXPIRE_FREQUENCY,
  229             &mfc_expire_route);
  230 #endif
  231 }
  232 
  233 /*
  234  * Enqueue packet for local delivery.  Queuing is used as a boundary
  235  * between the network layer (input/forward path) running with
  236  * NET_LOCK_SHARED() and the transport layer needing it exclusively.
       *
       * mp    in/out packet; set to NULL once the packet has been queued.
       * offp  receives the header length from ip_fragcheck().
       * nxt   value passed in is ignored: it is overwritten right away with
       *       the result of ip_fragcheck().
       * af    AF_UNSPEC when called from the plain input path; any other
       *       value means the caller is already running a delivery loop.
       *
       * Returns IPPROTO_DONE when ip_fragcheck() returned IPPROTO_DONE or
       * when the packet was put on ipintrq; otherwise (af != AF_UNSPEC)
       * returns the next protocol so that the caller continues delivery
       * itself.
  237  */
  238 int
  239 ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
  240 {
  241         nxt = ip_fragcheck(mp, offp);
  242         if (nxt == IPPROTO_DONE)
  243                 return IPPROTO_DONE;
  244 
  245         /* We are already in an IPv4/IPv6 local deliver loop. */
  246         if (af != AF_UNSPEC)
  247                 return nxt;
  248 
              /* Hand over to ipintr(); the queue owns the mbuf from here on. */
  249         niq_enqueue(&ipintrq, *mp);
  250         *mp = NULL;
  251         return IPPROTO_DONE;
  252 }
  253 
  254 /*
  255  * Dequeue and process locally delivered packets.
  256  * This is called with exclusive NET_LOCK().
       *
       * Every packet taken from ipintrq is handed to ip_deliver() starting
       * right behind its IP header, with the header's protocol field as the
       * first protocol to run.
  257  */
  258 void
  259 ipintr(void)
  260 {
  261         struct mbuf *m;
  262 
  263         while ((m = niq_dequeue(&ipintrq)) != NULL) {
  264                 struct ip *ip;
  265                 int off, nxt;
  266 
  267 #ifdef DIAGNOSTIC
  268                 if ((m->m_flags & M_PKTHDR) == 0)
  269                         panic("ipintr no HDR");
  270 #endif
  271                 ip = mtod(m, struct ip *);
                      /* ip_hl counts 32-bit words; off is the header size in bytes. */
  272                 off = ip->ip_hl << 2;
  273                 nxt = ip->ip_p;
  274 
  275                 nxt = ip_deliver(&m, &off, nxt, AF_INET);
                      /* ip_deliver() only returns once the chain is finished. */
  276                 KASSERT(nxt == IPPROTO_DONE);
  277         }
  278 }
  279 
  280 /*
  281  * IPv4 input routine.
  282  *
  283  * Checksum and byte swap header.  Process options. Forward or deliver.
       *
       * Thin wrapper: the work is done by ip_input_if(), which is called
       * with offset 0 and AF_UNSPEC (i.e. not from within a delivery loop)
       * and is expected to always consume the packet.
  284  */
  285 void
  286 ipv4_input(struct ifnet *ifp, struct mbuf *m)
  287 {
  288         int off, nxt;
  289 
  290         off = 0;
  291         nxt = ip_input_if(&m, &off, IPPROTO_IPV4, AF_UNSPEC, ifp);
  292         KASSERT(nxt == IPPROTO_DONE);
  293 }
  294 
      /*
       * Sanity-check the IPv4 header at the front of mbuf chain `m', which
       * was received on `ifp'.
       *
       * Checks, in order: a minimal header is contiguous, the version, the
       * header length, loopback addresses on a non-loopback interface, the
       * header checksum (unless the checksum flags already say it is good),
       * and the total length against both the header length and the amount
       * of data in the chain.  Data beyond the IP total length is trimmed.
       *
       * Returns the mbuf to continue with (m_pullup() may have replaced the
       * one passed in), or NULL when the packet was rejected.  On the `bad'
       * path the chain is freed here; when m_pullup() fails NULL is returned
       * without freeing, presumably because m_pullup() has already released
       * the chain -- see mbuf(9).
       */
  295 struct mbuf *
  296 ipv4_check(struct ifnet *ifp, struct mbuf *m)
  297 {
  298         struct ip *ip;
  299         int hlen, len;
  300 
              /* Need the fixed part of the header in the first mbuf. */
  301         if (m->m_len < sizeof(*ip)) {
  302                 m = m_pullup(m, sizeof(*ip));
  303                 if (m == NULL) {
  304                         ipstat_inc(ips_toosmall);
  305                         return (NULL);
  306                 }
  307         }
  308 
  309         ip = mtod(m, struct ip *);
  310         if (ip->ip_v != IPVERSION) {
  311                 ipstat_inc(ips_badvers);
  312                 goto bad;
  313         }
  314 
              /* ip_hl counts 32-bit words. */
  315         hlen = ip->ip_hl << 2;
  316         if (hlen < sizeof(*ip)) {       /* minimum header length */
  317                 ipstat_inc(ips_badhlen);
  318                 goto bad;
  319         }
              /* Header with options: make all of it contiguous as well. */
  320         if (hlen > m->m_len) {
  321                 m = m_pullup(m, hlen);
  322                 if (m == NULL) {
  323                         ipstat_inc(ips_badhlen);
  324                         return (NULL);
  325                 }
  326                 ip = mtod(m, struct ip *);
  327         }
  328 
  329         /* 127/8 must not appear on wire - RFC1122 */
  330         if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
  331             (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
  332                 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
  333                         ipstat_inc(ips_badaddr);
  334                         goto bad;
  335                 }
  336         }
  337 
              /*
               * Header checksum.  Skip it when the flags already mark it as
               * good, fail right away when they mark it as bad, otherwise
               * verify in software and remember a good result in the flags.
               */
  338         if (!ISSET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK)) {
  339                 if (ISSET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_BAD)) {
  340                         ipstat_inc(ips_badsum);
  341                         goto bad;
  342                 }
  343 
  344                 ipstat_inc(ips_inswcsum);
  345                 if (in_cksum(m, hlen) != 0) {
  346                         ipstat_inc(ips_badsum);
  347                         goto bad;
  348                 }
  349 
  350                 SET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK);
  351         }
  352 
  353         /* Retrieve the packet length. */
  354         len = ntohs(ip->ip_len);
  355 
  356         /*
  357          * The total length must at least cover the header itself.
  358          */
  359         if (len < hlen) {
  360                 ipstat_inc(ips_badlen);
  361                 goto bad;
  362         }
  363 
  364         /*
  365          * Check that the amount of data in the buffers
  366          * is at least as much as the IP header would have us expect.
  367          * Trim mbufs if longer than we expect.
  368          * Drop packet if shorter than we expect.
  369          */
  370         if (m->m_pkthdr.len < len) {
  371                 ipstat_inc(ips_tooshort);
  372                 goto bad;
  373         }
  374         if (m->m_pkthdr.len > len) {
                      /* Single mbuf: just shorten it.  Otherwise let m_adj()
                       * trim; the count passed is negative here. */
  375                 if (m->m_len == m->m_pkthdr.len) {
  376                         m->m_len = len;
  377                         m->m_pkthdr.len = len;
  378                 } else
  379                         m_adj(m, len - m->m_pkthdr.len);
  380         }
  381 
  382         return (m);
  383 bad:
  384         m_freem(m);
  385         return (NULL);
  386 }
  387 
      /*
       * Process one IPv4 packet received on `ifp': validate it, run it
       * through carp and pf when configured, handle options, then either
       * pass it to ip_ours() for local delivery, forward it with
       * ip_forward(), or drop it.
       *
       * mp    in/out packet; *mp is NULL on return from the forwarding
       *       path and is released through m_freemp() on the `bad' path.
       * offp  must point to 0 on entry (asserted); passed on to ip_ours().
       * nxt   passed on to ip_ours(), which overwrites it.
       * af    passed on to ip_ours(); AF_UNSPEC when called from
       *       ipv4_input().
       *
       * Returns the value of ip_ours() for locally delivered packets and
       * IPPROTO_DONE in every other case.
       *
       * The route looked up by in_ouraddr() is released with rtfree() at
       * `out'.  The forwarding path returns without doing so after passing
       * `rt' to ip_forward(), which presumably takes over the reference --
       * confirm at ip_forward().
       */
  388 int
  389 ip_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp)
  390 {
  391         struct mbuf     *m;
  392         struct rtentry  *rt = NULL;
  393         struct ip       *ip;
  394         int hlen;
              /*
               * With pf: first the destination address as seen before
               * pf_test(), afterwards a flag telling whether pf changed it.
               * Stays 0 without pf.
               */
  395         in_addr_t pfrdr = 0;
  396 
  397         KASSERT(*offp == 0);
  398 
  399         ipstat_inc(ips_total);
  400         m = *mp = ipv4_check(ifp, *mp);
  401         if (m == NULL)
  402                 goto bad;
  403 
  404         ip = mtod(m, struct ip *);
  405 
  406 #if NCARP > 0
  407         if (carp_lsdrop(ifp, m, AF_INET, &ip->ip_src.s_addr,
  408             &ip->ip_dst.s_addr, (ip->ip_p == IPPROTO_ICMP ? 0 : 1)))
  409                 goto bad;
  410 #endif
  411 
  412 #if NPF > 0
  413         /*
  414          * Packet filter
  415          */
  416         pfrdr = ip->ip_dst.s_addr;
  417         if (pf_test(AF_INET, PF_IN, ifp, mp) != PF_PASS)
  418                 goto bad;
              /* pf_test() works on *mp; reload the mbuf and header pointers. */
  419         m = *mp;
  420         if (m == NULL)
  421                 goto bad;
  422 
  423         ip = mtod(m, struct ip *);
  424         pfrdr = (pfrdr != ip->ip_dst.s_addr);
  425 #endif
  426 
  427         hlen = ip->ip_hl << 2;
  428 
  429         /*
  430          * Process options and, if not destined for us,
  431          * ship it on.  ip_dooptions returns 1 when an
  432          * error was detected (causing an icmp message
  433          * to be sent and the original packet to be freed).
               * Both pointers are cleared so that nothing is freed
               * a second time at `bad'.
  434          */
  435         if (hlen > sizeof (struct ip) && ip_dooptions(m, ifp)) {
  436                 m = *mp = NULL;
  437                 goto bad;
  438         }
  439 
              /* Limited broadcast and the "any" address are always for us. */
  440         if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
  441             ip->ip_dst.s_addr == INADDR_ANY) {
  442                 nxt = ip_ours(mp, offp, nxt, af);
  443                 goto out;
  444         }
  445 
              /*
               * in_ouraddr(): 1 means deliver locally, 2 means drop (it uses
               * 2 for a packet that arrived on the wrong interface), any
               * other value means keep going.  It also fills in `rt'.
               */
  446         switch(in_ouraddr(m, ifp, &rt)) {
  447         case 2:
  448                 goto bad;
  449         case 1:
  450                 nxt = ip_ours(mp, offp, nxt, af);
  451                 goto out;
  452         }
  453 
  454         if (IN_MULTICAST(ip->ip_dst.s_addr)) {
  455                 /*
  456                  * Make sure M_MCAST is set.  It should theoretically
  457                  * already be there, but let's play safe because upper
  458                  * layers check for this flag.
  459                  */
  460                 m->m_flags |= M_MCAST;
  461 
  462 #ifdef MROUTING
  463                 if (ipmforwarding && ip_mrouter[ifp->if_rdomain]) {
  464                         int error;
  465 
                              /* May replace the mbuf, hence the reload of ip. */
  466                         if (m->m_flags & M_EXT) {
  467                                 if ((m = *mp = m_pullup(m, hlen)) == NULL) {
  468                                         ipstat_inc(ips_toosmall);
  469                                         goto bad;
  470                                 }
  471                                 ip = mtod(m, struct ip *);
  472                         }
  473                         /*
  474                          * If we are acting as a multicast router, all
  475                          * incoming multicast packets are passed to the
  476                          * kernel-level multicast forwarding function.
  477                          * The packet is returned (relatively) intact; if
  478                          * ip_mforward() returns a non-zero value, the packet
  479                          * must be discarded, else it may be accepted below.
  480                          *
  481                          * (The IP ident field is put in the same byte order
  482                          * as expected when ip_mforward() is called from
  483                          * ip_output().)
  484                          */
  485                         KERNEL_LOCK();
  486                         error = ip_mforward(m, ifp);
  487                         KERNEL_UNLOCK();
  488                         if (error) {
  489                                 ipstat_inc(ips_cantforward);
  490                                 goto bad;
  491                         }
  492 
  493                         /*
  494                          * The process-level routing daemon needs to receive
  495                          * all multicast IGMP packets, whether or not this
  496                          * host belongs to their destination groups.
  497                          */
  498                         if (ip->ip_p == IPPROTO_IGMP) {
  499                                 nxt = ip_ours(mp, offp, nxt, af);
  500                                 goto out;
  501                         }
  502                         ipstat_inc(ips_forward);
  503                 }
  504 #endif
  505                 /*
  506                  * See if we belong to the destination multicast group on the
  507                  * arrival interface.
  508                  */
  509                 if (!in_hasmulti(&ip->ip_dst, ifp)) {
  510                         ipstat_inc(ips_notmember);
  511                         if (!IN_LOCAL_GROUP(ip->ip_dst.s_addr))
  512                                 ipstat_inc(ips_cantforward);
  513                         goto bad;
  514                 }
  515                 nxt = ip_ours(mp, offp, nxt, af);
  516                 goto out;
  517         }
  518 
              /*
               * Second carp check, for ICMP only: the check at the top of the
               * function was called with 0 as last argument for ICMP, here it
               * is called with 1.
               */
  519 #if NCARP > 0
  520         if (ip->ip_p == IPPROTO_ICMP &&
  521             carp_lsdrop(ifp, m, AF_INET, &ip->ip_src.s_addr,
  522             &ip->ip_dst.s_addr, 1))
  523                 goto bad;
  524 #endif
  525         /*
  526          * Not for us; forward if possible and desirable.
  527          */
  528         if (ipforwarding == 0) {
  529                 ipstat_inc(ips_cantforward);
  530                 goto bad;
  531         }
  532 #ifdef IPSEC
  533         if (ipsec_in_use) {
  534                 int rv;
  535 
  536                 rv = ipsec_forward_check(m, hlen, AF_INET);
  537                 if (rv != 0) {
  538                         ipstat_inc(ips_cantforward);
  539                         goto bad;
  540                 }
  541                 /*
  542                  * Fall through, forward packet. Outbound IPsec policy
  543                  * checking will occur in ip_output().
  544                  */
  545         }
  546 #endif /* IPSEC */
  547 
              /* The mbuf now belongs to ip_forward(); do not touch it again. */
  548         ip_forward(m, ifp, rt, pfrdr);
  549         *mp = NULL;
  550         return IPPROTO_DONE;
  551  bad:
              /* Drop: release whatever *mp still holds, then the route. */
  552         nxt = IPPROTO_DONE;
  553         m_freemp(mp);
  554  out:
  555         rtfree(rt);
  556         return nxt;
  557 }
  558 
      /*
       * Check whether the packet in *mp is a fragment and, if it is, run it
       * through reassembly.
       *
       * mp    in/out packet.  May be replaced by m_pullup() or by the
       *       datagram returned from ip_reass().
       * offp  on success receives the IP header length in bytes.
       *
       * Returns the protocol field of the IP header when *mp holds a packet
       * that can be delivered (an unfragmented packet, or the result of
       * ip_reass()).  Returns IPPROTO_DONE when there is nothing to deliver:
       * the fragment was malformed, a limit or allocation failure hit, or
       * ip_reass() returned NULL.
       *
       * For fragments the header is modified in place: ip_len is reduced by
       * the header length and ip_off is turned into a byte offset.  ip_len
       * gets the header length added back once ip_reass() has returned a
       * datagram.
       *
       * ipq_mutex is held from the queue lookup until the end of the
       * reassembly attempt; the `bad' label may only be reached with the
       * mutex held since it releases it.
       */
  559 int
  560 ip_fragcheck(struct mbuf **mp, int *offp)
  561 {
  562         struct ip *ip;
  563         struct ipq *fp;
  564         struct ipqent *ipqe;
  565         int hlen;
  566         uint16_t mff;
  567 
  568         ip = mtod(*mp, struct ip *);
  569         hlen = ip->ip_hl << 2;
  570 
  571         /*
  572          * If offset or more fragments are set, must reassemble.
  573          * Otherwise, nothing need be done.
  574          * (We could look in the reassembly queue to see
  575          * if the packet was previously fragmented,
  576          * but it's not worth the time; just let them time out.)
  577          */
  578         if (ISSET(ip->ip_off, htons(IP_OFFMASK | IP_MF))) {
  579                 if ((*mp)->m_flags & M_EXT) {           /* XXX */
  580                         if ((*mp = m_pullup(*mp, hlen)) == NULL) {
  581                                 ipstat_inc(ips_toosmall);
  582                                 return IPPROTO_DONE;
  583                         }
  584                         ip = mtod(*mp, struct ip *);
  585                 }
  586 
  587                 /*
  588                  * Adjust ip_len to not reflect header,
  589                  * set ipqe_mff if more fragments are expected,
  590                  * convert offset of this to bytes.
  591                  */
  592                 ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
  593                 mff = ISSET(ip->ip_off, htons(IP_MF));
  594                 if (mff) {
  595                         /*
  596                          * Make sure that fragments have a data length
  597                          * that's a non-zero multiple of 8 bytes.
  598                          */
  599                         if (ntohs(ip->ip_len) == 0 ||
  600                             (ntohs(ip->ip_len) & 0x7) != 0) {
  601                                 ipstat_inc(ips_badfrags);
  602                                 m_freemp(mp);
  603                                 return IPPROTO_DONE;
  604                         }
  605                 }
                      /* The offset field counts units of 8 bytes. */
  606                 ip->ip_off = htons(ntohs(ip->ip_off) << 3);
  607 
  608                 mtx_enter(&ipq_mutex);
  609 
  610                 /*
  611                  * Look for queue of fragments
  612                  * of this datagram.
                       * fp is left NULL when the loop ends without a match.
  613                  */
  614                 LIST_FOREACH(fp, &ipq, ipq_q) {
  615                         if (ip->ip_id == fp->ipq_id &&
  616                             ip->ip_src.s_addr == fp->ipq_src.s_addr &&
  617                             ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
  618                             ip->ip_p == fp->ipq_p)
  619                                 break;
  620                 }
  621 
  622                 /*
  623                  * If datagram marked as having more fragments
  624                  * or if this is not the first fragment,
  625                  * attempt reassembly; if it succeeds, proceed.
  626                  */
  627                 if (mff || ip->ip_off) {
  628                         ipstat_inc(ips_fragments);
                              /* Over the fragment limit: flush and drop this one. */
  629                         if (ip_frags + 1 > ip_maxqueue) {
  630                                 ip_flush();
  631                                 ipstat_inc(ips_rcvmemdrop);
  632                                 goto bad;
  633                         }
  634 
  635                         ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
  636                         if (ipqe == NULL) {
  637                                 ipstat_inc(ips_rcvmemdrop);
  638                                 goto bad;
  639                         }
  640                         ip_frags++;
  641                         ipqe->ipqe_mff = mff;
  642                         ipqe->ipqe_m = *mp;
  643                         ipqe->ipqe_ip = ip;
                              /*
                               * The entry (and with it the mbuf) is handed to
                               * ip_reass().  A NULL result means there is no
                               * datagram to deliver now; *mp is NULL then, so
                               * the m_freemp() at `bad' has no mbuf to release.
                               */
  644                         *mp = ip_reass(ipqe, fp);
  645                         if (*mp == NULL)
  646                                 goto bad;
  647                         ipstat_inc(ips_reassembled);
  648                         ip = mtod(*mp, struct ip *);
  649                         hlen = ip->ip_hl << 2;
  650                         ip->ip_len = htons(ntohs(ip->ip_len) + hlen);
  651                 } else {
                              /*
                               * NOTE(review): with the test at the top of this
                               * block (offset or MF set) this branch looks
                               * unreachable -- confirm before relying on it.
                               */
  652                         if (fp != NULL)
  653                                 ip_freef(fp);
  654                 }
  655 
  656                 mtx_leave(&ipq_mutex);
  657         }
  658 
  659         *offp = hlen;
  660         return ip->ip_p;
  661 
  662  bad:
  663         mtx_leave(&ipq_mutex);
  664         m_freemp(mp);
  665         return IPPROTO_DONE;
  666 }
  667 
      /*
       * IPSTAT_INC(name) bumps the statistics counter `name'.  With INET6 it
       * picks the IPv4 or the IPv6 counter depending on a variable called
       * `af' that must be in scope where the macro is used; without INET6
       * it always uses the IPv4 counter.  Only defined for ip_deliver().
       */
  668 #ifndef INET6
  669 #define IPSTAT_INC(name)        ipstat_inc(ips_##name)
  670 #else
  671 #define IPSTAT_INC(name)        (af == AF_INET ?        \
  672     ipstat_inc(ips_##name) : ip6stat_inc(ip6s_##name))
  673 #endif
  674 
      /*
       * Run the chain of protocol input handlers for a packet that is
       * delivered locally.  Requires the exclusive net lock (asserted).
       *
       * mp    in/out packet, passed on to every handler; released with
       *       m_freemp() when one of the checks in the loop fails.
       * offp  in/out offset of the header to process next.
       * nxt   protocol number of that header.
       * af    address family whose protocol switch is used to look up the
       *       handler for `nxt'.
       *
       * Each handler returns the next protocol to run, or IPPROTO_DONE to
       * stop.  The function itself returns IPPROTO_DONE on every visible
       * path.
       */
  675 int
  676 ip_deliver(struct mbuf **mp, int *offp, int nxt, int af)
  677 {
  678         const struct protosw *psw;
              /* Address family to switch to after the current handler has run. */
  679         int naf = af;
  680 #ifdef INET6
              /* Number of headers processed so far while in AF_INET6. */
  681         int nest = 0;
  682 #endif /* INET6 */
  683 
  684         NET_ASSERT_LOCKED_EXCLUSIVE();
  685 
  686         /*
  687          * Count the packet as delivered for the family it arrived with.
  688          */
  689         IPSTAT_INC(delivered);
  690 
  691         while (nxt != IPPROTO_DONE) {
  692 #ifdef INET6
                      /* A limit of 0 disables the nesting check. */
  693                 if (af == AF_INET6 &&
  694                     ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
  695                         ip6stat_inc(ip6s_toomanyhdr);
  696                         goto bad;
  697                 }
  698 #endif /* INET6 */
  699 
  700                 /*
  701                  * protection against faulty packet - there should be
  702                  * more sanity checks in header chain processing.
  703                  */
  704                 if ((*mp)->m_pkthdr.len < *offp) {
  705                         IPSTAT_INC(tooshort);
  706                         goto bad;
  707                 }
  708 
  709 #ifdef IPSEC
  710                 if (ipsec_in_use) {
  711                         if (ipsec_local_check(*mp, *offp, nxt, af) != 0) {
  712                                 IPSTAT_INC(cantforward);
  713                                 goto bad;
  714                         }
  715                 }
  716                 /* Otherwise, just fall through and deliver the packet */
  717 #endif /* IPSEC */
  718 
                      /*
                       * An encapsulated IPv4 or IPv6 header changes the address
                       * family, but only for the following iteration: the
                       * handler for `nxt' itself is still looked up with the
                       * current `af' below.
                       */
  719                 switch (nxt) {
  720                 case IPPROTO_IPV4:
  721                         naf = AF_INET;
  722                         ipstat_inc(ips_delivered);
  723                         break;
  724 #ifdef INET6
  725                 case IPPROTO_IPV6:
  726                         naf = AF_INET6;
  727                         ip6stat_inc(ip6s_delivered);
  728                         break;
  729 #endif /* INET6 */
  730                 }
                      /*
                       * NOTE(review): there is no default case; psw would be
                       * used uninitialized if af were ever something other
                       * than the families handled here -- confirm callers.
                       */
  731                 switch (af) {
  732                 case AF_INET:
  733                         psw = &inetsw[ip_protox[nxt]];
  734                         break;
  735 #ifdef INET6
  736                 case AF_INET6:
  737                         psw = &inet6sw[ip6_protox[nxt]];
  738                         break;
  739 #endif /* INET6 */
  740                 }
  741                 nxt = (*psw->pr_input)(mp, offp, nxt, af);
  742                 af = naf;
  743         }
  744         return nxt;
  745  bad:
  746         m_freemp(mp);
  747         return IPPROTO_DONE;
  748 }
  749 #undef IPSTAT_INC
  750 
  751 int
  752 in_ouraddr(struct mbuf *m, struct ifnet *ifp, struct rtentry **prt)
  753 {
  754         struct rtentry          *rt;
  755         struct ip               *ip;
  756         struct sockaddr_in       sin;
  757         int                      match = 0;
  758 
  759 #if NPF > 0
  760         switch (pf_ouraddr(m)) {
  761         case 0:
  762                 return (0);
  763         case 1:
  764                 return (1);
  765         default:
  766                 /* pf does not know it */
  767                 break;
  768         }
  769 #endif
  770 
  771         ip = mtod(m, struct ip *);
  772 
  773         memset(&sin, 0, sizeof(sin));
  774         sin.sin_len = sizeof(sin);
  775         sin.sin_family = AF_INET;
  776         sin.sin_addr = ip->ip_dst;
  777         rt = rtalloc_mpath(sintosa(&sin), &ip->ip_src.s_addr,
  778             m->m_pkthdr.ph_rtableid);
  779         if (rtisvalid(rt)) {
  780                 if (ISSET(rt->rt_flags, RTF_LOCAL))
  781                         match = 1;
  782 
  783                 /*
  784                  * If directedbcast is enabled we only consider it local
  785                  * if it is received on the interface with that address.
  786                  */
  787                 if (ISSET(rt->rt_flags, RTF_BROADCAST) &&
  788                     (!ip_directedbcast || rt->rt_ifidx == ifp->if_index)) {
  789                         match = 1;
  790 
  791                         /* Make sure M_BCAST is set */
  792                         m->m_flags |= M_BCAST;
  793                 }
  794         }
  795         *prt = rt;
  796 
  797         if (!match) {
  798                 struct ifaddr *ifa;
  799 
  800                 /*
  801                  * No local address or broadcast address found, so check for
  802                  * ancient classful broadcast addresses.
  803                  * It must have been broadcast on the link layer, and for an
  804                  * address on the interface it was received on.
  805                  */
  806                 if (!ISSET(m->m_flags, M_BCAST) ||
  807                     !IN_CLASSFULBROADCAST(ip->ip_dst.s_addr, ip->ip_dst.s_addr))
  808                         return (0);
  809 
  810                 if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid))
  811                         return (0);
  812                 /*
  813                  * The check in the loop assumes you only rx a packet on an UP
  814                  * interface, and that M_BCAST will only be set on a BROADCAST
  815                  * interface.
  816                  */
  817                 NET_ASSERT_LOCKED();
  818                 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
  819                         if (ifa->ifa_addr->sa_family != AF_INET)
  820                                 continue;
  821 
  822                         if (IN_CLASSFULBROADCAST(ip->ip_dst.s_addr,
  823                             ifatoia(ifa)->ia_addr.sin_addr.s_addr)) {
  824                                 match = 1;
  825                                 break;
  826                         }
  827                 }
  828         } else if (ipforwarding == 0 && rt->rt_ifidx != ifp->if_index &&
  829             !((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_type == IFT_ENC) ||
  830             (m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST))) {
  831                 /* received on wrong interface. */
  832 #if NCARP > 0
  833                 struct ifnet *out_if;
  834 
  835                 /*
  836                  * Virtual IPs on carp interfaces need to be checked also
  837                  * against the parent interface and other carp interfaces
  838                  * sharing the same parent.
  839                  */
  840                 out_if = if_get(rt->rt_ifidx);
  841                 if (!(out_if && carp_strict_addr_chk(out_if, ifp))) {
  842                         ipstat_inc(ips_wrongif);
  843                         match = 2;
  844                 }
  845                 if_put(out_if);
  846 #else
  847                 ipstat_inc(ips_wrongif);
  848                 match = 2;
  849 #endif
  850         }
  851 
  852         return (match);
  853 }
  854 
  855 /*
  856  * Take incoming datagram fragment and try to
  857  * reassemble it into whole datagram.  If a chain for
  858  * reassembly of this datagram already exists, then it
  859  * is given as fp; otherwise have to make a chain.
  860  */
  861 struct mbuf *
  862 ip_reass(struct ipqent *ipqe, struct ipq *fp)
  863 {
  864         struct mbuf *m = ipqe->ipqe_m;
  865         struct ipqent *nq, *p, *q;
  866         struct ip *ip;
  867         struct mbuf *t;
  868         int hlen = ipqe->ipqe_ip->ip_hl << 2;
  869         int i, next;
  870         u_int8_t ecn, ecn0;
  871 
  872         MUTEX_ASSERT_LOCKED(&ipq_mutex);
  873 
  874         /*
  875          * Presence of header sizes in mbufs
  876          * would confuse code below.
  877          */
  878         m->m_data += hlen;
  879         m->m_len -= hlen;
  880 
  881         /*
  882          * If first fragment to arrive, create a reassembly queue.
  883          */
  884         if (fp == NULL) {
  885                 fp = pool_get(&ipq_pool, PR_NOWAIT);
  886                 if (fp == NULL)
  887                         goto dropfrag;
  888                 LIST_INSERT_HEAD(&ipq, fp, ipq_q);
  889                 fp->ipq_ttl = IPFRAGTTL;
  890                 fp->ipq_p = ipqe->ipqe_ip->ip_p;
  891                 fp->ipq_id = ipqe->ipqe_ip->ip_id;
  892                 LIST_INIT(&fp->ipq_fragq);
  893                 fp->ipq_src = ipqe->ipqe_ip->ip_src;
  894                 fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
  895                 p = NULL;
  896                 goto insert;
  897         }
  898 
  899         /*
  900          * Handle ECN by comparing this segment with the first one;
  901          * if CE is set, do not lose CE.
  902          * drop if CE and not-ECT are mixed for the same packet.
  903          */
  904         ecn = ipqe->ipqe_ip->ip_tos & IPTOS_ECN_MASK;
  905         ecn0 = LIST_FIRST(&fp->ipq_fragq)->ipqe_ip->ip_tos & IPTOS_ECN_MASK;
  906         if (ecn == IPTOS_ECN_CE) {
  907                 if (ecn0 == IPTOS_ECN_NOTECT)
  908                         goto dropfrag;
  909                 if (ecn0 != IPTOS_ECN_CE)
  910                         LIST_FIRST(&fp->ipq_fragq)->ipqe_ip->ip_tos |=
  911                             IPTOS_ECN_CE;
  912         }
  913         if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
  914                 goto dropfrag;
  915 
  916         /*
  917          * Find a segment which begins after this one does.
  918          */
  919         for (p = NULL, q = LIST_FIRST(&fp->ipq_fragq); q != NULL;
  920             p = q, q = LIST_NEXT(q, ipqe_q))
  921                 if (ntohs(q->ipqe_ip->ip_off) > ntohs(ipqe->ipqe_ip->ip_off))
  922                         break;
  923 
  924         /*
  925          * If there is a preceding segment, it may provide some of
  926          * our data already.  If so, drop the data from the incoming
  927          * segment.  If it provides all of our data, drop us.
  928          */
  929         if (p != NULL) {
  930                 i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
  931                     ntohs(ipqe->ipqe_ip->ip_off);
  932                 if (i > 0) {
  933                         if (i >= ntohs(ipqe->ipqe_ip->ip_len))
  934                                 goto dropfrag;
  935                         m_adj(ipqe->ipqe_m, i);
  936                         ipqe->ipqe_ip->ip_off =
  937                             htons(ntohs(ipqe->ipqe_ip->ip_off) + i);
  938                         ipqe->ipqe_ip->ip_len =
  939                             htons(ntohs(ipqe->ipqe_ip->ip_len) - i);
  940                 }
  941         }
  942 
  943         /*
  944          * While we overlap succeeding segments trim them or,
  945          * if they are completely covered, dequeue them.
  946          */
  947         for (; q != NULL &&
  948             ntohs(ipqe->ipqe_ip->ip_off) + ntohs(ipqe->ipqe_ip->ip_len) >
  949             ntohs(q->ipqe_ip->ip_off); q = nq) {
  950                 i = (ntohs(ipqe->ipqe_ip->ip_off) +
  951                     ntohs(ipqe->ipqe_ip->ip_len)) - ntohs(q->ipqe_ip->ip_off);
  952                 if (i < ntohs(q->ipqe_ip->ip_len)) {
  953                         q->ipqe_ip->ip_len =
  954                             htons(ntohs(q->ipqe_ip->ip_len) - i);
  955                         q->ipqe_ip->ip_off =
  956                             htons(ntohs(q->ipqe_ip->ip_off) + i);
  957                         m_adj(q->ipqe_m, i);
  958                         break;
  959                 }
  960                 nq = LIST_NEXT(q, ipqe_q);
  961                 m_freem(q->ipqe_m);
  962                 LIST_REMOVE(q, ipqe_q);
  963                 pool_put(&ipqent_pool, q);
  964                 ip_frags--;
  965         }
  966 
  967 insert:
  968         /*
  969          * Stick new segment in its place;
  970          * check for complete reassembly.
  971          */
  972         if (p == NULL) {
  973                 LIST_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
  974         } else {
  975                 LIST_INSERT_AFTER(p, ipqe, ipqe_q);
  976         }
  977         next = 0;
  978         for (p = NULL, q = LIST_FIRST(&fp->ipq_fragq); q != NULL;
  979             p = q, q = LIST_NEXT(q, ipqe_q)) {
  980                 if (ntohs(q->ipqe_ip->ip_off) != next)
  981                         return (0);
  982                 next += ntohs(q->ipqe_ip->ip_len);
  983         }
  984         if (p->ipqe_mff)
  985                 return (0);
  986 
  987         /*
  988          * Reassembly is complete.  Check for a bogus message size and
  989          * concatenate fragments.
  990          */
  991         q = LIST_FIRST(&fp->ipq_fragq);
  992         ip = q->ipqe_ip;
  993         if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
  994                 ipstat_inc(ips_toolong);
  995                 ip_freef(fp);
  996                 return (0);
  997         }
  998         m = q->ipqe_m;
  999         t = m->m_next;
 1000         m->m_next = 0;
 1001         m_cat(m, t);
 1002         nq = LIST_NEXT(q, ipqe_q);
 1003         pool_put(&ipqent_pool, q);
 1004         ip_frags--;
 1005         for (q = nq; q != NULL; q = nq) {
 1006                 t = q->ipqe_m;
 1007                 nq = LIST_NEXT(q, ipqe_q);
 1008                 pool_put(&ipqent_pool, q);
 1009                 ip_frags--;
 1010                 m_removehdr(t);
 1011                 m_cat(m, t);
 1012         }
 1013 
 1014         /*
 1015          * Create header for new ip packet by
 1016          * modifying header of first packet;
 1017          * dequeue and discard fragment reassembly header.
 1018          * Make header visible.
 1019          */
 1020         ip->ip_len = htons(next);
 1021         ip->ip_src = fp->ipq_src;
 1022         ip->ip_dst = fp->ipq_dst;
 1023         LIST_REMOVE(fp, ipq_q);
 1024         pool_put(&ipq_pool, fp);
 1025         m->m_len += (ip->ip_hl << 2);
 1026         m->m_data -= (ip->ip_hl << 2);
 1027         m_calchdrlen(m);
 1028         return (m);
 1029 
 1030 dropfrag:
 1031         ipstat_inc(ips_fragdropped);
 1032         m_freem(m);
 1033         pool_put(&ipqent_pool, ipqe);
 1034         ip_frags--;
 1035         return (NULL);
 1036 }
 1037 
 1038 /*
 1039  * Free a fragment reassembly header and all
 1040  * associated datagrams.
 1041  */
 1042 void
 1043 ip_freef(struct ipq *fp)
 1044 {
 1045         struct ipqent *q;
 1046 
 1047         MUTEX_ASSERT_LOCKED(&ipq_mutex);
 1048 
 1049         while ((q = LIST_FIRST(&fp->ipq_fragq)) != NULL) {
 1050                 LIST_REMOVE(q, ipqe_q);
 1051                 m_freem(q->ipqe_m);
 1052                 pool_put(&ipqent_pool, q);
 1053                 ip_frags--;
 1054         }
 1055         LIST_REMOVE(fp, ipq_q);
 1056         pool_put(&ipq_pool, fp);
 1057 }
 1058 
 1059 /*
 1060  * IP timer processing;
 1061  * if a timer expires on a reassembly queue, discard it.
 1062  */
 1063 void
 1064 ip_slowtimo(void)
 1065 {
 1066         struct ipq *fp, *nfp;
 1067 
 1068         mtx_enter(&ipq_mutex);
 1069         LIST_FOREACH_SAFE(fp, &ipq, ipq_q, nfp) {
 1070                 if (--fp->ipq_ttl == 0) {
 1071                         ipstat_inc(ips_fragtimeout);
 1072                         ip_freef(fp);
 1073                 }
 1074         }
 1075         mtx_leave(&ipq_mutex);
 1076 }
 1077 
 1078 /*
 1079  * Flush a bunch of datagram fragments, till we are down to 75%.
 1080  */
 1081 void
 1082 ip_flush(void)
 1083 {
 1084         int max = 50;
 1085 
 1086         MUTEX_ASSERT_LOCKED(&ipq_mutex);
 1087 
 1088         while (!LIST_EMPTY(&ipq) && ip_frags > ip_maxqueue * 3 / 4 && --max) {
 1089                 ipstat_inc(ips_fragdropped);
 1090                 ip_freef(LIST_FIRST(&ipq));
 1091         }
 1092 }
 1093 
 1094 /*
 1095  * Do option processing on a datagram,
 1096  * possibly discarding it if bad options are encountered,
 1097  * or forwarding it if source-routed.
 1098  * Returns 1 if packet has been forwarded/freed,
 1099  * 0 if the packet should be processed further.
 1100  */
 1101 int
 1102 ip_dooptions(struct mbuf *m, struct ifnet *ifp)
 1103 {
 1104         struct ip *ip = mtod(m, struct ip *);
 1105         unsigned int rtableid = m->m_pkthdr.ph_rtableid;
 1106         struct rtentry *rt;
 1107         struct sockaddr_in ipaddr;
 1108         u_char *cp;
 1109         struct ip_timestamp ipt;
 1110         struct in_ifaddr *ia;
 1111         int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
 1112         struct in_addr sin, dst;
 1113         u_int32_t ntime;
 1114 
 1115         dst = ip->ip_dst;
 1116         cp = (u_char *)(ip + 1);
 1117         cnt = (ip->ip_hl << 2) - sizeof (struct ip);
 1118 
 1119         KERNEL_LOCK();
 1120         for (; cnt > 0; cnt -= optlen, cp += optlen) {
 1121                 opt = cp[IPOPT_OPTVAL];
 1122                 if (opt == IPOPT_EOL)
 1123                         break;
 1124                 if (opt == IPOPT_NOP)
 1125                         optlen = 1;
 1126                 else {
 1127                         if (cnt < IPOPT_OLEN + sizeof(*cp)) {
 1128                                 code = &cp[IPOPT_OLEN] - (u_char *)ip;
 1129                                 goto bad;
 1130                         }
 1131                         optlen = cp[IPOPT_OLEN];
 1132                         if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
 1133                                 code = &cp[IPOPT_OLEN] - (u_char *)ip;
 1134                                 goto bad;
 1135                         }
 1136                 }
 1137 
 1138                 switch (opt) {
 1139 
 1140                 default:
 1141                         break;
 1142 
 1143                 /*
 1144                  * Source routing with record.
 1145                  * Find interface with current destination address.
 1146                  * If none on this machine then drop if strictly routed,
 1147                  * or do nothing if loosely routed.
 1148                  * Record interface address and bring up next address
 1149                  * component.  If strictly routed make sure next
 1150                  * address is on directly accessible net.
 1151                  */
 1152                 case IPOPT_LSRR:
 1153                 case IPOPT_SSRR:
 1154                         if (!ip_dosourceroute) {
 1155                                 type = ICMP_UNREACH;
 1156                                 code = ICMP_UNREACH_SRCFAIL;
 1157                                 goto bad;
 1158                         }
 1159                         if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
 1160                                 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 1161                                 goto bad;
 1162                         }
 1163                         memset(&ipaddr, 0, sizeof(ipaddr));
 1164                         ipaddr.sin_family = AF_INET;
 1165                         ipaddr.sin_len = sizeof(ipaddr);
 1166                         ipaddr.sin_addr = ip->ip_dst;
 1167                         ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr),
 1168                             m->m_pkthdr.ph_rtableid));
 1169                         if (ia == NULL) {
 1170                                 if (opt == IPOPT_SSRR) {
 1171                                         type = ICMP_UNREACH;
 1172                                         code = ICMP_UNREACH_SRCFAIL;
 1173                                         goto bad;
 1174                                 }
 1175                                 /*
 1176                                  * Loose routing, and not at next destination
 1177                                  * yet; nothing to do except forward.
 1178                                  */
 1179                                 break;
 1180                         }
 1181                         off--;                  /* 0 origin */
 1182                         if ((off + sizeof(struct in_addr)) > optlen) {
 1183                                 /*
 1184                                  * End of source route.  Should be for us.
 1185                                  */
 1186                                 save_rte(m, cp, ip->ip_src);
 1187                                 break;
 1188                         }
 1189 
 1190                         /*
 1191                          * locate outgoing interface
 1192                          */
 1193                         memset(&ipaddr, 0, sizeof(ipaddr));
 1194                         ipaddr.sin_family = AF_INET;
 1195                         ipaddr.sin_len = sizeof(ipaddr);
 1196                         memcpy(&ipaddr.sin_addr, cp + off,
 1197                             sizeof(ipaddr.sin_addr));
 1198                         /* keep packet in the virtual instance */
 1199                         rt = rtalloc(sintosa(&ipaddr), RT_RESOLVE, rtableid);
 1200                         if (!rtisvalid(rt) || ((opt == IPOPT_SSRR) &&
 1201                             ISSET(rt->rt_flags, RTF_GATEWAY))) {
 1202                                 type = ICMP_UNREACH;
 1203                                 code = ICMP_UNREACH_SRCFAIL;
 1204                                 rtfree(rt);
 1205                                 goto bad;
 1206                         }
 1207                         ia = ifatoia(rt->rt_ifa);
 1208                         memcpy(cp + off, &ia->ia_addr.sin_addr,
 1209                             sizeof(struct in_addr));
 1210                         rtfree(rt);
 1211                         cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 1212                         ip->ip_dst = ipaddr.sin_addr;
 1213                         /*
 1214                          * Let ip_intr's mcast routing check handle mcast pkts
 1215                          */
 1216                         forward = !IN_MULTICAST(ip->ip_dst.s_addr);
 1217                         break;
 1218 
 1219                 case IPOPT_RR:
 1220                         if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
 1221                                 code = &cp[IPOPT_OLEN] - (u_char *)ip;
 1222                                 goto bad;
 1223                         }
 1224                         if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
 1225                                 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 1226                                 goto bad;
 1227                         }
 1228 
 1229                         /*
 1230                          * If no space remains, ignore.
 1231                          */
 1232                         off--;                  /* 0 origin */
 1233                         if ((off + sizeof(struct in_addr)) > optlen)
 1234                                 break;
 1235                         memset(&ipaddr, 0, sizeof(ipaddr));
 1236                         ipaddr.sin_family = AF_INET;
 1237                         ipaddr.sin_len = sizeof(ipaddr);
 1238                         ipaddr.sin_addr = ip->ip_dst;
 1239                         /*
 1240                          * locate outgoing interface; if we're the destination,
 1241                          * use the incoming interface (should be same).
 1242                          * Again keep the packet inside the virtual instance.
 1243                          */
 1244                         rt = rtalloc(sintosa(&ipaddr), RT_RESOLVE, rtableid);
 1245                         if (!rtisvalid(rt)) {
 1246                                 type = ICMP_UNREACH;
 1247                                 code = ICMP_UNREACH_HOST;
 1248                                 rtfree(rt);
 1249                                 goto bad;
 1250                         }
 1251                         ia = ifatoia(rt->rt_ifa);
 1252                         memcpy(cp + off, &ia->ia_addr.sin_addr,
 1253                             sizeof(struct in_addr));
 1254                         rtfree(rt);
 1255                         cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 1256                         break;
 1257 
 1258                 case IPOPT_TS:
 1259                         code = cp - (u_char *)ip;
 1260                         if (optlen < sizeof(struct ip_timestamp))
 1261                                 goto bad;
 1262                         memcpy(&ipt, cp, sizeof(struct ip_timestamp));
 1263                         if (ipt.ipt_ptr < 5 || ipt.ipt_len < 5)
 1264                                 goto bad;
 1265                         if (ipt.ipt_ptr - 1 + sizeof(u_int32_t) > ipt.ipt_len) {
 1266                                 if (++ipt.ipt_oflw == 0)
 1267                                         goto bad;
 1268                                 break;
 1269                         }
 1270                         memcpy(&sin, cp + ipt.ipt_ptr - 1, sizeof sin);
 1271                         switch (ipt.ipt_flg) {
 1272 
 1273                         case IPOPT_TS_TSONLY:
 1274                                 break;
 1275 
 1276                         case IPOPT_TS_TSANDADDR:
 1277                                 if (ipt.ipt_ptr - 1 + sizeof(u_int32_t) +
 1278                                     sizeof(struct in_addr) > ipt.ipt_len)
 1279                                         goto bad;
 1280                                 memset(&ipaddr, 0, sizeof(ipaddr));
 1281                                 ipaddr.sin_family = AF_INET;
 1282                                 ipaddr.sin_len = sizeof(ipaddr);
 1283                                 ipaddr.sin_addr = dst;
 1284                                 ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr),
 1285                                     ifp));
 1286                                 if (ia == NULL)
 1287                                         continue;
 1288                                 memcpy(&sin, &ia->ia_addr.sin_addr,
 1289                                     sizeof(struct in_addr));
 1290                                 ipt.ipt_ptr += sizeof(struct in_addr);
 1291                                 break;
 1292 
 1293                         case IPOPT_TS_PRESPEC:
 1294                                 if (ipt.ipt_ptr - 1 + sizeof(u_int32_t) +
 1295                                     sizeof(struct in_addr) > ipt.ipt_len)
 1296                                         goto bad;
 1297                                 memset(&ipaddr, 0, sizeof(ipaddr));
 1298                                 ipaddr.sin_family = AF_INET;
 1299                                 ipaddr.sin_len = sizeof(ipaddr);
 1300                                 ipaddr.sin_addr = sin;
 1301                                 if (ifa_ifwithaddr(sintosa(&ipaddr),
 1302                                     m->m_pkthdr.ph_rtableid) == NULL)
 1303                                         continue;
 1304                                 ipt.ipt_ptr += sizeof(struct in_addr);
 1305                                 break;
 1306 
 1307                         default:
 1308                                 /* XXX can't take &ipt->ipt_flg */
 1309                                 code = (u_char *)&ipt.ipt_ptr -
 1310                                     (u_char *)ip + 1;
 1311                                 goto bad;
 1312                         }
 1313                         ntime = iptime();
 1314                         memcpy(cp + ipt.ipt_ptr - 1, &ntime, sizeof(u_int32_t));
 1315                         ipt.ipt_ptr += sizeof(u_int32_t);
 1316                 }
 1317         }
 1318         KERNEL_UNLOCK();
 1319         if (forward && ipforwarding > 0) {
 1320                 ip_forward(m, ifp, NULL, 1);
 1321                 return (1);
 1322         }
 1323         return (0);
 1324 bad:
 1325         KERNEL_UNLOCK();
 1326         icmp_error(m, type, code, 0, 0);
 1327         ipstat_inc(ips_badoptions);
 1328         return (1);
 1329 }
 1330 
 1331 /*
 1332  * Save incoming source route for use in replies,
 1333  * to be picked up later by ip_srcroute if the receiver is interested.
 1334  */
 1335 void
 1336 save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
 1337 {
 1338         struct ip_srcrt *isr;
 1339         struct m_tag *mtag;
 1340         unsigned olen;
 1341 
 1342         olen = option[IPOPT_OLEN];
 1343         if (olen > sizeof(isr->isr_hdr) + sizeof(isr->isr_routes))
 1344                 return;
 1345 
 1346         mtag = m_tag_get(PACKET_TAG_SRCROUTE, sizeof(*isr), M_NOWAIT);
 1347         if (mtag == NULL) {
 1348                 ipstat_inc(ips_idropped);
 1349                 return;
 1350         }
 1351         isr = (struct ip_srcrt *)(mtag + 1);
 1352 
 1353         memcpy(isr->isr_hdr, option, olen);
 1354         isr->isr_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
 1355         isr->isr_dst = dst;
 1356         m_tag_prepend(m, mtag);
 1357 }
 1358 
 1359 /*
 1360  * Retrieve incoming source route for use in replies,
 1361  * in the same form used by setsockopt.
 1362  * The first hop is placed before the options, will be removed later.
 1363  */
 1364 struct mbuf *
 1365 ip_srcroute(struct mbuf *m0)
 1366 {
 1367         struct in_addr *p, *q;
 1368         struct mbuf *m;
 1369         struct ip_srcrt *isr;
 1370         struct m_tag *mtag;
 1371 
 1372         if (!ip_dosourceroute)
 1373                 return (NULL);
 1374 
 1375         mtag = m_tag_find(m0, PACKET_TAG_SRCROUTE, NULL);
 1376         if (mtag == NULL)
 1377                 return (NULL);
 1378         isr = (struct ip_srcrt *)(mtag + 1);
 1379 
 1380         if (isr->isr_nhops == 0)
 1381                 return (NULL);
 1382         m = m_get(M_DONTWAIT, MT_SOOPTS);
 1383         if (m == NULL) {
 1384                 ipstat_inc(ips_idropped);
 1385                 return (NULL);
 1386         }
 1387 
 1388 #define OPTSIZ  (sizeof(isr->isr_nop) + sizeof(isr->isr_hdr))
 1389 
 1390         /* length is (nhops+1)*sizeof(addr) + sizeof(nop + header) */
 1391         m->m_len = (isr->isr_nhops + 1) * sizeof(struct in_addr) + OPTSIZ;
 1392 
 1393         /*
 1394          * First save first hop for return route
 1395          */
 1396         p = &(isr->isr_routes[isr->isr_nhops - 1]);
 1397         *(mtod(m, struct in_addr *)) = *p--;
 1398 
 1399         /*
 1400          * Copy option fields and padding (nop) to mbuf.
 1401          */
 1402         isr->isr_nop = IPOPT_NOP;
 1403         isr->isr_hdr[IPOPT_OFFSET] = IPOPT_MINOFF;
 1404         memcpy(mtod(m, caddr_t) + sizeof(struct in_addr), &isr->isr_nop,
 1405             OPTSIZ);
 1406         q = (struct in_addr *)(mtod(m, caddr_t) +
 1407             sizeof(struct in_addr) + OPTSIZ);
 1408 #undef OPTSIZ
 1409         /*
 1410          * Record return path as an IP source route,
 1411          * reversing the path (pointers are now aligned).
 1412          */
 1413         while (p >= isr->isr_routes) {
 1414                 *q++ = *p--;
 1415         }
 1416         /*
 1417          * Last hop goes to final destination.
 1418          */
 1419         *q = isr->isr_dst;
 1420         m_tag_delete(m0, (struct m_tag *)isr);
 1421         return (m);
 1422 }
 1423 
 1424 /*
 1425  * Strip out IP options, at higher level protocol in the kernel.
 1426  */
 1427 void
 1428 ip_stripoptions(struct mbuf *m)
 1429 {
 1430         int i;
 1431         struct ip *ip = mtod(m, struct ip *);
 1432         caddr_t opts;
 1433         int olen;
 1434 
 1435         olen = (ip->ip_hl<<2) - sizeof (struct ip);
 1436         opts = (caddr_t)(ip + 1);
 1437         i = m->m_len - (sizeof (struct ip) + olen);
 1438         memmove(opts, opts  + olen, i);
 1439         m->m_len -= olen;
 1440         if (m->m_flags & M_PKTHDR)
 1441                 m->m_pkthdr.len -= olen;
 1442         ip->ip_hl = sizeof(struct ip) >> 2;
 1443         ip->ip_len = htons(ntohs(ip->ip_len) - olen);
 1444 }
 1445 
 1446 const u_char inetctlerrmap[PRC_NCMDS] = {
 1447         0,              0,              0,              0,
 1448         0,              EMSGSIZE,       EHOSTDOWN,      EHOSTUNREACH,
 1449         EHOSTUNREACH,   EHOSTUNREACH,   ECONNREFUSED,   ECONNREFUSED,
 1450         EMSGSIZE,       EHOSTUNREACH,   0,              0,
 1451         0,              0,              0,              0,
 1452         ENOPROTOOPT
 1453 };
 1454 
 1455 /*
 1456  * Forward a packet.  If some error occurs return the sender
 1457  * an icmp packet.  Note we can't always generate a meaningful
 1458  * icmp message because icmp doesn't have a large enough repertoire
 1459  * of codes and types.
 1460  *
 1461  * If not forwarding, just drop the packet.  This could be confusing
 1462  * if ipforwarding was zero but some routing protocol was advancing
 1463  * us as a gateway to somewhere.  However, we must let the routing
 1464  * protocol deal with that.
 1465  *
 1466  * The srcrt parameter indicates whether the packet is being forwarded
 1467  * via a source route.
 1468  */
 1469 void
 1470 ip_forward(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt, int srcrt)
 1471 {
 1472         struct mbuf mfake, *mcopy = NULL;
 1473         struct ip *ip = mtod(m, struct ip *);
 1474         struct sockaddr_in *sin;
 1475         struct route ro;
 1476         int error = 0, type = 0, code = 0, destmtu = 0, fake = 0, len;
 1477         u_int32_t dest;
 1478 
 1479         dest = 0;
 1480         if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
 1481                 ipstat_inc(ips_cantforward);
 1482                 m_freem(m);
 1483                 goto freecopy;
 1484         }
 1485         if (ip->ip_ttl <= IPTTLDEC) {
 1486                 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
 1487                 goto freecopy;
 1488         }
 1489 
 1490         memset(&ro, 0, sizeof(ro));
 1491         sin = satosin(&ro.ro_dst);
 1492         sin->sin_family = AF_INET;
 1493         sin->sin_len = sizeof(*sin);
 1494         sin->sin_addr = ip->ip_dst;
 1495 
 1496         if (!rtisvalid(rt)) {
 1497                 rtfree(rt);
 1498                 rt = rtalloc_mpath(sintosa(sin), &ip->ip_src.s_addr,
 1499                     m->m_pkthdr.ph_rtableid);
 1500                 if (rt == NULL) {
 1501                         ipstat_inc(ips_noroute);
 1502                         icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
 1503                         return;
 1504                 }
 1505         }
 1506 
 1507         /*
 1508          * Save at most 68 bytes of the packet in case
 1509          * we need to generate an ICMP message to the src.
 1510          * The data is saved in the mbuf on the stack that
 1511          * acts as a temporary storage not intended to be
 1512          * passed down the IP stack or to the mfree.
 1513          */
 1514         memset(&mfake.m_hdr, 0, sizeof(mfake.m_hdr));
 1515         mfake.m_type = m->m_type;
 1516         if (m_dup_pkthdr(&mfake, m, M_DONTWAIT) == 0) {
 1517                 mfake.m_data = mfake.m_pktdat;
 1518                 len = min(ntohs(ip->ip_len), 68);
 1519                 m_copydata(m, 0, len, mfake.m_pktdat);
 1520                 mfake.m_pkthdr.len = mfake.m_len = len;
 1521 #if NPF > 0
 1522                 pf_pkt_addr_changed(&mfake);
 1523 #endif  /* NPF > 0 */
 1524                 fake = 1;
 1525         }
 1526 
 1527         ip->ip_ttl -= IPTTLDEC;
 1528 
 1529         /*
 1530          * If forwarding packet using same interface that it came in on,
 1531          * perhaps should send a redirect to sender to shortcut a hop.
 1532          * Only send redirect if source is sending directly to us,
 1533          * and if packet was not source routed (or has any options).
 1534          * Also, don't send redirect if forwarding using a default route
 1535          * or a route modified by a redirect.
 1536          * Don't send redirect if we advertise destination's arp address
 1537          * as ours (proxy arp).
 1538          */
 1539         if ((rt->rt_ifidx == ifp->if_index) &&
 1540             (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
 1541             satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
 1542             ipsendredirects && !srcrt &&
 1543             !arpproxy(satosin(rt_key(rt))->sin_addr, m->m_pkthdr.ph_rtableid)) {
 1544                 if ((ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_netmask) ==
 1545                     ifatoia(rt->rt_ifa)->ia_net) {
 1546                     if (rt->rt_flags & RTF_GATEWAY)
 1547                         dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
 1548                     else
 1549                         dest = ip->ip_dst.s_addr;
 1550                     /* Router requirements says to only send host redirects */
 1551                     type = ICMP_REDIRECT;
 1552                     code = ICMP_REDIRECT_HOST;
 1553                 }
 1554         }
 1555 
 1556         ro.ro_rt = rt;
 1557         ro.ro_tableid = m->m_pkthdr.ph_rtableid;
 1558         error = ip_output(m, NULL, &ro,
 1559             (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
 1560             NULL, NULL, 0);
 1561         rt = ro.ro_rt;
 1562         if (error)
 1563                 ipstat_inc(ips_cantforward);
 1564         else {
 1565                 ipstat_inc(ips_forward);
 1566                 if (type)
 1567                         ipstat_inc(ips_redirectsent);
 1568                 else
 1569                         goto freecopy;
 1570         }
 1571         if (!fake)
 1572                 goto freecopy;
 1573 
 1574         switch (error) {
 1575         case 0:                         /* forwarded, but need redirect */
 1576                 /* type, code set above */
 1577                 break;
 1578 
 1579         case EMSGSIZE:
 1580                 type = ICMP_UNREACH;
 1581                 code = ICMP_UNREACH_NEEDFRAG;
 1582                 if (rt != NULL) {
 1583                         if (rt->rt_mtu) {
 1584                                 destmtu = rt->rt_mtu;
 1585                         } else {
 1586                                 struct ifnet *destifp;
 1587 
 1588                                 destifp = if_get(rt->rt_ifidx);
 1589                                 if (destifp != NULL)
 1590                                         destmtu = destifp->if_mtu;
 1591                                 if_put(destifp);
 1592                         }
 1593                 }
 1594                 ipstat_inc(ips_cantfrag);
 1595                 if (destmtu == 0)
 1596                         goto freecopy;
 1597                 break;
 1598 
 1599         case EACCES:
 1600                 /*
 1601                  * pf(4) blocked the packet. There is no need to send an ICMP
 1602                  * packet back since pf(4) takes care of it.
 1603                  */
 1604                 goto freecopy;
 1605 
 1606         case ENOBUFS:
 1607                 /*
 1608                  * a router should not generate ICMP_SOURCEQUENCH as
 1609                  * required in RFC1812 Requirements for IP Version 4 Routers.
 1610                  * source quench could be a big problem under DoS attacks,
 1611                  * or the underlying interface is rate-limited.
 1612                  */
 1613                 goto freecopy;
 1614 
 1615         case ENETUNREACH:               /* shouldn't happen, checked above */
 1616         case EHOSTUNREACH:
 1617         case ENETDOWN:
 1618         case EHOSTDOWN:
 1619         default:
 1620                 type = ICMP_UNREACH;
 1621                 code = ICMP_UNREACH_HOST;
 1622                 break;
 1623         }
 1624         mcopy = m_copym(&mfake, 0, len, M_DONTWAIT);
 1625         if (mcopy)
 1626                 icmp_error(mcopy, type, code, dest, destmtu);
 1627 
 1628 freecopy:
 1629         if (fake)
 1630                 m_tag_delete_chain(&mfake);
 1631         rtfree(rt);
 1632 }
 1633 
 1634 int
 1635 ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
 1636     size_t newlen)
 1637 {
 1638         int error;
 1639 #ifdef MROUTING
 1640         extern struct mrtstat mrtstat;
 1641 #endif
 1642 
 1643         /* Almost all sysctl names at this level are terminal. */
 1644         if (namelen != 1 && name[0] != IPCTL_IFQUEUE &&
 1645             name[0] != IPCTL_ARPQUEUE)
 1646                 return (ENOTDIR);
 1647 
 1648         switch (name[0]) {
 1649         case IPCTL_SOURCEROUTE:
 1650                 NET_LOCK();
 1651                 error = sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
 1652                     &ip_dosourceroute);
 1653                 NET_UNLOCK();
 1654                 return (error);
 1655         case IPCTL_MTUDISC:
 1656                 NET_LOCK();
 1657                 error = sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtudisc);
 1658                 if (ip_mtudisc == 0)
 1659                         rt_timer_queue_flush(&ip_mtudisc_timeout_q);
 1660                 NET_UNLOCK();
 1661                 return error;
 1662         case IPCTL_MTUDISCTIMEOUT:
 1663                 NET_LOCK();
 1664                 error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
 1665                     &ip_mtudisc_timeout, 0, INT_MAX);
 1666                 rt_timer_queue_change(&ip_mtudisc_timeout_q,
 1667                     ip_mtudisc_timeout);
 1668                 NET_UNLOCK();
 1669                 return (error);
 1670 #ifdef IPSEC
 1671         case IPCTL_ENCDEBUG:
 1672         case IPCTL_IPSEC_STATS:
 1673         case IPCTL_IPSEC_EXPIRE_ACQUIRE:
 1674         case IPCTL_IPSEC_EMBRYONIC_SA_TIMEOUT:
 1675         case IPCTL_IPSEC_REQUIRE_PFS:
 1676         case IPCTL_IPSEC_SOFT_ALLOCATIONS:
 1677         case IPCTL_IPSEC_ALLOCATIONS:
 1678         case IPCTL_IPSEC_SOFT_BYTES:
 1679         case IPCTL_IPSEC_BYTES:
 1680         case IPCTL_IPSEC_TIMEOUT:
 1681         case IPCTL_IPSEC_SOFT_TIMEOUT:
 1682         case IPCTL_IPSEC_SOFT_FIRSTUSE:
 1683         case IPCTL_IPSEC_FIRSTUSE:
 1684         case IPCTL_IPSEC_ENC_ALGORITHM:
 1685         case IPCTL_IPSEC_AUTH_ALGORITHM:
 1686         case IPCTL_IPSEC_IPCOMP_ALGORITHM:
 1687                 return (ipsec_sysctl(name, namelen, oldp, oldlenp, newp,
 1688                     newlen));
 1689 #endif
 1690         case IPCTL_IFQUEUE:
 1691                 return (sysctl_niq(name + 1, namelen - 1,
 1692                     oldp, oldlenp, newp, newlen, &ipintrq));
 1693         case IPCTL_ARPQUEUE:
 1694                 return (sysctl_niq(name + 1, namelen - 1,
 1695                     oldp, oldlenp, newp, newlen, &arpinq));
 1696         case IPCTL_ARPQUEUED:
 1697                 return (sysctl_rdint(oldp, oldlenp, newp, la_hold_total));
 1698         case IPCTL_STATS:
 1699                 return (ip_sysctl_ipstat(oldp, oldlenp, newp));
 1700 #ifdef MROUTING
 1701         case IPCTL_MRTSTATS:
 1702                 return (sysctl_rdstruct(oldp, oldlenp, newp,
 1703                     &mrtstat, sizeof(mrtstat)));
 1704         case IPCTL_MRTMFC:
 1705                 if (newp)
 1706                         return (EPERM);
 1707                 NET_LOCK();
 1708                 error = mrt_sysctl_mfc(oldp, oldlenp);
 1709                 NET_UNLOCK();
 1710                 return (error);
 1711         case IPCTL_MRTVIF:
 1712                 if (newp)
 1713                         return (EPERM);
 1714                 NET_LOCK();
 1715                 error = mrt_sysctl_vif(oldp, oldlenp);
 1716                 NET_UNLOCK();
 1717                 return (error);
 1718 #else
 1719         case IPCTL_MRTPROTO:
 1720         case IPCTL_MRTSTATS:
 1721         case IPCTL_MRTMFC:
 1722         case IPCTL_MRTVIF:
 1723                 return (EOPNOTSUPP);
 1724 #endif
 1725         default:
 1726                 NET_LOCK();
 1727                 error = sysctl_bounded_arr(ipctl_vars, nitems(ipctl_vars),
 1728                     name, namelen, oldp, oldlenp, newp, newlen);
 1729                 NET_UNLOCK();
 1730                 return (error);
 1731         }
 1732         /* NOTREACHED */
 1733 }
 1734 
 1735 int
 1736 ip_sysctl_ipstat(void *oldp, size_t *oldlenp, void *newp)
 1737 {
 1738         uint64_t counters[ips_ncounters];
 1739         struct ipstat ipstat;
 1740         u_long *words = (u_long *)&ipstat;
 1741         int i;
 1742 
 1743         CTASSERT(sizeof(ipstat) == (nitems(counters) * sizeof(u_long)));
 1744         memset(&ipstat, 0, sizeof ipstat);
 1745         counters_read(ipcounters, counters, nitems(counters));
 1746 
 1747         for (i = 0; i < nitems(counters); i++)
 1748                 words[i] = (u_long)counters[i];
 1749 
 1750         return (sysctl_rdstruct(oldp, oldlenp, newp, &ipstat, sizeof(ipstat)));
 1751 }
 1752 
 1753 void
 1754 ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
 1755     struct mbuf *m)
 1756 {
 1757         if (inp->inp_socket->so_options & SO_TIMESTAMP) {
 1758                 struct timeval tv;
 1759 
 1760                 m_microtime(m, &tv);
 1761                 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
 1762                     SCM_TIMESTAMP, SOL_SOCKET);
 1763                 if (*mp)
 1764                         mp = &(*mp)->m_next;
 1765         }
 1766 
 1767         if (inp->inp_flags & INP_RECVDSTADDR) {
 1768                 *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
 1769                     sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
 1770                 if (*mp)
 1771                         mp = &(*mp)->m_next;
 1772         }
 1773 #ifdef notyet
 1774         /* this code is broken and will probably never be fixed. */
 1775         /* options were tossed already */
 1776         if (inp->inp_flags & INP_RECVOPTS) {
 1777                 *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
 1778                     sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
 1779                 if (*mp)
 1780                         mp = &(*mp)->m_next;
 1781         }
 1782         /* ip_srcroute doesn't do what we want here, need to fix */
 1783         if (inp->inp_flags & INP_RECVRETOPTS) {
 1784                 *mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
 1785                     sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
 1786                 if (*mp)
 1787                         mp = &(*mp)->m_next;
 1788         }
 1789 #endif
 1790         if (inp->inp_flags & INP_RECVIF) {
 1791                 struct sockaddr_dl sdl;
 1792                 struct ifnet *ifp;
 1793 
 1794                 ifp = if_get(m->m_pkthdr.ph_ifidx);
 1795                 if (ifp == NULL || ifp->if_sadl == NULL) {
 1796                         memset(&sdl, 0, sizeof(sdl));
 1797                         sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
 1798                         sdl.sdl_family = AF_LINK;
 1799                         sdl.sdl_index = ifp != NULL ? ifp->if_index : 0;
 1800                         sdl.sdl_nlen = sdl.sdl_alen = sdl.sdl_slen = 0;
 1801                         *mp = sbcreatecontrol((caddr_t) &sdl, sdl.sdl_len,
 1802                             IP_RECVIF, IPPROTO_IP);
 1803                 } else {
 1804                         *mp = sbcreatecontrol((caddr_t) ifp->if_sadl,
 1805                             ifp->if_sadl->sdl_len, IP_RECVIF, IPPROTO_IP);
 1806                 }
 1807                 if (*mp)
 1808                         mp = &(*mp)->m_next;
 1809                 if_put(ifp);
 1810         }
 1811         if (inp->inp_flags & INP_RECVTTL) {
 1812                 *mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
 1813                     sizeof(u_int8_t), IP_RECVTTL, IPPROTO_IP);
 1814                 if (*mp)
 1815                         mp = &(*mp)->m_next;
 1816         }
 1817         if (inp->inp_flags & INP_RECVRTABLE) {
 1818                 u_int rtableid = inp->inp_rtableid;
 1819 
 1820 #if NPF > 0
 1821                 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
 1822                         struct pf_divert *divert;
 1823 
 1824                         divert = pf_find_divert(m);
 1825                         KASSERT(divert != NULL);
 1826                         rtableid = divert->rdomain;
 1827                 }
 1828 #endif
 1829 
 1830                 *mp = sbcreatecontrol((caddr_t) &rtableid,
 1831                     sizeof(u_int), IP_RECVRTABLE, IPPROTO_IP);
 1832                 if (*mp)
 1833                         mp = &(*mp)->m_next;
 1834         }
 1835 }
 1836 
 1837 void
 1838 ip_send_do_dispatch(void *xmq, int flags)
 1839 {
 1840         struct mbuf_queue *mq = xmq;
 1841         struct mbuf *m;
 1842         struct mbuf_list ml;
 1843         struct m_tag *mtag;
 1844 
 1845         mq_delist(mq, &ml);
 1846         if (ml_empty(&ml))
 1847                 return;
 1848 
 1849         NET_LOCK();
 1850         while ((m = ml_dequeue(&ml)) != NULL) {
 1851                 u_int32_t ipsecflowinfo = 0;
 1852 
 1853                 if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_FLOWINFO, NULL))
 1854                     != NULL) {
 1855                         ipsecflowinfo = *(u_int32_t *)(mtag + 1);
 1856                         m_tag_delete(m, mtag);
 1857                 }
 1858                 ip_output(m, NULL, NULL, flags, NULL, NULL, ipsecflowinfo);
 1859         }
 1860         NET_UNLOCK();
 1861 }
 1862 
 1863 void
 1864 ip_sendraw_dispatch(void *xmq)
 1865 {
 1866         ip_send_do_dispatch(xmq, IP_RAWOUTPUT);
 1867 }
 1868 
 /*
  * Dispatch the queue passed in xmq with no ip_output() flags set.
  */
 void
 ip_send_dispatch(void *xmq)
 {
         const int flags = 0;

         ip_send_do_dispatch(xmq, flags);
 }
 1874 
 1875 void
 1876 ip_send(struct mbuf *m)
 1877 {
 1878         mq_enqueue(&ipsend_mq, m);
 1879         task_add(net_tq(0), &ipsend_task);
 1880 }
 1881 
 1882 void
 1883 ip_send_raw(struct mbuf *m)
 1884 {
 1885         mq_enqueue(&ipsendraw_mq, m);
 1886         task_add(net_tq(0), &ipsendraw_task);
 1887 }

Cache object: 7989223505f815cd66adc02cc5a2d764


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.