The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet6/frag6.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the project nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      $KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD: stable/12/sys/netinet6/frag6.c 355880 2019-12-18 11:48:50Z bz $");
   36 
   37 #include "opt_rss.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/systm.h>
   41 #include <sys/domain.h>
   42 #include <sys/eventhandler.h>
   43 #include <sys/hash.h>
   44 #include <sys/kernel.h>
   45 #include <sys/malloc.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/protosw.h>
   48 #include <sys/socket.h>
   49 #include <sys/sysctl.h>
   50 #include <sys/syslog.h>
   51 
   52 #include <net/if.h>
   53 #include <net/if_var.h>
   54 #include <net/netisr.h>
   55 #include <net/route.h>
   56 #include <net/vnet.h>
   57 
   58 #include <netinet/in.h>
   59 #include <netinet/in_var.h>
   60 #include <netinet/ip6.h>
   61 #include <netinet6/ip6_var.h>
   62 #include <netinet/icmp6.h>
   63 #include <netinet/in_systm.h>   /* For ECN definitions. */
   64 #include <netinet/ip.h>         /* For ECN definitions. */
   65 
   66 #ifdef MAC
   67 #include <security/mac/mac_framework.h>
   68 #endif
   69 
   70 /* Reassembly headers are stored in hash buckets. */
   71 #define IP6REASS_NHASH_LOG2     10
   72 #define IP6REASS_NHASH          (1 << IP6REASS_NHASH_LOG2)
   73 #define IP6REASS_HMASK          (IP6REASS_NHASH - 1)
   74 
   75 static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *,
   76     uint32_t bucket __unused);
   77 static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused);
   78 static void frag6_insque_head(struct ip6q *, struct ip6q *,
   79     uint32_t bucket);
   80 static void frag6_remque(struct ip6q *, uint32_t bucket);
   81 static void frag6_freef(struct ip6q *, uint32_t bucket);
   82 
/*
 * Per-bucket reassembly state: the sentinel head of the circular list of
 * reassembly queues hashed into this bucket, the mutex serializing access
 * to the bucket, and the number of queues currently linked into it.
 */
struct ip6qbucket {
        struct ip6q     ip6q;   /* Sentinel list head (see IP6QB_HEAD()). */
        struct mtx      lock;   /* Guards list and count (IP6QB_LOCK()). */
        int             count;  /* Queues in bucket; compared against
                                   V_ip6_maxfragbucketsize. */
};
   88 
   89 static MALLOC_DEFINE(M_FRAG6, "frag6", "IPv6 fragment reassembly header");
   90 
   91 /* System wide (global) maximum and count of packets in reassembly queues. */ 
   92 static int ip6_maxfrags;
   93 static volatile u_int frag6_nfrags = 0;
   94 
   95 /* Maximum and current packets in per-VNET reassembly queue. */
   96 VNET_DEFINE_STATIC(int,                 ip6_maxfragpackets);
   97 VNET_DEFINE_STATIC(volatile u_int,      frag6_nfragpackets);
   98 #define V_ip6_maxfragpackets            VNET(ip6_maxfragpackets)
   99 #define V_frag6_nfragpackets            VNET(frag6_nfragpackets)
  100 
  101 /* Maximum per-VNET reassembly queues per bucket and fragments per packet. */
  102 VNET_DEFINE_STATIC(int,                 ip6_maxfragbucketsize);
  103 VNET_DEFINE_STATIC(int,                 ip6_maxfragsperpacket);
  104 #define V_ip6_maxfragbucketsize         VNET(ip6_maxfragbucketsize)
  105 #define V_ip6_maxfragsperpacket         VNET(ip6_maxfragsperpacket)
  106 
  107 /* Per-VNET reassembly queue buckets. */
  108 VNET_DEFINE_STATIC(struct ip6qbucket,   ip6qb[IP6REASS_NHASH]);
  109 VNET_DEFINE_STATIC(uint32_t,            ip6qb_hashseed);
  110 #define V_ip6qb                         VNET(ip6qb)
  111 #define V_ip6qb_hashseed                VNET(ip6qb_hashseed)
  112 
  113 #define IP6QB_LOCK(_b)          mtx_lock(&V_ip6qb[(_b)].lock)
  114 #define IP6QB_TRYLOCK(_b)       mtx_trylock(&V_ip6qb[(_b)].lock)
  115 #define IP6QB_LOCK_ASSERT(_b)   mtx_assert(&V_ip6qb[(_b)].lock, MA_OWNED)
  116 #define IP6QB_UNLOCK(_b)        mtx_unlock(&V_ip6qb[(_b)].lock)
  117 #define IP6QB_HEAD(_b)          (&V_ip6qb[(_b)].ip6q)
  118 
  119 /*
  120  * By default, limit the number of IP6 fragments across all reassembly
  121  * queues to  1/32 of the total number of mbuf clusters.
  122  *
  123  * Limit the total number of reassembly queues per VNET to the
  124  * IP6 fragment limit, but ensure the limit will not allow any bucket
  125  * to grow above 100 items. (The bucket limit is
  126  * IP_MAXFRAGPACKETS / (IPREASS_NHASH / 2), so the 50 is the correct
  127  * multiplier to reach a 100-item limit.)
  128  * The 100-item limit was chosen as brief testing seems to show that
  129  * this produces "reasonable" performance on some subset of systems
  130  * under DoS attack.
  131  */
  132 #define IP6_MAXFRAGS            (nmbclusters / 32)
  133 #define IP6_MAXFRAGPACKETS      (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50))
  134 
  135 
  136 /*
  137  * Sysctls and helper function.
  138  */
  139 SYSCTL_DECL(_net_inet6_ip6);
  140 
  141 static void
  142 frag6_set_bucketsize(void)
  143 {
  144         int i;
  145 
  146         if ((i = V_ip6_maxfragpackets) > 0)
  147                 V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1);
  148 }
  149 
  150 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
  151         CTLFLAG_RW, &ip6_maxfrags, 0,
  152         "Maximum allowed number of outstanding IPv6 packet fragments. "
  153         "A value of 0 means no fragmented packets will be accepted, while a "
  154         "a value of -1 means no limit");
  155 
  156 static int
  157 sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
  158 {
  159         int error, val;
  160 
  161         val = V_ip6_maxfragpackets;
  162         error = sysctl_handle_int(oidp, &val, 0, req);
  163         if (error != 0 || !req->newptr)
  164                 return (error);
  165         V_ip6_maxfragpackets = val;
  166         frag6_set_bucketsize();
  167         return (0);
  168 }
  169 SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
  170         CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
  171         sysctl_ip6_maxfragpackets, "I",
  172         "Default maximum number of outstanding fragmented IPv6 packets. "
  173         "A value of 0 means no fragmented packets will be accepted, while a "
  174         "a value of -1 means no limit");
  175 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
  176         CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
  177         "Maximum allowed number of fragments per packet");
  178 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
  179         CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
  180         "Maximum number of reassembly queues per hash bucket");
  181 
  182 
  183 /*
  184  * Remove the IPv6 fragmentation header from the mbuf.
  185  */
  186 int
  187 ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
  188 {
  189         struct ip6_hdr *ip6;
  190         struct mbuf *t;
  191 
  192         /* Delete frag6 header. */
  193         if (m->m_len >= offset + sizeof(struct ip6_frag)) {
  194 
  195                 /* This is the only possible case with !PULLDOWN_TEST. */
  196                 ip6  = mtod(m, struct ip6_hdr *);
  197                 bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
  198                     offset);
  199                 m->m_data += sizeof(struct ip6_frag);
  200                 m->m_len -= sizeof(struct ip6_frag);
  201         } else {
  202 
  203                 /* This comes with no copy if the boundary is on cluster. */
  204                 if ((t = m_split(m, offset, wait)) == NULL)
  205                         return (ENOMEM);
  206                 m_adj(t, sizeof(struct ip6_frag));
  207                 m_cat(m, t);
  208         }
  209 
  210         m->m_flags |= M_FRAGMENTED;
  211         return (0);
  212 }
  213 
  214 /*
  215  * Free a fragment reassembly header and all associated datagrams.
  216  */
  217 static void
  218 frag6_freef(struct ip6q *q6, uint32_t bucket)
  219 {
  220         struct ip6_hdr *ip6;
  221         struct ip6asfrag *af6, *down6;
  222         struct mbuf *m;
  223 
  224         IP6QB_LOCK_ASSERT(bucket);
  225 
  226         for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
  227              af6 = down6) {
  228 
  229                 m = IP6_REASS_MBUF(af6);
  230                 down6 = af6->ip6af_down;
  231                 frag6_deq(af6, bucket);
  232 
  233                 /*
  234                  * Return ICMP time exceeded error for the 1st fragment.
  235                  * Just free other fragments.
  236                  */
  237                 if (af6->ip6af_off == 0) {
  238 
  239                         /* Adjust pointer. */
  240                         ip6 = mtod(m, struct ip6_hdr *);
  241 
  242                         /* Restore source and destination addresses. */
  243                         ip6->ip6_src = q6->ip6q_src;
  244                         ip6->ip6_dst = q6->ip6q_dst;
  245 
  246                         icmp6_error(m, ICMP6_TIME_EXCEEDED,
  247                             ICMP6_TIME_EXCEED_REASSEMBLY, 0);
  248                 } else
  249                         m_freem(m);
  250 
  251                 free(af6, M_FRAG6);
  252         }
  253         frag6_remque(q6, bucket);
  254         atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
  255 #ifdef MAC
  256         mac_ip6q_destroy(q6);
  257 #endif
  258         free(q6, M_FRAG6);
  259         atomic_subtract_int(&V_frag6_nfragpackets, 1);
  260 }
  261 
  262 /*
 * As in RFC 2460, the fragment and reassembly rules of RFC 8200 do not agree
 * with each other in terms of next header field handling in the fragment
 * header.  While the sender will use the same value for all of the fragmented
 * packets, the receiver is advised not to check them for consistency.
  267  *
  268  * Fragment rules (p18,p19):
  269  *      (2)  A Fragment header containing:
  270  *      The Next Header value that identifies the first header
  271  *      after the Per-Fragment headers of the original packet.
  272  *              -> next header field is same for all fragments
  273  *
  274  * Reassembly rule (p20):
  275  *      The Next Header field of the last header of the Per-Fragment
  276  *      headers is obtained from the Next Header field of the first
  277  *      fragment's Fragment header.
  278  *              -> should grab it from the first fragment only
  279  *
 * The following note also contradicts the fragment rule - no one is going to
 * send different fragments with different next header fields.
  282  *
  283  * Additional note (p22) [not an error]:
  284  *      The Next Header values in the Fragment headers of different
  285  *      fragments of the same original packet may differ.  Only the value
  286  *      from the Offset zero fragment packet is used for reassembly.
  287  *              -> should grab it from the first fragment only
  288  *
  289  * There is no explicit reason given in the RFC.  Historical reason maybe?
  290  */
  291 /*
  292  * Fragment input.
  293  */
  294 int
  295 frag6_input(struct mbuf **mp, int *offp, int proto)
  296 {
  297         struct ifnet *dstifp;
  298         struct in6_ifaddr *ia6;
  299         struct ip6_hdr *ip6;
  300         struct ip6_frag *ip6f;
  301         struct ip6q *head, *q6;
  302         struct ip6asfrag *af6, *af6dwn, *ip6af;
  303         struct mbuf *m, *t;
  304         uint32_t hashkey[(sizeof(struct in6_addr) * 2 +
  305                     sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)];
  306         uint32_t bucket, *hashkeyp;
  307         int fragoff, frgpartlen;        /* Must be larger than uint16_t. */
  308         int nxt, offset, plen;
  309         uint8_t ecn, ecn0;
  310         bool only_frag;
  311 #ifdef RSS
  312         struct ip6_direct_ctx *ip6dc;
  313         struct m_tag *mtag;
  314 #endif
  315 
  316         m = *mp;
  317         offset = *offp;
  318 
  319         ip6 = mtod(m, struct ip6_hdr *);
  320 #ifndef PULLDOWN_TEST
  321         IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
  322         ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
  323 #else
  324         IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
  325         if (ip6f == NULL)
  326                 return (IPPROTO_DONE);
  327 #endif
  328 
  329         dstifp = NULL;
  330         /* Find the destination interface of the packet. */
  331         ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
  332         if (ia6 != NULL) {
  333                 dstifp = ia6->ia_ifp;
  334                 ifa_free(&ia6->ia_ifa);
  335         }
  336 
  337         /* Jumbo payload cannot contain a fragment header. */
  338         if (ip6->ip6_plen == 0) {
  339                 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
  340                 in6_ifstat_inc(dstifp, ifs6_reass_fail);
  341                 return (IPPROTO_DONE);
  342         }
  343 
  344         /*
  345          * Check whether fragment packet's fragment length is a
  346          * multiple of 8 octets (unless it is the last one).
  347          * sizeof(struct ip6_frag) == 8
  348          * sizeof(struct ip6_hdr) = 40
  349          */
  350         if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
  351             (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
  352                 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
  353                     offsetof(struct ip6_hdr, ip6_plen));
  354                 in6_ifstat_inc(dstifp, ifs6_reass_fail);
  355                 return (IPPROTO_DONE);
  356         }
  357 
  358         IP6STAT_INC(ip6s_fragments);
  359         in6_ifstat_inc(dstifp, ifs6_reass_reqd);
  360 
  361         /* Offset now points to data portion. */
  362         offset += sizeof(struct ip6_frag);
  363 
  364         /*
  365          * Handle "atomic" fragments (offset and m bit set to 0) upfront,
  366          * unrelated to any reassembly.  Still need to remove the frag hdr.
  367          * See RFC 6946 and section 4.5 of RFC 8200.
  368          */
  369         if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
  370                 /* XXX-BZ we want dedicated counters for this. */
  371                 IP6STAT_INC(ip6s_reassembled);
  372                 /* XXX-BZ handle correctly. */
  373                 in6_ifstat_inc(dstifp, ifs6_reass_ok);
  374                 *offp = offset;
  375                 m->m_flags |= M_FRAGMENTED;
  376                 return (ip6f->ip6f_nxt);
  377         }
  378 
  379         /* Get fragment length and discard 0-byte fragments. */
  380         frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
  381         if (frgpartlen == 0) {
  382                 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
  383                     offsetof(struct ip6_hdr, ip6_plen));
  384                 in6_ifstat_inc(dstifp, ifs6_reass_fail);
  385                 IP6STAT_INC(ip6s_fragdropped);
  386                 return (IPPROTO_DONE);
  387         }
  388 
  389         /* Generate a hash value for fragment bucket selection. */
  390         hashkeyp = hashkey;
  391         memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
  392         hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
  393         memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
  394         hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
  395         *hashkeyp = ip6f->ip6f_ident;
  396         bucket = jenkins_hash32(hashkey, nitems(hashkey), V_ip6qb_hashseed);
  397         bucket &= IP6REASS_HMASK;
  398         head = IP6QB_HEAD(bucket);
  399         IP6QB_LOCK(bucket);
  400 
  401         /*
  402          * Enforce upper bound on number of fragments for the entire system.
  403          * If maxfrag is 0, never accept fragments.
  404          * If maxfrag is -1, accept all fragments without limitation.
  405          */
  406         if (ip6_maxfrags < 0)
  407                 ;
  408         else if (atomic_load_int(&frag6_nfrags) >= (u_int)ip6_maxfrags)
  409                 goto dropfrag;
  410 
  411         for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next)
  412                 if (ip6f->ip6f_ident == q6->ip6q_ident &&
  413                     IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
  414                     IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
  415 #ifdef MAC
  416                     && mac_ip6q_match(m, q6)
  417 #endif
  418                     )
  419                         break;
  420 
  421         only_frag = false;
  422         if (q6 == head) {
  423 
  424                 /* A first fragment to arrive creates a reassembly queue. */
  425                 only_frag = true;
  426 
  427                 /*
  428                  * Enforce upper bound on number of fragmented packets
  429                  * for which we attempt reassembly;
  430                  * If maxfragpackets is 0, never accept fragments.
  431                  * If maxfragpackets is -1, accept all fragments without
  432                  * limitation.
  433                  */
  434                 if (V_ip6_maxfragpackets < 0)
  435                         ;
  436                 else if (V_ip6qb[bucket].count >= V_ip6_maxfragbucketsize ||
  437                     atomic_load_int(&V_frag6_nfragpackets) >=
  438                     (u_int)V_ip6_maxfragpackets)
  439                         goto dropfrag;
  440                 atomic_add_int(&V_frag6_nfragpackets, 1);
  441 
  442                 /* Allocate IPv6 fragement packet queue entry. */
  443                 q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FRAG6,
  444                     M_NOWAIT | M_ZERO);
  445                 if (q6 == NULL)
  446                         goto dropfrag;
  447 #ifdef MAC
  448                 if (mac_ip6q_init(q6, M_NOWAIT) != 0) {
  449                         free(q6, M_FRAG6);
  450                         goto dropfrag;
  451                 }
  452                 mac_ip6q_create(m, q6);
  453 #endif
  454                 frag6_insque_head(q6, head, bucket);
  455 
  456                 /* ip6q_nxt will be filled afterwards, from 1st fragment. */
  457                 q6->ip6q_down   = q6->ip6q_up = (struct ip6asfrag *)q6;
  458 #ifdef notyet
  459                 q6->ip6q_nxtp   = (u_char *)nxtp;
  460 #endif
  461                 q6->ip6q_ident  = ip6f->ip6f_ident;
  462                 q6->ip6q_ttl    = IPV6_FRAGTTL;
  463                 q6->ip6q_src    = ip6->ip6_src;
  464                 q6->ip6q_dst    = ip6->ip6_dst;
  465                 q6->ip6q_ecn    =
  466                     (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
  467                 q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
  468 
  469                 q6->ip6q_nfrag = 0;
  470         }
  471 
  472         /*
  473          * If it is the 1st fragment, record the length of the
  474          * unfragmentable part and the next header of the fragment header.
  475          */
  476         fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
  477         if (fragoff == 0) {
  478                 q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
  479                     sizeof(struct ip6_frag);
  480                 q6->ip6q_nxt = ip6f->ip6f_nxt;
  481         }
  482 
  483         /*
  484          * Check that the reassembled packet would not exceed 65535 bytes
  485          * in size.
  486          * If it would exceed, discard the fragment and return an ICMP error.
  487          */
  488         if (q6->ip6q_unfrglen >= 0) {
  489                 /* The 1st fragment has already arrived. */
  490                 if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
  491                         icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
  492                             offset - sizeof(struct ip6_frag) +
  493                             offsetof(struct ip6_frag, ip6f_offlg));
  494                         IP6QB_UNLOCK(bucket);
  495                         return (IPPROTO_DONE);
  496                 }
  497         } else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
  498                 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
  499                     offset - sizeof(struct ip6_frag) +
  500                     offsetof(struct ip6_frag, ip6f_offlg));
  501                 IP6QB_UNLOCK(bucket);
  502                 return (IPPROTO_DONE);
  503         }
  504         /*
  505          * If it is the first fragment, do the above check for each
  506          * fragment already stored in the reassembly queue.
  507          */
  508         if (fragoff == 0) {
  509                 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
  510                      af6 = af6dwn) {
  511                         af6dwn = af6->ip6af_down;
  512 
  513                         if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
  514                             IPV6_MAXPACKET) {
  515                                 struct ip6_hdr *ip6err;
  516                                 struct mbuf *merr;
  517                                 int erroff;
  518 
  519                                 merr = IP6_REASS_MBUF(af6);
  520                                 erroff = af6->ip6af_offset;
  521 
  522                                 /* Dequeue the fragment. */
  523                                 frag6_deq(af6, bucket);
  524                                 free(af6, M_FRAG6);
  525 
  526                                 /* Adjust pointer. */
  527                                 ip6err = mtod(merr, struct ip6_hdr *);
  528 
  529                                 /*
  530                                  * Restore source and destination addresses
  531                                  * in the erroneous IPv6 header.
  532                                  */
  533                                 ip6err->ip6_src = q6->ip6q_src;
  534                                 ip6err->ip6_dst = q6->ip6q_dst;
  535 
  536                                 icmp6_error(merr, ICMP6_PARAM_PROB,
  537                                     ICMP6_PARAMPROB_HEADER,
  538                                     erroff - sizeof(struct ip6_frag) +
  539                                     offsetof(struct ip6_frag, ip6f_offlg));
  540                         }
  541                 }
  542         }
  543 
  544         /* Allocate an IPv6 fragement queue entry for this fragmented part. */
  545         ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FRAG6,
  546             M_NOWAIT | M_ZERO);
  547         if (ip6af == NULL)
  548                 goto dropfrag;
  549         ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
  550         ip6af->ip6af_off = fragoff;
  551         ip6af->ip6af_frglen = frgpartlen;
  552         ip6af->ip6af_offset = offset;
  553         IP6_REASS_MBUF(ip6af) = m;
  554 
  555         if (only_frag) {
  556                 af6 = (struct ip6asfrag *)q6;
  557                 goto insert;
  558         }
  559 
  560         /* Do duplicate, condition, and boundry checks. */
  561         /*
  562          * Handle ECN by comparing this segment with the first one;
  563          * if CE is set, do not lose CE.
  564          * Drop if CE and not-ECT are mixed for the same packet.
  565          */
  566         ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
  567         ecn0 = q6->ip6q_ecn;
  568         if (ecn == IPTOS_ECN_CE) {
  569                 if (ecn0 == IPTOS_ECN_NOTECT) {
  570                         free(ip6af, M_FRAG6);
  571                         goto dropfrag;
  572                 }
  573                 if (ecn0 != IPTOS_ECN_CE)
  574                         q6->ip6q_ecn = IPTOS_ECN_CE;
  575         }
  576         if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
  577                 free(ip6af, M_FRAG6);
  578                 goto dropfrag;
  579         }
  580 
  581         /* Find a fragmented part which begins after this one does. */
  582         for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
  583              af6 = af6->ip6af_down)
  584                 if (af6->ip6af_off > ip6af->ip6af_off)
  585                         break;
  586 
  587         /*
  588          * If the incoming framgent overlaps some existing fragments in
  589          * the reassembly queue, drop both the new fragment and the
  590          * entire reassembly queue.  However, if the new fragment
  591          * is an exact duplicate of an existing fragment, only silently
  592          * drop the existing fragment and leave the fragmentation queue
  593          * unchanged, as allowed by the RFC.  (RFC 8200, 4.5)
  594          */
  595         if (af6->ip6af_up != (struct ip6asfrag *)q6) {
  596                 if (af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen -
  597                     ip6af->ip6af_off > 0) {
  598                         free(ip6af, M_FRAG6);
  599                         goto dropfrag;
  600                 }
  601         }
  602         if (af6 != (struct ip6asfrag *)q6) {
  603                 if (ip6af->ip6af_off + ip6af->ip6af_frglen -
  604                     af6->ip6af_off > 0) {
  605                         free(ip6af, M_FRAG6);
  606                         goto dropfrag;
  607                 }
  608         }
  609 
  610 insert:
  611 #ifdef MAC
  612         if (!only_frag)
  613                 mac_ip6q_update(m, q6);
  614 #endif
  615 
  616         /*
  617          * Stick new segment in its place; check for complete reassembly.
  618          * If not complete, check fragment limit.  Move to front of packet
  619          * queue, as we are the most recently active fragmented packet.
  620          */
  621         frag6_enq(ip6af, af6->ip6af_up, bucket);
  622         atomic_add_int(&frag6_nfrags, 1);
  623         q6->ip6q_nfrag++;
  624         plen = 0;
  625         for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
  626              af6 = af6->ip6af_down) {
  627                 if (af6->ip6af_off != plen) {
  628                         if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
  629                                 IP6STAT_INC(ip6s_fragdropped);
  630                                 frag6_freef(q6, bucket);
  631                         }
  632                         IP6QB_UNLOCK(bucket);
  633                         return (IPPROTO_DONE);
  634                 }
  635                 plen += af6->ip6af_frglen;
  636         }
  637         if (af6->ip6af_up->ip6af_mff) {
  638                 if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
  639                         IP6STAT_INC(ip6s_fragdropped);
  640                         frag6_freef(q6, bucket);
  641                 }
  642                 IP6QB_UNLOCK(bucket);
  643                 return (IPPROTO_DONE);
  644         }
  645 
  646         /* Reassembly is complete; concatenate fragments. */
  647         ip6af = q6->ip6q_down;
  648         t = m = IP6_REASS_MBUF(ip6af);
  649         af6 = ip6af->ip6af_down;
  650         frag6_deq(ip6af, bucket);
  651         while (af6 != (struct ip6asfrag *)q6) {
  652                 m->m_pkthdr.csum_flags &=
  653                     IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
  654                 m->m_pkthdr.csum_data +=
  655                     IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
  656 
  657                 af6dwn = af6->ip6af_down;
  658                 frag6_deq(af6, bucket);
  659                 while (t->m_next)
  660                         t = t->m_next;
  661                 m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
  662                 m_demote_pkthdr(IP6_REASS_MBUF(af6));
  663                 m_cat(t, IP6_REASS_MBUF(af6));
  664                 free(af6, M_FRAG6);
  665                 af6 = af6dwn;
  666         }
  667 
  668         while (m->m_pkthdr.csum_data & 0xffff0000)
  669                 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
  670                     (m->m_pkthdr.csum_data >> 16);
  671 
  672         /* Adjust offset to point where the original next header starts. */
  673         offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
  674         free(ip6af, M_FRAG6);
  675         ip6 = mtod(m, struct ip6_hdr *);
  676         ip6->ip6_plen = htons((u_short)plen + offset - sizeof(struct ip6_hdr));
  677         if (q6->ip6q_ecn == IPTOS_ECN_CE)
  678                 ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
  679         nxt = q6->ip6q_nxt;
  680 
  681         if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
  682                 frag6_remque(q6, bucket);
  683                 atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
  684 #ifdef MAC
  685                 mac_ip6q_destroy(q6);
  686 #endif
  687                 free(q6, M_FRAG6);
  688                 atomic_subtract_int(&V_frag6_nfragpackets, 1);
  689 
  690                 goto dropfrag;
  691         }
  692 
  693         /* Set nxt(-hdr field value) to the original value. */
  694         m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
  695             (caddr_t)&nxt);
  696 
  697         frag6_remque(q6, bucket);
  698         atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
  699 #ifdef MAC
  700         mac_ip6q_reassemble(q6, m);
  701         mac_ip6q_destroy(q6);
  702 #endif
  703         free(q6, M_FRAG6);
  704         atomic_subtract_int(&V_frag6_nfragpackets, 1);
  705 
  706         if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
  707 
  708                 plen = 0;
  709                 for (t = m; t; t = t->m_next)
  710                         plen += t->m_len;
  711                 m->m_pkthdr.len = plen;
  712         }
  713 
  714 #ifdef RSS
  715         mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc),
  716             M_NOWAIT);
  717         if (mtag == NULL)
  718                 goto dropfrag;
  719 
  720         ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
  721         ip6dc->ip6dc_nxt = nxt;
  722         ip6dc->ip6dc_off = offset;
  723 
  724         m_tag_prepend(m, mtag);
  725 #endif
  726 
  727         IP6QB_UNLOCK(bucket);
  728         IP6STAT_INC(ip6s_reassembled);
  729         in6_ifstat_inc(dstifp, ifs6_reass_ok);
  730 
  731 #ifdef RSS
  732         /* Queue/dispatch for reprocessing. */
  733         netisr_dispatch(NETISR_IPV6_DIRECT, m);
  734         return (IPPROTO_DONE);
  735 #endif
  736 
  737         /* Tell launch routine the next header. */
  738         *mp = m;
  739         *offp = offset;
  740 
  741         return (nxt);
  742 
  743 dropfrag:
  744         IP6QB_UNLOCK(bucket);
  745         in6_ifstat_inc(dstifp, ifs6_reass_fail);
  746         IP6STAT_INC(ip6s_fragdropped);
  747         m_freem(m);
  748         return (IPPROTO_DONE);
  749 }
  750 
/*
 * IPv6 reassembly timer processing:
 * if a timer expires on a reassembly queue, discard it.
 */
void
frag6_slowtimo(void)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ip6q *head, *q6;
	uint32_t bucket;

	/*
	 * Walk every hash bucket of every vnet, aging out expired
	 * reassembly queues and enforcing the per-bucket and global
	 * fragment-packet limits.
	 */
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
			IP6QB_LOCK(bucket);
			head = IP6QB_HEAD(bucket);
			q6 = head->ip6q_next;
			if (q6 == NULL) {
				/*
				 * XXXJTL: This should never happen. This
				 * should turn into an assertion.
				 */
				IP6QB_UNLOCK(bucket);
				continue;
			}
			/*
			 * Decrement each queue's TTL.  Note the order:
			 * advance the iterator first, then test and free
			 * the *previous* entry, so freeing an expired
			 * queue never invalidates the iterator.
			 */
			while (q6 != head) {
				--q6->ip6q_ttl;
				q6 = q6->ip6q_next;
				if (q6->ip6q_prev->ip6q_ttl == 0) {
					IP6STAT_INC(ip6s_fragtimeout);
					/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
					frag6_freef(q6->ip6q_prev, bucket);
				}
			}
			/*
			 * If we are over the maximum number of fragments
			 * (due to the limit being lowered), drain off
			 * enough to get down to the new limit.
			 * Note that we drain all reassembly queues if
			 * maxfragpackets is 0 (fragmentation is disabled),
			 * and do not enforce a limit when maxfragpackets
			 * is negative.
			 */
			while ((V_ip6_maxfragpackets == 0 ||
			    (V_ip6_maxfragpackets > 0 &&
			    V_ip6qb[bucket].count > V_ip6_maxfragbucketsize)) &&
			    head->ip6q_prev != head) {
				IP6STAT_INC(ip6s_fragoverflow);
				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
				/* Evict the oldest queue (tail of the list). */
				frag6_freef(head->ip6q_prev, bucket);
			}
			IP6QB_UNLOCK(bucket);
		}
		/*
		 * If we are still over the maximum number of fragmented
		 * packets, drain off enough to get down to the new limit.
		 * Round-robin over the buckets so no single bucket is
		 * drained disproportionately.
		 */
		bucket = 0;
		while (V_ip6_maxfragpackets >= 0 &&
		    atomic_load_int(&V_frag6_nfragpackets) >
		    (u_int)V_ip6_maxfragpackets) {
			IP6QB_LOCK(bucket);
			head = IP6QB_HEAD(bucket);
			if (head->ip6q_prev != head) {
				IP6STAT_INC(ip6s_fragoverflow);
				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
				frag6_freef(head->ip6q_prev, bucket);
			}
			IP6QB_UNLOCK(bucket);
			bucket = (bucket + 1) % IP6REASS_NHASH;
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}
  827 
/*
 * Event handler to adjust limits when nmbclusters changes.
 */
  831 static void
  832 frag6_change(void *tag)
  833 {
  834         VNET_ITERATOR_DECL(vnet_iter);
  835 
  836         ip6_maxfrags = IP6_MAXFRAGS;
  837         VNET_LIST_RLOCK_NOSLEEP();
  838         VNET_FOREACH(vnet_iter) {
  839                 CURVNET_SET(vnet_iter);
  840                 V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
  841                 frag6_set_bucketsize();
  842                 CURVNET_RESTORE();
  843         }
  844         VNET_LIST_RUNLOCK_NOSLEEP();
  845 }
  846 
  847 /*
  848  * Initialise reassembly queue and fragment identifier.
  849  */
  850 void
  851 frag6_init(void)
  852 {
  853         struct ip6q *q6;
  854         uint32_t bucket;
  855 
  856         V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
  857         frag6_set_bucketsize();
  858         for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
  859                 q6 = IP6QB_HEAD(bucket);
  860                 q6->ip6q_next = q6->ip6q_prev = q6;
  861                 mtx_init(&V_ip6qb[bucket].lock, "ip6qlock", NULL, MTX_DEF);
  862                 V_ip6qb[bucket].count = 0;
  863         }
  864         V_ip6qb_hashseed = arc4random();
  865         V_ip6_maxfragsperpacket = 64;
  866         if (!IS_DEFAULT_VNET(curvnet))
  867                 return;
  868 
  869         ip6_maxfrags = IP6_MAXFRAGS;
  870         EVENTHANDLER_REGISTER(nmbclusters_change,
  871             frag6_change, NULL, EVENTHANDLER_PRI_ANY);
  872 }
  873 
  874 /*
  875  * Drain off all datagram fragments.
  876  */
  877 void
  878 frag6_drain(void)
  879 {
  880         VNET_ITERATOR_DECL(vnet_iter);
  881         struct ip6q *head;
  882         uint32_t bucket;
  883 
  884         VNET_LIST_RLOCK_NOSLEEP();
  885         VNET_FOREACH(vnet_iter) {
  886                 CURVNET_SET(vnet_iter);
  887                 for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
  888                         if (IP6QB_TRYLOCK(bucket) == 0)
  889                                 continue;
  890                         head = IP6QB_HEAD(bucket);
  891                         while (head->ip6q_next != head) {
  892                                 IP6STAT_INC(ip6s_fragdropped);
  893                                 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
  894                                 frag6_freef(head->ip6q_next, bucket);
  895                         }
  896                         IP6QB_UNLOCK(bucket);
  897                 }
  898                 CURVNET_RESTORE();
  899         }
  900         VNET_LIST_RUNLOCK_NOSLEEP();
  901 }
  902 
/*
 * Put an IPv6 fragment on a reassembly chain.
 * Like insque, but pointers in middle of structure.
 */
  907 static void
  908 frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
  909     uint32_t bucket __unused)
  910 {
  911 
  912         IP6QB_LOCK_ASSERT(bucket);
  913 
  914         af6->ip6af_up = up6;
  915         af6->ip6af_down = up6->ip6af_down;
  916         up6->ip6af_down->ip6af_up = af6;
  917         up6->ip6af_down = af6;
  918 }
  919 
  920 /*
  921  * To frag6_enq as remque is to insque.
  922  */
  923 static void
  924 frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
  925 {
  926 
  927         IP6QB_LOCK_ASSERT(bucket);
  928 
  929         af6->ip6af_up->ip6af_down = af6->ip6af_down;
  930         af6->ip6af_down->ip6af_up = af6->ip6af_up;
  931 }
  932 
  933 static void
  934 frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
  935 {
  936 
  937         IP6QB_LOCK_ASSERT(bucket);
  938         KASSERT(IP6QB_HEAD(bucket) == old,
  939             ("%s: attempt to insert at head of wrong bucket"
  940             " (bucket=%u, old=%p)", __func__, bucket, old));
  941 
  942         new->ip6q_prev = old;
  943         new->ip6q_next = old->ip6q_next;
  944         old->ip6q_next->ip6q_prev= new;
  945         old->ip6q_next = new;
  946         V_ip6qb[bucket].count++;
  947 }
  948 
  949 static void
  950 frag6_remque(struct ip6q *p6, uint32_t bucket)
  951 {
  952 
  953         IP6QB_LOCK_ASSERT(bucket);
  954 
  955         p6->ip6q_prev->ip6q_next = p6->ip6q_next;
  956         p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
  957         V_ip6qb[bucket].count--;
  958 }

Cache object: 018f050a140e5678ee6033585d25ef67


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.