FreeBSD/Linux Kernel Cross Reference
sys/netinet6/frag6.c
1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * $KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $
32 */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD: releng/12.0/sys/netinet6/frag6.c 338406 2018-08-31 08:37:15Z kp $");
36
37 #include "opt_rss.h"
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/hash.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/domain.h>
45 #include <sys/eventhandler.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/errno.h>
49 #include <sys/time.h>
50 #include <sys/kernel.h>
51 #include <sys/syslog.h>
52
53 #include <machine/atomic.h>
54
55 #include <net/if.h>
56 #include <net/if_var.h>
57 #include <net/netisr.h>
58 #include <net/route.h>
59 #include <net/vnet.h>
60
61 #include <netinet/in.h>
62 #include <netinet/in_var.h>
63 #include <netinet/ip6.h>
64 #include <netinet6/ip6_var.h>
65 #include <netinet/icmp6.h>
66 #include <netinet/in_systm.h> /* for ECN definitions */
67 #include <netinet/ip.h> /* for ECN definitions */
68
69 #include <security/mac/mac_framework.h>
70
71 /*
72 * Reassembly headers are stored in hash buckets.
73 */
74 #define IP6REASS_NHASH_LOG2 10
75 #define IP6REASS_NHASH (1 << IP6REASS_NHASH_LOG2)
76 #define IP6REASS_HMASK (IP6REASS_NHASH - 1)
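/*
 * With IP6REASS_NHASH_LOG2 == 10 there are 1024 buckets; IP6REASS_HMASK
 * reduces the 32-bit hash computed in frag6_input() to a bucket index.
 */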
77
78 static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *,
79 uint32_t bucket __unused);
80 static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused);
81 static void frag6_insque_head(struct ip6q *, struct ip6q *,
82 uint32_t bucket);
83 static void frag6_remque(struct ip6q *, uint32_t bucket);
84 static void frag6_freef(struct ip6q *, uint32_t bucket);
85
86 struct ip6qbucket {
87 struct ip6q ip6q;
88 struct mtx lock;
89 int count;
90 };
91
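/*
 * Except for frag6_nfrags, which counts fragments across all VNETs, the
 * reassembly state below (bucket array, hash seed, packet count) is
 * per-VNET (VNET_DEFINE_STATIC).
 */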
92 VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
93 volatile u_int frag6_nfrags = 0;
94 VNET_DEFINE_STATIC(struct ip6qbucket, ip6q[IP6REASS_NHASH]);
95 VNET_DEFINE_STATIC(uint32_t, ip6q_hashseed);
96
97 #define V_frag6_nfragpackets VNET(frag6_nfragpackets)
98 #define V_ip6q VNET(ip6q)
99 #define V_ip6q_hashseed VNET(ip6q_hashseed)
100
101 #define IP6Q_LOCK(i) mtx_lock(&V_ip6q[(i)].lock)
102 #define IP6Q_TRYLOCK(i) mtx_trylock(&V_ip6q[(i)].lock)
103 #define IP6Q_LOCK_ASSERT(i) mtx_assert(&V_ip6q[(i)].lock, MA_OWNED)
104 #define IP6Q_UNLOCK(i) mtx_unlock(&V_ip6q[(i)].lock)
105 #define IP6Q_HEAD(i) (&V_ip6q[(i)].ip6q)
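/*
 * Each bucket has its own mutex, so fragments that hash to different
 * buckets can be reassembled concurrently.
 */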
106
107 static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
108
109 /*
110 * By default, limit the number of IP6 fragments across all reassembly
111 * queues to 1/32 of the total number of mbuf clusters.
112 *
113 * Limit the total number of reassembly queues per VNET to the
114 * IP6 fragment limit, but ensure the limit will not allow any bucket
115 * to grow above 100 items. (The bucket limit is
116  * IP6_MAXFRAGPACKETS / (IP6REASS_NHASH / 2), so the 50 is the correct
117 * multiplier to reach a 100-item limit.)
118 * The 100-item limit was chosen as brief testing seems to show that
119 * this produces "reasonable" performance on some subset of systems
120 * under DoS attack.
121 */
122 #define IP6_MAXFRAGS (nmbclusters / 32)
123 #define IP6_MAXFRAGPACKETS (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50))
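/*
 * Worked example (hypothetical values): with nmbclusters == 262144,
 * IP6_MAXFRAGS is 8192 and IP6_MAXFRAGPACKETS is imin(8192, 1024 * 50) ==
 * 8192; frag6_set_bucketsize() then allows 8192 / (1024 / 2) == 16
 * reassembly queues per bucket.
 */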
124
125 /*
126 * Initialise reassembly queue and fragment identifier.
127 */
128 void
129 frag6_set_bucketsize()
130 {
131 int i;
132
133 if ((i = V_ip6_maxfragpackets) > 0)
134 V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1);
135 }
136
137 static void
138 frag6_change(void *tag)
139 {
140 VNET_ITERATOR_DECL(vnet_iter);
141
142 ip6_maxfrags = IP6_MAXFRAGS;
143 VNET_LIST_RLOCK_NOSLEEP();
144 VNET_FOREACH(vnet_iter) {
145 CURVNET_SET(vnet_iter);
146 V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
147 frag6_set_bucketsize();
148 CURVNET_RESTORE();
149 }
150 VNET_LIST_RUNLOCK_NOSLEEP();
151 }
152
153 void
154 frag6_init(void)
155 {
156 struct ip6q *q6;
157 int i;
158
159 V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
160 frag6_set_bucketsize();
161 for (i = 0; i < IP6REASS_NHASH; i++) {
162 q6 = IP6Q_HEAD(i);
163 q6->ip6q_next = q6->ip6q_prev = q6;
164 mtx_init(&V_ip6q[i].lock, "ip6qlock", NULL, MTX_DEF);
165 V_ip6q[i].count = 0;
166 }
167 V_ip6q_hashseed = arc4random();
168 V_ip6_maxfragsperpacket = 64;
169 if (!IS_DEFAULT_VNET(curvnet))
170 return;
171
172 ip6_maxfrags = IP6_MAXFRAGS;
173 EVENTHANDLER_REGISTER(nmbclusters_change,
174 frag6_change, NULL, EVENTHANDLER_PRI_ANY);
175 }
176
177 /*
178  * In RFC 2460, the fragment and reassembly rules do not agree with each
179  * other in terms of next header field handling in the fragment header.
180  * While the sender will use the same value for all fragments of a packet,
181  * the receiver is advised not to check for consistency.
182 *
183 * fragment rule (p20):
184 * (2) A Fragment header containing:
185 * The Next Header value that identifies the first header of
186 * the Fragmentable Part of the original packet.
187 * -> next header field is same for all fragments
188 *
189 * reassembly rule (p21):
190 * The Next Header field of the last header of the Unfragmentable
191 * Part is obtained from the Next Header field of the first
192 * fragment's Fragment header.
193 * -> should grab it from the first fragment only
194 *
195  * The following note also contradicts the fragment rule - no one is going to
196  * send fragments of the same packet with different next header fields.
197 *
198 * additional note (p22):
199 * The Next Header values in the Fragment headers of different
200 * fragments of the same original packet may differ. Only the value
201 * from the Offset zero fragment packet is used for reassembly.
202 * -> should grab it from the first fragment only
203 *
204 * There is no explicit reason given in the RFC. Historical reason maybe?
205 */
206 /*
207 * Fragment input
208 */
209 int
210 frag6_input(struct mbuf **mp, int *offp, int proto)
211 {
212 struct mbuf *m = *mp, *t;
213 struct ip6_hdr *ip6;
214 struct ip6_frag *ip6f;
215 struct ip6q *head, *q6;
216 struct ip6asfrag *af6, *ip6af, *af6dwn;
217 struct in6_ifaddr *ia;
218 int offset = *offp, nxt, i, next;
219 int first_frag = 0;
220 int fragoff, frgpartlen; /* must be larger than u_int16_t */
221 uint32_t hashkey[(sizeof(struct in6_addr) * 2 +
222 sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)];
223 uint32_t hash, *hashkeyp;
224 struct ifnet *dstifp;
225 u_int8_t ecn, ecn0;
226 #ifdef RSS
227 struct m_tag *mtag;
228 struct ip6_direct_ctx *ip6dc;
229 #endif
230
231 #if 0
232 char ip6buf[INET6_ADDRSTRLEN];
233 #endif
234
235 ip6 = mtod(m, struct ip6_hdr *);
236 #ifndef PULLDOWN_TEST
237 IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
238 ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
239 #else
240 IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
241 if (ip6f == NULL)
242 return (IPPROTO_DONE);
243 #endif
244
245 dstifp = NULL;
246 /* find the destination interface of the packet. */
247 ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
248 if (ia != NULL) {
249 dstifp = ia->ia_ifp;
250 ifa_free(&ia->ia_ifa);
251 }
252 /* jumbo payload can't contain a fragment header */
253 if (ip6->ip6_plen == 0) {
254 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
255 in6_ifstat_inc(dstifp, ifs6_reass_fail);
256 return IPPROTO_DONE;
257 }
258
259 /*
260  * Check whether the fragment's payload length is a multiple of 8 octets,
261  * as required for every fragment that has the M (more fragments) bit set.
262  * sizeof(struct ip6_frag) == 8
263  * sizeof(struct ip6_hdr) == 40
264 */
265 if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
266 (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
267 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
268 offsetof(struct ip6_hdr, ip6_plen));
269 in6_ifstat_inc(dstifp, ifs6_reass_fail);
270 return IPPROTO_DONE;
271 }
272
273 IP6STAT_INC(ip6s_fragments);
274 in6_ifstat_inc(dstifp, ifs6_reass_reqd);
275
276 /* offset now points to data portion */
277 offset += sizeof(struct ip6_frag);
278
279 /*
280 * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
281 * upfront, unrelated to any reassembly. Just skip the fragment header.
282 */
283 if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
284 /* XXX-BZ we want dedicated counters for this. */
285 IP6STAT_INC(ip6s_reassembled);
286 in6_ifstat_inc(dstifp, ifs6_reass_ok);
287 *offp = offset;
288 m->m_flags |= M_FRAGMENTED;
289 return (ip6f->ip6f_nxt);
290 }
291
292 /* Get fragment length and discard 0-byte fragments. */
293 frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
294 if (frgpartlen == 0) {
295 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
296 offsetof(struct ip6_hdr, ip6_plen));
297 in6_ifstat_inc(dstifp, ifs6_reass_fail);
298 IP6STAT_INC(ip6s_fragdropped);
299 return IPPROTO_DONE;
300 }
301
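	/*
	 * Look up (or later create) the reassembly queue for this packet.
	 * The key is {source address, destination address, fragment ID},
	 * hashed with jenkins_hash32() using the per-VNET random seed
	 * V_ip6q_hashseed so remote senders cannot easily target a single
	 * bucket.
	 */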
302 hashkeyp = hashkey;
303 memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
304 hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
305 memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
306 hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
307 *hashkeyp = ip6f->ip6f_ident;
308 hash = jenkins_hash32(hashkey, nitems(hashkey), V_ip6q_hashseed);
309 hash &= IP6REASS_HMASK;
310 head = IP6Q_HEAD(hash);
311 IP6Q_LOCK(hash);
312
313 /*
314 * Enforce upper bound on number of fragments.
315 * If maxfrag is 0, never accept fragments.
316 * If maxfrag is -1, accept all fragments without limitation.
317 */
318 if (ip6_maxfrags < 0)
319 ;
320 else if (atomic_load_int(&frag6_nfrags) >= (u_int)ip6_maxfrags)
321 goto dropfrag;
322
323 for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next)
324 if (ip6f->ip6f_ident == q6->ip6q_ident &&
325 IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
326 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
327 #ifdef MAC
328 && mac_ip6q_match(m, q6)
329 #endif
330 )
331 break;
332
333 if (q6 == head) {
334 /*
335  * This is the first fragment to arrive; create a reassembly queue.
336 */
337 first_frag = 1;
338
339 /*
340 * Enforce upper bound on number of fragmented packets
341 * for which we attempt reassembly;
342 * If maxfragpackets is 0, never accept fragments.
343 * If maxfragpackets is -1, accept all fragments without
344 * limitation.
345 */
346 if (V_ip6_maxfragpackets < 0)
347 ;
348 else if (V_ip6q[hash].count >= V_ip6_maxfragbucketsize ||
349 atomic_load_int(&V_frag6_nfragpackets) >=
350 (u_int)V_ip6_maxfragpackets)
351 goto dropfrag;
352 atomic_add_int(&V_frag6_nfragpackets, 1);
353 q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
354 M_NOWAIT);
355 if (q6 == NULL)
356 goto dropfrag;
357 bzero(q6, sizeof(*q6));
358 #ifdef MAC
359 if (mac_ip6q_init(q6, M_NOWAIT) != 0) {
360 free(q6, M_FTABLE);
361 goto dropfrag;
362 }
363 mac_ip6q_create(m, q6);
364 #endif
365 frag6_insque_head(q6, head, hash);
366
367 /* ip6q_nxt will be filled afterwards, from 1st fragment */
368 q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
369 #ifdef notyet
370 q6->ip6q_nxtp = (u_char *)nxtp;
371 #endif
372 q6->ip6q_ident = ip6f->ip6f_ident;
373 q6->ip6q_ttl = IPV6_FRAGTTL;
374 q6->ip6q_src = ip6->ip6_src;
375 q6->ip6q_dst = ip6->ip6_dst;
376 q6->ip6q_ecn =
377 (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
378 q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
379
380 q6->ip6q_nfrag = 0;
381 }
382
383 /*
384 * If it's the 1st fragment, record the length of the
385 * unfragmentable part and the next header of the fragment header.
386 */
387 fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
388 if (fragoff == 0) {
389 q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
390 sizeof(struct ip6_frag);
391 q6->ip6q_nxt = ip6f->ip6f_nxt;
392 }
393
394 /*
395 * Check that the reassembled packet would not exceed 65535 bytes
396 * in size.
397 * If it would exceed, discard the fragment and return an ICMP error.
398 */
399 if (q6->ip6q_unfrglen >= 0) {
400 /* The 1st fragment has already arrived. */
401 if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
402 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
403 offset - sizeof(struct ip6_frag) +
404 offsetof(struct ip6_frag, ip6f_offlg));
405 IP6Q_UNLOCK(hash);
406 return (IPPROTO_DONE);
407 }
408 } else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
409 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
410 offset - sizeof(struct ip6_frag) +
411 offsetof(struct ip6_frag, ip6f_offlg));
412 IP6Q_UNLOCK(hash);
413 return (IPPROTO_DONE);
414 }
415 /*
416 * If it's the first fragment, do the above check for each
417 * fragment already stored in the reassembly queue.
418 */
419 if (fragoff == 0) {
420 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
421 af6 = af6dwn) {
422 af6dwn = af6->ip6af_down;
423
424 if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
425 IPV6_MAXPACKET) {
426 struct mbuf *merr = IP6_REASS_MBUF(af6);
427 struct ip6_hdr *ip6err;
428 int erroff = af6->ip6af_offset;
429
430 /* dequeue the fragment. */
431 frag6_deq(af6, hash);
432 free(af6, M_FTABLE);
433
434 /* adjust pointer. */
435 ip6err = mtod(merr, struct ip6_hdr *);
436
437 /*
438 * Restore source and destination addresses
439 * in the erroneous IPv6 header.
440 */
441 ip6err->ip6_src = q6->ip6q_src;
442 ip6err->ip6_dst = q6->ip6q_dst;
443
444 icmp6_error(merr, ICMP6_PARAM_PROB,
445 ICMP6_PARAMPROB_HEADER,
446 erroff - sizeof(struct ip6_frag) +
447 offsetof(struct ip6_frag, ip6f_offlg));
448 }
449 }
450 }
451
452 ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE,
453 M_NOWAIT);
454 if (ip6af == NULL)
455 goto dropfrag;
456 bzero(ip6af, sizeof(*ip6af));
457 ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
458 ip6af->ip6af_off = fragoff;
459 ip6af->ip6af_frglen = frgpartlen;
460 ip6af->ip6af_offset = offset;
461 IP6_REASS_MBUF(ip6af) = m;
462
463 if (first_frag) {
464 af6 = (struct ip6asfrag *)q6;
465 goto insert;
466 }
467
468 /*
469 * Handle ECN by comparing this segment with the first one;
470 * if CE is set, do not lose CE.
471  * Drop if CE and not-ECT are mixed for the same packet.
472 */
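	/*
	 * (CE is preserved if any fragment was marked CE; a packet that
	 * mixes CE and not-ECT fragments is dropped, cf. RFC 3168.)
	 */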
473 ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
474 ecn0 = q6->ip6q_ecn;
475 if (ecn == IPTOS_ECN_CE) {
476 if (ecn0 == IPTOS_ECN_NOTECT) {
477 free(ip6af, M_FTABLE);
478 goto dropfrag;
479 }
480 if (ecn0 != IPTOS_ECN_CE)
481 q6->ip6q_ecn = IPTOS_ECN_CE;
482 }
483 if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
484 free(ip6af, M_FTABLE);
485 goto dropfrag;
486 }
487
488 /*
489 * Find a segment which begins after this one does.
490 */
491 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
492 af6 = af6->ip6af_down)
493 if (af6->ip6af_off > ip6af->ip6af_off)
494 break;
495
496 #if 0
497 /*
498 * If there is a preceding segment, it may provide some of
499 * our data already. If so, drop the data from the incoming
500 * segment. If it provides all of our data, drop us.
501 */
502 if (af6->ip6af_up != (struct ip6asfrag *)q6) {
503 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
504 - ip6af->ip6af_off;
505 if (i > 0) {
506 if (i >= ip6af->ip6af_frglen)
507 goto dropfrag;
508 m_adj(IP6_REASS_MBUF(ip6af), i);
509 ip6af->ip6af_off += i;
510 ip6af->ip6af_frglen -= i;
511 }
512 }
513
514 /*
515 * While we overlap succeeding segments trim them or,
516 * if they are completely covered, dequeue them.
517 */
518 while (af6 != (struct ip6asfrag *)q6 &&
519 ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
520 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
521 if (i < af6->ip6af_frglen) {
522 af6->ip6af_frglen -= i;
523 af6->ip6af_off += i;
524 m_adj(IP6_REASS_MBUF(af6), i);
525 break;
526 }
527 af6 = af6->ip6af_down;
528 m_freem(IP6_REASS_MBUF(af6->ip6af_up));
529 frag6_deq(af6->ip6af_up, hash);
530 }
531 #else
532 /*
533  * If the incoming fragment overlaps some existing fragments in
534  * the reassembly queue, drop it, since it is dangerous to overwrite
535  * existing fragments from a security point of view.
536  * We don't know which fragment is the bad guy - here we trust the
537  * fragment that came in earlier, with no real reason.
538 *
539 * Note: due to changes after disabling this part, mbuf passed to
540 * m_adj() below now does not meet the requirement.
541 */
542 if (af6->ip6af_up != (struct ip6asfrag *)q6) {
543 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
544 - ip6af->ip6af_off;
545 if (i > 0) {
546 #if 0 /* suppress the noisy log */
547 log(LOG_ERR, "%d bytes of a fragment from %s "
548 "overlaps the previous fragment\n",
549 i, ip6_sprintf(ip6buf, &q6->ip6q_src));
550 #endif
551 free(ip6af, M_FTABLE);
552 goto dropfrag;
553 }
554 }
555 if (af6 != (struct ip6asfrag *)q6) {
556 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
557 if (i > 0) {
558 #if 0 /* suppress the noisy log */
559 log(LOG_ERR, "%d bytes of a fragment from %s "
560 "overlaps the succeeding fragment",
561 i, ip6_sprintf(ip6buf, &q6->ip6q_src));
562 #endif
563 free(ip6af, M_FTABLE);
564 goto dropfrag;
565 }
566 }
567 #endif
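	/*
	 * Note that only the incoming fragment is dropped here; RFC 5722
	 * later required that the whole reassembly queue be discarded when
	 * overlapping fragments are detected.
	 */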
568
569 insert:
570 #ifdef MAC
571 if (!first_frag)
572 mac_ip6q_update(m, q6);
573 #endif
574
575 /*
576 * Stick new segment in its place;
577 * check for complete reassembly.
578 * If not complete, check fragment limit.
579 * Move to front of packet queue, as we are
580 * the most recently active fragmented packet.
581 */
582 frag6_enq(ip6af, af6->ip6af_up, hash);
583 atomic_add_int(&frag6_nfrags, 1);
584 q6->ip6q_nfrag++;
585 #if 0 /* xxx */
586 if (q6 != head->ip6q_next) {
587 frag6_remque(q6, hash);
588 frag6_insque_head(q6, head, hash);
589 }
590 #endif
591 next = 0;
592 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
593 af6 = af6->ip6af_down) {
594 if (af6->ip6af_off != next) {
595 if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
596 IP6STAT_INC(ip6s_fragdropped);
597 frag6_freef(q6, hash);
598 }
599 IP6Q_UNLOCK(hash);
600 return IPPROTO_DONE;
601 }
602 next += af6->ip6af_frglen;
603 }
604 if (af6->ip6af_up->ip6af_mff) {
605 if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
606 IP6STAT_INC(ip6s_fragdropped);
607 frag6_freef(q6, hash);
608 }
609 IP6Q_UNLOCK(hash);
610 return IPPROTO_DONE;
611 }
612
613 /*
614 * Reassembly is complete; concatenate fragments.
615 */
616 ip6af = q6->ip6q_down;
617 t = m = IP6_REASS_MBUF(ip6af);
618 af6 = ip6af->ip6af_down;
619 frag6_deq(ip6af, hash);
620 while (af6 != (struct ip6asfrag *)q6) {
621 m->m_pkthdr.csum_flags &=
622 IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
623 m->m_pkthdr.csum_data +=
624 IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
625
626 af6dwn = af6->ip6af_down;
627 frag6_deq(af6, hash);
628 while (t->m_next)
629 t = t->m_next;
630 m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
631 m_demote_pkthdr(IP6_REASS_MBUF(af6));
632 m_cat(t, IP6_REASS_MBUF(af6));
633 free(af6, M_FTABLE);
634 af6 = af6dwn;
635 }
636
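	/*
	 * csum_data now holds the sum of the fragments' partial checksums;
	 * fold any carry out of the low 16 bits back in so the result is a
	 * valid 16-bit one's complement partial sum.
	 */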
637 while (m->m_pkthdr.csum_data & 0xffff0000)
638 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
639 (m->m_pkthdr.csum_data >> 16);
640
641 /* adjust offset to point where the original next header starts */
642 offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
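	/*
	 * offset is now the position of the fragment header itself, i.e.
	 * sizeof(struct ip6_hdr) plus the length of any unfragmentable
	 * extension headers preceding it.
	 */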
643 free(ip6af, M_FTABLE);
644 ip6 = mtod(m, struct ip6_hdr *);
645 ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
646 if (q6->ip6q_ecn == IPTOS_ECN_CE)
647 ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
648 nxt = q6->ip6q_nxt;
649 #ifdef notyet
650 *q6->ip6q_nxtp = (u_char)(nxt & 0xff);
651 #endif
652
653 if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
654 frag6_remque(q6, hash);
655 atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
656 #ifdef MAC
657 mac_ip6q_destroy(q6);
658 #endif
659 free(q6, M_FTABLE);
660 atomic_subtract_int(&V_frag6_nfragpackets, 1);
661
662 goto dropfrag;
663 }
664
665 /*
666  * Store NXT back into the next header field of the preceding header.
667 */
668 m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
669 (caddr_t)&nxt);
670
671 frag6_remque(q6, hash);
672 atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
673 #ifdef MAC
674 mac_ip6q_reassemble(q6, m);
675 mac_ip6q_destroy(q6);
676 #endif
677 free(q6, M_FTABLE);
678 atomic_subtract_int(&V_frag6_nfragpackets, 1);
679
680 if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
681 int plen = 0;
682 for (t = m; t; t = t->m_next)
683 plen += t->m_len;
684 m->m_pkthdr.len = plen;
685 }
686
687 #ifdef RSS
688 mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc),
689 M_NOWAIT);
690 if (mtag == NULL)
691 goto dropfrag;
692
693 ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
694 ip6dc->ip6dc_nxt = nxt;
695 ip6dc->ip6dc_off = offset;
696
697 m_tag_prepend(m, mtag);
698 #endif
699
700 IP6Q_UNLOCK(hash);
701 IP6STAT_INC(ip6s_reassembled);
702 in6_ifstat_inc(dstifp, ifs6_reass_ok);
703
704 #ifdef RSS
705 /*
706 * Queue/dispatch for reprocessing.
707 */
708 netisr_dispatch(NETISR_IPV6_DIRECT, m);
709 return IPPROTO_DONE;
710 #endif
711
712 /*
713 * Tell launch routine the next header
714 */
715
716 *mp = m;
717 *offp = offset;
718
719 return nxt;
720
721 dropfrag:
722 IP6Q_UNLOCK(hash);
723 in6_ifstat_inc(dstifp, ifs6_reass_fail);
724 IP6STAT_INC(ip6s_fragdropped);
725 m_freem(m);
726 return IPPROTO_DONE;
727 }
728
729 /*
730 * Free a fragment reassembly header and all
731 * associated datagrams.
732 */
733 static void
734 frag6_freef(struct ip6q *q6, uint32_t bucket)
735 {
736 struct ip6asfrag *af6, *down6;
737
738 IP6Q_LOCK_ASSERT(bucket);
739
740 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
741 af6 = down6) {
742 struct mbuf *m = IP6_REASS_MBUF(af6);
743
744 down6 = af6->ip6af_down;
745 frag6_deq(af6, bucket);
746
747 /*
748 * Return ICMP time exceeded error for the 1st fragment.
749 * Just free other fragments.
750 */
751 if (af6->ip6af_off == 0) {
752 struct ip6_hdr *ip6;
753
754 /* adjust pointer */
755 ip6 = mtod(m, struct ip6_hdr *);
756
757 /* restore source and destination addresses */
758 ip6->ip6_src = q6->ip6q_src;
759 ip6->ip6_dst = q6->ip6q_dst;
760
761 icmp6_error(m, ICMP6_TIME_EXCEEDED,
762 ICMP6_TIME_EXCEED_REASSEMBLY, 0);
763 } else
764 m_freem(m);
765 free(af6, M_FTABLE);
766 }
767 frag6_remque(q6, bucket);
768 atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
769 #ifdef MAC
770 mac_ip6q_destroy(q6);
771 #endif
772 free(q6, M_FTABLE);
773 atomic_subtract_int(&V_frag6_nfragpackets, 1);
774 }
775
776 /*
777 * Put an ip fragment on a reassembly chain.
778 * Like insque, but pointers in middle of structure.
779 */
780 static void
781 frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
782 uint32_t bucket __unused)
783 {
784
785 IP6Q_LOCK_ASSERT(bucket);
786
787 af6->ip6af_up = up6;
788 af6->ip6af_down = up6->ip6af_down;
789 up6->ip6af_down->ip6af_up = af6;
790 up6->ip6af_down = af6;
791 }
792
793 /*
794 * To frag6_enq as remque is to insque.
795 */
796 static void
797 frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
798 {
799
800 IP6Q_LOCK_ASSERT(bucket);
801
802 af6->ip6af_up->ip6af_down = af6->ip6af_down;
803 af6->ip6af_down->ip6af_up = af6->ip6af_up;
804 }
805
806 static void
807 frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
808 {
809
810 IP6Q_LOCK_ASSERT(bucket);
811 KASSERT(IP6Q_HEAD(bucket) == old,
812 ("%s: attempt to insert at head of wrong bucket"
813 " (bucket=%u, old=%p)", __func__, bucket, old));
814
815 new->ip6q_prev = old;
816 new->ip6q_next = old->ip6q_next;
817 old->ip6q_next->ip6q_prev= new;
818 old->ip6q_next = new;
819 V_ip6q[bucket].count++;
820 }
821
822 static void
823 frag6_remque(struct ip6q *p6, uint32_t bucket)
824 {
825
826 IP6Q_LOCK_ASSERT(bucket);
827
828 p6->ip6q_prev->ip6q_next = p6->ip6q_next;
829 p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
830 V_ip6q[bucket].count--;
831 }
832
833 /*
834  * IPv6 reassembly timer processing;
835 * if a timer expires on a reassembly
836 * queue, discard it.
837 */
838 void
839 frag6_slowtimo(void)
840 {
841 VNET_ITERATOR_DECL(vnet_iter);
842 struct ip6q *head, *q6;
843 int i;
844
845 VNET_LIST_RLOCK_NOSLEEP();
846 VNET_FOREACH(vnet_iter) {
847 CURVNET_SET(vnet_iter);
848 for (i = 0; i < IP6REASS_NHASH; i++) {
849 IP6Q_LOCK(i);
850 head = IP6Q_HEAD(i);
851 q6 = head->ip6q_next;
852 if (q6 == NULL) {
853 /*
854 * XXXJTL: This should never happen. This
855 * should turn into an assertion.
856 */
857 IP6Q_UNLOCK(i);
858 continue;
859 }
860 while (q6 != head) {
861 --q6->ip6q_ttl;
862 q6 = q6->ip6q_next;
863 if (q6->ip6q_prev->ip6q_ttl == 0) {
864 IP6STAT_INC(ip6s_fragtimeout);
865 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
866 frag6_freef(q6->ip6q_prev, i);
867 }
868 }
869 /*
870 * If we are over the maximum number of fragments
871 * (due to the limit being lowered), drain off
872 * enough to get down to the new limit.
873 * Note that we drain all reassembly queues if
874 * maxfragpackets is 0 (fragmentation is disabled),
875 * and don't enforce a limit when maxfragpackets
876 * is negative.
877 */
878 while ((V_ip6_maxfragpackets == 0 ||
879 (V_ip6_maxfragpackets > 0 &&
880 V_ip6q[i].count > V_ip6_maxfragbucketsize)) &&
881 head->ip6q_prev != head) {
882 IP6STAT_INC(ip6s_fragoverflow);
883 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
884 frag6_freef(head->ip6q_prev, i);
885 }
886 IP6Q_UNLOCK(i);
887 }
888 /*
889 * If we are still over the maximum number of fragmented
890 * packets, drain off enough to get down to the new limit.
891 */
892 i = 0;
893 while (V_ip6_maxfragpackets >= 0 &&
894 atomic_load_int(&V_frag6_nfragpackets) >
895 (u_int)V_ip6_maxfragpackets) {
896 IP6Q_LOCK(i);
897 head = IP6Q_HEAD(i);
898 if (head->ip6q_prev != head) {
899 IP6STAT_INC(ip6s_fragoverflow);
900 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
901 frag6_freef(head->ip6q_prev, i);
902 }
903 IP6Q_UNLOCK(i);
904 i = (i + 1) % IP6REASS_NHASH;
905 }
906 CURVNET_RESTORE();
907 }
908 VNET_LIST_RUNLOCK_NOSLEEP();
909 }
910
911 /*
912 * Drain off all datagram fragments.
913 */
914 void
915 frag6_drain(void)
916 {
917 VNET_ITERATOR_DECL(vnet_iter);
918 struct ip6q *head;
919 int i;
920
921 VNET_LIST_RLOCK_NOSLEEP();
922 VNET_FOREACH(vnet_iter) {
923 CURVNET_SET(vnet_iter);
924 for (i = 0; i < IP6REASS_NHASH; i++) {
925 if (IP6Q_TRYLOCK(i) == 0)
926 continue;
927 head = IP6Q_HEAD(i);
928 while (head->ip6q_next != head) {
929 IP6STAT_INC(ip6s_fragdropped);
930 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
931 frag6_freef(head->ip6q_next, i);
932 }
933 IP6Q_UNLOCK(i);
934 }
935 CURVNET_RESTORE();
936 }
937 VNET_LIST_RUNLOCK_NOSLEEP();
938 }
939
940 int
941 ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
942 {
943 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
944 struct mbuf *t;
945
946 /* Delete frag6 header. */
947 if (m->m_len >= offset + sizeof(struct ip6_frag)) {
948 /* This is the only possible case with !PULLDOWN_TEST. */
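		/*
		 * Slide the IPv6 header and any unfragmentable extension
		 * headers forward over the fragment header, then advance
		 * m_data past the now-unused leading eight bytes.
		 */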
949 bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
950 offset);
951 m->m_data += sizeof(struct ip6_frag);
952 m->m_len -= sizeof(struct ip6_frag);
953 } else {
954                 /* This comes with no copy if the boundary is on a cluster. */
955 if ((t = m_split(m, offset, wait)) == NULL)
956 return (ENOMEM);
957 m_adj(t, sizeof(struct ip6_frag));
958 m_cat(m, t);
959 }
960
961 m->m_flags |= M_FRAGMENTED;
962 return (0);
963 }