/*
 * FreeBSD/Linux Kernel Cross Reference: sys/netinet/ip_frag.c
 */
1 /*
2 * Copyright (C) 1993-1997 by Darren Reed.
3 *
4 * Redistribution and use in source and binary forms are permitted
5 * provided that this notice is preserved and due credit is given
6 * to the original author and the contributors.
7 */
8 #if !defined(lint)
9 static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-1995 Darren Reed";
10 static const char rcsid[] = "@(#)$FreeBSD$";
11 #endif
12
13 #if !defined(_KERNEL) && defined(KERNEL)
14 #define _KERNEL
15 #endif
16 #define __FreeBSD_version 300000 /* it's a hack, but close enough */
17
18 #if !defined(_KERNEL) && !defined(KERNEL)
19 # include <string.h>
20 # include <stdlib.h>
21 #endif
22 #include <sys/errno.h>
23 #include <sys/types.h>
24 #include <sys/param.h>
25 #include <sys/time.h>
26 #include <sys/file.h>
27 #if defined(KERNEL) && (__FreeBSD_version >= 220000)
28 #include <sys/filio.h>
29 #include <sys/fcntl.h>
30 #include <sys/malloc.h>
31 #else
32 #include <sys/ioctl.h>
33 #endif
34 #include <sys/uio.h>
35 #ifndef linux
36 #include <sys/protosw.h>
37 #endif
38 #include <sys/socket.h>
39 #if defined(_KERNEL) && !defined(linux)
40 # include <sys/systm.h>
41 #endif
42 #if !defined(__SVR4) && !defined(__svr4__)
43 # ifndef linux
44 # include <sys/mbuf.h>
45 # endif
46 #else
47 # include <sys/byteorder.h>
48 # include <sys/dditypes.h>
49 # include <sys/stream.h>
50 # include <sys/kmem.h>
51 #endif
52 #if defined(KERNEL) && (__FreeBSD_version >= 300000)
53 #include <sys/malloc.h>
54 #endif
55
56 #include <net/if.h>
57 #ifdef sun
58 #include <net/af.h>
59 #endif
60 #include <net/route.h>
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/ip.h>
64 #ifndef linux
65 #include <netinet/ip_var.h>
66 #endif
67 #include <netinet/tcp.h>
68 #include <netinet/udp.h>
69 #include <netinet/ip_icmp.h>
70 #include "netinet/ip_compat.h"
71 #include <netinet/tcpip.h>
72 #include "netinet/ip_fil.h"
73 #include "netinet/ip_proxy.h"
74 #include "netinet/ip_nat.h"
75 #include "netinet/ip_frag.h"
76 #include "netinet/ip_state.h"
77 #include "netinet/ip_auth.h"
78
/*
 * Fragment-cache state: two hash tables of ipfr_t chains, one for
 * plain filter verdicts and one for NAT sessions, plus the shared
 * statistics block.  IPFT_SIZE buckets each; chains are doubly linked.
 */
static ipfr_t *ipfr_heads[IPFT_SIZE];	/* filter-verdict fragment cache */
static ipfr_t *ipfr_nattab[IPFT_SIZE];	/* NAT fragment cache */
static ipfrstat_t ipfr_stats;		/* counters returned by ipfr_fragstats() */
static int ipfr_inuse = 0;		/* total live entries across both tables */
int fr_ipfrttl = 120;	/* 60 seconds: ticks down twice/sec in ipfr_slowtimer() */
#ifdef _KERNEL
extern int ipfr_timer_id;	/* Solaris timeout() handle (rearmed each tick) */
#endif
#if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
/* Lock order when multiple are held: ipf_nat before ipf_natfrag. */
extern kmutex_t ipf_frag;
extern kmutex_t ipf_natfrag;
extern kmutex_t ipf_nat;
#endif


static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, int, ipfr_t **));
static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **));
96
97
98 ipfrstat_t *ipfr_fragstats()
99 {
100 ipfr_stats.ifs_table = ipfr_heads;
101 ipfr_stats.ifs_nattab = ipfr_nattab;
102 ipfr_stats.ifs_inuse = ipfr_inuse;
103 return &ipfr_stats;
104 }
105
106
107 /*
108 * add a new entry to the fragment cache, registering it as having come
109 * through this box, with the result of the filter operation.
110 */
111 static ipfr_t *ipfr_new(ip, fin, pass, table)
112 ip_t *ip;
113 fr_info_t *fin;
114 int pass;
115 ipfr_t *table[];
116 {
117 ipfr_t **fp, *fr, frag;
118 u_int idx;
119
120 frag.ipfr_p = ip->ip_p;
121 idx = ip->ip_p;
122 frag.ipfr_id = ip->ip_id;
123 idx += ip->ip_id;
124 frag.ipfr_tos = ip->ip_tos;
125 frag.ipfr_src.s_addr = ip->ip_src.s_addr;
126 idx += ip->ip_src.s_addr;
127 frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
128 idx += ip->ip_dst.s_addr;
129 idx *= 127;
130 idx %= IPFT_SIZE;
131
132 /*
133 * first, make sure it isn't already there...
134 */
135 for (fp = &table[idx]; (fr = *fp); fp = &fr->ipfr_next)
136 if (!bcmp((char *)&frag.ipfr_src, (char *)&fr->ipfr_src,
137 IPFR_CMPSZ)) {
138 ipfr_stats.ifs_exists++;
139 return NULL;
140 }
141
142 /*
143 * allocate some memory, if possible, if not, just record that we
144 * failed to do so.
145 */
146 KMALLOC(fr, ipfr_t *, sizeof(*fr));
147 if (fr == NULL) {
148 ipfr_stats.ifs_nomem++;
149 return NULL;
150 }
151
152 /*
153 * Instert the fragment into the fragment table, copy the struct used
154 * in the search using bcopy rather than reassign each field.
155 * Set the ttl to the default and mask out logging from "pass"
156 */
157 if ((fr->ipfr_next = table[idx]))
158 table[idx]->ipfr_prev = fr;
159 fr->ipfr_prev = NULL;
160 fr->ipfr_data = NULL;
161 table[idx] = fr;
162 bcopy((char *)&frag.ipfr_src, (char *)&fr->ipfr_src, IPFR_CMPSZ);
163 fr->ipfr_ttl = fr_ipfrttl;
164 fr->ipfr_pass = pass & ~(FR_LOGFIRST|FR_LOG);
165 /*
166 * Compute the offset of the expected start of the next packet.
167 */
168 fr->ipfr_off = (ip->ip_off & 0x1fff) + (fin->fin_dlen >> 3);
169 ipfr_stats.ifs_new++;
170 ipfr_inuse++;
171 return fr;
172 }
173
174
175 int ipfr_newfrag(ip, fin, pass)
176 ip_t *ip;
177 fr_info_t *fin;
178 int pass;
179 {
180 ipfr_t *ipf;
181
182 MUTEX_ENTER(&ipf_frag);
183 ipf = ipfr_new(ip, fin, pass, ipfr_heads);
184 MUTEX_EXIT(&ipf_frag);
185 return ipf ? 0 : -1;
186 }
187
188
189 int ipfr_nat_newfrag(ip, fin, pass, nat)
190 ip_t *ip;
191 fr_info_t *fin;
192 int pass;
193 nat_t *nat;
194 {
195 ipfr_t *ipf;
196
197 MUTEX_ENTER(&ipf_natfrag);
198 if ((ipf = ipfr_new(ip, fin, pass, ipfr_nattab))) {
199 ipf->ipfr_data = nat;
200 nat->nat_data = ipf;
201 }
202 MUTEX_EXIT(&ipf_natfrag);
203 return ipf ? 0 : -1;
204 }
205
206
207 /*
208 * check the fragment cache to see if there is already a record of this packet
209 * with its filter result known.
210 */
211 static ipfr_t *ipfr_lookup(ip, fin, table)
212 ip_t *ip;
213 fr_info_t *fin;
214 ipfr_t *table[];
215 {
216 ipfr_t *f, frag;
217 u_int idx;
218
219 /*
220 * For fragments, we record protocol, packet id, TOS and both IP#'s
221 * (these should all be the same for all fragments of a packet).
222 *
223 * build up a hash value to index the table with.
224 */
225 frag.ipfr_p = ip->ip_p;
226 idx = ip->ip_p;
227 frag.ipfr_id = ip->ip_id;
228 idx += ip->ip_id;
229 frag.ipfr_tos = ip->ip_tos;
230 frag.ipfr_src.s_addr = ip->ip_src.s_addr;
231 idx += ip->ip_src.s_addr;
232 frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
233 idx += ip->ip_dst.s_addr;
234 idx *= 127;
235 idx %= IPFT_SIZE;
236
237 /*
238 * check the table, careful to only compare the right amount of data
239 */
240 for (f = table[idx]; f; f = f->ipfr_next)
241 if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src,
242 IPFR_CMPSZ)) {
243 u_short atoff, off;
244
245 if (f != table[idx]) {
246 /*
247 * move fragment info. to the top of the list
248 * to speed up searches.
249 */
250 if ((f->ipfr_prev->ipfr_next = f->ipfr_next))
251 f->ipfr_next->ipfr_prev = f->ipfr_prev;
252 f->ipfr_next = table[idx];
253 table[idx]->ipfr_prev = f;
254 f->ipfr_prev = NULL;
255 table[idx] = f;
256 }
257 off = ip->ip_off;
258 atoff = off + (fin->fin_dlen >> 3);
259 /*
260 * If we've follwed the fragments, and this is the
261 * last (in order), shrink expiration time.
262 */
263 if ((off & 0x1fff) == f->ipfr_off) {
264 if (!(off & IP_MF))
265 f->ipfr_ttl = 1;
266 else
267 f->ipfr_off = atoff;
268 }
269 ipfr_stats.ifs_hits++;
270 return f;
271 }
272 return NULL;
273 }
274
275
276 /*
277 * functional interface for NAT lookups of the NAT fragment cache
278 */
279 nat_t *ipfr_nat_knownfrag(ip, fin)
280 ip_t *ip;
281 fr_info_t *fin;
282 {
283 nat_t *nat;
284 ipfr_t *ipf;
285
286 MUTEX_ENTER(&ipf_natfrag);
287 ipf = ipfr_lookup(ip, fin, ipfr_nattab);
288 if (ipf) {
289 nat = ipf->ipfr_data;
290 /*
291 * This is the last fragment for this packet.
292 */
293 if (ipf->ipfr_ttl == 1) {
294 nat->nat_data = NULL;
295 ipf->ipfr_data = NULL;
296 }
297 } else
298 nat = NULL;
299 MUTEX_EXIT(&ipf_natfrag);
300 return nat;
301 }
302
303
304 /*
305 * functional interface for normal lookups of the fragment cache
306 */
307 int ipfr_knownfrag(ip, fin)
308 ip_t *ip;
309 fr_info_t *fin;
310 {
311 int ret;
312 ipfr_t *ipf;
313
314 MUTEX_ENTER(&ipf_frag);
315 ipf = ipfr_lookup(ip, fin, ipfr_heads);
316 ret = ipf ? ipf->ipfr_pass : 0;
317 MUTEX_EXIT(&ipf_frag);
318 return ret;
319 }
320
321
322 /*
323 * forget any references to this external object.
324 */
325 void ipfr_forget(nat)
326 void *nat;
327 {
328 ipfr_t *fr;
329 int idx;
330
331 MUTEX_ENTER(&ipf_natfrag);
332 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
333 for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next)
334 if (fr->ipfr_data == nat)
335 fr->ipfr_data = NULL;
336
337 MUTEX_EXIT(&ipf_natfrag);
338 }
339
340
341 /*
342 * Free memory in use by fragment state info. kept.
343 */
344 void ipfr_unload()
345 {
346 ipfr_t **fp, *fr;
347 nat_t *nat;
348 int idx;
349
350 MUTEX_ENTER(&ipf_frag);
351 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
352 for (fp = &ipfr_heads[idx]; (fr = *fp); ) {
353 *fp = fr->ipfr_next;
354 KFREE(fr);
355 }
356 MUTEX_EXIT(&ipf_frag);
357
358 MUTEX_ENTER(&ipf_nat);
359 MUTEX_ENTER(&ipf_natfrag);
360 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
361 for (fp = &ipfr_nattab[idx]; (fr = *fp); ) {
362 *fp = fr->ipfr_next;
363 if ((nat = (nat_t *)fr->ipfr_data)) {
364 if (nat->nat_data == fr)
365 nat->nat_data = NULL;
366 }
367 KFREE(fr);
368 }
369 MUTEX_EXIT(&ipf_natfrag);
370 MUTEX_EXIT(&ipf_nat);
371 }
372
373
#ifdef _KERNEL
/*
 * Slowly expire held state for fragments.  Timeouts are set in
 * expectation of this being called twice per second (so the default
 * fr_ipfrttl of 120 ticks is about 60 seconds).
 */
# if (BSD >= 199306) || SOLARIS || defined(__sgi)
void ipfr_slowtimer()
# else
int ipfr_slowtimer()
# endif
{
	ipfr_t **fp, *fr;
	nat_t *nat;
	int s, idx;

#ifdef __sgi
	ipfilter_sgi_intfsync();
#endif

	SPL_NET(s);
	MUTEX_ENTER(&ipf_frag);

	/*
	 * Go through the entire table, looking for entries to expire,
	 * decreasing the ttl by one for each entry.  If it reaches 0,
	 * remove it from the chain and free it.
	 */
	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
		for (fp = &ipfr_heads[idx]; (fr = *fp); ) {
			--fr->ipfr_ttl;
			if (fr->ipfr_ttl == 0) {
				/* Unlink from the doubly-linked chain. */
				if (fr->ipfr_prev)
					fr->ipfr_prev->ipfr_next =
					    fr->ipfr_next;
				if (fr->ipfr_next)
					fr->ipfr_next->ipfr_prev =
					    fr->ipfr_prev;
				*fp = fr->ipfr_next;
				ipfr_stats.ifs_expire++;
				ipfr_inuse--;
				KFREE(fr);
			} else
				fp = &fr->ipfr_next;
		}
	MUTEX_EXIT(&ipf_frag);

	/*
	 * Same again for the NAT table, except that if the structure also
	 * still points to a NAT structure, and the NAT structure points back
	 * at the one to be free'd, NULL the reference from the NAT struct.
	 * NOTE: We need to grab both mutex's early, and in this order so as
	 * to prevent a deadlock if both try to expire at the same time.
	 */
	MUTEX_ENTER(&ipf_nat);
	MUTEX_ENTER(&ipf_natfrag);
	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
		for (fp = &ipfr_nattab[idx]; (fr = *fp); ) {
			--fr->ipfr_ttl;
			if (fr->ipfr_ttl == 0) {
				if (fr->ipfr_prev)
					fr->ipfr_prev->ipfr_next =
					    fr->ipfr_next;
				if (fr->ipfr_next)
					fr->ipfr_next->ipfr_prev =
					    fr->ipfr_prev;
				*fp = fr->ipfr_next;
				ipfr_stats.ifs_expire++;
				ipfr_inuse--;
				/* Break the NAT<->fragment cross-link. */
				if ((nat = (nat_t *)fr->ipfr_data)) {
					if (nat->nat_data == fr)
						nat->nat_data = NULL;
				}
				KFREE(fr);
			} else
				fp = &fr->ipfr_next;
		}
	MUTEX_EXIT(&ipf_natfrag);
	MUTEX_EXIT(&ipf_nat);
	SPL_X(s);
	/* Drive the other expiry engines from the same half-second tick. */
	fr_timeoutstate();
	ip_natexpire();
	fr_authexpire();
# if SOLARIS
	/* Solaris has no recurring timer; rearm for ~0.5s from now. */
	ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000));
# else
# ifndef linux
	ip_slowtimo();
# endif
# if (BSD < 199306) && !defined(__sgi)
	return 0;
# endif
# endif
}
#endif /* defined(_KERNEL) */
/* Cache object: c87c633e0d45bd5d497d2641dd700e90 */