1 /* $NetBSD: ip_state.c,v 1.44.2.4 2005/03/16 12:01:12 tron Exp $ */
2
3 /*
4 * Copyright (C) 1995-2003 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 */
8 #if defined(KERNEL) || defined(_KERNEL)
9 # undef KERNEL
10 # undef _KERNEL
11 # define KERNEL 1
12 # define _KERNEL 1
13 #endif
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
17 #include <sys/file.h>
18 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
19 defined(_KERNEL)
20 # include "opt_ipfilter_log.h"
21 #endif
22 #if defined(_KERNEL) && defined(__FreeBSD_version) && \
23 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE)
24 #include "opt_inet6.h"
25 #endif
26 #if !defined(_KERNEL) && !defined(__KERNEL__)
27 # include <stdio.h>
28 # include <stdlib.h>
29 # include <string.h>
30 # define _KERNEL
31 # ifdef __OpenBSD__
32 struct file;
33 # endif
34 # include <sys/uio.h>
35 # undef _KERNEL
36 #endif
37 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
38 # include <sys/filio.h>
39 # include <sys/fcntl.h>
40 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
41 # include "opt_ipfilter.h"
42 # endif
43 #else
44 # include <sys/ioctl.h>
45 #endif
46 #include <sys/time.h>
47 #if !defined(linux)
48 # include <sys/protosw.h>
49 #endif
50 #include <sys/socket.h>
51 #if defined(_KERNEL)
52 # include <sys/systm.h>
53 # if !defined(__SVR4) && !defined(__svr4__)
54 # include <sys/mbuf.h>
55 # endif
56 #endif
57 #if defined(__SVR4) || defined(__svr4__)
58 # include <sys/filio.h>
59 # include <sys/byteorder.h>
60 # ifdef _KERNEL
61 # include <sys/dditypes.h>
62 # endif
63 # include <sys/stream.h>
64 # include <sys/kmem.h>
65 #endif
66
67 #include <net/if.h>
68 #ifdef sun
69 # include <net/af.h>
70 #endif
71 #include <net/route.h>
72 #include <netinet/in.h>
73 #include <netinet/in_systm.h>
74 #include <netinet/ip.h>
75 #include <netinet/tcp.h>
76 #if !defined(linux)
77 # include <netinet/ip_var.h>
78 #endif
79 #if !defined(__hpux) && !defined(linux)
80 # include <netinet/tcp_fsm.h>
81 #endif
82 #include <netinet/udp.h>
83 #include <netinet/ip_icmp.h>
84 #include "netinet/ip_compat.h"
85 #include <netinet/tcpip.h>
86 #include "netinet/ip_fil.h"
87 #include "netinet/ip_nat.h"
88 #include "netinet/ip_frag.h"
89 #include "netinet/ip_state.h"
90 #include "netinet/ip_proxy.h"
91 #ifdef IPFILTER_SYNC
92 #include "netinet/ip_sync.h"
93 #endif
94 #ifdef IPFILTER_SCAN
95 #include "netinet/ip_scan.h"
96 #endif
97 #ifdef USE_INET6
98 #include <netinet/icmp6.h>
99 #endif
100 #if (__FreeBSD_version >= 300000)
101 # include <sys/malloc.h>
102 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
103 # include <sys/libkern.h>
104 # include <sys/systm.h>
105 # endif
106 #endif
107 /* END OF INCLUDES */
108
109
110 #if !defined(lint)
111 #if defined(__NetBSD__)
112 #include <sys/cdefs.h>
113 __KERNEL_RCSID(0, "$NetBSD: ip_state.c,v 1.44.2.4 2005/03/16 12:01:12 tron Exp $");
114 #else
115 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed";
116 static const char rcsid[] = "@(#)Id: ip_state.c,v 2.186.2.13 2004/06/13 23:49:34 darrenr Exp";
117 #endif
118 #endif
119
120 static ipstate_t **ips_table = NULL;
121 static u_long *ips_seed = NULL;
122 static int ips_num = 0;
123 static u_long ips_last_force_flush = 0;
124 ips_stat_t ips_stats;
125
126 #ifdef USE_INET6
127 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *));
128 #endif
129 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *,
130 i6addr_t *, tcphdr_t *, u_32_t));
131 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *));
132 static int fr_state_flush __P((int, int));
133 static ips_stat_t *fr_statetstats __P((void));
134 static void fr_delstate __P((ipstate_t *, int));
135 static int fr_state_remove __P((caddr_t));
136 static void fr_ipsmove __P((ipstate_t *, u_int));
137 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *));
138 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *));
139 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *));
140 static void fr_fixinisn __P((fr_info_t *, ipstate_t *));
141 static void fr_fixoutisn __P((fr_info_t *, ipstate_t *));
142 static void fr_checknewisn __P((fr_info_t *, ipstate_t *));
143
144 int fr_stputent __P((caddr_t));
145 int fr_stgetent __P((caddr_t));
146
147 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */
148 #define FIVE_DAYS (5 * ONE_DAY)
149 #define DOUBLE_HASH(x) (((x) + ips_seed[(x) % fr_statesize]) % fr_statesize)
150
151 u_long fr_tcpidletimeout = FIVE_DAYS,
152 fr_tcpclosewait = IPF_TTLVAL(2 * TCP_MSL),
153 fr_tcplastack = IPF_TTLVAL(2 * TCP_MSL),
154 fr_tcptimeout = IPF_TTLVAL(2 * TCP_MSL),
155 fr_tcpclosed = IPF_TTLVAL(60),
156 fr_tcphalfclosed = IPF_TTLVAL(2 * 3600), /* 2 hours */
157 fr_udptimeout = IPF_TTLVAL(120),
158 fr_udpacktimeout = IPF_TTLVAL(12),
159 fr_icmptimeout = IPF_TTLVAL(60),
160 fr_icmpacktimeout = IPF_TTLVAL(6);
161 int fr_statemax = IPSTATE_MAX,
162 fr_statesize = IPSTATE_SIZE;
163 int fr_state_doflush = 0,
164 fr_state_lock = 0,
165 fr_state_maxbucket = 0,
166 fr_state_maxbucket_reset = 1,
167 fr_state_init = 0;
168 ipftq_t ips_tqtqb[IPF_TCP_NSTATES],
169 ips_udptq,
170 ips_udpacktq,
171 ips_icmptq,
172 ips_icmpacktq,
173 *ips_utqe = NULL;
174 #ifdef IPFILTER_LOG
175 int ipstate_logging = 1;
176 #else
177 int ipstate_logging = 0;
178 #endif
179 ipstate_t *ips_list = NULL;
180
181
182 /* ------------------------------------------------------------------------ */
183 /* Function: fr_stateinit */
184 /* Returns: int - 0 == success, < 0 == failure */
185 /* Parameters: Nil */
186 /* */
187 /* Initialise all the global variables used within the state code. */
188 /* This action also includes initialising locks. */
189 /* ------------------------------------------------------------------------ */
190 int fr_stateinit()
191 {
192 int i;
193
194 KMALLOCS(ips_table, ipstate_t **, fr_statesize * sizeof(ipstate_t *));
195 if (ips_table == NULL)
196 return -1;
197 bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *));
198
199 KMALLOCS(ips_seed, u_long *, fr_statesize * sizeof(*ips_seed));
200 if (ips_seed == NULL)
201 return -2;
202 for (i = 0; i < fr_statesize; i++) {
203 /*
204 * XXX - ips_seed[X] should be a random number of sorts.
205 */
206 #if (__FreeBSD_version >= 400000)
207 ips_seed[i] = arc4random();
208 #else
209 ips_seed[i] = ((u_long)ips_seed + i) * fr_statesize;
210 ips_seed[i] ^= 0xa5a55a5a;
211 ips_seed[i] *= (u_long)ips_seed;
212 ips_seed[i] ^= 0x5a5aa5a5;
213 ips_seed[i] *= fr_statemax;
214 #endif
215 }
216
217 /* fill icmp reply type table */
218 for (i = 0; i <= ICMP_MAXTYPE; i++)
219 icmpreplytype4[i] = -1;
220 icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY;
221 icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY;
222 icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY;
223 icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY;
224 #ifdef USE_INET6
225 /* fill icmp reply type table */
226 for (i = 0; i <= ICMP6_MAXTYPE; i++)
227 icmpreplytype6[i] = -1;
228 icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY;
229 icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT;
230 icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY;
231 icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT;
232 icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT;
233 #endif
234
235 KMALLOCS(ips_stats.iss_bucketlen, u_long *,
236 fr_statesize * sizeof(u_long));
237 if (ips_stats.iss_bucketlen == NULL)
238 return -1;
239 bzero((char *)ips_stats.iss_bucketlen, fr_statesize * sizeof(u_long));
240
241 if (fr_state_maxbucket == 0) {
242 for (i = fr_statesize; i > 0; i >>= 1)
243 fr_state_maxbucket++;
244 fr_state_maxbucket *= 2;
245 }
246
247 fr_sttab_init(ips_tqtqb);
248 ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ips_udptq;
249 ips_udptq.ifq_ttl = (u_long)fr_udptimeout;
250 ips_udptq.ifq_head = NULL;
251 ips_udptq.ifq_tail = &ips_udptq.ifq_head;
252 MUTEX_INIT(&ips_udptq.ifq_lock, "ipftq udp tab");
253 ips_udptq.ifq_next = &ips_udpacktq;
254 ips_udpacktq.ifq_ttl = (u_long)fr_udpacktimeout;
255 ips_udpacktq.ifq_head = NULL;
256 ips_udpacktq.ifq_tail = &ips_udpacktq.ifq_head;
257 MUTEX_INIT(&ips_udpacktq.ifq_lock, "ipftq udpack tab");
258 ips_udpacktq.ifq_next = &ips_icmptq;
259 ips_icmptq.ifq_ttl = (u_long)fr_icmptimeout;
260 ips_icmptq.ifq_head = NULL;
261 ips_icmptq.ifq_tail = &ips_icmptq.ifq_head;
262 MUTEX_INIT(&ips_icmptq.ifq_lock, "ipftq icmp tab");
263 ips_icmptq.ifq_next = &ips_icmpacktq;
264 ips_icmpacktq.ifq_ttl = (u_long)fr_icmpacktimeout;
265 ips_icmpacktq.ifq_head = NULL;
266 ips_icmpacktq.ifq_tail = &ips_icmpacktq.ifq_head;
267 MUTEX_INIT(&ips_icmpacktq.ifq_lock, "ipftq icmpack tab");
268 ips_icmpacktq.ifq_next = NULL;
269
270 RWLOCK_INIT(&ipf_state, "ipf IP state rwlock");
271 MUTEX_INIT(&ipf_stinsert, "ipf state insert mutex");
272 fr_state_init = 1;
273
274 ips_last_force_flush = fr_ticks;
275 return 0;
276 }
277
278
279 /* ------------------------------------------------------------------------ */
280 /* Function: fr_stateunload */
281 /* Returns: Nil */
282 /* Parameters: Nil */
283 /* */
284 /* Release and destroy any resources acquired or initialised so that */
285 /* IPFilter can be unloaded or re-initialised. */
286 /* ------------------------------------------------------------------------ */
287 void fr_stateunload()
288 {
289 ipstate_t *is;
290
291 while ((is = ips_list) != NULL)
292 fr_delstate(is, 0);
293 ips_stats.iss_inuse = 0;
294 ips_num = 0;
295
296 if (fr_state_init == 1) {
297 fr_sttab_destroy(ips_tqtqb);
298 MUTEX_DESTROY(&ips_udptq.ifq_lock);
299 MUTEX_DESTROY(&ips_icmptq.ifq_lock);
300 MUTEX_DESTROY(&ips_udpacktq.ifq_lock);
301 MUTEX_DESTROY(&ips_icmpacktq.ifq_lock);
302 }
303
304 if (ips_table != NULL) {
305 KFREES(ips_table, fr_statesize * sizeof(*ips_table));
306 ips_table = NULL;
307 }
308
309 if (ips_seed != NULL) {
310 KFREES(ips_seed, fr_statesize * sizeof(*ips_seed));
311 ips_seed = NULL;
312 }
313
314 if (ips_stats.iss_bucketlen != NULL) {
315 KFREES(ips_stats.iss_bucketlen, fr_statesize * sizeof(u_long));
316 ips_stats.iss_bucketlen = NULL;
317 }
318
319 if (fr_state_maxbucket_reset == 1)
320 fr_state_maxbucket = 0;
321
322 if (fr_state_init == 1) {
323 fr_state_init = 0;
324 RW_DESTROY(&ipf_state);
325 MUTEX_DESTROY(&ipf_stinsert);
326 }
327 }
328
329
330 /* ------------------------------------------------------------------------ */
331 /* Function: fr_statetstats */
332 /* Returns: ips_stat_t* - pointer to state stats structure */
333 /* Parameters: Nil */
334 /* */
335 /* Put all the current numbers and pointers into a single struct and return */
336 /* a pointer to it. */
337 /* ------------------------------------------------------------------------ */
338 static ips_stat_t *fr_statetstats()
339 {
340 ips_stats.iss_active = ips_num;
341 ips_stats.iss_statesize = fr_statesize;
342 ips_stats.iss_statemax = fr_statemax;
343 ips_stats.iss_table = ips_table;
344 ips_stats.iss_list = ips_list;
345 ips_stats.iss_ticks = fr_ticks;
346 return &ips_stats;
347 }
348
349 /* ------------------------------------------------------------------------ */
350 /* Function: fr_state_remove */
351 /* Returns: int - 0 == success, != 0 == failure */
352 /* Parameters: data(I) - pointer to state structure to delete from table */
353 /* */
354 /* Search for a state structure that matches the one passed, according to */
355 /* the IP addresses and other protocol specific information. */
356 /* ------------------------------------------------------------------------ */
357 static int fr_state_remove(data)
358 caddr_t data;
359 {
360 ipstate_t *sp, st;
361 int error;
362
363 sp = &st;
364 error = fr_inobj(data, &st, IPFOBJ_IPSTATE);
365 if (error)
366 return EFAULT;
367
368 WRITE_ENTER(&ipf_state);
369 for (sp = ips_list; sp; sp = sp->is_next)
370 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) &&
371 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src,
372 sizeof(st.is_src)) &&
373 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_src,
374 sizeof(st.is_dst)) &&
375 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps,
376 sizeof(st.is_ps))) {
377 fr_delstate(sp, ISL_REMOVE);
378 RWLOCK_EXIT(&ipf_state);
379 return 0;
380 }
381 RWLOCK_EXIT(&ipf_state);
382 return ESRCH;
383 }
384
385
386 /* ------------------------------------------------------------------------ */
387 /* Function: fr_state_ioctl */
388 /* Returns: int - 0 == success, != 0 == failure */
389 /* Parameters: data(I) - pointer to ioctl data */
390 /* cmd(I) - ioctl command integer */
391 /* mode(I) - file mode bits used with open */
392 /* */
393 /* Processes an ioctl call made to operate on the IP Filter state device. */
394 /* ------------------------------------------------------------------------ */
395 int fr_state_ioctl(data, cmd, mode)
396 caddr_t data;
397 ioctlcmd_t cmd;
398 int mode;
399 {
400 int arg, ret, error = 0;
401
402 switch (cmd)
403 {
404 /*
405 * Delete an entry from the state table.
406 */
407 case SIOCDELST :
408 error = fr_state_remove(data);
409 break;
410 /*
411 * Flush the state table
412 */
413 case SIOCIPFFL :
414 BCOPYIN(data, (char *)&arg, sizeof(arg));
415 if (arg == 0 || arg == 1) {
416 WRITE_ENTER(&ipf_state);
417 ret = fr_state_flush(arg, 4);
418 RWLOCK_EXIT(&ipf_state);
419 BCOPYOUT((char *)&ret, data, sizeof(ret));
420 } else
421 error = EINVAL;
422 break;
423 #ifdef USE_INET6
424 case SIOCIPFL6 :
425 BCOPYIN(data, (char *)&arg, sizeof(arg));
426 if (arg == 0 || arg == 1) {
427 WRITE_ENTER(&ipf_state);
428 ret = fr_state_flush(arg, 6);
429 RWLOCK_EXIT(&ipf_state);
430 BCOPYOUT((char *)&ret, data, sizeof(ret));
431 } else
432 error = EINVAL;
433 break;
434 #endif
435 #ifdef IPFILTER_LOG
436 /*
437 * Flush the state log.
438 */
439 case SIOCIPFFB :
440 if (!(mode & FWRITE))
441 error = EPERM;
442 else {
443 int tmp;
444
445 tmp = ipflog_clear(IPL_LOGSTATE);
446 BCOPYOUT((char *)&tmp, data, sizeof(tmp));
447 }
448 break;
449 /*
450 * Turn logging of state information on/off.
451 */
452 case SIOCSETLG :
453 if (!(mode & FWRITE))
454 error = EPERM;
455 else {
456 BCOPYIN((char *)data, (char *)&ipstate_logging,
457 sizeof(ipstate_logging));
458 }
459 break;
460 /*
461 * Return the current state of logging.
462 */
463 case SIOCGETLG :
464 BCOPYOUT((char *)&ipstate_logging, (char *)data,
465 sizeof(ipstate_logging));
466 break;
467 /*
468 * Return the number of bytes currently waiting to be read.
469 */
470 case FIONREAD :
471 arg = iplused[IPL_LOGSTATE]; /* returned in an int */
472 BCOPYOUT((char *)&arg, data, sizeof(arg));
473 break;
474 #endif
475 /*
476 * Get the current state statistics.
477 */
478 case SIOCGETFS :
479 error = fr_outobj(data, fr_statetstats(), IPFOBJ_STATESTAT);
480 break;
481 /*
482 * Lock/Unlock the state table. (Locking prevents any changes, which
483 * means no packets match).
484 */
485 case SIOCSTLCK :
486 fr_lock(data, &fr_state_lock);
487 break;
488 /*
489 * Add an entry to the current state table.
490 */
491 case SIOCSTPUT :
492 if (!fr_state_lock) {
493 error = EACCES;
494 break;
495 }
496 error = fr_stputent(data);
497 break;
498 /*
499 * Get a state table entry.
500 */
501 case SIOCSTGET :
502 if (!fr_state_lock) {
503 error = EACCES;
504 break;
505 }
506 error = fr_stgetent(data);
507 break;
508 default :
509 error = EINVAL;
510 break;
511 }
512 return error;
513 }
514
515
516 /* ------------------------------------------------------------------------ */
517 /* Function: fr_stgetent */
518 /* Returns: int - 0 == success, != 0 == failure */
519 /* Parameters: data(I) - pointer to state structure to retrieve from table */
520 /* */
521 /* Copy out state information from the kernel to a user space process. If */
522 /* there is a filter rule associated with the state entry, copy that out */
523 /* as well. The entry to copy out is taken from the value of "ips_next" in */
524 /* the struct passed in and if not null and not found in the list of current*/
525 /* state entries, the retrieval fails. */
526 /* ------------------------------------------------------------------------ */
527 int fr_stgetent(data)
528 caddr_t data;
529 {
530 ipstate_t *is, *isn;
531 ipstate_save_t ips;
532 int error;
533
534 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE);
535 if (error)
536 return EFAULT;
537
538 isn = ips.ips_next;
539 if (isn == NULL) {
540 isn = ips_list;
541 if (isn == NULL) {
542 if (ips.ips_next == NULL)
543 return ENOENT;
544 return 0;
545 }
546 } else {
547 /*
548 * Make sure the pointer we're copying from exists in the
549 * current list of entries. Security precaution to prevent
550 * copying of random kernel data.
551 */
552 for (is = ips_list; is; is = is->is_next)
553 if (is == isn)
554 break;
555 if (!is)
556 return ESRCH;
557 }
558 ips.ips_next = isn->is_next;
559 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
560 ips.ips_rule = isn->is_rule;
561 if (isn->is_rule != NULL)
562 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
563 sizeof(ips.ips_fr));
564 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE);
565 if (error)
566 return EFAULT;
567 return 0;
568 }
569
570
571 /* ------------------------------------------------------------------------ */
572 /* Function: fr_stputent */
573 /* Returns: int - 0 == success, != 0 == failure */
574 /* Parameters: data(I) - pointer to state information struct */
575 /* */
576 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */
577 /* the state table. If the state info. includes a pointer to a filter rule */
578 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */
579 /* output). */
580 /* ------------------------------------------------------------------------ */
581 int fr_stputent(data)
582 caddr_t data;
583 {
584 ipstate_t *is, *isn;
585 ipstate_save_t ips;
586 int error, out, i;
587 frentry_t *fr;
588 char *name;
589
590 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE);
591 if (error)
592 return EFAULT;
593
594 KMALLOC(isn, ipstate_t *);
595 if (isn == NULL)
596 return ENOMEM;
597
598 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
599 bzero((char *)isn, offsetof(struct ipstate, is_pkts));
600 isn->is_sti.tqe_pnext = NULL;
601 isn->is_sti.tqe_next = NULL;
602 isn->is_sti.tqe_ifq = NULL;
603 isn->is_sti.tqe_parent = isn;
604 isn->is_sync = NULL;
605 fr = ips.ips_rule;
606 if (fr == NULL) {
607 fr_stinsert(isn, 0);
608 return 0;
609 }
610
611 if (isn->is_flags & SI_NEWFR) {
612 KMALLOC(fr, frentry_t *);
613 if (fr == NULL) {
614 KFREE(isn);
615 return ENOMEM;
616 }
617 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
618 out = fr->fr_flags & FR_OUTQUE ? 1 : 0;
619 isn->is_rule = fr;
620 ips.ips_is.is_rule = fr;
621 MUTEX_NUKE(&fr->fr_lock);
622 MUTEX_INIT(&fr->fr_lock, "state filter rule lock");
623
624 /*
625 * Look up all the interface names in the rule.
626 */
627 for (i = 0; i < 4; i++) {
628 name = fr->fr_ifnames[i];
629 if ((name[1] == '\0') &&
630 ((name[0] == '-') || (name[0] == '*'))) {
631 fr->fr_ifas[i] = NULL;
632 } else if (*name != '\0') {
633 name[LIFNAMSIZ - 1] = '\0';
634 fr->fr_ifas[i] = GETIFP(name, fr->fr_v);
635 if (fr->fr_ifas[i] == NULL)
636 fr->fr_ifas[i] = (void *)-1;
637 else {
638 COPYIFNAME(fr->fr_ifas[i],
639 isn->is_ifname[i]);
640 }
641 }
642 isn->is_ifp[out] = fr->fr_ifas[i];
643 }
644
645 fr->fr_dsize = 0;
646 fr->fr_data = NULL;
647
648 fr_resolvdest(&fr->fr_tif, fr->fr_v);
649 fr_resolvdest(&fr->fr_dif, fr->fr_v);
650
651 /*
652 * send a copy back to userland of what we ended up
653 * to allow for verification.
654 */
655 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE);
656 if (error) {
657 KFREE(isn);
658 MUTEX_DESTROY(&fr->fr_lock);
659 KFREE(fr);
660 return EFAULT;
661 }
662 } else {
663 for (is = ips_list; is; is = is->is_next)
664 if (is->is_rule == fr)
665 break;
666 if (!is) {
667 KFREE(isn);
668 return ESRCH;
669 }
670 }
671 fr_stinsert(isn, 0);
672 return 0;
673 }
674
675
676 /* ------------------------------------------------------------------------ */
677 /* Function: fr_stinsert */
678 /* Returns: Nil */
679 /* Parameters: is(I) - pointer to state structure */
680 /* rev(I) - flag indicating forward/reverse direction of packet */
681 /* */
682 /* Inserts a state structure into the hash table (for lookups) and the list */
683 /* of state entries (for enumeration). Resolves all of the interface names */
684 /* to pointers and adjusts running stats for the hash table as appropriate. */
685 /* ------------------------------------------------------------------------ */
686 void fr_stinsert(is, rev)
687 ipstate_t *is;
688 int rev;
689 {
690 u_int hv;
691 char *name;
692 int i;
693
694 MUTEX_INIT(&is->is_lock, "ipf state entry");
695
696 /*
697 * Look up all the interface names in the state entry.
698 */
699 for (i = 0; i < 4; i++) {
700 name = is->is_ifname[i];
701 if ((name[1] == '\0') &&
702 ((name[0] == '-') || (name[0] == '*'))) {
703 is->is_ifp[0] = NULL;
704 } else if (*name != '\0') {
705 is->is_ifp[i] = GETIFP(name, is->is_v);
706 if (is->is_ifp[i] == NULL)
707 is->is_ifp[i] = (void *)-1;
708 }
709 }
710
711 /*
712 * If we could trust is_hv, then the modulous would not be needed, but
713 * when running with IPFILTER_SYNC, this stops bad values.
714 */
715 hv = is->is_hv % fr_statesize;
716 is->is_hv = hv;
717
718 MUTEX_ENTER(&ipf_stinsert);
719
720 /*
721 * add into list table.
722 */
723 if (ips_list != NULL)
724 ips_list->is_pnext = &is->is_next;
725 is->is_pnext = &ips_list;
726 is->is_next = ips_list;
727 ips_list = is;
728
729 if (ips_table[hv] != NULL)
730 ips_table[hv]->is_phnext = &is->is_hnext;
731 else
732 ips_stats.iss_inuse++;
733 is->is_phnext = ips_table + hv;
734 is->is_hnext = ips_table[hv];
735 ips_table[hv] = is;
736 ips_stats.iss_bucketlen[hv]++;
737 ips_num++;
738 MUTEX_EXIT(&ipf_stinsert);
739
740 fr_setstatequeue(is, rev);
741 }
742
743
744 /* ------------------------------------------------------------------------ */
745 /* Function: fr_addstate */
746 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */
747 /* Parameters: fin(I) - pointer to packet information */
748 /* stsave(O) - pointer to place to save pointer to created */
749 /* state structure. */
750 /* flags(I) - flags to use when creating the structure */
751 /* */
752 /* Creates a new IP state structure from the packet information collected. */
753 /* Inserts it into the state table and links it at the head of the active */
754 /* list. If the capacity of the table has reached the maximum allowed then */
755 /* the call will fail and a flush is scheduled for the next timeout call. */
756 /* ------------------------------------------------------------------------ */
757 ipstate_t *fr_addstate(fin, stsave, flags)
758 fr_info_t *fin;
759 ipstate_t **stsave;
760 u_int flags;
761 {
762 ipstate_t *is, ips;
763 struct icmp *ic;
764 u_int pass, hv;
765 frentry_t *fr;
766 tcphdr_t *tcp;
767 grehdr_t *gre;
768 void *ifp;
769 int out;
770
771 if (fr_state_lock ||
772 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGTAIL|FI_BAD)))
773 return NULL;
774
775 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN))
776 return NULL;
777
778 fr = fin->fin_fr;
779 if ((fr->fr_statemax == 0) && (ips_num == fr_statemax)) {
780 ATOMIC_INCL(ips_stats.iss_max);
781 fr_state_doflush = 1;
782 return NULL;
783 }
784
785 /*
786 * If a "keep state" rule has reached the maximum number of references
787 * to it, then schedule an automatic flush in case we can clear out
788 * some "dead old wood".
789 */
790 if ((fr != NULL) && (fr->fr_statemax != 0) &&
791 (fr->fr_statecnt >= fr->fr_statemax)) {
792 MUTEX_EXIT(&fr->fr_lock);
793 ATOMIC_INCL(ips_stats.iss_maxref);
794 fr_state_doflush = 1;
795 return NULL;
796 }
797
798 pass = (fr == NULL) ? 0 : fr->fr_flags;
799
800 ic = NULL;
801 tcp = NULL;
802 out = fin->fin_out;
803 is = &ips;
804 bzero((char *)is, sizeof(*is));
805 is->is_die = 1 + fr_ticks;
806 is->is_flags = flags & IS_INHERITED;
807
808 /*
809 * Copy and calculate...
810 */
811 hv = (is->is_p = fin->fin_fi.fi_p);
812 is->is_src = fin->fin_fi.fi_src;
813 hv += is->is_saddr;
814 is->is_dst = fin->fin_fi.fi_dst;
815 hv += is->is_daddr;
816 #ifdef USE_INET6
817 if (fin->fin_v == 6) {
818 /*
819 * For ICMPv6, we check to see if the destination address is
820 * a multicast address. If it is, do not include it in the
821 * calculation of the hash because the correct reply will come
822 * back from a real address, not a multicast address.
823 */
824 if ((is->is_p == IPPROTO_ICMPV6) &&
825 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) {
826 /*
827 * So you can do keep state with neighbour discovery.
828 */
829 is->is_flags |= SI_W_DADDR;
830 hv -= is->is_daddr;
831 } else {
832 hv += is->is_dst.i6[1];
833 hv += is->is_dst.i6[2];
834 hv += is->is_dst.i6[3];
835 }
836 hv += is->is_src.i6[1];
837 hv += is->is_src.i6[2];
838 hv += is->is_src.i6[3];
839 }
840 #endif
841
842 switch (is->is_p)
843 {
844 #ifdef USE_INET6
845 case IPPROTO_ICMPV6 :
846 ic = fin->fin_dp;
847
848 switch (ic->icmp_type)
849 {
850 case ICMP6_ECHO_REQUEST :
851 is->is_icmp.ici_type = ic->icmp_type;
852 hv += (is->is_icmp.ici_id = ic->icmp_id);
853 break;
854 case ICMP6_MEMBERSHIP_QUERY :
855 case ND_ROUTER_SOLICIT :
856 case ND_NEIGHBOR_SOLICIT :
857 case ICMP6_NI_QUERY :
858 is->is_icmp.ici_type = ic->icmp_type;
859 break;
860 default :
861 return NULL;
862 }
863 ATOMIC_INCL(ips_stats.iss_icmp);
864 break;
865 #endif
866 case IPPROTO_ICMP :
867 ic = fin->fin_dp;
868
869 switch (ic->icmp_type)
870 {
871 case ICMP_ECHO :
872 case ICMP_TSTAMP :
873 case ICMP_IREQ :
874 case ICMP_MASKREQ :
875 is->is_icmp.ici_type = ic->icmp_type;
876 hv += (is->is_icmp.ici_id = ic->icmp_id);
877 break;
878 default :
879 return NULL;
880 }
881 ATOMIC_INCL(ips_stats.iss_icmp);
882 break;
883
884 case IPPROTO_GRE :
885 gre = fin->fin_dp;
886
887 is->is_gre.gs_flags = gre->gr_flags;
888 is->is_gre.gs_ptype = gre->gr_ptype;
889 is->is_gre.gs_call = gre->gr_call;
890 hv += is->is_gre.gs_call;
891 break;
892
893 case IPPROTO_TCP :
894 tcp = fin->fin_dp;
895
896 if (tcp->th_flags & TH_RST)
897 return NULL;
898 /*
899 * The endian of the ports doesn't matter, but the ack and
900 * sequence numbers do as we do mathematics on them later.
901 */
902 is->is_sport = htons(fin->fin_data[0]);
903 is->is_dport = htons(fin->fin_data[1]);
904 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
905 hv += is->is_sport;
906 hv += is->is_dport;
907 }
908
909 /*
910 * If this is a real packet then initialise fields in the
911 * state information structure from the TCP header information.
912 */
913
914 is->is_maxdwin = 1;
915 is->is_maxswin = ntohs(tcp->th_win);
916 if (is->is_maxswin == 0)
917 is->is_maxswin = 1;
918
919 if ((fin->fin_flx & FI_IGNORE) == 0) {
920 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
921 (TCP_OFF(tcp) << 2) +
922 ((tcp->th_flags & TH_SYN) ? 1 : 0) +
923 ((tcp->th_flags & TH_FIN) ? 1 : 0);
924 is->is_maxsend = is->is_send;
925
926 /*
927 * Window scale option is only present in
928 * SYN/SYN-ACK packet.
929 */
930 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) ==
931 TH_SYN &&
932 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
933 if (fr_tcpoptions(fin, tcp,
934 &is->is_tcp.ts_data[0]))
935 is->is_swinflags = TCP_WSCALE_SEEN|
936 TCP_WSCALE_FIRST;
937 }
938
939 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) {
940 fr_checknewisn(fin, is);
941 fr_fixoutisn(fin, is);
942 }
943
944 if ((tcp->th_flags & TH_OPENING) == TH_SYN)
945 is->is_flags |= IS_TCPFSM;
946 else {
947 is->is_maxdwin = is->is_maxswin * 2;
948 is->is_dend = ntohl(tcp->th_ack);
949 is->is_maxdend = ntohl(tcp->th_ack);
950 is->is_maxdwin *= 2;
951 }
952 }
953
954 /*
955 * If we're creating state for a starting connection, start the
956 * timer on it as we'll never see an error if it fails to
957 * connect.
958 */
959 ATOMIC_INCL(ips_stats.iss_tcp);
960 break;
961
962 case IPPROTO_UDP :
963 tcp = fin->fin_dp;
964
965 is->is_sport = htons(fin->fin_data[0]);
966 is->is_dport = htons(fin->fin_data[1]);
967 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
968 hv += tcp->th_dport;
969 hv += tcp->th_sport;
970 }
971 ATOMIC_INCL(ips_stats.iss_udp);
972 break;
973
974 default :
975 break;
976 }
977 hv = DOUBLE_HASH(hv);
978 is->is_hv = hv;
979 is->is_rule = fr;
980
981 /*
982 * Look for identical state.
983 */
984 for (is = ips_table[is->is_hv % fr_statesize]; is != NULL;
985 is = is->is_hnext) {
986 if (bcmp(&ips.is_src, &is->is_src,
987 offsetof(struct ipstate, is_ps) -
988 offsetof(struct ipstate, is_src)) == 0)
989 break;
990 }
991 if (is != NULL)
992 return NULL;
993
994 if (ips_stats.iss_bucketlen[hv] >= fr_state_maxbucket) {
995 ATOMIC_INCL(ips_stats.iss_bucketfull);
996 return NULL;
997 }
998 KMALLOC(is, ipstate_t *);
999 if (is == NULL) {
1000 ATOMIC_INCL(ips_stats.iss_nomem);
1001 return NULL;
1002 }
1003 bcopy((char *)&ips, (char *)is, sizeof(*is));
1004 /*
1005 * Do not do the modulous here, it is done in fr_stinsert().
1006 */
1007 if (fr != NULL) {
1008 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN);
1009 MUTEX_ENTER(&fr->fr_lock);
1010 fr->fr_ref++;
1011 fr->fr_statecnt++;
1012 MUTEX_EXIT(&fr->fr_lock);
1013 if (fr->fr_age[0] != 0) {
1014 is->is_tqehead[0] = fr_addtimeoutqueue(&ips_utqe,
1015 fr->fr_age[0]);
1016 is->is_tqehead[1] = is->is_tqehead[0];
1017 is->is_sti.tqe_flags |= TQE_RULEBASED;
1018 }
1019 if (fr->fr_age[0] != 0 && fr->fr_age[1] != fr->fr_age[0])
1020 is->is_tqehead[1] = fr_addtimeoutqueue(&ips_utqe,
1021 fr->fr_age[1]);
1022 is->is_tag = fr->fr_logtag;
1023
1024 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1];
1025 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2];
1026 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3];
1027
1028 if (((ifp = fr->fr_ifas[1]) != NULL) &&
1029 (ifp != (void *)-1)) {
1030 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1]);
1031 }
1032 if (((ifp = fr->fr_ifas[2]) != NULL) &&
1033 (ifp != (void *)-1)) {
1034 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1]);
1035 }
1036 if (((ifp = fr->fr_ifas[3]) != NULL) &&
1037 (ifp != (void *)-1)) {
1038 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1]);
1039 }
1040 } else {
1041 pass = fr_flags;
1042 is->is_tag = FR_NOLOGTAG;
1043 }
1044
1045 is->is_ifp[out << 1] = fin->fin_ifp;
1046 if (fin->fin_ifp != NULL) {
1047 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1]);
1048 }
1049
1050 is->is_ref = 2;
1051 is->is_pass = pass;
1052 is->is_pkts[0] = 0, is->is_bytes[0] = 0;
1053 is->is_pkts[1] = 0, is->is_bytes[1] = 0;
1054 is->is_pkts[2] = 0, is->is_bytes[2] = 0;
1055 is->is_pkts[3] = 0, is->is_bytes[3] = 0;
1056 if ((fin->fin_flx & FI_IGNORE) == 0) {
1057 is->is_pkts[out] = 1;
1058 is->is_bytes[out] = fin->fin_dlen + fin->fin_hlen;
1059 is->is_flx[out][0] = fin->fin_flx & FI_CMP;
1060 is->is_flx[out][0] &= ~FI_OOW;
1061 }
1062
1063 if (pass & FR_STSTRICT)
1064 is->is_flags |= IS_STRICT;
1065
1066 /*
1067 * We want to check everything that is a property of this packet,
1068 * but we don't (automatically) care about it's fragment status as
1069 * this may change.
1070 */
1071 is->is_v = fin->fin_v;
1072 is->is_opt = fin->fin_optmsk;
1073 is->is_optmsk = 0xffffffff;
1074 is->is_sec = fin->fin_secmsk;
1075 is->is_secmsk = 0xffff;
1076 is->is_auth = fin->fin_auth;
1077 is->is_authmsk = 0xffff;
1078 if (flags & (SI_WILDP|SI_WILDA)) {
1079 ATOMIC_INCL(ips_stats.iss_wild);
1080 }
1081 is->is_rulen = fin->fin_rule;
1082
1083
1084 if (pass & FR_LOGFIRST)
1085 is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
1086
1087 READ_ENTER(&ipf_state);
1088 is->is_me = stsave;
1089
1090 fr_stinsert(is, fin->fin_rev);
1091
1092 if (fin->fin_p == IPPROTO_TCP) {
1093 /*
1094 * If we're creating state for a starting connection, start the
1095 * timer on it as we'll never see an error if it fails to
1096 * connect.
1097 */
1098 MUTEX_ENTER(&is->is_lock);
1099 (void) fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags);
1100 MUTEX_EXIT(&is->is_lock);
1101 #ifdef IPFILTER_SCAN
1102 if ((is->is_flags & SI_CLONE) == 0)
1103 (void) ipsc_attachis(is);
1104 #endif
1105 }
1106 #ifdef IPFILTER_SYNC
1107 if ((is->is_flags & SI_CLONE) == 0)
1108 is->is_sync = ipfsync_new(SMC_STATE, fin, is);
1109 #endif
1110 if (ipstate_logging)
1111 ipstate_log(is, ISL_NEW);
1112
1113 RWLOCK_EXIT(&ipf_state);
1114 fin->fin_state = is;
1115 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr);
1116 fin->fin_flx |= FI_STATE;
1117 if (fin->fin_flx & FI_FRAG)
1118 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE);
1119
1120 return is;
1121 }
1122
1123
1124 /* ------------------------------------------------------------------------ */
1125 /* Function: fr_tcpoptions */
1126 /* Returns: int - 1 == packet matches state entry, 0 == it does not */
1127 /* Parameters: fin(I) - pointer to packet information */
1128 /* tcp(I) - pointer to TCP packet header */
1129 /* td(I) - pointer to TCP data held as part of the state */
1130 /* */
1131 /* Look after the TCP header for any options and deal with those that are */
1132 /* present. Record details about those that we recogise. */
1133 /* ------------------------------------------------------------------------ */
1134 static int fr_tcpoptions(fin, tcp, td)
1135 fr_info_t *fin;
1136 tcphdr_t *tcp;
1137 tcpdata_t *td;
1138 {
1139 int off, mlen, ol, i, len, retval;
1140 char buf[64], *s, opt;
1141 mb_t *m = NULL;
1142
1143 off = fin->fin_hlen + sizeof(*tcp);
1144 len = (TCP_OFF(tcp) << 2) - sizeof(*tcp);
1145 if (fin->fin_plen < off + len)
1146 return 0;
1147
1148 m = fin->fin_m;
1149 off += fin->fin_ipoff;
1150 mlen = MSGDSIZE(m) - off;
1151 if (len > mlen) {
1152 len = mlen;
1153 retval = 0;
1154 } else {
1155 retval = 1;
1156 }
1157
1158 COPYDATA(m, off, len, buf);
1159
1160 for (s = buf; len > 0; ) {
1161 opt = *s;
1162 if (opt == TCPOPT_EOL)
1163 break;
1164 else if (opt == TCPOPT_NOP)
1165 ol = 1;
1166 else {
1167 if (len < 2)
1168 break;
1169 ol = (int)*(s + 1);
1170 if (ol < 2 || ol > len)
1171 break;
1172
1173 /*
1174 * Extract the TCP options we are interested in out of
1175 * the header and store them in the the tcpdata struct.
1176 */
1177 switch (opt)
1178 {
1179 case TCPOPT_WINDOW :
1180 if (ol == TCPOLEN_WINDOW) {
1181 i = (int)*(s + 2);
1182 if (i > TCP_WSCALE_MAX)
1183 i = TCP_WSCALE_MAX;
1184 else if (i < 0)
1185 i = 0;
1186 td->td_winscale = i;
1187 }
1188 break;
1189 case TCPOPT_MAXSEG :
1190 /*
1191 * So, if we wanted to set the TCP MAXSEG,
1192 * it should be done here...
1193 */
1194 if (ol == TCPOLEN_MAXSEG) {
1195 i = (int)*(s + 2);
1196 i <<= 8;
1197 i += (int)*(s + 3);
1198 td->td_maxseg = i;
1199 }
1200 break;
1201 }
1202 }
1203 len -= ol;
1204 s += ol;
1205 }
1206 return retval;
1207 }
1208
1209
1210 /* ------------------------------------------------------------------------ */
1211 /* Function: fr_tcpstate */
1212 /* Returns: int - 1 == packet matches state entry, 0 == it does not */
1213 /* Parameters: fin(I) - pointer to packet information */
1214 /* tcp(I) - pointer to TCP packet header */
1215 /* is(I) - pointer to master state structure */
1216 /* */
1217 /* Check to see if a packet with TCP headers fits within the TCP window. */
1218 /* Change timeout depending on whether new packet is a SYN-ACK returning */
1219 /* for a SYN or a RST or FIN which indicate time to close up shop. */
1220 /* ------------------------------------------------------------------------ */
1221 static int fr_tcpstate(fin, tcp, is)
1222 fr_info_t *fin;
1223 tcphdr_t *tcp;
1224 ipstate_t *is;
1225 {
1226 int source, ret = 0, flags;
1227 tcpdata_t *fdata, *tdata;
1228
1229 source = !fin->fin_rev;
1230 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) &&
1231 (ntohs(is->is_sport) != fin->fin_data[0]))
1232 source = 0;
1233 fdata = &is->is_tcp.ts_data[!source];
1234 tdata = &is->is_tcp.ts_data[source];
1235
1236 MUTEX_ENTER(&is->is_lock);
1237 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) {
1238 #ifdef IPFILTER_SCAN
1239 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) {
1240 ipsc_packet(fin, is);
1241 if (FR_ISBLOCK(is->is_pass)) {
1242 MUTEX_EXIT(&is->is_lock);
1243 return 1;
1244 }
1245 }
1246 #endif
1247
1248 /*
1249 * Nearing end of connection, start timeout.
1250 */
1251 ret = fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags);
1252 if (ret == 0) {
1253 MUTEX_EXIT(&is->is_lock);
1254 return 0;
1255 }
1256
1257 /*
1258 * set s0's as appropriate. Use syn-ack packet as it
1259 * contains both pieces of required information.
1260 */
1261 /*
1262 * Window scale option is only present in SYN/SYN-ACK packet.
1263 * Compare with ~TH_FIN to mask out T/TCP setups.
1264 */
1265 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL);
1266 if (flags == (TH_SYN|TH_ACK)) {
1267 is->is_s0[source] = ntohl(tcp->th_ack);
1268 is->is_s0[!source] = ntohl(tcp->th_seq) + 1;
1269 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)) &&
1270 tdata->td_winscale) {
1271 if (fr_tcpoptions(fin, tcp, fdata)) {
1272 fdata->td_winflags = TCP_WSCALE_SEEN|
1273 TCP_WSCALE_FIRST;
1274 } else {
1275 if (!fdata->td_winscale)
1276 tdata->td_winscale = 0;
1277 }
1278 }
1279 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
1280 fr_checknewisn(fin, is);
1281 } else if (flags == TH_SYN) {
1282 is->is_s0[source] = ntohl(tcp->th_seq) + 1;
1283 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)))
1284 if (fr_tcpoptions(fin, tcp, tdata)) {
1285 tdata->td_winflags = TCP_WSCALE_SEEN|
1286 TCP_WSCALE_FIRST;
1287 }
1288
1289 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
1290 fr_checknewisn(fin, is);
1291
1292 }
1293 ret = 1;
1294 } else
1295 fin->fin_flx |= FI_OOW;
1296 MUTEX_EXIT(&is->is_lock);
1297 return ret;
1298 }
1299
1300
1301 /* ------------------------------------------------------------------------ */
1302 /* Function: fr_checknewisn */
1303 /* Returns: Nil */
1304 /* Parameters: fin(I) - pointer to packet information */
1305 /* is(I) - pointer to master state structure */
1306 /* */
1307 /* Check to see if this TCP connection is expecting and needs a new */
1308 /* sequence number for a particular direction of the connection. */
1309 /* */
1310 /* NOTE: This does not actually change the sequence numbers, only gets new */
1311 /* one ready. */
1312 /* ------------------------------------------------------------------------ */
1313 static void fr_checknewisn(fin, is)
1314 fr_info_t *fin;
1315 ipstate_t *is;
1316 {
1317 u_32_t sumd, old, new;
1318 tcphdr_t *tcp;
1319 int i;
1320
1321 i = fin->fin_rev;
1322 tcp = fin->fin_dp;
1323
1324 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) ||
1325 ((i == 1) && !(is->is_flags & IS_ISNACK))) {
1326 old = tcp->th_seq;
1327 new = fr_newisn(fin);
1328 is->is_isninc[i] = new - old;
1329 CALC_SUMD(old, new, sumd);
1330 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16);
1331
1332 is->is_flags |= ((i == 0) ? IS_ISNSYN : IS_ISNACK);
1333 }
1334 }
1335
1336
/* ------------------------------------------------------------------------ */
/* Function:    fr_tcpinwindow                                              */
/* Returns:     int - 1 == packet inside TCP "window", 0 == not inside.     */
/* Parameters:  fin(I)   - pointer to packet information                    */
/*              fdata(I) - pointer to tcp state information (forward)       */
/*              tdata(I) - pointer to tcp state information (reverse)       */
/*              tcp(I)   - pointer to TCP packet header                     */
/*              flags(I) - state entry flags; IS_TCPFSM and IS_STRICT are   */
/*                         the bits tested here                             */
/*                                                                          */
/* Given a packet has matched addresses and ports, check to see if it is    */
/* within the TCP data window.  In a show of generosity, allow packets that */
/* are within the window space behind the current sequence # as well.       */
/*                                                                          */
/* The check is only compiled in when BROKEN_TCP_WINDOW_CHECK is defined;   */
/* without it this function unconditionally returns 1.                      */
/* When the packet is accepted, fdata and tdata are updated with the new    */
/* end/maxend/maxwin values.                                                */
/* ------------------------------------------------------------------------ */
int fr_tcpinwindow(fin, fdata, tdata, tcp, flags)
fr_info_t *fin;
tcpdata_t  *fdata, *tdata;
tcphdr_t *tcp;
int flags;
{
#ifdef BROKEN_TCP_WINDOW_CHECK
	tcp_seq seq, ack, end;
	int ackskew, tcpflags;
	u_32_t win, maxwin;

	/*
	 * Find difference between last checked packet and this packet.
	 */
	tcpflags = tcp->th_flags;
	seq = ntohl(tcp->th_seq);
	ack = ntohl(tcp->th_ack);
	/* The advertised window is not scaled on packets carrying SYN. */
	if (tcpflags & TH_SYN)
		win = ntohs(tcp->th_win);
	else
		win = ntohs(tcp->th_win) << fdata->td_winscale;
	/* A zero window is treated as a window of one. */
	if (win == 0)
		win = 1;

	/*
	 * if window scaling is present, the scaling is only allowed
	 * for windows not in the first SYN packet. In that packet the
	 * window is 65535 to specify the largest window possible
	 * for receivers not implementing the window scale option.
	 * Currently, we do not assume TTCP here. That means that
	 * if we see a second packet from a host (after the initial
	 * SYN), we can assume that the receiver of the SYN did
	 * already send back the SYN/ACK (and thus that we know if
	 * the receiver also does window scaling)
	 */
	if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) {
		if (tdata->td_winflags & TCP_WSCALE_SEEN) {
			fdata->td_winflags &= ~TCP_WSCALE_FIRST;
			fdata->td_maxwin = win;
		} else {
			/* Other side never offered scaling: disable it both ways. */
			fdata->td_winscale = 0;
			fdata->td_winflags = 0;
			tdata->td_winscale = 0;
			tdata->td_winflags = 0;
		}
	}

	/*
	 * Sequence number just past this segment: payload length (data
	 * length minus the TCP header), plus one each for SYN and FIN.
	 */
	end = seq + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
	      ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0);

	if ((fdata->td_end == 0) &&
	    (!(flags & IS_TCPFSM) ||
	     ((tcpflags & TH_OPENING) == TH_OPENING))) {
		/*
		 * Must be a (outgoing) SYN-ACK in reply to a SYN.
		 */
		fdata->td_end = end;
		fdata->td_maxwin = 1;
		fdata->td_maxend = end + win;
	}

	if (!(tcpflags & TH_ACK)) {  /* Pretend an ack was sent */
		ack = tdata->td_end;
	} else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
		   (ack == 0)) {
		/* gross hack to get around certain broken tcp stacks */
		ack = tdata->td_end;
	}

	/*
	 * A segment that occupies no sequence space is positioned at the
	 * sender's current end.
	 */
	if (seq == end)
		seq = end = fdata->td_end;

	maxwin = tdata->td_maxwin;
	/* Positive when the ack is behind the other side's end. */
	ackskew = tdata->td_end - ack;

	/*
	 * Strict sequencing only allows in-order delivery.
	 */
	if ((flags & IS_STRICT) != 0) {
		if (seq != fdata->td_end) {
			return 0;
		}
	}

	/* Wrap-safe sequence number comparisons. */
#define	SEQ_GE(a,b)	((int)((a) - (b)) >= 0)
#define	SEQ_GT(a,b)	((int)((a) - (b)) > 0)
	/*
	 * The maxend/maxwin tests below are only applied in _KERNEL builds;
	 * the ackskew bounds are always applied.
	 */
	if (
#if defined(_KERNEL)
	    (SEQ_GE(fdata->td_maxend, end)) &&
	    (SEQ_GE(seq, fdata->td_end - maxwin)) &&
#endif
/* XXX what about big packets */
#define MAXACKWINDOW 66000
	    (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) &&
	    ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) {

		/* if ackskew < 0 then this should be due to fragmented
		 * packets. There is no way to know the length of the
		 * total packet in advance.
		 * We do know the total length from the fragment cache though.
		 * Note however that there might be more sessions with
		 * exactly the same source and destination parameters in the
		 * state cache (and source and destination is the only stuff
		 * that is saved in the fragment cache). Note further that
		 * some TCP connections in the state cache are hashed with
		 * sport and dport as well which makes it not worthwhile to
		 * look for them.
		 * Thus, when ackskew is negative but still seems to belong
		 * to this session, we bump up the destinations end value.
		 */
		if (ackskew < 0)
			tdata->td_end = ack;

		/* update max window seen */
		if (fdata->td_maxwin < win)
			fdata->td_maxwin = win;
		if (SEQ_GT(end, fdata->td_end))
			fdata->td_end = end;
		if (SEQ_GE(ack + win, tdata->td_maxend))
			tdata->td_maxend = ack + win;
		return 1;
	}
	return 0;
#else
	/* Window checking is compiled out: accept every packet. */
	return 1;
#endif
}
1476
1477
1478 /* ------------------------------------------------------------------------ */
1479 /* Function: fr_stclone */
1480 /* Returns: ipstate_t* - NULL == cloning failed, */
1481 /* else pointer to new state structure */
1482 /* Parameters: fin(I) - pointer to packet information */
1483 /* tcp(I) - pointer to TCP/UDP header */
1484 /* is(I) - pointer to master state structure */
1485 /* */
1486 /* Create a "duplcate" state table entry from the master. */
1487 /* ------------------------------------------------------------------------ */
1488 static ipstate_t *fr_stclone(fin, tcp, is)
1489 fr_info_t *fin;
1490 tcphdr_t *tcp;
1491 ipstate_t *is;
1492 {
1493 ipstate_t *clone;
1494 frentry_t *fr;
1495 u_32_t send;
1496
1497 if (ips_num == fr_statemax) {
1498 ATOMIC_INCL(ips_stats.iss_max);
1499 fr_state_doflush = 1;
1500 return NULL;
1501 }
1502 KMALLOC(clone, ipstate_t *);
1503 if (clone == NULL)
1504 return NULL;
1505 bcopy((char *)is, (char *)clone, sizeof(*clone));
1506
1507 MUTEX_NUKE(&clone->is_lock);
1508
1509 clone->is_die = ONE_DAY + fr_ticks;
1510 clone->is_state[0] = 0;
1511 clone->is_state[1] = 0;
1512 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
1513 ((tcp->th_flags & TH_SYN) ? 1 : 0) +
1514 ((tcp->th_flags & TH_FIN) ? 1 : 0);
1515
1516 if (fin->fin_rev == 1) {
1517 clone->is_dend = send;
1518 clone->is_maxdend = send;
1519 clone->is_send = 0;
1520 clone->is_maxswin = 1;
1521 clone->is_maxdwin = ntohs(tcp->th_win);
1522 if (clone->is_maxdwin == 0)
1523 clone->is_maxdwin = 1;
1524 } else {
1525 clone->is_send = send;
1526 clone->is_maxsend = send;
1527 clone->is_dend = 0;
1528 clone->is_maxdwin = 1;
1529 clone->is_maxswin = ntohs(tcp->th_win);
1530 if (clone->is_maxswin == 0)
1531 clone->is_maxswin = 1;
1532 }
1533
1534 fr = clone->is_rule;
1535 if (fr != NULL) {
1536 MUTEX_ENTER(&fr->fr_lock);
1537 fr->fr_ref++;
1538 fr->fr_statecnt++;
1539 MUTEX_EXIT(&fr->fr_lock);
1540 }
1541 clone->is_flags &= ~SI_CLONE;
1542 clone->is_flags |= SI_CLONED;
1543 fr_stinsert(clone, fin->fin_rev);
1544 MUTEX_ENTER(&clone->is_lock);
1545 clone->is_ref = 1;
1546 if (clone->is_p == IPPROTO_TCP) {
1547 (void) fr_tcp_age(&clone->is_sti, fin, ips_tqtqb,
1548 clone->is_flags);
1549 }
1550 MUTEX_EXIT(&clone->is_lock);
1551 #ifdef IPFILTER_SCAN
1552 (void) ipsc_attachis(is);
1553 #endif
1554 #ifdef IPFILTER_SYNC
1555 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone);
1556 #endif
1557 return clone;
1558 }
1559
1560
/* ------------------------------------------------------------------------ */
/* Function:    fr_matchsrcdst                                              */
/* Returns:     ipstate_t* - NULL == packet does not match this entry,      */
/*                           else pointer to the matching state entry       */
/*                           (a new clone when the entry has SI_CLONE set   */
/*                           and a wildcard port is being filled in)        */
/* Parameters:  fin(I)   - pointer to packet information                    */
/*              is(I)    - pointer to state structure                       */
/*              src(I)   - pointer to source address                        */
/*              dst(I)   - pointer to destination address                   */
/*              tcp(I)   - pointer to TCP/UDP header, NULL if no ports      */
/*                         are to be compared                               */
/*              cmask(I) - mask applied to fin_flx before it is compared    */
/*                         with the flags stored in the state entry         */
/*                                                                          */
/* Match a state table entry against an IP packet.  The logic below is that */
/* ret gets set to one if the match succeeds, else remains 0.  If it is     */
/* still 0 after the test, there is no match.                               */
/*                                                                          */
/* On a match, wildcard addresses/ports and an unset interface in the       */
/* state entry are filled in from the packet and fin_rev is set to the      */
/* direction that was matched.                                              */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask)
fr_info_t *fin;
ipstate_t *is;
i6addr_t *src, *dst;
tcphdr_t *tcp;
u_32_t cmask;
{
	int ret = 0, rev, out, flags, flx = 0, idx;
	u_short sp, dp;
	u_32_t cflx;
	void *ifp;

	/* rev is non-zero when dst is not the state's destination address */
	rev = IP6_NEQ(&is->is_dst, dst);
	ifp = fin->fin_ifp;
	out = fin->fin_out;
	/* Without a port header only the address wildcard bits matter. */
	flags = is->is_flags & (SI_WILDA);
	sp = 0;
	dp = 0;

	if (tcp != NULL) {
		flags = is->is_flags;
		sp = tcp->th_sport;
		dp = tcp->th_dport;
		/*
		 * Addresses look like the forward direction but a
		 * non-wildcard port differs: try the reverse direction.
		 */
		if (!rev) {
			if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
				rev = 1;
			else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
				rev = 1;
		}
	}

	/* Index into is_ifp[]/is_ifname[]: (out << 1) + rev */
	idx = (out << 1) + rev;

	/*
	 * If the interface for this 'direction' is set, make sure it matches.
	 * An interface name that is not set matches any, as does a name of *.
	 */
	if ((is->is_ifp[idx] == NULL &&
	     (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) ||
	    is->is_ifp[idx] == ifp)
		ret = 1;

	if (ret == 0)
		return NULL;
	ret = 0;

	/*
	 * Match addresses and ports.
	 */
	if (rev == 0) {
		if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
		    (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR)) &&
		    (!tcp || ((sp == is->is_sport || flags & SI_W_SPORT) &&
		     (dp == is->is_dport || flags & SI_W_DPORT)))) {
			ret = 1;
		}
	} else {
		if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
		    (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR)) &&
		    (!tcp || ((sp == is->is_dport || flags & SI_W_DPORT) &&
		     (dp == is->is_sport || flags & SI_W_SPORT)))) {
			ret = 1;
		}
	}
	if (ret == 0)
		return NULL;

	/*
	 * Whether or not this should be here, is questionable, but the aim
	 * is to get this out of the main line.
	 */
	if (tcp == NULL)
		flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);

	/*
	 * Only one of the source or destination address can be flagged as a
	 * wildcard.  Fill in the missing address, if set.
	 * For IPv6, if the address being copied in is multicast, then
	 * don't reset the wild flag - multicast causes it to be set in the
	 * first place!
	 */
	if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
		fr_ip_t *fi = &fin->fin_fi;

		if ((flags & SI_W_SADDR) != 0) {
			if (rev == 0) {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_src = fi->fi_src;
					is->is_flags &= ~SI_W_SADDR;
				}
			} else {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_src = fi->fi_dst;
					is->is_flags &= ~SI_W_SADDR;
				}
			}
		} else if ((flags & SI_W_DADDR) != 0) {
			if (rev == 0) {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_dst = fi->fi_dst;
					is->is_flags &= ~SI_W_DADDR;
				}
			} else {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_dst = fi->fi_src;
					is->is_flags &= ~SI_W_DADDR;
				}
			}
		}
		/* No wildcards left on this entry: one less wild state. */
		if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
			ATOMIC_DECL(ips_stats.iss_wild);
		}
	}

	flx = fin->fin_flx & cmask;
	cflx = is->is_flx[out][rev];

	/*
	 * Match up any flags set from IP options.
	 * Stored packet flags are only compared once they have been
	 * recorded (cflx non-zero).
	 */
	if ((cflx && (flx != (cflx & cmask))) ||
	    ((fin->fin_optmsk & is->is_optmsk) != is->is_opt) ||
	    ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
	    ((fin->fin_auth & is->is_authmsk) != is->is_auth))
		return NULL;

	/*
	 * Only one of the source or destination port can be flagged as a
	 * wildcard.  When filling it in, fill in a copy of the matched entry
	 * if it has the cloning flag set.
	 */
	/*
	 * Packets flagged FI_IGNORE match but do not update the state entry.
	 */
	if ((fin->fin_flx & FI_IGNORE) != 0) {
		fin->fin_rev = rev;
		return is;
	}

	if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
		if ((flags & SI_CLONE) != 0) {
			/* From here on "is" refers to the clone. */
			is = fr_stclone(fin, tcp, is);
			if (is == NULL)
				return NULL;
		} else {
			ATOMIC_DECL(ips_stats.iss_wild);
		}

		if ((flags & SI_W_SPORT) != 0) {
			if (rev == 0) {
				is->is_sport = sp;
				is->is_send = htonl(tcp->th_seq);
			} else {
				is->is_sport = dp;
				is->is_send = htonl(tcp->th_ack);
			}
			is->is_maxsend = is->is_send + 1;
		} else if ((flags & SI_W_DPORT) != 0) {
			if (rev == 0) {
				is->is_dport = dp;
				is->is_dend = htonl(tcp->th_ack);
			} else {
				is->is_dport = sp;
				is->is_dend = htonl(tcp->th_seq);
			}
			is->is_maxdend = is->is_dend + 1;
		}
		is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
		if ((flags & SI_CLONED) && ipstate_logging)
			ipstate_log(is, ISL_CLONE);
	}

	/* NOTE(review): ret is not read again after this assignment. */
	ret = -1;

	/* First packet seen for this out/rev slot: remember its flags. */
	if (is->is_flx[out][rev] == 0)
		is->is_flx[out][rev] = flx;

	/*
	 * Check if the interface name for this "direction" is set and if not,
	 * fill it in.
	 */
	if (is->is_ifp[idx] == NULL &&
	    (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
		is->is_ifp[idx] = ifp;
		COPYIFNAME(ifp, is->is_ifname[idx]);
	}
	fin->fin_rev = rev;
	return is;
}
1784
1785
1786 /* ------------------------------------------------------------------------ */
1787 /* Function: fr_checkicmpmatchingstate */
1788 /* Returns: Nil */
1789 /* Parameters: fin(I) - pointer to packet information */
1790 /* */
1791 /* If we've got an ICMP error message, using the information stored in the */
1792 /* ICMP packet, look for a matching state table entry. */
1793 /* */
1794 /* If we return NULL then no lock on ipf_state is held. */
1795 /* If we return non-null then a read-lock on ipf_state is held. */
1796 /* ------------------------------------------------------------------------ */
1797 static ipstate_t *fr_checkicmpmatchingstate(fin)
1798 fr_info_t *fin;
1799 {
1800 ipstate_t *is, **isp;
1801 u_short sport, dport;
1802 u_char pr;
1803 i6addr_t dst, src;
1804 struct icmp *ic;
1805 u_short savelen;
1806 icmphdr_t *icmp;
1807 int backward, i;
1808 fr_info_t ofin;
1809 tcphdr_t *tcp;
1810 int type, len;
1811 ip_t *oip;
1812 u_int hv;
1813
1814 /*
1815 * Does it at least have the return (basic) IP header ?
1816 * Only a basic IP header (no options) should be with
1817 * an ICMP error header.
1818 */
1819 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
1820 (fin->fin_plen < ICMPERR_MINPKTLEN))
1821 return NULL;
1822 ic = fin->fin_dp;
1823 type = ic->icmp_type;
1824 /*
1825 * If it's not an error type, then return
1826 */
1827 if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1828 (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1829 (type != ICMP_PARAMPROB))
1830 return NULL;
1831
1832 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
1833 /*
1834 * Check if the at least the old IP header (with options) and
1835 * 8 bytes of payload is present.
1836 */
1837 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2))
1838 return NULL;
1839
1840 /*
1841 * Sanity Checks.
1842 */
1843 len = fin->fin_dlen - ICMPERR_ICMPHLEN;
1844 if ((len <= 0) || ((IP_HL(oip) << 2) > len))
1845 return NULL;
1846
1847 /*
1848 * Is the buffer big enough for all of it ? It's the size of the IP
1849 * header claimed in the encapsulated part which is of concern. It
1850 * may be too big to be in this buffer but not so big that it's
1851 * outside the ICMP packet, leading to TCP deref's causing problems.
1852 * This is possible because we don't know how big oip_hl is when we
1853 * do the pullup early in fr_check() and thus can't guarantee it is
1854 * all here now.
1855 */
1856 #ifdef _KERNEL
1857 {
1858 mb_t *m;
1859
1860 m = fin->fin_m;
1861 # if defined(MENTAT)
1862 if ((char *)oip + len > (char *)m->b_wptr)
1863 return NULL;
1864 # else
1865 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len)
1866 return NULL;
1867 # endif
1868 }
1869 #endif
1870 bcopy((char *)fin, (char *)&ofin, sizeof(fin));
1871
1872 /*
1873 * in the IPv4 case we must zero the i6addr union otherwise
1874 * the IP6_EQ and IP6_NEQ macros produce the wrong results because
1875 * of the 'junk' in the unused part of the union
1876 */
1877 bzero((char *)&src, sizeof(src));
1878 bzero((char *)&dst, sizeof(dst));
1879
1880 /*
1881 * we make an fin entry to be able to feed it to
1882 * matchsrcdst note that not all fields are encessary
1883 * but this is the cleanest way. Note further we fill
1884 * in fin_mp such that if someone uses it we'll get
1885 * a kernel panic. fr_matchsrcdst does not use this.
1886 *
1887 * watch out here, as ip is in host order and oip in network
1888 * order. Any change we make must be undone afterwards, like
1889 * oip->ip_off - it is still in network byte order so fix it.
1890 */
1891 savelen = oip->ip_len;
1892 oip->ip_len = len;
1893 oip->ip_off = htons(oip->ip_off);
1894
1895 ofin.fin_flx = FI_NOCKSUM;
1896 ofin.fin_v = 4;
1897 ofin.fin_ip = oip;
1898 ofin.fin_m = NULL; /* if dereferenced, panic XXX */
1899 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
1900 ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
1901 (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin);
1902 ofin.fin_ifp = fin->fin_ifp;
1903 ofin.fin_out = !fin->fin_out;
1904 /*
1905 * Reset the short and bad flag here because in fr_matchsrcdst()
1906 * the flags for the current packet (fin_flx) are compared against
1907 * those for the existing session.
1908 */
1909 ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
1910
1911 /*
1912 * Put old values of ip_len and ip_off back as we don't know
1913 * if we have to forward the packet (or process it again.
1914 */
1915 oip->ip_len = savelen;
1916 oip->ip_off = htons(oip->ip_off);
1917
1918 switch (oip->ip_p)
1919 {
1920 case IPPROTO_ICMP :
1921 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
1922
1923 /*
1924 * an ICMP error can only be generated as a result of an
1925 * ICMP query, not as the response on an ICMP error
1926 *
1927 * XXX theoretically ICMP_ECHOREP and the other reply's are
1928 * ICMP query's as well, but adding them here seems strange XXX
1929 */
1930 if ((icmp->icmp_type != ICMP_ECHO) &&
1931 (icmp->icmp_type != ICMP_TSTAMP) &&
1932 (icmp->icmp_type != ICMP_IREQ) &&
1933 (icmp->icmp_type != ICMP_MASKREQ))
1934 return NULL;
1935
1936 /*
1937 * perform a lookup of the ICMP packet in the state table
1938 */
1939 hv = (pr = oip->ip_p);
1940 src.in4 = oip->ip_src;
1941 hv += src.in4.s_addr;
1942 dst.in4 = oip->ip_dst;
1943 hv += dst.in4.s_addr;
1944 hv += icmp->icmp_id;
1945 hv = DOUBLE_HASH(hv);
1946
1947 READ_ENTER(&ipf_state);
1948 for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
1949 isp = &is->is_hnext;
1950 if ((is->is_p != pr) || (is->is_v != 4))
1951 continue;
1952 if (is->is_pass & FR_NOICMPERR)
1953 continue;
1954 is = fr_matchsrcdst(&ofin, is, &src, &dst,
1955 NULL, FI_ICMPCMP);
1956 if (is != NULL) {
1957 if ((is->is_pass & FR_NOICMPERR) != 0) {
1958 RWLOCK_EXIT(&ipf_state);
1959 return NULL;
1960 }
1961 backward = IP6_NEQ(&is->is_src, &dst);
1962 i = (backward << 1) + fin->fin_out;
1963 #if 0
1964 if (is->is_icmppkts[i] > is->is_pkts[i])
1965 continue;
1966 #endif
1967 ips_stats.iss_hits++;
1968 is->is_icmppkts[i]++;
1969 is->is_bytes[i] += fin->fin_plen;
1970 return is;
1971 }
1972 }
1973 RWLOCK_EXIT(&ipf_state);
1974 return NULL;
1975 case IPPROTO_TCP :
1976 case IPPROTO_UDP :
1977 break;
1978 default :
1979 return NULL;
1980 }
1981
1982 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
1983 dport = tcp->th_dport;
1984 sport = tcp->th_sport;
1985
1986 hv = (pr = oip->ip_p);
1987 src.in4 = oip->ip_src;
1988 hv += src.in4.s_addr;
1989 dst.in4 = oip->ip_dst;
1990 hv += dst.in4.s_addr;
1991 hv += dport;
1992 hv += sport;
1993 hv = DOUBLE_HASH(hv);
1994
1995 READ_ENTER(&ipf_state);
1996 for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
1997 isp = &is->is_hnext;
1998 /*
1999 * Only allow this icmp though if the
2000 * encapsulated packet was allowed through the
2001 * other way around. Note that the minimal amount
2002 * of info present does not allow for checking against
2003 * tcp internals such as seq and ack numbers. Only the
2004 * ports are known to be present and can be even if the
2005 * short flag is set.
2006 */
2007 if ((is->is_p == pr) && (is->is_v == 4) &&
2008 (is = fr_matchsrcdst(&ofin, is, &src, &dst,
2009 tcp, FI_ICMPCMP))) {
2010 backward = IP6_NEQ(&is->is_dst, &src);
2011 i = (backward << 1) + fin->fin_out;
2012 if (((is->is_pass & FR_NOICMPERR) != 0)
2013 #if 0
2014 || (is->is_icmppkts[i] > is->is_pkts[i])
2015 #endif
2016 )
2017 break;
2018 ips_stats.iss_hits++;
2019 is->is_icmppkts[i]++;
2020 is->is_bytes[i] += fin->fin_plen;
2021 /*
2022 * we deliberately do not touch the timeouts
2023 * for the accompanying state table entry.
2024 * It remains to be seen if that is correct. XXX
2025 */
2026 return is;
2027 }
2028 }
2029 RWLOCK_EXIT(&ipf_state);
2030 return NULL;
2031 }
2032
2033
2034 /* ------------------------------------------------------------------------ */
2035 /* Function: fr_ipsmove */
2036 /* Returns: Nil */
2037 /* Parameters: is(I) - pointer to state table entry */
2038 /* hv(I) - new hash value for state table entry */
2039 /* Write Locks: ipf_state */
2040 /* */
2041 /* Move a state entry from one position in the hash table to another. */
2042 /* ------------------------------------------------------------------------ */
2043 static void fr_ipsmove(is, hv)
2044 ipstate_t *is;
2045 u_int hv;
2046 {
2047 ipstate_t **isp;
2048 u_int hvm;
2049
2050 ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0);
2051
2052 hvm = is->is_hv;
2053 /*
2054 * Remove the hash from the old location...
2055 */
2056 isp = is->is_phnext;
2057 if (is->is_hnext)
2058 is->is_hnext->is_phnext = isp;
2059 *isp = is->is_hnext;
2060 if (ips_table[hvm] == NULL)
2061 ips_stats.iss_inuse--;
2062 ips_stats.iss_bucketlen[hvm]--;
2063
2064 /*
2065 * ...and put the hash in the new one.
2066 */
2067 hvm = DOUBLE_HASH(hv);
2068 is->is_hv = hvm;
2069 isp = &ips_table[hvm];
2070 if (*isp)
2071 (*isp)->is_phnext = &is->is_hnext;
2072 else
2073 ips_stats.iss_inuse++;
2074 ips_stats.iss_bucketlen[hvm]++;
2075 is->is_phnext = isp;
2076 is->is_hnext = *isp;
2077 *isp = is;
2078 }
2079
2080
2081 /* ------------------------------------------------------------------------ */
2082 /* Function: fr_stlookup */
2083 /* Returns: ipstate_t* - NULL == no matching state found, */
2084 /* else pointer to state information is returned */
2085 /* Parameters: fin(I) - pointer to packet information */
2086 /* tcp(I) - pointer to TCP/UDP header. */
2087 /* */
2088 /* Search the state table for a matching entry to the packet described by */
2089 /* the contents of *fin. */
2090 /* */
2091 /* If we return NULL then no lock on ipf_state is held. */
2092 /* If we return non-null then a read-lock on ipf_state is held. */
2093 /* ------------------------------------------------------------------------ */
2094 ipstate_t *fr_stlookup(fin, tcp, ifqp)
2095 fr_info_t *fin;
2096 tcphdr_t *tcp;
2097 ipftq_t **ifqp;
2098 {
2099 u_int hv, hvm, pr, v, tryagain;
2100 ipstate_t *is, **isp;
2101 u_short dport, sport;
2102 i6addr_t src, dst;
2103 struct icmp *ic;
2104 grehdr_t *gre;
2105 ipftq_t *ifq;
2106 int oow;
2107
2108 is = NULL;
2109 ifq = NULL;
2110 tcp = fin->fin_dp;
2111 ic = (struct icmp *)tcp;
2112 hv = (pr = fin->fin_fi.fi_p);
2113 src = fin->fin_fi.fi_src;
2114 dst = fin->fin_fi.fi_dst;
2115 hv += src.in4.s_addr;
2116 hv += dst.in4.s_addr;
2117
2118 v = fin->fin_fi.fi_v;
2119 #ifdef USE_INET6
2120 if (v == 6) {
2121 hv += fin->fin_fi.fi_src.i6[1];
2122 hv += fin->fin_fi.fi_src.i6[2];
2123 hv += fin->fin_fi.fi_src.i6[3];
2124
2125 if ((fin->fin_p == IPPROTO_ICMPV6) &&
2126 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
2127 hv -= dst.in4.s_addr;
2128 } else {
2129 hv += fin->fin_fi.fi_dst.i6[1];
2130 hv += fin->fin_fi.fi_dst.i6[2];
2131 hv += fin->fin_fi.fi_dst.i6[3];
2132 }
2133 }
2134 #endif
2135
2136 /*
2137 * Search the hash table for matching packet header info.
2138 */
2139 switch (pr)
2140 {
2141 #ifdef USE_INET6
2142 case IPPROTO_ICMPV6 :
2143 tcp = NULL;
2144 tryagain = 0;
2145 if (v == 6) {
2146 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
2147 (ic->icmp_type == ICMP6_ECHO_REPLY)) {
2148 hv += ic->icmp_id;
2149 }
2150 }
2151 READ_ENTER(&ipf_state);
2152 icmp6again:
2153 hvm = DOUBLE_HASH(hv);
2154 for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) {
2155 isp = &is->is_hnext;
2156 if ((is->is_p != pr) || (is->is_v != v))
2157 continue;
2158 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
2159 if (is != NULL &&
2160 fr_matchicmpqueryreply(v, &is->is_icmp,
2161 ic, fin->fin_rev)) {
2162 if (!(is->is_sti.tqe_flags & TQE_RULEBASED)) {
2163 if (fin->fin_rev)
2164 ifq = &ips_icmpacktq;
2165 else
2166 ifq = &ips_icmptq;
2167 }
2168 break;
2169 }
2170 }
2171
2172 if (is != NULL) {
2173 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
2174 hv += fin->fin_fi.fi_src.i6[0];
2175 hv += fin->fin_fi.fi_src.i6[1];
2176 hv += fin->fin_fi.fi_src.i6[2];
2177 hv += fin->fin_fi.fi_src.i6[3];
2178 fr_ipsmove(is, hv);
2179 MUTEX_DOWNGRADE(&ipf_state);
2180 }
2181 break;
2182 }
2183 RWLOCK_EXIT(&ipf_state);
2184
2185 /*
2186 * No matching icmp state entry. Perhaps this is a
2187 * response to another state entry.
2188 */
2189 if ((ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) &&
2190 !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) {
2191 hv -= fin->fin_fi.fi_src.i6[0];
2192 hv -= fin->fin_fi.fi_src.i6[1];
2193 hv -= fin->fin_fi.fi_src.i6[2];
2194 hv -= fin->fin_fi.fi_src.i6[3];
2195 tryagain = 1;
2196 WRITE_ENTER(&ipf_state);
2197 goto icmp6again;
2198 }
2199
2200 is = fr_checkicmp6matchingstate(fin);
2201 if (is != NULL)
2202 return is;
2203 break;
2204 #endif
2205
2206 case IPPROTO_ICMP :
2207 if (v == 4) {
2208 hv += ic->icmp_id;
2209 }
2210 hv = DOUBLE_HASH(hv);
2211 READ_ENTER(&ipf_state);
2212 for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
2213 isp = &is->is_hnext;
2214 if ((is->is_p != pr) || (is->is_v != v))
2215 continue;
2216 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
2217 if (is != NULL &&
2218 fr_matchicmpqueryreply(v, &is->is_icmp,
2219 ic, fin->fin_rev)) {
2220 if (!(is->is_sti.tqe_flags & TQE_RULEBASED)) {
2221 if (fin->fin_rev)
2222 ifq = &ips_icmpacktq;
2223 else
2224 ifq = &ips_icmptq;
2225 }
2226 break;
2227 }
2228 }
2229 if (is == NULL) {
2230 RWLOCK_EXIT(&ipf_state);
2231 }
2232 break;
2233
2234 case IPPROTO_GRE :
2235 gre = fin->fin_dp;
2236 hv += gre->gr_call;
2237 tcp = NULL;
2238 ifqp = NULL;
2239 hv = DOUBLE_HASH(hv);
2240 READ_ENTER(&ipf_state);
2241 for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
2242 isp = &is->is_hnext;
2243 if ((is->is_p != IPPROTO_GRE) || (is->is_v != v) ||
2244 (is->is_gre.gs_call != gre->gr_call))
2245 continue;
2246 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
2247 if (is != NULL)
2248 break;
2249 }
2250 if (is == NULL) {
2251 RWLOCK_EXIT(&ipf_state);
2252 }
2253 break;
2254
2255 case IPPROTO_TCP :
2256 case IPPROTO_UDP :
2257 ifqp = NULL;
2258 sport = htons(fin->fin_data[0]);
2259 hv += sport;
2260 dport = htons(fin->fin_data[1]);
2261 hv += dport;
2262 oow = 0;
2263 tryagain = 0;
2264 READ_ENTER(&ipf_state);
2265 retry_tcpudp:
2266 hvm = DOUBLE_HASH(hv);
2267 for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) {
2268 isp = &is->is_hnext;
2269 if ((is->is_p != pr) || (is->is_v != v))
2270 continue;
2271 fin->fin_flx &= ~FI_OOW;
2272 is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
2273 if (is != NULL) {
2274 if (pr == IPPROTO_TCP) {
2275 if (!fr_tcpstate(fin, tcp, is)) {
2276 oow |= fin->fin_flx & FI_OOW;
2277 continue;
2278 }
2279 }
2280 break;
2281 }
2282 }
2283 if (is != NULL) {
2284 if (tryagain &&
2285 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
2286 hv += dport;
2287 hv += sport;
2288 fr_ipsmove(is, hv);
2289 MUTEX_DOWNGRADE(&ipf_state);
2290 }
2291 break;
2292 }
2293 RWLOCK_EXIT(&ipf_state);
2294
2295 if (!tryagain && ips_stats.iss_wild) {
2296 hv -= dport;
2297 hv -= sport;
2298 tryagain = 1;
2299 WRITE_ENTER(&ipf_state);
2300 goto retry_tcpudp;
2301 }
2302 fin->fin_flx |= oow;
2303 break;
2304
2305 default :
2306 tcp = NULL;
2307 ifqp = NULL;
2308 hvm = DOUBLE_HASH(hv);
2309 READ_ENTER(&ipf_state);
2310 for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) {
2311 isp = &is->is_hnext;
2312 if ((is->is_p != pr) || (is->is_v != v))
2313 continue;
2314 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
2315 if (is != NULL)
2316 break;
2317 }
2318 if (is == NULL) {
2319 RWLOCK_EXIT(&ipf_state);
2320 }
2321 break;
2322 }
2323
2324 if (ifq != NULL && ifqp != NULL)
2325 *ifqp = ifq;
2326 return is;
2327 }
2328
2329
2330 /* ------------------------------------------------------------------------ */
2331 /* Function: fr_updatestate */
2332 /* Returns: Nil */
2333 /* Parameters: fin(I) - pointer to packet information */
2334 /* is(I) - pointer to state table entry */
2335 /* Read Locks: ipf_state */
2336 /* */
2337 /* Updates packet and byte counters for a newly received packet. Seeds the */
2338 /* fragment cache with a new entry as required. */
2339 /* ------------------------------------------------------------------------ */
2340 void fr_updatestate(fin, is, ifq)
2341 fr_info_t *fin;
2342 ipstate_t *is;
2343 ipftq_t *ifq;
2344 {
2345 ipftqent_t *tqe;
2346 int i, pass;
2347
2348 i = (fin->fin_rev << 1) + fin->fin_out;
2349
2350 /*
2351 * For TCP packets, ifq == NULL. For all others, check if this new
2352 * queue is different to the last one it was on and move it if so.
2353 */
2354 MUTEX_ENTER(&is->is_lock);
2355 tqe = &is->is_sti;
2356 if ((tqe->tqe_flags & TQE_RULEBASED) != 0)
2357 ifq = is->is_tqehead[fin->fin_rev];
2358
2359 if (ifq != NULL)
2360 fr_movequeue(tqe, tqe->tqe_ifq, ifq);
2361
2362 is->is_pkts[i]++;
2363 is->is_bytes[i] += fin->fin_plen;
2364 MUTEX_EXIT(&is->is_lock);
2365
2366 #ifdef IPFILTER_SYNC
2367 ipfsync_update(SMC_STATE, fin, is->is_sync);
2368 #endif
2369
2370 ATOMIC_INCL(ips_stats.iss_hits);
2371
2372 fin->fin_fr = is->is_rule;
2373
2374 /*
2375 * If this packet is a fragment and the rule says to track fragments,
2376 * then create a new fragment cache entry.
2377 */
2378 pass = is->is_pass;
2379 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass))
2380 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE);
2381 }
2382
2383
2384 /* ------------------------------------------------------------------------ */
2385 /* Function: fr_checkstate */
2386 /* Returns: frentry_t* - NULL == search failed, */
2387 /* else pointer to rule for matching state */
2388 /* Parameters: ifp(I) - pointer to interface */
2389 /* passp(I) - pointer to filtering result flags */
2390 /* */
2391 /* Check if a packet is associated with an entry in the state table. */
2392 /* ------------------------------------------------------------------------ */
2393 frentry_t *fr_checkstate(fin, passp)
2394 fr_info_t *fin;
2395 u_32_t *passp;
2396 {
2397 ipstate_t *is;
2398 frentry_t *fr;
2399 tcphdr_t *tcp;
2400 ipftq_t *ifq;
2401 u_int pass;
2402
2403 if (fr_state_lock || (ips_list == NULL) ||
2404 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGTAIL|FI_BAD)))
2405 return NULL;
2406
2407 is = NULL;
2408 if ((fin->fin_flx & FI_TCPUDP) ||
2409 (fin->fin_fi.fi_p == IPPROTO_ICMP)
2410 #ifdef USE_INET6
2411 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6)
2412 #endif
2413 )
2414 tcp = fin->fin_dp;
2415 else
2416 tcp = NULL;
2417
2418 /*
2419 * Search the hash table for matching packet header info.
2420 */
2421 ifq = NULL;
2422 is = fin->fin_state;
2423 if (is == NULL)
2424 is = fr_stlookup(fin, tcp, &ifq);
2425 switch (fin->fin_p)
2426 {
2427 #ifdef USE_INET6
2428 case IPPROTO_ICMPV6 :
2429 if (is != NULL)
2430 break;
2431 if (fin->fin_v == 6) {
2432 is = fr_checkicmp6matchingstate(fin);
2433 if (is != NULL)
2434 goto matched;
2435 }
2436 break;
2437 #endif
2438 case IPPROTO_ICMP :
2439 if (is != NULL)
2440 break;
2441 /*
2442 * No matching icmp state entry. Perhaps this is a
2443 * response to another state entry.
2444 */
2445 is = fr_checkicmpmatchingstate(fin);
2446 if (is != NULL)
2447 goto matched;
2448 break;
2449 case IPPROTO_TCP :
2450 if (is == NULL)
2451 break;
2452
2453 if (is->is_pass & FR_NEWISN) {
2454 if (fin->fin_out == 0)
2455 fr_fixinisn(fin, is);
2456 else if (fin->fin_out == 1)
2457 fr_fixoutisn(fin, is);
2458 }
2459 break;
2460 default :
2461 if (fin->fin_rev)
2462 ifq = &ips_udpacktq;
2463 else
2464 ifq = &ips_udptq;
2465 break;
2466 }
2467 if (is == NULL) {
2468 ATOMIC_INCL(ips_stats.iss_miss);
2469 return NULL;
2470 }
2471
2472 matched:
2473 fr = is->is_rule;
2474 if (fr != NULL) {
2475 #if 0
2476 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
2477 if (fin->fin_nattag == NULL)
2478 return NULL;
2479 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0)
2480 return NULL;
2481 }
2482 #endif
2483 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN);
2484 fin->fin_icode = fr->fr_icode;
2485 }
2486
2487 fin->fin_rule = is->is_rulen;
2488 pass = is->is_pass;
2489 fr_updatestate(fin, is, ifq);
2490 if (fin->fin_out == 1)
2491 fin->fin_nat = is->is_nat[fin->fin_rev];
2492
2493 fin->fin_state = is;
2494 is->is_touched = fr_ticks;
2495 MUTEX_ENTER(&is->is_lock);
2496 is->is_ref++;
2497 MUTEX_EXIT(&is->is_lock);
2498 RWLOCK_EXIT(&ipf_state);
2499 fin->fin_flx |= FI_STATE;
2500 if ((pass & FR_LOGFIRST) != 0)
2501 pass &= ~(FR_LOGFIRST|FR_LOG);
2502 *passp = pass;
2503 return fr;
2504 }
2505
2506
2507 /* ------------------------------------------------------------------------ */
2508 /* Function: fr_fixoutisn */
2509 /* Returns: Nil */
2510 /* Parameters: fin(I) - pointer to packet information */
2511 /* is(I) - pointer to master state structure */
2512 /* */
2513 /* Called only for outbound packets, adjusts the sequence number and the */
2514 /* TCP checksum to match that change. */
2515 /* ------------------------------------------------------------------------ */
2516 static void fr_fixoutisn(fin, is)
2517 fr_info_t *fin;
2518 ipstate_t *is;
2519 {
2520 tcphdr_t *tcp;
2521 int rev;
2522
2523 tcp = fin->fin_dp;
2524 rev = fin->fin_rev;
2525 if ((is->is_flags & IS_ISNSYN) != 0) {
2526 if (rev == 0) {
2527 tcp->th_seq += is->is_isninc[0];
2528 fix_outcksum(fin, &tcp->th_sum, is->is_sumd[0]);
2529 }
2530 }
2531 if ((is->is_flags & IS_ISNACK) != 0) {
2532 if (rev == 1) {
2533 tcp->th_seq += is->is_isninc[1];
2534 fix_outcksum(fin, &tcp->th_sum, is->is_sumd[1]);
2535 }
2536 }
2537 }
2538
2539
2540 /* ------------------------------------------------------------------------ */
2541 /* Function: fr_fixinisn */
2542 /* Returns: Nil */
2543 /* Parameters: fin(I) - pointer to packet information */
2544 /* is(I) - pointer to master state structure */
2545 /* */
2546 /* Called only for inbound packets, adjusts the acknowledge number and the */
2547 /* TCP checksum to match that change. */
2548 /* ------------------------------------------------------------------------ */
2549 static void fr_fixinisn(fin, is)
2550 fr_info_t *fin;
2551 ipstate_t *is;
2552 {
2553 tcphdr_t *tcp;
2554 int rev;
2555
2556 tcp = fin->fin_dp;
2557 rev = fin->fin_rev;
2558 if ((is->is_flags & IS_ISNSYN) != 0) {
2559 if (rev == 1) {
2560 tcp->th_ack -= is->is_isninc[0];
2561 fix_incksum(fin, &tcp->th_sum, is->is_sumd[0]);
2562 }
2563 }
2564 if ((is->is_flags & IS_ISNACK) != 0) {
2565 if (rev == 0) {
2566 tcp->th_ack -= is->is_isninc[1];
2567 fix_incksum(fin, &tcp->th_sum, is->is_sumd[1]);
2568 }
2569 }
2570 }
2571
2572
2573 /* ------------------------------------------------------------------------ */
2574 /* Function: fr_statesync */
2575 /* Returns: Nil */
2576 /* Parameters: ifp(I) - pointer to interface */
2577 /* */
2578 /* Walk through all state entries and if an interface pointer match is */
2579 /* found then look it up again, based on its name in case the pointer has */
2580 /* changed since last time. */
2581 /* ------------------------------------------------------------------------ */
2582 void fr_statesync(ifp)
2583 void *ifp;
2584 {
2585 ipstate_t *is;
2586 int i;
2587
2588 if (fr_running <= 0)
2589 return;
2590
2591 WRITE_ENTER(&ipf_state);
2592
2593 if (fr_running <= 0) {
2594 RWLOCK_EXIT(&ipf_state);
2595 return;
2596 }
2597
2598 for (is = ips_list; is; is = is->is_next) {
2599 /*
2600 * Look up all the interface names in the state entry.
2601 */
2602 for (i = 0; i < 4; i++) {
2603 if (is->is_ifp[i] == ifp) {
2604 is->is_ifname[i][LIFNAMSIZ - 1] = '\0';
2605 is->is_ifp[i] = GETIFP(is->is_ifname[i],
2606 is->is_v);
2607 if (is->is_ifp[i] == NULL)
2608 is->is_ifp[i] = (void *)-1;
2609 }
2610 }
2611 }
2612 RWLOCK_EXIT(&ipf_state);
2613 }
2614
2615
2616 /* ------------------------------------------------------------------------ */
2617 /* Function: fr_delstate */
2618 /* Returns: Nil */
2619 /* Parameters: is(I) - pointer to state structure to delete */
2620 /* why(I) - if not 0, log reason why it was deleted */
2621 /* Write Locks: ipf_state */
2622 /* */
2623 /* Deletes a state entry from the enumerated list as well as the hash table */
2624 /* and timeout queue lists. Make adjustments to hash table statistics and */
2625 /* global counters as required. */
2626 /* ------------------------------------------------------------------------ */
2627 static void fr_delstate(is, why)
2628 ipstate_t *is;
2629 int why;
2630 {
2631 ipftqent_t *tqe;
2632 ipftq_t *ifq;
2633
2634 ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0);
2635
2636 /*
2637 * Since we want to delete this, remove it from the state table,
2638 * where it can be found & used, first.
2639 */
2640 *is->is_pnext = is->is_next;
2641
2642 if (is->is_next != NULL)
2643 is->is_next->is_pnext = is->is_pnext;
2644
2645
2646 is->is_pnext = NULL;
2647 is->is_next = NULL;
2648
2649 *is->is_phnext = is->is_hnext;
2650 if (is->is_hnext != NULL)
2651 is->is_hnext->is_phnext = is->is_phnext;
2652 if (ips_table[is->is_hv] == NULL)
2653 ips_stats.iss_inuse--;
2654 ips_stats.iss_bucketlen[is->is_hv]--;
2655
2656 is->is_phnext = NULL;
2657 is->is_hnext = NULL;
2658
2659 /*
2660 * Because ips_stats.iss_wild is a count of entries in the state
2661 * table that have wildcard flags set, only decerement it once
2662 * and do it here.
2663 */
2664 if (is->is_flags & (SI_WILDP|SI_WILDA)) {
2665 if (!(is->is_flags & SI_CLONED)) {
2666 ATOMIC_DECL(ips_stats.iss_wild);
2667 }
2668 }
2669
2670
2671 /*
2672 * Next, remove it from the timeout queue it is in.
2673 */
2674 tqe = &is->is_sti;
2675 ifq = tqe->tqe_ifq;
2676 if (tqe->tqe_pnext != NULL) {
2677 *tqe->tqe_pnext = tqe->tqe_next;
2678 if (tqe->tqe_next != NULL)
2679 tqe->tqe_next->tqe_pnext = tqe->tqe_pnext;
2680 else /* we must be the tail anyway */
2681 ifq->ifq_tail = tqe->tqe_pnext;
2682 tqe->tqe_pnext = NULL;
2683 tqe->tqe_ifq = NULL;
2684 }
2685
2686 if ((ifq->ifq_flags & IFQF_USER) != 0)
2687 fr_deletetimeoutqueue(ifq);
2688
2689 /*
2690 * If it is still in use by something else, do not go any further,
2691 * but note that at this point it is now an orphan.
2692 */
2693 is->is_ref--;
2694 if (is->is_ref > 0)
2695 return;
2696
2697 #ifdef IPFILTER_SYNC
2698 if (is->is_sync)
2699 ipfsync_del(is->is_sync);
2700 #endif
2701 #ifdef IPFILTER_SCAN
2702 (void) ipsc_detachis(is);
2703 #endif
2704
2705 if (ipstate_logging != 0 && why != 0)
2706 ipstate_log(is, why);
2707
2708 if (is->is_rule != NULL) {
2709 is->is_rule->fr_statecnt--;
2710 (void)fr_derefrule(&is->is_rule);
2711 }
2712
2713 MUTEX_DESTROY(&is->is_lock);
2714 KFREE(is);
2715 ips_num--;
2716 }
2717
2718
2719 /* ------------------------------------------------------------------------ */
2720 /* Function: fr_timeoutstate */
2721 /* Returns: Nil */
2722 /* Parameters: Nil */
2723 /* */
2724 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */
2725 /* used here is to keep the queue sorted with the oldest things at the top */
2726 /* and the youngest at the bottom. So if the top one doesn't need to be */
2727 /* expired then neither will any under it. */
2728 /* ------------------------------------------------------------------------ */
2729 void fr_timeoutstate()
2730 {
2731 ipftq_t *ifq, *ifqnext;
2732 ipftqent_t *tqe, *tqn;
2733 ipstate_t *is;
2734 #if defined(USE_SPL) && defined(_KERNEL)
2735 int s;
2736 #endif
2737
2738 SPL_NET(s);
2739 WRITE_ENTER(&ipf_state);
2740 for (ifq = ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next)
2741 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
2742 if (tqe->tqe_die > fr_ticks)
2743 break;
2744 tqn = tqe->tqe_next;
2745 is = tqe->tqe_parent;
2746 fr_delstate(is, ISL_EXPIRE);
2747 }
2748
2749 for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) {
2750 ifqnext = ifq->ifq_next;
2751
2752 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
2753 if (tqe->tqe_die > fr_ticks)
2754 break;
2755 tqn = tqe->tqe_next;
2756 is = tqe->tqe_parent;
2757 fr_delstate(is, ISL_EXPIRE);
2758 }
2759 }
2760 if (fr_state_doflush) {
2761 (void) fr_state_flush(2, 0);
2762 fr_state_doflush = 0;
2763 }
2764 RWLOCK_EXIT(&ipf_state);
2765 SPL_X(s);
2766 }
2767
2768
2769 /* ------------------------------------------------------------------------ */
2770 /* Function: fr_state_flush */
2771 /* Returns: int - 0 == success, -1 == failure */
2772 /* Parameters: Nil */
2773 /* Write Locks: ipf_state */
2774 /* */
2775 /* Flush state tables. Three actions currently defined: */
2776 /* which == 0 : flush all state table entries */
2777 /* which == 1 : flush TCP connections which have started to close but are */
2778 /* stuck for some reason. */
2779 /* which == 2 : flush TCP connections which have been idle for a long time, */
2780 /* starting at > 4 days idle and working back in successive half-*/
2781 /* days to at most 12 hours old. If this fails to free enough */
2782 /* slots then work backwards in half hour slots to 30 minutes. */
2783 /* If that too fails, then work backwards in 30 second intervals */
2784 /* for the last 30 minutes to at worst 30 seconds idle. */
2785 /* ------------------------------------------------------------------------ */
2786 static int fr_state_flush(which, proto)
2787 int which, proto;
2788 {
2789 ipftq_t *ifq, *ifqnext;
2790 ipftqent_t *tqe, *tqn;
2791 ipstate_t *is, **isp;
2792 int delete, removed;
2793 long try, maxtick;
2794 u_long interval;
2795 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
2796 int s;
2797 #endif
2798
2799 removed = 0;
2800
2801 SPL_NET(s);
2802 for (isp = &ips_list; ((is = *isp) != NULL); ) {
2803 delete = 0;
2804
2805 if ((proto != 0) && (is->is_v != proto)) {
2806 isp = &is->is_next;
2807 continue;
2808 }
2809
2810 switch (which)
2811 {
2812 case 0 :
2813 delete = 1;
2814 break;
2815 case 1 :
2816 case 2 :
2817 if (is->is_p != IPPROTO_TCP)
2818 break;
2819 if ((is->is_state[0] != IPF_TCPS_ESTABLISHED) ||
2820 (is->is_state[1] != IPF_TCPS_ESTABLISHED))
2821 delete = 1;
2822 break;
2823 }
2824
2825 if (delete) {
2826 if (is->is_p == IPPROTO_TCP)
2827 ips_stats.iss_fin++;
2828 else
2829 ips_stats.iss_expire++;
2830 fr_delstate(is, ISL_FLUSH);
2831 removed++;
2832 } else
2833 isp = &is->is_next;
2834 }
2835
2836 if (which != 2) {
2837 SPL_X(s);
2838 return removed;
2839 }
2840
2841 /*
2842 * Asked to remove inactive entries because the table is full, try
2843 * again, 3 times, if first attempt failed with a different criteria
2844 * each time. The order tried in must be in decreasing age.
2845 * Another alternative is to implement random drop and drop N entries
2846 * at random until N have been freed up.
2847 */
2848 if (fr_ticks - ips_last_force_flush < IPF_TTLVAL(5))
2849 goto force_flush_skipped;
2850 ips_last_force_flush = fr_ticks;
2851
2852 if (fr_ticks > IPF_TTLVAL(43200))
2853 interval = IPF_TTLVAL(43200);
2854 else if (fr_ticks > IPF_TTLVAL(1800))
2855 interval = IPF_TTLVAL(1800);
2856 else if (fr_ticks > IPF_TTLVAL(30))
2857 interval = IPF_TTLVAL(30);
2858 else
2859 interval = IPF_TTLVAL(10);
2860 try = fr_ticks - (fr_ticks - interval);
2861 if (try < 0)
2862 goto force_flush_skipped;
2863
2864 while (removed == 0) {
2865 maxtick = fr_ticks - interval;
2866 if (maxtick < 0)
2867 break;
2868
2869 while (try < maxtick) {
2870 for (ifq = ips_tqtqb; ifq != NULL;
2871 ifq = ifq->ifq_next) {
2872 for (tqn = ifq->ifq_head;
2873 ((tqe = tqn) != NULL); ) {
2874 if (tqe->tqe_die > try)
2875 break;
2876 tqn = tqe->tqe_next;
2877 is = tqe->tqe_parent;
2878 fr_delstate(is, ISL_EXPIRE);
2879 removed++;
2880 }
2881 }
2882
2883 for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) {
2884 ifqnext = ifq->ifq_next;
2885
2886 for (tqn = ifq->ifq_head;
2887 ((tqe = tqn) != NULL); ) {
2888 if (tqe->tqe_die > try)
2889 break;
2890 tqn = tqe->tqe_next;
2891 is = tqe->tqe_parent;
2892 fr_delstate(is, ISL_EXPIRE);
2893 removed++;
2894 }
2895 }
2896 if (try + interval > maxtick)
2897 break;
2898 try += interval;
2899 }
2900
2901 if (removed == 0) {
2902 if (interval == IPF_TTLVAL(43200)) {
2903 interval = IPF_TTLVAL(1800);
2904 } else if (interval == IPF_TTLVAL(1800)) {
2905 interval = IPF_TTLVAL(30);
2906 } else if (interval == IPF_TTLVAL(30)) {
2907 interval = IPF_TTLVAL(10);
2908 } else {
2909 break;
2910 }
2911 }
2912 }
2913 force_flush_skipped:
2914 SPL_X(s);
2915 return removed;
2916 }
2917
2918
2919
/* ------------------------------------------------------------------------ */
/* Function:    fr_tcp_age                                                  */
/* Returns:     int - 1 == packet accepted by the state machine,            */
/*                    0 == no change (rejected)                             */
/* Parameters:  tqe(I)   - pointer to timeout queue entry; its tqe_state[]  */
/*                         holds the TCP state of both ends                 */
/*              fin(I)   - pointer to packet information                    */
/*              tqtab(I) - TCP timeout queue table this is in, indexed by   */
/*                         TCP state                                        */
/*              flags(I) - flags from state/NAT entry                       */
/*                                                                          */
/* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29:          */
/*                                                                          */
/* - (try to) base state transitions on real evidence only,                 */
/*   i.e. packets that are sent and have been received by ipfilter;         */
/*   diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used.       */
/*                                                                          */
/* - deal with half-closed connections correctly;                           */
/*                                                                          */
/* - store the state of the source in state[0] such that ipfstat            */
/*   displays the state as source/dest instead of dest/source; the calls    */
/*   to fr_tcp_age have been changed accordingly.                           */
/*                                                                          */
/* Internal Parameters:                                                     */
/*                                                                          */
/*    state[0] = state of source (host that initiated connection)           */
/*    state[1] = state of dest   (host that accepted the connection)        */
/*                                                                          */
/*    dir == 0 : a packet from source to dest                               */
/*    dir == 1 : a packet from dest to source                               */
/*                                                                          */
/* Internally rval is 0 (rejected), 1 (accepted: store the new state for    */
/* 'dir' and, unless the entry is rule based, move it to the queue for that */
/* state) or 2 (accepted, but leave state and queue position alone).  The   */
/* caller only ever sees 0 or 1.                                            */
/* ------------------------------------------------------------------------ */
int fr_tcp_age(tqe, fin, tqtab, flags)
ipftqent_t *tqe;
fr_info_t *fin;
ipftq_t *tqtab;
int flags;
{
	int dlen, ostate, nstate, rval, dir;
	u_char tcpflags;
	tcphdr_t *tcp;

	tcp = fin->fin_dp;

	rval = 0;
	dir = fin->fin_rev;
	tcpflags = tcp->th_flags;
	/*
	 * Packet length less the fin_hlen header length and the TCP header;
	 * TCP_OFF() is presumably in 32 bit words, hence the shift.
	 */
	dlen = fin->fin_plen - fin->fin_hlen - (TCP_OFF(tcp) << 2);

	if (tcpflags & TH_RST) {
		/*
		 * A reset is always accepted, whatever state 'dir' is in.
		 * Without PUSH and without data it closes the connection,
		 * otherwise 'dir' is put into CLOSE_WAIT.
		 */
		if (!(tcpflags & TH_PUSH) && !dlen)
			nstate = IPF_TCPS_CLOSED;
		else
			nstate = IPF_TCPS_CLOSE_WAIT;
		rval = 1;
	} else {
		/* ostate: state of the other end, nstate: state of 'dir' */
		ostate = tqe->tqe_state[1 - dir];
		nstate = tqe->tqe_state[dir];

		switch (nstate)
		{
		case IPF_TCPS_CLOSED: /* 0 */
			if ((tcpflags & TH_OPENING) == TH_OPENING) {
				/*
				 * 'dir' received an S and sends SA in
				 * response, CLOSED -> SYN_RECEIVED
				 */
				nstate = IPF_TCPS_SYN_RECEIVED;
				rval = 1;
			} else if ((tcpflags & TH_OPENING) == TH_SYN) {
				/* 'dir' sent S, CLOSED -> SYN_SENT */
				nstate = IPF_TCPS_SYN_SENT;
				rval = 1;
			}
			/*
			 * the next piece of code makes it possible to get
			 * already established connections into the state table
			 * after a restart or reload of the filter rules; this
			 * does not work when a strict 'flags S keep state' is
			 * used for tcp connections of course
			 */
			if (((flags & IS_TCPFSM) == 0) &&
			    ((tcpflags & TH_ACKMASK) == TH_ACK)) {
				/*
				 * we saw an A, guess 'dir' is in ESTABLISHED
				 * mode
				 */
				if (ostate == IPF_TCPS_CLOSED) {
					nstate = IPF_TCPS_HALF_ESTAB;
					rval = 1;
				} else if (ostate == IPF_TCPS_ESTABLISHED ||
					   ostate == IPF_TCPS_HALF_ESTAB) {
					nstate = IPF_TCPS_ESTABLISHED;
					rval = 1;
				}
			}
			/*
			 * TODO: besides regular ACK packets we can have other
			 * packets as well; it is yet to be determined how we
			 * should initialize the states in those cases
			 */
			break;

		case IPF_TCPS_LISTEN: /* 1 */
			/* NOT USED */
			break;

		case IPF_TCPS_SYN_SENT: /* 2 */
			if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
				/*
				 * A retransmitted SYN packet.  We do not reset
				 * the timeout here to fr_tcptimeout because a
				 * connection connect timeout does not renew
				 * after every packet that is sent.  We need to
				 * set rval so as to indicate the packet has
				 * passed the check for its flags being valid
				 * in the TCP FSM.  Setting rval to 2 has the
				 * result of not resetting the timeout.
				 */
				rval = 2;
			} else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
				   TH_ACK) {
				/*
				 * we see an A from 'dir' which is in SYN_SENT
				 * state: 'dir' sent an A in response to an SA
				 * which it received, SYN_SENT -> ESTABLISHED
				 */
				nstate = IPF_TCPS_ESTABLISHED;
				rval = 1;
			} else if (tcpflags & TH_FIN) {
				/*
				 * we see an F from 'dir' which is in SYN_SENT
				 * state and wants to close its side of the
				 * connection; SYN_SENT -> FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
				rval = 1;
			} else if ((tcpflags & TH_OPENING) == TH_OPENING) {
				/*
				 * we see an SA from 'dir' which is already in
				 * SYN_SENT state, this means we have a
				 * simultaneous open; SYN_SENT -> SYN_RECEIVED
				 */
				nstate = IPF_TCPS_SYN_RECEIVED;
				rval = 1;
			}
			break;

		case IPF_TCPS_SYN_RECEIVED: /* 3 */
			if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
				/*
				 * we see an A from 'dir' which was in
				 * SYN_RECEIVED state so it must now be in
				 * established state, SYN_RECEIVED ->
				 * ESTABLISHED
				 */
				nstate = IPF_TCPS_ESTABLISHED;
				rval = 1;
			} else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
				   TH_OPENING) {
				/*
				 * We see an SA from 'dir' which is already in
				 * SYN_RECEIVED state: accept it without
				 * changing the state or the queue position.
				 */
				rval = 2;
			} else if (tcpflags & TH_FIN) {
				/*
				 * we see an F from 'dir' which is in
				 * SYN_RECEIVED state and wants to close its
				 * side of the connection; SYN_RECEIVED ->
				 * FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
				rval = 1;
			}
			break;

		case IPF_TCPS_HALF_ESTAB: /* 4 */
			/*
			 * A plain ACK from 'dir' completes the connection
			 * once the other end is at least half established.
			 */
			if (ostate >= IPF_TCPS_HALF_ESTAB) {
				if ((tcpflags & TH_ACKMASK) == TH_ACK) {
					nstate = IPF_TCPS_ESTABLISHED;
					rval = 1;
				}
			}

			break;

		case IPF_TCPS_ESTABLISHED: /* 5 */
			rval = 1;
			if (tcpflags & TH_FIN) {
				/*
				 * 'dir' closed its side of the connection;
				 * this gives us a half-closed connection;
				 * ESTABLISHED -> FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
			} else if (tcpflags & TH_ACK) {
				/*
				 * an ACK, should we exclude other flags here?
				 */
				if (ostate == IPF_TCPS_FIN_WAIT_1) {
					/*
					 * We know the other side did an active
					 * close, so we are ACKing the recvd
					 * FIN packet (does the window matching
					 * code guarantee this?) and go into
					 * CLOSE_WAIT state; this gives us a
					 * half-closed connection
					 */
					nstate = IPF_TCPS_CLOSE_WAIT;
				} else if (ostate < IPF_TCPS_CLOSE_WAIT) {
					/*
					 * still a fully established
					 * connection reset timeout
					 */
					nstate = IPF_TCPS_ESTABLISHED;
				}
			}
			break;

		case IPF_TCPS_CLOSE_WAIT: /* 6 */
			rval = 1;
			if (tcpflags & TH_FIN) {
				/*
				 * application closed and 'dir' sent a FIN,
				 * we're now going into LAST_ACK state
				 */
				nstate = IPF_TCPS_LAST_ACK;
			} else {
				/*
				 * we remain in CLOSE_WAIT because the other
				 * side has closed already and we did not
				 * close our side yet; reset timeout
				 */
				nstate = IPF_TCPS_CLOSE_WAIT;
			}
			break;

		case IPF_TCPS_FIN_WAIT_1: /* 7 */
			rval = 1;
			if ((tcpflags & TH_ACK) &&
			    ostate > IPF_TCPS_CLOSE_WAIT) {
				/*
				 * if the other side is not active anymore
				 * it has sent us a FIN packet that we are
				 * ack'ing now with an ACK; this means both
				 * sides have now closed the connection and
				 * we go into TIME_WAIT
				 */
				/*
				 * XXX: how do we know we really are ACKing
				 * the FIN packet here? does the window code
				 * guarantee that?
				 */
				nstate = IPF_TCPS_TIME_WAIT;
			} else {
				/*
				 * we closed our side of the connection
				 * already but the other side is still active
				 * (ESTABLISHED/CLOSE_WAIT); continue with
				 * this half-closed connection
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
			}
			break;

		case IPF_TCPS_CLOSING: /* 8 */
			/* NOT USED */
			break;

		case IPF_TCPS_LAST_ACK: /* 9 */
			if (tcpflags & TH_ACK) {
				if ((tcpflags & TH_PUSH) || dlen)
					/*
					 * there is still data to be delivered,
					 * reset timeout
					 */
					rval = 1;
				else
					rval = 2;
			}
			/*
			 * we cannot detect when we go out of LAST_ACK state to
			 * CLOSED because that is based on the reception of ACK
			 * packets; ipfilter can only detect that a packet
			 * has been sent by a host
			 */
			break;

		case IPF_TCPS_FIN_WAIT_2: /* 10 */
			/*
			 * Always accepted; an SA or an S seen in this state
			 * starts the handshake states over again.
			 */
			rval = 1;
			if ((tcpflags & TH_OPENING) == TH_OPENING)
				nstate = IPF_TCPS_SYN_RECEIVED;
			else if (tcpflags & TH_SYN)
				nstate = IPF_TCPS_SYN_SENT;
			break;

		case IPF_TCPS_TIME_WAIT: /* 11 */
			/* we're in 2MSL timeout now */
			rval = 1;
			break;

		default :
			/*
			 * The stored state is not one of the known values:
			 * report it, and abort in user space or panic in a
			 * DIAGNOSTIC kernel.  Otherwise rval stays 0 and the
			 * packet is rejected.
			 */
#if defined(_KERNEL)
# if SOLARIS
			cmn_err(CE_NOTE,
				"tcp %lx flags %x si %lx nstate %d ostate %d\n",
				(u_long)tcp, tcpflags, (u_long)tqe,
				nstate, ostate);
# else
			printf("tcp %lx flags %x si %lx nstate %d ostate %d\n",
				(u_long)tcp, tcpflags, (u_long)tqe,
				nstate, ostate);
# endif
# ifdef DIAGNOSTIC
			panic("invalid TCP state");
# endif
#else
			abort();
#endif
			break;
		}
	}

	/*
	 * If rval == 2 then do not update the queue position, but treat the
	 * packet as being ok.
	 */
	if (rval == 2)
		rval = 1;
	else if (rval == 1) {
		tqe->tqe_state[dir] = nstate;
		/* Rule based entries stay on the queue their rule chose. */
		if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
			fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate);
	}

	return rval;
}
3254
3255
3256 /* ------------------------------------------------------------------------ */
3257 /* Function: ipstate_log */
3258 /* Returns: Nil */
3259 /* Parameters: is(I) - pointer to state structure */
3260 /* type(I) - type of log entry to create */
3261 /* */
3262 /* Creates a state table log entry using the state structure and type info. */
3263 /* passed in. Log packet/byte counts, source/destination address and other */
3264 /* protocol specific information. */
3265 /* ------------------------------------------------------------------------ */
3266 void ipstate_log(is, type)
3267 struct ipstate *is;
3268 u_int type;
3269 {
3270 #ifdef IPFILTER_LOG
3271 struct ipslog ipsl;
3272 size_t sizes[1];
3273 void *items[1];
3274 int types[1];
3275
3276 /*
3277 * Copy information out of the ipstate_t structure and into the
3278 * structure used for logging.
3279 */
3280 ipsl.isl_type = type;
3281 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0];
3282 ipsl.isl_bytes[0] = is->is_bytes[0];
3283 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1];
3284 ipsl.isl_bytes[1] = is->is_bytes[1];
3285 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2];
3286 ipsl.isl_bytes[2] = is->is_bytes[2];
3287 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3];
3288 ipsl.isl_bytes[3] = is->is_bytes[3];
3289 ipsl.isl_src = is->is_src;
3290 ipsl.isl_dst = is->is_dst;
3291 ipsl.isl_p = is->is_p;
3292 ipsl.isl_v = is->is_v;
3293 ipsl.isl_flags = is->is_flags;
3294 ipsl.isl_tag = is->is_tag;
3295 ipsl.isl_rulen = is->is_rulen;
3296 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN);
3297
3298 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
3299 ipsl.isl_sport = is->is_sport;
3300 ipsl.isl_dport = is->is_dport;
3301 if (ipsl.isl_p == IPPROTO_TCP) {
3302 ipsl.isl_state[0] = is->is_state[0];
3303 ipsl.isl_state[1] = is->is_state[1];
3304 }
3305 } else if (ipsl.isl_p == IPPROTO_ICMP) {
3306 ipsl.isl_itype = is->is_icmp.ici_type;
3307 } else if (ipsl.isl_p == IPPROTO_ICMPV6) {
3308 ipsl.isl_itype = is->is_icmp.ici_type;
3309 } else {
3310 ipsl.isl_ps.isl_filler[0] = 0;
3311 ipsl.isl_ps.isl_filler[1] = 0;
3312 }
3313
3314 items[0] = &ipsl;
3315 sizes[0] = sizeof(ipsl);
3316 types[0] = 0;
3317
3318 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1)) {
3319 ATOMIC_INCL(ips_stats.iss_logged);
3320 } else {
3321 ATOMIC_INCL(ips_stats.iss_logfail);
3322 }
3323 #endif
3324 }
3325
3326
3327 #ifdef USE_INET6
3328 /* ------------------------------------------------------------------------ */
3329 /* Function: fr_checkicmp6matchingstate */
3330 /* Returns: ipstate_t* - NULL == no match found, */
3331 /* else pointer to matching state entry */
3332 /* Parameters: fin(I) - pointer to packet information */
3333 /* Locks: NULL == no locks, else Read Lock on ipf_state */
3334 /* */
3335 /* If we've got an ICMPv6 error message, using the information stored in */
3336 /* the ICMPv6 packet, look for a matching state table entry. */
3337 /* ------------------------------------------------------------------------ */
3338 static ipstate_t *fr_checkicmp6matchingstate(fin)
3339 fr_info_t *fin;
3340 {
3341 struct icmp6_hdr *ic6, *oic;
3342 int type, backward, i;
3343 ipstate_t *is, **isp;
3344 u_short sport, dport;
3345 i6addr_t dst, src;
3346 u_short savelen;
3347 icmpinfo_t *ic;
3348 fr_info_t ofin;
3349 tcphdr_t *tcp;
3350 ip6_t *oip6;
3351 u_char pr;
3352 u_int hv;
3353
3354 /*
3355 * Does it at least have the return (basic) IP header ?
3356 * Only a basic IP header (no options) should be with
3357 * an ICMP error header.
3358 */
3359 if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN))
3360 return NULL;
3361
3362 ic6 = fin->fin_dp;
3363 type = ic6->icmp6_type;
3364 /*
3365 * If it's not an error type, then return
3366 */
3367 if ((type != ICMP6_DST_UNREACH) && (type != ICMP6_PACKET_TOO_BIG) &&
3368 (type != ICMP6_TIME_EXCEEDED) && (type != ICMP6_PARAM_PROB))
3369 return NULL;
3370
3371 oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
3372 if (fin->fin_plen < sizeof(*oip6))
3373 return NULL;
3374
3375 bcopy((char *)fin, (char *)&ofin, sizeof(fin));
3376 ofin.fin_v = 6;
3377 ofin.fin_ifp = fin->fin_ifp;
3378 ofin.fin_out = !fin->fin_out;
3379 ofin.fin_m = NULL; /* if dereferenced, panic XXX */
3380 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
3381
3382 /*
3383 * We make a fin entry to be able to feed it to
3384 * matchsrcdst. Note that not all fields are necessary
3385 * but this is the cleanest way. Note further we fill
3386 * in fin_mp such that if someone uses it we'll get
3387 * a kernel panic. fr_matchsrcdst does not use this.
3388 *
3389 * watch out here, as ip is in host order and oip6 in network
3390 * order. Any change we make must be undone afterwards.
3391 */
3392 savelen = oip6->ip6_plen;
3393 oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
3394 ofin.fin_flx = FI_NOCKSUM;
3395 ofin.fin_ip = (ip_t *)oip6;
3396 ofin.fin_plen = oip6->ip6_plen;
3397 (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
3398 ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
3399 oip6->ip6_plen = savelen;
3400
3401 if (oip6->ip6_nxt == IPPROTO_ICMPV6) {
3402 oic = (struct icmp6_hdr *)(oip6 + 1);
3403 /*
3404 * an ICMP error can only be generated as a result of an
3405 * ICMP query, not as the response on an ICMP error
3406 *
3407 * XXX theoretically ICMP_ECHOREP and the other reply's are
3408 * ICMP query's as well, but adding them here seems strange XXX
3409 */
3410 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK))
3411 return NULL;
3412
3413 /*
3414 * perform a lookup of the ICMP packet in the state table
3415 */
3416 hv = (pr = oip6->ip6_nxt);
3417 src.in6 = oip6->ip6_src;
3418 hv += src.in4.s_addr;
3419 dst.in6 = oip6->ip6_dst;
3420 hv += dst.in4.s_addr;
3421 hv += oic->icmp6_id;
3422 hv += oic->icmp6_seq;
3423 hv = DOUBLE_HASH(hv);
3424
3425 READ_ENTER(&ipf_state);
3426 for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
3427 ic = &is->is_icmp;
3428 isp = &is->is_hnext;
3429 if ((is->is_p == pr) &&
3430 !(is->is_pass & FR_NOICMPERR) &&
3431 (oic->icmp6_id == ic->ici_id) &&
3432 (oic->icmp6_seq == ic->ici_seq) &&
3433 (is = fr_matchsrcdst(&ofin, is, &src,
3434 &dst, NULL, FI_ICMPCMP))) {
3435 /*
3436 * in the state table ICMP query's are stored
3437 * with the type of the corresponding ICMP
3438 * response. Correct here
3439 */
3440 if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
3441 (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
3442 (ic->ici_type - 1 == oic->icmp6_type )) {
3443 ips_stats.iss_hits++;
3444 backward = IP6_NEQ(&is->is_dst, &src);
3445 i = (backward << 1) + fin->fin_out;
3446 is->is_pkts[i]++;
3447 is->is_bytes[i] += fin->fin_plen;
3448 return is;
3449 }
3450 }
3451 }
3452 RWLOCK_EXIT(&ipf_state);
3453 return NULL;
3454 }
3455
3456 hv = (pr = oip6->ip6_nxt);
3457 src.in6 = oip6->ip6_src;
3458 hv += src.i6[0];
3459 hv += src.i6[1];
3460 hv += src.i6[2];
3461 hv += src.i6[3];
3462 dst.in6 = oip6->ip6_dst;
3463 hv += dst.i6[0];
3464 hv += dst.i6[1];
3465 hv += dst.i6[2];
3466 hv += dst.i6[3];
3467
3468 if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) {
3469 tcp = (tcphdr_t *)(oip6 + 1);
3470 dport = tcp->th_dport;
3471 sport = tcp->th_sport;
3472 hv += dport;
3473 hv += sport;
3474 } else
3475 tcp = NULL;
3476 hv = DOUBLE_HASH(hv);
3477
3478 READ_ENTER(&ipf_state);
3479 for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
3480 isp = &is->is_hnext;
3481 /*
3482 * Only allow this icmp though if the
3483 * encapsulated packet was allowed through the
3484 * other way around. Note that the minimal amount
3485 * of info present does not allow for checking against
3486 * tcp internals such as seq and ack numbers.
3487 */
3488 if ((is->is_p != pr) || (is->is_v != 6) ||
3489 (is->is_pass & FR_NOICMPERR))
3490 continue;
3491 is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
3492 if (is != NULL) {
3493 ips_stats.iss_hits++;
3494 backward = IP6_NEQ(&is->is_dst, &src);
3495 i = (backward << 1) + fin->fin_out;
3496 is->is_pkts[i]++;
3497 is->is_bytes[i] += fin->fin_plen;
3498 /*
3499 * we deliberately do not touch the timeouts
3500 * for the accompanying state table entry.
3501 * It remains to be seen if that is correct. XXX
3502 */
3503 return is;
3504 }
3505 }
3506 RWLOCK_EXIT(&ipf_state);
3507 return NULL;
3508 }
3509 #endif
3510
3511
3512 /* ------------------------------------------------------------------------ */
3513 /* Function: fr_sttab_init */
3514 /* Returns: Nil */
3515 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
3516 /* */
3517 /* Initialise the array of timeout queues for TCP. */
3518 /* ------------------------------------------------------------------------ */
3519 void fr_sttab_init(tqp)
3520 ipftq_t *tqp;
3521 {
3522 int i;
3523
3524 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
3525 tqp[i].ifq_ttl = 0;
3526 tqp[i].ifq_head = NULL;
3527 tqp[i].ifq_tail = &tqp[i].ifq_head;
3528 tqp[i].ifq_next = tqp + i + 1;
3529 MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab");
3530 }
3531 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
3532 tqp[IPF_TCPS_CLOSED].ifq_ttl = fr_tcpclosed;
3533 tqp[IPF_TCPS_LISTEN].ifq_ttl = fr_tcptimeout;
3534 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = fr_tcptimeout;
3535 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = fr_tcptimeout;
3536 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = fr_tcpidletimeout;
3537 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = fr_tcphalfclosed;
3538 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = fr_tcphalfclosed;
3539 tqp[IPF_TCPS_CLOSING].ifq_ttl = fr_tcptimeout;
3540 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = fr_tcplastack;
3541 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = fr_tcpclosewait;
3542 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = fr_tcptimeout;
3543 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = fr_tcptimeout;
3544 }
3545
3546
3547 /* ------------------------------------------------------------------------ */
3548 /* Function: fr_sttab_destroy */
3549 /* Returns: Nil */
3550 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
3551 /* */
3552 /* Do whatever is necessary to "destroy" each of the entries in the array */
3553 /* of timeout queues for TCP. */
3554 /* ------------------------------------------------------------------------ */
3555 void fr_sttab_destroy(tqp)
3556 ipftq_t *tqp;
3557 {
3558 int i;
3559
3560 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--)
3561 MUTEX_DESTROY(&tqp[i].ifq_lock);
3562 }
3563
3564
3565 /* ------------------------------------------------------------------------ */
3566 /* Function: fr_statederef */
3567 /* Returns: Nil */
3568 /* Parameters: isp(I) - pointer to pointer to state table entry */
3569 /* */
3570 /* Decrement the reference counter for this state table entry and free it */
3571 /* if there are no more things using it. */
3572 /* */
3573 /* When operating in userland (ipftest), we have no timers to clear a state */
3574 /* entry. Therefore, we make a few simple tests before deleting an entry */
3575 /* outright. We compare states on each side looking for a combination of */
3576 /* TIME_WAIT (should really be FIN_WAIT_2?) and LAST_ACK. Then we factor */
3577 /* in packet direction with the interface list to make sure we don't */
3578 /* prematurely delete an entry on a final inbound packet that's we're also */
3579 /* supposed to route elsewhere. */
3580 /* */
3581 /* Internal parameters: */
3582 /* state[0] = state of source (host that initiated connection) */
3583 /* state[1] = state of dest (host that accepted the connection) */
3584 /* */
3585 /* dir == 0 : a packet from source to dest */
3586 /* dir == 1 : a packet from dest to source */
3587 /* ------------------------------------------------------------------------ */
3588 void fr_statederef(fin, isp)
3589 fr_info_t *fin;
3590 ipstate_t **isp;
3591 {
3592 ipstate_t *is = *isp;
3593 #if 0
3594 int nstate, ostate, dir, eol;
3595
3596 eol = 0; /* End-of-the-line flag. */
3597 dir = fin->fin_rev;
3598 ostate = is->is_state[1 - dir];
3599 nstate = is->is_state[dir];
3600 /*
3601 * Determine whether this packet is local or routed. State entries
3602 * with us as the destination will have an interface list of
3603 * int1,-,-,int1. Entries with us as the origin run as -,int1,int1,-.
3604 */
3605 if ((fin->fin_p == IPPROTO_TCP) && (fin->fin_out == 0)) {
3606 if ((strcmp(is->is_ifname[0], is->is_ifname[3]) == 0) &&
3607 (strcmp(is->is_ifname[1], is->is_ifname[2]) == 0)) {
3608 if ((dir == 0) &&
3609 (strcmp(is->is_ifname[1], "-") == 0) &&
3610 (strcmp(is->is_ifname[0], "-") != 0)) {
3611 eol = 1;
3612 } else if ((dir == 1) &&
3613 (strcmp(is->is_ifname[0], "-") == 0) &&
3614 (strcmp(is->is_ifname[1], "-") != 0)) {
3615 eol = 1;
3616 }
3617 }
3618 }
3619 #endif
3620
3621 fin = fin; /* LINT */
3622 is = *isp;
3623 *isp = NULL;
3624 MUTEX_ENTER(&is->is_lock);
3625 is->is_ref--;
3626 if (is->is_ref == 0) {
3627 is->is_ref++; /* To counter ref-- in fr_delstate() */
3628 MUTEX_EXIT(&is->is_lock);
3629 WRITE_ENTER(&ipf_state);
3630 fr_delstate(is, ISL_EXPIRE);
3631 RWLOCK_EXIT(&ipf_state);
3632 #ifndef _KERNEL
3633 #if 0
3634 } else if (((fin->fin_out == 1) || (eol == 1)) &&
3635 ((ostate == IPF_TCPS_LAST_ACK) &&
3636 (nstate == IPF_TCPS_TIME_WAIT))) {
3637 #else
3638 } else if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) ||
3639 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) {
3640 #endif
3641 MUTEX_EXIT(&is->is_lock);
3642 WRITE_ENTER(&ipf_state);
3643 fr_delstate(is, ISL_ORPHAN);
3644 RWLOCK_EXIT(&ipf_state);
3645 #endif
3646 } else {
3647 MUTEX_EXIT(&is->is_lock);
3648 }
3649 }
3650
3651
3652 /* ------------------------------------------------------------------------ */
3653 /* Function: fr_setstatequeue */
3654 /* Returns: Nil */
3655 /* Parameters: is(I) - pointer to state structure */
3656 /* rev(I) - forward(0) or reverse(1) direction */
3657 /* Locks: ipf_state (read or write) */
3658 /* */
3659 /* Put the state entry on its default queue entry, using rev as a helped in */
3660 /* determining which queue it should be placed on. */
3661 /* ------------------------------------------------------------------------ */
3662 void fr_setstatequeue(is, rev)
3663 ipstate_t *is;
3664 int rev;
3665 {
3666 ipftq_t *oifq, *nifq;
3667
3668
3669 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0)
3670 nifq = is->is_tqehead[rev];
3671 else
3672 nifq = NULL;
3673
3674 if (nifq == NULL) {
3675 switch (is->is_p)
3676 {
3677 #ifdef USE_INET6
3678 case IPPROTO_ICMPV6 :
3679 if (rev == 1)
3680 nifq = &ips_icmpacktq;
3681 else
3682 nifq = &ips_icmptq;
3683 break;
3684 #endif
3685 case IPPROTO_ICMP :
3686 if (rev == 1)
3687 nifq = &ips_icmpacktq;
3688 else
3689 nifq = &ips_icmptq;
3690 break;
3691 case IPPROTO_TCP :
3692 nifq = ips_tqtqb + is->is_state[rev];
3693 break;
3694 default :
3695 if (rev == 1)
3696 nifq = &ips_udpacktq;
3697 else
3698 nifq = &ips_udptq;
3699 break;
3700 }
3701 }
3702
3703 oifq = is->is_sti.tqe_ifq;
3704 /*
3705 * If it's currently on a timeout queue, move it from one queue to
3706 * another, else put it on the end of the newly determined queue.
3707 */
3708 if (oifq != NULL)
3709 fr_movequeue(&is->is_sti, oifq, nifq);
3710 else
3711 fr_queueappend(&is->is_sti, nifq, is);
3712 return;
3713 }
Cache object: fe3d161babf1408f34ba4a1a5c4d03c1
|