sys/net/pf.c
1 /* $OpenBSD: pf.c,v 1.1171 2023/01/22 23:05:51 yasuoka Exp $ */
2
3 /*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * - Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer in the documentation and/or other materials provided
17 * with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38 #include "bpfilter.h"
39 #include "carp.h"
40 #include "pflog.h"
41 #include "pfsync.h"
42 #include "pflow.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/mbuf.h>
47 #include <sys/filio.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/kernel.h>
51 #include <sys/time.h>
52 #include <sys/pool.h>
53 #include <sys/proc.h>
54 #include <sys/rwlock.h>
55 #include <sys/syslog.h>
56
57 #include <crypto/sha2.h>
58
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_types.h>
62 #include <net/route.h>
63 #include <net/toeplitz.h>
64
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/ip.h>
68 #include <netinet/in_pcb.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/ip_icmp.h>
71 #include <netinet/icmp_var.h>
72 #include <netinet/tcp.h>
73 #include <netinet/tcp_seq.h>
74 #include <netinet/tcp_timer.h>
75 #include <netinet/tcp_var.h>
76 #include <netinet/tcp_fsm.h>
77 #include <netinet/udp.h>
78 #include <netinet/udp_var.h>
79 #include <netinet/ip_divert.h>
80
81 #ifdef INET6
82 #include <netinet6/in6_var.h>
83 #include <netinet/ip6.h>
84 #include <netinet6/ip6_var.h>
85 #include <netinet/icmp6.h>
86 #include <netinet6/nd6.h>
87 #include <netinet6/ip6_divert.h>
88 #endif /* INET6 */
89
90 #include <net/pfvar.h>
91 #include <net/pfvar_priv.h>
92
93 #if NPFLOG > 0
94 #include <net/if_pflog.h>
95 #endif /* NPFLOG > 0 */
96
97 #if NPFLOW > 0
98 #include <net/if_pflow.h>
99 #endif /* NPFLOW > 0 */
100
101 #if NPFSYNC > 0
102 #include <net/if_pfsync.h>
103 #else
104 struct pfsync_deferral;
105 #endif /* NPFSYNC > 0 */
106
107 /*
108 * Global variables
109 */
110 struct pf_state_tree pf_statetbl;
111 struct pf_queuehead pf_queues[2];
112 struct pf_queuehead *pf_queues_active;
113 struct pf_queuehead *pf_queues_inactive;
114
115 struct pf_status pf_status;
116
117 int pf_hdr_limit = 20; /* arbitrary limit, tune in ddb */
118
119 SHA2_CTX pf_tcp_secret_ctx;
120 u_char pf_tcp_secret[16];
121 int pf_tcp_secret_init;
122 int pf_tcp_iss_off;
123
124 int pf_npurge;
125 struct task pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
126 struct timeout pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);
127
128 enum pf_test_status {
129 PF_TEST_FAIL = -1,
130 PF_TEST_OK,
131 PF_TEST_QUICK
132 };
133
134 struct pf_test_ctx {
135 struct pf_pdesc *pd;
136 struct pf_rule_actions act;
137 u_int8_t icmpcode;
138 u_int8_t icmptype;
139 int icmp_dir;
140 int state_icmp;
141 int tag;
142 u_short reason;
143 struct pf_rule_item *ri;
144 struct pf_src_node *sns[PF_SN_MAX];
145 struct pf_rule_slist rules;
146 struct pf_rule *nr;
147 struct pf_rule **rm;
148 struct pf_rule *a;
149 struct pf_rule **am;
150 struct pf_ruleset **rsm;
151 struct pf_ruleset *arsm;
152 struct pf_ruleset *aruleset;
153 struct tcphdr *th;
154 };
155
156 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
157 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
158 struct pool pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;
159
160 void pf_add_threshold(struct pf_threshold *);
161 int pf_check_threshold(struct pf_threshold *);
162 int pf_check_tcp_cksum(struct mbuf *, int, int,
163 sa_family_t);
164 __inline void pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
165 u_int8_t);
166 void pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
167 const struct pf_addr *, sa_family_t, u_int8_t);
168 int pf_modulate_sack(struct pf_pdesc *,
169 struct pf_state_peer *);
170 int pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
171 u_int16_t *, u_int16_t *);
172 int pf_change_icmp_af(struct mbuf *, int,
173 struct pf_pdesc *, struct pf_pdesc *,
174 struct pf_addr *, struct pf_addr *, sa_family_t,
175 sa_family_t);
176 int pf_translate_a(struct pf_pdesc *, struct pf_addr *,
177 struct pf_addr *);
178 void pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
179 u_int16_t *, struct pf_addr *, struct pf_addr *,
180 u_int16_t);
181 int pf_translate_icmp_af(struct pf_pdesc*, int, void *);
182 void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
183 sa_family_t, struct pf_rule *, u_int);
184 void pf_detach_state(struct pf_state *);
185 struct pf_state_key *pf_state_key_attach(struct pf_state_key *,
186 struct pf_state *, int);
187 void pf_state_key_detach(struct pf_state *, int);
188 u_int32_t pf_tcp_iss(struct pf_pdesc *);
189 void pf_rule_to_actions(struct pf_rule *,
190 struct pf_rule_actions *);
191 int pf_test_rule(struct pf_pdesc *, struct pf_rule **,
192 struct pf_state **, struct pf_rule **,
193 struct pf_ruleset **, u_short *,
194 struct pfsync_deferral **);
195 static __inline int pf_create_state(struct pf_pdesc *, struct pf_rule *,
196 struct pf_rule *, struct pf_rule *,
197 struct pf_state_key **, struct pf_state_key **,
198 int *, struct pf_state **, int,
199 struct pf_rule_slist *, struct pf_rule_actions *,
200 struct pf_src_node **);
201 static __inline int pf_state_key_addr_setup(struct pf_pdesc *, void *,
202 int, struct pf_addr *, int, struct pf_addr *,
203 int, int);
204 int pf_state_key_setup(struct pf_pdesc *, struct
205 pf_state_key **, struct pf_state_key **, int);
206 int pf_tcp_track_full(struct pf_pdesc *,
207 struct pf_state **, u_short *, int *, int);
208 int pf_tcp_track_sloppy(struct pf_pdesc *,
209 struct pf_state **, u_short *);
210 static __inline int pf_synproxy(struct pf_pdesc *, struct pf_state **,
211 u_short *);
212 int pf_test_state(struct pf_pdesc *, struct pf_state **,
213 u_short *);
214 int pf_icmp_state_lookup(struct pf_pdesc *,
215 struct pf_state_key_cmp *, struct pf_state **,
216 u_int16_t, u_int16_t, int, int *, int, int);
217 int pf_test_state_icmp(struct pf_pdesc *,
218 struct pf_state **, u_short *);
219 u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int,
220 u_int16_t);
221 static __inline int pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
222 sa_family_t, struct pf_src_node **);
223 struct pf_divert *pf_get_divert(struct mbuf *);
224 int pf_walk_option(struct pf_pdesc *, struct ip *,
225 int, int, u_short *);
226 int pf_walk_header(struct pf_pdesc *, struct ip *,
227 u_short *);
228 int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
229 int, int, u_short *);
230 int pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
231 u_short *);
232 void pf_print_state_parts(struct pf_state *,
233 struct pf_state_key *, struct pf_state_key *);
234 int pf_addr_wrap_neq(struct pf_addr_wrap *,
235 struct pf_addr_wrap *);
236 int pf_compare_state_keys(struct pf_state_key *,
237 struct pf_state_key *, struct pfi_kif *, u_int);
238 u_int16_t pf_pkt_hash(sa_family_t, uint8_t,
239 const struct pf_addr *, const struct pf_addr *,
240 uint16_t, uint16_t);
241 int pf_find_state(struct pf_pdesc *,
242 struct pf_state_key_cmp *, struct pf_state **);
243 int pf_src_connlimit(struct pf_state **);
244 int pf_match_rcvif(struct mbuf *, struct pf_rule *);
245 int pf_step_into_anchor(struct pf_test_ctx *,
246 struct pf_rule *);
247 int pf_match_rule(struct pf_test_ctx *,
248 struct pf_ruleset *);
249 void pf_counters_inc(int, struct pf_pdesc *,
250 struct pf_state *, struct pf_rule *,
251 struct pf_rule *);
252
253 int pf_state_key_isvalid(struct pf_state_key *);
254 struct pf_state_key *pf_state_key_ref(struct pf_state_key *);
255 void pf_state_key_unref(struct pf_state_key *);
256 void pf_state_key_link_reverse(struct pf_state_key *,
257 struct pf_state_key *);
258 void pf_state_key_unlink_reverse(struct pf_state_key *);
259 void pf_state_key_link_inpcb(struct pf_state_key *,
260 struct inpcb *);
261 void pf_state_key_unlink_inpcb(struct pf_state_key *);
262 void pf_inpcb_unlink_state_key(struct inpcb *);
263 void pf_pktenqueue_delayed(void *);
264 int32_t pf_state_expires(const struct pf_state *, uint8_t);
265
266 #if NPFLOG > 0
267 void pf_log_matches(struct pf_pdesc *, struct pf_rule *,
268 struct pf_rule *, struct pf_ruleset *,
269 struct pf_rule_slist *);
270 #endif /* NPFLOG > 0 */
271
272 extern struct pool pfr_ktable_pl;
273 extern struct pool pfr_kentry_pl;
274
275 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
276 { &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT },
277 { &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT },
278 { &pf_frent_pl, PFFRAG_FRENT_HIWAT, PFFRAG_FRENT_HIWAT },
279 { &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT },
280 { &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT },
281 { &pf_pktdelay_pl, PF_PKTDELAY_MAXPKTS, PF_PKTDELAY_MAXPKTS },
282 { &pf_anchor_pl, PF_ANCHOR_HIWAT, PF_ANCHOR_HIWAT }
283 };
284
285 #define BOUND_IFACE(r, k) \
286 ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
287
288 #define STATE_INC_COUNTERS(s) \
289 do { \
290 struct pf_rule_item *mrm; \
291 s->rule.ptr->states_cur++; \
292 s->rule.ptr->states_tot++; \
293 if (s->anchor.ptr != NULL) { \
294 s->anchor.ptr->states_cur++; \
295 s->anchor.ptr->states_tot++; \
296 } \
297 SLIST_FOREACH(mrm, &s->match_rules, entry) \
298 mrm->r->states_cur++; \
299 } while (0)
300
301 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
302 static inline int pf_state_compare_key(const struct pf_state_key *,
303 const struct pf_state_key *);
304 static inline int pf_state_compare_id(const struct pf_state *,
305 const struct pf_state *);
306 #ifdef INET6
307 static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
308 static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
309 #endif /* INET6 */
310 static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t);
311
312 struct pf_src_tree tree_src_tracking;
313
314 struct pf_state_tree_id tree_id;
315 struct pf_state_list pf_state_list = PF_STATE_LIST_INITIALIZER(pf_state_list);
316
317 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
318 RBT_GENERATE(pf_state_tree, pf_state_key, sk_entry, pf_state_compare_key);
319 RBT_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);
320
321 int
322 pf_addr_compare(const struct pf_addr *a, const struct pf_addr *b,
323 sa_family_t af)
324 {
325 switch (af) {
326 case AF_INET:
327 if (a->addr32[0] > b->addr32[0])
328 return (1);
329 if (a->addr32[0] < b->addr32[0])
330 return (-1);
331 break;
332 #ifdef INET6
333 case AF_INET6:
334 if (a->addr32[3] > b->addr32[3])
335 return (1);
336 if (a->addr32[3] < b->addr32[3])
337 return (-1);
338 if (a->addr32[2] > b->addr32[2])
339 return (1);
340 if (a->addr32[2] < b->addr32[2])
341 return (-1);
342 if (a->addr32[1] > b->addr32[1])
343 return (1);
344 if (a->addr32[1] < b->addr32[1])
345 return (-1);
346 if (a->addr32[0] > b->addr32[0])
347 return (1);
348 if (a->addr32[0] < b->addr32[0])
349 return (-1);
350 break;
351 #endif /* INET6 */
352 }
353 return (0);
354 }
355
356 static __inline int
357 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
358 {
359 int diff;
360
361 if (a->rule.ptr > b->rule.ptr)
362 return (1);
363 if (a->rule.ptr < b->rule.ptr)
364 return (-1);
365 if ((diff = a->type - b->type) != 0)
366 return (diff);
367 if ((diff = a->af - b->af) != 0)
368 return (diff);
369 if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
370 return (diff);
371 return (0);
372 }
373
374 static __inline void
375 pf_set_protostate(struct pf_state *st, int which, u_int8_t newstate)
376 {
377 if (which == PF_PEER_DST || which == PF_PEER_BOTH)
378 st->dst.state = newstate;
379 if (which == PF_PEER_DST)
380 return;
381
382 if (st->src.state == newstate)
383 return;
384 if (st->creatorid == pf_status.hostid &&
385 st->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
386 !(TCPS_HAVEESTABLISHED(st->src.state) ||
387 st->src.state == TCPS_CLOSED) &&
388 (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
389 pf_status.states_halfopen--;
390
391 st->src.state = newstate;
392 }
393
394 void
395 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
396 {
397 switch (af) {
398 case AF_INET:
399 dst->addr32[0] = src->addr32[0];
400 break;
401 #ifdef INET6
402 case AF_INET6:
403 dst->addr32[0] = src->addr32[0];
404 dst->addr32[1] = src->addr32[1];
405 dst->addr32[2] = src->addr32[2];
406 dst->addr32[3] = src->addr32[3];
407 break;
408 #endif /* INET6 */
409 default:
410 unhandled_af(af);
411 }
412 }
413
414 void
415 pf_init_threshold(struct pf_threshold *threshold,
416 u_int32_t limit, u_int32_t seconds)
417 {
418 threshold->limit = limit * PF_THRESHOLD_MULT;
419 threshold->seconds = seconds;
420 threshold->count = 0;
421 threshold->last = getuptime();
422 }
423
424 void
425 pf_add_threshold(struct pf_threshold *threshold)
426 {
427 u_int32_t t = getuptime(), diff = t - threshold->last;
428
429 if (diff >= threshold->seconds)
430 threshold->count = 0;
431 else
432 threshold->count -= threshold->count * diff /
433 threshold->seconds;
434 threshold->count += PF_THRESHOLD_MULT;
435 threshold->last = t;
436 }
437
438 int
439 pf_check_threshold(struct pf_threshold *threshold)
440 {
441 return (threshold->count > threshold->limit);
442 }
443
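/*
 * Illustration (not part of pf.c): pf_init_threshold(),
 * pf_add_threshold() and pf_check_threshold() above implement a
 * fixed-point rate estimator.  Each event adds PF_THRESHOLD_MULT to
 * the counter after decaying it by the fraction of the averaging
 * window that has elapsed, so a pf.conf rate of "100/10" is stored
 * as limit = 100 * PF_THRESHOLD_MULT over a 10 second window.  A
 * self-contained user-space sketch of the same arithmetic, with
 * hypothetical ex_* names, combining the add and check steps:
 */

#define EX_THRESHOLD_MULT	1000	/* assumed value of PF_THRESHOLD_MULT */

struct ex_threshold {
	unsigned int	limit;		/* events * EX_THRESHOLD_MULT */
	unsigned int	seconds;	/* averaging window */
	unsigned int	count;		/* decayed fixed-point counter */
	unsigned int	last;		/* uptime at the previous event */
};

static int
ex_threshold_event(struct ex_threshold *t, unsigned int now)
{
	unsigned int diff = now - t->last;

	if (diff >= t->seconds)
		t->count = 0;
	else
		t->count -= t->count * diff / t->seconds;
	t->count += EX_THRESHOLD_MULT;
	t->last = now;

	return (t->count > t->limit);	/* nonzero once the rate is exceeded */
}
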
444 void
445 pf_state_list_insert(struct pf_state_list *pfs, struct pf_state *st)
446 {
447 /*
448 * we can always put states on the end of the list.
449 *
450 * things reading the list should take a read lock, then
451 * the mutex, get the head and tail pointers, release the
452 * mutex, and then they can iterate between the head and tail.
453 */
454
455 pf_state_ref(st); /* get a ref for the list */
456
457 mtx_enter(&pfs->pfs_mtx);
458 TAILQ_INSERT_TAIL(&pfs->pfs_list, st, entry_list);
459 mtx_leave(&pfs->pfs_mtx);
460 }
461
462 void
463 pf_state_list_remove(struct pf_state_list *pfs, struct pf_state *st)
464 {
465 /* states can only be removed when the write lock is held */
466 rw_assert_wrlock(&pfs->pfs_rwl);
467
468 mtx_enter(&pfs->pfs_mtx);
469 TAILQ_REMOVE(&pfs->pfs_list, st, entry_list);
470 mtx_leave(&pfs->pfs_mtx);
471
472 pf_state_unref(st); /* list no longer references the state */
473 }
474
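/*
 * Sketch of the reader protocol described above (hypothetical
 * ex_walk_states(); pf_purge_expired_states() below is the real
 * in-tree example): take the read lock, snapshot head and tail under
 * the mutex, then iterate without the mutex held.  The read lock
 * keeps removals out, so the snapshot stays valid for the walk.
 */
static void
ex_walk_states(struct pf_state_list *pfs)
{
	struct pf_state	*head, *tail, *st;

	rw_enter_read(&pfs->pfs_rwl);

	mtx_enter(&pfs->pfs_mtx);
	head = TAILQ_FIRST(&pfs->pfs_list);
	tail = TAILQ_LAST(&pfs->pfs_list, pf_state_queue);
	mtx_leave(&pfs->pfs_mtx);

	for (st = head; st != NULL; st = TAILQ_NEXT(st, entry_list)) {
		/* inspect st here */
		if (st == tail)		/* don't walk past the snapshot */
			break;
	}

	rw_exit_read(&pfs->pfs_rwl);
}
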
475 int
476 pf_src_connlimit(struct pf_state **stp)
477 {
478 int bad = 0;
479 struct pf_src_node *sn;
480
481 if ((sn = pf_get_src_node((*stp), PF_SN_NONE)) == NULL)
482 return (0);
483
484 sn->conn++;
485 (*stp)->src.tcp_est = 1;
486 pf_add_threshold(&sn->conn_rate);
487
488 if ((*stp)->rule.ptr->max_src_conn &&
489 (*stp)->rule.ptr->max_src_conn < sn->conn) {
490 pf_status.lcounters[LCNT_SRCCONN]++;
491 bad++;
492 }
493
494 if ((*stp)->rule.ptr->max_src_conn_rate.limit &&
495 pf_check_threshold(&sn->conn_rate)) {
496 pf_status.lcounters[LCNT_SRCCONNRATE]++;
497 bad++;
498 }
499
500 if (!bad)
501 return (0);
502
503 if ((*stp)->rule.ptr->overload_tbl) {
504 struct pfr_addr p;
505 u_int32_t killed = 0;
506
507 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
508 if (pf_status.debug >= LOG_NOTICE) {
509 log(LOG_NOTICE,
510 "pf: pf_src_connlimit: blocking address ");
511 pf_print_host(&sn->addr, 0,
512 (*stp)->key[PF_SK_WIRE]->af);
513 }
514
515 memset(&p, 0, sizeof(p));
516 p.pfra_af = (*stp)->key[PF_SK_WIRE]->af;
517 switch ((*stp)->key[PF_SK_WIRE]->af) {
518 case AF_INET:
519 p.pfra_net = 32;
520 p.pfra_ip4addr = sn->addr.v4;
521 break;
522 #ifdef INET6
523 case AF_INET6:
524 p.pfra_net = 128;
525 p.pfra_ip6addr = sn->addr.v6;
526 break;
527 #endif /* INET6 */
528 }
529
530 pfr_insert_kentry((*stp)->rule.ptr->overload_tbl,
531 &p, gettime());
532
533 /* kill existing states if that's required. */
534 if ((*stp)->rule.ptr->flush) {
535 struct pf_state_key *sk;
536 struct pf_state *st;
537
538 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
539 RBT_FOREACH(st, pf_state_tree_id, &tree_id) {
540 sk = st->key[PF_SK_WIRE];
541 /*
542 * Kill states from this source. (Only those
543 * from the same rule if PF_FLUSH_GLOBAL is not
544 * set)
545 */
546 if (sk->af ==
547 (*stp)->key[PF_SK_WIRE]->af &&
548 (((*stp)->direction == PF_OUT &&
549 PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
550 ((*stp)->direction == PF_IN &&
551 PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
552 ((*stp)->rule.ptr->flush &
553 PF_FLUSH_GLOBAL ||
554 (*stp)->rule.ptr == st->rule.ptr)) {
555 st->timeout = PFTM_PURGE;
556 pf_set_protostate(st, PF_PEER_BOTH,
557 TCPS_CLOSED);
558 killed++;
559 }
560 }
561 if (pf_status.debug >= LOG_NOTICE)
562 addlog(", %u states killed", killed);
563 }
564 if (pf_status.debug >= LOG_NOTICE)
565 addlog("\n");
566 }
567
568 /* kill this state */
569 (*stp)->timeout = PFTM_PURGE;
570 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_CLOSED);
571 return (1);
572 }
573
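/*
 * pf_src_connlimit() above enforces the per-source limits set in
 * pf.conf state options, e.g. (configuration, not C; the table name
 * is illustrative):
 *
 *	table <bruteforce> persist
 *	block quick from <bruteforce>
 *	pass in on egress proto tcp to port ssh keep state \
 *	    (max-src-conn 10, max-src-conn-rate 5/30, \
 *	    overload <bruteforce> flush global)
 *
 * max-src-conn drives the LCNT_SRCCONN check, max-src-conn-rate the
 * threshold check, and "overload ... flush global" the
 * pfr_insert_kentry() call and the RBT_FOREACH() flush loop above.
 */
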
574 int
575 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
576 enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
577 struct pf_addr *raddr, struct pfi_kif *kif)
578 {
579 struct pf_src_node k;
580
581 if (*sn == NULL) {
582 k.af = af;
583 k.type = type;
584 pf_addrcpy(&k.addr, src, af);
585 k.rule.ptr = rule;
586 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
587 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
588 }
589 if (*sn == NULL) {
590 if (!rule->max_src_nodes ||
591 rule->src_nodes < rule->max_src_nodes)
592 (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
593 else
594 pf_status.lcounters[LCNT_SRCNODES]++;
595 if ((*sn) == NULL)
596 return (-1);
597
598 pf_init_threshold(&(*sn)->conn_rate,
599 rule->max_src_conn_rate.limit,
600 rule->max_src_conn_rate.seconds);
601
602 (*sn)->type = type;
603 (*sn)->af = af;
604 (*sn)->rule.ptr = rule;
605 pf_addrcpy(&(*sn)->addr, src, af);
606 if (raddr)
607 pf_addrcpy(&(*sn)->raddr, raddr, af);
608 if (RB_INSERT(pf_src_tree,
609 &tree_src_tracking, *sn) != NULL) {
610 if (pf_status.debug >= LOG_NOTICE) {
611 log(LOG_NOTICE,
612 "pf: src_tree insert failed: ");
613 pf_print_host(&(*sn)->addr, 0, af);
614 addlog("\n");
615 }
616 pool_put(&pf_src_tree_pl, *sn);
617 return (-1);
618 }
619 (*sn)->creation = getuptime();
620 (*sn)->rule.ptr->src_nodes++;
621 if (kif != NULL) {
622 (*sn)->kif = kif;
623 pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
624 }
625 pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
626 pf_status.src_nodes++;
627 } else {
628 if (rule->max_src_states &&
629 (*sn)->states >= rule->max_src_states) {
630 pf_status.lcounters[LCNT_SRCSTATES]++;
631 return (-1);
632 }
633 }
634 return (0);
635 }
636
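/*
 * Source nodes back the pf.conf "source-track" machinery, e.g.
 * (configuration, not C):
 *
 *	pass in proto tcp to port www keep state \
 *	    (source-track rule, max-src-nodes 100, max-src-states 50)
 *
 * In pf_insert_src_node() above, max-src-nodes is checked before a
 * new node is allocated; max-src-states is checked against an
 * existing node's state count in the else branch.
 */
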
637 void
638 pf_remove_src_node(struct pf_src_node *sn)
639 {
640 if (sn->states > 0 || sn->expire > getuptime())
641 return;
642
643 sn->rule.ptr->src_nodes--;
644 if (sn->rule.ptr->states_cur == 0 &&
645 sn->rule.ptr->src_nodes == 0)
646 pf_rm_rule(NULL, sn->rule.ptr);
647 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
648 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
649 pf_status.src_nodes--;
650 pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
651 pool_put(&pf_src_tree_pl, sn);
652 }
653
654 struct pf_src_node *
655 pf_get_src_node(struct pf_state *st, enum pf_sn_types type)
656 {
657 struct pf_sn_item *sni;
658
659 SLIST_FOREACH(sni, &st->src_nodes, next)
660 if (sni->sn->type == type)
661 return (sni->sn);
662 return (NULL);
663 }
664
665 void
666 pf_state_rm_src_node(struct pf_state *st, struct pf_src_node *sn)
667 {
668 struct pf_sn_item *sni, *snin, *snip = NULL;
669
670 for (sni = SLIST_FIRST(&st->src_nodes); sni; sni = snin) {
671 snin = SLIST_NEXT(sni, next);
672 if (sni->sn == sn) {
673 if (snip)
674 SLIST_REMOVE_AFTER(snip, next);
675 else
676 SLIST_REMOVE_HEAD(&st->src_nodes, next);
677 pool_put(&pf_sn_item_pl, sni);
678 sni = NULL;
679 sn->states--;
680 }
681 if (sni != NULL)
682 snip = sni;
683 }
684 }
685
686 /* state table stuff */
687
688 static inline int
689 pf_state_compare_key(const struct pf_state_key *a,
690 const struct pf_state_key *b)
691 {
692 int diff;
693
694 if ((diff = a->hash - b->hash) != 0)
695 return (diff);
696 if ((diff = a->proto - b->proto) != 0)
697 return (diff);
698 if ((diff = a->af - b->af) != 0)
699 return (diff);
700 if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
701 return (diff);
702 if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
703 return (diff);
704 if ((diff = a->port[0] - b->port[0]) != 0)
705 return (diff);
706 if ((diff = a->port[1] - b->port[1]) != 0)
707 return (diff);
708 if ((diff = a->rdomain - b->rdomain) != 0)
709 return (diff);
710 return (0);
711 }
712
713 static inline int
714 pf_state_compare_id(const struct pf_state *a, const struct pf_state *b)
715 {
716 if (a->id > b->id)
717 return (1);
718 if (a->id < b->id)
719 return (-1);
720 if (a->creatorid > b->creatorid)
721 return (1);
722 if (a->creatorid < b->creatorid)
723 return (-1);
724
725 return (0);
726 }
727
728 /*
729 * on failure, pf_state_key_attach() releases the pf_state_key
730 * reference and returns NULL.
731 */
732 struct pf_state_key *
733 pf_state_key_attach(struct pf_state_key *sk, struct pf_state *st, int idx)
734 {
735 struct pf_state_item *si;
736 struct pf_state_key *cur;
737 struct pf_state *oldst = NULL;
738
739 PF_ASSERT_LOCKED();
740
741 KASSERT(st->key[idx] == NULL);
742 sk->sk_removed = 0;
743 cur = RBT_INSERT(pf_state_tree, &pf_statetbl, sk);
744 if (cur != NULL) {
745 sk->sk_removed = 1;
746 /* key exists. check for same kif, if none, add to key */
747 TAILQ_FOREACH(si, &cur->sk_states, si_entry) {
748 struct pf_state *sist = si->si_st;
749 if (sist->kif == st->kif &&
750 ((sist->key[PF_SK_WIRE]->af == sk->af &&
751 sist->direction == st->direction) ||
752 (sist->key[PF_SK_WIRE]->af !=
753 sist->key[PF_SK_STACK]->af &&
754 sk->af == sist->key[PF_SK_STACK]->af &&
755 sist->direction != st->direction))) {
756 int reuse = 0;
757
758 if (sk->proto == IPPROTO_TCP &&
759 sist->src.state >= TCPS_FIN_WAIT_2 &&
760 sist->dst.state >= TCPS_FIN_WAIT_2)
761 reuse = 1;
762 if (pf_status.debug >= LOG_NOTICE) {
763 log(LOG_NOTICE,
764 "pf: %s key attach %s on %s: ",
765 (idx == PF_SK_WIRE) ?
766 "wire" : "stack",
767 reuse ? "reuse" : "failed",
768 st->kif->pfik_name);
769 pf_print_state_parts(st,
770 (idx == PF_SK_WIRE) ? sk : NULL,
771 (idx == PF_SK_STACK) ? sk : NULL);
772 addlog(", existing: ");
773 pf_print_state_parts(sist,
774 (idx == PF_SK_WIRE) ? sk : NULL,
775 (idx == PF_SK_STACK) ? sk : NULL);
776 addlog("\n");
777 }
778 if (reuse) {
779 pf_set_protostate(sist, PF_PEER_BOTH,
780 TCPS_CLOSED);
781 				/* remove late, or the state keys can go away */
782 oldst = sist;
783 } else {
784 pf_state_key_unref(sk);
785 return (NULL); /* collision! */
786 }
787 }
788 }
789
790 /* reuse the existing state key */
791 pf_state_key_unref(sk);
792 sk = cur;
793 }
794
795 if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
796 if (TAILQ_EMPTY(&sk->sk_states)) {
797 KASSERT(cur == NULL);
798 RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
799 sk->sk_removed = 1;
800 pf_state_key_unref(sk);
801 }
802
803 return (NULL);
804 }
805
806 st->key[idx] = pf_state_key_ref(sk); /* give a ref to state */
807 si->si_st = pf_state_ref(st);
808
809 /* list is sorted, if-bound states before floating */
810 if (st->kif == pfi_all)
811 TAILQ_INSERT_TAIL(&sk->sk_states, si, si_entry);
812 else
813 TAILQ_INSERT_HEAD(&sk->sk_states, si, si_entry);
814
815 if (oldst)
816 pf_remove_state(oldst);
817
818 /* caller owns the pf_state ref, which owns a pf_state_key ref now */
819 return (sk);
820 }
821
822 void
823 pf_detach_state(struct pf_state *st)
824 {
825 KASSERT(st->key[PF_SK_WIRE] != NULL);
826 pf_state_key_detach(st, PF_SK_WIRE);
827
828 KASSERT(st->key[PF_SK_STACK] != NULL);
829 if (st->key[PF_SK_STACK] != st->key[PF_SK_WIRE])
830 pf_state_key_detach(st, PF_SK_STACK);
831 }
832
833 void
834 pf_state_key_detach(struct pf_state *st, int idx)
835 {
836 struct pf_state_item *si;
837 struct pf_state_key *sk;
838
839 PF_ASSERT_LOCKED();
840
841 sk = st->key[idx];
842 if (sk == NULL)
843 return;
844
845 TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
846 if (si->si_st == st)
847 break;
848 }
849 if (si == NULL)
850 return;
851
852 TAILQ_REMOVE(&sk->sk_states, si, si_entry);
853 pool_put(&pf_state_item_pl, si);
854
855 if (TAILQ_EMPTY(&sk->sk_states)) {
856 RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
857 sk->sk_removed = 1;
858 pf_state_key_unlink_reverse(sk);
859 pf_state_key_unlink_inpcb(sk);
860 pf_state_key_unref(sk);
861 }
862
863 pf_state_unref(st);
864 }
865
866 struct pf_state_key *
867 pf_alloc_state_key(int pool_flags)
868 {
869 struct pf_state_key *sk;
870
871 if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
872 return (NULL);
873
874 PF_REF_INIT(sk->sk_refcnt);
875 TAILQ_INIT(&sk->sk_states);
876 sk->sk_removed = 1;
877
878 return (sk);
879 }
880
881 static __inline int
882 pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
883 struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
884 {
885 struct pf_state_key_cmp *key = arg;
886 #ifdef INET6
887 struct pf_addr *target;
888
889 if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
890 goto copy;
891
892 switch (pd->hdr.icmp6.icmp6_type) {
893 case ND_NEIGHBOR_SOLICIT:
894 if (multi)
895 return (-1);
896 target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
897 daddr = target;
898 break;
899 case ND_NEIGHBOR_ADVERT:
900 if (multi)
901 return (-1);
902 target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
903 saddr = target;
904 if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
905 key->addr[didx].addr32[0] = 0;
906 key->addr[didx].addr32[1] = 0;
907 key->addr[didx].addr32[2] = 0;
908 key->addr[didx].addr32[3] = 0;
909 daddr = NULL; /* overwritten */
910 }
911 break;
912 default:
913 if (multi) {
914 key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
915 key->addr[sidx].addr32[1] = 0;
916 key->addr[sidx].addr32[2] = 0;
917 key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
918 saddr = NULL; /* overwritten */
919 }
920 }
921 copy:
922 #endif /* INET6 */
923 if (saddr)
924 pf_addrcpy(&key->addr[sidx], saddr, af);
925 if (daddr)
926 pf_addrcpy(&key->addr[didx], daddr, af);
927
928 return (0);
929 }
930
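/*
 * Example (addresses illustrative): a neighbor solicitation for
 * target fe80::1 is sent to the solicited-node multicast group, so
 * pf_state_key_addr_setup() above keys it on the target address
 * rather than the packet's destination; the unicast neighbor
 * advertisement coming back from fe80::1 is keyed on its target as
 * source, so the reversed lookup matches the same state.
 */
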
931 int
932 pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
933 struct pf_state_key **sks, int rtableid)
934 {
935 	/* if returning error we MUST release the state keys ourselves */
936 struct pf_state_key *sk1, *sk2;
937 u_int wrdom = pd->rdomain;
938 int afto = pd->af != pd->naf;
939
940 if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
941 return (ENOMEM);
942
943 pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
944 pd->af, 0);
945 sk1->port[pd->sidx] = pd->osport;
946 sk1->port[pd->didx] = pd->odport;
947 sk1->proto = pd->proto;
948 sk1->af = pd->af;
949 sk1->rdomain = pd->rdomain;
950 sk1->hash = pf_pkt_hash(sk1->af, sk1->proto,
951 &sk1->addr[0], &sk1->addr[1], sk1->port[0], sk1->port[1]);
952 if (rtableid >= 0)
953 wrdom = rtable_l2(rtableid);
954
955 if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
956 PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
957 pd->nsport != pd->osport || pd->ndport != pd->odport ||
958 wrdom != pd->rdomain || afto) { /* NAT/NAT64 */
959 if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
960 pf_state_key_unref(sk1);
961 return (ENOMEM);
962 }
963 pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
964 &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
965 pd->naf, 0);
966 sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
967 sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
968 if (afto) {
969 switch (pd->proto) {
970 case IPPROTO_ICMP:
971 sk2->proto = IPPROTO_ICMPV6;
972 break;
973 case IPPROTO_ICMPV6:
974 sk2->proto = IPPROTO_ICMP;
975 break;
976 default:
977 sk2->proto = pd->proto;
978 }
979 } else
980 sk2->proto = pd->proto;
981 sk2->af = pd->naf;
982 sk2->rdomain = wrdom;
983 sk2->hash = pf_pkt_hash(sk2->af, sk2->proto,
984 &sk2->addr[0], &sk2->addr[1], sk2->port[0], sk2->port[1]);
985 } else
986 sk2 = pf_state_key_ref(sk1);
987
988 if (pd->dir == PF_IN) {
989 *skw = sk1;
990 *sks = sk2;
991 } else {
992 *sks = sk1;
993 *skw = sk2;
994 }
995
996 if (pf_status.debug >= LOG_DEBUG) {
997 log(LOG_DEBUG, "pf: key setup: ");
998 pf_print_state_parts(NULL, *skw, *sks);
999 addlog("\n");
1000 }
1001
1002 return (0);
1003 }
1004
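/*
 * Worked example (addresses illustrative): for an outbound TCP
 * connection 10.0.0.5:34567 -> 203.0.113.7:80 translated by
 * "nat-to 192.0.2.1", pf_state_key_setup() produces
 *
 *	stack key (sks): 10.0.0.5:34567  <-> 203.0.113.7:80
 *	wire key (skw):  192.0.2.1:51234 <-> 203.0.113.7:80
 *
 * With no translation in effect the NAT/NAT64 condition above is
 * false and a single key is shared: sk2 = pf_state_key_ref(sk1).
 */
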
1005 /*
1006 * pf_state_insert() does the following:
1007 * - links the pf_state up with pf_state_key(s).
1008 * - inserts the pf_state_keys into pf_state_tree.
1009  * - inserts the pf_state into pf_state_tree_id.
1010 * - tells pfsync about the state.
1011 *
1012 * pf_state_insert() owns the references to the pf_state_key structs
1013 * it is given. on failure to insert, these references are released.
1014 * on success, the caller owns a pf_state reference that allows it
1015 * to access the state keys.
1016 */
1017
1018 int
1019 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skwp,
1020 struct pf_state_key **sksp, struct pf_state *st)
1021 {
1022 struct pf_state_key *skw = *skwp;
1023 struct pf_state_key *sks = *sksp;
1024 int same = (skw == sks);
1025
1026 PF_ASSERT_LOCKED();
1027
1028 st->kif = kif;
1029 PF_STATE_ENTER_WRITE();
1030
1031 skw = pf_state_key_attach(skw, st, PF_SK_WIRE);
1032 if (skw == NULL) {
1033 pf_state_key_unref(sks);
1034 PF_STATE_EXIT_WRITE();
1035 return (-1);
1036 }
1037
1038 if (same) {
1039 /* pf_state_key_attach might have swapped skw */
1040 pf_state_key_unref(sks);
1041 st->key[PF_SK_STACK] = sks = pf_state_key_ref(skw);
1042 } else if (pf_state_key_attach(sks, st, PF_SK_STACK) == NULL) {
1043 pf_state_key_detach(st, PF_SK_WIRE);
1044 PF_STATE_EXIT_WRITE();
1045 return (-1);
1046 }
1047
1048 if (st->id == 0 && st->creatorid == 0) {
1049 st->id = htobe64(pf_status.stateid++);
1050 st->creatorid = pf_status.hostid;
1051 }
1052 if (RBT_INSERT(pf_state_tree_id, &tree_id, st) != NULL) {
1053 if (pf_status.debug >= LOG_NOTICE) {
1054 log(LOG_NOTICE, "pf: state insert failed: "
1055 "id: %016llx creatorid: %08x",
1056 betoh64(st->id), ntohl(st->creatorid));
1057 addlog("\n");
1058 }
1059 pf_detach_state(st);
1060 PF_STATE_EXIT_WRITE();
1061 return (-1);
1062 }
1063 pf_state_list_insert(&pf_state_list, st);
1064 pf_status.fcounters[FCNT_STATE_INSERT]++;
1065 pf_status.states++;
1066 pfi_kif_ref(kif, PFI_KIF_REF_STATE);
1067 #if NPFSYNC > 0
1068 pfsync_insert_state(st);
1069 #endif /* NPFSYNC > 0 */
1070 PF_STATE_EXIT_WRITE();
1071
1072 *skwp = skw;
1073 *sksp = sks;
1074
1075 return (0);
1076 }
1077
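/*
 * Caller sketch (hypothetical, condensed from the way
 * pf_create_state() uses this function): on failure the key
 * references have already been released, so the caller only cleans
 * up the state itself.
 *
 *	if (pf_state_insert(BOUND_IFACE(r, pd->kif), &skw, &sks,
 *	    st) != 0) {
 *		pool_put(&pf_state_pl, st);
 *		return (PF_DROP);
 *	}
 */
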
1078 struct pf_state *
1079 pf_find_state_byid(struct pf_state_cmp *key)
1080 {
1081 pf_status.fcounters[FCNT_STATE_SEARCH]++;
1082
1083 return (RBT_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
1084 }
1085
1086 int
1087 pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
1088 struct pfi_kif *kif, u_int dir)
1089 {
1090 /* a (from hdr) and b (new) must be exact opposites of each other */
1091 if (a->af == b->af && a->proto == b->proto &&
1092 PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
1093 PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
1094 a->port[0] == b->port[1] &&
1095 a->port[1] == b->port[0] && a->rdomain == b->rdomain)
1096 return (0);
1097 else {
1098 /* mismatch. must not happen. */
1099 if (pf_status.debug >= LOG_ERR) {
1100 log(LOG_ERR,
1101 "pf: state key linking mismatch! dir=%s, "
1102 "if=%s, stored af=%u, a0: ",
1103 dir == PF_OUT ? "OUT" : "IN",
1104 kif->pfik_name, a->af);
1105 pf_print_host(&a->addr[0], a->port[0], a->af);
1106 addlog(", a1: ");
1107 pf_print_host(&a->addr[1], a->port[1], a->af);
1108 addlog(", proto=%u", a->proto);
1109 addlog(", found af=%u, a0: ", b->af);
1110 pf_print_host(&b->addr[0], b->port[0], b->af);
1111 addlog(", a1: ");
1112 pf_print_host(&b->addr[1], b->port[1], b->af);
1113 addlog(", proto=%u", b->proto);
1114 addlog("\n");
1115 }
1116 return (-1);
1117 }
1118 }
1119
1120 int
1121 pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
1122 struct pf_state **stp)
1123 {
1124 struct pf_state_key *sk, *pkt_sk, *inp_sk;
1125 struct pf_state_item *si;
1126 struct pf_state *st = NULL;
1127
1128 pf_status.fcounters[FCNT_STATE_SEARCH]++;
1129 if (pf_status.debug >= LOG_DEBUG) {
1130 log(LOG_DEBUG, "pf: key search, %s on %s: ",
1131 pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name);
1132 pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
1133 addlog("\n");
1134 }
1135
1136 inp_sk = NULL;
1137 pkt_sk = NULL;
1138 sk = NULL;
1139 if (pd->dir == PF_OUT) {
1140 /* first if block deals with outbound forwarded packet */
1141 pkt_sk = pd->m->m_pkthdr.pf.statekey;
1142
1143 if (!pf_state_key_isvalid(pkt_sk)) {
1144 pf_mbuf_unlink_state_key(pd->m);
1145 pkt_sk = NULL;
1146 }
1147
1148 if (pkt_sk && pf_state_key_isvalid(pkt_sk->sk_reverse))
1149 sk = pkt_sk->sk_reverse;
1150
1151 if (pkt_sk == NULL) {
1152 /* here we deal with local outbound packet */
1153 if (pd->m->m_pkthdr.pf.inp != NULL) {
1154 inp_sk = pd->m->m_pkthdr.pf.inp->inp_pf_sk;
1155 if (pf_state_key_isvalid(inp_sk))
1156 sk = inp_sk;
1157 else
1158 pf_inpcb_unlink_state_key(
1159 pd->m->m_pkthdr.pf.inp);
1160 }
1161 }
1162 }
1163
1164 if (sk == NULL) {
1165 if ((sk = RBT_FIND(pf_state_tree, &pf_statetbl,
1166 (struct pf_state_key *)key)) == NULL)
1167 return (PF_DROP);
1168 if (pd->dir == PF_OUT && pkt_sk &&
1169 pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0)
1170 pf_state_key_link_reverse(sk, pkt_sk);
1171 else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp &&
1172 !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->sk_inp)
1173 pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp);
1174 }
1175
1176 /* remove firewall data from outbound packet */
1177 if (pd->dir == PF_OUT)
1178 pf_pkt_addr_changed(pd->m);
1179
1180 /* list is sorted, if-bound states before floating ones */
1181 TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
1182 struct pf_state *sist = si->si_st;
1183 if (sist->timeout != PFTM_PURGE &&
1184 (sist->kif == pfi_all || sist->kif == pd->kif) &&
1185 ((sist->key[PF_SK_WIRE]->af == sist->key[PF_SK_STACK]->af &&
1186 sk == (pd->dir == PF_IN ? sist->key[PF_SK_WIRE] :
1187 sist->key[PF_SK_STACK])) ||
1188 (sist->key[PF_SK_WIRE]->af != sist->key[PF_SK_STACK]->af
1189 && pd->dir == PF_IN && (sk == sist->key[PF_SK_STACK] ||
1190 sk == sist->key[PF_SK_WIRE])))) {
1191 st = sist;
1192 break;
1193 }
1194 }
1195
1196 if (st == NULL)
1197 return (PF_DROP);
1198 if (ISSET(st->state_flags, PFSTATE_INP_UNLINKED))
1199 return (PF_DROP);
1200
1201 if (st->rule.ptr->pktrate.limit && pd->dir == st->direction) {
1202 pf_add_threshold(&st->rule.ptr->pktrate);
1203 if (pf_check_threshold(&st->rule.ptr->pktrate))
1204 return (PF_DROP);
1205 }
1206
1207 *stp = st;
1208
1209 return (PF_MATCH);
1210 }
1211
1212 struct pf_state *
1213 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
1214 {
1215 struct pf_state_key *sk;
1216 struct pf_state_item *si, *ret = NULL;
1217
1218 pf_status.fcounters[FCNT_STATE_SEARCH]++;
1219
1220 sk = RBT_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);
1221
1222 if (sk != NULL) {
1223 TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
1224 struct pf_state *sist = si->si_st;
1225 if (dir == PF_INOUT ||
1226 (sk == (dir == PF_IN ? sist->key[PF_SK_WIRE] :
1227 sist->key[PF_SK_STACK]))) {
1228 if (more == NULL)
1229 return (sist);
1230
1231 if (ret)
1232 (*more)++;
1233 else
1234 ret = si;
1235 }
1236 }
1237 }
1238 return (ret ? ret->si_st : NULL);
1239 }
1240
1241 void
1242 pf_state_peer_hton(const struct pf_state_peer *s, struct pfsync_state_peer *d)
1243 {
1244 d->seqlo = htonl(s->seqlo);
1245 d->seqhi = htonl(s->seqhi);
1246 d->seqdiff = htonl(s->seqdiff);
1247 d->max_win = htons(s->max_win);
1248 d->mss = htons(s->mss);
1249 d->state = s->state;
1250 d->wscale = s->wscale;
1251 if (s->scrub) {
1252 d->scrub.pfss_flags =
1253 htons(s->scrub->pfss_flags & PFSS_TIMESTAMP);
1254 d->scrub.pfss_ttl = (s)->scrub->pfss_ttl;
1255 d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);
1256 d->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID;
1257 }
1258 }
1259
1260 void
1261 pf_state_peer_ntoh(const struct pfsync_state_peer *s, struct pf_state_peer *d)
1262 {
1263 d->seqlo = ntohl(s->seqlo);
1264 d->seqhi = ntohl(s->seqhi);
1265 d->seqdiff = ntohl(s->seqdiff);
1266 d->max_win = ntohs(s->max_win);
1267 d->mss = ntohs(s->mss);
1268 d->state = s->state;
1269 d->wscale = s->wscale;
1270 if (s->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID &&
1271 d->scrub != NULL) {
1272 d->scrub->pfss_flags =
1273 ntohs(s->scrub.pfss_flags) & PFSS_TIMESTAMP;
1274 d->scrub->pfss_ttl = s->scrub.pfss_ttl;
1275 d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod);
1276 }
1277 }
1278
1279 void
1280 pf_state_export(struct pfsync_state *sp, struct pf_state *st)
1281 {
1282 int32_t expire;
1283
1284 memset(sp, 0, sizeof(struct pfsync_state));
1285
1286 /* copy from state key */
1287 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
1288 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
1289 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
1290 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
1291 sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
1292 sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
1293 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
1294 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
1295 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
1296 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
1297 sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
1298 sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
1299 sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
1300 sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
1301 sp->proto = st->key[PF_SK_WIRE]->proto;
1302 sp->af = st->key[PF_SK_WIRE]->af;
1303
1304 /* copy from state */
1305 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
1306 sp->rt = st->rt;
1307 sp->rt_addr = st->rt_addr;
1308 sp->creation = htonl(getuptime() - st->creation);
1309 expire = pf_state_expires(st, st->timeout);
1310 if (expire <= getuptime())
1311 sp->expire = htonl(0);
1312 else
1313 sp->expire = htonl(expire - getuptime());
1314
1315 sp->direction = st->direction;
1316 #if NPFLOG > 0
1317 sp->log = st->log;
1318 #endif /* NPFLOG > 0 */
1319 sp->timeout = st->timeout;
1320 sp->state_flags = htons(st->state_flags);
1321 if (!SLIST_EMPTY(&st->src_nodes))
1322 sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
1323
1324 sp->id = st->id;
1325 sp->creatorid = st->creatorid;
1326 pf_state_peer_hton(&st->src, &sp->src);
1327 pf_state_peer_hton(&st->dst, &sp->dst);
1328
1329 if (st->rule.ptr == NULL)
1330 sp->rule = htonl(-1);
1331 else
1332 sp->rule = htonl(st->rule.ptr->nr);
1333 if (st->anchor.ptr == NULL)
1334 sp->anchor = htonl(-1);
1335 else
1336 sp->anchor = htonl(st->anchor.ptr->nr);
1337 sp->nat_rule = htonl(-1); /* left for compat, nat_rule is gone */
1338
1339 pf_state_counter_hton(st->packets[0], sp->packets[0]);
1340 pf_state_counter_hton(st->packets[1], sp->packets[1]);
1341 pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
1342 pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
1343
1344 sp->max_mss = htons(st->max_mss);
1345 sp->min_ttl = st->min_ttl;
1346 sp->set_tos = st->set_tos;
1347 sp->set_prio[0] = st->set_prio[0];
1348 sp->set_prio[1] = st->set_prio[1];
1349 }
1350
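/*
 * Example (numbers illustrative): sp->expire carries the remaining
 * lifetime, not an absolute time.  With tcp.established at 86400
 * seconds, a state due to expire 600 seconds from now is exported as
 * sp->expire = htonl(600); pf_state_import() below reconstructs
 * st->expire = now - (86400 - 600), so pf_state_expires() again
 * yields now + 600 on the receiving host regardless of its uptime.
 */
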
1351 int
1352 pf_state_alloc_scrub_memory(const struct pfsync_state_peer *s,
1353 struct pf_state_peer *d)
1354 {
1355 if (s->scrub.scrub_flag && d->scrub == NULL)
1356 return (pf_normalize_tcp_alloc(d));
1357
1358 return (0);
1359 }
1360
1361 #if NPFSYNC > 0
1362 int
1363 pf_state_import(const struct pfsync_state *sp, int flags)
1364 {
1365 struct pf_state *st = NULL;
1366 struct pf_state_key *skw = NULL, *sks = NULL;
1367 struct pf_rule *r = NULL;
1368 struct pfi_kif *kif;
1369 int pool_flags;
1370 int error = ENOMEM;
1371 int n = 0;
1372
1373 if (sp->creatorid == 0) {
1374 DPFPRINTF(LOG_NOTICE, "%s: invalid creator id: %08x", __func__,
1375 ntohl(sp->creatorid));
1376 return (EINVAL);
1377 }
1378
1379 if ((kif = pfi_kif_get(sp->ifname, NULL)) == NULL) {
1380 DPFPRINTF(LOG_NOTICE, "%s: unknown interface: %s", __func__,
1381 sp->ifname);
1382 if (flags & PFSYNC_SI_IOCTL)
1383 return (EINVAL);
1384 return (0); /* skip this state */
1385 }
1386
1387 if (sp->af == 0)
1388 return (0); /* skip this state */
1389
1390 /*
1391 * If the ruleset checksums match or the state is coming from the ioctl,
1392 * it's safe to associate the state with the rule of that number.
1393 */
1394 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
1395 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) &&
1396 ntohl(sp->rule) < pf_main_ruleset.rules.active.rcount) {
1397 TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries)
1398 if (ntohl(sp->rule) == n++)
1399 break;
1400 } else
1401 r = &pf_default_rule;
1402
1403 if ((r->max_states && r->states_cur >= r->max_states))
1404 goto cleanup;
1405
1406 if (flags & PFSYNC_SI_IOCTL)
1407 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
1408 else
1409 pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;
1410
1411 if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
1412 goto cleanup;
1413
1414 if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
1415 goto cleanup;
1416
1417 if ((sp->key[PF_SK_WIRE].af &&
1418 (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
1419 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
1420 &sp->key[PF_SK_STACK].addr[0], sp->af) ||
1421 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
1422 &sp->key[PF_SK_STACK].addr[1], sp->af) ||
1423 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
1424 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
1425 sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
1426 if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
1427 goto cleanup;
1428 } else
1429 sks = pf_state_key_ref(skw);
1430
1431 /* allocate memory for scrub info */
1432 if (pf_state_alloc_scrub_memory(&sp->src, &st->src) ||
1433 pf_state_alloc_scrub_memory(&sp->dst, &st->dst))
1434 goto cleanup;
1435
1436 /* copy to state key(s) */
1437 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
1438 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
1439 skw->port[0] = sp->key[PF_SK_WIRE].port[0];
1440 skw->port[1] = sp->key[PF_SK_WIRE].port[1];
1441 skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
1442 skw->proto = sp->proto;
1443 if (!(skw->af = sp->key[PF_SK_WIRE].af))
1444 skw->af = sp->af;
1445 skw->hash = pf_pkt_hash(skw->af, skw->proto,
1446 &skw->addr[0], &skw->addr[1], skw->port[0], skw->port[1]);
1447
1448 if (sks != skw) {
1449 sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
1450 sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
1451 sks->port[0] = sp->key[PF_SK_STACK].port[0];
1452 sks->port[1] = sp->key[PF_SK_STACK].port[1];
1453 sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
1454 if (!(sks->af = sp->key[PF_SK_STACK].af))
1455 sks->af = sp->af;
1456 if (sks->af != skw->af) {
1457 switch (sp->proto) {
1458 case IPPROTO_ICMP:
1459 sks->proto = IPPROTO_ICMPV6;
1460 break;
1461 case IPPROTO_ICMPV6:
1462 sks->proto = IPPROTO_ICMP;
1463 break;
1464 default:
1465 sks->proto = sp->proto;
1466 }
1467 } else
1468 sks->proto = sp->proto;
1469
1470 if (((sks->af != AF_INET) && (sks->af != AF_INET6)) ||
1471 ((skw->af != AF_INET) && (skw->af != AF_INET6))) {
1472 error = EINVAL;
1473 goto cleanup;
1474 }
1475
1476 sks->hash = pf_pkt_hash(sks->af, sks->proto,
1477 &sks->addr[0], &sks->addr[1], sks->port[0], sks->port[1]);
1478
1479 } else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) {
1480 error = EINVAL;
1481 goto cleanup;
1482 }
1483 st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
1484 st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);
1485
1486 /* copy to state */
1487 st->rt_addr = sp->rt_addr;
1488 st->rt = sp->rt;
1489 st->creation = getuptime() - ntohl(sp->creation);
1490 st->expire = getuptime();
1491 if (ntohl(sp->expire)) {
1492 u_int32_t timeout;
1493
1494 timeout = r->timeout[sp->timeout];
1495 if (!timeout)
1496 timeout = pf_default_rule.timeout[sp->timeout];
1497
1498 /* sp->expire may have been adaptively scaled by export. */
1499 st->expire -= timeout - ntohl(sp->expire);
1500 }
1501
1502 st->direction = sp->direction;
1503 st->log = sp->log;
1504 st->timeout = sp->timeout;
1505 st->state_flags = ntohs(sp->state_flags);
1506 st->max_mss = ntohs(sp->max_mss);
1507 st->min_ttl = sp->min_ttl;
1508 st->set_tos = sp->set_tos;
1509 st->set_prio[0] = sp->set_prio[0];
1510 st->set_prio[1] = sp->set_prio[1];
1511
1512 st->id = sp->id;
1513 st->creatorid = sp->creatorid;
1514 pf_state_peer_ntoh(&sp->src, &st->src);
1515 pf_state_peer_ntoh(&sp->dst, &st->dst);
1516
1517 st->rule.ptr = r;
1518 st->anchor.ptr = NULL;
1519
1520 st->pfsync_time = getuptime();
1521 st->sync_state = PFSYNC_S_NONE;
1522
1523 PF_REF_INIT(st->refcnt);
1524 mtx_init(&st->mtx, IPL_NET);
1525
1526 /* XXX when we have anchors, use STATE_INC_COUNTERS */
1527 r->states_cur++;
1528 r->states_tot++;
1529
1530 #if NPFSYNC > 0
1531 if (!ISSET(flags, PFSYNC_SI_IOCTL))
1532 SET(st->state_flags, PFSTATE_NOSYNC);
1533 #endif
1534
1535 /*
1536 	 * We just set the PFSTATE_NOSYNC flag, which prevents
1537 	 * pfsync_insert_state() from inserting the state into pfsync.
1538 */
1539 if (pf_state_insert(kif, &skw, &sks, st) != 0) {
1540 /* XXX when we have anchors, use STATE_DEC_COUNTERS */
1541 r->states_cur--;
1542 error = EEXIST;
1543 goto cleanup_state;
1544 }
1545
1546 #if NPFSYNC > 0
1547 if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
1548 CLR(st->state_flags, PFSTATE_NOSYNC);
1549 if (ISSET(st->state_flags, PFSTATE_ACK))
1550 pfsync_iack(st);
1551 }
1552 CLR(st->state_flags, PFSTATE_ACK);
1553 #endif
1554
1555 return (0);
1556
1557 cleanup:
1558 if (skw != NULL)
1559 pf_state_key_unref(skw);
1560 if (sks != NULL)
1561 pf_state_key_unref(sks);
1562
1563 cleanup_state: /* pf_state_insert frees the state keys */
1564 if (st) {
1565 if (st->dst.scrub)
1566 pool_put(&pf_state_scrub_pl, st->dst.scrub);
1567 if (st->src.scrub)
1568 pool_put(&pf_state_scrub_pl, st->src.scrub);
1569 pool_put(&pf_state_pl, st);
1570 }
1571 return (error);
1572 }
1573 #endif /* NPFSYNC > 0 */
1574
1575 /* END state table stuff */
1576
1577 void
1578 pf_purge_timeout(void *unused)
1579 {
1580 /* XXX move to systqmp to avoid KERNEL_LOCK */
1581 task_add(systq, &pf_purge_task);
1582 }
1583
1584 void
1585 pf_purge(void *xnloops)
1586 {
1587 int *nloops = xnloops;
1588
1589 /*
1590 * process a fraction of the state table every second
1591 * Note:
1592 * we no longer need PF_LOCK() here, because
1593 * pf_purge_expired_states() uses pf_state_lock to maintain
1594 * consistency.
1595 */
1596 if (pf_default_rule.timeout[PFTM_INTERVAL] > 0)
1597 pf_purge_expired_states(1 + (pf_status.states
1598 / pf_default_rule.timeout[PFTM_INTERVAL]));
1599
1600 NET_LOCK();
1601
1602 PF_LOCK();
1603 /* purge other expired types every PFTM_INTERVAL seconds */
1604 if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL])
1605 pf_purge_expired_src_nodes();
1606 PF_UNLOCK();
1607
1608 /*
1609 * Fragments don't require PF_LOCK(), they use their own lock.
1610 */
1611 if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
1612 pf_purge_expired_fragments();
1613 *nloops = 0;
1614 }
1615 NET_UNLOCK();
1616
1617 timeout_add_sec(&pf_purge_to, 1);
1618 }
1619
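/*
 * Pacing example: with the default "set timeout interval 10" and
 * 100000 states, each one-second run of pf_purge() asks
 * pf_purge_expired_states() to scan 1 + 100000 / 10 = 10001 list
 * entries, so the whole state list is covered roughly once per
 * interval while the per-second work stays bounded.
 */
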
1620 int32_t
1621 pf_state_expires(const struct pf_state *st, uint8_t stimeout)
1622 {
1623 u_int32_t timeout;
1624 u_int32_t start;
1625 u_int32_t end;
1626 u_int32_t states;
1627
1628 /*
1629 * pf_state_expires is used by the state purge task to
1630 * decide if a state is a candidate for cleanup, and by the
1631 * pfsync state export code to populate an expiry time.
1632 *
1633 * this function may be called by the state purge task while
1634 * the state is being modified. avoid inconsistent reads of
1635 * state->timeout by having the caller do the read (and any
1636 * checks it needs to do on the same variable) and then pass
1637 * their view of the timeout in here for this function to use.
1638 * the only consequence of using a stale timeout value is
1639 * that the state won't be a candidate for purging until the
1640 * next pass of the purge task.
1641 */
1642
1643 /* handle all PFTM_* > PFTM_MAX here */
1644 if (stimeout > PFTM_MAX)
1645 return (0);
1646
1647 KASSERT(stimeout < PFTM_MAX);
1648
1649 timeout = st->rule.ptr->timeout[stimeout];
1650 if (!timeout)
1651 timeout = pf_default_rule.timeout[stimeout];
1652
1653 start = st->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1654 if (start) {
1655 end = st->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1656 states = st->rule.ptr->states_cur;
1657 } else {
1658 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1659 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1660 states = pf_status.states;
1661 }
1662 if (end && states > start && start < end) {
1663 if (states >= end)
1664 return (0);
1665
1666 timeout = (u_int64_t)timeout * (end - states) / (end - start);
1667 }
1668
1669 return (st->expire + timeout);
1670 }
1671
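/*
 * Adaptive timeout example (values are the documented defaults for
 * the default state limit of 10000): with adaptive.start 6000,
 * adaptive.end 12000 and tcp.established 86400, 9000 active states
 * scale the timeout to 86400 * (12000 - 9000) / (12000 - 6000) =
 * 43200 seconds; at 12000 or more states the function returns 0 and
 * the state is immediately eligible for purging.
 */
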
1672 void
1673 pf_purge_expired_src_nodes(void)
1674 {
1675 struct pf_src_node *cur, *next;
1676
1677 PF_ASSERT_LOCKED();
1678
1679 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
1680 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
1681
1682 if (cur->states == 0 && cur->expire <= getuptime()) {
1683 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
1684 pf_remove_src_node(cur);
1685 }
1686 }
1687 }
1688
1689 void
1690 pf_src_tree_remove_state(struct pf_state *st)
1691 {
1692 u_int32_t timeout;
1693 struct pf_sn_item *sni;
1694
1695 while ((sni = SLIST_FIRST(&st->src_nodes)) != NULL) {
1696 SLIST_REMOVE_HEAD(&st->src_nodes, next);
1697 if (st->src.tcp_est)
1698 --sni->sn->conn;
1699 if (--sni->sn->states == 0) {
1700 timeout = st->rule.ptr->timeout[PFTM_SRC_NODE];
1701 if (!timeout)
1702 timeout =
1703 pf_default_rule.timeout[PFTM_SRC_NODE];
1704 sni->sn->expire = getuptime() + timeout;
1705 }
1706 pool_put(&pf_sn_item_pl, sni);
1707 }
1708 }
1709
1710 void
1711 pf_remove_state(struct pf_state *st)
1712 {
1713 PF_ASSERT_LOCKED();
1714
1715 /* handle load balancing related tasks */
1716 pf_postprocess_addr(st);
1717
1718 if (st->src.state == PF_TCPS_PROXY_DST) {
1719 pf_send_tcp(st->rule.ptr, st->key[PF_SK_WIRE]->af,
1720 &st->key[PF_SK_WIRE]->addr[1],
1721 &st->key[PF_SK_WIRE]->addr[0],
1722 st->key[PF_SK_WIRE]->port[1],
1723 st->key[PF_SK_WIRE]->port[0],
1724 st->src.seqhi, st->src.seqlo + 1,
1725 TH_RST|TH_ACK, 0, 0, 0, 1, st->tag,
1726 st->key[PF_SK_WIRE]->rdomain);
1727 }
1728 if (st->key[PF_SK_STACK]->proto == IPPROTO_TCP)
1729 pf_set_protostate(st, PF_PEER_BOTH, TCPS_CLOSED);
1730
1731 RBT_REMOVE(pf_state_tree_id, &tree_id, st);
1732 #if NPFLOW > 0
1733 if (st->state_flags & PFSTATE_PFLOW)
1734 export_pflow(st);
1735 #endif /* NPFLOW > 0 */
1736 #if NPFSYNC > 0
1737 pfsync_delete_state(st);
1738 #endif /* NPFSYNC > 0 */
1739 st->timeout = PFTM_UNLINKED;
1740 pf_src_tree_remove_state(st);
1741 pf_detach_state(st);
1742 }
1743
1744 void
1745 pf_remove_divert_state(struct pf_state_key *sk)
1746 {
1747 struct pf_state_item *si;
1748
1749 PF_ASSERT_UNLOCKED();
1750
1751 PF_LOCK();
1752 PF_STATE_ENTER_WRITE();
1753 TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
1754 struct pf_state *sist = si->si_st;
1755 if (sk == sist->key[PF_SK_STACK] && sist->rule.ptr &&
1756 (sist->rule.ptr->divert.type == PF_DIVERT_TO ||
1757 sist->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
1758 if (sist->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
1759 sist->key[PF_SK_WIRE] != sist->key[PF_SK_STACK]) {
1760 /*
1761 * If the local address is translated, keep
1762 * the state for "tcp.closed" seconds to
1763 * prevent its source port from being reused.
1764 */
1765 if (sist->src.state < TCPS_FIN_WAIT_2 ||
1766 sist->dst.state < TCPS_FIN_WAIT_2) {
1767 pf_set_protostate(sist, PF_PEER_BOTH,
1768 TCPS_TIME_WAIT);
1769 sist->timeout = PFTM_TCP_CLOSED;
1770 sist->expire = getuptime();
1771 }
1772 sist->state_flags |= PFSTATE_INP_UNLINKED;
1773 } else
1774 pf_remove_state(sist);
1775 break;
1776 }
1777 }
1778 PF_STATE_EXIT_WRITE();
1779 PF_UNLOCK();
1780 }
1781
1782 void
1783 pf_free_state(struct pf_state *st)
1784 {
1785 struct pf_rule_item *ri;
1786
1787 PF_ASSERT_LOCKED();
1788
1789 #if NPFSYNC > 0
1790 if (pfsync_state_in_use(st))
1791 return;
1792 #endif /* NPFSYNC > 0 */
1793 KASSERT(st->timeout == PFTM_UNLINKED);
1794 if (--st->rule.ptr->states_cur == 0 &&
1795 st->rule.ptr->src_nodes == 0)
1796 pf_rm_rule(NULL, st->rule.ptr);
1797 if (st->anchor.ptr != NULL)
1798 if (--st->anchor.ptr->states_cur == 0)
1799 pf_rm_rule(NULL, st->anchor.ptr);
1800 while ((ri = SLIST_FIRST(&st->match_rules))) {
1801 SLIST_REMOVE_HEAD(&st->match_rules, entry);
1802 if (--ri->r->states_cur == 0 &&
1803 ri->r->src_nodes == 0)
1804 pf_rm_rule(NULL, ri->r);
1805 pool_put(&pf_rule_item_pl, ri);
1806 }
1807 pf_normalize_tcp_cleanup(st);
1808 pfi_kif_unref(st->kif, PFI_KIF_REF_STATE);
1809 pf_state_list_remove(&pf_state_list, st);
1810 if (st->tag)
1811 pf_tag_unref(st->tag);
1812 pf_state_unref(st);
1813 pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1814 pf_status.states--;
1815 }
1816
1817 void
1818 pf_purge_expired_states(u_int32_t maxcheck)
1819 {
1820 /*
1821 * this task/thread/context/whatever is the only thing that
1822 * removes states from the pf_state_list, so the cur reference
1823 * it holds between calls is guaranteed to still be in the
1824 * list.
1825 */
1826 static struct pf_state *cur = NULL;
1827
1828 struct pf_state *head, *tail;
1829 struct pf_state *st;
1830 SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl);
1831 time_t now;
1832
1833 PF_ASSERT_UNLOCKED();
1834
1835 rw_enter_read(&pf_state_list.pfs_rwl);
1836
1837 mtx_enter(&pf_state_list.pfs_mtx);
1838 head = TAILQ_FIRST(&pf_state_list.pfs_list);
1839 tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
1840 mtx_leave(&pf_state_list.pfs_mtx);
1841
1842 if (head == NULL) {
1843 /* the list is empty */
1844 rw_exit_read(&pf_state_list.pfs_rwl);
1845 return;
1846 }
1847
1848 /* (re)start at the front of the list */
1849 if (cur == NULL)
1850 cur = head;
1851
1852 now = getuptime();
1853
1854 do {
1855 uint8_t stimeout = cur->timeout;
1856
1857 if ((stimeout == PFTM_UNLINKED) ||
1858 (pf_state_expires(cur, stimeout) <= now)) {
1859 st = pf_state_ref(cur);
1860 SLIST_INSERT_HEAD(&gcl, st, gc_list);
1861 }
1862
1863 /* don't iterate past the end of our view of the list */
1864 if (cur == tail) {
1865 cur = NULL;
1866 break;
1867 }
1868
1869 cur = TAILQ_NEXT(cur, entry_list);
1870 } while (maxcheck--);
1871
1872 rw_exit_read(&pf_state_list.pfs_rwl);
1873
1874 if (SLIST_EMPTY(&gcl))
1875 return;
1876
1877 NET_LOCK();
1878 rw_enter_write(&pf_state_list.pfs_rwl);
1879 PF_LOCK();
1880 PF_STATE_ENTER_WRITE();
1881 SLIST_FOREACH(st, &gcl, gc_list) {
1882 if (st->timeout != PFTM_UNLINKED)
1883 pf_remove_state(st);
1884
1885 pf_free_state(st);
1886 }
1887 PF_STATE_EXIT_WRITE();
1888 PF_UNLOCK();
1889 rw_exit_write(&pf_state_list.pfs_rwl);
1890 NET_UNLOCK();
1891
1892 while ((st = SLIST_FIRST(&gcl)) != NULL) {
1893 SLIST_REMOVE_HEAD(&gcl, gc_list);
1894 pf_state_unref(st);
1895 }
1896 }
1897
1898 int
1899 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw, int wait)
1900 {
1901 if (aw->type != PF_ADDR_TABLE)
1902 return (0);
1903 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, wait)) == NULL)
1904 return (1);
1905 return (0);
1906 }
1907
1908 void
1909 pf_tbladdr_remove(struct pf_addr_wrap *aw)
1910 {
1911 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1912 return;
1913 pfr_detach_table(aw->p.tbl);
1914 aw->p.tbl = NULL;
1915 }
1916
1917 void
1918 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1919 {
1920 struct pfr_ktable *kt = aw->p.tbl;
1921
1922 if (aw->type != PF_ADDR_TABLE || kt == NULL)
1923 return;
1924 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1925 kt = kt->pfrkt_root;
1926 aw->p.tbl = NULL;
1927 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1928 kt->pfrkt_cnt : -1;
1929 }
1930
1931 void
1932 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1933 {
1934 switch (af) {
1935 case AF_INET: {
1936 u_int32_t a = ntohl(addr->addr32[0]);
1937 addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
1938 (a>>8)&255, a&255);
1939 if (p) {
1940 p = ntohs(p);
1941 addlog(":%u", p);
1942 }
1943 break;
1944 }
1945 #ifdef INET6
1946 case AF_INET6: {
1947 u_int16_t b;
1948 u_int8_t i, curstart, curend, maxstart, maxend;
1949 curstart = curend = maxstart = maxend = 255;
1950 for (i = 0; i < 8; i++) {
1951 if (!addr->addr16[i]) {
1952 if (curstart == 255)
1953 curstart = i;
1954 curend = i;
1955 } else {
1956 if ((curend - curstart) >
1957 (maxend - maxstart)) {
1958 maxstart = curstart;
1959 maxend = curend;
1960 }
1961 curstart = curend = 255;
1962 }
1963 }
1964 if ((curend - curstart) >
1965 (maxend - maxstart)) {
1966 maxstart = curstart;
1967 maxend = curend;
1968 }
1969 for (i = 0; i < 8; i++) {
1970 if (i >= maxstart && i <= maxend) {
1971 if (i == 0)
1972 addlog(":");
1973 if (i == maxend)
1974 addlog(":");
1975 } else {
1976 b = ntohs(addr->addr16[i]);
1977 addlog("%x", b);
1978 if (i < 7)
1979 addlog(":");
1980 }
1981 }
1982 if (p) {
1983 p = ntohs(p);
1984 addlog("[%u]", p);
1985 }
1986 break;
1987 }
1988 #endif /* INET6 */
1989 }
1990 }
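
/*
 * Editor's illustration, not part of pf.c: the scan above picks the
 * longest run of zero halfwords so the printed IPv6 address can use
 * the "::" shorthand, with the first run winning ties (strict >).
 * A minimal userland sketch of the same scan, using -1 in place of
 * the 255 sentinel; variable names are ours. Kept under #if 0:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* 2001:db8:0:0:1:0:0:1 has two equal zero runs; the first wins */
	uint16_t w[8] = { 0x2001, 0x0db8, 0, 0, 1, 0, 0, 1 };
	int i, curstart = -1, curend = -1, maxstart = -1, maxend = -1;

	for (i = 0; i < 8; i++) {
		if (w[i] == 0) {
			if (curstart == -1)
				curstart = i;
			curend = i;
		} else {
			if (curend - curstart > maxend - maxstart) {
				maxstart = curstart;
				maxend = curend;
			}
			curstart = curend = -1;
		}
	}
	if (curend - curstart > maxend - maxstart) {
		maxstart = curstart;
		maxend = curend;
	}
	printf("compress words %d..%d\n", maxstart, maxend);	/* 2..3 */
	return 0;
}
#endif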
1991
1992 void
1993 pf_print_state(struct pf_state *st)
1994 {
1995 pf_print_state_parts(st, NULL, NULL);
1996 }
1997
1998 void
1999 pf_print_state_parts(struct pf_state *st,
2000 struct pf_state_key *skwp, struct pf_state_key *sksp)
2001 {
2002 struct pf_state_key *skw, *sks;
2003 u_int8_t proto, dir;
2004
2005 /* Do our best to fill these, but they're skipped if NULL */
2006 skw = skwp ? skwp : (st ? st->key[PF_SK_WIRE] : NULL);
2007 sks = sksp ? sksp : (st ? st->key[PF_SK_STACK] : NULL);
2008 proto = skw ? skw->proto : (sks ? sks->proto : 0);
2009 dir = st ? st->direction : 0;
2010
2011 switch (proto) {
2012 case IPPROTO_IPV4:
2013 addlog("IPv4");
2014 break;
2015 case IPPROTO_IPV6:
2016 addlog("IPv6");
2017 break;
2018 case IPPROTO_TCP:
2019 addlog("TCP");
2020 break;
2021 case IPPROTO_UDP:
2022 addlog("UDP");
2023 break;
2024 case IPPROTO_ICMP:
2025 addlog("ICMP");
2026 break;
2027 case IPPROTO_ICMPV6:
2028 addlog("ICMPv6");
2029 break;
2030 default:
2031 addlog("%u", proto);
2032 break;
2033 }
2034 switch (dir) {
2035 case PF_IN:
2036 addlog(" in");
2037 break;
2038 case PF_OUT:
2039 addlog(" out");
2040 break;
2041 }
2042 if (skw) {
2043 addlog(" wire: (%d) ", skw->rdomain);
2044 pf_print_host(&skw->addr[0], skw->port[0], skw->af);
2045 addlog(" ");
2046 pf_print_host(&skw->addr[1], skw->port[1], skw->af);
2047 }
2048 if (sks) {
2049 addlog(" stack: (%d) ", sks->rdomain);
2050 if (sks != skw) {
2051 pf_print_host(&sks->addr[0], sks->port[0], sks->af);
2052 addlog(" ");
2053 pf_print_host(&sks->addr[1], sks->port[1], sks->af);
2054 } else
2055 addlog("-");
2056 }
2057 if (st) {
2058 if (proto == IPPROTO_TCP) {
2059 addlog(" [lo=%u high=%u win=%u modulator=%u",
2060 st->src.seqlo, st->src.seqhi,
2061 st->src.max_win, st->src.seqdiff);
2062 if (st->src.wscale && st->dst.wscale)
2063 addlog(" wscale=%u",
2064 st->src.wscale & PF_WSCALE_MASK);
2065 addlog("]");
2066 addlog(" [lo=%u high=%u win=%u modulator=%u",
2067 st->dst.seqlo, st->dst.seqhi,
2068 st->dst.max_win, st->dst.seqdiff);
2069 if (st->src.wscale && st->dst.wscale)
2070 addlog(" wscale=%u",
2071 st->dst.wscale & PF_WSCALE_MASK);
2072 addlog("]");
2073 }
2074 addlog(" %u:%u", st->src.state, st->dst.state);
2075 if (st->rule.ptr)
2076 addlog(" @%d", st->rule.ptr->nr);
2077 }
2078 }
2079
2080 void
2081 pf_print_flags(u_int8_t f)
2082 {
2083 if (f)
2084 addlog(" ");
2085 if (f & TH_FIN)
2086 addlog("F");
2087 if (f & TH_SYN)
2088 addlog("S");
2089 if (f & TH_RST)
2090 addlog("R");
2091 if (f & TH_PUSH)
2092 addlog("P");
2093 if (f & TH_ACK)
2094 addlog("A");
2095 if (f & TH_URG)
2096 addlog("U");
2097 if (f & TH_ECE)
2098 addlog("E");
2099 if (f & TH_CWR)
2100 addlog("W");
2101 }
2102
2103 #define PF_SET_SKIP_STEPS(i) \
2104 do { \
2105 while (head[i] != cur) { \
2106 head[i]->skip[i].ptr = cur; \
2107 head[i] = TAILQ_NEXT(head[i], entries); \
2108 } \
2109 } while (0)
2110
2111 void
2112 pf_calc_skip_steps(struct pf_rulequeue *rules)
2113 {
2114 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
2115 int i;
2116
2117 cur = TAILQ_FIRST(rules);
2118 prev = cur;
2119 for (i = 0; i < PF_SKIP_COUNT; ++i)
2120 head[i] = cur;
2121 while (cur != NULL) {
2122 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
2123 PF_SET_SKIP_STEPS(PF_SKIP_IFP);
2124 if (cur->direction != prev->direction)
2125 PF_SET_SKIP_STEPS(PF_SKIP_DIR);
2126 if (cur->onrdomain != prev->onrdomain ||
2127 cur->ifnot != prev->ifnot)
2128 PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
2129 if (cur->af != prev->af)
2130 PF_SET_SKIP_STEPS(PF_SKIP_AF);
2131 if (cur->proto != prev->proto)
2132 PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
2133 if (cur->src.neg != prev->src.neg ||
2134 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
2135 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
2136 if (cur->dst.neg != prev->dst.neg ||
2137 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
2138 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
2139 if (cur->src.port[0] != prev->src.port[0] ||
2140 cur->src.port[1] != prev->src.port[1] ||
2141 cur->src.port_op != prev->src.port_op)
2142 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
2143 if (cur->dst.port[0] != prev->dst.port[0] ||
2144 cur->dst.port[1] != prev->dst.port[1] ||
2145 cur->dst.port_op != prev->dst.port_op)
2146 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
2147
2148 prev = cur;
2149 cur = TAILQ_NEXT(cur, entries);
2150 }
2151 for (i = 0; i < PF_SKIP_COUNT; ++i)
2152 PF_SET_SKIP_STEPS(i);
2153 }
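
/*
 * Editor's illustration, not part of pf.c: skip steps let ruleset
 * evaluation jump over a whole run of consecutive rules that share a
 * field value once that field has failed to match. A minimal sketch
 * of the idea over a single integer "field" (the real code computes
 * one skip chain per PF_SKIP_* field), kept under #if 0:
 */
#if 0
#include <stdio.h>

#define NRULES 6

int main(void)
{
	int field[NRULES] = { 6, 6, 6, 17, 17, 1 };	/* e.g. protocols */
	int skip[NRULES];
	int i, head = 0;

	/* every rule in a run of equal values points past the run */
	for (i = 1; i <= NRULES; i++)
		if (i == NRULES || field[i] != field[head])
			while (head < i)
				skip[head++] = i;

	/* a scan for field == 1 now skips runs instead of single rules */
	for (i = 0; i < NRULES; ) {
		if (field[i] != 1) {
			i = skip[i];
			continue;
		}
		printf("rule %d matches\n", i);	/* rule 5 */
		i++;
	}
	return 0;
}
#endif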
2154
2155 int
2156 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
2157 {
2158 if (aw1->type != aw2->type)
2159 return (1);
2160 switch (aw1->type) {
2161 case PF_ADDR_ADDRMASK:
2162 case PF_ADDR_RANGE:
2163 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
2164 return (1);
2165 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
2166 return (1);
2167 return (0);
2168 case PF_ADDR_DYNIFTL:
2169 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
2170 case PF_ADDR_NONE:
2171 case PF_ADDR_NOROUTE:
2172 case PF_ADDR_URPFFAILED:
2173 return (0);
2174 case PF_ADDR_TABLE:
2175 return (aw1->p.tbl != aw2->p.tbl);
2176 case PF_ADDR_RTLABEL:
2177 return (aw1->v.rtlabel != aw2->v.rtlabel);
2178 default:
2179 addlog("invalid address type: %d\n", aw1->type);
2180 return (1);
2181 }
2182 }
2183
2184 /* This algorithm computes 'a + b - c' in ones-complement using a trick to
2185 * emulate at most one ones-complement subtraction. This thereby limits net
2186 * carries/borrows to at most one, eliminating a reduction step and saving one
2187 * each of +, >>, & and ~.
2188 *
2189 * def. x mod y = x - (x//y)*y for integer x,y
2190 * def. sum = x mod 2^16
2191 * def. accumulator = (x >> 16) mod 2^16
2192 *
2193 * The trick works as follows: subtracting exactly one u_int16_t from the
2194 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the
2195 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the
2196 * ones-complement borrow:
2197 *
2198 * (sum + accumulator) mod 2^16
2199 * = { assume underflow: accumulator := 2^16 - 1 }
2200 * (sum + 2^16 - 1) mod 2^16
2201 * = { mod }
2202 * (sum - 1) mod 2^16
2203 *
2204 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's
2205  * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflowed
2206 * to zero as that requires subtraction of at least 2^16, which exceeds a
2207 * single u_int16_t's range.
2208 *
2209 * We use the following theorem to derive the implementation:
2210 *
2211 * th. (x + (y mod z)) mod z = (x + y) mod z (0)
2212 * proof.
2213 * (x + (y mod z)) mod z
2214 * = { def mod }
2215 * (x + y - (y//z)*z) mod z
2216 * = { (a + b*c) mod c = a mod c }
2217 * (x + y) mod z [end of proof]
2218 *
2219 * ... and thereby obtain:
2220 *
2221 * (sum + accumulator) mod 2^16
2222 * = { def. accumulator, def. sum }
2223 * (x mod 2^16 + (x >> 16) mod 2^16) mod 2^16
2224 * = { (0), twice }
2225 * (x + (x >> 16)) mod 2^16
2226 * = { x mod 2^n = x & (2^n - 1) }
2227 * (x + (x >> 16)) & 0xffff
2228 *
2229 * Note: this serves also as a reduction step for at most one add (as the
2230 * trailing mod 2^16 prevents further reductions by destroying carries).
2231 */
2232 __inline void
2233 pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
2234 u_int8_t proto)
2235 {
2236 u_int32_t x;
2237 const int udp = proto == IPPROTO_UDP;
2238
2239 x = *cksum + was - now;
2240 x = (x + (x >> 16)) & 0xffff;
2241
2242 /* optimise: eliminate a branch when not udp */
2243 if (udp && *cksum == 0x0000)
2244 return;
2245 if (udp && x == 0x0000)
2246 x = 0xffff;
2247
2248 *cksum = (u_int16_t)(x);
2249 }
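
/*
 * Editor's illustration, not part of pf.c: a standalone check of the
 * 'a + b - c' fixup above against a full recompute. The stored header
 * checksum is the complement of the sum, and HC' = HC + was - now also
 * holds for it in ones-complement arithmetic. fixup() here is an
 * illustrative copy without the UDP special case. Kept under #if 0:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

static uint16_t
cksum(const uint16_t *p, int n)		/* fold twice, then invert */
{
	uint32_t x = 0;

	while (n--)
		x += *p++;
	x = (x & 0xffff) + (x >> 16);
	x = (x & 0xffff) + (x >> 16);
	return ((uint16_t)~x);
}

static void
fixup(uint16_t *hc, uint16_t was, uint16_t now)
{
	uint32_t x = *hc + was - now;	/* same arithmetic as above */

	x = (x + (x >> 16)) & 0xffff;
	*hc = (uint16_t)x;
}

int main(void)
{
	uint16_t data[4] = { 0x1234, 0xabcd, 0x0001, 0xff00 };
	uint16_t hc = cksum(data, 4);

	fixup(&hc, data[2], 0xbeef);	/* patch word 2 incrementally */
	data[2] = 0xbeef;

	/* both print 0x840d */
	printf("incremental %#06x full %#06x\n", hc, cksum(data, 4));
	return 0;
}
#endif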
2250
2251 #ifdef INET6
2252 /* pre: coverage(cksum) is superset of coverage(covered_cksum) */
2253 static __inline void
2254 pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto)
2255 {
2256 pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto);
2257 }
2258
2259 /* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */
2260 static __inline void
2261 pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto)
2262 {
2263 pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto);
2264 }
2265 #endif /* INET6 */
2266
2267 /* pre: *a is 16-bit aligned within its packet
2268 *
2269 * This algorithm emulates 16-bit ones-complement sums on a twos-complement
2270 * machine by conserving ones-complement's otherwise discarded carries in the
2271 * upper bits of x. These accumulated carries when added to the lower 16-bits
2272 * over at least zero 'reduction' steps then complete the ones-complement sum.
2273 *
2274 * def. sum = x mod 2^16
2275 * def. accumulator = (x >> 16)
2276 *
2277 * At most two reduction steps
2278 *
2279 * x := sum + accumulator
2280 * = { def sum, def accumulator }
2281 * x := x mod 2^16 + (x >> 16)
2282 * = { x mod 2^n = x & (2^n - 1) }
2283 * x := (x & 0xffff) + (x >> 16)
2284 *
2285 * are necessary to incorporate the accumulated carries (at most one per add)
2286 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits.
2287 *
2288 * The function is also invariant over the endian of the host. Why?
2289 *
2290 * Define the unary transpose operator ~ on a bitstring in python slice
2291  * notation as lambda m: m[P:] + m[:P], for some constant pivot P.
2292 *
2293 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e.
2294 *
2295 * ~m +_1 ~n = ~(m +_1 n) (for all bitstrings m,n of equal length)
2296 *
2297 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two
2298 * 'half-adds'. Under ones-complement addition, each half-add carries to the
2299 * other, so the sum of each half-add is unaffected by their relative
2300 * order. Therefore:
2301 *
2302 * ~m +_1 ~n
2303 * = { half-adds invariant under transposition }
2304 * ~s
2305 * = { substitute }
2306 * ~(m +_1 n) [end of proof]
2307 *
2308 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine
2309 * with the converse endian does not alter the result.
2310 *
2311 * proof.
2312 * { converse machine endian: load/store transposes, P := 8 }
2313 * ~(~m +_1 ~n)
2314 * = { ~ over +_1 }
2315 * ~~m +_1 ~~n
2316 * = { ~ is an involution }
2317 * m +_1 n [end of proof]
2318 *
2319 */
2320 #define NEG(x) ((u_int16_t)~(x))
2321 void
2322 pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a,
2323 const struct pf_addr *an, sa_family_t af, u_int8_t proto)
2324 {
2325 u_int32_t x;
2326 const u_int16_t *n = an->addr16;
2327 const u_int16_t *o = a->addr16;
2328 const int udp = proto == IPPROTO_UDP;
2329
2330 switch (af) {
2331 case AF_INET:
2332 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]);
2333 break;
2334 #ifdef INET6
2335 case AF_INET6:
2336 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\
2337 o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\
2338 o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\
2339 o[6] + NEG(n[6]) + o[7] + NEG(n[7]);
2340 break;
2341 #endif /* INET6 */
2342 default:
2343 unhandled_af(af);
2344 }
2345
2346 x = (x & 0xffff) + (x >> 16);
2347 x = (x & 0xffff) + (x >> 16);
2348
2349 /* optimise: eliminate a branch when not udp */
2350 if (udp && *cksum == 0x0000)
2351 return;
2352 if (udp && x == 0x0000)
2353 x = 0xffff;
2354
2355 *cksum = (u_int16_t)(x);
2356 }
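
/*
 * Editor's illustration, not part of pf.c: why the two reduction steps
 * above suffice. The AF_INET6 case adds 17 16-bit terms (*cksum plus 8
 * old and 8 negated new halfwords), so the accumulator gains at most 16
 * carries; two folds always bring x back below 2^16. Kept under #if 0:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t x = 0;
	int i;

	for (i = 0; i < 17; i++)	/* worst case: all terms 0xffff */
		x += 0xffff;

	x = (x & 0xffff) + (x >> 16);	/* 0xffef + 0x10 = 0xffff */
	x = (x & 0xffff) + (x >> 16);	/* already < 2^16, unchanged */
	printf("%#x\n", x);		/* 0xffff */
	return 0;
}
#endif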
2357
2358 int
2359 pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
2360 {
2361 int rewrite = 0;
2362
2363 if (*f != v) {
2364 u_int16_t old = htons(hi ? (*f << 8) : *f);
2365 u_int16_t new = htons(hi ? ( v << 8) : v);
2366
2367 pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
2368 *f = v;
2369 rewrite = 1;
2370 }
2371
2372 return (rewrite);
2373 }
2374
2375 /* pre: *f is 16-bit aligned within its packet */
2376 int
2377 pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v)
2378 {
2379 int rewrite = 0;
2380
2381 if (*f != v) {
2382 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
2383 *f = v;
2384 rewrite = 1;
2385 }
2386
2387 return (rewrite);
2388 }
2389
2390 int
2391 pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
2392 {
2393 int rewrite = 0;
2394 u_int8_t *fb = (u_int8_t*)f;
2395 u_int8_t *vb = (u_int8_t*)&v;
2396
2397 if (hi && ALIGNED_POINTER(f, u_int16_t)) {
2398 return (pf_patch_16(pd, f, v)); /* optimise */
2399 }
2400
2401 rewrite += pf_patch_8(pd, fb++, *vb++, hi);
2402 rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
2403
2404 return (rewrite);
2405 }
2406
2407 /* pre: *f is 16-bit aligned within its packet */
2408 /* pre: pd->proto != IPPROTO_UDP */
2409 int
2410 pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v)
2411 {
2412 int rewrite = 0;
2413 u_int16_t *pc = pd->pcksum;
2414 u_int8_t proto = pd->proto;
2415
2416 /* optimise: inline udp fixup code is unused; let compiler scrub it */
2417 if (proto == IPPROTO_UDP)
2418 panic("%s: udp", __func__);
2419
2420 /* optimise: skip *f != v guard; true for all use-cases */
2421 pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto);
2422 pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto);
2423
2424 *f = v;
2425 rewrite = 1;
2426
2427 return (rewrite);
2428 }
2429
2430 int
2431 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
2432 {
2433 int rewrite = 0;
2434 u_int8_t *fb = (u_int8_t*)f;
2435 u_int8_t *vb = (u_int8_t*)&v;
2436
2437 if (hi && ALIGNED_POINTER(f, u_int32_t)) {
2438 return (pf_patch_32(pd, f, v)); /* optimise */
2439 }
2440
2441 rewrite += pf_patch_8(pd, fb++, *vb++, hi);
2442 rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
2443 rewrite += pf_patch_8(pd, fb++, *vb++, hi);
2444 rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
2445
2446 return (rewrite);
2447 }
2448
2449 int
2450 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir,
2451 u_int16_t *virtual_id, u_int16_t *virtual_type)
2452 {
2453 /*
2454 * ICMP types marked with PF_OUT are typically responses to
2455 * PF_IN, and will match states in the opposite direction.
2456 * PF_IN ICMP types need to match a state with that type.
2457 */
2458 *icmp_dir = PF_OUT;
2459
2460 /* Queries (and responses) */
2461 switch (pd->af) {
2462 case AF_INET:
2463 switch (type) {
2464 case ICMP_ECHO:
2465 *icmp_dir = PF_IN;
2466 /* FALLTHROUGH */
2467 case ICMP_ECHOREPLY:
2468 *virtual_type = ICMP_ECHO;
2469 *virtual_id = pd->hdr.icmp.icmp_id;
2470 break;
2471
2472 case ICMP_TSTAMP:
2473 *icmp_dir = PF_IN;
2474 /* FALLTHROUGH */
2475 case ICMP_TSTAMPREPLY:
2476 *virtual_type = ICMP_TSTAMP;
2477 *virtual_id = pd->hdr.icmp.icmp_id;
2478 break;
2479
2480 case ICMP_IREQ:
2481 *icmp_dir = PF_IN;
2482 /* FALLTHROUGH */
2483 case ICMP_IREQREPLY:
2484 *virtual_type = ICMP_IREQ;
2485 *virtual_id = pd->hdr.icmp.icmp_id;
2486 break;
2487
2488 case ICMP_MASKREQ:
2489 *icmp_dir = PF_IN;
2490 /* FALLTHROUGH */
2491 case ICMP_MASKREPLY:
2492 *virtual_type = ICMP_MASKREQ;
2493 *virtual_id = pd->hdr.icmp.icmp_id;
2494 break;
2495
2496 case ICMP_IPV6_WHEREAREYOU:
2497 *icmp_dir = PF_IN;
2498 /* FALLTHROUGH */
2499 case ICMP_IPV6_IAMHERE:
2500 *virtual_type = ICMP_IPV6_WHEREAREYOU;
2501 *virtual_id = 0; /* Nothing sane to match on! */
2502 break;
2503
2504 case ICMP_MOBILE_REGREQUEST:
2505 *icmp_dir = PF_IN;
2506 /* FALLTHROUGH */
2507 case ICMP_MOBILE_REGREPLY:
2508 *virtual_type = ICMP_MOBILE_REGREQUEST;
2509 *virtual_id = 0; /* Nothing sane to match on! */
2510 break;
2511
2512 case ICMP_ROUTERSOLICIT:
2513 *icmp_dir = PF_IN;
2514 /* FALLTHROUGH */
2515 case ICMP_ROUTERADVERT:
2516 *virtual_type = ICMP_ROUTERSOLICIT;
2517 *virtual_id = 0; /* Nothing sane to match on! */
2518 break;
2519
2520 /* These ICMP types map to other connections */
2521 case ICMP_UNREACH:
2522 case ICMP_SOURCEQUENCH:
2523 case ICMP_REDIRECT:
2524 case ICMP_TIMXCEED:
2525 case ICMP_PARAMPROB:
2526 /* These will not be used, but set them anyway */
2527 *icmp_dir = PF_IN;
2528 *virtual_type = htons(type);
2529 *virtual_id = 0;
2530 return (1); /* These types match to another state */
2531
2532 /*
2533 * All remaining ICMP types get their own states,
2534 * and will only match in one direction.
2535 */
2536 default:
2537 *icmp_dir = PF_IN;
2538 *virtual_type = type;
2539 *virtual_id = 0;
2540 break;
2541 }
2542 break;
2543 #ifdef INET6
2544 case AF_INET6:
2545 switch (type) {
2546 case ICMP6_ECHO_REQUEST:
2547 *icmp_dir = PF_IN;
2548 /* FALLTHROUGH */
2549 case ICMP6_ECHO_REPLY:
2550 *virtual_type = ICMP6_ECHO_REQUEST;
2551 *virtual_id = pd->hdr.icmp6.icmp6_id;
2552 break;
2553
2554 case MLD_LISTENER_QUERY:
2555 case MLD_LISTENER_REPORT: {
2556 struct mld_hdr *mld = &pd->hdr.mld;
2557 u_int32_t h;
2558
2559 /*
2560 * Listener Report can be sent by clients
2561 * without an associated Listener Query.
2562 * In addition to that, when Report is sent as a
2563 * reply to a Query its source and destination
2564 * address are different.
2565 */
2566 *icmp_dir = PF_IN;
2567 *virtual_type = MLD_LISTENER_QUERY;
2568 /* generate fake id for these messages */
2569 h = mld->mld_addr.s6_addr32[0] ^
2570 mld->mld_addr.s6_addr32[1] ^
2571 mld->mld_addr.s6_addr32[2] ^
2572 mld->mld_addr.s6_addr32[3];
2573 *virtual_id = (h >> 16) ^ (h & 0xffff);
2574 break;
2575 }
2576
2577 /*
2578 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as
2579 * ICMP6_WRU
2580 */
2581 case ICMP6_WRUREQUEST:
2582 *icmp_dir = PF_IN;
2583 /* FALLTHROUGH */
2584 case ICMP6_WRUREPLY:
2585 *virtual_type = ICMP6_WRUREQUEST;
2586 *virtual_id = 0; /* Nothing sane to match on! */
2587 break;
2588
2589 case MLD_MTRACE:
2590 *icmp_dir = PF_IN;
2591 /* FALLTHROUGH */
2592 case MLD_MTRACE_RESP:
2593 *virtual_type = MLD_MTRACE;
2594 *virtual_id = 0; /* Nothing sane to match on! */
2595 break;
2596
2597 case ND_NEIGHBOR_SOLICIT:
2598 *icmp_dir = PF_IN;
2599 /* FALLTHROUGH */
2600 case ND_NEIGHBOR_ADVERT: {
2601 struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns;
2602 u_int32_t h;
2603
2604 *virtual_type = ND_NEIGHBOR_SOLICIT;
2605 /* generate fake id for these messages */
2606 h = nd->nd_ns_target.s6_addr32[0] ^
2607 nd->nd_ns_target.s6_addr32[1] ^
2608 nd->nd_ns_target.s6_addr32[2] ^
2609 nd->nd_ns_target.s6_addr32[3];
2610 *virtual_id = (h >> 16) ^ (h & 0xffff);
2611 break;
2612 }
2613
2614 /*
2615 * These ICMP types map to other connections.
2616 * ND_REDIRECT can't be in this list because the triggering
2617 * packet header is optional.
2618 */
2619 case ICMP6_DST_UNREACH:
2620 case ICMP6_PACKET_TOO_BIG:
2621 case ICMP6_TIME_EXCEEDED:
2622 case ICMP6_PARAM_PROB:
2623 /* These will not be used, but set them anyway */
2624 *icmp_dir = PF_IN;
2625 *virtual_type = htons(type);
2626 *virtual_id = 0;
2627 return (1); /* These types match to another state */
2628 /*
2629 * All remaining ICMP6 types get their own states,
2630 * and will only match in one direction.
2631 */
2632 default:
2633 *icmp_dir = PF_IN;
2634 *virtual_type = type;
2635 *virtual_id = 0;
2636 break;
2637 }
2638 break;
2639 #endif /* INET6 */
2640 }
2641 *virtual_type = htons(*virtual_type);
2642 return (0); /* These types match to their own state */
2643 }
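
/*
 * Editor's illustration, not part of pf.c: MLD and neighbor discovery
 * messages carry no id field, so the code above folds the 128-bit group
 * or target address into a fake 16-bit virtual id; query and reply then
 * land on the same state key. A standalone sketch with a made-up
 * address, kept under #if 0:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical target address as four 32-bit words */
	uint32_t a[4] = { 0xff020000, 0x00000000, 0x00000001, 0xff94d2c3 };
	uint32_t h = a[0] ^ a[1] ^ a[2] ^ a[3];
	uint16_t vid = (h >> 16) ^ (h & 0xffff);

	printf("virtual id %#06x\n", vid);	/* 0xd254 */
	return 0;
}
#endif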
2644
2645 void
2646 pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp,
2647 struct pf_addr *oa, struct pf_addr *na, u_int16_t np)
2648 {
2649 	/* note: doesn't bother to fix up quoted checksums, if any */
2650
2651 /* change quoted protocol port */
2652 if (qp != NULL)
2653 pf_patch_16(pd, qp, np);
2654
2655 /* change quoted ip address */
2656 pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto);
2657 pf_addrcpy(qa, na, pd->af);
2658
2659 /* change network-header's ip address */
2660 if (oa)
2661 pf_translate_a(pd, oa, na);
2662 }
2663
2664 /* pre: *a is 16-bit aligned within its packet */
2665 /* *a is a network header src/dst address */
2666 int
2667 pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an)
2668 {
2669 int rewrite = 0;
2670
2671 /* warning: !PF_ANEQ != PF_AEQ */
2672 if (!PF_ANEQ(a, an, pd->af))
2673 return (0);
2674
2675 /* fixup transport pseudo-header, if any */
2676 switch (pd->proto) {
2677 case IPPROTO_TCP: /* FALLTHROUGH */
2678 case IPPROTO_UDP: /* FALLTHROUGH */
2679 case IPPROTO_ICMPV6:
2680 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto);
2681 break;
2682 default:
2683 break; /* assume no pseudo-header */
2684 }
2685
2686 pf_addrcpy(a, an, pd->af);
2687 rewrite = 1;
2688
2689 return (rewrite);
2690 }
2691
2692 #ifdef INET6
2693 /* pf_translate_af() may change pd->m, adjust local copies after calling */
2694 int
2695 pf_translate_af(struct pf_pdesc *pd)
2696 {
2697 static const struct pf_addr zero;
2698 struct ip *ip4;
2699 struct ip6_hdr *ip6;
2700 int copyback = 0;
2701 u_int hlen, ohlen, dlen;
2702 u_int16_t *pc;
2703 u_int8_t af_proto, naf_proto;
2704
2705 hlen = (pd->naf == AF_INET) ? sizeof(*ip4) : sizeof(*ip6);
2706 ohlen = pd->off;
2707 dlen = pd->tot_len - pd->off;
2708 pc = pd->pcksum;
2709
2710 af_proto = naf_proto = pd->proto;
2711 if (naf_proto == IPPROTO_ICMP)
2712 af_proto = IPPROTO_ICMPV6;
2713 if (naf_proto == IPPROTO_ICMPV6)
2714 af_proto = IPPROTO_ICMP;
2715
2716 /* uncover stale pseudo-header */
2717 switch (af_proto) {
2718 case IPPROTO_ICMPV6:
2719 /* optimise: unchanged for TCP/UDP */
2720 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto);
2721 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto);
2722 /* FALLTHROUGH */
2723 case IPPROTO_UDP: /* FALLTHROUGH */
2724 case IPPROTO_TCP:
2725 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto);
2726 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto);
2727 copyback = 1;
2728 break;
2729 default:
2730 break; /* assume no pseudo-header */
2731 }
2732
2733 /* replace the network header */
2734 m_adj(pd->m, pd->off);
2735 pd->src = NULL;
2736 pd->dst = NULL;
2737
2738 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) {
2739 pd->m = NULL;
2740 return (-1);
2741 }
2742
2743 pd->off = hlen;
2744 pd->tot_len += hlen - ohlen;
2745
2746 switch (pd->naf) {
2747 case AF_INET:
2748 ip4 = mtod(pd->m, struct ip *);
2749 memset(ip4, 0, hlen);
2750 ip4->ip_v = IPVERSION;
2751 ip4->ip_hl = hlen >> 2;
2752 ip4->ip_tos = pd->tos;
2753 ip4->ip_len = htons(hlen + dlen);
2754 ip4->ip_id = htons(ip_randomid());
2755 ip4->ip_off = htons(IP_DF);
2756 ip4->ip_ttl = pd->ttl;
2757 ip4->ip_p = pd->proto;
2758 ip4->ip_src = pd->nsaddr.v4;
2759 ip4->ip_dst = pd->ndaddr.v4;
2760 break;
2761 case AF_INET6:
2762 ip6 = mtod(pd->m, struct ip6_hdr *);
2763 memset(ip6, 0, hlen);
2764 ip6->ip6_vfc = IPV6_VERSION;
2765 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
2766 ip6->ip6_plen = htons(dlen);
2767 ip6->ip6_nxt = pd->proto;
2768 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
2769 ip6->ip6_hlim = IPV6_DEFHLIM;
2770 else
2771 ip6->ip6_hlim = pd->ttl;
2772 ip6->ip6_src = pd->nsaddr.v6;
2773 ip6->ip6_dst = pd->ndaddr.v6;
2774 break;
2775 default:
2776 unhandled_af(pd->naf);
2777 }
2778
2779 /* UDP over IPv6 must be checksummed per rfc2460 p27 */
2780 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 &&
2781 pd->naf == AF_INET6) {
2782 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
2783 }
2784
2785 /* cover fresh pseudo-header */
2786 switch (naf_proto) {
2787 case IPPROTO_ICMPV6:
2788 /* optimise: unchanged for TCP/UDP */
2789 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto);
2790 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto);
2791 /* FALLTHROUGH */
2792 case IPPROTO_UDP: /* FALLTHROUGH */
2793 case IPPROTO_TCP:
2794 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto);
2795 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto);
2796 copyback = 1;
2797 break;
2798 default:
2799 break; /* assume no pseudo-header */
2800 }
2801
2802 /* flush pd->pcksum */
2803 if (copyback)
2804 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
2805
2806 return (0);
2807 }
2808
2809 int
2810 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd,
2811 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
2812 sa_family_t af, sa_family_t naf)
2813 {
2814 struct mbuf *n = NULL;
2815 struct ip *ip4;
2816 struct ip6_hdr *ip6;
2817 u_int hlen, ohlen, dlen;
2818 int d;
2819
2820 if (af == naf || (af != AF_INET && af != AF_INET6) ||
2821 (naf != AF_INET && naf != AF_INET6))
2822 return (-1);
2823
2824 /* split the mbuf chain on the quoted ip/ip6 header boundary */
2825 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL)
2826 return (-1);
2827
2828 /* new quoted header */
2829 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
2830 /* old quoted header */
2831 ohlen = pd2->off - ipoff2;
2832
2833 /* trim old quoted header */
2834 pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto);
2835 m_adj(n, ohlen);
2836
2837 /* prepend a new, translated, quoted header */
2838 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL)
2839 return (-1);
2840
2841 switch (naf) {
2842 case AF_INET:
2843 ip4 = mtod(n, struct ip *);
2844 memset(ip4, 0, sizeof(*ip4));
2845 ip4->ip_v = IPVERSION;
2846 ip4->ip_hl = sizeof(*ip4) >> 2;
2847 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen);
2848 ip4->ip_id = htons(ip_randomid());
2849 ip4->ip_off = htons(IP_DF);
2850 ip4->ip_ttl = pd2->ttl;
2851 if (pd2->proto == IPPROTO_ICMPV6)
2852 ip4->ip_p = IPPROTO_ICMP;
2853 else
2854 ip4->ip_p = pd2->proto;
2855 ip4->ip_src = src->v4;
2856 ip4->ip_dst = dst->v4;
2857 ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
2858 break;
2859 case AF_INET6:
2860 ip6 = mtod(n, struct ip6_hdr *);
2861 memset(ip6, 0, sizeof(*ip6));
2862 ip6->ip6_vfc = IPV6_VERSION;
2863 ip6->ip6_plen = htons(pd2->tot_len - ohlen);
2864 if (pd2->proto == IPPROTO_ICMP)
2865 ip6->ip6_nxt = IPPROTO_ICMPV6;
2866 else
2867 ip6->ip6_nxt = pd2->proto;
2868 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
2869 ip6->ip6_hlim = IPV6_DEFHLIM;
2870 else
2871 ip6->ip6_hlim = pd2->ttl;
2872 ip6->ip6_src = src->v6;
2873 ip6->ip6_dst = dst->v6;
2874 break;
2875 }
2876
2877 /* cover new quoted header */
2878 /* optimise: any new AF_INET header of ours sums to zero */
2879 if (naf != AF_INET) {
2880 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto);
2881 }
2882
2883 /* reattach modified quoted packet to outer header */
2884 {
2885 int nlen = n->m_pkthdr.len;
2886 m_cat(m, n);
2887 m->m_pkthdr.len += nlen;
2888 }
2889
2890 /* account for altered length */
2891 d = hlen - ohlen;
2892
2893 if (pd->proto == IPPROTO_ICMPV6) {
2894 /* fixup pseudo-header */
2895 dlen = pd->tot_len - pd->off;
2896 pf_cksum_fixup(pd->pcksum,
2897 htons(dlen), htons(dlen + d), pd->proto);
2898 }
2899
2900 pd->tot_len += d;
2901 pd2->tot_len += d;
2902 pd2->off += d;
2903
2904 	/* note: not bothering to update network headers as
2905 	   these are due for rewrite by pf_translate_af() */
2906
2907 return (0);
2908 }
2909
2910
2911 #define PTR_IP(field) (offsetof(struct ip, field))
2912 #define PTR_IP6(field) (offsetof(struct ip6_hdr, field))
2913
2914 int
2915 pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg)
2916 {
2917 struct icmp *icmp4;
2918 struct icmp6_hdr *icmp6;
2919 u_int32_t mtu;
2920 int32_t ptr = -1;
2921 u_int8_t type;
2922 u_int8_t code;
2923
2924 switch (af) {
2925 case AF_INET:
2926 icmp6 = arg;
2927 type = icmp6->icmp6_type;
2928 code = icmp6->icmp6_code;
2929 mtu = ntohl(icmp6->icmp6_mtu);
2930
2931 switch (type) {
2932 case ICMP6_ECHO_REQUEST:
2933 type = ICMP_ECHO;
2934 break;
2935 case ICMP6_ECHO_REPLY:
2936 type = ICMP_ECHOREPLY;
2937 break;
2938 case ICMP6_DST_UNREACH:
2939 type = ICMP_UNREACH;
2940 switch (code) {
2941 case ICMP6_DST_UNREACH_NOROUTE:
2942 case ICMP6_DST_UNREACH_BEYONDSCOPE:
2943 case ICMP6_DST_UNREACH_ADDR:
2944 code = ICMP_UNREACH_HOST;
2945 break;
2946 case ICMP6_DST_UNREACH_ADMIN:
2947 code = ICMP_UNREACH_HOST_PROHIB;
2948 break;
2949 case ICMP6_DST_UNREACH_NOPORT:
2950 code = ICMP_UNREACH_PORT;
2951 break;
2952 default:
2953 return (-1);
2954 }
2955 break;
2956 case ICMP6_PACKET_TOO_BIG:
2957 type = ICMP_UNREACH;
2958 code = ICMP_UNREACH_NEEDFRAG;
2959 mtu -= 20;
2960 break;
2961 case ICMP6_TIME_EXCEEDED:
2962 type = ICMP_TIMXCEED;
2963 break;
2964 case ICMP6_PARAM_PROB:
2965 switch (code) {
2966 case ICMP6_PARAMPROB_HEADER:
2967 type = ICMP_PARAMPROB;
2968 code = ICMP_PARAMPROB_ERRATPTR;
2969 ptr = ntohl(icmp6->icmp6_pptr);
2970
2971 if (ptr == PTR_IP6(ip6_vfc))
2972 ; /* preserve */
2973 else if (ptr == PTR_IP6(ip6_vfc) + 1)
2974 ptr = PTR_IP(ip_tos);
2975 else if (ptr == PTR_IP6(ip6_plen) ||
2976 ptr == PTR_IP6(ip6_plen) + 1)
2977 ptr = PTR_IP(ip_len);
2978 else if (ptr == PTR_IP6(ip6_nxt))
2979 ptr = PTR_IP(ip_p);
2980 else if (ptr == PTR_IP6(ip6_hlim))
2981 ptr = PTR_IP(ip_ttl);
2982 else if (ptr >= PTR_IP6(ip6_src) &&
2983 ptr < PTR_IP6(ip6_dst))
2984 ptr = PTR_IP(ip_src);
2985 else if (ptr >= PTR_IP6(ip6_dst) &&
2986 ptr < sizeof(struct ip6_hdr))
2987 ptr = PTR_IP(ip_dst);
2988 else {
2989 return (-1);
2990 }
2991 break;
2992 case ICMP6_PARAMPROB_NEXTHEADER:
2993 type = ICMP_UNREACH;
2994 code = ICMP_UNREACH_PROTOCOL;
2995 break;
2996 default:
2997 return (-1);
2998 }
2999 break;
3000 default:
3001 return (-1);
3002 }
3003
3004 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI);
3005 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO);
3006
3007 		/* aligns well with an icmpv4 nextmtu */
3008 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu));
3009
3010 		/* icmpv4 pptr is a single most-significant byte */
3011 if (ptr >= 0)
3012 pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24));
3013 break;
3014 case AF_INET6:
3015 icmp4 = arg;
3016 type = icmp4->icmp_type;
3017 code = icmp4->icmp_code;
3018 mtu = ntohs(icmp4->icmp_nextmtu);
3019
3020 switch (type) {
3021 case ICMP_ECHO:
3022 type = ICMP6_ECHO_REQUEST;
3023 break;
3024 case ICMP_ECHOREPLY:
3025 type = ICMP6_ECHO_REPLY;
3026 break;
3027 case ICMP_UNREACH:
3028 type = ICMP6_DST_UNREACH;
3029 switch (code) {
3030 case ICMP_UNREACH_NET:
3031 case ICMP_UNREACH_HOST:
3032 case ICMP_UNREACH_NET_UNKNOWN:
3033 case ICMP_UNREACH_HOST_UNKNOWN:
3034 case ICMP_UNREACH_ISOLATED:
3035 case ICMP_UNREACH_TOSNET:
3036 case ICMP_UNREACH_TOSHOST:
3037 code = ICMP6_DST_UNREACH_NOROUTE;
3038 break;
3039 case ICMP_UNREACH_PORT:
3040 code = ICMP6_DST_UNREACH_NOPORT;
3041 break;
3042 case ICMP_UNREACH_NET_PROHIB:
3043 case ICMP_UNREACH_HOST_PROHIB:
3044 case ICMP_UNREACH_FILTER_PROHIB:
3045 case ICMP_UNREACH_PRECEDENCE_CUTOFF:
3046 code = ICMP6_DST_UNREACH_ADMIN;
3047 break;
3048 case ICMP_UNREACH_PROTOCOL:
3049 type = ICMP6_PARAM_PROB;
3050 code = ICMP6_PARAMPROB_NEXTHEADER;
3051 ptr = offsetof(struct ip6_hdr, ip6_nxt);
3052 break;
3053 case ICMP_UNREACH_NEEDFRAG:
3054 type = ICMP6_PACKET_TOO_BIG;
3055 code = 0;
3056 mtu += 20;
3057 break;
3058 default:
3059 return (-1);
3060 }
3061 break;
3062 case ICMP_TIMXCEED:
3063 type = ICMP6_TIME_EXCEEDED;
3064 break;
3065 case ICMP_PARAMPROB:
3066 type = ICMP6_PARAM_PROB;
3067 switch (code) {
3068 case ICMP_PARAMPROB_ERRATPTR:
3069 code = ICMP6_PARAMPROB_HEADER;
3070 break;
3071 case ICMP_PARAMPROB_LENGTH:
3072 code = ICMP6_PARAMPROB_HEADER;
3073 break;
3074 default:
3075 return (-1);
3076 }
3077
3078 ptr = icmp4->icmp_pptr;
3079 if (ptr == 0 || ptr == PTR_IP(ip_tos))
3080 ; /* preserve */
3081 else if (ptr == PTR_IP(ip_len) ||
3082 ptr == PTR_IP(ip_len) + 1)
3083 ptr = PTR_IP6(ip6_plen);
3084 else if (ptr == PTR_IP(ip_ttl))
3085 ptr = PTR_IP6(ip6_hlim);
3086 else if (ptr == PTR_IP(ip_p))
3087 ptr = PTR_IP6(ip6_nxt);
3088 else if (ptr >= PTR_IP(ip_src) &&
3089 ptr < PTR_IP(ip_dst))
3090 ptr = PTR_IP6(ip6_src);
3091 else if (ptr >= PTR_IP(ip_dst) &&
3092 ptr < sizeof(struct ip))
3093 ptr = PTR_IP6(ip6_dst);
3094 else {
3095 return (-1);
3096 }
3097 break;
3098 default:
3099 return (-1);
3100 }
3101
3102 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI);
3103 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO);
3104 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu));
3105 if (ptr >= 0)
3106 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr));
3107 break;
3108 }
3109
3110 return (0);
3111 }
3112 #endif /* INET6 */
3113
3114 /*
3115 * Need to modulate the sequence numbers in the TCP SACK option
3116 * (credits to Krzysztof Pfaff for report and patch)
3117 */
3118 int
3119 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst)
3120 {
3121 struct sackblk sack;
3122 int copyback = 0, i;
3123 int olen, optsoff;
3124 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh;
3125
3126 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
3127 optsoff = pd->off + sizeof(struct tcphdr);
3128 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2)
3129 if (olen < TCPOLEN_MINSACK ||
3130 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af))
3131 return (0);
3132
3133 eoh = opts + olen;
3134 opt = opts;
3135 while ((opt = pf_find_tcpopt(opt, opts, olen,
3136 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL)
3137 {
3138 size_t safelen = MIN(opt[1], (eoh - opt));
3139 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) {
3140 size_t startoff = (opt + i) - opts;
3141 memcpy(&sack, &opt[i], sizeof(sack));
3142 pf_patch_32_unaligned(pd, &sack.start,
3143 htonl(ntohl(sack.start) - dst->seqdiff),
3144 PF_ALGNMNT(startoff));
3145 pf_patch_32_unaligned(pd, &sack.end,
3146 htonl(ntohl(sack.end) - dst->seqdiff),
3147 PF_ALGNMNT(startoff + sizeof(sack.start)));
3148 memcpy(&opt[i], &sack, sizeof(sack));
3149 }
3150 copyback = 1;
3151 opt += opt[1];
3152 }
3153
3154 if (copyback)
3155 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT);
3156 return (copyback);
3157 }
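
/*
 * Editor's illustration, not part of pf.c: each SACK edge is shifted by
 * the same seqdiff modulator applied to th_seq, with unsigned 32-bit
 * wraparound doing the right thing near sequence-space rollover. A
 * standalone sketch with a made-up modulator, kept under #if 0:
 */
#if 0
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t seqdiff = 0x9e3779b9;			/* hypothetical */
	uint32_t sack_start = htonl(0x00000010);	/* wire order */

	/* same expression as above: htonl(ntohl(x) - seqdiff) */
	uint32_t rewritten = htonl(ntohl(sack_start) - seqdiff);

	/* 0x00000010 -> 0x61c88657, wrapped modulo 2^32 */
	printf("%#010x -> %#010x\n", ntohl(sack_start), ntohl(rewritten));
	return 0;
}
#endif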
3158
3159 struct mbuf *
3160 pf_build_tcp(const struct pf_rule *r, sa_family_t af,
3161 const struct pf_addr *saddr, const struct pf_addr *daddr,
3162 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
3163 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
3164 u_int16_t rtag, u_int sack, u_int rdom)
3165 {
3166 struct mbuf *m;
3167 int len, tlen;
3168 struct ip *h;
3169 #ifdef INET6
3170 struct ip6_hdr *h6;
3171 #endif /* INET6 */
3172 struct tcphdr *th;
3173 char *opt;
3174
3175 /* maximum segment size tcp option */
3176 tlen = sizeof(struct tcphdr);
3177 if (mss)
3178 tlen += 4;
3179 if (sack)
3180 tlen += 2;
3181
3182 switch (af) {
3183 case AF_INET:
3184 len = sizeof(struct ip) + tlen;
3185 break;
3186 #ifdef INET6
3187 case AF_INET6:
3188 len = sizeof(struct ip6_hdr) + tlen;
3189 break;
3190 #endif /* INET6 */
3191 default:
3192 unhandled_af(af);
3193 }
3194
3195 /* create outgoing mbuf */
3196 m = m_gethdr(M_DONTWAIT, MT_HEADER);
3197 if (m == NULL)
3198 return (NULL);
3199 if (tag)
3200 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
3201 m->m_pkthdr.pf.tag = rtag;
3202 m->m_pkthdr.ph_rtableid = rdom;
3203 if (r && (r->scrub_flags & PFSTATE_SETPRIO))
3204 m->m_pkthdr.pf.prio = r->set_prio[0];
3205 if (r && r->qid)
3206 m->m_pkthdr.pf.qid = r->qid;
3207 m->m_data += max_linkhdr;
3208 m->m_pkthdr.len = m->m_len = len;
3209 m->m_pkthdr.ph_ifidx = 0;
3210 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
3211 memset(m->m_data, 0, len);
3212 switch (af) {
3213 case AF_INET:
3214 h = mtod(m, struct ip *);
3215 h->ip_p = IPPROTO_TCP;
3216 h->ip_len = htons(tlen);
3217 h->ip_v = 4;
3218 h->ip_hl = sizeof(*h) >> 2;
3219 h->ip_tos = IPTOS_LOWDELAY;
3220 h->ip_len = htons(len);
3221 h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
3222 h->ip_ttl = ttl ? ttl : ip_defttl;
3223 h->ip_sum = 0;
3224 h->ip_src.s_addr = saddr->v4.s_addr;
3225 h->ip_dst.s_addr = daddr->v4.s_addr;
3226
3227 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
3228 break;
3229 #ifdef INET6
3230 case AF_INET6:
3231 h6 = mtod(m, struct ip6_hdr *);
3232 h6->ip6_nxt = IPPROTO_TCP;
3233 h6->ip6_plen = htons(tlen);
3234 h6->ip6_vfc |= IPV6_VERSION;
3235 h6->ip6_hlim = IPV6_DEFHLIM;
3236 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
3237 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
3238
3239 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
3240 break;
3241 #endif /* INET6 */
3242 default:
3243 unhandled_af(af);
3244 }
3245
3246 /* TCP header */
3247 th->th_sport = sport;
3248 th->th_dport = dport;
3249 th->th_seq = htonl(seq);
3250 th->th_ack = htonl(ack);
3251 th->th_off = tlen >> 2;
3252 th->th_flags = flags;
3253 th->th_win = htons(win);
3254
3255 opt = (char *)(th + 1);
3256 if (mss) {
3257 opt[0] = TCPOPT_MAXSEG;
3258 opt[1] = 4;
3259 mss = htons(mss);
3260 memcpy((opt + 2), &mss, 2);
3261 opt += 4;
3262 }
3263 if (sack) {
3264 opt[0] = TCPOPT_SACK_PERMITTED;
3265 opt[1] = 2;
3266 opt += 2;
3267 }
3268
3269 return (m);
3270 }
3271
3272 void
3273 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
3274 const struct pf_addr *saddr, const struct pf_addr *daddr,
3275 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
3276 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
3277 u_int16_t rtag, u_int rdom)
3278 {
3279 struct mbuf *m;
3280
3281 if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack,
3282 flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL)
3283 return;
3284
3285 switch (af) {
3286 case AF_INET:
3287 ip_send(m);
3288 break;
3289 #ifdef INET6
3290 case AF_INET6:
3291 ip6_send(m);
3292 break;
3293 #endif /* INET6 */
3294 }
3295 }
3296
3297 static void
3298 pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *st,
3299 struct pf_state_peer *src, struct pf_state_peer *dst)
3300 {
3301 /*
3302 	 * We are sending a challenge ACK as a response to a SYN packet which
3303 	 * matches an existing state (modulo the TCP window check). The packet
3304 	 * must therefore be sent on behalf of the destination.
3305 	 *
3306 	 * We expect the sender to either remain silent or send an RST packet,
3307 	 * so that both the firewall and the remote peer can purge the dead
3308 	 * state from memory.
3309 */
3310 pf_send_tcp(st->rule.ptr, pd->af, pd->dst, pd->src,
3311 pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
3312 src->seqlo, TH_ACK, 0, 0, st->rule.ptr->return_ttl, 1, 0,
3313 pd->rdomain);
3314 }
3315
3316 void
3317 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param,
3318 sa_family_t af, struct pf_rule *r, u_int rdomain)
3319 {
3320 struct mbuf *m0;
3321
3322 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
3323 return;
3324
3325 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
3326 m0->m_pkthdr.ph_rtableid = rdomain;
3327 if (r && (r->scrub_flags & PFSTATE_SETPRIO))
3328 m0->m_pkthdr.pf.prio = r->set_prio[0];
3329 if (r && r->qid)
3330 m0->m_pkthdr.pf.qid = r->qid;
3331
3332 switch (af) {
3333 case AF_INET:
3334 icmp_error(m0, type, code, 0, param);
3335 break;
3336 #ifdef INET6
3337 case AF_INET6:
3338 icmp6_error(m0, type, code, param);
3339 break;
3340 #endif /* INET6 */
3341 }
3342 }
3343
3344 /*
3345  * Return ((n == 0) == (a == b [with mask m]))
3346 * Note: n != 0 => returns (a != b [with mask m])
3347 */
3348 int
3349 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
3350 struct pf_addr *b, sa_family_t af)
3351 {
3352 switch (af) {
3353 case AF_INET:
3354 if ((a->addr32[0] & m->addr32[0]) ==
3355 (b->addr32[0] & m->addr32[0]))
3356 return (n == 0);
3357 break;
3358 #ifdef INET6
3359 case AF_INET6:
3360 if (((a->addr32[0] & m->addr32[0]) ==
3361 (b->addr32[0] & m->addr32[0])) &&
3362 ((a->addr32[1] & m->addr32[1]) ==
3363 (b->addr32[1] & m->addr32[1])) &&
3364 ((a->addr32[2] & m->addr32[2]) ==
3365 (b->addr32[2] & m->addr32[2])) &&
3366 ((a->addr32[3] & m->addr32[3]) ==
3367 (b->addr32[3] & m->addr32[3])))
3368 return (n == 0);
3369 break;
3370 #endif /* INET6 */
3371 }
3372
3373 return (n != 0);
3374 }
3375
3376 /*
3377 * Return 1 if b <= a <= e, otherwise return 0.
3378 */
3379 int
3380 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
3381 struct pf_addr *a, sa_family_t af)
3382 {
3383 switch (af) {
3384 case AF_INET:
3385 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
3386 (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
3387 return (0);
3388 break;
3389 #ifdef INET6
3390 case AF_INET6: {
3391 int i;
3392
3393 /* check a >= b */
3394 for (i = 0; i < 4; ++i)
3395 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
3396 break;
3397 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
3398 return (0);
3399 /* check a <= e */
3400 for (i = 0; i < 4; ++i)
3401 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
3402 break;
3403 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
3404 return (0);
3405 break;
3406 }
3407 #endif /* INET6 */
3408 }
3409 return (1);
3410 }
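
/*
 * Editor's illustration, not part of pf.c: the AF_INET6 case above is a
 * word-wise lexicographic comparison; each loop breaks as soon as a
 * more significant word decides the order. A standalone sketch in host
 * byte order (standing in for the ntohl() calls), kept under #if 0:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

static int
in_range(const uint32_t *b, const uint32_t *e, const uint32_t *a)
{
	int i;

	for (i = 0; i < 4; i++)			/* check a >= b */
		if (a[i] > b[i])
			break;
		else if (a[i] < b[i])
			return (0);
	for (i = 0; i < 4; i++)			/* check a <= e */
		if (a[i] < e[i])
			break;
		else if (a[i] > e[i])
			return (0);
	return (1);
}

int main(void)
{
	uint32_t b[4] = { 0x20010db8, 0, 0, 0x10 };
	uint32_t e[4] = { 0x20010db8, 0, 0, 0x20 };
	uint32_t a[4] = { 0x20010db8, 0, 0, 0x1f };

	printf("%d\n", in_range(b, e, a));	/* 1: b <= a <= e */
	return 0;
}
#endif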
3411
3412 int
3413 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
3414 {
3415 switch (op) {
3416 case PF_OP_IRG:
3417 return ((p > a1) && (p < a2));
3418 case PF_OP_XRG:
3419 return ((p < a1) || (p > a2));
3420 case PF_OP_RRG:
3421 return ((p >= a1) && (p <= a2));
3422 case PF_OP_EQ:
3423 return (p == a1);
3424 case PF_OP_NE:
3425 return (p != a1);
3426 case PF_OP_LT:
3427 return (p < a1);
3428 case PF_OP_LE:
3429 return (p <= a1);
3430 case PF_OP_GT:
3431 return (p > a1);
3432 case PF_OP_GE:
3433 return (p >= a1);
3434 }
3435 return (0); /* never reached */
3436 }
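
/*
 * Editor's illustration, not part of pf.c: how pf.conf port operators
 * map onto the comparisons above, e.g. "port 1000:2000" is the
 * inclusive range PF_OP_RRG while "port 1000 >< 2000" is the exclusive
 * PF_OP_IRG. A trimmed standalone copy with illustrative enum values,
 * kept under #if 0:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

enum { OP_IRG, OP_XRG, OP_RRG, OP_EQ };

static int
match(int op, uint32_t a1, uint32_t a2, uint32_t p)
{
	switch (op) {
	case OP_IRG:
		return ((p > a1) && (p < a2));	/* a1 >< a2, exclusive */
	case OP_XRG:
		return ((p < a1) || (p > a2));	/* a1 <> a2, outside */
	case OP_RRG:
		return ((p >= a1) && (p <= a2));/* a1:a2, inclusive */
	case OP_EQ:
		return (p == a1);
	}
	return (0);
}

int main(void)
{
	printf("%d %d %d\n",
	    match(OP_RRG, 1000, 2000, 1000),	/* 1: endpoint included */
	    match(OP_IRG, 1000, 2000, 1000),	/* 0: endpoint excluded */
	    match(OP_XRG, 1000, 2000, 999));	/* 1: outside the range */
	return 0;
}
#endif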
3437
3438 int
3439 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
3440 {
3441 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
3442 }
3443
3444 int
3445 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
3446 {
3447 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE)
3448 return (0);
3449 return (pf_match(op, a1, a2, u));
3450 }
3451
3452 int
3453 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
3454 {
3455 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE)
3456 return (0);
3457 return (pf_match(op, a1, a2, g));
3458 }
3459
3460 int
3461 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
3462 {
3463 if (*tag == -1)
3464 *tag = m->m_pkthdr.pf.tag;
3465
3466 return ((!r->match_tag_not && r->match_tag == *tag) ||
3467 (r->match_tag_not && r->match_tag != *tag));
3468 }
3469
3470 int
3471 pf_match_rcvif(struct mbuf *m, struct pf_rule *r)
3472 {
3473 struct ifnet *ifp;
3474 #if NCARP > 0
3475 struct ifnet *ifp0;
3476 #endif
3477 struct pfi_kif *kif;
3478
3479 ifp = if_get(m->m_pkthdr.ph_ifidx);
3480 if (ifp == NULL)
3481 return (0);
3482
3483 #if NCARP > 0
3484 if (ifp->if_type == IFT_CARP &&
3485 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) {
3486 kif = (struct pfi_kif *)ifp0->if_pf_kif;
3487 if_put(ifp0);
3488 } else
3489 #endif /* NCARP */
3490 kif = (struct pfi_kif *)ifp->if_pf_kif;
3491
3492 if_put(ifp);
3493
3494 if (kif == NULL) {
3495 DPFPRINTF(LOG_ERR,
3496 "%s: kif == NULL, @%d via %s", __func__,
3497 r->nr, r->rcv_ifname);
3498 return (0);
3499 }
3500
3501 return (pfi_kif_match(r->rcv_kif, kif));
3502 }
3503
3504 void
3505 pf_tag_packet(struct mbuf *m, int tag, int rtableid)
3506 {
3507 if (tag > 0)
3508 m->m_pkthdr.pf.tag = tag;
3509 if (rtableid >= 0)
3510 m->m_pkthdr.ph_rtableid = (u_int)rtableid;
3511 }
3512
3513 void
3514 pf_anchor_stack_init(void)
3515 {
3516 struct pf_anchor_stackframe *stack;
3517
3518 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
3519 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = &stack[0];
3520 cpumem_leave(pf_anchor_stack, stack);
3521 }
3522
3523 int
3524 pf_anchor_stack_is_full(struct pf_anchor_stackframe *sf)
3525 {
3526 struct pf_anchor_stackframe *stack;
3527 int rv;
3528
3529 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
3530 rv = (sf == &stack[PF_ANCHOR_STACK_MAX]);
3531 cpumem_leave(pf_anchor_stack, stack);
3532
3533 return (rv);
3534 }
3535
3536 int
3537 pf_anchor_stack_is_empty(struct pf_anchor_stackframe *sf)
3538 {
3539 struct pf_anchor_stackframe *stack;
3540 int rv;
3541
3542 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
3543 rv = (sf == &stack[0]);
3544 cpumem_leave(pf_anchor_stack, stack);
3545
3546 return (rv);
3547 }
3548
3549 struct pf_anchor_stackframe *
3550 pf_anchor_stack_top(void)
3551 {
3552 struct pf_anchor_stackframe *stack;
3553 struct pf_anchor_stackframe *top_sf;
3554
3555 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
3556 top_sf = stack[PF_ANCHOR_STACK_MAX].sf_stack_top;
3557 cpumem_leave(pf_anchor_stack, stack);
3558
3559 return (top_sf);
3560 }
3561
3562 int
3563 pf_anchor_stack_push(struct pf_ruleset *rs, struct pf_rule *r,
3564 struct pf_anchor *child, int jump_target)
3565 {
3566 struct pf_anchor_stackframe *stack;
3567 struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top();
3568
3569 top_sf++;
3570 if (pf_anchor_stack_is_full(top_sf))
3571 return (-1);
3572
3573 top_sf->sf_rs = rs;
3574 top_sf->sf_r = r;
3575 top_sf->sf_child = child;
3576 top_sf->sf_jump_target = jump_target;
3577
3578 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
3579
3580 if ((top_sf <= &stack[0]) || (top_sf >= &stack[PF_ANCHOR_STACK_MAX]))
3581 panic("%s: top frame outside of anchor stack range", __func__);
3582
3583 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf;
3584 cpumem_leave(pf_anchor_stack, stack);
3585
3586 return (0);
3587 }
3588
3589 int
3590 pf_anchor_stack_pop(struct pf_ruleset **rs, struct pf_rule **r,
3591 struct pf_anchor **child, int *jump_target)
3592 {
3593 struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top();
3594 struct pf_anchor_stackframe *stack;
3595 int on_top;
3596
3597 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
3598 if (pf_anchor_stack_is_empty(top_sf)) {
3599 on_top = -1;
3600 } else {
3601 if ((top_sf <= &stack[0]) ||
3602 (top_sf >= &stack[PF_ANCHOR_STACK_MAX]))
3603 panic("%s: top frame outside of anchor stack range",
3604 __func__);
3605
3606 *rs = top_sf->sf_rs;
3607 *r = top_sf->sf_r;
3608 *child = top_sf->sf_child;
3609 *jump_target = top_sf->sf_jump_target;
3610 top_sf--;
3611 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf;
3612 on_top = 0;
3613 }
3614 cpumem_leave(pf_anchor_stack, stack);
3615
3616 return (on_top);
3617 }
3618
3619 void
3620 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
3621 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
3622 {
3623 switch (af) {
3624 case AF_INET:
3625 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3626 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
3627 break;
3628 #ifdef INET6
3629 case AF_INET6:
3630 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3631 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
3632 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
3633 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
3634 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
3635 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
3636 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
3637 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
3638 break;
3639 #endif /* INET6 */
3640 default:
3641 unhandled_af(af);
3642 }
3643 }
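
/*
 * Editor's illustration, not part of pf.c: pf_poolmask() above merges
 * network bits taken from the pool address with host bits preserved
 * from the packet address ("xor 0xffffffff" is just ~mask). A
 * standalone AF_INET sketch with made-up addresses, kept under #if 0:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t raddr = 0xc0a80100;	/* 192.168.1.0, pool base */
	uint32_t rmask = 0xffffff00;	/* /24: pool supplies network bits */
	uint32_t saddr = 0x0a00002a;	/* 10.0.0.42, original source */
	uint32_t naddr = (raddr & rmask) | (~rmask & saddr);

	printf("%#010x\n", naddr);	/* 0xc0a8012a = 192.168.1.42 */
	return 0;
}
#endif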
3644
3645 void
3646 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
3647 {
3648 switch (af) {
3649 case AF_INET:
3650 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
3651 break;
3652 #ifdef INET6
3653 case AF_INET6:
3654 if (addr->addr32[3] == 0xffffffff) {
3655 addr->addr32[3] = 0;
3656 if (addr->addr32[2] == 0xffffffff) {
3657 addr->addr32[2] = 0;
3658 if (addr->addr32[1] == 0xffffffff) {
3659 addr->addr32[1] = 0;
3660 addr->addr32[0] =
3661 htonl(ntohl(addr->addr32[0]) + 1);
3662 } else
3663 addr->addr32[1] =
3664 htonl(ntohl(addr->addr32[1]) + 1);
3665 } else
3666 addr->addr32[2] =
3667 htonl(ntohl(addr->addr32[2]) + 1);
3668 } else
3669 addr->addr32[3] =
3670 htonl(ntohl(addr->addr32[3]) + 1);
3671 break;
3672 #endif /* INET6 */
3673 default:
3674 unhandled_af(af);
3675 }
3676 }
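
/*
 * Editor's illustration, not part of pf.c: the nested AF_INET6 branch
 * above is a ripple-carry increment over four big-endian words. The
 * same effect, written as a loop over host-order words (standing in
 * for the htonl/ntohl dance), kept under #if 0:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t a[4] = { 0x20010db8, 0x00000000, 0xffffffff, 0xffffffff };
	int i;

	for (i = 3; i >= 0; i--)
		if (++a[i] != 0)	/* stop once a word doesn't wrap */
			break;

	/* prints 20010db8000000010000000000000000 */
	printf("%08x%08x%08x%08x\n", a[0], a[1], a[2], a[3]);
	return 0;
}
#endif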
3677
3678 int
3679 pf_socket_lookup(struct pf_pdesc *pd)
3680 {
3681 struct pf_addr *saddr, *daddr;
3682 u_int16_t sport, dport;
3683 struct inpcbtable *tb;
3684 struct inpcb *inp;
3685
3686 pd->lookup.uid = -1;
3687 pd->lookup.gid = -1;
3688 pd->lookup.pid = NO_PID;
3689 switch (pd->virtual_proto) {
3690 case IPPROTO_TCP:
3691 sport = pd->hdr.tcp.th_sport;
3692 dport = pd->hdr.tcp.th_dport;
3693 PF_ASSERT_LOCKED();
3694 NET_ASSERT_LOCKED();
3695 tb = &tcbtable;
3696 break;
3697 case IPPROTO_UDP:
3698 sport = pd->hdr.udp.uh_sport;
3699 dport = pd->hdr.udp.uh_dport;
3700 PF_ASSERT_LOCKED();
3701 NET_ASSERT_LOCKED();
3702 tb = &udbtable;
3703 break;
3704 default:
3705 return (-1);
3706 }
3707 if (pd->dir == PF_IN) {
3708 saddr = pd->src;
3709 daddr = pd->dst;
3710 } else {
3711 u_int16_t p;
3712
3713 p = sport;
3714 sport = dport;
3715 dport = p;
3716 saddr = pd->dst;
3717 daddr = pd->src;
3718 }
3719 switch (pd->af) {
3720 case AF_INET:
3721 /*
3722 		 * Fails when the rtable is changed while evaluating the ruleset.
3723 * The socket looked up will not match the one hit in the end.
3724 */
3725 inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport,
3726 pd->rdomain);
3727 if (inp == NULL) {
3728 inp = in_pcblookup_listen(tb, daddr->v4, dport,
3729 NULL, pd->rdomain);
3730 if (inp == NULL)
3731 return (-1);
3732 }
3733 break;
3734 #ifdef INET6
3735 case AF_INET6:
3736 inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6,
3737 dport, pd->rdomain);
3738 if (inp == NULL) {
3739 inp = in6_pcblookup_listen(tb, &daddr->v6, dport,
3740 NULL, pd->rdomain);
3741 if (inp == NULL)
3742 return (-1);
3743 }
3744 break;
3745 #endif /* INET6 */
3746 default:
3747 unhandled_af(pd->af);
3748 }
3749 pd->lookup.uid = inp->inp_socket->so_euid;
3750 pd->lookup.gid = inp->inp_socket->so_egid;
3751 pd->lookup.pid = inp->inp_socket->so_cpid;
3752 in_pcbunref(inp);
3753 return (1);
3754 }
3755
3756 /* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity"
3757 * /\ (eoh - r) >= min_typelen >= 2 "safety" )
3758 *
3759 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen
3760 */
3761 u_int8_t*
3762 pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type,
3763 u_int8_t min_typelen)
3764 {
3765 u_int8_t *eoh = opts + hlen;
3766
3767 if (min_typelen < 2)
3768 return (NULL);
3769
3770 while ((eoh - opt) >= min_typelen) {
3771 switch (*opt) {
3772 case TCPOPT_EOL:
3773 /* FALLTHROUGH - Workaround the failure of some
3774 systems to NOP-pad their bzero'd option buffers,
3775 producing spurious EOLs */
3776 case TCPOPT_NOP:
3777 opt++;
3778 continue;
3779 default:
3780 if (opt[0] == type &&
3781 opt[1] >= min_typelen)
3782 return (opt);
3783 }
3784
3785 opt += MAX(opt[1], 2); /* evade infinite loops */
3786 }
3787
3788 return (NULL);
3789 }
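
/*
 * Editor's illustration, not part of pf.c: walking a TCP option buffer
 * the same way, skipping NOP/EOL one byte at a time and otherwise
 * advancing by max(len, 2) so a zero option length can't loop forever.
 * Standalone, with a hand-built option buffer, kept under #if 0:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define OPT_EOL		0
#define OPT_NOP		1
#define OPT_MAXSEG	2
#define OPT_WINDOW	3

int main(void)
{
	/* MSS 1460, NOP pad, window scale 7 */
	uint8_t opts[] = { OPT_MAXSEG, 4, 0x05, 0xb4,
			   OPT_NOP,
			   OPT_WINDOW, 3, 7 };
	uint8_t *opt = opts, *eoh = opts + sizeof(opts);

	while (eoh - opt >= 2) {
		if (*opt == OPT_EOL || *opt == OPT_NOP) {
			opt++;
			continue;
		}
		printf("type %u len %u\n", opt[0], opt[1]);
		opt += (opt[1] > 2) ? opt[1] : 2;	/* evade loops */
	}
	return 0;
}
#endif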
3790
3791 u_int8_t
3792 pf_get_wscale(struct pf_pdesc *pd)
3793 {
3794 int olen;
3795 u_int8_t opts[MAX_TCPOPTLEN], *opt;
3796 u_int8_t wscale = 0;
3797
3798 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
3799 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m,
3800 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
3801 return (0);
3802
3803 opt = opts;
3804 while ((opt = pf_find_tcpopt(opt, opts, olen,
3805 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) {
3806 wscale = opt[2];
3807 wscale = MIN(wscale, TCP_MAX_WINSHIFT);
3808 wscale |= PF_WSCALE_FLAG;
3809
3810 opt += opt[1];
3811 }
3812
3813 return (wscale);
3814 }
3815
3816 u_int16_t
3817 pf_get_mss(struct pf_pdesc *pd)
3818 {
3819 int olen;
3820 u_int8_t opts[MAX_TCPOPTLEN], *opt;
3821 u_int16_t mss = tcp_mssdflt;
3822
3823 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
3824 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m,
3825 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
3826 return (0);
3827
3828 opt = opts;
3829 while ((opt = pf_find_tcpopt(opt, opts, olen,
3830 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
3831 memcpy(&mss, (opt + 2), 2);
3832 mss = ntohs(mss);
3833
3834 opt += opt[1];
3835 }
3836 return (mss);
3837 }
3838
3839 u_int16_t
3840 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
3841 {
3842 struct ifnet *ifp;
3843 struct sockaddr_in *dst;
3844 #ifdef INET6
3845 struct sockaddr_in6 *dst6;
3846 #endif /* INET6 */
3847 struct rtentry *rt = NULL;
3848 struct sockaddr_storage ss;
3849 int hlen;
3850 u_int16_t mss = tcp_mssdflt;
3851
3852 memset(&ss, 0, sizeof(ss));
3853
3854 switch (af) {
3855 case AF_INET:
3856 hlen = sizeof(struct ip);
3857 dst = (struct sockaddr_in *)&ss;
3858 dst->sin_family = AF_INET;
3859 dst->sin_len = sizeof(*dst);
3860 dst->sin_addr = addr->v4;
3861 rt = rtalloc(sintosa(dst), 0, rtableid);
3862 break;
3863 #ifdef INET6
3864 case AF_INET6:
3865 hlen = sizeof(struct ip6_hdr);
3866 dst6 = (struct sockaddr_in6 *)&ss;
3867 dst6->sin6_family = AF_INET6;
3868 dst6->sin6_len = sizeof(*dst6);
3869 dst6->sin6_addr = addr->v6;
3870 rt = rtalloc(sin6tosa(dst6), 0, rtableid);
3871 break;
3872 #endif /* INET6 */
3873 }
3874
3875 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) {
3876 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr);
3877 mss = max(tcp_mssdflt, mss);
3878 if_put(ifp);
3879 }
3880 rtfree(rt);
3881 mss = min(mss, offer);
3882 mss = max(mss, 64); /* sanity - at least max opt space */
3883 return (mss);
3884 }
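
/*
 * Editor's illustration, not part of pf.c: the MSS clamp above in plain
 * numbers. For an ethernet route (MTU 1500) the IPv4 result is
 * 1500 - 20 - 20 = 1460, then bounded below by the default and above by
 * the peer's offer, with a 64 byte sanity floor. Standalone sketch with
 * made-up inputs (512 standing in for tcp_mssdflt), kept under #if 0:
 */
#if 0
#include <stdio.h>

int main(void)
{
	int mtu = 1500, hlen = 20, thlen = 20;	/* IPv4 + TCP headers */
	int mssdflt = 512;			/* stand-in for tcp_mssdflt */
	int offer = 1460;			/* MSS offered in the SYN */
	int mss;

	mss = mtu - hlen - thlen;
	if (mss < mssdflt)
		mss = mssdflt;
	if (mss > offer)
		mss = offer;
	if (mss < 64)
		mss = 64;			/* sanity floor */

	printf("mss %d\n", mss);		/* 1460 */
	return 0;
}
#endif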
3885
3886 static __inline int
3887 pf_set_rt_ifp(struct pf_state *st, struct pf_addr *saddr, sa_family_t af,
3888 struct pf_src_node **sns)
3889 {
3890 struct pf_rule *r = st->rule.ptr;
3891 int rv;
3892
3893 if (!r->rt)
3894 return (0);
3895
3896 rv = pf_map_addr(af, r, saddr, &st->rt_addr, NULL, sns,
3897 &r->route, PF_SN_ROUTE);
3898 if (rv == 0)
3899 st->rt = r->rt;
3900
3901 return (rv);
3902 }
3903
3904 u_int32_t
3905 pf_tcp_iss(struct pf_pdesc *pd)
3906 {
3907 SHA2_CTX ctx;
3908 union {
3909 uint8_t bytes[SHA512_DIGEST_LENGTH];
3910 uint32_t words[1];
3911 } digest;
3912
3913 if (pf_tcp_secret_init == 0) {
3914 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret));
3915 SHA512Init(&pf_tcp_secret_ctx);
3916 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret,
3917 sizeof(pf_tcp_secret));
3918 pf_tcp_secret_init = 1;
3919 }
3920 ctx = pf_tcp_secret_ctx;
3921
3922 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain));
3923 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short));
3924 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short));
3925 switch (pd->af) {
3926 case AF_INET:
3927 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr));
3928 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr));
3929 break;
3930 #ifdef INET6
3931 case AF_INET6:
3932 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr));
3933 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr));
3934 break;
3935 #endif /* INET6 */
3936 }
3937 SHA512Final(digest.bytes, &ctx);
3938 pf_tcp_iss_off += 4096;
3939 return (digest.words[0] + READ_ONCE(tcp_iss) + pf_tcp_iss_off);
3940 }
3941
3942 void
3943 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a)
3944 {
3945 if (r->qid)
3946 a->qid = r->qid;
3947 if (r->pqid)
3948 a->pqid = r->pqid;
3949 if (r->rtableid >= 0)
3950 a->rtableid = r->rtableid;
3951 #if NPFLOG > 0
3952 a->log |= r->log;
3953 #endif /* NPFLOG > 0 */
3954 if (r->scrub_flags & PFSTATE_SETTOS)
3955 a->set_tos = r->set_tos;
3956 if (r->min_ttl)
3957 a->min_ttl = r->min_ttl;
3958 if (r->max_mss)
3959 a->max_mss = r->max_mss;
3960 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID|
3961 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO));
3962 if (r->scrub_flags & PFSTATE_SETPRIO) {
3963 a->set_prio[0] = r->set_prio[0];
3964 a->set_prio[1] = r->set_prio[1];
3965 }
3966 if (r->rule_flag & PFRULE_SETDELAY)
3967 a->delay = r->delay;
3968 }
3969
3970 #define PF_TEST_ATTRIB(t, a) \
3971 if (t) { \
3972 r = a; \
3973 continue; \
3974 } else do { \
3975 } while (0)
3976
3977 enum pf_test_status
3978 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
3979 {
3980 struct pf_rule *r;
3981 struct pf_anchor *child = NULL;
3982 int target;
3983
3984 pf_anchor_stack_init();
3985 enter_ruleset:
3986 r = TAILQ_FIRST(ruleset->rules.active.ptr);
3987 while (r != NULL) {
3988 PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED,
3989 TAILQ_NEXT(r, entries));
3990 r->evaluations++;
3991 PF_TEST_ATTRIB(
3992 (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot),
3993 r->skip[PF_SKIP_IFP].ptr);
3994 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir),
3995 r->skip[PF_SKIP_DIR].ptr);
3996 PF_TEST_ATTRIB((r->onrdomain >= 0 &&
3997 (r->onrdomain == ctx->pd->rdomain) == r->ifnot),
3998 r->skip[PF_SKIP_RDOM].ptr);
3999 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af),
4000 r->skip[PF_SKIP_AF].ptr);
4001 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto),
4002 r->skip[PF_SKIP_PROTO].ptr);
4003 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr,
4004 ctx->pd->naf, r->src.neg, ctx->pd->kif,
4005 ctx->act.rtableid)),
4006 r->skip[PF_SKIP_SRC_ADDR].ptr);
4007 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr,
4008 ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)),
4009 r->skip[PF_SKIP_DST_ADDR].ptr);
4010
4011 switch (ctx->pd->virtual_proto) {
4012 case PF_VPROTO_FRAGMENT:
4013 /* tcp/udp only. port_op always 0 in other cases */
4014 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
4015 TAILQ_NEXT(r, entries));
4016 PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP &&
4017 r->flagset),
4018 TAILQ_NEXT(r, entries));
4019 /* icmp only. type/code always 0 in other cases */
4020 PF_TEST_ATTRIB((r->type || r->code),
4021 TAILQ_NEXT(r, entries));
4022 /* tcp/udp only. {uid|gid}.op always 0 in other cases */
4023 PF_TEST_ATTRIB((r->gid.op || r->uid.op),
4024 TAILQ_NEXT(r, entries));
4025 break;
4026
4027 case IPPROTO_TCP:
4028 PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) !=
4029 r->flags),
4030 TAILQ_NEXT(r, entries));
4031 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
4032 !pf_osfp_match(pf_osfp_fingerprint(ctx->pd),
4033 r->os_fingerprint)),
4034 TAILQ_NEXT(r, entries));
4035 /* FALLTHROUGH */
4036
4037 case IPPROTO_UDP:
4038 /* tcp/udp only. port_op always 0 in other cases */
4039 PF_TEST_ATTRIB((r->src.port_op &&
4040 !pf_match_port(r->src.port_op, r->src.port[0],
4041 r->src.port[1], ctx->pd->nsport)),
4042 r->skip[PF_SKIP_SRC_PORT].ptr);
4043 PF_TEST_ATTRIB((r->dst.port_op &&
4044 !pf_match_port(r->dst.port_op, r->dst.port[0],
4045 r->dst.port[1], ctx->pd->ndport)),
4046 r->skip[PF_SKIP_DST_PORT].ptr);
4047 /* tcp/udp only. uid.op always 0 in other cases */
4048 PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done ||
4049 (ctx->pd->lookup.done =
4050 pf_socket_lookup(ctx->pd), 1)) &&
4051 !pf_match_uid(r->uid.op, r->uid.uid[0],
4052 r->uid.uid[1], ctx->pd->lookup.uid)),
4053 TAILQ_NEXT(r, entries));
4054 /* tcp/udp only. gid.op always 0 in other cases */
4055 PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done ||
4056 (ctx->pd->lookup.done =
4057 pf_socket_lookup(ctx->pd), 1)) &&
4058 !pf_match_gid(r->gid.op, r->gid.gid[0],
4059 r->gid.gid[1], ctx->pd->lookup.gid)),
4060 TAILQ_NEXT(r, entries));
4061 break;
4062
4063 case IPPROTO_ICMP:
4064 case IPPROTO_ICMPV6:
4065 /* icmp only. type always 0 in other cases */
4066 PF_TEST_ATTRIB((r->type &&
4067 r->type != ctx->icmptype + 1),
4068 TAILQ_NEXT(r, entries));
4069 			/* icmp only. code always 0 in other cases */
4070 PF_TEST_ATTRIB((r->code &&
4071 r->code != ctx->icmpcode + 1),
4072 TAILQ_NEXT(r, entries));
4073 /* icmp only. don't create states on replies */
4074 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
4075 (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
4076 ctx->icmp_dir != PF_IN),
4077 TAILQ_NEXT(r, entries));
4078 break;
4079
4080 default:
4081 break;
4082 }
4083
4084 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
4085 ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
4086 TAILQ_NEXT(r, entries));
4087 PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
4088 TAILQ_NEXT(r, entries));
4089 PF_TEST_ATTRIB((r->prob &&
4090 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
4091 TAILQ_NEXT(r, entries));
4092 PF_TEST_ATTRIB((r->match_tag &&
4093 !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
4094 TAILQ_NEXT(r, entries));
4095 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
4096 r->rcvifnot),
4097 TAILQ_NEXT(r, entries));
4098 PF_TEST_ATTRIB((r->prio &&
4099 (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
4100 ctx->pd->m->m_pkthdr.pf.prio),
4101 TAILQ_NEXT(r, entries));
4102
4103 /* must be last! */
4104 if (r->pktrate.limit) {
4105 pf_add_threshold(&r->pktrate);
4106 PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
4107 TAILQ_NEXT(r, entries));
4108 }
4109
4110 		/* all attribute tests passed, the rule matches */
4111 if (r->tag)
4112 ctx->tag = r->tag;
4113 if (r->anchor == NULL) {
4114
4115 if (r->rule_flag & PFRULE_ONCE) {
4116 u_int32_t rule_flag;
4117
4118 rule_flag = r->rule_flag;
4119 if (((rule_flag & PFRULE_EXPIRED) == 0) &&
4120 atomic_cas_uint(&r->rule_flag, rule_flag,
4121 rule_flag | PFRULE_EXPIRED) == rule_flag) {
4122 r->exptime = gettime();
4123 } else {
4124 r = TAILQ_NEXT(r, entries);
4125 continue;
4126 }
4127 }
4128
4129 if (r->action == PF_MATCH) {
4130 if ((ctx->ri = pool_get(&pf_rule_item_pl,
4131 PR_NOWAIT)) == NULL) {
4132 REASON_SET(&ctx->reason, PFRES_MEMORY);
4133 return (PF_TEST_FAIL);
4134 }
4135 ctx->ri->r = r;
4136 /* order is irrelevant */
4137 SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
4138 ctx->ri = NULL;
4139 pf_rule_to_actions(r, &ctx->act);
4140 if (r->rule_flag & PFRULE_AFTO)
4141 ctx->pd->naf = r->naf;
4142 if (pf_get_transaddr(r, ctx->pd, ctx->sns,
4143 &ctx->nr) == -1) {
4144 REASON_SET(&ctx->reason,
4145 PFRES_TRANSLATE);
4146 return (PF_TEST_FAIL);
4147 }
4148 #if NPFLOG > 0
4149 if (r->log) {
4150 REASON_SET(&ctx->reason, PFRES_MATCH);
4151 pflog_packet(ctx->pd, ctx->reason, r,
4152 ctx->a, ruleset, NULL);
4153 }
4154 #endif /* NPFLOG > 0 */
4155 } else {
4156 				/*
4157 				 * found matching r
4158 				 */
4159 				*ctx->rm = r;
4160 				/*
4161 				 * anchor, with its ruleset, to which r belongs
4162 				 */
4163 				*ctx->am = ctx->a;
4164 				/*
4165 				 * ruleset to which r belongs
4166 				 */
4167 				*ctx->rsm = ruleset;
4168 				/*
4169 				 * ruleset to which the anchor belongs
4170 				 */
4171 				ctx->arsm = ctx->aruleset;
4172 }
4173
4174 #if NPFLOG > 0
4175 if (ctx->act.log & PF_LOG_MATCHES)
4176 pf_log_matches(ctx->pd, r, ctx->a, ruleset,
4177 &ctx->rules);
4178 #endif /* NPFLOG > 0 */
4179
4180 if (r->quick)
4181 return (PF_TEST_QUICK);
4182 } else {
4183 ctx->a = r;
4184 ctx->aruleset = &r->anchor->ruleset;
4185 if (r->anchor_wildcard) {
4186 RB_FOREACH(child, pf_anchor_node,
4187 &r->anchor->children) {
4188 if (pf_anchor_stack_push(ruleset, r, child,
4189 PF_NEXT_CHILD) != 0)
4190 return (PF_TEST_FAIL);
4191
4192 ruleset = &child->ruleset;
4193 goto enter_ruleset;
4194 next_child:
4195 continue; /* with RB_FOREACH() */
4196 }
4197 } else {
4198 if (pf_anchor_stack_push(ruleset, r, child,
4199 PF_NEXT_RULE) != 0)
4200 return (PF_TEST_FAIL);
4201
4202 ruleset = &r->anchor->ruleset;
4203 child = NULL;
4204 goto enter_ruleset;
4205 next_rule:
4206 ;
4207 }
4208 }
4209 r = TAILQ_NEXT(r, entries);
4210 }
4211
4212 if (pf_anchor_stack_pop(&ruleset, &r, &child, &target) == 0) {
4213 /* stop if any rule matched within quick anchors. */
4214 if (r->quick == PF_TEST_QUICK && *ctx->am == r)
4215 return (PF_TEST_QUICK);
4216
4217 switch (target) {
4218 case PF_NEXT_CHILD:
4219 goto next_child;
4220 case PF_NEXT_RULE:
4221 goto next_rule;
4222 default:
4223 panic("%s: unknown jump target", __func__);
4224 }
4225 }
4226
4227 return (PF_TEST_OK);
4228 }
4229
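/*
 * Find the rule that decides the fate of the packet described by pd:
 * map ICMP queries onto virtual port numbers, run the packet through
 * the main ruleset, apply the winning rule's actions (including any
 * return-rst/return-icmp policy on block) and, for pass rules with
 * keep-state, create the state entry.  May return PF_DEFER when pfsync
 * wants to hold the initial packet until a peer has seen the state.
 */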
4230 int
4231 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
4232 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason,
4233 struct pfsync_deferral **pdeferral)
4234 {
4235 struct pf_rule *r = NULL;
4236 struct pf_rule *a = NULL;
4237 struct pf_ruleset *ruleset = NULL;
4238 struct pf_state_key *skw = NULL, *sks = NULL;
4239 int rewrite = 0;
4240 u_int16_t virtual_type, virtual_id;
4241 int action = PF_DROP;
4242 struct pf_test_ctx ctx;
4243 int rv;
4244
4245 memset(&ctx, 0, sizeof(ctx));
4246 ctx.pd = pd;
4247 ctx.rm = rm;
4248 ctx.am = am;
4249 ctx.rsm = rsm;
4250 ctx.th = &pd->hdr.tcp;
4251 ctx.act.rtableid = pd->rdomain;
4252 ctx.tag = -1;
4253 SLIST_INIT(&ctx.rules);
4254
4255 if (pd->dir == PF_IN && if_congested()) {
4256 REASON_SET(&ctx.reason, PFRES_CONGEST);
4257 return (PF_DROP);
4258 }
4259
4260 switch (pd->virtual_proto) {
4261 case IPPROTO_ICMP:
4262 ctx.icmptype = pd->hdr.icmp.icmp_type;
4263 ctx.icmpcode = pd->hdr.icmp.icmp_code;
4264 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
4265 &ctx.icmp_dir, &virtual_id, &virtual_type);
4266 if (ctx.icmp_dir == PF_IN) {
4267 pd->osport = pd->nsport = virtual_id;
4268 pd->odport = pd->ndport = virtual_type;
4269 } else {
4270 pd->osport = pd->nsport = virtual_type;
4271 pd->odport = pd->ndport = virtual_id;
4272 }
4273 break;
4274 #ifdef INET6
4275 case IPPROTO_ICMPV6:
4276 ctx.icmptype = pd->hdr.icmp6.icmp6_type;
4277 ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
4278 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
4279 &ctx.icmp_dir, &virtual_id, &virtual_type);
4280 if (ctx.icmp_dir == PF_IN) {
4281 pd->osport = pd->nsport = virtual_id;
4282 pd->odport = pd->ndport = virtual_type;
4283 } else {
4284 pd->osport = pd->nsport = virtual_type;
4285 pd->odport = pd->ndport = virtual_id;
4286 }
4287 break;
4288 #endif /* INET6 */
4289 }
4290
4291 ruleset = &pf_main_ruleset;
4292 rv = pf_match_rule(&ctx, ruleset);
4293 if (rv == PF_TEST_FAIL) {
4294 /*
4295 * Reason has been set in pf_match_rule() already.
4296 */
4297 goto cleanup;
4298 }
4299
4300 r = *ctx.rm; /* matching rule */
4301 a = *ctx.am; /* rule that defines an anchor containing 'r' */
4302 	ruleset = *ctx.rsm;	/* ruleset of the anchor defined by the rule 'a' */
4303 	ctx.aruleset = ctx.arsm;	/* ruleset of the 'a' rule itself */
4304
4305 /* apply actions for last matching pass/block rule */
4306 pf_rule_to_actions(r, &ctx.act);
4307 if (r->rule_flag & PFRULE_AFTO)
4308 pd->naf = r->naf;
4309 if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) {
4310 REASON_SET(&ctx.reason, PFRES_TRANSLATE);
4311 goto cleanup;
4312 }
4313 REASON_SET(&ctx.reason, PFRES_MATCH);
4314
4315 #if NPFLOG > 0
4316 if (r->log)
4317 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL);
4318 if (ctx.act.log & PF_LOG_MATCHES)
4319 pf_log_matches(pd, r, a, ruleset, &ctx.rules);
4320 #endif /* NPFLOG > 0 */
4321
4322 if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
4323 (r->action == PF_DROP) &&
4324 ((r->rule_flag & PFRULE_RETURNRST) ||
4325 (r->rule_flag & PFRULE_RETURNICMP) ||
4326 (r->rule_flag & PFRULE_RETURN))) {
4327 if (pd->proto == IPPROTO_TCP &&
4328 ((r->rule_flag & PFRULE_RETURNRST) ||
4329 (r->rule_flag & PFRULE_RETURN)) &&
4330 !(ctx.th->th_flags & TH_RST)) {
4331 u_int32_t ack =
4332 ntohl(ctx.th->th_seq) + pd->p_len;
4333
4334 if (pf_check_tcp_cksum(pd->m, pd->off,
4335 pd->tot_len - pd->off, pd->af))
4336 REASON_SET(&ctx.reason, PFRES_PROTCKSUM);
4337 else {
4338 if (ctx.th->th_flags & TH_SYN)
4339 ack++;
4340 if (ctx.th->th_flags & TH_FIN)
4341 ack++;
4342 pf_send_tcp(r, pd->af, pd->dst,
4343 pd->src, ctx.th->th_dport,
4344 ctx.th->th_sport, ntohl(ctx.th->th_ack),
4345 ack, TH_RST|TH_ACK, 0, 0, r->return_ttl,
4346 1, 0, pd->rdomain);
4347 }
4348 } else if ((pd->proto != IPPROTO_ICMP ||
4349 ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET &&
4350 r->return_icmp)
4351 pf_send_icmp(pd->m, r->return_icmp >> 8,
4352 r->return_icmp & 255, 0, pd->af, r, pd->rdomain);
4353 else if ((pd->proto != IPPROTO_ICMPV6 ||
4354 (ctx.icmptype >= ICMP6_ECHO_REQUEST &&
4355 ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
4356 r->return_icmp6)
4357 pf_send_icmp(pd->m, r->return_icmp6 >> 8,
4358 r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain);
4359 }
4360
4361 if (r->action == PF_DROP)
4362 goto cleanup;
4363
4364 pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid);
4365 if (ctx.act.rtableid >= 0 &&
4366 rtable_l2(ctx.act.rtableid) != pd->rdomain)
4367 pd->destchg = 1;
4368
4369 	if (r->action == PF_PASS && pd->badopts != 0 && !r->allow_opts) {
4370 REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
4371 #if NPFLOG > 0
4372 pd->pflog |= PF_LOG_FORCE;
4373 #endif /* NPFLOG > 0 */
4374 DPFPRINTF(LOG_NOTICE, "dropping packet with "
4375 "ip/ipv6 options in pf_test_rule()");
4376 goto cleanup;
4377 }
4378
4379 action = PF_PASS;
4380
4381 if (pd->virtual_proto != PF_VPROTO_FRAGMENT
4382 && !ctx.state_icmp && r->keep_state) {
4383
4384 if (r->rule_flag & PFRULE_SRCTRACK &&
4385 pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
4386 pd->af, pd->src, NULL, NULL) != 0) {
4387 REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
4388 goto cleanup;
4389 }
4390
4391 if (r->max_states && (r->states_cur >= r->max_states)) {
4392 pf_status.lcounters[LCNT_STATES]++;
4393 REASON_SET(&ctx.reason, PFRES_MAXSTATES);
4394 goto cleanup;
4395 }
4396
4397 action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
4398 &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);
4399
4400 if (action != PF_PASS)
4401 goto cleanup;
4402 if (sks != skw) {
4403 struct pf_state_key *sk;
4404
4405 if (pd->dir == PF_IN)
4406 sk = sks;
4407 else
4408 sk = skw;
4409 rewrite += pf_translate(pd,
4410 &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
4411 sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
4412 &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
4413 sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
4414 virtual_type, ctx.icmp_dir);
4415 }
4416
4417 #ifdef INET6
4418 if (rewrite && skw->af != sks->af)
4419 action = PF_AFRT;
4420 #endif /* INET6 */
4421
4422 } else {
4423 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
4424 SLIST_REMOVE_HEAD(&ctx.rules, entry);
4425 pool_put(&pf_rule_item_pl, ctx.ri);
4426 }
4427 }
4428
4429 /* copy back packet headers if needed */
4430 if (rewrite && pd->hdrlen) {
4431 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
4432 }
4433
4434 #if NPFSYNC > 0
4435 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
4436 pd->dir == PF_OUT && pfsync_is_up()) {
4437 		/*
4438 		 * We want the state created, but we don't
4439 		 * want to send the packet out yet, in case a
4440 		 * partner firewall has to know about the state
4441 		 * to allow replies through it.
4442 		 */
4443 if (pfsync_defer(*sm, pd->m, pdeferral))
4444 return (PF_DEFER);
4445 }
4446 #endif /* NPFSYNC > 0 */
4447
4448 return (action);
4449
4450 cleanup:
4451 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
4452 SLIST_REMOVE_HEAD(&ctx.rules, entry);
4453 pool_put(&pf_rule_item_pl, ctx.ri);
4454 }
4455
4456 return (action);
4457 }
4458
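/*
 * Allocate and initialize the state for a pass rule: copy the rule's
 * actions, seed the per-protocol peer tracking (TCP sequence windows,
 * UDP/other SINGLE vs. NO_TRAFFIC), attach the source nodes, set up
 * and insert the state keys, and start the SYN proxy handshake if the
 * rule requests one.
 */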
4459 static __inline int
4460 pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
4461 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks,
4462 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules,
4463 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX])
4464 {
4465 struct pf_state *st = NULL;
4466 struct tcphdr *th = &pd->hdr.tcp;
4467 u_int16_t mss = tcp_mssdflt;
4468 u_short reason;
4469 u_int i;
4470
4471 st = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
4472 if (st == NULL) {
4473 REASON_SET(&reason, PFRES_MEMORY);
4474 goto csfailed;
4475 }
4476 st->rule.ptr = r;
4477 st->anchor.ptr = a;
4478 st->natrule.ptr = nr;
4479 if (r->allow_opts)
4480 st->state_flags |= PFSTATE_ALLOWOPTS;
4481 if (r->rule_flag & PFRULE_STATESLOPPY)
4482 st->state_flags |= PFSTATE_SLOPPY;
4483 if (r->rule_flag & PFRULE_PFLOW)
4484 st->state_flags |= PFSTATE_PFLOW;
4485 #if NPFLOG > 0
4486 st->log = act->log & PF_LOG_ALL;
4487 #endif /* NPFLOG > 0 */
4488 st->qid = act->qid;
4489 st->pqid = act->pqid;
4490 st->rtableid[pd->didx] = act->rtableid;
4491 st->rtableid[pd->sidx] = -1; /* return traffic is routed normally */
4492 st->min_ttl = act->min_ttl;
4493 st->set_tos = act->set_tos;
4494 st->max_mss = act->max_mss;
4495 st->state_flags |= act->flags;
4496 #if NPFSYNC > 0
4497 st->sync_state = PFSYNC_S_NONE;
4498 #endif /* NPFSYNC > 0 */
4499 st->set_prio[0] = act->set_prio[0];
4500 st->set_prio[1] = act->set_prio[1];
4501 st->delay = act->delay;
4502 SLIST_INIT(&st->src_nodes);
4503 	/*
4504 	 * refcnt must be initialized before pf_state_insert() gets called,
4505 	 * since pf_state_insert() grabs a reference for pfsync!
4506 	 */
4507 PF_REF_INIT(st->refcnt);
4508 mtx_init(&st->mtx, IPL_NET);
4509
4510 switch (pd->proto) {
4511 case IPPROTO_TCP:
4512 st->src.seqlo = ntohl(th->th_seq);
4513 st->src.seqhi = st->src.seqlo + pd->p_len + 1;
4514 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
4515 r->keep_state == PF_STATE_MODULATE) {
4516 /* Generate sequence number modulator */
4517 st->src.seqdiff = pf_tcp_iss(pd) - st->src.seqlo;
4518 if (st->src.seqdiff == 0)
4519 st->src.seqdiff = 1;
4520 pf_patch_32(pd, &th->th_seq,
4521 htonl(st->src.seqlo + st->src.seqdiff));
4522 *rewrite = 1;
4523 } else
4524 st->src.seqdiff = 0;
4525 if (th->th_flags & TH_SYN) {
4526 st->src.seqhi++;
4527 st->src.wscale = pf_get_wscale(pd);
4528 }
4529 st->src.max_win = MAX(ntohs(th->th_win), 1);
4530 if (st->src.wscale & PF_WSCALE_MASK) {
4531 /* Remove scale factor from initial window */
4532 int win = st->src.max_win;
4533 win += 1 << (st->src.wscale & PF_WSCALE_MASK);
4534 st->src.max_win = (win - 1) >>
4535 (st->src.wscale & PF_WSCALE_MASK);
4536 }
4537 if (th->th_flags & TH_FIN)
4538 st->src.seqhi++;
4539 st->dst.seqhi = 1;
4540 st->dst.max_win = 1;
4541 pf_set_protostate(st, PF_PEER_SRC, TCPS_SYN_SENT);
4542 pf_set_protostate(st, PF_PEER_DST, TCPS_CLOSED);
4543 st->timeout = PFTM_TCP_FIRST_PACKET;
4544 pf_status.states_halfopen++;
4545 break;
4546 case IPPROTO_UDP:
4547 pf_set_protostate(st, PF_PEER_SRC, PFUDPS_SINGLE);
4548 pf_set_protostate(st, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
4549 st->timeout = PFTM_UDP_FIRST_PACKET;
4550 break;
4551 case IPPROTO_ICMP:
4552 #ifdef INET6
4553 case IPPROTO_ICMPV6:
4554 #endif /* INET6 */
4555 st->timeout = PFTM_ICMP_FIRST_PACKET;
4556 break;
4557 default:
4558 pf_set_protostate(st, PF_PEER_SRC, PFOTHERS_SINGLE);
4559 pf_set_protostate(st, PF_PEER_DST, PFOTHERS_NO_TRAFFIC);
4560 st->timeout = PFTM_OTHER_FIRST_PACKET;
4561 }
4562
4563 st->creation = getuptime();
4564 st->expire = getuptime();
4565
4566 if (pd->proto == IPPROTO_TCP) {
4567 if (st->state_flags & PFSTATE_SCRUB_TCP &&
4568 pf_normalize_tcp_init(pd, &st->src)) {
4569 REASON_SET(&reason, PFRES_MEMORY);
4570 goto csfailed;
4571 }
4572 if (st->state_flags & PFSTATE_SCRUB_TCP && st->src.scrub &&
4573 pf_normalize_tcp_stateful(pd, &reason, st,
4574 &st->src, &st->dst, rewrite)) {
4575 /* This really shouldn't happen!!! */
4576 DPFPRINTF(LOG_ERR,
4577 "%s: tcp normalize failed on first pkt", __func__);
4578 goto csfailed;
4579 }
4580 }
4581 st->direction = pd->dir;
4582
4583 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) {
4584 REASON_SET(&reason, PFRES_MEMORY);
4585 goto csfailed;
4586 }
4587
4588 if (pf_set_rt_ifp(st, pd->src, (*skw)->af, sns) != 0) {
4589 REASON_SET(&reason, PFRES_NOROUTE);
4590 goto csfailed;
4591 }
4592
4593 for (i = 0; i < PF_SN_MAX; i++)
4594 if (sns[i] != NULL) {
4595 struct pf_sn_item *sni;
4596
4597 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT);
4598 if (sni == NULL) {
4599 REASON_SET(&reason, PFRES_MEMORY);
4600 goto csfailed;
4601 }
4602 sni->sn = sns[i];
4603 SLIST_INSERT_HEAD(&st->src_nodes, sni, next);
4604 sni->sn->states++;
4605 }
4606
4607 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, st)) {
4608 *sks = *skw = NULL;
4609 REASON_SET(&reason, PFRES_STATEINS);
4610 goto csfailed;
4611 } else
4612 *sm = st;
4613
4614 /*
4615 * Make state responsible for rules it binds here.
4616 */
4617 memcpy(&st->match_rules, rules, sizeof(st->match_rules));
4618 memset(rules, 0, sizeof(*rules));
4619 STATE_INC_COUNTERS(st);
4620
4621 if (tag > 0) {
4622 pf_tag_ref(tag);
4623 st->tag = tag;
4624 }
4625 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
4626 TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) {
4627 int rtid = pd->rdomain;
4628 if (act->rtableid >= 0)
4629 rtid = act->rtableid;
4630 pf_set_protostate(st, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
4631 st->src.seqhi = arc4random();
4632 /* Find mss option */
4633 mss = pf_get_mss(pd);
4634 mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
4635 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
4636 st->src.mss = mss;
4637 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
4638 th->th_sport, st->src.seqhi, ntohl(th->th_seq) + 1,
4639 TH_SYN|TH_ACK, 0, st->src.mss, 0, 1, 0, pd->rdomain);
4640 REASON_SET(&reason, PFRES_SYNPROXY);
4641 return (PF_SYNPROXY_DROP);
4642 }
4643
4644 return (PF_PASS);
4645
4646 csfailed:
4647 if (st) {
4648 pf_normalize_tcp_cleanup(st); /* safe even w/o init */
4649 pf_src_tree_remove_state(st);
4650 pool_put(&pf_state_pl, st);
4651 }
4652
4653 for (i = 0; i < PF_SN_MAX; i++)
4654 if (sns[i] != NULL)
4655 pf_remove_src_node(sns[i]);
4656
4657 return (PF_DROP);
4658 }
4659
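/*
 * Rewrite the headers in pd to the given translated addresses and
 * ports, including ICMP echo ids and, for af-to states, conversion of
 * the ICMP header between v4 and v6.  Returns nonzero if anything was
 * patched, so the caller knows to copy the header back into the mbuf.
 */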
4660 int
4661 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
4662 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
4663 int icmp_dir)
4664 {
4665 int rewrite = 0;
4666 int afto = pd->af != pd->naf;
4667
4668 if (afto || PF_ANEQ(daddr, pd->dst, pd->af))
4669 pd->destchg = 1;
4670
4671 switch (pd->proto) {
4672 case IPPROTO_TCP: /* FALLTHROUGH */
4673 case IPPROTO_UDP:
4674 rewrite += pf_patch_16(pd, pd->sport, sport);
4675 rewrite += pf_patch_16(pd, pd->dport, dport);
4676 break;
4677
4678 case IPPROTO_ICMP:
4679 if (pd->af != AF_INET)
4680 return (0);
4681
4682 #ifdef INET6
4683 if (afto) {
4684 if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp))
4685 return (0);
4686 pd->proto = IPPROTO_ICMPV6;
4687 rewrite = 1;
4688 }
4689 #endif /* INET6 */
4690 if (virtual_type == htons(ICMP_ECHO)) {
4691 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
4692 rewrite += pf_patch_16(pd,
4693 &pd->hdr.icmp.icmp_id, icmpid);
4694 }
4695 break;
4696
4697 #ifdef INET6
4698 case IPPROTO_ICMPV6:
4699 if (pd->af != AF_INET6)
4700 return (0);
4701
4702 if (afto) {
4703 if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6))
4704 return (0);
4705 pd->proto = IPPROTO_ICMP;
4706 rewrite = 1;
4707 }
4708 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) {
4709 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
4710 rewrite += pf_patch_16(pd,
4711 &pd->hdr.icmp6.icmp6_id, icmpid);
4712 }
4713 break;
4714 #endif /* INET6 */
4715 }
4716
4717 if (!afto) {
4718 rewrite += pf_translate_a(pd, pd->src, saddr);
4719 rewrite += pf_translate_a(pd, pd->dst, daddr);
4720 }
4721
4722 return (rewrite);
4723 }
4724
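/*
 * Full TCP tracking.  For each peer we maintain seqlo (the highest
 * sequence number sent), seqhi (the upper edge of what the other side
 * may ACK) and max_win; a segment is accepted only if it fits inside
 * these windows and its ACK stays within MAXACKWINDOW (defined below)
 * of what the peer is allowed to acknowledge.
 */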
4725 int
4726 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason,
4727 int *copyback, int reverse)
4728 {
4729 struct tcphdr *th = &pd->hdr.tcp;
4730 struct pf_state_peer *src, *dst;
4731 u_int16_t win = ntohs(th->th_win);
4732 u_int32_t ack, end, data_end, seq, orig_seq;
4733 u_int8_t sws, dws, psrc, pdst;
4734 int ackskew;
4735
4736 if ((pd->dir == (*stp)->direction && !reverse) ||
4737 (pd->dir != (*stp)->direction && reverse)) {
4738 src = &(*stp)->src;
4739 dst = &(*stp)->dst;
4740 psrc = PF_PEER_SRC;
4741 pdst = PF_PEER_DST;
4742 } else {
4743 src = &(*stp)->dst;
4744 dst = &(*stp)->src;
4745 psrc = PF_PEER_DST;
4746 pdst = PF_PEER_SRC;
4747 }
4748
4749 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4750 sws = src->wscale & PF_WSCALE_MASK;
4751 dws = dst->wscale & PF_WSCALE_MASK;
4752 } else
4753 sws = dws = 0;
4754
4755 /*
4756 * Sequence tracking algorithm from Guido van Rooij's paper:
4757 * http://www.madison-gurkha.com/publications/tcp_filtering/
4758 * tcp_filtering.ps
4759 */
4760
4761 orig_seq = seq = ntohl(th->th_seq);
4762 if (src->seqlo == 0) {
4763 /* First packet from this end. Set its state */
4764
4765 if (((*stp)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
4766 src->scrub == NULL) {
4767 if (pf_normalize_tcp_init(pd, src)) {
4768 REASON_SET(reason, PFRES_MEMORY);
4769 return (PF_DROP);
4770 }
4771 }
4772
4773 /* Deferred generation of sequence number modulator */
4774 if (dst->seqdiff && !src->seqdiff) {
4775 /* use random iss for the TCP server */
4776 while ((src->seqdiff = arc4random() - seq) == 0)
4777 continue;
4778 ack = ntohl(th->th_ack) - dst->seqdiff;
4779 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
4780 pf_patch_32(pd, &th->th_ack, htonl(ack));
4781 *copyback = 1;
4782 } else {
4783 ack = ntohl(th->th_ack);
4784 }
4785
4786 end = seq + pd->p_len;
4787 if (th->th_flags & TH_SYN) {
4788 end++;
4789 if (dst->wscale & PF_WSCALE_FLAG) {
4790 src->wscale = pf_get_wscale(pd);
4791 if (src->wscale & PF_WSCALE_FLAG) {
4792 /* Remove scale factor from initial
4793 * window */
4794 sws = src->wscale & PF_WSCALE_MASK;
4795 win = ((u_int32_t)win + (1 << sws) - 1)
4796 >> sws;
4797 dws = dst->wscale & PF_WSCALE_MASK;
4798 } else {
4799 /* fixup other window */
4800 dst->max_win = MIN(TCP_MAXWIN,
4801 (u_int32_t)dst->max_win <<
4802 (dst->wscale & PF_WSCALE_MASK));
4803 /* in case of a retrans SYN|ACK */
4804 dst->wscale = 0;
4805 }
4806 }
4807 }
4808 data_end = end;
4809 if (th->th_flags & TH_FIN)
4810 end++;
4811
4812 src->seqlo = seq;
4813 if (src->state < TCPS_SYN_SENT)
4814 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT);
4815
4816 /*
4817 * May need to slide the window (seqhi may have been set by
4818 * the crappy stack check or if we picked up the connection
4819 * after establishment)
4820 */
4821 if (src->seqhi == 1 ||
4822 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4823 src->seqhi = end + MAX(1, dst->max_win << dws);
4824 if (win > src->max_win)
4825 src->max_win = win;
4826
4827 } else {
4828 ack = ntohl(th->th_ack) - dst->seqdiff;
4829 if (src->seqdiff) {
4830 /* Modulate sequence numbers */
4831 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
4832 pf_patch_32(pd, &th->th_ack, htonl(ack));
4833 *copyback = 1;
4834 }
4835 end = seq + pd->p_len;
4836 if (th->th_flags & TH_SYN)
4837 end++;
4838 data_end = end;
4839 if (th->th_flags & TH_FIN)
4840 end++;
4841 }
4842
4843 if ((th->th_flags & TH_ACK) == 0) {
4844 /* Let it pass through the ack skew check */
4845 ack = dst->seqlo;
4846 } else if ((ack == 0 &&
4847 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4848 /* broken tcp stacks do not set ack */
4849 (dst->state < TCPS_SYN_SENT)) {
4850 /*
4851 		 * Many stacks (ours included) will set the ACK number in a
4852 		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4853 */
4854 ack = dst->seqlo;
4855 }
4856
4857 if (seq == end) {
4858 		/* Ease sequencing restrictions on packets carrying no data */
4859 seq = src->seqlo;
4860 data_end = end = seq;
4861 }
4862
4863 ackskew = dst->seqlo - ack;
4864
4865
4866 /*
4867 * Need to demodulate the sequence numbers in any TCP SACK options
4868 * (Selective ACK). We could optionally validate the SACK values
4869 * against the current ACK window, either forwards or backwards, but
4870 * I'm not confident that SACK has been implemented properly
4871 	 * everywhere. It wouldn't surprise me if several stacks accidentally
4872 * SACK too far backwards of previously ACKed data. There really aren't
4873 * any security implications of bad SACKing unless the target stack
4874 * doesn't validate the option length correctly. Someone trying to
4875 * spoof into a TCP connection won't bother blindly sending SACK
4876 * options anyway.
4877 */
4878 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4879 if (pf_modulate_sack(pd, dst))
4880 *copyback = 1;
4881 }
4882
4883
4884 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
4885 if (SEQ_GEQ(src->seqhi, data_end) &&
4886 /* Last octet inside other's window space */
4887 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4888 /* Retrans: not more than one window back */
4889 (ackskew >= -MAXACKWINDOW) &&
4890 /* Acking not more than one reassembled fragment backwards */
4891 (ackskew <= (MAXACKWINDOW << sws)) &&
4892 /* Acking not more than one window forward */
4893 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4894 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) {
4895 /* Require an exact/+1 sequence match on resets when possible */
4896
4897 if (dst->scrub || src->scrub) {
4898 if (pf_normalize_tcp_stateful(pd, reason, *stp, src,
4899 dst, copyback))
4900 return (PF_DROP);
4901 }
4902
4903 /* update max window */
4904 if (src->max_win < win)
4905 src->max_win = win;
4906 /* synchronize sequencing */
4907 if (SEQ_GT(end, src->seqlo))
4908 src->seqlo = end;
4909 /* slide the window of what the other end can send */
4910 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4911 dst->seqhi = ack + MAX((win << sws), 1);
4912
4913 /* update states */
4914 if (th->th_flags & TH_SYN)
4915 if (src->state < TCPS_SYN_SENT)
4916 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT);
4917 if (th->th_flags & TH_FIN)
4918 if (src->state < TCPS_CLOSING)
4919 pf_set_protostate(*stp, psrc, TCPS_CLOSING);
4920 if (th->th_flags & TH_ACK) {
4921 if (dst->state == TCPS_SYN_SENT) {
4922 pf_set_protostate(*stp, pdst,
4923 TCPS_ESTABLISHED);
4924 if (src->state == TCPS_ESTABLISHED &&
4925 !SLIST_EMPTY(&(*stp)->src_nodes) &&
4926 pf_src_connlimit(stp)) {
4927 REASON_SET(reason, PFRES_SRCLIMIT);
4928 return (PF_DROP);
4929 }
4930 } else if (dst->state == TCPS_CLOSING)
4931 pf_set_protostate(*stp, pdst,
4932 TCPS_FIN_WAIT_2);
4933 }
4934 if (th->th_flags & TH_RST)
4935 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT);
4936
4937 /* update expire time */
4938 (*stp)->expire = getuptime();
4939 if (src->state >= TCPS_FIN_WAIT_2 &&
4940 dst->state >= TCPS_FIN_WAIT_2)
4941 (*stp)->timeout = PFTM_TCP_CLOSED;
4942 else if (src->state >= TCPS_CLOSING &&
4943 dst->state >= TCPS_CLOSING)
4944 (*stp)->timeout = PFTM_TCP_FIN_WAIT;
4945 else if (src->state < TCPS_ESTABLISHED ||
4946 dst->state < TCPS_ESTABLISHED)
4947 (*stp)->timeout = PFTM_TCP_OPENING;
4948 else if (src->state >= TCPS_CLOSING ||
4949 dst->state >= TCPS_CLOSING)
4950 (*stp)->timeout = PFTM_TCP_CLOSING;
4951 else
4952 (*stp)->timeout = PFTM_TCP_ESTABLISHED;
4953
4954 /* Fall through to PASS packet */
4955 } else if ((dst->state < TCPS_SYN_SENT ||
4956 dst->state >= TCPS_FIN_WAIT_2 ||
4957 src->state >= TCPS_FIN_WAIT_2) &&
4958 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) &&
4959 /* Within a window forward of the originating packet */
4960 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4961 /* Within a window backward of the originating packet */
4962
4963 /*
4964 * This currently handles three situations:
4965 * 1) Stupid stacks will shotgun SYNs before their peer
4966 * replies.
4967 * 2) When PF catches an already established stream (the
4968 * firewall rebooted, the state table was flushed, routes
4969 * changed...)
4970 * 3) Packets get funky immediately after the connection
4971 * closes (this should catch Solaris spurious ACK|FINs
4972 * that web servers like to spew after a close)
4973 *
4974 * This must be a little more careful than the above code
4975 * since packet floods will also be caught here. We don't
4976 * update the TTL here to mitigate the damage of a packet
4977 * flood and so the same code can handle awkward establishment
4978 * and a loosened connection close.
4979 * In the establishment case, a correct peer response will
4980 * validate the connection, go through the normal state code
4981 * and keep updating the state TTL.
4982 */
4983
4984 if (pf_status.debug >= LOG_NOTICE) {
4985 log(LOG_NOTICE, "pf: loose state match: ");
4986 pf_print_state(*stp);
4987 pf_print_flags(th->th_flags);
4988 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4989 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
4990 pd->p_len, ackskew, (*stp)->packets[0],
4991 (*stp)->packets[1],
4992 pd->dir == PF_IN ? "in" : "out",
4993 pd->dir == (*stp)->direction ? "fwd" : "rev");
4994 }
4995
4996 if (dst->scrub || src->scrub) {
4997 if (pf_normalize_tcp_stateful(pd, reason, *stp, src,
4998 dst, copyback))
4999 return (PF_DROP);
5000 }
5001
5002 /* update max window */
5003 if (src->max_win < win)
5004 src->max_win = win;
5005 /* synchronize sequencing */
5006 if (SEQ_GT(end, src->seqlo))
5007 src->seqlo = end;
5008 /* slide the window of what the other end can send */
5009 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
5010 dst->seqhi = ack + MAX((win << sws), 1);
5011
5012 /*
5013 * Cannot set dst->seqhi here since this could be a shotgunned
5014 * SYN and not an already established connection.
5015 */
5016 if (th->th_flags & TH_FIN)
5017 if (src->state < TCPS_CLOSING)
5018 pf_set_protostate(*stp, psrc, TCPS_CLOSING);
5019 if (th->th_flags & TH_RST)
5020 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT);
5021
5022 /* Fall through to PASS packet */
5023 } else {
5024 if ((*stp)->dst.state == TCPS_SYN_SENT &&
5025 (*stp)->src.state == TCPS_SYN_SENT) {
5026 /* Send RST for state mismatches during handshake */
5027 if (!(th->th_flags & TH_RST))
5028 pf_send_tcp((*stp)->rule.ptr, pd->af,
5029 pd->dst, pd->src, th->th_dport,
5030 th->th_sport, ntohl(th->th_ack), 0,
5031 TH_RST, 0, 0,
5032 (*stp)->rule.ptr->return_ttl, 1, 0,
5033 pd->rdomain);
5034 src->seqlo = 0;
5035 src->seqhi = 1;
5036 src->max_win = 1;
5037 } else if (pf_status.debug >= LOG_NOTICE) {
5038 log(LOG_NOTICE, "pf: BAD state: ");
5039 pf_print_state(*stp);
5040 pf_print_flags(th->th_flags);
5041 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
5042 "pkts=%llu:%llu dir=%s,%s\n",
5043 seq, orig_seq, ack, pd->p_len, ackskew,
5044 (*stp)->packets[0], (*stp)->packets[1],
5045 pd->dir == PF_IN ? "in" : "out",
5046 pd->dir == (*stp)->direction ? "fwd" : "rev");
5047 addlog("pf: State failure on: %c %c %c %c | %c %c\n",
5048 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
5049 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
5050 ' ': '2',
5051 (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
5052 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
5053 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?
5054 ' ' :'5',
5055 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
5056 }
5057 REASON_SET(reason, PFRES_BADSTATE);
5058 return (PF_DROP);
5059 }
5060
5061 return (PF_PASS);
5062 }
5063
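/*
 * Sloppy TCP tracking: drive the same TCPS_* peer states and timeouts
 * as full tracking, but from the TCP flags alone, without any sequence
 * or window validation.  Used for states created with the sloppy rule
 * option, e.g. where only one half of the connection is visible.
 */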
5064 int
5065 pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **stp,
5066 u_short *reason)
5067 {
5068 struct tcphdr *th = &pd->hdr.tcp;
5069 struct pf_state_peer *src, *dst;
5070 u_int8_t psrc, pdst;
5071
5072 if (pd->dir == (*stp)->direction) {
5073 src = &(*stp)->src;
5074 dst = &(*stp)->dst;
5075 psrc = PF_PEER_SRC;
5076 pdst = PF_PEER_DST;
5077 } else {
5078 src = &(*stp)->dst;
5079 dst = &(*stp)->src;
5080 psrc = PF_PEER_DST;
5081 pdst = PF_PEER_SRC;
5082 }
5083
5084 if (th->th_flags & TH_SYN)
5085 if (src->state < TCPS_SYN_SENT)
5086 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT);
5087 if (th->th_flags & TH_FIN)
5088 if (src->state < TCPS_CLOSING)
5089 pf_set_protostate(*stp, psrc, TCPS_CLOSING);
5090 if (th->th_flags & TH_ACK) {
5091 if (dst->state == TCPS_SYN_SENT) {
5092 pf_set_protostate(*stp, pdst, TCPS_ESTABLISHED);
5093 if (src->state == TCPS_ESTABLISHED &&
5094 !SLIST_EMPTY(&(*stp)->src_nodes) &&
5095 pf_src_connlimit(stp)) {
5096 REASON_SET(reason, PFRES_SRCLIMIT);
5097 return (PF_DROP);
5098 }
5099 } else if (dst->state == TCPS_CLOSING) {
5100 pf_set_protostate(*stp, pdst, TCPS_FIN_WAIT_2);
5101 } else if (src->state == TCPS_SYN_SENT &&
5102 dst->state < TCPS_SYN_SENT) {
5103 /*
5104 * Handle a special sloppy case where we only see one
5105 			 * half of the connection. If there is an ACK after
5106 * the initial SYN without ever seeing a packet from
5107 * the destination, set the connection to established.
5108 */
5109 pf_set_protostate(*stp, PF_PEER_BOTH,
5110 TCPS_ESTABLISHED);
5111 if (!SLIST_EMPTY(&(*stp)->src_nodes) &&
5112 pf_src_connlimit(stp)) {
5113 REASON_SET(reason, PFRES_SRCLIMIT);
5114 return (PF_DROP);
5115 }
5116 } else if (src->state == TCPS_CLOSING &&
5117 dst->state == TCPS_ESTABLISHED &&
5118 dst->seqlo == 0) {
5119 /*
5120 * Handle the closing of half connections where we
5121 * don't see the full bidirectional FIN/ACK+ACK
5122 * handshake.
5123 */
5124 pf_set_protostate(*stp, pdst, TCPS_CLOSING);
5125 }
5126 }
5127 if (th->th_flags & TH_RST)
5128 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT);
5129
5130 /* update expire time */
5131 (*stp)->expire = getuptime();
5132 if (src->state >= TCPS_FIN_WAIT_2 &&
5133 dst->state >= TCPS_FIN_WAIT_2)
5134 (*stp)->timeout = PFTM_TCP_CLOSED;
5135 else if (src->state >= TCPS_CLOSING &&
5136 dst->state >= TCPS_CLOSING)
5137 (*stp)->timeout = PFTM_TCP_FIN_WAIT;
5138 else if (src->state < TCPS_ESTABLISHED ||
5139 dst->state < TCPS_ESTABLISHED)
5140 (*stp)->timeout = PFTM_TCP_OPENING;
5141 else if (src->state >= TCPS_CLOSING ||
5142 dst->state >= TCPS_CLOSING)
5143 (*stp)->timeout = PFTM_TCP_CLOSING;
5144 else
5145 (*stp)->timeout = PFTM_TCP_ESTABLISHED;
5146
5147 return (PF_PASS);
5148 }
5149
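/*
 * SYN proxy.  In PF_TCPS_PROXY_SRC pf completes the handshake with the
 * client itself, using its own ISS (src.seqhi); once the client's
 * final ACK checks out we switch to PF_TCPS_PROXY_DST, open the
 * connection to the real server and splice the two sides together by
 * installing the matching seqdiff offsets.  Roughly:
 *
 *	client           pf            server
 *	 SYN     ----->
 *	 SYN|ACK <-----  (proxy ISS)
 *	 ACK     ----->
 *	                 SYN     ----->
 *	                 SYN|ACK <-----
 *	                 ACK     ----->
 */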
5150 static __inline int
5151 pf_synproxy(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason)
5152 {
5153 struct pf_state_key *sk = (*stp)->key[pd->didx];
5154
5155 if ((*stp)->src.state == PF_TCPS_PROXY_SRC) {
5156 struct tcphdr *th = &pd->hdr.tcp;
5157
5158 if (pd->dir != (*stp)->direction) {
5159 REASON_SET(reason, PFRES_SYNPROXY);
5160 return (PF_SYNPROXY_DROP);
5161 }
5162 if (th->th_flags & TH_SYN) {
5163 if (ntohl(th->th_seq) != (*stp)->src.seqlo) {
5164 REASON_SET(reason, PFRES_SYNPROXY);
5165 return (PF_DROP);
5166 }
5167 pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst,
5168 pd->src, th->th_dport, th->th_sport,
5169 (*stp)->src.seqhi, ntohl(th->th_seq) + 1,
5170 TH_SYN|TH_ACK, 0, (*stp)->src.mss, 0, 1,
5171 0, pd->rdomain);
5172 REASON_SET(reason, PFRES_SYNPROXY);
5173 return (PF_SYNPROXY_DROP);
5174 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
5175 (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) ||
5176 (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) {
5177 REASON_SET(reason, PFRES_SYNPROXY);
5178 return (PF_DROP);
5179 } else if (!SLIST_EMPTY(&(*stp)->src_nodes) &&
5180 pf_src_connlimit(stp)) {
5181 REASON_SET(reason, PFRES_SRCLIMIT);
5182 return (PF_DROP);
5183 } else
5184 pf_set_protostate(*stp, PF_PEER_SRC,
5185 PF_TCPS_PROXY_DST);
5186 }
5187 if ((*stp)->src.state == PF_TCPS_PROXY_DST) {
5188 struct tcphdr *th = &pd->hdr.tcp;
5189
5190 if (pd->dir == (*stp)->direction) {
5191 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
5192 (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) ||
5193 (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) {
5194 REASON_SET(reason, PFRES_SYNPROXY);
5195 return (PF_DROP);
5196 }
5197 (*stp)->src.max_win = MAX(ntohs(th->th_win), 1);
5198 if ((*stp)->dst.seqhi == 1)
5199 (*stp)->dst.seqhi = arc4random();
5200 pf_send_tcp((*stp)->rule.ptr, pd->af,
5201 &sk->addr[pd->sidx], &sk->addr[pd->didx],
5202 sk->port[pd->sidx], sk->port[pd->didx],
5203 (*stp)->dst.seqhi, 0, TH_SYN, 0,
5204 (*stp)->src.mss, 0, 0, (*stp)->tag,
5205 sk->rdomain);
5206 REASON_SET(reason, PFRES_SYNPROXY);
5207 return (PF_SYNPROXY_DROP);
5208 } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
5209 (TH_SYN|TH_ACK)) ||
5210 (ntohl(th->th_ack) != (*stp)->dst.seqhi + 1)) {
5211 REASON_SET(reason, PFRES_SYNPROXY);
5212 return (PF_DROP);
5213 } else {
5214 (*stp)->dst.max_win = MAX(ntohs(th->th_win), 1);
5215 (*stp)->dst.seqlo = ntohl(th->th_seq);
5216 pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst,
5217 pd->src, th->th_dport, th->th_sport,
5218 ntohl(th->th_ack), ntohl(th->th_seq) + 1,
5219 TH_ACK, (*stp)->src.max_win, 0, 0, 0,
5220 (*stp)->tag, pd->rdomain);
5221 pf_send_tcp((*stp)->rule.ptr, pd->af,
5222 &sk->addr[pd->sidx], &sk->addr[pd->didx],
5223 sk->port[pd->sidx], sk->port[pd->didx],
5224 (*stp)->src.seqhi + 1, (*stp)->src.seqlo + 1,
5225 TH_ACK, (*stp)->dst.max_win, 0, 0, 1,
5226 0, sk->rdomain);
5227 (*stp)->src.seqdiff = (*stp)->dst.seqhi -
5228 (*stp)->src.seqlo;
5229 (*stp)->dst.seqdiff = (*stp)->src.seqhi -
5230 (*stp)->dst.seqlo;
5231 (*stp)->src.seqhi = (*stp)->src.seqlo +
5232 (*stp)->dst.max_win;
5233 (*stp)->dst.seqhi = (*stp)->dst.seqlo +
5234 (*stp)->src.max_win;
5235 (*stp)->src.wscale = (*stp)->dst.wscale = 0;
5236 pf_set_protostate(*stp, PF_PEER_BOTH,
5237 TCPS_ESTABLISHED);
5238 REASON_SET(reason, PFRES_SYNPROXY);
5239 return (PF_SYNPROXY_DROP);
5240 }
5241 }
5242 return (PF_PASS);
5243 }
5244
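/*
 * Apply an existing state to the packet: per-protocol tracking (TCP
 * via synproxy plus full or sloppy tracking, UDP and others via the
 * SINGLE/MULTIPLE states), followed by whatever NAT rewrite is
 * recorded in the state keys; af-to states return PF_AFRT.
 */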
5245 int
5246 pf_test_state(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason)
5247 {
5248 int copyback = 0;
5249 struct pf_state_peer *src, *dst;
5250 int action;
5251 struct inpcb *inp = pd->m->m_pkthdr.pf.inp;
5252 u_int8_t psrc, pdst;
5253
5254 action = PF_PASS;
5255 if (pd->dir == (*stp)->direction) {
5256 src = &(*stp)->src;
5257 dst = &(*stp)->dst;
5258 psrc = PF_PEER_SRC;
5259 pdst = PF_PEER_DST;
5260 } else {
5261 src = &(*stp)->dst;
5262 dst = &(*stp)->src;
5263 psrc = PF_PEER_DST;
5264 pdst = PF_PEER_SRC;
5265 }
5266
5267 switch (pd->virtual_proto) {
5268 case IPPROTO_TCP:
5269 if ((action = pf_synproxy(pd, stp, reason)) != PF_PASS)
5270 return (action);
5271 if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) {
5272
5273 if (dst->state >= TCPS_FIN_WAIT_2 &&
5274 src->state >= TCPS_FIN_WAIT_2) {
5275 if (pf_status.debug >= LOG_NOTICE) {
5276 log(LOG_NOTICE, "pf: state reuse ");
5277 pf_print_state(*stp);
5278 pf_print_flags(pd->hdr.tcp.th_flags);
5279 addlog("\n");
5280 }
5281 /* XXX make sure it's the same direction ?? */
5282 (*stp)->timeout = PFTM_PURGE;
5283 pf_state_unref(*stp);
5284 *stp = NULL;
5285 pf_mbuf_link_inpcb(pd->m, inp);
5286 return (PF_DROP);
5287 } else if (dst->state >= TCPS_ESTABLISHED &&
5288 src->state >= TCPS_ESTABLISHED) {
5289 /*
5290 * SYN matches existing state???
5291 				 * Typically happens when the sender reboots
5292 				 * after a sudden panic. Certain protocols
5293 				 * (NFSv3) always use the same port numbers. A
5294 				 * challenge ACK enables all parties (firewall
5295 				 * and peers) to get in sync again.
5296 */
5297 pf_send_challenge_ack(pd, *stp, src, dst);
5298 return (PF_DROP);
5299 }
5300 }
5301
5302 if ((*stp)->state_flags & PFSTATE_SLOPPY) {
5303 if (pf_tcp_track_sloppy(pd, stp, reason) == PF_DROP)
5304 return (PF_DROP);
5305 } else {
5306 			if (pf_tcp_track_full(pd, stp, reason, &copyback,
5307 PF_REVERSED_KEY((*stp)->key, pd->af)) == PF_DROP)
5308 return (PF_DROP);
5309 }
5310 break;
5311 case IPPROTO_UDP:
5312 /* update states */
5313 if (src->state < PFUDPS_SINGLE)
5314 pf_set_protostate(*stp, psrc, PFUDPS_SINGLE);
5315 if (dst->state == PFUDPS_SINGLE)
5316 pf_set_protostate(*stp, pdst, PFUDPS_MULTIPLE);
5317
5318 /* update expire time */
5319 (*stp)->expire = getuptime();
5320 if (src->state == PFUDPS_MULTIPLE &&
5321 dst->state == PFUDPS_MULTIPLE)
5322 (*stp)->timeout = PFTM_UDP_MULTIPLE;
5323 else
5324 (*stp)->timeout = PFTM_UDP_SINGLE;
5325 break;
5326 default:
5327 /* update states */
5328 if (src->state < PFOTHERS_SINGLE)
5329 pf_set_protostate(*stp, psrc, PFOTHERS_SINGLE);
5330 if (dst->state == PFOTHERS_SINGLE)
5331 pf_set_protostate(*stp, pdst, PFOTHERS_MULTIPLE);
5332
5333 /* update expire time */
5334 (*stp)->expire = getuptime();
5335 if (src->state == PFOTHERS_MULTIPLE &&
5336 dst->state == PFOTHERS_MULTIPLE)
5337 (*stp)->timeout = PFTM_OTHER_MULTIPLE;
5338 else
5339 (*stp)->timeout = PFTM_OTHER_SINGLE;
5340 break;
5341 }
5342
5343 /* translate source/destination address, if necessary */
5344 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) {
5345 struct pf_state_key *nk;
5346 int afto, sidx, didx;
5347
5348 if (PF_REVERSED_KEY((*stp)->key, pd->af))
5349 nk = (*stp)->key[pd->sidx];
5350 else
5351 nk = (*stp)->key[pd->didx];
5352
5353 afto = pd->af != nk->af;
5354 sidx = afto ? pd->didx : pd->sidx;
5355 didx = afto ? pd->sidx : pd->didx;
5356
5357 #ifdef INET6
5358 if (afto) {
5359 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af);
5360 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af);
5361 pd->naf = nk->af;
5362 action = PF_AFRT;
5363 }
5364 #endif /* INET6 */
5365
5366 if (!afto)
5367 pf_translate_a(pd, pd->src, &nk->addr[sidx]);
5368
5369 if (pd->sport != NULL)
5370 pf_patch_16(pd, pd->sport, nk->port[sidx]);
5371
5372 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
5373 pd->rdomain != nk->rdomain)
5374 pd->destchg = 1;
5375
5376 if (!afto)
5377 pf_translate_a(pd, pd->dst, &nk->addr[didx]);
5378
5379 if (pd->dport != NULL)
5380 pf_patch_16(pd, pd->dport, nk->port[didx]);
5381
5382 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
5383 copyback = 1;
5384 }
5385
5386 if (copyback && pd->hdrlen > 0) {
5387 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
5388 }
5389
5390 return (action);
5391 }
5392
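/*
 * Build the state key for an ICMP query or reply: the ICMP id and the
 * virtual type stand in for the port pair, their order depending on
 * the direction in which the query flows.  Returns -1 once a state has
 * been found and the direction check passes (or is skipped for sloppy
 * states), otherwise a PF_* verdict such as PF_DROP.
 */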
5393 int
5394 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
5395 struct pf_state **stp, u_int16_t icmpid, u_int16_t type,
5396 int icmp_dir, int *iidx, int multi, int inner)
5397 {
5398 int direction, action;
5399
5400 key->af = pd->af;
5401 key->proto = pd->proto;
5402 key->rdomain = pd->rdomain;
5403 if (icmp_dir == PF_IN) {
5404 *iidx = pd->sidx;
5405 key->port[pd->sidx] = icmpid;
5406 key->port[pd->didx] = type;
5407 } else {
5408 *iidx = pd->didx;
5409 key->port[pd->sidx] = type;
5410 key->port[pd->didx] = icmpid;
5411 }
5412
5413 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx,
5414 pd->dst, pd->af, multi))
5415 return (PF_DROP);
5416
5417 key->hash = pf_pkt_hash(key->af, key->proto,
5418 &key->addr[0], &key->addr[1], 0, 0);
5419
5420 action = pf_find_state(pd, key, stp);
5421 if (action != PF_MATCH)
5422 return (action);
5423
5424 if ((*stp)->state_flags & PFSTATE_SLOPPY)
5425 return (-1);
5426
5427 	/* Is this ICMP message flowing in the right direction? */
5428 if ((*stp)->key[PF_SK_WIRE]->af != (*stp)->key[PF_SK_STACK]->af)
5429 direction = (pd->af == (*stp)->key[PF_SK_WIRE]->af) ?
5430 PF_IN : PF_OUT;
5431 else
5432 direction = (*stp)->direction;
5433 if ((((!inner && direction == pd->dir) ||
5434 (inner && direction != pd->dir)) ?
5435 PF_IN : PF_OUT) != icmp_dir) {
5436 if (pf_status.debug >= LOG_NOTICE) {
5437 log(LOG_NOTICE,
5438 "pf: icmp type %d in wrong direction (%d): ",
5439 ntohs(type), icmp_dir);
5440 pf_print_state(*stp);
5441 addlog("\n");
5442 }
5443 return (PF_DROP);
5444 }
5445 return (-1);
5446 }
5447
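/*
 * Two cases: a plain ICMP query/reply is matched against its own ICMP
 * state, while an ICMP error carries the offending packet quoted in
 * its payload, so the inner IP/IPv6 and transport headers are pulled
 * out into pd2 and the state of the original connection is looked up
 * instead.
 */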
5448 int
5449 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **stp,
5450 u_short *reason)
5451 {
5452 u_int16_t virtual_id, virtual_type;
5453 u_int8_t icmptype, icmpcode;
5454 int icmp_dir, iidx, ret, copyback = 0;
5455
5456 struct pf_state_key_cmp key;
5457
5458 switch (pd->proto) {
5459 case IPPROTO_ICMP:
5460 icmptype = pd->hdr.icmp.icmp_type;
5461 icmpcode = pd->hdr.icmp.icmp_code;
5462 break;
5463 #ifdef INET6
5464 case IPPROTO_ICMPV6:
5465 icmptype = pd->hdr.icmp6.icmp6_type;
5466 icmpcode = pd->hdr.icmp6.icmp6_code;
5467 break;
5468 #endif /* INET6 */
5469 default:
5470 panic("unhandled proto %d", pd->proto);
5471 }
5472
5473 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id,
5474 &virtual_type) == 0) {
5475 /*
5476 * ICMP query/reply message not related to a TCP/UDP packet.
5477 * Search for an ICMP state.
5478 */
5479 ret = pf_icmp_state_lookup(pd, &key, stp,
5480 virtual_id, virtual_type, icmp_dir, &iidx,
5481 0, 0);
5482 /* IPv6? try matching a multicast address */
5483 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT)
5484 ret = pf_icmp_state_lookup(pd, &key, stp, virtual_id,
5485 virtual_type, icmp_dir, &iidx, 1, 0);
5486 if (ret >= 0)
5487 return (ret);
5488
5489 (*stp)->expire = getuptime();
5490 (*stp)->timeout = PFTM_ICMP_ERROR_REPLY;
5491
5492 /* translate source/destination address, if necessary */
5493 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) {
5494 struct pf_state_key *nk;
5495 int afto, sidx, didx;
5496
5497 if (PF_REVERSED_KEY((*stp)->key, pd->af))
5498 nk = (*stp)->key[pd->sidx];
5499 else
5500 nk = (*stp)->key[pd->didx];
5501
5502 afto = pd->af != nk->af;
5503 sidx = afto ? pd->didx : pd->sidx;
5504 didx = afto ? pd->sidx : pd->didx;
5505 iidx = afto ? !iidx : iidx;
5506 #ifdef INET6
5507 if (afto) {
5508 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
5509 nk->af);
5510 pf_addrcpy(&pd->ndaddr, &nk->addr[didx],
5511 nk->af);
5512 pd->naf = nk->af;
5513 }
5514 #endif /* INET6 */
5515 if (!afto) {
5516 pf_translate_a(pd, pd->src, &nk->addr[sidx]);
5517 pf_translate_a(pd, pd->dst, &nk->addr[didx]);
5518 }
5519
5520 if (pd->rdomain != nk->rdomain)
5521 pd->destchg = 1;
5522 if (!afto && PF_ANEQ(pd->dst,
5523 &nk->addr[didx], pd->af))
5524 pd->destchg = 1;
5525 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
5526
5527 switch (pd->af) {
5528 case AF_INET:
5529 #ifdef INET6
5530 if (afto) {
5531 if (pf_translate_icmp_af(pd, AF_INET6,
5532 &pd->hdr.icmp))
5533 return (PF_DROP);
5534 pd->proto = IPPROTO_ICMPV6;
5535 }
5536 #endif /* INET6 */
5537 pf_patch_16(pd,
5538 &pd->hdr.icmp.icmp_id, nk->port[iidx]);
5539
5540 m_copyback(pd->m, pd->off, ICMP_MINLEN,
5541 &pd->hdr.icmp, M_NOWAIT);
5542 copyback = 1;
5543 break;
5544 #ifdef INET6
5545 case AF_INET6:
5546 if (afto) {
5547 if (pf_translate_icmp_af(pd, AF_INET,
5548 &pd->hdr.icmp6))
5549 return (PF_DROP);
5550 pd->proto = IPPROTO_ICMP;
5551 }
5552
5553 pf_patch_16(pd,
5554 &pd->hdr.icmp6.icmp6_id, nk->port[iidx]);
5555
5556 m_copyback(pd->m, pd->off,
5557 sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
5558 M_NOWAIT);
5559 copyback = 1;
5560 break;
5561 #endif /* INET6 */
5562 }
5563 #ifdef INET6
5564 if (afto)
5565 return (PF_AFRT);
5566 #endif /* INET6 */
5567 }
5568 } else {
5569 /*
5570 * ICMP error message in response to a TCP/UDP packet.
5571 * Extract the inner TCP/UDP header and search for that state.
5572 */
5573 struct pf_pdesc pd2;
5574 struct ip h2;
5575 #ifdef INET6
5576 struct ip6_hdr h2_6;
5577 #endif /* INET6 */
5578 int ipoff2;
5579
5580 		/* Initialize from pd the pd2 fields valid for both packets. */
5581 memset(&pd2, 0, sizeof(pd2));
5582 pd2.af = pd->af;
5583 pd2.dir = pd->dir;
5584 pd2.kif = pd->kif;
5585 pd2.m = pd->m;
5586 pd2.rdomain = pd->rdomain;
5587 /* Payload packet is from the opposite direction. */
5588 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0;
5589 pd2.didx = (pd2.dir == PF_IN) ? 0 : 1;
5590 switch (pd->af) {
5591 case AF_INET:
5592 /* offset of h2 in mbuf chain */
5593 ipoff2 = pd->off + ICMP_MINLEN;
5594
5595 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2),
5596 NULL, reason, pd2.af)) {
5597 DPFPRINTF(LOG_NOTICE,
5598 "ICMP error message too short (ip)");
5599 return (PF_DROP);
5600 }
5601 /*
5602 * ICMP error messages don't refer to non-first
5603 * fragments
5604 */
5605 if (h2.ip_off & htons(IP_OFFMASK)) {
5606 REASON_SET(reason, PFRES_FRAG);
5607 return (PF_DROP);
5608 }
5609
5610 /* offset of protocol header that follows h2 */
5611 pd2.off = ipoff2;
5612 if (pf_walk_header(&pd2, &h2, reason) != PF_PASS)
5613 return (PF_DROP);
5614
5615 pd2.tot_len = ntohs(h2.ip_len);
5616 pd2.src = (struct pf_addr *)&h2.ip_src;
5617 pd2.dst = (struct pf_addr *)&h2.ip_dst;
5618 break;
5619 #ifdef INET6
5620 case AF_INET6:
5621 ipoff2 = pd->off + sizeof(struct icmp6_hdr);
5622
5623 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6),
5624 NULL, reason, pd2.af)) {
5625 DPFPRINTF(LOG_NOTICE,
5626 "ICMP error message too short (ip6)");
5627 return (PF_DROP);
5628 }
5629
5630 pd2.off = ipoff2;
5631 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS)
5632 return (PF_DROP);
5633
5634 pd2.tot_len = ntohs(h2_6.ip6_plen) +
5635 sizeof(struct ip6_hdr);
5636 pd2.src = (struct pf_addr *)&h2_6.ip6_src;
5637 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
5638 break;
5639 #endif /* INET6 */
5640 default:
5641 unhandled_af(pd->af);
5642 }
5643
5644 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
5645 if (pf_status.debug >= LOG_NOTICE) {
5646 log(LOG_NOTICE,
5647 "pf: BAD ICMP %d:%d outer dst: ",
5648 icmptype, icmpcode);
5649 pf_print_host(pd->src, 0, pd->af);
5650 addlog(" -> ");
5651 pf_print_host(pd->dst, 0, pd->af);
5652 addlog(" inner src: ");
5653 pf_print_host(pd2.src, 0, pd2.af);
5654 addlog(" -> ");
5655 pf_print_host(pd2.dst, 0, pd2.af);
5656 addlog("\n");
5657 }
5658 REASON_SET(reason, PFRES_BADSTATE);
5659 return (PF_DROP);
5660 }
5661
5662 switch (pd2.proto) {
5663 case IPPROTO_TCP: {
5664 struct tcphdr *th = &pd2.hdr.tcp;
5665 u_int32_t seq;
5666 struct pf_state_peer *src, *dst;
5667 u_int8_t dws;
5668 int action;
5669
5670 /*
5671 * Only the first 8 bytes of the TCP header can be
5672 * expected. Don't access any TCP header fields after
5673 			 * th_seq; an ackskew test is not possible.
5674 */
5675 if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, NULL, reason,
5676 pd2.af)) {
5677 DPFPRINTF(LOG_NOTICE,
5678 "ICMP error message too short (tcp)");
5679 return (PF_DROP);
5680 }
5681
5682 key.af = pd2.af;
5683 key.proto = IPPROTO_TCP;
5684 key.rdomain = pd2.rdomain;
5685 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
5686 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
5687 key.port[pd2.sidx] = th->th_sport;
5688 key.port[pd2.didx] = th->th_dport;
5689 key.hash = pf_pkt_hash(pd2.af, pd2.proto,
5690 pd2.src, pd2.dst, th->th_sport, th->th_dport);
5691
5692 action = pf_find_state(&pd2, &key, stp);
5693 if (action != PF_MATCH)
5694 return (action);
5695
5696 if (pd2.dir == (*stp)->direction) {
5697 if (PF_REVERSED_KEY((*stp)->key, pd->af)) {
5698 src = &(*stp)->src;
5699 dst = &(*stp)->dst;
5700 } else {
5701 src = &(*stp)->dst;
5702 dst = &(*stp)->src;
5703 }
5704 } else {
5705 if (PF_REVERSED_KEY((*stp)->key, pd->af)) {
5706 src = &(*stp)->dst;
5707 dst = &(*stp)->src;
5708 } else {
5709 src = &(*stp)->src;
5710 dst = &(*stp)->dst;
5711 }
5712 }
5713
5714 if (src->wscale && dst->wscale)
5715 dws = dst->wscale & PF_WSCALE_MASK;
5716 else
5717 dws = 0;
5718
5719 /* Demodulate sequence number */
5720 seq = ntohl(th->th_seq) - src->seqdiff;
5721 if (src->seqdiff) {
5722 pf_patch_32(pd, &th->th_seq, htonl(seq));
5723 copyback = 1;
5724 }
5725
5726 if (!((*stp)->state_flags & PFSTATE_SLOPPY) &&
5727 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq,
5728 src->seqlo - (dst->max_win << dws)))) {
5729 if (pf_status.debug >= LOG_NOTICE) {
5730 log(LOG_NOTICE,
5731 "pf: BAD ICMP %d:%d ",
5732 icmptype, icmpcode);
5733 pf_print_host(pd->src, 0, pd->af);
5734 addlog(" -> ");
5735 pf_print_host(pd->dst, 0, pd->af);
5736 addlog(" state: ");
5737 pf_print_state(*stp);
5738 addlog(" seq=%u\n", seq);
5739 }
5740 REASON_SET(reason, PFRES_BADSTATE);
5741 return (PF_DROP);
5742 } else {
5743 if (pf_status.debug >= LOG_DEBUG) {
5744 log(LOG_DEBUG,
5745 "pf: OK ICMP %d:%d ",
5746 icmptype, icmpcode);
5747 pf_print_host(pd->src, 0, pd->af);
5748 addlog(" -> ");
5749 pf_print_host(pd->dst, 0, pd->af);
5750 addlog(" state: ");
5751 pf_print_state(*stp);
5752 addlog(" seq=%u\n", seq);
5753 }
5754 }
5755
5756 /* translate source/destination address, if necessary */
5757 if ((*stp)->key[PF_SK_WIRE] !=
5758 (*stp)->key[PF_SK_STACK]) {
5759 struct pf_state_key *nk;
5760 int afto, sidx, didx;
5761
5762 if (PF_REVERSED_KEY((*stp)->key, pd->af))
5763 nk = (*stp)->key[pd->sidx];
5764 else
5765 nk = (*stp)->key[pd->didx];
5766
5767 afto = pd->af != nk->af;
5768 sidx = afto ? pd2.didx : pd2.sidx;
5769 didx = afto ? pd2.sidx : pd2.didx;
5770
5771 #ifdef INET6
5772 if (afto) {
5773 if (pf_translate_icmp_af(pd, nk->af,
5774 &pd->hdr.icmp))
5775 return (PF_DROP);
5776 m_copyback(pd->m, pd->off,
5777 sizeof(struct icmp6_hdr),
5778 &pd->hdr.icmp6, M_NOWAIT);
5779 if (pf_change_icmp_af(pd->m, ipoff2,
5780 pd, &pd2, &nk->addr[sidx],
5781 &nk->addr[didx], pd->af, nk->af))
5782 return (PF_DROP);
5783 if (nk->af == AF_INET)
5784 pd->proto = IPPROTO_ICMP;
5785 else
5786 pd->proto = IPPROTO_ICMPV6;
5787 pd->m->m_pkthdr.ph_rtableid =
5788 nk->rdomain;
5789 pd->destchg = 1;
5790 pf_addrcpy(&pd->nsaddr,
5791 &nk->addr[pd2.sidx], nk->af);
5792 pf_addrcpy(&pd->ndaddr,
5793 &nk->addr[pd2.didx], nk->af);
5794 pd->naf = nk->af;
5795
5796 pf_patch_16(pd,
5797 &th->th_sport, nk->port[sidx]);
5798 pf_patch_16(pd,
5799 &th->th_dport, nk->port[didx]);
5800
5801 m_copyback(pd2.m, pd2.off, 8, th,
5802 M_NOWAIT);
5803 return (PF_AFRT);
5804 }
5805 #endif /* INET6 */
5806 if (PF_ANEQ(pd2.src,
5807 &nk->addr[pd2.sidx], pd2.af) ||
5808 nk->port[pd2.sidx] != th->th_sport)
5809 pf_translate_icmp(pd, pd2.src,
5810 &th->th_sport, pd->dst,
5811 &nk->addr[pd2.sidx],
5812 nk->port[pd2.sidx]);
5813
5814 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
5815 pd2.af) || pd2.rdomain != nk->rdomain)
5816 pd->destchg = 1;
5817 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
5818
5819 if (PF_ANEQ(pd2.dst,
5820 &nk->addr[pd2.didx], pd2.af) ||
5821 nk->port[pd2.didx] != th->th_dport)
5822 pf_translate_icmp(pd, pd2.dst,
5823 &th->th_dport, pd->src,
5824 &nk->addr[pd2.didx],
5825 nk->port[pd2.didx]);
5826 copyback = 1;
5827 }
5828
5829 if (copyback) {
5830 switch (pd2.af) {
5831 case AF_INET:
5832 m_copyback(pd->m, pd->off, ICMP_MINLEN,
5833 &pd->hdr.icmp, M_NOWAIT);
5834 m_copyback(pd2.m, ipoff2, sizeof(h2),
5835 &h2, M_NOWAIT);
5836 break;
5837 #ifdef INET6
5838 case AF_INET6:
5839 m_copyback(pd->m, pd->off,
5840 sizeof(struct icmp6_hdr),
5841 &pd->hdr.icmp6, M_NOWAIT);
5842 m_copyback(pd2.m, ipoff2, sizeof(h2_6),
5843 &h2_6, M_NOWAIT);
5844 break;
5845 #endif /* INET6 */
5846 }
5847 m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT);
5848 }
5849 break;
5850 }
5851 case IPPROTO_UDP: {
5852 struct udphdr *uh = &pd2.hdr.udp;
5853 int action;
5854
5855 if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh),
5856 NULL, reason, pd2.af)) {
5857 DPFPRINTF(LOG_NOTICE,
5858 "ICMP error message too short (udp)");
5859 return (PF_DROP);
5860 }
5861
5862 key.af = pd2.af;
5863 key.proto = IPPROTO_UDP;
5864 key.rdomain = pd2.rdomain;
5865 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
5866 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
5867 key.port[pd2.sidx] = uh->uh_sport;
5868 key.port[pd2.didx] = uh->uh_dport;
5869 key.hash = pf_pkt_hash(pd2.af, pd2.proto,
5870 pd2.src, pd2.dst, uh->uh_sport, uh->uh_dport);
5871
5872 action = pf_find_state(&pd2, &key, stp);
5873 if (action != PF_MATCH)
5874 return (action);
5875
5876 /* translate source/destination address, if necessary */
5877 if ((*stp)->key[PF_SK_WIRE] !=
5878 (*stp)->key[PF_SK_STACK]) {
5879 struct pf_state_key *nk;
5880 int afto, sidx, didx;
5881
5882 if (PF_REVERSED_KEY((*stp)->key, pd->af))
5883 nk = (*stp)->key[pd->sidx];
5884 else
5885 nk = (*stp)->key[pd->didx];
5886
5887 afto = pd->af != nk->af;
5888 sidx = afto ? pd2.didx : pd2.sidx;
5889 didx = afto ? pd2.sidx : pd2.didx;
5890
5891 #ifdef INET6
5892 if (afto) {
5893 if (pf_translate_icmp_af(pd, nk->af,
5894 &pd->hdr.icmp))
5895 return (PF_DROP);
5896 m_copyback(pd->m, pd->off,
5897 sizeof(struct icmp6_hdr),
5898 &pd->hdr.icmp6, M_NOWAIT);
5899 if (pf_change_icmp_af(pd->m, ipoff2,
5900 pd, &pd2, &nk->addr[sidx],
5901 &nk->addr[didx], pd->af, nk->af))
5902 return (PF_DROP);
5903 if (nk->af == AF_INET)
5904 pd->proto = IPPROTO_ICMP;
5905 else
5906 pd->proto = IPPROTO_ICMPV6;
5907 pd->m->m_pkthdr.ph_rtableid =
5908 nk->rdomain;
5909 pd->destchg = 1;
5910 pf_addrcpy(&pd->nsaddr,
5911 &nk->addr[pd2.sidx], nk->af);
5912 pf_addrcpy(&pd->ndaddr,
5913 &nk->addr[pd2.didx], nk->af);
5914 pd->naf = nk->af;
5915
5916 pf_patch_16(pd,
5917 &uh->uh_sport, nk->port[sidx]);
5918 pf_patch_16(pd,
5919 &uh->uh_dport, nk->port[didx]);
5920
5921 m_copyback(pd2.m, pd2.off, sizeof(*uh),
5922 uh, M_NOWAIT);
5923 return (PF_AFRT);
5924 }
5925 #endif /* INET6 */
5926
5927 if (PF_ANEQ(pd2.src,
5928 &nk->addr[pd2.sidx], pd2.af) ||
5929 nk->port[pd2.sidx] != uh->uh_sport)
5930 pf_translate_icmp(pd, pd2.src,
5931 &uh->uh_sport, pd->dst,
5932 &nk->addr[pd2.sidx],
5933 nk->port[pd2.sidx]);
5934
5935 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
5936 pd2.af) || pd2.rdomain != nk->rdomain)
5937 pd->destchg = 1;
5938 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
5939
5940 if (PF_ANEQ(pd2.dst,
5941 &nk->addr[pd2.didx], pd2.af) ||
5942 nk->port[pd2.didx] != uh->uh_dport)
5943 pf_translate_icmp(pd, pd2.dst,
5944 &uh->uh_dport, pd->src,
5945 &nk->addr[pd2.didx],
5946 nk->port[pd2.didx]);
5947
5948 switch (pd2.af) {
5949 case AF_INET:
5950 m_copyback(pd->m, pd->off, ICMP_MINLEN,
5951 &pd->hdr.icmp, M_NOWAIT);
5952 m_copyback(pd2.m, ipoff2, sizeof(h2),
5953 &h2, M_NOWAIT);
5954 break;
5955 #ifdef INET6
5956 case AF_INET6:
5957 m_copyback(pd->m, pd->off,
5958 sizeof(struct icmp6_hdr),
5959 &pd->hdr.icmp6, M_NOWAIT);
5960 m_copyback(pd2.m, ipoff2, sizeof(h2_6),
5961 &h2_6, M_NOWAIT);
5962 break;
5963 #endif /* INET6 */
5964 }
5965 /* Avoid recomputing the quoted UDP checksum.
5966 * Note: a zero udp6 checksum is invalid per RFC 2460 p. 27,
5967 * but presumably nothing cares in this context. */
5968 pf_patch_16(pd, &uh->uh_sum, 0);
5969 m_copyback(pd2.m, pd2.off, sizeof(*uh), uh,
5970 M_NOWAIT);
5971 copyback = 1;
5972 }
5973 break;
5974 }
5975 case IPPROTO_ICMP: {
5976 struct icmp *iih = &pd2.hdr.icmp;
5977
5978 if (pd2.af != AF_INET) {
5979 REASON_SET(reason, PFRES_NORM);
5980 return (PF_DROP);
5981 }
5982
5983 if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN,
5984 NULL, reason, pd2.af)) {
5985 DPFPRINTF(LOG_NOTICE,
5986 "ICMP error message too short (icmp)");
5987 return (PF_DROP);
5988 }
5989
5990 pf_icmp_mapping(&pd2, iih->icmp_type,
5991 &icmp_dir, &virtual_id, &virtual_type);
5992
5993 ret = pf_icmp_state_lookup(&pd2, &key, stp,
5994 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
5995 if (ret >= 0)
5996 return (ret);
5997
5998 /* translate source/destination address, if necessary */
5999 if ((*stp)->key[PF_SK_WIRE] !=
6000 (*stp)->key[PF_SK_STACK]) {
6001 struct pf_state_key *nk;
6002 int afto, sidx, didx;
6003
6004 if (PF_REVERSED_KEY((*stp)->key, pd->af))
6005 nk = (*stp)->key[pd->sidx];
6006 else
6007 nk = (*stp)->key[pd->didx];
6008
6009 afto = pd->af != nk->af;
6010 sidx = afto ? pd2.didx : pd2.sidx;
6011 didx = afto ? pd2.sidx : pd2.didx;
6012 iidx = afto ? !iidx : iidx;
6013
6014 #ifdef INET6
6015 if (afto) {
6016 if (nk->af != AF_INET6)
6017 return (PF_DROP);
6018 if (pf_translate_icmp_af(pd, nk->af,
6019 &pd->hdr.icmp))
6020 return (PF_DROP);
6021 m_copyback(pd->m, pd->off,
6022 sizeof(struct icmp6_hdr),
6023 &pd->hdr.icmp6, M_NOWAIT);
6024 if (pf_change_icmp_af(pd->m, ipoff2,
6025 pd, &pd2, &nk->addr[sidx],
6026 &nk->addr[didx], pd->af, nk->af))
6027 return (PF_DROP);
6028 pd->proto = IPPROTO_ICMPV6;
6029 if (pf_translate_icmp_af(pd,
6030 nk->af, iih))
6031 return (PF_DROP);
6032 if (virtual_type == htons(ICMP_ECHO))
6033 pf_patch_16(pd, &iih->icmp_id,
6034 nk->port[iidx]);
6035 m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
6036 iih, M_NOWAIT);
6037 pd->m->m_pkthdr.ph_rtableid =
6038 nk->rdomain;
6039 pd->destchg = 1;
6040 pf_addrcpy(&pd->nsaddr,
6041 &nk->addr[pd2.sidx], nk->af);
6042 pf_addrcpy(&pd->ndaddr,
6043 &nk->addr[pd2.didx], nk->af);
6044 pd->naf = nk->af;
6045 return (PF_AFRT);
6046 }
6047 #endif /* INET6 */
6048
6049 if (PF_ANEQ(pd2.src,
6050 &nk->addr[pd2.sidx], pd2.af) ||
6051 (virtual_type == htons(ICMP_ECHO) &&
6052 nk->port[iidx] != iih->icmp_id))
6053 pf_translate_icmp(pd, pd2.src,
6054 (virtual_type == htons(ICMP_ECHO)) ?
6055 &iih->icmp_id : NULL,
6056 pd->dst, &nk->addr[pd2.sidx],
6057 (virtual_type == htons(ICMP_ECHO)) ?
6058 nk->port[iidx] : 0);
6059
6060 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
6061 pd2.af) || pd2.rdomain != nk->rdomain)
6062 pd->destchg = 1;
6063 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
6064
6065 if (PF_ANEQ(pd2.dst,
6066 &nk->addr[pd2.didx], pd2.af))
6067 pf_translate_icmp(pd, pd2.dst, NULL,
6068 pd->src, &nk->addr[pd2.didx], 0);
6069
6070 m_copyback(pd->m, pd->off, ICMP_MINLEN,
6071 &pd->hdr.icmp, M_NOWAIT);
6072 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2,
6073 M_NOWAIT);
6074 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih,
6075 M_NOWAIT);
6076 copyback = 1;
6077 }
6078 break;
6079 }
6080 #ifdef INET6
6081 case IPPROTO_ICMPV6: {
6082 struct icmp6_hdr *iih = &pd2.hdr.icmp6;
6083
6084 if (pd2.af != AF_INET6) {
6085 REASON_SET(reason, PFRES_NORM);
6086 return (PF_DROP);
6087 }
6088
6089 if (!pf_pull_hdr(pd2.m, pd2.off, iih,
6090 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
6091 DPFPRINTF(LOG_NOTICE,
6092 "ICMP error message too short (icmp6)");
6093 return (PF_DROP);
6094 }
6095
6096 pf_icmp_mapping(&pd2, iih->icmp6_type,
6097 &icmp_dir, &virtual_id, &virtual_type);
6098 ret = pf_icmp_state_lookup(&pd2, &key, stp,
6099 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
6100 /* IPv6? try matching a multicast address */
6101 if (ret == PF_DROP && pd2.af == AF_INET6 &&
6102 icmp_dir == PF_OUT)
6103 ret = pf_icmp_state_lookup(&pd2, &key, stp,
6104 virtual_id, virtual_type, icmp_dir, &iidx,
6105 1, 1);
6106 if (ret >= 0)
6107 return (ret);
6108
6109 /* translate source/destination address, if necessary */
6110 if ((*stp)->key[PF_SK_WIRE] !=
6111 (*stp)->key[PF_SK_STACK]) {
6112 struct pf_state_key *nk;
6113 int afto, sidx, didx;
6114
6115 if (PF_REVERSED_KEY((*stp)->key, pd->af))
6116 nk = (*stp)->key[pd->sidx];
6117 else
6118 nk = (*stp)->key[pd->didx];
6119
6120 afto = pd->af != nk->af;
6121 sidx = afto ? pd2.didx : pd2.sidx;
6122 didx = afto ? pd2.sidx : pd2.didx;
6123 iidx = afto ? !iidx : iidx;
6124
6125 if (afto) {
6126 if (nk->af != AF_INET)
6127 return (PF_DROP);
6128 if (pf_translate_icmp_af(pd, nk->af,
6129 &pd->hdr.icmp))
6130 return (PF_DROP);
6131 m_copyback(pd->m, pd->off,
6132 sizeof(struct icmp6_hdr),
6133 &pd->hdr.icmp6, M_NOWAIT);
6134 if (pf_change_icmp_af(pd->m, ipoff2,
6135 pd, &pd2, &nk->addr[sidx],
6136 &nk->addr[didx], pd->af, nk->af))
6137 return (PF_DROP);
6138 pd->proto = IPPROTO_ICMP;
6139 if (pf_translate_icmp_af(pd,
6140 nk->af, iih))
6141 return (PF_DROP);
6142 if (virtual_type ==
6143 htons(ICMP6_ECHO_REQUEST))
6144 pf_patch_16(pd, &iih->icmp6_id,
6145 nk->port[iidx]);
6146 m_copyback(pd2.m, pd2.off,
6147 sizeof(struct icmp6_hdr), iih,
6148 M_NOWAIT);
6149 pd->m->m_pkthdr.ph_rtableid =
6150 nk->rdomain;
6151 pd->destchg = 1;
6152 pf_addrcpy(&pd->nsaddr,
6153 &nk->addr[pd2.sidx], nk->af);
6154 pf_addrcpy(&pd->ndaddr,
6155 &nk->addr[pd2.didx], nk->af);
6156 pd->naf = nk->af;
6157 return (PF_AFRT);
6158 }
6159
6160 if (PF_ANEQ(pd2.src,
6161 &nk->addr[pd2.sidx], pd2.af) ||
6162 ((virtual_type ==
6163 htons(ICMP6_ECHO_REQUEST)) &&
6164 nk->port[pd2.sidx] != iih->icmp6_id))
6165 pf_translate_icmp(pd, pd2.src,
6166 (virtual_type ==
6167 htons(ICMP6_ECHO_REQUEST))
6168 ? &iih->icmp6_id : NULL,
6169 pd->dst, &nk->addr[pd2.sidx],
6170 (virtual_type ==
6171 htons(ICMP6_ECHO_REQUEST))
6172 ? nk->port[iidx] : 0);
6173
6174 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
6175 pd2.af) || pd2.rdomain != nk->rdomain)
6176 pd->destchg = 1;
6177 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
6178
6179 if (PF_ANEQ(pd2.dst,
6180 &nk->addr[pd2.didx], pd2.af))
6181 pf_translate_icmp(pd, pd2.dst, NULL,
6182 pd->src, &nk->addr[pd2.didx], 0);
6183
6184 m_copyback(pd->m, pd->off,
6185 sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
6186 M_NOWAIT);
6187 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6,
6188 M_NOWAIT);
6189 m_copyback(pd2.m, pd2.off,
6190 sizeof(struct icmp6_hdr), iih, M_NOWAIT);
6191 copyback = 1;
6192 }
6193 break;
6194 }
6195 #endif /* INET6 */
6196 default: {
6197 int action;
6198
6199 key.af = pd2.af;
6200 key.proto = pd2.proto;
6201 key.rdomain = pd2.rdomain;
6202 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
6203 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
6204 key.port[0] = key.port[1] = 0;
6205 key.hash = pf_pkt_hash(pd2.af, pd2.proto,
6206 pd2.src, pd2.dst, 0, 0);
6207
6208 action = pf_find_state(&pd2, &key, stp);
6209 if (action != PF_MATCH)
6210 return (action);
6211
6212 /* translate source/destination address, if necessary */
6213 if ((*stp)->key[PF_SK_WIRE] !=
6214 (*stp)->key[PF_SK_STACK]) {
6215 struct pf_state_key *nk =
6216 (*stp)->key[pd->didx];
6217
6218 if (PF_ANEQ(pd2.src,
6219 &nk->addr[pd2.sidx], pd2.af))
6220 pf_translate_icmp(pd, pd2.src, NULL,
6221 pd->dst, &nk->addr[pd2.sidx], 0);
6222
6223 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
6224 pd2.af) || pd2.rdomain != nk->rdomain)
6225 pd->destchg = 1;
6226 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
6227
6228 if (PF_ANEQ(pd2.dst,
6229 &nk->addr[pd2.didx], pd2.af))
6230 pf_translate_icmp(pd, pd2.dst, NULL,
6231 pd->src, &nk->addr[pd2.didx], 0);
6232
6233 switch (pd2.af) {
6234 case AF_INET:
6235 m_copyback(pd->m, pd->off, ICMP_MINLEN,
6236 &pd->hdr.icmp, M_NOWAIT);
6237 m_copyback(pd2.m, ipoff2, sizeof(h2),
6238 &h2, M_NOWAIT);
6239 break;
6240 #ifdef INET6
6241 case AF_INET6:
6242 m_copyback(pd->m, pd->off,
6243 sizeof(struct icmp6_hdr),
6244 &pd->hdr.icmp6, M_NOWAIT);
6245 m_copyback(pd2.m, ipoff2, sizeof(h2_6),
6246 &h2_6, M_NOWAIT);
6247 break;
6248 #endif /* INET6 */
6249 }
6250 copyback = 1;
6251 }
6252 break;
6253 }
6254 }
6255 }
6256 if (copyback) {
6257 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
6258 }
6259
6260 return (PF_PASS);
6261 }
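/*
 * Overview of the ICMP error handling above: pf_pull_hdr() extracts
 * the protocol header quoted inside the ICMP payload, a state key is
 * built from the quoted (inner) addresses and ports, and
 * pf_find_state() resolves it.  If the state carries a translation
 * (PF_SK_WIRE != PF_SK_STACK), both the quoted header and the outer
 * ICMP header are rewritten and written back with m_copyback(), so
 * the error reaches the host behind the translation.
 */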
6262
6263 /*
6264 * ipoff and off are measured from the start of the mbuf chain.
6265 * h must be at "ipoff" on the mbuf chain.
6266 */
6267 void *
6268 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
6269 u_short *actionp, u_short *reasonp, sa_family_t af)
6270 {
6271 int iplen = 0;
6272
6273 switch (af) {
6274 case AF_INET: {
6275 struct ip *h = mtod(m, struct ip *);
6276 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
6277
6278 if (fragoff) {
6279 if (fragoff >= len)
6280 ACTION_SET(actionp, PF_PASS);
6281 else {
6282 ACTION_SET(actionp, PF_DROP);
6283 REASON_SET(reasonp, PFRES_FRAG);
6284 }
6285 return (NULL);
6286 }
6287 iplen = ntohs(h->ip_len);
6288 break;
6289 }
6290 #ifdef INET6
6291 case AF_INET6: {
6292 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
6293
6294 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6295 break;
6296 }
6297 #endif /* INET6 */
6298 }
6299 if (m->m_pkthdr.len < off + len || iplen < off + len) {
6300 ACTION_SET(actionp, PF_DROP);
6301 REASON_SET(reasonp, PFRES_SHORT);
6302 return (NULL);
6303 }
6304 m_copydata(m, off, len, p);
6305 return (p);
6306 }
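/*
 * Callers hand in a local buffer and treat a NULL return as "header
 * not available", e.g.:
 *
 *	struct udphdr uh;
 *
 *	if (!pf_pull_hdr(pd2.m, pd2.off, &uh, sizeof(uh),
 *	    NULL, reason, pd2.af))
 *		return (PF_DROP);
 *
 * For AF_INET a non-zero fragment offset marks a non-first fragment:
 * PF_PASS is signalled when the fragment starts beyond the requested
 * header bytes, PF_DROP when it would overlap them.
 */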
6307
6308 int
6309 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
6310 int rtableid)
6311 {
6312 struct sockaddr_storage ss;
6313 struct sockaddr_in *dst;
6314 int ret = 1;
6315 int check_mpath;
6316 #ifdef INET6
6317 struct sockaddr_in6 *dst6;
6318 #endif /* INET6 */
6319 struct rtentry *rt = NULL;
6320
6321 check_mpath = 0;
6322 memset(&ss, 0, sizeof(ss));
6323 switch (af) {
6324 case AF_INET:
6325 dst = (struct sockaddr_in *)&ss;
6326 dst->sin_family = AF_INET;
6327 dst->sin_len = sizeof(*dst);
6328 dst->sin_addr = addr->v4;
6329 if (ipmultipath)
6330 check_mpath = 1;
6331 break;
6332 #ifdef INET6
6333 case AF_INET6:
6334 /*
6335 * Skip check for addresses with embedded interface scope,
6336 * as they would always match anyway.
6337 */
6338 if (IN6_IS_SCOPE_EMBED(&addr->v6))
6339 goto out;
6340 dst6 = (struct sockaddr_in6 *)&ss;
6341 dst6->sin6_family = AF_INET6;
6342 dst6->sin6_len = sizeof(*dst6);
6343 dst6->sin6_addr = addr->v6;
6344 if (ip6_multipath)
6345 check_mpath = 1;
6346 break;
6347 #endif /* INET6 */
6348 }
6349
6350 /* Skip checks for ipsec interfaces */
6351 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
6352 goto out;
6353
6354 rt = rtalloc(sstosa(&ss), 0, rtableid);
6355 if (rt != NULL) {
6356 /* No interface given, this is a no-route check */
6357 if (kif == NULL)
6358 goto out;
6359
6360 if (kif->pfik_ifp == NULL) {
6361 ret = 0;
6362 goto out;
6363 }
6364
6365 /* Perform the uRPF check if an input interface was passed */
6366 ret = 0;
6367 do {
6368 if (rt->rt_ifidx == kif->pfik_ifp->if_index) {
6369 ret = 1;
6370 #if NCARP > 0
6371 } else {
6372 struct ifnet *ifp;
6373
6374 ifp = if_get(rt->rt_ifidx);
6375 if (ifp != NULL && ifp->if_type == IFT_CARP &&
6376 ifp->if_carpdevidx ==
6377 kif->pfik_ifp->if_index)
6378 ret = 1;
6379 if_put(ifp);
6380 #endif /* NCARP */
6381 }
6382
6383 rt = rtable_iterate(rt);
6384 } while (check_mpath == 1 && rt != NULL && ret == 0);
6385 } else
6386 ret = 0;
6387 out:
6388 rtfree(rt);
6389 return (ret);
6390 }
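/*
 * pf_routable() answers two questions: with kif == NULL it is a
 * plain "does any route exist" check, otherwise it performs the
 * uRPF check, accepting the packet when a route points back out the
 * receiving interface or a carp interface stacked on it.  With
 * multipath routing enabled, every route in the set is tried via
 * rtable_iterate() before giving up.  This is presumably what backs
 * reverse-path checks such as pf.conf's "urpf-failed" keyword.
 */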
6391
6392 int
6393 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw,
6394 int rtableid)
6395 {
6396 struct sockaddr_storage ss;
6397 struct sockaddr_in *dst;
6398 #ifdef INET6
6399 struct sockaddr_in6 *dst6;
6400 #endif /* INET6 */
6401 struct rtentry *rt;
6402 int ret = 0;
6403
6404 memset(&ss, 0, sizeof(ss));
6405 switch (af) {
6406 case AF_INET:
6407 dst = (struct sockaddr_in *)&ss;
6408 dst->sin_family = AF_INET;
6409 dst->sin_len = sizeof(*dst);
6410 dst->sin_addr = addr->v4;
6411 break;
6412 #ifdef INET6
6413 case AF_INET6:
6414 dst6 = (struct sockaddr_in6 *)&ss;
6415 dst6->sin6_family = AF_INET6;
6416 dst6->sin6_len = sizeof(*dst6);
6417 dst6->sin6_addr = addr->v6;
6418 break;
6419 #endif /* INET6 */
6420 }
6421
6422 rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid);
6423 if (rt != NULL) {
6424 if (rt->rt_labelid == aw->v.rtlabel)
6425 ret = 1;
6426 rtfree(rt);
6427 }
6428
6429 return (ret);
6430 }
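/*
 * pf_rtlabel_match() resolves a route for the address and compares
 * its label id with the one stored in the address wrapper; this
 * presumably backs matching on route labels ("route <label>") in
 * pf.conf address specifications.
 */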
6431
6432 /* pf_route() may change pd->m, adjust local copies after calling */
6433 void
6434 pf_route(struct pf_pdesc *pd, struct pf_state *st)
6435 {
6436 struct mbuf *m0;
6437 struct mbuf_list fml;
6438 struct sockaddr_in *dst, sin;
6439 struct rtentry *rt = NULL;
6440 struct ip *ip;
6441 struct ifnet *ifp = NULL;
6442 int error = 0;
6443 unsigned int rtableid;
6444
6445 if (pd->m->m_pkthdr.pf.routed++ > 3) {
6446 m_freem(pd->m);
6447 pd->m = NULL;
6448 return;
6449 }
6450
6451 if (st->rt == PF_DUPTO) {
6452 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
6453 return;
6454 } else {
6455 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
6456 return;
6457 m0 = pd->m;
6458 pd->m = NULL;
6459 }
6460
6461 if (m0->m_len < sizeof(struct ip)) {
6462 DPFPRINTF(LOG_ERR,
6463 "%s: m0->m_len < sizeof(struct ip)", __func__);
6464 goto bad;
6465 }
6466
6467 ip = mtod(m0, struct ip *);
6468
6469 if (pd->dir == PF_IN) {
6470 if (ip->ip_ttl <= IPTTLDEC) {
6471 if (st->rt != PF_DUPTO) {
6472 pf_send_icmp(m0, ICMP_TIMXCEED,
6473 ICMP_TIMXCEED_INTRANS, 0,
6474 pd->af, st->rule.ptr, pd->rdomain);
6475 }
6476 goto bad;
6477 }
6478 ip->ip_ttl -= IPTTLDEC;
6479 }
6480
6481 memset(&sin, 0, sizeof(sin));
6482 dst = &sin;
6483 dst->sin_family = AF_INET;
6484 dst->sin_len = sizeof(*dst);
6485 dst->sin_addr = st->rt_addr.v4;
6486 rtableid = m0->m_pkthdr.ph_rtableid;
6487
6488 rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid);
6489 if (!rtisvalid(rt)) {
6490 if (st->rt != PF_DUPTO) {
6491 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST,
6492 0, pd->af, st->rule.ptr, pd->rdomain);
6493 }
6494 ipstat_inc(ips_noroute);
6495 goto bad;
6496 }
6497
6498 ifp = if_get(rt->rt_ifidx);
6499 if (ifp == NULL)
6500 goto bad;
6501
6502 /* A locally generated packet may have an invalid source address. */
6503 if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
6504 (ifp->if_flags & IFF_LOOPBACK) == 0)
6505 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;
6506
6507 if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
6508 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
6509 goto bad;
6510 else if (m0 == NULL)
6511 goto done;
6512 if (m0->m_len < sizeof(struct ip)) {
6513 DPFPRINTF(LOG_ERR,
6514 "%s: m0->m_len < sizeof(struct ip)", __func__);
6515 goto bad;
6516 }
6517 ip = mtod(m0, struct ip *);
6518 }
6519
6520 in_proto_cksum_out(m0, ifp);
6521
6522 if (ntohs(ip->ip_len) <= ifp->if_mtu) {
6523 ip->ip_sum = 0;
6524 if (ifp->if_capabilities & IFCAP_CSUM_IPv4)
6525 m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
6526 else {
6527 ipstat_inc(ips_outswcsum);
6528 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
6529 }
6530 error = ifp->if_output(ifp, m0, sintosa(dst), rt);
6531 goto done;
6532 }
6533
6534 /*
6535 * Too large for interface; fragment if possible.
6536 * Must be able to put at least 8 bytes per fragment.
6537 */
6538 if (ip->ip_off & htons(IP_DF)) {
6539 ipstat_inc(ips_cantfrag);
6540 if (st->rt != PF_DUPTO)
6541 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
6542 ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain);
6543 goto bad;
6544 }
6545
6546 error = ip_fragment(m0, &fml, ifp, ifp->if_mtu);
6547 if (error)
6548 goto done;
6549
6550 while ((m0 = ml_dequeue(&fml)) != NULL) {
6551 error = ifp->if_output(ifp, m0, sintosa(dst), rt);
6552 if (error)
6553 break;
6554 }
6555 if (error)
6556 ml_purge(&fml);
6557 else
6558 ipstat_inc(ips_fragmented);
6559
6560 done:
6561 if_put(ifp);
6562 rtfree(rt);
6563 return;
6564
6565 bad:
6566 m_freem(m0);
6567 goto done;
6568 }
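/*
 * pf_route() implements the IPv4 route-to/reply-to/dup-to actions:
 * dup-to works on a copy from m_dup_pkt(), reply-to only acts on
 * packets travelling against the state's direction, and the route
 * lookup is forced to st->rt_addr instead of the packet destination.
 * Packets taken from the input path have their TTL decremented and,
 * except for dup-to copies, are re-run through pf_test(PF_OUT) on
 * the new interface; oversized packets are fragmented with
 * ip_fragment() unless DF is set, which instead elicits an ICMP
 * "need frag" error.  The pf.routed counter limits forwarding loops
 * to four passes.
 */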
6569
6570 #ifdef INET6
6571 /* pf_route6() may change pd->m, adjust local copies after calling */
6572 void
6573 pf_route6(struct pf_pdesc *pd, struct pf_state *st)
6574 {
6575 struct mbuf *m0;
6576 struct sockaddr_in6 *dst, sin6;
6577 struct rtentry *rt = NULL;
6578 struct ip6_hdr *ip6;
6579 struct ifnet *ifp = NULL;
6580 struct m_tag *mtag;
6581 unsigned int rtableid;
6582
6583 if (pd->m->m_pkthdr.pf.routed++ > 3) {
6584 m_freem(pd->m);
6585 pd->m = NULL;
6586 return;
6587 }
6588
6589 if (st->rt == PF_DUPTO) {
6590 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
6591 return;
6592 } else {
6593 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
6594 return;
6595 m0 = pd->m;
6596 pd->m = NULL;
6597 }
6598
6599 if (m0->m_len < sizeof(struct ip6_hdr)) {
6600 DPFPRINTF(LOG_ERR,
6601 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
6602 goto bad;
6603 }
6604 ip6 = mtod(m0, struct ip6_hdr *);
6605
6606 if (pd->dir == PF_IN) {
6607 if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
6608 if (st->rt != PF_DUPTO) {
6609 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
6610 ICMP6_TIME_EXCEED_TRANSIT, 0,
6611 pd->af, st->rule.ptr, pd->rdomain);
6612 }
6613 goto bad;
6614 }
6615 ip6->ip6_hlim -= IPV6_HLIMDEC;
6616 }
6617
6618 memset(&sin6, 0, sizeof(sin6));
6619 dst = &sin6;
6620 dst->sin6_family = AF_INET6;
6621 dst->sin6_len = sizeof(*dst);
6622 dst->sin6_addr = st->rt_addr.v6;
6623 rtableid = m0->m_pkthdr.ph_rtableid;
6624
6625 rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0],
6626 rtableid);
6627 if (!rtisvalid(rt)) {
6628 if (st->rt != PF_DUPTO) {
6629 pf_send_icmp(m0, ICMP6_DST_UNREACH,
6630 ICMP6_DST_UNREACH_NOROUTE, 0,
6631 pd->af, st->rule.ptr, pd->rdomain);
6632 }
6633 ip6stat_inc(ip6s_noroute);
6634 goto bad;
6635 }
6636
6637 ifp = if_get(rt->rt_ifidx);
6638 if (ifp == NULL)
6639 goto bad;
6640
6641 /* A locally generated packet may have an invalid source address. */
6642 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
6643 (ifp->if_flags & IFF_LOOPBACK) == 0)
6644 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
6645
6646 if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
6647 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
6648 goto bad;
6649 else if (m0 == NULL)
6650 goto done;
6651 if (m0->m_len < sizeof(struct ip6_hdr)) {
6652 DPFPRINTF(LOG_ERR,
6653 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
6654 goto bad;
6655 }
6656 }
6657
6658 in6_proto_cksum_out(m0, ifp);
6659
6660 /*
6661 * If the packet has been reassembled by PF earlier, we have to
6662 * use pf_refragment6() here to turn it back into fragments.
6663 */
6664 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) {
6665 (void) pf_refragment6(&m0, mtag, dst, ifp, rt);
6666 } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
6667 ifp->if_output(ifp, m0, sin6tosa(dst), rt);
6668 } else {
6669 ip6stat_inc(ip6s_cantfrag);
6670 if (st->rt != PF_DUPTO)
6671 pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
6672 ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain);
6673 goto bad;
6674 }
6675
6676 done:
6677 if_put(ifp);
6678 rtfree(rt);
6679 return;
6680
6681 bad:
6682 m_freem(m0);
6683 goto done;
6684 }
6685 #endif /* INET6 */
6686
6687 /*
6688 * Check the TCP checksum and set the mbuf flag accordingly.
6689 * off is the offset where the protocol header starts,
6690 * len is the total length of the protocol header plus payload.
6691 * Returns 0 when the checksum is valid, otherwise returns 1.
6692 * If the _OUT flag is set the checksum isn't done yet; consider these ok.
6693 */
6694 int
6695 pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af)
6696 {
6697 u_int16_t sum;
6698
6699 if (m->m_pkthdr.csum_flags &
6700 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) {
6701 return (0);
6702 }
6703 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD ||
6704 off < sizeof(struct ip) ||
6705 m->m_pkthdr.len < off + len) {
6706 return (1);
6707 }
6708
6709 /* need to do it in software */
6710 tcpstat_inc(tcps_inswcsum);
6711
6712 switch (af) {
6713 case AF_INET:
6714 if (m->m_len < sizeof(struct ip))
6715 return (1);
6716
6717 sum = in4_cksum(m, IPPROTO_TCP, off, len);
6718 break;
6719 #ifdef INET6
6720 case AF_INET6:
6721 if (m->m_len < sizeof(struct ip6_hdr))
6722 return (1);
6723
6724 sum = in6_cksum(m, IPPROTO_TCP, off, len);
6725 break;
6726 #endif /* INET6 */
6727 default:
6728 unhandled_af(af);
6729 }
6730 if (sum) {
6731 tcpstat_inc(tcps_rcvbadsum);
6732 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD;
6733 return (1);
6734 }
6735
6736 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
6737 return (0);
6738 }
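/*
 * The verdict is cached in the mbuf: M_TCP_CSUM_IN_OK or
 * M_TCP_CSUM_IN_BAD is set after the software check, so a second
 * call for the same packet returns immediately without recomputing
 * the sum.
 */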
6739
6740 struct pf_divert *
6741 pf_find_divert(struct mbuf *m)
6742 {
6743 struct m_tag *mtag;
6744
6745 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
6746 return (NULL);
6747
6748 return ((struct pf_divert *)(mtag + 1));
6749 }
6750
6751 struct pf_divert *
6752 pf_get_divert(struct mbuf *m)
6753 {
6754 struct m_tag *mtag;
6755
6756 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
6757 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
6758 M_NOWAIT);
6759 if (mtag == NULL)
6760 return (NULL);
6761 memset(mtag + 1, 0, sizeof(struct pf_divert));
6762 m_tag_prepend(m, mtag);
6763 }
6764
6765 return ((struct pf_divert *)(mtag + 1));
6766 }
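/*
 * pf_find_divert() only looks up an existing tag, while
 * pf_get_divert() creates and prepends a zeroed one on demand.  The
 * pf_divert payload lives directly behind the m_tag header, hence
 * the (mtag + 1) pointer arithmetic in both functions.
 */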
6767
6768 int
6769 pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end,
6770 u_short *reason)
6771 {
6772 uint8_t type, length, opts[15 * 4 - sizeof(struct ip)];
6773
6774 /* The IP header in the payload of an ICMP packet may be too short */
6775 if (pd->m->m_pkthdr.len < end) {
6776 DPFPRINTF(LOG_NOTICE, "IP option too short");
6777 REASON_SET(reason, PFRES_SHORT);
6778 return (PF_DROP);
6779 }
6780
6781 KASSERT(end - off <= sizeof(opts));
6782 m_copydata(pd->m, off, end - off, opts);
6783 end -= off;
6784 off = 0;
6785
6786 while (off < end) {
6787 type = opts[off];
6788 if (type == IPOPT_EOL)
6789 break;
6790 if (type == IPOPT_NOP) {
6791 off++;
6792 continue;
6793 }
6794 if (off + 2 > end) {
6795 DPFPRINTF(LOG_NOTICE, "IP length opt");
6796 REASON_SET(reason, PFRES_IPOPTIONS);
6797 return (PF_DROP);
6798 }
6799 length = opts[off + 1];
6800 if (length < 2) {
6801 DPFPRINTF(LOG_NOTICE, "IP short opt");
6802 REASON_SET(reason, PFRES_IPOPTIONS);
6803 return (PF_DROP);
6804 }
6805 if (off + length > end) {
6806 DPFPRINTF(LOG_NOTICE, "IP long opt");
6807 REASON_SET(reason, PFRES_IPOPTIONS);
6808 return (PF_DROP);
6809 }
6810 switch (type) {
6811 case IPOPT_RA:
6812 SET(pd->badopts, PF_OPT_ROUTER_ALERT);
6813 break;
6814 default:
6815 SET(pd->badopts, PF_OPT_OTHER);
6816 break;
6817 }
6818 off += length;
6819 }
6820
6821 return (PF_PASS);
6822 }
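/*
 * Note that the walk above only classifies options: IPOPT_RA sets
 * PF_OPT_ROUTER_ALERT and everything else sets PF_OPT_OTHER in
 * pd->badopts.  Whether options are acceptable is decided later,
 * against the rule's allow-opts setting, not here.
 */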
6823
6824 int
6825 pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
6826 {
6827 struct ip6_ext ext;
6828 u_int32_t hlen, end;
6829 int hdr_cnt;
6830
6831 hlen = h->ip_hl << 2;
6832 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) {
6833 REASON_SET(reason, PFRES_SHORT);
6834 return (PF_DROP);
6835 }
6836 if (hlen != sizeof(struct ip)) {
6837 if (pf_walk_option(pd, h, pd->off + sizeof(struct ip),
6838 pd->off + hlen, reason) != PF_PASS)
6839 return (PF_DROP);
6840 /* header options which contain only padding are fishy */
6841 if (pd->badopts == 0)
6842 SET(pd->badopts, PF_OPT_OTHER);
6843 }
6844 end = pd->off + ntohs(h->ip_len);
6845 pd->off += hlen;
6846 pd->proto = h->ip_p;
6847 /* IGMP packets have router alert options; allow them */
6848 if (pd->proto == IPPROTO_IGMP) {
6849 /* According to RFC 1112 ttl must be set to 1. */
6850 if ((h->ip_ttl != 1) || !IN_MULTICAST(h->ip_dst.s_addr)) {
6851 DPFPRINTF(LOG_NOTICE, "Invalid IGMP");
6852 REASON_SET(reason, PFRES_IPOPTIONS);
6853 return (PF_DROP);
6854 }
6855 CLR(pd->badopts, PF_OPT_ROUTER_ALERT);
6856 }
6857 /* stop walking over non-initial fragments */
6858 if ((h->ip_off & htons(IP_OFFMASK)) != 0)
6859 return (PF_PASS);
6860
6861 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
6862 switch (pd->proto) {
6863 case IPPROTO_AH:
6864 /* fragments may be short */
6865 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 &&
6866 end < pd->off + sizeof(ext))
6867 return (PF_PASS);
6868 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
6869 NULL, reason, AF_INET)) {
6870 DPFPRINTF(LOG_NOTICE, "IP short exthdr");
6871 return (PF_DROP);
6872 }
6873 pd->off += (ext.ip6e_len + 2) * 4;
6874 pd->proto = ext.ip6e_nxt;
6875 break;
6876 default:
6877 return (PF_PASS);
6878 }
6879 }
6880 DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit");
6881 REASON_SET(reason, PFRES_IPOPTIONS);
6882 return (PF_DROP);
6883 }
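/*
 * For IPv4 the only chainable "extension" handled is the
 * authentication header; its length field counts 32-bit words minus
 * two, hence the (ext.ip6e_len + 2) * 4 advance, and the loop is
 * bounded by pf_hdr_limit to cap nested-AH parsing work.
 */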
6884
6885 #ifdef INET6
6886 int
6887 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
6888 u_short *reason)
6889 {
6890 struct ip6_opt opt;
6891 struct ip6_opt_jumbo jumbo;
6892
6893 while (off < end) {
6894 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
6895 sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) {
6896 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type");
6897 return (PF_DROP);
6898 }
6899 if (opt.ip6o_type == IP6OPT_PAD1) {
6900 off++;
6901 continue;
6902 }
6903 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
6904 NULL, reason, AF_INET6)) {
6905 DPFPRINTF(LOG_NOTICE, "IPv6 short opt");
6906 return (PF_DROP);
6907 }
6908 if (off + sizeof(opt) + opt.ip6o_len > end) {
6909 DPFPRINTF(LOG_NOTICE, "IPv6 long opt");
6910 REASON_SET(reason, PFRES_IPOPTIONS);
6911 return (PF_DROP);
6912 }
6913 switch (opt.ip6o_type) {
6914 case IP6OPT_PADN:
6915 break;
6916 case IP6OPT_JUMBO:
6917 SET(pd->badopts, PF_OPT_JUMBO);
6918 if (pd->jumbolen != 0) {
6919 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo");
6920 REASON_SET(reason, PFRES_IPOPTIONS);
6921 return (PF_DROP);
6922 }
6923 if (ntohs(h->ip6_plen) != 0) {
6924 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen");
6925 REASON_SET(reason, PFRES_IPOPTIONS);
6926 return (PF_DROP);
6927 }
6928 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
6929 NULL, reason, AF_INET6)) {
6930 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo");
6931 return (PF_DROP);
6932 }
6933 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
6934 sizeof(pd->jumbolen));
6935 pd->jumbolen = ntohl(pd->jumbolen);
6936 if (pd->jumbolen < IPV6_MAXPACKET) {
6937 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen");
6938 REASON_SET(reason, PFRES_IPOPTIONS);
6939 return (PF_DROP);
6940 }
6941 break;
6942 case IP6OPT_ROUTER_ALERT:
6943 SET(pd->badopts, PF_OPT_ROUTER_ALERT);
6944 break;
6945 default:
6946 SET(pd->badopts, PF_OPT_OTHER);
6947 break;
6948 }
6949 off += sizeof(opt) + opt.ip6o_len;
6950 }
6951
6952 return (PF_PASS);
6953 }
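/*
 * Jumbo payload handling above is deliberately strict: at most one
 * IP6OPT_JUMBO option may appear, it is only valid while ip6_plen is
 * zero, and the carried length must be at least IPV6_MAXPACKET,
 * since anything smaller fits in the regular ip6_plen field.
 */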
6954
6955 int
6956 pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
6957 {
6958 struct ip6_frag frag;
6959 struct ip6_ext ext;
6960 struct icmp6_hdr icmp6;
6961 struct ip6_rthdr rthdr;
6962 u_int32_t end;
6963 int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;
6964
6965 pd->off += sizeof(struct ip6_hdr);
6966 end = pd->off + ntohs(h->ip6_plen);
6967 pd->fragoff = pd->extoff = pd->jumbolen = 0;
6968 pd->proto = h->ip6_nxt;
6969
6970 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
6971 switch (pd->proto) {
6972 case IPPROTO_ROUTING:
6973 case IPPROTO_DSTOPTS:
6974 SET(pd->badopts, PF_OPT_OTHER);
6975 break;
6976 case IPPROTO_HOPOPTS:
6977 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
6978 NULL, reason, AF_INET6)) {
6979 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
6980 return (PF_DROP);
6981 }
6982 if (pf_walk_option6(pd, h, pd->off + sizeof(ext),
6983 pd->off + (ext.ip6e_len + 1) * 8, reason)
6984 != PF_PASS)
6985 return (PF_DROP);
6986 /* option header which contains only padding is fishy */
6987 if (pd->badopts == 0)
6988 SET(pd->badopts, PF_OPT_OTHER);
6989 break;
6990 }
6991 switch (pd->proto) {
6992 case IPPROTO_FRAGMENT:
6993 if (fraghdr_cnt++) {
6994 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment");
6995 REASON_SET(reason, PFRES_FRAG);
6996 return (PF_DROP);
6997 }
6998 /* jumbo payload packets cannot be fragmented */
6999 if (pd->jumbolen != 0) {
7000 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo");
7001 REASON_SET(reason, PFRES_FRAG);
7002 return (PF_DROP);
7003 }
7004 if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
7005 NULL, reason, AF_INET6)) {
7006 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment");
7007 return (PF_DROP);
7008 }
7009 /* stop walking over non-initial fragments */
7010 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
7011 pd->fragoff = pd->off;
7012 return (PF_PASS);
7013 }
7014 /* RFC 6946: reassemble only non-atomic fragments */
7015 if (frag.ip6f_offlg & IP6F_MORE_FRAG)
7016 pd->fragoff = pd->off;
7017 pd->off += sizeof(frag);
7018 pd->proto = frag.ip6f_nxt;
7019 break;
7020 case IPPROTO_ROUTING:
7021 if (rthdr_cnt++) {
7022 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr");
7023 REASON_SET(reason, PFRES_IPOPTIONS);
7024 return (PF_DROP);
7025 }
7026 /* fragments may be short */
7027 if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
7028 pd->off = pd->fragoff;
7029 pd->proto = IPPROTO_FRAGMENT;
7030 return (PF_PASS);
7031 }
7032 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
7033 NULL, reason, AF_INET6)) {
7034 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr");
7035 return (PF_DROP);
7036 }
7037 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
7038 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0");
7039 REASON_SET(reason, PFRES_IPOPTIONS);
7040 return (PF_DROP);
7041 }
7042 /* FALLTHROUGH */
7043 case IPPROTO_HOPOPTS:
7044 /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */
7045 if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) {
7046 DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first");
7047 REASON_SET(reason, PFRES_IPOPTIONS);
7048 return (PF_DROP);
7049 }
7050 /* FALLTHROUGH */
7051 case IPPROTO_AH:
7052 case IPPROTO_DSTOPTS:
7053 /* fragments may be short */
7054 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
7055 pd->off = pd->fragoff;
7056 pd->proto = IPPROTO_FRAGMENT;
7057 return (PF_PASS);
7058 }
7059 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
7060 NULL, reason, AF_INET6)) {
7061 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
7062 return (PF_DROP);
7063 }
7064 /* reassembly needs the ext header before the frag */
7065 if (pd->fragoff == 0)
7066 pd->extoff = pd->off;
7067 if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 &&
7068 ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) {
7069 DPFPRINTF(LOG_NOTICE, "IPv6 missing jumbo");
7070 REASON_SET(reason, PFRES_IPOPTIONS);
7071 return (PF_DROP);
7072 }
7073 if (pd->proto == IPPROTO_AH)
7074 pd->off += (ext.ip6e_len + 2) * 4;
7075 else
7076 pd->off += (ext.ip6e_len + 1) * 8;
7077 pd->proto = ext.ip6e_nxt;
7078 break;
7079 case IPPROTO_ICMPV6:
7080 /* fragments may be short; ignore the inner header then */
7081 if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) {
7082 pd->off = pd->fragoff;
7083 pd->proto = IPPROTO_FRAGMENT;
7084 return (PF_PASS);
7085 }
7086 if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6),
7087 NULL, reason, AF_INET6)) {
7088 DPFPRINTF(LOG_NOTICE, "IPv6 short icmp6hdr");
7089 return (PF_DROP);
7090 }
7091 /* ICMP multicast packets have router alert options */
7092 switch (icmp6.icmp6_type) {
7093 case MLD_LISTENER_QUERY:
7094 case MLD_LISTENER_REPORT:
7095 case MLD_LISTENER_DONE:
7096 case MLDV2_LISTENER_REPORT:
7097 /*
7098 * According to RFC 2710 all MLD messages are
7099 * sent with hop-limit (ttl) set to 1 and a
7100 * link-local source address. If either one is
7101 * missing then the MLD message is invalid and
7102 * should be discarded.
7103 */
7104 if ((h->ip6_hlim != 1) ||
7105 !IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) {
7106 DPFPRINTF(LOG_NOTICE, "Invalid MLD");
7107 REASON_SET(reason, PFRES_IPOPTIONS);
7108 return (PF_DROP);
7109 }
7110 CLR(pd->badopts, PF_OPT_ROUTER_ALERT);
7111 break;
7112 }
7113 return (PF_PASS);
7114 case IPPROTO_TCP:
7115 case IPPROTO_UDP:
7116 /* fragments may be short; ignore the inner header then */
7117 if (pd->fragoff != 0 && end < pd->off +
7118 (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
7119 pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
7120 sizeof(struct icmp6_hdr))) {
7121 pd->off = pd->fragoff;
7122 pd->proto = IPPROTO_FRAGMENT;
7123 }
7124 /* FALLTHROUGH */
7125 default:
7126 return (PF_PASS);
7127 }
7128 }
7129 DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit");
7130 REASON_SET(reason, PFRES_IPOPTIONS);
7131 return (PF_DROP);
7132 }
7133 #endif /* INET6 */
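/*
 * pf_walk_header6() walks the IPv6 extension header chain up to
 * pf_hdr_limit headers: it records the fragment header position in
 * pd->fragoff and the last extension header before it in pd->extoff,
 * enforces ordering (hop-by-hop only first, a single routing header,
 * no type 0 routing header, a single fragment header) and, following
 * RFC 6946, treats only non-atomic fragments as fragments.  When a
 * trailing fragment is too short to hold the next header, pd->off is
 * rewound to the fragment header and the packet is handled as a
 * plain fragment instead of being dropped.
 */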
7134
7135 u_int16_t
7136 pf_pkt_hash(sa_family_t af, uint8_t proto,
7137 const struct pf_addr *src, const struct pf_addr *dst,
7138 uint16_t sport, uint16_t dport)
7139 {
7140 uint32_t hash;
7141
7142 hash = src->addr32[0] ^ dst->addr32[0];
7143 #ifdef INET6
7144 if (af == AF_INET6) {
7145 hash ^= src->addr32[1] ^ dst->addr32[1];
7146 hash ^= src->addr32[2] ^ dst->addr32[2];
7147 hash ^= src->addr32[3] ^ dst->addr32[3];
7148 }
7149 #endif
7150
7151 switch (proto) {
7152 case IPPROTO_TCP:
7153 case IPPROTO_UDP:
7154 hash ^= sport ^ dport;
7155 break;
7156 }
7157
7158 return stoeplitz_n32(hash);
7159 }
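/*
 * The XOR fold makes the hash direction independent: for example
 * 10.0.0.1:12345 -> 10.0.0.2:80 and its reply 10.0.0.2:80 ->
 * 10.0.0.1:12345 produce the same 32-bit input, so both directions
 * of a connection map to the same stoeplitz_n32() value and hence
 * the same state bucket and flow id.
 */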
7160
7161 int
7162 pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir,
7163 struct pfi_kif *kif, struct mbuf *m, u_short *reason)
7164 {
7165 memset(pd, 0, sizeof(*pd));
7166 pd->dir = dir;
7167 pd->kif = kif; /* kif is NULL when called by pflog */
7168 pd->m = m;
7169 pd->sidx = (dir == PF_IN) ? 0 : 1;
7170 pd->didx = (dir == PF_IN) ? 1 : 0;
7171 pd->af = pd->naf = af;
7172 pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid);
7173
7174 switch (pd->af) {
7175 case AF_INET: {
7176 struct ip *h;
7177
7178 /* Check for illegal packets */
7179 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) {
7180 REASON_SET(reason, PFRES_SHORT);
7181 return (PF_DROP);
7182 }
7183
7184 h = mtod(pd->m, struct ip *);
7185 if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) {
7186 REASON_SET(reason, PFRES_SHORT);
7187 return (PF_DROP);
7188 }
7189
7190 if (pf_walk_header(pd, h, reason) != PF_PASS)
7191 return (PF_DROP);
7192
7193 pd->src = (struct pf_addr *)&h->ip_src;
7194 pd->dst = (struct pf_addr *)&h->ip_dst;
7195 pd->tot_len = ntohs(h->ip_len);
7196 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
7197 pd->ttl = h->ip_ttl;
7198 pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ?
7199 PF_VPROTO_FRAGMENT : pd->proto;
7200
7201 break;
7202 }
7203 #ifdef INET6
7204 case AF_INET6: {
7205 struct ip6_hdr *h;
7206
7207 /* Check for illegal packets */
7208 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) {
7209 REASON_SET(reason, PFRES_SHORT);
7210 return (PF_DROP);
7211 }
7212
7213 h = mtod(pd->m, struct ip6_hdr *);
7214 if (pd->m->m_pkthdr.len <
7215 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) {
7216 REASON_SET(reason, PFRES_SHORT);
7217 return (PF_DROP);
7218 }
7219
7220 if (pf_walk_header6(pd, h, reason) != PF_PASS)
7221 return (PF_DROP);
7222
7223 #if 1
7224 /*
7225 * We do not support jumbograms yet. If we keep going, a zero
7226 * ip6_plen will do something bad, so drop the packet for now.
7227 */
7228 if (pd->jumbolen != 0) {
7229 REASON_SET(reason, PFRES_NORM);
7230 return (PF_DROP);
7231 }
7232 #endif /* 1 */
7233
7234 pd->src = (struct pf_addr *)&h->ip6_src;
7235 pd->dst = (struct pf_addr *)&h->ip6_dst;
7236 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
7237 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20;
7238 pd->ttl = h->ip6_hlim;
7239 pd->virtual_proto = (pd->fragoff != 0) ?
7240 PF_VPROTO_FRAGMENT : pd->proto;
7241
7242 break;
7243 }
7244 #endif /* INET6 */
7245 default:
7246 panic("pf_setup_pdesc called with illegal af %u", pd->af);
7247
7248 }
7249
7250 pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
7251 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
7252
7253 switch (pd->virtual_proto) {
7254 case IPPROTO_TCP: {
7255 struct tcphdr *th = &pd->hdr.tcp;
7256
7257 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
7258 NULL, reason, pd->af))
7259 return (PF_DROP);
7260 pd->hdrlen = sizeof(*th);
7261 if (th->th_dport == 0 ||
7262 pd->off + (th->th_off << 2) > pd->tot_len ||
7263 (th->th_off << 2) < sizeof(struct tcphdr)) {
7264 REASON_SET(reason, PFRES_SHORT);
7265 return (PF_DROP);
7266 }
7267 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2);
7268 pd->sport = &th->th_sport;
7269 pd->dport = &th->th_dport;
7270 pd->pcksum = &th->th_sum;
7271 break;
7272 }
7273 case IPPROTO_UDP: {
7274 struct udphdr *uh = &pd->hdr.udp;
7275
7276 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
7277 NULL, reason, pd->af))
7278 return (PF_DROP);
7279 pd->hdrlen = sizeof(*uh);
7280 if (uh->uh_dport == 0 ||
7281 pd->off + ntohs(uh->uh_ulen) > pd->tot_len ||
7282 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
7283 REASON_SET(reason, PFRES_SHORT);
7284 return (PF_DROP);
7285 }
7286 pd->sport = &uh->uh_sport;
7287 pd->dport = &uh->uh_dport;
7288 pd->pcksum = &uh->uh_sum;
7289 break;
7290 }
7291 case IPPROTO_ICMP: {
7292 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
7293 NULL, reason, pd->af))
7294 return (PF_DROP);
7295 pd->hdrlen = ICMP_MINLEN;
7296 if (pd->off + pd->hdrlen > pd->tot_len) {
7297 REASON_SET(reason, PFRES_SHORT);
7298 return (PF_DROP);
7299 }
7300 pd->pcksum = &pd->hdr.icmp.icmp_cksum;
7301 break;
7302 }
7303 #ifdef INET6
7304 case IPPROTO_ICMPV6: {
7305 size_t icmp_hlen = sizeof(struct icmp6_hdr);
7306
7307 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
7308 NULL, reason, pd->af))
7309 return (PF_DROP);
7310 /* ICMP headers we look further into to match state */
7311 switch (pd->hdr.icmp6.icmp6_type) {
7312 case MLD_LISTENER_QUERY:
7313 case MLD_LISTENER_REPORT:
7314 icmp_hlen = sizeof(struct mld_hdr);
7315 break;
7316 case ND_NEIGHBOR_SOLICIT:
7317 case ND_NEIGHBOR_ADVERT:
7318 icmp_hlen = sizeof(struct nd_neighbor_solicit);
7319 /* FALLTHROUGH */
7320 case ND_ROUTER_SOLICIT:
7321 case ND_ROUTER_ADVERT:
7322 case ND_REDIRECT:
7323 if (pd->ttl != 255) {
7324 REASON_SET(reason, PFRES_NORM);
7325 return (PF_DROP);
7326 }
7327 break;
7328 }
7329 if (icmp_hlen > sizeof(struct icmp6_hdr) &&
7330 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
7331 NULL, reason, pd->af))
7332 return (PF_DROP);
7333 pd->hdrlen = icmp_hlen;
7334 if (pd->off + pd->hdrlen > pd->tot_len) {
7335 REASON_SET(reason, PFRES_SHORT);
7336 return (PF_DROP);
7337 }
7338 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum;
7339 break;
7340 }
7341 #endif /* INET6 */
7342 }
7343
7344 if (pd->sport)
7345 pd->osport = pd->nsport = *pd->sport;
7346 if (pd->dport)
7347 pd->odport = pd->ndport = *pd->dport;
7348
7349 pd->hash = pf_pkt_hash(pd->af, pd->proto,
7350 pd->src, pd->dst, pd->osport, pd->odport);
7351
7352 return (PF_PASS);
7353 }
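/*
 * pf_setup_pdesc() fills in the packet descriptor everything else in
 * pf operates on: family, direction, address/port pointers, TTL/TOS,
 * and a precomputed flow hash.  Fragments get pd->virtual_proto set
 * to PF_VPROTO_FRAGMENT so rules can treat them specially, and basic
 * per-protocol sanity checks (TCP data offset, UDP length, hop limit
 * 255 for IPv6 neighbor discovery) already drop malformed packets
 * here, before any rule or state lookup runs.
 */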
7354
7355 void
7356 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *st,
7357 struct pf_rule *r, struct pf_rule *a)
7358 {
7359 int dirndx;
7360 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT]
7361 [action != PF_PASS] += pd->tot_len;
7362 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT]
7363 [action != PF_PASS]++;
7364
7365 if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) {
7366 dirndx = (pd->dir == PF_OUT);
7367 r->packets[dirndx]++;
7368 r->bytes[dirndx] += pd->tot_len;
7369 if (a != NULL) {
7370 a->packets[dirndx]++;
7371 a->bytes[dirndx] += pd->tot_len;
7372 }
7373 if (st != NULL) {
7374 struct pf_rule_item *ri;
7375 struct pf_sn_item *sni;
7376
7377 SLIST_FOREACH(sni, &st->src_nodes, next) {
7378 sni->sn->packets[dirndx]++;
7379 sni->sn->bytes[dirndx] += pd->tot_len;
7380 }
7381 dirndx = (pd->dir == st->direction) ? 0 : 1;
7382 st->packets[dirndx]++;
7383 st->bytes[dirndx] += pd->tot_len;
7384
7385 SLIST_FOREACH(ri, &st->match_rules, entry) {
7386 ri->r->packets[dirndx]++;
7387 ri->r->bytes[dirndx] += pd->tot_len;
7388
7389 if (ri->r->src.addr.type == PF_ADDR_TABLE)
7390 pfr_update_stats(ri->r->src.addr.p.tbl,
7391 &st->key[(st->direction == PF_IN)]->
7392 addr[(st->direction == PF_OUT)],
7393 pd, ri->r->action, ri->r->src.neg);
7394 if (ri->r->dst.addr.type == PF_ADDR_TABLE)
7395 pfr_update_stats(ri->r->dst.addr.p.tbl,
7396 &st->key[(st->direction == PF_IN)]->
7397 addr[(st->direction == PF_IN)],
7398 pd, ri->r->action, ri->r->dst.neg);
7399 }
7400 }
7401 if (r->src.addr.type == PF_ADDR_TABLE)
7402 pfr_update_stats(r->src.addr.p.tbl,
7403 (st == NULL) ? pd->src :
7404 &st->key[(st->direction == PF_IN)]->
7405 addr[(st->direction == PF_OUT)],
7406 pd, r->action, r->src.neg);
7407 if (r->dst.addr.type == PF_ADDR_TABLE)
7408 pfr_update_stats(r->dst.addr.p.tbl,
7409 (st == NULL) ? pd->dst :
7410 &st->key[(st->direction == PF_IN)]->
7411 addr[(st->direction == PF_IN)],
7412 pd, r->action, r->dst.neg);
7413 }
7414 }
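/*
 * Counter layout: the interface counters above are indexed by
 * [af == AF_INET6][dir == PF_OUT][action != PF_PASS], rule and
 * source-node counters use dirndx 0/1 for in/out, and state counters
 * use index 0 for packets flowing in the state's original direction.
 * Radix table entries are updated separately via pfr_update_stats().
 */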
7415
7416 int
7417 pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
7418 {
7419 #if NCARP > 0
7420 struct ifnet *ifp0;
7421 #endif
7422 struct pfi_kif *kif;
7423 u_short action, reason = 0;
7424 struct pf_rule *a = NULL, *r = &pf_default_rule;
7425 struct pf_state *st = NULL;
7426 struct pf_state_key_cmp key;
7427 struct pf_ruleset *ruleset = NULL;
7428 struct pf_pdesc pd;
7429 int dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
7430 u_int32_t qid, pqid = 0;
7431 int have_pf_lock = 0;
7432 struct pfsync_deferral *deferral = NULL;
7433
7434 if (!pf_status.running)
7435 return (PF_PASS);
7436
7437 #if NCARP > 0
7438 if (ifp->if_type == IFT_CARP &&
7439 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) {
7440 kif = (struct pfi_kif *)ifp0->if_pf_kif;
7441 if_put(ifp0);
7442 } else
7443 #endif /* NCARP */
7444 kif = (struct pfi_kif *)ifp->if_pf_kif;
7445
7446 if (kif == NULL) {
7447 DPFPRINTF(LOG_ERR,
7448 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname);
7449 return (PF_DROP);
7450 }
7451 if (kif->pfik_flags & PFI_IFLAG_SKIP)
7452 return (PF_PASS);
7453
7454 #ifdef DIAGNOSTIC
7455 if (((*m0)->m_flags & M_PKTHDR) == 0)
7456 panic("non-M_PKTHDR is passed to pf_test");
7457 #endif /* DIAGNOSTIC */
7458
7459 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED)
7460 return (PF_PASS);
7461
7462 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) {
7463 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET;
7464 return (PF_PASS);
7465 }
7466
7467 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) {
7468 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED;
7469 return (PF_PASS);
7470 }
7471
7472 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason);
7473 if (action != PF_PASS) {
7474 #if NPFLOG > 0
7475 pd.pflog |= PF_LOG_FORCE;
7476 #endif /* NPFLOG > 0 */
7477 goto done;
7478 }
7479
7480 /* packet normalization and reassembly */
7481 switch (pd.af) {
7482 case AF_INET:
7483 action = pf_normalize_ip(&pd, &reason);
7484 break;
7485 #ifdef INET6
7486 case AF_INET6:
7487 action = pf_normalize_ip6(&pd, &reason);
7488 break;
7489 #endif /* INET6 */
7490 }
7491 *m0 = pd.m;
7492 /* if packet sits in reassembly queue, return without error */
7493 if (pd.m == NULL)
7494 return PF_PASS;
7495
7496 if (action != PF_PASS) {
7497 #if NPFLOG > 0
7498 pd.pflog |= PF_LOG_FORCE;
7499 #endif /* NPFLOG > 0 */
7500 goto done;
7501 }
7502
7503 /* if packet has been reassembled, update packet description */
7504 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) {
7505 action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason);
7506 if (action != PF_PASS) {
7507 #if NPFLOG > 0
7508 pd.pflog |= PF_LOG_FORCE;
7509 #endif /* NPFLOG > 0 */
7510 goto done;
7511 }
7512 }
7513 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;
7514
7515 /*
7516 * Avoid pcb-lookups from the forwarding path. They should never
7517 * match and would cause MP locking problems.
7518 */
7519 if (fwdir == PF_FWD) {
7520 pd.lookup.done = -1;
7521 pd.lookup.uid = -1;
7522 pd.lookup.gid = -1;
7523 pd.lookup.pid = NO_PID;
7524 }
7525
7526 switch (pd.virtual_proto) {
7527
7528 case PF_VPROTO_FRAGMENT: {
7529 /*
7530 * handle fragments that aren't reassembled by
7531 * normalization
7532 */
7533 PF_LOCK();
7534 have_pf_lock = 1;
7535 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, &reason,
7536 &deferral);
7537 st = pf_state_ref(st);
7538 if (action != PF_PASS)
7539 REASON_SET(&reason, PFRES_FRAG);
7540 break;
7541 }
7542
7543 case IPPROTO_ICMP: {
7544 if (pd.af != AF_INET) {
7545 action = PF_DROP;
7546 REASON_SET(&reason, PFRES_NORM);
7547 DPFPRINTF(LOG_NOTICE,
7548 "dropping IPv6 packet with ICMPv4 payload");
7549 break;
7550 }
7551 PF_STATE_ENTER_READ();
7552 action = pf_test_state_icmp(&pd, &st, &reason);
7553 st = pf_state_ref(st);
7554 PF_STATE_EXIT_READ();
7555 if (action == PF_PASS || action == PF_AFRT) {
7556 #if NPFSYNC > 0
7557 pfsync_update_state(st);
7558 #endif /* NPFSYNC > 0 */
7559 r = st->rule.ptr;
7560 a = st->anchor.ptr;
7561 #if NPFLOG > 0
7562 pd.pflog |= st->log;
7563 #endif /* NPFLOG > 0 */
7564 } else if (st == NULL) {
7565 PF_LOCK();
7566 have_pf_lock = 1;
7567 action = pf_test_rule(&pd, &r, &st, &a, &ruleset,
7568 &reason, &deferral);
7569 st = pf_state_ref(st);
7570 }
7571 break;
7572 }
7573
7574 #ifdef INET6
7575 case IPPROTO_ICMPV6: {
7576 if (pd.af != AF_INET6) {
7577 action = PF_DROP;
7578 REASON_SET(&reason, PFRES_NORM);
7579 DPFPRINTF(LOG_NOTICE,
7580 "dropping IPv4 packet with ICMPv6 payload");
7581 break;
7582 }
7583 PF_STATE_ENTER_READ();
7584 action = pf_test_state_icmp(&pd, &st, &reason);
7585 st = pf_state_ref(st);
7586 PF_STATE_EXIT_READ();
7587 if (action == PF_PASS || action == PF_AFRT) {
7588 #if NPFSYNC > 0
7589 pfsync_update_state(st);
7590 #endif /* NPFSYNC > 0 */
7591 r = st->rule.ptr;
7592 a = st->anchor.ptr;
7593 #if NPFLOG > 0
7594 pd.pflog |= st->log;
7595 #endif /* NPFLOG > 0 */
7596 } else if (st == NULL) {
7597 PF_LOCK();
7598 have_pf_lock = 1;
7599 action = pf_test_rule(&pd, &r, &st, &a, &ruleset,
7600 &reason, &deferral);
7601 st = pf_state_ref(st);
7602 }
7603 break;
7604 }
7605 #endif /* INET6 */
7606
7607 default:
7608 if (pd.virtual_proto == IPPROTO_TCP) {
7609 if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags &
7610 (TH_SYN|TH_ACK)) == TH_SYN &&
7611 pf_synflood_check(&pd)) {
7612 PF_LOCK();
7613 have_pf_lock = 1;
7614 pf_syncookie_send(&pd);
7615 action = PF_DROP;
7616 break;
7617 }
7618 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0)
7619 pqid = 1;
7620 action = pf_normalize_tcp(&pd);
7621 if (action == PF_DROP)
7622 break;
7623 }
7624
7625 key.af = pd.af;
7626 key.proto = pd.virtual_proto;
7627 key.rdomain = pd.rdomain;
7628 pf_addrcpy(&key.addr[pd.sidx], pd.src, key.af);
7629 pf_addrcpy(&key.addr[pd.didx], pd.dst, key.af);
7630 key.port[pd.sidx] = pd.osport;
7631 key.port[pd.didx] = pd.odport;
7632 key.hash = pd.hash;
7633
7634 PF_STATE_ENTER_READ();
7635 action = pf_find_state(&pd, &key, &st);
7636 st = pf_state_ref(st);
7637 PF_STATE_EXIT_READ();
7638
7639 /* check for syncookies if tcp ack and no active state */
7640 if (pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP &&
7641 (st == NULL || (st->src.state >= TCPS_FIN_WAIT_2 &&
7642 st->dst.state >= TCPS_FIN_WAIT_2)) &&
7643 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK &&
7644 pf_syncookie_validate(&pd)) {
7645 struct mbuf *msyn = pf_syncookie_recreate_syn(&pd);
7646 if (msyn) {
7647 action = pf_test(af, fwdir, ifp, &msyn);
7648 m_freem(msyn);
7649 if (action == PF_PASS || action == PF_AFRT) {
7650 PF_STATE_ENTER_READ();
7651 pf_state_unref(st);
7652 action = pf_find_state(&pd, &key, &st);
7653 st = pf_state_ref(st);
7654 PF_STATE_EXIT_READ();
7655 if (st == NULL)
7656 return (PF_DROP);
7657 st->src.seqhi = st->dst.seqhi =
7658 ntohl(pd.hdr.tcp.th_ack) - 1;
7659 st->src.seqlo =
7660 ntohl(pd.hdr.tcp.th_seq) - 1;
7661 pf_set_protostate(st, PF_PEER_SRC,
7662 PF_TCPS_PROXY_DST);
7663 }
7664 } else
7665 action = PF_DROP;
7666 }
7667
7668 if (action == PF_MATCH)
7669 action = pf_test_state(&pd, &st, &reason);
7670
7671 if (action == PF_PASS || action == PF_AFRT) {
7672 #if NPFSYNC > 0
7673 pfsync_update_state(st);
7674 #endif /* NPFSYNC > 0 */
7675 r = st->rule.ptr;
7676 a = st->anchor.ptr;
7677 #if NPFLOG > 0
7678 pd.pflog |= st->log;
7679 #endif /* NPFLOG > 0 */
7680 } else if (st == NULL) {
7681 PF_LOCK();
7682 have_pf_lock = 1;
7683 action = pf_test_rule(&pd, &r, &st, &a, &ruleset,
7684 &reason, &deferral);
7685 st = pf_state_ref(st);
7686 }
7687
7688 if (pd.virtual_proto == IPPROTO_TCP) {
7689 if (st) {
7690 if (st->max_mss)
7691 pf_normalize_mss(&pd, st->max_mss);
7692 } else if (r->max_mss)
7693 pf_normalize_mss(&pd, r->max_mss);
7694 }
7695
7696 break;
7697 }
7698
7699 if (have_pf_lock != 0)
7700 PF_UNLOCK();
7701
7702 /*
7703 * At the moment, we rely on NET_LOCK() to prevent removal of items
7704 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have
7705 * to be refcounted when NET_LOCK() is gone.
7706 */
7707
7708 done:
7709 if (action != PF_DROP) {
7710 if (st) {
7711 /* The non-state case is handled in pf_test_rule() */
7712 if (action == PF_PASS && pd.badopts != 0 &&
7713 !(st->state_flags & PFSTATE_ALLOWOPTS)) {
7714 action = PF_DROP;
7715 REASON_SET(&reason, PFRES_IPOPTIONS);
7716 #if NPFLOG > 0
7717 pd.pflog |= PF_LOG_FORCE;
7718 #endif /* NPFLOG > 0 */
7719 DPFPRINTF(LOG_NOTICE, "dropping packet with "
7720 "ip/ipv6 options in pf_test()");
7721 }
7722
7723 pf_scrub(pd.m, st->state_flags, pd.af, st->min_ttl,
7724 st->set_tos);
7725 pf_tag_packet(pd.m, st->tag, st->rtableid[pd.didx]);
7726 if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
7727 qid = st->pqid;
7728 if (st->state_flags & PFSTATE_SETPRIO) {
7729 pd.m->m_pkthdr.pf.prio =
7730 st->set_prio[1];
7731 }
7732 } else {
7733 qid = st->qid;
7734 if (st->state_flags & PFSTATE_SETPRIO) {
7735 pd.m->m_pkthdr.pf.prio =
7736 st->set_prio[0];
7737 }
7738 }
7739 pd.m->m_pkthdr.pf.delay = st->delay;
7740 } else {
7741 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl,
7742 r->set_tos);
7743 if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
7744 qid = r->pqid;
7745 if (r->scrub_flags & PFSTATE_SETPRIO)
7746 pd.m->m_pkthdr.pf.prio = r->set_prio[1];
7747 } else {
7748 qid = r->qid;
7749 if (r->scrub_flags & PFSTATE_SETPRIO)
7750 pd.m->m_pkthdr.pf.prio = r->set_prio[0];
7751 }
7752 pd.m->m_pkthdr.pf.delay = r->delay;
7753 }
7754 }
7755
7756 if (action == PF_PASS && qid)
7757 pd.m->m_pkthdr.pf.qid = qid;
7758 if (pd.dir == PF_IN && st && st->key[PF_SK_STACK])
7759 pf_mbuf_link_state_key(pd.m, st->key[PF_SK_STACK]);
7760 if (pd.dir == PF_OUT &&
7761 pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk &&
7762 st && st->key[PF_SK_STACK] && !st->key[PF_SK_STACK]->sk_inp)
7763 pf_state_key_link_inpcb(st->key[PF_SK_STACK],
7764 pd.m->m_pkthdr.pf.inp);
7765
7766 if (st != NULL && !ISSET(pd.m->m_pkthdr.csum_flags, M_FLOWID)) {
7767 pd.m->m_pkthdr.ph_flowid = st->key[PF_SK_WIRE]->hash;
7768 SET(pd.m->m_pkthdr.csum_flags, M_FLOWID);
7769 }
7770
7771 /*
7772 * connections redirected to loopback should not match sockets
7773 * bound specifically to loopback due to security implications,
7774 * see in_pcblookup_listen().
7775 */
7776 if (pd.destchg)
7777 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >>
7778 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) ||
7779 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)))
7780 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
7781 /* We need to redo the route lookup on the outgoing path. */
7782 if (pd.destchg && pd.dir == PF_OUT)
7783 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE;
7784
7785 if (pd.dir == PF_IN && action == PF_PASS &&
7786 (r->divert.type == PF_DIVERT_TO ||
7787 r->divert.type == PF_DIVERT_REPLY)) {
7788 struct pf_divert *divert;
7789
7790 if ((divert = pf_get_divert(pd.m))) {
7791 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
7792 divert->addr = r->divert.addr;
7793 divert->port = r->divert.port;
7794 divert->rdomain = pd.rdomain;
7795 divert->type = r->divert.type;
7796 }
7797 }
7798
7799 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET)
7800 action = PF_DIVERT;
7801
7802 #if NPFLOG > 0
7803 if (pd.pflog) {
7804 struct pf_rule_item *ri;
7805
7806 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL)
7807 pflog_packet(&pd, reason, r, a, ruleset, NULL);
7808 if (st) {
7809 SLIST_FOREACH(ri, &st->match_rules, entry)
7810 if (ri->r->log & PF_LOG_ALL)
7811 pflog_packet(&pd, reason, ri->r, a,
7812 ruleset, NULL);
7813 }
7814 }
7815 #endif /* NPFLOG > 0 */
7816
7817 pf_counters_inc(action, &pd, st, r, a);
7818
7819 switch (action) {
7820 case PF_SYNPROXY_DROP:
7821 m_freem(pd.m);
7822 /* FALLTHROUGH */
7823 case PF_DEFER:
7824 #if NPFSYNC > 0
7825 /*
7826 * We no longer hold PF_LOCK() here, so we can dispatch
7827 * the deferral if we are asked to do so.
7828 */
7829 if (deferral != NULL)
7830 pfsync_undefer(deferral, 0);
7831 #endif /* NPFSYNC > 0 */
7832 pd.m = NULL;
7833 action = PF_PASS;
7834 break;
7835 case PF_DIVERT:
7836 switch (pd.af) {
7837 case AF_INET:
7838 divert_packet(pd.m, pd.dir, r->divert.port);
7839 pd.m = NULL;
7840 break;
7841 #ifdef INET6
7842 case AF_INET6:
7843 divert6_packet(pd.m, pd.dir, r->divert.port);
7844 pd.m = NULL;
7845 break;
7846 #endif /* INET6 */
7847 }
7848 action = PF_PASS;
7849 break;
7850 #ifdef INET6
7851 case PF_AFRT:
7852 if (pf_translate_af(&pd)) {
7853 action = PF_DROP;
7854 break;
7855 }
7856 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
7857 switch (pd.naf) {
7858 case AF_INET:
7859 if (pd.dir == PF_IN) {
7860 if (ipforwarding == 0) {
7861 ipstat_inc(ips_cantforward);
7862 action = PF_DROP;
7863 break;
7864 }
7865 ip_forward(pd.m, ifp, NULL, 1);
7866 } else
7867 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0);
7868 break;
7869 case AF_INET6:
7870 if (pd.dir == PF_IN) {
7871 if (ip6_forwarding == 0) {
7872 ip6stat_inc(ip6s_cantforward);
7873 action = PF_DROP;
7874 break;
7875 }
7876 ip6_forward(pd.m, NULL, 1);
7877 } else
7878 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL);
7879 break;
7880 }
7881 if (action != PF_DROP) {
7882 pd.m = NULL;
7883 action = PF_PASS;
7884 }
7885 break;
7886 #endif /* INET6 */
7887 case PF_DROP:
7888 m_freem(pd.m);
7889 pd.m = NULL;
7890 break;
7891 default:
7892 if (st && st->rt) {
7893 switch (pd.af) {
7894 case AF_INET:
7895 pf_route(&pd, st);
7896 break;
7897 #ifdef INET6
7898 case AF_INET6:
7899 pf_route6(&pd, st);
7900 break;
7901 #endif /* INET6 */
7902 }
7903 }
7904 break;
7905 }
7906
7907 #ifdef INET6
7908 /* if reassembled packet passed, create new fragments */
7909 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD &&
7910 pd.af == AF_INET6) {
7911 struct m_tag *mtag;
7912
7913 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)))
7914 action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL);
7915 }
7916 #endif /* INET6 */
7917 if (st && action != PF_DROP) {
7918 if (!st->if_index_in && dir == PF_IN)
7919 st->if_index_in = ifp->if_index;
7920 else if (!st->if_index_out && dir == PF_OUT)
7921 st->if_index_out = ifp->if_index;
7922 }
7923
7924 *m0 = pd.m;
7925
7926 pf_state_unref(st);
7927
7928 return (action);
7929 }
7930
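/*
 * pf_ouraddr() reports whether pf already knows that this packet is
 * destined to the local host.  It returns 1 when the packet carries a
 * PF_TAG_DIVERTED tag or when its cached state key is linked to a
 * socket (sk_inp), and -1 when pf cannot tell, in which case the
 * caller presumably falls back to its own address lookup.
 */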
7931 int
7932 pf_ouraddr(struct mbuf *m)
7933 {
7934 struct pf_state_key *sk;
7935
7936 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED)
7937 return (1);
7938
7939 sk = m->m_pkthdr.pf.statekey;
7940 if (sk != NULL) {
7941 if (sk->sk_inp != NULL)
7942 return (1);
7943 }
7944
7945 return (-1);
7946 }
7947
7948 /*
7949 	 * must be called whenever any addressing information, such as
7950 	 * the address, port, or protocol, has changed
7951 */
7952 void
7953 pf_pkt_addr_changed(struct mbuf *m)
7954 {
7955 pf_mbuf_unlink_state_key(m);
7956 pf_mbuf_unlink_inpcb(m);
7957 }
7958
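/*
 * pf_inp_lookup() returns the PCB cached on the mbuf's state key, with
 * a reference taken via in_pcbref(), or NULL if the state key is no
 * longer valid or was never linked to a socket.  A caller would be
 * expected to release the reference when done, roughly:
 *
 *	inp = pf_inp_lookup(m);
 *	if (inp != NULL) {
 *		... deliver to the socket ...
 *		in_pcbunref(inp);
 *	}
 */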
7959 struct inpcb *
7960 pf_inp_lookup(struct mbuf *m)
7961 {
7962 struct inpcb *inp = NULL;
7963 struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
7964
7965 if (!pf_state_key_isvalid(sk))
7966 pf_mbuf_unlink_state_key(m);
7967 else
7968 inp = m->m_pkthdr.pf.statekey->sk_inp;
7969
7970 if (inp && inp->inp_pf_sk)
7971 KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk);
7972
7973 in_pcbref(inp);
7974 return (inp);
7975 }
7976
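/*
 * pf_inp_link() caches the PCB found by the stack on the state key,
 * presumably so that later packets matching the same state can skip
 * the PCB lookup entirely; the mbuf's own state key reference is
 * dropped once the link is (or cannot be) established.
 */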
7977 void
7978 pf_inp_link(struct mbuf *m, struct inpcb *inp)
7979 {
7980 struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
7981
7982 if (!pf_state_key_isvalid(sk)) {
7983 pf_mbuf_unlink_state_key(m);
7984 return;
7985 }
7986
7987 /*
7988 	 * we don't need to grab the PF lock here.  In the worst case we
7989 	 * link the inp to a state that is just being marked as deleted by
7990 	 * another thread.
7991 */
7992 if (inp && !sk->sk_inp && !inp->inp_pf_sk)
7993 pf_state_key_link_inpcb(sk, inp);
7994
7995 	/* The state key has done its job of finding the inp; drop it now. */
7996 pf_mbuf_unlink_state_key(m);
7997 }
7998
7999 void
8000 pf_inp_unlink(struct inpcb *inp)
8001 {
8002 pf_inpcb_unlink_state_key(inp);
8003 }
8004
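/*
 * pf_state_key_link_reverse() ties two state keys together as each
 * other's reverse.  The links are established with atomic_cas_ptr(),
 * so concurrent callers race safely: only the thread whose CAS swings
 * the pointer from NULL takes the extra reference on the peer key.
 */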
8005 void
8006 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev)
8007 {
8008 struct pf_state_key *old_reverse;
8009
8010 old_reverse = atomic_cas_ptr(&sk->sk_reverse, NULL, skrev);
8011 if (old_reverse != NULL)
8012 KASSERT(old_reverse == skrev);
8013 else {
8014 pf_state_key_ref(skrev);
8015
8016 /*
8017 		 * NOTE: if sk == skrev, the KASSERT() below holds true.  We
8018 		 * still want to grab a reference in that case, because
8019 		 * pf_state_key_unlink_reverse() does not check whether the
8020 		 * keys are identical or not.
8021 */
8022 old_reverse = atomic_cas_ptr(&skrev->sk_reverse, NULL, sk);
8023 if (old_reverse != NULL)
8024 KASSERT(old_reverse == sk);
8025
8026 pf_state_key_ref(sk);
8027 }
8028 }
8029
8030 #if NPFLOG > 0
8031 void
8032 pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am,
8033 struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules)
8034 {
8035 struct pf_rule_item *ri;
8036
8037 	/* if this is the log(matches) rule, the packet has already been logged */
8038 if (rm->log & PF_LOG_MATCHES)
8039 return;
8040
8041 SLIST_FOREACH(ri, matchrules, entry)
8042 if (ri->r->log & PF_LOG_MATCHES)
8043 pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r);
8044 }
8045 #endif /* NPFLOG > 0 */
8046
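/*
 * Reference counting for state keys: pf_state_key_ref() tolerates a
 * NULL argument and returns its argument so it can be used inline,
 * e.g. "dst_sk = pf_state_key_ref(src_sk);" (hypothetical usage).
 * pf_state_key_unref() frees the key on the last release and asserts
 * that the key has already been unlinked from the state table, its
 * reverse key, and any socket.
 */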
8047 struct pf_state_key *
8048 pf_state_key_ref(struct pf_state_key *sk)
8049 {
8050 if (sk != NULL)
8051 PF_REF_TAKE(sk->sk_refcnt);
8052
8053 return (sk);
8054 }
8055
8056 void
8057 pf_state_key_unref(struct pf_state_key *sk)
8058 {
8059 if (PF_REF_RELE(sk->sk_refcnt)) {
8060 /* state key must be removed from tree */
8061 KASSERT(!pf_state_key_isvalid(sk));
8062 /* state key must be unlinked from reverse key */
8063 KASSERT(sk->sk_reverse == NULL);
8064 /* state key must be unlinked from socket */
8065 KASSERT(sk->sk_inp == NULL);
8066 pool_put(&pf_state_key_pl, sk);
8067 }
8068 }
8069
8070 int
8071 pf_state_key_isvalid(struct pf_state_key *sk)
8072 {
8073 return ((sk != NULL) && (sk->sk_removed == 0));
8074 }
8075
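/*
 * The helpers below manage the references held by the mbuf packet
 * header: linking takes a reference on the state key or PCB, and
 * unlinking clears the pointer and drops that reference, so an mbuf
 * can be freed or re-tagged at any point without leaking either.
 */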
8076 void
8077 pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk)
8078 {
8079 KASSERT(m->m_pkthdr.pf.statekey == NULL);
8080 m->m_pkthdr.pf.statekey = pf_state_key_ref(sk);
8081 }
8082
8083 void
8084 pf_mbuf_unlink_state_key(struct mbuf *m)
8085 {
8086 struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
8087
8088 if (sk != NULL) {
8089 m->m_pkthdr.pf.statekey = NULL;
8090 pf_state_key_unref(sk);
8091 }
8092 }
8093
8094 void
8095 pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp)
8096 {
8097 KASSERT(m->m_pkthdr.pf.inp == NULL);
8098 m->m_pkthdr.pf.inp = in_pcbref(inp);
8099 }
8100
8101 void
8102 pf_mbuf_unlink_inpcb(struct mbuf *m)
8103 {
8104 struct inpcb *inp = m->m_pkthdr.pf.inp;
8105
8106 if (inp != NULL) {
8107 m->m_pkthdr.pf.inp = NULL;
8108 in_pcbunref(inp);
8109 }
8110 }
8111
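/*
 * A state key and a PCB reference each other symmetrically, so the
 * link established below can be severed from either side: from the
 * socket side via pf_inpcb_unlink_state_key(), or from the state side
 * via pf_state_key_unlink_inpcb().  Both directions drop both
 * references.
 */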
8112 void
8113 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp)
8114 {
8115 KASSERT(sk->sk_inp == NULL);
8116 sk->sk_inp = in_pcbref(inp);
8117 KASSERT(inp->inp_pf_sk == NULL);
8118 inp->inp_pf_sk = pf_state_key_ref(sk);
8119 }
8120
8121 void
8122 pf_inpcb_unlink_state_key(struct inpcb *inp)
8123 {
8124 struct pf_state_key *sk = inp->inp_pf_sk;
8125
8126 if (sk != NULL) {
8127 KASSERT(sk->sk_inp == inp);
8128 sk->sk_inp = NULL;
8129 inp->inp_pf_sk = NULL;
8130 pf_state_key_unref(sk);
8131 in_pcbunref(inp);
8132 }
8133 }
8134
8135 void
8136 pf_state_key_unlink_inpcb(struct pf_state_key *sk)
8137 {
8138 struct inpcb *inp = sk->sk_inp;
8139
8140 if (inp != NULL) {
8141 KASSERT(inp->inp_pf_sk == sk);
8142 sk->sk_inp = NULL;
8143 inp->inp_pf_sk = NULL;
8144 pf_state_key_unref(sk);
8145 in_pcbunref(inp);
8146 }
8147 }
8148
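/*
 * pf_state_key_unlink_reverse() undoes pf_state_key_link_reverse(),
 * clearing both directions of the linkage and dropping the two
 * references taken when the keys were linked.
 */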
8149 void
8150 pf_state_key_unlink_reverse(struct pf_state_key *sk)
8151 {
8152 struct pf_state_key *skrev = sk->sk_reverse;
8153
8154 	/* Note that sk and skrev may be equal; in that case we unref twice. */
8155 if (skrev != NULL) {
8156 KASSERT(skrev->sk_reverse == sk);
8157 sk->sk_reverse = NULL;
8158 skrev->sk_reverse = NULL;
8159 pf_state_key_unref(skrev);
8160 pf_state_key_unref(sk);
8161 }
8162 }
8163
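/*
 * pf_state_ref()/pf_state_unref() mirror the state key helpers: both
 * accept NULL, and the final unref returns the state to pf_state_pl
 * after asserting it is no longer on the pfsync or global state lists
 * and after dropping its references on both state keys.
 */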
8164 struct pf_state *
8165 pf_state_ref(struct pf_state *st)
8166 {
8167 if (st != NULL)
8168 PF_REF_TAKE(st->refcnt);
8169 return (st);
8170 }
8171
8172 void
8173 pf_state_unref(struct pf_state *st)
8174 {
8175 if ((st != NULL) && PF_REF_RELE(st->refcnt)) {
8176 		/* state was never inserted, or was already removed */
8177 #if NPFSYNC > 0
8178 KASSERT((TAILQ_NEXT(st, sync_list) == NULL) ||
8179 ((TAILQ_NEXT(st, sync_list) == _Q_INVALID) &&
8180 (st->sync_state == PFSYNC_S_NONE)));
8181 #endif /* NPFSYNC */
8182 KASSERT((TAILQ_NEXT(st, entry_list) == NULL) ||
8183 (TAILQ_NEXT(st, entry_list) == _Q_INVALID));
8184
8185 pf_state_key_unref(st->key[PF_SK_WIRE]);
8186 pf_state_key_unref(st->key[PF_SK_STACK]);
8187
8188 pool_put(&pf_state_pl, st);
8189 }
8190 }
8191
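/*
 * pf_delay_pkt() implements pf's packet-delay feature by parking the
 * mbuf on a timeout that fires after m_pkthdr.pf.delay milliseconds.
 * The mbuf is consumed in every case: it is either handed to the
 * timeout or freed when no pf_pktdelay can be allocated, so a caller
 * must not touch m after this returns.
 */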
8192 int
8193 pf_delay_pkt(struct mbuf *m, u_int ifidx)
8194 {
8195 struct pf_pktdelay *pdy;
8196
8197 if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) {
8198 m_freem(m);
8199 return (ENOBUFS);
8200 }
8201 pdy->ifidx = ifidx;
8202 pdy->m = m;
8203 timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy);
8204 timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay);
8205 m->m_pkthdr.pf.delay = 0;
8206 return (0);
8207 }
8208
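/*
 * Timeout callback for pf_delay_pkt(): once the delay has elapsed the
 * packet is enqueued on the recorded interface, or freed if that
 * interface has disappeared in the meantime, and the pf_pktdelay
 * bookkeeping structure is returned to its pool.
 */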
8209 void
8210 pf_pktenqueue_delayed(void *arg)
8211 {
8212 struct pf_pktdelay *pdy = arg;
8213 struct ifnet *ifp;
8214
8215 ifp = if_get(pdy->ifidx);
8216 if (ifp != NULL) {
8217 if_enqueue(ifp, pdy->m);
8218 if_put(ifp);
8219 } else
8220 m_freem(pdy->m);
8221
8222 pool_put(&pf_pktdelay_pl, pdy);
8223 }