FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_icmp.c
1 /* $OpenBSD: ip_icmp.c,v 1.191 2022/05/05 13:57:40 claudio Exp $ */
2 /* $NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $ */
3
4 /*
5 * Copyright (c) 1982, 1986, 1988, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
33 *
34 * NRL grants permission for redistribution and use in source and binary
35 * forms, with or without modification, of the software and documentation
36 * created at NRL provided that the following conditions are met:
37 *
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgements:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * This product includes software developed at the Information
48 * Technology Division, US Naval Research Laboratory.
49 * 4. Neither the name of the NRL nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 *
65 * The views and conclusions contained in the software and documentation
66 * are those of the authors and should not be interpreted as representing
67 * official policies, either expressed or implied, of the US Naval
68 * Research Laboratory (NRL).
69 */
70
71 #include "carp.h"
72 #include "pf.h"
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/sysctl.h>
80
81 #include <net/if.h>
82 #include <net/if_var.h>
83 #include <net/route.h>
84
85 #include <netinet/in.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_icmp.h>
90 #include <netinet/ip_var.h>
91 #include <netinet/icmp_var.h>
92
93 #if NCARP > 0
94 #include <net/if_types.h>
95 #include <netinet/ip_carp.h>
96 #endif
97
98 #if NPF > 0
99 #include <net/pfvar.h>
100 #endif
101
102 /*
103 * ICMP routines: error generation, receive packet processing, and
104 * routines to turnaround packets back to the originator, and
105 * host table maintenance routines.
106 */
107
108 #ifdef ICMPPRINTFS
109 int icmpprintfs = 0; /* Settable from ddb */
110 #endif
111
112 /* values controllable via sysctl */
113 int icmpmaskrepl = 0;
114 int icmpbmcastecho = 0;
115 int icmptstamprepl = 1;
116 int icmperrppslim = 100;
117 int icmp_rediraccept = 0;
118 int icmp_redirtimeout = 10 * 60;
119
120 static int icmperrpps_count = 0;
121 static struct timeval icmperrppslim_last;
122
123 struct rttimer_queue ip_mtudisc_timeout_q;
124 struct rttimer_queue icmp_redirect_timeout_q;
125 struct cpumem *icmpcounters;
126
127 const struct sysctl_bounded_args icmpctl_vars[] = {
128 { ICMPCTL_MASKREPL, &icmpmaskrepl, 0, 1 },
129 { ICMPCTL_BMCASTECHO, &icmpbmcastecho, 0, 1 },
130 { ICMPCTL_ERRPPSLIMIT, &icmperrppslim, -1, INT_MAX },
131 { ICMPCTL_REDIRACCEPT, &icmp_rediraccept, 0, 1 },
132 { ICMPCTL_TSTAMPREPL, &icmptstamprepl, 0, 1 },
133 };
134
135
136 void icmp_mtudisc_timeout(struct rtentry *, u_int);
137 int icmp_ratelimit(const struct in_addr *, const int, const int);
138 int icmp_input_if(struct ifnet *, struct mbuf **, int *, int, int);
139 int icmp_sysctl_icmpstat(void *, size_t *, void *);
140
141 void
142 icmp_init(void)
143 {
144 rt_timer_queue_init(&ip_mtudisc_timeout_q, ip_mtudisc_timeout,
145 &icmp_mtudisc_timeout);
146 rt_timer_queue_init(&icmp_redirect_timeout_q, icmp_redirtimeout,
147 NULL);
148 icmpcounters = counters_alloc(icps_ncounters);
149 }
150
151 struct mbuf *
152 icmp_do_error(struct mbuf *n, int type, int code, u_int32_t dest, int destmtu)
153 {
154 struct ip *oip = mtod(n, struct ip *), *nip;
155 unsigned oiplen = oip->ip_hl << 2;
156 struct icmp *icp;
157 struct mbuf *m;
158 unsigned icmplen, mblen;
159
160 #ifdef ICMPPRINTFS
161 if (icmpprintfs)
162 printf("icmp_error(%x, %d, %d)\n", oip, type, code);
163 #endif
164 if (type != ICMP_REDIRECT)
165 icmpstat_inc(icps_error);
166 /*
167 * Don't send error if not the first fragment of message.
168 * Don't error if the old packet protocol was ICMP
169 * error message, only known informational types.
170 */
171 if (oip->ip_off & htons(IP_OFFMASK))
172 goto freeit;
173 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
174 n->m_len >= oiplen + ICMP_MINLEN &&
175 !ICMP_INFOTYPE(((struct icmp *)
176 ((caddr_t)oip + oiplen))->icmp_type)) {
177 icmpstat_inc(icps_oldicmp);
178 goto freeit;
179 }
180 /* Don't send error in response to a multicast or broadcast packet */
181 if (n->m_flags & (M_BCAST|M_MCAST))
182 goto freeit;
183
184 /*
185 * First, do a rate limitation check.
186 */
187 if (icmp_ratelimit(&oip->ip_src, type, code)) {
188 icmpstat_inc(icps_toofreq);
189 goto freeit;
190 }
191
192 /*
193 * Now, formulate icmp message
194 */
195 icmplen = oiplen + min(8, ntohs(oip->ip_len));
196 /*
197 * Defend against mbuf chains shorter than oip->ip_len:
198 */
199 mblen = 0;
200 for (m = n; m && (mblen < icmplen); m = m->m_next)
201 mblen += m->m_len;
202 icmplen = min(mblen, icmplen);
203
204 /*
205 * As we are not required to return everything we have,
206 * we return whatever we can return at ease.
207 *
208 * Note that ICMP datagrams longer than 576 octets are out of spec
209 * according to RFC1812;
210 */
211
212 KASSERT(ICMP_MINLEN + sizeof (struct ip) <= MCLBYTES);
213
214 if (sizeof (struct ip) + icmplen + ICMP_MINLEN > MCLBYTES)
215 icmplen = MCLBYTES - ICMP_MINLEN - sizeof (struct ip);
216
217 m = m_gethdr(M_DONTWAIT, MT_HEADER);
218 if (m && ((sizeof (struct ip) + icmplen + ICMP_MINLEN +
219 sizeof(long) - 1) &~ (sizeof(long) - 1)) > MHLEN) {
220 MCLGET(m, M_DONTWAIT);
221 if ((m->m_flags & M_EXT) == 0) {
222 m_freem(m);
223 m = NULL;
224 }
225 }
226 if (m == NULL)
227 goto freeit;
228 /* keep in same rtable and preserve other pkthdr bits */
229 m->m_pkthdr.ph_rtableid = n->m_pkthdr.ph_rtableid;
230 m->m_pkthdr.ph_ifidx = n->m_pkthdr.ph_ifidx;
231 /* move PF_GENERATED to new packet, if existent XXX preserve more? */
232 if (n->m_pkthdr.pf.flags & PF_TAG_GENERATED)
233 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
234 m->m_pkthdr.len = m->m_len = icmplen + ICMP_MINLEN;
235 m_align(m, m->m_len);
236 icp = mtod(m, struct icmp *);
237 if ((u_int)type > ICMP_MAXTYPE)
238 panic("icmp_error");
239 icmpstat_inc(icps_outhist + type);
240 icp->icmp_type = type;
241 if (type == ICMP_REDIRECT)
242 icp->icmp_gwaddr.s_addr = dest;
243 else {
244 icp->icmp_void = 0;
245 /*
246 * The following assignments assume an overlay with the
247 * zeroed icmp_void field.
248 */
249 if (type == ICMP_PARAMPROB) {
250 icp->icmp_pptr = code;
251 code = 0;
252 } else if (type == ICMP_UNREACH &&
253 code == ICMP_UNREACH_NEEDFRAG && destmtu)
254 icp->icmp_nextmtu = htons(destmtu);
255 }
256
257 icp->icmp_code = code;
258 m_copydata(n, 0, icmplen, &icp->icmp_ip);
259
260 /*
261 * Now, copy old ip header (without options)
262 * in front of icmp message.
263 */
264 m = m_prepend(m, sizeof(struct ip), M_DONTWAIT);
265 if (m == NULL)
266 goto freeit;
267 nip = mtod(m, struct ip *);
268 /* ip_v set in ip_output */
269 nip->ip_hl = sizeof(struct ip) >> 2;
270 nip->ip_tos = 0;
271 nip->ip_len = htons(m->m_len);
272 /* ip_id set in ip_output */
273 nip->ip_off = 0;
274 /* ip_ttl set in icmp_reflect */
275 nip->ip_p = IPPROTO_ICMP;
276 nip->ip_src = oip->ip_src;
277 nip->ip_dst = oip->ip_dst;
278
279 m_freem(n);
280 return (m);
281
282 freeit:
283 m_freem(n);
284 return (NULL);
285 }
286
287 /*
288 * Generate an error packet of type error
289 * in response to bad packet ip.
290 *
291 * The ip packet inside has ip_off and ip_len in host byte order.
292 */
293 void
294 icmp_error(struct mbuf *n, int type, int code, u_int32_t dest, int destmtu)
295 {
296 struct mbuf *m;
297
298 m = icmp_do_error(n, type, code, dest, destmtu);
299 if (m != NULL)
300 if (!icmp_reflect(m, NULL, NULL))
301 icmp_send(m, NULL);
302 }
303
304 /*
305 * Process a received ICMP message.
306 */
307 int
308 icmp_input(struct mbuf **mp, int *offp, int proto, int af)
309 {
310 struct ifnet *ifp;
311
312 ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
313 if (ifp == NULL) {
314 m_freemp(mp);
315 return IPPROTO_DONE;
316 }
317
318 proto = icmp_input_if(ifp, mp, offp, proto, af);
319 if_put(ifp);
320 return proto;
321 }
322
323 int
324 icmp_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto, int af)
325 {
326 struct mbuf *m = *mp;
327 int hlen = *offp;
328 struct icmp *icp;
329 struct ip *ip = mtod(m, struct ip *);
330 struct sockaddr_in sin;
331 int icmplen, i, code;
332 struct in_ifaddr *ia;
333 void (*ctlfunc)(int, struct sockaddr *, u_int, void *);
334 struct mbuf *opts;
335
336 /*
337 * Locate icmp structure in mbuf, and check
338 * that not corrupted and of at least minimum length.
339 */
340 icmplen = ntohs(ip->ip_len) - hlen;
341 #ifdef ICMPPRINTFS
342 if (icmpprintfs) {
343 char dst[INET_ADDRSTRLEN], src[INET_ADDRSTRLEN];
344
345 inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst));
346 inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src));
347
348 printf("icmp_input from %s to %s, len %d\n", src, dst, icmplen);
349 }
350 #endif
351 if (icmplen < ICMP_MINLEN) {
352 icmpstat_inc(icps_tooshort);
353 goto freeit;
354 }
355 i = hlen + min(icmplen, ICMP_ADVLENMAX);
356 if ((m = *mp = m_pullup(m, i)) == NULL) {
357 icmpstat_inc(icps_tooshort);
358 return IPPROTO_DONE;
359 }
360 ip = mtod(m, struct ip *);
361 if (in4_cksum(m, 0, hlen, icmplen)) {
362 icmpstat_inc(icps_checksum);
363 goto freeit;
364 }
365
366 icp = (struct icmp *)(mtod(m, caddr_t) + hlen);
367 #ifdef ICMPPRINTFS
368 /*
369 * Message type specific processing.
370 */
371 if (icmpprintfs)
372 printf("icmp_input, type %d code %d\n", icp->icmp_type,
373 icp->icmp_code);
374 #endif
375 if (icp->icmp_type > ICMP_MAXTYPE)
376 goto raw;
377 #if NPF > 0
378 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
379 switch (icp->icmp_type) {
380 /*
381 * As pf_icmp_mapping() considers redirects belonging to a
382 * diverted connection, we must include it here.
383 */
384 case ICMP_REDIRECT:
385 /* FALLTHROUGH */
386 /*
387 * These ICMP types map to other connections. They must be
388 * delivered to pr_ctlinput() also for diverted connections.
389 */
390 case ICMP_UNREACH:
391 case ICMP_TIMXCEED:
392 case ICMP_PARAMPROB:
393 case ICMP_SOURCEQUENCH:
394 /*
395 * Do not use the divert-to property of the TCP or UDP
396 * rule when doing the PCB lookup for the raw socket.
397 */
398 m->m_pkthdr.pf.flags &=~ PF_TAG_DIVERTED;
399 break;
400 default:
401 goto raw;
402 }
403 }
404 #endif /* NPF */
405 icmpstat_inc(icps_inhist + icp->icmp_type);
406 code = icp->icmp_code;
407 switch (icp->icmp_type) {
408
409 case ICMP_UNREACH:
410 switch (code) {
411 case ICMP_UNREACH_NET:
412 case ICMP_UNREACH_HOST:
413 case ICMP_UNREACH_PROTOCOL:
414 case ICMP_UNREACH_PORT:
415 case ICMP_UNREACH_SRCFAIL:
416 code += PRC_UNREACH_NET;
417 break;
418
419 case ICMP_UNREACH_NEEDFRAG:
420 code = PRC_MSGSIZE;
421 break;
422
423 case ICMP_UNREACH_NET_UNKNOWN:
424 case ICMP_UNREACH_NET_PROHIB:
425 case ICMP_UNREACH_TOSNET:
426 code = PRC_UNREACH_NET;
427 break;
428
429 case ICMP_UNREACH_HOST_UNKNOWN:
430 case ICMP_UNREACH_ISOLATED:
431 case ICMP_UNREACH_HOST_PROHIB:
432 case ICMP_UNREACH_TOSHOST:
433 case ICMP_UNREACH_FILTER_PROHIB:
434 case ICMP_UNREACH_HOST_PRECEDENCE:
435 case ICMP_UNREACH_PRECEDENCE_CUTOFF:
436 code = PRC_UNREACH_HOST;
437 break;
438
439 default:
440 goto badcode;
441 }
442 goto deliver;
443
444 case ICMP_TIMXCEED:
445 if (code > 1)
446 goto badcode;
447 code += PRC_TIMXCEED_INTRANS;
448 goto deliver;
449
450 case ICMP_PARAMPROB:
451 if (code > 1)
452 goto badcode;
453 code = PRC_PARAMPROB;
454 goto deliver;
455
456 case ICMP_SOURCEQUENCH:
457 if (code)
458 goto badcode;
459 code = PRC_QUENCH;
460 deliver:
461 /*
462 * Problem with datagram; advise higher level routines.
463 */
464 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
465 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
466 icmpstat_inc(icps_badlen);
467 goto freeit;
468 }
469 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
470 goto badcode;
471 #ifdef INET6
472 /* Get more contiguous data for a v6 in v4 ICMP message. */
473 if (icp->icmp_ip.ip_p == IPPROTO_IPV6) {
474 if (icmplen < ICMP_V6ADVLENMIN ||
475 icmplen < ICMP_V6ADVLEN(icp)) {
476 icmpstat_inc(icps_badlen);
477 goto freeit;
478 }
479 }
480 #endif /* INET6 */
481 #ifdef ICMPPRINTFS
482 if (icmpprintfs)
483 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
484 #endif
485 memset(&sin, 0, sizeof(sin));
486 sin.sin_family = AF_INET;
487 sin.sin_len = sizeof(struct sockaddr_in);
488 sin.sin_addr = icp->icmp_ip.ip_dst;
489 #if NCARP > 0
490 if (carp_lsdrop(ifp, m, AF_INET, &sin.sin_addr.s_addr,
491 &ip->ip_dst.s_addr, 1))
492 goto freeit;
493 #endif
494 /*
495 * XXX if the packet contains [IPv4 AH TCP], we can't make a
496 * notification to TCP layer.
497 */
498 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
499 if (ctlfunc)
500 (*ctlfunc)(code, sintosa(&sin), m->m_pkthdr.ph_rtableid,
501 &icp->icmp_ip);
502 break;
503
504 badcode:
505 icmpstat_inc(icps_badcode);
506 break;
507
508 case ICMP_ECHO:
509 if (!icmpbmcastecho &&
510 (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
511 icmpstat_inc(icps_bmcastecho);
512 break;
513 }
514 icp->icmp_type = ICMP_ECHOREPLY;
515 goto reflect;
516
517 case ICMP_TSTAMP:
518 if (icmptstamprepl == 0)
519 break;
520
521 if (!icmpbmcastecho &&
522 (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
523 icmpstat_inc(icps_bmcastecho);
524 break;
525 }
526 if (icmplen < ICMP_TSLEN) {
527 icmpstat_inc(icps_badlen);
528 break;
529 }
530 icp->icmp_type = ICMP_TSTAMPREPLY;
531 icp->icmp_rtime = iptime();
532 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
533 goto reflect;
534
535 case ICMP_MASKREQ:
536 if (icmpmaskrepl == 0)
537 break;
538 if (icmplen < ICMP_MASKLEN) {
539 icmpstat_inc(icps_badlen);
540 break;
541 }
542 /*
543 * We are not able to respond with all ones broadcast
544 * unless we receive it over a point-to-point interface.
545 */
546 memset(&sin, 0, sizeof(sin));
547 sin.sin_family = AF_INET;
548 sin.sin_len = sizeof(struct sockaddr_in);
549 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
550 ip->ip_dst.s_addr == INADDR_ANY)
551 sin.sin_addr = ip->ip_src;
552 else
553 sin.sin_addr = ip->ip_dst;
554 if (ifp == NULL)
555 break;
556 ia = ifatoia(ifaof_ifpforaddr(sintosa(&sin), ifp));
557 if (ia == NULL)
558 break;
559 icp->icmp_type = ICMP_MASKREPLY;
560 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
561 if (ip->ip_src.s_addr == 0) {
562 if (ifp->if_flags & IFF_BROADCAST) {
563 if (ia->ia_broadaddr.sin_addr.s_addr)
564 ip->ip_src = ia->ia_broadaddr.sin_addr;
565 else
566 ip->ip_src.s_addr = INADDR_BROADCAST;
567 }
568 else if (ifp->if_flags & IFF_POINTOPOINT)
569 ip->ip_src = ia->ia_dstaddr.sin_addr;
570 }
571 reflect:
572 #if NCARP > 0
573 if (carp_lsdrop(ifp, m, AF_INET, &ip->ip_src.s_addr,
574 &ip->ip_dst.s_addr, 1))
575 goto freeit;
576 #endif
577 icmpstat_inc(icps_reflect);
578 icmpstat_inc(icps_outhist + icp->icmp_type);
579 if (!icmp_reflect(m, &opts, NULL)) {
580 icmp_send(m, opts);
581 m_free(opts);
582 }
583 return IPPROTO_DONE;
584
585 case ICMP_REDIRECT:
586 {
587 struct sockaddr_in sdst;
588 struct sockaddr_in sgw;
589 struct sockaddr_in ssrc;
590 struct rtentry *newrt = NULL;
591
592 if (icmp_rediraccept == 0 || ipforwarding == 1)
593 goto freeit;
594 if (code > 3)
595 goto badcode;
596 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
597 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
598 icmpstat_inc(icps_badlen);
599 break;
600 }
601 /*
602 * Short circuit routing redirects to force
603 * immediate change in the kernel's routing
604 * tables. The message is also handed to anyone
605 * listening on a raw socket (e.g. the routing
606 * daemon for use in updating its tables).
607 */
608 memset(&sdst, 0, sizeof(sdst));
609 memset(&sgw, 0, sizeof(sgw));
610 memset(&ssrc, 0, sizeof(ssrc));
611 sdst.sin_family = sgw.sin_family = ssrc.sin_family = AF_INET;
612 sdst.sin_len = sgw.sin_len = ssrc.sin_len = sizeof(sdst);
613 memcpy(&sdst.sin_addr, &icp->icmp_ip.ip_dst,
614 sizeof(sdst.sin_addr));
615 memcpy(&sgw.sin_addr, &icp->icmp_gwaddr,
616 sizeof(sgw.sin_addr));
617 memcpy(&ssrc.sin_addr, &ip->ip_src,
618 sizeof(ssrc.sin_addr));
619
620 #ifdef ICMPPRINTFS
621 if (icmpprintfs) {
622 char gw[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
623
624 inet_ntop(AF_INET, &icp->icmp_gwaddr, gw, sizeof(gw));
625 inet_ntop(AF_INET, &icp->icmp_ip.ip_dst,
626 dst, sizeof(dst));
627
628 printf("redirect dst %s to %s\n", dst, gw);
629 }
630 #endif
631
632 #if NCARP > 0
633 if (carp_lsdrop(ifp, m, AF_INET, &sdst.sin_addr.s_addr,
634 &ip->ip_dst.s_addr, 1))
635 goto freeit;
636 #endif
637 rtredirect(sintosa(&sdst), sintosa(&sgw),
638 sintosa(&ssrc), &newrt, m->m_pkthdr.ph_rtableid);
639 if (newrt != NULL && icmp_redirtimeout > 0) {
640 rt_timer_add(newrt, &icmp_redirect_timeout_q,
641 m->m_pkthdr.ph_rtableid);
642 }
643 rtfree(newrt);
644 pfctlinput(PRC_REDIRECT_HOST, sintosa(&sdst));
645 break;
646 }
647 /*
648 * No kernel processing for the following;
649 * just fall through to send to raw listener.
650 */
651 case ICMP_ECHOREPLY:
652 case ICMP_ROUTERADVERT:
653 case ICMP_ROUTERSOLICIT:
654 case ICMP_TSTAMPREPLY:
655 case ICMP_IREQREPLY:
656 case ICMP_MASKREPLY:
657 case ICMP_TRACEROUTE:
658 case ICMP_DATACONVERR:
659 case ICMP_MOBILE_REDIRECT:
660 case ICMP_IPV6_WHEREAREYOU:
661 case ICMP_IPV6_IAMHERE:
662 case ICMP_MOBILE_REGREQUEST:
663 case ICMP_MOBILE_REGREPLY:
664 case ICMP_PHOTURIS:
665 default:
666 break;
667 }
668
669 raw:
670 return rip_input(mp, offp, proto, af);
671
672 freeit:
673 m_freem(m);
674 return IPPROTO_DONE;
675 }
676
677 /*
678 * Reflect the ip packet back to the source
679 */
680 int
681 icmp_reflect(struct mbuf *m, struct mbuf **op, struct in_ifaddr *ia)
682 {
683 struct ip *ip = mtod(m, struct ip *);
684 struct mbuf *opts = NULL;
685 struct sockaddr_in sin;
686 struct rtentry *rt = NULL;
687 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
688 u_int rtableid;
689 u_int8_t pfflags;
690
691 if (!in_canforward(ip->ip_src) &&
692 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
693 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
694 m_freem(m); /* Bad return address */
695 return (EHOSTUNREACH);
696 }
697
698 if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
699 m_freem(m);
700 return (ELOOP);
701 }
702 rtableid = m->m_pkthdr.ph_rtableid;
703 pfflags = m->m_pkthdr.pf.flags;
704 m_resethdr(m);
705 m->m_pkthdr.ph_rtableid = rtableid;
706 m->m_pkthdr.pf.flags = pfflags & PF_TAG_GENERATED;
707
708 /*
709 * If the incoming packet was addressed directly to us,
710 * use dst as the src for the reply. For broadcast, use
711 * the address which corresponds to the incoming interface.
712 */
713 if (ia == NULL) {
714 memset(&sin, 0, sizeof(sin));
715 sin.sin_len = sizeof(sin);
716 sin.sin_family = AF_INET;
717 sin.sin_addr = ip->ip_dst;
718
719 rt = rtalloc(sintosa(&sin), 0, rtableid);
720 if (rtisvalid(rt) &&
721 ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST))
722 ia = ifatoia(rt->rt_ifa);
723 }
724
725 /*
726 * The following happens if the packet was not addressed to us.
727 * Use the new source address and do a route lookup. If it fails
728 * drop the packet as there is no path to the host.
729 */
730 if (ia == NULL) {
731 rtfree(rt);
732
733 memset(&sin, 0, sizeof(sin));
734 sin.sin_len = sizeof(sin);
735 sin.sin_family = AF_INET;
736 sin.sin_addr = ip->ip_src;
737
738 /* keep packet in the original virtual instance */
739 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid);
740 if (rt == NULL) {
741 ipstat_inc(ips_noroute);
742 m_freem(m);
743 return (EHOSTUNREACH);
744 }
745
746 ia = ifatoia(rt->rt_ifa);
747 }
748
749 ip->ip_dst = ip->ip_src;
750 ip->ip_ttl = MAXTTL;
751
752 /* It is safe to dereference ``ia'' iff ``rt'' is valid. */
753 ip->ip_src = ia->ia_addr.sin_addr;
754 rtfree(rt);
755
756 if (optlen > 0) {
757 u_char *cp;
758 int opt, cnt;
759 u_int len;
760
761 /*
762 * Retrieve any source routing from the incoming packet;
763 * add on any record-route or timestamp options.
764 */
765 cp = (u_char *) (ip + 1);
766 if (op && (opts = ip_srcroute(m)) == NULL &&
767 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
768 opts->m_len = sizeof(struct in_addr);
769 mtod(opts, struct in_addr *)->s_addr = 0;
770 }
771 if (op && opts) {
772 #ifdef ICMPPRINTFS
773 if (icmpprintfs)
774 printf("icmp_reflect optlen %d rt %d => ",
775 optlen, opts->m_len);
776 #endif
777 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
778 opt = cp[IPOPT_OPTVAL];
779 if (opt == IPOPT_EOL)
780 break;
781 if (opt == IPOPT_NOP)
782 len = 1;
783 else {
784 if (cnt < IPOPT_OLEN + sizeof(*cp))
785 break;
786 len = cp[IPOPT_OLEN];
787 if (len < IPOPT_OLEN + sizeof(*cp) ||
788 len > cnt)
789 break;
790 }
791 /*
792 * Should check for overflow, but it
793 * "can't happen"
794 */
795 if (opt == IPOPT_RR || opt == IPOPT_TS ||
796 opt == IPOPT_SECURITY) {
797 memcpy(mtod(opts, caddr_t) +
798 opts->m_len, cp, len);
799 opts->m_len += len;
800 }
801 }
802 /* Terminate & pad, if necessary */
803 if ((cnt = opts->m_len % 4) != 0)
804 for (; cnt < 4; cnt++) {
805 *(mtod(opts, caddr_t) + opts->m_len) =
806 IPOPT_EOL;
807 opts->m_len++;
808 }
809 #ifdef ICMPPRINTFS
810 if (icmpprintfs)
811 printf("%d\n", opts->m_len);
812 #endif
813 }
814 ip_stripoptions(m);
815 }
816 m->m_flags &= ~(M_BCAST|M_MCAST);
817 if (op)
818 *op = opts;
819
820 return (0);
821 }
822
823 /*
824 * Send an icmp packet back to the ip level
825 */
826 void
827 icmp_send(struct mbuf *m, struct mbuf *opts)
828 {
829 struct ip *ip = mtod(m, struct ip *);
830 int hlen;
831 struct icmp *icp;
832
833 hlen = ip->ip_hl << 2;
834 icp = (struct icmp *)(mtod(m, caddr_t) + hlen);
835 icp->icmp_cksum = 0;
836 m->m_pkthdr.csum_flags = M_ICMP_CSUM_OUT;
837 #ifdef ICMPPRINTFS
838 if (icmpprintfs) {
839 char dst[INET_ADDRSTRLEN], src[INET_ADDRSTRLEN];
840
841 inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst));
842 inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src));
843
844 printf("icmp_send dst %s src %s\n", dst, src);
845 }
846 #endif
847 /*
848 * ip_send() cannot handle IP options properly. So in case we have
849 * options fill out the IP header here and use ip_send_raw() instead.
850 */
851 if (opts != NULL) {
852 m = ip_insertoptions(m, opts, &hlen);
853 ip = mtod(m, struct ip *);
854 ip->ip_hl = (hlen >> 2);
855 ip->ip_v = IPVERSION;
856 ip->ip_off &= htons(IP_DF);
857 ip->ip_id = htons(ip_randomid());
858 ipstat_inc(ips_localout);
859 ip_send_raw(m);
860 } else
861 ip_send(m);
862 }
863
864 u_int32_t
865 iptime(void)
866 {
867 struct timeval atv;
868 u_long t;
869
870 microtime(&atv);
871 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
872 return (htonl(t));
873 }
874
875 int
876 icmp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
877 size_t newlen)
878 {
879 int error;
880
881 /* All sysctl names at this level are terminal. */
882 if (namelen != 1)
883 return (ENOTDIR);
884
885 switch (name[0]) {
886 case ICMPCTL_REDIRTIMEOUT:
887 NET_LOCK();
888 error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
889 &icmp_redirtimeout, 0, INT_MAX);
890 rt_timer_queue_change(&icmp_redirect_timeout_q,
891 icmp_redirtimeout);
892 NET_UNLOCK();
893 break;
894
895 case ICMPCTL_STATS:
896 error = icmp_sysctl_icmpstat(oldp, oldlenp, newp);
897 break;
898
899 default:
900 NET_LOCK();
901 error = sysctl_bounded_arr(icmpctl_vars, nitems(icmpctl_vars),
902 name, namelen, oldp, oldlenp, newp, newlen);
903 NET_UNLOCK();
904 break;
905 }
906
907 return (error);
908 }
909
910 int
911 icmp_sysctl_icmpstat(void *oldp, size_t *oldlenp, void *newp)
912 {
913 uint64_t counters[icps_ncounters];
914 struct icmpstat icmpstat;
915 u_long *words = (u_long *)&icmpstat;
916 int i;
917
918 CTASSERT(sizeof(icmpstat) == (nitems(counters) * sizeof(u_long)));
919 memset(&icmpstat, 0, sizeof icmpstat);
920 counters_read(icmpcounters, counters, nitems(counters));
921
922 for (i = 0; i < nitems(counters); i++)
923 words[i] = (u_long)counters[i];
924
925 return (sysctl_rdstruct(oldp, oldlenp, newp,
926 &icmpstat, sizeof(icmpstat)));
927 }
928
929 struct rtentry *
930 icmp_mtudisc_clone(struct in_addr dst, u_int rtableid, int ipsec)
931 {
932 struct sockaddr_in sin;
933 struct rtentry *rt;
934 int error;
935
936 memset(&sin, 0, sizeof(sin));
937 sin.sin_family = AF_INET;
938 sin.sin_len = sizeof(sin);
939 sin.sin_addr = dst;
940
941 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid);
942
943 /* Check if the route is actually usable */
944 if (!rtisvalid(rt))
945 goto bad;
946 /* IPsec needs the route only for PMTU, it can use reject for that */
947 if (!ipsec && (rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)))
948 goto bad;
949
950 /*
951 * No PMTU for local routes and permanent neighbors,
952 * ARP and NDP use the same expire timer as the route.
953 */
954 if (ISSET(rt->rt_flags, RTF_LOCAL) ||
955 (ISSET(rt->rt_flags, RTF_LLINFO) && rt->rt_expire == 0))
956 goto bad;
957
958 /* If we didn't get a host route, allocate one */
959 if ((rt->rt_flags & RTF_HOST) == 0) {
960 struct rtentry *nrt;
961 struct rt_addrinfo info;
962 struct sockaddr_rtlabel sa_rl;
963
964 memset(&info, 0, sizeof(info));
965 info.rti_ifa = rt->rt_ifa;
966 info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
967 info.rti_info[RTAX_DST] = sintosa(&sin);
968 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
969 info.rti_info[RTAX_LABEL] =
970 rtlabel_id2sa(rt->rt_labelid, &sa_rl);
971
972 error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt,
973 rtableid);
974 if (error)
975 goto bad;
976 nrt->rt_rmx = rt->rt_rmx;
977 rtfree(rt);
978 rt = nrt;
979 rtm_send(rt, RTM_ADD, 0, rtableid);
980 }
981 error = rt_timer_add(rt, &ip_mtudisc_timeout_q, rtableid);
982 if (error)
983 goto bad;
984
985 return (rt);
986 bad:
987 rtfree(rt);
988 return (NULL);
989 }
990
991 /* Table of common MTUs: */
992 static const u_short mtu_table[] = {
993 65535, 65280, 32000, 17914, 9180, 8166,
994 4352, 2002, 1492, 1006, 508, 296, 68, 0
995 };
996
997 void
998 icmp_mtudisc(struct icmp *icp, u_int rtableid)
999 {
1000 struct rtentry *rt;
1001 struct ifnet *ifp;
1002 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
1003
1004 rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid, 0);
1005 if (rt == NULL)
1006 return;
1007
1008 ifp = if_get(rt->rt_ifidx);
1009 if (ifp == NULL) {
1010 rtfree(rt);
1011 return;
1012 }
1013
1014 if (mtu == 0) {
1015 int i = 0;
1016
1017 mtu = ntohs(icp->icmp_ip.ip_len);
1018 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
1019 if (mtu > rt->rt_mtu && rt->rt_mtu != 0)
1020 mtu -= (icp->icmp_ip.ip_hl << 2);
1021
1022 /* If we still can't guess a value, try the route */
1023 if (mtu == 0) {
1024 mtu = rt->rt_mtu;
1025
1026 /* If no route mtu, default to the interface mtu */
1027
1028 if (mtu == 0)
1029 mtu = ifp->if_mtu;
1030 }
1031
1032 for (i = 0; i < nitems(mtu_table); i++)
1033 if (mtu > mtu_table[i]) {
1034 mtu = mtu_table[i];
1035 break;
1036 }
1037 }
1038
1039 /*
1040 * XXX: RTV_MTU is overloaded, since the admin can set it
1041 * to turn off PMTU for a route, and the kernel can
1042 * set it to indicate a serious problem with PMTU
1043 * on a route. We should be using a separate flag
1044 * for the kernel to indicate this.
1045 */
1046 if ((rt->rt_locks & RTV_MTU) == 0) {
1047 if (mtu < 296 || mtu > ifp->if_mtu)
1048 rt->rt_locks |= RTV_MTU;
1049 else if (rt->rt_mtu > mtu || rt->rt_mtu == 0)
1050 rt->rt_mtu = mtu;
1051 }
1052
1053 if_put(ifp);
1054 rtfree(rt);
1055 }
1056
1057 void
1058 icmp_mtudisc_timeout(struct rtentry *rt, u_int rtableid)
1059 {
1060 struct ifnet *ifp;
1061
1062 NET_ASSERT_LOCKED();
1063
1064 ifp = if_get(rt->rt_ifidx);
1065 if (ifp == NULL)
1066 return;
1067
1068 if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
1069 void (*ctlfunc)(int, struct sockaddr *, u_int, void *);
1070 struct sockaddr_in sin;
1071
1072 sin = *satosin(rt_key(rt));
1073
1074 rtdeletemsg(rt, ifp, rtableid);
1075
1076 /* Notify TCP layer of increased Path MTU estimate */
1077 ctlfunc = inetsw[ip_protox[IPPROTO_TCP]].pr_ctlinput;
1078 if (ctlfunc)
1079 (*ctlfunc)(PRC_MTUINC, sintosa(&sin),
1080 rtableid, NULL);
1081 } else {
1082 if ((rt->rt_locks & RTV_MTU) == 0)
1083 rt->rt_mtu = 0;
1084 }
1085
1086 if_put(ifp);
1087 }
1088
1089 /*
1090 * Perform rate limit check.
1091 * Returns 0 if it is okay to send the icmp packet.
1092 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1093 * limitation.
1094 *
1095 * XXX per-destination/type check necessary?
1096 */
1097 int
1098 icmp_ratelimit(const struct in_addr *dst, const int type, const int code)
1099 {
1100 /* PPS limit */
1101 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1102 icmperrppslim))
1103 return 1; /* The packet is subject to rate limit */
1104 return 0; /* okay to send */
1105 }
1106
1107 int
1108 icmp_do_exthdr(struct mbuf *m, u_int16_t class, u_int8_t ctype, void *buf,
1109 size_t len)
1110 {
1111 struct ip *ip = mtod(m, struct ip *);
1112 int hlen, off;
1113 struct mbuf *n;
1114 struct icmp *icp;
1115 struct icmp_ext_hdr *ieh;
1116 struct {
1117 struct icmp_ext_hdr ieh;
1118 struct icmp_ext_obj_hdr ieo;
1119 } hdr;
1120
1121 hlen = ip->ip_hl << 2;
1122 icp = (struct icmp *)(mtod(m, caddr_t) + hlen);
1123 if (icp->icmp_type != ICMP_TIMXCEED && icp->icmp_type != ICMP_UNREACH &&
1124 icp->icmp_type != ICMP_PARAMPROB)
1125 /* exthdr not supported */
1126 return (0);
1127
1128 if (icp->icmp_length != 0)
1129 /* exthdr already present, giving up */
1130 return (0);
1131
1132 /* the actual offset starts after the common ICMP header */
1133 hlen += ICMP_MINLEN;
1134 /* exthdr must start on a word boundary */
1135 off = roundup(ntohs(ip->ip_len) - hlen, sizeof(u_int32_t));
1136 /* ... and at an offset of ICMP_EXT_OFFSET or bigger */
1137 off = max(off, ICMP_EXT_OFFSET);
1138 icp->icmp_length = off / sizeof(u_int32_t);
1139
1140 memset(&hdr, 0, sizeof(hdr));
1141 hdr.ieh.ieh_version = ICMP_EXT_HDR_VERSION;
1142 hdr.ieo.ieo_length = htons(sizeof(struct icmp_ext_obj_hdr) + len);
1143 hdr.ieo.ieo_cnum = class;
1144 hdr.ieo.ieo_ctype = ctype;
1145
1146 if (m_copyback(m, hlen + off, sizeof(hdr), &hdr, M_NOWAIT) ||
1147 m_copyback(m, hlen + off + sizeof(hdr), len, buf, M_NOWAIT)) {
1148 m_freem(m);
1149 return (ENOBUFS);
1150 }
1151
1152 /* calculate checksum */
1153 n = m_getptr(m, hlen + off, &off);
1154 if (n == NULL)
1155 panic("icmp_do_exthdr: m_getptr failure");
1156 ieh = (struct icmp_ext_hdr *)(mtod(n, caddr_t) + off);
1157 ieh->ieh_cksum = in4_cksum(n, 0, off, sizeof(hdr) + len);
1158
1159 ip->ip_len = htons(m->m_pkthdr.len);
1160
1161 return (0);
1162 }
Cache object: 4518751b2246f924b189b3d02f633a73
|