The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_icmp.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: ip_icmp.c,v 1.178 2022/08/29 09:14:02 knakahara Exp $  */
    2 
    3 /*
    4  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Public Access Networks Corporation ("Panix").  It was developed under
    9  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
   10  *
   11  * This code is derived from software contributed to The NetBSD Foundation
   12  * by Jason R. Thorpe of Zembu Labs, Inc.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33  * POSSIBILITY OF SUCH DAMAGE.
   34  */
   35 
   36 /*
   37  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
   38  * All rights reserved.
   39  *
   40  * Redistribution and use in source and binary forms, with or without
   41  * modification, are permitted provided that the following conditions
   42  * are met:
   43  * 1. Redistributions of source code must retain the above copyright
   44  *    notice, this list of conditions and the following disclaimer.
   45  * 2. Redistributions in binary form must reproduce the above copyright
   46  *    notice, this list of conditions and the following disclaimer in the
   47  *    documentation and/or other materials provided with the distribution.
   48  * 3. Neither the name of the project nor the names of its contributors
   49  *    may be used to endorse or promote products derived from this software
   50  *    without specific prior written permission.
   51  *
   52  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   62  * SUCH DAMAGE.
   63  */
   64 
   65 /*
   66  * Copyright (c) 1982, 1986, 1988, 1993
   67  *      The Regents of the University of California.  All rights reserved.
   68  *
   69  * Redistribution and use in source and binary forms, with or without
   70  * modification, are permitted provided that the following conditions
   71  * are met:
   72  * 1. Redistributions of source code must retain the above copyright
   73  *    notice, this list of conditions and the following disclaimer.
   74  * 2. Redistributions in binary form must reproduce the above copyright
   75  *    notice, this list of conditions and the following disclaimer in the
   76  *    documentation and/or other materials provided with the distribution.
   77  * 3. Neither the name of the University nor the names of its contributors
   78  *    may be used to endorse or promote products derived from this software
   79  *    without specific prior written permission.
   80  *
   81  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   82  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   83  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   84  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   85  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   86  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   87  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   88  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   89  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   90  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   91  * SUCH DAMAGE.
   92  *
   93  *      @(#)ip_icmp.c   8.2 (Berkeley) 1/4/94
   94  */
   95 
   96 #include <sys/cdefs.h>
   97 __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.178 2022/08/29 09:14:02 knakahara Exp $");
   98 
   99 #ifdef _KERNEL_OPT
  100 #include "opt_ipsec.h"
  101 #endif
  102 
  103 #include <sys/param.h>
  104 #include <sys/systm.h>
  105 #include <sys/mbuf.h>
  106 #include <sys/protosw.h>
  107 #include <sys/socket.h>
  108 #include <sys/socketvar.h> /* For softnet_lock */
  109 #include <sys/kmem.h>
  110 #include <sys/time.h>
  111 #include <sys/kernel.h>
  112 #include <sys/syslog.h>
  113 #include <sys/sysctl.h>
  114 
  115 #include <net/if.h>
  116 #include <net/route.h>
  117 
  118 #include <netinet/in.h>
  119 #include <netinet/in_systm.h>
  120 #include <netinet/in_var.h>
  121 #include <netinet/ip.h>
  122 #include <netinet/ip_icmp.h>
  123 #include <netinet/ip_var.h>
  124 #include <netinet/in_pcb.h>
  125 #include <netinet/in_proto.h>
  126 #include <netinet/icmp_var.h>
  127 #include <netinet/icmp_private.h>
  128 #include <netinet/wqinput.h>
  129 
  130 #ifdef IPSEC
  131 #include <netipsec/ipsec.h>
  132 #include <netipsec/key.h>
  133 #endif
  134 
  135 /*
  136  * ICMP routines: error generation, receive packet processing, and
  137  * routines to turnaround packets back to the originator, and
  138  * host table maintenance routines.
  139  */
  140 
  141 int icmpmaskrepl = 0;           /* 0: _icmp_input() does not answer ICMP_MASKREQ */
  142 int icmpbmcastecho = 0;         /* 0: echo/timestamp requests flagged M_BCAST/M_MCAST get no reply */
  143 int icmpreturndatabytes = 8;    /* upper bound on bytes past the IP header quoted by icmp_error() */
  144 
  145 percpu_t *icmpstat_percpu;      /* allocated in icmp_init(): ICMP_NSTATS uint64_t counters */
  146 
  147 /*
  148  * List of callbacks to notify when Path MTU changes are made.
       *
       * Entries are added at the head of the list by
       * icmp_mtudisc_callback_register(), which holds icmp_mtx while it
       * walks and modifies the list.
  149  */
  150 struct icmp_mtudisc_callback {
  151         LIST_ENTRY(icmp_mtudisc_callback) mc_list;      /* list linkage */
  152         void (*mc_func)(struct in_addr);                /* the registered callback */
  153 };
  154 
  155 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
  156     LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
  157 
  158 /* unused... */
  159 u_int ip_next_mtu(u_int, int);
  160 
  161 bool icmp_dynamic_rt_msg = false;
  162 
  163 static int icmperrppslim = 100;                 /* 100pps */
  164 static int icmperrpps_count = 0;                /* presumably rate-limit state; not used in the code visible here */
  165 static struct timeval icmperrppslim_last;       /* presumably rate-limit state; not used in the code visible here */
  166 static int icmp_rediraccept = 1;                /* 0: _icmp_input() drops received redirects */
  167 static int icmp_redirtimeout = 600;             /* passed to rt_timer_queue_create(); 0 disables the redirect timer */
  168 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;    /* created in icmp_init() */
  169 
  170 /* Protect mtudisc and redirect stuff */
  171 static kmutex_t icmp_mtx __cacheline_aligned;
  172 
  173 static void icmp_send(struct mbuf *, struct mbuf *);
  174 static void icmp_mtudisc_timeout(struct rtentry *, struct rttimer *);
  175 static void icmp_redirect_timeout(struct rtentry *, struct rttimer *);
  176 
  177 static void sysctl_netinet_icmp_setup(struct sysctllog **);
  178 
  179 /* workqueue-based pr_input */
  180 static struct wqinput *icmp_wqinput;            /* created in icmp_init() with _icmp_input as handler */
  181 static void _icmp_input(struct mbuf *, int, int);
  182 
  183 void
  184 icmp_init(void)
  185 {
              /*
               * Set up the ICMP module state: sysctl nodes, the icmp_mtx
               * mutex, the redirect timer queue, the per-CPU statistics
               * block and the "icmp" wqinput instance whose handler is
               * _icmp_input().
               */
  186 
  187         sysctl_netinet_icmp_setup(NULL);
  188 
  189         mutex_init(&icmp_mtx, MUTEX_DEFAULT, IPL_NONE);
  190         /*
  191          * This is only useful if the user initializes redirtimeout to
  192          * something other than zero.
  193          */
  194         mutex_enter(&icmp_mtx);
  195         icmp_redirect_timeout_q = rt_timer_queue_create(icmp_redirtimeout);
  196         mutex_exit(&icmp_mtx);
  197 
              /* One uint64_t counter per statistic, ICMP_NSTATS in total. */
  198         icmpstat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP_NSTATS);
  199         icmp_wqinput = wqinput_create("icmp", _icmp_input);
  200 }
  201 
  202 void
  203 icmp_mtudisc_lock(void)
  204 {
  205 
              /* Acquire icmp_mtx, the mutex guarding mtudisc and redirect state. */
  206         mutex_enter(&icmp_mtx);
  207 }
  208 
  209 void
  210 icmp_mtudisc_unlock(void)
  211 {
  212 
              /* Release icmp_mtx; counterpart of icmp_mtudisc_lock(). */
  213         mutex_exit(&icmp_mtx);
  214 }
  215 
  216 /*
  217  * Register a Path MTU Discovery callback.
  218  */
  219 void
  220 icmp_mtudisc_callback_register(void (*func)(struct in_addr))
  221 {
  222         struct icmp_mtudisc_callback *mc, *new;
  223 
  224         new = kmem_alloc(sizeof(*mc), KM_SLEEP);
  225 
  226         mutex_enter(&icmp_mtx);
  227         for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
  228              mc = LIST_NEXT(mc, mc_list)) {
  229                 if (mc->mc_func == func) {
  230                         mutex_exit(&icmp_mtx);
  231                         kmem_free(new, sizeof(*mc));
  232                         return;
  233                 }
  234         }
  235 
  236         new->mc_func = func;
  237         LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, new, mc_list);
  238         mutex_exit(&icmp_mtx);
  239 }
  240 
  241 /*
  242  * Generate an error packet of type error in response to a bad IP packet. 'n'
  243  * contains this packet. We create 'm' and send it.
  244  * 
  245  * As we are not required to return everything we have, we return whatever
  246  * we can return at ease.
  247  *
  248  * Note that ICMP datagrams longer than 576 octets are out of spec according
  249  * to RFC1812; the limit on icmpreturndatabytes will keep things below that
  250  * limit.
  251  */
  252 void
  253 icmp_error(struct mbuf *n, int type, int code, n_long dest, int destmtu)
  254 {
  255         struct ip *oip = mtod(n, struct ip *), *nip;
  256         const unsigned oiphlen = oip->ip_hl << 2;
  257         struct icmp *icp;
  258         struct mbuf *m;
  259         struct m_tag *mtag;
  260         unsigned datalen, mblen;
  261         int totlen;
  262 
  263         if (type != ICMP_REDIRECT)
  264                 ICMP_STATINC(ICMP_STAT_ERROR);
  265 
  266         /*
  267          * Don't send error if:
  268          *  - The original packet was encrypted.
  269          *  - The packet is multicast or broadcast.
  270          *  - The packet is not the first fragment of the message.
  271          *  - The packet is an ICMP message with an unknown type.
  272          */
  273         if (n->m_flags & M_DECRYPTED)
  274                 goto freeit;
  275         if (n->m_flags & (M_BCAST|M_MCAST))
  276                 goto freeit;
  277         if (oip->ip_off &~ htons(IP_MF|IP_DF))
  278                 goto freeit;
  279         if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
  280             n->m_len >= oiphlen + ICMP_MINLEN) {
  281                 struct icmp *oicp = (struct icmp *)((char *)oip + oiphlen);
  282                 if (!ICMP_INFOTYPE(oicp->icmp_type)) {
  283                         ICMP_STATINC(ICMP_STAT_OLDICMP);
  284                         goto freeit;
  285                 }
  286         }
  287 
  288         /*
  289          * First, do a rate limitation check.
  290          */
  291         if (icmp_ratelimit(&oip->ip_src, type, code)) {
  292                 /* XXX stat */
  293                 goto freeit;
  294         }
  295 
  296         /*
  297          * Compute the number of bytes we will put in 'icmp_ip'. Truncate
  298          * it to the size of the mbuf, if it's too big.
  299          */
  300         datalen = oiphlen + uimin(icmpreturndatabytes,
  301             ntohs(oip->ip_len) - oiphlen);
  302         mblen = 0;
  303         for (m = n; m && (mblen < datalen); m = m->m_next)
  304                 mblen += m->m_len;
  305         datalen = uimin(mblen, datalen);
  306 
  307         /*
  308          * Compute the total length of the new packet. Truncate it if it's
  309          * bigger than the size of a cluster.
  310          */
  311         CTASSERT(ICMP_MINLEN + sizeof(struct ip) <= MCLBYTES);
  312         totlen = sizeof(struct ip) + ICMP_MINLEN + datalen;
  313         if (totlen > MCLBYTES) {
  314                 datalen = MCLBYTES - ICMP_MINLEN - sizeof(struct ip);
  315                 totlen = MCLBYTES;
  316         }
  317 
  318         /*
  319          * Allocate the mbuf for the new packet.
  320          */
  321         m = m_gethdr(M_DONTWAIT, MT_HEADER);
  322         if (m && (totlen > MHLEN)) {
  323                 MCLGET(m, M_DONTWAIT);
  324                 if ((m->m_flags & M_EXT) == 0) {
  325                         m_freem(m);
  326                         m = NULL;
  327                 }
  328         }
  329         if (m == NULL)
  330                 goto freeit;
  331         MCLAIM(m, n->m_owner);
  332         m->m_len = totlen;
  333         m->m_pkthdr.len = m->m_len;
  334         m_copy_rcvif(m, n);
  335 
  336         if ((u_int)type > ICMP_MAXTYPE)
  337                 panic("icmp_error");
  338         ICMP_STATINC(ICMP_STAT_OUTHIST + type);
  339 
  340         if ((m->m_flags & M_EXT) == 0)
  341                 m_align(m, m->m_len);
  342 
  343         /*
  344          * Get pointers on the IP header and the ICMP header.
  345          */
  346         nip = mtod(m, struct ip *);
  347         icp = (struct icmp *)(nip + 1);
  348 
  349         /*
  350          * Fill in the fields of the ICMP header: icmp_type, icmp_code
  351          * and icmp_ip. icmp_cksum gets filled later.
  352          */
  353         icp->icmp_type = type;
  354         if (type == ICMP_REDIRECT) {
  355                 icp->icmp_gwaddr.s_addr = dest;
  356         } else {
  357                 icp->icmp_void = 0;
  358                 /*
  359                  * The following assignments assume an overlay with the
  360                  * zeroed icmp_void field.
  361                  */
  362                 if (type == ICMP_PARAMPROB) {
  363                         icp->icmp_pptr = code;
  364                         code = 0;
  365                 } else if (type == ICMP_UNREACH &&
  366                     code == ICMP_UNREACH_NEEDFRAG && destmtu)
  367                         icp->icmp_nextmtu = htons(destmtu);
  368         }
  369         icp->icmp_code = code;
  370         m_copydata(n, 0, datalen, (void *)&icp->icmp_ip);
  371 
  372         /*
  373          * Now, copy the old IP header (without options) in front of the
  374          * ICMP message. The src/dst fields will be swapped in icmp_reflect.
  375          */
  376         /* ip_v set in ip_output */
  377         nip->ip_hl = sizeof(struct ip) >> 2;
  378         nip->ip_tos = 0;
  379         nip->ip_len = htons(m->m_len);
  380         /* ip_id set in ip_output */
  381         nip->ip_off = htons(0);
  382         /* ip_ttl set in icmp_reflect */
  383         nip->ip_p = IPPROTO_ICMP;
  384         nip->ip_src = oip->ip_src;
  385         nip->ip_dst = oip->ip_dst;
  386         /* move PF m_tag to new packet, if it exists */
  387         mtag = m_tag_find(n, PACKET_TAG_PF);
  388         if (mtag != NULL) {
  389                 m_tag_unlink(n, mtag);
  390                 m_tag_prepend(m, mtag);
  391         }
  392 
  393         icmp_reflect(m);
  394 
  395 freeit:
  396         m_freem(n);
  397 }
  398 
  399 struct sockaddr_in icmpsrc = {
              /*
               * Shared AF_INET address buffer. _icmp_input() stores the
               * destination of the quoted IP header in sin_addr before
               * passing it to pr_ctlinput, rtredirect() and pfctlinput().
               */
  400         .sin_len = sizeof(struct sockaddr_in),
  401         .sin_family = AF_INET,
  402 };
  403 
  404 /*
  405  * Process a received ICMP message.
       *
       * 'm' holds the packet starting at its IP header, 'hlen' is the
       * length of that IP header and 'proto' is passed through unchanged
       * to rip_input().
       *
       * After the length and checksum checks the message is handled by
       * type:
       *  - UNREACH, TIMXCEED, PARAMPROB and SOURCEQUENCH are translated to
       *    a PRC_* code and given to the pr_ctlinput handler of the
       *    protocol named in the quoted IP header;
       *  - ECHO, TSTAMP and MASKREQ are rewritten into the matching reply
       *    and sent back with icmp_reflect();
       *  - REDIRECT updates the routing table through rtredirect();
       *  - every path that leaves the switch with 'break', and every type
       *    above ICMP_MAXTYPE, ends in rip_input().
       *
       * The mbuf is handed to icmp_reflect() or rip_input(), or freed at
       * 'freeit'; a failed m_pullup() leaves nothing to free (presumably
       * it releases the chain itself - confirm against mbuf(9)).
  406  */
  407 static void
  408 _icmp_input(struct mbuf *m, int hlen, int proto)
  409 {
  410         struct icmp *icp;
  411         struct ip *ip = mtod(m, struct ip *);
  412         int icmplen;
  413         int i;
  414         struct in_ifaddr *ia;
  415         void *(*ctlfunc)(int, const struct sockaddr *, void *);
  416         int code;
  417         struct rtentry *rt;
  418         struct sockaddr_in icmpdst = {
  419                 .sin_len = sizeof(struct sockaddr_in),
  420                 .sin_family = AF_INET,
  421         };
  422         struct sockaddr_in icmpgw = {
  423                 .sin_len = sizeof(struct sockaddr_in),
  424                 .sin_family = AF_INET,
  425         };
  426 
  427         /*
  428          * Locate icmp structure in mbuf, and check
  429          * that not corrupted and of at least minimum length.
  430          */
  431         icmplen = ntohs(ip->ip_len) - hlen;
  432         if (icmplen < ICMP_MINLEN) {
  433                 ICMP_STATINC(ICMP_STAT_TOOSHORT);
  434                 goto freeit;
  435         }
              /*
               * Make the IP header plus up to ICMP_ADVLENMIN bytes of ICMP
               * contiguous and writable in the first mbuf.
               */
  436         i = hlen + uimin(icmplen, ICMP_ADVLENMIN);
  437         if (M_UNWRITABLE(m, i) && (m = m_pullup(m, i)) == NULL) {
  438                 ICMP_STATINC(ICMP_STAT_TOOSHORT);
  439                 return;
  440         }
  441         ip = mtod(m, struct ip *);
              /*
               * Step over the IP header so that the checksum covers only
               * the ICMP message; the adjustment is undone right after.
               */
  442         m->m_len -= hlen;
  443         m->m_data += hlen;
  444         icp = mtod(m, struct icmp *);
  445         /* Don't need to assert alignment, here. */
  446         if (in_cksum(m, icmplen)) {
  447                 ICMP_STATINC(ICMP_STAT_CHECKSUM);
  448                 goto freeit;
  449         }
  450         m->m_len += hlen;
  451         m->m_data -= hlen;
  452 
              /* Types above ICMP_MAXTYPE go to rip_input() uncounted. */
  453         if (icp->icmp_type > ICMP_MAXTYPE)
  454                 goto raw;
  455         ICMP_STATINC(ICMP_STAT_INHIST + icp->icmp_type);
  456         code = icp->icmp_code;
  457 
  458         switch (icp->icmp_type) {
  459         case ICMP_UNREACH:
                      /* Map the ICMP unreachable code to a PRC_* code. */
  460                 switch (code) {
  461                 case ICMP_UNREACH_PROTOCOL:
  462                         code = PRC_UNREACH_PROTOCOL;
  463                         break;
  464 
  465                 case ICMP_UNREACH_PORT:
  466                         code = PRC_UNREACH_PORT;
  467                         break;
  468 
  469                 case ICMP_UNREACH_SRCFAIL:
  470                         code = PRC_UNREACH_SRCFAIL;
  471                         break;
  472 
  473                 case ICMP_UNREACH_NEEDFRAG:
  474                         code = PRC_MSGSIZE;
  475                         break;
  476 
  477                 case ICMP_UNREACH_NET:
  478                 case ICMP_UNREACH_NET_UNKNOWN:
  479                 case ICMP_UNREACH_NET_PROHIB:
  480                 case ICMP_UNREACH_TOSNET:
  481                         code = PRC_UNREACH_NET;
  482                         break;
  483 
  484                 case ICMP_UNREACH_HOST:
  485                 case ICMP_UNREACH_HOST_UNKNOWN:
  486                 case ICMP_UNREACH_ISOLATED:
  487                 case ICMP_UNREACH_HOST_PROHIB:
  488                 case ICMP_UNREACH_TOSHOST:
  489                 case ICMP_UNREACH_ADMIN_PROHIBIT:
  490                 case ICMP_UNREACH_HOST_PREC:
  491                 case ICMP_UNREACH_PREC_CUTOFF:
  492                         code = PRC_UNREACH_HOST;
  493                         break;
  494 
  495                 default:
  496                         goto badcode;
  497                 }
  498                 goto deliver;
  499 
  500         case ICMP_TIMXCEED:
  501                 if (code > 1)
  502                         goto badcode;
  503                 code += PRC_TIMXCEED_INTRANS;
  504                 goto deliver;
  505 
  506         case ICMP_PARAMPROB:
  507                 if (code > 1)
  508                         goto badcode;
  509                 code = PRC_PARAMPROB;
  510                 goto deliver;
  511 
  512         case ICMP_SOURCEQUENCH:
  513                 if (code)
  514                         goto badcode;
  515                 code = PRC_QUENCH;
  516                 goto deliver;
  517 
  518         deliver:
  519                 /*
  520                  * Problem with datagram; advise higher level routines.
                       * Reached only by goto from the error types above, with
                       * 'code' already translated to a PRC_* value.
  521                  */
  522                 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
  523                     icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
  524                         ICMP_STATINC(ICMP_STAT_BADLEN);
  525                         goto freeit;
  526                 }
                      /*
                       * Bring the whole quoted header area into the first
                       * mbuf. NOTE(review): when m_pullup() fails, 'freeit'
                       * is entered with m == NULL; this relies on
                       * m_freem(NULL) being harmless - confirm.
                       */
  527                 if (m->m_len < hlen + ICMP_ADVLEN(icp)) {
  528                         m = m_pullup(m, hlen + ICMP_ADVLEN(icp));
  529                         if (m == NULL)
  530                                 goto freeit;
  531                 }
                      /* The pullup may have moved the data; reload pointers. */
  532                 ip = mtod(m, struct ip *);
  533                 icp = (struct icmp *)(mtod(m, uint8_t *) + hlen);
  534 
  535                 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
  536                         goto badcode;
  537 
                      /*
                       * Notify the protocol of the quoted packet about the
                       * destination the error refers to.
                       */
  538                 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
  539                 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
  540                 if (ctlfunc)
  541                         (void) (*ctlfunc)(code, sintosa(&icmpsrc),
  542                             &icp->icmp_ip);
  543                 break;
  544 
  545         badcode:
  546                 ICMP_STATINC(ICMP_STAT_BADCODE);
  547                 break;
  548 
  549         case ICMP_ECHO:
                      /* Unless icmpbmcastecho is set, no reply to bcast/mcast. */
  550                 if (!icmpbmcastecho &&
  551                     (m->m_flags & (M_MCAST | M_BCAST)) != 0)  {
  552                         ICMP_STATINC(ICMP_STAT_BMCASTECHO);
  553                         break;
  554                 }
  555                 icp->icmp_type = ICMP_ECHOREPLY;
  556                 goto reflect;
  557 
  558         case ICMP_TSTAMP:
  559                 if (icmplen < ICMP_TSLEN) {
  560                         ICMP_STATINC(ICMP_STAT_BADLEN);
  561                         break;
  562                 }
  563                 if (!icmpbmcastecho &&
  564                     (m->m_flags & (M_MCAST | M_BCAST)) != 0)  {
  565                         ICMP_STATINC(ICMP_STAT_BMCASTTSTAMP);
  566                         break;
  567                 }
  568                 icp->icmp_type = ICMP_TSTAMPREPLY;
  569                 icp->icmp_rtime = iptime();
  570                 icp->icmp_ttime = icp->icmp_rtime;      /* bogus, do later! */
  571                 goto reflect;
  572 
  573         case ICMP_MASKREQ: {
  574                 struct ifnet *rcvif;
  575                 int s, ss;
  576                 struct ifaddr *ifa = NULL;
  577 
                      /* Mask replies are off unless icmpmaskrepl is set. */
  578                 if (icmpmaskrepl == 0)
  579                         break;
  580                 /*
  581                  * We are not able to respond with all ones broadcast
  582                  * unless we receive it over a point-to-point interface.
  583                  */
  584                 if (icmplen < ICMP_MASKLEN) {
  585                         ICMP_STATINC(ICMP_STAT_BADLEN);
  586                         break;
  587                 }
                      /*
                       * Choose the address used to look up an interface
                       * address: the sender's for a request sent to the
                       * all-ones or null destination, else the destination.
                       */
  588                 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
  589                     in_nullhost(ip->ip_dst))
  590                         icmpdst.sin_addr = ip->ip_src;
  591                 else
  592                         icmpdst.sin_addr = ip->ip_dst;
  593                 ss = pserialize_read_enter();
  594                 rcvif = m_get_rcvif(m, &s);
  595                 if (__predict_true(rcvif != NULL))
  596                         ifa = ifaof_ifpforaddr(sintosa(&icmpdst), rcvif);
  597                 m_put_rcvif(rcvif, &s);
  598                 if (ifa == NULL) {
  599                         pserialize_read_exit(ss);
  600                         break;
  601                 }
  602                 ia = ifatoia(ifa);
  603                 icp->icmp_type = ICMP_MASKREPLY;
  604                 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
                      /*
                       * A request from the null address gets the interface's
                       * broadcast or point-to-point peer address written into
                       * ip_src, so that the reply is sent there.
                       */
  605                 if (in_nullhost(ip->ip_src)) {
  606                         if (ia->ia_ifp->if_flags & IFF_BROADCAST)
  607                                 ip->ip_src = ia->ia_broadaddr.sin_addr;
  608                         else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
  609                                 ip->ip_src = ia->ia_dstaddr.sin_addr;
  610                 }
  611                 pserialize_read_exit(ss);
                      /*
                       * Shared tail of ECHO, TSTAMP and MASKREQ: count the
                       * reply and send the modified packet back.
                       */
  612 reflect:
  613                 {
  614                         uint64_t *icps = percpu_getref(icmpstat_percpu);
  615                         icps[ICMP_STAT_REFLECT]++;
  616                         icps[ICMP_STAT_OUTHIST + icp->icmp_type]++;
  617                         percpu_putref(icmpstat_percpu);
  618                 }
  619                 icmp_reflect(m);
  620                 return;
  621         }
  622 
  623         case ICMP_REDIRECT:
  624                 if (code > 3)
  625                         goto badcode;
                      /* Redirects are dropped when icmp_rediraccept is 0. */
  626                 if (icmp_rediraccept == 0)
  627                         goto freeit;
  628                 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
  629                     icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
  630                         ICMP_STATINC(ICMP_STAT_BADLEN);
  631                         break;
  632                 }
  633                 /*
  634                  * Short circuit routing redirects to force
  635                  * immediate change in the kernel's routing
  636                  * tables.  The message is also handed to anyone
  637                  * listening on a raw socket (e.g. the routing
  638                  * daemon for use in updating its tables).
  639                  */
  640                 icmpgw.sin_addr = ip->ip_src;
  641                 icmpdst.sin_addr = icp->icmp_gwaddr;
  642                 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
  643                 rt = NULL;
  644                 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
  645                     NULL, RTF_GATEWAY | RTF_HOST, sintosa(&icmpgw), &rt);
                      /*
                       * With a non-zero icmp_redirtimeout, attach a timer
                       * from the redirect queue to the returned route; a
                       * failure is only logged.
                       */
  646                 mutex_enter(&icmp_mtx);
  647                 if (rt != NULL && icmp_redirtimeout != 0) {
  648                         i = rt_timer_add(rt, icmp_redirect_timeout,
  649                                          icmp_redirect_timeout_q);
  650                         if (i) {
  651                                 char buf[INET_ADDRSTRLEN];
  652                                 log(LOG_ERR, "ICMP:  redirect failed to "
  653                                     "register timeout for route to %s, "
  654                                     "code %d\n",
  655                                     IN_PRINT(buf, &icp->icmp_ip.ip_dst), i);
  656                         }
  657                 }
  658                 mutex_exit(&icmp_mtx);
  659                 if (rt != NULL)
  660                         rt_unref(rt);
  661 
  662                 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
  663 #if defined(IPSEC)
  664                 if (ipsec_used)
  665                         key_sa_routechange((struct sockaddr *)&icmpsrc);
  666 #endif
  667                 break;
  668 
  669         /*
  670          * No kernel processing for the following;
  671          * just fall through to send to raw listener.
  672          */
  673         case ICMP_ECHOREPLY:
  674         case ICMP_ROUTERADVERT:
  675         case ICMP_ROUTERSOLICIT:
  676         case ICMP_TSTAMPREPLY:
  677         case ICMP_IREQREPLY:
  678         case ICMP_MASKREPLY:
  679         default:
  680                 break;
  681         }
  682 
  683 raw:
  684         /*
  685          * NOTE(review): this comment originally named pim_input(), which
  686          * looks like a copy-paste slip; presumably it is this function
               * that is always entered holding softnet_lock, by
  687          * ipintr()(!NET_MPSAFE) or PR_INPUT_WRAP()(NET_MPSAFE) - confirm.
  688          */
  689         KASSERT(mutex_owned(softnet_lock));
  690         rip_input(m, hlen, proto);
  691         return;
  692 
  693 freeit:
  694         m_freem(m);
  695         return;
  696 }
  696 
  697 void
  698 icmp_input(struct mbuf *m, int off, int proto)
  699 {
              /*
               * Pass the packet to icmp_wqinput. icmp_init() creates that
               * object with _icmp_input() as its handler, so presumably the
               * actual processing runs there from a workqueue.
               */
  700         wqinput_input(icmp_wqinput, m, off, proto);
  701 }
  702 
  703 /*
  704  * Reflect the ip packet back to the source
  705  */
  706 void
  707 icmp_reflect(struct mbuf *m)
  708 {
  709         struct ip *ip = mtod(m, struct ip *);
  710         struct in_ifaddr *ia;
  711         struct ifaddr *ifa;
  712         struct sockaddr_in *sin;
  713         struct in_addr t;
  714         struct mbuf *opts = NULL;
  715         int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
  716         struct ifnet *rcvif;
  717         struct psref psref, psref_ia;
  718         int s;
  719         int bound;
  720 
  721         bound = curlwp_bind();
  722 
  723         if (!in_canforward(ip->ip_src) &&
  724             ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
  725              htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
  726                 m_freem(m);     /* Bad return address */
  727                 goto done;      /* ip_output() will check for broadcast */
  728         }
  729         t = ip->ip_dst;
  730         ip->ip_dst = ip->ip_src;
  731 
  732         /*
  733          * If the incoming packet was addressed directly to us, use
  734          * dst as the src for the reply.  Otherwise (broadcast or
  735          * anonymous), use an address which corresponds to the
  736          * incoming interface, with a preference for the address which
  737          * corresponds to the route to the destination of the ICMP.
  738          */
  739 
  740         /* Look for packet addressed to us */
  741         ia = in_get_ia_psref(t, &psref_ia);
  742         if (ia && (ia->ia4_flags & IN_IFF_NOTREADY)) {
  743                 ia4_release(ia, &psref_ia);
  744                 ia = NULL;
  745         }
  746 
  747         rcvif = m_get_rcvif_psref(m, &psref);
  748 
  749         /* look for packet sent to broadcast address */
  750         if (ia == NULL && rcvif &&
  751             (rcvif->if_flags & IFF_BROADCAST)) {
  752                 s = pserialize_read_enter();
  753                 IFADDR_READER_FOREACH(ifa, rcvif) {
  754                         if (ifa->ifa_addr->sa_family != AF_INET)
  755                                 continue;
  756                         if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
  757                                 ia = ifatoia(ifa);
  758                                 if ((ia->ia4_flags & IN_IFF_NOTREADY) == 0)
  759                                         break;
  760                                 ia = NULL;
  761                         }
  762                 }
  763                 if (ia != NULL)
  764                         ia4_acquire(ia, &psref_ia);
  765                 pserialize_read_exit(s);
  766         }
  767 
  768         sin = ia ? &ia->ia_addr : NULL;
  769 
  770         /*
  771          * if the packet is addressed somewhere else, compute the
  772          * source address for packets routed back to the source, and
  773          * use that, if it's an address on the interface which
  774          * received the packet
  775          */
  776         if (sin == NULL && rcvif) {
  777                 struct sockaddr_in sin_dst;
  778                 struct route icmproute;
  779                 int errornum;
  780 
  781                 sockaddr_in_init(&sin_dst, &ip->ip_dst, 0);
  782                 memset(&icmproute, 0, sizeof(icmproute));
  783                 errornum = 0;
  784                 ia = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum,
  785                     &psref_ia);
  786                 /* errornum is never used */
  787                 rtcache_free(&icmproute);
  788                 /* check to make sure sin is a source address on rcvif */
  789                 if (ia != NULL) {
  790                         sin = &ia->ia_addr;
  791                         t = sin->sin_addr;
  792                         sin = NULL;
  793                         ia4_release(ia, &psref_ia);
  794                         ia = in_get_ia_on_iface_psref(t, rcvif, &psref_ia);
  795                         if (ia != NULL)
  796                                 sin = &ia->ia_addr;
  797                 }
  798         }
  799 
  800         /*
  801          * if it was not addressed to us, but the route doesn't go out
  802          * the source interface, pick an address on the source
  803          * interface.  This can happen when routing is asymmetric, or
  804          * when the incoming packet was encapsulated
  805          */
  806         if (sin == NULL && rcvif) {
  807                 KASSERT(ia == NULL);
  808                 s = pserialize_read_enter();
  809                 IFADDR_READER_FOREACH(ifa, rcvif) {
  810                         if (ifa->ifa_addr->sa_family != AF_INET)
  811                                 continue;
  812                         sin = &(ifatoia(ifa)->ia_addr);
  813                         ia = ifatoia(ifa);
  814                         ia4_acquire(ia, &psref_ia);
  815                         break;
  816                 }
  817                 pserialize_read_exit(s);
  818         }
  819 
  820         m_put_rcvif_psref(rcvif, &psref);
  821 
  822         /*
  823          * The following happens if the packet was not addressed to us,
  824          * and was received on an interface with no IP address:
  825          * We find the first AF_INET address on the first non-loopback
  826          * interface.
  827          */
  828         if (sin == NULL) {
  829                 KASSERT(ia == NULL);
  830                 s = pserialize_read_enter();
  831                 IN_ADDRLIST_READER_FOREACH(ia) {
  832                         if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
  833                                 continue;
  834                         sin = &ia->ia_addr;
  835                         ia4_acquire(ia, &psref_ia);
  836                         break;
  837                 }
  838                 pserialize_read_exit(s);
  839         }
  840 
  841         /*
  842          * If we still didn't find an address, punt.  We could have an
  843          * interface up (and receiving packets) with no address.
  844          */
  845         if (sin == NULL) {
  846                 KASSERT(ia == NULL);
  847                 m_freem(m);
  848                 goto done;
  849         }
  850 
  851         ip->ip_src = sin->sin_addr;
  852         ip->ip_ttl = MAXTTL;
  853 
  854         if (ia != NULL)
  855                 ia4_release(ia, &psref_ia);
  856 
  857         if (optlen > 0) {
  858                 u_char *cp;
  859                 int opt, cnt;
  860                 u_int len;
  861 
  862                 /*
  863                  * Retrieve any source routing from the incoming packet;
  864                  * add on any record-route or timestamp options.
  865                  */
  866                 cp = (u_char *)(ip + 1);
  867                 if ((opts = ip_srcroute(m)) == NULL &&
  868                     (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
  869                         MCLAIM(opts, m->m_owner);
  870                         opts->m_len = sizeof(struct in_addr);
  871                         *mtod(opts, struct in_addr *) = zeroin_addr;
  872                 }
  873 
  874                 if (opts) {
  875                         for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
  876                                 opt = cp[IPOPT_OPTVAL];
  877                                 if (opt == IPOPT_EOL)
  878                                         break;
  879                                 if (opt == IPOPT_NOP)
  880                                         len = 1;
  881                                 else {
  882                                         if (cnt < IPOPT_OLEN + sizeof(*cp))
  883                                                 break;
  884                                         len = cp[IPOPT_OLEN];
  885                                         if (len < IPOPT_OLEN + sizeof(*cp) ||
  886                                             len > cnt)
  887                                                 break;
  888                                 }
  889 
  890                                 /* Overflows can't happen */
  891                                 KASSERT(opts->m_len + len <= MHLEN);
  892 
  893                                 if (opt == IPOPT_RR || opt == IPOPT_TS ||
  894                                     opt == IPOPT_SECURITY) {
  895                                         memmove(mtod(opts, char *) +
  896                                             opts->m_len, cp, len);
  897                                         opts->m_len += len;
  898                                 }
  899                         }
  900 
  901                         /* Terminate & pad, if necessary */
  902                         if ((cnt = opts->m_len % 4) != 0) {
  903                                 for (; cnt < 4; cnt++) {
  904                                         *(mtod(opts, char *) + opts->m_len) =
  905                                             IPOPT_EOL;
  906                                         opts->m_len++;
  907                                 }
  908                         }
  909                 }
  910 
  911                 /*
  912                  * Now strip out original options by copying rest of first
  913                  * mbuf's data back, and adjust the IP length.
  914                  */
  915                 ip->ip_len = htons(ntohs(ip->ip_len) - optlen);
  916                 ip->ip_hl = sizeof(struct ip) >> 2;
  917                 m->m_len -= optlen;
  918                 if (m->m_flags & M_PKTHDR)
  919                         m->m_pkthdr.len -= optlen;
  920                 optlen += sizeof(struct ip);
  921                 memmove(ip + 1, (char *)ip + optlen,
  922                     (unsigned)(m->m_len - sizeof(struct ip)));
  923         }
  924         m_tag_delete_chain(m);
  925         m->m_flags &= ~(M_BCAST|M_MCAST);
  926 
  927         /*
  928          * Clear any in-bound checksum flags for this packet.
  929          */
  930         if (m->m_flags & M_PKTHDR)
  931                 m->m_pkthdr.csum_flags = 0;
  932 
  933         icmp_send(m, opts);
  934 done:
  935         curlwp_bindx(bound);
  936         if (opts)
  937                 (void)m_free(opts);
  938 }
  939 
  940 /*
  941  * Send an icmp packet back to the ip level,
  942  * after supplying a checksum.
  943  */
  944 static void
  945 icmp_send(struct mbuf *m, struct mbuf *opts)
  946 {
  947         struct ip *ip = mtod(m, struct ip *);
  948         int hlen;
  949         struct icmp *icp;
  950 
  951         hlen = ip->ip_hl << 2;
  952         m->m_data += hlen;
  953         m->m_len -= hlen;
  954         icp = mtod(m, struct icmp *);
  955         icp->icmp_cksum = 0;
  956         icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
  957         m->m_data -= hlen;
  958         m->m_len += hlen;
  959 
  960         (void)ip_output(m, opts, NULL, 0, NULL, NULL);
  961 }
  962 
  963 n_time
  964 iptime(void)
  965 {
  966         struct timeval atv;
  967         u_long t;
  968 
  969         microtime(&atv);
  970         t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
  971         return (htonl(t));
  972 }
  973 
  974 /*
  975  * sysctl helper routine for net.inet.icmp.returndatabytes.  ensures
  976  * that the new value is in the correct range.
  977  */
  978 static int
  979 sysctl_net_inet_icmp_returndatabytes(SYSCTLFN_ARGS)
  980 {
  981         int error, t;
  982         struct sysctlnode node;
  983 
  984         node = *rnode;
  985         node.sysctl_data = &t;
  986         t = icmpreturndatabytes;
  987         error = sysctl_lookup(SYSCTLFN_CALL(&node));
  988         if (error || newp == NULL)
  989                 return error;
  990 
  991         if (t < 8 || t > 512)
  992                 return EINVAL;
  993         icmpreturndatabytes = t;
  994 
  995         return 0;
  996 }
  997 
  998 /*
  999  * sysctl helper routine for net.inet.icmp.redirtimeout.  ensures that
 1000  * the given value is not less than zero and then resets the timeout
 1001  * queue.
 1002  */
 1003 static int
 1004 sysctl_net_inet_icmp_redirtimeout(SYSCTLFN_ARGS)
 1005 {
 1006         int error, tmp;
 1007         struct sysctlnode node;
 1008 
 1009         mutex_enter(&icmp_mtx);
 1010 
 1011         node = *rnode;
 1012         node.sysctl_data = &tmp;
 1013         tmp = icmp_redirtimeout;
 1014         error = sysctl_lookup(SYSCTLFN_CALL(&node));
 1015         if (error || newp == NULL)
 1016                 goto out;
 1017         if (tmp < 0) {
 1018                 error = EINVAL;
 1019                 goto out;
 1020         }
 1021         icmp_redirtimeout = tmp;
 1022 
 1023         /*
 1024          * was it a *defined* side-effect that anyone even *reading*
 1025          * this value causes these things to happen?
 1026          */
 1027         if (icmp_redirect_timeout_q != NULL) {
 1028                 if (icmp_redirtimeout == 0) {
 1029                         rt_timer_queue_destroy(icmp_redirect_timeout_q);
 1030                         icmp_redirect_timeout_q = NULL;
 1031                 } else {
 1032                         rt_timer_queue_change(icmp_redirect_timeout_q,
 1033                             icmp_redirtimeout);
 1034                 }
 1035         } else if (icmp_redirtimeout > 0) {
 1036                 icmp_redirect_timeout_q =
 1037                     rt_timer_queue_create(icmp_redirtimeout);
 1038         }
 1039         error = 0;
 1040 out:
 1041         mutex_exit(&icmp_mtx);
 1042         return error;
 1043 }
 1044 
 1045 static int
 1046 sysctl_net_inet_icmp_stats(SYSCTLFN_ARGS)
 1047 {
 1048 
 1049         return (NETSTAT_SYSCTL(icmpstat_percpu, ICMP_NSTATS));
 1050 }
 1051 
 1052 static void
 1053 sysctl_netinet_icmp_setup(struct sysctllog **clog)
 1054 {
 1055 
 1056         sysctl_createv(clog, 0, NULL, NULL,
 1057                        CTLFLAG_PERMANENT,
 1058                        CTLTYPE_NODE, "inet", NULL,
 1059                        NULL, 0, NULL, 0,
 1060                        CTL_NET, PF_INET, CTL_EOL);
 1061         sysctl_createv(clog, 0, NULL, NULL,
 1062                        CTLFLAG_PERMANENT,
 1063                        CTLTYPE_NODE, "icmp",
 1064                        SYSCTL_DESCR("ICMPv4 related settings"),
 1065                        NULL, 0, NULL, 0,
 1066                        CTL_NET, PF_INET, IPPROTO_ICMP, CTL_EOL);
 1067 
 1068         sysctl_createv(clog, 0, NULL, NULL,
 1069                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 1070                        CTLTYPE_INT, "maskrepl",
 1071                        SYSCTL_DESCR("Respond to ICMP_MASKREQ messages"),
 1072                        NULL, 0, &icmpmaskrepl, 0,
 1073                        CTL_NET, PF_INET, IPPROTO_ICMP,
 1074                        ICMPCTL_MASKREPL, CTL_EOL);
 1075         sysctl_createv(clog, 0, NULL, NULL,
 1076                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 1077                        CTLTYPE_INT, "returndatabytes",
 1078                        SYSCTL_DESCR("Number of bytes to return in an ICMP "
 1079                                     "error message"),
 1080                        sysctl_net_inet_icmp_returndatabytes, 0,
 1081                        &icmpreturndatabytes, 0,
 1082                        CTL_NET, PF_INET, IPPROTO_ICMP,
 1083                        ICMPCTL_RETURNDATABYTES, CTL_EOL);
 1084         sysctl_createv(clog, 0, NULL, NULL,
 1085                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 1086                        CTLTYPE_INT, "errppslimit",
 1087                        SYSCTL_DESCR("Maximum number of outgoing ICMP error "
 1088                                     "messages per second"),
 1089                        NULL, 0, &icmperrppslim, 0,
 1090                        CTL_NET, PF_INET, IPPROTO_ICMP,
 1091                        ICMPCTL_ERRPPSLIMIT, CTL_EOL);
 1092         sysctl_createv(clog, 0, NULL, NULL,
 1093                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 1094                        CTLTYPE_INT, "rediraccept",
 1095                        SYSCTL_DESCR("Accept ICMP_REDIRECT messages"),
 1096                        NULL, 0, &icmp_rediraccept, 0,
 1097                        CTL_NET, PF_INET, IPPROTO_ICMP,
 1098                        ICMPCTL_REDIRACCEPT, CTL_EOL);
 1099         sysctl_createv(clog, 0, NULL, NULL,
 1100                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 1101                        CTLTYPE_INT, "redirtimeout",
 1102                        SYSCTL_DESCR("Lifetime of ICMP_REDIRECT generated "
 1103                                     "routes"),
 1104                        sysctl_net_inet_icmp_redirtimeout, 0,
 1105                        &icmp_redirtimeout, 0,
 1106                        CTL_NET, PF_INET, IPPROTO_ICMP,
 1107                        ICMPCTL_REDIRTIMEOUT, CTL_EOL);
 1108         sysctl_createv(clog, 0, NULL, NULL,
 1109                        CTLFLAG_PERMANENT,
 1110                        CTLTYPE_STRUCT, "stats",
 1111                        SYSCTL_DESCR("ICMP statistics"), 
 1112                        sysctl_net_inet_icmp_stats, 0, NULL, 0,
 1113                        CTL_NET, PF_INET, IPPROTO_ICMP, ICMPCTL_STATS,
 1114                        CTL_EOL);
 1115         sysctl_createv(clog, 0, NULL, NULL,
 1116                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 1117                        CTLTYPE_INT, "bmcastecho",
 1118                        SYSCTL_DESCR("Respond to ICMP_ECHO or ICMP_TIMESTAMP "
 1119                                     "message to the broadcast or multicast"),
 1120                        NULL, 0, &icmpbmcastecho, 0,
 1121                        CTL_NET, PF_INET, IPPROTO_ICMP, ICMPCTL_BMCASTECHO,
 1122                        CTL_EOL);
 1123         sysctl_createv(clog, 0, NULL, NULL,
 1124                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 1125                        CTLTYPE_BOOL, "dynamic_rt_msg",
 1126                        SYSCTL_DESCR("Send routing message for RTF_DYNAMIC"),
 1127                        NULL, 0, &icmp_dynamic_rt_msg, 0,
 1128                        CTL_NET, PF_INET, IPPROTO_ICMP, ICMPCTL_DYNAMIC_RT_MSG,
 1129                        CTL_EOL);
 1130 }
 1131 
 1132 void
 1133 icmp_statinc(u_int stat)
 1134 {
 1135 
 1136         KASSERT(stat < ICMP_NSTATS);
 1137         ICMP_STATINC(stat);
 1138 }
 1139 
 1140 /* Table of common MTUs */
 1141 static const u_int mtu_table[] = {
 1142         65535, 65280, 32000, 17914, 9180, 8166,
 1143         4352, 2002, 1492, 1006, 508, 296, 68, 0
 1144 };
 1145 
 1146 void
 1147 icmp_mtudisc(struct icmp *icp, struct in_addr faddr)
 1148 {
 1149         struct icmp_mtudisc_callback *mc;
 1150         struct sockaddr *dst = sintosa(&icmpsrc);
 1151         struct rtentry *rt;
 1152         u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
 1153         int error;
 1154 
 1155         rt = rtalloc1(dst, 1);
 1156         if (rt == NULL)
 1157                 return;
 1158 
 1159         /* If we didn't get a host route, allocate one */
 1160         if ((rt->rt_flags & RTF_HOST) == 0) {
 1161                 struct rtentry *nrt;
 1162 
 1163                 error = rtrequest(RTM_ADD, dst, rt->rt_gateway, NULL,
 1164                     RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
 1165                 if (error) {
 1166                         rt_unref(rt);
 1167                         return;
 1168                 }
 1169                 nrt->rt_rmx = rt->rt_rmx;
 1170                 rt_newmsg_dynamic(RTM_ADD, nrt);
 1171                 rt_unref(rt);
 1172                 rt = nrt;
 1173         }
 1174 
 1175         mutex_enter(&icmp_mtx);
 1176         error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
 1177         mutex_exit(&icmp_mtx);
 1178         if (error) {
 1179                 rt_unref(rt);
 1180                 return;
 1181         }
 1182 
 1183         if (mtu == 0) {
 1184                 int i = 0;
 1185 
 1186                 mtu = ntohs(icp->icmp_ip.ip_len);
 1187                 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
 1188                 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
 1189                         mtu -= (icp->icmp_ip.ip_hl << 2);
 1190 
 1191                 /* If we still can't guess a value, try the route */
 1192                 if (mtu == 0) {
 1193                         mtu = rt->rt_rmx.rmx_mtu;
 1194 
 1195                         /* If no route mtu, default to the interface mtu */
 1196                         if (mtu == 0)
 1197                                 mtu = rt->rt_ifp->if_mtu;
 1198                 }
 1199 
 1200                 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++) {
 1201                         if (mtu > mtu_table[i]) {
 1202                                 mtu = mtu_table[i];
 1203                                 break;
 1204                         }
 1205                 }
 1206         }
 1207 
 1208         /*
 1209          * XXX:   RTV_MTU is overloaded, since the admin can set it
 1210          *        to turn off PMTU for a route, and the kernel can
 1211          *        set it to indicate a serious problem with PMTU
 1212          *        on a route.  We should be using a separate flag
 1213          *        for the kernel to indicate this.
 1214          */
 1215 
 1216         if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
 1217                 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
 1218                         rt->rt_rmx.rmx_locks |= RTV_MTU;
 1219                 else if (rt->rt_rmx.rmx_mtu > mtu ||
 1220                          rt->rt_rmx.rmx_mtu == 0) {
 1221                         ICMP_STATINC(ICMP_STAT_PMTUCHG);
 1222                         rt->rt_rmx.rmx_mtu = mtu;
 1223                 }
 1224         }
 1225 
 1226         if (rt != NULL)
 1227                 rt_unref(rt);
 1228 
 1229         /*
 1230          * Notify protocols that the MTU for this destination
 1231          * has changed.
 1232          */
 1233         mutex_enter(&icmp_mtx);
 1234         for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
 1235              mc = LIST_NEXT(mc, mc_list))
 1236                 (*mc->mc_func)(faddr);
 1237         mutex_exit(&icmp_mtx);
 1238 }
 1239 
 1240 /*
 1241  * Return the next larger or smaller MTU plateau (table from RFC 1191)
 1242  * given current value MTU.  If DIR is less than zero, a larger plateau
 1243  * is returned; otherwise, a smaller value is returned.
 1244  */
 1245 u_int
 1246 ip_next_mtu(u_int mtu, int dir) /* XXX unused */
 1247 {
 1248         int i;
 1249 
 1250         for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
 1251                 if (mtu >= mtu_table[i])
 1252                         break;
 1253         }
 1254 
 1255         if (dir < 0) {
 1256                 if (i == 0) {
 1257                         return 0;
 1258                 } else {
 1259                         return mtu_table[i - 1];
 1260                 }
 1261         } else {
 1262                 if (mtu_table[i] == 0) {
 1263                         return 0;
 1264                 } else if (mtu > mtu_table[i]) {
 1265                         return mtu_table[i];
 1266                 } else {
 1267                         return mtu_table[i + 1];
 1268                 }
 1269         }
 1270 }
 1271 
 1272 static void
 1273 icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
 1274 {
 1275         struct rtentry *retrt;
 1276 
 1277         KASSERT(rt != NULL);
 1278         rt_assert_referenced(rt);
 1279 
 1280         if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
 1281             (RTF_DYNAMIC | RTF_HOST)) {
 1282                 rtrequest(RTM_DELETE, rt_getkey(rt),
 1283                     rt->rt_gateway, rt_mask(rt), rt->rt_flags, &retrt);
 1284                 rt_newmsg_dynamic(RTM_DELETE, retrt);
 1285                 rt_unref(rt);
 1286                 rt_free(retrt);
 1287         } else {
 1288                 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
 1289                         rt->rt_rmx.rmx_mtu = 0;
 1290                 }
 1291         }
 1292 }
 1293 
 1294 static void
 1295 icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r)
 1296 {
 1297         struct rtentry *retrt;
 1298 
 1299         KASSERT(rt != NULL);
 1300         rt_assert_referenced(rt);
 1301 
 1302         if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
 1303             (RTF_DYNAMIC | RTF_HOST)) {
 1304                 rtrequest(RTM_DELETE, rt_getkey(rt),
 1305                     rt->rt_gateway, rt_mask(rt), rt->rt_flags, &retrt);
 1306                 rt_newmsg_dynamic(RTM_DELETE, retrt);
 1307                 rt_unref(rt);
 1308                 rt_free(retrt);
 1309         }
 1310 }
 1311 
 1312 /*
 1313  * Perform rate limit check.
 1314  * Returns 0 if it is okay to send the icmp packet.
 1315  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
 1316  * limitation.
 1317  *
 1318  * XXX per-destination/type check necessary?
 1319  */
 1320 int
 1321 icmp_ratelimit(const struct in_addr *dst, const int type,
 1322     const int code)
 1323 {
 1324 
 1325         /* PPS limit */
 1326         if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
 1327             icmperrppslim)) {
 1328                 /* The packet is subject to rate limit */
 1329                 return 1;
 1330         }
 1331 
 1332         /* okay to send */
 1333         return 0;
 1334 }

Cache object: 91b7e3149494aa61c59d183c54ec4cb3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.