The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/udp_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
    5  *      The Regents of the University of California.
    6  * Copyright (c) 2008 Robert N. M. Watson
    7  * Copyright (c) 2010-2011 Juniper Networks, Inc.
    8  * Copyright (c) 2014 Kevin Lo
    9  * All rights reserved.
   10  *
   11  * Portions of this software were developed by Robert N. M. Watson under
   12  * contract to Juniper Networks, Inc.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)udp_usrreq.c        8.6 (Berkeley) 5/23/95
   39  */
   40 
   41 #include <sys/cdefs.h>
   42 __FBSDID("$FreeBSD$");
   43 
   44 #include "opt_inet.h"
   45 #include "opt_inet6.h"
   46 #include "opt_ipsec.h"
   47 #include "opt_rss.h"
   48 
   49 #include <sys/param.h>
   50 #include <sys/domain.h>
   51 #include <sys/eventhandler.h>
   52 #include <sys/jail.h>
   53 #include <sys/kernel.h>
   54 #include <sys/lock.h>
   55 #include <sys/malloc.h>
   56 #include <sys/mbuf.h>
   57 #include <sys/priv.h>
   58 #include <sys/proc.h>
   59 #include <sys/protosw.h>
   60 #include <sys/sdt.h>
   61 #include <sys/signalvar.h>
   62 #include <sys/socket.h>
   63 #include <sys/socketvar.h>
   64 #include <sys/sx.h>
   65 #include <sys/sysctl.h>
   66 #include <sys/syslog.h>
   67 #include <sys/systm.h>
   68 
   69 #include <vm/uma.h>
   70 
   71 #include <net/if.h>
   72 #include <net/if_var.h>
   73 #include <net/route.h>
   74 #include <net/rss_config.h>
   75 
   76 #include <netinet/in.h>
   77 #include <netinet/in_kdtrace.h>
   78 #include <netinet/in_pcb.h>
   79 #include <netinet/in_systm.h>
   80 #include <netinet/in_var.h>
   81 #include <netinet/ip.h>
   82 #ifdef INET6
   83 #include <netinet/ip6.h>
   84 #endif
   85 #include <netinet/ip_icmp.h>
   86 #include <netinet/icmp_var.h>
   87 #include <netinet/ip_var.h>
   88 #include <netinet/ip_options.h>
   89 #ifdef INET6
   90 #include <netinet6/ip6_var.h>
   91 #endif
   92 #include <netinet/udp.h>
   93 #include <netinet/udp_var.h>
   94 #include <netinet/udplite.h>
   95 #include <netinet/in_rss.h>
   96 
   97 #include <netipsec/ipsec_support.h>
   98 
   99 #include <machine/in_cksum.h>
  100 
  101 #include <security/mac/mac_framework.h>
  102 
  103 /*
  104  * UDP and UDP-Lite protocols implementation.
  105  * Per RFC 768, August, 1980.
  106  * Per RFC 3828, July, 2004.
  107  */
  108 
  109 /*
  110  * BSD 4.2 defaulted the udp checksum to be off.  Turning off udp checksums
  111  * removes the only data integrity mechanism for packets and malformed
  112  * packets that would otherwise be discarded due to bad checksums, and may
  113  * cause problems (especially for NFS data blocks).
  114  */
  115 VNET_DEFINE(int, udp_cksum) = 1;
  116 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_VNET | CTLFLAG_RW,
  117     &VNET_NAME(udp_cksum), 0, "compute udp checksum");
  118 
      /*
       * Per-VNET knob, off by default.  When non-zero, udp_input() logs a
       * LOG_INFO message for a unicast datagram for which no matching pcb was
       * found.
       */
  119 VNET_DEFINE(int, udp_log_in_vain) = 0;
  120 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_VNET | CTLFLAG_RW,
  121     &VNET_NAME(udp_log_in_vain), 0, "Log all incoming UDP packets");
  122 
      /*
       * Per-VNET knob, off by default.  When non-zero, udp_input() silently
       * drops a datagram with no matching pcb instead of answering with an ICMP
       * port unreachable, and does not keep a copy of the IP header for that
       * reply.
       */
  123 VNET_DEFINE(int, udp_blackhole) = 0;
  124 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
  125     &VNET_NAME(udp_blackhole), 0,
  126     "Do not send port unreachables for refused connects");
  127 
      /*
       * Send and receive space defaults.  Unlike the knobs above these are plain
       * globals (no VNET_DEFINE / CTLFLAG_VNET).  NOTE(review): presumably they
       * are consumed when a socket is attached; that code is not in this part of
       * the file.
       */
  128 u_long  udp_sendspace = 9216;           /* really max datagram size */
  129 SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
  130     &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
  131 
      /*
       * Room for 40 datagrams of 1K payload each, plus one socket address per
       * datagram; the address size is that of sockaddr_in6 when INET6 is
       * configured and of sockaddr_in otherwise.
       */
  132 u_long  udp_recvspace = 40 * (1024 +
  133 #ifdef INET6
  134                                       sizeof(struct sockaddr_in6)
  135 #else
  136                                       sizeof(struct sockaddr_in)
  137 #endif
  138                                       );        /* 40 1K datagrams */
  139 
  140 SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
  141     &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
  142 
      /*
       * Per-VNET pcb list heads and pcbinfo structures: udb/udbinfo for UDP
       * (initialized by udp_init()) and ulitecb/ulitecbinfo for UDP-Lite
       * (initialized by udplite_init()).  udpcb_zone is the UMA zone from which
       * struct udpcb is allocated; it is created in udp_init().
       */
  143 VNET_DEFINE(struct inpcbhead, udb);             /* from udp_var.h */
  144 VNET_DEFINE(struct inpcbinfo, udbinfo);
  145 VNET_DEFINE(struct inpcbhead, ulitecb);
  146 VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
  147 VNET_DEFINE_STATIC(uma_zone_t, udpcb_zone);
  148 #define V_udpcb_zone                    VNET(udpcb_zone)
  149 
      /*
       * Hash size handed (twice) to in_pcbinfo_init() for both UDP and UDP-Lite.
       * The #ifndef lets a definition made before this point take precedence.
       */
  150 #ifndef UDBHASHSIZE
  151 #define UDBHASHSIZE     128
  152 #endif
  153 
      /*
       * Per-VNET, per-CPU UDP statistics, exported through the "stats" sysctl
       * node and bumped via UDPSTAT_INC() / kmod_udpstat_inc().
       */
  154 VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat);          /* from udp_var.h */
  155 VNET_PCPUSTAT_SYSINIT(udpstat);
  156 SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
  157     udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
  158 
      /* The matching statistics teardown is only registered on VIMAGE kernels. */
  159 #ifdef VIMAGE
  160 VNET_PCPUSTAT_SYSUNINIT(udpstat);
  161 #endif /* VIMAGE */
      /*
       * Forward declarations of IPv4-only routines.  Their definitions are not
       * in this part of the file.
       */
  162 #ifdef INET
  163 static void     udp_detach(struct socket *so);
  164 static int      udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
  165                     struct mbuf *, struct thread *, int);
  166 #endif
  167 
  168 static void
  169 udp_zone_change(void *tag)
  170 {
  171 
  172         uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
  173         uma_zone_set_max(V_udpcb_zone, maxsockets);
  174 }
  175 
  176 static int
  177 udp_inpcb_init(void *mem, int size, int flags)
  178 {
  179         struct inpcb *inp;
  180 
  181         inp = mem;
  182         INP_LOCK_INIT(inp, "inp", "udpinp");
  183         return (0);
  184 }
  185 
  186 static int
  187 udplite_inpcb_init(void *mem, int size, int flags)
  188 {
  189         struct inpcb *inp;
  190 
  191         inp = mem;
  192         INP_LOCK_INIT(inp, "inp", "udpliteinp");
  193         return (0);
  194 }
  195 
  196 void
  197 udp_init(void)
  198 {
  199 
  200         /*
  201          * For now default to 2-tuple UDP hashing - until the fragment
  202          * reassembly code can also update the flowid.
  203          *
  204          * Once we can calculate the flowid that way and re-establish
  205          * a 4-tuple, flip this to 4-tuple.
  206          */
  207         in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
  208             "udp_inpcb", udp_inpcb_init, IPI_HASHFIELDS_2TUPLE);
  209         V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
  210             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  211         uma_zone_set_max(V_udpcb_zone, maxsockets);
  212         uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
  213         EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
  214             EVENTHANDLER_PRI_ANY);
  215 }
  216 
  217 void
  218 udplite_init(void)
  219 {
  220 
  221         in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
  222             UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init,
  223             IPI_HASHFIELDS_2TUPLE);
  224 }
  225 
  226 /*
  227  * Kernel module interface for updating udpstat.  The argument is an index
  228  * into udpstat treated as an array of u_long.  While this encodes the
  229  * general layout of udpstat into the caller, it doesn't encode its location,
  230  * so that future changes to add, for example, per-CPU stats support won't
  231  * cause binary compatibility problems for kernel modules.
  232  */
  233 void
  234 kmod_udpstat_inc(int statnum)
  235 {
  236 
  237         counter_u64_add(VNET(udpstat)[statnum], 1);
  238 }
  239 
  240 int
  241 udp_newudpcb(struct inpcb *inp)
  242 {
  243         struct udpcb *up;
  244 
  245         up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO);
  246         if (up == NULL)
  247                 return (ENOBUFS);
  248         inp->inp_ppcb = up;
  249         return (0);
  250 }
  251 
  252 void
  253 udp_discardcb(struct udpcb *up)
  254 {
  255 
  256         uma_zfree(V_udpcb_zone, up);
  257 }
  258 
  259 #ifdef VIMAGE
  260 static void
  261 udp_destroy(void *unused __unused)
  262 {
  263 
  264         in_pcbinfo_destroy(&V_udbinfo);
  265         uma_zdestroy(V_udpcb_zone);
  266 }
  267 VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL);
  268 
  269 static void
  270 udplite_destroy(void *unused __unused)
  271 {
  272 
  273         in_pcbinfo_destroy(&V_ulitecbinfo);
  274 }
  275 VNET_SYSUNINIT(udplite, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udplite_destroy,
  276     NULL);
  277 #endif
  278 
  279 #ifdef INET
  280 /*
  281  * Subroutine of udp_input(), which appends the provided mbuf chain to the
  282  * passed pcb/socket.  The caller must provide a sockaddr_in via udp_in that
  283  * contains the source address.  If the socket ends up being an IPv6 socket,
  284  * udp_append() will convert to a sockaddr_in6 before passing the address
  285  * into the socket code.
  286  *
  287  * In the normal case udp_append() will return 0, indicating that you
  288  * must unlock the inp. However if a tunneling protocol is in place we increment
  289  * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we
  290  * then decrement the reference count. If the inp_rele returns 1, indicating the
  291  * inp is gone, we return that to the caller to tell them *not* to unlock
  292  * the inp. In the case of multi-cast this will cause the distribution
  293  * to stop (though most tunneling protocols known currently do *not* use
  294  * multicast).
  295  */
  296 static int
  297 udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
  298     struct sockaddr_in *udp_in)
  299 {
  300         struct sockaddr *append_sa;
  301         struct socket *so;
  302         struct mbuf *tmpopts, *opts = NULL;
  303 #ifdef INET6
  304         struct sockaddr_in6 udp_in6;
  305 #endif
  306         struct udpcb *up;
  307 
  308         INP_LOCK_ASSERT(inp);
  309 
  310         /*
  311          * Engage the tunneling protocol.
  312          */
  313         up = intoudpcb(inp);
  314         if (up->u_tun_func != NULL) {
  315                 in_pcbref(inp);
  316                 INP_RUNLOCK(inp);
  317                 (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0],
  318                     up->u_tun_ctx);
  319                 INP_RLOCK(inp);
  320                 return (in_pcbrele_rlocked(inp));
  321         }
  322 
  323         off += sizeof(struct udphdr);
  324 
  325 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  326         /* Check AH/ESP integrity. */
  327         if (IPSEC_ENABLED(ipv4) &&
  328             IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) {
  329                 m_freem(n);
  330                 return (0);
  331         }
  332         if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */
  333                 if (IPSEC_ENABLED(ipv4) &&
  334                     UDPENCAP_INPUT(n, off, AF_INET) != 0)
  335                         return (0);     /* Consumed. */
  336         }
  337 #endif /* IPSEC */
  338 #ifdef MAC
  339         if (mac_inpcb_check_deliver(inp, n) != 0) {
  340                 m_freem(n);
  341                 return (0);
  342         }
  343 #endif /* MAC */
  344         if (inp->inp_flags & INP_CONTROLOPTS ||
  345             inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
  346 #ifdef INET6
  347                 if (inp->inp_vflag & INP_IPV6)
  348                         (void)ip6_savecontrol_v4(inp, n, &opts, NULL);
  349                 else
  350 #endif /* INET6 */
  351                         ip_savecontrol(inp, &opts, ip, n);
  352         }
  353         if ((inp->inp_vflag & INP_IPV4) && (inp->inp_flags2 & INP_ORIGDSTADDR)) {
  354                 tmpopts = sbcreatecontrol((caddr_t)&udp_in[1],
  355                         sizeof(struct sockaddr_in), IP_ORIGDSTADDR, IPPROTO_IP);
  356                 if (tmpopts) {
  357                         if (opts) {
  358                                 tmpopts->m_next = opts;
  359                                 opts = tmpopts;
  360                         } else
  361                                 opts = tmpopts;
  362                 }
  363         }
  364 #ifdef INET6
  365         if (inp->inp_vflag & INP_IPV6) {
  366                 bzero(&udp_in6, sizeof(udp_in6));
  367                 udp_in6.sin6_len = sizeof(udp_in6);
  368                 udp_in6.sin6_family = AF_INET6;
  369                 in6_sin_2_v4mapsin6(&udp_in[0], &udp_in6);
  370                 append_sa = (struct sockaddr *)&udp_in6;
  371         } else
  372 #endif /* INET6 */
  373                 append_sa = (struct sockaddr *)&udp_in[0];
  374         m_adj(n, off);
  375 
  376         so = inp->inp_socket;
  377         SOCKBUF_LOCK(&so->so_rcv);
  378         if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
  379                 soroverflow_locked(so);
  380                 m_freem(n);
  381                 if (opts)
  382                         m_freem(opts);
  383                 UDPSTAT_INC(udps_fullsock);
  384         } else
  385                 sorwakeup_locked(so);
  386         return (0);
  387 }
  388 
  389 int
  390 udp_input(struct mbuf **mp, int *offp, int proto)
  391 {
  392         struct ip *ip;
  393         struct udphdr *uh;
  394         struct ifnet *ifp;
  395         struct inpcb *inp;
  396         uint16_t len, ip_len;
  397         struct inpcbinfo *pcbinfo;
  398         struct ip save_ip;
  399         struct sockaddr_in udp_in[2];
  400         struct mbuf *m;
  401         struct m_tag *fwd_tag;
  402         struct epoch_tracker et;
  403         int cscov_partial, iphlen;
  404 
  405         m = *mp;
  406         iphlen = *offp;
  407         ifp = m->m_pkthdr.rcvif;
  408         *mp = NULL;
  409         UDPSTAT_INC(udps_ipackets);
  410 
  411         /*
  412          * Strip IP options, if any; should skip this, make available to
  413          * user, and use on returned packets, but we don't yet have a way to
  414          * check the checksum with options still present.
  415          */
  416         if (iphlen > sizeof (struct ip)) {
  417                 ip_stripoptions(m);
  418                 iphlen = sizeof(struct ip);
  419         }
  420 
  421         /*
  422          * Get IP and UDP header together in first mbuf.
  423          */
  424         if (m->m_len < iphlen + sizeof(struct udphdr)) {
  425                 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
  426                         UDPSTAT_INC(udps_hdrops);
  427                         return (IPPROTO_DONE);
  428                 }
  429         }
  430         ip = mtod(m, struct ip *);
  431         uh = (struct udphdr *)((caddr_t)ip + iphlen);
  432         cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0;
  433 
  434         /*
  435          * Destination port of 0 is illegal, based on RFC768.
  436          */
  437         if (uh->uh_dport == 0)
  438                 goto badunlocked;
  439 
  440         /*
  441          * Construct sockaddr format source address.  Stuff source address
  442          * and datagram in user buffer.
  443          */
  444         bzero(&udp_in[0], sizeof(struct sockaddr_in) * 2);
  445         udp_in[0].sin_len = sizeof(struct sockaddr_in);
  446         udp_in[0].sin_family = AF_INET;
  447         udp_in[0].sin_port = uh->uh_sport;
  448         udp_in[0].sin_addr = ip->ip_src;
  449         udp_in[1].sin_len = sizeof(struct sockaddr_in);
  450         udp_in[1].sin_family = AF_INET;
  451         udp_in[1].sin_port = uh->uh_dport;
  452         udp_in[1].sin_addr = ip->ip_dst;
  453 
  454         /*
  455          * Make mbuf data length reflect UDP length.  If not enough data to
  456          * reflect UDP length, drop.
  457          */
  458         len = ntohs((u_short)uh->uh_ulen);
  459         ip_len = ntohs(ip->ip_len) - iphlen;
  460         if (proto == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) {
  461                 /* Zero means checksum over the complete packet. */
  462                 if (len == 0)
  463                         len = ip_len;
  464                 cscov_partial = 0;
  465         }
  466         if (ip_len != len) {
  467                 if (len > ip_len || len < sizeof(struct udphdr)) {
  468                         UDPSTAT_INC(udps_badlen);
  469                         goto badunlocked;
  470                 }
  471                 if (proto == IPPROTO_UDP)
  472                         m_adj(m, len - ip_len);
  473         }
  474 
  475         /*
  476          * Save a copy of the IP header in case we want restore it for
  477          * sending an ICMP error message in response.
  478          */
  479         if (!V_udp_blackhole)
  480                 save_ip = *ip;
  481         else
  482                 memset(&save_ip, 0, sizeof(save_ip));
  483 
  484         /*
  485          * Checksum extended UDP header and data.
  486          */
  487         if (uh->uh_sum) {
  488                 u_short uh_sum;
  489 
  490                 if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
  491                     !cscov_partial) {
  492                         if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
  493                                 uh_sum = m->m_pkthdr.csum_data;
  494                         else
  495                                 uh_sum = in_pseudo(ip->ip_src.s_addr,
  496                                     ip->ip_dst.s_addr, htonl((u_short)len +
  497                                     m->m_pkthdr.csum_data + proto));
  498                         uh_sum ^= 0xffff;
  499                 } else {
  500                         char b[9];
  501 
  502                         bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
  503                         bzero(((struct ipovly *)ip)->ih_x1, 9);
  504                         ((struct ipovly *)ip)->ih_len = (proto == IPPROTO_UDP) ?
  505                             uh->uh_ulen : htons(ip_len);
  506                         uh_sum = in_cksum(m, len + sizeof (struct ip));
  507                         bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
  508                 }
  509                 if (uh_sum) {
  510                         UDPSTAT_INC(udps_badsum);
  511                         m_freem(m);
  512                         return (IPPROTO_DONE);
  513                 }
  514         } else {
  515                 if (proto == IPPROTO_UDP) {
  516                         UDPSTAT_INC(udps_nosum);
  517                 } else {
  518                         /* UDPLite requires a checksum */
  519                         /* XXX: What is the right UDPLite MIB counter here? */
  520                         m_freem(m);
  521                         return (IPPROTO_DONE);
  522                 }
  523         }
  524 
  525         pcbinfo = udp_get_inpcbinfo(proto);
  526         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
  527             in_broadcast(ip->ip_dst, ifp)) {
  528                 struct inpcb *last;
  529                 struct inpcbhead *pcblist;
  530 
  531                 INP_INFO_RLOCK_ET(pcbinfo, et);
  532                 pcblist = udp_get_pcblist(proto);
  533                 last = NULL;
  534                 CK_LIST_FOREACH(inp, pcblist, inp_list) {
  535                         if (inp->inp_lport != uh->uh_dport)
  536                                 continue;
  537 #ifdef INET6
  538                         if ((inp->inp_vflag & INP_IPV4) == 0)
  539                                 continue;
  540 #endif
  541                         if (inp->inp_laddr.s_addr != INADDR_ANY &&
  542                             inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
  543                                 continue;
  544                         if (inp->inp_faddr.s_addr != INADDR_ANY &&
  545                             inp->inp_faddr.s_addr != ip->ip_src.s_addr)
  546                                 continue;
  547                         if (inp->inp_fport != 0 &&
  548                             inp->inp_fport != uh->uh_sport)
  549                                 continue;
  550 
  551                         INP_RLOCK(inp);
  552 
  553                         if (__predict_false(inp->inp_flags2 & INP_FREED)) {
  554                                 INP_RUNLOCK(inp);
  555                                 continue;
  556                         }
  557 
  558                         /*
  559                          * XXXRW: Because we weren't holding either the inpcb
  560                          * or the hash lock when we checked for a match
  561                          * before, we should probably recheck now that the
  562                          * inpcb lock is held.
  563                          */
  564 
  565                         /*
  566                          * Handle socket delivery policy for any-source
  567                          * and source-specific multicast. [RFC3678]
  568                          */
  569                         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
  570                                 struct ip_moptions      *imo;
  571                                 struct sockaddr_in       group;
  572                                 int                      blocked;
  573 
  574                                 imo = inp->inp_moptions;
  575                                 if (imo == NULL) {
  576                                         INP_RUNLOCK(inp);
  577                                         continue;
  578                                 }
  579                                 bzero(&group, sizeof(struct sockaddr_in));
  580                                 group.sin_len = sizeof(struct sockaddr_in);
  581                                 group.sin_family = AF_INET;
  582                                 group.sin_addr = ip->ip_dst;
  583 
  584                                 blocked = imo_multi_filter(imo, ifp,
  585                                         (struct sockaddr *)&group,
  586                                         (struct sockaddr *)&udp_in[0]);
  587                                 if (blocked != MCAST_PASS) {
  588                                         if (blocked == MCAST_NOTGMEMBER)
  589                                                 IPSTAT_INC(ips_notmember);
  590                                         if (blocked == MCAST_NOTSMEMBER ||
  591                                             blocked == MCAST_MUTED)
  592                                                 UDPSTAT_INC(udps_filtermcast);
  593                                         INP_RUNLOCK(inp);
  594                                         continue;
  595                                 }
  596                         }
  597                         if (last != NULL) {
  598                                 struct mbuf *n;
  599 
  600                                 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) !=
  601                                     NULL) {
  602                                         if (proto == IPPROTO_UDPLITE)
  603                                                 UDPLITE_PROBE(receive, NULL, last, ip,
  604                                                     last, uh);
  605                                         else
  606                                                 UDP_PROBE(receive, NULL, last, ip, last,
  607                                                     uh);
  608                                         if (udp_append(last, ip, n, iphlen,
  609                                                 udp_in)) {
  610                                                 goto inp_lost;
  611                                         }
  612                                 }
  613                                 INP_RUNLOCK(last);
  614                         }
  615                         last = inp;
  616                         /*
  617                          * Don't look for additional matches if this one does
  618                          * not have either the SO_REUSEPORT or SO_REUSEADDR
  619                          * socket options set.  This heuristic avoids
  620                          * searching through all pcbs in the common case of a
  621                          * non-shared port.  It assumes that an application
  622                          * will never clear these options after setting them.
  623                          */
  624                         if ((last->inp_socket->so_options &
  625                             (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
  626                                 break;
  627                 }
  628 
  629                 if (last == NULL) {
  630                         /*
  631                          * No matching pcb found; discard datagram.  (No need
  632                          * to send an ICMP Port Unreachable for a broadcast
  633                          * or multicast datgram.)
  634                          */
  635                         UDPSTAT_INC(udps_noportbcast);
  636                         if (inp)
  637                                 INP_RUNLOCK(inp);
  638                         INP_INFO_RUNLOCK_ET(pcbinfo, et);
  639                         goto badunlocked;
  640                 }
  641                 if (proto == IPPROTO_UDPLITE)
  642                         UDPLITE_PROBE(receive, NULL, last, ip, last, uh);
  643                 else
  644                         UDP_PROBE(receive, NULL, last, ip, last, uh);
  645                 if (udp_append(last, ip, m, iphlen, udp_in) == 0) 
  646                         INP_RUNLOCK(last);
  647         inp_lost:
  648                 INP_INFO_RUNLOCK_ET(pcbinfo, et);
  649                 return (IPPROTO_DONE);
  650         }
  651 
  652         /*
  653          * Locate pcb for datagram.
  654          */
  655 
  656         /*
  657          * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
  658          */
  659         if ((m->m_flags & M_IP_NEXTHOP) &&
  660             (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
  661                 struct sockaddr_in *next_hop;
  662 
  663                 next_hop = (struct sockaddr_in *)(fwd_tag + 1);
  664 
  665                 /*
  666                  * Transparently forwarded. Pretend to be the destination.
  667                  * Already got one like this?
  668                  */
  669                 inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
  670                     ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
  671                 if (!inp) {
  672                         /*
  673                          * It's new.  Try to find the ambushing socket.
  674                          * Because we've rewritten the destination address,
  675                          * any hardware-generated hash is ignored.
  676                          */
  677                         inp = in_pcblookup(pcbinfo, ip->ip_src,
  678                             uh->uh_sport, next_hop->sin_addr,
  679                             next_hop->sin_port ? htons(next_hop->sin_port) :
  680                             uh->uh_dport, INPLOOKUP_WILDCARD |
  681                             INPLOOKUP_RLOCKPCB, ifp);
  682                 }
  683                 /* Remove the tag from the packet. We don't need it anymore. */
  684                 m_tag_delete(m, fwd_tag);
  685                 m->m_flags &= ~M_IP_NEXTHOP;
  686         } else
  687                 inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
  688                     ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
  689                     INPLOOKUP_RLOCKPCB, ifp, m);
  690         if (inp == NULL) {
  691                 if (V_udp_log_in_vain) {
  692                         char src[INET_ADDRSTRLEN];
  693                         char dst[INET_ADDRSTRLEN];
  694 
  695                         log(LOG_INFO,
  696                             "Connection attempt to UDP %s:%d from %s:%d\n",
  697                             inet_ntoa_r(ip->ip_dst, dst), ntohs(uh->uh_dport),
  698                             inet_ntoa_r(ip->ip_src, src), ntohs(uh->uh_sport));
  699                 }
  700                 if (proto == IPPROTO_UDPLITE)
  701                         UDPLITE_PROBE(receive, NULL, NULL, ip, NULL, uh);
  702                 else
  703                         UDP_PROBE(receive, NULL, NULL, ip, NULL, uh);
  704                 UDPSTAT_INC(udps_noport);
  705                 if (m->m_flags & (M_BCAST | M_MCAST)) {
  706                         UDPSTAT_INC(udps_noportbcast);
  707                         goto badunlocked;
  708                 }
  709                 if (V_udp_blackhole)
  710                         goto badunlocked;
  711                 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
  712                         goto badunlocked;
  713                 *ip = save_ip;
  714                 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
  715                 return (IPPROTO_DONE);
  716         }
  717 
  718         /*
  719          * Check the minimum TTL for socket.
  720          */
  721         INP_RLOCK_ASSERT(inp);
  722         if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
  723                 if (proto == IPPROTO_UDPLITE)
  724                         UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
  725                 else
  726                         UDP_PROBE(receive, NULL, inp, ip, inp, uh);
  727                 INP_RUNLOCK(inp);
  728                 m_freem(m);
  729                 return (IPPROTO_DONE);
  730         }
  731         if (cscov_partial) {
  732                 struct udpcb *up;
  733 
  734                 up = intoudpcb(inp);
  735                 if (up->u_rxcslen == 0 || up->u_rxcslen > len) {
  736                         INP_RUNLOCK(inp);
  737                         m_freem(m);
  738                         return (IPPROTO_DONE);
  739                 }
  740         }
  741 
  742         if (proto == IPPROTO_UDPLITE)
  743                 UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
  744         else
  745                 UDP_PROBE(receive, NULL, inp, ip, inp, uh);
  746         if (udp_append(inp, ip, m, iphlen, udp_in) == 0) 
  747                 INP_RUNLOCK(inp);
  748         return (IPPROTO_DONE);
  749 
  750 badunlocked:
  751         m_freem(m);
  752         return (IPPROTO_DONE);
  753 }
  754 #endif /* INET */
  755 
  756 /*
  757  * Notify a udp user of an asynchronous error; just wake up so that they can
  758  * collect error status.
  759  */
  760 struct inpcb *
  761 udp_notify(struct inpcb *inp, int errno)
  762 {
  763 
  764         INP_WLOCK_ASSERT(inp);
  765         if ((errno == EHOSTUNREACH || errno == ENETUNREACH ||
  766              errno == EHOSTDOWN) && inp->inp_route.ro_rt) {
  767                 RTFREE(inp->inp_route.ro_rt);
  768                 inp->inp_route.ro_rt = (struct rtentry *)NULL;
  769         }
  770 
  771         inp->inp_socket->so_error = errno;
  772         sorwakeup(inp->inp_socket);
  773         sowwakeup(inp->inp_socket);
  774         return (inp);
  775 }
  776 
  777 #ifdef INET
  778 static void
  779 udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
  780     struct inpcbinfo *pcbinfo)
  781 {
  782         struct ip *ip = vip;
  783         struct udphdr *uh;
  784         struct in_addr faddr;
  785         struct inpcb *inp;
  786 
  787         faddr = ((struct sockaddr_in *)sa)->sin_addr;
  788         if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
  789                 return;
  790 
  791         if (PRC_IS_REDIRECT(cmd)) {
  792                 /* signal EHOSTDOWN, as it flushes the cached route */
  793                 in_pcbnotifyall(&V_udbinfo, faddr, EHOSTDOWN, udp_notify);
  794                 return;
  795         }
  796 
  797         /*
  798          * Hostdead is ugly because it goes linearly through all PCBs.
  799          *
  800          * XXX: We never get this from ICMP, otherwise it makes an excellent
  801          * DoS attack on machines with many connections.
  802          */
  803         if (cmd == PRC_HOSTDEAD)
  804                 ip = NULL;
  805         else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
  806                 return;
  807         if (ip != NULL) {
  808                 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
  809                 inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
  810                     ip->ip_src, uh->uh_sport, INPLOOKUP_WLOCKPCB, NULL);
  811                 if (inp != NULL) {
  812                         INP_WLOCK_ASSERT(inp);
  813                         if (inp->inp_socket != NULL) {
  814                                 udp_notify(inp, inetctlerrmap[cmd]);
  815                         }
  816                         INP_WUNLOCK(inp);
  817                 } else {
  818                         inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
  819                                            ip->ip_src, uh->uh_sport,
  820                                            INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
  821                         if (inp != NULL) {
  822                                 struct udpcb *up;
  823                                 void *ctx;
  824                                 udp_tun_icmp_t func;
  825 
  826                                 up = intoudpcb(inp);
  827                                 ctx = up->u_tun_ctx;
  828                                 func = up->u_icmp_func;
  829                                 INP_RUNLOCK(inp);
  830                                 if (func != NULL)
  831                                         (*func)(cmd, sa, vip, ctx);
  832                         }
  833                 }
  834         } else
  835                 in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
  836                     udp_notify);
  837 }
  838 void
  839 udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
  840 {
  841 
  842         return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo));
  843 }
  844 
  845 void
  846 udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
  847 {
  848 
  849         return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo));
  850 }
  851 #endif /* INET */
  852 
  853 static int
  854 udp_pcblist(SYSCTL_HANDLER_ARGS)
  855 {
  856         int error, i, n;
  857         struct inpcb *inp, **inp_list;
  858         inp_gen_t gencnt;
  859         struct xinpgen xig;
  860         struct epoch_tracker et;
  861 
  862         /*
  863          * The process of preparing the PCB list is too time-consuming and
  864          * resource-intensive to repeat twice on every request.
  865          */
  866         if (req->oldptr == 0) {
  867                 n = V_udbinfo.ipi_count;
  868                 n += imax(n / 8, 10);
  869                 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
  870                 return (0);
  871         }
  872 
  873         if (req->newptr != 0)
  874                 return (EPERM);
  875 
  876         /*
  877          * OK, now we're committed to doing something.
  878          */
  879         INP_INFO_RLOCK_ET(&V_udbinfo, et);
  880         gencnt = V_udbinfo.ipi_gencnt;
  881         n = V_udbinfo.ipi_count;
  882         INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
  883 
  884         error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
  885                 + n * sizeof(struct xinpcb));
  886         if (error != 0)
  887                 return (error);
  888 
  889         bzero(&xig, sizeof(xig));
  890         xig.xig_len = sizeof xig;
  891         xig.xig_count = n;
  892         xig.xig_gen = gencnt;
  893         xig.xig_sogen = so_gencnt;
  894         error = SYSCTL_OUT(req, &xig, sizeof xig);
  895         if (error)
  896                 return (error);
  897 
  898         inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
  899         if (inp_list == NULL)
  900                 return (ENOMEM);
  901 
  902         INP_INFO_RLOCK_ET(&V_udbinfo, et);
  903         for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
  904              inp = CK_LIST_NEXT(inp, inp_list)) {
  905                 INP_WLOCK(inp);
  906                 if (inp->inp_gencnt <= gencnt &&
  907                     cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
  908                         in_pcbref(inp);
  909                         inp_list[i++] = inp;
  910                 }
  911                 INP_WUNLOCK(inp);
  912         }
  913         INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
  914         n = i;
  915 
  916         error = 0;
  917         for (i = 0; i < n; i++) {
  918                 inp = inp_list[i];
  919                 INP_RLOCK(inp);
  920                 if (inp->inp_gencnt <= gencnt) {
  921                         struct xinpcb xi;
  922 
  923                         in_pcbtoxinpcb(inp, &xi);
  924                         INP_RUNLOCK(inp);
  925                         error = SYSCTL_OUT(req, &xi, sizeof xi);
  926                 } else
  927                         INP_RUNLOCK(inp);
  928         }
  929         INP_INFO_WLOCK(&V_udbinfo);
  930         for (i = 0; i < n; i++) {
  931                 inp = inp_list[i];
  932                 INP_RLOCK(inp);
  933                 if (!in_pcbrele_rlocked(inp))
  934                         INP_RUNLOCK(inp);
  935         }
  936         INP_INFO_WUNLOCK(&V_udbinfo);
  937 
  938         if (!error) {
  939                 /*
  940                  * Give the user an updated idea of our state.  If the
  941                  * generation differs from what we told her before, she knows
  942                  * that something happened while we were processing this
  943                  * request, and it might be necessary to retry.
  944                  */
  945                 INP_INFO_RLOCK_ET(&V_udbinfo, et);
  946                 xig.xig_gen = V_udbinfo.ipi_gencnt;
  947                 xig.xig_sogen = so_gencnt;
  948                 xig.xig_count = V_udbinfo.ipi_count;
  949                 INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
  950                 error = SYSCTL_OUT(req, &xig, sizeof xig);
  951         }
  952         free(inp_list, M_TEMP);
  953         return (error);
  954 }
  955 
  956 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
  957     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
  958     udp_pcblist, "S,xinpcb", "List of active UDP sockets");
  959 
  960 #ifdef INET
  961 static int
  962 udp_getcred(SYSCTL_HANDLER_ARGS)
  963 {
  964         struct xucred xuc;
  965         struct sockaddr_in addrs[2];
  966         struct inpcb *inp;
  967         int error;
  968 
  969         error = priv_check(req->td, PRIV_NETINET_GETCRED);
  970         if (error)
  971                 return (error);
  972         error = SYSCTL_IN(req, addrs, sizeof(addrs));
  973         if (error)
  974                 return (error);
  975         inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
  976             addrs[0].sin_addr, addrs[0].sin_port,
  977             INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
  978         if (inp != NULL) {
  979                 INP_RLOCK_ASSERT(inp);
  980                 if (inp->inp_socket == NULL)
  981                         error = ENOENT;
  982                 if (error == 0)
  983                         error = cr_canseeinpcb(req->td->td_ucred, inp);
  984                 if (error == 0)
  985                         cru2x(inp->inp_cred, &xuc);
  986                 INP_RUNLOCK(inp);
  987         } else
  988                 error = ENOENT;
  989         if (error == 0)
  990                 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
  991         return (error);
  992 }
  993 
  994 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
  995     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
  996     udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
  997 #endif /* INET */
  998 
  999 int
 1000 udp_ctloutput(struct socket *so, struct sockopt *sopt)
 1001 {
 1002         struct inpcb *inp;
 1003         struct udpcb *up;
 1004         int isudplite, error, optval;
 1005 
 1006         error = 0;
 1007         isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
 1008         inp = sotoinpcb(so);
 1009         KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
 1010         INP_WLOCK(inp);
 1011         if (sopt->sopt_level != so->so_proto->pr_protocol) {
 1012 #ifdef INET6
 1013                 if (INP_CHECK_SOCKAF(so, AF_INET6)) {
 1014                         INP_WUNLOCK(inp);
 1015                         error = ip6_ctloutput(so, sopt);
 1016                 }
 1017 #endif
 1018 #if defined(INET) && defined(INET6)
 1019                 else
 1020 #endif
 1021 #ifdef INET
 1022                 {
 1023                         INP_WUNLOCK(inp);
 1024                         error = ip_ctloutput(so, sopt);
 1025                 }
 1026 #endif
 1027                 return (error);
 1028         }
 1029 
 1030         switch (sopt->sopt_dir) {
 1031         case SOPT_SET:
 1032                 switch (sopt->sopt_name) {
 1033 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 1034 #ifdef INET
 1035                 case UDP_ENCAP:
 1036                         if (!IPSEC_ENABLED(ipv4)) {
 1037                                 INP_WUNLOCK(inp);
 1038                                 return (ENOPROTOOPT);
 1039                         }
 1040                         error = UDPENCAP_PCBCTL(inp, sopt);
 1041                         break;
 1042 #endif /* INET */
 1043 #endif /* IPSEC */
 1044                 case UDPLITE_SEND_CSCOV:
 1045                 case UDPLITE_RECV_CSCOV:
 1046                         if (!isudplite) {
 1047                                 INP_WUNLOCK(inp);
 1048                                 error = ENOPROTOOPT;
 1049                                 break;
 1050                         }
 1051                         INP_WUNLOCK(inp);
 1052                         error = sooptcopyin(sopt, &optval, sizeof(optval),
 1053                             sizeof(optval));
 1054                         if (error != 0)
 1055                                 break;
 1056                         inp = sotoinpcb(so);
 1057                         KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
 1058                         INP_WLOCK(inp);
 1059                         up = intoudpcb(inp);
 1060                         KASSERT(up != NULL, ("%s: up == NULL", __func__));
 1061                         if ((optval != 0 && optval < 8) || (optval > 65535)) {
 1062                                 INP_WUNLOCK(inp);
 1063                                 error = EINVAL;
 1064                                 break;
 1065                         }
 1066                         if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
 1067                                 up->u_txcslen = optval;
 1068                         else
 1069                                 up->u_rxcslen = optval;
 1070                         INP_WUNLOCK(inp);
 1071                         break;
 1072                 default:
 1073                         INP_WUNLOCK(inp);
 1074                         error = ENOPROTOOPT;
 1075                         break;
 1076                 }
 1077                 break;
 1078         case SOPT_GET:
 1079                 switch (sopt->sopt_name) {
 1080 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 1081 #ifdef INET
 1082                 case UDP_ENCAP:
 1083                         if (!IPSEC_ENABLED(ipv4)) {
 1084                                 INP_WUNLOCK(inp);
 1085                                 return (ENOPROTOOPT);
 1086                         }
 1087                         error = UDPENCAP_PCBCTL(inp, sopt);
 1088                         break;
 1089 #endif /* INET */
 1090 #endif /* IPSEC */
 1091                 case UDPLITE_SEND_CSCOV:
 1092                 case UDPLITE_RECV_CSCOV:
 1093                         if (!isudplite) {
 1094                                 INP_WUNLOCK(inp);
 1095                                 error = ENOPROTOOPT;
 1096                                 break;
 1097                         }
 1098                         up = intoudpcb(inp);
 1099                         KASSERT(up != NULL, ("%s: up == NULL", __func__));
 1100                         if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
 1101                                 optval = up->u_txcslen;
 1102                         else
 1103                                 optval = up->u_rxcslen;
 1104                         INP_WUNLOCK(inp);
 1105                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1106                         break;
 1107                 default:
 1108                         INP_WUNLOCK(inp);
 1109                         error = ENOPROTOOPT;
 1110                         break;
 1111                 }
 1112                 break;
 1113         }       
 1114         return (error);
 1115 }
 1116 
 1117 #ifdef INET6
 1118 /* The logic here is derived from ip6_setpktopt(). See comments there. */
 1119 static int
 1120 udp_v4mapped_pktinfo(struct cmsghdr *cm, struct sockaddr_in * src,
 1121     struct inpcb *inp, int flags)
 1122 {
 1123         struct ifnet *ifp;
 1124         struct in6_pktinfo *pktinfo;
 1125         struct in_addr ia;
 1126 
 1127         if ((flags & PRUS_IPV6) == 0)
 1128                 return (0);
 1129 
 1130         if (cm->cmsg_level != IPPROTO_IPV6)
 1131                 return (0);
 1132 
 1133         if  (cm->cmsg_type != IPV6_2292PKTINFO &&
 1134             cm->cmsg_type != IPV6_PKTINFO)
 1135                 return (0);
 1136 
 1137         if (cm->cmsg_len !=
 1138             CMSG_LEN(sizeof(struct in6_pktinfo)))
 1139                 return (EINVAL);
 1140 
 1141         pktinfo = (struct in6_pktinfo *)CMSG_DATA(cm);
 1142         if (!IN6_IS_ADDR_V4MAPPED(&pktinfo->ipi6_addr) &&
 1143             !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr))
 1144                 return (EINVAL);
 1145 
 1146         /* Validate the interface index if specified. */
 1147         if (pktinfo->ipi6_ifindex > V_if_index)
 1148                 return (ENXIO);
 1149 
 1150         ifp = NULL;
 1151         if (pktinfo->ipi6_ifindex) {
 1152                 ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
 1153                 if (ifp == NULL)
 1154                         return (ENXIO);
 1155         }
 1156         if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
 1157 
 1158                 ia.s_addr = pktinfo->ipi6_addr.s6_addr32[3];
 1159                 if (in_ifhasaddr(ifp, ia) == 0)
 1160                         return (EADDRNOTAVAIL);
 1161         }
 1162 
 1163         bzero(src, sizeof(*src));
 1164         src->sin_family = AF_INET;
 1165         src->sin_len = sizeof(*src);
 1166         src->sin_port = inp->inp_lport;
 1167         src->sin_addr.s_addr = pktinfo->ipi6_addr.s6_addr32[3];
 1168 
 1169         return (0);
 1170 }
 1171 #endif
 1172 
 1173 #ifdef INET
 1174 #define UH_WLOCKED      2
 1175 #define UH_RLOCKED      1
 1176 #define UH_UNLOCKED     0
 1177 static int
 1178 udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
 1179     struct mbuf *control, struct thread *td, int flags)
 1180 {
 1181         struct udpiphdr *ui;
 1182         int len = m->m_pkthdr.len;
 1183         struct in_addr faddr, laddr;
 1184         struct cmsghdr *cm;
 1185         struct inpcbinfo *pcbinfo;
 1186         struct sockaddr_in *sin, src;
 1187         struct epoch_tracker et;
 1188         int cscov_partial = 0;
 1189         int error = 0;
 1190         int ipflags;
 1191         u_short fport, lport;
 1192         int unlock_udbinfo, unlock_inp;
 1193         u_char tos;
 1194         uint8_t pr;
 1195         uint16_t cscov = 0;
 1196         uint32_t flowid = 0;
 1197         uint8_t flowtype = M_HASHTYPE_NONE;
 1198 
 1199         /*
 1200          * udp_output() may need to temporarily bind or connect the current
 1201          * inpcb.  As such, we don't know up front whether we will need the
 1202          * pcbinfo lock or not.  Do any work to decide what is needed up
 1203          * front before acquiring any locks.
 1204          */
 1205         if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
 1206                 if (control)
 1207                         m_freem(control);
 1208                 m_freem(m);
 1209                 return (EMSGSIZE);
 1210         }
 1211 
 1212         src.sin_family = 0;
 1213         sin = (struct sockaddr_in *)addr;
 1214 retry:
 1215         if (sin == NULL ||
 1216             (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
 1217                 INP_WLOCK(inp);
 1218                 /*
 1219                  * In case we lost a race and another thread bound addr/port
 1220                  * on the inp we cannot keep the wlock (which still would be
 1221                  * fine) as further down, based on these values we make
 1222                  * decisions for the pcbinfo lock.  If the locks are not in
 1223                  * synch the assertions on unlock will fire, hence we go for
 1224                  * one retry loop.
 1225                  */
 1226                 if (sin != NULL && (inp->inp_laddr.s_addr != INADDR_ANY ||
 1227                     inp->inp_lport != 0)) {
 1228                         INP_WUNLOCK(inp);
 1229                         goto retry;
 1230                 }
 1231                 unlock_inp = UH_WLOCKED;
 1232         } else {
 1233                 INP_RLOCK(inp);
 1234                 unlock_inp = UH_RLOCKED;
 1235         }
 1236         tos = inp->inp_ip_tos;
 1237         if (control != NULL) {
 1238                 /*
 1239                  * XXX: Currently, we assume all the optional information is
 1240                  * stored in a single mbuf.
 1241                  */
 1242                 if (control->m_next) {
 1243                         if (unlock_inp == UH_WLOCKED)
 1244                                 INP_WUNLOCK(inp);
 1245                         else
 1246                                 INP_RUNLOCK(inp);
 1247                         m_freem(control);
 1248                         m_freem(m);
 1249                         return (EINVAL);
 1250                 }
 1251                 for (; control->m_len > 0;
 1252                     control->m_data += CMSG_ALIGN(cm->cmsg_len),
 1253                     control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
 1254                         cm = mtod(control, struct cmsghdr *);
 1255                         if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
 1256                             || cm->cmsg_len > control->m_len) {
 1257                                 error = EINVAL;
 1258                                 break;
 1259                         }
 1260 #ifdef INET6
 1261                         error = udp_v4mapped_pktinfo(cm, &src, inp, flags);
 1262                         if (error != 0)
 1263                                 break;
 1264 #endif
 1265                         if (cm->cmsg_level != IPPROTO_IP)
 1266                                 continue;
 1267 
 1268                         switch (cm->cmsg_type) {
 1269                         case IP_SENDSRCADDR:
 1270                                 if (cm->cmsg_len !=
 1271                                     CMSG_LEN(sizeof(struct in_addr))) {
 1272                                         error = EINVAL;
 1273                                         break;
 1274                                 }
 1275                                 bzero(&src, sizeof(src));
 1276                                 src.sin_family = AF_INET;
 1277                                 src.sin_len = sizeof(src);
 1278                                 src.sin_port = inp->inp_lport;
 1279                                 src.sin_addr =
 1280                                     *(struct in_addr *)CMSG_DATA(cm);
 1281                                 break;
 1282 
 1283                         case IP_TOS:
 1284                                 if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) {
 1285                                         error = EINVAL;
 1286                                         break;
 1287                                 }
 1288                                 tos = *(u_char *)CMSG_DATA(cm);
 1289                                 break;
 1290 
 1291                         case IP_FLOWID:
 1292                                 if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
 1293                                         error = EINVAL;
 1294                                         break;
 1295                                 }
 1296                                 flowid = *(uint32_t *) CMSG_DATA(cm);
 1297                                 break;
 1298 
 1299                         case IP_FLOWTYPE:
 1300                                 if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
 1301                                         error = EINVAL;
 1302                                         break;
 1303                                 }
 1304                                 flowtype = *(uint32_t *) CMSG_DATA(cm);
 1305                                 break;
 1306 
 1307 #ifdef  RSS
 1308                         case IP_RSSBUCKETID:
 1309                                 if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
 1310                                         error = EINVAL;
 1311                                         break;
 1312                                 }
 1313                                 /* This is just a placeholder for now */
 1314                                 break;
 1315 #endif  /* RSS */
 1316                         default:
 1317                                 error = ENOPROTOOPT;
 1318                                 break;
 1319                         }
 1320                         if (error)
 1321                                 break;
 1322                 }
 1323                 m_freem(control);
 1324         }
 1325         if (error) {
 1326                 if (unlock_inp == UH_WLOCKED)
 1327                         INP_WUNLOCK(inp);
 1328                 else
 1329                         INP_RUNLOCK(inp);
 1330                 m_freem(m);
 1331                 return (error);
 1332         }
 1333 
 1334         /*
 1335          * In the old days, depending on whether or not the application had
 1336          * bound or connected the socket, we had to do varying levels of work.
 1337          * The optimal case was for a connected UDP socket, as a global lock
 1338          * wasn't required at all.
 1339          * In order to decide which we need, we required stability of the
 1340          * inpcb binding, which we ensured by acquiring a read lock on the
 1341          * inpcb.  This didn't strictly follow the lock order, so we played
 1342          * the trylock and retry game.
 1343          * With the re-introduction of the route-cache in some cases, we started
 1344          * to acquire an early inp wlock and a possible race during re-lock
 1345          * went away.  With the introduction of epoch(9) some read locking
 1346          * became epoch(9) and the lock-order issues also went away.
 1347          * Due to route-cache we may now hold more conservative locks than
 1348          * otherwise required and have split up the 2nd case in case 2 and 3
 1349          * in order to keep the udpinfo lock level in sync with the inp one
 1350          * for the IP_SENDSRCADDR case below.
 1351          */
 1352         pr = inp->inp_socket->so_proto->pr_protocol;
 1353         pcbinfo = udp_get_inpcbinfo(pr);
 1354         if (sin != NULL &&
 1355             (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
 1356                 INP_HASH_WLOCK(pcbinfo);
 1357                 unlock_udbinfo = UH_WLOCKED;
 1358         } else if (sin != NULL &&
 1359             (sin->sin_addr.s_addr == INADDR_ANY ||
 1360             sin->sin_addr.s_addr == INADDR_BROADCAST ||
 1361             inp->inp_laddr.s_addr == INADDR_ANY ||
 1362             inp->inp_lport == 0)) {
 1363                 INP_HASH_RLOCK_ET(pcbinfo, et);
 1364                 unlock_udbinfo = UH_RLOCKED;
 1365         } else if (src.sin_family == AF_INET) {
 1366                 if (unlock_inp == UH_WLOCKED) {
 1367                         INP_HASH_WLOCK(pcbinfo);
 1368                         unlock_udbinfo = UH_WLOCKED;
 1369                 } else {
 1370                         INP_HASH_RLOCK_ET(pcbinfo, et);
 1371                         unlock_udbinfo = UH_RLOCKED;
 1372                 }
 1373         } else
 1374                 unlock_udbinfo = UH_UNLOCKED;
 1375 
 1376         /*
 1377          * If the IP_SENDSRCADDR control message was specified, override the
 1378          * source address for this datagram.  Its use is invalidated if the
 1379          * address thus specified is incomplete or clobbers other inpcbs.
 1380          */
 1381         laddr = inp->inp_laddr;
 1382         lport = inp->inp_lport;
 1383         if (src.sin_family == AF_INET) {
 1384                 INP_HASH_LOCK_ASSERT(pcbinfo);
 1385                 if ((lport == 0) ||
 1386                     (laddr.s_addr == INADDR_ANY &&
 1387                      src.sin_addr.s_addr == INADDR_ANY)) {
 1388                         error = EINVAL;
 1389                         goto release;
 1390                 }
 1391                 error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
 1392                     &laddr.s_addr, &lport, td->td_ucred);
 1393                 if (error)
 1394                         goto release;
 1395         }
 1396 
 1397         /*
 1398          * If a UDP socket has been connected, then a local address/port will
 1399          * have been selected and bound.
 1400          *
 1401          * If a UDP socket has not been connected to, then an explicit
 1402          * destination address must be used, in which case a local
 1403          * address/port may not have been selected and bound.
 1404          */
 1405         if (sin != NULL) {
 1406                 INP_LOCK_ASSERT(inp);
 1407                 if (inp->inp_faddr.s_addr != INADDR_ANY) {
 1408                         error = EISCONN;
 1409                         goto release;
 1410                 }
 1411 
 1412                 /*
 1413                  * Jail may rewrite the destination address, so let it do
 1414                  * that before we use it.
 1415                  */
 1416                 error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
 1417                 if (error)
 1418                         goto release;
 1419 
 1420                 /*
 1421                  * If a local address or port hasn't yet been selected, or if
 1422                  * the destination address needs to be rewritten due to using
 1423                  * a special INADDR_ constant, invoke in_pcbconnect_setup()
 1424                  * to do the heavy lifting.  Once a port is selected, we
 1425                  * commit the binding back to the socket; we also commit the
 1426                  * binding of the address if in jail.
 1427                  *
 1428                  * If we already have a valid binding and we're not
 1429                  * requesting a destination address rewrite, use a fast path.
 1430                  */
 1431                 if (inp->inp_laddr.s_addr == INADDR_ANY ||
 1432                     inp->inp_lport == 0 ||
 1433                     sin->sin_addr.s_addr == INADDR_ANY ||
 1434                     sin->sin_addr.s_addr == INADDR_BROADCAST) {
 1435                         INP_HASH_LOCK_ASSERT(pcbinfo);
 1436                         error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
 1437                             &lport, &faddr.s_addr, &fport, NULL,
 1438                             td->td_ucred);
 1439                         if (error)
 1440                                 goto release;
 1441 
 1442                         /*
 1443                          * XXXRW: Why not commit the port if the address is
 1444                          * !INADDR_ANY?
 1445                          */
 1446                         /* Commit the local port if newly assigned. */
 1447                         if (inp->inp_laddr.s_addr == INADDR_ANY &&
 1448                             inp->inp_lport == 0) {
 1449                                 INP_WLOCK_ASSERT(inp);
 1450                                 INP_HASH_WLOCK_ASSERT(pcbinfo);
 1451                                 /*
 1452                                  * Remember addr if jailed, to prevent
 1453                                  * rebinding.
 1454                                  */
 1455                                 if (prison_flag(td->td_ucred, PR_IP4))
 1456                                         inp->inp_laddr = laddr;
 1457                                 inp->inp_lport = lport;
 1458                                 if (in_pcbinshash(inp) != 0) {
 1459                                         inp->inp_lport = 0;
 1460                                         error = EAGAIN;
 1461                                         goto release;
 1462                                 }
 1463                                 inp->inp_flags |= INP_ANONPORT;
 1464                         }
 1465                 } else {
 1466                         faddr = sin->sin_addr;
 1467                         fport = sin->sin_port;
 1468                 }
 1469         } else {
 1470                 INP_LOCK_ASSERT(inp);
 1471                 faddr = inp->inp_faddr;
 1472                 fport = inp->inp_fport;
 1473                 if (faddr.s_addr == INADDR_ANY) {
 1474                         error = ENOTCONN;
 1475                         goto release;
 1476                 }
 1477         }
 1478 
 1479         /*
 1480          * Calculate data length and get a mbuf for UDP, IP, and possible
 1481          * link-layer headers.  Immediately slide the data pointer back forward
 1482          * since we won't use that space at this layer.
 1483          */
 1484         M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT);
 1485         if (m == NULL) {
 1486                 error = ENOBUFS;
 1487                 goto release;
 1488         }
 1489         m->m_data += max_linkhdr;
 1490         m->m_len -= max_linkhdr;
 1491         m->m_pkthdr.len -= max_linkhdr;
 1492 
 1493         /*
 1494          * Fill in mbuf with extended UDP header and addresses and length put
 1495          * into network format.
 1496          */
 1497         ui = mtod(m, struct udpiphdr *);
 1498         bzero(ui->ui_x1, sizeof(ui->ui_x1));    /* XXX still needed? */
 1499         ui->ui_v = IPVERSION << 4;
 1500         ui->ui_pr = pr;
 1501         ui->ui_src = laddr;
 1502         ui->ui_dst = faddr;
 1503         ui->ui_sport = lport;
 1504         ui->ui_dport = fport;
 1505         ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
 1506         if (pr == IPPROTO_UDPLITE) {
 1507                 struct udpcb *up;
 1508                 uint16_t plen;
 1509 
 1510                 up = intoudpcb(inp);
 1511                 cscov = up->u_txcslen;
 1512                 plen = (u_short)len + sizeof(struct udphdr);
 1513                 if (cscov >= plen)
 1514                         cscov = 0;
 1515                 ui->ui_len = htons(plen);
 1516                 ui->ui_ulen = htons(cscov);
 1517                 /*
 1518                  * For UDP-Lite, checksum coverage length of zero means
 1519                  * the entire UDPLite packet is covered by the checksum.
 1520                  */
 1521                 cscov_partial = (cscov == 0) ? 0 : 1;
 1522         }
 1523 
 1524         /*
 1525          * Set the Don't Fragment bit in the IP header.
 1526          */
 1527         if (inp->inp_flags & INP_DONTFRAG) {
 1528                 struct ip *ip;
 1529 
 1530                 ip = (struct ip *)&ui->ui_i;
 1531                 ip->ip_off |= htons(IP_DF);
 1532         }
 1533 
 1534         ipflags = 0;
 1535         if (inp->inp_socket->so_options & SO_DONTROUTE)
 1536                 ipflags |= IP_ROUTETOIF;
 1537         if (inp->inp_socket->so_options & SO_BROADCAST)
 1538                 ipflags |= IP_ALLOWBROADCAST;
 1539         if (inp->inp_flags & INP_ONESBCAST)
 1540                 ipflags |= IP_SENDONES;
 1541 
 1542 #ifdef MAC
 1543         mac_inpcb_create_mbuf(inp, m);
 1544 #endif
 1545 
 1546         /*
 1547          * Set up checksum and output datagram.
 1548          */
 1549         ui->ui_sum = 0;
 1550         if (pr == IPPROTO_UDPLITE) {
 1551                 if (inp->inp_flags & INP_ONESBCAST)
 1552                         faddr.s_addr = INADDR_BROADCAST;
 1553                 if (cscov_partial) {
 1554                         if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0)
 1555                                 ui->ui_sum = 0xffff;
 1556                 } else {
 1557                         if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0)
 1558                                 ui->ui_sum = 0xffff;
 1559                 }
 1560         } else if (V_udp_cksum) {
 1561                 if (inp->inp_flags & INP_ONESBCAST)
 1562                         faddr.s_addr = INADDR_BROADCAST;
 1563                 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
 1564                     htons((u_short)len + sizeof(struct udphdr) + pr));
 1565                 m->m_pkthdr.csum_flags = CSUM_UDP;
 1566                 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 1567         }
 1568         ((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
 1569         ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;    /* XXX */
 1570         ((struct ip *)ui)->ip_tos = tos;                /* XXX */
 1571         UDPSTAT_INC(udps_opackets);
 1572 
 1573         /*
 1574          * Setup flowid / RSS information for outbound socket.
 1575          *
 1576          * Once the UDP code decides to set a flowid some other way,
 1577          * this allows the flowid to be overridden by userland.
 1578          */
 1579         if (flowtype != M_HASHTYPE_NONE) {
 1580                 m->m_pkthdr.flowid = flowid;
 1581                 M_HASHTYPE_SET(m, flowtype);
 1582         }
 1583 #ifdef  RSS
 1584         else {
 1585                 uint32_t hash_val, hash_type;
 1586                 /*
 1587                  * Calculate an appropriate RSS hash for UDP and
 1588                  * UDP Lite.
 1589                  *
 1590                  * The called function will take care of figuring out
 1591                  * whether a 2-tuple or 4-tuple hash is required based
 1592                  * on the currently configured scheme.
 1593                  *
 1594                  * Later on, connected socket values should be
 1595                  * cached in the inpcb and reused, rather than constantly
 1596                  * re-calculating it.
 1597                  *
 1598                  * UDP Lite is a different protocol number and will
 1599                  * likely end up being hashed as a 2-tuple until
 1600                  * RSS / NICs grow UDP Lite protocol awareness.
 1601                  */
 1602                 if (rss_proto_software_hash_v4(faddr, laddr, fport, lport,
 1603                     pr, &hash_val, &hash_type) == 0) {
 1604                         m->m_pkthdr.flowid = hash_val;
 1605                         M_HASHTYPE_SET(m, hash_type);
 1606                 }
 1607         }
 1608 
 1609         /*
 1610          * Don't override with the inp cached flowid value.
 1611          *
 1612          * Depending upon the kind of send being done, the inp
 1613          * flowid/flowtype values may actually not be appropriate
 1614          * for this particular socket send.
 1615          *
 1616          * We should either leave the flowid at zero (which is what is
 1617          * currently done) or set it to some software generated
 1618          * hash value based on the packet contents.
 1619          */
 1620         ipflags |= IP_NODEFAULTFLOWID;
 1621 #endif  /* RSS */
 1622 
 1623         if (unlock_udbinfo == UH_WLOCKED)
 1624                 INP_HASH_WUNLOCK(pcbinfo);
 1625         else if (unlock_udbinfo == UH_RLOCKED)
 1626                 INP_HASH_RUNLOCK_ET(pcbinfo, et);
 1627         if (pr == IPPROTO_UDPLITE)
 1628                 UDPLITE_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
 1629         else
 1630                 UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
 1631         error = ip_output(m, inp->inp_options,
 1632             (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags,
 1633             inp->inp_moptions, inp);
 1634         if (unlock_inp == UH_WLOCKED)
 1635                 INP_WUNLOCK(inp);
 1636         else
 1637                 INP_RUNLOCK(inp);
 1638         return (error);
 1639 
 1640 release:
 1641         if (unlock_udbinfo == UH_WLOCKED) {
 1642                 KASSERT(unlock_inp == UH_WLOCKED,
 1643                     ("%s: excl udbinfo lock %#03x, shared inp lock %#03x, "
 1644                     "sin %p daddr %#010x inp %p laddr %#010x lport %#06x "
 1645                     "src fam %#04x",
 1646                     __func__, unlock_udbinfo, unlock_inp, sin,
 1647                     (sin != NULL) ? sin->sin_addr.s_addr : 0xfefefefe, inp,
 1648                     inp->inp_laddr.s_addr, inp->inp_lport, src.sin_family));
 1649                 INP_HASH_WUNLOCK(pcbinfo);
 1650                 INP_WUNLOCK(inp);
 1651         } else if (unlock_udbinfo == UH_RLOCKED) {
 1652                 KASSERT(unlock_inp == UH_RLOCKED,
 1653                     ("%s: shared udbinfo lock %#03x, excl inp lock %#03x, "
 1654                     "sin %p daddr %#010x inp %p laddr %#010x lport %#06x "
 1655                     "src fam %#04x",
 1656                     __func__, unlock_udbinfo, unlock_inp, sin,
 1657                     (sin != NULL) ? sin->sin_addr.s_addr : 0xfefefefe, inp,
 1658                     inp->inp_laddr.s_addr, inp->inp_lport, src.sin_family));
 1659                 INP_HASH_RUNLOCK_ET(pcbinfo, et);
 1660                 INP_RUNLOCK(inp);
 1661         } else if (unlock_inp == UH_WLOCKED)
 1662                 INP_WUNLOCK(inp);
 1663         else
 1664                 INP_RUNLOCK(inp);
 1665         m_freem(m);
 1666         return (error);
 1667 }
 1668 
 1669 static void
 1670 udp_abort(struct socket *so)
 1671 {
 1672         struct inpcb *inp;
 1673         struct inpcbinfo *pcbinfo;
 1674 
 1675         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 1676         inp = sotoinpcb(so);
 1677         KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
 1678         INP_WLOCK(inp);
 1679         if (inp->inp_faddr.s_addr != INADDR_ANY) {
 1680                 INP_HASH_WLOCK(pcbinfo);
 1681                 in_pcbdisconnect(inp);
 1682                 inp->inp_laddr.s_addr = INADDR_ANY;
 1683                 INP_HASH_WUNLOCK(pcbinfo);
 1684                 soisdisconnected(so);
 1685         }
 1686         INP_WUNLOCK(inp);
 1687 }
 1688 
 1689 static int
 1690 udp_attach(struct socket *so, int proto, struct thread *td)
 1691 {
 1692         static uint32_t udp_flowid;
 1693         struct inpcb *inp;
 1694         struct inpcbinfo *pcbinfo;
 1695         int error;
 1696 
 1697         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 1698         inp = sotoinpcb(so);
 1699         KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
 1700         error = soreserve(so, udp_sendspace, udp_recvspace);
 1701         if (error)
 1702                 return (error);
 1703         INP_INFO_WLOCK(pcbinfo);
 1704         error = in_pcballoc(so, pcbinfo);
 1705         if (error) {
 1706                 INP_INFO_WUNLOCK(pcbinfo);
 1707                 return (error);
 1708         }
 1709 
 1710         inp = sotoinpcb(so);
 1711         inp->inp_vflag |= INP_IPV4;
 1712         inp->inp_ip_ttl = V_ip_defttl;
 1713         inp->inp_flowid = atomic_fetchadd_int(&udp_flowid, 1);
 1714         inp->inp_flowtype = M_HASHTYPE_OPAQUE;
 1715 
 1716         error = udp_newudpcb(inp);
 1717         if (error) {
 1718                 in_pcbdetach(inp);
 1719                 in_pcbfree(inp);
 1720                 INP_INFO_WUNLOCK(pcbinfo);
 1721                 return (error);
 1722         }
 1723 
 1724         INP_WUNLOCK(inp);
 1725         INP_INFO_WUNLOCK(pcbinfo);
 1726         return (0);
 1727 }
 1728 #endif /* INET */
 1729 
 1730 int
 1731 udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t i, void *ctx)
 1732 {
 1733         struct inpcb *inp;
 1734         struct udpcb *up;
 1735 
 1736         KASSERT(so->so_type == SOCK_DGRAM,
 1737             ("udp_set_kernel_tunneling: !dgram"));
 1738         inp = sotoinpcb(so);
 1739         KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
 1740         INP_WLOCK(inp);
 1741         up = intoudpcb(inp);
 1742         if ((f != NULL || i != NULL) && ((up->u_tun_func != NULL) ||
 1743             (up->u_icmp_func != NULL))) {
 1744                 INP_WUNLOCK(inp);
 1745                 return (EBUSY);
 1746         }
 1747         up->u_tun_func = f;
 1748         up->u_icmp_func = i;
 1749         up->u_tun_ctx = ctx;
 1750         INP_WUNLOCK(inp);
 1751         return (0);
 1752 }
 1753 
 1754 #ifdef INET
 1755 static int
 1756 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 1757 {
 1758         struct inpcb *inp;
 1759         struct inpcbinfo *pcbinfo;
 1760         int error;
 1761 
 1762         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 1763         inp = sotoinpcb(so);
 1764         KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
 1765         INP_WLOCK(inp);
 1766         INP_HASH_WLOCK(pcbinfo);
 1767         error = in_pcbbind(inp, nam, td->td_ucred);
 1768         INP_HASH_WUNLOCK(pcbinfo);
 1769         INP_WUNLOCK(inp);
 1770         return (error);
 1771 }
 1772 
 1773 static void
 1774 udp_close(struct socket *so)
 1775 {
 1776         struct inpcb *inp;
 1777         struct inpcbinfo *pcbinfo;
 1778 
 1779         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 1780         inp = sotoinpcb(so);
 1781         KASSERT(inp != NULL, ("udp_close: inp == NULL"));
 1782         INP_WLOCK(inp);
 1783         if (inp->inp_faddr.s_addr != INADDR_ANY) {
 1784                 INP_HASH_WLOCK(pcbinfo);
 1785                 in_pcbdisconnect(inp);
 1786                 inp->inp_laddr.s_addr = INADDR_ANY;
 1787                 INP_HASH_WUNLOCK(pcbinfo);
 1788                 soisdisconnected(so);
 1789         }
 1790         INP_WUNLOCK(inp);
 1791 }
 1792 
 1793 static int
 1794 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 1795 {
 1796         struct inpcb *inp;
 1797         struct inpcbinfo *pcbinfo;
 1798         struct sockaddr_in *sin;
 1799         int error;
 1800 
 1801         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 1802         inp = sotoinpcb(so);
 1803         KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
 1804         INP_WLOCK(inp);
 1805         if (inp->inp_faddr.s_addr != INADDR_ANY) {
 1806                 INP_WUNLOCK(inp);
 1807                 return (EISCONN);
 1808         }
 1809         sin = (struct sockaddr_in *)nam;
 1810         error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
 1811         if (error != 0) {
 1812                 INP_WUNLOCK(inp);
 1813                 return (error);
 1814         }
 1815         INP_HASH_WLOCK(pcbinfo);
 1816         error = in_pcbconnect(inp, nam, td->td_ucred);
 1817         INP_HASH_WUNLOCK(pcbinfo);
 1818         if (error == 0)
 1819                 soisconnected(so);
 1820         INP_WUNLOCK(inp);
 1821         return (error);
 1822 }
 1823 
 1824 static void
 1825 udp_detach(struct socket *so)
 1826 {
 1827         struct inpcb *inp;
 1828         struct inpcbinfo *pcbinfo;
 1829         struct udpcb *up;
 1830 
 1831         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 1832         inp = sotoinpcb(so);
 1833         KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
 1834         KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
 1835             ("udp_detach: not disconnected"));
 1836         INP_INFO_WLOCK(pcbinfo);
 1837         INP_WLOCK(inp);
 1838         up = intoudpcb(inp);
 1839         KASSERT(up != NULL, ("%s: up == NULL", __func__));
 1840         inp->inp_ppcb = NULL;
 1841         in_pcbdetach(inp);
 1842         in_pcbfree(inp);
 1843         INP_INFO_WUNLOCK(pcbinfo);
 1844         udp_discardcb(up);
 1845 }
 1846 
 1847 static int
 1848 udp_disconnect(struct socket *so)
 1849 {
 1850         struct inpcb *inp;
 1851         struct inpcbinfo *pcbinfo;
 1852 
 1853         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 1854         inp = sotoinpcb(so);
 1855         KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
 1856         INP_WLOCK(inp);
 1857         if (inp->inp_faddr.s_addr == INADDR_ANY) {
 1858                 INP_WUNLOCK(inp);
 1859                 return (ENOTCONN);
 1860         }
 1861         INP_HASH_WLOCK(pcbinfo);
 1862         in_pcbdisconnect(inp);
 1863         inp->inp_laddr.s_addr = INADDR_ANY;
 1864         INP_HASH_WUNLOCK(pcbinfo);
 1865         SOCK_LOCK(so);
 1866         so->so_state &= ~SS_ISCONNECTED;                /* XXX */
 1867         SOCK_UNLOCK(so);
 1868         INP_WUNLOCK(inp);
 1869         return (0);
 1870 }
 1871 
 1872 static int
 1873 udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
 1874     struct mbuf *control, struct thread *td)
 1875 {
 1876         struct inpcb *inp;
 1877 
 1878         inp = sotoinpcb(so);
 1879         KASSERT(inp != NULL, ("udp_send: inp == NULL"));
 1880         return (udp_output(inp, m, addr, control, td, flags));
 1881 }
 1882 #endif /* INET */
 1883 
 1884 int
 1885 udp_shutdown(struct socket *so)
 1886 {
 1887         struct inpcb *inp;
 1888 
 1889         inp = sotoinpcb(so);
 1890         KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
 1891         INP_WLOCK(inp);
 1892         socantsendmore(so);
 1893         INP_WUNLOCK(inp);
 1894         return (0);
 1895 }
 1896 
 1897 #ifdef INET
 1898 struct pr_usrreqs udp_usrreqs = {
 1899         .pru_abort =            udp_abort,
 1900         .pru_attach =           udp_attach,
 1901         .pru_bind =             udp_bind,
 1902         .pru_connect =          udp_connect,
 1903         .pru_control =          in_control,
 1904         .pru_detach =           udp_detach,
 1905         .pru_disconnect =       udp_disconnect,
 1906         .pru_peeraddr =         in_getpeeraddr,
 1907         .pru_send =             udp_send,
 1908         .pru_soreceive =        soreceive_dgram,
 1909         .pru_sosend =           sosend_dgram,
 1910         .pru_shutdown =         udp_shutdown,
 1911         .pru_sockaddr =         in_getsockaddr,
 1912         .pru_sosetlabel =       in_pcbsosetlabel,
 1913         .pru_close =            udp_close,
 1914 };
 1915 #endif /* INET */

Cache object: defe738adc5499f34e94ad898454eaae


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.