The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/bsd/netinet/in_pcb.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
    3  *
    4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
    5  * 
    6  * This file contains Original Code and/or Modifications of Original Code
    7  * as defined in and that are subject to the Apple Public Source License
    8  * Version 2.0 (the 'License'). You may not use this file except in
    9  * compliance with the License. The rights granted to you under the License
   10  * may not be used to create, or enable the creation or redistribution of,
   11  * unlawful or unlicensed copies of an Apple operating system, or to
   12  * circumvent, violate, or enable the circumvention or violation of, any
   13  * terms of an Apple operating system software license agreement.
   14  * 
   15  * Please obtain a copy of the License at
   16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
   17  * 
   18  * The Original Code and all software distributed under the License are
   19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   23  * Please see the License for the specific language governing rights and
   24  * limitations under the License.
   25  * 
   26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   27  */
   28 /*
   29  * Copyright (c) 1982, 1986, 1991, 1993, 1995
   30  *      The Regents of the University of California.  All rights reserved.
   31  *
   32  * Redistribution and use in source and binary forms, with or without
   33  * modification, are permitted provided that the following conditions
   34  * are met:
   35  * 1. Redistributions of source code must retain the above copyright
   36  *    notice, this list of conditions and the following disclaimer.
   37  * 2. Redistributions in binary form must reproduce the above copyright
   38  *    notice, this list of conditions and the following disclaimer in the
   39  *    documentation and/or other materials provided with the distribution.
   40  * 3. All advertising materials mentioning features or use of this software
   41  *    must display the following acknowledgement:
   42  *      This product includes software developed by the University of
   43  *      California, Berkeley and its contributors.
   44  * 4. Neither the name of the University nor the names of its contributors
   45  *    may be used to endorse or promote products derived from this software
   46  *    without specific prior written permission.
   47  *
   48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   58  * SUCH DAMAGE.
   59  *
   60  *      @(#)in_pcb.c    8.4 (Berkeley) 5/24/95
   61  * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
   62  */
   63 
   64 #include <sys/param.h>
   65 #include <sys/systm.h>
   66 #include <sys/malloc.h>
   67 #include <sys/mbuf.h>
   68 #include <sys/domain.h>
   69 #include <sys/protosw.h>
   70 #include <sys/socket.h>
   71 #include <sys/socketvar.h>
   72 #include <sys/proc.h>
   73 #ifndef __APPLE__
   74 #include <sys/jail.h>
   75 #endif
   76 #include <sys/kernel.h>
   77 #include <sys/sysctl.h>
   78 #include <sys/mcache.h>
   79 #include <sys/kauth.h>
   80 #include <sys/priv.h>
   81 #include <libkern/OSAtomic.h>
   82 
   83 #include <machine/limits.h>
   84 
   85 #ifdef __APPLE__
   86 #include <kern/zalloc.h>
   87 #endif
   88 
   89 #include <net/if.h>
   90 #include <net/if_types.h>
   91 #include <net/route.h>
   92 
   93 #include <netinet/in.h>
   94 #include <netinet/in_pcb.h>
   95 #include <netinet/in_var.h>
   96 #include <netinet/ip_var.h>
   97 #if INET6
   98 #include <netinet/ip6.h>
   99 #include <netinet6/ip6_var.h>
  100 #endif /* INET6 */
  101 
  102 #include "faith.h"
  103 
  104 #if IPSEC
  105 #include <netinet6/ipsec.h>
  106 #include <netkey/key.h>
  107 #endif /* IPSEC */
  108 
  109 #include <sys/kdebug.h>
  110 #include <sys/random.h>
  111 
  112 #if IPSEC
  113 extern int ipsec_bypass;
  114 #endif
  115 
  116 #define DBG_FNC_PCB_LOOKUP      NETDBG_CODE(DBG_NETTCP, (6 << 8))
  117 #define DBG_FNC_PCB_HLOOKUP     NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
  118 
  119 struct  in_addr zeroin_addr;
  120 
  121 /*
  122  * These configure the range of local port addresses assigned to
  123  * "unspecified" outgoing connections/packets/whatever.
  124  */
  125 int     ipport_lowfirstauto  = IPPORT_RESERVED - 1;     /* 1023 */
  126 int     ipport_lowlastauto = IPPORT_RESERVEDSTART;      /* 600 */
  127 #ifndef __APPLE__
  128 int     ipport_firstauto = IPPORT_RESERVED;             /* 1024 */
  129 int     ipport_lastauto  = IPPORT_USERRESERVED;         /* 5000 */
  130 #else
  131 int     ipport_firstauto = IPPORT_HIFIRSTAUTO;          /* 49152 */
  132 int     ipport_lastauto  = IPPORT_HILASTAUTO;           /* 65535 */
  133 #endif
  134 int     ipport_hifirstauto = IPPORT_HIFIRSTAUTO;        /* 49152 */
  135 int     ipport_hilastauto  = IPPORT_HILASTAUTO;         /* 65535 */
  136 
  137 #define RANGECHK(var, min, max) \
  138         if ((var) < (min)) { (var) = (min); } \
  139         else if ((var) > (max)) { (var) = (max); }
  140 
  141 static int
  142 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
  143 {
  144 #pragma unused(arg1, arg2)
  145         int error = sysctl_handle_int(oidp,
  146                 oidp->oid_arg1, oidp->oid_arg2, req);
  147         if (!error) {
  148                 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
  149                 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
  150                 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
  151                 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
  152                 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
  153                 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
  154         }
  155         return error;
  156 }
  157 
  158 #undef RANGECHK
  159 
  160 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");
  161 
  162 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
  163            &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
  164 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
  165            &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
  166 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
  167            &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
  168 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
  169            &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
  170 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
  171            &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
  172 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
  173            &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
  174 
  175 extern int      udp_use_randomport;
  176 extern int      tcp_use_randomport;
  177 
  178 /*
  179  * in_pcb.c: manage the Protocol Control Blocks.
  180  *
  181  * NOTE: It is assumed that most of these functions will be called at
  182  * splnet(). XXX - There are, unfortunately, a few exceptions to this
  183  * rule that should be fixed.
  184  */
  185 
  186 /*
  187  * Allocate a PCB and associate it with the socket.
       *
       * When so->cached_in_sock_layer is 0 a new inpcb is taken from
       * pcbinfo->ipi_zone; otherwise the PCB already stored in
       * so->so_saved_pcb is reused.  In both cases the PCB is zeroed (the
       * reused one keeps its inp_saved_ppcb), linked to so and pcbinfo, and
       * inserted at the head of pcbinfo->listhead with pcbinfo->mtx held
       * exclusively.  The generation count is assigned under that lock.
       *
       * p is unused.
  188  *
  189  * Returns:     0                       Success
  190  *              ENOBUFS                 zalloc() returned NULL
  191  *              ENOMEM
  192  *      ipsec_init_policy:???           [IPSEC]
       *      mac_inpcb_label_init:???        [CONFIG_MACF_NET]
  193  */
  194 int
  195 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *p)
  196 {
  197         struct inpcb *inp;
  198         caddr_t               temp;
  199 #if IPSEC
  200 #ifndef __APPLE__
  201         int error;
  202 #endif
  203 #endif
  204 #if CONFIG_MACF_NET
  205         int mac_error;
  206 #endif
  207 
  208         if (so->cached_in_sock_layer == 0) {
  209 #if TEMPDEBUG
  210             printf("PCBALLOC calling zalloc for socket %p\n", so);
  211 #endif
  212             inp = (struct inpcb *) zalloc(pcbinfo->ipi_zone);
  213             if (inp == NULL)
  214                  return (ENOBUFS);
  215             bzero((caddr_t)inp, sizeof(*inp));
  216         }
  217         else {
  218 #if TEMPDEBUG
  219             printf("PCBALLOC reusing PCB for socket %p\n", so);
  220 #endif
  221             inp = (struct inpcb *) so->so_saved_pcb;
                  /* Keep inp_saved_ppcb across the wipe of the cached PCB. */
  222             temp = inp->inp_saved_ppcb;
  223             bzero((caddr_t) inp, sizeof(*inp));
  224             inp->inp_saved_ppcb = temp;
  225         }
  226 
              /*
               * inp_gencnt is assigned further down, under pcbinfo->mtx.
               * Incrementing pcbinfo->ipi_gencnt here, without that lock,
               * was an unsynchronized update of the shared counter whose
               * result was overwritten before the PCB became visible.
               */
  228         inp->inp_pcbinfo = pcbinfo;
  229         inp->inp_socket = so;
  230 #if CONFIG_MACF_NET
  231         mac_error = mac_inpcb_label_init(inp, M_WAITOK);
  232         if (mac_error != 0) {
                      /* Only a zone-allocated PCB is ours to free. */
  233                 if (so->cached_in_sock_layer == 0)
  234                         zfree(pcbinfo->ipi_zone, inp);
  235                 return (mac_error);
  236         }
  237         mac_inpcb_label_associate(so, inp);
  238 #endif
  239         // make sure inp_stat is always 64bit aligned
  240         inp->inp_stat = (struct inp_stat*)P2ROUNDUP(inp->inp_stat_store, sizeof(u_int64_t));
              /* The aligned struct must still fit inside inp_stat_store. */
  241         if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store)
  242                 + sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
  243                 panic("insufficient space to align inp_stat");
  244         }
  245 
  246         so->so_pcb = (caddr_t)inp;
  247 
              /* Per-PCB mutex is only set up for protocols flagged PR_PCBLOCK. */
  248         if (so->so_proto->pr_flags & PR_PCBLOCK) {
  249                 lck_mtx_init(&inp->inpcb_mtx, pcbinfo->mtx_grp, pcbinfo->mtx_attr);
  250         }
  251 
  252 #if IPSEC
  253 #ifndef __APPLE__
  254         if (ipsec_bypass == 0) {
  255                 error = ipsec_init_policy(so, &inp->inp_sp);
  256                 if (error != 0) {
  257                         zfree(pcbinfo->ipi_zone, inp);
  258                         return error;
  259                 }
  260         }
  261 #endif
  262 #endif /*IPSEC*/
  263 #if INET6
  264         if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on)
  265                 inp->inp_flags |= IN6P_IPV6_V6ONLY;
  266 #endif
  267         
  268 #if INET6
  269         if (ip6_auto_flowlabel)
  270                 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
  271 #endif
              /* Publish the PCB: stamp generation, link into the list, count it. */
  272         lck_rw_lock_exclusive(pcbinfo->mtx);
  273         inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
  274         LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
  275         pcbinfo->ipi_count++;
  276         lck_rw_done(pcbinfo->mtx);
  277         return (0);
  278 }
  279 
  280 
  281 /*
  282   in_pcblookup_local_and_cleanup does everything
  283   in_pcblookup_local does but it checks for a socket
  284   that's going away. Since we know that the lock is
  285   held read+write when this function is called, we
  286   can safely dispose of this socket like the slow
  287   timer would usually do and return NULL. This is
  288   great for bind.

        Arguments are passed unchanged to in_pcblookup_local().

        Returns the PCB found by in_pcblookup_local(), or NULL if
        nothing matched or the match was marked WNT_STOPUSING,
        had a socket use count of 0, and was disposed of here.
        A WNT_STOPUSING match whose socket is still in use is
        returned to the caller as-is.
  289 */
  290 struct inpcb*
  291 in_pcblookup_local_and_cleanup(
  292         struct inpcbinfo *pcbinfo,
  293         struct in_addr laddr,
  294         u_int lport_arg,
  295         int wild_okay)
  296 {
  297         struct inpcb *inp;
  298         
  299         /* Perform normal lookup */
  300         inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
  301         
  302         /* Check if we found a match but it's waiting to be disposed */
  303         if (inp && inp->inp_wantcnt == WNT_STOPUSING) {
  304                 struct socket *so = inp->inp_socket;
  305                 
  306                 lck_mtx_lock(&inp->inpcb_mtx);
  307                 
  308                 if (so->so_usecount == 0) {
                              /* Detach first unless the PCB is already dead. */
  309                         if (inp->inp_state != INPCB_STATE_DEAD)
  310                                 in_pcbdetach(inp);
                              /*
                               * NOTE(review): inpcb_mtx is not unlocked on this
                               * path; presumably in_pcbdispose() takes care of
                               * the held mutex -- confirm.
                               */
  311                         in_pcbdispose(inp);
  312                         inp = NULL;
  313                 }
  314                 else {
                              /* Socket still referenced: leave the PCB alone. */
  315                         lck_mtx_unlock(&inp->inpcb_mtx);
  316                 }
  317         }
  318         
  319         return inp;
  320 }
  321 
  322 #ifdef __APPLE_API_PRIVATE
      /*
       * Post a KEV_INET_PORTINUSE kernel event for a bind conflict.
       *
       * port is the contested port in network byte order; it is converted
       * with ntohs() before being placed in the event payload, together with
       * the pid returned by proc_selfpid().
       */
  323 static void
  324 in_pcb_conflict_post_msg(u_int16_t port)
  325 {
  326         /* 
  327          * Radar 5523020: send a kernel event notification when a
  328          * non-participating socket tries to bind a port owned by a
               * socket that has set SOF_NOTIFYCONFLICT.
  329          */
  330         struct kev_msg        ev_msg;
  331         struct kev_in_portinuse in_portinuse;
  332 
              /* sizeof is tied to the objects so it tracks any type change. */
  333         bzero(&in_portinuse, sizeof(in_portinuse));
  334         bzero(&ev_msg, sizeof(ev_msg));
  335         in_portinuse.port = ntohs(port);        /* port in host order */
  336         in_portinuse.req_pid = proc_selfpid();
  337         ev_msg.vendor_code = KEV_VENDOR_APPLE;
  338         ev_msg.kev_class = KEV_NETWORK_CLASS;
  339         ev_msg.kev_subclass = KEV_INET_SUBCLASS;
  340         ev_msg.event_code = KEV_INET_PORTINUSE;
              /* Single data vector; a zero length in dv[1] ends the list. */
  341         ev_msg.dv[0].data_ptr = &in_portinuse;
  342         ev_msg.dv[0].data_length      = sizeof(in_portinuse);
  343         ev_msg.dv[1].data_length = 0;
  344         kev_post_msg(&ev_msg);
  345 }
  346 #endif
  347 /*
       * Bind a PCB to a local address and port.
       *
       * When nam is non-NULL its length, address and port are validated and
       * checked against existing PCBs.  When nam is NULL or its port is 0, a
       * port is picked from the range selected by INP_HIGHPORT / INP_LOWPORT
       * (default: ipport_firstauto..ipport_lastauto), optionally starting at
       * a random offset.
       *
       * Locking: after the initial checks the socket is unlocked and
       * pcbinfo->mtx is taken exclusively; every return path past that point
       * re-locks the socket and releases pcbinfo->mtx.
       *
  348  * Returns:     0                       Success
  349  *              EADDRNOTAVAIL           Address not available.
  350  *              EINVAL                  Invalid argument
  351  *              EAFNOSUPPORT            Address family not supported [notdef]
  352  *              EACCES                  Permission denied
  353  *              EADDRINUSE              Address in use
  354  *              EAGAIN                  Resource unavailable, try again
  355  *              priv_check_cred:EPERM   Operation not permitted
  356  */
  357 int
  358 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
  359 {
  360         struct socket *so = inp->inp_socket;
  361         unsigned short *lastport;
  362         struct sockaddr_in *sin;
  363         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
  364         u_short lport = 0, rand_port = 0;
  365         int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
  366         int error, randomport, conflict = 0;
  367         kauth_cred_t cred;
  368 
  369         if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
  370                 return (EADDRNOTAVAIL);
              /* Already bound to a port or a specific address. */
  371         if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
  372                 return (EINVAL);
              /* Without any reuse option, lookups also match wildcard binds. */
  373         if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
  374                 wild = 1;
  375         socket_unlock(so, 0); /* keep reference on socket */
  376         lck_rw_lock_exclusive(pcbinfo->mtx);
  377         if (nam) {
  378                 unsigned int outif = 0;
  379 
  380                 sin = (struct sockaddr_in *)nam;
  381                 if (nam->sa_len != sizeof (*sin)) {
  382                         lck_rw_done(pcbinfo->mtx);
  383                         socket_lock(so, 0);
  384                         return (EINVAL);
  385                 }
  386 #ifdef notdef
  387                 /*
  388                  * We should check the family, but old programs
  389                  * incorrectly fail to initialize it.
  390                  */
  391                 if (sin->sin_family != AF_INET) {
  392                         lck_rw_done(pcbinfo->mtx);
  393                         socket_lock(so, 0);
  394                         return (EAFNOSUPPORT);
  395                 }
  396 #endif
  397                 lport = sin->sin_port;
  398                 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
  399                         /*
  400                          * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
  401                          * allow complete duplication of binding if
  402                          * SO_REUSEPORT is set, or if SO_REUSEADDR is set
  403                          * and a multicast address is bound on both
  404                          * new and duplicated sockets.
  405                          */
  406                         if (so->so_options & SO_REUSEADDR)
  407                                 reuseport = SO_REUSEADDR|SO_REUSEPORT;
  408                 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
  409                         struct ifaddr *ifa;
                              /*
                               * The caller's sockaddr is modified here; the
                               * requested port was saved in lport above.
                               */
  410                         sin->sin_port = 0;              /* yech... */
  411                         if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) {
  412                                 lck_rw_done(pcbinfo->mtx);
  413                                 socket_lock(so, 0);
  414                                 return (EADDRNOTAVAIL);
  415                         }
  416                         else {
                                      /* Remember the owning interface, then drop the ifa ref. */
  417                                 IFA_LOCK(ifa);
  418                                 outif = ifa->ifa_ifp->if_index;
  419                                 IFA_UNLOCK(ifa);
  420                                 IFA_REMREF(ifa);
  421                         }
  422                 }
  423                 if (lport) {
  424                         struct inpcb *t;
  425 
  426                         /* GROSS */
  427 #if !CONFIG_EMBEDDED
                              /* Ports below IPPORT_RESERVED need PRIV_NETINET_RESERVEDPORT. */
  428                         if (ntohs(lport) < IPPORT_RESERVED) {
  429                                 cred = kauth_cred_proc_ref(p);
  430                                 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
  431                                 kauth_cred_unref(&cred);
  432                                 if (error != 0) {
  433                                         lck_rw_done(pcbinfo->mtx);
  434                                         socket_lock(so, 0);
  435                                         return (EACCES);
  436                                 }
  437                         }
  438 #endif
                              /*
                               * First pass (so_uid != 0, non-multicast only):
                               * refuse a port held by a socket of a different uid
                               * unless that socket set SOF_REUSESHAREUID.
                               */
  439                         if (so->so_uid &&
  440                             !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
  441                                 t = in_pcblookup_local_and_cleanup(inp->inp_pcbinfo,
  442                                     sin->sin_addr, lport, INPLOOKUP_WILDCARD);
  443                                 if (t &&
  444                                     (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
  445                                      ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
  446                                      (t->inp_socket->so_options &
  447                                          SO_REUSEPORT) == 0) &&
  448                                      (so->so_uid != t->inp_socket->so_uid) &&
  449                                       ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0)) {
  450 #if INET6
  451                                         if (ntohl(sin->sin_addr.s_addr) !=
  452                                             INADDR_ANY ||
  453                                             ntohl(t->inp_laddr.s_addr) !=
  454                                             INADDR_ANY ||
  455                                             INP_SOCKAF(so) ==
  456                                             INP_SOCKAF(t->inp_socket))
  457 #endif /* INET6 */
  458                                         {
  459 #ifdef __APPLE_API_PRIVATE
  460 
  461                                                 if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0)) 
  462                                                         conflict = 1;
  463 
                                                      /* Event is posted only after pcbinfo->mtx is released. */
  464                                                 lck_rw_done(pcbinfo->mtx);
  465 
  466                                                 if (conflict)
  467                                                         in_pcb_conflict_post_msg(lport);
  468 #else
  469                                                 lck_rw_done(pcbinfo->mtx);
  470 #endif /* __APPLE_API_PRIVATE */
  471 
  472                                                 socket_lock(so, 0);
  473                                                 return (EADDRINUSE);
  474                                         }
  475                                 }
  476                         }
                              /*
                               * Second pass: refuse the port if a matching PCB
                               * exists and the two sockets share no reuse option.
                               */
  477                         t = in_pcblookup_local_and_cleanup(pcbinfo, sin->sin_addr,
  478                             lport, wild);
  479                         if (t &&
  480                             (reuseport & t->inp_socket->so_options) == 0) {
  481 #if INET6
  482                                 if (ip6_mapped_addr_on == 0 ||
  483                                     ntohl(sin->sin_addr.s_addr) !=
  484                                     INADDR_ANY ||
  485                                     ntohl(t->inp_laddr.s_addr) !=
  486                                     INADDR_ANY ||
  487                                     INP_SOCKAF(so) ==
  488                                     INP_SOCKAF(t->inp_socket))
  489 #endif /* INET6 */
  490                                 {
  491 #ifdef __APPLE_API_PRIVATE
  492 
  493                                         if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0)) 
  494                                                 conflict = 1;
  495 
  496                                         lck_rw_done(pcbinfo->mtx);
  497 
  498                                         if (conflict)
  499                                                 in_pcb_conflict_post_msg(lport);
  500 #else
  501                                         lck_rw_done(pcbinfo->mtx);
  502 #endif /* __APPLE_API_PRIVATE */
  503                                         socket_lock(so, 0);
  504                                         return (EADDRINUSE);
  505                                 }
  506                         }
  507                 }
  508                 inp->inp_laddr = sin->sin_addr;
  509                 inp->inp_last_outif = outif;
  510         }
              /* No port requested (nam NULL or port 0): pick one automatically. */
  511         if (lport == 0) {
  512                 u_short first, last;
  513                 int count;
  514 
  515                 randomport = (so->so_flags & SOF_BINDRANDOMPORT) || 
  516                         (so->so_type == SOCK_STREAM ? tcp_use_randomport : udp_use_randomport);
  517 
  518                 inp->inp_flags |= INP_ANONPORT;
  519 
  520                 if (inp->inp_flags & INP_HIGHPORT) {
  521                         first = ipport_hifirstauto;     /* sysctl */
  522                         last  = ipport_hilastauto;
  523                         lastport = &pcbinfo->lasthi;
  524                 } else if (inp->inp_flags & INP_LOWPORT) {
  525                         cred = kauth_cred_proc_ref(p);
  526                         error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
  527                         kauth_cred_unref(&cred);
  528                         if (error != 0) {
  529                                 lck_rw_done(pcbinfo->mtx);
  530                                 socket_lock(so, 0);
  531                                 return error;
  532                         }
  533                         first = ipport_lowfirstauto;    /* 1023 */
  534                         last  = ipport_lowlastauto;     /* 600 */
  535                         lastport = &pcbinfo->lastlow;
  536                 } else {
  537                         first = ipport_firstauto;       /* sysctl */
  538                         last  = ipport_lastauto;
  539                         lastport = &pcbinfo->lastport;
  540                 }
  541                 /* No point in randomizing if only one port is available */
  542 
  543                 if (first == last)
  544                         randomport = 0; 
  545                 /*
  546                  * Simple check to ensure all ports are not used up causing
  547                  * a deadlock here.
  548                  *
  549                  * We split the two cases (up and down) so that the direction
  550                  * is not being tested on each round of the loop.
  551                  */
  552                 if (first > last) {
  553                         /*
  554                          * counting down
  555                          */
  556                         if (randomport) {
  557                                 read_random(&rand_port, sizeof(rand_port));
  558                                 *lastport = first - (rand_port % (first - last));
  559                         }
  560                         count = first - last;
  561 
  562                         do {
  563                                 if (count-- < 0) {      /* completely used? */
  564                                         lck_rw_done(pcbinfo->mtx);
  565                                         socket_lock(so, 0);
                                              /* Undo the address binding made above. */
  566                                         inp->inp_laddr.s_addr = INADDR_ANY;
  567                                         inp->inp_last_outif = 0;
  568                                         return (EADDRNOTAVAIL);
  569                                 }
  570                                 --*lastport;
                                      /* Wrap back to 'first' when leaving [last, first]. */
  571                                 if (*lastport > first || *lastport < last)
  572                                         *lastport = first;
  573                                 lport = htons(*lastport);
  574                         } while (in_pcblookup_local_and_cleanup(pcbinfo,
  575                                  inp->inp_laddr, lport, wild));
  576                 } else {
  577                         /*
  578                          * counting up
  579                          */
  580                         if (randomport) {
  581                                 read_random(&rand_port, sizeof(rand_port));
                                      /*
                                       * first < last here (first == last cleared
                                       * randomport above), so the range width is
                                       * last - first.  The previous divisor,
                                       * first - last, was negative and only gave
                                       * an in-range offset through truncating '%'.
                                       */
  582                                 *lastport = first + (rand_port % (last - first));
  583                         }
  584                         count = last - first;
  585 
  586                         do {
  587                                 if (count-- < 0) {      /* completely used? */
  588                                         lck_rw_done(pcbinfo->mtx);
  589                                         socket_lock(so, 0);
                                              /* Undo the address binding made above. */
  590                                         inp->inp_laddr.s_addr = INADDR_ANY;
  591                                         inp->inp_last_outif = 0;
  592                                         return (EADDRNOTAVAIL);
  593                                 }
  594                                 ++*lastport;
                                      /* Wrap back to 'first' when leaving [first, last]. */
  595                                 if (*lastport < first || *lastport > last)
  596                                         *lastport = first;
  597                                 lport = htons(*lastport);
  598                         } while (in_pcblookup_local_and_cleanup(pcbinfo,
  599                                  inp->inp_laddr, lport, wild));
  600                 }
  601         }
              /* Re-take the socket lock while still holding pcbinfo->mtx. */
  602         socket_lock(so, 0);
  603         inp->inp_lport = lport;
  604         if (in_pcbinshash(inp, 1) != 0) {
                      /* Hash insertion failed: roll the PCB back to unbound. */
  605                 inp->inp_laddr.s_addr = INADDR_ANY;
  606                 inp->inp_lport = 0;
  607                 inp->inp_last_outif = 0;
  608                 lck_rw_done(pcbinfo->mtx);
  609                 return (EAGAIN);
  610         }
  611         lck_rw_done(pcbinfo->mtx);
  612         sflt_notify(so, sock_evt_bound, NULL);
  613         return (0);
  614 }
  615 
  616 /*
  617  *   Transform old in_pcbconnect() into an inner subroutine for new
  618  *   in_pcbconnect(): Do some validity-checking on the remote
  619  *   address (in mbuf 'nam') and then determine local host address
  620  *   (i.e., which interface) to use to access that remote host.
  621  *
  622  *   This preserves definition of in_pcbconnect(), while supporting a
  623  *   slightly different version for T/TCP.  (This is more than
  624  *   a bit of a kludge, but cleaning up the internal interfaces would
  625  *   have forced minor changes in every protocol).
  626  *
  627  * Returns:     0                       Success
  628  *              EINVAL                  Invalid argument
  629  *              EAFNOSUPPORT            Address family not supported
  630  *              EADDRNOTAVAIL           Address not available
  631  */
  632 int
  633 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
  634     struct sockaddr_in *plocal_sin, unsigned int *out_ifscope)
  635 {
  636         struct in_ifaddr *ia;
  637         struct sockaddr_in *sin = (struct sockaddr_in *)nam;
  638 
  639         if (nam->sa_len != sizeof (*sin))
  640                 return (EINVAL);
  641         if (sin->sin_family != AF_INET)
  642                 return (EAFNOSUPPORT);
  643         if (sin->sin_port == 0)
  644                 return (EADDRNOTAVAIL);
  645 
  646         lck_rw_lock_shared(in_ifaddr_rwlock);
  647         if (!TAILQ_EMPTY(&in_ifaddrhead)) {
  648                 ia = TAILQ_FIRST(&in_ifaddrhead);
  649                 /*
  650                  * If the destination address is INADDR_ANY,
  651                  * use the primary local address.
  652                  * If the supplied address is INADDR_BROADCAST,
  653                  * and the primary interface supports broadcast,
  654                  * choose the broadcast address for that interface.
  655                  */
  656 #define satosin(sa)     ((struct sockaddr_in *)(sa))
  657 #define sintosa(sin)    ((struct sockaddr *)(sin))
  658 #define ifatoia(ifa)    ((struct in_ifaddr *)(ifa))
  659                 IFA_LOCK_SPIN(&ia->ia_ifa);
  660                 if (sin->sin_addr.s_addr == INADDR_ANY)
  661                         sin->sin_addr = IA_SIN(ia)->sin_addr;
  662                 else if (sin->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST &&
  663                     (ia->ia_ifp->if_flags & IFF_BROADCAST))
  664                         sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr;
  665                 IFA_UNLOCK(&ia->ia_ifa);
  666                 ia = NULL;
  667         }
  668         lck_rw_done(in_ifaddr_rwlock);
  669 
  670         if (inp->inp_laddr.s_addr == INADDR_ANY) {
  671                 struct route *ro;
  672                 unsigned int ifscope = IFSCOPE_NONE;
  673                 unsigned int nocell;
  674                 /*
  675                  * If the socket is bound to a specifc interface, the
  676                   * optional scoped takes precedence over that if it
  677                   * is set by the caller.
  678                  */
  679                 ia = (struct in_ifaddr *)0;
  680 
  681                 if (out_ifscope != NULL && *out_ifscope != IFSCOPE_NONE)
  682                         ifscope = *out_ifscope;
  683                 else if (inp->inp_flags & INP_BOUND_IF)
  684                         ifscope = inp->inp_boundif;
  685 
  686                 nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
  687                 /*
  688                  * If route is known or can be allocated now,
  689                  * our src addr is taken from the i/f, else punt.
  690                  * Note that we should check the address family of the cached
  691                  * destination, in case of sharing the cache with IPv6.
  692                  */
  693                 ro = &inp->inp_route;
  694                 if (ro->ro_rt != NULL)
  695                         RT_LOCK_SPIN(ro->ro_rt);
  696                 if (ro->ro_rt && (ro->ro_dst.sa_family != AF_INET ||
  697                     satosin(&ro->ro_dst)->sin_addr.s_addr !=
  698                     sin->sin_addr.s_addr ||
  699                     inp->inp_socket->so_options & SO_DONTROUTE ||
  700                     ro->ro_rt->generation_id != route_generation)) {
  701                         RT_UNLOCK(ro->ro_rt);
  702                         rtfree(ro->ro_rt);
  703                         ro->ro_rt = NULL;
  704                 }
  705                 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
  706                     (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
  707                         if (ro->ro_rt != NULL)
  708                                 RT_UNLOCK(ro->ro_rt);
  709                         /* No route yet, so try to acquire one */
  710                         bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
  711                         ro->ro_dst.sa_family = AF_INET;
  712                         ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
  713                         ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
  714                                 sin->sin_addr;
  715                         rtalloc_scoped(ro, ifscope);
  716                         if (ro->ro_rt != NULL)
  717                                 RT_LOCK_SPIN(ro->ro_rt);
  718                 }
  719                 /*
  720                  * If the route points to a cellular interface and the
  721                  * caller forbids our using interfaces of such type,
  722                  * pretend that there is no route.
  723                  */
  724                 if (nocell && ro->ro_rt != NULL) {
  725                         RT_LOCK_ASSERT_HELD(ro->ro_rt);
  726                         if (ro->ro_rt->rt_ifp->if_type == IFT_CELLULAR) {
  727                                 RT_UNLOCK(ro->ro_rt);
  728                                 rtfree(ro->ro_rt);
  729                                 ro->ro_rt = NULL;
  730                         }
  731                 }
  732                 /*
  733                  * If we found a route, use the address
  734                  * corresponding to the outgoing interface
  735                  * unless it is the loopback (in case a route
  736                  * to our address on another net goes to loopback).
  737                  */
  738                 if (ro->ro_rt != NULL) {
  739                         /* Become a regular mutex */
  740                         RT_CONVERT_LOCK(ro->ro_rt);
  741                         if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
  742                                 ia = ifatoia(ro->ro_rt->rt_ifa);
  743                                 if (ia) {
  744                                         IFA_ADDREF(&ia->ia_ifa);
  745                                 }
  746                         }
  747                         RT_UNLOCK(ro->ro_rt);
  748                 }
  749                 if (ia == 0) {
  750                         u_short fport = sin->sin_port;
  751 
  752                         sin->sin_port = 0;
  753                         ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
  754                         if (ia == 0) {
  755                                 ia = ifatoia(ifa_ifwithnet_scoped(sintosa(sin),
  756                                     ifscope));
  757                         }
  758                         sin->sin_port = fport;
  759                         if (ia == 0) {
  760                                 lck_rw_lock_shared(in_ifaddr_rwlock);
  761                                 ia = TAILQ_FIRST(&in_ifaddrhead);
  762                                 if (ia)
  763                                         IFA_ADDREF(&ia->ia_ifa);
  764                                 lck_rw_done(in_ifaddr_rwlock);
  765                         }
  766                         /*
  767                          * If the source address belongs to a cellular interface
  768                          * and the socket forbids our using interfaces of such
  769                          * type, pretend that there is no source address.
  770                          */
  771                         if (nocell && ia != NULL &&
  772                             ia->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) {
  773                                 IFA_REMREF(&ia->ia_ifa);
  774                                 ia = NULL;
  775                         }
  776                         if (ia == 0)
  777                                 return (EADDRNOTAVAIL);
  778                 }
  779                 /*
  780                  * If the destination address is multicast and an outgoing
  781                  * interface has been set as a multicast option, use the
  782                  * address of that interface as our source address.
  783                  */
  784                 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
  785                     inp->inp_moptions != NULL) {
  786                         struct ip_moptions *imo;
  787                         struct ifnet *ifp;
  788 
  789                         imo = inp->inp_moptions;
  790                         IMO_LOCK(imo);
  791                         if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
  792                                 ia->ia_ifp != imo->imo_multicast_ifp)) {
  793                                 ifp = imo->imo_multicast_ifp;
  794                                 if (ia)
  795                                         IFA_REMREF(&ia->ia_ifa);
  796                                 lck_rw_lock_shared(in_ifaddr_rwlock);
  797                                 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
  798                                         if (ia->ia_ifp == ifp)
  799                                                 break;
  800                                 }
  801                                 if (ia)
  802                                         IFA_ADDREF(&ia->ia_ifa);
  803                                 lck_rw_done(in_ifaddr_rwlock);
  804                                 if (ia == 0) {
  805                                         IMO_UNLOCK(imo);
  806                                         return (EADDRNOTAVAIL);
  807                                 }
  808                         }
  809                         IMO_UNLOCK(imo);
  810                 }
  811                 /*
  812                  * Don't do pcblookup call here; return interface in plocal_sin
  813                  * and exit to caller, that will do the lookup.
  814                  */
  815                 IFA_LOCK_SPIN(&ia->ia_ifa);
  816                 *plocal_sin = ia->ia_addr;
  817                 if (out_ifscope != NULL)
  818                         *out_ifscope = ia->ia_ifp->if_index;
  819                 IFA_UNLOCK(&ia->ia_ifa);
  820                 IFA_REMREF(&ia->ia_ifa);
  821         }
  822         return(0);
  823 }
  824 
  825 /*
  826  * Outer subroutine:
  827  * Connect from a socket to a specified address.
  828  * Both address and port must be specified in argument sin.
  829  * If don't have a local address for this socket yet,
  830  * then pick one.
  831  */
  832 int
  833 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, unsigned int *ifscope)
  834 {
  835         struct sockaddr_in ifaddr;
  836         struct sockaddr_in *sin = (struct sockaddr_in *)nam;
  837         struct inpcb *pcb;
  838         int error;
  839 
  840         /*
  841          *   Call inner routine, to assign local interface address.
  842          */
  843         if ((error = in_pcbladdr(inp, nam, &ifaddr, ifscope)) != 0)
  844                 return(error);
  845 
  846         socket_unlock(inp->inp_socket, 0);
  847         pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
  848             inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr.sin_addr,
  849             inp->inp_lport, 0, NULL);
  850         socket_lock(inp->inp_socket, 0);
  851 
  852         /* Check if the socket is still in a valid state. When we unlock this 
  853          * embryonic socket, it can get aborted if another thread is closing 
  854          * the listener (radar 7947600).
  855          */
  856         if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0) {
  857                 return ECONNREFUSED;
  858         }
  859 
  860         if (pcb != NULL) {
  861                 in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
  862                 return (EADDRINUSE);
  863         }
  864         if (inp->inp_laddr.s_addr == INADDR_ANY) {
  865                 if (inp->inp_lport == 0) {
  866                         error = in_pcbbind(inp, (struct sockaddr *)0, p);
  867                         if (error)
  868                             return (error);
  869                 }
  870                 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
  871                         /*lock inversion issue, mostly with udp multicast packets */
  872                         socket_unlock(inp->inp_socket, 0);
  873                         lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
  874                         socket_lock(inp->inp_socket, 0);
  875                 }
  876                 inp->inp_laddr = ifaddr.sin_addr;
  877                 inp->inp_last_outif = ifscope ? *ifscope : IFSCOPE_NONE;
  878                 inp->inp_flags |= INP_INADDR_ANY;
  879         }
  880          else {
  881                 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
  882                         /*lock inversion issue, mostly with udp multicast packets */
  883                         socket_unlock(inp->inp_socket, 0);
  884                         lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
  885                         socket_lock(inp->inp_socket, 0);
  886                 }
  887         }
  888         inp->inp_faddr = sin->sin_addr;
  889         inp->inp_fport = sin->sin_port;
  890         in_pcbrehash(inp);
  891         lck_rw_done(inp->inp_pcbinfo->mtx);
  892         return (0);
  893 }
  894 
  895 void
  896 in_pcbdisconnect(struct inpcb *inp)
  897 {
  898 
  899         inp->inp_faddr.s_addr = INADDR_ANY;
  900         inp->inp_fport = 0;
  901 
  902         if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
  903                 /*lock inversion issue, mostly with udp multicast packets */
  904                 socket_unlock(inp->inp_socket, 0);
  905                 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
  906                 socket_lock(inp->inp_socket, 0);
  907         }
  908 
  909         in_pcbrehash(inp);
  910         lck_rw_done(inp->inp_pcbinfo->mtx);
  911 
  912         if (inp->inp_socket->so_state & SS_NOFDREF) 
  913                 in_pcbdetach(inp);
  914 }
  915 
  916 void
  917 in_pcbdetach(struct inpcb *inp)
  918 {
  919         struct socket *so = inp->inp_socket;
  920 
  921         if (so->so_pcb == 0) { /* we've been called twice */
  922                 panic("in_pcbdetach: inp=%p so=%p proto=%d so_pcb is null!\n",
  923                         inp, so, so->so_proto->pr_protocol);
  924         }
  925 
  926 #if IPSEC
  927         if (ipsec_bypass == 0) {
  928                 ipsec4_delete_pcbpolicy(inp);
  929         }
  930 #endif /*IPSEC*/
  931 
  932         /* mark socket state as dead */
  933         if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING)
  934                 panic("in_pcbdetach so=%p prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol);
  935 
  936 #if TEMPDEBUG
  937         if (so->cached_in_sock_layer)
  938             printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags);
  939         else
  940             printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags);
  941 #endif
  942         if ((so->so_flags & SOF_PCBCLEARING) == 0) {
  943                 struct rtentry *rt;
  944                 struct ip_moptions *imo;
  945 
  946                 inp->inp_vflag = 0;
  947                 if (inp->inp_options) 
  948                         (void)m_free(inp->inp_options);
  949                 if ((rt = inp->inp_route.ro_rt) != NULL) {
  950                         inp->inp_route.ro_rt = NULL;
  951                         rtfree(rt);
  952                 }
  953                 imo = inp->inp_moptions;
  954                 inp->inp_moptions = NULL;
  955                 if (imo != NULL)
  956                         IMO_REMREF(imo);
  957                 sofreelastref(so, 0);
  958                 inp->inp_state = INPCB_STATE_DEAD;
  959                 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
  960         }
  961 }
  962 
  963 
  964 void 
  965 in_pcbdispose(struct inpcb *inp) 
  966 {
  967         struct socket *so = inp->inp_socket;
  968         struct inpcbinfo *ipi = inp->inp_pcbinfo;
  969 
  970 #if TEMPDEBUG
  971         if (inp->inp_state != INPCB_STATE_DEAD) {
  972                 printf("in_pcbdispose: not dead yet? so=%p\n", so);
  973         }
  974 #endif
  975         if (so && so->so_usecount != 0)
  976                 panic("%s: so %p so_usecount %d so_lockhistory %s\n",
  977                         __func__, so, so->so_usecount,
  978                         (so != NULL) ? solockhistory_nr(so) : "--");
  979 
  980         lck_rw_assert(ipi->mtx, LCK_RW_ASSERT_EXCLUSIVE);
  981 
  982         inp->inp_gencnt = ++ipi->ipi_gencnt;
  983         /*### access ipi in in_pcbremlists */
  984         in_pcbremlists(inp);
  985         
  986         if (so) {
  987                 if (so->so_proto->pr_flags & PR_PCBLOCK) {
  988                         sofreelastref(so, 0);
  989                         if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
  990 #if TEMPDEBUG
  991                                 printf("in_pcbdispose sb not cleaned up so=%p rc_cci=%x snd_cc=%x\n",
  992                                         so, so->so_rcv.sb_cc, so->so_snd.sb_cc);        
  993 #endif
  994                                 sbrelease(&so->so_rcv);
  995                                 sbrelease(&so->so_snd);
  996                         }
  997                         if (so->so_head != NULL)
  998                                 panic("in_pcbdispose, so=%p head still exist\n", so);
  999                         lck_mtx_unlock(&inp->inpcb_mtx);        
 1000                         lck_mtx_destroy(&inp->inpcb_mtx, ipi->mtx_grp); 
 1001                 }
 1002                 so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
 1003                 so->so_saved_pcb = (caddr_t) inp;
 1004                 so->so_pcb = 0; 
 1005                 inp->inp_socket = 0;
 1006 #if CONFIG_MACF_NET
 1007                 mac_inpcb_label_destroy(inp);
 1008 #endif
 1009                 /*
 1010                  * In case there a route cached after a detach (possible
 1011                  * in the tcp case), make sure that it is freed before
 1012                  * we deallocate the structure.
 1013                  */
 1014                 if (inp->inp_route.ro_rt != NULL) {
 1015                         rtfree(inp->inp_route.ro_rt);
 1016                         inp->inp_route.ro_rt = NULL;
 1017                 }
 1018                 if (so->cached_in_sock_layer == 0) {
 1019                         zfree(ipi->ipi_zone, inp);
 1020                 }
 1021                 sodealloc(so);
 1022         }
 1023 #if TEMPDEBUG
 1024         else
 1025                 printf("in_pcbdispose: no socket for inp=%p\n", inp);
 1026 #endif
 1027 }
 1028 
 1029 /*
 1030  * The calling convention of in_setsockaddr() and in_setpeeraddr() was
 1031  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 1032  * in struct pr_usrreqs, so that protocols can just reference then directly
 1033  * without the need for a wrapper function.  The socket must have a valid
 1034  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
 1035  * except through a kernel programming error, so it is acceptable to panic
 1036  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
 1037  * because there actually /is/ a programming error somewhere... XXX)
 1038  *
 1039  * Returns:     0                       Success
 1040  *              ENOBUFS                 No buffer space available
 1041  *              ECONNRESET              Connection reset
 1042  */
 1043 int
 1044 in_setsockaddr(struct socket *so, struct sockaddr **nam)
 1045 {
 1046         struct inpcb *inp;
 1047         struct sockaddr_in *sin;
 1048 
 1049         /*
 1050          * Do the malloc first in case it blocks.
 1051          */
 1052         MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
 1053         if (sin == NULL)
 1054                 return ENOBUFS;
 1055         bzero(sin, sizeof *sin);
 1056         sin->sin_family = AF_INET;
 1057         sin->sin_len = sizeof(*sin);
 1058 
 1059         inp = sotoinpcb(so);
 1060         if (!inp) {
 1061                 FREE(sin, M_SONAME);
 1062                 return ECONNRESET;
 1063         }
 1064         sin->sin_port = inp->inp_lport;
 1065         sin->sin_addr = inp->inp_laddr;
 1066 
 1067         *nam = (struct sockaddr *)sin;
 1068         return 0;
 1069 }
 1070 
 1071 int
 1072 in_setpeeraddr(struct socket *so, struct sockaddr **nam)
 1073 {
 1074         struct inpcb *inp;
 1075         struct sockaddr_in *sin;
 1076 
 1077         /*
 1078          * Do the malloc first in case it blocks.
 1079          */
 1080         MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
 1081         if (sin == NULL)
 1082                 return ENOBUFS;
 1083         bzero((caddr_t)sin, sizeof (*sin));
 1084         sin->sin_family = AF_INET;
 1085         sin->sin_len = sizeof(*sin);
 1086 
 1087         inp = sotoinpcb(so);
 1088         if (!inp) {
 1089                 FREE(sin, M_SONAME);
 1090                 return ECONNRESET;
 1091         }
 1092         sin->sin_port = inp->inp_fport;
 1093         sin->sin_addr = inp->inp_faddr;
 1094 
 1095         *nam = (struct sockaddr *)sin;
 1096         return 0;
 1097 }
 1098 
 1099 void
 1100 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
 1101                 int errno, void (*notify)(struct inpcb *, int))
 1102 {
 1103         struct inpcb *inp;
 1104 
 1105         lck_rw_lock_shared(pcbinfo->mtx);
 1106 
 1107         LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
 1108 #if INET6
 1109                 if ((inp->inp_vflag & INP_IPV4) == 0)
 1110                         continue;
 1111 #endif
 1112                 if (inp->inp_faddr.s_addr != faddr.s_addr ||
 1113                     inp->inp_socket == NULL)
 1114                                 continue;
 1115                 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) 
 1116                         continue;
 1117                 socket_lock(inp->inp_socket, 1);
 1118                 (*notify)(inp, errno);
 1119                 (void)in_pcb_checkstate(inp, WNT_RELEASE, 1);
 1120                 socket_unlock(inp->inp_socket, 1);
 1121         }
 1122         lck_rw_done(pcbinfo->mtx);
 1123 }
 1124 
 1125 /*
 1126  * Check for alternatives when higher level complains
 1127  * about service problems.  For now, invalidate cached
 1128  * routing information.  If the route was created dynamically
 1129  * (by a redirect), time to try a default gateway again.
 1130  */
 1131 void
 1132 in_losing(struct inpcb *inp)
 1133 {
 1134         struct rtentry *rt;
 1135         struct rt_addrinfo info;
 1136 
 1137         if ((rt = inp->inp_route.ro_rt) != NULL) {
 1138                 struct in_ifaddr *ia;
 1139 
 1140                 bzero((caddr_t)&info, sizeof(info));
 1141                 RT_LOCK(rt);
 1142                 info.rti_info[RTAX_DST] =
 1143                         (struct sockaddr *)&inp->inp_route.ro_dst;
 1144                 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 1145                 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 1146                 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
 1147                 if (rt->rt_flags & RTF_DYNAMIC) {
 1148                         /*
 1149                          * Prevent another thread from modifying rt_key,
 1150                          * rt_gateway via rt_setgate() after rt_lock is
 1151                          * dropped by marking the route as defunct.
 1152                          */
 1153                         rt->rt_flags |= RTF_CONDEMNED;
 1154                         RT_UNLOCK(rt);
 1155                         (void) rtrequest(RTM_DELETE, rt_key(rt),
 1156                                 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
 1157                                 (struct rtentry **)0);
 1158                 } else {
 1159                         RT_UNLOCK(rt);
 1160                 }
 1161                 /* if the address is gone keep the old route in the pcb */
 1162                 if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
 1163                         inp->inp_route.ro_rt = NULL;
 1164                         rtfree(rt);
 1165                         IFA_REMREF(&ia->ia_ifa);
 1166                 }
 1167                 /*
 1168                  * A new route can be allocated
 1169                  * the next time output is attempted.
 1170                  */
 1171         }
 1172 }
 1173 
 1174 /*
 1175  * After a routing change, flush old routing
 1176  * and allocate a (hopefully) better one.
 1177  */
 1178 void
 1179 in_rtchange(struct inpcb *inp, __unused int errno)
 1180 {
 1181         struct rtentry *rt;
 1182 
 1183         if ((rt = inp->inp_route.ro_rt) != NULL) {
 1184                 struct in_ifaddr *ia;
 1185 
 1186                 if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) {
 1187                         return; /* we can't remove the route now. not sure if still ok to use src */
 1188                 }
 1189                 IFA_REMREF(&ia->ia_ifa);
 1190                 rtfree(rt);
 1191                 inp->inp_route.ro_rt = NULL;
 1192                 /*
 1193                  * A new route can be allocated the next time
 1194                  * output is attempted.
 1195                  */
 1196         }
 1197 }
 1198 
 1199 /*
 1200  * Lookup a PCB based on the local address and port.
 1201  */
 1202 struct inpcb *
 1203 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
 1204                    unsigned int lport_arg, int wild_okay)
 1205 {
 1206         struct inpcb *inp;
 1207         int matchwild = 3, wildcard;
 1208         u_short lport = lport_arg;
 1209 
 1210         KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0,0,0,0,0);
 1211 
 1212         if (!wild_okay) {
 1213                 struct inpcbhead *head;
 1214                 /*
 1215                  * Look for an unconnected (wildcard foreign addr) PCB that
 1216                  * matches the local address and port we're looking for.
 1217                  */
 1218                 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
 1219                 LIST_FOREACH(inp, head, inp_hash) {
 1220 #if INET6
 1221                         if ((inp->inp_vflag & INP_IPV4) == 0)
 1222                                 continue;
 1223 #endif
 1224                         if (inp->inp_faddr.s_addr == INADDR_ANY &&
 1225                             inp->inp_laddr.s_addr == laddr.s_addr &&
 1226                             inp->inp_lport == lport) {
 1227                                 /*
 1228                                  * Found.
 1229                                  */
 1230                                 return (inp);
 1231                         }
 1232                 }
 1233                 /*
 1234                  * Not found.
 1235                  */
 1236                 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0,0,0,0,0);
 1237                 return (NULL);
 1238         } else {
 1239                 struct inpcbporthead *porthash;
 1240                 struct inpcbport *phd;
 1241                 struct inpcb *match = NULL;
 1242                 /*
 1243                  * Best fit PCB lookup.
 1244                  *
 1245                  * First see if this local port is in use by looking on the
 1246                  * port hash list.
 1247                  */
 1248                 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
 1249                     pcbinfo->porthashmask)];
 1250                 LIST_FOREACH(phd, porthash, phd_hash) {
 1251                         if (phd->phd_port == lport)
 1252                                 break;
 1253                 }
 1254                 if (phd != NULL) {
 1255                         /*
 1256                          * Port is in use by one or more PCBs. Look for best
 1257                          * fit.
 1258                          */
 1259                         LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
 1260                                 wildcard = 0;
 1261 #if INET6
 1262                                 if ((inp->inp_vflag & INP_IPV4) == 0)
 1263                                         continue;
 1264 #endif
 1265                                 if (inp->inp_faddr.s_addr != INADDR_ANY)
 1266                                         wildcard++;
 1267                                 if (inp->inp_laddr.s_addr != INADDR_ANY) {
 1268                                         if (laddr.s_addr == INADDR_ANY)
 1269                                                 wildcard++;
 1270                                         else if (inp->inp_laddr.s_addr != laddr.s_addr)
 1271                                                 continue;
 1272                                 } else {
 1273                                         if (laddr.s_addr != INADDR_ANY)
 1274                                                 wildcard++;
 1275                                 }
 1276                                 if (wildcard < matchwild) {
 1277                                         match = inp;
 1278                                         matchwild = wildcard;
 1279                                         if (matchwild == 0) {
 1280                                                 break;
 1281                                         }
 1282                                 }
 1283                         }
 1284                 }
 1285                 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,0,0,0,0);
 1286                 return (match);
 1287         }
 1288 }
 1289 
 1290 /*
 1291  * Check if PCB exists in hash list.
 1292  */
 1293 int
 1294 in_pcblookup_hash_exists(
 1295         struct inpcbinfo *pcbinfo,
 1296         struct in_addr faddr,
 1297         u_int fport_arg,
 1298         struct in_addr laddr,
 1299         u_int lport_arg,
 1300         int wildcard,
 1301         uid_t *uid,
 1302         gid_t *gid,
 1303         __unused struct ifnet *ifp)
 1304 {
 1305         struct inpcbhead *head;
 1306         struct inpcb *inp;
 1307         u_short fport = fport_arg, lport = lport_arg;
 1308         int found;
 1309 
 1310         *uid = UID_MAX;
 1311         *gid = GID_MAX;
 1312        
 1313         /*
 1314          * We may have found the pcb in the last lookup - check this first.
 1315          */
 1316 
 1317         lck_rw_lock_shared(pcbinfo->mtx);
 1318 
 1319         /*
 1320          * First look for an exact match.
 1321          */
 1322         head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
 1323             pcbinfo->hashmask)];
 1324         LIST_FOREACH(inp, head, inp_hash) {
 1325 #if INET6
 1326                 if ((inp->inp_vflag & INP_IPV4) == 0)
 1327                         continue;
 1328 #endif
 1329                 if (inp->inp_faddr.s_addr == faddr.s_addr &&
 1330                     inp->inp_laddr.s_addr == laddr.s_addr &&
 1331                     inp->inp_fport == fport &&
 1332                     inp->inp_lport == lport) {
 1333                         if ((found = (inp->inp_socket != NULL))) {
 1334                                 /*
 1335                                  * Found.
 1336                                  */
 1337                                 *uid = inp->inp_socket->so_uid;
 1338                                 *gid = inp->inp_socket->so_gid;
 1339                         }
 1340                         lck_rw_done(pcbinfo->mtx);
 1341                         return (found);
 1342                 }
 1343         }
 1344         if (wildcard) {
 1345                 struct inpcb *local_wild = NULL;
 1346 #if INET6
 1347                 struct inpcb *local_wild_mapped = NULL;
 1348 #endif
 1349 
 1350                 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
 1351                     pcbinfo->hashmask)];
 1352                 LIST_FOREACH(inp, head, inp_hash) {
 1353 #if INET6
 1354                         if ((inp->inp_vflag & INP_IPV4) == 0)
 1355                                 continue;
 1356 #endif
 1357                         if (inp->inp_faddr.s_addr == INADDR_ANY &&
 1358                             inp->inp_lport == lport) {
 1359 #if defined(NFAITH) && NFAITH > 0
 1360                                 if (ifp && ifp->if_type == IFT_FAITH &&
 1361                                     (inp->inp_flags & INP_FAITH) == 0)
 1362                                         continue;
 1363 #endif
 1364                                 if (inp->inp_laddr.s_addr == laddr.s_addr) {
 1365                                         if ((found = (inp->inp_socket != NULL))) {
 1366                                                 *uid = inp->inp_socket->so_uid;
 1367                                                 *gid = inp->inp_socket->so_gid;
 1368                                         }
 1369                                         lck_rw_done(pcbinfo->mtx);
 1370                                         return (found);
 1371                                 }
 1372                                 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 1373 #if INET6
 1374                                         if (inp->inp_socket &&
 1375                                             INP_CHECK_SOCKAF(inp->inp_socket,
 1376                                             AF_INET6))
 1377                                                 local_wild_mapped = inp;
 1378                                         else
 1379 #endif /* INET6 */
 1380                                         local_wild = inp;
 1381                                 }
 1382                         }
 1383                 }
 1384                 if (local_wild == NULL) {
 1385 #if INET6
 1386                         if (local_wild_mapped != NULL) {
 1387                                 if ((found = (local_wild_mapped->inp_socket != NULL))) {
 1388                                         *uid = local_wild_mapped->inp_socket->so_uid;
 1389                                         *gid = local_wild_mapped->inp_socket->so_gid;
 1390                                 }
 1391                                 lck_rw_done(pcbinfo->mtx);
 1392                                 return (found);
 1393                         }
 1394 #endif /* INET6 */
 1395                         lck_rw_done(pcbinfo->mtx);
 1396                         return (0);
 1397                 }
 1398                 if (local_wild != NULL) {
 1399                         if ((found = (local_wild->inp_socket != NULL))) {
 1400                                 *uid = local_wild->inp_socket->so_uid;
 1401                                 *gid = local_wild->inp_socket->so_gid;
 1402                         }
 1403                         lck_rw_done(pcbinfo->mtx);
 1404                         return (found);
 1405                 }
 1406         }
 1407 
 1408         /*
 1409          * Not found.
 1410          */
 1411         lck_rw_done(pcbinfo->mtx);
 1412         return (0);
 1413 }
 1414 
 1415 /*
 1416  * Lookup PCB in hash list.
 1417  */
 1418 struct inpcb *
 1419 in_pcblookup_hash(
 1420         struct inpcbinfo *pcbinfo,
 1421         struct in_addr faddr,
 1422         u_int fport_arg,
 1423         struct in_addr laddr,
 1424         u_int lport_arg,
 1425         int wildcard,
 1426         __unused struct ifnet *ifp)
 1427 {
 1428         struct inpcbhead *head;
 1429         struct inpcb *inp;
 1430         u_short fport = fport_arg, lport = lport_arg;
 1431 
 1432         /*
 1433          * We may have found the pcb in the last lookup - check this first.
 1434          */
 1435 
 1436         lck_rw_lock_shared(pcbinfo->mtx);
 1437 
 1438         /*
 1439          * First look for an exact match.
 1440          */
 1441         head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
 1442         LIST_FOREACH(inp, head, inp_hash) {
 1443 #if INET6
 1444                 if ((inp->inp_vflag & INP_IPV4) == 0)
 1445                         continue;
 1446 #endif
 1447                 if (inp->inp_faddr.s_addr == faddr.s_addr &&
 1448                     inp->inp_laddr.s_addr == laddr.s_addr &&
 1449                     inp->inp_fport == fport &&
 1450                     inp->inp_lport == lport) {
 1451                         /*
 1452                          * Found.
 1453                          */
 1454                         if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
 1455                                 lck_rw_done(pcbinfo->mtx);
 1456                                 return (inp);
 1457                         }
 1458                         else {  /* it's there but dead, say it isn't found */
 1459                                 lck_rw_done(pcbinfo->mtx);      
 1460                                 return(NULL);
 1461                         }
 1462                 }
 1463         }
 1464         if (wildcard) {
 1465                 struct inpcb *local_wild = NULL;
 1466 #if INET6
 1467                 struct inpcb *local_wild_mapped = NULL;
 1468 #endif
 1469 
 1470                 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
 1471                 LIST_FOREACH(inp, head, inp_hash) {
 1472 #if INET6
 1473                         if ((inp->inp_vflag & INP_IPV4) == 0)
 1474                                 continue;
 1475 #endif
 1476                         if (inp->inp_faddr.s_addr == INADDR_ANY &&
 1477                             inp->inp_lport == lport) {
 1478 #if defined(NFAITH) && NFAITH > 0
 1479                                 if (ifp && ifp->if_type == IFT_FAITH &&
 1480                                     (inp->inp_flags & INP_FAITH) == 0)
 1481                                         continue;
 1482 #endif
 1483                                 if (inp->inp_laddr.s_addr == laddr.s_addr) {
 1484                                         if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
 1485                                                 lck_rw_done(pcbinfo->mtx);
 1486                                                 return (inp);
 1487                                         }
 1488                                         else {  /* it's there but dead, say it isn't found */
 1489                                                 lck_rw_done(pcbinfo->mtx);      
 1490                                                 return(NULL);
 1491                                         }
 1492                                 }
 1493                                 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 1494 #if INET6
 1495                                         if (INP_CHECK_SOCKAF(inp->inp_socket,
 1496                                                              AF_INET6))
 1497                                                 local_wild_mapped = inp;
 1498                                         else
 1499 #endif /* INET6 */
 1500                                         local_wild = inp;
 1501                                 }
 1502                         }
 1503                 }
 1504                 if (local_wild == NULL) {
 1505 #if INET6
 1506                         if (local_wild_mapped != NULL) {
 1507                                 if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
 1508                                         lck_rw_done(pcbinfo->mtx);
 1509                                         return (local_wild_mapped);
 1510                                 }
 1511                                 else {  /* it's there but dead, say it isn't found */
 1512                                         lck_rw_done(pcbinfo->mtx);      
 1513                                         return(NULL);
 1514                                 }
 1515                         }
 1516 #endif /* INET6 */
 1517                         lck_rw_done(pcbinfo->mtx);
 1518                         return (NULL);
 1519                 }
 1520                 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
 1521                         lck_rw_done(pcbinfo->mtx);
 1522                         return (local_wild);
 1523                 }
 1524                 else {  /* it's there but dead, say it isn't found */
 1525                         lck_rw_done(pcbinfo->mtx);      
 1526                         return(NULL);
 1527                 }
 1528         }
 1529 
 1530         /*
 1531          * Not found.
 1532          */
 1533         lck_rw_done(pcbinfo->mtx);
 1534         return (NULL);
 1535 }
 1536 
 1537 /*
 1538  * Insert PCB onto various hash lists.
 1539  */
 1540 int
 1541 in_pcbinshash(struct inpcb *inp, int locked)
 1542 {
 1543         struct inpcbhead *pcbhash;
 1544         struct inpcbporthead *pcbporthash;
 1545         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 1546         struct inpcbport *phd;
 1547         u_int32_t hashkey_faddr;
 1548 
 1549         if (!locked) {
 1550                 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
 1551                         /*lock inversion issue, mostly with udp multicast packets */
 1552                         socket_unlock(inp->inp_socket, 0);
 1553                         lck_rw_lock_exclusive(pcbinfo->mtx);
 1554                         socket_lock(inp->inp_socket, 0);
 1555                         if (inp->inp_state == INPCB_STATE_DEAD) {
 1556                                 /* The socket got dropped when it was unlocked */
 1557                                 lck_rw_done(pcbinfo->mtx);
 1558                                 return(ECONNABORTED);
 1559                         }
 1560                 }
 1561         }
 1562 
 1563 #if INET6
 1564         if (inp->inp_vflag & INP_IPV6)
 1565                 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
 1566         else
 1567 #endif /* INET6 */
 1568         hashkey_faddr = inp->inp_faddr.s_addr;
 1569 
 1570         inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask);
 1571 
 1572         pcbhash = &pcbinfo->hashbase[inp->hash_element];
 1573 
 1574         pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
 1575             pcbinfo->porthashmask)];
 1576 
 1577         /*
 1578          * Go through port list and look for a head for this lport.
 1579          */
 1580         LIST_FOREACH(phd, pcbporthash, phd_hash) {
 1581                 if (phd->phd_port == inp->inp_lport)
 1582                         break;
 1583         }
 1584         /*
 1585          * If none exists, malloc one and tack it on.
 1586          */
 1587         if (phd == NULL) {
 1588                 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK);
 1589                 if (phd == NULL) {
 1590                         if (!locked)
 1591                                 lck_rw_done(pcbinfo->mtx);
 1592                         return (ENOBUFS); /* XXX */
 1593                 }
 1594                 phd->phd_port = inp->inp_lport;
 1595                 LIST_INIT(&phd->phd_pcblist);
 1596                 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
 1597         }
 1598         inp->inp_phd = phd;
 1599         LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
 1600         LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
 1601         if (!locked)
 1602                 lck_rw_done(pcbinfo->mtx);
 1603         return (0);
 1604 }
 1605 
 1606 /*
 1607  * Move PCB to the proper hash bucket when { faddr, fport } have  been
 1608  * changed. NOTE: This does not handle the case of the lport changing (the
 1609  * hashed port list would have to be updated as well), so the lport must
 1610  * not change after in_pcbinshash() has been called.
 1611  */
 1612 void
 1613 in_pcbrehash(struct inpcb *inp)
 1614 {
 1615         struct inpcbhead *head;
 1616         u_int32_t hashkey_faddr;
 1617 
 1618 #if INET6
 1619         if (inp->inp_vflag & INP_IPV6)
 1620                 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
 1621         else
 1622 #endif /* INET6 */
 1623         hashkey_faddr = inp->inp_faddr.s_addr;
 1624         inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, 
 1625                                 inp->inp_fport, inp->inp_pcbinfo->hashmask);
 1626         head = &inp->inp_pcbinfo->hashbase[inp->hash_element];
 1627 
 1628         LIST_REMOVE(inp, inp_hash);
 1629         LIST_INSERT_HEAD(head, inp, inp_hash);
 1630 }
 1631 
 1632 /*
 1633  * Remove PCB from various lists.
 1634  */
 1635 //###LOCK must be called with list lock held
 1636 void
 1637 in_pcbremlists(struct inpcb *inp)
 1638 {
 1639         inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
 1640 
 1641         if (inp->inp_lport) {
 1642                 struct inpcbport *phd = inp->inp_phd;
 1643 
 1644                 LIST_REMOVE(inp, inp_hash);
 1645                 LIST_REMOVE(inp, inp_portlist);
 1646                 if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) {
 1647                         LIST_REMOVE(phd, phd_hash);
 1648                         FREE(phd, M_PCB);
 1649                 }
 1650         }
 1651         LIST_REMOVE(inp, inp_list);
 1652         inp->inp_pcbinfo->ipi_count--;
 1653 }
 1654 
 1655 /* Mechanism used to defer the memory release of PCBs
 1656  * The pcb list will contain the pcb until the ripper can clean it up if
 1657  * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING
 1658  * 3) usecount is null
 1659  * This function will be called to either mark the pcb as
 1660 */
 1661 int
 1662 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
 1663 {
 1664 
 1665         volatile UInt32 *wantcnt        = (volatile UInt32 *)&pcb->inp_wantcnt;
 1666         UInt32 origwant;
 1667         UInt32 newwant;
 1668 
 1669         switch (mode) {
 1670 
 1671                 case WNT_STOPUSING:     /* try to mark the pcb as ready for recycling */
 1672 
 1673                         /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */
 1674 
 1675                         if (locked == 0)
 1676                                 socket_lock(pcb->inp_socket, 1);
 1677                         pcb->inp_state = INPCB_STATE_DEAD;
 1678 
 1679 stopusing:
 1680                         if (pcb->inp_socket->so_usecount < 0)
 1681                                 panic("in_pcb_checkstate STOP pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
 1682                         if (locked == 0)
 1683                                 socket_unlock(pcb->inp_socket, 1);
 1684 
 1685                         origwant = *wantcnt;
 1686                         if ((UInt16) origwant == 0xffff ) /* should stop using */
 1687                                 return (WNT_STOPUSING);
 1688                         newwant = 0xffff;                       
 1689                         if ((UInt16) origwant == 0) {/* try to mark it as unsuable now */
 1690                                 OSCompareAndSwap(origwant, newwant, wantcnt) ;
 1691                         }
 1692                         return (WNT_STOPUSING);
 1693                         break;
 1694 
 1695                 case WNT_ACQUIRE:       /* try to increase reference to pcb */
 1696                                         /* if WNT_STOPUSING should bail out */
 1697                         /*
 1698                          * if socket state DEAD, try to set count to STOPUSING, return failed
 1699                          * otherwise increase cnt
 1700                          */
 1701                         do {
 1702                                 origwant = *wantcnt;
 1703                                 if ((UInt16) origwant == 0xffff ) {/* should stop using */
 1704 //                                      printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%p\n", pcb);
 1705                                         return (WNT_STOPUSING);
 1706                                 }
 1707                                 newwant = origwant + 1;         
 1708                         } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
 1709                         return (WNT_ACQUIRE);
 1710                         break;
 1711 
 1712                 case WNT_RELEASE:       /* release reference. if result is null and pcb state is DEAD,
 1713                                            set wanted bit to STOPUSING
 1714                                          */
 1715 
 1716                         if (locked == 0)
 1717                                 socket_lock(pcb->inp_socket, 1);
 1718 
 1719                         do {
 1720                                 origwant = *wantcnt;
 1721                                 if ((UInt16) origwant == 0x0 ) 
 1722                                         panic("in_pcb_checkstate pcb=%p release with zero count", pcb);
 1723                                 if ((UInt16) origwant == 0xffff ) {/* should stop using */
 1724 #if TEMPDEBUG
 1725                                         printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%p\n", pcb);
 1726 #endif
 1727                                         if (locked == 0)
 1728                                                 socket_unlock(pcb->inp_socket, 1);
 1729                                         return (WNT_STOPUSING);
 1730                                 }
 1731                                 newwant = origwant - 1;         
 1732                         } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
 1733 
 1734                         if (pcb->inp_state == INPCB_STATE_DEAD) 
 1735                                 goto stopusing;
 1736                         if (pcb->inp_socket->so_usecount < 0)
 1737                                 panic("in_pcb_checkstate RELEASE pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
 1738                                 
 1739                         if (locked == 0)
 1740                                 socket_unlock(pcb->inp_socket, 1);
 1741                         return (WNT_RELEASE);
 1742                         break;
 1743 
 1744                 default:
 1745 
 1746                         panic("in_pcb_checkstate: so=%p not a valid state =%x\n", pcb->inp_socket, mode);
 1747         }
 1748 
 1749         /* NOTREACHED */
 1750         return (mode);
 1751 }
 1752 
 1753 /*
 1754  * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
 1755  * The inpcb_compat data structure is passed to user space and must
 1756  * not change. We intentionally avoid copying pointers.
 1757  */
 1758 void
 1759 inpcb_to_compat(
 1760         struct inpcb *inp,
 1761         struct inpcb_compat *inp_compat)
 1762 {
 1763         bzero(inp_compat, sizeof(*inp_compat));
 1764         inp_compat->inp_fport = inp->inp_fport;
 1765         inp_compat->inp_lport = inp->inp_lport;
 1766         inp_compat->nat_owner = inp->nat_owner;
 1767         inp_compat->nat_cookie = inp->nat_cookie;
 1768         inp_compat->inp_gencnt = inp->inp_gencnt;
 1769         inp_compat->inp_flags = inp->inp_flags;
 1770         inp_compat->inp_flow = inp->inp_flow;
 1771         inp_compat->inp_vflag = inp->inp_vflag;
 1772         inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
 1773         inp_compat->inp_ip_p = inp->inp_ip_p;
 1774         inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
 1775         inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
 1776         inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
 1777         inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
 1778         inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
 1779         inp_compat->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
 1780         inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
 1781 }
 1782 
 1783 #if !CONFIG_EMBEDDED
 1784 
 1785 void
 1786 inpcb_to_xinpcb64(
 1787         struct inpcb *inp,
 1788         struct xinpcb64 *xinp)
 1789 {
 1790         xinp->inp_fport = inp->inp_fport;
 1791         xinp->inp_lport = inp->inp_lport;
 1792         xinp->inp_gencnt = inp->inp_gencnt;
 1793         xinp->inp_flags = inp->inp_flags;
 1794         xinp->inp_flow = inp->inp_flow;
 1795         xinp->inp_vflag = inp->inp_vflag;
 1796         xinp->inp_ip_ttl = inp->inp_ip_ttl;
 1797         xinp->inp_ip_p = inp->inp_ip_p;
 1798         xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
 1799         xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
 1800         xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
 1801         xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
 1802         xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
 1803         xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
 1804         xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
 1805 }
 1806 
 1807 #endif /* !CONFIG_EMBEDDED */
 1808 
 1809 
 1810 /*
 1811  * The following routines implement this scheme:
 1812  *
 1813  * Callers of ip_output() that intend to cache the route in the inpcb pass
 1814  * a local copy of the struct route to ip_output().  Using a local copy of
 1815  * the cached route significantly simplifies things as IP no longer has to
 1816  * worry about having exclusive access to the passed in struct route, since
 1817  * it's defined in the caller's stack; in essence, this allows for a lock-
 1818  * less operation when updating the struct route at the IP level and below,
 1819  * whenever necessary. The scheme works as follows:
 1820  *
 1821  * Prior to dropping the socket's lock and calling ip_output(), the caller
 1822  * copies the struct route from the inpcb into its stack, and adds a reference
 1823  * to the cached route entry, if there was any.  The socket's lock is then
 1824  * dropped and ip_output() is called with a pointer to the copy of struct
 1825  * route defined on the stack (not to the one in the inpcb.)
 1826  *
 1827  * Upon returning from ip_output(), the caller then acquires the socket's
 1828  * lock and synchronizes the cache; if there is no route cached in the inpcb,
 1829  * it copies the local copy of struct route (which may or may not contain any
 1830  * route) back into the cache; otherwise, if the inpcb has a route cached in
 1831  * it, the one in the local copy will be freed, if there's any.  Trashing the
 1832  * cached route in the inpcb can be avoided because ip_output() is single-
 1833  * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
 1834  * by the socket/transport layer.)
 1835  */
 1836 void
 1837 inp_route_copyout(struct inpcb *inp, struct route *dst)
 1838 {
 1839         struct route *src = &inp->inp_route;
 1840 
 1841         lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 1842 
 1843         /*
 1844          * If the route in the PCB is not for IPv4, blow it away;
 1845          * this is possible in the case of IPv4-mapped address case.
 1846          */
 1847         if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
 1848                 rtfree(src->ro_rt);
 1849                 src->ro_rt = NULL;
 1850         }
 1851         
 1852         route_copyout(dst, src, sizeof(*dst));
 1853 }
 1854 
 1855 void
 1856 inp_route_copyin(struct inpcb *inp, struct route *src)
 1857 {
 1858         struct route *dst = &inp->inp_route;
 1859 
 1860         lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 1861 
 1862         /* Minor sanity check */
 1863         if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
 1864                 panic("%s: wrong or corrupted route: %p", __func__, src);
 1865 
 1866         route_copyin(src, dst, sizeof(*src));
 1867 }
 1868 
 1869 /*
 1870  * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
 1871  */
 1872 void
 1873 inp_bindif(struct inpcb *inp, unsigned int ifscope)
 1874 {
 1875         /*
 1876          * A zero interface scope value indicates an "unbind".
 1877          * Otherwise, take in whatever value the app desires;
 1878          * the app may already know the scope (or force itself
 1879          * to such a scope) ahead of time before the interface
 1880          * gets attached.  It doesn't matter either way; any
 1881          * route lookup from this point on will require an
 1882          * exact match for the embedded interface scope.
 1883          */
 1884         inp->inp_boundif = ifscope;
 1885         if (inp->inp_boundif == IFSCOPE_NONE)
 1886                 inp->inp_flags &= ~INP_BOUND_IF;
 1887         else
 1888                 inp->inp_flags |= INP_BOUND_IF;
 1889 
 1890         /* Blow away any cached route in the PCB */
 1891         if (inp->inp_route.ro_rt != NULL) {
 1892                 rtfree(inp->inp_route.ro_rt);
 1893                 inp->inp_route.ro_rt = NULL;
 1894         }
 1895 }
 1896 
 1897 /*
 1898  * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option.
 1899  */
 1900 int
 1901 inp_nocellular(struct inpcb *inp, unsigned int val)
 1902 {
 1903         if (val) {
 1904                 inp->inp_flags |= INP_NO_IFT_CELLULAR;
 1905         } else if (inp->inp_flags & INP_NO_IFT_CELLULAR) {
 1906                 /* once set, it cannot be unset */
 1907                 return (EINVAL);
 1908         }
 1909 
 1910         /* Blow away any cached route in the PCB */
 1911         if (inp->inp_route.ro_rt != NULL) {
 1912                 rtfree(inp->inp_route.ro_rt);
 1913                 inp->inp_route.ro_rt = NULL;
 1914         }
 1915 
 1916         return (0);
 1917 }

Cache object: 5c7c7506112c635ef3d76d30a2a2a014


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.