FreeBSD/Linux Kernel Cross Reference
sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c

/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *      The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2004 The FreeBSD Foundation.  All rights reserved.
 * Copyright (c) 2004-2008 Robert N. M. Watson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
 */

/*
 *
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

#include "sdp.h"

#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>
#include <sys/sysctl.h>

uma_zone_t      sdp_zone;
struct rwlock   sdp_lock;
LIST_HEAD(, sdp_sock) sdp_list;

struct workqueue_struct *rx_comp_wq;

RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
#define SDP_LIST_WLOCK()        rw_wlock(&sdp_lock)
#define SDP_LIST_RLOCK()        rw_rlock(&sdp_lock)
#define SDP_LIST_WUNLOCK()      rw_wunlock(&sdp_lock)
#define SDP_LIST_RUNLOCK()      rw_runlock(&sdp_lock)
#define SDP_LIST_WLOCK_ASSERT() rw_assert(&sdp_lock, RW_WLOCKED)
#define SDP_LIST_RLOCK_ASSERT() rw_assert(&sdp_lock, RW_RLOCKED)
#define SDP_LIST_LOCK_ASSERT()  rw_assert(&sdp_lock, RW_LOCKED)
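
/*
 * Lock ordering, as practiced below: the global sdp_lock guards sdp_list
 * and sdp_count, and each sdp_sock additionally carries its own rwlock.
 * Code that needs both (e.g. sdp_apply_all()) takes the list lock first;
 * sdp_pcbfree() instead drops the per-socket lock before taking the list
 * lock for removal and relies on the SDP_DESTROY flag to fence teardown.
 */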

MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol");

static void sdp_stop_keepalive_timer(struct socket *so);

/*
 * SDP protocol interface to socket abstraction.
 */
/*
 * sdp_sendspace and sdp_recvspace are the default send and receive window
 * sizes, respectively.
 */
u_long  sdp_sendspace = 1024*32;
u_long  sdp_recvspace = 1024*64;

static int sdp_count;

/*
 * Disable asynchronous CMA events for sockets that are being torn down.
 */
static void
sdp_destroy_cma(struct sdp_sock *ssk)
{

        if (ssk->id == NULL)
                return;
        rdma_destroy_id(ssk->id);
        ssk->id = NULL;
}

static int
sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
{
        struct sockaddr_in *sin;
        struct sockaddr_in null;
        int error;

        SDP_WLOCK_ASSERT(ssk);

        if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
                return (EINVAL);
        /* rdma_bind_addr handles bind races.  */
        SDP_WUNLOCK(ssk);
        if (ssk->id == NULL)
                ssk->id = rdma_create_id(&init_net, sdp_cma_handler, ssk,
                    RDMA_PS_SDP, IB_QPT_RC);
        if (ssk->id == NULL) {
                SDP_WLOCK(ssk);
                return (ENOMEM);
        }
        if (nam == NULL) {
                null.sin_family = AF_INET;
                null.sin_len = sizeof(null);
                null.sin_addr.s_addr = INADDR_ANY;
                null.sin_port = 0;
                bzero(&null.sin_zero, sizeof(null.sin_zero));
                nam = (struct sockaddr *)&null;
        }
        error = -rdma_bind_addr(ssk->id, nam);
        SDP_WLOCK(ssk);
        if (error == 0) {
                sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
                ssk->laddr = sin->sin_addr.s_addr;
                ssk->lport = sin->sin_port;
        } else
                sdp_destroy_cma(ssk);
        return (error);
}
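
/*
 * Note on sdp_pcbbind(): the per-socket lock is dropped around
 * rdma_create_id() and rdma_bind_addr() because the CMA calls may sleep;
 * as the comment above says, rdma_bind_addr() resolves bind races, so no
 * local port hash is consulted here and the cred argument is currently
 * unused.
 */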

static void
sdp_pcbfree(struct sdp_sock *ssk)
{

        KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
        KASSERT((ssk->flags & SDP_DESTROY) == 0,
            ("ssk %p already destroyed", ssk));

        sdp_dbg(ssk->socket, "Freeing pcb");
        SDP_WLOCK_ASSERT(ssk);
        ssk->flags |= SDP_DESTROY;
        SDP_WUNLOCK(ssk);
        SDP_LIST_WLOCK();
        sdp_count--;
        LIST_REMOVE(ssk, list);
        SDP_LIST_WUNLOCK();
        crfree(ssk->cred);
        ssk->qp_active = 0;
        if (ssk->qp) {
                ib_destroy_qp(ssk->qp);
                ssk->qp = NULL;
        }
        sdp_tx_ring_destroy(ssk);
        sdp_rx_ring_destroy(ssk);
        sdp_destroy_cma(ssk);
        rw_destroy(&ssk->rx_ring.destroyed_lock);
        rw_destroy(&ssk->lock);
        uma_zfree(sdp_zone, ssk);
}

/*
 * Common routines to return a socket address.
 */
static struct sockaddr *
sdp_sockaddr(in_port_t port, struct in_addr *addr_p)
{
        struct sockaddr_in *sin;

        sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK | M_ZERO);
        sin->sin_family = AF_INET;
        sin->sin_len = sizeof(*sin);
        sin->sin_addr = *addr_p;
        sin->sin_port = port;

        return ((struct sockaddr *)sin);
}

static int
sdp_getsockaddr(struct socket *so, struct sockaddr **nam)
{
        struct sdp_sock *ssk;
        struct in_addr addr;
        in_port_t port;

        ssk = sdp_sk(so);
        SDP_RLOCK(ssk);
        port = ssk->lport;
        addr.s_addr = ssk->laddr;
        SDP_RUNLOCK(ssk);

        *nam = sdp_sockaddr(port, &addr);
        return (0);
}

static int
sdp_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
        struct sdp_sock *ssk;
        struct in_addr addr;
        in_port_t port;

        ssk = sdp_sk(so);
        SDP_RLOCK(ssk);
        port = ssk->fport;
        addr.s_addr = ssk->faddr;
        SDP_RUNLOCK(ssk);

        *nam = sdp_sockaddr(port, &addr);
        return (0);
}

#if 0
static void
sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
{
        struct sdp_sock *ssk;

        SDP_LIST_RLOCK();
        LIST_FOREACH(ssk, &sdp_list, list) {
                SDP_WLOCK(ssk);
                func(ssk, arg);
                SDP_WUNLOCK(ssk);
        }
        SDP_LIST_RUNLOCK();
}
#endif

static void
sdp_output_reset(struct sdp_sock *ssk)
{
        struct rdma_cm_id *id;

        SDP_WLOCK_ASSERT(ssk);
        if (ssk->id) {
                id = ssk->id;
                ssk->qp_active = 0;
                SDP_WUNLOCK(ssk);
                rdma_disconnect(id);
                SDP_WLOCK(ssk);
        }
        ssk->state = TCPS_CLOSED;
}

/*
 * Attempt to close an SDP socket, marking it as dropped, and freeing
 * the socket if we hold the only reference.
 */
static struct sdp_sock *
sdp_closed(struct sdp_sock *ssk)
{
        struct socket *so;

        SDP_WLOCK_ASSERT(ssk);

        ssk->flags |= SDP_DROPPED;
        so = ssk->socket;
        soisdisconnected(so);
        if (ssk->flags & SDP_SOCKREF) {
                ssk->flags &= ~SDP_SOCKREF;
                SDP_WUNLOCK(ssk);
                sorele(so);
                return (NULL);
        }
        return (ssk);
}

/*
 * Perform timer-based shutdowns that cannot operate in
 * callout context.
 */
static void
sdp_shutdown_task(void *data, int pending)
{
        struct sdp_sock *ssk;

        ssk = data;
        SDP_WLOCK(ssk);
        /*
         * I don't think this can race with another call to pcbfree()
         * because SDP_TIMEWAIT protects it.  SDP_DESTROY may be redundant.
         */
        if (ssk->flags & SDP_DESTROY)
                panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
                    ssk);
        if (ssk->flags & SDP_DISCON)
                sdp_output_reset(ssk);
        /* We have to clear this so sdp_detach() will call pcbfree(). */
        ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
        if ((ssk->flags & SDP_DROPPED) == 0 &&
            sdp_closed(ssk) == NULL)
                return;
        if (ssk->socket == NULL) {
                sdp_pcbfree(ssk);
                return;
        }
        SDP_WUNLOCK(ssk);
}

/*
 * 2msl has expired, schedule the shutdown task.
 */
static void
sdp_2msl_timeout(void *data)
{
        struct sdp_sock *ssk;

        ssk = data;
        /* Callout canceled. */
        if (!callout_active(&ssk->keep2msl))
                goto out;
        callout_deactivate(&ssk->keep2msl);
        /* Should be impossible, defensive programming. */
        if ((ssk->flags & SDP_TIMEWAIT) == 0)
                goto out;
        taskqueue_enqueue(taskqueue_thread, &ssk->shutdown_task);
out:
        SDP_WUNLOCK(ssk);
        return;
}

/*
 * Schedule the 2msl wait timer.
 */
static void
sdp_2msl_wait(struct sdp_sock *ssk)
{

        SDP_WLOCK_ASSERT(ssk);
        ssk->flags |= SDP_TIMEWAIT;
        ssk->state = TCPS_TIME_WAIT;
        soisdisconnected(ssk->socket);
        callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
}
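
/*
 * Teardown pipeline, in summary: sdp_2msl_wait() marks the pcb
 * SDP_TIMEWAIT and arms keep2msl; sdp_2msl_timeout() then hands off to
 * sdp_shutdown_task() on taskqueue_thread because the remaining work
 * (rdma_disconnect(), sdp_pcbfree()) may sleep and cannot run in callout
 * context.  The task frees the pcb only once sdp_detach() has cleared
 * ssk->socket.
 */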

/*
 * Timed out waiting for the final fin/ack from rdma_disconnect().
 */
static void
sdp_dreq_timeout(void *data)
{
        struct sdp_sock *ssk;

        ssk = data;
        /* Callout canceled. */
        if (!callout_active(&ssk->keep2msl))
                goto out;
        /* Callout rescheduled, probably as a different timer. */
        if (callout_pending(&ssk->keep2msl))
                goto out;
        callout_deactivate(&ssk->keep2msl);
        if (ssk->state != TCPS_FIN_WAIT_1 && ssk->state != TCPS_LAST_ACK)
                goto out;
        if ((ssk->flags & SDP_DREQWAIT) == 0)
                goto out;
        ssk->flags &= ~SDP_DREQWAIT;
        ssk->flags |= SDP_DISCON;
        sdp_2msl_wait(ssk);
        ssk->qp_active = 0;
out:
        SDP_WUNLOCK(ssk);
}

/*
 * Received the final fin/ack.  Cancel the 2msl.
 */
void
sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
{
        sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
        ssk->flags &= ~SDP_DREQWAIT;
        sdp_2msl_wait(ssk);
}

static int
sdp_init_sock(struct socket *sk)
{
        struct sdp_sock *ssk = sdp_sk(sk);

        sdp_dbg(sk, "%s\n", __func__);

        callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
        TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
#ifdef SDP_ZCOPY
        INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
        ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
        ssk->tx_ring.rdma_inflight = NULL;
#endif
        atomic_set(&ssk->mseq_ack, 0);
        sdp_rx_ring_init(ssk);
        ssk->tx_ring.buffer = NULL;

        return (0);
}

/*
 * Allocate an sdp_sock for the socket and reserve socket buffer space.
 */
static int
sdp_attach(struct socket *so, int proto, struct thread *td)
{
        struct sdp_sock *ssk;
        int error;

        ssk = sdp_sk(so);
        KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so));
        if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
                error = soreserve(so, sdp_sendspace, sdp_recvspace);
                if (error)
                        return (error);
        }
        so->so_rcv.sb_flags |= SB_AUTOSIZE;
        so->so_snd.sb_flags |= SB_AUTOSIZE;
        ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
        if (ssk == NULL)
                return (ENOBUFS);
        rw_init(&ssk->lock, "sdpsock");
        ssk->socket = so;
        ssk->cred = crhold(so->so_cred);
        so->so_pcb = (caddr_t)ssk;
        sdp_init_sock(so);
        ssk->flags = 0;
        ssk->qp_active = 0;
        ssk->state = TCPS_CLOSED;
        mbufq_init(&ssk->rxctlq, INT_MAX);
        SDP_LIST_WLOCK();
        LIST_INSERT_HEAD(&sdp_list, ssk, list);
        sdp_count++;
        SDP_LIST_WUNLOCK();

        return (0);
}
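
/*
 * Usage sketch (illustrative, not part of the original file): userland
 * reaches sdp_attach() through socreate() by opening a stream socket in
 * the SDP domain, after which the descriptor behaves like a TCP socket:
 *
 *	int s = socket(AF_INET_SDP, SOCK_STREAM, 0);
 *	connect(s, (struct sockaddr *)&sin, sizeof(sin));
 *
 * AF_INET_SDP is assumed to be the SDP address family exposed by this
 * port; addresses are ordinary sockaddr_in values.
 */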

/*
 * Detach SDP from the socket, potentially leaving it around for the
 * timewait to expire.
 */
static void
sdp_detach(struct socket *so)
{
        struct sdp_sock *ssk;

        ssk = sdp_sk(so);
        SDP_WLOCK(ssk);
        KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
        ssk->socket->so_pcb = NULL;
        ssk->socket = NULL;
        if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT))
                SDP_WUNLOCK(ssk);
        else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT)
                sdp_pcbfree(ssk);
        else
                panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
}

/*
 * Allocate a local address for the socket.
 */
static int
sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
        int error = 0;
        struct sdp_sock *ssk;
        struct sockaddr_in *sin;

        sin = (struct sockaddr_in *)nam;
        if (sin->sin_family != AF_INET)
                return (EAFNOSUPPORT);
        if (nam->sa_len != sizeof(*sin))
                return (EINVAL);
        if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
                return (EAFNOSUPPORT);

        ssk = sdp_sk(so);
        SDP_WLOCK(ssk);
        if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
                error = EINVAL;
                goto out;
        }
        error = sdp_pcbbind(ssk, nam, td->td_ucred);
out:
        SDP_WUNLOCK(ssk);

        return (error);
}

/*
 * Prepare to accept connections.
 */
static int
sdp_listen(struct socket *so, int backlog, struct thread *td)
{
        int error = 0;
        struct sdp_sock *ssk;

        ssk = sdp_sk(so);
        SDP_WLOCK(ssk);
        if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
                error = EINVAL;
                goto out;
        }
        if (error == 0 && ssk->lport == 0)
                error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
        SOCK_LOCK(so);
        if (error == 0)
                error = solisten_proto_check(so);
        if (error == 0) {
                solisten_proto(so, backlog);
                ssk->state = TCPS_LISTEN;
        }
        SOCK_UNLOCK(so);

out:
        SDP_WUNLOCK(ssk);
        if (error == 0)
                error = -rdma_listen(ssk->id, backlog);
        return (error);
}

/*
 * Initiate an SDP connection to nam.
 */
static int
sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
{
        struct sockaddr_in src;
        struct socket *so;
        int error;

        so = ssk->socket;

        SDP_WLOCK_ASSERT(ssk);
        if (ssk->lport == 0) {
                error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
                if (error)
                        return (error);
        }
        src.sin_family = AF_INET;
        src.sin_len = sizeof(src);
        bzero(&src.sin_zero, sizeof(src.sin_zero));
        src.sin_port = ssk->lport;
        src.sin_addr.s_addr = ssk->laddr;
        soisconnecting(so);
        SDP_WUNLOCK(ssk);
        error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
            SDP_RESOLVE_TIMEOUT);
        SDP_WLOCK(ssk);
        if (error == 0)
                ssk->state = TCPS_SYN_SENT;

        return (error);
}

/*
 * Initiate an SDP connection.
 */
static int
sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
        int error = 0;
        struct sdp_sock *ssk;
        struct sockaddr_in *sin;

        sin = (struct sockaddr_in *)nam;
        if (nam->sa_len != sizeof(*sin))
                return (EINVAL);
        if (sin->sin_family != AF_INET)
                return (EAFNOSUPPORT);
        if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
                return (EAFNOSUPPORT);
        if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0)
                return (error);
        ssk = sdp_sk(so);
        SDP_WLOCK(ssk);
        if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
                error = EINVAL;
        else
                error = sdp_start_connect(ssk, nam, td);
        SDP_WUNLOCK(ssk);
        return (error);
}

/*
 * Drop an SDP socket, reporting the specified error.  If the connection
 * is synchronized, send a RST to the peer.
 */
static struct sdp_sock *
sdp_drop(struct sdp_sock *ssk, int errno)
{
        struct socket *so;

        SDP_WLOCK_ASSERT(ssk);
        so = ssk->socket;
        if (TCPS_HAVERCVDSYN(ssk->state))
                sdp_output_reset(ssk);
        if (errno == ETIMEDOUT && ssk->softerror)
                errno = ssk->softerror;
        so->so_error = errno;
        return (sdp_closed(ssk));
}

/*
 * The user issued a close, and we wish to trail through shutdown states:
 * if we never received a SYN, just forget it.  If we got a SYN from the
 * peer but haven't sent a FIN, go to FIN_WAIT_1 to send the peer a FIN.
 * If we already got a FIN from the peer, we are almost done; go to
 * LAST_ACK.  In all other cases we have already sent a FIN (e.g. after
 * PRU_SHUTDOWN) and just have to play the tedious game of waiting for
 * the peer to send a FIN or stop responding to keep-alives, etc.
 * We can let the user exit from the close as soon as the FIN is acked.
 */
static void
sdp_usrclosed(struct sdp_sock *ssk)
{

        SDP_WLOCK_ASSERT(ssk);

        switch (ssk->state) {
        case TCPS_LISTEN:
                ssk->state = TCPS_CLOSED;
                SDP_WUNLOCK(ssk);
                sdp_destroy_cma(ssk);
                SDP_WLOCK(ssk);
                /* FALLTHROUGH */
        case TCPS_CLOSED:
                ssk = sdp_closed(ssk);
                /*
                 * sdp_closed() should never return NULL here as the socket is
                 * still open.
                 */
                KASSERT(ssk != NULL,
                    ("sdp_usrclosed: sdp_closed() returned NULL"));
                break;

        case TCPS_SYN_SENT:
                /* FALLTHROUGH */
        case TCPS_SYN_RECEIVED:
                ssk->flags |= SDP_NEEDFIN;
                break;

        case TCPS_ESTABLISHED:
                ssk->flags |= SDP_NEEDFIN;
                ssk->state = TCPS_FIN_WAIT_1;
                break;

        case TCPS_CLOSE_WAIT:
                ssk->state = TCPS_LAST_ACK;
                break;
        }
        if (ssk->state >= TCPS_FIN_WAIT_2) {
                /* Prevent the connection hanging in FIN_WAIT_2 forever. */
                if (ssk->state == TCPS_FIN_WAIT_2)
                        sdp_2msl_wait(ssk);
                else
                        soisdisconnected(ssk->socket);
        }
}

static void
sdp_output_disconnect(struct sdp_sock *ssk)
{

        SDP_WLOCK_ASSERT(ssk);
        callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
            sdp_dreq_timeout, ssk);
        ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
        sdp_post_sends(ssk, M_NOWAIT);
}
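
/*
 * Note: SDP's disconnect request (the "DREQ" in SDP_DREQWAIT) plays the
 * role of TCP's FIN.  sdp_output_disconnect() asks sdp_post_sends() to
 * emit one via SDP_NEEDFIN and arms sdp_dreq_timeout() as a fallback; if
 * the peer never answers, the timeout forces the pcb into the 2msl
 * TIME_WAIT path above.
 */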

/*
 * Initiate or continue a disconnect.
 * If embryonic state, just send reset (once).
 * If in ``let data drain'' option and linger null, just drop.
 * Otherwise (hard), mark socket disconnecting and drop
 * current input data; switch states based on user close, and
 * send segment to peer (with FIN).
 */
static void
sdp_start_disconnect(struct sdp_sock *ssk)
{
        struct socket *so;
        int unread;

        so = ssk->socket;
        SDP_WLOCK_ASSERT(ssk);
        sdp_stop_keepalive_timer(so);
        /*
         * Neither sdp_closed() nor sdp_drop() should return NULL, as the
         * socket is still open.
         */
        if (ssk->state < TCPS_ESTABLISHED) {
                ssk = sdp_closed(ssk);
                KASSERT(ssk != NULL,
                    ("sdp_start_disconnect: sdp_close() returned NULL"));
        } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
                ssk = sdp_drop(ssk, 0);
                KASSERT(ssk != NULL,
                    ("sdp_start_disconnect: sdp_drop() returned NULL"));
        } else {
                soisdisconnecting(so);
                unread = sbused(&so->so_rcv);
                sbflush(&so->so_rcv);
                sdp_usrclosed(ssk);
                if (!(ssk->flags & SDP_DROPPED)) {
                        if (unread)
                                sdp_output_reset(ssk);
                        else
                                sdp_output_disconnect(ssk);
                }
        }
}

/*
 * User initiated disconnect.
 */
static int
sdp_disconnect(struct socket *so)
{
        struct sdp_sock *ssk;
        int error = 0;

        ssk = sdp_sk(so);
        SDP_WLOCK(ssk);
        if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
                error = ECONNRESET;
                goto out;
        }
        sdp_start_disconnect(ssk);
out:
        SDP_WUNLOCK(ssk);
        return (error);
}

/*
 * Accept a connection.  Essentially all the work is done at higher levels;
 * just return the address of the peer, storing through addr.
 *
 * XXX This is broken XXX
 *
 * The rationale for acquiring the sdp lock here is somewhat complicated,
 * and is described in detail in the commit log entry for r175612.  Acquiring
 * it delays an accept(2) racing with sonewconn(), which inserts the socket
 * before the address/port fields are initialized.  A better fix would
 * prevent the socket from being placed in the listen queue until all fields
 * are fully initialized.
 */
static int
sdp_accept(struct socket *so, struct sockaddr **nam)
{
        struct sdp_sock *ssk = NULL;
        struct in_addr addr;
        in_port_t port;
        int error;

        if (so->so_state & SS_ISDISCONNECTED)
                return (ECONNABORTED);

        port = 0;
        addr.s_addr = 0;
        error = 0;
        ssk = sdp_sk(so);
        SDP_WLOCK(ssk);
        if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
                error = ECONNABORTED;
                goto out;
        }
        port = ssk->fport;
        addr.s_addr = ssk->faddr;
out:
        SDP_WUNLOCK(ssk);
        if (error == 0)
                *nam = sdp_sockaddr(port, &addr);
        return (error);
}

/*
 * Mark the connection as being incapable of further output.
 */
static int
sdp_shutdown(struct socket *so)
{
        int error = 0;
        struct sdp_sock *ssk;

        ssk = sdp_sk(so);
        SDP_WLOCK(ssk);
        if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
                error = ECONNRESET;
                goto out;
        }
        socantsendmore(so);
        sdp_usrclosed(ssk);
        if (!(ssk->flags & SDP_DROPPED))
                sdp_output_disconnect(ssk);

out:
        SDP_WUNLOCK(ssk);

        return (error);
}

static void
sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
{
        struct mbuf *n;
        int ncnt;

        SOCKBUF_LOCK_ASSERT(sb);
        SBLASTRECORDCHK(sb);
        KASSERT(mb->m_flags & M_PKTHDR,
                ("sdp_append: %p Missing packet header.\n", mb));
        n = sb->sb_lastrecord;
        /*
         * If the queue is empty just set all pointers and proceed.
         */
        if (n == NULL) {
                sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb;
                for (; mb; mb = mb->m_next) {
                        sb->sb_mbtail = mb;
                        sballoc(sb, mb);
                }
                return;
        }
        /*
         * Count the number of mbufs in the current tail.
         */
        for (ncnt = 0; n->m_next; n = n->m_next)
                ncnt++;
        n = sb->sb_lastrecord;
        /*
         * If the two chains can fit in a single sdp packet and
         * the last record has not been sent yet (WRITABLE) coalesce
         * them.  The lastrecord remains the same but we must strip the
         * packet header and then let sbcompress do the hard part.
         */
        if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES &&
            n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
            ssk->xmit_size_goal) {
                m_adj(mb, SDP_HEAD_SIZE);
                n->m_pkthdr.len += mb->m_pkthdr.len;
                n->m_flags |= mb->m_flags & (M_PUSH | M_URG);
                m_demote(mb, 1, 0);
                sbcompress(sb, mb, sb->sb_mbtail);
                return;
        }
        /*
         * Not compressible, just append to the end and adjust counters.
         */
        sb->sb_lastrecord->m_flags |= M_PUSH;
        sb->sb_lastrecord->m_nextpkt = mb;
        sb->sb_lastrecord = mb;
        if (sb->sb_sndptr == NULL)
                sb->sb_sndptr = mb;
        for (; mb; mb = mb->m_next) {
                sb->sb_mbtail = mb;
                sballoc(sb, mb);
        }
}
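
/*
 * Coalescing arithmetic, with illustrative numbers only: assuming an
 * xmit_size_goal of 32768 bytes and a 16-byte SDP header, a queued
 * 20000-byte record absorbs an incoming 10000-byte chain because
 * 20000 + 10000 - 16 < 32768 and the combined mbuf count stays under
 * SDP_MAX_SEND_SGES; the second header is stripped since the merged
 * record is framed by the first one.
 */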

/*
 * Do a send by putting data in output queue and updating urgent
 * marker if URG set.  Possibly send more data.  Unlike the other
 * pru_*() routines, the mbuf chains are our responsibility.  We
 * must either enqueue them or free them.  The other pru_* routines
 * generally are caller-frees.
 *
 * This comes from sendfile, normal sends will come from sdp_sosend().
 */
static int
sdp_send(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *nam, struct mbuf *control, struct thread *td)
{
        struct sdp_sock *ssk;
        struct mbuf *n;
        int error;
        int cnt;

        if (nam != NULL) {
                if (nam->sa_family != AF_INET) {
                        if (control)
                                m_freem(control);
                        m_freem(m);
                        return (EAFNOSUPPORT);
                }
                if (nam->sa_len != sizeof(struct sockaddr_in)) {
                        if (control)
                                m_freem(control);
                        m_freem(m);
                        return (EINVAL);
                }
        }

        error = 0;
        ssk = sdp_sk(so);
        KASSERT(m->m_flags & M_PKTHDR,
            ("sdp_send: %p no packet header", m));
        M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
        mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
        for (n = m, cnt = 0; n->m_next; n = n->m_next)
                cnt++;
        if (cnt > SDP_MAX_SEND_SGES) {
                n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
                if (n == NULL) {
                        m_freem(m);
                        return (EMSGSIZE);
                }
                m = n;
                for (cnt = 0; n->m_next; n = n->m_next)
                        cnt++;
        }
        SDP_WLOCK(ssk);
        if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
                if (control)
                        m_freem(control);
                if (m)
                        m_freem(m);
                error = ECONNRESET;
                goto out;
        }
        if (control) {
                /* SDP doesn't support control messages. */
                if (control->m_len) {
                        m_freem(control);
                        if (m)
                                m_freem(m);
                        error = EINVAL;
                        goto out;
                }
                m_freem(control);       /* empty control, just free it */
        }
        if (!(flags & PRUS_OOB)) {
                SOCKBUF_LOCK(&so->so_snd);
                sdp_append(ssk, &so->so_snd, m, cnt);
                SOCKBUF_UNLOCK(&so->so_snd);
                if (nam && ssk->state < TCPS_SYN_SENT) {
                        /*
                         * Do implied connect if not yet connected.
                         */
                        error = sdp_start_connect(ssk, nam, td);
                        if (error)
                                goto out;
                }
                if (flags & PRUS_EOF) {
                        /*
                         * Close the send side of the connection after
                         * the data is sent.
                         */
                        socantsendmore(so);
                        sdp_usrclosed(ssk);
                        if (!(ssk->flags & SDP_DROPPED))
                                sdp_output_disconnect(ssk);
                } else if (!(ssk->flags & SDP_DROPPED) &&
                    !(flags & PRUS_MORETOCOME))
                        sdp_post_sends(ssk, M_NOWAIT);
                SDP_WUNLOCK(ssk);
                return (0);
        } else {
                SOCKBUF_LOCK(&so->so_snd);
                if (sbspace(&so->so_snd) < -512) {
                        SOCKBUF_UNLOCK(&so->so_snd);
                        m_freem(m);
                        error = ENOBUFS;
                        goto out;
                }
                /*
                 * According to RFC961 (Assigned Protocols),
                 * the urgent pointer points to the last octet
                 * of urgent data.  We continue, however,
                 * to consider it to indicate the first octet
                 * of data past the urgent section.
                 * Otherwise, snd_up should be one lower.
                 */
                m->m_flags |= M_URG | M_PUSH;
                sdp_append(ssk, &so->so_snd, m, cnt);
                SOCKBUF_UNLOCK(&so->so_snd);
                if (nam && ssk->state < TCPS_SYN_SENT) {
                        /*
                         * Do implied connect if not yet connected.
                         */
                        error = sdp_start_connect(ssk, nam, td);
                        if (error)
                                goto out;
                }
                sdp_post_sends(ssk, M_NOWAIT);
                SDP_WUNLOCK(ssk);
                return (0);
        }
out:
        SDP_WUNLOCK(ssk);
        return (error);
}

/*
 * Send on a socket.  If send must go all at once and message is larger than
 * send buffering, then hard error.  Lock against other senders.  If must go
 * all at once and not enough room now, then inform user that this would
 * block and do nothing.  Otherwise, if nonblocking, send as much as
 * possible.  The data to be sent is described by "uio" if nonzero, otherwise
 * by the mbuf chain "top" (which must be null if uio is not).  Data provided
 * in mbuf chain must be small enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers must check for short
 * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
 * on return.
 */
static int
sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
        struct sdp_sock *ssk;
        long space, resid;
        int atomic;
        int error;
        int copy;

        if (uio != NULL)
                resid = uio->uio_resid;
        else
                resid = top->m_pkthdr.len;
        atomic = top != NULL;
        if (control != NULL) {
                if (control->m_len) {
                        m_freem(control);
                        if (top)
                                m_freem(top);
                        return (EINVAL);
                }
                m_freem(control);
                control = NULL;
        }
        /*
         * In theory resid should be unsigned.  However, space must be
         * signed, as it might be less than 0 if we over-committed, and we
         * must use a signed comparison of space and resid.  On the other
         * hand, a negative resid causes us to loop sending 0-length
         * segments to the protocol.
         *
         * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
         * type sockets since that's an error.
         */
        if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
                error = EINVAL;
                goto out;
        }
        if (td != NULL)
                td->td_ru.ru_msgsnd++;

        ssk = sdp_sk(so);
        error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
        if (error)
                goto out;

restart:
        do {
                SOCKBUF_LOCK(&so->so_snd);
                if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
                        SOCKBUF_UNLOCK(&so->so_snd);
                        error = EPIPE;
                        goto release;
                }
                if (so->so_error) {
                        error = so->so_error;
                        so->so_error = 0;
                        SOCKBUF_UNLOCK(&so->so_snd);
                        goto release;
                }
                if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
                        SOCKBUF_UNLOCK(&so->so_snd);
                        error = ENOTCONN;
                        goto release;
                }
                space = sbspace(&so->so_snd);
                if (flags & MSG_OOB)
                        space += 1024;
                if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
                        SOCKBUF_UNLOCK(&so->so_snd);
                        error = EMSGSIZE;
                        goto release;
                }
                if (space < resid &&
                    (atomic || space < so->so_snd.sb_lowat)) {
                        if ((so->so_state & SS_NBIO) ||
                            (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
                                SOCKBUF_UNLOCK(&so->so_snd);
                                error = EWOULDBLOCK;
                                goto release;
                        }
                        error = sbwait(so, SO_SND);
                        SOCKBUF_UNLOCK(&so->so_snd);
                        if (error)
                                goto release;
                        goto restart;
                }
                SOCKBUF_UNLOCK(&so->so_snd);
                do {
                        if (uio == NULL) {
                                resid = 0;
                                if (flags & MSG_EOR)
                                        top->m_flags |= M_EOR;
                        } else {
                                /*
                                 * Copy the data from userland into a mbuf
                                 * chain.  If no data is to be copied in,
                                 * a single empty mbuf is returned.
                                 */
                                copy = min(space,
                                    ssk->xmit_size_goal - SDP_HEAD_SIZE);
                                top = m_uiotombuf(uio, M_WAITOK, copy,
                                    0, M_PKTHDR |
                                    ((flags & MSG_EOR) ? M_EOR : 0));
                                if (top == NULL) {
                                        /* only possible error */
                                        error = EFAULT;
                                        goto release;
                                }
                                space -= resid - uio->uio_resid;
                                resid = uio->uio_resid;
                        }
                        /*
                         * XXX all the SBS_CANTSENDMORE checks previously
                         * done could be out of date after dropping the
                         * socket lock.
                         */
                        error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
                        /*
                         * Set EOF on the last send if the user specified
                         * MSG_EOF.
                         */
                            ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
                        /* If there is more to send set PRUS_MORETOCOME. */
                            (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
                            top, addr, NULL, td);
                        top = NULL;
                        if (error)
                                goto release;
                } while (resid && space > 0);
        } while (resid);

release:
        SOCK_IO_SEND_UNLOCK(so);
out:
        if (top != NULL)
                m_freem(top);
        return (error);
}
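
/*
 * Note: sdp_sosend() parallels sosend_generic(): it waits for buffer
 * space under the sockbuf lock, then carves the uio into mbuf chains of
 * at most xmit_size_goal - SDP_HEAD_SIZE bytes so that each chunk still
 * fits one SDP data message after sdp_send() prepends the BSDH header.
 */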

/*
 * The part of soreceive() that implements reading non-inline out-of-band
 * data from a socket.  For more complete comments, see soreceive(), from
 * which this code originated.
 *
 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
 * unable to return an mbuf chain to the caller.
 */
static int
soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
{
        struct protosw *pr = so->so_proto;
        struct mbuf *m;
        int error;

        KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));

        m = m_get(M_WAITOK, MT_DATA);
        error = pr->pr_rcvoob(so, m, flags & MSG_PEEK);
        if (error)
                goto bad;
        do {
                error = uiomove(mtod(m, void *),
                    (int) min(uio->uio_resid, m->m_len), uio);
                m = m_free(m);
        } while (uio->uio_resid && error == 0 && m);
bad:
        if (m != NULL)
                m_freem(m);
        return (error);
}

/*
 * Optimized version of soreceive() for stream (TCP) sockets.
 */
static int
sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
        int len = 0, error = 0, flags, oresid;
        struct sockbuf *sb;
        struct mbuf *m, *n = NULL;
        struct sdp_sock *ssk;

        /* We only do stream sockets. */
        if (so->so_type != SOCK_STREAM)
                return (EINVAL);
        if (psa != NULL)
                *psa = NULL;
        if (controlp != NULL)
                return (EINVAL);
        if (flagsp != NULL)
                flags = *flagsp &~ MSG_EOR;
        else
                flags = 0;
        if (flags & MSG_OOB)
                return (soreceive_rcvoob(so, uio, flags));
        if (mp0 != NULL)
                *mp0 = NULL;

        sb = &so->so_rcv;
        ssk = sdp_sk(so);

        /* Prevent other readers from entering the socket. */
        error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
        if (error)
                return (error);
        SOCKBUF_LOCK(sb);

        /* Easy one, no space to copyout anything. */
        if (uio->uio_resid == 0) {
                error = EINVAL;
                goto out;
        }
        oresid = uio->uio_resid;

        /* We will never ever get anything unless we are connected. */
        if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
                /* When disconnecting there may be still some data left. */
                if (sbavail(sb))
                        goto deliver;
                if (!(so->so_state & SS_ISDISCONNECTED))
                        error = ENOTCONN;
                goto out;
        }

        /* Socket buffer is empty and we shall not block. */
        if (sbavail(sb) == 0 &&
            ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
                error = EAGAIN;
                goto out;
        }

restart:
        SOCKBUF_LOCK_ASSERT(&so->so_rcv);

        /* Abort if socket has reported problems. */
        if (so->so_error) {
                if (sbavail(sb))
                        goto deliver;
                if (oresid > uio->uio_resid)
                        goto out;
                error = so->so_error;
                if (!(flags & MSG_PEEK))
                        so->so_error = 0;
                goto out;
        }

        /* Door is closed.  Deliver what is left, if any. */
        if (sb->sb_state & SBS_CANTRCVMORE) {
                if (sbavail(sb))
                        goto deliver;
                else
                        goto out;
        }

        /* Socket buffer got some data that we shall deliver now. */
        if (sbavail(sb) && !(flags & MSG_WAITALL) &&
            ((so->so_state & SS_NBIO) ||
             (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
             sbavail(sb) >= sb->sb_lowat ||
             sbavail(sb) >= uio->uio_resid ||
             sbavail(sb) >= sb->sb_hiwat)) {
                goto deliver;
        }

        /* On MSG_WAITALL we must wait until all data or error arrives. */
        if ((flags & MSG_WAITALL) &&
            (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat))
                goto deliver;

        /*
         * Wait and block until (more) data comes in.
         * NB: Drops the sockbuf lock during wait.
         */
        error = sbwait(so, SO_RCV);
        if (error)
                goto out;
        goto restart;

deliver:
        SOCKBUF_LOCK_ASSERT(&so->so_rcv);
        KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__));
        KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));

        /* Statistics. */
        if (uio->uio_td)
                uio->uio_td->td_ru.ru_msgrcv++;

        /* Fill uio until full or current end of socket buffer is reached. */
        len = min(uio->uio_resid, sbavail(sb));
        if (mp0 != NULL) {
                /* Dequeue as many mbufs as possible. */
                if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
                        for (*mp0 = m = sb->sb_mb;
                             m != NULL && m->m_len <= len;
                             m = m->m_next) {
                                len -= m->m_len;
                                uio->uio_resid -= m->m_len;
                                sbfree(sb, m);
                                n = m;
                        }
                        sb->sb_mb = m;
                        if (sb->sb_mb == NULL)
                                SB_EMPTY_FIXUP(sb);
                        n->m_next = NULL;
                }
                /* Copy the remainder. */
                if (len > 0) {
                        KASSERT(sb->sb_mb != NULL,
                            ("%s: len > 0 && sb->sb_mb empty", __func__));

                        m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
                        if (m == NULL)
                                len = 0;        /* Don't flush data from sockbuf. */
                        else
                                uio->uio_resid -= m->m_len;
                        if (*mp0 != NULL)
                                n->m_next = m;
                        else
                                *mp0 = m;
                        if (*mp0 == NULL) {
                                error = ENOBUFS;
                                goto out;
                        }
                }
        } else {
                /* NB: Must unlock socket buffer as uiomove may sleep. */
                SOCKBUF_UNLOCK(sb);
                error = m_mbuftouio(uio, sb->sb_mb, len);
                SOCKBUF_LOCK(sb);
                if (error)
                        goto out;
        }
        SBLASTRECORDCHK(sb);
        SBLASTMBUFCHK(sb);

        /*
         * Remove the delivered data from the socket buffer unless we
         * were only peeking.
         */
        if (!(flags & MSG_PEEK)) {
                if (len > 0)
                        sbdrop_locked(sb, len);

                /* Notify protocol that we drained some data. */
                SOCKBUF_UNLOCK(sb);
                SDP_WLOCK(ssk);
                sdp_do_posts(ssk);
                SDP_WUNLOCK(ssk);
                SOCKBUF_LOCK(sb);
        }

        /*
         * For MSG_WAITALL we may have to loop again and wait for
         * more data to come in.
         */
        if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
                goto restart;
out:
        SBLASTRECORDCHK(sb);
        SBLASTMBUFCHK(sb);
        SOCKBUF_UNLOCK(sb);
        SOCK_IO_RECV_UNLOCK(so);
        return (error);
}
 1406 
 1407 /*
 1408  * Abort is used to teardown a connection typically while sitting in
 1409  * the accept queue.
 1410  */
 1411 void
 1412 sdp_abort(struct socket *so)
 1413 {
 1414         struct sdp_sock *ssk;
 1415 
 1416         ssk = sdp_sk(so);
 1417         SDP_WLOCK(ssk);
 1418         /*
 1419          * If we have not yet dropped, do it now.
 1420          */
 1421         if (!(ssk->flags & SDP_TIMEWAIT) &&
 1422             !(ssk->flags & SDP_DROPPED))
 1423                 sdp_drop(ssk, ECONNABORTED);
 1424         KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
 1425             ssk, ssk->flags));
 1426         SDP_WUNLOCK(ssk);
 1427 }
 1428 
 1429 /*
 1430  * Close a SDP socket and initiate a friendly disconnect.
 1431  */
 1432 static void
 1433 sdp_close(struct socket *so)
 1434 {
 1435         struct sdp_sock *ssk;
 1436 
 1437         ssk = sdp_sk(so);
 1438         SDP_WLOCK(ssk);
 1439         /*
 1440          * If we have not yet dropped, do it now.
 1441          */
 1442         if (!(ssk->flags & SDP_TIMEWAIT) &&
 1443             !(ssk->flags & SDP_DROPPED)) 
 1444                 sdp_start_disconnect(ssk);
 1445 
 1446         /*
 1447          * If we've still not dropped let the socket layer know we're
 1448          * holding on to the socket and pcb for a while.
 1449          */
 1450         if (!(ssk->flags & SDP_DROPPED)) {
 1451                 ssk->flags |= SDP_SOCKREF;
 1452                 soref(so);
 1453         }
 1454         SDP_WUNLOCK(ssk);
 1455 }
 1456 
 1457 /*
 1458  * User requests out-of-band data.
 1459  */
 1460 static int
 1461 sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
 1462 {
 1463         int error = 0;
 1464         struct sdp_sock *ssk;
 1465 
 1466         ssk = sdp_sk(so);
 1467         SDP_WLOCK(ssk);
 1468         if (!rx_ring_trylock(&ssk->rx_ring)) {
 1469                 SDP_WUNLOCK(ssk);
 1470                 return (ECONNRESET);
 1471         }
 1472         if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 1473                 error = ECONNRESET;
 1474                 goto out;
 1475         }
 1476         if ((so->so_oobmark == 0 &&
 1477              (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
 1478             so->so_options & SO_OOBINLINE ||
 1479             ssk->oobflags & SDP_HADOOB) {
 1480                 error = EINVAL;
 1481                 goto out;
 1482         }
 1483         if ((ssk->oobflags & SDP_HAVEOOB) == 0) {
 1484                 error = EWOULDBLOCK;
 1485                 goto out;
 1486         }
 1487         m->m_len = 1;
 1488         *mtod(m, caddr_t) = ssk->iobc;
 1489         if ((flags & MSG_PEEK) == 0)
 1490                 ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
 1491 out:
 1492         rx_ring_unlock(&ssk->rx_ring);
 1493         SDP_WUNLOCK(ssk);
 1494         return (error);
 1495 }
 1496 
 1497 void
 1498 sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
 1499 {
 1500         struct mbuf *m;
 1501         struct socket *so;
 1502 
 1503         so = ssk->socket;
 1504         if (so == NULL)
 1505                 return;
 1506 
 1507         so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1;
 1508         sohasoutofband(so);
 1509         ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
 1510         if (!(so->so_options & SO_OOBINLINE)) {
 1511                 for (m = mb; m->m_next != NULL; m = m->m_next);  /* find the last mbuf */
 1512                 ssk->iobc = *(mtod(m, char *) + m->m_len - 1);
 1513                 ssk->oobflags |= SDP_HAVEOOB;
 1514                 m->m_len--;
 1515                 mb->m_pkthdr.len--;
 1516         }
 1517 }
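
/*
 * Editor's illustration (not part of the kernel source): the userland
 * view of the single out-of-band byte that sdp_urg() squirrels away and
 * sdp_rcvoob() returns.  Assumes two connected stream sockets; SIGURG
 * and SO_OOBINLINE handling are omitted for brevity.
 */
#include <sys/types.h>
#include <sys/socket.h>

static void
oob_roundtrip(int sender, int receiver)
{
	char c = '!';
	char oob;

	/* The last byte of a MSG_OOB send is marked urgent. */
	(void)send(sender, &c, 1, MSG_OOB);
	/* Without SO_OOBINLINE, the urgent byte is read out of band. */
	(void)recv(receiver, &oob, 1, MSG_OOB);
}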
 1518 
 1519 /*
 1520  * Notify an SDP socket of an asynchronous error.
 1521  *
 1522  * Do not wake up the user since there currently is no mechanism for
 1523  * reporting soft errors (a kqueue filter may yet be added).
 1524  */
 1525 struct sdp_sock *
 1526 sdp_notify(struct sdp_sock *ssk, int error)
 1527 {
 1528 
 1529         SDP_WLOCK_ASSERT(ssk);
 1530 
 1531         if ((ssk->flags & SDP_TIMEWAIT) ||
 1532             (ssk->flags & SDP_DROPPED))
 1533                 return (ssk);
 1534 
 1535         /*
 1536          * Ignore some errors if we are hooked up.
 1537          */
 1538         if (ssk->state == TCPS_ESTABLISHED &&
 1539             (error == EHOSTUNREACH || error == ENETUNREACH ||
 1540              error == EHOSTDOWN))
 1541                 return (ssk);
 1542         ssk->softerror = error;
 1543         return (sdp_drop(ssk, error));
 1544 }
 1545 
 1546 static void
 1547 sdp_keepalive_timeout(void *data)
 1548 {
 1549         struct sdp_sock *ssk;
 1550 
 1551         ssk = data;
 1552         /* Callout canceled. */
 1553         if (!callout_active(&ssk->keep2msl))
 1554                 return;
 1555         /* Callout rescheduled as a different kind of timer. */
 1556         if (callout_pending(&ssk->keep2msl))
 1557                 goto out;
 1558         callout_deactivate(&ssk->keep2msl);
 1559         if (ssk->flags & SDP_DROPPED ||
 1560             (ssk->socket->so_options & SO_KEEPALIVE) == 0)
 1561                 goto out;
 1562         sdp_post_keepalive(ssk);
 1563         callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
 1564             sdp_keepalive_timeout, ssk);
 1565 out:
 1566         SDP_WUNLOCK(ssk);
 1567 }
 1568 
 1569 
 1570 void
 1571 sdp_start_keepalive_timer(struct socket *so)
 1572 {
 1573         struct sdp_sock *ssk;
 1574 
 1575         ssk = sdp_sk(so);
 1576         if (!callout_pending(&ssk->keep2msl))
 1577                 callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
 1578                     sdp_keepalive_timeout, ssk);
 1579 }
 1580 
 1581 static void
 1582 sdp_stop_keepalive_timer(struct socket *so)
 1583 {
 1584         struct sdp_sock *ssk;
 1585 
 1586         ssk = sdp_sk(so);
 1587         callout_stop(&ssk->keep2msl);
 1588 }
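
/*
 * Editor's illustration (not part of the kernel source): enabling
 * SO_KEEPALIVE from userland, which sdp_ctloutput() below routes to
 * sdp_start_keepalive_timer().  Assumes `fd' is an SDP socket.
 */
#include <sys/types.h>
#include <sys/socket.h>

static int
sdp_enable_keepalive(int fd)
{
	int on = 1;

	return (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)));
}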
 1589 
 1590 /*
 1591  * sdp_ctloutput() must drop the SDP socket lock (the inpcb-lock analogue)
 1592  * before performing copyin on socket option arguments.  When it
 1593  * re-acquires the lock after the copy, it has to revalidate that the
 1594  * connection is still valid for the socket option.
 1595  */
 1596 #define SDP_WLOCK_RECHECK(ssk) do {                                     \
 1597         SDP_WLOCK(ssk);                                                 \
 1598         if ((ssk)->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {              \
 1599                 SDP_WUNLOCK(ssk);                                       \
 1600                 return (ECONNRESET);                                    \
 1601         }                                                               \
 1602 } while (0)
 1603 
 1604 static int
 1605 sdp_ctloutput(struct socket *so, struct sockopt *sopt)
 1606 {
 1607         int     error, opt, optval;
 1608         struct sdp_sock *ssk;
 1609 
 1610         error = 0;
 1611         ssk = sdp_sk(so);
 1612         if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) {
 1613                 SDP_WLOCK(ssk);
 1614                 if (so->so_options & SO_KEEPALIVE)
 1615                         sdp_start_keepalive_timer(so);
 1616                 else
 1617                         sdp_stop_keepalive_timer(so);
 1618                 SDP_WUNLOCK(ssk);
 1619         }
 1620         if (sopt->sopt_level != IPPROTO_TCP)
 1621                 return (error);
 1622 
 1623         SDP_WLOCK(ssk);
 1624         if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 1625                 SDP_WUNLOCK(ssk);
 1626                 return (ECONNRESET);
 1627         }
 1628 
 1629         switch (sopt->sopt_dir) {
 1630         case SOPT_SET:
 1631                 switch (sopt->sopt_name) {
 1632                 case TCP_NODELAY:
 1633                         SDP_WUNLOCK(ssk);
 1634                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1635                             sizeof optval);
 1636                         if (error)
 1637                                 return (error);
 1638 
 1639                         SDP_WLOCK_RECHECK(ssk);
 1640                         opt = SDP_NODELAY;
 1641                         if (optval)
 1642                                 ssk->flags |= opt;
 1643                         else
 1644                                 ssk->flags &= ~opt;
 1645                         sdp_do_posts(ssk);
 1646                         SDP_WUNLOCK(ssk);
 1647                         break;
 1648 
 1649                 default:
 1650                         SDP_WUNLOCK(ssk);
 1651                         error = ENOPROTOOPT;
 1652                         break;
 1653                 }
 1654                 break;
 1655 
 1656         case SOPT_GET:
 1657                 switch (sopt->sopt_name) {
 1658                 case TCP_NODELAY:
 1659                         optval = ssk->flags & SDP_NODELAY;
 1660                         SDP_WUNLOCK(ssk);
 1661                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1662                         break;
 1663                 default:
 1664                         SDP_WUNLOCK(ssk);
 1665                         error = ENOPROTOOPT;
 1666                         break;
 1667                 }
 1668                 break;
 1669         }
 1670         return (error);
 1671 }
 1672 #undef SDP_WLOCK_RECHECK
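
/*
 * Editor's illustration (not part of the kernel source): the
 * setsockopt() call that reaches the TCP_NODELAY case in
 * sdp_ctloutput() above.  Assumes `fd' is an SDP socket.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

static int
sdp_set_nodelay(int fd)
{
	int on = 1;

	return (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)));
}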
 1673 
 1674 int sdp_mod_count = 0;          /* CQ moderation: completions per event */
 1675 int sdp_mod_usec = 0;           /* CQ moderation: delay in microseconds */
 1676 
 1677 void
 1678 sdp_set_default_moderation(struct sdp_sock *ssk)
 1679 {
 1680         if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
 1681                 return;
 1682         ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
 1683 }
 1684 
 1685 static void
 1686 sdp_dev_add(struct ib_device *device)
 1687 {
 1688         struct ib_fmr_pool_param param;
 1689         struct sdp_device *sdp_dev;
 1690 
 1691         sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
 1692         sdp_dev->pd = ib_alloc_pd(device, 0);
 1693         if (IS_ERR(sdp_dev->pd))
 1694                 goto out_pd;
 1695         memset(&param, 0, sizeof param);
 1696         param.max_pages_per_fmr = SDP_FMR_SIZE;
 1697         param.page_shift = PAGE_SHIFT;
 1698         param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
 1699         param.pool_size = SDP_FMR_POOL_SIZE;
 1700         param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
 1701         param.cache = 1;
 1702         sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, &param);
 1703         if (IS_ERR(sdp_dev->fmr_pool))
 1704                 goto out_fmr;
 1705         ib_set_client_data(device, &sdp_client, sdp_dev);
 1706         return;
 1707 
 1708 out_fmr:
 1709         ib_dealloc_pd(sdp_dev->pd);
 1710 out_pd:
 1711         free(sdp_dev, M_SDP);
 1712 }
 1713 
 1714 static void
 1715 sdp_dev_rem(struct ib_device *device, void *client_data)
 1716 {
 1717         struct sdp_device *sdp_dev;
 1718         struct sdp_sock *ssk;
 1719 
 1720         SDP_LIST_WLOCK();
 1721         LIST_FOREACH(ssk, &sdp_list, list) {
 1722                 if (ssk->ib_device != device)
 1723                         continue;
 1724                 SDP_WLOCK(ssk);
 1725                 if ((ssk->flags & SDP_DESTROY) == 0)
 1726                         ssk = sdp_notify(ssk, ECONNRESET);
 1727                 if (ssk)
 1728                         SDP_WUNLOCK(ssk);
 1729         }
 1730         SDP_LIST_WUNLOCK();
 1731         /*
 1732          * XXX Do I need to wait between these two?
 1733          */
 1734         sdp_dev = ib_get_client_data(device, &sdp_client);
 1735         if (!sdp_dev)
 1736                 return;
 1737         ib_flush_fmr_pool(sdp_dev->fmr_pool);
 1738         ib_destroy_fmr_pool(sdp_dev->fmr_pool);
 1739         ib_dealloc_pd(sdp_dev->pd);
 1740         free(sdp_dev, M_SDP);
 1741 }
 1742 
 1743 struct ib_client sdp_client =
 1744     { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem };
 1745 
 1746 
 1747 static int
 1748 sdp_pcblist(SYSCTL_HANDLER_ARGS)
 1749 {
 1750         int error, n, i;
 1751         struct sdp_sock *ssk;
 1752         struct xinpgen xig;
 1753 
 1754         /*
 1755          * The process of preparing the TCB list is too time-consuming and
 1756          * resource-intensive to repeat twice on every request.
 1757          */
 1758         if (req->oldptr == NULL) {
 1759                 n = sdp_count;
 1760                 n += imax(n / 8, 10);
 1761                 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
 1762                 return (0);
 1763         }
 1764 
 1765         if (req->newptr != NULL)
 1766                 return (EPERM);
 1767 
 1768         /*
 1769          * OK, now we're committed to doing something.
 1770          */
 1771         SDP_LIST_RLOCK();
 1772         n = sdp_count;
 1773         SDP_LIST_RUNLOCK();
 1774 
 1775         error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
 1776                 + n * sizeof(struct xtcpcb));
 1777         if (error != 0)
 1778                 return (error);
 1779 
 1780         bzero(&xig, sizeof(xig));
 1781         xig.xig_len = sizeof xig;
 1782         xig.xig_count = n;
 1783         xig.xig_gen = 0;
 1784         xig.xig_sogen = so_gencnt;
 1785         error = SYSCTL_OUT(req, &xig, sizeof xig);
 1786         if (error)
 1787                 return (error);
 1788 
 1789         SDP_LIST_RLOCK();
 1790         for (ssk = LIST_FIRST(&sdp_list), i = 0;
 1791             ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
 1792                 struct xtcpcb xt;
 1793 
 1794                 SDP_RLOCK(ssk);
 1795                 if (ssk->flags & SDP_TIMEWAIT) {
 1796                         if (ssk->cred != NULL)
 1797                                 error = cr_cansee(req->td->td_ucred,
 1798                                     ssk->cred);
 1799                         else
 1800                                 error = EINVAL; /* Skip this inp. */
 1801                 } else if (ssk->socket)
 1802                         error = cr_canseesocket(req->td->td_ucred,
 1803                             ssk->socket);
 1804                 else
 1805                         error = EINVAL;
 1806                 if (error) {
 1807                         error = 0;
 1808                         goto next;
 1809                 }
 1810 
 1811                 bzero(&xt, sizeof(xt));
 1812                 xt.xt_len = sizeof xt;
 1813                 xt.xt_inp.inp_gencnt = 0;
 1814                 xt.xt_inp.inp_vflag = INP_IPV4;
 1815                 memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
 1816                 xt.xt_inp.inp_lport = ssk->lport;
 1817                 memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
 1818                 xt.xt_inp.inp_fport = ssk->fport;
 1819                 xt.t_state = ssk->state;
 1820                 if (ssk->socket != NULL)
 1821                         sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket);
 1822                 xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
 1823                 SDP_RUNLOCK(ssk);
 1824                 error = SYSCTL_OUT(req, &xt, sizeof xt);
 1825                 if (error)
 1826                         break;
 1827                 i++;
 1828                 continue;
 1829 next:
 1830                 SDP_RUNLOCK(ssk);
 1831         }
 1832         if (!error) {
 1833                 /*
 1834                  * Give the user an updated idea of our state.
 1835                  * If the generation differs from what we told
 1836                  * her before, she knows that something happened
 1837                  * while we were processing this request, and it
 1838                  * might be necessary to retry.
 1839                  */
 1840                 xig.xig_gen = 0;
 1841                 xig.xig_sogen = so_gencnt;
 1842                 xig.xig_count = sdp_count;
 1843                 error = SYSCTL_OUT(req, &xig, sizeof xig);
 1844         }
 1845         SDP_LIST_RUNLOCK();
 1846         return (error);
 1847 }
 1848 
 1849 SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
 1850     "SDP");
 1851 
 1852 SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
 1853     CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_MPSAFE,
 1854     0, 0, sdp_pcblist, "S,xtcpcb",
 1855     "List of active SDP connections");
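
/*
 * Editor's illustration (not part of the kernel source): reading the
 * net.inet.sdp.pcblist sysctl from userland with the two-pass protocol
 * the handler above implements (a NULL oldptr first to obtain a padded
 * size estimate, then the real fetch).  The caller must free() the
 * returned buffer of xinpgen/xtcpcb records.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdlib.h>

static void *
fetch_sdp_pcblist(size_t *lenp)
{
	void *buf;

	/* First pass: oldptr == NULL returns only the size estimate. */
	if (sysctlbyname("net.inet.sdp.pcblist", NULL, lenp, NULL, 0) != 0)
		return (NULL);
	if ((buf = malloc(*lenp)) == NULL)
		return (NULL);
	/* Second pass: fill the buffer with the connection records. */
	if (sysctlbyname("net.inet.sdp.pcblist", buf, lenp, NULL, 0) != 0) {
		free(buf);
		return (NULL);
	}
	return (buf);
}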
 1856 
 1857 static void
 1858 sdp_zone_change(void *tag)
 1859 {
 1860 
 1861         uma_zone_set_max(sdp_zone, maxsockets);
 1862 }
 1863 
 1864 static void
 1865 sdp_init(void *arg __unused)
 1866 {
 1867 
 1868         LIST_INIT(&sdp_list);
 1869         sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
 1870             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1871         uma_zone_set_max(sdp_zone, maxsockets);
 1872         EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
 1873                 EVENTHANDLER_PRI_ANY);
 1874         rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
 1875         ib_register_client(&sdp_client);
 1876 }
 1877 SYSINIT(sdp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, sdp_init, NULL);
 1878 
 1879 #define SDP_PROTOSW                                                     \
 1880         .pr_type =              SOCK_STREAM,                            \
 1881         .pr_flags =             PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,\
 1882         .pr_ctloutput =         sdp_ctloutput,                          \
 1883         .pr_abort =             sdp_abort,                              \
 1884         .pr_accept =            sdp_accept,                             \
 1885         .pr_attach =            sdp_attach,                             \
 1886         .pr_bind =              sdp_bind,                               \
 1887         .pr_connect =           sdp_connect,                            \
 1888         .pr_detach =            sdp_detach,                             \
 1889         .pr_disconnect =        sdp_disconnect,                         \
 1890         .pr_listen =            sdp_listen,                             \
 1891         .pr_peeraddr =          sdp_getpeeraddr,                        \
 1892         .pr_rcvoob =            sdp_rcvoob,                             \
 1893         .pr_send =              sdp_send,                               \
 1894         .pr_sosend =            sdp_sosend,                             \
 1895         .pr_soreceive =         sdp_sorecv,                             \
 1896         .pr_shutdown =          sdp_shutdown,                           \
 1897         .pr_sockaddr =          sdp_getsockaddr,                        \
 1898         .pr_close =             sdp_close
 1899 
 1900 
 1901 static struct protosw sdp_ip_protosw = {
 1902         .pr_protocol =          IPPROTO_IP,
 1903         SDP_PROTOSW
 1904 };
 1905 static struct protosw sdp_tcp_protosw = {
 1906         .pr_protocol =          IPPROTO_TCP,
 1907         SDP_PROTOSW
 1908 };
 1909 
 1910 static struct domain sdpdomain = {
 1911         .dom_family =           AF_INET_SDP,
 1912         .dom_name =             "SDP",
 1913         .dom_nprotosw =         2,
 1914         .dom_protosw = {
 1915                 &sdp_ip_protosw,
 1916                 &sdp_tcp_protosw,
 1917         },
 1918 };
 1919 
 1920 DOMAIN_SET(sdp);
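
/*
 * Editor's illustration (not part of the kernel source): creating a
 * socket against the SDP domain registered above.  AF_INET_SDP is
 * FreeBSD's address family for Sockets Direct Protocol over IPv4.
 */
#include <sys/types.h>
#include <sys/socket.h>

static int
sdp_socket(void)
{
	/* Dispatched to sdp_ip_protosw/sdp_tcp_protosw via sdpdomain. */
	return (socket(AF_INET_SDP, SOCK_STREAM, 0));
}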
 1921 
 1922 int sdp_debug_level = 1;
 1923 int sdp_data_debug_level = 0;
