uipc_socket2.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*      $NetBSD: uipc_socket2.c,v 1.100.4.1 2009/02/02 21:04:45 snj Exp $       */
    2 
    3 /*-
    4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26  * POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * Copyright (c) 1982, 1986, 1988, 1990, 1993
   31  *      The Regents of the University of California.  All rights reserved.
   32  *
   33  * Redistribution and use in source and binary forms, with or without
   34  * modification, are permitted provided that the following conditions
   35  * are met:
   36  * 1. Redistributions of source code must retain the above copyright
   37  *    notice, this list of conditions and the following disclaimer.
   38  * 2. Redistributions in binary form must reproduce the above copyright
   39  *    notice, this list of conditions and the following disclaimer in the
   40  *    documentation and/or other materials provided with the distribution.
   41  * 3. Neither the name of the University nor the names of its contributors
   42  *    may be used to endorse or promote products derived from this software
   43  *    without specific prior written permission.
   44  *
   45  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   55  * SUCH DAMAGE.
   56  *
   57  *      @(#)uipc_socket2.c      8.2 (Berkeley) 2/14/95
   58  */
   59 
   60 #include <sys/cdefs.h>
   61 __KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.100.4.1 2009/02/02 21:04:45 snj Exp $");
   62 
   63 #include "opt_mbuftrace.h"
   64 #include "opt_sb_max.h"
   65 
   66 #include <sys/param.h>
   67 #include <sys/systm.h>
   68 #include <sys/proc.h>
   69 #include <sys/file.h>
   70 #include <sys/buf.h>
   71 #include <sys/malloc.h>
   72 #include <sys/mbuf.h>
   73 #include <sys/protosw.h>
   74 #include <sys/domain.h>
   75 #include <sys/poll.h>
   76 #include <sys/socket.h>
   77 #include <sys/socketvar.h>
   78 #include <sys/signalvar.h>
   79 #include <sys/kauth.h>
   80 #include <sys/pool.h>
   81 #include <sys/uidinfo.h>
   82 
   83 /*
   84  * Primitive routines for operating on sockets and socket buffers.
   85  *
   86  * Locking rules and assumptions:
   87  *
   88  * o socket::so_lock can change on the fly.  The low level routines used
   89  *   to lock sockets are aware of this.  When so_lock is acquired, the
   90  *   routine locking must check to see if so_lock still points to the
   91  *   lock that was acquired.  If so_lock has changed in the meantime, the
   92  *   now irrelevant lock that was acquired must be dropped and the lock
   93  *   operation retried.  Although not proven here, this is completely safe
   94  *   on a multiprocessor system, even with relaxed memory ordering, given
   95  *   the next two rules:
   96  *
   97  * o In order to mutate so_lock, the lock pointed to by the current value
   98  *   of so_lock must be held: i.e., the socket must be held locked by the
   99  *   changing thread.  The thread must issue membar_exit() to prevent
  100  *   memory accesses being reordered, and can set so_lock to the desired
  101  *   value.  If the lock pointed to by the new value of so_lock is not
  102  *   held by the changing thread, the socket must then be considered
  103  *   unlocked.
  104  *
  105  * o If so_lock is mutated, and the previous lock referred to by so_lock
  106  *   could still be visible to other threads in the system (e.g. via file
  107  *   descriptor or protocol-internal reference), then the old lock must
  108  *   remain valid until the socket and/or protocol control block has been
  109  *   torn down.
  110  *
  111  * o If a socket has a non-NULL so_head value (i.e. is in the process of
  112  *   connecting), then locking the socket must also lock the socket pointed
  113  *   to by so_head: their lock pointers must match.
  114  *
  115  * o If a socket has connections in progress (so_q, so_q0 not empty) then
  116  *   locking the socket must also lock the sockets attached to both queues.
  117  *   Again, their lock pointers must match.
  118  *
  119  * o Beyond the initial lock assignment in socreate(), assigning locks to
  120  *   sockets is the responsibility of the individual protocols / protocol
  121  *   domains.
  122  */
  123 
  124 static pool_cache_t socket_cache;
  125 
  126 u_long  sb_max = SB_MAX;        /* maximum socket buffer size */
  127 static u_long sb_max_adj;       /* adjusted sb_max */
  128 
  129 /*
  130  * Procedures to manipulate state flags of socket
  131  * and do appropriate wakeups.  Normal sequence from the
  132  * active (originating) side is that soisconnecting() is
  133  * called during processing of connect() call,
  134  * resulting in an eventual call to soisconnected() if/when the
  135  * connection is established.  When the connection is torn down
  136  * soisdisconnecting() is called during processing of disconnect() call,
  137  * and soisdisconnected() is called when the connection to the peer
  138  * is totally severed.  The semantics of these routines are such that
  139  * connectionless protocols can call soisconnected() and soisdisconnected()
  140  * only, bypassing the in-progress calls when setting up a ``connection''
  141  * takes no time.
  142  *
  143  * From the passive side, a socket is created with
  144  * two queues of sockets: so_q0 for connections in progress
  145  * and so_q for connections already made and awaiting user acceptance.
  146  * As a protocol is preparing incoming connections, it creates a socket
  147  * structure queued on so_q0 by calling sonewconn().  When the connection
  148  * is established, soisconnected() is called, and transfers the
  149  * socket structure to so_q, making it available to accept().
  150  *
  151  * If a socket is closed with sockets on either
  152  * so_q0 or so_q, these sockets are dropped.
  153  *
  154  * If higher level protocols are implemented in
  155  * the kernel, the wakeups done here will sometimes
  156  * cause software-interrupt process scheduling.
  157  */
  158 
  159 void
  160 soisconnecting(struct socket *so)
  161 {
  162 
  163         KASSERT(solocked(so));
  164 
  165         so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
  166         so->so_state |= SS_ISCONNECTING;
  167 }
  168 
  169 void
  170 soisconnected(struct socket *so)
  171 {
  172         struct socket   *head;
  173 
  174         head = so->so_head;
  175 
  176         KASSERT(solocked(so));
  177         KASSERT(head == NULL || solocked2(so, head));
  178 
  179         so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
  180         so->so_state |= SS_ISCONNECTED;
  181         if (head && so->so_onq == &head->so_q0) {
  182                 if ((so->so_options & SO_ACCEPTFILTER) == 0) {
  183                         soqremque(so, 0);
  184                         soqinsque(head, so, 1);
  185                         sorwakeup(head);
  186                         cv_broadcast(&head->so_cv);
  187                 } else {
  188                         so->so_upcall =
  189                             head->so_accf->so_accept_filter->accf_callback;
  190                         so->so_upcallarg = head->so_accf->so_accept_filter_arg;
  191                         so->so_rcv.sb_flags |= SB_UPCALL;
  192                         so->so_options &= ~SO_ACCEPTFILTER;
  193                         (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  194                 }
  195         } else {
  196                 cv_broadcast(&so->so_cv);
  197                 sorwakeup(so);
  198                 sowwakeup(so);
  199         }
  200 }
  201 
  202 void
  203 soisdisconnecting(struct socket *so)
  204 {
  205 
  206         KASSERT(solocked(so));
  207 
  208         so->so_state &= ~SS_ISCONNECTING;
  209         so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
  210         cv_broadcast(&so->so_cv);
  211         sowwakeup(so);
  212         sorwakeup(so);
  213 }
  214 
  215 void
  216 soisdisconnected(struct socket *so)
  217 {
  218 
  219         KASSERT(solocked(so));
  220 
  221         so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
  222         so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
  223         cv_broadcast(&so->so_cv);
  224         sowwakeup(so);
  225         sorwakeup(so);
  226 }
  227 
  228 void
  229 soinit2(void)
  230 {
  231 
  232         socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
  233             "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
  234 }
  235 
  236 /*
  237  * When an attempt at a new connection is noted on a socket
  238  * which accepts connections, sonewconn is called.  If the
  239  * connection is possible (subject to space constraints, etc.)
  240  * then we allocate a new structure, properly linked into the
  241  * data structure of the original socket, and return this.
  242  * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
  243  */
  244 struct socket *
  245 sonewconn(struct socket *head, int connstatus)
  246 {
  247         struct socket   *so;
  248         int             soqueue, error;
  249 
  250         KASSERT(solocked(head));
  251 
  252         if ((head->so_options & SO_ACCEPTFILTER) != 0)
  253                 connstatus = 0;
  254         soqueue = connstatus ? 1 : 0;
  255         if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
  256                 return NULL;
  257         so = soget(false);
  258         if (so == NULL)
  259                 return NULL;
  260         mutex_obj_hold(head->so_lock);
  261         so->so_lock = head->so_lock;
  262         so->so_type = head->so_type;
  263         so->so_options = head->so_options &~ SO_ACCEPTCONN;
  264         so->so_linger = head->so_linger;
  265         so->so_state = head->so_state | SS_NOFDREF;
  266         so->so_nbio = head->so_nbio;
  267         so->so_proto = head->so_proto;
  268         so->so_timeo = head->so_timeo;
  269         so->so_pgid = head->so_pgid;
  270         so->so_send = head->so_send;
  271         so->so_receive = head->so_receive;
  272         so->so_uidinfo = head->so_uidinfo;
  273         so->so_egid = head->so_egid;
  274         so->so_cpid = head->so_cpid;
  275 #ifdef MBUFTRACE
  276         so->so_mowner = head->so_mowner;
  277         so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
  278         so->so_snd.sb_mowner = head->so_snd.sb_mowner;
  279 #endif
  280         (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
  281         so->so_snd.sb_lowat = head->so_snd.sb_lowat;
  282         so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
  283         so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
  284         so->so_snd.sb_timeo = head->so_snd.sb_timeo;
  285         so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
  286         so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
  287         soqinsque(head, so, soqueue);
  288         error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
  289             NULL, NULL);
  290         KASSERT(solocked(so));
  291         if (error != 0) {
  292                 (void) soqremque(so, soqueue);
  293                 /*
  294                  * Remove acccept filter if one is present.
  295                  * XXX Is this really needed?
  296                  */
  297                 if (so->so_accf != NULL)
  298                         (void)accept_filt_clear(so);
  299                 soput(so);
  300                 return NULL;
  301         }
  302         if (connstatus) {
  303                 sorwakeup(head);
  304                 cv_broadcast(&head->so_cv);
  305                 so->so_state |= connstatus;
  306         }
  307         return so;
  308 }
  309 
  310 struct socket *
  311 soget(bool waitok)
  312 {
  313         struct socket *so;
  314 
  315         so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
  316         if (__predict_false(so == NULL))
  317                 return (NULL);
  318         memset(so, 0, sizeof(*so));
  319         TAILQ_INIT(&so->so_q0);
  320         TAILQ_INIT(&so->so_q);
  321         cv_init(&so->so_cv, "socket");
  322         cv_init(&so->so_rcv.sb_cv, "netio");
  323         cv_init(&so->so_snd.sb_cv, "netio");
  324         selinit(&so->so_rcv.sb_sel);
  325         selinit(&so->so_snd.sb_sel);
  326         so->so_rcv.sb_so = so;
  327         so->so_snd.sb_so = so;
  328         return so;
  329 }
  330 
  331 void
  332 soput(struct socket *so)
  333 {
  334 
  335         KASSERT(!cv_has_waiters(&so->so_cv));
  336         KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
  337         KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
  338         seldestroy(&so->so_rcv.sb_sel);
  339         seldestroy(&so->so_snd.sb_sel);
  340         mutex_obj_free(so->so_lock);
  341         cv_destroy(&so->so_cv);
  342         cv_destroy(&so->so_rcv.sb_cv);
  343         cv_destroy(&so->so_snd.sb_cv);
  344         pool_cache_put(socket_cache, so);
  345 }
  346 
  347 void
  348 soqinsque(struct socket *head, struct socket *so, int q)
  349 {
  350 
  351         KASSERT(solocked2(head, so));
  352 
  353 #ifdef DIAGNOSTIC
  354         if (so->so_onq != NULL)
  355                 panic("soqinsque");
  356 #endif
  357 
  358         so->so_head = head;
  359         if (q == 0) {
  360                 head->so_q0len++;
  361                 so->so_onq = &head->so_q0;
  362         } else {
  363                 head->so_qlen++;
  364                 so->so_onq = &head->so_q;
  365         }
  366         TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
  367 }
  368 
  369 int
  370 soqremque(struct socket *so, int q)
  371 {
  372         struct socket   *head;
  373 
  374         head = so->so_head;
  375 
  376         KASSERT(solocked(so));
  377         if (q == 0) {
  378                 if (so->so_onq != &head->so_q0)
  379                         return (0);
  380                 head->so_q0len--;
  381         } else {
  382                 if (so->so_onq != &head->so_q)
  383                         return (0);
  384                 head->so_qlen--;
  385         }
  386         KASSERT(solocked2(so, head));
  387         TAILQ_REMOVE(so->so_onq, so, so_qe);
  388         so->so_onq = NULL;
  389         so->so_head = NULL;
  390         return (1);
  391 }
  392 
  393 /*
  394  * Socantsendmore indicates that no more data will be sent on the
  395  * socket; it would normally be applied to a socket when the user
  396  * informs the system that no more data is to be sent, by the protocol
  397  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
  398  * will be received, and will normally be applied to the socket by a
  399  * protocol when it detects that the peer will send no more data.
  400  * Data queued for reading in the socket may yet be read.
  401  */
  402 
  403 void
  404 socantsendmore(struct socket *so)
  405 {
  406 
  407         KASSERT(solocked(so));
  408 
  409         so->so_state |= SS_CANTSENDMORE;
  410         sowwakeup(so);
  411 }
  412 
  413 void
  414 socantrcvmore(struct socket *so)
  415 {
  416 
  417         KASSERT(solocked(so));
  418 
  419         so->so_state |= SS_CANTRCVMORE;
  420         sorwakeup(so);
  421 }
  422 
  423 /*
  424  * Wait for data to arrive at/drain from a socket buffer.
  425  */
  426 int
  427 sbwait(struct sockbuf *sb)
  428 {
  429         struct socket *so;
  430         kmutex_t *lock;
  431         int error;
  432 
  433         so = sb->sb_so;
  434 
  435         KASSERT(solocked(so));
  436 
  437         sb->sb_flags |= SB_NOTIFY;
  438         lock = so->so_lock;
  439         if ((sb->sb_flags & SB_NOINTR) != 0)
  440                 error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
  441         else
  442                 error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
  443         if (__predict_false(lock != so->so_lock))
  444                 solockretry(so, lock);
  445         return error;
  446 }
  447 
  448 /*
  449  * Wakeup processes waiting on a socket buffer.
  450  * Do asynchronous notification via SIGIO
  451  * if the socket buffer has the SB_ASYNC flag set.
  452  */
  453 void
  454 sowakeup(struct socket *so, struct sockbuf *sb, int code)
  455 {
  456         int band;
  457 
  458         KASSERT(solocked(so));
  459         KASSERT(sb->sb_so == so);
  460 
  461         if (code == POLL_IN)
  462                 band = POLLIN|POLLRDNORM;
  463         else
  464                 band = POLLOUT|POLLWRNORM;
  465         sb->sb_flags &= ~SB_NOTIFY;
  466         selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
  467         cv_broadcast(&sb->sb_cv);
  468         if (sb->sb_flags & SB_ASYNC)
  469                 fownsignal(so->so_pgid, SIGIO, code, band, so);
  470         if (sb->sb_flags & SB_UPCALL)
  471                 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  472 }
  473 
  474 /*
  475  * Reset a socket's lock pointer.  Wake all threads waiting on the
  476  * socket's condition variables so that they can restart their waits
  477  * using the new lock.  The existing lock must be held.
  478  */
  479 void
  480 solockreset(struct socket *so, kmutex_t *lock)
  481 {
  482 
  483         KASSERT(solocked(so));
  484 
  485         so->so_lock = lock;
  486         cv_broadcast(&so->so_snd.sb_cv);
  487         cv_broadcast(&so->so_rcv.sb_cv);
  488         cv_broadcast(&so->so_cv);
  489 }
  490 
  491 /*
  492  * Socket buffer (struct sockbuf) utility routines.
  493  *
  494  * Each socket contains two socket buffers: one for sending data and
  495  * one for receiving data.  Each buffer contains a queue of mbufs,
  496  * information about the number of mbufs and amount of data in the
  497  * queue, and other fields allowing poll() statements and notification
  498  * on data availability to be implemented.
  499  *
  500  * Data stored in a socket buffer is maintained as a list of records.
  501  * Each record is a list of mbufs chained together with the m_next
  502  * field.  Records are chained together with the m_nextpkt field. The upper
  503  * level routine soreceive() expects the following conventions to be
  504  * observed when placing information in the receive buffer:
  505  *
  506  * 1. If the protocol requires each message be preceded by the sender's
  507  *    name, then a record containing that name must be present before
  508  *    any associated data (mbuf's must be of type MT_SONAME).
  509  * 2. If the protocol supports the exchange of ``access rights'' (really
  510  *    just additional data associated with the message), and there are
  511  *    ``rights'' to be received, then a record containing this data
  512  *    should be present (mbuf's must be of type MT_CONTROL).
  513  * 3. If a name or rights record exists, then it must be followed by
  514  *    a data record, perhaps of zero length.
  515  *
  516  * Before using a new socket structure it is first necessary to reserve
  517  * buffer space to the socket, by calling sbreserve().  This should commit
  518  * some of the available buffer space in the system buffer pool for the
  519  * socket (currently, it does nothing but enforce limits).  The space
  520  * should be released by calling sbrelease() when the socket is destroyed.
  521  */
  522 
  523 int
  524 sb_max_set(u_long new_sbmax)
  525 {
  526         int s;
  527 
  528         if (new_sbmax < (16 * 1024))
  529                 return (EINVAL);
  530 
  531         s = splsoftnet();
  532         sb_max = new_sbmax;
  533         sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
  534         splx(s);
  535 
  536         return (0);
  537 }
  538 
  539 int
  540 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
  541 {
  542 
  543         KASSERT(so->so_lock == NULL || solocked(so));
  544 
  545         /*
  546          * there's at least one application (a configure script of screen)
  547          * which expects a fifo is writable even if it has "some" bytes
  548          * in its buffer.
  549          * so we want to make sure (hiwat - lowat) >= (some bytes).
  550          *
  551          * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
  552          * we expect it's large enough for such applications.
  553          */
  554         u_long  lowat = MAX(sock_loan_thresh, MCLBYTES);
  555         u_long  hiwat = lowat + PIPE_BUF;
  556 
  557         if (sndcc < hiwat)
  558                 sndcc = hiwat;
  559         if (sbreserve(&so->so_snd, sndcc, so) == 0)
  560                 goto bad;
  561         if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
  562                 goto bad2;
  563         if (so->so_rcv.sb_lowat == 0)
  564                 so->so_rcv.sb_lowat = 1;
  565         if (so->so_snd.sb_lowat == 0)
  566                 so->so_snd.sb_lowat = lowat;
  567         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  568                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  569         return (0);
  570  bad2:
  571         sbrelease(&so->so_snd, so);
  572  bad:
  573         return (ENOBUFS);
  574 }
  575 
  576 /*
  577  * Allot mbufs to a sockbuf.
  578  * Attempt to scale mbmax so that mbcnt doesn't become limiting
  579  * if buffering efficiency is near the normal case.
  580  */
  581 int
  582 sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
  583 {
  584         struct lwp *l = curlwp; /* XXX */
  585         rlim_t maxcc;
  586         struct uidinfo *uidinfo;
  587 
  588         KASSERT(so->so_lock == NULL || solocked(so));
  589         KASSERT(sb->sb_so == so);
  590         KASSERT(sb_max_adj != 0);
  591 
  592         if (cc == 0 || cc > sb_max_adj)
  593                 return (0);
  594 
  595         if (kauth_cred_geteuid(l->l_cred) == so->so_uidinfo->ui_uid)
  596                 maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
  597         else
  598                 maxcc = RLIM_INFINITY;
  599 
  600         uidinfo = so->so_uidinfo;
  601         if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
  602                 return 0;
  603         sb->sb_mbmax = min(cc * 2, sb_max);
  604         if (sb->sb_lowat > sb->sb_hiwat)
  605                 sb->sb_lowat = sb->sb_hiwat;
  606         return (1);
  607 }
  608 
  609 /*
  610  * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
  611  * that the socket is held locked here: see sorflush().
  612  */
  613 void
  614 sbrelease(struct sockbuf *sb, struct socket *so)
  615 {
  616 
  617         KASSERT(sb->sb_so == so);
  618 
  619         sbflush(sb);
  620         (void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
  621         sb->sb_mbmax = 0;
  622 }
  623 
  624 /*
  625  * Routines to add and remove
  626  * data from an mbuf queue.
  627  *
  628  * The routines sbappend() or sbappendrecord() are normally called to
  629  * append new mbufs to a socket buffer, after checking that adequate
  630  * space is available, comparing the function sbspace() with the amount
  631  * of data to be added.  sbappendrecord() differs from sbappend() in
  632  * that data supplied is treated as the beginning of a new record.
  633  * To place a sender's address, optional access rights, and data in a
  634  * socket receive buffer, sbappendaddr() should be used.  To place
  635  * access rights and data in a socket receive buffer, sbappendrights()
  636  * should be used.  In either case, the new data begins a new record.
  637  * Note that unlike sbappend() and sbappendrecord(), these routines check
  638  * for the caller that there will be enough space to store the data.
  639  * Each fails if there is not enough space, or if it cannot find mbufs
  640  * to store additional information in.
  641  *
  642  * Reliable protocols may use the socket send buffer to hold data
  643  * awaiting acknowledgement.  Data is normally copied from a socket
  644  * send buffer in a protocol with m_copy for output to a peer,
  645  * and then removing the data from the socket buffer with sbdrop()
  646  * or sbdroprecord() when the data is acknowledged by the peer.
  647  */
  648 
  649 #ifdef SOCKBUF_DEBUG
  650 void
  651 sblastrecordchk(struct sockbuf *sb, const char *where)
  652 {
  653         struct mbuf *m = sb->sb_mb;
  654 
  655         KASSERT(solocked(sb->sb_so));
  656 
  657         while (m && m->m_nextpkt)
  658                 m = m->m_nextpkt;
  659 
  660         if (m != sb->sb_lastrecord) {
  661                 printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
  662                     sb->sb_mb, sb->sb_lastrecord, m);
  663                 printf("packet chain:\n");
  664                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
  665                         printf("\t%p\n", m);
  666                 panic("sblastrecordchk from %s", where);
  667         }
  668 }
  669 
  670 void
  671 sblastmbufchk(struct sockbuf *sb, const char *where)
  672 {
  673         struct mbuf *m = sb->sb_mb;
  674         struct mbuf *n;
  675 
  676         KASSERT(solocked(sb->sb_so));
  677 
  678         while (m && m->m_nextpkt)
  679                 m = m->m_nextpkt;
  680 
  681         while (m && m->m_next)
  682                 m = m->m_next;
  683 
  684         if (m != sb->sb_mbtail) {
  685                 printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
  686                     sb->sb_mb, sb->sb_mbtail, m);
  687                 printf("packet tree:\n");
  688                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
  689                         printf("\t");
  690                         for (n = m; n != NULL; n = n->m_next)
  691                                 printf("%p ", n);
  692                         printf("\n");
  693                 }
  694                 panic("sblastmbufchk from %s", where);
  695         }
  696 }
  697 #endif /* SOCKBUF_DEBUG */
  698 
  699 /*
  700  * Link a chain of records onto a socket buffer
  701  */
  702 #define SBLINKRECORDCHAIN(sb, m0, mlast)                                \
  703 do {                                                                    \
  704         if ((sb)->sb_lastrecord != NULL)                                \
  705                 (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
  706         else                                                            \
  707                 (sb)->sb_mb = (m0);                                     \
  708         (sb)->sb_lastrecord = (mlast);                                  \
  709 } while (/*CONSTCOND*/0)
  710 
  711 
  712 #define SBLINKRECORD(sb, m0)                                            \
  713     SBLINKRECORDCHAIN(sb, m0, m0)
  714 
  715 /*
  716  * Append mbuf chain m to the last record in the
  717  * socket buffer sb.  The additional space associated with
  718  * the mbuf chain is recorded in sb.  Empty mbufs are
  719  * discarded and mbufs are compacted where possible.
  720  */
  721 void
  722 sbappend(struct sockbuf *sb, struct mbuf *m)
  723 {
  724         struct mbuf     *n;
  725 
  726         KASSERT(solocked(sb->sb_so));
  727 
  728         if (m == 0)
  729                 return;
  730 
  731 #ifdef MBUFTRACE
  732         m_claimm(m, sb->sb_mowner);
  733 #endif
  734 
  735         SBLASTRECORDCHK(sb, "sbappend 1");
  736 
  737         if ((n = sb->sb_lastrecord) != NULL) {
  738                 /*
  739                  * XXX Would like to simply use sb_mbtail here, but
  740                  * XXX I need to verify that I won't miss an EOR that
  741                  * XXX way.
  742                  */
  743                 do {
  744                         if (n->m_flags & M_EOR) {
  745                                 sbappendrecord(sb, m); /* XXXXXX!!!! */
  746                                 return;
  747                         }
  748                 } while (n->m_next && (n = n->m_next));
  749         } else {
  750                 /*
  751                  * If this is the first record in the socket buffer, it's
  752                  * also the last record.
  753                  */
  754                 sb->sb_lastrecord = m;
  755         }
  756         sbcompress(sb, m, n);
  757         SBLASTRECORDCHK(sb, "sbappend 2");
  758 }
  759 
  760 /*
  761  * This version of sbappend() should only be used when the caller
  762  * absolutely knows that there will never be more than one record
  763  * in the socket buffer, that is, a stream protocol (such as TCP).
  764  */
  765 void
  766 sbappendstream(struct sockbuf *sb, struct mbuf *m)
  767 {
  768 
  769         KASSERT(solocked(sb->sb_so));
  770         KDASSERT(m->m_nextpkt == NULL);
  771         KASSERT(sb->sb_mb == sb->sb_lastrecord);
  772 
  773         SBLASTMBUFCHK(sb, __func__);
  774 
  775 #ifdef MBUFTRACE
  776         m_claimm(m, sb->sb_mowner);
  777 #endif
  778 
  779         sbcompress(sb, m, sb->sb_mbtail);
  780 
  781         sb->sb_lastrecord = sb->sb_mb;
  782         SBLASTRECORDCHK(sb, __func__);
  783 }
  784 
  785 #ifdef SOCKBUF_DEBUG
  786 void
  787 sbcheck(struct sockbuf *sb)
  788 {
  789         struct mbuf     *m, *m2;
  790         u_long          len, mbcnt;
  791 
  792         KASSERT(solocked(sb->sb_so));
  793 
  794         len = 0;
  795         mbcnt = 0;
  796         for (m = sb->sb_mb; m; m = m->m_nextpkt) {
  797                 for (m2 = m; m2 != NULL; m2 = m2->m_next) {
  798                         len += m2->m_len;
  799                         mbcnt += MSIZE;
  800                         if (m2->m_flags & M_EXT)
  801                                 mbcnt += m2->m_ext.ext_size;
  802                         if (m2->m_nextpkt != NULL)
  803                                 panic("sbcheck nextpkt");
  804                 }
  805         }
  806         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
  807                 printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
  808                     mbcnt, sb->sb_mbcnt);
  809                 panic("sbcheck");
  810         }
  811 }
  812 #endif
  813 
  814 /*
  815  * As above, except the mbuf chain
  816  * begins a new record.
  817  */
  818 void
  819 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
  820 {
  821         struct mbuf     *m;
  822 
  823         KASSERT(solocked(sb->sb_so));
  824 
  825         if (m0 == 0)
  826                 return;
  827 
  828 #ifdef MBUFTRACE
  829         m_claimm(m0, sb->sb_mowner);
  830 #endif
  831         /*
  832          * Put the first mbuf on the queue.
  833          * Note this permits zero length records.
  834          */
  835         sballoc(sb, m0);
  836         SBLASTRECORDCHK(sb, "sbappendrecord 1");
  837         SBLINKRECORD(sb, m0);
  838         m = m0->m_next;
  839         m0->m_next = 0;
  840         if (m && (m0->m_flags & M_EOR)) {
  841                 m0->m_flags &= ~M_EOR;
  842                 m->m_flags |= M_EOR;
  843         }
  844         sbcompress(sb, m, m0);
  845         SBLASTRECORDCHK(sb, "sbappendrecord 2");
  846 }
  847 
  848 /*
  849  * As above except that OOB data
  850  * is inserted at the beginning of the sockbuf,
  851  * but after any other OOB data.
  852  */
  853 void
  854 sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
  855 {
  856         struct mbuf     *m, **mp;
  857 
  858         KASSERT(solocked(sb->sb_so));
  859 
  860         if (m0 == 0)
  861                 return;
  862 
  863         SBLASTRECORDCHK(sb, "sbinsertoob 1");
  864 
  865         for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
  866             again:
  867                 switch (m->m_type) {
  868 
  869                 case MT_OOBDATA:
  870                         continue;               /* WANT next train */
  871 
  872                 case MT_CONTROL:
  873                         if ((m = m->m_next) != NULL)
  874                                 goto again;     /* inspect THIS train further */
  875                 }
  876                 break;
  877         }
  878         /*
  879          * Put the first mbuf on the queue.
  880          * Note this permits zero length records.
  881          */
  882         sballoc(sb, m0);
  883         m0->m_nextpkt = *mp;
  884         if (*mp == NULL) {
  885                 /* m0 is actually the new tail */
  886                 sb->sb_lastrecord = m0;
  887         }
  888         *mp = m0;
  889         m = m0->m_next;
  890         m0->m_next = 0;
  891         if (m && (m0->m_flags & M_EOR)) {
  892                 m0->m_flags &= ~M_EOR;
  893                 m->m_flags |= M_EOR;
  894         }
  895         sbcompress(sb, m, m0);
  896         SBLASTRECORDCHK(sb, "sbinsertoob 2");
  897 }
  898 
  899 /*
  900  * Append address and data, and optionally, control (ancillary) data
  901  * to the receive queue of a socket.  If present,
  902  * m0 must include a packet header with total length.
  903  * Returns 0 if no space in sockbuf or insufficient mbufs.
  904  */
  905 int
  906 sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
  907         struct mbuf *control)
  908 {
  909         struct mbuf     *m, *n, *nlast;
  910         int             space, len;
  911 
  912         KASSERT(solocked(sb->sb_so));
  913 
  914         space = asa->sa_len;
  915 
  916         if (m0 != NULL) {
  917                 if ((m0->m_flags & M_PKTHDR) == 0)
  918                         panic("sbappendaddr");
  919                 space += m0->m_pkthdr.len;
  920 #ifdef MBUFTRACE
  921                 m_claimm(m0, sb->sb_mowner);
  922 #endif
  923         }
  924         for (n = control; n; n = n->m_next) {
  925                 space += n->m_len;
  926                 MCLAIM(n, sb->sb_mowner);
  927                 if (n->m_next == 0)     /* keep pointer to last control buf */
  928                         break;
  929         }
  930         if (space > sbspace(sb))
  931                 return (0);
  932         MGET(m, M_DONTWAIT, MT_SONAME);
  933         if (m == 0)
  934                 return (0);
  935         MCLAIM(m, sb->sb_mowner);
  936         /*
  937          * XXX avoid 'comparison always true' warning which isn't easily
  938          * avoided.
  939          */
  940         len = asa->sa_len;
  941         if (len > MLEN) {
  942                 MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
  943                 if ((m->m_flags & M_EXT) == 0) {
  944                         m_free(m);
  945                         return (0);
  946                 }
  947         }
  948         m->m_len = asa->sa_len;
  949         memcpy(mtod(m, void *), asa, asa->sa_len);
  950         if (n)
  951                 n->m_next = m0;         /* concatenate data to control */
  952         else
  953                 control = m0;
  954         m->m_next = control;
  955 
  956         SBLASTRECORDCHK(sb, "sbappendaddr 1");
  957 
  958         for (n = m; n->m_next != NULL; n = n->m_next)
  959                 sballoc(sb, n);
  960         sballoc(sb, n);
  961         nlast = n;
  962         SBLINKRECORD(sb, m);
  963 
  964         sb->sb_mbtail = nlast;
  965         SBLASTMBUFCHK(sb, "sbappendaddr");
  966         SBLASTRECORDCHK(sb, "sbappendaddr 2");
  967 
  968         return (1);
  969 }
  970 
  971 /*
  972  * Helper for sbappendchainaddr: prepend a struct sockaddr* to
  973  * an mbuf chain.
  974  */
  975 static inline struct mbuf *
  976 m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
  977                    const struct sockaddr *asa)
  978 {
  979         struct mbuf *m;
  980         const int salen = asa->sa_len;
  981 
  982         KASSERT(solocked(sb->sb_so));
  983 
  984         /* only the first in each chain need be a pkthdr */
  985         MGETHDR(m, M_DONTWAIT, MT_SONAME);
  986         if (m == 0)
  987                 return (0);
  988         MCLAIM(m, sb->sb_mowner);
  989 #ifdef notyet
  990         if (salen > MHLEN) {
  991                 MEXTMALLOC(m, salen, M_NOWAIT);
  992                 if ((m->m_flags & M_EXT) == 0) {
  993                         m_free(m);
  994                         return (0);
  995                 }
  996         }
  997 #else
  998         KASSERT(salen <= MHLEN);
  999 #endif
 1000         m->m_len = salen;
 1001         memcpy(mtod(m, void *), asa, salen);
 1002         m->m_next = m0;
 1003         m->m_pkthdr.len = salen + m0->m_pkthdr.len;
 1004 
 1005         return m;
 1006 }
 1007 
 1008 int
 1009 sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
 1010                   struct mbuf *m0, int sbprio)
 1011 {
 1012         int space;
 1013         struct mbuf *m, *n, *n0, *nlast;
 1014         int error;
 1015 
 1016         KASSERT(solocked(sb->sb_so));
 1017 
 1018         /*
 1019          * XXX sbprio reserved for encoding priority of this* request:
 1020          *  SB_PRIO_NONE --> honour normal sb limits
 1021          *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
 1022          *      take whole chain. Intended for large requests
 1023          *      that should be delivered atomically (all, or none).
 1024          * SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow
 1025          *       over normal socket limits, for messages indicating
 1026          *       buffer overflow in earlier normal/lower-priority messages
 1027          * SB_PRIO_BESTEFFORT -->  ignore limits entirely.
 1028          *       Intended for  kernel-generated messages only.
 1029          *        Up to generator to avoid total mbuf resource exhaustion.
 1030          */
 1031         (void)sbprio;
 1032 
 1033         if (m0 && (m0->m_flags & M_PKTHDR) == 0)
 1034                 panic("sbappendaddrchain");
 1035 
 1036         space = sbspace(sb);
 1037 
 1038 #ifdef notyet
 1039         /*
 1040          * Enforce SB_PRIO_* limits as described above.
 1041          */
 1042 #endif
 1043 
 1044         n0 = NULL;
 1045         nlast = NULL;
 1046         for (m = m0; m; m = m->m_nextpkt) {
 1047                 struct mbuf *np;
 1048 
 1049 #ifdef MBUFTRACE
 1050                 m_claimm(m, sb->sb_mowner);
 1051 #endif
 1052 
 1053                 /* Prepend sockaddr to this record (m) of input chain m0 */
 1054                 n = m_prepend_sockaddr(sb, m, asa);
 1055                 if (n == NULL) {
 1056                         error = ENOBUFS;
 1057                         goto bad;
 1058                 }
 1059 
 1060                 /* Append record (asa+m) to end of new chain n0 */
 1061                 if (n0 == NULL) {
 1062                         n0 = n;
 1063                 } else {
 1064                         nlast->m_nextpkt = n;
 1065                 }
 1066                 /* Keep track of last record on new chain */
 1067                 nlast = n;
 1068 
 1069                 for (np = n; np; np = np->m_next)
 1070                         sballoc(sb, np);
 1071         }
 1072 
 1073         SBLASTRECORDCHK(sb, "sbappendaddrchain 1");
 1074 
 1075         /* Drop the entire chain of (asa+m) records onto the socket */
 1076         SBLINKRECORDCHAIN(sb, n0, nlast);
 1077 
 1078         SBLASTRECORDCHK(sb, "sbappendaddrchain 2");
 1079 
 1080         for (m = nlast; m->m_next; m = m->m_next)
 1081                 ;
 1082         sb->sb_mbtail = m;
 1083         SBLASTMBUFCHK(sb, "sbappendaddrchain");
 1084 
 1085         return (1);
 1086 
 1087 bad:
 1088         /*
 1089          * On error, free the prepended addreseses. For consistency
 1090          * with sbappendaddr(), leave it to our caller to free
 1091          * the input record chain passed to us as m0.
 1092          */
 1093         while ((n = n0) != NULL) {
 1094                 struct mbuf *np;
 1095 
 1096                 /* Undo the sballoc() of this record */
 1097                 for (np = n; np; np = np->m_next)
 1098                         sbfree(sb, np);
 1099 
 1100                 n0 = n->m_nextpkt;      /* iterate at next prepended address */
 1101                 MFREE(n, np);           /* free prepended address (not data) */
 1102         }
 1103         return 0;
 1104 }
 1105 
 1106 
 1107 int
 1108 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
 1109 {
 1110         struct mbuf     *m, *mlast, *n;
 1111         int             space;
 1112 
 1113         KASSERT(solocked(sb->sb_so));
 1114 
 1115         space = 0;
 1116         if (control == 0)
 1117                 panic("sbappendcontrol");
 1118         for (m = control; ; m = m->m_next) {
 1119                 space += m->m_len;
 1120                 MCLAIM(m, sb->sb_mowner);
 1121                 if (m->m_next == 0)
 1122                         break;
 1123         }
 1124         n = m;                  /* save pointer to last control buffer */
 1125         for (m = m0; m; m = m->m_next) {
 1126                 MCLAIM(m, sb->sb_mowner);
 1127                 space += m->m_len;
 1128         }
 1129         if (space > sbspace(sb))
 1130                 return (0);
 1131         n->m_next = m0;                 /* concatenate data to control */
 1132 
 1133         SBLASTRECORDCHK(sb, "sbappendcontrol 1");
 1134 
 1135         for (m = control; m->m_next != NULL; m = m->m_next)
 1136                 sballoc(sb, m);
 1137         sballoc(sb, m);
 1138         mlast = m;
 1139         SBLINKRECORD(sb, control);
 1140 
 1141         sb->sb_mbtail = mlast;
 1142         SBLASTMBUFCHK(sb, "sbappendcontrol");
 1143         SBLASTRECORDCHK(sb, "sbappendcontrol 2");
 1144 
 1145         return (1);
 1146 }
 1147 
 1148 /*
 1149  * Compress mbuf chain m into the socket
 1150  * buffer sb following mbuf n.  If n
 1151  * is null, the buffer is presumed empty.
 1152  */
 1153 void
 1154 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
 1155 {
 1156         int             eor;
 1157         struct mbuf     *o;
 1158 
 1159         KASSERT(solocked(sb->sb_so));
 1160 
 1161         eor = 0;
 1162         while (m) {
 1163                 eor |= m->m_flags & M_EOR;
 1164                 if (m->m_len == 0 &&
 1165                     (eor == 0 ||
 1166                      (((o = m->m_next) || (o = n)) &&
 1167                       o->m_type == m->m_type))) {
 1168                         if (sb->sb_lastrecord == m)
 1169                                 sb->sb_lastrecord = m->m_next;
 1170                         m = m_free(m);
 1171                         continue;
 1172                 }
 1173                 if (n && (n->m_flags & M_EOR) == 0 &&
 1174                     /* M_TRAILINGSPACE() checks buffer writeability */
 1175                     m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
 1176                     m->m_len <= M_TRAILINGSPACE(n) &&
 1177                     n->m_type == m->m_type) {
 1178                         memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
 1179                             (unsigned)m->m_len);
 1180                         n->m_len += m->m_len;
 1181                         sb->sb_cc += m->m_len;
 1182                         m = m_free(m);
 1183                         continue;
 1184                 }
 1185                 if (n)
 1186                         n->m_next = m;
 1187                 else
 1188                         sb->sb_mb = m;
 1189                 sb->sb_mbtail = m;
 1190                 sballoc(sb, m);
 1191                 n = m;
 1192                 m->m_flags &= ~M_EOR;
 1193                 m = m->m_next;
 1194                 n->m_next = 0;
 1195         }
 1196         if (eor) {
 1197                 if (n)
 1198                         n->m_flags |= eor;
 1199                 else
 1200                         printf("semi-panic: sbcompress\n");
 1201         }
 1202         SBLASTMBUFCHK(sb, __func__);
 1203 }
 1204 
 1205 /*
 1206  * Free all mbufs in a sockbuf.
 1207  * Check that all resources are reclaimed.
 1208  */
 1209 void
 1210 sbflush(struct sockbuf *sb)
 1211 {
 1212 
 1213         KASSERT(solocked(sb->sb_so));
 1214         KASSERT((sb->sb_flags & SB_LOCK) == 0);
 1215 
 1216         while (sb->sb_mbcnt)
 1217                 sbdrop(sb, (int)sb->sb_cc);
 1218 
 1219         KASSERT(sb->sb_cc == 0);
 1220         KASSERT(sb->sb_mb == NULL);
 1221         KASSERT(sb->sb_mbtail == NULL);
 1222         KASSERT(sb->sb_lastrecord == NULL);
 1223 }
 1224 
 1225 /*
 1226  * Drop data from (the front of) a sockbuf.
 1227  */
 1228 void
 1229 sbdrop(struct sockbuf *sb, int len)
 1230 {
 1231         struct mbuf     *m, *mn, *next;
 1232 
 1233         KASSERT(solocked(sb->sb_so));
 1234 
 1235         next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
 1236         while (len > 0) {
 1237                 if (m == 0) {
 1238                         if (next == 0)
 1239                                 panic("sbdrop");
 1240                         m = next;
 1241                         next = m->m_nextpkt;
 1242                         continue;
 1243                 }
 1244                 if (m->m_len > len) {
 1245                         m->m_len -= len;
 1246                         m->m_data += len;
 1247                         sb->sb_cc -= len;
 1248                         break;
 1249                 }
 1250                 len -= m->m_len;
 1251                 sbfree(sb, m);
 1252                 MFREE(m, mn);
 1253                 m = mn;
 1254         }
 1255         while (m && m->m_len == 0) {
 1256                 sbfree(sb, m);
 1257                 MFREE(m, mn);
 1258                 m = mn;
 1259         }
 1260         if (m) {
 1261                 sb->sb_mb = m;
 1262                 m->m_nextpkt = next;
 1263         } else
 1264                 sb->sb_mb = next;
 1265         /*
 1266          * First part is an inline SB_EMPTY_FIXUP().  Second part
 1267          * makes sure sb_lastrecord is up-to-date if we dropped
 1268          * part of the last record.
 1269          */
 1270         m = sb->sb_mb;
 1271         if (m == NULL) {
 1272                 sb->sb_mbtail = NULL;
 1273                 sb->sb_lastrecord = NULL;
 1274         } else if (m->m_nextpkt == NULL)
 1275                 sb->sb_lastrecord = m;
 1276 }
 1277 
 1278 /*
 1279  * Drop a record off the front of a sockbuf
 1280  * and move the next record to the front.
 1281  */
 1282 void
 1283 sbdroprecord(struct sockbuf *sb)
 1284 {
 1285         struct mbuf     *m, *mn;
 1286 
 1287         KASSERT(solocked(sb->sb_so));
 1288 
 1289         m = sb->sb_mb;
 1290         if (m) {
 1291                 sb->sb_mb = m->m_nextpkt;
 1292                 do {
 1293                         sbfree(sb, m);
 1294                         MFREE(m, mn);
 1295                 } while ((m = mn) != NULL);
 1296         }
 1297         SB_EMPTY_FIXUP(sb);
 1298 }
 1299 
 1300 /*
 1301  * Create a "control" mbuf containing the specified data
 1302  * with the specified type for presentation on a socket buffer.
 1303  */
 1304 struct mbuf *
 1305 sbcreatecontrol(void *p, int size, int type, int level)
 1306 {
 1307         struct cmsghdr  *cp;
 1308         struct mbuf     *m;
 1309 
 1310         if (CMSG_SPACE(size) > MCLBYTES) {
 1311                 printf("sbcreatecontrol: message too large %d\n", size);
 1312                 return NULL;
 1313         }
 1314 
 1315         if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
 1316                 return ((struct mbuf *) NULL);
 1317         if (CMSG_SPACE(size) > MLEN) {
 1318                 MCLGET(m, M_DONTWAIT);
 1319                 if ((m->m_flags & M_EXT) == 0) {
 1320                         m_free(m);
 1321                         return NULL;
 1322                 }
 1323         }
 1324         cp = mtod(m, struct cmsghdr *);
 1325         memcpy(CMSG_DATA(cp), p, size);
 1326         m->m_len = CMSG_SPACE(size);
 1327         cp->cmsg_len = CMSG_LEN(size);
 1328         cp->cmsg_level = level;
 1329         cp->cmsg_type = type;
 1330         return (m);
 1331 }
 1332 
 1333 void
 1334 solockretry(struct socket *so, kmutex_t *lock)
 1335 {
 1336 
 1337         while (lock != so->so_lock) {
 1338                 mutex_exit(lock);
 1339                 lock = so->so_lock;
 1340                 mutex_enter(lock);
 1341         }
 1342 }
 1343 
 1344 bool
 1345 solocked(struct socket *so)
 1346 {
 1347 
 1348         return mutex_owned(so->so_lock);
 1349 }
 1350 
 1351 bool
 1352 solocked2(struct socket *so1, struct socket *so2)
 1353 {
 1354         kmutex_t *lock;
 1355 
 1356         lock = so1->so_lock;
 1357         if (lock != so2->so_lock)
 1358                 return false;
 1359         return mutex_owned(lock);
 1360 }
 1361 
 1362 /*
 1363  * Assign a default lock to a new socket.  For PRU_ATTACH, and done by
 1364  * protocols that do not have special locking requirements.
 1365  */
 1366 void
 1367 sosetlock(struct socket *so)
 1368 {
 1369         kmutex_t *lock;
 1370 
 1371         if (so->so_lock == NULL) {
 1372                 lock = softnet_lock;
 1373                 so->so_lock = lock;
 1374                 mutex_obj_hold(lock);
 1375                 mutex_enter(lock);
 1376         }
 1377 
 1378         /* In all cases, lock must be held on return from PRU_ATTACH. */
 1379         KASSERT(solocked(so));
 1380 }
 1381 
 1382 /*
 1383  * Set lock on sockbuf sb; sleep if lock is already held.
 1384  * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 1385  * Returns error without lock if sleep is interrupted.
 1386  */
 1387 int
 1388 sblock(struct sockbuf *sb, int wf)
 1389 {
 1390         struct socket *so;
 1391         kmutex_t *lock;
 1392         int error;
 1393 
 1394         KASSERT(solocked(sb->sb_so));
 1395 
 1396         for (;;) {
 1397                 if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
 1398                         sb->sb_flags |= SB_LOCK;
 1399                         return 0;
 1400                 }
 1401                 if (wf != M_WAITOK)
 1402                         return EWOULDBLOCK;
 1403                 so = sb->sb_so;
 1404                 lock = so->so_lock;
 1405                 if ((sb->sb_flags & SB_NOINTR) != 0) {
 1406                         cv_wait(&so->so_cv, lock);
 1407                         error = 0;
 1408                 } else
 1409                         error = cv_wait_sig(&so->so_cv, lock);
 1410                 if (__predict_false(lock != so->so_lock))
 1411                         solockretry(so, lock);
 1412                 if (error != 0)
 1413                         return error;
 1414         }
 1415 }
 1416 
 1417 void
 1418 sbunlock(struct sockbuf *sb)
 1419 {
 1420         struct socket *so;
 1421 
 1422         so = sb->sb_so;
 1423 
 1424         KASSERT(solocked(so));
 1425         KASSERT((sb->sb_flags & SB_LOCK) != 0);
 1426 
 1427         sb->sb_flags &= ~SB_LOCK;
 1428         cv_broadcast(&so->so_cv);
 1429 }
 1430 
 1431 int
 1432 sowait(struct socket *so, bool catch, int timo)
 1433 {
 1434         kmutex_t *lock;
 1435         int error;
 1436 
 1437         KASSERT(solocked(so));
 1438         KASSERT(catch || timo != 0);
 1439 
 1440         lock = so->so_lock;
 1441         if (catch)
 1442                 error = cv_timedwait_sig(&so->so_cv, lock, timo);
 1443         else
 1444                 error = cv_timedwait(&so->so_cv, lock, timo);
 1445         if (__predict_false(lock != so->so_lock))
 1446                 solockretry(so, lock);
 1447         return error;
 1448 }
Cache object: 1912e7668bf951daa42eb08fab409c83
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/uipc_socket2.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket2.c