The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket2.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)uipc_socket2.c      8.1 (Berkeley) 6/10/93
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: releng/6.2/sys/kern/uipc_socket2.c 164286 2006-11-14 20:42:41Z cvs2svn $");
   34 
   35 #include "opt_mac.h"
   36 #include "opt_param.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/aio.h> /* for aio_swake proto */
   40 #include <sys/domain.h>
   41 #include <sys/event.h>
   42 #include <sys/eventhandler.h>
   43 #include <sys/file.h>   /* for maxfiles */
   44 #include <sys/kernel.h>
   45 #include <sys/lock.h>
   46 #include <sys/mac.h>
   47 #include <sys/malloc.h>
   48 #include <sys/mbuf.h>
   49 #include <sys/mutex.h>
   50 #include <sys/proc.h>
   51 #include <sys/protosw.h>
   52 #include <sys/resourcevar.h>
   53 #include <sys/signalvar.h>
   54 #include <sys/socket.h>
   55 #include <sys/socketvar.h>
   56 #include <sys/stat.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/systm.h>
   59 
/*
 * System-wide socket limit.  Defined here but not referenced in this
 * file's visible code; presumably tuned/consumed elsewhere in the
 * kernel -- confirm against uipc_socket.c.
 */
int	maxsockets;

/*
 * Hook installed by the AIO subsystem; invoked from sowakeup() when a
 * socket buffer has SB_AIO set.  NULL until AIO registers it.
 */
void (*aio_swake)(struct socket *, struct sockbuf *);

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/* Upper bound on sockbuf reservations (bytes); adjustable via sysctl. */
u_long	sb_max = SB_MAX;
static	u_long sb_max_adj =
    SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */

#ifdef REGRESSION
static int regression_sonewconn_earlytest = 1;
SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
    &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
#endif
   79 
   80 /*
   81  * Procedures to manipulate state flags of socket
   82  * and do appropriate wakeups.  Normal sequence from the
   83  * active (originating) side is that soisconnecting() is
   84  * called during processing of connect() call,
   85  * resulting in an eventual call to soisconnected() if/when the
   86  * connection is established.  When the connection is torn down
   87  * soisdisconnecting() is called during processing of disconnect() call,
   88  * and soisdisconnected() is called when the connection to the peer
   89  * is totally severed.  The semantics of these routines are such that
   90  * connectionless protocols can call soisconnected() and soisdisconnected()
   91  * only, bypassing the in-progress calls when setting up a ``connection''
   92  * takes no time.
   93  *
   94  * From the passive side, a socket is created with
   95  * two queues of sockets: so_incomp for connections in progress
   96  * and so_comp for connections already made and awaiting user acceptance.
   97  * As a protocol is preparing incoming connections, it creates a socket
   98  * structure queued on so_incomp by calling sonewconn().  When the connection
   99  * is established, soisconnected() is called, and transfers the
  100  * socket structure to so_comp, making it available to accept().
  101  *
  102  * If a socket is closed with sockets on either
  103  * so_incomp or so_comp, these sockets are dropped.
  104  *
  105  * If higher level protocols are implemented in
  106  * the kernel, the wakeups done here will sometimes
  107  * cause software-interrupt process scheduling.
  108  */
  109 
  110 void
  111 soisconnecting(so)
  112         register struct socket *so;
  113 {
  114 
  115         SOCK_LOCK(so);
  116         so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
  117         so->so_state |= SS_ISCONNECTING;
  118         SOCK_UNLOCK(so);
  119 }
  120 
/*
 * Mark a socket as connected.  If the socket is queued as an incomplete
 * connection on a listening socket (so_head != NULL with SQ_INCOMP set),
 * either move it to the completed queue and wake accept()ers, or -- when
 * an accept filter is installed -- leave it incomplete and arm the
 * filter's upcall on the receive buffer instead.
 */
void
soisconnected(so)
	struct socket *so;
{
	struct socket *head;

	/* ACCEPT_LOCK protects so_incomp/so_comp and the SQ_* qstate bits. */
	ACCEPT_LOCK();
	SOCK_LOCK(so);
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	head = so->so_head;
	if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			SOCK_UNLOCK(so);
			/*
			 * Move the socket from the incomplete to the
			 * completed queue; these fields are covered by the
			 * accept lock, which is still held here.
			 */
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
			so->so_qstate &= ~SQ_INCOMP;
			TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
			head->so_qlen++;
			so->so_qstate |= SQ_COMP;
			ACCEPT_UNLOCK();
			/* Wake threads blocked in accept()/select() on head. */
			sorwakeup(head);
			wakeup_one(&head->so_timeo);
		} else {
			ACCEPT_UNLOCK();
			/*
			 * Accept filter installed: hook the filter callback
			 * up as the receive-buffer upcall and fire it once;
			 * the socket stays on the incomplete queue until the
			 * filter decides it is ready.
			 */
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			SOCK_UNLOCK(so);
			so->so_upcall(so, so->so_upcallarg, M_DONTWAIT);
		}
		return;
	}
	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();
	/* Not on a listen queue: wake anyone waiting on this socket itself. */
	wakeup(&so->so_timeo);
	sorwakeup(so);
	sowwakeup(so);
}
  162 
/*
 * Mark a socket as being in the process of disconnecting: clear
 * SS_ISCONNECTING, set SS_ISDISCONNECTING, shut down both buffer
 * directions, and wake all waiters.
 */
void
soisdisconnecting(so)
	register struct socket *so;
{

	/*
	 * XXXRW: This code assumes that SOCK_LOCK(so) and
	 * SOCKBUF_LOCK(&so->so_rcv) are the same.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= SS_ISDISCONNECTING;
	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);		/* releases the rcv buffer lock */
	SOCKBUF_LOCK(&so->so_snd);
	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	sowwakeup_locked(so);		/* releases the snd buffer lock */
	wakeup(&so->so_timeo);
}
  182 
/*
 * Mark a socket as fully disconnected: clear all connection-progress
 * state, set SS_ISDISCONNECTED, shut down both buffer directions,
 * discard any data still queued for sending, and wake all waiters.
 */
void
soisdisconnected(so)
	register struct socket *so;
{

	/*
	 * XXXRW: This code assumes that SOCK_LOCK(so) and
	 * SOCKBUF_LOCK(&so->so_rcv) are the same.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISDISCONNECTED;
	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);		/* releases the rcv buffer lock */
	SOCKBUF_LOCK(&so->so_snd);
	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	/* The peer is gone; drop everything still awaiting transmission. */
	sbdrop_locked(&so->so_snd, so->so_snd.sb_cc);
	sowwakeup_locked(so);		/* releases the snd buffer lock */
	wakeup(&so->so_timeo);
}
  203 
  204 /*
  205  * When an attempt at a new connection is noted on a socket
  206  * which accepts connections, sonewconn is called.  If the
  207  * connection is possible (subject to space constraints, etc.)
   208  * then we allocate a new structure, properly linked into the
  209  * data structure of the original socket, and return this.
  210  * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
  211  *
  212  * note: the ref count on the socket is 0 on return
  213  */
/*
 * Allocate and initialize a new socket for an incoming connection on
 * listening socket 'head'.  Returns NULL if the completed queue is over
 * its limit or if allocation/protocol attach fails.  A nonzero
 * connstatus places the new socket directly on the completed queue
 * (so_comp); zero places it on the incomplete queue (so_incomp).
 */
struct socket *
sonewconn(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	register struct socket *so;
	int over;

	/* Refuse early if the completed queue exceeds 1.5 * so_qlimit. */
	ACCEPT_LOCK();
	over = (head->so_qlen > 3 * head->so_qlimit / 2);
	ACCEPT_UNLOCK();
#ifdef REGRESSION
	if (regression_sonewconn_earlytest && over)
#else
	if (over)
#endif
		return (NULL);
	so = soalloc(M_NOWAIT);
	if (so == NULL)
		return (NULL);
	/*
	 * With an accept filter installed, the connection must not be
	 * reported complete until the filter fires, so force the socket
	 * onto the incomplete queue regardless of connstatus.
	 */
	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		connstatus = 0;
	/* Inherit type, options, and limits from the listening socket. */
	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_cred = crhold(head->so_cred);
#ifdef MAC
	SOCK_LOCK(head);
	mac_create_socket_from_socket(head, so);
	SOCK_UNLOCK(head);
#endif
	knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv),
	    NULL, NULL, NULL);
	knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
	    NULL, NULL, NULL);
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
	    (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
		sodealloc(so);
		return (NULL);
	}
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_state |= connstatus;
	ACCEPT_LOCK();
	if (connstatus) {
		/* Connection already established: straight to so_comp. */
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_qstate |= SQ_COMP;
		head->so_qlen++;
	} else {
		/*
		 * Keep removing sockets from the head until there's room for
		 * us to insert on the tail.  In pre-locking revisions, this
		 * was a simple if(), but as we could be racing with other
		 * threads and soabort() requires dropping locks, we must
		 * loop waiting for the condition to be true.
		 */
		while (head->so_incqlen > head->so_qlimit) {
			struct socket *sp;
			sp = TAILQ_FIRST(&head->so_incomp);
			TAILQ_REMOVE(&head->so_incomp, sp, so_list);
			head->so_incqlen--;
			sp->so_qstate &= ~SQ_INCOMP;
			sp->so_head = NULL;
			/* soabort() may sleep; drop and retake ACCEPT_LOCK. */
			ACCEPT_UNLOCK();
			(void) soabort(sp);
			ACCEPT_LOCK();
		}
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_qstate |= SQ_INCOMP;
		head->so_incqlen++;
	}
	ACCEPT_UNLOCK();
	if (connstatus) {
		/* Wake threads blocked in accept()/select() on head. */
		sorwakeup(head);
		wakeup_one(&head->so_timeo);
	}
	return (so);
}
  297 
  298 /*
  299  * Socantsendmore indicates that no more data will be sent on the
  300  * socket; it would normally be applied to a socket when the user
  301  * informs the system that no more data is to be sent, by the protocol
  302  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
  303  * will be received, and will normally be applied to the socket by a
  304  * protocol when it detects that the peer will send no more data.
  305  * Data queued for reading in the socket may yet be read.
  306  */
  307 void
  308 socantsendmore_locked(so)
  309         struct socket *so;
  310 {
  311 
  312         SOCKBUF_LOCK_ASSERT(&so->so_snd);
  313 
  314         so->so_snd.sb_state |= SBS_CANTSENDMORE;
  315         sowwakeup_locked(so);
  316         mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
  317 }
  318 
  319 void
  320 socantsendmore(so)
  321         struct socket *so;
  322 {
  323 
  324         SOCKBUF_LOCK(&so->so_snd);
  325         socantsendmore_locked(so);
  326         mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
  327 }
  328 
  329 void
  330 socantrcvmore_locked(so)
  331         struct socket *so;
  332 {
  333 
  334         SOCKBUF_LOCK_ASSERT(&so->so_rcv);
  335 
  336         so->so_rcv.sb_state |= SBS_CANTRCVMORE;
  337         sorwakeup_locked(so);
  338         mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
  339 }
  340 
  341 void
  342 socantrcvmore(so)
  343         struct socket *so;
  344 {
  345 
  346         SOCKBUF_LOCK(&so->so_rcv);
  347         socantrcvmore_locked(so);
  348         mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
  349 }
  350 
  351 /*
  352  * Wait for data to arrive at/drain from a socket buffer.
  353  */
  354 int
  355 sbwait(sb)
  356         struct sockbuf *sb;
  357 {
  358 
  359         SOCKBUF_LOCK_ASSERT(sb);
  360 
  361         sb->sb_flags |= SB_WAIT;
  362         return (msleep(&sb->sb_cc, &sb->sb_mtx,
  363             (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
  364             sb->sb_timeo));
  365 }
  366 
  367 /*
   368  * Acquire a sockbuf's SB_LOCK sleep-lock; the sockbuf mutex must
   369  * already be held.  Return any error returned from sleep (EINTR).
  370  */
  371 int
  372 sb_lock(sb)
  373         register struct sockbuf *sb;
  374 {
  375         int error;
  376 
  377         SOCKBUF_LOCK_ASSERT(sb);
  378 
  379         while (sb->sb_flags & SB_LOCK) {
  380                 sb->sb_flags |= SB_WANT;
  381                 error = msleep(&sb->sb_flags, &sb->sb_mtx,
  382                     (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
  383                     "sblock", 0);
  384                 if (error)
  385                         return (error);
  386         }
  387         sb->sb_flags |= SB_LOCK;
  388         return (0);
  389 }
  390 
  391 /*
  392  * Wakeup processes waiting on a socket buffer.  Do asynchronous
  393  * notification via SIGIO if the socket has the SS_ASYNC flag set.
  394  *
  395  * Called with the socket buffer lock held; will release the lock by the end
  396  * of the function.  This allows the caller to acquire the socket buffer lock
  397  * while testing for the need for various sorts of wakeup and hold it through
  398  * to the point where it's no longer required.  We currently hold the lock
  399  * through calls out to other subsystems (with the exception of kqueue), and
  400  * then release it to avoid lock order issues.  It's not clear that's
  401  * correct.
  402  */
  403 void
  404 sowakeup(so, sb)
  405         register struct socket *so;
  406         register struct sockbuf *sb;
  407 {
  408 
  409         SOCKBUF_LOCK_ASSERT(sb);
  410 
  411         selwakeuppri(&sb->sb_sel, PSOCK);
  412         sb->sb_flags &= ~SB_SEL;
  413         if (sb->sb_flags & SB_WAIT) {
  414                 sb->sb_flags &= ~SB_WAIT;
  415                 wakeup(&sb->sb_cc);
  416         }
  417         KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
  418         SOCKBUF_UNLOCK(sb);
  419         if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
  420                 pgsigio(&so->so_sigio, SIGIO, 0);
  421         if (sb->sb_flags & SB_UPCALL)
  422                 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  423         if (sb->sb_flags & SB_AIO)
  424                 aio_swake(so, sb);
  425         mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
  426 }
  427 
  428 /*
  429  * Socket buffer (struct sockbuf) utility routines.
  430  *
  431  * Each socket contains two socket buffers: one for sending data and
  432  * one for receiving data.  Each buffer contains a queue of mbufs,
  433  * information about the number of mbufs and amount of data in the
  434  * queue, and other fields allowing select() statements and notification
  435  * on data availability to be implemented.
  436  *
  437  * Data stored in a socket buffer is maintained as a list of records.
  438  * Each record is a list of mbufs chained together with the m_next
  439  * field.  Records are chained together with the m_nextpkt field. The upper
  440  * level routine soreceive() expects the following conventions to be
  441  * observed when placing information in the receive buffer:
  442  *
  443  * 1. If the protocol requires each message be preceded by the sender's
  444  *    name, then a record containing that name must be present before
  445  *    any associated data (mbuf's must be of type MT_SONAME).
  446  * 2. If the protocol supports the exchange of ``access rights'' (really
  447  *    just additional data associated with the message), and there are
  448  *    ``rights'' to be received, then a record containing this data
  449  *    should be present (mbuf's must be of type MT_RIGHTS).
  450  * 3. If a name or rights record exists, then it must be followed by
  451  *    a data record, perhaps of zero length.
  452  *
  453  * Before using a new socket structure it is first necessary to reserve
  454  * buffer space to the socket, by calling sbreserve().  This should commit
  455  * some of the available buffer space in the system buffer pool for the
  456  * socket (currently, it does nothing but enforce limits).  The space
  457  * should be released by calling sbrelease() when the socket is destroyed.
  458  */
  459 
/*
 * Reserve send and receive buffer space for a socket and establish
 * low-water defaults.  Both sockbuf locks are taken (snd first, then
 * rcv) so the two reservations commit or unwind together.  Returns 0
 * on success, ENOBUFS if either reservation fails (the send-side
 * reservation is released again on the bad2 path).
 */
int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{
	struct thread *td = curthread;

	SOCKBUF_LOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
		goto bad;
	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
		goto bad2;
	/* Default low-water marks: 1 byte to read, one cluster to write. */
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (0);
bad2:
	sbrelease_locked(&so->so_snd, so);
bad:
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (ENOBUFS);
}
  489 
  490 static int
  491 sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
  492 {
  493         int error = 0;
  494         u_long tmp_sb_max = sb_max;
  495 
  496         error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
  497         if (error || !req->newptr)
  498                 return (error);
  499         if (tmp_sb_max < MSIZE + MCLBYTES)
  500                 return (EINVAL);
  501         sb_max = tmp_sb_max;
  502         sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
  503         return (0);
  504 }
  505         
  506 /*
  507  * Allot mbufs to a sockbuf.
  508  * Attempt to scale mbmax so that mbcnt doesn't become limiting
  509  * if buffering efficiency is near the normal case.
  510  */
/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 *
 * Returns 1 on success, 0 on failure (request over sb_max_adj or the
 * per-uid sbsize resource limit).  Caller holds the sockbuf lock.
 */
int
sbreserve_locked(sb, cc, so, td)
	struct sockbuf *sb;
	u_long cc;
	struct socket *so;
	struct thread *td;
{
	rlim_t sbsize_limit;

	SOCKBUF_LOCK_ASSERT(sb);

	/*
	 * td will only be NULL when we're in an interrupt
	 * (e.g. in tcp_input())
	 */
	if (cc > sb_max_adj)
		return (0);
	if (td != NULL) {
		PROC_LOCK(td->td_proc);
		sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE);
		PROC_UNLOCK(td->td_proc);
	} else
		sbsize_limit = RLIM_INFINITY;
	/* chgsbsize() charges the delta to the owner's uid and also
	 * updates sb_hiwat; it fails if the uid would exceed the limit. */
	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
	    sbsize_limit))
		return (0);
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
  542 
  543 int
  544 sbreserve(sb, cc, so, td)
  545         struct sockbuf *sb;
  546         u_long cc;
  547         struct socket *so;
  548         struct thread *td;
  549 {
  550         int error;
  551 
  552         SOCKBUF_LOCK(sb);
  553         error = sbreserve_locked(sb, cc, so, td);
  554         SOCKBUF_UNLOCK(sb);
  555         return (error);
  556 }
  557 
  558 /*
  559  * Free mbufs held by a socket, and reserved mbuf space.
  560  */
  561 void
  562 sbrelease_locked(sb, so)
  563         struct sockbuf *sb;
  564         struct socket *so;
  565 {
  566 
  567         SOCKBUF_LOCK_ASSERT(sb);
  568 
  569         sbflush_locked(sb);
  570         (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
  571             RLIM_INFINITY);
  572         sb->sb_mbmax = 0;
  573 }
  574 
  575 void
  576 sbrelease(sb, so)
  577         struct sockbuf *sb;
  578         struct socket *so;
  579 {
  580 
  581         SOCKBUF_LOCK(sb);
  582         sbrelease_locked(sb, so);
  583         SOCKBUF_UNLOCK(sb);
  584 }
  585 /*
  586  * Routines to add and remove
  587  * data from an mbuf queue.
  588  *
  589  * The routines sbappend() or sbappendrecord() are normally called to
  590  * append new mbufs to a socket buffer, after checking that adequate
  591  * space is available, comparing the function sbspace() with the amount
  592  * of data to be added.  sbappendrecord() differs from sbappend() in
  593  * that data supplied is treated as the beginning of a new record.
  594  * To place a sender's address, optional access rights, and data in a
  595  * socket receive buffer, sbappendaddr() should be used.  To place
  596  * access rights and data in a socket receive buffer, sbappendrights()
  597  * should be used.  In either case, the new data begins a new record.
  598  * Note that unlike sbappend() and sbappendrecord(), these routines check
  599  * for the caller that there will be enough space to store the data.
  600  * Each fails if there is not enough space, or if it cannot find mbufs
  601  * to store additional information in.
  602  *
  603  * Reliable protocols may use the socket send buffer to hold data
  604  * awaiting acknowledgement.  Data is normally copied from a socket
  605  * send buffer in a protocol with m_copy for output to a peer,
  606  * and then removing the data from the socket buffer with sbdrop()
  607  * or sbdroprecord() when the data is acknowledged by the peer.
  608  */
  609 
  610 #ifdef SOCKBUF_DEBUG
/*
 * Debug consistency check: verify that sb_lastrecord really points at
 * the final record on the sb_mb packet chain.  Dumps the chain and
 * panics on mismatch.  Caller holds the sockbuf lock.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;

	SOCKBUF_LOCK_ASSERT(sb);

	/* Walk the m_nextpkt chain to the actual last record. */
	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
			__func__, sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("%s from %s:%u", __func__, file, line);
	}
}
  630 
/*
 * Debug consistency check: verify that sb_mbtail really points at the
 * final mbuf of the final record.  Dumps every record's mbuf chain and
 * panics on mismatch.  Caller holds the sockbuf lock.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	/* Last record via m_nextpkt, then last mbuf via m_next. */
	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
			__func__, sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("%s from %s:%u", __func__, file, line);
	}
}
  658 #endif /* SOCKBUF_DEBUG */
  659 
/*
 * Link mbuf chain m0 in as the new last record of sockbuf sb: hang it
 * off the previous last record's m_nextpkt (or install it as sb_mb when
 * the buffer is empty) and advance sb_lastrecord.  The sockbuf lock
 * must be held.
 */
#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)
  668 
  669 /*
  670  * Append mbuf chain m to the last record in the
  671  * socket buffer sb.  The additional space associated
  672  * the mbuf chain is recorded in sb.  Empty mbufs are
  673  * discarded and mbufs are compacted where possible.
  674  */
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 *
 * If any mbuf of the last record carries M_EOR, that record is
 * complete, so the chain is appended as a new record via
 * sbappendrecord_locked() instead of being merged onto it.
 * Caller holds the sockbuf lock.
 */
void
sbappend_locked(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m == 0)
		return;

	SBLASTRECORDCHK(sb);
	n = sb->sb_mb;
	if (n) {
		/* Find the last record, then scan its mbufs for M_EOR. */
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		if ((n = sb->sb_lastrecord) != NULL) {
			do {
				if (n->m_flags & M_EOR) {
					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
					return;
				}
			} while (n->m_next && (n = n->m_next));
		} else {
			/*
			 * If this is the first record in the socket buffer,
			 * it's also the last record.
			 */
			sb->sb_lastrecord = m;
		}
	}
	/* Merge m onto tail mbuf n (n is NULL when the buffer was empty). */
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb);
}
  722 
  723 /*
  724  * Append mbuf chain m to the last record in the
  725  * socket buffer sb.  The additional space associated
  726  * the mbuf chain is recorded in sb.  Empty mbufs are
  727  * discarded and mbufs are compacted where possible.
  728  */
  729 void
  730 sbappend(sb, m)
  731         struct sockbuf *sb;
  732         struct mbuf *m;
  733 {
  734 
  735         SOCKBUF_LOCK(sb);
  736         sbappend_locked(sb, m);
  737         SOCKBUF_UNLOCK(sb);
  738 }
  739 
  740 /*
  741  * This version of sbappend() should only be used when the caller
  742  * absolutely knows that there will never be more than one record
  743  * in the socket buffer, that is, a stream protocol (such as TCP).
  744  */
/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 *
 * The chain is compressed directly onto sb_mbtail, and sb_lastrecord
 * is resynchronized with sb_mb afterwards.  Caller holds the sockbuf
 * lock.
 */
void
sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
{
	SOCKBUF_LOCK_ASSERT(sb);

	/* A stream buffer has no packet chain and exactly one record. */
	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));

	SBLASTMBUFCHK(sb);

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb);
}
  760 
  761 /*
  762  * This version of sbappend() should only be used when the caller
  763  * absolutely knows that there will never be more than one record
  764  * in the socket buffer, that is, a stream protocol (such as TCP).
  765  */
/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 *
 * Unlocked wrapper: takes and releases the sockbuf lock around
 * sbappendstream_locked().
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}
  774 
  775 #ifdef SOCKBUF_DEBUG
/*
 * Debug consistency check: recompute the byte count and mbuf-storage
 * count from the actual chains and panic if they disagree with the
 * cached sb_cc / sb_mbcnt.  Caller holds the sockbuf lock.
 */
void
sbcheck(sb)
	struct sockbuf *sb;
{
	struct mbuf *m;
	struct mbuf *n = 0;
	u_long len = 0, mbcnt = 0;

	SOCKBUF_LOCK_ASSERT(sb);

	/* Outer loop over records, inner loop over each record's mbufs. */
	for (m = sb->sb_mb; m; m = n) {
	    n = m->m_nextpkt;
	    for (; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
			mbcnt += m->m_ext.ext_size;
	    }
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
  801 #endif
  802 
  803 /*
  804  * As above, except the mbuf chain
  805  * begins a new record.
  806  */
void
sbappendrecord_locked(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 == 0)
		return;
	/* Find the last record already queued, if any. */
	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb);
	SBLINKRECORD(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	/* Detach the remainder of the chain; sbcompress() appends it. */
	m = m0->m_next;
	m0->m_next = 0;
	/*
	 * If m0 carried the end-of-record mark but has a continuation,
	 * move the mark onto the remainder so that sbcompress() leaves
	 * it on the true last mbuf of the record.
	 */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}
  841 
  842 /*
  843  * As above, except the mbuf chain
  844  * begins a new record.
  845  */
  846 void
  847 sbappendrecord(sb, m0)
  848         register struct sockbuf *sb;
  849         register struct mbuf *m0;
  850 {
  851 
  852         SOCKBUF_LOCK(sb);
  853         sbappendrecord_locked(sb, m0);
  854         SOCKBUF_UNLOCK(sb);
  855 }
  856 
  857 /*
  858  * Append address and data, and optionally, control (ancillary) data
  859  * to the receive queue of a socket.  If present,
  860  * m0 must include a packet header with total length.
  861  * Returns 0 if no space in sockbuf or insufficient mbufs.
  862  */
int
sbappendaddr_locked(sb, asa, m0, control)
	struct sockbuf *sb;
	const struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	struct mbuf *m, *n, *nlast;
	int space = asa->sa_len;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr_locked");
	if (m0)
		space += m0->m_pkthdr.len;
	/* m_length() also leaves n pointing at the tail of control. */
	space += m_length(control, &n);

	if (space > sbspace(sb))
		return (0);
#if MSIZE <= 256
	if (asa->sa_len > MLEN)
		return (0);
#endif
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	/* Copy the address into its own leading MT_SONAME mbuf. */
	m->m_len = asa->sa_len;
	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
	/* Record layout: address mbuf -> control chain -> data chain. */
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	/* Charge every mbuf of the new record; n ends on the last one. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}
  908 
  909 /*
  910  * Append address and data, and optionally, control (ancillary) data
  911  * to the receive queue of a socket.  If present,
  912  * m0 must include a packet header with total length.
  913  * Returns 0 if no space in sockbuf or insufficient mbufs.
  914  */
  915 int
  916 sbappendaddr(sb, asa, m0, control)
  917         struct sockbuf *sb;
  918         const struct sockaddr *asa;
  919         struct mbuf *m0, *control;
  920 {
  921         int retval;
  922 
  923         SOCKBUF_LOCK(sb);
  924         retval = sbappendaddr_locked(sb, asa, m0, control);
  925         SOCKBUF_UNLOCK(sb);
  926         return (retval);
  927 }
  928 
/*
 * Append a record consisting of control (ancillary) data followed by
 * optional data m0.  Returns 0 if there is no space in the sockbuf,
 * 1 on success.  Caller must hold the sockbuf lock.
 */
int
sbappendcontrol_locked(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	struct mbuf *m, *n, *mlast;
	int space;

	SOCKBUF_LOCK_ASSERT(sb);

	if (control == 0)
		panic("sbappendcontrol_locked");
	/* m_length() leaves n pointing at the tail mbuf of control. */
	space = m_length(control, &n) + m_length(m0, NULL);

	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb);

	/* Charge every mbuf of the new record; m ends on the last one. */
	for (m = control; m->m_next; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}
  961 
  962 int
  963 sbappendcontrol(sb, m0, control)
  964         struct sockbuf *sb;
  965         struct mbuf *control, *m0;
  966 {
  967         int retval;
  968 
  969         SOCKBUF_LOCK(sb);
  970         retval = sbappendcontrol_locked(sb, m0, control);
  971         SOCKBUF_UNLOCK(sb);
  972         return (retval);
  973 }
  974 
  975 /*
  976  * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
  977  * (n).  If (n) is NULL, the buffer is presumed empty.
  978  *
  979  * When the data is compressed, mbufs in the chain may be handled in one of
  980  * three ways:
  981  *
  982  * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
  983  *     record boundary, and no change in data type).
  984  *
  985  * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
  986  *     an mbuf already in the socket buffer.  This can occur if an
  987  *     appropriate mbuf exists, there is room, and no merging of data types
  988  *     will occur.
  989  *
  990  * (3) The mbuf may be appended to the end of the existing mbuf chain.
  991  *
  992  * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
  993  * end-of-record.
  994  */
void
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Case (1): drop an empty mbuf, but only if doing so
		 * cannot lose an end-of-record mark -- i.e. there is a
		 * successor mbuf of the same type to carry it.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Case (2): coalesce -- copy m's data into the writable
		 * tail mbuf n when the data is small, fits in n's
		 * trailing space, and the types match.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(n) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
			    m->m_type != MT_OOBDATA)
				/* XXX: Probably don't need.*/
				sb->sb_ctl += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Case (3): link m onto the end of the buffer chain. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		/* M_EOR is stripped here and re-applied below, so the
		 * mark always ends up on the final appended mbuf. */
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
		n->m_flags |= eor;
	}
	SBLASTMBUFCHK(sb);
}
 1049 
 1050 /*
 1051  * Free all mbufs in a sockbuf.
 1052  * Check that all resources are reclaimed.
 1053  */
 1054 void
 1055 sbflush_locked(sb)
 1056         register struct sockbuf *sb;
 1057 {
 1058 
 1059         SOCKBUF_LOCK_ASSERT(sb);
 1060 
 1061         if (sb->sb_flags & SB_LOCK)
 1062                 panic("sbflush_locked: locked");
 1063         while (sb->sb_mbcnt) {
 1064                 /*
 1065                  * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
 1066                  * we would loop forever. Panic instead.
 1067                  */
 1068                 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
 1069                         break;
 1070                 sbdrop_locked(sb, (int)sb->sb_cc);
 1071         }
 1072         if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
 1073                 panic("sbflush_locked: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
 1074 }
 1075 
 1076 void
 1077 sbflush(sb)
 1078         register struct sockbuf *sb;
 1079 {
 1080 
 1081         SOCKBUF_LOCK(sb);
 1082         sbflush_locked(sb);
 1083         SOCKBUF_UNLOCK(sb);
 1084 }
 1085 
 1086 /*
 1087  * Drop data from (the front of) a sockbuf.
 1088  */
void
sbdrop_locked(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m;
	struct mbuf *next;

	SOCKBUF_LOCK_ASSERT(sb);

	/* next tracks the record following the one being consumed. */
	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Current record exhausted; advance to the next. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial drop: trim the front of this mbuf. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
			    m->m_type != MT_OOBDATA)
				sb->sb_ctl -= len;
			break;
		}
		/* Whole mbuf consumed: uncharge and free it. */
		len -= m->m_len;
		sbfree(sb, m);
		m = m_free(m);
	}
	/* Discard any now-empty mbufs at the head of the record. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		m = m_free(m);
	}
	/* Reattach the remainder (or promote the next record). */
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL) {
		sb->sb_lastrecord = m;
	}
}
 1143 
 1144 /*
 1145  * Drop data from (the front of) a sockbuf.
 1146  */
 1147 void
 1148 sbdrop(sb, len)
 1149         register struct sockbuf *sb;
 1150         register int len;
 1151 {
 1152 
 1153         SOCKBUF_LOCK(sb);
 1154         sbdrop_locked(sb, len);
 1155         SOCKBUF_UNLOCK(sb);
 1156 }
 1157 
 1158 /*
 1159  * Drop a record off the front of a sockbuf
 1160  * and move the next record to the front.
 1161  */
 1162 void
 1163 sbdroprecord_locked(sb)
 1164         register struct sockbuf *sb;
 1165 {
 1166         register struct mbuf *m;
 1167 
 1168         SOCKBUF_LOCK_ASSERT(sb);
 1169 
 1170         m = sb->sb_mb;
 1171         if (m) {
 1172                 sb->sb_mb = m->m_nextpkt;
 1173                 do {
 1174                         sbfree(sb, m);
 1175                         m = m_free(m);
 1176                 } while (m);
 1177         }
 1178         SB_EMPTY_FIXUP(sb);
 1179 }
 1180 
 1181 /*
 1182  * Drop a record off the front of a sockbuf
 1183  * and move the next record to the front.
 1184  */
 1185 void
 1186 sbdroprecord(sb)
 1187         register struct sockbuf *sb;
 1188 {
 1189 
 1190         SOCKBUF_LOCK(sb);
 1191         sbdroprecord_locked(sb);
 1192         SOCKBUF_UNLOCK(sb);
 1193 }
 1194 
 1195 /*
 1196  * Create a "control" mbuf containing the specified data
 1197  * with the specified type for presentation on a socket buffer.
 1198  */
 1199 struct mbuf *
 1200 sbcreatecontrol(p, size, type, level)
 1201         caddr_t p;
 1202         register int size;
 1203         int type, level;
 1204 {
 1205         register struct cmsghdr *cp;
 1206         struct mbuf *m;
 1207 
 1208         if (CMSG_SPACE((u_int)size) > MCLBYTES)
 1209                 return ((struct mbuf *) NULL);
 1210         if (CMSG_SPACE((u_int)size) > MLEN)
 1211                 m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
 1212         else
 1213                 m = m_get(M_DONTWAIT, MT_CONTROL);
 1214         if (m == NULL)
 1215                 return ((struct mbuf *) NULL);
 1216         cp = mtod(m, struct cmsghdr *);
 1217         m->m_len = 0;
 1218         KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
 1219             ("sbcreatecontrol: short mbuf"));
 1220         if (p != NULL)
 1221                 (void)memcpy(CMSG_DATA(cp), p, size);
 1222         m->m_len = CMSG_SPACE(size);
 1223         cp->cmsg_len = CMSG_LEN(size);
 1224         cp->cmsg_level = level;
 1225         cp->cmsg_type = type;
 1226         return (m);
 1227 }
 1228 
 1229 /*
 1230  * Some routines that return EOPNOTSUPP for entry points that are not
 1231  * supported by a protocol.  Fill in as needed.
 1232  */
 1233 int
 1234 pru_abort_notsupp(struct socket *so)
 1235 {
 1236         return EOPNOTSUPP;
 1237 }
 1238 
 1239 int
 1240 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
 1241 {
 1242         return EOPNOTSUPP;
 1243 }
 1244 
 1245 int
 1246 pru_attach_notsupp(struct socket *so, int proto, struct thread *td)
 1247 {
 1248         return EOPNOTSUPP;
 1249 }
 1250 
 1251 int
 1252 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
 1253 {
 1254         return EOPNOTSUPP;
 1255 }
 1256 
 1257 int
 1258 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
 1259 {
 1260         return EOPNOTSUPP;
 1261 }
 1262 
 1263 int
 1264 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
 1265 {
 1266         return EOPNOTSUPP;
 1267 }
 1268 
 1269 int
 1270 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
 1271         struct ifnet *ifp, struct thread *td)
 1272 {
 1273         return EOPNOTSUPP;
 1274 }
 1275 
 1276 int
 1277 pru_detach_notsupp(struct socket *so)
 1278 {
 1279         return EOPNOTSUPP;
 1280 }
 1281 
 1282 int
 1283 pru_disconnect_notsupp(struct socket *so)
 1284 {
 1285         return EOPNOTSUPP;
 1286 }
 1287 
 1288 int
 1289 pru_listen_notsupp(struct socket *so, struct thread *td)
 1290 {
 1291         return EOPNOTSUPP;
 1292 }
 1293 
 1294 int
 1295 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
 1296 {
 1297         return EOPNOTSUPP;
 1298 }
 1299 
 1300 int
 1301 pru_rcvd_notsupp(struct socket *so, int flags)
 1302 {
 1303         return EOPNOTSUPP;
 1304 }
 1305 
 1306 int
 1307 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
 1308 {
 1309         return EOPNOTSUPP;
 1310 }
 1311 
 1312 int
 1313 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
 1314         struct sockaddr *addr, struct mbuf *control, struct thread *td)
 1315 {
 1316         return EOPNOTSUPP;
 1317 }
 1318 
 1319 /*
 1320  * This isn't really a ``null'' operation, but it's the default one
 1321  * and doesn't do anything destructive.
 1322  */
 1323 int
 1324 pru_sense_null(struct socket *so, struct stat *sb)
 1325 {
 1326         sb->st_blksize = so->so_snd.sb_hiwat;
 1327         return 0;
 1328 }
 1329 
 1330 int
 1331 pru_shutdown_notsupp(struct socket *so)
 1332 {
 1333         return EOPNOTSUPP;
 1334 }
 1335 
 1336 int
 1337 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
 1338 {
 1339         return EOPNOTSUPP;
 1340 }
 1341 
 1342 int
 1343 pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
 1344         struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 1345 {
 1346         return EOPNOTSUPP;
 1347 }
 1348 
 1349 int
 1350 pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
 1351         struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 1352         int *flagsp)
 1353 {
 1354         return EOPNOTSUPP;
 1355 }
 1356 
 1357 int
 1358 pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred,
 1359         struct thread *td)
 1360 {
 1361         return EOPNOTSUPP;
 1362 }
 1363 
 1364 /*
 1365  * For protocol types that don't keep cached copies of labels in their
 1366  * pcbs, provide a null sosetlabel that does a NOOP.
 1367  */
 1368 void
 1369 pru_sosetlabel_null(struct socket *so)
 1370 {
 1371 
 1372 }
 1373 
 1374 /*
 1375  * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 1376  */
 1377 struct sockaddr *
 1378 sodupsockaddr(const struct sockaddr *sa, int mflags)
 1379 {
 1380         struct sockaddr *sa2;
 1381 
 1382         sa2 = malloc(sa->sa_len, M_SONAME, mflags);
 1383         if (sa2)
 1384                 bcopy(sa, sa2, sa->sa_len);
 1385         return sa2;
 1386 }
 1387 
 1388 /*
 1389  * Create an external-format (``xsocket'') structure using the information
 1390  * in the kernel-format socket structure pointed to by so.  This is done
 1391  * to reduce the spew of irrelevant information over this interface,
 1392  * to isolate user code from changes in the kernel structure, and
 1393  * potentially to provide information-hiding if we decide that
 1394  * some of this information should be hidden from users.
 1395  */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	/* Self-describing header: structure size and kernel pointer. */
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	/* Straight field-by-field export; no locking is done here. */
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	xso->xso_protocol = so->so_proto->pr_protocol;
	xso->xso_family = so->so_proto->pr_domain->dom_family;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	/* A socket with no SIGIO registration reports pgid 0. */
	xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_cred->cr_uid;
}
 1419 
 1420 /*
 1421  * This does the same for sockbufs.  Note that the xsockbuf structure,
 1422  * since it is always embedded in a socket, does not include a self
 1423  * pointer nor a length.  We make this entry point public in case
 1424  * some other mechanism needs it.
 1425  */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	/* Straight field-by-field export; no locking is done here. */
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = sb->sb_timeo;
}
 1437 
 1438 /*
 1439  * Here is the definition of some of the basic objects in the kern.ipc
 1440  * branch of the MIB.
 1441  */
 1442 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
 1443 
 1444 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
 1445 static int dummy;
 1446 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
 1447 SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW, 
 1448     &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
/*
 * Sysctl handler for kern.ipc.maxsockets: allows the limit to be
 * raised (never lowered) at runtime, scaling the file-descriptor
 * limits to match and notifying interested subsystems.
 */
static int
sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
{
	int error, newmaxsockets;

	newmaxsockets = maxsockets;
	error = sysctl_handle_int(oidp, &newmaxsockets, sizeof(int), req);
	if (error == 0 && req->newptr) {
		/* Only increases are permitted. */
		if (newmaxsockets > maxsockets) {
			maxsockets = newmaxsockets;
			/* Keep maxfiles comfortably above maxsockets. */
			if (maxsockets > ((maxfiles / 4) * 3)) {
				maxfiles = (maxsockets * 5) / 4;
				maxfilesperproc = (maxfiles * 9) / 10;
			}
			EVENTHANDLER_INVOKE(maxsockets_change);
		} else
			error = EINVAL;
	}
	return (error);
}
 1469 SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW,
 1470     &maxsockets, 0, sysctl_maxsockets, "IU",
 1471     "Maximum number of sockets avaliable");
 1472 SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
 1473     &sb_efficiency, 0, "");
 1474 
 1475 /*
 1476  * Initialise maxsockets 
 1477  */
 1478 static void init_maxsockets(void *ignored)
 1479 {
 1480         TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
 1481         maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
 1482 }
 1483 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);

Cache object: 64122b60cae0e9f8fe8da5c438e8f66a


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.