The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket2.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)uipc_socket2.c      8.1 (Berkeley) 6/10/93
   34  * $FreeBSD$
   35  */
   36 
   37 #include "opt_param.h"
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/domain.h>
   41 #include <sys/file.h>   /* for maxfiles */
   42 #include <sys/kernel.h>
   43 #include <sys/proc.h>
   44 #include <sys/malloc.h>
   45 #include <sys/mbuf.h>
   46 #include <sys/protosw.h>
   47 #include <sys/resourcevar.h>
   48 #include <sys/stat.h>
   49 #include <sys/socket.h>
   50 #include <sys/socketvar.h>
   51 #include <sys/signalvar.h>
   52 #include <sys/sysctl.h>
   53 #include <sys/aio.h> /* for aio_swake proto */
   54 #include <sys/event.h>
   55 
   56 int     maxsockets;	/* defined here; not referenced by the routines in this section */
   57 
   58 /*
   59  * Primitive routines for operating on sockets and socket buffers
   60  */
   61 
   62 u_long  sb_max = SB_MAX;	/* cap applied to sb_mbmax by sbreserve() */
   63 u_long  sb_max_adj =
   64     SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max: largest cc that sbreserve() accepts */
   65 
   66 static  u_long sb_efficiency = 8;       /* parameter for sbreserve(): cc is multiplied by this to get sb_mbmax */
   67 
   68 /*
   69  * Procedures to manipulate state flags of socket
   70  * and do appropriate wakeups.  Normal sequence from the
   71  * active (originating) side is that soisconnecting() is
   72  * called during processing of connect() call,
   73  * resulting in an eventual call to soisconnected() if/when the
   74  * connection is established.  When the connection is torn down
   75  * soisdisconnecting() is called during processing of disconnect() call,
   76  * and soisdisconnected() is called when the connection to the peer
   77  * is totally severed.  The semantics of these routines are such that
   78  * connectionless protocols can call soisconnected() and soisdisconnected()
   79  * only, bypassing the in-progress calls when setting up a ``connection''
   80  * takes no time.
   81  *
   82  * From the passive side, a socket is created with
   83  * two queues of sockets: so_incomp for connections in progress
   84  * and so_comp for connections already made and awaiting user acceptance.
   85  * As a protocol is preparing incoming connections, it creates a socket
   86  * structure queued on so_incomp by calling sonewconn().  When the connection
   87  * is established, soisconnected() is called, and transfers the
   88  * socket structure to so_comp, making it available to accept().
   89  *
   90  * If a socket is closed with sockets on either
   91  * so_incomp or so_comp, these sockets are dropped.
   92  *
   93  * If higher level protocols are implemented in
   94  * the kernel, the wakeups done here will sometimes
   95  * cause software-interrupt process scheduling.
   96  */
   97 
   98 void
   99 soisconnecting(so)
  100         register struct socket *so;
  101 {
  102 
  103         so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
  104         so->so_state |= SS_ISCONNECTING;
  105 }
  106 
  107 void
  108 soisconnected(so)
  109         struct socket *so;
  110 {
  111         struct socket *head = so->so_head;
  112 
  113         so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
  114         so->so_state |= SS_ISCONNECTED;
  115         if (head && (so->so_state & SS_INCOMP)) {
  116                 if ((so->so_options & SO_ACCEPTFILTER) != 0) {
  117                         so->so_upcall = head->so_accf->so_accept_filter->accf_callback;
  118                         so->so_upcallarg = head->so_accf->so_accept_filter_arg;
  119                         so->so_rcv.sb_flags |= SB_UPCALL;
  120                         so->so_options &= ~SO_ACCEPTFILTER;
  121                         so->so_upcall(so, so->so_upcallarg, 0);
  122                         return;
  123                 }
  124                 TAILQ_REMOVE(&head->so_incomp, so, so_list);
  125                 head->so_incqlen--;
  126                 so->so_state &= ~SS_INCOMP;
  127                 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
  128                 head->so_qlen++;
  129                 so->so_state |= SS_COMP;
  130                 sorwakeup(head);
  131                 wakeup_one(&head->so_timeo);
  132         } else {
  133                 wakeup(&so->so_timeo);
  134                 sorwakeup(so);
  135                 sowwakeup(so);
  136         }
  137 }
  138 
  139 void
  140 soisdisconnecting(so)
  141         register struct socket *so;
  142 {
  143 
  144         so->so_state &= ~SS_ISCONNECTING;
  145         so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
  146         wakeup((caddr_t)&so->so_timeo);
  147         sowwakeup(so);
  148         sorwakeup(so);
  149 }
  150 
  151 void
  152 soisdisconnected(so)
  153         register struct socket *so;
  154 {
  155 
  156         so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
  157         so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
  158         wakeup((caddr_t)&so->so_timeo);
  159         sbdrop(&so->so_snd, so->so_snd.sb_cc);
  160         sowwakeup(so);
  161         sorwakeup(so);
  162 }
  163 
  164 /*
  165  * When an attempt at a new connection is noted on a socket
  166  * which accepts connections, sonewconn is called.  If the
  167  * connection is possible (subject to space constraints, etc.)
  168  * then we allocate a new structure, propoerly linked into the
  169  * data structure of the original socket, and return this.
  170  * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
  171  */
  172 struct socket *
  173 sonewconn(head, connstatus)
  174         register struct socket *head;
  175         int connstatus;
  176 {
  177 
  178         return (sonewconn3(head, connstatus, NULL));
  179 }
  180 
  181 struct socket *
  182 sonewconn3(head, connstatus, p)
  183         register struct socket *head;
  184         int connstatus;
  185         struct proc *p;
  186 {
  187         register struct socket *so;
  188 
  189         if (head->so_qlen > 3 * head->so_qlimit / 2)
  190                 return ((struct socket *)0);
  191         so = soalloc(0);
  192         if (so == NULL)
  193                 return ((struct socket *)0);
  194         if ((head->so_options & SO_ACCEPTFILTER) != 0)
  195                 connstatus = 0;
  196         so->so_head = head;
  197         so->so_type = head->so_type;
  198         so->so_options = head->so_options &~ SO_ACCEPTCONN;
  199         so->so_linger = head->so_linger;
  200         so->so_state = head->so_state | SS_NOFDREF;
  201         so->so_proto = head->so_proto;
  202         so->so_timeo = head->so_timeo;
  203         so->so_cred = p ? p->p_ucred : head->so_cred;
  204         crhold(so->so_cred);
  205         if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
  206             (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
  207                 sodealloc(so);
  208                 return ((struct socket *)0);
  209         }
  210 
  211         if (connstatus) {
  212                 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
  213                 so->so_state |= SS_COMP;
  214                 head->so_qlen++;
  215         } else {
  216                 if (head->so_incqlen > head->so_qlimit) {
  217                         struct socket *sp;
  218                         sp = TAILQ_FIRST(&head->so_incomp);
  219                         (void) soabort(sp);
  220                 }
  221                 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
  222                 so->so_state |= SS_INCOMP;
  223                 head->so_incqlen++;
  224         }
  225         if (connstatus) {
  226                 sorwakeup(head);
  227                 wakeup_one(&head->so_timeo);
  228                 so->so_state |= connstatus;
  229         }
  230         return (so);
  231 }
  232 
  233 /*
  234  * Socantsendmore indicates that no more data will be sent on the
  235  * socket; it would normally be applied to a socket when the user
  236  * informs the system that no more data is to be sent, by the protocol
  237  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
  238  * will be received, and will normally be applied to the socket by a
  239  * protocol when it detects that the peer will send no more data.
  240  * Data queued for reading in the socket may yet be read.
  241  */
  242 
  243 void
  244 socantsendmore(so)
  245         struct socket *so;
  246 {
  247 
  248         so->so_state |= SS_CANTSENDMORE;
  249         sowwakeup(so);
  250 }
  251 
  252 void
  253 socantrcvmore(so)
  254         struct socket *so;
  255 {
  256 
  257         so->so_state |= SS_CANTRCVMORE;
  258         sorwakeup(so);
  259 }
  260 
  261 /*
  262  * Wait for data to arrive at/drain from a socket buffer.
  263  */
  264 int
  265 sbwait(sb)
  266         struct sockbuf *sb;
  267 {
  268 
  269         sb->sb_flags |= SB_WAIT;
  270         return (tsleep((caddr_t)&sb->sb_cc,
  271             (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
  272             sb->sb_timeo));
  273 }
  274 
  275 /*
  276  * Lock a sockbuf already known to be locked;
  277  * return any error returned from sleep (EINTR).
  278  */
  279 int
  280 sb_lock(sb)
  281         register struct sockbuf *sb;
  282 {
  283         int error;
  284 
  285         while (sb->sb_flags & SB_LOCK) {
  286                 sb->sb_flags |= SB_WANT;
  287                 error = tsleep((caddr_t)&sb->sb_flags,
  288                     (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
  289                     "sblock", 0);
  290                 if (error)
  291                         return (error);
  292         }
  293         sb->sb_flags |= SB_LOCK;
  294         return (0);
  295 }
  296 
  297 /*
  298  * Wakeup processes waiting on a socket buffer.
  299  * Do asynchronous notification via SIGIO
  300  * if the socket has the SS_ASYNC flag set.
  301  */
  302 void
  303 sowakeup(so, sb)
  304         register struct socket *so;
  305         register struct sockbuf *sb;
  306 {
  307         selwakeup(&sb->sb_sel);
  308         sb->sb_flags &= ~SB_SEL;
  309         if (sb->sb_flags & SB_WAIT) {
  310                 sb->sb_flags &= ~SB_WAIT;
  311                 wakeup((caddr_t)&sb->sb_cc);
  312         }
  313         if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
  314                 pgsigio(so->so_sigio, SIGIO, 0);
  315         if (sb->sb_flags & SB_UPCALL)
  316                 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  317         if (sb->sb_flags & SB_AIO)
  318                 aio_swake(so, sb);
  319         KNOTE(&sb->sb_sel.si_note, 0);
  320 }
  321 
  322 /*
  323  * Socket buffer (struct sockbuf) utility routines.
  324  *
  325  * Each socket contains two socket buffers: one for sending data and
  326  * one for receiving data.  Each buffer contains a queue of mbufs,
  327  * information about the number of mbufs and amount of data in the
  328  * queue, and other fields allowing select() statements and notification
  329  * on data availability to be implemented.
  330  *
  331  * Data stored in a socket buffer is maintained as a list of records.
  332  * Each record is a list of mbufs chained together with the m_next
  333  * field.  Records are chained together with the m_nextpkt field. The upper
  334  * level routine soreceive() expects the following conventions to be
  335  * observed when placing information in the receive buffer:
  336  *
  337  * 1. If the protocol requires each message be preceded by the sender's
  338  *    name, then a record containing that name must be present before
  339  *    any associated data (mbuf's must be of type MT_SONAME).
  340  * 2. If the protocol supports the exchange of ``access rights'' (really
  341  *    just additional data associated with the message), and there are
  342  *    ``rights'' to be received, then a record containing this data
  343  *    should be present (mbuf's must be of type MT_RIGHTS).
  344  * 3. If a name or rights record exists, then it must be followed by
  345  *    a data record, perhaps of zero length.
  346  *
  347  * Before using a new socket structure it is first necessary to reserve
  348  * buffer space to the socket, by calling sbreserve().  This should commit
  349  * some of the available buffer space in the system buffer pool for the
  350  * socket (currently, it does nothing but enforce limits).  The space
  351  * should be released by calling sbrelease() when the socket is destroyed.
  352  */
  353 
  354 int
  355 soreserve(so, sndcc, rcvcc)
  356         register struct socket *so;
  357         u_long sndcc, rcvcc;
  358 {
  359         struct proc *p = curproc;
  360 
  361         if (sbreserve(&so->so_snd, sndcc, so, p) == 0)
  362                 goto bad;
  363         if (sbreserve(&so->so_rcv, rcvcc, so, p) == 0)
  364                 goto bad2;
  365         if (so->so_rcv.sb_lowat == 0)
  366                 so->so_rcv.sb_lowat = 1;
  367         if (so->so_snd.sb_lowat == 0)
  368                 so->so_snd.sb_lowat = MCLBYTES;
  369         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  370                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  371         return (0);
  372 bad2:
  373         sbrelease(&so->so_snd, so);
  374 bad:
  375         return (ENOBUFS);
  376 }
  377 
  378 static int
  379 sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
  380 {
  381         int error = 0;
  382         u_long old_sb_max = sb_max;
  383 
  384         error = SYSCTL_OUT(req, arg1, sizeof(int));
  385         if (error || !req->newptr)
  386                 return (error);
  387         error = SYSCTL_IN(req, arg1, sizeof(int));
  388         if (error)
  389                 return (error);
  390         if (sb_max < MSIZE + MCLBYTES) {
  391                 sb_max = old_sb_max;
  392                 return (EINVAL);
  393         }
  394         sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
  395         return (0);
  396 }
  397         
  398 /*
  399  * Allot mbufs to a sockbuf.
  400  * Attempt to scale mbmax so that mbcnt doesn't become limiting
  401  * if buffering efficiency is near the normal case.
  402  */
  403 int
  404 sbreserve(sb, cc, so, p)
  405         struct sockbuf *sb;
  406         u_long cc;
  407         struct socket *so;
  408         struct proc *p;
  409 {
  410 
  411         /*
  412          * p will only be NULL when we're in an interrupt
  413          * (e.g. in tcp_input())
  414          */
  415         if (cc > sb_max_adj)
  416                 return (0);
  417         if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
  418             p ? p->p_rlimit[RLIMIT_SBSIZE].rlim_cur : RLIM_INFINITY)) {
  419                 return (0);
  420         }
  421         sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
  422         if (sb->sb_lowat > sb->sb_hiwat)
  423                 sb->sb_lowat = sb->sb_hiwat;
  424         return (1);
  425 }
  426 
  427 /*
  428  * Free mbufs held by a socket, and reserved mbuf space.
  429  */
  430 void
  431 sbrelease(sb, so)
  432         struct sockbuf *sb;
  433         struct socket *so;
  434 {
  435 
  436         sbflush(sb);
  437         (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
  438             RLIM_INFINITY);
  439         sb->sb_mbmax = 0;
  440 }
  441 
  442 /*
  443  * Routines to add and remove
  444  * data from an mbuf queue.
  445  *
  446  * The routines sbappend() or sbappendrecord() are normally called to
  447  * append new mbufs to a socket buffer, after checking that adequate
  448  * space is available, comparing the function sbspace() with the amount
  449  * of data to be added.  sbappendrecord() differs from sbappend() in
  450  * that data supplied is treated as the beginning of a new record.
  451  * To place a sender's address, optional access rights, and data in a
  452  * socket receive buffer, sbappendaddr() should be used.  To place
  453  * access rights and data in a socket receive buffer, sbappendcontrol()
  454  * should be used.  In either case, the new data begins a new record.
  455  * Note that unlike sbappend() and sbappendrecord(), these routines check
  456  * for the caller that there will be enough space to store the data.
  457  * Each fails if there is not enough space, or if it cannot find mbufs
  458  * to store additional information in.
  459  *
  460  * Reliable protocols may use the socket send buffer to hold data
  461  * awaiting acknowledgement.  Data is normally copied from a socket
  462  * send buffer in a protocol with m_copy for output to a peer,
  463  * and then removing the data from the socket buffer with sbdrop()
  464  * or sbdroprecord() when the data is acknowledged by the peer.
  465  */
  466 
  467 #ifdef SOCKBUF_DEBUG
  468 void
  469 sblastrecordchk(struct sockbuf *sb, const char *file, int line)
  470 {
  471         struct mbuf *m = sb->sb_mb;
  472 
  473         while (m && m->m_nextpkt)
  474                 m = m->m_nextpkt;
  475 
  476         if (m != sb->sb_lastrecord) {
  477                 printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
  478                         __func__, sb->sb_mb, sb->sb_lastrecord, m);
  479                 printf("packet chain:\n");
  480                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
  481                         printf("\t%p\n", m);
  482                 panic("%s from %s:%u", __func__, file, line);
  483         }
  484 }
  485 
  486 void
  487 sblastmbufchk(struct sockbuf *sb, const char *file, int line)
  488 {
  489         struct mbuf *m = sb->sb_mb;
  490         struct mbuf *n;
  491 
  492         while (m && m->m_nextpkt)
  493                 m = m->m_nextpkt;
  494 
  495         while (m && m->m_next)
  496                 m = m->m_next;
  497 
  498         if (m != sb->sb_mbtail) {
  499                 printf("%s: sb_mb %p sb_mbtail %p last %p\n",
  500                         __func__, sb->sb_mb, sb->sb_mbtail, m);
  501                 printf("packet tree:\n");
  502                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
  503                         printf("\t");
  504                         for (n = m; n != NULL; n = n->m_next)
  505                                 printf("%p ", n);
  506                         printf("\n");
  507                 }
  508                 panic("%s from %s:%u", __func__, file, line);
  509         }
  510 }
  511 #endif /* SOCKBUF_DEBUG */
  512 
  513 #define SBLINKRECORD(sb, m0) do {                                       \
  514         if ((sb)->sb_lastrecord != NULL)                                \
  515                 (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
  516         else                                                            \
  517                 (sb)->sb_mb = (m0);                                     \
  518         (sb)->sb_lastrecord = (m0);                                     \
  519 } while (/*CONSTCOND*/0)
  520 
  521 /*
  522  * Append mbuf chain m to the last record in the
  523  * socket buffer sb.  The additional space associated
  524  * the mbuf chain is recorded in sb.  Empty mbufs are
  525  * discarded and mbufs are compacted where possible.
  526  */
  527 void
  528 sbappend(sb, m)
  529         struct sockbuf *sb;
  530         struct mbuf *m;
  531 {
  532         register struct mbuf *n;
  533 
  534         if (m == 0)
  535                 return;
  536         SBLASTRECORDCHK(sb);
  537         n = sb->sb_mb;
  538         if (n) {
  539                 while (n->m_nextpkt)
  540                         n = n->m_nextpkt;
  541                 do {
  542                         if (n->m_flags & M_EOR) {
  543                                 sbappendrecord(sb, m); /* XXXXXX!!!! */
  544                                 return;
  545                         }
  546                 } while (n->m_next && (n = n->m_next));
  547         } else {
  548                 /*
  549                  * XXX Would like to simply use sb_mbtail here, but
  550                  * XXX I need to verify that I won't miss an EOR that
  551                  * XXX way.
  552                  */
  553                 if ((n = sb->sb_lastrecord) != NULL) {
  554                         do {
  555                                 if (n->m_flags & M_EOR) {
  556                                         sbappendrecord(sb, m); /* XXXXXX!!!! */
  557                                         return;
  558                                 }
  559                         } while (n->m_next && (n = n->m_next));
  560                 } else {
  561                         /*
  562                          * If this is the first record in the socket buffer,
  563                          * it's also the last record.
  564                          */
  565                         sb->sb_lastrecord = m;
  566                 }
  567         }
  568         sbcompress(sb, m, n);
  569         SBLASTRECORDCHK(sb);
  570 }
  571 
  572 /*
  573  * This version of sbappend() should only be used when the caller
  574  * absolutely knows that there will never be more than one record
  575  * in the socket buffer, that is, a stream protocol (such as TCP).
  576  */
  577 void
  578 sbappendstream(struct sockbuf *sb, struct mbuf *m)
  579 {
  580 
  581         KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
  582         KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
  583 
  584         SBLASTMBUFCHK(sb);
  585 
  586         sbcompress(sb, m, sb->sb_mbtail);
  587 
  588         sb->sb_lastrecord = sb->sb_mb;
  589         SBLASTRECORDCHK(sb);
  590 }
  591 
  592 #ifdef SOCKBUF_DEBUG
  593 void
  594 sbcheck(sb)
  595         register struct sockbuf *sb;
  596 {
  597         register struct mbuf *m;
  598         register struct mbuf *n = 0;
  599         register u_long len = 0, mbcnt = 0;
  600 
  601         for (m = sb->sb_mb; m; m = n) {
  602             n = m->m_nextpkt;
  603             for (; m; m = m->m_next) {
  604                 len += m->m_len;
  605                 mbcnt += MSIZE;
  606                 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
  607                         mbcnt += m->m_ext.ext_size;
  608             }
  609         }
  610         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
  611                 printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
  612                     mbcnt, sb->sb_mbcnt);
  613                 panic("sbcheck");
  614         }
  615 }
  616 #endif
  617 
  618 /*
  619  * As above, except the mbuf chain
  620  * begins a new record.
  621  */
  622 void
  623 sbappendrecord(sb, m0)
  624         register struct sockbuf *sb;
  625         register struct mbuf *m0;
  626 {
  627         register struct mbuf *m;
  628 
  629         if (m0 == 0)
  630                 return;
  631         m = sb->sb_mb;
  632         if (m)
  633                 while (m->m_nextpkt)
  634                         m = m->m_nextpkt;
  635         /*
  636          * Put the first mbuf on the queue.
  637          * Note this permits zero length records.
  638          */
  639         sballoc(sb, m0);
  640         SBLASTRECORDCHK(sb);
  641         SBLINKRECORD(sb, m0);
  642         if (m)
  643                 m->m_nextpkt = m0;
  644         else
  645                 sb->sb_mb = m0;
  646         m = m0->m_next;
  647         m0->m_next = 0;
  648         if (m && (m0->m_flags & M_EOR)) {
  649                 m0->m_flags &= ~M_EOR;
  650                 m->m_flags |= M_EOR;
  651         }
  652         sbcompress(sb, m, m0);
  653 }
  654 
  655 /*
  656  * As above except that OOB data
  657  * is inserted at the beginning of the sockbuf,
  658  * but after any other OOB data.
  659  */
  660 void
  661 sbinsertoob(sb, m0)
  662         register struct sockbuf *sb;
  663         register struct mbuf *m0;
  664 {
  665         register struct mbuf *m;
  666         register struct mbuf **mp;
  667 
  668         if (m0 == 0)
  669                 return;
  670         for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
  671             m = *mp;
  672             again:
  673                 switch (m->m_type) {
  674 
  675                 case MT_OOBDATA:
  676                         continue;               /* WANT next train */
  677 
  678                 case MT_CONTROL:
  679                         m = m->m_next;
  680                         if (m)
  681                                 goto again;     /* inspect THIS train further */
  682                 }
  683                 break;
  684         }
  685         /*
  686          * Put the first mbuf on the queue.
  687          * Note this permits zero length records.
  688          */
  689         sballoc(sb, m0);
  690         m0->m_nextpkt = *mp;
  691         *mp = m0;
  692         m = m0->m_next;
  693         m0->m_next = 0;
  694         if (m && (m0->m_flags & M_EOR)) {
  695                 m0->m_flags &= ~M_EOR;
  696                 m->m_flags |= M_EOR;
  697         }
  698         sbcompress(sb, m, m0);
  699 }
  700 
  701 /*
  702  * Append address and data, and optionally, control (ancillary) data
  703  * to the receive queue of a socket.  If present,
  704  * m0 must include a packet header with total length.
  705  * Returns 0 if no space in sockbuf or insufficient mbufs.
  706  */
  707 int
  708 sbappendaddr(sb, asa, m0, control)
  709         register struct sockbuf *sb;
  710         struct sockaddr *asa;
  711         struct mbuf *m0, *control;
  712 {
  713         register struct mbuf *m, *n, *nlast;
  714         int space = asa->sa_len;
  715 
  716 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
  717 panic("sbappendaddr");
  718         if (m0)
  719                 space += m0->m_pkthdr.len;
  720         for (n = control; n; n = n->m_next) {
  721                 space += n->m_len;
  722                 if (n->m_next == 0)     /* keep pointer to last control buf */
  723                         break;
  724         }
  725         if (space > sbspace(sb))
  726                 return (0);
  727         if (asa->sa_len > MLEN)
  728                 return (0);
  729         MGET(m, M_DONTWAIT, MT_SONAME);
  730         if (m == 0)
  731                 return (0);
  732         m->m_len = asa->sa_len;
  733         bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
  734         if (n)
  735                 n->m_next = m0;         /* concatenate data to control */
  736         else
  737                 control = m0;
  738         m->m_next = control;
  739         for (n = m; n->m_next != NULL; n = n->m_next)
  740                 sballoc(sb, n);
  741         sballoc(sb, n);
  742         nlast = n;
  743         SBLINKRECORD(sb, m);
  744 
  745         sb->sb_mbtail = nlast;
  746         SBLASTMBUFCHK(sb);
  747 
  748         SBLASTRECORDCHK(sb);
  749         return (1);
  750 }
  751 
  752 int
  753 sbappendcontrol(sb, m0, control)
  754         struct sockbuf *sb;
  755         struct mbuf *control, *m0;
  756 {
  757         register struct mbuf *m, *n, *mlast;
  758         int space = 0;
  759 
  760         if (control == 0)
  761                 panic("sbappendcontrol");
  762         for (m = control; ; m = m->m_next) {
  763                 space += m->m_len;
  764                 if (m->m_next == 0)
  765                         break;
  766         }
  767         n = m;                  /* save pointer to last control buffer */
  768         for (m = m0; m; m = m->m_next)
  769                 space += m->m_len;
  770         if (space > sbspace(sb))
  771                 return (0);
  772         n->m_next = m0;                 /* concatenate data to control */
  773 
  774         SBLASTRECORDCHK(sb);
  775 
  776         for (m = control; m->m_next; m = m->m_next)
  777                 sballoc(sb, m);
  778         sballoc(sb, m);
  779         mlast = m;
  780         SBLINKRECORD(sb, control);
  781 
  782         sb->sb_mbtail = mlast;
  783         SBLASTMBUFCHK(sb);
  784 
  785         SBLASTRECORDCHK(sb);
  786         return (1);
  787 }
  788 
  789 /*
  790  * Compress mbuf chain m into the socket
  791  * buffer sb following mbuf n.  If n
  792  * is null, the buffer is presumed empty.
  793  */
  794 void
  795 sbcompress(sb, m, n)
  796         register struct sockbuf *sb;
  797         register struct mbuf *m, *n;
  798 {
  799         register int eor = 0;
  800         register struct mbuf *o;
  801 
  802         while (m) {
  803                 eor |= m->m_flags & M_EOR;
  804                 if (m->m_len == 0 &&
  805                     (eor == 0 ||
  806                      (((o = m->m_next) || (o = n)) &&
  807                       o->m_type == m->m_type))) {
  808                         if (sb->sb_lastrecord == m)
  809                                 sb->sb_lastrecord = m->m_next;
  810                         m = m_free(m);
  811                         continue;
  812                 }
  813                 if (n && (n->m_flags & M_EOR) == 0 &&
  814                     M_WRITABLE(n) &&
  815                     m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
  816                     m->m_len <= M_TRAILINGSPACE(n) &&
  817                     n->m_type == m->m_type) {
  818                         bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
  819                             (unsigned)m->m_len);
  820                         n->m_len += m->m_len;
  821                         sb->sb_cc += m->m_len;
  822                         m = m_free(m);
  823                         continue;
  824                 }
  825                 if (n)
  826                         n->m_next = m;
  827                 else
  828                         sb->sb_mb = m;
  829                 sb->sb_mbtail = m;
  830                 sballoc(sb, m);
  831                 n = m;
  832                 m->m_flags &= ~M_EOR;
  833                 m = m->m_next;
  834                 n->m_next = 0;
  835         }
  836         if (eor) {
  837                 if (n)
  838                         n->m_flags |= eor;
  839                 else
  840                         printf("semi-panic: sbcompress\n");
  841         }
  842         SBLASTMBUFCHK(sb);
  843 }
  844 
  845 /*
  846  * Free all mbufs in a sockbuf.
  847  * Check that all resources are reclaimed.
  848  */
  849 void
  850 sbflush(sb)
  851         register struct sockbuf *sb;
  852 {
  853 
  854         if (sb->sb_flags & SB_LOCK)
  855                 panic("sbflush: locked");
  856         while (sb->sb_mbcnt) {
  857                 /*
  858                  * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
  859                  * we would loop forever. Panic instead.
  860                  */
  861                 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
  862                         break;
  863                 sbdrop(sb, (int)sb->sb_cc);
  864         }
  865         if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
  866                 panic("sbflush: cc %ld || mb %p || mbcnt %ld", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
  867 }
  868 
  869 /*
  870  * Drop data from (the front of) a sockbuf.
  871  */
  872 void
  873 sbdrop(sb, len)
  874         register struct sockbuf *sb;
  875         register int len;
  876 {
  877         register struct mbuf *m;
  878         struct mbuf *next;
  879 
  880         next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
  881         while (len > 0) {
  882                 if (m == 0) {
  883                         if (next == 0)
  884                                 panic("sbdrop");
  885                         m = next;
  886                         next = m->m_nextpkt;
  887                         continue;
  888                 }
  889                 if (m->m_len > len) {
  890                         m->m_len -= len;
  891                         m->m_data += len;
  892                         sb->sb_cc -= len;
  893                         break;
  894                 }
  895                 len -= m->m_len;
  896                 sbfree(sb, m);
  897                 m = m_free(m);
  898         }
  899         while (m && m->m_len == 0) {
  900                 sbfree(sb, m);
  901                 m = m_free(m);
  902         }
  903         if (m) {
  904                 sb->sb_mb = m;
  905                 m->m_nextpkt = next;
  906         } else
  907                 sb->sb_mb = next;
  908         /*
  909          * First part is an inline SB_EMPTY_FIXUP().  Second part
  910          * makes sure sb_lastrecord is up-to-date if we dropped
  911          * part of the last record.
  912          */
  913         m = sb->sb_mb;
  914         if (m == NULL) {
  915                 sb->sb_mbtail = NULL;
  916                 sb->sb_lastrecord = NULL;
  917         } else if (m->m_nextpkt == NULL) {
  918                 sb->sb_lastrecord = m;
  919         }
  920 }
  921 
  922 /*
  923  * Drop a record off the front of a sockbuf
  924  * and move the next record to the front.
  925  */
  926 void
  927 sbdroprecord(sb)
  928         register struct sockbuf *sb;
  929 {
  930         register struct mbuf *m;
  931 
  932         m = sb->sb_mb;
  933         if (m) {
  934                 sb->sb_mb = m->m_nextpkt;
  935                 do {
  936                         sbfree(sb, m);
  937                         m = m_free(m);
  938                 } while (m);
  939         }
  940         SB_EMPTY_FIXUP(sb);
  941 }
  942 
  943 /*
  944  * Create a "control" mbuf containing the specified data
  945  * with the specified type for presentation on a socket buffer.
  946  */
  947 struct mbuf *
  948 sbcreatecontrol(p, size, type, level)
  949         caddr_t p;
  950         register int size;
  951         int type, level;
  952 {
  953         register struct cmsghdr *cp;
  954         struct mbuf *m;
  955 
  956         if (CMSG_SPACE((u_int)size) > MCLBYTES)
  957                 return ((struct mbuf *) NULL);
  958         if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
  959                 return ((struct mbuf *) NULL);
  960         if (CMSG_SPACE((u_int)size) > MLEN) {
  961                 MCLGET(m, M_DONTWAIT);
  962                 if ((m->m_flags & M_EXT) == 0) {
  963                         m_free(m);
  964                         return ((struct mbuf *) NULL);
  965                 }
  966         }
  967         cp = mtod(m, struct cmsghdr *);
  968         m->m_len = 0;
  969         KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
  970             ("sbcreatecontrol: short mbuf"));
  971         if (p != NULL)
  972                 (void)memcpy(CMSG_DATA(cp), p, size);
  973         m->m_len = CMSG_SPACE(size);
  974         cp->cmsg_len = CMSG_LEN(size);
  975         cp->cmsg_level = level;
  976         cp->cmsg_type = type;
  977         return (m);
  978 }
  979 
  980 /*
  981  * Some routines that return EOPNOTSUPP for entry points that are not
  982  * supported by a protocol.  Fill in as needed.
  983  */
  984 int
  985 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
  986 {
  987         return EOPNOTSUPP;
  988 }
  989 
  990 int
  991 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
  992 {
  993         return EOPNOTSUPP;
  994 }
  995 
  996 int
  997 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
  998 {
  999         return EOPNOTSUPP;
 1000 }
 1001 
 1002 int
 1003 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
 1004                     struct ifnet *ifp, struct proc *p)
 1005 {
 1006         return EOPNOTSUPP;
 1007 }
 1008 
 1009 int
 1010 pru_listen_notsupp(struct socket *so, struct proc *p)
 1011 {
 1012         return EOPNOTSUPP;
 1013 }
 1014 
 1015 int
 1016 pru_rcvd_notsupp(struct socket *so, int flags)
 1017 {
 1018         return EOPNOTSUPP;
 1019 }
 1020 
 1021 int
 1022 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
 1023 {
 1024         return EOPNOTSUPP;
 1025 }
 1026 
 1027 /*
 1028  * This isn't really a ``null'' operation, but it's the default one
 1029  * and doesn't do anything destructive.
 1030  */
 1031 int
 1032 pru_sense_null(struct socket *so, struct stat *sb)
 1033 {
 1034         sb->st_blksize = so->so_snd.sb_hiwat;
 1035         return 0;
 1036 }
 1037 
 1038 /*
 1039  * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 1040  */
 1041 struct sockaddr *
 1042 dup_sockaddr(sa, canwait)
 1043         struct sockaddr *sa;
 1044         int canwait;
 1045 {
 1046         struct sockaddr *sa2;
 1047 
 1048         MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME, 
 1049                canwait ? M_WAITOK : M_NOWAIT);
 1050         if (sa2)
 1051                 bcopy(sa, sa2, sa->sa_len);
 1052         return sa2;
 1053 }
 1054 
 1055 /*
 1056  * Create an external-format (``xsocket'') structure using the information
 1057  * in the kernel-format socket structure pointed to by so.  This is done
 1058  * to reduce the spew of irrelevant information over this interface,
 1059  * to isolate user code from changes in the kernel structure, and
 1060  * potentially to provide information-hiding if we decide that
 1061  * some of this information should be hidden from users.
 1062  */
 1063 void
 1064 sotoxsocket(struct socket *so, struct xsocket *xso)
 1065 {
 1066         xso->xso_len = sizeof *xso;
 1067         xso->xso_so = so;
 1068         xso->so_type = so->so_type;
 1069         xso->so_options = so->so_options;
 1070         xso->so_linger = so->so_linger;
 1071         xso->so_state = so->so_state;
 1072         xso->so_pcb = so->so_pcb;
 1073         xso->xso_protocol = so->so_proto->pr_protocol;
 1074         xso->xso_family = so->so_proto->pr_domain->dom_family;
 1075         xso->so_qlen = so->so_qlen;
 1076         xso->so_incqlen = so->so_incqlen;
 1077         xso->so_qlimit = so->so_qlimit;
 1078         xso->so_timeo = so->so_timeo;
 1079         xso->so_error = so->so_error;
 1080         xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
 1081         xso->so_oobmark = so->so_oobmark;
 1082         sbtoxsockbuf(&so->so_snd, &xso->so_snd);
 1083         sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
 1084         xso->so_uid = so->so_cred->cr_uid;
 1085 }
 1086 
 1087 /*
 1088  * This does the same for sockbufs.  Note that the xsockbuf structure,
 1089  * since it is always embedded in a socket, does not include a self
 1090  * pointer nor a length.  We make this entry point public in case
 1091  * some other mechanism needs it.
 1092  */
 1093 void
 1094 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 1095 {
 1096         xsb->sb_cc = sb->sb_cc;
 1097         xsb->sb_hiwat = sb->sb_hiwat;
 1098         xsb->sb_mbcnt = sb->sb_mbcnt;
 1099         xsb->sb_mbmax = sb->sb_mbmax;
 1100         xsb->sb_lowat = sb->sb_lowat;
 1101         xsb->sb_flags = sb->sb_flags;
 1102         xsb->sb_timeo = sb->sb_timeo;
 1103 }
 1104 
 1105 /*
 1106  * Here is the definition of some of the basic objects in the kern.ipc
 1107  * branch of the MIB.
 1108  */
 1109 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
 1110 
 1111 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
 1112 static int dummy;
 1113 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
 1114 SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_INT|CTLFLAG_RW, 
 1115     &sb_max, 0, sysctl_handle_sb_max, "I", "Maximum socket buffer size");
 1116 SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, 
 1117     &maxsockets, 0, "Maximum number of sockets avaliable");
 1118 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
 1119     &sb_efficiency, 0, "");
 1120 
 1121 /*
 1122  * Initialise maxsockets 
 1123  */
 1124 static void init_maxsockets(void *ignored)
 1125 {
 1126     TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
 1127     maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
 1128 }
 1129 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);

Cache object: 8279e7522451eb1e6e6357880c928bc6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.