The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket2.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: uipc_socket2.c,v 1.58.2.3 2004/07/14 11:07:12 tron Exp $       */
    2 
    3 /*
    4  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)uipc_socket2.c      8.2 (Berkeley) 2/14/95
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.58.2.3 2004/07/14 11:07:12 tron Exp $");
   36 
   37 #include "opt_mbuftrace.h"
   38 #include "opt_sb_max.h"
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/proc.h>
   43 #include <sys/file.h>
   44 #include <sys/buf.h>
   45 #include <sys/malloc.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/protosw.h>
   48 #include <sys/poll.h>
   49 #include <sys/socket.h>
   50 #include <sys/socketvar.h>
   51 #include <sys/signalvar.h>
   52 
   53 /*
   54  * Primitive routines for operating on sockets and socket buffers
   55  */
   56 
   57 /* wait-message strings handed to tsleep() (netio by sbwait(), netlck by sb_lock()): */
   58 const char      netcon[] = "netcon";
   59 const char      netcls[] = "netcls";
   60 const char      netio[] = "netio";
   61 const char      netlck[] = "netlck";
   62 
   63 u_long  sb_max = SB_MAX;        /* maximum socket buffer size; updated by sb_max_set() */
   64 static u_long sb_max_adj;       /* sb_max scaled by MCLBYTES / (MSIZE + MCLBYTES); ceiling checked by sbreserve() */
   65 
   66 /*
   67  * Procedures to manipulate state flags of socket
   68  * and do appropriate wakeups.  Normal sequence from the
   69  * active (originating) side is that soisconnecting() is
   70  * called during processing of connect() call,
   71  * resulting in an eventual call to soisconnected() if/when the
   72  * connection is established.  When the connection is torn down
   73  * soisdisconnecting() is called during processing of disconnect() call,
   74  * and soisdisconnected() is called when the connection to the peer
   75  * is totally severed.  The semantics of these routines are such that
   76  * connectionless protocols can call soisconnected() and soisdisconnected()
   77  * only, bypassing the in-progress calls when setting up a ``connection''
   78  * takes no time.
   79  *
   80  * From the passive side, a socket is created with
   81  * two queues of sockets: so_q0 for connections in progress
   82  * and so_q for connections already made and awaiting user acceptance.
   83  * As a protocol is preparing incoming connections, it creates a socket
   84  * structure queued on so_q0 by calling sonewconn().  When the connection
   85  * is established, soisconnected() is called, and transfers the
   86  * socket structure to so_q, making it available to accept().
   87  * 
   88  * If a socket is closed with sockets on either
   89  * so_q0 or so_q, these sockets are dropped.
   90  *
   91  * If higher level protocols are implemented in
   92  * the kernel, the wakeups done here will sometimes
   93  * cause software-interrupt process scheduling.
   94  */
   95 
   96 void
   97 soisconnecting(struct socket *so)
   98 {
   99 
  100         so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
  101         so->so_state |= SS_ISCONNECTING;
  102 }
  103 
  104 void
  105 soisconnected(struct socket *so)
  106 {
  107         struct socket   *head;
  108 
  109         head = so->so_head;
  110         so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
  111         so->so_state |= SS_ISCONNECTED;
  112         if (head && soqremque(so, 0)) {
  113                 soqinsque(head, so, 1);
  114                 sorwakeup(head);
  115                 wakeup((caddr_t)&head->so_timeo);
  116         } else {
  117                 wakeup((caddr_t)&so->so_timeo);
  118                 sorwakeup(so);
  119                 sowwakeup(so);
  120         }
  121 }
  122 
  123 void
  124 soisdisconnecting(struct socket *so)
  125 {
  126 
  127         so->so_state &= ~SS_ISCONNECTING;
  128         so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
  129         wakeup((caddr_t)&so->so_timeo);
  130         sowwakeup(so);
  131         sorwakeup(so);
  132 }
  133 
  134 void
  135 soisdisconnected(struct socket *so)
  136 {
  137 
  138         so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
  139         so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
  140         wakeup((caddr_t)&so->so_timeo);
  141         sowwakeup(so);
  142         sorwakeup(so);
  143 }
  144 
  145 /*
  146  * When an attempt at a new connection is noted on a socket
  147  * which accepts connections, sonewconn is called.  If the
  148  * connection is possible (subject to space constraints, etc.)
  149  * then we allocate a new structure, propoerly linked into the
  150  * data structure of the original socket, and return this.
  151  * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
  152  *
  153  * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
  154  * to catch calls that are missing the (new) second parameter.
  155  */
  156 struct socket *
  157 sonewconn1(struct socket *head, int connstatus)
  158 {
  159         struct socket   *so;
  160         int             soqueue;
  161 
  162         soqueue = connstatus ? 1 : 0;
  163         if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
  164                 return ((struct socket *)0);
  165         so = pool_get(&socket_pool, PR_NOWAIT);
  166         if (so == NULL) 
  167                 return (NULL);
  168         memset((caddr_t)so, 0, sizeof(*so));
  169         so->so_type = head->so_type;
  170         so->so_options = head->so_options &~ SO_ACCEPTCONN;
  171         so->so_linger = head->so_linger;
  172         so->so_state = head->so_state | SS_NOFDREF;
  173         so->so_proto = head->so_proto;
  174         so->so_timeo = head->so_timeo;
  175         so->so_pgid = head->so_pgid;
  176         so->so_send = head->so_send;
  177         so->so_receive = head->so_receive;
  178         so->so_uid = head->so_uid;
  179 #ifdef MBUFTRACE
  180         so->so_mowner = head->so_mowner;
  181         so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
  182         so->so_snd.sb_mowner = head->so_snd.sb_mowner;
  183 #endif
  184         (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
  185         soqinsque(head, so, soqueue);
  186         if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
  187             (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
  188             (struct proc *)0)) {
  189                 (void) soqremque(so, soqueue);
  190                 pool_put(&socket_pool, so);
  191                 return (NULL);
  192         }
  193         if (connstatus) {
  194                 sorwakeup(head);
  195                 wakeup((caddr_t)&head->so_timeo);
  196                 so->so_state |= connstatus;
  197         }
  198         return (so);
  199 }
  200 
  201 void
  202 soqinsque(struct socket *head, struct socket *so, int q)
  203 {
  204 
  205 #ifdef DIAGNOSTIC
  206         if (so->so_onq != NULL)
  207                 panic("soqinsque");
  208 #endif
  209 
  210         so->so_head = head;
  211         if (q == 0) {
  212                 head->so_q0len++;
  213                 so->so_onq = &head->so_q0;
  214         } else {
  215                 head->so_qlen++;
  216                 so->so_onq = &head->so_q;
  217         }
  218         TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
  219 }
  220 
  221 int
  222 soqremque(struct socket *so, int q)
  223 {
  224         struct socket   *head;
  225 
  226         head = so->so_head;
  227         if (q == 0) {
  228                 if (so->so_onq != &head->so_q0)
  229                         return (0);
  230                 head->so_q0len--;
  231         } else {
  232                 if (so->so_onq != &head->so_q)
  233                         return (0);
  234                 head->so_qlen--;
  235         }
  236         TAILQ_REMOVE(so->so_onq, so, so_qe);
  237         so->so_onq = NULL;
  238         so->so_head = NULL;
  239         return (1);
  240 }
  241 
  242 /*
  243  * Socantsendmore indicates that no more data will be sent on the
  244  * socket; it would normally be applied to a socket when the user
  245  * informs the system that no more data is to be sent, by the protocol
  246  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
  247  * will be received, and will normally be applied to the socket by a
  248  * protocol when it detects that the peer will send no more data.
  249  * Data queued for reading in the socket may yet be read.
  250  */
  251 
  252 void
  253 socantsendmore(struct socket *so)
  254 {
  255 
  256         so->so_state |= SS_CANTSENDMORE;
  257         sowwakeup(so);
  258 }
  259 
  260 void
  261 socantrcvmore(struct socket *so)
  262 {
  263 
  264         so->so_state |= SS_CANTRCVMORE;
  265         sorwakeup(so);
  266 }
  267 
  268 /*
  269  * Wait for data to arrive at/drain from a socket buffer.
  270  */
  271 int
  272 sbwait(struct sockbuf *sb)
  273 {
  274 
  275         sb->sb_flags |= SB_WAIT;
  276         return (tsleep((caddr_t)&sb->sb_cc,
  277             (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
  278             sb->sb_timeo));
  279 }
  280 
  281 /* 
  282  * Lock a sockbuf already known to be locked;
  283  * return any error returned from sleep (EINTR).
  284  */
  285 int
  286 sb_lock(struct sockbuf *sb)
  287 {
  288         int     error;
  289 
  290         while (sb->sb_flags & SB_LOCK) {
  291                 sb->sb_flags |= SB_WANT;
  292                 error = tsleep((caddr_t)&sb->sb_flags, 
  293                     (sb->sb_flags & SB_NOINTR) ?  PSOCK : PSOCK|PCATCH,
  294                     netlck, 0);
  295                 if (error)
  296                         return (error);
  297         }
  298         sb->sb_flags |= SB_LOCK;
  299         return (0);
  300 }
  301 
  302 /*
  303  * Wakeup processes waiting on a socket buffer.
  304  * Do asynchronous notification via SIGIO
  305  * if the socket buffer has the SB_ASYNC flag set.
  306  */
  307 void
  308 sowakeup(struct socket *so, struct sockbuf *sb, int code)
  309 {
  310         selnotify(&sb->sb_sel, 0);
  311         sb->sb_flags &= ~SB_SEL;
  312         if (sb->sb_flags & SB_WAIT) {
  313                 sb->sb_flags &= ~SB_WAIT;
  314                 wakeup((caddr_t)&sb->sb_cc);
  315         }
  316         if (sb->sb_flags & SB_ASYNC) {
  317                 int band;
  318                 if (code == POLL_IN)
  319                         band = POLLIN|POLLRDNORM;
  320                 else
  321                         band = POLLOUT|POLLWRNORM;
  322                 fownsignal(so->so_pgid, SIGIO, code, band, so);
  323         }
  324         if (sb->sb_flags & SB_UPCALL)
  325                 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  326 }
  327 
  328 /*
  329  * Socket buffer (struct sockbuf) utility routines.
  330  *
  331  * Each socket contains two socket buffers: one for sending data and
  332  * one for receiving data.  Each buffer contains a queue of mbufs,
  333  * information about the number of mbufs and amount of data in the
  334  * queue, and other fields allowing poll() statements and notification
  335  * on data availability to be implemented.
  336  *
  337  * Data stored in a socket buffer is maintained as a list of records.
  338  * Each record is a list of mbufs chained together with the m_next
  339  * field.  Records are chained together with the m_nextpkt field. The upper
  340  * level routine soreceive() expects the following conventions to be
  341  * observed when placing information in the receive buffer:
  342  *
  343  * 1. If the protocol requires each message be preceded by the sender's
  344  *    name, then a record containing that name must be present before
  345  *    any associated data (mbuf's must be of type MT_SONAME).
  346  * 2. If the protocol supports the exchange of ``access rights'' (really
  347  *    just additional data associated with the message), and there are
  348  *    ``rights'' to be received, then a record containing this data
  349  *    should be present (mbuf's must be of type MT_CONTROL).
  350  * 3. If a name or rights record exists, then it must be followed by
  351  *    a data record, perhaps of zero length.
  352  *
  353  * Before using a new socket structure it is first necessary to reserve
  354  * buffer space to the socket, by calling sbreserve().  This should commit
  355  * some of the available buffer space in the system buffer pool for the
  356  * socket (currently, it does nothing but enforce limits).  The space
  357  * should be released by calling sbrelease() when the socket is destroyed.
  358  */
  359 
  360 int
  361 sb_max_set(u_long new_sbmax)
  362 {
  363         int s;
  364 
  365         if (new_sbmax < (16 * 1024))
  366                 return (EINVAL);
  367 
  368         s = splsoftnet();
  369         sb_max = new_sbmax;
  370         sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
  371         splx(s);
  372 
  373         return (0);
  374 }
  375 
  376 int
  377 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
  378 {
  379 
  380         if (sbreserve(&so->so_snd, sndcc) == 0)
  381                 goto bad;
  382         if (sbreserve(&so->so_rcv, rcvcc) == 0)
  383                 goto bad2;
  384         if (so->so_rcv.sb_lowat == 0)
  385                 so->so_rcv.sb_lowat = 1;
  386         if (so->so_snd.sb_lowat == 0)
  387                 so->so_snd.sb_lowat = MCLBYTES;
  388         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  389                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  390         return (0);
  391  bad2:
  392         sbrelease(&so->so_snd);
  393  bad:
  394         return (ENOBUFS);
  395 }
  396 
  397 /*
  398  * Allot mbufs to a sockbuf.
  399  * Attempt to scale mbmax so that mbcnt doesn't become limiting
  400  * if buffering efficiency is near the normal case.
  401  */
  402 int
  403 sbreserve(struct sockbuf *sb, u_long cc)
  404 {
  405 
  406         KDASSERT(sb_max_adj != 0);
  407         if (cc == 0 || cc > sb_max_adj)
  408                 return (0);
  409         sb->sb_hiwat = cc;
  410         sb->sb_mbmax = min(cc * 2, sb_max);
  411         if (sb->sb_lowat > sb->sb_hiwat)
  412                 sb->sb_lowat = sb->sb_hiwat;
  413         return (1);
  414 }
  415 
  416 /*
  417  * Free mbufs held by a socket, and reserved mbuf space.
  418  */
  419 void
  420 sbrelease(struct sockbuf *sb)
  421 {
  422 
  423         sbflush(sb);
  424         sb->sb_hiwat = sb->sb_mbmax = 0;
  425 }
  426 
  427 /*
  428  * Routines to add and remove
  429  * data from an mbuf queue.
  430  *
  431  * The routines sbappend() or sbappendrecord() are normally called to
  432  * append new mbufs to a socket buffer, after checking that adequate
  433  * space is available, comparing the function sbspace() with the amount
  434  * of data to be added.  sbappendrecord() differs from sbappend() in
  435  * that data supplied is treated as the beginning of a new record.
  436  * To place a sender's address, optional access rights, and data in a
  437  * socket receive buffer, sbappendaddr() should be used.  To place
  438  * access rights and data in a socket receive buffer, sbappendrights()
  439  * should be used.  In either case, the new data begins a new record.
  440  * Note that unlike sbappend() and sbappendrecord(), these routines check
  441  * for the caller that there will be enough space to store the data.
  442  * Each fails if there is not enough space, or if it cannot find mbufs
  443  * to store additional information in.
  444  *
  445  * Reliable protocols may use the socket send buffer to hold data
  446  * awaiting acknowledgement.  Data is normally copied from a socket
  447  * send buffer in a protocol with m_copy for output to a peer,
  448  * and then removing the data from the socket buffer with sbdrop()
  449  * or sbdroprecord() when the data is acknowledged by the peer.
  450  */
  451 
  452 #ifdef SOCKBUF_DEBUG
  453 void
  454 sblastrecordchk(struct sockbuf *sb, const char *where)
  455 {
  456         struct mbuf *m = sb->sb_mb;
  457 
  458         while (m && m->m_nextpkt)
  459                 m = m->m_nextpkt;
  460 
  461         if (m != sb->sb_lastrecord) {
  462                 printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
  463                     sb->sb_mb, sb->sb_lastrecord, m);
  464                 printf("packet chain:\n");
  465                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
  466                         printf("\t%p\n", m);
  467                 panic("sblastrecordchk from %s", where);
  468         }
  469 }
  470 
  471 void
  472 sblastmbufchk(struct sockbuf *sb, const char *where)
  473 {
  474         struct mbuf *m = sb->sb_mb;
  475         struct mbuf *n;
  476 
  477         while (m && m->m_nextpkt)
  478                 m = m->m_nextpkt;
  479 
  480         while (m && m->m_next)
  481                 m = m->m_next;
  482 
  483         if (m != sb->sb_mbtail) {
  484                 printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
  485                     sb->sb_mb, sb->sb_mbtail, m);
  486                 printf("packet tree:\n");
  487                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
  488                         printf("\t");
  489                         for (n = m; n != NULL; n = n->m_next)
  490                                 printf("%p ", n);
  491                         printf("\n");
  492                 }
  493                 panic("sblastmbufchk from %s", where);
  494         }
  495 }
  496 #endif /* SOCKBUF_DEBUG */
  497 
  498 /*
  499  * Link a chain of records onto a socket buffer
  500  */
  501 #define SBLINKRECORDCHAIN(sb, m0, mlast)                                \
  502 do {                                                                    \
  503         if ((sb)->sb_lastrecord != NULL)                                \
  504                 (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
  505         else                                                            \
  506                 (sb)->sb_mb = (m0);                                     \
  507         (sb)->sb_lastrecord = (mlast);                                  \
  508 } while (/*CONSTCOND*/0)
  509 
  510 
  511 #define SBLINKRECORD(sb, m0)                                            \
  512     SBLINKRECORDCHAIN(sb, m0, m0)
  513 
  514 /*
  515  * Append mbuf chain m to the last record in the
  516  * socket buffer sb.  The additional space associated
  517  * the mbuf chain is recorded in sb.  Empty mbufs are
  518  * discarded and mbufs are compacted where possible.
  519  */
  520 void
  521 sbappend(struct sockbuf *sb, struct mbuf *m)
  522 {
  523         struct mbuf     *n;
  524 
  525         if (m == 0)
  526                 return;
  527 
  528 #ifdef MBUFTRACE
  529         m_claimm(m, sb->sb_mowner);
  530 #endif
  531 
  532         SBLASTRECORDCHK(sb, "sbappend 1");
  533 
  534         if ((n = sb->sb_lastrecord) != NULL) {
  535                 /*
  536                  * XXX Would like to simply use sb_mbtail here, but
  537                  * XXX I need to verify that I won't miss an EOR that
  538                  * XXX way.
  539                  */
  540                 do {
  541                         if (n->m_flags & M_EOR) {
  542                                 sbappendrecord(sb, m); /* XXXXXX!!!! */
  543                                 return;
  544                         }
  545                 } while (n->m_next && (n = n->m_next));
  546         } else {
  547                 /*
  548                  * If this is the first record in the socket buffer, it's
  549                  * also the last record.
  550                  */
  551                 sb->sb_lastrecord = m;
  552         }
  553         sbcompress(sb, m, n);
  554         SBLASTRECORDCHK(sb, "sbappend 2");
  555 }
  556 
  557 /*
  558  * This version of sbappend() should only be used when the caller
  559  * absolutely knows that there will never be more than one record
  560  * in the socket buffer, that is, a stream protocol (such as TCP).
  561  */
  562 void
  563 sbappendstream(struct sockbuf *sb, struct mbuf *m)
  564 {
  565 
  566         KDASSERT(m->m_nextpkt == NULL);
  567         KASSERT(sb->sb_mb == sb->sb_lastrecord);
  568 
  569         SBLASTMBUFCHK(sb, __func__);
  570 
  571 #ifdef MBUFTRACE
  572         m_claimm(m, sb->sb_mowner);
  573 #endif
  574 
  575         sbcompress(sb, m, sb->sb_mbtail);
  576 
  577         sb->sb_lastrecord = sb->sb_mb;
  578         SBLASTRECORDCHK(sb, __func__);
  579 }
  580 
  581 #ifdef SOCKBUF_DEBUG
  582 void
  583 sbcheck(struct sockbuf *sb)
  584 {
  585         struct mbuf     *m;
  586         u_long          len, mbcnt;
  587 
  588         len = 0;
  589         mbcnt = 0;
  590         for (m = sb->sb_mb; m; m = m->m_next) {
  591                 len += m->m_len;
  592                 mbcnt += MSIZE;
  593                 if (m->m_flags & M_EXT)
  594                         mbcnt += m->m_ext.ext_size;
  595                 if (m->m_nextpkt)
  596                         panic("sbcheck nextpkt");
  597         }
  598         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
  599                 printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
  600                     mbcnt, sb->sb_mbcnt);
  601                 panic("sbcheck");
  602         }
  603 }
  604 #endif
  605 
  606 /*
  607  * As above, except the mbuf chain
  608  * begins a new record.
  609  */
  610 void
  611 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
  612 {
  613         struct mbuf     *m;
  614 
  615         if (m0 == 0)
  616                 return;
  617 
  618 #ifdef MBUFTRACE
  619         m_claimm(m0, sb->sb_mowner);
  620 #endif
  621         /*
  622          * Put the first mbuf on the queue.
  623          * Note this permits zero length records.
  624          */
  625         sballoc(sb, m0);
  626         SBLASTRECORDCHK(sb, "sbappendrecord 1");
  627         SBLINKRECORD(sb, m0);
  628         m = m0->m_next;
  629         m0->m_next = 0;
  630         if (m && (m0->m_flags & M_EOR)) {
  631                 m0->m_flags &= ~M_EOR;
  632                 m->m_flags |= M_EOR;
  633         }
  634         sbcompress(sb, m, m0);
  635         SBLASTRECORDCHK(sb, "sbappendrecord 2");
  636 }
  637 
  638 /*
  639  * As above except that OOB data
  640  * is inserted at the beginning of the sockbuf,
  641  * but after any other OOB data.
  642  */
  643 void
  644 sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
  645 {
  646         struct mbuf     *m, **mp;
  647 
  648         if (m0 == 0)
  649                 return;
  650 
  651         SBLASTRECORDCHK(sb, "sbinsertoob 1");
  652 
  653         for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
  654             again:
  655                 switch (m->m_type) {
  656 
  657                 case MT_OOBDATA:
  658                         continue;               /* WANT next train */
  659 
  660                 case MT_CONTROL:
  661                         if ((m = m->m_next) != NULL)
  662                                 goto again;     /* inspect THIS train further */
  663                 }
  664                 break;
  665         }
  666         /*
  667          * Put the first mbuf on the queue.
  668          * Note this permits zero length records.
  669          */
  670         sballoc(sb, m0);
  671         m0->m_nextpkt = *mp;
  672         if (*mp == NULL) {
  673                 /* m0 is actually the new tail */
  674                 sb->sb_lastrecord = m0;
  675         }
  676         *mp = m0;
  677         m = m0->m_next;
  678         m0->m_next = 0;
  679         if (m && (m0->m_flags & M_EOR)) {
  680                 m0->m_flags &= ~M_EOR;
  681                 m->m_flags |= M_EOR;
  682         }
  683         sbcompress(sb, m, m0);
  684         SBLASTRECORDCHK(sb, "sbinsertoob 2");
  685 }
  686 
  687 /*
  688  * Append address and data, and optionally, control (ancillary) data
  689  * to the receive queue of a socket.  If present,
  690  * m0 must include a packet header with total length.
  691  * Returns 0 if no space in sockbuf or insufficient mbufs.
  692  */
  693 int
  694 sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
  695         struct mbuf *control)
  696 {
  697         struct mbuf     *m, *n, *nlast;
  698         int             space, len;
  699 
  700         space = asa->sa_len;
  701 
  702         if (m0 != NULL) {
  703                 if ((m0->m_flags & M_PKTHDR) == 0)
  704                         panic("sbappendaddr");
  705                 space += m0->m_pkthdr.len;
  706 #ifdef MBUFTRACE
  707                 m_claimm(m0, sb->sb_mowner);
  708 #endif
  709         }
  710         for (n = control; n; n = n->m_next) {
  711                 space += n->m_len;
  712                 MCLAIM(n, sb->sb_mowner);
  713                 if (n->m_next == 0)     /* keep pointer to last control buf */
  714                         break;
  715         }
  716         if (space > sbspace(sb))
  717                 return (0);
  718         MGET(m, M_DONTWAIT, MT_SONAME);
  719         if (m == 0)
  720                 return (0);
  721         MCLAIM(m, sb->sb_mowner);
  722         /*
  723          * XXX avoid 'comparison always true' warning which isn't easily
  724          * avoided.
  725          */
  726         len = asa->sa_len;
  727         if (len > MLEN) {
  728                 MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
  729                 if ((m->m_flags & M_EXT) == 0) {
  730                         m_free(m);
  731                         return (0);
  732                 }
  733         }
  734         m->m_len = asa->sa_len;
  735         memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len);
  736         if (n)
  737                 n->m_next = m0;         /* concatenate data to control */
  738         else
  739                 control = m0;
  740         m->m_next = control;
  741 
  742         SBLASTRECORDCHK(sb, "sbappendaddr 1");
  743 
  744         for (n = m; n->m_next != NULL; n = n->m_next)
  745                 sballoc(sb, n);
  746         sballoc(sb, n);
  747         nlast = n;
  748         SBLINKRECORD(sb, m);
  749 
  750         sb->sb_mbtail = nlast;
  751         SBLASTMBUFCHK(sb, "sbappendaddr");
  752 
  753         SBLASTRECORDCHK(sb, "sbappendaddr 2");
  754 
  755         return (1);
  756 }
  757 
  758 /*
  759  * Helper for sbappendchainaddr: prepend a struct sockaddr* to
  760  * an mbuf chain.
  761  */
  762 static __inline struct mbuf *
  763 m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
  764                    const struct sockaddr *asa)
  765 {
  766         struct mbuf *m;
  767         const int salen = asa->sa_len;
  768 
  769         /* only the first in each chain need be a pkthdr */
  770         MGETHDR(m, M_DONTWAIT, MT_SONAME);
  771         if (m == 0)
  772                 return (0);
  773         MCLAIM(m, sb->sb_mowner);
  774 #ifdef notyet
  775         if (salen > MHLEN) {
  776                 MEXTMALLOC(m, salen, M_NOWAIT);
  777                 if ((m->m_flags & M_EXT) == 0) {
  778                         m_free(m);
  779                         return (0);
  780                 }
  781         }
  782 #else
  783         KASSERT(salen <= MHLEN);
  784 #endif
  785         m->m_len = salen;
  786         memcpy(mtod(m, caddr_t), (caddr_t)asa, salen);
  787         m->m_next = m0;
  788         m->m_pkthdr.len = salen + m0->m_pkthdr.len;
  789 
  790         return m;
  791 }
  792 
  793 int
  794 sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
  795                   struct mbuf *m0, int sbprio)
  796 {
  797         int space;
  798         struct mbuf *m, *n, *n0, *nlast;
  799         int error;
  800 
  801         /*
  802          * XXX sbprio reserved for encoding priority of this* request:
  803          *  SB_PRIO_NONE --> honour normal sb limits
  804          *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
  805          *      take whole chain. Intended for large requests
  806          *      that should be delivered atomically (all, or none).
  807          * SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow
  808          *       over normal socket limits, for messages indicating
  809          *       buffer overflow in earlier normal/lower-priority messages
  810          * SB_PRIO_BESTEFFORT -->  ignore limits entirely.
  811          *       Intended for  kernel-generated messages only.
  812          *        Up to generator to avoid total mbuf resource exhaustion.
  813          */
  814         (void)sbprio;
  815 
  816         if (m0 && (m0->m_flags & M_PKTHDR) == 0)
  817                 panic("sbappendaddrchain");
  818 
  819         space = sbspace(sb);
  820         
  821 #ifdef notyet
  822         /* 
  823          * Enforce SB_PRIO_* limits as described above.
  824          */
  825 #endif
  826 
  827         n0 = NULL;
  828         nlast = NULL;
  829         for (m = m0; m; m = m->m_nextpkt) {
  830                 struct mbuf *np;
  831 
  832 #ifdef MBUFTRACE
  833                 m_claimm(m, sb->sb_mowner);
  834 #endif
  835 
  836                 /* Prepend sockaddr to this record (m) of input chain m0 */
  837                 n = m_prepend_sockaddr(sb, m, asa);
  838                 if (n == NULL) {
  839                         error = ENOBUFS;
  840                         goto bad;
  841                 }
  842 
  843                 /* Append record (asa+m) to end of new chain n0 */
  844                 if (n0 == NULL) {
  845                         n0 = n;
  846                 } else {
  847                         nlast->m_nextpkt = n;
  848                 }
  849                 /* Keep track of last record on new chain */
  850                 nlast = n;
  851 
  852                 for (np = n; np; np = np->m_next)
  853                         sballoc(sb, np);
  854         }
  855 
  856         SBLASTRECORDCHK(sb, "sbappendaddrchain 1");
  857 
  858         /* Drop the entire chain of (asa+m) records onto the socket */
  859         SBLINKRECORDCHAIN(sb, n0, nlast);
  860 
  861         SBLASTRECORDCHK(sb, "sbappendaddrchain 2");
  862 
  863         for (m = nlast; m->m_next; m = m->m_next)
  864                 ;
  865         sb->sb_mbtail = m;
  866         SBLASTMBUFCHK(sb, "sbappendaddrchain");
  867 
  868         return (1);
  869 
  870 bad:
  871         /*
  872          * On error, free the prepended addreseses. For consistency
  873          * with sbappendaddr(), leave it to our caller to free
  874          * the input record chain passed to us as m0.
  875          */
  876         while ((n = n0) != NULL) {
  877                 struct mbuf *np;
  878 
  879                 /* Undo the sballoc() of this record */
  880                 for (np = n; np; np = np->m_next)
  881                         sbfree(sb, np);
  882 
  883                 n0 = n->m_nextpkt;      /* iterate at next prepended address */
  884                 MFREE(n, np);           /* free prepended address (not data) */
  885         }
  886         return 0;       
  887 }
  888 
  889 
  890 int
  891 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
  892 {
  893         struct mbuf     *m, *mlast, *n;
  894         int             space;
  895 
  896         space = 0;
  897         if (control == 0)
  898                 panic("sbappendcontrol");
  899         for (m = control; ; m = m->m_next) {
  900                 space += m->m_len;
  901                 MCLAIM(m, sb->sb_mowner);
  902                 if (m->m_next == 0)
  903                         break;
  904         }
  905         n = m;                  /* save pointer to last control buffer */
  906         for (m = m0; m; m = m->m_next) {
  907                 MCLAIM(m, sb->sb_mowner);
  908                 space += m->m_len;
  909         }
  910         if (space > sbspace(sb))
  911                 return (0);
  912         n->m_next = m0;                 /* concatenate data to control */
  913 
  914         SBLASTRECORDCHK(sb, "sbappendcontrol 1");
  915 
  916         for (m = control; m->m_next != NULL; m = m->m_next)
  917                 sballoc(sb, m);
  918         sballoc(sb, m);
  919         mlast = m;
  920         SBLINKRECORD(sb, control);
  921 
  922         sb->sb_mbtail = mlast;
  923         SBLASTMBUFCHK(sb, "sbappendcontrol");
  924 
  925         SBLASTRECORDCHK(sb, "sbappendcontrol 2");
  926 
  927         return (1);
  928 }
  929 
  930 /*
  931  * Compress mbuf chain m into the socket
  932  * buffer sb following mbuf n.  If n
  933  * is null, the buffer is presumed empty.
  934  */
  935 void
  936 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
  937 {
  938         int             eor;
  939         struct mbuf     *o;
  940 
  941         eor = 0;
  942         while (m) {
  943                 eor |= m->m_flags & M_EOR;
  944                 if (m->m_len == 0 &&
  945                     (eor == 0 ||
  946                      (((o = m->m_next) || (o = n)) &&
  947                       o->m_type == m->m_type))) {
  948                         if (sb->sb_lastrecord == m)
  949                                 sb->sb_lastrecord = m->m_next;
  950                         m = m_free(m);
  951                         continue;
  952                 }
  953                 if (n && (n->m_flags & M_EOR) == 0 &&
  954                     /* M_TRAILINGSPACE() checks buffer writeability */
  955                     m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
  956                     m->m_len <= M_TRAILINGSPACE(n) &&
  957                     n->m_type == m->m_type) {
  958                         memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
  959                             (unsigned)m->m_len);
  960                         n->m_len += m->m_len;
  961                         sb->sb_cc += m->m_len;
  962                         m = m_free(m);
  963                         continue;
  964                 }
  965                 if (n)
  966                         n->m_next = m;
  967                 else
  968                         sb->sb_mb = m;
  969                 sb->sb_mbtail = m;
  970                 sballoc(sb, m);
  971                 n = m;
  972                 m->m_flags &= ~M_EOR;
  973                 m = m->m_next;
  974                 n->m_next = 0;
  975         }
  976         if (eor) {
  977                 if (n)
  978                         n->m_flags |= eor;
  979                 else
  980                         printf("semi-panic: sbcompress\n");
  981         }
  982         SBLASTMBUFCHK(sb, __func__);
  983 }
  984 
  985 /*
  986  * Free all mbufs in a sockbuf.
  987  * Check that all resources are reclaimed.
  988  */
  989 void
  990 sbflush(struct sockbuf *sb)
  991 {
  992 
  993         KASSERT((sb->sb_flags & SB_LOCK) == 0);
  994 
  995         while (sb->sb_mbcnt)
  996                 sbdrop(sb, (int)sb->sb_cc);
  997 
  998         KASSERT(sb->sb_cc == 0);
  999         KASSERT(sb->sb_mb == NULL);
 1000         KASSERT(sb->sb_mbtail == NULL);
 1001         KASSERT(sb->sb_lastrecord == NULL);
 1002 }
 1003 
 1004 /*
 1005  * Drop data from (the front of) a sockbuf.
 1006  */
 1007 void
 1008 sbdrop(struct sockbuf *sb, int len)
 1009 {
 1010         struct mbuf     *m, *mn, *next;
 1011 
 1012         next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
 1013         while (len > 0) {
 1014                 if (m == 0) {
 1015                         if (next == 0)
 1016                                 panic("sbdrop");
 1017                         m = next;
 1018                         next = m->m_nextpkt;
 1019                         continue;
 1020                 }
 1021                 if (m->m_len > len) {
 1022                         m->m_len -= len;
 1023                         m->m_data += len;
 1024                         sb->sb_cc -= len;
 1025                         break;
 1026                 }
 1027                 len -= m->m_len;
 1028                 sbfree(sb, m);
 1029                 MFREE(m, mn);
 1030                 m = mn;
 1031         }
 1032         while (m && m->m_len == 0) {
 1033                 sbfree(sb, m);
 1034                 MFREE(m, mn);
 1035                 m = mn;
 1036         }
 1037         if (m) {
 1038                 sb->sb_mb = m;
 1039                 m->m_nextpkt = next;
 1040         } else
 1041                 sb->sb_mb = next;
 1042         /*
 1043          * First part is an inline SB_EMPTY_FIXUP().  Second part
 1044          * makes sure sb_lastrecord is up-to-date if we dropped
 1045          * part of the last record.
 1046          */
 1047         m = sb->sb_mb;
 1048         if (m == NULL) {
 1049                 sb->sb_mbtail = NULL;
 1050                 sb->sb_lastrecord = NULL;
 1051         } else if (m->m_nextpkt == NULL)
 1052                 sb->sb_lastrecord = m;
 1053 }
 1054 
 1055 /*
 1056  * Drop a record off the front of a sockbuf
 1057  * and move the next record to the front.
 1058  */
 1059 void
 1060 sbdroprecord(struct sockbuf *sb)
 1061 {
 1062         struct mbuf     *m, *mn;
 1063 
 1064         m = sb->sb_mb;
 1065         if (m) {
 1066                 sb->sb_mb = m->m_nextpkt;
 1067                 do {
 1068                         sbfree(sb, m);
 1069                         MFREE(m, mn);
 1070                 } while ((m = mn) != NULL);
 1071         }
 1072         SB_EMPTY_FIXUP(sb);
 1073 }
 1074 
 1075 /*
 1076  * Create a "control" mbuf containing the specified data
 1077  * with the specified type for presentation on a socket buffer.
 1078  */
 1079 struct mbuf *
 1080 sbcreatecontrol(caddr_t p, int size, int type, int level)
 1081 {
 1082         struct cmsghdr  *cp;
 1083         struct mbuf     *m;
 1084 
 1085         if (CMSG_SPACE(size) > MCLBYTES) {
 1086                 printf("sbcreatecontrol: message too large %d\n", size);
 1087                 return NULL;
 1088         }
 1089 
 1090         if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
 1091                 return ((struct mbuf *) NULL);
 1092         if (CMSG_SPACE(size) > MLEN) {
 1093                 MCLGET(m, M_DONTWAIT);
 1094                 if ((m->m_flags & M_EXT) == 0) {
 1095                         m_free(m);
 1096                         return NULL;
 1097                 }
 1098         }
 1099         cp = mtod(m, struct cmsghdr *);
 1100         memcpy(CMSG_DATA(cp), p, size);
 1101         m->m_len = CMSG_SPACE(size);
 1102         cp->cmsg_len = CMSG_LEN(size);
 1103         cp->cmsg_level = level;
 1104         cp->cmsg_type = type;
 1105         return (m);
 1106 }

Cache object: 7f1a4b804d5275035fc89966c269be29


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.