The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket2.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: uipc_socket2.c,v 1.81 2006/11/01 10:17:59 yamt Exp $   */
    2 
    3 /*
    4  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)uipc_socket2.c      8.2 (Berkeley) 2/14/95
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.81 2006/11/01 10:17:59 yamt Exp $");
   36 
   37 #include "opt_mbuftrace.h"
   38 #include "opt_sb_max.h"
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/proc.h>
   43 #include <sys/file.h>
   44 #include <sys/buf.h>
   45 #include <sys/malloc.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/protosw.h>
   48 #include <sys/poll.h>
   49 #include <sys/socket.h>
   50 #include <sys/socketvar.h>
   51 #include <sys/signalvar.h>
   52 #include <sys/kauth.h>
   53 
   54 /*
   55  * Primitive routines for operating on sockets and socket buffers
   56  */
   57 
   58 /* strings for sleep message: */
      /* (in this part of the file, netio is used by sbwait() and netlck by sb_lock()) */
   59 const char      netcon[] = "netcon";
   60 const char      netcls[] = "netcls";
   61 const char      netio[] = "netio";
   62 const char      netlck[] = "netlck";
   63 
   64 u_long  sb_max = SB_MAX;        /* maximum socket buffer size */
      /*
       * sb_max scaled by MCLBYTES / (MSIZE + MCLBYTES).  It is only assigned
       * in sb_max_set(); sbreserve() asserts that it is non-zero and uses it
       * as the upper bound for a reservation.
       */
   65 static u_long sb_max_adj;       /* adjusted sb_max */
   66 
   67 /*
   68  * Procedures to manipulate state flags of socket
   69  * and do appropriate wakeups.  Normal sequence from the
   70  * active (originating) side is that soisconnecting() is
   71  * called during processing of connect() call,
   72  * resulting in an eventual call to soisconnected() if/when the
   73  * connection is established.  When the connection is torn down
   74  * soisdisconnecting() is called during processing of disconnect() call,
   75  * and soisdisconnected() is called when the connection to the peer
   76  * is totally severed.  The semantics of these routines are such that
   77  * connectionless protocols can call soisconnected() and soisdisconnected()
   78  * only, bypassing the in-progress calls when setting up a ``connection''
   79  * takes no time.
   80  *
   81  * From the passive side, a socket is created with
   82  * two queues of sockets: so_q0 for connections in progress
   83  * and so_q for connections already made and awaiting user acceptance.
   84  * As a protocol is preparing incoming connections, it creates a socket
   85  * structure queued on so_q0 by calling sonewconn().  When the connection
   86  * is established, soisconnected() is called, and transfers the
   87  * socket structure to so_q, making it available to accept().
   88  *
   89  * If a socket is closed with sockets on either
   90  * so_q0 or so_q, these sockets are dropped.
   91  *
   92  * If higher level protocols are implemented in
   93  * the kernel, the wakeups done here will sometimes
   94  * cause software-interrupt process scheduling.
   95  */
   96 
   97 void
   98 soisconnecting(struct socket *so)
   99 {
  100 
              /*
               * Mark so as having a connection attempt in progress: drop the
               * connected and disconnecting state bits and raise
               * SS_ISCONNECTING.  No sleepers are woken by this transition.
               */
  101         so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
  102         so->so_state |= SS_ISCONNECTING;
  103 }
  104 
  105 void
  106 soisconnected(struct socket *so)
  107 {
  108         struct socket   *head;
  109 
              /*
               * Mark so as connected.  If so has a head socket and is still
               * on that head's so_q0, move it to the head's so_q and wake the
               * head's readers and so_timeo sleepers; otherwise wake whoever
               * is waiting on so itself.
               */
              /* Read so_head up front: soqremque() clears it on success. */
  110         head = so->so_head;
  111         so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
  112         so->so_state |= SS_ISCONNECTED;
  113         if (head && soqremque(so, 0)) {
                      /* so was on so_q0: requeue it on so_q (queue 1). */
  114                 soqinsque(head, so, 1);
  115                 sorwakeup(head);
  116                 wakeup((caddr_t)&head->so_timeo);
  117         } else {
                      /* No head, or so was not on so_q0: notify so's own waiters. */
  118                 wakeup((caddr_t)&so->so_timeo);
  119                 sorwakeup(so);
  120                 sowwakeup(so);
  121         }
  122 }
  123 
  124 void
  125 soisdisconnecting(struct socket *so)
  126 {
  127 
              /*
               * Mark so as being torn down: clear SS_ISCONNECTING, set
               * SS_ISDISCONNECTING and both "can't send/receive more" bits,
               * then wake so_timeo sleepers and both buffer directions so
               * that waiters observe the new state.
               */
  128         so->so_state &= ~SS_ISCONNECTING;
  129         so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
  130         wakeup((caddr_t)&so->so_timeo);
  131         sowwakeup(so);
  132         sorwakeup(so);
  133 }
  134 
  135 void
  136 soisdisconnected(struct socket *so)
  137 {
  138 
              /*
               * Mark so as fully disconnected: clear the connecting, connected
               * and disconnecting bits, set SS_ISDISCONNECTED together with
               * both "can't send/receive more" bits, then wake so_timeo
               * sleepers and both buffer directions.
               */
  139         so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
  140         so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
  141         wakeup((caddr_t)&so->so_timeo);
  142         sowwakeup(so);
  143         sorwakeup(so);
  144 }
  145 
  146 /*
  147  * When an attempt at a new connection is noted on a socket
  148  * which accepts connections, sonewconn is called.  If the
  149  * connection is possible (subject to space constraints, etc.)
  150  * then we allocate a new structure, properly linked into the
  151  * data structure of the original socket, and return this.
  152  * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
       *
       * The new socket is queued on head's so_q0 when connstatus is 0 and
       * on so_q otherwise.  Returns the new socket, or NULL when the queue
       * limit is exceeded, no socket can be allocated without waiting, or
       * the protocol's PRU_ATTACH request fails.
  153  */
  154 struct socket *
  155 sonewconn(struct socket *head, int connstatus)
  156 {
  157         struct socket   *so;
  158         int             soqueue;
  159 
              /* Non-zero connstatus selects so_q (1); zero selects so_q0 (0). */
  160         soqueue = connstatus ? 1 : 0;
              /* Refuse once both queues together exceed 1.5 * so_qlimit. */
  161         if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
  162                 return ((struct socket *)0);
  163         so = pool_get(&socket_pool, PR_NOWAIT);
  164         if (so == NULL)
  165                 return (NULL);
  166         memset((caddr_t)so, 0, sizeof(*so));
              /* Inherit type, options, state, protocol and ownership from head. */
  167         so->so_type = head->so_type;
  168         so->so_options = head->so_options &~ SO_ACCEPTCONN;
  169         so->so_linger = head->so_linger;
  170         so->so_state = head->so_state | SS_NOFDREF;
  171         so->so_proto = head->so_proto;
  172         so->so_timeo = head->so_timeo;
  173         so->so_pgid = head->so_pgid;
  174         so->so_send = head->so_send;
  175         so->so_receive = head->so_receive;
  176         so->so_uidinfo = head->so_uidinfo;
  177 #ifdef MBUFTRACE
  178         so->so_mowner = head->so_mowner;
  179         so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
  180         so->so_snd.sb_mowner = head->so_snd.sb_mowner;
  181 #endif
              /* Same buffer sizes as head; the result is deliberately ignored. */
  182         (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
              /* Queue so before attaching; it is dequeued again if the attach fails. */
  183         soqinsque(head, so, soqueue);
  184         if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
  185             (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
  186             (struct lwp *)0)) {
  187                 (void) soqremque(so, soqueue);
  188                 pool_put(&socket_pool, so);
  189                 return (NULL);
  190         }
  191         if (connstatus) {
                      /* so went straight onto so_q: wake head's waiters. */
  192                 sorwakeup(head);
  193                 wakeup((caddr_t)&head->so_timeo);
  194                 so->so_state |= connstatus;
  195         }
  196         return (so);
  197 }
  198 
  199 void
  200 soqinsque(struct socket *head, struct socket *so, int q)
  201 {
  202 
              /*
               * Append so to one of head's connection queues: so_q0 when q is
               * 0, so_q otherwise.  Points so->so_head at head, records the
               * chosen queue in so->so_onq and bumps the matching length
               * counter.  DIAGNOSTIC kernels panic if so is already on a
               * queue (so_onq != NULL).
               */
  203 #ifdef DIAGNOSTIC
  204         if (so->so_onq != NULL)
  205                 panic("soqinsque");
  206 #endif
  207 
  208         so->so_head = head;
  209         if (q == 0) {
  210                 head->so_q0len++;
  211                 so->so_onq = &head->so_q0;
  212         } else {
  213                 head->so_qlen++;
  214                 so->so_onq = &head->so_q;
  215         }
  216         TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
  217 }
  218 
  219 int
  220 soqremque(struct socket *so, int q)
  221 {
  222         struct socket   *head;
  223 
              /*
               * Remove so from the queue of its head selected by q (0: so_q0,
               * otherwise so_q).  Returns 0 and changes nothing if so is not
               * currently on that queue; otherwise decrements the queue
               * length, unlinks so, clears so_onq and so_head, and returns 1.
               */
  224         head = so->so_head;
  225         if (q == 0) {
  226                 if (so->so_onq != &head->so_q0)
  227                         return (0);
  228                 head->so_q0len--;
  229         } else {
  230                 if (so->so_onq != &head->so_q)
  231                         return (0);
  232                 head->so_qlen--;
  233         }
  234         TAILQ_REMOVE(so->so_onq, so, so_qe);
  235         so->so_onq = NULL;
  236         so->so_head = NULL;
  237         return (1);
  238 }
  239 
  240 /*
  241  * Socantsendmore indicates that no more data will be sent on the
  242  * socket; it would normally be applied to a socket when the user
  243  * informs the system that no more data is to be sent, by the protocol
  244  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
  245  * will be received, and will normally be applied to the socket by a
  246  * protocol when it detects that the peer will send no more data.
  247  * Data queued for reading in the socket may yet be read.
  248  */
  249 
  250 void
  251 socantsendmore(struct socket *so)
  252 {
  253 
              /* Set SS_CANTSENDMORE on so, then wake its writers via sowwakeup(). */
  254         so->so_state |= SS_CANTSENDMORE;
  255         sowwakeup(so);
  256 }
  257 
  258 void
  259 socantrcvmore(struct socket *so)
  260 {
  261 
              /* Set SS_CANTRCVMORE on so, then wake its readers via sorwakeup(). */
  262         so->so_state |= SS_CANTRCVMORE;
  263         sorwakeup(so);
  264 }
  265 
  266 /*
  267  * Wait for data to arrive at/drain from a socket buffer.
       *
       * Sets SB_WAIT and sleeps on &sb->sb_cc, the channel that sowakeup()
       * wakes when it finds SB_WAIT set.  The sleep uses priority PSOCK,
       * the "netio" wait message and sb->sb_timeo as its timeout; PCATCH
       * is added unless SB_NOINTR is set.  Returns whatever tsleep()
       * returns.
  268  */
  269 int
  270 sbwait(struct sockbuf *sb)
  271 {
  272 
  273         sb->sb_flags |= SB_WAIT;
  274         return (tsleep((caddr_t)&sb->sb_cc,
  275             (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
  276             sb->sb_timeo));
  277 }
  278 
  279 /*
  280  * Lock a sockbuf already known to be locked;
  281  * return any error returned from sleep (EINTR).
       *
       * While SB_LOCK is held by someone else, set SB_WANT and sleep on
       * &sb->sb_flags with no timeout (PCATCH is added unless SB_NOINTR is
       * set).  A non-zero tsleep() result is returned immediately without
       * taking the lock.  Once SB_LOCK is clear it is set and 0 is
       * returned.
       * NOTE(review): the matching unlock/wakeup side is not in this part
       * of the file; presumably it tests SB_WANT and wakes &sb->sb_flags.
  282  */
  283 int
  284 sb_lock(struct sockbuf *sb)
  285 {
  286         int     error;
  287 
  288         while (sb->sb_flags & SB_LOCK) {
  289                 sb->sb_flags |= SB_WANT;
  290                 error = tsleep((caddr_t)&sb->sb_flags,
  291                     (sb->sb_flags & SB_NOINTR) ?  PSOCK : PSOCK|PCATCH,
  292                     netlck, 0);
  293                 if (error)
  294                         return (error);
  295         }
  296         sb->sb_flags |= SB_LOCK;
  297         return (0);
  298 }
  299 
  300 /*
  301  * Wakeup processes waiting on a socket buffer.
  302  * Do asynchronous notification via SIGIO
  303  * if the socket buffer has the SB_ASYNC flag set.
       *
       * so is the socket owning sb.  code is passed through to
       * fownsignal(); it also selects the poll band reported with SIGIO:
       * POLL_IN yields POLLIN|POLLRDNORM, anything else
       * POLLOUT|POLLWRNORM.  If SB_UPCALL is set, so->so_upcall is
       * invoked last with M_DONTWAIT.
  304  */
  305 void
  306 sowakeup(struct socket *so, struct sockbuf *sb, int code)
  307 {
  308         selnotify(&sb->sb_sel, 0);
  309         sb->sb_flags &= ~SB_SEL;
              /* Wake sbwait() sleepers; they sleep on &sb->sb_cc with SB_WAIT set. */
  310         if (sb->sb_flags & SB_WAIT) {
  311                 sb->sb_flags &= ~SB_WAIT;
  312                 wakeup((caddr_t)&sb->sb_cc);
  313         }
  314         if (sb->sb_flags & SB_ASYNC) {
  315                 int band;
  316                 if (code == POLL_IN)
  317                         band = POLLIN|POLLRDNORM;
  318                 else
  319                         band = POLLOUT|POLLWRNORM;
  320                 fownsignal(so->so_pgid, SIGIO, code, band, so);
  321         }
  322         if (sb->sb_flags & SB_UPCALL)
  323                 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  324 }
  325 
  326 /*
  327  * Socket buffer (struct sockbuf) utility routines.
  328  *
  329  * Each socket contains two socket buffers: one for sending data and
  330  * one for receiving data.  Each buffer contains a queue of mbufs,
  331  * information about the number of mbufs and amount of data in the
  332  * queue, and other fields allowing poll() statements and notification
  333  * on data availability to be implemented.
  334  *
  335  * Data stored in a socket buffer is maintained as a list of records.
  336  * Each record is a list of mbufs chained together with the m_next
  337  * field.  Records are chained together with the m_nextpkt field. The upper
  338  * level routine soreceive() expects the following conventions to be
  339  * observed when placing information in the receive buffer:
  340  *
  341  * 1. If the protocol requires each message be preceded by the sender's
  342  *    name, then a record containing that name must be present before
  343  *    any associated data (mbuf's must be of type MT_SONAME).
  344  * 2. If the protocol supports the exchange of ``access rights'' (really
  345  *    just additional data associated with the message), and there are
  346  *    ``rights'' to be received, then a record containing this data
  347  *    should be present (mbuf's must be of type MT_CONTROL).
  348  * 3. If a name or rights record exists, then it must be followed by
  349  *    a data record, perhaps of zero length.
  350  *
  351  * Before using a new socket structure it is first necessary to reserve
  352  * buffer space to the socket, by calling sbreserve().  This should commit
  353  * some of the available buffer space in the system buffer pool for the
  354  * socket (currently, it does nothing but enforce limits).  The space
  355  * should be released by calling sbrelease() when the socket is destroyed.
  356  */
  357 
  358 int
  359 sb_max_set(u_long new_sbmax)
  360 {
              /*
               * Install a new maximum socket buffer size.  Values below
               * 16 KB are rejected with EINVAL.  Otherwise sb_max and the
               * derived sb_max_adj are updated together at splsoftnet and 0
               * is returned.
               */
  361         int s;
  362 
  363         if (new_sbmax < (16 * 1024))
  364                 return (EINVAL);
  365 
  366         s = splsoftnet();
  367         sb_max = new_sbmax;
              /* The cast makes the multiplication happen in u_quad_t. */
  368         sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
  369         splx(s);
  370 
  371         return (0);
  372 }
  373 
  374 int
  375 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
  376 {
  377         /*
               * Reserve sndcc bytes of send and rcvcc bytes of receive
               * buffer space for so and give unset low-water marks their
               * defaults.  Returns 0 on success, or ENOBUFS if either
               * sbreserve() call fails (a successful send-side reservation
               * is released again when the receive side fails).
               *
  378          * there's at least one application (a configure script of screen)
  379          * which expects a fifo is writable even if it has "some" bytes
  380          * in its buffer.
  381          * so we want to make sure (hiwat - lowat) >= (some bytes).
  382          *
  383          * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
  384          * we expect it's large enough for such applications.
  385          */
  386         u_long  lowat = MAX(sock_loan_thresh, MCLBYTES);
  387         u_long  hiwat = lowat + PIPE_BUF;
  388 
              /* Never reserve less send space than lowat + PIPE_BUF. */
  389         if (sndcc < hiwat)
  390                 sndcc = hiwat;
  391         if (sbreserve(&so->so_snd, sndcc, so) == 0)
  392                 goto bad;
  393         if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
  394                 goto bad2;
              /* Only fill in low-water marks that are still zero. */
  395         if (so->so_rcv.sb_lowat == 0)
  396                 so->so_rcv.sb_lowat = 1;
  397         if (so->so_snd.sb_lowat == 0)
  398                 so->so_snd.sb_lowat = lowat;
  399         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  400                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  401         return (0);
  402  bad2:
              /* Receive side failed: undo the send-side reservation. */
  403         sbrelease(&so->so_snd, so);
  404  bad:
  405         return (ENOBUFS);
  406 }
  407 
  408 /*
  409  * Allot mbufs to a sockbuf.
  410  * Attempt to scale mbmax so that mbcnt doesn't become limiting
  411  * if buffering efficiency is near the normal case.
       *
       * cc is the requested size in bytes; so may be NULL.  Returns 1 on
       * success and 0 on failure, i.e. when cc is 0, cc exceeds sb_max_adj,
       * or chgsbsize() refuses the change.  On success sb_mbmax becomes
       * min(cc * 2, sb_max) and sb_lowat is clamped to sb_hiwat.
       * NOTE(review): sb_hiwat is handed to chgsbsize() by address and is
       * presumably updated there; chgsbsize() is not part of this file.
  412  */
  413 int
  414 sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
  415 {
  416         struct lwp *l = curlwp; /* XXX */
  417         rlim_t maxcc;
  418         struct uidinfo *uidinfo;
  419 
  420         KDASSERT(sb_max_adj != 0);
  421         if (cc == 0 || cc > sb_max_adj)
  422                 return (0);
  423         if (so) {
                      /*
                       * The current process's RLIMIT_SBSIZE applies only when
                       * its effective uid matches the uid the socket is
                       * accounted to; otherwise no limit is passed on.
                       */
  424                 if (l && kauth_cred_geteuid(l->l_cred) == so->so_uidinfo->ui_uid)
  425                         maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
  426                 else
  427                         maxcc = RLIM_INFINITY;
  428                 uidinfo = so->so_uidinfo;
  429         } else {
                      /* No socket: account against uid 0 with no limit. */
  430                 uidinfo = uid_find(0);  /* XXX: nothing better */
  431                 maxcc = RLIM_INFINITY;
  432         }
  433         if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
  434                 return 0;
  435         sb->sb_mbmax = min(cc * 2, sb_max);
  436         if (sb->sb_lowat > sb->sb_hiwat)
  437                 sb->sb_lowat = sb->sb_hiwat;
  438         return (1);
  439 }
  440 
  441 /*
  442  * Free mbufs held by a socket, and reserved mbuf space.
       *
       * Flushes sb with sbflush(), asks chgsbsize() to bring the
       * reservation charged to so->so_uidinfo down to 0 (its result is
       * ignored), and zeroes sb_mbmax.  Unlike sbreserve(), so is
       * dereferenced unconditionally and must not be NULL.
  443  */
  444 void
  445 sbrelease(struct sockbuf *sb, struct socket *so)
  446 {
  447 
  448         sbflush(sb);
  449         (void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0,
  450             RLIM_INFINITY);
  451         sb->sb_mbmax = 0;
  452 }
  453 
  454 /*
  455  * Routines to add and remove
  456  * data from an mbuf queue.
  457  *
  458  * The routines sbappend() or sbappendrecord() are normally called to
  459  * append new mbufs to a socket buffer, after checking that adequate
  460  * space is available, comparing the function sbspace() with the amount
  461  * of data to be added.  sbappendrecord() differs from sbappend() in
  462  * that data supplied is treated as the beginning of a new record.
  463  * To place a sender's address, optional access rights, and data in a
  464  * socket receive buffer, sbappendaddr() should be used.  To place
  465  * access rights and data in a socket receive buffer, sbappendrights()
  466  * should be used.  In either case, the new data begins a new record.
  467  * Note that unlike sbappend() and sbappendrecord(), these routines check
  468  * for the caller that there will be enough space to store the data.
  469  * Each fails if there is not enough space, or if it cannot find mbufs
  470  * to store additional information in.
  471  *
  472  * Reliable protocols may use the socket send buffer to hold data
  473  * awaiting acknowledgement.  Data is normally copied from a socket
  474  * send buffer in a protocol with m_copy for output to a peer,
  475  * and then removing the data from the socket buffer with sbdrop()
  476  * or sbdroprecord() when the data is acknowledged by the peer.
  477  */
  478 
  479 #ifdef SOCKBUF_DEBUG
  480 void
  481 sblastrecordchk(struct sockbuf *sb, const char *where)
  482 {
              /*
               * SOCKBUF_DEBUG consistency check: follow m_nextpkt from sb_mb
               * to the last record and compare it with sb_lastrecord.  On a
               * mismatch, print the record chain and panic, naming the
               * caller-supplied location string where.
               */
  483         struct mbuf *m = sb->sb_mb;
  484 
  485         while (m && m->m_nextpkt)
  486                 m = m->m_nextpkt;
  487 
  488         if (m != sb->sb_lastrecord) {
  489                 printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
  490                     sb->sb_mb, sb->sb_lastrecord, m);
  491                 printf("packet chain:\n");
  492                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
  493                         printf("\t%p\n", m);
  494                 panic("sblastrecordchk from %s", where);
  495         }
  496 }
  497 
  498 void
  499 sblastmbufchk(struct sockbuf *sb, const char *where)
  500 {
              /*
               * SOCKBUF_DEBUG consistency check: find the last mbuf of the
               * last record and compare it with sb_mbtail.  On a mismatch,
               * print every record with its mbufs and panic, naming the
               * caller-supplied location string where.
               */
  501         struct mbuf *m = sb->sb_mb;
  502         struct mbuf *n;
  503 
              /* Walk to the last record ... */
  504         while (m && m->m_nextpkt)
  505                 m = m->m_nextpkt;
  506 
              /* ... then to the last mbuf within it. */
  507         while (m && m->m_next)
  508                 m = m->m_next;
  509 
  510         if (m != sb->sb_mbtail) {
  511                 printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
  512                     sb->sb_mb, sb->sb_mbtail, m);
  513                 printf("packet tree:\n");
  514                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
  515                         printf("\t");
  516                         for (n = m; n != NULL; n = n->m_next)
  517                                 printf("%p ", n);
  518                         printf("\n");
  519                 }
  520                 panic("sblastmbufchk from %s", where);
  521         }
  522 }
  523 #endif /* SOCKBUF_DEBUG */
  524 
  525 /*
  526  * Link a chain of records onto a socket buffer
       *
       * m0 is the first record of the chain and mlast its last record.
       * The chain is hooked after sb_lastrecord, or becomes sb_mb when the
       * buffer has no last record, and sb_lastrecord is advanced to mlast.
       * Only sb_mb, sb_lastrecord and one m_nextpkt link are written;
       * sb_mbtail and the byte/mbuf accounting are left to the caller.
  527  */
  528 #define SBLINKRECORDCHAIN(sb, m0, mlast)                                \
  529 do {                                                                    \
  530         if ((sb)->sb_lastrecord != NULL)                                \
  531                 (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
  532         else                                                            \
  533                 (sb)->sb_mb = (m0);                                     \
  534         (sb)->sb_lastrecord = (mlast);                                  \
  535 } while (/*CONSTCOND*/0)
  536 
  537 
      /* Link the single record m0 (a chain whose first and last record are m0). */
  538 #define SBLINKRECORD(sb, m0)                                            \
  539     SBLINKRECORDCHAIN(sb, m0, m0)
  540 
  541 /*
  542  * Append mbuf chain m to the last record in the
  543  * socket buffer sb.  The additional space associated
  544  * with the mbuf chain is recorded in sb.  Empty mbufs are
  545  * discarded and mbufs are compacted where possible.
       *
       * A NULL m is ignored.  If the last record already contains an mbuf
       * flagged M_EOR, m is handed to sbappendrecord() and starts a new
       * record instead.
  546  */
  547 void
  548 sbappend(struct sockbuf *sb, struct mbuf *m)
  549 {
  550         struct mbuf     *n;
  551 
  552         if (m == 0)
  553                 return;
  554 
  555 #ifdef MBUFTRACE
  556         m_claimm(m, sb->sb_mowner);
  557 #endif
  558 
  559         SBLASTRECORDCHK(sb, "sbappend 1");
  560 
  561         if ((n = sb->sb_lastrecord) != NULL) {
  562                 /*
  563                  * XXX Would like to simply use sb_mbtail here, but
  564                  * XXX I need to verify that I won't miss an EOR that
  565                  * XXX way.
  566                  */
                      /*
                       * Scan the last record for M_EOR; when the loop ends
                       * without finding one, n is the record's last mbuf.
                       */
  567                 do {
  568                         if (n->m_flags & M_EOR) {
  569                                 sbappendrecord(sb, m); /* XXXXXX!!!! */
  570                                 return;
  571                         }
  572                 } while (n->m_next && (n = n->m_next));
  573         } else {
  574                 /*
  575                  * If this is the first record in the socket buffer, it's
  576                  * also the last record.
  577                  */
  578                 sb->sb_lastrecord = m;
  579         }
              /* n is NULL here when the buffer had no records. */
  580         sbcompress(sb, m, n);
  581         SBLASTRECORDCHK(sb, "sbappend 2");
  582 }
  583 
  584 /*
  585  * This version of sbappend() should only be used when the caller
  586  * absolutely knows that there will never be more than one record
  587  * in the socket buffer, that is, a stream protocol (such as TCP).
       *
       * m is dereferenced by the first assertion, so it must not be NULL
       * and must not be linked to a further packet (m_nextpkt == NULL).
       * The chain is compressed onto the buffer after sb_mbtail, and
       * sb_lastrecord is then reset to sb_mb.
  588  */
  589 void
  590 sbappendstream(struct sockbuf *sb, struct mbuf *m)
  591 {
  592 
  593         KDASSERT(m->m_nextpkt == NULL);
              /* Single-record invariant: the first record is also the last. */
  594         KASSERT(sb->sb_mb == sb->sb_lastrecord);
  595 
  596         SBLASTMBUFCHK(sb, __func__);
  597 
  598 #ifdef MBUFTRACE
  599         m_claimm(m, sb->sb_mowner);
  600 #endif
  601 
  602         sbcompress(sb, m, sb->sb_mbtail);
  603 
  604         sb->sb_lastrecord = sb->sb_mb;
  605         SBLASTRECORDCHK(sb, __func__);
  606 }
  607 
  608 #ifdef SOCKBUF_DEBUG
  609 void
  610 sbcheck(struct sockbuf *sb)
  611 {
              /*
               * SOCKBUF_DEBUG consistency check: recompute the byte count and
               * the mbuf storage (MSIZE per mbuf plus ext_size for M_EXT
               * mbufs) along the m_next chain starting at sb_mb, and panic if
               * they disagree with sb_cc / sb_mbcnt.  It also panics if any
               * mbuf on that chain has m_nextpkt set, so it only accepts a
               * buffer holding a single record.
               */
  612         struct mbuf     *m;
  613         u_long          len, mbcnt;
  614 
  615         len = 0;
  616         mbcnt = 0;
  617         for (m = sb->sb_mb; m; m = m->m_next) {
  618                 len += m->m_len;
  619                 mbcnt += MSIZE;
  620                 if (m->m_flags & M_EXT)
  621                         mbcnt += m->m_ext.ext_size;
  622                 if (m->m_nextpkt)
  623                         panic("sbcheck nextpkt");
  624         }
  625         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
  626                 printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
  627                     mbcnt, sb->sb_mbcnt);
  628                 panic("sbcheck");
  629         }
  630 }
  631 #endif
  632 
  633 /*
  634  * As above, except the mbuf chain
  635  * begins a new record.
       *
       * That is: append mbuf chain m0 to sb as a new record.  A NULL m0 is
       * ignored.  The first mbuf is accounted and linked in as the record
       * head as it is (so a zero-length record is possible); the rest of
       * the chain is detached and passed to sbcompress() to be appended
       * after it.
  636  */
  637 void
  638 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
  639 {
  640         struct mbuf     *m;
  641 
  642         if (m0 == 0)
  643                 return;
  644 
  645 #ifdef MBUFTRACE
  646         m_claimm(m0, sb->sb_mowner);
  647 #endif
  648         /*
  649          * Put the first mbuf on the queue.
  650          * Note this permits zero length records.
  651          */
  652         sballoc(sb, m0);
  653         SBLASTRECORDCHK(sb, "sbappendrecord 1");
  654         SBLINKRECORD(sb, m0);
              /* Split the head from the remainder of the chain. */
  655         m = m0->m_next;
  656         m0->m_next = 0;
              /* Move an M_EOR flag from the head to the next mbuf of the chain. */
  657         if (m && (m0->m_flags & M_EOR)) {
  658                 m0->m_flags &= ~M_EOR;
  659                 m->m_flags |= M_EOR;
  660         }
  661         sbcompress(sb, m, m0);
  662         SBLASTRECORDCHK(sb, "sbappendrecord 2");
  663 }
  664 
  665 /*
  666  * As above except that OOB data
  667  * is inserted at the beginning of the sockbuf,
  668  * but after any other OOB data.
       *
       * m0 becomes a new record.  A NULL m0 is ignored.  The record is
       * inserted in front of the first existing record that is not
       * out-of-band data, or at the end if every record is.
  669  */
  670 void
  671 sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
  672 {
  673         struct mbuf     *m, **mp;
  674 
  675         if (m0 == 0)
  676                 return;
  677 
  678         SBLASTRECORDCHK(sb, "sbinsertoob 1");
  679 
              /*
               * Find the insertion point.  A record is skipped when its first
               * mbuf is MT_OOBDATA, or when it starts with MT_CONTROL mbufs
               * that are followed by an MT_OOBDATA mbuf.  The scan stops at
               * the first record that is not skipped; mp then addresses the
               * link that has to point at m0.
               */
  680         for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
  681             again:
  682                 switch (m->m_type) {
  683 
  684                 case MT_OOBDATA:
  685                         continue;               /* WANT next train */
  686 
  687                 case MT_CONTROL:
  688                         if ((m = m->m_next) != NULL)
  689                                 goto again;     /* inspect THIS train further */
  690                 }
  691                 break;
  692         }
  693         /*
  694          * Put the first mbuf on the queue.
  695          * Note this permits zero length records.
  696          */
  697         sballoc(sb, m0);
  698         m0->m_nextpkt = *mp;
  699         if (*mp == NULL) {
  700                 /* m0 is actually the new tail */
  701                 sb->sb_lastrecord = m0;
  702         }
  703         *mp = m0;
              /* Split the head from the remainder, moving M_EOR as sbappendrecord() does. */
  704         m = m0->m_next;
  705         m0->m_next = 0;
  706         if (m && (m0->m_flags & M_EOR)) {
  707                 m0->m_flags &= ~M_EOR;
  708                 m->m_flags |= M_EOR;
  709         }
  710         sbcompress(sb, m, m0);
  711         SBLASTRECORDCHK(sb, "sbinsertoob 2");
  712 }
  713 
  714 /*
  715  * Append address and data, and optionally, control (ancillary) data
  716  * to the receive queue of a socket.  If present,
  717  * m0 must include a packet header with total length.
  718  * Returns 0 if no space in sockbuf or insufficient mbufs.
       *
       * Returns 1 on success.  The new record is laid out as
       * address mbuf -> control chain -> m0.  On the failure paths visible
       * here only the address mbuf allocated by this function is freed;
       * m0 and control are left untouched.  A non-NULL m0 without
       * M_PKTHDR causes a panic.
  719  */
  720 int
  721 sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
  722         struct mbuf *control)
  723 {
  724         struct mbuf     *m, *n, *nlast;
  725         int             space, len;
  726 
              /* space = address length + data length + control length. */
  727         space = asa->sa_len;
  728 
  729         if (m0 != NULL) {
  730                 if ((m0->m_flags & M_PKTHDR) == 0)
  731                         panic("sbappendaddr");
  732                 space += m0->m_pkthdr.len;
  733 #ifdef MBUFTRACE
  734                 m_claimm(m0, sb->sb_mowner);
  735 #endif
  736         }
              /* After this loop n is the last control mbuf, or NULL if there is none. */
  737         for (n = control; n; n = n->m_next) {
  738                 space += n->m_len;
  739                 MCLAIM(n, sb->sb_mowner);
  740                 if (n->m_next == 0)     /* keep pointer to last control buf */
  741                         break;
  742         }
  743         if (space > sbspace(sb))
  744                 return (0);
  745         MGET(m, M_DONTWAIT, MT_SONAME);
  746         if (m == 0)
  747                 return (0);
  748         MCLAIM(m, sb->sb_mowner);
  749         /*
  750          * XXX avoid 'comparison always true' warning which isn't easily
  751          * avoided.
  752          */
  753         len = asa->sa_len;
              /* An address longer than MLEN needs external storage. */
  754         if (len > MLEN) {
  755                 MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
  756                 if ((m->m_flags & M_EXT) == 0) {
  757                         m_free(m);
  758                         return (0);
  759                 }
  760         }
  761         m->m_len = asa->sa_len;
  762         memcpy(mtod(m, caddr_t), asa, asa->sa_len);
  763         if (n)
  764                 n->m_next = m0;         /* concatenate data to control */
  765         else
  766                 control = m0;
  767         m->m_next = control;
  768 
  769         SBLASTRECORDCHK(sb, "sbappendaddr 1");
  770 
              /* Account every mbuf of the new record; n ends on its last mbuf. */
  771         for (n = m; n->m_next != NULL; n = n->m_next)
  772                 sballoc(sb, n);
  773         sballoc(sb, n);
  774         nlast = n;
  775         SBLINKRECORD(sb, m);
  776 
  777         sb->sb_mbtail = nlast;
  778         SBLASTMBUFCHK(sb, "sbappendaddr");
  779 
  780         SBLASTRECORDCHK(sb, "sbappendaddr 2");
  781 
  782         return (1);
  783 }
  784 
  785 /*
  786  * Helper for sbappendaddrchain: prepend a struct sockaddr* to
  787  * an mbuf chain.
       *
       * Allocates a packet-header mbuf of type MT_SONAME without waiting,
       * copies asa into it and links m0 behind it.  The new header's
       * m_pkthdr.len is the address length plus m0's m_pkthdr.len, so m0
       * must be non-NULL and carry a packet header.  Returns the new head
       * of the chain, or NULL (0) if no mbuf is available; m0 is not
       * freed in that case.
       *
       * Addresses longer than MHLEN are not supported yet: the external
       * storage path is compiled out (#ifdef notyet) and only a KASSERT
       * guards the length.
  788  */
  789 static inline struct mbuf *
  790 m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
  791                    const struct sockaddr *asa)
  792 {
  793         struct mbuf *m;
  794         const int salen = asa->sa_len;
  795 
  796         /* only the first in each chain need be a pkthdr */
  797         MGETHDR(m, M_DONTWAIT, MT_SONAME);
  798         if (m == 0)
  799                 return (0);
  800         MCLAIM(m, sb->sb_mowner);
  801 #ifdef notyet
  802         if (salen > MHLEN) {
  803                 MEXTMALLOC(m, salen, M_NOWAIT);
  804                 if ((m->m_flags & M_EXT) == 0) {
  805                         m_free(m);
  806                         return (0);
  807                 }
  808         }
  809 #else
  810         KASSERT(salen <= MHLEN);
  811 #endif
  812         m->m_len = salen;
  813         memcpy(mtod(m, caddr_t), asa, salen);
  814         m->m_next = m0;
  815         m->m_pkthdr.len = salen + m0->m_pkthdr.len;
  816 
  817         return m;
  818 }
  819 
  820 int
  821 sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
  822                   struct mbuf *m0, int sbprio)
  823 {
  824         int space;
  825         struct mbuf *m, *n, *n0, *nlast;
  826         int error;
  827 
  828         /*
  829          * XXX sbprio reserved for encoding priority of this* request:
  830          *  SB_PRIO_NONE --> honour normal sb limits
  831          *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
  832          *      take whole chain. Intended for large requests
  833          *      that should be delivered atomically (all, or none).
  834          * SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow
  835          *       over normal socket limits, for messages indicating
  836          *       buffer overflow in earlier normal/lower-priority messages
  837          * SB_PRIO_BESTEFFORT -->  ignore limits entirely.
  838          *       Intended for  kernel-generated messages only.
  839          *        Up to generator to avoid total mbuf resource exhaustion.
  840          */
  841         (void)sbprio;
  842 
  843         if (m0 && (m0->m_flags & M_PKTHDR) == 0)
  844                 panic("sbappendaddrchain");
  845 
  846         space = sbspace(sb);
  847 
  848 #ifdef notyet
  849         /*
  850          * Enforce SB_PRIO_* limits as described above.
  851          */
  852 #endif
  853 
  854         n0 = NULL;
  855         nlast = NULL;
  856         for (m = m0; m; m = m->m_nextpkt) {
  857                 struct mbuf *np;
  858 
  859 #ifdef MBUFTRACE
  860                 m_claimm(m, sb->sb_mowner);
  861 #endif
  862 
  863                 /* Prepend sockaddr to this record (m) of input chain m0 */
  864                 n = m_prepend_sockaddr(sb, m, asa);
  865                 if (n == NULL) {
  866                         error = ENOBUFS;
  867                         goto bad;
  868                 }
  869 
  870                 /* Append record (asa+m) to end of new chain n0 */
  871                 if (n0 == NULL) {
  872                         n0 = n;
  873                 } else {
  874                         nlast->m_nextpkt = n;
  875                 }
  876                 /* Keep track of last record on new chain */
  877                 nlast = n;
  878 
  879                 for (np = n; np; np = np->m_next)
  880                         sballoc(sb, np);
  881         }
  882 
  883         SBLASTRECORDCHK(sb, "sbappendaddrchain 1");
  884 
  885         /* Drop the entire chain of (asa+m) records onto the socket */
  886         SBLINKRECORDCHAIN(sb, n0, nlast);
  887 
  888         SBLASTRECORDCHK(sb, "sbappendaddrchain 2");
  889 
  890         for (m = nlast; m->m_next; m = m->m_next)
  891                 ;
  892         sb->sb_mbtail = m;
  893         SBLASTMBUFCHK(sb, "sbappendaddrchain");
  894 
  895         return (1);
  896 
  897 bad:
  898         /*
  899          * On error, free the prepended addreseses. For consistency
  900          * with sbappendaddr(), leave it to our caller to free
  901          * the input record chain passed to us as m0.
  902          */
  903         while ((n = n0) != NULL) {
  904                 struct mbuf *np;
  905 
  906                 /* Undo the sballoc() of this record */
  907                 for (np = n; np; np = np->m_next)
  908                         sbfree(sb, np);
  909 
  910                 n0 = n->m_nextpkt;      /* iterate at next prepended address */
  911                 MFREE(n, np);           /* free prepended address (not data) */
  912         }
  913         return 0;
  914 }
  915 
  916 
  917 int
  918 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
  919 {
  920         struct mbuf     *m, *mlast, *n;
  921         int             space;
  922 
  923         space = 0;
  924         if (control == 0)
  925                 panic("sbappendcontrol");
  926         for (m = control; ; m = m->m_next) {
  927                 space += m->m_len;
  928                 MCLAIM(m, sb->sb_mowner);
  929                 if (m->m_next == 0)
  930                         break;
  931         }
  932         n = m;                  /* save pointer to last control buffer */
  933         for (m = m0; m; m = m->m_next) {
  934                 MCLAIM(m, sb->sb_mowner);
  935                 space += m->m_len;
  936         }
  937         if (space > sbspace(sb))
  938                 return (0);
  939         n->m_next = m0;                 /* concatenate data to control */
  940 
  941         SBLASTRECORDCHK(sb, "sbappendcontrol 1");
  942 
  943         for (m = control; m->m_next != NULL; m = m->m_next)
  944                 sballoc(sb, m);
  945         sballoc(sb, m);
  946         mlast = m;
  947         SBLINKRECORD(sb, control);
  948 
  949         sb->sb_mbtail = mlast;
  950         SBLASTMBUFCHK(sb, "sbappendcontrol");
  951 
  952         SBLASTRECORDCHK(sb, "sbappendcontrol 2");
  953 
  954         return (1);
  955 }
  956 
  957 /*
  958  * Compress mbuf chain m into the socket
  959  * buffer sb following mbuf n.  If n
  960  * is null, the buffer is presumed empty.
  961  */
  962 void
  963 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
  964 {
  965         int             eor;
  966         struct mbuf     *o;
  967 
  968         eor = 0;
  969         while (m) {
  970                 eor |= m->m_flags & M_EOR;
  971                 if (m->m_len == 0 &&
  972                     (eor == 0 ||
  973                      (((o = m->m_next) || (o = n)) &&
  974                       o->m_type == m->m_type))) {
  975                         if (sb->sb_lastrecord == m)
  976                                 sb->sb_lastrecord = m->m_next;
  977                         m = m_free(m);
  978                         continue;
  979                 }
  980                 if (n && (n->m_flags & M_EOR) == 0 &&
  981                     /* M_TRAILINGSPACE() checks buffer writeability */
  982                     m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
  983                     m->m_len <= M_TRAILINGSPACE(n) &&
  984                     n->m_type == m->m_type) {
  985                         memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
  986                             (unsigned)m->m_len);
  987                         n->m_len += m->m_len;
  988                         sb->sb_cc += m->m_len;
  989                         m = m_free(m);
  990                         continue;
  991                 }
  992                 if (n)
  993                         n->m_next = m;
  994                 else
  995                         sb->sb_mb = m;
  996                 sb->sb_mbtail = m;
  997                 sballoc(sb, m);
  998                 n = m;
  999                 m->m_flags &= ~M_EOR;
 1000                 m = m->m_next;
 1001                 n->m_next = 0;
 1002         }
 1003         if (eor) {
 1004                 if (n)
 1005                         n->m_flags |= eor;
 1006                 else
 1007                         printf("semi-panic: sbcompress\n");
 1008         }
 1009         SBLASTMBUFCHK(sb, __func__);
 1010 }
 1011 
 1012 /*
 1013  * Free all mbufs in a sockbuf.
 1014  * Check that all resources are reclaimed.
 1015  */
 1016 void
 1017 sbflush(struct sockbuf *sb)
 1018 {
 1019 
 1020         KASSERT((sb->sb_flags & SB_LOCK) == 0);
 1021 
 1022         while (sb->sb_mbcnt)
 1023                 sbdrop(sb, (int)sb->sb_cc);
 1024 
 1025         KASSERT(sb->sb_cc == 0);
 1026         KASSERT(sb->sb_mb == NULL);
 1027         KASSERT(sb->sb_mbtail == NULL);
 1028         KASSERT(sb->sb_lastrecord == NULL);
 1029 }
 1030 
 1031 /*
 1032  * Drop data from (the front of) a sockbuf.
 1033  */
 1034 void
 1035 sbdrop(struct sockbuf *sb, int len)
 1036 {
 1037         struct mbuf     *m, *mn, *next;
 1038 
 1039         next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
 1040         while (len > 0) {
 1041                 if (m == 0) {
 1042                         if (next == 0)
 1043                                 panic("sbdrop");
 1044                         m = next;
 1045                         next = m->m_nextpkt;
 1046                         continue;
 1047                 }
 1048                 if (m->m_len > len) {
 1049                         m->m_len -= len;
 1050                         m->m_data += len;
 1051                         sb->sb_cc -= len;
 1052                         break;
 1053                 }
 1054                 len -= m->m_len;
 1055                 sbfree(sb, m);
 1056                 MFREE(m, mn);
 1057                 m = mn;
 1058         }
 1059         while (m && m->m_len == 0) {
 1060                 sbfree(sb, m);
 1061                 MFREE(m, mn);
 1062                 m = mn;
 1063         }
 1064         if (m) {
 1065                 sb->sb_mb = m;
 1066                 m->m_nextpkt = next;
 1067         } else
 1068                 sb->sb_mb = next;
 1069         /*
 1070          * First part is an inline SB_EMPTY_FIXUP().  Second part
 1071          * makes sure sb_lastrecord is up-to-date if we dropped
 1072          * part of the last record.
 1073          */
 1074         m = sb->sb_mb;
 1075         if (m == NULL) {
 1076                 sb->sb_mbtail = NULL;
 1077                 sb->sb_lastrecord = NULL;
 1078         } else if (m->m_nextpkt == NULL)
 1079                 sb->sb_lastrecord = m;
 1080 }
 1081 
 1082 /*
 1083  * Drop a record off the front of a sockbuf
 1084  * and move the next record to the front.
 1085  */
 1086 void
 1087 sbdroprecord(struct sockbuf *sb)
 1088 {
 1089         struct mbuf     *m, *mn;
 1090 
 1091         m = sb->sb_mb;
 1092         if (m) {
 1093                 sb->sb_mb = m->m_nextpkt;
 1094                 do {
 1095                         sbfree(sb, m);
 1096                         MFREE(m, mn);
 1097                 } while ((m = mn) != NULL);
 1098         }
 1099         SB_EMPTY_FIXUP(sb);
 1100 }
 1101 
 1102 /*
 1103  * Create a "control" mbuf containing the specified data
 1104  * with the specified type for presentation on a socket buffer.
 1105  */
 1106 struct mbuf *
 1107 sbcreatecontrol(caddr_t p, int size, int type, int level)
 1108 {
 1109         struct cmsghdr  *cp;
 1110         struct mbuf     *m;
 1111 
 1112         if (CMSG_SPACE(size) > MCLBYTES) {
 1113                 printf("sbcreatecontrol: message too large %d\n", size);
 1114                 return NULL;
 1115         }
 1116 
 1117         if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
 1118                 return ((struct mbuf *) NULL);
 1119         if (CMSG_SPACE(size) > MLEN) {
 1120                 MCLGET(m, M_DONTWAIT);
 1121                 if ((m->m_flags & M_EXT) == 0) {
 1122                         m_free(m);
 1123                         return NULL;
 1124                 }
 1125         }
 1126         cp = mtod(m, struct cmsghdr *);
 1127         memcpy(CMSG_DATA(cp), p, size);
 1128         m->m_len = CMSG_SPACE(size);
 1129         cp->cmsg_len = CMSG_LEN(size);
 1130         cp->cmsg_level = level;
 1131         cp->cmsg_type = type;
 1132         return (m);
 1133 }

Cache object: 5ebbb7b4246f45b242e59bafe4159c94


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.