The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket2.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)uipc_socket2.c      8.1 (Berkeley) 6/10/93
   34  * $FreeBSD$
   35  */
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/domain.h>
   40 #include <sys/kernel.h>
   41 #include <sys/proc.h>
   42 #include <sys/malloc.h>
   43 #include <sys/mbuf.h>
   44 #include <sys/protosw.h>
   45 #include <sys/stat.h>
   46 #include <sys/socket.h>
   47 #include <sys/socketvar.h>
   48 #include <sys/signalvar.h>
   49 #include <sys/sysctl.h>
   50 
   51 /*
   52  * Primitive routines for operating on sockets and socket buffers
   53  */
   54 
   55 u_long  sb_max = SB_MAX;                /* XXX should be static */
   56 /* sbreserve() scales a byte reservation by sb_efficiency to get sb_mbmax, capped at sb_max. */
   57 static  u_long sb_efficiency = 8;       /* parameter for sbreserve() */
   58 
   59 /*
   60  * Procedures to manipulate state flags of socket
   61  * and do appropriate wakeups.  Normal sequence from the
   62  * active (originating) side is that soisconnecting() is
   63  * called during processing of connect() call,
   64  * resulting in an eventual call to soisconnected() if/when the
   65  * connection is established.  When the connection is torn down
   66  * soisdisconnecting() is called during processing of disconnect() call,
   67  * and soisdisconnected() is called when the connection to the peer
   68  * is totally severed.  The semantics of these routines are such that
   69  * connectionless protocols can call soisconnected() and soisdisconnected()
   70  * only, bypassing the in-progress calls when setting up a ``connection''
   71  * takes no time.
   72  *
   73  * From the passive side, a socket is created with
   74  * two queues of sockets: so_q0 for connections in progress
   75  * and so_q for connections already made and awaiting user acceptance.
   76  * As a protocol is preparing incoming connections, it creates a socket
   77  * structure queued on so_q0 by calling sonewconn().  When the connection
   78  * is established, soisconnected() is called, and transfers the
   79  * socket structure to so_q, making it available to accept().
   80  *
   81  * If a socket is closed with sockets on either
   82  * so_q0 or so_q, these sockets are dropped.
   83  *
   84  * If higher level protocols are implemented in
   85  * the kernel, the wakeups done here will sometimes
   86  * cause software-interrupt process scheduling.
   87  */
   88 
   89 void
   90 soisconnecting(so)
   91         register struct socket *so;
   92 {
   93 
   94         so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
   95         so->so_state |= SS_ISCONNECTING;
   96 }
   97 
   98 void
   99 soisconnected(so)
  100         register struct socket *so;
  101 {
  102         register struct socket *head = so->so_head;
  103 
  104         so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
  105         so->so_state |= SS_ISCONNECTED;
  106         if (head && (so->so_state & SS_INCOMP)) {
  107                 TAILQ_REMOVE(&head->so_incomp, so, so_list);
  108                 head->so_incqlen--;
  109                 so->so_state &= ~SS_INCOMP;
  110                 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
  111                 so->so_state |= SS_COMP;
  112                 sorwakeup(head);
  113                 wakeup_one(&head->so_timeo);
  114         } else {
  115                 wakeup(&so->so_timeo);
  116                 sorwakeup(so);
  117                 sowwakeup(so);
  118         }
  119 }
  120 
  121 void
  122 soisdisconnecting(so)
  123         register struct socket *so;
  124 {
  125 
  126         so->so_state &= ~SS_ISCONNECTING;
  127         so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
  128         wakeup((caddr_t)&so->so_timeo);
  129         sowwakeup(so);
  130         sorwakeup(so);
  131 }
  132 
  133 void
  134 soisdisconnected(so)
  135         register struct socket *so;
  136 {
  137 
  138         so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
  139         so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
  140         wakeup((caddr_t)&so->so_timeo);
  141         sowwakeup(so);
  142         sorwakeup(so);
  143 }
  144 
  145 /*
  146  * Return a random connection that hasn't been serviced yet and
  147  * is eligible for discard.  There is a one in qlen chance that
  148  * we will return a null, saying that there are no dropable
  149  * requests.  In this case, the protocol specific code should drop
  150  * the new request.  This insures fairness.
  151  *
  152  * This may be used in conjunction with protocol specific queue
  153  * congestion routines.
  154  */
  155 struct socket *
  156 sodropablereq(head)
  157         register struct socket *head;
  158 {
  159         register struct socket *so;
  160         unsigned int i, j, qlen;
  161         static int rnd;
  162         static struct timeval old_runtime;
  163         static unsigned int cur_cnt, old_cnt;
  164         struct timeval tv;
  165 
  166         getmicrouptime(&tv);
  167         if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
  168                 old_runtime = tv;
  169                 old_cnt = cur_cnt / i;
  170                 cur_cnt = 0;
  171         }
  172 
  173         so = TAILQ_FIRST(&head->so_incomp);
  174         if (!so)
  175                 return (so);
  176 
  177         qlen = head->so_incqlen;
  178         if (++cur_cnt > qlen || old_cnt > qlen) {
  179                 rnd = (314159 * rnd + 66329) & 0xffff;
  180                 j = ((qlen + 1) * rnd) >> 16;
  181 
  182                 while (j-- && so)
  183                     so = TAILQ_NEXT(so, so_list);
  184         }
  185 
  186         return (so);
  187 }
  188 
  189 /*
  190  * When an attempt at a new connection is noted on a socket
  191  * which accepts connections, sonewconn is called.  If the
  192  * connection is possible (subject to space constraints, etc.)
  193  * then we allocate a new structure, propoerly linked into the
  194  * data structure of the original socket, and return this.
  195  * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
  196  */
  197 struct socket *
  198 sonewconn(head, connstatus)
  199         register struct socket *head;
  200         int connstatus;
  201 {
  202         register struct socket *so;
  203 
  204         if (head->so_qlen > 3 * head->so_qlimit / 2)
  205                 return ((struct socket *)0);
  206         so = soalloc(0);
  207         if (so == NULL)
  208                 return ((struct socket *)0);
  209         so->so_head = head;
  210         so->so_type = head->so_type;
  211         so->so_options = head->so_options &~ SO_ACCEPTCONN;
  212         so->so_linger = head->so_linger;
  213         so->so_state = head->so_state | SS_NOFDREF;
  214         so->so_proto = head->so_proto;
  215         so->so_timeo = head->so_timeo;
  216         so->so_cred = head->so_cred;
  217         if (so->so_cred)
  218                 so->so_cred->p_refcnt++;
  219         (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
  220 
  221         if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
  222                 sodealloc(so);
  223                 return ((struct socket *)0);
  224         }
  225 
  226         if (connstatus) {
  227                 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
  228                 so->so_state |= SS_COMP;
  229         } else {
  230                 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
  231                 so->so_state |= SS_INCOMP;
  232                 head->so_incqlen++;
  233         }
  234         head->so_qlen++;
  235         if (connstatus) {
  236                 sorwakeup(head);
  237                 wakeup((caddr_t)&head->so_timeo);
  238                 so->so_state |= connstatus;
  239         }
  240         return (so);
  241 }
  242 
  243 /*
  244  * Socantsendmore indicates that no more data will be sent on the
  245  * socket; it would normally be applied to a socket when the user
  246  * informs the system that no more data is to be sent, by the protocol
  247  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
  248  * will be received, and will normally be applied to the socket by a
  249  * protocol when it detects that the peer will send no more data.
  250  * Data queued for reading in the socket may yet be read.
  251  */
  252 
  253 void
  254 socantsendmore(so)
  255         struct socket *so;
  256 {
  257 
  258         so->so_state |= SS_CANTSENDMORE;
  259         sowwakeup(so);
  260 }
  261 
  262 void
  263 socantrcvmore(so)
  264         struct socket *so;
  265 {
  266 
  267         so->so_state |= SS_CANTRCVMORE;
  268         sorwakeup(so);
  269 }
  270 
  271 /*
  272  * Wait for data to arrive at/drain from a socket buffer.
  273  */
  274 int
  275 sbwait(sb)
  276         struct sockbuf *sb;
  277 {
  278 
  279         sb->sb_flags |= SB_WAIT;
  280         return (tsleep((caddr_t)&sb->sb_cc,
  281             (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
  282             sb->sb_timeo));
  283 }
  284 
  285 /*
  286  * Lock a sockbuf already known to be locked;
  287  * return any error returned from sleep (EINTR).
  288  */
  289 int
  290 sb_lock(sb)
  291         register struct sockbuf *sb;
  292 {
  293         int error;
  294 
  295         while (sb->sb_flags & SB_LOCK) {
  296                 sb->sb_flags |= SB_WANT;
  297                 error = tsleep((caddr_t)&sb->sb_flags,
  298                     (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
  299                     "sblock", 0);
  300                 if (error)
  301                         return (error);
  302         }
  303         sb->sb_flags |= SB_LOCK;
  304         return (0);
  305 }
  306 
  307 /*
  308  * Wakeup processes waiting on a socket buffer.
  309  * Do asynchronous notification via SIGIO
  310  * if the socket has the SS_ASYNC flag set.
  311  */
  312 void
  313 sowakeup(so, sb)
  314         register struct socket *so;
  315         register struct sockbuf *sb;
  316 {
  317         selwakeup(&sb->sb_sel);
  318         sb->sb_flags &= ~SB_SEL;
  319         if (sb->sb_flags & SB_WAIT) {
  320                 sb->sb_flags &= ~SB_WAIT;
  321                 wakeup((caddr_t)&sb->sb_cc);
  322         }
  323         if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
  324                 pgsigio(so->so_sigio, SIGIO, 0);
  325         if (sb->sb_flags & SB_UPCALL)
  326                 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  327 }
  328 
  329 /*
  330  * Socket buffer (struct sockbuf) utility routines.
  331  *
  332  * Each socket contains two socket buffers: one for sending data and
  333  * one for receiving data.  Each buffer contains a queue of mbufs,
  334  * information about the number of mbufs and amount of data in the
  335  * queue, and other fields allowing select() statements and notification
  336  * on data availability to be implemented.
  337  *
  338  * Data stored in a socket buffer is maintained as a list of records.
  339  * Each record is a list of mbufs chained together with the m_next
  340  * field.  Records are chained together with the m_nextpkt field. The upper
  341  * level routine soreceive() expects the following conventions to be
  342  * observed when placing information in the receive buffer:
  343  *
  344  * 1. If the protocol requires each message be preceded by the sender's
  345  *    name, then a record containing that name must be present before
  346  *    any associated data (mbuf's must be of type MT_SONAME).
  347  * 2. If the protocol supports the exchange of ``access rights'' (really
  348  *    just additional data associated with the message), and there are
  349  *    ``rights'' to be received, then a record containing this data
  350  *    should be present (mbuf's must be of type MT_RIGHTS).
  351  * 3. If a name or rights record exists, then it must be followed by
  352  *    a data record, perhaps of zero length.
  353  *
  354  * Before using a new socket structure it is first necessary to reserve
  355  * buffer space to the socket, by calling sbreserve().  This should commit
  356  * some of the available buffer space in the system buffer pool for the
  357  * socket (currently, it does nothing but enforce limits).  The space
  358  * should be released by calling sbrelease() when the socket is destroyed.
  359  */
  360 
  361 int
  362 soreserve(so, sndcc, rcvcc)
  363         register struct socket *so;
  364         u_long sndcc, rcvcc;
  365 {
  366 
  367         if (sbreserve(&so->so_snd, sndcc) == 0)
  368                 goto bad;
  369         if (sbreserve(&so->so_rcv, rcvcc) == 0)
  370                 goto bad2;
  371         if (so->so_rcv.sb_lowat == 0)
  372                 so->so_rcv.sb_lowat = 1;
  373         if (so->so_snd.sb_lowat == 0)
  374                 so->so_snd.sb_lowat = MCLBYTES;
  375         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  376                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  377         return (0);
  378 bad2:
  379         sbrelease(&so->so_snd);
  380 bad:
  381         return (ENOBUFS);
  382 }
  383 
  384 /*
  385  * Allot mbufs to a sockbuf.
  386  * Attempt to scale mbmax so that mbcnt doesn't become limiting
  387  * if buffering efficiency is near the normal case.
  388  */
  389 int
  390 sbreserve(sb, cc)
  391         struct sockbuf *sb;
  392         u_long cc;
  393 {
  394         if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
  395                 return (0);
  396         sb->sb_hiwat = cc;
  397         sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
  398         if (sb->sb_lowat > sb->sb_hiwat)
  399                 sb->sb_lowat = sb->sb_hiwat;
  400         return (1);
  401 }
  402 
  403 /*
  404  * Free mbufs held by a socket, and reserved mbuf space.
  405  */
  406 void
  407 sbrelease(sb)
  408         struct sockbuf *sb;
  409 {
  410 
  411         sbflush(sb);
  412         sb->sb_hiwat = sb->sb_mbmax = 0;
  413 }
  414 
  415 /*
  416  * Routines to add and remove
  417  * data from an mbuf queue.
  418  *
  419  * The routines sbappend() or sbappendrecord() are normally called to
  420  * append new mbufs to a socket buffer, after checking that adequate
  421  * space is available, comparing the function sbspace() with the amount
  422  * of data to be added.  sbappendrecord() differs from sbappend() in
  423  * that data supplied is treated as the beginning of a new record.
  424  * To place a sender's address, optional access rights, and data in a
  425  * socket receive buffer, sbappendaddr() should be used.  To place
  426  * access rights and data in a socket receive buffer, sbappendcontrol()
  427  * should be used.  In either case, the new data begins a new record.
  428  * Note that unlike sbappend() and sbappendrecord(), these routines check
  429  * for the caller that there will be enough space to store the data.
  430  * Each fails if there is not enough space, or if it cannot find mbufs
  431  * to store additional information in.
  432  *
  433  * Reliable protocols may use the socket send buffer to hold data
  434  * awaiting acknowledgement.  Data is normally copied from a socket
  435  * send buffer in a protocol with m_copy for output to a peer,
  436  * and then removed from the socket buffer with sbdrop()
  437  * or sbdroprecord() when the data is acknowledged by the peer.
  438  */
  439 
  440 /*
  441  * Append mbuf chain m to the last record in the
  442  * socket buffer sb.  The additional space associated
  443  * the mbuf chain is recorded in sb.  Empty mbufs are
  444  * discarded and mbufs are compacted where possible.
  445  */
  446 void
  447 sbappend(sb, m)
  448         struct sockbuf *sb;
  449         struct mbuf *m;
  450 {
  451         register struct mbuf *n;
  452 
  453         if (m == 0)
  454                 return;
  455         n = sb->sb_mb;
  456         if (n) {
  457                 while (n->m_nextpkt)
  458                         n = n->m_nextpkt;
  459                 do {
  460                         if (n->m_flags & M_EOR) {
  461                                 sbappendrecord(sb, m); /* XXXXXX!!!! */
  462                                 return;
  463                         }
  464                 } while (n->m_next && (n = n->m_next));
  465         }
  466         sbcompress(sb, m, n);
  467 }
  468 
  469 #ifdef SOCKBUF_DEBUG
  470 void
  471 sbcheck(sb)
  472         register struct sockbuf *sb;
  473 {
  474         register struct mbuf *m;
  475         register struct mbuf *n = 0;
  476         register u_long len = 0, mbcnt = 0;
  477 
  478         for (m = sb->sb_mb; m; m = n) {
  479             n = m->m_nextpkt;
  480             for (; m; m = m->m_next) {
  481                 len += m->m_len;
  482                 mbcnt += MSIZE;
  483                 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
  484                         mbcnt += m->m_ext.ext_size;
  485             }
  486         }
  487         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
  488                 printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
  489                     mbcnt, sb->sb_mbcnt);
  490                 panic("sbcheck");
  491         }
  492 }
  493 #endif
  494 
  495 /*
  496  * As above, except the mbuf chain
  497  * begins a new record.
  498  */
  499 void
  500 sbappendrecord(sb, m0)
  501         register struct sockbuf *sb;
  502         register struct mbuf *m0;
  503 {
  504         register struct mbuf *m;
  505 
  506         if (m0 == 0)
  507                 return;
  508         m = sb->sb_mb;
  509         if (m)
  510                 while (m->m_nextpkt)
  511                         m = m->m_nextpkt;
  512         /*
  513          * Put the first mbuf on the queue.
  514          * Note this permits zero length records.
  515          */
  516         sballoc(sb, m0);
  517         if (m)
  518                 m->m_nextpkt = m0;
  519         else
  520                 sb->sb_mb = m0;
  521         m = m0->m_next;
  522         m0->m_next = 0;
  523         if (m && (m0->m_flags & M_EOR)) {
  524                 m0->m_flags &= ~M_EOR;
  525                 m->m_flags |= M_EOR;
  526         }
  527         sbcompress(sb, m, m0);
  528 }
  529 
  530 /*
  531  * As above except that OOB data
  532  * is inserted at the beginning of the sockbuf,
  533  * but after any other OOB data.
  534  */
  535 void
  536 sbinsertoob(sb, m0)
  537         register struct sockbuf *sb;
  538         register struct mbuf *m0;
  539 {
  540         register struct mbuf *m;
  541         register struct mbuf **mp;
  542 
  543         if (m0 == 0)
  544                 return;
  545         for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
  546             m = *mp;
  547             again:
  548                 switch (m->m_type) {
  549 
  550                 case MT_OOBDATA:
  551                         continue;               /* WANT next train */
  552 
  553                 case MT_CONTROL:
  554                         m = m->m_next;
  555                         if (m)
  556                                 goto again;     /* inspect THIS train further */
  557                 }
  558                 break;
  559         }
  560         /*
  561          * Put the first mbuf on the queue.
  562          * Note this permits zero length records.
  563          */
  564         sballoc(sb, m0);
  565         m0->m_nextpkt = *mp;
  566         *mp = m0;
  567         m = m0->m_next;
  568         m0->m_next = 0;
  569         if (m && (m0->m_flags & M_EOR)) {
  570                 m0->m_flags &= ~M_EOR;
  571                 m->m_flags |= M_EOR;
  572         }
  573         sbcompress(sb, m, m0);
  574 }
  575 
  576 /*
  577  * Append address and data, and optionally, control (ancillary) data
  578  * to the receive queue of a socket.  If present,
  579  * m0 must include a packet header with total length.
  580  * Returns 0 if no space in sockbuf or insufficient mbufs.
  581  */
  582 int
  583 sbappendaddr(sb, asa, m0, control)
  584         register struct sockbuf *sb;
  585         struct sockaddr *asa;
  586         struct mbuf *m0, *control;
  587 {
  588         register struct mbuf *m, *n;
  589         int space = asa->sa_len;
  590 
  591 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
  592 panic("sbappendaddr");
  593         if (m0)
  594                 space += m0->m_pkthdr.len;
  595         for (n = control; n; n = n->m_next) {
  596                 space += n->m_len;
  597                 if (n->m_next == 0)     /* keep pointer to last control buf */
  598                         break;
  599         }
  600         if (space > sbspace(sb))
  601                 return (0);
  602         if (asa->sa_len > MLEN)
  603                 return (0);
  604         MGET(m, M_DONTWAIT, MT_SONAME);
  605         if (m == 0)
  606                 return (0);
  607         m->m_len = asa->sa_len;
  608         bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
  609         if (n)
  610                 n->m_next = m0;         /* concatenate data to control */
  611         else
  612                 control = m0;
  613         m->m_next = control;
  614         for (n = m; n; n = n->m_next)
  615                 sballoc(sb, n);
  616         n = sb->sb_mb;
  617         if (n) {
  618                 while (n->m_nextpkt)
  619                         n = n->m_nextpkt;
  620                 n->m_nextpkt = m;
  621         } else
  622                 sb->sb_mb = m;
  623         return (1);
  624 }
  625 
  626 int
  627 sbappendcontrol(sb, m0, control)
  628         struct sockbuf *sb;
  629         struct mbuf *control, *m0;
  630 {
  631         register struct mbuf *m, *n;
  632         int space = 0;
  633 
  634         if (control == 0)
  635                 panic("sbappendcontrol");
  636         for (m = control; ; m = m->m_next) {
  637                 space += m->m_len;
  638                 if (m->m_next == 0)
  639                         break;
  640         }
  641         n = m;                  /* save pointer to last control buffer */
  642         for (m = m0; m; m = m->m_next)
  643                 space += m->m_len;
  644         if (space > sbspace(sb))
  645                 return (0);
  646         n->m_next = m0;                 /* concatenate data to control */
  647         for (m = control; m; m = m->m_next)
  648                 sballoc(sb, m);
  649         n = sb->sb_mb;
  650         if (n) {
  651                 while (n->m_nextpkt)
  652                         n = n->m_nextpkt;
  653                 n->m_nextpkt = control;
  654         } else
  655                 sb->sb_mb = control;
  656         return (1);
  657 }
  658 
  659 /*
  660  * Compress mbuf chain m into the socket
  661  * buffer sb following mbuf n.  If n
  662  * is null, the buffer is presumed empty.
  663  */
  664 void
  665 sbcompress(sb, m, n)
  666         register struct sockbuf *sb;
  667         register struct mbuf *m, *n;
  668 {
  669         register int eor = 0;
  670         register struct mbuf *o;
  671 
  672         while (m) {
  673                 eor |= m->m_flags & M_EOR;
  674                 if (m->m_len == 0 &&
  675                     (eor == 0 ||
  676                      (((o = m->m_next) || (o = n)) &&
  677                       o->m_type == m->m_type))) {
  678                         m = m_free(m);
  679                         continue;
  680                 }
  681                 if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
  682                     (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
  683                     n->m_type == m->m_type) {
  684                         bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
  685                             (unsigned)m->m_len);
  686                         n->m_len += m->m_len;
  687                         sb->sb_cc += m->m_len;
  688                         m = m_free(m);
  689                         continue;
  690                 }
  691                 if (n)
  692                         n->m_next = m;
  693                 else
  694                         sb->sb_mb = m;
  695                 sballoc(sb, m);
  696                 n = m;
  697                 m->m_flags &= ~M_EOR;
  698                 m = m->m_next;
  699                 n->m_next = 0;
  700         }
  701         if (eor) {
  702                 if (n)
  703                         n->m_flags |= eor;
  704                 else
  705                         printf("semi-panic: sbcompress\n");
  706         }
  707 }
  708 
  709 /*
  710  * Free all mbufs in a sockbuf.
  711  * Check that all resources are reclaimed.
  712  */
  713 void
  714 sbflush(sb)
  715         register struct sockbuf *sb;
  716 {
  717 
  718         if (sb->sb_flags & SB_LOCK)
  719                 panic("sbflush: locked");
  720         while (sb->sb_mbcnt) {
  721                 /*
  722                  * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
  723                  * we would loop forever. Panic instead.
  724                  */
  725                 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
  726                         break;
  727                 sbdrop(sb, (int)sb->sb_cc);
  728         }
  729         if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
  730                 panic("sbflush: cc %ld || mb %p || mbcnt %ld", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
  731 }
  732 
  733 /*
  734  * Drop data from (the front of) a sockbuf.
  735  */
  736 void
  737 sbdrop(sb, len)
  738         register struct sockbuf *sb;
  739         register int len;
  740 {
  741         register struct mbuf *m, *mn;
  742         struct mbuf *next;
  743 
  744         next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
  745         while (len > 0) {
  746                 if (m == 0) {
  747                         if (next == 0)
  748                                 panic("sbdrop");
  749                         m = next;
  750                         next = m->m_nextpkt;
  751                         continue;
  752                 }
  753                 if (m->m_len > len) {
  754                         m->m_len -= len;
  755                         m->m_data += len;
  756                         sb->sb_cc -= len;
  757                         break;
  758                 }
  759                 len -= m->m_len;
  760                 sbfree(sb, m);
  761                 MFREE(m, mn);
  762                 m = mn;
  763         }
  764         while (m && m->m_len == 0) {
  765                 sbfree(sb, m);
  766                 MFREE(m, mn);
  767                 m = mn;
  768         }
  769         if (m) {
  770                 sb->sb_mb = m;
  771                 m->m_nextpkt = next;
  772         } else
  773                 sb->sb_mb = next;
  774 }
  775 
  776 /*
  777  * Drop a record off the front of a sockbuf
  778  * and move the next record to the front.
  779  */
  780 void
  781 sbdroprecord(sb)
  782         register struct sockbuf *sb;
  783 {
  784         register struct mbuf *m, *mn;
  785 
  786         m = sb->sb_mb;
  787         if (m) {
  788                 sb->sb_mb = m->m_nextpkt;
  789                 do {
  790                         sbfree(sb, m);
  791                         MFREE(m, mn);
  792                         m = mn;
  793                 } while (m);
  794         }
  795 }
  796 
  797 /*
  798  * Create a "control" mbuf containing the specified data
  799  * with the specified type for presentation on a socket buffer.
  800  */
  801 struct mbuf *
  802 sbcreatecontrol(p, size, type, level)
  803         caddr_t p;
  804         register int size;
  805         int type, level;
  806 {
  807         register struct cmsghdr *cp;
  808         struct mbuf *m;
  809 
  810         if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
  811                 return ((struct mbuf *) NULL);
  812         cp = mtod(m, struct cmsghdr *);
  813         /* XXX check size? */
  814         (void)memcpy(CMSG_DATA(cp), p, size);
  815         size += sizeof(*cp);
  816         m->m_len = size;
  817         cp->cmsg_len = size;
  818         cp->cmsg_level = level;
  819         cp->cmsg_type = type;
  820         return (m);
  821 }
  822 
  823 /*
  824  * Some routines that return EOPNOTSUPP for entry points that are not
  825  * supported by a protocol.  Fill in as needed.
  826  */
  827 int
  828 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
  829 {
  830         return EOPNOTSUPP;
  831 }
  832 
  833 int
  834 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
  835 {
  836         return EOPNOTSUPP;
  837 }
  838 
  839 int
  840 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
  841 {
  842         return EOPNOTSUPP;
  843 }
  844 
  845 int
  846 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
  847                     struct ifnet *ifp, struct proc *p)
  848 {
  849         return EOPNOTSUPP;
  850 }
  851 
  852 int
  853 pru_listen_notsupp(struct socket *so, struct proc *p)
  854 {
  855         return EOPNOTSUPP;
  856 }
  857 
  858 int
  859 pru_rcvd_notsupp(struct socket *so, int flags)
  860 {
  861         return EOPNOTSUPP;
  862 }
  863 
  864 int
  865 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
  866 {
  867         return EOPNOTSUPP;
  868 }
  869 
  870 /*
  871  * This isn't really a ``null'' operation, but it's the default one
  872  * and doesn't do anything destructive.
  873  */
  874 int
  875 pru_sense_null(struct socket *so, struct stat *sb)
  876 {
  877         sb->st_blksize = so->so_snd.sb_hiwat;
  878         return 0;
  879 }
  880 
  881 /*
  882  * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
  883  */
  884 struct sockaddr *
  885 dup_sockaddr(sa, canwait)
  886         struct sockaddr *sa;
  887         int canwait;
  888 {
  889         struct sockaddr *sa2;
  890 
  891         MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME, 
  892                canwait ? M_WAITOK : M_NOWAIT);
  893         if (sa2)
  894                 bcopy(sa, sa2, sa->sa_len);
  895         return sa2;
  896 }
  897 
  898 /*
  899  * Create an external-format (``xsocket'') structure using the information
  900  * in the kernel-format socket structure pointed to by so.  This is done
  901  * to reduce the spew of irrelevant information over this interface,
  902  * to isolate user code from changes in the kernel structure, and
  903  * potentially to provide information-hiding if we decide that
  904  * some of this information should be hidden from users.
  905  */
  906 void
  907 sotoxsocket(struct socket *so, struct xsocket *xso)
  908 {
  909         xso->xso_len = sizeof *xso;
  910         xso->xso_so = so;
  911         xso->so_type = so->so_type;
  912         xso->so_options = so->so_options;
  913         xso->so_linger = so->so_linger;
  914         xso->so_state = so->so_state;
  915         xso->so_pcb = so->so_pcb;
  916         xso->xso_protocol = so->so_proto->pr_protocol;
  917         xso->xso_family = so->so_proto->pr_domain->dom_family;
  918         xso->so_qlen = so->so_qlen;
  919         xso->so_incqlen = so->so_incqlen;
  920         xso->so_qlimit = so->so_qlimit;
  921         xso->so_timeo = so->so_timeo;
  922         xso->so_error = so->so_error;
  923         xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
  924         xso->so_oobmark = so->so_oobmark;
  925         sbtoxsockbuf(&so->so_snd, &xso->so_snd);
  926         sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
  927         xso->so_uid = so->so_cred ? so->so_cred->pc_ucred->cr_uid : -1;
  928 }
  929 
  930 /*
  931  * This does the same for sockbufs.  Note that the xsockbuf structure,
  932  * since it is always embedded in a socket, does not include a self
  933  * pointer nor a length.  We make this entry point public in case
  934  * some other mechanism needs it.
  935  */
  936 void
  937 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
  938 {
  939         xsb->sb_cc = sb->sb_cc;
  940         xsb->sb_hiwat = sb->sb_hiwat;
  941         xsb->sb_mbcnt = sb->sb_mbcnt;
  942         xsb->sb_mbmax = sb->sb_mbmax;
  943         xsb->sb_lowat = sb->sb_lowat;
  944         xsb->sb_flags = sb->sb_flags;
  945         xsb->sb_timeo = sb->sb_timeo;
  946 }
  947 
  948 /*
  949  * Here is the definition of some of the basic objects in the kern.ipc
  950  * branch of the MIB.
  951  */
  952 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
  953 
  954 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
  955 static int dummy;
  956 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
  957 
  958 SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, "");
  959 SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, "");
  960 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
  961            &sb_efficiency, 0, "");
  962 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");
  963 

Cache object: bb8d42d97b9c09f1933ec0db4a511c76


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.