The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket2.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)uipc_socket2.c      8.1 (Berkeley) 6/10/93
   34  */
   35 
   36 #include <sys/cdefs.h>
   37 __FBSDID("$FreeBSD: releng/5.2/sys/kern/uipc_socket2.c 122875 2003-11-18 00:39:07Z rwatson $");
   38 
   39 #include "opt_mac.h"
   40 #include "opt_param.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/aio.h> /* for aio_swake proto */
   44 #include <sys/domain.h>
   45 #include <sys/event.h>
   46 #include <sys/file.h>   /* for maxfiles */
   47 #include <sys/kernel.h>
   48 #include <sys/lock.h>
   49 #include <sys/mac.h>
   50 #include <sys/malloc.h>
   51 #include <sys/mbuf.h>
   52 #include <sys/mutex.h>
   53 #include <sys/proc.h>
   54 #include <sys/protosw.h>
   55 #include <sys/resourcevar.h>
   56 #include <sys/signalvar.h>
   57 #include <sys/socket.h>
   58 #include <sys/socketvar.h>
   59 #include <sys/stat.h>
   60 #include <sys/sysctl.h>
   61 #include <sys/systm.h>
   62 
   63 int     maxsockets;
   64 
   65 void (*aio_swake)(struct socket *, struct sockbuf *);
   66 
   67 /*
   68  * Primitive routines for operating on sockets and socket buffers
   69  */
   70 
   71 u_long  sb_max = SB_MAX;
   72 static  u_long sb_max_adj =
   73     SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
   74 
   75 static  u_long sb_efficiency = 8;       /* parameter for sbreserve() */
   76 
   77 /*
   78  * Procedures to manipulate state flags of socket
   79  * and do appropriate wakeups.  Normal sequence from the
   80  * active (originating) side is that soisconnecting() is
   81  * called during processing of connect() call,
   82  * resulting in an eventual call to soisconnected() if/when the
   83  * connection is established.  When the connection is torn down
   84  * soisdisconnecting() is called during processing of disconnect() call,
   85  * and soisdisconnected() is called when the connection to the peer
   86  * is totally severed.  The semantics of these routines are such that
   87  * connectionless protocols can call soisconnected() and soisdisconnected()
   88  * only, bypassing the in-progress calls when setting up a ``connection''
   89  * takes no time.
   90  *
   91  * From the passive side, a socket is created with
   92  * two queues of sockets: so_incomp for connections in progress
   93  * and so_comp for connections already made and awaiting user acceptance.
   94  * As a protocol is preparing incoming connections, it creates a socket
   95  * structure queued on so_incomp by calling sonewconn().  When the connection
   96  * is established, soisconnected() is called, and transfers the
   97  * socket structure to so_comp, making it available to accept().
   98  *
   99  * If a socket is closed with sockets on either
  100  * so_incomp or so_comp, these sockets are dropped.
  101  *
  102  * If higher level protocols are implemented in
  103  * the kernel, the wakeups done here will sometimes
  104  * cause software-interrupt process scheduling.
  105  */
  106 
  107 void
  108 soisconnecting(so)
  109         register struct socket *so;
  110 {
  111 
  112         so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
  113         so->so_state |= SS_ISCONNECTING;
  114 }
  115 
  116 void
  117 soisconnected(so)
  118         struct socket *so;
  119 {
  120         struct socket *head = so->so_head;
  121 
  122         so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
  123         so->so_state |= SS_ISCONNECTED;
  124         if (head && (so->so_state & SS_INCOMP)) {
  125                 if ((so->so_options & SO_ACCEPTFILTER) != 0) {
  126                         so->so_upcall = head->so_accf->so_accept_filter->accf_callback;
  127                         so->so_upcallarg = head->so_accf->so_accept_filter_arg;
  128                         so->so_rcv.sb_flags |= SB_UPCALL;
  129                         so->so_options &= ~SO_ACCEPTFILTER;
  130                         so->so_upcall(so, so->so_upcallarg, M_TRYWAIT);
  131                         return;
  132                 }
  133                 TAILQ_REMOVE(&head->so_incomp, so, so_list);
  134                 head->so_incqlen--;
  135                 so->so_state &= ~SS_INCOMP;
  136                 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
  137                 head->so_qlen++;
  138                 so->so_state |= SS_COMP;
  139                 sorwakeup(head);
  140                 wakeup_one(&head->so_timeo);
  141         } else {
  142                 wakeup(&so->so_timeo);
  143                 sorwakeup(so);
  144                 sowwakeup(so);
  145         }
  146 }
  147 
  148 void
  149 soisdisconnecting(so)
  150         register struct socket *so;
  151 {
  152 
  153         so->so_state &= ~SS_ISCONNECTING;
  154         so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
  155         wakeup(&so->so_timeo);
  156         sowwakeup(so);
  157         sorwakeup(so);
  158 }
  159 
  160 void
  161 soisdisconnected(so)
  162         register struct socket *so;
  163 {
  164 
  165         so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
  166         so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
  167         wakeup(&so->so_timeo);
  168         sbdrop(&so->so_snd, so->so_snd.sb_cc);
  169         sowwakeup(so);
  170         sorwakeup(so);
  171 }
  172 
  173 /*
  174  * When an attempt at a new connection is noted on a socket
  175  * which accepts connections, sonewconn is called.  If the
  176  * connection is possible (subject to space constraints, etc.)
  177  * then we allocate a new structure, propoerly linked into the
  178  * data structure of the original socket, and return this.
  179  * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
  180  *
  181  * note: the ref count on the socket is 0 on return
  182  */
  183 struct socket *
  184 sonewconn(head, connstatus)
  185         register struct socket *head;
  186         int connstatus;
  187 {
  188         register struct socket *so;
  189 
  190         if (head->so_qlen > 3 * head->so_qlimit / 2)
  191                 return ((struct socket *)0);
  192         so = soalloc(0);
  193         if (so == NULL)
  194                 return ((struct socket *)0);
  195         if ((head->so_options & SO_ACCEPTFILTER) != 0)
  196                 connstatus = 0;
  197         so->so_head = head;
  198         so->so_type = head->so_type;
  199         so->so_options = head->so_options &~ SO_ACCEPTCONN;
  200         so->so_linger = head->so_linger;
  201         so->so_state = head->so_state | SS_NOFDREF;
  202         so->so_proto = head->so_proto;
  203         so->so_timeo = head->so_timeo;
  204         so->so_cred = crhold(head->so_cred);
  205 #ifdef MAC
  206         mac_create_socket_from_socket(head, so);
  207 #endif
  208         if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
  209             (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
  210                 sodealloc(so);
  211                 return ((struct socket *)0);
  212         }
  213 
  214         if (connstatus) {
  215                 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
  216                 so->so_state |= SS_COMP;
  217                 head->so_qlen++;
  218         } else {
  219                 if (head->so_incqlen > head->so_qlimit) {
  220                         struct socket *sp;
  221                         sp = TAILQ_FIRST(&head->so_incomp);
  222                         (void) soabort(sp);
  223                 }
  224                 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
  225                 so->so_state |= SS_INCOMP;
  226                 head->so_incqlen++;
  227         }
  228         if (connstatus) {
  229                 sorwakeup(head);
  230                 wakeup(&head->so_timeo);
  231                 so->so_state |= connstatus;
  232         }
  233         return (so);
  234 }
  235 
  236 /*
  237  * Socantsendmore indicates that no more data will be sent on the
  238  * socket; it would normally be applied to a socket when the user
  239  * informs the system that no more data is to be sent, by the protocol
  240  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
  241  * will be received, and will normally be applied to the socket by a
  242  * protocol when it detects that the peer will send no more data.
  243  * Data queued for reading in the socket may yet be read.
  244  */
  245 
  246 void
  247 socantsendmore(so)
  248         struct socket *so;
  249 {
  250 
  251         so->so_state |= SS_CANTSENDMORE;
  252         sowwakeup(so);
  253 }
  254 
  255 void
  256 socantrcvmore(so)
  257         struct socket *so;
  258 {
  259 
  260         so->so_state |= SS_CANTRCVMORE;
  261         sorwakeup(so);
  262 }
  263 
  264 /*
  265  * Wait for data to arrive at/drain from a socket buffer.
  266  */
  267 int
  268 sbwait(sb)
  269         struct sockbuf *sb;
  270 {
  271 
  272         sb->sb_flags |= SB_WAIT;
  273         return (tsleep(&sb->sb_cc,
  274             (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
  275             sb->sb_timeo));
  276 }
  277 
  278 /*
  279  * Lock a sockbuf already known to be locked;
  280  * return any error returned from sleep (EINTR).
  281  */
  282 int
  283 sb_lock(sb)
  284         register struct sockbuf *sb;
  285 {
  286         int error;
  287 
  288         while (sb->sb_flags & SB_LOCK) {
  289                 sb->sb_flags |= SB_WANT;
  290                 error = tsleep(&sb->sb_flags,
  291                     (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
  292                     "sblock", 0);
  293                 if (error)
  294                         return (error);
  295         }
  296         sb->sb_flags |= SB_LOCK;
  297         return (0);
  298 }
  299 
  300 /*
  301  * Wakeup processes waiting on a socket buffer.
  302  * Do asynchronous notification via SIGIO
  303  * if the socket has the SS_ASYNC flag set.
  304  */
  305 void
  306 sowakeup(so, sb)
  307         register struct socket *so;
  308         register struct sockbuf *sb;
  309 {
  310 
  311         selwakeuppri(&sb->sb_sel, PSOCK);
  312         sb->sb_flags &= ~SB_SEL;
  313         if (sb->sb_flags & SB_WAIT) {
  314                 sb->sb_flags &= ~SB_WAIT;
  315                 wakeup(&sb->sb_cc);
  316         }
  317         if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
  318                 pgsigio(&so->so_sigio, SIGIO, 0);
  319         if (sb->sb_flags & SB_UPCALL)
  320                 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  321         if (sb->sb_flags & SB_AIO)
  322                 aio_swake(so, sb);
  323         KNOTE(&sb->sb_sel.si_note, 0);
  324 }
  325 
  326 /*
  327  * Socket buffer (struct sockbuf) utility routines.
  328  *
  329  * Each socket contains two socket buffers: one for sending data and
  330  * one for receiving data.  Each buffer contains a queue of mbufs,
  331  * information about the number of mbufs and amount of data in the
  332  * queue, and other fields allowing select() statements and notification
  333  * on data availability to be implemented.
  334  *
  335  * Data stored in a socket buffer is maintained as a list of records.
  336  * Each record is a list of mbufs chained together with the m_next
  337  * field.  Records are chained together with the m_nextpkt field. The upper
  338  * level routine soreceive() expects the following conventions to be
  339  * observed when placing information in the receive buffer:
  340  *
  341  * 1. If the protocol requires each message be preceded by the sender's
  342  *    name, then a record containing that name must be present before
  343  *    any associated data (mbuf's must be of type MT_SONAME).
  344  * 2. If the protocol supports the exchange of ``access rights'' (really
  345  *    just additional data associated with the message), and there are
  346  *    ``rights'' to be received, then a record containing this data
  347  *    should be present (mbuf's must be of type MT_RIGHTS).
  348  * 3. If a name or rights record exists, then it must be followed by
  349  *    a data record, perhaps of zero length.
  350  *
  351  * Before using a new socket structure it is first necessary to reserve
  352  * buffer space to the socket, by calling sbreserve().  This should commit
  353  * some of the available buffer space in the system buffer pool for the
  354  * socket (currently, it does nothing but enforce limits).  The space
  355  * should be released by calling sbrelease() when the socket is destroyed.
  356  */
  357 
  358 int
  359 soreserve(so, sndcc, rcvcc)
  360         register struct socket *so;
  361         u_long sndcc, rcvcc;
  362 {
  363         struct thread *td = curthread;
  364 
  365         if (sbreserve(&so->so_snd, sndcc, so, td) == 0)
  366                 goto bad;
  367         if (sbreserve(&so->so_rcv, rcvcc, so, td) == 0)
  368                 goto bad2;
  369         if (so->so_rcv.sb_lowat == 0)
  370                 so->so_rcv.sb_lowat = 1;
  371         if (so->so_snd.sb_lowat == 0)
  372                 so->so_snd.sb_lowat = MCLBYTES;
  373         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  374                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  375         return (0);
  376 bad2:
  377         sbrelease(&so->so_snd, so);
  378 bad:
  379         return (ENOBUFS);
  380 }
  381 
  382 static int
  383 sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
  384 {
  385         int error = 0;
  386         u_long old_sb_max = sb_max;
  387 
  388         error = SYSCTL_OUT(req, arg1, sizeof(u_long));
  389         if (error || !req->newptr)
  390                 return (error);
  391         error = SYSCTL_IN(req, arg1, sizeof(u_long));
  392         if (error)
  393                 return (error);
  394         if (sb_max < MSIZE + MCLBYTES) {
  395                 sb_max = old_sb_max;
  396                 return (EINVAL);
  397         }
  398         sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
  399         return (0);
  400 }
  401         
  402 /*
  403  * Allot mbufs to a sockbuf.
  404  * Attempt to scale mbmax so that mbcnt doesn't become limiting
  405  * if buffering efficiency is near the normal case.
  406  */
  407 int
  408 sbreserve(sb, cc, so, td)
  409         struct sockbuf *sb;
  410         u_long cc;
  411         struct socket *so;
  412         struct thread *td;
  413 {
  414 
  415         /*
  416          * td will only be NULL when we're in an interrupt
  417          * (e.g. in tcp_input())
  418          */
  419         if (cc > sb_max_adj)
  420                 return (0);
  421         if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
  422             td ? td->td_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur : RLIM_INFINITY)) {
  423                 return (0);
  424         }
  425         sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
  426         if (sb->sb_lowat > sb->sb_hiwat)
  427                 sb->sb_lowat = sb->sb_hiwat;
  428         return (1);
  429 }
  430 
  431 /*
  432  * Free mbufs held by a socket, and reserved mbuf space.
  433  */
  434 void
  435 sbrelease(sb, so)
  436         struct sockbuf *sb;
  437         struct socket *so;
  438 {
  439 
  440         sbflush(sb);
  441         (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
  442             RLIM_INFINITY);
  443         sb->sb_mbmax = 0;
  444 }
  445 
  446 /*
  447  * Routines to add and remove
  448  * data from an mbuf queue.
  449  *
  450  * The routines sbappend() or sbappendrecord() are normally called to
  451  * append new mbufs to a socket buffer, after checking that adequate
  452  * space is available, comparing the function sbspace() with the amount
  453  * of data to be added.  sbappendrecord() differs from sbappend() in
  454  * that data supplied is treated as the beginning of a new record.
  455  * To place a sender's address, optional access rights, and data in a
  456  * socket receive buffer, sbappendaddr() should be used.  To place
   457  * access rights and data in a socket receive buffer, sbappendcontrol()
  458  * should be used.  In either case, the new data begins a new record.
  459  * Note that unlike sbappend() and sbappendrecord(), these routines check
  460  * for the caller that there will be enough space to store the data.
  461  * Each fails if there is not enough space, or if it cannot find mbufs
  462  * to store additional information in.
  463  *
  464  * Reliable protocols may use the socket send buffer to hold data
  465  * awaiting acknowledgement.  Data is normally copied from a socket
  466  * send buffer in a protocol with m_copy for output to a peer,
  467  * and then removing the data from the socket buffer with sbdrop()
  468  * or sbdroprecord() when the data is acknowledged by the peer.
  469  */
  470 
  471 #ifdef SOCKBUF_DEBUG
  472 void
  473 sblastrecordchk(struct sockbuf *sb, const char *file, int line)
  474 {
  475         struct mbuf *m = sb->sb_mb;
  476 
  477         while (m && m->m_nextpkt)
  478                 m = m->m_nextpkt;
  479 
  480         if (m != sb->sb_lastrecord) {
  481                 printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
  482                         __func__, sb->sb_mb, sb->sb_lastrecord, m);
  483                 printf("packet chain:\n");
  484                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
  485                         printf("\t%p\n", m);
  486                 panic("%s from %s:%u", __func__, file, line);
  487         }
  488 }
  489 
  490 void
  491 sblastmbufchk(struct sockbuf *sb, const char *file, int line)
  492 {
  493         struct mbuf *m = sb->sb_mb;
  494         struct mbuf *n;
  495 
  496         while (m && m->m_nextpkt)
  497                 m = m->m_nextpkt;
  498 
  499         while (m && m->m_next)
  500                 m = m->m_next;
  501 
  502         if (m != sb->sb_mbtail) {
  503                 printf("%s: sb_mb %p sb_mbtail %p last %p\n",
  504                         __func__, sb->sb_mb, sb->sb_mbtail, m);
  505                 printf("packet tree:\n");
  506                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
  507                         printf("\t");
  508                         for (n = m; n != NULL; n = n->m_next)
  509                                 printf("%p ", n);
  510                         printf("\n");
  511                 }
  512                 panic("%s from %s:%u", __func__, file, line);
  513         }
  514 }
  515 #endif /* SOCKBUF_DEBUG */
  516 
  517 #define SBLINKRECORD(sb, m0) do {                                       \
  518         if ((sb)->sb_lastrecord != NULL)                                \
  519                 (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
  520         else                                                            \
  521                 (sb)->sb_mb = (m0);                                     \
  522         (sb)->sb_lastrecord = (m0);                                     \
  523 } while (/*CONSTCOND*/0)
  524 
  525 /*
  526  * Append mbuf chain m to the last record in the
  527  * socket buffer sb.  The additional space associated
  528  * the mbuf chain is recorded in sb.  Empty mbufs are
  529  * discarded and mbufs are compacted where possible.
  530  */
  531 void
  532 sbappend(sb, m)
  533         struct sockbuf *sb;
  534         struct mbuf *m;
  535 {
  536         register struct mbuf *n;
  537 
  538         if (m == 0)
  539                 return;
  540         SBLASTRECORDCHK(sb);
  541         n = sb->sb_mb;
  542         if (n) {
  543                 while (n->m_nextpkt)
  544                         n = n->m_nextpkt;
  545                 do {
  546                         if (n->m_flags & M_EOR) {
  547                                 sbappendrecord(sb, m); /* XXXXXX!!!! */
  548                                 return;
  549                         }
  550                 } while (n->m_next && (n = n->m_next));
  551         } else {
  552                 /*
  553                  * XXX Would like to simply use sb_mbtail here, but
  554                  * XXX I need to verify that I won't miss an EOR that
  555                  * XXX way.
  556                  */
  557                 if ((n = sb->sb_lastrecord) != NULL) {
  558                         do {
  559                                 if (n->m_flags & M_EOR) {
  560                                         sbappendrecord(sb, m); /* XXXXXX!!!! */
  561                                         return;
  562                                 }
  563                         } while (n->m_next && (n = n->m_next));
  564                 } else {
  565                         /*
  566                          * If this is the first record in the socket buffer,
  567                          * it's also the last record.
  568                          */
  569                         sb->sb_lastrecord = m;
  570                 }
  571         }
  572         sbcompress(sb, m, n);
  573         SBLASTRECORDCHK(sb);
  574 }
  575 
  576 /*
  577  * This version of sbappend() should only be used when the caller
  578  * absolutely knows that there will never be more than one record
  579  * in the socket buffer, that is, a stream protocol (such as TCP).
  580  */
  581 void
  582 sbappendstream(struct sockbuf *sb, struct mbuf *m)
  583 {
  584 
  585         KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
  586         KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
  587 
  588         SBLASTMBUFCHK(sb);
  589 
  590 #ifdef MBUFTRACE
  591         m_claim(m, sb->sb_mowner);
  592 #endif
  593 
  594         sbcompress(sb, m, sb->sb_mbtail);
  595 
  596         sb->sb_lastrecord = sb->sb_mb;
  597         SBLASTRECORDCHK(sb);
  598 }
  599 
  600 #ifdef SOCKBUF_DEBUG
  601 void
  602 sbcheck(sb)
  603         struct sockbuf *sb;
  604 {
  605         struct mbuf *m;
  606         struct mbuf *n = 0;
  607         u_long len = 0, mbcnt = 0;
  608 
  609         for (m = sb->sb_mb; m; m = n) {
  610             n = m->m_nextpkt;
  611             for (; m; m = m->m_next) {
  612                 len += m->m_len;
  613                 mbcnt += MSIZE;
  614                 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
  615                         mbcnt += m->m_ext.ext_size;
  616             }
  617         }
  618         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
  619                 printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
  620                     mbcnt, sb->sb_mbcnt);
  621                 panic("sbcheck");
  622         }
  623 }
  624 #endif
  625 
  626 /*
  627  * As above, except the mbuf chain
  628  * begins a new record.
  629  */
  630 void
  631 sbappendrecord(sb, m0)
  632         register struct sockbuf *sb;
  633         register struct mbuf *m0;
  634 {
  635         register struct mbuf *m;
  636 
  637         if (m0 == 0)
  638                 return;
  639         m = sb->sb_mb;
  640         if (m)
  641                 while (m->m_nextpkt)
  642                         m = m->m_nextpkt;
  643         /*
  644          * Put the first mbuf on the queue.
  645          * Note this permits zero length records.
  646          */
  647         sballoc(sb, m0);
  648         SBLASTRECORDCHK(sb);
  649         SBLINKRECORD(sb, m0);
  650         if (m)
  651                 m->m_nextpkt = m0;
  652         else
  653                 sb->sb_mb = m0;
  654         m = m0->m_next;
  655         m0->m_next = 0;
  656         if (m && (m0->m_flags & M_EOR)) {
  657                 m0->m_flags &= ~M_EOR;
  658                 m->m_flags |= M_EOR;
  659         }
  660         sbcompress(sb, m, m0);
  661 }
  662 
  663 /*
  664  * As above except that OOB data
  665  * is inserted at the beginning of the sockbuf,
  666  * but after any other OOB data.
  667  */
  668 void
  669 sbinsertoob(sb, m0)
  670         register struct sockbuf *sb;
  671         register struct mbuf *m0;
  672 {
  673         register struct mbuf *m;
  674         register struct mbuf **mp;
  675 
  676         if (m0 == 0)
  677                 return;
  678         for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
  679             m = *mp;
  680             again:
  681                 switch (m->m_type) {
  682 
  683                 case MT_OOBDATA:
  684                         continue;               /* WANT next train */
  685 
  686                 case MT_CONTROL:
  687                         m = m->m_next;
  688                         if (m)
  689                                 goto again;     /* inspect THIS train further */
  690                 }
  691                 break;
  692         }
  693         /*
  694          * Put the first mbuf on the queue.
  695          * Note this permits zero length records.
  696          */
  697         sballoc(sb, m0);
  698         m0->m_nextpkt = *mp;
  699         *mp = m0;
  700         m = m0->m_next;
  701         m0->m_next = 0;
  702         if (m && (m0->m_flags & M_EOR)) {
  703                 m0->m_flags &= ~M_EOR;
  704                 m->m_flags |= M_EOR;
  705         }
  706         sbcompress(sb, m, m0);
  707 }
  708 
  709 /*
  710  * Append address and data, and optionally, control (ancillary) data
  711  * to the receive queue of a socket.  If present,
  712  * m0 must include a packet header with total length.
  713  * Returns 0 if no space in sockbuf or insufficient mbufs.
  714  */
  715 int
  716 sbappendaddr(sb, asa, m0, control)
  717         struct sockbuf *sb;
  718         struct sockaddr *asa;
  719         struct mbuf *m0, *control;
  720 {
  721         struct mbuf *m, *n, *nlast;
  722         int space = asa->sa_len;
  723 
  724         if (m0 && (m0->m_flags & M_PKTHDR) == 0)
  725                 panic("sbappendaddr");
  726         if (m0)
  727                 space += m0->m_pkthdr.len;
  728         space += m_length(control, &n);
  729         if (space > sbspace(sb))
  730                 return (0);
  731 #if MSIZE <= 256
  732         if (asa->sa_len > MLEN)
  733                 return (0);
  734 #endif
  735         MGET(m, M_DONTWAIT, MT_SONAME);
  736         if (m == 0)
  737                 return (0);
  738         m->m_len = asa->sa_len;
  739         bcopy(asa, mtod(m, caddr_t), asa->sa_len);
  740         if (n)
  741                 n->m_next = m0;         /* concatenate data to control */
  742         else
  743                 control = m0;
  744         m->m_next = control;
  745         for (n = m; n->m_next != NULL; n = n->m_next)
  746                 sballoc(sb, n);
  747         sballoc(sb, n);
  748         nlast = n;
  749         SBLINKRECORD(sb, m);
  750 
  751         sb->sb_mbtail = nlast;
  752         SBLASTMBUFCHK(sb);
  753 
  754         SBLASTRECORDCHK(sb);
  755         return (1);
  756 }
  757 
  758 int
  759 sbappendcontrol(sb, m0, control)
  760         struct sockbuf *sb;
  761         struct mbuf *control, *m0;
  762 {
  763         struct mbuf *m, *n, *mlast;
  764         int space;
  765 
  766         if (control == 0)
  767                 panic("sbappendcontrol");
  768         space = m_length(control, &n) + m_length(m0, NULL);
  769         if (space > sbspace(sb))
  770                 return (0);
  771         n->m_next = m0;                 /* concatenate data to control */
  772 
  773         SBLASTRECORDCHK(sb);
  774 
  775         for (m = control; m->m_next; m = m->m_next)
  776                 sballoc(sb, m);
  777         sballoc(sb, m);
  778         mlast = m;
  779         SBLINKRECORD(sb, control);
  780 
  781         sb->sb_mbtail = mlast;
  782         SBLASTMBUFCHK(sb);
  783 
  784         SBLASTRECORDCHK(sb);
  785         return (1);
  786 }
  787 
  788 /*
  789  * Compress mbuf chain m into the socket
  790  * buffer sb following mbuf n.  If n
  791  * is null, the buffer is presumed empty.
  792  */
  793 void
  794 sbcompress(sb, m, n)
  795         register struct sockbuf *sb;
  796         register struct mbuf *m, *n;
  797 {
  798         register int eor = 0;
  799         register struct mbuf *o;
  800 
  801         while (m) {
  802                 eor |= m->m_flags & M_EOR;
  803                 if (m->m_len == 0 &&
  804                     (eor == 0 ||
  805                      (((o = m->m_next) || (o = n)) &&
  806                       o->m_type == m->m_type))) {
  807                         if (sb->sb_lastrecord == m)
  808                                 sb->sb_lastrecord = m->m_next;
  809                         m = m_free(m);
  810                         continue;
  811                 }
  812                 if (n && (n->m_flags & M_EOR) == 0 &&
  813                     M_WRITABLE(n) &&
  814                     m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
  815                     m->m_len <= M_TRAILINGSPACE(n) &&
  816                     n->m_type == m->m_type) {
  817                         bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
  818                             (unsigned)m->m_len);
  819                         n->m_len += m->m_len;
  820                         sb->sb_cc += m->m_len;
  821                         if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
  822                             m->m_type != MT_OOBDATA)
  823                                 /* XXX: Probably don't need.*/
  824                                 sb->sb_ctl += m->m_len;
  825                         m = m_free(m);
  826                         continue;
  827                 }
  828                 if (n)
  829                         n->m_next = m;
  830                 else
  831                         sb->sb_mb = m;
  832                 sb->sb_mbtail = m;
  833                 sballoc(sb, m);
  834                 n = m;
  835                 m->m_flags &= ~M_EOR;
  836                 m = m->m_next;
  837                 n->m_next = 0;
  838         }
  839         if (eor) {
  840                 if (n)
  841                         n->m_flags |= eor;
  842                 else
  843                         printf("semi-panic: sbcompress\n");
  844         }
  845         SBLASTMBUFCHK(sb);
  846 }
  847 
  848 /*
  849  * Free all mbufs in a sockbuf.
  850  * Check that all resources are reclaimed.
  851  */
  852 void
  853 sbflush(sb)
  854         register struct sockbuf *sb;
  855 {
  856 
  857         if (sb->sb_flags & SB_LOCK)
  858                 panic("sbflush: locked");
  859         while (sb->sb_mbcnt) {
  860                 /*
  861                  * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
  862                  * we would loop forever. Panic instead.
  863                  */
  864                 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
  865                         break;
  866                 sbdrop(sb, (int)sb->sb_cc);
  867         }
  868         if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
  869                 panic("sbflush: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
  870 }
  871 
  872 /*
  873  * Drop data from (the front of) a sockbuf.
  874  */
  875 void
  876 sbdrop(sb, len)
  877         register struct sockbuf *sb;
  878         register int len;
  879 {
  880         register struct mbuf *m;
  881         struct mbuf *next;
  882 
  883         next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
  884         while (len > 0) {
  885                 if (m == 0) {
  886                         if (next == 0)
  887                                 panic("sbdrop");
  888                         m = next;
  889                         next = m->m_nextpkt;
  890                         continue;
  891                 }
  892                 if (m->m_len > len) {
  893                         m->m_len -= len;
  894                         m->m_data += len;
  895                         sb->sb_cc -= len;
  896                         if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
  897                             m->m_type != MT_OOBDATA)
  898                                 sb->sb_ctl -= len;
  899                         break;
  900                 }
  901                 len -= m->m_len;
  902                 sbfree(sb, m);
  903                 m = m_free(m);
  904         }
  905         while (m && m->m_len == 0) {
  906                 sbfree(sb, m);
  907                 m = m_free(m);
  908         }
  909         if (m) {
  910                 sb->sb_mb = m;
  911                 m->m_nextpkt = next;
  912         } else
  913                 sb->sb_mb = next;
  914         /*
  915          * First part is an inline SB_EMPTY_FIXUP().  Second part
  916          * makes sure sb_lastrecord is up-to-date if we dropped
  917          * part of the last record.
  918          */
  919         m = sb->sb_mb;
  920         if (m == NULL) {
  921                 sb->sb_mbtail = NULL;
  922                 sb->sb_lastrecord = NULL;
  923         } else if (m->m_nextpkt == NULL) {
  924                 sb->sb_lastrecord = m;
  925         }
  926 }
  927 
  928 /*
  929  * Drop a record off the front of a sockbuf
  930  * and move the next record to the front.
  931  */
  932 void
  933 sbdroprecord(sb)
  934         register struct sockbuf *sb;
  935 {
  936         register struct mbuf *m;
  937 
  938         m = sb->sb_mb;
  939         if (m) {
  940                 sb->sb_mb = m->m_nextpkt;
  941                 do {
  942                         sbfree(sb, m);
  943                         m = m_free(m);
  944                 } while (m);
  945         }
  946         SB_EMPTY_FIXUP(sb);
  947 }
  948 
  949 /*
  950  * Create a "control" mbuf containing the specified data
  951  * with the specified type for presentation on a socket buffer.
  952  */
  953 struct mbuf *
  954 sbcreatecontrol(p, size, type, level)
  955         caddr_t p;
  956         register int size;
  957         int type, level;
  958 {
  959         register struct cmsghdr *cp;
  960         struct mbuf *m;
  961 
  962         if (CMSG_SPACE((u_int)size) > MCLBYTES)
  963                 return ((struct mbuf *) NULL);
  964         if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
  965                 return ((struct mbuf *) NULL);
  966         if (CMSG_SPACE((u_int)size) > MLEN) {
  967                 MCLGET(m, M_DONTWAIT);
  968                 if ((m->m_flags & M_EXT) == 0) {
  969                         m_free(m);
  970                         return ((struct mbuf *) NULL);
  971                 }
  972         }
  973         cp = mtod(m, struct cmsghdr *);
  974         m->m_len = 0;
  975         KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
  976             ("sbcreatecontrol: short mbuf"));
  977         if (p != NULL)
  978                 (void)memcpy(CMSG_DATA(cp), p, size);
  979         m->m_len = CMSG_SPACE(size);
  980         cp->cmsg_len = CMSG_LEN(size);
  981         cp->cmsg_level = level;
  982         cp->cmsg_type = type;
  983         return (m);
  984 }
  985 
  986 /*
  987  * Some routines that return EOPNOTSUPP for entry points that are not
  988  * supported by a protocol.  Fill in as needed.
  989  */
  990 int
  991 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
  992 {
  993         return EOPNOTSUPP;
  994 }
  995 
  996 int
  997 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
  998 {
  999         return EOPNOTSUPP;
 1000 }
 1001 
 1002 int
 1003 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
 1004 {
 1005         return EOPNOTSUPP;
 1006 }
 1007 
 1008 int
 1009 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
 1010                     struct ifnet *ifp, struct thread *td)
 1011 {
 1012         return EOPNOTSUPP;
 1013 }
 1014 
 1015 int
 1016 pru_listen_notsupp(struct socket *so, struct thread *td)
 1017 {
 1018         return EOPNOTSUPP;
 1019 }
 1020 
 1021 int
 1022 pru_rcvd_notsupp(struct socket *so, int flags)
 1023 {
 1024         return EOPNOTSUPP;
 1025 }
 1026 
 1027 int
 1028 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
 1029 {
 1030         return EOPNOTSUPP;
 1031 }
 1032 
 1033 /*
 1034  * This isn't really a ``null'' operation, but it's the default one
 1035  * and doesn't do anything destructive.
 1036  */
 1037 int
 1038 pru_sense_null(struct socket *so, struct stat *sb)
 1039 {
 1040         sb->st_blksize = so->so_snd.sb_hiwat;
 1041         return 0;
 1042 }
 1043 
 1044 /*
 1045  * For protocol types that don't keep cached copies of labels in their
 1046  * pcbs, provide a null sosetlabel that does a NOOP.
 1047  */
 1048 void
 1049 pru_sosetlabel_null(struct socket *so)
 1050 {
 1051 
 1052 }
 1053 
 1054 /*
 1055  * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 1056  */
 1057 struct sockaddr *
 1058 dup_sockaddr(sa, canwait)
 1059         struct sockaddr *sa;
 1060         int canwait;
 1061 {
 1062         struct sockaddr *sa2;
 1063 
 1064         MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME, 
 1065                canwait ? M_WAITOK : M_NOWAIT);
 1066         if (sa2)
 1067                 bcopy(sa, sa2, sa->sa_len);
 1068         return sa2;
 1069 }
 1070 
 1071 /*
 1072  * Create an external-format (``xsocket'') structure using the information
 1073  * in the kernel-format socket structure pointed to by so.  This is done
 1074  * to reduce the spew of irrelevant information over this interface,
 1075  * to isolate user code from changes in the kernel structure, and
 1076  * potentially to provide information-hiding if we decide that
 1077  * some of this information should be hidden from users.
 1078  */
 1079 void
 1080 sotoxsocket(struct socket *so, struct xsocket *xso)
 1081 {
 1082         xso->xso_len = sizeof *xso;
 1083         xso->xso_so = so;
 1084         xso->so_type = so->so_type;
 1085         xso->so_options = so->so_options;
 1086         xso->so_linger = so->so_linger;
 1087         xso->so_state = so->so_state;
 1088         xso->so_pcb = so->so_pcb;
 1089         xso->xso_protocol = so->so_proto->pr_protocol;
 1090         xso->xso_family = so->so_proto->pr_domain->dom_family;
 1091         xso->so_qlen = so->so_qlen;
 1092         xso->so_incqlen = so->so_incqlen;
 1093         xso->so_qlimit = so->so_qlimit;
 1094         xso->so_timeo = so->so_timeo;
 1095         xso->so_error = so->so_error;
 1096         xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
 1097         xso->so_oobmark = so->so_oobmark;
 1098         sbtoxsockbuf(&so->so_snd, &xso->so_snd);
 1099         sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
 1100         xso->so_uid = so->so_cred->cr_uid;
 1101 }
 1102 
 1103 /*
 1104  * This does the same for sockbufs.  Note that the xsockbuf structure,
 1105  * since it is always embedded in a socket, does not include a self
 1106  * pointer nor a length.  We make this entry point public in case
 1107  * some other mechanism needs it.
 1108  */
 1109 void
 1110 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 1111 {
 1112         xsb->sb_cc = sb->sb_cc;
 1113         xsb->sb_hiwat = sb->sb_hiwat;
 1114         xsb->sb_mbcnt = sb->sb_mbcnt;
 1115         xsb->sb_mbmax = sb->sb_mbmax;
 1116         xsb->sb_lowat = sb->sb_lowat;
 1117         xsb->sb_flags = sb->sb_flags;
 1118         xsb->sb_timeo = sb->sb_timeo;
 1119 }
 1120 
 1121 /*
 1122  * Here is the definition of some of the basic objects in the kern.ipc
 1123  * branch of the MIB.
 1124  */
 1125 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
 1126 
 1127 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
 1128 static int dummy;
 1129 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
 1130 SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW, 
 1131     &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
 1132 SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RDTUN, 
 1133     &maxsockets, 0, "Maximum number of sockets avaliable");
 1134 SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
 1135     &sb_efficiency, 0, "");
 1136 
 1137 /*
 1138  * Initialise maxsockets 
 1139  */
 1140 static void init_maxsockets(void *ignored)
 1141 {
 1142         TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
 1143         maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
 1144 }
 1145 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);

Cache object: 48d7b9ca580f1796c9af986b4d6c8f24


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.