The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket2.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $OpenBSD: uipc_socket2.c,v 1.134 2023/01/27 18:46:34 mvs Exp $  */
    2 /*      $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $       */
    3 
    4 /*
    5  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_socket2.c      8.1 (Berkeley) 6/10/93
   33  */
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/malloc.h>
   38 #include <sys/mbuf.h>
   39 #include <sys/protosw.h>
   40 #include <sys/domain.h>
   41 #include <sys/socket.h>
   42 #include <sys/socketvar.h>
   43 #include <sys/signalvar.h>
   44 #include <sys/event.h>
   45 #include <sys/pool.h>
   46 
   47 /*
   48  * Primitive routines for operating on sockets and socket buffers
   49  */
   50 
/*
 * Global ceiling on any socket buffer's high-water mark; sbreserve()
 * rejects reservations above this.  "patchable" = tunable at runtime.
 */
u_long  sb_max = SB_MAX;                /* patchable */

/* mbuf cluster/mbuf allocator pools; defined in the mbuf layer. */
extern struct pool mclpools[];
extern struct pool mbpool;
   55 
   56 /*
   57  * Procedures to manipulate state flags of socket
   58  * and do appropriate wakeups.  Normal sequence from the
   59  * active (originating) side is that soisconnecting() is
   60  * called during processing of connect() call,
   61  * resulting in an eventual call to soisconnected() if/when the
   62  * connection is established.  When the connection is torn down
   63  * soisdisconnecting() is called during processing of disconnect() call,
   64  * and soisdisconnected() is called when the connection to the peer
   65  * is totally severed.  The semantics of these routines are such that
   66  * connectionless protocols can call soisconnected() and soisdisconnected()
   67  * only, bypassing the in-progress calls when setting up a ``connection''
   68  * takes no time.
   69  *
   70  * From the passive side, a socket is created with
   71  * two queues of sockets: so_q0 for connections in progress
   72  * and so_q for connections already made and awaiting user acceptance.
   73  * As a protocol is preparing incoming connections, it creates a socket
   74  * structure queued on so_q0 by calling sonewconn().  When the connection
   75  * is established, soisconnected() is called, and transfers the
   76  * socket structure to so_q, making it available to accept().
   77  *
   78  * If a socket is closed with sockets on either
   79  * so_q0 or so_q, these sockets are dropped.
   80  *
   81  * If higher level protocols are implemented in
   82  * the kernel, the wakeups done here will sometimes
   83  * cause software-interrupt process scheduling.
   84  */
   85 
   86 void
   87 soisconnecting(struct socket *so)
   88 {
   89         soassertlocked(so);
   90         so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
   91         so->so_state |= SS_ISCONNECTING;
   92 }
   93 
/*
 * Mark `so' as fully connected and wake up anybody waiting on it.
 * For an incoming connection still sitting on its listener's so_q0
 * (incomplete) queue, the socket is moved to so_q (complete) so that
 * accept() can pick it up, and readers of the listener are woken.
 */
void
soisconnected(struct socket *so)
{
	struct socket *head = so->so_head;

	soassertlocked(so);
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;

	if (head != NULL && so->so_onq == &head->so_q0) {
		int persocket = solock_persocket(so);

		if (persocket) {
			/*
			 * Per-socket locking: we hold only `so'.  To take
			 * both locks in the required `head' -> `so' order we
			 * must drop `so' first; reference both sockets so
			 * neither can be freed while unlocked.
			 */
			soref(so);
			soref(head);

			sounlock(so);
			solock(head);
			solock(so);

			/*
			 * While `so' was unlocked it may have been removed
			 * from the incomplete queue (e.g. by soclose() on
			 * the listener); if so, there is nothing to move.
			 */
			if (so->so_onq != &head->so_q0) {
				sounlock(head);
				sorele(head);
				sorele(so);

				return;
			}

			/* Still queued; drop the temporary references. */
			sorele(head);
			sorele(so);
		}

		/* Move from the incomplete (q0) to the complete (q) queue. */
		soqremque(so, 0);
		soqinsque(head, so, 1);
		/* Notify the listener: readable for accept()/select(). */
		sorwakeup(head);
		wakeup_one(&head->so_timeo);

		if (persocket)
			sounlock(head);
	} else {
		/* No listener involved: wake waiters on `so' itself. */
		wakeup(&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}
  139 
  140 void
  141 soisdisconnecting(struct socket *so)
  142 {
  143         soassertlocked(so);
  144         so->so_state &= ~SS_ISCONNECTING;
  145         so->so_state |= SS_ISDISCONNECTING;
  146         so->so_rcv.sb_state |= SS_CANTRCVMORE;
  147         so->so_snd.sb_state |= SS_CANTSENDMORE;
  148         wakeup(&so->so_timeo);
  149         sowwakeup(so);
  150         sorwakeup(so);
  151 }
  152 
  153 void
  154 soisdisconnected(struct socket *so)
  155 {
  156         soassertlocked(so);
  157         so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
  158         so->so_state |= SS_ISDISCONNECTED;
  159         so->so_rcv.sb_state |= SS_CANTRCVMORE;
  160         so->so_snd.sb_state |= SS_CANTSENDMORE;
  161         wakeup(&so->so_timeo);
  162         sowwakeup(so);
  163         sorwakeup(so);
  164 }
  165 
  166 /*
  167  * When an attempt at a new connection is noted on a socket
  168  * which accepts connections, sonewconn is called.  If the
  169  * connection is possible (subject to space constraints, etc.)
  170  * then we allocate a new structure, properly linked into the
  171  * data structure of the original socket, and return this.
  172  * Connstatus may be 0 or SS_ISCONNECTED.
  173  */
struct socket *
sonewconn(struct socket *head, int connstatus, int wait)
{
	struct socket *so;
	int persocket = solock_persocket(head);
	int error;

	/*
	 * XXXSMP as long as `so' and `head' share the same lock, we
	 * can call soreserve() and pr_attach() below w/o explicitly
	 * locking `so'.
	 */
	soassertlocked(head);

	/* Refuse new connections when mbuf memory is nearly exhausted. */
	if (m_pool_used() > 95)
		return (NULL);
	/* Enforce the listen backlog (with the traditional 3x fudge). */
	if (head->so_qlen + head->so_q0len > head->so_qlimit * 3)
		return (NULL);
	so = soalloc(wait);
	if (so == NULL)
		return (NULL);
	/* The child inherits most of the listener's configuration. */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_euid = head->so_euid;
	so->so_ruid = head->so_ruid;
	so->so_egid = head->so_egid;
	so->so_rgid = head->so_rgid;
	so->so_cpid = head->so_cpid;

	/*
	 * Lock order will be `head' -> `so' while these sockets are linked.
	 */
	if (persocket)
		solock(so);

	/*
	 * Inherit watermarks but those may get clamped in low mem situations.
	 */
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		if (persocket)
			sounlock(so);
		pool_put(&socket_pool, so);
		return (NULL);
	}
	so->so_snd.sb_wat = head->so_snd.sb_wat;
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_snd.sb_timeo_nsecs = head->so_snd.sb_timeo_nsecs;
	so->so_rcv.sb_wat = head->so_rcv.sb_wat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo_nsecs = head->so_rcv.sb_timeo_nsecs;

	klist_init(&so->so_rcv.sb_klist, &socket_klistops, so);
	klist_init(&so->so_snd.sb_klist, &socket_klistops, so);
	sigio_init(&so->so_sigio);
	sigio_copy(&so->so_sigio, &head->so_sigio);

	/* Link the embryonic socket onto the incomplete (q0) queue. */
	soqinsque(head, so, 0);

	/*
	 * We need to unlock `head' because PCB layer could release
	 * solock() to enforce desired lock order.
	 */
	if (persocket) {
		/* Count this in-flight attach so soclose() can wait for it. */
		head->so_newconn++;
		sounlock(head);
	}

	error = pru_attach(so, 0, wait);

	if (persocket) {
		/* Re-take both locks in `head' -> `so' order. */
		sounlock(so);
		solock(head);
		solock(so);

		/*
		 * NOTE(review): the post-decrement compares so_newconn's
		 * value *before* the decrement, which is >= 1 here (it was
		 * incremented above), so this wakeup branch looks
		 * unreachable; a pre-decrement (--head->so_newconn == 0)
		 * appears intended — confirm against upstream/soclose().
		 */
		if ((head->so_newconn--) == 0) {
			if ((head->so_state & SS_NEWCONN_WAIT) != 0) {
				head->so_state &= ~SS_NEWCONN_WAIT;
				wakeup(&head->so_newconn);
			}
		}
	}

	if (error) {
		/* Attach failed: unlink and tear down everything above. */
		soqremque(so, 0);
		if (persocket)
			sounlock(so);
		sigio_free(&so->so_sigio);
		klist_free(&so->so_rcv.sb_klist);
		klist_free(&so->so_snd.sb_klist);
		pool_put(&socket_pool, so);
		return (NULL);
	}

	/*
	 * connstatus is 0 or SS_ISCONNECTED; if already connected, move
	 * straight to the complete queue and notify the listener.
	 */
	if (connstatus) {
		so->so_state |= connstatus;
		soqremque(so, 0);
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup(&head->so_timeo);
	}

	if (persocket)
		sounlock(so);

	return (so);
}
  284 
  285 void
  286 soqinsque(struct socket *head, struct socket *so, int q)
  287 {
  288         soassertlocked(head);
  289         soassertlocked(so);
  290 
  291         KASSERT(so->so_onq == NULL);
  292 
  293         so->so_head = head;
  294         if (q == 0) {
  295                 head->so_q0len++;
  296                 so->so_onq = &head->so_q0;
  297         } else {
  298                 head->so_qlen++;
  299                 so->so_onq = &head->so_q;
  300         }
  301         TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
  302 }
  303 
  304 int
  305 soqremque(struct socket *so, int q)
  306 {
  307         struct socket *head = so->so_head;
  308 
  309         soassertlocked(so);
  310         soassertlocked(head);
  311 
  312         if (q == 0) {
  313                 if (so->so_onq != &head->so_q0)
  314                         return (0);
  315                 head->so_q0len--;
  316         } else {
  317                 if (so->so_onq != &head->so_q)
  318                         return (0);
  319                 head->so_qlen--;
  320         }
  321         TAILQ_REMOVE(so->so_onq, so, so_qe);
  322         so->so_onq = NULL;
  323         so->so_head = NULL;
  324         return (1);
  325 }
  326 
  327 /*
  328  * Socantsendmore indicates that no more data will be sent on the
  329  * socket; it would normally be applied to a socket when the user
  330  * informs the system that no more data is to be sent, by the protocol
  331  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
  332  * will be received, and will normally be applied to the socket by a
  333  * protocol when it detects that the peer will send no more data.
  334  * Data queued for reading in the socket may yet be read.
  335  */
  336 
/*
 * Mark the send side of `so' shut down (no more data will be sent)
 * and wake up any writers so they can notice.
 */
void
socantsendmore(struct socket *so)
{
	soassertlocked(so);
	so->so_snd.sb_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}
  344 
/*
 * Mark the receive side of `so' shut down (the peer will send no more
 * data; already-queued data may still be read) and wake up readers.
 */
void
socantrcvmore(struct socket *so)
{
	soassertlocked(so);
	so->so_rcv.sb_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}
  352 
  353 void
  354 solock(struct socket *so)
  355 {
  356         switch (so->so_proto->pr_domain->dom_family) {
  357         case PF_INET:
  358         case PF_INET6:
  359                 NET_LOCK();
  360                 break;
  361         default:
  362                 rw_enter_write(&so->so_lock);
  363                 break;
  364         }
  365 }
  366 
  367 void
  368 solock_shared(struct socket *so)
  369 {
  370         switch (so->so_proto->pr_domain->dom_family) {
  371         case PF_INET:
  372         case PF_INET6:
  373                 if (so->so_proto->pr_usrreqs->pru_lock != NULL) {
  374                         NET_LOCK_SHARED();
  375                         pru_lock(so);
  376                 } else
  377                         NET_LOCK();
  378                 break;
  379         default:
  380                 rw_enter_write(&so->so_lock);
  381                 break;
  382         }
  383 }
  384 
  385 int
  386 solock_persocket(struct socket *so)
  387 {
  388         switch (so->so_proto->pr_domain->dom_family) {
  389         case PF_INET:
  390         case PF_INET6:
  391                 return 0;
  392         default:
  393                 return 1;
  394         }
  395 }
  396 
  397 void
  398 solock_pair(struct socket *so1, struct socket *so2)
  399 {
  400         KASSERT(so1 != so2);
  401         KASSERT(so1->so_type == so2->so_type);
  402         KASSERT(solock_persocket(so1));
  403 
  404         if (so1 < so2) {
  405                 solock(so1);
  406                 solock(so2);
  407         } else {
  408                 solock(so2);
  409                 solock(so1);
  410         }
  411 }
  412 
  413 void
  414 sounlock(struct socket *so)
  415 {
  416         switch (so->so_proto->pr_domain->dom_family) {
  417         case PF_INET:
  418         case PF_INET6:
  419                 NET_UNLOCK();
  420                 break;
  421         default:
  422                 rw_exit_write(&so->so_lock);
  423                 break;
  424         }
  425 }
  426 
  427 void
  428 sounlock_shared(struct socket *so)
  429 {
  430         switch (so->so_proto->pr_domain->dom_family) {
  431         case PF_INET:
  432         case PF_INET6:
  433                 if (so->so_proto->pr_usrreqs->pru_unlock != NULL) {
  434                         pru_unlock(so);
  435                         NET_UNLOCK_SHARED();
  436                 } else
  437                         NET_UNLOCK();
  438                 break;
  439         default:
  440                 rw_exit_write(&so->so_lock);
  441                 break;
  442         }
  443 }
  444 
  445 void
  446 soassertlocked(struct socket *so)
  447 {
  448         switch (so->so_proto->pr_domain->dom_family) {
  449         case PF_INET:
  450         case PF_INET6:
  451                 NET_ASSERT_LOCKED();
  452                 break;
  453         default:
  454                 rw_assert_wrlock(&so->so_lock);
  455                 break;
  456         }
  457 }
  458 
/*
 * Sleep on `ident' releasing the socket's lock for the duration.
 * For inet sockets the netlock is the sleep lock; if the netlock is
 * only held shared, the protocol-level lock taken by solock_shared()
 * must be dropped before sleeping and re-taken afterwards.
 * Returns the rwsleep_nsec() result (0, EWOULDBLOCK, EINTR, ...).
 */
int
sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg,
    uint64_t nsecs)
{
	int ret;

	switch (so->so_proto->pr_domain->dom_family) {
	case PF_INET:
	case PF_INET6:
		/* Shared netlock: drop the per-protocol lock first. */
		if (so->so_proto->pr_usrreqs->pru_unlock != NULL &&
		    rw_status(&netlock) == RW_READ) {
			pru_unlock(so);
		}
		ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
		/* Re-take the protocol lock if still under shared netlock. */
		if (so->so_proto->pr_usrreqs->pru_lock != NULL &&
		    rw_status(&netlock) == RW_READ) {
			pru_lock(so);
		}
		break;
	default:
		ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs);
		break;
	}

	return ret;
}
  485 
  486 /*
  487  * Wait for data to arrive at/drain from a socket buffer.
  488  */
  489 int
  490 sbwait(struct socket *so, struct sockbuf *sb)
  491 {
  492         int prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
  493 
  494         soassertlocked(so);
  495 
  496         sb->sb_flags |= SB_WAIT;
  497         return sosleep_nsec(so, &sb->sb_cc, prio, "netio", sb->sb_timeo_nsecs);
  498 }
  499 
/*
 * Acquire the long-term lock (SB_LOCK) on socket buffer `sb'.
 * If M_NOWAIT is set in `wait', fail with EWOULDBLOCK instead of
 * sleeping.  Otherwise sleep (interruptibly unless SB_NOINTR) until
 * the current holder releases the lock via sbunlock().
 * Returns 0 on success or an errno from the sleep.
 */
int
sblock(struct socket *so, struct sockbuf *sb, int wait)
{
	int error, prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;

	soassertlocked(so);

	/* Fast path: lock is free, take it. */
	if ((sb->sb_flags & SB_LOCK) == 0) {
		sb->sb_flags |= SB_LOCK;
		return (0);
	}
	if (wait & M_NOWAIT)
		return (EWOULDBLOCK);

	/* SB_WANT must be set before sleeping so sbunlock() wakes us. */
	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		error = sosleep_nsec(so, &sb->sb_flags, prio, "netlck", INFSLP);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}
  523 
  524 void
  525 sbunlock(struct socket *so, struct sockbuf *sb)
  526 {
  527         soassertlocked(so);
  528 
  529         sb->sb_flags &= ~SB_LOCK;
  530         if (sb->sb_flags & SB_WANT) {
  531                 sb->sb_flags &= ~SB_WANT;
  532                 wakeup(&sb->sb_flags);
  533         }
  534 }
  535 
  536 /*
  537  * Wakeup processes waiting on a socket buffer.
  538  * Do asynchronous notification via SIGIO
  539  * if the socket buffer has the SB_ASYNC flag set.
  540  */
  541 void
  542 sowakeup(struct socket *so, struct sockbuf *sb)
  543 {
  544         soassertlocked(so);
  545 
  546         if (sb->sb_flags & SB_WAIT) {
  547                 sb->sb_flags &= ~SB_WAIT;
  548                 wakeup(&sb->sb_cc);
  549         }
  550         if (sb->sb_flags & SB_ASYNC)
  551                 pgsigio(&so->so_sigio, SIGIO, 0);
  552         KNOTE(&sb->sb_klist, 0);
  553 }
  554 
  555 /*
  556  * Socket buffer (struct sockbuf) utility routines.
  557  *
  558  * Each socket contains two socket buffers: one for sending data and
  559  * one for receiving data.  Each buffer contains a queue of mbufs,
  560  * information about the number of mbufs and amount of data in the
  561  * queue, and other fields allowing select() statements and notification
  562  * on data availability to be implemented.
  563  *
  564  * Data stored in a socket buffer is maintained as a list of records.
  565  * Each record is a list of mbufs chained together with the m_next
  566  * field.  Records are chained together with the m_nextpkt field. The upper
  567  * level routine soreceive() expects the following conventions to be
  568  * observed when placing information in the receive buffer:
  569  *
  570  * 1. If the protocol requires each message be preceded by the sender's
  571  *    name, then a record containing that name must be present before
  572  *    any associated data (mbuf's must be of type MT_SONAME).
  573  * 2. If the protocol supports the exchange of ``access rights'' (really
  574  *    just additional data associated with the message), and there are
  575  *    ``rights'' to be received, then a record containing this data
  576  *    should be present (mbuf's must be of type MT_CONTROL).
  577  * 3. If a name or rights record exists, then it must be followed by
  578  *    a data record, perhaps of zero length.
  579  *
  580  * Before using a new socket structure it is first necessary to reserve
  581  * buffer space to the socket, by calling sbreserve().  This should commit
  582  * some of the available buffer space in the system buffer pool for the
  583  * socket (currently, it does nothing but enforce limits).  The space
  584  * should be released by calling sbrelease() when the socket is destroyed.
  585  */
  586 
  587 int
  588 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
  589 {
  590         soassertlocked(so);
  591 
  592         if (sbreserve(so, &so->so_snd, sndcc))
  593                 goto bad;
  594         if (sbreserve(so, &so->so_rcv, rcvcc))
  595                 goto bad2;
  596         so->so_snd.sb_wat = sndcc;
  597         so->so_rcv.sb_wat = rcvcc;
  598         if (so->so_rcv.sb_lowat == 0)
  599                 so->so_rcv.sb_lowat = 1;
  600         if (so->so_snd.sb_lowat == 0)
  601                 so->so_snd.sb_lowat = MCLBYTES;
  602         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  603                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  604         return (0);
  605 bad2:
  606         sbrelease(so, &so->so_snd);
  607 bad:
  608         return (ENOBUFS);
  609 }
  610 
  611 /*
  612  * Allot mbufs to a sockbuf.
  613  * Attempt to scale mbmax so that mbcnt doesn't become limiting
  614  * if buffering efficiency is near the normal case.
  615  */
  616 int
  617 sbreserve(struct socket *so, struct sockbuf *sb, u_long cc)
  618 {
  619         KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
  620         soassertlocked(so);
  621 
  622         if (cc == 0 || cc > sb_max)
  623                 return (1);
  624         sb->sb_hiwat = cc;
  625         sb->sb_mbmax = max(3 * MAXMCLBYTES, cc * 8);
  626         if (sb->sb_lowat > sb->sb_hiwat)
  627                 sb->sb_lowat = sb->sb_hiwat;
  628         return (0);
  629 }
  630 
  631 /*
  632  * In low memory situation, do not accept any greater than normal request.
  633  */
  634 int
  635 sbcheckreserve(u_long cnt, u_long defcnt)
  636 {
  637         if (cnt > defcnt && sbchecklowmem())
  638                 return (ENOBUFS);
  639         return (0);
  640 }
  641 
  642 int
  643 sbchecklowmem(void)
  644 {
  645         static int sblowmem;
  646         unsigned int used = m_pool_used();
  647 
  648         if (used < 60)
  649                 sblowmem = 0;
  650         else if (used > 80)
  651                 sblowmem = 1;
  652 
  653         return (sblowmem);
  654 }
  655 
  656 /*
  657  * Free mbufs held by a socket, and reserved mbuf space.
  658  */
/*
 * Free all mbufs held by socket buffer `sb' and drop its reservation
 * (high-water and mbuf-storage limits back to zero).
 */
void
sbrelease(struct socket *so, struct sockbuf *sb)
{

	sbflush(so, sb);
	sb->sb_hiwat = sb->sb_mbmax = 0;
}
  666 
  667 /*
  668  * Routines to add and remove
  669  * data from an mbuf queue.
  670  *
  671  * The routines sbappend() or sbappendrecord() are normally called to
  672  * append new mbufs to a socket buffer, after checking that adequate
  673  * space is available, comparing the function sbspace() with the amount
  674  * of data to be added.  sbappendrecord() differs from sbappend() in
  675  * that data supplied is treated as the beginning of a new record.
  676  * To place a sender's address, optional access rights, and data in a
  677  * socket receive buffer, sbappendaddr() should be used.  To place
  678  * access rights and data in a socket receive buffer, sbappendrights()
  679  * should be used.  In either case, the new data begins a new record.
  680  * Note that unlike sbappend() and sbappendrecord(), these routines check
  681  * for the caller that there will be enough space to store the data.
  682  * Each fails if there is not enough space, or if it cannot find mbufs
  683  * to store additional information in.
  684  *
  685  * Reliable protocols may use the socket send buffer to hold data
  686  * awaiting acknowledgement.  Data is normally copied from a socket
  687  * send buffer in a protocol with m_copym for output to a peer,
  688  * and then removing the data from the socket buffer with sbdrop()
  689  * or sbdroprecord() when the data is acknowledged by the peer.
  690  */
  691 
  692 #ifdef SOCKBUF_DEBUG
  693 void
  694 sblastrecordchk(struct sockbuf *sb, const char *where)
  695 {
  696         struct mbuf *m = sb->sb_mb;
  697 
  698         while (m && m->m_nextpkt)
  699                 m = m->m_nextpkt;
  700 
  701         if (m != sb->sb_lastrecord) {
  702                 printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
  703                     sb->sb_mb, sb->sb_lastrecord, m);
  704                 printf("packet chain:\n");
  705                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
  706                         printf("\t%p\n", m);
  707                 panic("sblastrecordchk from %s", where);
  708         }
  709 }
  710 
  711 void
  712 sblastmbufchk(struct sockbuf *sb, const char *where)
  713 {
  714         struct mbuf *m = sb->sb_mb;
  715         struct mbuf *n;
  716 
  717         while (m && m->m_nextpkt)
  718                 m = m->m_nextpkt;
  719 
  720         while (m && m->m_next)
  721                 m = m->m_next;
  722 
  723         if (m != sb->sb_mbtail) {
  724                 printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
  725                     sb->sb_mb, sb->sb_mbtail, m);
  726                 printf("packet tree:\n");
  727                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
  728                         printf("\t");
  729                         for (n = m; n != NULL; n = n->m_next)
  730                                 printf("%p ", n);
  731                         printf("\n");
  732                 }
  733                 panic("sblastmbufchk from %s", where);
  734         }
  735 }
  736 #endif /* SOCKBUF_DEBUG */
  737 
/*
 * SBLINKRECORD(sb, m0): append record `m0' to the sockbuf's record
 * chain, either behind the current last record or as the first record
 * when the buffer is empty, and make it the new sb_lastrecord.
 */
#define SBLINKRECORD(sb, m0)                                            \
do {                                                                    \
        if ((sb)->sb_lastrecord != NULL)                                \
                (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
        else                                                            \
                (sb)->sb_mb = (m0);                                     \
        (sb)->sb_lastrecord = (m0);                                     \
} while (/*CONSTCOND*/0)
  746 
  747 /*
  748  * Append mbuf chain m to the last record in the
  749  * socket buffer sb.  The additional space associated
  750  * the mbuf chain is recorded in sb.  Empty mbufs are
  751  * discarded and mbufs are compacted where possible.
  752  */
/*
 * Append mbuf chain `m' to the last record of socket buffer `sb';
 * the space accounting is updated and empty mbufs may be compacted
 * away by sbcompress().  If the last record is terminated (M_EOR on
 * any of its mbufs), the data starts a new record instead.
 */
void
sbappend(struct socket *so, struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;

	soassertlocked(so);
	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		/* Scan the last record; M_EOR means it is closed. */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(so, sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
		/* Loop exits with n = last mbuf of the last record. */
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(so, sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}
  786 
  787 /*
  788  * This version of sbappend() should only be used when the caller
  789  * absolutely knows that there will never be more than one record
  790  * in the socket buffer, that is, a stream protocol (such as TCP).
  791  */
/*
 * Append `m' to a stream socket buffer (e.g. TCP): the buffer holds
 * at most one record, so the data is simply compressed onto the tail
 * mbuf.  The caller guarantees `m' is a plain chain (no m_nextpkt).
 */
void
sbappendstream(struct socket *so, struct sockbuf *sb, struct mbuf *m)
{
	KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
	soassertlocked(so);
	KDASSERT(m->m_nextpkt == NULL);
	/* Single-record invariant for stream protocols. */
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

	sbcompress(so, sb, m, sb->sb_mbtail);

	/* Restore the invariant in case the buffer was empty. */
	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}
  807 
  808 #ifdef SOCKBUF_DEBUG
/*
 * Debug check: recompute the buffer's byte count and mbuf-storage
 * usage by walking every record and compare them against the cached
 * sb_cc/sb_mbcnt; panic on any inconsistency.
 */
void
sbcheck(struct socket *so, struct sockbuf *sb)
{
	struct mbuf *m, *n;
	u_long len = 0, mbcnt = 0;

	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (n = m; n; n = n->m_next) {
			len += n->m_len;
			mbcnt += MSIZE;
			if (n->m_flags & M_EXT)
				mbcnt += n->m_ext.ext_size;
			/* Only the first mbuf of a record may link records. */
			if (m != n && n->m_nextpkt)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
  831 #endif
  832 
  833 /*
  834  * As above, except the mbuf chain
  835  * begins a new record.
  836  */
/*
 * Like sbappend(), but the mbuf chain `m0' begins a brand-new record
 * in socket buffer `sb' rather than extending the last one.
 */
void
sbappendrecord(struct socket *so, struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
	soassertlocked(so);

	if (m0 == NULL)
		return;

	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(so, sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = NULL;
	/*
	 * M_EOR must end up on the record's final mbuf; if more mbufs
	 * follow, move the flag from m0 onto the remainder of the chain.
	 */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(so, sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}
  864 
/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct socket *so, struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space = asa->sa_len;	/* total space needed; address first */

	soassertlocked(so);

	/* Datagram data must carry a packet header with its length. */
	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");
	if (m0)
		space += m0->m_pkthdr.len;
	/* Add the control chain's length; n is left at its last mbuf. */
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == NULL)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(so, sb))
		return (0);
	/* The address must fit into a single small mbuf. */
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return (0);
	m->m_len = asa->sa_len;
	memcpy(mtod(m, caddr_t), asa, asa->sa_len);
	/* Record layout: address -> control (if any) -> data. */
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;		/* no control mbufs: data follows address */
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	/* Charge the buffer for every mbuf in the new record. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(so, sb, n);
	sballoc(so, sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");

	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}
  919 
  920 int
  921 sbappendcontrol(struct socket *so, struct sockbuf *sb, struct mbuf *m0,
  922     struct mbuf *control)
  923 {
  924         struct mbuf *m, *mlast, *n;
  925         int space = 0;
  926 
  927         if (control == NULL)
  928                 panic("sbappendcontrol");
  929         for (m = control; ; m = m->m_next) {
  930                 space += m->m_len;
  931                 if (m->m_next == NULL)
  932                         break;
  933         }
  934         n = m;                  /* save pointer to last control buffer */
  935         for (m = m0; m; m = m->m_next)
  936                 space += m->m_len;
  937         if (space > sbspace(so, sb))
  938                 return (0);
  939         n->m_next = m0;                 /* concatenate data to control */
  940 
  941         SBLASTRECORDCHK(sb, "sbappendcontrol 1");
  942 
  943         for (m = control; m->m_next != NULL; m = m->m_next)
  944                 sballoc(so, sb, m);
  945         sballoc(so, sb, m);
  946         mlast = m;
  947         SBLINKRECORD(sb, control);
  948 
  949         sb->sb_mbtail = mlast;
  950         SBLASTMBUFCHK(sb, "sbappendcontrol");
  951 
  952         SBLASTRECORDCHK(sb, "sbappendcontrol 2");
  953 
  954         return (1);
  955 }
  956 
/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct socket *so, struct sockbuf *sb, struct mbuf *m,
    struct mbuf *n)
{
	int eor = 0;		/* M_EOR collected from consumed mbufs */
	struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard an empty mbuf, unless it carries the only
		 * end-of-record mark and there is no same-type mbuf
		 * (next in the chain, or the tail n) to move it onto.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		    (((o = m->m_next) || (o = n)) &&
		    o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Coalesce: copy a small mbuf into the tail's spare
		 * space instead of linking it, provided the tail is
		 * not end-of-record, the types match, and the copy is
		 * at most a quarter of the tail's buffer size.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* m_trailingspace() checks buffer writeability */
		    m->m_len <= ((n->m_flags & M_EXT)? n->m_ext.ext_size :
		       MCLBYTES) / 4 && /* XXX Don't copy too much */
		    m->m_len <= m_trailingspace(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
			    m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			/* Only payload (not control/address) counts here. */
			if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME)
				sb->sb_datacc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Otherwise link m in as the new tail of the chain. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(so, sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = NULL;
	}
	/* Re-apply a collected end-of-record mark to the final tail. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress");
	}
	SBLASTMBUFCHK(sb, __func__);
}
 1014 
/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct socket *so, struct sockbuf *sb)
{
	KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
	/* The buffer must not be held by a sockbuf lock owner. */
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	/*
	 * Loop on sb_mbcnt rather than sb_cc: a final pass with
	 * sb_cc == 0 still lets sbdrop() release any remaining
	 * zero-length mbufs.
	 */
	while (sb->sb_mbcnt)
		sbdrop(so, sb, (int)sb->sb_cc);

	/* Everything must be accounted for and released. */
	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_datacc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}
 1034 
/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct socket *so, struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn;
	struct mbuf *next;	/* head of the record after the current one */

	KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
	soassertlocked(so);

	next = (m = sb->sb_mb) ? m->m_nextpkt : NULL;
	while (len > 0) {
		if (m == NULL) {
			/* Current record exhausted; advance to the next. */
			if (next == NULL)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial drop: trim the front of this mbuf. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			/* Only payload (not control/address) counts here. */
			if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME)
				sb->sb_datacc -= len;
			break;
		}
		/* Whole mbuf consumed: uncharge and free it. */
		len -= m->m_len;
		sbfree(so, sb, m);
		mn = m_free(m);
		m = mn;
	}
	/* Also release any now-empty mbufs at the front. */
	while (m && m->m_len == 0) {
		sbfree(so, sb, m);
		mn = m_free(m);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}
 1091 
 1092 /*
 1093  * Drop a record off the front of a sockbuf
 1094  * and move the next record to the front.
 1095  */
 1096 void
 1097 sbdroprecord(struct socket *so, struct sockbuf *sb)
 1098 {
 1099         struct mbuf *m, *mn;
 1100 
 1101         m = sb->sb_mb;
 1102         if (m) {
 1103                 sb->sb_mb = m->m_nextpkt;
 1104                 do {
 1105                         sbfree(so, sb, m);
 1106                         mn = m_free(m);
 1107                 } while ((m = mn) != NULL);
 1108         }
 1109         SB_EMPTY_FIXUP(sb);
 1110 }
 1111 
 1112 /*
 1113  * Create a "control" mbuf containing the specified data
 1114  * with the specified type for presentation on a socket buffer.
 1115  */
 1116 struct mbuf *
 1117 sbcreatecontrol(const void *p, size_t size, int type, int level)
 1118 {
 1119         struct cmsghdr *cp;
 1120         struct mbuf *m;
 1121 
 1122         if (CMSG_SPACE(size) > MCLBYTES) {
 1123                 printf("sbcreatecontrol: message too large %zu\n", size);
 1124                 return (NULL);
 1125         }
 1126 
 1127         if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
 1128                 return (NULL);
 1129         if (CMSG_SPACE(size) > MLEN) {
 1130                 MCLGET(m, M_DONTWAIT);
 1131                 if ((m->m_flags & M_EXT) == 0) {
 1132                         m_free(m);
 1133                         return NULL;
 1134                 }
 1135         }
 1136         cp = mtod(m, struct cmsghdr *);
 1137         memset(cp, 0, CMSG_SPACE(size));
 1138         memcpy(CMSG_DATA(cp), p, size);
 1139         m->m_len = CMSG_SPACE(size);
 1140         cp->cmsg_len = CMSG_LEN(size);
 1141         cp->cmsg_level = level;
 1142         cp->cmsg_type = type;
 1143         return (m);
 1144 }

Cache object: 38edfe41cc52c1acf775a28c121a2874


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.