1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
32 */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include "opt_param.h"
38
39 #include <sys/param.h>
40 #include <sys/aio.h> /* for aio_swake proto */
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/protosw.h>
48 #include <sys/resourcevar.h>
49 #include <sys/signalvar.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sx.h>
53 #include <sys/sysctl.h>
54
55 /*
56 * Function pointer set by the AIO routines so that the socket buffer code
57 * can call back into the AIO module if it is loaded.
58 */
59 void (*aio_swake)(struct socket *, struct sockbuf *);
60
61 /*
62 * Primitive routines for operating on socket buffers
63 */
64
65 u_long sb_max = SB_MAX;
66 u_long sb_max_adj =
67 (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
68
69 static u_long sb_efficiency = 8; /* parameter for sbreserve() */
70
71 static struct mbuf *sbcut_internal(struct sockbuf *sb, int len);
72 static void sbflush_internal(struct sockbuf *sb);
73
74 /*
75 * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY.
76 */
77 static void
78 sbm_clrprotoflags(struct mbuf *m, int flags)
79 {
80 int mask;
81
82 mask = ~M_PROTOFLAGS;
83 if (flags & PRUS_NOTREADY)
84 mask |= M_NOTREADY;
85 while (m) {
86 m->m_flags &= mask;
87 m = m->m_next;
88 }
89 }
90
91 /*
92 * Mark ready "count" mbufs starting with "m".
93 */
int
sbready(struct sockbuf *sb, struct mbuf *m, int count)
{
	u_int blocker;

	SOCKBUF_LOCK_ASSERT(sb);
	KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));

	/*
	 * If "m" is the first-not-ready mbuf, clearing M_NOTREADY on it
	 * may unblock ready mbufs queued behind it; remember that so the
	 * M_BLOCKED flag is stripped as well.
	 */
	blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;

	for (int i = 0; i < count; i++, m = m->m_next) {
		KASSERT(m->m_flags & M_NOTREADY,
		    ("%s: m %p !M_NOTREADY", __func__, m));
		m->m_flags &= ~(M_NOTREADY | blocker);
		if (blocker)
			sb->sb_acc += m->m_len;	/* data now readable */
	}

	/* Not the blocking mbuf: earlier data is still not ready. */
	if (!blocker)
		return (EINPROGRESS);

	/* This one was blocking all the queue. */
	for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
		KASSERT(m->m_flags & M_BLOCKED,
		    ("%s: m %p !M_BLOCKED", __func__, m));
		m->m_flags &= ~M_BLOCKED;
		sb->sb_acc += m->m_len;
	}

	/* Advance first-not-ready pointer past everything just released. */
	sb->sb_fnrdy = m;

	return (0);
}
127
128 /*
129 * Adjust sockbuf state reflecting allocation of m.
130 */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK_ASSERT(sb);

	/* Character count always includes the new data. */
	sb->sb_ccc += m->m_len;

	/*
	 * sb_acc counts only data available for reading: if there is no
	 * first-not-ready mbuf yet, either this mbuf becomes it (when it
	 * is M_NOTREADY) or its data is immediately available; otherwise
	 * the new mbuf is blocked behind the existing not-ready one.
	 */
	if (sb->sb_fnrdy == NULL) {
		if (m->m_flags & M_NOTREADY)
			sb->sb_fnrdy = m;
		else
			sb->sb_acc += m->m_len;
	} else
		m->m_flags |= M_BLOCKED;

	/* Non-data mbufs (control, addresses) are counted in sb_ctl. */
	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
		sb->sb_ctl += m->m_len;

	sb->sb_mbcnt += MSIZE;
	sb->sb_mcnt += 1;

	/* External storage contributes its full size to the mbuf count. */
	if (m->m_flags & M_EXT) {
		sb->sb_mbcnt += m->m_ext.ext_size;
		sb->sb_ccnt += 1;
	}
}
158
159 /*
160 * Adjust sockbuf state reflecting freeing of m.
161 */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{

#if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
	SOCKBUF_LOCK_ASSERT(sb);
#endif

	sb->sb_ccc -= m->m_len;

	/* Only data that was available for reading is counted in sb_acc. */
	if (!(m->m_flags & M_NOTAVAIL))
		sb->sb_acc -= m->m_len;

	if (m == sb->sb_fnrdy) {
		struct mbuf *n;

		KASSERT(m->m_flags & M_NOTREADY,
		    ("%s: m %p !M_NOTREADY", __func__, m));

		/*
		 * Removing the first-not-ready mbuf unblocks the ready
		 * mbufs that were queued behind it, up to the next
		 * not-ready one (which becomes the new sb_fnrdy).
		 */
		n = m->m_next;
		while (n != NULL && !(n->m_flags & M_NOTREADY)) {
			n->m_flags &= ~M_BLOCKED;
			sb->sb_acc += n->m_len;
			n = n->m_next;
		}
		sb->sb_fnrdy = n;
	}

	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
		sb->sb_ctl -= m->m_len;

	sb->sb_mbcnt -= MSIZE;
	sb->sb_mcnt -= 1;
	if (m->m_flags & M_EXT) {
		sb->sb_mbcnt -= m->m_ext.ext_size;
		sb->sb_ccnt -= 1;
	}

	/* Invalidate or adjust the cached send pointer. */
	if (sb->sb_sndptr == m) {
		sb->sb_sndptr = NULL;
		sb->sb_sndptroff = 0;
	}
	if (sb->sb_sndptroff != 0)
		sb->sb_sndptroff -= m->m_len;
}
207
208 /*
209 * Socantsendmore indicates that no more data will be sent on the socket; it
210 * would normally be applied to a socket when the user informs the system
211 * that no more data is to be sent, by the protocol code (in case
212 * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be
213 * received, and will normally be applied to the socket by a protocol when it
214 * detects that the peer will send no more data. Data queued for reading in
215 * the socket may yet be read.
216 */
void
socantsendmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_snd);

	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	/* sowwakeup_locked() drops the send buffer lock. */
	sowwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
227
void
socantsendmore(struct socket *so)
{

	/* The locked variant releases the lock via sowwakeup_locked(). */
	SOCKBUF_LOCK(&so->so_snd);
	socantsendmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
236
void
socantrcvmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	/* sorwakeup_locked() drops the receive buffer lock. */
	sorwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
247
void
socantrcvmore(struct socket *so)
{

	/* The locked variant releases the lock via sorwakeup_locked(). */
	SOCKBUF_LOCK(&so->so_rcv);
	socantrcvmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
256
/*
 * Note a receive buffer overflow.  With SO_RERROR set the error is
 * reported to the reader via so_rerror and a wakeup; otherwise the
 * event is silently dropped.  Releases the receive buffer lock.
 */
void
soroverflow_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	if (so->so_options & SO_RERROR) {
		so->so_rerror = ENOBUFS;
		/* sorwakeup_locked() drops the lock. */
		sorwakeup_locked(so);
	} else
		SOCKBUF_UNLOCK(&so->so_rcv);

	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
271
void
soroverflow(struct socket *so)
{

	/* The locked variant releases the lock on all paths. */
	SOCKBUF_LOCK(&so->so_rcv);
	soroverflow_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
280
281 /*
282 * Wait for data to arrive at/drain from a socket buffer.
283 */
int
sbwait(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);

	/*
	 * Sleep on sb_acc; sowakeup() wakes this channel when data
	 * arrives or drains.  SB_NOINTR suppresses signal interruption.
	 */
	sb->sb_flags |= SB_WAIT;
	return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    sb->sb_timeo, 0, 0));
}
295
296 int
297 sblock(struct sockbuf *sb, int flags)
298 {
299
300 KASSERT((flags & SBL_VALID) == flags,
301 ("sblock: flags invalid (0x%x)", flags));
302
303 if (flags & SBL_WAIT) {
304 if ((sb->sb_flags & SB_NOINTR) ||
305 (flags & SBL_NOINTR)) {
306 sx_xlock(&sb->sb_sx);
307 return (0);
308 }
309 return (sx_xlock_sig(&sb->sb_sx));
310 } else {
311 if (sx_try_xlock(&sb->sb_sx) == 0)
312 return (EWOULDBLOCK);
313 return (0);
314 }
315 }
316
/* Release the long-term lock taken by sblock(). */
void
sbunlock(struct sockbuf *sb)
{

	sx_xunlock(&sb->sb_sx);
}
323
324 /*
325 * Wakeup processes waiting on a socket buffer. Do asynchronous notification
326 * via SIGIO if the socket has the SS_ASYNC flag set.
327 *
328 * Called with the socket buffer lock held; will release the lock by the end
329 * of the function. This allows the caller to acquire the socket buffer lock
330 * while testing for the need for various sorts of wakeup and hold it through
331 * to the point where it's no longer required. We currently hold the lock
332 * through calls out to other subsystems (with the exception of kqueue), and
333 * then release it to avoid lock order issues. It's not clear that's
334 * correct.
335 */
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
	int ret;

	SOCKBUF_LOCK_ASSERT(sb);

	/* Wake select()/poll() waiters; drop SB_SEL once nobody waits. */
	selwakeuppri(sb->sb_sel, PSOCK);
	if (!SEL_WAITING(sb->sb_sel))
		sb->sb_flags &= ~SB_SEL;
	/* Wake sbwait() sleepers (sleep channel is sb_acc). */
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup(&sb->sb_acc);
	}
	KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
	if (sb->sb_upcall != NULL) {
		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
		if (ret == SU_ISCONNECTED) {
			/* Only a receive-buffer upcall may report this. */
			KASSERT(sb == &so->so_rcv,
			    ("SO_SND upcall returned SU_ISCONNECTED"));
			soupcall_clear(so, SO_RCV);
		}
	} else
		ret = SU_OK;
	if (sb->sb_flags & SB_AIO)
		sowakeup_aio(so, sb);
	/* Drop the buffer lock before calling out to avoid lock-order issues. */
	SOCKBUF_UNLOCK(sb);
	if (ret == SU_ISCONNECTED)
		soisconnected(so);
	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
		pgsigio(&so->so_sigio, SIGIO, 0);
	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}
369
370 /*
371 * Socket buffer (struct sockbuf) utility routines.
372 *
373 * Each socket contains two socket buffers: one for sending data and one for
374 * receiving data. Each buffer contains a queue of mbufs, information about
375 * the number of mbufs and amount of data in the queue, and other fields
376 * allowing select() statements and notification on data availability to be
377 * implemented.
378 *
379 * Data stored in a socket buffer is maintained as a list of records. Each
380 * record is a list of mbufs chained together with the m_next field. Records
381 * are chained together with the m_nextpkt field. The upper level routine
382 * soreceive() expects the following conventions to be observed when placing
383 * information in the receive buffer:
384 *
385 * 1. If the protocol requires each message be preceded by the sender's name,
386 * then a record containing that name must be present before any
387 * associated data (mbuf's must be of type MT_SONAME).
388 * 2. If the protocol supports the exchange of ``access rights'' (really just
389 * additional data associated with the message), and there are ``rights''
390 * to be received, then a record containing this data should be present
391 * (mbuf's must be of type MT_RIGHTS).
392 * 3. If a name or rights record exists, then it must be followed by a data
393 * record, perhaps of zero length.
394 *
395 * Before using a new socket structure it is first necessary to reserve
396 * buffer space to the socket, by calling sbreserve(). This should commit
397 * some of the available buffer space in the system buffer pool for the
398 * socket (currently, it does nothing but enforce limits). The space should
399 * be released by calling sbrelease() when the socket is destroyed.
400 */
/*
 * Reserve send and receive buffer space for a socket and establish
 * sane low-water marks.  Returns 0 on success or ENOBUFS if either
 * reservation fails (in which case neither remains reserved).
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
	struct thread *td = curthread;

	/* Lock order: send buffer before receive buffer. */
	SOCKBUF_LOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
		goto bad;
	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (0);
bad2:
	/* Undo the successful send-side reservation. */
	sbrelease_locked(&so->so_snd, so);
bad:
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (ENOBUFS);
}
428
429 static int
430 sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
431 {
432 int error = 0;
433 u_long tmp_sb_max = sb_max;
434
435 error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
436 if (error || !req->newptr)
437 return (error);
438 if (tmp_sb_max < MSIZE + MCLBYTES)
439 return (EINVAL);
440 sb_max = tmp_sb_max;
441 sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
442 return (0);
443 }
444
445 /*
446 * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't
447 * become limiting if buffering efficiency is near the normal case.
448 */
/*
 * Reserve cc bytes of space for a socket buffer.  Returns 1 on success
 * and 0 on failure (over the global cap or the per-user RLIMIT_SBSIZE
 * accounting limit).  Note the inverted 0/1 convention versus errno.
 */
int
sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
    struct thread *td)
{
	rlim_t sbsize_limit;

	SOCKBUF_LOCK_ASSERT(sb);

	/*
	 * When a thread is passed, we take into account the thread's socket
	 * buffer size limit.  The caller will generally pass curthread, but
	 * in the TCP input path, NULL will be passed to indicate that no
	 * appropriate thread resource limits are available.  In that case,
	 * we don't apply a process limit.
	 */
	if (cc > sb_max_adj)
		return (0);
	if (td != NULL) {
		sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
	} else
		sbsize_limit = RLIM_INFINITY;
	/* chgsbsize() both checks the limit and updates sb_hiwat. */
	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
	    sbsize_limit))
		return (0);
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
478
479 int
480 sbsetopt(struct socket *so, int cmd, u_long cc)
481 {
482 struct sockbuf *sb;
483 short *flags;
484 u_int *hiwat, *lowat;
485 int error;
486
487 sb = NULL;
488 SOCK_LOCK(so);
489 if (SOLISTENING(so)) {
490 switch (cmd) {
491 case SO_SNDLOWAT:
492 case SO_SNDBUF:
493 lowat = &so->sol_sbsnd_lowat;
494 hiwat = &so->sol_sbsnd_hiwat;
495 flags = &so->sol_sbsnd_flags;
496 break;
497 case SO_RCVLOWAT:
498 case SO_RCVBUF:
499 lowat = &so->sol_sbrcv_lowat;
500 hiwat = &so->sol_sbrcv_hiwat;
501 flags = &so->sol_sbrcv_flags;
502 break;
503 }
504 } else {
505 switch (cmd) {
506 case SO_SNDLOWAT:
507 case SO_SNDBUF:
508 sb = &so->so_snd;
509 break;
510 case SO_RCVLOWAT:
511 case SO_RCVBUF:
512 sb = &so->so_rcv;
513 break;
514 }
515 flags = &sb->sb_flags;
516 hiwat = &sb->sb_hiwat;
517 lowat = &sb->sb_lowat;
518 SOCKBUF_LOCK(sb);
519 }
520
521 error = 0;
522 switch (cmd) {
523 case SO_SNDBUF:
524 case SO_RCVBUF:
525 if (SOLISTENING(so)) {
526 if (cc > sb_max_adj) {
527 error = ENOBUFS;
528 break;
529 }
530 *hiwat = cc;
531 if (*lowat > *hiwat)
532 *lowat = *hiwat;
533 } else {
534 if (!sbreserve_locked(sb, cc, so, curthread))
535 error = ENOBUFS;
536 }
537 if (error == 0)
538 *flags &= ~SB_AUTOSIZE;
539 break;
540 case SO_SNDLOWAT:
541 case SO_RCVLOWAT:
542 /*
543 * Make sure the low-water is never greater than the
544 * high-water.
545 */
546 *lowat = (cc > *hiwat) ? *hiwat : cc;
547 break;
548 }
549
550 if (!SOLISTENING(so))
551 SOCKBUF_UNLOCK(sb);
552 SOCK_UNLOCK(so);
553 return (error);
554 }
555
556 /*
557 * Free mbufs held by a socket, and reserved mbuf space.
558 */
void
sbrelease_internal(struct sockbuf *sb, struct socket *so)
{

	sbflush_internal(sb);
	/* Return the reserved space to the per-user accounting. */
	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
	    RLIM_INFINITY);
	sb->sb_mbmax = 0;
}
568
/* Locked-entry wrapper for sbrelease_internal(). */
void
sbrelease_locked(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbrelease_internal(sb, so);
}
577
/* Unlocked wrapper: takes and releases the socket buffer lock. */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK(sb);
	sbrelease_locked(sb, so);
	SOCKBUF_UNLOCK(sb);
}
586
/* Teardown path: release without locking (socket is being destroyed). */
void
sbdestroy(struct sockbuf *sb, struct socket *so)
{

	sbrelease_internal(sb, so);
}
593
594 /*
595 * Routines to add and remove data from an mbuf queue.
596 *
597 * The routines sbappend() or sbappendrecord() are normally called to append
598 * new mbufs to a socket buffer, after checking that adequate space is
599 * available, comparing the function sbspace() with the amount of data to be
600 * added. sbappendrecord() differs from sbappend() in that data supplied is
601 * treated as the beginning of a new record. To place a sender's address,
602 * optional access rights, and data in a socket receive buffer,
603 * sbappendaddr() should be used. To place access rights and data in a
604 * socket receive buffer, sbappendrights() should be used. In either case,
605 * the new data begins a new record. Note that unlike sbappend() and
606 * sbappendrecord(), these routines check for the caller that there will be
607 * enough space to store the data. Each fails if there is not enough space,
608 * or if it cannot find mbufs to store additional information in.
609 *
610 * Reliable protocols may use the socket send buffer to hold data awaiting
611 * acknowledgement. Data is normally copied from a socket send buffer in a
612 * protocol with m_copy for output to a peer, and then removing the data from
613 * the socket buffer with sbdrop() or sbdroprecord() when the data is
614 * acknowledged by the peer.
615 */
616 #ifdef SOCKBUF_DEBUG
/*
 * Debug-only check that sb_lastrecord really points at the last record
 * reachable via the m_nextpkt chain; panics with a dump otherwise.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;

	SOCKBUF_LOCK_ASSERT(sb);

	/* Walk to the last record in the packet chain. */
	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
		    __func__, sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("%s from %s:%u", __func__, file, line);
	}
}
636
/*
 * Debug-only check that sb_mbtail points at the last mbuf of the last
 * record; panics with a dump of the whole buffer otherwise.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	/* Find the last record, then the last mbuf within it. */
	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
		    __func__, sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("%s from %s:%u", __func__, file, line);
	}
}
664 #endif /* SOCKBUF_DEBUG */
665
/*
 * Link mbuf chain m0 into sb as a new record: append it to the current
 * last record's m_nextpkt (or make it the first record) and update
 * sb_lastrecord.  Caller must hold the socket buffer lock.
 */
#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)
674
675 /*
676 * Append mbuf chain m to the last record in the socket buffer sb. The
677 * additional space associated the mbuf chain is recorded in sb. Empty mbufs
678 * are discarded and mbufs are compacted where possible.
679 */
void
sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags)
{
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m == NULL)
		return;
	sbm_clrprotoflags(m, flags);
	SBLASTRECORDCHK(sb);
	n = sb->sb_mb;
	if (n) {
		/* Walk to the last record, then scan it for an EOR mark. */
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				/* Record is terminated: start a new one. */
				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		if ((n = sb->sb_lastrecord) != NULL) {
			do {
				if (n->m_flags & M_EOR) {
					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
					return;
				}
			} while (n->m_next && (n = n->m_next));
		} else {
			/*
			 * If this is the first record in the socket buffer,
			 * it's also the last record.
			 */
			sb->sb_lastrecord = m;
		}
	}
	/* No EOR found: merge m onto the tail of the last record. */
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb);
}
725
726 /*
727 * Append mbuf chain m to the last record in the socket buffer sb. The
728 * additional space associated the mbuf chain is recorded in sb. Empty mbufs
729 * are discarded and mbufs are compacted where possible.
730 */
/* Unlocked wrapper for sbappend_locked(). */
void
sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
{

	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}
739
740 /*
741 * This version of sbappend() should only be used when the caller absolutely
742 * knows that there will never be more than one record in the socket buffer,
743 * that is, a stream protocol (such as TCP).
744 */
void
sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
{
	SOCKBUF_LOCK_ASSERT(sb);

	/* Stream buffers hold exactly one record. */
	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));

	SBLASTMBUFCHK(sb);

	/* Remove all packet headers and mbuf tags to get a pure data chain. */
	m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb);
}
763
764 /*
765 * This version of sbappend() should only be used when the caller absolutely
766 * knows that there will never be more than one record in the socket buffer,
767 * that is, a stream protocol (such as TCP).
768 */
/* Unlocked wrapper for sbappendstream_locked(). */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}
777
778 #ifdef SOCKBUF_DEBUG
/*
 * Debug-only consistency check: walk every mbuf in every record and
 * recompute the sb_acc/sb_ccc/sb_mbcnt counters, panicking on any
 * mismatch or structural violation (empty mbuf, misplaced fnrdy,
 * available mbuf behind the first-not-ready one).
 */
void
sbcheck(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m, *n, *fnrdy;
	u_long acc, ccc, mbcnt;

	SOCKBUF_LOCK_ASSERT(sb);

	acc = ccc = mbcnt = 0;
	fnrdy = NULL;

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			if (m->m_len == 0) {
				printf("sb %p empty mbuf %p\n", sb, m);
				goto fail;
			}
			/* First not-ready mbuf must match sb_fnrdy. */
			if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
				if (m != sb->sb_fnrdy) {
					printf("sb %p: fnrdy %p != m %p\n",
					    sb, sb->sb_fnrdy, m);
					goto fail;
				}
				fnrdy = m;
			}
			/* Everything after fnrdy must be unavailable. */
			if (fnrdy) {
				if (!(m->m_flags & M_NOTAVAIL)) {
					printf("sb %p: fnrdy %p, m %p is avail\n",
					    sb, sb->sb_fnrdy, m);
					goto fail;
				}
			} else
				acc += m->m_len;
			ccc += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
		printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
		    acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
		goto fail;
	}
	return;
fail:
	panic("%s from %s:%u", __func__, file, line);
}
828 #endif
829
830 /*
831 * As above, except the mbuf chain begins a new record.
832 */
void
sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 == NULL)
		return;
	m_clrprotoflags(m0);
	/*
	 * Put the first mbuf on the queue.  Note this permits zero length
	 * records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb);
	SBLINKRECORD(sb, m0);
	sb->sb_mbtail = m0;
	/* Detach the rest of the chain; sbcompress() re-appends it. */
	m = m0->m_next;
	m0->m_next = 0;
	/* Move an EOR mark from the record head onto the remaining chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	/* always call sbcompress() so it can do SBLASTMBUFCHK() */
	sbcompress(sb, m, m0);
}
860
861 /*
862 * As above, except the mbuf chain begins a new record.
863 */
/* Unlocked wrapper for sbappendrecord_locked(). */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}
872
873 /* Helper routine that appends data, control, and address to a sockbuf. */
/* Helper routine that appends data, control, and address to a sockbuf. */
static int
sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
{
	struct mbuf *m, *n, *nlast;
#if MSIZE <= 256
	/* The address must fit in a single non-cluster mbuf. */
	if (asa->sa_len > MLEN)
		return (0);
#endif
	m = m_get(M_NOWAIT, MT_SONAME);
	if (m == NULL)
		return (0);
	m->m_len = asa->sa_len;
	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
	if (m0) {
		m_clrprotoflags(m0);
		m_tag_delete_chain(m0, NULL);
		/*
		 * Clear some persistent info from pkthdr.
		 * We don't use m_demote(), because some netgraph consumers
		 * expect M_PKTHDR presence.
		 */
		m0->m_pkthdr.rcvif = NULL;
		m0->m_pkthdr.flowid = 0;
		m0->m_pkthdr.csum_flags = 0;
		m0->m_pkthdr.fibnum = 0;
		m0->m_pkthdr.rsstype = 0;
	}
	if (ctrl_last)
		ctrl_last->m_next = m0;	/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	/* Record: address mbuf -> control chain -> data chain. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}
919
920 /*
921 * Append address and data, and optionally, control (ancillary) data to the
922 * receive queue of a socket. If present, m0 must include a packet header
923 * with total length. Returns 0 if no space in sockbuf or insufficient
924 * mbufs.
925 */
int
sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *ctrl_last;
	int space = asa->sa_len;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr_locked");
	/* Total record size: address + data + control. */
	if (m0)
		space += m0->m_pkthdr.len;
	space += m_length(control, &ctrl_last);

	if (space > sbspace(sb))
		return (0);
	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
}
945
946 /*
947 * Append address and data, and optionally, control (ancillary) data to the
948 * receive queue of a socket. If present, m0 must include a packet header
949 * with total length. Returns 0 if insufficient mbufs. Does not validate space
950 * on the receiving sockbuf.
951 */
int
sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *ctrl_last;

	SOCKBUF_LOCK_ASSERT(sb);

	/* No sbspace() check; only locate the end of the control chain. */
	ctrl_last = (control == NULL) ? NULL : m_last(control);
	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
}
963
964 /*
965 * Append address and data, and optionally, control (ancillary) data to the
966 * receive queue of a socket. If present, m0 must include a packet header
967 * with total length. Returns 0 if no space in sockbuf or insufficient
968 * mbufs.
969 */
/* Unlocked wrapper for sbappendaddr_locked(). */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendaddr_locked(sb, asa, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}
981
/*
 * Append control data followed by data m0 as a single new record.
 * Unlike sbappendaddr_locked(), no space check is performed here;
 * the caller is responsible for that.
 */
void
sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
    struct mbuf *control, int flags)
{
	struct mbuf *m, *mlast;

	sbm_clrprotoflags(m0, flags);
	/* Concatenate the data chain after the control chain. */
	m_last(control)->m_next = m0;

	SBLASTRECORDCHK(sb);

	for (m = control; m->m_next; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
}
1004
/* Unlocked wrapper for sbappendcontrol_locked(). */
void
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
    int flags)
{

	SOCKBUF_LOCK(sb);
	sbappendcontrol_locked(sb, m0, control, flags);
	SOCKBUF_UNLOCK(sb);
}
1014
1015 /*
1016 * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
1017 * (n). If (n) is NULL, the buffer is presumed empty.
1018 *
1019 * When the data is compressed, mbufs in the chain may be handled in one of
1020 * three ways:
1021 *
1022 * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
1023 * record boundary, and no change in data type).
1024 *
1025 * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
1026 * an mbuf already in the socket buffer. This can occur if an
1027 * appropriate mbuf exists, there is room, both mbufs are not marked as
1028 * not ready, and no merging of data types will occur.
1029 *
1030 * (3) The mbuf may be appended to the end of the existing mbuf chain.
1031 *
1032 * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
1033 * end-of-record.
1034 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor = 0;
	struct mbuf *o;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Case (1): drop an empty mbuf, unless it carries the only
		 * EOR mark and there is no same-type mbuf to move it to.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Case (2): coalesce m's data into the tail mbuf n when n
		 * is writable, both are ready, types match, the copy is
		 * small and n has room.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(n) &&
		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
		    !(m->m_flags & M_NOTREADY) &&
		    !(n->m_flags & M_NOTREADY) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_ccc += m->m_len;
			if (sb->sb_fnrdy == NULL)
				sb->sb_acc += m->m_len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				/* XXX: Probably don't need.*/
				sb->sb_ctl += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Case (3): link m onto the end of the chain. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Restore the EOR mark on the final appended mbuf. */
	if (eor) {
		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
		n->m_flags |= eor;
	}
	SBLASTMBUFCHK(sb);
}
1091
1092 /*
1093 * Free all mbufs in a sockbuf. Check that all resources are reclaimed.
1094 */
static void
sbflush_internal(struct sockbuf *sb)
{

	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
	}
	/* All data, mbufs, and accounting must be gone. */
	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
	    ("%s: ccc %u mb %p mbcnt %u", __func__,
	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
}
1112
/* Locked-entry wrapper for sbflush_internal(). */
void
sbflush_locked(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);
	sbflush_internal(sb);
}
1120
/* Unlocked wrapper: takes and releases the socket buffer lock. */
void
sbflush(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
1129
1130 /*
1131 * Cut data from (the front of) a sockbuf.
1132 */
/*
 * Cut data from (the front of) a sockbuf.
 * Returns the chain of removed mbufs for the caller to free; mbufs
 * marked M_NOTREADY are unlinked from the accounting but not returned,
 * since they are still referenced from outside the buffer.
 */
static struct mbuf *
sbcut_internal(struct sockbuf *sb, int len)
{
	struct mbuf *m, *next, *mfree;

	KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
	    __func__, len));
	KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u",
	    __func__, len, sb->sb_ccc));

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	mfree = NULL;

	while (len > 0) {
		if (m == NULL) {
			/* Current record exhausted; move to the next one. */
			KASSERT(next, ("%s: no next, len %d", __func__, len));
			m = next;
			next = m->m_nextpkt;
		}
		if (m->m_len > len) {
			/* Partial trim of this mbuf finishes the cut. */
			KASSERT(!(m->m_flags & M_NOTAVAIL),
			    ("%s: m %p M_NOTAVAIL", __func__, m));
			m->m_len -= len;
			m->m_data += len;
			sb->sb_ccc -= len;
			sb->sb_acc -= len;
			if (sb->sb_sndptroff != 0)
				sb->sb_sndptroff -= len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				sb->sb_ctl -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		/*
		 * Do not put M_NOTREADY buffers to the free list, they
		 * are referenced from outside.
		 */
		if (m->m_flags & M_NOTREADY)
			m = m->m_next;
		else {
			struct mbuf *n;

			n = m->m_next;
			m->m_next = mfree;
			mfree = m;
			m = n;
		}
	}
	/*
	 * Free any zero-length mbufs from the buffer.
	 * For SOCK_DGRAM sockets such mbufs represent empty records.
	 * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer,
	 * when sosend_generic() needs to send only control data.
	 */
	while (m && m->m_len == 0) {
		struct mbuf *n;

		sbfree(sb, m);
		n = m->m_next;
		m->m_next = mfree;
		mfree = m;
		m = n;
	}
	/* Reattach the remainder of the current record, if any. */
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
	 * sb_lastrecord is up-to-date if we dropped part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL) {
		sb->sb_lastrecord = m;
	}

	return (mfree);
}
1216
1217 /*
1218 * Drop data from (the front of) a sockbuf.
1219 */
/*
 * Drop len bytes from the front of a sockbuf whose lock is held,
 * freeing the detached mbufs immediately.
 */
void
sbdrop_locked(struct sockbuf *sb, int len)
{
	struct mbuf *detached;

	SOCKBUF_LOCK_ASSERT(sb);
	detached = sbcut_internal(sb, len);
	m_freem(detached);
}
1227
1228 /*
1229 * Drop data from (the front of) a sockbuf,
1230 * and return it to caller.
1231 */
/*
 * Drop len bytes from the front of a locked sockbuf and hand the
 * detached mbuf chain back to the caller, who is responsible for
 * freeing it.
 */
struct mbuf *
sbcut_locked(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK_ASSERT(sb);
	return (sbcut_internal(sb, len));
}
1239
/*
 * Drop len bytes from the front of an unlocked sockbuf.  The detached
 * chain is freed only after the sockbuf lock has been released, so the
 * (potentially expensive) m_freem() does not run under the lock.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *free_chain;

	SOCKBUF_LOCK(sb);
	free_chain = sbcut_internal(sb, len);
	SOCKBUF_UNLOCK(sb);

	m_freem(free_chain);
}
1251
1252 /*
1253 * Maintain a pointer and offset pair into the socket buffer mbuf chain to
1254 * avoid traversal of the entire socket buffer for larger offsets.
1255 */
struct mbuf *
sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
{
	struct mbuf *m, *ret;

	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
	KASSERT(off + len <= sb->sb_acc, ("%s: beyond sb", __func__));
	KASSERT(sb->sb_sndptroff <= sb->sb_acc, ("%s: sndptroff broken", __func__));

	/*
	 * Is off below stored offset? Happens on retransmits.
	 * Just return, we can't help here.
	 */
	if (sb->sb_sndptroff > off) {
		*moff = off;
		return (sb->sb_mb);
	}

	/* Return closest mbuf in chain for current offset. */
	*moff = off - sb->sb_sndptroff;
	m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;
	if (*moff == m->m_len) {
		/* Offset sits exactly at the end of this mbuf: step past it. */
		*moff = 0;
		sb->sb_sndptroff += m->m_len;
		m = ret = m->m_next;
		/*
		 * NOTE(review): this KASSERT dereferences ret, which is NULL
		 * if the cached mbuf was the last in the chain; on a
		 * non-INVARIANTS kernel the loop below tolerates m == NULL,
		 * but the panic path here relies on the off/len KASSERTs
		 * above — confirm callers never pass off == sb_acc.
		 */
		KASSERT(ret->m_len > 0,
		    ("mbuf %p in sockbuf %p chain has no valid data", ret, sb));
	}

	/* Advance by len to be as close as possible for the next transmit. */
	for (off = off - sb->sb_sndptroff + len - 1;
	    off > 0 && m != NULL && off >= m->m_len;
	    m = m->m_next) {
		sb->sb_sndptroff += off >= m->m_len ? m->m_len : 0;
		off -= m->m_len;
	}
	if (off > 0 && m == NULL)
		panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret);
	sb->sb_sndptr = m;

	return (ret);
}
1298
1299 struct mbuf *
1300 sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
1301 {
1302 struct mbuf *m;
1303
1304 KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
1305 if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
1306 *moff = off;
1307 if (sb->sb_sndptr == NULL) {
1308 sb->sb_sndptr = sb->sb_mb;
1309 sb->sb_sndptroff = 0;
1310 }
1311 return (sb->sb_mb);
1312 } else {
1313 m = sb->sb_sndptr;
1314 off -= sb->sb_sndptroff;
1315 }
1316 *moff = off;
1317 return (m);
1318 }
1319
1320 void
1321 sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
1322 {
1323 /*
1324 * A small copy was done, advance forward the sb_sbsndptr to cover
1325 * it.
1326 */
1327 struct mbuf *m;
1328
1329 if (mb != sb->sb_sndptr) {
1330 /* Did not copyout at the same mbuf */
1331 return;
1332 }
1333 m = mb;
1334 while (m && (len > 0)) {
1335 if (len >= m->m_len) {
1336 len -= m->m_len;
1337 if (m->m_next) {
1338 sb->sb_sndptroff += m->m_len;
1339 sb->sb_sndptr = m->m_next;
1340 }
1341 m = m->m_next;
1342 } else {
1343 len = 0;
1344 }
1345 }
1346 }
1347
1348 /*
1349 * Return the first mbuf and the mbuf data offset for the provided
1350 * send offset without changing the "sb_sndptroff" field.
1351 */
1352 struct mbuf *
1353 sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff)
1354 {
1355 struct mbuf *m;
1356
1357 KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
1358
1359 /*
1360 * If the "off" is below the stored offset, which happens on
1361 * retransmits, just use "sb_mb":
1362 */
1363 if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
1364 m = sb->sb_mb;
1365 } else {
1366 m = sb->sb_sndptr;
1367 off -= sb->sb_sndptroff;
1368 }
1369 while (off > 0 && m != NULL) {
1370 if (off < m->m_len)
1371 break;
1372 off -= m->m_len;
1373 m = m->m_next;
1374 }
1375 *moff = off;
1376 return (m);
1377 }
1378
1379 /*
1380 * Drop a record off the front of a sockbuf and move the next record to the
1381 * front.
1382 */
1383 void
1384 sbdroprecord_locked(struct sockbuf *sb)
1385 {
1386 struct mbuf *m;
1387
1388 SOCKBUF_LOCK_ASSERT(sb);
1389
1390 m = sb->sb_mb;
1391 if (m) {
1392 sb->sb_mb = m->m_nextpkt;
1393 do {
1394 sbfree(sb, m);
1395 m = m_free(m);
1396 } while (m);
1397 }
1398 SB_EMPTY_FIXUP(sb);
1399 }
1400
1401 /*
1402 * Drop a record off the front of a sockbuf and move the next record to the
1403 * front.
1404 */
/*
 * Locked wrapper around sbdroprecord_locked(): take the sockbuf lock,
 * drop the leading record, and release the lock.
 */
void
sbdroprecord(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
1413
1414 /*
1415 * Create a "control" mbuf containing the specified data with the specified
1416 * type for presentation on a socket buffer.
1417 */
1418 struct mbuf *
1419 sbcreatecontrol(caddr_t p, int size, int type, int level)
1420 {
1421 struct cmsghdr *cp;
1422 struct mbuf *m;
1423
1424 if (CMSG_SPACE((u_int)size) > MCLBYTES)
1425 return ((struct mbuf *) NULL);
1426 if (CMSG_SPACE((u_int)size) > MLEN)
1427 m = m_getcl(M_NOWAIT, MT_CONTROL, 0);
1428 else
1429 m = m_get(M_NOWAIT, MT_CONTROL);
1430 if (m == NULL)
1431 return ((struct mbuf *) NULL);
1432 cp = mtod(m, struct cmsghdr *);
1433 m->m_len = 0;
1434 KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
1435 ("sbcreatecontrol: short mbuf"));
1436 /*
1437 * Don't leave the padding between the msg header and the
1438 * cmsg data and the padding after the cmsg data un-initialized.
1439 */
1440 bzero(cp, CMSG_SPACE((u_int)size));
1441 if (p != NULL)
1442 (void)memcpy(CMSG_DATA(cp), p, size);
1443 m->m_len = CMSG_SPACE(size);
1444 cp->cmsg_len = CMSG_LEN(size);
1445 cp->cmsg_level = level;
1446 cp->cmsg_type = type;
1447 return (m);
1448 }
1449
1450 /*
1451 * This does the same for socket buffers that sotoxsocket does for sockets:
1452 * generate an user-format data structure describing the socket buffer. Note
1453 * that the xsockbuf structure, since it is always embedded in a socket, does
1454 * not include a self pointer nor a length. We make this entry point public
1455 * in case some other mechanism needs it.
1456 */
1457 void
1458 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
1459 {
1460
1461 xsb->sb_cc = sb->sb_ccc;
1462 xsb->sb_hiwat = sb->sb_hiwat;
1463 xsb->sb_mbcnt = sb->sb_mbcnt;
1464 xsb->sb_mcnt = sb->sb_mcnt;
1465 xsb->sb_ccnt = sb->sb_ccnt;
1466 xsb->sb_mbmax = sb->sb_mbmax;
1467 xsb->sb_lowat = sb->sb_lowat;
1468 xsb->sb_flags = sb->sb_flags;
1469 xsb->sb_timeo = sb->sb_timeo;
1470 }
1471
/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
/* kern.ipc.maxsockbuf: tunable cap enforced via sysctl_handle_sb_max. */
SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
    &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
/* kern.ipc.sockbuf_waste_factor: read/write knob backing sb_efficiency. */
SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "Socket buffer size waste factor");
Cache object: 01c8ea09a9b01e82ecd7f7a3d79bec9d
|