1 /*
2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
3 * Copyright (c) 1982, 1986, 1988, 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
31 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.17 2002/08/31 19:04:55 dwmalone Exp $
32 * $DragonFly: src/sys/kern/uipc_sockbuf.c,v 1.3 2007/08/09 01:10:04 dillon Exp $
33 */
34
35 #include "opt_param.h"
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/domain.h>
39 #include <sys/file.h> /* for maxfiles */
40 #include <sys/kernel.h>
41 #include <sys/proc.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/protosw.h>
45 #include <sys/resourcevar.h>
46 #include <sys/stat.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49
50 #include <sys/thread2.h>
51 #include <sys/msgport2.h>
52
/*
 * Routines to add and remove data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer.  sbappendrecord() differs from
 * sbappend() in that the data supplied is treated as the beginning of a
 * new record.  sbappend() only begins a new record if the first or last
 * mbuf of the last record in the sockbuf is marked M_EOR.
 *
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() or sbappendcontrol() should be
 * used.  These functions also begin a new record.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  A protocol normally copies data out of the
 * socket send buffer with m_copy for output to a peer, and then removes
 * that data from the socket buffer with sbdrop() or sbdroprecord() once
 * the peer has acknowledged it.
 */
72
73 /*
74 * Append mbuf chain m to the last record in the socket buffer sb.
75 * The additional space associated the mbuf chain is recorded in sb.
76 * Empty mbufs are discarded and mbufs are compacted where possible.
77 *
78 * If M_EOR is set in the first or last mbuf of the last record, the
79 * mbuf chain is appended as a new record. M_EOR is usually just set
80 * in the last mbuf of the last record's mbuf chain (see sbcompress()),
81 * but this may be changed in the future since there is no real need
82 * to propogate the flag any more.
83 */
84 void
85 sbappend(struct sockbuf *sb, struct mbuf *m)
86 {
87 struct mbuf *n;
88
89 mbuftrackid(m, 16);
90
91 if (m) {
92 n = sb->sb_lastrecord;
93 if (n) {
94 if (n->m_flags & M_EOR) {
95 sbappendrecord(sb, m);
96 return;
97 }
98 }
99 n = sb->sb_lastmbuf;
100 if (n) {
101 if (n->m_flags & M_EOR) {
102 sbappendrecord(sb, m);
103 return;
104 }
105 }
106 sbcompress(sb, m, n);
107 }
108 }
109
110 /*
111 * sbappendstream() is an optimized form of sbappend() for protocols
112 * such as TCP that only have one record in the socket buffer, are
113 * not PR_ATOMIC, nor allow MT_CONTROL data. A protocol that uses
114 * sbappendstream() must use sbappendstream() exclusively.
115 */
116 void
117 sbappendstream(struct sockbuf *sb, struct mbuf *m)
118 {
119 mbuftrackid(m, 17);
120 KKASSERT(m->m_nextpkt == NULL);
121 sbcompress(sb, m, sb->sb_lastmbuf);
122 }
123
124 #ifdef SOCKBUF_DEBUG
125
126 void
127 _sbcheck(struct sockbuf *sb)
128 {
129 struct mbuf *m;
130 struct mbuf *n = NULL;
131 u_long len = 0, mbcnt = 0;
132
133 for (m = sb->sb_mb; m; m = n) {
134 n = m->m_nextpkt;
135 if (n == NULL && sb->sb_lastrecord != m) {
136 kprintf("sockbuf %p mismatched lastrecord %p vs %p\n", sb, sb->sb_lastrecord, m);
137 panic("sbcheck1");
138
139 }
140 for (; m; m = m->m_next) {
141 len += m->m_len;
142 mbcnt += MSIZE;
143 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
144 mbcnt += m->m_ext.ext_size;
145 if (n == NULL && m->m_next == NULL) {
146 if (sb->sb_lastmbuf != m) {
147 kprintf("sockbuf %p mismatched lastmbuf %p vs %p\n", sb, sb->sb_lastmbuf, m);
148 panic("sbcheck2");
149 }
150 }
151 }
152 }
153 if (sb->sb_mb == NULL) {
154 if (sb->sb_lastrecord != NULL) {
155 kprintf("sockbuf %p is empty, lastrecord not NULL: %p\n",
156 sb, sb->sb_lastrecord);
157 panic("sbcheck3");
158 }
159 if (sb->sb_lastmbuf != NULL) {
160 kprintf("sockbuf %p is empty, lastmbuf not NULL: %p\n",
161 sb, sb->sb_lastmbuf);
162 panic("sbcheck4");
163 }
164 }
165 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
166 kprintf("sockbuf %p cc %ld != %ld || mbcnt %ld != %ld\n",
167 sb, len, sb->sb_cc, mbcnt, sb->sb_mbcnt);
168 panic("sbcheck5");
169 }
170 }
171
172 #endif
173
174 /*
175 * Same as sbappend(), except the mbuf chain begins a new record.
176 */
177 void
178 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
179 {
180 struct mbuf *firstmbuf;
181 struct mbuf *secondmbuf;
182
183 if (m0 == NULL)
184 return;
185 mbuftrackid(m0, 18);
186
187 sbcheck(sb);
188
189 /*
190 * Break the first mbuf off from the rest of the mbuf chain.
191 */
192 firstmbuf = m0;
193 secondmbuf = m0->m_next;
194 m0->m_next = NULL;
195
196 /*
197 * Insert the first mbuf of the m0 mbuf chain as the last record of
198 * the sockbuf. Note this permits zero length records! Keep the
199 * sockbuf state consistent.
200 */
201 if (sb->sb_mb == NULL)
202 sb->sb_mb = firstmbuf;
203 else
204 sb->sb_lastrecord->m_nextpkt = firstmbuf;
205 sb->sb_lastrecord = firstmbuf; /* update hint for new last record */
206 sb->sb_lastmbuf = firstmbuf; /* update hint for new last mbuf */
207
208 /*
209 * propagate the EOR flag so sbcompress() can pick it up
210 */
211 if ((firstmbuf->m_flags & M_EOR) && (secondmbuf != NULL)) {
212 firstmbuf->m_flags &= ~M_EOR;
213 secondmbuf->m_flags |= M_EOR;
214 }
215
216 /*
217 * The succeeding call to sbcompress() omits accounting for
218 * the first mbuf, so do it here.
219 */
220 sballoc(sb, firstmbuf);
221
222 /* Compact the rest of the mbuf chain in after the first mbuf. */
223 sbcompress(sb, secondmbuf, firstmbuf);
224 }
225
226 /*
227 * Append address and data, and optionally, control (ancillary) data
228 * to the receive queue of a socket. If present,
229 * m0 must include a packet header with total length.
230 * Returns 0 if insufficient mbufs.
231 */
232 int
233 sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
234 struct mbuf *control)
235 {
236 struct mbuf *m, *n;
237 int eor;
238
239 mbuftrackid(m0, 19);
240 mbuftrackid(control, 20);
241 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
242 panic("sbappendaddr");
243 sbcheck(sb);
244
245 for (n = control; n; n = n->m_next) {
246 if (n->m_next == NULL) /* keep pointer to last control buf */
247 break;
248 }
249 if (asa->sa_len > MLEN)
250 return (0);
251 MGET(m, MB_DONTWAIT, MT_SONAME);
252 if (m == NULL)
253 return (0);
254 KKASSERT(m->m_nextpkt == NULL);
255 m->m_len = asa->sa_len;
256 bcopy(asa, mtod(m, caddr_t), asa->sa_len);
257 if (n)
258 n->m_next = m0; /* concatenate data to control */
259 else
260 control = m0;
261 m->m_next = control;
262 for (n = m; n; n = n->m_next)
263 sballoc(sb, n);
264
265 if (sb->sb_mb == NULL)
266 sb->sb_mb = m;
267 else
268 sb->sb_lastrecord->m_nextpkt = m;
269 sb->sb_lastrecord = m;
270
271 /*
272 * Propogate M_EOR to the last mbuf and calculate sb_lastmbuf
273 * so sbappend() can find it.
274 */
275 eor = m->m_flags;
276 while (m->m_next) {
277 m->m_flags &= ~M_EOR;
278 m = m->m_next;
279 eor |= m->m_flags;
280 }
281 m->m_flags |= eor & M_EOR;
282 sb->sb_lastmbuf = m;
283
284 return (1);
285 }
286
287 /*
288 * Append control information followed by data. Both the control and data
289 * must be non-null.
290 */
291 int
292 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
293 {
294 struct mbuf *n;
295 u_int length, cmbcnt, m0mbcnt;
296 int eor;
297
298 KASSERT(control != NULL, ("sbappendcontrol"));
299 KKASSERT(control->m_nextpkt == NULL);
300 sbcheck(sb);
301
302 mbuftrackid(m0, 21);
303 mbuftrackid(control, 22);
304
305 length = m_countm(control, &n, &cmbcnt) + m_countm(m0, NULL, &m0mbcnt);
306
307 KKASSERT(m0 != NULL);
308
309 n->m_next = m0; /* concatenate data to control */
310
311 if (sb->sb_mb == NULL)
312 sb->sb_mb = control;
313 else
314 sb->sb_lastrecord->m_nextpkt = control;
315 sb->sb_lastrecord = control;
316
317 /*
318 * Propogate M_EOR to the last mbuf and calculate sb_lastmbuf
319 * so sbappend() can find it.
320 */
321 eor = m0->m_flags;
322 while (m0->m_next) {
323 m0->m_flags &= ~M_EOR;
324 m0 = m0->m_next;
325 eor |= m0->m_flags;
326 }
327 m0->m_flags |= eor & M_EOR;
328 sb->sb_lastmbuf = m0;
329
330 sb->sb_cc += length;
331 sb->sb_mbcnt += cmbcnt + m0mbcnt;
332
333 return (1);
334 }
335
336 /*
337 * Compress mbuf chain m into the socket buffer sb following mbuf tailm.
338 * If tailm is null, the buffer is presumed empty. Also, as a side-effect,
339 * increment the sockbuf counts for each mbuf in the chain.
340 */
341 void
342 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *tailm)
343 {
344 int eor = 0;
345 struct mbuf *free_chain = NULL;
346
347 mbuftrackid(m, 23);
348
349 sbcheck(sb);
350 while (m) {
351 struct mbuf *o;
352
353 eor |= m->m_flags & M_EOR;
354 /*
355 * Disregard empty mbufs as long as we don't encounter
356 * an end-of-record or there is a trailing mbuf of
357 * the same type to propagate the EOR flag to.
358 *
359 * Defer the m_free() call because it can block and break
360 * the atomicy of the sockbuf.
361 */
362 if (m->m_len == 0 &&
363 (eor == 0 ||
364 (((o = m->m_next) || (o = tailm)) &&
365 o->m_type == m->m_type))) {
366 o = m->m_next;
367 m->m_next = free_chain;
368 free_chain = m;
369 m = o;
370 continue;
371 }
372
373 /* See if we can coalesce with preceding mbuf. */
374 if (tailm && !(tailm->m_flags & M_EOR) && M_WRITABLE(tailm) &&
375 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
376 m->m_len <= M_TRAILINGSPACE(tailm) &&
377 tailm->m_type == m->m_type) {
378 u_long mbcnt_sz;
379
380 bcopy(mtod(m, caddr_t),
381 mtod(tailm, caddr_t) + tailm->m_len,
382 (unsigned)m->m_len);
383 tailm->m_len += m->m_len;
384
385 sb->sb_cc += m->m_len; /* update sb counter */
386
387 /*
388 * Fix the wrongly updated mbcnt_prealloc
389 */
390 mbcnt_sz = MSIZE;
391 if (m->m_flags & M_EXT)
392 mbcnt_sz += m->m_ext.ext_size;
393 atomic_subtract_long(&sb->sb_mbcnt_prealloc, mbcnt_sz);
394
395 o = m->m_next;
396 m->m_next = free_chain;
397 free_chain = m;
398 m = o;
399 continue;
400 }
401
402 /* Insert whole mbuf. */
403 if (tailm == NULL) {
404 KASSERT(sb->sb_mb == NULL,
405 ("sbcompress: sb_mb not NULL"));
406 sb->sb_mb = m; /* only mbuf in sockbuf */
407 sb->sb_lastrecord = m; /* new last record */
408 } else {
409 tailm->m_next = m; /* tack m on following tailm */
410 }
411 sb->sb_lastmbuf = m; /* update last mbuf hint */
412
413 tailm = m; /* just inserted mbuf becomes the new tail */
414 m = m->m_next; /* advance to next mbuf */
415 tailm->m_next = NULL; /* split inserted mbuf off from chain */
416
417 /* update sb counters for just added mbuf */
418 sballoc(sb, tailm);
419
420 /* clear EOR on intermediate mbufs */
421 tailm->m_flags &= ~M_EOR;
422 }
423
424 /*
425 * Propogate EOR to the last mbuf
426 */
427 if (eor) {
428 if (tailm)
429 tailm->m_flags |= eor;
430 else
431 kprintf("semi-panic: sbcompress");
432 }
433
434 /*
435 * Clean up any defered frees.
436 */
437 while (free_chain)
438 free_chain = m_free(free_chain);
439
440 sbcheck(sb);
441 }
442
443 /*
444 * Free all mbufs in a sockbuf.
445 * Check that all resources are reclaimed.
446 */
447 void
448 sbflush(struct sockbuf *sb)
449 {
450 while (sb->sb_mbcnt) {
451 /*
452 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
453 * we would loop forever. Panic instead.
454 */
455 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
456 break;
457 sbdrop(sb, (int)sb->sb_cc);
458 }
459 KASSERT(!(sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_lastmbuf),
460 ("sbflush: cc %ld || mb %p || mbcnt %ld || lastmbuf %p",
461 sb->sb_cc, sb->sb_mb, sb->sb_mbcnt, sb->sb_lastmbuf));
462 }
463
464 /*
465 * Drop data from (the front of) a sockbuf. If the current record is
466 * exhausted this routine will move onto the next one and continue dropping
467 * data.
468 */
469 void
470 sbdrop(struct sockbuf *sb, int len)
471 {
472 struct mbuf *m;
473 struct mbuf *free_chain = NULL;
474
475 sbcheck(sb);
476 crit_enter();
477
478 m = sb->sb_mb;
479 while (m && len > 0) {
480 if (m->m_len > len) {
481 m->m_len -= len;
482 m->m_data += len;
483 sb->sb_cc -= len;
484 atomic_subtract_long(&sb->sb_cc_prealloc, len);
485 break;
486 }
487 len -= m->m_len;
488 m = sbunlinkmbuf(sb, m, &free_chain);
489 if (m == NULL && len)
490 m = sb->sb_mb;
491 }
492
493 /*
494 * Remove any trailing 0-length mbufs in the current record. If
495 * the last record for which data was removed is now empty, m will be
496 * NULL.
497 */
498 while (m && m->m_len == 0) {
499 m = sbunlinkmbuf(sb, m, &free_chain);
500 }
501 crit_exit();
502 if (free_chain)
503 m_freem(free_chain);
504 sbcheck(sb);
505 }
506
507 /*
508 * Drop a record off the front of a sockbuf and move the next record
509 * to the front.
510 *
511 * Must be called while holding a critical section.
512 */
513 void
514 sbdroprecord(struct sockbuf *sb)
515 {
516 struct mbuf *m;
517 struct mbuf *n;
518
519 sbcheck(sb);
520 m = sb->sb_mb;
521 if (m) {
522 if ((sb->sb_mb = m->m_nextpkt) == NULL) {
523 sb->sb_lastrecord = NULL;
524 sb->sb_lastmbuf = NULL;
525 }
526 m->m_nextpkt = NULL;
527 for (n = m; n; n = n->m_next)
528 sbfree(sb, n);
529 m_freem(m);
530 sbcheck(sb);
531 }
532 }
533
534 /*
535 * Drop the first mbuf off the sockbuf and move the next mbuf to the front.
536 * Currently only the head mbuf of the sockbuf may be dropped this way.
537 *
538 * The next mbuf in the same record as the mbuf being removed is returned
539 * or NULL if the record is exhausted. Note that other records may remain
540 * in the sockbuf when NULL is returned.
541 *
542 * Must be called while holding a critical section.
543 */
544 struct mbuf *
545 sbunlinkmbuf(struct sockbuf *sb, struct mbuf *m, struct mbuf **free_chain)
546 {
547 struct mbuf *n;
548
549 KKASSERT(sb->sb_mb == m);
550 sbfree(sb, m);
551 n = m->m_next;
552 if (n) {
553 sb->sb_mb = n;
554 if (sb->sb_lastrecord == m)
555 sb->sb_lastrecord = n;
556 KKASSERT(sb->sb_lastmbuf != m);
557 n->m_nextpkt = m->m_nextpkt;
558 } else {
559 sb->sb_mb = m->m_nextpkt;
560 if (sb->sb_lastrecord == m) {
561 KKASSERT(sb->sb_mb == NULL);
562 sb->sb_lastrecord = NULL;
563 }
564 if (sb->sb_mb == NULL)
565 sb->sb_lastmbuf = NULL;
566 }
567 m->m_nextpkt = NULL;
568 if (free_chain) {
569 m->m_next = *free_chain;
570 *free_chain = m;
571 } else {
572 m->m_next = NULL;
573 }
574 return(n);
575 }
576
577 /*
578 * Create a "control" mbuf containing the specified data
579 * with the specified type for presentation on a socket buffer.
580 */
581 struct mbuf *
582 sbcreatecontrol(caddr_t p, int size, int type, int level)
583 {
584 struct cmsghdr *cp;
585 struct mbuf *m;
586
587 if (CMSG_SPACE((u_int)size) > MCLBYTES)
588 return (NULL);
589 m = m_getl(CMSG_SPACE((u_int)size), MB_DONTWAIT, MT_CONTROL, 0, NULL);
590 if (m == NULL)
591 return (NULL);
592 m->m_len = CMSG_SPACE(size);
593 cp = mtod(m, struct cmsghdr *);
594 if (p != NULL)
595 memcpy(CMSG_DATA(cp), p, size);
596 cp->cmsg_len = CMSG_LEN(size);
597 cp->cmsg_level = level;
598 cp->cmsg_type = type;
599 mbuftrackid(m, 24);
600 return (m);
601 }
602
Cache object: 1877527905c1c3512d7d4880e5c9a34f
|