sys/kern/uipc_mbuf.c
/*-
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/6.3/sys/kern/uipc_mbuf.c 173886 2007-11-24 19:45:58Z cvs2svn $");

#include "opt_mac.h"
#include "opt_param.h"
#include "opt_mbuf_stress_test.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/uio.h>

int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
#ifdef MBUF_STRESS_TEST
int	m_defragpackets;
int	m_defragbytes;
int	m_defraguseless;
int	m_defragfailure;
int	m_defragrandomfailures;
#endif

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD,
    &max_linkhdr, 0, "Size of largest link layer header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD,
    &max_protohdr, 0, "Size of largest protocol layer header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD,
    &max_hdr, 0, "Size of largest link plus protocol header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD,
    &max_datalen, 0, "Minimum space left in mbuf after max_hdr");
#ifdef MBUF_STRESS_TEST
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
    &m_defragpackets, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
    &m_defragbytes, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
    &m_defraguseless, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
    &m_defragfailure, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
    &m_defragrandomfailures, 0, "");
#endif

/*
 * Malloc-type for external ext_buf ref counts.
 */
static MALLOC_DEFINE(M_MBUF, "mbextcnt", "mbuf external ref counts");

/*
 * Allocate a given length worth of mbufs and/or clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain.  If an
 * existing mbuf chain is provided, then we will append the new chain
 * to the existing one but still return the top of the newly allocated
 * chain.
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, short type)
{
	struct mbuf *mb, *top, *cur, *mtail;
	int num, rem;
	int i;

	KASSERT(len >= 0, ("m_getm(): len is < 0"));

	/* If m != NULL, we will append to the end of that chain. */
	if (m != NULL)
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
	else
		mtail = NULL;

	/*
	 * Calculate how many mbufs+clusters ("packets") we need and how much
	 * leftover there is after that and allocate the first mbuf+cluster
	 * if required.
	 */
	num = len / MCLBYTES;
	rem = len % MCLBYTES;
	top = cur = NULL;
	if (num > 0) {
		if ((top = cur = m_getcl(how, type, 0)) == NULL)
			goto failed;
		top->m_len = 0;
	}
	num--;

	for (i = 0; i < num; i++) {
		mb = m_getcl(how, type, 0);
		if (mb == NULL)
			goto failed;
		mb->m_len = 0;
		cur = (cur->m_next = mb);
	}
	if (rem > 0) {
		mb = (rem > MINCLSIZE) ?
		    m_getcl(how, type, 0) : m_get(how, type);
		if (mb == NULL)
			goto failed;
		mb->m_len = 0;
		if (cur == NULL)
			top = mb;
		else
			cur->m_next = mb;
	}

	if (mtail != NULL)
		mtail->m_next = top;
	return top;
failed:
	if (top != NULL)
		m_freem(top);
	return NULL;
}
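
/*
 * Usage sketch (illustrative only; "MY_LEN" and the error handling are
 * hypothetical): ask for MY_LEN bytes worth of mbufs/clusters, with no
 * existing chain to append to, and release the chain when done.
 */
#if 0
	struct mbuf *chain;

	chain = m_getm(NULL, MY_LEN, M_DONTWAIT, MT_DATA);
	if (chain == NULL)
		return (ENOBUFS);	/* partial allocations were freed */
	/* ... fill the chain; each mbuf starts with m_len == 0 ... */
	m_freem(chain);
#endif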

/*
 * Free an entire chain of mbufs and associated external buffers, if
 * applicable.
 */
void
m_freem(struct mbuf *mb)
{

	while (mb != NULL)
		mb = m_free(mb);
}

/*-
 * Configure a provided mbuf to refer to the provided external storage
 * buffer and set up a reference count for said buffer.  If the setting
 * up of the reference count fails, the M_EXT bit will not be set.  If
 * successful, the M_EXT bit is set in the mbuf's flags.
 *
 * Arguments:
 *    mb     The existing mbuf to which to attach the provided buffer.
 *    buf    The address of the provided external storage buffer.
 *    size   The size of the provided buffer.
 *    freef  A pointer to a routine that is responsible for freeing the
 *           provided external storage buffer.
 *    args   A pointer to an argument structure (of any type) to be passed
 *           to the provided freef routine (may be NULL).
 *    flags  Any other flags to be passed to the provided mbuf.
 *    type   The type that the external storage buffer should be
 *           labeled with.
 *
 * Returns:
 *    Nothing.
 */
void
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
    void (*freef)(void *, void *), void *args, int flags, int type)
{
	u_int *ref_cnt = NULL;

	/* XXX Shouldn't be adding EXT_CLUSTER with this API */
	if (type == EXT_CLUSTER)
		ref_cnt = (u_int *)uma_find_refcnt(zone_clust,
		    mb->m_ext.ext_buf);
	else if (type == EXT_EXTREF)
		ref_cnt = __DEVOLATILE(u_int *, mb->m_ext.ref_cnt);
	mb->m_ext.ref_cnt = (ref_cnt == NULL) ?
	    malloc(sizeof(u_int), M_MBUF, M_NOWAIT) : (u_int *)ref_cnt;
	if (mb->m_ext.ref_cnt != NULL) {
		*(mb->m_ext.ref_cnt) = 1;
		mb->m_flags |= (M_EXT | flags);
		mb->m_ext.ext_buf = buf;
		mb->m_data = mb->m_ext.ext_buf;
		mb->m_ext.ext_size = size;
		mb->m_ext.ext_free = freef;
		mb->m_ext.ext_args = args;
		mb->m_ext.ext_type = type;
	}
}
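
/*
 * Usage sketch (illustrative only; my_buf, my_free, my_arg and MY_SIZE are
 * hypothetical, and the EXT_NET_DRV type is just one plausible choice):
 * attach caller-owned storage to a fresh mbuf and verify that the
 * reference count was set up.
 */
#if 0
	struct mbuf *m;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m != NULL) {
		m_extadd(m, (caddr_t)my_buf, MY_SIZE, my_free, my_arg,
		    0, EXT_NET_DRV);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);	/* ref count allocation failed */
			m = NULL;
		}
	}
#endif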

/*
 * Non-directly-exported function to clean up after mbufs with M_EXT
 * storage attached to them if the reference count hits 0.
 */
void
mb_free_ext(struct mbuf *m)
{
	u_int cnt;
	int dofree;

	/* Account for lazy ref count assign. */
	if (m->m_ext.ref_cnt == NULL)
		dofree = 1;
	else
		dofree = 0;

	/*
	 * This is tricky.  We need to make sure to decrement the
	 * refcount in a safe way but to also clean up if we're the
	 * last reference.  This method seems to do it without a race.
	 */
	while (dofree == 0) {
		cnt = *(m->m_ext.ref_cnt);
		if (atomic_cmpset_int(m->m_ext.ref_cnt, cnt, cnt - 1)) {
			if (cnt == 1)
				dofree = 1;
			break;
		}
	}

	if (dofree) {
		/*
		 * Do the free, should be safe.
		 */
		switch (m->m_ext.ext_type) {
		case EXT_PACKET:
			uma_zfree(zone_pack, m);
			return;
		case EXT_CLUSTER:
			uma_zfree(zone_clust, m->m_ext.ext_buf);
			m->m_ext.ext_buf = NULL;
			break;
		case EXT_JUMBOP:
			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
			break;
		case EXT_JUMBO9:
			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
			break;
		case EXT_JUMBO16:
			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
			break;
		default:
			KASSERT(m->m_ext.ext_free != NULL,
			    ("%s: external free pointer not set", __func__));
			(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
			    m->m_ext.ext_args);
			if (m->m_ext.ext_type != EXT_EXTREF) {
				if (m->m_ext.ref_cnt != NULL)
					free(__DEVOLATILE(u_int *,
					    m->m_ext.ref_cnt), M_MBUF);
				m->m_ext.ref_cnt = NULL;
			}
			m->m_ext.ext_buf = NULL;
		}
	}
	uma_zfree(zone_mbuf, m);
}

/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

#if 0
	/* see below for why these are not enabled */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
	    ("m_move_pkthdr: to has tags"));
#endif
#ifdef MAC
	/*
	 * XXXMAC: It could be this should also occur for non-MAC?
	 */
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
	from->m_flags &= ~M_PKTHDR;
}

/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
{

#if 0
	/*
	 * The mbuf allocator only initializes the pkthdr
	 * when the mbuf is allocated with MGETHDR.  Many users
	 * (e.g. m_copy*, m_prepend) use MGET and then
	 * smash the pkthdr as needed causing these
	 * assertions to trip.  For now just disable them.
	 */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
#endif
	MBUF_CHECKSLEEP(how);
#ifdef MAC
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;
	SLIST_INIT(&to->m_pkthdr.tags);
	return (m_tag_copy_chain(to, from, MBTOM(how)));
}

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (m->m_flags & M_PKTHDR)
		MGETHDR(mn, how, m->m_type);
	else
		MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(mn, m);
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
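
/*
 * Usage sketch (illustrative only): callers normally go through the
 * M_PREPEND() macro, which uses leading space in the first mbuf when
 * available and only falls back to m_prepend() otherwise.  The IP header
 * here is just an example payload.
 */
#if 0
	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
	if (m == NULL)
		return (ENOBUFS);	/* the chain was freed on failure */
#endif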

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of the
 * mbuf chain.  The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from
 * the caller.  Note that the copy is read-only, because clusters are not
 * copied, only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	MBUF_CHECKSLEEP(wait);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		if (copyhdr)
			MGETHDR(n, wait, m->m_type);
		else
			MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
			n->m_ext.ref_cnt = m->m_ext.ref_cnt;
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL)
		mbstat.m_mcfail++;	/* XXX: No consistency. */

	return (top);
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
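
/*
 * Usage sketch (illustrative only): take a reference-counted copy of a
 * whole packet, e.g. for a tap or retransmission path.  The copy shares
 * clusters with the original and must be treated as read-only.
 */
#if 0
	struct mbuf *copy;

	copy = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
	if (copy == NULL)
		return (ENOBUFS);
#endif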

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MBUF_CHECKSLEEP(how);
	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	if (!m_dup_pkthdr(n, m, how))
		goto nospace;
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
		n->m_ext.ref_cnt = m->m_ext.ref_cnt;
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
			n->m_ext.ref_cnt = m->m_ext.ref_cnt;
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
	u_int count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
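
/*
 * Usage sketch (illustrative only; "iphlen" is a hypothetical offset):
 * copy a header that may span mbufs into a local structure without
 * modifying the chain.
 */
#if 0
	struct udphdr uh;

	m_copydata(m, iphlen, sizeof(uh), (caddr_t)&uh);
#endif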

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	MBUF_CHECKSLEEP(how);
	/* Sanity check */
	if (m == NULL)
		return (NULL);
	M_ASSERTPKTHDR(m);

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		if (remain >= MINCLSIZE) {
			n = m_getcl(how, m->m_type, 0);
			nsize = MCLBYTES;
		} else {
			n = m_get(how, m->m_type);
			nsize = MLEN;
		}
		if (n == NULL)
			goto nospace;

		if (top == NULL) {		/* First one, must be PKTHDR */
			if (!m_dup_pkthdr(n, m, how)) {
				m_free(n);
				goto nospace;
			}
			if ((n->m_flags & M_EXT) == 0)
				nsize = MHLEN;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __func__));
	}
	return (top);

nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
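
/*
 * Usage sketch (illustrative only): obtain a deep, writable copy when the
 * original chain may share clusters with other references.
 */
#if 0
	struct mbuf *wcopy;

	wcopy = m_dup(m, M_DONTWAIT);
	if (wcopy == NULL)
		return (ENOBUFS);
	/* wcopy's data may be modified freely; m is untouched. */
#endif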

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

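/*
 * Trim req_len bytes from the mbuf chain: from the head of the chain when
 * req_len is positive, or from the tail when it is negative.  If the first
 * mbuf has M_PKTHDR set, m_pkthdr.len is adjusted as well.
 */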
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				if (m->m_next != NULL) {
					m_freem(m->m_next);
					m->m_next = NULL;
				}
				break;
			}
			count -= m->m_len;
		}
	}
}
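
/*
 * Usage sketch (illustrative only): strip a link-layer header from the
 * front of a received packet and a trailing CRC from the end.
 */
#if 0
	m_adj(m, sizeof(struct ether_header));	/* trim from head */
	m_adj(m, -ETHER_CRC_LEN);		/* trim from tail */
#endif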

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns NULL on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mbstat.m_mpfail++;	/* XXX: No consistency. */
	return (NULL);
}
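
/*
 * Usage sketch (illustrative only): the classic pattern for making a
 * protocol header contiguous before casting with mtod().
 */
#if 0
	if (m->m_len < sizeof(struct ip) &&
	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
		return;		/* m_pullup() already freed the chain */
	ip = mtod(m, struct ip *);
#endif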

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
int MSFail;

struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	MGET(m, M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	m->m_len = 0;
	if (n->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(m, n);
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MSFail++;
	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf.  Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	MBUF_CHECKSLEEP(wait);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		n->m_ext.ref_cnt = m->m_ext.ref_cnt;
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
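
/*
 * Usage sketch (illustrative only; "len0" is a hypothetical split point):
 * break a packet in two, e.g. for fragmentation.
 */
#if 0
	struct mbuf *tail;

	tail = m_split(m, len0, M_DONTWAIT);
	if (tail == NULL)
		return (ENOBUFS);	/* "m" was left intact */
	/* "m" now holds the first len0 bytes, "tail" the remainder. */
#endif
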
/*
 * Routine to copy from device local memory into mbufs.
 * Note that the `off' argument is the offset into the first mbuf of the
 * target chain at which to begin copying the data.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = NULL, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);

	while (totlen > 0) {
		if (top == NULL) {	/* First one, must be PKTHDR */
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				len = MCLBYTES;
			} else {
				m = m_gethdr(M_DONTWAIT, MT_DATA);
				len = MHLEN;

				/* Place initial small packet/header at end of mbuf */
				if (m && totlen + off + max_linkhdr <= MLEN) {
					m->m_data += max_linkhdr;
					len -= max_linkhdr;
				}
			}
			if (m == NULL)
				return NULL;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = totlen;
		} else {
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_DONTWAIT, MT_DATA, 0);
				len = MCLBYTES;
			} else {
				m = m_get(M_DONTWAIT, MT_DATA);
				len = MLEN;
			}
			if (m == NULL) {
				m_freem(top);
				return NULL;
			}
		}
		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}
		m->m_len = len = min(totlen, len);
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}
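
/*
 * Usage sketch (illustrative only; "sc", its receive buffer and length
 * fields are hypothetical driver state): copy a received frame out of
 * device memory and pass it up the stack.
 */
#if 0
	struct mbuf *m;

	m = m_devget(sc->rx_buf, pktlen, 0, sc->ifp, NULL);	/* NULL: plain bcopy */
	if (m != NULL)
		(*sc->ifp->if_input)(sc->ifp, m);
#endif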

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			bzero(mtod(n, caddr_t), MLEN);
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
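
/*
 * Usage sketch (illustrative only; "off" and "val" are hypothetical):
 * overwrite a field at a known offset, growing the chain if it is short.
 */
#if 0
	u_int16_t val = 0;

	m_copyback(m, off, sizeof(val), (c_caddr_t)&val);
#endif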

/*
 * Append the specified data to the indicated mbuf chain,
 * extending the mbuf chain if the new data does not fit in
 * existing space.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
int
m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
	struct mbuf *m, *n;
	int remainder, space;

	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space, remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_get(M_DONTWAIT, m->m_type);
		if (n == NULL)
			break;
		n->m_len = min(MLEN, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len, remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}
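
/*
 * Usage sketch (illustrative only; "buf" and "buflen" are hypothetical):
 * append a trailer to a packet, extending the chain as needed.
 */
#if 0
	if (m_append(m, buflen, (c_caddr_t)buf) == 0)
		return (ENOBUFS);	/* only part of the data fit */
#endif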

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, u_int), void *arg)
{
	u_int count;
	int rval;

	KASSERT(off >= 0, ("m_apply, negative off %d", off));
	KASSERT(len >= 0, ("m_apply, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_apply, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);
		len -= count;
		off = 0;
		m = m->m_next;
	}
	return (0);
}
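
/*
 * Usage sketch (illustrative only; sum_chunk() is a hypothetical helper):
 * walk the chain without copying it, e.g. to accumulate a checksum.
 * A nonzero return from the callback aborts the walk.
 */
#if 0
	static int
	sum_chunk(void *arg, void *data, u_int len)
	{
		u_int *sump = arg;
		u_char *p = data;

		while (len-- > 0)
			*sump += *p++;
		return (0);
	}

	u_int sum = 0;

	(void)m_apply(m, 0, m->m_pkthdr.len, sum_chunk, &sum);
#endif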

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search. */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;
			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data. */
					*off = m->m_len;
					return (m);
				}
				return (NULL);
			}
			m = m->m_next;
		}
	}
	return (NULL);
}

void
m_print(const struct mbuf *m, int maxlen)
{
	int len;
	int pdata;
	const struct mbuf *m2;

	if (m->m_flags & M_PKTHDR)
		len = m->m_pkthdr.len;
	else
		len = -1;
	m2 = m;
	while (m2 != NULL && (len == -1 || len)) {
		pdata = m2->m_len;
		if (maxlen != -1 && pdata > maxlen)
			pdata = maxlen;
		printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len,
		    m2->m_next, m2->m_flags, "\2\20freelist\17skipfw"
		    "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly"
		    "\3eor\2pkthdr\1ext", pdata ? "" : "\n");
		if (pdata)
			printf(", %*D\n", m2->m_len, (u_char *)m2->m_data, "-");
		if (len != -1)
			len -= m2->m_len;
		m2 = m2->m_next;
	}
	if (len > 0)
		printf("%d bytes unaccounted for.\n", len);
	return;
}

u_int
m_fixhdr(struct mbuf *m0)
{
	u_int len;

	len = m_length(m0, NULL);
	m0->m_pkthdr.len = len;
	return (len);
}

u_int
m_length(struct mbuf *m0, struct mbuf **last)
{
	struct mbuf *m;
	u_int len;

	len = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		len += m->m_len;
		if (m->m_next == NULL)
			break;
	}
	if (last != NULL)
		*last = m;
	return (len);
}

/*
 * Defragment an mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If a chain without a packet header is passed in, the original
 * chain will be returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length;

	MBUF_CHECKSLEEP(how);
	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

#ifdef MBUF_STRESS_TEST
	if (m_defragrandomfailures) {
		int temp = arc4random() & 0xff;
		if (temp == 0xba)
			goto nospace;
	}
#endif

	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
#ifdef MBUF_STRESS_TEST
	if (m0->m_next == NULL)
		m_defraguseless++;
#endif
	m_freem(m0);
	m0 = m_final;
#ifdef MBUF_STRESS_TEST
	m_defragpackets++;
	m_defragbytes += m0->m_pkthdr.len;
#endif
	return (m0);
nospace:
#ifdef MBUF_STRESS_TEST
	m_defragfailure++;
#endif
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
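
/*
 * Usage sketch (illustrative only): a transmit path that produced too many
 * DMA segments can compact the chain; note the differing ownership rules
 * on success and failure.
 */
#if 0
	struct mbuf *d;

	d = m_defrag(m, M_DONTWAIT);
	if (d == NULL) {
		m_freem(m);	/* original chain is unchanged on failure */
		return (ENOBUFS);
	}
	m = d;			/* original chain was freed on success */
#endif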

#ifdef MBUF_STRESS_TEST

/*
 * Fragment an mbuf chain.  There's no reason you'd ever want to do
 * this in normal usage, but it's great for stress testing various
 * mbuf consumers.
 *
 * If fragmentation is not possible, the original chain will be
 * returned.
 *
 * Possible length values:
 * 0	no fragmentation will occur
 * > 0	each fragment will be of the specified length
 * -1	each fragment will be the same random value in length
 * -2	each fragment's length will be entirely random
 * (Random values range from 1 to 256)
 */
struct mbuf *
m_fragment(struct mbuf *m0, int how, int length)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	if ((length == 0) || (length < -2))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

	m_final = m_getcl(how, MT_DATA, M_PKTHDR);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	if (length == -1)
		length = 1 + (arc4random() & 255);

	while (progress < m0->m_pkthdr.len) {
		int fraglen;

		if (length > 0)
			fraglen = length;
		else
			fraglen = 1 + (arc4random() & 255);
		if (fraglen > m0->m_pkthdr.len - progress)
			fraglen = m0->m_pkthdr.len - progress;

		if (fraglen > MCLBYTES)
			fraglen = MCLBYTES;

		if (m_new == NULL) {
			m_new = m_getcl(how, MT_DATA, 0);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t));
		progress += fraglen;
		m_new->m_len = fraglen;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	m_freem(m0);
	m0 = m_final;
	return (m0);
nospace:
	if (m_final)
		m_freem(m_final);
	/* Return the original chain on failure */
	return (m0);
}

#endif

struct mbuf *
m_uiotombuf(struct uio *uio, int how, int len, int align)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, error = 0, length, total;

	if (len > 0)
		total = min(uio->uio_resid, len);
	else
		total = uio->uio_resid;
	if (align >= MHLEN)
		goto nospace;
	if (total + align > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);
	if (m_final == NULL)
		goto nospace;
	m_final->m_data += align;
	m_new = m_final;
	while (progress < total) {
		length = total - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;
		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}
		error = uiomove(mtod(m_new, void *), length, uio);
		if (error)
			goto nospace;
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	m_fixhdr(m_final);
	return (m_final);
nospace:
	if (m_new)
		m_free(m_new);
	if (m_final)
		m_freem(m_final);
	return (NULL);
}

/*
 * Set the m_data pointer of a newly-allocated mbuf
 * to place an object of the specified size at the
 * end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	int adjust;

	if (m->m_flags & M_EXT)
		adjust = m->m_ext.ext_size - len;
	else if (m->m_flags & M_PKTHDR)
		adjust = MHLEN - len;
	else
		adjust = MLEN - len;
	m->m_data += adjust &~ (sizeof(long)-1);
}
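
/*
 * Usage sketch (illustrative only): position a fixed-size object at the
 * (longword-aligned) end of a fresh mbuf so data can be prepended later.
 */
#if 0
	MGET(m, M_DONTWAIT, MT_DATA);
	if (m != NULL) {
		m_align(m, sizeof(struct sockaddr_in));
		m->m_len = sizeof(struct sockaddr_in);
	}
#endif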

/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 */
struct mbuf *
m_unshare(struct mbuf *m0, int how)
{
	struct mbuf *m, *mprev;
	struct mbuf *n, *mfirst, *mlast;
	int len, off;

	mprev = NULL;
	for (m = m0; m != NULL; m = mprev->m_next) {
		/*
		 * Regular mbufs are ignored unless there's a cluster
		 * in front of them that we can use to coalesce.  We do
		 * the latter mainly so later clusters can be coalesced
		 * also w/o having to handle them specially (i.e. convert
		 * mbuf+cluster -> cluster).  This optimization is heavily
		 * influenced by the assumption that we're running over
		 * Ethernet where MCLBYTES is large enough that the max
		 * packet size will permit lots of coalescing into a
		 * single cluster.  This in turn permits efficient
		 * crypto operations, especially when using hardware.
		 */
		if ((m->m_flags & M_EXT) == 0) {
			if (mprev && (mprev->m_flags & M_EXT) &&
			    m->m_len <= M_TRAILINGSPACE(mprev)) {
				/* XXX: this ignores mbuf types */
				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
				    mtod(m, caddr_t), m->m_len);
				mprev->m_len += m->m_len;
				mprev->m_next = m->m_next;	/* unlink from chain */
				m_free(m);			/* reclaim mbuf */
#if 0
				newipsecstat.ips_mbcoalesced++;
#endif
			} else {
				mprev = m;
			}
			continue;
		}
		/*
		 * Writable mbufs are left alone (for now).
		 */
		if (M_WRITABLE(m)) {
			mprev = m;
			continue;
		}

		/*
		 * Not writable, replace with a copy or coalesce with
		 * the previous mbuf if possible (since we have to copy
		 * it anyway, we try to reduce the number of mbufs and
		 * clusters so that future work is easier).
		 */
		KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
		/* NB: we only coalesce into a cluster or larger */
		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
		    m->m_len <= M_TRAILINGSPACE(mprev)) {
			/* XXX: this ignores mbuf types */
			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
			    mtod(m, caddr_t), m->m_len);
			mprev->m_len += m->m_len;
			mprev->m_next = m->m_next;	/* unlink from chain */
			m_free(m);			/* reclaim mbuf */
#if 0
			newipsecstat.ips_clcoalesced++;
#endif
			continue;
		}

		/*
		 * Allocate new space to hold the copy...
		 */
		/* XXX why can M_PKTHDR be set past the first mbuf? */
		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
			/*
			 * NB: if a packet header is present we must
			 * allocate the mbuf separately from any cluster
			 * because M_MOVE_PKTHDR will smash the data
			 * pointer and drop the M_EXT marker.
			 */
			MGETHDR(n, how, m->m_type);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
			M_MOVE_PKTHDR(n, m);
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				m_free(n);
				m_freem(m0);
				return (NULL);
			}
		} else {
			n = m_getcl(how, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
		}
		/*
		 * ... and copy the data.  We deal with jumbo mbufs
		 * (i.e. m_len > MCLBYTES) by splitting them into
		 * clusters.  We could just malloc a buffer and make
		 * it external but too many device drivers don't know
		 * how to break up the non-contiguous memory when
		 * doing DMA.
		 */
		len = m->m_len;
		off = 0;
		mfirst = n;
		mlast = NULL;
		for (;;) {
			int cc = min(len, MCLBYTES);
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
			n->m_len = cc;
			if (mlast != NULL)
				mlast->m_next = n;
			mlast = n;
#if 0
			newipsecstat.ips_clcopied++;
#endif

			len -= cc;
			if (len <= 0)
				break;
			off += cc;

			n = m_getcl(how, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(mfirst);
				m_freem(m0);
				return (NULL);
			}
		}
		n->m_next = m->m_next;
		if (mprev == NULL)
			m0 = mfirst;		/* new head of chain */
		else
			mprev->m_next = mfirst;	/* replace old mbuf */
		m_free(m);			/* release old mbuf */
		mprev = mfirst;
	}
	return (m0);
}
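
/*
 * Usage sketch (illustrative only): IPsec-style callers make the chain
 * writable (and mostly linear) before encrypting in place.
 */
#if 0
	m = m_unshare(m, M_DONTWAIT);
	if (m == NULL)
		return (ENOBUFS);	/* the original chain was freed */
	/* every mbuf in the chain now passes M_WRITABLE() */
#endif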