FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_mbuf.c
1 /*-
2 * Copyright (c) 2004, 2005,
3 * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions and the following
10 * disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD: releng/6.0/sys/kern/kern_mbuf.c 148370 2005-07-25 00:08:12Z rwatson $");
30
31 #include "opt_mac.h"
32 #include "opt_param.h"
33
34 #include <sys/param.h>
35 #include <sys/mac.h>
36 #include <sys/malloc.h>
37 #include <sys/systm.h>
38 #include <sys/mbuf.h>
39 #include <sys/domain.h>
40 #include <sys/eventhandler.h>
41 #include <sys/kernel.h>
42 #include <sys/protosw.h>
43 #include <sys/smp.h>
44 #include <sys/sysctl.h>
45
46 #include <vm/vm.h>
47 #include <vm/vm_page.h>
48 #include <vm/uma.h>
49 #include <vm/uma_int.h>
50 #include <vm/uma_dbg.h>
51
52 /*
53 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
54 * Zones.
55 *
56 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
57 * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the
58 * administrator so desires.
59 *
60 * Mbufs are allocated from a UMA Master Zone called the Mbuf
61 * Zone.
62 *
63 * Additionally, FreeBSD provides a Packet Zone, which it
64 * configures as a Secondary Zone to the Mbuf Master Zone,
65 * thus sharing backend Slab kegs with the Mbuf Master Zone.
66 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *    |                         |
 *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *    |   |             [    Packet    ]            |
 *  [(Cluster Cache)]   [   Secondary  ]   [ (Mbuf Cache)     ]
 *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
 *        |                       \________         |
 *  [ Cluster Keg   ]                      \       /
 *        |                              [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                         |
 *        |                              [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 */
82
/* Cluster limit; defaulted and fetched from the tunable in tunable_mbinit(). */
int nmbclusters;
/* Mbuf statistics; exported read-only as the kern.ipc.mbstat sysctl. */
struct mbstat mbstat;
85
86 static void
87 tunable_mbinit(void *dummy)
88 {
89
90 /* This has to be done before VM init. */
91 nmbclusters = 1024 + maxusers * 64;
92 TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
93 }
94 SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
95
/*
 * Sysctl exports under kern.ipc: the read-write cluster limit and the
 * read-only statistics structure.
 *
 * NOTE(review): in this file the limit is applied to the cluster zone
 * only once, in mbuf_init(); a later write through this sysctl does not
 * appear to resize the zone -- confirm.
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RW, &nmbclusters, 0,
    "Maximum number of mbuf clusters allowed");
SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
    "Mbuf general information and statistics");
101
102 /*
103 * Zones from which we allocate.
104 */
/* Mbuf master zone; created in mbuf_init() with MSIZE-byte items. */
uma_zone_t zone_mbuf;
/* Cluster zone; created in mbuf_init() with MCLBYTES-byte items. */
uma_zone_t zone_clust;
/* Packet zone; created in mbuf_init() as a secondary zone of zone_mbuf. */
uma_zone_t zone_pack;
108
109 /*
110 * Local prototypes.
111 */
/* Zone constructors: (item, size, caller-supplied argument, flags). */
static int mb_ctor_mbuf(void *, int, void *, int);
static int mb_ctor_clust(void *, int, void *, int);
static int mb_ctor_pack(void *, int, void *, int);
/* Zone destructors: (item, size, caller-supplied argument). */
static void mb_dtor_mbuf(void *, int, void *);
static void mb_dtor_clust(void *, int, void *); /* XXX: does statistics only */
static void mb_dtor_pack(void *, int, void *); /* XXX: kept because of statistics */
/* Packet zone init/fini: attach/detach the cluster backing each mbuf. */
static int mb_init_pack(void *, int, int);
static void mb_fini_pack(void *, int);

/* vm_lowmem event handler and SYSINIT entry point. */
static void mb_reclaim(void *);
static void mbuf_init(void *);
123
124 /* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */
125 CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
126
127 /*
128 * Initialize FreeBSD Network buffer allocation.
129 */
130 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL)
131 static void
132 mbuf_init(void *dummy)
133 {
134
135 /*
136 * Configure UMA zones for Mbufs, Clusters, and Packets.
137 */
138 zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, mb_ctor_mbuf,
139 mb_dtor_mbuf,
140 #ifdef INVARIANTS
141 trash_init, trash_fini, MSIZE - 1, UMA_ZONE_MAXBUCKET);
142 #else
143 NULL, NULL, MSIZE - 1, UMA_ZONE_MAXBUCKET);
144 #endif
145 zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
146 mb_ctor_clust,
147 #ifdef INVARIANTS
148 mb_dtor_clust, trash_init, trash_fini, UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
149 #else
150 mb_dtor_clust, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
151 #endif
152 if (nmbclusters > 0)
153 uma_zone_set_max(zone_clust, nmbclusters);
154 zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
155 mb_dtor_pack, mb_init_pack, mb_fini_pack, zone_mbuf);
156
157 /* uma_prealloc() goes here */
158
159 /*
160 * Hook event handler for low-memory situation, used to
161 * drain protocols and push data back to the caches (UMA
162 * later pushes it back to VM).
163 */
164 EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
165 EVENTHANDLER_PRI_FIRST);
166
167 /*
168 * [Re]set counters and local statistics knobs.
169 * XXX Some of these should go and be replaced, but UMA stat
170 * gathering needs to be revised.
171 */
172 mbstat.m_mbufs = 0;
173 mbstat.m_mclusts = 0;
174 mbstat.m_drain = 0;
175 mbstat.m_msize = MSIZE;
176 mbstat.m_mclbytes = MCLBYTES;
177 mbstat.m_minclsize = MINCLSIZE;
178 mbstat.m_mlen = MLEN;
179 mbstat.m_mhlen = MHLEN;
180 mbstat.m_numtypes = MT_NTYPES;
181
182 mbstat.m_mcfail = mbstat.m_mpfail = 0;
183 mbstat.sf_iocnt = 0;
184 mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
185 }
186
187 /*
188 * Constructor for Mbuf master zone.
189 *
190 * The 'arg' pointer points to a mb_args structure which
191 * contains call-specific information required to support the
192 * mbuf allocation API.
193 */
194 static int
195 mb_ctor_mbuf(void *mem, int size, void *arg, int how)
196 {
197 struct mbuf *m;
198 struct mb_args *args;
199 #ifdef MAC
200 int error;
201 #endif
202 int flags;
203 short type;
204
205 #ifdef INVARIANTS
206 trash_ctor(mem, size, arg, how);
207 #endif
208 m = (struct mbuf *)mem;
209 args = (struct mb_args *)arg;
210 flags = args->flags;
211 type = args->type;
212
213 m->m_type = type;
214 m->m_next = NULL;
215 m->m_nextpkt = NULL;
216 m->m_flags = flags;
217 if (flags & M_PKTHDR) {
218 m->m_data = m->m_pktdat;
219 m->m_pkthdr.rcvif = NULL;
220 m->m_pkthdr.csum_flags = 0;
221 SLIST_INIT(&m->m_pkthdr.tags);
222 #ifdef MAC
223 /* If the label init fails, fail the alloc */
224 error = mac_init_mbuf(m, how);
225 if (error)
226 return (error);
227 #endif
228 } else
229 m->m_data = m->m_dat;
230 mbstat.m_mbufs += 1; /* XXX */
231 return (0);
232 }
233
234 /*
235 * The Mbuf master zone and Packet secondary zone destructor.
236 */
237 static void
238 mb_dtor_mbuf(void *mem, int size, void *arg)
239 {
240 struct mbuf *m;
241
242 m = (struct mbuf *)mem;
243 if ((m->m_flags & M_PKTHDR) != 0)
244 m_tag_delete_chain(m, NULL);
245 #ifdef INVARIANTS
246 trash_dtor(mem, size, arg);
247 #endif
248 mbstat.m_mbufs -= 1; /* XXX */
249 }
250
251 /* XXX Only because of stats */
252 static void
253 mb_dtor_pack(void *mem, int size, void *arg)
254 {
255 struct mbuf *m;
256
257 m = (struct mbuf *)mem;
258 if ((m->m_flags & M_PKTHDR) != 0)
259 m_tag_delete_chain(m, NULL);
260 #ifdef INVARIANTS
261 trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
262 #endif
263 mbstat.m_mbufs -= 1; /* XXX */
264 mbstat.m_mclusts -= 1; /* XXX */
265 }
266
267 /*
268 * The Cluster zone constructor.
269 *
270 * Here the 'arg' pointer points to the Mbuf which we
271 * are configuring cluster storage for.
272 */
273 static int
274 mb_ctor_clust(void *mem, int size, void *arg, int how)
275 {
276 struct mbuf *m;
277
278 #ifdef INVARIANTS
279 trash_ctor(mem, size, arg, how);
280 #endif
281 m = (struct mbuf *)arg;
282 m->m_ext.ext_buf = (caddr_t)mem;
283 m->m_data = m->m_ext.ext_buf;
284 m->m_flags |= M_EXT;
285 m->m_ext.ext_free = NULL;
286 m->m_ext.ext_args = NULL;
287 m->m_ext.ext_size = MCLBYTES;
288 m->m_ext.ext_type = EXT_CLUSTER;
289 m->m_ext.ref_cnt = NULL; /* Lazy counter assign. */
290 mbstat.m_mclusts += 1; /* XXX */
291 return (0);
292 }
293
294 /* XXX */
295 static void
296 mb_dtor_clust(void *mem, int size, void *arg)
297 {
298 #ifdef INVARIANTS
299 trash_dtor(mem, size, arg);
300 #endif
301 mbstat.m_mclusts -= 1; /* XXX */
302 }
303
304 /*
305 * The Packet secondary zone's init routine, executed on the
306 * object's transition from keg slab to zone cache.
307 */
308 static int
309 mb_init_pack(void *mem, int size, int how)
310 {
311 struct mbuf *m;
312
313 m = (struct mbuf *)mem;
314 m->m_ext.ext_buf = NULL;
315 uma_zalloc_arg(zone_clust, m, how);
316 if (m->m_ext.ext_buf == NULL)
317 return (ENOMEM);
318 #ifdef INVARIANTS
319 trash_init(m->m_ext.ext_buf, MCLBYTES, how);
320 #endif
321 mbstat.m_mclusts -= 1; /* XXX */
322 return (0);
323 }
324
325 /*
326 * The Packet secondary zone's fini routine, executed on the
327 * object's transition from zone cache to keg slab.
328 */
329 static void
330 mb_fini_pack(void *mem, int size)
331 {
332 struct mbuf *m;
333
334 m = (struct mbuf *)mem;
335 #ifdef INVARIANTS
336 trash_fini(m->m_ext.ext_buf, MCLBYTES);
337 #endif
338 uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
339 m->m_ext.ext_buf = NULL;
340 mbstat.m_mclusts += 1; /* XXX */
341 #ifdef INVARIANTS
342 trash_dtor(mem, size, NULL);
343 #endif
344 }
345
346 /*
347 * The "packet" keg constructor.
348 */
349 static int
350 mb_ctor_pack(void *mem, int size, void *arg, int how)
351 {
352 struct mbuf *m;
353 struct mb_args *args;
354 #ifdef MAC
355 int error;
356 #endif
357 int flags;
358 short type;
359
360 m = (struct mbuf *)mem;
361 args = (struct mb_args *)arg;
362 flags = args->flags;
363 type = args->type;
364
365 #ifdef INVARIANTS
366 trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
367 #endif
368 m->m_type = type;
369 m->m_next = NULL;
370 m->m_nextpkt = NULL;
371 m->m_data = m->m_ext.ext_buf;
372 m->m_flags = flags|M_EXT;
373 m->m_ext.ext_free = NULL;
374 m->m_ext.ext_args = NULL;
375 m->m_ext.ext_size = MCLBYTES;
376 m->m_ext.ext_type = EXT_PACKET;
377 m->m_ext.ref_cnt = NULL; /* Lazy counter assign. */
378
379 if (flags & M_PKTHDR) {
380 m->m_pkthdr.rcvif = NULL;
381 m->m_pkthdr.csum_flags = 0;
382 SLIST_INIT(&m->m_pkthdr.tags);
383 #ifdef MAC
384 /* If the label init fails, fail the alloc */
385 error = mac_init_mbuf(m, how);
386 if (error)
387 return (error);
388 #endif
389 }
390 mbstat.m_mbufs += 1; /* XXX */
391 mbstat.m_mclusts += 1; /* XXX */
392 return (0);
393 }
394
395 /*
396 * This is the protocol drain routine.
397 *
398 * No locks should be held when this is called. The drain routines have to
399 * presently acquire some locks which raises the possibility of lock order
400 * reversal.
401 */
402 static void
403 mb_reclaim(void *junk)
404 {
405 struct domain *dp;
406 struct protosw *pr;
407
408 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
409 "mb_reclaim()");
410
411 mbstat.m_drain++;
412 for (dp = domains; dp != NULL; dp = dp->dom_next)
413 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
414 if (pr->pr_drain != NULL)
415 (*pr->pr_drain)();
416 }
Cache object: 6d99edfb0458c98551ffc85aae45a4f1
|