/*-
 * Copyright (c) 2015
 *	Jonathan Looney. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/queue.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/eventhandler.h>
#include <machine/atomic.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_pcap.h>

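/*
 * The number of bytes between the start of the mbuf's storage and its
 * current data pointer. Unlike M_LEADINGSPACE(), this does not check
 * whether the mbuf is writable, so it can be used on read-only (shared)
 * mbufs where we only need the distance, not writable space.
 */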
#define M_LEADINGSPACE_NOWRITE(m)					\
	((m)->m_data - M_START(m))

int tcp_pcap_aggressive_free = 1;
static int tcp_pcap_clusters_referenced_cur = 0;
static int tcp_pcap_clusters_referenced_max = 0;

SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_aggressive_free,
    CTLFLAG_RW, &tcp_pcap_aggressive_free, 0,
    "Free saved packets when the memory system comes under pressure");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_cur,
    CTLFLAG_RD, &tcp_pcap_clusters_referenced_cur, 0,
    "Number of clusters currently referenced on TCP PCAP queues");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_max,
    CTLFLAG_RW, &tcp_pcap_clusters_referenced_max, 0,
    "Maximum number of clusters allowed to be referenced on TCP PCAP "
    "queues");

static int tcp_pcap_alloc_reuse_ext = 0;
static int tcp_pcap_alloc_reuse_mbuf = 0;
static int tcp_pcap_alloc_new_mbuf = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_ext,
    CTLFLAG_RD, &tcp_pcap_alloc_reuse_ext, 0,
    "Number of mbufs with external storage reused for the TCP PCAP "
    "functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_mbuf,
    CTLFLAG_RD, &tcp_pcap_alloc_reuse_mbuf, 0,
    "Number of mbufs with internal storage reused for the TCP PCAP "
    "functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_new_mbuf,
    CTLFLAG_RD, &tcp_pcap_alloc_new_mbuf, 0,
    "Number of new mbufs allocated for the TCP PCAP functionality");

VNET_DEFINE(int, tcp_pcap_packets) = 0;
#define V_tcp_pcap_packets	VNET(tcp_pcap_packets)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_packets,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_pcap_packets), 0,
    "Default number of packets saved per direction per TCPCB");

/* Cap cluster references at one quarter of the system's mbuf clusters. */
static void
tcp_pcap_max_set(void)
{

	tcp_pcap_clusters_referenced_max = nmbclusters / 4;
}

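/*
 * Set the initial cluster reference limit and arrange for it to be
 * recomputed whenever nmbclusters changes.
 */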
void
tcp_pcap_init(void)
{

	tcp_pcap_max_set();
	EVENTHANDLER_REGISTER(nmbclusters_change, tcp_pcap_max_set,
	    NULL, EVENTHANDLER_PRI_ANY);
}

/*
 * If we are below the maximum allowed cluster references,
 * increment the reference count and return TRUE. Otherwise,
 * leave the reference count alone and return FALSE.
 */
static __inline bool
tcp_pcap_take_cluster_reference(void)
{
	if (atomic_fetchadd_int(&tcp_pcap_clusters_referenced_cur, 1) >=
	    tcp_pcap_clusters_referenced_max) {
		atomic_add_int(&tcp_pcap_clusters_referenced_cur, -1);
		return FALSE;
	}
	return TRUE;
}

/*
 * For all the external entries in m, apply the given adjustment.
 * This can be used to adjust the counter when an mbuf chain is
 * copied or freed.
 */
static __inline void
tcp_pcap_adj_cluster_reference(struct mbuf *m, int adj)
{
	while (m) {
		if (m->m_flags & M_EXT)
			atomic_add_int(&tcp_pcap_clusters_referenced_cur, adj);

		m = m->m_next;
	}
}

/*
 * Free all mbufs in a chain, decrementing the reference count as
 * necessary.
 *
 * Functions in this file should use this instead of m_freem() when
 * they are freeing mbuf chains that may contain clusters that were
 * already included in tcp_pcap_clusters_referenced_cur.
 */
static void
tcp_pcap_m_freem(struct mbuf *mb)
{
	while (mb != NULL) {
		if (mb->m_flags & M_EXT)
			atomic_subtract_int(&tcp_pcap_clusters_referenced_cur,
			    1);
		mb = m_free(mb);
	}
}

/*
 * Copy data from m to n, where n cannot fit all the data we might
 * want from m.
 *
 * Prioritize data like this:
 * 1. TCP header
 * 2. IP header
 * 3. Data
 */
static void
tcp_pcap_copy_bestfit(struct tcphdr *th, struct mbuf *m, struct mbuf *n)
{
	struct mbuf *m_cur = m;
	int bytes_to_copy = 0, trailing_data, skip = 0, tcp_off;

	/* Below, we assume these will be non-NULL. */
	KASSERT(th, ("%s: called with th == NULL", __func__));
	KASSERT(m, ("%s: called with m == NULL", __func__));
	KASSERT(n, ("%s: called with n == NULL", __func__));

	/* We assume this initialization occurred elsewhere. */
	KASSERT(n->m_len == 0, ("%s: called with n->m_len=%d (expected 0)",
	    __func__, n->m_len));
	KASSERT(n->m_data == M_START(n),
	    ("%s: called with n->m_data != M_START(n)", __func__));

	/*
	 * Calculate the size of the TCP header. We use this often
	 * enough that it is worth just calculating at the start.
	 * (th_off counts 32-bit words, so shift left by 2 for bytes.)
	 */
	tcp_off = th->th_off << 2;

	/* Trim off leading empty mbufs. */
	while (m && m->m_len == 0)
		m = m->m_next;

	if (m) {
		m_cur = m;
	} else {
		/*
		 * No data? Highly unusual. We would expect to at
		 * least see a TCP header in the mbuf. Since we do
		 * have a pointer to the TCP header, settle for
		 * copying just that.
		 */
fallback:
		bytes_to_copy = tcp_off;
		if (bytes_to_copy > M_SIZE(n))
			bytes_to_copy = M_SIZE(n);
		bcopy(th, n->m_data, bytes_to_copy);
		n->m_len = bytes_to_copy;
		return;
	}

	/*
	 * Find TCP header. Record the total number of bytes up to,
	 * and including, the TCP header.
	 */
	while (m_cur) {
		if ((caddr_t) th >= (caddr_t) m_cur->m_data &&
		    (caddr_t) th < (caddr_t) (m_cur->m_data + m_cur->m_len))
			break;
		bytes_to_copy += m_cur->m_len;
		m_cur = m_cur->m_next;
	}
	if (m_cur)
		bytes_to_copy += (caddr_t) th - (caddr_t) m_cur->m_data;
	else
		goto fallback;
	bytes_to_copy += tcp_off;

	/*
	 * If we already want to copy more bytes than we can hold
	 * in the destination mbuf, skip leading bytes and copy
	 * what we can.
	 *
	 * Otherwise, consider trailing data.
	 */
	if (bytes_to_copy > M_SIZE(n)) {
		skip = bytes_to_copy - M_SIZE(n);
		bytes_to_copy = M_SIZE(n);
	} else {
		/*
		 * Determine how much trailing data is in the chain.
		 * We start with the length of this mbuf (the one
		 * containing th) and subtract the size of the TCP
		 * header (tcp_off) and the size of the data prior
		 * to th (th - m_cur->m_data).
		 *
		 * This *should not* be negative, as the TCP code
		 * should put the whole TCP header in a single
		 * mbuf. But, it isn't a problem if it is. We will
		 * simply work off our negative balance as we look
		 * at subsequent mbufs.
		 */
		trailing_data = m_cur->m_len - tcp_off;
		trailing_data -= (caddr_t) th - (caddr_t) m_cur->m_data;
		m_cur = m_cur->m_next;
		while (m_cur) {
			trailing_data += m_cur->m_len;
			m_cur = m_cur->m_next;
		}
		if ((bytes_to_copy + trailing_data) > M_SIZE(n))
			bytes_to_copy = M_SIZE(n);
		else
			bytes_to_copy += trailing_data;
	}

	m_copydata(m, skip, bytes_to_copy, n->m_data);
	n->m_len = bytes_to_copy;
}

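/*
 * Save a copy of the given segment (th, m) on the given queue,
 * recycling an mbuf already on the queue where possible instead of
 * allocating a new one.
 *
 * A sketch of the expected usage (the call sites live elsewhere, in
 * the TCP input and output paths under the TCPPCAP kernel option):
 *
 *	tcp_pcap_add(th, m, &(tp->t_inpkts));	(received segment)
 *	tcp_pcap_add(th, m, &(tp->t_outpkts));	(transmitted segment)
 */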
void
tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue)
{
	struct mbuf *n = NULL, *mhead;

	KASSERT(th, ("%s: called with th == NULL", __func__));
	KASSERT(m, ("%s: called with m == NULL", __func__));
	KASSERT(queue, ("%s: called with queue == NULL", __func__));

	/* We only care about data packets. */
	while (m && m->m_type != MT_DATA)
		m = m->m_next;

	/* We only need to do something if we still have an mbuf. */
	if (!m)
		return;

	/* If we are not saving mbufs, return now. */
	if (queue->mq_maxlen == 0)
		return;

	/*
	 * Check to see if we will need to recycle mbufs.
	 *
	 * If we need to get rid of mbufs to stay below
	 * our packet count, try to reuse the mbuf. Once
	 * we already have a new mbuf (n), then we can
	 * simply free subsequent mbufs.
	 *
	 * Note that most of the logic in here is to deal
	 * with the reuse. If we are fine with constant
	 * mbuf allocs/deallocs, we could ditch this logic.
	 * But, it only seems to make sense to reuse
	 * mbufs we already have.
	 */
	while (mbufq_full(queue)) {
		mhead = mbufq_dequeue(queue);

		if (n) {
			tcp_pcap_m_freem(mhead);
		} else {
			/*
			 * If this held an external cluster, try to
			 * detach the cluster. But, if we held the
			 * last reference, go through the normal
			 * free-ing process.
			 */
			if (mhead->m_flags & M_EXTPG) {
				/* Don't mess around with these. */
				tcp_pcap_m_freem(mhead);
				continue;
			} else if (mhead->m_flags & M_EXT) {
				switch (mhead->m_ext.ext_type) {
				case EXT_SFBUF:
					/* Don't mess around with these. */
					tcp_pcap_m_freem(mhead);
					continue;
				default:
					if (atomic_fetchadd_int(
					    mhead->m_ext.ext_cnt, -1) == 1)
					{
						/*
						 * We held the last reference
						 * on this cluster. Restore
						 * the reference count and put
						 * it back in the pool.
						 */
						*(mhead->m_ext.ext_cnt) = 1;
						tcp_pcap_m_freem(mhead);
						continue;
					}
					/*
					 * We were able to cleanly free the
					 * reference.
					 */
					atomic_subtract_int(
					    &tcp_pcap_clusters_referenced_cur,
					    1);
					tcp_pcap_alloc_reuse_ext++;
					break;
				}
			} else {
				tcp_pcap_alloc_reuse_mbuf++;
			}

			n = mhead;
			tcp_pcap_m_freem(n->m_next);
			m_init(n, M_NOWAIT, MT_DATA, 0);
		}
	}

	/* Check to see if we need to get a new mbuf. */
	if (!n) {
		if (!(n = m_get(M_NOWAIT, MT_DATA)))
			return;
		tcp_pcap_alloc_new_mbuf++;
	}

	/*
	 * What are we dealing with? If a cluster, attach it. Otherwise,
	 * try to copy the data from the beginning of the mbuf to the
	 * end of data. (There may be data between the start of the data
	 * area and the current data pointer. We want to get this, because
	 * it may contain header information that is useful.)
	 * In cases where that isn't possible, settle for what we can
	 * get.
	 */
	if ((m->m_flags & (M_EXT|M_EXTPG)) &&
	    tcp_pcap_take_cluster_reference()) {
		n->m_data = m->m_data;
		n->m_len = m->m_len;
		mb_dupcl(n, m);
	} else if (((m->m_data + m->m_len) - M_START(m)) <= M_SIZE(n)) {
		/*
		 * At this point, n is guaranteed to be a normal mbuf
		 * with no cluster and no packet header. Because the
		 * logic in this code block requires this, the assert
		 * is here to catch any instances where someone
		 * changes the logic to invalidate that assumption.
		 */
		KASSERT((n->m_flags & (M_EXT | M_PKTHDR)) == 0,
		    ("%s: Unexpected flags (%#x) for mbuf",
		    __func__, n->m_flags));
		n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m);
		n->m_len = m->m_len;
		if (m->m_flags & M_EXTPG)
			m_copydata(m, 0, m->m_len, n->m_data);
		else
			bcopy(M_START(m), n->m_dat,
			    m->m_len + M_LEADINGSPACE_NOWRITE(m));
	} else {
		/*
		 * This is the case where we need to "settle for what
		 * we can get". The most probable way to reach this
		 * code path is that we've already taken references to
		 * the maximum number of mbuf clusters we can, and the
		 * data is too long to fit in an mbuf's internal
		 * storage. Try for a "best fit".
		 */
		tcp_pcap_copy_bestfit(th, m, n);

		/* Don't try to get additional data. */
		goto add_to_queue;
	}

	if (m->m_next) {
		n->m_next = m_copym(m->m_next, 0, M_COPYALL, M_NOWAIT);
		tcp_pcap_adj_cluster_reference(n->m_next, 1);
	}

add_to_queue:
	/* Add the new mbuf to the list. */
	if (mbufq_enqueue(queue, n)) {
		/* This shouldn't happen. If INVARIANTS is defined, panic. */
		KASSERT(0, ("%s: mbufq was unexpectedly full!", __func__));
		tcp_pcap_m_freem(n);
	}
}

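/* Free every packet saved on the given queue. */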
void
tcp_pcap_drain(struct mbufq *queue)
{
	struct mbuf *m;

	while ((m = mbufq_dequeue(queue)))
		tcp_pcap_m_freem(m);
}

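/*
 * Initialize the saved-packet queues in a new TCPCB, using the
 * net.inet.tcp.tcp_pcap_packets sysctl as the default per-direction
 * limit.
 */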
void
tcp_pcap_tcpcb_init(struct tcpcb *tp)
{

	mbufq_init(&(tp->t_inpkts), V_tcp_pcap_packets);
	mbufq_init(&(tp->t_outpkts), V_tcp_pcap_packets);
}

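/*
 * Change the maximum number of packets saved on the given queue,
 * discarding the oldest saved packets until the queue fits under the
 * new limit. This is what a setsockopt() handler for a per-socket
 * capture knob (e.g. TCP_PCAP_IN/TCP_PCAP_OUT) would be expected to
 * call.
 */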
void
tcp_pcap_set_sock_max(struct mbufq *queue, int newval)
{

	queue->mq_maxlen = newval;
	while (queue->mq_len > queue->mq_maxlen)
		tcp_pcap_m_freem(mbufq_dequeue(queue));
}

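/* Report the current maximum number of packets saved on the given queue. */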
449
450 int
451 tcp_pcap_get_sock_max(struct mbufq *queue)
452 {
453 return queue->mq_maxlen;
454 }