1 /**************************************************************************
2 *
3 * Copyright (c) 2007, Kip Macy kmacy@freebsd.org
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 *
12 * 2. The name of Kip Macy nor the names of other
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 *
28 *
29 ***************************************************************************/
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD: releng/6.4/sys/dev/cxgb/sys/uipc_mvec.c 170900 2007-06-17 23:52:17Z kmacy $");
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/ktr.h>
41 #include <sys/sf_buf.h>
42
43 #include <machine/bus.h>
44 #ifdef CONFIG_DEFINED
45 #include <cxgb_include.h>
46 #else
47 #include <dev/cxgb/cxgb_include.h>
48 #endif
49
50 #include <vm/vm.h>
51 #include <vm/vm_page.h>
52 #include <vm/pmap.h>
53
#define MAX_BUFS 36	/* on-stack mbuf pointer array size in _m_collapse() */
#define MAX_HVEC 8	/* max iovec header mbufs per collapsed chain */

/* Statistics counters; defined elsewhere in the driver. */
extern uint32_t collapse_free;
extern uint32_t mb_free_vec_free;

/*
 * Saved external-storage state of an mbuf that is being recycled as an
 * iovec header, so the cluster can later be re-attached as an iovec entry.
 */
struct mbuf_ext {
	struct mbuf *me_m;		/* the mbuf being recycled */
	caddr_t me_base;		/* start of its external buffer */
	volatile u_int *me_refcnt;	/* cluster reference count */
	int me_flags;			/* external storage type (EXT_*) */
	uint32_t me_offset;		/* m_data offset within the buffer */
};
67
/*
 * Expand an M_IOVEC mbuf back into a regular mbuf chain.  Entries are
 * walked back-to-front so the chain can be built by prepending; the
 * first entry's storage is re-attached to the original mbuf itself.
 * Returns 0 on success or ENOMEM if a new mbuf cannot be allocated.
 */
int
_m_explode(struct mbuf *m)
{
	int i, offset, type, first, len;
	caddr_t cl;
	struct mbuf *m0, *head = NULL;
	struct mbuf_vec *mv;

#ifdef INVARIANTS
	/*
	 * Sanity: summed chain lengths must equal the recorded packet
	 * length, and only the head may carry M_PKTHDR.
	 */
	len = m->m_len;
	m0 = m->m_next;
	while (m0) {
		KASSERT((m0->m_flags & M_PKTHDR) == 0,
		    ("pkthdr set on intermediate mbuf - pre"));
		len += m0->m_len;
		m0 = m0->m_next;

	}
	if (len != m->m_pkthdr.len)
		panic("at start len=%d pktlen=%d", len, m->m_pkthdr.len);
#endif
	mv = mtomv(m);
	first = mv->mv_first;
	for (i = mv->mv_count + first - 1; i > first; i--) {
		type = mbuf_vec_get_type(mv, i);
		cl = mv->mv_vec[i].mi_base;
		offset = mv->mv_vec[i].mi_offset;
		len = mv->mv_vec[i].mi_len;
		if (__predict_false(type == EXT_MBUF)) {
			/* Entry is itself an mbuf - reuse it directly. */
			m0 = (struct mbuf *)cl;
			KASSERT((m0->m_flags & M_EXT) == 0, ("M_EXT set on mbuf"));
			m0->m_len = len;
			m0->m_data = cl + offset;
			goto skip_cluster;

		} else if ((m0 = m_get(M_NOWAIT, MT_DATA)) == NULL) {
			/*
			 * Check for extra memory leaks
			 */
			m_freem(head);
			return (ENOMEM);
		}
		m0->m_flags = 0;

		/* Attach the saved cluster to the fresh mbuf. */
		m_cljset(m0, (uint8_t *)cl, type);
		m0->m_len = mv->mv_vec[i].mi_len;
		if (offset)
			m_adj(m0, offset);
	skip_cluster:
		/* Prepend and shrink the head toward the first entry's length. */
		m0->m_next = head;
		m->m_len -= m0->m_len;
		head = m0;
	}
	/* Re-attach the first entry's storage to the original head mbuf. */
	offset = mv->mv_vec[first].mi_offset;
	cl = mv->mv_vec[first].mi_base;
	type = mbuf_vec_get_type(mv, first);
	m->m_flags &= ~(M_IOVEC);
	m_cljset(m, cl, type);
	if (offset)
		m_adj(m, offset);
	m->m_next = head;
	head = m;
	M_SANITY(m, 0);

	return (0);
}
134
135 static __inline int
136 m_vectorize(struct mbuf *m, int max, struct mbuf **vec, int *count)
137 {
138 int i, error = 0;
139
140 for (i = 0; i < max; i++) {
141 if (m == NULL)
142 break;
143 #ifndef MBUF_PACKET_ZONE_DISABLE
144 if ((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_PACKET))
145 return (EINVAL);
146 #endif
147 #ifdef ZERO_COPY_SOCKETS
148 if ((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_SFBUF))
149 return (EINVAL);
150 #endif
151 M_SANITY(m, 0);
152 vec[i] = m;
153 m = m->m_next;
154 }
155 if (m)
156 error = EFBIG;
157
158 *count = i;
159
160 return (error);
161 }
162
163 static __inline int
164 m_findmbufs(struct mbuf **ivec, int maxbufs, struct mbuf_ext *ovec, int osize, int *ocount)
165 {
166 int i, j, nhbufsneed, nhbufs;
167 struct mbuf *m;
168
169 nhbufsneed = min(((maxbufs - 1)/MAX_MBUF_IOV) + 1, osize);
170 ovec[0].me_m = NULL;
171
172 for (nhbufs = j = i = 0; i < maxbufs && nhbufs < nhbufsneed; i++) {
173 if ((ivec[i]->m_flags & M_EXT) == 0)
174 continue;
175 m = ivec[i];
176 ovec[nhbufs].me_m = m;
177 ovec[nhbufs].me_base = m->m_ext.ext_buf;
178 ovec[nhbufs].me_refcnt = m->m_ext.ref_cnt;
179 ovec[nhbufs].me_offset = (m->m_data - m->m_ext.ext_buf);
180 ovec[nhbufs].me_flags = m->m_ext.ext_type;
181 nhbufs++;
182 }
183 if (nhbufs == 0) {
184 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
185 goto m_getfail;
186 ovec[nhbufs].me_m = m;
187 nhbufs = 1;
188 }
189 while (nhbufs < nhbufsneed) {
190 if ((m = m_get(M_NOWAIT, MT_DATA)) == NULL)
191 goto m_getfail;
192 ovec[nhbufs].me_m = m;
193 nhbufs++;
194 }
195 /*
196 * Copy over packet header to new head of chain
197 */
198 if (ovec[0].me_m != ivec[0]) {
199 ovec[0].me_m->m_flags |= M_PKTHDR;
200 memcpy(&ovec[0].me_m->m_pkthdr, &ivec[0]->m_pkthdr, sizeof(struct pkthdr));
201 SLIST_INIT(&ivec[0]->m_pkthdr.tags);
202 }
203 *ocount = nhbufs;
204 return (0);
205 m_getfail:
206 for (i = 0; i < nhbufs; i++)
207 if ((ovec[i].me_m->m_flags & M_EXT) == 0)
208 uma_zfree(zone_mbuf, ovec[i].me_m);
209 return (ENOMEM);
210
211 }
212
213 static __inline void
214 m_setiovec(struct mbuf_iovec *mi, struct mbuf *m, struct mbuf_ext *extvec, int *me_index,
215 int max_me_index)
216 {
217 int idx = *me_index;
218
219 mi->mi_len = m->m_len;
220 if (idx < max_me_index && extvec[idx].me_m == m) {
221 struct mbuf_ext *me = &extvec[idx];
222 (*me_index)++;
223 mi->mi_base = me->me_base;
224 mi->mi_refcnt = me->me_refcnt;
225 mi->mi_offset = me->me_offset;
226 mi->mi_flags = me->me_flags;
227 } else if (m->m_flags & M_EXT) {
228 mi->mi_base = m->m_ext.ext_buf;
229 mi->mi_refcnt = m->m_ext.ref_cnt;
230 mi->mi_offset =
231 (m->m_data - m->m_ext.ext_buf);
232 mi->mi_flags = m->m_ext.ext_type;
233 } else {
234 KASSERT(m->m_len < 256, ("mbuf too large len=%d",
235 m->m_len));
236 mi->mi_base = (caddr_t)m;
237 mi->mi_refcnt = NULL;
238 mi->mi_offset =
239 (m->m_data - (caddr_t)m);
240 mi->mi_flags = EXT_MBUF;
241 }
242 DPRINTF("type=%d len=%d refcnt=%p cl=%p offset=0x%x\n",
243 mi->mi_flags, mi->mi_len, mi->mi_refcnt, mi->mi_base,
244 mi->mi_offset);
245 }
246
/*
 * Collapse an ordinary mbuf chain of up to maxbufs mbufs into a short
 * chain of M_IOVEC mbufs, each holding up to MAX_MBUF_IOV iovec entries.
 * On success *mnew points at the new head and the mbufs made redundant
 * by the collapse are freed.  Returns 0, ENOMEM, or EFBIG.
 */
int
_m_collapse(struct mbuf *m, int maxbufs, struct mbuf **mnew)
{
	struct mbuf *m0, *lmvec[MAX_BUFS];
	struct mbuf **mnext;
	struct mbuf **vec = lmvec;
	struct mbuf *mhead = NULL;
	struct mbuf_vec *mv;
	int err, i, j, max, len, nhbufs;
	struct mbuf_ext dvec[MAX_HVEC];
	int hidx = 0, dvecidx;

	M_SANITY(m, 0);
	/* Large chains spill the pointer array to the heap. */
	if (maxbufs > MAX_BUFS) {
		if ((vec = malloc(maxbufs * sizeof(struct mbuf *),
		    M_DEVBUF, M_NOWAIT)) == NULL)
			return (ENOMEM);
	}

	/* Flatten the chain into vec[], then pick the header mbufs. */
	if ((err = m_vectorize(m, maxbufs, vec, &max)) != 0)
		goto out;
	if ((err = m_findmbufs(vec, max, dvec, MAX_HVEC, &nhbufs)) != 0)
		goto out;

	KASSERT(max > 0, ("invalid mbuf count"));
	KASSERT(nhbufs > 0, ("invalid header mbuf count"));

	mhead = m0 = dvec[0].me_m;

	DPRINTF("nbufs=%d nhbufs=%d\n", max, nhbufs);
	/* Pack the flattened mbufs into each header's iovec array. */
	for (hidx = dvecidx = i = 0, mnext = NULL; i < max; hidx++) {
		m0 = dvec[hidx].me_m;
		/*
		 * Clearing M_EXT here also keeps the final cleanup loop
		 * below from freeing the recycled header mbufs.
		 */
		m0->m_flags &= ~M_EXT;
		m0->m_flags |= M_IOVEC;

		if (mnext)
			*mnext = m0;

		mv = mtomv(m0);
		len = mv->mv_first = 0;
		for (j = 0; j < MAX_MBUF_IOV && i < max; j++, i++) {
			struct mbuf_iovec *mi = &mv->mv_vec[j];

			m_setiovec(mi, vec[i], dvec, &dvecidx, nhbufs);
			len += mi->mi_len;
		}
		/* Point m_data at the first entry's payload. */
		m0->m_data = mv->mv_vec[0].mi_base + mv->mv_vec[0].mi_offset;
		mv->mv_count = j;
		m0->m_len = len;
		mnext = &m0->m_next;
		DPRINTF("count=%d len=%d\n", j, len);
	}

	/*
	 * Terminate chain
	 */
	m0->m_next = NULL;

	/*
	 * Free all mbufs not used by the mbuf iovec chain
	 */
	for (i = 0; i < max; i++)
		if (vec[i]->m_flags & M_EXT) {
			/*
			 * The cluster now belongs to an iovec entry;
			 * strip M_EXT so freeing the mbuf does not
			 * release it.
			 */
			vec[i]->m_flags &= ~M_EXT;
			collapse_free++;
			uma_zfree(zone_mbuf, vec[i]);
		}

	*mnew = mhead;
out:
	if (vec != lmvec)
		free(vec, M_DEVBUF);
	return (err);
}
321
/*
 * Free an M_IOVEC mbuf: drop a reference on (and possibly free) every
 * cluster or embedded mbuf recorded in its iovec, then release the
 * mbuf itself back to the mbuf zone.
 */
void
mb_free_vec(struct mbuf *m)
{
	struct mbuf_vec *mv;
	int i;

	KASSERT((m->m_flags & (M_EXT|M_IOVEC)) == M_IOVEC,
	    ("%s: M_EXT set", __func__));

	mv = mtomv(m);
	KASSERT(mv->mv_count <= MAX_MBUF_IOV,
	    ("%s: mi_count too large %d", __func__, mv->mv_count));

	DPRINTF("count=%d len=%d\n", mv->mv_count, m->m_len);
	for (i = mv->mv_first; i < mv->mv_count; i++) {
		uma_zone_t zone = NULL;
		volatile unsigned int *refcnt = mv->mv_vec[i].mi_refcnt;
		int type = mbuf_vec_get_type(mv, i);
		void *cl = mv->mv_vec[i].mi_base;

		/*
		 * EXT_MBUF entries have no refcnt (mi_refcnt is NULL,
		 * never dereferenced thanks to short-circuiting) and are
		 * always freed; everything else frees only on the last
		 * reference.
		 */
		if ((type != EXT_MBUF) && *refcnt != 1 &&
		    atomic_fetchadd_int(refcnt, -1) != 1)
			continue;

		DPRINTF("freeing idx=%d refcnt=%p type=%d cl=%p\n", i, refcnt, type, cl);
		switch (type) {
		case EXT_MBUF:
			mb_free_vec_free++;
			/* FALLTHROUGH: embedded mbufs free via their zone too */
		case EXT_CLUSTER:
		case EXT_JUMBOP:
		case EXT_JUMBO9:
		case EXT_JUMBO16:
			zone = m_getzonefromtype(type);
			uma_zfree(zone, cl);
			continue;
		case EXT_SFBUF:
			*refcnt = 0;
#ifdef notyet
			/* XXX M_MBUF is a static */
			free(__DEVOLATILE(u_int *,
			    refcnt), M_MBUF);
#endif
#ifdef __i386__
			sf_buf_mext(cl, mv->mv_vec[i].mi_args);
#else
			/*
			 * Every architecture other than i386 uses a vm_page
			 * for an sf_buf (well ... sparc64 does but shouldn't)
			 */
			sf_buf_mext(cl, PHYS_TO_VM_PAGE(vtophys(cl)));
#endif
			continue;
		default:
			KASSERT(m->m_ext.ext_type == 0,
			    ("%s: unknown ext_type", __func__));
			break;
		}
	}
	/*
	 * Free this mbuf back to the mbuf zone with all iovec
	 * information purged.
	 */
	mb_free_vec_free++;
	uma_zfree(zone_mbuf, m);
}
387
388 #if (!defined(__sparc64__) && !defined(__sun4v__))
389 #include <sys/sysctl.h>
390
#define BUS_DMA_COULD_BOUNCE	BUS_DMA_BUS3
#define BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4

/*
 * NOTE(review): the structures below appear to duplicate the private
 * definitions of the platform busdma implementation so the tag fields
 * can be read directly here - confirm they are kept in sync with the
 * machine-dependent busdma code.
 */
struct bounce_zone {
	STAILQ_ENTRY(bounce_zone) links;
	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
	int total_bpages;
	int free_bpages;
	int reserved_bpages;
	int active_bpages;
	int total_bounced;
	int total_deferred;
	bus_size_t alignment;
	bus_size_t boundary;
	bus_addr_t lowaddr;
	char zoneid[8];
	char lowaddrid[20];
	struct sysctl_ctx_list sysctl_tree;
	struct sysctl_oid *sysctl_tree_top;
};
struct bus_dma_tag {
	bus_dma_tag_t parent;		/* constraints inherited from parent */
	bus_size_t alignment;
	bus_size_t boundary;		/* segments may not cross this boundary */
	bus_addr_t lowaddr;
	bus_addr_t highaddr;
	bus_dma_filter_t *filter;
	void *filterarg;
	bus_size_t maxsize;
	u_int nsegments;		/* max segments per mapping */
	bus_size_t maxsegsz;
	int flags;
	int ref_count;
	int map_count;
	bus_dma_lock_t *lockfunc;
	void *lockfuncarg;
	bus_dma_segment_t *segments;
	struct bounce_zone *bounce_zone;
};

struct bus_dmamap {
	struct bp_list bpages;
	int pagesneeded;
	int pagesreserved;
	bus_dma_tag_t dmat;
	void *buf;		/* unmapped buffer pointer */
	bus_size_t buflen;	/* unmapped buffer length */
	bus_dmamap_callback_t *callback;
	void *callback_arg;
	STAILQ_ENTRY(bus_dmamap) links;
};

/* Stand-in map for callers that pass map == NULL (no bouncing). */
static struct bus_dmamap nobounce_dmamap;
444
445 static __inline int
446 run_filter(bus_dma_tag_t dmat, bus_addr_t paddr)
447 {
448 int retval;
449
450 retval = 0;
451
452 do {
453 if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr)
454 || ((paddr & (dmat->alignment - 1)) != 0))
455 && (dmat->filter == NULL
456 || (*dmat->filter)(dmat->filterarg, paddr) != 0))
457 retval = 1;
458
459 dmat = dmat->parent;
460 } while (retval == 0 && dmat != NULL);
461 return (retval);
462 }
463
/*
 * Build DMA segments for one contiguous virtual buffer, coalescing
 * physically adjacent chunks and honoring the tag's boundary and
 * maxsegsz constraints.  Bounce pages are NOT supported here - any map
 * that needs them panics.  *segp carries the index of the last segment
 * written (in/out); *lastaddrp carries the end address of the previous
 * chunk for coalescing across calls.  Returns EFBIG if the buffer does
 * not fit in dmat->nsegments segments, 0 otherwise.
 */
static __inline int
_bus_dmamap_load_buffer(bus_dma_tag_t dmat,
			bus_dmamap_t map,
			void *buf, bus_size_t buflen,
			pmap_t pmap,
			int flags,
			bus_addr_t *lastaddrp,
			bus_dma_segment_t *segs,
			int *segp,
			int first)
{
	bus_size_t sgsize;
	bus_addr_t curaddr, lastaddr, baddr, bmask;
	vm_offset_t vaddr;
	int needbounce = 0;
	int seg;

	if (map == NULL)
		map = &nobounce_dmamap;

	/* Reserve Necessary Bounce Pages */
	if (map->pagesneeded != 0)
		panic("don't support bounce pages");

	vaddr = (vm_offset_t)buf;
	lastaddr = *lastaddrp;
	bmask = ~(dmat->boundary - 1);

	for (seg = *segp; buflen > 0 ; ) {
		/*
		 * Get the physical address for this segment.
		 */
		if (pmap)
			curaddr = pmap_extract(pmap, vaddr);
		else
			curaddr = pmap_kextract(vaddr);


		/*
		 * Compute the segment size, and adjust counts.
		 */
		sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK);
		if (buflen < sgsize)
			sgsize = buflen;

		/*
		 * Make sure we don't cross any boundaries.
		 */
		if (dmat->boundary > 0) {
			baddr = (curaddr + dmat->boundary) & bmask;
			if (sgsize > (baddr - curaddr))
				sgsize = (baddr - curaddr);
		}

		if (map->pagesneeded != 0 && run_filter(dmat, curaddr))
			panic("no bounce page support");

		/*
		 * Insert chunk into a segment, coalescing with
		 * previous segment if possible.
		 */
		if (first) {
			segs[seg].ds_addr = curaddr;
			segs[seg].ds_len = sgsize;
			first = 0;
		} else {
			/* Extend the previous segment only if physically
			 * contiguous and within size/boundary limits. */
			if (needbounce == 0 && curaddr == lastaddr &&
			    (segs[seg].ds_len + sgsize) <= dmat->maxsegsz &&
			    (dmat->boundary == 0 ||
			     (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
				segs[seg].ds_len += sgsize;
			else {
				if (++seg >= dmat->nsegments)
					break;
				segs[seg].ds_addr = curaddr;
				segs[seg].ds_len = sgsize;
			}
		}

		lastaddr = curaddr + sgsize;
		vaddr += sgsize;
		buflen -= sgsize;
	}

	*segp = seg;
	*lastaddrp = lastaddr;

	/*
	 * Did we fit?
	 */
	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
}
556
557 int
558 bus_dmamap_load_mvec_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
559 bus_dma_segment_t *segs, int *nsegs, int flags)
560 {
561 int error, i;
562
563 M_ASSERTPKTHDR(m0);
564
565 if ((m0->m_flags & M_IOVEC) == 0)
566 return (bus_dmamap_load_mbuf_sg(dmat, map, m0, segs, nsegs, flags));
567
568 flags |= BUS_DMA_NOWAIT;
569 *nsegs = 0;
570 error = 0;
571 if (m0->m_pkthdr.len <= dmat->maxsize) {
572 int first = 1;
573 bus_addr_t lastaddr = 0;
574 struct mbuf *m;
575
576 for (m = m0; m != NULL && error == 0; m = m->m_next) {
577 struct mbuf_vec *mv;
578 int count, firstcl;
579 if (!(m->m_len > 0))
580 continue;
581
582 mv = mtomv(m);
583 count = mv->mv_count;
584 firstcl = mv->mv_first;
585 KASSERT(count <= MAX_MBUF_IOV, ("count=%d too large", count));
586 for (i = firstcl; i < count && error == 0; i++) {
587 void *data = mv->mv_vec[i].mi_base + mv->mv_vec[i].mi_offset;
588 int len = mv->mv_vec[i].mi_len;
589
590 if (len == 0)
591 continue;
592 DPRINTF("mapping data=%p len=%d\n", data, len);
593 error = _bus_dmamap_load_buffer(dmat, NULL,
594 data, len, NULL, flags, &lastaddr,
595 segs, nsegs, first);
596 DPRINTF("%d: addr=0x%jx len=%ju\n", i,
597 (uintmax_t)segs[i].ds_addr, (uintmax_t)segs[i].ds_len);
598 first = 0;
599 }
600 }
601 } else {
602 error = EINVAL;
603 }
604
605 (*nsegs)++;
606
607 CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
608 __func__, dmat, dmat->flags, error, *nsegs);
609 return (error);
610 }
611 #endif /* !__sparc64__ && !__sun4v__ */
/* Cache object: eec80b5afd7d0977d621e86bf6802b84 */