
FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/busdma_machdep.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2012-2015 Ian Lepore
    5  * Copyright (c) 2010 Mark Tinguely
    6  * Copyright (c) 2004 Olivier Houchard
    7  * Copyright (c) 2002 Peter Grehan
    8  * Copyright (c) 1997, 1998 Justin T. Gibbs.
    9  * All rights reserved.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions, and the following disclaimer,
   16  *    without modification, immediately at the beginning of the file.
   17  * 2. The name of the author may not be used to endorse or promote products
   18  *    derived from this software without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
   24  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *  From i386/busdma_machdep.c 191438 2009-04-23 20:24:19Z jhb
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD$");
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/malloc.h>
   41 #include <sys/bus.h>
   42 #include <sys/busdma_bufalloc.h>
   43 #include <sys/counter.h>
   44 #include <sys/interrupt.h>
   45 #include <sys/kernel.h>
   46 #include <sys/ktr.h>
   47 #include <sys/lock.h>
   48 #include <sys/memdesc.h>
   49 #include <sys/proc.h>
   50 #include <sys/mutex.h>
   51 #include <sys/sysctl.h>
   52 #include <sys/uio.h>
   53 
   54 #include <vm/vm.h>
   55 #include <vm/vm_param.h>
   56 #include <vm/vm_page.h>
   57 #include <vm/vm_phys.h>
   58 #include <vm/vm_map.h>
   59 #include <vm/vm_extern.h>
   60 #include <vm/vm_kern.h>
   61 
   62 #include <machine/atomic.h>
   63 #include <machine/bus.h>
   64 #include <machine/cpu.h>
   65 #include <machine/md_var.h>
   66 
   67 //#define ARM_BUSDMA_MAPLOAD_STATS
   68 
   69 #define BUSDMA_DCACHE_ALIGN     cpuinfo.dcache_line_size
   70 #define BUSDMA_DCACHE_MASK      cpuinfo.dcache_line_mask
   71 
   72 #define MAX_BPAGES              64
   73 #define MAX_DMA_SEGMENTS        4096
   74 #define BUS_DMA_EXCL_BOUNCE     BUS_DMA_BUS2
   75 #define BUS_DMA_ALIGN_BOUNCE    BUS_DMA_BUS3
   76 #define BUS_DMA_COULD_BOUNCE    (BUS_DMA_EXCL_BOUNCE | BUS_DMA_ALIGN_BOUNCE)
   77 #define BUS_DMA_MIN_ALLOC_COMP  BUS_DMA_BUS4
   78 
   79 struct bounce_page;
   80 struct bounce_zone;
   81 
   82 struct bus_dma_tag {
   83         bus_dma_tag_t           parent;
   84         bus_size_t              alignment;
   85         bus_addr_t              boundary;
   86         bus_addr_t              lowaddr;
   87         bus_addr_t              highaddr;
   88         bus_dma_filter_t        *filter;
   89         void                    *filterarg;
   90         bus_size_t              maxsize;
   91         u_int                   nsegments;
   92         bus_size_t              maxsegsz;
   93         int                     flags;
   94         int                     ref_count;
   95         int                     map_count;
   96         bus_dma_lock_t          *lockfunc;
   97         void                    *lockfuncarg;
   98         struct bounce_zone      *bounce_zone;
   99 };
  100 
  101 struct sync_list {
  102         vm_offset_t     vaddr;          /* kva of client data */
  103         bus_addr_t      paddr;          /* physical address */
  104         vm_page_t       pages;          /* starting page of client data */
  105         bus_size_t      datacount;      /* client data count */
  106 };
  107 
  108 static uint32_t tags_total;
  109 static uint32_t maps_total;
  110 static uint32_t maps_dmamem;
  111 static uint32_t maps_coherent;
  112 #ifdef ARM_BUSDMA_MAPLOAD_STATS
  113 static counter_u64_t maploads_total;
  114 static counter_u64_t maploads_bounced;
  115 static counter_u64_t maploads_coherent;
  116 static counter_u64_t maploads_dmamem;
  117 static counter_u64_t maploads_mbuf;
  118 static counter_u64_t maploads_physmem;
  119 #endif
  120 
  121 SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  122     "Busdma parameters");
  123 SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0,
  124    "Number of active tags");
  125 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0,
  126    "Number of active maps");
  127 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0,
  128    "Number of active maps for bus_dmamem_alloc buffers");
  129 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0,
  130    "Number of active maps with BUS_DMA_COHERENT flag set");
  131 #ifdef ARM_BUSDMA_MAPLOAD_STATS
  132 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD,
  133     &maploads_total, "Number of load operations performed");
  134 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD,
  135     &maploads_bounced, "Number of load operations that used bounce buffers");
  136 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD,
   137     &maploads_coherent, "Number of load operations on BUS_DMA_COHERENT memory");
  138 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD,
  139     &maploads_dmamem, "Number of load operations on bus_dmamem_alloc buffers");
  140 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD,
  141     &maploads_mbuf, "Number of load operations for mbufs");
  142 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD,
  143     &maploads_physmem, "Number of load operations on physical buffers");
  144 #endif
  145 
  146 struct bus_dmamap {
  147         STAILQ_HEAD(, bounce_page) bpages;
  148         int                     pagesneeded;
  149         int                     pagesreserved;
  150         bus_dma_tag_t           dmat;
  151         struct memdesc          mem;
  152         bus_dmamap_callback_t   *callback;
  153         void                    *callback_arg;
  154         int                     flags;
  155 #define DMAMAP_COHERENT         (1 << 0)
  156 #define DMAMAP_DMAMEM_ALLOC     (1 << 1)
  157 #define DMAMAP_MBUF             (1 << 2)
  158         STAILQ_ENTRY(bus_dmamap) links;
  159         bus_dma_segment_t       *segments;
  160         int                     sync_count;
  161         struct sync_list        slist[];
  162 };
  163 
  164 static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap,
  165     bus_dmamap_t map, void *buf, bus_size_t buflen, int flags);
  166 static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
  167     vm_paddr_t buf, bus_size_t buflen, int flags);
  168 static void dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size);
  169 static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op);
  170 
  171 static busdma_bufalloc_t coherent_allocator;    /* Cache of coherent buffers */
  172 static busdma_bufalloc_t standard_allocator;    /* Cache of standard buffers */
  173 
  174 MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata");
  175 
  176 #define dmat_alignment(dmat)    ((dmat)->alignment)
  177 #define dmat_flags(dmat)        ((dmat)->flags)
  178 #define dmat_lowaddr(dmat)      ((dmat)->lowaddr)
  179 #define dmat_lockfunc(dmat)     ((dmat)->lockfunc)
  180 #define dmat_lockfuncarg(dmat)  ((dmat)->lockfuncarg)
  181 
  182 #include "../../kern/subr_busdma_bounce.c"
  183 
  184 static void
  185 busdma_init(void *dummy)
  186 {
  187         int uma_flags;
  188 
  189 #ifdef ARM_BUSDMA_MAPLOAD_STATS
  190         maploads_total    = counter_u64_alloc(M_WAITOK);
  191         maploads_bounced  = counter_u64_alloc(M_WAITOK);
  192         maploads_coherent = counter_u64_alloc(M_WAITOK);
  193         maploads_dmamem   = counter_u64_alloc(M_WAITOK);
  194         maploads_mbuf     = counter_u64_alloc(M_WAITOK);
  195         maploads_physmem  = counter_u64_alloc(M_WAITOK);
  196 #endif
  197 
  198         uma_flags = 0;
  199 
  200         /* Create a cache of buffers in standard (cacheable) memory. */
  201         standard_allocator = busdma_bufalloc_create("buffer",
  202             BUSDMA_DCACHE_ALIGN,/* minimum_alignment */
  203             NULL,               /* uma_alloc func */
  204             NULL,               /* uma_free func */
  205             uma_flags);         /* uma_zcreate_flags */
  206 
  207 #ifdef INVARIANTS
  208         /*
   209          * Force the UMA zone to allocate service structures such
   210          * as slabs using its own allocator.  The uma_debug code
   211          * performs atomic ops on uma_slab_t fields, and the safety
   212          * of those operations is not guaranteed for write-back caches.
  213          */
  214         uma_flags = UMA_ZONE_NOTOUCH;
  215 #endif
  216         /*
  217          * Create a cache of buffers in uncacheable memory, to implement the
  218          * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag.
  219          */
  220         coherent_allocator = busdma_bufalloc_create("coherent",
  221             BUSDMA_DCACHE_ALIGN,/* minimum_alignment */
  222             busdma_bufalloc_alloc_uncacheable,
  223             busdma_bufalloc_free_uncacheable,
  224             uma_flags); /* uma_zcreate_flags */
  225 }
  226 
  227 /*
  228  * This init historically used SI_SUB_VM, but now the init code requires
  229  * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get
  230  * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by
  231  * using SI_SUB_KMEM+1.
  232  */
  233 SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL);
  234 
  235 /*
  236  * This routine checks the exclusion zone constraints from a tag against the
  237  * physical RAM available on the machine.  If a tag specifies an exclusion zone
  238  * but there's no RAM in that zone, then we avoid allocating resources to bounce
  239  * a request, and we can use any memory allocator (as opposed to needing
  240  * kmem_alloc_contig() just because it can allocate pages in an address range).
  241  *
  242  * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the
  243  * same value on 32-bit architectures) as their lowaddr constraint, and we can't
  244  * possibly have RAM at an address higher than the highest address we can
  245  * express, so we take a fast out.
  246  */
  247 static int
  248 exclusion_bounce_check(vm_offset_t lowaddr, vm_offset_t highaddr)
  249 {
  250         int i;
  251 
  252         if (lowaddr >= BUS_SPACE_MAXADDR)
  253                 return (0);
  254 
  255         for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) {
  256                 if ((lowaddr >= phys_avail[i] && lowaddr < phys_avail[i + 1]) ||
  257                     (lowaddr < phys_avail[i] && highaddr >= phys_avail[i]))
  258                         return (1);
  259         }
  260         return (0);
  261 }
  262 
  263 /*
  264  * Return true if the tag has an exclusion zone that could lead to bouncing.
  265  */
  266 static __inline int
  267 exclusion_bounce(bus_dma_tag_t dmat)
  268 {
  269 
  270         return (dmat->flags & BUS_DMA_EXCL_BOUNCE);
  271 }
  272 
  273 /*
  274  * Return true if the given address does not fall on the alignment boundary.
  275  */
  276 static __inline int
  277 alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr)
  278 {
  279 
  280         return (!vm_addr_align_ok(addr, dmat->alignment));
  281 }
  282 
  283 /*
  284  * Return true if the DMA should bounce because the start or end does not fall
  285  * on a cacheline boundary (which would require a partial cacheline flush).
  286  * COHERENT memory doesn't trigger cacheline flushes.  Memory allocated by
  287  * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a
  288  * strict rule that such memory cannot be accessed by the CPU while DMA is in
  289  * progress (or by multiple DMA engines at once), so that it's always safe to do
  290  * full cacheline flushes even if that affects memory outside the range of a
  291  * given DMA operation that doesn't involve the full allocated buffer.  If we're
  292  * mapping an mbuf, that follows the same rules as a buffer we allocated.
  293  */
  294 static __inline int
  295 cacheline_bounce(bus_dmamap_t map, bus_addr_t addr, bus_size_t size)
  296 {
  297 
  298         if (map->flags & (DMAMAP_DMAMEM_ALLOC | DMAMAP_COHERENT | DMAMAP_MBUF))
  299                 return (0);
  300         return ((addr | size) & BUSDMA_DCACHE_MASK);
  301 }
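
/*
 * Worked example (illustrative values, assuming a 64-byte data cache line,
 * i.e. BUSDMA_DCACHE_MASK == 0x3f): a transfer at address 0x80001004 with
 * size 0x200 gives (0x80001004 | 0x200) & 0x3f == 0x04, which is non-zero,
 * so the transfer starts in the middle of a cacheline and must bounce.  A
 * transfer at 0x80001040 with size 0x180 gives zero, because both the start
 * address and the length are multiples of the line size, so no cacheline
 * bounce is needed.
 */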
  302 
  303 /*
  304  * Return true if we might need to bounce the DMA described by addr and size.
  305  *
  306  * This is used to quick-check whether we need to do the more expensive work of
  307  * checking the DMA page-by-page looking for alignment and exclusion bounces.
  308  *
  309  * Note that the addr argument might be either virtual or physical.  It doesn't
   310  * matter because we only look at the low-order bits, which are the same in
   311  * both address spaces, and the maximum alignment of a generic buffer is
   312  * limited to the page size.
   313  * Buffers allocated by bus_dmamem_alloc() never need to bounce; they
   314  * always comply with the required rules (alignment, boundary, and address
   315  * range).
  316  */
  317 static __inline int
  318 might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr,
  319     bus_size_t size)
  320 {
  321 
  322         KASSERT(map->flags & DMAMAP_DMAMEM_ALLOC ||
  323             dmat->alignment <= PAGE_SIZE,
  324             ("%s: unsupported alignment (0x%08lx) for buffer not "
  325             "allocated by bus_dmamem_alloc()",
  326             __func__, dmat->alignment));
  327 
  328         return (!(map->flags & DMAMAP_DMAMEM_ALLOC) &&
  329             ((dmat->flags & BUS_DMA_EXCL_BOUNCE) ||
  330             alignment_bounce(dmat, addr) ||
  331             cacheline_bounce(map, addr, size)));
  332 }
  333 
  334 /*
  335  * Return true if we must bounce the DMA described by paddr and size.
  336  *
  337  * Bouncing can be triggered by DMA that doesn't begin and end on cacheline
  338  * boundaries, or doesn't begin on an alignment boundary, or falls within the
  339  * exclusion zone of any tag in the ancestry chain.
  340  *
  341  * For exclusions, walk the chain of tags comparing paddr to the exclusion zone
  342  * within each tag.  If the tag has a filter function, use it to decide whether
  343  * the DMA needs to bounce, otherwise any DMA within the zone bounces.
  344  */
  345 static int
  346 must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
  347     bus_size_t size)
  348 {
  349 
  350         if (cacheline_bounce(map, paddr, size))
  351                 return (1);
  352 
  353         /*
  354          *  The tag already contains ancestors' alignment restrictions so this
  355          *  check doesn't need to be inside the loop.
  356          */
  357         if (alignment_bounce(dmat, paddr))
  358                 return (1);
  359 
  360         /*
  361          * Even though each tag has an exclusion zone that is a superset of its
  362          * own and all its ancestors' exclusions, the exclusion zone of each tag
  363          * up the chain must be checked within the loop, because the busdma
  364          * rules say the filter function is called only when the address lies
  365          * within the low-highaddr range of the tag that filterfunc belongs to.
  366          */
  367         while (dmat != NULL && exclusion_bounce(dmat)) {
  368                 if ((paddr >= dmat->lowaddr && paddr <= dmat->highaddr) &&
  369                     (dmat->filter == NULL ||
  370                     dmat->filter(dmat->filterarg, paddr) != 0))
  371                         return (1);
  372                 dmat = dmat->parent;
  373         }
  374 
  375         return (0);
  376 }
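
/*
 * Worked example (hypothetical tag values): a tag created with
 * lowaddr == 0x1fffffff and highaddr == BUS_SPACE_MAXADDR describes a
 * device that can only address the low 512MB of physical memory.  A page
 * at paddr 0x20001000 satisfies (paddr >= lowaddr && paddr <= highaddr),
 * so with no filter function installed it bounces; a page at paddr
 * 0x10000000 lies below lowaddr and is used directly.
 */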
  377 
  378 /*
  379  * Allocate a device specific dma_tag.
  380  */
  381 int
  382 bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
  383     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
  384     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
  385     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
  386     void *lockfuncarg, bus_dma_tag_t *dmat)
  387 {
  388         bus_dma_tag_t newtag;
  389         int error = 0;
  390 
  391         /* Basic sanity checking. */
  392         KASSERT(boundary == 0 || powerof2(boundary),
  393             ("dma tag boundary %lu, must be a power of 2", boundary));
  394         KASSERT(boundary == 0 || boundary >= maxsegsz,
  395             ("dma tag boundary %lu is < maxsegsz %lu\n", boundary, maxsegsz));
  396         KASSERT(alignment != 0 && powerof2(alignment),
  397             ("dma tag alignment %lu, must be non-zero power of 2", alignment));
  398         KASSERT(maxsegsz != 0, ("dma tag maxsegsz must not be zero"));
  399 
  400         /* Return a NULL tag on failure */
  401         *dmat = NULL;
  402 
  403         newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA,
  404             M_ZERO | M_NOWAIT);
  405         if (newtag == NULL) {
  406                 CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
  407                     __func__, newtag, 0, error);
  408                 return (ENOMEM);
  409         }
  410 
  411         newtag->parent = parent;
  412         newtag->alignment = alignment;
  413         newtag->boundary = boundary;
  414         newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1);
  415         newtag->highaddr = trunc_page((vm_paddr_t)highaddr) +
  416             (PAGE_SIZE - 1);
  417         newtag->filter = filter;
  418         newtag->filterarg = filterarg;
  419         newtag->maxsize = maxsize;
  420         newtag->nsegments = nsegments;
  421         newtag->maxsegsz = maxsegsz;
  422         newtag->flags = flags;
  423         newtag->ref_count = 1; /* Count ourself */
  424         newtag->map_count = 0;
  425         if (lockfunc != NULL) {
  426                 newtag->lockfunc = lockfunc;
  427                 newtag->lockfuncarg = lockfuncarg;
  428         } else {
  429                 newtag->lockfunc = _busdma_dflt_lock;
  430                 newtag->lockfuncarg = NULL;
  431         }
  432 
  433         /* Take into account any restrictions imposed by our parent tag */
  434         if (parent != NULL) {
  435                 newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr);
  436                 newtag->highaddr = MAX(parent->highaddr, newtag->highaddr);
  437                 newtag->alignment = MAX(parent->alignment, newtag->alignment);
  438                 newtag->flags |= parent->flags & BUS_DMA_COULD_BOUNCE;
  439                 newtag->flags |= parent->flags & BUS_DMA_COHERENT;
  440                 if (newtag->boundary == 0)
  441                         newtag->boundary = parent->boundary;
  442                 else if (parent->boundary != 0)
  443                         newtag->boundary = MIN(parent->boundary,
  444                                                newtag->boundary);
  445                 if (newtag->filter == NULL) {
  446                         /*
   447                          * Short circuit looking at our parent directly
  448                          * since we have encapsulated all of its information
  449                          */
  450                         newtag->filter = parent->filter;
  451                         newtag->filterarg = parent->filterarg;
  452                         newtag->parent = parent->parent;
  453                 }
  454                 if (newtag->parent != NULL)
  455                         atomic_add_int(&parent->ref_count, 1);
  456         }
  457 
  458         if (exclusion_bounce_check(newtag->lowaddr, newtag->highaddr))
  459                 newtag->flags |= BUS_DMA_EXCL_BOUNCE;
  460         if (alignment_bounce(newtag, 1))
  461                 newtag->flags |= BUS_DMA_ALIGN_BOUNCE;
  462 
  463         /*
  464          * Any request can auto-bounce due to cacheline alignment, in addition
  465          * to any alignment or boundary specifications in the tag, so if the
  466          * ALLOCNOW flag is set, there's always work to do.
  467          */
  468         if ((flags & BUS_DMA_ALLOCNOW) != 0) {
  469                 struct bounce_zone *bz;
  470                 /*
  471                  * Round size up to a full page, and add one more page because
  472                  * there can always be one more boundary crossing than the
  473                  * number of pages in a transfer.
  474                  */
  475                 maxsize = roundup2(maxsize, PAGE_SIZE) + PAGE_SIZE;
  476 
  477                 if ((error = alloc_bounce_zone(newtag)) != 0) {
  478                         free(newtag, M_BUSDMA);
  479                         return (error);
  480                 }
  481                 bz = newtag->bounce_zone;
  482 
  483                 if (ptoa(bz->total_bpages) < maxsize) {
  484                         int pages;
  485 
  486                         pages = atop(maxsize) - bz->total_bpages;
  487 
  488                         /* Add pages to our bounce pool */
  489                         if (alloc_bounce_pages(newtag, pages) < pages)
  490                                 error = ENOMEM;
  491                 }
  492                 /* Performed initial allocation */
  493                 newtag->flags |= BUS_DMA_MIN_ALLOC_COMP;
  494         } else
  495                 newtag->bounce_zone = NULL;
  496 
  497         if (error != 0) {
  498                 free(newtag, M_BUSDMA);
  499         } else {
  500                 atomic_add_32(&tags_total, 1);
  501                 *dmat = newtag;
  502         }
  503         CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
  504             __func__, newtag, (newtag != NULL ? newtag->flags : 0), error);
  505         return (error);
  506 }
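
/*
 * Example usage (a minimal driver-side sketch; the device_t "dev" and the
 * softc field "buf_dtag" are hypothetical).  A typical consumer creates one
 * tag per kind of DMA object, inheriting the bus-level restrictions from the
 * parent tag returned by bus_get_dma_tag():
 *
 *	error = bus_dma_tag_create(
 *	    bus_get_dma_tag(dev),	-- parent
 *	    sizeof(uint32_t), 0,	-- alignment, boundary
 *	    BUS_SPACE_MAXADDR_32BIT,	-- lowaddr
 *	    BUS_SPACE_MAXADDR,		-- highaddr
 *	    NULL, NULL,			-- filter, filterarg
 *	    MCLBYTES, 1, MCLBYTES,	-- maxsize, nsegments, maxsegsz
 *	    0,				-- flags
 *	    NULL, NULL,			-- lockfunc, lockfuncarg (defaults)
 *	    &sc->buf_dtag);
 *	if (error != 0)
 *		return (error);
 */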
  507 
  508 void
  509 bus_dma_template_clone(bus_dma_template_t *t, bus_dma_tag_t dmat)
  510 {
  511 
  512         if (t == NULL || dmat == NULL)
  513                 return;
  514 
  515         t->parent = dmat->parent;
  516         t->alignment = dmat->alignment;
  517         t->boundary = dmat->boundary;
  518         t->lowaddr = dmat->lowaddr;
  519         t->highaddr = dmat->highaddr;
  520         t->maxsize = dmat->maxsize;
  521         t->nsegments = dmat->nsegments;
  522         t->maxsegsize = dmat->maxsegsz;
  523         t->flags = dmat->flags;
  524         t->lockfunc = dmat->lockfunc;
  525         t->lockfuncarg = dmat->lockfuncarg;
  526 }
  527 
  528 int
  529 bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain)
  530 {
  531 
  532         return (0);
  533 }
  534 
  535 int
  536 bus_dma_tag_destroy(bus_dma_tag_t dmat)
  537 {
  538 #ifdef KTR
  539         bus_dma_tag_t dmat_copy = dmat;
  540 #endif
  541         int error;
  542 
  543         error = 0;
  544 
  545         if (dmat != NULL) {
  546                 if (dmat->map_count != 0) {
  547                         error = EBUSY;
  548                         goto out;
  549                 }
  550 
  551                 while (dmat != NULL) {
  552                         bus_dma_tag_t parent;
  553 
  554                         parent = dmat->parent;
  555                         atomic_subtract_int(&dmat->ref_count, 1);
  556                         if (dmat->ref_count == 0) {
  557                                 atomic_subtract_32(&tags_total, 1);
  558                                 free(dmat, M_BUSDMA);
  559                                 /*
   560                                  * Last reference, so release
   561                                  * our reference on our
   562                                  * parent.
  563                                  */
  564                                 dmat = parent;
  565                         } else
  566                                 dmat = NULL;
  567                 }
  568         }
  569 out:
  570         CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
  571         return (error);
  572 }
  573 
  574 static int
  575 allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t mapp)
  576 {
  577         struct bounce_zone *bz;
  578         int maxpages;
  579         int error;
  580 
  581         if (dmat->bounce_zone == NULL)
  582                 if ((error = alloc_bounce_zone(dmat)) != 0)
  583                         return (error);
  584         bz = dmat->bounce_zone;
  585         /* Initialize the new map */
  586         STAILQ_INIT(&(mapp->bpages));
  587 
  588         /*
  589          * Attempt to add pages to our pool on a per-instance basis up to a sane
  590          * limit.  Even if the tag isn't flagged as COULD_BOUNCE due to
  591          * alignment and boundary constraints, it could still auto-bounce due to
  592          * cacheline alignment, which requires at most two bounce pages.
  593          */
  594         if (dmat->flags & BUS_DMA_COULD_BOUNCE)
  595                 maxpages = MAX_BPAGES;
  596         else
  597                 maxpages = 2 * bz->map_count;
  598         if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 ||
  599             (bz->map_count > 0 && bz->total_bpages < maxpages)) {
  600                 int pages;
  601 
  602                 pages = atop(roundup2(dmat->maxsize, PAGE_SIZE)) + 1;
  603                 pages = MIN(maxpages - bz->total_bpages, pages);
  604                 pages = MAX(pages, 2);
  605                 if (alloc_bounce_pages(dmat, pages) < pages)
  606                         return (ENOMEM);
  607 
  608                 if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0)
  609                         dmat->flags |= BUS_DMA_MIN_ALLOC_COMP;
  610         }
  611         bz->map_count++;
  612         return (0);
  613 }
  614 
  615 static bus_dmamap_t
  616 allocate_map(bus_dma_tag_t dmat, int mflags)
  617 {
  618         int mapsize, segsize;
  619         bus_dmamap_t map;
  620 
  621         /*
  622          * Allocate the map.  The map structure ends with an embedded
  623          * variable-sized array of sync_list structures.  Following that
  624          * we allocate enough extra space to hold the array of bus_dma_segments.
  625          */
  626         KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS,
  627            ("cannot allocate %u dma segments (max is %u)",
  628             dmat->nsegments, MAX_DMA_SEGMENTS));
  629         segsize = sizeof(struct bus_dma_segment) * dmat->nsegments;
  630         mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments;
  631         map = malloc(mapsize + segsize, M_BUSDMA, mflags | M_ZERO);
  632         if (map == NULL) {
  633                 CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
  634                 return (NULL);
  635         }
  636         map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize);
  637         STAILQ_INIT(&map->bpages);
  638         return (map);
  639 }
  640 
  641 /*
  642  * Allocate a handle for mapping from kva/uva/physical
  643  * address space into bus device space.
  644  */
  645 int
  646 bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
  647 {
  648         bus_dmamap_t map;
  649         int error = 0;
  650 
  651         *mapp = map = allocate_map(dmat, M_NOWAIT);
  652         if (map == NULL) {
  653                 CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
  654                 return (ENOMEM);
  655         }
  656 
  657         /*
  658          * Bouncing might be required if the driver asks for an exclusion
  659          * region, a data alignment that is stricter than 1, or DMA that begins
  660          * or ends with a partial cacheline.  Whether bouncing will actually
  661          * happen can't be known until mapping time, but we need to pre-allocate
  662          * resources now because we might not be allowed to at mapping time.
  663          */
  664         error = allocate_bz_and_pages(dmat, map);
  665         if (error != 0) {
  666                 free(map, M_BUSDMA);
  667                 *mapp = NULL;
  668                 return (error);
  669         }
  670         if (map->flags & DMAMAP_COHERENT)
  671                 atomic_add_32(&maps_coherent, 1);
  672         atomic_add_32(&maps_total, 1);
  673         dmat->map_count++;
  674 
  675         return (0);
  676 }
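
/*
 * Example usage (a minimal sketch; "sc", its fields, and example_load_cb are
 * hypothetical).  A driver creates a map once and then loads a buffer into
 * it, receiving the bus_dma_segment_t array in a callback.  With
 * BUS_DMA_NOWAIT the load either completes immediately or fails; without it
 * the load may return EINPROGRESS and invoke the callback later, once bounce
 * pages become available:
 *
 *	static void
 *	example_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 *	{
 *		if (error == 0)
 *			*(bus_addr_t *)arg = segs[0].ds_addr;
 *	}
 *
 *	error = bus_dmamap_create(sc->buf_dtag, 0, &sc->buf_map);
 *	if (error == 0)
 *		error = bus_dmamap_load(sc->buf_dtag, sc->buf_map, buf, buflen,
 *		    example_load_cb, &sc->buf_busaddr, BUS_DMA_NOWAIT);
 */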
  677 
  678 /*
  679  * Destroy a handle for mapping from kva/uva/physical
  680  * address space into bus device space.
  681  */
  682 int
  683 bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
  684 {
  685 
  686         if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
  687                 CTR3(KTR_BUSDMA, "%s: tag %p error %d",
  688                     __func__, dmat, EBUSY);
  689                 return (EBUSY);
  690         }
  691         if (dmat->bounce_zone)
  692                 dmat->bounce_zone->map_count--;
  693         if (map->flags & DMAMAP_COHERENT)
  694                 atomic_subtract_32(&maps_coherent, 1);
  695         atomic_subtract_32(&maps_total, 1);
  696         free(map, M_BUSDMA);
  697         dmat->map_count--;
  698         CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
  699         return (0);
  700 }
  701 
  702 /*
  703  * Allocate a piece of memory that can be efficiently mapped into bus device
  704  * space based on the constraints listed in the dma tag.  Returns a pointer to
  705  * the allocated memory, and a pointer to an associated bus_dmamap.
  706  */
  707 int
  708 bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
  709     bus_dmamap_t *mapp)
  710 {
  711         busdma_bufalloc_t ba;
  712         struct busdma_bufzone *bufzone;
  713         bus_dmamap_t map;
  714         vm_memattr_t memattr;
  715         int mflags;
  716 
  717         if (flags & BUS_DMA_NOWAIT)
  718                 mflags = M_NOWAIT;
  719         else
  720                 mflags = M_WAITOK;
  721         if (flags & BUS_DMA_ZERO)
  722                 mflags |= M_ZERO;
  723 
  724         *mapp = map = allocate_map(dmat, mflags);
  725         if (map == NULL) {
  726                 CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
  727                     __func__, dmat, dmat->flags, ENOMEM);
  728                 return (ENOMEM);
  729         }
  730         map->flags = DMAMAP_DMAMEM_ALLOC;
  731 
  732         /* For coherent memory, set the map flag that disables sync ops. */
  733         if (flags & BUS_DMA_COHERENT)
  734                 map->flags |= DMAMAP_COHERENT;
  735 
  736         /*
  737          * Choose a busdma buffer allocator based on memory type flags.
  738          * If the tag's COHERENT flag is set, that means normal memory
  739          * is already coherent, use the normal allocator.
  740          */
  741         if ((flags & BUS_DMA_COHERENT) &&
  742             ((dmat->flags & BUS_DMA_COHERENT) == 0)) {
  743                 memattr = VM_MEMATTR_UNCACHEABLE;
  744                 ba = coherent_allocator;
  745         } else {
  746                 memattr = VM_MEMATTR_DEFAULT;
  747                 ba = standard_allocator;
  748         }
  749 
  750         /*
  751          * Try to find a bufzone in the allocator that holds a cache of buffers
  752          * of the right size for this request.  If the buffer is too big to be
  753          * held in the allocator cache, this returns NULL.
  754          */
  755         bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
  756 
  757         /*
  758          * Allocate the buffer from the uma(9) allocator if...
  759          *  - It's small enough to be in the allocator (bufzone not NULL).
  760          *  - The alignment constraint isn't larger than the allocation size
  761          *    (the allocator aligns buffers to their size boundaries).
  762          *  - There's no need to handle lowaddr/highaddr exclusion zones.
  763          * else allocate non-contiguous pages if...
  764          *  - The page count that could get allocated doesn't exceed
   765  *    nsegments, even when the maximum segment size is less
  766          *    than PAGE_SIZE.
  767          *  - The alignment constraint isn't larger than a page boundary.
  768          *  - There are no boundary-crossing constraints.
  769          * else allocate a block of contiguous pages because one or more of the
  770          * constraints is something that only the contig allocator can fulfill.
  771          */
  772         if (bufzone != NULL && dmat->alignment <= bufzone->size &&
  773             !exclusion_bounce(dmat)) {
  774                 *vaddr = uma_zalloc(bufzone->umazone, mflags);
  775         } else if (dmat->nsegments >=
  776             howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) &&
  777             dmat->alignment <= PAGE_SIZE &&
  778             (dmat->boundary % PAGE_SIZE) == 0) {
  779                 *vaddr = kmem_alloc_attr(dmat->maxsize, mflags, 0,
  780                     dmat->lowaddr, memattr);
  781         } else {
  782                 *vaddr = kmem_alloc_contig(dmat->maxsize, mflags, 0,
  783                     dmat->lowaddr, dmat->alignment, dmat->boundary, memattr);
  784         }
  785         if (*vaddr == NULL) {
  786                 CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
  787                     __func__, dmat, dmat->flags, ENOMEM);
  788                 free(map, M_BUSDMA);
  789                 *mapp = NULL;
  790                 return (ENOMEM);
  791         }
  792         if (map->flags & DMAMAP_COHERENT)
  793                 atomic_add_32(&maps_coherent, 1);
  794         atomic_add_32(&maps_dmamem, 1);
  795         atomic_add_32(&maps_total, 1);
  796         dmat->map_count++;
  797 
  798         CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
  799             __func__, dmat, dmat->flags, 0);
  800         return (0);
  801 }
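
/*
 * Example usage (a minimal sketch; the softc fields and RING_SIZE are
 * hypothetical, and RING_SIZE must match the maxsize given to the tag).
 * Descriptor rings are usually allocated this way with BUS_DMA_COHERENT, so
 * that later bus_dmamap_sync() calls reduce to barriers instead of cache
 * maintenance, and then loaded (reusing a callback like the example_load_cb
 * sketch above) to learn the bus address the device should be programmed
 * with:
 *
 *	error = bus_dmamem_alloc(sc->ring_dtag, (void **)&sc->ring,
 *	    BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->ring_map);
 *	if (error == 0)
 *		error = bus_dmamap_load(sc->ring_dtag, sc->ring_map, sc->ring,
 *		    RING_SIZE, example_load_cb, &sc->ring_busaddr,
 *		    BUS_DMA_NOWAIT);
 */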
  802 
  803 /*
  804  * Free a piece of memory that was allocated via bus_dmamem_alloc, along with
  805  * its associated map.
  806  */
  807 void
  808 bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
  809 {
  810         struct busdma_bufzone *bufzone;
  811         busdma_bufalloc_t ba;
  812 
  813         if ((map->flags & DMAMAP_COHERENT) &&
  814             ((dmat->flags & BUS_DMA_COHERENT) == 0))
  815                 ba = coherent_allocator;
  816         else
  817                 ba = standard_allocator;
  818 
  819         bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
  820 
  821         if (bufzone != NULL && dmat->alignment <= bufzone->size &&
  822             !exclusion_bounce(dmat))
  823                 uma_zfree(bufzone->umazone, vaddr);
  824         else
  825                 kmem_free(vaddr, dmat->maxsize);
  826 
  827         dmat->map_count--;
  828         if (map->flags & DMAMAP_COHERENT)
  829                 atomic_subtract_32(&maps_coherent, 1);
  830         atomic_subtract_32(&maps_total, 1);
  831         atomic_subtract_32(&maps_dmamem, 1);
  832         free(map, M_BUSDMA);
  833         CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
  834 }
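
/*
 * Example teardown (a minimal sketch; field names hypothetical).  Note that
 * bus_dmamem_free() releases the map that bus_dmamem_alloc() created, so no
 * separate bus_dmamap_destroy() call is needed for that map:
 *
 *	bus_dmamap_unload(sc->ring_dtag, sc->ring_map);
 *	bus_dmamem_free(sc->ring_dtag, sc->ring, sc->ring_map);
 *	bus_dma_tag_destroy(sc->ring_dtag);
 */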
  835 
  836 static void
  837 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
  838     bus_size_t buflen, int flags)
  839 {
  840         bus_addr_t curaddr;
  841         bus_size_t sgsize;
  842 
  843         if (map->pagesneeded == 0) {
  844                 CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d"
  845                     " map= %p, pagesneeded= %d",
  846                     dmat->lowaddr, dmat->boundary, dmat->alignment,
  847                     map, map->pagesneeded);
  848                 /*
  849                  * Count the number of bounce pages
  850                  * needed in order to complete this transfer
  851                  */
  852                 curaddr = buf;
  853                 while (buflen != 0) {
  854                         sgsize = MIN(buflen, dmat->maxsegsz);
  855                         if (must_bounce(dmat, map, curaddr, sgsize) != 0) {
  856                                 sgsize = MIN(sgsize,
  857                                     PAGE_SIZE - (curaddr & PAGE_MASK));
  858                                 map->pagesneeded++;
  859                         }
  860                         curaddr += sgsize;
  861                         buflen -= sgsize;
  862                 }
  863                 CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded);
  864         }
  865 }
  866 
  867 static void
  868 _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map,
  869     void *buf, bus_size_t buflen, int flags)
  870 {
  871         vm_offset_t vaddr;
  872         vm_offset_t vendaddr;
  873         bus_addr_t paddr;
  874 
  875         if (map->pagesneeded == 0) {
  876                 CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d"
  877                     " map= %p, pagesneeded= %d",
  878                     dmat->lowaddr, dmat->boundary, dmat->alignment,
  879                     map, map->pagesneeded);
  880                 /*
  881                  * Count the number of bounce pages
  882                  * needed in order to complete this transfer
  883                  */
  884                 vaddr = (vm_offset_t)buf;
  885                 vendaddr = (vm_offset_t)buf + buflen;
  886 
  887                 while (vaddr < vendaddr) {
  888                         if (__predict_true(pmap == kernel_pmap))
  889                                 paddr = pmap_kextract(vaddr);
  890                         else
  891                                 paddr = pmap_extract(pmap, vaddr);
  892                         if (must_bounce(dmat, map, paddr,
  893                             min(vendaddr - vaddr, (PAGE_SIZE - ((vm_offset_t)vaddr &
  894                             PAGE_MASK)))) != 0) {
  895                                 map->pagesneeded++;
  896                         }
  897                         vaddr += (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK));
  898                 }
  899                 CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded);
  900         }
  901 }
  902 
  903 /*
  904  * Add a single contiguous physical range to the segment list.
  905  */
  906 static int
  907 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
  908     bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
  909 {
  910         int seg;
  911 
  912         /*
  913          * Make sure we don't cross any boundaries.
  914          */
  915         if (!vm_addr_bound_ok(curaddr, sgsize, dmat->boundary))
  916                 sgsize = roundup2(curaddr, dmat->boundary) - curaddr;
  917 
  918         /*
  919          * Insert chunk into a segment, coalescing with
  920          * previous segment if possible.
  921          */
  922         seg = *segp;
  923         if (seg == -1) {
  924                 seg = 0;
  925                 segs[seg].ds_addr = curaddr;
  926                 segs[seg].ds_len = sgsize;
  927         } else {
  928                 if (curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
  929                     (segs[seg].ds_len + sgsize) <= dmat->maxsegsz &&
  930                     vm_addr_bound_ok(segs[seg].ds_addr,
  931                     segs[seg].ds_len + sgsize, dmat->boundary))
  932                         segs[seg].ds_len += sgsize;
  933                 else {
  934                         if (++seg >= dmat->nsegments)
  935                                 return (0);
  936                         segs[seg].ds_addr = curaddr;
  937                         segs[seg].ds_len = sgsize;
  938                 }
  939         }
  940         *segp = seg;
  941         return (sgsize);
  942 }
  943 
  944 /*
  945  * Utility function to load a physical buffer.  segp contains
   946  * the starting segment on entrance, and the ending segment on exit.
  947  */
  948 int
  949 _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
  950     bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp)
  951 {
  952         bus_addr_t curaddr;
  953         bus_addr_t sl_end = 0;
  954         bus_size_t sgsize;
  955         struct sync_list *sl;
  956         int error;
  957 
  958         if (segs == NULL)
  959                 segs = map->segments;
  960 
  961 #ifdef ARM_BUSDMA_MAPLOAD_STATS
  962         counter_u64_add(maploads_total, 1);
  963         counter_u64_add(maploads_physmem, 1);
  964 #endif
  965 
  966         if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
  967                 _bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
  968                 if (map->pagesneeded != 0) {
  969 #ifdef ARM_BUSDMA_MAPLOAD_STATS
  970                         counter_u64_add(maploads_bounced, 1);
  971 #endif
  972                         error = _bus_dmamap_reserve_pages(dmat, map, flags);
  973                         if (error)
  974                                 return (error);
  975                 }
  976         }
  977 
  978         sl = map->slist + map->sync_count - 1;
  979 
  980         while (buflen > 0) {
  981                 curaddr = buf;
  982                 sgsize = MIN(buflen, dmat->maxsegsz);
  983                 if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr,
  984                     sgsize)) {
  985                         sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
  986                         curaddr = add_bounce_page(dmat, map, 0, curaddr,
  987                             sgsize);
  988                 } else if ((dmat->flags & BUS_DMA_COHERENT) == 0) {
  989                         if (map->sync_count > 0)
  990                                 sl_end = sl->paddr + sl->datacount;
  991 
  992                         if (map->sync_count == 0 || curaddr != sl_end) {
  993                                 if (++map->sync_count > dmat->nsegments)
  994                                         break;
  995                                 sl++;
  996                                 sl->vaddr = 0;
  997                                 sl->paddr = curaddr;
  998                                 sl->datacount = sgsize;
  999                                 sl->pages = PHYS_TO_VM_PAGE(curaddr);
 1000                                 KASSERT(sl->pages != NULL,
 1001                                     ("%s: page at PA:0x%08lx is not in "
 1002                                     "vm_page_array", __func__, curaddr));
 1003                         } else
 1004                                 sl->datacount += sgsize;
 1005                 }
 1006                 sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 1007                     segp);
 1008                 if (sgsize == 0)
 1009                         break;
 1010                 buf += sgsize;
 1011                 buflen -= sgsize;
 1012         }
 1013 
 1014         /*
 1015          * Did we fit?
 1016          */
 1017         if (buflen != 0) {
 1018                 bus_dmamap_unload(dmat, map);
 1019                 return (EFBIG); /* XXX better return value here? */
 1020         }
 1021         return (0);
 1022 }
 1023 
 1024 int
 1025 _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
 1026     struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
 1027     bus_dma_segment_t *segs, int *segp)
 1028 {
 1029 
 1030         return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags,
 1031             segs, segp));
 1032 }
 1033 
 1034 /*
 1035  * Utility function to load a linear buffer.  segp contains
 1036  * the starting segment on entrance, and the ending segment on exit.
 1037  */
 1038 int
 1039 _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
 1040     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
 1041     int *segp)
 1042 {
 1043         bus_size_t sgsize;
 1044         bus_addr_t curaddr;
 1045         bus_addr_t sl_pend = 0;
 1046         vm_offset_t kvaddr, vaddr, sl_vend = 0;
 1047         struct sync_list *sl;
 1048         int error;
 1049 
 1050 #ifdef ARM_BUSDMA_MAPLOAD_STATS
 1051         counter_u64_add(maploads_total, 1);
 1052         if (map->flags & DMAMAP_COHERENT)
 1053                 counter_u64_add(maploads_coherent, 1);
 1054         if (map->flags & DMAMAP_DMAMEM_ALLOC)
 1055                 counter_u64_add(maploads_dmamem, 1);
 1056 #endif
 1057 
 1058         if (segs == NULL)
 1059                 segs = map->segments;
 1060 
 1061         if (flags & BUS_DMA_LOAD_MBUF) {
 1062 #ifdef ARM_BUSDMA_MAPLOAD_STATS
 1063                 counter_u64_add(maploads_mbuf, 1);
 1064 #endif
 1065                 map->flags |= DMAMAP_MBUF;
 1066         }
 1067 
 1068         if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
 1069                 _bus_dmamap_count_pages(dmat, pmap, map, buf, buflen, flags);
 1070                 if (map->pagesneeded != 0) {
 1071 #ifdef ARM_BUSDMA_MAPLOAD_STATS
 1072                         counter_u64_add(maploads_bounced, 1);
 1073 #endif
 1074                         error = _bus_dmamap_reserve_pages(dmat, map, flags);
 1075                         if (error)
 1076                                 return (error);
 1077                 }
 1078         }
 1079 
 1080         sl = map->slist + map->sync_count - 1;
 1081         vaddr = (vm_offset_t)buf;
 1082 
 1083         while (buflen > 0) {
 1084                 /*
 1085                  * Get the physical address for this segment.
 1086                  */
 1087                 if (__predict_true(pmap == kernel_pmap)) {
 1088                         curaddr = pmap_kextract(vaddr);
 1089                         kvaddr = vaddr;
 1090                 } else {
 1091                         curaddr = pmap_extract(pmap, vaddr);
 1092                         kvaddr = 0;
 1093                 }
 1094 
 1095                 /*
 1096                  * Compute the segment size, and adjust counts.
 1097                  */
 1098                 sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
 1099                 if (sgsize > dmat->maxsegsz)
 1100                         sgsize = dmat->maxsegsz;
 1101                 if (buflen < sgsize)
 1102                         sgsize = buflen;
 1103 
 1104                 if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr,
 1105                     sgsize)) {
 1106                         curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
 1107                             sgsize);
 1108                 } else if ((dmat->flags & BUS_DMA_COHERENT) == 0) {
 1109                         if (map->sync_count > 0) {
 1110                                 sl_pend = sl->paddr + sl->datacount;
 1111                                 sl_vend = sl->vaddr + sl->datacount;
 1112                         }
 1113 
 1114                         if (map->sync_count == 0 ||
 1115                             (kvaddr != 0 && kvaddr != sl_vend) ||
 1116                             (curaddr != sl_pend)) {
 1117                                 if (++map->sync_count > dmat->nsegments)
 1118                                         goto cleanup;
 1119                                 sl++;
 1120                                 sl->vaddr = kvaddr;
 1121                                 sl->paddr = curaddr;
 1122                                 if (kvaddr != 0) {
 1123                                         sl->pages = NULL;
 1124                                 } else {
 1125                                         sl->pages = PHYS_TO_VM_PAGE(curaddr);
 1126                                         KASSERT(sl->pages != NULL,
 1127                                             ("%s: page at PA:0x%08lx is not "
 1128                                             "in vm_page_array", __func__,
 1129                                             curaddr));
 1130                                 }
 1131                                 sl->datacount = sgsize;
 1132                         } else
 1133                                 sl->datacount += sgsize;
 1134                 }
 1135                 sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 1136                     segp);
 1137                 if (sgsize == 0)
 1138                         break;
 1139                 vaddr += sgsize;
 1140                 buflen -= sgsize;
 1141         }
 1142 
 1143 cleanup:
 1144         /*
 1145          * Did we fit?
 1146          */
 1147         if (buflen != 0) {
 1148                 bus_dmamap_unload(dmat, map);
 1149                 return (EFBIG); /* XXX better return value here? */
 1150         }
 1151         return (0);
 1152 }
 1153 
 1154 void
 1155 _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem,
 1156     bus_dmamap_callback_t *callback, void *callback_arg)
 1157 {
 1158 
 1159         map->mem = *mem;
 1160         map->dmat = dmat;
 1161         map->callback = callback;
 1162         map->callback_arg = callback_arg;
 1163 }
 1164 
 1165 bus_dma_segment_t *
 1166 _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
 1167     bus_dma_segment_t *segs, int nsegs, int error)
 1168 {
 1169 
 1170         if (segs == NULL)
 1171                 segs = map->segments;
 1172         return (segs);
 1173 }
 1174 
 1175 /*
 1176  * Release the mapping held by map.
 1177  */
 1178 void
 1179 bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 1180 {
 1181         struct bounce_zone *bz;
 1182 
 1183         if ((bz = dmat->bounce_zone) != NULL) {
 1184                 free_bounce_pages(dmat, map);
 1185 
 1186                 if (map->pagesreserved != 0) {
 1187                         mtx_lock(&bounce_lock);
 1188                         bz->free_bpages += map->pagesreserved;
 1189                         bz->reserved_bpages -= map->pagesreserved;
 1190                         mtx_unlock(&bounce_lock);
 1191                         map->pagesreserved = 0;
 1192                 }
 1193                 map->pagesneeded = 0;
 1194         }
 1195         map->sync_count = 0;
 1196         map->flags &= ~DMAMAP_MBUF;
 1197 }
 1198 
 1199 static void
 1200 dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
 1201 {
 1202         /*
 1203          * Write back any partial cachelines immediately before and
 1204          * after the DMA region.  We don't need to round the address
 1205          * down to the nearest cacheline or specify the exact size,
 1206          * as dcache_wb_poc() will do the rounding for us and works
 1207          * at cacheline granularity.
 1208          */
 1209         if (va & BUSDMA_DCACHE_MASK)
 1210                 dcache_wb_poc(va, pa, 1);
 1211         if ((va + size) & BUSDMA_DCACHE_MASK)
 1212                 dcache_wb_poc(va + size, pa + size, 1);
 1213 
 1214         dcache_inv_poc_dma(va, pa, size);
 1215 }
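
/*
 * Worked example (illustrative addresses, assuming a 64-byte cache line):
 * for a preread of va 0xc0a01010 / pa 0x80a01010 with size 0x100, the start
 * is 0x10 bytes into a line, so the line containing the start is written
 * back first; the end (va + size == 0xc0a01110) is also mid-line, so that
 * line is written back too.  The writebacks preserve neighboring CPU data
 * sharing those lines before the whole range is invalidated.
 */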
 1216 
 1217 static void
 1218 dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op)
 1219 {
 1220         uint32_t len, offset;
 1221         vm_page_t m;
 1222         vm_paddr_t pa;
 1223         vm_offset_t va, tempva;
 1224         bus_size_t size;
 1225 
 1226         offset = sl->paddr & PAGE_MASK;
 1227         m = sl->pages;
 1228         size = sl->datacount;
 1229         pa = sl->paddr;
 1230 
 1231         for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) {
 1232                 tempva = 0;
 1233                 if (sl->vaddr == 0) {
 1234                         len = min(PAGE_SIZE - offset, size);
 1235                         tempva = pmap_quick_enter_page(m);
 1236                         va = tempva | offset;
 1237                         KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset),
 1238                             ("unexpected vm_page_t phys: 0x%08x != 0x%08x",
 1239                             VM_PAGE_TO_PHYS(m) | offset, pa));
 1240                 } else {
 1241                         len = sl->datacount;
 1242                         va = sl->vaddr;
 1243                 }
 1244 
 1245                 switch (op) {
 1246                 case BUS_DMASYNC_PREWRITE:
 1247                 case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
 1248                         dcache_wb_poc(va, pa, len);
 1249                         break;
 1250                 case BUS_DMASYNC_PREREAD:
 1251                         /*
 1252                          * An mbuf may start in the middle of a cacheline. There
 1253                          * will be no cpu writes to the beginning of that line
 1254                          * (which contains the mbuf header) while dma is in
 1255                          * progress.  Handle that case by doing a writeback of
 1256                          * just the first cacheline before invalidating the
 1257                          * overall buffer.  Any mbuf in a chain may have this
 1258                          * misalignment.  Buffers which are not mbufs bounce if
 1259                          * they are not aligned to a cacheline.
 1260                          */
 1261                         dma_preread_safe(va, pa, len);
 1262                         break;
 1263                 case BUS_DMASYNC_POSTREAD:
 1264                 case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
 1265                         dcache_inv_poc(va, pa, len);
 1266                         break;
 1267                 default:
 1268                         panic("unsupported combination of sync operations: "
 1269                               "0x%08x\n", op);
 1270                 }
 1271 
 1272                 if (tempva != 0)
 1273                         pmap_quick_remove_page(tempva);
 1274         }
 1275 }
 1276 
 1277 void
 1278 bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 1279 {
 1280         struct bounce_page *bpage;
 1281         struct sync_list *sl, *end;
 1282         vm_offset_t datavaddr, tempvaddr;
 1283 
 1284         if (op == BUS_DMASYNC_POSTWRITE)
 1285                 return;
 1286 
 1287         /*
 1288          * If the buffer was from user space, it is possible that this is not
 1289          * the same vm map, especially on a POST operation.  It's not clear that
 1290          * dma on userland buffers can work at all right now.  To be safe, until
 1291          * we're able to test direct userland dma, panic on a map mismatch.
 1292          */
 1293         if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 1294                 CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 1295                     "performing bounce", __func__, dmat, dmat->flags, op);
 1296 
 1297                 /*
 1298                  * For PREWRITE do a writeback.  Clean the caches from the
 1299                  * innermost to the outermost levels.
 1300                  */
 1301                 if (op & BUS_DMASYNC_PREWRITE) {
 1302                         while (bpage != NULL) {
 1303                                 tempvaddr = 0;
 1304                                 datavaddr = bpage->datavaddr;
 1305                                 if (datavaddr == 0) {
 1306                                         tempvaddr = pmap_quick_enter_page(
 1307                                             bpage->datapage);
 1308                                         datavaddr = tempvaddr | bpage->dataoffs;
 1309                                 }
 1310                                 bcopy((void *)datavaddr, (void *)bpage->vaddr,
 1311                                     bpage->datacount);
 1312                                 if (tempvaddr != 0)
 1313                                         pmap_quick_remove_page(tempvaddr);
 1314                                 if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 1315                                         dcache_wb_poc(bpage->vaddr,
 1316                                             bpage->busaddr, bpage->datacount);
 1317                                 bpage = STAILQ_NEXT(bpage, links);
 1318                         }
 1319                         dmat->bounce_zone->total_bounced++;
 1320                 }
 1321 
 1322                 /*
 1323                  * Do an invalidate for PREREAD unless a writeback was already
 1324                  * done above due to PREWRITE also being set.  The reason for a
 1325                  * PREREAD invalidate is to prevent dirty lines currently in the
 1326                  * cache from being evicted during the DMA.  If a writeback was
 1327                  * done due to PREWRITE also being set there will be no dirty
 1328                  * lines and the POSTREAD invalidate handles the rest. The
 1329                  * invalidate is done from the innermost to outermost level. If
 1330                  * L2 were done first, a dirty cacheline could be automatically
 1331                  * evicted from L1 before we invalidated it, re-dirtying the L2.
 1332                  */
 1333                 if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) {
 1334                         bpage = STAILQ_FIRST(&map->bpages);
 1335                         while (bpage != NULL) {
 1336                                 if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 1337                                         dcache_inv_poc_dma(bpage->vaddr,
 1338                                             bpage->busaddr, bpage->datacount);
 1339                                 bpage = STAILQ_NEXT(bpage, links);
 1340                         }
 1341                 }
 1342 
 1343                 /*
 1344                  * Re-invalidate the caches on a POSTREAD, even though they were
 1345                  * already invalidated at PREREAD time.  Aggressive prefetching
 1346                  * due to accesses to other data near the dma buffer could have
 1347                  * brought buffer data into the caches which is now stale.  The
 1348                  * caches are invalidated from the outermost to innermost; the
 1349                  * prefetches could be happening right now, and if L1 were
 1350                  * invalidated first, stale L2 data could be prefetched into L1.
 1351                  */
 1352                 if (op & BUS_DMASYNC_POSTREAD) {
 1353                         while (bpage != NULL) {
 1354                                 if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 1355                                         dcache_inv_poc(bpage->vaddr,
 1356                                             bpage->busaddr, bpage->datacount);
 1357                                 tempvaddr = 0;
 1358                                 datavaddr = bpage->datavaddr;
 1359                                 if (datavaddr == 0) {
 1360                                         tempvaddr = pmap_quick_enter_page(
 1361                                             bpage->datapage);
 1362                                         datavaddr = tempvaddr | bpage->dataoffs;
 1363                                 }
 1364                                 bcopy((void *)bpage->vaddr, (void *)datavaddr,
 1365                                     bpage->datacount);
 1366                                 if (tempvaddr != 0)
 1367                                         pmap_quick_remove_page(tempvaddr);
 1368                                 bpage = STAILQ_NEXT(bpage, links);
 1369                         }
 1370                         dmat->bounce_zone->total_bounced++;
 1371                 }
 1372         }
 1373 
 1374         /*
 1375          * For COHERENT memory no cache maintenance is necessary, but ensure all
 1376          * writes have reached memory for the PREWRITE case.  No action is
 1377          * needed for a PREREAD without PREWRITE also set, because that would
 1378          * imply that the cpu had written to the COHERENT buffer and expected
 1379          * the dma device to see that change, and by definition a PREWRITE sync
 1380          * is required to make that happen.
 1381          */
 1382         if (map->flags & DMAMAP_COHERENT) {
 1383                 if (op & BUS_DMASYNC_PREWRITE) {
 1384                         dsb();
 1385                         if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 1386                                 cpu_l2cache_drain_writebuf();
 1387                 }
 1388                 return;
 1389         }
 1390 
 1391         /*
 1392          * Cache maintenance for normal (non-COHERENT non-bounce) buffers.  All
 1393          * the comments about the sequences for flushing cache levels in the
 1394          * bounce buffer code above apply here as well.  In particular, the fact
 1395          * that the sequence is inner-to-outer for PREREAD invalidation and
 1396          * outer-to-inner for POSTREAD invalidation is not a mistake.
 1397          */
 1398         if (map->sync_count != 0) {
 1399                 sl = &map->slist[0];
 1400                 end = &map->slist[map->sync_count];
 1401                 CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 1402                     "performing sync", __func__, dmat, dmat->flags, op);
 1403 
 1404                 for ( ; sl != end; ++sl)
 1405                         dma_dcache_sync(sl, op);
 1406         }
 1407 }
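
/*
 * Example of the sync discipline a driver follows (a minimal sketch; field
 * names hypothetical).  For a buffer the device will read (CPU-filled TX
 * data), sync PREWRITE before starting the DMA and POSTWRITE when it
 * completes.  For a buffer the device will write (RX data), sync PREREAD
 * before starting the DMA and POSTREAD before the CPU reads the data:
 *
 *	bus_dmamap_sync(sc->buf_dtag, sc->tx_map, BUS_DMASYNC_PREWRITE);
 *	(start TX DMA; wait for the completion interrupt)
 *	bus_dmamap_sync(sc->buf_dtag, sc->tx_map, BUS_DMASYNC_POSTWRITE);
 *	bus_dmamap_unload(sc->buf_dtag, sc->tx_map);
 *
 *	bus_dmamap_sync(sc->buf_dtag, sc->rx_map, BUS_DMASYNC_PREREAD);
 *	(start RX DMA; wait for the completion interrupt)
 *	bus_dmamap_sync(sc->buf_dtag, sc->rx_map, BUS_DMASYNC_POSTREAD);
 *	bus_dmamap_unload(sc->buf_dtag, sc->rx_map);
 */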



This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.