FreeBSD/Linux Kernel Cross Reference
sys/mm/percpu-vm.c


/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 */
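
/*
 * For reference, the two helpers this file leans on throughout,
 * pcpu_page_idx() and pcpu_chunk_addr(), are defined in mm/percpu.c and
 * boil down roughly to:
 *
 *	pcpu_page_idx(cpu, i)          == pcpu_unit_map[cpu] * pcpu_unit_pages + i
 *	pcpu_chunk_addr(chunk, cpu, i) == (unsigned long)chunk->base_addr +
 *					  pcpu_unit_offsets[cpu] + (i << PAGE_SHIFT)
 *
 * i.e. the temporary @pages array used below is indexed unit-major, and
 * every unit of a chunk sits at a fixed offset from chunk->base_addr.
 */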

static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
				    unsigned int cpu, int page_idx)
{
	/* must not be used on pre-mapped chunk */
	WARN_ON(chunk->immutable);

	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}

/**
 * pcpu_get_pages_and_bitmap - get temp pages array and bitmap
 * @chunk: chunk of interest
 * @bitmapp: output parameter for bitmap
 * @may_alloc: may allocate the array
 *
 * Returns pointer to array of pointers to struct page and bitmap,
 * both of which can be indexed with pcpu_page_idx().  Both are
 * zero-filled when first allocated, and *@bitmapp is copied from
 * @chunk->populated on each call.  Note that there is only one array
 * and bitmap and access exclusion is the caller's responsibility.
 *
 * CONTEXT:
 * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc.
 * Otherwise, don't care.
 *
 * RETURNS:
 * Pointer to temp pages array on success, NULL on failure.
 */
static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
					       unsigned long **bitmapp,
					       bool may_alloc)
{
	static struct page **pages;
	static unsigned long *bitmap;
	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
	size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) *
			     sizeof(unsigned long);

	if (!pages || !bitmap) {
		if (may_alloc && !pages)
			pages = pcpu_mem_zalloc(pages_size);
		if (may_alloc && !bitmap)
			bitmap = pcpu_mem_zalloc(bitmap_size);
		if (!pages || !bitmap)
			return NULL;
	}

	bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);

	*bitmapp = bitmap;
	return pages;
}
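
/*
 * For reference, pcpu_mem_zalloc() is a small helper in mm/percpu.c which
 * roughly falls back from kzalloc() to vzalloc() for requests larger than a
 * page; either way the memory comes back zero-filled.  Keeping a single
 * static pages array and bitmap here is safe because both population and
 * depopulation run under pcpu_alloc_mutex, as noted in the CONTEXT sections
 * of the callers below.
 */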

/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @populated: populated bitmap
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start,@page_end) in @pages for all units.
 * The pages were allocated for @chunk.
 */
static void pcpu_free_pages(struct pcpu_chunk *chunk,
			    struct page **pages, unsigned long *populated,
			    int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page = pages[pcpu_page_idx(cpu, i)];

			if (page)
				__free_page(page);
		}
	}
}

/**
 * pcpu_alloc_pages - allocate pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @populated: populated bitmap
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk.  The percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, unsigned long *populated,
			    int page_start, int page_end)
{
	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (!*pagep) {
				pcpu_free_pages(chunk, pages, populated,
						page_start, page_end);
				return -ENOMEM;
			}
		}
	}
	return 0;
}

/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start,@page_end) of @chunk are about to be
 * unmapped.  Flush cache.  As each flush can be very expensive, issue
 * one flush for the whole region at once rather than one per cpu.
 * This may be overkill but is more scalable.
 */
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	flush_cache_vunmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
	unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
}
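
/*
 * Note that unmap_kernel_range_noflush() above and
 * map_kernel_range_noflush() in __pcpu_map_pages() below only update the
 * kernel page tables; they deliberately skip cache and TLB maintenance.
 * That work is batched into pcpu_pre_unmap_flush(),
 * pcpu_post_unmap_tlb_flush() and pcpu_post_map_flush(), which flush the
 * whole affected range once instead of once per cpu.
 */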

/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array which can be used to pass information to free
 * @populated: populated bitmap
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
 * Corresponding elements in @pages were cleared by the caller and can
 * be used to carry information to pcpu_free_pages() which will be
 * called after all unmaps are finished.  The caller should call
 * proper pre/post flush functions.
 */
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
			     struct page **pages, unsigned long *populated,
			     int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page;

			page = pcpu_chunk_page(chunk, cpu, i);
			WARN_ON(!page);
			pages[pcpu_page_idx(cpu, i)] = page;
		}
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				   page_end - page_start);
	}

	bitmap_clear(populated, page_start, page_end - page_start);
}

/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been unmapped.  Flush
 * TLB for the regions.  This can be skipped if the area is to be
 * returned to vmalloc as vmalloc will handle TLB flushing lazily.
 *
 * As with pcpu_pre_unmap_flush(), TLB flushing is also done at once
 * for the whole region.
 */
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
{
	flush_tlb_kernel_range(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static int __pcpu_map_pages(unsigned long addr, struct page **pages,
			    int nr_pages)
{
	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
					PAGE_KERNEL, pages);
}

/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @populated: populated bitmap
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start,@page_end) into @chunk.  The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting the corresponding bits in
 * @chunk->populated bitmap and whatever is necessary for reverse
 * lookup (addr -> chunk).
 */
static int pcpu_map_pages(struct pcpu_chunk *chunk,
			  struct page **pages, unsigned long *populated,
			  int page_start, int page_end)
{
	unsigned int cpu, tcpu;
	int i, err;

	for_each_possible_cpu(cpu) {
		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				       &pages[pcpu_page_idx(cpu, page_start)],
				       page_end - page_start);
		if (err < 0)
			goto err;
	}

	/* mapping successful, link chunk and mark populated */
	for (i = page_start; i < page_end; i++) {
		for_each_possible_cpu(cpu)
			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
					    chunk);
		__set_bit(i, populated);
	}

	return 0;

err:
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
				   page_end - page_start);
	}
	return err;
}

/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been mapped.  Flush
 * cache.
 *
 * As with pcpu_pre_unmap_flush(), the flush is done at once for the
 * whole region.
 */
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
{
	flush_cache_vmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @off: offset to the area to populate
 * @size: size of the area to populate in bytes
 *
 * For each cpu, populate and map pages [@page_start,@page_end) into
 * @chunk.  The area is cleared on return.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
	int page_start = PFN_DOWN(off);
	int page_end = PFN_UP(off + size);
	int free_end = page_start, unmap_end = page_start;
	struct page **pages;
	unsigned long *populated;
	unsigned int cpu;
	int rs, re, rc;

	/* quick path, check whether all pages are already there */
	rs = page_start;
	pcpu_next_pop(chunk, &rs, &re, page_end);
	if (rs == page_start && re == page_end)
		goto clear;

	/* need to allocate and map pages, this chunk can't be immutable */
	WARN_ON(chunk->immutable);

	pages = pcpu_get_pages_and_bitmap(chunk, &populated, true);
	if (!pages)
		return -ENOMEM;

	/* alloc and map */
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
		rc = pcpu_alloc_pages(chunk, pages, populated, rs, re);
		if (rc)
			goto err_free;
		free_end = re;
	}

	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
		rc = pcpu_map_pages(chunk, pages, populated, rs, re);
		if (rc)
			goto err_unmap;
		unmap_end = re;
	}
	pcpu_post_map_flush(chunk, page_start, page_end);

	/* commit new bitmap */
	bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
clear:
	for_each_possible_cpu(cpu)
		memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
	return 0;

err_unmap:
	pcpu_pre_unmap_flush(chunk, page_start, unmap_end);
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end)
		pcpu_unmap_pages(chunk, pages, populated, rs, re);
	pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end);
err_free:
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end)
		pcpu_free_pages(chunk, pages, populated, rs, re);
	return rc;
}
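
/*
 * For context, the percpu core in mm/percpu.c drives this populate path
 * roughly as follows (simplified sketch, not the verbatim code):
 *
 *	mutex_lock(&pcpu_alloc_mutex);
 *	off = pcpu_alloc_area(chunk, size, align);
 *	if (pcpu_populate_chunk(chunk, off, size))
 *		pcpu_free_area(chunk, off);
 *	mutex_unlock(&pcpu_alloc_mutex);
 *	return __addr_to_pcpu_ptr(chunk->base_addr + off);
 *
 * i.e. an area is first reserved inside a chunk, then backed with real
 * pages here, and rolled back if population fails.
 */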

/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @off: offset to the area to depopulate
 * @size: size of the area to depopulate in bytes
 *
 * For each cpu, depopulate and unmap pages [@page_start,@page_end)
 * from @chunk.  The cache is flushed before unmapping; the TLB flush
 * is left to vmalloc's lazy flushing.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
	int page_start = PFN_DOWN(off);
	int page_end = PFN_UP(off + size);
	struct page **pages;
	unsigned long *populated;
	int rs, re;

	/* quick path, check whether it's empty already */
	rs = page_start;
	pcpu_next_unpop(chunk, &rs, &re, page_end);
	if (rs == page_start && re == page_end)
		return;

	/* immutable chunks can't be depopulated */
	WARN_ON(chunk->immutable);

	/*
	 * If control reaches here, there must have been at least one
	 * successful population attempt so the temp pages array must
	 * be available now.
	 */
	pages = pcpu_get_pages_and_bitmap(chunk, &populated, false);
	BUG_ON(!pages);

	/* unmap and free */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);

	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
		pcpu_unmap_pages(chunk, pages, populated, rs, re);

	/* no need to flush tlb, vmalloc will handle it lazily */

	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
		pcpu_free_pages(chunk, pages, populated, rs, re);

	/* commit new bitmap */
	bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
}

static struct pcpu_chunk *pcpu_create_chunk(void)
{
	struct pcpu_chunk *chunk;
	struct vm_struct **vms;

	chunk = pcpu_alloc_chunk();
	if (!chunk)
		return NULL;

	vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				pcpu_nr_groups, pcpu_atom_size);
	if (!vms) {
		pcpu_free_chunk(chunk);
		return NULL;
	}

	chunk->data = vms;
	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
	return chunk;
}
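
/*
 * pcpu_get_vm_areas() (mm/vmalloc.c) reserves one vmalloc area per group,
 * placed so that all groups keep their configured offsets from a common
 * base.  Recovering that base as vms[0]->addr - pcpu_group_offsets[0] above
 * is what lets pcpu_chunk_addr() find every unit of the new chunk at a
 * fixed offset from chunk->base_addr.
 */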

static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{
	if (chunk && chunk->data)
		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
	pcpu_free_chunk(chunk);
}

static struct page *pcpu_addr_to_page(void *addr)
{
	return vmalloc_to_page(addr);
}

static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
	/* no extra restriction */
	return 0;
}
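
/*
 * Illustrative usage (not part of mm/percpu-vm.c): the chunk allocator
 * above ultimately backs dynamic percpu allocations.  A module would
 * exercise it roughly as sketched below; "pkt_stats", "net_foo_init" and
 * "net_foo_exit" are made-up names for the sake of the example.
 */
struct pkt_stats {
	u64 rx;
	u64 tx;
};

static struct pkt_stats __percpu *stats;

static int __init net_foo_init(void)
{
	unsigned int cpu;

	/* may end up in pcpu_create_chunk()/pcpu_populate_chunk() above */
	stats = alloc_percpu(struct pkt_stats);
	if (!stats)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct pkt_stats *ps = per_cpu_ptr(stats, cpu);

		ps->rx = 0;
		ps->tx = 0;
	}
	return 0;
}

static void __exit net_foo_exit(void)
{
	/* may eventually lead to pcpu_depopulate_chunk()/pcpu_destroy_chunk() */
	free_percpu(stats);
}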

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.