The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_physmem.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2014 Ian Lepore <ian@freebsd.org>
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD$");
   31 
   32 #include "opt_acpi.h"
   33 #include "opt_ddb.h"
   34 
   35 /*
   36  * Routines for describing and initializing anything related to physical memory.
   37  */
   38 
   39 #include <sys/param.h>
   40 #include <sys/systm.h>
   41 #include <sys/physmem.h>
   42 #include <vm/vm.h>
   43 #include <machine/md_var.h>
   44 
   45 /*
   46  * These structures are used internally to keep track of regions of physical
   47  * ram, and regions within the physical ram that need to be excluded.  An
   48  * exclusion region can be excluded from crash dumps, from the vm pool of pages
   49  * that can be allocated, or both, depending on the exclusion flags associated
   50  * with the region.
   51  */
   52 #ifdef DEV_ACPI
   53 #define MAX_HWCNT       32      /* ACPI needs more regions */
   54 #define MAX_EXCNT       32
   55 #else
   56 #define MAX_HWCNT       16
   57 #define MAX_EXCNT       16
   58 #endif
   59 
   60 #if defined(__arm__)
   61 #define MAX_PHYS_ADDR   0xFFFFFFFFull
   62 #elif defined(__aarch64__) || defined(__riscv)
   63 #define MAX_PHYS_ADDR   0xFFFFFFFFFFFFFFFFull
   64 #endif
   65 
   66 struct region {
   67         vm_paddr_t      addr;   /* starting physical address of the region */
   68         vm_size_t       size;   /* length of the region in bytes */
   69         uint32_t        flags;  /* EXFLAG_* bits; hardware regions are inserted with 0 */
   70 };
   71 
   72 static struct region hwregions[MAX_HWCNT];
   73 static struct region exregions[MAX_EXCNT];
   74 
   75 static size_t hwcnt;
   76 static size_t excnt;
   77 
   78 /*
   79  * These "avail lists" are globals used to communicate physical memory layout to
   80  * other parts of the kernel.  Within the arrays, each value is the starting
   81  * address of a contiguous area of physical address space.  The values at even
   82  * indexes are areas that contain usable memory and the values at odd indexes
   83  * are areas that aren't usable.  Each list is terminated by a pair of zero
   84  * entries.
   85  *
   86  * dump_avail tells the dump code what regions to include in a crash dump, and
   87  * phys_avail is the way we hand all the remaining physical ram we haven't used
   88  * in early kernel init over to the vm system for allocation management.
   89  *
   90  * We size these arrays to hold twice as many available regions as we allow for
   91  * hardware memory regions, to allow for the fact that exclusions can split a
   92  * hardware region into two or more available regions.  In the real world there
   93  * will typically be one or two hardware regions and two or three exclusions.
   94  *
   95  * Each available region in this list occupies two array slots (the start of the
   96  * available region and the start of the unavailable region that follows it).
   97  */
   98 #define MAX_AVAIL_REGIONS       (MAX_HWCNT * 2)
   99 #define MAX_AVAIL_ENTRIES       (MAX_AVAIL_REGIONS * 2)
  100 
  101 vm_paddr_t phys_avail[MAX_AVAIL_ENTRIES + 2]; /* +2 to allow for a pair  */
  102 vm_paddr_t dump_avail[MAX_AVAIL_ENTRIES + 2]; /* of zeroes to terminate. */
  103 
  104 /*
  105  * realmem is the total number of hardware pages, excluded or not.
  106  * Maxmem is one greater than the last physical page number.
  107  */
  108 long realmem;
  109 long Maxmem;
  110 
  111 /*
  112  * Print the contents of the physical and excluded region tables using the
  113  * provided printf-like output function (which will be either printf or
  114  * db_printf).
  115  */
  116 static void
  117 physmem_dump_tables(int (*prfunc)(const char *, ...))
  118 {
  119         int flags, i;
  120         uintmax_t addr, size;
  121         const unsigned int mbyte = 1024 * 1024;
  122 
  123         prfunc("Physical memory chunk(s):\n");
  124         for (i = 0; i < hwcnt; ++i) {
  125                 addr = hwregions[i].addr;
  126                 size = hwregions[i].size;
  127                 prfunc("  0x%08jx - 0x%08jx, %5ju MB (%7ju pages)\n", addr,
  128                     addr + size - 1, size / mbyte, size / PAGE_SIZE);
  129         }
  130 
  131         prfunc("Excluded memory regions:\n");
  132         for (i = 0; i < excnt; ++i) {
  133                 addr  = exregions[i].addr;
  134                 size  = exregions[i].size;
  135                 flags = exregions[i].flags;
  136                 prfunc("  0x%08jx - 0x%08jx, %5ju MB (%7ju pages) %s %s\n",
  137                     addr, addr + size - 1, size / mbyte, size / PAGE_SIZE,
  138                     (flags & EXFLAG_NOALLOC) ? "NoAlloc" : "",
  139                     (flags & EXFLAG_NODUMP)  ? "NoDump" : "");
  140         }
  141 
  142 #ifdef DEBUG
  143         prfunc("Avail lists:\n");
  144         for (i = 0; phys_avail[i] != 0; ++i) {
  145                 prfunc("  phys_avail[%d] 0x%08x\n", i, phys_avail[i]);
  146         }
  147         for (i = 0; dump_avail[i] != 0; ++i) {
  148                 prfunc("  dump_avail[%d] 0x%08x\n", i, dump_avail[i]);
  149         }
  150 #endif
  151 }
  152 
  153 /*
  154  * Print the physical and excluded memory region tables with printf.  Used for bootverbose.
  155  */
  156 void
  157 physmem_print_tables(void)
  158 {
  159 
  160         physmem_dump_tables(printf);
  161 }
  162 
  163 /*
  164  * Walk the list of hardware regions, processing it against the list of
  165  * exclusions that contain the given exflags, and generating an "avail list".
  166  *
  167  * Updates the value at *pavail with the sum of all pages in all hw regions.
  168  *
  169  * Returns the number of pages of non-excluded memory added to the avail list.
  170  */
  171 static size_t
  172 regions_to_avail(vm_paddr_t *avail, uint32_t exflags, size_t maxavail,
  173     long *pavail, long *prealmem)
  174 {
  175         size_t acnt, exi, hwi;
  176         uint64_t end, start, xend, xstart;
  177         long availmem, totalmem;
  178         const struct region *exp, *hwp;
  179 
  180         totalmem = 0;
  181         availmem = 0;
  182         acnt = 0;
  183         for (hwi = 0, hwp = hwregions; hwi < hwcnt; ++hwi, ++hwp) {
  184                 start = hwp->addr;
  185                 end   = hwp->size + start;
  186                 totalmem += atop((vm_offset_t)(end - start));
  187                 for (exi = 0, exp = exregions; exi < excnt; ++exi, ++exp) {
  188                         /*
  189                          * If the excluded region does not match given flags,
  190                          * continue checking with the next excluded region.
  191                          */
  192                         if ((exp->flags & exflags) == 0)
  193                                 continue;
  194                         xstart = exp->addr;
  195                         xend   = exp->size + xstart;
  196                         /*
  197                          * If the excluded region ends before this hw region,
  198                          * continue checking with the next excluded region.
  199                          */
  200                         if (xend <= start)
  201                                 continue;
  202                         /*
  203                          * If the excluded region begins after this hw region
  204                          * we're done because both lists are sorted.
  205                          */
  206                         if (xstart >= end)
  207                                 break;
  208                         /*
  209                          * If the excluded region completely covers this hw
  210                          * region, shrink this hw region to zero size.
  211                          */
  212                         if ((start >= xstart) && (end <= xend)) {
  213                                 start = xend;
  214                                 end = xend;
  215                                 break;
  216                         }
  217                         /*
  218                          * If the excluded region falls wholly within this hw
  219                          * region without abutting or overlapping the beginning
  220                          * or end, create an available entry from the leading
  221                          * fragment, then adjust the start of this hw region to
  222                          * the end of the excluded region, and continue checking
  223                          * the next excluded region because another exclusion
  224                          * could affect the remainder of this hw region.
  225                          */
  226                         if ((xstart > start) && (xend < end)) {
  227                                 if (acnt > 0 &&
  228                                     avail[acnt - 1] == (vm_paddr_t)start) {
  229                                         avail[acnt - 1] = (vm_paddr_t)xstart;
  230                                 } else {
  231                                         avail[acnt++] = (vm_paddr_t)start;
  232                                         avail[acnt++] = (vm_paddr_t)xstart;
  233                                 }
  234                                 availmem += atop((vm_offset_t)(xstart - start));
  235                                 start = xend;
  236                                 continue;
  237                         }
  238                         /*
  239                          * We know the excluded region overlaps either the start
  240                          * or end of this hardware region (but not both), trim
  241                          * the excluded portion off the appropriate end.
  242                          */
  243                         if (xstart <= start)
  244                                 start = xend;
  245                         else
  246                                 end = xstart;
  247                 }
  248                 /*
  249                  * If the trimming actions above left a non-zero size, create an
  250                  * available entry for it.
  251                  */
  252                 if (end > start) {
  253                         if (acnt > 0 && avail[acnt - 1] == (vm_paddr_t)start) {
  254                                 avail[acnt - 1] = (vm_paddr_t)end;
  255                         } else {
  256                                 avail[acnt++] = (vm_paddr_t)start;
  257                                 avail[acnt++] = (vm_paddr_t)end;
  258                         }
  259                         availmem += atop((vm_offset_t)(end - start));
  260                 }
  261                 if (acnt >= maxavail)
  262                         panic("Not enough space in the dump/phys_avail arrays");
  263         }
  264 
  265         if (pavail != NULL)
  266                 *pavail = availmem;
  267         if (prealmem != NULL)
  268                 *prealmem = totalmem;
  269         return (acnt);
  270 }
  271 
  272 /*
  273  * Insertion-sort a new entry into a regions list; sorted by start address.
  274  */
  275 static size_t
  276 insert_region(struct region *regions, size_t rcnt, vm_paddr_t addr,
  277     vm_size_t size, uint32_t flags)
  278 {
  279         size_t i;
  280         struct region *ep, *rp;
  281 
  282         ep = regions + rcnt;
  283         for (i = 0, rp = regions; i < rcnt; ++i, ++rp) {
  284                 if (rp->addr == addr && rp->size == size) /* Pure dup. */
  285                         return (rcnt);
  286                 if (flags == rp->flags) {
  287                         if (addr + size == rp->addr) {
  288                                 rp->addr = addr;
  289                                 rp->size += size;
  290                                 return (rcnt);
  291                         } else if (rp->addr + rp->size == addr) {
  292                                 rp->size += size;
  293                                 return (rcnt);
  294                         }
  295                 }
  296                 if (addr < rp->addr) {
  297                         bcopy(rp, rp + 1, (ep - rp) * sizeof(*rp));
  298                         break;
  299                 }
  300         }
  301         rp->addr  = addr;
  302         rp->size  = size;
  303         rp->flags = flags;
  304         rcnt++;
  305 
  306         return (rcnt);
  307 }
  308 
  309 /*
  310  * Add a hardware memory region.
  311  */
  312 void
  313 physmem_hardware_region(uint64_t pa, uint64_t sz)
  314 {
  315         vm_offset_t adj;
  316 
  317         /*
  318          * Filter out the page at PA 0x00000000.  The VM can't handle it, as
  319          * pmap_extract() == 0 means failure.
  320          */
  321         if (pa == 0) {
  322                 if (sz <= PAGE_SIZE)
  323                         return;
  324                 pa  = PAGE_SIZE;
  325                 sz -= PAGE_SIZE;
  326         } else if (pa > MAX_PHYS_ADDR) {
  327                 /* This range is past usable memory, ignore it */
  328                 return;
  329         }
  330 
  331         /*
  332          * Also filter out the page at the end of the physical address space --
  333          * if addr is non-zero and addr+size is zero we wrapped to the next byte
  334          * beyond what vm_paddr_t can express.  That leads to a NULL pointer
  335          * deref early in startup; work around it by leaving the last page out.
  336          *
  337          * XXX This just in:  subtract out a whole megabyte, not just 1 page.
  338          * Reducing the size by anything less than 1MB results in the NULL
  339          * pointer deref in _vm_map_lock_read().  Better to give up a megabyte
  340          * than leave some folks with an unusable system while we investigate.
  341          */
  342         if ((pa + sz) > (MAX_PHYS_ADDR - 1024 * 1024)) {
  343                 sz = MAX_PHYS_ADDR - pa + 1;
  344                 if (sz <= 1024 * 1024)
  345                         return;
  346                 sz -= 1024 * 1024;
  347         }
  348 
  349         /*
  350          * Round the starting address up to a page boundary, and truncate the
  351          * ending page down to a page boundary.
  352          */
  353         adj = round_page(pa) - pa;
  354         pa  = round_page(pa);
  355         sz  = trunc_page(sz - adj);
  356 
  357         if (sz > 0 && hwcnt < nitems(hwregions))
  358                 hwcnt = insert_region(hwregions, hwcnt, pa, sz, 0);
  359 }
  360 
  361 /*
  362  * Add an exclusion region.
  363  */
  364 void
  365 physmem_exclude_region(vm_paddr_t pa, vm_size_t sz, uint32_t exflags)
  366 {
  367         vm_offset_t adj;
  368 
  369         /*
  370          * Truncate the starting address down to a page boundary, and round the
  371          * ending page up to a page boundary.
  372          */
  373         adj = pa - trunc_page(pa);
  374         pa  = trunc_page(pa);
  375         sz  = round_page(sz + adj);
  376 
  377         if (excnt >= nitems(exregions))
  378                 panic("failed to exclude region %#jx-%#jx", (uintmax_t)pa,
  379                     (uintmax_t)(pa + sz));
  380         excnt = insert_region(exregions, excnt, pa, sz, exflags);
  381 }
  382 
  383 size_t
  384 physmem_avail(vm_paddr_t *avail, size_t maxavail)
  385 {
  386 
  387         return (regions_to_avail(avail, EXFLAG_NOALLOC, maxavail, NULL, NULL));
  388 }
  389 
  390 /*
  391  * Process all the regions added earlier into the global avail lists.
  392  *
  393  * Updates the kernel global 'physmem' with the number of physical pages
  394  * available for use (all pages not in any exclusion region).
  395  *
  396  * Updates the kernel global 'Maxmem' with the page number one greater then the
  397  * last page of physical memory in the system.
  398  */
  399 void
  400 physmem_init_kernel_globals(void)
  401 {
  402         size_t nextidx;
  403 
  404         regions_to_avail(dump_avail, EXFLAG_NODUMP, MAX_AVAIL_ENTRIES, NULL,
  405             NULL);
  406         nextidx = regions_to_avail(phys_avail, EXFLAG_NOALLOC,
  407             MAX_AVAIL_ENTRIES, &physmem, &realmem);
  408         if (nextidx == 0)
  409                 panic("No memory entries in phys_avail");
  410         Maxmem = atop(phys_avail[nextidx - 1]);
  411 }
  412 
  413 #ifdef DDB
  414 #include <ddb/ddb.h>
  415 
  416 DB_SHOW_COMMAND(physmem, db_show_physmem)
  417 {
  418         /* Debugger command body: dump the region tables through db_printf. */
  419         physmem_dump_tables(db_printf);
  420 }
  421 
  422 #endif /* DDB */

Cache object: 04631a6a25bef9732b6c256503c8a92f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.