The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/x86/acpica/srat.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2010 Hudson River Trading LLC
    3  * Written by: John H. Baldwin <jhb@FreeBSD.org>
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/11.2/sys/x86/acpica/srat.c 322996 2017-08-29 07:01:15Z mav $");
   30 
   31 #include "opt_vm.h"
   32 
   33 #include <sys/param.h>
   34 #include <sys/bus.h>
   35 #include <sys/kernel.h>
   36 #include <sys/lock.h>
   37 #include <sys/mutex.h>
   38 #include <sys/smp.h>
   39 #include <sys/vmmeter.h>
   40 #include <vm/vm.h>
   41 #include <vm/pmap.h>
   42 #include <vm/vm_param.h>
   43 #include <vm/vm_page.h>
   44 #include <vm/vm_phys.h>
   45 
   46 #include <contrib/dev/acpica/include/acpi.h>
   47 #include <contrib/dev/acpica/include/aclocal.h>
   48 #include <contrib/dev/acpica/include/actables.h>
   49 
   50 #include <machine/intr_machdep.h>
   51 #include <x86/apicvar.h>
   52 
   53 #include <dev/acpica/acpivar.h>
   54 
   55 #if MAXMEMDOM > 1
   56 struct cpu_info {
   57         int enabled:1;
   58         int has_memory:1;
   59         int domain;
   60 } cpus[MAX_APIC_ID + 1];
   61 
   62 struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1];
   63 int num_mem;
   64 
   65 static ACPI_TABLE_SRAT *srat;
   66 static vm_paddr_t srat_physaddr;
   67 
   68 static int domain_pxm[MAXMEMDOM];
   69 static int ndomain;
   70 
   71 static ACPI_TABLE_SLIT *slit;
   72 static vm_paddr_t slit_physaddr;
   73 static int vm_locality_table[MAXMEMDOM * MAXMEMDOM];
   74 
   75 static void     srat_walk_table(acpi_subtable_handler *handler, void *arg);
   76 
   77 /*
   78  * SLIT parsing.
   79  */
   80 
   81 static void
   82 slit_parse_table(ACPI_TABLE_SLIT *s)
   83 {
   84         int i, j;
   85         int i_domain, j_domain;
   86         int offset = 0;
   87         uint8_t e;
   88 
   89         /*
   90          * This maps the SLIT data into the VM-domain centric view.
   91          * There may be sparse entries in the PXM namespace, so
   92          * remap them to a VM-domain ID and if it doesn't exist,
   93          * skip it.
   94          *
   95          * It should result in a packed 2d array of VM-domain
   96          * locality information entries.
   97          */
   98 
   99         if (bootverbose)
  100                 printf("SLIT.Localities: %d\n", (int) s->LocalityCount);
  101         for (i = 0; i < s->LocalityCount; i++) {
  102                 i_domain = acpi_map_pxm_to_vm_domainid(i);
  103                 if (i_domain < 0)
  104                         continue;
  105 
  106                 if (bootverbose)
  107                         printf("%d: ", i);
  108                 for (j = 0; j < s->LocalityCount; j++) {
  109                         j_domain = acpi_map_pxm_to_vm_domainid(j);
  110                         if (j_domain < 0)
  111                                 continue;
  112                         e = s->Entry[i * s->LocalityCount + j];
  113                         if (bootverbose)
  114                                 printf("%d ", (int) e);
  115                         /* 255 == "no locality information" */
  116                         if (e == 255)
  117                                 vm_locality_table[offset] = -1;
  118                         else
  119                                 vm_locality_table[offset] = e;
  120                         offset++;
  121                 }
  122                 if (bootverbose)
  123                         printf("\n");
  124         }
  125 }
  126 
  127 /*
  128  * Look for an ACPI System Locality Distance Information Table ("SLIT")
  129  */
  130 static int
  131 parse_slit(void)
  132 {
  133 
  134         if (resource_disabled("slit", 0)) {
  135                 return (-1);
  136         }
  137 
  138         slit_physaddr = acpi_find_table(ACPI_SIG_SLIT);
  139         if (slit_physaddr == 0) {
  140                 return (-1);
  141         }
  142 
  143         /*
  144          * Make a pass over the table to populate the cpus[] and
  145          * mem_info[] tables.
  146          */
  147         slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT);
  148         slit_parse_table(slit);
  149         acpi_unmap_table(slit);
  150         slit = NULL;
  151 
  152 #ifdef VM_NUMA_ALLOC
  153         /* Tell the VM about it! */
  154         mem_locality = vm_locality_table;
  155 #endif
  156         return (0);
  157 }
  158 
  159 /*
  160  * SRAT parsing.
  161  */
  162 
  163 /*
  164  * Returns true if a memory range overlaps with at least one range in
  165  * phys_avail[].
  166  */
  167 static int
  168 overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end)
  169 {
  170         int i;
  171 
  172         for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) {
  173                 if (phys_avail[i + 1] <= start)
  174                         continue;
  175                 if (phys_avail[i] < end)
  176                         return (1);
  177                 break;
  178         }
  179         return (0);
  180         
  181 }
  182 
  183 static void
  184 srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg)
  185 {
  186         ACPI_SRAT_CPU_AFFINITY *cpu;
  187         ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic;
  188         ACPI_SRAT_MEM_AFFINITY *mem;
  189         int domain, i, slot;
  190 
  191         switch (entry->Type) {
  192         case ACPI_SRAT_TYPE_CPU_AFFINITY:
  193                 cpu = (ACPI_SRAT_CPU_AFFINITY *)entry;
  194                 domain = cpu->ProximityDomainLo |
  195                     cpu->ProximityDomainHi[0] << 8 |
  196                     cpu->ProximityDomainHi[1] << 16 |
  197                     cpu->ProximityDomainHi[2] << 24;
  198                 if (bootverbose)
  199                         printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
  200                             cpu->ApicId, domain,
  201                             (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ?
  202                             "enabled" : "disabled");
  203                 if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED))
  204                         break;
  205                 if (cpu->ApicId > MAX_APIC_ID) {
  206                         printf("SRAT: Ignoring local APIC ID %u (too high)\n",
  207                             cpu->ApicId);
  208                         break;
  209                 }
  210 
  211                 if (cpus[cpu->ApicId].enabled) {
  212                         printf("SRAT: Duplicate local APIC ID %u\n",
  213                             cpu->ApicId);
  214                         *(int *)arg = ENXIO;
  215                         break;
  216                 }
  217                 cpus[cpu->ApicId].domain = domain;
  218                 cpus[cpu->ApicId].enabled = 1;
  219                 break;
  220         case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
  221                 x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry;
  222                 if (bootverbose)
  223                         printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
  224                             x2apic->ApicId, x2apic->ProximityDomain,
  225                             (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ?
  226                             "enabled" : "disabled");
  227                 if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED))
  228                         break;
  229                 if (x2apic->ApicId > MAX_APIC_ID) {
  230                         printf("SRAT: Ignoring local APIC ID %u (too high)\n",
  231                             x2apic->ApicId);
  232                         break;
  233                 }
  234 
  235                 KASSERT(!cpus[x2apic->ApicId].enabled,
  236                     ("Duplicate local APIC ID %u", x2apic->ApicId));
  237                 cpus[x2apic->ApicId].domain = x2apic->ProximityDomain;
  238                 cpus[x2apic->ApicId].enabled = 1;
  239                 break;
  240         case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
  241                 mem = (ACPI_SRAT_MEM_AFFINITY *)entry;
  242                 if (bootverbose)
  243                         printf(
  244                     "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n",
  245                             mem->ProximityDomain, (uintmax_t)mem->BaseAddress,
  246                             (uintmax_t)mem->Length,
  247                             (mem->Flags & ACPI_SRAT_MEM_ENABLED) ?
  248                             "enabled" : "disabled");
  249                 if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED))
  250                         break;
  251                 if (!overlaps_phys_avail(mem->BaseAddress,
  252                     mem->BaseAddress + mem->Length)) {
  253                         printf("SRAT: Ignoring memory at addr 0x%jx\n",
  254                             (uintmax_t)mem->BaseAddress);
  255                         break;
  256                 }
  257                 if (num_mem == VM_PHYSSEG_MAX) {
  258                         printf("SRAT: Too many memory regions\n");
  259                         *(int *)arg = ENXIO;
  260                         break;
  261                 }
  262                 slot = num_mem;
  263                 for (i = 0; i < num_mem; i++) {
  264                         if (mem_info[i].end <= mem->BaseAddress)
  265                                 continue;
  266                         if (mem_info[i].start <
  267                             (mem->BaseAddress + mem->Length)) {
  268                                 printf("SRAT: Overlapping memory entries\n");
  269                                 *(int *)arg = ENXIO;
  270                                 return;
  271                         }
  272                         slot = i;
  273                 }
  274                 for (i = num_mem; i > slot; i--)
  275                         mem_info[i] = mem_info[i - 1];
  276                 mem_info[slot].start = mem->BaseAddress;
  277                 mem_info[slot].end = mem->BaseAddress + mem->Length;
  278                 mem_info[slot].domain = mem->ProximityDomain;
  279                 num_mem++;
  280                 break;
  281         }
  282 }
  283 
  284 /*
  285  * Ensure each memory domain has at least one CPU and that each CPU
  286  * has at least one memory domain.
  287  */
  288 static int
  289 check_domains(void)
  290 {
  291         int found, i, j;
  292 
  293         for (i = 0; i < num_mem; i++) {
  294                 found = 0;
  295                 for (j = 0; j <= MAX_APIC_ID; j++)
  296                         if (cpus[j].enabled &&
  297                             cpus[j].domain == mem_info[i].domain) {
  298                                 cpus[j].has_memory = 1;
  299                                 found++;
  300                         }
  301                 if (!found) {
  302                         printf("SRAT: No CPU found for memory domain %d\n",
  303                             mem_info[i].domain);
  304                         return (ENXIO);
  305                 }
  306         }
  307         for (i = 0; i <= MAX_APIC_ID; i++)
  308                 if (cpus[i].enabled && !cpus[i].has_memory) {
  309                         printf("SRAT: No memory found for CPU %d\n", i);
  310                         return (ENXIO);
  311                 }
  312         return (0);
  313 }
  314 
  315 /*
  316  * Check that the SRAT memory regions cover all of the regions in
  317  * phys_avail[].
  318  */
  319 static int
  320 check_phys_avail(void)
  321 {
  322         vm_paddr_t address;
  323         int i, j;
  324 
  325         /* j is the current offset into phys_avail[]. */
  326         address = phys_avail[0];
  327         j = 0;
  328         for (i = 0; i < num_mem; i++) {
  329                 /*
  330                  * Consume as many phys_avail[] entries as fit in this
  331                  * region.
  332                  */
  333                 while (address >= mem_info[i].start &&
  334                     address <= mem_info[i].end) {
  335                         /*
  336                          * If we cover the rest of this phys_avail[] entry,
  337                          * advance to the next entry.
  338                          */
  339                         if (phys_avail[j + 1] <= mem_info[i].end) {
  340                                 j += 2;
  341                                 if (phys_avail[j] == 0 &&
  342                                     phys_avail[j + 1] == 0) {
  343                                         return (0);
  344                                 }
  345                                 address = phys_avail[j];
  346                         } else
  347                                 address = mem_info[i].end + 1;
  348                 }
  349         }
  350         printf("SRAT: No memory region found for 0x%jx - 0x%jx\n",
  351             (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]);
  352         return (ENXIO);
  353 }
  354 
  355 /*
  356  * Renumber the memory domains to be compact and zero-based if not
  357  * already.  Returns an error if there are too many domains.
  358  */
  359 static int
  360 renumber_domains(void)
  361 {
  362         int i, j, slot;
  363 
  364         /* Enumerate all the domains. */
  365         ndomain = 0;
  366         for (i = 0; i < num_mem; i++) {
  367                 /* See if this domain is already known. */
  368                 for (j = 0; j < ndomain; j++) {
  369                         if (domain_pxm[j] >= mem_info[i].domain)
  370                                 break;
  371                 }
  372                 if (j < ndomain && domain_pxm[j] == mem_info[i].domain)
  373                         continue;
  374 
  375                 if (ndomain >= MAXMEMDOM) {
  376                         ndomain = 1;
  377                         printf("SRAT: Too many memory domains\n");
  378                         return (EFBIG);
  379                 }
  380 
  381                 /* Insert the new domain at slot 'j'. */
  382                 slot = j;
  383                 for (j = ndomain; j > slot; j--)
  384                         domain_pxm[j] = domain_pxm[j - 1];
  385                 domain_pxm[slot] = mem_info[i].domain;
  386                 ndomain++;
  387         }
  388 
  389         /* Renumber each domain to its index in the sorted 'domain_pxm' list. */
  390         for (i = 0; i < ndomain; i++) {
  391                 /*
  392                  * If the domain is already the right value, no need
  393                  * to renumber.
  394                  */
  395                 if (domain_pxm[i] == i)
  396                         continue;
  397 
  398                 /* Walk the cpu[] and mem_info[] arrays to renumber. */
  399                 for (j = 0; j < num_mem; j++)
  400                         if (mem_info[j].domain == domain_pxm[i])
  401                                 mem_info[j].domain = i;
  402                 for (j = 0; j <= MAX_APIC_ID; j++)
  403                         if (cpus[j].enabled && cpus[j].domain == domain_pxm[i])
  404                                 cpus[j].domain = i;
  405         }
  406 
  407         return (0);
  408 }
  409 
  410 /*
  411  * Look for an ACPI System Resource Affinity Table ("SRAT")
  412  */
  413 static int
  414 parse_srat(void)
  415 {
  416         int error;
  417 
  418         if (resource_disabled("srat", 0))
  419                 return (-1);
  420 
  421         srat_physaddr = acpi_find_table(ACPI_SIG_SRAT);
  422         if (srat_physaddr == 0)
  423                 return (-1);
  424 
  425         /*
  426          * Make a pass over the table to populate the cpus[] and
  427          * mem_info[] tables.
  428          */
  429         srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT);
  430         error = 0;
  431         srat_walk_table(srat_parse_entry, &error);
  432         acpi_unmap_table(srat);
  433         srat = NULL;
  434         if (error || check_domains() != 0 || check_phys_avail() != 0 ||
  435             renumber_domains() != 0) {
  436                 srat_physaddr = 0;
  437                 return (-1);
  438         }
  439 
  440 #ifdef VM_NUMA_ALLOC
  441         /* Point vm_phys at our memory affinity table. */
  442         vm_ndomains = ndomain;
  443         mem_affinity = mem_info;
  444 #endif
  445 
  446         return (0);
  447 }
  448 
  449 static void
  450 init_mem_locality(void)
  451 {
  452         int i;
  453 
  454         /*
  455          * For now, assume -1 == "no locality information for
  456          * this pairing.
  457          */
  458         for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++)
  459                 vm_locality_table[i] = -1;
  460 }
  461 
  462 static void
  463 parse_acpi_tables(void *dummy)
  464 {
  465 
  466         if (parse_srat() < 0)
  467                 return;
  468         init_mem_locality();
  469         (void) parse_slit();
  470 }
  471 SYSINIT(parse_acpi_tables, SI_SUB_VM - 1, SI_ORDER_FIRST, parse_acpi_tables,
  472     NULL);
  473 
  474 static void
  475 srat_walk_table(acpi_subtable_handler *handler, void *arg)
  476 {
  477 
  478         acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length,
  479             handler, arg);
  480 }
  481 
  482 /*
  483  * Setup per-CPU domain IDs.
  484  */
  485 static void
  486 srat_set_cpus(void *dummy)
  487 {
  488         struct cpu_info *cpu;
  489         struct pcpu *pc;
  490         u_int i;
  491 
  492         if (srat_physaddr == 0)
  493                 return;
  494         for (i = 0; i < MAXCPU; i++) {
  495                 if (CPU_ABSENT(i))
  496                         continue;
  497                 pc = pcpu_find(i);
  498                 KASSERT(pc != NULL, ("no pcpu data for CPU %u", i));
  499                 cpu = &cpus[pc->pc_apic_id];
  500                 if (!cpu->enabled)
  501                         panic("SRAT: CPU with APIC ID %u is not known",
  502                             pc->pc_apic_id);
  503                 pc->pc_domain = cpu->domain;
  504                 CPU_SET(i, &cpuset_domain[cpu->domain]);
  505                 if (bootverbose)
  506                         printf("SRAT: CPU %u has memory domain %d\n", i,
  507                             cpu->domain);
  508         }
  509 }
  510 SYSINIT(srat_set_cpus, SI_SUB_CPU, SI_ORDER_ANY, srat_set_cpus, NULL);
  511 
  512 /*
  513  * Map a _PXM value to a VM domain ID.
  514  *
  515  * Returns the domain ID, or -1 if no domain ID was found.
  516  */
  517 int
  518 acpi_map_pxm_to_vm_domainid(int pxm)
  519 {
  520         int i;
  521 
  522         for (i = 0; i < ndomain; i++) {
  523                 if (domain_pxm[i] == pxm)
  524                         return (i);
  525         }
  526 
  527         return (-1);
  528 }
  529 
  530 #else /* MAXMEMDOM == 1 */
  531 
  532 int
  533 acpi_map_pxm_to_vm_domainid(int pxm)
  534 {
  535 
  536         return (-1);
  537 }
  538 
  539 #endif /* MAXMEMDOM > 1 */

Cache object: fb15120889ce99bf90cc4a50b51c1dde


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.