FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_phys.c
/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists starting with VM_NFREELIST.
 */
#define VM_RAW_NFREELIST        (VM_NFREELIST + VM_NDOMAIN - 1)
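
/*
 * An illustrative layout (hypothetical values, not from any particular
 * architecture): with VM_NFREELIST = 3, VM_FREELIST_DEFAULT = 0, and
 * VM_NDOMAIN = 2, VM_RAW_NFREELIST is 4 and vm_phys_free_queues holds:
 *
 *      index 0:        VM_FREELIST_DEFAULT for domain 0
 *      index 1..2:     the remaining regular free lists
 *      index 3:        VM_NFREELIST + (1 - 1), the default list for
 *                      domain 1 (see _vm_phys_create_seg())
 */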

struct vm_freelist {
        struct pglist pl;
        int lcnt;
};

struct vm_phys_seg {
        vm_paddr_t start;
        vm_paddr_t end;
        vm_page_t first_page;
        int domain;
        struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

struct mem_affinity *mem_affinity;

static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

#define VM_PHYS_FICTITIOUS_NSEGS        8
static struct vm_phys_fictitious_seg {
        vm_paddr_t start;
        vm_paddr_t end;
        vm_page_t first_page;
} vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
static struct mtx vm_phys_fictitious_reg_mtx;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static struct vm_freelist
(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
    int order);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        struct vm_freelist *fl;
        int error, flind, oind, pind;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
        for (flind = 0; flind < vm_nfreelists; flind++) {
                sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
                    "\n  ORDER (SIZE)  |  NUMBER"
                    "\n              ", flind);
                for (pind = 0; pind < VM_NFREEPOOL; pind++)
                        sbuf_printf(&sbuf, "  |  POOL %d", pind);
                sbuf_printf(&sbuf, "\n--            ");
                for (pind = 0; pind < VM_NFREEPOOL; pind++)
                        sbuf_printf(&sbuf, "-- --      ");
                sbuf_printf(&sbuf, "--\n");
                for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
                        sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
                            1 << (PAGE_SHIFT - 10 + oind));
                        for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                fl = vm_phys_free_queues[flind][pind];
                                sbuf_printf(&sbuf, "  |  %6d", fl[oind].lcnt);
                        }
                        sbuf_printf(&sbuf, "\n");
                }
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        struct vm_phys_seg *seg;
        int error, segind;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
                seg = &vm_phys_segs[segind];
                sbuf_printf(&sbuf, "start:     %#jx\n",
                    (uintmax_t)seg->start);
                sbuf_printf(&sbuf, "end:       %#jx\n",
                    (uintmax_t)seg->end);
                sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
                sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

#if VM_NDOMAIN > 1
/*
 * Outputs the set of free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        int domain, error, flind, ndomains;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
        ndomains = vm_nfreelists - VM_NFREELIST + 1;
        for (domain = 0; domain < ndomains; domain++) {
                sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
                for (flind = 0; flind < vm_nfreelists; flind++)
                        sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
                            vm_phys_lookup_lists[domain][flind]);
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}
#endif

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
        struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
        long pages;
        int segind;

        pages = 0;
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                pages += atop(seg->end - seg->start);
        }
#endif
        KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
            ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
        seg = &vm_phys_segs[vm_phys_nsegs++];
        seg->start = start;
        seg->end = end;
        seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
        seg->first_page = &vm_page_array[pages];
#else
        seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
#if VM_NDOMAIN > 1
        if (flind == VM_FREELIST_DEFAULT && domain != 0) {
                flind = VM_NFREELIST + (domain - 1);
                if (flind >= vm_nfreelists)
                        vm_nfreelists = flind + 1;
        }
#endif
        seg->free_queues = &vm_phys_free_queues[flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
        int i;

        if (mem_affinity == NULL) {
                _vm_phys_create_seg(start, end, flind, 0);
                return;
        }

        for (i = 0;; i++) {
                if (mem_affinity[i].end == 0)
                        panic("Reached end of affinity info");
                if (mem_affinity[i].end <= start)
                        continue;
                if (mem_affinity[i].start > start)
                        panic("No affinity info for start %jx",
                            (uintmax_t)start);
                if (mem_affinity[i].end >= end) {
                        _vm_phys_create_seg(start, end, flind,
                            mem_affinity[i].domain);
                        break;
                }
                _vm_phys_create_seg(start, mem_affinity[i].end, flind,
                    mem_affinity[i].domain);
                start = mem_affinity[i].end;
        }
}
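
/*
 * A sketch of how vm_phys_create_seg() splits a range on affinity
 * boundaries (the addresses below are hypothetical, not from any real
 * machine): given two affinity entries, [0, 0x40000000) in domain 0 and
 * [0x40000000, 0x80000000) in domain 1, a call such as
 *
 *      vm_phys_create_seg(0x30000000, 0x50000000, VM_FREELIST_DEFAULT);
 *
 * creates two segments: [0x30000000, 0x40000000) in domain 0 and
 * [0x40000000, 0x50000000) in domain 1.
 */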

/*
 * Initialize the physical memory allocator.
 */
void
vm_phys_init(void)
{
        struct vm_freelist *fl;
        int flind, i, oind, pind;
#if VM_NDOMAIN > 1
        int ndomains, j;
#endif

        for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef VM_FREELIST_ISADMA
                if (phys_avail[i] < 16777216) {
                        if (phys_avail[i + 1] > 16777216) {
                                vm_phys_create_seg(phys_avail[i], 16777216,
                                    VM_FREELIST_ISADMA);
                                vm_phys_create_seg(16777216, phys_avail[i + 1],
                                    VM_FREELIST_DEFAULT);
                        } else {
                                vm_phys_create_seg(phys_avail[i],
                                    phys_avail[i + 1], VM_FREELIST_ISADMA);
                        }
                        if (VM_FREELIST_ISADMA >= vm_nfreelists)
                                vm_nfreelists = VM_FREELIST_ISADMA + 1;
                } else
#endif
#ifdef VM_FREELIST_HIGHMEM
                if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
                        if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
                                vm_phys_create_seg(phys_avail[i],
                                    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
                                vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
                                    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
                        } else {
                                vm_phys_create_seg(phys_avail[i],
                                    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
                        }
                        if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
                                vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
                } else
#endif
                vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
                    VM_FREELIST_DEFAULT);
        }
        for (flind = 0; flind < vm_nfreelists; flind++) {
                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                        fl = vm_phys_free_queues[flind][pind];
                        for (oind = 0; oind < VM_NFREEORDER; oind++)
                                TAILQ_INIT(&fl[oind].pl);
                }
        }
#if VM_NDOMAIN > 1
        /*
         * Build a free list lookup list for each domain.  All of the
         * memory domain lists are inserted at the VM_FREELIST_DEFAULT
         * index in a round-robin order starting with the current
         * domain.
         */
        ndomains = vm_nfreelists - VM_NFREELIST + 1;
        for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
                for (i = 0; i < ndomains; i++)
                        vm_phys_lookup_lists[i][flind] =
                            &vm_phys_free_queues[flind];
        for (i = 0; i < ndomains; i++)
                for (j = 0; j < ndomains; j++) {
                        flind = (i + j) % ndomains;
                        if (flind == 0)
                                flind = VM_FREELIST_DEFAULT;
                        else
                                flind += VM_NFREELIST - 1;
                        vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
                            &vm_phys_free_queues[flind];
                }
        for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
             flind++)
                for (i = 0; i < ndomains; i++)
                        vm_phys_lookup_lists[i][flind + ndomains - 1] =
                            &vm_phys_free_queues[flind];
#else
        for (flind = 0; flind < vm_nfreelists; flind++)
                vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif

        mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
}
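
/*
 * A worked example of the lookup list construction above, again using
 * hypothetical constants VM_NFREELIST = 3, VM_FREELIST_DEFAULT = 0, and
 * ndomains = 2.  Each domain sees the per-domain default lists first,
 * nearest domain first, followed by the remaining regular lists:
 *
 *      vm_phys_lookup_lists[0] = { &queues[0], &queues[3],
 *          &queues[1], &queues[2] }
 *      vm_phys_lookup_lists[1] = { &queues[3], &queues[0],
 *          &queues[1], &queues[2] }
 *
 * where queues[0] is domain 0's default list, queues[3] is domain 1's
 * default list (raw index VM_NFREELIST + 0), and queues[1..2] are the
 * other regular free lists.
 */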

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
        vm_page_t m_buddy;

        while (oind > order) {
                oind--;
                m_buddy = &m[1 << oind];
                KASSERT(m_buddy->order == VM_NFREEORDER,
                    ("vm_phys_split_pages: page %p has unexpected order %d",
                    m_buddy, m_buddy->order));
                m_buddy->order = oind;
                TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
                fl[oind].lcnt++;
        }
}
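
/*
 * For example (illustrative only): splitting an order-3 block "m" to
 * satisfy an order-1 request keeps m[0..1] for the caller and frees the
 * upper halves at successively smaller orders:
 *
 *      oind 3 -> 2:    &m[4] becomes a free order-2 block (pages 4-7)
 *      oind 2 -> 1:    &m[2] becomes a free order-1 block (pages 2-3)
 *
 * leaving pages 0-1 as the order-1 allocation.
 */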

/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
        vm_page_t m;

        cnt.v_page_count++;
        m = vm_phys_paddr_to_vm_page(pa);
        m->phys_addr = pa;
        m->queue = PQ_NONE;
        m->segind = vm_phys_paddr_to_segind(pa);
        m->flags = PG_FREE;
        KASSERT(m->order == VM_NFREEORDER,
            ("vm_phys_add_page: page %p has unexpected order %d",
            m, m->order));
        m->pool = VM_FREEPOOL_DEFAULT;
        pmap_page_init(m);
        mtx_lock(&vm_page_queue_free_mtx);
        cnt.v_free_count++;
        vm_phys_free_pages(m, 0);
        mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
        vm_page_t m;
        int domain, flind;

        KASSERT(pool < VM_NFREEPOOL,
            ("vm_phys_alloc_pages: pool %d is out of range", pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_alloc_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
        domain = PCPU_GET(domain);
#else
        domain = 0;
#endif
        for (flind = 0; flind < vm_nfreelists; flind++) {
                m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
                if (m != NULL)
                        return (m);
        }
        return (NULL);
}

/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
#if VM_NDOMAIN > 1
        vm_page_t m;
        int i, ndomains;
#endif
        int domain;

        KASSERT(flind < VM_NFREELIST,
            ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
            flind));
        KASSERT(pool < VM_NFREEPOOL,
            ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
        /*
         * This routine expects to be called with a VM_FREELIST_* constant.
         * On a system with multiple domains we need to adjust the flind
         * appropriately.  If it is for VM_FREELIST_DEFAULT we need to
         * iterate over the per-domain lists.
         */
        domain = PCPU_GET(domain);
        ndomains = vm_nfreelists - VM_NFREELIST + 1;
        if (flind == VM_FREELIST_DEFAULT) {
                m = NULL;
                for (i = 0; i < ndomains; i++, flind++) {
                        m = vm_phys_alloc_domain_pages(domain, flind, pool,
                            order);
                        if (m != NULL)
                                break;
                }
                return (m);
        } else if (flind > VM_FREELIST_DEFAULT)
                flind += ndomains - 1;
#else
        domain = 0;
#endif
        return (vm_phys_alloc_domain_pages(domain, flind, pool, order));
}
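
/*
 * To make the adjustment above concrete (hypothetical constants again:
 * VM_NFREELIST = 3, VM_FREELIST_DEFAULT = 0, ndomains = 2): lookup
 * indices 0 and 1 hold the two per-domain default lists, so a caller
 * passing the regular constant flind = 2 is shifted to lookup index
 * 2 + ndomains - 1 = 3, which vm_phys_init() pointed at raw queue 2.
 * A caller passing VM_FREELIST_DEFAULT instead walks lookup indices
 * 0 and 1, trying each domain's default list in turn.
 */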

static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
        struct vm_freelist *fl;
        struct vm_freelist *alt;
        int oind, pind;
        vm_page_t m;

        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        fl = (*vm_phys_lookup_lists[domain][flind])[pool];
        for (oind = order; oind < VM_NFREEORDER; oind++) {
                m = TAILQ_FIRST(&fl[oind].pl);
                if (m != NULL) {
                        TAILQ_REMOVE(&fl[oind].pl, m, pageq);
                        fl[oind].lcnt--;
                        m->order = VM_NFREEORDER;
                        vm_phys_split_pages(m, oind, fl, order);
                        return (m);
                }
        }

        /*
         * The given pool was empty.  Find the largest
         * contiguous, power-of-two-sized set of pages in any
         * pool.  Transfer these pages to the given pool, and
         * use them to satisfy the allocation.
         */
        for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                        alt = (*vm_phys_lookup_lists[domain][flind])[pind];
                        m = TAILQ_FIRST(&alt[oind].pl);
                        if (m != NULL) {
                                TAILQ_REMOVE(&alt[oind].pl, m, pageq);
                                alt[oind].lcnt--;
                                m->order = VM_NFREEORDER;
                                vm_phys_set_pool(pool, m, oind);
                                vm_phys_split_pages(m, oind, fl, order);
                                return (m);
                        }
                }
        }
        return (NULL);
}
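
/*
 * An explanatory sketch of the fallback above (describing existing
 * behavior, not changing it): if an order-2 request for
 * VM_FREEPOOL_DEFAULT finds that pool empty at every order, the second
 * pass scans from VM_NFREEORDER - 1 down to order 2 across all pools,
 * so the largest available block is taken; e.g. an order-5 block from
 * another pool is relabeled via vm_phys_set_pool() and then split with
 * vm_phys_split_pages(), yielding the order-2 allocation plus free
 * remainders at orders 4, 3, and 2.
 */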

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
        struct vm_phys_seg *seg;
        int segind;

        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                if (pa >= seg->start && pa < seg->end)
                        return (&seg->first_page[atop(pa - seg->start)]);
        }
        return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
        struct vm_phys_fictitious_seg *seg;
        vm_page_t m;
        int segind;

        m = NULL;
        for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
                seg = &vm_phys_fictitious_segs[segind];
                if (pa >= seg->start && pa < seg->end) {
                        m = &seg->first_page[atop(pa - seg->start)];
                        KASSERT((m->flags & PG_FICTITIOUS) != 0,
                            ("%p not fictitious", m));
                        break;
                }
        }
        return (m);
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
        struct vm_phys_fictitious_seg *seg;
        vm_page_t fp;
        long i, page_count;
        int segind;
#ifdef VM_PHYSSEG_DENSE
        long pi;
        boolean_t malloced;
#endif

        page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
        pi = atop(start);
        if (pi >= first_page && pi < vm_page_array_size + first_page) {
                if (atop(end) >= vm_page_array_size + first_page)
                        return (EINVAL);
                fp = &vm_page_array[pi - first_page];
                malloced = FALSE;
        } else
#endif
        {
                fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
                    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
                malloced = TRUE;
#endif
        }
        for (i = 0; i < page_count; i++) {
                vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
                pmap_page_init(&fp[i]);
                fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
        }
        mtx_lock(&vm_phys_fictitious_reg_mtx);
        for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
                seg = &vm_phys_fictitious_segs[segind];
                if (seg->start == 0 && seg->end == 0) {
                        seg->start = start;
                        seg->end = end;
                        seg->first_page = fp;
                        mtx_unlock(&vm_phys_fictitious_reg_mtx);
                        return (0);
                }
        }
        mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
        if (malloced)
#endif
                free(fp, M_FICT_PAGES);
        return (EBUSY);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
        struct vm_phys_fictitious_seg *seg;
        vm_page_t fp;
        int segind;
#ifdef VM_PHYSSEG_DENSE
        long pi;
#endif

#ifdef VM_PHYSSEG_DENSE
        pi = atop(start);
#endif

        mtx_lock(&vm_phys_fictitious_reg_mtx);
        for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
                seg = &vm_phys_fictitious_segs[segind];
                if (seg->start == start && seg->end == end) {
                        seg->start = seg->end = 0;
                        fp = seg->first_page;
                        seg->first_page = NULL;
                        mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
                        if (pi < first_page || atop(end) >= vm_page_array_size)
#endif
                                free(fp, M_FICT_PAGES);
                        return;
                }
        }
        mtx_unlock(&vm_phys_fictitious_reg_mtx);
        KASSERT(0, ("Unregistering not registered fictitious range"));
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
        struct vm_phys_seg *seg;
        int segind;

        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                if (pa >= seg->start && pa < seg->end)
                        return (segind);
        }
        panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
            (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        vm_paddr_t pa;
        vm_page_t m_buddy;

        KASSERT(m->order == VM_NFREEORDER,
            ("vm_phys_free_pages: page %p has unexpected order %d",
            m, m->order));
        KASSERT(m->pool < VM_NFREEPOOL,
            ("vm_phys_free_pages: page %p has unexpected pool %d",
            m, m->pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_free_pages: order %d is out of range", order));
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        seg = &vm_phys_segs[m->segind];
        if (order < VM_NFREEORDER - 1) {
                pa = VM_PAGE_TO_PHYS(m);
                do {
                        pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
                        if (pa < seg->start || pa >= seg->end)
                                break;
                        m_buddy = &seg->first_page[atop(pa - seg->start)];
                        if (m_buddy->order != order)
                                break;
                        fl = (*seg->free_queues)[m_buddy->pool];
                        TAILQ_REMOVE(&fl[order].pl, m_buddy, pageq);
                        fl[order].lcnt--;
                        m_buddy->order = VM_NFREEORDER;
                        if (m_buddy->pool != m->pool)
                                vm_phys_set_pool(m->pool, m_buddy, order);
                        order++;
                        pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
                        m = &seg->first_page[atop(pa - seg->start)];
                } while (order < VM_NFREEORDER - 1);
        }
        m->order = order;
        fl = (*seg->free_queues)[m->pool];
        TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
        fl[order].lcnt++;
}
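
/*
 * The buddy address arithmetic above, traced on a small example
 * (PAGE_SHIFT = 12 assumed, addresses hypothetical): freeing the
 * order-0 page at pa = 0x5000,
 *
 *      0x5000 ^ (1 << 12) = 0x4000     the order-0 buddy; if it is
 *                                      free, both merge into the
 *                                      order-1 block at 0x4000
 *      0x4000 ^ (1 << 13) = 0x6000     the order-1 buddy; if it is
 *                                      also free, the result is the
 *                                      order-2 block at 0x4000
 *
 * Coalescing stops at the first buddy that is not entirely free or
 * that lies outside the segment.
 */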

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
        u_int n;
        int order;

        /*
         * Avoid unnecessary coalescing by freeing the pages in the largest
         * possible power-of-two-sized subsets.
         */
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        for (;; npages -= n) {
                /*
                 * Unsigned "min" is used here so that "order" is assigned
                 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
                 * or the low-order bits of its physical address are zero
                 * because the size of a physical address exceeds the size of
                 * a long.
                 */
                order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
                    VM_NFREEORDER - 1);
                n = 1 << order;
                if (npages < n)
                        break;
                vm_phys_free_pages(m, order);
                m += n;
        }
        /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
        for (; npages > 0; npages -= n) {
                order = flsl(npages) - 1;
                n = 1 << order;
                vm_phys_free_pages(m, order);
                m += n;
        }
}
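
/*
 * For instance (illustrative numbers): freeing npages = 13 starting at
 * page frame number 8 proceeds as 8 + 4 + 1.  The first loop frees an
 * order-3 block at pfn 8, which its alignment allows; at pfn 16 the
 * alignment would permit order 4, but only 5 pages remain, so the
 * second loop takes over and frees an order-2 block at pfn 16 and an
 * order-0 page at pfn 20.
 */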

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
        vm_page_t m_tmp;

        for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
                m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise,
 * return FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        vm_paddr_t pa, pa_half;
        vm_page_t m_set, m_tmp;
        int order;

        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

        /*
         * First, find the contiguous, power of two-sized set of free
         * physical pages containing the given physical page "m" and
         * assign it to "m_set".
         */
        seg = &vm_phys_segs[m->segind];
        for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
            order < VM_NFREEORDER - 1; ) {
                order++;
                pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
                if (pa >= seg->start)
                        m_set = &seg->first_page[atop(pa - seg->start)];
                else
                        return (FALSE);
        }
        if (m_set->order < order)
                return (FALSE);
        if (m_set->order == VM_NFREEORDER)
                return (FALSE);
        KASSERT(m_set->order < VM_NFREEORDER,
            ("vm_phys_unfree_page: page %p has unexpected order %d",
            m_set, m_set->order));

        /*
         * Next, remove "m_set" from the free lists.  Finally, extract
         * "m" from "m_set" using an iterative algorithm: While "m_set"
         * is larger than a page, shrink "m_set" by returning the half
         * of "m_set" that does not contain "m" to the free lists.
         */
        fl = (*seg->free_queues)[m_set->pool];
        order = m_set->order;
        TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
        fl[order].lcnt--;
        m_set->order = VM_NFREEORDER;
        while (order > 0) {
                order--;
                pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
                if (m->phys_addr < pa_half)
                        m_tmp = &seg->first_page[atop(pa_half - seg->start)];
                else {
                        m_tmp = m_set;
                        m_set = &seg->first_page[atop(pa_half - seg->start)];
                }
                m_tmp->order = order;
                TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
                fl[order].lcnt++;
        }
        KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
        return (TRUE);
}
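
/*
 * A sketch of the extraction loop (hypothetical addresses, PAGE_SHIFT =
 * 12): suppose "m" is the page at 0x3000 and it sits inside a free
 * order-2 block "m_set" at 0x0000 (pages 0x0000-0x3fff).  The loop
 * halves the block twice:
 *
 *      order 1: pa_half = 0x0000 ^ 0x2000 = 0x2000; m is in the upper
 *               half, so pages 0x0000-0x1fff go back as an order-1
 *               block and m_set becomes the page at 0x2000
 *      order 0: pa_half = 0x2000 ^ 0x1000 = 0x3000; m is in the upper
 *               half again, so page 0x2000 goes back at order 0
 *
 * leaving m_set == m at 0x3000, now removed from the free lists, and
 * TRUE is returned.
 */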

/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
        static struct vm_freelist *fl = vm_phys_free_queues[0][0];
        static int flind, oind, pind;
        vm_page_t m, m_tmp;

        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        for (;;) {
                TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
                        for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
                                if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
                                        vm_phys_unfree_page(m_tmp);
                                        cnt.v_free_count--;
                                        mtx_unlock(&vm_page_queue_free_mtx);
                                        pmap_zero_page_idle(m_tmp);
                                        m_tmp->flags |= PG_ZERO;
                                        mtx_lock(&vm_page_queue_free_mtx);
                                        cnt.v_free_count++;
                                        vm_phys_free_pages(m_tmp, 0);
                                        vm_page_zero_count++;
                                        cnt_prezero++;
                                        return (TRUE);
                                }
                        }
                }
                oind++;
                if (oind == VM_NFREEORDER) {
                        oind = 0;
                        pind++;
                        if (pind == VM_NFREEPOOL) {
                                pind = 0;
                                flind++;
                                if (flind == vm_nfreelists)
                                        flind = 0;
                        }
                        fl = vm_phys_free_queues[flind][pind];
                }
        }
}
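
/*
 * Descriptive note only: the static "fl", "flind", "oind", and "pind"
 * above act as a scan cursor that persists across calls, so each call
 * resumes where the previous one stopped and the idle thread cycles
 * through every free list, pool, and order rather than rescanning the
 * same queue.
 */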

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, u_long boundary)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        vm_paddr_t pa, pa_last, size;
        vm_page_t m, m_ret;
        u_long npages_end;
        int domain, flind, oind, order, pind;

        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
#if VM_NDOMAIN > 1
        domain = PCPU_GET(domain);
#else
        domain = 0;
#endif
        size = npages << PAGE_SHIFT;
        KASSERT(size != 0,
            ("vm_phys_alloc_contig: size must not be 0"));
        KASSERT((alignment & (alignment - 1)) == 0,
            ("vm_phys_alloc_contig: alignment must be a power of 2"));
        KASSERT((boundary & (boundary - 1)) == 0,
            ("vm_phys_alloc_contig: boundary must be a power of 2"));
        /* Compute the queue that is the best fit for npages. */
        for (order = 0; (1 << order) < npages; order++);
        for (flind = 0; flind < vm_nfreelists; flind++) {
                for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
                        for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                fl = (*vm_phys_lookup_lists[domain][flind])
                                    [pind];
                                TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
                                        /*
                                         * A free list may contain physical pages
                                         * from one or more segments.
                                         */
                                        seg = &vm_phys_segs[m_ret->segind];
                                        if (seg->start > high ||
                                            low >= seg->end)
                                                continue;

                                        /*
                                         * Is the size of this allocation request
                                         * larger than the largest block size?
                                         */
                                        if (order >= VM_NFREEORDER) {
                                                /*
                                                 * Determine if a sufficient number
                                                 * of subsequent blocks to satisfy
                                                 * the allocation request are free.
                                                 */
                                                pa = VM_PAGE_TO_PHYS(m_ret);
                                                pa_last = pa + size;
                                                for (;;) {
                                                        pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
                                                        if (pa >= pa_last)
                                                                break;
                                                        if (pa < seg->start ||
                                                            pa >= seg->end)
                                                                break;
                                                        m = &seg->first_page[atop(pa - seg->start)];
                                                        if (m->order != VM_NFREEORDER - 1)
                                                                break;
                                                }
                                                /* If not, continue to the next block. */
                                                if (pa < pa_last)
                                                        continue;
                                        }

                                        /*
                                         * Determine if the blocks are within the given range,
                                         * satisfy the given alignment, and do not cross the
                                         * given boundary.
                                         */
                                        pa = VM_PAGE_TO_PHYS(m_ret);
                                        if (pa >= low &&
                                            pa + size <= high &&
                                            (pa & (alignment - 1)) == 0 &&
                                            ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
                                                goto done;
                                }
                        }
                }
        }
        return (NULL);
done:
        for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
                fl = (*seg->free_queues)[m->pool];
                TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
                fl[m->order].lcnt--;
                m->order = VM_NFREEORDER;
        }
        if (m_ret->pool != VM_FREEPOOL_DEFAULT)
                vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
        fl = (*seg->free_queues)[m_ret->pool];
        vm_phys_split_pages(m_ret, oind, fl, order);
        /* Return excess pages to the free lists. */
        npages_end = roundup2(npages, 1 << imin(oind, order));
        if (npages < npages_end)
                vm_phys_free_contig(&m_ret[npages], npages_end - npages);
        return (m_ret);
}
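
/*
 * The boundary test above, worked through (hypothetical values,
 * PAGE_SIZE = 4K): for a 16K request (size = 0x4000) with boundary =
 * 64K, a candidate at pa = 0x1c000 spans 0x1c000-0x1ffff, so
 *
 *      (0x1c000 ^ 0x1ffff) & ~0xffff = 0x03fff & ~0xffff = 0
 *
 * and the candidate is accepted, while pa = 0x1e000 spans
 * 0x1e000-0x21fff and
 *
 *      (0x1e000 ^ 0x21fff) & ~0xffff = 0x3ffff & ~0xffff = 0x30000
 *
 * is non-zero, so that candidate crosses the 0x20000 boundary and is
 * rejected.
 */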

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
        struct vm_freelist *fl;
        int flind, oind, pind;

        for (flind = 0; flind < vm_nfreelists; flind++) {
                db_printf("FREE LIST %d:\n"
                    "\n  ORDER (SIZE)  |  NUMBER"
                    "\n              ", flind);
                for (pind = 0; pind < VM_NFREEPOOL; pind++)
                        db_printf("  |  POOL %d", pind);
                db_printf("\n--            ");
                for (pind = 0; pind < VM_NFREEPOOL; pind++)
                        db_printf("-- --      ");
                db_printf("--\n");
                for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
                        db_printf("  %2.2d (%6.6dK)", oind,
                            1 << (PAGE_SHIFT - 10 + oind));
                        for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                fl = vm_phys_free_queues[flind][pind];
                                db_printf("  |  %6.6d", fl[oind].lcnt);
                        }
                        db_printf("\n");
                }
                db_printf("\n");
        }
}
#endif