vm_phys.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 2002-2006 Rice University
    3  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
    4  * All rights reserved.
    5  *
    6  * This software was developed for the FreeBSD Project by Alan L. Cox,
    7  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
   22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
   25  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
   28  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*
   33  *      Physical memory system implementation
   34  *
   35  * Any external functions defined by this module are only to be used by the
   36  * virtual memory system.
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD: releng/10.0/sys/vm/vm_phys.c 256275 2013-10-10 16:11:45Z alc $");
   41 
   42 #include "opt_ddb.h"
   43 #include "opt_vm.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/lock.h>
   48 #include <sys/kernel.h>
   49 #include <sys/malloc.h>
   50 #include <sys/mutex.h>
   51 #if MAXMEMDOM > 1
   52 #include <sys/proc.h>
   53 #endif
   54 #include <sys/queue.h>
   55 #include <sys/sbuf.h>
   56 #include <sys/sysctl.h>
   57 #include <sys/vmmeter.h>
   58 
   59 #include <ddb/ddb.h>
   60 
   61 #include <vm/vm.h>
   62 #include <vm/vm_param.h>
   63 #include <vm/vm_kern.h>
   64 #include <vm/vm_object.h>
   65 #include <vm/vm_page.h>
   66 #include <vm/vm_phys.h>
   67 
   68 _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
   69     "Too many physsegs.");
      /*
       * The assertion above exists because segment indices are used as bit
       * positions in long-sized masks: vm_phys_add_page() sets
       * "1UL << m->segind" in the domain's vmd_segs, and
       * vm_phys_domain_intersects() walks such a mask with ffsl().
       */
   70 
      /*
       * Memory affinity table consulted by vm_phys_create_seg().  NULL means
       * there is no affinity information and every segment is placed in
       * domain 0; otherwise the array ends with an entry whose "end" is 0.
       */
   71 struct mem_affinity *mem_affinity;
   72 
      /* Number of memory domains; exported read-only by a sysctl below. */
   73 int vm_ndomains = 1;
   74 
      /* Table of physical memory segments and the number of entries in use. */
   75 struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
   76 int vm_phys_nsegs;
   77 
      /*
       * Fixed-size table of registered fictitious ranges.  A slot whose
       * "start" and "end" are both 0 is free.  The register/unregister
       * functions search and update the table while holding
       * vm_phys_fictitious_reg_mtx.
       */
   78 #define VM_PHYS_FICTITIOUS_NSEGS        8
   79 static struct vm_phys_fictitious_seg {
   80         vm_paddr_t      start;
   81         vm_paddr_t      end;
   82         vm_page_t       first_page;
   83 } vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
   84 static struct mtx vm_phys_fictitious_reg_mtx;
   85 MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
   86 
      /* Free page queues, indexed by [domain][free list][pool][order]. */
   87 static struct vm_freelist
   88     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
   89 
      /*
       * Number of free lists in use.  vm_phys_init() raises it when an ISADMA
       * or HIGHMEM free list is configured and populated.
       */
   90 static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
   91 
      /* Counter exported read-only through the sysctl declared next. */
   92 static int cnt_prezero;
   93 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
   94     &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
   95 
      /* Read-only string sysctl whose text is produced by the named handler. */
   96 static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
   97 SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
   98     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
   99 
      /* Read-only string sysctl whose text is produced by the named handler. */
  100 static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
  101 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
  102     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
  103 
  104 SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
  105     &vm_ndomains, 0, "Number of physical memory domains available.");
  106 
      /* Forward declarations of file-local helpers defined further down. */
  107 static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
  108     int order);
  109 static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
  110     int domain);
  111 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
  112 static int vm_phys_paddr_to_segind(vm_paddr_t pa);
  113 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
  114     int order);
  115 
  116 static __inline int
  117 vm_rr_selectdomain(void)
  118 {
  119 #if MAXMEMDOM > 1
  120         struct thread *td;
  121 
  122         td = curthread;
  123 
  124         td->td_dom_rr_idx++;
  125         td->td_dom_rr_idx %= vm_ndomains;
  126         return (td->td_dom_rr_idx);
  127 #else
  128         return (0);
  129 #endif
  130 }
  131 
  132 boolean_t
  133 vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
  134 {
  135         struct vm_phys_seg *s;
  136         int idx;
  137 
  138         while ((idx = ffsl(mask)) != 0) {
  139                 idx--;  /* ffsl counts from 1 */
  140                 mask &= ~(1UL << idx);
  141                 s = &vm_phys_segs[idx];
  142                 if (low < s->end && high > s->start)
  143                         return (TRUE);
  144         }
  145         return (FALSE);
  146 }
  147 
  148 /*
  149  * Outputs the state of the physical memory allocator, specifically,
  150  * the amount of physical memory in each free list.
  151  */
  152 static int
  153 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
  154 {
  155         struct sbuf sbuf;
  156         struct vm_freelist *fl;
  157         int dom, error, flind, oind, pind;
  158 
  159         error = sysctl_wire_old_buffer(req, 0);
  160         if (error != 0)
  161                 return (error);
  162         sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
  163         for (dom = 0; dom < vm_ndomains; dom++) {
  164                 sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
  165                 for (flind = 0; flind < vm_nfreelists; flind++) {
  166                         sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
  167                             "\n  ORDER (SIZE)  |  NUMBER"
  168                             "\n              ", flind);
  169                         for (pind = 0; pind < VM_NFREEPOOL; pind++)
  170                                 sbuf_printf(&sbuf, "  |  POOL %d", pind);
  171                         sbuf_printf(&sbuf, "\n--            ");
  172                         for (pind = 0; pind < VM_NFREEPOOL; pind++)
  173                                 sbuf_printf(&sbuf, "-- --      ");
  174                         sbuf_printf(&sbuf, "--\n");
  175                         for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
  176                                 sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
  177                                     1 << (PAGE_SHIFT - 10 + oind));
  178                                 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  179                                 fl = vm_phys_free_queues[dom][flind][pind];
  180                                         sbuf_printf(&sbuf, "  |  %6d",
  181                                             fl[oind].lcnt);
  182                                 }
  183                                 sbuf_printf(&sbuf, "\n");
  184                         }
  185                 }
  186         }
  187         error = sbuf_finish(&sbuf);
  188         sbuf_delete(&sbuf);
  189         return (error);
  190 }
  191 
  192 /*
  193  * Outputs the set of physical memory segments.
  194  */
  195 static int
  196 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
  197 {
  198         struct sbuf sbuf;
  199         struct vm_phys_seg *seg;
  200         int error, segind;
  201 
  202         error = sysctl_wire_old_buffer(req, 0);
  203         if (error != 0)
  204                 return (error);
  205         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
  206         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  207                 sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
  208                 seg = &vm_phys_segs[segind];
  209                 sbuf_printf(&sbuf, "start:     %#jx\n",
  210                     (uintmax_t)seg->start);
  211                 sbuf_printf(&sbuf, "end:       %#jx\n",
  212                     (uintmax_t)seg->end);
  213                 sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
  214                 sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
  215         }
  216         error = sbuf_finish(&sbuf);
  217         sbuf_delete(&sbuf);
  218         return (error);
  219 }
  220 
  221 static void
  222 vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
  223 {
  224 
  225         m->order = order;
  226         if (tail)
  227                 TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
  228         else
  229                 TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
  230         fl[order].lcnt++;
  231 }
  232 
  233 static void
  234 vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
  235 {
  236 
  237         TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
  238         fl[order].lcnt--;
  239         m->order = VM_NFREEORDER;
  240 }
  241 
  242 /*
  243  * Create a physical memory segment.
  244  */
  245 static void
  246 _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
  247 {
  248         struct vm_phys_seg *seg;
  249 #ifdef VM_PHYSSEG_SPARSE
  250         long pages;
  251         int segind;
  252 
  253         pages = 0;
  254         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  255                 seg = &vm_phys_segs[segind];
  256                 pages += atop(seg->end - seg->start);
  257         }
  258 #endif
  259         KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
  260             ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
  261         KASSERT(domain < vm_ndomains,
  262             ("vm_phys_create_seg: invalid domain provided"));
  263         seg = &vm_phys_segs[vm_phys_nsegs++];
  264         seg->start = start;
  265         seg->end = end;
  266         seg->domain = domain;
  267 #ifdef VM_PHYSSEG_SPARSE
  268         seg->first_page = &vm_page_array[pages];
  269 #else
  270         seg->first_page = PHYS_TO_VM_PAGE(start);
  271 #endif
  272         seg->free_queues = &vm_phys_free_queues[domain][flind];
  273 }
  274 
  275 static void
  276 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
  277 {
  278         int i;
  279 
  280         if (mem_affinity == NULL) {
  281                 _vm_phys_create_seg(start, end, flind, 0);
  282                 return;
  283         }
  284 
  285         for (i = 0;; i++) {
  286                 if (mem_affinity[i].end == 0)
  287                         panic("Reached end of affinity info");
  288                 if (mem_affinity[i].end <= start)
  289                         continue;
  290                 if (mem_affinity[i].start > start)
  291                         panic("No affinity info for start %jx",
  292                             (uintmax_t)start);
  293                 if (mem_affinity[i].end >= end) {
  294                         _vm_phys_create_seg(start, end, flind,
  295                             mem_affinity[i].domain);
  296                         break;
  297                 }
  298                 _vm_phys_create_seg(start, mem_affinity[i].end, flind,
  299                     mem_affinity[i].domain);
  300                 start = mem_affinity[i].end;
  301         }
  302 }
  303 
  304 /*
  305  * Initialize the physical memory allocator.
  306  */
  307 void
  308 vm_phys_init(void)
  309 {
  310         struct vm_freelist *fl;
  311         int dom, flind, i, oind, pind;
  312 
  313         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
  314 #ifdef  VM_FREELIST_ISADMA
  315                 if (phys_avail[i] < 16777216) {
  316                         if (phys_avail[i + 1] > 16777216) {
  317                                 vm_phys_create_seg(phys_avail[i], 16777216,
  318                                     VM_FREELIST_ISADMA);
  319                                 vm_phys_create_seg(16777216, phys_avail[i + 1],
  320                                     VM_FREELIST_DEFAULT);
  321                         } else {
  322                                 vm_phys_create_seg(phys_avail[i],
  323                                     phys_avail[i + 1], VM_FREELIST_ISADMA);
  324                         }
  325                         if (VM_FREELIST_ISADMA >= vm_nfreelists)
  326                                 vm_nfreelists = VM_FREELIST_ISADMA + 1;
  327                 } else
  328 #endif
  329 #ifdef  VM_FREELIST_HIGHMEM
  330                 if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
  331                         if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
  332                                 vm_phys_create_seg(phys_avail[i],
  333                                     VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
  334                                 vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
  335                                     phys_avail[i + 1], VM_FREELIST_HIGHMEM);
  336                         } else {
  337                                 vm_phys_create_seg(phys_avail[i],
  338                                     phys_avail[i + 1], VM_FREELIST_HIGHMEM);
  339                         }
  340                         if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
  341                                 vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
  342                 } else
  343 #endif
  344                 vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
  345                     VM_FREELIST_DEFAULT);
  346         }
  347         for (dom = 0; dom < vm_ndomains; dom++) {
  348                 for (flind = 0; flind < vm_nfreelists; flind++) {
  349                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  350                                 fl = vm_phys_free_queues[dom][flind][pind];
  351                                 for (oind = 0; oind < VM_NFREEORDER; oind++)
  352                                         TAILQ_INIT(&fl[oind].pl);
  353                         }
  354                 }
  355         }
  356         mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
  357 }
  358 
  359 /*
  360  * Split a contiguous, power of two-sized set of physical pages.
  361  */
  362 static __inline void
  363 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
  364 {
  365         vm_page_t m_buddy;
  366 
  367         while (oind > order) {
  368                 oind--;
  369                 m_buddy = &m[1 << oind];
  370                 KASSERT(m_buddy->order == VM_NFREEORDER,
  371                     ("vm_phys_split_pages: page %p has unexpected order %d",
  372                     m_buddy, m_buddy->order));
  373                 vm_freelist_add(fl, m_buddy, oind, 0);
  374         }
  375 }
  376 
  377 /*
  378  * Initialize a physical page and add it to the free lists.
  379  */
  380 void
  381 vm_phys_add_page(vm_paddr_t pa)
  382 {
  383         vm_page_t m;
  384         struct vm_domain *vmd;
  385 
  386         cnt.v_page_count++;
  387         m = vm_phys_paddr_to_vm_page(pa);
  388         m->phys_addr = pa;
  389         m->queue = PQ_NONE;
  390         m->segind = vm_phys_paddr_to_segind(pa);
  391         vmd = vm_phys_domain(m);
  392         vmd->vmd_page_count++;
  393         vmd->vmd_segs |= 1UL << m->segind;
  394         m->flags = PG_FREE;
  395         KASSERT(m->order == VM_NFREEORDER,
  396             ("vm_phys_add_page: page %p has unexpected order %d",
  397             m, m->order));
  398         m->pool = VM_FREEPOOL_DEFAULT;
  399         pmap_page_init(m);
  400         mtx_lock(&vm_page_queue_free_mtx);
  401         vm_phys_freecnt_adj(m, 1);
  402         vm_phys_free_pages(m, 0);
  403         mtx_unlock(&vm_page_queue_free_mtx);
  404 }
  405 
  406 /*
  407  * Allocate a contiguous, power of two-sized set of physical pages
  408  * from the free lists.
  409  *
  410  * The free page queues must be locked.
  411  */
  412 vm_page_t
  413 vm_phys_alloc_pages(int pool, int order)
  414 {
  415         vm_page_t m;
  416         int dom, domain, flind;
  417 
  418         KASSERT(pool < VM_NFREEPOOL,
  419             ("vm_phys_alloc_pages: pool %d is out of range", pool));
  420         KASSERT(order < VM_NFREEORDER,
  421             ("vm_phys_alloc_pages: order %d is out of range", order));
  422 
  423         for (dom = 0; dom < vm_ndomains; dom++) {
  424                 domain = vm_rr_selectdomain();
  425                 for (flind = 0; flind < vm_nfreelists; flind++) {
  426                         m = vm_phys_alloc_domain_pages(domain, flind, pool,
  427                             order);
  428                         if (m != NULL)
  429                                 return (m);
  430                 }
  431         }
  432         return (NULL);
  433 }
  434 
  435 /*
  436  * Find and dequeue a free page on the given free list, with the 
  437  * specified pool and order
  438  */
  439 vm_page_t
  440 vm_phys_alloc_freelist_pages(int flind, int pool, int order)
  441 {
  442         vm_page_t m;
  443         int dom, domain;
  444 
  445         KASSERT(flind < VM_NFREELIST,
  446             ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
  447         KASSERT(pool < VM_NFREEPOOL,
  448             ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
  449         KASSERT(order < VM_NFREEORDER,
  450             ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
  451 
  452         for (dom = 0; dom < vm_ndomains; dom++) {
  453                 domain = vm_rr_selectdomain();
  454                 m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
  455                 if (m != NULL)
  456                         return (m);
  457         }
  458         return (NULL);
  459 }
  460 
  461 static vm_page_t
  462 vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
  463 {       
  464         struct vm_freelist *fl;
  465         struct vm_freelist *alt;
  466         int oind, pind;
  467         vm_page_t m;
  468 
  469         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  470         fl = &vm_phys_free_queues[domain][flind][pool][0];
  471         for (oind = order; oind < VM_NFREEORDER; oind++) {
  472                 m = TAILQ_FIRST(&fl[oind].pl);
  473                 if (m != NULL) {
  474                         vm_freelist_rem(fl, m, oind);
  475                         vm_phys_split_pages(m, oind, fl, order);
  476                         return (m);
  477                 }
  478         }
  479 
  480         /*
  481          * The given pool was empty.  Find the largest
  482          * contiguous, power-of-two-sized set of pages in any
  483          * pool.  Transfer these pages to the given pool, and
  484          * use them to satisfy the allocation.
  485          */
  486         for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
  487                 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  488                         alt = &vm_phys_free_queues[domain][flind][pind][0];
  489                         m = TAILQ_FIRST(&alt[oind].pl);
  490                         if (m != NULL) {
  491                                 vm_freelist_rem(alt, m, oind);
  492                                 vm_phys_set_pool(pool, m, oind);
  493                                 vm_phys_split_pages(m, oind, fl, order);
  494                                 return (m);
  495                         }
  496                 }
  497         }
  498         return (NULL);
  499 }
  500 
  501 /*
  502  * Find the vm_page corresponding to the given physical address.
  503  */
  504 vm_page_t
  505 vm_phys_paddr_to_vm_page(vm_paddr_t pa)
  506 {
  507         struct vm_phys_seg *seg;
  508         int segind;
  509 
  510         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  511                 seg = &vm_phys_segs[segind];
  512                 if (pa >= seg->start && pa < seg->end)
  513                         return (&seg->first_page[atop(pa - seg->start)]);
  514         }
  515         return (NULL);
  516 }
  517 
  518 vm_page_t
  519 vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
  520 {
  521         struct vm_phys_fictitious_seg *seg;
  522         vm_page_t m;
  523         int segind;
  524 
  525         m = NULL;
  526         for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
  527                 seg = &vm_phys_fictitious_segs[segind];
  528                 if (pa >= seg->start && pa < seg->end) {
  529                         m = &seg->first_page[atop(pa - seg->start)];
  530                         KASSERT((m->flags & PG_FICTITIOUS) != 0,
  531                             ("%p not fictitious", m));
  532                         break;
  533                 }
  534         }
  535         return (m);
  536 }
  537 
  538 int
  539 vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
  540     vm_memattr_t memattr)
  541 {
  542         struct vm_phys_fictitious_seg *seg;
  543         vm_page_t fp;
  544         long i, page_count;
  545         int segind;
  546 #ifdef VM_PHYSSEG_DENSE
  547         long pi;
  548         boolean_t malloced;
  549 #endif
  550 
  551         page_count = (end - start) / PAGE_SIZE;
  552 
  553 #ifdef VM_PHYSSEG_DENSE
  554         pi = atop(start);
  555         if (pi >= first_page && atop(end) < vm_page_array_size) {
  556                 fp = &vm_page_array[pi - first_page];
  557                 malloced = FALSE;
  558         } else
  559 #endif
  560         {
  561                 fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
  562                     M_WAITOK | M_ZERO);
  563 #ifdef VM_PHYSSEG_DENSE
  564                 malloced = TRUE;
  565 #endif
  566         }
  567         for (i = 0; i < page_count; i++) {
  568                 vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
  569                 fp[i].oflags &= ~VPO_UNMANAGED;
  570                 fp[i].busy_lock = VPB_UNBUSIED;
  571         }
  572         mtx_lock(&vm_phys_fictitious_reg_mtx);
  573         for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
  574                 seg = &vm_phys_fictitious_segs[segind];
  575                 if (seg->start == 0 && seg->end == 0) {
  576                         seg->start = start;
  577                         seg->end = end;
  578                         seg->first_page = fp;
  579                         mtx_unlock(&vm_phys_fictitious_reg_mtx);
  580                         return (0);
  581                 }
  582         }
  583         mtx_unlock(&vm_phys_fictitious_reg_mtx);
  584 #ifdef VM_PHYSSEG_DENSE
  585         if (malloced)
  586 #endif
  587                 free(fp, M_FICT_PAGES);
  588         return (EBUSY);
  589 }
  590 
  591 void
  592 vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
  593 {
  594         struct vm_phys_fictitious_seg *seg;
  595         vm_page_t fp;
  596         int segind;
  597 #ifdef VM_PHYSSEG_DENSE
  598         long pi;
  599 #endif
  600 
  601 #ifdef VM_PHYSSEG_DENSE
  602         pi = atop(start);
  603 #endif
  604 
  605         mtx_lock(&vm_phys_fictitious_reg_mtx);
  606         for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
  607                 seg = &vm_phys_fictitious_segs[segind];
  608                 if (seg->start == start && seg->end == end) {
  609                         seg->start = seg->end = 0;
  610                         fp = seg->first_page;
  611                         seg->first_page = NULL;
  612                         mtx_unlock(&vm_phys_fictitious_reg_mtx);
  613 #ifdef VM_PHYSSEG_DENSE
  614                         if (pi < first_page || atop(end) >= vm_page_array_size)
  615 #endif
  616                                 free(fp, M_FICT_PAGES);
  617                         return;
  618                 }
  619         }
  620         mtx_unlock(&vm_phys_fictitious_reg_mtx);
  621         KASSERT(0, ("Unregistering not registered fictitious range"));
  622 }
  623 
  624 /*
  625  * Find the segment containing the given physical address.
  626  */
  627 static int
  628 vm_phys_paddr_to_segind(vm_paddr_t pa)
  629 {
  630         struct vm_phys_seg *seg;
  631         int segind;
  632 
  633         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  634                 seg = &vm_phys_segs[segind];
  635                 if (pa >= seg->start && pa < seg->end)
  636                         return (segind);
  637         }
  638         panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" ,
  639             (uintmax_t)pa);
  640 }
  641 
  642 /*
  643  * Free a contiguous, power of two-sized set of physical pages.
  644  *
  645  * The free page queues must be locked.
  646  */
  647 void
  648 vm_phys_free_pages(vm_page_t m, int order)
  649 {
  650         struct vm_freelist *fl;
  651         struct vm_phys_seg *seg;
  652         vm_paddr_t pa;
  653         vm_page_t m_buddy;
  654 
  655         KASSERT(m->order == VM_NFREEORDER,
  656             ("vm_phys_free_pages: page %p has unexpected order %d",
  657             m, m->order));
  658         KASSERT(m->pool < VM_NFREEPOOL,
  659             ("vm_phys_free_pages: page %p has unexpected pool %d",
  660             m, m->pool));
  661         KASSERT(order < VM_NFREEORDER,
  662             ("vm_phys_free_pages: order %d is out of range", order));
  663         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  664         seg = &vm_phys_segs[m->segind];
  665         if (order < VM_NFREEORDER - 1) {
  666                 pa = VM_PAGE_TO_PHYS(m);
  667                 do {
  668                         pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
  669                         if (pa < seg->start || pa >= seg->end)
  670                                 break;
  671                         m_buddy = &seg->first_page[atop(pa - seg->start)];
  672                         if (m_buddy->order != order)
  673                                 break;
  674                         fl = (*seg->free_queues)[m_buddy->pool];
  675                         vm_freelist_rem(fl, m_buddy, order);
  676                         if (m_buddy->pool != m->pool)
  677                                 vm_phys_set_pool(m->pool, m_buddy, order);
  678                         order++;
  679                         pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
  680                         m = &seg->first_page[atop(pa - seg->start)];
  681                 } while (order < VM_NFREEORDER - 1);
  682         }
  683         fl = (*seg->free_queues)[m->pool];
  684         vm_freelist_add(fl, m, order, 1);
  685 }
  686 
  687 /*
  688  * Free a contiguous, arbitrarily sized set of physical pages.
  689  *
  690  * The free page queues must be locked.
  691  */
  692 void
  693 vm_phys_free_contig(vm_page_t m, u_long npages)
  694 {
  695         u_int n;
  696         int order;
  697 
  698         /*
  699          * Avoid unnecessary coalescing by freeing the pages in the largest
  700          * possible power-of-two-sized subsets.
  701          */
  702         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  703         for (;; npages -= n) {
  704                 /*
  705                  * Unsigned "min" is used here so that "order" is assigned
  706                  * "VM_NFREEORDER - 1" when "m"'s physical address is zero
  707                  * or the low-order bits of its physical address are zero
  708                  * because the size of a physical address exceeds the size of
  709                  * a long.
  710                  */
  711                 order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
  712                     VM_NFREEORDER - 1);
  713                 n = 1 << order;
  714                 if (npages < n)
  715                         break;
  716                 vm_phys_free_pages(m, order);
  717                 m += n;
  718         }
  719         /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
  720         for (; npages > 0; npages -= n) {
  721                 order = flsl(npages) - 1;
  722                 n = 1 << order;
  723                 vm_phys_free_pages(m, order);
  724                 m += n;
  725         }
  726 }
  727 
  728 /*
  729  * Set the pool for a contiguous, power of two-sized set of physical pages. 
  730  */
  731 void
  732 vm_phys_set_pool(int pool, vm_page_t m, int order)
  733 {
  734         vm_page_t m_tmp;
  735 
  736         for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
  737                 m_tmp->pool = pool;
  738 }
  739 
  740 /*
  741  * Search for the given physical page "m" in the free lists.  If the search
  742  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
  743  * FALSE, indicating that "m" is not in the free lists.
  744  *
  745  * The free page queues must be locked.
  746  */
  747 boolean_t
  748 vm_phys_unfree_page(vm_page_t m)
  749 {
  750         struct vm_freelist *fl;
  751         struct vm_phys_seg *seg;
  752         vm_paddr_t pa, pa_half;
  753         vm_page_t m_set, m_tmp;
  754         int order;
  755 
  756         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  757 
  758         /*
  759          * First, find the contiguous, power of two-sized set of free
  760          * physical pages containing the given physical page "m" and
  761          * assign it to "m_set".
  762          */
  763         seg = &vm_phys_segs[m->segind];
  764         for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
  765             order < VM_NFREEORDER - 1; ) {
  766                 order++;
  767                 pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
  768                 if (pa >= seg->start)
  769                         m_set = &seg->first_page[atop(pa - seg->start)];
  770                 else
  771                         return (FALSE);
  772         }
  773         if (m_set->order < order)
  774                 return (FALSE);
  775         if (m_set->order == VM_NFREEORDER)
  776                 return (FALSE);
  777         KASSERT(m_set->order < VM_NFREEORDER,
  778             ("vm_phys_unfree_page: page %p has unexpected order %d",
  779             m_set, m_set->order));
  780 
  781         /*
  782          * Next, remove "m_set" from the free lists.  Finally, extract
  783          * "m" from "m_set" using an iterative algorithm: While "m_set"
  784          * is larger than a page, shrink "m_set" by returning the half
  785          * of "m_set" that does not contain "m" to the free lists.
  786          */
  787         fl = (*seg->free_queues)[m_set->pool];
  788         order = m_set->order;
  789         vm_freelist_rem(fl, m_set, order);
  790         while (order > 0) {
  791                 order--;
  792                 pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
  793                 if (m->phys_addr < pa_half)
  794                         m_tmp = &seg->first_page[atop(pa_half - seg->start)];
  795                 else {
  796                         m_tmp = m_set;
  797                         m_set = &seg->first_page[atop(pa_half - seg->start)];
  798                 }
  799                 vm_freelist_add(fl, m_tmp, order, 0);
  800         }
  801         KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
  802         return (TRUE);
  803 }
  804 
  805 /*
  806  * Try to zero one physical page.  Used by an idle priority thread.
  807  */
  808 boolean_t
  809 vm_phys_zero_pages_idle(void)
  810 {
  811         static struct vm_freelist *fl;
  812         static int flind, oind, pind;
  813         vm_page_t m, m_tmp;
  814         int domain;
  815 
  816         domain = vm_rr_selectdomain();
  817         fl = vm_phys_free_queues[domain][0][0];
  818         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  819         for (;;) {
  820                 TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
  821                         for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
  822                                 if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
  823                                         vm_phys_unfree_page(m_tmp);
  824                                         vm_phys_freecnt_adj(m, -1);
  825                                         mtx_unlock(&vm_page_queue_free_mtx);
  826                                         pmap_zero_page_idle(m_tmp);
  827                                         m_tmp->flags |= PG_ZERO;
  828                                         mtx_lock(&vm_page_queue_free_mtx);
  829                                         vm_phys_freecnt_adj(m, 1);
  830                                         vm_phys_free_pages(m_tmp, 0);
  831                                         vm_page_zero_count++;
  832                                         cnt_prezero++;
  833                                         return (TRUE);
  834                                 }
  835                         }
  836                 }
  837                 oind++;
  838                 if (oind == VM_NFREEORDER) {
  839                         oind = 0;
  840                         pind++;
  841                         if (pind == VM_NFREEPOOL) {
  842                                 pind = 0;
  843                                 flind++;
  844                                 if (flind == vm_nfreelists)
  845                                         flind = 0;
  846                         }
  847                         fl = vm_phys_free_queues[domain][flind][pind];
  848                 }
  849         }
  850 }
  851 
  852 /*
  853  * Allocate a contiguous set of physical pages of the given size
  854  * "npages" from the free lists.  All of the physical pages must be at
  855  * or above the given physical address "low" and below the given
  856  * physical address "high".  The given value "alignment" determines the
  857  * alignment of the first physical page in the set.  If the given value
  858  * "boundary" is non-zero, then the set of physical pages cannot cross
  859  * any physical address boundary that is a multiple of that value.  Both
  860  * "alignment" and "boundary" must be a power of two.
  861  */
  862 vm_page_t
  863 vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
  864     u_long alignment, vm_paddr_t boundary)
  865 {
  866         struct vm_freelist *fl;
  867         struct vm_phys_seg *seg;
  868         vm_paddr_t pa, pa_last, size;
  869         vm_page_t m, m_ret;
  870         u_long npages_end;
  871         int dom, domain, flind, oind, order, pind;
  872 
  873         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  874         size = npages << PAGE_SHIFT;
  875         KASSERT(size != 0,
  876             ("vm_phys_alloc_contig: size must not be 0"));
  877         KASSERT((alignment & (alignment - 1)) == 0,
  878             ("vm_phys_alloc_contig: alignment must be a power of 2"));
  879         KASSERT((boundary & (boundary - 1)) == 0,
  880             ("vm_phys_alloc_contig: boundary must be a power of 2"));
  881         /* Compute the queue that is the best fit for npages. */
  882         for (order = 0; (1 << order) < npages; order++);
  883         dom = 0;
  884 restartdom:
  885         domain = vm_rr_selectdomain();
  886         for (flind = 0; flind < vm_nfreelists; flind++) {
  887                 for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
  888                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  889                                 fl = &vm_phys_free_queues[domain][flind][pind][0];
  890                                 TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
  891                                         /*
  892                                          * A free list may contain physical pages
  893                                          * from one or more segments.
  894                                          */
  895                                         seg = &vm_phys_segs[m_ret->segind];
  896                                         if (seg->start > high ||
  897                                             low >= seg->end)
  898                                                 continue;
  899 
  900                                         /*
  901                                          * Is the size of this allocation request
  902                                          * larger than the largest block size?
  903                                          */
  904                                         if (order >= VM_NFREEORDER) {
  905                                                 /*
  906                                                  * Determine if a sufficient number
  907                                                  * of subsequent blocks to satisfy
  908                                                  * the allocation request are free.
  909                                                  */
  910                                                 pa = VM_PAGE_TO_PHYS(m_ret);
  911                                                 pa_last = pa + size;
  912                                                 for (;;) {
  913                                                         pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
  914                                                         if (pa >= pa_last)
  915                                                                 break;
  916                                                         if (pa < seg->start ||
  917                                                             pa >= seg->end)
  918                                                                 break;
  919                                                         m = &seg->first_page[atop(pa - seg->start)];
  920                                                         if (m->order != VM_NFREEORDER - 1)
  921                                                                 break;
  922                                                 }
  923                                                 /* If not, continue to the next block. */
  924                                                 if (pa < pa_last)
  925                                                         continue;
  926                                         }
  927 
  928                                         /*
  929                                          * Determine if the blocks are within the given range,
  930                                          * satisfy the given alignment, and do not cross the
  931                                          * given boundary.
  932                                          */
  933                                         pa = VM_PAGE_TO_PHYS(m_ret);
  934                                         if (pa >= low &&
  935                                             pa + size <= high &&
  936                                             (pa & (alignment - 1)) == 0 &&
  937                                             ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
  938                                                 goto done;
  939                                 }
  940                         }
  941                 }
  942         }
  943         if (++dom < vm_ndomains)
  944                 goto restartdom;
  945         return (NULL);
  946 done:
  947         for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
  948                 fl = (*seg->free_queues)[m->pool];
  949                 vm_freelist_rem(fl, m, m->order);
  950         }
  951         if (m_ret->pool != VM_FREEPOOL_DEFAULT)
  952                 vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
  953         fl = (*seg->free_queues)[m_ret->pool];
  954         vm_phys_split_pages(m_ret, oind, fl, order);
  955         /* Return excess pages to the free lists. */
  956         npages_end = roundup2(npages, 1 << imin(oind, order));
  957         if (npages < npages_end)
  958                 vm_phys_free_contig(&m_ret[npages], npages_end - npages);
  959         return (m_ret);
  960 }
  961 
  962 #ifdef DDB
  963 /*
  964  * Show the number of physical pages in each of the free lists.
  965  */
  966 DB_SHOW_COMMAND(freepages, db_show_freepages)
  967 {
  968         struct vm_freelist *fl;
  969         int flind, oind, pind, dom;
  970 
  971         for (dom = 0; dom < vm_ndomains; dom++) {
  972                 db_printf("DOMAIN: %d\n", dom);
  973                 for (flind = 0; flind < vm_nfreelists; flind++) {
  974                         db_printf("FREE LIST %d:\n"
  975                             "\n  ORDER (SIZE)  |  NUMBER"
  976                             "\n              ", flind);
  977                         for (pind = 0; pind < VM_NFREEPOOL; pind++)
  978                                 db_printf("  |  POOL %d", pind);
  979                         db_printf("\n--            ");
  980                         for (pind = 0; pind < VM_NFREEPOOL; pind++)
  981                                 db_printf("-- --      ");
  982                         db_printf("--\n");
  983                         for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
  984                                 db_printf("  %2.2d (%6.6dK)", oind,
  985                                     1 << (PAGE_SHIFT - 10 + oind));
  986                                 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  987                                 fl = vm_phys_free_queues[dom][flind][pind];
  988                                         db_printf("  |  %6.6d", fl[oind].lcnt);
  989                                 }
  990                                 db_printf("\n");
  991                         }
  992                         db_printf("\n");
  993                 }
  994                 db_printf("\n");
  995         }
  996 }
  997 #endif
Cache object: dcf5cd74b988086b6796e0070eba354c
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/vm/vm_phys.c

FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_phys.c