FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_phys.c


/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *      Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.1/sys/vm/vm_phys.c 265435 2014-05-06 12:20:07Z kib $");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#if MAXMEMDOM > 1
#include <sys/proc.h>
#endif
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

struct mem_affinity *mem_affinity;

int vm_ndomains = 1;

struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
int vm_phys_nsegs;

#define VM_PHYS_FICTITIOUS_NSEGS        8
static struct vm_phys_fictitious_seg {
        vm_paddr_t      start;
        vm_paddr_t      end;
        vm_page_t       first_page;
} vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
static struct mtx vm_phys_fictitious_reg_mtx;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
    int order);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

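/*
 * Select the memory domain for the next allocation, round-robin per
 * thread: each call advances the calling thread's cursor by one domain,
 * spreading successive allocations across all domains.
 */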
static __inline int
vm_rr_selectdomain(void)
{
#if MAXMEMDOM > 1
        struct thread *td;

        td = curthread;

        td->td_dom_rr_idx++;
        td->td_dom_rr_idx %= vm_ndomains;
        return (td->td_dom_rr_idx);
#else
        return (0);
#endif
}

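/*
 * Return TRUE if any physical segment whose index bit is set in "mask"
 * overlaps the physical address range [low, high).
 */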
boolean_t
vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
{
        struct vm_phys_seg *s;
        int idx;

        while ((idx = ffsl(mask)) != 0) {
                idx--;  /* ffsl counts from 1 */
                mask &= ~(1UL << idx);
                s = &vm_phys_segs[idx];
                if (low < s->end && high > s->start)
                        return (TRUE);
        }
        return (FALSE);
}

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        struct vm_freelist *fl;
        int dom, error, flind, oind, pind;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
        for (dom = 0; dom < vm_ndomains; dom++) {
                sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
                            "\n  ORDER (SIZE)  |  NUMBER"
                            "\n              ", flind);
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                sbuf_printf(&sbuf, "  |  POOL %d", pind);
                        sbuf_printf(&sbuf, "\n--            ");
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                sbuf_printf(&sbuf, "-- --      ");
                        sbuf_printf(&sbuf, "--\n");
                        for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
                                sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
                                    1 << (PAGE_SHIFT - 10 + oind));
                                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                        fl = vm_phys_free_queues[dom][flind][pind];
                                        sbuf_printf(&sbuf, "  |  %6d",
                                            fl[oind].lcnt);
                                }
                                sbuf_printf(&sbuf, "\n");
                        }
                }
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        struct vm_phys_seg *seg;
        int error, segind;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
                seg = &vm_phys_segs[segind];
                sbuf_printf(&sbuf, "start:     %#jx\n",
                    (uintmax_t)seg->start);
                sbuf_printf(&sbuf, "end:       %#jx\n",
                    (uintmax_t)seg->end);
                sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
                sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

        m->order = order;
        if (tail)
                TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
        else
                TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
        fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

        TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
        fl[order].lcnt--;
        m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
        struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
        long pages;
        int segind;

        pages = 0;
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                pages += atop(seg->end - seg->start);
        }
#endif
        KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
            ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
        KASSERT(domain < vm_ndomains,
            ("vm_phys_create_seg: invalid domain provided"));
        seg = &vm_phys_segs[vm_phys_nsegs++];
        seg->start = start;
        seg->end = end;
        seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
        seg->first_page = &vm_page_array[pages];
#else
        seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
        seg->free_queues = &vm_phys_free_queues[domain][flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
        int i;

        if (mem_affinity == NULL) {
                _vm_phys_create_seg(start, end, flind, 0);
                return;
        }

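        /*
         * Walk the mem_affinity table and carve [start, end) into one
         * segment per memory domain, splitting at each domain boundary
         * that falls inside the range.
         */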
        for (i = 0;; i++) {
                if (mem_affinity[i].end == 0)
                        panic("Reached end of affinity info");
                if (mem_affinity[i].end <= start)
                        continue;
                if (mem_affinity[i].start > start)
                        panic("No affinity info for start %jx",
                            (uintmax_t)start);
                if (mem_affinity[i].end >= end) {
                        _vm_phys_create_seg(start, end, flind,
                            mem_affinity[i].domain);
                        break;
                }
                _vm_phys_create_seg(start, mem_affinity[i].end, flind,
                    mem_affinity[i].domain);
                start = mem_affinity[i].end;
        }
}

/*
 * Initialize the physical memory allocator.
 */
void
vm_phys_init(void)
{
        struct vm_freelist *fl;
        int dom, flind, i, oind, pind;

        for (i = 0; phys_avail[i + 1] != 0; i += 2) {
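                /*
                 * phys_avail[] holds (start, end) pairs of available
                 * physical memory terminated by a zero entry.  Each range
                 * is assigned to a free list; 16777216 is the 16 MB ISA
                 * DMA limit.
                 */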
#ifdef  VM_FREELIST_ISADMA
                if (phys_avail[i] < 16777216) {
                        if (phys_avail[i + 1] > 16777216) {
                                vm_phys_create_seg(phys_avail[i], 16777216,
                                    VM_FREELIST_ISADMA);
                                vm_phys_create_seg(16777216, phys_avail[i + 1],
                                    VM_FREELIST_DEFAULT);
                        } else {
                                vm_phys_create_seg(phys_avail[i],
                                    phys_avail[i + 1], VM_FREELIST_ISADMA);
                        }
                        if (VM_FREELIST_ISADMA >= vm_nfreelists)
                                vm_nfreelists = VM_FREELIST_ISADMA + 1;
                } else
#endif
#ifdef  VM_FREELIST_HIGHMEM
                if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
                        if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
                                vm_phys_create_seg(phys_avail[i],
                                    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
                                vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
                                    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
                        } else {
                                vm_phys_create_seg(phys_avail[i],
                                    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
                        }
                        if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
                                vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
                } else
#endif
                vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
                    VM_FREELIST_DEFAULT);
        }
        for (dom = 0; dom < vm_ndomains; dom++) {
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                fl = vm_phys_free_queues[dom][flind][pind];
                                for (oind = 0; oind < VM_NFREEORDER; oind++)
                                        TAILQ_INIT(&fl[oind].pl);
                        }
                }
        }
        mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
        vm_page_t m_buddy;

        while (oind > order) {
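                /*
                 * Repeatedly halve the block: the upper half (the buddy
                 * beginning 1 << oind pages in) goes back on the free
                 * list at order "oind", and the search continues in the
                 * lower half until the block shrinks to "order".
                 */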
                oind--;
                m_buddy = &m[1 << oind];
                KASSERT(m_buddy->order == VM_NFREEORDER,
                    ("vm_phys_split_pages: page %p has unexpected order %d",
                    m_buddy, m_buddy->order));
                vm_freelist_add(fl, m_buddy, oind, 0);
        }
}

/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
        vm_page_t m;
        struct vm_domain *vmd;

        cnt.v_page_count++;
        m = vm_phys_paddr_to_vm_page(pa);
        m->phys_addr = pa;
        m->queue = PQ_NONE;
        m->segind = vm_phys_paddr_to_segind(pa);
        vmd = vm_phys_domain(m);
        vmd->vmd_page_count++;
        vmd->vmd_segs |= 1UL << m->segind;
        m->flags = PG_FREE;
        KASSERT(m->order == VM_NFREEORDER,
            ("vm_phys_add_page: page %p has unexpected order %d",
            m, m->order));
        m->pool = VM_FREEPOOL_DEFAULT;
        pmap_page_init(m);
        mtx_lock(&vm_page_queue_free_mtx);
        vm_phys_freecnt_adj(m, 1);
        vm_phys_free_pages(m, 0);
        mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
        vm_page_t m;
        int dom, domain, flind;

        KASSERT(pool < VM_NFREEPOOL,
            ("vm_phys_alloc_pages: pool %d is out of range", pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_alloc_pages: order %d is out of range", order));

        for (dom = 0; dom < vm_ndomains; dom++) {
                domain = vm_rr_selectdomain();
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        m = vm_phys_alloc_domain_pages(domain, flind, pool,
                            order);
                        if (m != NULL)
                                return (m);
                }
        }
        return (NULL);
}

/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
        vm_page_t m;
        int dom, domain;

        KASSERT(flind < VM_NFREELIST,
            ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
        KASSERT(pool < VM_NFREEPOOL,
            ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

        for (dom = 0; dom < vm_ndomains; dom++) {
                domain = vm_rr_selectdomain();
                m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
                if (m != NULL)
                        return (m);
        }
        return (NULL);
}

static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
        struct vm_freelist *fl;
        struct vm_freelist *alt;
        int oind, pind;
        vm_page_t m;

        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        fl = &vm_phys_free_queues[domain][flind][pool][0];
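        /*
         * Scan the requested pool from "order" upward, taking the first
         * non-empty queue and splitting any excess back onto the
         * lower-order free lists.
         */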
        for (oind = order; oind < VM_NFREEORDER; oind++) {
                m = TAILQ_FIRST(&fl[oind].pl);
                if (m != NULL) {
                        vm_freelist_rem(fl, m, oind);
                        vm_phys_split_pages(m, oind, fl, order);
                        return (m);
                }
        }

        /*
         * The given pool was empty.  Find the largest
         * contiguous, power-of-two-sized set of pages in any
         * pool.  Transfer these pages to the given pool, and
         * use them to satisfy the allocation.
         */
        for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                        alt = &vm_phys_free_queues[domain][flind][pind][0];
                        m = TAILQ_FIRST(&alt[oind].pl);
                        if (m != NULL) {
                                vm_freelist_rem(alt, m, oind);
                                vm_phys_set_pool(pool, m, oind);
                                vm_phys_split_pages(m, oind, fl, order);
                                return (m);
                        }
                }
        }
        return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
        struct vm_phys_seg *seg;
        int segind;

        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                if (pa >= seg->start && pa < seg->end)
                        return (&seg->first_page[atop(pa - seg->start)]);
        }
        return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
        struct vm_phys_fictitious_seg *seg;
        vm_page_t m;
        int segind;

        m = NULL;
        for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
                seg = &vm_phys_fictitious_segs[segind];
                if (pa >= seg->start && pa < seg->end) {
                        m = &seg->first_page[atop(pa - seg->start)];
                        KASSERT((m->flags & PG_FICTITIOUS) != 0,
                            ("%p not fictitious", m));
                        break;
                }
        }
        return (m);
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
        struct vm_phys_fictitious_seg *seg;
        vm_page_t fp;
        long i, page_count;
        int segind;
#ifdef VM_PHYSSEG_DENSE
        long pi;
        boolean_t malloced;
#endif

        page_count = (end - start) / PAGE_SIZE;

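        /*
         * If the dense vm_page_array already covers this range, reuse
         * those vm_page structures; otherwise allocate an array of fake
         * pages that must be freed if registration fails.
         */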
#ifdef VM_PHYSSEG_DENSE
        pi = atop(start);
        if (pi >= first_page && pi < vm_page_array_size + first_page) {
                if (atop(end) >= vm_page_array_size + first_page)
                        return (EINVAL);
                fp = &vm_page_array[pi - first_page];
                malloced = FALSE;
        } else
#endif
        {
                fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
                    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
                malloced = TRUE;
#endif
        }
        for (i = 0; i < page_count; i++) {
                vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
                fp[i].oflags &= ~VPO_UNMANAGED;
                fp[i].busy_lock = VPB_UNBUSIED;
        }
        mtx_lock(&vm_phys_fictitious_reg_mtx);
        for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
                seg = &vm_phys_fictitious_segs[segind];
                if (seg->start == 0 && seg->end == 0) {
                        seg->start = start;
                        seg->end = end;
                        seg->first_page = fp;
                        mtx_unlock(&vm_phys_fictitious_reg_mtx);
                        return (0);
                }
        }
        mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
        if (malloced)
#endif
                free(fp, M_FICT_PAGES);
        return (EBUSY);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
        struct vm_phys_fictitious_seg *seg;
        vm_page_t fp;
        int segind;
#ifdef VM_PHYSSEG_DENSE
        long pi;
#endif

#ifdef VM_PHYSSEG_DENSE
        pi = atop(start);
#endif

        mtx_lock(&vm_phys_fictitious_reg_mtx);
        for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
                seg = &vm_phys_fictitious_segs[segind];
                if (seg->start == start && seg->end == end) {
                        seg->start = seg->end = 0;
                        fp = seg->first_page;
                        seg->first_page = NULL;
                        mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
                        if (pi < first_page || atop(end) >= vm_page_array_size)
#endif
                                free(fp, M_FICT_PAGES);
                        return;
                }
        }
        mtx_unlock(&vm_phys_fictitious_reg_mtx);
        KASSERT(0,
            ("Unregistering a fictitious range that was never registered"));
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
        struct vm_phys_seg *seg;
        int segind;

        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                if (pa >= seg->start && pa < seg->end)
                        return (segind);
        }
        panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
            (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        vm_paddr_t pa;
        vm_page_t m_buddy;

        KASSERT(m->order == VM_NFREEORDER,
            ("vm_phys_free_pages: page %p has unexpected order %d",
            m, m->order));
        KASSERT(m->pool < VM_NFREEPOOL,
            ("vm_phys_free_pages: page %p has unexpected pool %d",
            m, m->pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_free_pages: order %d is out of range", order));
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        seg = &vm_phys_segs[m->segind];
        if (order < VM_NFREEORDER - 1) {
                pa = VM_PAGE_TO_PHYS(m);
                do {
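                        /*
                         * Flipping the bit at (PAGE_SHIFT + order) in the
                         * physical address yields this block's buddy.  If
                         * the buddy is also a free block of the same order,
                         * remove it from its free list and merge the two
                         * into one block of the next higher order.
                         */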
                        pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
                        if (pa < seg->start || pa >= seg->end)
                                break;
                        m_buddy = &seg->first_page[atop(pa - seg->start)];
                        if (m_buddy->order != order)
                                break;
                        fl = (*seg->free_queues)[m_buddy->pool];
                        vm_freelist_rem(fl, m_buddy, order);
                        if (m_buddy->pool != m->pool)
                                vm_phys_set_pool(m->pool, m_buddy, order);
                        order++;
                        pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
                        m = &seg->first_page[atop(pa - seg->start)];
                } while (order < VM_NFREEORDER - 1);
        }
        fl = (*seg->free_queues)[m->pool];
        vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
        u_int n;
        int order;

        /*
         * Avoid unnecessary coalescing by freeing the pages in the largest
         * possible power-of-two-sized subsets.
         */
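        /*
         * Illustrative trace (hypothetical values): freeing npages = 11
         * starting at page frame 4, the first loop frees an order-2 block
         * at frame 4 (frame 4's alignment limits the order) and stops at
         * frame 8 with 7 pages left, since an order-3 block would overrun;
         * the second loop then frees an order-2 block at frame 8, an
         * order-1 block at frame 12, and a single page at frame 14.
         */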
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        for (;; npages -= n) {
                /*
                 * Unsigned "min" is used here so that "order" is assigned
                 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
                 * or the low-order bits of its physical address are zero
                 * because the size of a physical address exceeds the size of
                 * a long.
                 */
                order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
                    VM_NFREEORDER - 1);
                n = 1 << order;
                if (npages < n)
                        break;
                vm_phys_free_pages(m, order);
                m += n;
        }
        /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
        for (; npages > 0; npages -= n) {
                order = flsl(npages) - 1;
                n = 1 << order;
                vm_phys_free_pages(m, order);
                m += n;
        }
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
        vm_page_t m_tmp;

        for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
                m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        vm_paddr_t pa, pa_half;
        vm_page_t m_set, m_tmp;
        int order;

        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

        /*
         * First, find the contiguous, power of two-sized set of free
         * physical pages containing the given physical page "m" and
         * assign it to "m_set".
         */
        seg = &vm_phys_segs[m->segind];
        for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
            order < VM_NFREEORDER - 1; ) {
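                /*
                 * Round "m"'s physical address down to the next
                 * higher-order block boundary; the page heading that
                 * candidate block becomes the new "m_set".
                 */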
                order++;
                pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
                if (pa >= seg->start)
                        m_set = &seg->first_page[atop(pa - seg->start)];
                else
                        return (FALSE);
        }
        if (m_set->order < order)
                return (FALSE);
        if (m_set->order == VM_NFREEORDER)
                return (FALSE);
        KASSERT(m_set->order < VM_NFREEORDER,
            ("vm_phys_unfree_page: page %p has unexpected order %d",
            m_set, m_set->order));

        /*
         * Next, remove "m_set" from the free lists.  Finally, extract
         * "m" from "m_set" using an iterative algorithm: While "m_set"
         * is larger than a page, shrink "m_set" by returning the half
         * of "m_set" that does not contain "m" to the free lists.
         */
        fl = (*seg->free_queues)[m_set->pool];
        order = m_set->order;
        vm_freelist_rem(fl, m_set, order);
        while (order > 0) {
                order--;
                pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
                if (m->phys_addr < pa_half)
                        m_tmp = &seg->first_page[atop(pa_half - seg->start)];
                else {
                        m_tmp = m_set;
                        m_set = &seg->first_page[atop(pa_half - seg->start)];
                }
                vm_freelist_add(fl, m_tmp, order, 0);
        }
        KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
        return (TRUE);
}

/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
        static struct vm_freelist *fl;
        static int flind, oind, pind;
        vm_page_t m, m_tmp;
        int domain;

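        /*
         * The cursors "flind", "oind", and "pind" are static so that a
         * later call resumes the scan where the previous call stopped
         * instead of always starting over from the lowest queues.
         */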
        domain = vm_rr_selectdomain();
        fl = vm_phys_free_queues[domain][0][0];
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        for (;;) {
                TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
                        for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
                                if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
                                        vm_phys_unfree_page(m_tmp);
                                        vm_phys_freecnt_adj(m, -1);
                                        mtx_unlock(&vm_page_queue_free_mtx);
                                        pmap_zero_page_idle(m_tmp);
                                        m_tmp->flags |= PG_ZERO;
                                        mtx_lock(&vm_page_queue_free_mtx);
                                        vm_phys_freecnt_adj(m, 1);
                                        vm_phys_free_pages(m_tmp, 0);
                                        vm_page_zero_count++;
                                        cnt_prezero++;
                                        return (TRUE);
                                }
                        }
                }
                oind++;
                if (oind == VM_NFREEORDER) {
                        oind = 0;
                        pind++;
                        if (pind == VM_NFREEPOOL) {
                                pind = 0;
                                flind++;
                                if (flind == vm_nfreelists)
                                        flind = 0;
                        }
                        fl = vm_phys_free_queues[domain][flind][pind];
                }
        }
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        vm_paddr_t pa, pa_last, size;
        vm_page_t m, m_ret;
        u_long npages_end;
        int dom, domain, flind, oind, order, pind;

        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        size = npages << PAGE_SHIFT;
        KASSERT(size != 0,
            ("vm_phys_alloc_contig: size must not be 0"));
        KASSERT((alignment & (alignment - 1)) == 0,
            ("vm_phys_alloc_contig: alignment must be a power of 2"));
        KASSERT((boundary & (boundary - 1)) == 0,
            ("vm_phys_alloc_contig: boundary must be a power of 2"));
        /* Compute the queue that is the best fit for npages. */
        for (order = 0; (1 << order) < npages; order++);
        dom = 0;
restartdom:
        domain = vm_rr_selectdomain();
        for (flind = 0; flind < vm_nfreelists; flind++) {
                for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
                        for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                fl = &vm_phys_free_queues[domain][flind][pind][0];
                                TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
                                        /*
                                         * A free list may contain physical pages
                                         * from one or more segments.
                                         */
                                        seg = &vm_phys_segs[m_ret->segind];
                                        if (seg->start > high ||
                                            low >= seg->end)
                                                continue;

                                        /*
                                         * Is the size of this allocation request
                                         * larger than the largest block size?
                                         */
                                        if (order >= VM_NFREEORDER) {
                                                /*
                                                 * Determine if a sufficient number
                                                 * of subsequent blocks to satisfy
                                                 * the allocation request are free.
                                                 */
                                                pa = VM_PAGE_TO_PHYS(m_ret);
                                                pa_last = pa + size;
                                                for (;;) {
                                                        pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
                                                        if (pa >= pa_last)
                                                                break;
                                                        if (pa < seg->start ||
                                                            pa >= seg->end)
                                                                break;
                                                        m = &seg->first_page[atop(pa - seg->start)];
                                                        if (m->order != VM_NFREEORDER - 1)
                                                                break;
                                                }
                                                /* If not, continue to the next block. */
                                                if (pa < pa_last)
                                                        continue;
                                        }

                                        /*
                                         * Determine if the blocks are within the given range,
                                         * satisfy the given alignment, and do not cross the
                                         * given boundary.
                                         */
                                        pa = VM_PAGE_TO_PHYS(m_ret);
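                                        /*
                                         * In the boundary test below, the XOR
                                         * of the first and last byte address
                                         * is zero in every bit at or above
                                         * log2(boundary) exactly when the
                                         * whole run lies within a single
                                         * boundary-aligned window.
                                         */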
                                        if (pa >= low &&
                                            pa + size <= high &&
                                            (pa & (alignment - 1)) == 0 &&
                                            ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
                                                goto done;
                                }
                        }
                }
        }
        if (++dom < vm_ndomains)
                goto restartdom;
        return (NULL);
done:
        for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
                fl = (*seg->free_queues)[m->pool];
                vm_freelist_rem(fl, m, m->order);
        }
        if (m_ret->pool != VM_FREEPOOL_DEFAULT)
                vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
        fl = (*seg->free_queues)[m_ret->pool];
        vm_phys_split_pages(m_ret, oind, fl, order);
        /* Return excess pages to the free lists. */
        npages_end = roundup2(npages, 1 << imin(oind, order));
        if (npages < npages_end)
                vm_phys_free_contig(&m_ret[npages], npages_end - npages);
        return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
        struct vm_freelist *fl;
        int flind, oind, pind, dom;

        for (dom = 0; dom < vm_ndomains; dom++) {
                db_printf("DOMAIN: %d\n", dom);
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        db_printf("FREE LIST %d:\n"
                            "\n  ORDER (SIZE)  |  NUMBER"
                            "\n              ", flind);
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                db_printf("  |  POOL %d", pind);
                        db_printf("\n--            ");
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                db_printf("-- --      ");
                        db_printf("--\n");
                        for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
                                db_printf("  %2.2d (%6.6dK)", oind,
                                    1 << (PAGE_SHIFT - 10 + oind));
                                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                        fl = vm_phys_free_queues[dom][flind][pind];
                                        db_printf("  |  %6.6d", fl[oind].lcnt);
                                }
                                db_printf("\n");
                        }
                        db_printf("\n");
                }
                db_printf("\n");
        }
}
#endif
