FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_phys.c


    1 /*-
    2  * Copyright (c) 2002-2006 Rice University
    3  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
    4  * All rights reserved.
    5  *
    6  * This software was developed for the FreeBSD Project by Alan L. Cox,
    7  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
   22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
   25  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
   28  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: releng/9.1/sys/vm/vm_phys.c 236924 2012-06-11 21:19:59Z kib $");
   34 
   35 #include "opt_ddb.h"
   36 #include "opt_vm.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/lock.h>
   41 #include <sys/kernel.h>
   42 #include <sys/malloc.h>
   43 #include <sys/mutex.h>
   44 #include <sys/queue.h>
   45 #include <sys/sbuf.h>
   46 #include <sys/sysctl.h>
   47 #include <sys/vmmeter.h>
   48 #include <sys/vnode.h>
   49 
   50 #include <ddb/ddb.h>
   51 
   52 #include <vm/vm.h>
   53 #include <vm/vm_param.h>
   54 #include <vm/vm_kern.h>
   55 #include <vm/vm_object.h>
   56 #include <vm/vm_page.h>
   57 #include <vm/vm_phys.h>
   58 #include <vm/vm_reserv.h>
   59 
   60 /*
   61  * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
   62  * domain.  These extra lists are stored at the end of the regular
   63  * free lists starting with VM_NFREELIST.
   64  */
   65 #define VM_RAW_NFREELIST        (VM_NFREELIST + VM_NDOMAIN - 1)
   66 
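/*
 * Worked example of the index arithmetic above (values assumed purely for
 * illustration): with VM_NFREELIST = 3 and VM_NDOMAIN = 2,
 * VM_RAW_NFREELIST = 3 + 2 - 1 = 4.  Domain 0's default list keeps index
 * VM_FREELIST_DEFAULT, while domain 1's default list is appended at raw
 * index VM_NFREELIST + (1 - 1) = 3; see _vm_phys_create_seg() below.
 */
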
   67 struct vm_freelist {
   68         struct pglist pl;
   69         int lcnt;
   70 };
   71 
   72 struct vm_phys_seg {
   73         vm_paddr_t      start;
   74         vm_paddr_t      end;
   75         vm_page_t       first_page;
   76         int             domain;
   77         struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
   78 };
   79 
   80 struct mem_affinity *mem_affinity;
   81 
   82 static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
   83 
   84 static int vm_phys_nsegs;
   85 
   86 #define VM_PHYS_FICTITIOUS_NSEGS        8
   87 static struct vm_phys_fictitious_seg {
   88         vm_paddr_t      start;
   89         vm_paddr_t      end;
   90         vm_page_t       first_page;
   91 } vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
   92 static struct mtx vm_phys_fictitious_reg_mtx;
   93 MALLOC_DEFINE(M_FICT_PAGES, "", "");
   94 
   95 static struct vm_freelist
   96     vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
   97 static struct vm_freelist
   98 (*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];
   99 
  100 static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
  101 
  102 static int cnt_prezero;
  103 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
  104     &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
  105 
  106 static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
  107 SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
  108     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
  109 
  110 static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
  111 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
  112     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
  113 
  114 #if VM_NDOMAIN > 1
  115 static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
  116 SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
  117     NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
  118 #endif
  119 
  120 static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
  121     int domain);
  122 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
  123 static int vm_phys_paddr_to_segind(vm_paddr_t pa);
  124 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
  125     int order);
  126 
  127 /*
  128  * Outputs the state of the physical memory allocator, specifically,
  129  * the amount of physical memory in each free list.
  130  */
  131 static int
  132 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
  133 {
  134         struct sbuf sbuf;
  135         struct vm_freelist *fl;
  136         int error, flind, oind, pind;
  137 
  138         error = sysctl_wire_old_buffer(req, 0);
  139         if (error != 0)
  140                 return (error);
  141         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
  142         for (flind = 0; flind < vm_nfreelists; flind++) {
  143                 sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
  144                     "\n  ORDER (SIZE)  |  NUMBER"
  145                     "\n              ", flind);
  146                 for (pind = 0; pind < VM_NFREEPOOL; pind++)
  147                         sbuf_printf(&sbuf, "  |  POOL %d", pind);
  148                 sbuf_printf(&sbuf, "\n--            ");
  149                 for (pind = 0; pind < VM_NFREEPOOL; pind++)
  150                         sbuf_printf(&sbuf, "-- --      ");
  151                 sbuf_printf(&sbuf, "--\n");
  152                 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
  153                         sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
  154                             1 << (PAGE_SHIFT - 10 + oind));
  155                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  156                                 fl = vm_phys_free_queues[flind][pind];
  157                                 sbuf_printf(&sbuf, "  |  %6d", fl[oind].lcnt);
  158                         }
  159                         sbuf_printf(&sbuf, "\n");
  160                 }
  161         }
  162         error = sbuf_finish(&sbuf);
  163         sbuf_delete(&sbuf);
  164         return (error);
  165 }
  166 
  167 /*
  168  * Outputs the set of physical memory segments.
  169  */
  170 static int
  171 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
  172 {
  173         struct sbuf sbuf;
  174         struct vm_phys_seg *seg;
  175         int error, segind;
  176 
  177         error = sysctl_wire_old_buffer(req, 0);
  178         if (error != 0)
  179                 return (error);
  180         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
  181         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  182                 sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
  183                 seg = &vm_phys_segs[segind];
  184                 sbuf_printf(&sbuf, "start:     %#jx\n",
  185                     (uintmax_t)seg->start);
  186                 sbuf_printf(&sbuf, "end:       %#jx\n",
  187                     (uintmax_t)seg->end);
  188                 sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
  189                 sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
  190         }
  191         error = sbuf_finish(&sbuf);
  192         sbuf_delete(&sbuf);
  193         return (error);
  194 }
  195 
  196 #if VM_NDOMAIN > 1
  197 /*
  198  * Outputs the set of free list lookup lists.
  199  */
  200 static int
  201 sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
  202 {
  203         struct sbuf sbuf;
  204         int domain, error, flind, ndomains;
  205 
  206         error = sysctl_wire_old_buffer(req, 0);
  207         if (error != 0)
  208                 return (error);
  209         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
  210         ndomains = vm_nfreelists - VM_NFREELIST + 1;
  211         for (domain = 0; domain < ndomains; domain++) {
  212                 sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
  213                 for (flind = 0; flind < vm_nfreelists; flind++)
  214                         sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
  215                             vm_phys_lookup_lists[domain][flind]);
  216         }
  217         error = sbuf_finish(&sbuf);
  218         sbuf_delete(&sbuf);
  219         return (error);
  220 }
  221 #endif
  222         
  223 /*
  224  * Create a physical memory segment.
  225  */
  226 static void
  227 _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
  228 {
  229         struct vm_phys_seg *seg;
  230 #ifdef VM_PHYSSEG_SPARSE
  231         long pages;
  232         int segind;
  233 
  234         pages = 0;
  235         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  236                 seg = &vm_phys_segs[segind];
  237                 pages += atop(seg->end - seg->start);
  238         }
  239 #endif
  240         KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
  241             ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
  242         seg = &vm_phys_segs[vm_phys_nsegs++];
  243         seg->start = start;
  244         seg->end = end;
  245         seg->domain = domain;
  246 #ifdef VM_PHYSSEG_SPARSE
  247         seg->first_page = &vm_page_array[pages];
  248 #else
  249         seg->first_page = PHYS_TO_VM_PAGE(start);
  250 #endif
  251 #if VM_NDOMAIN > 1
  252         if (flind == VM_FREELIST_DEFAULT && domain != 0) {
  253                 flind = VM_NFREELIST + (domain - 1);
  254                 if (flind >= vm_nfreelists)
  255                         vm_nfreelists = flind + 1;
  256         }
  257 #endif
  258         seg->free_queues = &vm_phys_free_queues[flind];
  259 }
  260 
  261 static void
  262 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
  263 {
  264         int i;
  265 
  266         if (mem_affinity == NULL) {
  267                 _vm_phys_create_seg(start, end, flind, 0);
  268                 return;
  269         }
  270 
  271         for (i = 0;; i++) {
  272                 if (mem_affinity[i].end == 0)
  273                         panic("Reached end of affinity info");
  274                 if (mem_affinity[i].end <= start)
  275                         continue;
  276                 if (mem_affinity[i].start > start)
  277                         panic("No affinity info for start %jx",
  278                             (uintmax_t)start);
  279                 if (mem_affinity[i].end >= end) {
  280                         _vm_phys_create_seg(start, end, flind,
  281                             mem_affinity[i].domain);
  282                         break;
  283                 }
  284                 _vm_phys_create_seg(start, mem_affinity[i].end, flind,
  285                     mem_affinity[i].domain);
  286                 start = mem_affinity[i].end;
  287         }
  288 }
  289 
  290 /*
  291  * Initialize the physical memory allocator.
  292  */
  293 void
  294 vm_phys_init(void)
  295 {
  296         struct vm_freelist *fl;
  297         int flind, i, oind, pind;
  298 #if VM_NDOMAIN > 1
  299         int ndomains, j;
  300 #endif
  301 
  302         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
  303 #ifdef  VM_FREELIST_ISADMA
  304                 if (phys_avail[i] < 16777216) {
  305                         if (phys_avail[i + 1] > 16777216) {
  306                                 vm_phys_create_seg(phys_avail[i], 16777216,
  307                                     VM_FREELIST_ISADMA);
  308                                 vm_phys_create_seg(16777216, phys_avail[i + 1],
  309                                     VM_FREELIST_DEFAULT);
  310                         } else {
  311                                 vm_phys_create_seg(phys_avail[i],
  312                                     phys_avail[i + 1], VM_FREELIST_ISADMA);
  313                         }
  314                         if (VM_FREELIST_ISADMA >= vm_nfreelists)
  315                                 vm_nfreelists = VM_FREELIST_ISADMA + 1;
  316                 } else
  317 #endif
  318 #ifdef  VM_FREELIST_HIGHMEM
  319                 if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
  320                         if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
  321                                 vm_phys_create_seg(phys_avail[i],
  322                                     VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
  323                                 vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
  324                                     phys_avail[i + 1], VM_FREELIST_HIGHMEM);
  325                         } else {
  326                                 vm_phys_create_seg(phys_avail[i],
  327                                     phys_avail[i + 1], VM_FREELIST_HIGHMEM);
  328                         }
  329                         if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
  330                                 vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
  331                 } else
  332 #endif
  333                 vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
  334                     VM_FREELIST_DEFAULT);
  335         }
  336         for (flind = 0; flind < vm_nfreelists; flind++) {
  337                 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  338                         fl = vm_phys_free_queues[flind][pind];
  339                         for (oind = 0; oind < VM_NFREEORDER; oind++)
  340                                 TAILQ_INIT(&fl[oind].pl);
  341                 }
  342         }
  343 #if VM_NDOMAIN > 1
  344         /*
  345          * Build a free list lookup list for each domain.  All of the
  346          * memory domain lists are inserted at the VM_FREELIST_DEFAULT
  347          * index in a round-robin order starting with the current
  348          * domain.
  349          */
  350         ndomains = vm_nfreelists - VM_NFREELIST + 1;
  351         for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
  352                 for (i = 0; i < ndomains; i++)
  353                         vm_phys_lookup_lists[i][flind] =
  354                             &vm_phys_free_queues[flind];
  355         for (i = 0; i < ndomains; i++)
  356                 for (j = 0; j < ndomains; j++) {
  357                         flind = (i + j) % ndomains;
  358                         if (flind == 0)
  359                                 flind = VM_FREELIST_DEFAULT;
  360                         else
  361                                 flind += VM_NFREELIST - 1;
  362                         vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
  363                             &vm_phys_free_queues[flind];
  364                 }
  365         for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
  366              flind++)
  367                 for (i = 0; i < ndomains; i++)
  368                         vm_phys_lookup_lists[i][flind + ndomains - 1] =
  369                             &vm_phys_free_queues[flind];
  370 #else
  371         for (flind = 0; flind < vm_nfreelists; flind++)
  372                 vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
  373 #endif
  374 
  375         mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
  376 }
  377 
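/*
 * Sketch of the lookup order built above (illustrative, assuming two
 * domains and VM_FREELIST_DEFAULT == 0): domain 0's lookup list visits
 * { default queues of domain 0, default queues of domain 1, remaining
 * freelists }, while domain 1's visits { domain 1, domain 0, remaining
 * freelists }.  Each CPU therefore tries memory local to its own domain
 * before falling back to the other domains' default queues.
 */
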
  378 /*
  379  * Split a contiguous, power of two-sized set of physical pages.
  380  */
  381 static __inline void
  382 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
  383 {
  384         vm_page_t m_buddy;
  385 
  386         while (oind > order) {
  387                 oind--;
  388                 m_buddy = &m[1 << oind];
  389                 KASSERT(m_buddy->order == VM_NFREEORDER,
  390                     ("vm_phys_split_pages: page %p has unexpected order %d",
  391                     m_buddy, m_buddy->order));
  392                 m_buddy->order = oind;
  393                 TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
  394                 fl[oind].lcnt++;
  395         }
  396 }
  397 
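/*
 * Worked example (illustrative): splitting an order-3 block "m" down to an
 * order-0 allocation frees the upper halves as progressively smaller
 * buddies:
 *
 *      oind 3 -> 2:  m[4..7] queued as a free order-2 block
 *      oind 2 -> 1:  m[2..3] queued as a free order-1 block
 *      oind 1 -> 0:  m[1]    queued as a free order-0 block
 *
 * leaving m[0] as the order-0 page handed back to the caller.
 */
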
  398 /*
  399  * Initialize a physical page and add it to the free lists.
  400  */
  401 void
  402 vm_phys_add_page(vm_paddr_t pa)
  403 {
  404         vm_page_t m;
  405 
  406         cnt.v_page_count++;
  407         m = vm_phys_paddr_to_vm_page(pa);
  408         m->phys_addr = pa;
  409         m->queue = PQ_NONE;
  410         m->segind = vm_phys_paddr_to_segind(pa);
  411         m->flags = PG_FREE;
  412         KASSERT(m->order == VM_NFREEORDER,
  413             ("vm_phys_add_page: page %p has unexpected order %d",
  414             m, m->order));
  415         m->pool = VM_FREEPOOL_DEFAULT;
  416         pmap_page_init(m);
  417         mtx_lock(&vm_page_queue_free_mtx);
  418         cnt.v_free_count++;
  419         vm_phys_free_pages(m, 0);
  420         mtx_unlock(&vm_page_queue_free_mtx);
  421 }
  422 
  423 /*
  424  * Allocate a contiguous, power of two-sized set of physical pages
  425  * from the free lists.
  426  *
  427  * The free page queues must be locked.
  428  */
  429 vm_page_t
  430 vm_phys_alloc_pages(int pool, int order)
  431 {
  432         vm_page_t m;
  433         int flind;
  434 
  435         for (flind = 0; flind < vm_nfreelists; flind++) {
  436                 m = vm_phys_alloc_freelist_pages(flind, pool, order);
  437                 if (m != NULL)
  438                         return (m);
  439         }
  440         return (NULL);
  441 }
  442 
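#if 0
/*
 * Illustrative caller sketch (hypothetical helper, not part of this file):
 * allocate a single default-pool page while holding the free queue lock,
 * as vm_phys_alloc_pages() requires.
 */
static vm_page_t
example_alloc_one_page(void)
{
	vm_page_t m;

	mtx_lock(&vm_page_queue_free_mtx);
	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
	return (m);	/* NULL if no block of any order was free */
}
#endif
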
  443 /*
  444  * Find and dequeue a free page on the given free list, with the 
  445  * specified pool and order
  446  */
  447 vm_page_t
  448 vm_phys_alloc_freelist_pages(int flind, int pool, int order)
  449 {       
  450         struct vm_freelist *fl;
  451         struct vm_freelist *alt;
  452         int domain, oind, pind;
  453         vm_page_t m;
  454 
  455         KASSERT(flind < VM_NFREELIST,
  456             ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
  457         KASSERT(pool < VM_NFREEPOOL,
  458             ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
  459         KASSERT(order < VM_NFREEORDER,
  460             ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
  461 
  462 #if VM_NDOMAIN > 1
  463         domain = PCPU_GET(domain);
  464 #else
  465         domain = 0;
  466 #endif
  467         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  468         fl = (*vm_phys_lookup_lists[domain][flind])[pool];
  469         for (oind = order; oind < VM_NFREEORDER; oind++) {
  470                 m = TAILQ_FIRST(&fl[oind].pl);
  471                 if (m != NULL) {
  472                         TAILQ_REMOVE(&fl[oind].pl, m, pageq);
  473                         fl[oind].lcnt--;
  474                         m->order = VM_NFREEORDER;
  475                         vm_phys_split_pages(m, oind, fl, order);
  476                         return (m);
  477                 }
  478         }
  479 
  480         /*
  481          * The given pool was empty.  Find the largest
  482          * contiguous, power-of-two-sized set of pages in any
  483          * pool.  Transfer these pages to the given pool, and
  484          * use them to satisfy the allocation.
  485          */
  486         for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
  487                 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  488                         alt = (*vm_phys_lookup_lists[domain][flind])[pind];
  489                         m = TAILQ_FIRST(&alt[oind].pl);
  490                         if (m != NULL) {
  491                                 TAILQ_REMOVE(&alt[oind].pl, m, pageq);
  492                                 alt[oind].lcnt--;
  493                                 m->order = VM_NFREEORDER;
  494                                 vm_phys_set_pool(pool, m, oind);
  495                                 vm_phys_split_pages(m, oind, fl, order);
  496                                 return (m);
  497                         }
  498                 }
  499         }
  500         return (NULL);
  501 }
  502 
  503 /*
  504  * Allocate physical memory from phys_avail[].
  505  */
  506 vm_paddr_t
  507 vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment)
  508 {
  509         vm_paddr_t pa;
  510         int i;
  511 
  512         size = round_page(size);
  513         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
  514                 if (phys_avail[i + 1] - phys_avail[i] < size)
  515                         continue;
  516                 pa = phys_avail[i];
  517                 phys_avail[i] += size;
  518                 return (pa);
  519         }
  520         panic("vm_phys_bootstrap_alloc");
  521 }
  522 
  523 /*
  524  * Find the vm_page corresponding to the given physical address.
  525  */
  526 vm_page_t
  527 vm_phys_paddr_to_vm_page(vm_paddr_t pa)
  528 {
  529         struct vm_phys_seg *seg;
  530         int segind;
  531 
  532         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  533                 seg = &vm_phys_segs[segind];
  534                 if (pa >= seg->start && pa < seg->end)
  535                         return (&seg->first_page[atop(pa - seg->start)]);
  536         }
  537         return (NULL);
  538 }
  539 
  540 vm_page_t
  541 vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
  542 {
  543         struct vm_phys_fictitious_seg *seg;
  544         vm_page_t m;
  545         int segind;
  546 
  547         m = NULL;
  548         for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
  549                 seg = &vm_phys_fictitious_segs[segind];
  550                 if (pa >= seg->start && pa < seg->end) {
  551                         m = &seg->first_page[atop(pa - seg->start)];
  552                         KASSERT((m->flags & PG_FICTITIOUS) != 0,
  553                             ("%p not fictitious", m));
  554                         break;
  555                 }
  556         }
  557         return (m);
  558 }
  559 
  560 int
  561 vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
  562     vm_memattr_t memattr)
  563 {
  564         struct vm_phys_fictitious_seg *seg;
  565         vm_page_t fp;
  566         long i, page_count;
  567         int segind;
  568 #ifdef VM_PHYSSEG_DENSE
  569         long pi;
  570         boolean_t malloced;
  571 #endif
  572 
  573         page_count = (end - start) / PAGE_SIZE;
  574 
  575 #ifdef VM_PHYSSEG_DENSE
  576         pi = atop(start);
  577         if (pi >= first_page && atop(end) < vm_page_array_size) {
  578                 fp = &vm_page_array[pi - first_page];
  579                 malloced = FALSE;
  580         } else
  581 #endif
  582         {
  583                 fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
  584                     M_WAITOK | M_ZERO);
  585 #ifdef VM_PHYSSEG_DENSE
  586                 malloced = TRUE;
  587 #endif
  588         }
  589         for (i = 0; i < page_count; i++) {
  590                 vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
  591                 pmap_page_init(&fp[i]);
  592                 fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
  593         }
  594         mtx_lock(&vm_phys_fictitious_reg_mtx);
  595         for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
  596                 seg = &vm_phys_fictitious_segs[segind];
  597                 if (seg->start == 0 && seg->end == 0) {
  598                         seg->start = start;
  599                         seg->end = end;
  600                         seg->first_page = fp;
  601                         mtx_unlock(&vm_phys_fictitious_reg_mtx);
  602                         return (0);
  603                 }
  604         }
  605         mtx_unlock(&vm_phys_fictitious_reg_mtx);
  606 #ifdef VM_PHYSSEG_DENSE
  607         if (malloced)
  608 #endif
  609                 free(fp, M_FICT_PAGES);
  610         return (EBUSY);
  611 }
  612 
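#if 0
/*
 * Illustrative registration sketch (hypothetical function and device
 * addresses assumed): a driver exposing a memory-mapped aperture might
 * register it as fictitious pages and unregister it again on detach.
 */
static int
example_register_aperture(void)
{
	int error;

	error = vm_phys_fictitious_reg_range(0xd0000000, 0xd0100000,
	    VM_MEMATTR_UNCACHEABLE);
	if (error != 0)
		return (error);		/* EBUSY: no free registration slot */
	/* ... look up pages via vm_phys_fictitious_to_vm_page() ... */
	vm_phys_fictitious_unreg_range(0xd0000000, 0xd0100000);
	return (0);
}
#endif
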
  613 void
  614 vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
  615 {
  616         struct vm_phys_fictitious_seg *seg;
  617         vm_page_t fp;
  618         int segind;
  619 #ifdef VM_PHYSSEG_DENSE
  620         long pi;
  621 #endif
  622 
  623 #ifdef VM_PHYSSEG_DENSE
  624         pi = atop(start);
  625 #endif
  626 
  627         mtx_lock(&vm_phys_fictitious_reg_mtx);
  628         for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
  629                 seg = &vm_phys_fictitious_segs[segind];
  630                 if (seg->start == start && seg->end == end) {
  631                         seg->start = seg->end = 0;
  632                         fp = seg->first_page;
  633                         seg->first_page = NULL;
  634                         mtx_unlock(&vm_phys_fictitious_reg_mtx);
  635 #ifdef VM_PHYSSEG_DENSE
  636                         if (pi < first_page || atop(end) >= vm_page_array_size)
  637 #endif
  638                                 free(fp, M_FICT_PAGES);
  639                         return;
  640                 }
  641         }
  642         mtx_unlock(&vm_phys_fictitious_reg_mtx);
  643         KASSERT(0, ("Unregistering not registered fictitious range"));
  644 }
  645 
  646 /*
  647  * Find the segment containing the given physical address.
  648  */
  649 static int
  650 vm_phys_paddr_to_segind(vm_paddr_t pa)
  651 {
  652         struct vm_phys_seg *seg;
  653         int segind;
  654 
  655         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  656                 seg = &vm_phys_segs[segind];
  657                 if (pa >= seg->start && pa < seg->end)
  658                         return (segind);
  659         }
  660         panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" ,
  661             (uintmax_t)pa);
  662 }
  663 
  664 /*
  665  * Free a contiguous, power of two-sized set of physical pages.
  666  *
  667  * The free page queues must be locked.
  668  */
  669 void
  670 vm_phys_free_pages(vm_page_t m, int order)
  671 {
  672         struct vm_freelist *fl;
  673         struct vm_phys_seg *seg;
  674         vm_paddr_t pa, pa_buddy;
  675         vm_page_t m_buddy;
  676 
  677         KASSERT(m->order == VM_NFREEORDER,
  678             ("vm_phys_free_pages: page %p has unexpected order %d",
  679             m, m->order));
  680         KASSERT(m->pool < VM_NFREEPOOL,
  681             ("vm_phys_free_pages: page %p has unexpected pool %d",
  682             m, m->pool));
  683         KASSERT(order < VM_NFREEORDER,
  684             ("vm_phys_free_pages: order %d is out of range", order));
  685         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  686         pa = VM_PAGE_TO_PHYS(m);
  687         seg = &vm_phys_segs[m->segind];
  688         while (order < VM_NFREEORDER - 1) {
  689                 pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
  690                 if (pa_buddy < seg->start ||
  691                     pa_buddy >= seg->end)
  692                         break;
  693                 m_buddy = &seg->first_page[atop(pa_buddy - seg->start)];
  694                 if (m_buddy->order != order)
  695                         break;
  696                 fl = (*seg->free_queues)[m_buddy->pool];
  697                 TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq);
  698                 fl[m_buddy->order].lcnt--;
  699                 m_buddy->order = VM_NFREEORDER;
  700                 if (m_buddy->pool != m->pool)
  701                         vm_phys_set_pool(m->pool, m_buddy, order);
  702                 order++;
  703                 pa &= ~((1 << (PAGE_SHIFT + order)) - 1);
  704                 m = &seg->first_page[atop(pa - seg->start)];
  705         }
  706         m->order = order;
  707         fl = (*seg->free_queues)[m->pool];
  708         TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
  709         fl[order].lcnt++;
  710 }
  711 
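/*
 * Worked example (illustrative, assuming 4 KB pages, PAGE_SHIFT == 12):
 * freeing an order-0 page at physical address 0x5000 computes its buddy as
 *
 *      pa_buddy = 0x5000 ^ (1 << 12) = 0x4000
 *
 * If the page at 0x4000 is free at order 0, the pair merges into an
 * order-1 block at 0x4000 (pa &= ~((1 << 13) - 1)) and the loop retries
 * one order higher, stopping when a buddy is missing, still allocated, or
 * outside the segment.
 */
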
  712 /*
  713  * Set the pool for a contiguous, power of two-sized set of physical pages. 
  714  */
  715 void
  716 vm_phys_set_pool(int pool, vm_page_t m, int order)
  717 {
  718         vm_page_t m_tmp;
  719 
  720         for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
  721                 m_tmp->pool = pool;
  722 }
  723 
  724 /*
  725  * Search for the given physical page "m" in the free lists.  If the search
  726  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
  727  * FALSE, indicating that "m" is not in the free lists.
  728  *
  729  * The free page queues must be locked.
  730  */
  731 boolean_t
  732 vm_phys_unfree_page(vm_page_t m)
  733 {
  734         struct vm_freelist *fl;
  735         struct vm_phys_seg *seg;
  736         vm_paddr_t pa, pa_half;
  737         vm_page_t m_set, m_tmp;
  738         int order;
  739 
  740         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  741 
  742         /*
  743          * First, find the contiguous, power of two-sized set of free
  744          * physical pages containing the given physical page "m" and
  745          * assign it to "m_set".
  746          */
  747         seg = &vm_phys_segs[m->segind];
  748         for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
  749             order < VM_NFREEORDER - 1; ) {
  750                 order++;
  751                 pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
  752                 if (pa >= seg->start)
  753                         m_set = &seg->first_page[atop(pa - seg->start)];
  754                 else
  755                         return (FALSE);
  756         }
  757         if (m_set->order < order)
  758                 return (FALSE);
  759         if (m_set->order == VM_NFREEORDER)
  760                 return (FALSE);
  761         KASSERT(m_set->order < VM_NFREEORDER,
  762             ("vm_phys_unfree_page: page %p has unexpected order %d",
  763             m_set, m_set->order));
  764 
  765         /*
  766          * Next, remove "m_set" from the free lists.  Finally, extract
  767          * "m" from "m_set" using an iterative algorithm: While "m_set"
  768          * is larger than a page, shrink "m_set" by returning the half
  769          * of "m_set" that does not contain "m" to the free lists.
  770          */
  771         fl = (*seg->free_queues)[m_set->pool];
  772         order = m_set->order;
  773         TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
  774         fl[order].lcnt--;
  775         m_set->order = VM_NFREEORDER;
  776         while (order > 0) {
  777                 order--;
  778                 pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
  779                 if (m->phys_addr < pa_half)
  780                         m_tmp = &seg->first_page[atop(pa_half - seg->start)];
  781                 else {
  782                         m_tmp = m_set;
  783                         m_set = &seg->first_page[atop(pa_half - seg->start)];
  784                 }
  785                 m_tmp->order = order;
  786                 TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
  787                 fl[order].lcnt++;
  788         }
  789         KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
  790         return (TRUE);
  791 }
  792 
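/*
 * Worked example of the shrink loop (illustrative, 4 KB pages): extracting
 * the page "m" at 0x7000 from an order-2 free block "m_set" at 0x4000:
 *
 *      order 1:  pa_half = 0x4000 ^ 0x2000 = 0x6000; "m" lies in the upper
 *                half, so the lower half at 0x4000 is freed at order 1 and
 *                m_set moves to 0x6000.
 *      order 0:  pa_half = 0x6000 ^ 0x1000 = 0x7000; "m" lies at 0x7000,
 *                so the page at 0x6000 is freed at order 0 and m_set == m.
 */
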
  793 /*
  794  * Try to zero one physical page.  Used by an idle priority thread.
  795  */
  796 boolean_t
  797 vm_phys_zero_pages_idle(void)
  798 {
  799         static struct vm_freelist *fl = vm_phys_free_queues[0][0];
  800         static int flind, oind, pind;
  801         vm_page_t m, m_tmp;
  802 
  803         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  804         for (;;) {
  805                 TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
  806                         for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
  807                                 if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
  808                                         vm_phys_unfree_page(m_tmp);
  809                                         cnt.v_free_count--;
  810                                         mtx_unlock(&vm_page_queue_free_mtx);
  811                                         pmap_zero_page_idle(m_tmp);
  812                                         m_tmp->flags |= PG_ZERO;
  813                                         mtx_lock(&vm_page_queue_free_mtx);
  814                                         cnt.v_free_count++;
  815                                         vm_phys_free_pages(m_tmp, 0);
  816                                         vm_page_zero_count++;
  817                                         cnt_prezero++;
  818                                         return (TRUE);
  819                                 }
  820                         }
  821                 }
  822                 oind++;
  823                 if (oind == VM_NFREEORDER) {
  824                         oind = 0;
  825                         pind++;
  826                         if (pind == VM_NFREEPOOL) {
  827                                 pind = 0;
  828                                 flind++;
  829                                 if (flind == vm_nfreelists)
  830                                         flind = 0;
  831                         }
  832                         fl = vm_phys_free_queues[flind][pind];
  833                 }
  834         }
  835 }
  836 
  837 /*
  838  * Allocate a contiguous set of physical pages of the given size
  839  * "npages" from the free lists.  All of the physical pages must be at
  840  * or above the given physical address "low" and below the given
  841  * physical address "high".  The given value "alignment" determines the
  842  * alignment of the first physical page in the set.  If the given value
  843  * "boundary" is non-zero, then the set of physical pages cannot cross
  844  * any physical address boundary that is a multiple of that value.  Both
  845  * "alignment" and "boundary" must be a power of two.
  846  */
  847 vm_page_t
  848 vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
  849     unsigned long alignment, unsigned long boundary)
  850 {
  851         struct vm_freelist *fl;
  852         struct vm_phys_seg *seg;
  853         struct vnode *vp;
  854         vm_paddr_t pa, pa_last, size;
  855         vm_page_t deferred_vdrop_list, m, m_ret;
  856         int domain, flind, i, oind, order, pind;
  857 
  858 #if VM_NDOMAIN > 1
  859         domain = PCPU_GET(domain);
  860 #else
  861         domain = 0;
  862 #endif
  863         size = npages << PAGE_SHIFT;
  864         KASSERT(size != 0,
  865             ("vm_phys_alloc_contig: size must not be 0"));
  866         KASSERT((alignment & (alignment - 1)) == 0,
  867             ("vm_phys_alloc_contig: alignment must be a power of 2"));
  868         KASSERT((boundary & (boundary - 1)) == 0,
  869             ("vm_phys_alloc_contig: boundary must be a power of 2"));
  870         deferred_vdrop_list = NULL;
  871         /* Compute the queue that is the best fit for npages. */
  872         for (order = 0; (1 << order) < npages; order++);
  873         mtx_lock(&vm_page_queue_free_mtx);
  874 #if VM_NRESERVLEVEL > 0
  875 retry:
  876 #endif
  877         for (flind = 0; flind < vm_nfreelists; flind++) {
  878                 for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
  879                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  880                                 fl = (*vm_phys_lookup_lists[domain][flind])
  881                                     [pind];
  882                                 TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
  883                                         /*
  884                                          * A free list may contain physical pages
  885                                          * from one or more segments.
  886                                          */
  887                                         seg = &vm_phys_segs[m_ret->segind];
  888                                         if (seg->start > high ||
  889                                             low >= seg->end)
  890                                                 continue;
  891 
  892                                         /*
  893                                          * Is the size of this allocation request
  894                                          * larger than the largest block size?
  895                                          */
  896                                         if (order >= VM_NFREEORDER) {
  897                                                 /*
  898                                                  * Determine if a sufficient number
  899                                                  * of subsequent blocks to satisfy
  900                                                  * the allocation request are free.
  901                                                  */
  902                                                 pa = VM_PAGE_TO_PHYS(m_ret);
  903                                                 pa_last = pa + size;
  904                                                 for (;;) {
  905                                                         pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
  906                                                         if (pa >= pa_last)
  907                                                                 break;
  908                                                         if (pa < seg->start ||
  909                                                             pa >= seg->end)
  910                                                                 break;
  911                                                         m = &seg->first_page[atop(pa - seg->start)];
  912                                                         if (m->order != VM_NFREEORDER - 1)
  913                                                                 break;
  914                                                 }
  915                                                 /* If not, continue to the next block. */
  916                                                 if (pa < pa_last)
  917                                                         continue;
  918                                         }
  919 
  920                                         /*
  921                                          * Determine if the blocks are within the given range,
  922                                          * satisfy the given alignment, and do not cross the
  923                                          * given boundary.
  924                                          */
  925                                         pa = VM_PAGE_TO_PHYS(m_ret);
  926                                         if (pa >= low &&
  927                                             pa + size <= high &&
  928                                             (pa & (alignment - 1)) == 0 &&
  929                                             ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
  930                                                 goto done;
  931                                 }
  932                         }
  933                 }
  934         }
  935 #if VM_NRESERVLEVEL > 0
  936         if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
  937                 goto retry;
  938 #endif
  939         mtx_unlock(&vm_page_queue_free_mtx);
  940         return (NULL);
  941 done:
  942         for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
  943                 fl = (*seg->free_queues)[m->pool];
  944                 TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
  945                 fl[m->order].lcnt--;
  946                 m->order = VM_NFREEORDER;
  947         }
  948         if (m_ret->pool != VM_FREEPOOL_DEFAULT)
  949                 vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
  950         fl = (*seg->free_queues)[m_ret->pool];
  951         vm_phys_split_pages(m_ret, oind, fl, order);
  952         for (i = 0; i < npages; i++) {
  953                 m = &m_ret[i];
  954                 vp = vm_page_alloc_init(m);
  955                 if (vp != NULL) {
  956                         /*
  957                          * Enqueue the vnode for deferred vdrop().
  958                          *
  959                          * Unmanaged pages don't use "pageq", so it
  960                          * can be safely abused to construct a short-
  961                          * lived queue of vnodes.
  962                          */
  963                         m->pageq.tqe_prev = (void *)vp;
  964                         m->pageq.tqe_next = deferred_vdrop_list;
  965                         deferred_vdrop_list = m;
  966                 }
  967         }
  968         for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
  969                 m = &m_ret[i];
  970                 KASSERT(m->order == VM_NFREEORDER,
  971                     ("vm_phys_alloc_contig: page %p has unexpected order %d",
  972                     m, m->order));
  973                 vm_phys_free_pages(m, 0);
  974         }
  975         mtx_unlock(&vm_page_queue_free_mtx);
  976         while (deferred_vdrop_list != NULL) {
  977                 vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
  978                 deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
  979         }
  980         return (m_ret);
  981 }
  982 
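/*
 * Worked example of the alignment/boundary test above (values assumed for
 * illustration): npages = 4 (size = 0x4000 with 4 KB pages),
 * alignment = 0x2000, boundary = 0x10000.  A candidate at pa = 0x1e000
 * satisfies the alignment test (0x1e000 & 0x1fff == 0) but fails the
 * boundary test, since
 *
 *      (0x1e000 ^ 0x21fff) & ~0xffff == 0x30000 != 0
 *
 * i.e. the run would straddle the 64 KB boundary at 0x20000.  A candidate
 * at pa = 0x1c000 passes both tests.
 */
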
  983 #ifdef DDB
  984 /*
  985  * Show the number of physical pages in each of the free lists.
  986  */
  987 DB_SHOW_COMMAND(freepages, db_show_freepages)
  988 {
  989         struct vm_freelist *fl;
  990         int flind, oind, pind;
  991 
  992         for (flind = 0; flind < vm_nfreelists; flind++) {
  993                 db_printf("FREE LIST %d:\n"
  994                     "\n  ORDER (SIZE)  |  NUMBER"
  995                     "\n              ", flind);
  996                 for (pind = 0; pind < VM_NFREEPOOL; pind++)
  997                         db_printf("  |  POOL %d", pind);
  998                 db_printf("\n--            ");
  999                 for (pind = 0; pind < VM_NFREEPOOL; pind++)
 1000                         db_printf("-- --      ");
 1001                 db_printf("--\n");
 1002                 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
 1003                         db_printf("  %2.2d (%6.6dK)", oind,
 1004                             1 << (PAGE_SHIFT - 10 + oind));
 1005                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 1006                                 fl = vm_phys_free_queues[flind][pind];
 1007                                 db_printf("  |  %6.6d", fl[oind].lcnt);
 1008                         }
 1009                         db_printf("\n");
 1010                 }
 1011                 db_printf("\n");
 1012         }
 1013 }
 1014 #endif

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.