FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_phys.c


    1 /*-
    2  * Copyright (c) 2002-2006 Rice University
    3  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
    4  * All rights reserved.
    5  *
    6  * This software was developed for the FreeBSD Project by Alan L. Cox,
    7  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
   22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
   25  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
   28  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: releng/9.0/sys/vm/vm_phys.c 226894 2011-10-29 06:13:44Z attilio $");
   34 
   35 #include "opt_ddb.h"
   36 #include "opt_vm.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/lock.h>
   41 #include <sys/kernel.h>
   42 #include <sys/malloc.h>
   43 #include <sys/mutex.h>
   44 #include <sys/queue.h>
   45 #include <sys/sbuf.h>
   46 #include <sys/sysctl.h>
   47 #include <sys/vmmeter.h>
   48 #include <sys/vnode.h>
   49 
   50 #include <ddb/ddb.h>
   51 
   52 #include <vm/vm.h>
   53 #include <vm/vm_param.h>
   54 #include <vm/vm_kern.h>
   55 #include <vm/vm_object.h>
   56 #include <vm/vm_page.h>
   57 #include <vm/vm_phys.h>
   58 #include <vm/vm_reserv.h>
   59 
   60 /*
   61  * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
   62  * domain.  These extra lists are stored at the end of the regular
   63  * free lists starting with VM_NFREELIST.
   64  */
   65 #define VM_RAW_NFREELIST        (VM_NFREELIST + VM_NDOMAIN - 1)
   66 
   67 struct vm_freelist {
   68         struct pglist pl;
   69         int lcnt;
   70 };
   71 
   72 struct vm_phys_seg {
   73         vm_paddr_t      start;
   74         vm_paddr_t      end;
   75         vm_page_t       first_page;
   76         int             domain;
   77         struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
   78 };
   79 
   80 struct mem_affinity *mem_affinity;
   81 
   82 static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
   83 
   84 static int vm_phys_nsegs;
   85 
   86 static struct vm_freelist
   87     vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
   88 static struct vm_freelist
   89 (*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];
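       /*
        * vm_phys_free_queues[flind][pind][oind].pl is the buddy queue of
        * free, physically contiguous, 2^oind page blocks belonging to free
        * list "flind" and pool "pind".  Each entry of
        * vm_phys_lookup_lists[domain] points at one of these free lists; on
        * NUMA configurations vm_phys_init() orders the entries so that
        * allocations from "domain" try that domain's default-list memory
        * before the memory of the remaining domains.
        */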
   90 
   91 static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
   92 
   93 static int cnt_prezero;
   94 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
   95     &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
   96 
   97 static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
   98 SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
   99     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
  100 
  101 static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
  102 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
  103     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
  104 
  105 #if VM_NDOMAIN > 1
  106 static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
  107 SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
  108     NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
  109 #endif
  110 
  111 static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
  112     int domain);
  113 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
  114 static int vm_phys_paddr_to_segind(vm_paddr_t pa);
  115 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
  116     int order);
  117 
  118 /*
  119  * Outputs the state of the physical memory allocator, specifically,
  120  * the amount of physical memory in each free list.
  121  */
  122 static int
  123 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
  124 {
  125         struct sbuf sbuf;
  126         struct vm_freelist *fl;
  127         int error, flind, oind, pind;
  128 
  129         error = sysctl_wire_old_buffer(req, 0);
  130         if (error != 0)
  131                 return (error);
  132         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
  133         for (flind = 0; flind < vm_nfreelists; flind++) {
  134                 sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
  135                     "\n  ORDER (SIZE)  |  NUMBER"
  136                     "\n              ", flind);
  137                 for (pind = 0; pind < VM_NFREEPOOL; pind++)
  138                         sbuf_printf(&sbuf, "  |  POOL %d", pind);
  139                 sbuf_printf(&sbuf, "\n--            ");
  140                 for (pind = 0; pind < VM_NFREEPOOL; pind++)
  141                         sbuf_printf(&sbuf, "-- --      ");
  142                 sbuf_printf(&sbuf, "--\n");
  143                 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
  144                         sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
  145                             1 << (PAGE_SHIFT - 10 + oind));
  146                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  147                                 fl = vm_phys_free_queues[flind][pind];
  148                                 sbuf_printf(&sbuf, "  |  %6d", fl[oind].lcnt);
  149                         }
  150                         sbuf_printf(&sbuf, "\n");
  151                 }
  152         }
  153         error = sbuf_finish(&sbuf);
  154         sbuf_delete(&sbuf);
  155         return (error);
  156 }
  157 
  158 /*
  159  * Outputs the set of physical memory segments.
  160  */
  161 static int
  162 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
  163 {
  164         struct sbuf sbuf;
  165         struct vm_phys_seg *seg;
  166         int error, segind;
  167 
  168         error = sysctl_wire_old_buffer(req, 0);
  169         if (error != 0)
  170                 return (error);
  171         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
  172         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  173                 sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
  174                 seg = &vm_phys_segs[segind];
  175                 sbuf_printf(&sbuf, "start:     %#jx\n",
  176                     (uintmax_t)seg->start);
  177                 sbuf_printf(&sbuf, "end:       %#jx\n",
  178                     (uintmax_t)seg->end);
  179                 sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
  180                 sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
  181         }
  182         error = sbuf_finish(&sbuf);
  183         sbuf_delete(&sbuf);
  184         return (error);
  185 }
  186 
  187 #if VM_NDOMAIN > 1
  188 /*
  189  * Outputs the set of free list lookup lists.
  190  */
  191 static int
  192 sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
  193 {
  194         struct sbuf sbuf;
  195         int domain, error, flind, ndomains;
  196 
  197         error = sysctl_wire_old_buffer(req, 0);
  198         if (error != 0)
  199                 return (error);
  200         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
  201         ndomains = vm_nfreelists - VM_NFREELIST + 1;
  202         for (domain = 0; domain < ndomains; domain++) {
  203                 sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
  204                 for (flind = 0; flind < vm_nfreelists; flind++)
  205                         sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
  206                             vm_phys_lookup_lists[domain][flind]);
  207         }
  208         error = sbuf_finish(&sbuf);
  209         sbuf_delete(&sbuf);
  210         return (error);
  211 }
  212 #endif
  213         
  214 /*
  215  * Create a physical memory segment.
  216  */
  217 static void
  218 _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
  219 {
  220         struct vm_phys_seg *seg;
  221 #ifdef VM_PHYSSEG_SPARSE
  222         long pages;
  223         int segind;
  224 
  225         pages = 0;
  226         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  227                 seg = &vm_phys_segs[segind];
  228                 pages += atop(seg->end - seg->start);
  229         }
  230 #endif
  231         KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
  232             ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
  233         seg = &vm_phys_segs[vm_phys_nsegs++];
  234         seg->start = start;
  235         seg->end = end;
  236         seg->domain = domain;
  237 #ifdef VM_PHYSSEG_SPARSE
  238         seg->first_page = &vm_page_array[pages];
  239 #else
  240         seg->first_page = PHYS_TO_VM_PAGE(start);
  241 #endif
  242 #if VM_NDOMAIN > 1
  243         if (flind == VM_FREELIST_DEFAULT && domain != 0) {
  244                 flind = VM_NFREELIST + (domain - 1);
  245                 if (flind >= vm_nfreelists)
  246                         vm_nfreelists = flind + 1;
  247         }
  248 #endif
  249         seg->free_queues = &vm_phys_free_queues[flind];
  250 }
  251 
  252 static void
  253 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
  254 {
  255         int i;
  256 
  257         if (mem_affinity == NULL) {
  258                 _vm_phys_create_seg(start, end, flind, 0);
  259                 return;
  260         }
  261 
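               /*
                * Split [start, end) along the mem_affinity[] boundaries so
                * that each resulting segment lies entirely within a single
                * memory domain.
                */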
  262         for (i = 0;; i++) {
  263                 if (mem_affinity[i].end == 0)
  264                         panic("Reached end of affinity info");
  265                 if (mem_affinity[i].end <= start)
  266                         continue;
  267                 if (mem_affinity[i].start > start)
  268                         panic("No affinity info for start %jx",
  269                             (uintmax_t)start);
  270                 if (mem_affinity[i].end >= end) {
  271                         _vm_phys_create_seg(start, end, flind,
  272                             mem_affinity[i].domain);
  273                         break;
  274                 }
  275                 _vm_phys_create_seg(start, mem_affinity[i].end, flind,
  276                     mem_affinity[i].domain);
  277                 start = mem_affinity[i].end;
  278         }
  279 }
  280 
  281 /*
  282  * Initialize the physical memory allocator.
  283  */
  284 void
  285 vm_phys_init(void)
  286 {
  287         struct vm_freelist *fl;
  288         int flind, i, oind, pind;
  289 #if VM_NDOMAIN > 1
  290         int ndomains, j;
  291 #endif
  292 
  293         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
  294 #ifdef  VM_FREELIST_ISADMA
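                       /*
                        * 16777216 is 16 MB, the reach of ISA DMA; memory
                        * below it is placed on the VM_FREELIST_ISADMA list.
                        */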
  295                 if (phys_avail[i] < 16777216) {
  296                         if (phys_avail[i + 1] > 16777216) {
  297                                 vm_phys_create_seg(phys_avail[i], 16777216,
  298                                     VM_FREELIST_ISADMA);
  299                                 vm_phys_create_seg(16777216, phys_avail[i + 1],
  300                                     VM_FREELIST_DEFAULT);
  301                         } else {
  302                                 vm_phys_create_seg(phys_avail[i],
  303                                     phys_avail[i + 1], VM_FREELIST_ISADMA);
  304                         }
  305                         if (VM_FREELIST_ISADMA >= vm_nfreelists)
  306                                 vm_nfreelists = VM_FREELIST_ISADMA + 1;
  307                 } else
  308 #endif
  309 #ifdef  VM_FREELIST_HIGHMEM
  310                 if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
  311                         if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
  312                                 vm_phys_create_seg(phys_avail[i],
  313                                     VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
  314                                 vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
  315                                     phys_avail[i + 1], VM_FREELIST_HIGHMEM);
  316                         } else {
  317                                 vm_phys_create_seg(phys_avail[i],
  318                                     phys_avail[i + 1], VM_FREELIST_HIGHMEM);
  319                         }
  320                         if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
  321                                 vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
  322                 } else
  323 #endif
  324                 vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
  325                     VM_FREELIST_DEFAULT);
  326         }
  327         for (flind = 0; flind < vm_nfreelists; flind++) {
  328                 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  329                         fl = vm_phys_free_queues[flind][pind];
  330                         for (oind = 0; oind < VM_NFREEORDER; oind++)
  331                                 TAILQ_INIT(&fl[oind].pl);
  332                 }
  333         }
  334 #if VM_NDOMAIN > 1
  335         /*
  336          * Build a free list lookup list for each domain.  All of the
  337          * memory domain lists are inserted at the VM_FREELIST_DEFAULT
  338          * index in a round-robin order starting with the current
  339          * domain.
  340          */
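               /*
                * For example, with two domains the loop below sets
                * lookup_lists[0][VM_FREELIST_DEFAULT] to domain 0's default
                * list and lookup_lists[0][VM_FREELIST_DEFAULT + 1] to domain
                * 1's list, while lookup_lists[1] names the same two lists in
                * the opposite order, so either domain searches its local
                * memory first.
                */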
  341         ndomains = vm_nfreelists - VM_NFREELIST + 1;
  342         for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
  343                 for (i = 0; i < ndomains; i++)
  344                         vm_phys_lookup_lists[i][flind] =
  345                             &vm_phys_free_queues[flind];
  346         for (i = 0; i < ndomains; i++)
  347                 for (j = 0; j < ndomains; j++) {
  348                         flind = (i + j) % ndomains;
  349                         if (flind == 0)
  350                                 flind = VM_FREELIST_DEFAULT;
  351                         else
  352                                 flind += VM_NFREELIST - 1;
  353                         vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
  354                             &vm_phys_free_queues[flind];
  355                 }
  356         for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
  357              flind++)
  358                 for (i = 0; i < ndomains; i++)
  359                         vm_phys_lookup_lists[i][flind + ndomains - 1] =
  360                             &vm_phys_free_queues[flind];
  361 #else
  362         for (flind = 0; flind < vm_nfreelists; flind++)
  363                 vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
  364 #endif
  365 }
  366 
  367 /*
  368  * Split a contiguous, power of two-sized set of physical pages.
  369  */
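       /*
        * For example, splitting an order 3 block down to order 0 returns the
        * block's upper half (order 2), then the upper half of the remainder
        * (order 1), and finally page 1 (order 0) to "fl", leaving the first
        * page of the original block as the allocation.
        */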
  370 static __inline void
  371 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
  372 {
  373         vm_page_t m_buddy;
  374 
  375         while (oind > order) {
  376                 oind--;
  377                 m_buddy = &m[1 << oind];
  378                 KASSERT(m_buddy->order == VM_NFREEORDER,
  379                     ("vm_phys_split_pages: page %p has unexpected order %d",
  380                     m_buddy, m_buddy->order));
  381                 m_buddy->order = oind;
  382                 TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
  383                 fl[oind].lcnt++;
  384         }
  385 }
  386 
  387 /*
  388  * Initialize a physical page and add it to the free lists.
  389  */
  390 void
  391 vm_phys_add_page(vm_paddr_t pa)
  392 {
  393         vm_page_t m;
  394 
  395         cnt.v_page_count++;
  396         m = vm_phys_paddr_to_vm_page(pa);
  397         m->phys_addr = pa;
  398         m->queue = PQ_NONE;
  399         m->segind = vm_phys_paddr_to_segind(pa);
  400         m->flags = PG_FREE;
  401         KASSERT(m->order == VM_NFREEORDER,
  402             ("vm_phys_add_page: page %p has unexpected order %d",
  403             m, m->order));
  404         m->pool = VM_FREEPOOL_DEFAULT;
  405         pmap_page_init(m);
  406         mtx_lock(&vm_page_queue_free_mtx);
  407         cnt.v_free_count++;
  408         vm_phys_free_pages(m, 0);
  409         mtx_unlock(&vm_page_queue_free_mtx);
  410 }
  411 
  412 /*
  413  * Allocate a contiguous, power of two-sized set of physical pages
  414  * from the free lists.
  415  *
  416  * The free page queues must be locked.
  417  */
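       /*
        * A sketch of the expected calling pattern, for a single order 0 page
        * from the default pool:
        *
        *      mtx_lock(&vm_page_queue_free_mtx);
        *      m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 0);
        *      mtx_unlock(&vm_page_queue_free_mtx);
        *
        * The returned page has been removed from the free queues but is not
        * otherwise initialized; compare vm_page_alloc_init() as used in
        * vm_phys_alloc_contig() below.
        */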
  418 vm_page_t
  419 vm_phys_alloc_pages(int pool, int order)
  420 {
  421         vm_page_t m;
  422         int flind;
  423 
  424         for (flind = 0; flind < vm_nfreelists; flind++) {
  425                 m = vm_phys_alloc_freelist_pages(flind, pool, order);
  426                 if (m != NULL)
  427                         return (m);
  428         }
  429         return (NULL);
  430 }
  431 
  432 /*
   433  * Find and dequeue a free page on the given free list, with the
   434  * specified pool and order.
  435  */
  436 vm_page_t
  437 vm_phys_alloc_freelist_pages(int flind, int pool, int order)
  438 {       
  439         struct vm_freelist *fl;
  440         struct vm_freelist *alt;
  441         int domain, oind, pind;
  442         vm_page_t m;
  443 
  444         KASSERT(flind < VM_NFREELIST,
  445             ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
  446         KASSERT(pool < VM_NFREEPOOL,
  447             ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
  448         KASSERT(order < VM_NFREEORDER,
  449             ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
  450 
  451 #if VM_NDOMAIN > 1
  452         domain = PCPU_GET(domain);
  453 #else
  454         domain = 0;
  455 #endif
  456         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  457         fl = (*vm_phys_lookup_lists[domain][flind])[pool];
  458         for (oind = order; oind < VM_NFREEORDER; oind++) {
  459                 m = TAILQ_FIRST(&fl[oind].pl);
  460                 if (m != NULL) {
  461                         TAILQ_REMOVE(&fl[oind].pl, m, pageq);
  462                         fl[oind].lcnt--;
  463                         m->order = VM_NFREEORDER;
  464                         vm_phys_split_pages(m, oind, fl, order);
  465                         return (m);
  466                 }
  467         }
  468 
  469         /*
  470          * The given pool was empty.  Find the largest
  471          * contiguous, power-of-two-sized set of pages in any
  472          * pool.  Transfer these pages to the given pool, and
  473          * use them to satisfy the allocation.
  474          */
  475         for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
  476                 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  477                         alt = (*vm_phys_lookup_lists[domain][flind])[pind];
  478                         m = TAILQ_FIRST(&alt[oind].pl);
  479                         if (m != NULL) {
  480                                 TAILQ_REMOVE(&alt[oind].pl, m, pageq);
  481                                 alt[oind].lcnt--;
  482                                 m->order = VM_NFREEORDER;
  483                                 vm_phys_set_pool(pool, m, oind);
  484                                 vm_phys_split_pages(m, oind, fl, order);
  485                                 return (m);
  486                         }
  487                 }
  488         }
  489         return (NULL);
  490 }
  491 
  492 /*
  493  * Allocate physical memory from phys_avail[].
  494  */
  495 vm_paddr_t
  496 vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment)
  497 {
  498         vm_paddr_t pa;
  499         int i;
  500 
  501         size = round_page(size);
  502         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
  503                 if (phys_avail[i + 1] - phys_avail[i] < size)
  504                         continue;
  505                 pa = phys_avail[i];
  506                 phys_avail[i] += size;
  507                 return (pa);
  508         }
  509         panic("vm_phys_bootstrap_alloc");
  510 }
  511 
  512 /*
  513  * Find the vm_page corresponding to the given physical address.
  514  */
  515 vm_page_t
  516 vm_phys_paddr_to_vm_page(vm_paddr_t pa)
  517 {
  518         struct vm_phys_seg *seg;
  519         int segind;
  520 
  521         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  522                 seg = &vm_phys_segs[segind];
  523                 if (pa >= seg->start && pa < seg->end)
  524                         return (&seg->first_page[atop(pa - seg->start)]);
  525         }
  526         return (NULL);
  527 }
  528 
  529 /*
  530  * Find the segment containing the given physical address.
  531  */
  532 static int
  533 vm_phys_paddr_to_segind(vm_paddr_t pa)
  534 {
  535         struct vm_phys_seg *seg;
  536         int segind;
  537 
  538         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  539                 seg = &vm_phys_segs[segind];
  540                 if (pa >= seg->start && pa < seg->end)
  541                         return (segind);
  542         }
   543         panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
  544             (uintmax_t)pa);
  545 }
  546 
  547 /*
  548  * Free a contiguous, power of two-sized set of physical pages.
  549  *
  550  * The free page queues must be locked.
  551  */
  552 void
  553 vm_phys_free_pages(vm_page_t m, int order)
  554 {
  555         struct vm_freelist *fl;
  556         struct vm_phys_seg *seg;
  557         vm_paddr_t pa, pa_buddy;
  558         vm_page_t m_buddy;
  559 
  560         KASSERT(m->order == VM_NFREEORDER,
  561             ("vm_phys_free_pages: page %p has unexpected order %d",
  562             m, m->order));
  563         KASSERT(m->pool < VM_NFREEPOOL,
  564             ("vm_phys_free_pages: page %p has unexpected pool %d",
  565             m, m->pool));
  566         KASSERT(order < VM_NFREEORDER,
  567             ("vm_phys_free_pages: order %d is out of range", order));
  568         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  569         pa = VM_PAGE_TO_PHYS(m);
  570         seg = &vm_phys_segs[m->segind];
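               /*
                * Coalesce with free buddies: the buddy of the 2^order block
                * at physical address "pa" lies at pa ^ (1 << (PAGE_SHIFT +
                * order)).  Keep merging upward until the buddy falls outside
                * the segment, is not the head of a free block of the same
                * order, or the maximum order is reached.
                */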
  571         while (order < VM_NFREEORDER - 1) {
  572                 pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
  573                 if (pa_buddy < seg->start ||
  574                     pa_buddy >= seg->end)
  575                         break;
  576                 m_buddy = &seg->first_page[atop(pa_buddy - seg->start)];
  577                 if (m_buddy->order != order)
  578                         break;
  579                 fl = (*seg->free_queues)[m_buddy->pool];
  580                 TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq);
  581                 fl[m_buddy->order].lcnt--;
  582                 m_buddy->order = VM_NFREEORDER;
  583                 if (m_buddy->pool != m->pool)
  584                         vm_phys_set_pool(m->pool, m_buddy, order);
  585                 order++;
  586                 pa &= ~((1 << (PAGE_SHIFT + order)) - 1);
  587                 m = &seg->first_page[atop(pa - seg->start)];
  588         }
  589         m->order = order;
  590         fl = (*seg->free_queues)[m->pool];
  591         TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
  592         fl[order].lcnt++;
  593 }
  594 
  595 /*
  596  * Set the pool for a contiguous, power of two-sized set of physical pages. 
  597  */
  598 void
  599 vm_phys_set_pool(int pool, vm_page_t m, int order)
  600 {
  601         vm_page_t m_tmp;
  602 
  603         for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
  604                 m_tmp->pool = pool;
  605 }
  606 
  607 /*
  608  * Search for the given physical page "m" in the free lists.  If the search
  609  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
  610  * FALSE, indicating that "m" is not in the free lists.
  611  *
  612  * The free page queues must be locked.
  613  */
  614 boolean_t
  615 vm_phys_unfree_page(vm_page_t m)
  616 {
  617         struct vm_freelist *fl;
  618         struct vm_phys_seg *seg;
  619         vm_paddr_t pa, pa_half;
  620         vm_page_t m_set, m_tmp;
  621         int order;
  622 
  623         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  624 
  625         /*
  626          * First, find the contiguous, power of two-sized set of free
  627          * physical pages containing the given physical page "m" and
  628          * assign it to "m_set".
  629          */
  630         seg = &vm_phys_segs[m->segind];
  631         for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
  632             order < VM_NFREEORDER - 1; ) {
  633                 order++;
  634                 pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
  635                 if (pa >= seg->start)
  636                         m_set = &seg->first_page[atop(pa - seg->start)];
  637                 else
  638                         return (FALSE);
  639         }
  640         if (m_set->order < order)
  641                 return (FALSE);
  642         if (m_set->order == VM_NFREEORDER)
  643                 return (FALSE);
  644         KASSERT(m_set->order < VM_NFREEORDER,
  645             ("vm_phys_unfree_page: page %p has unexpected order %d",
  646             m_set, m_set->order));
  647 
  648         /*
  649          * Next, remove "m_set" from the free lists.  Finally, extract
  650          * "m" from "m_set" using an iterative algorithm: While "m_set"
  651          * is larger than a page, shrink "m_set" by returning the half
  652          * of "m_set" that does not contain "m" to the free lists.
  653          */
  654         fl = (*seg->free_queues)[m_set->pool];
  655         order = m_set->order;
  656         TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
  657         fl[order].lcnt--;
  658         m_set->order = VM_NFREEORDER;
  659         while (order > 0) {
  660                 order--;
  661                 pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
  662                 if (m->phys_addr < pa_half)
  663                         m_tmp = &seg->first_page[atop(pa_half - seg->start)];
  664                 else {
  665                         m_tmp = m_set;
  666                         m_set = &seg->first_page[atop(pa_half - seg->start)];
  667                 }
  668                 m_tmp->order = order;
  669                 TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
  670                 fl[order].lcnt++;
  671         }
  672         KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
  673         return (TRUE);
  674 }
  675 
  676 /*
  677  * Try to zero one physical page.  Used by an idle priority thread.
  678  */
  679 boolean_t
  680 vm_phys_zero_pages_idle(void)
  681 {
  682         static struct vm_freelist *fl = vm_phys_free_queues[0][0];
  683         static int flind, oind, pind;
  684         vm_page_t m, m_tmp;
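               /*
                * The static cursors above persist across calls, so each
                * invocation resumes scanning the free queues where the
                * previous one stopped rather than restarting from the
                * beginning.
                */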
  685 
  686         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  687         for (;;) {
  688                 TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
  689                         for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
  690                                 if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
  691                                         vm_phys_unfree_page(m_tmp);
  692                                         cnt.v_free_count--;
  693                                         mtx_unlock(&vm_page_queue_free_mtx);
  694                                         pmap_zero_page_idle(m_tmp);
  695                                         m_tmp->flags |= PG_ZERO;
  696                                         mtx_lock(&vm_page_queue_free_mtx);
  697                                         cnt.v_free_count++;
  698                                         vm_phys_free_pages(m_tmp, 0);
  699                                         vm_page_zero_count++;
  700                                         cnt_prezero++;
  701                                         return (TRUE);
  702                                 }
  703                         }
  704                 }
  705                 oind++;
  706                 if (oind == VM_NFREEORDER) {
  707                         oind = 0;
  708                         pind++;
  709                         if (pind == VM_NFREEPOOL) {
  710                                 pind = 0;
  711                                 flind++;
  712                                 if (flind == vm_nfreelists)
  713                                         flind = 0;
  714                         }
  715                         fl = vm_phys_free_queues[flind][pind];
  716                 }
  717         }
  718 }
  719 
  720 /*
  721  * Allocate a contiguous set of physical pages of the given size
  722  * "npages" from the free lists.  All of the physical pages must be at
  723  * or above the given physical address "low" and below the given
  724  * physical address "high".  The given value "alignment" determines the
  725  * alignment of the first physical page in the set.  If the given value
  726  * "boundary" is non-zero, then the set of physical pages cannot cross
  727  * any physical address boundary that is a multiple of that value.  Both
  728  * "alignment" and "boundary" must be a power of two.
  729  */
  730 vm_page_t
  731 vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
  732     unsigned long alignment, unsigned long boundary)
  733 {
  734         struct vm_freelist *fl;
  735         struct vm_phys_seg *seg;
  736         struct vnode *vp;
  737         vm_paddr_t pa, pa_last, size;
  738         vm_page_t deferred_vdrop_list, m, m_ret;
  739         int domain, flind, i, oind, order, pind;
  740 
  741 #if VM_NDOMAIN > 1
  742         domain = PCPU_GET(domain);
  743 #else
  744         domain = 0;
  745 #endif
  746         size = npages << PAGE_SHIFT;
  747         KASSERT(size != 0,
  748             ("vm_phys_alloc_contig: size must not be 0"));
  749         KASSERT((alignment & (alignment - 1)) == 0,
  750             ("vm_phys_alloc_contig: alignment must be a power of 2"));
  751         KASSERT((boundary & (boundary - 1)) == 0,
  752             ("vm_phys_alloc_contig: boundary must be a power of 2"));
  753         deferred_vdrop_list = NULL;
  754         /* Compute the queue that is the best fit for npages. */
  755         for (order = 0; (1 << order) < npages; order++);
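               /*
                * "order" is now the smallest order with 2^order >= npages;
                * for very large requests it may be VM_NFREEORDER or larger,
                * in which case runs of maximum-order blocks are checked
                * below.
                */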
  756         mtx_lock(&vm_page_queue_free_mtx);
  757 #if VM_NRESERVLEVEL > 0
  758 retry:
  759 #endif
  760         for (flind = 0; flind < vm_nfreelists; flind++) {
  761                 for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
  762                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  763                                 fl = (*vm_phys_lookup_lists[domain][flind])
  764                                     [pind];
  765                                 TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
  766                                         /*
  767                                          * A free list may contain physical pages
  768                                          * from one or more segments.
  769                                          */
  770                                         seg = &vm_phys_segs[m_ret->segind];
  771                                         if (seg->start > high ||
  772                                             low >= seg->end)
  773                                                 continue;
  774 
  775                                         /*
  776                                          * Is the size of this allocation request
  777                                          * larger than the largest block size?
  778                                          */
  779                                         if (order >= VM_NFREEORDER) {
  780                                                 /*
  781                                                  * Determine if a sufficient number
  782                                                  * of subsequent blocks to satisfy
  783                                                  * the allocation request are free.
  784                                                  */
  785                                                 pa = VM_PAGE_TO_PHYS(m_ret);
  786                                                 pa_last = pa + size;
  787                                                 for (;;) {
  788                                                         pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
  789                                                         if (pa >= pa_last)
  790                                                                 break;
  791                                                         if (pa < seg->start ||
  792                                                             pa >= seg->end)
  793                                                                 break;
  794                                                         m = &seg->first_page[atop(pa - seg->start)];
  795                                                         if (m->order != VM_NFREEORDER - 1)
  796                                                                 break;
  797                                                 }
  798                                                 /* If not, continue to the next block. */
  799                                                 if (pa < pa_last)
  800                                                         continue;
  801                                         }
  802 
  803                                         /*
  804                                          * Determine if the blocks are within the given range,
  805                                          * satisfy the given alignment, and do not cross the
  806                                          * given boundary.
  807                                          */
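                                               /*
                                                * The XOR test below succeeds exactly when the first and
                                                * last byte of the run lie in the same boundary-aligned
                                                * region, i.e. the run does not cross a multiple of
                                                * "boundary".  A boundary of zero yields a mask of zero,
                                                * so the test always passes.
                                                */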
  808                                         pa = VM_PAGE_TO_PHYS(m_ret);
  809                                         if (pa >= low &&
  810                                             pa + size <= high &&
  811                                             (pa & (alignment - 1)) == 0 &&
  812                                             ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
  813                                                 goto done;
  814                                 }
  815                         }
  816                 }
  817         }
  818 #if VM_NRESERVLEVEL > 0
  819         if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
  820                 goto retry;
  821 #endif
  822         mtx_unlock(&vm_page_queue_free_mtx);
  823         return (NULL);
  824 done:
  825         for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
  826                 fl = (*seg->free_queues)[m->pool];
  827                 TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
  828                 fl[m->order].lcnt--;
  829                 m->order = VM_NFREEORDER;
  830         }
  831         if (m_ret->pool != VM_FREEPOOL_DEFAULT)
  832                 vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
  833         fl = (*seg->free_queues)[m_ret->pool];
  834         vm_phys_split_pages(m_ret, oind, fl, order);
  835         for (i = 0; i < npages; i++) {
  836                 m = &m_ret[i];
  837                 vp = vm_page_alloc_init(m);
  838                 if (vp != NULL) {
  839                         /*
  840                          * Enqueue the vnode for deferred vdrop().
  841                          *
  842                          * Unmanaged pages don't use "pageq", so it
  843                          * can be safely abused to construct a short-
  844                          * lived queue of vnodes.
  845                          */
  846                         m->pageq.tqe_prev = (void *)vp;
  847                         m->pageq.tqe_next = deferred_vdrop_list;
  848                         deferred_vdrop_list = m;
  849                 }
  850         }
  851         for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
  852                 m = &m_ret[i];
  853                 KASSERT(m->order == VM_NFREEORDER,
  854                     ("vm_phys_alloc_contig: page %p has unexpected order %d",
  855                     m, m->order));
  856                 vm_phys_free_pages(m, 0);
  857         }
  858         mtx_unlock(&vm_page_queue_free_mtx);
  859         while (deferred_vdrop_list != NULL) {
  860                 vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
  861                 deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
  862         }
  863         return (m_ret);
  864 }
  865 
  866 #ifdef DDB
  867 /*
  868  * Show the number of physical pages in each of the free lists.
  869  */
  870 DB_SHOW_COMMAND(freepages, db_show_freepages)
  871 {
  872         struct vm_freelist *fl;
  873         int flind, oind, pind;
  874 
  875         for (flind = 0; flind < vm_nfreelists; flind++) {
  876                 db_printf("FREE LIST %d:\n"
  877                     "\n  ORDER (SIZE)  |  NUMBER"
  878                     "\n              ", flind);
  879                 for (pind = 0; pind < VM_NFREEPOOL; pind++)
  880                         db_printf("  |  POOL %d", pind);
  881                 db_printf("\n--            ");
  882                 for (pind = 0; pind < VM_NFREEPOOL; pind++)
  883                         db_printf("-- --      ");
  884                 db_printf("--\n");
  885                 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
  886                         db_printf("  %2.2d (%6.6dK)", oind,
  887                             1 << (PAGE_SHIFT - 10 + oind));
  888                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
  889                                 fl = vm_phys_free_queues[flind][pind];
  890                                 db_printf("  |  %6.6d", fl[oind].lcnt);
  891                         }
  892                         db_printf("\n");
  893                 }
  894                 db_printf("\n");
  895         }
  896 }
  897 #endif
