The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/xen/balloon/balloon.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /******************************************************************************
    2  * balloon.c
    3  *
    4  * Xen balloon driver - enables returning/claiming memory to/from Xen.
    5  *
    6  * Copyright (c) 2003, B Dragovic
    7  * Copyright (c) 2003-2004, M Williamson, K Fraser
    8  * Copyright (c) 2005 Dan M. Smith, IBM Corporation
    9  * 
   10  * This file may be distributed separately from the Linux kernel, or
   11  * incorporated into other software packages, subject to the following license:
   12  * 
   13  * Permission is hereby granted, free of charge, to any person obtaining a copy
   14  * of this source file (the "Software"), to deal in the Software without
   15  * restriction, including without limitation the rights to use, copy, modify,
   16  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   17  * and to permit persons to whom the Software is furnished to do so, subject to
   18  * the following conditions:
   19  * 
   20  * The above copyright notice and this permission notice shall be included in
   21  * all copies or substantial portions of the Software.
   22  * 
   23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   25  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   28  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   29  * IN THE SOFTWARE.
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD$");
   34 
   35 #include <sys/param.h>
   36 #include <sys/lock.h>
   37 #include <sys/kernel.h>
   38 #include <sys/kthread.h>
   39 #include <sys/malloc.h>
   40 #include <sys/mutex.h>
   41 #include <sys/sysctl.h>
   42 #include <sys/module.h>
   43 
   44 #include <vm/vm.h>
   45 #include <vm/vm_page.h>
   46 
   47 #include <xen/xen-os.h>
   48 #include <xen/hypervisor.h>
   49 #include <xen/features.h>
   50 #include <xen/xenstore/xenstorevar.h>
   51 
   52 static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");
   53 
   54 /* Convert from KB (as fetched from xenstore) to number of PAGES */
   55 #define KB_TO_PAGE_SHIFT        (PAGE_SHIFT - 10)
   56 
   57 struct mtx balloon_mutex;
   58 
   59 /* We increase/decrease in batches which fit in a page */
   60 static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)];
   61 
   62 struct balloon_stats {
   63         /* We aim for 'current allocation' == 'target allocation'. */
   64         unsigned long current_pages;
   65         unsigned long target_pages;
   66         /* We may hit the hard limit in Xen. If we do then we remember it. */
   67         unsigned long hard_limit;
   68         /*
   69          * Drivers may alter the memory reservation independently, but they
   70          * must inform the balloon driver so we avoid hitting the hard limit.
   71          */
   72         unsigned long driver_pages;
   73         /* Number of pages in high- and low-memory balloons. */
   74         unsigned long balloon_low;
   75         unsigned long balloon_high;
   76 };
   77 
   78 static struct balloon_stats balloon_stats;
   79 #define bs balloon_stats
   80 
   81 SYSCTL_DECL(_dev_xen);
   82 static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon,
   83     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
   84     "Balloon");
   85 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD,
   86     &bs.current_pages, 0, "Current allocation");
   87 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD,
   88     &bs.target_pages, 0, "Target allocation");
   89 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD,
   90     &bs.driver_pages, 0, "Driver pages");
   91 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD,
   92     &bs.hard_limit, 0, "Xen hard limit");
   93 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD,
   94     &bs.balloon_low, 0, "Low-mem balloon");
   95 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
   96     &bs.balloon_high, 0, "High-mem balloon");
   97 
   98 /* List of ballooned pages, linked through each vm_page's plinks.q entry. */
   99 static TAILQ_HEAD(,vm_page) ballooned_pages;
  100 
  101 /* Main work function, always executed in process context. */
  102 static void balloon_process(void *unused);
  103 
  104 #define IPRINTK(fmt, args...) \
  105         printk(KERN_INFO "xen_mem: " fmt, ##args)
  106 #define WPRINTK(fmt, args...) \
  107         printk(KERN_WARNING "xen_mem: " fmt, ##args)
  108 
  109 static unsigned long 
  110 current_target(void)
  111 {
  112         unsigned long target = min(bs.target_pages, bs.hard_limit);
  113         if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
  114                 target = bs.current_pages + bs.balloon_low + bs.balloon_high;
  115         return (target);
  116 }
  117 
  118 static unsigned long
  119 minimum_target(void)
  120 {
  121         unsigned long min_pages, curr_pages = current_target();
  122 
  123 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
  124         /*
  125          * Simple continuous piecewiese linear function:
  126          *  max MiB -> min MiB  gradient
  127          *       0         0
  128          *      16        16
  129          *      32        24
  130          *     128        72    (1/2)
  131          *     512       168    (1/4)
  132          *    2048       360    (1/8)
  133          *    8192       552    (1/32)
  134          *   32768      1320
  135          *  131072      4392
  136          */
  137         if (realmem < MB2PAGES(128))
  138                 min_pages = MB2PAGES(8) + (realmem >> 1);
  139         else if (realmem < MB2PAGES(512))
  140                 min_pages = MB2PAGES(40) + (realmem >> 2);
  141         else if (realmem < MB2PAGES(2048))
  142                 min_pages = MB2PAGES(104) + (realmem >> 3);
  143         else
  144                 min_pages = MB2PAGES(296) + (realmem >> 5);
  145 #undef MB2PAGES
  146 
  147         /* Don't enforce growth */
  148         return (min(min_pages, curr_pages));
  149 }
  150 
  151 static int 
  152 increase_reservation(unsigned long nr_pages)
  153 {
  154         unsigned long  i;
  155         vm_page_t      page;
  156         long           rc;
  157         struct xen_memory_reservation reservation = {
  158                 .address_bits = 0,
  159                 .extent_order = 0,
  160                 .domid        = DOMID_SELF
  161         };
  162 
  163         mtx_assert(&balloon_mutex, MA_OWNED);
  164 
  165         if (nr_pages > nitems(frame_list))
  166                 nr_pages = nitems(frame_list);
  167 
  168         for (page = TAILQ_FIRST(&ballooned_pages), i = 0;
  169             i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) {
  170                 KASSERT(page != NULL, ("ballooned_pages list corrupt"));
  171                 frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
  172         }
  173 
  174         set_xen_guest_handle(reservation.extent_start, frame_list);
  175         reservation.nr_extents   = nr_pages;
  176         rc = HYPERVISOR_memory_op(
  177                 XENMEM_populate_physmap, &reservation);
  178         if (rc < nr_pages) {
  179                 if (rc > 0) {
  180                         int ret __diagused;
  181 
  182                         /* We hit the Xen hard limit: reprobe. */
  183                         reservation.nr_extents = rc;
  184                         ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
  185                                         &reservation);
  186                         KASSERT(ret == rc, ("HYPERVISOR_memory_op failed"));
  187                 }
  188                 if (rc >= 0)
  189                         bs.hard_limit = (bs.current_pages + rc -
  190                                          bs.driver_pages);
  191                 goto out;
  192         }
  193 
  194         for (i = 0; i < nr_pages; i++) {
  195                 page = TAILQ_FIRST(&ballooned_pages);
  196                 KASSERT(page != NULL, ("Unable to get ballooned page"));
  197                 TAILQ_REMOVE(&ballooned_pages, page, plinks.q);
  198                 bs.balloon_low--;
  199 
  200                 KASSERT(xen_feature(XENFEAT_auto_translated_physmap),
  201                     ("auto translated physmap but mapping is valid"));
  202 
  203                 vm_page_free(page);
  204         }
  205 
  206         bs.current_pages += nr_pages;
  207 
  208  out:
  209         return (0);
  210 }
  211 
  212 static int
  213 decrease_reservation(unsigned long nr_pages)
  214 {
  215         unsigned long  i;
  216         vm_page_t      page;
  217         int            need_sleep = 0;
  218         int ret __diagused;
  219         struct xen_memory_reservation reservation = {
  220                 .address_bits = 0,
  221                 .extent_order = 0,
  222                 .domid        = DOMID_SELF
  223         };
  224 
  225         mtx_assert(&balloon_mutex, MA_OWNED);
  226 
  227         if (nr_pages > nitems(frame_list))
  228                 nr_pages = nitems(frame_list);
  229 
  230         for (i = 0; i < nr_pages; i++) {
  231                 /*
  232                  * Zero the page, or else we might be leaking important data to
  233                  * other domains on the same host. Xen doesn't scrub ballooned
  234                  * out memory pages, the guest is in charge of making sure that
  235                  * no information is leaked.
  236                  */
  237                 if ((page = vm_page_alloc_noobj(VM_ALLOC_ZERO)) == NULL) {
  238                         nr_pages = i;
  239                         need_sleep = 1;
  240                         break;
  241                 }
  242 
  243                 frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
  244 
  245                 TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
  246                 bs.balloon_low++;
  247         }
  248 
  249         set_xen_guest_handle(reservation.extent_start, frame_list);
  250         reservation.nr_extents   = nr_pages;
  251         ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
  252         KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));
  253 
  254         bs.current_pages -= nr_pages;
  255 
  256         return (need_sleep);
  257 }
  258 
  259 /*
  260  * We avoid multiple worker processes conflicting via the balloon mutex.
  261  * We may of course race updates of the target counts (which are protected
  262  * by the balloon lock), or with changes to the Xen hard limit, but we will
  263  * recover from these in time.
  264  */
  265 static void 
  266 balloon_process(void *unused)
  267 {
  268         int need_sleep = 0;
  269         long credit;
  270 
  271         mtx_lock(&balloon_mutex);
  272         for (;;) {
  273                 int sleep_time;
  274 
  275                 do {
  276                         credit = current_target() - bs.current_pages;
  277                         if (credit > 0)
  278                                 need_sleep = (increase_reservation(credit) != 0);
  279                         if (credit < 0)
  280                                 need_sleep = (decrease_reservation(-credit) != 0);
  281                         
  282                 } while ((credit != 0) && !need_sleep);
  283                 
  284                 /* Schedule more work if there is some still to be done. */
  285                 if (current_target() != bs.current_pages)
  286                         sleep_time = hz;
  287                 else
  288                         sleep_time = 0;
  289 
  290                 msleep(balloon_process, &balloon_mutex, 0, "balloon",
  291                        sleep_time);
  292         }
  293         mtx_unlock(&balloon_mutex);
  294 }
  295 
  296 /* Resets the Xen limit, sets new target, and kicks off processing. */
  297 static void 
  298 set_new_target(unsigned long target)
  299 {
  300         /* No need for lock. Not read-modify-write updates. */
  301         bs.hard_limit   = ~0UL;
  302         bs.target_pages = max(target, minimum_target());
  303         wakeup(balloon_process);
  304 }
  305 
  306 static struct xs_watch target_watch =
  307 {
  308         .node = "memory/target",
  309         .max_pending = 1,
  310 };
  311 
  312 /* React to a change in the target key */
  313 static void 
  314 watch_target(struct xs_watch *watch,
  315              const char **vec, unsigned int len)
  316 {
  317         unsigned long long new_target;
  318         int err;
  319 
  320         err = xs_scanf(XST_NIL, "memory", "target", NULL,
  321             "%llu", &new_target);
  322         if (err) {
  323                 /* This is ok (for domain0 at least) - so just return */
  324                 return;
  325         } 
  326         
  327         /*
  328          * The given memory/target value is in KiB, so it needs converting to
  329          * pages.  PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
  330          */
  331         set_new_target(new_target >> KB_TO_PAGE_SHIFT);
  332 }
  333 
  334 /*------------------ Private Device Attachment Functions  --------------------*/
  335 /**
  336  * \brief Identify instances of this device type in the system.
  337  *
  338  * \param driver  The driver performing this identify action.
  339  * \param parent  The NewBus parent device for any devices this method adds.
  340  */
  341 static void
  342 xenballoon_identify(driver_t *driver __unused, device_t parent)
  343 {
  344         /*
  345          * A single device instance for our driver is always present
  346          * in a system operating under Xen.
  347          */
  348         BUS_ADD_CHILD(parent, 0, driver->name, 0);
  349 }
  350 
  351 /**
  352  * \brief Probe for the existence of the Xen Balloon device
  353  *
  354  * \param dev  NewBus device_t for this Xen control instance.
  355  *
  356  * \return  Always returns 0 indicating success.
  357  */
  358 static int 
  359 xenballoon_probe(device_t dev)
  360 {
  361 
  362         device_set_desc(dev, "Xen Balloon Device");
  363         return (0);
  364 }
  365 
  366 /**
  367  * \brief Attach the Xen Balloon device.
  368  *
  369  * \param dev  NewBus device_t for this Xen control instance.
  370  *
  371  * \return  On success, 0. Otherwise an errno value indicating the
  372  *          type of failure.
  373  */
  374 static int
  375 xenballoon_attach(device_t dev)
  376 {
  377         int err;
  378 
  379         mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
  380 
  381         bs.current_pages = realmem;
  382         bs.target_pages  = bs.current_pages;
  383         bs.balloon_low   = 0;
  384         bs.balloon_high  = 0;
  385         bs.driver_pages  = 0UL;
  386         bs.hard_limit    = ~0UL;
  387 
  388         kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon");
  389     
  390         target_watch.callback = watch_target;
  391 
  392         err = xs_register_watch(&target_watch);
  393         if (err)
  394                 device_printf(dev,
  395                     "xenballon: failed to set balloon watcher\n");
  396 
  397         return (err);
  398 }
  399 
  400 /*-------------------- Private Device Attachment Data  -----------------------*/
  401 static device_method_t xenballoon_methods[] = {
  402         /* Device interface */
  403         DEVMETHOD(device_identify,      xenballoon_identify),
  404         DEVMETHOD(device_probe,         xenballoon_probe),
  405         DEVMETHOD(device_attach,        xenballoon_attach),
  406 
  407         DEVMETHOD_END
  408 };
  409 
  410 DEFINE_CLASS_0(xenballoon, xenballoon_driver, xenballoon_methods, 0);
  411 
  412 DRIVER_MODULE(xenballoon, xenstore, xenballoon_driver, NULL, NULL);

Cache object: d248912e3e271378a7c89c04d991a218


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.