The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/mm/hugetlb_cgroup.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  *
    3  * Copyright IBM Corporation, 2012
    4  * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
    5  *
    6  * This program is free software; you can redistribute it and/or modify it
    7  * under the terms of version 2.1 of the GNU Lesser General Public License
    8  * as published by the Free Software Foundation.
    9  *
   10  * This program is distributed in the hope that it would be useful, but
   11  * WITHOUT ANY WARRANTY; without even the implied warranty of
   12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   13  *
   14  */
   15 
   16 #include <linux/cgroup.h>
   17 #include <linux/slab.h>
   18 #include <linux/hugetlb.h>
   19 #include <linux/hugetlb_cgroup.h>
   20 
   21 struct hugetlb_cgroup {
   22         struct cgroup_subsys_state css;
   23         /*
   24          * the counter to account for hugepages from hugetlb.
   25          */
   26         struct res_counter hugepage[HUGE_MAX_HSTATE];
   27 };
   28 
   29 #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
   30 #define MEMFILE_IDX(val)        (((val) >> 16) & 0xffff)
   31 #define MEMFILE_ATTR(val)       ((val) & 0xffff)
   32 
   33 struct cgroup_subsys hugetlb_subsys __read_mostly;
   34 static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
   35 
   36 static inline
   37 struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
   38 {
   39         return container_of(s, struct hugetlb_cgroup, css);
   40 }
   41 
   42 static inline
   43 struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup)
   44 {
   45         return hugetlb_cgroup_from_css(cgroup_subsys_state(cgroup,
   46                                                            hugetlb_subsys_id));
   47 }
   48 
   49 static inline
   50 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
   51 {
   52         return hugetlb_cgroup_from_css(task_subsys_state(task,
   53                                                          hugetlb_subsys_id));
   54 }
   55 
   56 static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
   57 {
   58         return (h_cg == root_h_cgroup);
   59 }
   60 
   61 static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg)
   62 {
   63         if (!cg->parent)
   64                 return NULL;
   65         return hugetlb_cgroup_from_cgroup(cg->parent);
   66 }
   67 
   68 static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
   69 {
   70         int idx;
   71         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg);
   72 
   73         for (idx = 0; idx < hugetlb_max_hstate; idx++) {
   74                 if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0)
   75                         return true;
   76         }
   77         return false;
   78 }
   79 
   80 static struct cgroup_subsys_state *hugetlb_cgroup_css_alloc(struct cgroup *cgroup)
   81 {
   82         int idx;
   83         struct cgroup *parent_cgroup;
   84         struct hugetlb_cgroup *h_cgroup, *parent_h_cgroup;
   85 
   86         h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
   87         if (!h_cgroup)
   88                 return ERR_PTR(-ENOMEM);
   89 
   90         parent_cgroup = cgroup->parent;
   91         if (parent_cgroup) {
   92                 parent_h_cgroup = hugetlb_cgroup_from_cgroup(parent_cgroup);
   93                 for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
   94                         res_counter_init(&h_cgroup->hugepage[idx],
   95                                          &parent_h_cgroup->hugepage[idx]);
   96         } else {
   97                 root_h_cgroup = h_cgroup;
   98                 for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
   99                         res_counter_init(&h_cgroup->hugepage[idx], NULL);
  100         }
  101         return &h_cgroup->css;
  102 }
  103 
  104 static void hugetlb_cgroup_css_free(struct cgroup *cgroup)
  105 {
  106         struct hugetlb_cgroup *h_cgroup;
  107 
  108         h_cgroup = hugetlb_cgroup_from_cgroup(cgroup);
  109         kfree(h_cgroup);
  110 }
  111 
  112 
  113 /*
  114  * Should be called with hugetlb_lock held.
  115  * Since we are holding hugetlb_lock, pages cannot get moved from
  116  * active list or uncharged from the cgroup, So no need to get
  117  * page reference and test for page active here. This function
  118  * cannot fail.
  119  */
  120 static void hugetlb_cgroup_move_parent(int idx, struct cgroup *cgroup,
  121                                        struct page *page)
  122 {
  123         int csize;
  124         struct res_counter *counter;
  125         struct res_counter *fail_res;
  126         struct hugetlb_cgroup *page_hcg;
  127         struct hugetlb_cgroup *h_cg   = hugetlb_cgroup_from_cgroup(cgroup);
  128         struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(cgroup);
  129 
  130         page_hcg = hugetlb_cgroup_from_page(page);
  131         /*
  132          * We can have pages in active list without any cgroup
  133          * ie, hugepage with less than 3 pages. We can safely
  134          * ignore those pages.
  135          */
  136         if (!page_hcg || page_hcg != h_cg)
  137                 goto out;
  138 
  139         csize = PAGE_SIZE << compound_order(page);
  140         if (!parent) {
  141                 parent = root_h_cgroup;
  142                 /* root has no limit */
  143                 res_counter_charge_nofail(&parent->hugepage[idx],
  144                                           csize, &fail_res);
  145         }
  146         counter = &h_cg->hugepage[idx];
  147         res_counter_uncharge_until(counter, counter->parent, csize);
  148 
  149         set_hugetlb_cgroup(page, parent);
  150 out:
  151         return;
  152 }
  153 
  154 /*
  155  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
  156  * the parent cgroup.
  157  */
  158 static void hugetlb_cgroup_css_offline(struct cgroup *cgroup)
  159 {
  160         struct hstate *h;
  161         struct page *page;
  162         int idx = 0;
  163 
  164         do {
  165                 for_each_hstate(h) {
  166                         spin_lock(&hugetlb_lock);
  167                         list_for_each_entry(page, &h->hugepage_activelist, lru)
  168                                 hugetlb_cgroup_move_parent(idx, cgroup, page);
  169 
  170                         spin_unlock(&hugetlb_lock);
  171                         idx++;
  172                 }
  173                 cond_resched();
  174         } while (hugetlb_cgroup_have_usage(cgroup));
  175 }
  176 
  177 int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
  178                                  struct hugetlb_cgroup **ptr)
  179 {
  180         int ret = 0;
  181         struct res_counter *fail_res;
  182         struct hugetlb_cgroup *h_cg = NULL;
  183         unsigned long csize = nr_pages * PAGE_SIZE;
  184 
  185         if (hugetlb_cgroup_disabled())
  186                 goto done;
  187         /*
  188          * We don't charge any cgroup if the compound page have less
  189          * than 3 pages.
  190          */
  191         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
  192                 goto done;
  193 again:
  194         rcu_read_lock();
  195         h_cg = hugetlb_cgroup_from_task(current);
  196         if (!css_tryget(&h_cg->css)) {
  197                 rcu_read_unlock();
  198                 goto again;
  199         }
  200         rcu_read_unlock();
  201 
  202         ret = res_counter_charge(&h_cg->hugepage[idx], csize, &fail_res);
  203         css_put(&h_cg->css);
  204 done:
  205         *ptr = h_cg;
  206         return ret;
  207 }
  208 
  209 /* Should be called with hugetlb_lock held */
  210 void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
  211                                   struct hugetlb_cgroup *h_cg,
  212                                   struct page *page)
  213 {
  214         if (hugetlb_cgroup_disabled() || !h_cg)
  215                 return;
  216 
  217         set_hugetlb_cgroup(page, h_cg);
  218         return;
  219 }
  220 
  221 /*
  222  * Should be called with hugetlb_lock held
  223  */
  224 void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
  225                                   struct page *page)
  226 {
  227         struct hugetlb_cgroup *h_cg;
  228         unsigned long csize = nr_pages * PAGE_SIZE;
  229 
  230         if (hugetlb_cgroup_disabled())
  231                 return;
  232         VM_BUG_ON(!spin_is_locked(&hugetlb_lock));
  233         h_cg = hugetlb_cgroup_from_page(page);
  234         if (unlikely(!h_cg))
  235                 return;
  236         set_hugetlb_cgroup(page, NULL);
  237         res_counter_uncharge(&h_cg->hugepage[idx], csize);
  238         return;
  239 }
  240 
  241 void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
  242                                     struct hugetlb_cgroup *h_cg)
  243 {
  244         unsigned long csize = nr_pages * PAGE_SIZE;
  245 
  246         if (hugetlb_cgroup_disabled() || !h_cg)
  247                 return;
  248 
  249         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
  250                 return;
  251 
  252         res_counter_uncharge(&h_cg->hugepage[idx], csize);
  253         return;
  254 }
  255 
  256 static ssize_t hugetlb_cgroup_read(struct cgroup *cgroup, struct cftype *cft,
  257                                    struct file *file, char __user *buf,
  258                                    size_t nbytes, loff_t *ppos)
  259 {
  260         u64 val;
  261         char str[64];
  262         int idx, name, len;
  263         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
  264 
  265         idx = MEMFILE_IDX(cft->private);
  266         name = MEMFILE_ATTR(cft->private);
  267 
  268         val = res_counter_read_u64(&h_cg->hugepage[idx], name);
  269         len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val);
  270         return simple_read_from_buffer(buf, nbytes, ppos, str, len);
  271 }
  272 
  273 static int hugetlb_cgroup_write(struct cgroup *cgroup, struct cftype *cft,
  274                                 const char *buffer)
  275 {
  276         int idx, name, ret;
  277         unsigned long long val;
  278         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
  279 
  280         idx = MEMFILE_IDX(cft->private);
  281         name = MEMFILE_ATTR(cft->private);
  282 
  283         switch (name) {
  284         case RES_LIMIT:
  285                 if (hugetlb_cgroup_is_root(h_cg)) {
  286                         /* Can't set limit on root */
  287                         ret = -EINVAL;
  288                         break;
  289                 }
  290                 /* This function does all necessary parse...reuse it */
  291                 ret = res_counter_memparse_write_strategy(buffer, &val);
  292                 if (ret)
  293                         break;
  294                 ret = res_counter_set_limit(&h_cg->hugepage[idx], val);
  295                 break;
  296         default:
  297                 ret = -EINVAL;
  298                 break;
  299         }
  300         return ret;
  301 }
  302 
  303 static int hugetlb_cgroup_reset(struct cgroup *cgroup, unsigned int event)
  304 {
  305         int idx, name, ret = 0;
  306         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
  307 
  308         idx = MEMFILE_IDX(event);
  309         name = MEMFILE_ATTR(event);
  310 
  311         switch (name) {
  312         case RES_MAX_USAGE:
  313                 res_counter_reset_max(&h_cg->hugepage[idx]);
  314                 break;
  315         case RES_FAILCNT:
  316                 res_counter_reset_failcnt(&h_cg->hugepage[idx]);
  317                 break;
  318         default:
  319                 ret = -EINVAL;
  320                 break;
  321         }
  322         return ret;
  323 }
  324 
  325 static char *mem_fmt(char *buf, int size, unsigned long hsize)
  326 {
  327         if (hsize >= (1UL << 30))
  328                 snprintf(buf, size, "%luGB", hsize >> 30);
  329         else if (hsize >= (1UL << 20))
  330                 snprintf(buf, size, "%luMB", hsize >> 20);
  331         else
  332                 snprintf(buf, size, "%luKB", hsize >> 10);
  333         return buf;
  334 }
  335 
  336 static void __init __hugetlb_cgroup_file_init(int idx)
  337 {
  338         char buf[32];
  339         struct cftype *cft;
  340         struct hstate *h = &hstates[idx];
  341 
  342         /* format the size */
  343         mem_fmt(buf, 32, huge_page_size(h));
  344 
  345         /* Add the limit file */
  346         cft = &h->cgroup_files[0];
  347         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
  348         cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
  349         cft->read = hugetlb_cgroup_read;
  350         cft->write_string = hugetlb_cgroup_write;
  351 
  352         /* Add the usage file */
  353         cft = &h->cgroup_files[1];
  354         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
  355         cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
  356         cft->read = hugetlb_cgroup_read;
  357 
  358         /* Add the MAX usage file */
  359         cft = &h->cgroup_files[2];
  360         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
  361         cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
  362         cft->trigger = hugetlb_cgroup_reset;
  363         cft->read = hugetlb_cgroup_read;
  364 
  365         /* Add the failcntfile */
  366         cft = &h->cgroup_files[3];
  367         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
  368         cft->private  = MEMFILE_PRIVATE(idx, RES_FAILCNT);
  369         cft->trigger  = hugetlb_cgroup_reset;
  370         cft->read = hugetlb_cgroup_read;
  371 
  372         /* NULL terminate the last cft */
  373         cft = &h->cgroup_files[4];
  374         memset(cft, 0, sizeof(*cft));
  375 
  376         WARN_ON(cgroup_add_cftypes(&hugetlb_subsys, h->cgroup_files));
  377 
  378         return;
  379 }
  380 
  381 void __init hugetlb_cgroup_file_init(void)
  382 {
  383         struct hstate *h;
  384 
  385         for_each_hstate(h) {
  386                 /*
  387                  * Add cgroup control files only if the huge page consists
  388                  * of more than two normal pages. This is because we use
  389                  * page[2].lru.next for storing cgroup details.
  390                  */
  391                 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
  392                         __hugetlb_cgroup_file_init(hstate_index(h));
  393         }
  394 }
  395 
  396 /*
  397  * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
  398  * when we migrate hugepages
  399  */
  400 void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
  401 {
  402         struct hugetlb_cgroup *h_cg;
  403         struct hstate *h = page_hstate(oldhpage);
  404 
  405         if (hugetlb_cgroup_disabled())
  406                 return;
  407 
  408         VM_BUG_ON(!PageHuge(oldhpage));
  409         spin_lock(&hugetlb_lock);
  410         h_cg = hugetlb_cgroup_from_page(oldhpage);
  411         set_hugetlb_cgroup(oldhpage, NULL);
  412 
  413         /* move the h_cg details to new cgroup */
  414         set_hugetlb_cgroup(newhpage, h_cg);
  415         list_move(&newhpage->lru, &h->hugepage_activelist);
  416         spin_unlock(&hugetlb_lock);
  417         return;
  418 }
  419 
  420 struct cgroup_subsys hugetlb_subsys = {
  421         .name = "hugetlb",
  422         .css_alloc      = hugetlb_cgroup_css_alloc,
  423         .css_offline    = hugetlb_cgroup_css_offline,
  424         .css_free       = hugetlb_cgroup_css_free,
  425         .subsys_id      = hugetlb_subsys_id,
  426 };

Cache object: 90cfca3508f799744aaa90bbfa8cfda9


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.