The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/mm/oom_kill.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  *  linux/mm/oom_kill.c
    3  * 
    4  *  Copyright (C)  1998,2000  Rik van Riel
    5  *      Thanks go out to Claus Fischer for some serious inspiration and
    6  *      for goading me into coding this file...
    7  *
    8  *  The routines in this file are used to kill a process when
    9  *  we're seriously out of memory. This gets called from kswapd()
   10  *  in linux/mm/vmscan.c when we really run out of memory.
   11  *
   12  *  Since we won't call these routines often (on a well-configured
   13  *  machine) this file will double as a 'coding guide' and a signpost
   14  *  for newbie kernel hackers. It features several pointers to major
   15  *  kernel subsystems and hints as to where to find out what things do.
   16  */
   17 
   18 #include <linux/mm.h>
   19 #include <linux/sched.h>
   20 #include <linux/swap.h>
   21 #include <linux/swapctl.h>
   22 #include <linux/timex.h>
   23 
   24 /* #define DEBUG */
   25 
   26 /**
   27  * int_sqrt - oom_kill.c internal function, rough approximation to sqrt
   28  * @x: integer of which to calculate the sqrt
   29  * 
   30  * A very rough approximation to the sqrt() function.
   31  */
   32 static unsigned int int_sqrt(unsigned int x)
   33 {
   34         unsigned int out = x;
   35         while (x & ~(unsigned int)1) x >>=2, out >>=1;
   36         if (x) out -= out >> 2;
   37         return (out ? out : 1);
   38 }       
   39 
   40 /**
   41  * oom_badness - calculate a numeric value for how bad this task has been
   42  * @p: task struct of which task we should calculate
   43  *
   44  * The formula used is relatively simple and documented inline in the
   45  * function. The main rationale is that we want to select a good task
   46  * to kill when we run out of memory.
   47  *
   48  * Good in this context means that:
   49  * 1) we lose the minimum amount of work done
   50  * 2) we recover a large amount of memory
   51  * 3) we don't kill anything innocent of eating tons of memory
   52  * 4) we want to kill the minimum amount of processes (one)
   53  * 5) we try to kill the process the user expects us to kill, this
   54  *    algorithm has been meticulously tuned to meet the priniciple
   55  *    of least surprise ... (be careful when you change it)
   56  */
   57 
   58 static int badness(struct task_struct *p)
   59 {
   60         int points, cpu_time, run_time;
   61 
   62         if (!p->mm)
   63                 return 0;
   64 
   65         if (p->flags & PF_MEMDIE)
   66                 return 0;
   67 
   68         /*
   69          * The memory size of the process is the basis for the badness.
   70          */
   71         points = p->mm->total_vm;
   72 
   73         /*
   74          * CPU time is in seconds and run time is in minutes. There is no
   75          * particular reason for this other than that it turned out to work
   76          * very well in practice. This is not safe against jiffie wraps
   77          * but we don't care _that_ much...
   78          */
   79         cpu_time = (p->times.tms_utime + p->times.tms_stime) >> (SHIFT_HZ + 3);
   80         run_time = (jiffies - p->start_time) >> (SHIFT_HZ + 10);
   81 
   82         points /= int_sqrt(cpu_time);
   83         points /= int_sqrt(int_sqrt(run_time));
   84 
   85         /*
   86          * Niced processes are most likely less important, so double
   87          * their badness points.
   88          */
   89         if (p->nice > 0)
   90                 points *= 2;
   91 
   92         /*
   93          * Superuser processes are usually more important, so we make it
   94          * less likely that we kill those.
   95          */
   96         if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN) ||
   97                                 p->uid == 0 || p->euid == 0)
   98                 points /= 4;
   99 
  100         /*
  101          * We don't want to kill a process with direct hardware access.
  102          * Not only could that mess up the hardware, but usually users
  103          * tend to only have this flag set on applications they think
  104          * of as important.
  105          */
  106         if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO))
  107                 points /= 4;
  108 #ifdef DEBUG
  109         printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n",
  110         p->pid, p->comm, points);
  111 #endif
  112         return points;
  113 }
  114 
  115 /*
  116  * Simple selection loop. We chose the process with the highest
  117  * number of 'points'. We expect the caller will lock the tasklist.
  118  *
  119  * (not docbooked, we don't want this one cluttering up the manual)
  120  */
  121 static struct task_struct * select_bad_process(void)
  122 {
  123         int maxpoints = 0;
  124         struct task_struct *p = NULL;
  125         struct task_struct *chosen = NULL;
  126 
  127         for_each_task(p) {
  128                 if (p->pid) {
  129                         int points = badness(p);
  130                         if (points > maxpoints) {
  131                                 chosen = p;
  132                                 maxpoints = points;
  133                         }
  134                 }
  135         }
  136         return chosen;
  137 }
  138 
  139 /**
  140  * We must be careful though to never send SIGKILL a process with
  141  * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
  142  * we select a process with CAP_SYS_RAW_IO set).
  143  */
  144 void oom_kill_task(struct task_struct *p)
  145 {
  146         printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", p->pid, p->comm);
  147 
  148         /*
  149          * We give our sacrificial lamb high priority and access to
  150          * all the memory it needs. That way it should be able to
  151          * exit() and clear out its resources quickly...
  152          */
  153         p->counter = 5 * HZ;
  154         p->flags |= PF_MEMALLOC | PF_MEMDIE;
  155 
  156         /* This process has hardware access, be more careful. */
  157         if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
  158                 force_sig(SIGTERM, p);
  159         } else {
  160                 force_sig(SIGKILL, p);
  161         }
  162 }
  163 
  164 /**
  165  * oom_kill - kill the "best" process when we run out of memory
  166  *
  167  * If we run out of memory, we have the choice between either
  168  * killing a random task (bad), letting the system crash (worse)
  169  * OR try to be smart about which process to kill. Note that we
  170  * don't have to be perfect here, we just have to be good.
  171  */
  172 static void oom_kill(void)
  173 {
  174         struct task_struct *p, *q;
  175 
  176         read_lock(&tasklist_lock);
  177         p = select_bad_process();
  178 
  179         /* Found nothing?!?! Either we hang forever, or we panic. */
  180         if (p == NULL)
  181                 panic("Out of memory and no killable processes...\n");
  182 
  183         /* kill all processes that share the ->mm (i.e. all threads) */
  184         for_each_task(q) {
  185                 if (q->mm == p->mm)
  186                         oom_kill_task(q);
  187         }
  188         read_unlock(&tasklist_lock);
  189 
  190         /*
  191          * Make kswapd go out of the way, so "p" has a good chance of
  192          * killing itself before someone else gets the chance to ask
  193          * for more memory.
  194          */
  195         yield();
  196         return;
  197 }
  198 
  199 /**
  200  * out_of_memory - is the system out of memory?
  201  */
  202 void out_of_memory(void)
  203 {
  204         static unsigned long first, last, count, lastkill;
  205         unsigned long now, since;
  206 
  207         /*
  208          * Enough swap space left?  Not OOM.
  209          */
  210         if (nr_swap_pages > 0)
  211                 return;
  212 
  213         now = jiffies;
  214         since = now - last;
  215         last = now;
  216 
  217         /*
  218          * If it's been a long time since last failure,
  219          * we're not oom.
  220          */
  221         last = now;
  222         if (since > 5*HZ)
  223                 goto reset;
  224 
  225         /*
  226          * If we haven't tried for at least one second,
  227          * we're not really oom.
  228          */
  229         since = now - first;
  230         if (since < HZ)
  231                 return;
  232 
  233         /*
  234          * If we have gotten only a few failures,
  235          * we're not really oom. 
  236          */
  237         if (++count < 10)
  238                 return;
  239 
  240         /*
  241          * If we just killed a process, wait a while
  242          * to give that task a chance to exit. This
  243          * avoids killing multiple processes needlessly.
  244          */
  245         since = now - lastkill;
  246         if (since < HZ*5)
  247                 return;
  248 
  249         /*
  250          * Ok, really out of memory. Kill something.
  251          */
  252         lastkill = now;
  253         oom_kill();
  254 
  255 reset:
  256         first = now;
  257         count = 0;
  258 }

Cache object: 88e9948792d72311fb547b41f1ba7525


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.