The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/sched_m2.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: sched_m2.c,v 1.27 2008/10/18 03:44:04 rmind Exp $      */
    2 
    3 /*
    4  * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
    5  * All rights reserved.
    6  * 
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * TODO:
   31  *  - Implementation of fair share queue;
   32  *  - Support for NUMA;
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __KERNEL_RCSID(0, "$NetBSD: sched_m2.c,v 1.27 2008/10/18 03:44:04 rmind Exp $");
   37 
   38 #include <sys/param.h>
   39 
   40 #include <sys/bitops.h>
   41 #include <sys/cpu.h>
   42 #include <sys/callout.h>
   43 #include <sys/errno.h>
   44 #include <sys/kernel.h>
   45 #include <sys/kmem.h>
   46 #include <sys/lwp.h>
   47 #include <sys/mutex.h>
   48 #include <sys/pool.h>
   49 #include <sys/proc.h>
   50 #include <sys/pset.h>
   51 #include <sys/resource.h>
   52 #include <sys/resourcevar.h>
   53 #include <sys/sched.h>
   54 #include <sys/syscallargs.h>
   55 #include <sys/sysctl.h>
   56 #include <sys/types.h>
   57 
/*
 * Priority related definitions.
 *
 * The global priority space [0 .. PRI_COUNT) is split into a
 * time-sharing range [0 .. PRI_HIGHEST_TS] and a real-time range
 * above it.  PRI_HTS_RANGE is the top slice of the time-sharing
 * range used as the target band for priority boosts (see high_pri).
 */
#define PRI_TS_COUNT	(NPRI_USER)
#define PRI_RT_COUNT	(PRI_COUNT - PRI_TS_COUNT)
#define PRI_HTS_RANGE	(PRI_TS_COUNT / 10)

#define PRI_HIGHEST_TS	(MAXPRI_USER)

/*
 * Time-slices and priorities.
 */
static u_int	min_ts;			/* Minimal time-slice */
static u_int	max_ts;			/* Maximal time-slice */
static u_int	rt_ts;			/* Real-time time-slice */
static u_int	ts_map[PRI_COUNT];	/* Map of time-slices */
static pri_t	high_pri[PRI_COUNT];	/* Map for priority increase */

static void	sched_precalcts(void);
   77 
   78 /*
   79  * Initialization and setup.
   80  */
   81 
   82 void
   83 sched_rqinit(void)
   84 {
   85         struct cpu_info *ci = curcpu();
   86 
   87         if (hz < 100) {
   88                 panic("sched_rqinit: value of HZ is too low\n");
   89         }
   90 
   91         /* Default timing ranges */
   92         min_ts = mstohz(20);                    /*  ~20 ms */
   93         max_ts = mstohz(150);                   /* ~150 ms */
   94         rt_ts = mstohz(100);                    /* ~100 ms */
   95         sched_precalcts();
   96 
   97         /* Attach the primary CPU here */
   98         sched_cpuattach(ci);
   99 
  100         sched_lwp_fork(NULL, &lwp0);
  101         sched_newts(&lwp0);
  102 }
  103 
  104 /* Pre-calculate the time-slices for the priorities */
  105 static void
  106 sched_precalcts(void)
  107 {
  108         pri_t p;
  109 
  110         /* Time-sharing range */
  111         for (p = 0; p <= PRI_HIGHEST_TS; p++) {
  112                 ts_map[p] = max_ts -
  113                     (p * 100 / (PRI_TS_COUNT - 1) * (max_ts - min_ts) / 100);
  114                 high_pri[p] = (PRI_HIGHEST_TS - PRI_HTS_RANGE) +
  115                     ((p * PRI_HTS_RANGE) / (PRI_TS_COUNT - 1));
  116         }
  117 
  118         /* Real-time range */
  119         for (p = (PRI_HIGHEST_TS + 1); p < PRI_COUNT; p++) {
  120                 ts_map[p] = rt_ts;
  121                 high_pri[p] = p;
  122         }
  123 }
  124 
  125 /*
  126  * Hooks.
  127  */
  128 
  129 void
  130 sched_proc_fork(struct proc *parent, struct proc *child)
  131 {
  132         struct lwp *l;
  133 
  134         LIST_FOREACH(l, &child->p_lwps, l_sibling) {
  135                 lwp_lock(l);
  136                 sched_newts(l);
  137                 lwp_unlock(l);
  138         }
  139 }
  140 
/* sched_proc_exit: process exit hook; M2 keeps no per-process state. */
void
sched_proc_exit(struct proc *child, struct proc *parent)
{

}
  146 
/* sched_lwp_fork: LWP creation hook; no extra per-LWP state to set up. */
void
sched_lwp_fork(struct lwp *l1, struct lwp *l2)
{

}
  152 
/* sched_lwp_collect: LWP reaping hook; nothing to release in M2. */
void
sched_lwp_collect(struct lwp *l)
{

}
  158 
/* sched_setrunnable: LWP made runnable; no M2-specific action needed. */
void
sched_setrunnable(struct lwp *l)
{

}
  164 
/*
 * sched_schedclock: schedclock() hook; unused — M2 does its per-tick
 * work in sched_tick() instead.
 */
void
sched_schedclock(struct lwp *l)
{

}
  170 
  171 /*
  172  * Priorities and time-slice.
  173  */
  174 
  175 void
  176 sched_nice(struct proc *p, int prio)
  177 {
  178         struct lwp *l;
  179         int n;
  180 
  181         KASSERT(mutex_owned(p->p_lock));
  182 
  183         p->p_nice = prio;
  184         n = (prio - NZERO) >> 2;
  185         if (n == 0)
  186                 return;
  187 
  188         LIST_FOREACH(l, &p->p_lwps, l_sibling) {
  189                 lwp_lock(l);
  190                 if (l->l_class == SCHED_OTHER) {
  191                         pri_t pri = l->l_priority - n;
  192                         pri = (n < 0) ? min(pri, PRI_HIGHEST_TS) : imax(pri, 0);
  193                         lwp_changepri(l, pri);
  194                 }
  195                 lwp_unlock(l);
  196         }
  197 }
  198 
/* Recalculate the time-slice from the effective priority of the LWP */
void
sched_newts(struct lwp *l)
{

	l->l_sched.timeslice = ts_map[lwp_eprio(l)];
}
  206 
  207 void
  208 sched_slept(struct lwp *l)
  209 {
  210 
  211         /*
  212          * If thread is in time-sharing queue and batch flag is not marked,
  213          * increase the the priority, and run with the lower time-quantum.
  214          */
  215         if (l->l_priority < PRI_HIGHEST_TS && (l->l_flag & LW_BATCH) == 0) {
  216                 struct proc *p = l->l_proc;
  217 
  218                 KASSERT(l->l_class == SCHED_OTHER);
  219                 if (__predict_false(p->p_nice < NZERO)) {
  220                         const int n = max((NZERO - p->p_nice) >> 2, 1);
  221                         l->l_priority = min(l->l_priority + n, PRI_HIGHEST_TS);
  222                 } else {
  223                         l->l_priority++;
  224                 }
  225         }
  226 }
  227 
/*
 * sched_wakeup: the LWP is waking up after a sleep.
 */
void
sched_wakeup(struct lwp *l)
{

	/*
	 * If thread was sleeping a second or more - set a high priority:
	 * long sleepers are assumed interactive and are boosted into the
	 * top time-sharing band via the pre-calculated high_pri map.
	 */
	if (l->l_slptime >= 1)
		l->l_priority = high_pri[l->l_priority];
}
  236 
/*
 * sched_pstats_hook: periodic (per-second) per-LWP estimation hook.
 * Decays the priority of CPU-bound (batch) threads and boosts threads
 * that have been runnable but starved of CPU for a second or more.
 *
 * => must be called with the LWP locked.
 */
void
sched_pstats_hook(struct lwp *l, int batch)
{
	pri_t prio;

	/*
	 * Estimate threads on time-sharing queue only, however,
	 * exclude the highest priority for performance purposes.
	 */
	KASSERT(lwp_locked(l, NULL));
	if (l->l_priority >= PRI_HIGHEST_TS)
		return;
	KASSERT(l->l_class == SCHED_OTHER);

	/* If it is CPU-bound and not for the first time - decrease the priority */
	prio = l->l_priority;
	if (batch && prio != 0)
		prio--;

	/* If the thread has not run for a second or more - set a high priority */
	if (l->l_stat == LSRUN) {
		/* l_rticks == 0 means the LWP has not run yet; skip the boost. */
		if (l->l_rticks && (hardclock_ticks - l->l_rticks >= hz))
			prio = high_pri[prio];
		/*
		 * Re-enqueue the thread if priority has changed:
		 * runnable LWPs sit on a run-queue, so the priority must be
		 * changed via lwp_changepri() to move them between queues.
		 */
		if (prio != l->l_priority)
			lwp_changepri(l, prio);
	} else {
		/* In other states, change the priority directly */
		l->l_priority = prio;
	}
}
  268 
/*
 * sched_oncpu: the LWP is about to start running on a CPU; load its
 * time-slice into the per-CPU tick counter consumed by sched_tick().
 */
void
sched_oncpu(lwp_t *l)
{
	struct schedstate_percpu *spc = &l->l_cpu->ci_schedstate;

	/* Update the counters */
	KASSERT(l->l_sched.timeslice >= min_ts);
	KASSERT(l->l_sched.timeslice <= max_ts);
	spc->spc_ticks = l->l_sched.timeslice;
}
  279 
  280 /*
  281  * Time-driven events.
  282  */
  283 
/*
 * Called once per time-quantum.  This routine is CPU-local and runs at
 * IPL_SCHED, thus the locking is not needed.
 *
 * Ages the priority of the current LWP according to its scheduling
 * class and decides whether it should yield the CPU.
 */
void
sched_tick(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct lwp *l = curlwp;
	struct proc *p;

	/* Nothing to age or preempt while the idle LWP is running. */
	if (__predict_false(CURCPU_IDLE_P()))
		return;

	switch (l->l_class) {
	case SCHED_FIFO:
		/*
		 * Update the time-quantum, and continue running,
		 * if thread runs on FIFO real-time policy.
		 */
		KASSERT(l->l_priority > PRI_HIGHEST_TS);
		spc->spc_ticks = l->l_sched.timeslice;
		return;
	case SCHED_OTHER:
		/*
		 * If thread is in time-sharing queue, decrease the priority,
		 * and run with a higher time-quantum.
		 */
		KASSERT(l->l_priority <= PRI_HIGHEST_TS);
		if (l->l_priority == 0)
			break;

		p = l->l_proc;
		if (__predict_false(p->p_nice > NZERO)) {
			/* Positive nice: drop faster, one step per 4 units. */
			const int n = max((p->p_nice - NZERO) >> 2, 1);
			l->l_priority = imax(l->l_priority - n, 0);
		} else
			l->l_priority--;
		break;
	}
	/*
	 * NOTE: classes without a case above (e.g. SCHED_RR) keep their
	 * priority and fall through to the yield check below.
	 */

	/*
	 * If there are higher priority threads or threads in the same queue,
	 * mark that thread should yield, otherwise, continue running.
	 */
	if (lwp_eprio(l) <= spc->spc_maxpriority || l->l_target_cpu) {
		spc->spc_flags |= SPCF_SHOULDYIELD;
		cpu_need_resched(ci, 0);
	} else
		spc->spc_ticks = l->l_sched.timeslice;
}
  335 
  336 /*
  337  * Sysctl nodes and initialization.
  338  */
  339 
  340 static int
  341 sysctl_sched_rtts(SYSCTLFN_ARGS)
  342 {
  343         struct sysctlnode node;
  344         int rttsms = hztoms(rt_ts);
  345 
  346         node = *rnode;
  347         node.sysctl_data = &rttsms;
  348         return sysctl_lookup(SYSCTLFN_CALL(&node));
  349 }
  350 
  351 static int
  352 sysctl_sched_mints(SYSCTLFN_ARGS)
  353 {
  354         struct sysctlnode node;
  355         struct cpu_info *ci;
  356         int error, newsize;
  357         CPU_INFO_ITERATOR cii;
  358 
  359         node = *rnode;
  360         node.sysctl_data = &newsize;
  361 
  362         newsize = hztoms(min_ts);
  363         error = sysctl_lookup(SYSCTLFN_CALL(&node));
  364         if (error || newp == NULL)
  365                 return error;
  366 
  367         newsize = mstohz(newsize);
  368         if (newsize < 1 || newsize > hz || newsize >= max_ts)
  369                 return EINVAL;
  370 
  371         /* It is safe to do this in such order */
  372         for (CPU_INFO_FOREACH(cii, ci))
  373                 spc_lock(ci);
  374 
  375         min_ts = newsize;
  376         sched_precalcts();
  377 
  378         for (CPU_INFO_FOREACH(cii, ci))
  379                 spc_unlock(ci);
  380 
  381         return 0;
  382 }
  383 
  384 static int
  385 sysctl_sched_maxts(SYSCTLFN_ARGS)
  386 {
  387         struct sysctlnode node;
  388         struct cpu_info *ci;
  389         int error, newsize;
  390         CPU_INFO_ITERATOR cii;
  391 
  392         node = *rnode;
  393         node.sysctl_data = &newsize;
  394 
  395         newsize = hztoms(max_ts);
  396         error = sysctl_lookup(SYSCTLFN_CALL(&node));
  397         if (error || newp == NULL)
  398                 return error;
  399 
  400         newsize = mstohz(newsize);
  401         if (newsize < 10 || newsize > hz || newsize <= min_ts)
  402                 return EINVAL;
  403 
  404         /* It is safe to do this in such order */
  405         for (CPU_INFO_FOREACH(cii, ci))
  406                 spc_lock(ci);
  407 
  408         max_ts = newsize;
  409         sched_precalcts();
  410 
  411         for (CPU_INFO_FOREACH(cii, ci))
  412                 spc_unlock(ci);
  413 
  414         return 0;
  415 }
  416 
  417 SYSCTL_SETUP(sysctl_sched_m2_setup, "sysctl sched setup")
  418 {
  419         const struct sysctlnode *node = NULL;
  420 
  421         sysctl_createv(clog, 0, NULL, NULL,
  422                 CTLFLAG_PERMANENT,
  423                 CTLTYPE_NODE, "kern", NULL,
  424                 NULL, 0, NULL, 0,
  425                 CTL_KERN, CTL_EOL);
  426         sysctl_createv(clog, 0, NULL, &node,
  427                 CTLFLAG_PERMANENT,
  428                 CTLTYPE_NODE, "sched",
  429                 SYSCTL_DESCR("Scheduler options"),
  430                 NULL, 0, NULL, 0,
  431                 CTL_KERN, CTL_CREATE, CTL_EOL);
  432 
  433         if (node == NULL)
  434                 return;
  435 
  436         sysctl_createv(NULL, 0, &node, NULL,
  437                 CTLFLAG_PERMANENT,
  438                 CTLTYPE_STRING, "name", NULL,
  439                 NULL, 0, __UNCONST("M2"), 0,
  440                 CTL_CREATE, CTL_EOL);
  441         sysctl_createv(NULL, 0, &node, NULL,
  442                 CTLFLAG_PERMANENT,
  443                 CTLTYPE_INT, "rtts",
  444                 SYSCTL_DESCR("Round-robin time quantum (in miliseconds)"),
  445                 sysctl_sched_rtts, 0, NULL, 0,
  446                 CTL_CREATE, CTL_EOL);
  447         sysctl_createv(NULL, 0, &node, NULL,
  448                 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
  449                 CTLTYPE_INT, "maxts",
  450                 SYSCTL_DESCR("Maximal time quantum (in miliseconds)"),
  451                 sysctl_sched_maxts, 0, &max_ts, 0,
  452                 CTL_CREATE, CTL_EOL);
  453         sysctl_createv(NULL, 0, &node, NULL,
  454                 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
  455                 CTLTYPE_INT, "mints",
  456                 SYSCTL_DESCR("Minimal time quantum (in miliseconds)"),
  457                 sysctl_sched_mints, 0, &min_ts, 0,
  458                 CTL_CREATE, CTL_EOL);
  459 }

Cache object: 3fbaa9bec004095debee1ce569d5dc53


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.