The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/sched_m2.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: sched_m2.c,v 1.39 2020/05/23 21:24:41 ad Exp $ */
    2 
    3 /*
    4  * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
    5  * All rights reserved.
    6  * 
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * TODO:
   31  *  - Implementation of fair share queue;
   32  *  - Support for NUMA;
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __KERNEL_RCSID(0, "$NetBSD: sched_m2.c,v 1.39 2020/05/23 21:24:41 ad Exp $");
   37 
   38 #include <sys/param.h>
   39 
   40 #include <sys/cpu.h>
   41 #include <sys/callout.h>
   42 #include <sys/errno.h>
   43 #include <sys/kernel.h>
   44 #include <sys/kmem.h>
   45 #include <sys/lwp.h>
   46 #include <sys/mutex.h>
   47 #include <sys/pool.h>
   48 #include <sys/proc.h>
   49 #include <sys/pset.h>
   50 #include <sys/resource.h>
   51 #include <sys/resourcevar.h>
   52 #include <sys/sched.h>
   53 #include <sys/syscallargs.h>
   54 #include <sys/sysctl.h>
   55 #include <sys/types.h>
   56 
   57 /*
   58  * Priority related definitions.
   59  */
   60 #define PRI_TS_COUNT    (NPRI_USER)
   61 #define PRI_RT_COUNT    (PRI_COUNT - PRI_TS_COUNT)
   62 #define PRI_HTS_RANGE   (PRI_TS_COUNT / 10)
   63 
   64 #define PRI_HIGHEST_TS  (MAXPRI_USER)
   65 
   66 /*
   67  * Time-slices and priorities.
   68  */
   69 static u_int    min_ts;                 /* Minimal time-slice */
   70 static u_int    max_ts;                 /* Maximal time-slice */
   71 static u_int    ts_map[PRI_COUNT];      /* Map of time-slices */
   72 static pri_t    high_pri[PRI_COUNT];    /* Map for priority increase */
   73 u_int           sched_rrticks;          /* Real-time time-slice */
   74 
   75 static void     sched_precalcts(void);
   76 
   77 /*
   78  * Initialization and setup.
   79  */
   80 
void
sched_rqinit(void)
{
	/*
	 * Initialize the M2 scheduler: validate the clock frequency,
	 * establish the default time-slice ranges, pre-compute the
	 * per-priority slice table and give lwp0 its initial slice.
	 */
	if (hz < 100) {
		/* Slices are expressed in ticks; hz < 100 cannot represent ~20 ms. */
		panic("sched_rqinit: value of HZ is too low\n");
	}

	/* Default timing ranges */
	min_ts = mstohz(20);                    /*  ~20 ms */
	max_ts = mstohz(150);                   /* ~150 ms */
	sched_rrticks = mstohz(100);                    /* ~100 ms */
	sched_precalcts();

#ifdef notdef
	/* Need to set the name etc. This does not belong here */
	/* Attach the primary CPU here */
	sched_cpuattach(curcpu());
#endif

	/* Attach scheduler state to lwp0 and assign it a time-slice. */
	sched_lwp_fork(NULL, &lwp0);
	sched_newts(&lwp0);
}
  103 
  104 /* Pre-calculate the time-slices for the priorities */
  105 static void
  106 sched_precalcts(void)
  107 {
  108         pri_t p;
  109 
  110         /* Time-sharing range */
  111         for (p = 0; p <= PRI_HIGHEST_TS; p++) {
  112                 ts_map[p] = max_ts -
  113                     (p * 100 / (PRI_TS_COUNT - 1) * (max_ts - min_ts) / 100);
  114                 high_pri[p] = (PRI_HIGHEST_TS - PRI_HTS_RANGE) +
  115                     ((p * PRI_HTS_RANGE) / (PRI_TS_COUNT - 1));
  116         }
  117 
  118         /* Real-time range */
  119         for (p = (PRI_HIGHEST_TS + 1); p < PRI_COUNT; p++) {
  120                 ts_map[p] = sched_rrticks;
  121                 high_pri[p] = p;
  122         }
  123 }
  124 
  125 /*
  126  * Hooks.
  127  */
  128 
/*
 * Fork hook: give every LWP of the new child process a fresh
 * time-slice based on its (inherited) priority.  Each LWP is
 * locked around the update, per the scheduler locking protocol.
 */
void
sched_proc_fork(struct proc *parent, struct proc *child)
{
	struct lwp *l;

	LIST_FOREACH(l, &child->p_lwps, l_sibling) {
		lwp_lock(l);
		sched_newts(l);
		lwp_unlock(l);
	}
}
  140 
/* Exit hook: nothing to do for the M2 scheduler. */
void
sched_proc_exit(struct proc *child, struct proc *parent)
{

}
  146 
/* LWP-fork hook: M2 keeps no extra per-LWP state, nothing to set up. */
void
sched_lwp_fork(struct lwp *l1, struct lwp *l2)
{

}
  152 
/* LWP-reap hook: nothing to release for the M2 scheduler. */
void
sched_lwp_collect(struct lwp *l)
{

}
  158 
/* Runnable-transition hook: no-op for M2. */
void
sched_setrunnable(struct lwp *l)
{

}
  164 
/* Scheduling-clock hook: no-op for M2 (see sched_tick() instead). */
void
sched_schedclock(struct lwp *l)
{

}
  170 
  171 /*
  172  * Priorities and time-slice.
  173  */
  174 
/*
 * Apply a new nice value to a process: scale the nice delta down to
 * priority steps (one step per 4 nice units) and shift the priority
 * of every time-sharing LWP accordingly.  Real-time LWPs are not
 * affected by nice.  Caller must hold p->p_lock.
 */
void
sched_nice(struct proc *p, int prio)
{
	struct lwp *l;
	int n;

	KASSERT(mutex_owned(p->p_lock));

	p->p_nice = prio;
	n = (prio - NZERO) >> 2;
	/* Delta too small to change any priority - nothing more to do. */
	if (n == 0)
		return;

	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		lwp_lock(l);
		if (l->l_class == SCHED_OTHER) {
			pri_t pri = l->l_priority - n;
			/*
			 * Negative n raises priority (clamp to the
			 * time-sharing ceiling); positive n lowers it
			 * (clamp to zero).
			 */
			pri = (n < 0) ? uimin(pri, PRI_HIGHEST_TS) : imax(pri, 0);
			lwp_changepri(l, pri);
		}
		lwp_unlock(l);
	}
}
  198 
  199 /* Recalculate the time-slice */
  200 void
  201 sched_newts(struct lwp *l)
  202 {
  203 
  204         l->l_sched.timeslice = ts_map[lwp_eprio(l)];
  205 }
  206 
/*
 * Sleep hook: reward interactive (non-batch) time-sharing threads by
 * raising their priority one step each time they voluntarily sleep;
 * a higher priority implies a shorter time-quantum.
 */
void
sched_slept(struct lwp *l)
{

	/*
	 * If thread is in time-sharing queue and batch flag is not marked,
	 * increase the priority, and run with the lower time-quantum.
	 */
	if (l->l_priority < PRI_HIGHEST_TS && (l->l_flag & LW_BATCH) == 0) {
		struct proc *p = l->l_proc;

		KASSERT(l->l_class == SCHED_OTHER);
		if (__predict_false(p->p_nice < NZERO)) {
			/*
			 * Negatively-niced processes get a bigger boost,
			 * proportional to the nice delta, clamped at the
			 * time-sharing ceiling.
			 */
			const int n = uimax((NZERO - p->p_nice) >> 2, 1);
			l->l_priority = uimin(l->l_priority + n, PRI_HIGHEST_TS);
		} else {
			l->l_priority++;
		}
	}
}
  227 
  228 void
  229 sched_wakeup(struct lwp *l)
  230 {
  231 
  232         /* If thread was sleeping a second or more - set a high priority */
  233         if (l->l_slptime >= 1)
  234                 l->l_priority = high_pri[l->l_priority];
  235 }
  236 
/*
 * Periodic statistics hook: decay the priority of CPU-bound
 * time-sharing threads and boost ones that have been runnable but
 * starved for a second or more.  Called with the LWP locked.
 */
void
sched_pstats_hook(struct lwp *l, int batch)
{
	pri_t prio;

	/*
	 * Estimate threads on time-sharing queue only, however,
	 * exclude the highest priority for performance purposes.
	 */
	KASSERT(lwp_locked(l, NULL));
	if (l->l_priority >= PRI_HIGHEST_TS)
		return;
	KASSERT(l->l_class == SCHED_OTHER);

	/* If it is CPU-bound not a first time - decrease the priority */
	prio = l->l_priority;
	if (batch && prio != 0)
		prio--;

	/* If thread was not ran a second or more - set a high priority */
	if (l->l_stat == LSRUN) {
		/* l_rticks == 0 means it has not run yet - do not boost. */
		if (l->l_rticks && (getticks() - l->l_rticks >= hz))
			prio = high_pri[prio];
		/* Re-enqueue the thread if priority has changed */
		if (prio != l->l_priority)
			lwp_changepri(l, prio);
	} else {
		/* In other states, change the priority directly */
		l->l_priority = prio;
	}
}
  268 
  269 void
  270 sched_oncpu(lwp_t *l)
  271 {
  272         struct schedstate_percpu *spc = &l->l_cpu->ci_schedstate;
  273 
  274         /* Update the counters */
  275         KASSERT(l->l_sched.timeslice >= min_ts);
  276         KASSERT(l->l_sched.timeslice <= max_ts);
  277         spc->spc_ticks = l->l_sched.timeslice;
  278 }
  279 
  280 /*
  281  * Time-driven events.
  282  */
  283 
  284 /*
  285  * Called once per time-quantum, with the running LWP lock held (spc_lwplock).
  286  */
void
sched_tick(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct lwp *l = ci->ci_onproc;
	struct proc *p;

	/* The idle LWP has no time-slice accounting. */
	if (__predict_false(CURCPU_IDLE_P()))
		return;

	lwp_lock(l);
	/* A running LWP must be on spc_lwplock, not the run-queue mutex. */
	KASSERT(l->l_mutex != spc->spc_mutex);
	switch (l->l_class) {
	case SCHED_FIFO:
		/*
		 * Update the time-quantum, and continue running,
		 * if thread runs on FIFO real-time policy.
		 */
		KASSERT(l->l_priority > PRI_HIGHEST_TS);
		spc->spc_ticks = l->l_sched.timeslice;
		lwp_unlock(l);
		return;
	case SCHED_OTHER:
		/*
		 * If thread is in time-sharing queue, decrease the priority,
		 * and run with a higher time-quantum.
		 */
		KASSERT(l->l_priority <= PRI_HIGHEST_TS);
		if (l->l_priority == 0)
			break;

		p = l->l_proc;
		if (__predict_false(p->p_nice > NZERO)) {
			/* Positively-niced: decay faster, by the nice delta. */
			const int n = uimax((p->p_nice - NZERO) >> 2, 1);
			l->l_priority = imax(l->l_priority - n, 0);
		} else
			l->l_priority--;
		break;
	}
	/* NOTE: SCHED_RR falls through to the yield check below. */

	/*
	 * If there are higher priority threads or threads in the same queue,
	 * mark that thread should yield, otherwise, continue running.
	 */
	if (lwp_eprio(l) <= spc->spc_maxpriority || l->l_target_cpu) {
		spc->spc_flags |= SPCF_SHOULDYIELD;
		spc_lock(ci);
		sched_resched_cpu(ci, MAXPRI_KTHREAD, true);
		/* spc now unlocked */
	} else
		spc->spc_ticks = l->l_sched.timeslice; 
	lwp_unlock(l);
}
  340 
  341 /*
  342  * Sysctl nodes and initialization.
  343  */
  344 
  345 static int
  346 sysctl_sched_rtts(SYSCTLFN_ARGS)
  347 {
  348         struct sysctlnode node;
  349         int rttsms = hztoms(sched_rrticks);
  350 
  351         node = *rnode;
  352         node.sysctl_data = &rttsms;
  353         return sysctl_lookup(SYSCTLFN_CALL(&node));
  354 }
  355 
  356 static int
  357 sysctl_sched_mints(SYSCTLFN_ARGS)
  358 {
  359         struct sysctlnode node;
  360         struct cpu_info *ci;
  361         int error, newsize;
  362         CPU_INFO_ITERATOR cii;
  363 
  364         node = *rnode;
  365         node.sysctl_data = &newsize;
  366 
  367         newsize = hztoms(min_ts);
  368         error = sysctl_lookup(SYSCTLFN_CALL(&node));
  369         if (error || newp == NULL)
  370                 return error;
  371 
  372         newsize = mstohz(newsize);
  373         if (newsize < 1 || newsize > hz || newsize >= max_ts)
  374                 return EINVAL;
  375 
  376         /* It is safe to do this in such order */
  377         for (CPU_INFO_FOREACH(cii, ci))
  378                 spc_lock(ci);
  379 
  380         min_ts = newsize;
  381         sched_precalcts();
  382 
  383         for (CPU_INFO_FOREACH(cii, ci))
  384                 spc_unlock(ci);
  385 
  386         return 0;
  387 }
  388 
  389 static int
  390 sysctl_sched_maxts(SYSCTLFN_ARGS)
  391 {
  392         struct sysctlnode node;
  393         struct cpu_info *ci;
  394         int error, newsize;
  395         CPU_INFO_ITERATOR cii;
  396 
  397         node = *rnode;
  398         node.sysctl_data = &newsize;
  399 
  400         newsize = hztoms(max_ts);
  401         error = sysctl_lookup(SYSCTLFN_CALL(&node));
  402         if (error || newp == NULL)
  403                 return error;
  404 
  405         newsize = mstohz(newsize);
  406         if (newsize < 10 || newsize > hz || newsize <= min_ts)
  407                 return EINVAL;
  408 
  409         /* It is safe to do this in such order */
  410         for (CPU_INFO_FOREACH(cii, ci))
  411                 spc_lock(ci);
  412 
  413         max_ts = newsize;
  414         sched_precalcts();
  415 
  416         for (CPU_INFO_FOREACH(cii, ci))
  417                 spc_unlock(ci);
  418 
  419         return 0;
  420 }
  421 
/*
 * Create the kern.sched sysctl subtree for the M2 scheduler:
 * scheduler name, the read-only round-robin quantum, and the
 * tunable minimal/maximal time-quanta (all in milliseconds).
 */
SYSCTL_SETUP(sysctl_sched_m2_setup, "sysctl sched setup")
{
	const struct sysctlnode *node = NULL;

	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "sched",
		SYSCTL_DESCR("Scheduler options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	/* Without the parent node there is nothing to attach to. */
	if (node == NULL)
		return;

	sysctl_createv(NULL, 0, &node, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_STRING, "name", NULL,
		NULL, 0, __UNCONST("M2"), 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(NULL, 0, &node, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_INT, "rtts",
		SYSCTL_DESCR("Round-robin time quantum (in milliseconds)"),
		sysctl_sched_rtts, 0, NULL, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(NULL, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "maxts",
		SYSCTL_DESCR("Maximal time quantum (in milliseconds)"),
		sysctl_sched_maxts, 0, &max_ts, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(NULL, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mints",
		SYSCTL_DESCR("Minimal time quantum (in milliseconds)"),
		sysctl_sched_mints, 0, &min_ts, 0,
		CTL_CREATE, CTL_EOL);
}

Cache object: 423fabf4f86c30785e149ce936e0599f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.