FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_runq.c


    1 /*      $NetBSD: kern_runq.c,v 1.22.4.4 2010/01/16 17:39:01 bouyer Exp $        */
    2 
    3 /*
    4  * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
    5  * All rights reserved.
    6  * 
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.22.4.4 2010/01/16 17:39:01 bouyer Exp $");
   31 
   32 #include <sys/param.h>
   33 #include <sys/kernel.h>
   34 #include <sys/bitops.h>
   35 #include <sys/cpu.h>
   36 #include <sys/idle.h>
   37 #include <sys/intr.h>
   38 #include <sys/kmem.h>
   39 #include <sys/lwp.h>
   40 #include <sys/mutex.h>
   41 #include <sys/proc.h>
   42 #include <sys/sched.h>
   43 #include <sys/syscallargs.h>
   44 #include <sys/sysctl.h>
   45 #include <sys/systm.h>
   46 #include <sys/types.h>
   47 #include <sys/evcnt.h>
   48 
   49 /*
   50  * Priority-related definitions.
   51  */
   52 #define PRI_TS_COUNT    (NPRI_USER)
   53 #define PRI_RT_COUNT    (PRI_COUNT - PRI_TS_COUNT)
   54 #define PRI_HTS_RANGE   (PRI_TS_COUNT / 10)
   55 
   56 #define PRI_HIGHEST_TS  (MAXPRI_USER)
   57 
   58 /*
   59  * Bits per map.
   60  */
   61 #define BITMAP_BITS     (32)
   62 #define BITMAP_SHIFT    (5)
   63 #define BITMAP_MSB      (0x80000000U)
   64 #define BITMAP_MASK     (BITMAP_BITS - 1)
   65 
   66 /*
   67  * Structures, runqueue.
   68  */
   69 
   70 const int       schedppq = 1;
   71 
   72 typedef struct {
   73         TAILQ_HEAD(, lwp) q_head;
   74 } queue_t;
   75 
   76 typedef struct {
   77         /* Bitmap */
   78         uint32_t        r_bitmap[PRI_COUNT >> BITMAP_SHIFT];
   79         /* Counters */
   80         u_int           r_count;        /* Count of the threads */
   81         u_int           r_avgcount;     /* Average count of threads */
   82         u_int           r_mcount;       /* Count of migratable threads */
   83         /* Runqueues */
   84         queue_t         r_rt_queue[PRI_RT_COUNT];
   85         queue_t         r_ts_queue[PRI_TS_COUNT];
   86         /* Event counters */
   87         struct evcnt    r_ev_pull;
   88         struct evcnt    r_ev_push;
   89         struct evcnt    r_ev_stay;
   90         struct evcnt    r_ev_localize;
   91 } runqueue_t;
   92 
   93 static void *   sched_getrq(runqueue_t *, const pri_t);
   94 #ifdef MULTIPROCESSOR
   95 static lwp_t *  sched_catchlwp(struct cpu_info *);
   96 static void     sched_balance(void *);
   97 #endif
   98 
   99 /*
  100  * Preemption control.
  101  */
  102 int             sched_upreempt_pri = PRI_KERNEL;
  103 #if defined(__HAVE_PREEMPTION)
  104 int             sched_kpreempt_pri = PRI_USER_RT;
  105 #else
  106 int             sched_kpreempt_pri = 1000;
  107 #endif
  108 
  109 /*
  110  * Migration and balancing.
  111  */
  112 static u_int    cacheht_time;           /* Cache hotness time */
  113 static u_int    min_catch;              /* Minimal LWP count for catching */
  114 static u_int    balance_period;         /* Balance period */
  115 static struct cpu_info *worker_ci;      /* Victim CPU */
  116 #ifdef MULTIPROCESSOR
  117 static struct callout balance_ch;       /* Callout of balancer */
  118 #endif
  119 
  120 void
  121 runq_init(void)
  122 {
  123 
  124         /* Balancing */
  125         worker_ci = curcpu();
  126         cacheht_time = mstohz(3);               /*   ~3 ms */
  127         balance_period = mstohz(300);           /* ~300 ms */
  128 
  129         /* Minimal count of LWPs for catching */
  130         min_catch = 1;
  131 
  132         /* Initialize balancing callout and run it */
  133 #ifdef MULTIPROCESSOR
  134         callout_init(&balance_ch, CALLOUT_MPSAFE);
  135         callout_setfunc(&balance_ch, sched_balance, NULL);
  136         callout_schedule(&balance_ch, balance_period);
  137 #endif
  138 }
  139 
  140 void
  141 sched_cpuattach(struct cpu_info *ci)
  142 {
  143         runqueue_t *ci_rq;
  144         void *rq_ptr;
  145         u_int i, size;
  146         char *cpuname;
  147 
  148         if (ci->ci_schedstate.spc_lwplock == NULL) {
  149                 ci->ci_schedstate.spc_lwplock =
  150                     mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
  151         }
  152         if (ci == lwp0.l_cpu) {
  153                 /* Initialize the scheduler structure of the primary LWP */
  154                 lwp0.l_mutex = ci->ci_schedstate.spc_lwplock;
  155         }
  156         if (ci->ci_schedstate.spc_mutex != NULL) {
  157                 /* Already initialized. */
  158                 return;
  159         }
  160 
  161         /* Allocate the run queue */
  162         size = roundup2(sizeof(runqueue_t), coherency_unit) + coherency_unit;
  163         rq_ptr = kmem_zalloc(size, KM_SLEEP);
  164         if (rq_ptr == NULL) {
  165                 panic("sched_cpuattach: could not allocate the runqueue");
  166         }
  167         ci_rq = (void *)(roundup2((uintptr_t)(rq_ptr), coherency_unit));
  168 
  169         /* Initialize run queues */
  170         ci->ci_schedstate.spc_mutex =
  171             mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
  172         for (i = 0; i < PRI_RT_COUNT; i++)
  173                 TAILQ_INIT(&ci_rq->r_rt_queue[i].q_head);
  174         for (i = 0; i < PRI_TS_COUNT; i++)
  175                 TAILQ_INIT(&ci_rq->r_ts_queue[i].q_head);
  176 
  177         ci->ci_schedstate.spc_sched_info = ci_rq;
  178 
  179         cpuname = kmem_alloc(8, KM_SLEEP);
  180         snprintf(cpuname, 8, "cpu%d", cpu_index(ci));
  181 
  182         evcnt_attach_dynamic(&ci_rq->r_ev_pull, EVCNT_TYPE_MISC, NULL,
  183            cpuname, "runqueue pull");
  184         evcnt_attach_dynamic(&ci_rq->r_ev_push, EVCNT_TYPE_MISC, NULL,
  185            cpuname, "runqueue push");
  186         evcnt_attach_dynamic(&ci_rq->r_ev_stay, EVCNT_TYPE_MISC, NULL,
  187            cpuname, "runqueue stay");
  188         evcnt_attach_dynamic(&ci_rq->r_ev_localize, EVCNT_TYPE_MISC, NULL,
  189            cpuname, "runqueue localize");
  190 }
  191 
  192 /*
  193  * Control of the runqueue.
  194  */
  195 
  196 static inline void *
  197 sched_getrq(runqueue_t *ci_rq, const pri_t prio)
  198 {
  199 
  200         KASSERT(prio < PRI_COUNT);
  201         return (prio <= PRI_HIGHEST_TS) ?
  202             &ci_rq->r_ts_queue[prio].q_head :
  203             &ci_rq->r_rt_queue[prio - PRI_HIGHEST_TS - 1].q_head;
  204 }
  205 
  206 void
  207 sched_enqueue(struct lwp *l, bool swtch)
  208 {
  209         runqueue_t *ci_rq;
  210         struct schedstate_percpu *spc;
  211         TAILQ_HEAD(, lwp) *q_head;
  212         const pri_t eprio = lwp_eprio(l);
  213         struct cpu_info *ci;
  214         int type;
  215 
  216         ci = l->l_cpu;
  217         spc = &ci->ci_schedstate;
  218         ci_rq = spc->spc_sched_info;
  219         KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
  220 
  221         /* Update the last run time on switch */
  222         if (__predict_true(swtch == true))
  223                 l->l_rticksum += (hardclock_ticks - l->l_rticks);
  224         else if (l->l_rticks == 0)
  225                 l->l_rticks = hardclock_ticks;
  226 
  227         /* Enqueue the thread */
  228         q_head = sched_getrq(ci_rq, eprio);
  229         if (TAILQ_EMPTY(q_head)) {
  230                 u_int i;
  231                 uint32_t q;
  232 
  233                 /* Mark bit */
  234                 i = eprio >> BITMAP_SHIFT;
  235                 q = BITMAP_MSB >> (eprio & BITMAP_MASK);
  236                 KASSERT((ci_rq->r_bitmap[i] & q) == 0);
  237                 ci_rq->r_bitmap[i] |= q;
  238         }
  239         TAILQ_INSERT_TAIL(q_head, l, l_runq);
  240         ci_rq->r_count++;
  241         if ((l->l_pflag & LP_BOUND) == 0)
  242                 ci_rq->r_mcount++;
  243 
  244         /*
  245          * Update the runqueue's highest priority value if this
  246          * thread's priority is higher.
  247          */
  248         if (eprio > spc->spc_maxpriority)
  249                 spc->spc_maxpriority = eprio;
  250 
  251         sched_newts(l);
  252 
  253         /*
  254          * Wake the chosen CPU or cause a preemption if the newly
  255          * enqueued thread has higher priority.  Don't cause a 
  256          * preemption if the thread is yielding (swtch).
  257          */
  258         if (!swtch && eprio > spc->spc_curpriority) {
  259                 if (eprio >= sched_kpreempt_pri)
  260                         type = RESCHED_KPREEMPT;
  261                 else if (eprio >= sched_upreempt_pri)
  262                         type = RESCHED_IMMED;
  263                 else
  264                         type = RESCHED_LAZY;
  265                 cpu_need_resched(ci, type);
  266         }
  267 }
  268 
  269 void
  270 sched_dequeue(struct lwp *l)
  271 {
  272         runqueue_t *ci_rq;
  273         TAILQ_HEAD(, lwp) *q_head;
  274         struct schedstate_percpu *spc;
  275         const pri_t eprio = lwp_eprio(l);
  276 
  277         spc = & l->l_cpu->ci_schedstate;
  278         ci_rq = spc->spc_sched_info;
  279         KASSERT(lwp_locked(l, spc->spc_mutex));
  280 
  281         KASSERT(eprio <= spc->spc_maxpriority); 
  282         KASSERT(ci_rq->r_bitmap[eprio >> BITMAP_SHIFT] != 0);
  283         KASSERT(ci_rq->r_count > 0);
  284 
  285         if (spc->spc_migrating == l)
  286                 spc->spc_migrating = NULL;
  287 
  288         ci_rq->r_count--;
  289         if ((l->l_pflag & LP_BOUND) == 0)
  290                 ci_rq->r_mcount--;
  291 
  292         q_head = sched_getrq(ci_rq, eprio);
  293         TAILQ_REMOVE(q_head, l, l_runq);
  294         if (TAILQ_EMPTY(q_head)) {
  295                 u_int i;
  296                 uint32_t q;
  297 
  298                 /* Unmark bit */
  299                 i = eprio >> BITMAP_SHIFT;
  300                 q = BITMAP_MSB >> (eprio & BITMAP_MASK);
  301                 KASSERT((ci_rq->r_bitmap[i] & q) != 0);
  302                 ci_rq->r_bitmap[i] &= ~q;
  303 
  304                 /*
  305                  * Update the runqueue's highest priority value, in case
  306                  * this was the last thread in the highest-priority queue.
  307                  */
  308                 if (eprio != spc->spc_maxpriority)
  309                         return;
  310 
  311                 do {
  312                         if (ci_rq->r_bitmap[i] != 0) {
  313                                 q = ffs(ci_rq->r_bitmap[i]);
  314                                 spc->spc_maxpriority =
  315                                     (i << BITMAP_SHIFT) + (BITMAP_BITS - q);
  316                                 return;
  317                         }
  318                 } while (i--);
  319 
  320                 /* If not found - set the lowest value */
  321                 spc->spc_maxpriority = 0;
  322         }
  323 }
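
The bitmap handling in sched_enqueue() and sched_dequeue() above is compact, so a stand-alone
illustration may help.  The following user-space sketch (not part of kern_runq.c; PRI_COUNT is
shrunk to 64 and all queue bookkeeping is omitted) reproduces the same scheme: one bit per
priority, most-significant bit first within each 32-bit word, and an ffs()-based downward scan
to recover the highest occupied priority once the top queue becomes empty.

#include <stdio.h>
#include <stdint.h>
#include <strings.h>            /* ffs() */

#define PRI_COUNT       64      /* shrunk for illustration */
#define BITMAP_BITS     32
#define BITMAP_SHIFT    5
#define BITMAP_MSB      0x80000000U
#define BITMAP_MASK     (BITMAP_BITS - 1)

static uint32_t bitmap[PRI_COUNT >> BITMAP_SHIFT];

/* Set the bit for a priority, as sched_enqueue() does when a queue becomes non-empty. */
static void
mark_pri(int prio)
{
        bitmap[prio >> BITMAP_SHIFT] |= BITMAP_MSB >> (prio & BITMAP_MASK);
}

/* Clear the bit for a priority, as sched_dequeue() does when a queue becomes empty. */
static void
clear_pri(int prio)
{
        bitmap[prio >> BITMAP_SHIFT] &= ~(BITMAP_MSB >> (prio & BITMAP_MASK));
}

/* Return the highest marked priority, or -1 if the bitmap is empty. */
static int
highest_pri(void)
{
        int i = (PRI_COUNT >> BITMAP_SHIFT) - 1;

        do {
                if (bitmap[i] != 0) {
                        int q = ffs(bitmap[i]);
                        return (i << BITMAP_SHIFT) + (BITMAP_BITS - q);
                }
        } while (i--);
        return -1;
}

int
main(void)
{
        mark_pri(3);
        mark_pri(40);
        printf("highest = %d\n", highest_pri());        /* prints 40 */
        clear_pri(40);
        printf("highest = %d\n", highest_pri());        /* prints 3 */
        return 0;
}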
  324 
  325 /*
  326  * Migration and balancing.
  327  */
  328 
  329 #ifdef MULTIPROCESSOR
  330 
  331 /* Estimate if LWP is cache-hot */
  332 static inline bool
  333 lwp_cache_hot(const struct lwp *l)
  334 {
  335 
  336         if (__predict_false(l->l_slptime || l->l_rticks == 0))
  337                 return false;
  338 
  339         return (hardclock_ticks - l->l_rticks <= cacheht_time);
  340 }
  341 
  342 /* Check if LWP can migrate to the chosen CPU */
  343 static inline bool
  344 sched_migratable(const struct lwp *l, struct cpu_info *ci)
  345 {
  346         const struct schedstate_percpu *spc = &ci->ci_schedstate;
  347         KASSERT(lwp_locked(__UNCONST(l), NULL));
  348 
  349         /* CPU is offline */
  350         if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
  351                 return false;
  352 
  353         /* Affinity bind */
  354         if (__predict_false(l->l_flag & LW_AFFINITY))
  355                 return kcpuset_isset(cpu_index(ci), l->l_affinity);
  356 
  357         /* Processor-set */
  358         return (spc->spc_psid == l->l_psid);
  359 }
  360 
  361 /*
  362  * Estimate whether the LWP should migrate to another CPU.
  363  * Select and return the target CPU if migration is needed.
  364  */
  365 struct cpu_info *
  366 sched_takecpu(struct lwp *l)
  367 {
  368         struct cpu_info *ci, *tci, *first, *next;
  369         struct schedstate_percpu *spc;
  370         runqueue_t *ci_rq, *ici_rq;
  371         pri_t eprio, lpri, pri;
  372 
  373         KASSERT(lwp_locked(l, NULL));
  374 
  375         /* If thread is strictly bound, do not estimate other CPUs */
  376         ci = l->l_cpu;
  377         if (l->l_pflag & LP_BOUND)
  378                 return ci;
  379 
  380         spc = &ci->ci_schedstate;
  381         ci_rq = spc->spc_sched_info;
  382 
  383         /* Make sure that thread is in appropriate processor-set */
  384         if (__predict_true(spc->spc_psid == l->l_psid)) {
  385                 /* If CPU of this thread is idling - run there */
  386                 if (ci_rq->r_count == 0) {
  387                         ci_rq->r_ev_stay.ev_count++;
  388                         return ci;
  389                 }
  390                 /* Stay if thread is cache-hot */
  391                 eprio = lwp_eprio(l);
  392                 if (__predict_true(l->l_stat != LSIDL) &&
  393                     lwp_cache_hot(l) && eprio >= spc->spc_curpriority) {
  394                         ci_rq->r_ev_stay.ev_count++;
  395                         return ci;
  396                 }
  397         } else {
  398                 eprio = lwp_eprio(l);
  399         }
  400 
  401         /* Run on current CPU if priority of thread is higher */
  402         ci = curcpu();
  403         spc = &ci->ci_schedstate;
  404         if (eprio > spc->spc_curpriority && sched_migratable(l, ci)) {
  405                 ci_rq = spc->spc_sched_info;
  406                 ci_rq->r_ev_localize.ev_count++;
  407                 return ci;
  408         }
  409 
  410         /*
  411          * Look for the CPU with the lowest-priority thread.  In case of
  412          * equal priority, choose the CPU with the fewest threads.
  413          */
  414         first = l->l_cpu;
  415         ci = first;
  416         tci = first;
  417         lpri = PRI_COUNT;
  418         do {
  419                 next = CIRCLEQ_LOOP_NEXT(&cpu_queue, ci, ci_data.cpu_qchain);
  420                 spc = &ci->ci_schedstate;
  421                 ici_rq = spc->spc_sched_info;
  422                 pri = max(spc->spc_curpriority, spc->spc_maxpriority);
  423                 if (pri > lpri)
  424                         continue;
  425 
  426                 if (pri == lpri && ci_rq->r_count < ici_rq->r_count)
  427                         continue;
  428 
  429                 if (!sched_migratable(l, ci))
  430                         continue;
  431 
  432                 lpri = pri;
  433                 tci = ci;
  434                 ci_rq = ici_rq;
  435         } while (ci = next, ci != first);
  436 
  437         ci_rq = tci->ci_schedstate.spc_sched_info;
  438         ci_rq->r_ev_push.ev_count++;
  439 
  440         return tci;
  441 }
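
For clarity, here is a small user-space sketch (illustrative only, with made-up numbers) of the
selection loop at the end of sched_takecpu() above: among the candidate CPUs, pick the one whose
maximum of current and queued priority is lowest, breaking ties in favour of the shorter run
queue.  The real loop additionally walks the circular CPU list starting at the thread's own CPU
and skips CPUs the thread is not allowed to migrate to; those details are omitted here.

#include <stdio.h>

struct fake_cpu {
        int     curpri;         /* priority of the thread running on the CPU */
        int     maxpri;         /* highest priority queued on the CPU */
        int     count;          /* number of queued threads */
};

static int
pick_cpu(const struct fake_cpu *cpu, int ncpu)
{
        int best = 0, lpri = 1000, lcount = 1000;

        for (int i = 0; i < ncpu; i++) {
                int pri = cpu[i].curpri > cpu[i].maxpri ?
                    cpu[i].curpri : cpu[i].maxpri;

                /* Skip CPUs that are busier than the current best. */
                if (pri > lpri)
                        continue;
                if (pri == lpri && lcount < cpu[i].count)
                        continue;

                best = i;
                lpri = pri;
                lcount = cpu[i].count;
        }
        return best;
}

int
main(void)
{
        struct fake_cpu cpus[] = {
                { 50, 40, 3 },  /* CPU 0: busy */
                { 20, 10, 2 },  /* CPU 1 */
                { 20,  5, 1 },  /* CPU 2: ties with CPU 1, but fewer threads */
        };

        printf("chosen CPU = %d\n", pick_cpu(cpus, 3)); /* prints 2 */
        return 0;
}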
  442 
  443 /*
  444  * Try to catch an LWP from the runqueue of another CPU.
  445  */
  446 static struct lwp *
  447 sched_catchlwp(struct cpu_info *ci)
  448 {
  449         struct cpu_info *curci = curcpu();
  450         struct schedstate_percpu *spc, *curspc;
  451         TAILQ_HEAD(, lwp) *q_head;
  452         runqueue_t *ci_rq;
  453         struct lwp *l;
  454 
  455         curspc = &curci->ci_schedstate;
  456         spc = &ci->ci_schedstate;
  457         KASSERT(curspc->spc_psid == spc->spc_psid);
  458 
  459         ci_rq = spc->spc_sched_info;
  460         if (ci_rq->r_mcount < min_catch) {
  461                 spc_unlock(ci);
  462                 return NULL;
  463         }
  464 
  465         /* Take the highest priority thread */
  466         q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
  467         l = TAILQ_FIRST(q_head);
  468 
  469         for (;;) {
  470                 /* Stop once the queue has been exhausted */
  471                 if (l == NULL)
  472                         break;
  473                 KASSERT(l->l_stat == LSRUN);
  474                 KASSERT(l->l_flag & LW_INMEM);
  475 
  476                 /* Look only for threads that are allowed to migrate */
  477                 if ((l->l_pflag & LP_BOUND) || lwp_cache_hot(l) ||
  478                     !sched_migratable(l, curci)) {
  479                         l = TAILQ_NEXT(l, l_runq);
  480                         continue;
  481                 }
  482 
  483                 /* Grab the thread and move it to the local run queue */
  484                 sched_dequeue(l);
  485 
  486                 /*
  487                  * If LWP is still context switching, we may need to
  488                  * spin-wait before changing its CPU.
  489                  */
  490                 if (__predict_false(l->l_ctxswtch != 0)) {
  491                         u_int count;
  492                         count = SPINLOCK_BACKOFF_MIN;
  493                         while (l->l_ctxswtch)
  494                                 SPINLOCK_BACKOFF(count);
  495                 }
  496                 l->l_cpu = curci;
  497                 ci_rq->r_ev_pull.ev_count++;
  498                 lwp_unlock_to(l, curspc->spc_mutex);
  499                 sched_enqueue(l, false);
  500                 return l;
  501         }
  502         spc_unlock(ci);
  503 
  504         return l;
  505 }
  506 
  507 /*
  508  * Periodic balancing calculations.
  509  */
  510 static void
  511 sched_balance(void *nocallout)
  512 {
  513         struct cpu_info *ci, *hci;
  514         runqueue_t *ci_rq;
  515         CPU_INFO_ITERATOR cii;
  516         u_int highest;
  517 
  518         hci = curcpu();
  519         highest = 0;
  520 
  521         /* Do the counting locklessly */
  522         for (CPU_INFO_FOREACH(cii, ci)) {
  523                 ci_rq = ci->ci_schedstate.spc_sched_info;
  524 
  525                 /* Average count of the threads */
  526                 ci_rq->r_avgcount = (ci_rq->r_avgcount + ci_rq->r_mcount) >> 1;
  527 
  528                 /* Look for CPU with the highest average */
  529                 if (ci_rq->r_avgcount > highest) {
  530                         hci = ci;
  531                         highest = ci_rq->r_avgcount;
  532                 }
  533         }
  534 
  535         /* Update the worker */
  536         worker_ci = hci;
  537 
  538         if (nocallout == NULL)
  539                 callout_schedule(&balance_ch, balance_period);
  540 }
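
The averaging step above is a simple halving filter.  The short stand-alone program below (not
kernel code; the sample counts are made up) shows how r_avgcount tracks r_mcount: each balancing
period the average moves halfway toward the current count of migratable threads, so sustained
load shows up within a few periods while a single spike is damped and then decays.

#include <stdio.h>

int
main(void)
{
        unsigned avg = 0;
        const unsigned mcount[] = { 8, 8, 8, 0, 0, 0 };

        for (unsigned i = 0; i < sizeof(mcount) / sizeof(mcount[0]); i++) {
                /* Same update as sched_balance(): avg = (avg + mcount) / 2 */
                avg = (avg + mcount[i]) >> 1;
                printf("period %u: mcount = %u, avgcount = %u\n",
                    i, mcount[i], avg);
        }
        return 0;
}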
  541 
  542 /*
  543  * Called from each CPU's idle loop.
  544  */
  545 void
  546 sched_idle(void)
  547 {
  548         struct cpu_info *ci = curcpu(), *tci = NULL;
  549         struct schedstate_percpu *spc, *tspc;
  550         runqueue_t *ci_rq;
  551         bool dlock = false;
  552 
  553         /* Check if there is a migrating LWP */
  554         spc = &ci->ci_schedstate;
  555         if (spc->spc_migrating == NULL)
  556                 goto no_migration;
  557 
  558         spc_lock(ci);
  559         for (;;) {
  560                 struct lwp *l;
  561 
  562                 l = spc->spc_migrating;
  563                 if (l == NULL)
  564                         break;
  565 
  566                 /*
  567                  * If this is a second attempt and the target CPU
  568                  * has changed, drop the old lock.
  569                  */
  570                 if (dlock == true && tci != l->l_target_cpu) {
  571                         KASSERT(tci != NULL);
  572                         spc_unlock(tci);
  573                         dlock = false;
  574                 }
  575 
  576                 /*
  577                  * Nothing to do if destination has changed to the
  578                  * local CPU, or the migration was done by another CPU.
  579                  */
  580                 tci = l->l_target_cpu;
  581                 if (tci == NULL || tci == ci) {
  582                         spc->spc_migrating = NULL;
  583                         l->l_target_cpu = NULL;
  584                         break;
  585                 }
  586                 tspc = &tci->ci_schedstate;
  587 
  588                 /*
  589                  * Double-lock the runqueues.
  590                  * We do that only once.
  591                  */
  592                 if (dlock == false) {
  593                         dlock = true;
  594                         if (ci < tci) {
  595                                 spc_lock(tci);
  596                         } else if (!mutex_tryenter(tspc->spc_mutex)) {
  597                                 spc_unlock(ci);
  598                                 spc_lock(tci);
  599                                 spc_lock(ci);
  600                         /* Check the situation again... */
  601                                 continue;
  602                         }
  603                 }
  604 
  605                 /* Migrate the thread */
  606                 KASSERT(l->l_stat == LSRUN);
  607                 spc->spc_migrating = NULL;
  608                 l->l_target_cpu = NULL;
  609                 sched_dequeue(l);
  610                 l->l_cpu = tci;
  611                 lwp_setlock(l, tspc->spc_mutex);
  612                 sched_enqueue(l, false);
  613                 break;
  614         }
  615         if (dlock == true) {
  616                 KASSERT(tci != NULL);
  617                 spc_unlock(tci);
  618         }
  619         spc_unlock(ci);
  620 
  621 no_migration:
  622         ci_rq = spc->spc_sched_info;
  623         if ((spc->spc_flags & SPCF_OFFLINE) != 0 || ci_rq->r_count != 0) {
  624                 return;
  625         }
  626 
  627         /* Reset the counter, and call the balancer */
  628         ci_rq->r_avgcount = 0;
  629         sched_balance(ci);
  630         tci = worker_ci;
  631         tspc = &tci->ci_schedstate;
  632         if (ci == tci || spc->spc_psid != tspc->spc_psid)
  633                 return;
  634         spc_dlock(ci, tci);
  635         (void)sched_catchlwp(tci);
  636         spc_unlock(ci);
  637 }
  638 
  639 #else
  640 
  641 struct cpu_info *
  642 sched_takecpu(struct lwp *l)
  643 {
  644 
  645         return l->l_cpu;
  646 }
  647 
  648 void
  649 sched_idle(void)
  650 {
  651 
  652 }
  653 #endif  /* MULTIPROCESSOR */
  654 
  655 /*
  656  * Scheduling statistics and balancing.
  657  */
  658 void
  659 sched_lwp_stats(struct lwp *l)
  660 {
  661         int batch;
  662 
  663         KASSERT(lwp_locked(l, NULL));
  664 
  665         /* Update sleep time */
  666         if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
  667             l->l_stat == LSSUSPENDED)
  668                 l->l_slptime++;
  669 
  670         /*
  671          * Mark the thread as CPU-bound if its run time exceeds its sleep
  672          * time.  The first time this happens, it is not yet treated as batch.
  673          */
  674         batch = (l->l_rticksum > l->l_slpticksum);
  675         if (batch != 0) {
  676                 if ((l->l_flag & LW_BATCH) == 0)
  677                         batch = 0;
  678                 l->l_flag |= LW_BATCH;
  679         } else
  680                 l->l_flag &= ~LW_BATCH;
  681 
  682         /*
  683          * If the thread is CPU-bound and never sleeps, it will monopolize
  684          * the CPU.  In that case reset the last-sleep value and check it
  685          * again later; if still zero, arrange a migration and clear LW_BATCH.
  686          */
  687         if (batch && (l->l_slptime + l->l_slpticksum) == 0) {
  688                 if (l->l_slpticks == 0) {
  689                         if (l->l_target_cpu == NULL &&
  690                             (l->l_stat == LSRUN || l->l_stat == LSONPROC)) {
  691                                 struct cpu_info *ci = sched_takecpu(l);
  692                                 l->l_target_cpu = (ci != l->l_cpu) ? ci : NULL;
  693                         }
  694                         l->l_flag &= ~LW_BATCH;
  695                 } else {
  696                         l->l_slpticks = 0;
  697                 }
  698         }
  699 
  700         /* Reset the time sums */
  701         l->l_slpticksum = 0;
  702         l->l_rticksum = 0;
  703 
  704         /* Scheduler-specific hook */
  705         sched_pstats_hook(l, batch);
  706 }
  707 
  708 /*
  709  * Scheduler mill.
  710  */
  711 struct lwp *
  712 sched_nextlwp(void)
  713 {
  714         struct cpu_info *ci = curcpu();
  715         struct schedstate_percpu *spc;
  716         TAILQ_HEAD(, lwp) *q_head;
  717         runqueue_t *ci_rq;
  718         struct lwp *l;
  719 
  720         /* Return to idle LWP if there is a migrating thread */
  721         spc = &ci->ci_schedstate;
  722         if (__predict_false(spc->spc_migrating != NULL))
  723                 return NULL;
  724         ci_rq = spc->spc_sched_info;
  725 
  726 #ifdef MULTIPROCESSOR
  727         /* If the runqueue is empty, try to catch a thread from another CPU */
  728         if (__predict_false(ci_rq->r_count == 0)) {
  729                 struct schedstate_percpu *cspc;
  730                 struct cpu_info *cci;
  731 
  732                 /* Offline CPUs, however, should not do this */
  733                 if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
  734                         return NULL;
  735 
  736                 /* Reset the counter, and call the balancer */
  737                 ci_rq->r_avgcount = 0;
  738                 sched_balance(ci);
  739                 cci = worker_ci;
  740                 cspc = &cci->ci_schedstate;
  741                 if (ci == cci || spc->spc_psid != cspc->spc_psid ||
  742                     !mutex_tryenter(cci->ci_schedstate.spc_mutex))
  743                         return NULL;
  744                 return sched_catchlwp(cci);
  745         }
  746 #else
  747         if (__predict_false(ci_rq->r_count == 0))
  748                 return NULL;
  749 #endif
  750 
  751         /* Take the highest priority thread */
  752         KASSERT(ci_rq->r_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]);
  753         q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
  754         l = TAILQ_FIRST(q_head);
  755         KASSERT(l != NULL);
  756 
  757         sched_oncpu(l);
  758         l->l_rticks = hardclock_ticks;
  759 
  760         return l;
  761 }
  762 
  763 bool
  764 sched_curcpu_runnable_p(void)
  765 {
  766         const struct cpu_info *ci;
  767         const struct schedstate_percpu *spc;
  768         const runqueue_t *ci_rq;
  769         bool rv;
  770 
  771         kpreempt_disable();
  772         ci = curcpu();
  773         spc = &ci->ci_schedstate;
  774         ci_rq = spc->spc_sched_info;
  775 
  776 #ifndef __HAVE_FAST_SOFTINTS
  777         if (ci->ci_data.cpu_softints) {
  778                 kpreempt_enable();
  779                 return true;
  780         }
  781 #endif
  782 
  783         rv = (ci_rq->r_count != 0);
  784         kpreempt_enable();
  785 
  786         return rv;
  787 }
  788 
  789 /*
  790  * Sysctl nodes and initialization.
  791  */
  792 
  793 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
  794 {
  795         const struct sysctlnode *node = NULL;
  796 
  797         sysctl_createv(clog, 0, NULL, NULL,
  798                 CTLFLAG_PERMANENT,
  799                 CTLTYPE_NODE, "kern", NULL,
  800                 NULL, 0, NULL, 0,
  801                 CTL_KERN, CTL_EOL);
  802         sysctl_createv(clog, 0, NULL, &node,
  803                 CTLFLAG_PERMANENT,
  804                 CTLTYPE_NODE, "sched",
  805                 SYSCTL_DESCR("Scheduler options"),
  806                 NULL, 0, NULL, 0,
  807                 CTL_KERN, CTL_CREATE, CTL_EOL);
  808 
  809         if (node == NULL)
  810                 return;
  811 
  812         sysctl_createv(clog, 0, &node, NULL,
  813                 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
  814                 CTLTYPE_INT, "cacheht_time",
  815                 SYSCTL_DESCR("Cache hotness time (in ticks)"),
  816                 NULL, 0, &cacheht_time, 0,
  817                 CTL_CREATE, CTL_EOL);
  818         sysctl_createv(clog, 0, &node, NULL,
  819                 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
  820                 CTLTYPE_INT, "balance_period",
  821                 SYSCTL_DESCR("Balance period (in ticks)"),
  822                 NULL, 0, &balance_period, 0,
  823                 CTL_CREATE, CTL_EOL);
  824         sysctl_createv(clog, 0, &node, NULL,
  825                 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
  826                 CTLTYPE_INT, "min_catch",
  827                 SYSCTL_DESCR("Minimal count of threads for catching"),
  828                 NULL, 0, &min_catch, 0,
  829                 CTL_CREATE, CTL_EOL);
  830         sysctl_createv(clog, 0, &node, NULL,
  831                 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
  832                 CTLTYPE_INT, "timesoftints",
  833                 SYSCTL_DESCR("Track CPU time for soft interrupts"),
  834                 NULL, 0, &softint_timing, 0,
  835                 CTL_CREATE, CTL_EOL);
  836         sysctl_createv(clog, 0, &node, NULL,
  837                 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
  838                 CTLTYPE_INT, "kpreempt_pri",
  839                 SYSCTL_DESCR("Minimum priority to trigger kernel preemption"),
  840                 NULL, 0, &sched_kpreempt_pri, 0,
  841                 CTL_CREATE, CTL_EOL);
  842         sysctl_createv(clog, 0, &node, NULL,
  843                 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
  844                 CTLTYPE_INT, "upreempt_pri",
  845                 SYSCTL_DESCR("Minimum priority to trigger user preemption"),
  846                 NULL, 0, &sched_upreempt_pri, 0,
  847                 CTL_CREATE, CTL_EOL);
  848 }
  849 
  850 /*
  851  * Debugging.
  852  */
  853 
  854 #ifdef DDB
  855 
  856 void
  857 sched_print_runqueue(void (*pr)(const char *, ...)
  858     __attribute__((__format__(__printf__,1,2))))
  859 {
  860         runqueue_t *ci_rq;
  861         struct cpu_info *ci, *tci;
  862         struct schedstate_percpu *spc;
  863         struct lwp *l;
  864         struct proc *p;
  865         CPU_INFO_ITERATOR cii;
  866 
  867         for (CPU_INFO_FOREACH(cii, ci)) {
  868                 int i;
  869 
  870                 spc = &ci->ci_schedstate;
  871                 ci_rq = spc->spc_sched_info;
  872 
  873                 (*pr)("Run-queue (CPU = %u):\n", ci->ci_index);
  874                 (*pr)(" pid.lid = %d.%d, r_count = %u, r_avgcount = %u, "
  875                     "maxpri = %d, mlwp = %p\n",
  876 #ifdef MULTIPROCESSOR
  877                     ci->ci_curlwp->l_proc->p_pid, ci->ci_curlwp->l_lid,
  878 #else
  879                     curlwp->l_proc->p_pid, curlwp->l_lid,
  880 #endif
  881                     ci_rq->r_count, ci_rq->r_avgcount, spc->spc_maxpriority,
  882                     spc->spc_migrating);
  883                 i = (PRI_COUNT >> BITMAP_SHIFT) - 1;
  884                 do {
  885                         uint32_t q;
  886                         q = ci_rq->r_bitmap[i];
  887                         (*pr)(" bitmap[%d] => [ %d (0x%x) ]\n", i, ffs(q), q);
  888                 } while (i--);
  889         }
  890 
  891         (*pr)("   %5s %4s %4s %10s %3s %18s %4s %4s %s\n",
  892             "LID", "PRI", "EPRI", "FL", "ST", "LWP", "CPU", "TCI", "LRTICKS");
  893 
  894         PROCLIST_FOREACH(p, &allproc) {
  895                 if ((p->p_flag & PK_MARKER) != 0)
  896                         continue;
  897                 (*pr)(" /- %d (%s)\n", (int)p->p_pid, p->p_comm);
  898                 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
  899                         ci = l->l_cpu;
  900                         tci = l->l_target_cpu;
  901                         (*pr)(" | %5d %4u %4u 0x%8.8x %3s %18p %4u %4d %u\n",
  902                             (int)l->l_lid, l->l_priority, lwp_eprio(l),
  903                             l->l_flag, l->l_stat == LSRUN ? "RQ" :
  904                             (l->l_stat == LSSLEEP ? "SQ" : "-"),
  905                             l, ci->ci_index, (tci ? tci->ci_index : -1),
  906                             (u_int)(hardclock_ticks - l->l_rticks));
  907                 }
  908         }
  909 }
  910 
  911 #endif



This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.