FreeBSD/Linux Kernel Cross Reference
sys/kern/sched_ule.c


    1 /*-
    2  * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice unmodified, this list of conditions, and the following
   10  *    disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  *
   26  * $FreeBSD: releng/5.1/sys/kern/sched_ule.c 114496 2003-05-02 06:18:55Z julian $
   27  */
   28 
   29 #include <sys/param.h>
   30 #include <sys/systm.h>
   31 #include <sys/kernel.h>
   32 #include <sys/ktr.h>
   33 #include <sys/lock.h>
   34 #include <sys/mutex.h>
   35 #include <sys/proc.h>
   36 #include <sys/resource.h>
   37 #include <sys/sched.h>
   38 #include <sys/smp.h>
   39 #include <sys/sx.h>
   40 #include <sys/sysctl.h>
   41 #include <sys/sysproto.h>
   42 #include <sys/vmmeter.h>
   43 #ifdef DDB
   44 #include <ddb/ddb.h>
   45 #endif
   46 #ifdef KTRACE
   47 #include <sys/uio.h>
   48 #include <sys/ktrace.h>
   49 #endif
   50 
   51 #include <machine/cpu.h>
   52 
   53 #define KTR_ULE         KTR_NFS
   54 
   55 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
    56 /* XXX This is bogus compatibility crap for ps */
   57 static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
   58 SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
   59 
   60 static void sched_setup(void *dummy);
   61 SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)
   62 
   63 static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "SCHED");
   64 
   65 static int sched_strict;
   66 SYSCTL_INT(_kern_sched, OID_AUTO, strict, CTLFLAG_RD, &sched_strict, 0, "");
   67 
   68 static int slice_min = 1;
   69 SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");
   70 
   71 static int slice_max = 2;
   72 SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");
   73 
   74 int realstathz;
   75 int tickincr = 1;
   76 
   77 /*
    78  * These data structures are allocated within their parent data structure
    79  * but are scheduler specific.
   80  */
   81 
   82 struct ke_sched {
   83         int             ske_slice;
   84         struct runq     *ske_runq;
   85         /* The following variables are only used for pctcpu calculation */
   86         int             ske_ltick;      /* Last tick that we were running on */
   87         int             ske_ftick;      /* First tick that we were running on */
   88         int             ske_ticks;      /* Tick count */
   89         /* CPU that we have affinity for. */
   90         u_char          ske_cpu;
   91 };
   92 #define ke_slice        ke_sched->ske_slice
   93 #define ke_runq         ke_sched->ske_runq
   94 #define ke_ltick        ke_sched->ske_ltick
   95 #define ke_ftick        ke_sched->ske_ftick
   96 #define ke_ticks        ke_sched->ske_ticks
   97 #define ke_cpu          ke_sched->ske_cpu
   98 
   99 struct kg_sched {
  100         int     skg_slptime;            /* Number of ticks we vol. slept */
  101         int     skg_runtime;            /* Number of ticks we were running */
  102 };
  103 #define kg_slptime      kg_sched->skg_slptime
  104 #define kg_runtime      kg_sched->skg_runtime
  105 
  106 struct td_sched {
  107         int     std_slptime;
  108 };
  109 #define td_slptime      td_sched->std_slptime
  110 
  111 struct td_sched td_sched;
  112 struct ke_sched ke_sched;
  113 struct kg_sched kg_sched;
  114 
  115 struct ke_sched *kse0_sched = &ke_sched;
  116 struct kg_sched *ksegrp0_sched = &kg_sched;
  117 struct p_sched *proc0_sched = NULL;
  118 struct td_sched *thread0_sched = &td_sched;
  119 
  120 /*
  121  * This priority range has 20 priorities on either end that are reachable
  122  * only through nice values.
  123  *
  124  * PRI_RANGE:   Total priority range for timeshare threads.
  125  * PRI_NRESV:   Reserved priorities for nice.
  126  * PRI_BASE:    The start of the dynamic range.
   127  * DYN_RANGE:   Number of priorities that are available in the dynamic
  128  *              priority range.
  129  */
  130 #define SCHED_PRI_RANGE         (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
  131 #define SCHED_PRI_NRESV         PRIO_TOTAL
  132 #define SCHED_PRI_NHALF         (PRIO_TOTAL / 2)
  133 #define SCHED_PRI_NTHRESH       (SCHED_PRI_NHALF - 1)
  134 #define SCHED_PRI_BASE          ((SCHED_PRI_NRESV / 2) + PRI_MIN_TIMESHARE)
  135 #define SCHED_DYN_RANGE         (SCHED_PRI_RANGE - SCHED_PRI_NRESV)
  136 #define SCHED_PRI_INTERACT(score)                                       \
  137     ((score) * SCHED_DYN_RANGE / SCHED_INTERACT_RANGE)
  138 
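       /*
        * A worked example of the macros above, assuming the stock 5.x priority
        * layout (PRI_MIN_TIMESHARE = 160, PRI_MAX_TIMESHARE = 223, PRIO_TOTAL = 40):
        *
        *   SCHED_PRI_RANGE  = 223 - 160 + 1 = 64
        *   SCHED_PRI_NRESV  = 40, SCHED_PRI_NHALF = 20, SCHED_PRI_NTHRESH = 19
        *   SCHED_PRI_BASE   = (40 / 2) + 160 = 180
        *   SCHED_DYN_RANGE  = 64 - 40 = 24
        *   SCHED_PRI_INTERACT(50) = 50 * 24 / 100 = 12
        *
        * so an interactivity score of 50 maps to 12 priorities above the base of
        * the dynamic range.
        */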
  139 /*
  140  * These determine the interactivity of a process.
  141  *
  142  * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate
  143  *              before throttling back.
  144  * SLP_RUN_THROTTLE:    Divisor for reducing slp/run time.
  145  * INTERACT_RANGE:      Range of interactivity values.  Smaller is better.
  146  * INTERACT_HALF:       Convenience define, half of the interactivity range.
   147  * INTERACT_THRESH:     Threshold for placement on the current runq.
  148  */
  149 #define SCHED_SLP_RUN_MAX       ((hz / 10) << 10)
  150 #define SCHED_SLP_RUN_THROTTLE  (10)
  151 #define SCHED_INTERACT_RANGE    (100)
  152 #define SCHED_INTERACT_HALF     (SCHED_INTERACT_RANGE / 2)
  153 #define SCHED_INTERACT_THRESH   (10)
  154 
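       /*
        * For example, assuming hz = 1000: SCHED_SLP_RUN_MAX is (1000 / 10) << 10,
        * i.e. 100 ticks of combined sleep + run time in the <<10 fixed-point form
        * used for kg_slptime and kg_runtime.  Once the sum passes that, both are
        * divided by SCHED_SLP_RUN_THROTTLE (10), forgetting old behavior while
        * keeping the sleep/run ratio.  A ksegrp scoring below
        * SCHED_INTERACT_THRESH (10 on the 0..100 scale) is treated as interactive.
        */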
  155 /*
  156  * These parameters and macros determine the size of the time slice that is
  157  * granted to each thread.
  158  *
  159  * SLICE_MIN:   Minimum time slice granted, in units of ticks.
  160  * SLICE_MAX:   Maximum time slice granted.
  161  * SLICE_RANGE: Range of available time slices scaled by hz.
   162  * SLICE_SCALE: The number of slices granted per val in the range of [0, max].
   163  * SLICE_NICE:  Determines the amount of slice granted to a scaled nice value.
  164  */
  165 #define SCHED_SLICE_MIN                 (slice_min)
  166 #define SCHED_SLICE_MAX                 (slice_max)
  167 #define SCHED_SLICE_RANGE               (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
  168 #define SCHED_SLICE_SCALE(val, max)     (((val) * SCHED_SLICE_RANGE) / (max))
  169 #define SCHED_SLICE_NICE(nice)                                          \
  170     (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_PRI_NTHRESH))
  171 
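       /*
        * A worked example, assuming hz = 1000 so that sched_setup() leaves
        * slice_min = 10 and slice_max = 100 ticks:
        *
        *   SCHED_SLICE_RANGE    = 100 - 10 + 1 = 91
        *   SCHED_SLICE_NICE(0)  = 100 - (0 * 91) / 19  = 100 ticks
        *   SCHED_SLICE_NICE(10) = 100 - (10 * 91) / 19 = 53 ticks
        *   SCHED_SLICE_NICE(19) = 100 - (19 * 91) / 19 = 9 ticks
        *
        * where the argument is the kseg's nice value relative to the least nice
        * kseg on the run queue (see sched_slice() below).
        */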
  172 /*
  173  * This macro determines whether or not the kse belongs on the current or
  174  * next run queue.
  175  * 
   176  * XXX nice value should affect how interactive a kg is.
  177  */
  178 #define SCHED_INTERACTIVE(kg)                                           \
  179     (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
  180 #define SCHED_CURR(kg, ke)                                              \
  181     (ke->ke_thread->td_priority < PRI_MIN_TIMESHARE || SCHED_INTERACTIVE(kg))
  182 
  183 /*
  184  * Cpu percentage computation macros and defines.
  185  *
  186  * SCHED_CPU_TIME:      Number of seconds to average the cpu usage across.
  187  * SCHED_CPU_TICKS:     Number of hz ticks to average the cpu usage across.
  188  */
  189 
  190 #define SCHED_CPU_TIME  10
  191 #define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME)
  192 
  193 /*
  194  * kseq - per processor runqs and statistics.
  195  */
  196 
  197 #define KSEQ_NCLASS     (PRI_IDLE + 1)  /* Number of run classes. */
  198 
  199 struct kseq {
  200         struct runq     ksq_idle;               /* Queue of IDLE threads. */
  201         struct runq     ksq_timeshare[2];       /* Run queues for !IDLE. */
  202         struct runq     *ksq_next;              /* Next timeshare queue. */
  203         struct runq     *ksq_curr;              /* Current queue. */
  204         int             ksq_loads[KSEQ_NCLASS]; /* Load for each class */
  205         int             ksq_load;               /* Aggregate load. */
  206         short           ksq_nice[PRIO_TOTAL + 1]; /* KSEs in each nice bin. */
  207         short           ksq_nicemin;            /* Least nice. */
  208 #ifdef SMP
  209         unsigned int    ksq_rslices;    /* Slices on run queue */
  210 #endif
  211 };
  212 
  213 /*
  214  * One kse queue per processor.
  215  */
  216 #ifdef SMP
  217 struct kseq     kseq_cpu[MAXCPU];
  218 #define KSEQ_SELF()     (&kseq_cpu[PCPU_GET(cpuid)])
  219 #define KSEQ_CPU(x)     (&kseq_cpu[(x)])
  220 #else
  221 struct kseq     kseq_cpu;
  222 #define KSEQ_SELF()     (&kseq_cpu)
  223 #define KSEQ_CPU(x)     (&kseq_cpu)
  224 #endif
  225 
  226 static void sched_slice(struct kse *ke);
  227 static void sched_priority(struct ksegrp *kg);
  228 static int sched_interact_score(struct ksegrp *kg);
  229 void sched_pctcpu_update(struct kse *ke);
  230 int sched_pickcpu(void);
  231 
  232 /* Operations on per processor queues */
  233 static struct kse * kseq_choose(struct kseq *kseq);
  234 static void kseq_setup(struct kseq *kseq);
  235 static void kseq_add(struct kseq *kseq, struct kse *ke);
  236 static void kseq_rem(struct kseq *kseq, struct kse *ke);
  237 static void kseq_nice_add(struct kseq *kseq, int nice);
  238 static void kseq_nice_rem(struct kseq *kseq, int nice);
  239 void kseq_print(int cpu);
  240 #ifdef SMP
  241 struct kseq * kseq_load_highest(void);
  242 #endif
  243 
  244 void
  245 kseq_print(int cpu)
  246 {
  247         struct kseq *kseq;
  248         int i;
  249 
  250         kseq = KSEQ_CPU(cpu);
  251 
  252         printf("kseq:\n");
  253         printf("\tload:           %d\n", kseq->ksq_load);
  254         printf("\tload ITHD:      %d\n", kseq->ksq_loads[PRI_ITHD]);
  255         printf("\tload REALTIME:  %d\n", kseq->ksq_loads[PRI_REALTIME]);
  256         printf("\tload TIMESHARE: %d\n", kseq->ksq_loads[PRI_TIMESHARE]);
  257         printf("\tload IDLE:      %d\n", kseq->ksq_loads[PRI_IDLE]);
  258         printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
  259         printf("\tnice counts:\n");
  260         for (i = 0; i < PRIO_TOTAL + 1; i++)
  261                 if (kseq->ksq_nice[i])
  262                         printf("\t\t%d = %d\n",
  263                             i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
  264 }
  265 
  266 static void
  267 kseq_add(struct kseq *kseq, struct kse *ke)
  268 {
  269         kseq->ksq_loads[PRI_BASE(ke->ke_ksegrp->kg_pri_class)]++;
  270         kseq->ksq_load++;
  271         if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
   272                 CTR6(KTR_ULE, "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
   273                     ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority,
   274                     ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin);
  275         if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
  276                 kseq_nice_add(kseq, ke->ke_ksegrp->kg_nice);
  277 #ifdef SMP
  278         kseq->ksq_rslices += ke->ke_slice;
  279 #endif
  280 }
  281 
  282 static void
  283 kseq_rem(struct kseq *kseq, struct kse *ke)
  284 {
  285         kseq->ksq_loads[PRI_BASE(ke->ke_ksegrp->kg_pri_class)]--;
  286         kseq->ksq_load--;
  287         ke->ke_runq = NULL;
  288         if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
  289                 kseq_nice_rem(kseq, ke->ke_ksegrp->kg_nice);
  290 #ifdef SMP
  291         kseq->ksq_rslices -= ke->ke_slice;
  292 #endif
  293 }
  294 
  295 static void
  296 kseq_nice_add(struct kseq *kseq, int nice)
  297 {
  298         /* Normalize to zero. */
  299         kseq->ksq_nice[nice + SCHED_PRI_NHALF]++;
  300         if (nice < kseq->ksq_nicemin || kseq->ksq_loads[PRI_TIMESHARE] == 0)
  301                 kseq->ksq_nicemin = nice;
  302 }
  303 
  304 static void
  305 kseq_nice_rem(struct kseq *kseq, int nice) 
  306 {
  307         int n;
  308 
  309         /* Normalize to zero. */
  310         n = nice + SCHED_PRI_NHALF;
  311         kseq->ksq_nice[n]--;
  312         KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count."));
  313 
  314         /*
  315          * If this wasn't the smallest nice value or there are more in
  316          * this bucket we can just return.  Otherwise we have to recalculate
  317          * the smallest nice.
  318          */
  319         if (nice != kseq->ksq_nicemin ||
  320             kseq->ksq_nice[n] != 0 ||
  321             kseq->ksq_loads[PRI_TIMESHARE] == 0)
  322                 return;
  323 
  324         for (; n < SCHED_PRI_NRESV + 1; n++)
  325                 if (kseq->ksq_nice[n]) {
  326                         kseq->ksq_nicemin = n - SCHED_PRI_NHALF;
  327                         return;
  328                 }
  329 }
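       /*
        * For example, with the usual PRIO_MIN = -20 and PRIO_MAX = 20, the nice
        * bins above are indexed 0..40: nice -20 lands in bin 0, nice 0 in bin 20
        * and nice +20 in bin 40.  ksq_nicemin always tracks the smallest nice
        * value that still has a non-empty bin, so kseq_nice_rem() only rescans
        * the array when the bin holding the current minimum drains.
        */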
  330 
  331 #ifdef SMP
  332 struct kseq *
  333 kseq_load_highest(void)
  334 {
  335         struct kseq *kseq;
  336         int load;
  337         int cpu;
  338         int i;
  339 
  340         cpu = 0;
  341         load = 0;
  342 
  343         for (i = 0; i < mp_maxid; i++) {
  344                 if (CPU_ABSENT(i))
  345                         continue;
  346                 kseq = KSEQ_CPU(i);
  347                 if (kseq->ksq_load > load) {
  348                         load = kseq->ksq_load;
  349                         cpu = i;
  350                 }
  351         }
  352         if (load > 1)
  353                 return (KSEQ_CPU(cpu));
  354 
  355         return (NULL);
  356 }
  357 #endif
  358 
  359 struct kse *
  360 kseq_choose(struct kseq *kseq)
  361 {
  362         struct kse *ke;
  363         struct runq *swap;
  364 
  365         swap = NULL;
  366 
  367         for (;;) {
  368                 ke = runq_choose(kseq->ksq_curr);
  369                 if (ke == NULL) {
  370                         /*
   371                          * We already swapped once and didn't get anywhere.
  372                          */
  373                         if (swap)
  374                                 break;
  375                         swap = kseq->ksq_curr;
  376                         kseq->ksq_curr = kseq->ksq_next;
  377                         kseq->ksq_next = swap;
  378                         continue;
  379                 }
  380                 /*
   381                  * If we encounter a slice of 0, the kse is in a
   382                  * TIMESHARE kse group and its nice value was too far
   383                  * out of the range that receives slices.
  384                  */
  385                 if (ke->ke_slice == 0) {
  386                         runq_remove(ke->ke_runq, ke);
  387                         sched_slice(ke);
  388                         ke->ke_runq = kseq->ksq_next;
  389                         runq_add(ke->ke_runq, ke);
  390                         continue;
  391                 }
  392                 return (ke);
  393         }
  394 
  395         return (runq_choose(&kseq->ksq_idle));
  396 }
  397 
  398 static void
  399 kseq_setup(struct kseq *kseq)
  400 {
  401         runq_init(&kseq->ksq_timeshare[0]);
  402         runq_init(&kseq->ksq_timeshare[1]);
  403         runq_init(&kseq->ksq_idle);
  404 
  405         kseq->ksq_curr = &kseq->ksq_timeshare[0];
  406         kseq->ksq_next = &kseq->ksq_timeshare[1];
  407 
  408         kseq->ksq_loads[PRI_ITHD] = 0;
  409         kseq->ksq_loads[PRI_REALTIME] = 0;
  410         kseq->ksq_loads[PRI_TIMESHARE] = 0;
  411         kseq->ksq_loads[PRI_IDLE] = 0;
  412         kseq->ksq_load = 0;
  413 #ifdef SMP
  414         kseq->ksq_rslices = 0;
  415 #endif
  416 }
  417 
  418 static void
  419 sched_setup(void *dummy)
  420 {
  421         int i;
  422 
  423         slice_min = (hz/100);
  424         slice_max = (hz/10);
  425 
  426         mtx_lock_spin(&sched_lock);
  427         /* init kseqs */
  428         for (i = 0; i < MAXCPU; i++)
  429                 kseq_setup(KSEQ_CPU(i));
  430 
  431         kseq_add(KSEQ_SELF(), &kse0);
  432         mtx_unlock_spin(&sched_lock);
  433 }
  434 
  435 /*
  436  * Scale the scheduling priority according to the "interactivity" of this
  437  * process.
  438  */
  439 static void
  440 sched_priority(struct ksegrp *kg)
  441 {
  442         int pri;
  443 
  444         if (kg->kg_pri_class != PRI_TIMESHARE)
  445                 return;
  446 
  447         pri = SCHED_PRI_INTERACT(sched_interact_score(kg));
  448         pri += SCHED_PRI_BASE;
  449         pri += kg->kg_nice;
  450 
  451         if (pri > PRI_MAX_TIMESHARE)
  452                 pri = PRI_MAX_TIMESHARE;
  453         else if (pri < PRI_MIN_TIMESHARE)
  454                 pri = PRI_MIN_TIMESHARE;
  455 
  456         kg->kg_user_pri = pri;
  457 
  458         return;
  459 }
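       /*
        * For instance, a ksegrp with an interactivity score of 25 and a nice
        * value of 0 would get (again assuming the 5.x layout used above)
        * pri = 25 * 24 / 100 + 180 + 0 = 186, which is then clamped into
        * [PRI_MIN_TIMESHARE, PRI_MAX_TIMESHARE] if necessary.
        */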
  460 
  461 /*
  462  * Calculate a time slice based on the properties of the kseg and the runq
  463  * that we're on.  This is only for PRI_TIMESHARE ksegrps.
  464  */
  465 static void
  466 sched_slice(struct kse *ke)
  467 {
  468         struct kseq *kseq;
  469         struct ksegrp *kg;
  470 
  471         kg = ke->ke_ksegrp;
  472         kseq = KSEQ_CPU(ke->ke_cpu);
  473 
  474         /*
  475          * Rationale:
  476          * KSEs in interactive ksegs get the minimum slice so that we
   477          * quickly notice if they abuse their advantage.
  478          *
  479          * KSEs in non-interactive ksegs are assigned a slice that is
   480          * based on the kseg's nice value relative to the least nice kseg
  481          * on the run queue for this cpu.
  482          *
  483          * If the KSE is less nice than all others it gets the maximum
  484          * slice and other KSEs will adjust their slice relative to
  485          * this when they first expire.
  486          *
   487          * There is a 20 point window that starts relative to the least
   488          * nice kse on the run queue.  Slice size is determined by
   489          * the kse's distance from the least nice ksegrp.
  490          *
  491          * If you are outside of the window you will get no slice and
  492          * you will be reevaluated each time you are selected on the
  493          * run queue.
  494          *      
  495          */
  496 
  497         if (!SCHED_INTERACTIVE(kg)) {
  498                 int nice;
  499 
  500                 nice = kg->kg_nice + (0 - kseq->ksq_nicemin);
  501                 if (kseq->ksq_loads[PRI_TIMESHARE] == 0 ||
  502                     kg->kg_nice < kseq->ksq_nicemin)
  503                         ke->ke_slice = SCHED_SLICE_MAX;
  504                 else if (nice <= SCHED_PRI_NTHRESH)
  505                         ke->ke_slice = SCHED_SLICE_NICE(nice);
  506                 else
  507                         ke->ke_slice = 0;
  508         } else
  509                 ke->ke_slice = SCHED_SLICE_MIN;
  510 
  511         CTR6(KTR_ULE,
  512             "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)",
  513             ke, ke->ke_slice, kg->kg_nice, kseq->ksq_nicemin,
  514             kseq->ksq_loads[PRI_TIMESHARE], SCHED_INTERACTIVE(kg));
  515 
  516         /*
  517          * Check to see if we need to scale back the slp and run time
  518          * in the kg.  This will cause us to forget old interactivity
  519          * while maintaining the current ratio.
  520          */
  521         CTR4(KTR_ULE, "Slp vs Run %p (Slp %d, Run %d, Score %d)",
  522             ke, kg->kg_slptime >> 10, kg->kg_runtime >> 10,
  523             sched_interact_score(kg));
  524 
  525         if ((kg->kg_runtime + kg->kg_slptime) >  SCHED_SLP_RUN_MAX) {
  526                 kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE;
  527                 kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE;
  528         }
  529         CTR4(KTR_ULE, "Slp vs Run(2) %p (Slp %d, Run %d, Score %d)",
  530             ke, kg->kg_slptime >> 10, kg->kg_runtime >> 10,
  531             sched_interact_score(kg));
  532 
  533         return;
  534 }
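       /*
        * Concretely, assuming hz = 1000 (slice_min = 10, slice_max = 100): an
        * interactive kseg always gets the 10 tick minimum; a non-interactive
        * kseg with kg_nice = 5 on a queue whose ksq_nicemin is 0 gets
        * SCHED_SLICE_NICE(5) = 100 - (5 * 91) / 19 = 77 ticks; and one whose
        * nice value is more than SCHED_PRI_NTHRESH (19) above the least nice
        * kseg gets a slice of 0 and is re-evaluated by kseq_choose() each time
        * it is selected.
        */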
  535 
  536 static int
  537 sched_interact_score(struct ksegrp *kg)
  538 {
  539         int big;
  540         int small;
  541         int base;
  542 
  543         if (kg->kg_runtime > kg->kg_slptime) {
  544                 big = kg->kg_runtime;
  545                 small = kg->kg_slptime;
  546                 base = SCHED_INTERACT_HALF;
  547         } else {
  548                 big = kg->kg_slptime;
  549                 small = kg->kg_runtime;
  550                 base = 0;
  551         }
  552 
  553         big /= SCHED_INTERACT_HALF;
  554         if (big != 0)
  555                 small /= big;
  556         else
  557                 small = 0;
  558 
  559         small += base;
  560         /* XXX Factor in nice */
  561         return (small);
  562 }
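       /*
        * Two worked examples of the score above (the absolute kg_slptime and
        * kg_runtime values are arbitrary; only their ratio matters):
        *
        *   runtime = 30000, slptime = 10000: big = 30000 / 50 = 600,
        *       small = 10000 / 600 = 16, score = 50 + 16 = 66 (CPU bound).
        *   slptime = 50000, runtime = 2000:  big = 50000 / 50 = 1000,
        *       small = 2000 / 1000 = 2, score = 0 + 2 = 2 (interactive,
        *       well under SCHED_INTERACT_THRESH).
        */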
  563 
  564 /*
   565  * This is only somewhat accurate since, given many processes of the same
   566  * priority, they will switch when their slices run out, which will be
  567  * at most SCHED_SLICE_MAX.
  568  */
  569 int
  570 sched_rr_interval(void)
  571 {
  572         return (SCHED_SLICE_MAX);
  573 }
  574 
  575 void
  576 sched_pctcpu_update(struct kse *ke)
  577 {
  578         /*
  579          * Adjust counters and watermark for pctcpu calc.
  580          *
  581          * Shift the tick count out so that the divide doesn't round away
  582          * our results.
  583          */
  584         ke->ke_ticks <<= 10;
  585         ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
  586                     SCHED_CPU_TICKS;
  587         ke->ke_ticks >>= 10;
  588         ke->ke_ltick = ticks;
  589         ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
  590 }
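       /*
        * For example, with hz = 1000 (SCHED_CPU_TICKS = 10000), a kse that has
        * accumulated ke_ticks = 500 over a 15000 tick window is rescaled to
        * (500 << 10) / 15000 * 10000 >> 10 = 332 ticks over the standard
        * 10 second window, preserving the roughly 3.3% cpu rate while resetting
        * the watermarks.
        */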
  591 
  592 #ifdef SMP
  593 /* XXX Should be changed to kseq_load_lowest() */
  594 int
  595 sched_pickcpu(void)
  596 {
  597         struct kseq *kseq;
  598         int load;
  599         int cpu;
  600         int i;
  601 
  602         if (!smp_started)
  603                 return (0);
  604 
  605         load = 0;
  606         cpu = 0;
  607 
  608         for (i = 0; i < mp_maxid; i++) {
  609                 if (CPU_ABSENT(i))
  610                         continue;
  611                 kseq = KSEQ_CPU(i);
  612                 if (kseq->ksq_load < load) {
  613                         cpu = i;
  614                         load = kseq->ksq_load;
  615                 }
  616         }
  617 
  618         CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
  619         return (cpu);
  620 }
  621 #else
  622 int
  623 sched_pickcpu(void)
  624 {
  625         return (0);
  626 }
  627 #endif
  628 
  629 void
  630 sched_prio(struct thread *td, u_char prio)
  631 {
  632         struct kse *ke;
  633         struct runq *rq;
  634 
  635         mtx_assert(&sched_lock, MA_OWNED);
  636         ke = td->td_kse;
  637         td->td_priority = prio;
  638 
  639         if (TD_ON_RUNQ(td)) {
  640                 rq = ke->ke_runq;
  641 
  642                 runq_remove(rq, ke);
  643                 runq_add(rq, ke);
  644         }
  645 }
  646 
  647 void
  648 sched_switchout(struct thread *td)
  649 {
  650         struct kse *ke;
  651 
  652         mtx_assert(&sched_lock, MA_OWNED);
  653 
  654         ke = td->td_kse;
  655 
  656         td->td_last_kse = ke;
  657         td->td_lastcpu = td->td_oncpu;
  658         td->td_oncpu = NOCPU;
  659         td->td_flags &= ~TDF_NEEDRESCHED;
  660 
  661         if (TD_IS_RUNNING(td)) {
  662                 runq_add(ke->ke_runq, ke);
  663                 /* setrunqueue(td); */
  664                 return;
  665         }
  666         if (ke->ke_runq)
  667                 kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
  668         /*
  669          * We will not be on the run queue. So we must be
  670          * sleeping or similar.
  671          */
  672         if (td->td_proc->p_flag & P_THREADED)
  673                 kse_reassign(ke);
  674 }
  675 
  676 void
  677 sched_switchin(struct thread *td)
  678 {
  679         /* struct kse *ke = td->td_kse; */
  680         mtx_assert(&sched_lock, MA_OWNED);
  681 
  682         td->td_oncpu = PCPU_GET(cpuid);
  683 
  684         if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
  685             td->td_priority != td->td_ksegrp->kg_user_pri)
  686                 curthread->td_flags |= TDF_NEEDRESCHED;
  687 }
  688 
  689 void
  690 sched_nice(struct ksegrp *kg, int nice)
  691 {
  692         struct kse *ke;
  693         struct thread *td;
  694         struct kseq *kseq;
  695 
  696         PROC_LOCK_ASSERT(kg->kg_proc, MA_OWNED);
  697         mtx_assert(&sched_lock, MA_OWNED);
  698         /*
  699          * We need to adjust the nice counts for running KSEs.
  700          */
  701         if (kg->kg_pri_class == PRI_TIMESHARE)
  702                 FOREACH_KSE_IN_GROUP(kg, ke) {
  703                         if (ke->ke_state != KES_ONRUNQ &&
  704                             ke->ke_state != KES_THREAD)
  705                                 continue;
  706                         kseq = KSEQ_CPU(ke->ke_cpu);
  707                         kseq_nice_rem(kseq, kg->kg_nice);
  708                         kseq_nice_add(kseq, nice);
  709                 }
  710         kg->kg_nice = nice;
  711         sched_priority(kg);
  712         FOREACH_THREAD_IN_GROUP(kg, td)
  713                 td->td_flags |= TDF_NEEDRESCHED;
  714 }
  715 
  716 void
  717 sched_sleep(struct thread *td, u_char prio)
  718 {
  719         mtx_assert(&sched_lock, MA_OWNED);
  720 
  721         td->td_slptime = ticks;
  722         td->td_priority = prio;
  723 
  724         CTR2(KTR_ULE, "sleep kse %p (tick: %d)",
  725             td->td_kse, td->td_slptime);
  726 }
  727 
  728 void
  729 sched_wakeup(struct thread *td)
  730 {
  731         mtx_assert(&sched_lock, MA_OWNED);
  732 
  733         /*
  734          * Let the kseg know how long we slept for.  This is because process
  735          * interactivity behavior is modeled in the kseg.
  736          */
  737         if (td->td_slptime) {
  738                 struct ksegrp *kg;
  739                 int hzticks;
  740 
  741                 kg = td->td_ksegrp;
  742                 hzticks = ticks - td->td_slptime;
  743                 kg->kg_slptime += hzticks << 10;
  744                 sched_priority(kg);
  745                 CTR2(KTR_ULE, "wakeup kse %p (%d ticks)",
  746                     td->td_kse, hzticks);
  747                 td->td_slptime = 0;
  748         }
  749         setrunqueue(td);
  750         if (td->td_priority < curthread->td_priority)
  751                 curthread->td_flags |= TDF_NEEDRESCHED;
  752 }
  753 
  754 /*
  755  * Penalize the parent for creating a new child and initialize the child's
  756  * priority.
  757  */
  758 void
  759 sched_fork(struct proc *p, struct proc *p1)
  760 {
  761 
  762         mtx_assert(&sched_lock, MA_OWNED);
  763 
  764         sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
  765         sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
  766         sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
  767 }
  768 
  769 void
  770 sched_fork_kse(struct kse *ke, struct kse *child)
  771 {
  772 
  773         child->ke_slice = ke->ke_slice;
  774         child->ke_cpu = ke->ke_cpu; /* sched_pickcpu(); */
  775         child->ke_runq = NULL;
  776 
  777         /*
  778          * Claim that we've been running for one second for statistical
  779          * purposes.
  780          */
  781         child->ke_ticks = 0;
  782         child->ke_ltick = ticks;
  783         child->ke_ftick = ticks - hz;
  784 }
  785 
  786 void
  787 sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child)
  788 {
  789 
  790         PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED);
  791         /* XXX Need something better here */
  792         if (kg->kg_slptime > kg->kg_runtime) {
  793                 child->kg_slptime = SCHED_DYN_RANGE;
  794                 child->kg_runtime = kg->kg_slptime / SCHED_DYN_RANGE;
  795         } else {
  796                 child->kg_runtime = SCHED_DYN_RANGE;
  797                 child->kg_slptime = kg->kg_runtime / SCHED_DYN_RANGE;
  798         }
  799 
  800         child->kg_user_pri = kg->kg_user_pri;
  801         child->kg_nice = kg->kg_nice;
  802 }
  803 
  804 void
  805 sched_fork_thread(struct thread *td, struct thread *child)
  806 {
  807 }
  808 
  809 void
  810 sched_class(struct ksegrp *kg, int class)
  811 {
  812         struct kseq *kseq;
  813         struct kse *ke;
  814 
  815         mtx_assert(&sched_lock, MA_OWNED);
  816         if (kg->kg_pri_class == class)
  817                 return;
  818 
  819         FOREACH_KSE_IN_GROUP(kg, ke) {
  820                 if (ke->ke_state != KES_ONRUNQ &&
  821                     ke->ke_state != KES_THREAD)
  822                         continue;
  823                 kseq = KSEQ_CPU(ke->ke_cpu);
  824 
  825                 kseq->ksq_loads[PRI_BASE(kg->kg_pri_class)]--;
  826                 kseq->ksq_loads[PRI_BASE(class)]++;
  827 
  828                 if (kg->kg_pri_class == PRI_TIMESHARE)
  829                         kseq_nice_rem(kseq, kg->kg_nice);
  830                 else if (class == PRI_TIMESHARE)
  831                         kseq_nice_add(kseq, kg->kg_nice);
  832         }
  833 
  834         kg->kg_pri_class = class;
  835 }
  836 
  837 /*
  838  * Return some of the child's priority and interactivity to the parent.
  839  */
  840 void
  841 sched_exit(struct proc *p, struct proc *child)
  842 {
  843         /* XXX Need something better here */
  844         mtx_assert(&sched_lock, MA_OWNED);
  845         sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child));
  846 }
  847 
  848 void
  849 sched_exit_kse(struct kse *ke, struct kse *child)
  850 {
  851         kseq_rem(KSEQ_CPU(child->ke_cpu), child);
  852 }
  853 
  854 void
  855 sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child)
  856 {
  857 }
  858 
  859 void
  860 sched_exit_thread(struct thread *td, struct thread *child)
  861 {
  862 }
  863 
  864 void
  865 sched_clock(struct kse *ke)
  866 {
  867         struct kseq *kseq;
  868         struct ksegrp *kg;
  869         struct thread *td;
  870 #if 0
  871         struct kse *nke;
  872 #endif
  873 
  874         /*
  875          * sched_setup() apparently happens prior to stathz being set.  We
  876          * need to resolve the timers earlier in the boot so we can avoid
  877          * calculating this here.
  878          */
  879         if (realstathz == 0) {
  880                 realstathz = stathz ? stathz : hz;
  881                 tickincr = hz / realstathz;
  882                 /*
  883                  * XXX This does not work for values of stathz that are much
  884                  * larger than hz.
  885                  */
  886                 if (tickincr == 0)
  887                         tickincr = 1;
  888         }
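               /*
                * For example, with hz = 1000 and stathz = 128 the block above
                * leaves realstathz = 128 and tickincr = 7, so each stat clock
                * tick charges 7 << 10 to kg_runtime below.
                */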
  889 
  890         td = ke->ke_thread;
  891         kg = ke->ke_ksegrp;
  892 
  893         mtx_assert(&sched_lock, MA_OWNED);
  894         KASSERT((td != NULL), ("schedclock: null thread pointer"));
  895 
  896         /* Adjust ticks for pctcpu */
  897         ke->ke_ticks++;
  898         ke->ke_ltick = ticks;
  899 
  900         /* Go up to one second beyond our max and then trim back down */
  901         if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
  902                 sched_pctcpu_update(ke);
  903 
  904         if (td->td_flags & TDF_IDLETD)
  905                 return;
  906 
  907         CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)",
  908             ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10);
  909 
  910         /*
   911          * We only do slice accounting for TIMESHARE ksegrps.
  912          */
  913         if (kg->kg_pri_class != PRI_TIMESHARE)
  914                 return;
  915         /*
  916          * Check for a higher priority task on the run queue.  This can happen
  917          * on SMP if another processor woke up a process on our runq.
  918          */
  919         kseq = KSEQ_SELF();
  920 #if 0
  921         if (kseq->ksq_load > 1 && (nke = kseq_choose(kseq)) != NULL) {
  922                 if (sched_strict &&
  923                     nke->ke_thread->td_priority < td->td_priority)
  924                         td->td_flags |= TDF_NEEDRESCHED;
  925                 else if (nke->ke_thread->td_priority <
  926                     td->td_priority SCHED_PRIO_SLOP)
  927                     
  928                 if (nke->ke_thread->td_priority < td->td_priority)
  929                         td->td_flags |= TDF_NEEDRESCHED;
  930         }
  931 #endif
  932         /*
   933          * We used a tick; charge it to the ksegrp so that we can compute our
  934          * interactivity.
  935          */
  936         kg->kg_runtime += tickincr << 10;
  937 
  938         /*
  939          * We used up one time slice.
  940          */
  941         ke->ke_slice--;
  942 #ifdef SMP
  943         kseq->ksq_rslices--;
  944 #endif
  945 
  946         if (ke->ke_slice > 0)
  947                 return;
  948         /*
  949          * We're out of time, recompute priorities and requeue.
  950          */
  951         kseq_rem(kseq, ke);
  952         sched_priority(kg);
  953         sched_slice(ke);
  954         if (SCHED_CURR(kg, ke))
  955                 ke->ke_runq = kseq->ksq_curr;
  956         else
  957                 ke->ke_runq = kseq->ksq_next;
  958         kseq_add(kseq, ke);
  959         td->td_flags |= TDF_NEEDRESCHED;
  960 }
  961 
  962 int
  963 sched_runnable(void)
  964 {
  965         struct kseq *kseq;
  966 
  967         kseq = KSEQ_SELF();
  968 
  969         if (kseq->ksq_load)
  970                 return (1);
  971 #ifdef SMP
  972         /*
   973          * For SMP we may steal other processors' KSEs.  Just search until we
   974          * verify that at least one other cpu has a runnable task.
  975          */
  976         if (smp_started) {
  977                 int i;
  978 
  979                 for (i = 0; i < mp_maxid; i++) {
  980                         if (CPU_ABSENT(i))
  981                                 continue;
  982                         kseq = KSEQ_CPU(i);
  983                         if (kseq->ksq_load > 1)
  984                                 return (1);
  985                 }
  986         }
  987 #endif
  988         return (0);
  989 }
  990 
  991 void
  992 sched_userret(struct thread *td)
  993 {
  994         struct ksegrp *kg;
  995         
  996         kg = td->td_ksegrp;
  997 
  998         if (td->td_priority != kg->kg_user_pri) {
  999                 mtx_lock_spin(&sched_lock);
 1000                 td->td_priority = kg->kg_user_pri;
 1001                 mtx_unlock_spin(&sched_lock);
 1002         }
 1003 }
 1004 
 1005 struct kse *
 1006 sched_choose(void)
 1007 {
 1008         struct kseq *kseq;
 1009         struct kse *ke;
 1010 
 1011 #ifdef SMP
 1012 retry:
 1013 #endif
 1014         kseq = KSEQ_SELF();
 1015         ke = kseq_choose(kseq);
 1016         if (ke) {
 1017                 runq_remove(ke->ke_runq, ke);
 1018                 ke->ke_state = KES_THREAD;
 1019 
 1020                 if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
 1021                         CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)",
 1022                             ke, ke->ke_runq, ke->ke_slice,
 1023                             ke->ke_thread->td_priority);
 1024                 }
 1025                 return (ke);
 1026         }
 1027 
 1028 #ifdef SMP
 1029         if (smp_started) {
 1030                 /*
 1031                  * Find the cpu with the highest load and steal one proc.
 1032                  */
 1033                 if ((kseq = kseq_load_highest()) == NULL)
 1034                         return (NULL);
 1035 
 1036                 /*
 1037                  * Remove this kse from this kseq and runq and then requeue
 1038                  * on the current processor.  Then we will dequeue it
 1039                  * normally above.
 1040                  */
 1041                 ke = kseq_choose(kseq);
 1042                 runq_remove(ke->ke_runq, ke);
 1043                 ke->ke_state = KES_THREAD;
 1044                 kseq_rem(kseq, ke);
 1045 
 1046                 ke->ke_cpu = PCPU_GET(cpuid);
 1047                 sched_add(ke);
 1048                 goto retry;
 1049         }
 1050 #endif
 1051 
 1052         return (NULL);
 1053 }
 1054 
 1055 void
 1056 sched_add(struct kse *ke)
 1057 {
 1058         struct kseq *kseq;
 1059         struct ksegrp *kg;
 1060 
 1061         mtx_assert(&sched_lock, MA_OWNED);
 1062         KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
 1063         KASSERT((ke->ke_thread->td_kse != NULL),
 1064             ("sched_add: No KSE on thread"));
 1065         KASSERT(ke->ke_state != KES_ONRUNQ,
 1066             ("sched_add: kse %p (%s) already in run queue", ke,
 1067             ke->ke_proc->p_comm));
 1068         KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
 1069             ("sched_add: process swapped out"));
 1070         KASSERT(ke->ke_runq == NULL,
 1071             ("sched_add: KSE %p is still assigned to a run queue", ke));
 1072 
 1073         kg = ke->ke_ksegrp;
 1074 
 1075         switch (PRI_BASE(kg->kg_pri_class)) {
 1076         case PRI_ITHD:
 1077         case PRI_REALTIME:
 1078                 kseq = KSEQ_SELF();
 1079                 ke->ke_runq = kseq->ksq_curr;
 1080                 ke->ke_slice = SCHED_SLICE_MAX;
 1081                 ke->ke_cpu = PCPU_GET(cpuid);
 1082                 break;
 1083         case PRI_TIMESHARE:
 1084                 kseq = KSEQ_CPU(ke->ke_cpu);
 1085                 if (SCHED_CURR(kg, ke))
 1086                         ke->ke_runq = kseq->ksq_curr;
 1087                 else
 1088                         ke->ke_runq = kseq->ksq_next;
 1089                 break;
 1090         case PRI_IDLE:
 1091                 kseq = KSEQ_CPU(ke->ke_cpu);
 1092                 /*
  1093                  * This is for priority propagation.
 1094                  */
 1095                 if (ke->ke_thread->td_priority < PRI_MAX_TIMESHARE)
 1096                         ke->ke_runq = kseq->ksq_curr;
 1097                 else
 1098                         ke->ke_runq = &kseq->ksq_idle;
 1099                 ke->ke_slice = SCHED_SLICE_MIN;
 1100                 break;
 1101         default:
 1102                 panic("Unknown pri class.\n");
 1103                 break;
 1104         }
 1105 
 1106         ke->ke_ksegrp->kg_runq_kses++;
 1107         ke->ke_state = KES_ONRUNQ;
 1108 
 1109         runq_add(ke->ke_runq, ke);
 1110         kseq_add(kseq, ke);
 1111 }
 1112 
 1113 void
 1114 sched_rem(struct kse *ke)
 1115 {
 1116         struct kseq *kseq;
 1117 
 1118         mtx_assert(&sched_lock, MA_OWNED);
 1119         KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
 1120 
 1121         ke->ke_state = KES_THREAD;
 1122         ke->ke_ksegrp->kg_runq_kses--;
 1123         kseq = KSEQ_CPU(ke->ke_cpu);
 1124         runq_remove(ke->ke_runq, ke);
 1125         kseq_rem(kseq, ke);
 1126 }
 1127 
 1128 fixpt_t
 1129 sched_pctcpu(struct kse *ke)
 1130 {
 1131         fixpt_t pctcpu;
 1132 
 1133         pctcpu = 0;
 1134 
 1135         if (ke->ke_ticks) {
 1136                 int rtick;
 1137 
 1138                 /* Update to account for time potentially spent sleeping */
 1139                 ke->ke_ltick = ticks;
 1140                 sched_pctcpu_update(ke);
 1141 
 1142                 /* How many rtick per second ? */
 1143                 rtick = ke->ke_ticks / SCHED_CPU_TIME;
 1144                 pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
 1145         }
 1146 
 1147         mtx_lock_spin(&sched_lock);
 1148         ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
 1149         mtx_unlock_spin(&sched_lock);
 1150 
 1151         return (pctcpu);
 1152 }
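       /*
        * As an illustration, assuming realstathz = 1000: a kse that ran for
        * 5000 of the last 10000 stat ticks yields rtick = 500 and
        * pctcpu = (FSCALE * ((FSCALE * 500) / 1000)) >> FSHIFT = FSCALE / 2,
        * i.e. 50% cpu.
        */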
 1153 
 1154 int
 1155 sched_sizeof_kse(void)
 1156 {
 1157         return (sizeof(struct kse) + sizeof(struct ke_sched));
 1158 }
 1159 
 1160 int
 1161 sched_sizeof_ksegrp(void)
 1162 {
 1163         return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
 1164 }
 1165 
 1166 int
 1167 sched_sizeof_proc(void)
 1168 {
 1169         return (sizeof(struct proc));
 1170 }
 1171 
 1172 int
 1173 sched_sizeof_thread(void)
 1174 {
 1175         return (sizeof(struct thread) + sizeof(struct td_sched));
 1176 }

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.