FreeBSD/Linux Kernel Cross Reference
sys/i86pc/os/cpupm/cpu_idle.c

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or http://www.opensolaris.org/os/licensing.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
   23  * Use is subject to license terms.
   24  */
   25 /*
   26  * Copyright (c) 2009-2010, Intel Corporation.
   27  * All rights reserved.
   28  */
   29 
   30 #include <sys/x86_archext.h>
   31 #include <sys/machsystm.h>
   32 #include <sys/x_call.h>
   33 #include <sys/stat.h>
   34 #include <sys/acpi/acpi.h>
   35 #include <sys/acpica.h>
   36 #include <sys/cpu_acpi.h>
   37 #include <sys/cpu_idle.h>
   38 #include <sys/cpupm.h>
   39 #include <sys/cpu_event.h>
   40 #include <sys/hpet.h>
   41 #include <sys/archsystm.h>
   42 #include <vm/hat_i86.h>
   43 #include <sys/dtrace.h>
   44 #include <sys/sdt.h>
   45 #include <sys/callb.h>
   46 
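      /*
       * CSTATE_USING_HPET and CSTATE_USING_LAT are the two selections passed
       * to cstate_use_timer() below: switch this CPU onto the HPET proxy
       * timer before halting, and back onto the local APIC timer afterwards.
       * CPU_IDLE_STOP_TIMEOUT is the number of 10-microsecond waits
       * cpu_idle_stop() makes between warnings while waiting for a slave CPU
       * to switch away from the deep-idle handler.
       */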
   47 #define CSTATE_USING_HPET               1
   48 #define CSTATE_USING_LAT                2
   49 
   50 #define CPU_IDLE_STOP_TIMEOUT           1000
   51 
   52 extern void cpu_idle_adaptive(void);
   53 extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
   54     cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);
   55 
   56 static int cpu_idle_init(cpu_t *);
   57 static void cpu_idle_fini(cpu_t *);
   58 static void cpu_idle_stop(cpu_t *);
   59 static boolean_t cpu_deep_idle_callb(void *arg, int code);
   60 static boolean_t cpu_idle_cpr_callb(void *arg, int code);
   61 static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate);
   62 
   63 static boolean_t cstate_use_timer(hrtime_t *lapic_expire, int timer);
   64 
   65 /*
   66  * Flags indicating support for an always-running local APIC timer (ARAT)
   67  * and for using the HPET timer in deep C-states.
   68  */
   69 static boolean_t cpu_cstate_arat = B_FALSE;
   70 static boolean_t cpu_cstate_hpet = B_FALSE;
   71 
   72 /*
   73  * Interfaces for modules implementing Intel's deep c-state.
   74  */
   75 cpupm_state_ops_t cpu_idle_ops = {
   76         "Generic ACPI C-state Support",
   77         cpu_idle_init,
   78         cpu_idle_fini,
   79         NULL,
   80         cpu_idle_stop
   81 };
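      /*
       * A rough sketch of how this table is used (assuming the usual cpupm
       * flow, which is not shown in this file): the cpupm framework installs
       * these ops for a CPU, calls cpu_idle_init() when C-state management
       * is started for that CPU, and cpu_idle_fini()/cpu_idle_stop() when it
       * is torn down.  The NULL slot is presumably the power-transition
       * entry point, which C-state support does not need.
       */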
   82 
   83 static kmutex_t         cpu_idle_callb_mutex;
   84 static callb_id_t       cpu_deep_idle_callb_id;
   85 static callb_id_t       cpu_idle_cpr_callb_id;
   86 static uint_t           cpu_idle_cfg_state;
   87 
   88 static kmutex_t cpu_idle_mutex;
   89 
   90 cpu_idle_kstat_t cpu_idle_kstat = {
   91         { "address_space_id",   KSTAT_DATA_STRING },
   92         { "latency",            KSTAT_DATA_UINT32 },
   93         { "power",              KSTAT_DATA_UINT32 },
   94 };
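      /*
       * A single static kstat template shared by all C-state kstats.  It is
       * serialized by cpu_idle_mutex (installed as ks_lock below) and filled
       * in for a particular C-state by cpu_idle_kstat_update().  On a live
       * system these kstats would typically be visible with something like
       * `kstat -m cstate` (illustrative command, not part of this file).
       */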
   95 
   96 /*
   97  * kstat update function of the c-state info
   98  */
   99 static int
  100 cpu_idle_kstat_update(kstat_t *ksp, int flag)
  101 {
  102         cpu_acpi_cstate_t *cstate = ksp->ks_private;
  103 
  104         if (flag == KSTAT_WRITE) {
  105                 return (EACCES);
  106         }
  107 
  108         if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
  109                 kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
  110                 "FFixedHW");
  111         } else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) {
  112                 kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
  113                 "SystemIO");
  114         } else {
  115                 kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
  116                 "Unsupported");
  117         }
  118 
  119         cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency;
  120         cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power;
  121 
  122         return (0);
  123 }
  124 
  125 /*
  126  * Used during configuration callbacks to manage implementation specific
  127  * details of the hardware timer used during Deep C-state.
  128  */
  129 boolean_t
  130 cstate_timer_callback(int code)
  131 {
  132         if (cpu_cstate_arat) {
  133                 return (B_TRUE);
  134         } else if (cpu_cstate_hpet) {
  135                 return (hpet.callback(code));
  136         }
  137         return (B_FALSE);
  138 }
  139 
  140 /*
  141  * Some Local APIC Timers do not work during Deep C-states.
  142  * The Deep C-state idle function uses this function to ensure it is using a
  143  * hardware timer that works during Deep C-states.  This function also
  144  * switches the timer back to the LAPIC timer after Deep C-state.
  145  */
  146 static boolean_t
  147 cstate_use_timer(hrtime_t *lapic_expire, int timer)
  148 {
  149         if (cpu_cstate_arat)
  150                 return (B_TRUE);
  151 
  152         /*
  153          * We have to return B_FALSE if there is neither ARAT nor HPET support.
  154          */
  155         if (!cpu_cstate_hpet)
  156                 return (B_FALSE);
  157 
  158         switch (timer) {
  159         case CSTATE_USING_HPET:
  160                 return (hpet.use_hpet_timer(lapic_expire));
  161         case CSTATE_USING_LAT:
  162                 hpet.use_lapic_timer(*lapic_expire);
  163                 return (B_TRUE);
  164         default:
  165                 return (B_FALSE);
  166         }
  167 }
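      /*
       * Typical pairing, as used by acpi_cpu_cstate() below (illustrative
       * sketch only):
       *
       *	using = cstate_use_timer(&exp, CSTATE_USING_HPET);
       *	...halt in a deep C-state...
       *	(void) cstate_use_timer(&exp, CSTATE_USING_LAT);
       *
       * If the first call returns B_FALSE the caller must fall back to C1,
       * since nothing would be armed to wake this CPU while its LAPIC timer
       * is stalled.
       */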
  168 
  169 /*
  170  * c-state wakeup function.
  171  * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
  172  * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
  173  */
  174 void
  175 cstate_wakeup(cpu_t *cp, int bound)
  176 {
  177         struct machcpu  *mcpu = &(cp->cpu_m);
  178         volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
  179         cpupart_t       *cpu_part;
  180         uint_t          cpu_found;
  181         processorid_t   cpu_sid;
  182 
  183         cpu_part = cp->cpu_part;
  184         cpu_sid = cp->cpu_seqid;
  185         /*
  186          * If this CPU is sitting in the halted set, clear its halted
  187          * bit and wake it below.
  188          */
  189         if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
  190                 /*
  191                  * Clear the halted bit for that CPU since it will be
  192                  * poked in a moment.
  193                  */
  194                 bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid);
  195 
  196                 /*
  197                  * We may find the current CPU present in the halted cpuset
  198                  * if we're in the context of an interrupt that occurred
  199                  * before we had a chance to clear our bit in cpu_idle().
  200          * Waking ourselves is obviously unnecessary, since if
  201                  * we're here, we're not halted.
  202                  */
  203                 if (cp != CPU) {
  204                         /*
  205                          * Use correct wakeup mechanism
  206                          */
  207                         if ((mcpu_mwait != NULL) &&
  208                             (*mcpu_mwait == MWAIT_HALTED))
  209                                 MWAIT_WAKEUP(cp);
  210                         else
  211                                 poke_cpu(cp->cpu_id);
  212                 }
  213                 return;
  214         } else {
  215                 /*
  216                  * This cpu isn't halted, but it's idle or undergoing a
  217                  * context switch. No need to awaken anyone else.
  218                  */
  219                 if (cp->cpu_thread == cp->cpu_idle_thread ||
  220                     cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
  221                         return;
  222         }
  223 
  224         /*
  225          * No need to wake up other CPUs if the thread we just enqueued
  226          * is bound.
  227          */
  228         if (bound)
  229                 return;
  230 
  231 
  232         /*
  233          * See if there's any other halted CPUs. If there are, then
  234          * select one, and awaken it.
  235          * It's possible that after we find a CPU, somebody else
  236          * will awaken it before we get the chance.
  237          * In that case, look again.
  238          */
  239         do {
  240                 cpu_found = bitset_find(&cpu_part->cp_haltset);
  241                 if (cpu_found == (uint_t)-1)
  242                         return;
  243 
  244         } while (bitset_atomic_test_and_del(&cpu_part->cp_haltset,
  245             cpu_found) < 0);
  246 
  247         /*
  248          * Must use correct wakeup mechanism to avoid lost wakeup of
  249          * alternate cpu.
  250          */
  251         if (cpu_found != CPU->cpu_seqid) {
  252                 mcpu_mwait = cpu_seq[cpu_found]->cpu_m.mcpu_mwait;
  253                 if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
  254                         MWAIT_WAKEUP(cpu_seq[cpu_found]);
  255                 else
  256                         poke_cpu(cpu_seq[cpu_found]->cpu_id);
  257         }
  258 }
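      /*
       * Note the ordering above: a CPU's bit is cleared from cp_haltset
       * before the MWAIT store or poke_cpu() IPI is issued.  This mirrors
       * the other side in acpi_cpu_cstate(), which adds itself to cp_haltset
       * and only then checks for work; reversing either side could lose a
       * wakeup.
       */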
  259 
  260 /*
  261  * Function called by CPU idle notification framework to check whether CPU
  262  * has been awakened. It will be called with interrupt disabled.
  263  * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle
  264  * notification framework.
  265  */
  266 static void
  267 acpi_cpu_mwait_check_wakeup(void *arg)
  268 {
  269         volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;
  270 
  271         ASSERT(arg != NULL);
  272         if (*mcpu_mwait != MWAIT_HALTED) {
  273                 /*
  274                  * CPU has been awakened, notify CPU idle notification system.
  275                  */
  276                 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
  277         } else {
  278                 /*
  279                  * Toggle interrupt flag to detect pending interrupts.
  280                  * If interrupt happened, do_interrupt() will notify CPU idle
  281                  * notification framework so no need to call cpu_idle_exit()
  282                  * here.
  283                  */
  284                 sti();
  285                 SMT_PAUSE();
  286                 cli();
  287         }
  288 }
  289 
  290 static void
  291 acpi_cpu_mwait_ipi_check_wakeup(void *arg)
  292 {
  293         volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;
  294 
  295         ASSERT(arg != NULL);
  296         if (*mcpu_mwait != MWAIT_WAKEUP_IPI) {
  297                 /*
  298                  * CPU has been awakened, notify CPU idle notification system.
  299                  */
  300                 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
  301         } else {
  302                 /*
  303                  * Toggle interrupt flag to detect pending interrupts.
  304                  * If interrupt happened, do_interrupt() will notify CPU idle
  305                  * notification framework so no need to call cpu_idle_exit()
  306                  * here.
  307                  */
  308                 sti();
  309                 SMT_PAUSE();
  310                 cli();
  311         }
  312 }
  313 
  314 /*ARGSUSED*/
  315 static void
  316 acpi_cpu_check_wakeup(void *arg)
  317 {
  318         /*
  319          * Toggle interrupt flag to detect pending interrupts.
  320          * If interrupt happened, do_interrupt() will notify CPU idle
  321          * notification framework so no need to call cpu_idle_exit() here.
  322          */
  323         sti();
  324         SMT_PAUSE();
  325         cli();
  326 }
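      /*
       * The three check-wakeup callbacks above differ only in which sentinel
       * they test: MWAIT_HALTED for FFixedHW (native MWAIT) entry,
       * MWAIT_WAKEUP_IPI for SystemIO entry, and no sentinel at all when
       * mcpu_mwait is unavailable.  acpi_cpu_cstate() selects the matching
       * callback when it programs *mcpu_mwait.
       */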
  327 
  328 /*
  329  * enter deep c-state handler
  330  */
  331 static void
  332 acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
  333 {
  334         volatile uint32_t       *mcpu_mwait = CPU->cpu_m.mcpu_mwait;
  335         cpu_t                   *cpup = CPU;
  336         processorid_t           cpu_sid = cpup->cpu_seqid;
  337         cpupart_t               *cp = cpup->cpu_part;
  338         hrtime_t                lapic_expire;
  339         uint8_t                 type = cstate->cs_addrspace_id;
  340         uint32_t                cs_type = cstate->cs_type;
  341         int                     hset_update = 1;
  342         boolean_t               using_timer;
  343         cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup;
  344 
  345         /*
  346          * Set our mcpu_mwait here, so we can tell if anyone tries to
  347          * wake us between now and when we call mwait.  No other cpu will
  348          * attempt to set our mcpu_mwait until we add ourselves to the haltset.
  349          */
  350         if (mcpu_mwait) {
  351                 if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
  352                         *mcpu_mwait = MWAIT_WAKEUP_IPI;
  353                         check_func = &acpi_cpu_mwait_ipi_check_wakeup;
  354                 } else {
  355                         *mcpu_mwait = MWAIT_HALTED;
  356                         check_func = &acpi_cpu_mwait_check_wakeup;
  357                 }
  358         }
  359 
  360         /*
  361          * If this CPU is online, and there are multiple CPUs
  362          * in the system, then we should note our halting
  363          * by adding ourselves to the partition's halted CPU
  364          * bitmap. This allows other CPUs to find/awaken us when
  365          * work becomes available.
  366          */
  367         if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
  368                 hset_update = 0;
  369 
  370         /*
  371          * Add ourselves to the partition's halted CPUs bitmask
  372          * and set our HALTED flag, if necessary.
  373          *
  374          * When a thread becomes runnable, it is placed on the queue
  375          * and then the halted cpuset is checked to determine who
  376          * (if anyone) should be awakened. We therefore need to first
  377          * add ourselves to the halted cpuset, and then check if there
  378          * is any work available.
  379          *
  380          * Note that memory barriers after updating the HALTED flag
  381          * are not necessary since an atomic operation (updating the bitmap)
  382          * immediately follows. On x86 the atomic operation acts as a
  383          * memory barrier for the update of cpu_disp_flags.
  384          */
  385         if (hset_update) {
  386                 cpup->cpu_disp_flags |= CPU_DISP_HALTED;
  387                 bitset_atomic_add(&cp->cp_haltset, cpu_sid);
  388         }
  389 
  390         /*
  391          * Check to make sure there's really nothing to do.
  392          * Work destined for this CPU may become available after
  393          * this check. We'll be notified through the clearing of our
  394          * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
  395          *
  396          * disp_anywork() checks disp_nrunnable, so we do not have to later.
  397          */
  398         if (disp_anywork()) {
  399                 if (hset_update) {
  400                         cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
  401                         bitset_atomic_del(&cp->cp_haltset, cpu_sid);
  402                 }
  403                 return;
  404         }
  405 
  406         /*
  407          * We're on our way to being halted.
  408          *
  409          * The local APIC timer can stop in ACPI C2 and deeper c-states.
  410          * Try to program the HPET hardware to substitute for this CPU's
  411          * LAPIC timer.
  412          * cstate_use_timer() could disable the LAPIC Timer.  Make sure
  413          * to start the LAPIC Timer again before leaving this function.
  414          *
  415          * Disable interrupts here so we will awaken immediately after halting
  416          * if someone tries to poke us between now and the time we actually
  417          * halt.
  418          */
  419         cli();
  420         using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET);
  421 
  422         /*
  423          * We check for the presence of our bit after disabling interrupts.
  424          * If it's cleared, we'll return. If the bit is cleared after
  425          * we check then the cstate_wakeup() will pop us out of the halted
  426          * state.
  427          *
  428          * This means that the ordering of the cstate_wakeup() and the clearing
  429          * of the bit by cpu_wakeup is important.
  430          * cpu_wakeup() must clear our mc_haltset bit, and then call
  431          * cstate_wakeup().
  432          * acpi_cpu_cstate() must disable interrupts, then check for the bit.
  433          */
  434         if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) {
  435                 (void) cstate_use_timer(&lapic_expire,
  436                     CSTATE_USING_LAT);
  437                 sti();
  438                 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
  439                 return;
  440         }
  441 
  442         /*
  443          * The check for anything locally runnable is here for performance
  444          * and isn't needed for correctness. disp_nrunnable ought to be
  445          * in our cache still, so it's inexpensive to check, and if there
  446          * is anything runnable we won't have to wait for the poke.
  447          */
  448         if (cpup->cpu_disp->disp_nrunnable != 0) {
  449                 (void) cstate_use_timer(&lapic_expire,
  450                     CSTATE_USING_LAT);
  451                 sti();
  452                 if (hset_update) {
  453                         cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
  454                         bitset_atomic_del(&cp->cp_haltset, cpu_sid);
  455                 }
  456                 return;
  457         }
  458 
  459         if (using_timer == B_FALSE) {
  460 
  461                 (void) cstate_use_timer(&lapic_expire,
  462                     CSTATE_USING_LAT);
  463                 sti();
  464 
  465                 /*
  466                  * We are currently unable to program the HPET to act as this
  467                  * CPU's proxy LAPIC timer.  This CPU cannot enter C2 or deeper
  468                  * because no timer is set to wake it up while its LAPIC timer
  469                  * stalls in deep C-States.
  470                  * Enter C1 instead.
  471                  *
  472          * cstate_wakeup() will wake this CPU with an IPI, which
  473                  * works with MWAIT.
  474                  */
  475                 i86_monitor(mcpu_mwait, 0, 0);
  476                 if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
  477                         if (cpu_idle_enter(IDLE_STATE_C1, 0,
  478                             check_func, (void *)mcpu_mwait) == 0) {
  479                                 if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) ==
  480                                     MWAIT_HALTED) {
  481                                         i86_mwait(0, 0);
  482                                 }
  483                                 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
  484                         }
  485                 }
  486 
  487                 /*
  488                  * We're no longer halted
  489                  */
  490                 if (hset_update) {
  491                         cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
  492                         bitset_atomic_del(&cp->cp_haltset, cpu_sid);
  493                 }
  494                 return;
  495         }
  496 
  497         if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
  498                 /*
  499                  * We're on our way to being halted.
  500                  * To avoid a lost wakeup, arm the monitor before checking
  501                  * if another cpu wrote to mcpu_mwait to wake us up.
  502                  */
  503                 i86_monitor(mcpu_mwait, 0, 0);
  504                 if (*mcpu_mwait == MWAIT_HALTED) {
  505                         if (cpu_idle_enter((uint_t)cs_type, 0,
  506                             check_func, (void *)mcpu_mwait) == 0) {
  507                                 if (*mcpu_mwait == MWAIT_HALTED) {
  508                                         i86_mwait(cstate->cs_address, 1);
  509                                 }
  510                                 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
  511                         }
  512                 }
  513         } else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
  514                 uint32_t value;
  515                 ACPI_TABLE_FADT *gbl_FADT;
  516 
  517                 if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
  518                         if (cpu_idle_enter((uint_t)cs_type, 0,
  519                             check_func, (void *)mcpu_mwait) == 0) {
  520                                 if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
  521                                         (void) cpu_acpi_read_port(
  522                                             cstate->cs_address, &value, 8);
  523                                         acpica_get_global_FADT(&gbl_FADT);
  524                                         (void) cpu_acpi_read_port(
  525                                             gbl_FADT->XPmTimerBlock.Address,
  526                                             &value, 32);
  527                                 }
  528                                 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
  529                         }
  530                 }
  531         }
  532 
  533         /*
  534          * The LAPIC timer may have stopped in deep c-state.
  535          * Reprogram this CPU's LAPIC here before enabling interrupts.
  536          */
  537         (void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
  538         sti();
  539 
  540         /*
  541          * We're no longer halted
  542          */
  543         if (hset_update) {
  544                 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
  545                 bitset_atomic_del(&cp->cp_haltset, cpu_sid);
  546         }
  547 }
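      /*
       * Summary of the two entry methods above: FFixedHW C-states are
       * entered via MONITOR/MWAIT with the ACPI-supplied hint in cs_address,
       * while SystemIO C-states are entered by reading the C-state I/O port
       * and then the ACPI PM timer from the FADT (presumably so that the
       * chipset has completed the transition, as described for P_LVLx entry
       * in the ACPI specification).
       */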
  548 
  549 /*
  550  * Idle the present CPU; deep C-states are supported.
  551  */
  552 void
  553 cpu_acpi_idle(void)
  554 {
  555         cpu_t *cp = CPU;
  556         cpu_acpi_handle_t handle;
  557         cma_c_state_t *cs_data;
  558         cpu_acpi_cstate_t *cstates;
  559         hrtime_t start, end;
  560         int cpu_max_cstates;
  561         uint32_t cs_indx;
  562         uint16_t cs_type;
  563 
  564         cpupm_mach_state_t *mach_state =
  565             (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
  566         handle = mach_state->ms_acpi_handle;
  567         ASSERT(CPU_ACPI_CSTATES(handle) != NULL);
  568 
  569         cs_data = mach_state->ms_cstate.cma_state.cstate;
  570         cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
  571         ASSERT(cstates != NULL);
  572         cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
  573         if (cpu_max_cstates > CPU_MAX_CSTATES)
  574                 cpu_max_cstates = CPU_MAX_CSTATES;
  575         if (cpu_max_cstates == 1) {     /* no ACPI c-state data */
  576                 (*non_deep_idle_cpu)();
  577                 return;
  578         }
  579 
  580         start = gethrtime_unscaled();
  581 
  582         cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);
  583 
  584         cs_type = cstates[cs_indx].cs_type;
  585 
  586         switch (cs_type) {
  587         default:
  588                 /* FALLTHROUGH */
  589         case CPU_ACPI_C1:
  590                 (*non_deep_idle_cpu)();
  591                 break;
  592 
  593         case CPU_ACPI_C2:
  594                 acpi_cpu_cstate(&cstates[cs_indx]);
  595                 break;
  596 
  597         case CPU_ACPI_C3:
  598                 /*
  599                  * All supported Intel processors maintain cache coherency
  600                  * during C3.  Currently when entering C3 processors flush
  601                  * core caches to higher level shared cache. The shared cache
  602                  * maintains state and supports probes during C3.
  603                  * Consequently there is no need to handle cache coherency
  604                  * and Bus Master activity here with the cache flush, BM_RLD
  605                  * bit, BM_STS bit, nor PM2_CNT.ARB_DIS mechanisms described
  606                  * in section 8.1.4 of the ACPI Specification 4.0.
  607                  */
  608                 acpi_cpu_cstate(&cstates[cs_indx]);
  609                 break;
  610         }
  611 
  612         end = gethrtime_unscaled();
  613 
  614         /*
  615          * Update statistics
  616          */
  617         cpupm_wakeup_cstate_data(cs_data, end);
  618 }
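      /*
       * cpu_acpi_idle() is installed as the per-CPU idle routine when more
       * than one C-state is available (see cpuidle_cstate_instance() below);
       * cpupm_next_cstate() selects an index from the cached _CST data and
       * anything deeper than C1 is funneled through acpi_cpu_cstate().
       */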
  619 
  620 boolean_t
  621 cpu_deep_cstates_supported(void)
  622 {
  623         extern int      idle_cpu_no_deep_c;
  624 
  625         if (idle_cpu_no_deep_c)
  626                 return (B_FALSE);
  627 
  628         if (!cpuid_deep_cstates_supported())
  629                 return (B_FALSE);
  630 
  631         if (cpuid_arat_supported()) {
  632                 cpu_cstate_arat = B_TRUE;
  633                 return (B_TRUE);
  634         }
  635 
  636         if ((hpet.supported == HPET_FULL_SUPPORT) &&
  637             hpet.install_proxy()) {
  638                 cpu_cstate_hpet = B_TRUE;
  639                 return (B_TRUE);
  640         }
  641 
  642         return (B_FALSE);
  643 }
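      /*
       * Decision order above: deep C-states are used only if the processor
       * advertises them and a wakeup timer that survives deep idle exists.
       * An always-running APIC timer (ARAT) is preferred; otherwise the HPET
       * must be fully supported and willing to act as a proxy.
       */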
  644 
  645 /*
  646  * Validate that this processor supports deep cstate and if so,
  647  * get the c-state data from ACPI and cache it.
  648  */
  649 static int
  650 cpu_idle_init(cpu_t *cp)
  651 {
  652         cpupm_mach_state_t *mach_state =
  653             (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
  654         cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
  655         cpu_acpi_cstate_t *cstate;
  656         char name[KSTAT_STRLEN];
  657         int cpu_max_cstates, i;
  658         int ret;
  659 
  660         /*
  661          * Cache the C-state specific ACPI data.
  662          */
  663         if ((ret = cpu_acpi_cache_cstate_data(handle)) != 0) {
  664                 if (ret < 0)
  665                         cmn_err(CE_NOTE,
  666                             "!Support for CPU deep idle states is being "
  667                             "disabled due to errors parsing ACPI C-state "
  668                             "objects exported by BIOS.");
  669                 cpu_idle_fini(cp);
  670                 return (-1);
  671         }
  672 
  673         cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
  674 
  675         cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
  676 
  677         for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
  678                 (void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type);
  679                 /*
  680                  * Allocate, initialize and install cstate kstat
  681                  */
  682                 cstate->cs_ksp = kstat_create("cstate", cp->cpu_id,
  683                     name, "misc",
  684                     KSTAT_TYPE_NAMED,
  685                     sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
  686                     KSTAT_FLAG_VIRTUAL);
  687 
  688                 if (cstate->cs_ksp == NULL) {
  689                         cmn_err(CE_NOTE, "kstat_create(c_state) fail");
  690                 } else {
  691                         cstate->cs_ksp->ks_data = &cpu_idle_kstat;
  692                         cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
  693                         cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
  694                         cstate->cs_ksp->ks_data_size += MAXNAMELEN;
  695                         cstate->cs_ksp->ks_private = cstate;
  696                         kstat_install(cstate->cs_ksp);
  697                 }
  698                 cstate++;
  699         }
  700 
  701         cpupm_alloc_domains(cp, CPUPM_C_STATES);
  702         cpupm_alloc_ms_cstate(cp);
  703 
  704         if (cpu_deep_cstates_supported()) {
  705                 uint32_t value;
  706 
  707                 mutex_enter(&cpu_idle_callb_mutex);
  708                 if (cpu_deep_idle_callb_id == (callb_id_t)0)
  709                         cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb,
  710                             (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
  711                 if (cpu_idle_cpr_callb_id == (callb_id_t)0)
  712                         cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb,
  713                             (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr");
  714                 mutex_exit(&cpu_idle_callb_mutex);
  715 
  716 
  717                 /*
  718                  * All supported CPUs (Nehalem and later) will remain in C3
  719                  * during Bus Master activity.
  720          * Clear ACPI_BITREG_BUS_MASTER_RLD here, if it is not already
  721          * 0, before enabling deeper C-states.
  722                  */
  723                 cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_RLD, &value);
  724                 if (value & 1)
  725                         cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
  726         }
  727 
  728         return (0);
  729 }
  730 
  731 /*
  732  * Free resources allocated by cpu_idle_init().
  733  */
  734 static void
  735 cpu_idle_fini(cpu_t *cp)
  736 {
  737         cpupm_mach_state_t *mach_state =
  738             (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
  739         cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
  740         cpu_acpi_cstate_t *cstate;
  741         uint_t  cpu_max_cstates, i;
  742 
  743         /*
  744          * idle cpu points back to the generic one
  745          */
  746         idle_cpu = cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
  747         disp_enq_thread = non_deep_idle_disp_enq_thread;
  748 
  749         cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
  750         if (cstate) {
  751                 cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
  752 
  753                 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
  754                         if (cstate->cs_ksp != NULL)
  755                                 kstat_delete(cstate->cs_ksp);
  756                         cstate++;
  757                 }
  758         }
  759 
  760         cpupm_free_ms_cstate(cp);
  761         cpupm_free_domains(&cpupm_cstate_domains);
  762         cpu_acpi_free_cstate_data(handle);
  763 
  764         mutex_enter(&cpu_idle_callb_mutex);
  765         if (cpu_deep_idle_callb_id != (callb_id_t)0) {
  766                 (void) callb_delete(cpu_deep_idle_callb_id);
  767                 cpu_deep_idle_callb_id = (callb_id_t)0;
  768         }
  769         if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
  770                 (void) callb_delete(cpu_idle_cpr_callb_id);
  771                 cpu_idle_cpr_callb_id = (callb_id_t)0;
  772         }
  773         mutex_exit(&cpu_idle_callb_mutex);
  774 }
  775 
  776 /*
  777  * This function is introduced to resolve a race between the master and
  778  * the slave touching the c-state data structures.
  779  * After the slave has called this idle function and switched to the
  780  * non-deep idle function, the master can go on to reclaim the resources.
  781  */
  782 static void
  783 cpu_idle_stop_sync(void)
  784 {
  785         /* switch to the non deep idle function */
  786         CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
  787 }
  788 
  789 static void
  790 cpu_idle_stop(cpu_t *cp)
  791 {
  792         cpupm_mach_state_t *mach_state =
  793             (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
  794         cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
  795         cpu_acpi_cstate_t *cstate;
  796         uint_t cpu_max_cstates, i = 0;
  797 
  798         mutex_enter(&cpu_idle_callb_mutex);
  799         if (idle_cpu == cpu_idle_adaptive) {
  800                 /*
  801          * Invoke the slave to call the synchronous idle function.
  802                  */
  803                 cp->cpu_m.mcpu_idle_cpu = cpu_idle_stop_sync;
  804                 poke_cpu(cp->cpu_id);
  805 
  806                 /*
  807          * Wait until the slave switches to the non-deep idle function,
  808                  * so that the master is safe to go on to reclaim the resource.
  809                  */
  810                 while (cp->cpu_m.mcpu_idle_cpu != non_deep_idle_cpu) {
  811                         drv_usecwait(10);
  812                         if ((++i % CPU_IDLE_STOP_TIMEOUT) == 0)
  813                                 cmn_err(CE_NOTE, "!cpu_idle_stop: the slave"
  814                                     " idle stop timeout");
  815                 }
  816         }
  817         mutex_exit(&cpu_idle_callb_mutex);
  818 
  819         cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
  820         if (cstate) {
  821                 cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
  822 
  823                 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
  824                         if (cstate->cs_ksp != NULL)
  825                                 kstat_delete(cstate->cs_ksp);
  826                         cstate++;
  827                 }
  828         }
  829         cpupm_free_ms_cstate(cp);
  830         cpupm_remove_domains(cp, CPUPM_C_STATES, &cpupm_cstate_domains);
  831         cpu_acpi_free_cstate_data(handle);
  832 }
  833 
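      /*
       * Deep-idle enable/disable callback.  CPU_IDLE_DEEP_CFG set in
       * cpu_idle_cfg_state means deep C-states are currently disabled by
       * configuration: PM_DISABLE_CPU_DEEP_IDLE sets the flag and reverts to
       * the non-deep idle loop, while PM_ENABLE_CPU_DEEP_IDLE (and the
       * default policy) clears it and re-installs cpu_idle_adaptive along
       * with the cstate_wakeup dispatcher hook.
       */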
  834 /*ARGSUSED*/
  835 static boolean_t
  836 cpu_deep_idle_callb(void *arg, int code)
  837 {
  838         boolean_t rslt = B_TRUE;
  839 
  840         mutex_enter(&cpu_idle_callb_mutex);
  841         switch (code) {
  842         case PM_DEFAULT_CPU_DEEP_IDLE:
  843                 /*
  844                  * Default policy is same as enable
  845                  */
  846                 /*FALLTHROUGH*/
  847         case PM_ENABLE_CPU_DEEP_IDLE:
  848                 if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
  849                         break;
  850 
  851                 if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) {
  852                         disp_enq_thread = cstate_wakeup;
  853                         idle_cpu = cpu_idle_adaptive;
  854                         cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
  855                 } else {
  856                         rslt = B_FALSE;
  857                 }
  858                 break;
  859 
  860         case PM_DISABLE_CPU_DEEP_IDLE:
  861                 if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
  862                         break;
  863 
  864                 idle_cpu = non_deep_idle_cpu;
  865                 if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) {
  866                         disp_enq_thread = non_deep_idle_disp_enq_thread;
  867                         cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
  868                 }
  869                 break;
  870 
  871         default:
  872                 cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n",
  873                     code);
  874                 break;
  875         }
  876         mutex_exit(&cpu_idle_callb_mutex);
  877         return (rslt);
  878 }
  879 
  880 /*ARGSUSED*/
  881 static boolean_t
  882 cpu_idle_cpr_callb(void *arg, int code)
  883 {
  884         boolean_t rslt = B_TRUE;
  885 
  886         mutex_enter(&cpu_idle_callb_mutex);
  887         switch (code) {
  888         case CB_CODE_CPR_RESUME:
  889                 if (cstate_timer_callback(CB_CODE_CPR_RESUME)) {
  890                         /*
  891                          * Do not enable dispatcher hooks if disabled by user.
  892                          */
  893                         if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
  894                                 break;
  895 
  896                         disp_enq_thread = cstate_wakeup;
  897                         idle_cpu = cpu_idle_adaptive;
  898                 } else {
  899                         rslt = B_FALSE;
  900                 }
  901                 break;
  902 
  903         case CB_CODE_CPR_CHKPT:
  904                 idle_cpu = non_deep_idle_cpu;
  905                 disp_enq_thread = non_deep_idle_disp_enq_thread;
  906                 (void) cstate_timer_callback(CB_CODE_CPR_CHKPT);
  907                 break;
  908 
  909         default:
  910                 cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code);
  911                 break;
  912         }
  913         mutex_exit(&cpu_idle_callb_mutex);
  914         return (rslt);
  915 }
  916 
  917 /*
  918  * handle _CST notification
  919  */
  920 void
  921 cpuidle_cstate_instance(cpu_t *cp)
  922 {
  923 #ifndef __xpv
  924         cpupm_mach_state_t      *mach_state =
  925             (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
  926         cpu_acpi_handle_t       handle;
  927         struct machcpu          *mcpu;
  928         cpuset_t                dom_cpu_set;
  929         kmutex_t                *pm_lock;
  930         int                     result = 0;
  931         processorid_t           cpu_id;
  932 
  933         if (mach_state == NULL) {
  934                 return;
  935         }
  936 
  937         ASSERT(mach_state->ms_cstate.cma_domain != NULL);
  938         dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
  939         pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;
  940 
  941         /*
  942          * Do this for all the CPUs in the domain.
  943          */
  944         mutex_enter(pm_lock);
  945         do {
  946                 CPUSET_FIND(dom_cpu_set, cpu_id);
  947                 if (cpu_id == CPUSET_NOTINSET)
  948                         break;
  949 
  950                 ASSERT(cpu_id >= 0 && cpu_id < NCPU);
  951                 cp = cpu[cpu_id];
  952                 mach_state = (cpupm_mach_state_t *)
  953                     cp->cpu_m.mcpu_pm_mach_state;
  954                 if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
  955                         mutex_exit(pm_lock);
  956                         return;
  957                 }
  958                 handle = mach_state->ms_acpi_handle;
  959                 ASSERT(handle != NULL);
  960 
  961                 /*
  962                  * re-evaluate cstate object
  963                  */
  964                 if (cpu_acpi_cache_cstate_data(handle) != 0) {
  965                         cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
  966                             " object Instance: %d", cpu_id);
  967                 }
  968                 mcpu = &(cp->cpu_m);
  969                 mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
  970                 if (mcpu->max_cstates > CPU_ACPI_C1) {
  971                         (void) cstate_timer_callback(
  972                             CST_EVENT_MULTIPLE_CSTATES);
  973                         disp_enq_thread = cstate_wakeup;
  974                         cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
  975                 } else if (mcpu->max_cstates == CPU_ACPI_C1) {
  976                         disp_enq_thread = non_deep_idle_disp_enq_thread;
  977                         cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
  978                         (void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
  979                 }
  980 
  981                 CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
  982         } while (result < 0);
  983         mutex_exit(pm_lock);
  984 #endif
  985 }
  986 
  987 /*
  988  * Handle a change in the number or type of available processor power states.
  989  */
  990 void
  991 cpuidle_manage_cstates(void *ctx)
  992 {
  993         cpu_t                   *cp = ctx;
  994         cpupm_mach_state_t      *mach_state =
  995             (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
  996         boolean_t               is_ready;
  997 
  998         if (mach_state == NULL) {
  999                 return;
 1000         }
 1001 
 1002         /*
 1003          * We currently refuse to power manage if the CPU is not ready to
 1004          * take cross calls (cross calls fail silently if CPU is not ready
 1005          * for it).
 1006          *
 1007          * Additionally, for x86 platforms we cannot power manage an instance,
 1008          * until it has been initialized.
 1009          */
 1010         is_ready = (cp->cpu_flags & CPU_READY) && cpupm_cstate_ready(cp);
 1011         if (!is_ready)
 1012                 return;
 1013 
 1014         cpuidle_cstate_instance(cp);
 1015 }
