The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_cpu.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2004-2007 Nate Lawson (SDG)
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD: releng/11.0/sys/kern/kern_cpu.c 299746 2016-05-14 18:22:52Z jhb $");
   29 
   30 #include <sys/param.h>
   31 #include <sys/bus.h>
   32 #include <sys/cpu.h>
   33 #include <sys/eventhandler.h>
   34 #include <sys/kernel.h>
   35 #include <sys/lock.h>
   36 #include <sys/malloc.h>
   37 #include <sys/module.h>
   38 #include <sys/proc.h>
   39 #include <sys/queue.h>
   40 #include <sys/sbuf.h>
   41 #include <sys/sched.h>
   42 #include <sys/smp.h>
   43 #include <sys/sysctl.h>
   44 #include <sys/systm.h>
   45 #include <sys/sx.h>
   46 #include <sys/timetc.h>
   47 #include <sys/taskqueue.h>
   48 
   49 #include "cpufreq_if.h"
   50 
   51 /*
   52  * Common CPU frequency glue code.  Drivers for specific hardware can
   53  * attach this interface to allow users to get/set the CPU frequency.
   54  */
   55 
   56 /*
   57  * Number of levels we can handle.  Levels are synthesized from settings
   58  * so for M settings and N drivers, there may be M*N levels.
   59  */
   60 #define CF_MAX_LEVELS   64
   61 
   62 struct cf_saved_freq {
   63         struct cf_level                 level;
   64         int                             priority;
   65         SLIST_ENTRY(cf_saved_freq)      link;
   66 };
   67 
   68 struct cpufreq_softc {
   69         struct sx                       lock;
   70         struct cf_level                 curr_level;
   71         int                             curr_priority;
   72         SLIST_HEAD(, cf_saved_freq)     saved_freq;
   73         struct cf_level_lst             all_levels;
   74         int                             all_count;
   75         int                             max_mhz;
   76         device_t                        dev;
   77         struct sysctl_ctx_list          sysctl_ctx;
   78         struct task                     startup_task;
   79         struct cf_level                 *levels_buf;
   80 };
   81 
   82 struct cf_setting_array {
   83         struct cf_setting               sets[MAX_SETTINGS];
   84         int                             count;
   85         TAILQ_ENTRY(cf_setting_array)   link;
   86 };
   87 
   88 TAILQ_HEAD(cf_setting_lst, cf_setting_array);
   89 
   /* Thin wrappers over the softc's sx lock; it is only ever taken exclusively. */
   #define CF_MTX_INIT(x)          sx_init((x), "cpufreq lock")
   #define CF_MTX_LOCK(x)          sx_xlock((x))
   #define CF_MTX_UNLOCK(x)        sx_xunlock((x))
   #define CF_MTX_ASSERT(x)        sx_assert((x), SX_XLOCKED)

   /*
    * printf() with a "cpufreq: " prefix, emitted only while cf_verbose is
    * non-zero.  The first argument must be a string literal because it is
    * pasted onto the prefix.
    */
   #define CF_DEBUG(msg...)        do {            \
           if (cf_verbose)                         \
                   printf("cpufreq: " msg);        \
           } while (0)
   99 
  100 static int      cpufreq_attach(device_t dev);
  101 static void     cpufreq_startup_task(void *ctx, int pending);
  102 static int      cpufreq_detach(device_t dev);
  103 static int      cf_set_method(device_t dev, const struct cf_level *level,
  104                     int priority);
  105 static int      cf_get_method(device_t dev, struct cf_level *level);
  106 static int      cf_levels_method(device_t dev, struct cf_level *levels,
  107                     int *count);
  108 static int      cpufreq_insert_abs(struct cpufreq_softc *sc,
  109                     struct cf_setting *sets, int count);
  110 static int      cpufreq_expand_set(struct cpufreq_softc *sc,
  111                     struct cf_setting_array *set_arr);
  112 static struct cf_level *cpufreq_dup_set(struct cpufreq_softc *sc,
  113                     struct cf_level *dup, struct cf_setting *set);
  114 static int      cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS);
  115 static int      cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS);
  116 static int      cpufreq_settings_sysctl(SYSCTL_HANDLER_ARGS);
  117 
  118 static device_method_t cpufreq_methods[] = {
  119         DEVMETHOD(device_probe,         bus_generic_probe),
  120         DEVMETHOD(device_attach,        cpufreq_attach),
  121         DEVMETHOD(device_detach,        cpufreq_detach),
  122 
  123         DEVMETHOD(cpufreq_set,          cf_set_method),
  124         DEVMETHOD(cpufreq_get,          cf_get_method),
  125         DEVMETHOD(cpufreq_levels,       cf_levels_method),
  126         {0, 0}
  127 };
  128 static driver_t cpufreq_driver = {
  129         "cpufreq", cpufreq_methods, sizeof(struct cpufreq_softc)
  130 };
  131 static devclass_t cpufreq_dc;
  132 DRIVER_MODULE(cpufreq, cpu, cpufreq_driver, cpufreq_dc, 0, 0);
  133 
  134 static int              cf_lowest_freq;
  135 static int              cf_verbose;
  136 static SYSCTL_NODE(_debug, OID_AUTO, cpufreq, CTLFLAG_RD, NULL,
  137     "cpufreq debugging");
  138 SYSCTL_INT(_debug_cpufreq, OID_AUTO, lowest, CTLFLAG_RWTUN, &cf_lowest_freq, 1,
  139     "Don't provide levels below this frequency.");
  140 SYSCTL_INT(_debug_cpufreq, OID_AUTO, verbose, CTLFLAG_RWTUN, &cf_verbose, 1,
  141     "Print verbose debugging messages");
  142 
  143 static int
  144 cpufreq_attach(device_t dev)
  145 {
  146         struct cpufreq_softc *sc;
  147         struct pcpu *pc;
  148         device_t parent;
  149         uint64_t rate;
  150         int numdevs;
  151 
  152         CF_DEBUG("initializing %s\n", device_get_nameunit(dev));
  153         sc = device_get_softc(dev);
  154         parent = device_get_parent(dev);
  155         sc->dev = dev;
  156         sysctl_ctx_init(&sc->sysctl_ctx);
  157         TAILQ_INIT(&sc->all_levels);
  158         CF_MTX_INIT(&sc->lock);
  159         sc->curr_level.total_set.freq = CPUFREQ_VAL_UNKNOWN;
  160         SLIST_INIT(&sc->saved_freq);
  161         /* Try to get nominal CPU freq to use it as maximum later if needed */
  162         sc->max_mhz = cpu_get_nominal_mhz(dev);
  163         /* If that fails, try to measure the current rate */
  164         if (sc->max_mhz <= 0) {
  165                 pc = cpu_get_pcpu(dev);
  166                 if (cpu_est_clockrate(pc->pc_cpuid, &rate) == 0)
  167                         sc->max_mhz = rate / 1000000;
  168                 else
  169                         sc->max_mhz = CPUFREQ_VAL_UNKNOWN;
  170         }
  171 
  172         /*
  173          * Only initialize one set of sysctls for all CPUs.  In the future,
  174          * if multiple CPUs can have different settings, we can move these
  175          * sysctls to be under every CPU instead of just the first one.
  176          */
  177         numdevs = devclass_get_count(cpufreq_dc);
  178         if (numdevs > 1)
  179                 return (0);
  180 
  181         CF_DEBUG("initializing one-time data for %s\n",
  182             device_get_nameunit(dev));
  183         sc->levels_buf = malloc(CF_MAX_LEVELS * sizeof(*sc->levels_buf),
  184             M_DEVBUF, M_WAITOK);
  185         SYSCTL_ADD_PROC(&sc->sysctl_ctx,
  186             SYSCTL_CHILDREN(device_get_sysctl_tree(parent)),
  187             OID_AUTO, "freq", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
  188             cpufreq_curr_sysctl, "I", "Current CPU frequency");
  189         SYSCTL_ADD_PROC(&sc->sysctl_ctx,
  190             SYSCTL_CHILDREN(device_get_sysctl_tree(parent)),
  191             OID_AUTO, "freq_levels", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
  192             cpufreq_levels_sysctl, "A", "CPU frequency levels");
  193 
  194         /*
  195          * Queue a one-shot broadcast that levels have changed.
  196          * It will run once the system has completed booting.
  197          */
  198         TASK_INIT(&sc->startup_task, 0, cpufreq_startup_task, dev);
  199         taskqueue_enqueue(taskqueue_thread, &sc->startup_task);
  200 
  201         return (0);
  202 }
  203 
  204 /* Handle any work to be done for all drivers that attached during boot. */
  205 static void 
  206 cpufreq_startup_task(void *ctx, int pending)
  207 {
  208 
  209         cpufreq_settings_changed((device_t)ctx);
  210 }
  211 
  212 static int
  213 cpufreq_detach(device_t dev)
  214 {
  215         struct cpufreq_softc *sc;
  216         struct cf_saved_freq *saved_freq;
  217         int numdevs;
  218 
  219         CF_DEBUG("shutdown %s\n", device_get_nameunit(dev));
  220         sc = device_get_softc(dev);
  221         sysctl_ctx_free(&sc->sysctl_ctx);
  222 
  223         while ((saved_freq = SLIST_FIRST(&sc->saved_freq)) != NULL) {
  224                 SLIST_REMOVE_HEAD(&sc->saved_freq, link);
  225                 free(saved_freq, M_TEMP);
  226         }
  227 
  228         /* Only clean up these resources when the last device is detaching. */
  229         numdevs = devclass_get_count(cpufreq_dc);
  230         if (numdevs == 1) {
  231                 CF_DEBUG("final shutdown for %s\n", device_get_nameunit(dev));
  232                 free(sc->levels_buf, M_DEVBUF);
  233         }
  234 
  235         return (0);
  236 }
  237 
  238 static int
  239 cf_set_method(device_t dev, const struct cf_level *level, int priority)
  240 {
  241         struct cpufreq_softc *sc;
  242         const struct cf_setting *set;
  243         struct cf_saved_freq *saved_freq, *curr_freq;
  244         struct pcpu *pc;
  245         int error, i;
  246 
  247         sc = device_get_softc(dev);
  248         error = 0;
  249         set = NULL;
  250         saved_freq = NULL;
  251 
  252         /* We are going to change levels so notify the pre-change handler. */
  253         EVENTHANDLER_INVOKE(cpufreq_pre_change, level, &error);
  254         if (error != 0) {
  255                 EVENTHANDLER_INVOKE(cpufreq_post_change, level, error);
  256                 return (error);
  257         }
  258 
  259         CF_MTX_LOCK(&sc->lock);
  260 
  261 #ifdef SMP
  262 #ifdef EARLY_AP_STARTUP
  263         MPASS(mp_ncpus == 1 || smp_started);
  264 #else
  265         /*
  266          * If still booting and secondary CPUs not started yet, don't allow
  267          * changing the frequency until they're online.  This is because we
  268          * can't switch to them using sched_bind() and thus we'd only be
  269          * switching the main CPU.  XXXTODO: Need to think more about how to
  270          * handle having different CPUs at different frequencies.  
  271          */
  272         if (mp_ncpus > 1 && !smp_started) {
  273                 device_printf(dev, "rejecting change, SMP not started yet\n");
  274                 error = ENXIO;
  275                 goto out;
  276         }
  277 #endif
  278 #endif /* SMP */
  279 
  280         /*
  281          * If the requested level has a lower priority, don't allow
  282          * the new level right now.
  283          */
  284         if (priority < sc->curr_priority) {
  285                 CF_DEBUG("ignoring, curr prio %d less than %d\n", priority,
  286                     sc->curr_priority);
  287                 error = EPERM;
  288                 goto out;
  289         }
  290 
  291         /*
  292          * If the caller didn't specify a level and one is saved, prepare to
  293          * restore the saved level.  If none has been saved, return an error.
  294          */
  295         if (level == NULL) {
  296                 saved_freq = SLIST_FIRST(&sc->saved_freq);
  297                 if (saved_freq == NULL) {
  298                         CF_DEBUG("NULL level, no saved level\n");
  299                         error = ENXIO;
  300                         goto out;
  301                 }
  302                 level = &saved_freq->level;
  303                 priority = saved_freq->priority;
  304                 CF_DEBUG("restoring saved level, freq %d prio %d\n",
  305                     level->total_set.freq, priority);
  306         }
  307 
  308         /* Reject levels that are below our specified threshold. */
  309         if (level->total_set.freq < cf_lowest_freq) {
  310                 CF_DEBUG("rejecting freq %d, less than %d limit\n",
  311                     level->total_set.freq, cf_lowest_freq);
  312                 error = EINVAL;
  313                 goto out;
  314         }
  315 
  316         /* If already at this level, just return. */
  317         if (sc->curr_level.total_set.freq == level->total_set.freq) {
  318                 CF_DEBUG("skipping freq %d, same as current level %d\n",
  319                     level->total_set.freq, sc->curr_level.total_set.freq);
  320                 goto skip;
  321         }
  322 
  323         /* First, set the absolute frequency via its driver. */
  324         set = &level->abs_set;
  325         if (set->dev) {
  326                 if (!device_is_attached(set->dev)) {
  327                         error = ENXIO;
  328                         goto out;
  329                 }
  330 
  331                 /* Bind to the target CPU before switching. */
  332                 pc = cpu_get_pcpu(set->dev);
  333                 thread_lock(curthread);
  334                 sched_bind(curthread, pc->pc_cpuid);
  335                 thread_unlock(curthread);
  336                 CF_DEBUG("setting abs freq %d on %s (cpu %d)\n", set->freq,
  337                     device_get_nameunit(set->dev), PCPU_GET(cpuid));
  338                 error = CPUFREQ_DRV_SET(set->dev, set);
  339                 thread_lock(curthread);
  340                 sched_unbind(curthread);
  341                 thread_unlock(curthread);
  342                 if (error) {
  343                         goto out;
  344                 }
  345         }
  346 
  347         /* Next, set any/all relative frequencies via their drivers. */
  348         for (i = 0; i < level->rel_count; i++) {
  349                 set = &level->rel_set[i];
  350                 if (!device_is_attached(set->dev)) {
  351                         error = ENXIO;
  352                         goto out;
  353                 }
  354 
  355                 /* Bind to the target CPU before switching. */
  356                 pc = cpu_get_pcpu(set->dev);
  357                 thread_lock(curthread);
  358                 sched_bind(curthread, pc->pc_cpuid);
  359                 thread_unlock(curthread);
  360                 CF_DEBUG("setting rel freq %d on %s (cpu %d)\n", set->freq,
  361                     device_get_nameunit(set->dev), PCPU_GET(cpuid));
  362                 error = CPUFREQ_DRV_SET(set->dev, set);
  363                 thread_lock(curthread);
  364                 sched_unbind(curthread);
  365                 thread_unlock(curthread);
  366                 if (error) {
  367                         /* XXX Back out any successful setting? */
  368                         goto out;
  369                 }
  370         }
  371 
  372 skip:
  373         /*
  374          * Before recording the current level, check if we're going to a
  375          * higher priority.  If so, save the previous level and priority.
  376          */
  377         if (sc->curr_level.total_set.freq != CPUFREQ_VAL_UNKNOWN &&
  378             priority > sc->curr_priority) {
  379                 CF_DEBUG("saving level, freq %d prio %d\n",
  380                     sc->curr_level.total_set.freq, sc->curr_priority);
  381                 curr_freq = malloc(sizeof(*curr_freq), M_TEMP, M_NOWAIT);
  382                 if (curr_freq == NULL) {
  383                         error = ENOMEM;
  384                         goto out;
  385                 }
  386                 curr_freq->level = sc->curr_level;
  387                 curr_freq->priority = sc->curr_priority;
  388                 SLIST_INSERT_HEAD(&sc->saved_freq, curr_freq, link);
  389         }
  390         sc->curr_level = *level;
  391         sc->curr_priority = priority;
  392 
  393         /* If we were restoring a saved state, reset it to "unused". */
  394         if (saved_freq != NULL) {
  395                 CF_DEBUG("resetting saved level\n");
  396                 sc->curr_level.total_set.freq = CPUFREQ_VAL_UNKNOWN;
  397                 SLIST_REMOVE_HEAD(&sc->saved_freq, link);
  398                 free(saved_freq, M_TEMP);
  399         }
  400 
  401 out:
  402         CF_MTX_UNLOCK(&sc->lock);
  403 
  404         /*
  405          * We changed levels (or attempted to) so notify the post-change
  406          * handler of new frequency or error.
  407          */
  408         EVENTHANDLER_INVOKE(cpufreq_post_change, level, error);
  409         if (error && set)
  410                 device_printf(set->dev, "set freq failed, err %d\n", error);
  411 
  412         return (error);
  413 }
  414 
  415 static int
  416 cf_get_method(device_t dev, struct cf_level *level)
  417 {
  418         struct cpufreq_softc *sc;
  419         struct cf_level *levels;
  420         struct cf_setting *curr_set, set;
  421         struct pcpu *pc;
  422         device_t *devs;
  423         int bdiff, count, diff, error, i, n, numdevs;
  424         uint64_t rate;
  425 
  426         sc = device_get_softc(dev);
  427         error = 0;
  428         levels = NULL;
  429 
  430         /* If we already know the current frequency, we're done. */
  431         CF_MTX_LOCK(&sc->lock);
  432         curr_set = &sc->curr_level.total_set;
  433         if (curr_set->freq != CPUFREQ_VAL_UNKNOWN) {
  434                 CF_DEBUG("get returning known freq %d\n", curr_set->freq);
  435                 goto out;
  436         }
  437         CF_MTX_UNLOCK(&sc->lock);
  438 
  439         /*
  440          * We need to figure out the current level.  Loop through every
  441          * driver, getting the current setting.  Then, attempt to get a best
  442          * match of settings against each level.
  443          */
  444         count = CF_MAX_LEVELS;
  445         levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
  446         if (levels == NULL)
  447                 return (ENOMEM);
  448         error = CPUFREQ_LEVELS(sc->dev, levels, &count);
  449         if (error) {
  450                 if (error == E2BIG)
  451                         printf("cpufreq: need to increase CF_MAX_LEVELS\n");
  452                 free(levels, M_TEMP);
  453                 return (error);
  454         }
  455         error = device_get_children(device_get_parent(dev), &devs, &numdevs);
  456         if (error) {
  457                 free(levels, M_TEMP);
  458                 return (error);
  459         }
  460 
  461         /*
  462          * Reacquire the lock and search for the given level.
  463          *
  464          * XXX Note: this is not quite right since we really need to go
  465          * through each level and compare both absolute and relative
  466          * settings for each driver in the system before making a match.
  467          * The estimation code below catches this case though.
  468          */
  469         CF_MTX_LOCK(&sc->lock);
  470         for (n = 0; n < numdevs && curr_set->freq == CPUFREQ_VAL_UNKNOWN; n++) {
  471                 if (!device_is_attached(devs[n]))
  472                         continue;
  473                 if (CPUFREQ_DRV_GET(devs[n], &set) != 0)
  474                         continue;
  475                 for (i = 0; i < count; i++) {
  476                         if (set.freq == levels[i].total_set.freq) {
  477                                 sc->curr_level = levels[i];
  478                                 break;
  479                         }
  480                 }
  481         }
  482         free(devs, M_TEMP);
  483         if (curr_set->freq != CPUFREQ_VAL_UNKNOWN) {
  484                 CF_DEBUG("get matched freq %d from drivers\n", curr_set->freq);
  485                 goto out;
  486         }
  487 
  488         /*
  489          * We couldn't find an exact match, so attempt to estimate and then
  490          * match against a level.
  491          */
  492         pc = cpu_get_pcpu(dev);
  493         if (pc == NULL) {
  494                 error = ENXIO;
  495                 goto out;
  496         }
  497         cpu_est_clockrate(pc->pc_cpuid, &rate);
  498         rate /= 1000000;
  499         bdiff = 1 << 30;
  500         for (i = 0; i < count; i++) {
  501                 diff = abs(levels[i].total_set.freq - rate);
  502                 if (diff < bdiff) {
  503                         bdiff = diff;
  504                         sc->curr_level = levels[i];
  505                 }
  506         }
  507         CF_DEBUG("get estimated freq %d\n", curr_set->freq);
  508 
  509 out:
  510         if (error == 0)
  511                 *level = sc->curr_level;
  512 
  513         CF_MTX_UNLOCK(&sc->lock);
  514         if (levels)
  515                 free(levels, M_TEMP);
  516         return (error);
  517 }
  518 
  519 static int
  520 cf_levels_method(device_t dev, struct cf_level *levels, int *count)
  521 {
  522         struct cf_setting_array *set_arr;
  523         struct cf_setting_lst rel_sets;
  524         struct cpufreq_softc *sc;
  525         struct cf_level *lev;
  526         struct cf_setting *sets;
  527         struct pcpu *pc;
  528         device_t *devs;
  529         int error, i, numdevs, set_count, type;
  530         uint64_t rate;
  531 
  532         if (levels == NULL || count == NULL)
  533                 return (EINVAL);
  534 
  535         TAILQ_INIT(&rel_sets);
  536         sc = device_get_softc(dev);
  537         error = device_get_children(device_get_parent(dev), &devs, &numdevs);
  538         if (error)
  539                 return (error);
  540         sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT);
  541         if (sets == NULL) {
  542                 free(devs, M_TEMP);
  543                 return (ENOMEM);
  544         }
  545 
  546         /* Get settings from all cpufreq drivers. */
  547         CF_MTX_LOCK(&sc->lock);
  548         for (i = 0; i < numdevs; i++) {
  549                 /* Skip devices that aren't ready. */
  550                 if (!device_is_attached(devs[i]))
  551                         continue;
  552 
  553                 /*
  554                  * Get settings, skipping drivers that offer no settings or
  555                  * provide settings for informational purposes only.
  556                  */
  557                 error = CPUFREQ_DRV_TYPE(devs[i], &type);
  558                 if (error || (type & CPUFREQ_FLAG_INFO_ONLY)) {
  559                         if (error == 0) {
  560                                 CF_DEBUG("skipping info-only driver %s\n",
  561                                     device_get_nameunit(devs[i]));
  562                         }
  563                         continue;
  564                 }
  565                 set_count = MAX_SETTINGS;
  566                 error = CPUFREQ_DRV_SETTINGS(devs[i], sets, &set_count);
  567                 if (error || set_count == 0)
  568                         continue;
  569 
  570                 /* Add the settings to our absolute/relative lists. */
  571                 switch (type & CPUFREQ_TYPE_MASK) {
  572                 case CPUFREQ_TYPE_ABSOLUTE:
  573                         error = cpufreq_insert_abs(sc, sets, set_count);
  574                         break;
  575                 case CPUFREQ_TYPE_RELATIVE:
  576                         CF_DEBUG("adding %d relative settings\n", set_count);
  577                         set_arr = malloc(sizeof(*set_arr), M_TEMP, M_NOWAIT);
  578                         if (set_arr == NULL) {
  579                                 error = ENOMEM;
  580                                 goto out;
  581                         }
  582                         bcopy(sets, set_arr->sets, set_count * sizeof(*sets));
  583                         set_arr->count = set_count;
  584                         TAILQ_INSERT_TAIL(&rel_sets, set_arr, link);
  585                         break;
  586                 default:
  587                         error = EINVAL;
  588                 }
  589                 if (error)
  590                         goto out;
  591         }
  592 
  593         /*
  594          * If there are no absolute levels, create a fake one at 100%.  We
  595          * then cache the clockrate for later use as our base frequency.
  596          */
  597         if (TAILQ_EMPTY(&sc->all_levels)) {
  598                 if (sc->max_mhz == CPUFREQ_VAL_UNKNOWN) {
  599                         sc->max_mhz = cpu_get_nominal_mhz(dev);
  600                         /*
  601                          * If the CPU can't report a rate for 100%, hope
  602                          * the CPU is running at its nominal rate right now,
  603                          * and use that instead.
  604                          */
  605                         if (sc->max_mhz <= 0) {
  606                                 pc = cpu_get_pcpu(dev);
  607                                 cpu_est_clockrate(pc->pc_cpuid, &rate);
  608                                 sc->max_mhz = rate / 1000000;
  609                         }
  610                 }
  611                 memset(&sets[0], CPUFREQ_VAL_UNKNOWN, sizeof(*sets));
  612                 sets[0].freq = sc->max_mhz;
  613                 sets[0].dev = NULL;
  614                 error = cpufreq_insert_abs(sc, sets, 1);
  615                 if (error)
  616                         goto out;
  617         }
  618 
  619         /* Create a combined list of absolute + relative levels. */
  620         TAILQ_FOREACH(set_arr, &rel_sets, link)
  621                 cpufreq_expand_set(sc, set_arr);
  622 
  623         /* If the caller doesn't have enough space, return the actual count. */
  624         if (sc->all_count > *count) {
  625                 *count = sc->all_count;
  626                 error = E2BIG;
  627                 goto out;
  628         }
  629 
  630         /* Finally, output the list of levels. */
  631         i = 0;
  632         TAILQ_FOREACH(lev, &sc->all_levels, link) {
  633 
  634                 /* Skip levels that have a frequency that is too low. */
  635                 if (lev->total_set.freq < cf_lowest_freq) {
  636                         sc->all_count--;
  637                         continue;
  638                 }
  639 
  640                 levels[i] = *lev;
  641                 i++;
  642         }
  643         *count = sc->all_count;
  644         error = 0;
  645 
  646 out:
  647         /* Clear all levels since we regenerate them each time. */
  648         while ((lev = TAILQ_FIRST(&sc->all_levels)) != NULL) {
  649                 TAILQ_REMOVE(&sc->all_levels, lev, link);
  650                 free(lev, M_TEMP);
  651         }
  652         sc->all_count = 0;
  653 
  654         CF_MTX_UNLOCK(&sc->lock);
  655         while ((set_arr = TAILQ_FIRST(&rel_sets)) != NULL) {
  656                 TAILQ_REMOVE(&rel_sets, set_arr, link);
  657                 free(set_arr, M_TEMP);
  658         }
  659         free(devs, M_TEMP);
  660         free(sets, M_TEMP);
  661         return (error);
  662 }
  663 
  664 /*
  665  * Create levels for an array of absolute settings and insert them in
  666  * sorted order in the specified list.
  667  */
  668 static int
  669 cpufreq_insert_abs(struct cpufreq_softc *sc, struct cf_setting *sets,
  670     int count)
  671 {
  672         struct cf_level_lst *list;
  673         struct cf_level *level, *search;
  674         int i;
  675 
  676         CF_MTX_ASSERT(&sc->lock);
  677 
  678         list = &sc->all_levels;
  679         for (i = 0; i < count; i++) {
  680                 level = malloc(sizeof(*level), M_TEMP, M_NOWAIT | M_ZERO);
  681                 if (level == NULL)
  682                         return (ENOMEM);
  683                 level->abs_set = sets[i];
  684                 level->total_set = sets[i];
  685                 level->total_set.dev = NULL;
  686                 sc->all_count++;
  687 
  688                 if (TAILQ_EMPTY(list)) {
  689                         CF_DEBUG("adding abs setting %d at head\n",
  690                             sets[i].freq);
  691                         TAILQ_INSERT_HEAD(list, level, link);
  692                         continue;
  693                 }
  694 
  695                 TAILQ_FOREACH_REVERSE(search, list, cf_level_lst, link) {
  696                         if (sets[i].freq <= search->total_set.freq) {
  697                                 CF_DEBUG("adding abs setting %d after %d\n",
  698                                     sets[i].freq, search->total_set.freq);
  699                                 TAILQ_INSERT_AFTER(list, search, level, link);
  700                                 break;
  701                         }
  702                 }
  703         }
  704         return (0);
  705 }
  706 
  707 /*
  708  * Expand a group of relative settings, creating derived levels from them.
  709  */
  710 static int
  711 cpufreq_expand_set(struct cpufreq_softc *sc, struct cf_setting_array *set_arr)
  712 {
  713         struct cf_level *fill, *search;
  714         struct cf_setting *set;
  715         int i;
  716 
  717         CF_MTX_ASSERT(&sc->lock);
  718 
  719         /*
  720          * Walk the set of all existing levels in reverse.  This is so we
  721          * create derived states from the lowest absolute settings first
  722          * and discard duplicates created from higher absolute settings.
  723          * For instance, a level of 50 Mhz derived from 100 Mhz + 50% is
  724          * preferable to 200 Mhz + 25% because absolute settings are more
  725          * efficient since they often change the voltage as well.
  726          */
  727         TAILQ_FOREACH_REVERSE(search, &sc->all_levels, cf_level_lst, link) {
  728                 /* Add each setting to the level, duplicating if necessary. */
  729                 for (i = 0; i < set_arr->count; i++) {
  730                         set = &set_arr->sets[i];
  731 
  732                         /*
  733                          * If this setting is less than 100%, split the level
  734                          * into two and add this setting to the new level.
  735                          */
  736                         fill = search;
  737                         if (set->freq < 10000) {
  738                                 fill = cpufreq_dup_set(sc, search, set);
  739 
  740                                 /*
  741                                  * The new level was a duplicate of an existing
  742                                  * level or its absolute setting is too high
  743                                  * so we freed it.  For example, we discard a
  744                                  * derived level of 1000 MHz/25% if a level
  745                                  * of 500 MHz/100% already exists.
  746                                  */
  747                                 if (fill == NULL)
  748                                         break;
  749                         }
  750 
  751                         /* Add this setting to the existing or new level. */
  752                         KASSERT(fill->rel_count < MAX_SETTINGS,
  753                             ("cpufreq: too many relative drivers (%d)",
  754                             MAX_SETTINGS));
  755                         fill->rel_set[fill->rel_count] = *set;
  756                         fill->rel_count++;
  757                         CF_DEBUG(
  758                         "expand set added rel setting %d%% to %d level\n",
  759                             set->freq / 100, fill->total_set.freq);
  760                 }
  761         }
  762 
  763         return (0);
  764 }
  765 
  766 static struct cf_level *
  767 cpufreq_dup_set(struct cpufreq_softc *sc, struct cf_level *dup,
  768     struct cf_setting *set)
  769 {
  770         struct cf_level_lst *list;
  771         struct cf_level *fill, *itr;
  772         struct cf_setting *fill_set, *itr_set;
  773         int i;
  774 
  775         CF_MTX_ASSERT(&sc->lock);
  776 
  777         /*
  778          * Create a new level, copy it from the old one, and update the
  779          * total frequency and power by the percentage specified in the
  780          * relative setting.
  781          */
  782         fill = malloc(sizeof(*fill), M_TEMP, M_NOWAIT);
  783         if (fill == NULL)
  784                 return (NULL);
  785         *fill = *dup;
  786         fill_set = &fill->total_set;
  787         fill_set->freq =
  788             ((uint64_t)fill_set->freq * set->freq) / 10000;
  789         if (fill_set->power != CPUFREQ_VAL_UNKNOWN) {
  790                 fill_set->power = ((uint64_t)fill_set->power * set->freq)
  791                     / 10000;
  792         }
  793         if (set->lat != CPUFREQ_VAL_UNKNOWN) {
  794                 if (fill_set->lat != CPUFREQ_VAL_UNKNOWN)
  795                         fill_set->lat += set->lat;
  796                 else
  797                         fill_set->lat = set->lat;
  798         }
  799         CF_DEBUG("dup set considering derived setting %d\n", fill_set->freq);
  800 
  801         /*
  802          * If we copied an old level that we already modified (say, at 100%),
  803          * we need to remove that setting before adding this one.  Since we
  804          * process each setting array in order, we know any settings for this
  805          * driver will be found at the end.
  806          */
  807         for (i = fill->rel_count; i != 0; i--) {
  808                 if (fill->rel_set[i - 1].dev != set->dev)
  809                         break;
  810                 CF_DEBUG("removed last relative driver: %s\n",
  811                     device_get_nameunit(set->dev));
  812                 fill->rel_count--;
  813         }
  814 
  815         /*
  816          * Insert the new level in sorted order.  If it is a duplicate of an
  817          * existing level (1) or has an absolute setting higher than the
  818          * existing level (2), do not add it.  We can do this since any such
  819          * level is guaranteed use less power.  For example (1), a level with
  820          * one absolute setting of 800 Mhz uses less power than one composed
  821          * of an absolute setting of 1600 Mhz and a relative setting at 50%.
  822          * Also for example (2), a level of 800 Mhz/75% is preferable to
  823          * 1600 Mhz/25% even though the latter has a lower total frequency.
  824          */
  825         list = &sc->all_levels;
  826         KASSERT(!TAILQ_EMPTY(list), ("all levels list empty in dup set"));
  827         TAILQ_FOREACH_REVERSE(itr, list, cf_level_lst, link) {
  828                 itr_set = &itr->total_set;
  829                 if (CPUFREQ_CMP(fill_set->freq, itr_set->freq)) {
  830                         CF_DEBUG("dup set rejecting %d (dupe)\n",
  831                             fill_set->freq);
  832                         itr = NULL;
  833                         break;
  834                 } else if (fill_set->freq < itr_set->freq) {
  835                         if (fill->abs_set.freq <= itr->abs_set.freq) {
  836                                 CF_DEBUG(
  837                         "dup done, inserting new level %d after %d\n",
  838                                     fill_set->freq, itr_set->freq);
  839                                 TAILQ_INSERT_AFTER(list, itr, fill, link);
  840                                 sc->all_count++;
  841                         } else {
  842                                 CF_DEBUG("dup set rejecting %d (abs too big)\n",
  843                                     fill_set->freq);
  844                                 itr = NULL;
  845                         }
  846                         break;
  847                 }
  848         }
  849 
  850         /* We didn't find a good place for this new level so free it. */
  851         if (itr == NULL) {
  852                 CF_DEBUG("dup set freeing new level %d (not optimal)\n",
  853                     fill_set->freq);
  854                 free(fill, M_TEMP);
  855                 fill = NULL;
  856         }
  857 
  858         return (fill);
  859 }
  860 
  861 static int
  862 cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS)
  863 {
  864         struct cpufreq_softc *sc;
  865         struct cf_level *levels;
  866         int best, count, diff, bdiff, devcount, error, freq, i, n;
  867         device_t *devs;
  868 
  869         devs = NULL;
  870         sc = oidp->oid_arg1;
  871         levels = sc->levels_buf;
  872 
  873         error = CPUFREQ_GET(sc->dev, &levels[0]);
  874         if (error)
  875                 goto out;
  876         freq = levels[0].total_set.freq;
  877         error = sysctl_handle_int(oidp, &freq, 0, req);
  878         if (error != 0 || req->newptr == NULL)
  879                 goto out;
  880 
  881         /*
  882          * While we only call cpufreq_get() on one device (assuming all
  883          * CPUs have equal levels), we call cpufreq_set() on all CPUs.
  884          * This is needed for some MP systems.
  885          */
  886         error = devclass_get_devices(cpufreq_dc, &devs, &devcount);
  887         if (error)
  888                 goto out;
  889         for (n = 0; n < devcount; n++) {
  890                 count = CF_MAX_LEVELS;
  891                 error = CPUFREQ_LEVELS(devs[n], levels, &count);
  892                 if (error) {
  893                         if (error == E2BIG)
  894                                 printf(
  895                         "cpufreq: need to increase CF_MAX_LEVELS\n");
  896                         break;
  897                 }
  898                 best = 0;
  899                 bdiff = 1 << 30;
  900                 for (i = 0; i < count; i++) {
  901                         diff = abs(levels[i].total_set.freq - freq);
  902                         if (diff < bdiff) {
  903                                 bdiff = diff;
  904                                 best = i;
  905                         }
  906                 }
  907                 error = CPUFREQ_SET(devs[n], &levels[best], CPUFREQ_PRIO_USER);
  908         }
  909 
  910 out:
  911         if (devs)
  912                 free(devs, M_TEMP);
  913         return (error);
  914 }
  915 
  916 static int
  917 cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS)
  918 {
  919         struct cpufreq_softc *sc;
  920         struct cf_level *levels;
  921         struct cf_setting *set;
  922         struct sbuf sb;
  923         int count, error, i;
  924 
  925         sc = oidp->oid_arg1;
  926         sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND);
  927 
  928         /* Get settings from the device and generate the output string. */
  929         count = CF_MAX_LEVELS;
  930         levels = sc->levels_buf;
  931         if (levels == NULL) {
  932                 sbuf_delete(&sb);
  933                 return (ENOMEM);
  934         }
  935         error = CPUFREQ_LEVELS(sc->dev, levels, &count);
  936         if (error) {
  937                 if (error == E2BIG)
  938                         printf("cpufreq: need to increase CF_MAX_LEVELS\n");
  939                 goto out;
  940         }
  941         if (count) {
  942                 for (i = 0; i < count; i++) {
  943                         set = &levels[i].total_set;
  944                         sbuf_printf(&sb, "%d/%d ", set->freq, set->power);
  945                 }
  946         } else
  947                 sbuf_cpy(&sb, "");
  948         sbuf_trim(&sb);
  949         sbuf_finish(&sb);
  950         error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
  951 
  952 out:
  953         sbuf_delete(&sb);
  954         return (error);
  955 }
  956 
  957 static int
  958 cpufreq_settings_sysctl(SYSCTL_HANDLER_ARGS)
  959 {
  960         device_t dev;
  961         struct cf_setting *sets;
  962         struct sbuf sb;
  963         int error, i, set_count;
  964 
  965         dev = oidp->oid_arg1;
  966         sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND);
  967 
  968         /* Get settings from the device and generate the output string. */
  969         set_count = MAX_SETTINGS;
  970         sets = malloc(set_count * sizeof(*sets), M_TEMP, M_NOWAIT);
  971         if (sets == NULL) {
  972                 sbuf_delete(&sb);
  973                 return (ENOMEM);
  974         }
  975         error = CPUFREQ_DRV_SETTINGS(dev, sets, &set_count);
  976         if (error)
  977                 goto out;
  978         if (set_count) {
  979                 for (i = 0; i < set_count; i++)
  980                         sbuf_printf(&sb, "%d/%d ", sets[i].freq, sets[i].power);
  981         } else
  982                 sbuf_cpy(&sb, "");
  983         sbuf_trim(&sb);
  984         sbuf_finish(&sb);
  985         error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
  986 
  987 out:
  988         free(sets, M_TEMP);
  989         sbuf_delete(&sb);
  990         return (error);
  991 }
  992 
  993 int
  994 cpufreq_register(device_t dev)
  995 {
  996         struct cpufreq_softc *sc;
  997         device_t cf_dev, cpu_dev;
  998 
  999         /* Add a sysctl to get each driver's settings separately. */
 1000         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 1001             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 1002             OID_AUTO, "freq_settings", CTLTYPE_STRING | CTLFLAG_RD, dev, 0,
 1003             cpufreq_settings_sysctl, "A", "CPU frequency driver settings");
 1004 
 1005         /*
 1006          * Add only one cpufreq device to each CPU.  Currently, all CPUs
 1007          * must offer the same levels and be switched at the same time.
 1008          */
 1009         cpu_dev = device_get_parent(dev);
 1010         if ((cf_dev = device_find_child(cpu_dev, "cpufreq", -1))) {
 1011                 sc = device_get_softc(cf_dev);
 1012                 sc->max_mhz = CPUFREQ_VAL_UNKNOWN;
 1013                 return (0);
 1014         }
 1015 
 1016         /* Add the child device and possibly sysctls. */
 1017         cf_dev = BUS_ADD_CHILD(cpu_dev, 0, "cpufreq", -1);
 1018         if (cf_dev == NULL)
 1019                 return (ENOMEM);
 1020         device_quiet(cf_dev);
 1021 
 1022         return (device_probe_and_attach(cf_dev));
 1023 }
 1024 
 1025 int
 1026 cpufreq_unregister(device_t dev)
 1027 {
 1028         device_t cf_dev, *devs;
 1029         int cfcount, devcount, error, i, type;
 1030 
 1031         /*
 1032          * If this is the last cpufreq child device, remove the control
 1033          * device as well.  We identify cpufreq children by calling a method
 1034          * they support.
 1035          */
 1036         error = device_get_children(device_get_parent(dev), &devs, &devcount);
 1037         if (error)
 1038                 return (error);
 1039         cf_dev = device_find_child(device_get_parent(dev), "cpufreq", -1);
 1040         if (cf_dev == NULL) {
 1041                 device_printf(dev,
 1042         "warning: cpufreq_unregister called with no cpufreq device active\n");
 1043                 free(devs, M_TEMP);
 1044                 return (0);
 1045         }
 1046         cfcount = 0;
 1047         for (i = 0; i < devcount; i++) {
 1048                 if (!device_is_attached(devs[i]))
 1049                         continue;
 1050                 if (CPUFREQ_DRV_TYPE(devs[i], &type) == 0)
 1051                         cfcount++;
 1052         }
 1053         if (cfcount <= 1)
 1054                 device_delete_child(device_get_parent(cf_dev), cf_dev);
 1055         free(devs, M_TEMP);
 1056 
 1057         return (0);
 1058 }
 1059 
 1060 int
 1061 cpufreq_settings_changed(device_t dev)
 1062 {
 1063 
 1064         EVENTHANDLER_INVOKE(cpufreq_levels_changed,
 1065             device_get_unit(device_get_parent(dev)));
 1066         return (0);
 1067 }

Cache object: 7946aca0e6ce3c16c756dc7c899f0d52


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.