The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/x86/x86/tsc.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1998-2003 Poul-Henning Kamp
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD$");
   29 
   30 #include "opt_compat.h"
   31 #include "opt_clock.h"
   32 
   33 #include <sys/param.h>
   34 #include <sys/bus.h>
   35 #include <sys/cpu.h>
   36 #include <sys/limits.h>
   37 #include <sys/malloc.h>
   38 #include <sys/systm.h>
   39 #include <sys/sysctl.h>
   40 #include <sys/time.h>
   41 #include <sys/timetc.h>
   42 #include <sys/kernel.h>
   43 #include <sys/power.h>
   44 #include <sys/smp.h>
   45 #include <sys/vdso.h>
   46 #include <machine/clock.h>
   47 #include <machine/cputypes.h>
   48 #include <machine/md_var.h>
   49 #include <machine/specialreg.h>
   50 
   51 #include "cpufreq_if.h"
   52 
/* Boot-time probed TSC frequency in Hz; 0 if unknown/unusable. */
uint64_t        tsc_freq;
/* Non-zero when the TSC rate does not vary with P-state changes. */
int             tsc_is_invariant;
/* Non-zero when the MPERF/APERF performance-status MSRs work. */
int             tsc_perf_stat;

/* Tags for the cpufreq event handlers registered in init_TSC(). */
static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;

SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);

#ifdef SMP
/* Result/override of the boot-time SMP TSC synchronization test. */
static int      smp_tsc;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
    "Indicates whether the TSC is safe to use in SMP mode");
TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);

/* Maximum number of AP TSC adjustment rounds tried by test_tsc(). */
int     smp_tsc_adjust = 0;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN,
    &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP");
TUNABLE_INT("kern.timecounter.smp_tsc_adjust", &smp_tsc_adjust);
#endif

/* Extra right-shift budget applied when sizing the timecounter. */
static int      tsc_shift = 1;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN,
    &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency");
TUNABLE_INT("kern.timecounter.tsc_shift", &tsc_shift);

/* Tunable: disable all use of the TSC. */
static int      tsc_disabled;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
    "Disable x86 Time Stamp Counter");
TUNABLE_INT("machdep.disable_tsc", &tsc_disabled);

/* Tunable: skip the DELAY()-based calibration in probe_tsc_freq(). */
static int      tsc_skip_calibration;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN,
    &tsc_skip_calibration, 0, "Disable TSC frequency calibration");
TUNABLE_INT("machdep.disable_tsc_calibration", &tsc_skip_calibration);

static void tsc_freq_changed(void *arg, const struct cf_level *level,
    int status);
static void tsc_freq_changing(void *arg, const struct cf_level *level,
    int *status);
static unsigned tsc_get_timecount(struct timecounter *tc);
static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);
  101 
  102 static struct timecounter tsc_timecounter = {
  103         tsc_get_timecount,      /* get_timecount */
  104         0,                      /* no poll_pps */
  105         ~0u,                    /* counter_mask */
  106         0,                      /* frequency */
  107         "TSC",                  /* name */
  108         800,                    /* quality (adjusted in code) */
  109 };
  110 
  111 #define VMW_HVMAGIC             0x564d5868
  112 #define VMW_HVPORT              0x5658
  113 #define VMW_HVCMD_GETVERSION    10
  114 #define VMW_HVCMD_GETHZ         45
  115 
  116 static __inline void
  117 vmware_hvcall(u_int cmd, u_int *p)
  118 {
  119 
  120         __asm __volatile("inl %w3, %0"
  121         : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
  122         : "" (VMW_HVMAGIC), "1" (UINT_MAX), "2" (cmd), "3" (VMW_HVPORT)
  123         : "memory");
  124 }
  125 
/*
 * Detect whether we are running under VMware and, if so, obtain the
 * TSC frequency from the hypervisor.  Returns 1 when VMware was
 * detected (tsc_freq may have been updated and the TSC is marked
 * invariant), 0 otherwise.
 */
static int
tsc_freq_vmware(void)
{
        char hv_sig[13];        /* 12-byte CPUID vendor signature + NUL */
        u_int regs[4];
        char *p;
        u_int hv_high;
        int i;

        /*
         * [RFC] CPUID usage for interaction between Hypervisors and Linux.
         * http://lkml.org/lkml/2008/10/1/246
         *
         * KB1009458: Mechanisms to determine if software is running in
         * a VMware virtual machine
         * http://kb.vmware.com/kb/1009458
         */
        hv_high = 0;
        if ((cpu_feature2 & CPUID2_HV) != 0) {
                /* Leaf 0x40000000: ebx/ecx/edx carry the vendor string. */
                do_cpuid(0x40000000, regs);
                hv_high = regs[0];      /* highest hypervisor CPUID leaf */
                for (i = 1, p = hv_sig; i < 4; i++, p += sizeof(regs) / 4)
                        memcpy(p, &regs[i], sizeof(regs[i]));
                *p = '\0';
                if (bootverbose) {
                        /*
                         * HV vendor    ID string
                         * ------------+--------------
                         * KVM          "KVMKVMKVM"
                         * Microsoft    "Microsoft Hv"
                         * VMware       "VMwareVMware"
                         * Xen          "XenVMMXenVMM"
                         */
                        printf("Hypervisor: Origin = \"%s\"\n", hv_sig);
                }
                if (strncmp(hv_sig, "VMwareVMware", 12) != 0)
                        return (0);
        } else {
                /*
                 * No hypervisor CPUID bit; fall back to sniffing the
                 * SMBIOS serial number, then confirm via the backdoor
                 * version call.
                 */
                p = getenv("smbios.system.serial");
                if (p == NULL)
                        return (0);
                if (strncmp(p, "VMware-", 7) != 0 &&
                    strncmp(p, "VMW", 3) != 0) {
                        freeenv(p);
                        return (0);
                }
                freeenv(p);
                vmware_hvcall(VMW_HVCMD_GETVERSION, regs);
                if (regs[1] != VMW_HVMAGIC)
                        return (0);
        }
        if (hv_high >= 0x40000010) {
                /* Leaf 0x40000010: eax holds the TSC frequency in kHz. */
                do_cpuid(0x40000010, regs);
                tsc_freq = regs[0] * 1000;
        } else {
                /* Backdoor GETHZ: 64-bit Hz value in regs[1]:regs[0]. */
                vmware_hvcall(VMW_HVCMD_GETHZ, regs);
                if (regs[1] != UINT_MAX)
                        tsc_freq = regs[0] | ((uint64_t)regs[1] << 32);
        }
        tsc_is_invariant = 1;
        return (1);
}
  188 
  189 static void
  190 tsc_freq_intel(void)
  191 {
  192         char brand[48];
  193         u_int regs[4];
  194         uint64_t freq;
  195         char *p;
  196         u_int i;
  197 
  198         /*
  199          * Intel Processor Identification and the CPUID Instruction
  200          * Application Note 485.
  201          * http://www.intel.com/assets/pdf/appnote/241618.pdf
  202          */
  203         if (cpu_exthigh >= 0x80000004) {
  204                 p = brand;
  205                 for (i = 0x80000002; i < 0x80000005; i++) {
  206                         do_cpuid(i, regs);
  207                         memcpy(p, regs, sizeof(regs));
  208                         p += sizeof(regs);
  209                 }
  210                 p = NULL;
  211                 for (i = 0; i < sizeof(brand) - 1; i++)
  212                         if (brand[i] == 'H' && brand[i + 1] == 'z')
  213                                 p = brand + i;
  214                 if (p != NULL) {
  215                         p -= 5;
  216                         switch (p[4]) {
  217                         case 'M':
  218                                 i = 1;
  219                                 break;
  220                         case 'G':
  221                                 i = 1000;
  222                                 break;
  223                         case 'T':
  224                                 i = 1000000;
  225                                 break;
  226                         default:
  227                                 return;
  228                         }
  229 #define C2D(c)  ((c) - '')
  230                         if (p[1] == '.') {
  231                                 freq = C2D(p[0]) * 1000;
  232                                 freq += C2D(p[2]) * 100;
  233                                 freq += C2D(p[3]) * 10;
  234                                 freq *= i * 1000;
  235                         } else {
  236                                 freq = C2D(p[0]) * 1000;
  237                                 freq += C2D(p[1]) * 100;
  238                                 freq += C2D(p[2]) * 10;
  239                                 freq += C2D(p[3]);
  240                                 freq *= i * 1000000;
  241                         }
  242 #undef C2D
  243                         tsc_freq = freq;
  244                 }
  245         }
  246 }
  247 
  248 static void
  249 probe_tsc_freq(void)
  250 {
  251         u_int regs[4];
  252         uint64_t tsc1, tsc2;
  253 
  254         if (cpu_high >= 6) {
  255                 do_cpuid(6, regs);
  256                 if ((regs[2] & CPUID_PERF_STAT) != 0) {
  257                         /*
  258                          * XXX Some emulators expose host CPUID without actual
  259                          * support for these MSRs.  We must test whether they
  260                          * really work.
  261                          */
  262                         wrmsr(MSR_MPERF, 0);
  263                         wrmsr(MSR_APERF, 0);
  264                         DELAY(10);
  265                         if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
  266                                 tsc_perf_stat = 1;
  267                 }
  268         }
  269 
  270         if (tsc_freq_vmware())
  271                 return;
  272 
  273         switch (cpu_vendor_id) {
  274         case CPU_VENDOR_AMD:
  275                 if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
  276                     (vm_guest == VM_GUEST_NO &&
  277                     CPUID_TO_FAMILY(cpu_id) >= 0x10))
  278                         tsc_is_invariant = 1;
  279                 if (cpu_feature & CPUID_SSE2) {
  280                         tsc_timecounter.tc_get_timecount =
  281                             tsc_get_timecount_mfence;
  282                 }
  283                 break;
  284         case CPU_VENDOR_INTEL:
  285                 if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
  286                     (vm_guest == VM_GUEST_NO &&
  287                     ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
  288                     CPUID_TO_MODEL(cpu_id) >= 0xe) ||
  289                     (CPUID_TO_FAMILY(cpu_id) == 0xf &&
  290                     CPUID_TO_MODEL(cpu_id) >= 0x3))))
  291                         tsc_is_invariant = 1;
  292                 if (cpu_feature & CPUID_SSE2) {
  293                         tsc_timecounter.tc_get_timecount =
  294                             tsc_get_timecount_lfence;
  295                 }
  296                 break;
  297         case CPU_VENDOR_CENTAUR:
  298                 if (vm_guest == VM_GUEST_NO &&
  299                     CPUID_TO_FAMILY(cpu_id) == 0x6 &&
  300                     CPUID_TO_MODEL(cpu_id) >= 0xf &&
  301                     (rdmsr(0x1203) & 0x100000000ULL) == 0)
  302                         tsc_is_invariant = 1;
  303                 if (cpu_feature & CPUID_SSE2) {
  304                         tsc_timecounter.tc_get_timecount =
  305                             tsc_get_timecount_lfence;
  306                 }
  307                 break;
  308         }
  309 
  310         if (tsc_skip_calibration) {
  311                 if (cpu_vendor_id == CPU_VENDOR_INTEL)
  312                         tsc_freq_intel();
  313                 return;
  314         }
  315 
  316         if (bootverbose)
  317                 printf("Calibrating TSC clock ... ");
  318         tsc1 = rdtsc();
  319         DELAY(1000000);
  320         tsc2 = rdtsc();
  321         tsc_freq = tsc2 - tsc1;
  322         if (bootverbose)
  323                 printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
  324 }
  325 
/*
 * Early TSC initialization: probe the frequency, hook the TSC up as
 * the CPU ticker, and (for non-invariant TSCs only) register cpufreq
 * event handlers so frequency transitions are tracked.
 */
void
init_TSC(void)
{

        if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
                return;

        probe_tsc_freq();

        /*
         * Inform CPU accounting about our boot-time clock rate.  This will
         * be updated if someone loads a cpufreq driver after boot that
         * discovers a new max frequency.
         */
        if (tsc_freq != 0)
                set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);

        /* An invariant TSC never changes rate; no handlers are needed. */
        if (tsc_is_invariant)
                return;

        /* Register to find out about changes in CPU frequency. */
        tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
            tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
        tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
            tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
        tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
            tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
}
  354 
  355 #ifdef SMP
  356 
  357 /*
  358  * RDTSC is not a serializing instruction, and does not drain
  359  * instruction stream, so we need to drain the stream before executing
  360  * it.  It could be fixed by use of RDTSCP, except the instruction is
  361  * not available everywhere.
  362  *
  363  * Use CPUID for draining in the boot-time SMP constistency test.  The
  364  * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel
  365  * and VIA) when SSE2 is present, and nothing on older machines which
  366  * also do not issue RDTSC prematurely.  There, testing for SSE2 and
  367  * vendor is too cumbersome, and we learn about TSC presence from CPUID.
  368  *
  369  * Do not use do_cpuid(), since we do not need CPUID results, which
  370  * have to be written into memory with do_cpuid().
  371  */
/*
 * Generate tsc_read_0/1/2: each rendezvous callback stores one TSC
 * sample for the executing CPU into slot tsc[cpu * 3 + x] of the
 * shared array.  CPUID is executed purely to drain the instruction
 * stream before RDTSC (see the comment above).
 */
#define TSC_READ(x)                                                     \
static void                                                             \
tsc_read_##x(void *arg)                                                 \
{                                                                       \
        uint64_t *tsc = arg;                                            \
        u_int cpu = PCPU_GET(cpuid);                                    \
                                                                        \
        __asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx");     \
        tsc[cpu * 3 + x] = rdtsc();                                     \
}
TSC_READ(0)
TSC_READ(1)
TSC_READ(2)
#undef TSC_READ
  386 
/* Number of sampling rounds used by the SMP synchronization test. */
#define N       1000

/*
 * Runs on every CPU after sampling.  For each round and each other
 * CPU j, require that our sample 1 is strictly after j's sample 0 and
 * our sample 2 is strictly after j's sample 1; if not, the TSCs cannot
 * be consistently ordered across CPUs and smp_tsc is cleared.
 */
static void
comp_smp_tsc(void *arg)
{
        uint64_t *tsc;
        int64_t d1, d2;
        u_int cpu = PCPU_GET(cpuid);
        u_int i, j, size;

        size = (mp_maxid + 1) * 3;      /* 3 samples per CPU per round */
        for (i = 0, tsc = arg; i < N; i++, tsc += size)
                CPU_FOREACH(j) {
                        if (j == cpu)
                                continue;
                        d1 = tsc[cpu * 3 + 1] - tsc[j * 3];
                        d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1];
                        if (d1 <= 0 || d2 <= 0) {
                                smp_tsc = 0;
                                return;
                        }
                }
}
  410 
/*
 * Runs on every CPU; the first CPU is the reference and returns at
 * once.  From the sampled data, bound this CPU's TSC offset relative
 * to the reference in [min, max], then add the interval midpoint to
 * the local TSC via a read-modify-write of MSR 0x10 (the TSC MSR).
 */
static void
adj_smp_tsc(void *arg)
{
        uint64_t *tsc;
        int64_t d, min, max;
        u_int cpu = PCPU_GET(cpuid);
        u_int first, i, size;

        first = CPU_FIRST();
        if (cpu == first)
                return;
        min = INT64_MIN;
        max = INT64_MAX;
        size = (mp_maxid + 1) * 3;
        for (i = 0, tsc = arg; i < N; i++, tsc += size) {
                /* Raise the lower bound: reference sample minus our
                 * later sample must not exceed the true offset. */
                d = tsc[first * 3] - tsc[cpu * 3 + 1];
                if (d > min)
                        min = d;
                d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2];
                if (d > min)
                        min = d;
                /* Lower the upper bound: reference sample minus our
                 * earlier sample must not be below the true offset. */
                d = tsc[first * 3 + 1] - tsc[cpu * 3];
                if (d < max)
                        max = d;
                d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1];
                if (d < max)
                        max = d;
        }
        if (min > max)          /* inconsistent bounds: give up */
                return;
        d = min / 2 + max / 2;  /* midpoint, avoiding 64-bit overflow */
        /*
         * TSC += d: read MSR 0x10, do a 64-bit add of the adjustment
         * held in esi:edi, and write the result back.
         */
        __asm __volatile (
                "movl $0x10, %%ecx\n\t"
                "rdmsr\n\t"
                "addl %%edi, %%eax\n\t"
                "adcl %%esi, %%edx\n\t"
                "wrmsr\n"
                : /* No output */
                : "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32))
                : "ax", "cx", "dx", "cc"
        );
}
  453 
/*
 * Boot-time test of whether the TSCs of all CPUs can serve as one
 * timecounter: sample every CPU N times, verify cross-CPU ordering,
 * optionally retry up to smp_tsc_adjust adjustment rounds, and return
 * the timecounter quality to use (negative means unusable).
 */
static int
test_tsc(void)
{
        uint64_t *data, *tsc;
        u_int i, size, adj;

        /* Without user override or invariance, or inside a VM, give up. */
        if ((!smp_tsc && !tsc_is_invariant) || vm_guest)
                return (-100);
        size = (mp_maxid + 1) * 3;
        data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK);
        adj = 0;
retry:
        for (i = 0, tsc = data; i < N; i++, tsc += size)
                smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
        smp_tsc = 1;    /* XXX */
        smp_rendezvous(smp_no_rendevous_barrier, comp_smp_tsc,
            smp_no_rendevous_barrier, data);
        if (!smp_tsc && adj < smp_tsc_adjust) {
                /* Test failed; nudge the AP TSCs toward the BSP and retry. */
                adj++;
                smp_rendezvous(smp_no_rendevous_barrier, adj_smp_tsc,
                    smp_no_rendevous_barrier, data);
                goto retry;
        }
        free(data, M_TEMP);
        if (bootverbose)
                printf("SMP: %sed TSC synchronization test%s\n",
                    smp_tsc ? "pass" : "fail",
                    adj > 0 ? " after adjustment" : "");
        if (smp_tsc && tsc_is_invariant) {
                switch (cpu_vendor_id) {
                case CPU_VENDOR_AMD:
                        /*
                         * Starting with Family 15h processors, TSC clock
                         * source is in the north bridge.  Check whether
                         * we have a single-socket/multi-core platform.
                         * XXX Need more work for complex cases.
                         */
                        if (CPUID_TO_FAMILY(cpu_id) < 0x15 ||
                            (amd_feature2 & AMDID2_CMP) == 0 ||
                            smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1)
                                break;
                        return (1000);
                case CPU_VENDOR_INTEL:
                        /*
                         * XXX Assume Intel platforms have synchronized TSCs.
                         */
                        return (1000);
                }
                return (800);
        }
        return (-100);
}
  506 
  507 #undef N
  508 
  509 #else
  510 
/*
 * The function is not called, it is provided to avoid linking failure
 * on uniprocessor kernel.  init_TSC_tc() only calls test_tsc() when
 * mp_ncpus > 1, which is never true without SMP.
 */
static int
test_tsc(void)
{

        return (0);
}
  521 
  522 #endif /* SMP */
  523 
/*
 * Configure and register the TSC timecounter: decide its quality,
 * compute a bit-discarding shift so the frequency fits the
 * timecounter limits, and select the appropriate serialized read
 * routine.  Runs at SI_SUB_SMP so the SMP test can rendezvous all
 * CPUs.
 */
static void
init_TSC_tc(void)
{
        uint64_t max_freq;
        int shift;

        if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
                return;

        /*
         * Limit timecounter frequency to fit in an int and prevent it from
         * overflowing too fast.
         */
        max_freq = UINT_MAX;

        /*
         * We can not use the TSC if we support APM.  Precise timekeeping
         * on an APM'ed machine is at best a fools pursuit, since
         * any and all of the time spent in various SMM code can't
         * be reliably accounted for.  Reading the RTC is your only
         * source of reliable time info.  The i8254 loses too, of course,
         * but we need to have some kind of time...
         * We don't know at this point whether APM is going to be used
         * or not, nor when it might be activated.  Play it safe.
         */
        if (power_pm_get_type() == POWER_PM_TYPE_APM) {
                tsc_timecounter.tc_quality = -1000;
                if (bootverbose)
                        printf("TSC timecounter disabled: APM enabled.\n");
                goto init;
        }

        /*
         * We cannot use the TSC if it stops incrementing while idle.
         * Intel CPUs without a C-state invariant TSC can stop the TSC
         * in either C2 or C3.
         */
        if (cpu_deepest_sleep >= 2 && cpu_vendor_id == CPU_VENDOR_INTEL &&
            (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) {
                tsc_timecounter.tc_quality = -1000;
                tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP;
                if (bootverbose)
                        printf("TSC timecounter disabled: C2/C3 may halt it.\n");
                goto init;
        }

        /*
         * We can not use the TSC in SMP mode unless the TSCs on all CPUs
         * are synchronized.  If the user is sure that the system has
         * synchronized TSCs, set kern.timecounter.smp_tsc tunable to a
         * non-zero value.  The TSC seems unreliable in virtualized SMP
         * environments, so it is set to a negative quality in those cases.
         */
        if (mp_ncpus > 1)
                tsc_timecounter.tc_quality = test_tsc();
        else if (tsc_is_invariant)
                tsc_timecounter.tc_quality = 1000;
        /* Reserve extra headroom (tunable) below the UINT_MAX ceiling. */
        max_freq >>= tsc_shift;

init:
        /* Smallest shift that brings tsc_freq at or below max_freq. */
        for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
                ;
        /*
         * Pick the read routine: MFENCE-serialized for AMD, LFENCE for
         * the rest (when SSE2 is present and this is SMP); the "low"
         * variants discard `shift' low-order bits.
         */
        if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
                if (cpu_vendor_id == CPU_VENDOR_AMD) {
                        tsc_timecounter.tc_get_timecount = shift > 0 ?
                            tsc_get_timecount_low_mfence :
                            tsc_get_timecount_mfence;
                } else {
                        tsc_timecounter.tc_get_timecount = shift > 0 ?
                            tsc_get_timecount_low_lfence :
                            tsc_get_timecount_lfence;
                }
        } else {
                tsc_timecounter.tc_get_timecount = shift > 0 ?
                    tsc_get_timecount_low : tsc_get_timecount;
        }
        if (shift > 0) {
                tsc_timecounter.tc_name = "TSC-low";
                if (bootverbose)
                        printf("TSC timecounter discards lower %d bit(s)\n",
                            shift);
        }
        if (tsc_freq != 0) {
                tsc_timecounter.tc_frequency = tsc_freq >> shift;
                /* Stash the shift for the read routines and the vdso. */
                tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
                tc_init(&tsc_timecounter);
        }
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);
  613 
/*
 * When cpufreq levels change, find out about the (new) max frequency.  We
 * use this to update CPU accounting in case it got a lower estimate at boot.
 */
static void
tsc_levels_changed(void *arg, int unit)
{
        device_t cf_dev;
        struct cf_level *levels;
        int count, error;
        uint64_t max_freq;

        /* Only use values from the first CPU, assuming all are equal. */
        if (unit != 0)
                return;

        /* Find the appropriate cpufreq device instance. */
        cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
        if (cf_dev == NULL) {
                printf("tsc_levels_changed() called but no cpufreq device?\n");
                return;
        }

        /* Get settings from the device and find the max frequency. */
        count = 64;     /* upper bound on levels fetched */
        levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
        if (levels == NULL)
                return;
        error = CPUFREQ_LEVELS(cf_dev, levels, &count);
        if (error == 0 && count != 0) {
                /* Levels are sorted; [0] holds the max, in MHz. */
                max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
                set_cputicker(rdtsc, max_freq, 1);
        } else
                printf("tsc_levels_changed: no max freq found\n");
        free(levels, M_TEMP);
}
  650 
  651 /*
  652  * If the TSC timecounter is in use, veto the pending change.  It may be
  653  * possible in the future to handle a dynamically-changing timecounter rate.
  654  */
  655 static void
  656 tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
  657 {
  658 
  659         if (*status != 0 || timecounter != &tsc_timecounter)
  660                 return;
  661 
  662         printf("timecounter TSC must not be in use when "
  663             "changing frequencies; change denied\n");
  664         *status = EBUSY;
  665 }
  666 
/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
        uint64_t freq;

        /* If there was an error during the transition, don't do anything. */
        if (tsc_disabled || status != 0)
                return;

        /* Total setting for this level gives the new frequency in MHz. */
        freq = (uint64_t)level->total_set.freq * 1000000;
        atomic_store_rel_64(&tsc_freq, freq);
        /* Keep the timecounter in step, honoring the pre-shift in tc_priv. */
        tsc_timecounter.tc_frequency =
            freq >> (int)(intptr_t)tsc_timecounter.tc_priv;
}
  683 
/*
 * Handler for the machdep.tsc_freq sysctl: reports the current TSC
 * frequency and, on write, overrides it, updating the timecounter
 * frequency with the configured pre-shift applied.
 */
static int
sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
{
        int error;
        uint64_t freq;

        freq = atomic_load_acq_64(&tsc_freq);
        if (freq == 0)          /* TSC unusable or never calibrated */
                return (EOPNOTSUPP);
        error = sysctl_handle_64(oidp, &freq, 0, req);
        if (error == 0 && req->newptr != NULL) {
                /* A new value was supplied; propagate it. */
                atomic_store_rel_64(&tsc_freq, freq);
                atomic_store_rel_64(&tsc_timecounter.tc_frequency,
                    freq >> (int)(intptr_t)tsc_timecounter.tc_priv);
        }
        return (error);
}

SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
    0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");
  704 
/* Plain timecounter read: the low 32 bits of the TSC, unserialized. */
static u_int
tsc_get_timecount(struct timecounter *tc __unused)
{

        return (rdtsc32());
}
  711 
/*
 * Read the TSC and discard its lowest tc_priv bits, returning the
 * next 32.  Used when the raw frequency exceeds the timecounter's
 * UINT_MAX-based limit (see init_TSC_tc()).
 */
static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
        uint32_t rv;

        /* SHRD shifts the 64-bit edx:eax value right by cl into eax. */
        __asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
            : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
        return (rv);
}
  721 
/* TSC read with LFENCE serialization (Intel/VIA; see comment above). */
static u_int
tsc_get_timecount_lfence(struct timecounter *tc __unused)
{

        lfence();
        return (rdtsc32());
}
  729 
/* LFENCE-serialized variant of the bit-discarding TSC read. */
static u_int
tsc_get_timecount_low_lfence(struct timecounter *tc)
{

        lfence();
        return (tsc_get_timecount_low(tc));
}
  737 
/* TSC read with MFENCE serialization (AMD; see comment above). */
static u_int
tsc_get_timecount_mfence(struct timecounter *tc __unused)
{

        mfence();
        return (rdtsc32());
}
  745 
/* MFENCE-serialized variant of the bit-discarding TSC read. */
static u_int
tsc_get_timecount_low_mfence(struct timecounter *tc)
{

        mfence();
        return (tsc_get_timecount_low(tc));
}
  753 
/*
 * Fill the x86-specific part of the vdso timehands: export the active
 * timecounter's pre-shift (tc_priv) and zero the reserved area.
 * Returns non-zero iff the TSC timecounter is the active one, i.e.
 * userland may compute time from RDTSC.
 */
uint32_t
cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th)
{

        vdso_th->th_x86_shift = (int)(intptr_t)timecounter->tc_priv;
        bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
        return (timecounter == &tsc_timecounter);
}
  762 
#ifdef COMPAT_FREEBSD32
/*
 * 32-bit compat counterpart of cpu_fill_vdso_timehands(): same shift
 * export and reserved-area clearing for the 32-bit vdso layout.
 */
uint32_t
cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
{

        vdso_th32->th_x86_shift = (int)(intptr_t)timecounter->tc_priv;
        bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
        return (timecounter == &tsc_timecounter);
}
#endif

Cache object: 0be95fb040b4b685f3524676c6d539ab


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.