The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/x86/x86/tsc.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 1998-2003 Poul-Henning Kamp
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD$");
   31 
   32 #include "opt_clock.h"
   33 
   34 #include <sys/param.h>
   35 #include <sys/systm.h>
   36 #include <sys/bus.h>
   37 #include <sys/cpu.h>
   38 #include <sys/eventhandler.h>
   39 #include <sys/limits.h>
   40 #include <sys/malloc.h>
   41 #include <sys/proc.h>
   42 #include <sys/sched.h>
   43 #include <sys/sysctl.h>
   44 #include <sys/time.h>
   45 #include <sys/timetc.h>
   46 #include <sys/kernel.h>
   47 #include <sys/smp.h>
   48 #include <sys/vdso.h>
   49 #include <machine/clock.h>
   50 #include <machine/cputypes.h>
   51 #include <machine/fpu.h>
   52 #include <machine/md_var.h>
   53 #include <machine/specialreg.h>
   54 #include <x86/vmware.h>
   55 #include <dev/acpica/acpi_hpet.h>
   56 #include <contrib/dev/acpica/include/acpi.h>
   57 
   58 #include "cpufreq_if.h"
   59 
/* TSC frequency in Hz; stays 0 until a probe or calibration stores a value. */
uint64_t        tsc_freq;
/* Non-zero when the TSC is treated as P-state invariant (exported below). */
int             tsc_is_invariant;
/* Set by start_TSC() once the MPERF/APERF MSRs have been observed counting. */
int             tsc_perf_stat;
/*
 * Set by the hypervisor-based early probes; when non-zero, tsc_calibrate()
 * skips the clockcalib() refinement and uses tsc_freq as is.
 */
static int      tsc_early_calib_exact;

/* Event handler registrations made by start_TSC() for cpufreq notifications. */
static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;

/* kern.timecounter.invariant_tsc: read-only tunable view of tsc_is_invariant. */
SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");

#ifdef SMP
/* kern.timecounter.smp_tsc: result/override of the SMP synchronization test. */
int     smp_tsc;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
    "Indicates whether the TSC is safe to use in SMP mode");

/*
 * kern.timecounter.smp_tsc_adjust: passed to test_tsc() as the maximum number
 * of adjust-and-retry rounds.
 */
int     smp_tsc_adjust = 0;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN,
    &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP");
#endif

/*
 * kern.timecounter.tsc_shift: init_TSC_tc() shifts the maximum allowed
 * timecounter frequency right by this amount before choosing a shift.
 */
static int      tsc_shift = 1;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN,
    &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency");

/* machdep.disable_tsc: when non-zero every entry point in this file bails. */
static int      tsc_disabled;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
    "Disable x86 Time Stamp Counter");

/*
 * machdep.disable_tsc_calibration: makes probe_tsc_freq_late() use the brand
 * string instead of measuring the TSC against another clock.
 */
static int      tsc_skip_calibration;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN,
    &tsc_skip_calibration, 0,
    "Disable early TSC frequency calibration");
   92 
/*
 * Forward declarations.
 *
 * tsc_freq_changing/tsc_freq_changed/tsc_levels_changed are the cpufreq event
 * handlers registered in start_TSC().  The *_get_timecount* family are the
 * candidate tc_get_timecount implementations (plain, LFENCE, MFENCE and
 * RDTSCP flavours, each with a "_low" variant used when a shift is applied)
 * from which probe_tsc_freq_early() and init_TSC_tc() pick one.
 */
static void tsc_freq_changed(void *arg, const struct cf_level *level,
    int status);
static void tsc_freq_changing(void *arg, const struct cf_level *level,
    int *status);
static u_int tsc_get_timecount(struct timecounter *tc);
static inline u_int tsc_get_timecount_low(struct timecounter *tc);
static u_int tsc_get_timecount_lfence(struct timecounter *tc);
static u_int tsc_get_timecount_low_lfence(struct timecounter *tc);
static u_int tsc_get_timecount_mfence(struct timecounter *tc);
static u_int tsc_get_timecount_low_mfence(struct timecounter *tc);
static u_int tscp_get_timecount(struct timecounter *tc);
static u_int tscp_get_timecount_low(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);
/* Callbacks that publish TSC timecounter state to the vDSO timehands. */
static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
    struct timecounter *tc);
#ifdef COMPAT_FREEBSD32
static uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc);
#endif
  112 
/*
 * The TSC timecounter.  The values here are defaults: tc_get_timecount,
 * tc_quality, tc_name, tc_flags, tc_frequency and tc_priv are all rewritten
 * during boot (see probe_tsc_freq_early() and init_TSC_tc()) before the
 * counter is registered by tsc_calibrate().
 */
static struct timecounter tsc_timecounter = {
        .tc_get_timecount =             tsc_get_timecount,
        .tc_counter_mask =              ~0u,
        .tc_name =                      "TSC",
        .tc_quality =                   800,    /* adjusted in code */
        .tc_fill_vdso_timehands =       x86_tsc_vdso_timehands,
#ifdef COMPAT_FREEBSD32
        .tc_fill_vdso_timehands32 =     x86_tsc_vdso_timehands32,
#endif
};
  123 
  124 static int
  125 tsc_freq_cpuid_vm(void)
  126 {
  127         u_int regs[4];
  128 
  129         if (vm_guest == VM_GUEST_NO)
  130                 return (false);
  131         if (hv_high < 0x40000010)
  132                 return (false);
  133         do_cpuid(0x40000010, regs);
  134         tsc_freq = (uint64_t)(regs[0]) * 1000;
  135         tsc_early_calib_exact = 1;
  136         return (true);
  137 }
  138 
  139 static void
  140 tsc_freq_vmware(void)
  141 {
  142         u_int regs[4];
  143 
  144         vmware_hvcall(VMW_HVCMD_GETHZ, regs);
  145         if (regs[1] != UINT_MAX)
  146                 tsc_freq = regs[0] | ((uint64_t)regs[1] << 32);
  147         tsc_early_calib_exact = 1;
  148 }
  149 
  150 static void
  151 tsc_freq_xen(void)
  152 {
  153         u_int regs[4];
  154 
  155         /*
  156          * Must run *after* generic tsc_freq_cpuid_vm, so that when Xen is
  157          * emulating Viridian support the Viridian leaf is used instead.
  158          */
  159         KASSERT(hv_high >= 0x40000003, ("Invalid max hypervisor leaf on Xen"));
  160         cpuid_count(0x40000003, 0, regs);
  161         tsc_freq = (uint64_t)(regs[2]) * 1000;
  162         tsc_early_calib_exact = 1;
  163 }
  164 
  165 /*
  166  * Calculate TSC frequency using information from the CPUID leaf 0x15 'Time
  167  * Stamp Counter and Nominal Core Crystal Clock'.  If leaf 0x15 is not
  168  * functional, as it is on Skylake/Kabylake, try 0x16 'Processor Frequency
  169  * Information'.  Leaf 0x16 is described in the SDM as informational only, but
  170  * we can use this value until late calibration is complete.
  171  */
  172 static bool
  173 tsc_freq_cpuid(uint64_t *res)
  174 {
  175         u_int regs[4];
  176 
  177         if (cpu_high < 0x15)
  178                 return (false);
  179         do_cpuid(0x15, regs);
  180         if (regs[0] != 0 && regs[1] != 0 && regs[2] != 0) {
  181                 *res = (uint64_t)regs[2] * regs[1] / regs[0];
  182                 return (true);
  183         }
  184 
  185         if (cpu_high < 0x16)
  186                 return (false);
  187         do_cpuid(0x16, regs);
  188         if (regs[0] != 0) {
  189                 *res = (uint64_t)regs[0] * 1000000;
  190                 return (true);
  191         }
  192 
  193         return (false);
  194 }
  195 
  196 static bool
  197 tsc_freq_intel_brand(uint64_t *res)
  198 {
  199         char brand[48];
  200         u_int regs[4];
  201         uint64_t freq;
  202         char *p;
  203         u_int i;
  204 
  205         /*
  206          * Intel Processor Identification and the CPUID Instruction
  207          * Application Note 485.
  208          * http://www.intel.com/assets/pdf/appnote/241618.pdf
  209          */
  210         if (cpu_exthigh >= 0x80000004) {
  211                 p = brand;
  212                 for (i = 0x80000002; i < 0x80000005; i++) {
  213                         do_cpuid(i, regs);
  214                         memcpy(p, regs, sizeof(regs));
  215                         p += sizeof(regs);
  216                 }
  217                 p = NULL;
  218                 for (i = 0; i < sizeof(brand) - 1; i++)
  219                         if (brand[i] == 'H' && brand[i + 1] == 'z')
  220                                 p = brand + i;
  221                 if (p != NULL) {
  222                         p -= 5;
  223                         switch (p[4]) {
  224                         case 'M':
  225                                 i = 1;
  226                                 break;
  227                         case 'G':
  228                                 i = 1000;
  229                                 break;
  230                         case 'T':
  231                                 i = 1000000;
  232                                 break;
  233                         default:
  234                                 return (false);
  235                         }
  236 #define C2D(c)  ((c) - '')
  237                         if (p[1] == '.') {
  238                                 freq = C2D(p[0]) * 1000;
  239                                 freq += C2D(p[2]) * 100;
  240                                 freq += C2D(p[3]) * 10;
  241                                 freq *= i * 1000;
  242                         } else {
  243                                 freq = C2D(p[0]) * 1000;
  244                                 freq += C2D(p[1]) * 100;
  245                                 freq += C2D(p[2]) * 10;
  246                                 freq += C2D(p[3]);
  247                                 freq *= i * 1000000;
  248                         }
  249 #undef C2D
  250                         *res = freq;
  251                         return (true);
  252                 }
  253         }
  254         return (false);
  255 }
  256 
  257 static void
  258 tsc_freq_tc(uint64_t *res)
  259 {
  260         uint64_t tsc1, tsc2;
  261         int64_t overhead;
  262         int count, i;
  263 
  264         overhead = 0;
  265         for (i = 0, count = 8; i < count; i++) {
  266                 tsc1 = rdtsc_ordered();
  267                 DELAY(0);
  268                 tsc2 = rdtsc_ordered();
  269                 if (i > 0)
  270                         overhead += tsc2 - tsc1;
  271         }
  272         overhead /= count;
  273 
  274         tsc1 = rdtsc_ordered();
  275         DELAY(100000);
  276         tsc2 = rdtsc_ordered();
  277         tsc_freq = (tsc2 - tsc1 - overhead) * 10;
  278 }
  279 
/*
 * Try to determine the TSC frequency using CPUID or hypercalls.  If
 * successful, this lets us use the TSC for early DELAY() calls instead of the
 * 8254 timer, which may be unreliable or entirely absent on contemporary
 * systems.  However, avoid calibrating using the 8254 here so as to give
 * hypervisors a chance to register a timecounter that can be used instead.
 *
 * Besides the frequency, this routine also:
 *  - on i386, sets tsc_disabled for CPU models with a known-broken TSC;
 *  - decides tsc_is_invariant per vendor;
 *  - selects a fenced tc_get_timecount implementation when SSE2 is present.
 *
 * tsc_freq is left at 0 when none of the sources below yields a value.
 */
static void
probe_tsc_freq_early(void)
{
#ifdef __i386__
        /* The TSC is known to be broken on certain CPUs. */
        switch (cpu_vendor_id) {
        case CPU_VENDOR_AMD:
                switch (cpu_id & 0xFF0) {
                case 0x500:
                        /* K5 Model 0 */
                        tsc_disabled = 1;
                        return;
                }
                break;
        case CPU_VENDOR_CENTAUR:
                switch (cpu_id & 0xff0) {
                case 0x540:
                        /*
                         * http://www.centtech.com/c6_data_sheet.pdf
                         *
                         * I-12 RDTSC may return incoherent values in EDX:EAX
                         * I-13 RDTSC hangs when certain event counters are used
                         */
                        tsc_disabled = 1;
                        return;
                }
                break;
        case CPU_VENDOR_NSC:
                switch (cpu_id & 0xff0) {
                case 0x540:
                        /* Only stepping 0 of this model is blacklisted. */
                        if ((cpu_id & CPUID_STEPPING) == 0) {
                                tsc_disabled = 1;
                                return;
                        }
                        break;
                }
                break;
        }
#endif

        /*
         * Per-vendor invariance detection and fence selection.  On bare metal
         * (vm_guest == VM_GUEST_NO) the family/model checks are used as an
         * additional way to conclude that the TSC is invariant.
         */
        switch (cpu_vendor_id) {
        case CPU_VENDOR_AMD:
        case CPU_VENDOR_HYGON:
                if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
                    (vm_guest == VM_GUEST_NO &&
                    CPUID_TO_FAMILY(cpu_id) >= 0x10))
                        tsc_is_invariant = 1;
                if (cpu_feature & CPUID_SSE2) {
                        tsc_timecounter.tc_get_timecount =
                            tsc_get_timecount_mfence;
                }
                break;
        case CPU_VENDOR_INTEL:
                /*
                 * NOTE(review): the Intel case also tests
                 * amd_pminfo/AMDPM_TSC_INVARIANT; presumably the same CPUID
                 * bit is used by both vendors -- confirm.
                 */
                if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
                    (vm_guest == VM_GUEST_NO &&
                    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
                    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
                    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
                    CPUID_TO_MODEL(cpu_id) >= 0x3))))
                        tsc_is_invariant = 1;
                if (cpu_feature & CPUID_SSE2) {
                        tsc_timecounter.tc_get_timecount =
                            tsc_get_timecount_lfence;
                }
                break;
        case CPU_VENDOR_CENTAUR:
                if (vm_guest == VM_GUEST_NO &&
                    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
                    CPUID_TO_MODEL(cpu_id) >= 0xf &&
                    (rdmsr(0x1203) & 0x100000000ULL) == 0)
                        tsc_is_invariant = 1;
                if (cpu_feature & CPUID_SSE2) {
                        tsc_timecounter.tc_get_timecount =
                            tsc_get_timecount_lfence;
                }
                break;
        }

        /*
         * Frequency sources, in priority order: generic hypervisor CPUID
         * leaf, VMware hypercall, Xen CPUID leaf, then the architectural
         * CPUID leaves.  Only the first matching branch runs.
         */
        if (tsc_freq_cpuid_vm()) {
                if (bootverbose)
                        printf(
                    "Early TSC frequency %juHz derived from hypervisor CPUID\n",
                            (uintmax_t)tsc_freq);
        } else if (vm_guest == VM_GUEST_VMWARE) {
                tsc_freq_vmware();
                if (bootverbose)
                        printf(
                    "Early TSC frequency %juHz derived from VMWare hypercall\n",
                            (uintmax_t)tsc_freq);
        } else if (vm_guest == VM_GUEST_XEN) {
                tsc_freq_xen();
                if (bootverbose)
                        printf(
                        "Early TSC frequency %juHz derived from Xen CPUID\n",
                            (uintmax_t)tsc_freq);
        } else if (tsc_freq_cpuid(&tsc_freq)) {
                /*
                 * If possible, use the value obtained from CPUID as the initial
                 * frequency.  This will be refined later during boot but is
                 * good enough for now.  The 8254 PIT is not functional on some
                 * newer platforms anyway, so don't delay our boot for what
                 * might be a garbage result.  Late calibration is required if
                 * the initial frequency was obtained from CPUID.16H, as the
                 * derived value may be off by as much as 1%.
                 */
                if (bootverbose)
                        printf("Early TSC frequency %juHz derived from CPUID\n",
                            (uintmax_t)tsc_freq);
        }
}
  397 
  398 /*
  399  * If we were unable to determine the TSC frequency via CPU registers, try
  400  * to calibrate against a known clock.
  401  */
  402 static void
  403 probe_tsc_freq_late(void)
  404 {
  405         if (tsc_freq != 0)
  406                 return;
  407 
  408         if (tsc_skip_calibration) {
  409                 /*
  410                  * Try to parse the brand string to obtain the nominal TSC
  411                  * frequency.
  412                  */
  413                 if (cpu_vendor_id == CPU_VENDOR_INTEL &&
  414                     tsc_freq_intel_brand(&tsc_freq)) {
  415                         if (bootverbose)
  416                                 printf(
  417                     "Early TSC frequency %juHz derived from brand string\n",
  418                                     (uintmax_t)tsc_freq);
  419                 } else {
  420                         tsc_disabled = 1;
  421                 }
  422         } else {
  423                 /*
  424                  * Calibrate against a timecounter or the 8254 PIT.  This
  425                  * estimate will be refined later in tsc_calib().
  426                  */
  427                 tsc_freq_tc(&tsc_freq);
  428                 if (bootverbose)
  429                         printf(
  430                     "Early TSC frequency %juHz calibrated from 8254 PIT\n",
  431                             (uintmax_t)tsc_freq);
  432         }
  433 }
  434 
  435 void
  436 start_TSC(void)
  437 {
  438         if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
  439                 return;
  440 
  441         probe_tsc_freq_late();
  442 
  443         if (cpu_power_ecx & CPUID_PERF_STAT) {
  444                 /*
  445                  * XXX Some emulators expose host CPUID without actual support
  446                  * for these MSRs.  We must test whether they really work.
  447                  */
  448                 wrmsr(MSR_MPERF, 0);
  449                 wrmsr(MSR_APERF, 0);
  450                 DELAY(10);
  451                 if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
  452                         tsc_perf_stat = 1;
  453         }
  454 
  455         /*
  456          * Inform CPU accounting about our boot-time clock rate.  This will
  457          * be updated if someone loads a cpufreq driver after boot that
  458          * discovers a new max frequency.
  459          *
  460          * The frequency may also be updated after late calibration is complete;
  461          * however, we register the TSC as the ticker now to avoid switching
  462          * counters after much of the kernel has already booted and potentially
  463          * sampled the CPU clock.
  464          */
  465         if (tsc_freq != 0)
  466                 set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);
  467 
  468         if (tsc_is_invariant)
  469                 return;
  470 
  471         /* Register to find out about changes in CPU frequency. */
  472         tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
  473             tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
  474         tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
  475             tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
  476         tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
  477             tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
  478 }
  479 
  480 #ifdef SMP
  481 
/*
 * RDTSC is not a serializing instruction, and does not drain
 * instruction stream, so we need to drain the stream before executing
 * it.  It could be fixed by use of RDTSCP, except the instruction is
 * not available everywhere.
 *
 * Use CPUID for draining in the boot-time SMP consistency test.  The
 * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel
 * and VIA) when SSE2 is present, and nothing on older machines which
 * also do not issue RDTSC prematurely.  There, testing for SSE2 and
 * vendor is too cumbersome, and we learn about TSC presence from CPUID.
 *
 * Do not use do_cpuid(), since we do not need CPUID results, which
 * have to be written into memory with do_cpuid().
 *
 * TSC_READ(x) defines tsc_read_<x>(arg): arg is a uint64_t array holding
 * three consecutive slots per CPU id, and the function stores the calling
 * CPU's TSC reading into slot x of its own group.  The three instances are
 * used as the three phases of one smp_rendezvous() call in test_tsc().
 */
#define TSC_READ(x)                                                     \
static void                                                             \
tsc_read_##x(void *arg)                                                 \
{                                                                       \
        uint64_t *tsc = arg;                                            \
        u_int cpu = PCPU_GET(cpuid);                                    \
                                                                        \
        __asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx");     \
        tsc[cpu * 3 + x] = rdtsc();                                     \
}
TSC_READ(0)
TSC_READ(1)
TSC_READ(2)
#undef TSC_READ
  511 
/* Number of sampling rounds collected and examined by the SMP TSC test. */
#define N       1000
  513 
  514 static void
  515 comp_smp_tsc(void *arg)
  516 {
  517         uint64_t *tsc;
  518         int64_t d1, d2;
  519         u_int cpu = PCPU_GET(cpuid);
  520         u_int i, j, size;
  521 
  522         size = (mp_maxid + 1) * 3;
  523         for (i = 0, tsc = arg; i < N; i++, tsc += size)
  524                 CPU_FOREACH(j) {
  525                         if (j == cpu)
  526                                 continue;
  527                         d1 = tsc[cpu * 3 + 1] - tsc[j * 3];
  528                         d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1];
  529                         if (d1 <= 0 || d2 <= 0) {
  530                                 smp_tsc = 0;
  531                                 return;
  532                         }
  533                 }
  534 }
  535 
/*
 * Rendezvous action: nudge the calling CPU's TSC towards that of the first
 * CPU, using the samples gathered by tsc_read_{0,1,2}.
 *
 * arg is the sample array: N rounds of three slots per CPU id.  The first
 * CPU (CPU_FIRST()) is the reference and does nothing.  Every other CPU
 * derives, over all rounds, a lower bound ("min", the largest of the
 * first-minus-self differences taken against a later own phase) and an upper
 * bound ("max", the smallest of the differences taken against an earlier own
 * phase) for its offset.  If the bounds are inconsistent nothing is changed;
 * otherwise their midpoint is added to MSR 0x10.
 */
static void
adj_smp_tsc(void *arg)
{
        uint64_t *tsc;
        int64_t d, min, max;
        u_int cpu = PCPU_GET(cpuid);
        u_int first, i, size;

        first = CPU_FIRST();
        if (cpu == first)
                return;
        /* Start with the widest possible interval and narrow it down. */
        min = INT64_MIN;
        max = INT64_MAX;
        size = (mp_maxid + 1) * 3;
        for (i = 0, tsc = arg; i < N; i++, tsc += size) {
                /* Differences that bound the offset from below. */
                d = tsc[first * 3] - tsc[cpu * 3 + 1];
                if (d > min)
                        min = d;
                d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2];
                if (d > min)
                        min = d;
                /* Differences that bound the offset from above. */
                d = tsc[first * 3 + 1] - tsc[cpu * 3];
                if (d < max)
                        max = d;
                d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1];
                if (d < max)
                        max = d;
        }
        /* Empty interval: the samples give no usable correction. */
        if (min > max)
                return;
        /* Midpoint, halving each bound first so the sum cannot overflow. */
        d = min / 2 + max / 2;
        /*
         * Read MSR 0x10 into EDX:EAX, add the 64-bit correction held in
         * ESI:EDI with carry, and write the result back.
         */
        __asm __volatile (
                "movl $0x10, %%ecx\n\t"
                "rdmsr\n\t"
                "addl %%edi, %%eax\n\t"
                "adcl %%esi, %%edx\n\t"
                "wrmsr\n"
                : /* No output */
                : "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32))
                : "ax", "cx", "dx", "cc"
        );
}
  578 
/*
 * Run the boot-time SMP TSC synchronization test and return the timecounter
 * quality to use for the TSC.
 *
 * adj_max_count is the maximum number of times the APs' TSCs may be adjusted
 * (adj_smp_tsc) and the test repeated after a failure.
 *
 * Returns:
 *   -100  when the TSC is neither declared SMP-safe nor invariant, or when
 *         the test failed or the TSC is not invariant;
 *      0  under VirtualBox;
 *   1000  when the test passed on an invariant TSC and the platform is one of
 *         the known-synchronized cases below;
 *    800  when the test passed on an invariant TSC otherwise.
 *
 * Side effect: smp_tsc is overwritten with the test result.
 */
static int
test_tsc(int adj_max_count)
{
        uint64_t *data, *tsc;
        u_int i, size, adj;

        if ((!smp_tsc && !tsc_is_invariant))
                return (-100);
        /*
         * Misbehavior of TSC under VirtualBox has been observed.  In
         * particular, threads doing small (~1 second) sleeps may miss their
         * wakeup and hang around in sleep state, causing hangs on shutdown.
         */
        if (vm_guest == VM_GUEST_VBOX)
                return (0);

        TSENTER();
        /* Three slots per CPU id per round, N rounds in total. */
        size = (mp_maxid + 1) * 3;
        data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK);
        adj = 0;
retry:
        /* Collect N rounds; each rendezvous phase fills one slot per CPU. */
        for (i = 0, tsc = data; i < N; i++, tsc += size)
                smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
        /* Assume success; comp_smp_tsc() clears the flag on any violation. */
        smp_tsc = 1;    /* XXX */
        smp_rendezvous(smp_no_rendezvous_barrier, comp_smp_tsc,
            smp_no_rendezvous_barrier, data);
        if (!smp_tsc && adj < adj_max_count) {
                /* Try to correct the APs and sample again. */
                adj++;
                smp_rendezvous(smp_no_rendezvous_barrier, adj_smp_tsc,
                    smp_no_rendezvous_barrier, data);
                goto retry;
        }
        free(data, M_TEMP);
        if (bootverbose)
                printf("SMP: %sed TSC synchronization test%s\n",
                    smp_tsc ? "pass" : "fail", 
                    adj > 0 ? " after adjustment" : "");
        TSEXIT();
        if (smp_tsc && tsc_is_invariant) {
                switch (cpu_vendor_id) {
                case CPU_VENDOR_AMD:
                case CPU_VENDOR_HYGON:
                        /*
                         * Processor Programming Reference (PPR) for AMD
                         * Family 17h states that the TSC uses a common
                         * reference for all sockets, cores and threads.
                         */
                        if (CPUID_TO_FAMILY(cpu_id) >= 0x17)
                                return (1000);
                        /*
                         * Starting with Family 15h processors, TSC clock
                         * source is in the north bridge.  Check whether
                         * we have a single-socket/multi-core platform.
                         * XXX Need more work for complex cases.
                         */
                        if (CPUID_TO_FAMILY(cpu_id) < 0x15 ||
                            (amd_feature2 & AMDID2_CMP) == 0 ||
                            smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1)
                                break;
                        return (1000);
                case CPU_VENDOR_INTEL:
                        /*
                         * XXX Assume Intel platforms have synchronized TSCs.
                         */
                        return (1000);
                }
                /* Passed, but not a platform known to be synchronized. */
                return (800);
        }
        return (-100);
}
  649 
  650 #undef N
  651 
  652 #endif /* SMP */
  653 
/*
 * SYSINIT hook (SI_SUB_SMP): finalize the TSC timecounter parameters.
 *
 * Decides the timecounter quality (running the SMP synchronization test when
 * more than one CPU is present), computes how many low-order TSC bits to
 * discard so that the frequency fits the permitted maximum, picks the
 * matching tc_get_timecount implementation, and records frequency and shift
 * in tsc_timecounter.  The timecounter itself is not registered here.
 */
static void
init_TSC_tc(void)
{
        uint64_t max_freq;
        int shift;

        if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
                return;

        /*
         * Limit timecounter frequency to fit in an int and prevent it from
         * overflowing too fast.
         */
        max_freq = UINT_MAX;

        /*
         * Intel CPUs without a C-state invariant TSC can stop the TSC
         * in either C2 or C3.  Disable use of C2 and C3 while using
         * the TSC as the timecounter.  The timecounter can be changed
         * to enable C2 and C3.
         *
         * Note that the TSC is used as the cputicker for computing
         * thread runtime regardless of the timecounter setting, so
         * using an alternate timecounter and enabling C2 or C3 can
         * result incorrect runtimes for kernel idle threads (but not
         * for any non-idle threads).
         */
        if (cpu_vendor_id == CPU_VENDOR_INTEL &&
            (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) {
                tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP;
                if (bootverbose)
                        printf("TSC timecounter disables C2 and C3.\n");
        }

        /*
         * We can not use the TSC in SMP mode unless the TSCs on all CPUs
         * are synchronized.  If the user is sure that the system has
         * synchronized TSCs, set kern.timecounter.smp_tsc tunable to a
         * non-zero value.  The TSC seems unreliable in virtualized SMP
         * environments, so it is set to a negative quality in those cases.
         *
         * Note the "else" below straddles the #ifdef: with SMP compiled in,
         * the invariant check applies only to the single-CPU case.
         */
#ifdef SMP
        if (mp_ncpus > 1)
                tsc_timecounter.tc_quality = test_tsc(smp_tsc_adjust);
        else
#endif /* SMP */
        if (tsc_is_invariant)
                tsc_timecounter.tc_quality = 1000;
        /*
         * NOTE(review): tsc_shift comes from a tunable and is not range
         * checked here; a negative or very large value would make this shift
         * undefined -- confirm whether validation happens elsewhere.
         */
        max_freq >>= tsc_shift;

        /* Smallest shift (at most 32) that brings tsc_freq under max_freq. */
        for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
                ;

        /*
         * Timecounter implementation selection, top to bottom:
         * - If RDTSCP is available, use RDTSCP.
         * - If fence instructions are provided (SSE2), use LFENCE;RDTSC
         *   on Intel, and MFENCE;RDTSC on AMD.
         * - For really old CPUs, just use RDTSC.
         *
         * This replaces whatever probe_tsc_freq_early() selected.  The fenced
         * variants are only chosen when more than one CPU is present.
         */
        if ((amd_feature & AMDID_RDTSCP) != 0) {
                tsc_timecounter.tc_get_timecount = shift > 0 ?
                    tscp_get_timecount_low : tscp_get_timecount;
        } else if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
                if (cpu_vendor_id == CPU_VENDOR_AMD ||
                    cpu_vendor_id == CPU_VENDOR_HYGON) {
                        tsc_timecounter.tc_get_timecount = shift > 0 ?
                            tsc_get_timecount_low_mfence :
                            tsc_get_timecount_mfence;
                } else {
                        tsc_timecounter.tc_get_timecount = shift > 0 ?
                            tsc_get_timecount_low_lfence :
                            tsc_get_timecount_lfence;
                }
        } else {
                tsc_timecounter.tc_get_timecount = shift > 0 ?
                    tsc_get_timecount_low : tsc_get_timecount;
        }
        if (shift > 0) {
                tsc_timecounter.tc_name = "TSC-low";
                if (bootverbose)
                        printf("TSC timecounter discards lower %d bit(s)\n",
                            shift);
        }
        if (tsc_freq != 0) {
                tsc_timecounter.tc_frequency = tsc_freq >> shift;
                /* The shift is stashed in tc_priv; tsc_update_freq reads it. */
                tsc_timecounter.tc_priv = (void *)(intptr_t)shift;

                /*
                 * Timecounter registration is deferred until after late
                 * calibration is finished.
                 */
        }
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);
  749 
  750 static void
  751 tsc_update_freq(uint64_t new_freq)
  752 {
  753         atomic_store_rel_64(&tsc_freq, new_freq);
  754         atomic_store_rel_64(&tsc_timecounter.tc_frequency,
  755             new_freq >> (int)(intptr_t)tsc_timecounter.tc_priv);
  756 }
  757 
  758 void
  759 tsc_init(void)
  760 {
  761         if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
  762                 return;
  763 
  764         probe_tsc_freq_early();
  765 }
  766 
  767 /*
  768  * Perform late calibration of the TSC frequency once ACPI-based timecounters
  769  * are available.  At this point timehands are not set up, so we read the
  770  * highest-quality timecounter directly rather than using (s)binuptime().
  771  */
  772 void
  773 tsc_calibrate(void)
  774 {
  775         uint64_t freq;
  776 
  777         if (tsc_disabled)
  778                 return;
  779         if (tsc_early_calib_exact)
  780                 goto calibrated;
  781 
  782         fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
  783         freq = clockcalib(rdtsc_ordered, "TSC");
  784         fpu_kern_leave(curthread, NULL);
  785         tsc_update_freq(freq);
  786 
  787 calibrated:
  788         tc_init(&tsc_timecounter);
  789         set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);
  790 }
  791 
  792 void
  793 resume_TSC(void)
  794 {
  795 #ifdef SMP
  796         int quality;
  797 
  798         /* If TSC was not good on boot, it is unlikely to become good now. */
  799         if (tsc_timecounter.tc_quality < 0)
  800                 return;
  801         /* Nothing to do with UP. */
  802         if (mp_ncpus < 2)
  803                 return;
  804 
  805         /*
  806          * If TSC was good, a single synchronization should be enough,
  807          * but honour smp_tsc_adjust if it's set.
  808          */
  809         quality = test_tsc(MAX(smp_tsc_adjust, 1));
  810         if (quality != tsc_timecounter.tc_quality) {
  811                 printf("TSC timecounter quality changed: %d -> %d\n",
  812                     tsc_timecounter.tc_quality, quality);
  813                 tsc_timecounter.tc_quality = quality;
  814         }
  815 #endif /* SMP */
  816 }
  817 
  818 /*
  819  * When cpufreq levels change, find out about the (new) max frequency.  We
  820  * use this to update CPU accounting in case it got a lower estimate at boot.
  821  */
  822 static void
  823 tsc_levels_changed(void *arg, int unit)
  824 {
  825         device_t cf_dev;
  826         struct cf_level *levels;
  827         int count, error;
  828         uint64_t max_freq;
  829 
  830         /* Only use values from the first CPU, assuming all are equal. */
  831         if (unit != 0)
  832                 return;
  833 
  834         /* Find the appropriate cpufreq device instance. */
  835         cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
  836         if (cf_dev == NULL) {
  837                 printf("tsc_levels_changed() called but no cpufreq device?\n");
  838                 return;
  839         }
  840 
  841         /* Get settings from the device and find the max frequency. */
  842         count = 64;
  843         levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
  844         if (levels == NULL)
  845                 return;
  846         error = CPUFREQ_LEVELS(cf_dev, levels, &count);
  847         if (error == 0 && count != 0) {
  848                 max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
  849                 set_cputicker(rdtsc, max_freq, true);
  850         } else
  851                 printf("tsc_levels_changed: no max freq found\n");
  852         free(levels, M_TEMP);
  853 }
  854 
  855 /*
  856  * If the TSC timecounter is in use, veto the pending change.  It may be
  857  * possible in the future to handle a dynamically-changing timecounter rate.
  858  */
  859 static void
  860 tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
  861 {
  862 
  863         if (*status != 0 || timecounter != &tsc_timecounter)
  864                 return;
  865 
  866         printf("timecounter TSC must not be in use when "
  867             "changing frequencies; change denied\n");
  868         *status = EBUSY;
  869 }
  870 
  871 /* Update TSC freq with the value indicated by the caller. */
  872 static void
  873 tsc_freq_changed(void *arg, const struct cf_level *level, int status)
  874 {
  875         uint64_t freq;
  876 
  877         /* If there was an error during the transition, don't do anything. */
  878         if (tsc_disabled || status != 0)
  879                 return;
  880 
  881         /* Total setting for this level gives the new frequency in MHz. */
  882         freq = (uint64_t)level->total_set.freq * 1000000;
  883         tsc_update_freq(freq);
  884 }
  885 
  886 static int
  887 sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
  888 {
  889         int error;
  890         uint64_t freq;
  891 
  892         freq = atomic_load_acq_64(&tsc_freq);
  893         if (freq == 0)
  894                 return (EOPNOTSUPP);
  895         error = sysctl_handle_64(oidp, &freq, 0, req);
  896         if (error == 0 && req->newptr != NULL)
  897                 tsc_update_freq(freq);
  898         return (error);
  899 }
  900 SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq,
  901     CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE,
  902     0, 0, sysctl_machdep_tsc_freq, "QU",
  903     "Time Stamp Counter frequency");
  904 
  905 static u_int
  906 tsc_get_timecount(struct timecounter *tc __unused)
  907 {
  908 
  909         return (rdtsc32());
  910 }
  911 
  912 static u_int
  913 tscp_get_timecount(struct timecounter *tc __unused)
  914 {
  915 
  916         return (rdtscp32());
  917 }
  918 
/*
 * Timecounter read method for the shifted ("TSC-low") case using plain
 * RDTSC.  The shift count is taken from tc->tc_priv.
 */
static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
        uint32_t rv;

        /*
         * RDTSC leaves the counter in EDX:EAX.  SHRD then shifts EAX right
         * by CL bits, filling the vacated high bits from EDX.  The shift
         * count is preloaded into ECX by the "c" input constraint, the
         * result is taken from EAX ("=a"), and EDX is declared clobbered.
         */
        __asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
            : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
        return (rv);
}
  928 
/*
 * Timecounter read method for the shifted ("TSC-low") case using RDTSCP.
 * The shift count is taken from tc->tc_priv.
 */
static u_int
tscp_get_timecount_low(struct timecounter *tc)
{
        uint32_t rv;

        /*
         * RDTSCP writes ECX as well as EDX:EAX, so the shift count cannot
         * be preloaded into ECX; it is fetched from memory with MOVL after
         * RDTSCP has executed.  SHRD then shifts EAX right by CL bits,
         * filling from EDX.  The output is early-clobber ("=&a") because
         * EAX is written before the memory input is read, and ECX and EDX
         * are declared clobbered.
         */
        __asm __volatile("rdtscp; movl %1, %%ecx; shrd %%cl, %%edx, %0"
            : "=&a" (rv) : "m" (tc->tc_priv) : "ecx", "edx");
        return (rv);
}
  938 
  939 static u_int
  940 tsc_get_timecount_lfence(struct timecounter *tc __unused)
  941 {
  942 
  943         lfence();
  944         return (rdtsc32());
  945 }
  946 
  947 static u_int
  948 tsc_get_timecount_low_lfence(struct timecounter *tc)
  949 {
  950 
  951         lfence();
  952         return (tsc_get_timecount_low(tc));
  953 }
  954 
  955 static u_int
  956 tsc_get_timecount_mfence(struct timecounter *tc __unused)
  957 {
  958 
  959         mfence();
  960         return (rdtsc32());
  961 }
  962 
  963 static u_int
  964 tsc_get_timecount_low_mfence(struct timecounter *tc)
  965 {
  966 
  967         mfence();
  968         return (tsc_get_timecount_low(tc));
  969 }
  970 
  971 static uint32_t
  972 x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
  973 {
  974 
  975         vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC;
  976         vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv;
  977         vdso_th->th_x86_hpet_idx = 0xffffffff;
  978         vdso_th->th_x86_pvc_last_systime = 0;
  979         vdso_th->th_x86_pvc_stable_mask = 0;
  980         bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
  981         return (1);
  982 }
  983 
  984 #ifdef COMPAT_FREEBSD32
  985 static uint32_t
  986 x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
  987     struct timecounter *tc)
  988 {
  989 
  990         vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC;
  991         vdso_th32->th_x86_shift = (int)(intptr_t)tc->tc_priv;
  992         vdso_th32->th_x86_hpet_idx = 0xffffffff;
  993         vdso_th32->th_x86_pvc_last_systime = 0;
  994         vdso_th32->th_x86_pvc_stable_mask = 0;
  995         bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
  996         return (1);
  997 }
  998 #endif

Cache object: a9997fecb94cd269596b722e3656a362


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.