The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_tc.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * ----------------------------------------------------------------------------
    3  * "THE BEER-WARE LICENSE" (Revision 42):
    4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
    5  * can do whatever you want with this stuff. If we meet some day, and you think
    6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
    7  * ----------------------------------------------------------------------------
    8  */
    9 
   10 #include <sys/cdefs.h>
   11 __FBSDID("$FreeBSD$");
   12 
   13 #include "opt_compat.h"
   14 #include "opt_ntp.h"
   15 
   16 #include <sys/param.h>
   17 #include <sys/kernel.h>
   18 #include <sys/sysctl.h>
   19 #include <sys/syslog.h>
   20 #include <sys/systm.h>
   21 #include <sys/timepps.h>
   22 #include <sys/timetc.h>
   23 #include <sys/timex.h>
   24 #include <sys/vdso.h>
   25 
/*
 * A large step happens on boot.  This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 *
 * tc_windup() compares the number of UTC seconds that elapsed since the
 * previous windup against this value; when the step is larger it only
 * performs two NTP second updates instead of one per elapsed second.
 */
#define LARGE_STEP      200
   33 
   34 /*
   35  * Implement a dummy timecounter which we can use until we get a real one
   36  * in the air.  This allows the console and other early stuff to use
   37  * time services.
   38  */
   39 
   40 static u_int
   41 dummy_get_timecount(struct timecounter *tc)
   42 {
   43         static u_int now;
   44 
   45         return (++now);
   46 }
   47 
/*
 * Placeholder timecounter backed by dummy_get_timecount().  It is the
 * initial value of `timecounter', the initial head of the `timecounters'
 * list and the counter of the first timehands (th0).
 * NOTE(review): the positional initializers presumably map, in order, to the
 * read callback, the PPS poll callback (none), the counter mask (~0u), the
 * frequency (1000000), the name and the quality (-1000000) -- confirm
 * against the struct timecounter declaration in <sys/timetc.h>.
 */
static struct timecounter dummy_timecounter = {
        dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
};
   51 
/*
 * One snapshot of the timekeeping state.  Snapshots live in a ring linked
 * through th_next; tc_windup() fills in the next entry and then publishes it
 * through the global `timehands' pointer.  Readers copy what they need and
 * retry if th_generation was zero or changed while they were reading.
 *
 * The field order matters: tc_windup() copies everything that precedes
 * th_generation with a single bcopy(), and the static ring entries are
 * initialized from this layout.
 */
struct timehands {
        /* These fields must be initialized by the driver. */
        /* Counter this snapshot was taken from. */
        struct timecounter      *th_counter;
        /* Rate adjustment handed to ntp_update_second() and used for th_scale. */
        int64_t                 th_adjustment;
        /* 1/2^64 fractions of a second per counter tick. */
        uint64_t                th_scale;
        /* Counter value at the time the snapshot was taken. */
        u_int                   th_offset_count;
        /* Uptime at the time the snapshot was taken. */
        struct bintime          th_offset;
        /* Cached UTC timestamps served by the get*() functions. */
        struct timeval          th_microtime;
        struct timespec         th_nanotime;
        /* Fields not to be copied in tc_windup start with th_generation. */
        /* Zero while the entry is being rewritten, non-zero once consistent. */
        volatile u_int          th_generation;
        /* Next entry in the ring. */
        struct timehands        *th_next;
};
   65 
   66 static struct timehands th0;
   67 static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
   68 static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
   69 static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
   70 static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
   71 static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
   72 static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
   73 static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
   74 static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
   75 static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
   76 static struct timehands th0 = {
   77         &dummy_timecounter,
   78         0,
   79         (uint64_t)-1 / 1000000,
   80         0,
   81         {1, 0},
   82         {0, 0},
   83         {0, 0},
   84         1,
   85         &th1
   86 };
   87 
/* Currently published timehands; replaced by tc_windup() on every windup. */
static struct timehands *volatile timehands = &th0;
/* Counter selected for use; tc_windup() switches the timehands over to it. */
struct timecounter *timecounter = &dummy_timecounter;
/* Head of the list of registered counters, linked through tc_next. */
static struct timecounter *timecounters = &dummy_timecounter;

/* Recomputed by tc_windup() whenever the active counter changes. */
int tc_min_ticktock_freq = 1;

/* Seconds of UTC and of uptime, refreshed by tc_windup(). */
volatile time_t time_second = 1;
volatile time_t time_uptime = 1;

/* Estimated boot time in both representations; stepped by tc_setclock(). */
struct bintime boottimebin;
struct timeval boottime;
static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD,
    NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime");

/* kern.timecounter and kern.timecounter.tc; tc_init() adds a child per counter. */
SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, CTLFLAG_RW, 0, "");

/* When non-zero, tc_setclock() logs every time step. */
static int timestepwarnings;
SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
    &timestepwarnings, 0, "Log time steps");

static void tc_windup(void);
static void cpu_tick_calibrate(int);

void dtrace_getnanotime(struct timespec *tsp);
  114 
  115 static int
  116 sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
  117 {
  118 #ifdef SCTL_MASK32
  119         int tv[2];
  120 
  121         if (req->flags & SCTL_MASK32) {
  122                 tv[0] = boottime.tv_sec;
  123                 tv[1] = boottime.tv_usec;
  124                 return SYSCTL_OUT(req, tv, sizeof(tv));
  125         } else
  126 #endif
  127                 return SYSCTL_OUT(req, &boottime, sizeof(boottime));
  128 }
  129 
  130 static int
  131 sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS)
  132 {
  133         u_int ncount;
  134         struct timecounter *tc = arg1;
  135 
  136         ncount = tc->tc_get_timecount(tc);
  137         return sysctl_handle_int(oidp, &ncount, 0, req);
  138 }
  139 
  140 static int
  141 sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS)
  142 {
  143         uint64_t freq;
  144         struct timecounter *tc = arg1;
  145 
  146         freq = tc->tc_frequency;
  147         return sysctl_handle_64(oidp, &freq, 0, req);
  148 }
  149 
  150 /*
  151  * Return the difference between the timehands' counter value now and what
  152  * was when we copied it to the timehands' offset_count.
  153  */
  154 static __inline u_int
  155 tc_delta(struct timehands *th)
  156 {
  157         struct timecounter *tc;
  158 
  159         tc = th->th_counter;
  160         return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
  161             tc->tc_counter_mask);
  162 }
  163 
  164 /*
  165  * Functions for reading the time.  We have to loop until we are sure that
  166  * the timehands that we operated on was not updated under our feet.  See
  167  * the comment in <sys/time.h> for a description of these 12 functions.
  168  */
  169 
  170 void
  171 binuptime(struct bintime *bt)
  172 {
  173         struct timehands *th;
  174         u_int gen;
  175 
  176         do {
  177                 th = timehands;
  178                 gen = th->th_generation;
  179                 *bt = th->th_offset;
  180                 bintime_addx(bt, th->th_scale * tc_delta(th));
  181         } while (gen == 0 || gen != th->th_generation);
  182 }
  183 
  184 void
  185 nanouptime(struct timespec *tsp)
  186 {
  187         struct bintime bt;
  188 
  189         binuptime(&bt);
  190         bintime2timespec(&bt, tsp);
  191 }
  192 
  193 void
  194 microuptime(struct timeval *tvp)
  195 {
  196         struct bintime bt;
  197 
  198         binuptime(&bt);
  199         bintime2timeval(&bt, tvp);
  200 }
  201 
  202 void
  203 bintime(struct bintime *bt)
  204 {
  205 
  206         binuptime(bt);
  207         bintime_add(bt, &boottimebin);
  208 }
  209 
  210 void
  211 nanotime(struct timespec *tsp)
  212 {
  213         struct bintime bt;
  214 
  215         bintime(&bt);
  216         bintime2timespec(&bt, tsp);
  217 }
  218 
  219 void
  220 microtime(struct timeval *tvp)
  221 {
  222         struct bintime bt;
  223 
  224         bintime(&bt);
  225         bintime2timeval(&bt, tvp);
  226 }
  227 
  228 void
  229 getbinuptime(struct bintime *bt)
  230 {
  231         struct timehands *th;
  232         u_int gen;
  233 
  234         do {
  235                 th = timehands;
  236                 gen = th->th_generation;
  237                 *bt = th->th_offset;
  238         } while (gen == 0 || gen != th->th_generation);
  239 }
  240 
  241 void
  242 getnanouptime(struct timespec *tsp)
  243 {
  244         struct timehands *th;
  245         u_int gen;
  246 
  247         do {
  248                 th = timehands;
  249                 gen = th->th_generation;
  250                 bintime2timespec(&th->th_offset, tsp);
  251         } while (gen == 0 || gen != th->th_generation);
  252 }
  253 
  254 void
  255 getmicrouptime(struct timeval *tvp)
  256 {
  257         struct timehands *th;
  258         u_int gen;
  259 
  260         do {
  261                 th = timehands;
  262                 gen = th->th_generation;
  263                 bintime2timeval(&th->th_offset, tvp);
  264         } while (gen == 0 || gen != th->th_generation);
  265 }
  266 
  267 void
  268 getbintime(struct bintime *bt)
  269 {
  270         struct timehands *th;
  271         u_int gen;
  272 
  273         do {
  274                 th = timehands;
  275                 gen = th->th_generation;
  276                 *bt = th->th_offset;
  277         } while (gen == 0 || gen != th->th_generation);
  278         bintime_add(bt, &boottimebin);
  279 }
  280 
  281 void
  282 getnanotime(struct timespec *tsp)
  283 {
  284         struct timehands *th;
  285         u_int gen;
  286 
  287         do {
  288                 th = timehands;
  289                 gen = th->th_generation;
  290                 *tsp = th->th_nanotime;
  291         } while (gen == 0 || gen != th->th_generation);
  292 }
  293 
  294 void
  295 getmicrotime(struct timeval *tvp)
  296 {
  297         struct timehands *th;
  298         u_int gen;
  299 
  300         do {
  301                 th = timehands;
  302                 gen = th->th_generation;
  303                 *tvp = th->th_microtime;
  304         } while (gen == 0 || gen != th->th_generation);
  305 }
  306 
  307 /*
  308  * This is a clone of getnanotime and used for walltimestamps.
  309  * The dtrace_ prefix prevents fbt from creating probes for
  310  * it so walltimestamp can be safely used in all fbt probes.
  311  */
  312 void
  313 dtrace_getnanotime(struct timespec *tsp)
  314 {
  315         struct timehands *th;
  316         u_int gen;
  317 
  318         do {
  319                 th = timehands;
  320                 gen = th->th_generation;
  321                 *tsp = th->th_nanotime;
  322         } while (gen == 0 || gen != th->th_generation);
  323 }
  324 
  325 /*
  326  * Initialize a new timecounter and possibly use it.
  327  */
  328 void
  329 tc_init(struct timecounter *tc)
  330 {
  331         u_int u;
  332         struct sysctl_oid *tc_root;
  333 
  334         u = tc->tc_frequency / tc->tc_counter_mask;
  335         /* XXX: We need some margin here, 10% is a guess */
  336         u *= 11;
  337         u /= 10;
  338         if (u > hz && tc->tc_quality >= 0) {
  339                 tc->tc_quality = -2000;
  340                 if (bootverbose) {
  341                         printf("Timecounter \"%s\" frequency %ju Hz",
  342                             tc->tc_name, (uintmax_t)tc->tc_frequency);
  343                         printf(" -- Insufficient hz, needs at least %u\n", u);
  344                 }
  345         } else if (tc->tc_quality >= 0 || bootverbose) {
  346                 printf("Timecounter \"%s\" frequency %ju Hz quality %d\n",
  347                     tc->tc_name, (uintmax_t)tc->tc_frequency,
  348                     tc->tc_quality);
  349         }
  350 
  351         tc->tc_next = timecounters;
  352         timecounters = tc;
  353         /*
  354          * Set up sysctl tree for this counter.
  355          */
  356         tc_root = SYSCTL_ADD_NODE(NULL,
  357             SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name,
  358             CTLFLAG_RW, 0, "timecounter description");
  359         SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
  360             "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0,
  361             "mask for implemented bits");
  362         SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
  363             "counter", CTLTYPE_UINT | CTLFLAG_RD, tc, sizeof(*tc),
  364             sysctl_kern_timecounter_get, "IU", "current timecounter value");
  365         SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
  366             "frequency", CTLTYPE_U64 | CTLFLAG_RD, tc, sizeof(*tc),
  367              sysctl_kern_timecounter_freq, "QU", "timecounter frequency");
  368         SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
  369             "quality", CTLFLAG_RD, &(tc->tc_quality), 0,
  370             "goodness of time counter");
  371         /*
  372          * Never automatically use a timecounter with negative quality.
  373          * Even though we run on the dummy counter, switching here may be
  374          * worse since this timecounter may not be monotonous.
  375          */
  376         if (tc->tc_quality < 0)
  377                 return;
  378         if (tc->tc_quality < timecounter->tc_quality)
  379                 return;
  380         if (tc->tc_quality == timecounter->tc_quality &&
  381             tc->tc_frequency < timecounter->tc_frequency)
  382                 return;
  383         (void)tc->tc_get_timecount(tc);
  384         (void)tc->tc_get_timecount(tc);
  385         timecounter = tc;
  386 }
  387 
  388 /* Report the frequency of the current timecounter. */
  389 uint64_t
  390 tc_getfrequency(void)
  391 {
  392 
  393         return (timehands->th_counter->tc_frequency);
  394 }
  395 
  396 /*
  397  * Step our concept of UTC.  This is done by modifying our estimate of
  398  * when we booted.
  399  * XXX: not locked.
  400  */
  401 void
  402 tc_setclock(struct timespec *ts)
  403 {
  404         struct timespec tbef, taft;
  405         struct bintime bt, bt2;
  406 
  407         cpu_tick_calibrate(1);
  408         nanotime(&tbef);
  409         timespec2bintime(ts, &bt);
  410         binuptime(&bt2);
  411         bintime_sub(&bt, &bt2);
  412         bintime_add(&bt2, &boottimebin);
  413         boottimebin = bt;
  414         bintime2timeval(&bt, &boottime);
  415 
  416         /* XXX fiddle all the little crinkly bits around the fiords... */
  417         tc_windup();
  418         nanotime(&taft);
  419         if (timestepwarnings) {
  420                 log(LOG_INFO,
  421                     "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n",
  422                     (intmax_t)tbef.tv_sec, tbef.tv_nsec,
  423                     (intmax_t)taft.tv_sec, taft.tv_nsec,
  424                     (intmax_t)ts->tv_sec, ts->tv_nsec);
  425         }
  426         cpu_tick_calibrate(1);
  427 }
  428 
/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 *
 * Takes no arguments and returns nothing.  On return the globals
 * timehands, time_second and time_uptime reflect the freshly filled-in
 * entry, and timekeep_push_vdso() has been called.
 */
static void
tc_windup(void)
{
        struct bintime bt;
        struct timehands *th, *tho;
        uint64_t scale;
        u_int delta, ncount, ogen;
        int i;
        time_t t;

        /*
         * Make the next timehands a copy of the current one, but do not
         * overwrite the generation or next pointer.  While we update
         * the contents, the generation must be zero.
         */
        tho = timehands;
        th = tho->th_next;
        ogen = th->th_generation;
        /* Readers that still hold this entry see 0 and retry. */
        th->th_generation = 0;
        bcopy(tho, th, offsetof(struct timehands, th_generation));

        /*
         * Capture a timecounter delta on the current timecounter and if
         * changing timecounters, a counter value from the new timecounter.
         * Update the offset fields accordingly.
         */
        delta = tc_delta(th);
        if (th->th_counter != timecounter)
                ncount = timecounter->tc_get_timecount(timecounter);
        else
                ncount = 0;     /* Only consumed when switching counters. */
        th->th_offset_count += delta;
        th->th_offset_count &= th->th_counter->tc_counter_mask;
        while (delta > th->th_counter->tc_frequency) {
                /* Eat complete unadjusted seconds. */
                delta -= th->th_counter->tc_frequency;
                th->th_offset.sec++;
        }
        if ((delta > th->th_counter->tc_frequency / 2) &&
            (th->th_scale * delta < ((uint64_t)1 << 63))) {
                /* The product th_scale * delta just barely overflows. */
                th->th_offset.sec++;
        }
        bintime_addx(&th->th_offset, th->th_scale * delta);

        /*
         * Hardware latching timecounters may not generate interrupts on
         * PPS events, so instead we poll them.  There is a finite risk that
         * the hardware might capture a count which is later than the one we
         * got above, and therefore possibly in the next NTP second which might
         * have a different rate than the current NTP second.  It doesn't
         * matter in practice.
         */
        if (tho->th_counter->tc_poll_pps)
                tho->th_counter->tc_poll_pps(tho->th_counter);

        /*
         * Deal with NTP second processing.  The for loop normally
         * iterates at most once, but in extreme situations it might
         * keep NTP sane if timeouts are not run for several seconds.
         * At boot, the time step can be large when the TOD hardware
         * has been read, so on really large steps, we call
         * ntp_update_second only twice.  We need to call it twice in
         * case we missed a leap second.
         */
        bt = th->th_offset;
        bintime_add(&bt, &boottimebin);
        /* Whole UTC seconds elapsed since the previous windup. */
        i = bt.sec - tho->th_microtime.tv_sec;
        if (i > LARGE_STEP)
                i = 2;
        for (; i > 0; i--) {
                t = bt.sec;
                ntp_update_second(&th->th_adjustment, &bt.sec);
                /* If NTP changed the second, move the boot time along. */
                if (bt.sec != t)
                        boottimebin.sec += bt.sec - t;
        }
        /* Update the UTC timestamps used by the get*() functions. */
        /* XXX shouldn't do this here.  Should force non-`get' versions. */
        bintime2timeval(&bt, &th->th_microtime);
        bintime2timespec(&bt, &th->th_nanotime);

        /* Now is a good time to change timecounters. */
        if (th->th_counter != timecounter) {
#ifndef __arm__
                /* Move the C2STOP accounting from the old counter to the new. */
                if ((timecounter->tc_flags & TC_FLAGS_C2STOP) != 0)
                        cpu_disable_c2_sleep++;
                if ((th->th_counter->tc_flags & TC_FLAGS_C2STOP) != 0)
                        cpu_disable_c2_sleep--;
#endif
                th->th_counter = timecounter;
                th->th_offset_count = ncount;
                tc_min_ticktock_freq = max(1, timecounter->tc_frequency /
                    (((uint64_t)timecounter->tc_counter_mask + 1) / 3));
        }

        /*-
         * Recalculate the scaling factor.  We want the number of 1/2^64
         * fractions of a second per period of the hardware counter, taking
         * into account the th_adjustment factor which the NTP PLL/adjtime(2)
         * processing provides us with.
         *
         * The th_adjustment is nanoseconds per second with 32 bit binary
         * fraction and we want 64 bit binary fraction of second:
         *
         *       x = a * 2^32 / 10^9 = a * 4.294967296
         *
         * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
         * we can only multiply by about 850 without overflowing, that
         * leaves no suitably precise fractions for multiply before divide.
         *
         * Divide before multiply with a fraction of 2199/512 results in a
         * systematic undercompensation of 10PPM of th_adjustment.  On a
         * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
         *
         * We happily sacrifice the lowest of the 64 bits of our result
         * to the goddess of code clarity.
         *
         */
        /* Work at half scale (2^63) and double the result at the end. */
        scale = (uint64_t)1 << 63;
        scale += (th->th_adjustment / 1024) * 2199;
        scale /= th->th_counter->tc_frequency;
        th->th_scale = scale * 2;

        /*
         * Now that the struct timehands is again consistent, set the new
         * generation number, making sure to not make it zero.
         */
        if (++ogen == 0)
                ogen = 1;
        th->th_generation = ogen;

        /* Go live with the new struct timehands. */
        time_second = th->th_microtime.tv_sec;
        time_uptime = th->th_offset.sec;
        timehands = th;
        timekeep_push_vdso();
}
  571 
  572 /* Report or change the active timecounter hardware. */
  573 static int
  574 sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
  575 {
  576         char newname[32];
  577         struct timecounter *newtc, *tc;
  578         int error;
  579 
  580         tc = timecounter;
  581         strlcpy(newname, tc->tc_name, sizeof(newname));
  582 
  583         error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
  584         if (error != 0 || req->newptr == NULL ||
  585             strcmp(newname, tc->tc_name) == 0)
  586                 return (error);
  587         for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
  588                 if (strcmp(newname, newtc->tc_name) != 0)
  589                         continue;
  590 
  591                 /* Warm up new timecounter. */
  592                 (void)newtc->tc_get_timecount(newtc);
  593                 (void)newtc->tc_get_timecount(newtc);
  594 
  595                 timecounter = newtc;
  596                 timekeep_push_vdso();
  597                 return (0);
  598         }
  599         return (EINVAL);
  600 }
  601 
  602 SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
  603     0, 0, sysctl_kern_timecounter_hardware, "A",
  604     "Timecounter hardware selected");
  605 
  606 
  607 /* Report or change the active timecounter hardware. */
  608 static int
  609 sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS)
  610 {
  611         char buf[32], *spc;
  612         struct timecounter *tc;
  613         int error;
  614 
  615         spc = "";
  616         error = 0;
  617         for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
  618                 sprintf(buf, "%s%s(%d)",
  619                     spc, tc->tc_name, tc->tc_quality);
  620                 error = SYSCTL_OUT(req, buf, strlen(buf));
  621                 spc = " ";
  622         }
  623         return (error);
  624 }
  625 
  626 SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, CTLTYPE_STRING | CTLFLAG_RD,
  627     0, 0, sysctl_kern_timecounter_choice, "A", "Timecounter hardware detected");
  628 
  629 /*
  630  * RFC 2783 PPS-API implementation.
  631  */
  632 
  633 int
  634 pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
  635 {
  636         pps_params_t *app;
  637         struct pps_fetch_args *fapi;
  638 #ifdef PPS_SYNC
  639         struct pps_kcbind_args *kapi;
  640 #endif
  641 
  642         KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl"));
  643         switch (cmd) {
  644         case PPS_IOC_CREATE:
  645                 return (0);
  646         case PPS_IOC_DESTROY:
  647                 return (0);
  648         case PPS_IOC_SETPARAMS:
  649                 app = (pps_params_t *)data;
  650                 if (app->mode & ~pps->ppscap)
  651                         return (EINVAL);
  652                 pps->ppsparam = *app;
  653                 return (0);
  654         case PPS_IOC_GETPARAMS:
  655                 app = (pps_params_t *)data;
  656                 *app = pps->ppsparam;
  657                 app->api_version = PPS_API_VERS_1;
  658                 return (0);
  659         case PPS_IOC_GETCAP:
  660                 *(int*)data = pps->ppscap;
  661                 return (0);
  662         case PPS_IOC_FETCH:
  663                 fapi = (struct pps_fetch_args *)data;
  664                 if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
  665                         return (EINVAL);
  666                 if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
  667                         return (EOPNOTSUPP);
  668                 pps->ppsinfo.current_mode = pps->ppsparam.mode;
  669                 fapi->pps_info_buf = pps->ppsinfo;
  670                 return (0);
  671         case PPS_IOC_KCBIND:
  672 #ifdef PPS_SYNC
  673                 kapi = (struct pps_kcbind_args *)data;
  674                 /* XXX Only root should be able to do this */
  675                 if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
  676                         return (EINVAL);
  677                 if (kapi->kernel_consumer != PPS_KC_HARDPPS)
  678                         return (EINVAL);
  679                 if (kapi->edge & ~pps->ppscap)
  680                         return (EINVAL);
  681                 pps->kcmode = kapi->edge;
  682                 return (0);
  683 #else
  684                 return (EOPNOTSUPP);
  685 #endif
  686         default:
  687                 return (ENOIOCTL);
  688         }
  689 }
  690 
  691 void
  692 pps_init(struct pps_state *pps)
  693 {
  694         pps->ppscap |= PPS_TSFMT_TSPEC;
  695         if (pps->ppscap & PPS_CAPTUREASSERT)
  696                 pps->ppscap |= PPS_OFFSETASSERT;
  697         if (pps->ppscap & PPS_CAPTURECLEAR)
  698                 pps->ppscap |= PPS_OFFSETCLEAR;
  699 }
  700 
  701 void
  702 pps_capture(struct pps_state *pps)
  703 {
  704         struct timehands *th;
  705 
  706         KASSERT(pps != NULL, ("NULL pps pointer in pps_capture"));
  707         th = timehands;
  708         pps->capgen = th->th_generation;
  709         pps->capth = th;
  710         pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
  711         if (pps->capgen != th->th_generation)
  712                 pps->capgen = 0;
  713 }
  714 
/*
 * Turn the counter value stored by pps_capture() into a timestamp for the
 * given edge and record it in the PPS state.
 *
 * event selects the edge: PPS_CAPTUREASSERT updates the assert fields, any
 * other value updates the clear fields.  The capture is silently dropped if
 * the timehands it was taken against has been rewritten in the meantime, or
 * if the timecounter differs from the one used for the previous event.
 * With PPS_SYNC defined and the edge bound via kcmode, the event is also
 * passed to hardpps().
 */
void
pps_event(struct pps_state *pps, int event)
{
        struct bintime bt;
        struct timespec ts, *tsp, *osp;
        u_int tcount, *pcount;
        int foff, fhard;
        pps_seq_t *pseq;

        KASSERT(pps != NULL, ("NULL pps pointer in pps_event"));
        /* If the timecounter was wound up underneath us, bail out. */
        if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
                return;

        /* Things would be easier with arrays. */
        /* Select the timestamp, offset, flags and counters of this edge. */
        if (event == PPS_CAPTUREASSERT) {
                tsp = &pps->ppsinfo.assert_timestamp;
                osp = &pps->ppsparam.assert_offset;
                foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
                fhard = pps->kcmode & PPS_CAPTUREASSERT;
                pcount = &pps->ppscount[0];
                pseq = &pps->ppsinfo.assert_sequence;
        } else {
                tsp = &pps->ppsinfo.clear_timestamp;
                osp = &pps->ppsparam.clear_offset;
                foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
                fhard = pps->kcmode & PPS_CAPTURECLEAR;
                pcount = &pps->ppscount[1];
                pseq = &pps->ppsinfo.clear_sequence;
        }

        /*
         * If the timecounter changed, we cannot compare the count values, so
         * we have to drop the rest of the PPS-stuff until the next event.
         */
        if (pps->ppstc != pps->capth->th_counter) {
                pps->ppstc = pps->capth->th_counter;
                *pcount = pps->capcount;
                pps->ppscount[2] = pps->capcount;
                return;
        }

        /* Convert the count to a timespec. */
        /* Same arithmetic as bintime(), but on the captured count. */
        tcount = pps->capcount - pps->capth->th_offset_count;
        tcount &= pps->capth->th_counter->tc_counter_mask;
        bt = pps->capth->th_offset;
        bintime_addx(&bt, pps->capth->th_scale * tcount);
        bintime_add(&bt, &boottimebin);
        bintime2timespec(&bt, &ts);

        /* If the timecounter was wound up underneath us, bail out. */
        if (pps->capgen != pps->capth->th_generation)
                return;

        *pcount = pps->capcount;
        (*pseq)++;
        *tsp = ts;

        /* Apply the configured offset and renormalize tv_nsec if needed. */
        if (foff) {
                timespecadd(tsp, osp);
                if (tsp->tv_nsec < 0) {
                        tsp->tv_nsec += 1000000000;
                        tsp->tv_sec -= 1;
                }
        }
#ifdef PPS_SYNC
        if (fhard) {
                uint64_t scale;

                /*
                 * Feed the NTP PLL/FLL.
                 * The FLL wants to know how many (hardware) nanoseconds
                 * elapsed since the previous event.
                 */
                tcount = pps->capcount - pps->ppscount[2];
                pps->ppscount[2] = pps->capcount;
                tcount &= pps->capth->th_counter->tc_counter_mask;
                /* Unadjusted scale: 2^64 / frequency, computed at half scale. */
                scale = (uint64_t)1 << 63;
                scale /= pps->capth->th_counter->tc_frequency;
                scale *= 2;
                bt.sec = 0;
                bt.frac = 0;
                bintime_addx(&bt, scale * tcount);
                bintime2timespec(&bt, &ts);
                hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
        }
#endif
}
  803 
  804 /*
  805  * Timecounters need to be updated every so often to prevent the hardware
  806  * counter from overflowing.  Updating also recalculates the cached values
  807  * used by the get*() family of functions, so their precision depends on
  808  * the update frequency.
  809  */
  810 
  811 static int tc_tick;
  812 SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0,
  813     "Approximate number of hardclock ticks in a millisecond");
  814 
  815 void
  816 tc_ticktock(int cnt)
  817 {
  818         static int count;
  819 
  820         count += cnt;
  821         if (count < tc_tick)
  822                 return;
  823         count = 0;
  824         tc_windup();
  825 }
  826 
  827 static void
  828 inittimecounter(void *dummy)
  829 {
  830         u_int p;
  831 
  832         /*
  833          * Set the initial timeout to
  834          * max(1, <approx. number of hardclock ticks in a millisecond>).
  835          * People should probably not use the sysctl to set the timeout
  836          * to smaller than its inital value, since that value is the
  837          * smallest reasonable one.  If they want better timestamps they
  838          * should use the non-"get"* functions.
  839          */
  840         if (hz > 1000)
  841                 tc_tick = (hz + 500) / 1000;
  842         else
  843                 tc_tick = 1;
  844         p = (tc_tick * 1000000) / hz;
  845         printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
  846 
  847         /* warm up new timecounter (again) and get rolling. */
  848         (void)timecounter->tc_get_timecount(timecounter);
  849         (void)timecounter->tc_get_timecount(timecounter);
  850         tc_windup();
  851 }
  852 
  853 SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);
  854 
  855 /* Cpu tick handling -------------------------------------------------*/
  856 
/*
 * Copy of the 'var' argument given to set_cputicker().  When it is zero,
 * cpu_tick_calibrate() treats the ticker as fixed rate and does nothing.
 */
static int cpu_tick_variable;
/*
 * Rate (Hz) of the ticker installed by set_cputicker().  It is raised by
 * cpu_tick_calibrate() when a higher rate is measured and is reported by
 * cpu_tickrate() whenever a non-default ticker is in use.
 */
static uint64_t cpu_tick_frequency;
  859 
  860 static uint64_t
  861 tc_cpu_ticks(void)
  862 {
  863         static uint64_t base;
  864         static unsigned last;
  865         unsigned u;
  866         struct timecounter *tc;
  867 
  868         tc = timehands->th_counter;
  869         u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
  870         if (u < last)
  871                 base += (uint64_t)tc->tc_counter_mask + 1;
  872         last = u;
  873         return (u + base);
  874 }
  875 
  876 void
  877 cpu_tick_calibration(void)
  878 {
  879         static time_t last_calib;
  880 
  881         if (time_uptime != last_calib && !(time_uptime & 0xf)) {
  882                 cpu_tick_calibrate(0);
  883                 last_calib = time_uptime;
  884         }
  885 }
  886 
  887 /*
  888  * This function gets called every 16 seconds on only one designated
  889  * CPU in the system from hardclock() via cpu_tick_calibration()().
  890  *
  891  * Whenever the real time clock is stepped we get called with reset=1
  892  * to make sure we handle suspend/resume and similar events correctly.
  893  */
  894 
  895 static void
  896 cpu_tick_calibrate(int reset)
  897 {
  898         static uint64_t c_last;
  899         uint64_t c_this, c_delta;
  900         static struct bintime  t_last;
  901         struct bintime t_this, t_delta;
  902         uint32_t divi;
  903 
  904         if (reset) {
  905                 /* The clock was stepped, abort & reset */
  906                 t_last.sec = 0;
  907                 return;
  908         }
  909 
  910         /* we don't calibrate fixed rate cputicks */
  911         if (!cpu_tick_variable)
  912                 return;
  913 
  914         getbinuptime(&t_this);
  915         c_this = cpu_ticks();
  916         if (t_last.sec != 0) {
  917                 c_delta = c_this - c_last;
  918                 t_delta = t_this;
  919                 bintime_sub(&t_delta, &t_last);
  920                 /*
  921                  * Headroom:
  922                  *      2^(64-20) / 16[s] =
  923                  *      2^(44) / 16[s] =
  924                  *      17.592.186.044.416 / 16 =
  925                  *      1.099.511.627.776 [Hz]
  926                  */
  927                 divi = t_delta.sec << 20;
  928                 divi |= t_delta.frac >> (64 - 20);
  929                 c_delta <<= 20;
  930                 c_delta /= divi;
  931                 if (c_delta > cpu_tick_frequency) {
  932                         if (0 && bootverbose)
  933                                 printf("cpu_tick increased to %ju Hz\n",
  934                                     c_delta);
  935                         cpu_tick_frequency = c_delta;
  936                 }
  937         }
  938         c_last = c_this;
  939         t_last = t_this;
  940 }
  941 
  942 void
  943 set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var)
  944 {
  945 
  946         if (func == NULL) {
  947                 cpu_ticks = tc_cpu_ticks;
  948         } else {
  949                 cpu_tick_frequency = freq;
  950                 cpu_tick_variable = var;
  951                 cpu_ticks = func;
  952         }
  953 }
  954 
  955 uint64_t
  956 cpu_tickrate(void)
  957 {
  958 
  959         if (cpu_ticks == tc_cpu_ticks) 
  960                 return (tc_getfrequency());
  961         return (cpu_tick_frequency);
  962 }
  963 
  964 /*
  965  * We need to be slightly careful converting cputicks to microseconds.
  966  * There is plenty of margin in 64 bits of microseconds (half a million
  967  * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply
  968  * before divide conversion (to retain precision) we find that the
  969  * margin shrinks to 1.5 hours (one millionth of 146y).
  970  * With a three prong approach we never lose significant bits, no
  971  * matter what the cputick rate and length of timeinterval is.
  972  */
  973 
  974 uint64_t
  975 cputick2usec(uint64_t tick)
  976 {
  977 
  978         if (tick > 18446744073709551LL)         /* floor(2^64 / 1000) */
  979                 return (tick / (cpu_tickrate() / 1000000LL));
  980         else if (tick > 18446744073709LL)       /* floor(2^64 / 1000000) */
  981                 return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
  982         else
  983                 return ((tick * 1000000LL) / cpu_tickrate());
  984 }
  985 
/*
 * Function used to read the cpu tick counter.  Starts out as the
 * timecounter based tc_cpu_ticks() and is replaced by set_cputicker().
 */
cpu_tick_f	*cpu_ticks = tc_cpu_ticks;
  987 
  988 static int vdso_th_enable = 1;
  989 static int
  990 sysctl_fast_gettime(SYSCTL_HANDLER_ARGS)
  991 {
  992         int old_vdso_th_enable, error;
  993 
  994         old_vdso_th_enable = vdso_th_enable;
  995         error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req);
  996         if (error != 0)
  997                 return (error);
  998         vdso_th_enable = old_vdso_th_enable;
  999         timekeep_push_vdso();
 1000         return (0);
 1001 }
 1002 SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime,
 1003     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 1004     NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day");
 1005 
 1006 uint32_t
 1007 tc_fill_vdso_timehands(struct vdso_timehands *vdso_th)
 1008 {
 1009         struct timehands *th;
 1010         uint32_t enabled;
 1011 
 1012         th = timehands;
 1013         vdso_th->th_algo = VDSO_TH_ALGO_1;
 1014         vdso_th->th_scale = th->th_scale;
 1015         vdso_th->th_offset_count = th->th_offset_count;
 1016         vdso_th->th_counter_mask = th->th_counter->tc_counter_mask;
 1017         vdso_th->th_offset = th->th_offset;
 1018         vdso_th->th_boottime = boottimebin;
 1019         enabled = cpu_fill_vdso_timehands(vdso_th);
 1020         if (!vdso_th_enable)
 1021                 enabled = 0;
 1022         return (enabled);
 1023 }
 1024 
 1025 #ifdef COMPAT_FREEBSD32
 1026 uint32_t
 1027 tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
 1028 {
 1029         struct timehands *th;
 1030         uint32_t enabled;
 1031 
 1032         th = timehands;
 1033         vdso_th32->th_algo = VDSO_TH_ALGO_1;
 1034         *(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale;
 1035         vdso_th32->th_offset_count = th->th_offset_count;
 1036         vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask;
 1037         vdso_th32->th_offset.sec = th->th_offset.sec;
 1038         *(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac;
 1039         vdso_th32->th_boottime.sec = boottimebin.sec;
 1040         *(uint64_t *)&vdso_th32->th_boottime.frac[0] = boottimebin.frac;
 1041         enabled = cpu_fill_vdso_timehands32(vdso_th32);
 1042         if (!vdso_th_enable)
 1043                 enabled = 0;
 1044         return (enabled);
 1045 }
 1046 #endif

Cache object: cd23f17e3702a43c87aaab756b7b7098


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.