FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_tc.c
1 /*-
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
8 *
9 * $FreeBSD: releng/5.0/sys/kern/kern_tc.c 108837 2003-01-06 22:47:16Z peter $
10 */
11
12 #include "opt_ntp.h"
13
14 #include <sys/param.h>
15 #include <sys/kernel.h>
16 #include <sys/sysctl.h>
17 #include <sys/systm.h>
18 #include <sys/timepps.h>
19 #include <sys/timetc.h>
20 #include <sys/timex.h>
21
22 /*
23 * Implement a dummy timecounter which we can use until we get a real one
24 * in the air. This allows the console and other early stuff to use
25 * time services.
26 */
27
28 static u_int
29 dummy_get_timecount(struct timecounter *tc)
30 {
31 static u_int now;
32
33 return (++now);
34 }
35
/*
 * The dummy timecounter itself: get method above, no PPS poll method,
 * full 32-bit counter mask, nominal 1 MHz frequency, name "dummy".
 * It is replaced as soon as a real timecounter calls tc_init().
 */
static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy",
};
39
/*
 * One consistent snapshot of timecounter state. Ten of these are
 * chained in a ring; tc_windup() prepares the next one and then
 * publishes it by flipping the global "timehands" pointer, so time
 * readers never need a lock: they just re-check th_generation.
 */
struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;	/* hardware counter in use */
	int64_t		th_adjustment;	/* NTP correction: ns/s, 32-bit binary fraction */
	u_int64_t	th_scale;	/* counter tick -> 2^-64 sec fractions */
	u_int		th_offset_count;	/* counter reading at th_offset */
	struct bintime	th_offset;	/* uptime when th_offset_count was read */
	struct timeval	th_microtime;	/* cached wall clock, timeval form */
	struct timespec	th_nanotime;	/* cached wall clock, timespec form */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int	th_generation;	/* 0 while mid-update; readers retry on change */
	struct timehands	*th_next;	/* next slot in the ring */
};
53
/*
 * The ring of ten timehands. Only th0 starts out live (generation 1,
 * dummy timecounter, scale matching the dummy's nominal 1 MHz); the
 * rest are blank and become valid as tc_windup() cycles through them.
 * The forward "extern" lets th9 close the ring back to th0.
 */
extern struct timehands th0;
static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
static struct timehands th0 = {
	&dummy_timecounter,
	0,
	(uint64_t)-1 / 1000000,		/* scale for the dummy's 1 MHz */
	0,
	{1, 0},				/* uptime starts at 1 second */
	{0, 0},
	{0, 0},
	1,				/* nonzero generation: valid now */
	&th1
};

/* The currently published snapshot; readers load this pointer lock-free. */
static struct timehands *volatile timehands = &th0;
/* The timecounter selected for use; tc_windup() migrates timehands to it. */
struct timecounter *timecounter = &dummy_timecounter;
/* Head of the singly-linked list of all registered timecounters. */
static struct timecounter *timecounters = &dummy_timecounter;
79
/* Cheap whole-second wall clock and uptime, refreshed by tc_windup(). */
time_t time_second = 1;
time_t time_uptime = 0;

/*
 * Estimated time of boot. Added to uptime to produce wall-clock time;
 * tc_setclock() steps the clock by rewriting these.
 */
static struct bintime boottimebin;
struct timeval boottime;
SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
    &boottime, timeval, "System boottime");

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

/*
 * Per-function call counters, exported read-only as
 * kern.timecounter.<name>. The trailing "struct __hack" exists only
 * to swallow the semicolon at each expansion site.
 */
#define TC_STATS(foo) \
static u_int foo; \
SYSCTL_UINT(_kern_timecounter, OID_AUTO, foo, CTLFLAG_RD, &foo, 0, "");\
struct __hack

TC_STATS(nbinuptime); TC_STATS(nnanouptime); TC_STATS(nmicrouptime);
TC_STATS(nbintime); TC_STATS(nnanotime); TC_STATS(nmicrotime);
TC_STATS(ngetbinuptime); TC_STATS(ngetnanouptime); TC_STATS(ngetmicrouptime);
TC_STATS(ngetbintime); TC_STATS(ngetnanotime); TC_STATS(ngetmicrotime);

#undef TC_STATS

static void tc_windup(void);
103
104 /*
105 * Return the difference between the timehands' counter value now and what
106 * was when we copied it to the timehands' offset_count.
107 */
108 static __inline u_int
109 tc_delta(struct timehands *th)
110 {
111 struct timecounter *tc;
112
113 tc = th->th_counter;
114 return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
115 tc->tc_counter_mask);
116 }
117
/*
 * Functions for reading the time. We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet. See
 * the comment in <sys/time.h> for a description of these 12 functions.
 */

/*
 * Set *bt to the current uptime as a bintime, with full hardware
 * precision. Lock-free: sample the published timehands, then re-check
 * its generation; retry if it was zero (mid-update) or changed.
 */
void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	nbinuptime++;
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
		/* Advance by (elapsed counter ticks) * (fraction per tick). */
		bintime_addx(bt, th->th_scale * tc_delta(th));
	} while (gen == 0 || gen != th->th_generation);
}
138
139 void
140 nanouptime(struct timespec *tsp)
141 {
142 struct bintime bt;
143
144 nnanouptime++;
145 binuptime(&bt);
146 bintime2timespec(&bt, tsp);
147 }
148
149 void
150 microuptime(struct timeval *tvp)
151 {
152 struct bintime bt;
153
154 nmicrouptime++;
155 binuptime(&bt);
156 bintime2timeval(&bt, tvp);
157 }
158
159 void
160 bintime(struct bintime *bt)
161 {
162
163 nbintime++;
164 binuptime(bt);
165 bintime_add(bt, &boottimebin);
166 }
167
168 void
169 nanotime(struct timespec *tsp)
170 {
171 struct bintime bt;
172
173 nnanotime++;
174 bintime(&bt);
175 bintime2timespec(&bt, tsp);
176 }
177
178 void
179 microtime(struct timeval *tvp)
180 {
181 struct bintime bt;
182
183 nmicrotime++;
184 bintime(&bt);
185 bintime2timeval(&bt, tvp);
186 }
187
/*
 * Like binuptime(), but returns the uptime cached at the last
 * tc_windup() instead of reading the hardware: cheaper, but only as
 * fresh as the last windup. Same lock-free generation-retry protocol.
 */
void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	ngetbinuptime++;
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
}
201
/*
 * Cached-uptime variant of nanouptime(): converts the last windup's
 * th_offset without touching the hardware counter.
 */
void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	ngetnanouptime++;
	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timespec(&th->th_offset, tsp);
	} while (gen == 0 || gen != th->th_generation);
}
215
/*
 * Cached-uptime variant of microuptime(): converts the last windup's
 * th_offset without touching the hardware counter.
 */
void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	ngetmicrouptime++;
	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timeval(&th->th_offset, tvp);
	} while (gen == 0 || gen != th->th_generation);
}
229
/*
 * Cached wall-clock time as a bintime: last windup's uptime plus the
 * boot time. The boottimebin add is outside the retry loop; it is
 * not covered by the generation check.
 */
void
getbintime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	ngetbintime++;
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
	bintime_add(bt, &boottimebin);
}
244
/*
 * Cached wall-clock time with nanosecond fields: simply copies the
 * th_nanotime timestamp pre-computed by tc_windup().
 */
void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	ngetnanotime++;
	do {
		th = timehands;
		gen = th->th_generation;
		*tsp = th->th_nanotime;
	} while (gen == 0 || gen != th->th_generation);
}
258
/*
 * Cached wall-clock time with microsecond fields: simply copies the
 * th_microtime timestamp pre-computed by tc_windup().
 */
void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	ngetmicrotime++;
	do {
		th = timehands;
		gen = th->th_generation;
		*tvp = th->th_microtime;
	} while (gen == 0 || gen != th->th_generation);
}
272
/*
 * Initialize a new timecounter.
 * We should really try to rank the timecounters and intelligently determine
 * if the new timecounter is better than the current one. This is subject
 * to further study. For now always use the new timecounter.
 */
void
tc_init(struct timecounter *tc)
{
	unsigned u;

	printf("Timecounter \"%s\" frequency %lu Hz",
	    tc->tc_name, (u_long)tc->tc_frequency);

	/*
	 * frequency / mask is the minimum rate at which the counter must
	 * be sampled to detect wrap-around; reject counters that would
	 * need more than hz samples per second.
	 */
	u = tc->tc_frequency / tc->tc_counter_mask;
	if (u > hz) {
		printf(" -- Insufficient hz, needs at least %u\n", u);
		return;
	}
	tc->tc_next = timecounters;
	timecounters = tc;
	printf("\n");
	/* Warm up the counter with two throwaway reads, then select it. */
	(void)tc->tc_get_timecount(tc);
	(void)tc->tc_get_timecount(tc);
	timecounter = tc;
}
299
300 /* Report the frequency of the current timecounter. */
301 u_int32_t
302 tc_getfrequency(void)
303 {
304
305 return (timehands->th_counter->tc_frequency);
306 }
307
/*
 * Step our concept of GMT. This is done by modifying our estimate of
 * when we booted. XXX: needs further work.
 */
void
tc_setclock(struct timespec *ts)
{
	struct timespec ts2;

	nanouptime(&ts2);
	/* New boot time = desired wall-clock time minus current uptime. */
	boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
	/* XXX boottime should probably be a timespec. */
	boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
	if (boottime.tv_usec < 0) {
		/* Normalize so tv_usec lies in [0, 1000000). */
		boottime.tv_usec += 1000000;
		boottime.tv_sec--;
	}
	timeval2bintime(&boottime, &boottimebin);

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup();
}
330
/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands. Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP. Slightly magic.
 */
static void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;
	int i;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer. While we update
	 * the contents, the generation must be zero. Readers seeing
	 * generation 0 (or a changed generation) will retry.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	bcopy(tho, th, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them. There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second. It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing. The for loop normally only
	 * iterates once, but in extreme situations it might keep NTP sane
	 * if timeouts are not run for several seconds.
	 */
	for (i = th->th_offset.sec - tho->th_offset.sec; i > 0; i--)
		ntp_update_second(&th->th_adjustment, &th->th_offset.sec);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
	}

	/*-
	 * Recalculate the scaling factor. We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment. On a
	 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
	scale = (u_int64_t)1 << 63;
	scale += (th->th_adjustment / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;

	/* Update the GMT timestamps used for the get*() functions. */
	bt = th->th_offset;
	bintime_add(&bt, &boottimebin);
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	th->th_generation = ogen;

	/* Go live with the new struct timehands. */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	timehands = th;
}
442
/*
 * Report or change the active timecounter hardware via the
 * kern.timecounter.hardware sysctl. Reading returns the current
 * counter's name; writing a registered counter's name selects it.
 * Returns 0 on success, EINVAL if the name matches no registered
 * counter, or the sysctl layer's error.
 */
static int
sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
	/* Read-only access, error, or no actual change: nothing to do. */
	if (error != 0 || req->newptr == NULL ||
	    strcmp(newname, tc->tc_name) == 0)
		return (error);
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);

		timecounter = newtc;
		return (0);
	}
	return (EINVAL);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_kern_timecounter_hardware, "A", "");
474
475 /*
476 * RFC 2783 PPS-API implementation.
477 */
478
/*
 * Generic ioctl dispatcher for the RFC 2783 PPS API, called by drivers
 * that expose a pps_state. Returns 0 on success, EINVAL for bad
 * arguments, EOPNOTSUPP for unimplemented features (timeouts,
 * kernel consumer without PPS_SYNC), ENOTTY for unknown commands.
 */
int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
	pps_params_t *app;
	struct pps_fetch_args *fapi;
#ifdef PPS_SYNC
	struct pps_kcbind_args *kapi;
#endif

	switch (cmd) {
	case PPS_IOC_CREATE:
		return (0);
	case PPS_IOC_DESTROY:
		return (0);
	case PPS_IOC_SETPARAMS:
		/* Reject any mode bit the driver did not advertise. */
		app = (pps_params_t *)data;
		if (app->mode & ~pps->ppscap)
			return (EINVAL);
		pps->ppsparam = *app;
		return (0);
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return (0);
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return (0);
	case PPS_IOC_FETCH:
		fapi = (struct pps_fetch_args *)data;
		if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		/* Blocking fetch (non-zero timeout) is not implemented. */
		if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
			return (EOPNOTSUPP);
		pps->ppsinfo.current_mode = pps->ppsparam.mode;
		fapi->pps_info_buf = pps->ppsinfo;
		return (0);
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		kapi = (struct pps_kcbind_args *)data;
		/* XXX Only root should be able to do this */
		if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (kapi->kernel_consumer != PPS_KC_HARDPPS)
			return (EINVAL);
		if (kapi->edge & ~pps->ppscap)
			return (EINVAL);
		pps->kcmode = kapi->edge;
		return (0);
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (ENOTTY);
	}
}
535
536 void
537 pps_init(struct pps_state *pps)
538 {
539 pps->ppscap |= PPS_TSFMT_TSPEC;
540 if (pps->ppscap & PPS_CAPTUREASSERT)
541 pps->ppscap |= PPS_OFFSETASSERT;
542 if (pps->ppscap & PPS_CAPTURECLEAR)
543 pps->ppscap |= PPS_OFFSETCLEAR;
544 }
545
/*
 * Snapshot the timecounter state at a PPS signal edge. Intended to be
 * cheap enough to call from the capture interrupt; pps_event() does
 * the conversion work later. The generation is read before and after
 * the counter read; if it changed, capgen is zeroed so pps_event()
 * discards the sample.
 */
void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	th = timehands;
	pps->capgen = th->th_generation;
	pps->capth = th;
	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}
558
/*
 * Process a PPS edge previously recorded by pps_capture(): convert the
 * captured counter value to a timestamp, update the assert/clear info,
 * apply the configured offset, and (with PPS_SYNC) feed hardpps().
 * "event" selects PPS_CAPTUREASSERT or the clear-edge bookkeeping.
 */
void
pps_event(struct pps_state *pps, int event)
{
	struct bintime bt;
	struct timespec ts, *tsp, *osp;
	u_int tcount, *pcount;
	int foff, fhard;
	pps_seq_t *pseq;

	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
		return;

	/* Things would be easier with arrays. */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS-stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		*pcount = pps->capcount;
		pps->ppscount[2] = pps->capcount;
		return;
	}

	/* Return if nothing really happened. */
	if (*pcount == pps->capcount)
		return;

	/* Convert the count to a timespec. */
	tcount = pps->capcount - pps->capth->th_offset_count;
	tcount &= pps->capth->th_counter->tc_counter_mask;
	bt = pps->capth->th_offset;
	bintime_addx(&bt, pps->capth->th_scale * tcount);
	bintime_add(&bt, &boottimebin);
	bintime2timespec(&bt, &ts);

	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen != pps->capth->th_generation)
		return;

	/* Commit: record the count, bump the sequence, publish the stamp. */
	*pcount = pps->capcount;
	(*pseq)++;
	*tsp = ts;

	if (foff) {
		/* Apply the user-configured offset and renormalize. */
		timespecadd(tsp, osp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}
#ifdef PPS_SYNC
	if (fhard) {
		/*
		 * Feed the NTP PLL/FLL.
		 * The FLL wants to know how many nanoseconds elapsed since
		 * the previous event.
		 * I have never been able to convince myself that this code
		 * is actually correct: Using th_scale is bound to contain
		 * a phase correction component from userland, when running
		 * as FLL, so the number hardpps() gets is not meaningful IMO.
		 */
		tcount = pps->capcount - pps->ppscount[2];
		pps->ppscount[2] = pps->capcount;
		tcount &= pps->capth->th_counter->tc_counter_mask;
		bt.sec = 0;
		bt.frac = 0;
		bintime_addx(&bt, pps->capth->th_scale * tcount);
		bintime2timespec(&bt, &ts);
		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
	}
#endif
}
649
650 /*
651 * Timecounters need to be updated every so often to prevent the hardware
652 * counter from overflowing. Updating also recalculates the cached values
653 * used by the get*() family of functions, so their precision depends on
654 * the update frequency.
655 */
656
657 static int tc_tick;
658 SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tick, 0, "");
659
660 void
661 tc_ticktock(void)
662 {
663 static int count;
664
665 if (++count < tc_tick)
666 return;
667 count = 0;
668 tc_windup();
669 }
670
671 static void
672 inittimecounter(void *dummy)
673 {
674 u_int p;
675
676 /*
677 * Set the initial timeout to
678 * max(1, <approx. number of hardclock ticks in a millisecond>).
679 * People should probably not use the sysctl to set the timeout
680 * to smaller than its inital value, since that value is the
681 * smallest reasonable one. If they want better timestamps they
682 * should use the non-"get"* functions.
683 */
684 if (hz > 1000)
685 tc_tick = (hz + 500) / 1000;
686 else
687 tc_tick = 1;
688 p = (tc_tick * 1000000) / hz;
689 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
690
691 /* warm up new timecounter (again) and get rolling. */
692 (void)timecounter->tc_get_timecount(timecounter);
693 (void)timecounter->tc_get_timecount(timecounter);
694 }
695
696 SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL)
Cache object: f6116a129e03946f66eb56cc3476bac2
|