FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_clock.c
1 /*-
2 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
3 * Copyright (c) 1982, 1986, 1991, 1993
4 * The Regents of the University of California. All rights reserved.
5 * (c) UNIX System Laboratories, Inc.
6 * All or some portions of this file are derived from material licensed
7 * to the University of California by American Telephone and Telegraph
8 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
9 * the permission of UNIX System Laboratories, Inc.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
40 * $FreeBSD$
41 */
42
43 #include "opt_ntp.h"
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/dkstat.h>
48 #include <sys/callout.h>
49 #include <sys/kernel.h>
50 #include <sys/proc.h>
51 #include <sys/malloc.h>
52 #include <sys/resourcevar.h>
53 #include <sys/signalvar.h>
54 #include <sys/timex.h>
55 #include <sys/timepps.h>
56 #include <vm/vm.h>
57 #include <sys/lock.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_map.h>
60 #include <sys/sysctl.h>
61
62 #include <machine/cpu.h>
63 #include <machine/limits.h>
64 #include <machine/smp.h>
65
66 #ifdef GPROF
67 #include <sys/gmon.h>
68 #endif
69
70 #ifdef DEVICE_POLLING
71 extern void init_device_poll(void);
72 extern void hardclock_device_poll(void);
73 #endif /* DEVICE_POLLING */
74
75 /*
76 * A large step happens on boot. This constant detects such
77 * steps. It is relatively small so that ntp_update_second gets called
78 * enough in the typical 'missed a couple of seconds' case, but doesn't
79 * loop forever when the time step is large.
80 */
81 #define LARGE_STEP 200
82
83 /*
84 * Number of timecounters used to implement stable storage
85 */
86 #ifndef NTIMECOUNTER
87 #define NTIMECOUNTER 5
88 #endif
89
90 static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter",
91 "Timecounter stable storage");
92
93 static void initclocks __P((void *dummy));
94 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
95
96 static void tco_forward __P((int force));
97 static void tco_setscales __P((struct timecounter *tc));
98 static __inline unsigned tco_delta __P((struct timecounter *tc));
99
100 /* Some of these don't belong here, but it's easiest to concentrate them. */
101 long cp_time[CPUSTATES];
102
103 SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time),
104 "LU", "CPU time statistics");
105
106 long tk_cancc;
107 long tk_nin;
108 long tk_nout;
109 long tk_rawcc;
110
111 time_t time_second;
112
113 struct timeval boottime;
114 SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
115 &boottime, timeval, "System boottime");
116
117 /*
118 * Which update policy to use.
119 * 0 - every tick, bad hardware may fail with "calcru negative..."
120 * 1 - more resistant to the above hardware, but less efficient.
121 */
122 static int tco_method;
123
/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * timeservices.
 */

/*
 * Counter read routine for the dummy timecounter.  Each call returns a
 * value one higher than the previous call; the tc argument is not used.
 */
static unsigned
dummy_get_timecount(struct timecounter *tc)
{
	static unsigned fake_count;

	fake_count += 1;
	return (fake_count);
}
136
137 static struct timecounter dummy_timecounter = {
138 dummy_get_timecount,
139 0,
140 ~0u,
141 1000000,
142 "dummy"
143 };
144
145 struct timecounter *timecounter = &dummy_timecounter;
146
147 /*
148 * Clock handling routines.
149 *
150 * This code is written to operate with two timers that run independently of
151 * each other.
152 *
153 * The main timer, running hz times per second, is used to trigger interval
154 * timers, timeouts and rescheduling as needed.
155 *
156 * The second timer handles kernel and user profiling,
157 * and does resource use estimation. If the second timer is programmable,
158 * it is randomized to avoid aliasing between the two clocks. For example,
159 * the randomization prevents an adversary from always giving up the cpu
160 * just before its quantum expires. Otherwise, it would never accumulate
161 * cpu ticks. The mean frequency of the second timer is stathz.
162 *
163 * If no second timer exists, stathz will be zero; in this case we drive
164 * profiling and statistics off the main clock. This WILL NOT be accurate;
165 * do not do it unless absolutely necessary.
166 *
167 * The statistics clock may (or may not) be run at a higher rate while
168 * profiling. This profile clock runs at profhz. We require that profhz
169 * be an integral multiple of stathz.
170 *
171 * If the statistics clock is running fast, it must be divided by the ratio
172 * profhz/stathz for statistics. (For profiling, every tick counts.)
173 *
174 * Time-of-day is maintained using a "timecounter", which may or may
175 * not be related to the hardware generating the above mentioned
176 * interrupts.
177 */
178
179 int stathz;
180 int profhz;
181 static int profprocs;
182 int ticks;
183 static int psdiv, pscnt; /* prof => stat divider */
184 int psratio; /* ratio: prof / stat */
185
186 /*
187 * Initialize clock frequencies and start both clocks running.
188 */
189 /* ARGSUSED*/
190 static void
191 initclocks(dummy)
192 void *dummy;
193 {
194 register int i;
195
196 /*
197 * Set divisors to 1 (normal case) and let the machine-specific
198 * code do its bit.
199 */
200 psdiv = pscnt = 1;
201 cpu_initclocks();
202
203 #ifdef DEVICE_POLLING
204 init_device_poll();
205 #endif
206
207 /*
208 * Compute profhz/stathz, and fix profhz if needed.
209 */
210 i = stathz ? stathz : hz;
211 if (profhz == 0)
212 profhz = i;
213 psratio = profhz / i;
214 }
215
216 /*
217 * The real-time timer, interrupting hz times per second.
218 */
219 void
220 hardclock(frame)
221 register struct clockframe *frame;
222 {
223 register struct proc *p;
224
225 p = curproc;
226 if (p) {
227 register struct pstats *pstats;
228
229 /*
230 * Run current process's virtual and profile time, as needed.
231 */
232 pstats = p->p_stats;
233 if (CLKF_USERMODE(frame) &&
234 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
235 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
236 psignal(p, SIGVTALRM);
237 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
238 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
239 psignal(p, SIGPROF);
240 }
241
242 #if defined(SMP) && defined(BETTER_CLOCK)
243 forward_hardclock(pscnt);
244 #endif
245
246 /*
247 * If no separate statistics clock is available, run it from here.
248 */
249 if (stathz == 0)
250 statclock(frame);
251
252 tco_forward(0);
253 ticks++;
254
255 #ifdef DEVICE_POLLING
256 hardclock_device_poll(); /* this is very short and quick */
257 #endif /* DEVICE_POLLING */
258
259 /*
260 * Process callouts at a very low cpu priority, so we don't keep the
261 * relatively high clock interrupt priority any longer than necessary.
262 */
263 if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
264 if (CLKF_BASEPRI(frame)) {
265 /*
266 * Save the overhead of a software interrupt;
267 * it will happen as soon as we return, so do it now.
268 */
269 (void)splsoftclock();
270 softclock();
271 } else
272 setsoftclock();
273 } else if (softticks + 1 == ticks)
274 ++softticks;
275 }
276
277 /*
278 * Compute number of ticks in the specified amount of time.
279 */
280 int
281 tvtohz(tv)
282 struct timeval *tv;
283 {
284 register unsigned long ticks;
285 register long sec, usec;
286
287 /*
288 * If the number of usecs in the whole seconds part of the time
289 * difference fits in a long, then the total number of usecs will
290 * fit in an unsigned long. Compute the total and convert it to
291 * ticks, rounding up and adding 1 to allow for the current tick
292 * to expire. Rounding also depends on unsigned long arithmetic
293 * to avoid overflow.
294 *
295 * Otherwise, if the number of ticks in the whole seconds part of
296 * the time difference fits in a long, then convert the parts to
297 * ticks separately and add, using similar rounding methods and
298 * overflow avoidance. This method would work in the previous
299 * case but it is slightly slower and assumes that hz is integral.
300 *
301 * Otherwise, round the time difference down to the maximum
302 * representable value.
303 *
304 * If ints have 32 bits, then the maximum value for any timeout in
305 * 10ms ticks is 248 days.
306 */
307 sec = tv->tv_sec;
308 usec = tv->tv_usec;
309 if (usec < 0) {
310 sec--;
311 usec += 1000000;
312 }
313 if (sec < 0) {
314 #ifdef DIAGNOSTIC
315 if (usec > 0) {
316 sec++;
317 usec -= 1000000;
318 }
319 printf("tvotohz: negative time difference %ld sec %ld usec\n",
320 sec, usec);
321 #endif
322 ticks = 1;
323 } else if (sec <= LONG_MAX / 1000000)
324 ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
325 / tick + 1;
326 else if (sec <= LONG_MAX / hz)
327 ticks = sec * hz
328 + ((unsigned long)usec + (tick - 1)) / tick + 1;
329 else
330 ticks = LONG_MAX;
331 if (ticks > INT_MAX)
332 ticks = INT_MAX;
333 return ((int)ticks);
334 }
335
336 /*
337 * Start profiling on a process.
338 *
339 * Kernel profiling passes proc0 which never exits and hence
340 * keeps the profile clock running constantly.
341 */
342 void
343 startprofclock(p)
344 register struct proc *p;
345 {
346 int s;
347
348 if ((p->p_flag & P_PROFIL) == 0) {
349 p->p_flag |= P_PROFIL;
350 if (++profprocs == 1 && stathz != 0) {
351 s = splstatclock();
352 psdiv = pscnt = psratio;
353 setstatclockrate(profhz);
354 splx(s);
355 }
356 }
357 }
358
359 /*
360 * Stop profiling on a process.
361 */
362 void
363 stopprofclock(p)
364 register struct proc *p;
365 {
366 int s;
367
368 if (p->p_flag & P_PROFIL) {
369 p->p_flag &= ~P_PROFIL;
370 if (--profprocs == 0 && stathz != 0) {
371 s = splstatclock();
372 psdiv = pscnt = 1;
373 setstatclockrate(stathz);
374 splx(s);
375 }
376 }
377 }
378
379 /*
380 * Statistics clock. Grab profile sample, and if divider reaches 0,
381 * do process and kernel statistics. Most of the statistics are only
382 * used by user-level statistics programs. The main exceptions are
383 * p->p_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu.
384 */
385 void
386 statclock(frame)
387 register struct clockframe *frame;
388 {
389 #ifdef GPROF
390 register struct gmonparam *g;
391 int i;
392 #endif
393 register struct proc *p;
394 struct pstats *pstats;
395 long rss;
396 struct rusage *ru;
397 struct vmspace *vm;
398
399 if (curproc != NULL && CLKF_USERMODE(frame)) {
400 /*
401 * Came from user mode; CPU was in user state.
402 * If this process is being profiled, record the tick.
403 */
404 p = curproc;
405 if (p->p_flag & P_PROFIL)
406 addupc_intr(p, CLKF_PC(frame), 1);
407 #if defined(SMP) && defined(BETTER_CLOCK)
408 if (stathz != 0)
409 forward_statclock(pscnt);
410 #endif
411 if (--pscnt > 0)
412 return;
413 /*
414 * Charge the time as appropriate.
415 */
416 p->p_uticks++;
417 if (p->p_nice > NZERO)
418 cp_time[CP_NICE]++;
419 else
420 cp_time[CP_USER]++;
421 } else {
422 #ifdef GPROF
423 /*
424 * Kernel statistics are just like addupc_intr, only easier.
425 */
426 g = &_gmonparam;
427 if (g->state == GMON_PROF_ON) {
428 i = CLKF_PC(frame) - g->lowpc;
429 if (i < g->textsize) {
430 i /= HISTFRACTION * sizeof(*g->kcount);
431 g->kcount[i]++;
432 }
433 }
434 #endif
435 #if defined(SMP) && defined(BETTER_CLOCK)
436 if (stathz != 0)
437 forward_statclock(pscnt);
438 #endif
439 if (--pscnt > 0)
440 return;
441 /*
442 * Came from kernel mode, so we were:
443 * - handling an interrupt,
444 * - doing syscall or trap work on behalf of the current
445 * user process, or
446 * - spinning in the idle loop.
447 * Whichever it is, charge the time as appropriate.
448 * Note that we charge interrupts to the current process,
449 * regardless of whether they are ``for'' that process,
450 * so that we know how much of its real time was spent
451 * in ``non-process'' (i.e., interrupt) work.
452 */
453 p = curproc;
454 if (CLKF_INTR(frame)) {
455 if (p != NULL)
456 p->p_iticks++;
457 cp_time[CP_INTR]++;
458 } else if (p != NULL) {
459 p->p_sticks++;
460 cp_time[CP_SYS]++;
461 } else
462 cp_time[CP_IDLE]++;
463 }
464 pscnt = psdiv;
465
466 if (p != NULL) {
467 schedclock(p);
468
469 /* Update resource usage integrals and maximums. */
470 if ((pstats = p->p_stats) != NULL &&
471 (ru = &pstats->p_ru) != NULL &&
472 (vm = p->p_vmspace) != NULL) {
473 ru->ru_ixrss += pgtok(vm->vm_tsize);
474 ru->ru_idrss += pgtok(vm->vm_dsize);
475 ru->ru_isrss += pgtok(vm->vm_ssize);
476 rss = pgtok(vmspace_resident_count(vm));
477 if (ru->ru_maxrss < rss)
478 ru->ru_maxrss = rss;
479 }
480 }
481 }
482
483 /*
484 * Return information about system clocks.
485 */
486 static int
487 sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
488 {
489 struct clockinfo clkinfo;
490 /*
491 * Construct clockinfo structure.
492 */
493 clkinfo.hz = hz;
494 clkinfo.tick = tick;
495 clkinfo.tickadj = tickadj;
496 clkinfo.profhz = profhz;
497 clkinfo.stathz = stathz ? stathz : hz;
498 return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
499 }
500
501 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
502 0, 0, sysctl_kern_clockrate, "S,clockinfo","");
503
504 static __inline unsigned
505 tco_delta(struct timecounter *tc)
506 {
507
508 return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) &
509 tc->tc_counter_mask);
510 }
511
512 /*
513 * We have eight functions for looking at the clock, four for
514 * microseconds and four for nanoseconds. For each there is a fast
515 * but less precise version "get{nano|micro}[up]time" which will
516 * return a time which is up to 1/HZ previous to the call, whereas
517 * the raw version "{nano|micro}[up]time" will return a timestamp
518 * which is as precise as possible. The "up" variants return the
519 * time relative to system boot, these are well suited for time
520 * interval measurements.
521 */
522
523 void
524 getmicrotime(struct timeval *tvp)
525 {
526 struct timecounter *tc;
527
528 if (!tco_method) {
529 tc = timecounter;
530 *tvp = tc->tc_microtime;
531 } else {
532 microtime(tvp);
533 }
534 }
535
536 void
537 getnanotime(struct timespec *tsp)
538 {
539 struct timecounter *tc;
540
541 if (!tco_method) {
542 tc = timecounter;
543 *tsp = tc->tc_nanotime;
544 } else {
545 nanotime(tsp);
546 }
547 }
548
549 void
550 microtime(struct timeval *tv)
551 {
552 struct timecounter *tc;
553
554 tc = timecounter;
555 tv->tv_sec = tc->tc_offset_sec;
556 tv->tv_usec = tc->tc_offset_micro;
557 tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
558 tv->tv_usec += boottime.tv_usec;
559 tv->tv_sec += boottime.tv_sec;
560 while (tv->tv_usec < 0) {
561 tv->tv_usec += 1000000;
562 if (tv->tv_sec > 0)
563 tv->tv_sec--;
564 }
565 while (tv->tv_usec >= 1000000) {
566 tv->tv_usec -= 1000000;
567 tv->tv_sec++;
568 }
569 }
570
571 void
572 nanotime(struct timespec *ts)
573 {
574 unsigned count;
575 u_int64_t delta;
576 struct timecounter *tc;
577
578 tc = timecounter;
579 ts->tv_sec = tc->tc_offset_sec;
580 count = tco_delta(tc);
581 delta = tc->tc_offset_nano;
582 delta += ((u_int64_t)count * tc->tc_scale_nano_f);
583 delta >>= 32;
584 delta += ((u_int64_t)count * tc->tc_scale_nano_i);
585 delta += boottime.tv_usec * 1000;
586 ts->tv_sec += boottime.tv_sec;
587 while (delta < 0) {
588 delta += 1000000000;
589 if (ts->tv_sec > 0)
590 ts->tv_sec--;
591 }
592 while (delta >= 1000000000) {
593 delta -= 1000000000;
594 ts->tv_sec++;
595 }
596 ts->tv_nsec = delta;
597 }
598
599 void
600 getmicrouptime(struct timeval *tvp)
601 {
602 struct timecounter *tc;
603
604 if (!tco_method) {
605 tc = timecounter;
606 tvp->tv_sec = tc->tc_offset_sec;
607 tvp->tv_usec = tc->tc_offset_micro;
608 } else {
609 microuptime(tvp);
610 }
611 }
612
613 void
614 getnanouptime(struct timespec *tsp)
615 {
616 struct timecounter *tc;
617
618 if (!tco_method) {
619 tc = timecounter;
620 tsp->tv_sec = tc->tc_offset_sec;
621 tsp->tv_nsec = tc->tc_offset_nano >> 32;
622 } else {
623 nanouptime(tsp);
624 }
625 }
626
627 void
628 microuptime(struct timeval *tv)
629 {
630 struct timecounter *tc;
631
632 tc = timecounter;
633 tv->tv_sec = tc->tc_offset_sec;
634 tv->tv_usec = tc->tc_offset_micro;
635 tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
636 while (tv->tv_usec < 0) {
637 tv->tv_usec += 1000000;
638 if (tv->tv_sec > 0)
639 tv->tv_sec--;
640 }
641 while (tv->tv_usec >= 1000000) {
642 tv->tv_usec -= 1000000;
643 tv->tv_sec++;
644 }
645 }
646
647 void
648 nanouptime(struct timespec *ts)
649 {
650 unsigned count;
651 u_int64_t delta;
652 struct timecounter *tc;
653
654 tc = timecounter;
655 ts->tv_sec = tc->tc_offset_sec;
656 count = tco_delta(tc);
657 delta = tc->tc_offset_nano;
658 delta += ((u_int64_t)count * tc->tc_scale_nano_f);
659 delta >>= 32;
660 delta += ((u_int64_t)count * tc->tc_scale_nano_i);
661 while (delta < 0) {
662 delta += 1000000000;
663 if (ts->tv_sec > 0)
664 ts->tv_sec--;
665 }
666 while (delta >= 1000000000) {
667 delta -= 1000000000;
668 ts->tv_sec++;
669 }
670 ts->tv_nsec = delta;
671 }
672
673 static void
674 tco_setscales(struct timecounter *tc)
675 {
676 u_int64_t scale;
677
678 scale = 1000000000LL << 32;
679 scale += tc->tc_adjustment;
680 scale /= tc->tc_tweak->tc_frequency;
681 tc->tc_scale_micro = scale / 1000;
682 tc->tc_scale_nano_f = scale & 0xffffffff;
683 tc->tc_scale_nano_i = scale >> 32;
684 }
685
/*
 * Public entry point for recomputing the scale factors of tc; simply
 * forwards to tco_setscales().
 */
void
update_timecounter(struct timecounter *tc)
{

	tco_setscales(tc);
}
691
692 void
693 init_timecounter(struct timecounter *tc)
694 {
695 struct timespec ts1;
696 struct timecounter *t1, *t2, *t3;
697 unsigned u;
698 int i;
699
700 u = tc->tc_frequency / tc->tc_counter_mask;
701 if (u > hz) {
702 printf("Timecounter \"%s\" frequency %lu Hz"
703 " -- Insufficient hz, needs at least %u\n",
704 tc->tc_name, (u_long) tc->tc_frequency, u);
705 return;
706 }
707
708 tc->tc_adjustment = 0;
709 tc->tc_tweak = tc;
710 tco_setscales(tc);
711 tc->tc_offset_count = tc->tc_get_timecount(tc);
712 if (timecounter == &dummy_timecounter)
713 tc->tc_avail = tc;
714 else {
715 tc->tc_avail = timecounter->tc_tweak->tc_avail;
716 timecounter->tc_tweak->tc_avail = tc;
717 }
718 MALLOC(t1, struct timecounter *, sizeof *t1, M_TIMECOUNTER, M_WAITOK);
719 tc->tc_other = t1;
720 *t1 = *tc;
721 t2 = t1;
722 for (i = 1; i < NTIMECOUNTER; i++) {
723 MALLOC(t3, struct timecounter *, sizeof *t3,
724 M_TIMECOUNTER, M_WAITOK);
725 *t3 = *tc;
726 t3->tc_other = t2;
727 t2 = t3;
728 }
729 t1->tc_other = t3;
730 tc = t1;
731
732 printf("Timecounter \"%s\" frequency %lu Hz\n",
733 tc->tc_name, (u_long)tc->tc_frequency);
734
735 /* XXX: For now always start using the counter. */
736 tc->tc_offset_count = tc->tc_get_timecount(tc);
737 nanouptime(&ts1);
738 tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
739 tc->tc_offset_micro = ts1.tv_nsec / 1000;
740 tc->tc_offset_sec = ts1.tv_sec;
741 timecounter = tc;
742 }
743
744 void
745 set_timecounter(struct timespec *ts)
746 {
747 struct timespec ts2;
748
749 nanouptime(&ts2);
750 boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
751 boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
752 if (boottime.tv_usec < 0) {
753 boottime.tv_usec += 1000000;
754 boottime.tv_sec--;
755 }
756 /* fiddle all the little crinkly bits around the fiords... */
757 tco_forward(1);
758 }
759
760 static void
761 switch_timecounter(struct timecounter *newtc)
762 {
763 int s;
764 struct timecounter *tc;
765 struct timespec ts;
766
767 s = splclock();
768 tc = timecounter;
769 if (newtc->tc_tweak == tc->tc_tweak) {
770 splx(s);
771 return;
772 }
773 newtc = newtc->tc_tweak->tc_other;
774 nanouptime(&ts);
775 newtc->tc_offset_sec = ts.tv_sec;
776 newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32;
777 newtc->tc_offset_micro = ts.tv_nsec / 1000;
778 newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
779 tco_setscales(newtc);
780 timecounter = newtc;
781 splx(s);
782 }
783
784 static struct timecounter *
785 sync_other_counter(void)
786 {
787 struct timecounter *tc, *tcn, *tco;
788 unsigned delta;
789
790 tco = timecounter;
791 tc = tco->tc_other;
792 tcn = tc->tc_other;
793 *tc = *tco;
794 tc->tc_other = tcn;
795 delta = tco_delta(tc);
796 tc->tc_offset_count += delta;
797 tc->tc_offset_count &= tc->tc_counter_mask;
798 tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
799 tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
800 return (tc);
801 }
802
803 static void
804 tco_forward(int force)
805 {
806 struct timecounter *tc, *tco;
807 struct timeval tvt;
808 time_t t;
809
810 tco = timecounter;
811 tc = sync_other_counter();
812 /*
813 * We may be inducing a tiny error here, the tc_poll_pps() may
814 * process a latched count which happens after the tco_delta()
815 * in sync_other_counter(), which would extend the previous
816 * counters parameters into the domain of this new one.
817 * Since the timewindow is very small for this, the error is
818 * going to be only a few weenieseconds (as Dave Mills would
819 * say), so lets just not talk more about it, OK ?
820 */
821 if (tco->tc_poll_pps)
822 tco->tc_poll_pps(tco);
823 if (timedelta != 0) {
824 tvt = boottime;
825 tvt.tv_usec += tickdelta;
826 if (tvt.tv_usec >= 1000000) {
827 tvt.tv_sec++;
828 tvt.tv_usec -= 1000000;
829 } else if (tvt.tv_usec < 0) {
830 tvt.tv_sec--;
831 tvt.tv_usec += 1000000;
832 }
833 boottime = tvt;
834 timedelta -= tickdelta;
835 }
836
837 while (tc->tc_offset_nano >= 1000000000ULL << 32) {
838 tc->tc_offset_nano -= 1000000000ULL << 32;
839 tc->tc_offset_sec++;
840 force++;
841 }
842
843 if (tco_method && !force)
844 return;
845
846 tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;
847
848 /* Figure out the wall-clock time */
849 tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
850 tc->tc_nanotime.tv_nsec =
851 (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
852 tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
853 while (tc->tc_nanotime.tv_nsec >= 1000000000) {
854 tc->tc_nanotime.tv_nsec -= 1000000000;
855 tc->tc_microtime.tv_usec -= 1000000;
856 tc->tc_nanotime.tv_sec++;
857 }
858 t = tc->tc_nanotime.tv_sec - time_second;
859 if (t > LARGE_STEP)
860 t = 2;
861 while (t-- > 0) {
862 time_second = tc->tc_nanotime.tv_sec;
863 ntp_update_second(tc);
864 tc->tc_offset_sec += tc->tc_nanotime.tv_sec - time_second;
865 tco_setscales(tc);
866 }
867 time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;
868
869 timecounter = tc;
870 }
871
872 SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
873
874 SYSCTL_INT(_kern_timecounter, OID_AUTO, method, CTLFLAG_RW, &tco_method, 0,
875 "This variable determines the method used for updating timecounters. "
876 "If the default algorithm (0) fails with \"calcru negative...\" messages "
877 "try the alternate algorithm (1) which handles bad hardware better."
878
879 );
880
881 static int
882 sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
883 {
884 char newname[32];
885 struct timecounter *newtc, *tc;
886 int error;
887
888 tc = timecounter->tc_tweak;
889 strncpy(newname, tc->tc_name, sizeof(newname));
890 error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
891 if (error == 0 && req->newptr != NULL &&
892 strcmp(newname, tc->tc_name) != 0) {
893 for (newtc = tc->tc_avail; newtc != tc;
894 newtc = newtc->tc_avail) {
895 if (strcmp(newname, newtc->tc_name) == 0) {
896 /* Warm up new timecounter. */
897 (void)newtc->tc_get_timecount(newtc);
898
899 switch_timecounter(newtc);
900 return (0);
901 }
902 }
903 return (EINVAL);
904 }
905 return (error);
906 }
907
908 SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
909 0, 0, sysctl_kern_timecounter_hardware, "A", "");
910
911
912 int
913 pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
914 {
915 pps_params_t *app;
916 struct pps_fetch_args *fapi;
917 #ifdef PPS_SYNC
918 struct pps_kcbind_args *kapi;
919 #endif
920
921 switch (cmd) {
922 case PPS_IOC_CREATE:
923 return (0);
924 case PPS_IOC_DESTROY:
925 return (0);
926 case PPS_IOC_SETPARAMS:
927 app = (pps_params_t *)data;
928 if (app->mode & ~pps->ppscap)
929 return (EINVAL);
930 pps->ppsparam = *app;
931 return (0);
932 case PPS_IOC_GETPARAMS:
933 app = (pps_params_t *)data;
934 *app = pps->ppsparam;
935 app->api_version = PPS_API_VERS_1;
936 return (0);
937 case PPS_IOC_GETCAP:
938 *(int*)data = pps->ppscap;
939 return (0);
940 case PPS_IOC_FETCH:
941 fapi = (struct pps_fetch_args *)data;
942 if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
943 return (EINVAL);
944 if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
945 return (EOPNOTSUPP);
946 pps->ppsinfo.current_mode = pps->ppsparam.mode;
947 fapi->pps_info_buf = pps->ppsinfo;
948 return (0);
949 case PPS_IOC_KCBIND:
950 #ifdef PPS_SYNC
951 kapi = (struct pps_kcbind_args *)data;
952 /* XXX Only root should be able to do this */
953 if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
954 return (EINVAL);
955 if (kapi->kernel_consumer != PPS_KC_HARDPPS)
956 return (EINVAL);
957 if (kapi->edge & ~pps->ppscap)
958 return (EINVAL);
959 pps->kcmode = kapi->edge;
960 return (0);
961 #else
962 return (EOPNOTSUPP);
963 #endif
964 default:
965 return (ENOTTY);
966 }
967 }
968
969 void
970 pps_init(struct pps_state *pps)
971 {
972 pps->ppscap |= PPS_TSFMT_TSPEC;
973 if (pps->ppscap & PPS_CAPTUREASSERT)
974 pps->ppscap |= PPS_OFFSETASSERT;
975 if (pps->ppscap & PPS_CAPTURECLEAR)
976 pps->ppscap |= PPS_OFFSETCLEAR;
977 }
978
979 void
980 pps_event(struct pps_state *pps, struct timecounter *tc, unsigned count, int event)
981 {
982 struct timespec ts, *tsp, *osp;
983 u_int64_t delta;
984 unsigned tcount, *pcount;
985 int foff, fhard;
986 pps_seq_t *pseq;
987
988 /* Things would be easier with arrays... */
989 if (event == PPS_CAPTUREASSERT) {
990 tsp = &pps->ppsinfo.assert_timestamp;
991 osp = &pps->ppsparam.assert_offset;
992 foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
993 fhard = pps->kcmode & PPS_CAPTUREASSERT;
994 pcount = &pps->ppscount[0];
995 pseq = &pps->ppsinfo.assert_sequence;
996 } else {
997 tsp = &pps->ppsinfo.clear_timestamp;
998 osp = &pps->ppsparam.clear_offset;
999 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
1000 fhard = pps->kcmode & PPS_CAPTURECLEAR;
1001 pcount = &pps->ppscount[1];
1002 pseq = &pps->ppsinfo.clear_sequence;
1003 }
1004
1005 /* The timecounter changed: bail */
1006 if (!pps->ppstc ||
1007 pps->ppstc->tc_name != tc->tc_name ||
1008 tc->tc_name != timecounter->tc_name) {
1009 pps->ppstc = tc;
1010 *pcount = count;
1011 return;
1012 }
1013
1014 /* Nothing really happened */
1015 if (*pcount == count)
1016 return;
1017
1018 *pcount = count;
1019
1020 /* Convert the count to timespec */
1021 ts.tv_sec = tc->tc_offset_sec;
1022 tcount = count - tc->tc_offset_count;
1023 tcount &= tc->tc_counter_mask;
1024 delta = tc->tc_offset_nano;
1025 delta += ((u_int64_t)tcount * tc->tc_scale_nano_f);
1026 delta >>= 32;
1027 delta += ((u_int64_t)tcount * tc->tc_scale_nano_i);
1028 delta += boottime.tv_usec * 1000;
1029 ts.tv_sec += boottime.tv_sec;
1030 while (delta >= 1000000000) {
1031 delta -= 1000000000;
1032 ts.tv_sec++;
1033 }
1034 ts.tv_nsec = delta;
1035
1036 (*pseq)++;
1037 *tsp = ts;
1038
1039 if (foff) {
1040 timespecadd(tsp, osp);
1041 if (tsp->tv_nsec < 0) {
1042 tsp->tv_nsec += 1000000000;
1043 tsp->tv_sec -= 1;
1044 }
1045 }
1046 #ifdef PPS_SYNC
1047 if (fhard) {
1048 /* magic, at its best... */
1049 tcount = count - pps->ppscount[2];
1050 pps->ppscount[2] = count;
1051 tcount &= tc->tc_counter_mask;
1052 delta = ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_f);
1053 delta >>= 32;
1054 delta += ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_i);
1055 hardpps(tsp, delta);
1056 }
1057 #endif
1058 }
Cache object: c2fb343fcb282413ba87d3bc89394497
|