The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/sun4/sys/clock.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or http://www.opensolaris.org/os/licensing.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
   23  * Use is subject to license terms.
   24  */
   25 
   26 #ifndef _SYS_CLOCK_H
   27 #define _SYS_CLOCK_H
   28 
   29 #ifdef  __cplusplus
   30 extern "C" {
   31 #endif
   32 
   33 #include <sys/spl.h>
   34 #include <sys/time.h>
   35 #include <sys/machclock.h>
   36 
   37 #ifndef _ASM
   38 
   39 #ifdef  _KERNEL
   40 
   41 extern void     setcpudelay(void);
   42 
      /*
       * Inputs to the native-time-to-nanosecond conversion.  nsec_scale holds
       * the precomputed (R * 2^32) factor that NATIVE_TIME_TO_NSEC loads, and
       * sys_tick_freq is the frequency in the ratio (NANOSEC / sys_tick_freq).
       * NOTE(review): the roles of nsec_shift and nsec_per_sys_tick are not
       * visible in this header -- confirm against their definitions.
       */
   43 extern uint_t   nsec_scale;
   44 extern uint_t   nsec_shift;
   45 extern uint_t   nsec_per_sys_tick;
   46 extern uint64_t sys_tick_freq;
   47 
      /* NOTE(review): presumably clock-source tunables; semantics not shown here. */
   48 extern int      traptrace_use_stick;
   49 extern uint64_t system_clock_freq;
   50 extern uint_t   sys_clock_mhz;
   51 
      /* NOTE(review): mon_clock_* are defined elsewhere; purpose not shown here. */
   52 extern void mon_clock_init(void);
   53 extern void mon_clock_start(void);
   54 extern void mon_clock_stop(void);
   55 extern void mon_clock_share(void);
   56 extern void mon_clock_unshare(void);
   57 
   58 extern hrtime_t hrtime_base;
   59 extern void hres_tick(void);    /* high level cyclic handler; takes hres_lock */
   60 extern void     clkstart(void);
   61 extern void cbe_level14();      /* NOTE(review): declared without a prototype */
   62 extern hrtime_t tick2ns(hrtime_t, uint_t);      /* presumably ticks -> nsec; confirm */
   63 
   64 typedef struct {        /* pair of 64-bit "inum" values; producer/consumer not shown here */
   65         uint64_t cbe_level1_inum;       /* presumably the level-1 interrupt number -- confirm */
   66         uint64_t cbe_level10_inum;      /* presumably the level-10 interrupt number -- confirm */
   67 } cbe_data_t;
   68 
   69 #endif  /* _KERNEL */
   70 
   71 #endif  /* _ASM */
   72 
   73 
   74 #define CBE_LOW_PIL     1       /* these constants sit outside the _ASM guard */
   75 #define CBE_LOCK_PIL    LOCK_LEVEL      /* tracks LOCK_LEVEL, defined elsewhere */
   76 #define CBE_HIGH_PIL    14      /* level CLOCK_LOCK raises to when taking hres_lock */
   77 
   78 #define ADJ_SHIFT       4       /* used in get_hrestime and _level10 */
   79 
   80 /*
   81  * Locking strategy for high-resolution timing services
   82  *
   83  * We generally construct timestamps from two or more components:
   84  * a hardware time source and one or more software time sources.
   85  * These components cannot all be loaded simultaneously, so we need
   86  * some sort of locking strategy to generate consistent timestamps.
   87  *
   88  * To minimize lock contention and cache thrashing we employ the
   89  * weakest possible synchronization model: writers (rare) serialize
   90  * on an acquisition-counting mutex, described below; readers (common)
   91  * execute in parallel with no synchronization at all -- they don't
   92  * exclude other readers, and they don't even exclude writers.  Instead,
   93  * readers just examine the writer lock's value before and after loading
   94  * all the components of a timestamp to detect writer intervention.
   95  * In the rare case when a writer does intervene, the reader will
   96  * detect it, discard the timestamp and try again.
   97  *
   98  * The writer lock, hres_lock, is a 32-bit integer consisting of an
   99  * 8-bit lock and a 24-bit acquisition count.  To acquire the lock we
  100  * set the lock field with ldstub, which sets the low-order 8 bits to
  101  * 0xff; to clear the lock, we increment it, which simultaneously clears
  102  * the lock field (0xff --> 0x00) and increments the acquisition count
  103  * (due to carry into bit 8).  Thus each acquisition transforms hres_lock
  104  * from N:0 to N:ff, and each release transforms N:ff into (N+1):0.
  105  *
  106  * Readers can detect writer intervention by loading hres_lock before
  107  * and after loading the time components they need; if either lock value
  108  * contains 0xff in the low-order bits (lock held), or if the lock values
  109  * are not equal (lock was acquired and released), a writer intervened
  110  * and the reader must try again.  If the lock values are equal and the
  111  * low-order 8 bits are clear, the timestamp must be valid.  We can check
  112  * both of these conditions with a single compare instruction by checking
  113  * whether (old_hres_lock & ~1) == new_hres_lock, as illustrated by the
  114  * following table of all possible lock states:
  115  *
  116  *      initial & ~1    final           result of compare
  117  *      ------------    -----           -----------------
  118  *      now:00          now:00          valid
  119  *      now:00          now:ff          invalid
  120  *      now:00          later:00        invalid
  121  *      now:00          later:ff        invalid
  122  *      now:fe          now:ff          invalid
  123  *      now:fe          later:00        invalid
  124  *      now:fe          later:ff        invalid
  125  *
  126  * Implementation considerations:
  127  *
  128  * (1) Load buffering.
  129  *
  130  * On a CPU that does load buffering we must ensure that the load of
  131  * hres_lock completes before the load of any timestamp components.
  132  * This is essential *even on a CPU that does in-order loads* because
  133  * accessing the hardware time source may not involve a memory reference
  134  * (e.g. rd %tick).  A convenient way to address this is to clear the
  135  * lower bit (andn with 1) of the old lock value right away, since this
  136  * generates a dependency on the load of hres_lock.  We have to do this
  137  * anyway to perform the lock comparison described above.
  138  *
  139  * (2) Out-of-order loads.
  140  *
  141  * On a CPU that does out-of-order loads we must ensure that the loads
  142  * of all timestamp components have completed before we load the final
  143  * value of hres_lock.  This can be done either by generating load
  144  * dependencies on the timestamp components or by membar #LoadLoad.
  145  *
  146  * (3) Interaction with the high level cyclic handler, hres_tick().
  147  *
  148  * One unusual property of hres_lock is that it's acquired in a high
  149  * level cyclic handler, hres_tick().  Thus, hres_lock must be acquired at
  150  * CBE_HIGH_PIL or higher to prevent single-CPU deadlock.
  151  *
  152  * (4) Cross-calls.
  153  *
  154  * If a cross-call happens while one CPU has hres_lock and another is
  155  * trying to acquire it in the clock interrupt path, the system will
  156  * deadlock: the first CPU will never release hres_lock since it's
  157  * waiting to be released from the cross-call, and the cross-call can't
  158  * complete because the second CPU is spinning on hres_lock with traps
  159  * disabled.  Thus cross-calls must be blocked while holding hres_lock.
  160  *
  161  * Together, (3) and (4) imply that hres_lock should only be acquired
  162  * at PIL >= max(XCALL_PIL, CBE_HIGH_PIL), or while traps are disabled.
  163  */
  164 #define HRES_LOCK_OFFSET 3
  165 
  166 #define CLOCK_LOCK(oldsplp)     \
  167         lock_set_spl((lock_t *)&hres_lock + HRES_LOCK_OFFSET, \
  168                 ipltospl(CBE_HIGH_PIL), oldsplp)
  169 
  170 #define CLOCK_UNLOCK(spl)       \
  171         membar_ldst_stst();     \
  172         hres_lock++;            \
  173         splx(spl);              \
  174         LOCKSTAT_RECORD0(LS_CLOCK_UNLOCK_RELEASE,       \
  175                 (lock_t *)&hres_lock + HRES_LOCK_OFFSET);
  176 
  177 /*
  178  * NATIVE_TIME_TO_NSEC_SCALE is called with NSEC_SHIFT to convert hi-res
  179  * timestamps into nanoseconds. On systems that have a %stick register,
  180  * hi-res timestamps are in %stick units. On systems that do not have a
  181  * %stick register, hi-res timestamps are in %tick units.
  182  *
  183  * NATIVE_TIME_TO_NSEC_SCALE is called with TICK_NSEC_SHIFT to convert from
  184  * %tick units to nanoseconds on all implementations whether %stick is
  185  * available or not.
  186  */
  187 
  188 /*
  189  * At least 62.5 MHz CPU %tick frequency
  190  */
  191 
  192 #define TICK_NSEC_SHIFT 4       /* shift given to NATIVE_TIME_TO_NSEC_SCALE for %tick units */
  193 
  194 /*
  195  * Convert hi-res native time (V9's %tick in our case) into nanoseconds.
  196  *
  197  * The challenge is to multiply a %tick value by (NANOSEC / sys_tick_freq)
  198  * without using floating point and without overflowing 64-bit integers.
  199  * We assume that all sun4u systems will have a 16 nsec or better clock
  200  * (i.e. faster than 62.5 MHz), which means that (ticks << 4) has units
  201  * greater than one nanosecond, so converting from (ticks << 4) to nsec
  202  * requires multiplication by a rational number, R, between 0 and 1.
  203  * To avoid floating-point we precompute (R * 2^32) during boot and
  204  * stash this away in nsec_scale.  Thus we can compute (tick * R) as
  205  * (tick * nsec_scale) >> 32, which is accurate to about 1 part per billion.
  206  *
  207  * To avoid 64-bit overflow when multiplying (tick << 4) by nsec_scale,
  208  * we split (tick << 4) into its high and low 32-bit pieces, H and L,
  209  * multiply each piece separately, and add up the relevant bits of the
  210  * partial products.  Putting it all together we have:
  211  *
  212  * nsec = (tick << 4) * R
  213  *      = ((tick << 4) * nsec_scale) >> 32
  214  *      = (((H << 32) + L) * nsec_scale) >> 32
  215  *      = (H * nsec_scale) + ((L * nsec_scale) >> 32)
  216  *
  217  * The last line is the computation we actually perform: it requires no
  218  * floating point and all intermediate results fit in 64-bit registers.
  219  *
  220  * Note that we require that tick is less than (1 << (64 - NSEC_SHIFT));
  221  * greater values will result in overflow and misbehavior (not that this
  222  * is a serious problem; (1 << (64 - NSEC_SHIFT)) nanoseconds is over
  223  * thirty-six years).  Nonetheless, clients may wish to be aware of this
  224  * limitation; NATIVE_TIME_MAX() returns this maximum native time.
  225  *
  226  * We provide two versions of this macro: a "full-service" version that
  227  * just converts ticks to nanoseconds and a higher-performance version that
  228  * expects the scaling factor nsec_scale as its second argument (so that
  229  * callers can distance the load of nsec_scale from its use).  Note that
  230  * we take a fast path if we determine the ticks to be less than 32 bits
  231  * (as it often is for the delta between %tick values for successive
  232  * firings of the hres_tick() cyclic).
  233  *
  234  * Note that in the 32-bit path we don't even bother clearing NPT.
  235  * We get away with this by making hardclk.c ensure that nsec_scale
  236  * is even, so we can take advantage of the associativity of modular
  237  * arithmetic: multiplying %tick by any even number, say 2*n, is
  238  * equivalent to multiplying %tick by 2, then by n.  Multiplication
  239  * by 2 is equivalent to shifting left by one, which clears NPT.
  240  *
  241  * Finally, note that the macros use the labels "6:" and "7:"; these
  242  * labels must not be used across an invocation of either macro.
  243  */
  244 #define NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, shift)               \
      /* in:  out = native time, scr1 = scaling factor F, shift = pre-scale */\
      /* out: out = converted time; scr2 always clobbered, scr1 clobbered */  \
      /*      on the 64-bit path; local labels 6 and 7 are consumed */        \
  245         srlx    out, 32, scr2;          /* check high 32 bits */        \
  246 /* CSTYLED */                                                           \
  247         brz,a,pt scr2, 6f;              /* if clear, 32-bit fast path */\
  248         mulx    out, scr1, out;         /* delay (annulled unless taken): out = tick*F */ \
  249         sllx    out, shift, out;        /* clear NPT and pre-scale */   \
  250         srlx    out, 32, scr2;          /* scr2 = hi32(tick<<shift) = H */ \
  251         mulx    scr2, scr1, scr2;       /* scr2 = (H*F) */              \
  252         srl     out, 0, out;            /* out = lo32(tick<<shift) = L */ \
  253         mulx    out, scr1, scr1;        /* scr1 = (L*F) */              \
  254         srlx    scr1, 32, scr1;         /* scr1 = (L*F) >> 32 */        \
  255         ba      7f;                     /* branch over 32-bit path */   \
  256         add     scr1, scr2, out;        /* delay: out = (H*F) + ((L*F) >> 32) */\
  257 6:                                                                      \
  258         srlx    out, 32 - shift, out;   /* fast path: (tick*F) >> (32 - shift) */ \
  259 7:
  260 
  261 #define NATIVE_TIME_TO_NSEC(out, scr1, scr2)                            \
      /* "full-service" form: loads nsec_scale itself; clobbers scr1, scr2 */ \
  262         sethi   %hi(nsec_scale), scr1;  /* load scaling factor */       \
  263         ld      [scr1 + %lo(nsec_scale)], scr1; /* scr1 = nsec_scale */ \
  264         NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, NSEC_SHIFT); /* uses labels 6, 7 */
  265 
  266 #define NATIVE_TIME_MAX(out)                                            \
  267         mov     -1, out;                /* out = all ones */            \
  268         srlx    out, NSEC_SHIFT, out    /* out = ~0 >> NSEC_SHIFT; macro has no trailing ';' */
  269 
  270 /*
  271  * NSEC_SHIFT and VTRACE_SHIFT constants are defined in
  272  * <sys/machclock.h> file.
  273  */
  274 
  275 #ifdef  __cplusplus
  276 }
  277 #endif
  278 
  279 #endif  /* !_SYS_CLOCK_H */

Cache object: e514cdec2df1b4b83e4bc864ee1eab26


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.