FreeBSD/Linux Kernel Cross Reference
sys/kernel/hrtimer.c


    1 /*
    2  *  linux/kernel/hrtimer.c
    3  *
    4  *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
    5  *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
    6  *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
    7  *
    8  *  High-resolution kernel timers
    9  *
   10  *  In contrast to the low-resolution timeout API implemented in
   11  *  kernel/timer.c, hrtimers provide finer resolution and accuracy
   12  *  depending on system configuration and capabilities.
   13  *
   14  *  These timers are currently used for:
   15  *   - itimers
   16  *   - POSIX timers
   17  *   - nanosleep
   18  *   - precise in-kernel timing
   19  *
   20  *  Started by: Thomas Gleixner and Ingo Molnar
   21  *
   22  *  Credits:
   23  *      based on kernel/timer.c
   24  *
   25  *      Help, testing, suggestions, bugfixes, improvements were
   26  *      provided by:
   27  *
   28  *      George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
   29  *      et al.
   30  *
   31  *  For licensing details see kernel-base/COPYING
   32  */
   33 
   34 #include <linux/cpu.h>
   35 #include <linux/export.h>
   36 #include <linux/percpu.h>
   37 #include <linux/hrtimer.h>
   38 #include <linux/notifier.h>
   39 #include <linux/syscalls.h>
   40 #include <linux/kallsyms.h>
   41 #include <linux/interrupt.h>
   42 #include <linux/tick.h>
   43 #include <linux/seq_file.h>
   44 #include <linux/err.h>
   45 #include <linux/debugobjects.h>
   46 #include <linux/sched.h>
   47 #include <linux/timer.h>
   48 
   49 #include <asm/uaccess.h>
   50 
   51 #include <trace/events/timer.h>
   52 
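/*
 * [Editorial example] A minimal usage sketch of the API implemented in
 * this file, illustrating the "used for" list in the header above. The
 * names example_timer, example_timer_fn and the 100 ms delay are
 * assumptions made for illustration only; they are not part of the
 * original file.
 */
static struct hrtimer example_timer;

static enum hrtimer_restart example_timer_fn(struct hrtimer *timer)
{
        /* Runs with interrupts disabled once the timer expires. */
        return HRTIMER_NORESTART;       /* one-shot: do not requeue */
}

static void example_timer_setup(void)
{
        hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        example_timer.function = example_timer_fn;
        /* Fire once, 100 ms from now on the monotonic clock. */
        hrtimer_start(&example_timer, ktime_set(0, 100 * NSEC_PER_MSEC),
                      HRTIMER_MODE_REL);
}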
   53 /*
   54  * The timer bases:
   55  *
   56  * There are more clockids than hrtimer bases. Thus, we index
   57  * into the timer bases by the hrtimer_base_type enum. When trying
   58  * to reach a base using a clockid, hrtimer_clockid_to_base()
   59  * is used to convert from clockid to the proper hrtimer_base_type.
   60  */
   61 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
   62 {
   63 
   64         .clock_base =
   65         {
   66                 {
   67                         .index = HRTIMER_BASE_MONOTONIC,
   68                         .clockid = CLOCK_MONOTONIC,
   69                         .get_time = &ktime_get,
   70                         .resolution = KTIME_LOW_RES,
   71                 },
   72                 {
   73                         .index = HRTIMER_BASE_REALTIME,
   74                         .clockid = CLOCK_REALTIME,
   75                         .get_time = &ktime_get_real,
   76                         .resolution = KTIME_LOW_RES,
   77                 },
   78                 {
   79                         .index = HRTIMER_BASE_BOOTTIME,
   80                         .clockid = CLOCK_BOOTTIME,
   81                         .get_time = &ktime_get_boottime,
   82                         .resolution = KTIME_LOW_RES,
   83                 },
   84         }
   85 };
   86 
   87 static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
   88         [CLOCK_REALTIME]        = HRTIMER_BASE_REALTIME,
   89         [CLOCK_MONOTONIC]       = HRTIMER_BASE_MONOTONIC,
   90         [CLOCK_BOOTTIME]        = HRTIMER_BASE_BOOTTIME,
   91 };
   92 
   93 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
   94 {
   95         return hrtimer_clock_to_base_table[clock_id];
   96 }
   97 
   98 
   99 /*
  100  * Get the coarse-grained time at the softirq, based on xtime and
  101  * wall_to_monotonic.
  102  */
  103 static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
  104 {
  105         ktime_t xtim, mono, boot;
  106         struct timespec xts, tom, slp;
  107 
  108         get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
  109 
  110         xtim = timespec_to_ktime(xts);
  111         mono = ktime_add(xtim, timespec_to_ktime(tom));
  112         boot = ktime_add(mono, timespec_to_ktime(slp));
  113         base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
  114         base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
  115         base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
  116 }
  117 
  118 /*
  119  * Functions and macros which are different for UP/SMP systems are kept in a
  120  * single place
  121  */
  122 #ifdef CONFIG_SMP
  123 
  124 /*
  125  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
  126  * means that all timers which are tied to this base via timer->base are
  127  * locked, and the base itself is locked too.
  128  *
  129  * So __run_timers/migrate_timers can safely modify all timers which could
  130  * be found on the lists/queues.
  131  *
  132  * When the timer's base is locked, and the timer removed from list, it is
  133  * possible to set timer->base = NULL and drop the lock: the timer remains
  134  * locked.
  135  */
  136 static
  137 struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
  138                                              unsigned long *flags)
  139 {
  140         struct hrtimer_clock_base *base;
  141 
  142         for (;;) {
  143                 base = timer->base;
  144                 if (likely(base != NULL)) {
  145                         raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
  146                         if (likely(base == timer->base))
  147                                 return base;
  148                         /* The timer has migrated to another CPU: */
  149                         raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
  150                 }
  151                 cpu_relax();
  152         }
  153 }
  154 
  155 
  156 /*
  157  * Get the preferred target CPU for NOHZ
  158  */
  159 static int hrtimer_get_target(int this_cpu, int pinned)
  160 {
  161 #ifdef CONFIG_NO_HZ
  162         if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
  163                 return get_nohz_timer_target();
  164 #endif
  165         return this_cpu;
  166 }
  167 
  168 /*
  169  * With HIGHRES=y we do not migrate the timer when it is expiring
  170  * before the next event on the target cpu because we cannot reprogram
  171  * the target cpu hardware and we would cause it to fire late.
  172  *
  173  * Called with cpu_base->lock of target cpu held.
  174  */
  175 static int
  176 hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
  177 {
  178 #ifdef CONFIG_HIGH_RES_TIMERS
  179         ktime_t expires;
  180 
  181         if (!new_base->cpu_base->hres_active)
  182                 return 0;
  183 
  184         expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
  185         return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
  186 #else
  187         return 0;
  188 #endif
  189 }
  190 
  191 /*
  192  * Switch the timer base to the current CPU when possible.
  193  */
  194 static inline struct hrtimer_clock_base *
  195 switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
  196                     int pinned)
  197 {
  198         struct hrtimer_clock_base *new_base;
  199         struct hrtimer_cpu_base *new_cpu_base;
  200         int this_cpu = smp_processor_id();
  201         int cpu = hrtimer_get_target(this_cpu, pinned);
  202         int basenum = base->index;
  203 
  204 again:
  205         new_cpu_base = &per_cpu(hrtimer_bases, cpu);
  206         new_base = &new_cpu_base->clock_base[basenum];
  207 
  208         if (base != new_base) {
  209                 /*
  210                  * We are trying to move timer to new_base.
  211                  * However we can't change timer's base while it is running,
  212                  * so we keep it on the same CPU. No hassle vs. reprogramming
  213                  * the event source in the high resolution case. The softirq
  214                  * code will take care of this when the timer function has
  215                  * completed. There is no conflict as we hold the lock until
  216                  * the timer is enqueued.
  217                  */
  218                 if (unlikely(hrtimer_callback_running(timer)))
  219                         return base;
  220 
  221                 /* See the comment in lock_timer_base() */
  222                 timer->base = NULL;
  223                 raw_spin_unlock(&base->cpu_base->lock);
  224                 raw_spin_lock(&new_base->cpu_base->lock);
  225 
  226                 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
  227                         cpu = this_cpu;
  228                         raw_spin_unlock(&new_base->cpu_base->lock);
  229                         raw_spin_lock(&base->cpu_base->lock);
  230                         timer->base = base;
  231                         goto again;
  232                 }
  233                 timer->base = new_base;
  234         }
  235         return new_base;
  236 }
  237 
  238 #else /* CONFIG_SMP */
  239 
  240 static inline struct hrtimer_clock_base *
  241 lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
  242 {
  243         struct hrtimer_clock_base *base = timer->base;
  244 
  245         raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
  246 
  247         return base;
  248 }
  249 
  250 # define switch_hrtimer_base(t, b, p)   (b)
  251 
  252 #endif  /* !CONFIG_SMP */
  253 
  254 /*
  255  * Functions for the union type storage format of ktime_t which are
  256  * too large for inlining:
  257  */
  258 #if BITS_PER_LONG < 64
  259 # ifndef CONFIG_KTIME_SCALAR
  260 /**
  261  * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
  262  * @kt:         addend
  263  * @nsec:       the scalar nsec value to add
  264  *
  265  * Returns the sum of kt and nsec in ktime_t format
  266  */
  267 ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
  268 {
  269         ktime_t tmp;
  270 
  271         if (likely(nsec < NSEC_PER_SEC)) {
  272                 tmp.tv64 = nsec;
  273         } else {
  274                 unsigned long rem = do_div(nsec, NSEC_PER_SEC);
  275 
  276                 tmp = ktime_set((long)nsec, rem);
  277         }
  278 
  279         return ktime_add(kt, tmp);
  280 }
  281 
  282 EXPORT_SYMBOL_GPL(ktime_add_ns);
  283 
  284 /**
  285  * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
  286  * @kt:         minuend
  287  * @nsec:       the scalar nsec value to subtract
  288  *
  289  * Returns the subtraction of @nsec from @kt in ktime_t format
  290  */
  291 ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
  292 {
  293         ktime_t tmp;
  294 
  295         if (likely(nsec < NSEC_PER_SEC)) {
  296                 tmp.tv64 = nsec;
  297         } else {
  298                 unsigned long rem = do_div(nsec, NSEC_PER_SEC);
  299 
  300                 tmp = ktime_set((long)nsec, rem);
  301         }
  302 
  303         return ktime_sub(kt, tmp);
  304 }
  305 
  306 EXPORT_SYMBOL_GPL(ktime_sub_ns);
  307 # endif /* !CONFIG_KTIME_SCALAR */
  308 
  309 /*
  310  * Divide a ktime value by a nanosecond value
  311  */
  312 u64 ktime_divns(const ktime_t kt, s64 div)
  313 {
  314         u64 dclc;
  315         int sft = 0;
  316 
  317         dclc = ktime_to_ns(kt);
  318         /* Make sure the divisor is less than 2^32: */
  319         while (div >> 32) {
  320                 sft++;
  321                 div >>= 1;
  322         }
  323         dclc >>= sft;
  324         do_div(dclc, (unsigned long) div);
  325 
  326         return dclc;
  327 }
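/*
 * [Editorial note] Worked example for the shift trick above: with
 * div = 6,000,000,000 (> 2^32), the loop runs once, leaving sft = 1
 * and div = 3,000,000,000, which fits in 32 bits. Shifting dclc by
 * the same amount preserves the quotient: for kt = 12,000,000,000 ns,
 * dclc becomes 6,000,000,000 and do_div() yields 2, i.e. kt / div.
 * The halving only costs precision in the low bits, while do_div()
 * gains a divisor it can handle on 32-bit machines.
 */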
  328 #endif /* BITS_PER_LONG < 64 */
  329 
  330 /*
  331  * Add two ktime values and do a safety check for overflow:
  332  */
  333 ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
  334 {
  335         ktime_t res = ktime_add(lhs, rhs);
  336 
  337         /*
  338          * We use KTIME_SEC_MAX here, the maximum timeout which we can
  339          * return to user space in a timespec:
  340          */
  341         if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
  342                 res = ktime_set(KTIME_SEC_MAX, 0);
  343 
  344         return res;
  345 }
  346 
  347 EXPORT_SYMBOL_GPL(ktime_add_safe);
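/*
 * [Editorial example] A typical use of ktime_add_safe(): turning a
 * user-supplied relative timeout into an absolute expiry without
 * wrapping. The function and parameter names are assumptions for the
 * sketch; an absurdly large delta saturates at KTIME_SEC_MAX instead
 * of producing a negative, i.e. already expired, time.
 */
static ktime_t example_abs_expiry(ktime_t user_delta)
{
        return ktime_add_safe(ktime_get(), user_delta);
}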
  348 
  349 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
  350 
  351 static struct debug_obj_descr hrtimer_debug_descr;
  352 
  353 static void *hrtimer_debug_hint(void *addr)
  354 {
  355         return ((struct hrtimer *) addr)->function;
  356 }
  357 
  358 /*
  359  * fixup_init is called when:
  360  * - an active object is initialized
  361  */
  362 static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
  363 {
  364         struct hrtimer *timer = addr;
  365 
  366         switch (state) {
  367         case ODEBUG_STATE_ACTIVE:
  368                 hrtimer_cancel(timer);
  369                 debug_object_init(timer, &hrtimer_debug_descr);
  370                 return 1;
  371         default:
  372                 return 0;
  373         }
  374 }
  375 
  376 /*
  377  * fixup_activate is called when:
  378  * - an active object is activated
  379  * - an unknown object is activated (might be a statically initialized object)
  380  */
  381 static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
  382 {
  383         switch (state) {
  384 
  385         case ODEBUG_STATE_NOTAVAILABLE:
  386                 WARN_ON_ONCE(1);
  387                 return 0;
  388 
  389         case ODEBUG_STATE_ACTIVE:
  390                 WARN_ON(1);
  391 
  392         default:
  393                 return 0;
  394         }
  395 }
  396 
  397 /*
  398  * fixup_free is called when:
  399  * - an active object is freed
  400  */
  401 static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
  402 {
  403         struct hrtimer *timer = addr;
  404 
  405         switch (state) {
  406         case ODEBUG_STATE_ACTIVE:
  407                 hrtimer_cancel(timer);
  408                 debug_object_free(timer, &hrtimer_debug_descr);
  409                 return 1;
  410         default:
  411                 return 0;
  412         }
  413 }
  414 
  415 static struct debug_obj_descr hrtimer_debug_descr = {
  416         .name           = "hrtimer",
  417         .debug_hint     = hrtimer_debug_hint,
  418         .fixup_init     = hrtimer_fixup_init,
  419         .fixup_activate = hrtimer_fixup_activate,
  420         .fixup_free     = hrtimer_fixup_free,
  421 };
  422 
  423 static inline void debug_hrtimer_init(struct hrtimer *timer)
  424 {
  425         debug_object_init(timer, &hrtimer_debug_descr);
  426 }
  427 
  428 static inline void debug_hrtimer_activate(struct hrtimer *timer)
  429 {
  430         debug_object_activate(timer, &hrtimer_debug_descr);
  431 }
  432 
  433 static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
  434 {
  435         debug_object_deactivate(timer, &hrtimer_debug_descr);
  436 }
  437 
  438 static inline void debug_hrtimer_free(struct hrtimer *timer)
  439 {
  440         debug_object_free(timer, &hrtimer_debug_descr);
  441 }
  442 
  443 static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
  444                            enum hrtimer_mode mode);
  445 
  446 void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
  447                            enum hrtimer_mode mode)
  448 {
  449         debug_object_init_on_stack(timer, &hrtimer_debug_descr);
  450         __hrtimer_init(timer, clock_id, mode);
  451 }
  452 EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
  453 
  454 void destroy_hrtimer_on_stack(struct hrtimer *timer)
  455 {
  456         debug_object_free(timer, &hrtimer_debug_descr);
  457 }
  458 
  459 #else
  460 static inline void debug_hrtimer_init(struct hrtimer *timer) { }
  461 static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
  462 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
  463 #endif
  464 
  465 static inline void
  466 debug_init(struct hrtimer *timer, clockid_t clockid,
  467            enum hrtimer_mode mode)
  468 {
  469         debug_hrtimer_init(timer);
  470         trace_hrtimer_init(timer, clockid, mode);
  471 }
  472 
  473 static inline void debug_activate(struct hrtimer *timer)
  474 {
  475         debug_hrtimer_activate(timer);
  476         trace_hrtimer_start(timer);
  477 }
  478 
  479 static inline void debug_deactivate(struct hrtimer *timer)
  480 {
  481         debug_hrtimer_deactivate(timer);
  482         trace_hrtimer_cancel(timer);
  483 }
  484 
  485 /* High resolution timer related functions */
  486 #ifdef CONFIG_HIGH_RES_TIMERS
  487 
  488 /*
  489  * High resolution timer enabled ?
  490  */
  491 static int hrtimer_hres_enabled __read_mostly  = 1;
  492 
  493 /*
  494  * Enable / Disable high resolution mode
  495  */
  496 static int __init setup_hrtimer_hres(char *str)
  497 {
  498         if (!strcmp(str, "off"))
  499                 hrtimer_hres_enabled = 0;
  500         else if (!strcmp(str, "on"))
  501                 hrtimer_hres_enabled = 1;
  502         else
  503                 return 0;
  504         return 1;
  505 }
  506 
  507 __setup("highres=", setup_hrtimer_hres);
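/*
 * [Editorial note] Example: booting with "highres=off" on the kernel
 * command line sets hrtimer_hres_enabled to 0, so the switch to high
 * resolution mode below is never attempted. Any value other than "on"
 * or "off" makes setup_hrtimer_hres() return 0, i.e. the option is
 * treated as unrecognized.
 */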
  508 
  509 /*
  510  * hrtimer_is_hres_enabled - query whether high resolution mode is enabled
  511  */
  512 static inline int hrtimer_is_hres_enabled(void)
  513 {
  514         return hrtimer_hres_enabled;
  515 }
  516 
  517 /*
  518  * Is the high resolution mode active ?
  519  */
  520 static inline int hrtimer_hres_active(void)
  521 {
  522         return __this_cpu_read(hrtimer_bases.hres_active);
  523 }
  524 
  525 /*
  526  * Reprogram the event source, checking both queues for the
  527  * next event.
  528  * Called with interrupts disabled and base->lock held
  529  */
  530 static void
  531 hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
  532 {
  533         int i;
  534         struct hrtimer_clock_base *base = cpu_base->clock_base;
  535         ktime_t expires, expires_next;
  536 
  537         expires_next.tv64 = KTIME_MAX;
  538 
  539         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
  540                 struct hrtimer *timer;
  541                 struct timerqueue_node *next;
  542 
  543                 next = timerqueue_getnext(&base->active);
  544                 if (!next)
  545                         continue;
  546                 timer = container_of(next, struct hrtimer, node);
  547 
  548                 expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
  549                 /*
  550                  * clock_was_set() has changed base->offset so the
  551                  * result might be negative. Fix it up to prevent a
  552                  * false positive in clockevents_program_event()
  553                  */
  554                 if (expires.tv64 < 0)
  555                         expires.tv64 = 0;
  556                 if (expires.tv64 < expires_next.tv64)
  557                         expires_next = expires;
  558         }
  559 
  560         if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
  561                 return;
  562 
  563         cpu_base->expires_next.tv64 = expires_next.tv64;
  564 
  565         if (cpu_base->expires_next.tv64 != KTIME_MAX)
  566                 tick_program_event(cpu_base->expires_next, 1);
  567 }
  568 
  569 /*
  570  * Shared reprogramming for clock_realtime and clock_monotonic
  571  *
  572  * When a timer is enqueued and expires earlier than the already enqueued
  573  * timers, we have to check whether it expires earlier than the timer for
  574  * which the clock event device was armed.
  575  *
  576  * Called with interrupts disabled and base->cpu_base.lock held
  577  */
  578 static int hrtimer_reprogram(struct hrtimer *timer,
  579                              struct hrtimer_clock_base *base)
  580 {
  581         struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
  582         ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
  583         int res;
  584 
  585         WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
  586 
  587         /*
  588          * When the callback is running, we do not reprogram the clock event
  589          * device. The timer callback is either running on a different CPU or
  590          * the callback is executed in the hrtimer_interrupt context. The
  591          * reprogramming is handled either by the softirq, which called the
  592          * callback or at the end of the hrtimer_interrupt.
  593          */
  594         if (hrtimer_callback_running(timer))
  595                 return 0;
  596 
  597         /*
  598          * CLOCK_REALTIME timer might be requested with an absolute
  599          * expiry time which is less than base->offset. Nothing wrong
  600          * with that, just avoid calling into the tick code, which
  601          * now rejects negative expiry values.
  602          */
  603         if (expires.tv64 < 0)
  604                 return -ETIME;
  605 
  606         if (expires.tv64 >= cpu_base->expires_next.tv64)
  607                 return 0;
  608 
  609         /*
  610          * If a hang was detected in the last timer interrupt then we
  611          * do not schedule a timer which is earlier than the expiry
  612          * which we enforced in the hang detection. We want the system
  613          * to make progress.
  614          */
  615         if (cpu_base->hang_detected)
  616                 return 0;
  617 
  618         /*
  619          * Clockevents returns -ETIME when the event was in the past.
  620          */
  621         res = tick_program_event(expires, 0);
  622         if (!IS_ERR_VALUE(res))
  623                 cpu_base->expires_next = expires;
  624         return res;
  625 }
  626 
  627 /*
  628  * Initialize the high resolution related parts of cpu_base
  629  */
  630 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
  631 {
  632         base->expires_next.tv64 = KTIME_MAX;
  633         base->hres_active = 0;
  634 }
  635 
  636 /*
  637  * When high resolution timers are active, try to reprogram. Note that if
  638  * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
  639  * check happen; the timer gets enqueued into the rbtree. The reprogramming
  640  * and expiry check are then done in hrtimer_interrupt() or in the softirq.
  641  */
  642 static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
  643                                             struct hrtimer_clock_base *base,
  644                                             int wakeup)
  645 {
  646         if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
  647                 if (wakeup) {
  648                         raw_spin_unlock(&base->cpu_base->lock);
  649                         raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  650                         raw_spin_lock(&base->cpu_base->lock);
  651                 } else
  652                         __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  653 
  654                 return 1;
  655         }
  656 
  657         return 0;
  658 }
  659 
  660 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
  661 {
  662         ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
  663         ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
  664 
  665         return ktime_get_update_offsets(offs_real, offs_boot);
  666 }
  667 
  668 /*
  669  * retrigger_next_event() is called after the clock was set
  670  *
  671  * Called with interrupts disabled via on_each_cpu()
  672  */
  673 static void retrigger_next_event(void *arg)
  674 {
  675         struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
  676 
  677         if (!hrtimer_hres_active())
  678                 return;
  679 
  680         raw_spin_lock(&base->lock);
  681         hrtimer_update_base(base);
  682         hrtimer_force_reprogram(base, 0);
  683         raw_spin_unlock(&base->lock);
  684 }
  685 
  686 /*
  687  * Switch to high resolution mode
  688  */
  689 static int hrtimer_switch_to_hres(void)
  690 {
  691         int i, cpu = smp_processor_id();
  692         struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
  693         unsigned long flags;
  694 
  695         if (base->hres_active)
  696                 return 1;
  697 
  698         local_irq_save(flags);
  699 
  700         if (tick_init_highres()) {
  701                 local_irq_restore(flags);
  702                 printk(KERN_WARNING "Could not switch to high resolution "
  703                                     "mode on CPU %d\n", cpu);
  704                 return 0;
  705         }
  706         base->hres_active = 1;
  707         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
  708                 base->clock_base[i].resolution = KTIME_HIGH_RES;
  709 
  710         tick_setup_sched_timer();
  711         /* "Retrigger" the interrupt to get things going */
  712         retrigger_next_event(NULL);
  713         local_irq_restore(flags);
  714         return 1;
  715 }
  716 
  717 /*
  718  * Called from timekeeping code to reprogram the hrtimer interrupt
  719  * device. If called from the timer interrupt context we defer it to
  720  * softirq context.
  721  */
  722 void clock_was_set_delayed(void)
  723 {
  724         struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
  725 
  726         cpu_base->clock_was_set = 1;
  727         __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  728 }
  729 
  730 #else
  731 
  732 static inline int hrtimer_hres_active(void) { return 0; }
  733 static inline int hrtimer_is_hres_enabled(void) { return 0; }
  734 static inline int hrtimer_switch_to_hres(void) { return 0; }
  735 static inline void
  736 hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
  737 static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
  738                                             struct hrtimer_clock_base *base,
  739                                             int wakeup)
  740 {
  741         return 0;
  742 }
  743 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
  744 static inline void retrigger_next_event(void *arg) { }
  745 
  746 #endif /* CONFIG_HIGH_RES_TIMERS */
  747 
  748 /*
  749  * Clock realtime was set
  750  *
  751  * Change the offset of the realtime clock vs. the monotonic
  752  * clock.
  753  *
  754  * We might have to reprogram the high resolution timer interrupt. On
  755  * SMP we call the architecture specific code to retrigger _all_ high
  756  * resolution timer interrupts. On UP we just disable interrupts and
  757  * call the high resolution interrupt code.
  758  */
  759 void clock_was_set(void)
  760 {
  761 #ifdef CONFIG_HIGH_RES_TIMERS
  762         /* Retrigger the CPU local events everywhere */
  763         on_each_cpu(retrigger_next_event, NULL, 1);
  764 #endif
  765         timerfd_clock_was_set();
  766 }
  767 
  768 /*
  769  * During resume we might have to reprogram the high resolution timer
  770  * interrupt (on the local CPU):
  771  */
  772 void hrtimers_resume(void)
  773 {
  774         WARN_ONCE(!irqs_disabled(),
  775                   KERN_INFO "hrtimers_resume() called with IRQs enabled!");
  776 
  777         retrigger_next_event(NULL);
  778         timerfd_clock_was_set();
  779 }
  780 
  781 static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
  782 {
  783 #ifdef CONFIG_TIMER_STATS
  784         if (timer->start_site)
  785                 return;
  786         timer->start_site = __builtin_return_address(0);
  787         memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
  788         timer->start_pid = current->pid;
  789 #endif
  790 }
  791 
  792 static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
  793 {
  794 #ifdef CONFIG_TIMER_STATS
  795         timer->start_site = NULL;
  796 #endif
  797 }
  798 
  799 static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
  800 {
  801 #ifdef CONFIG_TIMER_STATS
  802         if (likely(!timer_stats_active))
  803                 return;
  804         timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
  805                                  timer->function, timer->start_comm, 0);
  806 #endif
  807 }
  808 
  809 /*
  810  * Counterpart to lock_hrtimer_base above:
  811  */
  812 static inline
  813 void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
  814 {
  815         raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
  816 }
  817 
  818 /**
  819  * hrtimer_forward - forward the timer expiry
  820  * @timer:      hrtimer to forward
  821  * @now:        forward past this time
  822  * @interval:   the interval to forward
  823  *
  824  * Forward the timer expiry so it will expire in the future.
  825  * Returns the number of overruns.
  826  */
  827 u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
  828 {
  829         u64 orun = 1;
  830         ktime_t delta;
  831 
  832         delta = ktime_sub(now, hrtimer_get_expires(timer));
  833 
  834         if (delta.tv64 < 0)
  835                 return 0;
  836 
  837         if (interval.tv64 < timer->base->resolution.tv64)
  838                 interval.tv64 = timer->base->resolution.tv64;
  839 
  840         if (unlikely(delta.tv64 >= interval.tv64)) {
  841                 s64 incr = ktime_to_ns(interval);
  842 
  843                 orun = ktime_divns(delta, incr);
  844                 hrtimer_add_expires_ns(timer, incr * orun);
  845                 if (hrtimer_get_expires_tv64(timer) > now.tv64)
  846                         return orun;
  847                 /*
  848                  * This (and the ktime_add() below) is the
  849                  * correction to hit the exact expiry:
  850                  */
  851                 orun++;
  852         }
  853         hrtimer_add_expires(timer, interval);
  854 
  855         return orun;
  856 }
  857 EXPORT_SYMBOL_GPL(hrtimer_forward);
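/*
 * [Editorial example] The canonical periodic-timer pattern built on
 * hrtimer_forward(). The callback name and the 10 ms period are
 * assumptions for the sketch; hrtimer_forward_now() is the helper
 * from <linux/hrtimer.h> that passes the current base time as @now.
 */
static enum hrtimer_restart example_periodic_fn(struct hrtimer *timer)
{
        /* Push the expiry forward by whole 10 ms periods past "now". */
        hrtimer_forward_now(timer, ktime_set(0, 10 * NSEC_PER_MSEC));
        return HRTIMER_RESTART;         /* stay queued for the next period */
}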
  858 
  859 /*
  860  * enqueue_hrtimer - internal function to (re)start a timer
  861  *
  862  * The timer is inserted in expiry order. Insertion into the
  863  * red-black tree is O(log n). Must hold the base lock.
  864  *
  865  * Returns 1 when the new timer is the leftmost timer in the tree.
  866  */
  867 static int enqueue_hrtimer(struct hrtimer *timer,
  868                            struct hrtimer_clock_base *base)
  869 {
  870         debug_activate(timer);
  871 
  872         timerqueue_add(&base->active, &timer->node);
  873         base->cpu_base->active_bases |= 1 << base->index;
  874 
  875         /*
  876          * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
  877          * state of a possibly running callback.
  878          */
  879         timer->state |= HRTIMER_STATE_ENQUEUED;
  880 
  881         return (&timer->node == base->active.next);
  882 }
  883 
  884 /*
  885  * __remove_hrtimer - internal function to remove a timer
  886  *
  887  * Caller must hold the base lock.
  888  *
  889  * High resolution timer mode reprograms the clock event device when the
  890  * timer is the one which expires next. The caller can disable this by setting
  891  * reprogram to zero. This is useful when the context does a reprogramming
  892  * anyway (e.g. the timer interrupt).
  893  */
  894 static void __remove_hrtimer(struct hrtimer *timer,
  895                              struct hrtimer_clock_base *base,
  896                              unsigned long newstate, int reprogram)
  897 {
  898         struct timerqueue_node *next_timer;
  899         if (!(timer->state & HRTIMER_STATE_ENQUEUED))
  900                 goto out;
  901 
  902         next_timer = timerqueue_getnext(&base->active);
  903         timerqueue_del(&base->active, &timer->node);
  904         if (&timer->node == next_timer) {
  905 #ifdef CONFIG_HIGH_RES_TIMERS
  906                 /* Reprogram the clock event device, if enabled */
  907                 if (reprogram && hrtimer_hres_active()) {
  908                         ktime_t expires;
  909 
  910                         expires = ktime_sub(hrtimer_get_expires(timer),
  911                                             base->offset);
  912                         if (base->cpu_base->expires_next.tv64 == expires.tv64)
  913                                 hrtimer_force_reprogram(base->cpu_base, 1);
  914                 }
  915 #endif
  916         }
  917         if (!timerqueue_getnext(&base->active))
  918                 base->cpu_base->active_bases &= ~(1 << base->index);
  919 out:
  920         timer->state = newstate;
  921 }
  922 
  923 /*
  924  * remove hrtimer, called with base lock held
  925  */
  926 static inline int
  927 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
  928 {
  929         if (hrtimer_is_queued(timer)) {
  930                 unsigned long state;
  931                 int reprogram;
  932 
  933                 /*
  934                  * Remove the timer and force reprogramming when high
  935                  * resolution mode is active and the timer is on the current
  936                  * CPU. If we remove a timer on another CPU, reprogramming is
  937                  * skipped. The interrupt event on this CPU is fired and
  938                  * reprogramming happens in the interrupt handler. This is a
  939                  * rare case and less expensive than an SMP call.
  940                  */
  941                 debug_deactivate(timer);
  942                 timer_stats_hrtimer_clear_start_info(timer);
  943                 reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
  944                 /*
  945                  * We must preserve the CALLBACK state flag here,
  946                  * otherwise we could move the timer base in
  947                  * switch_hrtimer_base.
  948                  */
  949                 state = timer->state & HRTIMER_STATE_CALLBACK;
  950                 __remove_hrtimer(timer, base, state, reprogram);
  951                 return 1;
  952         }
  953         return 0;
  954 }
  955 
  956 int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  957                 unsigned long delta_ns, const enum hrtimer_mode mode,
  958                 int wakeup)
  959 {
  960         struct hrtimer_clock_base *base, *new_base;
  961         unsigned long flags;
  962         int ret, leftmost;
  963 
  964         base = lock_hrtimer_base(timer, &flags);
  965 
  966         /* Remove an active timer from the queue: */
  967         ret = remove_hrtimer(timer, base);
  968 
  969         /* Switch the timer base, if necessary: */
  970         new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
  971 
  972         if (mode & HRTIMER_MODE_REL) {
  973                 tim = ktime_add_safe(tim, new_base->get_time());
  974                 /*
  975                  * CONFIG_TIME_LOW_RES is a temporary way for architectures
  976                  * to signal that they simply return xtime in
  977                  * do_gettimeoffset(). In this case we want to round up by
  978                  * resolution when starting a relative timer, to avoid short
  979                  * timeouts. This will go away with the GTOD framework.
  980                  */
  981 #ifdef CONFIG_TIME_LOW_RES
  982                 tim = ktime_add_safe(tim, base->resolution);
  983 #endif
  984         }
  985 
  986         hrtimer_set_expires_range_ns(timer, tim, delta_ns);
  987 
  988         timer_stats_hrtimer_set_start_info(timer);
  989 
  990         leftmost = enqueue_hrtimer(timer, new_base);
  991 
  992         /*
  993          * Only allow reprogramming if the new base is on this CPU.
  994          * (it might still be on another CPU if the timer was pending)
  995          *
  996          * XXX send_remote_softirq() ?
  997          */
  998         if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
  999                 hrtimer_enqueue_reprogram(timer, new_base, wakeup);
 1000 
 1001         unlock_hrtimer_base(timer, &flags);
 1002 
 1003         return ret;
 1004 }
 1005 
 1006 /**
 1007  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
 1008  * @timer:      the timer to be added
 1009  * @tim:        expiry time
 1010  * @delta_ns:   "slack" range for the timer
 1011  * @mode:       expiry mode: absolute (HRTIMER_MODE_ABS) or relative (HRTIMER_MODE_REL)
 1012  *
 1013  * Returns:
 1014  *  0 on success
 1015  *  1 when the timer was active
 1016  */
 1017 int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 1018                 unsigned long delta_ns, const enum hrtimer_mode mode)
 1019 {
 1020         return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
 1021 }
 1022 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
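/*
 * [Editorial example] Starting a timer with slack: a 1 ms relative
 * timeout that may expire anywhere in [1 ms, 1 ms + 200 us], allowing
 * the expiry to be coalesced with nearby events. The wrapper name is
 * an assumption for the sketch.
 */
static void example_start_with_slack(struct hrtimer *timer)
{
        hrtimer_start_range_ns(timer, ktime_set(0, NSEC_PER_MSEC),
                               200 * NSEC_PER_USEC, HRTIMER_MODE_REL);
}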
 1023 
 1024 /**
 1025  * hrtimer_start - (re)start an hrtimer on the current CPU
 1026  * @timer:      the timer to be added
 1027  * @tim:        expiry time
 1028  * @mode:       expiry mode: absolute (HRTIMER_MODE_ABS) or relative (HRTIMER_MODE_REL)
 1029  *
 1030  * Returns:
 1031  *  0 on success
 1032  *  1 when the timer was active
 1033  */
 1034 int
 1035 hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 1036 {
 1037         return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
 1038 }
 1039 EXPORT_SYMBOL_GPL(hrtimer_start);
 1040 
 1041 
 1042 /**
 1043  * hrtimer_try_to_cancel - try to deactivate a timer
 1044  * @timer:      hrtimer to stop
 1045  *
 1046  * Returns:
 1047  *  0 when the timer was not active
 1048  *  1 when the timer was active
 1049  * -1 when the timer is currently executing the callback function and
 1050  *    cannot be stopped
 1051  */
 1052 int hrtimer_try_to_cancel(struct hrtimer *timer)
 1053 {
 1054         struct hrtimer_clock_base *base;
 1055         unsigned long flags;
 1056         int ret = -1;
 1057 
 1058         base = lock_hrtimer_base(timer, &flags);
 1059 
 1060         if (!hrtimer_callback_running(timer))
 1061                 ret = remove_hrtimer(timer, base);
 1062 
 1063         unlock_hrtimer_base(timer, &flags);
 1064 
 1065         return ret;
 1066 
 1067 }
 1068 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 1069 
 1070 /**
 1071  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
 1072  * @timer:      the timer to be cancelled
 1073  *
 1074  * Returns:
 1075  *  0 when the timer was not active
 1076  *  1 when the timer was active
 1077  */
 1078 int hrtimer_cancel(struct hrtimer *timer)
 1079 {
 1080         for (;;) {
 1081                 int ret = hrtimer_try_to_cancel(timer);
 1082 
 1083                 if (ret >= 0)
 1084                         return ret;
 1085                 cpu_relax();
 1086         }
 1087 }
 1088 EXPORT_SYMBOL_GPL(hrtimer_cancel);
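/*
 * [Editorial example] Why hrtimer_cancel() rather than
 * hrtimer_try_to_cancel() on teardown paths: it also waits for a
 * callback that is already running, so the timer is guaranteed idle
 * afterwards. struct example_dev is an assumed type for the sketch.
 */
struct example_dev {
        struct hrtimer timer;
};

static void example_teardown(struct example_dev *dev)
{
        hrtimer_cancel(&dev->timer);
        /* The callback can no longer run; dev may be freed safely now. */
}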
 1089 
 1090 /**
 1091  * hrtimer_get_remaining - get remaining time for the timer
 1092  * @timer:      the timer to read
 1093  */
 1094 ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 1095 {
 1096         unsigned long flags;
 1097         ktime_t rem;
 1098 
 1099         lock_hrtimer_base(timer, &flags);
 1100         rem = hrtimer_expires_remaining(timer);
 1101         unlock_hrtimer_base(timer, &flags);
 1102 
 1103         return rem;
 1104 }
 1105 EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
 1106 
 1107 #ifdef CONFIG_NO_HZ
 1108 /**
 1109  * hrtimer_get_next_event - get the time until next expiry event
 1110  *
 1111  * Returns the delta to the next expiry event or KTIME_MAX if no timer
 1112  * is pending.
 1113  */
 1114 ktime_t hrtimer_get_next_event(void)
 1115 {
 1116         struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 1117         struct hrtimer_clock_base *base = cpu_base->clock_base;
 1118         ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
 1119         unsigned long flags;
 1120         int i;
 1121 
 1122         raw_spin_lock_irqsave(&cpu_base->lock, flags);
 1123 
 1124         if (!hrtimer_hres_active()) {
 1125                 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
 1126                         struct hrtimer *timer;
 1127                         struct timerqueue_node *next;
 1128 
 1129                         next = timerqueue_getnext(&base->active);
 1130                         if (!next)
 1131                                 continue;
 1132 
 1133                         timer = container_of(next, struct hrtimer, node);
 1134                         delta.tv64 = hrtimer_get_expires_tv64(timer);
 1135                         delta = ktime_sub(delta, base->get_time());
 1136                         if (delta.tv64 < mindelta.tv64)
 1137                                 mindelta.tv64 = delta.tv64;
 1138                 }
 1139         }
 1140 
 1141         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 1142 
 1143         if (mindelta.tv64 < 0)
 1144                 mindelta.tv64 = 0;
 1145         return mindelta;
 1146 }
 1147 #endif
 1148 
 1149 static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 1150                            enum hrtimer_mode mode)
 1151 {
 1152         struct hrtimer_cpu_base *cpu_base;
 1153         int base;
 1154 
 1155         memset(timer, 0, sizeof(struct hrtimer));
 1156 
 1157         cpu_base = &__raw_get_cpu_var(hrtimer_bases);
 1158 
 1159         if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
 1160                 clock_id = CLOCK_MONOTONIC;
 1161 
 1162         base = hrtimer_clockid_to_base(clock_id);
 1163         timer->base = &cpu_base->clock_base[base];
 1164         timerqueue_init(&timer->node);
 1165 
 1166 #ifdef CONFIG_TIMER_STATS
 1167         timer->start_site = NULL;
 1168         timer->start_pid = -1;
 1169         memset(timer->start_comm, 0, TASK_COMM_LEN);
 1170 #endif
 1171 }
 1172 
 1173 /**
 1174  * hrtimer_init - initialize a timer to the given clock
 1175  * @timer:      the timer to be initialized
 1176  * @clock_id:   the clock to be used
 1177  * @mode:       timer mode abs/rel
 1178  */
 1179 void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 1180                   enum hrtimer_mode mode)
 1181 {
 1182         debug_init(timer, clock_id, mode);
 1183         __hrtimer_init(timer, clock_id, mode);
 1184 }
 1185 EXPORT_SYMBOL_GPL(hrtimer_init);
 1186 
 1187 /**
 1188  * hrtimer_get_res - get the timer resolution for a clock
 1189  * @which_clock: which clock to query
 1190  * @tp:          pointer to timespec variable to store the resolution
 1191  *
 1192  * Store the resolution of the clock selected by @which_clock in the
 1193  * variable pointed to by @tp.
 1194  */
 1195 int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 1196 {
 1197         struct hrtimer_cpu_base *cpu_base;
 1198         int base = hrtimer_clockid_to_base(which_clock);
 1199 
 1200         cpu_base = &__raw_get_cpu_var(hrtimer_bases);
 1201         *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
 1202 
 1203         return 0;
 1204 }
 1205 EXPORT_SYMBOL_GPL(hrtimer_get_res);
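/*
 * [Editorial note] hrtimer_get_res() backs clock_getres(2) for the
 * hrtimer-based clocks. In high resolution mode the stored resolution
 * is KTIME_HIGH_RES (1 ns), otherwise KTIME_LOW_RES (one tick), so a
 * user-space caller like
 *
 *      struct timespec res;
 *      clock_getres(CLOCK_MONOTONIC, &res);
 *
 * sees res.tv_nsec == 1 with highres active and NSEC_PER_SEC / HZ
 * otherwise.
 */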
 1206 
 1207 static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 1208 {
 1209         struct hrtimer_clock_base *base = timer->base;
 1210         struct hrtimer_cpu_base *cpu_base = base->cpu_base;
 1211         enum hrtimer_restart (*fn)(struct hrtimer *);
 1212         int restart;
 1213 
 1214         WARN_ON(!irqs_disabled());
 1215 
 1216         debug_deactivate(timer);
 1217         __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
 1218         timer_stats_account_hrtimer(timer);
 1219         fn = timer->function;
 1220 
 1221         /*
 1222          * Because we run timers from hardirq context, there is no chance
 1223          * they get migrated to another CPU, therefore it's safe to unlock
 1224          * the timer base.
 1225          */
 1226         raw_spin_unlock(&cpu_base->lock);
 1227         trace_hrtimer_expire_entry(timer, now);
 1228         restart = fn(timer);
 1229         trace_hrtimer_expire_exit(timer);
 1230         raw_spin_lock(&cpu_base->lock);
 1231 
 1232         /*
 1233          * Note: We clear the CALLBACK bit after enqueue_hrtimer and
 1234          * we do not reprogram the event hardware. That happens either
 1235          * in hrtimer_start_range_ns() or in hrtimer_interrupt().
 1236          */
 1237         if (restart != HRTIMER_NORESTART) {
 1238                 BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
 1239                 enqueue_hrtimer(timer, base);
 1240         }
 1241 
 1242         WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
 1243 
 1244         timer->state &= ~HRTIMER_STATE_CALLBACK;
 1245 }
 1246 
 1247 #ifdef CONFIG_HIGH_RES_TIMERS
 1248 
 1249 /*
 1250  * High resolution timer interrupt
 1251  * Called with interrupts disabled
 1252  */
 1253 void hrtimer_interrupt(struct clock_event_device *dev)
 1254 {
 1255         struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 1256         ktime_t expires_next, now, entry_time, delta;
 1257         int i, retries = 0;
 1258 
 1259         BUG_ON(!cpu_base->hres_active);
 1260         cpu_base->nr_events++;
 1261         dev->next_event.tv64 = KTIME_MAX;
 1262 
 1263         raw_spin_lock(&cpu_base->lock);
 1264         entry_time = now = hrtimer_update_base(cpu_base);
 1265 retry:
 1266         expires_next.tv64 = KTIME_MAX;
 1267         /*
 1268          * We set expires_next to KTIME_MAX here with cpu_base->lock
 1269          * held to prevent a timer from being enqueued in our queue via
 1270          * the migration code. This does not affect enqueueing of
 1271          * timers which run their callback and need to be requeued on
 1272          * this CPU.
 1273          */
 1274         cpu_base->expires_next.tv64 = KTIME_MAX;
 1275 
 1276         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 1277                 struct hrtimer_clock_base *base;
 1278                 struct timerqueue_node *node;
 1279                 ktime_t basenow;
 1280 
 1281                 if (!(cpu_base->active_bases & (1 << i)))
 1282                         continue;
 1283 
 1284                 base = cpu_base->clock_base + i;
 1285                 basenow = ktime_add(now, base->offset);
 1286 
 1287                 while ((node = timerqueue_getnext(&base->active))) {
 1288                         struct hrtimer *timer;
 1289 
 1290                         timer = container_of(node, struct hrtimer, node);
 1291 
 1292                         /*
 1293                          * The immediate goal for using the softexpires is
 1294                          * minimizing wakeups, not running timers at the
 1295                          * earliest interrupt after their soft expiration.
 1296                          * This allows us to avoid using a Priority Search
 1297                          * Tree, which can answer a stabbing query for
 1298                          * overlapping intervals and instead use the simple
 1299                          * BST we already have.
 1300                          * We don't add extra wakeups by delaying timers that
 1301                          * are to the right of a not-yet-expired timer, because that
 1302                          * timer will have to trigger a wakeup anyway.
 1303                          */
 1304 
 1305                         if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
 1306                                 ktime_t expires;
 1307 
 1308                                 expires = ktime_sub(hrtimer_get_expires(timer),
 1309                                                     base->offset);
 1310                                 if (expires.tv64 < expires_next.tv64)
 1311                                         expires_next = expires;
 1312                                 break;
 1313                         }
 1314 
 1315                         __run_hrtimer(timer, &basenow);
 1316                 }
 1317         }
 1318 
 1319         /*
 1320          * Store the new expiry value so the migration code can verify
 1321          * against it.
 1322          */
 1323         cpu_base->expires_next = expires_next;
 1324         raw_spin_unlock(&cpu_base->lock);
 1325 
 1326         /* Reprogramming necessary ? */
 1327         if (expires_next.tv64 == KTIME_MAX ||
 1328             !tick_program_event(expires_next, 0)) {
 1329                 cpu_base->hang_detected = 0;
 1330                 return;
 1331         }
 1332 
 1333         /*
 1334          * The next timer was already expired due to:
 1335          * - tracing
 1336          * - long lasting callbacks
 1337          * - being scheduled away when running in a VM
 1338          *
 1339          * We need to prevent looping forever in the hrtimer
 1340          * interrupt routine. We give it 3 attempts to avoid
 1341          * overreacting on some spurious event.
 1342          *
 1343          * Acquire base lock for updating the offsets and retrieving
 1344          * the current time.
 1345          */
 1346         raw_spin_lock(&cpu_base->lock);
 1347         now = hrtimer_update_base(cpu_base);
 1348         cpu_base->nr_retries++;
 1349         if (++retries < 3)
 1350                 goto retry;
 1351         /*
 1352          * Give the system a chance to do something other than looping
 1353          * here. We stored the entry time, so we know exactly how long
 1354          * we spent here. We schedule the next event this amount of
 1355          * time away.
 1356          */
 1357         cpu_base->nr_hangs++;
 1358         cpu_base->hang_detected = 1;
 1359         raw_spin_unlock(&cpu_base->lock);
 1360         delta = ktime_sub(now, entry_time);
 1361         if (delta.tv64 > cpu_base->max_hang_time.tv64)
 1362                 cpu_base->max_hang_time = delta;
 1363         /*
 1364          * Limit it to a sensible value as we enforce a longer
 1365          * delay. Give the CPU at least 100ms to catch up.
 1366          */
 1367         if (delta.tv64 > 100 * NSEC_PER_MSEC)
 1368                 expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
 1369         else
 1370                 expires_next = ktime_add(now, delta);
 1371         tick_program_event(expires_next, 1);
 1372         printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
 1373                     ktime_to_ns(delta));
 1374 }
 1375 
 1376 /*
 1377  * local version of hrtimer_peek_ahead_timers() called with interrupts
 1378  * disabled.
 1379  */
 1380 static void __hrtimer_peek_ahead_timers(void)
 1381 {
 1382         struct tick_device *td;
 1383 
 1384         if (!hrtimer_hres_active())
 1385                 return;
 1386 
 1387         td = &__get_cpu_var(tick_cpu_device);
 1388         if (td && td->evtdev)
 1389                 hrtimer_interrupt(td->evtdev);
 1390 }
 1391 
 1392 /**
 1393  * hrtimer_peek_ahead_timers -- run soft-expired timers now
 1394  *
 1395  * hrtimer_peek_ahead_timers will peek at the timer queue of
 1396  * the current cpu and check if there are any timers for which
 1397  * the soft expiry time has passed. If any such timers exist,
 1398  * they are run immediately and then removed from the timer queue.
 1399  *
 1400  */
 1401 void hrtimer_peek_ahead_timers(void)
 1402 {
 1403         unsigned long flags;
 1404 
 1405         local_irq_save(flags);
 1406         __hrtimer_peek_ahead_timers();
 1407         local_irq_restore(flags);
 1408 }
 1409 
 1410 static void run_hrtimer_softirq(struct softirq_action *h)
 1411 {
 1412         struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 1413 
 1414         if (cpu_base->clock_was_set) {
 1415                 cpu_base->clock_was_set = 0;
 1416                 clock_was_set();
 1417         }
 1418 
 1419         hrtimer_peek_ahead_timers();
 1420 }
 1421 
 1422 #else /* CONFIG_HIGH_RES_TIMERS */
 1423 
 1424 static inline void __hrtimer_peek_ahead_timers(void) { }
 1425 
 1426 #endif  /* !CONFIG_HIGH_RES_TIMERS */
 1427 
 1428 /*
 1429  * Called from timer softirq every jiffy, expire hrtimers:
 1430  *
 1431  * For HRT it is the fallback code to run the softirq in the timer
 1432  * softirq context in case the hrtimer initialization failed or has
 1433  * not been done yet.
 1434  */
 1435 void hrtimer_run_pending(void)
 1436 {
 1437         if (hrtimer_hres_active())
 1438                 return;
 1439 
 1440         /*
 1441          * This _is_ ugly: We have to check in the softirq context
 1442          * whether we can switch to highres and/or nohz mode. The
 1443          * clocksource switch happens in the timer interrupt with
 1444          * xtime_lock held. Notification from there only sets the
 1445          * check bit in the tick_oneshot code, otherwise we might
 1446          * deadlock vs. xtime_lock.
 1447          */
 1448         if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
 1449                 hrtimer_switch_to_hres();
 1450 }
 1451 
 1452 /*
 1453  * Called from hardirq context every jiffy
 1454  */
 1455 void hrtimer_run_queues(void)
 1456 {
 1457         struct timerqueue_node *node;
 1458         struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 1459         struct hrtimer_clock_base *base;
 1460         int index, gettime = 1;
 1461 
 1462         if (hrtimer_hres_active())
 1463                 return;
 1464 
 1465         for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
 1466                 base = &cpu_base->clock_base[index];
 1467                 if (!timerqueue_getnext(&base->active))
 1468                         continue;
 1469 
 1470                 if (gettime) {
 1471                         hrtimer_get_softirq_time(cpu_base);
 1472                         gettime = 0;
 1473                 }
 1474 
 1475                 raw_spin_lock(&cpu_base->lock);
 1476 
 1477                 while ((node = timerqueue_getnext(&base->active))) {
 1478                         struct hrtimer *timer;
 1479 
 1480                         timer = container_of(node, struct hrtimer, node);
 1481                         if (base->softirq_time.tv64 <=
 1482                                         hrtimer_get_expires_tv64(timer))
 1483                                 break;
 1484 
 1485                         __run_hrtimer(timer, &base->softirq_time);
 1486                 }
 1487                 raw_spin_unlock(&cpu_base->lock);
 1488         }
 1489 }
 1490 
 1491 /*
 1492  * Sleep-related functions:
 1493  */
 1494 static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 1495 {
 1496         struct hrtimer_sleeper *t =
 1497                 container_of(timer, struct hrtimer_sleeper, timer);
 1498         struct task_struct *task = t->task;
 1499 
 1500         t->task = NULL;
 1501         if (task)
 1502                 wake_up_process(task);
 1503 
 1504         return HRTIMER_NORESTART;
 1505 }
 1506 
 1507 void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
 1508 {
 1509         sl->timer.function = hrtimer_wakeup;
 1510         sl->task = task;
 1511 }
 1512 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 1513 
 1514 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
 1515 {
 1516         hrtimer_init_sleeper(t, current);
 1517 
 1518         do {
 1519                 set_current_state(TASK_INTERRUPTIBLE);
 1520                 hrtimer_start_expires(&t->timer, mode);
 1521                 if (!hrtimer_active(&t->timer))
 1522                         t->task = NULL;
 1523 
 1524                 if (likely(t->task))
 1525                         schedule();
 1526 
 1527                 hrtimer_cancel(&t->timer);
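                /*
                 * After the first start the stored expiry is absolute,
                 * so any further loop iteration must restart in
                 * absolute mode.
                 */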
 1528                 mode = HRTIMER_MODE_ABS;
 1529 
 1530         } while (t->task && !signal_pending(current));
 1531 
 1532         __set_current_state(TASK_RUNNING);
 1533 
 1534         return t->task == NULL;
 1535 }
 1536 
 1537 static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
 1538 {
 1539         struct timespec rmt;
 1540         ktime_t rem;
 1541 
 1542         rem = hrtimer_expires_remaining(timer);
 1543         if (rem.tv64 <= 0)
 1544                 return 0;
 1545         rmt = ktime_to_timespec(rem);
 1546 
 1547         if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
 1548                 return -EFAULT;
 1549 
 1550         return 1;
 1551 }
 1552 
 1553 long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 1554 {
 1555         struct hrtimer_sleeper t;
 1556         struct timespec __user  *rmtp;
 1557         int ret = 0;
 1558 
 1559         hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
 1560                                 HRTIMER_MODE_ABS);
 1561         hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
 1562 
 1563         if (do_nanosleep(&t, HRTIMER_MODE_ABS))
 1564                 goto out;
 1565 
 1566         rmtp = restart->nanosleep.rmtp;
 1567         if (rmtp) {
 1568                 ret = update_rmtp(&t.timer, rmtp);
 1569                 if (ret <= 0)
 1570                         goto out;
 1571         }
 1572 
 1573         /* The other values in restart are already filled in */
 1574         ret = -ERESTART_RESTARTBLOCK;
 1575 out:
 1576         destroy_hrtimer_on_stack(&t.timer);
 1577         return ret;
 1578 }
 1579 
 1580 long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 1581                        const enum hrtimer_mode mode, const clockid_t clockid)
 1582 {
 1583         struct restart_block *restart;
 1584         struct hrtimer_sleeper t;
 1585         int ret = 0;
 1586         unsigned long slack;
 1587 
 1588         slack = current->timer_slack_ns;
 1589         if (rt_task(current))
 1590                 slack = 0;
 1591 
 1592         hrtimer_init_on_stack(&t.timer, clockid, mode);
 1593         hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
 1594         if (do_nanosleep(&t, mode))
 1595                 goto out;
 1596 
 1597         /* Absolute timers do not update the rmtp value and are not restarted: */
 1598         if (mode == HRTIMER_MODE_ABS) {
 1599                 ret = -ERESTARTNOHAND;
 1600                 goto out;
 1601         }
 1602 
 1603         if (rmtp) {
 1604                 ret = update_rmtp(&t.timer, rmtp);
 1605                 if (ret <= 0)
 1606                         goto out;
 1607         }
 1608 
 1609         restart = &current_thread_info()->restart_block;
 1610         restart->fn = hrtimer_nanosleep_restart;
 1611         restart->nanosleep.clockid = t.timer.base->clockid;
 1612         restart->nanosleep.rmtp = rmtp;
 1613         restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
 1614 
 1615         ret = -ERESTART_RESTARTBLOCK;
 1616 out:
 1617         destroy_hrtimer_on_stack(&t.timer);
 1618         return ret;
 1619 }
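
/*
 * Flow note (summarizing the code above): when a relative nanosleep is
 * interrupted by a signal, the remaining time is reported via rmtp and
 * the absolute expiry is saved in the task's restart block along with
 * -ERESTART_RESTARTBLOCK; the signal code then re-enters the sleep via
 * hrtimer_nanosleep_restart(), which resumes in HRTIMER_MODE_ABS so no
 * time is lost across the restart.
 */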
 1620 
 1621 SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
 1622                 struct timespec __user *, rmtp)
 1623 {
 1624         struct timespec tu;
 1625 
 1626         if (copy_from_user(&tu, rqtp, sizeof(tu)))
 1627                 return -EFAULT;
 1628 
 1629         if (!timespec_valid(&tu))
 1630                 return -EINVAL;
 1631 
 1632         return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
 1633 }
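
/*
 * Userspace view (illustrative sketch, not part of the original file):
 * the rmtp handling above is what makes the classic interrupted-sleep
 * retry loop work in applications:
 *
 *      struct timespec req = { .tv_sec = 1, .tv_nsec = 0 }, rem;
 *
 *      while (nanosleep(&req, &rem) == -1 && errno == EINTR)
 *              req = rem;      // resume with the remaining time
 */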
 1634 
 1635 /*
 1636  * Functions related to boot-time initialization:
 1637  */
 1638 static void __cpuinit init_hrtimers_cpu(int cpu)
 1639 {
 1640         struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 1641         int i;
 1642 
 1643         raw_spin_lock_init(&cpu_base->lock);
 1644 
 1645         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 1646                 cpu_base->clock_base[i].cpu_base = cpu_base;
 1647                 timerqueue_init_head(&cpu_base->clock_base[i].active);
 1648         }
 1649 
 1650         hrtimer_init_hres(cpu_base);
 1651 }
 1652 
 1653 #ifdef CONFIG_HOTPLUG_CPU
 1654 
 1655 static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 1656                                 struct hrtimer_clock_base *new_base)
 1657 {
 1658         struct hrtimer *timer;
 1659         struct timerqueue_node *node;
 1660 
 1661         while ((node = timerqueue_getnext(&old_base->active))) {
 1662                 timer = container_of(node, struct hrtimer, node);
 1663                 BUG_ON(hrtimer_callback_running(timer));
 1664                 debug_deactivate(timer);
 1665 
 1666                 /*
 1667                  * Mark it as STATE_MIGRATE, not INACTIVE; otherwise the
 1668                  * timer could be seen as !active and just vanish away
 1669                  * under us on another CPU.
 1670                  */
 1671                 __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
 1672                 timer->base = new_base;
 1673                 /*
 1674                  * Enqueue the timers on the new CPU. This does not
 1675                  * reprogram the event device even if a migrated timer
 1676                  * expires before the current earliest timer on this
 1677                  * CPU; instead we run hrtimer_interrupt after the
 1678                  * migration is complete, which sorts out already
 1679                  * expired timers and reprograms the event device.
 1680                  */
 1681                 enqueue_hrtimer(timer, new_base);
 1682 
 1683                 /* Clear the migration state bit */
 1684                 timer->state &= ~HRTIMER_STATE_MIGRATE;
 1685         }
 1686 }
 1687 
 1688 static void migrate_hrtimers(int scpu)
 1689 {
 1690         struct hrtimer_cpu_base *old_base, *new_base;
 1691         int i;
 1692 
 1693         BUG_ON(cpu_online(scpu));
 1694         tick_cancel_sched_timer(scpu);
 1695 
 1696         local_irq_disable();
 1697         old_base = &per_cpu(hrtimer_bases, scpu);
 1698         new_base = &__get_cpu_var(hrtimer_bases);
 1699         /*
 1700          * The caller is globally serialized and nobody else
 1701          * takes two locks at once, so deadlock is not possible.
 1702          */
 1703         raw_spin_lock(&new_base->lock);
 1704         raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 1705 
 1706         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 1707                 migrate_hrtimer_list(&old_base->clock_base[i],
 1708                                      &new_base->clock_base[i]);
 1709         }
 1710 
 1711         raw_spin_unlock(&old_base->lock);
 1712         raw_spin_unlock(&new_base->lock);
 1713 
 1714         /* Check whether we have expired work to do */
 1715         __hrtimer_peek_ahead_timers();
 1716         local_irq_enable();
 1717 }
 1718 
 1719 #endif /* CONFIG_HOTPLUG_CPU */
 1720 
 1721 static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
 1722                                         unsigned long action, void *hcpu)
 1723 {
 1724         int scpu = (long)hcpu;
 1725 
 1726         switch (action) {
 1727 
 1728         case CPU_UP_PREPARE:
 1729         case CPU_UP_PREPARE_FROZEN:
 1730                 init_hrtimers_cpu(scpu);
 1731                 break;
 1732 
 1733 #ifdef CONFIG_HOTPLUG_CPU
 1734         case CPU_DYING:
 1735         case CPU_DYING_FROZEN:
 1736                 clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
 1737                 break;
 1738         case CPU_DEAD:
 1739         case CPU_DEAD_FROZEN:
 1740         {
 1741                 clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
 1742                 migrate_hrtimers(scpu);
 1743                 break;
 1744         }
 1745 #endif
 1746 
 1747         default:
 1748                 break;
 1749         }
 1750 
 1751         return NOTIFY_OK;
 1752 }
 1753 
 1754 static struct notifier_block __cpuinitdata hrtimers_nb = {
 1755         .notifier_call = hrtimer_cpu_notify,
 1756 };
 1757 
 1758 void __init hrtimers_init(void)
 1759 {
 1760         hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
 1761                           (void *)(long)smp_processor_id());
 1762         register_cpu_notifier(&hrtimers_nb);
 1763 #ifdef CONFIG_HIGH_RES_TIMERS
 1764         open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
 1765 #endif
 1766 }
 1767 
 1768 /**
 1769  * schedule_hrtimeout_range_clock - sleep until timeout
 1770  * @expires:    timeout value (ktime_t)
 1771  * @delta:      slack in expires timeout (ktime_t)
 1772  * @mode:       timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
 1773  * @clock:      timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
 1774  */
 1775 int __sched
 1776 schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
 1777                                const enum hrtimer_mode mode, int clock)
 1778 {
 1779         struct hrtimer_sleeper t;
 1780 
 1781         /*
 1782          * Optimize when a zero timeout value is given. It does not
 1783          * matter whether this is an absolute or a relative time.
 1784          */
 1785         if (expires && !expires->tv64) {
 1786                 __set_current_state(TASK_RUNNING);
 1787                 return 0;
 1788         }
 1789 
 1790         /*
 1791          * A NULL parameter means "infinite"
 1792          */
 1793         if (!expires) {
 1794                 schedule();
 1795                 __set_current_state(TASK_RUNNING);
 1796                 return -EINTR;
 1797         }
 1798 
 1799         hrtimer_init_on_stack(&t.timer, clock, mode);
 1800         hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 1801 
 1802         hrtimer_init_sleeper(&t, current);
 1803 
 1804         hrtimer_start_expires(&t.timer, mode);
 1805         if (!hrtimer_active(&t.timer))
 1806                 t.task = NULL;
 1807 
 1808         if (likely(t.task))
 1809                 schedule();
 1810 
 1811         hrtimer_cancel(&t.timer);
 1812         destroy_hrtimer_on_stack(&t.timer);
 1813 
 1814         __set_current_state(TASK_RUNNING);
 1815 
 1816         return !t.task ? 0 : -EINTR;
 1817 }
 1818 
 1819 /**
 1820  * schedule_hrtimeout_range - sleep until timeout
 1821  * @expires:    timeout value (ktime_t)
 1822  * @delta:      slack in expires timeout (ktime_t)
 1823  * @mode:       timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
 1824  *
 1825  * Make the current task sleep until the given expiry time has
 1826  * elapsed. The routine will return immediately unless
 1827  * the current task state has been set (see set_current_state()).
 1828  *
 1829  * The @delta argument gives the kernel the freedom to schedule the
 1830  * actual wakeup to a time that is both power and performance friendly.
 1831  * The kernel gives the normal best-effort behavior for "@expires+@delta",
 1832  * and may decide to fire the timer earlier, but never earlier than @expires.
 1833  *
 1834  * You can set the task state as follows -
 1835  *
 1836  * %TASK_UNINTERRUPTIBLE - at least the @expires time is guaranteed to
 1837  * pass before the routine returns.
 1838  *
 1839  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 1840  * delivered to the current task.
 1841  *
 1842  * The current task state is guaranteed to be TASK_RUNNING when this
 1843  * routine returns.
 1844  *
 1845  * Returns 0 when the timer has expired, otherwise -EINTR.
 1846  */
 1847 int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
 1848                                      const enum hrtimer_mode mode)
 1849 {
 1850         return schedule_hrtimeout_range_clock(expires, delta, mode,
 1851                                               CLOCK_MONOTONIC);
 1852 }
 1853 EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
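
/*
 * Usage sketch (illustrative only, not part of the original file):
 * sleep for roughly 5 ms with 100 us of slack, so the wakeup can be
 * coalesced with other nearby timers; the task state must be set
 * before the call, as documented above.
 */
#if 0
{
        ktime_t to = ktime_set(0, 5 * NSEC_PER_MSEC);

        set_current_state(TASK_INTERRUPTIBLE);
        if (schedule_hrtimeout_range(&to, 100 * NSEC_PER_USEC,
                                     HRTIMER_MODE_REL) == -EINTR) {
                /* woken early by a signal */
        }
}
#endif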
 1854 
 1855 /**
 1856  * schedule_hrtimeout - sleep until timeout
 1857  * @expires:    timeout value (ktime_t)
 1858  * @mode:       timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
 1859  *
 1860  * Make the current task sleep until the given expiry time has
 1861  * elapsed. The routine will return immediately unless
 1862  * the current task state has been set (see set_current_state()).
 1863  *
 1864  * You can set the task state as follows -
 1865  *
 1866  * %TASK_UNINTERRUPTIBLE - at least the @expires time is guaranteed to
 1867  * pass before the routine returns.
 1868  *
 1869  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 1870  * delivered to the current task.
 1871  *
 1872  * The current task state is guaranteed to be TASK_RUNNING when this
 1873  * routine returns.
 1874  *
 1875  * Returns 0 when the timer has expired, otherwise -EINTR.
 1876  */
 1877 int __sched schedule_hrtimeout(ktime_t *expires,
 1878                                const enum hrtimer_mode mode)
 1879 {
 1880         return schedule_hrtimeout_range(expires, 0, mode);
 1881 }
 1882 EXPORT_SYMBOL_GPL(schedule_hrtimeout);
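
/*
 * Usage sketch (illustrative only, not part of the original file):
 * sleep until an absolute CLOCK_MONOTONIC deadline; with
 * %TASK_UNINTERRUPTIBLE the whole interval is guaranteed to pass.
 */
#if 0
{
        ktime_t deadline = ktime_add_ns(ktime_get(), 250 * NSEC_PER_MSEC);

        set_current_state(TASK_UNINTERRUPTIBLE);
        schedule_hrtimeout(&deadline, HRTIMER_MODE_ABS);
}
#endif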
