/*
 *  linux/kernel/timer.c
 *
 *  Kernel internal timers, kernel timekeeping, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/timex.h>
#include <linux/delay.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>

#include <asm/uaccess.h>

/*
 * Timekeeping variables
 */

long tick = (1000000 + HZ/2) / HZ;      /* timer interrupt period */

/* The current time */
struct timeval xtime __attribute__ ((aligned (16)));

/* Don't completely fail for HZ > 500.  */
int tickadj = 500/HZ ? : 1;             /* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);

/*
 * phase-lock loop variables
 */
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_OK;               /* clock synchronization status */
int time_status = STA_UNSYNC;           /* clock status bits            */
long time_offset;                       /* time adjustment (us)         */
long time_constant = 2;                 /* pll time constant            */
long time_tolerance = MAXFREQ;          /* frequency tolerance (ppm)    */
long time_precision = 1;                /* clock precision (us)         */
long time_maxerror = NTP_PHASE_LIMIT;   /* maximum error (us)           */
long time_esterror = NTP_PHASE_LIMIT;   /* estimated error (us)         */
long time_phase;                        /* phase offset (scaled us)     */
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
                                        /* frequency offset (scaled ppm)*/
long time_adj;                          /* tick adjust (scaled 1 / HZ)  */
long time_reftime;                      /* time at last adjustment (s)  */

long time_adjust;
long time_adjust_step;

unsigned long event;

extern int do_setitimer(int, struct itimerval *, struct itimerval *);

unsigned long volatile jiffies;

unsigned int * prof_buffer;
unsigned long prof_len;
unsigned long prof_shift;

/*
 * Event timer code
 */
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

struct timer_vec {
        int index;
        struct list_head vec[TVN_SIZE];
};

struct timer_vec_root {
        int index;
        struct list_head vec[TVR_SIZE];
};

static struct timer_vec tv5;
static struct timer_vec tv4;
static struct timer_vec tv3;
static struct timer_vec tv2;
static struct timer_vec_root tv1;

static struct timer_vec * const tvecs[] = {
        (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};

static struct list_head * run_timer_list_running;

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))

void init_timervecs (void)
{
        int i;

        for (i = 0; i < TVN_SIZE; i++) {
                INIT_LIST_HEAD(tv5.vec + i);
                INIT_LIST_HEAD(tv4.vec + i);
                INIT_LIST_HEAD(tv3.vec + i);
                INIT_LIST_HEAD(tv2.vec + i);
        }
        for (i = 0; i < TVR_SIZE; i++)
                INIT_LIST_HEAD(tv1.vec + i);
}

static unsigned long timer_jiffies;

static inline void internal_add_timer(struct timer_list *timer)
{
        /*
         * Interrupts must be disabled when calling this.
         */
        unsigned long expires = timer->expires;
        unsigned long idx = expires - timer_jiffies;
        struct list_head * vec;

        if (run_timer_list_running)
                vec = run_timer_list_running;
        else if (idx < TVR_SIZE) {
                int i = expires & TVR_MASK;
                vec = tv1.vec + i;
        } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
                int i = (expires >> TVR_BITS) & TVN_MASK;
                vec = tv2.vec + i;
        } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
                int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
                vec = tv3.vec + i;
        } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
                int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
                vec = tv4.vec + i;
        } else if ((signed long) idx < 0) {
                /* can happen if you add a timer with expires == jiffies,
                 * or you set a timer to go off in the past
                 */
                vec = tv1.vec + tv1.index;
        } else if (idx <= 0xffffffffUL) {
                int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
                vec = tv5.vec + i;
        } else {
                /* Can only get here on architectures with 64-bit jiffies */
                INIT_LIST_HEAD(&timer->list);
                return;
        }
        /*
         * Timers are FIFO!
         */
        list_add(&timer->list, vec->prev);
}
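
/*
 * Worked example (added for clarity, not part of the original source):
 * with TVR_BITS = 8 and TVN_BITS = 6, tv1 holds timers expiring within
 * the next 256 jiffies, tv2 within 2^14, tv3 within 2^20, tv4 within
 * 2^26, and tv5 covers the rest of the 32-bit range.  A timer set for
 * timer_jiffies + 300 has idx = 300, so it lands in tv2, and is only
 * moved down into tv1 when tv2 cascades.
 */
#if 0   /* sketch: the slot a tv2-range timer would occupy */
static int example_tv2_slot(unsigned long expires)
{
        /* assumes 256 <= expires - timer_jiffies < (1 << 14) */
        return (expires >> TVR_BITS) & TVN_MASK;
}
#endif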

/* Initialize both explicitly - let's try to have them in the same cache line */
spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;

#ifdef CONFIG_SMP
volatile struct timer_list * volatile running_timer;
#define timer_enter(t) do { running_timer = t; mb(); } while (0)
#define timer_exit() do { running_timer = NULL; } while (0)
#define timer_is_running(t) (running_timer == t)
#define timer_synchronize(t) while (timer_is_running(t)) barrier()
#else
#define timer_enter(t)          do { } while (0)
#define timer_exit()            do { } while (0)
#endif

void add_timer(struct timer_list *timer)
{
        unsigned long flags;

        spin_lock_irqsave(&timerlist_lock, flags);
        if (timer_pending(timer))
                goto bug;
        internal_add_timer(timer);
        spin_unlock_irqrestore(&timerlist_lock, flags);
        return;
bug:
        spin_unlock_irqrestore(&timerlist_lock, flags);
        printk("bug: kernel timer added twice at %p.\n",
                        __builtin_return_address(0));
}

static inline int detach_timer (struct timer_list *timer)
{
        if (!timer_pending(timer))
                return 0;
        list_del(&timer->list);
        return 1;
}

int mod_timer(struct timer_list *timer, unsigned long expires)
{
        int ret;
        unsigned long flags;

        spin_lock_irqsave(&timerlist_lock, flags);
        timer->expires = expires;
        ret = detach_timer(timer);
        internal_add_timer(timer);
        spin_unlock_irqrestore(&timerlist_lock, flags);
        return ret;
}

int del_timer(struct timer_list * timer)
{
        int ret;
        unsigned long flags;

        spin_lock_irqsave(&timerlist_lock, flags);
        ret = detach_timer(timer);
        timer->list.next = timer->list.prev = NULL;
        spin_unlock_irqrestore(&timerlist_lock, flags);
        return ret;
}
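
/*
 * Typical use of the API above (a hedged sketch, not from the original
 * file; "my_timer" and "my_handler" are made-up names):
 */
#if 0
static struct timer_list my_timer;

static void my_handler(unsigned long data)
{
        printk("timer fired, data=%lu\n", data);
}

static void example_start(void)
{
        init_timer(&my_timer);                  /* clear list pointers */
        my_timer.function = my_handler;
        my_timer.data = 0;
        my_timer.expires = jiffies + HZ;        /* fire in ~1 second */
        add_timer(&my_timer);

        mod_timer(&my_timer, jiffies + 2 * HZ); /* reschedule */
        del_timer(&my_timer);                   /* cancel if still pending */
}
#endif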

#ifdef CONFIG_SMP
void sync_timers(void)
{
        spin_unlock_wait(&global_bh_lock);
}

/*
 * SMP-specific function to delete a periodic timer.  The caller must
 * prevent, by some means, the timer from being restarted.  On exit the
 * timer is not queued and its handler is not running on any CPU.  The
 * return value is the number of times the timer was deleted (useful
 * for reference counting).
 */

int del_timer_sync(struct timer_list * timer)
{
        int ret = 0;

        for (;;) {
                unsigned long flags;
                int running;

                spin_lock_irqsave(&timerlist_lock, flags);
                ret += detach_timer(timer);
                timer->list.next = timer->list.prev = NULL;
                running = timer_is_running(timer);
                spin_unlock_irqrestore(&timerlist_lock, flags);

                if (!running)
                        break;

                timer_synchronize(timer);
        }

        return ret;
}
#endif


static inline void cascade_timers(struct timer_vec *tv)
{
        /* cascade all the timers from tv up one level */
        struct list_head *head, *curr, *next;

        head = tv->vec + tv->index;
        curr = head->next;
        /*
         * We are removing _all_ timers from the list, so we don't have to
         * detach them individually, just clear the list afterwards.
         */
        while (curr != head) {
                struct timer_list *tmp;

                tmp = list_entry(curr, struct timer_list, list);
                next = curr->next;
                list_del(curr); /* not needed */
                internal_add_timer(tmp);
                curr = next;
        }
        INIT_LIST_HEAD(head);
        tv->index = (tv->index + 1) & TVN_MASK;
}
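
/*
 * Note on cascade frequency (added for clarity): tv1.index wraps every
 * TVR_SIZE (256) ticks, at which point one slot of tv2 is emptied into
 * tv1; tv2.index wraps every 256 * 64 ticks, cascading tv3, and so on.
 * Amortized over its lifetime, each timer is therefore touched only a
 * handful of times before it fires.
 */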

static inline void run_timer_list(void)
{
        spin_lock_irq(&timerlist_lock);
        while ((long)(jiffies - timer_jiffies) >= 0) {
                LIST_HEAD(queued);
                struct list_head *head, *curr;
                if (!tv1.index) {
                        int n = 1;
                        do {
                                cascade_timers(tvecs[n]);
                        } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
                }
                run_timer_list_running = &queued;
repeat:
                head = tv1.vec + tv1.index;
                curr = head->next;
                if (curr != head) {
                        struct timer_list *timer;
                        void (*fn)(unsigned long);
                        unsigned long data;

                        timer = list_entry(curr, struct timer_list, list);
                        fn = timer->function;
                        data = timer->data;

                        detach_timer(timer);
                        timer->list.next = timer->list.prev = NULL;
                        timer_enter(timer);
                        spin_unlock_irq(&timerlist_lock);
                        fn(data);
                        spin_lock_irq(&timerlist_lock);
                        timer_exit();
                        goto repeat;
                }
                run_timer_list_running = NULL;
                ++timer_jiffies;
                tv1.index = (tv1.index + 1) & TVR_MASK;

                curr = queued.next;
                while (curr != &queued) {
                        struct timer_list *timer;

                        timer = list_entry(curr, struct timer_list, list);
                        curr = curr->next;
                        internal_add_timer(timer);
                }
        }
        spin_unlock_irq(&timerlist_lock);
}
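
/*
 * Because run_timer_list points run_timer_list_running at a private
 * queue while handlers run (with timerlist_lock dropped), a handler may
 * safely re-add its own timer.  A hedged sketch of a periodic handler
 * ("my_tick" and "my_tick_timer" are made-up names):
 */
#if 0
static struct timer_list my_tick_timer;

static void my_tick(unsigned long data)
{
        /* ... periodic work ... */
        mod_timer(&my_tick_timer, jiffies + HZ);        /* re-arm for +1s */
}
#endif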

spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;

void tqueue_bh(void)
{
        run_task_queue(&tq_timer);
}

void immediate_bh(void)
{
        run_task_queue(&tq_immediate);
}

/*
 * This routine handles the overflow of the microsecond field.
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 */
static void second_overflow(void)
{
    long ltemp;

    /* Bump the maxerror field */
    time_maxerror += time_tolerance >> SHIFT_USEC;
    if (time_maxerror > NTP_PHASE_LIMIT) {
        time_maxerror = NTP_PHASE_LIMIT;
        time_status |= STA_UNSYNC;
    }

    /*
     * Leap second processing. If in leap-insert state at
     * the end of the day, the system clock is set back one
     * second; if in leap-delete state, the system clock is
     * set ahead one second. The microtime() routine or
     * external clock driver will ensure that reported time
     * is always monotonic. The ugly divides should be
     * replaced.
     */
    switch (time_state) {

    case TIME_OK:
        if (time_status & STA_INS)
            time_state = TIME_INS;
        else if (time_status & STA_DEL)
            time_state = TIME_DEL;
        break;

    case TIME_INS:
        if (xtime.tv_sec % 86400 == 0) {
            xtime.tv_sec--;
            time_state = TIME_OOP;
            printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
        }
        break;

    case TIME_DEL:
        if ((xtime.tv_sec + 1) % 86400 == 0) {
            xtime.tv_sec++;
            time_state = TIME_WAIT;
            printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
        }
        break;

    case TIME_OOP:
        time_state = TIME_WAIT;
        break;

    case TIME_WAIT:
        if (!(time_status & (STA_INS | STA_DEL)))
            time_state = TIME_OK;
    }

    /*
     * Compute the phase adjustment for the next second. In
     * PLL mode, the offset is reduced by a fixed factor
     * times the time constant. In FLL mode the offset is
     * used directly. In either mode, the maximum phase
     * adjustment for each second is clamped so as to spread
     * the adjustment over not more than the number of
     * seconds between updates.
     */
    if (time_offset < 0) {
        ltemp = -time_offset;
        if (!(time_status & STA_FLL))
            ltemp >>= SHIFT_KG + time_constant;
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
            ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
        time_offset += ltemp;
        time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
    } else {
        ltemp = time_offset;
        if (!(time_status & STA_FLL))
            ltemp >>= SHIFT_KG + time_constant;
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
            ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
        time_offset -= ltemp;
        time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
    }

    /*
     * Compute the frequency estimate and additional phase
     * adjustment due to frequency error for the next
     * second. When the PPS signal is engaged, gnaw on the
     * watchdog counter and update the frequency computed by
     * the pll and the PPS signal.
     */
    pps_valid++;
    if (pps_valid == PPS_VALID) {       /* PPS signal lost */
        pps_jitter = MAXTIME;
        pps_stabil = MAXFREQ;
        time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
                         STA_PPSWANDER | STA_PPSERROR);
    }
    ltemp = time_freq + pps_freq;
    if (ltemp < 0)
        time_adj -= -ltemp >>
            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
    else
        time_adj += ltemp >>
            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
    /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
     * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
     */
    if (time_adj < 0)
        time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
    else
        time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
}
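
/*
 * Arithmetic note (added for clarity): in PLL mode the per-second phase
 * adjustment is roughly time_offset / 2^(SHIFT_KG + time_constant), so
 * with SHIFT_KG = 6 and the default time_constant of 2 an offset is
 * slewed at about 1/256 of its value per second, clamped to at most
 * MAXPHASE/MINSEC microseconds per second.
 */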

/* in the NTP reference this is called "hardclock()" */
static void update_wall_time_one_tick(void)
{
        if ((time_adjust_step = time_adjust) != 0) {
            /* We are doing an adjtime thing.
             *
             * Prepare time_adjust_step to be within bounds.
             * Note that a positive time_adjust means we want the clock
             * to run faster.
             *
             * Limit the amount of the step to be in the range
             * -tickadj .. +tickadj
             */
             if (time_adjust > tickadj)
                time_adjust_step = tickadj;
             else if (time_adjust < -tickadj)
                time_adjust_step = -tickadj;

            /* Reduce the amount of time left by this step */
            time_adjust -= time_adjust_step;
        }
        xtime.tv_usec += tick + time_adjust_step;
        /*
         * Advance the phase; once it accumulates to one microsecond,
         * advance the tick more.
         */
        time_phase += time_adj;
        if (time_phase <= -FINEUSEC) {
                long ltemp = -time_phase >> SHIFT_SCALE;
                time_phase += ltemp << SHIFT_SCALE;
                xtime.tv_usec -= ltemp;
        }
        else if (time_phase >= FINEUSEC) {
                long ltemp = time_phase >> SHIFT_SCALE;
                time_phase -= ltemp << SHIFT_SCALE;
                xtime.tv_usec += ltemp;
        }
}

/*
 * Using a loop looks inefficient, but "ticks" is usually just one
 * (we shouldn't be losing ticks; we do it this way mainly for
 * interrupt-latency reasons, not because we expect lots of lost
 * timer ticks).
 */
static void update_wall_time(unsigned long ticks)
{
        do {
                ticks--;
                update_wall_time_one_tick();
        } while (ticks);

        if (xtime.tv_usec >= 1000000) {
            xtime.tv_usec -= 1000000;
            xtime.tv_sec++;
            second_overflow();
        }
}

static inline void do_process_times(struct task_struct *p,
        unsigned long user, unsigned long system)
{
        unsigned long psecs;

        psecs = (p->times.tms_utime += user);
        psecs += (p->times.tms_stime += system);
        if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
                /* Send SIGXCPU every second.. */
                if (!(psecs % HZ))
                        send_sig(SIGXCPU, p, 1);
                /* and SIGKILL when we go over max.. */
                if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
                        send_sig(SIGKILL, p, 1);
        }
}
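
/*
 * Example of the effect above (illustrative): with a soft RLIMIT_CPU of
 * 10 seconds and a hard limit of 12, a CPU-bound process gets SIGXCPU
 * once per second after it exceeds the 10-second soft limit, and
 * SIGKILL once it exceeds the 12-second hard limit.
 */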

static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
        unsigned long it_virt = p->it_virt_value;

        if (it_virt) {
                it_virt -= ticks;
                if (!it_virt) {
                        it_virt = p->it_virt_incr;
                        send_sig(SIGVTALRM, p, 1);
                }
                p->it_virt_value = it_virt;
        }
}

static inline void do_it_prof(struct task_struct *p)
{
        unsigned long it_prof = p->it_prof_value;

        if (it_prof) {
                if (--it_prof == 0) {
                        it_prof = p->it_prof_incr;
                        send_sig(SIGPROF, p, 1);
                }
                p->it_prof_value = it_prof;
        }
}

void update_one_process(struct task_struct *p, unsigned long user,
                        unsigned long system, int cpu)
{
        p->per_cpu_utime[cpu] += user;
        p->per_cpu_stime[cpu] += system;
        do_process_times(p, user, system);
        do_it_virt(p, user);
        do_it_prof(p);
}

/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
        struct task_struct *p = current;
        int cpu = smp_processor_id(), system = user_tick ^ 1;

        update_one_process(p, user_tick, system, cpu);
        if (p->pid) {
                if (--p->counter <= 0) {
                        p->counter = 0;
                        /*
                         * SCHED_FIFO is priority preemption, so this is
                         * not the place to decide whether to reschedule a
                         * SCHED_FIFO task or not - Bhavesh Davda
                         */
                        if (p->policy != SCHED_FIFO) {
                                p->need_resched = 1;
                        }
                }
                if (p->nice > 0)
                        kstat.per_cpu_nice[cpu] += user_tick;
                else
                        kstat.per_cpu_user[cpu] += user_tick;
                kstat.per_cpu_system[cpu] += system;
        } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
                kstat.per_cpu_system[cpu] += system;
}

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
        struct task_struct *p;
        unsigned long nr = 0;

        read_lock(&tasklist_lock);
        for_each_task(p) {
                if ((p->state == TASK_RUNNING ||
                     (p->state & TASK_UNINTERRUPTIBLE)))
                        nr += FIXED_1;
        }
        read_unlock(&tasklist_lock);
        return nr;
}

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3];

static inline void calc_load(unsigned long ticks)
{
        unsigned long active_tasks; /* fixed-point */
        static int count = LOAD_FREQ;

        count -= ticks;
        if (count < 0) {
                count += LOAD_FREQ;
                active_tasks = count_active_tasks();
                CALC_LOAD(avenrun[0], EXP_1, active_tasks);
                CALC_LOAD(avenrun[1], EXP_5, active_tasks);
                CALC_LOAD(avenrun[2], EXP_15, active_tasks);
        }
}
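
/*
 * CALC_LOAD (from <linux/sched.h> in this era) computes an exponentially
 * decaying average in 11-bit fixed point, roughly:
 *
 *      load = (load * exp + active * (FIXED_1 - exp)) >> FSHIFT;
 *
 * where FIXED_1 = 1 << 11 and EXP_1/EXP_5/EXP_15 encode the decay
 * factors e^(-5s/1min), e^(-5s/5min) and e^(-5s/15min), sampled every
 * LOAD_FREQ = 5*HZ ticks.
 */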

/* jiffies at the most recent update of wall time */
unsigned long wall_jiffies;

/*
 * This lock protects us from races in SMP while playing with xtime. -arca
 */
rwlock_t xtime_lock = RW_LOCK_UNLOCKED;

static inline void update_times(void)
{
        unsigned long ticks;

        /*
         * update_times() is run from the raw timer_bh handler so we
         * just know that the irqs are locally enabled and so we don't
         * need to save/restore the flags of the local CPU here. -arca
         */
        write_lock_irq(&xtime_lock);
        vxtime_lock();

        ticks = jiffies - wall_jiffies;
        if (ticks) {
                wall_jiffies += ticks;
                update_wall_time(ticks);
        }
        vxtime_unlock();
        write_unlock_irq(&xtime_lock);
        calc_load(ticks);
}

void timer_bh(void)
{
        update_times();
        run_timer_list();
}

void do_timer(struct pt_regs *regs)
{
        (*(unsigned long *)&jiffies)++;
#ifndef CONFIG_SMP
        /* SMP process accounting uses the local APIC timer */

        update_process_times(user_mode(regs));
#endif
        mark_bh(TIMER_BH);
        if (TQ_ACTIVE(tq_timer))
                mark_bh(TQUEUE_BH);
}
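
/*
 * Tick flow (summary added for clarity): the architecture's timer
 * interrupt calls do_timer(), which bumps jiffies and marks TIMER_BH;
 * the bottom half later runs timer_bh(), which updates wall time and
 * then expires any due timers via run_timer_list().
 */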

#if !defined(__alpha__) && !defined(__ia64__)

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned long sys_alarm(unsigned int seconds)
{
        struct itimerval it_new, it_old;
        unsigned int oldalarm;

        it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
        it_new.it_value.tv_sec = seconds;
        it_new.it_value.tv_usec = 0;
        do_setitimer(ITIMER_REAL, &it_new, &it_old);
        oldalarm = it_old.it_value.tv_sec;
        /* ehhh.. We can't return 0 if we have an alarm pending.. */
        /* And we'd better return too much than too little anyway */
        if (it_old.it_value.tv_usec)
                oldalarm++;
        return oldalarm;
}

#endif
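
/*
 * Userspace view of sys_alarm (a hedged illustration, not kernel code):
 */
#if 0
#include <unistd.h>

static unsigned int example_alarm(void)
{
        /* arm SIGALRM for 5 seconds; the return value is the time that
         * remained on any previously pending alarm, rounded up as above
         */
        return alarm(5);
}
#endif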

#ifndef __alpha__

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
asmlinkage long sys_getpid(void)
{
        return current->tgid;
}

/*
 * This is not strictly SMP safe: p_opptr could change
 * from under us. However, rather than getting any lock
 * we can use an optimistic algorithm: get the parent
 * pid, and go back and check that the parent is still
 * the same. If it has changed (which is extremely unlikely
 * indeed), we just try again..
 *
 * NOTE! This depends on the fact that even if we _do_
 * get an old value of "parent", we can happily dereference
 * the pointer: we just can't necessarily trust the result
 * until we know that the parent pointer is valid.
 *
 * The "mb()" macro is a memory barrier - a synchronizing
 * event. It also makes sure that gcc doesn't optimize
 * away the necessary memory references.. The barrier doesn't
 * have to have all that strong semantics: on x86 we don't
 * really require a synchronizing instruction, for example.
 * The barrier is more important for code generation than
 * for any real memory ordering semantics (even if there is
 * a small window for a race, using the old pointer is
 * harmless for a while).
 */
asmlinkage long sys_getppid(void)
{
        int pid;
        struct task_struct * me = current;
        struct task_struct * parent;

        parent = me->p_opptr;
        for (;;) {
                pid = parent->pid;
#ifdef CONFIG_SMP
                {
                        struct task_struct *old = parent;
                        mb();
                        parent = me->p_opptr;
                        if (old != parent)
                                continue;
                }
#endif
                break;
        }
        return pid;
}

asmlinkage long sys_getuid(void)
{
        /* Only we change this so SMP safe */
        return current->uid;
}

asmlinkage long sys_geteuid(void)
{
        /* Only we change this so SMP safe */
        return current->euid;
}

asmlinkage long sys_getgid(void)
{
        /* Only we change this so SMP safe */
        return current->gid;
}

asmlinkage long sys_getegid(void)
{
        /* Only we change this so SMP safe */
        return current->egid;
}

#endif

/* Thread ID - the internal kernel "pid" */
asmlinkage long sys_gettid(void)
{
        return current->pid;
}

asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
        struct timespec t;
        unsigned long expire;

        if (copy_from_user(&t, rqtp, sizeof(struct timespec)))
                return -EFAULT;

        if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
                return -EINVAL;

        if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
            current->policy != SCHED_OTHER)
        {
                /*
                 * Short delay requests up to 2 ms will be handled with
                 * high precision by a busy wait for all real-time processes.
                 *
                 * It's important on SMP not to do this holding locks.
                 */
                udelay((t.tv_nsec + 999) / 1000);
                return 0;
        }

        expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);

        current->state = TASK_INTERRUPTIBLE;
        expire = schedule_timeout(expire);

        if (expire) {
                if (rmtp) {
                        jiffies_to_timespec(expire, &t);
                        if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
                                return -EFAULT;
                }
                return -EINTR;
        }
        return 0;
}
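
/*
 * Userspace view of sys_nanosleep (a hedged illustration): on EINTR the
 * unslept time is written back through rmtp, so a caller can resume.
 */
#if 0
#include <time.h>
#include <errno.h>

static void sleep_fully(struct timespec req)
{
        struct timespec rem;

        while (nanosleep(&req, &rem) == -1 && errno == EINTR)
                req = rem;      /* retry with the remaining time */
}
#endif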