 21884a83b2
			
		
	
	
	21884a83b2
	
	
	
		
			
			Pull timer core updates from Thomas Gleixner:
 "The timer changes contain:
   - posix timer code consolidation and fixes for odd corner cases
   - sched_clock implementation moved from ARM to core code to avoid
     duplication by other architectures
   - alarm timer updates
   - clocksource and clockevents unregistration facilities
   - clocksource/events support for new hardware
   - precise nanoseconds RTC readout (Xen feature)
   - generic support for Xen suspend/resume oddities
   - the usual lot of fixes and cleanups all over the place
  The parts which touch other areas (ARM/XEN) have been coordinated with
  the relevant maintainers.  Though this results in a handful of
  trivial to solve merge conflicts, which we preferred over nasty cross
  tree merge dependencies.
  The patches which have been committed in the last few days are bug
  fixes plus the posix timer lot.  The latter was in akpm's queue and
  next for quite some time; they just got forgotten and Frederic
  collected them last minute."
* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (59 commits)
  hrtimer: Remove unused variable
  hrtimers: Move SMP function call to thread context
  clocksource: Reselect clocksource when watchdog validated high-res capability
  posix-cpu-timers: don't account cpu timer after stopped thread runtime accounting
  posix_timers: fix racy timer delta caching on task exit
  posix-timers: correctly get dying task time sample in posix_cpu_timer_schedule()
  selftests: add basic posix timers selftests
  posix_cpu_timers: consolidate expired timers check
  posix_cpu_timers: consolidate timer list cleanups
  posix_cpu_timer: consolidate expiry time type
  tick: Sanitize broadcast control logic
  tick: Prevent uncontrolled switch to oneshot mode
  tick: Make oneshot broadcast robust vs. CPU offlining
  x86: xen: Sync the CMOS RTC as well as the Xen wallclock
  x86: xen: Sync the wallclock when the system time is set
  timekeeping: Indicate that clock was set in the pvclock gtod notifier
  timekeeping: Pass flags instead of multiple bools to timekeeping_update()
  xen: Remove clock_was_set() call in the resume path
  hrtimers: Support resuming with two or more CPUs online (but stopped)
  timer: Fix jiffies wrap behavior of round_jiffies_common()
  ...
		
	
			
		
			
				
	
	
		
			264 lines
		
	
	
	
		
			7.6 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			264 lines
		
	
	
	
		
			7.6 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| 
 | |
| #ifdef CONFIG_SCHEDSTATS
 | |
| 
 | |
| /*
 | |
|  * Expects runqueue lock to be held for atomicity of update
 | |
|  */
 | |
| static inline void
 | |
| rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
 | |
| {
 | |
| 	if (rq) {
 | |
| 		rq->rq_sched_info.run_delay += delta;
 | |
| 		rq->rq_sched_info.pcount++;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Expects runqueue lock to be held for atomicity of update
 | |
|  */
 | |
| static inline void
 | |
| rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 | |
| {
 | |
| 	if (rq)
 | |
| 		rq->rq_cpu_time += delta;
 | |
| }
 | |
| 
 | |
| static inline void
 | |
| rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 | |
| {
 | |
| 	if (rq)
 | |
| 		rq->rq_sched_info.run_delay += delta;
 | |
| }
 | |
/*
 * Statistics update helpers used when CONFIG_SCHEDSTATS is enabled.
 *
 * schedstat_inc(rq, field)      - increment (rq)->field by one
 * schedstat_add(rq, field, amt) - add @amt to (rq)->field
 * schedstat_set(var, val)       - assign @val to the lvalue @var
 *
 * Every macro argument that is an expression is parenthesized in the
 * expansion so that the caller's expression is always taken as a unit.
 */
# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
# define schedstat_set(var, val)	do { (var) = (val); } while (0)
 | |
| #else /* !CONFIG_SCHEDSTATS */
 | |
/*
 * CONFIG_SCHEDSTATS is disabled: the runqueue accounting hooks are empty
 * functions and the schedstat_*() macros expand to empty statements, so
 * callers need no #ifdefs of their own.
 */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
{
}

static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{
}

static inline void
rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
{
}

# define schedstat_inc(rq, field)		do { } while (0)
# define schedstat_add(rq, field, amt)		do { } while (0)
# define schedstat_set(var, val)		do { } while (0)
 | |
| #endif
 | |
| 
 | |
| #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 | |
| static inline void sched_info_reset_dequeued(struct task_struct *t)
 | |
| {
 | |
| 	t->sched_info.last_queued = 0;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * We are interested in knowing how long it was from the *first* time a
 | |
|  * task was queued to the time that it finally hit a cpu, we call this routine
 | |
|  * from dequeue_task() to account for possible rq->clock skew across cpus. The
 | |
|  * delta taken on each cpu would annul the skew.
 | |
|  */
 | |
| static inline void sched_info_dequeued(struct task_struct *t)
 | |
| {
 | |
| 	unsigned long long now = rq_clock(task_rq(t)), delta = 0;
 | |
| 
 | |
| 	if (unlikely(sched_info_on()))
 | |
| 		if (t->sched_info.last_queued)
 | |
| 			delta = now - t->sched_info.last_queued;
 | |
| 	sched_info_reset_dequeued(t);
 | |
| 	t->sched_info.run_delay += delta;
 | |
| 
 | |
| 	rq_sched_info_dequeued(task_rq(t), delta);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Called when a task finally hits the cpu.  We can now calculate how
 | |
|  * long it was waiting to run.  We also note when it began so that we
 | |
|  * can keep stats on how long its timeslice is.
 | |
|  */
 | |
| static void sched_info_arrive(struct task_struct *t)
 | |
| {
 | |
| 	unsigned long long now = rq_clock(task_rq(t)), delta = 0;
 | |
| 
 | |
| 	if (t->sched_info.last_queued)
 | |
| 		delta = now - t->sched_info.last_queued;
 | |
| 	sched_info_reset_dequeued(t);
 | |
| 	t->sched_info.run_delay += delta;
 | |
| 	t->sched_info.last_arrival = now;
 | |
| 	t->sched_info.pcount++;
 | |
| 
 | |
| 	rq_sched_info_arrive(task_rq(t), delta);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * This function is only called from enqueue_task(), but also only updates
 | |
|  * the timestamp if it is already not set.  It's assumed that
 | |
|  * sched_info_dequeued() will clear that stamp when appropriate.
 | |
|  */
 | |
| static inline void sched_info_queued(struct task_struct *t)
 | |
| {
 | |
| 	if (unlikely(sched_info_on()))
 | |
| 		if (!t->sched_info.last_queued)
 | |
| 			t->sched_info.last_queued = rq_clock(task_rq(t));
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Called when a process ceases being the active-running process, either
 | |
|  * voluntarily or involuntarily.  Now we can calculate how long we ran.
 | |
|  * Also, if the process is still in the TASK_RUNNING state, call
 | |
|  * sched_info_queued() to mark that it has now again started waiting on
 | |
|  * the runqueue.
 | |
|  */
 | |
| static inline void sched_info_depart(struct task_struct *t)
 | |
| {
 | |
| 	unsigned long long delta = rq_clock(task_rq(t)) -
 | |
| 					t->sched_info.last_arrival;
 | |
| 
 | |
| 	rq_sched_info_depart(task_rq(t), delta);
 | |
| 
 | |
| 	if (t->state == TASK_RUNNING)
 | |
| 		sched_info_queued(t);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Called when tasks are switched involuntarily due, typically, to expiring
 | |
|  * their time slice.  (This may also be called when switching to or from
 | |
|  * the idle task.)  We are only called when prev != next.
 | |
|  */
 | |
| static inline void
 | |
| __sched_info_switch(struct task_struct *prev, struct task_struct *next)
 | |
| {
 | |
| 	struct rq *rq = task_rq(prev);
 | |
| 
 | |
| 	/*
 | |
| 	 * prev now departs the cpu.  It's not interesting to record
 | |
| 	 * stats about how efficient we were at scheduling the idle
 | |
| 	 * process, however.
 | |
| 	 */
 | |
| 	if (prev != rq->idle)
 | |
| 		sched_info_depart(prev);
 | |
| 
 | |
| 	if (next != rq->idle)
 | |
| 		sched_info_arrive(next);
 | |
| }
 | |
/*
 * Entry point for context-switch accounting: does nothing unless
 * sched_info_on(), in which case the work is done by
 * __sched_info_switch().
 */
static inline void
sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
	if (unlikely(sched_info_on())) {
		__sched_info_switch(prev, next);
	}
}
 | |
| #else
 | |
/*
 * Neither CONFIG_SCHEDSTATS nor CONFIG_TASK_DELAY_ACCT is set: the
 * sched_info hooks expand to empty statements.
 */
#define sched_info_queued(t)		do { } while (0)
#define sched_info_reset_dequeued(t)	do { } while (0)
#define sched_info_dequeued(t)		do { } while (0)
#define sched_info_switch(t, next)	do { } while (0)
 | |
| #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 | |
| 
 | |
| /*
 | |
|  * The following are functions that support scheduler-internal time accounting.
 | |
|  * These functions are generally called at the timer tick.  None of this depends
 | |
|  * on CONFIG_SCHEDSTATS.
 | |
|  */
 | |
| 
 | |
| /**
 | |
|  * cputimer_running - return true if cputimer is running
 | |
|  *
 | |
|  * @tsk:	Pointer to target task.
 | |
|  */
 | |
| static inline bool cputimer_running(struct task_struct *tsk)
 | |
| 
 | |
| {
 | |
| 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 | |
| 
 | |
| 	if (!cputimer->running)
 | |
| 		return false;
 | |
| 
 | |
| 	/*
 | |
| 	 * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime
 | |
| 	 * in __exit_signal(), we won't account to the signal struct further
 | |
| 	 * cputime consumed by that task, even though the task can still be
 | |
| 	 * ticking after __exit_signal().
 | |
| 	 *
 | |
| 	 * In order to keep a consistent behaviour between thread group cputime
 | |
| 	 * and thread group cputimer accounting, lets also ignore the cputime
 | |
| 	 * elapsing after __exit_signal() in any thread group timer running.
 | |
| 	 *
 | |
| 	 * This makes sure that POSIX CPU clocks and timers are synchronized, so
 | |
| 	 * that a POSIX CPU timer won't expire while the corresponding POSIX CPU
 | |
| 	 * clock delta is behind the expiring timer value.
 | |
| 	 */
 | |
| 	if (unlikely(!tsk->sighand))
 | |
| 		return false;
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * account_group_user_time - Maintain utime for a thread group.
 | |
|  *
 | |
|  * @tsk:	Pointer to task structure.
 | |
|  * @cputime:	Time value by which to increment the utime field of the
 | |
|  *		thread_group_cputime structure.
 | |
|  *
 | |
|  * If thread group time is being maintained, get the structure for the
 | |
|  * running CPU and update the utime field there.
 | |
|  */
 | |
| static inline void account_group_user_time(struct task_struct *tsk,
 | |
| 					   cputime_t cputime)
 | |
| {
 | |
| 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 | |
| 
 | |
| 	if (!cputimer_running(tsk))
 | |
| 		return;
 | |
| 
 | |
| 	raw_spin_lock(&cputimer->lock);
 | |
| 	cputimer->cputime.utime += cputime;
 | |
| 	raw_spin_unlock(&cputimer->lock);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * account_group_system_time - Maintain stime for a thread group.
 | |
|  *
 | |
|  * @tsk:	Pointer to task structure.
 | |
|  * @cputime:	Time value by which to increment the stime field of the
 | |
|  *		thread_group_cputime structure.
 | |
|  *
 | |
|  * If thread group time is being maintained, get the structure for the
 | |
|  * running CPU and update the stime field there.
 | |
|  */
 | |
| static inline void account_group_system_time(struct task_struct *tsk,
 | |
| 					     cputime_t cputime)
 | |
| {
 | |
| 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 | |
| 
 | |
| 	if (!cputimer_running(tsk))
 | |
| 		return;
 | |
| 
 | |
| 	raw_spin_lock(&cputimer->lock);
 | |
| 	cputimer->cputime.stime += cputime;
 | |
| 	raw_spin_unlock(&cputimer->lock);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * account_group_exec_runtime - Maintain exec runtime for a thread group.
 | |
|  *
 | |
|  * @tsk:	Pointer to task structure.
 | |
|  * @ns:		Time value by which to increment the sum_exec_runtime field
 | |
|  *		of the thread_group_cputime structure.
 | |
|  *
 | |
|  * If thread group time is being maintained, get the structure for the
 | |
|  * running CPU and update the sum_exec_runtime field there.
 | |
|  */
 | |
| static inline void account_group_exec_runtime(struct task_struct *tsk,
 | |
| 					      unsigned long long ns)
 | |
| {
 | |
| 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 | |
| 
 | |
| 	if (!cputimer_running(tsk))
 | |
| 		return;
 | |
| 
 | |
| 	raw_spin_lock(&cputimer->lock);
 | |
| 	cputimer->cputime.sum_exec_runtime += ns;
 | |
| 	raw_spin_unlock(&cputimer->lock);
 | |
| }
 |