Merge branches 'sched-core-for-linus' and 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (60 commits)
  sched: Fix and optimise calculation of the weight-inverse
  sched: Avoid going ahead if ->cpus_allowed is not changed
  sched, rt: Update rq clock when unthrottling of an otherwise idle CPU
  sched: Remove unused parameters from sched_fork() and wake_up_new_task()
  sched: Shorten the construction of the span cpu mask of sched domain
  sched: Wrap the 'cfs_rq->nr_spread_over' field with CONFIG_SCHED_DEBUG
  sched: Remove unused 'this_best_prio arg' from balance_tasks()
  sched: Remove noop in alloc_rt_sched_group()
  sched: Get rid of lock_depth
  sched: Remove obsolete comment from scheduler_tick()
  sched: Fix sched_domain iterations vs. RCU
  sched: Next buddy hint on sleep and preempt path
  sched: Make set_*_buddy() work on non-task entities
  sched: Remove need_migrate_task()
  sched: Move the second half of ttwu() to the remote cpu
  sched: Restructure ttwu() some more
  sched: Rename ttwu_post_activation() to ttwu_do_wakeup()
  sched: Remove rq argument from ttwu_stat()
  sched: Remove rq->lock from the first half of ttwu()
  sched: Drop rq->lock from sched_exec()
  ...

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Fix rt_rq runtime leakage bug

commit 80fe02b5da
Author: Linus Torvalds
Date:   2011-05-19 17:41:22 -07:00

47 changed files with 944 additions and 1186 deletions

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c

@@ -183,6 +183,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
+typedef struct task_group *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
+	     (&iter->list != &task_groups) && \
+	     (rt_rq = iter->rt_rq[cpu_of(rq)]); \
+	     iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 	list_add_rcu(&rt_rq->leaf_rt_rq_list,
@@ -288,6 +296,11 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
+typedef struct rt_rq *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 }
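
Note: the !CONFIG_RT_GROUP_SCHED variant above is a single-pass loop; it visits only &rq->rt, and the "(void) iter" expression exists solely so that both variants of for_each_rt_rq() accept the same arguments. A minimal user-space sketch of that idiom, with all names here being illustrative stand-ins rather than kernel API:

#include <stdio.h>

/* Stand-in types; the real ones live in the scheduler code. */
struct demo_rt_rq { int rt_nr_running; };
struct demo_rq    { struct demo_rt_rq rt; };

typedef struct demo_rt_rq *demo_rt_rq_iter_t;

/* Same shape as the !RT_GROUP_SCHED for_each_rt_rq(): run the body exactly once. */
#define demo_for_each_rt_rq(rt_rq, iter, rq) \
	for ((void) (iter), (rt_rq) = &(rq)->rt; (rt_rq); (rt_rq) = NULL)

int main(void)
{
	struct demo_rq rq = { .rt = { .rt_nr_running = 2 } };
	demo_rt_rq_iter_t iter = NULL;	/* unused by this variant */
	struct demo_rt_rq *rt_rq;

	demo_for_each_rt_rq(rt_rq, iter, &rq)
		printf("rt_nr_running = %d\n", rt_rq->rt_nr_running);

	return 0;
}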
@@ -402,12 +415,13 @@ next:
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
 		return;
 
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 		s64 want;
 		int i;
@@ -487,6 +501,7 @@ static void disable_runtime(struct rq *rq)
 
 static void __enable_runtime(struct rq *rq)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
@@ -495,7 +510,7 @@ static void __enable_runtime(struct rq *rq)
 	/*
 	 * Reset each runqueue's bandwidth settings
 	 */
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
 		raw_spin_lock(&rt_b->rt_runtime_lock);
@@ -562,6 +577,13 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
 				rt_rq->rt_throttled = 0;
 				enqueue = 1;
+
+				/*
+				 * Force a clock update if the CPU was idle,
+				 * lest wakeup -> unthrottle time accumulate.
+				 */
+				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
+					rq->skip_clock_update = -1;
 			}
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
@@ -977,13 +999,23 @@ static void yield_task_rt(struct rq *rq)
 static int find_lowest_rq(struct task_struct *task);
 
 static int
-select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 {
+	struct task_struct *curr;
+	struct rq *rq;
+	int cpu;
+
 	if (sd_flag != SD_BALANCE_WAKE)
 		return smp_processor_id();
 
+	cpu = task_cpu(p);
+	rq = cpu_rq(cpu);
+
+	rcu_read_lock();
+	curr = ACCESS_ONCE(rq->curr); /* unlocked access */
+
 	/*
-	 * If the current task is an RT task, then
+	 * If the current task on @p's runqueue is an RT task, then
 	 * try to see if we can wake this RT task up on another
 	 * runqueue. Otherwise simply start this RT task
 	 * on its current runqueue.
@@ -997,21 +1029,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
 	 * lock?
 	 *
 	 * For equal prio tasks, we just let the scheduler sort it out.
-	 */
-	if (unlikely(rt_task(rq->curr)) &&
-	    (rq->curr->rt.nr_cpus_allowed < 2 ||
-	     rq->curr->prio < p->prio) &&
-	    (p->rt.nr_cpus_allowed > 1)) {
-		int cpu = find_lowest_rq(p);
-
-		return (cpu == -1) ? task_cpu(p) : cpu;
-	}
-
-	/*
+	 *
 	 * Otherwise, just let it ride on the affined RQ and the
 	 * post-schedule router will push the preempted task away
+	 *
+	 * This test is optimistic, if we get it wrong the load-balancer
+	 * will have to sort it out.
 	 */
-	return task_cpu(p);
+	if (curr && unlikely(rt_task(curr)) &&
+	    (curr->rt.nr_cpus_allowed < 2 ||
+	     curr->prio < p->prio) &&
+	    (p->rt.nr_cpus_allowed > 1)) {
+		int target = find_lowest_rq(p);
+
+		if (target != -1)
+			cpu = target;
+	}
+	rcu_read_unlock();
+
+	return cpu;
 }
 
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
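
For readers skimming the hunk above: the rewritten select_task_rq_rt() keeps the woken task on its current CPU unless the task already running there is an RT task that either cannot move (affine to fewer than 2 CPUs) or outranks the waking task (a lower ->prio value means higher priority), in which case find_lowest_rq() is consulted. A minimal user-space sketch of that decision, using stand-in types and a hypothetical find_lowest_cpu() helper in place of the kernel's find_lowest_rq():

#include <stdio.h>
#include <stdbool.h>

/* Stand-in task description; lower prio value == higher priority. */
struct demo_task {
	int prio;
	int nr_cpus_allowed;
	bool is_rt;
	int cpu;		/* CPU the task last ran on */
};

/* Hypothetical stand-in for find_lowest_rq(): -1 means "no better CPU found". */
static int find_lowest_cpu(const struct demo_task *p)
{
	(void) p;
	return 3;
}

/*
 * Mirrors the test in select_task_rq_rt(): redirect the wakeup only if the
 * currently running task is RT and is either pinned (affine to < 2 CPUs) or
 * higher priority than p, and p itself is allowed to run elsewhere.
 */
static int select_wake_cpu(const struct demo_task *p, const struct demo_task *curr)
{
	int cpu = p->cpu;

	if (curr && curr->is_rt &&
	    (curr->nr_cpus_allowed < 2 || curr->prio < p->prio) &&
	    p->nr_cpus_allowed > 1) {
		int target = find_lowest_cpu(p);

		if (target != -1)
			cpu = target;
	}
	return cpu;
}

int main(void)
{
	struct demo_task curr = { .prio = 10, .nr_cpus_allowed = 4, .is_rt = true, .cpu = 0 };
	struct demo_task p    = { .prio = 20, .nr_cpus_allowed = 4, .is_rt = true, .cpu = 0 };

	/* curr outranks p (10 < 20), so p is redirected to find_lowest_cpu(). */
	printf("wake p on CPU %d\n", select_wake_cpu(&p, &curr));
	return 0;
}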
@@ -1136,7 +1172,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
 	 */
-	if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
+	if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
 }
@@ -1287,7 +1323,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 			     !cpumask_test_cpu(lowest_rq->cpu,
 					       &task->cpus_allowed) ||
 			     task_running(rq, task) ||
-			     !task->se.on_rq)) {
+			     !task->on_rq)) {
 
 			raw_spin_unlock(&lowest_rq->lock);
 			lowest_rq = NULL;
@@ -1321,7 +1357,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
 	BUG_ON(task_current(rq, p));
 	BUG_ON(p->rt.nr_cpus_allowed <= 1);
 
-	BUG_ON(!p->se.on_rq);
+	BUG_ON(!p->on_rq);
 	BUG_ON(!rt_task(p));
 
 	return p;
@@ -1467,7 +1503,7 @@ static int pull_rt_task(struct rq *this_rq)
 		 */
 		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
 			WARN_ON(p == src_rq->curr);
-			WARN_ON(!p->se.on_rq);
+			WARN_ON(!p->on_rq);
 
 			/*
 			 * There's a chance that p is higher in priority
@@ -1538,7 +1574,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 	 * Update the migration status of the RQ if we have an RT task
 	 * which is running AND changing its weight value.
 	 */
-	if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
+	if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) {
 		struct rq *rq = task_rq(p);
 
 		if (!task_current(rq, p)) {
@@ -1608,7 +1644,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 	 * we may need to handle the pulling of RT tasks
 	 * now.
	 */
-	if (p->se.on_rq && !rq->rt.rt_nr_running)
+	if (p->on_rq && !rq->rt.rt_nr_running)
 		pull_rt_task(rq);
 }
@@ -1638,7 +1674,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 	 * If that current running task is also an RT task
 	 * then see if we can move to another run queue.
 	 */
-	if (p->se.on_rq && rq->curr != p) {
+	if (p->on_rq && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (rq->rt.overloaded && push_rt_task(rq) &&
 		    /* Don't resched if we changed runqueues */
@@ -1657,7 +1693,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 static void
 prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
 {
-	if (!p->se.on_rq)
+	if (!p->on_rq)
 		return;
 
 	if (rq->curr == p) {
@@ -1796,10 +1832,11 @@ extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
 static void print_rt_stats(struct seq_file *m, int cpu)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	rcu_read_lock();
-	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
 		print_rt_rq(m, cpu, rt_rq);
 	rcu_read_unlock();
 }