Discussion:
[RFC][PATCH] sched,mips,ia64: Remove __ARCH_WANT_UNLOCKED_CTXSW
Peter Zijlstra
2014-09-23 15:06:41 UTC
Permalink
Architectures, which define __ARCH_WANT_UNLOCKED_CTXSW,
may pull a task when it's in the middle of schedule().
CPU1(task1 calls schedule) CPU2
... schedule()
... idle_balance()
... load_balance()
... ...
schedule() ...
prepare_lock_switch() ...
raw_spin_unlock(&rq1->lock) ...
... raw_spin_lock(&rq1->lock)
... detach_tasks();
... can_migrate_task(task1)
... attach_tasks(); <--- move task1 to rq2
... raw_spin_unlock(&rq1->lock)
... context_switch() <--- switch to task1's stack
... ...
(using task1's stack) (using task1's stack)
... ...
context_switch() ...
Parallel use of a single stack is not a good idea.
Indeed it is, but how about we do this instead?

---
Subject: sched,mips,ia64: Remove __ARCH_WANT_UNLOCKED_CTXSW

Kirill found that there's a subtle race in the
__ARCH_WANT_UNLOCKED_CTXSW code, and instead of fixing it, remove the
entire exception because neither arch that uses it seems to actually
still require it.

Boot tested on mips64el (qemu) only.

Cc: Oleg Nesterov <***@redhat.com>
Cc: Guenter Roeck <***@roeck-us.net>
Cc: Ralf Baechle <***@linux-mips.org>
Cc: Tony Luck <***@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <***@infradead.org>
---
arch/ia64/include/asm/processor.h | 1 -
arch/mips/include/asm/processor.h | 6 ------
kernel/sched/core.c | 6 ------
kernel/sched/sched.h | 30 ------------------------------
4 files changed, 43 deletions(-)

diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index c736713..ce53c50 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -19,7 +19,6 @@
#include <asm/ptrace.h>
#include <asm/ustack.h>

-#define __ARCH_WANT_UNLOCKED_CTXSW
#define ARCH_HAS_PREFETCH_SWITCH_STACK

#define IA64_NUM_PHYS_STACK_REG 96
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 05f0843..f1df4cb 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -397,12 +397,6 @@ unsigned long get_wchan(struct task_struct *p);
#define ARCH_HAS_PREFETCHW
#define prefetchw(x) __builtin_prefetch((x), 1, 1)

-/*
- * See Documentation/scheduler/sched-arch.txt; prevents deadlock on SMP
- * systems.
- */
-#define __ARCH_WANT_UNLOCKED_CTXSW
-
#endif

#endif /* _ASM_PROCESSOR_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2a93b87..ccbafb0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2304,10 +2304,6 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
*/
post_schedule(rq);

-#ifdef __ARCH_WANT_UNLOCKED_CTXSW
- /* In this case, finish_task_switch does not reenable preemption */
- preempt_enable();
-#endif
if (current->set_child_tid)
put_user(task_pid_vnr(current), current->set_child_tid);
}
@@ -2350,9 +2346,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
* of the scheduler it's an obvious special-case), so we
* do an early lockdep release here:
*/
-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
-#endif

context_tracking_task_switch(prev, next);
/* Here we just switch the register state and the stack. */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1bc6aad..d87f122 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -966,7 +966,6 @@ static inline int task_on_rq_migrating(struct task_struct *p)
# define finish_arch_post_lock_switch() do { } while (0)
#endif

-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
#ifdef CONFIG_SMP
@@ -1004,35 +1003,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
raw_spin_unlock_irq(&rq->lock);
}

-#else /* __ARCH_WANT_UNLOCKED_CTXSW */
-static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
-{
-#ifdef CONFIG_SMP
- /*
- * We can optimise this out completely for !SMP, because the
- * SMP rebalancing from interrupt is the only thing that cares
- * here.
- */
- next->on_cpu = 1;
-#endif
- raw_spin_unlock(&rq->lock);
-}
-
-static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
-{
-#ifdef CONFIG_SMP
- /*
- * After ->on_cpu is cleared, the task can be moved to a different CPU.
- * We must ensure this doesn't happen until the switch is completely
- * finished.
- */
- smp_wmb();
- prev->on_cpu = 0;
-#endif
- local_irq_enable();
-}
-#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
-
/*
* wake flags
*/
Kirill Tkhai
2014-09-23 15:16:44 UTC
Permalink
Post by Peter Zijlstra
=9AArchitectures, which define __ARCH_WANT_UNLOCKED_CTXSW,
=9Amay pull a task when it's in the middle of schedule().
=9ACPU1(task1 calls schedule) =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9ACPU2
=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9Aschedule()
Post by Peter Zijlstra
=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9Aidle_balance()
Post by Peter Zijlstra
=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9Aload_balance()
Post by Peter Zijlstra
=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A...
Post by Peter Zijlstra
=9Aschedule() =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A...
Post by Peter Zijlstra
=9A=9A=9A=9Aprepare_lock_switch() =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A...
Post by Peter Zijlstra
=9A=9A=9A=9A=9A=9A=9Araw_spin_unlock(&rq1->lock) =9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A...
Post by Peter Zijlstra
=9A=9A=9A=9A=9A=9A=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9Araw_s=
pin_lock(&rq1->lock)
Post by Peter Zijlstra
=9A=9A=9A=9A=9A=9A=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9Adetach_tasks();
Post by Peter Zijlstra
=9A=9A=9A=9A=9A=9A=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9Acan_migrate_task(task1)
Post by Peter Zijlstra
=9A=9A=9A=9A=9A=9A=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9Aattach_tasks(); <--- move task1 to rq2
Post by Peter Zijlstra
=9A=9A=9A=9A=9A=9A=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9Araw_s=
pin_unlock(&rq1->lock)
Post by Peter Zijlstra
=9A=9A=9A=9A=9A=9A=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9Acontext_switch() <--- s=
witch to task1's stack
Post by Peter Zijlstra
=9A=9A=9A=9A=9A=9A=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A...
Post by Peter Zijlstra
=9A=9A=9A=9A(using task1's stack) =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A(using task1's stack)
Post by Peter Zijlstra
=9A=9A=9A=9A... =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A...
Post by Peter Zijlstra
=9A=9A=9A=9Acontext_switch() =9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=
=9A=9A=9A=9A=9A=9A=9A=9A...
Post by Peter Zijlstra
=9AParallel use of a single stack is not a good idea.
Indeed it is, but how about we do this instead?
Completely agree, looks good for me.
Post by Peter Zijlstra
---
Subject: sched,mips,ia64: Remove __ARCH_WANT_UNLOCKED_CTXSW
Kirill found that there's a subtle race in the
__ARCH_WANT_UNLOCKED_CTXSW code, and instead of fixing it, remove the
entire exception because neither arch that uses it seems to actually
still require it.
Boot tested on mips64el (qemu) only.
Reviewed-by: Kirill Tkhai <***@parallels.com> (one more review of ia=
64 part)
Guenter Roeck
2014-09-23 19:45:58 UTC
Permalink
Post by Peter Zijlstra
Architectures, which define __ARCH_WANT_UNLOCKED_CTXSW,
may pull a task when it's in the middle of schedule().
CPU1(task1 calls schedule) CPU2
... schedule()
... idle_balance()
... load_balance()
... ...
schedule() ...
prepare_lock_switch() ...
raw_spin_unlock(&rq1->lock) ...
... raw_spin_lock(&rq1->lock)
... detach_tasks();
... can_migrate_task(task1)
... attach_tasks(); <--- move task1 to rq2
... raw_spin_unlock(&rq1->lock)
... context_switch() <--- switch to task1's stack
... ...
(using task1's stack) (using task1's stack)
... ...
context_switch() ...
Parallel use of a single stack is not a good idea.
Indeed it is, but how about we do this instead?
---
Subject: sched,mips,ia64: Remove __ARCH_WANT_UNLOCKED_CTXSW
Kirill found that there's a subtle race in the
__ARCH_WANT_UNLOCKED_CTXSW code, and instead of fixing it, remove the
entire exception because neither arch that uses it seems to actually
still require it.
Boot tested on mips64el (qemu) only.
---
arch/ia64/include/asm/processor.h | 1 -
arch/mips/include/asm/processor.h | 6 ------
kernel/sched/core.c | 6 ------
kernel/sched/sched.h | 30 ------------------------------
You might also want to clean up any references to the define if you remove it.

Documentation/scheduler/sched-arch.txt:you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
arch/blackfin/kernel/entry.S: * since Blackfin does not define __ARCH_WANT_UNLOCKED_CTXSW, so

Guenter
Post by Peter Zijlstra
4 files changed, 43 deletions(-)
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index c736713..ce53c50 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -19,7 +19,6 @@
#include <asm/ptrace.h>
#include <asm/ustack.h>
-#define __ARCH_WANT_UNLOCKED_CTXSW
#define ARCH_HAS_PREFETCH_SWITCH_STACK
#define IA64_NUM_PHYS_STACK_REG 96
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 05f0843..f1df4cb 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -397,12 +397,6 @@ unsigned long get_wchan(struct task_struct *p);
#define ARCH_HAS_PREFETCHW
#define prefetchw(x) __builtin_prefetch((x), 1, 1)
-/*
- * See Documentation/scheduler/sched-arch.txt; prevents deadlock on SMP
- * systems.
- */
-#define __ARCH_WANT_UNLOCKED_CTXSW
-
#endif
#endif /* _ASM_PROCESSOR_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2a93b87..ccbafb0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2304,10 +2304,6 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
*/
post_schedule(rq);
-#ifdef __ARCH_WANT_UNLOCKED_CTXSW
- /* In this case, finish_task_switch does not reenable preemption */
- preempt_enable();
-#endif
if (current->set_child_tid)
put_user(task_pid_vnr(current), current->set_child_tid);
}
@@ -2350,9 +2346,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
* of the scheduler it's an obvious special-case), so we
*/
-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
-#endif
context_tracking_task_switch(prev, next);
/* Here we just switch the register state and the stack. */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1bc6aad..d87f122 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -966,7 +966,6 @@ static inline int task_on_rq_migrating(struct task_struct *p)
# define finish_arch_post_lock_switch() do { } while (0)
#endif
-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
#ifdef CONFIG_SMP
@@ -1004,35 +1003,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
raw_spin_unlock_irq(&rq->lock);
}
-#else /* __ARCH_WANT_UNLOCKED_CTXSW */
-static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
-{
-#ifdef CONFIG_SMP
- /*
- * We can optimise this out completely for !SMP, because the
- * SMP rebalancing from interrupt is the only thing that cares
- * here.
- */
- next->on_cpu = 1;
-#endif
- raw_spin_unlock(&rq->lock);
-}
-
-static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
-{
-#ifdef CONFIG_SMP
- /*
- * After ->on_cpu is cleared, the task can be moved to a different CPU.
- * We must ensure this doesn't happen until the switch is completely
- * finished.
- */
- smp_wmb();
- prev->on_cpu = 0;
-#endif
- local_irq_enable();
-}
-#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
-
/*
* wake flags
*/
--
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Loading...