203 lines
6.1 KiB
Diff
|
From 2a767692d6140051e569ab59a1440b3760839e03 Mon Sep 17 00:00:00 2001
|
||
|
From: Andy Lutomirski <luto@kernel.org>
|
||
|
Date: Tue, 11 Jul 2017 10:33:38 -0500
|
||
|
Subject: [PATCH 018/231] x86/entry/64: Refactor IRQ stacks and make them
|
||
|
NMI-safe
|
||
|
MIME-Version: 1.0
|
||
|
Content-Type: text/plain; charset=UTF-8
|
||
|
Content-Transfer-Encoding: 8bit
|
||
|
|
||
|
CVE-2017-5754
|
||
|
|
||
|
This will allow IRQ stacks to nest inside NMIs or similar entries
|
||
|
that can happen during IRQ stack setup or teardown.
|
||
|
|
||
|
The new macros won't work correctly if they're invoked with IRQs on.
|
||
|
Add a check under CONFIG_DEBUG_ENTRY to detect that.
|
||
|
|
||
|
Signed-off-by: Andy Lutomirski <luto@kernel.org>
|
||
|
[ Use %r10 instead of %r11 in xen_do_hypervisor_callback to make objtool
|
||
|
and ORC unwinder's lives a little easier. ]
|
||
|
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||
|
Cc: Borislav Petkov <bp@alien8.de>
|
||
|
Cc: Brian Gerst <brgerst@gmail.com>
|
||
|
Cc: Denys Vlasenko <dvlasenk@redhat.com>
|
||
|
Cc: H. Peter Anvin <hpa@zytor.com>
|
||
|
Cc: Jiri Slaby <jslaby@suse.cz>
|
||
|
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||
|
Cc: Mike Galbraith <efault@gmx.de>
|
||
|
Cc: Peter Zijlstra <peterz@infradead.org>
|
||
|
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||
|
Cc: live-patching@vger.kernel.org
|
||
|
Link: http://lkml.kernel.org/r/b0b2ff5fb97d2da2e1d7e1f380190c92545c8bb5.1499786555.git.jpoimboe@redhat.com
|
||
|
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||
|
(cherry picked from commit 1d3e53e8624a3ec85f4041ca6d973da7c1575938)
|
||
|
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||
|
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||
|
(cherry picked from commit be58b042e135d0ee777a54798f33015857d7e2e0)
|
||
|
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||
|
---
|
||
|
arch/x86/kernel/process_64.c | 3 ++
|
||
|
arch/x86/Kconfig.debug | 2 --
|
||
|
arch/x86/entry/entry_64.S | 85 +++++++++++++++++++++++++++++++-------------
|
||
|
3 files changed, 64 insertions(+), 26 deletions(-)
|
||
|
|
||
|
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
|
||
|
index fe56e6f93cbb..1e7701c4cd80 100644
|
||
|
--- a/arch/x86/kernel/process_64.c
|
||
|
+++ b/arch/x86/kernel/process_64.c
|
||
|
@@ -404,6 +404,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||
|
int cpu = smp_processor_id();
|
||
|
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
|
||
|
|
||
|
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
|
||
|
+ this_cpu_read(irq_count) != -1);
|
||
|
+
|
||
|
switch_fpu_prepare(prev_fpu, cpu);
|
||
|
|
||
|
/* We must save %fs and %gs before load_TLS() because
|
||
|
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
|
||
|
index cd20ca0b4043..1fc519f3c49e 100644
|
||
|
--- a/arch/x86/Kconfig.debug
|
||
|
+++ b/arch/x86/Kconfig.debug
|
||
|
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
|
||
|
Some of these sanity checks may slow down kernel entries and
|
||
|
exits or otherwise impact performance.
|
||
|
|
||
|
- This is currently used to help test NMI code.
|
||
|
-
|
||
|
If unsure, say N.
|
||
|
|
||
|
config DEBUG_NMI_SELFTEST
|
||
|
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
|
||
|
index 6d078b89a5e8..07b4056af8a8 100644
|
||
|
--- a/arch/x86/entry/entry_64.S
|
||
|
+++ b/arch/x86/entry/entry_64.S
|
||
|
@@ -447,6 +447,59 @@ ENTRY(irq_entries_start)
|
||
|
.endr
|
||
|
END(irq_entries_start)
|
||
|
|
||
|
+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
|
||
|
+#ifdef CONFIG_DEBUG_ENTRY
|
||
|
+ pushfq
|
||
|
+ testl $X86_EFLAGS_IF, (%rsp)
|
||
|
+ jz .Lokay_\@
|
||
|
+ ud2
|
||
|
+.Lokay_\@:
|
||
|
+ addq $8, %rsp
|
||
|
+#endif
|
||
|
+.endm
|
||
|
+
|
||
|
+/*
|
||
|
+ * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers
|
||
|
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
|
||
|
+ * Requires kernel GSBASE.
|
||
|
+ *
|
||
|
+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
|
||
|
+ */
|
||
|
+.macro ENTER_IRQ_STACK old_rsp
|
||
|
+ DEBUG_ENTRY_ASSERT_IRQS_OFF
|
||
|
+ movq %rsp, \old_rsp
|
||
|
+ incl PER_CPU_VAR(irq_count)
|
||
|
+
|
||
|
+ /*
|
||
|
+ * Right now, if we just incremented irq_count to zero, we've
|
||
|
+ * claimed the IRQ stack but we haven't switched to it yet.
|
||
|
+ *
|
||
|
+ * If anything is added that can interrupt us here without using IST,
|
||
|
+ * it must be *extremely* careful to limit its stack usage. This
|
||
|
+ * could include kprobes and a hypothetical future IST-less #DB
|
||
|
+ * handler.
|
||
|
+ */
|
||
|
+
|
||
|
+ cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
|
||
|
+ pushq \old_rsp
|
||
|
+.endm
|
||
|
+
|
||
|
+/*
|
||
|
+ * Undoes ENTER_IRQ_STACK.
|
||
|
+ */
|
||
|
+.macro LEAVE_IRQ_STACK
|
||
|
+ DEBUG_ENTRY_ASSERT_IRQS_OFF
|
||
|
+ /* We need to be off the IRQ stack before decrementing irq_count. */
|
||
|
+ popq %rsp
|
||
|
+
|
||
|
+ /*
|
||
|
+ * As in ENTER_IRQ_STACK, if irq_count == 0 at this point, we are
|
||
|
+ * still claiming the IRQ stack even though we are no longer on it.
|
||
|
+ */
|
||
|
+
|
||
|
+ decl PER_CPU_VAR(irq_count)
|
||
|
+.endm
|
||
|
+
|
||
|
/*
|
||
|
* Interrupt entry/exit.
|
||
|
*
|
||
|
@@ -485,17 +538,7 @@ END(irq_entries_start)
|
||
|
CALL_enter_from_user_mode
|
||
|
|
||
|
1:
|
||
|
- /*
|
||
|
- * Save previous stack pointer, optionally switch to interrupt stack.
|
||
|
- * irq_count is used to check if a CPU is already on an interrupt stack
|
||
|
- * or not. While this is essentially redundant with preempt_count it is
|
||
|
- * a little cheaper to use a separate counter in the PDA (short of
|
||
|
- * moving irq_enter into assembly, which would be too much work)
|
||
|
- */
|
||
|
- movq %rsp, %rdi
|
||
|
- incl PER_CPU_VAR(irq_count)
|
||
|
- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
|
||
|
- pushq %rdi
|
||
|
+ ENTER_IRQ_STACK old_rsp=%rdi
|
||
|
/* We entered an interrupt context - irqs are off: */
|
||
|
TRACE_IRQS_OFF
|
||
|
|
||
|
@@ -515,10 +558,8 @@ common_interrupt:
|
||
|
ret_from_intr:
|
||
|
DISABLE_INTERRUPTS(CLBR_ANY)
|
||
|
TRACE_IRQS_OFF
|
||
|
- decl PER_CPU_VAR(irq_count)
|
||
|
|
||
|
- /* Restore saved previous stack */
|
||
|
- popq %rsp
|
||
|
+ LEAVE_IRQ_STACK
|
||
|
|
||
|
testb $3, CS(%rsp)
|
||
|
jz retint_kernel
|
||
|
@@ -892,12 +933,10 @@ bad_gs:
|
||
|
ENTRY(do_softirq_own_stack)
|
||
|
pushq %rbp
|
||
|
mov %rsp, %rbp
|
||
|
- incl PER_CPU_VAR(irq_count)
|
||
|
- cmove PER_CPU_VAR(irq_stack_ptr), %rsp
|
||
|
- push %rbp /* frame pointer backlink */
|
||
|
+ ENTER_IRQ_STACK old_rsp=%r11
|
||
|
call __do_softirq
|
||
|
+ LEAVE_IRQ_STACK
|
||
|
leaveq
|
||
|
- decl PER_CPU_VAR(irq_count)
|
||
|
ret
|
||
|
END(do_softirq_own_stack)
|
||
|
|
||
|
@@ -924,13 +963,11 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
|
||
|
* see the correct pointer to the pt_regs
|
||
|
*/
|
||
|
movq %rdi, %rsp /* we don't return, adjust the stack frame */
|
||
|
-11: incl PER_CPU_VAR(irq_count)
|
||
|
- movq %rsp, %rbp
|
||
|
- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
|
||
|
- pushq %rbp /* frame pointer backlink */
|
||
|
+
|
||
|
+ ENTER_IRQ_STACK old_rsp=%r10
|
||
|
call xen_evtchn_do_upcall
|
||
|
- popq %rsp
|
||
|
- decl PER_CPU_VAR(irq_count)
|
||
|
+ LEAVE_IRQ_STACK
|
||
|
+
|
||
|
#ifndef CONFIG_PREEMPT
|
||
|
call xen_maybe_preempt_hcall
|
||
|
#endif
|
||
|
--
|
||
|
2.14.2
|
||
|
|