pve-kernel-thunderx/patches/kernel/0186-x86-cpu_entry_area-Move-it-to-a-separate-unit.patch
Fabian Grünbichler a0f7ab8a6a fix #1622: i40e memory leak
cherry-pick from upstream 4.14
2018-01-19 12:43:16 +01:00

400 lines
13 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 20 Dec 2017 18:28:54 +0100
Subject: [PATCH] x86/cpu_entry_area: Move it to a separate unit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
CVE-2017-5754
Separate the cpu_entry_area code out of cpu/common.c and the fixmap.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 0fa11d2cd3d67af676aa2762ade282ba6d09cbe5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/mm/Makefile | 2 +-
arch/x86/include/asm/cpu_entry_area.h | 52 +++++++++++++++++
arch/x86/include/asm/fixmap.h | 41 +-------------
arch/x86/kernel/cpu/common.c | 94 ------------------------------
arch/x86/kernel/traps.c | 1 +
arch/x86/mm/cpu_entry_area.c | 104 ++++++++++++++++++++++++++++++++++
6 files changed, 159 insertions(+), 135 deletions(-)
create mode 100644 arch/x86/include/asm/cpu_entry_area.h
create mode 100644 arch/x86/mm/cpu_entry_area.c
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 0fbdcb64f9f8..76f5399a8356 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -2,7 +2,7 @@
KCOV_INSTRUMENT_tlb.o := n
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644
index 000000000000..5471826803af
--- /dev/null
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code. Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+ char gdt[PAGE_SIZE];
+
+ /*
+ * The GDT is just below entry_stack and thus serves (on x86_64) as
+ * a a read-only guard page.
+ */
+ struct entry_stack_page entry_stack_page;
+
+ /*
+ * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
+ * we need task switches to work, and task switches write to the TSS.
+ */
+ struct tss_struct tss;
+
+ char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+ /*
+ * Exception stacks used for IST entries.
+ *
+ * In the future, this should have a separate slot for each stack
+ * with guard pages between them.
+ */
+ char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+
+#endif
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index a7fb137ad964..1b2521473480 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -25,6 +25,7 @@
#else
#include <uapi/asm/vsyscall.h>
#endif
+#include <asm/cpu_entry_area.h>
/*
* We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
@@ -44,46 +45,6 @@ extern unsigned long __FIXADDR_TOP;
PAGE_SIZE)
#endif
-/*
- * cpu_entry_area is a percpu region in the fixmap that contains things
- * needed by the CPU and early entry/exit code. Real types aren't used
- * for all fields here to avoid circular header dependencies.
- *
- * Every field is a virtual alias of some other allocated backing store.
- * There is no direct allocation of a struct cpu_entry_area.
- */
-struct cpu_entry_area {
- char gdt[PAGE_SIZE];
-
- /*
- * The GDT is just below entry_stack and thus serves (on x86_64) as
- * a a read-only guard page.
- */
- struct entry_stack_page entry_stack_page;
-
- /*
- * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
- * we need task switches to work, and task switches write to the TSS.
- */
- struct tss_struct tss;
-
- char entry_trampoline[PAGE_SIZE];
-
-#ifdef CONFIG_X86_64
- /*
- * Exception stacks used for IST entries.
- *
- * In the future, this should have a separate slot for each stack
- * with guard pages between them.
- */
- char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
-#endif
-};
-
-#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
-
-extern void setup_cpu_entry_areas(void);
-
/*
* Here we define all the compile-time 'special' virtual
* addresses. The point is to have a constant address at
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7a8a5d436566..96171ce46d61 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -482,102 +482,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-#endif
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
- entry_stack_storage);
-
-static void __init
-set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
-{
- for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
- __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
-}
-
-/* Setup the fixmap mappings only once per-processor */
-static void __init setup_cpu_entry_area(int cpu)
-{
-#ifdef CONFIG_X86_64
- extern char _entry_trampoline[];
-
- /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
- pgprot_t gdt_prot = PAGE_KERNEL_RO;
- pgprot_t tss_prot = PAGE_KERNEL_RO;
-#else
- /*
- * On native 32-bit systems, the GDT cannot be read-only because
- * our double fault handler uses a task gate, and entering through
- * a task gate needs to change an available TSS to busy. If the
- * GDT is read-only, that will triple fault. The TSS cannot be
- * read-only because the CPU writes to it on task switches.
- *
- * On Xen PV, the GDT must be read-only because the hypervisor
- * requires it.
- */
- pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
- PAGE_KERNEL_RO : PAGE_KERNEL;
- pgprot_t tss_prot = PAGE_KERNEL;
-#endif
-
- __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
- set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
- per_cpu_ptr(&entry_stack_storage, cpu), 1,
- PAGE_KERNEL);
-
- /*
- * The Intel SDM says (Volume 3, 7.2.1):
- *
- * Avoid placing a page boundary in the part of the TSS that the
- * processor reads during a task switch (the first 104 bytes). The
- * processor may not correctly perform address translations if a
- * boundary occurs in this area. During a task switch, the processor
- * reads and writes into the first 104 bytes of each TSS (using
- * contiguous physical addresses beginning with the physical address
- * of the first byte of the TSS). So, after TSS access begins, if
- * part of the 104 bytes is not physically contiguous, the processor
- * will access incorrect information without generating a page-fault
- * exception.
- *
- * There are also a lot of errata involving the TSS spanning a page
- * boundary. Assert that we're not doing that.
- */
- BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
- offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
- BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
- set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
- &per_cpu(cpu_tss_rw, cpu),
- sizeof(struct tss_struct) / PAGE_SIZE,
- tss_prot);
-
-#ifdef CONFIG_X86_32
- per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
#endif
-#ifdef CONFIG_X86_64
- BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
- BUILD_BUG_ON(sizeof(exception_stacks) !=
- sizeof(((struct cpu_entry_area *)0)->exception_stacks));
- set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
- &per_cpu(exception_stacks, cpu),
- sizeof(exception_stacks) / PAGE_SIZE,
- PAGE_KERNEL);
-
- __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
- __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
-#endif
-}
-
-void __init setup_cpu_entry_areas(void)
-{
- unsigned int cpu;
-
- for_each_possible_cpu(cpu)
- setup_cpu_entry_area(cpu);
-}
-
/* Load the original GDT from the per-cpu structure */
void load_direct_gdt(int cpu)
{
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 14b462eefa17..ef2d1b8a0516 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -57,6 +57,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
+#include <asm/cpu_entry_area.h>
#include <asm/mce.h>
#include <asm/fixmap.h>
#include <asm/mach_traps.h>
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644
index 000000000000..235ff9cfaaf4
--- /dev/null
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+static void __init
+set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+{
+ for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+ __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+/* Setup the fixmap mappings only once per-processor */
+static void __init setup_cpu_entry_area(int cpu)
+{
+#ifdef CONFIG_X86_64
+ extern char _entry_trampoline[];
+
+ /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+ pgprot_t gdt_prot = PAGE_KERNEL_RO;
+ pgprot_t tss_prot = PAGE_KERNEL_RO;
+#else
+ /*
+ * On native 32-bit systems, the GDT cannot be read-only because
+ * our double fault handler uses a task gate, and entering through
+ * a task gate needs to change an available TSS to busy. If the
+ * GDT is read-only, that will triple fault. The TSS cannot be
+ * read-only because the CPU writes to it on task switches.
+ *
+ * On Xen PV, the GDT must be read-only because the hypervisor
+ * requires it.
+ */
+ pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+ PAGE_KERNEL_RO : PAGE_KERNEL;
+ pgprot_t tss_prot = PAGE_KERNEL;
+#endif
+
+ __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+ set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
+ per_cpu_ptr(&entry_stack_storage, cpu), 1,
+ PAGE_KERNEL);
+
+ /*
+ * The Intel SDM says (Volume 3, 7.2.1):
+ *
+ * Avoid placing a page boundary in the part of the TSS that the
+ * processor reads during a task switch (the first 104 bytes). The
+ * processor may not correctly perform address translations if a
+ * boundary occurs in this area. During a task switch, the processor
+ * reads and writes into the first 104 bytes of each TSS (using
+ * contiguous physical addresses beginning with the physical address
+ * of the first byte of the TSS). So, after TSS access begins, if
+ * part of the 104 bytes is not physically contiguous, the processor
+ * will access incorrect information without generating a page-fault
+ * exception.
+ *
+ * There are also a lot of errata involving the TSS spanning a page
+ * boundary. Assert that we're not doing that.
+ */
+ BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+ BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+ set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+ &per_cpu(cpu_tss_rw, cpu),
+ sizeof(struct tss_struct) / PAGE_SIZE,
+ tss_prot);
+
+#ifdef CONFIG_X86_32
+ per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+ BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+ BUILD_BUG_ON(sizeof(exception_stacks) !=
+ sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+ set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+ &per_cpu(exception_stacks, cpu),
+ sizeof(exception_stacks) / PAGE_SIZE,
+ PAGE_KERNEL);
+
+ __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
+ __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu)
+ setup_cpu_entry_area(cpu);
+}
--
2.14.2