633c5ed17f
This causes a kernel oops and upstream is unresponsive about it. See https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1726519
199 lines
6.5 KiB
Diff
199 lines
6.5 KiB
Diff
From 39f179e9baa1728a99a60c5933b1a4a3db73e617 Mon Sep 17 00:00:00 2001
|
|
From: Dave Hansen <dave.hansen@linux.intel.com>
|
|
Date: Mon, 4 Dec 2017 15:07:39 +0100
|
|
Subject: [PATCH 194/242] x86/mm/pti: Allocate a separate user PGD
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
CVE-2017-5754
|
|
|
|
Kernel page table isolation requires two PGDs: one for the kernel,
|
|
which contains the full kernel mapping plus the user space mapping and one
|
|
for user space which contains the user space mappings and the minimal set
|
|
of kernel mappings which are required by the architecture to be able to
|
|
transition from and to user space.
|
|
|
|
Add the necessary preliminaries.
|
|
|
|
[ tglx: Split out from the big kaiser dump. EFI fixup from Kirill ]
|
|
|
|
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
|
|
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
|
Reviewed-by: Borislav Petkov <bp@suse.de>
|
|
Cc: Andy Lutomirski <luto@kernel.org>
|
|
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
|
|
Cc: Borislav Petkov <bp@alien8.de>
|
|
Cc: Brian Gerst <brgerst@gmail.com>
|
|
Cc: David Laight <David.Laight@aculab.com>
|
|
Cc: Denys Vlasenko <dvlasenk@redhat.com>
|
|
Cc: Eduardo Valentin <eduval@amazon.com>
|
|
Cc: Greg KH <gregkh@linuxfoundation.org>
|
|
Cc: H. Peter Anvin <hpa@zytor.com>
|
|
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
|
|
Cc: Juergen Gross <jgross@suse.com>
|
|
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
|
Cc: Peter Zijlstra <peterz@infradead.org>
|
|
Cc: Will Deacon <will.deacon@arm.com>
|
|
Cc: aliguori@amazon.com
|
|
Cc: daniel.gruss@iaik.tugraz.at
|
|
Cc: hughd@google.com
|
|
Cc: keescook@google.com
|
|
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
|
(backported from commit d9e9a6418065bb376e5de8d93ce346939b9a37a6)
|
|
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
|
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
|
(cherry picked from commit 0bd4b34e330d8bedf90c0497dfcef2e2286c4367)
|
|
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
---
|
|
arch/x86/include/asm/pgalloc.h | 11 +++++++++++
|
|
arch/x86/mm/pgtable.c | 5 +++--
|
|
arch/x86/platform/efi/efi_64.c | 5 ++++-
|
|
arch/x86/kernel/head_64.S | 30 +++++++++++++++++++++++++++---
|
|
4 files changed, 45 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
|
|
index b2d0cd8288aa..d65b0dee7448 100644
|
|
--- a/arch/x86/include/asm/pgalloc.h
|
|
+++ b/arch/x86/include/asm/pgalloc.h
|
|
@@ -29,6 +29,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
|
|
*/
|
|
extern gfp_t __userpte_alloc_gfp;
|
|
|
|
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
|
+/*
|
|
+ * Instead of one PGD, we acquire two PGDs. Being order-1, it is
|
|
+ * both 8k in size and 8k-aligned. That lets us just flip bit 12
|
|
+ * in a pointer to swap between the two 4k halves.
|
|
+ */
|
|
+#define PGD_ALLOCATION_ORDER 1
|
|
+#else
|
|
+#define PGD_ALLOCATION_ORDER 0
|
|
+#endif
|
|
+
|
|
/*
|
|
* Allocate and free page tables.
|
|
*/
|
|
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
|
|
index 942391b5b639..90d1d8f49cf6 100644
|
|
--- a/arch/x86/mm/pgtable.c
|
|
+++ b/arch/x86/mm/pgtable.c
|
|
@@ -354,14 +354,15 @@ static inline void _pgd_free(pgd_t *pgd)
|
|
kmem_cache_free(pgd_cache, pgd);
|
|
}
|
|
#else
|
|
+
|
|
static inline pgd_t *_pgd_alloc(void)
|
|
{
|
|
- return (pgd_t *)__get_free_page(PGALLOC_GFP);
|
|
+ return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
|
|
}
|
|
|
|
static inline void _pgd_free(pgd_t *pgd)
|
|
{
|
|
- free_page((unsigned long)pgd);
|
|
+ free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
|
|
}
|
|
#endif /* CONFIG_X86_PAE */
|
|
|
|
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
|
|
index 9bf72f5bfedb..b104224d3d6c 100644
|
|
--- a/arch/x86/platform/efi/efi_64.c
|
|
+++ b/arch/x86/platform/efi/efi_64.c
|
|
@@ -194,6 +194,9 @@ static pgd_t *efi_pgd;
|
|
* because we want to avoid inserting EFI region mappings (EFI_VA_END
|
|
* to EFI_VA_START) into the standard kernel page tables. Everything
|
|
* else can be shared, see efi_sync_low_kernel_mappings().
|
|
+ *
|
|
+ * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
|
|
+ * allocation.
|
|
*/
|
|
int __init efi_alloc_page_tables(void)
|
|
{
|
|
@@ -206,7 +209,7 @@ int __init efi_alloc_page_tables(void)
|
|
return 0;
|
|
|
|
gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
|
|
- efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
|
|
+ efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
|
|
if (!efi_pgd)
|
|
return -ENOMEM;
|
|
|
|
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
|
|
index e785734980ad..eeaaaab54b2a 100644
|
|
--- a/arch/x86/kernel/head_64.S
|
|
+++ b/arch/x86/kernel/head_64.S
|
|
@@ -324,6 +324,27 @@ GLOBAL(early_recursion_flag)
|
|
.balign PAGE_SIZE; \
|
|
GLOBAL(name)
|
|
|
|
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
|
+/*
|
|
+ * Each PGD needs to be 8k long and 8k aligned. We do not
|
|
+ * ever go out to userspace with these, so we do not
|
|
+ * strictly *need* the second page, but this allows us to
|
|
+ * have a single set_pgd() implementation that does not
|
|
+ * need to worry about whether it has 4k or 8k to work
|
|
+ * with.
|
|
+ *
|
|
+ * This ensures PGDs are 8k long:
|
|
+ */
|
|
+#define PTI_USER_PGD_FILL 512
|
|
+/* This ensures they are 8k-aligned: */
|
|
+#define NEXT_PGD_PAGE(name) \
|
|
+ .balign 2 * PAGE_SIZE; \
|
|
+GLOBAL(name)
|
|
+#else
|
|
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
|
|
+#define PTI_USER_PGD_FILL 0
|
|
+#endif
|
|
+
|
|
/* Automate the creation of 1 to 1 mapping pmd entries */
|
|
#define PMDS(START, PERM, COUNT) \
|
|
i = 0 ; \
|
|
@@ -333,13 +354,14 @@ GLOBAL(name)
|
|
.endr
|
|
|
|
__INITDATA
|
|
-NEXT_PAGE(early_top_pgt)
|
|
+NEXT_PGD_PAGE(early_top_pgt)
|
|
.fill 511,8,0
|
|
#ifdef CONFIG_X86_5LEVEL
|
|
.quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
|
|
#else
|
|
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
|
|
#endif
|
|
+ .fill PTI_USER_PGD_FILL,8,0
|
|
|
|
NEXT_PAGE(early_dynamic_pgts)
|
|
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
|
|
@@ -347,13 +369,14 @@ NEXT_PAGE(early_dynamic_pgts)
|
|
.data
|
|
|
|
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
|
|
-NEXT_PAGE(init_top_pgt)
|
|
+NEXT_PGD_PAGE(init_top_pgt)
|
|
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
|
|
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
|
|
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
|
|
.org init_top_pgt + PGD_START_KERNEL*8, 0
|
|
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
|
|
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
|
|
+ .fill PTI_USER_PGD_FILL,8,0
|
|
|
|
NEXT_PAGE(level3_ident_pgt)
|
|
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
|
|
@@ -364,8 +387,9 @@ NEXT_PAGE(level2_ident_pgt)
|
|
*/
|
|
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
|
|
#else
|
|
-NEXT_PAGE(init_top_pgt)
|
|
+NEXT_PGD_PAGE(init_top_pgt)
|
|
.fill 512,8,0
|
|
+ .fill PTI_USER_PGD_FILL,8,0
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_5LEVEL
|
|
--
|
|
2.14.2
|
|
|