add proposed fix for LP#1674838

Patches and rationale by Seth Forshee[1]:

My testing shows that the "POWER9: Additional power9
patches" patches are responsible, two of them in particular:

 - mm: introduce page_vma_mapped_walk()
 - mm, ksm: convert write_protect_page() to use page_vma_mapped_walk()

These patches don't appear to be included for any
functionality they provide, but rather to make "mm/ksm:
handle protnone saved writes when making page write protect"
a clean cherry pick instead of a backport. But the backport
isn't that difficult, so as far as I can tell we can do away
with the other two patches.

[1]: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1674838/comments/108
This commit is contained in:
Fabian Grünbichler 2017-05-05 09:12:17 +02:00
parent 7f0f6370be
commit 2b834b083d
5 changed files with 543 additions and 0 deletions

View file

@@ -229,6 +229,10 @@ ${KERNEL_SRC}/README ${KERNEL_CFG_ORG}: ${KERNEL_SRC_SUBMODULE} | submodules
cd ${KERNEL_SRC}; patch -p1 < ../openvswitch-Set-internal-device-max-mtu-to-ETH_MAX_M.patch
cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-7979-0001-net_sched-nla_memdup_cookie-can-be-static.patch
cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-7979-0002-net-sched-actions-allocate-act-cookie-early.patch
cd ${KERNEL_SRC}; patch -p1 < ../swapops-0001-Revert-mm-ksm-handle-protnone-saved-writes-when-maki.patch
cd ${KERNEL_SRC}; patch -p1 < ../swapops-0002-Revert-mm-ksm-convert-write_protect_page-to-use-page.patch
cd ${KERNEL_SRC}; patch -p1 < ../swapops-0003-Revert-mm-introduce-page_vma_mapped_walk.patch
cd ${KERNEL_SRC}; patch -p1 < ../swapops-0004-mm-ksm-handle-protnone-saved-writes-when-making-page.patch
sed -i ${KERNEL_SRC}/Makefile -e 's/^EXTRAVERSION.*$$/EXTRAVERSION=${EXTRAVERSION}/'
touch $@

View file

@@ -0,0 +1,70 @@
From b4cf3c107f8f1d2da2b606e9d08e241be7000d65 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Wed, 3 May 2017 08:34:47 -0500
Subject: [PATCH 1/4] Revert "mm/ksm: handle protnone saved writes when making
page write protect"
This reverts commit c228a1037cd6bd0064472ea282e3730a342d6fca.
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
---
include/asm-generic/pgtable.h | 8 --------
mm/ksm.c | 9 ++-------
2 files changed, 2 insertions(+), 15 deletions(-)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 8c8ba48bef0b..b6f3a8a4b738 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -200,10 +200,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
#define pte_mk_savedwrite pte_mkwrite
#endif
-#ifndef pte_clear_savedwrite
-#define pte_clear_savedwrite pte_wrprotect
-#endif
-
#ifndef pmd_savedwrite
#define pmd_savedwrite pmd_write
#endif
@@ -212,10 +208,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
#define pmd_mk_savedwrite pmd_mkwrite
#endif
-#ifndef pmd_clear_savedwrite
-#define pmd_clear_savedwrite pmd_wrprotect
-#endif
-
#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
diff --git a/mm/ksm.c b/mm/ksm.c
index abc05187168a..9dd2e58fb6dc 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -880,8 +880,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
goto out_unlock;
- if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
- (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte))) {
+ if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte)) {
pte_t entry;
swapped = PageSwapCache(page);
@@ -906,11 +905,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
}
if (pte_dirty(entry))
set_page_dirty(page);
-
- if (pte_protnone(entry))
- entry = pte_mkclean(pte_clear_savedwrite(entry));
- else
- entry = pte_mkclean(pte_wrprotect(entry));
+ entry = pte_mkclean(pte_wrprotect(entry));
set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
}
*orig_pte = *pvmw.pte;
--
2.7.4

View file

@@ -0,0 +1,98 @@
From 40e1fa51bc8aa3df1d3a23711b5de62d8251bff5 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Wed, 3 May 2017 08:34:52 -0500
Subject: [PATCH 2/4] Revert "mm, ksm: convert write_protect_page() to use
page_vma_mapped_walk()"
This reverts commit 3000e033152a70fa139765b4dbb5baec46b1cc1b.
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
---
mm/ksm.c | 34 ++++++++++++++++------------------
1 file changed, 16 insertions(+), 18 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index 9dd2e58fb6dc..fed4afd8293b 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -856,35 +856,33 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
pte_t *orig_pte)
{
struct mm_struct *mm = vma->vm_mm;
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- };
+ unsigned long addr;
+ pte_t *ptep;
+ spinlock_t *ptl;
int swapped;
int err = -EFAULT;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
- pvmw.address = page_address_in_vma(page, vma);
- if (pvmw.address == -EFAULT)
+ addr = page_address_in_vma(page, vma);
+ if (addr == -EFAULT)
goto out;
BUG_ON(PageTransCompound(page));
- mmun_start = pvmw.address;
- mmun_end = pvmw.address + PAGE_SIZE;
+ mmun_start = addr;
+ mmun_end = addr + PAGE_SIZE;
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
- if (!page_vma_mapped_walk(&pvmw))
+ ptep = page_check_address(page, mm, addr, &ptl, 0);
+ if (!ptep)
goto out_mn;
- if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
- goto out_unlock;
- if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte)) {
+ if (pte_write(*ptep) || pte_dirty(*ptep)) {
pte_t entry;
swapped = PageSwapCache(page);
- flush_cache_page(vma, pvmw.address, page_to_pfn(page));
+ flush_cache_page(vma, addr, page_to_pfn(page));
/*
* Ok this is tricky, when get_user_pages_fast() run it doesn't
* take any lock, therefore the check that we are going to make
@@ -894,25 +892,25 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
* this assure us that no O_DIRECT can happen after the check
* or in the middle of the check.
*/
- entry = ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte);
+ entry = ptep_clear_flush_notify(vma, addr, ptep);
/*
* Check that no O_DIRECT or similar I/O is in progress on the
* page
*/
if (page_mapcount(page) + 1 + swapped != page_count(page)) {
- set_pte_at(mm, pvmw.address, pvmw.pte, entry);
+ set_pte_at(mm, addr, ptep, entry);
goto out_unlock;
}
if (pte_dirty(entry))
set_page_dirty(page);
entry = pte_mkclean(pte_wrprotect(entry));
- set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
+ set_pte_at_notify(mm, addr, ptep, entry);
}
- *orig_pte = *pvmw.pte;
+ *orig_pte = *ptep;
err = 0;
out_unlock:
- page_vma_mapped_walk_done(&pvmw);
+ pte_unmap_unlock(ptep, ptl);
out_mn:
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
--
2.7.4

View file

@@ -0,0 +1,294 @@
From 47a3e75a6d3db759a5fce3e922f144af0d6f1d38 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Wed, 3 May 2017 08:34:56 -0500
Subject: [PATCH 3/4] Revert "mm: introduce page_vma_mapped_walk()"
This reverts commit 6e2a092a48d38cfc0f06bdb523014acbfeba7b2e.
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
---
include/linux/rmap.h | 26 -------
mm/Makefile | 6 +-
mm/huge_memory.c | 9 +--
mm/page_vma_mapped.c | 188 ---------------------------------------------------
4 files changed, 5 insertions(+), 224 deletions(-)
delete mode 100644 mm/page_vma_mapped.c
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b76343610653..15321fb1df6b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -9,7 +9,6 @@
#include <linux/mm.h>
#include <linux/rwsem.h>
#include <linux/memcontrol.h>
-#include <linux/highmem.h>
/*
* The anon_vma heads a list of private "related" vmas, to scan if
@@ -233,31 +232,6 @@ static inline bool page_check_address_transhuge(struct page *page,
}
#endif
-/* Avoid racy checks */
-#define PVMW_SYNC (1 << 0)
-/* Look for migarion entries rather than present PTEs */
-#define PVMW_MIGRATION (1 << 1)
-
-struct page_vma_mapped_walk {
- struct page *page;
- struct vm_area_struct *vma;
- unsigned long address;
- pmd_t *pmd;
- pte_t *pte;
- spinlock_t *ptl;
- unsigned int flags;
-};
-
-static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
-{
- if (pvmw->pte)
- pte_unmap(pvmw->pte);
- if (pvmw->ptl)
- spin_unlock(pvmw->ptl);
-}
-
-bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
-
/*
* Used by swapoff to help locate where page is expected in vma.
*/
diff --git a/mm/Makefile b/mm/Makefile
index 474fa3431059..14fa1c83b504 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -23,10 +23,8 @@ KCOV_INSTRUMENT_vmstat.o := n
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
- mlock.o mmap.o mprotect.o mremap.o msync.o \
- page_vma_mapped.o pagewalk.o pgtable-generic.o \
- rmap.o vmalloc.o
-
+ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
+ vmalloc.o pagewalk.o pgtable-generic.o
ifdef CONFIG_CROSS_MEMORY_ATTACH
mmu-$(CONFIG_MMU) += process_vm_access.o
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 87303c6bacf4..e6de801fa477 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1878,12 +1878,9 @@ static void freeze_page(struct page *page)
static void unfreeze_page(struct page *page)
{
int i;
- if (PageTransHuge(page)) {
- remove_migration_ptes(page, page, true);
- } else {
- for (i = 0; i < HPAGE_PMD_NR; i++)
- remove_migration_ptes(page + i, page + i, true);
- }
+
+ for (i = 0; i < HPAGE_PMD_NR; i++)
+ remove_migration_ptes(page + i, page + i, true);
}
static void __split_huge_page_tail(struct page *head, int tail,
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
deleted file mode 100644
index dc1a54826cf2..000000000000
--- a/mm/page_vma_mapped.c
+++ /dev/null
@@ -1,188 +0,0 @@
-#include <linux/mm.h>
-#include <linux/rmap.h>
-#include <linux/hugetlb.h>
-#include <linux/swap.h>
-#include <linux/swapops.h>
-
-#include "internal.h"
-
-static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
-{
- pmd_t pmde;
- /*
- * Make sure we don't re-load pmd between present and !trans_huge check.
- * We need a consistent view.
- */
- pmde = READ_ONCE(*pvmw->pmd);
- return pmd_present(pmde) && !pmd_trans_huge(pmde);
-}
-
-static inline bool not_found(struct page_vma_mapped_walk *pvmw)
-{
- page_vma_mapped_walk_done(pvmw);
- return false;
-}
-
-static bool map_pte(struct page_vma_mapped_walk *pvmw)
-{
- pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address);
- if (!(pvmw->flags & PVMW_SYNC)) {
- if (pvmw->flags & PVMW_MIGRATION) {
- if (!is_swap_pte(*pvmw->pte))
- return false;
- } else {
- if (!pte_present(*pvmw->pte))
- return false;
- }
- }
- pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd);
- spin_lock(pvmw->ptl);
- return true;
-}
-
-static bool check_pte(struct page_vma_mapped_walk *pvmw)
-{
- if (pvmw->flags & PVMW_MIGRATION) {
-#ifdef CONFIG_MIGRATION
- swp_entry_t entry;
- if (!is_swap_pte(*pvmw->pte))
- return false;
- entry = pte_to_swp_entry(*pvmw->pte);
- if (!is_migration_entry(entry))
- return false;
- if (migration_entry_to_page(entry) - pvmw->page >=
- hpage_nr_pages(pvmw->page)) {
- return false;
- }
- if (migration_entry_to_page(entry) < pvmw->page)
- return false;
-#else
- WARN_ON_ONCE(1);
-#endif
- } else {
- if (!pte_present(*pvmw->pte))
- return false;
-
- /* THP can be referenced by any subpage */
- if (pte_page(*pvmw->pte) - pvmw->page >=
- hpage_nr_pages(pvmw->page)) {
- return false;
- }
- if (pte_page(*pvmw->pte) < pvmw->page)
- return false;
- }
-
- return true;
-}
-
-/**
- * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
- * @pvmw->address
- * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
- * must be set. pmd, pte and ptl must be NULL.
- *
- * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
- * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
- * adjusted if needed (for PTE-mapped THPs).
- *
- * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
- * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
- * a loop to find all PTEs that map the THP.
- *
- * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
- * regardless of which page table level the page is mapped at. @pvmw->pmd is
- * NULL.
- *
- * Retruns false if there are no more page table entries for the page in
- * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
- *
- * If you need to stop the walk before page_vma_mapped_walk() returned false,
- * use page_vma_mapped_walk_done(). It will do the housekeeping.
- */
-bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
-{
- struct mm_struct *mm = pvmw->vma->vm_mm;
- struct page *page = pvmw->page;
- pgd_t *pgd;
- pud_t *pud;
-
- /* The only possible pmd mapping has been handled on last iteration */
- if (pvmw->pmd && !pvmw->pte)
- return not_found(pvmw);
-
- /* Only for THP, seek to next pte entry makes sense */
- if (pvmw->pte) {
- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
- return not_found(pvmw);
- goto next_pte;
- }
-
- if (unlikely(PageHuge(pvmw->page))) {
- /* when pud is not present, pte will be NULL */
- pvmw->pte = huge_pte_offset(mm, pvmw->address);
- if (!pvmw->pte)
- return false;
-
- pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte);
- spin_lock(pvmw->ptl);
- if (!check_pte(pvmw))
- return not_found(pvmw);
- return true;
- }
-restart:
- pgd = pgd_offset(mm, pvmw->address);
- if (!pgd_present(*pgd))
- return false;
- pud = pud_offset(pgd, pvmw->address);
- if (!pud_present(*pud))
- return false;
- pvmw->pmd = pmd_offset(pud, pvmw->address);
- if (pmd_trans_huge(*pvmw->pmd)) {
- pvmw->ptl = pmd_lock(mm, pvmw->pmd);
- if (!pmd_present(*pvmw->pmd))
- return not_found(pvmw);
- if (likely(pmd_trans_huge(*pvmw->pmd))) {
- if (pvmw->flags & PVMW_MIGRATION)
- return not_found(pvmw);
- if (pmd_page(*pvmw->pmd) != page)
- return not_found(pvmw);
- return true;
- } else {
- /* THP pmd was split under us: handle on pte level */
- spin_unlock(pvmw->ptl);
- pvmw->ptl = NULL;
- }
- } else {
- if (!check_pmd(pvmw))
- return false;
- }
- if (!map_pte(pvmw))
- goto next_pte;
- while (1) {
- if (check_pte(pvmw))
- return true;
-next_pte: do {
- pvmw->address += PAGE_SIZE;
- if (pvmw->address >=
- __vma_address(pvmw->page, pvmw->vma) +
- hpage_nr_pages(pvmw->page) * PAGE_SIZE)
- return not_found(pvmw);
- /* Did we cross page table boundary? */
- if (pvmw->address % PMD_SIZE == 0) {
- pte_unmap(pvmw->pte);
- if (pvmw->ptl) {
- spin_unlock(pvmw->ptl);
- pvmw->ptl = NULL;
- }
- goto restart;
- } else {
- pvmw->pte++;
- }
- } while (pte_none(*pvmw->pte));
-
- if (!pvmw->ptl) {
- pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
- spin_lock(pvmw->ptl);
- }
- }
-}
--
2.7.4

View file

@@ -0,0 +1,77 @@
From 361de9fb44163c4e693022786af380a2b2298c6d Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Fri, 24 Feb 2017 14:59:19 -0800
Subject: [PATCH 4/4] mm/ksm: handle protnone saved writes when making page
write protect
Without this KSM will consider the page write protected, but a numa
fault can later mark the page writable. This can result in memory
corruption.
Link: http://lkml.kernel.org/r/1487498625-10891-3-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(backported from commit 595cd8f256d24face93b2722927ec9c980419c26)
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
---
include/asm-generic/pgtable.h | 8 ++++++++
mm/ksm.c | 9 +++++++--
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b6f3a8a4b738..8c8ba48bef0b 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -200,6 +200,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
#define pte_mk_savedwrite pte_mkwrite
#endif
+#ifndef pte_clear_savedwrite
+#define pte_clear_savedwrite pte_wrprotect
+#endif
+
#ifndef pmd_savedwrite
#define pmd_savedwrite pmd_write
#endif
@@ -208,6 +212,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
#define pmd_mk_savedwrite pmd_mkwrite
#endif
+#ifndef pmd_clear_savedwrite
+#define pmd_clear_savedwrite pmd_wrprotect
+#endif
+
#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
diff --git a/mm/ksm.c b/mm/ksm.c
index fed4afd8293b..099dfa45d596 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -878,7 +878,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
if (!ptep)
goto out_mn;
- if (pte_write(*ptep) || pte_dirty(*ptep)) {
+ if (pte_write(*ptep) || pte_dirty(*ptep) ||
+ (pte_protnone(*ptep) && pte_savedwrite(*ptep))) {
pte_t entry;
swapped = PageSwapCache(page);
@@ -903,7 +904,11 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
}
if (pte_dirty(entry))
set_page_dirty(page);
- entry = pte_mkclean(pte_wrprotect(entry));
+
+ if (pte_protnone(entry))
+ entry = pte_mkclean(pte_clear_savedwrite(entry));
+ else
+ entry = pte_mkclean(pte_wrprotect(entry));
set_pte_at_notify(mm, addr, ptep, entry);
}
*orig_pte = *ptep;
--
2.7.4