2018-01-06 14:13:39 +00:00
|
|
|
From 7c5d42f31bf68647dd00ac2fef9057d113e8072d Mon Sep 17 00:00:00 2001
|
|
|
|
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
|
|
|
|
Date: Sat, 9 Sep 2017 00:56:03 +0300
|
2018-01-06 14:14:30 +00:00
|
|
|
Subject: [PATCH 050/232] mm, x86/mm: Fix performance regression in
|
2018-01-06 14:13:39 +00:00
|
|
|
get_user_pages_fast()
|
|
|
|
MIME-Version: 1.0
|
|
|
|
Content-Type: text/plain; charset=UTF-8
|
|
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
|
|
|
|
CVE-2017-5754
|
|
|
|
|
|
|
|
The 0-day test bot found a performance regression that was tracked down to
|
|
|
|
switching x86 to the generic get_user_pages_fast() implementation:
|
|
|
|
|
|
|
|
http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop
|
|
|
|
|
|
|
|
The regression was caused by the fact that we now use local_irq_save() +
|
|
|
|
local_irq_restore() in get_user_pages_fast() to disable interrupts.
|
|
|
|
In the x86 implementation, local_irq_disable() + local_irq_enable() were used.
|
|
|
|
|
|
|
|
The fix is to make get_user_pages_fast() use local_irq_disable(),
|
|
|
|
leaving local_irq_save() for __get_user_pages_fast() that can be called
|
|
|
|
with interrupts disabled.
|
|
|
|
|
|
|
|
Numbers for pinning a gigabyte of memory, one page at a time, 20 repeats:
|
|
|
|
|
|
|
|
Before: Average: 14.91 ms, stddev: 0.45 ms
|
|
|
|
After: Average: 10.76 ms, stddev: 0.18 ms
|
|
|
|
|
|
|
|
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
|
|
|
|
Cc: Andrew Morton <akpm@linux-foundation.org>
|
|
|
|
Cc: Huang Ying <ying.huang@intel.com>
|
|
|
|
Cc: Jonathan Corbet <corbet@lwn.net>
|
|
|
|
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
|
|
|
Cc: Peter Zijlstra <peterz@infradead.org>
|
|
|
|
Cc: Thomas Gleixner <tglx@linutronix.de>
|
|
|
|
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
|
|
|
|
Cc: linux-mm@kvack.org
|
|
|
|
Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
|
|
|
|
Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com
|
|
|
|
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
|
|
|
(cherry picked from commit 5b65c4677a57a1d4414212f9995aa0e46a21ff80)
|
|
|
|
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
|
|
|
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
|
|
|
(cherry picked from commit 5241f4b2c68284612e34910305f3234e4a64701b)
|
|
|
|
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
|
|
---
|
|
|
|
mm/gup.c | 97 ++++++++++++++++++++++++++++++++++++++--------------------------
|
|
|
|
1 file changed, 58 insertions(+), 39 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/mm/gup.c b/mm/gup.c
|
|
|
|
index 23f01c40c88f..4a789f1c6a27 100644
|
|
|
|
--- a/mm/gup.c
|
|
|
|
+++ b/mm/gup.c
|
|
|
|
@@ -1618,6 +1618,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
+static void gup_pgd_range(unsigned long addr, unsigned long end,
|
|
|
|
+ int write, struct page **pages, int *nr)
|
|
|
|
+{
|
|
|
|
+ unsigned long next;
|
|
|
|
+ pgd_t *pgdp;
|
|
|
|
+
|
|
|
|
+ pgdp = pgd_offset(current->mm, addr);
|
|
|
|
+ do {
|
|
|
|
+ pgd_t pgd = READ_ONCE(*pgdp);
|
|
|
|
+
|
|
|
|
+ next = pgd_addr_end(addr, end);
|
|
|
|
+ if (pgd_none(pgd))
|
|
|
|
+ return;
|
|
|
|
+ if (unlikely(pgd_huge(pgd))) {
|
|
|
|
+ if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
|
|
|
|
+ pages, nr))
|
|
|
|
+ return;
|
|
|
|
+ } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
|
|
|
|
+ if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
|
|
|
|
+ PGDIR_SHIFT, next, write, pages, nr))
|
|
|
|
+ return;
|
|
|
|
+ } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
|
|
|
|
+ return;
|
|
|
|
+ } while (pgdp++, addr = next, addr != end);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#ifndef gup_fast_permitted
|
|
|
|
+/*
|
|
|
|
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
|
|
|
|
+ * we need to fall back to the slow version:
|
|
|
|
+ */
|
|
|
|
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
|
|
|
|
+{
|
|
|
|
+ unsigned long len, end;
|
|
|
|
+
|
|
|
|
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
|
|
|
|
+ end = start + len;
|
|
|
|
+ return end >= start;
|
|
|
|
+}
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
|
|
|
|
* the regular GUP. It will only return non-negative values.
|
|
|
|
@@ -1625,10 +1666,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
|
|
|
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
|
|
|
struct page **pages)
|
|
|
|
{
|
|
|
|
- struct mm_struct *mm = current->mm;
|
|
|
|
unsigned long addr, len, end;
|
|
|
|
- unsigned long next, flags;
|
|
|
|
- pgd_t *pgdp;
|
|
|
|
+ unsigned long flags;
|
|
|
|
int nr = 0;
|
|
|
|
|
|
|
|
start &= PAGE_MASK;
|
|
|
|
@@ -1652,45 +1691,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
|
|
|
* block IPIs that come from THPs splitting.
|
|
|
|
*/
|
|
|
|
|
|
|
|
- local_irq_save(flags);
|
|
|
|
- pgdp = pgd_offset(mm, addr);
|
|
|
|
- do {
|
|
|
|
- pgd_t pgd = READ_ONCE(*pgdp);
|
|
|
|
-
|
|
|
|
- next = pgd_addr_end(addr, end);
|
|
|
|
- if (pgd_none(pgd))
|
|
|
|
- break;
|
|
|
|
- if (unlikely(pgd_huge(pgd))) {
|
|
|
|
- if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
|
|
|
|
- pages, &nr))
|
|
|
|
- break;
|
|
|
|
- } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
|
|
|
|
- if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
|
|
|
|
- PGDIR_SHIFT, next, write, pages, &nr))
|
|
|
|
- break;
|
|
|
|
- } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
|
|
|
|
- break;
|
|
|
|
- } while (pgdp++, addr = next, addr != end);
|
|
|
|
- local_irq_restore(flags);
|
|
|
|
+ if (gup_fast_permitted(start, nr_pages, write)) {
|
|
|
|
+ local_irq_save(flags);
|
|
|
|
+ gup_pgd_range(addr, end, write, pages, &nr);
|
|
|
|
+ local_irq_restore(flags);
|
|
|
|
+ }
|
|
|
|
|
|
|
|
return nr;
|
|
|
|
}
|
|
|
|
|
|
|
|
-#ifndef gup_fast_permitted
|
|
|
|
-/*
|
|
|
|
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
|
|
|
|
- * we need to fall back to the slow version:
|
|
|
|
- */
|
|
|
|
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
|
|
|
|
-{
|
|
|
|
- unsigned long len, end;
|
|
|
|
-
|
|
|
|
- len = (unsigned long) nr_pages << PAGE_SHIFT;
|
|
|
|
- end = start + len;
|
|
|
|
- return end >= start;
|
|
|
|
-}
|
|
|
|
-#endif
|
|
|
|
-
|
|
|
|
/**
|
|
|
|
* get_user_pages_fast() - pin user pages in memory
|
|
|
|
* @start: starting user address
|
|
|
|
@@ -1710,12 +1719,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
|
|
|
|
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
|
|
|
struct page **pages)
|
|
|
|
{
|
|
|
|
+ unsigned long addr, len, end;
|
|
|
|
int nr = 0, ret = 0;
|
|
|
|
|
|
|
|
start &= PAGE_MASK;
|
|
|
|
+ addr = start;
|
|
|
|
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
|
|
|
|
+ end = start + len;
|
|
|
|
+
|
|
|
|
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
|
|
|
|
+ (void __user *)start, len)))
|
|
|
|
+ return 0;
|
|
|
|
|
|
|
|
if (gup_fast_permitted(start, nr_pages, write)) {
|
|
|
|
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
|
|
|
|
+ local_irq_disable();
|
|
|
|
+ gup_pgd_range(addr, end, write, pages, &nr);
|
|
|
|
+ local_irq_enable();
|
|
|
|
ret = nr;
|
|
|
|
}
|
|
|
|
|
|
|
|
--
|
|
|
|
2.14.2
|
|
|
|
|