From dfd4ec1fd8d1d09930e9cf9ed7ebd07a66813337 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Wed, 29 Nov 2017 09:45:44 +0100
Subject: [PATCH 7/7] Revert "Merge branch 'mmu_notifier_fixes'"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit ea25c43179462e342d4a0e66c3f6a5f53514da05, reversing
changes made to c227390c91a355300f47f9bef0aefbdfaaca1500.

This series causes blue screens in Windows VMs running under heavy
memory/swap pressure.

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
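Note (this area between the '---' marker and the diffstat is ignored by
git-am): the revert restores the per-page ->invalidate_page() callback in
struct mmu_notifier_ops, which the reverted series had folded into paired
invalidate_range_start()/invalidate_range_end() calls. As a rough
illustration of the restored hook shape (the demo_* names below are
hypothetical and not part of this patch), a secondary-MMU driver would
again register a single-page handler like this:

    /* Illustrative sketch only: tear down one page worth of
     * secondary-MMU mappings when the primary pte for 'address'
     * is zapped; the page itself stays allocated until we return.
     */
    static void demo_invalidate_page(struct mmu_notifier *mn,
                                     struct mm_struct *mm,
                                     unsigned long address)
    {
            /* demo_flush_range() is a hypothetical driver helper that
             * drops mappings for [address, address + PAGE_SIZE). */
            demo_flush_range(mn, address, address + PAGE_SIZE);
    }

    static const struct mmu_notifier_ops demo_mn_ops = {
            .invalidate_page = demo_invalidate_page,
    };

Callers such as page_mkclean_one() and try_to_unmap_one() then invoke
mmu_notifier_invalidate_page() once per touched page again, as the
mm/rmap.c hunks below show.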
 arch/arm/include/asm/kvm_host.h          |  6 +++++
 arch/arm64/include/asm/kvm_host.h        |  6 +++++
 arch/mips/include/asm/kvm_host.h         |  5 ++++
 arch/powerpc/include/asm/kvm_host.h      |  5 ++++
 arch/x86/include/asm/kvm_host.h          |  2 ++
 include/linux/mm.h                       |  1 -
 include/linux/mmu_notifier.h             | 25 +++++++++++++++++++
 arch/powerpc/platforms/powernv/npu-dma.c | 10 ++++++++
 arch/x86/kvm/x86.c                       | 11 +++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c   | 31 +++++++++++++++++++++++
 drivers/infiniband/core/umem_odp.c       | 19 +++++++++++++++
 drivers/infiniband/hw/hfi1/mmu_rb.c      |  9 +++++++
 drivers/iommu/amd_iommu_v2.c             |  8 ++++++
 drivers/iommu/intel-svm.c                |  9 +++++++
 drivers/misc/mic/scif/scif_dma.c         | 11 +++++++++
 drivers/misc/sgi-gru/grutlbpurge.c       | 12 +++++++++
 drivers/xen/gntdev.c                     |  8 ++++++
 fs/dax.c                                 | 19 ++++++---------
 mm/memory.c                              | 26 ++++----------------
 mm/mmu_notifier.c                        | 14 +++++++++++
 mm/rmap.c                                | 35 +++-----------------------
 virt/kvm/kvm_main.c                      | 42 ++++++++++++++++++++++++++++
 22 files changed, 249 insertions(+), 65 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 4a879f6ff13b..127e2dd2e21c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,6 +225,12 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
+/* We do not have shadow page tables, hence the empty hooks */
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+                                                         unsigned long address)
+{
+}
+
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 void kvm_arm_halt_guest(struct kvm *kvm);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e923b58606e2..d68630007b14 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -326,6 +326,12 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
+/* We do not have shadow page tables, hence the empty hooks */
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+                                                         unsigned long address)
+{
+}
+
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 void kvm_arm_halt_guest(struct kvm *kvm);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index a9af1d2dcd69..2998479fd4e8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -938,6 +938,11 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+                                                         unsigned long address)
+{
+}
+
 /* Emulation */
 int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
 enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e372ed871c51..8b3f1238d07f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -67,6 +67,11 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+                                                         unsigned long address)
+{
+}
+
 #define HPTEG_CACHE_NUM          (1 << 15)
 #define HPTEG_HASH_BITS_PTE      13
 #define HPTEG_HASH_BITS_PTE_LONG 12
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 92c9032502d8..f4d120a3e22e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1375,6 +1375,8 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+                                           unsigned long address);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 07630442bbf2..701de4b55ece 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1260,7 +1260,6 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 void unmap_mapping_range(struct address_space *mapping,
                loff_t const holebegin, loff_t const holelen, int even_cows);
 int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
-               unsigned long *start, unsigned long *end,
                pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
        unsigned long *pfn);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 6866e8126982..947f21b451d2 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -94,6 +94,17 @@ struct mmu_notifier_ops {
                            unsigned long address,
                            pte_t pte);
 
+       /*
+        * Before this is invoked any secondary MMU is still ok to
+        * read/write to the page previously pointed to by the Linux
+        * pte because the page hasn't been freed yet and it won't be
+        * freed until this returns. If required set_page_dirty has to
+        * be called internally to this method.
+        */
+       void (*invalidate_page)(struct mmu_notifier *mn,
+                               struct mm_struct *mm,
+                               unsigned long address);
+
        /*
         * invalidate_range_start() and invalidate_range_end() must be
         * paired and are called only when the mmap_sem and/or the
@@ -209,6 +220,8 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm,
                                        unsigned long address);
 extern void __mmu_notifier_change_pte(struct mm_struct *mm,
                                        unsigned long address, pte_t pte);
+extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
+                                        unsigned long address);
 extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                        unsigned long start, unsigned long end);
 extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -255,6 +268,13 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
        __mmu_notifier_change_pte(mm, address, pte);
 }
 
+static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
+                                        unsigned long address)
+{
+       if (mm_has_notifiers(mm))
+               __mmu_notifier_invalidate_page(mm, address);
+}
+
 static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                        unsigned long start, unsigned long end)
 {
@@ -427,6 +447,11 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 {
 }
 
+static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
+                                        unsigned long address)
+{
+}
+
 static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                        unsigned long start, unsigned long end)
 {
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 2cb6cbea4b3b..3d4f879e687c 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -614,6 +614,15 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
        mmio_invalidate(npu_context, 1, address, true);
 }
 
+static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
+                                       struct mm_struct *mm,
+                                       unsigned long address)
+{
+       struct npu_context *npu_context = mn_to_npu_context(mn);
+
+       mmio_invalidate(npu_context, 1, address, true);
+}
+
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long start, unsigned long end)
@@ -631,6 +640,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
        .release = pnv_npu2_mn_release,
        .change_pte = pnv_npu2_mn_change_pte,
+       .invalidate_page = pnv_npu2_mn_invalidate_page,
        .invalidate_range = pnv_npu2_mn_invalidate_range,
 };
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7351cdc46cc7..a669b4dd51e7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6734,6 +6734,17 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
 
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+                                           unsigned long address)
+{
+       /*
+        * The physical address of apic access page is stored in the VMCS.
+        * Update it when it becomes invalid.
+        */
+       if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
+               kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
 /*
  * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to the userspace. Otherwise, the value will be returned to the
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e1cde6b80027..6558a3ed57a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -146,6 +146,36 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
        }
 }
 
+/**
+ * amdgpu_mn_invalidate_page - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mn: the mm this callback is about
+ * @address: address of invalidate page
+ *
+ * Invalidation of a single page. Blocks for all BOs mapping it
+ * and unmap them by move them into system domain again.
+ */
+static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn,
+                                     struct mm_struct *mm,
+                                     unsigned long address)
+{
+       struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+       struct interval_tree_node *it;
+
+       mutex_lock(&rmn->lock);
+
+       it = interval_tree_iter_first(&rmn->objects, address, address);
+       if (it) {
+               struct amdgpu_mn_node *node;
+
+               node = container_of(it, struct amdgpu_mn_node, it);
+               amdgpu_mn_invalidate_node(node, address, address);
+       }
+
+       mutex_unlock(&rmn->lock);
+}
+
 /**
  * amdgpu_mn_invalidate_range_start - callback to notify about mm change
  *
@@ -185,6 +215,7 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 
 static const struct mmu_notifier_ops amdgpu_mn_ops = {
        .release = amdgpu_mn_release,
+       .invalidate_page = amdgpu_mn_invalidate_page,
        .invalidate_range_start = amdgpu_mn_invalidate_range_start,
 };
 
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 55e8f5ed8b3c..8c4ec564e495 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -166,6 +166,24 @@ static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
        return 0;
 }
 
+static void ib_umem_notifier_invalidate_page(struct mmu_notifier *mn,
+                                            struct mm_struct *mm,
+                                            unsigned long address)
+{
+       struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+
+       if (!context->invalidate_range)
+               return;
+
+       ib_ucontext_notifier_start_account(context);
+       down_read(&context->umem_rwsem);
+       rbt_ib_umem_for_each_in_range(&context->umem_tree, address,
+                                     address + PAGE_SIZE,
+                                     invalidate_page_trampoline, NULL);
+       up_read(&context->umem_rwsem);
+       ib_ucontext_notifier_end_account(context);
+}
+
 static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
                                             u64 end, void *cookie)
 {
@@ -219,6 +237,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
 static const struct mmu_notifier_ops ib_umem_notifiers = {
        .release = ib_umem_notifier_release,
+       .invalidate_page = ib_umem_notifier_invalidate_page,
        .invalidate_range_start = ib_umem_notifier_invalidate_range_start,
        .invalidate_range_end = ib_umem_notifier_invalidate_range_end,
 };
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index e4b56a0dd6d0..ccbf52c8ff6f 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -67,6 +67,8 @@ struct mmu_rb_handler {
 
 static unsigned long mmu_node_start(struct mmu_rb_node *);
 static unsigned long mmu_node_last(struct mmu_rb_node *);
+static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
+                                    unsigned long);
 static inline void mmu_notifier_range_start(struct mmu_notifier *,
                                            struct mm_struct *,
                                            unsigned long, unsigned long);
@@ -80,6 +82,7 @@ static void do_remove(struct mmu_rb_handler *handler,
 static void handle_remove(struct work_struct *work);
 
 static const struct mmu_notifier_ops mn_opts = {
+       .invalidate_page = mmu_notifier_page,
        .invalidate_range_start = mmu_notifier_range_start,
 };
 
@@ -282,6 +285,12 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
        handler->ops->remove(handler->ops_arg, node);
 }
 
+static inline void mmu_notifier_page(struct mmu_notifier *mn,
+                                    struct mm_struct *mm, unsigned long addr)
+{
+       mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE);
+}
+
 static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
                                            struct mm_struct *mm,
                                            unsigned long start,
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index dccf5b76eff2..6629c472eafd 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -391,6 +391,13 @@ static int mn_clear_flush_young(struct mmu_notifier *mn,
        return 0;
 }
 
+static void mn_invalidate_page(struct mmu_notifier *mn,
+                              struct mm_struct *mm,
+                              unsigned long address)
+{
+       __mn_flush_page(mn, address);
+}
+
 static void mn_invalidate_range(struct mmu_notifier *mn,
                                struct mm_struct *mm,
                                unsigned long start, unsigned long end)
@@ -429,6 +436,7 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
 static const struct mmu_notifier_ops iommu_mn = {
        .release = mn_release,
        .clear_flush_young = mn_clear_flush_young,
+       .invalidate_page = mn_invalidate_page,
        .invalidate_range = mn_invalidate_range,
 };
 
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index f620dccec8ee..f167c0d84ebf 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -223,6 +223,14 @@ static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
        intel_flush_svm_range(svm, address, 1, 1, 0);
 }
 
+static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
+                                 unsigned long address)
+{
+       struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+
+       intel_flush_svm_range(svm, address, 1, 1, 0);
+}
+
 /* Pages have been freed at this point */
 static void intel_invalidate_range(struct mmu_notifier *mn,
                                   struct mm_struct *mm,
@@ -277,6 +285,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 static const struct mmu_notifier_ops intel_mmuops = {
        .release = intel_mm_release,
        .change_pte = intel_change_pte,
+       .invalidate_page = intel_invalidate_page,
        .invalidate_range = intel_invalidate_range,
 };
 
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
index 63d6246d6dff..64d5760d069a 100644
--- a/drivers/misc/mic/scif/scif_dma.c
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -200,6 +200,16 @@ static void scif_mmu_notifier_release(struct mmu_notifier *mn,
        schedule_work(&scif_info.misc_work);
 }
 
+static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+                                             struct mm_struct *mm,
+                                             unsigned long address)
+{
+       struct scif_mmu_notif *mmn;
+
+       mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+       scif_rma_destroy_tcw(mmn, address, PAGE_SIZE);
+}
+
 static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
                                                     struct mm_struct *mm,
                                                     unsigned long start,
@@ -225,6 +235,7 @@ static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
        .release = scif_mmu_notifier_release,
        .clear_flush_young = NULL,
+       .invalidate_page = scif_mmu_notifier_invalidate_page,
        .invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
        .invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
 
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index 9918eda0e05f..e936d43895d2 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -247,6 +247,17 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn,
        gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
 }
 
+static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
+                               unsigned long address)
+{
+       struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+                                                ms_notifier);
+
+       STAT(mmu_invalidate_page);
+       gru_flush_tlb_range(gms, address, PAGE_SIZE);
+       gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
+}
+
 static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
 {
        struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
@@ -258,6 +269,7 @@ static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
 
 
 static const struct mmu_notifier_ops gru_mmuops = {
+       .invalidate_page = gru_invalidate_page,
        .invalidate_range_start = gru_invalidate_range_start,
        .invalidate_range_end = gru_invalidate_range_end,
        .release = gru_release,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 82360594fa8e..f3bf8f4e2d6c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -484,6 +484,13 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
        mutex_unlock(&priv->lock);
 }
 
+static void mn_invl_page(struct mmu_notifier *mn,
+                        struct mm_struct *mm,
+                        unsigned long address)
+{
+       mn_invl_range_start(mn, mm, address, address + PAGE_SIZE);
+}
+
 static void mn_release(struct mmu_notifier *mn,
                       struct mm_struct *mm)
 {
@@ -515,6 +522,7 @@ static void mn_release(struct mmu_notifier *mn,
 
 static const struct mmu_notifier_ops gntdev_mmu_ops = {
        .release = mn_release,
+       .invalidate_page = mn_invl_page,
        .invalidate_range_start = mn_invl_range_start,
 };
 
diff --git a/fs/dax.c b/fs/dax.c
index fa8e358c3c6b..57da1d0a6a40 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -591,10 +591,11 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
        pte_t pte, *ptep = NULL;
        pmd_t *pmdp = NULL;
        spinlock_t *ptl;
+       bool changed;
 
        i_mmap_lock_read(mapping);
        vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
-               unsigned long address, start, end;
+               unsigned long address;
 
                cond_resched();
 
@@ -602,13 +603,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
                        continue;
 
                address = pgoff_address(index, vma);
-
-               /*
-                * Note because we provide start/end to follow_pte_pmd it will
-                * call mmu_notifier_invalidate_range_start() on our behalf
-                * before taking any lock.
-                */
-               if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl))
+               changed = false;
+               if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
                        continue;
 
                if (pmdp) {
@@ -625,7 +621,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
                        pmd = pmd_wrprotect(pmd);
                        pmd = pmd_mkclean(pmd);
                        set_pmd_at(vma->vm_mm, address, pmdp, pmd);
-                       mmu_notifier_invalidate_range(vma->vm_mm, start, end);
+                       changed = true;
 unlock_pmd:
                        spin_unlock(ptl);
 #endif
@@ -640,12 +636,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
                        pte = pte_wrprotect(pte);
                        pte = pte_mkclean(pte);
                        set_pte_at(vma->vm_mm, address, ptep, pte);
-                       mmu_notifier_invalidate_range(vma->vm_mm, start, end);
+                       changed = true;
unlock_pte:
                        pte_unmap_unlock(ptep, ptl);
                }
 
-               mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+               if (changed)
+                       mmu_notifier_invalidate_page(vma->vm_mm, address);
        }
        i_mmap_unlock_read(mapping);
 }
diff --git a/mm/memory.c b/mm/memory.c
index 969c5bf31997..7834310a6b64 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4044,8 +4044,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 #endif /* __PAGETABLE_PMD_FOLDED */
 
 static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
-               unsigned long *start, unsigned long *end,
-               pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
+               pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
        pgd_t *pgd;
        p4d_t *p4d;
@@ -4072,29 +4071,17 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
                if (!pmdpp)
                        goto out;
 
-               if (start && end) {
-                       *start = address & PMD_MASK;
-                       *end = *start + PMD_SIZE;
-                       mmu_notifier_invalidate_range_start(mm, *start, *end);
-               }
                *ptlp = pmd_lock(mm, pmd);
                if (pmd_huge(*pmd)) {
                        *pmdpp = pmd;
                        return 0;
                }
                spin_unlock(*ptlp);
-               if (start && end)
-                       mmu_notifier_invalidate_range_end(mm, *start, *end);
        }
 
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                goto out;
 
-       if (start && end) {
-               *start = address & PAGE_MASK;
-               *end = *start + PAGE_SIZE;
-               mmu_notifier_invalidate_range_start(mm, *start, *end);
-       }
        ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
        if (!pte_present(*ptep))
                goto unlock;
@@ -4102,8 +4089,6 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
        return 0;
 unlock:
        pte_unmap_unlock(ptep, *ptlp);
-       if (start && end)
-               mmu_notifier_invalidate_range_end(mm, *start, *end);
 out:
        return -EINVAL;
 }
@@ -4115,21 +4100,20 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address,
 
        /* (void) is needed to make gcc happy */
        (void) __cond_lock(*ptlp,
-                          !(res = __follow_pte_pmd(mm, address, NULL, NULL,
-                                                   ptepp, NULL, ptlp)));
+                          !(res = __follow_pte_pmd(mm, address, ptepp, NULL,
+                                                   ptlp)));
        return res;
 }
 
 int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
-                  unsigned long *start, unsigned long *end,
                   pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
        int res;
 
        /* (void) is needed to make gcc happy */
        (void) __cond_lock(*ptlp,
-                          !(res = __follow_pte_pmd(mm, address, start, end,
-                                                   ptepp, pmdpp, ptlp)));
+                          !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
+                                                   ptlp)));
        return res;
 }
 EXPORT_SYMBOL(follow_pte_pmd);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 314285284e6e..54ca54562928 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -174,6 +174,20 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
        srcu_read_unlock(&srcu, id);
 }
 
+void __mmu_notifier_invalidate_page(struct mm_struct *mm,
+                                   unsigned long address)
+{
+       struct mmu_notifier *mn;
+       int id;
+
+       id = srcu_read_lock(&srcu);
+       hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
+               if (mn->ops->invalidate_page)
+                       mn->ops->invalidate_page(mn, mm, address);
+       }
+       srcu_read_unlock(&srcu, id);
+}
+
 void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                unsigned long start, unsigned long end)
 {
diff --git a/mm/rmap.c b/mm/rmap.c
index c570f82e6827..c8993c63eb25 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -887,21 +887,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                .address = address,
                .flags = PVMW_SYNC,
        };
-       unsigned long start = address, end;
        int *cleaned = arg;
 
-       /*
-        * We have to assume the worse case ie pmd for invalidation. Note that
-        * the page can not be free from this function.
-        */
-       end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
-       mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
-
        while (page_vma_mapped_walk(&pvmw)) {
-               unsigned long cstart, cend;
                int ret = 0;
-
-               cstart = address = pvmw.address;
+               address = pvmw.address;
                if (pvmw.pte) {
                        pte_t entry;
                        pte_t *pte = pvmw.pte;
@@ -914,7 +904,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                        entry = pte_wrprotect(entry);
                        entry = pte_mkclean(entry);
                        set_pte_at(vma->vm_mm, address, pte, entry);
-                       cend = cstart + PAGE_SIZE;
                        ret = 1;
                } else {
 #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
@@ -929,8 +918,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                        entry = pmd_wrprotect(entry);
                        entry = pmd_mkclean(entry);
                        set_pmd_at(vma->vm_mm, address, pmd, entry);
-                       cstart &= PMD_MASK;
-                       cend = cstart + PMD_SIZE;
                        ret = 1;
 #else
                        /* unexpected pmd-mapped page? */
@@ -939,13 +926,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                }
 
                if (ret) {
-                       mmu_notifier_invalidate_range(vma->vm_mm, cstart, cend);
+                       mmu_notifier_invalidate_page(vma->vm_mm, address);
                        (*cleaned)++;
                }
        }
 
-       mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
-
        return true;
 }
 
@@ -1339,7 +1324,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
        pte_t pteval;
        struct page *subpage;
        bool ret = true;
-       unsigned long start = address, end;
        enum ttu_flags flags = (enum ttu_flags)arg;
 
        /* munlock has nothing to gain from examining un-locked vmas */
@@ -1351,14 +1335,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                                flags & TTU_MIGRATION, page);
        }
 
-       /*
-        * We have to assume the worse case ie pmd for invalidation. Note that
-        * the page can not be free in this function as call of try_to_unmap()
-        * must hold a reference on the page.
-        */
-       end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
-       mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
-
        while (page_vma_mapped_walk(&pvmw)) {
                /*
                 * If the page is mlock()d, we cannot swap it out.
@@ -1469,7 +1445,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                        if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
                                WARN_ON_ONCE(1);
                                ret = false;
-                               /* We have to invalidate as we cleared the pte */
                                page_vma_mapped_walk_done(&pvmw);
                                break;
                        }
@@ -1515,12 +1490,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 discard:
                page_remove_rmap(subpage, PageHuge(page));
                put_page(page);
-               mmu_notifier_invalidate_range(mm, address,
-                                             address + PAGE_SIZE);
+               mmu_notifier_invalidate_page(mm, address);
        }
-
-       mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
-
        return ret;
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bfa9c4d34102..1d048ef969a8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -322,6 +322,47 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
        return container_of(mn, struct kvm, mmu_notifier);
 }
 
+static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+                                            struct mm_struct *mm,
+                                            unsigned long address)
+{
+       struct kvm *kvm = mmu_notifier_to_kvm(mn);
+       int need_tlb_flush, idx;
+
+       /*
+        * When ->invalidate_page runs, the linux pte has been zapped
+        * already but the page is still allocated until
+        * ->invalidate_page returns. So if we increase the sequence
+        * here the kvm page fault will notice if the spte can't be
+        * established because the page is going to be freed. If
+        * instead the kvm page fault establishes the spte before
+        * ->invalidate_page runs, kvm_unmap_hva will release it
+        * before returning.
+        *
+        * The sequence increase only need to be seen at spin_unlock
+        * time, and not at spin_lock time.
+        *
+        * Increasing the sequence after the spin_unlock would be
+        * unsafe because the kvm page fault could then establish the
+        * pte after kvm_unmap_hva returned, without noticing the page
+        * is going to be freed.
+        */
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+
+       kvm->mmu_notifier_seq++;
+       need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
+       /* we've to flush the tlb before the pages can be freed */
+       if (need_tlb_flush)
+               kvm_flush_remote_tlbs(kvm);
+
+       spin_unlock(&kvm->mmu_lock);
+
+       kvm_arch_mmu_notifier_invalidate_page(kvm, address);
+
+       srcu_read_unlock(&kvm->srcu, idx);
+}
+
 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long address,
@@ -469,6 +510,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 }
 
 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+       .invalidate_page = kvm_mmu_notifier_invalidate_page,
        .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
        .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
        .clear_flush_young = kvm_mmu_notifier_clear_flush_young,
-- 
2.14.2