Merge branch 'akpm' (patches from Andrew)
Merge second patchbomb from Andrew Morton: - the rest of MM - various misc bits - add ability to run /sbin/reboot at reboot time - printk/vsprintf changes - fiddle with seq_printf() return value * akpm: (114 commits) parisc: remove use of seq_printf return value lru_cache: remove use of seq_printf return value tracing: remove use of seq_printf return value cgroup: remove use of seq_printf return value proc: remove use of seq_printf return value s390: remove use of seq_printf return value cris fasttimer: remove use of seq_printf return value cris: remove use of seq_printf return value openrisc: remove use of seq_printf return value ARM: plat-pxa: remove use of seq_printf return value nios2: cpuinfo: remove use of seq_printf return value microblaze: mb: remove use of seq_printf return value ipc: remove use of seq_printf return value rtc: remove use of seq_printf return value power: wakeup: remove use of seq_printf return value x86: mtrr: if: remove use of seq_printf return value linux/bitmap.h: improve BITMAP_{LAST,FIRST}_WORD_MASK MAINTAINERS: CREDITS: remove Stefano Brivio from B43 .mailmap: add Ricardo Ribalda CREDITS: add Ricardo Ribalda Delgado ...
This commit is contained in:
commit
eea3a00264
136 changed files with 3273 additions and 1808 deletions
5
mm/cma.c
5
mm/cma.c
|
@ -23,6 +23,7 @@
|
|||
# define DEBUG
|
||||
#endif
|
||||
#endif
|
||||
#define CREATE_TRACE_POINTS
|
||||
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/err.h>
|
||||
|
@ -34,6 +35,7 @@
|
|||
#include <linux/cma.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/io.h>
|
||||
#include <trace/events/cma.h>
|
||||
|
||||
#include "cma.h"
|
||||
|
||||
|
@ -414,6 +416,8 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
|
|||
start = bitmap_no + mask + 1;
|
||||
}
|
||||
|
||||
trace_cma_alloc(page ? pfn : -1UL, page, count, align);
|
||||
|
||||
pr_debug("%s(): returned %p\n", __func__, page);
|
||||
return page;
|
||||
}
|
||||
|
@ -446,6 +450,7 @@ bool cma_release(struct cma *cma, const struct page *pages, unsigned int count)
|
|||
|
||||
free_contig_range(pfn, count);
|
||||
cma_clear_bitmap(cma, pfn, count);
|
||||
trace_cma_release(pfn, pages, count);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -30,9 +30,44 @@ static int cma_debugfs_get(void *data, u64 *val)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEFINE_SIMPLE_ATTRIBUTE(cma_debugfs_fops, cma_debugfs_get, NULL, "%llu\n");
|
||||
|
||||
/*
 * debugfs read handler for the "used" file: report how many pages of the
 * CMA area are currently marked allocated in its bitmap.
 *
 * @data: the struct cma that was registered with the debugfs file
 * @val:  receives the number of used pages
 *
 * Always returns 0.
 */
static int cma_used_get(void *data, u64 *val)
{
	struct cma *cma = data;
	unsigned long used;

	mutex_lock(&cma->lock);
	/*
	 * One bitmap bit stands for (1 << order_per_bit) pages, so the bitmap
	 * is cma_bitmap_maxno() bits long, not cma->count bits.  Counting over
	 * cma->count bits would read past the end of the bitmap whenever
	 * order_per_bit is non-zero.  The bit count is assumed to fit in an
	 * int, as in the original code.
	 */
	used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma));
	mutex_unlock(&cma->lock);

	/* Convert the number of set bits back into a number of pages. */
	*val = (u64)used << cma->order_per_bit;

	return 0;
}
|
||||
DEFINE_SIMPLE_ATTRIBUTE(cma_used_fops, cma_used_get, NULL, "%llu\n");
|
||||
|
||||
/*
 * debugfs read handler for the "maxchunk" file: report the length, in pages,
 * of the longest run of clear bits in the CMA bitmap.
 *
 * @data: the struct cma that was registered with the debugfs file
 * @val:  receives the size of the largest run of clear bits, in pages
 *
 * Always returns 0.
 */
static int cma_maxchunk_get(void *data, u64 *val)
{
	struct cma *cma = data;
	unsigned long maxchunk = 0;
	unsigned long start, end = 0;
	/*
	 * The bitmap has one bit per (1 << order_per_bit) pages, so its length
	 * in bits is cma_bitmap_maxno(), not cma->count.  Using cma->count as
	 * the limit would scan past the end of the bitmap whenever
	 * order_per_bit is non-zero.
	 */
	unsigned long bitmap_maxno = cma_bitmap_maxno(cma);

	mutex_lock(&cma->lock);
	for (;;) {
		/* Start of the next run of clear bits at or after 'end'. */
		start = find_next_zero_bit(cma->bitmap, bitmap_maxno, end);
		if (start >= bitmap_maxno)
			break;
		/* First set bit after the run (or bitmap_maxno if none). */
		end = find_next_bit(cma->bitmap, bitmap_maxno, start);
		maxchunk = max(end - start, maxchunk);
	}
	mutex_unlock(&cma->lock);

	/* Convert the run length from bitmap bits into pages. */
	*val = (u64)maxchunk << cma->order_per_bit;

	return 0;
}
|
||||
DEFINE_SIMPLE_ATTRIBUTE(cma_maxchunk_fops, cma_maxchunk_get, NULL, "%llu\n");
|
||||
|
||||
static void cma_add_to_cma_mem_list(struct cma *cma, struct cma_mem *mem)
|
||||
{
|
||||
spin_lock(&cma->mem_head_lock);
|
||||
|
@ -91,7 +126,6 @@ static int cma_free_write(void *data, u64 val)
|
|||
|
||||
return cma_free_mem(cma, pages);
|
||||
}
|
||||
|
||||
DEFINE_SIMPLE_ATTRIBUTE(cma_free_fops, NULL, cma_free_write, "%llu\n");
|
||||
|
||||
static int cma_alloc_mem(struct cma *cma, int count)
|
||||
|
@ -124,7 +158,6 @@ static int cma_alloc_write(void *data, u64 val)
|
|||
|
||||
return cma_alloc_mem(cma, pages);
|
||||
}
|
||||
|
||||
DEFINE_SIMPLE_ATTRIBUTE(cma_alloc_fops, NULL, cma_alloc_write, "%llu\n");
|
||||
|
||||
static void cma_debugfs_add_one(struct cma *cma, int idx)
|
||||
|
@ -149,6 +182,8 @@ static void cma_debugfs_add_one(struct cma *cma, int idx)
|
|||
&cma->count, &cma_debugfs_fops);
|
||||
debugfs_create_file("order_per_bit", S_IRUGO, tmp,
|
||||
&cma->order_per_bit, &cma_debugfs_fops);
|
||||
debugfs_create_file("used", S_IRUGO, tmp, cma, &cma_used_fops);
|
||||
debugfs_create_file("maxchunk", S_IRUGO, tmp, cma, &cma_maxchunk_fops);
|
||||
|
||||
u32s = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32));
|
||||
debugfs_create_u32_array("bitmap", S_IRUGO, tmp, (u32*)cma->bitmap, u32s);
|
||||
|
|
|
@ -391,28 +391,6 @@ static inline bool compact_should_abort(struct compact_control *cc)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Returns true if the page is within a block suitable for migration to */
|
||||
static bool suitable_migration_target(struct page *page)
|
||||
{
|
||||
/* If the page is a large free page, then disallow migration */
|
||||
if (PageBuddy(page)) {
|
||||
/*
|
||||
* We are checking page_order without zone->lock taken. But
|
||||
* the only small danger is that we skip a potentially suitable
|
||||
* pageblock, so it's not worth to check order for valid range.
|
||||
*/
|
||||
if (page_order_unsafe(page) >= pageblock_order)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
|
||||
if (migrate_async_suitable(get_pageblock_migratetype(page)))
|
||||
return true;
|
||||
|
||||
/* Otherwise skip the block */
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Isolate free pages onto a private freelist. If @strict is true, will abort
|
||||
* returning 0 on any invalid PFNs or non-free pages inside of the pageblock
|
||||
|
@ -896,6 +874,29 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
|
|||
|
||||
#endif /* CONFIG_COMPACTION || CONFIG_CMA */
|
||||
#ifdef CONFIG_COMPACTION
|
||||
|
||||
/* Returns true if the page is within a block suitable for migration to */
|
||||
static bool suitable_migration_target(struct page *page)
|
||||
{
|
||||
/* If the page is a large free page, then disallow migration */
|
||||
if (PageBuddy(page)) {
|
||||
/*
|
||||
* We are checking page_order without zone->lock taken. But
|
||||
* the only small danger is that we skip a potentially suitable
|
||||
* pageblock, so it's not worth to check order for valid range.
|
||||
*/
|
||||
if (page_order_unsafe(page) >= pageblock_order)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
|
||||
if (migrate_async_suitable(get_pageblock_migratetype(page)))
|
||||
return true;
|
||||
|
||||
/* Otherwise skip the block */
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Based on information in the current compact_control, find blocks
|
||||
* suitable for isolating free pages from and then isolate them.
|
||||
|
@ -1046,6 +1047,12 @@ typedef enum {
|
|||
ISOLATE_SUCCESS, /* Pages isolated, migrate */
|
||||
} isolate_migrate_t;
|
||||
|
||||
/*
|
||||
* Allow userspace to control policy on scanning the unevictable LRU for
|
||||
* compactable pages.
|
||||
*/
|
||||
int sysctl_compact_unevictable_allowed __read_mostly = 1;
|
||||
|
||||
/*
|
||||
* Isolate all pages that can be migrated from the first suitable block,
|
||||
* starting at the block pointed to by the migrate scanner pfn within
|
||||
|
@ -1057,6 +1064,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
|
|||
unsigned long low_pfn, end_pfn;
|
||||
struct page *page;
|
||||
const isolate_mode_t isolate_mode =
|
||||
(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
|
||||
(cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0);
|
||||
|
||||
/*
|
||||
|
@ -1598,6 +1606,14 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
|
|||
INIT_LIST_HEAD(&cc->freepages);
|
||||
INIT_LIST_HEAD(&cc->migratepages);
|
||||
|
||||
/*
|
||||
* When called via /proc/sys/vm/compact_memory
|
||||
* this makes sure we compact the whole zone regardless of
|
||||
* cached scanner positions.
|
||||
*/
|
||||
if (cc->order == -1)
|
||||
__reset_isolation_suitable(zone);
|
||||
|
||||
if (cc->order == -1 || !compaction_deferred(zone, cc->order))
|
||||
compact_zone(zone, cc);
|
||||
|
||||
|
|
4
mm/gup.c
4
mm/gup.c
|
@ -1019,7 +1019,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
|
|||
*
|
||||
* for an example see gup_get_pte in arch/x86/mm/gup.c
|
||||
*/
|
||||
pte_t pte = ACCESS_ONCE(*ptep);
|
||||
pte_t pte = READ_ONCE(*ptep);
|
||||
struct page *page;
|
||||
|
||||
/*
|
||||
|
@ -1309,7 +1309,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
|||
local_irq_save(flags);
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
do {
|
||||
pgd_t pgd = ACCESS_ONCE(*pgdp);
|
||||
pgd_t pgd = READ_ONCE(*pgdp);
|
||||
|
||||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
|
|
|
@ -67,6 +67,7 @@ static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
|
|||
|
||||
static int khugepaged(void *none);
|
||||
static int khugepaged_slab_init(void);
|
||||
static void khugepaged_slab_exit(void);
|
||||
|
||||
#define MM_SLOTS_HASH_BITS 10
|
||||
static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
|
||||
|
@ -109,9 +110,6 @@ static int set_recommended_min_free_kbytes(void)
|
|||
int nr_zones = 0;
|
||||
unsigned long recommended_min;
|
||||
|
||||
if (!khugepaged_enabled())
|
||||
return 0;
|
||||
|
||||
for_each_populated_zone(zone)
|
||||
nr_zones++;
|
||||
|
||||
|
@ -143,9 +141,8 @@ static int set_recommended_min_free_kbytes(void)
|
|||
setup_per_zone_wmarks();
|
||||
return 0;
|
||||
}
|
||||
late_initcall(set_recommended_min_free_kbytes);
|
||||
|
||||
static int start_khugepaged(void)
|
||||
static int start_stop_khugepaged(void)
|
||||
{
|
||||
int err = 0;
|
||||
if (khugepaged_enabled()) {
|
||||
|
@ -156,6 +153,7 @@ static int start_khugepaged(void)
|
|||
pr_err("khugepaged: kthread_run(khugepaged) failed\n");
|
||||
err = PTR_ERR(khugepaged_thread);
|
||||
khugepaged_thread = NULL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (!list_empty(&khugepaged_scan.mm_head))
|
||||
|
@ -166,7 +164,7 @@ static int start_khugepaged(void)
|
|||
kthread_stop(khugepaged_thread);
|
||||
khugepaged_thread = NULL;
|
||||
}
|
||||
|
||||
fail:
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -183,7 +181,7 @@ static struct page *get_huge_zero_page(void)
|
|||
struct page *zero_page;
|
||||
retry:
|
||||
if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
|
||||
return ACCESS_ONCE(huge_zero_page);
|
||||
return READ_ONCE(huge_zero_page);
|
||||
|
||||
zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
|
||||
HPAGE_PMD_ORDER);
|
||||
|
@ -202,7 +200,7 @@ retry:
|
|||
/* We take additional reference here. It will be put back by shrinker */
|
||||
atomic_set(&huge_zero_refcount, 2);
|
||||
preempt_enable();
|
||||
return ACCESS_ONCE(huge_zero_page);
|
||||
return READ_ONCE(huge_zero_page);
|
||||
}
|
||||
|
||||
static void put_huge_zero_page(void)
|
||||
|
@ -300,7 +298,7 @@ static ssize_t enabled_store(struct kobject *kobj,
|
|||
int err;
|
||||
|
||||
mutex_lock(&khugepaged_mutex);
|
||||
err = start_khugepaged();
|
||||
err = start_stop_khugepaged();
|
||||
mutex_unlock(&khugepaged_mutex);
|
||||
|
||||
if (err)
|
||||
|
@ -634,27 +632,38 @@ static int __init hugepage_init(void)
|
|||
|
||||
err = hugepage_init_sysfs(&hugepage_kobj);
|
||||
if (err)
|
||||
return err;
|
||||
goto err_sysfs;
|
||||
|
||||
err = khugepaged_slab_init();
|
||||
if (err)
|
||||
goto out;
|
||||
goto err_slab;
|
||||
|
||||
register_shrinker(&huge_zero_page_shrinker);
|
||||
err = register_shrinker(&huge_zero_page_shrinker);
|
||||
if (err)
|
||||
goto err_hzp_shrinker;
|
||||
|
||||
/*
|
||||
* By default disable transparent hugepages on smaller systems,
|
||||
* where the extra memory used could hurt more than TLB overhead
|
||||
* is likely to save. The admin can still enable it through /sys.
|
||||
*/
|
||||
if (totalram_pages < (512 << (20 - PAGE_SHIFT)))
|
||||
if (totalram_pages < (512 << (20 - PAGE_SHIFT))) {
|
||||
transparent_hugepage_flags = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
start_khugepaged();
|
||||
err = start_stop_khugepaged();
|
||||
if (err)
|
||||
goto err_khugepaged;
|
||||
|
||||
return 0;
|
||||
out:
|
||||
err_khugepaged:
|
||||
unregister_shrinker(&huge_zero_page_shrinker);
|
||||
err_hzp_shrinker:
|
||||
khugepaged_slab_exit();
|
||||
err_slab:
|
||||
hugepage_exit_sysfs(hugepage_kobj);
|
||||
err_sysfs:
|
||||
return err;
|
||||
}
|
||||
subsys_initcall(hugepage_init);
|
||||
|
@ -708,7 +717,7 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
|
|||
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long haddr, pmd_t *pmd,
|
||||
struct page *page)
|
||||
struct page *page, gfp_t gfp)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
pgtable_t pgtable;
|
||||
|
@ -716,7 +725,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
|
|||
|
||||
VM_BUG_ON_PAGE(!PageCompound(page), page);
|
||||
|
||||
if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
|
||||
if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
|
||||
return VM_FAULT_OOM;
|
||||
|
||||
pgtable = pte_alloc_one(mm, haddr);
|
||||
|
@ -822,7 +831,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
count_vm_event(THP_FAULT_FALLBACK);
|
||||
return VM_FAULT_FALLBACK;
|
||||
}
|
||||
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
|
||||
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
|
||||
put_page(page);
|
||||
count_vm_event(THP_FAULT_FALLBACK);
|
||||
return VM_FAULT_FALLBACK;
|
||||
|
@ -1080,6 +1089,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
unsigned long haddr;
|
||||
unsigned long mmun_start; /* For mmu_notifiers */
|
||||
unsigned long mmun_end; /* For mmu_notifiers */
|
||||
gfp_t huge_gfp; /* for allocation and charge */
|
||||
|
||||
ptl = pmd_lockptr(mm, pmd);
|
||||
VM_BUG_ON_VMA(!vma->anon_vma, vma);
|
||||
|
@ -1106,10 +1116,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
alloc:
|
||||
if (transparent_hugepage_enabled(vma) &&
|
||||
!transparent_hugepage_debug_cow()) {
|
||||
gfp_t gfp;
|
||||
|
||||
gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
|
||||
new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
|
||||
huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
|
||||
new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
|
||||
} else
|
||||
new_page = NULL;
|
||||
|
||||
|
@ -1130,8 +1138,7 @@ alloc:
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (unlikely(mem_cgroup_try_charge(new_page, mm,
|
||||
GFP_TRANSHUGE, &memcg))) {
|
||||
if (unlikely(mem_cgroup_try_charge(new_page, mm, huge_gfp, &memcg))) {
|
||||
put_page(new_page);
|
||||
if (page) {
|
||||
split_huge_page(page);
|
||||
|
@ -1976,6 +1983,11 @@ static int __init khugepaged_slab_init(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Destroy the mm_slot_cache kmem cache.  Used on the hugepage_init() error
 * path to undo khugepaged_slab_init(); marked __init like its caller.
 */
static void __init khugepaged_slab_exit(void)
|
||||
{
|
||||
kmem_cache_destroy(mm_slot_cache);
|
||||
}
|
||||
|
||||
static inline struct mm_slot *alloc_mm_slot(void)
|
||||
{
|
||||
if (!mm_slot_cache) /* initialization failed */
|
||||
|
@ -2323,19 +2335,13 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
|
|||
return true;
|
||||
}
|
||||
|
||||
static struct page
|
||||
*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
|
||||
static struct page *
|
||||
khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
|
||||
struct vm_area_struct *vma, unsigned long address,
|
||||
int node)
|
||||
{
|
||||
gfp_t flags;
|
||||
|
||||
VM_BUG_ON_PAGE(*hpage, *hpage);
|
||||
|
||||
/* Only allocate from the target node */
|
||||
flags = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
|
||||
__GFP_THISNODE;
|
||||
|
||||
/*
|
||||
* Before allocating the hugepage, release the mmap_sem read lock.
|
||||
* The allocation can take potentially a long time if it involves
|
||||
|
@ -2344,7 +2350,7 @@ static struct page
|
|||
*/
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
*hpage = alloc_pages_exact_node(node, flags, HPAGE_PMD_ORDER);
|
||||
*hpage = alloc_pages_exact_node(node, gfp, HPAGE_PMD_ORDER);
|
||||
if (unlikely(!*hpage)) {
|
||||
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
|
||||
*hpage = ERR_PTR(-ENOMEM);
|
||||
|
@ -2397,13 +2403,14 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
|
|||
return true;
|
||||
}
|
||||
|
||||
static struct page
|
||||
*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
|
||||
static struct page *
|
||||
khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
|
||||
struct vm_area_struct *vma, unsigned long address,
|
||||
int node)
|
||||
{
|
||||
up_read(&mm->mmap_sem);
|
||||
VM_BUG_ON(!*hpage);
|
||||
|
||||
return *hpage;
|
||||
}
|
||||
#endif
|
||||
|
@ -2438,16 +2445,21 @@ static void collapse_huge_page(struct mm_struct *mm,
|
|||
struct mem_cgroup *memcg;
|
||||
unsigned long mmun_start; /* For mmu_notifiers */
|
||||
unsigned long mmun_end; /* For mmu_notifiers */
|
||||
gfp_t gfp;
|
||||
|
||||
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
|
||||
|
||||
/* Only allocate from the target node */
|
||||
gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
|
||||
__GFP_THISNODE;
|
||||
|
||||
/* release the mmap_sem read lock. */
|
||||
new_page = khugepaged_alloc_page(hpage, mm, vma, address, node);
|
||||
new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node);
|
||||
if (!new_page)
|
||||
return;
|
||||
|
||||
if (unlikely(mem_cgroup_try_charge(new_page, mm,
|
||||
GFP_TRANSHUGE, &memcg)))
|
||||
gfp, &memcg)))
|
||||
return;
|
||||
|
||||
/*
|
||||
|
|
236
mm/hugetlb.c
236
mm/hugetlb.c
|
@ -61,6 +61,9 @@ DEFINE_SPINLOCK(hugetlb_lock);
|
|||
static int num_fault_mutexes;
|
||||
static struct mutex *htlb_fault_mutex_table ____cacheline_aligned_in_smp;
|
||||
|
||||
/* Forward declaration */
|
||||
static int hugetlb_acct_memory(struct hstate *h, long delta);
|
||||
|
||||
static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
|
||||
{
|
||||
bool free = (spool->count == 0) && (spool->used_hpages == 0);
|
||||
|
@ -68,23 +71,36 @@ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
|
|||
spin_unlock(&spool->lock);
|
||||
|
||||
/* If no pages are used, and no other handles to the subpool
|
||||
* remain, free the subpool the subpool remain */
|
||||
if (free)
|
||||
* remain, give up any reservations based on minimum size and
|
||||
* free the subpool */
|
||||
if (free) {
|
||||
if (spool->min_hpages != -1)
|
||||
hugetlb_acct_memory(spool->hstate,
|
||||
-spool->min_hpages);
|
||||
kfree(spool);
|
||||
}
|
||||
}
|
||||
|
||||
struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
|
||||
struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
|
||||
long min_hpages)
|
||||
{
|
||||
struct hugepage_subpool *spool;
|
||||
|
||||
spool = kmalloc(sizeof(*spool), GFP_KERNEL);
|
||||
spool = kzalloc(sizeof(*spool), GFP_KERNEL);
|
||||
if (!spool)
|
||||
return NULL;
|
||||
|
||||
spin_lock_init(&spool->lock);
|
||||
spool->count = 1;
|
||||
spool->max_hpages = nr_blocks;
|
||||
spool->used_hpages = 0;
|
||||
spool->max_hpages = max_hpages;
|
||||
spool->hstate = h;
|
||||
spool->min_hpages = min_hpages;
|
||||
|
||||
if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages)) {
|
||||
kfree(spool);
|
||||
return NULL;
|
||||
}
|
||||
spool->rsv_hpages = min_hpages;
|
||||
|
||||
return spool;
|
||||
}
|
||||
|
@ -97,36 +113,89 @@ void hugepage_put_subpool(struct hugepage_subpool *spool)
|
|||
unlock_or_release_subpool(spool);
|
||||
}
|
||||
|
||||
static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
|
||||
/*
|
||||
* Subpool accounting for allocating and reserving pages.
|
||||
* Return -ENOMEM if there are not enough resources to satisfy the
|
||||
* request. Otherwise, return the number of pages by which the
|
||||
* global pools must be adjusted (upward). The returned value may
|
||||
* only be different than the passed value (delta) in the case where
|
||||
* a subpool minimum size must be maintained.
|
||||
*/
|
||||
static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
|
||||
long delta)
|
||||
{
|
||||
int ret = 0;
|
||||
long ret = delta;
|
||||
|
||||
if (!spool)
|
||||
return 0;
|
||||
return ret;
|
||||
|
||||
spin_lock(&spool->lock);
|
||||
if ((spool->used_hpages + delta) <= spool->max_hpages) {
|
||||
spool->used_hpages += delta;
|
||||
} else {
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
spin_unlock(&spool->lock);
|
||||
|
||||
if (spool->max_hpages != -1) { /* maximum size accounting */
|
||||
if ((spool->used_hpages + delta) <= spool->max_hpages)
|
||||
spool->used_hpages += delta;
|
||||
else {
|
||||
ret = -ENOMEM;
|
||||
goto unlock_ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (spool->min_hpages != -1) { /* minimum size accounting */
|
||||
if (delta > spool->rsv_hpages) {
|
||||
/*
|
||||
* Asking for more reserves than those already taken on
|
||||
* behalf of subpool. Return difference.
|
||||
*/
|
||||
ret = delta - spool->rsv_hpages;
|
||||
spool->rsv_hpages = 0;
|
||||
} else {
|
||||
ret = 0; /* reserves already accounted for */
|
||||
spool->rsv_hpages -= delta;
|
||||
}
|
||||
}
|
||||
|
||||
unlock_ret:
|
||||
spin_unlock(&spool->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
|
||||
/*
|
||||
* Subpool accounting for freeing and unreserving pages.
|
||||
* Return the number of global page reservations that must be dropped.
|
||||
* The return value may only be different than the passed value (delta)
|
||||
* in the case where a subpool minimum size must be maintained.
|
||||
*/
|
||||
static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
|
||||
long delta)
|
||||
{
|
||||
long ret = delta;
|
||||
|
||||
if (!spool)
|
||||
return;
|
||||
return delta;
|
||||
|
||||
spin_lock(&spool->lock);
|
||||
spool->used_hpages -= delta;
|
||||
/* If hugetlbfs_put_super couldn't free spool due to
|
||||
* an outstanding quota reference, free it now. */
|
||||
|
||||
if (spool->max_hpages != -1) /* maximum size accounting */
|
||||
spool->used_hpages -= delta;
|
||||
|
||||
if (spool->min_hpages != -1) { /* minimum size accounting */
|
||||
if (spool->rsv_hpages + delta <= spool->min_hpages)
|
||||
ret = 0;
|
||||
else
|
||||
ret = spool->rsv_hpages + delta - spool->min_hpages;
|
||||
|
||||
spool->rsv_hpages += delta;
|
||||
if (spool->rsv_hpages > spool->min_hpages)
|
||||
spool->rsv_hpages = spool->min_hpages;
|
||||
}
|
||||
|
||||
/*
|
||||
* If hugetlbfs_put_super couldn't free spool due to an outstanding
|
||||
* quota reference, free it now.
|
||||
*/
|
||||
unlock_or_release_subpool(spool);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
|
||||
|
@ -855,6 +924,31 @@ struct hstate *size_to_hstate(unsigned long size)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test to determine whether the hugepage is "active/in-use" (i.e. being linked
|
||||
* to hstate->hugepage_activelist.)
|
||||
*
|
||||
* This function can be called for tail pages, but never returns true for them.
|
||||
*/
|
||||
bool page_huge_active(struct page *page)
|
||||
{
|
||||
VM_BUG_ON_PAGE(!PageHuge(page), page);
|
||||
return PageHead(page) && PagePrivate(&page[1]);
|
||||
}
|
||||
|
||||
/* never called for tail page */
|
||||
static void set_page_huge_active(struct page *page)
|
||||
{
|
||||
VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
|
||||
SetPagePrivate(&page[1]);
|
||||
}
|
||||
|
||||
static void clear_page_huge_active(struct page *page)
|
||||
{
|
||||
VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
|
||||
ClearPagePrivate(&page[1]);
|
||||
}
|
||||
|
||||
void free_huge_page(struct page *page)
|
||||
{
|
||||
/*
|
||||
|
@ -874,7 +968,16 @@ void free_huge_page(struct page *page)
|
|||
restore_reserve = PagePrivate(page);
|
||||
ClearPagePrivate(page);
|
||||
|
||||
/*
|
||||
* A return code of zero implies that the subpool will be under its
|
||||
* minimum size if the reservation is not restored after page is free.
|
||||
* Therefore, force restore_reserve operation.
|
||||
*/
|
||||
if (hugepage_subpool_put_pages(spool, 1) == 0)
|
||||
restore_reserve = true;
|
||||
|
||||
spin_lock(&hugetlb_lock);
|
||||
clear_page_huge_active(page);
|
||||
hugetlb_cgroup_uncharge_page(hstate_index(h),
|
||||
pages_per_huge_page(h), page);
|
||||
if (restore_reserve)
|
||||
|
@ -891,7 +994,6 @@ void free_huge_page(struct page *page)
|
|||
enqueue_huge_page(h, page);
|
||||
}
|
||||
spin_unlock(&hugetlb_lock);
|
||||
hugepage_subpool_put_pages(spool, 1);
|
||||
}
|
||||
|
||||
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
|
||||
|
@ -1386,7 +1488,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
|
|||
if (chg < 0)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
if (chg || avoid_reserve)
|
||||
if (hugepage_subpool_get_pages(spool, 1))
|
||||
if (hugepage_subpool_get_pages(spool, 1) < 0)
|
||||
return ERR_PTR(-ENOSPC);
|
||||
|
||||
ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
|
||||
|
@ -2454,6 +2556,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
|
|||
struct resv_map *resv = vma_resv_map(vma);
|
||||
struct hugepage_subpool *spool = subpool_vma(vma);
|
||||
unsigned long reserve, start, end;
|
||||
long gbl_reserve;
|
||||
|
||||
if (!resv || !is_vma_resv_set(vma, HPAGE_RESV_OWNER))
|
||||
return;
|
||||
|
@ -2466,8 +2569,12 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
|
|||
kref_put(&resv->refs, resv_map_release);
|
||||
|
||||
if (reserve) {
|
||||
hugetlb_acct_memory(h, -reserve);
|
||||
hugepage_subpool_put_pages(spool, reserve);
|
||||
/*
|
||||
* Decrement reserve counts. The global reserve count may be
|
||||
* adjusted if the subpool has a minimum size.
|
||||
*/
|
||||
gbl_reserve = hugepage_subpool_put_pages(spool, reserve);
|
||||
hugetlb_acct_memory(h, -gbl_reserve);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2891,6 +2998,7 @@ retry_avoidcopy:
|
|||
copy_user_huge_page(new_page, old_page, address, vma,
|
||||
pages_per_huge_page(h));
|
||||
__SetPageUptodate(new_page);
|
||||
set_page_huge_active(new_page);
|
||||
|
||||
mmun_start = address & huge_page_mask(h);
|
||||
mmun_end = mmun_start + huge_page_size(h);
|
||||
|
@ -3003,6 +3111,7 @@ retry:
|
|||
}
|
||||
clear_huge_page(page, address, pages_per_huge_page(h));
|
||||
__SetPageUptodate(page);
|
||||
set_page_huge_active(page);
|
||||
|
||||
if (vma->vm_flags & VM_MAYSHARE) {
|
||||
int err;
|
||||
|
@ -3447,6 +3556,7 @@ int hugetlb_reserve_pages(struct inode *inode,
|
|||
struct hstate *h = hstate_inode(inode);
|
||||
struct hugepage_subpool *spool = subpool_inode(inode);
|
||||
struct resv_map *resv_map;
|
||||
long gbl_reserve;
|
||||
|
||||
/*
|
||||
* Only apply hugepage reservation if asked. At fault time, an
|
||||
|
@ -3483,8 +3593,13 @@ int hugetlb_reserve_pages(struct inode *inode,
|
|||
goto out_err;
|
||||
}
|
||||
|
||||
/* There must be enough pages in the subpool for the mapping */
|
||||
if (hugepage_subpool_get_pages(spool, chg)) {
|
||||
/*
|
||||
* There must be enough pages in the subpool for the mapping. If
|
||||
* the subpool has a minimum size, there may be some global
|
||||
* reservations already in place (gbl_reserve).
|
||||
*/
|
||||
gbl_reserve = hugepage_subpool_get_pages(spool, chg);
|
||||
if (gbl_reserve < 0) {
|
||||
ret = -ENOSPC;
|
||||
goto out_err;
|
||||
}
|
||||
|
@ -3493,9 +3608,10 @@ int hugetlb_reserve_pages(struct inode *inode,
|
|||
* Check enough hugepages are available for the reservation.
|
||||
* Hand the pages back to the subpool if there are not
|
||||
*/
|
||||
ret = hugetlb_acct_memory(h, chg);
|
||||
ret = hugetlb_acct_memory(h, gbl_reserve);
|
||||
if (ret < 0) {
|
||||
hugepage_subpool_put_pages(spool, chg);
|
||||
/* put back original number of pages, chg */
|
||||
(void)hugepage_subpool_put_pages(spool, chg);
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
|
@ -3525,6 +3641,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
|
|||
struct resv_map *resv_map = inode_resv_map(inode);
|
||||
long chg = 0;
|
||||
struct hugepage_subpool *spool = subpool_inode(inode);
|
||||
long gbl_reserve;
|
||||
|
||||
if (resv_map)
|
||||
chg = region_truncate(resv_map, offset);
|
||||
|
@ -3532,8 +3649,12 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
|
|||
inode->i_blocks -= (blocks_per_huge_page(h) * freed);
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
hugepage_subpool_put_pages(spool, (chg - freed));
|
||||
hugetlb_acct_memory(h, -(chg - freed));
|
||||
/*
|
||||
* If the subpool has a minimum size, the number of global
|
||||
* reservations to be released may be adjusted.
|
||||
*/
|
||||
gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed));
|
||||
hugetlb_acct_memory(h, -gbl_reserve);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
|
||||
|
@ -3775,20 +3896,6 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
|
|||
|
||||
#ifdef CONFIG_MEMORY_FAILURE
|
||||
|
||||
/* Should be called in hugetlb_lock */
|
||||
static int is_hugepage_on_freelist(struct page *hpage)
|
||||
{
|
||||
struct page *page;
|
||||
struct page *tmp;
|
||||
struct hstate *h = page_hstate(hpage);
|
||||
int nid = page_to_nid(hpage);
|
||||
|
||||
list_for_each_entry_safe(page, tmp, &h->hugepage_freelists[nid], lru)
|
||||
if (page == hpage)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called from memory failure code.
|
||||
* Assume the caller holds page lock of the head page.
|
||||
|
@ -3800,7 +3907,11 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
|
|||
int ret = -EBUSY;
|
||||
|
||||
spin_lock(&hugetlb_lock);
|
||||
if (is_hugepage_on_freelist(hpage)) {
|
||||
/*
|
||||
* Just checking !page_huge_active is not enough, because that could be
|
||||
* an isolated/hwpoisoned hugepage (which have >0 refcount).
|
||||
*/
|
||||
if (!page_huge_active(hpage) && !page_count(hpage)) {
|
||||
/*
|
||||
* Hwpoisoned hugepage isn't linked to activelist or freelist,
|
||||
* but dangling hpage->lru can trigger list-debug warnings
|
||||
|
@ -3820,42 +3931,27 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
|
|||
|
||||
bool isolate_huge_page(struct page *page, struct list_head *list)
|
||||
{
|
||||
bool ret = true;
|
||||
|
||||
VM_BUG_ON_PAGE(!PageHead(page), page);
|
||||
if (!get_page_unless_zero(page))
|
||||
return false;
|
||||
spin_lock(&hugetlb_lock);
|
||||
if (!page_huge_active(page) || !get_page_unless_zero(page)) {
|
||||
ret = false;
|
||||
goto unlock;
|
||||
}
|
||||
clear_page_huge_active(page);
|
||||
list_move_tail(&page->lru, list);
|
||||
unlock:
|
||||
spin_unlock(&hugetlb_lock);
|
||||
return true;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void putback_active_hugepage(struct page *page)
|
||||
{
|
||||
VM_BUG_ON_PAGE(!PageHead(page), page);
|
||||
spin_lock(&hugetlb_lock);
|
||||
set_page_huge_active(page);
|
||||
list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
|
||||
spin_unlock(&hugetlb_lock);
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
bool is_hugepage_active(struct page *page)
|
||||
{
|
||||
VM_BUG_ON_PAGE(!PageHuge(page), page);
|
||||
/*
|
||||
* This function can be called for a tail page because the caller,
|
||||
* scan_movable_pages, scans through a given pfn-range which typically
|
||||
* covers one memory block. In systems using gigantic hugepage (1GB
|
||||
* for x86_64,) a hugepage is larger than a memory block, and we don't
|
||||
* support migrating such large hugepages for now, so return false
|
||||
* when called for tail pages.
|
||||
*/
|
||||
if (PageTail(page))
|
||||
return false;
|
||||
/*
|
||||
* Refcount of a hwpoisoned hugepages is 1, but they are not active,
|
||||
* so we should return false for them.
|
||||
*/
|
||||
if (unlikely(PageHWPoison(page)))
|
||||
return false;
|
||||
return page_count(page) > 0;
|
||||
}
|
||||
|
|
|
@ -224,13 +224,13 @@ static inline unsigned long page_order(struct page *page)
|
|||
* PageBuddy() should be checked first by the caller to minimize race window,
|
||||
* and invalid values must be handled gracefully.
|
||||
*
|
||||
* ACCESS_ONCE is used so that if the caller assigns the result into a local
|
||||
* READ_ONCE is used so that if the caller assigns the result into a local
|
||||
* variable and e.g. tests it for valid range before using, the compiler cannot
|
||||
* decide to remove the variable and inline the page_private(page) multiple
|
||||
* times, potentially observing different values in the tests and the actual
|
||||
* use of the result.
|
||||
*/
|
||||
/*
 * Read page_private() exactly once, so the compiler cannot re-load it between
 * the caller's range check and the actual use.  Only one definition is kept:
 * redefining the macro with a different body is not valid C.
 */
#define page_order_unsafe(page)		READ_ONCE(page_private(page))
|
||||
|
||||
static inline bool is_cow_mapping(vm_flags_t flags)
|
||||
{
|
||||
|
|
|
@ -389,6 +389,19 @@ void kasan_krealloc(const void *object, size_t size)
|
|||
kasan_kmalloc(page->slab_cache, object, size);
|
||||
}
|
||||
|
||||
void kasan_kfree(void *ptr)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
page = virt_to_head_page(ptr);
|
||||
|
||||
if (unlikely(!PageSlab(page)))
|
||||
kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
|
||||
KASAN_FREE_PAGE);
|
||||
else
|
||||
kasan_slab_free(page->slab_cache, ptr);
|
||||
}
|
||||
|
||||
void kasan_kfree_large(const void *ptr)
|
||||
{
|
||||
struct page *page = virt_to_page(ptr);
|
||||
|
|
10
mm/ksm.c
10
mm/ksm.c
|
@ -542,7 +542,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
|
|||
expected_mapping = (void *)stable_node +
|
||||
(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
|
||||
again:
|
||||
kpfn = ACCESS_ONCE(stable_node->kpfn);
|
||||
kpfn = READ_ONCE(stable_node->kpfn);
|
||||
page = pfn_to_page(kpfn);
|
||||
|
||||
/*
|
||||
|
@ -551,7 +551,7 @@ again:
|
|||
* but on Alpha we need to be more careful.
|
||||
*/
|
||||
smp_read_barrier_depends();
|
||||
if (ACCESS_ONCE(page->mapping) != expected_mapping)
|
||||
if (READ_ONCE(page->mapping) != expected_mapping)
|
||||
goto stale;
|
||||
|
||||
/*
|
||||
|
@ -577,14 +577,14 @@ again:
|
|||
cpu_relax();
|
||||
}
|
||||
|
||||
if (ACCESS_ONCE(page->mapping) != expected_mapping) {
|
||||
if (READ_ONCE(page->mapping) != expected_mapping) {
|
||||
put_page(page);
|
||||
goto stale;
|
||||
}
|
||||
|
||||
if (lock_it) {
|
||||
lock_page(page);
|
||||
if (ACCESS_ONCE(page->mapping) != expected_mapping) {
|
||||
if (READ_ONCE(page->mapping) != expected_mapping) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
goto stale;
|
||||
|
@ -600,7 +600,7 @@ stale:
|
|||
* before checking whether node->kpfn has been changed.
|
||||
*/
|
||||
smp_rmb();
|
||||
if (ACCESS_ONCE(stable_node->kpfn) != kpfn)
|
||||
if (READ_ONCE(stable_node->kpfn) != kpfn)
|
||||
goto again;
|
||||
remove_node_from_stable_tree(stable_node);
|
||||
return NULL;
|
||||
|
|
|
@ -580,10 +580,24 @@ int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
|
|||
return memblock_add_range(&memblock.memory, base, size, nid, 0);
|
||||
}
|
||||
|
||||
/*
 * Add [@base, @base + @size) to memblock.memory for node @nid with @flags,
 * emitting a memblock debug line with the range, the flags and the return
 * address first.  Returns whatever memblock_add_range() returns.
 */
static int __init_memblock memblock_add_region(phys_addr_t base,
						phys_addr_t size,
						int nid,
						unsigned long flags)
{
	unsigned long long first = (unsigned long long)base;
	unsigned long long last = first + size - 1;

	memblock_dbg("memblock_add: [%#016llx-%#016llx] flags %#02lx %pF\n",
		     first, last, flags, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, nid, flags);
}
|
||||
|
||||
/*
 * Add the range [@base, @base + @size) to the memory region list with no
 * specific node (MAX_NUMNODES) and no flags.
 *
 * The request goes through memblock_add_region() only, so the debug trace
 * it emits is not bypassed by a direct memblock_add_range() call.
 */
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	return memblock_add_region(base, size, MAX_NUMNODES, 0);
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -259,11 +259,6 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
|
|||
* page cache and RSS per cgroup. We would eventually like to provide
|
||||
* statistics based on the statistics developed by Rik Van Riel for clock-pro,
|
||||
* to help the administrator determine what knobs to tune.
|
||||
*
|
||||
* TODO: Add a water mark for the memory controller. Reclaim will begin when
|
||||
* we hit the water mark. May be even add a low water mark, such that
|
||||
* no reclaim occurs from a cgroup at it's low water mark, this is
|
||||
* a feature that will be implemented much later in the future.
|
||||
*/
|
||||
struct mem_cgroup {
|
||||
struct cgroup_subsys_state css;
|
||||
|
@ -460,6 +455,12 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
|
|||
return memcg->css.id;
|
||||
}
|
||||
|
||||
/*
|
||||
* A helper function to get mem_cgroup from ID. must be called under
|
||||
* rcu_read_lock(). The caller is responsible for calling
|
||||
* css_tryget_online() if the mem_cgroup is used for charging. (dropping
|
||||
* refcnt from swap can be called against removed memcg.)
|
||||
*/
|
||||
static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
|
@ -673,7 +674,7 @@ static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
|
|||
static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
|
||||
{
|
||||
unsigned long nr_pages = page_counter_read(&memcg->memory);
|
||||
unsigned long soft_limit = ACCESS_ONCE(memcg->soft_limit);
|
||||
unsigned long soft_limit = READ_ONCE(memcg->soft_limit);
|
||||
unsigned long excess = 0;
|
||||
|
||||
if (nr_pages > soft_limit)
|
||||
|
@ -1041,7 +1042,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
|
|||
goto out_unlock;
|
||||
|
||||
do {
|
||||
pos = ACCESS_ONCE(iter->position);
|
||||
pos = READ_ONCE(iter->position);
|
||||
/*
|
||||
* A racing update may change the position and
|
||||
* put the last reference, hence css_tryget(),
|
||||
|
@ -1358,13 +1359,13 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
|
|||
unsigned long limit;
|
||||
|
||||
count = page_counter_read(&memcg->memory);
|
||||
limit = ACCESS_ONCE(memcg->memory.limit);
|
||||
limit = READ_ONCE(memcg->memory.limit);
|
||||
if (count < limit)
|
||||
margin = limit - count;
|
||||
|
||||
if (do_swap_account) {
|
||||
count = page_counter_read(&memcg->memsw);
|
||||
limit = ACCESS_ONCE(memcg->memsw.limit);
|
||||
limit = READ_ONCE(memcg->memsw.limit);
|
||||
if (count <= limit)
|
||||
margin = min(margin, limit - count);
|
||||
}
|
||||
|
@ -2348,20 +2349,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
|
|||
css_put_many(&memcg->css, nr_pages);
|
||||
}
|
||||
|
||||
/*
|
||||
* A helper function to get mem_cgroup from ID. must be called under
|
||||
* rcu_read_lock(). The caller is responsible for calling
|
||||
* css_tryget_online() if the mem_cgroup is used for charging. (dropping
|
||||
* refcnt from swap can be called against removed memcg.)
|
||||
*/
|
||||
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
|
||||
{
|
||||
/* ID 0 is unused ID */
|
||||
if (!id)
|
||||
return NULL;
|
||||
return mem_cgroup_from_id(id);
|
||||
}
|
||||
|
||||
/*
|
||||
* try_get_mem_cgroup_from_page - look up page's memcg association
|
||||
* @page: the page
|
||||
|
@ -2388,7 +2375,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
|
|||
ent.val = page_private(page);
|
||||
id = lookup_swap_cgroup_id(ent);
|
||||
rcu_read_lock();
|
||||
memcg = mem_cgroup_lookup(id);
|
||||
memcg = mem_cgroup_from_id(id);
|
||||
if (memcg && !css_tryget_online(&memcg->css))
|
||||
memcg = NULL;
|
||||
rcu_read_unlock();
|
||||
|
@ -2650,7 +2637,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
|
|||
return cachep;
|
||||
|
||||
memcg = get_mem_cgroup_from_mm(current->mm);
|
||||
kmemcg_id = ACCESS_ONCE(memcg->kmemcg_id);
|
||||
kmemcg_id = READ_ONCE(memcg->kmemcg_id);
|
||||
if (kmemcg_id < 0)
|
||||
goto out;
|
||||
|
||||
|
@ -5020,7 +5007,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
|
|||
* tunable will only affect upcoming migrations, not the current one.
|
||||
* So we need to save it, and keep it going.
|
||||
*/
|
||||
move_flags = ACCESS_ONCE(memcg->move_charge_at_immigrate);
|
||||
move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
|
||||
if (move_flags) {
|
||||
struct mm_struct *mm;
|
||||
struct mem_cgroup *from = mem_cgroup_from_task(p);
|
||||
|
@ -5254,7 +5241,7 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
|
|||
static int memory_low_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
|
||||
unsigned long low = ACCESS_ONCE(memcg->low);
|
||||
unsigned long low = READ_ONCE(memcg->low);
|
||||
|
||||
if (low == PAGE_COUNTER_MAX)
|
||||
seq_puts(m, "max\n");
|
||||
|
@ -5284,7 +5271,7 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
|
|||
static int memory_high_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
|
||||
unsigned long high = ACCESS_ONCE(memcg->high);
|
||||
unsigned long high = READ_ONCE(memcg->high);
|
||||
|
||||
if (high == PAGE_COUNTER_MAX)
|
||||
seq_puts(m, "max\n");
|
||||
|
@ -5314,7 +5301,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
|
|||
static int memory_max_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
|
||||
unsigned long max = ACCESS_ONCE(memcg->memory.limit);
|
||||
unsigned long max = READ_ONCE(memcg->memory.limit);
|
||||
|
||||
if (max == PAGE_COUNTER_MAX)
|
||||
seq_puts(m, "max\n");
|
||||
|
@ -5869,7 +5856,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
|
|||
|
||||
id = swap_cgroup_record(entry, 0);
|
||||
rcu_read_lock();
|
||||
memcg = mem_cgroup_lookup(id);
|
||||
memcg = mem_cgroup_from_id(id);
|
||||
if (memcg) {
|
||||
if (!mem_cgroup_is_root(memcg))
|
||||
page_counter_uncharge(&memcg->memsw, 1);
|
||||
|
|
|
@ -521,6 +521,52 @@ static const char *action_name[] = {
|
|||
[RECOVERED] = "Recovered",
|
||||
};
|
||||
|
||||
/*
 * Kind of page a memory-failure recovery message refers to.
 * Each value is used as an index into action_page_types[], which holds the
 * human-readable description printed for it.
 */
enum action_page_type {
|
||||
MSG_KERNEL,
|
||||
MSG_KERNEL_HIGH_ORDER,
|
||||
MSG_SLAB,
|
||||
MSG_DIFFERENT_COMPOUND,
|
||||
MSG_POISONED_HUGE,
|
||||
MSG_HUGE,
|
||||
MSG_FREE_HUGE,
|
||||
MSG_UNMAP_FAILED,
|
||||
MSG_DIRTY_SWAPCACHE,
|
||||
MSG_CLEAN_SWAPCACHE,
|
||||
MSG_DIRTY_MLOCKED_LRU,
|
||||
MSG_CLEAN_MLOCKED_LRU,
|
||||
MSG_DIRTY_UNEVICTABLE_LRU,
|
||||
MSG_CLEAN_UNEVICTABLE_LRU,
|
||||
MSG_DIRTY_LRU,
|
||||
MSG_CLEAN_LRU,
|
||||
MSG_TRUNCATED_LRU,
|
||||
MSG_BUDDY,
|
||||
MSG_BUDDY_2ND,
|
||||
MSG_UNKNOWN,
|
||||
};
|
||||
|
||||
/*
 * Description string for every enum action_page_type value, indexed by that
 * value.  Designated initializers keep each string tied to its enumerator
 * regardless of the order of the entries below.
 */
static const char * const action_page_types[] = {
|
||||
[MSG_KERNEL] = "reserved kernel page",
|
||||
[MSG_KERNEL_HIGH_ORDER] = "high-order kernel page",
|
||||
[MSG_SLAB] = "kernel slab page",
|
||||
[MSG_DIFFERENT_COMPOUND] = "different compound page after locking",
|
||||
[MSG_POISONED_HUGE] = "huge page already hardware poisoned",
|
||||
[MSG_HUGE] = "huge page",
|
||||
[MSG_FREE_HUGE] = "free huge page",
|
||||
[MSG_UNMAP_FAILED] = "unmapping failed page",
|
||||
[MSG_DIRTY_SWAPCACHE] = "dirty swapcache page",
|
||||
[MSG_CLEAN_SWAPCACHE] = "clean swapcache page",
|
||||
[MSG_DIRTY_MLOCKED_LRU] = "dirty mlocked LRU page",
|
||||
[MSG_CLEAN_MLOCKED_LRU] = "clean mlocked LRU page",
|
||||
[MSG_DIRTY_UNEVICTABLE_LRU] = "dirty unevictable LRU page",
|
||||
[MSG_CLEAN_UNEVICTABLE_LRU] = "clean unevictable LRU page",
|
||||
[MSG_DIRTY_LRU] = "dirty LRU page",
|
||||
[MSG_CLEAN_LRU] = "clean LRU page",
|
||||
[MSG_TRUNCATED_LRU] = "already truncated LRU page",
|
||||
[MSG_BUDDY] = "free buddy page",
|
||||
[MSG_BUDDY_2ND] = "free buddy page (2nd try)",
|
||||
[MSG_UNKNOWN] = "unknown page",
|
||||
};
|
||||
|
||||
/*
|
||||
* XXX: It is possible that a page is isolated from LRU cache,
|
||||
* and then kept in swap cache or failed to remove from page cache.
|
||||
|
@ -777,10 +823,10 @@ static int me_huge_page(struct page *p, unsigned long pfn)
|
|||
static struct page_state {
|
||||
unsigned long mask;
|
||||
unsigned long res;
|
||||
char *msg;
|
||||
enum action_page_type type;
|
||||
int (*action)(struct page *p, unsigned long pfn);
|
||||
} error_states[] = {
|
||||
{ reserved, reserved, "reserved kernel", me_kernel },
|
||||
{ reserved, reserved, MSG_KERNEL, me_kernel },
|
||||
/*
|
||||
* free pages are specially detected outside this table:
|
||||
* PG_buddy pages only make a small fraction of all free pages.
|
||||
|
@ -791,31 +837,31 @@ static struct page_state {
|
|||
* currently unused objects without touching them. But just
|
||||
* treat it as standard kernel for now.
|
||||
*/
|
||||
{ slab, slab, "kernel slab", me_kernel },
|
||||
{ slab, slab, MSG_SLAB, me_kernel },
|
||||
|
||||
#ifdef CONFIG_PAGEFLAGS_EXTENDED
|
||||
{ head, head, "huge", me_huge_page },
|
||||
{ tail, tail, "huge", me_huge_page },
|
||||
{ head, head, MSG_HUGE, me_huge_page },
|
||||
{ tail, tail, MSG_HUGE, me_huge_page },
|
||||
#else
|
||||
{ compound, compound, "huge", me_huge_page },
|
||||
{ compound, compound, MSG_HUGE, me_huge_page },
|
||||
#endif
|
||||
|
||||
{ sc|dirty, sc|dirty, "dirty swapcache", me_swapcache_dirty },
|
||||
{ sc|dirty, sc, "clean swapcache", me_swapcache_clean },
|
||||
{ sc|dirty, sc|dirty, MSG_DIRTY_SWAPCACHE, me_swapcache_dirty },
|
||||
{ sc|dirty, sc, MSG_CLEAN_SWAPCACHE, me_swapcache_clean },
|
||||
|
||||
{ mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty },
|
||||
{ mlock|dirty, mlock, "clean mlocked LRU", me_pagecache_clean },
|
||||
{ mlock|dirty, mlock|dirty, MSG_DIRTY_MLOCKED_LRU, me_pagecache_dirty },
|
||||
{ mlock|dirty, mlock, MSG_CLEAN_MLOCKED_LRU, me_pagecache_clean },
|
||||
|
||||
{ unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty },
|
||||
{ unevict|dirty, unevict, "clean unevictable LRU", me_pagecache_clean },
|
||||
{ unevict|dirty, unevict|dirty, MSG_DIRTY_UNEVICTABLE_LRU, me_pagecache_dirty },
|
||||
{ unevict|dirty, unevict, MSG_CLEAN_UNEVICTABLE_LRU, me_pagecache_clean },
|
||||
|
||||
{ lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty },
|
||||
{ lru|dirty, lru, "clean LRU", me_pagecache_clean },
|
||||
{ lru|dirty, lru|dirty, MSG_DIRTY_LRU, me_pagecache_dirty },
|
||||
{ lru|dirty, lru, MSG_CLEAN_LRU, me_pagecache_clean },
|
||||
|
||||
/*
|
||||
* Catchall entry: must be at end.
|
||||
*/
|
||||
{ 0, 0, "unknown page state", me_unknown },
|
||||
{ 0, 0, MSG_UNKNOWN, me_unknown },
|
||||
};
|
||||
|
||||
#undef dirty
|
||||
|
@ -835,10 +881,10 @@ static struct page_state {
|
|||
* "Dirty/Clean" indication is not 100% accurate due to the possibility of
|
||||
* setting PG_dirty outside page lock. See also comment above set_page_dirty().
|
||||
*/
|
||||
static void action_result(unsigned long pfn, char *msg, int result)
|
||||
static void action_result(unsigned long pfn, enum action_page_type type, int result)
|
||||
{
|
||||
pr_err("MCE %#lx: %s page recovery: %s\n",
|
||||
pfn, msg, action_name[result]);
|
||||
pr_err("MCE %#lx: recovery action for %s: %s\n",
|
||||
pfn, action_page_types[type], action_name[result]);
|
||||
}
|
||||
|
||||
static int page_action(struct page_state *ps, struct page *p,
|
||||
|
@ -854,11 +900,11 @@ static int page_action(struct page_state *ps, struct page *p,
|
|||
count--;
|
||||
if (count != 0) {
|
||||
printk(KERN_ERR
|
||||
"MCE %#lx: %s page still referenced by %d users\n",
|
||||
pfn, ps->msg, count);
|
||||
"MCE %#lx: %s still referenced by %d users\n",
|
||||
pfn, action_page_types[ps->type], count);
|
||||
result = FAILED;
|
||||
}
|
||||
action_result(pfn, ps->msg, result);
|
||||
action_result(pfn, ps->type, result);
|
||||
|
||||
/* Could do more checks here if page looks ok */
|
||||
/*
|
||||
|
@ -1106,7 +1152,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
|
|||
if (!(flags & MF_COUNT_INCREASED) &&
|
||||
!get_page_unless_zero(hpage)) {
|
||||
if (is_free_buddy_page(p)) {
|
||||
action_result(pfn, "free buddy", DELAYED);
|
||||
action_result(pfn, MSG_BUDDY, DELAYED);
|
||||
return 0;
|
||||
} else if (PageHuge(hpage)) {
|
||||
/*
|
||||
|
@ -1123,12 +1169,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
|
|||
}
|
||||
set_page_hwpoison_huge_page(hpage);
|
||||
res = dequeue_hwpoisoned_huge_page(hpage);
|
||||
action_result(pfn, "free huge",
|
||||
action_result(pfn, MSG_FREE_HUGE,
|
||||
res ? IGNORED : DELAYED);
|
||||
unlock_page(hpage);
|
||||
return res;
|
||||
} else {
|
||||
action_result(pfn, "high order kernel", IGNORED);
|
||||
action_result(pfn, MSG_KERNEL_HIGH_ORDER, IGNORED);
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
|
@ -1150,9 +1196,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
|
|||
*/
|
||||
if (is_free_buddy_page(p)) {
|
||||
if (flags & MF_COUNT_INCREASED)
|
||||
action_result(pfn, "free buddy", DELAYED);
|
||||
action_result(pfn, MSG_BUDDY, DELAYED);
|
||||
else
|
||||
action_result(pfn, "free buddy, 2nd try", DELAYED);
|
||||
action_result(pfn, MSG_BUDDY_2ND,
|
||||
DELAYED);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -1165,7 +1212,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
|
|||
* If this happens just bail out.
|
||||
*/
|
||||
if (compound_head(p) != hpage) {
|
||||
action_result(pfn, "different compound page after locking", IGNORED);
|
||||
action_result(pfn, MSG_DIFFERENT_COMPOUND, IGNORED);
|
||||
res = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
@ -1205,8 +1252,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
|
|||
* on the head page to show that the hugepage is hwpoisoned
|
||||
*/
|
||||
if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
|
||||
action_result(pfn, "hugepage already hardware poisoned",
|
||||
IGNORED);
|
||||
action_result(pfn, MSG_POISONED_HUGE, IGNORED);
|
||||
unlock_page(hpage);
|
||||
put_page(hpage);
|
||||
return 0;
|
||||
|
@ -1235,7 +1281,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
|
|||
*/
|
||||
if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage)
|
||||
!= SWAP_SUCCESS) {
|
||||
action_result(pfn, "unmapping failed", IGNORED);
|
||||
action_result(pfn, MSG_UNMAP_FAILED, IGNORED);
|
||||
res = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
@ -1244,7 +1290,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
|
|||
* Torn down by someone else?
|
||||
*/
|
||||
if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
|
||||
action_result(pfn, "already truncated LRU", IGNORED);
|
||||
action_result(pfn, MSG_TRUNCATED_LRU, IGNORED);
|
||||
res = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
@ -1540,8 +1586,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
|
|||
}
|
||||
unlock_page(hpage);
|
||||
|
||||
/* Keep page count to indicate a given hugepage is isolated. */
|
||||
list_move(&hpage->lru, &pagelist);
|
||||
ret = isolate_huge_page(hpage, &pagelist);
|
||||
if (ret) {
|
||||
/*
|
||||
* get_any_page() and isolate_huge_page() takes a refcount each,
|
||||
* so need to drop one here.
|
||||
*/
|
||||
put_page(hpage);
|
||||
} else {
|
||||
pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
|
||||
MIGRATE_SYNC, MR_MEMORY_FAILURE);
|
||||
if (ret) {
|
||||
|
|
56
mm/memory.c
56
mm/memory.c
|
@ -690,12 +690,11 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
|
|||
/*
|
||||
* Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y
|
||||
*/
|
||||
if (vma->vm_ops)
|
||||
printk(KERN_ALERT "vma->vm_ops->fault: %pSR\n",
|
||||
vma->vm_ops->fault);
|
||||
if (vma->vm_file)
|
||||
printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n",
|
||||
vma->vm_file->f_op->mmap);
|
||||
pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n",
|
||||
vma->vm_file,
|
||||
vma->vm_ops ? vma->vm_ops->fault : NULL,
|
||||
vma->vm_file ? vma->vm_file->f_op->mmap : NULL,
|
||||
mapping ? mapping->a_ops->readpage : NULL);
|
||||
dump_stack();
|
||||
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
|
||||
}
|
||||
|
@ -2181,6 +2180,42 @@ oom:
|
|||
return VM_FAULT_OOM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
|
||||
* mapping
|
||||
*/
|
||||
static int wp_pfn_shared(struct mm_struct *mm,
|
||||
struct vm_area_struct *vma, unsigned long address,
|
||||
pte_t *page_table, spinlock_t *ptl, pte_t orig_pte,
|
||||
pmd_t *pmd)
|
||||
{
|
||||
if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
|
||||
struct vm_fault vmf = {
|
||||
.page = NULL,
|
||||
.pgoff = linear_page_index(vma, address),
|
||||
.virtual_address = (void __user *)(address & PAGE_MASK),
|
||||
.flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE,
|
||||
};
|
||||
int ret;
|
||||
|
||||
pte_unmap_unlock(page_table, ptl);
|
||||
ret = vma->vm_ops->pfn_mkwrite(vma, &vmf);
|
||||
if (ret & VM_FAULT_ERROR)
|
||||
return ret;
|
||||
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
|
||||
/*
|
||||
* We might have raced with another page fault while we
|
||||
* released the pte_offset_map_lock.
|
||||
*/
|
||||
if (!pte_same(*page_table, orig_pte)) {
|
||||
pte_unmap_unlock(page_table, ptl);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return wp_page_reuse(mm, vma, address, page_table, ptl, orig_pte,
|
||||
NULL, 0, 0);
|
||||
}
|
||||
|
||||
static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
unsigned long address, pte_t *page_table,
|
||||
pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte,
|
||||
|
@ -2259,13 +2294,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
* VM_PFNMAP VMA.
|
||||
*
|
||||
* We should not cow pages in a shared writeable mapping.
|
||||
* Just mark the pages writable as we can't do any dirty
|
||||
* accounting on raw pfn maps.
|
||||
* Just mark the pages writable and/or call ops->pfn_mkwrite.
|
||||
*/
|
||||
if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
|
||||
(VM_WRITE|VM_SHARED))
|
||||
return wp_page_reuse(mm, vma, address, page_table, ptl,
|
||||
orig_pte, old_page, 0, 0);
|
||||
return wp_pfn_shared(mm, vma, address, page_table, ptl,
|
||||
orig_pte, pmd);
|
||||
|
||||
pte_unmap_unlock(page_table, ptl);
|
||||
return wp_page_copy(mm, vma, address, page_table, pmd,
|
||||
|
@ -2845,7 +2879,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
|
|||
struct vm_fault vmf;
|
||||
int off;
|
||||
|
||||
nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT;
|
||||
nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
|
||||
mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
|
||||
|
||||
start_addr = max(address & mask, vma->vm_start);
|
||||
|
|
|
@ -1373,7 +1373,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
|
|||
if (PageLRU(page))
|
||||
return pfn;
|
||||
if (PageHuge(page)) {
|
||||
if (is_hugepage_active(page))
|
||||
if (page_huge_active(page))
|
||||
return pfn;
|
||||
else
|
||||
pfn = round_up(pfn + 1,
|
||||
|
|
117
mm/mempool.c
117
mm/mempool.c
|
@ -6,26 +6,138 @@
|
|||
* extreme VM load.
|
||||
*
|
||||
* started by Ingo Molnar, Copyright (C) 2001
|
||||
* debugging by David Rientjes, Copyright (C) 2015
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/mempool.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/writeback.h>
|
||||
#include "slab.h"
|
||||
|
||||
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
|
||||
/*
 * Report a poison mismatch found at offset @byte of @element.
 *
 * Prints the pool, the element size, the current element count, and the
 * bytes in a window of one machine word on either side of the mismatch
 * (clamped to the element), followed by a stack dump.
 */
static void poison_error(mempool_t *pool, void *element, size_t size,
			 size_t byte)
{
	const int word = BITS_PER_LONG / 8;
	const int count = pool->curr_nr;
	const int first = max_t(int, byte - word, 0);
	const int last = min_t(int, byte + word, size);
	const u8 *bytes = element;
	int pos;

	pr_err("BUG: mempool element poison mismatch\n");
	pr_err("Mempool %p size %zu\n", pool, size);
	pr_err(" nr=%d @ %p: %s0x", count, element, first > 0 ? "... " : "");
	for (pos = first; pos < last; pos++)
		pr_cont("%x ", bytes[pos]);
	pr_cont("%s\n", last < size ? "..." : "");
	dump_stack();
}
|
||||
|
||||
/*
 * Verify that the @size bytes at @element still carry the free-poison
 * pattern: POISON_FREE everywhere except POISON_END in the final byte.
 *
 * The first mismatch is reported through poison_error() and the element is
 * left untouched.  If the whole pattern is intact, the element is refilled
 * with POISON_INUSE.
 */
static void __check_element(mempool_t *pool, void *element, size_t size)
{
	u8 *bytes = element;
	size_t pos;

	for (pos = 0; pos < size; pos++) {
		u8 want = POISON_FREE;

		if (pos == size - 1)
			want = POISON_END;
		if (bytes[pos] == want)
			continue;

		poison_error(pool, element, size, pos);
		return;
	}

	memset(bytes, POISON_INUSE, size);
}
|
||||
|
||||
/*
 * Check the poison pattern of @element according to how @pool frees its
 * elements; pools using any other free callback are not checked.
 */
static void check_element(mempool_t *pool, void *element)
{
	if (pool->free == mempool_free_slab || pool->free == mempool_kfree) {
		/* Slab-backed element: its size comes from ksize(). */
		__check_element(pool, element, ksize(element));
	} else if (pool->free == mempool_free_pages) {
		/*
		 * Page-backed element: @element is a struct page and
		 * pool_data holds the allocation order.
		 */
		const int order = (int)(long)pool->pool_data;
		const size_t bytes = 1UL << (PAGE_SHIFT + order);
		void *mapped = kmap_atomic((struct page *)element);

		__check_element(pool, mapped, bytes);
		kunmap_atomic(mapped);
	}
}
|
||||
|
||||
/*
 * Write the free-poison pattern over @size bytes at @element: POISON_FREE
 * in every byte except the last one, which gets POISON_END.
 */
static void __poison_element(void *element, size_t size)
{
	u8 *bytes = element;
	const size_t last = size - 1;

	memset(bytes, POISON_FREE, last);
	bytes[last] = POISON_END;
}
|
||||
|
||||
/*
 * Poison @element according to how @pool allocates its elements; pools
 * using any other alloc callback are left alone.
 */
static void poison_element(mempool_t *pool, void *element)
{
	if (pool->alloc == mempool_alloc_slab ||
	    pool->alloc == mempool_kmalloc) {
		/* Slab-backed element: its size comes from ksize(). */
		__poison_element(element, ksize(element));
	} else if (pool->alloc == mempool_alloc_pages) {
		/*
		 * Page-backed element: @element is a struct page and
		 * pool_data holds the allocation order.
		 */
		const int order = (int)(long)pool->pool_data;
		const size_t bytes = 1UL << (PAGE_SHIFT + order);
		void *mapped = kmap_atomic((struct page *)element);

		__poison_element(mapped, bytes);
		kunmap_atomic(mapped);
	}
}
|
||||
#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
|
||||
/* No-op variant used when neither slab debugging option is enabled. */
static inline void check_element(mempool_t *pool, void *element)
|
||||
{
|
||||
}
|
||||
/* No-op variant used when neither slab debugging option is enabled. */
static inline void poison_element(mempool_t *pool, void *element)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
|
||||
|
||||
/*
 * Tell KASAN that @element is now parked in @pool, using the hook that
 * matches the pool's alloc callback.  Pools with any other alloc callback
 * get no KASAN annotation.
 */
static void kasan_poison_element(mempool_t *pool, void *element)
{
	if (pool->alloc == mempool_alloc_slab)
		kasan_slab_free(pool->pool_data, element);
	else if (pool->alloc == mempool_kmalloc)
		kasan_kfree(element);
	else if (pool->alloc == mempool_alloc_pages)
		kasan_free_pages(element, (unsigned long)pool->pool_data);
}
|
||||
|
||||
/*
 * Tell KASAN that @element is being handed out of @pool again, using the
 * hook that matches the pool's alloc callback.  Pools with any other alloc
 * callback get no KASAN annotation.
 */
static void kasan_unpoison_element(mempool_t *pool, void *element)
{
	if (pool->alloc == mempool_alloc_slab)
		kasan_slab_alloc(pool->pool_data, element);
	else if (pool->alloc == mempool_kmalloc)
		kasan_krealloc(element, (size_t)pool->pool_data);
	else if (pool->alloc == mempool_alloc_pages)
		kasan_alloc_pages(element, (unsigned long)pool->pool_data);
}
|
||||
|
||||
/*
 * Store @element in the next free slot of @pool after poisoning it, both
 * with the debug pattern and for KASAN.  The pool must not already hold
 * min_nr elements.
 */
static void add_element(mempool_t *pool, void *element)
{
	BUG_ON(pool->curr_nr >= pool->min_nr);

	poison_element(pool, element);
	kasan_poison_element(pool, element);

	pool->elements[pool->curr_nr] = element;
	pool->curr_nr++;
}
|
||||
|
||||
/*
 * Take the most recently added element out of @pool and return it.
 *
 * The pool must hold at least one element.  This is checked before the slot
 * is read, so elements[] is never indexed with a negative value.  The
 * element's poison pattern is then verified and its KASAN state is switched
 * back to allocated before it is handed to the caller.
 */
static void *remove_element(mempool_t *pool)
{
	void *element;

	BUG_ON(pool->curr_nr <= 0);
	element = pool->elements[--pool->curr_nr];

	check_element(pool, element);
	kasan_unpoison_element(pool, element);
	return element;
}
|
||||
|
||||
/**
|
||||
|
@ -334,6 +446,7 @@ EXPORT_SYMBOL(mempool_free);
|
|||
/*
 * Allocate one object from the kmem_cache passed as @pool_data, using
 * @gfp_mask.  The cache must not have a constructor (VM_BUG_ON otherwise).
 */
void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
{
	struct kmem_cache *cache = pool_data;

	VM_BUG_ON(cache->ctor);

	return kmem_cache_alloc(cache, gfp_mask);
}
|
||||
EXPORT_SYMBOL(mempool_alloc_slab);
|
||||
|
|
|
@ -537,7 +537,8 @@ void migrate_page_copy(struct page *newpage, struct page *page)
|
|||
* Please do not reorder this without considering how mm/ksm.c's
|
||||
* get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
|
||||
*/
|
||||
ClearPageSwapCache(page);
|
||||
if (PageSwapCache(page))
|
||||
ClearPageSwapCache(page);
|
||||
ClearPagePrivate(page);
|
||||
set_page_private(page, 0);
|
||||
|
||||
|
|
21
mm/mmap.c
21
mm/mmap.c
|
@ -1133,7 +1133,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
|
|||
* by another page fault trying to merge _that_. But that's ok: if it
|
||||
* is being set up, that automatically means that it will be a singleton
|
||||
* acceptable for merging, so we can do all of this optimistically. But
|
||||
* we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
|
||||
* we do that READ_ONCE() to make sure that we never re-load the pointer.
|
||||
*
|
||||
* IOW: that the "list_is_singular()" test on the anon_vma_chain only
|
||||
* matters for the 'stable anon_vma' case (ie the thing we want to avoid
|
||||
|
@ -1147,7 +1147,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
|
|||
static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
|
||||
{
|
||||
if (anon_vma_compatible(a, b)) {
|
||||
struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
|
||||
struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
|
||||
|
||||
if (anon_vma && list_is_singular(&old->anon_vma_chain))
|
||||
return anon_vma;
|
||||
|
@ -1551,11 +1551,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
|||
|
||||
/* Clear old maps */
|
||||
error = -ENOMEM;
|
||||
munmap_back:
|
||||
if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
|
||||
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
|
||||
&rb_parent)) {
|
||||
if (do_munmap(mm, addr, len))
|
||||
return -ENOMEM;
|
||||
goto munmap_back;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1571,7 +1570,8 @@ munmap_back:
|
|||
/*
|
||||
* Can we just expand an old mapping?
|
||||
*/
|
||||
vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
|
||||
vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
|
||||
NULL);
|
||||
if (vma)
|
||||
goto out;
|
||||
|
||||
|
@ -2100,7 +2100,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
|
|||
actual_size = size;
|
||||
if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
|
||||
actual_size -= PAGE_SIZE;
|
||||
if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
|
||||
if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
|
||||
return -ENOMEM;
|
||||
|
||||
/* mlock limit tests */
|
||||
|
@ -2108,7 +2108,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
|
|||
unsigned long locked;
|
||||
unsigned long limit;
|
||||
locked = mm->locked_vm + grow;
|
||||
limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
|
||||
limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
|
||||
limit >>= PAGE_SHIFT;
|
||||
if (locked > limit && !capable(CAP_IPC_LOCK))
|
||||
return -ENOMEM;
|
||||
|
@ -2739,11 +2739,10 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
|
|||
/*
|
||||
* Clear old maps. this also does some error checking for us
|
||||
*/
|
||||
munmap_back:
|
||||
if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
|
||||
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
|
||||
&rb_parent)) {
|
||||
if (do_munmap(mm, addr, len))
|
||||
return -ENOMEM;
|
||||
goto munmap_back;
|
||||
}
|
||||
|
||||
/* Check against address space limits *after* clearing old maps... */
|
||||
|
|
25
mm/mremap.c
25
mm/mremap.c
|
@ -345,25 +345,25 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
|
|||
struct vm_area_struct *vma = find_vma(mm, addr);
|
||||
|
||||
if (!vma || vma->vm_start > addr)
|
||||
goto Efault;
|
||||
return ERR_PTR(-EFAULT);
|
||||
|
||||
if (is_vm_hugetlb_page(vma))
|
||||
goto Einval;
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/* We can't remap across vm area boundaries */
|
||||
if (old_len > vma->vm_end - addr)
|
||||
goto Efault;
|
||||
return ERR_PTR(-EFAULT);
|
||||
|
||||
/* Need to be careful about a growing mapping */
|
||||
if (new_len > old_len) {
|
||||
unsigned long pgoff;
|
||||
|
||||
if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
|
||||
goto Efault;
|
||||
return ERR_PTR(-EFAULT);
|
||||
pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
|
||||
pgoff += vma->vm_pgoff;
|
||||
if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
|
||||
goto Einval;
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if (vma->vm_flags & VM_LOCKED) {
|
||||
|
@ -372,29 +372,20 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
|
|||
lock_limit = rlimit(RLIMIT_MEMLOCK);
|
||||
locked += new_len - old_len;
|
||||
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
|
||||
goto Eagain;
|
||||
return ERR_PTR(-EAGAIN);
|
||||
}
|
||||
|
||||
if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT))
|
||||
goto Enomem;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (vma->vm_flags & VM_ACCOUNT) {
|
||||
unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
|
||||
if (security_vm_enough_memory_mm(mm, charged))
|
||||
goto Efault;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
*p = charged;
|
||||
}
|
||||
|
||||
return vma;
|
||||
|
||||
Efault: /* very odd choice for most of the cases, but... */
|
||||
return ERR_PTR(-EFAULT);
|
||||
Einval:
|
||||
return ERR_PTR(-EINVAL);
|
||||
Enomem:
|
||||
return ERR_PTR(-ENOMEM);
|
||||
Eagain:
|
||||
return ERR_PTR(-EAGAIN);
|
||||
}
|
||||
|
||||
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
|
||||
|
|
|
@ -408,7 +408,7 @@ bool oom_killer_disabled __read_mostly;
|
|||
static DECLARE_RWSEM(oom_sem);
|
||||
|
||||
/**
|
||||
* mark_tsk_oom_victim - marks the given taks as OOM victim.
|
||||
* mark_tsk_oom_victim - marks the given task as OOM victim.
|
||||
* @tsk: task to mark
|
||||
*
|
||||
* Has to be called with oom_sem taken for read and never after
|
||||
|
|
|
@ -2228,7 +2228,8 @@ int set_page_dirty(struct page *page)
|
|||
* it will confuse readahead and make it restart the size rampup
|
||||
* process. But it's a trivial problem.
|
||||
*/
|
||||
ClearPageReclaim(page);
|
||||
if (PageReclaim(page))
|
||||
ClearPageReclaim(page);
|
||||
#ifdef CONFIG_BLOCK
|
||||
if (!spd)
|
||||
spd = __set_page_dirty_buffers;
|
||||
|
|
|
@ -1371,7 +1371,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
|
|||
int to_drain, batch;
|
||||
|
||||
local_irq_save(flags);
|
||||
batch = ACCESS_ONCE(pcp->batch);
|
||||
batch = READ_ONCE(pcp->batch);
|
||||
to_drain = min(pcp->count, batch);
|
||||
if (to_drain > 0) {
|
||||
free_pcppages_bulk(zone, to_drain, pcp);
|
||||
|
@ -1570,7 +1570,7 @@ void free_hot_cold_page(struct page *page, bool cold)
|
|||
list_add_tail(&page->lru, &pcp->lists[migratetype]);
|
||||
pcp->count++;
|
||||
if (pcp->count >= pcp->high) {
|
||||
unsigned long batch = ACCESS_ONCE(pcp->batch);
|
||||
unsigned long batch = READ_ONCE(pcp->batch);
|
||||
free_pcppages_bulk(zone, batch, pcp);
|
||||
pcp->count -= batch;
|
||||
}
|
||||
|
@ -6207,7 +6207,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
|
|||
mask <<= (BITS_PER_LONG - bitidx - 1);
|
||||
flags <<= (BITS_PER_LONG - bitidx - 1);
|
||||
|
||||
word = ACCESS_ONCE(bitmap[word_bitidx]);
|
||||
word = READ_ONCE(bitmap[word_bitidx]);
|
||||
for (;;) {
|
||||
old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
|
||||
if (word == old_word)
|
||||
|
|
|
@ -456,7 +456,7 @@ struct anon_vma *page_get_anon_vma(struct page *page)
|
|||
unsigned long anon_mapping;
|
||||
|
||||
rcu_read_lock();
|
||||
anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
|
||||
anon_mapping = (unsigned long)READ_ONCE(page->mapping);
|
||||
if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
|
||||
goto out;
|
||||
if (!page_mapped(page))
|
||||
|
@ -500,14 +500,14 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page)
|
|||
unsigned long anon_mapping;
|
||||
|
||||
rcu_read_lock();
|
||||
anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
|
||||
anon_mapping = (unsigned long)READ_ONCE(page->mapping);
|
||||
if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
|
||||
goto out;
|
||||
if (!page_mapped(page))
|
||||
goto out;
|
||||
|
||||
anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
|
||||
root_anon_vma = ACCESS_ONCE(anon_vma->root);
|
||||
root_anon_vma = READ_ONCE(anon_vma->root);
|
||||
if (down_read_trylock(&root_anon_vma->rwsem)) {
|
||||
/*
|
||||
* If the page is still mapped, then this anon_vma is still
|
||||
|
|
|
@ -4277,7 +4277,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
|
|||
int node;
|
||||
struct page *page;
|
||||
|
||||
page = ACCESS_ONCE(c->page);
|
||||
page = READ_ONCE(c->page);
|
||||
if (!page)
|
||||
continue;
|
||||
|
||||
|
@ -4292,7 +4292,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
|
|||
total += x;
|
||||
nodes[node] += x;
|
||||
|
||||
page = ACCESS_ONCE(c->partial);
|
||||
page = READ_ONCE(c->partial);
|
||||
if (page) {
|
||||
node = page_to_nid(page);
|
||||
if (flags & SO_TOTAL)
|
||||
|
|
34
mm/swap.c
34
mm/swap.c
|
@ -31,6 +31,7 @@
|
|||
#include <linux/memcontrol.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/hugetlb.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
|
@ -42,7 +43,7 @@ int page_cluster;
|
|||
|
||||
static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
|
||||
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
|
||||
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
|
||||
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
|
||||
|
||||
/*
|
||||
* This path almost never happens for VM activity - pages are normally
|
||||
|
@ -75,7 +76,14 @@ static void __put_compound_page(struct page *page)
|
|||
{
|
||||
compound_page_dtor *dtor;
|
||||
|
||||
__page_cache_release(page);
|
||||
/*
|
||||
* __page_cache_release() is supposed to be called for thp, not for
|
||||
* hugetlb. This is because hugetlb page does never have PageLRU set
|
||||
* (it's never listed to any LRU lists) and no memcg routines should
|
||||
* be called for hugetlb (it has a separate hugetlb_cgroup.)
|
||||
*/
|
||||
if (!PageHuge(page))
|
||||
__page_cache_release(page);
|
||||
dtor = get_compound_page_dtor(page);
|
||||
(*dtor)(page);
|
||||
}
|
||||
|
@ -743,7 +751,7 @@ void lru_cache_add_active_or_unevictable(struct page *page,
|
|||
* be write it out by flusher threads as this is much more effective
|
||||
* than the single-page writeout from reclaim.
|
||||
*/
|
||||
static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
|
||||
static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
|
||||
void *arg)
|
||||
{
|
||||
int lru, file;
|
||||
|
@ -811,36 +819,36 @@ void lru_add_drain_cpu(int cpu)
|
|||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
pvec = &per_cpu(lru_deactivate_pvecs, cpu);
|
||||
pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
|
||||
if (pagevec_count(pvec))
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
|
||||
|
||||
activate_page_drain(cpu);
|
||||
}
|
||||
|
||||
/**
|
||||
* deactivate_page - forcefully deactivate a page
|
||||
* deactivate_file_page - forcefully deactivate a file page
|
||||
* @page: page to deactivate
|
||||
*
|
||||
* This function hints the VM that @page is a good reclaim candidate,
|
||||
* for example if its invalidation fails due to the page being dirty
|
||||
* or under writeback.
|
||||
*/
|
||||
void deactivate_page(struct page *page)
|
||||
void deactivate_file_page(struct page *page)
|
||||
{
|
||||
/*
|
||||
* In a workload with many unevictable page such as mprotect, unevictable
|
||||
* page deactivation for accelerating reclaim is pointless.
|
||||
* In a workload with many unevictable page such as mprotect,
|
||||
* unevictable page deactivation for accelerating reclaim is pointless.
|
||||
*/
|
||||
if (PageUnevictable(page))
|
||||
return;
|
||||
|
||||
if (likely(get_page_unless_zero(page))) {
|
||||
struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
|
||||
struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
|
||||
|
||||
if (!pagevec_add(pvec, page))
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
|
||||
put_cpu_var(lru_deactivate_pvecs);
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
|
||||
put_cpu_var(lru_deactivate_file_pvecs);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -872,7 +880,7 @@ void lru_add_drain_all(void)
|
|||
|
||||
if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
|
||||
need_activate_page_drain(cpu)) {
|
||||
INIT_WORK(work, lru_add_drain_per_cpu);
|
||||
schedule_work_on(cpu, work);
|
||||
|
|
|
@ -390,7 +390,7 @@ static unsigned long swapin_nr_pages(unsigned long offset)
|
|||
unsigned int pages, max_pages, last_ra;
|
||||
static atomic_t last_readahead_pages;
|
||||
|
||||
max_pages = 1 << ACCESS_ONCE(page_cluster);
|
||||
max_pages = 1 << READ_ONCE(page_cluster);
|
||||
if (max_pages <= 1)
|
||||
return 1;
|
||||
|
||||
|
|
|
@ -1312,7 +1312,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
|
|||
else
|
||||
continue;
|
||||
}
|
||||
count = ACCESS_ONCE(si->swap_map[i]);
|
||||
count = READ_ONCE(si->swap_map[i]);
|
||||
if (count && swap_count(count) != SWAP_MAP_BAD)
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -490,7 +490,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
|
|||
* of interest and try to speed up its reclaim.
|
||||
*/
|
||||
if (!ret)
|
||||
deactivate_page(page);
|
||||
deactivate_file_page(page);
|
||||
count += ret;
|
||||
}
|
||||
pagevec_remove_exceptionals(&pvec);
|
||||
|
|
41
mm/util.c
41
mm/util.c
|
@ -325,9 +325,37 @@ void kvfree(const void *addr)
|
|||
}
|
||||
EXPORT_SYMBOL(kvfree);
|
||||
|
||||
static inline void *__page_rmapping(struct page *page)
|
||||
{
|
||||
unsigned long mapping;
|
||||
|
||||
mapping = (unsigned long)page->mapping;
|
||||
mapping &= ~PAGE_MAPPING_FLAGS;
|
||||
|
||||
return (void *)mapping;
|
||||
}
|
||||
|
||||
/* Neutral page->mapping pointer to address_space or anon_vma or other */
|
||||
void *page_rmapping(struct page *page)
|
||||
{
|
||||
page = compound_head(page);
|
||||
return __page_rmapping(page);
|
||||
}
|
||||
|
||||
struct anon_vma *page_anon_vma(struct page *page)
|
||||
{
|
||||
unsigned long mapping;
|
||||
|
||||
page = compound_head(page);
|
||||
mapping = (unsigned long)page->mapping;
|
||||
if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
|
||||
return NULL;
|
||||
return __page_rmapping(page);
|
||||
}
|
||||
|
||||
struct address_space *page_mapping(struct page *page)
|
||||
{
|
||||
struct address_space *mapping = page->mapping;
|
||||
unsigned long mapping;
|
||||
|
||||
/* This happens if someone calls flush_dcache_page on slab page */
|
||||
if (unlikely(PageSlab(page)))
|
||||
|
@ -337,10 +365,13 @@ struct address_space *page_mapping(struct page *page)
|
|||
swp_entry_t entry;
|
||||
|
||||
entry.val = page_private(page);
|
||||
mapping = swap_address_space(entry);
|
||||
} else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
|
||||
mapping = NULL;
|
||||
return mapping;
|
||||
return swap_address_space(entry);
|
||||
}
|
||||
|
||||
mapping = (unsigned long)page->mapping;
|
||||
if (mapping & PAGE_MAPPING_FLAGS)
|
||||
return NULL;
|
||||
return page->mapping;
|
||||
}
|
||||
|
||||
int overcommit_ratio_handler(struct ctl_table *table, int write,
|
||||
|
|
95
mm/vmalloc.c
95
mm/vmalloc.c
|
@ -765,7 +765,7 @@ struct vmap_block {
|
|||
spinlock_t lock;
|
||||
struct vmap_area *va;
|
||||
unsigned long free, dirty;
|
||||
DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
|
||||
unsigned long dirty_min, dirty_max; /*< dirty range */
|
||||
struct list_head free_list;
|
||||
struct rcu_head rcu_head;
|
||||
struct list_head purge;
|
||||
|
@ -796,13 +796,31 @@ static unsigned long addr_to_vb_idx(unsigned long addr)
|
|||
return addr;
|
||||
}
|
||||
|
||||
static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
|
||||
static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
addr = va_start + (pages_off << PAGE_SHIFT);
|
||||
BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start));
|
||||
return (void *)addr;
|
||||
}
|
||||
|
||||
/**
|
||||
* new_vmap_block - allocates new vmap_block and occupies 2^order pages in this
|
||||
* block. Of course pages number can't exceed VMAP_BBMAP_BITS
|
||||
* @order: how many 2^order pages should be occupied in newly allocated block
|
||||
* @gfp_mask: flags for the page level allocator
|
||||
*
|
||||
* Returns: virtual address in a newly allocated block or ERR_PTR(-errno)
|
||||
*/
|
||||
static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
|
||||
{
|
||||
struct vmap_block_queue *vbq;
|
||||
struct vmap_block *vb;
|
||||
struct vmap_area *va;
|
||||
unsigned long vb_idx;
|
||||
int node, err;
|
||||
void *vaddr;
|
||||
|
||||
node = numa_node_id();
|
||||
|
||||
|
@ -826,11 +844,15 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
vaddr = vmap_block_vaddr(va->va_start, 0);
|
||||
spin_lock_init(&vb->lock);
|
||||
vb->va = va;
|
||||
vb->free = VMAP_BBMAP_BITS;
|
||||
/* At least something should be left free */
|
||||
BUG_ON(VMAP_BBMAP_BITS <= (1UL << order));
|
||||
vb->free = VMAP_BBMAP_BITS - (1UL << order);
|
||||
vb->dirty = 0;
|
||||
bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
|
||||
vb->dirty_min = VMAP_BBMAP_BITS;
|
||||
vb->dirty_max = 0;
|
||||
INIT_LIST_HEAD(&vb->free_list);
|
||||
|
||||
vb_idx = addr_to_vb_idx(va->va_start);
|
||||
|
@ -842,11 +864,11 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
|
|||
|
||||
vbq = &get_cpu_var(vmap_block_queue);
|
||||
spin_lock(&vbq->lock);
|
||||
list_add_rcu(&vb->free_list, &vbq->free);
|
||||
list_add_tail_rcu(&vb->free_list, &vbq->free);
|
||||
spin_unlock(&vbq->lock);
|
||||
put_cpu_var(vmap_block_queue);
|
||||
|
||||
return vb;
|
||||
return vaddr;
|
||||
}
|
||||
|
||||
static void free_vmap_block(struct vmap_block *vb)
|
||||
|
@ -881,7 +903,8 @@ static void purge_fragmented_blocks(int cpu)
|
|||
if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
|
||||
vb->free = 0; /* prevent further allocs after releasing lock */
|
||||
vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
|
||||
bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
|
||||
vb->dirty_min = 0;
|
||||
vb->dirty_max = VMAP_BBMAP_BITS;
|
||||
spin_lock(&vbq->lock);
|
||||
list_del_rcu(&vb->free_list);
|
||||
spin_unlock(&vbq->lock);
|
||||
|
@ -910,7 +933,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
|
|||
{
|
||||
struct vmap_block_queue *vbq;
|
||||
struct vmap_block *vb;
|
||||
unsigned long addr = 0;
|
||||
void *vaddr = NULL;
|
||||
unsigned int order;
|
||||
|
||||
BUG_ON(size & ~PAGE_MASK);
|
||||
|
@ -925,43 +948,38 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
|
|||
}
|
||||
order = get_order(size);
|
||||
|
||||
again:
|
||||
rcu_read_lock();
|
||||
vbq = &get_cpu_var(vmap_block_queue);
|
||||
list_for_each_entry_rcu(vb, &vbq->free, free_list) {
|
||||
int i;
|
||||
unsigned long pages_off;
|
||||
|
||||
spin_lock(&vb->lock);
|
||||
if (vb->free < 1UL << order)
|
||||
goto next;
|
||||
if (vb->free < (1UL << order)) {
|
||||
spin_unlock(&vb->lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
i = VMAP_BBMAP_BITS - vb->free;
|
||||
addr = vb->va->va_start + (i << PAGE_SHIFT);
|
||||
BUG_ON(addr_to_vb_idx(addr) !=
|
||||
addr_to_vb_idx(vb->va->va_start));
|
||||
pages_off = VMAP_BBMAP_BITS - vb->free;
|
||||
vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
|
||||
vb->free -= 1UL << order;
|
||||
if (vb->free == 0) {
|
||||
spin_lock(&vbq->lock);
|
||||
list_del_rcu(&vb->free_list);
|
||||
spin_unlock(&vbq->lock);
|
||||
}
|
||||
|
||||
spin_unlock(&vb->lock);
|
||||
break;
|
||||
next:
|
||||
spin_unlock(&vb->lock);
|
||||
}
|
||||
|
||||
put_cpu_var(vmap_block_queue);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!addr) {
|
||||
vb = new_vmap_block(gfp_mask);
|
||||
if (IS_ERR(vb))
|
||||
return vb;
|
||||
goto again;
|
||||
}
|
||||
/* Allocate new block if nothing was found */
|
||||
if (!vaddr)
|
||||
vaddr = new_vmap_block(order, gfp_mask);
|
||||
|
||||
return (void *)addr;
|
||||
return vaddr;
|
||||
}
|
||||
|
||||
static void vb_free(const void *addr, unsigned long size)
|
||||
|
@ -979,6 +997,7 @@ static void vb_free(const void *addr, unsigned long size)
|
|||
order = get_order(size);
|
||||
|
||||
offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
|
||||
offset >>= PAGE_SHIFT;
|
||||
|
||||
vb_idx = addr_to_vb_idx((unsigned long)addr);
|
||||
rcu_read_lock();
|
||||
|
@ -989,7 +1008,10 @@ static void vb_free(const void *addr, unsigned long size)
|
|||
vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
|
||||
|
||||
spin_lock(&vb->lock);
|
||||
BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
|
||||
|
||||
/* Expand dirty range */
|
||||
vb->dirty_min = min(vb->dirty_min, offset);
|
||||
vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));
|
||||
|
||||
vb->dirty += 1UL << order;
|
||||
if (vb->dirty == VMAP_BBMAP_BITS) {
|
||||
|
@ -1028,25 +1050,18 @@ void vm_unmap_aliases(void)
|
|||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(vb, &vbq->free, free_list) {
|
||||
int i, j;
|
||||
|
||||
spin_lock(&vb->lock);
|
||||
i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
|
||||
if (i < VMAP_BBMAP_BITS) {
|
||||
if (vb->dirty) {
|
||||
unsigned long va_start = vb->va->va_start;
|
||||
unsigned long s, e;
|
||||
|
||||
j = find_last_bit(vb->dirty_map,
|
||||
VMAP_BBMAP_BITS);
|
||||
j = j + 1; /* need exclusive index */
|
||||
s = va_start + (vb->dirty_min << PAGE_SHIFT);
|
||||
e = va_start + (vb->dirty_max << PAGE_SHIFT);
|
||||
|
||||
start = min(s, start);
|
||||
end = max(e, end);
|
||||
|
||||
s = vb->va->va_start + (i << PAGE_SHIFT);
|
||||
e = vb->va->va_start + (j << PAGE_SHIFT);
|
||||
flush = 1;
|
||||
|
||||
if (s < start)
|
||||
start = s;
|
||||
if (e > end)
|
||||
end = e;
|
||||
}
|
||||
spin_unlock(&vb->lock);
|
||||
}
|
||||
|
|
989
mm/zsmalloc.c
989
mm/zsmalloc.c
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue