Merge branch 'akpm' (patches from Andrew)

Merge second patchbomb from Andrew Morton:

 - the rest of MM

 - various misc bits

 - add ability to run /sbin/reboot at reboot time

 - printk/vsprintf changes

 - fiddle with seq_printf() return value

* akpm: (114 commits)
  parisc: remove use of seq_printf return value
  lru_cache: remove use of seq_printf return value
  tracing: remove use of seq_printf return value
  cgroup: remove use of seq_printf return value
  proc: remove use of seq_printf return value
  s390: remove use of seq_printf return value
  cris fasttimer: remove use of seq_printf return value
  cris: remove use of seq_printf return value
  openrisc: remove use of seq_printf return value
  ARM: plat-pxa: remove use of seq_printf return value
  nios2: cpuinfo: remove use of seq_printf return value
  microblaze: mb: remove use of seq_printf return value
  ipc: remove use of seq_printf return value
  rtc: remove use of seq_printf return value
  power: wakeup: remove use of seq_printf return value
  x86: mtrr: if: remove use of seq_printf return value
  linux/bitmap.h: improve BITMAP_{LAST,FIRST}_WORD_MASK
  MAINTAINERS: CREDITS: remove Stefano Brivio from B43
  .mailmap: add Ricardo Ribalda
  CREDITS: add Ricardo Ribalda Delgado
  ...
This commit is contained in:
Linus Torvalds 2015-04-15 16:39:15 -07:00
commit eea3a00264
136 changed files with 3273 additions and 1808 deletions

View file

@ -23,6 +23,7 @@
# define DEBUG
#endif
#endif
#define CREATE_TRACE_POINTS
#include <linux/memblock.h>
#include <linux/err.h>
@ -34,6 +35,7 @@
#include <linux/cma.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <trace/events/cma.h>
#include "cma.h"
@ -414,6 +416,8 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
start = bitmap_no + mask + 1;
}
trace_cma_alloc(page ? pfn : -1UL, page, count, align);
pr_debug("%s(): returned %p\n", __func__, page);
return page;
}
@ -446,6 +450,7 @@ bool cma_release(struct cma *cma, const struct page *pages, unsigned int count)
free_contig_range(pfn, count);
cma_clear_bitmap(cma, pfn, count);
trace_cma_release(pfn, pages, count);
return true;
}

View file

@ -30,9 +30,44 @@ static int cma_debugfs_get(void *data, u64 *val)
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(cma_debugfs_fops, cma_debugfs_get, NULL, "%llu\n");
static int cma_used_get(void *data, u64 *val)
{
struct cma *cma = data;
unsigned long used;
mutex_lock(&cma->lock);
/* pages counter is smaller than sizeof(int) */
used = bitmap_weight(cma->bitmap, (int)cma->count);
mutex_unlock(&cma->lock);
*val = (u64)used << cma->order_per_bit;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(cma_used_fops, cma_used_get, NULL, "%llu\n");
static int cma_maxchunk_get(void *data, u64 *val)
{
struct cma *cma = data;
unsigned long maxchunk = 0;
unsigned long start, end = 0;
mutex_lock(&cma->lock);
for (;;) {
start = find_next_zero_bit(cma->bitmap, cma->count, end);
if (start >= cma->count)
break;
end = find_next_bit(cma->bitmap, cma->count, start);
maxchunk = max(end - start, maxchunk);
}
mutex_unlock(&cma->lock);
*val = (u64)maxchunk << cma->order_per_bit;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(cma_maxchunk_fops, cma_maxchunk_get, NULL, "%llu\n");
static void cma_add_to_cma_mem_list(struct cma *cma, struct cma_mem *mem)
{
spin_lock(&cma->mem_head_lock);
@ -91,7 +126,6 @@ static int cma_free_write(void *data, u64 val)
return cma_free_mem(cma, pages);
}
DEFINE_SIMPLE_ATTRIBUTE(cma_free_fops, NULL, cma_free_write, "%llu\n");
static int cma_alloc_mem(struct cma *cma, int count)
@ -124,7 +158,6 @@ static int cma_alloc_write(void *data, u64 val)
return cma_alloc_mem(cma, pages);
}
DEFINE_SIMPLE_ATTRIBUTE(cma_alloc_fops, NULL, cma_alloc_write, "%llu\n");
static void cma_debugfs_add_one(struct cma *cma, int idx)
@ -149,6 +182,8 @@ static void cma_debugfs_add_one(struct cma *cma, int idx)
&cma->count, &cma_debugfs_fops);
debugfs_create_file("order_per_bit", S_IRUGO, tmp,
&cma->order_per_bit, &cma_debugfs_fops);
debugfs_create_file("used", S_IRUGO, tmp, cma, &cma_used_fops);
debugfs_create_file("maxchunk", S_IRUGO, tmp, cma, &cma_maxchunk_fops);
u32s = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32));
debugfs_create_u32_array("bitmap", S_IRUGO, tmp, (u32*)cma->bitmap, u32s);

View file

@ -391,28 +391,6 @@ static inline bool compact_should_abort(struct compact_control *cc)
return false;
}
/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct page *page)
{
/* If the page is a large free page, then disallow migration */
if (PageBuddy(page)) {
/*
* We are checking page_order without zone->lock taken. But
* the only small danger is that we skip a potentially suitable
* pageblock, so it's not worth to check order for valid range.
*/
if (page_order_unsafe(page) >= pageblock_order)
return false;
}
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
if (migrate_async_suitable(get_pageblock_migratetype(page)))
return true;
/* Otherwise skip the block */
return false;
}
/*
* Isolate free pages onto a private freelist. If @strict is true, will abort
* returning 0 on any invalid PFNs or non-free pages inside of the pageblock
@ -896,6 +874,29 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
#endif /* CONFIG_COMPACTION || CONFIG_CMA */
#ifdef CONFIG_COMPACTION
/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct page *page)
{
/* If the page is a large free page, then disallow migration */
if (PageBuddy(page)) {
/*
* We are checking page_order without zone->lock taken. But
* the only small danger is that we skip a potentially suitable
* pageblock, so it's not worth to check order for valid range.
*/
if (page_order_unsafe(page) >= pageblock_order)
return false;
}
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
if (migrate_async_suitable(get_pageblock_migratetype(page)))
return true;
/* Otherwise skip the block */
return false;
}
/*
* Based on information in the current compact_control, find blocks
* suitable for isolating free pages from and then isolate them.
@ -1046,6 +1047,12 @@ typedef enum {
ISOLATE_SUCCESS, /* Pages isolated, migrate */
} isolate_migrate_t;
/*
* Allow userspace to control policy on scanning the unevictable LRU for
* compactable pages.
*/
int sysctl_compact_unevictable_allowed __read_mostly = 1;
/*
* Isolate all pages that can be migrated from the first suitable block,
* starting at the block pointed to by the migrate scanner pfn within
@ -1057,6 +1064,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
unsigned long low_pfn, end_pfn;
struct page *page;
const isolate_mode_t isolate_mode =
(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
(cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0);
/*
@ -1598,6 +1606,14 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
INIT_LIST_HEAD(&cc->freepages);
INIT_LIST_HEAD(&cc->migratepages);
/*
* When called via /proc/sys/vm/compact_memory
* this makes sure we compact the whole zone regardless of
* cached scanner positions.
*/
if (cc->order == -1)
__reset_isolation_suitable(zone);
if (cc->order == -1 || !compaction_deferred(zone, cc->order))
compact_zone(zone, cc);

View file

@ -1019,7 +1019,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
*
* for an example see gup_get_pte in arch/x86/mm/gup.c
*/
pte_t pte = ACCESS_ONCE(*ptep);
pte_t pte = READ_ONCE(*ptep);
struct page *page;
/*
@ -1309,7 +1309,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
pgd_t pgd = ACCESS_ONCE(*pgdp);
pgd_t pgd = READ_ONCE(*pgdp);
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))

View file

@ -67,6 +67,7 @@ static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
static int khugepaged(void *none);
static int khugepaged_slab_init(void);
static void khugepaged_slab_exit(void);
#define MM_SLOTS_HASH_BITS 10
static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
@ -109,9 +110,6 @@ static int set_recommended_min_free_kbytes(void)
int nr_zones = 0;
unsigned long recommended_min;
if (!khugepaged_enabled())
return 0;
for_each_populated_zone(zone)
nr_zones++;
@ -143,9 +141,8 @@ static int set_recommended_min_free_kbytes(void)
setup_per_zone_wmarks();
return 0;
}
late_initcall(set_recommended_min_free_kbytes);
static int start_khugepaged(void)
static int start_stop_khugepaged(void)
{
int err = 0;
if (khugepaged_enabled()) {
@ -156,6 +153,7 @@ static int start_khugepaged(void)
pr_err("khugepaged: kthread_run(khugepaged) failed\n");
err = PTR_ERR(khugepaged_thread);
khugepaged_thread = NULL;
goto fail;
}
if (!list_empty(&khugepaged_scan.mm_head))
@ -166,7 +164,7 @@ static int start_khugepaged(void)
kthread_stop(khugepaged_thread);
khugepaged_thread = NULL;
}
fail:
return err;
}
@ -183,7 +181,7 @@ static struct page *get_huge_zero_page(void)
struct page *zero_page;
retry:
if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
return ACCESS_ONCE(huge_zero_page);
return READ_ONCE(huge_zero_page);
zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
HPAGE_PMD_ORDER);
@ -202,7 +200,7 @@ retry:
/* We take additional reference here. It will be put back by shrinker */
atomic_set(&huge_zero_refcount, 2);
preempt_enable();
return ACCESS_ONCE(huge_zero_page);
return READ_ONCE(huge_zero_page);
}
static void put_huge_zero_page(void)
@ -300,7 +298,7 @@ static ssize_t enabled_store(struct kobject *kobj,
int err;
mutex_lock(&khugepaged_mutex);
err = start_khugepaged();
err = start_stop_khugepaged();
mutex_unlock(&khugepaged_mutex);
if (err)
@ -634,27 +632,38 @@ static int __init hugepage_init(void)
err = hugepage_init_sysfs(&hugepage_kobj);
if (err)
return err;
goto err_sysfs;
err = khugepaged_slab_init();
if (err)
goto out;
goto err_slab;
register_shrinker(&huge_zero_page_shrinker);
err = register_shrinker(&huge_zero_page_shrinker);
if (err)
goto err_hzp_shrinker;
/*
* By default disable transparent hugepages on smaller systems,
* where the extra memory used could hurt more than TLB overhead
* is likely to save. The admin can still enable it through /sys.
*/
if (totalram_pages < (512 << (20 - PAGE_SHIFT)))
if (totalram_pages < (512 << (20 - PAGE_SHIFT))) {
transparent_hugepage_flags = 0;
return 0;
}
start_khugepaged();
err = start_stop_khugepaged();
if (err)
goto err_khugepaged;
return 0;
out:
err_khugepaged:
unregister_shrinker(&huge_zero_page_shrinker);
err_hzp_shrinker:
khugepaged_slab_exit();
err_slab:
hugepage_exit_sysfs(hugepage_kobj);
err_sysfs:
return err;
}
subsys_initcall(hugepage_init);
@ -708,7 +717,7 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long haddr, pmd_t *pmd,
struct page *page)
struct page *page, gfp_t gfp)
{
struct mem_cgroup *memcg;
pgtable_t pgtable;
@ -716,7 +725,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
VM_BUG_ON_PAGE(!PageCompound(page), page);
if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
return VM_FAULT_OOM;
pgtable = pte_alloc_one(mm, haddr);
@ -822,7 +831,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@ -1080,6 +1089,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long haddr;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
gfp_t huge_gfp; /* for allocation and charge */
ptl = pmd_lockptr(mm, pmd);
VM_BUG_ON_VMA(!vma->anon_vma, vma);
@ -1106,10 +1116,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
alloc:
if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow()) {
gfp_t gfp;
gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
} else
new_page = NULL;
@ -1130,8 +1138,7 @@ alloc:
goto out;
}
if (unlikely(mem_cgroup_try_charge(new_page, mm,
GFP_TRANSHUGE, &memcg))) {
if (unlikely(mem_cgroup_try_charge(new_page, mm, huge_gfp, &memcg))) {
put_page(new_page);
if (page) {
split_huge_page(page);
@ -1976,6 +1983,11 @@ static int __init khugepaged_slab_init(void)
return 0;
}
static void __init khugepaged_slab_exit(void)
{
kmem_cache_destroy(mm_slot_cache);
}
static inline struct mm_slot *alloc_mm_slot(void)
{
if (!mm_slot_cache) /* initialization failed */
@ -2323,19 +2335,13 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
return true;
}
static struct page
*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
static struct page *
khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
int node)
{
gfp_t flags;
VM_BUG_ON_PAGE(*hpage, *hpage);
/* Only allocate from the target node */
flags = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
__GFP_THISNODE;
/*
* Before allocating the hugepage, release the mmap_sem read lock.
* The allocation can take potentially a long time if it involves
@ -2344,7 +2350,7 @@ static struct page
*/
up_read(&mm->mmap_sem);
*hpage = alloc_pages_exact_node(node, flags, HPAGE_PMD_ORDER);
*hpage = alloc_pages_exact_node(node, gfp, HPAGE_PMD_ORDER);
if (unlikely(!*hpage)) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
*hpage = ERR_PTR(-ENOMEM);
@ -2397,13 +2403,14 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
return true;
}
static struct page
*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
static struct page *
khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
int node)
{
up_read(&mm->mmap_sem);
VM_BUG_ON(!*hpage);
return *hpage;
}
#endif
@ -2438,16 +2445,21 @@ static void collapse_huge_page(struct mm_struct *mm,
struct mem_cgroup *memcg;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
gfp_t gfp;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
/* Only allocate from the target node */
gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
__GFP_THISNODE;
/* release the mmap_sem read lock. */
new_page = khugepaged_alloc_page(hpage, mm, vma, address, node);
new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node);
if (!new_page)
return;
if (unlikely(mem_cgroup_try_charge(new_page, mm,
GFP_TRANSHUGE, &memcg)))
gfp, &memcg)))
return;
/*

View file

@ -61,6 +61,9 @@ DEFINE_SPINLOCK(hugetlb_lock);
static int num_fault_mutexes;
static struct mutex *htlb_fault_mutex_table ____cacheline_aligned_in_smp;
/* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta);
static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
{
bool free = (spool->count == 0) && (spool->used_hpages == 0);
@ -68,23 +71,36 @@ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
spin_unlock(&spool->lock);
/* If no pages are used, and no other handles to the subpool
* remain, free the subpool the subpool remain */
if (free)
* remain, give up any reservations mased on minimum size and
* free the subpool */
if (free) {
if (spool->min_hpages != -1)
hugetlb_acct_memory(spool->hstate,
-spool->min_hpages);
kfree(spool);
}
}
struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
long min_hpages)
{
struct hugepage_subpool *spool;
spool = kmalloc(sizeof(*spool), GFP_KERNEL);
spool = kzalloc(sizeof(*spool), GFP_KERNEL);
if (!spool)
return NULL;
spin_lock_init(&spool->lock);
spool->count = 1;
spool->max_hpages = nr_blocks;
spool->used_hpages = 0;
spool->max_hpages = max_hpages;
spool->hstate = h;
spool->min_hpages = min_hpages;
if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages)) {
kfree(spool);
return NULL;
}
spool->rsv_hpages = min_hpages;
return spool;
}
@ -97,36 +113,89 @@ void hugepage_put_subpool(struct hugepage_subpool *spool)
unlock_or_release_subpool(spool);
}
static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
/*
* Subpool accounting for allocating and reserving pages.
* Return -ENOMEM if there are not enough resources to satisfy the
* the request. Otherwise, return the number of pages by which the
* global pools must be adjusted (upward). The returned value may
* only be different than the passed value (delta) in the case where
* a subpool minimum size must be manitained.
*/
static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
long delta)
{
int ret = 0;
long ret = delta;
if (!spool)
return 0;
return ret;
spin_lock(&spool->lock);
if ((spool->used_hpages + delta) <= spool->max_hpages) {
spool->used_hpages += delta;
} else {
ret = -ENOMEM;
}
spin_unlock(&spool->lock);
if (spool->max_hpages != -1) { /* maximum size accounting */
if ((spool->used_hpages + delta) <= spool->max_hpages)
spool->used_hpages += delta;
else {
ret = -ENOMEM;
goto unlock_ret;
}
}
if (spool->min_hpages != -1) { /* minimum size accounting */
if (delta > spool->rsv_hpages) {
/*
* Asking for more reserves than those already taken on
* behalf of subpool. Return difference.
*/
ret = delta - spool->rsv_hpages;
spool->rsv_hpages = 0;
} else {
ret = 0; /* reserves already accounted for */
spool->rsv_hpages -= delta;
}
}
unlock_ret:
spin_unlock(&spool->lock);
return ret;
}
static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
/*
* Subpool accounting for freeing and unreserving pages.
* Return the number of global page reservations that must be dropped.
* The return value may only be different than the passed value (delta)
* in the case where a subpool minimum size must be maintained.
*/
static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
long delta)
{
long ret = delta;
if (!spool)
return;
return delta;
spin_lock(&spool->lock);
spool->used_hpages -= delta;
/* If hugetlbfs_put_super couldn't free spool due to
* an outstanding quota reference, free it now. */
if (spool->max_hpages != -1) /* maximum size accounting */
spool->used_hpages -= delta;
if (spool->min_hpages != -1) { /* minimum size accounting */
if (spool->rsv_hpages + delta <= spool->min_hpages)
ret = 0;
else
ret = spool->rsv_hpages + delta - spool->min_hpages;
spool->rsv_hpages += delta;
if (spool->rsv_hpages > spool->min_hpages)
spool->rsv_hpages = spool->min_hpages;
}
/*
* If hugetlbfs_put_super couldn't free spool due to an outstanding
* quota reference, free it now.
*/
unlock_or_release_subpool(spool);
return ret;
}
static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
@ -855,6 +924,31 @@ struct hstate *size_to_hstate(unsigned long size)
return NULL;
}
/*
* Test to determine whether the hugepage is "active/in-use" (i.e. being linked
* to hstate->hugepage_activelist.)
*
* This function can be called for tail pages, but never returns true for them.
*/
bool page_huge_active(struct page *page)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);
return PageHead(page) && PagePrivate(&page[1]);
}
/* never called for tail page */
static void set_page_huge_active(struct page *page)
{
VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
SetPagePrivate(&page[1]);
}
static void clear_page_huge_active(struct page *page)
{
VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
ClearPagePrivate(&page[1]);
}
void free_huge_page(struct page *page)
{
/*
@ -874,7 +968,16 @@ void free_huge_page(struct page *page)
restore_reserve = PagePrivate(page);
ClearPagePrivate(page);
/*
* A return code of zero implies that the subpool will be under its
* minimum size if the reservation is not restored after page is free.
* Therefore, force restore_reserve operation.
*/
if (hugepage_subpool_put_pages(spool, 1) == 0)
restore_reserve = true;
spin_lock(&hugetlb_lock);
clear_page_huge_active(page);
hugetlb_cgroup_uncharge_page(hstate_index(h),
pages_per_huge_page(h), page);
if (restore_reserve)
@ -891,7 +994,6 @@ void free_huge_page(struct page *page)
enqueue_huge_page(h, page);
}
spin_unlock(&hugetlb_lock);
hugepage_subpool_put_pages(spool, 1);
}
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@ -1386,7 +1488,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
if (chg < 0)
return ERR_PTR(-ENOMEM);
if (chg || avoid_reserve)
if (hugepage_subpool_get_pages(spool, 1))
if (hugepage_subpool_get_pages(spool, 1) < 0)
return ERR_PTR(-ENOSPC);
ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
@ -2454,6 +2556,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
struct resv_map *resv = vma_resv_map(vma);
struct hugepage_subpool *spool = subpool_vma(vma);
unsigned long reserve, start, end;
long gbl_reserve;
if (!resv || !is_vma_resv_set(vma, HPAGE_RESV_OWNER))
return;
@ -2466,8 +2569,12 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
kref_put(&resv->refs, resv_map_release);
if (reserve) {
hugetlb_acct_memory(h, -reserve);
hugepage_subpool_put_pages(spool, reserve);
/*
* Decrement reserve counts. The global reserve count may be
* adjusted if the subpool has a minimum size.
*/
gbl_reserve = hugepage_subpool_put_pages(spool, reserve);
hugetlb_acct_memory(h, -gbl_reserve);
}
}
@ -2891,6 +2998,7 @@ retry_avoidcopy:
copy_user_huge_page(new_page, old_page, address, vma,
pages_per_huge_page(h));
__SetPageUptodate(new_page);
set_page_huge_active(new_page);
mmun_start = address & huge_page_mask(h);
mmun_end = mmun_start + huge_page_size(h);
@ -3003,6 +3111,7 @@ retry:
}
clear_huge_page(page, address, pages_per_huge_page(h));
__SetPageUptodate(page);
set_page_huge_active(page);
if (vma->vm_flags & VM_MAYSHARE) {
int err;
@ -3447,6 +3556,7 @@ int hugetlb_reserve_pages(struct inode *inode,
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
long gbl_reserve;
/*
* Only apply hugepage reservation if asked. At fault time, an
@ -3483,8 +3593,13 @@ int hugetlb_reserve_pages(struct inode *inode,
goto out_err;
}
/* There must be enough pages in the subpool for the mapping */
if (hugepage_subpool_get_pages(spool, chg)) {
/*
* There must be enough pages in the subpool for the mapping. If
* the subpool has a minimum size, there may be some global
* reservations already in place (gbl_reserve).
*/
gbl_reserve = hugepage_subpool_get_pages(spool, chg);
if (gbl_reserve < 0) {
ret = -ENOSPC;
goto out_err;
}
@ -3493,9 +3608,10 @@ int hugetlb_reserve_pages(struct inode *inode,
* Check enough hugepages are available for the reservation.
* Hand the pages back to the subpool if there are not
*/
ret = hugetlb_acct_memory(h, chg);
ret = hugetlb_acct_memory(h, gbl_reserve);
if (ret < 0) {
hugepage_subpool_put_pages(spool, chg);
/* put back original number of pages, chg */
(void)hugepage_subpool_put_pages(spool, chg);
goto out_err;
}
@ -3525,6 +3641,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
struct resv_map *resv_map = inode_resv_map(inode);
long chg = 0;
struct hugepage_subpool *spool = subpool_inode(inode);
long gbl_reserve;
if (resv_map)
chg = region_truncate(resv_map, offset);
@ -3532,8 +3649,12 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
inode->i_blocks -= (blocks_per_huge_page(h) * freed);
spin_unlock(&inode->i_lock);
hugepage_subpool_put_pages(spool, (chg - freed));
hugetlb_acct_memory(h, -(chg - freed));
/*
* If the subpool has a minimum size, the number of global
* reservations to be released may be adjusted.
*/
gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed));
hugetlb_acct_memory(h, -gbl_reserve);
}
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
@ -3775,20 +3896,6 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
#ifdef CONFIG_MEMORY_FAILURE
/* Should be called in hugetlb_lock */
static int is_hugepage_on_freelist(struct page *hpage)
{
struct page *page;
struct page *tmp;
struct hstate *h = page_hstate(hpage);
int nid = page_to_nid(hpage);
list_for_each_entry_safe(page, tmp, &h->hugepage_freelists[nid], lru)
if (page == hpage)
return 1;
return 0;
}
/*
* This function is called from memory failure code.
* Assume the caller holds page lock of the head page.
@ -3800,7 +3907,11 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
int ret = -EBUSY;
spin_lock(&hugetlb_lock);
if (is_hugepage_on_freelist(hpage)) {
/*
* Just checking !page_huge_active is not enough, because that could be
* an isolated/hwpoisoned hugepage (which have >0 refcount).
*/
if (!page_huge_active(hpage) && !page_count(hpage)) {
/*
* Hwpoisoned hugepage isn't linked to activelist or freelist,
* but dangling hpage->lru can trigger list-debug warnings
@ -3820,42 +3931,27 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
bool isolate_huge_page(struct page *page, struct list_head *list)
{
bool ret = true;
VM_BUG_ON_PAGE(!PageHead(page), page);
if (!get_page_unless_zero(page))
return false;
spin_lock(&hugetlb_lock);
if (!page_huge_active(page) || !get_page_unless_zero(page)) {
ret = false;
goto unlock;
}
clear_page_huge_active(page);
list_move_tail(&page->lru, list);
unlock:
spin_unlock(&hugetlb_lock);
return true;
return ret;
}
void putback_active_hugepage(struct page *page)
{
VM_BUG_ON_PAGE(!PageHead(page), page);
spin_lock(&hugetlb_lock);
set_page_huge_active(page);
list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
spin_unlock(&hugetlb_lock);
put_page(page);
}
bool is_hugepage_active(struct page *page)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);
/*
* This function can be called for a tail page because the caller,
* scan_movable_pages, scans through a given pfn-range which typically
* covers one memory block. In systems using gigantic hugepage (1GB
* for x86_64,) a hugepage is larger than a memory block, and we don't
* support migrating such large hugepages for now, so return false
* when called for tail pages.
*/
if (PageTail(page))
return false;
/*
* Refcount of a hwpoisoned hugepages is 1, but they are not active,
* so we should return false for them.
*/
if (unlikely(PageHWPoison(page)))
return false;
return page_count(page) > 0;
}

View file

@ -224,13 +224,13 @@ static inline unsigned long page_order(struct page *page)
* PageBuddy() should be checked first by the caller to minimize race window,
* and invalid values must be handled gracefully.
*
* ACCESS_ONCE is used so that if the caller assigns the result into a local
* READ_ONCE is used so that if the caller assigns the result into a local
* variable and e.g. tests it for valid range before using, the compiler cannot
* decide to remove the variable and inline the page_private(page) multiple
* times, potentially observing different values in the tests and the actual
* use of the result.
*/
#define page_order_unsafe(page) ACCESS_ONCE(page_private(page))
#define page_order_unsafe(page) READ_ONCE(page_private(page))
static inline bool is_cow_mapping(vm_flags_t flags)
{

View file

@ -389,6 +389,19 @@ void kasan_krealloc(const void *object, size_t size)
kasan_kmalloc(page->slab_cache, object, size);
}
void kasan_kfree(void *ptr)
{
struct page *page;
page = virt_to_head_page(ptr);
if (unlikely(!PageSlab(page)))
kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
KASAN_FREE_PAGE);
else
kasan_slab_free(page->slab_cache, ptr);
}
void kasan_kfree_large(const void *ptr)
{
struct page *page = virt_to_page(ptr);

View file

@ -542,7 +542,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
expected_mapping = (void *)stable_node +
(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
again:
kpfn = ACCESS_ONCE(stable_node->kpfn);
kpfn = READ_ONCE(stable_node->kpfn);
page = pfn_to_page(kpfn);
/*
@ -551,7 +551,7 @@ again:
* but on Alpha we need to be more careful.
*/
smp_read_barrier_depends();
if (ACCESS_ONCE(page->mapping) != expected_mapping)
if (READ_ONCE(page->mapping) != expected_mapping)
goto stale;
/*
@ -577,14 +577,14 @@ again:
cpu_relax();
}
if (ACCESS_ONCE(page->mapping) != expected_mapping) {
if (READ_ONCE(page->mapping) != expected_mapping) {
put_page(page);
goto stale;
}
if (lock_it) {
lock_page(page);
if (ACCESS_ONCE(page->mapping) != expected_mapping) {
if (READ_ONCE(page->mapping) != expected_mapping) {
unlock_page(page);
put_page(page);
goto stale;
@ -600,7 +600,7 @@ stale:
* before checking whether node->kpfn has been changed.
*/
smp_rmb();
if (ACCESS_ONCE(stable_node->kpfn) != kpfn)
if (READ_ONCE(stable_node->kpfn) != kpfn)
goto again;
remove_node_from_stable_tree(stable_node);
return NULL;

View file

@ -580,10 +580,24 @@ int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
return memblock_add_range(&memblock.memory, base, size, nid, 0);
}
static int __init_memblock memblock_add_region(phys_addr_t base,
phys_addr_t size,
int nid,
unsigned long flags)
{
struct memblock_type *_rgn = &memblock.memory;
memblock_dbg("memblock_add: [%#016llx-%#016llx] flags %#02lx %pF\n",
(unsigned long long)base,
(unsigned long long)base + size - 1,
flags, (void *)_RET_IP_);
return memblock_add_range(_rgn, base, size, nid, flags);
}
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
return memblock_add_range(&memblock.memory, base, size,
MAX_NUMNODES, 0);
return memblock_add_region(base, size, MAX_NUMNODES, 0);
}
/**

View file

@ -259,11 +259,6 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
* page cache and RSS per cgroup. We would eventually like to provide
* statistics based on the statistics developed by Rik Van Riel for clock-pro,
* to help the administrator determine what knobs to tune.
*
* TODO: Add a water mark for the memory controller. Reclaim will begin when
* we hit the water mark. May be even add a low water mark, such that
* no reclaim occurs from a cgroup at it's low water mark, this is
* a feature that will be implemented much later in the future.
*/
struct mem_cgroup {
struct cgroup_subsys_state css;
@ -460,6 +455,12 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
return memcg->css.id;
}
/*
* A helper function to get mem_cgroup from ID. must be called under
* rcu_read_lock(). The caller is responsible for calling
* css_tryget_online() if the mem_cgroup is used for charging. (dropping
* refcnt from swap can be called against removed memcg.)
*/
static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
{
struct cgroup_subsys_state *css;
@ -673,7 +674,7 @@ static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
{
unsigned long nr_pages = page_counter_read(&memcg->memory);
unsigned long soft_limit = ACCESS_ONCE(memcg->soft_limit);
unsigned long soft_limit = READ_ONCE(memcg->soft_limit);
unsigned long excess = 0;
if (nr_pages > soft_limit)
@ -1041,7 +1042,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
goto out_unlock;
do {
pos = ACCESS_ONCE(iter->position);
pos = READ_ONCE(iter->position);
/*
* A racing update may change the position and
* put the last reference, hence css_tryget(),
@ -1358,13 +1359,13 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
unsigned long limit;
count = page_counter_read(&memcg->memory);
limit = ACCESS_ONCE(memcg->memory.limit);
limit = READ_ONCE(memcg->memory.limit);
if (count < limit)
margin = limit - count;
if (do_swap_account) {
count = page_counter_read(&memcg->memsw);
limit = ACCESS_ONCE(memcg->memsw.limit);
limit = READ_ONCE(memcg->memsw.limit);
if (count <= limit)
margin = min(margin, limit - count);
}
@ -2348,20 +2349,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
css_put_many(&memcg->css, nr_pages);
}
/*
* A helper function to get mem_cgroup from ID. must be called under
* rcu_read_lock(). The caller is responsible for calling
* css_tryget_online() if the mem_cgroup is used for charging. (dropping
* refcnt from swap can be called against removed memcg.)
*/
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
/* ID 0 is unused ID */
if (!id)
return NULL;
return mem_cgroup_from_id(id);
}
/*
* try_get_mem_cgroup_from_page - look up page's memcg association
* @page: the page
@ -2388,7 +2375,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
ent.val = page_private(page);
id = lookup_swap_cgroup_id(ent);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
memcg = mem_cgroup_from_id(id);
if (memcg && !css_tryget_online(&memcg->css))
memcg = NULL;
rcu_read_unlock();
@ -2650,7 +2637,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
return cachep;
memcg = get_mem_cgroup_from_mm(current->mm);
kmemcg_id = ACCESS_ONCE(memcg->kmemcg_id);
kmemcg_id = READ_ONCE(memcg->kmemcg_id);
if (kmemcg_id < 0)
goto out;
@ -5020,7 +5007,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
* tunable will only affect upcoming migrations, not the current one.
* So we need to save it, and keep it going.
*/
move_flags = ACCESS_ONCE(memcg->move_charge_at_immigrate);
move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
if (move_flags) {
struct mm_struct *mm;
struct mem_cgroup *from = mem_cgroup_from_task(p);
@ -5254,7 +5241,7 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
static int memory_low_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
unsigned long low = ACCESS_ONCE(memcg->low);
unsigned long low = READ_ONCE(memcg->low);
if (low == PAGE_COUNTER_MAX)
seq_puts(m, "max\n");
@ -5284,7 +5271,7 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
static int memory_high_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
unsigned long high = ACCESS_ONCE(memcg->high);
unsigned long high = READ_ONCE(memcg->high);
if (high == PAGE_COUNTER_MAX)
seq_puts(m, "max\n");
@ -5314,7 +5301,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
static int memory_max_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
unsigned long max = ACCESS_ONCE(memcg->memory.limit);
unsigned long max = READ_ONCE(memcg->memory.limit);
if (max == PAGE_COUNTER_MAX)
seq_puts(m, "max\n");
@ -5869,7 +5856,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
id = swap_cgroup_record(entry, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
memcg = mem_cgroup_from_id(id);
if (memcg) {
if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memsw, 1);

View file

@ -521,6 +521,52 @@ static const char *action_name[] = {
[RECOVERED] = "Recovered",
};
enum action_page_type {
MSG_KERNEL,
MSG_KERNEL_HIGH_ORDER,
MSG_SLAB,
MSG_DIFFERENT_COMPOUND,
MSG_POISONED_HUGE,
MSG_HUGE,
MSG_FREE_HUGE,
MSG_UNMAP_FAILED,
MSG_DIRTY_SWAPCACHE,
MSG_CLEAN_SWAPCACHE,
MSG_DIRTY_MLOCKED_LRU,
MSG_CLEAN_MLOCKED_LRU,
MSG_DIRTY_UNEVICTABLE_LRU,
MSG_CLEAN_UNEVICTABLE_LRU,
MSG_DIRTY_LRU,
MSG_CLEAN_LRU,
MSG_TRUNCATED_LRU,
MSG_BUDDY,
MSG_BUDDY_2ND,
MSG_UNKNOWN,
};
static const char * const action_page_types[] = {
[MSG_KERNEL] = "reserved kernel page",
[MSG_KERNEL_HIGH_ORDER] = "high-order kernel page",
[MSG_SLAB] = "kernel slab page",
[MSG_DIFFERENT_COMPOUND] = "different compound page after locking",
[MSG_POISONED_HUGE] = "huge page already hardware poisoned",
[MSG_HUGE] = "huge page",
[MSG_FREE_HUGE] = "free huge page",
[MSG_UNMAP_FAILED] = "unmapping failed page",
[MSG_DIRTY_SWAPCACHE] = "dirty swapcache page",
[MSG_CLEAN_SWAPCACHE] = "clean swapcache page",
[MSG_DIRTY_MLOCKED_LRU] = "dirty mlocked LRU page",
[MSG_CLEAN_MLOCKED_LRU] = "clean mlocked LRU page",
[MSG_DIRTY_UNEVICTABLE_LRU] = "dirty unevictable LRU page",
[MSG_CLEAN_UNEVICTABLE_LRU] = "clean unevictable LRU page",
[MSG_DIRTY_LRU] = "dirty LRU page",
[MSG_CLEAN_LRU] = "clean LRU page",
[MSG_TRUNCATED_LRU] = "already truncated LRU page",
[MSG_BUDDY] = "free buddy page",
[MSG_BUDDY_2ND] = "free buddy page (2nd try)",
[MSG_UNKNOWN] = "unknown page",
};
/*
* XXX: It is possible that a page is isolated from LRU cache,
* and then kept in swap cache or failed to remove from page cache.
@ -777,10 +823,10 @@ static int me_huge_page(struct page *p, unsigned long pfn)
static struct page_state {
unsigned long mask;
unsigned long res;
char *msg;
enum action_page_type type;
int (*action)(struct page *p, unsigned long pfn);
} error_states[] = {
{ reserved, reserved, "reserved kernel", me_kernel },
{ reserved, reserved, MSG_KERNEL, me_kernel },
/*
* free pages are specially detected outside this table:
* PG_buddy pages only make a small fraction of all free pages.
@ -791,31 +837,31 @@ static struct page_state {
* currently unused objects without touching them. But just
* treat it as standard kernel for now.
*/
{ slab, slab, "kernel slab", me_kernel },
{ slab, slab, MSG_SLAB, me_kernel },
#ifdef CONFIG_PAGEFLAGS_EXTENDED
{ head, head, "huge", me_huge_page },
{ tail, tail, "huge", me_huge_page },
{ head, head, MSG_HUGE, me_huge_page },
{ tail, tail, MSG_HUGE, me_huge_page },
#else
{ compound, compound, "huge", me_huge_page },
{ compound, compound, MSG_HUGE, me_huge_page },
#endif
{ sc|dirty, sc|dirty, "dirty swapcache", me_swapcache_dirty },
{ sc|dirty, sc, "clean swapcache", me_swapcache_clean },
{ sc|dirty, sc|dirty, MSG_DIRTY_SWAPCACHE, me_swapcache_dirty },
{ sc|dirty, sc, MSG_CLEAN_SWAPCACHE, me_swapcache_clean },
{ mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty },
{ mlock|dirty, mlock, "clean mlocked LRU", me_pagecache_clean },
{ mlock|dirty, mlock|dirty, MSG_DIRTY_MLOCKED_LRU, me_pagecache_dirty },
{ mlock|dirty, mlock, MSG_CLEAN_MLOCKED_LRU, me_pagecache_clean },
{ unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty },
{ unevict|dirty, unevict, "clean unevictable LRU", me_pagecache_clean },
{ unevict|dirty, unevict|dirty, MSG_DIRTY_UNEVICTABLE_LRU, me_pagecache_dirty },
{ unevict|dirty, unevict, MSG_CLEAN_UNEVICTABLE_LRU, me_pagecache_clean },
{ lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty },
{ lru|dirty, lru, "clean LRU", me_pagecache_clean },
{ lru|dirty, lru|dirty, MSG_DIRTY_LRU, me_pagecache_dirty },
{ lru|dirty, lru, MSG_CLEAN_LRU, me_pagecache_clean },
/*
* Catchall entry: must be at end.
*/
{ 0, 0, "unknown page state", me_unknown },
{ 0, 0, MSG_UNKNOWN, me_unknown },
};
#undef dirty
@ -835,10 +881,10 @@ static struct page_state {
* "Dirty/Clean" indication is not 100% accurate due to the possibility of
* setting PG_dirty outside page lock. See also comment above set_page_dirty().
*/
static void action_result(unsigned long pfn, char *msg, int result)
static void action_result(unsigned long pfn, enum action_page_type type, int result)
{
pr_err("MCE %#lx: %s page recovery: %s\n",
pfn, msg, action_name[result]);
pr_err("MCE %#lx: recovery action for %s: %s\n",
pfn, action_page_types[type], action_name[result]);
}
static int page_action(struct page_state *ps, struct page *p,
@ -854,11 +900,11 @@ static int page_action(struct page_state *ps, struct page *p,
count--;
if (count != 0) {
printk(KERN_ERR
"MCE %#lx: %s page still referenced by %d users\n",
pfn, ps->msg, count);
"MCE %#lx: %s still referenced by %d users\n",
pfn, action_page_types[ps->type], count);
result = FAILED;
}
action_result(pfn, ps->msg, result);
action_result(pfn, ps->type, result);
/* Could do more checks here if page looks ok */
/*
@ -1106,7 +1152,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
if (!(flags & MF_COUNT_INCREASED) &&
!get_page_unless_zero(hpage)) {
if (is_free_buddy_page(p)) {
action_result(pfn, "free buddy", DELAYED);
action_result(pfn, MSG_BUDDY, DELAYED);
return 0;
} else if (PageHuge(hpage)) {
/*
@ -1123,12 +1169,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
}
set_page_hwpoison_huge_page(hpage);
res = dequeue_hwpoisoned_huge_page(hpage);
action_result(pfn, "free huge",
action_result(pfn, MSG_FREE_HUGE,
res ? IGNORED : DELAYED);
unlock_page(hpage);
return res;
} else {
action_result(pfn, "high order kernel", IGNORED);
action_result(pfn, MSG_KERNEL_HIGH_ORDER, IGNORED);
return -EBUSY;
}
}
@ -1150,9 +1196,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
*/
if (is_free_buddy_page(p)) {
if (flags & MF_COUNT_INCREASED)
action_result(pfn, "free buddy", DELAYED);
action_result(pfn, MSG_BUDDY, DELAYED);
else
action_result(pfn, "free buddy, 2nd try", DELAYED);
action_result(pfn, MSG_BUDDY_2ND,
DELAYED);
return 0;
}
}
@ -1165,7 +1212,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
* If this happens just bail out.
*/
if (compound_head(p) != hpage) {
action_result(pfn, "different compound page after locking", IGNORED);
action_result(pfn, MSG_DIFFERENT_COMPOUND, IGNORED);
res = -EBUSY;
goto out;
}
@ -1205,8 +1252,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
* on the head page to show that the hugepage is hwpoisoned
*/
if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
action_result(pfn, "hugepage already hardware poisoned",
IGNORED);
action_result(pfn, MSG_POISONED_HUGE, IGNORED);
unlock_page(hpage);
put_page(hpage);
return 0;
@ -1235,7 +1281,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
*/
if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage)
!= SWAP_SUCCESS) {
action_result(pfn, "unmapping failed", IGNORED);
action_result(pfn, MSG_UNMAP_FAILED, IGNORED);
res = -EBUSY;
goto out;
}
@ -1244,7 +1290,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
* Torn down by someone else?
*/
if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
action_result(pfn, "already truncated LRU", IGNORED);
action_result(pfn, MSG_TRUNCATED_LRU, IGNORED);
res = -EBUSY;
goto out;
}
@ -1540,8 +1586,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
}
unlock_page(hpage);
/* Keep page count to indicate a given hugepage is isolated. */
list_move(&hpage->lru, &pagelist);
ret = isolate_huge_page(hpage, &pagelist);
if (ret) {
/*
* get_any_page() and isolate_huge_page() takes a refcount each,
* so need to drop one here.
*/
put_page(hpage);
} else {
pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
return -EBUSY;
}
ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) {

View file

@ -690,12 +690,11 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
/*
* Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y
*/
if (vma->vm_ops)
printk(KERN_ALERT "vma->vm_ops->fault: %pSR\n",
vma->vm_ops->fault);
if (vma->vm_file)
printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n",
vma->vm_file->f_op->mmap);
pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n",
vma->vm_file,
vma->vm_ops ? vma->vm_ops->fault : NULL,
vma->vm_file ? vma->vm_file->f_op->mmap : NULL,
mapping ? mapping->a_ops->readpage : NULL);
dump_stack();
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
@ -2181,6 +2180,42 @@ oom:
return VM_FAULT_OOM;
}
/*
* Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
* mapping
*/
static int wp_pfn_shared(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
pte_t *page_table, spinlock_t *ptl, pte_t orig_pte,
pmd_t *pmd)
{
if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
struct vm_fault vmf = {
.page = NULL,
.pgoff = linear_page_index(vma, address),
.virtual_address = (void __user *)(address & PAGE_MASK),
.flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE,
};
int ret;
pte_unmap_unlock(page_table, ptl);
ret = vma->vm_ops->pfn_mkwrite(vma, &vmf);
if (ret & VM_FAULT_ERROR)
return ret;
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
/*
* We might have raced with another page fault while we
* released the pte_offset_map_lock.
*/
if (!pte_same(*page_table, orig_pte)) {
pte_unmap_unlock(page_table, ptl);
return 0;
}
}
return wp_page_reuse(mm, vma, address, page_table, ptl, orig_pte,
NULL, 0, 0);
}
static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table,
pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte,
@ -2259,13 +2294,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
* VM_PFNMAP VMA.
*
* We should not cow pages in a shared writeable mapping.
* Just mark the pages writable as we can't do any dirty
* accounting on raw pfn maps.
* Just mark the pages writable and/or call ops->pfn_mkwrite.
*/
if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
(VM_WRITE|VM_SHARED))
return wp_page_reuse(mm, vma, address, page_table, ptl,
orig_pte, old_page, 0, 0);
return wp_pfn_shared(mm, vma, address, page_table, ptl,
orig_pte, pmd);
pte_unmap_unlock(page_table, ptl);
return wp_page_copy(mm, vma, address, page_table, pmd,
@ -2845,7 +2879,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
struct vm_fault vmf;
int off;
nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT;
nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
start_addr = max(address & mask, vma->vm_start);

View file

@ -1373,7 +1373,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
if (PageLRU(page))
return pfn;
if (PageHuge(page)) {
if (is_hugepage_active(page))
if (page_huge_active(page))
return pfn;
else
pfn = round_up(pfn + 1,

View file

@ -6,26 +6,138 @@
* extreme VM load.
*
* started by Ingo Molnar, Copyright (C) 2001
* debugging by David Rientjes, Copyright (C) 2015
*/
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/kasan.h>
#include <linux/kmemleak.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include "slab.h"
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
static void poison_error(mempool_t *pool, void *element, size_t size,
size_t byte)
{
const int nr = pool->curr_nr;
const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0);
const int end = min_t(int, byte + (BITS_PER_LONG / 8), size);
int i;
pr_err("BUG: mempool element poison mismatch\n");
pr_err("Mempool %p size %zu\n", pool, size);
pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : "");
for (i = start; i < end; i++)
pr_cont("%x ", *(u8 *)(element + i));
pr_cont("%s\n", end < size ? "..." : "");
dump_stack();
}
static void __check_element(mempool_t *pool, void *element, size_t size)
{
u8 *obj = element;
size_t i;
for (i = 0; i < size; i++) {
u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;
if (obj[i] != exp) {
poison_error(pool, element, size, i);
return;
}
}
memset(obj, POISON_INUSE, size);
}
static void check_element(mempool_t *pool, void *element)
{
/* Mempools backed by slab allocator */
if (pool->free == mempool_free_slab || pool->free == mempool_kfree)
__check_element(pool, element, ksize(element));
/* Mempools backed by page allocator */
if (pool->free == mempool_free_pages) {
int order = (int)(long)pool->pool_data;
void *addr = kmap_atomic((struct page *)element);
__check_element(pool, addr, 1UL << (PAGE_SHIFT + order));
kunmap_atomic(addr);
}
}
static void __poison_element(void *element, size_t size)
{
u8 *obj = element;
memset(obj, POISON_FREE, size - 1);
obj[size - 1] = POISON_END;
}
static void poison_element(mempool_t *pool, void *element)
{
/* Mempools backed by slab allocator */
if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
__poison_element(element, ksize(element));
/* Mempools backed by page allocator */
if (pool->alloc == mempool_alloc_pages) {
int order = (int)(long)pool->pool_data;
void *addr = kmap_atomic((struct page *)element);
__poison_element(addr, 1UL << (PAGE_SHIFT + order));
kunmap_atomic(addr);
}
}
#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
static inline void check_element(mempool_t *pool, void *element)
{
}
static inline void poison_element(mempool_t *pool, void *element)
{
}
#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
static void kasan_poison_element(mempool_t *pool, void *element)
{
if (pool->alloc == mempool_alloc_slab)
kasan_slab_free(pool->pool_data, element);
if (pool->alloc == mempool_kmalloc)
kasan_kfree(element);
if (pool->alloc == mempool_alloc_pages)
kasan_free_pages(element, (unsigned long)pool->pool_data);
}
static void kasan_unpoison_element(mempool_t *pool, void *element)
{
if (pool->alloc == mempool_alloc_slab)
kasan_slab_alloc(pool->pool_data, element);
if (pool->alloc == mempool_kmalloc)
kasan_krealloc(element, (size_t)pool->pool_data);
if (pool->alloc == mempool_alloc_pages)
kasan_alloc_pages(element, (unsigned long)pool->pool_data);
}
static void add_element(mempool_t *pool, void *element)
{
BUG_ON(pool->curr_nr >= pool->min_nr);
poison_element(pool, element);
kasan_poison_element(pool, element);
pool->elements[pool->curr_nr++] = element;
}
static void *remove_element(mempool_t *pool)
{
BUG_ON(pool->curr_nr <= 0);
return pool->elements[--pool->curr_nr];
void *element = pool->elements[--pool->curr_nr];
BUG_ON(pool->curr_nr < 0);
check_element(pool, element);
kasan_unpoison_element(pool, element);
return element;
}
/**
@ -334,6 +446,7 @@ EXPORT_SYMBOL(mempool_free);
void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
{
struct kmem_cache *mem = pool_data;
VM_BUG_ON(mem->ctor);
return kmem_cache_alloc(mem, gfp_mask);
}
EXPORT_SYMBOL(mempool_alloc_slab);

View file

@ -537,7 +537,8 @@ void migrate_page_copy(struct page *newpage, struct page *page)
* Please do not reorder this without considering how mm/ksm.c's
* get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
*/
ClearPageSwapCache(page);
if (PageSwapCache(page))
ClearPageSwapCache(page);
ClearPagePrivate(page);
set_page_private(page, 0);

View file

@ -1133,7 +1133,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
* by another page fault trying to merge _that_. But that's ok: if it
* is being set up, that automatically means that it will be a singleton
* acceptable for merging, so we can do all of this optimistically. But
* we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
* we do that READ_ONCE() to make sure that we never re-load the pointer.
*
* IOW: that the "list_is_singular()" test on the anon_vma_chain only
* matters for the 'stable anon_vma' case (ie the thing we want to avoid
@ -1147,7 +1147,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
{
if (anon_vma_compatible(a, b)) {
struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
if (anon_vma && list_is_singular(&old->anon_vma_chain))
return anon_vma;
@ -1551,11 +1551,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
/* Clear old maps */
error = -ENOMEM;
munmap_back:
if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
&rb_parent)) {
if (do_munmap(mm, addr, len))
return -ENOMEM;
goto munmap_back;
}
/*
@ -1571,7 +1570,8 @@ munmap_back:
/*
* Can we just expand an old mapping?
*/
vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
NULL);
if (vma)
goto out;
@ -2100,7 +2100,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
actual_size = size;
if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
actual_size -= PAGE_SIZE;
if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
return -ENOMEM;
/* mlock limit tests */
@ -2108,7 +2108,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
unsigned long locked;
unsigned long limit;
locked = mm->locked_vm + grow;
limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
limit >>= PAGE_SHIFT;
if (locked > limit && !capable(CAP_IPC_LOCK))
return -ENOMEM;
@ -2739,11 +2739,10 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
/*
* Clear old maps. this also does some error checking for us
*/
munmap_back:
if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
&rb_parent)) {
if (do_munmap(mm, addr, len))
return -ENOMEM;
goto munmap_back;
}
/* Check against address space limits *after* clearing old maps... */

View file

@ -345,25 +345,25 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
struct vm_area_struct *vma = find_vma(mm, addr);
if (!vma || vma->vm_start > addr)
goto Efault;
return ERR_PTR(-EFAULT);
if (is_vm_hugetlb_page(vma))
goto Einval;
return ERR_PTR(-EINVAL);
/* We can't remap across vm area boundaries */
if (old_len > vma->vm_end - addr)
goto Efault;
return ERR_PTR(-EFAULT);
/* Need to be careful about a growing mapping */
if (new_len > old_len) {
unsigned long pgoff;
if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
goto Efault;
return ERR_PTR(-EFAULT);
pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
pgoff += vma->vm_pgoff;
if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
goto Einval;
return ERR_PTR(-EINVAL);
}
if (vma->vm_flags & VM_LOCKED) {
@ -372,29 +372,20 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
lock_limit = rlimit(RLIMIT_MEMLOCK);
locked += new_len - old_len;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
goto Eagain;
return ERR_PTR(-EAGAIN);
}
if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT))
goto Enomem;
return ERR_PTR(-ENOMEM);
if (vma->vm_flags & VM_ACCOUNT) {
unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
if (security_vm_enough_memory_mm(mm, charged))
goto Efault;
return ERR_PTR(-ENOMEM);
*p = charged;
}
return vma;
Efault: /* very odd choice for most of the cases, but... */
return ERR_PTR(-EFAULT);
Einval:
return ERR_PTR(-EINVAL);
Enomem:
return ERR_PTR(-ENOMEM);
Eagain:
return ERR_PTR(-EAGAIN);
}
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,

View file

@ -408,7 +408,7 @@ bool oom_killer_disabled __read_mostly;
static DECLARE_RWSEM(oom_sem);
/**
* mark_tsk_oom_victim - marks the given taks as OOM victim.
* mark_tsk_oom_victim - marks the given task as OOM victim.
* @tsk: task to mark
*
* Has to be called with oom_sem taken for read and never after

View file

@ -2228,7 +2228,8 @@ int set_page_dirty(struct page *page)
* it will confuse readahead and make it restart the size rampup
* process. But it's a trivial problem.
*/
ClearPageReclaim(page);
if (PageReclaim(page))
ClearPageReclaim(page);
#ifdef CONFIG_BLOCK
if (!spd)
spd = __set_page_dirty_buffers;

View file

@ -1371,7 +1371,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
int to_drain, batch;
local_irq_save(flags);
batch = ACCESS_ONCE(pcp->batch);
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0) {
free_pcppages_bulk(zone, to_drain, pcp);
@ -1570,7 +1570,7 @@ void free_hot_cold_page(struct page *page, bool cold)
list_add_tail(&page->lru, &pcp->lists[migratetype]);
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = ACCESS_ONCE(pcp->batch);
unsigned long batch = READ_ONCE(pcp->batch);
free_pcppages_bulk(zone, batch, pcp);
pcp->count -= batch;
}
@ -6207,7 +6207,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
mask <<= (BITS_PER_LONG - bitidx - 1);
flags <<= (BITS_PER_LONG - bitidx - 1);
word = ACCESS_ONCE(bitmap[word_bitidx]);
word = READ_ONCE(bitmap[word_bitidx]);
for (;;) {
old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
if (word == old_word)

View file

@ -456,7 +456,7 @@ struct anon_vma *page_get_anon_vma(struct page *page)
unsigned long anon_mapping;
rcu_read_lock();
anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
anon_mapping = (unsigned long)READ_ONCE(page->mapping);
if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
goto out;
if (!page_mapped(page))
@ -500,14 +500,14 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page)
unsigned long anon_mapping;
rcu_read_lock();
anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
anon_mapping = (unsigned long)READ_ONCE(page->mapping);
if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
goto out;
if (!page_mapped(page))
goto out;
anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
root_anon_vma = ACCESS_ONCE(anon_vma->root);
root_anon_vma = READ_ONCE(anon_vma->root);
if (down_read_trylock(&root_anon_vma->rwsem)) {
/*
* If the page is still mapped, then this anon_vma is still

View file

@ -4277,7 +4277,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
int node;
struct page *page;
page = ACCESS_ONCE(c->page);
page = READ_ONCE(c->page);
if (!page)
continue;
@ -4292,7 +4292,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
total += x;
nodes[node] += x;
page = ACCESS_ONCE(c->partial);
page = READ_ONCE(c->partial);
if (page) {
node = page_to_nid(page);
if (flags & SO_TOTAL)

View file

@ -31,6 +31,7 @@
#include <linux/memcontrol.h>
#include <linux/gfp.h>
#include <linux/uio.h>
#include <linux/hugetlb.h>
#include "internal.h"
@ -42,7 +43,7 @@ int page_cluster;
static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
/*
* This path almost never happens for VM activity - pages are normally
@ -75,7 +76,14 @@ static void __put_compound_page(struct page *page)
{
compound_page_dtor *dtor;
__page_cache_release(page);
/*
* __page_cache_release() is supposed to be called for thp, not for
* hugetlb. This is because hugetlb page does never have PageLRU set
* (it's never listed to any LRU lists) and no memcg routines should
* be called for hugetlb (it has a separate hugetlb_cgroup.)
*/
if (!PageHuge(page))
__page_cache_release(page);
dtor = get_compound_page_dtor(page);
(*dtor)(page);
}
@ -743,7 +751,7 @@ void lru_cache_add_active_or_unevictable(struct page *page,
* be write it out by flusher threads as this is much more effective
* than the single-page writeout from reclaim.
*/
static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
void *arg)
{
int lru, file;
@ -811,36 +819,36 @@ void lru_add_drain_cpu(int cpu)
local_irq_restore(flags);
}
pvec = &per_cpu(lru_deactivate_pvecs, cpu);
pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
if (pagevec_count(pvec))
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
activate_page_drain(cpu);
}
/**
* deactivate_page - forcefully deactivate a page
* deactivate_file_page - forcefully deactivate a file page
* @page: page to deactivate
*
* This function hints the VM that @page is a good reclaim candidate,
* for example if its invalidation fails due to the page being dirty
* or under writeback.
*/
void deactivate_page(struct page *page)
void deactivate_file_page(struct page *page)
{
/*
* In a workload with many unevictable page such as mprotect, unevictable
* page deactivation for accelerating reclaim is pointless.
* In a workload with many unevictable page such as mprotect,
* unevictable page deactivation for accelerating reclaim is pointless.
*/
if (PageUnevictable(page))
return;
if (likely(get_page_unless_zero(page))) {
struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
if (!pagevec_add(pvec, page))
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
put_cpu_var(lru_deactivate_pvecs);
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
put_cpu_var(lru_deactivate_file_pvecs);
}
}
@ -872,7 +880,7 @@ void lru_add_drain_all(void)
if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
schedule_work_on(cpu, work);

View file

@ -390,7 +390,7 @@ static unsigned long swapin_nr_pages(unsigned long offset)
unsigned int pages, max_pages, last_ra;
static atomic_t last_readahead_pages;
max_pages = 1 << ACCESS_ONCE(page_cluster);
max_pages = 1 << READ_ONCE(page_cluster);
if (max_pages <= 1)
return 1;

View file

@ -1312,7 +1312,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
else
continue;
}
count = ACCESS_ONCE(si->swap_map[i]);
count = READ_ONCE(si->swap_map[i]);
if (count && swap_count(count) != SWAP_MAP_BAD)
break;
}

View file

@ -490,7 +490,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
* of interest and try to speed up its reclaim.
*/
if (!ret)
deactivate_page(page);
deactivate_file_page(page);
count += ret;
}
pagevec_remove_exceptionals(&pvec);

View file

@ -325,9 +325,37 @@ void kvfree(const void *addr)
}
EXPORT_SYMBOL(kvfree);
static inline void *__page_rmapping(struct page *page)
{
unsigned long mapping;
mapping = (unsigned long)page->mapping;
mapping &= ~PAGE_MAPPING_FLAGS;
return (void *)mapping;
}
/* Neutral page->mapping pointer to address_space or anon_vma or other */
void *page_rmapping(struct page *page)
{
page = compound_head(page);
return __page_rmapping(page);
}
struct anon_vma *page_anon_vma(struct page *page)
{
unsigned long mapping;
page = compound_head(page);
mapping = (unsigned long)page->mapping;
if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
return NULL;
return __page_rmapping(page);
}
struct address_space *page_mapping(struct page *page)
{
struct address_space *mapping = page->mapping;
unsigned long mapping;
/* This happens if someone calls flush_dcache_page on slab page */
if (unlikely(PageSlab(page)))
@ -337,10 +365,13 @@ struct address_space *page_mapping(struct page *page)
swp_entry_t entry;
entry.val = page_private(page);
mapping = swap_address_space(entry);
} else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
mapping = NULL;
return mapping;
return swap_address_space(entry);
}
mapping = (unsigned long)page->mapping;
if (mapping & PAGE_MAPPING_FLAGS)
return NULL;
return page->mapping;
}
int overcommit_ratio_handler(struct ctl_table *table, int write,

View file

@ -765,7 +765,7 @@ struct vmap_block {
spinlock_t lock;
struct vmap_area *va;
unsigned long free, dirty;
DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
unsigned long dirty_min, dirty_max; /*< dirty range */
struct list_head free_list;
struct rcu_head rcu_head;
struct list_head purge;
@ -796,13 +796,31 @@ static unsigned long addr_to_vb_idx(unsigned long addr)
return addr;
}
static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
{
unsigned long addr;
addr = va_start + (pages_off << PAGE_SHIFT);
BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start));
return (void *)addr;
}
/**
* new_vmap_block - allocates new vmap_block and occupies 2^order pages in this
* block. Of course pages number can't exceed VMAP_BBMAP_BITS
* @order: how many 2^order pages should be occupied in newly allocated block
* @gfp_mask: flags for the page level allocator
*
* Returns: virtual address in a newly allocated block or ERR_PTR(-errno)
*/
static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
{
struct vmap_block_queue *vbq;
struct vmap_block *vb;
struct vmap_area *va;
unsigned long vb_idx;
int node, err;
void *vaddr;
node = numa_node_id();
@ -826,11 +844,15 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
return ERR_PTR(err);
}
vaddr = vmap_block_vaddr(va->va_start, 0);
spin_lock_init(&vb->lock);
vb->va = va;
vb->free = VMAP_BBMAP_BITS;
/* At least something should be left free */
BUG_ON(VMAP_BBMAP_BITS <= (1UL << order));
vb->free = VMAP_BBMAP_BITS - (1UL << order);
vb->dirty = 0;
bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
vb->dirty_min = VMAP_BBMAP_BITS;
vb->dirty_max = 0;
INIT_LIST_HEAD(&vb->free_list);
vb_idx = addr_to_vb_idx(va->va_start);
@ -842,11 +864,11 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
vbq = &get_cpu_var(vmap_block_queue);
spin_lock(&vbq->lock);
list_add_rcu(&vb->free_list, &vbq->free);
list_add_tail_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
put_cpu_var(vmap_block_queue);
return vb;
return vaddr;
}
static void free_vmap_block(struct vmap_block *vb)
@ -881,7 +903,8 @@ static void purge_fragmented_blocks(int cpu)
if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
vb->free = 0; /* prevent further allocs after releasing lock */
vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
vb->dirty_min = 0;
vb->dirty_max = VMAP_BBMAP_BITS;
spin_lock(&vbq->lock);
list_del_rcu(&vb->free_list);
spin_unlock(&vbq->lock);
@ -910,7 +933,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
{
struct vmap_block_queue *vbq;
struct vmap_block *vb;
unsigned long addr = 0;
void *vaddr = NULL;
unsigned int order;
BUG_ON(size & ~PAGE_MASK);
@ -925,43 +948,38 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
}
order = get_order(size);
again:
rcu_read_lock();
vbq = &get_cpu_var(vmap_block_queue);
list_for_each_entry_rcu(vb, &vbq->free, free_list) {
int i;
unsigned long pages_off;
spin_lock(&vb->lock);
if (vb->free < 1UL << order)
goto next;
if (vb->free < (1UL << order)) {
spin_unlock(&vb->lock);
continue;
}
i = VMAP_BBMAP_BITS - vb->free;
addr = vb->va->va_start + (i << PAGE_SHIFT);
BUG_ON(addr_to_vb_idx(addr) !=
addr_to_vb_idx(vb->va->va_start));
pages_off = VMAP_BBMAP_BITS - vb->free;
vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
vb->free -= 1UL << order;
if (vb->free == 0) {
spin_lock(&vbq->lock);
list_del_rcu(&vb->free_list);
spin_unlock(&vbq->lock);
}
spin_unlock(&vb->lock);
break;
next:
spin_unlock(&vb->lock);
}
put_cpu_var(vmap_block_queue);
rcu_read_unlock();
if (!addr) {
vb = new_vmap_block(gfp_mask);
if (IS_ERR(vb))
return vb;
goto again;
}
/* Allocate new block if nothing was found */
if (!vaddr)
vaddr = new_vmap_block(order, gfp_mask);
return (void *)addr;
return vaddr;
}
static void vb_free(const void *addr, unsigned long size)
@ -979,6 +997,7 @@ static void vb_free(const void *addr, unsigned long size)
order = get_order(size);
offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
offset >>= PAGE_SHIFT;
vb_idx = addr_to_vb_idx((unsigned long)addr);
rcu_read_lock();
@ -989,7 +1008,10 @@ static void vb_free(const void *addr, unsigned long size)
vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
spin_lock(&vb->lock);
BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
/* Expand dirty range */
vb->dirty_min = min(vb->dirty_min, offset);
vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));
vb->dirty += 1UL << order;
if (vb->dirty == VMAP_BBMAP_BITS) {
@ -1028,25 +1050,18 @@ void vm_unmap_aliases(void)
rcu_read_lock();
list_for_each_entry_rcu(vb, &vbq->free, free_list) {
int i, j;
spin_lock(&vb->lock);
i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
if (i < VMAP_BBMAP_BITS) {
if (vb->dirty) {
unsigned long va_start = vb->va->va_start;
unsigned long s, e;
j = find_last_bit(vb->dirty_map,
VMAP_BBMAP_BITS);
j = j + 1; /* need exclusive index */
s = va_start + (vb->dirty_min << PAGE_SHIFT);
e = va_start + (vb->dirty_max << PAGE_SHIFT);
start = min(s, start);
end = max(e, end);
s = vb->va->va_start + (i << PAGE_SHIFT);
e = vb->va->va_start + (j << PAGE_SHIFT);
flush = 1;
if (s < start)
start = s;
if (e > end)
end = e;
}
spin_unlock(&vb->lock);
}

File diff suppressed because it is too large Load diff