diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 8e58a1b73771..67df1c51a84c 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -56,8 +56,9 @@ static inline void page_ext_init(void) { } #endif - struct page_ext *lookup_page_ext(const struct page *page); +extern struct page_ext *page_ext_get(struct page *page); +extern void page_ext_put(struct page_ext *page_ext); static inline struct page_ext *page_ext_next(struct page_ext *curr) { @@ -73,11 +74,6 @@ static inline void pgdat_page_ext_init(struct pglist_data *pgdat) { } -static inline struct page_ext *lookup_page_ext(const struct page *page) -{ - return NULL; -} - static inline void page_ext_init(void) { } @@ -85,5 +81,14 @@ static inline void page_ext_init(void) static inline void page_ext_init_flatmem(void) { } + +static inline struct page_ext *page_ext_get(struct page *page) +{ + return NULL; +} + +static inline void page_ext_put(struct page_ext *page_ext) +{ +} #endif /* CONFIG_PAGE_EXTENSION */ #endif /* __LINUX_PAGE_EXT_H */ diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index d8a6aecf99cb..2a21c793d815 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -47,62 +47,77 @@ extern struct page_ext_operations page_idle_ops; static inline bool page_is_young(struct page *page) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = page_ext_get(page); + bool page_young; if (unlikely(!page_ext)) return false; - return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_young = test_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_ext_put(page_ext); + + return page_young; } static inline void set_page_young(struct page *page) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; set_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_ext_put(page_ext); } static inline bool test_and_clear_page_young(struct page *page) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = page_ext_get(page); + bool page_young; if (unlikely(!page_ext)) return false; - return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_young = test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_ext_put(page_ext); + + return page_young; } static inline bool page_is_idle(struct page *page) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = page_ext_get(page); + bool page_idle; if (unlikely(!page_ext)) return false; - return test_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_idle = test_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_ext_put(page_ext); + + return page_idle; } static inline void set_page_idle(struct page *page) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; set_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_ext_put(page_ext); } static inline void clear_page_idle(struct page *page) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; clear_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_ext_put(page_ext); } #endif /* CONFIG_64BIT */ diff --git a/mm/page_ext.c b/mm/page_ext.c index e5e31ff1adba..e5b56394e2b4 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -8,7 +8,7 @@ #include #include #include - +#include /* * struct page extension * @@ -58,6 +58,10 @@ * can utilize this callback to initialize the state of it correctly. */ +#ifdef CONFIG_SPARSEMEM +#define PAGE_EXT_INVALID (0x1) +#endif + #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) static bool need_page_idle(void) { @@ -117,6 +121,49 @@ static inline struct page_ext *get_entry(void *base, unsigned long index) return base + page_ext_size * index; } +/** + * page_ext_get() - Get the extended information for a page. + * @page: The page we're interested in. + * + * Ensures that the page_ext will remain valid until page_ext_put() + * is called. + * + * Return: NULL if no page_ext exists for this page. + * Context: Any context. Caller may not sleep until they have called + * page_ext_put(). + */ +struct page_ext *page_ext_get(struct page *page) +{ + struct page_ext *page_ext; + + rcu_read_lock(); + page_ext = lookup_page_ext(page); + if (!page_ext) { + rcu_read_unlock(); + return NULL; + } + + return page_ext; +} + +/** + * page_ext_put() - Working with page extended information is done. + * @page_ext - Page extended information received from page_ext_get(). + * + * The page extended information of the page may not be valid after this + * function is called. + * + * Return: None. + * Context: Any context with corresponding page_ext_get() is called. + */ +void page_ext_put(struct page_ext *page_ext) +{ + if (unlikely(!page_ext)) + return; + + rcu_read_unlock(); +} + #if !defined(CONFIG_SPARSEMEM) @@ -131,6 +178,7 @@ struct page_ext *lookup_page_ext(const struct page *page) unsigned long index; struct page_ext *base; + WARN_ON_ONCE(!rcu_read_lock_held()); base = NODE_DATA(page_to_nid(page))->node_page_ext; /* * The sanity checks the page allocator does upon freeing a @@ -200,20 +248,27 @@ fail: } #else /* CONFIG_FLAT_NODE_MEM_MAP */ +static bool page_ext_invalid(struct page_ext *page_ext) +{ + return !page_ext || (((unsigned long)page_ext & PAGE_EXT_INVALID) == PAGE_EXT_INVALID); +} struct page_ext *lookup_page_ext(const struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); + struct page_ext *page_ext = READ_ONCE(section->page_ext); + + WARN_ON_ONCE(!rcu_read_lock_held()); /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. */ - if (!section->page_ext) + if (page_ext_invalid(page_ext)) return NULL; - return get_entry(section->page_ext, pfn); + return get_entry(page_ext, pfn); } EXPORT_SYMBOL_GPL(lookup_page_ext); @@ -293,9 +348,30 @@ static void __free_page_ext(unsigned long pfn) ms = __pfn_to_section(pfn); if (!ms || !ms->page_ext) return; - base = get_entry(ms->page_ext, pfn); + + base = READ_ONCE(ms->page_ext); + /* + * page_ext here can be valid while doing the roll back + * operation in online_page_ext(). + */ + if (page_ext_invalid(base)) + base = (void *)base - PAGE_EXT_INVALID; + WRITE_ONCE(ms->page_ext, NULL); + + base = get_entry(base, pfn); free_page_ext(base); - ms->page_ext = NULL; +} + +static void __invalidate_page_ext(unsigned long pfn) +{ + struct mem_section *ms; + void *val; + + ms = __pfn_to_section(pfn); + if (!ms || !ms->page_ext) + return; + val = (void *)ms->page_ext + PAGE_EXT_INVALID; + WRITE_ONCE(ms->page_ext, val); } static int __meminit online_page_ext(unsigned long start_pfn, @@ -338,6 +414,20 @@ static int __meminit offline_page_ext(unsigned long start_pfn, start = SECTION_ALIGN_DOWN(start_pfn); end = SECTION_ALIGN_UP(start_pfn + nr_pages); + /* + * Freeing of page_ext is done in 3 steps to avoid + * use-after-free of it: + * 1) Traverse all the sections and mark their page_ext + * as invalid. + * 2) Wait for all the existing users of page_ext who + * started before invalidation to finish. + * 3) Free the page_ext. + */ + for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) + __invalidate_page_ext(pfn); + + synchronize_rcu(); + for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __free_page_ext(pfn); return 0; diff --git a/mm/page_owner.c b/mm/page_owner.c index 9501940a3ccd..8dd3f27db21a 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -173,7 +173,7 @@ void __reset_page_owner(struct page *page, unsigned int order) handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); - page_ext = lookup_page_ext(page); + page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; for (i = 0; i < (1 << order); i++) { @@ -183,6 +183,7 @@ void __reset_page_owner(struct page *page, unsigned int order) page_owner->free_ts_nsec = free_ts_nsec; page_ext = page_ext_next(page_ext); } + page_ext_put(page_ext); } static inline void __set_page_owner_handle(struct page *page, @@ -210,19 +211,21 @@ static inline void __set_page_owner_handle(struct page *page, noinline void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext; depot_stack_handle_t handle; + handle = save_stack(gfp_mask); + + page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; - - handle = save_stack(gfp_mask); __set_page_owner_handle(page, page_ext, handle, order, gfp_mask); + page_ext_put(page_ext); } void __set_page_owner_migrate_reason(struct page *page, int reason) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = page_ext_get(page); struct page_owner *page_owner; if (unlikely(!page_ext)) @@ -230,12 +233,13 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) page_owner = get_page_owner(page_ext); page_owner->last_migrate_reason = reason; + page_ext_put(page_ext); } void __split_page_owner(struct page *page, unsigned int nr) { int i; - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = page_ext_get(page); struct page_owner *page_owner; if (unlikely(!page_ext)) @@ -246,17 +250,25 @@ void __split_page_owner(struct page *page, unsigned int nr) page_owner->order = 0; page_ext = page_ext_next(page_ext); } + page_ext_put(page_ext); } void __copy_page_owner(struct page *oldpage, struct page *newpage) { - struct page_ext *old_ext = lookup_page_ext(oldpage); - struct page_ext *new_ext = lookup_page_ext(newpage); + struct page_ext *old_ext; + struct page_ext *new_ext; struct page_owner *old_page_owner, *new_page_owner; - if (unlikely(!old_ext || !new_ext)) + old_ext = page_ext_get(oldpage); + if (unlikely(!old_ext)) return; + new_ext = page_ext_get(newpage); + if (unlikely(!new_ext)) { + page_ext_put(old_ext); + return; + } + old_page_owner = get_page_owner(old_ext); new_page_owner = get_page_owner(new_ext); new_page_owner->order = old_page_owner->order; @@ -279,6 +291,8 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) */ __set_bit(PAGE_EXT_OWNER, &new_ext->flags); __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags); + page_ext_put(new_ext); + page_ext_put(old_ext); } void pagetypeinfo_showmixedcount_print(struct seq_file *m, @@ -335,12 +349,12 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, if (PageReserved(page)) continue; - page_ext = lookup_page_ext(page); + page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) - continue; + goto ext_put_continue; page_owner = get_page_owner(page_ext); page_mt = gfp_migratetype(page_owner->gfp_mask); @@ -351,9 +365,12 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, count[pageblock_mt]++; pfn = block_end_pfn; + page_ext_put(page_ext); break; } pfn += (1UL << page_owner->order) - 1; +ext_put_continue: + page_ext_put(page_ext); } } @@ -432,7 +449,7 @@ err: void __dump_page_owner(struct page *page) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = page_ext_get((void *)page); struct page_owner *page_owner; depot_stack_handle_t handle; unsigned long *entries; @@ -451,6 +468,7 @@ void __dump_page_owner(struct page *page) if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not present (never set?)\n"); + page_ext_put(page_ext); return; } @@ -483,6 +501,7 @@ void __dump_page_owner(struct page *page) if (page_owner->last_migrate_reason != -1) pr_alert("page has been migrated, last migrate reason: %s\n", migrate_reason_names[page_owner->last_migrate_reason]); + page_ext_put(page_ext); } static ssize_t @@ -508,6 +527,14 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) /* Find an allocated page */ for (; pfn < max_pfn; pfn++) { + /* + * This temporary page_owner is required so + * that we can avoid the context switches while holding + * the rcu lock and copying the page owner information to + * user through copy_to_user() or GFP_KERNEL allocations. + */ + struct page_owner page_owner_tmp; + /* * If the new page is in a new MAX_ORDER_NR_PAGES area, * validate the area as existing, skip it if not @@ -530,7 +557,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) continue; } - page_ext = lookup_page_ext(page); + page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; @@ -539,14 +566,14 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) * because we don't hold the zone lock. */ if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) - continue; + goto ext_put_continue; /* * Although we do have the info about past allocation of free * pages, it's not relevant for current memory usage. */ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) - continue; + goto ext_put_continue; page_owner = get_page_owner(page_ext); @@ -555,7 +582,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) * would inflate the stats. */ if (!IS_ALIGNED(pfn, 1 << page_owner->order)) - continue; + goto ext_put_continue; /* * Access to page_ext->handle isn't synchronous so we should @@ -563,13 +590,17 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) */ handle = READ_ONCE(page_owner->handle); if (!handle) - continue; + goto ext_put_continue; /* Record the next PFN to read in the file offset */ *ppos = (pfn - min_low_pfn) + 1; + page_owner_tmp = *page_owner; + page_ext_put(page_ext); return print_page_owner(buf, count, pfn, page, - page_owner, handle); + &page_owner_tmp, handle); +ext_put_continue: + page_ext_put(page_ext); } return 0; @@ -627,18 +658,20 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) if (PageReserved(page)) continue; - page_ext = lookup_page_ext(page); + page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; /* Maybe overlapping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) - continue; + goto ext_put_continue; /* Found early allocated page */ __set_page_owner_handle(page, page_ext, early_handle, 0, 0); count++; +ext_put_continue: + page_ext_put(page_ext); } cond_resched(); }