Merge branch 'hwpoison-hugepages' into hwpoison

Conflicts:
	mm/memory-failure.c

commit 46e387bbd8
10 changed files with 551 additions and 125 deletions

arch/x86/mm/fault.c

@@ -11,6 +11,7 @@
 #include <linux/kprobes.h>		/* __kprobes, ...		*/
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
 #include <linux/perf_event.h>		/* perf_sw_event		*/
+#include <linux/hugetlb.h>		/* hstate_index_to_shift	*/

 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
@@ -160,15 +161,20 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)

 static void
 force_sig_info_fault(int si_signo, int si_code, unsigned long address,
-		     struct task_struct *tsk)
+		     struct task_struct *tsk, int fault)
 {
+	unsigned lsb = 0;
 	siginfo_t info;

 	info.si_signo	= si_signo;
 	info.si_errno	= 0;
 	info.si_code	= si_code;
 	info.si_addr	= (void __user *)address;
-	info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
+	if (fault & VM_FAULT_HWPOISON_LARGE)
+		lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+	if (fault & VM_FAULT_HWPOISON)
+		lsb = PAGE_SHIFT;
+	info.si_addr_lsb = lsb;

 	force_sig_info(si_signo, &info, tsk);
 }
@@ -722,7 +728,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		tsk->thread.error_code	= error_code | (address >= TASK_SIZE);
 		tsk->thread.trap_no	= 14;

-		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+		force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);

 		return;
 	}
@@ -807,14 +813,14 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 	tsk->thread.trap_no	= 14;

 #ifdef CONFIG_MEMORY_FAILURE
-	if (fault & VM_FAULT_HWPOISON) {
+	if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
 		printk(KERN_ERR
 	"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
 			tsk->comm, tsk->pid, address);
 		code = BUS_MCEERR_AR;
 	}
 #endif
-	force_sig_info_fault(SIGBUS, code, address, tsk);
+	force_sig_info_fault(SIGBUS, code, address, tsk, fault);
 }

 static noinline void
@@ -824,7 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	if (fault & VM_FAULT_OOM) {
 		out_of_memory(regs, error_code, address);
 	} else {
-		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+			     VM_FAULT_HWPOISON_LARGE))
 			do_sigbus(regs, error_code, address, fault);
 		else
 			BUG();
fs/hugetlbfs/inode.c

@@ -31,6 +31,7 @@
 #include <linux/statfs.h>
 #include <linux/security.h>
 #include <linux/magic.h>
+#include <linux/migrate.h>

 #include <asm/uaccess.h>

@@ -573,6 +574,19 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 	return 0;
 }

+static int hugetlbfs_migrate_page(struct address_space *mapping,
+				struct page *newpage, struct page *page)
+{
+	int rc;
+
+	rc = migrate_huge_page_move_mapping(mapping, newpage, page);
+	if (rc)
+		return rc;
+	migrate_page_copy(newpage, page);
+
+	return 0;
+}
+
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
@@ -659,6 +673,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 	.write_begin	= hugetlbfs_write_begin,
 	.write_end	= hugetlbfs_write_end,
 	.set_page_dirty	= hugetlbfs_set_page_dirty,
+	.migratepage    = hugetlbfs_migrate_page,
 };

include/linux/hugetlb.h

@@ -43,7 +43,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 						struct vm_area_struct *vma,
 						int acctflags);
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
-void __isolate_hwpoisoned_huge_page(struct page *page);
+int dequeue_hwpoisoned_huge_page(struct page *page);
+void copy_huge_page(struct page *dst, struct page *src);

 extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
@@ -101,7 +102,10 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
 #define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
 #define huge_pte_offset(mm, address)	0
-#define __isolate_hwpoisoned_huge_page(page)	0
+#define dequeue_hwpoisoned_huge_page(page)	0
+static inline void copy_huge_page(struct page *dst, struct page *src)
+{
+}

 #define hugetlb_change_protection(vma, address, end, newprot)

@@ -228,6 +232,8 @@ struct huge_bootmem_page {
 	struct hstate *hstate;
 };

+struct page *alloc_huge_page_node(struct hstate *h, int nid);
+
 /* arch callback */
 int __init alloc_bootmem_huge_page(struct hstate *h);

@@ -301,8 +307,14 @@ static inline struct hstate *page_hstate(struct page *page)
 	return size_to_hstate(PAGE_SIZE << compound_order(page));
 }

+static inline unsigned hstate_index_to_shift(unsigned index)
+{
+	return hstates[index].order + PAGE_SHIFT;
+}
+
 #else
 struct hstate {};
+#define alloc_huge_page_node(h, nid) NULL
 #define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
 #define hstate_vma(v) NULL
@@ -317,6 +329,7 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)
 {
 	return 1;
 }
+#define hstate_index_to_shift(index) 0
 #endif

 #endif /* _LINUX_HUGETLB_H */
include/linux/migrate.h

@@ -14,6 +14,8 @@ extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, int offlining);
+extern int migrate_huge_pages(struct list_head *l, new_page_t x,
+			unsigned long private, int offlining);

 extern int fail_migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -23,12 +25,17 @@ extern int migrate_prep_local(void);
 extern int migrate_vmas(struct mm_struct *mm,
 		const nodemask_t *from, const nodemask_t *to,
 		unsigned long flags);
+extern void migrate_page_copy(struct page *newpage, struct page *page);
+extern int migrate_huge_page_move_mapping(struct address_space *mapping,
+				  struct page *newpage, struct page *page);
 #else
 #define PAGE_MIGRATION 0

 static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, int offlining) { return -ENOSYS; }
+static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
+		unsigned long private, int offlining) { return -ENOSYS; }

 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
@@ -40,6 +47,15 @@ static inline int migrate_vmas(struct mm_struct *mm,
 	return -ENOSYS;
 }

+static inline void migrate_page_copy(struct page *newpage,
+				     struct page *page) {}
+
+static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
+				  struct page *newpage, struct page *page)
+{
+	return -ENOSYS;
+}
+
 /* Possible settings for the migrate_page() method in address_operations */
 #define migrate_page NULL
 #define fail_migrate_page NULL
include/linux/mm.h

@@ -718,12 +718,20 @@ static inline int page_mapped(struct page *page)
 #define VM_FAULT_SIGBUS	0x0002
 #define VM_FAULT_MAJOR	0x0004
 #define VM_FAULT_WRITE	0x0008	/* Special case for get_user_pages */
-#define VM_FAULT_HWPOISON 0x0010	/* Hit poisoned page */
+#define VM_FAULT_HWPOISON 0x0010	/* Hit poisoned small page */
+#define VM_FAULT_HWPOISON_LARGE 0x0020  /* Hit poisoned large page. Index encoded in upper bits */

 #define VM_FAULT_NOPAGE	0x0100	/* ->fault installed the pte, not return page */
 #define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */

-#define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)
+#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
+
+#define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \
+			 VM_FAULT_HWPOISON_LARGE)
+
+/* Encode hstate index for a hwpoisoned large page */
+#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
+#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)

 /*
  * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
mm/hugetlb.c (211 changed lines)

@@ -423,14 +423,14 @@ static void clear_huge_page(struct page *page,
 	}
 }

-static void copy_gigantic_page(struct page *dst, struct page *src,
+static void copy_user_gigantic_page(struct page *dst, struct page *src,
 			   unsigned long addr, struct vm_area_struct *vma)
 {
 	int i;
 	struct hstate *h = hstate_vma(vma);
 	struct page *dst_base = dst;
 	struct page *src_base = src;
-	might_sleep();
+
 	for (i = 0; i < pages_per_huge_page(h); ) {
 		cond_resched();
 		copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
@@ -440,14 +440,15 @@ static void copy_gigantic_page(struct page *dst, struct page *src,
 		src = mem_map_next(src, src_base, i);
 	}
 }
-static void copy_huge_page(struct page *dst, struct page *src,
+
+static void copy_user_huge_page(struct page *dst, struct page *src,
 			   unsigned long addr, struct vm_area_struct *vma)
 {
 	int i;
 	struct hstate *h = hstate_vma(vma);
+
 	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
-		copy_gigantic_page(dst, src, addr, vma);
+		copy_user_gigantic_page(dst, src, addr, vma);
 		return;
 	}

@@ -458,6 +459,40 @@ static void copy_huge_page(struct page *dst, struct page *src,
 	}
 }

+static void copy_gigantic_page(struct page *dst, struct page *src)
+{
+	int i;
+	struct hstate *h = page_hstate(src);
+	struct page *dst_base = dst;
+	struct page *src_base = src;
+
+	for (i = 0; i < pages_per_huge_page(h); ) {
+		cond_resched();
+		copy_highpage(dst, src);
+
+		i++;
+		dst = mem_map_next(dst, dst_base, i);
+		src = mem_map_next(src, src_base, i);
+	}
+}
+
+void copy_huge_page(struct page *dst, struct page *src)
+{
+	int i;
+	struct hstate *h = page_hstate(src);
+
+	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
+		copy_gigantic_page(dst, src);
+		return;
+	}
+
+	might_sleep();
+	for (i = 0; i < pages_per_huge_page(h); i++) {
+		cond_resched();
+		copy_highpage(dst + i, src + i);
+	}
+}
+
 static void enqueue_huge_page(struct hstate *h, struct page *page)
 {
 	int nid = page_to_nid(page);
@@ -466,11 +501,24 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 	h->free_huge_pages_node[nid]++;
 }

+static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+
+	if (list_empty(&h->hugepage_freelists[nid]))
+		return NULL;
+	page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
+	list_del(&page->lru);
+	set_page_refcounted(page);
+	h->free_huge_pages--;
+	h->free_huge_pages_node[nid]--;
+	return page;
+}
+
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
 {
-	int nid;
 	struct page *page = NULL;
 	struct mempolicy *mpol;
 	nodemask_t *nodemask;
@@ -496,21 +544,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,

 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
-		nid = zone_to_nid(zone);
-		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
-		    !list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
-
-			if (!avoid_reserve)
-				decrement_hugepage_resv_vma(h, vma);
-
-			break;
+		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
+			page = dequeue_huge_page_node(h, zone_to_nid(zone));
+			if (page) {
+				if (!avoid_reserve)
+					decrement_hugepage_resv_vma(h, vma);
+				break;
+			}
 		}
 	}
 err:
 	mpol_cond_put(mpol);
 	put_mems_allowed();
@@ -770,11 +812,10 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 	return ret;
 }

-static struct page *alloc_buddy_huge_page(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long address)
+static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 {
 	struct page *page;
-	unsigned int nid;
+	unsigned int r_nid;

 	if (h->order >= MAX_ORDER)
 		return NULL;
@@ -812,9 +853,14 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 	}
 	spin_unlock(&hugetlb_lock);

-	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
-			   __GFP_REPEAT|__GFP_NOWARN,
-			   huge_page_order(h));
+	if (nid == NUMA_NO_NODE)
+		page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+				   __GFP_REPEAT|__GFP_NOWARN,
+				   huge_page_order(h));
+	else
+		page = alloc_pages_exact_node(nid,
+			htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+			__GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));

 	if (page && arch_prepare_hugepage(page)) {
 		__free_pages(page, huge_page_order(h));
@@ -823,19 +869,13 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,

 	spin_lock(&hugetlb_lock);
 	if (page) {
-		/*
-		 * This page is now managed by the hugetlb allocator and has
-		 * no users -- drop the buddy allocator's reference.
-		 */
-		put_page_testzero(page);
-		VM_BUG_ON(page_count(page));
-		nid = page_to_nid(page);
+		r_nid = page_to_nid(page);
 		set_compound_page_dtor(page, free_huge_page);
 		/*
 		 * We incremented the global counters already
 		 */
-		h->nr_huge_pages_node[nid]++;
-		h->surplus_huge_pages_node[nid]++;
+		h->nr_huge_pages_node[r_nid]++;
+		h->surplus_huge_pages_node[r_nid]++;
 		__count_vm_event(HTLB_BUDDY_PGALLOC);
 	} else {
 		h->nr_huge_pages--;
@@ -847,6 +887,25 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 	return page;
 }

+/*
+ * This allocation function is useful in the context where vma is irrelevant.
+ * E.g. soft-offlining uses this function because it only cares physical
+ * address of error page.
+ */
+struct page *alloc_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+
+	spin_lock(&hugetlb_lock);
+	page = dequeue_huge_page_node(h, nid);
+	spin_unlock(&hugetlb_lock);
+
+	if (!page)
+		page = alloc_buddy_huge_page(h, nid);
+
+	return page;
+}
+
 /*
  * Increase the hugetlb pool such that it can accomodate a reservation
  * of size 'delta'.
@@ -871,17 +930,14 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = alloc_buddy_huge_page(h, NULL, 0);
-		if (!page) {
+		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
+		if (!page)
 			/*
 			 * We were not able to allocate enough pages to
 			 * satisfy the entire reservation so we free what
 			 * we've allocated so far.
 			 */
-			spin_lock(&hugetlb_lock);
-			needed = 0;
 			goto free;
-		}
+
 		list_add(&page->lru, &surplus_list);
 	}
@@ -908,31 +964,31 @@ retry:
 	needed += allocated;
 	h->resv_huge_pages += delta;
 	ret = 0;
-free:
+
+	spin_unlock(&hugetlb_lock);
 	/* Free the needed pages to the hugetlb pool */
 	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
 		if ((--needed) < 0)
 			break;
 		list_del(&page->lru);
+		/*
+		 * This page is now managed by the hugetlb allocator and has
+		 * no users -- drop the buddy allocator's reference.
+		 */
+		put_page_testzero(page);
+		VM_BUG_ON(page_count(page));
 		enqueue_huge_page(h, page);
 	}

 	/* Free unnecessary surplus pages to the buddy allocator */
+free:
 	if (!list_empty(&surplus_list)) {
-		spin_unlock(&hugetlb_lock);
 		list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
 			list_del(&page->lru);
-			/*
-			 * The page has a reference count of zero already, so
-			 * call free_huge_page directly instead of using
-			 * put_page.  This must be done with hugetlb_lock
-			 * unlocked which is safe because free_huge_page takes
-			 * hugetlb_lock before deciding how to free the page.
-			 */
-			free_huge_page(page);
+			put_page(page);
 		}
-		spin_lock(&hugetlb_lock);
 	}
+	spin_lock(&hugetlb_lock);

 	return ret;
 }
@@ -1052,14 +1108,13 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	spin_unlock(&hugetlb_lock);

 	if (!page) {
-		page = alloc_buddy_huge_page(h, vma, addr);
+		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
 		if (!page) {
 			hugetlb_put_quota(inode->i_mapping, chg);
 			return ERR_PTR(-VM_FAULT_SIGBUS);
 		}
 	}

-	set_page_refcounted(page);
 	set_page_private(page, (unsigned long) mapping);

 	vma_commit_reservation(h, vma, addr);
@@ -2153,6 +2208,19 @@ nomem:
 	return -ENOMEM;
 }

+static int is_hugetlb_entry_migration(pte_t pte)
+{
+	swp_entry_t swp;
+
+	if (huge_pte_none(pte) || pte_present(pte))
+		return 0;
+	swp = pte_to_swp_entry(pte);
+	if (non_swap_entry(swp) && is_migration_entry(swp)) {
+		return 1;
+	} else
+		return 0;
+}
+
 static int is_hugetlb_entry_hwpoisoned(pte_t pte)
 {
 	swp_entry_t swp;
@@ -2383,7 +2451,7 @@ retry_avoidcopy:
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;

-	copy_huge_page(new_page, old_page, address, vma);
+	copy_user_huge_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);

 	/*
@@ -2515,20 +2583,18 @@ retry:
 			hugepage_add_new_anon_rmap(page, vma, address);
 		}
 	} else {
-		page_dup_rmap(page);
-	}
-
-	/*
-	 * Since memory error handler replaces pte into hwpoison swap entry
-	 * at the time of error handling, a process which reserved but not have
-	 * the mapping to the error hugepage does not have hwpoison swap entry.
-	 * So we need to block accesses from such a process by checking
-	 * PG_hwpoison bit here.
-	 */
-	if (unlikely(PageHWPoison(page))) {
-		ret = VM_FAULT_HWPOISON;
-		goto backout_unlocked;
-	}
+		/*
+		 * If memory error occurs between mmap() and fault, some process
+		 * don't have hwpoisoned swap entry for errored virtual address.
+		 * So we need to block hugepage fault by PG_hwpoison bit check.
+		 */
+		if (unlikely(PageHWPoison(page))) {
+			ret = VM_FAULT_HWPOISON |
+			      VM_FAULT_SET_HINDEX(h - hstates);
+			goto backout_unlocked;
+		}
+		page_dup_rmap(page);
+	}

 	/*
 	 * If we are going to COW a private mapping later, we examine the
@@ -2587,8 +2653,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	ptep = huge_pte_offset(mm, address);
 	if (ptep) {
 		entry = huge_ptep_get(ptep);
-		if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
-			return VM_FAULT_HWPOISON;
+		if (unlikely(is_hugetlb_entry_migration(entry))) {
+			migration_entry_wait(mm, (pmd_t *)ptep, address);
+			return 0;
+		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
+			return VM_FAULT_HWPOISON_LARGE |
+			       VM_FAULT_SET_HINDEX(h - hstates);
 	}

 	ptep = huge_pte_alloc(mm, address, huge_page_size(h));
@@ -2878,18 +2948,41 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 	hugetlb_acct_memory(h, -(chg - freed));
 }

+#ifdef CONFIG_MEMORY_FAILURE
+
+/* Should be called in hugetlb_lock */
+static int is_hugepage_on_freelist(struct page *hpage)
+{
+	struct page *page;
+	struct page *tmp;
+	struct hstate *h = page_hstate(hpage);
+	int nid = page_to_nid(hpage);
+
+	list_for_each_entry_safe(page, tmp, &h->hugepage_freelists[nid], lru)
+		if (page == hpage)
+			return 1;
+	return 0;
+}
+
 /*
  * This function is called from memory failure code.
  * Assume the caller holds page lock of the head page.
  */
-void __isolate_hwpoisoned_huge_page(struct page *hpage)
+int dequeue_hwpoisoned_huge_page(struct page *hpage)
 {
 	struct hstate *h = page_hstate(hpage);
 	int nid = page_to_nid(hpage);
+	int ret = -EBUSY;

 	spin_lock(&hugetlb_lock);
-	list_del(&hpage->lru);
-	h->free_huge_pages--;
-	h->free_huge_pages_node[nid]--;
+	if (is_hugepage_on_freelist(hpage)) {
+		list_del(&hpage->lru);
+		set_page_refcounted(hpage);
+		h->free_huge_pages--;
+		h->free_huge_pages_node[nid]--;
+		ret = 0;
+	}
 	spin_unlock(&hugetlb_lock);
+	return ret;
 }
+#endif
mm/memory-failure.c

@@ -697,11 +697,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
  * Issues:
  * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
  *   To narrow down kill region to one page, we need to break up pmd.
- * - To support soft-offlining for hugepage, we need to support hugepage
- *   migration.
  */
 static int me_huge_page(struct page *p, unsigned long pfn)
 {
+	int res = 0;
 	struct page *hpage = compound_head(p);
 	/*
 	 * We can safely recover from error on free or reserved (i.e.
@@ -714,7 +713,8 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 	 * so there is no race between isolation and mapping/unmapping.
 	 */
 	if (!(page_mapping(hpage) || PageAnon(hpage))) {
-		__isolate_hwpoisoned_huge_page(hpage);
+		res = dequeue_hwpoisoned_huge_page(hpage);
+		if (!res)
 			return RECOVERED;
 	}
 	return DELAYED;
@@ -972,7 +972,10 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * We need/can do nothing about count=0 pages.
 	 * 1) it's a free page, and therefore in safe hand:
 	 *    prep_new_page() will be the gate keeper.
-	 * 2) it's part of a non-compound high order page.
+	 * 2) it's a free hugepage, which is also safe:
+	 *    an affected hugepage will be dequeued from hugepage freelist,
+	 *    so there's no concern about reusing it ever after.
+	 * 3) it's part of a non-compound high order page.
 	 *    Implies some kernel user: cannot stop them from
 	 *    R/W the page; let's pray that the page has been
 	 *    used and will be freed some time later.
@@ -984,6 +987,24 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 		if (is_free_buddy_page(p)) {
 			action_result(pfn, "free buddy", DELAYED);
 			return 0;
+		} else if (PageHuge(hpage)) {
+			/*
+			 * Check "just unpoisoned", "filter hit", and
+			 * "race with other subpage."
+			 */
+			lock_page_nosync(hpage);
+			if (!PageHWPoison(hpage)
+			    || (hwpoison_filter(p) && TestClearPageHWPoison(p))
+			    || (p != hpage && TestSetPageHWPoison(hpage))) {
+				atomic_long_sub(nr_pages, &mce_bad_pages);
+				return 0;
+			}
+			set_page_hwpoison_huge_page(hpage);
+			res = dequeue_hwpoisoned_huge_page(hpage);
+			action_result(pfn, "free huge",
+				      res ? IGNORED : DELAYED);
+			unlock_page(hpage);
+			return res;
 		} else {
 			action_result(pfn, "high order kernel", IGNORED);
 			return -EBUSY;
@@ -1145,6 +1166,16 @@ int unpoison_memory(unsigned long pfn)
 	nr_pages = 1 << compound_order(page);

 	if (!get_page_unless_zero(page)) {
+		/*
+		 * Since HWPoisoned hugepage should have non-zero refcount,
+		 * race between memory failure and unpoison seems to happen.
+		 * In such case unpoison fails and memory failure runs
+		 * to the end.
+		 */
+		if (PageHuge(page)) {
+			pr_debug("MCE: Memory failure is now running on free hugepage %#lx\n", pfn);
+			return 0;
+		}
 		if (TestClearPageHWPoison(p))
 			atomic_long_sub(nr_pages, &mce_bad_pages);
 		pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn);
@@ -1162,9 +1193,9 @@ int unpoison_memory(unsigned long pfn)
 		pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
 		atomic_long_sub(nr_pages, &mce_bad_pages);
 		freeit = 1;
-	}
-	if (PageHuge(p))
+		if (PageHuge(page))
 			clear_page_hwpoison_huge_page(page);
+	}
 	unlock_page(page);

 	put_page(page);
@@ -1178,6 +1209,10 @@ EXPORT_SYMBOL(unpoison_memory);
 static struct page *new_page(struct page *p, unsigned long private, int **x)
 {
 	int nid = page_to_nid(p);
-	return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
+	if (PageHuge(p))
+		return alloc_huge_page_node(page_hstate(compound_head(p)),
+						   nid);
+	else
+		return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
 }

@@ -1206,8 +1241,15 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 	 * was free.
 	 */
 	set_migratetype_isolate(p);
+	/*
+	 * When the target page is a free hugepage, just remove it
+	 * from free hugepage list.
+	 */
 	if (!get_page_unless_zero(compound_head(p))) {
-		if (is_free_buddy_page(p)) {
+		if (PageHuge(p)) {
+			pr_info("get_any_page: %#lx free huge page\n", pfn);
+			ret = dequeue_hwpoisoned_huge_page(compound_head(p));
+		} else if (is_free_buddy_page(p)) {
 			pr_info("get_any_page: %#lx free buddy page\n", pfn);
 			/* Set hwpoison bit while page is still isolated */
 			SetPageHWPoison(p);
@@ -1226,6 +1268,45 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 	return ret;
 }

+static int soft_offline_huge_page(struct page *page, int flags)
+{
+	int ret;
+	unsigned long pfn = page_to_pfn(page);
+	struct page *hpage = compound_head(page);
+	LIST_HEAD(pagelist);
+
+	ret = get_any_page(page, pfn, flags);
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		goto done;
+
+	if (PageHWPoison(hpage)) {
+		put_page(hpage);
+		pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn);
+		return -EBUSY;
+	}
+
+	/* Keep page count to indicate a given hugepage is isolated. */
+
+	list_add(&hpage->lru, &pagelist);
+	ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
+	if (ret) {
+		pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
+			 pfn, ret, page->flags);
+		if (ret > 0)
+			ret = -EIO;
+		return ret;
+	}
+done:
+	if (!PageHWPoison(hpage))
+		atomic_long_add(1 << compound_order(hpage), &mce_bad_pages);
+	set_page_hwpoison_huge_page(hpage);
+	dequeue_hwpoisoned_huge_page(hpage);
+	/* keep elevated page count for bad page */
+	return ret;
+}
+
 /**
  * soft_offline_page - Soft offline a page.
  * @page: page to offline
@@ -1253,6 +1334,9 @@ int soft_offline_page(struct page *page, int flags)
 	int ret;
 	unsigned long pfn = page_to_pfn(page);

+	if (PageHuge(page))
+		return soft_offline_huge_page(page, flags);
+
 	ret = get_any_page(page, pfn, flags);
 	if (ret < 0)
 		return ret;
mm/memory.c

@@ -1450,7 +1450,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				if (ret & VM_FAULT_OOM)
 					return i ? i : -ENOMEM;
 				if (ret &
-				    (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
+				    (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE|
+				     VM_FAULT_SIGBUS))
 					return i ? i : -EFAULT;
 				BUG();
 			}
mm/migrate.c (200 changed lines)

					@ -32,6 +32,7 @@
 | 
				
			||||||
#include <linux/security.h>
 | 
					#include <linux/security.h>
 | 
				
			||||||
#include <linux/memcontrol.h>
 | 
					#include <linux/memcontrol.h>
 | 
				
			||||||
#include <linux/syscalls.h>
 | 
					#include <linux/syscalls.h>
 | 
				
			||||||
 | 
					#include <linux/hugetlb.h>
 | 
				
			||||||
#include <linux/gfp.h>
 | 
					#include <linux/gfp.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "internal.h"
 | 
					#include "internal.h"
 | 
				
			||||||
| 
						 | 
@@ -95,6 +96,12 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
 
+	if (unlikely(PageHuge(new))) {
+		ptep = huge_pte_offset(mm, addr);
+		if (!ptep)
+			goto out;
+		ptl = &mm->page_table_lock;
+	} else {
 		pgd = pgd_offset(mm, addr);
 		if (!pgd_present(*pgd))
 			goto out;
@@ -115,6 +122,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		}
 
 		ptl = pte_lockptr(mm, pmd);
+	}
+
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
@@ -130,10 +139,19 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
+#ifdef CONFIG_HUGETLB_PAGE
+	if (PageHuge(new))
+		pte = pte_mkhuge(pte);
+#endif
 	flush_cache_page(vma, addr, pte_pfn(pte));
 	set_pte_at(mm, addr, ptep, pte);
 
+	if (PageHuge(new)) {
 		if (PageAnon(new))
+			hugepage_add_anon_rmap(new, vma, addr);
+		else
+			page_dup_rmap(new);
+	} else if (PageAnon(new))
 		page_add_anon_rmap(new, vma, addr);
 	else
 		page_add_file_rmap(new);
@@ -275,11 +293,59 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	return 0;
 }
 
+/*
+ * The expected number of remaining references is the same as that
+ * of migrate_page_move_mapping().
+ */
+int migrate_huge_page_move_mapping(struct address_space *mapping,
+				   struct page *newpage, struct page *page)
+{
+	int expected_count;
+	void **pslot;
+
+	if (!mapping) {
+		if (page_count(page) != 1)
+			return -EAGAIN;
+		return 0;
+	}
+
+	spin_lock_irq(&mapping->tree_lock);
+
+	pslot = radix_tree_lookup_slot(&mapping->page_tree,
+					page_index(page));
+
+	expected_count = 2 + page_has_private(page);
+	if (page_count(page) != expected_count ||
+	    (struct page *)radix_tree_deref_slot(pslot) != page) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	if (!page_freeze_refs(page, expected_count)) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	get_page(newpage);
+
+	radix_tree_replace_slot(pslot, newpage);
+
+	page_unfreeze_refs(page, expected_count);
+
+	__put_page(page);
+
+	spin_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
 /*
  * Copy the page to its new location
  */
-static void migrate_page_copy(struct page *newpage, struct page *page)
+void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	copy_highpage(newpage, page);
+	if (PageHuge(page))
+		copy_huge_page(newpage, page);
+	else
+		copy_highpage(newpage, page);
 
 	if (PageError(page))
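The expected count in migrate_huge_page_move_mapping() is the same as for a regular page: roughly one reference for the page cache's radix tree slot and one held by the migration path, plus one if private buffers are attached. page_freeze_refs() then succeeds only if nobody else holds a reference. A userspace sketch of that freeze step (C11 atomics; freeze_refs is a made-up analogue, not the kernel function):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Analogue of page_freeze_refs(): atomically replace the refcount with
 * zero only while it still equals the expected value, so no new
 * reference can be taken while the mapping slot is rewritten. */
static bool freeze_refs(atomic_int *count, int expected)
{
	return atomic_compare_exchange_strong(count, &expected, 0);
}

int main(void)
{
	atomic_int refs = 2;	/* radix tree + migration path, no private buffers */

	printf("freeze(expected=2): %s\n", freeze_refs(&refs, 2) ? "ok" : "-EAGAIN");
	printf("freeze again:       %s\n", freeze_refs(&refs, 2) ? "ok" : "-EAGAIN");
	return 0;
}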
@@ -723,6 +789,92 @@ move_newpage:
 	return rc;
 }
 
+/*
+ * Counterpart of unmap_and_move_page() for hugepage migration.
+ *
+ * This function doesn't wait for the completion of hugepage I/O
+ * because there is no race between I/O and migration for hugepages.
+ * Note that currently hugepage I/O occurs only in direct I/O
+ * where no lock is held and PG_writeback is irrelevant,
+ * and the writeback status of all subpages is counted in the reference
+ * count of the head page (i.e. if all subpages of a 2MB hugepage are
+ * under direct I/O, the reference of the head page is 512 and a bit more.)
+ * This means that when we try to migrate a hugepage whose subpages are
+ * doing direct I/O, some references remain after try_to_unmap() and
+ * hugepage migration fails without data corruption.
+ *
+ * There is also no race when direct I/O is issued on a page under migration,
+ * because then the pte is replaced with a migration swap entry and the
+ * direct I/O code will wait in the page fault for migration to complete.
+ */
+static int unmap_and_move_huge_page(new_page_t get_new_page,
+				unsigned long private, struct page *hpage,
+				int force, int offlining)
+{
+	int rc = 0;
+	int *result = NULL;
+	struct page *new_hpage = get_new_page(hpage, private, &result);
+	int rcu_locked = 0;
+	struct anon_vma *anon_vma = NULL;
+
+	if (!new_hpage)
+		return -ENOMEM;
+
+	rc = -EAGAIN;
+
+	if (!trylock_page(hpage)) {
+		if (!force)
+			goto out;
+		lock_page(hpage);
+	}
+
+	if (PageAnon(hpage)) {
+		rcu_read_lock();
+		rcu_locked = 1;
+
+		if (page_mapped(hpage)) {
+			anon_vma = page_anon_vma(hpage);
+			atomic_inc(&anon_vma->external_refcount);
+		}
+	}
+
+	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+	if (!page_mapped(hpage))
+		rc = move_to_new_page(new_hpage, hpage, 1);
+
+	if (rc)
+		remove_migration_ptes(hpage, hpage);
+
+	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
+					    &anon_vma->lock)) {
+		int empty = list_empty(&anon_vma->head);
+		spin_unlock(&anon_vma->lock);
+		if (empty)
+			anon_vma_free(anon_vma);
+	}
+
+	if (rcu_locked)
+		rcu_read_unlock();
+out:
+	unlock_page(hpage);
+
+	if (rc != -EAGAIN) {
+		list_del(&hpage->lru);
+		put_page(hpage);
+	}
+
+	put_page(new_hpage);
+
+	if (result) {
+		if (rc)
+			*result = rc;
+		else
+			*result = page_to_nid(new_hpage);
+	}
+	return rc;
+}
+
 /*
  * migrate_pages
  *
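To spell out the comment's arithmetic: a 2 MB hugepage spans 512 base pages of 4 KB, and direct I/O pins one reference per in-flight subpage on the head page, so try_to_unmap() cannot bring the count down to the expected value while I/O is outstanding, and the migration attempt fails cleanly instead of corrupting data. A trivial check of those numbers:

#include <stdio.h>

int main(void)
{
	unsigned long hpage = 2UL << 20;	/* 2 MB hugepage */
	unsigned long base  = 4UL << 10;	/* 4 KB base page */

	/* One pinned reference per subpage under direct I/O, all charged
	 * to the head page: the "512 and a bit more" in the comment. */
	printf("subpage references pinned: %lu\n", hpage / base);
	return 0;
}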
@@ -788,6 +940,52 @@ out:
 	return nr_failed + retry;
 }
 
+int migrate_huge_pages(struct list_head *from,
+		new_page_t get_new_page, unsigned long private, int offlining)
+{
+	int retry = 1;
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page;
+	struct page *page2;
+	int rc;
+
+	for (pass = 0; pass < 10 && retry; pass++) {
+		retry = 0;
+
+		list_for_each_entry_safe(page, page2, from, lru) {
+			cond_resched();
+
+			rc = unmap_and_move_huge_page(get_new_page,
+					private, page, pass > 2, offlining);
+
+			switch(rc) {
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case 0:
+				break;
+			default:
+				/* Permanent failure */
+				nr_failed++;
+				break;
+			}
+		}
+	}
+	rc = 0;
+out:
+
+	list_for_each_entry_safe(page, page2, from, lru)
+		put_page(page);
+
+	if (rc)
+		return rc;
+
+	return nr_failed + retry;
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Move a list of individual pages
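migrate_huge_pages() retries transient failures for up to ten passes and only escalates to the blocking "force" page lock from the fourth pass (pass > 2). A runnable userspace analogy of that policy (try_migrate is a made-up stand-in for unmap_and_move_huge_page):

#include <stdbool.h>
#include <stdio.h>

/* Pretend migration succeeds only once forcing is allowed. */
static int try_migrate(bool force)
{
	return force ? 0 : -1;	/* -1 plays the role of -EAGAIN */
}

int main(void)
{
	int pass, rc = -1, retry = 1;

	for (pass = 0; pass < 10 && retry; pass++) {
		retry = 0;
		rc = try_migrate(pass > 2);	/* force from the 4th pass */
		if (rc) {
			retry = 1;
			printf("pass %d: -EAGAIN, will retry\n", pass);
		} else {
			printf("pass %d: migrated (forced)\n", pass);
		}
	}
	return 0;
}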
mm/rmap.c (23 changes)
@@ -781,9 +781,9 @@ void page_move_anon_rmap(struct page *page,
 
 /**
  * __page_set_anon_rmap - set up new anonymous rmap
- * @page:	the page to add the mapping to
- * @vma:	the vm area in which the mapping is added
- * @address:	the user virtual address mapped
+ * @page:	Page to add to rmap
+ * @vma:	VM area to add page to.
+ * @address:	User virtual address of the mapping
  * @exclusive:	the page is exclusively owned by the current process
  */
 static void __page_set_anon_rmap(struct page *page,
@@ -793,25 +793,16 @@ static void __page_set_anon_rmap(struct page *page,
 
 	BUG_ON(!anon_vma);
 
+	if (PageAnon(page))
+		return;
+
 	/*
 	 * If the page isn't exclusively mapped into this vma,
 	 * we must use the _oldest_ possible anon_vma for the
 	 * page mapping!
 	 */
-	if (!exclusive) {
-		if (PageAnon(page))
-			return;
+	if (!exclusive)
 		anon_vma = anon_vma->root;
-	} else {
-		/*
-		 * In this case, swapped-out-but-not-discarded swap-cache
-		 * is remapped. So, no need to update page->mapping here.
-		 * We convice anon_vma poitned by page->mapping is not obsolete
-		 * because vma->anon_vma is necessary to be a family of it.
-		 */
-		if (PageAnon(page))
-			return;
-	}
 
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;
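For context on the tail of this hunk: page->mapping overloads a single pointer field, and the low bit (PAGE_MAPPING_ANON) marks that it currently holds an anon_vma rather than an address_space. A standalone sketch of the pointer-tagging scheme (userspace C; the struct is illustrative, its alignment keeps bit 0 free):

#include <stdint.h>
#include <stdio.h>

#define PAGE_MAPPING_ANON 1	/* low bit tags an anon_vma pointer */

struct anon_vma { int dummy; };	/* illustrative; 4-byte alignment frees bit 0 */

int main(void)
{
	struct anon_vma av, *anon_vma = &av;

	/* Tagging, as in __page_set_anon_rmap() above. */
	uintptr_t mapping = (uintptr_t)anon_vma + PAGE_MAPPING_ANON;

	/* PageAnon() equivalent: test the tag bit. */
	printf("PageAnon: %d\n", (int)(mapping & PAGE_MAPPING_ANON));

	/* Clearing the bit recovers the anon_vma pointer. */
	printf("roundtrip ok: %d\n",
	       (struct anon_vma *)(mapping & ~(uintptr_t)PAGE_MAPPING_ANON) == anon_vma);
	return 0;
}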