mm: add FOLL_MLOCK follow_page flag.
Move the code to mlock pages from __mlock_vma_pages_range() to
follow_page().

This allows __mlock_vma_pages_range() to not have to break down work
into 16-page batches.

An additional motivation for doing this within the present patch series
is that it'll make it easier for a later change to drop mmap_sem when
blocking on disk (we'd like to be able to resume at the page that was
read from disk instead of at the start of a 16-page batch).

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent fed067da46
commit 110d74a921

3 changed files with 28 additions and 60 deletions
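Before the per-file diffs, here is a minimal userspace sketch of the shape of the change; it is only an illustration, not kernel code, and every name in it (demo_follow_page, demo_mlock_range, DEMO_FOLL_MLOCK, struct demo_page) is hypothetical. The point it demonstrates is the one made above: once the per-page marking happens inside the lookup helper itself, the range walker needs no 16-entry batch array and can resume from any index.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct demo_page {
	bool present;
	bool mlocked;
};

#define DEMO_FOLL_MLOCK 0x01	/* hypothetical flag, playing FOLL_MLOCK's role */

/*
 * Lookup helper in the spirit of follow_page() after this patch: when the
 * caller passes the mlock flag, the per-page marking happens right here.
 */
static int demo_follow_page(struct demo_page *page, unsigned int flags)
{
	if (!page->present)
		return -1;			/* hole: nothing to mark yet */
	if (flags & DEMO_FOLL_MLOCK)
		page->mlocked = true;
	return 0;
}

/*
 * Range walker in the spirit of the new __mlock_vma_pages_range(): a single
 * pass over the whole range, no fixed-size batch array, and trivially
 * resumable from any index if the walk ever has to stop and wait.
 */
static long demo_mlock_range(struct demo_page *pages, size_t start, size_t end)
{
	size_t i;

	for (i = start; i < end; i++)
		if (demo_follow_page(&pages[i], DEMO_FOLL_MLOCK) < 0)
			return (long)i;		/* index to resume from later */
	return (long)end;
}

int main(void)
{
	struct demo_page pages[64];
	size_t i;

	for (i = 0; i < 64; i++) {
		pages[i].present = true;
		pages[i].mlocked = false;
	}
	pages[40].present = false;		/* simulate a not-yet-faulted page */

	printf("stopped at %ld, page 7 mlocked: %d\n",
	       demo_mlock_range(pages, 0, 64), pages[7].mlocked);
	return 0;
}

Built as a plain C99 program (cc -std=c99), it prints where the walk stopped and confirms that earlier pages were marked along the way.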
include/linux/mm.h: 1 changed line

@@ -1415,6 +1415,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_DUMP	0x08	/* give error on hole if it would be zero */
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
+#define FOLL_MLOCK	0x40	/* mark page as mlocked */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
mm/memory.c: 22 changed lines
@@ -1310,6 +1310,28 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		 */
 		mark_page_accessed(page);
 	}
+	if (flags & FOLL_MLOCK) {
+		/*
+		 * The preliminary mapping check is mainly to avoid the
+		 * pointless overhead of lock_page on the ZERO_PAGE
+		 * which might bounce very badly if there is contention.
+		 *
+		 * If the page is already locked, we don't need to
+		 * handle it now - vmscan will handle it later if and
+		 * when it attempts to reclaim the page.
+		 */
+		if (page->mapping && trylock_page(page)) {
+			lru_add_drain();  /* push cached pages to LRU */
+			/*
+			 * Because we lock page here and migration is
+			 * blocked by the pte's page reference, we need
+			 * only check for file-cache page truncation.
+			 */
+			if (page->mapping)
+				mlock_vma_page(page);
+			unlock_page(page);
+		}
+	}
 unlock:
 	pte_unmap_unlock(ptep, ptl);
 out:
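A side note on the hunk above: the check/trylock/recheck ordering can be seen in isolation in the userspace sketch below. It is only an analogy, not kernel code; struct demo_page, its pthread mutex, and maybe_mlock() are hypothetical stand-ins, with the mutex playing the role of the page lock and a non-NULL mapping pointer standing in for a still-mapped file-cache page.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for struct page: a 'mapping' pointer plus a lock. */
struct demo_page {
	void *mapping;
	pthread_mutex_t lock;
	bool mlocked;
};

/*
 * Same ordering as the new follow_page() code: a cheap unlocked check first
 * (skip obviously uninteresting pages), a trylock so a contended page is
 * simply skipped rather than waited on, and a re-check of 'mapping' under
 * the lock because it may have been cleared in the meantime.
 */
static void maybe_mlock(struct demo_page *page)
{
	if (page->mapping && pthread_mutex_trylock(&page->lock) == 0) {
		if (page->mapping)	/* still mapped now that we hold the lock */
			page->mlocked = true;
		pthread_mutex_unlock(&page->lock);
	}
}

int main(void)
{
	struct demo_page page = {
		.mapping = &page,	/* any non-NULL value: "has a mapping" */
		.lock = PTHREAD_MUTEX_INITIALIZER,
	};

	maybe_mlock(&page);
	printf("mlocked: %d\n", page.mlocked);
	return 0;
}

Build with cc -pthread. The reason for re-checking mapping under the lock is the same one the kernel comment gives: the cheap unlocked check can race with truncation, and only the locked check is authoritative.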
mm/mlock.c: 65 changed lines
@@ -159,10 +159,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = start;
-	struct page *pages[16]; /* 16 gives a reasonable batch */
 	int nr_pages = (end - start) / PAGE_SIZE;
-	int ret = 0;
 	int gup_flags;
+	int ret;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
 	VM_BUG_ON(end   & ~PAGE_MASK);
@@ -170,7 +169,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	VM_BUG_ON(end   > vma->vm_end);
 	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-	gup_flags = FOLL_TOUCH | FOLL_GET;
+	gup_flags = FOLL_TOUCH | FOLL_MLOCK;
 	/*
 	 * We want to touch writable mappings with a write fault in order
 	 * to break COW, except for shared mappings because these don't COW
@@ -185,63 +184,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 		nr_pages--;
 	}
 
-	while (nr_pages > 0) {
-		int i;
-
-		cond_resched();
-
-		/*
-		 * get_user_pages makes pages present if we are
-		 * setting mlock. and this extra reference count will
-		 * disable migration of this page.  However, page may
-		 * still be truncated out from under us.
-		 */
-		ret = __get_user_pages(current, mm, addr,
-				min_t(int, nr_pages, ARRAY_SIZE(pages)),
-				gup_flags, pages, NULL);
-		/*
-		 * This can happen for, e.g., VM_NONLINEAR regions before
-		 * a page has been allocated and mapped at a given offset,
-		 * or for addresses that map beyond end of a file.
-		 * We'll mlock the pages if/when they get faulted in.
-		 */
-		if (ret < 0)
-			break;
-
-		lru_add_drain();	/* push cached pages to LRU */
-
-		for (i = 0; i < ret; i++) {
-			struct page *page = pages[i];
-
-			if (page->mapping) {
-				/*
-				 * That preliminary check is mainly to avoid
-				 * the pointless overhead of lock_page on the
-				 * ZERO_PAGE: which might bounce very badly if
-				 * there is contention.  However, we're still
-				 * dirtying its cacheline with get/put_page:
-				 * we'll add another __get_user_pages flag to
-				 * avoid it if that case turns out to matter.
-				 */
-				lock_page(page);
-				/*
-				 * Because we lock page here and migration is
-				 * blocked by the elevated reference, we need
-				 * only check for file-cache page truncation.
-				 */
-				if (page->mapping)
-					mlock_vma_page(page);
-				unlock_page(page);
-			}
-			put_page(page);	/* ref from get_user_pages() */
-		}
-
-		addr += ret * PAGE_SIZE;
-		nr_pages -= ret;
-		ret = 0;
-	}
-
-	return ret;	/* 0 or negative error code */
+	ret = __get_user_pages(current, mm, addr, nr_pages, gup_flags,
+			       NULL, NULL);
+	return max(ret, 0);	/* 0 or negative error code */
 }
 
 /*