hugetlbfs: per mount huge page sizes
Add the ability to configure the hugetlb hstate used on a per-mount basis.

- Add a new pagesize= option to the hugetlbfs mount that allows setting the page size.
- This option causes the mount code to find the hstate corresponding to the specified size, and sets up a pointer to the hstate in the mount's superblock.
- Change the hstate accessors to use this information rather than the global_hstate they were using (requires a slight change in mm/memory.c so we don't NULL deref in the error-unmap path -- see comments).

[np: take hstate out of hugetlbfs inode and vma->vm_private_data]

Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
					parent
					
						
							
								e5ff215941
							
						
					
				
			
			
				commit
				
					
						a137e1cc6d
					
				
			
		
					 4 changed files with 64 additions and 29 deletions
				
			
		|  | @ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group; | ||||||
| enum { | enum { | ||||||
| 	Opt_size, Opt_nr_inodes, | 	Opt_size, Opt_nr_inodes, | ||||||
| 	Opt_mode, Opt_uid, Opt_gid, | 	Opt_mode, Opt_uid, Opt_gid, | ||||||
|  | 	Opt_pagesize, | ||||||
| 	Opt_err, | 	Opt_err, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -62,6 +63,7 @@ static match_table_t tokens = { | ||||||
| 	{Opt_mode,	"mode=%o"}, | 	{Opt_mode,	"mode=%o"}, | ||||||
| 	{Opt_uid,	"uid=%u"}, | 	{Opt_uid,	"uid=%u"}, | ||||||
| 	{Opt_gid,	"gid=%u"}, | 	{Opt_gid,	"gid=%u"}, | ||||||
|  | 	{Opt_pagesize,	"pagesize=%s"}, | ||||||
| 	{Opt_err,	NULL}, | 	{Opt_err,	NULL}, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | ||||||
| 	char *p, *rest; | 	char *p, *rest; | ||||||
| 	substring_t args[MAX_OPT_ARGS]; | 	substring_t args[MAX_OPT_ARGS]; | ||||||
| 	int option; | 	int option; | ||||||
|  | 	unsigned long long size = 0; | ||||||
|  | 	enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE; | ||||||
| 
 | 
 | ||||||
| 	if (!options) | 	if (!options) | ||||||
| 		return 0; | 		return 0; | ||||||
|  | @ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | ||||||
| 			break; | 			break; | ||||||
| 
 | 
 | ||||||
| 		case Opt_size: { | 		case Opt_size: { | ||||||
|  			unsigned long long size; |  | ||||||
| 			/* memparse() will accept a K/M/G without a digit */ | 			/* memparse() will accept a K/M/G without a digit */ | ||||||
| 			if (!isdigit(*args[0].from)) | 			if (!isdigit(*args[0].from)) | ||||||
| 				goto bad_val; | 				goto bad_val; | ||||||
| 			size = memparse(args[0].from, &rest); | 			size = memparse(args[0].from, &rest); | ||||||
| 			if (*rest == '%') { | 			setsize = SIZE_STD; | ||||||
| 				size <<= HPAGE_SHIFT; | 			if (*rest == '%') | ||||||
| 				size *= max_huge_pages; | 				setsize = SIZE_PERCENT; | ||||||
| 				do_div(size, 100); |  | ||||||
| 			} |  | ||||||
| 			pconfig->nr_blocks = (size >> HPAGE_SHIFT); |  | ||||||
| 			break; | 			break; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | @ -801,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | ||||||
| 			pconfig->nr_inodes = memparse(args[0].from, &rest); | 			pconfig->nr_inodes = memparse(args[0].from, &rest); | ||||||
| 			break; | 			break; | ||||||
| 
 | 
 | ||||||
|  | 		case Opt_pagesize: { | ||||||
|  | 			unsigned long ps; | ||||||
|  | 			ps = memparse(args[0].from, &rest); | ||||||
|  | 			pconfig->hstate = size_to_hstate(ps); | ||||||
|  | 			if (!pconfig->hstate) { | ||||||
|  | 				printk(KERN_ERR | ||||||
|  | 				"hugetlbfs: Unsupported page size %lu MB\n", | ||||||
|  | 					ps >> 20); | ||||||
|  | 				return -EINVAL; | ||||||
|  | 			} | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
| 		default: | 		default: | ||||||
| 			printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", | 			printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", | ||||||
| 				 p); | 				 p); | ||||||
|  | @ -808,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | ||||||
| 			break; | 			break; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
|  | 	/* Do size after hstate is set up */ | ||||||
|  | 	if (setsize > NO_SIZE) { | ||||||
|  | 		struct hstate *h = pconfig->hstate; | ||||||
|  | 		if (setsize == SIZE_PERCENT) { | ||||||
|  | 			size <<= huge_page_shift(h); | ||||||
|  | 			size *= h->max_huge_pages; | ||||||
|  | 			do_div(size, 100); | ||||||
|  | 		} | ||||||
|  | 		pconfig->nr_blocks = (size >> huge_page_shift(h)); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| 
 | 
 | ||||||
| bad_val: | bad_val: | ||||||
|  | @ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) | ||||||
| 	config.uid = current->fsuid; | 	config.uid = current->fsuid; | ||||||
| 	config.gid = current->fsgid; | 	config.gid = current->fsgid; | ||||||
| 	config.mode = 0755; | 	config.mode = 0755; | ||||||
|  | 	config.hstate = &default_hstate; | ||||||
| 	ret = hugetlbfs_parse_options(data, &config); | 	ret = hugetlbfs_parse_options(data, &config); | ||||||
| 	if (ret) | 	if (ret) | ||||||
| 		return ret; | 		return ret; | ||||||
|  | @ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) | ||||||
| 	if (!sbinfo) | 	if (!sbinfo) | ||||||
| 		return -ENOMEM; | 		return -ENOMEM; | ||||||
| 	sb->s_fs_info = sbinfo; | 	sb->s_fs_info = sbinfo; | ||||||
|  | 	sbinfo->hstate = config.hstate; | ||||||
| 	spin_lock_init(&sbinfo->stat_lock); | 	spin_lock_init(&sbinfo->stat_lock); | ||||||
| 	sbinfo->max_blocks = config.nr_blocks; | 	sbinfo->max_blocks = config.nr_blocks; | ||||||
| 	sbinfo->free_blocks = config.nr_blocks; | 	sbinfo->free_blocks = config.nr_blocks; | ||||||
| 	sbinfo->max_inodes = config.nr_inodes; | 	sbinfo->max_inodes = config.nr_inodes; | ||||||
| 	sbinfo->free_inodes = config.nr_inodes; | 	sbinfo->free_inodes = config.nr_inodes; | ||||||
| 	sb->s_maxbytes = MAX_LFS_FILESIZE; | 	sb->s_maxbytes = MAX_LFS_FILESIZE; | ||||||
| 	sb->s_blocksize = HPAGE_SIZE; | 	sb->s_blocksize = huge_page_size(config.hstate); | ||||||
| 	sb->s_blocksize_bits = HPAGE_SHIFT; | 	sb->s_blocksize_bits = huge_page_shift(config.hstate); | ||||||
| 	sb->s_magic = HUGETLBFS_MAGIC; | 	sb->s_magic = HUGETLBFS_MAGIC; | ||||||
| 	sb->s_op = &hugetlbfs_ops; | 	sb->s_op = &hugetlbfs_ops; | ||||||
| 	sb->s_time_gran = 1; | 	sb->s_time_gran = 1; | ||||||
|  |  | ||||||
|  | @ -100,6 +100,7 @@ struct hugetlbfs_config { | ||||||
| 	umode_t mode; | 	umode_t mode; | ||||||
| 	long	nr_blocks; | 	long	nr_blocks; | ||||||
| 	long	nr_inodes; | 	long	nr_inodes; | ||||||
|  | 	struct hstate *hstate; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct hugetlbfs_sb_info { | struct hugetlbfs_sb_info { | ||||||
|  | @ -108,6 +109,7 @@ struct hugetlbfs_sb_info { | ||||||
| 	long	max_inodes;   /* inodes allowed */ | 	long	max_inodes;   /* inodes allowed */ | ||||||
| 	long	free_inodes;  /* inodes free */ | 	long	free_inodes;  /* inodes free */ | ||||||
| 	spinlock_t	stat_lock; | 	spinlock_t	stat_lock; | ||||||
|  | 	struct hstate *hstate; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -191,19 +193,21 @@ extern unsigned int default_hstate_idx; | ||||||
| 
 | 
 | ||||||
| #define default_hstate (hstates[default_hstate_idx]) | #define default_hstate (hstates[default_hstate_idx]) | ||||||
| 
 | 
 | ||||||
| static inline struct hstate *hstate_vma(struct vm_area_struct *vma) | static inline struct hstate *hstate_inode(struct inode *i) | ||||||
| { | { | ||||||
| 	return &default_hstate; | 	struct hugetlbfs_sb_info *hsb; | ||||||
|  | 	hsb = HUGETLBFS_SB(i->i_sb); | ||||||
|  | 	return hsb->hstate; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline struct hstate *hstate_file(struct file *f) | static inline struct hstate *hstate_file(struct file *f) | ||||||
| { | { | ||||||
| 	return &default_hstate; | 	return hstate_inode(f->f_dentry->d_inode); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline struct hstate *hstate_inode(struct inode *i) | static inline struct hstate *hstate_vma(struct vm_area_struct *vma) | ||||||
| { | { | ||||||
| 	return &default_hstate; | 	return hstate_file(vma->vm_file); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline unsigned long huge_page_size(struct hstate *h) | static inline unsigned long huge_page_size(struct hstate *h) | ||||||
|  |  | ||||||
							
								
								
									
										10
									
								
								mm/hugetlb.c
									
										
									
									
									
								
							
							
						
						
									
										10
									
								
								mm/hugetlb.c
									
										
									
									
									
								
							|  | @ -1439,19 +1439,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | ||||||
| void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | ||||||
| 			  unsigned long end, struct page *ref_page) | 			  unsigned long end, struct page *ref_page) | ||||||
| { | { | ||||||
| 	/*
 |  | ||||||
| 	 * It is undesirable to test vma->vm_file as it should be non-null |  | ||||||
| 	 * for valid hugetlb area. However, vm_file will be NULL in the error |  | ||||||
| 	 * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails, |  | ||||||
| 	 * do_mmap_pgoff() nullifies vma->vm_file before calling this function |  | ||||||
| 	 * to clean up. Since no pte has actually been setup, it is safe to |  | ||||||
| 	 * do nothing in this case. |  | ||||||
| 	 */ |  | ||||||
| 	if (vma->vm_file) { |  | ||||||
| 	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); | 	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); | ||||||
| 	__unmap_hugepage_range(vma, start, end, ref_page); | 	__unmap_hugepage_range(vma, start, end, ref_page); | ||||||
| 	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); | 	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); | ||||||
| 	} |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  |  | ||||||
							
								
								
									
										14
									
								
								mm/memory.c
									
										
									
									
									
								
							
							
						
						
									
										14
									
								
								mm/memory.c
									
										
									
									
									
								
							|  | @ -901,9 +901,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| 			if (unlikely(is_vm_hugetlb_page(vma))) { | 			if (unlikely(is_vm_hugetlb_page(vma))) { | ||||||
|  | 				/*
 | ||||||
|  | 				 * It is undesirable to test vma->vm_file as it | ||||||
|  | 				 * should be non-null for valid hugetlb area. | ||||||
|  | 				 * However, vm_file will be NULL in the error | ||||||
|  | 				 * cleanup path of do_mmap_pgoff. When | ||||||
|  | 				 * hugetlbfs ->mmap method fails, | ||||||
|  | 				 * do_mmap_pgoff() nullifies vma->vm_file | ||||||
|  | 				 * before calling this function to clean up. | ||||||
|  | 				 * Since no pte has actually been setup, it is | ||||||
|  | 				 * safe to do nothing in this case. | ||||||
|  | 				 */ | ||||||
|  | 				if (vma->vm_file) { | ||||||
| 					unmap_hugepage_range(vma, start, end, NULL); | 					unmap_hugepage_range(vma, start, end, NULL); | ||||||
| 					zap_work -= (end - start) / | 					zap_work -= (end - start) / | ||||||
| 					pages_per_huge_page(hstate_vma(vma)); | 					pages_per_huge_page(hstate_vma(vma)); | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
| 				start = end; | 				start = end; | ||||||
| 			} else | 			} else | ||||||
| 				start = unmap_page_range(*tlbp, vma, | 				start = unmap_page_range(*tlbp, vma, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Andi Kleen
				Andi Kleen