Replace the fd_sets in struct fdtable with an array of unsigned longs
Replace the fd_sets in struct fdtable with an array of unsigned longs and then
use the standard non-atomic bit operations rather than the FD_* macros.
This:
 (1) Removes the abuses of struct fd_set:
     (a) Since we don't want to allocate a full fd_set the vast majority of the
     	 time, we actually, in effect, just allocate a just-big-enough array of
     	 unsigned longs and cast it to an fd_set type - so why bother with the
     	 fd_set at all?
     (b) Some places outside of the core fdtable handling code (such as
     	 SELinux) want to look inside the array of unsigned longs hidden inside
     	 the fd_set struct for more efficient iteration over the entire set.
 (2) Eliminates the use of FD_*() macros in the kernel completely.
 (3) Permits the __FD_*() macros to be deleted entirely where not exposed to
     userspace.
Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120216174954.23314.48147.stgit@warthog.procyon.org.uk
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
	
	
This commit is contained in:
		
					parent
					
						
							
								1dce27c5aa
							
						
					
				
			
			
				commit
				
					
						1fd36adcd9
					
				
			
		
					 6 changed files with 37 additions and 47 deletions
				
			
		|  | @ -1026,10 +1026,10 @@ static void flush_old_files(struct files_struct * files) | |||
| 		fdt = files_fdtable(files); | ||||
| 		if (i >= fdt->max_fds) | ||||
| 			break; | ||||
| 		set = fdt->close_on_exec->fds_bits[j]; | ||||
| 		set = fdt->close_on_exec[j]; | ||||
| 		if (!set) | ||||
| 			continue; | ||||
| 		fdt->close_on_exec->fds_bits[j] = 0; | ||||
| 		fdt->close_on_exec[j] = 0; | ||||
| 		spin_unlock(&files->file_lock); | ||||
| 		for ( ; set ; i++,set >>= 1) { | ||||
| 			if (set & 1) { | ||||
|  |  | |||
							
								
								
									
										44
									
								
								fs/file.c
									
										
									
									
									
								
							
							
						
						
									
										44
									
								
								fs/file.c
									
										
									
									
									
								
							|  | @ -40,7 +40,7 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */ | |||
|  */ | ||||
| static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | ||||
| 
 | ||||
| static void *alloc_fdmem(unsigned int size) | ||||
| static void *alloc_fdmem(size_t size) | ||||
| { | ||||
| 	/*
 | ||||
| 	 * Very large allocations can stress page reclaim, so fall back to | ||||
|  | @ -142,7 +142,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) | |||
| static struct fdtable * alloc_fdtable(unsigned int nr) | ||||
| { | ||||
| 	struct fdtable *fdt; | ||||
| 	char *data; | ||||
| 	void *data; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Figure out how many fds we actually want to support in this fdtable. | ||||
|  | @ -172,14 +172,15 @@ static struct fdtable * alloc_fdtable(unsigned int nr) | |||
| 	data = alloc_fdmem(nr * sizeof(struct file *)); | ||||
| 	if (!data) | ||||
| 		goto out_fdt; | ||||
| 	fdt->fd = (struct file **)data; | ||||
| 	data = alloc_fdmem(max_t(unsigned int, | ||||
| 	fdt->fd = data; | ||||
| 
 | ||||
| 	data = alloc_fdmem(max_t(size_t, | ||||
| 				 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); | ||||
| 	if (!data) | ||||
| 		goto out_arr; | ||||
| 	fdt->open_fds = (fd_set *)data; | ||||
| 	data += nr / BITS_PER_BYTE; | ||||
| 	fdt->close_on_exec = (fd_set *)data; | ||||
| 	fdt->open_fds = data; | ||||
| 	data += nr / BITS_PER_BYTE; | ||||
| 	fdt->close_on_exec = data; | ||||
| 	fdt->next = NULL; | ||||
| 
 | ||||
| 	return fdt; | ||||
|  | @ -275,11 +276,11 @@ static int count_open_files(struct fdtable *fdt) | |||
| 	int i; | ||||
| 
 | ||||
| 	/* Find the last open fd */ | ||||
| 	for (i = size/(8*sizeof(long)); i > 0; ) { | ||||
| 		if (fdt->open_fds->fds_bits[--i]) | ||||
| 	for (i = size / BITS_PER_LONG; i > 0; ) { | ||||
| 		if (fdt->open_fds[--i]) | ||||
| 			break; | ||||
| 	} | ||||
| 	i = (i+1) * 8 * sizeof(long); | ||||
| 	i = (i + 1) * BITS_PER_LONG; | ||||
| 	return i; | ||||
| } | ||||
| 
 | ||||
|  | @ -306,8 +307,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 	newf->next_fd = 0; | ||||
| 	new_fdt = &newf->fdtab; | ||||
| 	new_fdt->max_fds = NR_OPEN_DEFAULT; | ||||
| 	new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; | ||||
| 	new_fdt->open_fds = (fd_set *)&newf->open_fds_init; | ||||
| 	new_fdt->close_on_exec = newf->close_on_exec_init; | ||||
| 	new_fdt->open_fds = newf->open_fds_init; | ||||
| 	new_fdt->fd = &newf->fd_array[0]; | ||||
| 	new_fdt->next = NULL; | ||||
| 
 | ||||
|  | @ -350,10 +351,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 	old_fds = old_fdt->fd; | ||||
| 	new_fds = new_fdt->fd; | ||||
| 
 | ||||
| 	memcpy(new_fdt->open_fds->fds_bits, | ||||
| 		old_fdt->open_fds->fds_bits, open_files/8); | ||||
| 	memcpy(new_fdt->close_on_exec->fds_bits, | ||||
| 		old_fdt->close_on_exec->fds_bits, open_files/8); | ||||
| 	memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8); | ||||
| 	memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8); | ||||
| 
 | ||||
| 	for (i = open_files; i != 0; i--) { | ||||
| 		struct file *f = *old_fds++; | ||||
|  | @ -380,10 +379,10 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 
 | ||||
| 	if (new_fdt->max_fds > open_files) { | ||||
| 		int left = (new_fdt->max_fds - open_files) / 8; | ||||
| 		int start = open_files / (8 * sizeof(unsigned long)); | ||||
| 		int start = open_files / BITS_PER_LONG; | ||||
| 
 | ||||
| 		memset(&new_fdt->open_fds->fds_bits[start], 0, left); | ||||
| 		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); | ||||
| 		memset(&new_fdt->open_fds[start], 0, left); | ||||
| 		memset(&new_fdt->close_on_exec[start], 0, left); | ||||
| 	} | ||||
| 
 | ||||
| 	rcu_assign_pointer(newf->fdt, new_fdt); | ||||
|  | @ -419,8 +418,8 @@ struct files_struct init_files = { | |||
| 	.fdtab		= { | ||||
| 		.max_fds	= NR_OPEN_DEFAULT, | ||||
| 		.fd		= &init_files.fd_array[0], | ||||
| 		.close_on_exec	= (fd_set *)&init_files.close_on_exec_init, | ||||
| 		.open_fds	= (fd_set *)&init_files.open_fds_init, | ||||
| 		.close_on_exec	= init_files.close_on_exec_init, | ||||
| 		.open_fds	= init_files.open_fds_init, | ||||
| 	}, | ||||
| 	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock), | ||||
| }; | ||||
|  | @ -443,8 +442,7 @@ repeat: | |||
| 		fd = files->next_fd; | ||||
| 
 | ||||
| 	if (fd < fdt->max_fds) | ||||
| 		fd = find_next_zero_bit(fdt->open_fds->fds_bits, | ||||
| 					   fdt->max_fds, fd); | ||||
| 		fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); | ||||
| 
 | ||||
| 	error = expand_files(files, fd); | ||||
| 	if (error < 0) | ||||
|  |  | |||
|  | @ -348,7 +348,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) | |||
| 	set = ~(~0UL << (n & (__NFDBITS-1))); | ||||
| 	n /= __NFDBITS; | ||||
| 	fdt = files_fdtable(current->files); | ||||
| 	open_fds = fdt->open_fds->fds_bits+n; | ||||
| 	open_fds = fdt->open_fds + n; | ||||
| 	max = 0; | ||||
| 	if (set) { | ||||
| 		set &= BITS(fds, n); | ||||
|  |  | |||
|  | @ -21,51 +21,43 @@ | |||
|  */ | ||||
| #define NR_OPEN_DEFAULT BITS_PER_LONG | ||||
| 
 | ||||
| /*
 | ||||
|  * The embedded_fd_set is a small fd_set, | ||||
|  * suitable for most tasks (which open <= BITS_PER_LONG files) | ||||
|  */ | ||||
| struct embedded_fd_set { | ||||
| 	unsigned long fds_bits[1]; | ||||
| }; | ||||
| 
 | ||||
| struct fdtable { | ||||
| 	unsigned int max_fds; | ||||
| 	struct file __rcu **fd;      /* current fd array */ | ||||
| 	fd_set *close_on_exec; | ||||
| 	fd_set *open_fds; | ||||
| 	unsigned long *close_on_exec; | ||||
| 	unsigned long *open_fds; | ||||
| 	struct rcu_head rcu; | ||||
| 	struct fdtable *next; | ||||
| }; | ||||
| 
 | ||||
| static inline void __set_close_on_exec(int fd, struct fdtable *fdt) | ||||
| { | ||||
| 	FD_SET(fd, fdt->close_on_exec); | ||||
| 	__set_bit(fd, fdt->close_on_exec); | ||||
| } | ||||
| 
 | ||||
| static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) | ||||
| { | ||||
| 	FD_CLR(fd, fdt->close_on_exec); | ||||
| 	__clear_bit(fd, fdt->close_on_exec); | ||||
| } | ||||
| 
 | ||||
| static inline bool close_on_exec(int fd, const struct fdtable *fdt) | ||||
| { | ||||
| 	return FD_ISSET(fd, fdt->close_on_exec); | ||||
| 	return test_bit(fd, fdt->close_on_exec); | ||||
| } | ||||
| 
 | ||||
| static inline void __set_open_fd(int fd, struct fdtable *fdt) | ||||
| { | ||||
| 	FD_SET(fd, fdt->open_fds); | ||||
| 	__set_bit(fd, fdt->open_fds); | ||||
| } | ||||
| 
 | ||||
| static inline void __clear_open_fd(int fd, struct fdtable *fdt) | ||||
| { | ||||
| 	FD_CLR(fd, fdt->open_fds); | ||||
| 	__clear_bit(fd, fdt->open_fds); | ||||
| } | ||||
| 
 | ||||
| static inline bool fd_is_open(int fd, const struct fdtable *fdt) | ||||
| { | ||||
| 	return FD_ISSET(fd, fdt->open_fds); | ||||
| 	return test_bit(fd, fdt->open_fds); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -83,8 +75,8 @@ struct files_struct { | |||
|    */ | ||||
| 	spinlock_t file_lock ____cacheline_aligned_in_smp; | ||||
| 	int next_fd; | ||||
| 	struct embedded_fd_set close_on_exec_init; | ||||
| 	struct embedded_fd_set open_fds_init; | ||||
| 	unsigned long close_on_exec_init[1]; | ||||
| 	unsigned long open_fds_init[1]; | ||||
| 	struct file __rcu * fd_array[NR_OPEN_DEFAULT]; | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -473,7 +473,7 @@ static void close_files(struct files_struct * files) | |||
| 		i = j * __NFDBITS; | ||||
| 		if (i >= fdt->max_fds) | ||||
| 			break; | ||||
| 		set = fdt->open_fds->fds_bits[j++]; | ||||
| 		set = fdt->open_fds[j++]; | ||||
| 		while (set) { | ||||
| 			if (set & 1) { | ||||
| 				struct file * file = xchg(&fdt->fd[i], NULL); | ||||
|  |  | |||
|  | @ -2145,7 +2145,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, | |||
| 		fdt = files_fdtable(files); | ||||
| 		if (i >= fdt->max_fds) | ||||
| 			break; | ||||
| 		set = fdt->open_fds->fds_bits[j]; | ||||
| 		set = fdt->open_fds[j]; | ||||
| 		if (!set) | ||||
| 			continue; | ||||
| 		spin_unlock(&files->file_lock); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 David Howells
				David Howells