Instead of splitting refcount between (per-cpu) mnt_count and (SMP-only) mnt_longrefs, make all references contribute to mnt_count again and keep track of how many are longterm ones.

Accounting rules for longterm count:
* 1 for each fs_struct.root.mnt
* 1 for each fs_struct.pwd.mnt
* 1 for having non-NULL ->mnt_ns
* decrement to 0 happens only under vfsmount lock exclusive

That allows a nice common case for mntput() - since we can't drop the final reference until after mnt_longterm has reached 0 due to the rules above, mntput() can grab vfsmount lock shared and check mnt_longterm. If it turns out to be non-zero (which is the common case), we know that this is not the final mntput() and can just blindly decrement the percpu mnt_count. Otherwise we grab vfsmount lock exclusive and do the usual decrement-and-check of the percpu mnt_count.

For fs_struct.c we have mnt_make_longterm() and mnt_make_shortterm(); namespace.c uses the latter in places where we don't already hold vfsmount lock exclusive and opencodes a few remaining spots where we need to manipulate mnt_longterm.

Note that we mostly revert the code outside of fs/namespace.c back to what we used to have; in particular, normal code doesn't need to care about two kinds of references, etc. And we get to keep the optimization Nick's variant had bought us...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
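To make the fast path concrete, here is a rough sketch of the mntput() logic described above. It is an illustration, not the verbatim fs/namespace.c change: the helpers mnt_add_count(), mnt_get_count() and cleanup_mnt(), and the br_read_lock()/br_write_lock() spelling of vfsmount lock shared/exclusive, are assumptions.

/* Illustrative sketch only - helper names are assumptions (see above). */
static void mntput_no_expire(struct vfsmount *mnt)
{
	/* vfsmount lock shared: enough to stabilize mnt_longterm */
	br_read_lock(vfsmount_lock);
	if (likely(atomic_read(&mnt->mnt_longterm))) {
		/*
		 * Long-term references still exist, so this cannot be the
		 * final reference; blindly drop our percpu mnt_count.
		 */
		mnt_add_count(mnt, -1);
		br_read_unlock(vfsmount_lock);
		return;
	}
	br_read_unlock(vfsmount_lock);

	/*
	 * Possibly the final reference: take vfsmount lock exclusive and
	 * do the usual decrement-and-check of the percpu count.
	 */
	br_write_lock(vfsmount_lock);
	mnt_add_count(mnt, -1);
	if (mnt_get_count(mnt)) {
		br_write_unlock(vfsmount_lock);
		return;
	}
	br_write_unlock(vfsmount_lock);
	cleanup_mnt(mnt);		/* hypothetical final teardown */
}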
fs/fs_struct.c (201 lines, 3.9 KiB, C)
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/path.h>
#include <linux/slab.h>
#include <linux/fs_struct.h>
#include "internal.h"

static inline void path_get_longterm(struct path *path)
{
	path_get(path);
	mnt_make_longterm(path->mnt);
}

static inline void path_put_longterm(struct path *path)
{
	mnt_make_shortterm(path->mnt);
	path_put(path);
}

/*
 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
 * It can block.
 */
void set_fs_root(struct fs_struct *fs, struct path *path)
{
	struct path old_root;

	spin_lock(&fs->lock);
	write_seqcount_begin(&fs->seq);
	old_root = fs->root;
	fs->root = *path;
	path_get_longterm(path);
	write_seqcount_end(&fs->seq);
	spin_unlock(&fs->lock);
	if (old_root.dentry)
		path_put_longterm(&old_root);
}

/*
 * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
 * It can block.
 */
void set_fs_pwd(struct fs_struct *fs, struct path *path)
{
	struct path old_pwd;

	spin_lock(&fs->lock);
	write_seqcount_begin(&fs->seq);
	old_pwd = fs->pwd;
	fs->pwd = *path;
	path_get_longterm(path);
	write_seqcount_end(&fs->seq);
	spin_unlock(&fs->lock);

	if (old_pwd.dentry)
		path_put_longterm(&old_pwd);
}

void chroot_fs_refs(struct path *old_root, struct path *new_root)
{
	struct task_struct *g, *p;
	struct fs_struct *fs;
	int count = 0;

	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		task_lock(p);
		fs = p->fs;
		if (fs) {
			spin_lock(&fs->lock);
			write_seqcount_begin(&fs->seq);
			if (fs->root.dentry == old_root->dentry
			    && fs->root.mnt == old_root->mnt) {
				path_get_longterm(new_root);
				fs->root = *new_root;
				count++;
			}
			if (fs->pwd.dentry == old_root->dentry
			    && fs->pwd.mnt == old_root->mnt) {
				path_get_longterm(new_root);
				fs->pwd = *new_root;
				count++;
			}
			write_seqcount_end(&fs->seq);
			spin_unlock(&fs->lock);
		}
		task_unlock(p);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
	while (count--)
		path_put_longterm(old_root);
}

void free_fs_struct(struct fs_struct *fs)
{
	path_put_longterm(&fs->root);
	path_put_longterm(&fs->pwd);
	kmem_cache_free(fs_cachep, fs);
}

void exit_fs(struct task_struct *tsk)
{
	struct fs_struct *fs = tsk->fs;

	if (fs) {
		int kill;
		task_lock(tsk);
		spin_lock(&fs->lock);
		write_seqcount_begin(&fs->seq);
		tsk->fs = NULL;
		kill = !--fs->users;
		write_seqcount_end(&fs->seq);
		spin_unlock(&fs->lock);
		task_unlock(tsk);
		if (kill)
			free_fs_struct(fs);
	}
}

struct fs_struct *copy_fs_struct(struct fs_struct *old)
{
	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
	/* We don't need to lock fs - think why ;-) */
	if (fs) {
		fs->users = 1;
		fs->in_exec = 0;
		spin_lock_init(&fs->lock);
		seqcount_init(&fs->seq);
		fs->umask = old->umask;

		spin_lock(&old->lock);
		fs->root = old->root;
		path_get_longterm(&fs->root);
		fs->pwd = old->pwd;
		path_get_longterm(&fs->pwd);
		spin_unlock(&old->lock);
	}
	return fs;
}

int unshare_fs_struct(void)
{
	struct fs_struct *fs = current->fs;
	struct fs_struct *new_fs = copy_fs_struct(fs);
	int kill;

	if (!new_fs)
		return -ENOMEM;

	task_lock(current);
	spin_lock(&fs->lock);
	kill = !--fs->users;
	current->fs = new_fs;
	spin_unlock(&fs->lock);
	task_unlock(current);

	if (kill)
		free_fs_struct(fs);

	return 0;
}
EXPORT_SYMBOL_GPL(unshare_fs_struct);

int current_umask(void)
{
	return current->fs->umask;
}
EXPORT_SYMBOL(current_umask);

/* to be mentioned only in INIT_TASK */
struct fs_struct init_fs = {
	.users		= 1,
	.lock		= __SPIN_LOCK_UNLOCKED(init_fs.lock),
	.seq		= SEQCNT_ZERO,
	.umask		= 0022,
};

void daemonize_fs_struct(void)
{
	struct fs_struct *fs = current->fs;

	if (fs) {
		int kill;

		task_lock(current);

		spin_lock(&init_fs.lock);
		init_fs.users++;
		spin_unlock(&init_fs.lock);

		spin_lock(&fs->lock);
		current->fs = &init_fs;
		kill = !--fs->users;
		spin_unlock(&fs->lock);

		task_unlock(current);
		if (kill)
			free_fs_struct(fs);
	}
}
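The mnt_make_longterm()/mnt_make_shortterm() helpers used by path_get_longterm() and path_put_longterm() above are provided by fs/namespace.c. A minimal sketch consistent with the rule that the long-term count may only drop to 0 under vfsmount lock exclusive could look like the following; treat it as an illustration under the assumption that mnt_longterm is an atomic_t and that br_write_lock(vfsmount_lock) is the exclusive form of the lock.

/* Sketch only - assumes mnt_longterm is an atomic_t in struct vfsmount. */
void mnt_make_longterm(struct vfsmount *mnt)
{
	atomic_inc(&mnt->mnt_longterm);
}

void mnt_make_shortterm(struct vfsmount *mnt)
{
	/* Fast path: we are not dropping the last long-term reference. */
	if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
		return;
	/* Decrement to zero only under vfsmount lock exclusive. */
	br_write_lock(vfsmount_lock);
	atomic_dec(&mnt->mnt_longterm);
	br_write_unlock(vfsmount_lock);
}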