cgroup: remove extra calls to find_existing_css_set
In cgroup_attach_proc, we indirectly call find_existing_css_set 3 times. It is an expensive call so we want to call it a minimum of times. This patch only calls it once and stores the result so that it can be used later on when we call cgroup_task_migrate. This required modifying cgroup_task_migrate to take the new css_set (which we obtained from find_css_set) as a parameter. The nice side effect of this is that cgroup_task_migrate is now identical for cgroup_attach_task and cgroup_attach_proc. It also now returns void since it can never fail. Changes in V5: * https://lkml.org/lkml/2012/1/20/344 (Tejun Heo) * Remove css_set_refs Changes in V4: * https://lkml.org/lkml/2011/12/22/421 (Li Zefan) * Avoid GFP_KERNEL (sleep) in rcu_read_lock by getting css_set in a separate loop not under an rcu_read_lock Changes in V3: * https://lkml.org/lkml/2011/12/22/13 (Li Zefan) * Fixed earlier bug by creating a separate patch to remove tasklist_lock Changes in V2: * https://lkml.org/lkml/2011/12/20/372 (Tejun Heo) * Move find_css_set call into loop which creates the flex array * Author * Kill css_set_refs and use group_size instead * Fix an off-by-one error in counting css_set refs * Add a retval check in out_list_teardown Signed-off-by: Mandeep Singh Baines <msb@chromium.org> Acked-by: Li Zefan <lizf@cn.fujitsu.com> Signed-off-by: Tejun Heo <tj@kernel.org> Cc: containers@lists.linux-foundation.org Cc: cgroups@vger.kernel.org Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Paul Menage <paul@paulmenage.org>
This commit is contained in:
		
					parent
					
						
							
								fb5d2b4cfc
							
						
					
				
			
			
				commit
				
					
						61d1d219c4
					
				
			
		
					 1 changed file with 27 additions and 113 deletions
				
			
		
							
								
								
									
										140
									
								
								kernel/cgroup.c
									
										
									
									
									
								
							
							
						
						
									
										140
									
								
								kernel/cgroup.c
									
										
									
									
									
								
							|  | @ -1763,6 +1763,7 @@ EXPORT_SYMBOL_GPL(cgroup_path); | ||||||
| struct task_and_cgroup { | struct task_and_cgroup { | ||||||
| 	struct task_struct	*task; | 	struct task_struct	*task; | ||||||
| 	struct cgroup		*cgrp; | 	struct cgroup		*cgrp; | ||||||
|  | 	struct css_set		*cg; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct cgroup_taskset { | struct cgroup_taskset { | ||||||
|  | @ -1843,11 +1844,10 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size); | ||||||
|  * will already exist. If not set, this function might sleep, and can fail with |  * will already exist. If not set, this function might sleep, and can fail with | ||||||
|  * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked. |  * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked. | ||||||
|  */ |  */ | ||||||
| static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | ||||||
| 			       struct task_struct *tsk, bool guarantee) | 				struct task_struct *tsk, struct css_set *newcg) | ||||||
| { | { | ||||||
| 	struct css_set *oldcg; | 	struct css_set *oldcg; | ||||||
| 	struct css_set *newcg; |  | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * We are synchronized through threadgroup_lock() against PF_EXITING | 	 * We are synchronized through threadgroup_lock() against PF_EXITING | ||||||
|  | @ -1857,23 +1857,6 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | ||||||
| 	WARN_ON_ONCE(tsk->flags & PF_EXITING); | 	WARN_ON_ONCE(tsk->flags & PF_EXITING); | ||||||
| 	oldcg = tsk->cgroups; | 	oldcg = tsk->cgroups; | ||||||
| 
 | 
 | ||||||
| 	/* locate or allocate a new css_set for this task. */ |  | ||||||
| 	if (guarantee) { |  | ||||||
| 		/* we know the css_set we want already exists. */ |  | ||||||
| 		struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; |  | ||||||
| 		read_lock(&css_set_lock); |  | ||||||
| 		newcg = find_existing_css_set(oldcg, cgrp, template); |  | ||||||
| 		BUG_ON(!newcg); |  | ||||||
| 		get_css_set(newcg); |  | ||||||
| 		read_unlock(&css_set_lock); |  | ||||||
| 	} else { |  | ||||||
| 		might_sleep(); |  | ||||||
| 		/* find_css_set will give us newcg already referenced. */ |  | ||||||
| 		newcg = find_css_set(oldcg, cgrp); |  | ||||||
| 		if (!newcg) |  | ||||||
| 			return -ENOMEM; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	task_lock(tsk); | 	task_lock(tsk); | ||||||
| 	rcu_assign_pointer(tsk->cgroups, newcg); | 	rcu_assign_pointer(tsk->cgroups, newcg); | ||||||
| 	task_unlock(tsk); | 	task_unlock(tsk); | ||||||
|  | @ -1892,7 +1875,6 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | ||||||
| 	put_css_set(oldcg); | 	put_css_set(oldcg); | ||||||
| 
 | 
 | ||||||
| 	set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | 	set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | ||||||
| 	return 0; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  | @ -1910,6 +1892,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | ||||||
| 	struct cgroup *oldcgrp; | 	struct cgroup *oldcgrp; | ||||||
| 	struct cgroupfs_root *root = cgrp->root; | 	struct cgroupfs_root *root = cgrp->root; | ||||||
| 	struct cgroup_taskset tset = { }; | 	struct cgroup_taskset tset = { }; | ||||||
|  | 	struct css_set *newcg; | ||||||
| 
 | 
 | ||||||
| 	/* @tsk either already exited or can't exit until the end */ | 	/* @tsk either already exited or can't exit until the end */ | ||||||
| 	if (tsk->flags & PF_EXITING) | 	if (tsk->flags & PF_EXITING) | ||||||
|  | @ -1939,9 +1922,13 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); | 	newcg = find_css_set(tsk->cgroups, cgrp); | ||||||
| 	if (retval) | 	if (!newcg) { | ||||||
|  | 		retval = -ENOMEM; | ||||||
| 		goto out; | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg); | ||||||
| 
 | 
 | ||||||
| 	for_each_subsys(root, ss) { | 	for_each_subsys(root, ss) { | ||||||
| 		if (ss->attach) | 		if (ss->attach) | ||||||
|  | @ -1997,66 +1984,6 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(cgroup_attach_task_all); | EXPORT_SYMBOL_GPL(cgroup_attach_task_all); | ||||||
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  * cgroup_attach_proc works in two stages, the first of which prefetches all |  | ||||||
|  * new css_sets needed (to make sure we have enough memory before committing |  | ||||||
|  * to the move) and stores them in a list of entries of the following type. |  | ||||||
|  * TODO: possible optimization: use css_set->rcu_head for chaining instead |  | ||||||
|  */ |  | ||||||
| struct cg_list_entry { |  | ||||||
| 	struct css_set *cg; |  | ||||||
| 	struct list_head links; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static bool css_set_check_fetched(struct cgroup *cgrp, |  | ||||||
| 				  struct task_struct *tsk, struct css_set *cg, |  | ||||||
| 				  struct list_head *newcg_list) |  | ||||||
| { |  | ||||||
| 	struct css_set *newcg; |  | ||||||
| 	struct cg_list_entry *cg_entry; |  | ||||||
| 	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; |  | ||||||
| 
 |  | ||||||
| 	read_lock(&css_set_lock); |  | ||||||
| 	newcg = find_existing_css_set(cg, cgrp, template); |  | ||||||
| 	read_unlock(&css_set_lock); |  | ||||||
| 
 |  | ||||||
| 	/* doesn't exist at all? */ |  | ||||||
| 	if (!newcg) |  | ||||||
| 		return false; |  | ||||||
| 	/* see if it's already in the list */ |  | ||||||
| 	list_for_each_entry(cg_entry, newcg_list, links) |  | ||||||
| 		if (cg_entry->cg == newcg) |  | ||||||
| 			return true; |  | ||||||
| 
 |  | ||||||
| 	/* not found */ |  | ||||||
| 	return false; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Find the new css_set and store it in the list in preparation for moving the |  | ||||||
|  * given task to the given cgroup. Returns 0 or -ENOMEM. |  | ||||||
|  */ |  | ||||||
| static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, |  | ||||||
| 			    struct list_head *newcg_list) |  | ||||||
| { |  | ||||||
| 	struct css_set *newcg; |  | ||||||
| 	struct cg_list_entry *cg_entry; |  | ||||||
| 
 |  | ||||||
| 	/* ensure a new css_set will exist for this thread */ |  | ||||||
| 	newcg = find_css_set(cg, cgrp); |  | ||||||
| 	if (!newcg) |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 	/* add it to the list */ |  | ||||||
| 	cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL); |  | ||||||
| 	if (!cg_entry) { |  | ||||||
| 		put_css_set(newcg); |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 	} |  | ||||||
| 	cg_entry->cg = newcg; |  | ||||||
| 	list_add(&cg_entry->links, newcg_list); |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /**
 | /**
 | ||||||
|  * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup |  * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup | ||||||
|  * @cgrp: the cgroup to attach to |  * @cgrp: the cgroup to attach to | ||||||
|  | @ -2070,20 +1997,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | ||||||
| 	int retval, i, group_size; | 	int retval, i, group_size; | ||||||
| 	struct cgroup_subsys *ss, *failed_ss = NULL; | 	struct cgroup_subsys *ss, *failed_ss = NULL; | ||||||
| 	/* guaranteed to be initialized later, but the compiler needs this */ | 	/* guaranteed to be initialized later, but the compiler needs this */ | ||||||
| 	struct css_set *oldcg; |  | ||||||
| 	struct cgroupfs_root *root = cgrp->root; | 	struct cgroupfs_root *root = cgrp->root; | ||||||
| 	/* threadgroup list cursor and array */ | 	/* threadgroup list cursor and array */ | ||||||
| 	struct task_struct *tsk; | 	struct task_struct *tsk; | ||||||
| 	struct task_and_cgroup *tc; | 	struct task_and_cgroup *tc; | ||||||
| 	struct flex_array *group; | 	struct flex_array *group; | ||||||
| 	struct cgroup_taskset tset = { }; | 	struct cgroup_taskset tset = { }; | ||||||
| 	/*
 |  | ||||||
| 	 * we need to make sure we have css_sets for all the tasks we're |  | ||||||
| 	 * going to move -before- we actually start moving them, so that in |  | ||||||
| 	 * case we get an ENOMEM we can bail out before making any changes. |  | ||||||
| 	 */ |  | ||||||
| 	struct list_head newcg_list; |  | ||||||
| 	struct cg_list_entry *cg_entry, *temp_nobe; |  | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * step 0: in order to do expensive, possibly blocking operations for | 	 * step 0: in order to do expensive, possibly blocking operations for | ||||||
|  | @ -2119,15 +2038,15 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | ||||||
| 
 | 
 | ||||||
| 		/* as per above, nr_threads may decrease, but not increase. */ | 		/* as per above, nr_threads may decrease, but not increase. */ | ||||||
| 		BUG_ON(i >= group_size); | 		BUG_ON(i >= group_size); | ||||||
| 		/*
 |  | ||||||
| 		 * saying GFP_ATOMIC has no effect here because we did prealloc |  | ||||||
| 		 * earlier, but it's good form to communicate our expectations. |  | ||||||
| 		 */ |  | ||||||
| 		ent.task = tsk; | 		ent.task = tsk; | ||||||
| 		ent.cgrp = task_cgroup_from_root(tsk, root); | 		ent.cgrp = task_cgroup_from_root(tsk, root); | ||||||
| 		/* nothing to do if this task is already in the cgroup */ | 		/* nothing to do if this task is already in the cgroup */ | ||||||
| 		if (ent.cgrp == cgrp) | 		if (ent.cgrp == cgrp) | ||||||
| 			continue; | 			continue; | ||||||
|  | 		/*
 | ||||||
|  | 		 * saying GFP_ATOMIC has no effect here because we did prealloc | ||||||
|  | 		 * earlier, but it's good form to communicate our expectations. | ||||||
|  | 		 */ | ||||||
| 		retval = flex_array_put(group, i, &ent, GFP_ATOMIC); | 		retval = flex_array_put(group, i, &ent, GFP_ATOMIC); | ||||||
| 		BUG_ON(retval != 0); | 		BUG_ON(retval != 0); | ||||||
| 		i++; | 		i++; | ||||||
|  | @ -2160,17 +2079,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | ||||||
| 	 * step 2: make sure css_sets exist for all threads to be migrated. | 	 * step 2: make sure css_sets exist for all threads to be migrated. | ||||||
| 	 * we use find_css_set, which allocates a new one if necessary. | 	 * we use find_css_set, which allocates a new one if necessary. | ||||||
| 	 */ | 	 */ | ||||||
| 	INIT_LIST_HEAD(&newcg_list); |  | ||||||
| 	for (i = 0; i < group_size; i++) { | 	for (i = 0; i < group_size; i++) { | ||||||
| 		tc = flex_array_get(group, i); | 		tc = flex_array_get(group, i); | ||||||
| 		oldcg = tc->task->cgroups; | 		tc->cg = find_css_set(tc->task->cgroups, cgrp); | ||||||
| 
 | 		if (!tc->cg) { | ||||||
| 		/* if we don't already have it in the list get a new one */ | 			retval = -ENOMEM; | ||||||
| 		if (!css_set_check_fetched(cgrp, tc->task, oldcg, | 			goto out_put_css_set_refs; | ||||||
| 					   &newcg_list)) { |  | ||||||
| 			retval = css_set_prefetch(cgrp, oldcg, &newcg_list); |  | ||||||
| 			if (retval) |  | ||||||
| 				goto out_list_teardown; |  | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | @ -2181,8 +2095,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | ||||||
| 	 */ | 	 */ | ||||||
| 	for (i = 0; i < group_size; i++) { | 	for (i = 0; i < group_size; i++) { | ||||||
| 		tc = flex_array_get(group, i); | 		tc = flex_array_get(group, i); | ||||||
| 		retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true); | 		cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg); | ||||||
| 		BUG_ON(retval); |  | ||||||
| 	} | 	} | ||||||
| 	/* nothing is sensitive to fork() after this point. */ | 	/* nothing is sensitive to fork() after this point. */ | ||||||
| 
 | 
 | ||||||
|  | @ -2200,15 +2113,16 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | ||||||
| 	synchronize_rcu(); | 	synchronize_rcu(); | ||||||
| 	cgroup_wakeup_rmdir_waiter(cgrp); | 	cgroup_wakeup_rmdir_waiter(cgrp); | ||||||
| 	retval = 0; | 	retval = 0; | ||||||
| out_list_teardown: | out_put_css_set_refs: | ||||||
| 	/* clean up the list of prefetched css_sets. */ | 	if (retval) { | ||||||
| 	list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) { | 		for (i = 0; i < group_size; i++) { | ||||||
| 		list_del(&cg_entry->links); | 			tc = flex_array_get(group, i); | ||||||
| 		put_css_set(cg_entry->cg); | 			if (!tc->cg) | ||||||
| 		kfree(cg_entry); | 				break; | ||||||
|  | 			put_css_set(tc->cg); | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| out_cancel_attach: | out_cancel_attach: | ||||||
| 	/* same deal as in cgroup_attach_task */ |  | ||||||
| 	if (retval) { | 	if (retval) { | ||||||
| 		for_each_subsys(root, ss) { | 		for_each_subsys(root, ss) { | ||||||
| 			if (ss == failed_ss) | 			if (ss == failed_ss) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Mandeep Singh Baines
				Mandeep Singh Baines