cpuset: replace cpuset->stack_list with cpuset_for_each_descendant_pre()
Implement cpuset_for_each_descendant_pre() and replace the cpuset-specific
tree walking using cpuset->stack_list with it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Li Zefan <lizefan@huawei.com>
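The converted walkers (update_domain_attr_tree(), generate_sched_domains(), and the hotplug propagation loop in cpuset_hotplug_workfn()) all end up following the same pattern: take rcu_read_lock(), iterate the online descendants in pre-order, and prune a subtree by advancing the cgroup cursor to its rightmost descendant. A condensed sketch of that pattern follows; the wrapper function name is invented for illustration, while the iterator, the RCU rule, and the skip helper are the ones the patch uses:

        /*
         * Condensed sketch of the caller pattern this patch converts
         * walkers to.  The wrapper name is invented; the iterator, RCU
         * locking and cgroup_rightmost_descendant() skip come from the
         * patch itself.
         */
        static void walk_online_descendants_example(struct cpuset *root_cs)
        {
                struct cpuset *cp;
                struct cgroup *pos_cgrp;

                rcu_read_lock();
                cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
                        /* prune: don't descend into cpusets without any CPU */
                        if (cpumask_empty(cp->cpus_allowed)) {
                                pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
                                continue;
                        }
                        /* ... per-cpuset work, e.g. update_domain_attr(dattr, cp) ... */
                }
                rcu_read_unlock();
        }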
Parent: 5d21cc2db0
Commit: fc560a26ac

1 changed file with 48 additions and 75 deletions: kernel/cpuset.c
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -103,9 +103,6 @@ struct cpuset {
         /* for custom sched domain */
         int relax_domain_level;
 
-        /* used for walking a cpuset hierarchy */
-        struct list_head stack_list;
-
         struct work_struct hotplug_work;
 };
 
@@ -207,6 +204,20 @@ static struct cpuset top_cpuset = {
         cgroup_for_each_child((pos_cgrp), (parent_cs)->css.cgroup)        \
                 if (is_cpuset_online(((child_cs) = cgroup_cs((pos_cgrp)))))
 
+/**
+ * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
+ * @des_cs: loop cursor pointing to the current descendant
+ * @pos_cgrp: used for iteration
+ * @root_cs: target cpuset to walk ancestor of
+ *
+ * Walk @des_cs through the online descendants of @root_cs.  Must be used
+ * with RCU read locked.  The caller may modify @pos_cgrp by calling
+ * cgroup_rightmost_descendant() to skip subtree.
+ */
+#define cpuset_for_each_descendant_pre(des_cs, pos_cgrp, root_cs)        \
+        cgroup_for_each_descendant_pre((pos_cgrp), (root_cs)->css.cgroup) \
+                if (is_cpuset_online(((des_cs) = cgroup_cs((pos_cgrp)))))
+
 /*
  * There are two global mutexes guarding cpuset structures - cpuset_mutex
  * and callback_mutex.  The latter may nest inside the former.  We also
@@ -507,31 +518,24 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
         return;
 }
 
-static void
-update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
+static void update_domain_attr_tree(struct sched_domain_attr *dattr,
+                                    struct cpuset *root_cs)
 {
-        LIST_HEAD(q);
+        struct cpuset *cp;
+        struct cgroup *pos_cgrp;
 
-        list_add(&c->stack_list, &q);
-        while (!list_empty(&q)) {
-                struct cpuset *cp;
-                struct cgroup *cont;
-                struct cpuset *child;
-
-                cp = list_first_entry(&q, struct cpuset, stack_list);
-                list_del(q.next);
-
-                if (cpumask_empty(cp->cpus_allowed))
+        rcu_read_lock();
+        cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
+                /* skip the whole subtree if @cp doesn't have any CPU */
+                if (cpumask_empty(cp->cpus_allowed)) {
+                        pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
                         continue;
+                }
 
                 if (is_sched_load_balance(cp))
                         update_domain_attr(dattr, cp);
-
-                rcu_read_lock();
-                cpuset_for_each_child(child, cont, cp)
-                        list_add_tail(&child->stack_list, &q);
-                rcu_read_unlock();
         }
+        rcu_read_unlock();
 }
 
 /*
@@ -591,7 +595,6 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
 static int generate_sched_domains(cpumask_var_t **domains,
                         struct sched_domain_attr **attributes)
 {
-        LIST_HEAD(q);           /* queue of cpusets to be scanned */
         struct cpuset *cp;      /* scans q */
         struct cpuset **csa;    /* array of all cpuset ptrs */
         int csn;                /* how many cpuset ptrs in csa so far */
@@ -600,6 +603,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
         struct sched_domain_attr *dattr;  /* attributes for custom domains */
         int ndoms = 0;          /* number of sched domains in result */
         int nslot;              /* next empty doms[] struct cpumask slot */
+        struct cgroup *pos_cgrp;
 
         doms = NULL;
         dattr = NULL;
@@ -627,33 +631,27 @@ static int generate_sched_domains(cpumask_var_t **domains,
                 goto done;
         csn = 0;
 
-        list_add(&top_cpuset.stack_list, &q);
-        while (!list_empty(&q)) {
-                struct cgroup *cont;
-                struct cpuset *child;   /* scans child cpusets of cp */
-
-                cp = list_first_entry(&q, struct cpuset, stack_list);
-                list_del(q.next);
-
-                if (cpumask_empty(cp->cpus_allowed))
-                        continue;
-
+        rcu_read_lock();
+        cpuset_for_each_descendant_pre(cp, pos_cgrp, &top_cpuset) {
                 /*
-                 * All child cpusets contain a subset of the parent's cpus, so
-                 * just skip them, and then we call update_domain_attr_tree()
-                 * to calc relax_domain_level of the corresponding sched
-                 * domain.
+                 * Continue traversing beyond @cp iff @cp has some CPUs and
+                 * isn't load balancing.  The former is obvious.  The
+                 * latter: All child cpusets contain a subset of the
+                 * parent's cpus, so just skip them, and then we call
+                 * update_domain_attr_tree() to calc relax_domain_level of
+                 * the corresponding sched domain.
                  */
-                if (is_sched_load_balance(cp)) {
-                        csa[csn++] = cp;
+                if (!cpumask_empty(cp->cpus_allowed) &&
+                    !is_sched_load_balance(cp))
                         continue;
-                }
 
-                rcu_read_lock();
-                cpuset_for_each_child(child, cont, cp)
-                        list_add_tail(&child->stack_list, &q);
-                rcu_read_unlock();
-        }
+                if (is_sched_load_balance(cp))
+                        csa[csn++] = cp;
+
+                /* skip @cp's subtree */
+                pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
+        }
+        rcu_read_unlock();
 
         for (i = 0; i < csn; i++)
                 csa[i]->pn = i;
@@ -2068,31 +2066,6 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
         move_member_tasks_to_cpuset(cs, parent);
 }
 
-/*
- * Helper function to traverse cpusets.
- * It can be used to walk the cpuset tree from top to bottom, completing
- * one layer before dropping down to the next (thus always processing a
- * node before any of its children).
- */
-static struct cpuset *cpuset_next(struct list_head *queue)
-{
-        struct cpuset *cp;
-        struct cpuset *child;   /* scans child cpusets of cp */
-        struct cgroup *cont;
-
-        if (list_empty(queue))
-                return NULL;
-
-        cp = list_first_entry(queue, struct cpuset, stack_list);
-        list_del(queue->next);
-        rcu_read_lock();
-        cpuset_for_each_child(child, cont, cp)
-                list_add_tail(&child->stack_list, queue);
-        rcu_read_unlock();
-
-        return cp;
-}
-
 /**
  * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset
  * @cs: cpuset in interest
@@ -2229,12 +2202,12 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
         /* if cpus or mems went down, we need to propagate to descendants */
         if (cpus_offlined || mems_offlined) {
                 struct cpuset *cs;
-                LIST_HEAD(queue);
+                struct cgroup *pos_cgrp;
 
-                list_add_tail(&top_cpuset.stack_list, &queue);
-                while ((cs = cpuset_next(&queue)))
-                        if (cs != &top_cpuset)
-                                schedule_cpuset_propagate_hotplug(cs);
+                rcu_read_lock();
+                cpuset_for_each_descendant_pre(cs, pos_cgrp, &top_cpuset)
+                        schedule_cpuset_propagate_hotplug(cs);
+                rcu_read_unlock();
         }
 
         mutex_unlock(&cpuset_mutex);
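For readers unfamiliar with the traversal semantics, here is a minimal user-space model of the pruned pre-order walk that the new iterator performs when combined with cgroup_rightmost_descendant(): skipping a subtree drops a node's children but still visits its siblings. The tree shape, node names, and the "empty" flag (standing in for cpumask_empty(cp->cpus_allowed)) are invented for illustration; nothing below is kernel API.

        /* Toy model of a pruned pre-order walk; build with any C compiler. */
        #include <stdio.h>

        struct node {
                const char *name;
                int empty;              /* no CPUs -> prune whole subtree */
                struct node *child;     /* first child */
                struct node *sibling;   /* next sibling */
        };

        /* "continue" without descending plays the role of jumping to the
         * rightmost descendant in the kernel iterator. */
        static void preorder(struct node *n)
        {
                for (; n; n = n->sibling) {
                        if (n->empty) {
                                printf("prune subtree of %s\n", n->name);
                                continue;       /* skip children, keep siblings */
                        }
                        printf("visit %s\n", n->name);
                        preorder(n->child);
                }
        }

        int main(void)
        {
                struct node d = { "d", 0, NULL, NULL };
                struct node c = { "c", 0, NULL, NULL };
                struct node b = { "b", 1, &d, &c };     /* empty: d is never visited */
                struct node a = { "a", 0, &b, NULL };

                preorder(&a);   /* prints: visit a, prune subtree of b, visit c */
                return 0;
        }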