/* cpumap.c: used for optimizing CPU assignment
 *
 * Copyright (C) 2009 Hong H. Pham <hong.pham@windriver.com>
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <asm/cpudata.h>
#include "cpumap.h"


enum {
	CPUINFO_LVL_ROOT = 0,
	CPUINFO_LVL_NODE,
	CPUINFO_LVL_CORE,
	CPUINFO_LVL_PROC,
	CPUINFO_LVL_MAX,
};

enum {
	ROVER_NO_OP              = 0,
	/* Increment rover every time level is visited */
	ROVER_INC_ON_VISIT       = 1 << 0,
	/* Increment parent's rover every time rover wraps around */
	ROVER_INC_PARENT_ON_LOOP = 1 << 1,
};
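
/* For illustration (hypothetical numbers): a node with child_start = 5,
 * child_end = 7 and rover = 6 hands out child 6 on the next visit.  With
 * ROVER_INC_ON_VISIT the rover then advances to 7; once it would pass
 * child_end it wraps back to child_start, and with ROVER_INC_PARENT_ON_LOOP
 * that wrap also advances the parent's rover.
 */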

struct cpuinfo_node {
	int id;
	int level;
	int num_cpus;    /* Number of CPUs in this hierarchy */
	int parent_index;
	int child_start; /* Array index of the first child node */
	int child_end;   /* Array index of the last child node */
	int rover;       /* Child node iterator */
};

struct cpuinfo_level {
	int start_index; /* Index of first node of a level in a cpuinfo tree */
	int end_index;   /* Index of last node of a level in a cpuinfo tree */
	int num_nodes;   /* Number of nodes in a level in a cpuinfo tree */
};

struct cpuinfo_tree {
	int total_nodes;

	/* Offsets into nodes[] for each level of the tree */
	struct cpuinfo_level level[CPUINFO_LVL_MAX];
	struct cpuinfo_node  nodes[0];
};
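
/* A sketch of the flat layout (hypothetical 2-node, 4-core, 8-proc box):
 *
 *	nodes[0]	root	(CPUINFO_LVL_ROOT)
 *	nodes[1..2]	NUMA	(CPUINFO_LVL_NODE)
 *	nodes[3..6]	cores	(CPUINFO_LVL_CORE)
 *	nodes[7..14]	procs	(CPUINFO_LVL_PROC)
 *
 * level[] records, for each level, the [start_index, end_index] slice of
 * nodes[] it owns; parent/child links are expressed as indices into
 * nodes[].  At the PROC level, child_start/child_end hold cpu ids rather
 * than nodes[] indices.
 */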


static struct cpuinfo_tree *cpuinfo_tree;

static u16 cpu_distribution_map[NR_CPUS];
static DEFINE_SPINLOCK(cpu_map_lock);


/* Niagara optimized cpuinfo tree traversal. */
static const int niagara_iterate_method[] = {
	[CPUINFO_LVL_ROOT] = ROVER_NO_OP,

	/* Strands (or virtual CPUs) within a core may not run concurrently
	 * on the Niagara, as instruction pipeline(s) are shared.  Distribute
	 * work to strands in different cores first for better concurrency.
	 * Go to next NUMA node when all cores are used.
	 */
	[CPUINFO_LVL_NODE] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,

	/* Strands are grouped together by proc_id in cpuinfo_sparc, i.e.
	 * a proc_id represents an instruction pipeline.  Distribute work to
	 * strands in different proc_id groups if the core has multiple
	 * instruction pipelines (e.g. the Niagara 2/2+ has two).
	 */
	[CPUINFO_LVL_CORE] = ROVER_INC_ON_VISIT,

	/* Pick the next strand in the proc_id group. */
	[CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT,
};
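
/* Worked example (hypothetical topology, for illustration only): one NUMA
 * node holding two cores, each core a single proc_id group of two strands:
 *
 *	core 0: cpus 0, 1
 *	core 1: cpus 2, 3
 *
 * Successive iterate_cpu() calls with this method hand out cpus in the
 * order 0, 2, 1, 3, 0, ... -- alternating cores before reusing strands
 * that share an instruction pipeline.
 */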

/* Generic cpuinfo tree traversal.  Distribute work round robin across NUMA
 * nodes.
 */
static const int generic_iterate_method[] = {
	[CPUINFO_LVL_ROOT] = ROVER_INC_ON_VISIT,
	[CPUINFO_LVL_NODE] = ROVER_NO_OP,
	[CPUINFO_LVL_CORE] = ROVER_INC_PARENT_ON_LOOP,
	[CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
};
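
/* With the same hypothetical four-cpu box as above, this method instead
 * yields 0, 1, 2, 3, 0, ... -- the strands of a core are consumed in
 * order, and the round robin across NUMA nodes happens at the root rover.
 */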


static int cpuinfo_id(int cpu, int level)
{
	int id;

	switch (level) {
	case CPUINFO_LVL_ROOT:
		id = 0;
		break;
	case CPUINFO_LVL_NODE:
		id = cpu_to_node(cpu);
		break;
	case CPUINFO_LVL_CORE:
		id = cpu_data(cpu).core_id;
		break;
	case CPUINFO_LVL_PROC:
		id = cpu_data(cpu).proc_id;
		break;
	default:
		id = -EINVAL;
	}
	return id;
}

/*
 * Enumerate the CPU information in __cpu_data to determine the start index,
 * end index, and number of nodes for each level in the cpuinfo tree.  The
 * total number of cpuinfo nodes required to build the tree is returned.
 */
static int enumerate_cpuinfo_nodes(struct cpuinfo_level *tree_level)
{
	int prev_id[CPUINFO_LVL_MAX];
	int i, n, num_nodes;

	for (i = CPUINFO_LVL_ROOT; i < CPUINFO_LVL_MAX; i++) {
		struct cpuinfo_level *lv = &tree_level[i];

		prev_id[i] = -1;
		lv->start_index = lv->end_index = lv->num_nodes = 0;
	}

	num_nodes = 1; /* Include the root node */

	for (i = 0; i < num_possible_cpus(); i++) {
		if (!cpu_online(i))
			continue;

		n = cpuinfo_id(i, CPUINFO_LVL_NODE);
		if (n > prev_id[CPUINFO_LVL_NODE]) {
			tree_level[CPUINFO_LVL_NODE].num_nodes++;
			prev_id[CPUINFO_LVL_NODE] = n;
			num_nodes++;
		}
		n = cpuinfo_id(i, CPUINFO_LVL_CORE);
		if (n > prev_id[CPUINFO_LVL_CORE]) {
			tree_level[CPUINFO_LVL_CORE].num_nodes++;
			prev_id[CPUINFO_LVL_CORE] = n;
			num_nodes++;
		}
		n = cpuinfo_id(i, CPUINFO_LVL_PROC);
		if (n > prev_id[CPUINFO_LVL_PROC]) {
			tree_level[CPUINFO_LVL_PROC].num_nodes++;
			prev_id[CPUINFO_LVL_PROC] = n;
			num_nodes++;
		}
	}

	tree_level[CPUINFO_LVL_ROOT].num_nodes = 1;

	n = tree_level[CPUINFO_LVL_NODE].num_nodes;
	tree_level[CPUINFO_LVL_NODE].start_index = 1;
	tree_level[CPUINFO_LVL_NODE].end_index   = n;

	n++;
	tree_level[CPUINFO_LVL_CORE].start_index = n;
	n += tree_level[CPUINFO_LVL_CORE].num_nodes;
	tree_level[CPUINFO_LVL_CORE].end_index   = n - 1;

	tree_level[CPUINFO_LVL_PROC].start_index = n;
	n += tree_level[CPUINFO_LVL_PROC].num_nodes;
	tree_level[CPUINFO_LVL_PROC].end_index   = n - 1;

	return num_nodes;
}
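
/* Worked example (hypothetical topology): 2 NUMA nodes, 2 cores per node,
 * each core a single proc_id group of 4 strands (16 cpus).  The scan above
 * counts 1 root + 2 node + 4 core + 4 proc nodes = 11 and lays the levels
 * out as
 *
 *	ROOT: [0, 0]  NODE: [1, 2]  CORE: [3, 6]  PROC: [7, 10]
 *
 * The "n > prev_id[level]" tests rely on the same ascending sort of
 * __cpu_data that build_cpuinfo_tree() below assumes.
 */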

/* Build a tree representation of the CPU hierarchy using the per CPU
 * information in __cpu_data.  Entries in __cpu_data[0..NR_CPUS] are
 * assumed to be sorted in ascending order based on node, core_id, and
 * proc_id (in order of significance).
 */
static struct cpuinfo_tree *build_cpuinfo_tree(void)
{
	struct cpuinfo_tree *new_tree;
	struct cpuinfo_node *node;
	struct cpuinfo_level tmp_level[CPUINFO_LVL_MAX];
	int num_cpus[CPUINFO_LVL_MAX];
	int level_rover[CPUINFO_LVL_MAX];
	int prev_id[CPUINFO_LVL_MAX];
	int n, id, cpu, prev_cpu, last_cpu, level;

	n = enumerate_cpuinfo_nodes(tmp_level);

	new_tree = kzalloc(sizeof(struct cpuinfo_tree) +
	                   (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC);
	if (!new_tree)
		return NULL;

	new_tree->total_nodes = n;
	memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));

	prev_cpu = cpu = first_cpu(cpu_online_map);

	/* Initialize all levels in the tree with the first CPU */
	for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
		n = new_tree->level[level].start_index;

		level_rover[level] = n;
		node = &new_tree->nodes[n];

		id = cpuinfo_id(cpu, level);
		if (unlikely(id < 0)) {
			kfree(new_tree);
			return NULL;
		}
		node->id = id;
		node->level = level;
		node->num_cpus = 1;

		node->parent_index = (level > CPUINFO_LVL_ROOT)
		    ? new_tree->level[level - 1].start_index : -1;

		node->child_start = node->child_end = node->rover =
		    (level == CPUINFO_LVL_PROC)
		    ? cpu : new_tree->level[level + 1].start_index;

		prev_id[level] = node->id;
		num_cpus[level] = 1;
	}

	for (last_cpu = (num_possible_cpus() - 1); last_cpu >= 0; last_cpu--) {
		if (cpu_online(last_cpu))
			break;
	}

	while (++cpu <= last_cpu) {
		if (!cpu_online(cpu))
			continue;

		for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT;
		     level--) {
			id = cpuinfo_id(cpu, level);
			if (unlikely(id < 0)) {
				kfree(new_tree);
				return NULL;
			}

			if ((id != prev_id[level]) || (cpu == last_cpu)) {
				prev_id[level] = id;
				node = &new_tree->nodes[level_rover[level]];
				node->num_cpus = num_cpus[level];
				num_cpus[level] = 1;

				if (cpu == last_cpu)
					node->num_cpus++;

				/* Connect tree node to parent */
				if (level == CPUINFO_LVL_ROOT)
					node->parent_index = -1;
				else
					node->parent_index =
					    level_rover[level - 1];

				if (level == CPUINFO_LVL_PROC) {
					node->child_end =
					    (cpu == last_cpu) ? cpu : prev_cpu;
				} else {
					node->child_end =
					    level_rover[level + 1] - 1;
				}

				/* Initialize the next node in the same level */
				n = ++level_rover[level];
				if (n <= new_tree->level[level].end_index) {
					node = &new_tree->nodes[n];
					node->id = id;
					node->level = level;

					/* Connect node to child */
					node->child_start = node->child_end =
					node->rover =
					    (level == CPUINFO_LVL_PROC)
					    ? cpu : level_rover[level + 1];
				}
			} else
				num_cpus[level]++;
		}
		prev_cpu = cpu;
	}

	return new_tree;
}
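
/* For the hypothetical 16-cpu topology used in the example above
 * enumerate_cpuinfo_nodes(), the finished tree looks like:
 *
 *	nodes[0]	root,        children 1..2
 *	nodes[1..2]	NUMA nodes,  children 3..4 and 5..6
 *	nodes[3..6]	cores,       one proc group child each (7, 8, 9, 10)
 *	nodes[7..10]	proc groups, child_start..child_end are the cpu ids
 *			0..3, 4..7, 8..11 and 12..15
 */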

static void increment_rover(struct cpuinfo_tree *t, int node_index,
                            int root_index, const int *rover_inc_table)
{
	struct cpuinfo_node *node = &t->nodes[node_index];
	int top_level, level;

	top_level = t->nodes[root_index].level;
	for (level = node->level; level >= top_level; level--) {
		node->rover++;
		if (node->rover <= node->child_end)
			return;

		node->rover = node->child_start;
		/* If parent's rover does not need to be adjusted, stop here. */
		if ((level == top_level) ||
		    !(rover_inc_table[level] & ROVER_INC_PARENT_ON_LOOP))
			return;

		node = &t->nodes[node->parent_index];
	}
}
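
/* Illustrative trace (hypothetical values): a PROC node with
 * child_start = 4, child_end = 7 and rover = 7 wraps back to 4 on the next
 * increment; if rover_inc_table[CPUINFO_LVL_PROC] has
 * ROVER_INC_PARENT_ON_LOOP set, the wrap carries into the parent CORE
 * node's rover, like a multi-digit counter.
 */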

static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
{
	const int *rover_inc_table;
	int level, new_index, index = root_index;

	switch (sun4v_chip_type) {
	case SUN4V_CHIP_NIAGARA1:
	case SUN4V_CHIP_NIAGARA2:
		rover_inc_table = niagara_iterate_method;
		break;
	default:
		rover_inc_table = generic_iterate_method;
	}

	for (level = t->nodes[root_index].level; level < CPUINFO_LVL_MAX;
	     level++) {
		new_index = t->nodes[index].rover;
		if (rover_inc_table[level] & ROVER_INC_ON_VISIT)
			increment_rover(t, index, root_index, rover_inc_table);

		index = new_index;
	}
	return index;
}
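
/* Each call walks one root-to-leaf path: at every level the current node's
 * rover selects the next index (a nodes[] index, except at the PROC level
 * where it is a cpu id), and rovers flagged with ROVER_INC_ON_VISIT are
 * advanced so the next call takes a different path down the tree.
 */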

static void _cpu_map_rebuild(void)
{
	int i;

	if (cpuinfo_tree) {
		kfree(cpuinfo_tree);
		cpuinfo_tree = NULL;
	}

	cpuinfo_tree = build_cpuinfo_tree();
	if (!cpuinfo_tree)
		return;

	/* Build CPU distribution map that spans all online CPUs.  No need
	 * to check if the CPU is online, as that is done when the cpuinfo
	 * tree is being built.
	 */
	for (i = 0; i < cpuinfo_tree->nodes[0].num_cpus; i++)
		cpu_distribution_map[i] = iterate_cpu(cpuinfo_tree, 0);
}

/* Fallback if the cpuinfo tree could not be built.  CPU mapping is linear
 * round robin.
 */
static int simple_map_to_cpu(unsigned int index)
{
	int i, end, cpu_rover;

	cpu_rover = 0;
	end = index % num_online_cpus();
	for (i = 0; i < num_possible_cpus(); i++) {
		if (cpu_online(cpu_rover) && cpu_rover >= end)
			return cpu_rover;

		/* Advance the rover over offline cpus as well, so the scan
		 * cannot stall on a hole in the online map.
		 */
		cpu_rover++;
	}

	/* Impossible, since num_online_cpus() <= num_possible_cpus() */
	return first_cpu(cpu_online_map);
}
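
/* Example (assuming cpus 0-3 online): index 6 gives end = 6 % 4 = 2 and
 * the scan returns cpu 2 -- a plain modulo mapping that steps over any
 * holes in the online map.
 */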

static int _map_to_cpu(unsigned int index)
{
	struct cpuinfo_node *root_node;

	if (unlikely(!cpuinfo_tree)) {
		_cpu_map_rebuild();
		if (!cpuinfo_tree)
			return simple_map_to_cpu(index);
	}

	root_node = &cpuinfo_tree->nodes[0];
#ifdef CONFIG_HOTPLUG_CPU
	if (unlikely(root_node->num_cpus != num_online_cpus())) {
		_cpu_map_rebuild();
		if (!cpuinfo_tree)
			return simple_map_to_cpu(index);

		/* The tree was reallocated; refresh the root pointer. */
		root_node = &cpuinfo_tree->nodes[0];
	}
#endif
	return cpu_distribution_map[index % root_node->num_cpus];
}

int map_to_cpu(unsigned int index)
{
	int mapped_cpu;
	unsigned long flag;

	spin_lock_irqsave(&cpu_map_lock, flag);
	mapped_cpu = _map_to_cpu(index);

#ifdef CONFIG_HOTPLUG_CPU
	while (unlikely(!cpu_online(mapped_cpu)))
		mapped_cpu = _map_to_cpu(index);
#endif
	spin_unlock_irqrestore(&cpu_map_lock, flag);
	return mapped_cpu;
}
EXPORT_SYMBOL(map_to_cpu);
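
/* Typical use (illustrative sketch; xyz_dispatch and its types are made up
 * for this example):
 *
 *	static void xyz_dispatch(struct xyz_dev *dev, unsigned int ring)
 *	{
 *		int cpu = map_to_cpu(ring);
 *
 *		queue_work_on(cpu, dev->wq, &dev->ring_work[ring]);
 *	}
 *
 * Any dense index (queue number, interrupt slot, ...) maps back to an
 * online cpu picked to spread load across nodes, cores and pipelines.
 */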

void cpu_map_rebuild(void)
{
	unsigned long flag;

	spin_lock_irqsave(&cpu_map_lock, flag);
	_cpu_map_rebuild();
	spin_unlock_irqrestore(&cpu_map_lock, flag);
}