perf/x86/intel: Add Broadwell core support
Add perf support for Broadwell Client cores. Broadwell is very similar to Haswell. It uses a new cache event table, because there were various changes in the cache events. The constraint list has one additional event that needs to be handled compared to Haswell. The PEBS event list is the same, so we reuse Haswell's. [fengguang.wu: make intel_bdw_event_constraints[] static] Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: eranian@google.com Link: http://lkml.kernel.org/r/1409683455-29168-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
					parent
					
						
							
								d86c8eaf95
							
						
					
				
			
			
				commit
				
					
						86a349a28b
					
				
			
		
					 1 changed files with 150 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -220,6 +220,15 @@ static struct event_constraint intel_hsw_event_constraints[] = {
 | 
			
		|||
	EVENT_CONSTRAINT_END
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * Event constraint table for Broadwell.  intel_pmu_init() installs it as
 * x86_pmu.event_constraints for CPU model 61.  The list is terminated by
 * EVENT_CONSTRAINT_END.
 *
 * NOTE(review): the second macro argument is presumably a fixed-counter
 * index for FIXED_EVENT_CONSTRAINT and a counter bitmask for the
 * INTEL_*EVENT_CONSTRAINT macros -- confirm against the macro definitions.
 */
static struct event_constraint intel_bdw_event_constraints[] = {
 | 
			
		||||
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
 | 
			
		||||
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
 | 
			
		||||
	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
 | 
			
		||||
	INTEL_UEVENT_CONSTRAINT(0x148, 0x4),	/* L1D_PEND_MISS.PENDING */
 | 
			
		||||
	INTEL_EVENT_CONSTRAINT(0xa3, 0x4),	/* CYCLE_ACTIVITY.* */
 | 
			
		||||
	EVENT_CONSTRAINT_END
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static u64 intel_pmu_event_map(int hw_event)
 | 
			
		||||
{
 | 
			
		||||
	return intel_perfmon_event_map[hw_event];
 | 
			
		||||
| 
						 | 
				
			
			@ -415,6 +424,126 @@ static __initconst const u64 snb_hw_cache_event_ids
 | 
			
		|||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * Cache event table indexed by [cache type][operation][result].  Each
 * populated slot holds the raw event code named in the comment beside it.
 * intel_pmu_init() copies the whole table into hw_cache_event_ids for CPU
 * model 61.
 *
 * NOTE(review): 0x0 presumably marks "no event available" and -1 marks a
 * combination that is not meaningful -- confirm against the code that
 * consumes hw_cache_event_ids.
 */
static __initconst const u64 hsw_hw_cache_event_ids
 | 
			
		||||
				[PERF_COUNT_HW_CACHE_MAX]
 | 
			
		||||
				[PERF_COUNT_HW_CACHE_OP_MAX]
 | 
			
		||||
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 | 
			
		||||
{
 | 
			
		||||
 [ C(L1D ) ] = {
 | 
			
		||||
	[ C(OP_READ) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x81d0, 	/* MEM_UOPS_RETIRED.ALL_LOADS */
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x151, 	/* L1D.REPLACEMENT */
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_WRITE) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x82d0, 	/* MEM_UOPS_RETIRED.ALL_STORES */
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x0,
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_PREFETCH) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x0,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x0,
 | 
			
		||||
	},
 | 
			
		||||
 },
 | 
			
		||||
 [ C(L1I ) ] = {
 | 
			
		||||
	[ C(OP_READ) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x0,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x280, 	/* ICACHE.MISSES */
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_WRITE) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = -1,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = -1,
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_PREFETCH) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x0,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x0,
 | 
			
		||||
	},
 | 
			
		||||
 },
 | 
			
		||||
 /*
  * Every populated LL slot below uses the same code, 0x1b7; the slots at
  * the same indices in hsw_hw_cache_extra_regs hold distinct values under
  * the same OFFCORE_RESPONSE comments.
  */
 [ C(LL  ) ] = {
 | 
			
		||||
	[ C(OP_READ) ] = {
 | 
			
		||||
		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x1b7,
 | 
			
		||||
		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
 | 
			
		||||
                   L3_MISS|ANY_SNOOP */
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x1b7,
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_WRITE) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x1b7, 	/* OFFCORE_RESPONSE:ALL_RFO */
 | 
			
		||||
		/* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x1b7,
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_PREFETCH) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x0,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x0,
 | 
			
		||||
	},
 | 
			
		||||
 },
 | 
			
		||||
 [ C(DTLB) ] = {
 | 
			
		||||
	[ C(OP_READ) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x81d0, 	/* MEM_UOPS_RETIRED.ALL_LOADS */
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x108, 	/* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_WRITE) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x82d0, 	/* MEM_UOPS_RETIRED.ALL_STORES */
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x149, 	/* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_PREFETCH) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x0,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x0,
 | 
			
		||||
	},
 | 
			
		||||
 },
 | 
			
		||||
 [ C(ITLB) ] = {
 | 
			
		||||
	[ C(OP_READ) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x6085, 	/* ITLB_MISSES.STLB_HIT */
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x185, 	/* ITLB_MISSES.MISS_CAUSES_A_WALK */
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_WRITE) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = -1,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = -1,
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_PREFETCH) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = -1,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = -1,
 | 
			
		||||
	},
 | 
			
		||||
 },
 | 
			
		||||
 [ C(BPU ) ] = {
 | 
			
		||||
	[ C(OP_READ) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0xc4, 	/* BR_INST_RETIRED.ALL_BRANCHES */
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0xc5, 	/* BR_MISP_RETIRED.ALL_BRANCHES */
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_WRITE) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = -1,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = -1,
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_PREFETCH) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = -1,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = -1,
 | 
			
		||||
	},
 | 
			
		||||
 },
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * Companion to hsw_hw_cache_event_ids with the same three index
 * dimensions.  Only the LL slots are listed here; slots that are not
 * listed are zero by C initializer rules.  intel_pmu_init() copies the
 * table into hw_cache_extra_regs for CPU model 61.
 *
 * NOTE(review): the values are presumably the request/response masks
 * programmed for the OFFCORE_RESPONSE event named in each comment --
 * confirm against the consumer of hw_cache_extra_regs.
 */
static __initconst const u64 hsw_hw_cache_extra_regs
 | 
			
		||||
				[PERF_COUNT_HW_CACHE_MAX]
 | 
			
		||||
				[PERF_COUNT_HW_CACHE_OP_MAX]
 | 
			
		||||
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 | 
			
		||||
{
 | 
			
		||||
 [ C(LL  ) ] = {
 | 
			
		||||
	[ C(OP_READ) ] = {
 | 
			
		||||
		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x2d5,
 | 
			
		||||
		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
 | 
			
		||||
                   L3_MISS|ANY_SNOOP */
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x3fbc0202d5ull,
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_WRITE) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x122, 	/* OFFCORE_RESPONSE:ALL_RFO */
 | 
			
		||||
		/* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x3fbc020122ull,
 | 
			
		||||
	},
 | 
			
		||||
	[ C(OP_PREFETCH) ] = {
 | 
			
		||||
		[ C(RESULT_ACCESS) ] = 0x0,
 | 
			
		||||
		[ C(RESULT_MISS)   ] = 0x0,
 | 
			
		||||
	},
 | 
			
		||||
 },
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static __initconst const u64 westmere_hw_cache_event_ids
 | 
			
		||||
				[PERF_COUNT_HW_CACHE_MAX]
 | 
			
		||||
				[PERF_COUNT_HW_CACHE_OP_MAX]
 | 
			
		||||
| 
						 | 
				
			
			@ -2565,6 +2694,27 @@ __init int intel_pmu_init(void)
 | 
			
		|||
		pr_cont("Haswell events, ");
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	case 61: /* 14nm Broadwell Core-M */
 | 
			
		||||
		x86_pmu.late_ack = true;
 | 
			
		||||
		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 | 
			
		||||
		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
 | 
			
		||||
 | 
			
		||||
		intel_pmu_lbr_init_snb();
 | 
			
		||||
 | 
			
		||||
		x86_pmu.event_constraints = intel_bdw_event_constraints;
 | 
			
		||||
		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
 | 
			
		||||
		x86_pmu.extra_regs = intel_snbep_extra_regs;
 | 
			
		||||
		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 | 
			
		||||
		/* all extra regs are per-cpu when HT is on */
 | 
			
		||||
		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 | 
			
		||||
		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 | 
			
		||||
 | 
			
		||||
		x86_pmu.hw_config = hsw_hw_config;
 | 
			
		||||
		x86_pmu.get_event_constraints = hsw_get_event_constraints;
 | 
			
		||||
		x86_pmu.cpu_events = hsw_events_attrs;
 | 
			
		||||
		pr_cont("Broadwell events, ");
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	default:
 | 
			
		||||
		switch (x86_pmu.version) {
 | 
			
		||||
		case 1:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue