 545104dd2b
			
		
	
	
	545104dd2b
	
	
	
		
			
			There's an inconsistency between dmesg and the trace event output.
When dmesg says "severity=Corrected", the trace event says
"severity=Fatal". What happens is that HW_EVENT_ERR_CORRECTED is
defined in edac.h:
enum hw_event_mc_err_type {
        HW_EVENT_ERR_CORRECTED,
        HW_EVENT_ERR_UNCORRECTED,
        HW_EVENT_ERR_FATAL,
        HW_EVENT_ERR_INFO,
};
while aer_print_error() uses aer_error_severity_string[] defined as:
static const char *aer_error_severity_string[] = {
        "Uncorrected (Non-Fatal)",
        "Uncorrected (Fatal)",
        "Corrected"
};
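In this case dmesg is correct because info->severity is assigned in
aer_isr_one_error() using the definitions in include/linux/ras.h:
        #define AER_NONFATAL            0
        #define AER_FATAL               1
        #define AER_CORRECTABLE         2
These values do not line up with enum hw_event_mc_err_type, so the trace
event has to compare the severity against the AER_* definitions instead.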
Signed-off-by: Rui Wang <rui.y.wang@intel.com>
Acked-by: Ethan Zhao <ethan.kernel@gmail.com>
Link: http://lkml.kernel.org/r/CANVTcTaP18CiGOSEcX5Ch_wPw9mEhkgokfp+d+ZOMFD+Ce4juA@mail.gmail.com
Signed-off-by: Borislav Petkov <bp@suse.de>
		
	
			
		
			
				
	
	
		
			77 lines
		
	
	
	
		
			2.1 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			77 lines
		
	
	
	
		
			2.1 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| #undef TRACE_SYSTEM
 | |
| #define TRACE_SYSTEM ras
 | |
| 
 | |
| #if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
 | |
| #define _TRACE_AER_H
 | |
| 
 | |
| #include <linux/tracepoint.h>
 | |
| #include <linux/aer.h>
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * PCIe AER Trace event
 | |
|  *
 | |
|  * These events are generated when hardware detects a corrected or
 | |
|  * uncorrected event on a PCIe device. The event report has
 | |
|  * the following structure:
 | |
|  *
 | |
|  * char * dev_name -	The name of the slot where the device resides
 | |
|  *			([domain:]bus:device.function).
 | |
|  * u32 status -		Either the correctable or uncorrectable register
 | |
|  *			indicating what error or errors have been seen
 | |
|  * u8 severity -	error severity 0:NONFATAL 1:FATAL 2:CORRECTED
 | |
|  */
 | |
| 
 | |
/*
 * Bit/name pairs used to decode the status word of a correctable error.
 * The aer_event trace event passes this list to __print_flags() when the
 * recorded severity equals AER_CORRECTABLE.
 *
 * NOTE(review): "RELAY_NUM" looks like a typo for "REPLAY_NUM". The string
 * is emitted in trace output, so confirm that no consumer matches on the
 * current spelling before changing it.
 */
| #define aer_correctable_errors		\
 | |
| 	{BIT(0),	"Receiver Error"},		\
 | |
| 	{BIT(6),	"Bad TLP"},			\
 | |
| 	{BIT(7),	"Bad DLLP"},			\
 | |
| 	{BIT(8),	"RELAY_NUM Rollover"},		\
 | |
| 	{BIT(12),	"Replay Timer Timeout"},	\
 | |
| 	{BIT(13),	"Advisory Non-Fatal"}
 | |
| 
 | |
/*
 * Bit/name pairs used to decode the status word of an uncorrectable error.
 * The aer_event trace event passes this list to __print_flags() whenever
 * the recorded severity is anything other than AER_CORRECTABLE, so it
 * covers both the fatal and the non-fatal case.
 */
| #define aer_uncorrectable_errors		\
 | |
| 	{BIT(4),	"Data Link Protocol"},		\
 | |
| 	{BIT(12),	"Poisoned TLP"},		\
 | |
| 	{BIT(13),	"Flow Control Protocol"},	\
 | |
| 	{BIT(14),	"Completion Timeout"},		\
 | |
| 	{BIT(15),	"Completer Abort"},		\
 | |
| 	{BIT(16),	"Unexpected Completion"},	\
 | |
| 	{BIT(17),	"Receiver Overflow"},		\
 | |
| 	{BIT(18),	"Malformed TLP"},		\
 | |
| 	{BIT(19),	"ECRC"},			\
 | |
| 	{BIT(20),	"Unsupported Request"}
 | |
| 
 | |
/*
 * aer_event - trace event describing one PCIe AER error report.
 *
 * Arguments:
 *   dev_name - device name string supplied by the caller
 *   status   - 32-bit error status value
 *   severity - 8-bit severity code, compared against the AER_* constants
 *
 * All three values are stored in the event record. When the record is
 * formatted, the severity is printed as "Corrected" for AER_CORRECTABLE,
 * "Fatal" for AER_FATAL, and "Uncorrected, non-fatal" for any other value.
 * The same AER_CORRECTABLE test selects which bit-name table is used to
 * decode status.
 */
| TRACE_EVENT(aer_event,
 | |
| 	TP_PROTO(const char *dev_name,
 | |
| 		 const u32 status,
 | |
| 		 const u8 severity),
 | |
| 
 | |
| 	TP_ARGS(dev_name, status, severity),
 | |
| 
 | |
| 	TP_STRUCT__entry(
 | |
| 		__string(	dev_name,	dev_name	)
 | |
| 		__field(	u32,		status		)
 | |
| 		__field(	u8,		severity	)
 | |
| 	),
 | |
| 
 | |
| 	TP_fast_assign(
 | |
| 		__assign_str(dev_name, dev_name);
 | |
| 		__entry->status		= status;
 | |
| 		__entry->severity	= severity;
 | |
| 	),
 | |
| 
 | |
| 	TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
 | |
| 		__get_str(dev_name),
 | |
		/* Severity text: AER_CORRECTABLE first, then AER_FATAL, else non-fatal. */
| 		__entry->severity == AER_CORRECTABLE ? "Corrected" :
 | |
| 			__entry->severity == AER_FATAL ?
 | |
| 			"Fatal" : "Uncorrected, non-fatal",
 | |
		/* Decode status with the bit-name table that matches the severity. */
| 		__entry->severity == AER_CORRECTABLE ?
 | |
| 		__print_flags(__entry->status, "|", aer_correctable_errors) :
 | |
| 		__print_flags(__entry->status, "|", aer_uncorrectable_errors))
 | |
| );
 | |
| 
 | |
| #endif /* _TRACE_AER_H */
 | |
| 
 | |
| /* This part must be outside protection */
 | |
| #include <trace/define_trace.h>
 |