 b1c089b7ca
			
		
	
	
	b1c089b7ca
	
	
	
		
			
			This patch is required so that error records are not lost through actions invoked during error recovery, such as a slot reset. The following sample (real machine + dummy record injected by aer-inject) shows that the record of 28:00.1 could not be retrieved because of the recovery of 28:00.0: - Before: pcieport-driver 0000:00:02.0: AER: Multiple Uncorrected (Non-Fatal) error received: id=2801 e1000e 0000:28:00.0: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2800(Receiver ID) e1000e 0000:28:00.0: device [8086:1096] error status/mask=00001000/00100000 e1000e 0000:28:00.0: [12] Poisoned TLP (First) e1000e 0000:28:00.0: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.0: broadcast error_detected message e1000e 0000:28:00.0: broadcast slot_reset message e1000e 0000:28:00.0: setting latency timer to 64 e1000e 0000:28:00.0: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.1: setting latency timer to 64 e1000e 0000:28:00.1: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.0: broadcast resume message e1000e 0000:28:00.0: AER driver successfully recovered e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX - After: pcieport-driver 0000:00:02.0: AER: Multiple Uncorrected (Non-Fatal) error received: id=2801 e1000e 0000:28:00.0: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2800(Receiver ID) e1000e 0000:28:00.0: device [8086:1096] error status/mask=00001000/00100000 e1000e 0000:28:00.0: [12] Poisoned TLP (First) e1000e 0000:28:00.0: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.1: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2801(Receiver ID) e1000e 0000:28:00.1: device [8086:1096] error status/mask=00081000/00100000 e1000e 0000:28:00.1: [12] 
Poisoned TLP (First) e1000e 0000:28:00.1: [19] ECRC e1000e 0000:28:00.1: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.1: Error of this Agent(2801) is reported first e1000e 0000:28:00.0: broadcast error_detected message e1000e 0000:28:00.0: broadcast slot_reset message e1000e 0000:28:00.0: setting latency timer to 64 e1000e 0000:28:00.0: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.1: setting latency timer to 64 e1000e 0000:28:00.1: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.0: broadcast resume message e1000e 0000:28:00.0: AER driver successfully recovered e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
		
			
				
	
	
		
			137 lines
		
	
	
	
		
			3.3 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			137 lines
		
	
	
	
		
			3.3 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Copyright (C) 2006 Intel Corp.
 | |
|  *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 | |
|  *	Zhang Yanmin (yanmin.zhang@intel.com)
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #ifndef _AERDRV_H_
 | |
| #define _AERDRV_H_
 | |
| 
 | |
| #include <linux/workqueue.h>
 | |
| #include <linux/pcieport_if.h>
 | |
| #include <linux/aer.h>
 | |
| #include <linux/interrupt.h>
 | |
| 
 | |
/*
 * Error severity codes; these are the values stored in the 2-bit
 * "severity" field of struct aer_err_info.
 */
#define AER_NONFATAL		0
#define AER_FATAL		1
#define AER_CORRECTABLE		2

/* Mask applied to the Root Error Status Register bits */
#define ROOT_ERR_STATUS_MASKS	0x0f
 | |
| 
 | |
/*
 * Union of the three PCI_EXP_RTCTL_* system-error enable bits
 * (SECEE, SENFEE and SEFEE).
 */
#define SYSTEM_ERROR_INTR_ON_MESG_MASK	\
	(PCI_EXP_RTCTL_SECEE  |		\
	 PCI_EXP_RTCTL_SENFEE |		\
	 PCI_EXP_RTCTL_SEFEE)

/*
 * Union of the three PCI_ERR_ROOT_CMD_* enable bits
 * (COR_EN, NONFATAL_EN and FATAL_EN).
 */
#define ROOT_PORT_INTR_ON_MESG_MASK	\
	(PCI_ERR_ROOT_CMD_COR_EN      |	\
	 PCI_ERR_ROOT_CMD_NONFATAL_EN |	\
	 PCI_ERR_ROOT_CMD_FATAL_EN)
 | |
/*
 * Split a combined source-ID word into its two halves:
 *   ERR_COR_ID(d)   - the low 16 bits of d
 *   ERR_UNCOR_ID(d) - d shifted right by 16 bits
 *
 * NOTE(review): presumably d is the value read from the Error Source
 * Identification register - confirm against the callers.
 *
 * The argument is parenthesized so that an expression such as
 * ERR_COR_ID(a | b) is masked/shifted as a whole; without the
 * parentheses, '&' and '>>' would bind tighter than '|' and apply to
 * 'b' only.
 */
#define ERR_COR_ID(d)			((d) & 0xffff)
#define ERR_UNCOR_ID(d)			((d) >> 16)
 | |
| 
 | |
/* Number of entries in the e_sources[] array of struct aer_rpc */
#define AER_ERROR_SOURCES_MAX	100

/*
 * Union of six PCI_ERR_UNC_* status bits: POISON_TLP, ECRC, UNSUP,
 * COMP_ABORT, UNX_COMP and MALF_TLP.
 * NOTE(review): going by the name, these are presumably the errors for
 * which a TLP header is logged - confirm against the users of the mask.
 */
#define AER_LOG_TLP_MASKS		\
	(PCI_ERR_UNC_POISON_TLP |	\
	 PCI_ERR_UNC_ECRC       |	\
	 PCI_ERR_UNC_UNSUP      |	\
	 PCI_ERR_UNC_COMP_ABORT |	\
	 PCI_ERR_UNC_UNX_COMP   |	\
	 PCI_ERR_UNC_MALF_TLP)
 | |
| 
 | |
/*
 * Four consecutive unsigned words, dw0 through dw3.  struct aer_err_info
 * embeds one of these as its "tlp" member to hold a TLP header.
 */
struct header_log_regs {
	unsigned int dw0;	/* first word */
	unsigned int dw1;	/* second word */
	unsigned int dw2;	/* third word */
	unsigned int dw3;	/* fourth word */
};
 | |
| 
 | |
| #define AER_MAX_MULTI_ERR_DEVICES	5	/* Not likely to have more */
 | |
| struct aer_err_info {
 | |
| 	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
 | |
| 	int error_dev_num;
 | |
| 
 | |
| 	unsigned int id:16;
 | |
| 
 | |
| 	unsigned int severity:2;	/* 0:NONFATAL | 1:FATAL | 2:COR */
 | |
| 	unsigned int __pad1:5;
 | |
| 	unsigned int multi_error_valid:1;
 | |
| 
 | |
| 	unsigned int first_error:5;
 | |
| 	unsigned int __pad2:2;
 | |
| 	unsigned int tlp_header_valid:1;
 | |
| 
 | |
| 	unsigned int status;		/* COR/UNCOR Error Status */
 | |
| 	unsigned int mask;		/* COR/UNCOR Error Mask */
 | |
| 	struct header_log_regs tlp;	/* TLP Header */
 | |
| };
 | |
| 
 | |
/*
 * One queued error source: a status word and an id word.  struct aer_rpc
 * keeps an array of these (e_sources[]).
 */
struct aer_err_source {
	unsigned int status;	/* status word for this source */
	unsigned int id;	/* id word for this source */
};
 | |
| 
 | |
| struct aer_rpc {
 | |
| 	struct pcie_device *rpd;	/* Root Port device */
 | |
| 	struct work_struct dpc_handler;
 | |
| 	struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX];
 | |
| 	unsigned short prod_idx;	/* Error Producer Index */
 | |
| 	unsigned short cons_idx;	/* Error Consumer Index */
 | |
| 	int isr;
 | |
| 	spinlock_t e_lock;		/*
 | |
| 					 * Lock access to Error Status/ID Regs
 | |
| 					 * and error producer/consumer index
 | |
| 					 */
 | |
| 	struct mutex rpc_mutex;		/*
 | |
| 					 * only one thread could do
 | |
| 					 * recovery on the same
 | |
| 					 * root port hierarchy
 | |
| 					 */
 | |
| 	wait_queue_head_t wait_release;
 | |
| };
 | |
| 
 | |
| struct aer_broadcast_data {
 | |
| 	enum pci_channel_state state;
 | |
| 	enum pci_ers_result result;
 | |
| };
 | |
| 
 | |
| static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
 | |
| 		enum pci_ers_result new)
 | |
| {
 | |
| 	if (new == PCI_ERS_RESULT_NONE)
 | |
| 		return orig;
 | |
| 
 | |
| 	switch (orig) {
 | |
| 	case PCI_ERS_RESULT_CAN_RECOVER:
 | |
| 	case PCI_ERS_RESULT_RECOVERED:
 | |
| 		orig = new;
 | |
| 		break;
 | |
| 	case PCI_ERS_RESULT_DISCONNECT:
 | |
| 		if (new == PCI_ERS_RESULT_NEED_RESET)
 | |
| 			orig = new;
 | |
| 		break;
 | |
| 	default:
 | |
| 		break;
 | |
| 	}
 | |
| 
 | |
| 	return orig;
 | |
| }
 | |
| 
 | |
| extern struct bus_type pcie_port_bus_type;
 | |
| extern void aer_enable_rootport(struct aer_rpc *rpc);
 | |
| extern void aer_delete_rootport(struct aer_rpc *rpc);
 | |
| extern int aer_init(struct pcie_device *dev);
 | |
| extern void aer_isr(struct work_struct *work);
 | |
| extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 | |
| extern void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info);
 | |
| extern irqreturn_t aer_irq(int irq, void *context);
 | |
| 
 | |
/*
 * aer_osc_setup - with CONFIG_ACPI defined this is only a declaration of
 * a function implemented elsewhere; without it, an inline stub that
 * ignores @pciedev and returns 0 is provided.
 */
#ifndef CONFIG_ACPI
static inline int aer_osc_setup(struct pcie_device *pciedev)
{
	return 0;
}
#else
int aer_osc_setup(struct pcie_device *pciedev);
#endif
 | |
| 
 | |
| #endif /* _AERDRV_H_ */
 |