krishnanadh / rpms / rasdaemon

Forked from rpms/rasdaemon a year ago
Clone

Blame SOURCES/8704a85d8dc3483423ec2934fee8132f85f8fdb6.patch

f5a251
commit 8704a85d8dc3483423ec2934fee8132f85f8fdb6
f5a251
Author: Brian Woods <brian.woods@amd.com>, Yazen Ghannam <Yazen.Ghannam@amd.com>
f5a251
Date:   Fri Nov 1 15:48:14 2019 +0100
f5a251
f5a251
    rasdaemon: add support for new AMD SMCA bank types
f5a251
    
f5a251
    Going forward, the Scalable Machine Check Architecture (SMCA) has some
f5a251
    updated and additional bank types which show up in Zen2.  The differing
f5a251
    bank types include: CS_V2, PSP_V2, SMU_V2, MP5, NBIO, and PCIE.  The V2
f5a251
    bank types replace the original bank types but have unique HWID/MCAtype
f5a251
    IDs from the originals so there are no conflicts between different
f5a251
    versions or other bank types.  All of the differing bank types have new
f5a251
    MCE descriptions which have been added as well.
f5a251
    
f5a251
    CC: "mchehab+samsung@kernel.org" <mchehab+samsung@kernel.org>, "Namburu, Chandu-babu" <chandu@amd.com> # Thread-Topic: [PATCH 2/2] rasdaemon: add support for new AMD SMCA bank types
f5a251
    Signed-off-by: Brian Woods <brian.woods@amd.com>
f5a251
    Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
f5a251
    Cc: Chandu-babu Namburu <chandu@amd.com>
f5a251
    Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
f5a251
f5a251
diff --git a/mce-amd-smca.c b/mce-amd-smca.c
f5a251
index 6c3e8a5..114e786 100644
f5a251
--- a/mce-amd-smca.c
f5a251
+++ b/mce-amd-smca.c
f5a251
@@ -49,11 +49,17 @@ enum smca_bank_types {
f5a251
 	SMCA_FP,        /* Floating Point */
f5a251
 	SMCA_L3_CACHE,  /* L3 Cache */
f5a251
 	SMCA_CS,        /* Coherent Slave */
f5a251
+	SMCA_CS_V2,     /* Coherent Slave V2 */
f5a251
 	SMCA_PIE,       /* Power, Interrupts, etc. */
f5a251
 	SMCA_UMC,       /* Unified Memory Controller */
f5a251
 	SMCA_PB,        /* Parameter Block */
f5a251
 	SMCA_PSP,       /* Platform Security Processor */
f5a251
+	SMCA_PSP_V2,    /* Platform Security Processor V2 */
f5a251
 	SMCA_SMU,       /* System Management Unit */
f5a251
+	SMCA_SMU_V2,    /* System Management Unit V2 */
f5a251
+	SMCA_MP5,	/* Microprocessor 5 Unit */
f5a251
+	SMCA_NBIO,	/* Northbridge IO Unit */
f5a251
+	SMCA_PCIE,	/* PCI Express Unit */
f5a251
 	N_SMCA_BANK_TYPES
f5a251
 };
f5a251
 
f5a251
@@ -165,6 +171,23 @@ static const char * const smca_cs_mce_desc[] = {
f5a251
 	"Atomic request parity",
f5a251
 	"ECC error on probe filter access",
f5a251
 };
f5a251
+/* Coherent Slave Unit V2 */
f5a251
+static const char * const smca_cs2_mce_desc[] = {
f5a251
+	"Illegal Request",
f5a251
+	"Address Violation",
f5a251
+	"Security Violation",
f5a251
+	"Illegal Response",
f5a251
+	"Unexpected Response",
f5a251
+	"Request or Probe Parity Error",
f5a251
+	"Read Response Parity Error",
f5a251
+	"Atomic Request Parity Error",
f5a251
+	"SDP read response had no match in the CS queue",
f5a251
+	"Probe Filter Protocol Error",
f5a251
+	"Probe Filter ECC Error",
f5a251
+	"SDP read response had an unexpected RETRY error",
f5a251
+	"Counter overflow error",
f5a251
+	"Counter underflow error",
f5a251
+};
f5a251
 /* Power, Interrupt, etc.. */
f5a251
 static const char * const smca_pie_mce_desc[] = {
f5a251
 	"HW assert",
f5a251
@@ -189,10 +212,75 @@ static const char * const smca_pb_mce_desc[] = {
f5a251
 static const char * const smca_psp_mce_desc[] = {
f5a251
 	"PSP RAM ECC or parity error",
f5a251
 };
f5a251
+/* Platform Security Processor V2 */
f5a251
+static const char * const smca_psp2_mce_desc[] = {
f5a251
+	"High SRAM ECC or parity error",
f5a251
+	"Low SRAM ECC or parity error",
f5a251
+	"Instruction Cache Bank 0 ECC or parity error",
f5a251
+	"Instruction Cache Bank 1 ECC or parity error",
f5a251
+	"Instruction Tag Ram 0 parity error",
f5a251
+	"Instruction Tag Ram 1 parity error",
f5a251
+	"Data Cache Bank 0 ECC or parity error",
f5a251
+	"Data Cache Bank 1 ECC or parity error",
f5a251
+	"Data Cache Bank 2 ECC or parity error",
f5a251
+	"Data Cache Bank 3 ECC or parity error",
f5a251
+	"Data Tag Bank 0 parity error",
f5a251
+	"Data Tag Bank 1 parity error",
f5a251
+	"Data Tag Bank 2 parity error",
f5a251
+	"Data Tag Bank 3 parity error",
f5a251
+	"Dirty Data Ram parity error",
f5a251
+	"TLB Bank 0 parity error",
f5a251
+	"TLB Bank 1 parity error",
f5a251
+	"System Hub Read Buffer ECC or parity error",
f5a251
+};
f5a251
 /* System Management Unit */
f5a251
 static const char * const smca_smu_mce_desc[] = {
f5a251
 	"SMU RAM ECC or parity error",
f5a251
 };
f5a251
+/* System Management Unit V2 */
f5a251
+static const char * const smca_smu2_mce_desc[] = {
f5a251
+	"High SRAM ECC or parity error",
f5a251
+	"Low SRAM ECC or parity error",
f5a251
+	"Data Cache Bank A ECC or parity error",
f5a251
+	"Data Cache Bank B ECC or parity error",
f5a251
+	"Data Tag Cache Bank A ECC or parity error",
f5a251
+	"Data Tag Cache Bank B ECC or parity error",
f5a251
+	"Instruction Cache Bank A ECC or parity error",
f5a251
+	"Instruction Cache Bank B ECC or parity error",
f5a251
+	"Instruction Tag Cache Bank A ECC or parity error",
f5a251
+	"Instruction Tag Cache Bank B ECC or parity error",
f5a251
+	"System Hub Read Buffer ECC or parity error",
f5a251
+};
f5a251
+/* Microprocessor 5 Unit */
f5a251
+static const char * const smca_mp5_mce_desc[] = {
f5a251
+	"High SRAM ECC or parity error",
f5a251
+	"Low SRAM ECC or parity error",
f5a251
+	"Data Cache Bank A ECC or parity error",
f5a251
+	"Data Cache Bank B ECC or parity error",
f5a251
+	"Data Tag Cache Bank A ECC or parity error",
f5a251
+	"Data Tag Cache Bank B ECC or parity error",
f5a251
+	"Instruction Cache Bank A ECC or parity error",
f5a251
+	"Instruction Cache Bank B ECC or parity error",
f5a251
+	"Instruction Tag Cache Bank A ECC or parity error",
f5a251
+	"Instruction Tag Cache Bank B ECC or parity error",
f5a251
+};
f5a251
+/* Northbridge IO Unit */
f5a251
+static const char * const smca_nbio_mce_desc[] = {
f5a251
+	"ECC or Parity error",
f5a251
+	"PCIE error",
f5a251
+	"SDP ErrEvent error",
f5a251
+	"SDP Egress Poison Error",
f5a251
+	"IOHC Internal Poison Error",
f5a251
+};
f5a251
+/* PCI Express Unit */
f5a251
+static const char * const smca_pcie_mce_desc[] = {
f5a251
+	"CCIX PER Message logging",
f5a251
+	"CCIX Read Response with Status: Non-Data Error",
f5a251
+	"CCIX Write Response with Status: Non-Data Error",
f5a251
+	"CCIX Read Response with Status: Data Error",
f5a251
+	"CCIX Non-okay write response with data error",
f5a251
+};
f5a251
+
f5a251
 
f5a251
 struct smca_mce_desc {
f5a251
 	const char * const *descs;
f5a251
@@ -208,11 +296,17 @@ static struct smca_mce_desc smca_mce_descs[] = {
f5a251
 	[SMCA_FP]       = { smca_fp_mce_desc,   ARRAY_SIZE(smca_fp_mce_desc)  },
f5a251
 	[SMCA_L3_CACHE] = { smca_l3_mce_desc,   ARRAY_SIZE(smca_l3_mce_desc)  },
f5a251
 	[SMCA_CS]       = { smca_cs_mce_desc,   ARRAY_SIZE(smca_cs_mce_desc)  },
f5a251
+	[SMCA_CS_V2]    = { smca_cs2_mce_desc,  ARRAY_SIZE(smca_cs2_mce_desc) },
f5a251
 	[SMCA_PIE]      = { smca_pie_mce_desc,  ARRAY_SIZE(smca_pie_mce_desc) },
f5a251
 	[SMCA_UMC]      = { smca_umc_mce_desc,  ARRAY_SIZE(smca_umc_mce_desc) },
f5a251
 	[SMCA_PB]       = { smca_pb_mce_desc,   ARRAY_SIZE(smca_pb_mce_desc)  },
f5a251
 	[SMCA_PSP]      = { smca_psp_mce_desc,  ARRAY_SIZE(smca_psp_mce_desc) },
f5a251
+	[SMCA_PSP_V2]   = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)},
f5a251
 	[SMCA_SMU]      = { smca_smu_mce_desc,  ARRAY_SIZE(smca_smu_mce_desc) },
f5a251
+	[SMCA_SMU_V2]   = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc)},
f5a251
+	[SMCA_MP5]      = { smca_mp5_mce_desc,  ARRAY_SIZE(smca_mp5_mce_desc) },
f5a251
+	[SMCA_NBIO]     = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)},
f5a251
+	[SMCA_PCIE]     = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)},
f5a251
 };
f5a251
 
f5a251
 struct smca_hwid {
f5a251
@@ -235,6 +329,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
f5a251
 
f5a251
 	/* Data Fabric MCA types */
f5a251
 	{ SMCA_CS,       0x0000002E },
f5a251
+	{ SMCA_CS_V2,    0x0002002E },
f5a251
 	{ SMCA_PIE,      0x0001002E },
f5a251
 
f5a251
 	/* Unified Memory Controller MCA type */
f5a251
@@ -245,9 +340,20 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
f5a251
 
f5a251
 	/* Platform Security Processor MCA type */
f5a251
 	{ SMCA_PSP,      0x000000FF },
f5a251
+	{ SMCA_PSP_V2,   0x000100FF },
f5a251
 
f5a251
 	/* System Management Unit MCA type */
f5a251
 	{ SMCA_SMU,      0x00000001 },
f5a251
+	{ SMCA_SMU_V2,   0x00010001 },
f5a251
+
f5a251
+	/* Microprocessor 5 Unit MCA type */
f5a251
+	{ SMCA_MP5,      0x00020001 },
f5a251
+
f5a251
+	/* Northbridge IO Unit MCA type */
f5a251
+	{ SMCA_NBIO,     0x00000018 },
f5a251
+
f5a251
+	/* PCI Express Unit MCA type */
f5a251
+	{ SMCA_PCIE,     0x00000046 },
f5a251
 };
f5a251
 
f5a251
 struct smca_bank_name {
f5a251
@@ -264,11 +370,17 @@ static struct smca_bank_name smca_names[] = {
f5a251
 	[SMCA_FP]       = { "Floating Point Unit" },
f5a251
 	[SMCA_L3_CACHE] = { "L3 Cache" },
f5a251
 	[SMCA_CS]       = { "Coherent Slave" },
f5a251
+	[SMCA_CS_V2]    = { "Coherent Slave" },
f5a251
 	[SMCA_PIE]      = { "Power, Interrupts, etc." },
f5a251
 	[SMCA_UMC]      = { "Unified Memory Controller" },
f5a251
 	[SMCA_PB]       = { "Parameter Block" },
f5a251
 	[SMCA_PSP]      = { "Platform Security Processor" },
f5a251
+	[SMCA_PSP_V2]   = { "Platform Security Processor" },
f5a251
 	[SMCA_SMU]      = { "System Management Unit" },
f5a251
+	[SMCA_SMU_V2]   = { "System Management Unit" },
f5a251
+	[SMCA_MP5]	= { "Microprocessor 5 Unit" },
f5a251
+	[SMCA_NBIO]     = { "Northbridge IO Unit" },
f5a251
+	[SMCA_PCIE]     = { "PCI Express Unit" },
f5a251
 };
f5a251
 
f5a251
 static void amd_decode_errcode(struct mce_event *e)