|
|
f5a251 |
commit 8704a85d8dc3483423ec2934fee8132f85f8fdb6
|
|
|
f5a251 |
Author: Brian Woods <brian.woods@amd.com>
|
|
|
f5a251 |
Date: Fri Nov 1 15:48:14 2019 +0100
|
|
|
f5a251 |
|
|
|
f5a251 |
rasdaemon: add support for new AMD SMCA bank types
|
|
|
f5a251 |
|
|
|
f5a251 |
Going forward, the Scalable Machine Check Architecture (SMCA) has some
|
|
|
f5a251 |
updated and additional bank types which show up in Zen2. The differing
|
|
|
f5a251 |
bank types include: CS_V2, PSP_V2, SMU_V2, MP5, NBIO, and PCIE. The V2
|
|
|
f5a251 |
bank types replace the original bank types but have unique HWID/MCAtype
|
|
|
f5a251 |
IDs from the originals, so there are no conflicts between different
|
|
|
f5a251 |
versions or other bank types. All of the differing bank types have new
|
|
|
f5a251 |
MCE descriptions which have been added as well.
|
|
|
f5a251 |
|
|
|
f5a251 |
CC: "mchehab+samsung@kernel.org" <mchehab+samsung@kernel.org>, "Namburu, Chandu-babu" <chandu@amd.com> # Thread-Topic: [PATCH 2/2] rasdaemon: add support for new AMD SMCA bank types
|
|
|
f5a251 |
Signed-off-by: Brian Woods <brian.woods@amd.com>
|
|
|
f5a251 |
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
|
|
|
f5a251 |
Cc: Chandu-babu Namburu <chandu@amd.com>
|
|
|
f5a251 |
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
|
|
|
f5a251 |
|
|
|
f5a251 |
diff --git a/mce-amd-smca.c b/mce-amd-smca.c
|
|
|
f5a251 |
index 6c3e8a5..114e786 100644
|
|
|
f5a251 |
--- a/mce-amd-smca.c
|
|
|
f5a251 |
+++ b/mce-amd-smca.c
|
|
|
f5a251 |
@@ -49,11 +49,17 @@ enum smca_bank_types {
|
|
|
f5a251 |
SMCA_FP, /* Floating Point */
|
|
|
f5a251 |
SMCA_L3_CACHE, /* L3 Cache */
|
|
|
f5a251 |
SMCA_CS, /* Coherent Slave */
|
|
|
f5a251 |
+ SMCA_CS_V2, /* Coherent Slave V2 */
|
|
|
f5a251 |
SMCA_PIE, /* Power, Interrupts, etc. */
|
|
|
f5a251 |
SMCA_UMC, /* Unified Memory Controller */
|
|
|
f5a251 |
SMCA_PB, /* Parameter Block */
|
|
|
f5a251 |
SMCA_PSP, /* Platform Security Processor */
|
|
|
f5a251 |
+ SMCA_PSP_V2, /* Platform Security Processor V2 */
|
|
|
f5a251 |
SMCA_SMU, /* System Management Unit */
|
|
|
f5a251 |
+ SMCA_SMU_V2, /* System Management Unit V2 */
|
|
|
f5a251 |
+ SMCA_MP5, /* Microprocessor 5 Unit */
|
|
|
f5a251 |
+ SMCA_NBIO, /* Northbridge IO Unit */
|
|
|
f5a251 |
+ SMCA_PCIE, /* PCI Express Unit */
|
|
|
f5a251 |
N_SMCA_BANK_TYPES
|
|
|
f5a251 |
};
|
|
|
f5a251 |
|
|
|
f5a251 |
@@ -165,6 +171,23 @@ static const char * const smca_cs_mce_desc[] = {
|
|
|
f5a251 |
"Atomic request parity",
|
|
|
f5a251 |
"ECC error on probe filter access",
|
|
|
f5a251 |
};
|
|
|
f5a251 |
+/* Coherent Slave Unit V2: MCE descriptions for bank type SMCA_CS_V2 (see smca_mce_descs[]) */
|
|
|
f5a251 |
+static const char * const smca_cs2_mce_desc[] = {
|
|
|
f5a251 |
+ "Illegal Request",
|
|
|
f5a251 |
+ "Address Violation",
|
|
|
f5a251 |
+ "Security Violation",
|
|
|
f5a251 |
+ "Illegal Response",
|
|
|
f5a251 |
+ "Unexpected Response",
|
|
|
f5a251 |
+ "Request or Probe Parity Error",
|
|
|
f5a251 |
+ "Read Response Parity Error",
|
|
|
f5a251 |
+ "Atomic Request Parity Error",
|
|
|
f5a251 |
+ "SDP read response had no match in the CS queue",
|
|
|
f5a251 |
+ "Probe Filter Protocol Error",
|
|
|
f5a251 |
+ "Probe Filter ECC Error",
|
|
|
f5a251 |
+ "SDP read response had an unexpected RETRY error",
|
|
|
f5a251 |
+ "Counter overflow error",
|
|
|
f5a251 |
+ "Counter underflow error",
|
|
|
f5a251 |
+};
|
|
|
f5a251 |
/* Power, Interrupt, etc.. */
|
|
|
f5a251 |
static const char * const smca_pie_mce_desc[] = {
|
|
|
f5a251 |
"HW assert",
|
|
|
f5a251 |
@@ -189,10 +212,75 @@ static const char * const smca_pb_mce_desc[] = {
|
|
|
f5a251 |
static const char * const smca_psp_mce_desc[] = {
|
|
|
f5a251 |
"PSP RAM ECC or parity error",
|
|
|
f5a251 |
};
|
|
|
f5a251 |
+/* Platform Security Processor V2: MCE descriptions for bank type SMCA_PSP_V2 (see smca_mce_descs[]) */
|
|
|
f5a251 |
+static const char * const smca_psp2_mce_desc[] = {
|
|
|
f5a251 |
+ "High SRAM ECC or parity error",
|
|
|
f5a251 |
+ "Low SRAM ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Cache Bank 0 ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Cache Bank 1 ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Tag Ram 0 parity error",
|
|
|
f5a251 |
+ "Instruction Tag Ram 1 parity error",
|
|
|
f5a251 |
+ "Data Cache Bank 0 ECC or parity error",
|
|
|
f5a251 |
+ "Data Cache Bank 1 ECC or parity error",
|
|
|
f5a251 |
+ "Data Cache Bank 2 ECC or parity error",
|
|
|
f5a251 |
+ "Data Cache Bank 3 ECC or parity error",
|
|
|
f5a251 |
+ "Data Tag Bank 0 parity error",
|
|
|
f5a251 |
+ "Data Tag Bank 1 parity error",
|
|
|
f5a251 |
+ "Data Tag Bank 2 parity error",
|
|
|
f5a251 |
+ "Data Tag Bank 3 parity error",
|
|
|
f5a251 |
+ "Dirty Data Ram parity error",
|
|
|
f5a251 |
+ "TLB Bank 0 parity error",
|
|
|
f5a251 |
+ "TLB Bank 1 parity error",
|
|
|
f5a251 |
+ "System Hub Read Buffer ECC or parity error",
|
|
|
f5a251 |
+};
|
|
|
f5a251 |
/* System Management Unit */
|
|
|
f5a251 |
static const char * const smca_smu_mce_desc[] = {
|
|
|
f5a251 |
"SMU RAM ECC or parity error",
|
|
|
f5a251 |
};
|
|
|
f5a251 |
+/* System Management Unit V2: MCE descriptions for bank type SMCA_SMU_V2 (see smca_mce_descs[]) */
|
|
|
f5a251 |
+static const char * const smca_smu2_mce_desc[] = {
|
|
|
f5a251 |
+ "High SRAM ECC or parity error",
|
|
|
f5a251 |
+ "Low SRAM ECC or parity error",
|
|
|
f5a251 |
+ "Data Cache Bank A ECC or parity error",
|
|
|
f5a251 |
+ "Data Cache Bank B ECC or parity error",
|
|
|
f5a251 |
+ "Data Tag Cache Bank A ECC or parity error",
|
|
|
f5a251 |
+ "Data Tag Cache Bank B ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Cache Bank A ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Cache Bank B ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Tag Cache Bank A ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Tag Cache Bank B ECC or parity error",
|
|
|
f5a251 |
+ "System Hub Read Buffer ECC or parity error",
|
|
|
f5a251 |
+};
|
|
|
f5a251 |
+/* Microprocessor 5 Unit: MCE descriptions for bank type SMCA_MP5 (see smca_mce_descs[]) */
|
|
|
f5a251 |
+static const char * const smca_mp5_mce_desc[] = {
|
|
|
f5a251 |
+ "High SRAM ECC or parity error",
|
|
|
f5a251 |
+ "Low SRAM ECC or parity error",
|
|
|
f5a251 |
+ "Data Cache Bank A ECC or parity error",
|
|
|
f5a251 |
+ "Data Cache Bank B ECC or parity error",
|
|
|
f5a251 |
+ "Data Tag Cache Bank A ECC or parity error",
|
|
|
f5a251 |
+ "Data Tag Cache Bank B ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Cache Bank A ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Cache Bank B ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Tag Cache Bank A ECC or parity error",
|
|
|
f5a251 |
+ "Instruction Tag Cache Bank B ECC or parity error",
|
|
|
f5a251 |
+};
|
|
|
f5a251 |
+/* Northbridge IO Unit: MCE descriptions for bank type SMCA_NBIO (see smca_mce_descs[]) */
|
|
|
f5a251 |
+static const char * const smca_nbio_mce_desc[] = {
|
|
|
f5a251 |
+ "ECC or Parity error",
|
|
|
f5a251 |
+ "PCIE error",
|
|
|
f5a251 |
+ "SDP ErrEvent error",
|
|
|
f5a251 |
+ "SDP Egress Poison Error",
|
|
|
f5a251 |
+ "IOHC Internal Poison Error",
|
|
|
f5a251 |
+};
|
|
|
f5a251 |
+/* PCI Express Unit: MCE descriptions for bank type SMCA_PCIE (see smca_mce_descs[]) */
|
|
|
f5a251 |
+static const char * const smca_pcie_mce_desc[] = {
|
|
|
f5a251 |
+ "CCIX PER Message logging",
|
|
|
f5a251 |
+ "CCIX Read Response with Status: Non-Data Error",
|
|
|
f5a251 |
+ "CCIX Write Response with Status: Non-Data Error",
|
|
|
f5a251 |
+ "CCIX Read Response with Status: Data Error",
|
|
|
f5a251 |
+ "CCIX Non-okay write response with data error",
|
|
|
f5a251 |
+};
|
|
|
f5a251 |
+
|
|
|
f5a251 |
|
|
|
f5a251 |
struct smca_mce_desc {
|
|
|
f5a251 |
const char * const *descs;
|
|
|
f5a251 |
@@ -208,11 +296,17 @@ static struct smca_mce_desc smca_mce_descs[] = {
|
|
|
f5a251 |
[SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
|
|
|
f5a251 |
[SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
|
|
|
f5a251 |
[SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
|
|
|
f5a251 |
+ [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
|
|
|
f5a251 |
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
|
|
|
f5a251 |
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
|
|
|
f5a251 |
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
|
|
|
f5a251 |
[SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
|
|
|
f5a251 |
+ [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)},
|
|
|
f5a251 |
[SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
|
|
|
f5a251 |
+ [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc)},
|
|
|
f5a251 |
+ [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
|
|
|
f5a251 |
+ [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)},
|
|
|
f5a251 |
+ [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)},
|
|
|
f5a251 |
};
|
|
|
f5a251 |
|
|
|
f5a251 |
struct smca_hwid {
|
|
|
f5a251 |
@@ -235,6 +329,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
|
|
|
f5a251 |
|
|
|
f5a251 |
/* Data Fabric MCA types */
|
|
|
f5a251 |
{ SMCA_CS, 0x0000002E },
|
|
|
f5a251 |
+ { SMCA_CS_V2, 0x0002002E },
|
|
|
f5a251 |
{ SMCA_PIE, 0x0001002E },
|
|
|
f5a251 |
|
|
|
f5a251 |
/* Unified Memory Controller MCA type */
|
|
|
f5a251 |
@@ -245,9 +340,20 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
|
|
|
f5a251 |
|
|
|
f5a251 |
/* Platform Security Processor MCA type */
|
|
|
f5a251 |
{ SMCA_PSP, 0x000000FF },
|
|
|
f5a251 |
+ { SMCA_PSP_V2, 0x000100FF },
|
|
|
f5a251 |
|
|
|
f5a251 |
/* System Management Unit MCA type */
|
|
|
f5a251 |
{ SMCA_SMU, 0x00000001 },
|
|
|
f5a251 |
+ { SMCA_SMU_V2, 0x00010001 },
|
|
|
f5a251 |
+
|
|
|
f5a251 |
+ /* Microprocessor 5 Unit MCA type */
|
|
|
f5a251 |
+ { SMCA_MP5, 0x00020001 },
|
|
|
f5a251 |
+
|
|
|
f5a251 |
+ /* Northbridge IO Unit MCA type */
|
|
|
f5a251 |
+ { SMCA_NBIO, 0x00000018 },
|
|
|
f5a251 |
+
|
|
|
f5a251 |
+ /* PCI Express Unit MCA type */
|
|
|
f5a251 |
+ { SMCA_PCIE, 0x00000046 },
|
|
|
f5a251 |
};
|
|
|
f5a251 |
|
|
|
f5a251 |
struct smca_bank_name {
|
|
|
f5a251 |
@@ -264,11 +370,17 @@ static struct smca_bank_name smca_names[] = {
|
|
|
f5a251 |
[SMCA_FP] = { "Floating Point Unit" },
|
|
|
f5a251 |
[SMCA_L3_CACHE] = { "L3 Cache" },
|
|
|
f5a251 |
[SMCA_CS] = { "Coherent Slave" },
|
|
|
f5a251 |
+ [SMCA_CS_V2] = { "Coherent Slave" },
|
|
|
f5a251 |
[SMCA_PIE] = { "Power, Interrupts, etc." },
|
|
|
f5a251 |
[SMCA_UMC] = { "Unified Memory Controller" },
|
|
|
f5a251 |
[SMCA_PB] = { "Parameter Block" },
|
|
|
f5a251 |
[SMCA_PSP] = { "Platform Security Processor" },
|
|
|
f5a251 |
+ [SMCA_PSP_V2] = { "Platform Security Processor" },
|
|
|
f5a251 |
[SMCA_SMU] = { "System Management Unit" },
|
|
|
f5a251 |
+ [SMCA_SMU_V2] = { "System Management Unit" },
|
|
|
f5a251 |
+ [SMCA_MP5] = { "Microprocessor 5 Unit" },
|
|
|
f5a251 |
+ [SMCA_NBIO] = { "Northbridge IO Unit" },
|
|
|
f5a251 |
+ [SMCA_PCIE] = { "PCI Express Unit" },
|
|
|
f5a251 |
};
|
|
|
f5a251 |
|
|
|
f5a251 |
static void amd_decode_errcode(struct mce_event *e)
|