[coreboot-gerrit] Change in coreboot[master]: NOT_FOR_MERGE: Add fake MCA errors to test BERT

Marshall Dawson (Code Review) gerrit at coreboot.org
Tue Sep 4 22:07:07 CEST 2018


Marshall Dawson has uploaded this change for review. ( https://review.coreboot.org/28481 )


Change subject: NOT_FOR_MERGE: Add fake MCA errors to test BERT
......................................................................

NOT_FOR_MERGE: Add fake MCA errors to test BERT

Fill the BERT region with some sample errors.  The first core's
errors should be reported by the ACPI driver as:

[Hardware Error]: event severity: fatal
[Hardware Error]:  precise tstamp: 2018-09-04 16:58:11
[Hardware Error]:  Error 0, type: fatal
[Hardware Error]:   section_type: general processor error
[Hardware Error]:   processor_type: 0, IA32/X64
[Hardware Error]:   error_type: 0x01
[Hardware Error]:   cache error
[Hardware Error]:   version_info: 0x0000000000670f00
[Hardware Error]:   processor_id: 0x0000000000000010
[Hardware Error]:  precise tstamp: 2018-09-04 16:58:11
[Hardware Error]:  Error 1, type: fatal
[Hardware Error]:   section type: unknown, dc3ea0b0-a144-4797-b95b-53fa242b6e1d
[Hardware Error]:   section length: 0x100
[Hardware Error]:   00000000: 00000307 00000000 00000010 00000000  ................
[Hardware Error]:   00000010: 00670f00 00000000 00020800 00000000  ..g.............
[Hardware Error]:   00000020: 76d8320b 00000000 178bfbff 00000000  .2.v............
[Hardware Error]:   00000030: 00000000 00000000 00000000 00000000  ................
[Hardware Error]:   00000040: a55701f5 43dee3ef 9b2472ac 2cad3f57  ..W....C.r$.W?.,
[Hardware Error]:   00000050: 00000000 00000000 00000000 00000000  ................
[Hardware Error]:   00000060: 00000000 00000000 00000000 00000000  ................
[Hardware Error]:   00000070: 00000000 00000000 00000000 00000000  ................
[Hardware Error]:   00000080: 00180001 00000179 00000000 00000000  ....y...........
[Hardware Error]:   00000090: 00000107 00000000 00000000 00000000  ................
[Hardware Error]:   000000a0: 00000000 00000000 00000000 00000000  ................
[Hardware Error]:   000000b0: 00200001 00000404 00000000 00000000  .. .............
[Hardware Error]:   000000c0: 00000000 00000000 00100153 fe000000  ........S.......
[Hardware Error]:   000000d0: 00000009 00000000 00000000 d01a0003  ................
[Hardware Error]:   000000e0: 00080001 c0010045 00000000 00000000  ....E...........
[Hardware Error]:   000000f0: 00000080 00000000 00000000 00000000  ................

Change-Id: I413955fb0fcf0d98a89da647f6f70b0df25a923a
Signed-off-by: Marshall Dawson <marshalldawson3rd at gmail.com>
---
M src/arch/x86/acpi_bert_storage.c
M src/soc/amd/stoneyridge/mca.c
2 files changed, 97 insertions(+), 2 deletions(-)



  git pull ssh://review.coreboot.org:29418/coreboot refs/changes/81/28481/1

diff --git a/src/arch/x86/acpi_bert_storage.c b/src/arch/x86/acpi_bert_storage.c
index 826c14c..c15989c 100644
--- a/src/arch/x86/acpi_bert_storage.c
+++ b/src/arch/x86/acpi_bert_storage.c
@@ -487,6 +487,8 @@
 	return NULL;
 }
 
+void fake_msr_reads(msr_t *p, u32 addr, int num);
+
 /* Helper to add an MSR context to an existing IA32/X64-type error entry */
 cper_ia32x64_context_t *cper_new_ia32x64_context_msr(
 		acpi_generic_error_status_t *status,
@@ -506,8 +508,11 @@
 
 	dest = (msr_t *)((u8 *)(ctx + 1)); /* point to the Register Array */
 
-	for (i = 0 ; i < num ; i++)
-		*(dest + i) = rdmsr(addr + i);
+	if (1)
+		fake_msr_reads(dest, addr, num);
+	else
+		for (i = 0 ; i < num ; i++)
+			*(dest + i) = rdmsr(addr + i);
 	return ctx;
 }
 
diff --git a/src/soc/amd/stoneyridge/mca.c b/src/soc/amd/stoneyridge/mca.c
index 81d95fb..e762134 100644
--- a/src/soc/amd/stoneyridge/mca.c
+++ b/src/soc/amd/stoneyridge/mca.c
@@ -13,7 +13,9 @@
  * GNU General Public License for more details.
  */
 
+#include <cpu/x86/mp.h>
 #include <cpu/x86/msr.h>
+#include <cpu/x86/lapic.h>
 #include <arch/acpi.h>
 #include <cpu/amd/amdfam15.h>
 #include <soc/cpu.h>
@@ -31,6 +33,84 @@
 	msr_t cmask;
 };
 
+void fake_msr_reads(msr_t *p, u32 addr, int num);
+void fake_msr_reads(msr_t *p, u32 addr, int num)
+{
+	int i;
+
+	/* These are taken from actual failures we've seen */
+	switch (addr) {
+	case 0x404: /* 1 */
+		p[0].hi = 0x00000000; p[0].lo = 0x00000000;
+		p[1].hi = 0xfe000000; p[1].lo = 0x00100153;
+		p[2].hi = 0x00000000; p[2].lo = 0x00000009;
+		p[3].hi = 0xd01a0003; p[3].lo = 0x00000000;
+		break;
+	case 0x408: /* 2 */
+		p[0].hi = 0x00000000; p[0].lo = 0x00000000;
+		p[1].hi = 0xb6000000; p[1].lo = 0x0012010a;
+		p[2].hi = 0x00000000; p[2].lo = 0x00004500;
+		p[3].hi = 0xd0100000; p[3].lo = 0x00000000;
+		break;
+	case 0x410: /* 4 */
+		p[0].hi = 0x00000000; p[0].lo = 0x00000000;
+		p[1].hi = 0xb2000010; p[1].lo = 0x00020c0f;
+		p[2].hi = 0x00000000; p[2].lo = 0x00000000;
+		p[3].hi = 0xd0000000; p[3].lo = 0x01000000;
+		break;
+	case 0x418: /* 6 */
+		p[0].hi = 0x00000000, p[0].lo = 0x00000000;
+		p[1].hi = 0xb2000000, p[1].lo = 0x00030e0f;
+		p[2].hi = 0x00000000, p[2].lo = 0x00000000;
+		p[3].hi = 0x00000000, p[3].lo = 0x00000000;
+		break;
+	case 0x400: /* 0 */
+	case 0x40c: /* 3 */
+	case 0x414: /* 5 */
+	default:
+		for (i = 0 ; i < num ; i++)
+			p[i] = rdmsr(addr + i);
+		break;
+	}
+}
+
+void load_fake_msr_info(struct mca_bank *mci, int bank);
+void load_fake_msr_info(struct mca_bank *mci, int bank)
+{
+
+	/* These are taken from actual failures we've seen */
+	switch (bank) {
+	case 1: /* 1 */
+		mci->bank = 1;
+		mci->ctl.hi  = 0x00000000; mci->ctl.lo  = 0x00000000;
+		mci->sts.hi  = 0xfe000000; mci->sts.lo  = 0x00100153;
+		mci->addr.hi = 0x00000000; mci->addr.lo = 0x00000009;
+		mci->misc.hi = 0xd01a0003; mci->misc.lo = 0x00000000;
+		break;
+	case 2: /* 2 */
+		mci->bank = 2;
+		mci->ctl.hi  = 0x00000000; mci->ctl.lo  = 0x00000000;
+		mci->sts.hi  = 0xb6000000; mci->sts.lo  = 0x0012010a;
+		mci->addr.hi = 0x00000000; mci->addr.lo = 0x00004500;
+		mci->misc.hi = 0xd0100000; mci->misc.lo = 0x00000000;
+		break;
+	case 4: /* 4 */
+		mci->bank = 4;
+		mci->ctl.hi  = 0x00000000; mci->ctl.lo  = 0x00000000;
+		mci->sts.hi  = 0xb2000010; mci->sts.lo  = 0x00020c0f;
+		mci->addr.hi = 0x00000000; mci->addr.lo = 0x00000000;
+		mci->misc.hi = 0xd0000000; mci->misc.lo = 0x01000000;
+		break;
+	case 6: /* 6 */
+		mci->bank = 6;
+		mci->ctl.hi  = 0x00000000, mci->ctl.lo  = 0x00000000;
+		mci->sts.hi  = 0xb2000000, mci->sts.lo  = 0x00030e0f;
+		mci->addr.hi = 0x00000000, mci->addr.lo = 0x00000000;
+		mci->misc.hi = 0x00000000, mci->misc.lo = 0x00000000;
+		break;
+	}
+}
+
 static inline size_t mca_report_size_reqd(void)
 {
 	size_t size;
@@ -180,6 +260,16 @@
 	cap = rdmsr(MCG_CAP);
 	num_banks = cap.lo & MCA_BANKS_MASK;
 
+
+	printk(0, "======================= Adding Fake MC Errors ====================\n");
+	if (boot_cpu()) {
+		load_fake_msr_info(&mci, 1);
+		build_bert_mca_error(&mci);
+	} else {
+		load_fake_msr_info(&mci, 4);
+		build_bert_mca_error(&mci);
+	}
+
 	if (is_warm_reset()) {
 		for (i = 0 ; i < num_banks ; i++) {
 			if (i == 3) /* Reserved in Family 15h */

-- 
To view, visit https://review.coreboot.org/28481
To unsubscribe, or for help writing mail filters, visit https://review.coreboot.org/settings

Gerrit-Project: coreboot
Gerrit-Branch: master
Gerrit-MessageType: newchange
Gerrit-Change-Id: I413955fb0fcf0d98a89da647f6f70b0df25a923a
Gerrit-Change-Number: 28481
Gerrit-PatchSet: 1
Gerrit-Owner: Marshall Dawson <marshalldawson3rd at gmail.com>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.coreboot.org/pipermail/coreboot-gerrit/attachments/20180904/e1d1a04e/attachment.html>


More information about the coreboot-gerrit mailing list