Marshall Dawson has uploaded this change for review.

View Change

NOT_FOR_MERGE: Add fake MCA errors to test BERT

Fill the BERT region with some sample errors. The first core's
errors should be reported by the ACPI driver as:

[Hardware Error]: event severity: fatal
[Hardware Error]: precise tstamp: 2018-09-04 16:58:11
[Hardware Error]: Error 0, type: fatal
[Hardware Error]: section_type: general processor error
[Hardware Error]: processor_type: 0, IA32/X64
[Hardware Error]: error_type: 0x01
[Hardware Error]: cache error
[Hardware Error]: version_info: 0x0000000000670f00
[Hardware Error]: processor_id: 0x0000000000000010
[Hardware Error]: precise tstamp: 2018-09-04 16:58:11
[Hardware Error]: Error 1, type: fatal
[Hardware Error]: section type: unknown, dc3ea0b0-a144-4797-b95b-53fa242b6e1d
[Hardware Error]: section length: 0x100
[Hardware Error]: 00000000: 00000307 00000000 00000010 00000000 ................
[Hardware Error]: 00000010: 00670f00 00000000 00020800 00000000 ..g.............
[Hardware Error]: 00000020: 76d8320b 00000000 178bfbff 00000000 .2.v............
[Hardware Error]: 00000030: 00000000 00000000 00000000 00000000 ................
[Hardware Error]: 00000040: a55701f5 43dee3ef 9b2472ac 2cad3f57 ..W....C.r$.W?.,
[Hardware Error]: 00000050: 00000000 00000000 00000000 00000000 ................
[Hardware Error]: 00000060: 00000000 00000000 00000000 00000000 ................
[Hardware Error]: 00000070: 00000000 00000000 00000000 00000000 ................
[Hardware Error]: 00000080: 00180001 00000179 00000000 00000000 ....y...........
[Hardware Error]: 00000090: 00000107 00000000 00000000 00000000 ................
[Hardware Error]: 000000a0: 00000000 00000000 00000000 00000000 ................
[Hardware Error]: 000000b0: 00200001 00000404 00000000 00000000 .. .............
[Hardware Error]: 000000c0: 00000000 00000000 00100153 fe000000 ........S.......
[Hardware Error]: 000000d0: 00000009 00000000 00000000 d01a0003 ................
[Hardware Error]: 000000e0: 00080001 c0010045 00000000 00000000 ....E...........
[Hardware Error]: 000000f0: 00000080 00000000 00000000 00000000 ................

Change-Id: I413955fb0fcf0d98a89da647f6f70b0df25a923a
Signed-off-by: Marshall Dawson <marshalldawson3rd@gmail.com>
---
M src/arch/x86/acpi_bert_storage.c
M src/soc/amd/stoneyridge/mca.c
2 files changed, 97 insertions(+), 2 deletions(-)

git pull ssh://review.coreboot.org:29418/coreboot refs/changes/81/28481/1
diff --git a/src/arch/x86/acpi_bert_storage.c b/src/arch/x86/acpi_bert_storage.c
index 826c14c..c15989c 100644
--- a/src/arch/x86/acpi_bert_storage.c
+++ b/src/arch/x86/acpi_bert_storage.c
@@ -487,6 +487,8 @@
return NULL;
}

+void fake_msr_reads(msr_t *p, u32 addr, int num);
+
/* Helper to add an MSR context to an existing IA32/X64-type error entry */
cper_ia32x64_context_t *cper_new_ia32x64_context_msr(
acpi_generic_error_status_t *status,
@@ -506,8 +508,11 @@

dest = (msr_t *)((u8 *)(ctx + 1)); /* point to the Register Array */

- for (i = 0 ; i < num ; i++)
- *(dest + i) = rdmsr(addr + i);
+ if (1)
+ fake_msr_reads(dest, addr, num);
+ else
+ for (i = 0 ; i < num ; i++)
+ *(dest + i) = rdmsr(addr + i);
return ctx;
}

diff --git a/src/soc/amd/stoneyridge/mca.c b/src/soc/amd/stoneyridge/mca.c
index 81d95fb..e762134 100644
--- a/src/soc/amd/stoneyridge/mca.c
+++ b/src/soc/amd/stoneyridge/mca.c
@@ -13,7 +13,9 @@
* GNU General Public License for more details.
*/

+#include <cpu/x86/mp.h>
#include <cpu/x86/msr.h>
+#include <cpu/x86/lapic.h>
#include <arch/acpi.h>
#include <cpu/amd/amdfam15.h>
#include <soc/cpu.h>
@@ -31,6 +33,84 @@
msr_t cmask;
};

+void fake_msr_reads(msr_t *p, u32 addr, int num);
+void fake_msr_reads(msr_t *p, u32 addr, int num)
+{
+ int i;
+ /* Test-only stand-in for the rdmsr() loop: canned values for the base addresses below, real MSR reads otherwise. */
+ /* These are taken from actual failures we've seen. NOTE(review): the canned cases always write p[0]..p[3] and never check num. */
+ switch (addr) {
+ case 0x404: /* 1 - same hi/lo values as load_fake_msr_info() case 1 (ctl, sts, addr, misc order) */
+ p[0].hi = 0x00000000; p[0].lo = 0x00000000;
+ p[1].hi = 0xfe000000; p[1].lo = 0x00100153;
+ p[2].hi = 0x00000000; p[2].lo = 0x00000009;
+ p[3].hi = 0xd01a0003; p[3].lo = 0x00000000;
+ break;
+ case 0x408: /* 2 - same hi/lo values as load_fake_msr_info() case 2 */
+ p[0].hi = 0x00000000; p[0].lo = 0x00000000;
+ p[1].hi = 0xb6000000; p[1].lo = 0x0012010a;
+ p[2].hi = 0x00000000; p[2].lo = 0x00004500;
+ p[3].hi = 0xd0100000; p[3].lo = 0x00000000;
+ break;
+ case 0x410: /* 4 - same hi/lo values as load_fake_msr_info() case 4 */
+ p[0].hi = 0x00000000; p[0].lo = 0x00000000;
+ p[1].hi = 0xb2000010; p[1].lo = 0x00020c0f;
+ p[2].hi = 0x00000000; p[2].lo = 0x00000000;
+ p[3].hi = 0xd0000000; p[3].lo = 0x01000000;
+ break;
+ case 0x418: /* 6 - same values as load_fake_msr_info() case 6; comma-joined assignments act like the ';' form above */
+ p[0].hi = 0x00000000, p[0].lo = 0x00000000;
+ p[1].hi = 0xb2000000, p[1].lo = 0x00030e0f;
+ p[2].hi = 0x00000000, p[2].lo = 0x00000000;
+ p[3].hi = 0x00000000, p[3].lo = 0x00000000;
+ break;
+ case 0x400: /* 0 - no canned data; listed for completeness, falls through to default */
+ case 0x40c: /* 3 - no canned data; falls through to default */
+ case 0x414: /* 5 - no canned data; falls through to default */
+ default: /* read num consecutive real MSRs starting at addr into p[0]..p[num-1] */
+ for (i = 0 ; i < num ; i++)
+ p[i] = rdmsr(addr + i);
+ break;
+ }
+}
+
+void load_fake_msr_info(struct mca_bank *mci, int bank);
+void load_fake_msr_info(struct mca_bank *mci, int bank)
+{
+ /* Test-only helper: overwrite *mci with a canned bank number and ctl/sts/addr/misc values for the requested bank. */
+ /* These are taken from actual failures we've seen. NOTE(review): no default case, so any bank other than 1/2/4/6 leaves *mci unmodified. */
+ switch (bank) {
+ case 1: /* 1 - same hi/lo values fake_msr_reads() returns for addr 0x404 */
+ mci->bank = 1;
+ mci->ctl.hi = 0x00000000; mci->ctl.lo = 0x00000000;
+ mci->sts.hi = 0xfe000000; mci->sts.lo = 0x00100153;
+ mci->addr.hi = 0x00000000; mci->addr.lo = 0x00000009;
+ mci->misc.hi = 0xd01a0003; mci->misc.lo = 0x00000000;
+ break;
+ case 2: /* 2 - same hi/lo values fake_msr_reads() returns for addr 0x408 */
+ mci->bank = 2;
+ mci->ctl.hi = 0x00000000; mci->ctl.lo = 0x00000000;
+ mci->sts.hi = 0xb6000000; mci->sts.lo = 0x0012010a;
+ mci->addr.hi = 0x00000000; mci->addr.lo = 0x00004500;
+ mci->misc.hi = 0xd0100000; mci->misc.lo = 0x00000000;
+ break;
+ case 4: /* 4 - same hi/lo values fake_msr_reads() returns for addr 0x410 */
+ mci->bank = 4;
+ mci->ctl.hi = 0x00000000; mci->ctl.lo = 0x00000000;
+ mci->sts.hi = 0xb2000010; mci->sts.lo = 0x00020c0f;
+ mci->addr.hi = 0x00000000; mci->addr.lo = 0x00000000;
+ mci->misc.hi = 0xd0000000; mci->misc.lo = 0x01000000;
+ break;
+ case 6: /* 6 - same values fake_msr_reads() returns for addr 0x418; comma-joined assignments act like the ';' form above */
+ mci->bank = 6;
+ mci->ctl.hi = 0x00000000, mci->ctl.lo = 0x00000000;
+ mci->sts.hi = 0xb2000000, mci->sts.lo = 0x00030e0f;
+ mci->addr.hi = 0x00000000, mci->addr.lo = 0x00000000;
+ mci->misc.hi = 0x00000000, mci->misc.lo = 0x00000000;
+ break;
+ }
+}
+
static inline size_t mca_report_size_reqd(void)
{
size_t size;
@@ -180,6 +260,16 @@
cap = rdmsr(MCG_CAP);
num_banks = cap.lo & MCA_BANKS_MASK;

+
+ printk(0, "======================= Adding Fake MC Errors ====================\n");
+ if (boot_cpu()) {
+ load_fake_msr_info(&mci, 1);
+ build_bert_mca_error(&mci);
+ } else {
+ load_fake_msr_info(&mci, 4);
+ build_bert_mca_error(&mci);
+ }
+
if (is_warm_reset()) {
for (i = 0 ; i < num_banks ; i++) {
if (i == 3) /* Reserved in Family 15h */

To view, visit change 28481. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: coreboot
Gerrit-Branch: master
Gerrit-MessageType: newchange
Gerrit-Change-Id: I413955fb0fcf0d98a89da647f6f70b0df25a923a
Gerrit-Change-Number: 28481
Gerrit-PatchSet: 1
Gerrit-Owner: Marshall Dawson <marshalldawson3rd@gmail.com>