Marshall Dawson has uploaded this change for review. ( https://review.coreboot.org/28481 )
Change subject: NOT_FOR_MERGE: Add fake MCA errors to test BERT ......................................................................
NOT_FOR_MERGE: Add fake MCA errors to test BERT
Fill the BERT region with some sample errors. The first core's errors should be reported by the ACPI driver as:
[Hardware Error]: event severity: fatal [Hardware Error]: precise tstamp: 2018-09-04 16:58:11 [Hardware Error]: Error 0, type: fatal [Hardware Error]: section_type: general processor error [Hardware Error]: processor_type: 0, IA32/X64 [Hardware Error]: error_type: 0x01 [Hardware Error]: cache error [Hardware Error]: version_info: 0x0000000000670f00 [Hardware Error]: processor_id: 0x0000000000000010 [Hardware Error]: precise tstamp: 2018-09-04 16:58:11 [Hardware Error]: Error 1, type: fatal [Hardware Error]: section type: unknown, dc3ea0b0-a144-4797-b95b-53fa242b6e1d [Hardware Error]: section length: 0x100 [Hardware Error]: 00000000: 00000307 00000000 00000010 00000000 ................ [Hardware Error]: 00000010: 00670f00 00000000 00020800 00000000 ..g............. [Hardware Error]: 00000020: 76d8320b 00000000 178bfbff 00000000 .2.v............ [Hardware Error]: 00000030: 00000000 00000000 00000000 00000000 ................ [Hardware Error]: 00000040: a55701f5 43dee3ef 9b2472ac 2cad3f57 ..W....C.r$.W?., [Hardware Error]: 00000050: 00000000 00000000 00000000 00000000 ................ [Hardware Error]: 00000060: 00000000 00000000 00000000 00000000 ................ [Hardware Error]: 00000070: 00000000 00000000 00000000 00000000 ................ [Hardware Error]: 00000080: 00180001 00000179 00000000 00000000 ....y........... [Hardware Error]: 00000090: 00000107 00000000 00000000 00000000 ................ [Hardware Error]: 000000a0: 00000000 00000000 00000000 00000000 ................ [Hardware Error]: 000000b0: 00200001 00000404 00000000 00000000 .. ............. [Hardware Error]: 000000c0: 00000000 00000000 00100153 fe000000 ........S....... [Hardware Error]: 000000d0: 00000009 00000000 00000000 d01a0003 ................ [Hardware Error]: 000000e0: 00080001 c0010045 00000000 00000000 ....E........... [Hardware Error]: 000000f0: 00000080 00000000 00000000 00000000 ................
Change-Id: I413955fb0fcf0d98a89da647f6f70b0df25a923a Signed-off-by: Marshall Dawson <marshalldawson3rd@gmail.com> --- M src/arch/x86/acpi_bert_storage.c M src/soc/amd/stoneyridge/mca.c 2 files changed, 97 insertions(+), 2 deletions(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/81/28481/1
diff --git a/src/arch/x86/acpi_bert_storage.c b/src/arch/x86/acpi_bert_storage.c index 826c14c..c15989c 100644 --- a/src/arch/x86/acpi_bert_storage.c +++ b/src/arch/x86/acpi_bert_storage.c @@ -487,6 +487,8 @@ return NULL; }
+void fake_msr_reads(msr_t *p, u32 addr, int num); + /* Helper to add an MSR context to an existing IA32/X64-type error entry */ cper_ia32x64_context_t *cper_new_ia32x64_context_msr( acpi_generic_error_status_t *status, @@ -506,8 +508,11 @@
dest = (msr_t *)((u8 *)(ctx + 1)); /* point to the Register Array */
- for (i = 0 ; i < num ; i++) - *(dest + i) = rdmsr(addr + i); + if (1) + fake_msr_reads(dest, addr, num); + else + for (i = 0 ; i < num ; i++) + *(dest + i) = rdmsr(addr + i); return ctx; }
diff --git a/src/soc/amd/stoneyridge/mca.c b/src/soc/amd/stoneyridge/mca.c index 81d95fb..e762134 100644 --- a/src/soc/amd/stoneyridge/mca.c +++ b/src/soc/amd/stoneyridge/mca.c @@ -13,7 +13,9 @@ * GNU General Public License for more details. */
+#include <cpu/x86/mp.h> #include <cpu/x86/msr.h> +#include <cpu/x86/lapic.h> #include <arch/acpi.h> #include <cpu/amd/amdfam15.h> #include <soc/cpu.h> @@ -31,6 +33,84 @@ msr_t cmask; };
+void fake_msr_reads(msr_t *p, u32 addr, int num); /* Test-only substitute for a run of rdmsr() calls. For addr 0x404, 0x408, 0x410 or 0x418 it stores four hard-coded hi/lo pairs into p[0] through p[3] and never consults num, so the caller must supply room for at least four entries. For every other addr it reads num consecutive MSRs starting at addr into p[]. */ +void fake_msr_reads(msr_t *p, u32 addr, int num) +{ + int i; + + /* These are taken from actual failures we've seen */ + switch (addr) { + case 0x404: /* 1 */ + p[0].hi = 0x00000000; p[0].lo = 0x00000000; + p[1].hi = 0xfe000000; p[1].lo = 0x00100153; + p[2].hi = 0x00000000; p[2].lo = 0x00000009; + p[3].hi = 0xd01a0003; p[3].lo = 0x00000000; + break; + case 0x408: /* 2 */ + p[0].hi = 0x00000000; p[0].lo = 0x00000000; + p[1].hi = 0xb6000000; p[1].lo = 0x0012010a; + p[2].hi = 0x00000000; p[2].lo = 0x00004500; + p[3].hi = 0xd0100000; p[3].lo = 0x00000000; + break; + case 0x410: /* 4 */ + p[0].hi = 0x00000000; p[0].lo = 0x00000000; + p[1].hi = 0xb2000010; p[1].lo = 0x00020c0f; + p[2].hi = 0x00000000; p[2].lo = 0x00000000; + p[3].hi = 0xd0000000; p[3].lo = 0x01000000; + break; + case 0x418: /* 6 */ /* NOTE(review): the pairs in this case are joined with the comma operator instead of a semicolon; both fields of each entry are still assigned. */ + p[0].hi = 0x00000000, p[0].lo = 0x00000000; + p[1].hi = 0xb2000000, p[1].lo = 0x00030e0f; + p[2].hi = 0x00000000, p[2].lo = 0x00000000; + p[3].hi = 0x00000000, p[3].lo = 0x00000000; + break; + case 0x400: /* 0 */ + case 0x40c: /* 3 */ + case 0x414: /* 5 */ + default: /* every address without canned data falls back to real MSR reads */ + for (i = 0 ; i < num ; i++) + p[i] = rdmsr(addr + i); + break; + } +} + +void load_fake_msr_info(struct mca_bank *mci, int bank); /* Test-only helper that fills the bank, ctl, sts, addr and misc fields of the structure pointed to by mci with hard-coded values for bank 1, 2, 4 or 6. The ctl/sts/addr/misc values are the same ones fake_msr_reads stores for 0x404, 0x408, 0x410 and 0x418 respectively. The switch has no default case, so any other bank value returns without writing through mci. */ +void load_fake_msr_info(struct mca_bank *mci, int bank) +{ + + /* These are taken from actual failures we've seen */ + switch (bank) { + case 1: /* 1 */ + mci->bank = 1; + mci->ctl.hi = 0x00000000; mci->ctl.lo = 0x00000000; + mci->sts.hi = 0xfe000000; mci->sts.lo = 0x00100153; + mci->addr.hi = 0x00000000; mci->addr.lo = 0x00000009; + mci->misc.hi = 0xd01a0003; mci->misc.lo = 0x00000000; + break; + case 2: /* 2 */ + mci->bank = 2; + mci->ctl.hi = 0x00000000; mci->ctl.lo = 0x00000000; + mci->sts.hi = 0xb6000000; mci->sts.lo = 0x0012010a; + mci->addr.hi = 0x00000000; mci->addr.lo = 0x00004500; + mci->misc.hi = 0xd0100000; mci->misc.lo = 0x00000000; + break; + case 4: /* 4 */ + mci->bank = 4; + mci->ctl.hi = 0x00000000; mci->ctl.lo = 
0x00000000; + mci->sts.hi = 0xb2000010; mci->sts.lo = 0x00020c0f; + mci->addr.hi = 0x00000000; mci->addr.lo = 0x00000000; + mci->misc.hi = 0xd0000000; mci->misc.lo = 0x01000000; + break; + case 6: /* 6 */ /* NOTE(review): comma operator used between the hi and lo assignments in this case; both fields are still written. */ + mci->bank = 6; + mci->ctl.hi = 0x00000000, mci->ctl.lo = 0x00000000; + mci->sts.hi = 0xb2000000, mci->sts.lo = 0x00030e0f; + mci->addr.hi = 0x00000000, mci->addr.lo = 0x00000000; + mci->misc.hi = 0x00000000, mci->misc.lo = 0x00000000; + break; /* no default case follows: unlisted bank values leave the structure unmodified */ + } +} + static inline size_t mca_report_size_reqd(void) { size_t size; @@ -180,6 +260,16 @@ cap = rdmsr(MCG_CAP); num_banks = cap.lo & MCA_BANKS_MASK;
+ + printk(0, "======================= Adding Fake MC Errors ====================\n"); + if (boot_cpu()) { + load_fake_msr_info(&mci, 1); + build_bert_mca_error(&mci); + } else { + load_fake_msr_info(&mci, 4); + build_bert_mca_error(&mci); + } + if (is_warm_reset()) { for (i = 0 ; i < num_banks ; i++) { if (i == 3) /* Reserved in Family 15h */