Jonathan Zhang has uploaded this change for review. ( https://review.coreboot.org/c/coreboot/+/45392 )
Change subject: soc/intel/xeon_sp/cpx: generate ACPI BERT table ......................................................................
soc/intel/xeon_sp/cpx: generate ACPI BERT table
This is a WIP patch to prove that a BERT memory region placed directly above CBMEM top works well for this purpose. Right now dummy error records are used; the next step is to get the error record raw data from PREV_BOOT_ERR_SRC_HOB.
Select ACPI_BERT as default, configure ACPI_BERT_SIZE.
BERT memory region is placed directly above CBMEM top, with the size of CONFIG_ACPI_BERT_SIZE.
Set up e820 memory entry for it as reserved.
If ACPI_BERT is selected, fill in the APEI BERT region with a dummy error record.
When there are error records in APEI BERT region, generate ACPI BERT table.
TESTED=booted on DeltaLake DVT; the following are boot log excerpts: BIOS-e820: [mem 0x0000000000000000-0x0000000000000fff] type 16 BIOS-e820: [mem 0x0000000000001000-0x000000000009ffff] usable BIOS-e820: [mem 0x00000000000a0000-0x00000000000fffff] reserved BIOS-e820: [mem 0x0000000000100000-0x00000000752b1fff] usable BIOS-e820: [mem 0x00000000752b2000-0x00000000777fbfff] type 16 BIOS-e820: [mem 0x00000000777fc000-0x00000000777fffff] reserved <-- BERT region BIOS-e820: [mem 0x0000000078000000-0x000000008fffffff] reserved BIOS-e820: [mem 0x00000000fd000000-0x00000000fdffffff] reserved BIOS-e820: [mem 0x00000000fed40000-0x00000000fed44fff] reserved BIOS-e820: [mem 0x00000000fee00000-0x00000000fee00fff] reserved BIOS-e820: [mem 0x0000000100000000-0x000000183fffffff] usable ... [ 13.624688] BERT: Error records from previous boot: [ 13.634676] [Hardware Error]: event severity: fatal [ 13.644666] [Hardware Error]: precise tstamp: 2020-09-15 02:15:53 [ 13.657329] [Hardware Error]: Error 0, type: fatal [ 13.667317] [Hardware Error]: section_type: general processor error [ 13.680509] [Hardware Error]: processor_type: 0, IA32/X64 [ 13.691919] [Hardware Error]: error_type: 0x02 [ 13.701368] [Hardware Error]: TLB error [ 13.709578] [Hardware Error]: version_info: 0x000000000005065b [ 13.721885] [Hardware Error]: processor_id: 0x0000000000000000
Change-Id: Ifb23883a6f0d4d737ba08e256fb927e9965cb803 Signed-off-by: Jonathan Zhang jonzhang@fb.com --- M src/soc/intel/xeon_sp/cpx/Kconfig M src/soc/intel/xeon_sp/cpx/acpi.c M src/soc/intel/xeon_sp/uncore.c 3 files changed, 82 insertions(+), 3 deletions(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/92/45392/1
diff --git a/src/soc/intel/xeon_sp/cpx/Kconfig b/src/soc/intel/xeon_sp/cpx/Kconfig index 8e7e6f1..bba1eb3 100644 --- a/src/soc/intel/xeon_sp/cpx/Kconfig +++ b/src/soc/intel/xeon_sp/cpx/Kconfig @@ -87,4 +87,20 @@ int default 512
+config ACPI_BERT + bool "Build ACPI BERT Table" + default y + depends on HAVE_ACPI_TABLES + help + Report Machine Check errors identified in POST to the OS in an + ACPI Boot Error Record Table. + +config ACPI_BERT_SIZE + hex + default 0x4000 if ACPI_BERT + default 0x0 + help + Specify the amount of DRAM reserved for gathering the BERT error + records. + endif diff --git a/src/soc/intel/xeon_sp/cpx/acpi.c b/src/soc/intel/xeon_sp/cpx/acpi.c index 1328257..3abc530 100644 --- a/src/soc/intel/xeon_sp/cpx/acpi.c +++ b/src/soc/intel/xeon_sp/cpx/acpi.c @@ -2,6 +2,7 @@
#include <acpi/acpi_gnvs.h> #include <acpi/acpigen.h> +#include <arch/bert_storage.h> #include <arch/ioapic.h> #include <arch/smp/mpspec.h> #include <assert.h> @@ -12,6 +13,7 @@ #include <cpu/x86/smm.h> #include <device/pci.h> #include <intelblocks/acpi.h> +#include <lib.h> #include <hob_iiouds.h> #include <hob_memmap.h> #include <soc/acpi.h> @@ -878,6 +880,37 @@ return current; }
+#ifdef CONFIG_ACPI_BERT +void bert_reserved_region(void **start, size_t *size) +{ + *start = cbmem_top(); + *size = CONFIG_ACPI_BERT_SIZE; + assert(*start); + memset(*start, 0, *size); + printk(BIOS_INFO, "Reserved BERT region base: %p, size: 0x%lx\n", *start, *size); +} + +static bool fill_bert_region(void) +{ + acpi_generic_error_status_t *status; + acpi_hest_generic_data_v300_t *gen_entry; + cper_proc_generic_error_section_t *gen_sec; + + status = bert_new_event(&CPER_SEC_PROC_GENERIC_GUID); + if (!status){ + printk(BIOS_ERR, "Error: Not enough room in BERT region for Machine Check error\n"); + return false; + } + + gen_entry = acpi_hest_generic_data3(status); + gen_sec = section_of_acpientry(gen_sec, gen_entry); + gen_sec->error_type = GENPROC_ERRTYPE_TLB; + gen_sec->validation |= GENPROC_VALID_PROC_ERR_TYPE; + + return true; +} +#endif + unsigned long northbridge_write_acpi_tables(const struct device *device, unsigned long current, struct acpi_rsdp *rsdp) @@ -885,6 +918,7 @@ acpi_srat_t *srat; acpi_slit_t *slit; acpi_dmar_t *dmar; + acpi_bert_t *bert;
const struct soc_intel_xeon_sp_cpx_config *const config = config_of(device);
@@ -917,5 +951,26 @@ acpi_add_table(rsdp, dmar); }
+ if (CONFIG(ACPI_BERT)) { + /* Fill BERT region. Return if not filled in. */ + if(!fill_bert_region()) + return current; + + void *rgn = NULL; + size_t size = 0; + bert_errors_region(&rgn, &size); + printk(BIOS_INFO, "BERT region base: %p, error record size: 0x%lx\n", rgn, size); + /* Add BERT table */ + if (rgn && size) { + hexdump(rgn, size); + current = ALIGN(current, 8); + bert = (acpi_bert_t *)current; + acpi_write_bert(bert, (uintptr_t)rgn, size); + acpi_add_table(rsdp, (void *)current); + current += bert->header.length; + } else { + printk(BIOS_ERR, "BERT region is not filled.\n"); + } + } return current; } diff --git a/src/soc/intel/xeon_sp/uncore.c b/src/soc/intel/xeon_sp/uncore.c index a549acb..d640c1d 100644 --- a/src/soc/intel/xeon_sp/uncore.c +++ b/src/soc/intel/xeon_sp/uncore.c @@ -153,6 +153,7 @@ uint64_t mc_values[NUM_MAP_ENTRIES]; struct resource *resource; int index = *res_count; + uintptr_t cbmem_top_addr = (uintptr_t)cbmem_top();
fsp_find_reserved_memory(&fsp_mem);
@@ -162,7 +163,7 @@
top_of_ram = range_entry_base(&fsp_mem) - 1; printk(BIOS_SPEW, "cbmem_top: 0x%lx, fsp range: [0x%llx - 0x%llx], top_of_ram: 0x%llx\n", - (uintptr_t) cbmem_top(), range_entry_base(&fsp_mem), + cbmem_top_addr, range_entry_base(&fsp_mem), range_entry_end(&fsp_mem), top_of_ram);
/* Conventional Memory (DOS region, 0x0 to 0x9FFFF) */ @@ -178,8 +179,7 @@ ram_resource(dev, index++, base_kb, size_kb);
/* - * FSP meomoy, CBMem regions are already added as reserved - * Add TSEG and MESEG Regions as reserved memory + * FSP memory, CBMem regions are already added as reserved * src/drivers/intel/fsp2_0/memory_init.c sets CBMEM reserved size * arch_upd->BootLoaderTolumSize = cbmem_overhead_size(); == 2 * CBMEM_ROOT_MIN_SIZE * typically 0x2000 @@ -192,6 +192,14 @@ * FspBootLoaderTolum Size : 2000 */
+ /* Mark APEI BERT region as reserved */ + if (CONFIG(ACPI_BERT)) { + base_kb = (cbmem_top_addr >> 10); + size_kb = (CONFIG_ACPI_BERT_SIZE >> 10); + LOG_MEM_RESOURCE("apei_bert", dev, index, base_kb, size_kb); + mmio_resource(dev, index++, base_kb, size_kb); + } + /* Mark TSEG/SMM region as reserved */ base_kb = (mc_values[TSEG_BASE_REG] >> 10); size_kb = (mc_values[TSEG_LIMIT_REG] - mc_values[TSEG_BASE_REG] + 1) >> 10;
build bot (Jenkins) has posted comments on this change. ( https://review.coreboot.org/c/coreboot/+/45392 )
Change subject: soc/intel/xeon_sp/cpx: generate ACPI BERT table ......................................................................
Patch Set 1:
(4 comments)
https://review.coreboot.org/c/coreboot/+/45392/1/src/soc/intel/xeon_sp/cpx/K... File src/soc/intel/xeon_sp/cpx/Kconfig:
https://review.coreboot.org/c/coreboot/+/45392/1/src/soc/intel/xeon_sp/cpx/K... PS1, Line 104: records. trailing whitespace
https://review.coreboot.org/c/coreboot/+/45392/1/src/soc/intel/xeon_sp/cpx/a... File src/soc/intel/xeon_sp/cpx/acpi.c:
https://review.coreboot.org/c/coreboot/+/45392/1/src/soc/intel/xeon_sp/cpx/a... PS1, Line 900: if (!status){ space required before the open brace '{'
https://review.coreboot.org/c/coreboot/+/45392/1/src/soc/intel/xeon_sp/cpx/a... PS1, Line 956: if(!fill_bert_region()) space required before the open parenthesis '('
https://review.coreboot.org/c/coreboot/+/45392/1/src/soc/intel/xeon_sp/cpx/a... PS1, Line 962: printk(BIOS_INFO, "BERT region base: %p, error record size: 0x%lx\n", rgn, size); line over 96 characters
Jonathan Zhang has posted comments on this change. ( https://review.coreboot.org/c/coreboot/+/45392 )
Change subject: soc/intel/xeon_sp/cpx: generate ACPI BERT table ......................................................................
Patch Set 1:
This change is ready for review.
Jonathan Zhang has posted comments on this change. ( https://review.coreboot.org/c/coreboot/+/45392 )
Change subject: soc/intel/xeon_sp/cpx: generate ACPI BERT table ......................................................................
Patch Set 1:
This change is ready for review.
Jonathan Zhang has posted comments on this change. ( https://review.coreboot.org/c/coreboot/+/45392 )
Change subject: soc/intel/xeon_sp/cpx: generate ACPI BERT table ......................................................................
Patch Set 1:
(1 comment)
This change is ready for review.
https://review.coreboot.org/c/coreboot/+/45392/1/src/soc/intel/xeon_sp/uncor... File src/soc/intel/xeon_sp/uncore.c:
https://review.coreboot.org/c/coreboot/+/45392/1/src/soc/intel/xeon_sp/uncor... PS1, Line 129: cbmem_top
add BERT
Done
Attention is currently required from: Angel Pons. Jonathan Zhang has posted comments on this change. ( https://review.coreboot.org/c/coreboot/+/45392 )
Change subject: soc/intel/xeon_sp/cpx: generate ACPI BERT table ......................................................................
Patch Set 4:
(1 comment)
This change is ready for review.
File src/soc/intel/xeon_sp/cpx/acpi.c:
https://review.coreboot.org/c/coreboot/+/45392/comment/a88ac65f_588b0994 PS4, Line 889: memset(*start, 0, *size);
Hm, this function is in the global namespace. […]
Hi Felix/Angel, Thanks for the review. I am not actively working on this patch, since we are winding down work on CPX-SP based servers. We list BERT as a feature gap for DeltaLake. We are moving our resources to SPR-SP work. Later on we will fill in this gap.