Attention is currently required from: Jason Glenesk, Raul Rangel, Marshall Dawson. Felix Held has uploaded this change for review. ( https://review.coreboot.org/c/coreboot/+/56283 )
Change subject: soc/amd/*/mca: factor out BERT entry generation to soc/amd/common
......................................................................
soc/amd/*/mca: factor out BERT entry generation to soc/amd/common
Change-Id: I960a2f384f11e4aa5aa2eb0645b6046f9f2f8847 Signed-off-by: Felix Held <felix-coreboot@felixheld.de> --- M src/soc/amd/common/block/cpu/mca/Makefile.inc A src/soc/amd/common/block/cpu/mca/mca_bert.c A src/soc/amd/common/block/cpu/mca/mcax_bert.c M src/soc/amd/common/block/include/amdblocks/mca.h M src/soc/amd/picasso/mca.c M src/soc/amd/stoneyridge/mca.c 6 files changed, 268 insertions(+), 256 deletions(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/83/56283/1
diff --git a/src/soc/amd/common/block/cpu/mca/Makefile.inc b/src/soc/amd/common/block/cpu/mca/Makefile.inc index 0aa47e1..5c1d2c5 100644 --- a/src/soc/amd/common/block/cpu/mca/Makefile.inc +++ b/src/soc/amd/common/block/cpu/mca/Makefile.inc @@ -1 +1,3 @@ ramstage-$(CONFIG_SOC_AMD_COMMON_BLOCK_MCA_COMMON) += mca_common.c +ramstage-$(CONFIG_SOC_AMD_COMMON_BLOCK_MCA) += mca_bert.c +ramstage-$(CONFIG_SOC_AMD_COMMON_BLOCK_MCAX) += mcax_bert.c diff --git a/src/soc/amd/common/block/cpu/mca/mca_bert.c b/src/soc/amd/common/block/cpu/mca/mca_bert.c new file mode 100644 index 0000000..ffd8973 --- /dev/null +++ b/src/soc/amd/common/block/cpu/mca/mca_bert.c @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <amdblocks/mca.h> +#include <cpu/amd/msr.h> +#include <cpu/x86/msr.h> +#include <acpi/acpi.h> +#include <console/console.h> +#include <arch/bert_storage.h> +#include <cper.h> +#include <types.h> + +static inline size_t mca_report_size_reqd(void) +{ + size_t size; + + size = sizeof(acpi_generic_error_status_t); + + size += sizeof(acpi_hest_generic_data_v300_t); + size += sizeof(cper_proc_generic_error_section_t); + + size += sizeof(acpi_hest_generic_data_v300_t); + size += sizeof(cper_ia32x64_proc_error_section_t); + + /* Check Error */ + size += cper_ia32x64_check_sz(); + + /* Context of MCG_CAP, MCG_STAT, MCG_CTL */ + size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 3); + + /* Context of MCi_CTL, MCi_STATUS, MCi_ADDR, MCi_MISC */ + size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 4); + + /* Context of CTL_MASK */ + size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 1); + + return size; +} + +static enum cper_x86_check_type error_to_chktype(struct mca_bank_status *mci) +{ + int error = mca_err_type(mci->sts); + + if (error == MCA_ERRTYPE_BUS) + return X86_PROCESSOR_BUS_CHK; + if (error == MCA_ERRTYPE_INT) + return X86_PROCESSOR_MS_CHK; + if (error == MCA_ERRTYPE_MEM) + return X86_PROCESSOR_CACHE_CHK; + if (error == 
MCA_ERRTYPE_TLB) + return X86_PROCESSOR_TLB_CHK; + + return X86_PROCESSOR_MS_CHK; /* unrecognized */ +} + +/* Fill additional information in the Generic Processor Error Section. */ +static void fill_generic_section(cper_proc_generic_error_section_t *sec, + struct mca_bank_status *mci) +{ + int type = mca_err_type(mci->sts); + + if (type == MCA_ERRTYPE_BUS) /* try to map MCA errors to CPER types */ + sec->error_type = GENPROC_ERRTYPE_BUS; + else if (type == MCA_ERRTYPE_INT) + sec->error_type = GENPROC_ERRTYPE_UARCH; + else if (type == MCA_ERRTYPE_MEM) + sec->error_type = GENPROC_ERRTYPE_CACHE; + else if (type == MCA_ERRTYPE_TLB) + sec->error_type = GENPROC_ERRTYPE_TLB; + else + sec->error_type = GENPROC_ERRTYPE_UNKNOWN; + sec->validation |= GENPROC_VALID_PROC_ERR_TYPE; +} + +/* Convert an error reported by an MCA bank into BERT information to be reported + * by the OS. The ACPI driver doesn't recognize/parse the IA32/X64 structure, + * which is the best method to report MSR context. As a result, add two + * structures: A "processor generic error" that is parsed, and an IA32/X64 one + * to capture complete information. + * + * Future work may attempt to interpret the specific Family 15h error symptoms + * found in the MCA registers. This data could enhance the reporting of the + * Processor Generic section and the failing error/check added to the + * IA32/X64 section. 
+ */ +void build_bert_mca_error(struct mca_bank_status *mci) +{ + acpi_generic_error_status_t *status; + acpi_hest_generic_data_v300_t *gen_entry; + acpi_hest_generic_data_v300_t *x86_entry; + cper_proc_generic_error_section_t *gen_sec; + cper_ia32x64_proc_error_section_t *x86_sec; + cper_ia32x64_proc_error_info_t *chk; + cper_ia32x64_context_t *ctx; + + if (mca_report_size_reqd() > bert_storage_remaining()) + goto failed; + + status = bert_new_event(&CPER_SEC_PROC_GENERIC_GUID); + if (!status) + goto failed; + + gen_entry = acpi_hest_generic_data3(status); + gen_sec = section_of_acpientry(gen_sec, gen_entry); + + fill_generic_section(gen_sec, mci); + + x86_entry = bert_append_ia32x64(status); + x86_sec = section_of_acpientry(x86_sec, x86_entry); + + chk = new_cper_ia32x64_check(status, x86_sec, error_to_chktype(mci)); + if (!chk) + goto failed; + + ctx = cper_new_ia32x64_context_msr(status, x86_sec, IA32_MCG_CAP, 3); + if (!ctx) + goto failed; + ctx = cper_new_ia32x64_context_msr(status, x86_sec, IA32_MC_CTL(mci->bank), 4); + if (!ctx) + goto failed; + ctx = cper_new_ia32x64_context_msr(status, x86_sec, MC_CTL_MASK(mci->bank), 1); + if (!ctx) + goto failed; + + return; + +failed: + /* We're here because of a hardware error, don't break something else */ + printk(BIOS_ERR, "Error: Not enough room in BERT region for Machine Check error\n"); +} diff --git a/src/soc/amd/common/block/cpu/mca/mcax_bert.c b/src/soc/amd/common/block/cpu/mca/mcax_bert.c new file mode 100644 index 0000000..f976158 --- /dev/null +++ b/src/soc/amd/common/block/cpu/mca/mcax_bert.c @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <amdblocks/mca.h> +#include <amdblocks/msr_zen.h> +#include <cpu/x86/msr.h> +#include <acpi/acpi.h> +#include <console/console.h> +#include <arch/bert_storage.h> +#include <cper.h> +#include <types.h> + +/* MISC4 is the last used register in the MCAX banks of Picasso */ +#define MCAX_USED_REGISTERS_PER_BANK (MCAX_MISC4_OFFSET + 1) + +static 
inline size_t mca_report_size_reqd(void) +{ + size_t size; + + size = sizeof(acpi_generic_error_status_t); + + size += sizeof(acpi_hest_generic_data_v300_t); + size += sizeof(cper_proc_generic_error_section_t); + + size += sizeof(acpi_hest_generic_data_v300_t); + size += sizeof(cper_ia32x64_proc_error_section_t); + + /* Check Error */ + size += cper_ia32x64_check_sz(); + + /* Context of MCG_CAP, MCG_STAT, MCG_CTL */ + size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 3); + + /* Context of CTL, STATUS, ADDR, MISC0, CONFIG, IPID, SYND, RESERVED, DESTAT, DEADDR, + MISC1, MISC2, MISC3, MISC4 */ + size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, MCAX_USED_REGISTERS_PER_BANK); + + /* Context of CTL_MASK */ + size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 1); + + return size; +} + +static enum cper_x86_check_type error_to_chktype(struct mca_bank_status *mci) +{ + int error = mca_err_type(mci->sts); + + if (error == MCA_ERRTYPE_BUS) + return X86_PROCESSOR_BUS_CHK; + if (error == MCA_ERRTYPE_INT) + return X86_PROCESSOR_MS_CHK; + if (error == MCA_ERRTYPE_MEM) + return X86_PROCESSOR_CACHE_CHK; + if (error == MCA_ERRTYPE_TLB) + return X86_PROCESSOR_TLB_CHK; + + return X86_PROCESSOR_MS_CHK; /* unrecognized */ +} + +/* Fill additional information in the Generic Processor Error Section. 
*/ +static void fill_generic_section(cper_proc_generic_error_section_t *sec, + struct mca_bank_status *mci) +{ + int type = mca_err_type(mci->sts); + + if (type == MCA_ERRTYPE_BUS) /* try to map MCA errors to CPER types */ + sec->error_type = GENPROC_ERRTYPE_BUS; + else if (type == MCA_ERRTYPE_INT) + sec->error_type = GENPROC_ERRTYPE_UARCH; + else if (type == MCA_ERRTYPE_MEM) + sec->error_type = GENPROC_ERRTYPE_CACHE; + else if (type == MCA_ERRTYPE_TLB) + sec->error_type = GENPROC_ERRTYPE_TLB; + else + sec->error_type = GENPROC_ERRTYPE_UNKNOWN; + sec->validation |= GENPROC_VALID_PROC_ERR_TYPE; +} + +/* Convert an error reported by an MCA bank into BERT information to be reported + * by the OS. The ACPI driver doesn't recognize/parse the IA32/X64 structure, + * which is the best method to report MSR context. As a result, add two + * structures: A "processor generic error" that is parsed, and an IA32/X64 one + * to capture complete information. + */ +void build_bert_mca_error(struct mca_bank_status *mci) +{ + acpi_generic_error_status_t *status; + acpi_hest_generic_data_v300_t *gen_entry; + acpi_hest_generic_data_v300_t *x86_entry; + cper_proc_generic_error_section_t *gen_sec; + cper_ia32x64_proc_error_section_t *x86_sec; + cper_ia32x64_proc_error_info_t *chk; + cper_ia32x64_context_t *ctx; + + if (mca_report_size_reqd() > bert_storage_remaining()) + goto failed; + + status = bert_new_event(&CPER_SEC_PROC_GENERIC_GUID); + if (!status) + goto failed; + + gen_entry = acpi_hest_generic_data3(status); + gen_sec = section_of_acpientry(gen_sec, gen_entry); + + fill_generic_section(gen_sec, mci); + + x86_entry = bert_append_ia32x64(status); + x86_sec = section_of_acpientry(x86_sec, x86_entry); + + chk = new_cper_ia32x64_check(status, x86_sec, error_to_chktype(mci)); + if (!chk) + goto failed; + + ctx = cper_new_ia32x64_context_msr(status, x86_sec, IA32_MCG_CAP, 3); + if (!ctx) + goto failed; + ctx = cper_new_ia32x64_context_msr(status, x86_sec, MCAX_CTL_MSR(mci->bank), + 
MCAX_USED_REGISTERS_PER_BANK); + if (!ctx) + goto failed; + ctx = cper_new_ia32x64_context_msr(status, x86_sec, MCA_CTL_MASK_MSR(mci->bank), 1); + if (!ctx) + goto failed; + + return; + +failed: + /* We're here because of a hardware error, don't break something else */ + printk(BIOS_ERR, "Error: Not enough room in BERT region for Machine Check error\n"); +} diff --git a/src/soc/amd/common/block/include/amdblocks/mca.h b/src/soc/amd/common/block/include/amdblocks/mca.h index 16b016b..7cead26 100644 --- a/src/soc/amd/common/block/include/amdblocks/mca.h +++ b/src/soc/amd/common/block/include/amdblocks/mca.h @@ -3,7 +3,15 @@ #ifndef AMD_BLOCK_MCA_H #define AMD_BLOCK_MCA_H
+#include <cpu/x86/msr.h> + +struct mca_bank_status { + unsigned int bank; + msr_t sts; +}; + void check_mca(void); void mca_check_all_banks(void); +void build_bert_mca_error(struct mca_bank_status *mci);
#endif /* AMD_BLOCK_MCA_H */ diff --git a/src/soc/amd/picasso/mca.c b/src/soc/amd/picasso/mca.c index 074f313..9c02806 100644 --- a/src/soc/amd/picasso/mca.c +++ b/src/soc/amd/picasso/mca.c @@ -2,138 +2,9 @@
#include <amdblocks/mca.h> #include <amdblocks/msr_zen.h> -#include <amdblocks/reset.h> #include <cpu/x86/lapic.h> #include <cpu/x86/msr.h> -#include <acpi/acpi.h> #include <console/console.h> -#include <arch/bert_storage.h> -#include <cper.h> -#include <types.h> - -/* MISC4 is the last used register in the MCAX banks of Picasso */ -#define MCAX_USED_REGISTERS_PER_BANK (MCAX_MISC4_OFFSET + 1) - -struct mca_bank_status { - unsigned int bank; - msr_t sts; -}; - -static inline size_t mca_report_size_reqd(void) -{ - size_t size; - - size = sizeof(acpi_generic_error_status_t); - - size += sizeof(acpi_hest_generic_data_v300_t); - size += sizeof(cper_proc_generic_error_section_t); - - size += sizeof(acpi_hest_generic_data_v300_t); - size += sizeof(cper_ia32x64_proc_error_section_t); - - /* Check Error */ - size += cper_ia32x64_check_sz(); - - /* Context of MCG_CAP, MCG_STAT, MCG_CTL */ - size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 3); - - /* Context of CTL, STATUS, ADDR, MISC0, CONFIG, IPID, SYND, RESERVED, DESTAT, DEADDR, - MISC1, MISC2, MISC3, MISC4 */ - size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, MCAX_USED_REGISTERS_PER_BANK); - - /* Context of CTL_MASK */ - size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 1); - - return size; -} - -static enum cper_x86_check_type error_to_chktype(struct mca_bank_status *mci) -{ - int error = mca_err_type(mci->sts); - - if (error == MCA_ERRTYPE_BUS) - return X86_PROCESSOR_BUS_CHK; - if (error == MCA_ERRTYPE_INT) - return X86_PROCESSOR_MS_CHK; - if (error == MCA_ERRTYPE_MEM) - return X86_PROCESSOR_CACHE_CHK; - if (error == MCA_ERRTYPE_TLB) - return X86_PROCESSOR_TLB_CHK; - - return X86_PROCESSOR_MS_CHK; /* unrecognized */ -} - -/* Fill additional information in the Generic Processor Error Section. 
*/ -static void fill_generic_section(cper_proc_generic_error_section_t *sec, - struct mca_bank_status *mci) -{ - int type = mca_err_type(mci->sts); - - if (type == MCA_ERRTYPE_BUS) /* try to map MCA errors to CPER types */ - sec->error_type = GENPROC_ERRTYPE_BUS; - else if (type == MCA_ERRTYPE_INT) - sec->error_type = GENPROC_ERRTYPE_UARCH; - else if (type == MCA_ERRTYPE_MEM) - sec->error_type = GENPROC_ERRTYPE_CACHE; - else if (type == MCA_ERRTYPE_TLB) - sec->error_type = GENPROC_ERRTYPE_TLB; - else - sec->error_type = GENPROC_ERRTYPE_UNKNOWN; - sec->validation |= GENPROC_VALID_PROC_ERR_TYPE; -} - -/* Convert an error reported by an MCA bank into BERT information to be reported - * by the OS. The ACPI driver doesn't recognize/parse the IA32/X64 structure, - * which is the best method to report MSR context. As a result, add two - * structures: A "processor generic error" that is parsed, and an IA32/X64 one - * to capture complete information. - */ -static void build_bert_mca_error(struct mca_bank_status *mci) -{ - acpi_generic_error_status_t *status; - acpi_hest_generic_data_v300_t *gen_entry; - acpi_hest_generic_data_v300_t *x86_entry; - cper_proc_generic_error_section_t *gen_sec; - cper_ia32x64_proc_error_section_t *x86_sec; - cper_ia32x64_proc_error_info_t *chk; - cper_ia32x64_context_t *ctx; - - if (mca_report_size_reqd() > bert_storage_remaining()) - goto failed; - - status = bert_new_event(&CPER_SEC_PROC_GENERIC_GUID); - if (!status) - goto failed; - - gen_entry = acpi_hest_generic_data3(status); - gen_sec = section_of_acpientry(gen_sec, gen_entry); - - fill_generic_section(gen_sec, mci); - - x86_entry = bert_append_ia32x64(status); - x86_sec = section_of_acpientry(x86_sec, x86_entry); - - chk = new_cper_ia32x64_check(status, x86_sec, error_to_chktype(mci)); - if (!chk) - goto failed; - - ctx = cper_new_ia32x64_context_msr(status, x86_sec, IA32_MCG_CAP, 3); - if (!ctx) - goto failed; - ctx = cper_new_ia32x64_context_msr(status, x86_sec, 
MCAX_CTL_MSR(mci->bank), - MCAX_USED_REGISTERS_PER_BANK); - if (!ctx) - goto failed; - ctx = cper_new_ia32x64_context_msr(status, x86_sec, MCA_CTL_MASK_MSR(mci->bank), 1); - if (!ctx) - goto failed; - - return; - -failed: - /* We're here because of a hardware error, don't break something else */ - printk(BIOS_ERR, "Error: Not enough room in BERT region for Machine Check error\n"); -}
static const char *const mca_bank_name[] = { [0] = "Load-store unit", diff --git a/src/soc/amd/stoneyridge/mca.c b/src/soc/amd/stoneyridge/mca.c index 67efdfd..165d41f 100644 --- a/src/soc/amd/stoneyridge/mca.c +++ b/src/soc/amd/stoneyridge/mca.c @@ -5,136 +5,9 @@ #include <cpu/amd/msr.h> #include <cpu/x86/lapic.h> #include <cpu/x86/msr.h> -#include <acpi/acpi.h> #include <console/console.h> -#include <arch/bert_storage.h> -#include <cper.h> #include <types.h>
-struct mca_bank_status { - unsigned int bank; - msr_t sts; -}; - -static inline size_t mca_report_size_reqd(void) -{ - size_t size; - - size = sizeof(acpi_generic_error_status_t); - - size += sizeof(acpi_hest_generic_data_v300_t); - size += sizeof(cper_proc_generic_error_section_t); - - size += sizeof(acpi_hest_generic_data_v300_t); - size += sizeof(cper_ia32x64_proc_error_section_t); - - /* Check Error */ - size += cper_ia32x64_check_sz(); - - /* Context of MCG_CAP, MCG_STAT, MCG_CTL */ - size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 3); - - /* Context of MCi_CTL, MCi_STATUS, MCi_ADDR, MCi_MISC */ - size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 4); - - /* Context of CTL_MASK */ - size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 1); - - return size; -} - -static enum cper_x86_check_type error_to_chktype(struct mca_bank_status *mci) -{ - int error = mca_err_type(mci->sts); - - if (error == MCA_ERRTYPE_BUS) - return X86_PROCESSOR_BUS_CHK; - if (error == MCA_ERRTYPE_INT) - return X86_PROCESSOR_MS_CHK; - if (error == MCA_ERRTYPE_MEM) - return X86_PROCESSOR_CACHE_CHK; - if (error == MCA_ERRTYPE_TLB) - return X86_PROCESSOR_TLB_CHK; - - return X86_PROCESSOR_MS_CHK; /* unrecognized */ -} - -/* Fill additional information in the Generic Processor Error Section. 
*/ -static void fill_generic_section(cper_proc_generic_error_section_t *sec, - struct mca_bank_status *mci) -{ - int type = mca_err_type(mci->sts); - - if (type == MCA_ERRTYPE_BUS) /* try to map MCA errors to CPER types */ - sec->error_type = GENPROC_ERRTYPE_BUS; - else if (type == MCA_ERRTYPE_INT) - sec->error_type = GENPROC_ERRTYPE_UARCH; - else if (type == MCA_ERRTYPE_MEM) - sec->error_type = GENPROC_ERRTYPE_CACHE; - else if (type == MCA_ERRTYPE_TLB) - sec->error_type = GENPROC_ERRTYPE_TLB; - else - sec->error_type = GENPROC_ERRTYPE_UNKNOWN; - sec->validation |= GENPROC_VALID_PROC_ERR_TYPE; -} - -/* Convert an error reported by an MCA bank into BERT information to be reported - * by the OS. The ACPI driver doesn't recognize/parse the IA32/X64 structure, - * which is the best method to report MSR context. As a result, add two - * structures: A "processor generic error" that is parsed, and an IA32/X64 one - * to capture complete information. - * - * Future work may attempt to interpret the specific Family 15h error symptoms - * found in the MCA registers. This data could enhance the reporting of the - * Processor Generic section and the failing error/check added to the - * IA32/X64 section. 
- */ -static void build_bert_mca_error(struct mca_bank_status *mci) -{ - acpi_generic_error_status_t *status; - acpi_hest_generic_data_v300_t *gen_entry; - acpi_hest_generic_data_v300_t *x86_entry; - cper_proc_generic_error_section_t *gen_sec; - cper_ia32x64_proc_error_section_t *x86_sec; - cper_ia32x64_proc_error_info_t *chk; - cper_ia32x64_context_t *ctx; - - if (mca_report_size_reqd() > bert_storage_remaining()) - goto failed; - - status = bert_new_event(&CPER_SEC_PROC_GENERIC_GUID); - if (!status) - goto failed; - - gen_entry = acpi_hest_generic_data3(status); - gen_sec = section_of_acpientry(gen_sec, gen_entry); - - fill_generic_section(gen_sec, mci); - - x86_entry = bert_append_ia32x64(status); - x86_sec = section_of_acpientry(x86_sec, x86_entry); - - chk = new_cper_ia32x64_check(status, x86_sec, error_to_chktype(mci)); - if (!chk) - goto failed; - - ctx = cper_new_ia32x64_context_msr(status, x86_sec, IA32_MCG_CAP, 3); - if (!ctx) - goto failed; - ctx = cper_new_ia32x64_context_msr(status, x86_sec, IA32_MC_CTL(mci->bank), 4); - if (!ctx) - goto failed; - ctx = cper_new_ia32x64_context_msr(status, x86_sec, MC_CTL_MASK(mci->bank), 1); - if (!ctx) - goto failed; - - return; - -failed: - /* We're here because of a hardware error, don't break something else */ - printk(BIOS_ERR, "Error: Not enough room in BERT region for Machine Check error\n"); -} - static const char *const mca_bank_name[] = { [0] = "Load-store unit", [1] = "Instruction fetch unit",