Marshall Dawson has uploaded this change for review.

View Change

NOT_FOR_MERGE soc/amd/stoneyridge: Add DRAM check for S3

Allocate cbmem space to store DRAM check data during S3. Verify that
the data is unchanged during the resume. Due to where the save and
verify steps are placed, the test cannot capture 100% of potential
memory corruption causes in coreboot.

The ranges to verify are determined by the coreboot tables that are
constructed at the end of POST. If a failure occurs within RAM available
to the OS, the test reports "ERROR" to force suspend_stress_test to
stop. Otherwise, the mismatch is reported as "FYI", e.g. in memory
owned by coreboot.

The granularity of blocks to check within each range is configurable,
and defaults to 1MB. The size of data stored for each block is 32 bits.
Both parameters may be changed if desired.

CAUTION: This test must not be deployed in a shipping system. It
disables TSEG, and its protections, in order to allow the
performance to be at an acceptable level.
TODO: Add the capability of checking DRAM above 4GB.

TEST=Suspend and resume grunt. Verify mismatch with induced error.
BUG=b:118157730

Change-Id: I375dd7ea9a3ab8992f1616126bcbd9724e4fc9a0
Signed-off-by: Marshall Dawson <marshalldawson3rd@gmail.com>
---
M src/soc/amd/common/block/pi/amd_resume_final.c
M src/soc/amd/stoneyridge/Kconfig
M src/soc/amd/stoneyridge/Makefile.inc
M src/soc/amd/stoneyridge/finalize.c
M src/soc/amd/stoneyridge/include/soc/iomap.h
A src/soc/amd/stoneyridge/include/soc/s3test_util.h
A src/soc/amd/stoneyridge/s3test_util.c
M src/soc/amd/stoneyridge/smihandler.c
8 files changed, 431 insertions(+), 1 deletion(-)

git pull ssh://review.coreboot.org:29418/coreboot refs/changes/06/29406/1
diff --git a/src/soc/amd/common/block/pi/amd_resume_final.c b/src/soc/amd/common/block/pi/amd_resume_final.c
index a282665..7ee6470 100644
--- a/src/soc/amd/common/block/pi/amd_resume_final.c
+++ b/src/soc/amd/common/block/pi/amd_resume_final.c
@@ -15,10 +15,22 @@

#include <bootstate.h>
#include <amdblocks/agesawrapper_call.h>
+#include <soc/s3test_util.h>

static void agesawrapper_s3finalrestore(void *unused)
{
do_agesawrapper(agesawrapper_amds3finalrestore, "amds3finalrestore");
+
+ /* TODO: wrong place re. stoneyridge, but place here for now to ensure
+ * verification is as late as possible, and after AGESA.
+ *
+ * However, note that the ramstage copy and everything else in TSEG
+ * will be inaccessible if the BS_OS_RESUME, BS_ON_ENTRY soc_finalize()
+ * happens to run before this entry. That scenario will guarantee a
+ * mismatch if TSEG is tested.
+ */
+ if (IS_ENABLED(CONFIG_DEBUG_CHECKSUM_DRAM_ON_S3))
+ verify_dram_checksums();
}

BOOT_STATE_INIT_ENTRY(BS_OS_RESUME, BS_ON_ENTRY,
diff --git a/src/soc/amd/stoneyridge/Kconfig b/src/soc/amd/stoneyridge/Kconfig
index 4411984..1344d8b 100644
--- a/src/soc/amd/stoneyridge/Kconfig
+++ b/src/soc/amd/stoneyridge/Kconfig
@@ -385,4 +385,16 @@
return to S0. Otherwise the system will remain in S5 once power
is restored.

+config DEBUG_CHECKSUM_DRAM_ON_S3
+ bool "Store checksums for DRAM during suspend"
+ default y # todo - y for convenience but change to n
+ help
+ During a suspend cycle, checksum the DRAM and store the results in
+ cbmem. Compare the values during a resume and dump any discrepancies
+ to the console. Certain errors may be reasonable, depending on their
+ addresses.
+
+ This is a test feature and must never be deployed in production
+ systems.
+
endif # SOC_AMD_STONEYRIDGE_FP4 || SOC_AMD_STONEYRIDGE_FT4
diff --git a/src/soc/amd/stoneyridge/Makefile.inc b/src/soc/amd/stoneyridge/Makefile.inc
index c54b652..a2d317e 100644
--- a/src/soc/amd/stoneyridge/Makefile.inc
+++ b/src/soc/amd/stoneyridge/Makefile.inc
@@ -117,6 +117,7 @@
ramstage-$(CONFIG_SPI_FLASH) += spi.c
ramstage-y += finalize.c
ramstage-y += nb_util.c
+ramstage-$(CONFIG_DEBUG_CHECKSUM_DRAM_ON_S3) += s3test_util.c

smm-y += monotonic_timer.c
smm-y += smihandler.c
@@ -127,6 +128,7 @@
smm-$(CONFIG_SPI_FLASH) += spi.c
smm-y += nb_util.c
smm-y += gpio.c
+smm-$(CONFIG_DEBUG_CHECKSUM_DRAM_ON_S3) += s3test_util.c

CPPFLAGS_common += -I$(src)/soc/amd/stoneyridge
CPPFLAGS_common += -I$(src)/soc/amd/stoneyridge/include
diff --git a/src/soc/amd/stoneyridge/finalize.c b/src/soc/amd/stoneyridge/finalize.c
index 45e6595..dbe06b0 100644
--- a/src/soc/amd/stoneyridge/finalize.c
+++ b/src/soc/amd/stoneyridge/finalize.c
@@ -31,7 +31,12 @@

if (IS_ENABLED(CONFIG_SMM_TSEG)) {
mask = rdmsr(SMM_MASK_MSR);
- mask.lo |= SMM_TSEG_VALID;
+ /* If TSEG is marked valid, the CPU enforces UC memory type
+ * outside of SMRAM while in the SMI handler. Skipping this
+ * step allows the test to perform at an acceptable speed.
+ */
+ if (!IS_ENABLED(CONFIG_DEBUG_CHECKSUM_DRAM_ON_S3))
+ mask.lo |= SMM_TSEG_VALID;
wrmsr(SMM_MASK_MSR, mask);
}

diff --git a/src/soc/amd/stoneyridge/include/soc/iomap.h b/src/soc/amd/stoneyridge/include/soc/iomap.h
index 3e86564..5bcd084 100644
--- a/src/soc/amd/stoneyridge/include/soc/iomap.h
+++ b/src/soc/amd/stoneyridge/include/soc/iomap.h
@@ -72,6 +72,8 @@
#define SYS_RESET 0xcf9

/* BiosRam Ranges at 0xfed80500 or I/O 0xcd4/0xcd5 */
+#define BIOSRAM_S3DRAM_CHECK_BASE 0xe8 /* 4 bytes */
+#define BIOSRAM_S3DRAM_LBMEM_BASE 0xec /* 4 bytes */
#define BIOSRAM_CBMEM_TOP 0xf0 /* 4 bytes */
#define BIOSRAM_UMA_SIZE 0xf4 /* 4 bytes */
#define BIOSRAM_UMA_BASE 0xf8 /* 8 bytes */
diff --git a/src/soc/amd/stoneyridge/include/soc/s3test_util.h b/src/soc/amd/stoneyridge/include/soc/s3test_util.h
new file mode 100644
index 0000000..a6dcdea
--- /dev/null
+++ b/src/soc/amd/stoneyridge/include/soc/s3test_util.h
@@ -0,0 +1,22 @@
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __PI_STONEYRIDGE_S3_TESTUTIL_H__
+#define __PI_STONEYRIDGE_S3_TESTUTIL_H__
+
+/*
+ * Compute check data for every tested DRAM block and store it in the save
+ * area. Called from the SMI handler once the sleep type is known to be S3 or
+ * deeper.
+ */
+void store_dram_checksums(void);
+
+/*
+ * Recompute the check data for every tested DRAM block, compare it against
+ * the saved values and print each mismatch to the console. Called on the
+ * resume path after the AGESA final restore.
+ */
+void verify_dram_checksums(void);
+
+#endif /* __PI_STONEYRIDGE_S3_TESTUTIL_H__ */
diff --git a/src/soc/amd/stoneyridge/s3test_util.c b/src/soc/amd/stoneyridge/s3test_util.c
new file mode 100644
index 0000000..0aa76de
--- /dev/null
+++ b/src/soc/amd/stoneyridge/s3test_util.c
@@ -0,0 +1,368 @@
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2018 Advanced Micro Devices
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <soc/southbridge.h>
+#include <soc/s3test_util.h>
+#include <soc/iomap.h>
+#include <memrange.h>
+#include <cpu/x86/cache.h>
+#include <cpu/x86/msr.h>
+#include <cpu/amd/mtrr.h>
+#include <console/console.h>
+#include <memlayout.h>
+#include <cbmem.h>
+#include <bootstate.h>
+#include <commonlib/coreboot_tables.h>
+
+#define CHECK_TYPE_SAVE 0
+#define CHECK_TYPE_VERIFY 1
+
+#define MAX_CHECK_RANGES 8
+
+/*
+ * Adjust the block size as necessary to narrow in on the location of a failure.
+ * Normally a DWORD is stored for each 1MB test range, but this can be made
+ * smaller if space is a concern.
+ */
+#define TESTBLOCK_SZ (1 * MiB)
+#define USE_SUM_SIZE32
+
+/*
+ * sum_t is the type stored per test block. Its width follows whichever
+ * USE_SUM_SIZEnn macro is defined above; define only one of them, otherwise
+ * sum_t is either missing or declared with conflicting types.
+ */
+#ifdef USE_SUM_SIZE8
+typedef uint8_t sum_t;
+#endif
+#ifdef USE_SUM_SIZE16
+typedef uint16_t sum_t;
+#endif
+#ifdef USE_SUM_SIZE32
+typedef uint32_t sum_t;
+#endif
+#ifdef USE_SUM_SIZE64
+typedef uint64_t sum_t;
+#endif
+
+/* source code shorteners */
+#define FOUR_GB ((uint64_t)4 * GiB)
+#define CL_SIZE ARCH_CACHELINE_ALIGN_SIZE
+
+/* Remember where the check-data save area lives by stashing it in BiosRam. */
+static void write_dram_check_addr(void *base)
+{
+	const uintptr_t save_area_addr = (uintptr_t)base;
+
+	biosram_write32(BIOSRAM_S3DRAM_CHECK_BASE, save_area_addr);
+}
+
+/*
+ * Fetch the address of the check-data save area from BiosRam.
+ *
+ * Returns NULL when the stored value is zero, i.e. no area was recorded.
+ */
+static void *read_dram_check_addr(void)
+{
+	/*
+	 * Go through uintptr_t, mirroring write_dram_check_addr(), so the
+	 * 32-bit value is never cast straight to a pointer of another width.
+	 */
+	return (void *)(uintptr_t)biosram_read32(BIOSRAM_S3DRAM_CHECK_BASE);
+}
+
+/* Stash the address of the coreboot-table memory record in BiosRam. */
+static void write_lbmem_addr(void *base)
+{
+	const uintptr_t lbmem_addr = (uintptr_t)base;
+
+	biosram_write32(BIOSRAM_S3DRAM_LBMEM_BASE, lbmem_addr);
+}
+
+/*
+ * Fetch the address of the coreboot-table memory record from BiosRam.
+ *
+ * Returns NULL when the stored value is zero, i.e. no record was recorded.
+ */
+static void *read_lbmem_addr(void)
+{
+	/*
+	 * Go through uintptr_t, mirroring write_lbmem_addr(), so the 32-bit
+	 * value is never cast straight to a pointer of another width.
+	 */
+	return (void *)(uintptr_t)biosram_read32(BIOSRAM_S3DRAM_LBMEM_BASE);
+}
+
+/*
+ * Number of map[] entries carried by a memory record: the record's total
+ * size minus its fixed header, divided by the size of one entry.
+ */
+#define MEM_RANGE_COUNT(_rec) \
+ (((_rec)->size - sizeof(*(_rec))) / sizeof((_rec)->map[0]))
+
+/*
+ * Address of entry _idx, computed from the start of the record: skip the
+ * fixed header, then _idx entries.
+ */
+#define MEM_RANGE_PTR(_rec, _idx) \
+ (void *)(((u8 *) (_rec)) + sizeof(*(_rec)) \
+ + (sizeof((_rec)->map[0]) * (_idx)))
+
+/* Join the hi and lo halves of a coreboot-table 64-bit value. */
+static inline resource_t lb_unpack64(struct lb_uint64 val)
+{
+	resource_t unpacked = val.hi;
+
+	unpacked <<= 32;
+	unpacked |= val.lo;
+
+	return unpacked;
+}
+
+/* These need to be consistent across POST and SMM but since those don't
+ * naturally share data, build the info from scratch.
+ * todo: convert to use devicetree instead? or maybe coreboot tables'
+ * knowledge of non-reserved memory?
+ *
+ * range_causes_error[i] is nonzero when a mismatch inside check_ranges[i]
+ * is reported as "ERROR" rather than "FYI". Both arrays are filled by
+ * fill_check_ranges(); 'ranges' is the list head used to walk them.
+ */
+static int range_causes_error[MAX_CHECK_RANGES];
+static struct range_entry check_ranges[MAX_CHECK_RANGES];
+static struct memranges ranges = {
+ .entries = check_ranges,
+};
+
+/*
+ * Parse coreboot's memory ranges to decide what to test and whether a modified
+ * range constitutes a failure.
+ *
+ * Reserved, vendor-reserved and unusable ranges are skipped, as is anything
+ * starting at or above 4GB. Each remaining range is appended to check_ranges[]
+ * (clipped to 4GB) and linked to its predecessor; range_causes_error[] records
+ * whether the range is LB_MEM_RAM, i.e. whether a mismatch is an "ERROR".
+ *
+ * If the saved pointer to the memory record is NULL the tables are left
+ * untouched.
+ *
+ * todo: This file doesn't do above 4GB so cap the ranges
+ */
+static void fill_check_ranges(void)
+{
+	struct lb_memory *mem = (struct lb_memory *)read_lbmem_addr();
+	int count;
+	int lb_rng, chk_rng;
+
+	if (!mem) {
+		printk(BIOS_ERR, "ERROR: Can't find saved pointer to lb tables\n");
+		return;
+	}
+
+	count = MEM_RANGE_COUNT(mem);
+
+	chk_rng = 0;
+	for (lb_rng = 0 ; lb_rng < count ; lb_rng++) {
+		struct lb_memory_range *range = MEM_RANGE_PTR(mem, lb_rng);
+
+		if (range->type == LB_MEM_RESERVED
+		    || range->type == LB_MEM_VENDOR_RSVD
+		    || range->type == LB_MEM_UNUSABLE)
+			continue;
+
+		/* Still can't do >=4GB */
+		if (lb_unpack64(range->start) >= FOUR_GB)
+			continue;
+
+		/*
+		 * Only ranges that are actually stored count against the
+		 * limit. Skipped table entries must not use up slots, or
+		 * testable ranges late in the table would be lost.
+		 */
+		if (chk_rng >= MAX_CHECK_RANGES) {
+			printk(BIOS_ERR, "ERROR: Need to increase MAX_CHECK_RANGES\n");
+			break;
+		}
+
+		if (chk_rng)
+			check_ranges[chk_rng - 1].next = &check_ranges[chk_rng];
+
+		range_entry_init(check_ranges + chk_rng,
+				 lb_unpack64(range->start),
+				 MIN(FOUR_GB, lb_unpack64(range->start)
+					      + lb_unpack64(range->size)),
+				 chk_rng);
+		range_causes_error[chk_rng] = range->type == LB_MEM_RAM ? 1 : 0;
+		chk_rng++;
+	}
+}
+
+/*
+ * Reduce a 64-bit sum to the width of sum_t. Each step adds the upper half of
+ * the current value onto the lower half and truncates, and is applied only
+ * while sum_t is narrower than the current width.
+ */
+static sum_t sum_to_sumt(uint64_t sum64)
+{
+	uint64_t folded = sum64;
+
+	if (sizeof(sum_t) < sizeof(uint64_t))
+		folded = (uint32_t)(folded + (folded >> 32));
+
+	if (sizeof(sum_t) < sizeof(uint32_t))
+		folded = (uint16_t)(folded + (folded >> 16));
+
+	if (sizeof(sum_t) < sizeof(uint16_t))
+		folded = (uint8_t)(folded + (folded >> 8));
+
+	return folded;
+}
+
+/*
+ * Compute the check value for the inclusive address range [rstart, rlimit].
+ *
+ * Bytes ahead of the first cacheline boundary and behind the last one are
+ * added one at a time; the aligned middle is added as 64-bit words. The
+ * 64-bit total is then folded down to sum_t.
+ *
+ * todo: this function cannot access >4GB
+ */
+static sum_t calc_range(resource_t rstart, resource_t rlimit)
+{
+	uint64_t sum = 0;
+	const uint8_t *start, *end;
+	const uint8_t *aligned_start;
+	const uint8_t *aligned_end;
+	const uint8_t *i8;
+	const uint64_t *i64;
+	size_t lines;
+	size_t i;
+
+	/* since we don't do 4GB yet, help this function be prettier */
+	start = (const uint8_t *)(uintptr_t)rstart;
+	end = (const uint8_t *)(uintptr_t)rlimit + 1; /* treat end like start + size */
+
+	aligned_start = (const uint8_t *)ALIGN_UP((uintptr_t)start, CL_SIZE);
+	aligned_end = (const uint8_t *)ALIGN_DOWN((uintptr_t)end, CL_SIZE);
+
+	/*
+	 * A range lying inside a single cacheline has no aligned portion.
+	 * Collapse the aligned window onto 'end' so the whole range is summed
+	 * bytewise and nothing outside of it is read.
+	 */
+	if ((uintptr_t)aligned_start > (uintptr_t)aligned_end) {
+		aligned_start = end;
+		aligned_end = end;
+	}
+
+	lines = ((uintptr_t)aligned_end - (uintptr_t)aligned_start) / CL_SIZE;
+
+	/* Do all unaligned as bytes, regardless of how many */
+	for (i8 = start ; i8 < aligned_start ; i8++)
+		sum += *i8;
+
+	/*
+	 * Unroll one cacheline worth per iteration.
+	 * NOTE(review): eight 64-bit reads assume CL_SIZE is 64 - confirm.
+	 */
+	i64 = (const uint64_t *)aligned_start;
+	for (i = 0 ; i < lines ; i++) {
+		sum += *(i64++);
+		sum += *(i64++);
+		sum += *(i64++);
+		sum += *(i64++);
+		sum += *(i64++);
+		sum += *(i64++);
+		sum += *(i64++);
+		sum += *(i64++);
+	}
+
+	/* Advance the byte pointer (not the line counter) or this never ends */
+	for (i8 = aligned_end ; i8 < end ; i8++)
+		sum += *i8;
+
+	return sum_to_sumt(sum);
+}
+
+/*
+ * Report a mismatching block. 'e' indexes range_causes_error[] and selects
+ * whether the line is tagged "ERROR" or "FYI".
+ */
+static void print_fail(resource_t begin, resource_t end, int e)
+{
+	const char *severity = "FYI";
+
+	if (range_causes_error[e])
+		severity = "ERROR";
+
+	printk(BIOS_ERR, " %s: Mismatch found in 0x%llx-0x%llx\n",
+	       severity, begin, end);
+}
+
+/*
+ * Walk every range to be checked in TESTBLOCK_SZ steps and either store the
+ * check value of each block (CHECK_TYPE_SAVE) or compare it with the stored
+ * one and report mismatches (any other type, i.e. CHECK_TYPE_VERIFY).
+ *
+ * One sum_t is consumed from the save area per block, in range order, so the
+ * save and verify passes must see identical ranges.
+ */
+static void do_dram_checksums(int type)
+{
+	sum_t *save_area;
+	struct range_entry *re;
+	resource_t blk;
+	int rng;
+
+	printk(BIOS_DEBUG, "Begin calculating DRAM check data...\n");
+
+	save_area = read_dram_check_addr();
+	if (!save_area) {
+		printk(BIOS_ERR, "Error: no DRAM check save area, exiting test.\n");
+		return;
+	}
+
+	fill_check_ranges();
+
+	/*
+	 * Don't rely on this function being called after the SMM handler has
+	 * already written back the cache contents. Resume shouldn't make a
+	 * difference worth caring about.
+	 */
+	wbinvd();
+
+	/* rng tracks the index of the current range for print_fail() */
+	rng = 0;
+	memranges_each_entry(re, &ranges) {
+		printk(BIOS_DEBUG, " Calculate range 0x%llx-0x%llx\n",
+		       re->begin, re->end);
+		for (blk = re->begin ; blk < re->end ; blk += TESTBLOCK_SZ) {
+			/* the final block of a range may be short */
+			resource_t end = MIN(blk + TESTBLOCK_SZ - 1, re->end);
+
+			if (type == CHECK_TYPE_SAVE)
+				*save_area = calc_range(blk, end);
+			else if (*save_area != calc_range(blk, end))
+				print_fail(blk, end, rng);
+			save_area++;
+		}
+		rng++;
+	}
+}
+
+/*
+ * Clear the low three bits of MSR 0xc0010062 to request CPU P-state 0.
+ *
+ * todo: abstract better, especially if it moves out of stoneyridge
+ *
+ * The performance hit probably isn't substantial enough to worry
+ * about as long as we can cache outside of SMRAM.
+ */
+static void highest_performance(void)
+{
+	/* no define for this yet!? */
+	const unsigned int pstate_msr = 0xc0010062;
+	msr_t pst;
+
+	/* CPU P-State to 0 */
+	pst = rdmsr(pstate_msr);
+	pst.lo &= ~7;
+	wrmsr(pstate_msr, pst);
+}
+
+/*
+ * Suspend-side entry point: request P-state 0, then compute and store the
+ * check data for every tested block.
+ *
+ * No banner is printed here; do_dram_checksums() already announces the start
+ * of the calculation, and printing it here as well logged the line twice.
+ */
+void store_dram_checksums(void)
+{
+	highest_performance();
+	do_dram_checksums(CHECK_TYPE_SAVE);
+}
+
+/*
+ * Resume-side entry point: print a banner explaining that some miscompares
+ * are expected, then recompute the check data and compare it with the values
+ * stored before suspend.
+ */
+void verify_dram_checksums(void)
+{
+ printk(BIOS_DEBUG, "|| S3 memory check beginning. Certain miscompares should\n"
+ "|| be anticipated. Addresses may be verified against the\n"
+ "|| coreboot table written at the end of POST.\n");
+ do_dram_checksums(CHECK_TYPE_VERIFY);
+}
+
+/*
+ * cbmem init hook: reserve room for one sum_t per TESTBLOCK_SZ of installed
+ * memory and record the resulting address (NULL on failure) in BiosRam.
+ * Intended to be safe to call during a resume as well as during POST.
+ *
+ * Note that cbmem functions aren't available in SMM, but the file still builds
+ * OK since functions like this aren't referenced.
+ */
+static void setup_s3_dram_debug(int unused)
+{
+	const msr_t tom2 = rdmsr(TOP_MEM2);
+
+	/*
+	 * Simple inference of installed memory from the upper half of TOM2;
+	 * this won't work if less than 4 GB is installed!
+	 */
+	const uint64_t installed = (uint64_t)tom2.hi * 4 * GiB;
+	const size_t size = installed / TESTBLOCK_SZ * sizeof(sum_t);
+
+	/* cheat without making a new ID, currently only used for Family 10h */
+	void *mem = cbmem_add(CBMEM_ID_AMDMCT_MEMINFO, size);
+
+	if (mem == NULL)
+		printk(BIOS_ERR, "Error, can't allocate cbmem to store DRAM S3 check data\n");
+
+	write_dram_check_addr(mem);
+}
+
+/*
+ * Use coreboot's calculations to determine what memory to test. Locate the
+ * LB_TAG_MEMORY record inside the coreboot tables held in cbmem and save a
+ * pointer to it in BiosRam, to be parsed as needed. If the tables hold no
+ * such record, NULL is saved instead; if the tables are missing or not
+ * recognized, nothing is written.
+ */
+static void find_lb_tables_in_mem(void *unused)
+{
+	uint8_t *cursor = (uint8_t *)cbmem_find(CBMEM_ID_CBTABLE);
+	struct lb_header *header;
+	struct lb_record *rec;
+	int i;
+
+	if (cursor == NULL) {
+		printk(BIOS_ERR, "Error, S3 DRAM test setup: can't find the lb tables\n");
+		return;
+	}
+
+	header = (struct lb_header *)cursor;
+
+	if (strncmp((const char *)header->signature, "LBIO", 4) != 0) {
+		printk(BIOS_ERR, "Error, S3 DRAM test setup: don't recognize the lb tables @0x%p\n",
+		       header);
+		return;
+	}
+
+	/* Records start right behind the header; each one carries its own size */
+	cursor += header->header_bytes;
+	for (i = 0; i < header->table_entries; i++) {
+		rec = (struct lb_record *)cursor;
+
+		if (rec->tag == LB_TAG_MEMORY) {
+			write_lbmem_addr(rec);
+			return;
+		}
+
+		cursor += rec->size;
+	}
+
+	printk(BIOS_ERR, "Error, S3 DRAM test setup: No LB_TAG_MEMORY found\n");
+	write_lbmem_addr(NULL);
+}
+
+RAMSTAGE_CBMEM_INIT_HOOK(setup_s3_dram_debug)
+BOOT_STATE_INIT_ENTRY(BS_WRITE_TABLES, BS_ON_EXIT, find_lb_tables_in_mem, NULL);
diff --git a/src/soc/amd/stoneyridge/smihandler.c b/src/soc/amd/stoneyridge/smihandler.c
index 8985257..6782d20 100644
--- a/src/soc/amd/stoneyridge/smihandler.c
+++ b/src/soc/amd/stoneyridge/smihandler.c
@@ -24,6 +24,7 @@
#include <device/pci_def.h>
#include <soc/smi.h>
#include <soc/southbridge.h>
+#include <soc/s3test_util.h>
#include <elog.h>

/* bits in smm_io_trap */
@@ -153,6 +154,12 @@
}

if (slp_typ >= ACPI_S3) {
+ /*
+ * todo: how can I call this sooner? This is the earliest
+ * opportunity when we're sure we're on our way to S3.
+ */
+ store_dram_checksums();
+
/* Sleep Type Elog S3, S4, and S5 entry */
if (IS_ENABLED(CONFIG_ELOG_GSMI))
elog_add_event_byte(ELOG_TYPE_ACPI_ENTER, slp_typ);

To view, visit change 29406. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: coreboot
Gerrit-Branch: master
Gerrit-MessageType: newchange
Gerrit-Change-Id: I375dd7ea9a3ab8992f1616126bcbd9724e4fc9a0
Gerrit-Change-Number: 29406
Gerrit-PatchSet: 1
Gerrit-Owner: Marshall Dawson <marshalldawson3rd@gmail.com>