[coreboot-gerrit] Change in coreboot[master]: cpu/x86/smm: Always use SMM_LOADER_VERSION2

1 Nov 2020

Arthur Heymans has uploaded this change for review. ( https://review.coreboot.org/c/coreboot/+/47073 )
Change subject: cpu/x86/smm: Always use SMM_LOADER_VERSION2
......................................................................
cpu/x86/smm: Always use SMM_LOADER_VERSION2
This removes the old SMM_LOADER_VERSION1. Version 2 should now
work in all the cases that version 1 supported.
This might need a bit more testing.
TODO: The save state handling inside the smihandler code is only
correct for the code that previously worked for V1. This needs to be
fixed for systems with a large number of CPUs.
Change-Id: If52e38ebd2baf4fc80d433e65626c537c0c1f1aa
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
---
M src/cpu/x86/Kconfig
M src/cpu/x86/mp_init.c
M src/cpu/x86/smm/Makefile.inc
M src/cpu/x86/smm/smm_module_handler.c
M src/cpu/x86/smm/smm_module_loader.c
D src/cpu/x86/smm/smm_module_loaderv2.c
M src/include/cpu/x86/smm.h
7 files changed, 407 insertions(+), 846 deletions(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/73/47073/1

diff --git a/src/cpu/x86/Kconfig b/src/cpu/x86/Kconfig
index b3a16bc..5394cd0 100644
--- a/src/cpu/x86/Kconfig
+++ b/src/cpu/x86/Kconfig
@@ -121,14 +121,6 @@
endif
-config X86_SMM_LOADER_VERSION2
-	bool
-	default n
-	depends on HAVE_SMI_HANDLER
-	help
-	  This option enables SMM module loader that works with server
-	  platforms which may contain more than 32 CPU threads.
-
 config SMM_LAPIC_REMAP_MITIGATION
    bool
    default y if NORTHBRIDGE_INTEL_I945
diff --git a/src/cpu/x86/mp_init.c b/src/cpu/x86/mp_init.c
index 3deab6d..f880c87 100644
--- a/src/cpu/x86/mp_init.c
+++ b/src/cpu/x86/mp_init.c
@@ -726,17 +726,12 @@
     * the location of the new SMBASE. If using SMM modules then this
     * calculation needs to match that of the module loader.
     */
-#if CONFIG(X86_SMM_LOADER_VERSION2)
    perm_smbase = smm_get_cpu_smbase(cpu);
    mp_state.perm_smbase = perm_smbase;
    if (!perm_smbase) {
    	printk(BIOS_ERR, "%s: bad SMBASE for CPU %d\n", __func__, cpu);
    	return;
    }
-#else
-	perm_smbase = mp_state.perm_smbase;
-	perm_smbase -= cpu * runtime->save_state_size;
-#endif
/* Setup code checks this callback for validity. */
    printk(BIOS_INFO, "%s : curr_smbase 0x%x perm_smbase 0x%x, cpu = %d\n",
@@ -772,7 +767,7 @@
    smm_stacks = memalign(16, num_cpus * CONFIG_SMM_STUB_STACK_SIZE);
if (smm_stacks == NULL) {
-		printk(BIOS_ERR, "%s: failed to allocate stacks.\n");
+		printk(BIOS_ERR, "%s: failed to allocate stacks.\n", __func__);
    	return -1;
    }
diff --git a/src/cpu/x86/smm/Makefile.inc b/src/cpu/x86/smm/Makefile.inc
index 1273a6c..dbe567a 100644
--- a/src/cpu/x86/smm/Makefile.inc
+++ b/src/cpu/x86/smm/Makefile.inc
@@ -1,10 +1,6 @@
 ## SPDX-License-Identifier: GPL-2.0-only
-ifeq ($(CONFIG_X86_SMM_LOADER_VERSION2),y)
-ramstage-y += smm_module_loaderv2.c
-else
 ramstage-y += smm_module_loader.c
-endif
 ramstage-y += smi_trigger.c
ifeq ($(CONFIG_ARCH_RAMSTAGE_X86_32),y)
diff --git a/src/cpu/x86/smm/smm_module_handler.c b/src/cpu/x86/smm/smm_module_handler.c
index 3ba5684..b6ef077 100644
--- a/src/cpu/x86/smm/smm_module_handler.c
+++ b/src/cpu/x86/smm/smm_module_handler.c
@@ -91,6 +91,7 @@
struct global_nvs *gnvs;
+/* TODO: handle multiple segments! */
 void *smm_get_save_state(int cpu)
 {
    char *base;
diff --git a/src/cpu/x86/smm/smm_module_loader.c b/src/cpu/x86/smm/smm_module_loader.c
index 3c0ed7e..54e46d4 100644
--- a/src/cpu/x86/smm/smm_module_loader.c
+++ b/src/cpu/x86/smm/smm_module_loader.c
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
+#include <stdint.h>
 #include <string.h>
-#include <acpi/acpi_gnvs.h>
 #include <rmodule.h>
 #include <cpu/x86/smm.h>
 #include <commonlib/helpers.h>
@@ -9,7 +9,7 @@
 #include <security/intel/stm/SmmStm.h>
#define FXSAVE_SIZE 512
-
+#define SMM_CODE_SEGMENT_SIZE 0x10000
 /* FXSAVE area during relocation. While it may not be strictly needed the
    SMM stub code relies on the FXSAVE area being non-zero to enable SSE
    instructions within SMM mode. */
@@ -47,65 +47,239 @@
 /* Per CPU minimum stack size. */
 #define SMM_MINIMUM_STACK_SIZE 32
+struct cpu_smm_info {
+	uint8_t active;
+	uintptr_t smbase;
+	uintptr_t entry;
+	uintptr_t ss_start;
+	uintptr_t code_start;
+	uintptr_t code_end;
+};
+struct cpu_smm_info cpus[CONFIG_MAX_CPUS] = { 0 };
+
 /*
- * The smm_entry_ins consists of 3 bytes. It is used when staggering SMRAM entry
- * addresses across CPUs.
+ * This method creates a map of all the CPU entry points, save state locations
+ * and the beginning and end of code segments for each CPU. This map is used
+ * during relocation to properly align as many CPUs that can fit into the SMRAM
+ * region. For more information on how SMRAM works, refer to the latest Intel
+ * developer's manuals (volume 3, chapter 34). SMRAM is divided up into the
+ * following regions:
+ * +-----------------+ Top of SMRAM
+ * |                 | <- MSEG, FXSAVE
+ * +-----------------+
+ * |    common       |
+ * |  smi handler    | 64K
+ * |                 |
+ * +-----------------+
+ * | CPU 0 code  seg |
+ * +-----------------+
+ * | CPU 1 code seg  |
+ * +-----------------+
+ * | CPU x code seg  |
+ * +-----------------+
+ * |                 |
+ * |                 |
+ * +-----------------+
+ * |    stacks       |
+ * +-----------------+ <- START of SMRAM
  *
- * 0xe9 <16-bit relative target> ; jmp <relative-offset>
+ * The code below checks when a code segment is full and begins placing the remainder
+ * CPUs in the lower segments. The entry point for each CPU is smbase + 0x8000
+ * and save state is smbase + 0x8000 + (0x8000 - state save size). Save state
+ * area grows downward into the CPUs entry point.  Therefore staggering too many
+ * CPUs in one 32K block will corrupt CPU0's entry code as the save states move
+ * downward.
+ * input : smbase of first CPU (all other CPUs
+ *         will go below this address)
+ * input : num_cpus in the system. The map will
+ *         be created from 0 to num_cpus.
  */
-struct smm_entry_ins {
-	char jmp_rel;
-	uint16_t rel16;
-} __packed;
-
-/*
- * Place the entry instructions for num entries beginning at entry_start with
- * a given stride. The entry_start is the highest entry point's address. All
- * other entry points are stride size below the previous.
- */
-static void smm_place_jmp_instructions(void *entry_start, size_t stride,
-		size_t num, void *jmp_target)
+static int smm_create_map(uintptr_t smbase, unsigned int num_cpus,
+			const struct smm_loader_params *params)
 {
-	size_t i;
-	char *cur;
-	struct smm_entry_ins entry = { .jmp_rel = 0xe9 };
+	unsigned int i;
+	struct rmodule smm_stub;
+	unsigned int ss_size = params->per_cpu_save_state_size, stub_size;
+	unsigned int smm_entry_offset = params->smm_main_entry_offset;
+	unsigned int seg_count = 0, segments = 0, available;
+	unsigned int cpus_in_segment = 0;
+	unsigned int base = smbase;
-	/* Each entry point has an IP value of 0x8000. The SMBASE for each
-	 * CPU is different so the effective address of the entry instruction
-	 * is different. Therefore, the relative displacement for each entry
-	 * instruction needs to be updated to reflect the current effective
-	 * IP. Additionally, the IP result from the jmp instruction is
-	 * calculated using the next instruction's address so the size of
-	 * the jmp instruction needs to be taken into account. */
-	cur = entry_start;
-	for (i = 0; i < num; i++) {
-		uint32_t disp = (uintptr_t)jmp_target;
-
-		disp -= sizeof(entry) + (uintptr_t)cur;
-		printk(BIOS_DEBUG,
-		       "SMM Module: placing jmp sequence at %p rel16 0x%04x\n",
-		       cur, disp);
-		entry.rel16 = disp;
-		memcpy(cur, &entry, sizeof(entry));
-		cur -= stride;
+	if (rmodule_parse(&_binary_smmstub_start, &smm_stub)) {
+		printk(BIOS_ERR, "%s: unable to get SMM module size\n", __func__);
+		return 0;
    }
+
+	stub_size = rmodule_memory_size(&smm_stub);
+	/* How many CPUs can fit into one 64K segment? */
+	available = 0xFFFF - smm_entry_offset - ss_size - stub_size;
+	if (available > 0) {
+		cpus_in_segment = available / ss_size;
+		/* minimum segments needed will always be 1 */
+		segments = num_cpus / cpus_in_segment + 1;
+		printk(BIOS_DEBUG,
+			"%s: cpus allowed in one segment %d\n", __func__, cpus_in_segment);
+		printk(BIOS_DEBUG,
+			"%s: min # of segments needed %d\n", __func__, segments);
+	} else {
+		printk(BIOS_ERR, "%s: not enough space in SMM to setup all CPUs\n", __func__);
+		printk(BIOS_ERR, "    save state & stub size need to be reduced\n");
+		printk(BIOS_ERR, "    or increase SMRAM size\n");
+		return 0;
+	}
+
+	if (sizeof(cpus) / sizeof(struct cpu_smm_info) < num_cpus) {
+		printk(BIOS_ERR,
+			"%s: increase MAX_CPUS in Kconfig\n", __func__);
+		return 0;
+	}
+
+	if (stub_size > ss_size) {
+		printk(BIOS_ERR, "%s: Save state larger than SMM stub size\n", __func__);
+		printk(BIOS_ERR, "    Decrease stub size or increase the size allocated for the save state\n");
+		return 0;
+	}
+
+	for (i = 0; i < num_cpus; i++) {
+		cpus[i].smbase = base;
+		cpus[i].entry = base + smm_entry_offset;
+		cpus[i].ss_start = cpus[i].entry + (smm_entry_offset - ss_size);
+		cpus[i].code_start = cpus[i].entry;
+		cpus[i].code_end = cpus[i].entry + stub_size;
+		cpus[i].active = 1;
+		base -= ss_size;
+		seg_count++;
+		if (seg_count >= cpus_in_segment) {
+			base -= smm_entry_offset;
+			seg_count = 0;
+		}
+	}
+
+	if (CONFIG_DEFAULT_CONSOLE_LOGLEVEL >= BIOS_DEBUG) {
+		seg_count = 0;
+		for (i = 0; i < num_cpus; i++) {
+			printk(BIOS_DEBUG, "CPU 0x%x\n", i);
+			printk(BIOS_DEBUG,
+				"    smbase %zx  entry %zx\n",
+				cpus[i].smbase, cpus[i].entry);
+			printk(BIOS_DEBUG,
+				"           ss_start %zx  code_end %zx\n",
+				cpus[i].ss_start, cpus[i].code_end);
+			seg_count++;
+			if (seg_count >= cpus_in_segment) {
+				printk(BIOS_DEBUG,
+					"-------------NEW CODE SEGMENT --------------\n");
+				seg_count = 0;
+			}
+		}
+	}
+	return 1;
 }
-/* Place stacks in base -> base + size region, but ensure the stacks don't
- * overlap the staggered entry points. */
+/*
+ * This method expects the smm relocation map to be complete.
+ * This method does not read any HW registers, it simply uses a
+ * map that was created during SMM setup.
+ * input: cpu_num - cpu number which is used as an index into the
+ *       map to return the smbase
+ */
+u32 smm_get_cpu_smbase(unsigned int cpu_num)
+{
+	if (cpu_num < CONFIG_MAX_CPUS) {
+		if (cpus[cpu_num].active)
+			return cpus[cpu_num].smbase;
+	}
+	return 0;
+}
+
+/*
+ * This method assumes that at least 1 CPU has been set up from
+ * which it will place other CPUs below its smbase ensuring that
+ * save state does not clobber the first CPUs init code segment. The init
+ * code which is the smm stub code is the same for all CPUs. They enter
+ * smm, setup stacks (based on their apic id), enter protected mode
+ * and then jump to the common smi handler.  The stack is allocated
+ * at the beginning of smram (aka tseg base, not smbase). The stack
+ * pointer for each CPU is calculated by using its apic id
+ * (code is in smm_stub.s)
+ * Each entry point will now have the same stub code which, sets up the CPU
+ * stack, enters protected mode and then jumps to the smi handler. It is
+ * important to enter protected mode before the jump because the "jump to
+ * address" might be larger than the 20bit address supported by real mode.
+ * SMI entry right now is in real mode.
+ * input: smbase - this is the smbase of the first cpu not the smbase
+ *        where tseg starts (aka smram_start). All CPUs code segment
+ *        and stack will be below this point except for the common
+ *        SMI handler which is one segment above
+ * input: num_cpus - number of cpus that need relocation including
+ *        the first CPU (though its code is already loaded)
+ * input: top of stack (stacks work downward by default in Intel HW)
+ * output: return -1, if runtime smi code could not be installed. In
+ *         this case SMM will not work and any SMI's generated will
+ *         cause a CPU shutdown or general protection fault because
+ *         the appropriate smi handling code was not installed
+ */
+
+static int smm_place_entry_code(uintptr_t smbase, unsigned int num_cpus,
+				unsigned int stack_top, const struct smm_loader_params *params)
+{
+	unsigned int i;
+	unsigned int size;
+	if (smm_create_map(smbase, num_cpus, params)) {
+		/*
+		 * Ensure there was enough space and the last CPUs smbase
+		 * did not encroach upon the stack. Stack top is smram start
+		 * + size of stack.
+		 */
+		if (cpus[num_cpus].active) {
+			if (cpus[num_cpus - 1].smbase +
+				params->smm_main_entry_offset < stack_top) {
+				printk(BIOS_ERR, "%s: stack encroachment\n", __func__);
+				printk(BIOS_ERR, "%s: smbase %zx, stack_top %x\n",
+					__func__, cpus[num_cpus].smbase, stack_top);
+				return 0;
+			}
+		}
+	} else {
+		printk(BIOS_ERR, "%s: unable to place smm entry code\n", __func__);
+		return 0;
+	}
+
+	printk(BIOS_INFO, "%s: smbase %zx, stack_top %x\n",
+		__func__, cpus[num_cpus-1].smbase, stack_top);
+
+	/* start at 1, the first CPU stub code is already there */
+	size = cpus[0].code_end - cpus[0].code_start;
+	for (i = 1; i < num_cpus; i++) {
+		memcpy((int *)cpus[i].code_start, (int *)cpus[0].code_start, size);
+		printk(BIOS_DEBUG,
+			"SMM Module: placing smm entry code at %zx,  cpu # 0x%x\n",
+			cpus[i].code_start, i);
+		printk(BIOS_DEBUG, "%s: copying from %zx to %zx 0x%x bytes\n",
+			__func__, cpus[0].code_start, cpus[i].code_start, size);
+	}
+	return 1;
+}
+
+/*
+ * Place stacks in base -> base + size region, but ensure the stacks don't
+ * overlap the staggered entry points.
+ */
 static void *smm_stub_place_stacks(char *base, size_t size,
    			   struct smm_loader_params *params)
 {
    size_t total_stack_size;
    char *stacks_top;
-	if (params->stack_top != NULL)
-		return params->stack_top;
-
    /* If stack space is requested assume the space lives in the lower
     * half of SMRAM. */
    total_stack_size = params->per_cpu_stack_size *
    		   params->num_concurrent_stacks;
+	printk(BIOS_DEBUG, "%s: cpus: %zx : stack space: needed -> %zx\n",
+		__func__, params->num_concurrent_stacks,
+		total_stack_size);
+	printk(BIOS_DEBUG, "  available -> %zx : per_cpu_stack_size : %zx\n",
+		size, params->per_cpu_stack_size);
/* There has to be at least one stack user. */
    if (params->num_concurrent_stacks < 1)
@@ -117,51 +291,53 @@
/* Stacks extend down to SMBASE */
    stacks_top = &base[total_stack_size];
+	printk(BIOS_DEBUG, "%s: exit, stack_top %p\n", __func__, stacks_top);
return stacks_top;
 }
-/* Place the staggered entry points for each CPU. The entry points are
+/*
+ * Place the staggered entry points for each CPU. The entry points are
  * staggered by the per CPU SMM save state size extending down from
- * SMM_ENTRY_OFFSET. */
-static void smm_stub_place_staggered_entry_points(char *base,
+ * SMM_ENTRY_OFFSET.
+ */
+static int smm_stub_place_staggered_entry_points(char *base,
    const struct smm_loader_params *params, const struct rmodule *smm_stub)
 {
    size_t stub_entry_offset;
-
+	int rc = 1;
    stub_entry_offset = rmodule_entry_offset(smm_stub);
-
-	/* If there are staggered entry points or the stub is not located
-	 * at the SMM entry point then jmp instructions need to be placed. */
+	/* Each CPU now has its own stub code, which enters protected mode,
+	 * sets up the stack, and then jumps to common SMI handler
+	 */
    if (params->num_concurrent_save_states > 1 || stub_entry_offset != 0) {
-		size_t num_entries;
-
-		base += SMM_ENTRY_OFFSET;
-		num_entries = params->num_concurrent_save_states;
-		/* Adjust beginning entry and number of entries down since
-		 * the initial entry point doesn't need a jump sequence. */
-		if (stub_entry_offset == 0) {
-			base -= params->per_cpu_save_state_size;
-			num_entries--;
-		}
-		smm_place_jmp_instructions(base,
-					   params->per_cpu_save_state_size,
-					   num_entries,
-					   rmodule_entry(smm_stub));
+		rc = smm_place_entry_code((unsigned int)base,
+			params->num_concurrent_save_states,
+			(unsigned int)params->stack_top, params);
    }
+	return rc;
 }
/*
  * The stub setup code assumes it is completely contained within the
- * default SMRAM size (0x10000). There are potentially 3 regions to place
+ * default SMRAM size (0x10000) for the default SMI handler (entry at
+ * 0x30000), but no assumption should be made for the permanent SMI handler.
+ * The placement of CPU entry points for permanent handler are determined
+ * by the number of CPUs in the system and the amount of SMRAM.
+ * There are potentially 3 regions to place
  * within the default SMRAM size:
  * 1. Save state areas
  * 2. Stub code
  * 3. Stack areas
  *
- * The save state and stack areas are treated as contiguous for the number of
- * concurrent areas requested. The save state always lives at the top of SMRAM
- * space, and the entry point is at offset 0x8000.
+ * The save state and smm stack are treated as contiguous for the number of
+ * concurrent areas requested. The save state always lives at the top of
+ * the CPU's smbase (and the entry point is at offset 0x8000). This allows only a certain
+ * number of CPUs with staggered entry points until the save state area comes
+ * down far enough to overwrite/corrupt the entry code (stub code). Therefore,
+ * an SMM map is created to avoid this corruption, see smm_create_map() above.
+ * This module setup code works for the default (0x30000) SMM handler setup and the
+ * permanent SMM handler.
  */
 static int smm_module_setup_stub(void *smbase, size_t smm_size,
    			 struct smm_loader_params *params,
@@ -177,73 +353,67 @@
    size_t i;
    struct smm_stub_params *stub_params;
    struct rmodule smm_stub;
-
+	unsigned int total_size_all;
    base = smbase;
-	size = SMM_DEFAULT_SIZE;
+	size = smm_size;
/* The number of concurrent stacks cannot exceed CONFIG_MAX_CPUS. */
-	if (params->num_concurrent_stacks > CONFIG_MAX_CPUS)
+	if (params->num_concurrent_stacks > CONFIG_MAX_CPUS) {
+		printk(BIOS_ERR, "%s: not enough stacks\n", __func__);
    	return -1;
+	}
/* Fail if can't parse the smm stub rmodule. */
-	if (rmodule_parse(&_binary_smmstub_start, &smm_stub))
+	if (rmodule_parse(&_binary_smmstub_start, &smm_stub)) {
+		printk(BIOS_ERR, "%s: unable to parse smm stub\n", __func__);
    	return -1;
+	}
/* Adjust remaining size to account for save state. */
    total_save_state_size = params->per_cpu_save_state_size *
    			params->num_concurrent_save_states;
-	if (total_save_state_size > size)
+	if (total_save_state_size > size) {
+		printk(BIOS_ERR,
+			"%s: more state save space needed:need -> %zx:available->%zx\n",
+			__func__, total_save_state_size, size);
    	return -1;
+	}
+
    size -= total_save_state_size;
/* The save state size encroached over the first SMM entry point. */
-	if (size <= SMM_ENTRY_OFFSET)
+	if (size <= params->smm_main_entry_offset) {
+		printk(BIOS_ERR, "%s: encroachment over SMM entry point\n", __func__);
+		printk(BIOS_ERR, "%s: state save size: %zx : smm_entry_offset -> %x\n",
+			__func__, size, params->smm_main_entry_offset);
    	return -1;
+	}
/* Need a minimum stack size and alignment. */
    if (params->per_cpu_stack_size <= SMM_MINIMUM_STACK_SIZE ||
-	    (params->per_cpu_stack_size & 3) != 0)
+	    (params->per_cpu_stack_size & 3) != 0) {
+		printk(BIOS_ERR, "%s: need minimum stack size\n", __func__);
    	return -1;
+	}
smm_stub_loc = NULL;
    smm_stub_size = rmodule_memory_size(&smm_stub);
    stub_entry_offset = rmodule_entry_offset(&smm_stub);
-	if (smm_stub_size > params->per_cpu_save_state_size) {
-		printk(BIOS_ERR, "SMM Module: SMM stub size larger than save state size\n");
-		printk(BIOS_ERR, "SMM Module: Staggered entry points will overlap stub\n");
-		return -1;
-	}
-
-	/* Assume the stub is always small enough to live within upper half of
-	 * SMRAM region after the save state space has been allocated. */
-	smm_stub_loc = &base[SMM_ENTRY_OFFSET];
-
-	/* Adjust for jmp instruction sequence. */
-	if (stub_entry_offset != 0) {
-		size_t entry_sequence_size = sizeof(struct smm_entry_ins);
-		/* Align up to 16 bytes. */
-		entry_sequence_size = ALIGN_UP(entry_sequence_size, 16);
-		smm_stub_loc += entry_sequence_size;
-		smm_stub_size += entry_sequence_size;
-	}
+	/* Put the stub at the main entry point */
+	smm_stub_loc = &base[params->smm_main_entry_offset];
/* Stub is too big to fit. */
-	if (smm_stub_size > (size - SMM_ENTRY_OFFSET))
+	if (smm_stub_size > (size - params->smm_main_entry_offset)) {
+		printk(BIOS_ERR, "%s: stub is too big to fit\n", __func__);
    	return -1;
-
-	/* The stacks, if requested, live in the lower half of SMRAM space. */
-	size = SMM_ENTRY_OFFSET;
-
-	/* Ensure stacks don't encroach onto staggered SMM
-	 * entry points. The staggered entry points extend
-	 * below SMM_ENTRY_OFFSET by the number of concurrent
-	 * save states - 1 and save state size. */
-	if (params->num_concurrent_save_states > 1) {
-		size -= total_save_state_size;
-		size += params->per_cpu_save_state_size;
    }
+	/* The stacks, if requested, live in the lower half of SMRAM space
+	 * for default handler, but for relocated handler it lives at the beginning
+	 * of SMRAM which is TSEG base
+	 */
+	const total_stack_size = params->num_concurrent_stacks * params->per_cpu_stack_size;
    /* Use the smbase as a proxy to know if we are installing the stub for relocation
     * or for permanent handling. In case of relocation the SMM relocation stack will
     * have been allocated on the ramstage heap and programmed in the smm loader params.
@@ -251,18 +421,25 @@
    if (smbase == (void *)SMM_DEFAULT_BASE)
    	stacks_top = params->stack_top;
    else
-		/* Place the stacks in the lower half of SMRAM. */
-		stacks_top = smm_stub_place_stacks(base, size, params);
+		stacks_top = smm_stub_place_stacks((char *)params->smram_start, size, params);
-	if (stacks_top == NULL)
+	if (stacks_top == NULL) {
+		printk(BIOS_ERR, "%s: not enough space for stacks\n", __func__);
+		printk(BIOS_ERR, "%s: ....need -> %p : available -> %zx\n", __func__,
+			base, total_stack_size);
    	return -1;
-
+	}
+	params->stack_top = stacks_top;
    /* Load the stub. */
-	if (rmodule_load(smm_stub_loc, &smm_stub))
+	if (rmodule_load(smm_stub_loc, &smm_stub)) {
+		printk(BIOS_ERR, "%s: load module failed\n", __func__);
    	return -1;
+	}
-	/* Place staggered entry points. */
-	smm_stub_place_staggered_entry_points(base, params, &smm_stub);
+	if (!smm_stub_place_staggered_entry_points(base, params, &smm_stub)) {
+		printk(BIOS_ERR, "%s: staggered entry points failed\n", __func__);
+		return -1;
+	}
/* Setup the parameters for the stub code. */
    stub_params = rmodule_parameters(&smm_stub);
@@ -276,7 +453,31 @@
    stub_params->runtime.smm_size = smm_size;
    stub_params->runtime.save_state_size = params->per_cpu_save_state_size;
    stub_params->runtime.num_cpus = params->num_concurrent_stacks;
-	stub_params->runtime.gnvs_ptr = (uintptr_t)acpi_get_gnvs();
+
+	printk(BIOS_DEBUG, "%s: stack_end = 0x%lx\n",
+		__func__, stub_params->stack_top - total_stack_size);
+	printk(BIOS_DEBUG,
+		"%s: stack_top = 0x%x\n", __func__, stub_params->stack_top);
+	printk(BIOS_DEBUG, "%s: stack_size = 0x%x\n",
+		__func__, stub_params->stack_size);
+	printk(BIOS_DEBUG, "%s: runtime.smbase = 0x%x\n",
+		__func__, stub_params->runtime.smbase);
+	printk(BIOS_DEBUG, "%s: runtime.start32_offset = 0x%x\n", __func__,
+		stub_params->runtime.start32_offset);
+	printk(BIOS_DEBUG, "%s: runtime.smm_size = 0x%zx\n",
+		__func__, smm_size);
+	printk(BIOS_DEBUG, "%s: per_cpu_save_state_size = 0x%x\n",
+		__func__, stub_params->runtime.save_state_size);
+	printk(BIOS_DEBUG, "%s: num_cpus = 0x%x\n", __func__,
+		stub_params->runtime.num_cpus);
+	printk(BIOS_DEBUG, "%s: total_save_state_size = 0x%x\n",
+		__func__, (stub_params->runtime.save_state_size *
+		stub_params->runtime.num_cpus));
+	total_size_all = stub_params->stack_size +
+		(stub_params->runtime.save_state_size *
+		stub_params->runtime.num_cpus);
+	printk(BIOS_DEBUG, "%s: total_size_all = 0x%x\n", __func__,
+		total_size_all);
/* Initialize the APIC id to CPU number table to be 1:1 */
    for (i = 0; i < params->num_concurrent_stacks; i++)
@@ -287,7 +488,6 @@
printk(BIOS_DEBUG, "SMM Module: stub loaded at %p. Will call %p(%p)\n",
           smm_stub_loc, params->handler, params->handler_arg);
-
    return 0;
 }
@@ -299,8 +499,8 @@
  */
 int smm_setup_relocation_handler(struct smm_loader_params *params)
 {
-	void *smram = (void *)SMM_DEFAULT_BASE;
-
+	void *smram = (void *)(SMM_DEFAULT_BASE);
+	printk(BIOS_SPEW, "%s: enter\n", __func__);
    /* There can't be more than 1 concurrent save state for the relocation
     * handler because all CPUs default to 0x30000 as SMBASE. */
    if (params->num_concurrent_save_states > 1)
@@ -315,27 +515,35 @@
    if (params->num_concurrent_stacks == 0)
    	params->num_concurrent_stacks = CONFIG_MAX_CPUS;
+	params->smm_main_entry_offset = SMM_ENTRY_OFFSET;
+	params->smram_start = SMM_DEFAULT_BASE;
+	params->smram_end = SMM_DEFAULT_BASE + SMM_DEFAULT_SIZE;
    return smm_module_setup_stub(smram, SMM_DEFAULT_SIZE,
-				     params, fxsave_area_relocation);
+				params, fxsave_area_relocation);
+	printk(BIOS_SPEW, "%s: exit\n", __func__);
 }
-/* The SMM module is placed within the provided region in the following
+/*
+ *The SMM module is placed within the provided region in the following
  * manner:
  * +-----------------+ <- smram + size
  * | BIOS resource   |
  * | list (STM)      |
- * +-----------------+ <- smram + size - CONFIG_BIOS_RESOURCE_LIST_SIZE
- * |    stacks       |
- * +-----------------+ <- .. - total_stack_size
+ * +-----------------+
  * |  fxsave area    |
- * +-----------------+ <- .. - total_stack_size - fxsave_size
+ * +-----------------+
+ * |  smi handler    |
  * |      ...        |
- * +-----------------+ <- smram + handler_size + SMM_DEFAULT_SIZE
- * |    handler      |
- * +-----------------+ <- smram + SMM_DEFAULT_SIZE
- * |    stub code    |
- * +-----------------+ <- smram
- *
+ * +-----------------+ <- cpu0
+ * |    stub code    | <- cpu1
+ * |    stub code    | <- cpu2
+ * |    stub code    | <- cpu3, etc
+ * |                 |
+ * |                 |
+ * |                 |
+ * |    stacks       |
+ * +-----------------+ <- smram start
+
  * It should be noted that this algorithm will not work for
  * SMM_DEFAULT_SIZE SMRAM regions such as the A segment. This algorithm
  * expects a region large enough to encompass the handler and stacks
@@ -350,12 +558,21 @@
    size_t alignment_size;
    size_t fxsave_size;
    void *fxsave_area;
-	size_t total_size;
+	size_t total_size = 0;
    char *base;
if (size <= SMM_DEFAULT_SIZE)
    	return -1;
+	/* Load main SMI handler at the top of SMRAM
+	 * everything else will go below
+	 */
+	base = smram;
+	base += size;
+	params->smram_start = (uintptr_t)smram;
+	params->smram_end = params->smram_start + size;
+	params->smm_main_entry_offset = SMM_ENTRY_OFFSET;
+
    /* Fail if can't parse the smm rmodule. */
    if (rmodule_parse(&_binary_smm_start, &smm_mod))
    	return -1;
@@ -366,21 +583,30 @@
total_stack_size = params->per_cpu_stack_size *
    		   params->num_concurrent_stacks;
+	total_size += total_stack_size;
+	/* Stacks are the base of SMRAM */
+	params->stack_top = smram + total_stack_size;
-	/* Stacks start at the top of the region. */
-	base = smram;
-	base += size;
-
-	if (CONFIG(STM))
+	/* MSEG starts at the top of SMRAM and works down */
+	if (CONFIG(STM)) {
    	base -= CONFIG_MSEG_SIZE + CONFIG_BIOS_RESOURCE_LIST_SIZE;
+		total_size += CONFIG_MSEG_SIZE + CONFIG_BIOS_RESOURCE_LIST_SIZE;
+	}
-	params->stack_top = base;
+	/* FXSAVE goes below MSEG */
+	if (CONFIG(SSE)) {
+		fxsave_size = FXSAVE_SIZE * params->num_concurrent_stacks;
+		fxsave_area = base - fxsave_size;
+		base -= fxsave_size;
+		total_size += fxsave_size;
+	} else {
+		fxsave_size = 0;
+		fxsave_area = NULL;
+	}
-	/* SMM module starts at offset SMM_DEFAULT_SIZE with the load alignment
-	 * taken into account. */
-	base = smram;
-	base += SMM_DEFAULT_SIZE;
    handler_size = rmodule_memory_size(&smm_mod);
+	base -= handler_size;
+	total_size += handler_size;
    module_alignment = rmodule_load_alignment(&smm_mod);
    alignment_size = module_alignment -
    			((uintptr_t)base % module_alignment);
@@ -389,22 +615,20 @@
    	base += alignment_size;
    }
-	if (CONFIG(SSE)) {
-		fxsave_size = FXSAVE_SIZE * params->num_concurrent_stacks;
-		/* FXSAVE area below all the stacks stack. */
-		fxsave_area = params->stack_top;
-		fxsave_area -= total_stack_size + fxsave_size;
-	} else {
-		fxsave_size = 0;
-		fxsave_area = NULL;
-	}
+	printk(BIOS_DEBUG,
+		"%s: total_smm_space_needed %zx, available -> %zx\n",
+		 __func__, total_size, size);
/* Does the required amount of memory exceed the SMRAM region size? */
-	total_size = total_stack_size + handler_size;
-	total_size += fxsave_size + SMM_DEFAULT_SIZE;
-
-	if (total_size > size)
+	if (total_size > size) {
+		printk(BIOS_ERR, "%s: need more SMRAM\n", __func__);
    	return -1;
+	}
+	if (handler_size > SMM_CODE_SEGMENT_SIZE) {
+		printk(BIOS_ERR, "%s: increase SMM_CODE_SEGMENT_SIZE: handler_size = %zx\n",
+			__func__, handler_size);
+		return -1;
+	}
if (rmodule_load(base, &smm_mod))
    	return -1;
@@ -412,5 +636,33 @@
    params->handler = rmodule_entry(&smm_mod);
    params->handler_arg = rmodule_parameters(&smm_mod);
-	return smm_module_setup_stub(smram, size, params, fxsave_area);
+	printk(BIOS_DEBUG, "%s: smram_start: 0x%p\n",
+		 __func__, smram);
+	printk(BIOS_DEBUG, "%s: smram_end: %p\n",
+		 __func__, smram + size);
+	printk(BIOS_DEBUG, "%s: stack_top: %p\n",
+		 __func__, params->stack_top);
+	printk(BIOS_DEBUG, "%s: handler start %p\n",
+		 __func__, params->handler);
+	printk(BIOS_DEBUG, "%s: handler_size %zx\n",
+		 __func__, handler_size);
+	printk(BIOS_DEBUG, "%s: handler_arg %p\n",
+		 __func__, params->handler_arg);
+	printk(BIOS_DEBUG, "%s: fxsave_area %p\n",
+		 __func__, fxsave_area);
+	printk(BIOS_DEBUG, "%s: fxsave_size %zx\n",
+		 __func__, fxsave_size);
+	printk(BIOS_DEBUG, "%s: CONFIG_MSEG_SIZE 0x%x\n",
+		 __func__, CONFIG_MSEG_SIZE);
+	printk(BIOS_DEBUG, "%s: CONFIG_BIOS_RESOURCE_LIST_SIZE 0x%x\n",
+		 __func__, CONFIG_BIOS_RESOURCE_LIST_SIZE);
+
+	/* CPU 0 smbase goes first, all other CPUs
+	 * will be staggered below
+	 */
+	base -= SMM_CODE_SEGMENT_SIZE;
+	printk(BIOS_DEBUG, "%s: cpu0 entry: %p\n",
+		 __func__, base);
+	params->smm_entry = (uintptr_t)base + params->smm_main_entry_offset;
+	return smm_module_setup_stub(base, size, params, fxsave_area);
 }
diff --git a/src/cpu/x86/smm/smm_module_loaderv2.c b/src/cpu/x86/smm/smm_module_loaderv2.c
deleted file mode 100644
index 54e46d4..0000000
--- a/src/cpu/x86/smm/smm_module_loaderv2.c
+++ /dev/null
@@ -1,668 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-
-#include <stdint.h>
-#include <string.h>
-#include <rmodule.h>
-#include <cpu/x86/smm.h>
-#include <commonlib/helpers.h>
-#include <console/console.h>
-#include <security/intel/stm/SmmStm.h>
-
-#define FXSAVE_SIZE 512
-#define SMM_CODE_SEGMENT_SIZE 0x10000
-/* FXSAVE area during relocation. While it may not be strictly needed the
-   SMM stub code relies on the FXSAVE area being non-zero to enable SSE
-   instructions within SMM mode. */
-static uint8_t fxsave_area_relocation[CONFIG_MAX_CPUS][FXSAVE_SIZE]
-__attribute__((aligned(16)));
-
-/*
- * Components that make up the SMRAM:
- * 1. Save state - the total save state memory used
- * 2. Stack - stacks for the CPUs in the SMM handler
- * 3. Stub - SMM stub code for calling into handler
- * 4. Handler - C-based SMM handler.
- *
- * The components are assumed to consist of one consecutive region.
- */
-
-/* These parameters are used by the SMM stub code. A pointer to the params
- * is also passed to the C-base handler. */
-struct smm_stub_params {
-	u32 stack_size;
-	u32 stack_top;
-	u32 c_handler;
-	u32 c_handler_arg;
-	u32 fxsave_area;
-	u32 fxsave_area_size;
-	struct smm_runtime runtime;
-} __packed;
-
-/*
- * The stub is the entry point that sets up protected mode and stacks for each
- * CPU. It then calls into the SMM handler module. It is encoded as an rmodule.
- */
-extern unsigned char _binary_smmstub_start[];
-
-/* Per CPU minimum stack size. */
-#define SMM_MINIMUM_STACK_SIZE 32
-
-struct cpu_smm_info {
-	uint8_t active;
-	uintptr_t smbase;
-	uintptr_t entry;
-	uintptr_t ss_start;
-	uintptr_t code_start;
-	uintptr_t code_end;
-};
-struct cpu_smm_info cpus[CONFIG_MAX_CPUS] = { 0 };
-
-/*
- * This method creates a map of all the CPU entry points, save state locations
- * and the beginning and end of code segments for each CPU. This map is used
- * during relocation to properly align as many CPUs that can fit into the SMRAM
- * region. For more information on how SMRAM works, refer to the latest Intel
- * developer's manuals (volume 3, chapter 34). SMRAM is divided up into the
- * following regions:
- * +-----------------+ Top of SMRAM
- * |                 | <- MSEG, FXSAVE
- * +-----------------+
- * |    common       |
- * |  smi handler    | 64K
- * |                 |
- * +-----------------+
- * | CPU 0 code  seg |
- * +-----------------+
- * | CPU 1 code seg  |
- * +-----------------+
- * | CPU x code seg  |
- * +-----------------+
- * |                 |
- * |                 |
- * +-----------------+
- * |    stacks       |
- * +-----------------+ <- START of SMRAM
- *
- * The code below checks when a code segment is full and begins placing the remainder
- * CPUs in the lower segments. The entry point for each CPU is smbase + 0x8000
- * and save state is smbase + 0x8000 + (0x8000 - state save size). Save state
- * area grows downward into the CPUs entry point.  Therefore staggering too many
- * CPUs in one 32K block will corrupt CPU0's entry code as the save states move
- * downward.
- * input : smbase of first CPU (all other CPUs
- *         will go below this address)
- * input : num_cpus in the system. The map will
- *         be created from 0 to num_cpus.
- */
-static int smm_create_map(uintptr_t smbase, unsigned int num_cpus,
-			const struct smm_loader_params *params)
-{
-	unsigned int i;
-	struct rmodule smm_stub;
-	unsigned int ss_size = params->per_cpu_save_state_size, stub_size;
-	unsigned int smm_entry_offset = params->smm_main_entry_offset;
-	unsigned int seg_count = 0, segments = 0, available;
-	unsigned int cpus_in_segment = 0;
-	unsigned int base = smbase;
-
-	if (rmodule_parse(&_binary_smmstub_start, &smm_stub)) {
-		printk(BIOS_ERR, "%s: unable to get SMM module size\n", __func__);
-		return 0;
-	}
-
-	stub_size = rmodule_memory_size(&smm_stub);
-	/* How many CPUs can fit into one 64K segment? */
-	available = 0xFFFF - smm_entry_offset - ss_size - stub_size;
-	if (available > 0) {
-		cpus_in_segment = available / ss_size;
-		/* minimum segments needed will always be 1 */
-		segments = num_cpus / cpus_in_segment + 1;
-		printk(BIOS_DEBUG,
-			"%s: cpus allowed in one segment %d\n", __func__, cpus_in_segment);
-		printk(BIOS_DEBUG,
-			"%s: min # of segments needed %d\n", __func__, segments);
-	} else {
-		printk(BIOS_ERR, "%s: not enough space in SMM to setup all CPUs\n", __func__);
-		printk(BIOS_ERR, "    save state & stub size need to be reduced\n");
-		printk(BIOS_ERR, "    or increase SMRAM size\n");
-		return 0;
-	}
-
-	if (sizeof(cpus) / sizeof(struct cpu_smm_info) < num_cpus) {
-		printk(BIOS_ERR,
-			"%s: increase MAX_CPUS in Kconfig\n", __func__);
-		return 0;
-	}
-
-	if (stub_size > ss_size) {
-		printk(BIOS_ERR, "%s: Save state larger than SMM stub size\n", __func__);
-		printk(BIOS_ERR, "    Decrease stub size or increase the size allocated for the save state\n");
-		return 0;
-	}
-
-	for (i = 0; i < num_cpus; i++) {
-		cpus[i].smbase = base;
-		cpus[i].entry = base + smm_entry_offset;
-		cpus[i].ss_start = cpus[i].entry + (smm_entry_offset - ss_size);
-		cpus[i].code_start = cpus[i].entry;
-		cpus[i].code_end = cpus[i].entry + stub_size;
-		cpus[i].active = 1;
-		base -= ss_size;
-		seg_count++;
-		if (seg_count >= cpus_in_segment) {
-			base -= smm_entry_offset;
-			seg_count = 0;
-		}
-	}
-
-	if (CONFIG_DEFAULT_CONSOLE_LOGLEVEL >= BIOS_DEBUG) {
-		seg_count = 0;
-		for (i = 0; i < num_cpus; i++) {
-			printk(BIOS_DEBUG, "CPU 0x%x\n", i);
-			printk(BIOS_DEBUG,
-				"    smbase %zx  entry %zx\n",
-				cpus[i].smbase, cpus[i].entry);
-			printk(BIOS_DEBUG,
-				"           ss_start %zx  code_end %zx\n",
-				cpus[i].ss_start, cpus[i].code_end);
-			seg_count++;
-			if (seg_count >= cpus_in_segment) {
-				printk(BIOS_DEBUG,
-					"-------------NEW CODE SEGMENT --------------\n");
-				seg_count = 0;
-			}
-		}
-	}
-	return 1;
-}
-
-/*
- * This method expects the smm relocation map to be complete.
- * This method does not read any HW registers, it simply uses a
- * map that was created during SMM setup.
- * input: cpu_num - cpu number which is used as an index into the
- *       map to return the smbase
- */
-u32 smm_get_cpu_smbase(unsigned int cpu_num)
-{
-	if (cpu_num < CONFIG_MAX_CPUS) {
-		if (cpus[cpu_num].active)
-			return cpus[cpu_num].smbase;
-	}
-	return 0;
-}
-
-/*
- * This method assumes that at least 1 CPU has been set up from
- * which it will place other CPUs below its smbase ensuring that
- * save state does not clobber the first CPUs init code segment. The init
- * code which is the smm stub code is the same for all CPUs. They enter
- * smm, setup stacks (based on their apic id), enter protected mode
- * and then jump to the common smi handler.  The stack is allocated
- * at the beginning of smram (aka tseg base, not smbase). The stack
- * pointer for each CPU is calculated by using its apic id
- * (code is in smm_stub.s)
- * Each entry point will now have the same stub code which, sets up the CPU
- * stack, enters protected mode and then jumps to the smi handler. It is
- * important to enter protected mode before the jump because the "jump to
- * address" might be larger than the 20bit address supported by real mode.
- * SMI entry right now is in real mode.
- * input: smbase - this is the smbase of the first cpu not the smbase
- *        where tseg starts (aka smram_start). All CPUs code segment
- *        and stack will be below this point except for the common
- *        SMI handler which is one segment above
- * input: num_cpus - number of cpus that need relocation including
- *        the first CPU (though its code is already loaded)
- * input: top of stack (stacks work downward by default in Intel HW)
- * output: return -1, if runtime smi code could not be installed. In
- *         this case SMM will not work and any SMI's generated will
- *         cause a CPU shutdown or general protection fault because
- *         the appropriate smi handling code was not installed
- */
-
-static int smm_place_entry_code(uintptr_t smbase, unsigned int num_cpus,
-				unsigned int stack_top, const struct smm_loader_params *params)
-{
-	unsigned int i;
-	unsigned int size;
-	if (smm_create_map(smbase, num_cpus, params)) {
-		/*
-		 * Ensure there was enough space and the last CPUs smbase
-		 * did not encroach upon the stack. Stack top is smram start
-		 * + size of stack.
-		 */
-		if (cpus[num_cpus].active) {
-			if (cpus[num_cpus - 1].smbase +
-				params->smm_main_entry_offset < stack_top) {
-				printk(BIOS_ERR, "%s: stack encroachment\n", __func__);
-				printk(BIOS_ERR, "%s: smbase %zx, stack_top %x\n",
-					__func__, cpus[num_cpus].smbase, stack_top);
-				return 0;
-			}
-		}
-	} else {
-		printk(BIOS_ERR, "%s: unable to place smm entry code\n", __func__);
-		return 0;
-	}
-
-	printk(BIOS_INFO, "%s: smbase %zx, stack_top %x\n",
-		__func__, cpus[num_cpus-1].smbase, stack_top);
-
-	/* start at 1, the first CPU stub code is already there */
-	size = cpus[0].code_end - cpus[0].code_start;
-	for (i = 1; i < num_cpus; i++) {
-		memcpy((int *)cpus[i].code_start, (int *)cpus[0].code_start, size);
-		printk(BIOS_DEBUG,
-			"SMM Module: placing smm entry code at %zx,  cpu # 0x%x\n",
-			cpus[i].code_start, i);
-		printk(BIOS_DEBUG, "%s: copying from %zx to %zx 0x%x bytes\n",
-			__func__, cpus[0].code_start, cpus[i].code_start, size);
-	}
-	return 1;
-}
-
-/*
- * Place stacks in base -> base + size region, but ensure the stacks don't
- * overlap the staggered entry points.
- */
-static void *smm_stub_place_stacks(char *base, size_t size,
-				   struct smm_loader_params *params)
-{
-	size_t total_stack_size;
-	char *stacks_top;
-
-	/* If stack space is requested assume the space lives in the lower
-	 * half of SMRAM. */
-	total_stack_size = params->per_cpu_stack_size *
-			   params->num_concurrent_stacks;
-	printk(BIOS_DEBUG, "%s: cpus: %zx : stack space: needed -> %zx\n",
-		__func__, params->num_concurrent_stacks,
-		total_stack_size);
-	printk(BIOS_DEBUG, "  available -> %zx : per_cpu_stack_size : %zx\n",
-		size, params->per_cpu_stack_size);
-
-	/* There has to be at least one stack user. */
-	if (params->num_concurrent_stacks < 1)
-		return NULL;
-
-	/* Total stack size cannot fit. */
-	if (total_stack_size > size)
-		return NULL;
-
-	/* Stacks extend down to SMBASE */
-	stacks_top = &base[total_stack_size];
-	printk(BIOS_DEBUG, "%s: exit, stack_top %p\n", __func__, stacks_top);
-
-	return stacks_top;
-}
-
-/*
- * Place the staggered entry points for each CPU. The entry points are
- * staggered by the per CPU SMM save state size extending down from
- * SMM_ENTRY_OFFSET.
- */
-static int smm_stub_place_staggered_entry_points(char *base,
-	const struct smm_loader_params *params, const struct rmodule *smm_stub)
-{
-	size_t stub_entry_offset;
-	int rc = 1;
-	stub_entry_offset = rmodule_entry_offset(smm_stub);
-	/* Each CPU now has its own stub code, which enters protected mode,
-	 * sets up the stack, and then jumps to common SMI handler
-	 */
-	if (params->num_concurrent_save_states > 1 || stub_entry_offset != 0) {
-		rc = smm_place_entry_code((unsigned int)base,
-			params->num_concurrent_save_states,
-			(unsigned int)params->stack_top, params);
-	}
-	return rc;
-}
-
-/*
- * The stub setup code assumes it is completely contained within the
- * default SMRAM size (0x10000) for the default SMI handler (entry at
- * 0x30000), but no assumption should be made for the permanent SMI handler.
- * The placement of CPU entry points for permanent handler are determined
- * by the number of CPUs in the system and the amount of SMRAM.
- * There are potentially 3 regions to place
- * within the default SMRAM size:
- * 1. Save state areas
- * 2. Stub code
- * 3. Stack areas
- *
- * The save state and smm stack are treated as contiguous for the number of
- * concurrent areas requested. The save state always lives at the top of the
- * the CPUS smbase (and the entry point is at offset 0x8000). This allows only a certain
- * number of CPUs with staggered entry points until the save state area comes
- * down far enough to overwrite/corrupt the entry code (stub code). Therefore,
- * an SMM map is created to avoid this corruption, see smm_create_map() above.
- * This module setup code works for the default (0x30000) SMM handler setup and the
- * permanent SMM handler.
- */
-static int smm_module_setup_stub(void *smbase, size_t smm_size,
-				 struct smm_loader_params *params,
-				 void *fxsave_area)
-{
-	size_t total_save_state_size;
-	size_t smm_stub_size;
-	size_t stub_entry_offset;
-	char *smm_stub_loc;
-	void *stacks_top;
-	size_t size;
-	char *base;
-	size_t i;
-	struct smm_stub_params *stub_params;
-	struct rmodule smm_stub;
-	unsigned int total_size_all;
-	base = smbase;
-	size = smm_size;
-
-	/* The number of concurrent stacks cannot exceed CONFIG_MAX_CPUS. */
-	if (params->num_concurrent_stacks > CONFIG_MAX_CPUS) {
-		printk(BIOS_ERR, "%s: not enough stacks\n", __func__);
-		return -1;
-	}
-
-	/* Fail if can't parse the smm stub rmodule. */
-	if (rmodule_parse(&_binary_smmstub_start, &smm_stub)) {
-		printk(BIOS_ERR, "%s: unable to parse smm stub\n", __func__);
-		return -1;
-	}
-
-	/* Adjust remaining size to account for save state. */
-	total_save_state_size = params->per_cpu_save_state_size *
-				params->num_concurrent_save_states;
-	if (total_save_state_size > size) {
-		printk(BIOS_ERR,
-			"%s: more state save space needed:need -> %zx:available->%zx\n",
-			__func__, total_save_state_size, size);
-		return -1;
-	}
-
-	size -= total_save_state_size;
-
-	/* The save state size encroached over the first SMM entry point. */
-	if (size <= params->smm_main_entry_offset) {
-		printk(BIOS_ERR, "%s: encroachment over SMM entry point\n", __func__);
-		printk(BIOS_ERR, "%s: state save size: %zx : smm_entry_offset -> %x\n",
-			__func__, size, params->smm_main_entry_offset);
-		return -1;
-	}
-
-	/* Need a minimum stack size and alignment. */
-	if (params->per_cpu_stack_size <= SMM_MINIMUM_STACK_SIZE ||
-	    (params->per_cpu_stack_size & 3) != 0) {
-		printk(BIOS_ERR, "%s: need minimum stack size\n", __func__);
-		return -1;
-	}
-
-	smm_stub_loc = NULL;
-	smm_stub_size = rmodule_memory_size(&smm_stub);
-	stub_entry_offset = rmodule_entry_offset(&smm_stub);
-
-	/* Put the stub at the main entry point */
-	smm_stub_loc = &base[params->smm_main_entry_offset];
-
-	/* Stub is too big to fit. */
-	if (smm_stub_size > (size - params->smm_main_entry_offset)) {
-		printk(BIOS_ERR, "%s: stub is too big to fit\n", __func__);
-		return -1;
-	}
-
-	/* The stacks, if requested, live in the lower half of SMRAM space
-	 * for default handler, but for relocated handler it lives at the beginning
-	 * of SMRAM which is TSEG base
-	 */
-	const total_stack_size = params->num_concurrent_stacks * params->per_cpu_stack_size;
-	/* Use the smbase as a proxy to know if we are installing the stub for relocation
-	 * or for permanent handling. In case of relocation the SMM relocation stack will
-	 * have been allocated on the ramstage heap and programmed in the smm loader params.
-	 */
-	if (smbase == (void *)SMM_DEFAULT_BASE)
-		stacks_top = params->stack_top;
-	else
-		stacks_top = smm_stub_place_stacks((char *)params->smram_start, size, params);
-
-	if (stacks_top == NULL) {
-		printk(BIOS_ERR, "%s: not enough space for stacks\n", __func__);
-		printk(BIOS_ERR, "%s: ....need -> %p : available -> %zx\n", __func__,
-			base, total_stack_size);
-		return -1;
-	}
-	params->stack_top = stacks_top;
-	/* Load the stub. */
-	if (rmodule_load(smm_stub_loc, &smm_stub)) {
-		printk(BIOS_ERR, "%s: load module failed\n", __func__);
-		return -1;
-	}
-
-	if (!smm_stub_place_staggered_entry_points(base, params, &smm_stub)) {
-		printk(BIOS_ERR, "%s: staggered entry points failed\n", __func__);
-		return -1;
-	}
-
-	/* Setup the parameters for the stub code. */
-	stub_params = rmodule_parameters(&smm_stub);
-	stub_params->stack_top = (uintptr_t)stacks_top;
-	stub_params->stack_size = params->per_cpu_stack_size;
-	stub_params->c_handler = (uintptr_t)params->handler;
-	stub_params->c_handler_arg = (uintptr_t)params->handler_arg;
-	stub_params->fxsave_area = (uintptr_t)fxsave_area;
-	stub_params->fxsave_area_size = FXSAVE_SIZE;
-	stub_params->runtime.smbase = (uintptr_t)smbase;
-	stub_params->runtime.smm_size = smm_size;
-	stub_params->runtime.save_state_size = params->per_cpu_save_state_size;
-	stub_params->runtime.num_cpus = params->num_concurrent_stacks;
-
-	printk(BIOS_DEBUG, "%s: stack_end = 0x%lx\n",
-		__func__, stub_params->stack_top - total_stack_size);
-	printk(BIOS_DEBUG,
-		"%s: stack_top = 0x%x\n", __func__, stub_params->stack_top);
-	printk(BIOS_DEBUG, "%s: stack_size = 0x%x\n",
-		__func__, stub_params->stack_size);
-	printk(BIOS_DEBUG, "%s: runtime.smbase = 0x%x\n",
-		__func__, stub_params->runtime.smbase);
-	printk(BIOS_DEBUG, "%s: runtime.start32_offset = 0x%x\n", __func__,
-		stub_params->runtime.start32_offset);
-	printk(BIOS_DEBUG, "%s: runtime.smm_size = 0x%zx\n",
-		__func__, smm_size);
-	printk(BIOS_DEBUG, "%s: per_cpu_save_state_size = 0x%x\n",
-		__func__, stub_params->runtime.save_state_size);
-	printk(BIOS_DEBUG, "%s: num_cpus = 0x%x\n", __func__,
-		stub_params->runtime.num_cpus);
-	printk(BIOS_DEBUG, "%s: total_save_state_size = 0x%x\n",
-		__func__, (stub_params->runtime.save_state_size *
-		stub_params->runtime.num_cpus));
-	total_size_all = stub_params->stack_size +
-		(stub_params->runtime.save_state_size *
-		stub_params->runtime.num_cpus);
-	printk(BIOS_DEBUG, "%s: total_size_all = 0x%x\n", __func__,
-		total_size_all);
-
-	/* Initialize the APIC id to CPU number table to be 1:1 */
-	for (i = 0; i < params->num_concurrent_stacks; i++)
-		stub_params->runtime.apic_id_to_cpu[i] = i;
-
-	/* Allow the initiator to manipulate SMM stub parameters. */
-	params->runtime = &stub_params->runtime;
-
-	printk(BIOS_DEBUG, "SMM Module: stub loaded at %p. Will call %p(%p)\n",
-	       smm_stub_loc, params->handler, params->handler_arg);
-	return 0;
-}
-
-/*
- * smm_setup_relocation_handler assumes the callback is already loaded in
- * memory. i.e. Another SMM module isn't chained to the stub. The other
- * assumption is that the stub will be entered from the default SMRAM
- * location: 0x30000 -> 0x40000.
- */
-int smm_setup_relocation_handler(struct smm_loader_params *params)
-{
-	void *smram = (void *)(SMM_DEFAULT_BASE);
-	printk(BIOS_SPEW, "%s: enter\n", __func__);
-	/* There can't be more than 1 concurrent save state for the relocation
-	 * handler because all CPUs default to 0x30000 as SMBASE. */
-	if (params->num_concurrent_save_states > 1)
-		return -1;
-
-	/* A handler has to be defined to call for relocation. */
-	if (params->handler == NULL)
-		return -1;
-
-	/* Since the relocation handler always uses stack, adjust the number
-	 * of concurrent stack users to be CONFIG_MAX_CPUS. */
-	if (params->num_concurrent_stacks == 0)
-		params->num_concurrent_stacks = CONFIG_MAX_CPUS;
-
-	params->smm_main_entry_offset = SMM_ENTRY_OFFSET;
-	params->smram_start = SMM_DEFAULT_BASE;
-	params->smram_end = SMM_DEFAULT_BASE + SMM_DEFAULT_SIZE;
-	return smm_module_setup_stub(smram, SMM_DEFAULT_SIZE,
-				params, fxsave_area_relocation);
-	printk(BIOS_SPEW, "%s: exit\n", __func__);
-}
-
-/*
- *The SMM module is placed within the provided region in the following
- * manner:
- * +-----------------+ <- smram + size
- * | BIOS resource   |
- * | list (STM)      |
- * +-----------------+
- * |  fxsave area    |
- * +-----------------+
- * |  smi handler    |
- * |      ...        |
- * +-----------------+ <- cpu0
- * |    stub code    | <- cpu1
- * |    stub code    | <- cpu2
- * |    stub code    | <- cpu3, etc
- * |                 |
- * |                 |
- * |                 |
- * |    stacks       |
- * +-----------------+ <- smram start
-
- * It should be noted that this algorithm will not work for
- * SMM_DEFAULT_SIZE SMRAM regions such as the A segment. This algorithm
- * expects a region large enough to encompass the handler and stacks
- * as well as the SMM_DEFAULT_SIZE.
- */
-int smm_load_module(void *smram, size_t size, struct smm_loader_params *params)
-{
-	struct rmodule smm_mod;
-	size_t total_stack_size;
-	size_t handler_size;
-	size_t module_alignment;
-	size_t alignment_size;
-	size_t fxsave_size;
-	void *fxsave_area;
-	size_t total_size = 0;
-	char *base;
-
-	if (size <= SMM_DEFAULT_SIZE)
-		return -1;
-
-	/* Load main SMI handler at the top of SMRAM
-	 * everything else will go below
-	 */
-	base = smram;
-	base += size;
-	params->smram_start = (uintptr_t)smram;
-	params->smram_end = params->smram_start + size;
-	params->smm_main_entry_offset = SMM_ENTRY_OFFSET;
-
-	/* Fail if can't parse the smm rmodule. */
-	if (rmodule_parse(&_binary_smm_start, &smm_mod))
-		return -1;
-
-	/* Clear SMM region */
-	if (CONFIG(DEBUG_SMI))
-		memset(smram, 0xcd, size);
-
-	total_stack_size = params->per_cpu_stack_size *
-			   params->num_concurrent_stacks;
-	total_size += total_stack_size;
-	/* Stacks are the base of SMRAM */
-	params->stack_top = smram + total_stack_size;
-
-	/* MSEG starts at the top of SMRAM and works down */
-	if (CONFIG(STM)) {
-		base -= CONFIG_MSEG_SIZE + CONFIG_BIOS_RESOURCE_LIST_SIZE;
-		total_size += CONFIG_MSEG_SIZE + CONFIG_BIOS_RESOURCE_LIST_SIZE;
-	}
-
-	/* FXSAVE goes below MSEG */
-	if (CONFIG(SSE)) {
-		fxsave_size = FXSAVE_SIZE * params->num_concurrent_stacks;
-		fxsave_area = base - fxsave_size;
-		base -= fxsave_size;
-		total_size += fxsave_size;
-	} else {
-		fxsave_size = 0;
-		fxsave_area = NULL;
-	}
-
-	handler_size = rmodule_memory_size(&smm_mod);
-	base -= handler_size;
-	total_size += handler_size;
-	module_alignment = rmodule_load_alignment(&smm_mod);
-	alignment_size = module_alignment -
-				((uintptr_t)base % module_alignment);
-	if (alignment_size != module_alignment) {
-		handler_size += alignment_size;
-		base += alignment_size;
-	}
-
-	printk(BIOS_DEBUG,
-		"%s: total_smm_space_needed %zx, available -> %zx\n",
-		 __func__, total_size, size);
-
-	/* Does the required amount of memory exceed the SMRAM region size? */
-	if (total_size > size) {
-		printk(BIOS_ERR, "%s: need more SMRAM\n", __func__);
-		return -1;
-	}
-	if (handler_size > SMM_CODE_SEGMENT_SIZE) {
-		printk(BIOS_ERR, "%s: increase SMM_CODE_SEGMENT_SIZE: handler_size = %zx\n",
-			__func__, handler_size);
-		return -1;
-	}
-
-	if (rmodule_load(base, &smm_mod))
-		return -1;
-
-	params->handler = rmodule_entry(&smm_mod);
-	params->handler_arg = rmodule_parameters(&smm_mod);
-
-	printk(BIOS_DEBUG, "%s: smram_start: 0x%p\n",
-		 __func__, smram);
-	printk(BIOS_DEBUG, "%s: smram_end: %p\n",
-		 __func__, smram + size);
-	printk(BIOS_DEBUG, "%s: stack_top: %p\n",
-		 __func__, params->stack_top);
-	printk(BIOS_DEBUG, "%s: handler start %p\n",
-		 __func__, params->handler);
-	printk(BIOS_DEBUG, "%s: handler_size %zx\n",
-		 __func__, handler_size);
-	printk(BIOS_DEBUG, "%s: handler_arg %p\n",
-		 __func__, params->handler_arg);
-	printk(BIOS_DEBUG, "%s: fxsave_area %p\n",
-		 __func__, fxsave_area);
-	printk(BIOS_DEBUG, "%s: fxsave_size %zx\n",
-		 __func__, fxsave_size);
-	printk(BIOS_DEBUG, "%s: CONFIG_MSEG_SIZE 0x%x\n",
-		 __func__, CONFIG_MSEG_SIZE);
-	printk(BIOS_DEBUG, "%s: CONFIG_BIOS_RESOURCE_LIST_SIZE 0x%x\n",
-		 __func__, CONFIG_BIOS_RESOURCE_LIST_SIZE);
-
-	/* CPU 0 smbase goes first, all other CPUs
-	 * will be staggered below
-	 */
-	base -= SMM_CODE_SEGMENT_SIZE;
-	printk(BIOS_DEBUG, "%s: cpu0 entry: %p\n",
-		 __func__, base);
-	params->smm_entry = (uintptr_t)base + params->smm_main_entry_offset;
-	return smm_module_setup_stub(base, size, params, fxsave_area);
-}
diff --git a/src/include/cpu/x86/smm.h b/src/include/cpu/x86/smm.h
index 1073d03..7491d95 100644
--- a/src/include/cpu/x86/smm.h
+++ b/src/include/cpu/x86/smm.h
@@ -128,7 +128,6 @@
  *             into this field so the code doing the loading can manipulate the
  *             runtime's assumptions. e.g. updating the APIC id to CPU map to
  *             handle sparse APIC id space.
- * The following parameters are only used when X86_SMM_LOADER_VERSION2 is enabled.
  * - smm_entry - entry address of first CPU thread, all others will be tiled
  *               below this address.
  * - smm_main_entry_offset - default entry offset (e.g 0x8000)
@@ -148,22 +147,16 @@
    struct smm_runtime *runtime;
-	/* The following are only used by X86_SMM_LOADER_VERSION2 */
-#if CONFIG(X86_SMM_LOADER_VERSION2)
    unsigned int smm_entry;
    unsigned int smm_main_entry_offset;
    unsigned int smram_start;
    unsigned int smram_end;
-#endif
 };
 /* Both of these return 0 on success, < 0 on failure. */
 int smm_setup_relocation_handler(struct smm_loader_params *params);
 int smm_load_module(void *smram, size_t size, struct smm_loader_params *params);
-
-#if CONFIG(X86_SMM_LOADER_VERSION2)
 u32 smm_get_cpu_smbase(unsigned int cpu_num);
-#endif
 /* Backup and restore default SMM region. */
 void *backup_default_smm_area(void);
-- 
To view, visit https://review.coreboot.org/c/coreboot/+/47073
To unsubscribe, or for help writing mail filters, visit https://review.coreboot.org/settings

Gerrit-Project: coreboot
Gerrit-Branch: master
Gerrit-Change-Id: If52e38ebd2baf4fc80d433e65626c537c0c1f1aa
Gerrit-Change-Number: 47073
Gerrit-PatchSet: 1
Gerrit-Owner: Arthur Heymans arthur@aheymans.xyz
Gerrit-Reviewer: Martin Roth martinroth@google.com
Gerrit-Reviewer: Patrick Georgi pgeorgi@google.com
Gerrit-MessageType: newchange



    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

[coreboot-gerrit] Change in coreboot[master]: cpu/x86/smm: Always use SMM_LOADER_VERSION2