Stefan Reinauer (stefan.reinauer(a)coreboot.org) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/2808
-gerrit
commit c3c24326b1f2aa26ae3698ec724155e92de89aa6
Author: Aaron Durbin <adurbin(a)chromium.org>
Date: Mon Feb 25 10:51:52 2013 -0600
lynxpoint: update MBP give up routine
I'm not sure if I screwed this up originally or the Intel docs changed
(I didn't bother to go back and check). According to the ME BWG 1.1.0, the
give up bit is in the host general status #2 register.
Change-Id: Ieaaf524b93e9eb9806173121dda63d0133278c2d
Signed-off-by: Aaron Durbin <adurbin(a)chromium.org>
---
src/southbridge/intel/lynxpoint/me.h | 2 --
src/southbridge/intel/lynxpoint/me_9.x.c | 2 +-
2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/southbridge/intel/lynxpoint/me.h b/src/southbridge/intel/lynxpoint/me.h
index ecd12e4..a305822 100644
--- a/src/southbridge/intel/lynxpoint/me.h
+++ b/src/southbridge/intel/lynxpoint/me.h
@@ -201,8 +201,6 @@ struct me_hfs2 {
#define PCI_ME_H_GS2 0x70
#define PCI_ME_MBP_GIVE_UP 0x01
-#define PCI_ME_H_GS3 0x74
-
#define PCI_ME_HERES 0xbc
#define PCI_ME_EXT_SHA1 0x00
#define PCI_ME_EXT_SHA256 0x02
diff --git a/src/southbridge/intel/lynxpoint/me_9.x.c b/src/southbridge/intel/lynxpoint/me_9.x.c
index 63b5205..2e790fc 100644
--- a/src/southbridge/intel/lynxpoint/me_9.x.c
+++ b/src/southbridge/intel/lynxpoint/me_9.x.c
@@ -804,7 +804,7 @@ static void intel_me_mbp_give_up(device_t dev)
struct mei_csr csr;
reg32 = PCI_ME_MBP_GIVE_UP;
- pci_write_config32(dev, PCI_ME_H_GS3, reg32);
+ pci_write_config32(dev, PCI_ME_H_GS2, reg32);
read_host_csr(&csr);
csr.reset = 1;
csr.interrupt_generate = 1;
Stefan Reinauer (stefan.reinauer(a)coreboot.org) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/2807
-gerrit
commit aa4482bbee5c7b78bcd992611206d7e2846741ae
Author: Aaron Durbin <adurbin(a)chromium.org>
Date: Tue Feb 19 08:59:16 2013 -0600
haswell: RESET_ON_INVALID_RAMSTAGE_CACHE option
The RESET_ON_INVALID_RAMSTAGE_CACHE option indicates what to do
when the ramstage cache is found to be invalid on an S3 wake. If
selected, the system will perform a reset on S3 wake when the
ramstage cache is invalid. Otherwise it will signal that the
ramstage should be reloaded from cbfs.
Change-Id: I8f21fcfc7f95fb3377ed2932868aa49a68904803
Signed-off-by: Aaron Durbin <adurbin(a)chromium.org>
---
src/cpu/intel/haswell/Kconfig | 12 ++++++++++++
src/cpu/intel/haswell/romstage.c | 17 +++++++++++++----
2 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/src/cpu/intel/haswell/Kconfig b/src/cpu/intel/haswell/Kconfig
index b49795d..d1f521d 100644
--- a/src/cpu/intel/haswell/Kconfig
+++ b/src/cpu/intel/haswell/Kconfig
@@ -41,4 +41,16 @@ config MICROCODE_INCLUDE_PATH
string
default "src/cpu/intel/haswell"
+config RESET_ON_INVALID_RAMSTAGE_CACHE
+ bool "Reset the system on S3 wake when ramstage cache invalid."
+ default n
+ depends on RELOCATABLE_RAMSTAGE
+ help
+ The haswell romstage code caches the loaded ramstage program
+ in SMM space. On S3 wake the romstage will copy over a fresh
+ ramstage that was cached in the SMM space. This option determines
+ the action to take when the ramstage cache is invalid. If selected,
+ the system will reset; otherwise the ramstage will be reloaded from
+ cbfs.
+
endif
diff --git a/src/cpu/intel/haswell/romstage.c b/src/cpu/intel/haswell/romstage.c
index d491c7e..4ece6c2 100644
--- a/src/cpu/intel/haswell/romstage.c
+++ b/src/cpu/intel/haswell/romstage.c
@@ -36,6 +36,7 @@
#include <cbmem.h>
#include <cbfs.h>
#include <romstage_handoff.h>
+#include <reset.h>
#if CONFIG_CHROMEOS
#include <vendorcode/google/chromeos/chromeos.h>
#endif
@@ -46,6 +47,14 @@
#include "southbridge/intel/lynxpoint/me.h"
+static inline void reset_system(void)
+{
+ hard_reset();
+ while (1) {
+ hlt();
+ }
+}
+
/* The cache-as-ram assembly file calls romstage_main() after setting up
* cache-as-ram. romstage_main() will then call the mainboards's
* mainboard_romstage_entry() function. That function then calls
@@ -271,10 +280,7 @@ void romstage_common(const struct romstage_params *params)
#if CONFIG_HAVE_ACPI_RESUME
if (wake_from_s3 && !cbmem_was_initted) {
/* Failed S3 resume, reset to come up cleanly */
- outb(0x6, 0xcf9);
- while (1) {
- hlt();
- }
+ reset_system();
}
#endif
@@ -375,6 +381,9 @@ void *load_cached_ramstage(struct romstage_handoff *handoff)
if (cache->magic != RAMSTAGE_CACHE_MAGIC) {
printk(BIOS_DEBUG, "Invalid ramstage cache found.\n");
+ #if CONFIG_RESET_ON_INVALID_RAMSTAGE_CACHE
+ reset_system();
+ #endif
return NULL;
}
Stefan Reinauer (stefan.reinauer(a)coreboot.org) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/2806
-gerrit
commit 65cb69c95de57f0d79512c777ed9ff1ba6e2f267
Author: Aaron Durbin <adurbin(a)chromium.org>
Date: Sat Feb 16 00:05:52 2013 -0600
haswell: implement ramstage caching in SMM region
Cache the relocated ramstage into the SMM region. There is
a reserved region within the final SMM region (TSEG). Use that
space to cache the relocated ramstage program. That way, on S3 resume
there is a copy that can be loaded quickly instead of accessing the
flash. Caching the ramstage in the SMM space is also helpful in that
it prevents the OS from tampering with the ramstage program.
Change-Id: Ifa695ad1c350d5b504b14cc29d3e83c79b317a62
Signed-off-by: Aaron Durbin <adurbin(a)chromium.org>
---
src/cpu/intel/haswell/haswell.h | 17 +++++++++++
src/cpu/intel/haswell/romstage.c | 65 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 82 insertions(+)
diff --git a/src/cpu/intel/haswell/haswell.h b/src/cpu/intel/haswell/haswell.h
index 3a5ebe6..a1c6f39 100644
--- a/src/cpu/intel/haswell/haswell.h
+++ b/src/cpu/intel/haswell/haswell.h
@@ -174,6 +174,23 @@ int setup_ap_init(struct bus *cpu_bus, int *max_cpus,
int start_aps(struct bus *cpu_bus, int max_cpus);
void release_aps_for_smm_relocation(int do_parallel_relocation);
#endif
+
+/* This structure is saved along with the relocated ramstage program in SMM
+ * space. It is used to protect the integrity of the ramstage program on S3
+ * resume by saving a copy of the relocated ramstage in SMM space with the
+ * assumption that the SMM region cannot be altered from the OS. The magic
+ * value just serves as a quick sanity check. */
+
+#define RAMSTAGE_CACHE_MAGIC 0xf3c3a02a
+
+struct ramstage_cache {
+ uint32_t magic;
+ uint32_t entry_point;
+ uint32_t load_address;
+ uint32_t size;
+ char program[0];
+} __attribute__((packed));
+
#endif
#endif
diff --git a/src/cpu/intel/haswell/romstage.c b/src/cpu/intel/haswell/romstage.c
index b26cbde..d491c7e 100644
--- a/src/cpu/intel/haswell/romstage.c
+++ b/src/cpu/intel/haswell/romstage.c
@@ -34,6 +34,7 @@
#include <device/pci_def.h>
#include <cpu/x86/lapic.h>
#include <cbmem.h>
+#include <cbfs.h>
#include <romstage_handoff.h>
#if CONFIG_CHROMEOS
#include <vendorcode/google/chromeos/chromeos.h>
@@ -320,3 +321,67 @@ void romstage_after_car(void)
/* Load the ramstage. */
copy_and_run(0);
}
+
+
+#if CONFIG_RELOCATABLE_RAMSTAGE
+void cache_loaded_ramstage(struct romstage_handoff *handoff,
+ void *ramstage_base, uint32_t ramstage_size,
+ void *entry_point)
+{
+ struct ramstage_cache *cache;
+ uint32_t total_size;
+
+ /* The ramstage cache lives in the TSEG region at RESERVED_SMM_OFFSET.
+ * The top of ram is defined to be the TSEG base address. */
+ cache = (void *)(get_top_of_ram() + RESERVED_SMM_OFFSET);
+ total_size = sizeof(*cache) + ramstage_size;
+ if (total_size > RESERVED_SMM_SIZE) {
+ printk(BIOS_DEBUG, "0x%08x > RESERVED_SMM_SIZE (0x%08x)\n",
+ total_size, RESERVED_SMM_SIZE);
+ /* Nuke whatever may be there now just in case. */
+ cache->magic = ~RAMSTAGE_CACHE_MAGIC;
+ return;
+ }
+
+ cache->magic = RAMSTAGE_CACHE_MAGIC;
+ cache->entry_point = (uint32_t)entry_point;
+ cache->load_address = (uint32_t)ramstage_base;
+ cache->size = ramstage_size;
+
+ printk(BIOS_DEBUG, "Saving ramstage to SMM space cache.\n");
+
+ /* Copy over the program. */
+ memcpy(&cache->program[0], ramstage_base, ramstage_size);
+
+ /* Do not update reserve region if the handoff structure is not
+ * available. Perhaps the ramstage will fix things up for the resume
+ * path. */
+ if (handoff == NULL)
+ return;
+
+ /* Update entry and reserve region. */
+ handoff->reserve_base = (uint32_t)ramstage_base;
+ handoff->reserve_size = ramstage_size;
+ handoff->ramstage_entry_point = (uint32_t)entry_point;
+}
+
+void *load_cached_ramstage(struct romstage_handoff *handoff)
+{
+ struct ramstage_cache *cache;
+
+ /* The ramstage cache lives in the TSEG region at RESERVED_SMM_OFFSET.
+ * The top of ram is defined to be the TSEG base address. */
+ cache = (void *)(get_top_of_ram() + RESERVED_SMM_OFFSET);
+
+ if (cache->magic != RAMSTAGE_CACHE_MAGIC) {
+ printk(BIOS_DEBUG, "Invalid ramstage cache found.\n");
+ return NULL;
+ }
+
+ printk(BIOS_DEBUG, "Loading ramstage from SMM space cache.\n");
+
+ memcpy((void *)cache->load_address, &cache->program[0], cache->size);
+
+ return (void *)cache->entry_point;
+}
+#endif
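For reference, the following standalone sketch shows the cache layout the patch
above places in the reserved TSEG area. The addresses and sizes are hypothetical
stand-ins for what coreboot derives at runtime from get_top_of_ram(),
RESERVED_SMM_OFFSET and RESERVED_SMM_SIZE.

/* Standalone sketch of the SMM-region cache layout used by the patch above.
 * The TSEG base, reserved offset/size and ramstage size are made-up example
 * values, not the ones coreboot computes. */
#include <stdint.h>
#include <stdio.h>

#define RAMSTAGE_CACHE_MAGIC 0xf3c3a02a

struct ramstage_cache {
	uint32_t magic;
	uint32_t entry_point;
	uint32_t load_address;
	uint32_t size;
	char program[0];
} __attribute__((packed));

int main(void)
{
	const uint32_t tseg_base = 0x7c000000;     /* hypothetical TSEG base */
	const uint32_t reserved_offset = 0x100000; /* hypothetical offset */
	const uint32_t reserved_size = 0x100000;   /* hypothetical size */
	const uint32_t ramstage_size = 0x38000;    /* hypothetical ramstage */
	uint32_t cache_addr = tseg_base + reserved_offset;
	uint32_t total = sizeof(struct ramstage_cache) + ramstage_size;

	printf("expected magic:  0x%08x\n", RAMSTAGE_CACHE_MAGIC);
	printf("cache header  @  0x%08x, program data @ 0x%08x\n",
	       (unsigned)cache_addr,
	       (unsigned)(cache_addr + sizeof(struct ramstage_cache)));
	printf("total 0x%08x %s the reserved 0x%08x window\n",
	       (unsigned)total,
	       total <= reserved_size ? "fits in" : "exceeds",
	       (unsigned)reserved_size);
	return 0;
}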
Stefan Reinauer (stefan.reinauer(a)coreboot.org) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/2805
-gerrit
commit 301254ddb13a47b257a4910593dd6f0c333d6fc2
Author: Aaron Durbin <adurbin(a)chromium.org>
Date: Fri Feb 15 23:26:52 2013 -0600
coreboot: add caching loaded ramstage interface
Instead of hard coding the policy for how a relocated ramstage
image is saved, add an interface. The interface consists of two
functions: cache_loaded_ramstage() and load_cached_ramstage()
cache and load the relocated ramstage, respectively. There are
default implementations which cache and load the relocated
ramstage just below where the ramstage runs.
Change-Id: I4346e873d8543e7eee4c1cd484847d846f297bb0
Signed-off-by: Aaron Durbin <adurbin(a)chromium.org>
---
src/include/cbfs.h | 21 +++++++++++++++
src/lib/cbfs.c | 77 +++++++++++++++++++++++++++++++++++-------------------
2 files changed, 71 insertions(+), 27 deletions(-)
diff --git a/src/include/cbfs.h b/src/include/cbfs.h
index 9bfe2ce..811df88 100644
--- a/src/include/cbfs.h
+++ b/src/include/cbfs.h
@@ -83,5 +83,26 @@ int selfboot(struct lb_memory *mem, struct cbfs_payload *payload);
/* Defined in individual arch / board implementation. */
int init_default_cbfs_media(struct cbfs_media *media);
+#if CONFIG_RELOCATABLE_RAMSTAGE && defined(__PRE_RAM__)
+/* The cache_loaded_ramstage() and load_cached_ramstage() functions are defined
+ * to be weak so that board and chipset code may override them. Their job is to
+ * cache and load the ramstage for quick S3 resume. By default a copy of the
+ * relocated ramstage is saved just below the running ramstage region. These
+ * functions are only valid during romstage. */
+
+struct romstage_handoff;
+
+/* The implementer of cache_loaded_ramstage() needs to ensure that the
+ * reserve_* fields in romstage_handoff reflect the memory footprint of the
+ * ramstage (including cached region). Note that the handoff variable can be
+ * NULL. */
+void __attribute__((weak))
+cache_loaded_ramstage(struct romstage_handoff *handoff, void *ramstage_base,
+ uint32_t ramstage_size, void *entry_point);
+/* Return NULL on error or entry point on success. */
+void * __attribute__((weak))
+load_cached_ramstage(struct romstage_handoff *handoff);
+#endif /* CONFIG_RELOCATABLE_RAMSTAGE */
+
#endif
diff --git a/src/lib/cbfs.c b/src/lib/cbfs.c
index 6e89a39..8bcb000 100644
--- a/src/lib/cbfs.c
+++ b/src/lib/cbfs.c
@@ -123,16 +123,53 @@ void *cbfs_load_optionrom(struct cbfs_media *media, uint16_t vendor,
* for the romstage, the rmodule loader is used. The ramstage is placed just
* below the cbmem location. */
+void __attribute__((weak))
+cache_loaded_ramstage(struct romstage_handoff *handoff, void *ramstage_base,
+ uint32_t ramstage_size, void *entry_point)
+{
+ if (handoff == NULL)
+ return;
+
+ /* Cache the loaded ramstage just below the to-be-run ramstage. Then
+ * save the base, size, and entry point in the handoff area. */
+ handoff->reserve_base = (uint32_t)ramstage_base - ramstage_size;
+ handoff->reserve_size = ramstage_size;
+ handoff->ramstage_entry_point = (uint32_t)entry_point;
+
+ memcpy((void *)handoff->reserve_base, ramstage_base, ramstage_size);
+
+ /* Update the reserve region by 2x in order to store the cached copy. */
+ handoff->reserve_size += handoff->reserve_size;
+}
+
+void * __attribute__((weak))
+load_cached_ramstage(struct romstage_handoff *handoff)
+{
+ uint32_t ramstage_size;
+
+ if (handoff == NULL)
+ return NULL;
+
+ /* Load the cached ramstage copy into the to-be-run region. It is just
+ * above the cached copy. */
+ ramstage_size = handoff->reserve_size / 2;
+ memcpy((void *)(handoff->reserve_base + ramstage_size),
+ (void *)handoff->reserve_base, ramstage_size);
+
+ return (void *)handoff->ramstage_entry_point;
+}
+
static void *load_stage_from_cbfs(struct cbfs_media *media, const char *name,
struct romstage_handoff *handoff)
{
struct cbfs_stage *stage;
struct rmodule ramstage;
- void *cbmem_base;
- void *ramstage_base;
+ char *cbmem_base;
+ char *ramstage_base;
void *decompression_loc;
void *ramstage_loc;
void *entry_point;
+ uint32_t ramstage_size;
stage = (struct cbfs_stage *)
cbfs_get_file_content(media, name, CBFS_TYPE_STAGE);
@@ -140,7 +177,7 @@ static void *load_stage_from_cbfs(struct cbfs_media *media, const char *name,
if (stage == NULL)
return (void *) -1;
- cbmem_base = get_cbmem_toc();
+ cbmem_base = (void *)get_cbmem_toc();
if (cbmem_base == NULL)
return (void *) -1;
@@ -165,24 +202,9 @@ static void *load_stage_from_cbfs(struct cbfs_media *media, const char *name,
entry_point = rmodule_entry(&ramstage);
- if (handoff) {
- handoff->reserve_base = (uint32_t)ramstage_base;
- handoff->reserve_size = (uint32_t)cbmem_base -
- (uint32_t)ramstage_base;
- /* Save an entire copy in RAM of the relocated ramstage for
- * the S3 resume path. The size of the saved relocated ramstage
- * is larger than necessary. It could be optimized by saving
- * just the text/data segment of the ramstage. The rmodule
- * API would need to be modified to expose these details. For
- * the time being, just save the entire used region. */
- memcpy((void *)(handoff->reserve_base - handoff->reserve_size),
- (void *)handoff->reserve_base, handoff->reserve_size);
- /* Update the size and base of the reserve region. */
- handoff->reserve_base -= handoff->reserve_size;
- handoff->reserve_size += handoff->reserve_size;
- /* Save the entry point in the handoff area. */
- handoff->ramstage_entry_point = (uint32_t)entry_point;
- }
+ ramstage_size = cbmem_base - ramstage_base;
+ cache_loaded_ramstage(handoff, ramstage_base, ramstage_size,
+ entry_point);
return entry_point;
}
@@ -190,6 +212,7 @@ static void *load_stage_from_cbfs(struct cbfs_media *media, const char *name,
void * cbfs_load_stage(struct cbfs_media *media, const char *name)
{
struct romstage_handoff *handoff;
+ void *entry;
handoff = romstage_handoff_find_or_add();
@@ -199,13 +222,13 @@ void * cbfs_load_stage(struct cbfs_media *media, const char *name)
} else if (!handoff->s3_resume)
return load_stage_from_cbfs(media, name, handoff);
- /* S3 resume path. Copy from the saved relocated program buffer to
- * the running location. load_stage_from_cbfs() keeps a copy of the
- * relocated program just below the relocated program. */
- memcpy((void *)(handoff->reserve_base + (handoff->reserve_size / 2)),
- (void *)handoff->reserve_base, handoff->reserve_size / 2);
+ /* S3 resume path. Load a cached copy of the loaded ramstage. If
+ * the return value is NULL, load from cbfs. */
+ entry = load_cached_ramstage(handoff);
+ if (entry == NULL)
+ return load_stage_from_cbfs(media, name, handoff);
- return (void *)handoff->ramstage_entry_point;
+ return entry;
}
#else
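To illustrate the interface, here is a minimal sketch of what an override of the
two weak functions could look like, compiled as a standalone unit. The reduced
romstage_handoff struct and the static backing buffer are stand-ins; the haswell
patch earlier in this series is the actual override and stores the copy in the
reserved TSEG area instead.

/* Minimal sketch of a chipset override of the weak pair declared above. */
#include <stdint.h>
#include <string.h>

struct romstage_handoff {              /* reduced to the fields used here */
	uint32_t reserve_base;
	uint32_t reserve_size;
	uint32_t ramstage_entry_point;
};

static uint8_t cache_area[1 << 20];    /* stand-in for a reserved region */
static uint32_t cached_size;
static void *cached_entry;
static void *cached_load_address;

void cache_loaded_ramstage(struct romstage_handoff *handoff,
			   void *ramstage_base, uint32_t ramstage_size,
			   void *entry_point)
{
	if (ramstage_size > sizeof(cache_area))
		return;                /* leave the cache marked invalid */
	memcpy(cache_area, ramstage_base, ramstage_size);
	cached_size = ramstage_size;
	cached_entry = entry_point;
	cached_load_address = ramstage_base;

	if (handoff == NULL)
		return;
	/* Only the running copy needs reserving; the cache lives elsewhere. */
	handoff->reserve_base = (uint32_t)(uintptr_t)ramstage_base;
	handoff->reserve_size = ramstage_size;
	handoff->ramstage_entry_point = (uint32_t)(uintptr_t)entry_point;
}

void *load_cached_ramstage(struct romstage_handoff *handoff)
{
	(void)handoff;
	if (cached_size == 0)
		return NULL;   /* invalid cache: caller falls back to cbfs */
	memcpy(cached_load_address, cache_area, cached_size);
	return cached_entry;
}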
Stefan Reinauer (stefan.reinauer(a)coreboot.org) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/2803
-gerrit
commit 19df996e79b2fbd2d6916e104ce0491b6f1d599f
Author: Aaron Durbin <adurbin(a)chromium.org>
Date: Fri Feb 15 15:08:37 2013 -0600
haswell: set TSEG as WB cacheable in romstage
The TSEG region is accessible until the SMM handler is relocated
to that region. Set the region as cacheable in romstage so that it
can be used for other purposes with fast access.
Change-Id: I92b83896e40bc26a54c2930e05c02492918e0874
Signed-off-by: Aaron Durbin <adurbin(a)chromium.org>
---
src/cpu/intel/haswell/romstage.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/src/cpu/intel/haswell/romstage.c b/src/cpu/intel/haswell/romstage.c
index ac0e848..b26cbde 100644
--- a/src/cpu/intel/haswell/romstage.c
+++ b/src/cpu/intel/haswell/romstage.c
@@ -82,6 +82,7 @@ static void *setup_romstage_stack_after_car(void)
int num_mtrrs;
u32 *slot;
u32 mtrr_mask_upper;
+ u32 top_of_ram;
/* Top of stack needs to be aligned to a 4-byte boundary. */
top_of_stack = choose_top_of_stack() & ~3;
@@ -120,6 +121,7 @@ static void *setup_romstage_stack_after_car(void)
slot = stack_push(slot, 0 | MTRR_TYPE_WRBACK);
num_mtrrs++;
+ top_of_ram = get_top_of_ram();
/* Cache 8MiB below the top of ram. On haswell systems the top of
* ram under 4GiB is the start of the TSEG region. It is required to
* be 8MiB aligned. Set this area as cacheable so it can be used later
@@ -127,8 +129,18 @@ static void *setup_romstage_stack_after_car(void)
slot = stack_push(slot, mtrr_mask_upper); /* upper mask */
slot = stack_push(slot, ~((8 << 20) - 1) | MTRRphysMaskValid);
slot = stack_push(slot, 0); /* upper base */
- slot = stack_push(slot,
- (get_top_of_ram() - (8 << 20)) | MTRR_TYPE_WRBACK);
+ slot = stack_push(slot, (top_of_ram - (8 << 20)) | MTRR_TYPE_WRBACK);
+ num_mtrrs++;
+
+ /* Cache 8MiB at the top of ram. Top of ram on haswell systems
+ * is where the TSEG region resides. However, it is not restricted
+ * to SMM mode until SMM has been relocated. By setting the region
+ * to cacheable it provides faster access when relocating the SMM
+ * handler as well as using the TSEG region for other purposes. */
+ slot = stack_push(slot, mtrr_mask_upper); /* upper mask */
+ slot = stack_push(slot, ~((8 << 20) - 1) | MTRRphysMaskValid);
+ slot = stack_push(slot, 0); /* upper base */
+ slot = stack_push(slot, top_of_ram | MTRR_TYPE_WRBACK);
num_mtrrs++;
/* Save the number of MTTRs to setup. Return the stack location
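As a worked example of the values being pushed for these two MTRR slots, the
standalone snippet below encodes an 8 MiB write-back range below TSEG and one
covering TSEG itself. The top_of_ram value is a hypothetical TSEG base;
MTRR_TYPE_WRBACK (6) and the PHYSMASK valid bit (bit 11) carry their
architectural values.

/* Worked example of the variable-MTRR encoding pushed onto the stack above. */
#include <stdint.h>
#include <stdio.h>

#define MTRR_TYPE_WRBACK  6
#define MTRRphysMaskValid (1 << 11)

int main(void)
{
	uint32_t top_of_ram = 0x7c000000;   /* hypothetical TSEG base */
	uint32_t size = 8 << 20;            /* both ranges are 8 MiB  */

	uint32_t physbase_below_tseg = (top_of_ram - size) | MTRR_TYPE_WRBACK;
	uint32_t physbase_tseg = top_of_ram | MTRR_TYPE_WRBACK;
	uint32_t physmask = ~(size - 1) | MTRRphysMaskValid;

	printf("PHYSBASE (8MiB below TSEG) = 0x%08x\n",
	       (unsigned)physbase_below_tseg);
	printf("PHYSBASE (TSEG itself)     = 0x%08x\n",
	       (unsigned)physbase_tseg);
	printf("PHYSMASK (both ranges)     = 0x%08x\n", (unsigned)physmask);
	return 0;
}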
Stefan Reinauer (stefan.reinauer(a)coreboot.org) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/2801
-gerrit
commit 8474b569ddb0b072ed07bce6ec40d50a4010f8b6
Author: Aaron Durbin <adurbin(a)chromium.org>
Date: Wed Feb 13 11:22:25 2013 -0600
haswell: support for parallel SMM relocation
The haswell processors support the ability to save their SMM state
into MSR space instead of memory. This feature allows for parallel
SMM relocation handlers as well as setting the same SMBASE for each
CPU, since the memory save state area is not used.
The catch is that in order to determine if this feature is available
the CPU needs to be in SMM context. To implement parallel SMM
relocation the BSP therefore enters the relocation handler twice. The
first pass determines if the feature is available. If it is, the
feature is enabled and the BSP exits the relocation handler without
relocating its SMBASE. It then releases the APs to run the SMM
relocation handler. After the APs have completed their relocation,
the BSP re-enters the SMM relocation handler to relocate its own
SMBASE to the final location. If the parallel SMM feature is not
available, the BSP relocates its SMBASE as it did before.
This change also makes the BSP wait for the APs to relocate their
SMBASE before proceeding with the remainder of the boot process.
Both the parallel and the serial paths were verified to continue
working on the cold, warm, and S3 resume paths.
Change-Id: Iea24fd8f9561f1b194393cdb77c79adb48039ea2
Signed-off-by: Aaron Durbin <adurbin(a)chromium.org>
---
src/cpu/intel/haswell/haswell.h | 3 +-
src/cpu/intel/haswell/haswell_init.c | 3 -
src/cpu/intel/haswell/mp_init.c | 52 ++++++++++---
src/cpu/intel/haswell/smmrelocate.c | 142 +++++++++++++++++++++++++++++------
4 files changed, 163 insertions(+), 37 deletions(-)
diff --git a/src/cpu/intel/haswell/haswell.h b/src/cpu/intel/haswell/haswell.h
index 26807e9..a510e7d 100644
--- a/src/cpu/intel/haswell/haswell.h
+++ b/src/cpu/intel/haswell/haswell.h
@@ -144,6 +144,7 @@ int cpu_config_tdp_levels(void);
/* Returns 0 on success, < 0 on failure. */
int smm_initialize(void);
void smm_initiate_relocation(void);
+void smm_initiate_relocation_parallel(void);
struct bus;
void bsp_init_and_start_aps(struct bus *cpu_bus);
/* Returns 0 on succes. < 0 on failure. */
@@ -151,7 +152,7 @@ int setup_ap_init(struct bus *cpu_bus, int *max_cpus,
const void *microcode_patch);
/* Returns 0 on success, < 0 on failure. */
int start_aps(struct bus *cpu_bus, int max_cpus);
-void release_aps_for_smm_relocation(void);
+void release_aps_for_smm_relocation(int do_parallel_relocation);
#endif
#endif
diff --git a/src/cpu/intel/haswell/haswell_init.c b/src/cpu/intel/haswell/haswell_init.c
index 82430b7..c7f89ee 100644
--- a/src/cpu/intel/haswell/haswell_init.c
+++ b/src/cpu/intel/haswell/haswell_init.c
@@ -549,9 +549,6 @@ void bsp_init_and_start_aps(struct bus *cpu_bus)
return;
}
- /* Release APs to perform SMM relocation. */
- release_aps_for_smm_relocation();
-
/* After SMM relocation a 2nd microcode load is required. */
intel_microcode_load_unlocked(microcode_patch);
}
diff --git a/src/cpu/intel/haswell/mp_init.c b/src/cpu/intel/haswell/mp_init.c
index 7f15c39..c8bd5c2 100644
--- a/src/cpu/intel/haswell/mp_init.c
+++ b/src/cpu/intel/haswell/mp_init.c
@@ -75,9 +75,16 @@ static device_t cpu_devs[CONFIG_MAX_CPUS];
/* Number of APs checked that have checked in. */
static atomic_t num_aps;
+/* Number of APs that have relocated their SMM handler. */
+static atomic_t num_aps_relocated_smm;
/* Barrier to stop APs from performing SMM relcoation. */
static int smm_relocation_barrier_begin __attribute__ ((aligned (64)));
+static inline void mfence(void)
+{
+ __asm__ __volatile__("mfence\t\n": : :"memory");
+}
+
static inline void wait_for_barrier(volatile int *barrier)
{
while (*barrier == 0) {
@@ -95,13 +102,18 @@ static void ap_wait_for_smm_relocation_begin(void)
wait_for_barrier(&smm_relocation_barrier_begin);
}
+/* This function pointer is used by the non-BSP CPUs to initiate relocation. It
+ * points to either a serial or parallel SMM initiation. */
+static void (*ap_initiate_smm_relocation)(void) = &smm_initiate_relocation;
+
/* Returns 1 if timeout waiting for APs. 0 if target aps found. */
-static int wait_for_aps(int target, int total_delay, int delay_step)
+static int wait_for_aps(atomic_t *val, int target, int total_delay,
+ int delay_step)
{
int timeout = 0;
int delayed = 0;
- while (atomic_read(&num_aps) != target) {
+ while (atomic_read(val) != target) {
udelay(delay_step);
delayed += delay_step;
if (delayed >= total_delay) {
@@ -113,9 +125,19 @@ static int wait_for_aps(int target, int total_delay, int delay_step)
return timeout;
}
-void release_aps_for_smm_relocation(void)
+void release_aps_for_smm_relocation(int do_parallel)
{
+ /* Change the AP SMM initiation function, and ensure it is visible
+ * before releasing the APs. */
+ if (do_parallel) {
+ ap_initiate_smm_relocation = &smm_initiate_relocation_parallel;
+ mfence();
+ }
release_barrier(&smm_relocation_barrier_begin);
+ /* Wait for CPUs to relocate their SMM handler up to 100ms. */
+ if (wait_for_aps(&num_aps_relocated_smm, atomic_read(&num_aps),
+ 100000 /* 100 ms */, 200 /* us */))
+ printk(BIOS_DEBUG, "Timed out waiting for AP SMM relocation\n");
}
/* The mtrr code sets up ROM caching on the BSP, but not the others. However,
@@ -172,7 +194,10 @@ ap_init(unsigned int cpu, void *microcode_ptr)
ap_wait_for_smm_relocation_begin();
- smm_initiate_relocation();
+ ap_initiate_smm_relocation();
+
+ /* Indicate that SMM relocation has occurred on this thread. */
+ atomic_inc(&num_aps_relocated_smm);
/* After SMM relocation a 2nd microcode load is required. */
intel_microcode_load_unlocked(microcode_ptr);
@@ -483,7 +508,7 @@ int start_aps(struct bus *cpu_bus, int ap_count)
printk(BIOS_DEBUG, "done.\n");
}
/* Wait for CPUs to check in up to 200 us. */
- wait_for_aps(ap_count, 200 /* us */, 15 /* us */);
+ wait_for_aps(&num_aps, ap_count, 200 /* us */, 15 /* us */);
/* Send 2nd SIPI */
if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
@@ -507,7 +532,7 @@ int start_aps(struct bus *cpu_bus, int ap_count)
}
/* Wait for CPUs to check in. */
- if (wait_for_aps(ap_count, 10000 /* 10 ms */, 50 /* us */)) {
+ if (wait_for_aps(&num_aps, ap_count, 10000 /* 10 ms */, 50 /* us */)) {
printk(BIOS_DEBUG, "Not all APs checked in: %d/%d.\n",
atomic_read(&num_aps), ap_count);
return -1;
@@ -516,17 +541,12 @@ int start_aps(struct bus *cpu_bus, int ap_count)
return 0;
}
-DECLARE_SPIN_LOCK(smm_relocation_lock);
-
-void smm_initiate_relocation(void)
+void smm_initiate_relocation_parallel(void)
{
- spin_lock(&smm_relocation_lock);
-
if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
printk(BIOS_DEBUG, "timed out. Aborting.\n");
- spin_unlock(&smm_relocation_lock);
return;
} else
printk(BIOS_DEBUG, "done.\n");
@@ -539,6 +559,14 @@ void smm_initiate_relocation(void)
} else
printk(BIOS_DEBUG, "Relocation complete.\n");
+}
+
+DECLARE_SPIN_LOCK(smm_relocation_lock);
+
+void smm_initiate_relocation(void)
+{
+ spin_lock(&smm_relocation_lock);
+ smm_initiate_relocation_parallel();
spin_unlock(&smm_relocation_lock);
}
diff --git a/src/cpu/intel/haswell/smmrelocate.c b/src/cpu/intel/haswell/smmrelocate.c
index 2bf304e..2a322a7 100644
--- a/src/cpu/intel/haswell/smmrelocate.c
+++ b/src/cpu/intel/haswell/smmrelocate.c
@@ -36,6 +36,14 @@
#define EMRRphysMask_MSR 0x1f5
#define UNCORE_EMRRphysBase_MSR 0x2f4
#define UNCORE_EMRRphysMask_MSR 0x2f5
+#define SMM_MCA_CAP_MSR 0x17d
+#define SMM_CPU_SVRSTR_BIT 57
+#define SMM_CPU_SVRSTR_MASK (1 << (SMM_CPU_SVRSTR_BIT - 32))
+#define SMM_FEATURE_CONTROL_MSR 0x4e0
+#define SMM_CPU_SAVE_EN (1 << 1)
+/* SMM save state MSRs */
+#define SMBASE_MSR 0xc20
+#define IEDBASE_MSR 0xc22
#define SMRR_SUPPORTED (1<<11)
#define EMRR_SUPPORTED (1<<12)
@@ -51,6 +59,10 @@ struct smm_relocation_params {
msr_t emrr_mask;
msr_t uncore_emrr_base;
msr_t uncore_emrr_mask;
+ /* The smm_save_state_in_msrs field indicates if SMM save state
+ * locations live in MSRs. This indicates to the CPUs how to adjust
+ * the SMBASE and IEDBASE. */
+ int smm_save_state_in_msrs;
};
/* This gets filled in and used during relocation. */
@@ -82,13 +94,79 @@ static inline void write_uncore_emrr(struct smm_relocation_params *relo_params)
wrmsr(UNCORE_EMRRphysMask_MSR, relo_params->uncore_emrr_mask);
}
+static void update_save_state(int cpu,
+ struct smm_relocation_params *relo_params,
+ const struct smm_runtime *runtime)
+{
+ u32 smbase;
+ u32 iedbase;
+
+ /* The relocated handler runs with all CPUs concurrently. Therefore
+ * stagger the entry points adjusting SMBASE downwards by save state
+ * size * CPU num. */
+ smbase = relo_params->smram_base - cpu * runtime->save_state_size;
+ iedbase = relo_params->ied_base;
+
+ printk(BIOS_DEBUG, "New SMBASE=0x%08x IEDBASE=0x%08x\n",
+ smbase, iedbase);
+
+ /* All threads need to set IEDBASE and SMBASE to the relocated
+ * handler region. However, the save state location depends on the
+ * smm_save_state_in_msrs field in the relocation parameters. If
+ * smm_save_state_in_msrs is non-zero then the CPUs are relocating
+ * the SMM handler in parallel, and each CPUs save state area is
+ * located in their respective MSR space. If smm_save_state_in_msrs
+ * is zero then the SMM relocation is happening serially so the
+ * save state is at the same default location for all CPUs. */
+ if (relo_params->smm_save_state_in_msrs) {
+ msr_t smbase_msr;
+ msr_t iedbase_msr;
+
+ smbase_msr.lo = smbase;
+ smbase_msr.hi = 0;
+
+ /* According to the BWG the IEDBASE MSR is in bits 63:32. It's
+ * not clear why it differs from the SMBASE MSR. */
+ iedbase_msr.lo = 0;
+ iedbase_msr.hi = iedbase;
+
+ wrmsr(SMBASE_MSR, smbase_msr);
+ wrmsr(IEDBASE_MSR, iedbase_msr);
+ } else {
+ em64t101_smm_state_save_area_t *save_state;
+
+ save_state = (void *)(runtime->smbase + SMM_DEFAULT_SIZE -
+ runtime->save_state_size);
+
+ save_state->smbase = smbase;
+ save_state->iedbase = iedbase;
+ }
+}
+
+/* Returns 1 if SMM MSR save state was set. */
+static int bsp_setup_msr_save_state(struct smm_relocation_params *relo_params)
+{
+ msr_t smm_mca_cap;
+
+ smm_mca_cap = rdmsr(SMM_MCA_CAP_MSR);
+ if (smm_mca_cap.hi & SMM_CPU_SVRSTR_MASK) {
+ msr_t smm_feature_control;
+
+ smm_feature_control = rdmsr(SMM_FEATURE_CONTROL_MSR);
+ smm_feature_control.hi = 0;
+ smm_feature_control.lo |= SMM_CPU_SAVE_EN;
+ wrmsr(SMM_FEATURE_CONTROL_MSR, smm_feature_control);
+ relo_params->smm_save_state_in_msrs = 1;
+ }
+ return relo_params->smm_save_state_in_msrs;
+}
+
/* The relocation work is actually performed in SMM context, but the code
* resides in the ramstage module. This occurs by trampolining from the default
* SMRAM entry point to here. */
static void __attribute__((cdecl))
cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
{
- em64t101_smm_state_save_area_t *save_state;
msr_t mtrr_cap;
struct smm_relocation_params *relo_params = arg;
@@ -100,21 +178,32 @@ cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
printk(BIOS_DEBUG, "In relocation handler: cpu %d\n", cpu);
- /* All threads need to set IEDBASE and SMBASE in the save state area.
- * Since one thread runs at a time during the relocation the save state
- * is the same for all cpus. */
- save_state = (void *)(runtime->smbase + SMM_DEFAULT_SIZE -
- runtime->save_state_size);
-
- /* The relocated handler runs with all CPUs concurrently. Therefore
- * stagger the entry points adjusting SMBASE downwards by save state
- * size * CPU num. */
- save_state->smbase = relo_params->smram_base -
- cpu * runtime->save_state_size;
- save_state->iedbase = relo_params->ied_base;
+ /* Determine if the processor supports saving state in MSRs. If so,
+ * enable it before the non-BSPs run so that SMM relocation can occur
+ * in parallel in the non-BSP CPUs. */
+ if (cpu == 0) {
+ /* If smm_save_state_in_msrs is 1 then that means this is the
+ * 2nd time through the relocation handler for the BSP.
+ * Parallel SMM handler relocation is taking place. However,
+ * it is desired to access other CPUs' save state in the real
+ * SMM handler. Therefore, disable the SMM save state in MSRs
+ * feature. */
+ if (relo_params->smm_save_state_in_msrs) {
+ msr_t smm_feature_control;
+
+ smm_feature_control = rdmsr(SMM_FEATURE_CONTROL_MSR);
+ smm_feature_control.lo &= ~SMM_CPU_SAVE_EN;
+ wrmsr(SMM_FEATURE_CONTROL_MSR, smm_feature_control);
+ } else if (bsp_setup_msr_save_state(relo_params))
+ /* Just return from relocation handler if MSR save
+ * state is enabled. In that case the BSP will come
+ * back into the relocation handler to set up the new
+ * SMBASE as well as disable SMM save state in MSRs. */
+ return;
+ }
- printk(BIOS_DEBUG, "New SMBASE=0x%08x IEDBASE=0x%08x @ %p\n",
- save_state->smbase, save_state->iedbase, save_state);
+ /* Make appropriate changes to the save state map. */
+ update_save_state(cpu, relo_params, runtime);
/* Write EMRR and SMRR MSRs based on indicated support. */
mtrr_cap = rdmsr(MTRRcap_MSR);
@@ -128,8 +217,6 @@ cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
if (cpu == 0)
write_uncore_emrr(relo_params);
}
-
- southbridge_clear_smi_status();
}
static u32 northbridge_get_base_reg(device_t dev, int reg)
@@ -199,10 +286,12 @@ static void fill_in_relocation_params(device_t dev,
static int install_relocation_handler(int num_cpus,
struct smm_relocation_params *relo_params)
{
- /* The default SMM entry happens serially at the default location.
- * Therefore, there is only 1 concurrent save state area. Set the
- * stack size to the save state size, and call into the
- * do_relocation handler. */
+ /* The default SMM entry can happen in parallel or serially. If the
+ * default SMM entry is done in parallel the BSP has already set up
+ * saving state to each CPU's MSRs. At least one save state size
+ * is required for the initial SMM entry for the BSP to determine if
+ * parallel SMM relocation is even feasible. Set the stack size to
+ * the save state size, and call into the do_relocation handler. */
int save_state_size = sizeof(em64t101_smm_state_save_area_t);
struct smm_loader_params smm_params = {
.per_cpu_stack_size = save_state_size,
@@ -309,6 +398,17 @@ int smm_initialize(void)
/* Run the relocation handler. */
smm_initiate_relocation();
+ /* If smm_save_state_in_msrs is non-zero then parallel SMM relocation
+ * shall take place. Run the relocation handler a second time to do
+ * the final move. */
+ if (smm_reloc_params.smm_save_state_in_msrs) {
+ printk(BIOS_DEBUG, "Doing parallel SMM relocation.\n");
+ release_aps_for_smm_relocation(1);
+ smm_initiate_relocation_parallel();
+ } else {
+ release_aps_for_smm_relocation(0);
+ }
+
/* Lock down the SMRAM space. */
smm_lock();
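The two-pass flow described in the commit message can be summarized with the
standalone sketch below. Every helper name and message is a stand-in for the
real coreboot routine; only the control flow is meant to mirror the patch.

/* Condensed sketch of the BSP flow: first SMM entry only probes for MSR-based
 * save state; if present, the APs relocate in parallel and the BSP comes back
 * for a second pass. Set cpu_has_msr_save_state to 0 for the serial fallback. */
#include <stdio.h>

static const int cpu_has_msr_save_state = 1; /* hypothetical capability */
static int smm_save_state_in_msrs;

/* Stand-in for the SMM relocation handler as entered by the BSP. */
static void bsp_relocation_handler(void)
{
	if (!smm_save_state_in_msrs && cpu_has_msr_save_state) {
		/* First pass: enable MSR save state and return without
		 * relocating SMBASE so the APs can relocate in parallel. */
		smm_save_state_in_msrs = 1;
		printf("BSP: probe pass, MSR save state enabled\n");
		return;
	}
	printf("BSP: SMBASE relocated\n");
}

static void release_aps_for_smm_relocation(int parallel)
{
	printf("APs released to relocate %s; BSP waits for them\n",
	       parallel ? "in parallel" : "serially");
}

int main(void)
{
	bsp_relocation_handler();            /* first entry into SMM */

	if (smm_save_state_in_msrs) {
		release_aps_for_smm_relocation(1);
		bsp_relocation_handler();    /* second entry: real relocation */
	} else {
		release_aps_for_smm_relocation(0);
	}
	return 0;
}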
Stefan Reinauer (stefan.reinauer(a)coreboot.org) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/2800
-gerrit
commit b19bd0e080f7a487af5c5b26b979ccfda634b182
Author: Aaron Durbin <adurbin(a)chromium.org>
Date: Tue Feb 12 00:50:47 2013 -0600
ramstage: cache relocated ramstage in RAM
Accessing the flash part where the ramstage resides can be slow
when loading it. In order to save time on the S3 resume path, a copy
of the relocated ramstage is saved just below the location where the
ramstage was loaded. On S3 resume the cached version of the relocated
ramstage is then copied back to the loaded address.
This is achieved by saving the ramstage entry point in the
romstage_handoff structure as well as reserving double the amount of
memory required for the ramstage. This approach saves the engineering
time needed to make the ramstage reentrant.
The fast path in this change will only be taken when the chipset's
romstage code properly initializes the s3_resume field in the
romstage_handoff structure. If that field is never set up properly,
the fast path will never be taken.
e820 entries from Linux:
BIOS-e820: [mem 0x000000007bf21000-0x000000007bfbafff] reserved
BIOS-e820: [mem 0x000000007bfbb000-0x000000007bffffff] type 16
The type 16 is the cbmem table and the reserved section contains the two
copies of the ramstage; one has been executed already and one is
the cached relocated program.
With this change the S3 resume path on the Basking Ridge CRB takes
~200ms to hand off to the kernel:
13 entries total:
1:95,965
2:97,191 (1,225)
3:131,755 (34,564)
4:132,890 (1,135)
8:135,165 (2,274)
9:135,840 (675)
10:135,973 (132)
30:136,016 (43)
40:136,581 (564)
50:138,280 (1,699)
60:138,381 (100)
70:204,538 (66,157)
98:204,615 (77)
Change-Id: I9c7a6d173afc758eef560e09d2aef5f90a25187a
Signed-off-by: Aaron Durbin <adurbin(a)chromium.org>
---
src/include/romstage_handoff.h | 10 +++++++-
src/lib/cbfs.c | 58 ++++++++++++++++++++++++++++++++++--------
2 files changed, 56 insertions(+), 12 deletions(-)
diff --git a/src/include/romstage_handoff.h b/src/include/romstage_handoff.h
index c20b261..4150e8e 100644
--- a/src/include/romstage_handoff.h
+++ b/src/include/romstage_handoff.h
@@ -31,8 +31,16 @@ struct romstage_handoff {
/* This indicates to the ramstage to reserve a chunk of memory. */
uint32_t reserve_base;
uint32_t reserve_size;
- /* Inidicate if the current boot is an S3 resume. */
+ /* Indicate if the current boot is an S3 resume. If
+ * CONFIG_RELOCATABLE_RAMSTAGE is enabled the chipset code is
+ * responsible for initializing this variable. Otherwise, ramstage
+ * will be reloaded from cbfs (which can be slower since it lives
+ * in flash). */
uint32_t s3_resume;
+ /* The ramstage_entry_point is cached in the stage loading path. This
+ * cached value can only be utilized when the chipset code properly
+ * fills in the s3_resume field above. */
+ uint32_t ramstage_entry_point;
};
#if defined(__PRE_RAM__)
diff --git a/src/lib/cbfs.c b/src/lib/cbfs.c
index c8bfb0c..6e89a39 100644
--- a/src/lib/cbfs.c
+++ b/src/lib/cbfs.c
@@ -120,10 +120,11 @@ void *cbfs_load_optionrom(struct cbfs_media *media, uint16_t vendor,
#include <rmodule.h>
#include <romstage_handoff.h>
/* When CONFIG_RELOCATABLE_RAMSTAGE is enabled and this file is being compiled
- * for the romstage the rmodule loader is used. The ramstage is placed just
- * below the cbemem location. */
+ * for the romstage, the rmodule loader is used. The ramstage is placed just
+ * below the cbmem location. */
-void * cbfs_load_stage(struct cbfs_media *media, const char *name)
+static void *load_stage_from_cbfs(struct cbfs_media *media, const char *name,
+ struct romstage_handoff *handoff)
{
struct cbfs_stage *stage;
struct rmodule ramstage;
@@ -131,7 +132,7 @@ void * cbfs_load_stage(struct cbfs_media *media, const char *name)
void *ramstage_base;
void *decompression_loc;
void *ramstage_loc;
- struct romstage_handoff *handoff;
+ void *entry_point;
stage = (struct cbfs_stage *)
cbfs_get_file_content(media, name, CBFS_TYPE_STAGE);
@@ -143,9 +144,10 @@ void * cbfs_load_stage(struct cbfs_media *media, const char *name)
if (cbmem_base == NULL)
return (void *) -1;
- ramstage_base = rmodule_find_region_below(cbmem_base, stage->memlen,
- &ramstage_loc,
- &decompression_loc);
+ ramstage_base =
+ rmodule_find_region_below(cbmem_base, stage->memlen,
+ &ramstage_loc,
+ &decompression_loc);
LOG("Decompressing stage %s @ 0x%p (%d bytes)\n",
name, decompression_loc, stage->memlen);
@@ -161,15 +163,49 @@ void * cbfs_load_stage(struct cbfs_media *media, const char *name)
if (rmodule_load_no_clear_bss(ramstage_loc, &ramstage))
return (void *) -1;
- handoff = romstage_handoff_find_or_add();
+ entry_point = rmodule_entry(&ramstage);
+
if (handoff) {
handoff->reserve_base = (uint32_t)ramstage_base;
handoff->reserve_size = (uint32_t)cbmem_base -
(uint32_t)ramstage_base;
- } else
- LOG("Couldn't allocate romstage handoff.\n");
+ /* Save an entire copy in RAM of the relocated ramstage for
+ * the S3 resume path. The size of the saved relocated ramstage
+ * is larger than necessary. It could be optimized by saving
+ * just the text/data segment of the ramstage. The rmodule
+ * API would need to be modified to expose these details. For
+ * the time being, just save the entire used region. */
+ memcpy((void *)(handoff->reserve_base - handoff->reserve_size),
+ (void *)handoff->reserve_base, handoff->reserve_size);
+ /* Update the size and base of the reserve region. */
+ handoff->reserve_base -= handoff->reserve_size;
+ handoff->reserve_size += handoff->reserve_size;
+ /* Save the entry point in the handoff area. */
+ handoff->ramstage_entry_point = (uint32_t)entry_point;
+ }
+
+ return entry_point;
+}
+
+void * cbfs_load_stage(struct cbfs_media *media, const char *name)
+{
+ struct romstage_handoff *handoff;
+
+ handoff = romstage_handoff_find_or_add();
+
+ if (handoff == NULL) {
+ LOG("Couldn't find or allocate romstage handoff.\n");
+ return load_stage_from_cbfs(media, name, handoff);
+ } else if (!handoff->s3_resume)
+ return load_stage_from_cbfs(media, name, handoff);
+
+ /* S3 resume path. Copy from the saved relocated program buffer to
+ * the running location. load_stage_from_cbfs() keeps a copy of the
+ * relocated program just below the relocated program. */
+ memcpy((void *)(handoff->reserve_base + (handoff->reserve_size / 2)),
+ (void *)handoff->reserve_base, handoff->reserve_size / 2);
- return rmodule_entry(&ramstage);
+ return (void *)handoff->ramstage_entry_point;
}
#else
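To make the reservation arithmetic concrete, the standalone example below reuses
the addresses from the e820 entries quoted in the commit message: the cached
copy sits in the lower half of the reserved region, the running ramstage in the
upper half just below cbmem, and reserve_size ends up doubled. Splitting the
reserved region into two equal halves follows the commit message's statement
that it contains the two ramstage copies.

/* Worked example of the default caching policy above, using the e820 entries
 * quoted in the commit message: cbmem (type 16) starts at 0x7bfbb000 and the
 * reserved region holding both ramstage copies starts at 0x7bf21000. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t cbmem_base = 0x7bfbb000;   /* start of cbmem */
	uint32_t reserve_base = 0x7bf21000; /* start of the reserved region */
	uint32_t reserve_size = cbmem_base - reserve_base;      /* 0x9a000 */
	uint32_t ramstage_size = reserve_size / 2;              /* 0x4d000 */
	uint32_t ramstage_base = reserve_base + ramstage_size;  /* running copy */

	printf("cached copy:      0x%08x - 0x%08x\n",
	       (unsigned)reserve_base, (unsigned)(ramstage_base - 1));
	printf("running ramstage: 0x%08x - 0x%08x\n",
	       (unsigned)ramstage_base, (unsigned)(cbmem_base - 1));
	printf("reserve_size after doubling: 0x%08x\n",
	       (unsigned)reserve_size);
	return 0;
}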