Damien Zammit (damien@zamaudio.com) just uploaded a new patch set to gerrit, which you can find at https://review.coreboot.org/6311
-gerrit
commit a377590a85c55a4590d359dfcc74189d8a8e273e Author: Damien Zammit <damien@zamaudio.com> Date: Sat Nov 28 21:27:05 2015 +1100
x86/smm: Initialize SMM on some CPUs one-by-one
We currently race in SMM init on Atom 230 (and potentially other CPUs). At least on the 230, this leads to a hang on RSM, likely because both hyperthreads mess around with SMBASE and other SMM state variables in parallel without coordination. The same behaviour occurs with Atom D5xx.
Change it so that the APs are first spun up and sent to sleep, then the BSP initializes SMM, followed by every other CPU, one after another.
Only do this when SERIALIZED_SMM_INITIALIZATION is set. Set the flag for Atom CPUs.
Change-Id: I1ae864e37546298ea222e81349c27cf774ed251f Signed-off-by: Patrick Georgi patrick@georgi-clan.de Signed-off-by: Damien Zammit damien@zamaudio.com --- src/cpu/intel/model_106cx/Kconfig | 1 + src/cpu/x86/Kconfig | 11 ++++++++++ src/cpu/x86/lapic/lapic_cpu_init.c | 45 +++++++++++++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 1 deletion(-)
diff --git a/src/cpu/intel/model_106cx/Kconfig b/src/cpu/intel/model_106cx/Kconfig index 09acfd9..a005ba2 100644 --- a/src/cpu/intel/model_106cx/Kconfig +++ b/src/cpu/intel/model_106cx/Kconfig @@ -11,6 +11,7 @@ config CPU_INTEL_MODEL_106CX select AP_IN_SIPI_WAIT select TSC_SYNC_MFENCE select SUPPORT_CPU_UCODE_IN_CBFS + select SERIALIZED_SMM_INITIALIZATION
if CPU_INTEL_MODEL_106CX
diff --git a/src/cpu/x86/Kconfig b/src/cpu/x86/Kconfig index 131cbf2..94225a3 100644 --- a/src/cpu/x86/Kconfig +++ b/src/cpu/x86/Kconfig @@ -96,6 +96,17 @@ config SMM_LAPIC_REMAP_MITIGATION default y if NORTHBRIDGE_INTEL_NEHALEM default n
+config SERIALIZED_SMM_INITIALIZATION + bool + default n + help + On some CPUs, there is a race condition in SMM. + This can occur when both hyperthreads change SMM state + variables in parallel without coordination. + Setting this option serializes the SMM initialization + to avoid an ugly hang in the boot process at the cost + of a slightly longer boot time. + config X86_AMD_FIXED_MTRRS bool default n diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c index ef150a5..db74230 100644 --- a/src/cpu/x86/lapic/lapic_cpu_init.c +++ b/src/cpu/x86/lapic/lapic_cpu_init.c @@ -458,6 +458,39 @@ static void start_other_cpus(struct bus *cpu_bus, struct device *bsp_cpu)
}
+static void smm_other_cpus(struct bus *cpu_bus, device_t bsp_cpu) +{ + device_t cpu; + int pre_count = atomic_read(&active_cpus); + + /* Loop through the cpus once to let them run through SMM relocator */ + + for(cpu = cpu_bus->children; cpu ; cpu = cpu->sibling) { + if (cpu->path.type != DEVICE_PATH_APIC) { + continue; + } + + printk(BIOS_ERR, "considering CPU 0x%02x for SMM init\n", + cpu->path.apic.apic_id); + + if (cpu==bsp_cpu) + continue; + + if (!cpu->enabled) { + continue; + } + + if (!start_cpu(cpu)) { + /* Record the error in cpu? */ + printk(BIOS_ERR, "CPU 0x%02x would not start!\n", + cpu->path.apic.apic_id); + } + + /* FIXME: endless loop */ + while (atomic_read(&active_cpus) != pre_count) ; + } +} + static void wait_other_cpus_stop(struct bus *cpu_bus) { struct device *cpu; @@ -528,7 +561,8 @@ void initialize_cpus(struct bus *cpu_bus) #endif
#if CONFIG_HAVE_SMI_HANDLER - smm_init(); + if (!IS_ENABLED(CONFIG_SERIALIZED_SMM_INITIALIZATION)) + smm_init(); #endif
#if CONFIG_SMP && CONFIG_MAX_CPUS > 1 @@ -547,4 +581,13 @@ void initialize_cpus(struct bus *cpu_bus) /* Now wait the rest of the cpus stop*/ wait_other_cpus_stop(cpu_bus); #endif + + if (IS_ENABLED(CONFIG_SERIALIZED_SMM_INITIALIZATION)) { + /* BSP only, all APs are sleeping */ + smm_init(); +#if CONFIG_SMP && CONFIG_MAX_CPUS > 1 + last_cpu_index = 0; + smm_other_cpus(cpu_bus, info->cpu); +#endif + } }