Changelog since: v3: * don't move CPU counting around, leave it as is and consolidate apic_id accounting only * add and use qemu_*_present_cpus_count() helpers to init/get boot cpus count at boot time and only qemu_get_present_cpus_count() at resume time * fix S3 wakeup stuck in resume vector * If cpu were hotplugged during smp_scan, report it to user instead of silently hanging if it came online while smp_scan() waits for APs v2: * rebase on top of current master /smp_scan() changes/ v1: * s/count_cpu/apic_id_init/ * merge handle_x2apic() into apic_id_init() RFC: * move out max-cpus check out of mptable_setup() * factor out CPU counting/apic ID detection in separate function * return back accidentially deleted debug message with APIC ID * drop unused code in smp_setup()
According to SDM, if CPUs have APIC ID more than 254 firmware should pass control to OS in x2APIC mode. This series adds x2APIC bootstrap initialization.
Works fine with not yet rebased on master QEMU side of x2APIC support: https://www.mail-archive.com/qemu-devel@nongnu.org/msg390671.html Rebased variant probably will be sent next week.
Note: S3 wakeup works as expected if linux guest is running with IRQ remapping enabled. However it seems that kernel is buggy with IRQ remapping disabled as kernel disables CPUs with APIC ID > 254 and on resume from S3 hangs somewere after getting control from Seabios.
Igor Mammedov (4): paravirt: disable legacy bios tables in case of more than 255 CPUs error out if present cpus count changed during SMP bringup add helpers to read etc/boot-cpus at resume time support booting with more than 255 CPUs
Kevin O'Connor (1): smp: consolidate CPU APIC ID detection and accounting
src/fw/paravirt.h | 3 +++ src/x86.h | 1 + src/fw/paravirt.c | 44 +++++++++++++++++++++++++++++++++++-- src/fw/smp.c | 65 ++++++++++++++++++++++++++++++++++++++----------------- 4 files changed, 91 insertions(+), 22 deletions(-)
MPTable doesn't support more than 255 CPUs and QEMU supplies an alternative MADT table which guest will use instead of it. So do not install legacy tables if more than 254 CPUs are provided
Signed-off-by: Igor Mammedov imammedo@redhat.com Acked-by: Michael S. Tsirkin mst@redhat.com --- src/fw/paravirt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 73a08f0..33a471b 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -164,8 +164,10 @@ qemu_platform_setup(void) smp_setup();
// Create bios tables - pirtable_setup(); - mptable_setup(); + if (MaxCountCPUs <= 255) { + pirtable_setup(); + mptable_setup(); + } smbios_setup();
if (CONFIG_FW_ROMFILE_LOAD) {
From: Kevin O'Connor kevin@koconnor.net
Signed-off-by: "Kevin O'Connor" kevin@koconnor.net Signed-off-by: Igor Mammedov imammedo@redhat.com --- v3: * fix hang on resume path by resetting CountCPUs on every smp_scan v2: * s/count_cpu/apic_id_init/ * call apic_id_init() after sending SIPI, it will be needed for switching BSP into x2APIC mode --- src/fw/smp.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/src/fw/smp.c b/src/fw/smp.c index 719d51d..31bcc6a 100644 --- a/src/fw/smp.c +++ b/src/fw/smp.c @@ -55,23 +55,29 @@ int apic_id_is_present(u8 apic_id) return !!(FoundAPICIDs[apic_id/32] & (1ul << (apic_id % 32))); }
+static int +apic_id_init(void) +{ + // Track found apic id for use in legacy internal bios tables + u32 eax, ebx, ecx, cpuid_features; + cpuid(1, &eax, &ebx, &ecx, &cpuid_features); + u8 apic_id = ebx>>24; + FoundAPICIDs[apic_id/32] |= 1 << (apic_id % 32); + return apic_id; +} + void VISIBLE32FLAT handle_smp(void) { if (!CONFIG_QEMU) return;
- // Detect apic_id - u32 eax, ebx, ecx, cpuid_features; - cpuid(1, &eax, &ebx, &ecx, &cpuid_features); - u8 apic_id = ebx>>24; - dprintf(DEBUG_HDL_smp, "handle_smp: apic_id=%d\n", apic_id); + // Track this CPU and detect the apic_id + int apic_id = apic_id_init(); + dprintf(DEBUG_HDL_smp, "handle_smp: apic_id=0x%x\n", apic_id);
smp_write_msrs();
- // Set bit on FoundAPICIDs - FoundAPICIDs[apic_id/32] |= (1 << (apic_id % 32)); - CountCPUs++; }
@@ -94,8 +100,7 @@ smp_scan(void) }
// mark the BSP initial APIC ID as found, too: - u8 apic_id = ebx>>24; - FoundAPICIDs[apic_id/32] |= (1 << (apic_id % 32)); + apic_id_init(); CountCPUs = 1;
// Setup jump trampoline to counter code.
if during SMP bringup a cpu is hotplugged, Seabios might silently hung in
while (cmos_smp_count != CountCPUs)
loop as cmos_smp_count might be less then CountCPUs if SIPI were delivered to the hotplugged CPU.
Warn user about it and ask for reboot.
While at it rename CountCPUs to BroughtUpCPUs to make clear what it is counting.
Signed-off-by: Igor Mammedov imammedo@redhat.com --- src/fw/smp.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/src/fw/smp.c b/src/fw/smp.c index 31bcc6a..9040b01 100644 --- a/src/fw/smp.c +++ b/src/fw/smp.c @@ -46,7 +46,7 @@ smp_write_msrs(void) }
u32 MaxCountCPUs; -static u32 CountCPUs; +static u32 BroughtUpCPUs; // 256 bits for the found APIC IDs static u32 FoundAPICIDs[256/32];
@@ -78,7 +78,7 @@ handle_smp(void)
smp_write_msrs();
- CountCPUs++; + BroughtUpCPUs++; }
// Atomic lock for shared stack across processors. @@ -95,13 +95,13 @@ smp_scan(void) if (eax < 1 || !(cpuid_features & CPUID_APIC)) { // No apic - only the main cpu is present. dprintf(1, "No apic - only the main cpu is present.\n"); - CountCPUs= 1; + BroughtUpCPUs= 1; return; }
// mark the BSP initial APIC ID as found, too: apic_id_init(); - CountCPUs = 1; + BroughtUpCPUs = 1;
// Setup jump trampoline to counter code. u64 old = *(u64*)BUILD_AP_BOOT_ADDR; @@ -131,8 +131,8 @@ smp_scan(void) writel(APIC_ICR_LOW, 0x000C4600 | sipi_vector);
// Wait for other CPUs to process the SIPI. - u8 cmos_smp_count = rtc_read(CMOS_BIOS_SMP_COUNT) + 1; - while (cmos_smp_count != CountCPUs) + u8 smp_count, smp_count_initial = rtc_read(CMOS_BIOS_SMP_COUNT) + 1; + while ((smp_count = rtc_read(CMOS_BIOS_SMP_COUNT) + 1) != BroughtUpCPUs) { asm volatile( // Release lock and allow other processors to use the stack. " movl %%esp, %1\n" @@ -143,12 +143,15 @@ smp_scan(void) " jc 1b\n" : "+m" (SMPLock), "+m" (SMPStack) : : "cc", "memory"); + if (smp_count != smp_count_initial) + dprintf(1, "Error: count of present cpus changed, reboot manually"); + } yield();
// Restore memory. *(u64*)BUILD_AP_BOOT_ADDR = old;
- dprintf(1, "Found %d cpu(s) max supported %d cpu(s)\n", CountCPUs, + dprintf(1, "Found %d cpu(s) max supported %d cpu(s)\n", BroughtUpCPUs, MaxCountCPUs); }
On Fri, Sep 16, 2016 at 01:54:16PM +0200, Igor Mammedov wrote:
if during SMP bringup a cpu is hotplugged, Seabios might silently hung in
while (cmos_smp_count != CountCPUs)
loop as cmos_smp_count might be less then CountCPUs if SIPI were delivered to the hotplugged CPU.
Warn user about it and ask for reboot.
While at it rename CountCPUs to BroughtUpCPUs to make clear what it is counting.
Signed-off-by: Igor Mammedov imammedo@redhat.com
src/fw/smp.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/src/fw/smp.c b/src/fw/smp.c index 31bcc6a..9040b01 100644 --- a/src/fw/smp.c +++ b/src/fw/smp.c @@ -46,7 +46,7 @@ smp_write_msrs(void) }
u32 MaxCountCPUs; -static u32 CountCPUs; +static u32 BroughtUpCPUs; // 256 bits for the found APIC IDs static u32 FoundAPICIDs[256/32];
@@ -78,7 +78,7 @@ handle_smp(void)
smp_write_msrs();
- CountCPUs++;
- BroughtUpCPUs++;
}
// Atomic lock for shared stack across processors. @@ -95,13 +95,13 @@ smp_scan(void) if (eax < 1 || !(cpuid_features & CPUID_APIC)) { // No apic - only the main cpu is present. dprintf(1, "No apic - only the main cpu is present.\n");
CountCPUs= 1;
BroughtUpCPUs= 1; return;
}
// mark the BSP initial APIC ID as found, too: apic_id_init();
- CountCPUs = 1;
BroughtUpCPUs = 1;
// Setup jump trampoline to counter code. u64 old = *(u64*)BUILD_AP_BOOT_ADDR;
@@ -131,8 +131,8 @@ smp_scan(void) writel(APIC_ICR_LOW, 0x000C4600 | sipi_vector);
// Wait for other CPUs to process the SIPI.
- u8 cmos_smp_count = rtc_read(CMOS_BIOS_SMP_COUNT) + 1;
- while (cmos_smp_count != CountCPUs)
- u8 smp_count, smp_count_initial = rtc_read(CMOS_BIOS_SMP_COUNT) + 1;
- while ((smp_count = rtc_read(CMOS_BIOS_SMP_COUNT) + 1) != BroughtUpCPUs) { asm volatile( // Release lock and allow other processors to use the stack. " movl %%esp, %1\n"
@@ -143,12 +143,15 @@ smp_scan(void) " jc 1b\n" : "+m" (SMPLock), "+m" (SMPStack) : : "cc", "memory");
if (smp_count != smp_count_initial)
dprintf(1, "Error: count of present cpus changed, reboot manually");
- }
I'm not sure about this patch. No one reads the debug log, so this error message seems unlikely to be seen; in the case someone did forward the debug log to disk, this loop could rapidly fill it up.
Perhaps separate out this patch from the rest of the series as it seems separate.
The rest of the series looks good to me. Is there a dependency on pending QEMU patches?
-Kevin
On Fri, 16 Sep 2016 08:55:49 -0400 "Kevin O'Connor" kevin@koconnor.net wrote:
On Fri, Sep 16, 2016 at 01:54:16PM +0200, Igor Mammedov wrote:
if during SMP bringup a cpu is hotplugged, Seabios might silently hung in
while (cmos_smp_count != CountCPUs)
loop as cmos_smp_count might be less then CountCPUs if SIPI were delivered to the hotplugged CPU.
Warn user about it and ask for reboot.
While at it rename CountCPUs to BroughtUpCPUs to make clear what it is counting.
Signed-off-by: Igor Mammedov imammedo@redhat.com
src/fw/smp.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/src/fw/smp.c b/src/fw/smp.c index 31bcc6a..9040b01 100644 --- a/src/fw/smp.c +++ b/src/fw/smp.c @@ -46,7 +46,7 @@ smp_write_msrs(void) }
u32 MaxCountCPUs; -static u32 CountCPUs; +static u32 BroughtUpCPUs; // 256 bits for the found APIC IDs static u32 FoundAPICIDs[256/32];
@@ -78,7 +78,7 @@ handle_smp(void)
smp_write_msrs();
- CountCPUs++;
- BroughtUpCPUs++;
}
// Atomic lock for shared stack across processors. @@ -95,13 +95,13 @@ smp_scan(void) if (eax < 1 || !(cpuid_features & CPUID_APIC)) { // No apic - only the main cpu is present. dprintf(1, "No apic - only the main cpu is present.\n");
CountCPUs= 1;
BroughtUpCPUs= 1; return;
}
// mark the BSP initial APIC ID as found, too: apic_id_init();
- CountCPUs = 1;
BroughtUpCPUs = 1;
// Setup jump trampoline to counter code. u64 old = *(u64*)BUILD_AP_BOOT_ADDR;
@@ -131,8 +131,8 @@ smp_scan(void) writel(APIC_ICR_LOW, 0x000C4600 | sipi_vector);
// Wait for other CPUs to process the SIPI.
- u8 cmos_smp_count = rtc_read(CMOS_BIOS_SMP_COUNT) + 1;
- while (cmos_smp_count != CountCPUs)
- u8 smp_count, smp_count_initial = rtc_read(CMOS_BIOS_SMP_COUNT) + 1;
- while ((smp_count = rtc_read(CMOS_BIOS_SMP_COUNT) + 1) != BroughtUpCPUs) { asm volatile( // Release lock and allow other processors to use the stack. " movl %%esp, %1\n"
@@ -143,12 +143,15 @@ smp_scan(void) " jc 1b\n" : "+m" (SMPLock), "+m" (SMPStack) : : "cc", "memory");
if (smp_count != smp_count_initial)
dprintf(1, "Error: count of present cpus changed, reboot manually");
- }
I'm not sure about this patch. No one reads the debug log, so this error message seems unlikely to be seen; in the case someone did forward the debug log to disk, this loop could rapidly fill it up.
Perhaps separate out this patch from the rest of the series as it seems separate.
Sure, we can drop it for now and think about a beet way to handle race later. I can post [v5 5/5] as reply here since dropping this patch will cause some trivial conflicts or respin whole series with amended 5/5 (whichever you prefer).
The rest of the series looks good to me. Is there a dependency on pending QEMU patches?
There is older version of QEMU patches https://www.mail-archive.com/qemu-devel@nongnu.org/msg390671.html and I'm preparing rebased variant for posting next week to qemu-devel.
-Kevin
Signed-off-by: Igor Mammedov imammedo@redhat.com --- src/fw/paravirt.h | 3 +++ src/fw/paravirt.c | 38 ++++++++++++++++++++++++++++++++++++++ src/fw/smp.c | 11 ++++++----- 3 files changed, 47 insertions(+), 5 deletions(-)
diff --git a/src/fw/paravirt.h b/src/fw/paravirt.h index ed8e5f1..6f26fd0 100644 --- a/src/fw/paravirt.h +++ b/src/fw/paravirt.h @@ -52,4 +52,7 @@ void qemu_preinit(void); void qemu_platform_setup(void); void qemu_cfg_init(void);
+u16 qemu_init_present_cpus_count(void); +u16 qemu_get_present_cpus_count(void); + #endif diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 33a471b..125066d 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -319,6 +319,44 @@ qemu_romfile_add(char *name, int select, int skip, int size) romfile_add(&qfile->file); }
+static int +qemu_romfile_get_fwcfg_entry(char *name, int *select) +{ + struct romfile_s *file = romfile_find(name); + if (!file) + return 0; + struct qemu_romfile_s *qfile; + qfile = container_of(file, struct qemu_romfile_s, file); + if (select) + *select = qfile->select; + return file->size; +} + +static int boot_cpus_sel; +static int boot_cpus_file_sz; + +u16 +qemu_init_present_cpus_count(void) +{ + u16 smp_count = romfile_loadint("etc/boot-cpus", + rtc_read(CMOS_BIOS_SMP_COUNT) + 1); + boot_cpus_file_sz = + qemu_romfile_get_fwcfg_entry("etc/boot-cpus", &boot_cpus_sel); + return smp_count; +} + +u16 +qemu_get_present_cpus_count(void) +{ + u16 smp_count; + if (!boot_cpus_file_sz) { + smp_count = rtc_read(CMOS_BIOS_SMP_COUNT) + 1; + } else { + qemu_cfg_read_entry(&smp_count, boot_cpus_sel, boot_cpus_file_sz); + } + return smp_count; +} + struct e820_reservation { u64 address; u64 length; diff --git a/src/fw/smp.c b/src/fw/smp.c index 9040b01..51834ad 100644 --- a/src/fw/smp.c +++ b/src/fw/smp.c @@ -12,6 +12,7 @@ #include "stacks.h" // yield #include "util.h" // smp_setup, msr_feature_control_setup #include "x86.h" // wrmsr +#include "paravirt.h" // qemu_*_present_cpus_count
#define APIC_ICR_LOW ((u8*)BUILD_APIC_ADDR + 0x300) #define APIC_SVR ((u8*)BUILD_APIC_ADDR + 0x0F0) @@ -131,8 +132,8 @@ smp_scan(void) writel(APIC_ICR_LOW, 0x000C4600 | sipi_vector);
// Wait for other CPUs to process the SIPI. - u8 smp_count, smp_count_initial = rtc_read(CMOS_BIOS_SMP_COUNT) + 1; - while ((smp_count = rtc_read(CMOS_BIOS_SMP_COUNT) + 1) != BroughtUpCPUs) { + u16 smp_count, smp_count_initial = qemu_get_present_cpus_count(); + while ((smp_count = qemu_get_present_cpus_count()) != BroughtUpCPUs) { asm volatile( // Release lock and allow other processors to use the stack. " movl %%esp, %1\n" @@ -162,9 +163,9 @@ smp_setup(void) return;
MaxCountCPUs = romfile_loadint("etc/max-cpus", 0); - u8 cmos_smp_count = rtc_read(CMOS_BIOS_SMP_COUNT) + 1; - if (MaxCountCPUs < cmos_smp_count) - MaxCountCPUs = cmos_smp_count; + u16 smp_count = qemu_init_present_cpus_count(); + if (MaxCountCPUs < smp_count) + MaxCountCPUs = smp_count;
smp_scan(); }
SDM[*1] says that if there are CPUs with APIC ID greater than 254, BIOS is to pass control to OS in x2APIC mode. Use the fact that QEMU passes in "etc/max-cpus" max possible "APIC ID + 1" to detect need for x2APIC mode. Also instead of CMOS_BIOS_SMP_COUNT which is limited to 256 CPUs use a new rom file "etc/boot-cpus" that QEMU supporting more than 256 CPUs will provide.
*1) SDM: Volume 3: EXTENDED XAPIC (X2APIC): Initialization by System Software
Signed-off-by: Igor Mammedov imammedo@redhat.com --- v4: * use qemu_*_present_cpus_count() helpers to init/get boot cpus count at boot time and only qemu_get_present_cpus_count() at resume time v2: * merge handle_x2apic() into apic_id_init() --- src/x86.h | 1 + src/fw/smp.c | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/src/x86.h b/src/x86.h index 53378e9..a770e6f 100644 --- a/src/x86.h +++ b/src/x86.h @@ -68,6 +68,7 @@ static inline void wbinvd(void) #define CPUID_MSR (1 << 5) #define CPUID_APIC (1 << 9) #define CPUID_MTRR (1 << 12) +#define CPUID_X2APIC (1 << 21) static inline void __cpuid(u32 index, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) { asm("cpuid" diff --git a/src/fw/smp.c b/src/fw/smp.c index 51834ad..0b36268 100644 --- a/src/fw/smp.c +++ b/src/fw/smp.c @@ -20,6 +20,9 @@ #define APIC_LINT1 ((u8*)BUILD_APIC_ADDR + 0x360)
#define APIC_ENABLED 0x0100 +#define MSR_IA32_APIC_BASE 0x01B +#define MSR_LOCAL_APIC_ID 0x802 +#define MSR_IA32_APICBASE_EXTD (1ULL << 10) /* Enable x2APIC mode */
static struct { u32 index; u64 val; } smp_msr[32]; static u32 smp_msr_count; @@ -59,11 +62,21 @@ int apic_id_is_present(u8 apic_id) static int apic_id_init(void) { - // Track found apic id for use in legacy internal bios tables u32 eax, ebx, ecx, cpuid_features; cpuid(1, &eax, &ebx, &ecx, &cpuid_features); - u8 apic_id = ebx>>24; - FoundAPICIDs[apic_id/32] |= 1 << (apic_id % 32); + u32 apic_id = ebx>>24; + if (MaxCountCPUs < 256) { // xAPIC mode + // Track found apic id for use in legacy internal bios tables + FoundAPICIDs[apic_id/32] |= 1 << (apic_id % 32); + } else if (ecx & CPUID_X2APIC) { + // switch to x2APIC mode + u64 apic_base = rdmsr(MSR_IA32_APIC_BASE); + wrmsr(MSR_IA32_APIC_BASE, apic_base | MSR_IA32_APICBASE_EXTD); + apic_id = rdmsr(MSR_LOCAL_APIC_ID); + } else { + // x2APIC is masked by CPUID + apic_id = -1; + } return apic_id; }
@@ -101,7 +114,6 @@ smp_scan(void) }
// mark the BSP initial APIC ID as found, too: - apic_id_init(); BroughtUpCPUs = 1;
// Setup jump trampoline to counter code. @@ -131,6 +143,10 @@ smp_scan(void) u32 sipi_vector = BUILD_AP_BOOT_ADDR >> 12; writel(APIC_ICR_LOW, 0x000C4600 | sipi_vector);
+ // switch to x2APIC mode after sending SIPI so that + // x2APIC and xAPIC mode could share AP wake up code + apic_id_init(); + // Wait for other CPUs to process the SIPI. u16 smp_count, smp_count_initial = qemu_get_present_cpus_count(); while ((smp_count = qemu_get_present_cpus_count()) != BroughtUpCPUs) {