Lean Sheng Tan has submitted this change. ( https://review.coreboot.org/c/coreboot/+/68912 )
(
29 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. )Change subject: soc/intel/xeon_sp: Don't sort struct device cpus for numa ......................................................................
soc/intel/xeon_sp: Don't sort struct device cpus for numa
Currently the xeon_sp code reassigns struct devices apic_id so that srat entries can be added in a certain order.
This is not a good idea as it breaks thread local storage which contains a pointer to its struct device cpu.
This moves the sorting of the lapic_ids to the srat table generation and adds the numa node id in each core init entry. Now it is done in parallel too as a bonus.
Change-Id: I372bcea1932d28e9bf712cc712f19a76fe3199b1 Signed-off-by: Arthur Heymans arthur@aheymans.xyz Reviewed-on: https://review.coreboot.org/c/coreboot/+/68912 Reviewed-by: Patrick Rudolph siro@das-labor.org Tested-by: build bot (Jenkins) no-reply@coreboot.org --- M src/soc/intel/xeon_sp/cpx/cpu.c M src/soc/intel/xeon_sp/include/soc/util.h M src/soc/intel/xeon_sp/skx/cpu.c M src/soc/intel/xeon_sp/uncore_acpi.c M src/soc/intel/xeon_sp/util.c 5 files changed, 62 insertions(+), 120 deletions(-)
Approvals: build bot (Jenkins): Verified Patrick Rudolph: Looks good to me, approved
diff --git a/src/soc/intel/xeon_sp/cpx/cpu.c b/src/soc/intel/xeon_sp/cpx/cpu.c index f46d1a6..14535e2 100644 --- a/src/soc/intel/xeon_sp/cpx/cpu.c +++ b/src/soc/intel/xeon_sp/cpx/cpu.c @@ -78,8 +78,9 @@ { msr_t msr;
- printk(BIOS_SPEW, "%s dev: %s, cpu: %lu, apic_id: 0x%x\n", - __func__, dev_path(cpu), cpu_index(), cpu->path.apic.apic_id); + printk(BIOS_SPEW, "%s dev: %s, cpu: %lu, apic_id: 0x%x, package_id: 0x%x\n", + __func__, dev_path(cpu), cpu_index(), cpu->path.apic.apic_id, + cpu->path.apic.package_id);
/* * Set HWP base feature, EPP reg enumeration, lock thermal and msr @@ -227,7 +228,4 @@ * rest of the CPU devices do not have chip_info updated. */ chip_config = bus->dev->chip_info; - - /* update numa domain for all cpu devices */ - xeonsp_init_cpu_config(); } diff --git a/src/soc/intel/xeon_sp/include/soc/util.h b/src/soc/intel/xeon_sp/include/soc/util.h index 52afc48..a828ffc 100644 --- a/src/soc/intel/xeon_sp/include/soc/util.h +++ b/src/soc/intel/xeon_sp/include/soc/util.h @@ -13,7 +13,6 @@ int get_platform_thread_count(void); const IIO_UDS *get_iio_uds(void); unsigned int soc_get_num_cpus(void); -void xeonsp_init_cpu_config(void); void set_bios_init_completion(void); uint8_t soc_get_iio_ioapicid(int socket, int stack);
diff --git a/src/soc/intel/xeon_sp/skx/cpu.c b/src/soc/intel/xeon_sp/skx/cpu.c index a9a035d..009527c 100644 --- a/src/soc/intel/xeon_sp/skx/cpu.c +++ b/src/soc/intel/xeon_sp/skx/cpu.c @@ -59,8 +59,9 @@ { msr_t msr;
- printk(BIOS_INFO, "%s dev: %s, cpu: %lu, apic_id: 0x%x\n", - __func__, dev_path(cpu), cpu_index(), cpu->path.apic.apic_id); + printk(BIOS_INFO, "%s dev: %s, cpu: %lu, apic_id: 0x%x, package_id: 0x%x\n", + __func__, dev_path(cpu), cpu_index(), cpu->path.apic.apic_id, + cpu->path.apic.package_id); assert(chip_config);
/* set MSR_PKG_CST_CONFIG_CONTROL - scope per core*/ @@ -247,8 +248,5 @@ /* TODO: Handle mp_init_with_smm failure? */ mp_init_with_smm(bus, &mp_ops);
- /* update numa domain for all cpu devices */ - xeonsp_init_cpu_config(); - FUNC_EXIT(); } diff --git a/src/soc/intel/xeon_sp/uncore_acpi.c b/src/soc/intel/xeon_sp/uncore_acpi.c index be46504..e2b262e 100644 --- a/src/soc/intel/xeon_sp/uncore_acpi.c +++ b/src/soc/intel/xeon_sp/uncore_acpi.c @@ -6,6 +6,7 @@ #include <assert.h> #include <cbmem.h> #include <cpu/x86/lapic.h> +#include <commonlib/sort.h> #include <device/mmio.h> #include <device/pci.h> #include <device/pciexp.h> @@ -22,29 +23,54 @@
/* NUMA related ACPI table generation. SRAT, SLIT, etc */
+/* Increase if necessary. Currently all x86 CPUs only have 2 SMP threads */ +#define MAX_THREAD 2 + unsigned long acpi_create_srat_lapics(unsigned long current) { struct device *cpu; - unsigned int cpu_index = 0; + unsigned int num_cpus = 0; + int apic_ids[CONFIG_MAX_CPUS] = {};
- for (cpu = all_devices; cpu; cpu = cpu->next) { - if (!is_enabled_cpu(cpu)) + unsigned int sort_start = 0; + for (unsigned int thread_id = 0; thread_id < MAX_THREAD; thread_id++) { + for (cpu = all_devices; cpu; cpu = cpu->next) { + if (!is_enabled_cpu(cpu)) + continue; + if (num_cpus >= ARRAY_SIZE(apic_ids)) + break; + if (cpu->path.apic.thread_id != thread_id) + continue; + apic_ids[num_cpus++] = cpu->path.apic.apic_id; + } + bubblesort(&apic_ids[sort_start], num_cpus - sort_start, NUM_ASCENDING); + sort_start = num_cpus; + } + + for (unsigned int i = 0; i < num_cpus; i++) { + /* Match the sorted apic_ids to a struct device */ + for (cpu = all_devices; cpu; cpu = cpu->next) { + if (!is_enabled_cpu(cpu)) + continue; + if (cpu->path.apic.apic_id == apic_ids[i]) + break; + } + if (!cpu) continue;
if (is_x2apic_mode()) { - printk(BIOS_DEBUG, "SRAT: x2apic cpu_index=%08x, node_id=%02x, apic_id=%08x\n", - cpu_index, cpu->path.apic.node_id, cpu->path.apic.apic_id); + printk(BIOS_DEBUG, "SRAT: x2apic cpu_index=%04x, node_id=%02x, apic_id=%08x\n", + i, cpu->path.apic.node_id, cpu->path.apic.apic_id);
current += acpi_create_srat_x2apic((acpi_srat_x2apic_t *)current, cpu->path.apic.node_id, cpu->path.apic.apic_id); } else { printk(BIOS_DEBUG, "SRAT: lapic cpu_index=%02x, node_id=%02x, apic_id=%02x\n", - cpu_index, cpu->path.apic.node_id, cpu->path.apic.apic_id); + i, cpu->path.apic.node_id, cpu->path.apic.apic_id);
current += acpi_create_srat_lapic((acpi_srat_lapic_t *)current, cpu->path.apic.node_id, cpu->path.apic.apic_id); } - cpu_index++; } return current; } diff --git a/src/soc/intel/xeon_sp/util.c b/src/soc/intel/xeon_sp/util.c index 7524e52..e0bf9bc 100644 --- a/src/soc/intel/xeon_sp/util.c +++ b/src/soc/intel/xeon_sp/util.c @@ -125,108 +125,6 @@ }
#if ENV_RAMSTAGE /* Setting devtree variables is only allowed in ramstage. */ -static void get_core_thread_bits(uint32_t *core_bits, uint32_t *thread_bits) -{ - register int ecx; - struct cpuid_result cpuid_regs; - - /* get max index of CPUID */ - cpuid_regs = cpuid(0); - assert(cpuid_regs.eax >= 0xb); /* cpuid_regs.eax is max input value for cpuid */ - - *thread_bits = *core_bits = 0; - ecx = 0; - while (1) { - cpuid_regs = cpuid_ext(0xb, ecx); - if (ecx == 0) { - *thread_bits = (cpuid_regs.eax & 0x1f); - } else { - *core_bits = (cpuid_regs.eax & 0x1f) - *thread_bits; - break; - } - ecx++; - } -} - -static void get_cpu_info_from_apicid(uint32_t apicid, uint32_t core_bits, uint32_t thread_bits, - uint8_t *package, uint8_t *core, uint8_t *thread) -{ - if (package) - *package = (apicid >> (thread_bits + core_bits)); - if (core) - *core = (uint32_t)((apicid >> thread_bits) & ~((~0) << core_bits)); - if (thread) - *thread = (uint32_t)(apicid & ~((~0) << thread_bits)); -} - -void xeonsp_init_cpu_config(void) -{ - struct device *dev; - int apic_ids[CONFIG_MAX_CPUS] = {0}, apic_ids_by_thread[CONFIG_MAX_CPUS] = {0}; - int num_apics = 0; - uint32_t core_bits, thread_bits; - unsigned int core_count, thread_count; - unsigned int num_sockets; - - /* - * sort APIC ids in ascending order to identify apicid ranges for - * each numa domain - */ - for (dev = all_devices; dev; dev = dev->next) { - if ((dev->path.type != DEVICE_PATH_APIC) || - (dev->bus->dev->path.type != DEVICE_PATH_CPU_CLUSTER)) { - continue; - } - if (!dev->enabled) - continue; - if (num_apics >= ARRAY_SIZE(apic_ids)) - break; - apic_ids[num_apics++] = dev->path.apic.apic_id; - } - if (num_apics > 1) - bubblesort(apic_ids, num_apics, NUM_ASCENDING); - - num_sockets = soc_get_num_cpus(); - cpu_read_topology(&core_count, &thread_count); - assert(num_apics == (num_sockets * thread_count)); - - /* sort them by thread i.e., all cores with thread 0 and then thread 1 */ - int index = 0; - for (int id = 0; id < num_apics; ++id) { - int apic_id = apic_ids[id]; - if (apic_id & 0x1) { /* 2nd thread */ - apic_ids_by_thread[index + (num_apics/2) - 1] = apic_id; - } else { /* 1st thread */ - apic_ids_by_thread[index++] = apic_id; - } - } - - /* update apic_id, node_id in sorted order */ - num_apics = 0; - get_core_thread_bits(&core_bits, &thread_bits); - for (dev = all_devices; dev; dev = dev->next) { - uint8_t package; - - if ((dev->path.type != DEVICE_PATH_APIC) || - (dev->bus->dev->path.type != DEVICE_PATH_CPU_CLUSTER)) { - continue; - } - if (!dev->enabled) - continue; - if (num_apics >= ARRAY_SIZE(apic_ids)) - break; - dev->path.apic.apic_id = apic_ids_by_thread[num_apics]; - get_cpu_info_from_apicid(dev->path.apic.apic_id, core_bits, thread_bits, - &package, NULL, NULL); - dev->path.apic.node_id = package; - printk(BIOS_DEBUG, "CPU %d apic_id: 0x%x (%d), node_id: 0x%x\n", - num_apics, dev->path.apic.apic_id, - dev->path.apic.apic_id, dev->path.apic.node_id); - - ++num_apics; - } -} - /* return true if command timed out else false */ static bool wait_for_bios_cmd_cpl(pci_devfn_t dev, uint32_t reg, uint32_t mask, uint32_t target)