pcimem_start and pcimem64_start are adjusted based on the SRAT entries. For this reason, the paravirt info (NUMA SRAT entries and number of CPUs) needs to be read before pci_setup. IMHO, this is an ugly code change, since the SRAT BIOS tables and the number of CPUs have to be read earlier. The advantage, however, is that no new paravirt interface is introduced. Suggestions to make the code change cleaner are welcome.
An alternative patch (to be sent as a reply to this patch) implements a paravirt interface that reads the starting values of pcimem_start and pcimem64_start from QEMU.
Signed-off-by: Vasilis Liaskovitis vasilis.liaskovitis@profitbricks.com --- src/acpi.c | 82 ++++++++++++++++++++++++++++++++++++++++---------------- src/acpi.h | 3 ++ src/pciinit.c | 6 +++- src/post.c | 3 ++ src/smp.c | 4 +++ 5 files changed, 72 insertions(+), 26 deletions(-)
diff --git a/src/acpi.c b/src/acpi.c index 1223b52..9e99aa7 100644 --- a/src/acpi.c +++ b/src/acpi.c @@ -428,7 +428,10 @@ encodeLen(u8 *ssdt_ptr, int length, int bytes) #define MEM_OFFSET_END 63 #define MEM_OFFSET_SIZE 79
-u64 nb_hp_memslots = 0; +u64 nb_hp_memslots = 0, nb_numanodes; +u64 *numa_data, *hp_memdata; +u64 below_4g_hp_mem_size = 0; +u64 above_4g_hp_mem_size = 0; struct srat_memory_affinity *mem;
#define SSDT_SIGNATURE 0x54445353 // SSDT @@ -763,17 +766,7 @@ acpi_build_srat_memory(struct srat_memory_affinity *numamem, static void * build_srat(void) { - int nb_numa_nodes = qemu_cfg_get_numa_nodes(); - - u64 *numadata = malloc_tmphigh(sizeof(u64) * (MaxCountCPUs + nb_numa_nodes)); - if (!numadata) { - warn_noalloc(); - return NULL; - } - - qemu_cfg_get_numa_data(numadata, MaxCountCPUs + nb_numa_nodes); - - qemu_cfg_get_numa_data(&nb_hp_memslots, 1); + int nb_numa_nodes = nb_numanodes; struct system_resource_affinity_table *srat; int srat_size = sizeof(*srat) + sizeof(struct srat_processor_affinity) * MaxCountCPUs + @@ -782,7 +775,7 @@ build_srat(void) srat = malloc_high(srat_size); if (!srat) { warn_noalloc(); - free(numadata); + free(numa_data); return NULL; }
@@ -791,6 +784,7 @@ build_srat(void) struct srat_processor_affinity *core = (void*)(srat + 1); int i; u64 curnode; + u64 *numadata = numa_data;
for (i = 0; i < MaxCountCPUs; ++i) { core->type = SRAT_PROCESSOR; @@ -847,15 +841,7 @@ build_srat(void) mem = (void*)numamem;
if (nb_hp_memslots) { - u64 *hpmemdata = malloc_tmphigh(sizeof(u64) * (3 * nb_hp_memslots)); - if (!hpmemdata) { - warn_noalloc(); - free(hpmemdata); - free(numadata); - return NULL; - } - - qemu_cfg_get_numa_data(hpmemdata, 3 * nb_hp_memslots); + u64 *hpmemdata = hp_memdata;
for (i = 1; i < nb_hp_memslots + 1; ++i) { mem_base = *hpmemdata++; @@ -865,7 +851,7 @@ build_srat(void) numamem++; slots++; } - free(hpmemdata); + free(hp_memdata); }
for (; slots < nb_numa_nodes + nb_hp_memslots + 2; slots++) { @@ -875,10 +861,58 @@ build_srat(void)
build_header((void*)srat, SRAT_SIGNATURE, srat_size, 1);
- free(numadata); + free(numa_data); return srat; }
+/* QEMU paravirt SRAT entries need to be read in before pci initilization */ +void read_srat_early(void) +{ + int i; + + nb_numanodes = qemu_cfg_get_numa_nodes(); + u64 *hpmemdata; + u64 mem_len, mem_base; + + numa_data = malloc_tmphigh(sizeof(u64) * (MaxCountCPUs + nb_numanodes)); + if (!numa_data) { + warn_noalloc(); + } + + qemu_cfg_get_numa_data(numa_data, MaxCountCPUs + nb_numanodes); + qemu_cfg_get_numa_data(&nb_hp_memslots, 1); + + if (nb_hp_memslots) { + hp_memdata = malloc_tmphigh(sizeof(u64) * (3 * nb_hp_memslots)); + if (!hp_memdata) { + warn_noalloc(); + free(hp_memdata); + free(numa_data); + } + + qemu_cfg_get_numa_data(hp_memdata, 3 * nb_hp_memslots); + hpmemdata = hp_memdata; + + for (i = 1; i < nb_hp_memslots + 1; ++i) { + mem_base = *hpmemdata++; + mem_len = *hpmemdata++; + hpmemdata++; + if (mem_base >= 0x100000000LL) { + above_4g_hp_mem_size += mem_len; + } + /* if dimm fits before pci hole, append it normally */ + else if (mem_base + mem_len <= BUILD_PCIMEM_START) { + below_4g_hp_mem_size += mem_len; + } + /* otherwise place it above 4GB */ + else { + above_4g_hp_mem_size += mem_len; + } + } + + } +} + static const struct pci_device_id acpi_find_tbl[] = { /* PIIX4 Power Management device. */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, NULL), diff --git a/src/acpi.h b/src/acpi.h index cb21561..d29837f 100644 --- a/src/acpi.h +++ b/src/acpi.h @@ -5,6 +5,9 @@
void acpi_bios_init(void); u32 find_resume_vector(void); +void read_srat_early(void); +extern u64 below_4g_hp_mem_size; +extern u64 above_4g_hp_mem_size;
#define RSDP_SIGNATURE 0x2052545020445352LL // "RSD PTR "
diff --git a/src/pciinit.c b/src/pciinit.c index 31115ee..c5a4b24 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -12,6 +12,7 @@ #include "ioport.h" // PORT_ATA1_CMD_BASE #include "config.h" // CONFIG_* #include "xen.h" // usingXen +#include "acpi.h"
#define PCI_DEVICE_MEM_MIN 0x1000 #define PCI_BRIDGE_IO_MIN 0x1000 @@ -597,7 +598,7 @@ static void pci_region_map_entries(struct pci_bus *busses, struct pci_region *r)
static void pci_bios_map_devices(struct pci_bus *busses) { - pcimem_start = RamSize; + pcimem_start = RamSize + below_4g_hp_mem_size;
if (pci_bios_init_root_regions(busses)) { struct pci_region r64_mem, r64_pref; @@ -616,7 +617,8 @@ static void pci_bios_map_devices(struct pci_bus *busses) u64 align_mem = pci_region_align(&r64_mem); u64 align_pref = pci_region_align(&r64_pref);
- r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem); + r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G + + above_4g_hp_mem_size, align_mem); r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref); pcimem64_start = r64_mem.base; pcimem64_end = r64_pref.base + sum_pref; diff --git a/src/post.c b/src/post.c index 924b311..c37730b 100644 --- a/src/post.c +++ b/src/post.c @@ -234,6 +234,9 @@ maininit(void) // Initialize mtrr mtrr_setup();
+ smp_get_ncpus(); + read_srat_early(); + // Initialize pci pci_setup(); smm_init(); diff --git a/src/smp.c b/src/smp.c index 4975412..3922776 100644 --- a/src/smp.c +++ b/src/smp.c @@ -138,7 +138,11 @@ smp_probe(void)
// Restore memory. *(u64*)BUILD_AP_BOOT_ADDR = old; +}
+void +smp_get_ncpus(void) +{ MaxCountCPUs = qemu_cfg_get_max_cpus(); if (!MaxCountCPUs || MaxCountCPUs < CountCPUs) MaxCountCPUs = CountCPUs;