On Wed, 26 Jun 2013 17:15:03 +0800
Hu Tao <hutao(a)cn.fujitsu.com> wrote:
> The memory device generation is guided by qemu paravirt info. Seabios
> uses the info to set up SRAT entries for the hotplug-able memory slots,
> and to generate appropriate memory device objects. One memory device
> (and corresponding SRAT entry) is generated for each hotplug-able qemu
> memslot. Currently no SSDT memory device is created for initial system
> memory.
>
> We only support up to 255 DIMMs for now (the PackageOp used for the MEON
> array can only describe an array of at most 255 elements; VarPackageOp
> would be needed to support more than 255 devices).
>
> Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovitis(a)profitbricks.com>
> Signed-off-by: Hu Tao <hutao(a)cn.fujitsu.com>
> ---
> src/acpi.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> src/paravirt.c | 8 +++
> 2 files changed, 152 insertions(+), 7 deletions(-)
>
> diff --git a/src/acpi.c b/src/acpi.c
> index ce988e0..e9a0326 100644
> --- a/src/acpi.c
> +++ b/src/acpi.c
> @@ -15,6 +15,8 @@
> #include "config.h" // CONFIG_*
> #include "paravirt.h" // RamSize
> #include "dev-q35.h"
> +#include "memmap.h"
> +#include "paravirt.h"
>
> #include "acpi-dsdt.hex"
>
> @@ -250,11 +252,23 @@ encodeLen(u8 *ssdt_ptr, int length, int bytes)
> #define PCIHP_AML (ssdp_pcihp_aml + *ssdt_pcihp_start)
> #define PCI_SLOTS 32
>
> +/* 0x5B 0x82 DeviceOp PkgLength NameString DimmID */
> +#define MEM_BASE 0xaf80
> +#define MEM_AML (ssdm_mem_aml + *ssdt_mem_start)
> +#define MEM_SIZEOF (*ssdt_mem_end - *ssdt_mem_start)
> +#define MEM_OFFSET_HEX (*ssdt_mem_name - *ssdt_mem_start + 2)
> +#define MEM_OFFSET_ID (*ssdt_mem_id - *ssdt_mem_start)
> +#define MEM_OFFSET_PXM 31
> +#define MEM_OFFSET_START 55
> +#define MEM_OFFSET_END 63
> +#define MEM_OFFSET_SIZE 79
> +
> #define SSDT_SIGNATURE 0x54445353 // SSDT
> #define SSDT_HEADER_LENGTH 36
>
> #include "ssdt-misc.hex"
> #include "ssdt-pcihp.hex"
> +#include "ssdt-mem.hex"
>
> #define PCI_RMV_BASE 0xae0c
>
> @@ -306,9 +320,100 @@ static void patch_pcihp(int slot, u8 *ssdt_ptr, u32 eject)
> }
> }
>
> +static void build_memdev(u8 *ssdt_ptr, int i, u64 mem_base, u64 mem_len, u8 node)
> +{
> + memcpy(ssdt_ptr, MEM_AML, MEM_SIZEOF);
> + ssdt_ptr[MEM_OFFSET_HEX] = getHex(i >> 4);
> + ssdt_ptr[MEM_OFFSET_HEX+1] = getHex(i);
> + ssdt_ptr[MEM_OFFSET_ID] = i;
> + ssdt_ptr[MEM_OFFSET_PXM] = node;
> + *(u64*)(ssdt_ptr + MEM_OFFSET_START) = cpu_to_le64(mem_base);
> + *(u64*)(ssdt_ptr + MEM_OFFSET_END) = cpu_to_le64(mem_base + mem_len);
> + *(u64*)(ssdt_ptr + MEM_OFFSET_SIZE) = cpu_to_le64(mem_len);
> +}
> +
> +static u8 *build_memssdt(u8 *ssdt_ptr, int memssdt_len,
> + u64 *numadimmsmap, int nb_memdevs)
> +{
> + u64 mem_base, mem_len;
> + u64 *dimm = numadimmsmap;
> + int node;
> + int i;
> +
> + // build Scope(_SB_) header
> + *(ssdt_ptr++) = 0x10; // ScopeOp
> + ssdt_ptr = encodeLen(ssdt_ptr, memssdt_len, 3);
> + *(ssdt_ptr++) = '_';
> + *(ssdt_ptr++) = 'S';
> + *(ssdt_ptr++) = 'B';
> + *(ssdt_ptr++) = '_';
Windows does not cope well with two \_SB scopes in one SSDT table and BSODs.
Just drop this one and add the devices to the already existing scope, or
alternatively create a second SSDT table.
> + for (i = 0; i < nb_memdevs; i++) {
> + mem_base = *dimm++;
> + mem_len = *dimm++;
> + node = *dimm++;
> + build_memdev(ssdt_ptr, i, mem_base, mem_len, node);
> + ssdt_ptr += MEM_SIZEOF;
> + }
> +
> + // build "Method(MTFY, 2) {If (LEqual(Arg0, 0x00)) {Notify(CM00, Arg1)} ...}"
> + *(ssdt_ptr++) = 0x14; // MethodOp
> + ssdt_ptr = encodeLen(ssdt_ptr, 2+5+(12*nb_memdevs), 2);
> + *(ssdt_ptr++) = 'M';
> + *(ssdt_ptr++) = 'T';
> + *(ssdt_ptr++) = 'F';
> + *(ssdt_ptr++) = 'Y';
> + *(ssdt_ptr++) = 0x02;
> + for (i=0; i<nb_memdevs; i++) {
> + *(ssdt_ptr++) = 0xA0; // IfOp
> + ssdt_ptr = encodeLen(ssdt_ptr, 11, 1);
> + *(ssdt_ptr++) = 0x93; // LEqualOp
> + *(ssdt_ptr++) = 0x68; // Arg0Op
> + *(ssdt_ptr++) = 0x0A; // BytePrefix
> + *(ssdt_ptr++) = i;
> + *(ssdt_ptr++) = 0x86; // NotifyOp
> + *(ssdt_ptr++) = 'M';
> + *(ssdt_ptr++) = 'P';
> + *(ssdt_ptr++) = getHex(i >> 4);
> + *(ssdt_ptr++) = getHex(i);
> + *(ssdt_ptr++) = 0x69; // Arg1Op
> + }
> +
> + // build "Name(MEON, Package() { One, One, ..., Zero, Zero, ... })"
> + *(ssdt_ptr++) = 0x08; // NameOp
> + *(ssdt_ptr++) = 'M';
> + *(ssdt_ptr++) = 'E';
> + *(ssdt_ptr++) = 'O';
> + *(ssdt_ptr++) = 'N';
> + *(ssdt_ptr++) = 0x12; // PackageOp
> + ssdt_ptr = encodeLen(ssdt_ptr, 2+1+(1*nb_memdevs), 2);
> + *(ssdt_ptr++) = nb_memdevs;
> +
> + dimm = numadimmsmap;
> + u8 memslot_status = 0, enabled;
> +
> + for (i = 0; i < nb_memdevs; i++) {
> + enabled = 0;
> + if (i % 8 == 0)
> + memslot_status = inb(MEM_BASE + i/8);
> + enabled = memslot_status & 1;
> + mem_base = *dimm++;
> + mem_len = *dimm++;
> + dimm++; // node
> + *(ssdt_ptr++) = enabled ? 0x01 : 0x00;
> + if (enabled)
> + add_e820(mem_base, mem_len, E820_RAM);
> + memslot_status = memslot_status >> 1;
> + }
> +
> + return ssdt_ptr;
> +}
> +
> static void*
> build_ssdt(void)
> {
> + int nb_memdevs;
> + u64 *numadimmsmap;
> int acpi_cpus = MaxCountCPUs > 0xff ? 0xff : MaxCountCPUs;
> int length = (sizeof(ssdp_misc_aml) // _S3_ / _S4_ / _S5_
> + (1+3+4) // Scope(_SB_)
> @@ -318,9 +423,20 @@ build_ssdt(void)
> + (1+3+4) // Scope(PCI0)
> + ((PCI_SLOTS - 1) * PCIHP_SIZEOF) // slots
> + (1+2+5+(12*(PCI_SLOTS - 1)))); // PCNT
> - u8 *ssdt = malloc_high(length);
> +
> + numadimmsmap = romfile_loadfile("etc/numa-dimm-map", &nb_memdevs);
> + nb_memdevs /= 3 * sizeof(u64);
> +
> + // for build_memssdt
> + int memssdt_length = (1+3+4)
> + + (nb_memdevs * MEM_SIZEOF)
> + + (1+2+5+(12*nb_memdevs))
> + + (6+2+1+(1*nb_memdevs));
> +
> + u8 *ssdt = malloc_high(length + memssdt_length);
> if (! ssdt) {
> warn_noalloc();
> + free(numadimmsmap);
> return NULL;
> }
> u8 *ssdt_ptr = ssdt;
> @@ -411,10 +527,13 @@ build_ssdt(void)
>
> ssdt_ptr = build_notify(ssdt_ptr, "PCNT", 1, PCI_SLOTS, "S00_", 1);
>
> + ssdt_ptr = build_memssdt(ssdt_ptr, memssdt_length, numadimmsmap, nb_memdevs);
^^^^^^^ the name build_memssdt is misleading, since the result of the call is not an SSDT
> +
> build_header((void*)ssdt, SSDT_SIGNATURE, ssdt_ptr - ssdt, 1);
>
> //hexdump(ssdt, ssdt_ptr - ssdt);
>
> + free(numadimmsmap);
> return ssdt;
> }
>
> @@ -458,7 +577,7 @@ acpi_build_srat_memory(struct srat_memory_affinity *numamem,
> numamem->length = sizeof(*numamem);
> memset(numamem->proximity, 0, 4);
> numamem->proximity[0] = node;
> - numamem->flags = cpu_to_le32(!!enabled);
> + numamem->flags = cpu_to_le32(!!enabled) | cpu_to_le32(0x2);
> numamem->base_addr = cpu_to_le64(base);
> numamem->range_length = cpu_to_le64(len);
> }
> @@ -466,18 +585,22 @@ acpi_build_srat_memory(struct srat_memory_affinity *numamem,
> static void *
> build_srat(void)
> {
> - int numadatasize, numacpusize;
> + int numadatasize, numacpusize, nb_numa_dimms;
> + u64 *numadimmsmap;
> u64 *numadata = romfile_loadfile("etc/numa-nodes", &numadatasize);
> u64 *numacpumap = romfile_loadfile("etc/numa-cpu-map", &numacpusize);
> - if (!numadata || !numacpumap)
> - goto fail;
> +
> int max_cpu = numacpusize / sizeof(u64);
> int nb_numa_nodes = numadatasize / sizeof(u64);
>
> + numadimmsmap = romfile_loadfile("etc/numa-dimm-map", &nb_numa_dimms);
> +
> + nb_numa_dimms /= 3 * sizeof(u64);
> +
> struct system_resource_affinity_table *srat;
> int srat_size = sizeof(*srat) +
> sizeof(struct srat_processor_affinity) * max_cpu +
> - sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2);
> + sizeof(struct srat_memory_affinity) * (nb_numa_nodes + nb_numa_dimms + 2);
>
> srat = malloc_high(srat_size);
> if (!srat) {
> @@ -512,6 +635,7 @@ build_srat(void)
> */
> struct srat_memory_affinity *numamem = (void*)core;
> int slots = 0;
> + int node;
> u64 mem_len, mem_base, next_base = 0;
>
> acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
> @@ -541,7 +665,18 @@ build_srat(void)
> numamem++;
> slots++;
> }
> - for (; slots < nb_numa_nodes + 2; slots++) {
> + if (nb_numa_dimms) {
> + for (i = 1; i < nb_numa_dimms + 1; ++i) {
> + mem_base = *numadimmsmap++;
> + mem_len = *numadimmsmap++;
> + node = *numadimmsmap++;
> + acpi_build_srat_memory(numamem, mem_base, mem_len, node, 1);
> + numamem++;
> + slots++;
> + }
> + }
> +
> + for (; slots < nb_numa_nodes + nb_numa_dimms + 2; slots++) {
> acpi_build_srat_memory(numamem, 0, 0, 0, 0);
> numamem++;
> }
> @@ -550,10 +685,12 @@ build_srat(void)
>
> free(numadata);
> free(numacpumap);
> + free(numadimmsmap);
> return srat;
> fail:
> free(numadata);
> free(numacpumap);
> + free(numadimmsmap);
> return NULL;
> }
>
> diff --git a/src/paravirt.c b/src/paravirt.c
> index d1a5d3e..5925c63 100644
> --- a/src/paravirt.c
> +++ b/src/paravirt.c
> @@ -240,6 +240,14 @@ qemu_cfg_legacy(void)
> , sizeof(numacount) + max_cpu*sizeof(u64)
> , numacount*sizeof(u64));
>
> + u64 dimm_count;
> + qemu_cfg_select(QEMU_CFG_NUMA);
> + qemu_cfg_skip((1 + max_cpu + numacount) * sizeof(u64));
> + qemu_cfg_read(&dimm_count, sizeof(dimm_count));
> + qemu_romfile_add("etc/numa-dimm-map", QEMU_CFG_NUMA
> + , (2 + max_cpu + numacount) * sizeof(u64),
> + dimm_count * 3 * sizeof(u64));
> +
> // e820 data
> u32 count32;
> qemu_cfg_read_entry(&count32, QEMU_CFG_E820_TABLE, sizeof(count32));