Originally, NUMA info was packed into an array of 64-bit values, which was implicit and hard to maintain, so we define a struct for this info, hoping to make it as clear as possible.
These changes also involve SeaBIOS patches, which were sent to the SeaBIOS mailing list.
Li Guang(6) [PATCH 1/6] pc/bios: move common BIOS_CFG_IOPORT into fw_cfg.h [PATCH 2/6] pc/numa: refactor bios_init function [PATCH 3/6] bitops: change BITS_TO_LONGS [PATCH 4/6] pc: format load_linux() [PATCH 5/6] load_linux: report open kernel file & its size error [PATCH 6/6] load_linux: change kernel header size allocation
hw/fw_cfg.h | 4 ++++ hw/pc.c | 163 +++++++++++++++++++++++++++++++------------------------------- hw/sun4u.c | 3 +-- 3 files changed, 89 insertions(+), 81 deletions(-)
BIOS_CFG_IOPORT is commonly used, so move it to fw_cfg.h. The name bochs_bios_init seems misleading: the BIOS may be SeaBIOS, SeaBIOS is not only for Bochs, and we are in QEMU anyway.
Signed-off-by: liguang lig.fnst@cn.fujitsu.com --- hw/fw_cfg.h | 4 ++++ hw/pc.c | 9 ++++----- hw/sun4u.c | 3 +-- 3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h index 05c8df1..6b3147d 100644 --- a/hw/fw_cfg.h +++ b/hw/fw_cfg.h @@ -38,6 +38,10 @@
#define FW_CFG_INVALID 0xffff
+#define FW_CFG_CTL_IOPORT 0x510 +#define FW_CFG_DATA_IOPORT 0x511 + + #ifndef NO_QEMU_PROTOS typedef struct FWCfgFile { uint32_t size; /* file size */ diff --git a/hw/pc.c b/hw/pc.c index 34b6dff..d010c75 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -64,7 +64,6 @@
/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables. */ #define ACPI_DATA_SIZE 0x10000 -#define BIOS_CFG_IOPORT 0x510 #define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0) #define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1) #define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2) @@ -556,14 +555,14 @@ int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) * This function returns the limit for the APIC ID value, so that all * CPU APIC IDs are < pc_apic_id_limit(). * - * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). + * This is used for FW_CFG_MAX_CPUS. See comments on bios_init(). */ static unsigned int pc_apic_id_limit(unsigned int max_cpus) { return x86_cpu_apic_id_from_index(max_cpus - 1) + 1; }
-static void *bochs_bios_init(void) +static void *bios_init(void) { void *fw_cfg; uint8_t *smbios_table; @@ -572,7 +571,7 @@ static void *bochs_bios_init(void) int i, j; unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
- fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0); + fw_cfg = fw_cfg_init(FW_CFG_CTL_IOPORT, FW_CFG_DATA_IOPORT, 0, 0); /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: * * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug @@ -954,7 +953,7 @@ void *pc_memory_init(MemoryRegion *system_memory, option_rom_mr, 1);
- fw_cfg = bochs_bios_init(); + fw_cfg = bios_init(); rom_set_fw(fw_cfg);
if (linux_boot) { diff --git a/hw/sun4u.c b/hw/sun4u.c index 9fbda29..1bdc443 100644 --- a/hw/sun4u.c +++ b/hw/sun4u.c @@ -76,7 +76,6 @@ #define PROM_FILENAME "openbios-sparc64" #define NVRAM_SIZE 0x2000 #define MAX_IDE_BUS 2 -#define BIOS_CFG_IOPORT 0x510 #define FW_CFG_SPARC64_WIDTH (FW_CFG_ARCH_LOCAL + 0x00) #define FW_CFG_SPARC64_HEIGHT (FW_CFG_ARCH_LOCAL + 0x01) #define FW_CFG_SPARC64_DEPTH (FW_CFG_ARCH_LOCAL + 0x02) @@ -877,7 +876,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem, graphic_width, graphic_height, graphic_depth, (uint8_t *)&nd_table[0].macaddr);
- fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0); + fw_cfg = fw_cfg_init(FW_CFG_CTL_IOPORT, FW_CFG_DATA_IOPORT, 0, 0); fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
Originally, NUMA data was packed into an array, which was implicit and hard to maintain. We define a struct for this data, hoping to make it as clear as possible. Also, we only pass the cpumask of the corresponding nodes to SeaBIOS and leave the parsing work to it.
Signed-off-by: liguang lig.fnst@cn.fujitsu.com --- hw/pc.c | 40 ++++++++++++++++++---------------------- 1 files changed, 18 insertions(+), 22 deletions(-)
diff --git a/hw/pc.c b/hw/pc.c index d010c75..893c930 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -562,13 +562,21 @@ static unsigned int pc_apic_id_limit(unsigned int max_cpus) return x86_cpu_apic_id_from_index(max_cpus - 1) + 1; }
+struct srat_data { + uint64_t apic_map; /* size is MAX_NODES */ + uint64_t memory_size; +}; + static void *bios_init(void) { void *fw_cfg; uint8_t *smbios_table; size_t smbios_len; - uint64_t *numa_fw_cfg; - int i, j; + struct fw_numa_cfg { + uint32_t nr_node; + struct srat_data *srat_data; + } fw_cfg_numa; + int i; unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
fw_cfg = fw_cfg_init(FW_CFG_CTL_IOPORT, FW_CFG_DATA_IOPORT, 0, 0); @@ -601,28 +609,16 @@ static void *bios_init(void) &e820_table, sizeof(e820_table));
fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg)); - /* allocate memory for the NUMA channel: one (64bit) word for the number - * of nodes, one word for each VCPU->node and one word for each node to - * hold the amount of memory. - */ - numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes); - numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); - for (i = 0; i < max_cpus; i++) { - unsigned int apic_id = x86_cpu_apic_id_from_index(i); - assert(apic_id < apic_id_limit); - for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, node_cpumask[j])) { - numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); - break; - } - } - } + + fw_cfg_numa.srat_data = g_new0(struct srat_data, nb_numa_nodes); + fw_cfg_numa.nr_node = cpu_to_le64(nb_numa_nodes); + for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]); + fw_cfg_numa.srat_data[i].apic_map = *node_cpumask[i]; + fw_cfg_numa.srat_data[i].memory_size = cpu_to_le64(node_mem[i]); } - fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, - (1 + apic_id_limit + nb_numa_nodes) * - sizeof(*numa_fw_cfg)); + + fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, &fw_cfg_numa, sizeof(fw_cfg_numa));
return fw_cfg; }
On Mon, Feb 4, 2013 at 2:27 AM, liguang lig.fnst@cn.fujitsu.com wrote:
Originally, NUMA data was packed into an array, which was implicit and hard to maintain. We define a struct for this data, hoping to make it as clear as possible. Also, we only pass the cpumask of the corresponding nodes to SeaBIOS and leave the parsing work to it.
Signed-off-by: liguang lig.fnst@cn.fujitsu.com
hw/pc.c | 40 ++++++++++++++++++---------------------- 1 files changed, 18 insertions(+), 22 deletions(-)
diff --git a/hw/pc.c b/hw/pc.c index d010c75..893c930 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -562,13 +562,21 @@ static unsigned int pc_apic_id_limit(unsigned int max_cpus) return x86_cpu_apic_id_from_index(max_cpus - 1) + 1; }
+struct srat_data {
SRATData and typedef.
- uint64_t apic_map; /* size is MAX_NODES */
- uint64_t memory_size;
+};
static void *bios_init(void) { void *fw_cfg; uint8_t *smbios_table; size_t smbios_len;
- uint64_t *numa_fw_cfg;
- int i, j;
- struct fw_numa_cfg {
FwNUMACfg
uint32_t nr_node;
struct srat_data *srat_data;
} fw_cfg_numa;
int i; unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
fw_cfg = fw_cfg_init(FW_CFG_CTL_IOPORT, FW_CFG_DATA_IOPORT, 0, 0);
@@ -601,28 +609,16 @@ static void *bios_init(void) &e820_table, sizeof(e820_table));
fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
- /* allocate memory for the NUMA channel: one (64bit) word for the number
* of nodes, one word for each VCPU->node and one word for each node to
* hold the amount of memory.
*/
- numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
- numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
- for (i = 0; i < max_cpus; i++) {
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
assert(apic_id < apic_id_limit);
for (j = 0; j < nb_numa_nodes; j++) {
if (test_bit(i, node_cpumask[j])) {
numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
break;
}
}
- }
- fw_cfg_numa.srat_data = g_new0(struct srat_data, nb_numa_nodes);
- fw_cfg_numa.nr_node = cpu_to_le64(nb_numa_nodes);
- for (i = 0; i < nb_numa_nodes; i++) {
numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
fw_cfg_numa.srat_data[i].apic_map = *node_cpumask[i];
}fw_cfg_numa.srat_data[i].memory_size = cpu_to_le64(node_mem[i]);
- fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
(1 + apic_id_limit + nb_numa_nodes) *
sizeof(*numa_fw_cfg));
fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, &fw_cfg_numa, sizeof(fw_cfg_numa));
return fw_cfg;
}
1.7.2.5
Signed-off-by: liguang lig.fnst@cn.fujitsu.com --- include/qemu/bitops.h | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index 74e14e5..7758792 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -20,7 +20,7 @@ #define BIT(nr) (1UL << (nr)) #define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
/** * bitops_ffs - find first bit in word.
This function seems to use the wrong coding style, so boldly try to correct it.
Signed-off-by: liguang lig.fnst@cn.fujitsu.com --- hw/pc.c | 90 +++++++++++++++++++++++++++++++------------------------------- 1 files changed, 45 insertions(+), 45 deletions(-)
diff --git a/hw/pc.c b/hw/pc.c index 893c930..01d00f6 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -639,8 +639,8 @@ static long get_file_size(FILE *f)
static void load_linux(void *fw_cfg, const char *kernel_filename, - const char *initrd_filename, - const char *kernel_cmdline, + const char *initrd_filename, + const char *kernel_cmdline, hwaddr max_ram_size) { uint16_t protocol; @@ -657,11 +657,11 @@ static void load_linux(void *fw_cfg, /* load the kernel header */ f = fopen(kernel_filename, "rb"); if (!f || !(kernel_size = get_file_size(f)) || - fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != - MIN(ARRAY_SIZE(header), kernel_size)) { - fprintf(stderr, "qemu: could not load kernel '%s': %s\n", - kernel_filename, strerror(errno)); - exit(1); + fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != + MIN(ARRAY_SIZE(header), kernel_size)) { + fprintf(stderr, "qemu: could not load kernel '%s': %s\n", + kernel_filename, strerror(errno)); + exit(1); }
/* kernel protocol version */ @@ -669,48 +669,48 @@ static void load_linux(void *fw_cfg, fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); #endif if (ldl_p(header+0x202) == 0x53726448) - protocol = lduw_p(header+0x206); + protocol = lduw_p(header+0x206); else { - /* This looks like a multiboot kernel. If it is, let's stop - treating it like a Linux kernel. */ + /* This looks like a multiboot kernel. If it is, let's stop + treating it like a Linux kernel. */ if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, kernel_cmdline, kernel_size, header)) return; - protocol = 0; + protocol = 0; }
if (protocol < 0x200 || !(header[0x211] & 0x01)) { - /* Low kernel */ - real_addr = 0x90000; - cmdline_addr = 0x9a000 - cmdline_size; - prot_addr = 0x10000; + /* Low kernel */ + real_addr = 0x90000; + cmdline_addr = 0x9a000 - cmdline_size; + prot_addr = 0x10000; } else if (protocol < 0x202) { - /* High but ancient kernel */ - real_addr = 0x90000; - cmdline_addr = 0x9a000 - cmdline_size; - prot_addr = 0x100000; + /* High but ancient kernel */ + real_addr = 0x90000; + cmdline_addr = 0x9a000 - cmdline_size; + prot_addr = 0x100000; } else { - /* High and recent kernel */ - real_addr = 0x10000; - cmdline_addr = 0x20000; - prot_addr = 0x100000; + /* High and recent kernel */ + real_addr = 0x10000; + cmdline_addr = 0x20000; + prot_addr = 0x100000; }
#if 0 fprintf(stderr, - "qemu: real_addr = 0x" TARGET_FMT_plx "\n" - "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" - "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", - real_addr, - cmdline_addr, - prot_addr); + "qemu: real_addr = 0x" TARGET_FMT_plx "\n" + "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" + "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", + real_addr, + cmdline_addr, + prot_addr); #endif
/* highest address for loading the initrd */ if (protocol >= 0x203) - initrd_max = ldl_p(header+0x22c); + initrd_max = ldl_p(header+0x22c); else - initrd_max = 0x37ffffff; + initrd_max = 0x37ffffff;
if (initrd_max >= max_ram_size-ACPI_DATA_SIZE) initrd_max = max_ram_size-ACPI_DATA_SIZE-1; @@ -720,10 +720,10 @@ static void load_linux(void *fw_cfg, fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
if (protocol >= 0x202) { - stl_p(header+0x228, cmdline_addr); + stl_p(header+0x228, cmdline_addr); } else { - stw_p(header+0x20, 0xA33F); - stw_p(header+0x22, cmdline_addr-real_addr); + stw_p(header+0x20, 0xA33F); + stw_p(header+0x22, cmdline_addr-real_addr); }
/* handle vga= parameter */ @@ -749,22 +749,22 @@ static void load_linux(void *fw_cfg, If this code is substantially changed, you may want to consider incrementing the revision. */ if (protocol >= 0x200) - header[0x210] = 0xB0; + header[0x210] = 0xB0;
/* heap */ if (protocol >= 0x201) { - header[0x211] |= 0x80; /* CAN_USE_HEAP */ - stw_p(header+0x224, cmdline_addr-real_addr-0x200); + header[0x211] |= 0x80; /* CAN_USE_HEAP */ + stw_p(header+0x224, cmdline_addr-real_addr-0x200); }
/* load initrd */ if (initrd_filename) { - if (protocol < 0x200) { - fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); - exit(1); - } + if (protocol < 0x200) { + fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); + exit(1); + }
- initrd_size = get_image_size(initrd_filename); + initrd_size = get_image_size(initrd_filename); if (initrd_size < 0) { fprintf(stderr, "qemu: error reading initrd %s\n", initrd_filename); @@ -780,14 +780,14 @@ static void load_linux(void *fw_cfg, fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size);
- stl_p(header+0x218, initrd_addr); - stl_p(header+0x21c, initrd_size); + stl_p(header+0x218, initrd_addr); + stl_p(header+0x21c, initrd_size); }
/* load kernel and setup */ setup_size = header[0x1f1]; if (setup_size == 0) - setup_size = 4; + setup_size = 4; setup_size = (setup_size+1)*512; kernel_size -= setup_size;
Signed-off-by: liguang lig.fnst@cn.fujitsu.com --- hw/pc.c | 14 +++++++++++--- 1 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/hw/pc.c b/hw/pc.c index 01d00f6..0ccd775 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -652,12 +652,20 @@ static void load_linux(void *fw_cfg, char *vmode;
/* Align to 16 bytes as a paranoia measure */ - cmdline_size = (strlen(kernel_cmdline)+16) & ~15; + cmdline_size = (strlen(kernel_cmdline)+16) & ~0xf;
/* load the kernel header */ f = fopen(kernel_filename, "rb"); - if (!f || !(kernel_size = get_file_size(f)) || - fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != + if (!f) { + fprintf(stderr, "can't open kernel image file\n"); + exit(1); + } + kernel_size = get_file_size(f); + if (kernel_size <= 0) { + fprintf(stderr, "can't get size of kernel image file\n"); + exit(1); + } + if (fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != MIN(ARRAY_SIZE(header), kernel_size)) { fprintf(stderr, "qemu: could not load kernel '%s': %s\n", kernel_filename, strerror(errno));
On Mon, Feb 4, 2013 at 2:27 AM, liguang lig.fnst@cn.fujitsu.com wrote:
Signed-off-by: liguang lig.fnst@cn.fujitsu.com
hw/pc.c | 14 +++++++++++--- 1 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/hw/pc.c b/hw/pc.c index 01d00f6..0ccd775 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -652,12 +652,20 @@ static void load_linux(void *fw_cfg, char *vmode;
/* Align to 16 bytes as a paranoia measure */
- cmdline_size = (strlen(kernel_cmdline)+16) & ~15;
- cmdline_size = (strlen(kernel_cmdline)+16) & ~0xf;
Here 15 is 16 - 1, so 0xf seems out of place. I'd use QEMU_ALIGN_UP().
If you touch the line, please add spaces around '+'.
/* load the kernel header */ f = fopen(kernel_filename, "rb");
- if (!f || !(kernel_size = get_file_size(f)) ||
fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) !=
- if (!f) {
fprintf(stderr, "can't open kernel image file\n");
The error message from strerror(errno) would be interesting.
exit(1);
- }
- kernel_size = get_file_size(f);
- if (kernel_size <= 0) {
fprintf(stderr, "can't get size of kernel image file\n");
exit(1);
- }
- if (fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != MIN(ARRAY_SIZE(header), kernel_size)) { fprintf(stderr, "qemu: could not load kernel '%s': %s\n", kernel_filename, strerror(errno));
-- 1.7.2.5
It's not necessary to allocate 8K bytes for the kernel header; 0.5K is enough.
Signed-off-by: liguang lig.fnst@cn.fujitsu.com --- hw/pc.c | 10 ++++++---- 1 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/hw/pc.c b/hw/pc.c index 0ccd775..b6b236f 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -637,6 +637,9 @@ static long get_file_size(FILE *f) return size; }
+#define HEADER_SIZE 0x200 +#define HEADER_SIGNATURE 0x53726448 + static void load_linux(void *fw_cfg, const char *kernel_filename, const char *initrd_filename, @@ -646,7 +649,7 @@ static void load_linux(void *fw_cfg, uint16_t protocol; int setup_size, kernel_size, initrd_size = 0, cmdline_size; uint32_t initrd_max; - uint8_t header[8192], *setup, *kernel, *initrd_data; + uint8_t header[HEADER_SIZE], *setup, *kernel, *initrd_data; hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; FILE *f; char *vmode; @@ -665,8 +668,7 @@ static void load_linux(void *fw_cfg, fprintf(stderr, "can't get size of kernel image file\n"); exit(1); } - if (fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != - MIN(ARRAY_SIZE(header), kernel_size)) { + if (fread(header, 1, HEADER_SIZE, f) <= 0) { fprintf(stderr, "qemu: could not load kernel '%s': %s\n", kernel_filename, strerror(errno)); exit(1); @@ -676,7 +678,7 @@ static void load_linux(void *fw_cfg, #if 0 fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); #endif - if (ldl_p(header+0x202) == 0x53726448) + if (ldl_p(header+0x202) == HEADER_SIGNATURE) protocol = lduw_p(header+0x206); else { /* This looks like a multiboot kernel. If it is, let's stop
On Mon, Feb 4, 2013 at 2:27 AM, liguang lig.fnst@cn.fujitsu.com wrote:
It's not necessary to allocate 8K bytes for the kernel header; 0.5K is enough.
Signed-off-by: liguang lig.fnst@cn.fujitsu.com
hw/pc.c | 10 ++++++---- 1 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/hw/pc.c b/hw/pc.c index 0ccd775..b6b236f 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -637,6 +637,9 @@ static long get_file_size(FILE *f) return size; }
+#define HEADER_SIZE 0x200 +#define HEADER_SIGNATURE 0x53726448
static void load_linux(void *fw_cfg, const char *kernel_filename, const char *initrd_filename, @@ -646,7 +649,7 @@ static void load_linux(void *fw_cfg, uint16_t protocol; int setup_size, kernel_size, initrd_size = 0, cmdline_size; uint32_t initrd_max;
- uint8_t header[8192], *setup, *kernel, *initrd_data;
- uint8_t header[HEADER_SIZE], *setup, *kernel, *initrd_data; hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; FILE *f; char *vmode;
@@ -665,8 +668,7 @@ static void load_linux(void *fw_cfg, fprintf(stderr, "can't get size of kernel image file\n"); exit(1); }
- if (fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) !=
MIN(ARRAY_SIZE(header), kernel_size)) {
- if (fread(header, 1, HEADER_SIZE, f) <= 0) { fprintf(stderr, "qemu: could not load kernel '%s': %s\n", kernel_filename, strerror(errno)); exit(1);
@@ -676,7 +678,7 @@ static void load_linux(void *fw_cfg, #if 0 fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); #endif
- if (ldl_p(header+0x202) == 0x53726448)
- if (ldl_p(header+0x202) == HEADER_SIGNATURE)
Please add braces while touching the line and add spaces around '+'. Maybe you have not checked the patches with checkpatch.pl?
protocol = lduw_p(header+0x206); else { /* This looks like a multiboot kernel. If it is, let's stop
-- 1.7.2.5
Signed-off-by: liguang lig.fnst@cn.fujitsu.com --- src/acpi.c | 209 ++---------------------------------------------------------- src/acpi.h | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 204 deletions(-)
diff --git a/src/acpi.c b/src/acpi.c index 6267d7b..fde37e5 100644 --- a/src/acpi.c +++ b/src/acpi.c @@ -15,204 +15,12 @@ #include "paravirt.h" // qemu_cfg_irq0_override #include "dev-q35.h" // qemu_cfg_irq0_override
-/****************************************************/ -/* ACPI tables init */ - -/* Table structure from Linux kernel (the ACPI tables are under the - BSD license) */ - -struct acpi_table_header /* ACPI common table header */ -{ - ACPI_TABLE_HEADER_DEF -} PACKED; - -/* - * ACPI 1.0 Root System Description Table (RSDT) - */ -#define RSDT_SIGNATURE 0x54445352 // RSDT -struct rsdt_descriptor_rev1 -{ - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - u32 table_offset_entry[0]; /* Array of pointers to other */ - /* ACPI tables */ -} PACKED; - -/* - * ACPI 1.0 Firmware ACPI Control Structure (FACS) - */ -#define FACS_SIGNATURE 0x53434146 // FACS -struct facs_descriptor_rev1 -{ - u32 signature; /* ACPI Signature */ - u32 length; /* Length of structure, in bytes */ - u32 hardware_signature; /* Hardware configuration signature */ - u32 firmware_waking_vector; /* ACPI OS waking vector */ - u32 global_lock; /* Global Lock */ - u32 S4bios_f : 1; /* Indicates if S4BIOS support is present */ - u32 reserved1 : 31; /* Must be 0 */ - u8 resverved3 [40]; /* Reserved - must be zero */ -} PACKED; - - -/* - * Differentiated System Description Table (DSDT) - */ -#define DSDT_SIGNATURE 0x54445344 // DSDT - -/* - * MADT values and structures - */ - -/* Values for MADT PCATCompat */ - -#define DUAL_PIC 0 -#define MULTIPLE_APIC 1 - - -/* Master MADT */ - -#define APIC_SIGNATURE 0x43495041 // APIC -struct multiple_apic_table -{ - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - u32 local_apic_address; /* Physical address of local APIC */ -#if 0 - u32 PCATcompat : 1; /* A one indicates system also has dual 8259s */ - u32 reserved1 : 31; -#else - u32 flags; -#endif -} PACKED; - - -/* Values for Type in APIC sub-headers */ - -#define APIC_PROCESSOR 0 -#define APIC_IO 1 -#define APIC_XRUPT_OVERRIDE 2 -#define APIC_NMI 3 -#define APIC_LOCAL_NMI 4 -#define APIC_ADDRESS_OVERRIDE 5 -#define APIC_IO_SAPIC 6 -#define APIC_LOCAL_SAPIC 7 -#define APIC_XRUPT_SOURCE 8 -#define APIC_RESERVED 
9 /* 9 and greater are reserved */ - -/* - * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE) - */ -#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\ - u8 type; \ - u8 length; - -/* Sub-structures for MADT */ - -struct madt_processor_apic -{ - ACPI_SUB_HEADER_DEF - u8 processor_id; /* ACPI processor id */ - u8 local_apic_id; /* Processor's local APIC id */ -#if 0 - u32 processor_enabled: 1; /* Processor is usable if set */ - u32 reserved2 : 31; /* Reserved, must be zero */ -#else - u32 flags; -#endif -} PACKED; - -struct madt_io_apic -{ - ACPI_SUB_HEADER_DEF - u8 io_apic_id; /* I/O APIC ID */ - u8 reserved; /* Reserved - must be zero */ - u32 address; /* APIC physical address */ - u32 interrupt; /* Global system interrupt where INTI - * lines start */ -} PACKED; - -/* IRQs 5,9,10,11 */ -#define PCI_ISA_IRQ_MASK 0x0e20 - -struct madt_intsrcovr { - ACPI_SUB_HEADER_DEF - u8 bus; - u8 source; - u32 gsi; - u16 flags; -} PACKED; - -struct madt_local_nmi { - ACPI_SUB_HEADER_DEF - u8 processor_id; /* ACPI processor id */ - u16 flags; /* MPS INTI flags */ - u8 lint; /* Local APIC LINT# */ -} PACKED; - - -/* - * ACPI 2.0 Generic Address Space definition. - */ -struct acpi_20_generic_address { - u8 address_space_id; - u8 register_bit_width; - u8 register_bit_offset; - u8 reserved; - u64 address; -} PACKED; - -/* - * HPET Description Table - */ -struct acpi_20_hpet { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - u32 timer_block_id; - struct acpi_20_generic_address addr; - u8 hpet_number; - u16 min_tick; - u8 page_protect; -} PACKED; - -#define HPET_ID 0x000 -#define HPET_PERIOD 0x004 - -/* - * SRAT (NUMA topology description) table - */ - -#define SRAT_PROCESSOR 0 -#define SRAT_MEMORY 1 - -struct system_resource_affinity_table -{ - ACPI_TABLE_HEADER_DEF - u32 reserved1; - u32 reserved2[2]; -} PACKED; +#define PIIX4_ACPI_ENABLE 0xf1 +#define PIIX4_ACPI_DISABLE 0xf0 +#define PIIX4_GPE0_BLK 0xafe0 +#define PIIX4_GPE0_BLK_LEN 4
-struct srat_processor_affinity -{ - ACPI_SUB_HEADER_DEF - u8 proximity_lo; - u8 local_apic_id; - u32 flags; - u8 local_sapic_eid; - u8 proximity_hi[3]; - u32 reserved; -} PACKED; - -struct srat_memory_affinity -{ - ACPI_SUB_HEADER_DEF - u8 proximity[4]; - u16 reserved1; - u32 base_addr_low,base_addr_high; - u32 length_low,length_high; - u32 reserved2; - u32 flags; - u32 reserved3[2]; -} PACKED; - -#include "acpi-dsdt.hex" +#define PIIX4_PM_INTRRUPT 9 // irq 9
static void build_header(struct acpi_table_header *h, u32 sig, int len, u8 rev) @@ -229,13 +37,6 @@ build_header(struct acpi_table_header *h, u32 sig, int len, u8 rev) h->checksum -= checksum(h, len); }
-#define PIIX4_ACPI_ENABLE 0xf1 -#define PIIX4_ACPI_DISABLE 0xf0 -#define PIIX4_GPE0_BLK 0xafe0 -#define PIIX4_GPE0_BLK_LEN 4 - -#define PIIX4_PM_INTRRUPT 9 // irq 9 - static void piix4_fadt_init(struct pci_device *pci, void *arg) { struct fadt_descriptor_rev1 *fadt = arg; diff --git a/src/acpi.h b/src/acpi.h index 715d19d..996edcb 100644 --- a/src/acpi.h +++ b/src/acpi.h @@ -124,4 +124,205 @@ struct acpi_table_mcfg { struct acpi_mcfg_allocation allocation[0]; } PACKED;
+/****************************************************/ +/* ACPI tables init */ + +/* Table structure from Linux kernel (the ACPI tables are under the + BSD license) */ + +struct acpi_table_header /* ACPI common table header */ +{ + ACPI_TABLE_HEADER_DEF +} PACKED; + +/* + * ACPI 1.0 Root System Description Table (RSDT) + */ +#define RSDT_SIGNATURE 0x54445352 // RSDT +struct rsdt_descriptor_rev1 +{ + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + u32 table_offset_entry[0]; /* Array of pointers to other */ + /* ACPI tables */ +} PACKED; + +/* + * ACPI 1.0 Firmware ACPI Control Structure (FACS) + */ +#define FACS_SIGNATURE 0x53434146 // FACS +struct facs_descriptor_rev1 +{ + u32 signature; /* ACPI Signature */ + u32 length; /* Length of structure, in bytes */ + u32 hardware_signature; /* Hardware configuration signature */ + u32 firmware_waking_vector; /* ACPI OS waking vector */ + u32 global_lock; /* Global Lock */ + u32 S4bios_f : 1; /* Indicates if S4BIOS support is present */ + u32 reserved1 : 31; /* Must be 0 */ + u8 resverved3 [40]; /* Reserved - must be zero */ +} PACKED; + + +/* + * Differentiated System Description Table (DSDT) + */ +#define DSDT_SIGNATURE 0x54445344 // DSDT + +/* + * MADT values and structures + */ + +/* Values for MADT PCATCompat */ + +#define DUAL_PIC 0 +#define MULTIPLE_APIC 1 + + +/* Master MADT */ + +#define APIC_SIGNATURE 0x43495041 // APIC +struct multiple_apic_table +{ + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + u32 local_apic_address; /* Physical address of local APIC */ +#if 0 + u32 PCATcompat : 1; /* A one indicates system also has dual 8259s */ + u32 reserved1 : 31; +#else + u32 flags; +#endif +} PACKED; + + +/* Values for Type in APIC sub-headers */ + +#define APIC_PROCESSOR 0 +#define APIC_IO 1 +#define APIC_XRUPT_OVERRIDE 2 +#define APIC_NMI 3 +#define APIC_LOCAL_NMI 4 +#define APIC_ADDRESS_OVERRIDE 5 +#define APIC_IO_SAPIC 6 +#define APIC_LOCAL_SAPIC 7 +#define APIC_XRUPT_SOURCE 8 +#define APIC_RESERVED 
9 /* 9 and greater are reserved */ + +/* + * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE) + */ +#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\ + u8 type; \ + u8 length; + +/* Sub-structures for MADT */ + +struct madt_processor_apic +{ + ACPI_SUB_HEADER_DEF + u8 processor_id; /* ACPI processor id */ + u8 local_apic_id; /* Processor's local APIC id */ +#if 0 + u32 processor_enabled: 1; /* Processor is usable if set */ + u32 reserved2 : 31; /* Reserved, must be zero */ +#else + u32 flags; +#endif +} PACKED; + +struct madt_io_apic +{ + ACPI_SUB_HEADER_DEF + u8 io_apic_id; /* I/O APIC ID */ + u8 reserved; /* Reserved - must be zero */ + u32 address; /* APIC physical address */ + u32 interrupt; /* Global system interrupt where INTI + * lines start */ +} PACKED; + +/* IRQs 5,9,10,11 */ +#define PCI_ISA_IRQ_MASK 0x0e20 + +struct madt_intsrcovr { + ACPI_SUB_HEADER_DEF + u8 bus; + u8 source; + u32 gsi; + u16 flags; +} PACKED; + +struct madt_local_nmi { + ACPI_SUB_HEADER_DEF + u8 processor_id; /* ACPI processor id */ + u16 flags; /* MPS INTI flags */ + u8 lint; /* Local APIC LINT# */ +} PACKED; + + +/* + * ACPI 2.0 Generic Address Space definition. 
+ */ +struct acpi_20_generic_address { + u8 address_space_id; + u8 register_bit_width; + u8 register_bit_offset; + u8 reserved; + u64 address; +} PACKED; + +/* + * HPET Description Table + */ +struct acpi_20_hpet { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + u32 timer_block_id; + struct acpi_20_generic_address addr; + u8 hpet_number; + u16 min_tick; + u8 page_protect; +} PACKED; + +#define HPET_ID 0x000 +#define HPET_PERIOD 0x004 + +/* + * SRAT (NUMA topology description) table + */ + +#define SRAT_PROCESSOR 0 +#define SRAT_MEMORY 1 + +struct system_resource_affinity_table +{ + ACPI_TABLE_HEADER_DEF + u32 reserved1; + u32 reserved2[2]; +} PACKED; + +struct srat_processor_affinity +{ + ACPI_SUB_HEADER_DEF + u8 proximity_lo; + u8 local_apic_id; + u32 flags; + u8 local_sapic_eid; + u8 proximity_hi[3]; + u32 reserved; +} PACKED; + +struct srat_memory_affinity +{ + ACPI_SUB_HEADER_DEF + u8 proximity[4]; + u16 reserved1; + u32 base_addr_low,base_addr_high; + u32 length_low,length_high; + u32 reserved2; + u32 flags; + u32 reserved3[2]; +} PACKED; + +#include "acpi-dsdt.hex" + + + #endif // acpi.h
On Mon, Feb 04, 2013 at 10:27:59AM +0800, liguang wrote:
Signed-off-by: liguang lig.fnst@cn.fujitsu.com
Thanks. Some comments.
[...]
--- a/src/acpi.h +++ b/src/acpi.h
[...]
+#include "acpi-dsdt.hex"
Moving the acpi structure defines to the header is fine, but moving the DSDT code into the header is definitely not right.
-Kevin
在 2013-02-06三的 23:15 -0500,Kevin O'Connor写道:
On Mon, Feb 04, 2013 at 10:27:59AM +0800, liguang wrote:
Signed-off-by: liguang lig.fnst@cn.fujitsu.com
Thanks. Some comments.
[...]
--- a/src/acpi.h +++ b/src/acpi.h
[...]
+#include "acpi-dsdt.hex"
Moving the acpi structure defines to the header is fine, but moving the DSDT code into the header is definitely not right.
OK, will remove this include.
-Kevin
The old NUMA format obtained from fw_cfg is: number of nodes, node id of each cpu (array), node memory size (array).
Now, format it as an array of: apic_map, memory_size.
It has the advantage of being simple and clear.
Signed-off-by: liguang lig.fnst@cn.fujitsu.com --- src/acpi.c | 57 ++++++++++++++++++++++++++++++++++++------------------- src/acpi.h | 5 ++++ src/paravirt.c | 2 +- src/paravirt.h | 1 + 4 files changed, 44 insertions(+), 21 deletions(-)
diff --git a/src/acpi.c b/src/acpi.c index fde37e5..7004a60 100644 --- a/src/acpi.c +++ b/src/acpi.c @@ -473,44 +473,57 @@ acpi_build_srat_memory(struct srat_memory_affinity *numamem, static void * build_srat(void) { - int nb_numa_nodes = qemu_cfg_get_numa_nodes(); + int nr_nodes = qemu_cfg_get_numa_nodes(); + int *node_cpu = malloc_tmp(sizeof(int));
- if (nb_numa_nodes == 0) + if (nr_nodes == 0) return NULL;
- u64 *numadata = malloc_tmphigh(sizeof(u64) * (MaxCountCPUs + nb_numa_nodes)); - if (!numadata) { + if (!node_cpu) + return NULL; + + struct srat_data *sd = malloc_tmp(sizeof(struct srat_data)*nr_nodes); + if (!sd) { warn_noalloc(); return NULL; }
- qemu_cfg_get_numa_data(numadata, MaxCountCPUs + nb_numa_nodes); + qemu_cfg_get_numa_data((u64 *)sd, nr_nodes);
struct system_resource_affinity_table *srat; int srat_size = sizeof(*srat) + sizeof(struct srat_processor_affinity) * MaxCountCPUs + - sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2); + sizeof(struct srat_memory_affinity) * (nr_nodes + 2);
srat = malloc_high(srat_size); if (!srat) { warn_noalloc(); - free(numadata); + free(srat); return NULL; }
memset(srat, 0, srat_size); - srat->reserved1=1; + srat->reserved1 = 1; struct srat_processor_affinity *core = (void*)(srat + 1); - int i; + int i, j; u64 curnode;
+ for (i = 0; i < nr_nodes; i++) { + if (sd[i].apic_map == 0) + continue; + for (j = 0; j < MaxCountCPUs; j++) { + if (sd[i].apic_map & 1 << j) + node_cpu[j] = i; + } + } + for (i = 0; i < MaxCountCPUs; ++i) { core->type = SRAT_PROCESSOR; core->length = sizeof(*core); core->local_apic_id = i; - curnode = *numadata++; + curnode = i; core->proximity_lo = curnode; - memset(core->proximity_hi, 0, 3); + memset(core->proximity_hi, 0, sizeof(core->proximity_hi)); core->local_sapic_eid = 0; if (apic_id_is_present(i)) core->flags = cpu_to_le32(1); @@ -527,15 +540,19 @@ build_srat(void) int slots = 0; u64 mem_len, mem_base, next_base = 0;
- acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1); - next_base = 1024 * 1024; +#define MEM_1K (1UL << 10) +#define MEM_1M (1UL << 20) +#define MEM_1G (1ULL << 30) + + acpi_build_srat_memory(numamem, 0, 640*MEM_1K, 0, 1); + next_base = MEM_1M; numamem++; slots++; - for (i = 1; i < nb_numa_nodes + 1; ++i) { + for (i = 1; i < nr_nodes + 1; ++i) { mem_base = next_base; - mem_len = *numadata++; + mem_len = sd[i].memory_size; if (i == 1) - mem_len -= 1024 * 1024; + mem_len -= MEM_1M; next_base = mem_base + mem_len;
/* Cut out the PCI hole */ @@ -546,22 +563,22 @@ build_srat(void) numamem++; slots++; } - mem_base = 1ULL << 32; + mem_base = MEM_1G; mem_len = next_base - RamSize; - next_base += (1ULL << 32) - RamSize; + next_base += MEM_1G - RamSize; } acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); numamem++; slots++; } - for (; slots < nb_numa_nodes + 2; slots++) { + for (; slots < nr_nodes + 2; slots++) { acpi_build_srat_memory(numamem, 0, 0, 0, 0); numamem++; }
build_header((void*)srat, SRAT_SIGNATURE, srat_size, 1);
- free(numadata); + free(sd); return srat; }
diff --git a/src/acpi.h b/src/acpi.h index 996edcb..bfa2c79 100644 --- a/src/acpi.h +++ b/src/acpi.h @@ -321,6 +321,11 @@ struct srat_memory_affinity u32 reserved3[2]; } PACKED;
+struct srat_data { + u64 apic_map; + u64 memory_size; +}; + #include "acpi-dsdt.hex"
diff --git a/src/paravirt.c b/src/paravirt.c index 4b5c441..e143ca7 100644 --- a/src/paravirt.c +++ b/src/paravirt.c @@ -291,7 +291,7 @@ void qemu_cfg_get_numa_data(u64 *data, int n) int i;
for (i = 0; i < n; i++) - qemu_cfg_read((u8*)(data + i), sizeof(u64)); + qemu_cfg_read((u8*)(data + i), sizeof(struct srat_data)); }
u16 qemu_cfg_get_max_cpus(void) diff --git a/src/paravirt.h b/src/paravirt.h index a284c41..2b25c4f 100644 --- a/src/paravirt.h +++ b/src/paravirt.h @@ -3,6 +3,7 @@
#include "config.h" // CONFIG_COREBOOT #include "util.h" +#include "acpi.h"
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It * should be used to determine that a VM is running under KVM.
On Mon, Feb 04, 2013 at 10:28:00AM +0800, liguang wrote:
the old numa format got from fw_cfg is: number of nodes node id of cpu (array) node memory size (array)
now, format it like array of: apic_map, memory_size,
it has the advantage of simple and clear.
With this change, will old versions of seabios work with new versions of qemu, and old versions of qemu work with new versions of seabios?
Also, can you provide a high-level summary of why this change is useful to an end-user?
-Kevin
Sorry for the late reply.
在 2013-02-06三的 23:18 -0500,Kevin O'Connor写道:
On Mon, Feb 04, 2013 at 10:28:00AM +0800, liguang wrote:
the old numa format got from fw_cfg is: number of nodes node id of cpu (array) node memory size (array)
now, format it like array of: apic_map, memory_size,
it has the advantage of simple and clear.
With this change, will old versions of seabios work with new versions of qemu, and old versions of qemu work with new versions of seabios?
Maybe not.
Also, can you provide a high-level summary of why this change is useful to an end-user?
This change has nothing to do with the end user; it is just for a clearer design.
-Kevin