Implementation of the FW CFG DMA interface.
When running a Linux guest on top of QEMU with the -kernel option, this is the timing improvement on x86:
QEMU commit 090d0bf and SeaBIOS commit 2fc20dc QEMU startup time: .078 BIOS startup time: .060 Kernel setup time: .578 Total time: .716
QEMU with this patch series and SeaBIOS with this patch series QEMU startup time: .080 BIOS startup time: .039 Kernel setup time: .002 Total time: .121
QEMU startup time is the time between the start and the first kvm_entry. BIOS startup time is the time between the first kvm_entry and the start of function do_boot, in SeaBIOS. Kernel setup time is the time between the start of the function do_boot in SeaBIOS and the jump to the Linux kernel.
As you can see, both the BIOS (because of ACPI tables and other configurations) and the Linux kernel boot (because of the copy to memory) are greatly improved with this new interface.
Also, this new interface is an add-on to the old interface. Both interfaces are compatible and interchangeable.
Changes from v1: - Take into account order of fields in the FWCfgDmaAccess structure - Check and change endianness of FWCfgDmaAccess fields - Change order of fields in the FWCfgDmaAccess structure - Add FW_CFG_DMA_CTL_SKIP feature for control field - Split FW_CFG_SIZE in QEMU - Make FW_CFG_ID a bitmap of features - Add 64 bit address support for the transfer. Trigger when writing the low address, and address is 0 by default and at the end of each transfer. - Align ports and addresses. - Preserve old fw_cfg_comb_valid behaviour in QEMU - Update documentation to reflect all these changes
Implement guest-side of the QEMU FW CFG DMA interface for x86
Marc Marí (2): Add QEMU fw_cfg DMA interface Boot Linux using QEMU fw_cfg DMA interface
src/boot.c | 26 ++++++++++ src/fw/paravirt.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++------- src/fw/paravirt.h | 60 ++++++++++++++++++++-- src/romlayout.S | 20 ++++++++ src/util.h | 1 + 5 files changed, 232 insertions(+), 22 deletions(-)
Add support for the new fw_cfg DMA interface. The protocol is explained in QEMU documentation.
Signed-off-by: Marc Marí markmb@redhat.com --- src/fw/paravirt.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/fw/paravirt.h | 25 +++++++++++++++++---- 2 files changed, 83 insertions(+), 7 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index db22ae8..26af499 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,6 +23,7 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup +#include "stacks.h" // yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -30,6 +31,13 @@ u32 RamSize; u64 RamSizeOver4G; // Type of emulator platform. int PlatformRunningOn VARFSEG; +// cfg_dma enabled +int cfg_dma_enabled = 0; + +inline int qemu_cfg_dma_enabled(void) +{ + return cfg_dma_enabled; +}
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It * should be used to determine that a VM is running under KVM. @@ -199,16 +207,57 @@ qemu_cfg_select(u16 f) }
static void +qemu_cfg_dma_transfer(u64 address, u32 length, u32 control) +{ + QemuCfgDmaAccess access; + void *p = &access; + + *(u64 *)(p + offsetof(QemuCfgDmaAccess, address)) = + cpu_to_be64(address); + *(u32 *)(p + offsetof(QemuCfgDmaAccess, length)) = + cpu_to_be32(length); + *(u32 *)(p + offsetof(QemuCfgDmaAccess, control)) = + cpu_to_be32(control); + + barrier(); + + outl((u32)p, PORT_QEMU_CFG_DMA_ADDR_LOW); + + u32 len; + u32 ctl; + + do { + yield(); + len = be32_to_cpu(*(u32 *)(p + + offsetof(QemuCfgDmaAccess, length))); + ctl = be32_to_cpu(*(u32 *)(p + + offsetof(QemuCfgDmaAccess, control))); + } while(len != 0 && !(ctl & QEMU_CFG_DMA_CTL_ERROR)); +} + +static void qemu_cfg_read(void *buf, int len) { - insb(PORT_QEMU_CFG_DATA, buf, len); + if (qemu_cfg_dma_enabled()) { + qemu_cfg_dma_transfer((u64)(u32)buf, len, QEMU_CFG_DMA_CTL_READ); + } else { + insb(PORT_QEMU_CFG_DATA, buf, len); + } }
static void qemu_cfg_skip(int len) { - while (len--) - inb(PORT_QEMU_CFG_DATA); + if (len == 0) { + return; + } + + if (qemu_cfg_dma_enabled()) { + qemu_cfg_dma_transfer(0, len, QEMU_CFG_DMA_CTL_SKIP); + } else { + while (len--) + inb(PORT_QEMU_CFG_DATA); + } }
static void @@ -422,8 +471,18 @@ void qemu_cfg_init(void) for (i = 0; i < 4; i++) if (inb(PORT_QEMU_CFG_DATA) != sig[i]) return; + dprintf(1, "Found QEMU fw_cfg\n");
+ // Detect DMA interface. + u32 id; + qemu_cfg_read_entry(&id, QEMU_CFG_ID, sizeof(id)); + + if (id & QEMU_CFG_VERSION_DMA) { + dprintf(1, "QEMU fw_cfg DMA interface supported\n"); + cfg_dma_enabled = 1; + } + // Populate romfiles for legacy fw_cfg entries qemu_cfg_legacy();
diff --git a/src/fw/paravirt.h b/src/fw/paravirt.h index 95ffb92..5e5c61e 100644 --- a/src/fw/paravirt.h +++ b/src/fw/paravirt.h @@ -9,6 +9,12 @@ #define PF_XEN (1<<1) #define PF_KVM (1<<2)
+typedef struct QemuCfgDmaAccess { + u32 control; + u32 length; + u64 address; +} PACKED QemuCfgDmaAccess; + extern u32 RamSize; extern u64 RamSizeOver4G; extern int PlatformRunningOn; @@ -25,11 +31,22 @@ static inline int runningOnKVM(void) { }
// Common paravirt ports. -#define PORT_SMI_CMD 0x00b2 -#define PORT_SMI_STATUS 0x00b3 -#define PORT_QEMU_CFG_CTL 0x0510 -#define PORT_QEMU_CFG_DATA 0x0511 +#define PORT_SMI_CMD 0x00b2 +#define PORT_SMI_STATUS 0x00b3 +#define PORT_QEMU_CFG_CTL 0x0510 +#define PORT_QEMU_CFG_DATA 0x0511 +#define PORT_QEMU_CFG_DMA_ADDR_LOW 0x0514 +#define PORT_QEMU_CFG_DMA_ADDR_HIGH 0x0518 + +// QEMU_CFG_DMA_CONTROL bits +#define QEMU_CFG_DMA_CTL_ERROR 0x01 +#define QEMU_CFG_DMA_CTL_READ 0x02 +#define QEMU_CFG_DMA_CTL_SKIP 0x04 + +// QEMU_CFG_DMA ID bit +#define QEMU_CFG_VERSION_DMA 2
+int qemu_cfg_dma_enabled(void); void qemu_preinit(void); void qemu_platform_setup(void); void qemu_cfg_init(void);
On Mon, Aug 31, 2015 at 11:12:01AM +0200, Marc Marí wrote:
Add support for the new fw_cfg DMA interface. The protocol is explained in QEMU documentation.
Thanks for working on this.
Signed-off-by: Marc Marí markmb@redhat.com
src/fw/paravirt.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/fw/paravirt.h | 25 +++++++++++++++++---- 2 files changed, 83 insertions(+), 7 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index db22ae8..26af499 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,6 +23,7 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup +#include "stacks.h" // yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -30,6 +31,13 @@ u32 RamSize; u64 RamSizeOver4G; // Type of emulator platform. int PlatformRunningOn VARFSEG; +// cfg_dma enabled +int cfg_dma_enabled = 0;
+inline int qemu_cfg_dma_enabled(void) +{
- return cfg_dma_enabled;
+}
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
- should be used to determine that a VM is running under KVM.
@@ -199,16 +207,57 @@ qemu_cfg_select(u16 f) }
static void +qemu_cfg_dma_transfer(u64 address, u32 length, u32 control) +{
- QemuCfgDmaAccess access;
- void *p = &access;
- *(u64 *)(p + offsetof(QemuCfgDmaAccess, address)) =
cpu_to_be64(address);
- *(u32 *)(p + offsetof(QemuCfgDmaAccess, length)) =
cpu_to_be32(length);
- *(u32 *)(p + offsetof(QemuCfgDmaAccess, control)) =
cpu_to_be32(control);
- barrier();
- outl((u32)p, PORT_QEMU_CFG_DMA_ADDR_LOW);
Unless I'm missing something, the above is the same as:
QemuCfgDmaAccess access; access.address = cpu_to_be64(address); access.length = cpu_to_be32(length); access.control = cpu_to_be32(control);
barrier(); outl((u32)&access, PORT_QEMU_CFG_DMA_ADDR_LOW);
I'm not sure what the reason for the casts and offsetof() was, but I find them confusing.
- u32 len;
- u32 ctl;
- do {
yield();
len = be32_to_cpu(*(u32 *)(p +
offsetof(QemuCfgDmaAccess, length)));
ctl = be32_to_cpu(*(u32 *)(p +
offsetof(QemuCfgDmaAccess, control)));
- } while(len != 0 && !(ctl & QEMU_CFG_DMA_CTL_ERROR));
As mentioned in another email, we can't spin on "length or control" because it can lead to race conditions (we need to spin only on the field that QEMU will last update). Also, it's best not to yield() unless we really do need to wait. I think the following would be better:
while (be32_to_cpu(access.control) & ~QEMU_CFG_DMA_CTL_ERROR) yield();
+static void qemu_cfg_read(void *buf, int len) {
- insb(PORT_QEMU_CFG_DATA, buf, len);
- if (qemu_cfg_dma_enabled()) {
qemu_cfg_dma_transfer((u64)(u32)buf, len, QEMU_CFG_DMA_CTL_READ);
I would pass a "void *" to qemu_cfg_dma_transfer() and let qemu_cfg_dma_transfer() do whatever casts are necessary to make the address work.
-Kevin
On Mon, 31 Aug 2015 12:18:27 -0400 "Kevin O'Connor" kevin@koconnor.net wrote:
On Mon, Aug 31, 2015 at 11:12:01AM +0200, Marc Marí wrote:
Add support for the new fw_cfg DMA interface. The protocol is explained in QEMU documentation.
Thanks for working on this.
Signed-off-by: Marc Marí markmb@redhat.com
src/fw/paravirt.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/fw/paravirt.h | 25 +++++++++++++++++---- 2 files changed, 83 insertions(+), 7 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index db22ae8..26af499 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,6 +23,7 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup +#include "stacks.h" // yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -30,6 +31,13 @@ u32 RamSize; u64 RamSizeOver4G; // Type of emulator platform. int PlatformRunningOn VARFSEG; +// cfg_dma enabled +int cfg_dma_enabled = 0;
+inline int qemu_cfg_dma_enabled(void) +{
- return cfg_dma_enabled;
+}
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
- should be used to determine that a VM is running under KVM.
@@ -199,16 +207,57 @@ qemu_cfg_select(u16 f) }
static void +qemu_cfg_dma_transfer(u64 address, u32 length, u32 control) +{
- QemuCfgDmaAccess access;
- void *p = &access;
- *(u64 *)(p + offsetof(QemuCfgDmaAccess, address)) =
cpu_to_be64(address);
- *(u32 *)(p + offsetof(QemuCfgDmaAccess, length)) =
cpu_to_be32(length);
- *(u32 *)(p + offsetof(QemuCfgDmaAccess, control)) =
cpu_to_be32(control);
- barrier();
- outl((u32)p, PORT_QEMU_CFG_DMA_ADDR_LOW);
Unless I'm missing something, the above is the same as:
QemuCfgDmaAccess access; access.address = cpu_to_be64(address); access.length = cpu_to_be32(length); access.control = cpu_to_be32(control); barrier(); outl((u32)&access, PORT_QEMU_CFG_DMA_ADDR_LOW);
I'm not sure what the reason for the casts and offsetof() was, but I find them confusing.
The reason was that the host could write the elements in a different order, or something like that. Looking at it again, it doesn't make sense, so I'll write it in a simpler way.
- u32 len;
- u32 ctl;
- do {
yield();
len = be32_to_cpu(*(u32 *)(p +
offsetof(QemuCfgDmaAccess, length)));
ctl = be32_to_cpu(*(u32 *)(p +
offsetof(QemuCfgDmaAccess, control)));
- } while(len != 0 && !(ctl & QEMU_CFG_DMA_CTL_ERROR));
As mentioned in another email, we can't spin on "length or control" because it can lead to race conditions (we need to spin only on the field that QEMU will last update). Also, it's best not to yield() unless we really do need to wait. I think the following would be better:
while (be32_to_cpu(access.control) & ~QEMU_CFG_DMA_CTL_ERROR) yield();
I was leaving the len bit because it was in the original implementation. But it's true that taking it out simplifies everything.
Thanks Marc
+static void qemu_cfg_read(void *buf, int len) {
- insb(PORT_QEMU_CFG_DATA, buf, len);
- if (qemu_cfg_dma_enabled()) {
qemu_cfg_dma_transfer((u64)(u32)buf, len,
QEMU_CFG_DMA_CTL_READ);
I would pass a "void *" to qemu_cfg_dma_transfer() and let qemu_cfg_dma_transfer() do whatever casts are necessary to make the address work.
-Kevin
Reading Linux from the fw_cfg interface is faster using the DMA interface. For this reason, add a Linux loader that can benefit from this interface.
Signed-off-by: Marc Marí markmb@redhat.com --- src/boot.c | 26 +++++++++++++++++ src/fw/paravirt.c | 84 ++++++++++++++++++++++++++++++++++++++++++++----------- src/fw/paravirt.h | 35 +++++++++++++++++++++++ src/romlayout.S | 20 +++++++++++++ src/util.h | 1 + 5 files changed, 150 insertions(+), 16 deletions(-)
diff --git a/src/boot.c b/src/boot.c index e0f73a3..ba692db 100644 --- a/src/boot.c +++ b/src/boot.c @@ -280,6 +280,14 @@ boot_init(void) BootRetryTime = romfile_loadint("etc/boot-fail-wait", 60*1000);
loadBootOrder(); + + /* Check for booting directly from fw_cfg DMA. We assign the same boot + * priority of the linuxboot rom (if it exists). + */ + if (qemu_cfg_dma_enabled()) { + boot_add_dma("QEMU Kernel image", + bootprio_find_named_rom("genroms/linuxboot.bin", 0)); + } }
@@ -304,6 +312,7 @@ static struct hlist_head BootList VARVERIFY32INIT; #define IPL_TYPE_HARDDISK 0x02 #define IPL_TYPE_CDROM 0x03 #define IPL_TYPE_CBFS 0x20 +#define IPL_TYPE_DMA 0x21 #define IPL_TYPE_BEV 0x80 #define IPL_TYPE_BCV 0x81 #define IPL_TYPE_HALT 0xf0 @@ -398,6 +407,12 @@ boot_add_cbfs(void *data, const char *desc, int prio) bootentry_add(IPL_TYPE_CBFS, defPrio(prio, DEFAULT_PRIO), (u32)data, desc); }
+// Add a FW_CFG DMA boot +void +boot_add_dma(const char *desc, int prio) +{ + bootentry_add(IPL_TYPE_DMA, defPrio(prio, DEFAULT_PRIO), 0, desc); +}
/**************************************************************** * Keyboard calls @@ -684,6 +699,14 @@ boot_rom(u32 vector) call_boot_entry(so, 0); }
+// Boot from a linuxboot ROM when QEMU cfg is in DMA mode +static void +boot_linux_cfg_dma(void) +{ + printf("Booting Linux from fw_cfg...\n"); + qemu_cfg_dma_boot_linux(); +} + // Unable to find bootable device - warn user and eventually retry. static void boot_fail(void) @@ -734,6 +757,9 @@ do_boot(int seq_nr) case IPL_TYPE_BEV: boot_rom(ie->vector); break; + case IPL_TYPE_DMA: + boot_linux_cfg_dma(); + break; case IPL_TYPE_HALT: boot_fail(); break; diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 26af499..797d8ba 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,7 +23,8 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup -#include "stacks.h" // yield +#include "bregs.h" // struct bregs +#include "stacks.h" // farcall16big, yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -185,21 +186,6 @@ qemu_platform_setup(void) * QEMU firmware config (fw_cfg) interface ****************************************************************/
-// List of QEMU fw_cfg entries. DO NOT ADD MORE. (All new content -// should be passed via the fw_cfg "file" interface.) -#define QEMU_CFG_SIGNATURE 0x00 -#define QEMU_CFG_ID 0x01 -#define QEMU_CFG_UUID 0x02 -#define QEMU_CFG_NUMA 0x0d -#define QEMU_CFG_BOOT_MENU 0x0e -#define QEMU_CFG_MAX_CPUS 0x0f -#define QEMU_CFG_FILE_DIR 0x19 -#define QEMU_CFG_ARCH_LOCAL 0x8000 -#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) -#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) -#define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2) -#define QEMU_CFG_E820_TABLE (QEMU_CFG_ARCH_LOCAL + 3) - static void qemu_cfg_select(u16 f) { @@ -505,3 +491,67 @@ void qemu_cfg_init(void) dprintf(1, "Moving pm_base to 0x%x\n", acpi_pm_base); } } + +void qemu_cfg_dma_boot_linux(void) +{ + dprintf(1, "Loading kernel\n"); + void *setup_addr; + u32 setup_size; + qemu_cfg_read_entry(&setup_addr, QEMU_CFG_SETUP_ADDR, 4); + qemu_cfg_read_entry(&setup_size, QEMU_CFG_SETUP_SIZE, 4); + qemu_cfg_read_entry(setup_addr, QEMU_CFG_SETUP_DATA, setup_size); + + if (readl(setup_addr + 0x202) != 0x53726448) { + dprintf(1, "Not valid kernel\n"); + return; + } + + u16 protocol = readw(setup_addr + 0x206); + if (protocol < 0x203) { + dprintf(1, "Old kernel (v %x) not supported\n", protocol); + return; + } + + void *kernel_addr; + u32 kernel_size; + + qemu_cfg_read_entry(&kernel_addr, QEMU_CFG_KERNEL_ADDR, 4); + qemu_cfg_read_entry(&kernel_size, QEMU_CFG_KERNEL_SIZE, 4); + qemu_cfg_read_entry(kernel_addr, QEMU_CFG_KERNEL_DATA, kernel_size); + + void *cmdline_addr; + u32 cmdline_size; + qemu_cfg_read_entry(&cmdline_addr, QEMU_CFG_CMDLINE_ADDR, 4); + qemu_cfg_read_entry(&cmdline_size, QEMU_CFG_CMDLINE_SIZE, 4); + if (cmdline_size) { + qemu_cfg_read_entry(cmdline_addr, QEMU_CFG_CMDLINE_DATA, cmdline_size); + } + + void *initrd_addr; + u32 initrd_size; + qemu_cfg_read_entry(&initrd_addr, QEMU_CFG_INITRD_ADDR, 4); + qemu_cfg_read_entry(&initrd_size, QEMU_CFG_INITRD_SIZE, 4); + if (initrd_size) { + 
qemu_cfg_read_entry(initrd_addr, QEMU_CFG_INITRD_DATA, initrd_size); + } + + // Last configurations + writel(setup_addr + 0x228, (u32)cmdline_addr); + writeb(setup_addr + 0x210, 0xB0); + writeb(setup_addr + 0x211, readb(setup_addr + 0x211) | 0x80); + writew(setup_addr + 0x224, cmdline_addr - setup_addr - 0x200); + writel(setup_addr + 0x218, (u32)initrd_addr); + writel(setup_addr + 0x21c, initrd_size); + + dprintf(1, "Jumping to kernel %d@%x %d@%x %d@%x %d@%x\n" + , setup_size, (u32)setup_addr, cmdline_size, (u32)cmdline_addr + , kernel_size, (u32)kernel_addr, initrd_size, (u32)initrd_addr); + struct bregs br; + memset(&br, 0, sizeof(br)); + extern void kernel_stub(void); + br.ebx = (u32)setup_addr >> 4; + br.edx = (u32)cmdline_addr - (u32)setup_addr - 16; + br.code = SEGOFF(SEG_BIOS, (u32)kernel_stub - BUILD_BIOS_ADDR); + + farcall16big(&br); +} diff --git a/src/fw/paravirt.h b/src/fw/paravirt.h index 5e5c61e..a9674c0 100644 --- a/src/fw/paravirt.h +++ b/src/fw/paravirt.h @@ -46,9 +46,44 @@ static inline int runningOnKVM(void) { // QEMU_CFG_DMA ID bit #define QEMU_CFG_VERSION_DMA 2
+// List of QEMU fw_cfg entries. DO NOT ADD MORE. (All new content +// should be passed via the fw_cfg "file" interface.) +#define QEMU_CFG_SIGNATURE 0x00 +#define QEMU_CFG_ID 0x01 +#define QEMU_CFG_UUID 0x02 +#define QEMU_CFG_RAM_SIZE 0x03 +#define QEMU_CFG_NOGRAPHIC 0x04 +#define QEMU_CFG_NB_CPUS 0x05 +#define QEMU_CFG_MACHINE_ID 0x06 +#define QEMU_CFG_KERNEL_ADDR 0x07 +#define QEMU_CFG_KERNEL_SIZE 0x08 +#define QEMU_CFG_KERNEL_CMDLINE 0x09 +#define QEMU_CFG_INITRD_ADDR 0x0a +#define QEMU_CFG_INITRD_SIZE 0x0b +#define QEMU_CFG_BOOT_DEVICE 0x0c +#define QEMU_CFG_NUMA 0x0d +#define QEMU_CFG_BOOT_MENU 0x0e +#define QEMU_CFG_MAX_CPUS 0x0f +#define QEMU_CFG_KERNEL_ENTRY 0x10 +#define QEMU_CFG_KERNEL_DATA 0x11 +#define QEMU_CFG_INITRD_DATA 0x12 +#define QEMU_CFG_CMDLINE_ADDR 0x13 +#define QEMU_CFG_CMDLINE_SIZE 0x14 +#define QEMU_CFG_CMDLINE_DATA 0x15 +#define QEMU_CFG_SETUP_ADDR 0x16 +#define QEMU_CFG_SETUP_SIZE 0x17 +#define QEMU_CFG_SETUP_DATA 0x18 +#define QEMU_CFG_FILE_DIR 0x19 +#define QEMU_CFG_ARCH_LOCAL 0x8000 +#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) +#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) +#define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2) +#define QEMU_CFG_E820_TABLE (QEMU_CFG_ARCH_LOCAL + 3) + int qemu_cfg_dma_enabled(void); void qemu_preinit(void); void qemu_platform_setup(void); void qemu_cfg_init(void); +void qemu_cfg_dma_boot_linux(void);
#endif diff --git a/src/romlayout.S b/src/romlayout.S index 7938e22..1c641c2 100644 --- a/src/romlayout.S +++ b/src/romlayout.S @@ -196,6 +196,26 @@ __farcall16: IRQ_TRAMPOLINE 1c IRQ_TRAMPOLINE 4a
+ DECLFUNC kernel_stub +kernel_stub: + movw %bx, %ds + movw %bx, %es + movw %bx, %fs + movw %bx, %gs + movw %bx, %ss + movl %edx, %esp + addw $0x20, %bx + pushw %bx // push CS + pushw $0 // push IP + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %edi, %edi + xorl %esi, %esi + xorl %ebp, %ebp + lretw +
/**************************************************************** * Misc. entry points. diff --git a/src/util.h b/src/util.h index 327abeb..f99cdb5 100644 --- a/src/util.h +++ b/src/util.h @@ -25,6 +25,7 @@ void boot_add_floppy(struct drive_s *drive_g, const char *desc, int prio); void boot_add_hd(struct drive_s *drive_g, const char *desc, int prio); void boot_add_cd(struct drive_s *drive_g, const char *desc, int prio); void boot_add_cbfs(void *data, const char *desc, int prio); +void boot_add_dma(const char *desc, int prio); void interactive_bootmenu(void); void bcv_prepboot(void); struct pci_device;
On Mon, Aug 31, 2015 at 11:12:02AM +0200, Marc Marí wrote:
Reading Linux from the fw_cfg interface is faster using the DMA interface. For this reason, add a Linux loader that can benefit from this interface.
Signed-off-by: Marc Marí markmb@redhat.com
src/boot.c | 26 +++++++++++++++++ src/fw/paravirt.c | 84 ++++++++++++++++++++++++++++++++++++++++++++----------- src/fw/paravirt.h | 35 +++++++++++++++++++++++ src/romlayout.S | 20 +++++++++++++ src/util.h | 1 + 5 files changed, 150 insertions(+), 16 deletions(-)
diff --git a/src/boot.c b/src/boot.c index e0f73a3..ba692db 100644 --- a/src/boot.c +++ b/src/boot.c @@ -280,6 +280,14 @@ boot_init(void) BootRetryTime = romfile_loadint("etc/boot-fail-wait", 60*1000);
loadBootOrder();
- /* Check for booting directly from fw_cfg DMA. We assign the same boot
* priority of the linuxboot rom (if it exists).
*/
- if (qemu_cfg_dma_enabled()) {
boot_add_dma("QEMU Kernel image",
bootprio_find_named_rom("genroms/linuxboot.bin", 0));
- }
boot_init() isn't the place for this - add a new function (eg, qemu_vmlinux_setup() ) to paravirt.c and call it from post.c:device_hardware_setup().
Overloading "genroms/linuxboot.bin" is fine for testing, but I think ultimately a new name (eg, "vmlinux") should be made and QEMU should be enhanced to pass that new name in the bootorder file.
@@ -304,6 +312,7 @@ static struct hlist_head BootList VARVERIFY32INIT; #define IPL_TYPE_HARDDISK 0x02 #define IPL_TYPE_CDROM 0x03 #define IPL_TYPE_CBFS 0x20 +#define IPL_TYPE_DMA 0x21
How about "IPL_TYPE_QEMU_VMLINUX".
#define IPL_TYPE_BEV 0x80 #define IPL_TYPE_BCV 0x81 #define IPL_TYPE_HALT 0xf0 @@ -398,6 +407,12 @@ boot_add_cbfs(void *data, const char *desc, int prio) bootentry_add(IPL_TYPE_CBFS, defPrio(prio, DEFAULT_PRIO), (u32)data, desc); }
+// Add a FW_CFG DMA boot +void +boot_add_dma(const char *desc, int prio) +{
- bootentry_add(IPL_TYPE_DMA, defPrio(prio, DEFAULT_PRIO), 0, desc);
+}
boot_add_qemu_vmlinux()
/****************************************************************
- Keyboard calls
@@ -684,6 +699,14 @@ boot_rom(u32 vector) call_boot_entry(so, 0); }
+// Boot from a linuxboot ROM when QEMU cfg is in DMA mode +static void +boot_linux_cfg_dma(void) +{
- printf("Booting Linux from fw_cfg...\n");
- qemu_cfg_dma_boot_linux();
+}
// Unable to find bootable device - warn user and eventually retry. static void boot_fail(void) @@ -734,6 +757,9 @@ do_boot(int seq_nr) case IPL_TYPE_BEV: boot_rom(ie->vector); break;
+ case IPL_TYPE_DMA:
+ boot_linux_cfg_dma();
+ break; case IPL_TYPE_HALT: boot_fail(); break;
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 26af499..797d8ba 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,7 +23,8 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup -#include "stacks.h" // yield +#include "bregs.h" // struct bregs +#include "stacks.h" // farcall16big, yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -185,21 +186,6 @@ qemu_platform_setup(void)
- QEMU firmware config (fw_cfg) interface
****************************************************************/
-// List of QEMU fw_cfg entries. DO NOT ADD MORE. (All new content -// should be passed via the fw_cfg "file" interface.) -#define QEMU_CFG_SIGNATURE 0x00 -#define QEMU_CFG_ID 0x01 -#define QEMU_CFG_UUID 0x02 -#define QEMU_CFG_NUMA 0x0d -#define QEMU_CFG_BOOT_MENU 0x0e -#define QEMU_CFG_MAX_CPUS 0x0f -#define QEMU_CFG_FILE_DIR 0x19 -#define QEMU_CFG_ARCH_LOCAL 0x8000 -#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) -#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) -#define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2) -#define QEMU_CFG_E820_TABLE (QEMU_CFG_ARCH_LOCAL + 3)
static void qemu_cfg_select(u16 f) { @@ -505,3 +491,67 @@ void qemu_cfg_init(void) dprintf(1, "Moving pm_base to 0x%x\n", acpi_pm_base); } }
+void qemu_cfg_dma_boot_linux(void) +{
- dprintf(1, "Loading kernel\n");
- void *setup_addr;
- u32 setup_size;
- qemu_cfg_read_entry(&setup_addr, QEMU_CFG_SETUP_ADDR, 4);
- qemu_cfg_read_entry(&setup_size, QEMU_CFG_SETUP_SIZE, 4);
- qemu_cfg_read_entry(setup_addr, QEMU_CFG_SETUP_DATA, setup_size);
- if (readl(setup_addr + 0x202) != 0x53726448) {
dprintf(1, "Not valid kernel\n");
return;
- }
- u16 protocol = readw(setup_addr + 0x206);
- if (protocol < 0x203) {
dprintf(1, "Old kernel (v %x) not supported\n", protocol);
return;
- }
- void *kernel_addr;
- u32 kernel_size;
- qemu_cfg_read_entry(&kernel_addr, QEMU_CFG_KERNEL_ADDR, 4);
- qemu_cfg_read_entry(&kernel_size, QEMU_CFG_KERNEL_SIZE, 4);
- qemu_cfg_read_entry(kernel_addr, QEMU_CFG_KERNEL_DATA, kernel_size);
- void *cmdline_addr;
- u32 cmdline_size;
- qemu_cfg_read_entry(&cmdline_addr, QEMU_CFG_CMDLINE_ADDR, 4);
- qemu_cfg_read_entry(&cmdline_size, QEMU_CFG_CMDLINE_SIZE, 4);
- if (cmdline_size) {
qemu_cfg_read_entry(cmdline_addr, QEMU_CFG_CMDLINE_DATA, cmdline_size);
- }
- void *initrd_addr;
- u32 initrd_size;
- qemu_cfg_read_entry(&initrd_addr, QEMU_CFG_INITRD_ADDR, 4);
- qemu_cfg_read_entry(&initrd_size, QEMU_CFG_INITRD_SIZE, 4);
- if (initrd_size) {
qemu_cfg_read_entry(initrd_addr, QEMU_CFG_INITRD_DATA, initrd_size);
- }
- // Last configurations
- writel(setup_addr + 0x228, (u32)cmdline_addr);
- writeb(setup_addr + 0x210, 0xB0);
- writeb(setup_addr + 0x211, readb(setup_addr + 0x211) | 0x80);
- writew(setup_addr + 0x224, cmdline_addr - setup_addr - 0x200);
- writel(setup_addr + 0x218, (u32)initrd_addr);
- writel(setup_addr + 0x21c, initrd_size);
Are these updates necessary when using the existing QEMU vmlinux fw_cfg entries? I thought QEMU did this on behalf of the firmware, but maybe I missed something.
-Kevin
On Mon, 31 Aug 2015 12:46:57 -0400 "Kevin O'Connor" kevin@koconnor.net wrote:
On Mon, Aug 31, 2015 at 11:12:02AM +0200, Marc Marí wrote:
Reading Linux from the fw_cfg interface is faster using the DMA interface. For this reason, add a Linux loader that can benefit from this interface.
Signed-off-by: Marc Marí markmb@redhat.com
src/boot.c | 26 +++++++++++++++++ src/fw/paravirt.c | 84 ++++++++++++++++++++++++++++++++++++++++++++----------- src/fw/paravirt.h | 35 +++++++++++++++++++++++ src/romlayout.S | 20 +++++++++++++ src/util.h | 1 + 5 files changed, 150 insertions(+), 16 deletions(-)
diff --git a/src/boot.c b/src/boot.c index e0f73a3..ba692db 100644 --- a/src/boot.c +++ b/src/boot.c @@ -280,6 +280,14 @@ boot_init(void) BootRetryTime = romfile_loadint("etc/boot-fail-wait", 60*1000);
loadBootOrder();
- /* Check for booting directly from fw_cfg DMA. We assign the
same boot
* priority of the linuxboot rom (if it exists).
*/
- if (qemu_cfg_dma_enabled()) {
boot_add_dma("QEMU Kernel image",
bootprio_find_named_rom("genroms/linuxboot.bin", 0));
- }
boot_init() isn't the place for this - add a new function (eg, qemu_vmlinux_setup() ) to paravirt.c and call it from post.c:device_hardware_setup().
Overloading "genroms/linuxboot.bin" is fine for testing, but I think ultimately a new name (eg, "vmlinux") should be made and QEMU should be enhanced to pass that new name in the bootorder file.
@@ -304,6 +312,7 @@ static struct hlist_head BootList VARVERIFY32INIT; #define IPL_TYPE_HARDDISK 0x02 #define IPL_TYPE_CDROM 0x03 #define IPL_TYPE_CBFS 0x20 +#define IPL_TYPE_DMA 0x21
How about "IPL_TYPE_QEMU_VMLINUX".
#define IPL_TYPE_BEV 0x80 #define IPL_TYPE_BCV 0x81 #define IPL_TYPE_HALT 0xf0 @@ -398,6 +407,12 @@ boot_add_cbfs(void *data, const char *desc, int prio) bootentry_add(IPL_TYPE_CBFS, defPrio(prio, DEFAULT_PRIO), (u32)data, desc); }
+// Add a FW_CFG DMA boot +void +boot_add_dma(const char *desc, int prio) +{
- bootentry_add(IPL_TYPE_DMA, defPrio(prio, DEFAULT_PRIO), 0,
desc); +}
boot_add_qemu_vmlinux()
/****************************************************************
- Keyboard calls
@@ -684,6 +699,14 @@ boot_rom(u32 vector) call_boot_entry(so, 0); }
+// Boot from a linuxboot ROM when QEMU cfg is in DMA mode +static void +boot_linux_cfg_dma(void) +{
- printf("Booting Linux from fw_cfg...\n");
- qemu_cfg_dma_boot_linux();
+}
// Unable to find bootable device - warn user and eventually retry. static void boot_fail(void) @@ -734,6 +757,9 @@ do_boot(int seq_nr) case IPL_TYPE_BEV: boot_rom(ie->vector); break;
+ case IPL_TYPE_DMA:
+ boot_linux_cfg_dma();
+ break; case IPL_TYPE_HALT: boot_fail(); break;
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 26af499..797d8ba 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,7 +23,8 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup -#include "stacks.h" // yield +#include "bregs.h" // struct bregs +#include "stacks.h" // farcall16big, yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -185,21 +186,6 @@ qemu_platform_setup(void)
- QEMU firmware config (fw_cfg) interface
****************************************************************/
-// List of QEMU fw_cfg entries. DO NOT ADD MORE. (All new content -// should be passed via the fw_cfg "file" interface.) -#define QEMU_CFG_SIGNATURE 0x00 -#define QEMU_CFG_ID 0x01 -#define QEMU_CFG_UUID 0x02 -#define QEMU_CFG_NUMA 0x0d -#define QEMU_CFG_BOOT_MENU 0x0e -#define QEMU_CFG_MAX_CPUS 0x0f -#define QEMU_CFG_FILE_DIR 0x19 -#define QEMU_CFG_ARCH_LOCAL 0x8000 -#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) -#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) -#define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2) -#define QEMU_CFG_E820_TABLE (QEMU_CFG_ARCH_LOCAL + 3)
static void qemu_cfg_select(u16 f) { @@ -505,3 +491,67 @@ void qemu_cfg_init(void) dprintf(1, "Moving pm_base to 0x%x\n", acpi_pm_base); } }
+void qemu_cfg_dma_boot_linux(void) +{
- dprintf(1, "Loading kernel\n");
- void *setup_addr;
- u32 setup_size;
- qemu_cfg_read_entry(&setup_addr, QEMU_CFG_SETUP_ADDR, 4);
- qemu_cfg_read_entry(&setup_size, QEMU_CFG_SETUP_SIZE, 4);
- qemu_cfg_read_entry(setup_addr, QEMU_CFG_SETUP_DATA,
setup_size); +
- if (readl(setup_addr + 0x202) != 0x53726448) {
dprintf(1, "Not valid kernel\n");
return;
- }
- u16 protocol = readw(setup_addr + 0x206);
- if (protocol < 0x203) {
dprintf(1, "Old kernel (v %x) not supported\n", protocol);
return;
- }
- void *kernel_addr;
- u32 kernel_size;
- qemu_cfg_read_entry(&kernel_addr, QEMU_CFG_KERNEL_ADDR, 4);
- qemu_cfg_read_entry(&kernel_size, QEMU_CFG_KERNEL_SIZE, 4);
- qemu_cfg_read_entry(kernel_addr, QEMU_CFG_KERNEL_DATA,
kernel_size); +
- void *cmdline_addr;
- u32 cmdline_size;
- qemu_cfg_read_entry(&cmdline_addr, QEMU_CFG_CMDLINE_ADDR, 4);
- qemu_cfg_read_entry(&cmdline_size, QEMU_CFG_CMDLINE_SIZE, 4);
- if (cmdline_size) {
qemu_cfg_read_entry(cmdline_addr, QEMU_CFG_CMDLINE_DATA,
cmdline_size);
- }
- void *initrd_addr;
- u32 initrd_size;
- qemu_cfg_read_entry(&initrd_addr, QEMU_CFG_INITRD_ADDR, 4);
- qemu_cfg_read_entry(&initrd_size, QEMU_CFG_INITRD_SIZE, 4);
- if (initrd_size) {
qemu_cfg_read_entry(initrd_addr, QEMU_CFG_INITRD_DATA,
initrd_size);
- }
- // Last configurations
- writel(setup_addr + 0x228, (u32)cmdline_addr);
- writeb(setup_addr + 0x210, 0xB0);
- writeb(setup_addr + 0x211, readb(setup_addr + 0x211) | 0x80);
- writew(setup_addr + 0x224, cmdline_addr - setup_addr - 0x200);
- writel(setup_addr + 0x218, (u32)initrd_addr);
- writel(setup_addr + 0x21c, initrd_size);
Are these updates necessary when using the existing QEMU vmlinux fw_cfg entries? I thought QEMU did this on behalf of the firmware, but maybe I missed something.
In QEMU, this is done in the linuxboot image, the one that I'm overloading. But this image is written in assembly. And using the DMA interface in assembly can get really tricky with all this memory management. That's why I rewrote the boot in C here.
Thanks Marc
On Mon, Aug 31, 2015 at 07:10:37PM +0200, Marc Marí wrote:
On Mon, 31 Aug 2015 12:46:57 -0400 "Kevin O'Connor" kevin@koconnor.net wrote:
On Mon, Aug 31, 2015 at 11:12:02AM +0200, Marc Marí wrote:
Reading Linux from the fw_cfg interface is faster using the DMA interface. For this reason, add a Linux loader that can benefit from this interface.
Signed-off-by: Marc Marí markmb@redhat.com
src/boot.c | 26 +++++++++++++++++ src/fw/paravirt.c | 84 ++++++++++++++++++++++++++++++++++++++++++++----------- src/fw/paravirt.h | 35 +++++++++++++++++++++++ src/romlayout.S | 20 +++++++++++++ src/util.h | 1 + 5 files changed, 150 insertions(+), 16 deletions(-)
diff --git a/src/boot.c b/src/boot.c index e0f73a3..ba692db 100644 --- a/src/boot.c +++ b/src/boot.c @@ -280,6 +280,14 @@ boot_init(void) BootRetryTime = romfile_loadint("etc/boot-fail-wait", 60*1000);
loadBootOrder();
- /* Check for booting directly from fw_cfg DMA. We assign the
same boot
* priority of the linuxboot rom (if it exists).
*/
- if (qemu_cfg_dma_enabled()) {
boot_add_dma("QEMU Kernel image",
bootprio_find_named_rom("genroms/linuxboot.bin", 0));
- }
boot_init() isn't the place for this - add a new function (eg, qemu_vmlinux_setup() ) to paravirt.c and call it from post.c:device_hardware_setup().
Overloading "genroms/linuxboot.bin" is fine for testing, but I think ultimately a new name (eg, "vmlinux") should be made and QEMU should be enhanced to pass that new name in the bootorder file.
@@ -304,6 +312,7 @@ static struct hlist_head BootList VARVERIFY32INIT; #define IPL_TYPE_HARDDISK 0x02 #define IPL_TYPE_CDROM 0x03 #define IPL_TYPE_CBFS 0x20 +#define IPL_TYPE_DMA 0x21
How about "IPL_TYPE_QEMU_VMLINUX"?
#define IPL_TYPE_BEV 0x80 #define IPL_TYPE_BCV 0x81 #define IPL_TYPE_HALT 0xf0 @@ -398,6 +407,12 @@ boot_add_cbfs(void *data, const char *desc, int prio) bootentry_add(IPL_TYPE_CBFS, defPrio(prio, DEFAULT_PRIO), (u32)data, desc); }
+// Add a FW_CFG DMA boot +void +boot_add_dma(const char *desc, int prio) +{
- bootentry_add(IPL_TYPE_DMA, defPrio(prio, DEFAULT_PRIO), 0,
desc); +}
boot_add_qemu_vmlinux()
/****************************************************************
- Keyboard calls
@@ -684,6 +699,14 @@ boot_rom(u32 vector) call_boot_entry(so, 0); }
+// Boot from a linuxboot ROM when QEMU cfg is in DMA mode +static void +boot_linux_cfg_dma(void) +{
- printf("Booting Linux from fw_cfg...\n");
- qemu_cfg_dma_boot_linux();
+}
// Unable to find bootable device - warn user and eventually retry. static void boot_fail(void) @@ -734,6 +757,9 @@ do_boot(int seq_nr) case IPL_TYPE_BEV: boot_rom(ie->vector); break;
- case IPL_TYPE_DMA:
boot_linux_cfg_dma();
break;
case IPL_TYPE_HALT: boot_fail(); break;
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 26af499..797d8ba 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,7 +23,8 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup -#include "stacks.h" // yield +#include "bregs.h" // struct bregs +#include "stacks.h" // farcall16big, yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -185,21 +186,6 @@ qemu_platform_setup(void)
- QEMU firmware config (fw_cfg) interface
****************************************************************/
-// List of QEMU fw_cfg entries. DO NOT ADD MORE. (All new content -// should be passed via the fw_cfg "file" interface.) -#define QEMU_CFG_SIGNATURE 0x00 -#define QEMU_CFG_ID 0x01 -#define QEMU_CFG_UUID 0x02 -#define QEMU_CFG_NUMA 0x0d -#define QEMU_CFG_BOOT_MENU 0x0e -#define QEMU_CFG_MAX_CPUS 0x0f -#define QEMU_CFG_FILE_DIR 0x19 -#define QEMU_CFG_ARCH_LOCAL 0x8000 -#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) -#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) -#define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2) -#define QEMU_CFG_E820_TABLE (QEMU_CFG_ARCH_LOCAL + 3)
static void qemu_cfg_select(u16 f) { @@ -505,3 +491,67 @@ void qemu_cfg_init(void) dprintf(1, "Moving pm_base to 0x%x\n", acpi_pm_base); } }
+void qemu_cfg_dma_boot_linux(void) +{
- dprintf(1, "Loading kernel\n");
- void *setup_addr;
- u32 setup_size;
- qemu_cfg_read_entry(&setup_addr, QEMU_CFG_SETUP_ADDR, 4);
- qemu_cfg_read_entry(&setup_size, QEMU_CFG_SETUP_SIZE, 4);
- qemu_cfg_read_entry(setup_addr, QEMU_CFG_SETUP_DATA,
setup_size); +
- if (readl(setup_addr + 0x202) != 0x53726448) {
dprintf(1, "Not valid kernel\n");
return;
- }
- u16 protocol = readw(setup_addr + 0x206);
- if (protocol < 0x203) {
dprintf(1, "Old kernel (v %x) not supported\n", protocol);
return;
- }
- void *kernel_addr;
- u32 kernel_size;
- qemu_cfg_read_entry(&kernel_addr, QEMU_CFG_KERNEL_ADDR, 4);
- qemu_cfg_read_entry(&kernel_size, QEMU_CFG_KERNEL_SIZE, 4);
- qemu_cfg_read_entry(kernel_addr, QEMU_CFG_KERNEL_DATA,
kernel_size); +
- void *cmdline_addr;
- u32 cmdline_size;
- qemu_cfg_read_entry(&cmdline_addr, QEMU_CFG_CMDLINE_ADDR, 4);
- qemu_cfg_read_entry(&cmdline_size, QEMU_CFG_CMDLINE_SIZE, 4);
- if (cmdline_size) {
qemu_cfg_read_entry(cmdline_addr, QEMU_CFG_CMDLINE_DATA,
cmdline_size);
- }
- void *initrd_addr;
- u32 initrd_size;
- qemu_cfg_read_entry(&initrd_addr, QEMU_CFG_INITRD_ADDR, 4);
- qemu_cfg_read_entry(&initrd_size, QEMU_CFG_INITRD_SIZE, 4);
- if (initrd_size) {
qemu_cfg_read_entry(initrd_addr, QEMU_CFG_INITRD_DATA,
initrd_size);
- }
- // Last configurations
- writel(setup_addr + 0x228, (u32)cmdline_addr);
- writeb(setup_addr + 0x210, 0xB0);
- writeb(setup_addr + 0x211, readb(setup_addr + 0x211) | 0x80);
- writew(setup_addr + 0x224, cmdline_addr - setup_addr - 0x200);
- writel(setup_addr + 0x218, (u32)initrd_addr);
- writel(setup_addr + 0x21c, initrd_size);
Are these updates necessary when using the existing QEMU vmlinux fw_cfg entries? I thought QEMU did this on behalf of the firmware, but maybe I missed something.
In QEMU, this is done in the linuxboot image, the one that I'm overloading. But this image is written in assembly. And using the DMA interface in assembly can get really tricky with all this memory management. That's why I rewrote the boot in C here.
Yes, and that makes sense.
However, the checks in the new qemu_cfg_dma_boot_linux() do not match the checks in QEMU's pc-bios/optionrom/linuxboot.S . So, which is right?
-Kevin
On Mon, 31 Aug 2015 14:07:28 -0400 "Kevin O'Connor" kevin@koconnor.net wrote:
On Mon, Aug 31, 2015 at 07:10:37PM +0200, Marc Marí wrote:
On Mon, 31 Aug 2015 12:46:57 -0400 "Kevin O'Connor" kevin@koconnor.net wrote:
On Mon, Aug 31, 2015 at 11:12:02AM +0200, Marc Marí wrote:
Reading Linux from the fw_cfg interface is faster using the DMA interface. For this reason, add a Linux loader that can benefit from this interface.
Signed-off-by: Marc Marí markmb@redhat.com
src/boot.c | 26 +++++++++++++++++ src/fw/paravirt.c | 84 ++++++++++++++++++++++++++++++++++++++++++++----------- src/fw/paravirt.h | 35 +++++++++++++++++++++++ src/romlayout.S | 20 +++++++++++++ src/util.h | 1 + 5 files changed, 150 insertions(+), 16 deletions(-)
diff --git a/src/boot.c b/src/boot.c index e0f73a3..ba692db 100644 --- a/src/boot.c +++ b/src/boot.c @@ -280,6 +280,14 @@ boot_init(void) BootRetryTime = romfile_loadint("etc/boot-fail-wait", 60*1000); loadBootOrder();
- /* Check for booting directly from fw_cfg DMA. We assign
the same boot
* priority of the linuxboot rom (if it exists).
*/
- if (qemu_cfg_dma_enabled()) {
boot_add_dma("QEMU Kernel image",
bootprio_find_named_rom("genroms/linuxboot.bin", 0));
- }
boot_init() isn't the place for this - add a new function (e.g., qemu_vmlinux_setup()) to paravirt.c and call it from post.c:device_hardware_setup().
Overloading "genroms/linuxboot.bin" is fine for testing, but I think ultimately a new name (e.g., "vmlinux") should be made and QEMU should be enhanced to pass that new name in the bootorder file.
@@ -304,6 +312,7 @@ static struct hlist_head BootList VARVERIFY32INIT; #define IPL_TYPE_HARDDISK 0x02 #define IPL_TYPE_CDROM 0x03 #define IPL_TYPE_CBFS 0x20 +#define IPL_TYPE_DMA 0x21
How about "IPL_TYPE_QEMU_VMLINUX"?
#define IPL_TYPE_BEV 0x80 #define IPL_TYPE_BCV 0x81 #define IPL_TYPE_HALT 0xf0 @@ -398,6 +407,12 @@ boot_add_cbfs(void *data, const char *desc, int prio) bootentry_add(IPL_TYPE_CBFS, defPrio(prio, DEFAULT_PRIO), (u32)data, desc); }
+// Add a FW_CFG DMA boot +void +boot_add_dma(const char *desc, int prio) +{
- bootentry_add(IPL_TYPE_DMA, defPrio(prio, DEFAULT_PRIO), 0,
desc); +}
boot_add_qemu_vmlinux()
/****************************************************************
- Keyboard calls
@@ -684,6 +699,14 @@ boot_rom(u32 vector) call_boot_entry(so, 0); }
+// Boot from a linuxboot ROM when QEMU cfg is in DMA mode +static void +boot_linux_cfg_dma(void) +{
- printf("Booting Linux from fw_cfg...\n");
- qemu_cfg_dma_boot_linux();
+}
// Unable to find bootable device - warn user and eventually retry. static void boot_fail(void) @@ -734,6 +757,9 @@ do_boot(int seq_nr) case IPL_TYPE_BEV: boot_rom(ie->vector); break;
- case IPL_TYPE_DMA:
boot_linux_cfg_dma();
break;
case IPL_TYPE_HALT: boot_fail(); break;
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 26af499..797d8ba 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,7 +23,8 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup -#include "stacks.h" // yield +#include "bregs.h" // struct bregs +#include "stacks.h" // farcall16big, yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -185,21 +186,6 @@ qemu_platform_setup(void)
- QEMU firmware config (fw_cfg) interface
****************************************************************/
-// List of QEMU fw_cfg entries. DO NOT ADD MORE. (All new content -// should be passed via the fw_cfg "file" interface.) -#define QEMU_CFG_SIGNATURE 0x00 -#define QEMU_CFG_ID 0x01 -#define QEMU_CFG_UUID 0x02 -#define QEMU_CFG_NUMA 0x0d -#define QEMU_CFG_BOOT_MENU 0x0e -#define QEMU_CFG_MAX_CPUS 0x0f -#define QEMU_CFG_FILE_DIR 0x19 -#define QEMU_CFG_ARCH_LOCAL 0x8000 -#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) -#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) -#define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2) -#define QEMU_CFG_E820_TABLE (QEMU_CFG_ARCH_LOCAL + 3) - static void qemu_cfg_select(u16 f) { @@ -505,3 +491,67 @@ void qemu_cfg_init(void) dprintf(1, "Moving pm_base to 0x%x\n", acpi_pm_base); } }
+void qemu_cfg_dma_boot_linux(void) +{
- dprintf(1, "Loading kernel\n");
- void *setup_addr;
- u32 setup_size;
- qemu_cfg_read_entry(&setup_addr, QEMU_CFG_SETUP_ADDR, 4);
- qemu_cfg_read_entry(&setup_size, QEMU_CFG_SETUP_SIZE, 4);
- qemu_cfg_read_entry(setup_addr, QEMU_CFG_SETUP_DATA,
setup_size); +
- if (readl(setup_addr + 0x202) != 0x53726448) {
dprintf(1, "Not valid kernel\n");
return;
- }
- u16 protocol = readw(setup_addr + 0x206);
- if (protocol < 0x203) {
dprintf(1, "Old kernel (v %x) not supported\n",
protocol);
return;
- }
- void *kernel_addr;
- u32 kernel_size;
- qemu_cfg_read_entry(&kernel_addr, QEMU_CFG_KERNEL_ADDR, 4);
- qemu_cfg_read_entry(&kernel_size, QEMU_CFG_KERNEL_SIZE, 4);
- qemu_cfg_read_entry(kernel_addr, QEMU_CFG_KERNEL_DATA,
kernel_size); +
- void *cmdline_addr;
- u32 cmdline_size;
- qemu_cfg_read_entry(&cmdline_addr, QEMU_CFG_CMDLINE_ADDR,
4);
- qemu_cfg_read_entry(&cmdline_size, QEMU_CFG_CMDLINE_SIZE,
4);
- if (cmdline_size) {
qemu_cfg_read_entry(cmdline_addr,
QEMU_CFG_CMDLINE_DATA, cmdline_size);
- }
- void *initrd_addr;
- u32 initrd_size;
- qemu_cfg_read_entry(&initrd_addr, QEMU_CFG_INITRD_ADDR, 4);
- qemu_cfg_read_entry(&initrd_size, QEMU_CFG_INITRD_SIZE, 4);
- if (initrd_size) {
qemu_cfg_read_entry(initrd_addr, QEMU_CFG_INITRD_DATA,
initrd_size);
- }
- // Last configurations
- writel(setup_addr + 0x228, (u32)cmdline_addr);
- writeb(setup_addr + 0x210, 0xB0);
- writeb(setup_addr + 0x211, readb(setup_addr + 0x211) |
0x80);
- writew(setup_addr + 0x224, cmdline_addr - setup_addr -
0x200);
- writel(setup_addr + 0x218, (u32)initrd_addr);
- writel(setup_addr + 0x21c, initrd_size);
Are these updates necessary when using the existing QEMU vmlinux fw_cfg entries? I thought QEMU did this on behalf of the firmware, but maybe I missed something.
In QEMU, this is done in the linuxboot image, the one that I'm overloading. But this image is written in assembly. And using the DMA interface in assembly can get really tricky with all this memory management. That's why I rewrote the boot in C here.
Yes, and that makes sense.
However, the checks in the new qemu_cfg_dma_boot_linux() do not match the checks in QEMU's pc-bios/optionrom/linuxboot.S . So, which is right?
I suppose linuxboot.S is right (because it was already working). I'll check what checks are missing and add them (if appropriate).
Marc