Implementation of the FW CFG DMA interface.
When running a Linux guest on top of QEMU using the -kernel option, these are the timing improvements on x86:
QEMU commit 16a1b6e and SeaBIOS commit e4d2b8c QEMU startup time: .080 BIOS startup time: .060 Kernel setup time: .586 Total time: .726
QEMU with this patch series and SeaBIOS with this patch series QEMU startup time: .080 BIOS startup time: .039 Kernel setup time: .002 Total time: .121
QEMU startup time is the time between the start and the first kvm_entry. BIOS startup time is the time between the first kvm_entry and the start of function do_boot, in SeaBIOS. Kernel setup time is the time between the start of the function do_boot in SeaBIOS and the jump to the Linux kernel.
As you can see, both the BIOS (because of ACPI tables and other configurations) and the Linux kernel boot (because of the copy to memory) are greatly improved with this new interface.
Also, this new interface is an add-on to the old interface. The two interfaces are compatible and can be used interchangeably.
Changes from v1: - Take into account order of fields in the FWCfgDmaAccess structure - Check and change endianness of FWCfgDmaAccess fields - Change order of fields in the FWCfgDmaAccess structure - Add FW_CFG_DMA_CTL_SKIP feature for control field - Split FW_CFG_SIZE in QEMU - Make FW_CFG_ID a bitmap of features - Add 64 bit address support for the transfer. Trigger when writing the low address, and address is 0 by default and at the end of each transfer. - Align ports and addresses. - Preserve old fw_cfg_comb_valid behaviour in QEMU - Update documentation to reflect all these changes
Changes from v2: - Make IOports fw_cfg DMA region a different IO region. - Reuse everything for MMIO and IOport DMA regions - Make transfer status only based on control field - Use DMA helpers instead of direct map/unmap - Change ARM fw_cfg DMA address space - Change Linux boot process to match linuxboot.S - Add select capabilities in the FWCfgDmaAccess struct - Update documentation to reflect all these changes
Implement guest-side of the QEMU FW CFG DMA interface for x86
Marc Marí (2): Add QEMU fw_cfg DMA interface Boot Linux using QEMU fw_cfg DMA interface
src/boot.c | 32 ++++++++--- src/fw/paravirt.c | 162 ++++++++++++++++++++++++++++++++++++++++++++++-------- src/fw/paravirt.h | 62 +++++++++++++++++++-- src/post.c | 1 + src/romlayout.S | 20 +++++++ src/util.h | 1 + 6 files changed, 244 insertions(+), 34 deletions(-)
Add support for the new fw_cfg DMA interface. The protocol is explained in the QEMU documentation.
Signed-off-by: Marc Marí markmb@redhat.com --- src/fw/paravirt.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++------ src/fw/paravirt.h | 26 +++++++++++++++--- 2 files changed, 93 insertions(+), 12 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index db22ae8..65f9fba 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,6 +23,7 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup +#include "stacks.h" // yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -30,6 +31,13 @@ u32 RamSize; u64 RamSizeOver4G; // Type of emulator platform. int PlatformRunningOn VARFSEG; +// cfg_dma enabled +int cfg_dma_enabled = 0; + +inline int qemu_cfg_dma_enabled(void) +{ + return cfg_dma_enabled; +}
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It * should be used to determine that a VM is running under KVM. @@ -199,23 +207,63 @@ qemu_cfg_select(u16 f) }
static void +qemu_cfg_dma_transfer(void *address, u32 length, u32 control) +{ + QemuCfgDmaAccess access; + + access.address = cpu_to_be64((u64)(u32)address); + access.length = cpu_to_be32(length); + access.control = cpu_to_be32(control); + + barrier(); + + outl(cpu_to_be32((u32)&access), PORT_QEMU_CFG_DMA_ADDR_LOW); + + while(be32_to_cpu(access.control) & ~QEMU_CFG_DMA_CTL_ERROR) { + yield(); + } +} + +static void qemu_cfg_read(void *buf, int len) { - insb(PORT_QEMU_CFG_DATA, buf, len); + if (len == 0) { + return; + } + + if (qemu_cfg_dma_enabled()) { + qemu_cfg_dma_transfer(buf, len, QEMU_CFG_DMA_CTL_READ); + } else { + insb(PORT_QEMU_CFG_DATA, buf, len); + } }
static void qemu_cfg_skip(int len) { - while (len--) - inb(PORT_QEMU_CFG_DATA); + if (len == 0) { + return; + } + + if (qemu_cfg_dma_enabled()) { + qemu_cfg_dma_transfer(0, len, QEMU_CFG_DMA_CTL_SKIP); + } else { + while (len--) + inb(PORT_QEMU_CFG_DATA); + } }
static void qemu_cfg_read_entry(void *buf, int e, int len) { - qemu_cfg_select(e); - qemu_cfg_read(buf, len); + if (qemu_cfg_dma_enabled()) { + u32 control = (e << 16) | QEMU_CFG_DMA_CTL_SELECT + | QEMU_CFG_DMA_CTL_READ; + qemu_cfg_dma_transfer(buf, len, control); + } else { + qemu_cfg_select(e); + qemu_cfg_read(buf, len); + } }
struct qemu_romfile_s { @@ -230,9 +278,14 @@ qemu_cfg_read_file(struct romfile_s *file, void *dst, u32 maxlen) return -1; struct qemu_romfile_s *qfile; qfile = container_of(file, struct qemu_romfile_s, file); - qemu_cfg_select(qfile->select); - qemu_cfg_skip(qfile->skip); - qemu_cfg_read(dst, file->size); + if (qfile->skip == 0) { + /* Do it in one transfer */ + qemu_cfg_read_entry(dst, qfile->select, file->size); + } else { + qemu_cfg_select(qfile->select); + qemu_cfg_skip(qfile->skip); + qemu_cfg_read(dst, file->size); + } return file->size; }
@@ -422,8 +475,18 @@ void qemu_cfg_init(void) for (i = 0; i < 4; i++) if (inb(PORT_QEMU_CFG_DATA) != sig[i]) return; + dprintf(1, "Found QEMU fw_cfg\n");
+ // Detect DMA interface. + u32 id; + qemu_cfg_read_entry(&id, QEMU_CFG_ID, sizeof(id)); + + if (id & QEMU_CFG_VERSION_DMA) { + dprintf(1, "QEMU fw_cfg DMA interface supported\n"); + cfg_dma_enabled = 1; + } + // Populate romfiles for legacy fw_cfg entries qemu_cfg_legacy();
diff --git a/src/fw/paravirt.h b/src/fw/paravirt.h index 95ffb92..ed8e5f1 100644 --- a/src/fw/paravirt.h +++ b/src/fw/paravirt.h @@ -9,6 +9,12 @@ #define PF_XEN (1<<1) #define PF_KVM (1<<2)
+typedef struct QemuCfgDmaAccess { + u32 control; + u32 length; + u64 address; +} PACKED QemuCfgDmaAccess; + extern u32 RamSize; extern u64 RamSizeOver4G; extern int PlatformRunningOn; @@ -25,11 +31,23 @@ static inline int runningOnKVM(void) { }
// Common paravirt ports. -#define PORT_SMI_CMD 0x00b2 -#define PORT_SMI_STATUS 0x00b3 -#define PORT_QEMU_CFG_CTL 0x0510 -#define PORT_QEMU_CFG_DATA 0x0511 +#define PORT_SMI_CMD 0x00b2 +#define PORT_SMI_STATUS 0x00b3 +#define PORT_QEMU_CFG_CTL 0x0510 +#define PORT_QEMU_CFG_DATA 0x0511 +#define PORT_QEMU_CFG_DMA_ADDR_HIGH 0x0514 +#define PORT_QEMU_CFG_DMA_ADDR_LOW 0x0518 + +// QEMU_CFG_DMA_CONTROL bits +#define QEMU_CFG_DMA_CTL_ERROR 0x01 +#define QEMU_CFG_DMA_CTL_READ 0x02 +#define QEMU_CFG_DMA_CTL_SKIP 0x04 +#define QEMU_CFG_DMA_CTL_SELECT 0x08 + +// QEMU_CFG_DMA ID bit +#define QEMU_CFG_VERSION_DMA 2
+int qemu_cfg_dma_enabled(void); void qemu_preinit(void); void qemu_platform_setup(void); void qemu_cfg_init(void);
Reading Linux from the fw_cfg interface is faster using the DMA interface. For this reason, add a Linux loader that can benefit from this interface.
Signed-off-by: Marc Marí markmb@redhat.com --- src/boot.c | 32 ++++++++++++++++----- src/fw/paravirt.c | 85 ++++++++++++++++++++++++++++++++++++++++++++----------- src/fw/paravirt.h | 36 +++++++++++++++++++++++ src/post.c | 1 + src/romlayout.S | 20 +++++++++++++ src/util.h | 1 + 6 files changed, 152 insertions(+), 23 deletions(-)
diff --git a/src/boot.c b/src/boot.c index e0f73a3..52918f8 100644 --- a/src/boot.c +++ b/src/boot.c @@ -300,13 +300,14 @@ struct bootentry_s { }; static struct hlist_head BootList VARVERIFY32INIT;
-#define IPL_TYPE_FLOPPY 0x01 -#define IPL_TYPE_HARDDISK 0x02 -#define IPL_TYPE_CDROM 0x03 -#define IPL_TYPE_CBFS 0x20 -#define IPL_TYPE_BEV 0x80 -#define IPL_TYPE_BCV 0x81 -#define IPL_TYPE_HALT 0xf0 +#define IPL_TYPE_FLOPPY 0x01 +#define IPL_TYPE_HARDDISK 0x02 +#define IPL_TYPE_CDROM 0x03 +#define IPL_TYPE_CBFS 0x20 +#define IPL_TYPE_QEMU_VMLINUX 0x21 +#define IPL_TYPE_BEV 0x80 +#define IPL_TYPE_BCV 0x81 +#define IPL_TYPE_HALT 0xf0
static void bootentry_add(int type, int prio, u32 data, const char *desc) @@ -398,6 +399,12 @@ boot_add_cbfs(void *data, const char *desc, int prio) bootentry_add(IPL_TYPE_CBFS, defPrio(prio, DEFAULT_PRIO), (u32)data, desc); }
+// Add a FW_CFG DMA boot +void +boot_add_qemu_vmlinux(const char *desc, int prio) +{ + bootentry_add(IPL_TYPE_QEMU_VMLINUX, defPrio(prio, DEFAULT_PRIO), 0, desc); +}
/**************************************************************** * Keyboard calls @@ -684,6 +691,14 @@ boot_rom(u32 vector) call_boot_entry(so, 0); }
+// Boot from a linuxboot ROM when QEMU cfg is in DMA mode +static void +boot_qemu_vmlinux_dma(void) +{ + printf("Booting Linux from fw_cfg...\n"); + qemu_vmlinux_dma_boot(); +} + // Unable to find bootable device - warn user and eventually retry. static void boot_fail(void) @@ -734,6 +749,9 @@ do_boot(int seq_nr) case IPL_TYPE_BEV: boot_rom(ie->vector); break; + case IPL_TYPE_QEMU_VMLINUX: + boot_qemu_vmlinux_dma(); + break; case IPL_TYPE_HALT: boot_fail(); break; diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 65f9fba..6896511 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,7 +23,8 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup -#include "stacks.h" // yield +#include "bregs.h" // struct bregs +#include "stacks.h" // farcall16big, yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -185,21 +186,6 @@ qemu_platform_setup(void) * QEMU firmware config (fw_cfg) interface ****************************************************************/
-// List of QEMU fw_cfg entries. DO NOT ADD MORE. (All new content -// should be passed via the fw_cfg "file" interface.) -#define QEMU_CFG_SIGNATURE 0x00 -#define QEMU_CFG_ID 0x01 -#define QEMU_CFG_UUID 0x02 -#define QEMU_CFG_NUMA 0x0d -#define QEMU_CFG_BOOT_MENU 0x0e -#define QEMU_CFG_MAX_CPUS 0x0f -#define QEMU_CFG_FILE_DIR 0x19 -#define QEMU_CFG_ARCH_LOCAL 0x8000 -#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) -#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) -#define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2) -#define QEMU_CFG_E820_TABLE (QEMU_CFG_ARCH_LOCAL + 3) - static void qemu_cfg_select(u16 f) { @@ -509,3 +495,70 @@ void qemu_cfg_init(void) dprintf(1, "Moving pm_base to 0x%x\n", acpi_pm_base); } } + +void qemu_vmlinux_dma_boot(void) +{ + dprintf(1, "Loading kernel\n"); + + irq_disable(); + + void *setup_addr; + u32 setup_size; + qemu_cfg_read_entry(&setup_addr, QEMU_CFG_SETUP_ADDR, 4); + qemu_cfg_read_entry(&setup_size, QEMU_CFG_SETUP_SIZE, 4); + qemu_cfg_read_entry(setup_addr, QEMU_CFG_SETUP_DATA, setup_size); + + u16 protocol = readw(setup_addr + 0x206); + if (protocol < 0x203) { + /* Assume initrd_max 0x37ffffff */ + writel(setup_addr + 0x22c, 0x37ffffff); + } + + void *initrd_addr; + u32 initrd_size; + qemu_cfg_read_entry(&initrd_addr, QEMU_CFG_INITRD_ADDR, 4); + qemu_cfg_read_entry(&initrd_size, QEMU_CFG_INITRD_SIZE, 4); + + u32 initrd_end_page = (u32)(initrd_addr + initrd_size) & -4096; + u32 max_allowed_page = readl(setup_addr + 0x22c) & -4096; + if (initrd_end_page != max_allowed_page) { + /* Initrd at the end of memory. 
Compute better initrd address + * based on e801 data + */ + initrd_addr = (void *)((LegacyRamSize - initrd_size) & -4096); + writel(setup_addr + 0x218, (u32)initrd_addr); + } + + qemu_cfg_read_entry(initrd_addr, QEMU_CFG_INITRD_DATA, initrd_size); + + void *kernel_addr; + u32 kernel_size; + qemu_cfg_read_entry(&kernel_addr, QEMU_CFG_KERNEL_ADDR, 4); + qemu_cfg_read_entry(&kernel_size, QEMU_CFG_KERNEL_SIZE, 4); + qemu_cfg_read_entry(kernel_addr, QEMU_CFG_KERNEL_DATA, kernel_size); + + void *cmdline_addr; + u32 cmdline_size; + qemu_cfg_read_entry(&cmdline_addr, QEMU_CFG_CMDLINE_ADDR, 4); + qemu_cfg_read_entry(&cmdline_size, QEMU_CFG_CMDLINE_SIZE, 4); + qemu_cfg_read_entry(cmdline_addr, QEMU_CFG_CMDLINE_DATA, cmdline_size); + + dprintf(1, "Jumping to kernel %d@%x %d@%x %d@%x %d@%x\n" + , setup_size, (u32)setup_addr, cmdline_size, (u32)cmdline_addr + , kernel_size, (u32)kernel_addr, initrd_size, (u32)initrd_addr); + struct bregs br; + memset(&br, 0, sizeof(br)); + extern void kernel_stub(void); + br.ebx = (u32)setup_addr >> 4; + br.edx = (u32)cmdline_addr - (u32)setup_addr - 16; + br.code = SEGOFF(SEG_BIOS, (u32)kernel_stub - BUILD_BIOS_ADDR); + + farcall16big(&br); +} + +void qemu_vmlinux_dma_setup(void) { + if (qemu_cfg_dma_enabled()) { + boot_add_qemu_vmlinux("QEMU Kernel image", + bootprio_find_named_rom("genroms/linuxboot.bin", 0)); + } +} diff --git a/src/fw/paravirt.h b/src/fw/paravirt.h index ed8e5f1..b588abf 100644 --- a/src/fw/paravirt.h +++ b/src/fw/paravirt.h @@ -47,9 +47,45 @@ static inline int runningOnKVM(void) { // QEMU_CFG_DMA ID bit #define QEMU_CFG_VERSION_DMA 2
+// List of QEMU fw_cfg entries. DO NOT ADD MORE. (All new content +// should be passed via the fw_cfg "file" interface.) +#define QEMU_CFG_SIGNATURE 0x00 +#define QEMU_CFG_ID 0x01 +#define QEMU_CFG_UUID 0x02 +#define QEMU_CFG_RAM_SIZE 0x03 +#define QEMU_CFG_NOGRAPHIC 0x04 +#define QEMU_CFG_NB_CPUS 0x05 +#define QEMU_CFG_MACHINE_ID 0x06 +#define QEMU_CFG_KERNEL_ADDR 0x07 +#define QEMU_CFG_KERNEL_SIZE 0x08 +#define QEMU_CFG_KERNEL_CMDLINE 0x09 +#define QEMU_CFG_INITRD_ADDR 0x0a +#define QEMU_CFG_INITRD_SIZE 0x0b +#define QEMU_CFG_BOOT_DEVICE 0x0c +#define QEMU_CFG_NUMA 0x0d +#define QEMU_CFG_BOOT_MENU 0x0e +#define QEMU_CFG_MAX_CPUS 0x0f +#define QEMU_CFG_KERNEL_ENTRY 0x10 +#define QEMU_CFG_KERNEL_DATA 0x11 +#define QEMU_CFG_INITRD_DATA 0x12 +#define QEMU_CFG_CMDLINE_ADDR 0x13 +#define QEMU_CFG_CMDLINE_SIZE 0x14 +#define QEMU_CFG_CMDLINE_DATA 0x15 +#define QEMU_CFG_SETUP_ADDR 0x16 +#define QEMU_CFG_SETUP_SIZE 0x17 +#define QEMU_CFG_SETUP_DATA 0x18 +#define QEMU_CFG_FILE_DIR 0x19 +#define QEMU_CFG_ARCH_LOCAL 0x8000 +#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) +#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) +#define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2) +#define QEMU_CFG_E820_TABLE (QEMU_CFG_ARCH_LOCAL + 3) + int qemu_cfg_dma_enabled(void); void qemu_preinit(void); void qemu_platform_setup(void); void qemu_cfg_init(void); +void qemu_vmlinux_dma_boot(void); +void qemu_vmlinux_dma_setup(void);
#endif diff --git a/src/post.c b/src/post.c index 6803585..149505f 100644 --- a/src/post.c +++ b/src/post.c @@ -153,6 +153,7 @@ device_hardware_setup(void) esp_scsi_setup(); megasas_setup(); pvscsi_setup(); + qemu_vmlinux_dma_setup(); }
static void diff --git a/src/romlayout.S b/src/romlayout.S index d78737b..814b3f8 100644 --- a/src/romlayout.S +++ b/src/romlayout.S @@ -187,6 +187,26 @@ __farcall16: IRQ_TRAMPOLINE 1c IRQ_TRAMPOLINE 4a
+ DECLFUNC kernel_stub +kernel_stub: + movw %bx, %ds + movw %bx, %es + movw %bx, %fs + movw %bx, %gs + movw %bx, %ss + movl %edx, %esp + addw $0x20, %bx + pushw %bx // push CS + pushw $0 // push IP + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %edi, %edi + xorl %esi, %esi + xorl %ebp, %ebp + lretw +
/**************************************************************** * Misc. entry points. diff --git a/src/util.h b/src/util.h index 327abeb..5923c4f 100644 --- a/src/util.h +++ b/src/util.h @@ -25,6 +25,7 @@ void boot_add_floppy(struct drive_s *drive_g, const char *desc, int prio); void boot_add_hd(struct drive_s *drive_g, const char *desc, int prio); void boot_add_cd(struct drive_s *drive_g, const char *desc, int prio); void boot_add_cbfs(void *data, const char *desc, int prio); +void boot_add_qemu_vmlinux(const char *desc, int prio); void interactive_bootmenu(void); void bcv_prepboot(void); struct pci_device;
Hi,
+void qemu_vmlinux_dma_setup(void) {
+ if (qemu_cfg_dma_enabled()) {
+ boot_add_qemu_vmlinux("QEMU Kernel image",
+ bootprio_find_named_rom("genroms/linuxboot.bin", 0));
+ }
+}
I think this adds an entry to the boot menu even if qemu was started without -kernel.
What happens with -kernel? Do we have two entries then, for both fw_cfg_dma boot and the linuxboot rom?
cheers, Gerd
On Fri, Sep 18, 2015 at 10:59:15AM +0200, Marc Marí wrote:
Reading Linux from the fw_cfg interface is faster using the DMA interface. For this reason, add a Linux loader that can benefit from this interface.
Signed-off-by: Marc Marí markmb@redhat.com
src/boot.c | 32 ++++++++++++++++----- src/fw/paravirt.c | 85 ++++++++++++++++++++++++++++++++++++++++++++----------- src/fw/paravirt.h | 36 +++++++++++++++++++++++ src/post.c | 1 + src/romlayout.S | 20 +++++++++++++ src/util.h | 1 + 6 files changed, 152 insertions(+), 23 deletions(-)
diff --git a/src/boot.c b/src/boot.c index e0f73a3..52918f8 100644 --- a/src/boot.c +++ b/src/boot.c @@ -300,13 +300,14 @@ struct bootentry_s { }; static struct hlist_head BootList VARVERIFY32INIT;
-#define IPL_TYPE_FLOPPY 0x01 -#define IPL_TYPE_HARDDISK 0x02 -#define IPL_TYPE_CDROM 0x03 -#define IPL_TYPE_CBFS 0x20 -#define IPL_TYPE_BEV 0x80 -#define IPL_TYPE_BCV 0x81 -#define IPL_TYPE_HALT 0xf0 +#define IPL_TYPE_FLOPPY 0x01 +#define IPL_TYPE_HARDDISK 0x02 +#define IPL_TYPE_CDROM 0x03 +#define IPL_TYPE_CBFS 0x20 +#define IPL_TYPE_QEMU_VMLINUX 0x21 +#define IPL_TYPE_BEV 0x80 +#define IPL_TYPE_BCV 0x81 +#define IPL_TYPE_HALT 0xf0
static void bootentry_add(int type, int prio, u32 data, const char *desc) @@ -398,6 +399,12 @@ boot_add_cbfs(void *data, const char *desc, int prio) bootentry_add(IPL_TYPE_CBFS, defPrio(prio, DEFAULT_PRIO), (u32)data, desc); }
+// Add a FW_CFG DMA boot +void +boot_add_qemu_vmlinux(const char *desc, int prio) +{
- bootentry_add(IPL_TYPE_QEMU_VMLINUX, defPrio(prio, DEFAULT_PRIO), 0, desc);
+}
/****************************************************************
- Keyboard calls
@@ -684,6 +691,14 @@ boot_rom(u32 vector) call_boot_entry(so, 0); }
+// Boot from a linuxboot ROM when QEMU cfg is in DMA mode +static void +boot_qemu_vmlinux_dma(void) +{
- printf("Booting Linux from fw_cfg...\n");
- qemu_vmlinux_dma_boot();
+}
// Unable to find bootable device - warn user and eventually retry. static void boot_fail(void) @@ -734,6 +749,9 @@ do_boot(int seq_nr) case IPL_TYPE_BEV: boot_rom(ie->vector); break;
+ case IPL_TYPE_QEMU_VMLINUX:
+ boot_qemu_vmlinux_dma();
+ break;
case IPL_TYPE_HALT: boot_fail(); break;
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 65f9fba..6896511 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,7 +23,8 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup -#include "stacks.h" // yield +#include "bregs.h" // struct bregs +#include "stacks.h" // farcall16big, yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -185,21 +186,6 @@ qemu_platform_setup(void)
- QEMU firmware config (fw_cfg) interface
****************************************************************/
-// List of QEMU fw_cfg entries. DO NOT ADD MORE. (All new content -// should be passed via the fw_cfg "file" interface.) -#define QEMU_CFG_SIGNATURE 0x00 -#define QEMU_CFG_ID 0x01 -#define QEMU_CFG_UUID 0x02 -#define QEMU_CFG_NUMA 0x0d -#define QEMU_CFG_BOOT_MENU 0x0e -#define QEMU_CFG_MAX_CPUS 0x0f -#define QEMU_CFG_FILE_DIR 0x19 -#define QEMU_CFG_ARCH_LOCAL 0x8000 -#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) -#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) -#define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2) -#define QEMU_CFG_E820_TABLE (QEMU_CFG_ARCH_LOCAL + 3)
static void qemu_cfg_select(u16 f) { @@ -509,3 +495,70 @@ void qemu_cfg_init(void) dprintf(1, "Moving pm_base to 0x%x\n", acpi_pm_base); } }
+void qemu_vmlinux_dma_boot(void) +{
- dprintf(1, "Loading kernel\n");
- irq_disable();
SeaBIOS always disables interrupts in C code (see http://www.seabios.org/Execution_and_code_flow#Hardware_interrupts ), so this irq_disable() is not needed.
- void *setup_addr;
- u32 setup_size;
- qemu_cfg_read_entry(&setup_addr, QEMU_CFG_SETUP_ADDR, 4);
- qemu_cfg_read_entry(&setup_size, QEMU_CFG_SETUP_SIZE, 4);
- qemu_cfg_read_entry(setup_addr, QEMU_CFG_SETUP_DATA, setup_size);
- u16 protocol = readw(setup_addr + 0x206);
- if (protocol < 0x203) {
/* Assume initrd_max 0x37ffffff */
writel(setup_addr + 0x22c, 0x37ffffff);
- }
- void *initrd_addr;
- u32 initrd_size;
- qemu_cfg_read_entry(&initrd_addr, QEMU_CFG_INITRD_ADDR, 4);
- qemu_cfg_read_entry(&initrd_size, QEMU_CFG_INITRD_SIZE, 4);
- u32 initrd_end_page = (u32)(initrd_addr + initrd_size) & -4096;
- u32 max_allowed_page = readl(setup_addr + 0x22c) & -4096;
Use of the ALIGN_DOWN() macro would be preferable here.
- if (initrd_end_page != max_allowed_page) {
/* Initrd at the end of memory. Compute better initrd address
* based on e801 data
*/
initrd_addr = (void *)((LegacyRamSize - initrd_size) & -4096);
writel(setup_addr + 0x218, (u32)initrd_addr);
- }
- qemu_cfg_read_entry(initrd_addr, QEMU_CFG_INITRD_DATA, initrd_size);
- void *kernel_addr;
- u32 kernel_size;
- qemu_cfg_read_entry(&kernel_addr, QEMU_CFG_KERNEL_ADDR, 4);
- qemu_cfg_read_entry(&kernel_size, QEMU_CFG_KERNEL_SIZE, 4);
- qemu_cfg_read_entry(kernel_addr, QEMU_CFG_KERNEL_DATA, kernel_size);
- void *cmdline_addr;
- u32 cmdline_size;
- qemu_cfg_read_entry(&cmdline_addr, QEMU_CFG_CMDLINE_ADDR, 4);
- qemu_cfg_read_entry(&cmdline_size, QEMU_CFG_CMDLINE_SIZE, 4);
- qemu_cfg_read_entry(cmdline_addr, QEMU_CFG_CMDLINE_DATA, cmdline_size);
- dprintf(1, "Jumping to kernel %d@%x %d@%x %d@%x %d@%x\n"
, setup_size, (u32)setup_addr, cmdline_size, (u32)cmdline_addr
, kernel_size, (u32)kernel_addr, initrd_size, (u32)initrd_addr);
- struct bregs br;
- memset(&br, 0, sizeof(br));
- extern void kernel_stub(void);
- br.ebx = (u32)setup_addr >> 4;
- br.edx = (u32)cmdline_addr - (u32)setup_addr - 16;
- br.code = SEGOFF(SEG_BIOS, (u32)kernel_stub - BUILD_BIOS_ADDR);
- farcall16big(&br);
+}
+void qemu_vmlinux_dma_setup(void) {
- if (qemu_cfg_dma_enabled()) {
I don't think this feature should depend on DMA (it works fine even if fw_cfg DMA is not available). I also don't think "dma" should be in the function names - qemu_vmlinux_setup() is fine.
Also, the code should check if there is a kernel before registering an IPL - for example:
u32 kernel_size; qemu_cfg_read_entry(&kernel_size, QEMU_CFG_KERNEL_SIZE, sizeof(kernel_size)); if (!kernel_size) return;
boot_add_qemu_vmlinux("QEMU Kernel image",
bootprio_find_named_rom("genroms/linuxboot.bin", 0));
Using "genroms/linuxboot.bin" works for testing, but I think a new bootorder id (eg, "vmlinux") will need to be defined between QEMU and SeaBIOS. I don't think it would be good to overload the meaning of the "genroms/" prefix.
Otherwise the series looks good to me. Once the QEMU series is committed we can commit this series to SeaBIOS.
-Kevin
On Fri, 18 Sep 2015 14:40:30 -0400 "Kevin O'Connor" kevin@koconnor.net wrote:
On Fri, Sep 18, 2015 at 10:59:15AM +0200, Marc Marí wrote:
Reading Linux from the fw_cfg interface is faster using the DMA interface. For this reason, add a Linux loader that can benefit from this interface.
Signed-off-by: Marc Marí markmb@redhat.com
src/boot.c | 32 ++++++++++++++++----- src/fw/paravirt.c | 85 ++++++++++++++++++++++++++++++++++++++++++++----------- src/fw/paravirt.h | 36 +++++++++++++++++++++++ src/post.c | 1 + src/romlayout.S | 20 +++++++++++++ src/util.h | 1 + 6 files changed, 152 insertions(+), 23 deletions(-)
diff --git a/src/boot.c b/src/boot.c index e0f73a3..52918f8 100644 --- a/src/boot.c +++ b/src/boot.c @@ -300,13 +300,14 @@ struct bootentry_s { }; static struct hlist_head BootList VARVERIFY32INIT;
-#define IPL_TYPE_FLOPPY 0x01 -#define IPL_TYPE_HARDDISK 0x02 -#define IPL_TYPE_CDROM 0x03 -#define IPL_TYPE_CBFS 0x20 -#define IPL_TYPE_BEV 0x80 -#define IPL_TYPE_BCV 0x81 -#define IPL_TYPE_HALT 0xf0 +#define IPL_TYPE_FLOPPY 0x01 +#define IPL_TYPE_HARDDISK 0x02 +#define IPL_TYPE_CDROM 0x03 +#define IPL_TYPE_CBFS 0x20 +#define IPL_TYPE_QEMU_VMLINUX 0x21 +#define IPL_TYPE_BEV 0x80 +#define IPL_TYPE_BCV 0x81 +#define IPL_TYPE_HALT 0xf0
static void bootentry_add(int type, int prio, u32 data, const char *desc) @@ -398,6 +399,12 @@ boot_add_cbfs(void *data, const char *desc, int prio) bootentry_add(IPL_TYPE_CBFS, defPrio(prio, DEFAULT_PRIO), (u32)data, desc); }
+// Add a FW_CFG DMA boot +void +boot_add_qemu_vmlinux(const char *desc, int prio) +{
- bootentry_add(IPL_TYPE_QEMU_VMLINUX, defPrio(prio,
DEFAULT_PRIO), 0, desc); +}
/****************************************************************
- Keyboard calls
@@ -684,6 +691,14 @@ boot_rom(u32 vector) call_boot_entry(so, 0); }
+// Boot from a linuxboot ROM when QEMU cfg is in DMA mode +static void +boot_qemu_vmlinux_dma(void) +{
- printf("Booting Linux from fw_cfg...\n");
- qemu_vmlinux_dma_boot();
+}
// Unable to find bootable device - warn user and eventually retry. static void boot_fail(void) @@ -734,6 +749,9 @@ do_boot(int seq_nr) case IPL_TYPE_BEV: boot_rom(ie->vector); break;
+ case IPL_TYPE_QEMU_VMLINUX:
+ boot_qemu_vmlinux_dma();
+ break;
case IPL_TYPE_HALT: boot_fail(); break;
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 65f9fba..6896511 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -23,7 +23,8 @@ #include "util.h" // pci_setup #include "x86.h" // cpuid #include "xen.h" // xen_biostable_setup -#include "stacks.h" // yield +#include "bregs.h" // struct bregs +#include "stacks.h" // farcall16big, yield
// Amount of continuous ram under 4Gig u32 RamSize; @@ -185,21 +186,6 @@ qemu_platform_setup(void)
- QEMU firmware config (fw_cfg) interface
****************************************************************/
-// List of QEMU fw_cfg entries. DO NOT ADD MORE. (All new content -// should be passed via the fw_cfg "file" interface.) -#define QEMU_CFG_SIGNATURE 0x00 -#define QEMU_CFG_ID 0x01 -#define QEMU_CFG_UUID 0x02 -#define QEMU_CFG_NUMA 0x0d -#define QEMU_CFG_BOOT_MENU 0x0e -#define QEMU_CFG_MAX_CPUS 0x0f -#define QEMU_CFG_FILE_DIR 0x19 -#define QEMU_CFG_ARCH_LOCAL 0x8000 -#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) -#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) -#define QEMU_CFG_IRQ0_OVERRIDE (QEMU_CFG_ARCH_LOCAL + 2) -#define QEMU_CFG_E820_TABLE (QEMU_CFG_ARCH_LOCAL + 3)
static void qemu_cfg_select(u16 f) { @@ -509,3 +495,70 @@ void qemu_cfg_init(void) dprintf(1, "Moving pm_base to 0x%x\n", acpi_pm_base); } }
+void qemu_vmlinux_dma_boot(void) +{
- dprintf(1, "Loading kernel\n");
- irq_disable();
SeaBIOS always disables interrupts in C code (see http://www.seabios.org/Execution_and_code_flow#Hardware_interrupts ), so this irq_disable() is not needed.
- void *setup_addr;
- u32 setup_size;
- qemu_cfg_read_entry(&setup_addr, QEMU_CFG_SETUP_ADDR, 4);
- qemu_cfg_read_entry(&setup_size, QEMU_CFG_SETUP_SIZE, 4);
- qemu_cfg_read_entry(setup_addr, QEMU_CFG_SETUP_DATA,
setup_size); +
- u16 protocol = readw(setup_addr + 0x206);
- if (protocol < 0x203) {
/* Assume initrd_max 0x37ffffff */
writel(setup_addr + 0x22c, 0x37ffffff);
- }
- void *initrd_addr;
- u32 initrd_size;
- qemu_cfg_read_entry(&initrd_addr, QEMU_CFG_INITRD_ADDR, 4);
- qemu_cfg_read_entry(&initrd_size, QEMU_CFG_INITRD_SIZE, 4);
- u32 initrd_end_page = (u32)(initrd_addr + initrd_size) & -4096;
- u32 max_allowed_page = readl(setup_addr + 0x22c) & -4096;
Use of the ALIGN_DOWN() macro would be preferable here.
- if (initrd_end_page != max_allowed_page) {
/* Initrd at the end of memory. Compute better initrd
address
* based on e801 data
*/
initrd_addr = (void *)((LegacyRamSize - initrd_size) &
-4096);
writel(setup_addr + 0x218, (u32)initrd_addr);
- }
- qemu_cfg_read_entry(initrd_addr, QEMU_CFG_INITRD_DATA,
initrd_size); +
- void *kernel_addr;
- u32 kernel_size;
- qemu_cfg_read_entry(&kernel_addr, QEMU_CFG_KERNEL_ADDR, 4);
- qemu_cfg_read_entry(&kernel_size, QEMU_CFG_KERNEL_SIZE, 4);
- qemu_cfg_read_entry(kernel_addr, QEMU_CFG_KERNEL_DATA,
kernel_size); +
- void *cmdline_addr;
- u32 cmdline_size;
- qemu_cfg_read_entry(&cmdline_addr, QEMU_CFG_CMDLINE_ADDR, 4);
- qemu_cfg_read_entry(&cmdline_size, QEMU_CFG_CMDLINE_SIZE, 4);
- qemu_cfg_read_entry(cmdline_addr, QEMU_CFG_CMDLINE_DATA,
cmdline_size); +
- dprintf(1, "Jumping to kernel %d@%x %d@%x %d@%x %d@%x\n"
, setup_size, (u32)setup_addr, cmdline_size,
(u32)cmdline_addr
, kernel_size, (u32)kernel_addr, initrd_size,
(u32)initrd_addr);
- struct bregs br;
- memset(&br, 0, sizeof(br));
- extern void kernel_stub(void);
- br.ebx = (u32)setup_addr >> 4;
- br.edx = (u32)cmdline_addr - (u32)setup_addr - 16;
- br.code = SEGOFF(SEG_BIOS, (u32)kernel_stub - BUILD_BIOS_ADDR);
- farcall16big(&br);
+}
+void qemu_vmlinux_dma_setup(void) {
- if (qemu_cfg_dma_enabled()) {
I don't think this feature should depend on DMA (it works fine even if fw_cfg DMA is not available). I also don't think "dma" should be in the function names - qemu_vmlinux_setup() is fine.
Also, the code should check if there is a kernel before registering an IPL - for example:
u32 kernel_size; qemu_cfg_read_entry(&kernel_size, QEMU_CFG_KERNEL_SIZE,
sizeof(kernel_size)); if (!kernel_size) return;
boot_add_qemu_vmlinux("QEMU Kernel image",
bootprio_find_named_rom("genroms/linuxboot.bin", 0));
Using "genroms/linuxboot.bin" works for testing, but I think a new bootorder id (eg, "vmlinux") will need to be defined between QEMU and SeaBIOS. I don't think it would be good to overload the meaning of the "genroms/" prefix.
I was looking at it, but I don't see a "good" way to put it.
If you want to boot from fw_cfg through linuxboot.S, you prefer to boot using fw_cfg DMA, which is faster. And adding a new entry would mean adding a new type (?) which would be added in the same cases as linuxboot.S. It looks like a lot of work for a thing that is repeated.
But yes, it's really ugly to overload it in this way.
Any other ideas?
Marc
Otherwise the series looks good to me. Once the QEMU series is committed we can commit this series to SeaBIOS.
-Kevin
On Fri, Sep 18, 2015 at 09:28:57PM +0200, Marc Marí wrote:
On Fri, 18 Sep 2015 14:40:30 -0400 "Kevin O'Connor" kevin@koconnor.net wrote:
On Fri, Sep 18, 2015 at 10:59:15AM +0200, Marc Marí wrote:
bootprio_find_named_rom("genroms/linuxboot.bin", 0));
Using "genroms/linuxboot.bin" works for testing, but I think a new bootorder id (eg, "vmlinux") will need to be defined between QEMU and SeaBIOS. I don't think it would be good to overload the meaning of the "genroms/" prefix.
I was looking at it, but I don't see a "good" way to put it.
If you want to boot from fw_cfg through linuxboot.S, you prefer to boot using fw_cfg DMA, which is faster. And adding a new entry would mean adding a new type (?) which would be added in the same cases as linuxboot.S. It looks like a lot of work for a thing that is repeated.
Would the QEMU patch below work?
-Kevin
--- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1012,8 +1012,9 @@ static void load_linux(PCMachineState *pcms, fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
option_rom[nb_option_roms].name = "linuxboot.bin"; - option_rom[nb_option_roms].bootindex = 0; + option_rom[nb_option_roms].bootindex = 1; nb_option_roms++; + add_boot_device_path(0, NULL, "vmlinux"); }
#define NE2000_NB_MAX 6
On Fri, 18 Sep 2015 15:55:12 -0400 "Kevin O'Connor" kevin@koconnor.net wrote:
On Fri, Sep 18, 2015 at 09:28:57PM +0200, Marc Marí wrote:
On Fri, 18 Sep 2015 14:40:30 -0400 "Kevin O'Connor" kevin@koconnor.net wrote:
On Fri, Sep 18, 2015 at 10:59:15AM +0200, Marc Marí wrote:
bootprio_find_named_rom("genroms/linuxboot.bin", 0));
Using "genroms/linuxboot.bin" works for testing, but I think a new bootorder id (eg, "vmlinux") will need to be defined between QEMU and SeaBIOS. I don't think it would be good to overload the meaning of the "genroms/" prefix.
I was looking at it, but I don't see a "good" way to put it.
If you want to boot from fw_cfg through linuxboot.S, you prefer to boot using fw_cfg DMA, which is faster. And adding a new entry would mean adding a new type (?) which would be added in the same cases as linuxboot.S. It looks like a lot of work for a thing that is repeated.
Would the QEMU patch below work?
Yes, it does. Sorry for my negative answer. I didn't look at it properly.
Thanks Marc
-Kevin
--- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1012,8 +1012,9 @@ static void load_linux(PCMachineState *pcms, fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
option_rom[nb_option_roms].name = "linuxboot.bin";
- option_rom[nb_option_roms].bootindex = 0;
+ option_rom[nb_option_roms].bootindex = 1; nb_option_roms++;
+ add_boot_device_path(0, NULL, "vmlinux");
}
#define NE2000_NB_MAX 6