Hi,
The 1.11 seabios release which is just around the corner brings serial console support to seabios. Which is useful for kvmtool too. So I decided to undust my old kvmtool support patch series and check out where we stand.
Patch #1 adds the config option, wires up everything and adds ram detection for kvmtool. Patches #2 and #3 tweak the seabios virtio drivers so they work with kvmtool.
With that in place seabios works nicely. Loads grub from hard disk, allows to edit menu entries, loads and runs the linux kernel.
But there are also some known issues:
(1) ram detection is clumpsy (see patch #1) due to kvmtool not passing ram information to the firmware (on x86, seems other platforms use the device tree for that).
(2) kvmtool virtio implementation seems to have some bugs. specifically handing over the virtio-blk devices from seabios to the linux kernel doesn't work (see patch #3).
(3) when kernels are booted via seabios+grub all the command line args kvmtool adds to the kernel command line on direct kernel boot will not be passed to the kernel. Not sure how much of a problem that is in practice, due to (2) I didn't yet manage to boot a linux guest to the login prompt.
cheers, Gerd
Gerd Hoffmann (3): kvmtool: initial support kvmtool: allow mmio for legacy bar 0 kvmtool: support larger virtio queues
Makefile | 1 + src/fw/paravirt.h | 3 +++ src/hw/virtio-ring.h | 2 +- src/fw/paravirt.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ src/hw/virtio-pci.c | 16 ++++++++++++---- src/post.c | 4 +++- src/sercon.c | 2 ++ src/Kconfig | 23 +++++++++++++++++++++-- 8 files changed, 91 insertions(+), 8 deletions(-)
Add CONFIG_KVMTOOL config option.
kvmtool supports virtio only, so disable drivers for all kinds of qemu emulated hardware and leave only virtio-blk and virtio-scsi enabled.
Set rom default size to 128k. Enable serial console for kvmtool. Add ram detection. Add pci devices scan.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- Makefile | 1 + src/fw/paravirt.h | 3 +++ src/fw/paravirt.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ src/post.c | 4 +++- src/sercon.c | 2 ++ src/Kconfig | 23 +++++++++++++++++++++-- 6 files changed, 78 insertions(+), 3 deletions(-)
diff --git a/Makefile b/Makefile index eb8ad583ae..6ea8e19ee7 100644 --- a/Makefile +++ b/Makefile @@ -92,6 +92,7 @@ endif
target-y := target-$(CONFIG_QEMU) += $(OUT)bios.bin +target-$(CONFIG_KVMTOOL) += $(OUT)bios.bin target-$(CONFIG_CSM) += $(OUT)Csm16.bin target-$(CONFIG_COREBOOT) += $(OUT)bios.bin.elf target-$(CONFIG_BUILD_VGABIOS) += $(OUT)vgabios.bin diff --git a/src/fw/paravirt.h b/src/fw/paravirt.h index a14d83e101..57ee0cfc4d 100644 --- a/src/fw/paravirt.h +++ b/src/fw/paravirt.h @@ -60,4 +60,7 @@ int qemu_cfg_write_file(void *src, struct romfile_s *file, u32 offset, u32 len); int qemu_cfg_write_file_simple(void *src, u16 key, u32 offset, u32 len); u16 qemu_get_romfile_key(struct romfile_s *file);
+void kvmtool_preinit(void); +void kvmtool_platform_setup(void); + #endif diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 9674ab8ba8..0d4855e2e2 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -621,3 +621,51 @@ void qemu_cfg_init(void) dprintf(1, "Moving pm_base to 0x%x\n", acpi_pm_base); } } + +void +kvmtool_platform_setup(void) +{ + if (!CONFIG_KVMTOOL) + return; + + pci_probe_devices(); +} + +void +kvmtool_preinit(void) +{ + /* + * When started without firmware kvmtool creates a e820 map for + * the guest kernel. When started with "--firmware $file" it + * doesn't, so we have to figure. + * + * Detects only memory below 4G for now as we run in 32bit mode. + * For memory above 4G we would have to: + * (1) get hints from kvmtool somehow, or + * (2) enable paging, or + * (3) enter long mode. + * + * There is a 768M memory hole for I/O, + * see x86/include/kvm/kvm-arch.h in kvmtool. + */ + static const u32 max_mb_32bit = 4096 - 768; + u32 mb, *ptr; + + if (!CONFIG_KVMTOOL) + return; + + for (mb = 16; mb < max_mb_32bit; mb++) { + ptr = (void*)(mb * 1024 * 1024 - 4); + *ptr = mb; + } + for (mb = 16; mb < max_mb_32bit; mb++) { + ptr = (void*)(mb * 1024 * 1024 - 4); + if (*ptr != mb) + break; + RamSize = mb * 1024 * 1024; + } + + dprintf(1,"kvmtool: probed %d MB low RAM.\n", + RamSize / (1024 * 1024)); + e820_add(0, RamSize, E820_RAM); +} diff --git a/src/post.c b/src/post.c index f93106a1c9..f7268ecb8f 100644 --- a/src/post.c +++ b/src/post.c @@ -147,6 +147,7 @@ platform_hardware_setup(void)
// Platform specific setup qemu_platform_setup(); + kvmtool_platform_setup(); coreboot_platform_setup();
// Setup timers and periodic clock interrupt @@ -307,6 +308,7 @@ dopost(void)
// Detect ram and setup internal malloc. qemu_preinit(); + kvmtool_preinit(); coreboot_preinit(); malloc_preinit();
@@ -320,7 +322,7 @@ dopost(void) void VISIBLE32FLAT handle_post(void) { - if (!CONFIG_QEMU && !CONFIG_COREBOOT) + if (!CONFIG_QEMU && !CONFIG_COREBOOT && !CONFIG_KVMTOOL) return;
serial_debug_preinit(); diff --git a/src/sercon.c b/src/sercon.c index 5d27051efb..0b7722ec08 100644 --- a/src/sercon.c +++ b/src/sercon.c @@ -517,6 +517,8 @@ void sercon_setup(void) u16 addr;
addr = romfile_loadint("etc/sercon-port", 0); + if (!addr && CONFIG_KVMTOOL) + addr = 0x3f8; if (!addr) return; dprintf(1, "sercon: using ioport 0x%x\n", addr); diff --git a/src/Kconfig b/src/Kconfig index 00108057d7..985594c51b 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ -25,6 +25,11 @@ choice Configure to be used by EFI firmware as Compatibility Support module (CSM) to provide legacy BIOS services.
+ config KVMTOOL + bool "Build for kvmtool" + help + Configure for an emulated machine (kvmtool). + endchoice
config QEMU_HARDWARE @@ -61,6 +66,7 @@ endchoice Support an interactive boot menu at end of post. config BOOTSPLASH depends on BOOTMENU + depends on !KVMTOOL bool "Graphical boot splash screen" default y help @@ -124,6 +130,7 @@ endchoice
config ROM_SIZE int "ROM size (in KB)" + default 128 if KVMTOOL default 0 help Set the ROM size. Say '0' here to make seabios figure the @@ -138,6 +145,7 @@ endmenu menu "Hardware support" config ATA depends on DRIVES + depends on !KVMTOOL bool "ATA controllers" default y help @@ -156,24 +164,26 @@ menu "Hardware support" Use 32bit PIO accesses on ATA (minor optimization on PCI transfers). config AHCI depends on DRIVES + depends on !KVMTOOL bool "AHCI controllers" default y help Support for AHCI disk code. config SDCARD depends on DRIVES + depends on !KVMTOOL bool "SD controllers" default y help Support for SD cards on PCI host controllers. config VIRTIO_BLK - depends on DRIVES && QEMU_HARDWARE + depends on DRIVES && (QEMU_HARDWARE || KVMTOOL) bool "virtio-blk controllers" default y help Support boot from virtio-blk storage. config VIRTIO_SCSI - depends on DRIVES && QEMU_HARDWARE + depends on DRIVES && (QEMU_HARDWARE || KVMTOOL) bool "virtio-scsi controllers" default y help @@ -204,6 +214,7 @@ menu "Hardware support" Support boot from qemu-emulated lsi53c895a scsi storage. config MEGASAS depends on DRIVES + depends on !KVMTOOL bool "LSI MegaRAID SAS controllers" default y help @@ -216,6 +227,7 @@ menu "Hardware support" Support boot from LSI MPT Fusion scsi storage. config FLOPPY depends on DRIVES && HARDWARE_IRQ + depends on !KVMTOOL bool "Floppy controller" default y help @@ -229,6 +241,7 @@ menu "Hardware support" QEMU fw_cfg. config NVME depends on DRIVES + depends on !KVMTOOL bool "NVMe controllers" default y help @@ -236,6 +249,7 @@ menu "Hardware support"
config PS2PORT depends on KEYBOARD || MOUSE + depends on !KVMTOOL bool "PS/2 port" default y help @@ -243,6 +257,7 @@ menu "Hardware support"
config USB bool "USB" + depends on !KVMTOOL default y help Support USB devices. @@ -355,6 +370,7 @@ menu "Hardware support" Initialize the Memory Type Range Registers (on emulators). config PMTIMER bool "Support ACPI timer" + depends on !KVMTOOL default y help Detect and use the ACPI timer for timekeeping. @@ -404,6 +420,7 @@ menu "BIOS interfaces" config OPTIONROMS bool "Option ROMS" default y + depends on !KVMTOOL help Support finding and running option roms during POST. config PMM @@ -466,6 +483,7 @@ menu "BIOS interfaces"
config TCGBIOS depends on S3_RESUME + depends on !KVMTOOL bool "TPM support and TCG BIOS extensions" default y help @@ -493,6 +511,7 @@ menu "BIOS Tables" sometimes called DMI. config ACPI bool "ACPI" + depends on !KVMTOOL default y help Support generation of ACPI tables.
kvmtool uses MMIO not IO bar for legacy virtio. Doesn't match spec. But easy to handle given we have the code anyway for virtio 1.0 which allows both MMIO and IO.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/hw/virtio-pci.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/src/hw/virtio-pci.c b/src/hw/virtio-pci.c index 96f9c6b012..1a3acc9962 100644 --- a/src/hw/virtio-pci.c +++ b/src/hw/virtio-pci.c @@ -488,10 +488,18 @@ void vp_init_simple(struct vp_device *vp, struct pci_device *pci) } else { dprintf(1, "pci dev %pP using legacy (0.9.5) virtio mode\n", pci); vp->legacy.bar = 0; - vp->legacy.ioaddr = pci_enable_iobar(pci, PCI_BASE_ADDRESS_0); - if (!vp->legacy.ioaddr) - return; - vp->legacy.mode = VP_ACCESS_IO; + addr = pci_config_readl(pci->bdf, PCI_BASE_ADDRESS_0); + if (addr & PCI_BASE_ADDRESS_SPACE_IO) { + vp->legacy.ioaddr = addr & PCI_BASE_ADDRESS_IO_MASK; + if (!vp->legacy.ioaddr) + return; + vp->legacy.mode = VP_ACCESS_IO; + } else { + vp->legacy.ioaddr = addr & PCI_BASE_ADDRESS_MEM_MASK; + if (!vp->legacy.ioaddr) + return; + vp->legacy.mode = VP_ACCESS_MMIO; + } }
vp_reset(vp);
Queues have 256 entries on kvmtool, support that. Needs more memory for virtqueues now. But with the move to 32bit drivers for virtio this should not be much of an issue any more.
Known problems (probably kvmtool bugs): * Must bump to 260 entries to make things actually work, otherwise kvmtool segfaults. Oops. * Linux kernel doesn't find virtio-blk devices after seabios initialized them. virtio device reset not working properly?
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/hw/virtio-ring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/hw/virtio-ring.h b/src/hw/virtio-ring.h index 8604a01140..cb8da89cab 100644 --- a/src/hw/virtio-ring.h +++ b/src/hw/virtio-ring.h @@ -20,7 +20,7 @@ #define VIRTIO_F_VERSION_1 32 #define VIRTIO_F_IOMMU_PLATFORM 33
-#define MAX_QUEUE_NUM (128) +#define MAX_QUEUE_NUM (260)
#define VRING_DESC_F_NEXT 1 #define VRING_DESC_F_WRITE 2
On 02/11/17 15:50, Gerd Hoffmann wrote:
Queues have 256 entries on kvmtool, support that. Needs more memory for virtqueues now. But with the move to 32bit drivers for virtio this should not be much of an issue any more.
Known problems (probably kvmtool bugs):
- Must bump to 260 entries to make things actually work, otherwise kvmtool segfaults. Oops.
You mean setting MAX_QUEUE_NUM to 256 instead of 260 in seabios causes a kvmtool crash? Do you have any more detail on the segfault?
One problem I can see is kvmtool's handling of used/avail event indexes. net and blk devices call virtio_queue__should_signal which reads event indexes without of checking if VIRTIO_F_EVENT_IDX was negotiated first. Since seabios doesn't use the event indexes, this would lead to missing signals, but not a segfault.
- Linux kernel doesn't find virtio-blk devices after seabios initialized them. virtio device reset not working properly?
No, reset isn't implemented at all... A lot of work is required to properly clear the state and threads of each device.
Thanks, Jean
On Fri, 2017-11-03 at 13:49 +0000, Jean-Philippe Brucker wrote:
On 02/11/17 15:50, Gerd Hoffmann wrote:
Queues have 256 entries on kvmtool, support that. Needs more memory for virtqueues now. But with the move to 32bit drivers for virtio this should not be much of an issue any more.
Known problems (probably kvmtool bugs): * Must bump to 260 entries to make things actually work, otherwise kvmtool segfaults. Oops.
You mean setting MAX_QUEUE_NUM to 256 instead of 260 in seabios causes a kvmtool crash?
yes.
Do you have any more detail on the segfault?
Ok, lets have a look with gdb ...
Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7f81caf3c700 (LWP 20234)] virt_queue__get_head_iov (vq=vq@entry=0x7f82576be0a0, iov=iov@entry=0x7 f826770aae0, out=out@entry=0x7f826770bae0, in=in@entry=0x7f826770bae2, head=65104, kvm=kvm@entry=0x246eee0) at virtio/core.c:105 105 *out = *in = 0; (gdb) bt #0 0x000000000040c91b in virt_queue__get_head_iov (vq=vq@entry=0x7f825 76be0a0, iov=iov@entry=0x7f826770aae0, out=out@entry=0x7f826770bae0, in =in@entry=0x7f826770bae2, head=65104, kvm=kvm@entry=0x246eee0) at virtio/core.c:105 #1 0x000000000040bbf7 in virtio_blk_thread (bdev=0x7f82576be010, vq=0x7f82576be0a0, kvm=0x246eee0) at virtio/blk.c:134 #2 0x000000000040bbf7 in virtio_blk_thread (dev=0x7f82576be010) at virtio/blk.c:208 #3 0x00007f82571c6e25 in start_thread () at /lib64/libpthread.so.0 #4 0x00007f82543b134d in clone () at /lib64/libc.so.6 (gdb) print *vq $1 = {vring = {num = 256, desc = 0x7f824cf3e000, avail = 0x7f824cf3f000, used = 0x7f824cf40000}, pfn = 524285, last_avail_idx = 263, last_used_signalled = 1, endian = 1}
last_avail_idx looks bogus ...
Since seabios doesn't use the event indexes, this would lead to missing signals, but not a segfault.
seabios polls anyway, so it doesn't need signals.
* Linux kernel doesn't find virtio-blk devices after seabios initialized them. virtio device reset not working properly?
No, reset isn't implemented at all... A lot of work is required to properly clear the state and threads of each device.
Hmm. That is required for any kind of boot loader support though.
/me wonders what the kvmtool --firmware switch is good for then if a direct kernel boot is apparently the only thing which actually works.
cheers, Gerd
On 03/11/17 15:34, Gerd Hoffmann wrote:
On Fri, 2017-11-03 at 13:49 +0000, Jean-Philippe Brucker wrote:
On 02/11/17 15:50, Gerd Hoffmann wrote:
Queues have 256 entries on kvmtool, support that. Needs more memory for virtqueues now. But with the move to 32bit drivers for virtio this should not be much of an issue any more.
Known problems (probably kvmtool bugs): * Must bump to 260 entries to make things actually work, otherwise kvmtool segfaults. Oops.
You mean setting MAX_QUEUE_NUM to 256 instead of 260 in seabios causes a kvmtool crash?
yes.
Do you have any more detail on the segfault?
Ok, lets have a look with gdb ...
Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7f81caf3c700 (LWP 20234)] virt_queue__get_head_iov (vq=vq@entry=0x7f82576be0a0, iov=iov@entry=0x7 f826770aae0, out=out@entry=0x7f826770bae0, in=in@entry=0x7f826770bae2, head=65104, kvm=kvm@entry=0x246eee0) at virtio/core.c:105 105 *out = *in = 0; (gdb) bt #0 0x000000000040c91b in virt_queue__get_head_iov (vq=vq@entry=0x7f825 76be0a0, iov=iov@entry=0x7f826770aae0, out=out@entry=0x7f826770bae0, in =in@entry=0x7f826770bae2, head=65104, kvm=kvm@entry=0x246eee0) at virtio/core.c:105 #1 0x000000000040bbf7 in virtio_blk_thread (bdev=0x7f82576be010, vq=0x7f82576be0a0, kvm=0x246eee0) at virtio/blk.c:134 #2 0x000000000040bbf7 in virtio_blk_thread (dev=0x7f82576be010) at virtio/blk.c:208 #3 0x00007f82571c6e25 in start_thread () at /lib64/libpthread.so.0 #4 0x00007f82543b134d in clone () at /lib64/libc.so.6 (gdb) print *vq $1 = {vring = {num = 256, desc = 0x7f824cf3e000, avail = 0x7f824cf3f000, used = 0x7f824cf40000}, pfn = 524285, last_avail_idx = 263, last_used_signalled = 1, endian = 1}
last_avail_idx looks bogus ...
It follows avail->idx, which wraps naturally at 65536 (regardless of the ring size). But head=65104 seems bogus, it should be an index into the descriptor table. So either seabios puts that value in the avail ring, or kvmtool reads some uninitialized ring entry. I haven't found how we can get into this situation yet.
Thanks, Jean
On 03/11/17 19:42, Jean-Philippe Brucker wrote:
On 03/11/17 15:34, Gerd Hoffmann wrote:
On Fri, 2017-11-03 at 13:49 +0000, Jean-Philippe Brucker wrote:
On 02/11/17 15:50, Gerd Hoffmann wrote:
Queues have 256 entries on kvmtool, support that. Needs more memory for virtqueues now. But with the move to 32bit drivers for virtio this should not be much of an issue any more.
Known problems (probably kvmtool bugs): * Must bump to 260 entries to make things actually work, otherwise kvmtool segfaults. Oops.
You mean setting MAX_QUEUE_NUM to 256 instead of 260 in seabios causes a kvmtool crash?
yes.
Do you have any more detail on the segfault?
Ok, lets have a look with gdb ...
Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7f81caf3c700 (LWP 20234)] virt_queue__get_head_iov (vq=vq@entry=0x7f82576be0a0, iov=iov@entry=0x7 f826770aae0, out=out@entry=0x7f826770bae0, in=in@entry=0x7f826770bae2, head=65104, kvm=kvm@entry=0x246eee0) at virtio/core.c:105 105 *out = *in = 0; (gdb) bt #0 0x000000000040c91b in virt_queue__get_head_iov (vq=vq@entry=0x7f825 76be0a0, iov=iov@entry=0x7f826770aae0, out=out@entry=0x7f826770bae0, in =in@entry=0x7f826770bae2, head=65104, kvm=kvm@entry=0x246eee0) at virtio/core.c:105 #1 0x000000000040bbf7 in virtio_blk_thread (bdev=0x7f82576be010, vq=0x7f82576be0a0, kvm=0x246eee0) at virtio/blk.c:134 #2 0x000000000040bbf7 in virtio_blk_thread (dev=0x7f82576be010) at virtio/blk.c:208 #3 0x00007f82571c6e25 in start_thread () at /lib64/libpthread.so.0 #4 0x00007f82543b134d in clone () at /lib64/libc.so.6 (gdb) print *vq $1 = {vring = {num = 256, desc = 0x7f824cf3e000, avail = 0x7f824cf3f000, used = 0x7f824cf40000}, pfn = 524285, last_avail_idx = 263, last_used_signalled = 1, endian = 1}
last_avail_idx looks bogus ...
It follows avail->idx, which wraps naturally at 65536 (regardless of the ring size). But head=65104 seems bogus, it should be an index into the descriptor table. So either seabios puts that value in the avail ring, or kvmtool reads some uninitialized ring entry. I haven't found how we can get into this situation yet.
When MAX_QUEUE_NUM = queue size, SeaBIOS doesn't reserve space for used/avail events, so when kvmtool writes to what it thinks is the avail event (a u16 located right after the used ring), it actually overwrites the next member of SeaBIOS' vring_virtqueue, which is vring->num. This results in further corruption as soon as the value written in vring->num exceeds queue size.
Thanks for reporting it, I'll send a fix for kvmtool.
Thanks, Jean