This patch adds native support for booting from virtio disks to Seabios.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
Changelog: v1->v2:
- free memory in case of vq initialization error.
- change license of virtio ring/pci to LGPLv3 with permission of Laurent Vivier (aka the author).
diff --git a/Makefile b/Makefile index 327a1bf..d0b8881 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,8 @@ OUT=out/ SRCBOTH=misc.c pmm.c stacks.c output.c util.c block.c floppy.c ata.c mouse.c \ kbd.c pci.c serial.c clock.c pic.c cdrom.c ps2port.c smp.c resume.c \ pnpbios.c pirtable.c vgahooks.c ramdisk.c pcibios.c blockcmd.c \ - usb.c usb-uhci.c usb-ohci.c usb-ehci.c usb-hid.c usb-msc.c + usb.c usb-uhci.c usb-ohci.c usb-ehci.c usb-hid.c usb-msc.c \ + virtio-ring.c virtio-pci.c virtio-blk.c SRC16=$(SRCBOTH) system.c disk.c apm.c font.c SRC32FLAT=$(SRCBOTH) post.c shadow.c memmap.c coreboot.c boot.c \ acpi.c smm.c mptable.c smbios.c pciinit.c optionroms.c mtrr.c \ diff --git a/src/block.c b/src/block.c index ddf441f..b6b1902 100644 --- a/src/block.c +++ b/src/block.c @@ -11,6 +11,7 @@ #include "util.h" // dprintf #include "ata.h" // process_ata_op #include "usb-msc.h" // process_usb_op +#include "virtio-blk.h" // process_virtio_op
struct drives_s Drives VAR16VISIBLE;
@@ -289,6 +290,8 @@ process_op(struct disk_op_s *op) return process_cdemu_op(op); case DTYPE_USB: return process_usb_op(op); + case DTYPE_VIRTIO: + return process_virtio_op(op); default: op->count = 0; return DISK_RET_EPARAM; diff --git a/src/config.h b/src/config.h index b101174..ad569c6 100644 --- a/src/config.h +++ b/src/config.h @@ -136,6 +136,9 @@ #define CONFIG_SUBMODEL_ID 0x00 #define CONFIG_BIOS_REVISION 0x01
+// Support boot from virtio storage +#define CONFIG_VIRTIO_BLK 1 + // Various memory addresses used by the code. #define BUILD_STACK_ADDR 0x7000 #define BUILD_S3RESUME_STACK_ADDR 0x1000 diff --git a/src/disk.h b/src/disk.h index 0cd1b74..9e5b083 100644 --- a/src/disk.h +++ b/src/disk.h @@ -197,6 +197,7 @@ struct drive_s { #define DTYPE_RAMDISK 0x04 #define DTYPE_CDEMU 0x05 #define DTYPE_USB 0x06 +#define DTYPE_VIRTIO 0x07
#define MAXDESCSIZE 80
diff --git a/src/pci_ids.h b/src/pci_ids.h index 1800f1d..e1cded2 100644 --- a/src/pci_ids.h +++ b/src/pci_ids.h @@ -2605,3 +2605,6 @@ #define PCI_DEVICE_ID_RME_DIGI32 0x9896 #define PCI_DEVICE_ID_RME_DIGI32_PRO 0x9897 #define PCI_DEVICE_ID_RME_DIGI32_8 0x9898 + +#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 +#define PCI_DEVICE_ID_VIRTIO_BLK 0x1001 diff --git a/src/post.c b/src/post.c index 638b0f7..25535e2 100644 --- a/src/post.c +++ b/src/post.c @@ -23,6 +23,7 @@ #include "smbios.h" // smbios_init #include "paravirt.h" // qemu_cfg_port_probe #include "ps2port.h" // ps2port_setup +#include "virtio-blk.h" // virtio_blk_setup
void __set_irq(int vector, void *loc) @@ -184,6 +185,7 @@ init_hw(void) floppy_setup(); ata_setup(); ramdisk_setup(); + virtio_blk_setup(); }
// Main setup code. diff --git a/src/virtio-blk.c b/src/virtio-blk.c new file mode 100644 index 0000000..a41c336 --- /dev/null +++ b/src/virtio-blk.c @@ -0,0 +1,155 @@ +// Virtio blovl boot support. +// +// Copyright (C) 2010 Red Hat Inc. +// +// Authors: +// Gleb Natapov gnatapov@redhat.com +// +// This file may be distributed under the terms of the GNU LGPLv3 license. + +#include "util.h" // dprintf +#include "pci.h" // foreachpci +#include "config.h" // CONFIG_* +#include "virtio-pci.h" +#include "virtio-blk.h" +#include "disk.h" + +struct virtiodrive_s { + struct drive_s drive; + struct vring_virtqueue *vq; + u16 ioaddr; +}; + +static int +virtio_blk_read(struct disk_op_s *op) +{ + struct virtiodrive_s *vdrive_g = + container_of(op->drive_g, struct virtiodrive_s, drive); + struct vring_virtqueue *vq = GET_GLOBAL(vdrive_g->vq); + struct virtio_blk_outhdr hdr = { + .type = VIRTIO_BLK_T_IN, + .ioprio = 0, + .sector = op->lba, + }; + u8 status = VIRTIO_BLK_S_UNSUPP; + struct vring_list sg[] = { + { + .addr = MAKE_FLATPTR(GET_SEG(SS), &hdr), + .length = sizeof(hdr), + }, + { + .addr = op->buf_fl, + .length = GET_GLOBAL(vdrive_g->drive.blksize) * op->count, + }, + { + .addr = MAKE_FLATPTR(GET_SEG(SS), &status), + .length = sizeof(status), + }, + }; + + /* Add to virtqueue and kick host */ + vring_add_buf(vq, sg, 1, 2, 0, 0); + vring_kick(GET_GLOBAL(vdrive_g->ioaddr), vq, 1); + + /* Wait for reply */ + while (!vring_more_used(vq)) + udelay(5); + + /* Reclaim virtqueue element */ + vring_get_buf(vq, NULL); + return status == VIRTIO_BLK_S_OK ? 
DISK_RET_SUCCESS : DISK_RET_EBADTRACK; +} + +int +process_virtio_op(struct disk_op_s *op) +{ + switch (op->command) { + case CMD_READ: + return virtio_blk_read(op); + case CMD_FORMAT: + case CMD_WRITE: + return DISK_RET_EWRITEPROTECT; + case CMD_RESET: + case CMD_ISREADY: + case CMD_VERIFY: + case CMD_SEEK: + return DISK_RET_SUCCESS; + default: + op->count = 0; + return DISK_RET_EPARAM; + } +} + +void +virtio_blk_setup(void) +{ + ASSERT32FLAT(); + if (! CONFIG_VIRTIO_BLK) + return; + + dprintf(3, "init virtio-blk\n"); + + int bdf, max; + u32 id = PCI_VENDOR_ID_REDHAT_QUMRANET | (PCI_DEVICE_ID_VIRTIO_BLK << 16); + foreachpci(bdf, max) { + u32 v = pci_config_readl(bdf, PCI_VENDOR_ID); + if (v != id) + continue; + dprintf(3, "found virtio-blk at %x:%x\n", pci_bdf_to_bus(bdf), + pci_bdf_to_dev(bdf)); + char *desc = malloc_tmphigh(MAXDESCSIZE); + struct virtiodrive_s *vdrive_g = malloc_fseg(sizeof(*vdrive_g)); + struct vring_virtqueue *vq = malloc_low(sizeof(*vq)); + if (!vdrive_g || !desc || !vq) { + warn_noalloc(); + return; + } + memset(vdrive_g, 0, sizeof(*vdrive_g)); + vdrive_g->drive.type = DTYPE_VIRTIO; + vdrive_g->drive.cntl_id = bdf; + vdrive_g->vq = vq; + + u16 ioaddr = pci_config_readl(bdf, PCI_BASE_ADDRESS_0) & + PCI_BASE_ADDRESS_IO_MASK; + + vdrive_g->ioaddr = ioaddr; + + vp_reset(ioaddr); + vp_set_status(ioaddr, VIRTIO_CONFIG_S_ACKNOWLEDGE | + VIRTIO_CONFIG_S_DRIVER ); + + if (vp_find_vq(ioaddr, 0, vdrive_g->vq) < 0 ) { + free(vdrive_g); + free(desc); + free(vq); + dprintf(1, "fail to find vq for virtio-blk %x:%x\n", + pci_bdf_to_bus (bdf), pci_bdf_to_dev(bdf)); + continue; + } + + struct virtio_blk_config cfg; + vp_get(ioaddr, 0, &cfg, sizeof(cfg)); + + vdrive_g->drive.blksize = cfg.blk_size; + vdrive_g->drive.sectors = cfg.capacity; + dprintf(3, "virtio-blk %x:%x blksize=%d sectors=%u\n", + pci_bdf_to_bus (bdf), pci_bdf_to_dev(bdf), + vdrive_g->drive.blksize, (u32)vdrive_g->drive.sectors); + + vdrive_g->drive.pchs.cylinders = cfg.cylinders; + 
vdrive_g->drive.pchs.heads = cfg.heads; + vdrive_g->drive.pchs.spt = cfg.sectors; + + setup_translation(&vdrive_g->drive); + add_bcv_internal(&vdrive_g->drive); + + snprintf(desc, MAXDESCSIZE, "Virtio disk PCI:%x:%x", + pci_bdf_to_bus(bdf), pci_bdf_to_dev(bdf)); + + vdrive_g->drive.desc = desc; + + vp_set_status(ioaddr, VIRTIO_CONFIG_S_ACKNOWLEDGE | + VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK); + } +} + diff --git a/src/virtio-blk.h b/src/virtio-blk.h new file mode 100644 index 0000000..3369ea4 --- /dev/null +++ b/src/virtio-blk.h @@ -0,0 +1,40 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +struct virtio_blk_config +{ + u64 capacity; + u32 size_max; + u32 seg_max; + u16 cylinders; + u8 heads; + u8 sectors; + u32 blk_size; + u8 physical_block_exp; + u8 alignment_offset; + u16 min_io_size; + u32 opt_io_size; +} __attribute__((packed)); + +/* These two define direction. */ +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1 + +/* This is the first element of the read scatter-gather list. */ +struct virtio_blk_outhdr { + /* VIRTIO_BLK_T* */ + u32 type; + /* io priority. */ + u32 ioprio; + /* Sector (ie. 512 byte offset) */ + u64 sector; +}; + +#define VIRTIO_BLK_S_OK 0 +#define VIRTIO_BLK_S_IOERR 1 +#define VIRTIO_BLK_S_UNSUPP 2 + +int process_virtio_op(struct disk_op_s *op); +void virtio_blk_setup(void); + +#endif /* _VIRTIO_BLK_H */ diff --git a/src/virtio-pci.c b/src/virtio-pci.c new file mode 100644 index 0000000..e171ea3 --- /dev/null +++ b/src/virtio-pci.c @@ -0,0 +1,67 @@ +/* virtio-pci.c - pci interface for virtio interface + * + * (c) Copyright 2008 Bull S.A.S. + * + * Author: Laurent Vivier Laurent.Vivier@bull.net + * + * some parts from Linux Virtio PCI driver + * + * Copyright IBM Corp. 2007 + * Authors: Anthony Liguori aliguori@us.ibm.com + * + * Adopted for Seabios: Gleb Natapov gleb@redhat.com + * + * This work is licensed under the terms of the GNU LGPLv3 + * See the COPYING file in the top-level directory. 
+ */ + +#include "virtio-ring.h" +#include "virtio-pci.h" + +int vp_find_vq(unsigned int ioaddr, int queue_index, + struct vring_virtqueue *vq) +{ + struct vring * vr = &vq->vring; + u16 num; + + ASSERT32FLAT(); + /* select the queue */ + + outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_SEL); + + /* check if the queue is available */ + + num = inw(ioaddr + VIRTIO_PCI_QUEUE_NUM); + if (!num) { + dprintf(1, "ERROR: queue size is 0\n"); + return -1; + } + + if (num > MAX_QUEUE_NUM) { + dprintf(1, "ERROR: queue size %d > %d\n", num, MAX_QUEUE_NUM); + return -1; + } + + /* check if the queue is already active */ + + if (inl(ioaddr + VIRTIO_PCI_QUEUE_PFN)) { + dprintf(1, "ERROR: queue already active\n"); + return -1; + } + + vq->queue_index = queue_index; + + /* initialize the queue */ + + vring_init(vr, num, (unsigned char*)&vq->queue); + + /* activate the queue + * + * NOTE: vr->desc is initialized by vring_init() + */ + + outl((unsigned long)virt_to_phys(vr->desc) >> PAGE_SHIFT, + ioaddr + VIRTIO_PCI_QUEUE_PFN); + + return num; +} diff --git a/src/virtio-pci.h b/src/virtio-pci.h new file mode 100644 index 0000000..6932036 --- /dev/null +++ b/src/virtio-pci.h @@ -0,0 +1,97 @@ +#ifndef _VIRTIO_PCI_H +#define _VIRTIO_PCI_H + +/* A 32-bit r/o bitmask of the features supported by the host */ +#define VIRTIO_PCI_HOST_FEATURES 0 + +/* A 32-bit r/w bitmask of features activated by the guest */ +#define VIRTIO_PCI_GUEST_FEATURES 4 + +/* A 32-bit r/w PFN for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_PFN 8 + +/* A 16-bit r/o queue size for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_NUM 12 + +/* A 16-bit r/w queue selector */ +#define VIRTIO_PCI_QUEUE_SEL 14 + +/* A 16-bit r/w queue notifier */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 + +/* An 8-bit device status register. */ +#define VIRTIO_PCI_STATUS 18 + +/* An 8-bit r/o interrupt status register. Reading the value will return the + * current contents of the ISR and will also clear it. 
This is effectively + * a read-and-acknowledge. */ +#define VIRTIO_PCI_ISR 19 + +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 + +/* The remaining space is defined by each driver as the per-driver + * configuration space */ +#define VIRTIO_PCI_CONFIG 20 + +/* Virtio ABI version, this must match exactly */ +#define VIRTIO_PCI_ABI_VERSION 0 + +static inline u32 vp_get_features(unsigned int ioaddr) +{ + return inl(ioaddr + VIRTIO_PCI_HOST_FEATURES); +} + +static inline void vp_set_features(unsigned int ioaddr, u32 features) +{ + outl(features, ioaddr + VIRTIO_PCI_GUEST_FEATURES); +} + +static inline void vp_get(unsigned int ioaddr, unsigned offset, + void *buf, unsigned len) +{ + u8 *ptr = buf; + unsigned i; + + for (i = 0; i < len; i++) + ptr[i] = inb(ioaddr + VIRTIO_PCI_CONFIG + offset + i); +} + +static inline u8 vp_get_status(unsigned int ioaddr) +{ + return inb(ioaddr + VIRTIO_PCI_STATUS); +} + +static inline void vp_set_status(unsigned int ioaddr, u8 status) +{ + if (status == 0) /* reset */ + return; + outb(status, ioaddr + VIRTIO_PCI_STATUS); +} + + +static inline void vp_reset(unsigned int ioaddr) +{ + outb(0, ioaddr + VIRTIO_PCI_STATUS); + (void)inb(ioaddr + VIRTIO_PCI_ISR); +} + +static inline void vp_notify(unsigned int ioaddr, int queue_index) +{ + outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); +} + +static inline void vp_del_vq(unsigned int ioaddr, int queue_index) +{ + /* select the queue */ + + outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_SEL); + + /* deactivate the queue */ + + outl(0, ioaddr + VIRTIO_PCI_QUEUE_PFN); +} + +int vp_find_vq(unsigned int ioaddr, int queue_index, + struct vring_virtqueue *vq); +#endif /* _VIRTIO_PCI_H_ */ diff --git a/src/virtio-ring.c b/src/virtio-ring.c new file mode 100644 index 0000000..f4a2efe --- /dev/null +++ b/src/virtio-ring.c @@ -0,0 +1,152 @@ +/* virtio-pci.c - virtio ring management + * + * (c) Copyright 2008 Bull S.A.S. 
+ * + * Author: Laurent Vivier Laurent.Vivier@bull.net + * + * some parts from Linux Virtio Ring + * + * Copyright Rusty Russell IBM Corporation 2007 + * + * Adopted for Seabios: Gleb Natapov gleb@redhat.com + * + * This work is licensed under the terms of the GNU LGPLv3 + * See the COPYING file in the top-level directory. + * + * + */ + +#include "virtio-ring.h" +#include "virtio-pci.h" + +#define BUG() do { \ + dprintf(1, "BUG: failure at %s:%d/%s()!\n", \ + __FILE__, __LINE__, __FUNCTION__); \ + while(1); \ + } while (0) +#define BUG_ON(condition) do { if (condition) BUG(); } while (0) + +/* + * vring_more_used + * + * is there some used buffers ? + * + */ + +int vring_more_used(struct vring_virtqueue *vq) +{ + struct vring_used *used = GET_FLATPTR(vq->vring.used); + wmb(); + return GET_FLATPTR(vq->last_used_idx) != GET_FLATPTR(used->idx); +} + +/* + * vring_free + * + * put at the begin of the free list the current desc[head] + */ + +void vring_detach(struct vring_virtqueue *vq, unsigned int head) +{ + struct vring *vr = &vq->vring; + struct vring_desc *desc = GET_FLATPTR(vr->desc); + unsigned int i; + + /* find end of given descriptor */ + + i = head; + while (GET_FLATPTR(desc[i].flags) & VRING_DESC_F_NEXT) + i = GET_FLATPTR(desc[i].next); + + /* link it with free list and point to it */ + + SET_FLATPTR(desc[i].next, GET_FLATPTR(vq->free_head)); + wmb(); + SET_FLATPTR(vq->free_head, head); +} + +/* + * vring_get_buf + * + * get a buffer from the used list + * + */ + +int vring_get_buf(struct vring_virtqueue *vq, unsigned int *len) +{ + struct vring *vr = &vq->vring; + struct vring_used_elem *elem; + struct vring_used *used = GET_FLATPTR(vq->vring.used); + u32 id; + int ret; + +// BUG_ON(!vring_more_used(vq)); + + elem = &used->ring[GET_FLATPTR(vq->last_used_idx) % GET_FLATPTR(vr->num)]; + wmb(); + id = GET_FLATPTR(elem->id); + if (len != NULL) + *len = GET_FLATPTR(elem->len); + + ret = GET_FLATPTR(vq->vdata[id]); + + vring_detach(vq, id); + + 
SET_FLATPTR(vq->last_used_idx, GET_FLATPTR(vq->last_used_idx) + 1); + + return ret; +} + +void vring_add_buf(struct vring_virtqueue *vq, + struct vring_list list[], + unsigned int out, unsigned int in, + int index, int num_added) +{ + struct vring *vr = &vq->vring; + int i, av, head, prev; + struct vring_desc *desc = GET_FLATPTR(vr->desc); + struct vring_avail *avail = GET_FLATPTR(vr->avail); + + BUG_ON(out + in == 0); + + prev = 0; + head = GET_FLATPTR(vq->free_head); + for (i = head; out; i = GET_FLATPTR(desc[i].next), out--) { + SET_FLATPTR(desc[i].flags, VRING_DESC_F_NEXT); + SET_FLATPTR(desc[i].addr, (u64)virt_to_phys(list->addr)); + SET_FLATPTR(desc[i].len, list->length); + prev = i; + list++; + } + for ( ; in; i = GET_FLATPTR(desc[i].next), in--) { + SET_FLATPTR(desc[i].flags, VRING_DESC_F_NEXT|VRING_DESC_F_WRITE); + SET_FLATPTR(desc[i].addr, (u64)virt_to_phys(list->addr)); + SET_FLATPTR(desc[i].len, list->length); + prev = i; + list++; + } + SET_FLATPTR(desc[prev].flags, + GET_FLATPTR(desc[prev].flags) & ~VRING_DESC_F_NEXT); + + SET_FLATPTR(vq->free_head, i); + + SET_FLATPTR(vq->vdata[head], index); + + av = (GET_FLATPTR(avail->idx) + num_added) % GET_FLATPTR(vr->num); + SET_FLATPTR(avail->ring[av], head); + wmb(); +} + +void vring_kick(unsigned int ioaddr, struct vring_virtqueue *vq, int num_added) +{ + struct vring *vr = &vq->vring; + struct vring_avail *avail = GET_FLATPTR(vr->avail); + struct vring_used *used = GET_FLATPTR(vq->vring.used); + + wmb(); + SET_FLATPTR(avail->idx, GET_FLATPTR(avail->idx) + num_added); + + mb(); + if (!(GET_FLATPTR(used->flags) & VRING_USED_F_NO_NOTIFY)) + vp_notify(ioaddr, GET_FLATPTR(vq->queue_index)); +} diff --git a/src/virtio-ring.h b/src/virtio-ring.h new file mode 100644 index 0000000..b97d572 --- /dev/null +++ b/src/virtio-ring.h @@ -0,0 +1,125 @@ +#ifndef _VIRTIO_RING_H +#define _VIRTIO_RING_H + +#define PAGE_SHIFT 12 +#define PAGE_MASK (PAGE_SIZE-1) + +#define virt_to_phys(v) (unsigned long)(v) +#define 
phys_to_virt(p) (void*)(p) +#define wmb() barrier() +#define mb() barrier() + +/* Status byte for guest to report progress, and synchronize features. */ +/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 +/* We have found a driver for the device. */ +#define VIRTIO_CONFIG_S_DRIVER 2 +/* Driver has used its parts of the config, and is happy */ +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +/* We've given up on this device. */ +#define VIRTIO_CONFIG_S_FAILED 0x80 + +#define MAX_QUEUE_NUM (128) + +#define VRING_DESC_F_NEXT 1 +#define VRING_DESC_F_WRITE 2 + +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +#define VRING_USED_F_NO_NOTIFY 1 + +struct vring_desc +{ + u64 addr; + u32 len; + u16 flags; + u16 next; +}; + +struct vring_avail +{ + u16 flags; + u16 idx; + u16 ring[0]; +}; + +struct vring_used_elem +{ + u32 id; + u32 len; +}; + +struct vring_used +{ + u16 flags; + u16 idx; + struct vring_used_elem ring[]; +}; + +struct vring { + unsigned int num; + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; +}; + +#define vring_size(num) \ + (((((sizeof(struct vring_desc) * num) + \ + (sizeof(struct vring_avail) + sizeof(u16) * num)) \ + + PAGE_MASK) & ~PAGE_MASK) + \ + (sizeof(struct vring_used) + sizeof(struct vring_used_elem) * num)) + +typedef unsigned char virtio_queue_t[PAGE_MASK + vring_size(MAX_QUEUE_NUM)]; + +struct vring_virtqueue { + virtio_queue_t queue; + struct vring vring; + u16 free_head; + u16 last_used_idx; + u16 vdata[MAX_QUEUE_NUM]; + /* PCI */ + int queue_index; +}; + +struct vring_list { + char *addr; + unsigned int length; +}; + +static inline void vring_init(struct vring *vr, + unsigned int num, unsigned char *queue) +{ + unsigned int i; + unsigned long pa; + + ASSERT32FLAT(); + vr->num = num; + + /* physical address of desc must be page aligned */ + + pa = virt_to_phys(queue); + pa = (pa + PAGE_MASK) & ~PAGE_MASK; + vr->desc = phys_to_virt(pa); + + vr->avail = 
(struct vring_avail *)&vr->desc[num]; + + /* physical address of used must be page aligned */ + + pa = virt_to_phys(&vr->avail->ring[num]); + pa = (pa + PAGE_MASK) & ~PAGE_MASK; + vr->used = phys_to_virt(pa); + + for (i = 0; i < num - 1; i++) + vr->desc[i].next = i + 1; + vr->desc[i].next = 0; +} + +int vring_more_used(struct vring_virtqueue *vq); +void vring_detach(struct vring_virtqueue *vq, unsigned int head); +int vring_get_buf(struct vring_virtqueue *vq, unsigned int *len); +void vring_add_buf(struct vring_virtqueue *vq, struct vring_list list[], + unsigned int out, unsigned int in, + int index, int num_added); +void vring_kick(unsigned int ioaddr, struct vring_virtqueue *vq, int num_added); + +#endif /* _VIRTIO_RING_H_ */
-- Gleb.
diff --git a/src/virtio-blk.c b/src/virtio-blk.c new file mode 100644 index 0000000..a41c336 --- /dev/null +++ b/src/virtio-blk.c @@ -0,0 +1,155 @@ +// Virtio blovl boot support.
Just noticed the "blovl" typo.
- char *desc = malloc_tmphigh(MAXDESCSIZE);
- struct virtiodrive_s *vdrive_g = malloc_fseg(sizeof(*vdrive_g));
- struct vring_virtqueue *vq = malloc_low(sizeof(*vq));
- if (!vdrive_g || !desc || !vq) {
- warn_noalloc();
- return;
- }
This error return can still leak.
Stefan
On Mon, May 10, 2010 at 09:25:20AM +0100, Stefan Hajnoczi wrote:
diff --git a/src/virtio-blk.c b/src/virtio-blk.c new file mode 100644 index 0000000..a41c336 --- /dev/null +++ b/src/virtio-blk.c @@ -0,0 +1,155 @@ +// Virtio blovl boot support.
Just noticed the "blovl" typo.
- char *desc = malloc_tmphigh(MAXDESCSIZE);
- struct virtiodrive_s *vdrive_g = malloc_fseg(sizeof(*vdrive_g));
- struct vring_virtqueue *vq = malloc_low(sizeof(*vq));
- if (!vdrive_g || !desc || !vq) {
- warn_noalloc();
- return;
- }
This error return can still leak.
Oh Gosh, programming is hard. Why don't we write bios in python?
-- Gleb.
On 05/10/2010 03:11 AM, Gleb Natapov wrote:
This patch adds native support for booting from virtio disks to Seabios.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
A related problem we need to think about is how to indicate to SeaBIOS which device we want to boot from.
With your patch, a user can select a virtio device explicitly or if they use only one virtio device, it will Just Work.
However, if a user uses IDE and virtio, or a user has multiple disks, they cannot select a device via -boot.
Is this something we need to address? I don't think we'd break libvirt if we didn't.
Regards,
Anthony Liguori
Changelog: v1->v2:
- free memory in case of vq initialization error.
- change license of virtio ring/pci to LGPLv3 with permission of Laurent Vivier (aka the author).
diff --git a/Makefile b/Makefile index 327a1bf..d0b8881 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,8 @@ OUT=out/ SRCBOTH=misc.c pmm.c stacks.c output.c util.c block.c floppy.c ata.c mouse.c \ kbd.c pci.c serial.c clock.c pic.c cdrom.c ps2port.c smp.c resume.c \ pnpbios.c pirtable.c vgahooks.c ramdisk.c pcibios.c blockcmd.c \
- usb.c usb-uhci.c usb-ohci.c usb-ehci.c usb-hid.c usb-msc.c
+ usb.c usb-uhci.c usb-ohci.c usb-ehci.c usb-hid.c usb-msc.c \
+ virtio-ring.c virtio-pci.c virtio-blk.c
 SRC16=$(SRCBOTH) system.c disk.c apm.c font.c SRC32FLAT=$(SRCBOTH) post.c shadow.c memmap.c coreboot.c boot.c \ acpi.c smm.c mptable.c smbios.c pciinit.c optionroms.c mtrr.c \
diff --git a/src/block.c b/src/block.c index ddf441f..b6b1902 100644 --- a/src/block.c +++ b/src/block.c @@ -11,6 +11,7 @@ #include "util.h" // dprintf #include "ata.h" // process_ata_op #include "usb-msc.h" // process_usb_op +#include "virtio-blk.h" // process_virtio_op
struct drives_s Drives VAR16VISIBLE;
@@ -289,6 +290,8 @@ process_op(struct disk_op_s *op) return process_cdemu_op(op); case DTYPE_USB: return process_usb_op(op);
+ case DTYPE_VIRTIO:
+ return process_virtio_op(op);
 default: op->count = 0; return DISK_RET_EPARAM;
diff --git a/src/config.h b/src/config.h index b101174..ad569c6 100644 --- a/src/config.h +++ b/src/config.h @@ -136,6 +136,9 @@ #define CONFIG_SUBMODEL_ID 0x00 #define CONFIG_BIOS_REVISION 0x01
+// Support boot from virtio storage +#define CONFIG_VIRTIO_BLK 1
- // Various memory addresses used by the code. #define BUILD_STACK_ADDR 0x7000 #define BUILD_S3RESUME_STACK_ADDR 0x1000
diff --git a/src/disk.h b/src/disk.h index 0cd1b74..9e5b083 100644 --- a/src/disk.h +++ b/src/disk.h @@ -197,6 +197,7 @@ struct drive_s { #define DTYPE_RAMDISK 0x04 #define DTYPE_CDEMU 0x05 #define DTYPE_USB 0x06 +#define DTYPE_VIRTIO 0x07
#define MAXDESCSIZE 80
diff --git a/src/pci_ids.h b/src/pci_ids.h index 1800f1d..e1cded2 100644 --- a/src/pci_ids.h +++ b/src/pci_ids.h @@ -2605,3 +2605,6 @@ #define PCI_DEVICE_ID_RME_DIGI32 0x9896 #define PCI_DEVICE_ID_RME_DIGI32_PRO 0x9897 #define PCI_DEVICE_ID_RME_DIGI32_8 0x9898
+#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 +#define PCI_DEVICE_ID_VIRTIO_BLK 0x1001 diff --git a/src/post.c b/src/post.c index 638b0f7..25535e2 100644 --- a/src/post.c +++ b/src/post.c @@ -23,6 +23,7 @@ #include "smbios.h" // smbios_init #include "paravirt.h" // qemu_cfg_port_probe #include "ps2port.h" // ps2port_setup +#include "virtio-blk.h" // virtio_blk_setup
void __set_irq(int vector, void *loc) @@ -184,6 +185,7 @@ init_hw(void) floppy_setup(); ata_setup(); ramdisk_setup();
+ virtio_blk_setup();
 }
// Main setup code.
diff --git a/src/virtio-blk.c b/src/virtio-blk.c new file mode 100644 index 0000000..a41c336 --- /dev/null +++ b/src/virtio-blk.c @@ -0,0 +1,155 @@ +// Virtio blovl boot support. +// +// Copyright (C) 2010 Red Hat Inc. +// +// Authors: +// Gleb Natapovgnatapov@redhat.com +// +// This file may be distributed under the terms of the GNU LGPLv3 license.
+#include "util.h" // dprintf +#include "pci.h" // foreachpci +#include "config.h" // CONFIG_* +#include "virtio-pci.h" +#include "virtio-blk.h" +#include "disk.h"
+struct virtiodrive_s {
- struct drive_s drive;
- struct vring_virtqueue *vq;
- u16 ioaddr;
+};
+static int +virtio_blk_read(struct disk_op_s *op) +{
- struct virtiodrive_s *vdrive_g =
container_of(op->drive_g, struct virtiodrive_s, drive);
- struct vring_virtqueue *vq = GET_GLOBAL(vdrive_g->vq);
- struct virtio_blk_outhdr hdr = {
.type = VIRTIO_BLK_T_IN,
.ioprio = 0,
.sector = op->lba,
- };
- u8 status = VIRTIO_BLK_S_UNSUPP;
- struct vring_list sg[] = {
{
.addr = MAKE_FLATPTR(GET_SEG(SS),&hdr),
.length = sizeof(hdr),
},
{
.addr = op->buf_fl,
.length = GET_GLOBAL(vdrive_g->drive.blksize) * op->count,
},
{
.addr = MAKE_FLATPTR(GET_SEG(SS),&status),
.length = sizeof(status),
},
- };
- /* Add to virtqueue and kick host */
- vring_add_buf(vq, sg, 1, 2, 0, 0);
- vring_kick(GET_GLOBAL(vdrive_g->ioaddr), vq, 1);
- /* Wait for reply */
- while (!vring_more_used(vq))
udelay(5);
- /* Reclaim virtqueue element */
- vring_get_buf(vq, NULL);
- return status == VIRTIO_BLK_S_OK ? DISK_RET_SUCCESS : DISK_RET_EBADTRACK;
+}
+int +process_virtio_op(struct disk_op_s *op) +{
- switch (op->command) {
- case CMD_READ:
return virtio_blk_read(op);
- case CMD_FORMAT:
- case CMD_WRITE:
return DISK_RET_EWRITEPROTECT;
- case CMD_RESET:
- case CMD_ISREADY:
- case CMD_VERIFY:
- case CMD_SEEK:
return DISK_RET_SUCCESS;
- default:
op->count = 0;
return DISK_RET_EPARAM;
- }
+}
+void +virtio_blk_setup(void) +{
- ASSERT32FLAT();
- if (! CONFIG_VIRTIO_BLK)
return;
- dprintf(3, "init virtio-blk\n");
- int bdf, max;
- u32 id = PCI_VENDOR_ID_REDHAT_QUMRANET | (PCI_DEVICE_ID_VIRTIO_BLK<< 16);
- foreachpci(bdf, max) {
u32 v = pci_config_readl(bdf, PCI_VENDOR_ID);
if (v != id)
continue;
dprintf(3, "found virtio-blk at %x:%x\n", pci_bdf_to_bus(bdf),
pci_bdf_to_dev(bdf));
char *desc = malloc_tmphigh(MAXDESCSIZE);
struct virtiodrive_s *vdrive_g = malloc_fseg(sizeof(*vdrive_g));
struct vring_virtqueue *vq = malloc_low(sizeof(*vq));
if (!vdrive_g || !desc || !vq) {
warn_noalloc();
return;
}
memset(vdrive_g, 0, sizeof(*vdrive_g));
vdrive_g->drive.type = DTYPE_VIRTIO;
vdrive_g->drive.cntl_id = bdf;
vdrive_g->vq = vq;
u16 ioaddr = pci_config_readl(bdf, PCI_BASE_ADDRESS_0)&
PCI_BASE_ADDRESS_IO_MASK;
vdrive_g->ioaddr = ioaddr;
vp_reset(ioaddr);
vp_set_status(ioaddr, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER );
if (vp_find_vq(ioaddr, 0, vdrive_g->vq)< 0 ) {
free(vdrive_g);
free(desc);
free(vq);
dprintf(1, "fail to find vq for virtio-blk %x:%x\n",
pci_bdf_to_bus (bdf), pci_bdf_to_dev(bdf));
continue;
}
struct virtio_blk_config cfg;
vp_get(ioaddr, 0,&cfg, sizeof(cfg));
vdrive_g->drive.blksize = cfg.blk_size;
vdrive_g->drive.sectors = cfg.capacity;
dprintf(3, "virtio-blk %x:%x blksize=%d sectors=%u\n",
pci_bdf_to_bus (bdf), pci_bdf_to_dev(bdf),
vdrive_g->drive.blksize, (u32)vdrive_g->drive.sectors);
vdrive_g->drive.pchs.cylinders = cfg.cylinders;
vdrive_g->drive.pchs.heads = cfg.heads;
vdrive_g->drive.pchs.spt = cfg.sectors;
setup_translation(&vdrive_g->drive);
add_bcv_internal(&vdrive_g->drive);
snprintf(desc, MAXDESCSIZE, "Virtio disk PCI:%x:%x",
pci_bdf_to_bus(bdf), pci_bdf_to_dev(bdf));
vdrive_g->drive.desc = desc;
vp_set_status(ioaddr, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK);
- }
+}
diff --git a/src/virtio-blk.h b/src/virtio-blk.h new file mode 100644 index 0000000..3369ea4 --- /dev/null +++ b/src/virtio-blk.h @@ -0,0 +1,40 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H
+struct virtio_blk_config +{
- u64 capacity;
- u32 size_max;
- u32 seg_max;
- u16 cylinders;
- u8 heads;
- u8 sectors;
- u32 blk_size;
- u8 physical_block_exp;
- u8 alignment_offset;
- u16 min_io_size;
- u32 opt_io_size;
+} __attribute__((packed));
+/* These two define direction. */ +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1
+/* This is the first element of the read scatter-gather list. */ +struct virtio_blk_outhdr {
- /* VIRTIO_BLK_T* */
- u32 type;
- /* io priority. */
- u32 ioprio;
- /* Sector (ie. 512 byte offset) */
- u64 sector;
+};
+#define VIRTIO_BLK_S_OK 0 +#define VIRTIO_BLK_S_IOERR 1 +#define VIRTIO_BLK_S_UNSUPP 2
+int process_virtio_op(struct disk_op_s *op); +void virtio_blk_setup(void);
+#endif /* _VIRTIO_BLK_H */ diff --git a/src/virtio-pci.c b/src/virtio-pci.c new file mode 100644 index 0000000..e171ea3 --- /dev/null +++ b/src/virtio-pci.c @@ -0,0 +1,67 @@ +/* virtio-pci.c - pci interface for virtio interface
- (c) Copyright 2008 Bull S.A.S.
- Author: Laurent VivierLaurent.Vivier@bull.net
- some parts from Linux Virtio PCI driver
- Copyright IBM Corp. 2007
- Authors: Anthony Liguorialiguori@us.ibm.com
- Adopted for Seabios: Gleb Natapovgleb@redhat.com
- This work is licensed under the terms of the GNU LGPLv3
- See the COPYING file in the top-level directory.
- */
+#include "virtio-ring.h" +#include "virtio-pci.h"
+int vp_find_vq(unsigned int ioaddr, int queue_index,
struct vring_virtqueue *vq)
+{
- struct vring * vr =&vq->vring;
- u16 num;
- ASSERT32FLAT();
- /* select the queue */
- outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_SEL);
- /* check if the queue is available */
- num = inw(ioaddr + VIRTIO_PCI_QUEUE_NUM);
- if (!num) {
dprintf(1, "ERROR: queue size is 0\n");
return -1;
- }
- if (num> MAX_QUEUE_NUM) {
dprintf(1, "ERROR: queue size %d> %d\n", num, MAX_QUEUE_NUM);
return -1;
- }
- /* check if the queue is already active */
- if (inl(ioaddr + VIRTIO_PCI_QUEUE_PFN)) {
dprintf(1, "ERROR: queue already active\n");
return -1;
- }
- vq->queue_index = queue_index;
- /* initialize the queue */
- vring_init(vr, num, (unsigned char*)&vq->queue);
- /* activate the queue
- NOTE: vr->desc is initialized by vring_init()
- */
- outl((unsigned long)virt_to_phys(vr->desc)>> PAGE_SHIFT,
ioaddr + VIRTIO_PCI_QUEUE_PFN);
- return num;
+} diff --git a/src/virtio-pci.h b/src/virtio-pci.h new file mode 100644 index 0000000..6932036 --- /dev/null +++ b/src/virtio-pci.h @@ -0,0 +1,97 @@ +#ifndef _VIRTIO_PCI_H +#define _VIRTIO_PCI_H
+/* A 32-bit r/o bitmask of the features supported by the host */ +#define VIRTIO_PCI_HOST_FEATURES 0
+/* A 32-bit r/w bitmask of features activated by the guest */ +#define VIRTIO_PCI_GUEST_FEATURES 4
+/* A 32-bit r/w PFN for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_PFN 8
+/* A 16-bit r/o queue size for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_NUM 12
+/* A 16-bit r/w queue selector */ +#define VIRTIO_PCI_QUEUE_SEL 14
+/* A 16-bit r/w queue notifier */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16
+/* An 8-bit device status register. */ +#define VIRTIO_PCI_STATUS 18
+/* An 8-bit r/o interrupt status register. Reading the value will return the
- current contents of the ISR and will also clear it. This is effectively
- a read-and-acknowledge. */
+#define VIRTIO_PCI_ISR 19
+/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2
+/* The remaining space is defined by each driver as the per-driver
- configuration space */
+#define VIRTIO_PCI_CONFIG 20
+/* Virtio ABI version, this must match exactly */ +#define VIRTIO_PCI_ABI_VERSION 0
+static inline u32 vp_get_features(unsigned int ioaddr) +{
- return inl(ioaddr + VIRTIO_PCI_HOST_FEATURES);
+}
+static inline void vp_set_features(unsigned int ioaddr, u32 features) +{
outl(features, ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+}
+static inline void vp_get(unsigned int ioaddr, unsigned offset,
void *buf, unsigned len)
+{
- u8 *ptr = buf;
- unsigned i;
- for (i = 0; i< len; i++)
ptr[i] = inb(ioaddr + VIRTIO_PCI_CONFIG + offset + i);
+}
+static inline u8 vp_get_status(unsigned int ioaddr) +{
- return inb(ioaddr + VIRTIO_PCI_STATUS);
+}
+static inline void vp_set_status(unsigned int ioaddr, u8 status) +{
- if (status == 0) /* reset */
return;
- outb(status, ioaddr + VIRTIO_PCI_STATUS);
+}
+static inline void vp_reset(unsigned int ioaddr) +{
- outb(0, ioaddr + VIRTIO_PCI_STATUS);
- (void)inb(ioaddr + VIRTIO_PCI_ISR);
+}
+static inline void vp_notify(unsigned int ioaddr, int queue_index) +{
- outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+}
+static inline void vp_del_vq(unsigned int ioaddr, int queue_index) +{
- /* select the queue */
- outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_SEL);
- /* deactivate the queue */
- outl(0, ioaddr + VIRTIO_PCI_QUEUE_PFN);
+}
+int vp_find_vq(unsigned int ioaddr, int queue_index,
struct vring_virtqueue *vq);
+#endif /* _VIRTIO_PCI_H_ */ diff --git a/src/virtio-ring.c b/src/virtio-ring.c new file mode 100644 index 0000000..f4a2efe --- /dev/null +++ b/src/virtio-ring.c @@ -0,0 +1,152 @@ +/* virtio-ring.c - virtio ring management
- (c) Copyright 2008 Bull S.A.S.
- Author: Laurent Vivier <Laurent.Vivier@bull.net>
- some parts from Linux Virtio Ring
- Copyright Rusty Russell IBM Corporation 2007
- Adapted for Seabios: Gleb Natapov <gleb@redhat.com>
- This work is licensed under the terms of the GNU LGPLv3
- See the COPYING file in the top-level directory.
- */
+#include "virtio-ring.h" +#include "virtio-pci.h"
+#define BUG() do { \
dprintf(1, "BUG: failure at %s:%d/%s()!\n", \
__FILE__, __LINE__, __FUNCTION__); \
while(1); \
} while (0)
+#define BUG_ON(condition) do { if (condition) BUG(); } while (0)
+/*
- vring_more_used
- are there any used buffers?
- */
+int vring_more_used(struct vring_virtqueue *vq) +{
- struct vring_used *used = GET_FLATPTR(vq->vring.used);
- wmb();
- return GET_FLATPTR(vq->last_used_idx) != GET_FLATPTR(used->idx);
+}
+/*
- vring_detach
- put the descriptor chain starting at desc[head] at the beginning of the free list
- */
+void vring_detach(struct vring_virtqueue *vq, unsigned int head) +{
- struct vring *vr =&vq->vring;
- struct vring_desc *desc = GET_FLATPTR(vr->desc);
- unsigned int i;
- /* find end of given descriptor */
- i = head;
- while (GET_FLATPTR(desc[i].flags)& VRING_DESC_F_NEXT)
i = GET_FLATPTR(desc[i].next);
- /* link it with free list and point to it */
- SET_FLATPTR(desc[i].next, GET_FLATPTR(vq->free_head));
- wmb();
- SET_FLATPTR(vq->free_head, head);
+}
+/*
- vring_get_buf
- get a buffer from the used list
- */
+int vring_get_buf(struct vring_virtqueue *vq, unsigned int *len) +{
- struct vring *vr =&vq->vring;
- struct vring_used_elem *elem;
- struct vring_used *used = GET_FLATPTR(vq->vring.used);
- u32 id;
- int ret;
+// BUG_ON(!vring_more_used(vq));
- elem =&used->ring[GET_FLATPTR(vq->last_used_idx) % GET_FLATPTR(vr->num)];
- wmb();
- id = GET_FLATPTR(elem->id);
- if (len != NULL)
*len = GET_FLATPTR(elem->len);
- ret = GET_FLATPTR(vq->vdata[id]);
- vring_detach(vq, id);
- SET_FLATPTR(vq->last_used_idx, GET_FLATPTR(vq->last_used_idx) + 1);
- return ret;
+}
+void vring_add_buf(struct vring_virtqueue *vq,
struct vring_list list[],
unsigned int out, unsigned int in,
int index, int num_added)
+{
- struct vring *vr =&vq->vring;
- int i, av, head, prev;
- struct vring_desc *desc = GET_FLATPTR(vr->desc);
- struct vring_avail *avail = GET_FLATPTR(vr->avail);
- BUG_ON(out + in == 0);
- prev = 0;
- head = GET_FLATPTR(vq->free_head);
- for (i = head; out; i = GET_FLATPTR(desc[i].next), out--) {
SET_FLATPTR(desc[i].flags, VRING_DESC_F_NEXT);
SET_FLATPTR(desc[i].addr, (u64)virt_to_phys(list->addr));
SET_FLATPTR(desc[i].len, list->length);
prev = i;
list++;
- }
- for ( ; in; i = GET_FLATPTR(desc[i].next), in--) {
SET_FLATPTR(desc[i].flags, VRING_DESC_F_NEXT|VRING_DESC_F_WRITE);
SET_FLATPTR(desc[i].addr, (u64)virt_to_phys(list->addr));
SET_FLATPTR(desc[i].len, list->length);
prev = i;
list++;
- }
- SET_FLATPTR(desc[prev].flags,
GET_FLATPTR(desc[prev].flags)& ~VRING_DESC_F_NEXT);
- SET_FLATPTR(vq->free_head, i);
- SET_FLATPTR(vq->vdata[head], index);
- av = (GET_FLATPTR(avail->idx) + num_added) % GET_FLATPTR(vr->num);
- SET_FLATPTR(avail->ring[av], head);
- wmb();
+}
+void vring_kick(unsigned int ioaddr, struct vring_virtqueue *vq, int num_added) +{
- struct vring *vr =&vq->vring;
- struct vring_avail *avail = GET_FLATPTR(vr->avail);
- struct vring_used *used = GET_FLATPTR(vq->vring.used);
- wmb();
- SET_FLATPTR(avail->idx, GET_FLATPTR(avail->idx) + num_added);
- mb();
- if (!(GET_FLATPTR(used->flags)& VRING_USED_F_NO_NOTIFY))
vp_notify(ioaddr, GET_FLATPTR(vq->queue_index));
+} diff --git a/src/virtio-ring.h b/src/virtio-ring.h new file mode 100644 index 0000000..b97d572 --- /dev/null +++ b/src/virtio-ring.h @@ -0,0 +1,125 @@ +#ifndef _VIRTIO_RING_H +#define _VIRTIO_RING_H
+#define PAGE_SHIFT 12 +#define PAGE_MASK (PAGE_SIZE-1)
+#define virt_to_phys(v) (unsigned long)(v) +#define phys_to_virt(p) (void*)(p) +#define wmb() barrier() +#define mb() barrier()
+/* Status byte for guest to report progress, and synchronize features. */ +/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 +/* We have found a driver for the device. */ +#define VIRTIO_CONFIG_S_DRIVER 2 +/* Driver has used its parts of the config, and is happy */ +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +/* We've given up on this device. */ +#define VIRTIO_CONFIG_S_FAILED 0x80
+#define MAX_QUEUE_NUM (128)
+#define VRING_DESC_F_NEXT 1 +#define VRING_DESC_F_WRITE 2
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+#define VRING_USED_F_NO_NOTIFY 1
+struct vring_desc +{
- u64 addr;
- u32 len;
- u16 flags;
- u16 next;
+};
+struct vring_avail +{
- u16 flags;
- u16 idx;
- u16 ring[0];
+};
+struct vring_used_elem +{
- u32 id;
- u32 len;
+};
+struct vring_used +{
- u16 flags;
- u16 idx;
- struct vring_used_elem ring[];
+};
+struct vring {
- unsigned int num;
- struct vring_desc *desc;
- struct vring_avail *avail;
- struct vring_used *used;
+};
+#define vring_size(num) \
- (((((sizeof(struct vring_desc) * num) + \
(sizeof(struct vring_avail) + sizeof(u16) * num)) \
+ PAGE_MASK)& ~PAGE_MASK) + \
(sizeof(struct vring_used) + sizeof(struct vring_used_elem) * num))
+typedef unsigned char virtio_queue_t[PAGE_MASK + vring_size(MAX_QUEUE_NUM)];
+struct vring_virtqueue {
- virtio_queue_t queue;
- struct vring vring;
- u16 free_head;
- u16 last_used_idx;
- u16 vdata[MAX_QUEUE_NUM];
- /* PCI */
- int queue_index;
+};
+struct vring_list {
- char *addr;
- unsigned int length;
+};
+static inline void vring_init(struct vring *vr,
unsigned int num, unsigned char *queue)
+{
- unsigned int i;
- unsigned long pa;
- ASSERT32FLAT();
- vr->num = num;
- /* physical address of desc must be page aligned */
- pa = virt_to_phys(queue);
- pa = (pa + PAGE_MASK)& ~PAGE_MASK;
- vr->desc = phys_to_virt(pa);
- vr->avail = (struct vring_avail *)&vr->desc[num];
- /* physical address of used must be page aligned */
- pa = virt_to_phys(&vr->avail->ring[num]);
- pa = (pa + PAGE_MASK)& ~PAGE_MASK;
- vr->used = phys_to_virt(pa);
- for (i = 0; i< num - 1; i++)
vr->desc[i].next = i + 1;
- vr->desc[i].next = 0;
+}
+int vring_more_used(struct vring_virtqueue *vq); +void vring_detach(struct vring_virtqueue *vq, unsigned int head); +int vring_get_buf(struct vring_virtqueue *vq, unsigned int *len); +void vring_add_buf(struct vring_virtqueue *vq, struct vring_list list[],
unsigned int out, unsigned int in,
int index, int num_added);
+void vring_kick(unsigned int ioaddr, struct vring_virtqueue *vq, int num_added);
+#endif /* _VIRTIO_RING_H_ */
-- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 10, 2010 at 10:48:42AM -0500, Anthony Liguori wrote:
On 05/10/2010 03:11 AM, Gleb Natapov wrote:
This patch adds native support for booting from virtio disks to Seabios.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
A related problem that I think we need to think about how to solve is indicating to Seabios which device we want to boot from.
With your patch, a user can select a virtio device explicitly or if they use only one virtio device, it will Just Work.
However, if a user uses IDE and virtio, or a user has multiple disks, they cannot select a device via -boot.
Isn't this problem unrelated to this patch? I mean if I start qemu with two ide devices can I specify from qemu command line which one I want to boot from?
Is this something we need to address? I don't think we'd break libvirt if we didn't.
Regards,
Anthony Liguori
Changelog: v1->v2:
- free memory in case of vq initialization error.
- change license of virtio ring/pci to LGPLv3 with permission of Laurent Vivier (aka the author).
diff --git a/Makefile b/Makefile index 327a1bf..d0b8881 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,8 @@ OUT=out/ SRCBOTH=misc.c pmm.c stacks.c output.c util.c block.c floppy.c ata.c mouse.c \ kbd.c pci.c serial.c clock.c pic.c cdrom.c ps2port.c smp.c resume.c \ pnpbios.c pirtable.c vgahooks.c ramdisk.c pcibios.c blockcmd.c \
usb.c usb-uhci.c usb-ohci.c usb-ehci.c usb-hid.c usb-msc.c
usb.c usb-uhci.c usb-ohci.c usb-ehci.c usb-hid.c usb-msc.c \
virtio-ring.c virtio-pci.c virtio-blk.c
SRC16=$(SRCBOTH) system.c disk.c apm.c font.c SRC32FLAT=$(SRCBOTH) post.c shadow.c memmap.c coreboot.c boot.c \ acpi.c smm.c mptable.c smbios.c pciinit.c optionroms.c mtrr.c \ diff --git a/src/block.c b/src/block.c index ddf441f..b6b1902 100644 --- a/src/block.c +++ b/src/block.c @@ -11,6 +11,7 @@ #include "util.h" // dprintf #include "ata.h" // process_ata_op #include "usb-msc.h" // process_usb_op +#include "virtio-blk.h" // process_virtio_op
struct drives_s Drives VAR16VISIBLE;
@@ -289,6 +290,8 @@ process_op(struct disk_op_s *op) return process_cdemu_op(op); case DTYPE_USB: return process_usb_op(op);
- case DTYPE_VIRTIO:
- return process_virtio_op(op); default: op->count = 0; return DISK_RET_EPARAM;
diff --git a/src/config.h b/src/config.h index b101174..ad569c6 100644 --- a/src/config.h +++ b/src/config.h @@ -136,6 +136,9 @@ #define CONFIG_SUBMODEL_ID 0x00 #define CONFIG_BIOS_REVISION 0x01
+// Support boot from virtio storage +#define CONFIG_VIRTIO_BLK 1
// Various memory addresses used by the code. #define BUILD_STACK_ADDR 0x7000 #define BUILD_S3RESUME_STACK_ADDR 0x1000 diff --git a/src/disk.h b/src/disk.h index 0cd1b74..9e5b083 100644 --- a/src/disk.h +++ b/src/disk.h @@ -197,6 +197,7 @@ struct drive_s { #define DTYPE_RAMDISK 0x04 #define DTYPE_CDEMU 0x05 #define DTYPE_USB 0x06 +#define DTYPE_VIRTIO 0x07
#define MAXDESCSIZE 80
diff --git a/src/pci_ids.h b/src/pci_ids.h index 1800f1d..e1cded2 100644 --- a/src/pci_ids.h +++ b/src/pci_ids.h @@ -2605,3 +2605,6 @@ #define PCI_DEVICE_ID_RME_DIGI32 0x9896 #define PCI_DEVICE_ID_RME_DIGI32_PRO 0x9897 #define PCI_DEVICE_ID_RME_DIGI32_8 0x9898
+#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 +#define PCI_DEVICE_ID_VIRTIO_BLK 0x1001 diff --git a/src/post.c b/src/post.c index 638b0f7..25535e2 100644 --- a/src/post.c +++ b/src/post.c @@ -23,6 +23,7 @@ #include "smbios.h" // smbios_init #include "paravirt.h" // qemu_cfg_port_probe #include "ps2port.h" // ps2port_setup +#include "virtio-blk.h" // virtio_blk_setup
void __set_irq(int vector, void *loc) @@ -184,6 +185,7 @@ init_hw(void) floppy_setup(); ata_setup(); ramdisk_setup();
- virtio_blk_setup();
}
// Main setup code. diff --git a/src/virtio-blk.c b/src/virtio-blk.c new file mode 100644 index 0000000..a41c336 --- /dev/null +++ b/src/virtio-blk.c @@ -0,0 +1,155 @@ +// Virtio block boot support. +// +// Copyright (C) 2010 Red Hat Inc. +// +// Authors: +// Gleb Natapov <gnatapov@redhat.com> +// +// This file may be distributed under the terms of the GNU LGPLv3 license.
+#include "util.h" // dprintf +#include "pci.h" // foreachpci +#include "config.h" // CONFIG_* +#include "virtio-pci.h" +#include "virtio-blk.h" +#include "disk.h"
+struct virtiodrive_s {
- struct drive_s drive;
- struct vring_virtqueue *vq;
- u16 ioaddr;
+};
+static int +virtio_blk_read(struct disk_op_s *op) +{
- struct virtiodrive_s *vdrive_g =
container_of(op->drive_g, struct virtiodrive_s, drive);
- struct vring_virtqueue *vq = GET_GLOBAL(vdrive_g->vq);
- struct virtio_blk_outhdr hdr = {
.type = VIRTIO_BLK_T_IN,
.ioprio = 0,
.sector = op->lba,
- };
- u8 status = VIRTIO_BLK_S_UNSUPP;
- struct vring_list sg[] = {
{
.addr = MAKE_FLATPTR(GET_SEG(SS),&hdr),
.length = sizeof(hdr),
},
{
.addr = op->buf_fl,
.length = GET_GLOBAL(vdrive_g->drive.blksize) * op->count,
},
{
.addr = MAKE_FLATPTR(GET_SEG(SS),&status),
.length = sizeof(status),
},
- };
- /* Add to virtqueue and kick host */
- vring_add_buf(vq, sg, 1, 2, 0, 0);
- vring_kick(GET_GLOBAL(vdrive_g->ioaddr), vq, 1);
- /* Wait for reply */
- while (!vring_more_used(vq))
udelay(5);
- /* Reclaim virtqueue element */
- vring_get_buf(vq, NULL);
- return status == VIRTIO_BLK_S_OK ? DISK_RET_SUCCESS : DISK_RET_EBADTRACK;
+}
+int +process_virtio_op(struct disk_op_s *op) +{
- switch (op->command) {
- case CMD_READ:
return virtio_blk_read(op);
- case CMD_FORMAT:
- case CMD_WRITE:
return DISK_RET_EWRITEPROTECT;
- case CMD_RESET:
- case CMD_ISREADY:
- case CMD_VERIFY:
- case CMD_SEEK:
return DISK_RET_SUCCESS;
- default:
op->count = 0;
return DISK_RET_EPARAM;
- }
+}
+void +virtio_blk_setup(void) +{
- ASSERT32FLAT();
- if (! CONFIG_VIRTIO_BLK)
return;
- dprintf(3, "init virtio-blk\n");
- int bdf, max;
- u32 id = PCI_VENDOR_ID_REDHAT_QUMRANET | (PCI_DEVICE_ID_VIRTIO_BLK<< 16);
- foreachpci(bdf, max) {
u32 v = pci_config_readl(bdf, PCI_VENDOR_ID);
if (v != id)
continue;
dprintf(3, "found virtio-blk at %x:%x\n", pci_bdf_to_bus(bdf),
pci_bdf_to_dev(bdf));
char *desc = malloc_tmphigh(MAXDESCSIZE);
struct virtiodrive_s *vdrive_g = malloc_fseg(sizeof(*vdrive_g));
struct vring_virtqueue *vq = malloc_low(sizeof(*vq));
if (!vdrive_g || !desc || !vq) {
warn_noalloc();
return;
}
memset(vdrive_g, 0, sizeof(*vdrive_g));
vdrive_g->drive.type = DTYPE_VIRTIO;
vdrive_g->drive.cntl_id = bdf;
vdrive_g->vq = vq;
u16 ioaddr = pci_config_readl(bdf, PCI_BASE_ADDRESS_0)&
PCI_BASE_ADDRESS_IO_MASK;
vdrive_g->ioaddr = ioaddr;
vp_reset(ioaddr);
vp_set_status(ioaddr, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER );
if (vp_find_vq(ioaddr, 0, vdrive_g->vq)< 0 ) {
free(vdrive_g);
free(desc);
free(vq);
dprintf(1, "fail to find vq for virtio-blk %x:%x\n",
pci_bdf_to_bus (bdf), pci_bdf_to_dev(bdf));
continue;
}
struct virtio_blk_config cfg;
vp_get(ioaddr, 0,&cfg, sizeof(cfg));
vdrive_g->drive.blksize = cfg.blk_size;
vdrive_g->drive.sectors = cfg.capacity;
dprintf(3, "virtio-blk %x:%x blksize=%d sectors=%u\n",
pci_bdf_to_bus (bdf), pci_bdf_to_dev(bdf),
vdrive_g->drive.blksize, (u32)vdrive_g->drive.sectors);
vdrive_g->drive.pchs.cylinders = cfg.cylinders;
vdrive_g->drive.pchs.heads = cfg.heads;
vdrive_g->drive.pchs.spt = cfg.sectors;
setup_translation(&vdrive_g->drive);
add_bcv_internal(&vdrive_g->drive);
snprintf(desc, MAXDESCSIZE, "Virtio disk PCI:%x:%x",
pci_bdf_to_bus(bdf), pci_bdf_to_dev(bdf));
vdrive_g->drive.desc = desc;
vp_set_status(ioaddr, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK);
- }
+}
diff --git a/src/virtio-blk.h b/src/virtio-blk.h new file mode 100644 index 0000000..3369ea4 --- /dev/null +++ b/src/virtio-blk.h @@ -0,0 +1,40 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H
+struct virtio_blk_config +{
- u64 capacity;
- u32 size_max;
- u32 seg_max;
- u16 cylinders;
- u8 heads;
- u8 sectors;
- u32 blk_size;
- u8 physical_block_exp;
- u8 alignment_offset;
- u16 min_io_size;
- u32 opt_io_size;
+} __attribute__((packed));
+/* These two define direction. */ +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1
+/* This is the first element of the read scatter-gather list. */ +struct virtio_blk_outhdr {
- /* VIRTIO_BLK_T* */
- u32 type;
- /* io priority. */
- u32 ioprio;
- /* Sector (ie. 512 byte offset) */
- u64 sector;
+};
+#define VIRTIO_BLK_S_OK 0 +#define VIRTIO_BLK_S_IOERR 1 +#define VIRTIO_BLK_S_UNSUPP 2
+int process_virtio_op(struct disk_op_s *op); +void virtio_blk_setup(void);
+#endif /* _VIRTIO_BLK_H */ diff --git a/src/virtio-pci.c b/src/virtio-pci.c new file mode 100644 index 0000000..e171ea3 --- /dev/null +++ b/src/virtio-pci.c @@ -0,0 +1,67 @@ +/* virtio-pci.c - pci interface for virtio interface
- (c) Copyright 2008 Bull S.A.S.
- Author: Laurent Vivier <Laurent.Vivier@bull.net>
- some parts from Linux Virtio PCI driver
- Copyright IBM Corp. 2007
- Authors: Anthony Liguori <aliguori@us.ibm.com>
- Adapted for Seabios: Gleb Natapov <gleb@redhat.com>
- This work is licensed under the terms of the GNU LGPLv3
- See the COPYING file in the top-level directory.
- */
+#include "virtio-ring.h" +#include "virtio-pci.h"
+int vp_find_vq(unsigned int ioaddr, int queue_index,
struct vring_virtqueue *vq)
+{
- struct vring * vr =&vq->vring;
- u16 num;
- ASSERT32FLAT();
- /* select the queue */
- outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_SEL);
- /* check if the queue is available */
- num = inw(ioaddr + VIRTIO_PCI_QUEUE_NUM);
- if (!num) {
dprintf(1, "ERROR: queue size is 0\n");
return -1;
- }
- if (num> MAX_QUEUE_NUM) {
dprintf(1, "ERROR: queue size %d> %d\n", num, MAX_QUEUE_NUM);
return -1;
- }
- /* check if the queue is already active */
- if (inl(ioaddr + VIRTIO_PCI_QUEUE_PFN)) {
dprintf(1, "ERROR: queue already active\n");
return -1;
- }
- vq->queue_index = queue_index;
- /* initialize the queue */
- vring_init(vr, num, (unsigned char*)&vq->queue);
- /* activate the queue
- NOTE: vr->desc is initialized by vring_init()
- */
- outl((unsigned long)virt_to_phys(vr->desc)>> PAGE_SHIFT,
ioaddr + VIRTIO_PCI_QUEUE_PFN);
- return num;
+} diff --git a/src/virtio-pci.h b/src/virtio-pci.h new file mode 100644 index 0000000..6932036 --- /dev/null +++ b/src/virtio-pci.h @@ -0,0 +1,97 @@ +#ifndef _VIRTIO_PCI_H +#define _VIRTIO_PCI_H
+/* A 32-bit r/o bitmask of the features supported by the host */ +#define VIRTIO_PCI_HOST_FEATURES 0
+/* A 32-bit r/w bitmask of features activated by the guest */ +#define VIRTIO_PCI_GUEST_FEATURES 4
+/* A 32-bit r/w PFN for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_PFN 8
+/* A 16-bit r/o queue size for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_NUM 12
+/* A 16-bit r/w queue selector */ +#define VIRTIO_PCI_QUEUE_SEL 14
+/* A 16-bit r/w queue notifier */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16
+/* An 8-bit device status register. */ +#define VIRTIO_PCI_STATUS 18
+/* An 8-bit r/o interrupt status register. Reading the value will return the
- current contents of the ISR and will also clear it. This is effectively
- a read-and-acknowledge. */
+#define VIRTIO_PCI_ISR 19
+/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2
+/* The remaining space is defined by each driver as the per-driver
- configuration space */
+#define VIRTIO_PCI_CONFIG 20
+/* Virtio ABI version, this must match exactly */ +#define VIRTIO_PCI_ABI_VERSION 0
+static inline u32 vp_get_features(unsigned int ioaddr) +{
- return inl(ioaddr + VIRTIO_PCI_HOST_FEATURES);
+}
+static inline void vp_set_features(unsigned int ioaddr, u32 features) +{
outl(features, ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+}
+static inline void vp_get(unsigned int ioaddr, unsigned offset,
void *buf, unsigned len)
+{
- u8 *ptr = buf;
- unsigned i;
- for (i = 0; i< len; i++)
ptr[i] = inb(ioaddr + VIRTIO_PCI_CONFIG + offset + i);
+}
+static inline u8 vp_get_status(unsigned int ioaddr) +{
- return inb(ioaddr + VIRTIO_PCI_STATUS);
+}
+static inline void vp_set_status(unsigned int ioaddr, u8 status) +{
- if (status == 0) /* reset */
return;
- outb(status, ioaddr + VIRTIO_PCI_STATUS);
+}
+static inline void vp_reset(unsigned int ioaddr) +{
- outb(0, ioaddr + VIRTIO_PCI_STATUS);
- (void)inb(ioaddr + VIRTIO_PCI_ISR);
+}
+static inline void vp_notify(unsigned int ioaddr, int queue_index) +{
- outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+}
+static inline void vp_del_vq(unsigned int ioaddr, int queue_index) +{
- /* select the queue */
- outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_SEL);
- /* deactivate the queue */
- outl(0, ioaddr + VIRTIO_PCI_QUEUE_PFN);
+}
+int vp_find_vq(unsigned int ioaddr, int queue_index,
struct vring_virtqueue *vq);
+#endif /* _VIRTIO_PCI_H_ */ diff --git a/src/virtio-ring.c b/src/virtio-ring.c new file mode 100644 index 0000000..f4a2efe --- /dev/null +++ b/src/virtio-ring.c @@ -0,0 +1,152 @@ +/* virtio-ring.c - virtio ring management
- (c) Copyright 2008 Bull S.A.S.
- Author: Laurent Vivier <Laurent.Vivier@bull.net>
- some parts from Linux Virtio Ring
- Copyright Rusty Russell IBM Corporation 2007
- Adapted for Seabios: Gleb Natapov <gleb@redhat.com>
- This work is licensed under the terms of the GNU LGPLv3
- See the COPYING file in the top-level directory.
- */
+#include "virtio-ring.h" +#include "virtio-pci.h"
+#define BUG() do { \
dprintf(1, "BUG: failure at %s:%d/%s()!\n", \
__FILE__, __LINE__, __FUNCTION__); \
while(1); \
} while (0)
+#define BUG_ON(condition) do { if (condition) BUG(); } while (0)
+/*
- vring_more_used
- are there any used buffers?
- */
+int vring_more_used(struct vring_virtqueue *vq) +{
- struct vring_used *used = GET_FLATPTR(vq->vring.used);
- wmb();
- return GET_FLATPTR(vq->last_used_idx) != GET_FLATPTR(used->idx);
+}
+/*
- vring_detach
- put the descriptor chain starting at desc[head] at the beginning of the free list
- */
+void vring_detach(struct vring_virtqueue *vq, unsigned int head) +{
- struct vring *vr =&vq->vring;
- struct vring_desc *desc = GET_FLATPTR(vr->desc);
- unsigned int i;
- /* find end of given descriptor */
- i = head;
- while (GET_FLATPTR(desc[i].flags)& VRING_DESC_F_NEXT)
i = GET_FLATPTR(desc[i].next);
- /* link it with free list and point to it */
- SET_FLATPTR(desc[i].next, GET_FLATPTR(vq->free_head));
- wmb();
- SET_FLATPTR(vq->free_head, head);
+}
+/*
- vring_get_buf
- get a buffer from the used list
- */
+int vring_get_buf(struct vring_virtqueue *vq, unsigned int *len) +{
- struct vring *vr =&vq->vring;
- struct vring_used_elem *elem;
- struct vring_used *used = GET_FLATPTR(vq->vring.used);
- u32 id;
- int ret;
+// BUG_ON(!vring_more_used(vq));
- elem =&used->ring[GET_FLATPTR(vq->last_used_idx) % GET_FLATPTR(vr->num)];
- wmb();
- id = GET_FLATPTR(elem->id);
- if (len != NULL)
*len = GET_FLATPTR(elem->len);
- ret = GET_FLATPTR(vq->vdata[id]);
- vring_detach(vq, id);
- SET_FLATPTR(vq->last_used_idx, GET_FLATPTR(vq->last_used_idx) + 1);
- return ret;
+}
+void vring_add_buf(struct vring_virtqueue *vq,
struct vring_list list[],
unsigned int out, unsigned int in,
int index, int num_added)
+{
- struct vring *vr =&vq->vring;
- int i, av, head, prev;
- struct vring_desc *desc = GET_FLATPTR(vr->desc);
- struct vring_avail *avail = GET_FLATPTR(vr->avail);
- BUG_ON(out + in == 0);
- prev = 0;
- head = GET_FLATPTR(vq->free_head);
- for (i = head; out; i = GET_FLATPTR(desc[i].next), out--) {
SET_FLATPTR(desc[i].flags, VRING_DESC_F_NEXT);
SET_FLATPTR(desc[i].addr, (u64)virt_to_phys(list->addr));
SET_FLATPTR(desc[i].len, list->length);
prev = i;
list++;
- }
- for ( ; in; i = GET_FLATPTR(desc[i].next), in--) {
SET_FLATPTR(desc[i].flags, VRING_DESC_F_NEXT|VRING_DESC_F_WRITE);
SET_FLATPTR(desc[i].addr, (u64)virt_to_phys(list->addr));
SET_FLATPTR(desc[i].len, list->length);
prev = i;
list++;
- }
- SET_FLATPTR(desc[prev].flags,
GET_FLATPTR(desc[prev].flags)& ~VRING_DESC_F_NEXT);
- SET_FLATPTR(vq->free_head, i);
- SET_FLATPTR(vq->vdata[head], index);
- av = (GET_FLATPTR(avail->idx) + num_added) % GET_FLATPTR(vr->num);
- SET_FLATPTR(avail->ring[av], head);
- wmb();
+}
+void vring_kick(unsigned int ioaddr, struct vring_virtqueue *vq, int num_added) +{
- struct vring *vr =&vq->vring;
- struct vring_avail *avail = GET_FLATPTR(vr->avail);
- struct vring_used *used = GET_FLATPTR(vq->vring.used);
- wmb();
- SET_FLATPTR(avail->idx, GET_FLATPTR(avail->idx) + num_added);
- mb();
- if (!(GET_FLATPTR(used->flags)& VRING_USED_F_NO_NOTIFY))
vp_notify(ioaddr, GET_FLATPTR(vq->queue_index));
+} diff --git a/src/virtio-ring.h b/src/virtio-ring.h new file mode 100644 index 0000000..b97d572 --- /dev/null +++ b/src/virtio-ring.h @@ -0,0 +1,125 @@ +#ifndef _VIRTIO_RING_H +#define _VIRTIO_RING_H
+#define PAGE_SHIFT 12 +#define PAGE_MASK (PAGE_SIZE-1)
+#define virt_to_phys(v) (unsigned long)(v) +#define phys_to_virt(p) (void*)(p) +#define wmb() barrier() +#define mb() barrier()
+/* Status byte for guest to report progress, and synchronize features. */ +/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 +/* We have found a driver for the device. */ +#define VIRTIO_CONFIG_S_DRIVER 2 +/* Driver has used its parts of the config, and is happy */ +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +/* We've given up on this device. */ +#define VIRTIO_CONFIG_S_FAILED 0x80
+#define MAX_QUEUE_NUM (128)
+#define VRING_DESC_F_NEXT 1 +#define VRING_DESC_F_WRITE 2
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+#define VRING_USED_F_NO_NOTIFY 1
+struct vring_desc +{
- u64 addr;
- u32 len;
- u16 flags;
- u16 next;
+};
+struct vring_avail +{
- u16 flags;
- u16 idx;
- u16 ring[0];
+};
+struct vring_used_elem +{
- u32 id;
- u32 len;
+};
+struct vring_used +{
- u16 flags;
- u16 idx;
- struct vring_used_elem ring[];
+};
+struct vring {
- unsigned int num;
- struct vring_desc *desc;
- struct vring_avail *avail;
- struct vring_used *used;
+};
+#define vring_size(num) \
- (((((sizeof(struct vring_desc) * num) + \
(sizeof(struct vring_avail) + sizeof(u16) * num)) \
+ PAGE_MASK)& ~PAGE_MASK) + \
(sizeof(struct vring_used) + sizeof(struct vring_used_elem) * num))
+typedef unsigned char virtio_queue_t[PAGE_MASK + vring_size(MAX_QUEUE_NUM)];
+struct vring_virtqueue {
- virtio_queue_t queue;
- struct vring vring;
- u16 free_head;
- u16 last_used_idx;
- u16 vdata[MAX_QUEUE_NUM];
- /* PCI */
- int queue_index;
+};
+struct vring_list {
- char *addr;
- unsigned int length;
+};
+static inline void vring_init(struct vring *vr,
unsigned int num, unsigned char *queue)
+{
- unsigned int i;
- unsigned long pa;
- ASSERT32FLAT();
- vr->num = num;
- /* physical address of desc must be page aligned */
- pa = virt_to_phys(queue);
- pa = (pa + PAGE_MASK)& ~PAGE_MASK;
- vr->desc = phys_to_virt(pa);
- vr->avail = (struct vring_avail *)&vr->desc[num];
- /* physical address of used must be page aligned */
- pa = virt_to_phys(&vr->avail->ring[num]);
- pa = (pa + PAGE_MASK)& ~PAGE_MASK;
- vr->used = phys_to_virt(pa);
- for (i = 0; i< num - 1; i++)
vr->desc[i].next = i + 1;
- vr->desc[i].next = 0;
+}
/*
 * Ring operations declared here; their bodies are not part of this chunk
 * (presumably in virtio-ring.c, which the Makefile hunk adds).
 * NOTE(review): by name these test for used entries, release a descriptor
 * chain, fetch a completed buffer, queue a buffer list, and notify the
 * device — confirm against the implementations.
 */
+int vring_more_used(struct vring_virtqueue *vq); +void vring_detach(struct vring_virtqueue *vq, unsigned int head); +int vring_get_buf(struct vring_virtqueue *vq, unsigned int *len); +void vring_add_buf(struct vring_virtqueue *vq, struct vring_list list[],
unsigned int out, unsigned int in,
int index, int num_added);
+void vring_kick(unsigned int ioaddr, struct vring_virtqueue *vq, int num_added);
+#endif /* _VIRTIO_RING_H_ */
-- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
-- Gleb.
On 05/10/2010 10:54 AM, Gleb Natapov wrote:
On Mon, May 10, 2010 at 10:48:42AM -0500, Anthony Liguori wrote:
On 05/10/2010 03:11 AM, Gleb Natapov wrote:
This patch adds native support for booting from virtio disks to Seabios.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
A related problem that I think we need to think about how we solve is indicating to Seabios which device we want to boot from
With your patch, a user can select a virtio device explicitly or if they use only one virtio device, it will Just Work.
However, if a user uses IDE and virtio, or a user has multiple disks, they cannot select a device via -boot.
Isn't this problem unrelated to this patch? I mean if I start qemu with two ide devices can I specify from qemu command line which one I want to boot from?
That's sort of what I'm asking. If you compare this approach to extboot, extboot provided a capability to select a disk. I think it can be argued though that this isn't a necessary feature to carry over and I'm looking for additional opinions on that.
Regards,
Anthony Liguori
On Mon, May 10, 2010 at 10:58:45AM -0500, Anthony Liguori wrote:
On 05/10/2010 10:54 AM, Gleb Natapov wrote:
On Mon, May 10, 2010 at 10:48:42AM -0500, Anthony Liguori wrote:
On 05/10/2010 03:11 AM, Gleb Natapov wrote:
This patch adds native support for booting from virtio disks to Seabios.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
A related problem that I think we need to think about how we solve is indicating to Seabios which device we want to boot from
With your patch, a user can select a virtio device explicitly or if they use only one virtio device, it will Just Work.
However, if a user uses IDE and virtio, or a user has multiple disks, they cannot select a device via -boot.
Isn't this problem unrelated to this patch? I mean if I start qemu with two ide devices can I specify from qemu command line which one I want to boot from?
That's sort of what I'm asking. If you compare this approach to extboot, extboot provided a capability to select a disk. I think it can be argued though that this isn't a necessary feature to carry over and I'm looking for additional opinions on that.
Well, extboot is just a hack and shouldn't be used with ide disks at all. With extboot it is not possible to switch to another disk from the F12 menu, for instance (is it actually possible to read more than one disk with bios int13 when extboot is in use?). As for specifying the boot disk from the qemu command line, I think it would be very useful. It is not clear how to pass the default boot device to Seabios, though. We should pass the bus the boot device is attached to (ide/virtio) and a unique id of the device on that bus.
-- Gleb.
On 05/10/2010 06:58 PM, Anthony Liguori wrote:
Isn't this problem unrelated to this patch? I mean if I start qemu with two ide devices can I specify from qemu command line which one I want to boot from?
That's sort of what I'm asking. If you compare this approach to extboot, extboot provided a capability to select a disk. I think it can be argued though that this isn't a necessary feature to carry over and I'm looking for additional opinions on that.
I'd say it's a necessary feature, but not one to carry over from the extboot implementation. We have the seabios boot menu (how to reach it?), we need to store the nvram persistently, and we need to extend the selection menu to qemu, but that's unrelated to this patch.
On Thu, May 13, 2010 at 07:49:40PM +0300, Avi Kivity wrote:
On 05/10/2010 06:58 PM, Anthony Liguori wrote:
Isn't this problem unrelated to this patch? I mean if I start qemu with two ide devices can I specify from qemu command line which one I want to boot from?
That's sort of what I'm asking. If you compare this approach to extboot, extboot provided a capability to select a disk. I think it can be argued though that this isn't a necessary feature to carry over and I'm looking for additional opinions on that.
I'd say it's a necessary feature, but not one to carry over from the extboot implementation. We have the seabios boot menu (how to reach it?), we need to store the nvram persistently, and we need to extend the selection menu to qemu, but that's unrelated to this patch.
To reach seabios boot menu run qemu with "-boot menu=on" option and press f12 when prompted.
-- Gleb.
On 05/10/2010 06:48 PM, Anthony Liguori wrote:
On 05/10/2010 03:11 AM, Gleb Natapov wrote:
This patch adds native support for booting from virtio disks to Seabios.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
A related problem that I think we need to think about how we solve is indicating to Seabios which device we want to boot from
With your patch, a user can select a virtio device explicitly or if they use only one virtio device, it will Just Work.
However, if a user uses IDE and virtio, or a user has multiple disks, they cannot select a device via -boot.
Is this something we need to address? I don't think we'd break libvirt if we didn't.
BIOSes traditionally address this by storing the boot order in RTC non-volatile memory, and allow the user to configure the order via a menu. We could do the same (storing the RTC memory in a small disk image).
Alternatively we can seed the order from the command line (-boot id1,id2,id3 where id* are some qdev property attached to disks, this is more flexible than the current syntax I think).
From what I can tell SeaBIOS is reading CMOS_BIOS_BOOTFLAG1 and
CMOS_BIOS_BOOTFLAG2 from non-volatile memory. The values index into bev[], which contains IPL entries (the drives).
Is the order of bev[] entries well-defined? Is there a way for QEMU command-line to know that the first virtio-blk device corresponds to x and the IDE CD-ROM corresponds to y?
Stefan
On Tue, May 11, 2010 at 10:04:25AM +0100, Stefan Hajnoczi wrote:
From what I can tell SeaBIOS is reading CMOS_BIOS_BOOTFLAG1 and
CMOS_BIOS_BOOTFLAG2 from non-volatile memory. The values index into bev[], which contains IPL entries (the drives).
Is the order of bev[] entries well-defined? Is there a way for QEMU command-line to know that the first virtio-blk device corresponds to x and the IDE CD-ROM corresponds to y?
The order of bev[] is the order in which bootable devices were discovered and can change from one version of Seabios to another.
-- Gleb.
On Tue, May 11, 2010 at 10:04:25AM +0100, Stefan Hajnoczi wrote:
From what I can tell SeaBIOS is reading CMOS_BIOS_BOOTFLAG1 and CMOS_BIOS_BOOTFLAG2 from non-volatile memory. The values index into bev[], which contains IPL entries (the drives).
Is the order of bev[] entries well-defined? Is there a way for QEMU command-line to know that the first virtio-blk device corresponds to x and the IDE CD-ROM corresponds to y?
SeaBIOS arranges for bev[0] = floppy, bev[1] = hd, bev[2] = cdrom, and bev[3] to be the first network card - it does this so that the boot order can be read from qemu. However, it's a pain to force this order.
-Kevin
On Tue, May 11, 2010 at 08:45:29AM -0400, Kevin O'Connor wrote:
On Tue, May 11, 2010 at 10:04:25AM +0100, Stefan Hajnoczi wrote:
From what I can tell SeaBIOS is reading CMOS_BIOS_BOOTFLAG1 and CMOS_BIOS_BOOTFLAG2 from non-volatile memory. The values index into bev[], which contains IPL entries (the drives).
Is the order of bev[] entries well-defined? Is there a way for QEMU command-line to know that the first virtio-blk device corresponds to x and the IDE CD-ROM corresponds to y?
SeaBIOS arranges for bev[0] = floppy, bev[1] = hd, bev[2] = cdrom, and bev[3] to be the first network card - it does this so that the boot order can be read from qemu. However, it's a pain to force this order.
What if there is more than one disk?
-- Gleb.
On Tue, May 11, 2010 at 03:47:40PM +0300, Gleb Natapov wrote:
On Tue, May 11, 2010 at 08:45:29AM -0400, Kevin O'Connor wrote:
On Tue, May 11, 2010 at 10:04:25AM +0100, Stefan Hajnoczi wrote:
From what I can tell SeaBIOS is reading CMOS_BIOS_BOOTFLAG1 and CMOS_BIOS_BOOTFLAG2 from non-volatile memory. The values index into bev[], which contains IPL entries (the drives).
Is the order of bev[] entries well-defined? Is there a way for QEMU command-line to know that the first virtio-blk device corresponds to x and the IDE CD-ROM corresponds to y?
SeaBIOS arranges for bev[0] = floppy, bev[1] = hd, bev[2] = cdrom, and bev[3] to be the first network card - it does this so that the boot order can be read from qemu. However, it's a pain to force this order.
What if there is more than one disk?
It's possible to boot from the A drive (floppy) or the C drive (hd). There's no standard way to boot from the D drive. So, when booting from the second hard drive, SeaBIOS arranges for that drive to be mapped as the C drive.
The boot order (eg, floppy, hd, cdroms, network cards) is determined by the BEV (Boot Execution Vector) list. The harddrive registration order (eg, C, D, E) is determined by the BCV (Boot Connection Vector) list.
When one selects a hard drive in SeaBIOS' boot menu, SeaBIOS actually does two things - it makes hd booting the first entry in the BEV list and it makes the selected hd the first entry in the BCV list.
It's a mess - but that's what the BIOS Boot Specification (BBS) defines. Both option roms and bootloaders depend on this behavior.
-Kevin
On Tue, May 11, 2010 at 11:19:07AM +0300, Avi Kivity wrote:
On 05/10/2010 06:48 PM, Anthony Liguori wrote:
On 05/10/2010 03:11 AM, Gleb Natapov wrote:
This patch adds native support for booting from virtio disks to Seabios.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
A related problem that I think we need to think about how we solve is indicating to Seabios which device we want to boot from
With your patch, a user can select a virtio device explicitly or if they use only one virtio device, it will Just Work.
However, if a user uses IDE and virtio, or a user has multiple disks, they cannot select a device via -boot.
Is this something we need to address? I don't think we'd break libvirt if we didn't.
BIOSes traditionally address this by storing the boot order in RTC non-volatile memory, and allow the user to configure the order via a menu. We could do the same (storing the RTC memory in a small disk image).
A real BIOS can do that because it enumerates all bootable devices, attaches a name to each one of them, and then asks the user to configure the boot order using the names it attached to the devices. In our case we want to provide the boot order on the qemu command line before the BIOS has enumerated devices, so qemu should be able to pass enough information about the boot device that the BIOS can uniquely identify it after it has discovered all bootable devices. A bus/device pair can be such a thing.
Alternatively we can seed the order from the command line (-boot id1,id2,id3 where id* are some qdev property attached to disks, this is more flexible than the current syntax I think).
The problem is how to communicate this order to Seabios.
-- Gleb.
On 05/11/2010 03:31 PM, Gleb Natapov wrote:
On Tue, May 11, 2010 at 11:19:07AM +0300, Avi Kivity wrote:
On 05/10/2010 06:48 PM, Anthony Liguori wrote:
On 05/10/2010 03:11 AM, Gleb Natapov wrote:
This patch adds native support for booting from virtio disks to Seabios.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
A related problem that I think we need to think about how we solve is indicating to Seabios which device we want to boot from
With your patch, a user can select a virtio device explicitly or if they use only one virtio device, it will Just Work.
However, if a user uses IDE and virtio, or a user has multiple disks, they cannot select a device via -boot.
Is this something we need to address? I don't think we'd break libvirt if we didn't.
BIOSes traditionally address this by storing the boot order in RTC non-volatile memory, and allow the user to configure the order via a menu. We could do the same (storing the RTC memory in a small disk image).
A real BIOS can do that because it enumerates all bootable devices, attaches a name to each one of them, and then asks the user to configure the boot order using the names it attached to the devices. In our case we want to provide the boot order on the qemu command line before the BIOS has enumerated devices, so qemu should be able to pass enough information about the boot device that the BIOS can uniquely identify it after it has discovered all bootable devices. A bus/device pair can be such a thing.
Having a BIOS menu is also useful, you don't have to drop to the management tool, instead you do everything from the console.
Alternatively we can seed the order from the command line (-boot id1,id2,id3 where id* are some qdev property attached to disks, this is more flexible than the current syntax I think).
The problem is how to communicate this order to Seabios.
Topology (bus/device/lun).
On Wed, May 12, 2010 at 10:22:59AM +0300, Avi Kivity wrote:
On 05/11/2010 03:31 PM, Gleb Natapov wrote:
On Tue, May 11, 2010 at 11:19:07AM +0300, Avi Kivity wrote:
On 05/10/2010 06:48 PM, Anthony Liguori wrote:
On 05/10/2010 03:11 AM, Gleb Natapov wrote:
This patch adds native support for booting from virtio disks to Seabios.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
A related problem that I think we need to think about how we solve is indicating to Seabios which device we want to boot from
With your patch, a user can select a virtio device explicitly or if they use only one virtio device, it will Just Work.
However, if a user uses IDE and virtio, or a user has multiple disks, they cannot select a device via -boot.
Is this something we need to address? I don't think we'd break libvirt if we didn't.
BIOSes traditionally address this by storing the boot order in RTC non-volatile memory, and allow the user to configure the order via a menu. We could do the same (storing the RTC memory in a small disk image).
A real BIOS can do that because it enumerates all bootable devices, attaches a name to each one of them, and then asks the user to configure the boot order using the names it attached to the devices. In our case we want to provide the boot order on the qemu command line before the BIOS has enumerated devices, so qemu should be able to pass enough information about the boot device that the BIOS can uniquely identify it after it has discovered all bootable devices. A bus/device pair can be such a thing.
Having a BIOS menu is also useful, you don't have to drop to the management tool, instead you do everything from the console.
In Seabios we have a functional boot menu. But it is management that controls which disk is plugged in where.
Alternatively we can seed the order from the command line (-boot id1,id2,id3 where id* are some qdev property attached to disks, this is more flexible than the current syntax I think).
The problem is how to communicate this order to Seabios.
Topology (bus/device/lun).
Yeah, that's what I proposed too, actually.
-- Gleb.
On Wed, May 12, 2010 at 10:22:59AM +0300, Avi Kivity wrote:
On 05/11/2010 03:31 PM, Gleb Natapov wrote:
A real BIOS can do that because it enumerates all bootable devices, attaches a name to each one of them, and then asks the user to configure the boot order using the names it attached to the devices. In our case we want to provide the boot order on the qemu command line before the BIOS has enumerated devices, so qemu should be able to pass enough information about the boot device that the BIOS can uniquely identify it after it has discovered all bootable devices. A bus/device pair can be such a thing.
Having a BIOS menu is also useful, you don't have to drop to the management tool, instead you do everything from the console.
Having a "setup menu" is something real hardware could use as well. I don't think the setup menu should be in SeaBIOS - instead, SeaBIOS could launch another program (stored in flash or qemu_fw) dedicated to doing setup.
Alternatively we can seed the order from the command line (-boot id1,id2,id3 where id* are some qdev property attached to disks, this is more flexible than the current syntax I think).
The problem is how to communicate this order to Seabios.
Topology (bus/device/lun).
USB is a pain here. It's possible with BDF (Bus/Dev/Fn) and port number (which accounts for hubs having ports as well).
-Kevin