Dear Evgeny,
thank you for the patch iterations!
A minor nitpick, I prefer having a verb in the commit summary.
Add pvscsi boot support
Also you could add your reply (the things you tested on) to the commit
message.
Am Sonntag, den 13.10.2013, 20:07 +0300 schrieb Evgeny Budilovsky:
Should it be capitalizied?> Signed-off-by: Evgeny Budilovsky <evgeny.budilovsky@ravellosystems.com>
> ---
> Makefile | 2 +-
> src/Kconfig | 12 ++
> src/block.c | 1 +
> src/block.h | 1 +
> src/hw/blockcmd.c | 3 +
> src/hw/pci_ids.h | 3 +
> src/hw/pvscsi.c | 365 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> src/hw/pvscsi.h | 8 ++
> src/post.c | 2 +
> 9 files changed, 396 insertions(+), 1 deletion(-)
> create mode 100644 src/hw/pvscsi.c
> create mode 100644 src/hw/pvscsi.h
>
> diff --git a/Makefile b/Makefile
> index 3218970..6c6302e 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -34,7 +34,7 @@ SRCBOTH=misc.c stacks.c output.c string.c x86.c block.c cdrom.c mouse.c kbd.c \
> hw/usb-hid.c hw/usb-msc.c hw/usb-uas.c \
> hw/blockcmd.c hw/floppy.c hw/ata.c hw/ahci.c hw/ramdisk.c \
> hw/virtio-ring.c hw/virtio-pci.c hw/virtio-blk.c hw/virtio-scsi.c \
> - hw/lsi-scsi.c hw/esp-scsi.c hw/megasas.c
> + hw/lsi-scsi.c hw/esp-scsi.c hw/megasas.c hw/pvscsi.c
> SRC16=$(SRCBOTH) system.c disk.c font.c
> SRC32FLAT=$(SRCBOTH) post.c memmap.c malloc.c pmm.c romfile.c optionroms.c \
> boot.c bootsplash.c jpeg.c bmp.c \
> diff --git a/src/Kconfig b/src/Kconfig
> index c40cc61..6e6814e 100644
> --- a/src/Kconfig
> +++ b/src/Kconfig
> @@ -161,6 +161,18 @@ menu "Hardware support"
> default y
> help
> Support boot from virtio-scsi storage.
> + config PVSCSI
> + depends on DRIVES && QEMU_HARDWARE
> + bool "pvscsi controllers"
> + default y»ESX platform*s*« or »from an ESX platform«? (I am no native speaker
> + help
> + Support boot from Paravirtualized SCSI storage. This kind of storage
> + is mainly supported by VMware ESX hypervisor. It is commonly used
> + to allow fast storage access by communicating directly with the
> + underlying hypervisor. Enabling this type of boot will allow
> + booting directly from images that were imported from ESX platform,
though.)
To make it shorter remove the »that were«: … imported from images
imported from … (as above).
> + without the need to use slower emulation of storage controllers[…]
> + such as IDE.
Should these go into the header file?
> diff --git a/src/hw/pvscsi.c b/src/hw/pvscsi.c
> new file mode 100644
> index 0000000..df5faeb
> --- /dev/null
> +++ b/src/hw/pvscsi.c
> @@ -0,0 +1,365 @@
> +// QEMU VMWARE Paravirtualized SCSI boot support.
> +//
> +// Copyright (c) 2013 Ravello Systems LTD (http://ravellosystems.com)
> +//
> +// Authors:
> +// Evgeny Budilovsky <evgeny.budilovsky@ravellosystems.com>
> +//
> +// This file may be distributed under the terms of the GNU LGPLv3 license.
> +
> +#include "biosvar.h" // GET_GLOBAL
> +#include "block.h" // struct drive_s
> +#include "blockcmd.h" // scsi_drive_setup
> +#include "config.h" // CONFIG_*
> +#include "malloc.h" // free
> +#include "output.h" // dprintf
> +#include "pci.h" // foreachpci
> +#include "pci_ids.h" // PCI_DEVICE_ID_VMWARE_PVSCSI
> +#include "pci_regs.h" // PCI_VENDOR_ID
> +#include "std/disk.h" // DISK_RET_SUCCESS
> +#include "string.h" // memset
> +#include "util.h" // usleep
> +#include "pvscsi.h"
> +#include "virtio-ring.h" // PAGE_SHIFT, virt_to_phys
> +
> +#define MASK(n) ((1 << (n)) - 1)
> +
> +#define SIMPLE_QUEUE_TAG 0x20
> +
> +#define PVSCSI_INTR_CMPL_0 (1 << 0)
> +#define PVSCSI_INTR_CMPL_1 (1 << 1)
> +#define PVSCSI_INTR_CMPL_MASK MASK(2)
> +
> +#define PVSCSI_INTR_MSG_0 (1 << 2)
> +#define PVSCSI_INTR_MSG_1 (1 << 3)
> +#define PVSCSI_INTR_MSG_MASK (MASK(2) << 2)
> +#define PVSCSI_INTR_ALL_SUPPORTED MASK(4)
> +
> +#define PVSCSI_FLAG_CMD_WITH_SG_LIST (1 << 0)
> +#define PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB (1 << 1)
> +#define PVSCSI_FLAG_CMD_DIR_NONE (1 << 2)
> +#define PVSCSI_FLAG_CMD_DIR_TOHOST (1 << 3)
> +#define PVSCSI_FLAG_CMD_DIR_TODEVICE (1 << 4)
> +
> +enum PVSCSIRegOffset {
> + PVSCSI_REG_OFFSET_COMMAND = 0x0,
> + PVSCSI_REG_OFFSET_COMMAND_DATA = 0x4,
> + PVSCSI_REG_OFFSET_COMMAND_STATUS = 0x8,
> + PVSCSI_REG_OFFSET_LAST_STS_0 = 0x100,
> + PVSCSI_REG_OFFSET_LAST_STS_1 = 0x104,
> + PVSCSI_REG_OFFSET_LAST_STS_2 = 0x108,
> + PVSCSI_REG_OFFSET_LAST_STS_3 = 0x10c,
> + PVSCSI_REG_OFFSET_INTR_STATUS = 0x100c,
> + PVSCSI_REG_OFFSET_INTR_MASK = 0x2010,
> + PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014,
> + PVSCSI_REG_OFFSET_DEBUG = 0x3018,
> + PVSCSI_REG_OFFSET_KICK_RW_IO = 0x4018,
> +};
> +
> +enum PVSCSICommands {
> + PVSCSI_CMD_FIRST = 0,
> + PVSCSI_CMD_ADAPTER_RESET = 1,
> + PVSCSI_CMD_ISSUE_SCSI = 2,
> + PVSCSI_CMD_SETUP_RINGS = 3,
> + PVSCSI_CMD_RESET_BUS = 4,
> + PVSCSI_CMD_RESET_DEVICE = 5,
> + PVSCSI_CMD_ABORT_CMD = 6,
> + PVSCSI_CMD_CONFIG = 7,
> + PVSCSI_CMD_SETUP_MSG_RING = 8,
> + PVSCSI_CMD_DEVICE_UNPLUG = 9,
> + PVSCSI_CMD_LAST = 10
> +};
> +
> +#define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES 32
> +struct PVSCSICmdDescSetupRings {
> + u32 reqRingNumPages;
> + u32 cmpRingNumPages;
> + u64 ringsStatePPN;
> + u64 reqRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
> + u64 cmpRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
> +} PACKED;
> +
> +struct PVSCSIRingCmpDesc {
> + u64 context;
> + u64 dataLen;
> + u32 senseLen;
> + u16 hostStatus;
> + u16 scsiStatus;
> + u32 pad[2];
> +} PACKED;
> +
> +struct PVSCSIRingsState {
> + u32 reqProdIdx;
> + u32 reqConsIdx;
> + u32 reqNumEntriesLog2;
> +
> + u32 cmpProdIdx;
> + u32 cmpConsIdx;
> + u32 cmpNumEntriesLog2;
> +
> + u8 pad[104];
> +
> + u32 msgProdIdx;
> + u32 msgConsIdx;
> + u32 msgNumEntriesLog2;
> +} PACKED;
> +
> +struct PVSCSIRingReqDesc {
> + u64 context;
> + u64 dataAddr;
> + u64 dataLen;
> + u64 senseAddr;
> + u32 senseLen;
> + u32 flags;
> + u8 cdb[16];
> + u8 cdbLen;
> + u8 lun[8];
> + u8 tag;
> + u8 bus;
> + u8 target;
> + u8 vcpuHint;
> + u8 unused[59];
> +} PACKED;
> +
> +struct pvscsi_ring_dsc_s {
> + struct PVSCSIRingsState *ring_state;
> + struct PVSCSIRingReqDesc *ring_reqs;
> + struct PVSCSIRingCmpDesc *ring_cmps;
> +};
> +
> +struct pvscsi_lun_s {
> + struct drive_s drive;
> + struct pci_device *pci;
> + u32 iobase;
> + u8 target;
> + u8 lun;
> + struct pvscsi_ring_dsc_s *ring_dsc;
> +};
That memset is not needed, right?> +
> +static void
> +pvscsi_write_cmd_desc(u32 iobase, u32 cmd, const void *desc, size_t len)
> +{
> + const u32 *ptr = desc;
> + size_t i;
> +
> + len /= sizeof(*ptr);
> + pci_writel(iobase + PVSCSI_REG_OFFSET_COMMAND, cmd);
> + for (i = 0; i < len; i++)
> + pci_writel(iobase + PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]);
> +}
> +
> +static void
> +pvscsi_kick_rw_io(u32 iobase)
> +{
> + pci_writel(iobase + PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
> +}
> +
> +static void
> +pvscsi_wait_intr_cmpl(u32 iobase)
> +{
> + while (!(pci_readl(iobase + PVSCSI_REG_OFFSET_INTR_STATUS) & PVSCSI_INTR_CMPL_MASK))
> + usleep(5);
> + pci_writel(iobase + PVSCSI_REG_OFFSET_INTR_STATUS, PVSCSI_INTR_CMPL_MASK);
> +
> +}
> +
> +static void
> +pvscsi_init_rings(u32 iobase, struct pvscsi_ring_dsc_s **ring_dsc)
> +{
> + struct PVSCSICmdDescSetupRings cmd = {0,};
> +
> + struct pvscsi_ring_dsc_s *dsc = memalign_low(sizeof(*dsc), PAGE_SIZE);
> + if (!dsc) {
> + warn_noalloc();
> + return;
> + }
> +
> + memset(dsc, 0, sizeof(*dsc));
> +1. Just one semicolon is needed at the end.
> + dsc->ring_state =
> + (struct PVSCSIRingsState *)memalign_low(PAGE_SIZE, PAGE_SIZE);
> + dsc->ring_reqs =
> + (struct PVSCSIRingReqDesc *)memalign_low(PAGE_SIZE, PAGE_SIZE);
> + dsc->ring_cmps =
> + (struct PVSCSIRingCmpDesc *)memalign_low(PAGE_SIZE, PAGE_SIZE);
> + if (!dsc->ring_state || !dsc->ring_reqs || !dsc->ring_cmps) {
> + warn_noalloc();
> + return;
> + }
> + memset(dsc->ring_state, 0, PAGE_SIZE);
> + memset(dsc->ring_reqs, 0, PAGE_SIZE);
> + memset(dsc->ring_cmps, 0, PAGE_SIZE);
> +
> + cmd.reqRingNumPages = 1;
> + cmd.cmpRingNumPages = 1;
> + cmd.ringsStatePPN = virt_to_phys(dsc->ring_state) >> PAGE_SHIFT;
> + cmd.reqRingPPNs[0] = virt_to_phys(dsc->ring_reqs) >> PAGE_SHIFT;;
> + cmd.cmpRingPPNs[0] = virt_to_phys(dsc->ring_cmps) >> PAGE_SHIFT;;
2. I am no C professional, but why are the `memset()` calls above needed
when the content does not seems to be used, when the pointer is casted
to `unsigned int`?
> +[…]
> + pvscsi_write_cmd_desc(iobase, PVSCSI_CMD_SETUP_RINGS,
> + &cmd, sizeof(cmd));
> + *ring_dsc = dsc;
> +}
Thanks,
Paul