From: Thanos Makatos <thanos(a)nutanix.com>
This patch allows specifying an NVMe namespace ID in the boot ROM file
in the form /pci@i0cf8/*@*/nvme/ns@*, where the 2nd component is the
PCI device and the last component is the namespace ID.
E.g. booting from namespace 2:
boot order:
1: /pci@i0cf8/pci4e58,0001@6/nvme/ns@2
2: /rom@genroms/bootfail.bin
3: HAL
...
|3ffa9000| Searching bootorder for: /pci@i0cf8/*@6/nvme/ns@2
|3ffa9000| NS 2 prio 1
|3ffa9000| phys_alloc zone=0x3ffbfeb8 size=80 align=10 ret=3ffac6e0
(detail=0x3ffac6b0)
|3ffa9000| phys_alloc zone=0x3ffbfeb8 size=24 align=10 ret=3ffac690
(detail=0x3ffac660)
|3ffa9000| Registering bootable: NVMe NS 2: 19073 MiB (39062500 512-byte
blocks + 0-byte metadata)
(type:2 prio:1 data:f4050)
|3ffa9000| NVMe initialization complete!
\3ffa9000/ End thread
If we find a specific namespace we're looking for then we stop probing
all other namespaces as there's no point in doing so. If a namespace is
not specified in the boot ROM file then we default to the old way of
booting from NVMe.
As a future optimization we can avoid keeping information for all
namespaces and operate on a single namespace at a time. This way we
won't run out of memory even if the NVMe controller has the maximum
number of namespaces (2^32 - 1).
The corresponding QEMU patches haven't been upstreamed yet.
Signed-off-by: Thanos Makatos <thanos.makatos(a)nutanix.com>
--
Resending patch since it never reached the list, including a more detailed
commit message.
---
src/boot.c | 12 ++++++++++++
src/hw/nvme.c | 49 +++++++++++++++++++++++++++++++++++++++----------
src/util.h | 1 +
3 files changed, 52 insertions(+), 10 deletions(-)
diff --git a/src/boot.c b/src/boot.c
index 1effd80..6c2a150 100644
--- a/src/boot.c
+++ b/src/boot.c
@@ -325,6 +325,18 @@ int bootprio_find_mmio_device(void *mmio)
return find_prio(desc);
}
+int bootprio_find_nvme_device(struct pci_device *pci, u32 ns_id) /* boot priority lookup for one NVMe namespace on a PCI controller */
+{
+ if (CONFIG_CSM) /* CSM build: priority comes from csm_bootprio_pci(); ns_id is not consulted on this path */
+ return csm_bootprio_pci(pci);
+ if (!CONFIG_BOOTORDER) /* bootorder support disabled: report -1 */
+ return -1;
+ char desc[256], *p; /* desc holds the search pattern; p is the position where the namespace suffix is written */
+ p = build_pci_path(desc, sizeof(desc), "*", pci); /* NOTE(review): assumes the return value points into desc just past the PCI path - confirm */
+ snprintf(p, desc+sizeof(desc)-p, "/nvme/ns@%u", ns_id); /* append "/nvme/ns@<ns_id>", limited to the space left in desc after p */
+ return find_prio(desc); /* result of looking the assembled pattern up via find_prio() */
+}
+
int bootprio_find_scsi_device(struct pci_device *pci, int target, int lun)
{
if (!CONFIG_BOOTORDER)
diff --git a/src/hw/nvme.c b/src/hw/nvme.c
index f26b811..b35a273 100644
--- a/src/hw/nvme.c
+++ b/src/hw/nvme.c
@@ -233,10 +233,23 @@ nvme_admin_identify_ns(struct nvme_ctrl *ctrl, u32 ns_id)
ns_id)->ns;
}
-static void
+static char*
+nvme_ns_desc(const struct nvme_namespace *ns, u32 ns_id) /* builds the description string for a namespace via znprintf() */
+{ /* callers in this patch either free() the returned string or pass it on to boot_add_hd() */
+ return znprintf(MAXDESCSIZE, "NVMe NS %u: %llu MiB (%llu %u-byte "
+ "blocks + %u-byte metadata)\n",
+ ns_id, (ns->lba_count * ns->block_size) >> 20, /* blocks * bytes-per-block, shifted by 20 to print MiB */
+ ns->lba_count, ns->block_size,
+ ns->metadata_size);
+}
+
+/* Returns 0 if the namespace is usable, -1 otherwise. */
+static int
nvme_probe_ns(struct nvme_ctrl *ctrl, struct nvme_namespace *ns, u32 ns_id,
u8 mdts)
{
+ int ret = -1;
+
ns->ctrl = ctrl;
ns->ns_id = ns_id;
@@ -288,16 +301,14 @@ nvme_probe_ns(struct nvme_ctrl *ctrl, struct nvme_namespace *ns, u32 ns_id,
ns->dma_buffer = zalloc_page_aligned(&ZoneHigh, NVME_PAGE_SIZE);
- char *desc = znprintf(MAXDESCSIZE, "NVMe NS %u: %llu MiB (%llu %u-byte "
- "blocks + %u-byte metadata)\n",
- ns_id, (ns->lba_count * ns->block_size) >> 20,
- ns->lba_count, ns->block_size, ns->metadata_size);
-
+ char *desc = nvme_ns_desc(ns, ns_id);
dprintf(3, "%s", desc);
- boot_add_hd(&ns->drive, desc, bootprio_find_pci_device(ctrl->pci));
+ free(desc);
+ ret = 0;
free_buffer:
free (id);
+ return ret;
}
@@ -635,10 +646,28 @@ nvme_controller_enable(struct nvme_ctrl *ctrl)
memset(ctrl->ns, 0, sizeof(*ctrl->ns) * ctrl->ns_count);
/* Populate namespace IDs */
- int ns_idx;
- for (ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) {
- nvme_probe_ns(ctrl, &ctrl->ns[ns_idx], ns_idx + 1, identify->mdts);
+ int ns_idx, first_ns_idx = -1, prio = -1;
+ for (ns_idx = 0; ns_idx < ctrl->ns_count && prio == -1; ns_idx++) {
+ if (!nvme_probe_ns(ctrl, &ctrl->ns[ns_idx], ns_idx + 1, identify->mdts)) {
+ /* Check whether the NS is explicitly specified. */
+ if (first_ns_idx == -1) {
+ first_ns_idx = ns_idx;
+ }
+ prio = bootprio_find_nvme_device(ctrl->pci, ns_idx + 1);
+ }
}
+ /* Use the NS matched in the bootorder (undo the loop's last increment); if none matched, fall back to the first usable one */
+ if (prio != -1) {
+ ns_idx--;
+ } else if (first_ns_idx != -1) {
+ ns_idx = first_ns_idx;
+ dprintf(3, "no NVMe NS specified, defaulting to NS %u\n", ns_idx + 1);
+ prio = bootprio_find_pci_device(ctrl->pci);
+ }
+ if (prio != -1) {
+ boot_add_hd(&ctrl->ns[ns_idx].drive,
+ nvme_ns_desc(&ctrl->ns[ns_idx], ns_idx + 1), prio);
+ }
dprintf(3, "NVMe initialization complete!\n");
return 0;
diff --git a/src/util.h b/src/util.h
index 0de3522..ad2d0ef 100644
--- a/src/util.h
+++ b/src/util.h
@@ -31,6 +31,7 @@ u8 is_bootprio_strict(void);
struct pci_device;
int bootprio_find_pci_device(struct pci_device *pci);
int bootprio_find_mmio_device(void *mmio);
+int bootprio_find_nvme_device(struct pci_device *pci, u32 ns_id);
int bootprio_find_scsi_device(struct pci_device *pci, int target, int lun);
int bootprio_find_scsi_mmio_device(void *mmio, int target, int lun);
int bootprio_find_ata_device(struct pci_device *pci, int chanid, int slave);
--
2.22.3