From: Thanos Makatos thanos@nutanix.com
This patch allows specifying an NVMe namespace ID in the boot ROM file in the form /pci@i0cf8/*@*/nvme/ns@*, where the second component identifies the PCI device and the last component is the namespace ID.
E.g. booting from namespace 2:
boot order:
1: /pci@i0cf8/pci4e58,0001@6/nvme/ns@2
2: /rom@genroms/bootfail.bin
3: HAL
...
|3ffa9000| Searching bootorder for: /pci@i0cf8/*@6/nvme/ns@2
|3ffa9000| NS 2 prio 1
|3ffa9000| phys_alloc zone=0x3ffbfeb8 size=80 align=10 ret=3ffac6e0 (detail=0x3ffac6b0)
|3ffa9000| phys_alloc zone=0x3ffbfeb8 size=24 align=10 ret=3ffac690 (detail=0x3ffac660)
|3ffa9000| Registering bootable: NVMe NS 2: 19073 MiB (39062500 512-byte blocks + 0-byte metadata) (type:2 prio:1 data:f4050)
|3ffa9000| NVMe initialization complete!
\3ffa9000/ End thread
If we find the specific namespace we're looking for, we stop probing the remaining namespaces, as there's no point in doing so. If no namespace is specified in the boot ROM file, we default to the old way of booting from NVMe: the first usable namespace is registered with the boot priority of the PCI device.
As a future optimization we can avoid keeping information for all namespaces and operate on a single namespace at a time. This way we won't run out of memory even if the NVMe controller has the maximum number of namespaces (2^32 - 1).
The corresponding QEMU patches haven't been upstreamed yet.
Signed-off-by: Thanos Makatos thanos.makatos@nutanix.com
--
Resending patch since it never reached the list, including a more detailed commit message.
--- src/boot.c | 12 ++++++++++++ src/hw/nvme.c | 49 +++++++++++++++++++++++++++++++++++++++---------- src/util.h | 1 + 3 files changed, 52 insertions(+), 10 deletions(-)
diff --git a/src/boot.c b/src/boot.c index 1effd80..6c2a150 100644 --- a/src/boot.c +++ b/src/boot.c @@ -325,6 +325,18 @@ int bootprio_find_mmio_device(void *mmio) return find_prio(desc); }
+int bootprio_find_nvme_device(struct pci_device *pci, u32 ns_id) +{ + if (CONFIG_CSM) + return csm_bootprio_pci(pci); + if (!CONFIG_BOOTORDER) + return -1; + char desc[256], *p; + p = build_pci_path(desc, sizeof(desc), "*", pci); + snprintf(p, desc+sizeof(desc)-p, "/nvme/ns@%u", ns_id); + return find_prio(desc); +} + int bootprio_find_scsi_device(struct pci_device *pci, int target, int lun) { if (!CONFIG_BOOTORDER) diff --git a/src/hw/nvme.c b/src/hw/nvme.c index f26b811..b35a273 100644 --- a/src/hw/nvme.c +++ b/src/hw/nvme.c @@ -233,10 +233,23 @@ nvme_admin_identify_ns(struct nvme_ctrl *ctrl, u32 ns_id) ns_id)->ns; }
-static void +static char* +nvme_ns_desc(const struct nvme_namespace *ns, u32 ns_id) +{ + return znprintf(MAXDESCSIZE, "NVMe NS %u: %llu MiB (%llu %u-byte " + "blocks + %u-byte metadata)\n", + ns_id, (ns->lba_count * ns->block_size) >> 20, + ns->lba_count, ns->block_size, + ns->metadata_size); +} + +/* Returns 0 if the namespace is usable, -1 otherwise. */ +static int nvme_probe_ns(struct nvme_ctrl *ctrl, struct nvme_namespace *ns, u32 ns_id, u8 mdts) { + int ret = -1; + ns->ctrl = ctrl; ns->ns_id = ns_id;
@@ -288,16 +301,14 @@ nvme_probe_ns(struct nvme_ctrl *ctrl, struct nvme_namespace *ns, u32 ns_id,
ns->dma_buffer = zalloc_page_aligned(&ZoneHigh, NVME_PAGE_SIZE);
- char *desc = znprintf(MAXDESCSIZE, "NVMe NS %u: %llu MiB (%llu %u-byte " - "blocks + %u-byte metadata)\n", - ns_id, (ns->lba_count * ns->block_size) >> 20, - ns->lba_count, ns->block_size, ns->metadata_size); - + char *desc = nvme_ns_desc(ns, ns_id); dprintf(3, "%s", desc); - boot_add_hd(&ns->drive, desc, bootprio_find_pci_device(ctrl->pci)); + free(desc); + ret = 0;
free_buffer: free (id); + return ret; }
@@ -635,10 +646,28 @@ nvme_controller_enable(struct nvme_ctrl *ctrl) memset(ctrl->ns, 0, sizeof(*ctrl->ns) * ctrl->ns_count);
/* Populate namespace IDs */ - int ns_idx; - for (ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) { - nvme_probe_ns(ctrl, &ctrl->ns[ns_idx], ns_idx + 1, identify->mdts); + int ns_idx, first_ns_idx = -1, prio = -1; + for (ns_idx = 0; ns_idx < ctrl->ns_count && prio == -1; ns_idx++) { + if (!nvme_probe_ns(ctrl, &ctrl->ns[ns_idx], ns_idx + 1, identify->mdts)) { + /* Check whether the NS is explicitly specified. */ + if (first_ns_idx == -1) { + first_ns_idx = ns_idx; + } + prio = bootprio_find_nvme_device(ctrl->pci, ns_idx + 1); + } } + /* no NS specified, use the first one */ + if (prio != -1) { + ns_idx--; + } else if (first_ns_idx != -1) { + ns_idx = first_ns_idx; + dprintf(3, "no NVMe NS specified, defaulting to NS %u\n", ns_idx + 1); + prio = bootprio_find_pci_device(ctrl->pci); + } + if (prio != -1) { + boot_add_hd(&ctrl->ns[ns_idx].drive, + nvme_ns_desc(&ctrl->ns[ns_idx], ns_idx + 1), prio); + }
dprintf(3, "NVMe initialization complete!\n"); return 0; diff --git a/src/util.h b/src/util.h index 0de3522..ad2d0ef 100644 --- a/src/util.h +++ b/src/util.h @@ -31,6 +31,7 @@ u8 is_bootprio_strict(void); struct pci_device; int bootprio_find_pci_device(struct pci_device *pci); int bootprio_find_mmio_device(void *mmio); +int bootprio_find_nvme_device(struct pci_device *pci, u32 ns_id); int bootprio_find_scsi_device(struct pci_device *pci, int target, int lun); int bootprio_find_scsi_mmio_device(void *mmio, int target, int lun); int bootprio_find_ata_device(struct pci_device *pci, int chanid, int slave);