On Wed, 2020-10-07 at 12:13 -0400, Kevin O'Connor wrote:
--- a/src/hw/nvme.c
+++ b/src/hw/nvme.c
@@ -727,6 +727,22 @@ nvme_cmd_readwrite(struct nvme_namespace *ns, struct disk_op_s *op, int write)
     u16 const max_blocks = NVME_PAGE_SIZE / ns->block_size;
     u16 i;
 
+    /* Split up requests that are larger than the device can handle */
+    if (op->count > ns->max_req_size) {
+        u16 count = op->count;
+
+        /* Handle the first max_req_size elements */
+        op->count = ns->max_req_size;
+        if (nvme_cmd_readwrite(ns, op, write))
+            return res;
+
+        /* Handle the remainder of the request */
+        op->count = count - ns->max_req_size;
+        op->lba += ns->max_req_size;
+        op->buf_fl += (ns->max_req_size * ns->block_size);
+        return nvme_cmd_readwrite(ns, op, write);
+    }
Depending on the disk access, this code could run with a small stack. I would avoid recursion.
This is tail recursion, which any reasonable compiler converts into a jmp :). Take a look at the .o file - for me it did become a plain jump.
Okay, that's fine then.
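As an aside, here is a minimal standalone illustration of the tail-call pattern in question (toy code, not from SeaBIOS; names and values are made up). Because the recursive call is the last thing the function does, GCC and Clang at -O2 normally reuse the stack frame and emit a jump instead of a call:

#include <stdio.h>

/* sum_tail: the self-call is in tail position, so an optimizing
 * compiler can turn the recursion into a loop.  Compare the output of
 * "gcc -O0 -S" and "gcc -O2 -S": only the former keeps a real call. */
unsigned long
sum_tail(const unsigned char *buf, unsigned int len, unsigned long acc)
{
    if (!len)
        return acc;                                 /* base case */
    return sum_tail(buf + 1, len - 1, acc + *buf);  /* tail call */
}

int main(void)
{
    unsigned char data[4] = { 1, 2, 3, 4 };
    printf("%lu\n", sum_tail(data, 4, 0));  /* prints 10 */
    return 0;
}

The caveat behind the stack concern is that C does not guarantee the optimization: an unoptimized build keeps the real recursion and the stack usage that comes with it.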
It's still kind of icky. This used to be a purely iterative function. Now for a large unaligned request (which nvme_build_prpl refuses to handle) you get a mixture of (mostly tail) recursion and iteration.
It'll recurse for each max_req_size, then iterate over each page within that.
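To make that shape concrete, here is a toy model of the control flow, with made-up constants and names rather than the real driver code:

#include <stdio.h>

#define MAX_REQ  64   /* stand-in for ns->max_req_size, in blocks */
#define PER_PAGE  8   /* stand-in for blocks per NVME_PAGE_SIZE */

/* One (tail-)recursive step per MAX_REQ chunk, then per-page iteration
 * within each chunk, mirroring the structure described above. */
void rw(unsigned lba, unsigned count)
{
    if (count > MAX_REQ) {
        rw(lba, MAX_REQ);                    /* first chunk */
        rw(lba + MAX_REQ, count - MAX_REQ);  /* remainder: tail call */
        return;
    }
    for (unsigned i = 0; i < count; i += PER_PAGE)   /* per-page loop */
        printf("io lba=%u blocks=%u\n", lba + i,
               (count - i < PER_PAGE) ? count - i : PER_PAGE);
}

int main(void)
{
    rw(0, 150);  /* splits into 64 + 64 + 22 blocks */
    return 0;
}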
I'd much rather see just a simple iterative loop. Something like this (incremental, completely untested):
--- a/src/hw/nvme.c
+++ b/src/hw/nvme.c
@@ -472,19 +472,20 @@ static int nvme_add_prpl(struct nvme_namespace *ns, u64 base)
 
 int nvme_build_prpl(struct nvme_namespace *ns, struct disk_op_s *op)
 {
-    int first_page = 1;
+    int first_page = 1, count = op->count;
     u32 base = (long)op->buf_fl;
-    s32 size = op->count * ns->block_size;
+    s32 size;
 
-    if (op->count > ns->max_req_size)
-        return -1;
+    if (count > ns->max_req_size)
+        count = ns->max_req_size;
 
     nvme_reset_prpl(ns);
 
+    size = count * ns->block_size;
     /* Special case for transfers that fit into PRP1, but are unaligned */
     if (((size + (base & ~NVME_PAGE_MASK)) <= NVME_PAGE_SIZE)) {
         ns->prp1 = op->buf_fl;
-        return 0;
+        return count;
     }
 
     /* Every request has to be page aligned */
@@ -506,7 +507,7 @@ int nvme_build_prpl(struct nvme_namespace *ns, struct disk_op_s *op)
         return -1;
     }
 
-    return 0;
+    return count;
 }
 
 static int
@@ -725,27 +726,16 @@ nvme_cmd_readwrite(struct nvme_namespace *ns, struct disk_op_s *op, int write)
 {
     int res = DISK_RET_SUCCESS;
     u16 const max_blocks = NVME_PAGE_SIZE / ns->block_size;
-    u16 i;
-
-    /* Split up requests that are larger than the device can handle */
-    if (op->count > ns->max_req_size) {
-        u16 count = op->count;
+    u16 i, count;
 
-        /* Handle the first max_req_size elements */
-        op->count = ns->max_req_size;
-        if (nvme_cmd_readwrite(ns, op, write))
-            return res;
-
-        /* Handle the remainder of the request */
-        op->count = count - ns->max_req_size;
-        op->lba += ns->max_req_size;
-        op->buf_fl += (ns->max_req_size * ns->block_size);
-        return nvme_cmd_readwrite(ns, op, write);
-    }
+    while (op->count && ((count = nvme_build_prpl(ns, op)) > 0)) {
+        res = nvme_io_readwrite(ns, op->lba, ns->prp1, count, write);
+        if (res != DISK_RET_SUCCESS)
+            break;
 
-    if (!nvme_build_prpl(ns, op)) {
-        /* Request goes via PRP List logic */
-        return nvme_io_readwrite(ns, op->lba, ns->prp1, op->count, write);
+        op->count -= count;
+        op->lba += count;
+        op->buf_fl += (count * ns->block_size);
     }
 
     for (i = 0; i < op->count && res == DISK_RET_SUCCESS;) {
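The contract after this change is that nvme_build_prpl() clamps each pass to what the device can take and returns the number of blocks it actually mapped, leaving the caller with a plain drain loop. A standalone toy model of that contract (hypothetical names and constants, not a drop-in for src/hw/nvme.c):

#include <stdio.h>

#define MAX_REQ 64  /* stand-in for ns->max_req_size, in blocks */

/* Clamp a request to the device maximum and report how many blocks
 * were mapped; the real function can also return -1 on failure. */
int build_prpl(unsigned count)
{
    return count > MAX_REQ ? MAX_REQ : (int)count;
}

int main(void)
{
    unsigned lba = 0, count = 150;
    int mapped;

    /* One I/O command per pass until the request is drained. */
    while (count && (mapped = build_prpl(count)) > 0) {
        printf("io lba=%u blocks=%d\n", lba, mapped);
        count -= (unsigned)mapped;
        lba += (unsigned)mapped;
    }
    return 0;
}

One detail worth double-checking in the untested diff above: count in nvme_cmd_readwrite is declared u16, so a -1 failure return from nvme_build_prpl() would wrap to 65535 and still pass the > 0 test; the receiving variable wants a signed type.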