Changes since V1: * De-duplicate blksize check and capacity logging * Fix off by 1 error for returned sector count (LBA addresses start from 0).
Max Tottenham (1): Add LBA 64bit support for reads beyond 2TB.
src/hw/blockcmd.c | 61 +++++++++++++++++++++++++++++++++++------------ src/hw/blockcmd.h | 27 +++++++++++++++++++++ 2 files changed, 73 insertions(+), 15 deletions(-)
When booting from a >2TB drive/filesystem, it's possible what the kernel/bootloader may be updated and written out at an LBA address beyond what is normally accessible by the READ(10) SCSI commands. If this happens to the kernel grub will fail to boot the kernel as it will call into the BIOS with an LBA address >2TB, and the BIOS will return an error. Per the SCSI spec, >2TB drives should return 0XFFFFFFFF, and a READ CAPACITY(16) command should be issues to determine the full size of the drive, READ(16) commands can then be used in order to read data at LBA addresses beyond 2TB (64 bit LBA addresses).
Signed-off-by: Max Tottenham mtottenh@akamai.com --- src/hw/blockcmd.c | 61 +++++++++++++++++++++++++++++++++++------------ src/hw/blockcmd.h | 27 +++++++++++++++++++++ 2 files changed, 73 insertions(+), 15 deletions(-)
diff --git a/src/hw/blockcmd.c b/src/hw/blockcmd.c index 6b6fea97..9b6a32bf 100644 --- a/src/hw/blockcmd.c +++ b/src/hw/blockcmd.c @@ -66,6 +66,23 @@ cdb_test_unit_ready(struct disk_op_s *op) return process_op(op); }
+static int +cdb_read_capacity16(struct disk_op_s *op, struct cdbres_read_capacity_16 *data) +{ + struct cdb_sai_read_capacity_16 cmd; + memset(&cmd, 0, sizeof(cmd)); + cmd.command = CDB_CMD_SERVICE_ACTION_IN; + cmd.flags = CDB_CMD_SAI_READ_CAPACITY_16; + cmd.len = cpu_to_be32(sizeof(struct cdbres_read_capacity_16)); + op->command = CMD_SCSI; + op->count = 1; + op->buf_fl = data; + op->cdbcmd = &cmd; + op->blocksize = sizeof(*data); + return process_op(op); +} + + // Request capacity static int cdb_read_capacity(struct disk_op_s *op, struct cdbres_read_capacity *data) @@ -111,13 +128,23 @@ scsi_fill_cmd(struct disk_op_s *op, void *cdbcmd, int maxcdb) switch (op->command) { case CMD_READ: case CMD_WRITE: ; - struct cdb_rwdata_10 *cmd = cdbcmd; - memset(cmd, 0, maxcdb); - cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10 - : CDB_CMD_WRITE_10); - cmd->lba = cpu_to_be32(op->lba); - cmd->count = cpu_to_be16(op->count); - return GET_FLATPTR(op->drive_fl->blksize); + if (op->lba < 0xFFFFFFFFULL) { + struct cdb_rwdata_10 *cmd = cdbcmd; + memset(cmd, 0, maxcdb); + cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10 + : CDB_CMD_WRITE_10); + cmd->lba = cpu_to_be32(op->lba); + cmd->count = cpu_to_be16(op->count); + return GET_FLATPTR(op->drive_fl->blksize); + } else { + struct cdb_rwdata_16 *cmd = cdbcmd; + memset(cmd, 0, maxcdb); + cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_16 + : CDB_CMD_WRITE_16); + cmd->lba = cpu_to_be64(op->lba); + cmd->count = cpu_to_be32(op->count); + return GET_FLATPTR(op->drive_fl->blksize); + } case CMD_SCSI: if (MODESEGMENT) return -1; @@ -331,18 +358,22 @@ scsi_drive_setup(struct drive_s *drive, const char *s, int prio) if (ret) return ret;
- // READ CAPACITY returns the address of the last block. - // We do not bother with READ CAPACITY(16) because BIOS does not support - // 64-bit LBA anyway. - drive->blksize = be32_to_cpu(capdata.blksize); + if (be32_to_cpu(capdata.sectors) == 0xFFFFFFFFUL) { + dprintf(1, "%s: >2TB Detected trying READCAP(16)\n", s); + struct cdbres_read_capacity_16 capdata16; + ret = cdb_read_capacity16(&dop, &capdata16); + drive->blksize = be32_to_cpu(capdata16.blksize); + drive->sectors = be64_to_cpu(capdata16.sectors) + 1; + } else { + drive->blksize = be32_to_cpu(capdata.blksize); + drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1; + } + if (drive->blksize != DISK_SECTOR_SIZE) { dprintf(1, "%s: unsupported block size %d\n", s, drive->blksize); return -1; } - drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1; - dprintf(1, "%s blksize=%d sectors=%u\n" - , s, drive->blksize, (unsigned)drive->sectors); - + dprintf(1, "%s blksize=%d sectors=%llX\n", s, drive->blksize, drive->sectors); // We do not recover from USB stalls, so try to be safe and avoid // sending the command if the (obsolete, but still provided by QEMU) // fixed disk geometry page may not be supported. diff --git a/src/hw/blockcmd.h b/src/hw/blockcmd.h index f18543ed..2683186c 100644 --- a/src/hw/blockcmd.h +++ b/src/hw/blockcmd.h @@ -18,6 +18,16 @@ struct cdb_rwdata_10 { u8 pad[6]; } PACKED;
+#define CDB_CMD_READ_16 0x88 +#define CDB_CMD_WRITE_16 0x8A +struct cdb_rwdata_16 { + u8 command; + u8 flags; + u64 lba; + u32 count; + u16 reserved_14; +} PACKED; + #define CDB_CMD_READ_CAPACITY 0x25
struct cdb_read_capacity { @@ -32,6 +42,23 @@ struct cdbres_read_capacity { u32 blksize; } PACKED;
+ +#define CDB_CMD_SERVICE_ACTION_IN 0x9E +#define CDB_CMD_SAI_READ_CAPACITY_16 0x10 +struct cdb_sai_read_capacity_16 { + u8 command; + u8 flags; + u64 lba; //marked as obsolete? + u32 len; + u16 reserved_14; +} PACKED; + +struct cdbres_read_capacity_16 { + u64 sectors; + u32 blksize; + u8 reserved_12[20]; +} PACKED; + #define CDB_CMD_TEST_UNIT_READY 0x00 #define CDB_CMD_INQUIRY 0x12 #define CDB_CMD_REQUEST_SENSE 0x03
Hi,
- drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1;
- dprintf(1, "%s blksize=%d sectors=%u\n"
, s, drive->blksize, (unsigned)drive->sectors);
- dprintf(1, "%s blksize=%d sectors=%llX\n", s, drive->blksize, drive->sectors);
Any specific reason why you change sectors from %u to %x?
I'd prefer to continue using %u.
When printing hex numbers they should be printed with '0x' prefix to make that clear.
take care, Gerd
On 01/25, Gerd Hoffmann wrote:
Hi,
- drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1;
- dprintf(1, "%s blksize=%d sectors=%u\n"
, s, drive->blksize, (unsigned)drive->sectors);
- dprintf(1, "%s blksize=%d sectors=%llX\n", s, drive->blksize, drive->sectors);
Any specific reason why you change sectors from %u to %x?
I'd prefer to continue using %u.
drive->sectors is a unsigned 64 bit integer. The SeaBIOS implementation for the format specifier %u of dprintf() does not support 64 bit precision (%lu/%llu just silently truncates the integer).
I do have an internal patch that adds unsigned 64bit in support that I'd be happy to send, but it's quite a lot of code - it would require porting an implementation of __udivmoddi4, for prototyping I had pulled one from libgcc_s but I'm not sure if there is a licensing issue there. On balance I thought it would be easiest to just dump the number as hex.
When printing hex numbers they should be printed with '0x' prefix to make that clear.
Noted, will update in V3.
take care, Gerd
Any specific reason why you change sectors from %u to %x?
I'd prefer to continue using %u.
drive->sectors is a unsigned 64 bit integer. The SeaBIOS implementation for the format specifier %u of dprintf() does not support 64 bit precision (%lu/%llu just silently truncates the integer).
I do have an internal patch that adds unsigned 64bit in support that I'd be happy to send, but it's quite a lot of code - it would require porting an implementation of __udivmoddi4, for prototyping I had pulled one from libgcc_s but I'm not sure if there is a licensing issue there. On balance I thought it would be easiest to just dump the number as hex.
Fair enough, let's go with "0x%llx" then.
take care, Gerd
Dear Max,
Thank you for your patch.
Am 25.01.24 um 12:35 schrieb Max Tottenham via SeaBIOS:
When booting from a >2TB drive/filesystem, it's possible what the kernel/bootloader may be updated and written out at an LBA address beyond what is normally accessible by the READ(10) SCSI commands. If this happens to the kernel grub will fail to boot the kernel as it will call into the BIOS with an LBA address >2TB, and the BIOS will return an error. Per the SCSI spec, >2TB drives should return 0XFFFFFFFF, and a READ CAPACITY(16) command should be issues
issue*d*
to determine the full size of the drive, READ(16) commands can then be used in order to read data at LBA addresses beyond 2TB (64 bit LBA addresses).
If you have it already, it’d be great if you could document the test case, maybe for QEMU, how to reproduce this and verify the fix.
Signed-off-by: Max Tottenham mtottenh@akamai.com
src/hw/blockcmd.c | 61 +++++++++++++++++++++++++++++++++++------------ src/hw/blockcmd.h | 27 +++++++++++++++++++++ 2 files changed, 73 insertions(+), 15 deletions(-)
diff --git a/src/hw/blockcmd.c b/src/hw/blockcmd.c index 6b6fea97..9b6a32bf 100644 --- a/src/hw/blockcmd.c +++ b/src/hw/blockcmd.c @@ -66,6 +66,23 @@ cdb_test_unit_ready(struct disk_op_s *op) return process_op(op); }
+static int +cdb_read_capacity16(struct disk_op_s *op, struct cdbres_read_capacity_16 *data) +{
- struct cdb_sai_read_capacity_16 cmd;
- memset(&cmd, 0, sizeof(cmd));
- cmd.command = CDB_CMD_SERVICE_ACTION_IN;
- cmd.flags = CDB_CMD_SAI_READ_CAPACITY_16;
- cmd.len = cpu_to_be32(sizeof(struct cdbres_read_capacity_16));
- op->command = CMD_SCSI;
- op->count = 1;
- op->buf_fl = data;
- op->cdbcmd = &cmd;
- op->blocksize = sizeof(*data);
- return process_op(op);
+}
- // Request capacity static int cdb_read_capacity(struct disk_op_s *op, struct cdbres_read_capacity *data)
@@ -111,13 +128,23 @@ scsi_fill_cmd(struct disk_op_s *op, void *cdbcmd, int maxcdb) switch (op->command) { case CMD_READ: case CMD_WRITE: ;
struct cdb_rwdata_10 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10
: CDB_CMD_WRITE_10);
cmd->lba = cpu_to_be32(op->lba);
cmd->count = cpu_to_be16(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
if (op->lba < 0xFFFFFFFFULL) {
struct cdb_rwdata_10 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10
: CDB_CMD_WRITE_10);
cmd->lba = cpu_to_be32(op->lba);
cmd->count = cpu_to_be16(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
} else {
struct cdb_rwdata_16 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_16
: CDB_CMD_WRITE_16);
cmd->lba = cpu_to_be64(op->lba);
cmd->count = cpu_to_be32(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
}
It’s bikeshedding, but maybe the code in the branches could be factored into separate functions, and then the return also wouldn’t need to be duplicated.
case CMD_SCSI: if (MODESEGMENT) return -1;
@@ -331,18 +358,22 @@ scsi_drive_setup(struct drive_s *drive, const char *s, int prio) if (ret) return ret;
- // READ CAPACITY returns the address of the last block.
- // We do not bother with READ CAPACITY(16) because BIOS does not support
- // 64-bit LBA anyway.
- drive->blksize = be32_to_cpu(capdata.blksize);
- if (be32_to_cpu(capdata.sectors) == 0xFFFFFFFFUL) {
dprintf(1, "%s: >2TB Detected trying READCAP(16)\n", s);
I’d use a higher log level, like 3 or so?
Kind regards,
Paul
struct cdbres_read_capacity_16 capdata16;
ret = cdb_read_capacity16(&dop, &capdata16);
drive->blksize = be32_to_cpu(capdata16.blksize);
drive->sectors = be64_to_cpu(capdata16.sectors) + 1;
- } else {
drive->blksize = be32_to_cpu(capdata.blksize);
drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1;
- }
if (drive->blksize != DISK_SECTOR_SIZE) { dprintf(1, "%s: unsupported block size %d\n", s, drive->blksize); return -1; }
- drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1;
- dprintf(1, "%s blksize=%d sectors=%u\n"
, s, drive->blksize, (unsigned)drive->sectors);
- dprintf(1, "%s blksize=%d sectors=%llX\n", s, drive->blksize, drive->sectors); // We do not recover from USB stalls, so try to be safe and avoid // sending the command if the (obsolete, but still provided by QEMU) // fixed disk geometry page may not be supported.
diff --git a/src/hw/blockcmd.h b/src/hw/blockcmd.h index f18543ed..2683186c 100644 --- a/src/hw/blockcmd.h +++ b/src/hw/blockcmd.h @@ -18,6 +18,16 @@ struct cdb_rwdata_10 { u8 pad[6]; } PACKED;
+#define CDB_CMD_READ_16 0x88 +#define CDB_CMD_WRITE_16 0x8A +struct cdb_rwdata_16 {
- u8 command;
- u8 flags;
- u64 lba;
- u32 count;
- u16 reserved_14;
+} PACKED;
#define CDB_CMD_READ_CAPACITY 0x25
struct cdb_read_capacity {
@@ -32,6 +42,23 @@ struct cdbres_read_capacity { u32 blksize; } PACKED;
+#define CDB_CMD_SERVICE_ACTION_IN 0x9E +#define CDB_CMD_SAI_READ_CAPACITY_16 0x10 +struct cdb_sai_read_capacity_16 {
- u8 command;
- u8 flags;
- u64 lba; //marked as obsolete?
- u32 len;
- u16 reserved_14;
+} PACKED;
+struct cdbres_read_capacity_16 {
- u64 sectors;
- u32 blksize;
- u8 reserved_12[20];
+} PACKED;
- #define CDB_CMD_TEST_UNIT_READY 0x00 #define CDB_CMD_INQUIRY 0x12 #define CDB_CMD_REQUEST_SENSE 0x03
[Forgot a nit.]
Am 25.01.24 um 13:34 schrieb Paul Menzel:
Dear Max,
Thank you for your patch.
I’d also remove the trailing dot/period from the commit message summary.
Am 25.01.24 um 12:35 schrieb Max Tottenham via SeaBIOS:
When booting from a >2TB drive/filesystem, it's possible what the kernel/bootloader may be updated and written out at an LBA address beyond what is normally accessible by the READ(10) SCSI commands. If this happens to the kernel grub will fail to boot the kernel as it will call into the BIOS with an LBA address >2TB, and the BIOS will return an error. Per the SCSI spec, >2TB drives should return 0XFFFFFFFF, and a READ CAPACITY(16) command should be issues
issue*d*
to determine the full size of the drive, READ(16) commands can then be used in order to read data at LBA addresses beyond 2TB (64 bit LBA addresses).
If you have it already, it’d be great if you could document the test case, maybe for QEMU, how to reproduce this and verify the fix.
Signed-off-by: Max Tottenham mtottenh@akamai.com
src/hw/blockcmd.c | 61 +++++++++++++++++++++++++++++++++++------------ src/hw/blockcmd.h | 27 +++++++++++++++++++++ 2 files changed, 73 insertions(+), 15 deletions(-)
diff --git a/src/hw/blockcmd.c b/src/hw/blockcmd.c index 6b6fea97..9b6a32bf 100644 --- a/src/hw/blockcmd.c +++ b/src/hw/blockcmd.c @@ -66,6 +66,23 @@ cdb_test_unit_ready(struct disk_op_s *op) return process_op(op); } +static int +cdb_read_capacity16(struct disk_op_s *op, struct cdbres_read_capacity_16 *data) +{ + struct cdb_sai_read_capacity_16 cmd; + memset(&cmd, 0, sizeof(cmd)); + cmd.command = CDB_CMD_SERVICE_ACTION_IN; + cmd.flags = CDB_CMD_SAI_READ_CAPACITY_16; + cmd.len = cpu_to_be32(sizeof(struct cdbres_read_capacity_16)); + op->command = CMD_SCSI; + op->count = 1; + op->buf_fl = data; + op->cdbcmd = &cmd; + op->blocksize = sizeof(*data); + return process_op(op); +}
// Request capacity static int cdb_read_capacity(struct disk_op_s *op, struct cdbres_read_capacity *data) @@ -111,13 +128,23 @@ scsi_fill_cmd(struct disk_op_s *op, void *cdbcmd, int maxcdb) switch (op->command) { case CMD_READ: case CMD_WRITE: ; - struct cdb_rwdata_10 *cmd = cdbcmd; - memset(cmd, 0, maxcdb); - cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10 - : CDB_CMD_WRITE_10); - cmd->lba = cpu_to_be32(op->lba); - cmd->count = cpu_to_be16(op->count); - return GET_FLATPTR(op->drive_fl->blksize); + if (op->lba < 0xFFFFFFFFULL) { + struct cdb_rwdata_10 *cmd = cdbcmd; + memset(cmd, 0, maxcdb); + cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10 + : CDB_CMD_WRITE_10); + cmd->lba = cpu_to_be32(op->lba); + cmd->count = cpu_to_be16(op->count); + return GET_FLATPTR(op->drive_fl->blksize); + } else { + struct cdb_rwdata_16 *cmd = cdbcmd; + memset(cmd, 0, maxcdb); + cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_16 + : CDB_CMD_WRITE_16); + cmd->lba = cpu_to_be64(op->lba); + cmd->count = cpu_to_be32(op->count); + return GET_FLATPTR(op->drive_fl->blksize); + }
It’s bikeshedding, but maybe the code in the branches could be factored into separate functions, and then the return also wouldn’t need to be duplicated.
case CMD_SCSI: if (MODESEGMENT) return -1; @@ -331,18 +358,22 @@ scsi_drive_setup(struct drive_s *drive, const char *s, int prio) if (ret) return ret; - // READ CAPACITY returns the address of the last block. - // We do not bother with READ CAPACITY(16) because BIOS does not support - // 64-bit LBA anyway. - drive->blksize = be32_to_cpu(capdata.blksize); + if (be32_to_cpu(capdata.sectors) == 0xFFFFFFFFUL) { + dprintf(1, "%s: >2TB Detected trying READCAP(16)\n", s);
I’d use a higher log level, like 3 or so?
Kind regards,
Paul
+ struct cdbres_read_capacity_16 capdata16; + ret = cdb_read_capacity16(&dop, &capdata16); + drive->blksize = be32_to_cpu(capdata16.blksize); + drive->sectors = be64_to_cpu(capdata16.sectors) + 1; + } else { + drive->blksize = be32_to_cpu(capdata.blksize); + drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1; + }
if (drive->blksize != DISK_SECTOR_SIZE) { dprintf(1, "%s: unsupported block size %d\n", s, drive->blksize); return -1; } - drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1; - dprintf(1, "%s blksize=%d sectors=%u\n" - , s, drive->blksize, (unsigned)drive->sectors);
+ dprintf(1, "%s blksize=%d sectors=%llX\n", s, drive->blksize, drive->sectors); // We do not recover from USB stalls, so try to be safe and avoid // sending the command if the (obsolete, but still provided by QEMU) // fixed disk geometry page may not be supported. diff --git a/src/hw/blockcmd.h b/src/hw/blockcmd.h index f18543ed..2683186c 100644 --- a/src/hw/blockcmd.h +++ b/src/hw/blockcmd.h @@ -18,6 +18,16 @@ struct cdb_rwdata_10 { u8 pad[6]; } PACKED; +#define CDB_CMD_READ_16 0x88 +#define CDB_CMD_WRITE_16 0x8A +struct cdb_rwdata_16 { + u8 command; + u8 flags; + u64 lba; + u32 count; + u16 reserved_14; +} PACKED;
#define CDB_CMD_READ_CAPACITY 0x25 struct cdb_read_capacity { @@ -32,6 +42,23 @@ struct cdbres_read_capacity { u32 blksize; } PACKED;
+#define CDB_CMD_SERVICE_ACTION_IN 0x9E +#define CDB_CMD_SAI_READ_CAPACITY_16 0x10 +struct cdb_sai_read_capacity_16 { + u8 command; + u8 flags; + u64 lba; //marked as obsolete? + u32 len; + u16 reserved_14; +} PACKED;
+struct cdbres_read_capacity_16 { + u64 sectors; + u32 blksize; + u8 reserved_12[20]; +} PACKED;
#define CDB_CMD_TEST_UNIT_READY 0x00 #define CDB_CMD_INQUIRY 0x12 #define CDB_CMD_REQUEST_SENSE 0x03
On 01/25, Paul Menzel wrote:
Dear Max,
Thank you for your patch.
Am 25.01.24 um 12:35 schrieb Max Tottenham via SeaBIOS:
When booting from a >2TB drive/filesystem, it's possible what the kernel/bootloader may be updated and written out at an LBA address beyond what is normally accessible by the READ(10) SCSI commands. If this happens to the kernel grub will fail to boot the kernel as it will call into the BIOS with an LBA address >2TB, and the BIOS will return an error. Per the SCSI spec, >2TB drives should return 0XFFFFFFFF, and a READ CAPACITY(16) command should be issues
issue*d*
Oops - Thanks, will fix in V3.
to determine the full size of the drive, READ(16) commands can then be used in order to read data at LBA addresses beyond 2TB (64 bit LBA addresses).
If you have it already, it’d be great if you could document the test case, maybe for QEMU, how to reproduce this and verify the fix.
Sure, reproduction via Qemu requires: * Supplying a GRUB2 core.img directly to qemu via the -kernel parameter, and a ~3TB raw disk image attached, the GRUB2 core.img should have an embedded config to load it's main config file from (hd0)/boot/:
qemu-system-x86_64 -m 4096G -smp 2 -kernel /path/to/grub.img -device \ virtio-scsi-pci,id=scsi0 \ -drive \ file=/path/to/filesystem.img,if=none,media=disk,id=drive-scsi-disk-0,aio=native,cache.direct=on,format=raw,read-only=off,auto-read-only=off \ -device \ scsi-hd,bus=scsi0.0,scsi-id=0,channel=0,lun=0,drive=drive-scsi-disk-0,id=drive0,bootindex=1,rotation_rate=1 \
* Some massaging to the disk image after the initial boot may be required to coax either an OS kernel/initrd onto an LBA address beyond 2TB. During internal reproductions we have done something like the following from within the guest. $ sudo fallocate --length 2300G /root/very_large.file $ sudo update-initramfs -c -k $(uname -r) * Confirm that the updated image is located beyond 2TB: $ hdparm --fibmap /boot/initrd.img-$(uname -r) initrd.img-5.10.197-generic: filesystem blocksize 4096, begins at LBA 0; assuming 512 byte sectors. byte_offset begin_LBA end_LBA sectors 0 5203722240 5203820543 98304 50331648 5203296256 5203412079 115824
Alternately - you could probably achieve the same scenario by having a 2 disk system, where the first disk is MBR and contains the bootloader/it's config. And the 2nd disk is a large >2TB GPT disk that contains the kernels that the grub config points to (of course after performing the above massaging to ensure that the kernels/initrds do indeed lie beyond 2TB worth of LBAs.
Signed-off-by: Max Tottenham mtottenh@akamai.com
src/hw/blockcmd.c | 61 +++++++++++++++++++++++++++++++++++------------ src/hw/blockcmd.h | 27 +++++++++++++++++++++ 2 files changed, 73 insertions(+), 15 deletions(-)
diff --git a/src/hw/blockcmd.c b/src/hw/blockcmd.c index 6b6fea97..9b6a32bf 100644 --- a/src/hw/blockcmd.c +++ b/src/hw/blockcmd.c @@ -66,6 +66,23 @@ cdb_test_unit_ready(struct disk_op_s *op) return process_op(op); }
+static int +cdb_read_capacity16(struct disk_op_s *op, struct cdbres_read_capacity_16 *data) +{
- struct cdb_sai_read_capacity_16 cmd;
- memset(&cmd, 0, sizeof(cmd));
- cmd.command = CDB_CMD_SERVICE_ACTION_IN;
- cmd.flags = CDB_CMD_SAI_READ_CAPACITY_16;
- cmd.len = cpu_to_be32(sizeof(struct cdbres_read_capacity_16));
- op->command = CMD_SCSI;
- op->count = 1;
- op->buf_fl = data;
- op->cdbcmd = &cmd;
- op->blocksize = sizeof(*data);
- return process_op(op);
+}
- // Request capacity static int cdb_read_capacity(struct disk_op_s *op, struct cdbres_read_capacity *data)
@@ -111,13 +128,23 @@ scsi_fill_cmd(struct disk_op_s *op, void *cdbcmd, int maxcdb) switch (op->command) { case CMD_READ: case CMD_WRITE: ;
struct cdb_rwdata_10 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10
: CDB_CMD_WRITE_10);
cmd->lba = cpu_to_be32(op->lba);
cmd->count = cpu_to_be16(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
if (op->lba < 0xFFFFFFFFULL) {
struct cdb_rwdata_10 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10
: CDB_CMD_WRITE_10);
cmd->lba = cpu_to_be32(op->lba);
cmd->count = cpu_to_be16(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
} else {
struct cdb_rwdata_16 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_16
: CDB_CMD_WRITE_16);
cmd->lba = cpu_to_be64(op->lba);
cmd->count = cpu_to_be32(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
}
It’s bikeshedding, but maybe the code in the branches could be factored into separate functions, and then the return also wouldn’t need to be duplicated.
case CMD_SCSI: if (MODESEGMENT) return -1;
@@ -331,18 +358,22 @@ scsi_drive_setup(struct drive_s *drive, const char *s, int prio) if (ret) return ret;
- // READ CAPACITY returns the address of the last block.
- // We do not bother with READ CAPACITY(16) because BIOS does not support
- // 64-bit LBA anyway.
- drive->blksize = be32_to_cpu(capdata.blksize);
- if (be32_to_cpu(capdata.sectors) == 0xFFFFFFFFUL) {
dprintf(1, "%s: >2TB Detected trying READCAP(16)\n", s);
I’d use a higher log level, like 3 or so?
Kind regards,
Paul
struct cdbres_read_capacity_16 capdata16;
ret = cdb_read_capacity16(&dop, &capdata16);
drive->blksize = be32_to_cpu(capdata16.blksize);
drive->sectors = be64_to_cpu(capdata16.sectors) + 1;
- } else {
drive->blksize = be32_to_cpu(capdata.blksize);
drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1;
- }
if (drive->blksize != DISK_SECTOR_SIZE) { dprintf(1, "%s: unsupported block size %d\n", s, drive->blksize); return -1; }
- drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1;
- dprintf(1, "%s blksize=%d sectors=%u\n"
, s, drive->blksize, (unsigned)drive->sectors);
- dprintf(1, "%s blksize=%d sectors=%llX\n", s, drive->blksize, drive->sectors); // We do not recover from USB stalls, so try to be safe and avoid // sending the command if the (obsolete, but still provided by QEMU) // fixed disk geometry page may not be supported.
diff --git a/src/hw/blockcmd.h b/src/hw/blockcmd.h index f18543ed..2683186c 100644 --- a/src/hw/blockcmd.h +++ b/src/hw/blockcmd.h @@ -18,6 +18,16 @@ struct cdb_rwdata_10 { u8 pad[6]; } PACKED;
+#define CDB_CMD_READ_16 0x88 +#define CDB_CMD_WRITE_16 0x8A +struct cdb_rwdata_16 {
- u8 command;
- u8 flags;
- u64 lba;
- u32 count;
- u16 reserved_14;
+} PACKED;
#define CDB_CMD_READ_CAPACITY 0x25
struct cdb_read_capacity {
@@ -32,6 +42,23 @@ struct cdbres_read_capacity { u32 blksize; } PACKED;
+#define CDB_CMD_SERVICE_ACTION_IN 0x9E +#define CDB_CMD_SAI_READ_CAPACITY_16 0x10 +struct cdb_sai_read_capacity_16 {
- u8 command;
- u8 flags;
- u64 lba; //marked as obsolete?
- u32 len;
- u16 reserved_14;
+} PACKED;
+struct cdbres_read_capacity_16 {
- u64 sectors;
- u32 blksize;
- u8 reserved_12[20];
+} PACKED;
- #define CDB_CMD_TEST_UNIT_READY 0x00 #define CDB_CMD_INQUIRY 0x12 #define CDB_CMD_REQUEST_SENSE 0x03
On 01/25, Paul Menzel wrote:
Dear Max,
Thank you for your patch.
Am 25.01.24 um 12:35 schrieb Max Tottenham via SeaBIOS:
When booting from a >2TB drive/filesystem, it's possible what the kernel/bootloader may be updated and written out at an LBA address beyond what is normally accessible by the READ(10) SCSI commands. If this happens to the kernel grub will fail to boot the kernel as it will call into the BIOS with an LBA address >2TB, and the BIOS will return an error. Per the SCSI spec, >2TB drives should return 0XFFFFFFFF, and a READ CAPACITY(16) command should be issues
issue*d*
to determine the full size of the drive, READ(16) commands can then be used in order to read data at LBA addresses beyond 2TB (64 bit LBA addresses).
If you have it already, it’d be great if you could document the test case, maybe for QEMU, how to reproduce this and verify the fix.
Signed-off-by: Max Tottenham mtottenh@akamai.com
src/hw/blockcmd.c | 61 +++++++++++++++++++++++++++++++++++------------ src/hw/blockcmd.h | 27 +++++++++++++++++++++ 2 files changed, 73 insertions(+), 15 deletions(-)
diff --git a/src/hw/blockcmd.c b/src/hw/blockcmd.c index 6b6fea97..9b6a32bf 100644 --- a/src/hw/blockcmd.c +++ b/src/hw/blockcmd.c @@ -66,6 +66,23 @@ cdb_test_unit_ready(struct disk_op_s *op) return process_op(op); }
+static int +cdb_read_capacity16(struct disk_op_s *op, struct cdbres_read_capacity_16 *data) +{
- struct cdb_sai_read_capacity_16 cmd;
- memset(&cmd, 0, sizeof(cmd));
- cmd.command = CDB_CMD_SERVICE_ACTION_IN;
- cmd.flags = CDB_CMD_SAI_READ_CAPACITY_16;
- cmd.len = cpu_to_be32(sizeof(struct cdbres_read_capacity_16));
- op->command = CMD_SCSI;
- op->count = 1;
- op->buf_fl = data;
- op->cdbcmd = &cmd;
- op->blocksize = sizeof(*data);
- return process_op(op);
+}
- // Request capacity static int cdb_read_capacity(struct disk_op_s *op, struct cdbres_read_capacity *data)
@@ -111,13 +128,23 @@ scsi_fill_cmd(struct disk_op_s *op, void *cdbcmd, int maxcdb) switch (op->command) { case CMD_READ: case CMD_WRITE: ;
struct cdb_rwdata_10 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10
: CDB_CMD_WRITE_10);
cmd->lba = cpu_to_be32(op->lba);
cmd->count = cpu_to_be16(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
if (op->lba < 0xFFFFFFFFULL) {
struct cdb_rwdata_10 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10
: CDB_CMD_WRITE_10);
cmd->lba = cpu_to_be32(op->lba);
cmd->count = cpu_to_be16(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
} else {
struct cdb_rwdata_16 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_16
: CDB_CMD_WRITE_16);
cmd->lba = cpu_to_be64(op->lba);
cmd->count = cpu_to_be32(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
}
It’s bikeshedding, but maybe the code in the branches could be factored into separate functions, and then the return also wouldn’t need to be duplicated.
I missed this comment earlier - Perhaps rather than functions we could just extract the return statement out of each of the branches e.g. do the following?:
if (op->lba < 0xFFFFFFFFULL) { struct cdb_rwdata_10 *cmd = cdbcmd; memset(cmd, 0, maxcdb); cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10 : CDB_CMD_WRITE_10); cmd->lba = cpu_to_be32(op->lba); cmd->count = cpu_to_be16(op->count); } else { struct cdb_rwdata_16 *cmd = cdbcmd; memset(cmd, 0, maxcdb); cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_16 : CDB_CMD_WRITE_16); cmd->lba = cpu_to_be64(op->lba); cmd->count = cpu_to_be32(op->count); } return GET_FLATPTR(op->drive_fl->blksize);
Seems odd to me to make two separate functions for such a small 'fill this struct with data' ?
case CMD_SCSI: if (MODESEGMENT) return -1;
@@ -331,18 +358,22 @@ scsi_drive_setup(struct drive_s *drive, const char *s, int prio) if (ret) return ret;
- // READ CAPACITY returns the address of the last block.
- // We do not bother with READ CAPACITY(16) because BIOS does not support
- // 64-bit LBA anyway.
- drive->blksize = be32_to_cpu(capdata.blksize);
- if (be32_to_cpu(capdata.sectors) == 0xFFFFFFFFUL) {
dprintf(1, "%s: >2TB Detected trying READCAP(16)\n", s);
I’d use a higher log level, like 3 or so?
Noted. Will update in V3.
Kind regards,
Paul
struct cdbres_read_capacity_16 capdata16;
ret = cdb_read_capacity16(&dop, &capdata16);
drive->blksize = be32_to_cpu(capdata16.blksize);
drive->sectors = be64_to_cpu(capdata16.sectors) + 1;
- } else {
drive->blksize = be32_to_cpu(capdata.blksize);
drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1;
- }
if (drive->blksize != DISK_SECTOR_SIZE) { dprintf(1, "%s: unsupported block size %d\n", s, drive->blksize); return -1; }
- drive->sectors = (u64)be32_to_cpu(capdata.sectors) + 1;
- dprintf(1, "%s blksize=%d sectors=%u\n"
, s, drive->blksize, (unsigned)drive->sectors);
- dprintf(1, "%s blksize=%d sectors=%llX\n", s, drive->blksize, drive->sectors); // We do not recover from USB stalls, so try to be safe and avoid // sending the command if the (obsolete, but still provided by QEMU) // fixed disk geometry page may not be supported.
diff --git a/src/hw/blockcmd.h b/src/hw/blockcmd.h index f18543ed..2683186c 100644 --- a/src/hw/blockcmd.h +++ b/src/hw/blockcmd.h @@ -18,6 +18,16 @@ struct cdb_rwdata_10 { u8 pad[6]; } PACKED;
+#define CDB_CMD_READ_16 0x88 +#define CDB_CMD_WRITE_16 0x8A +struct cdb_rwdata_16 {
- u8 command;
- u8 flags;
- u64 lba;
- u32 count;
- u16 reserved_14;
+} PACKED;
#define CDB_CMD_READ_CAPACITY 0x25
struct cdb_read_capacity {
@@ -32,6 +42,23 @@ struct cdbres_read_capacity { u32 blksize; } PACKED;
+#define CDB_CMD_SERVICE_ACTION_IN 0x9E +#define CDB_CMD_SAI_READ_CAPACITY_16 0x10 +struct cdb_sai_read_capacity_16 {
- u8 command;
- u8 flags;
- u64 lba; //marked as obsolete?
- u32 len;
- u16 reserved_14;
+} PACKED;
+struct cdbres_read_capacity_16 {
- u64 sectors;
- u32 blksize;
- u8 reserved_12[20];
+} PACKED;
- #define CDB_CMD_TEST_UNIT_READY 0x00 #define CDB_CMD_INQUIRY 0x12 #define CDB_CMD_REQUEST_SENSE 0x03
Dear Max,
Thank you for your instant reply.
Am 25.01.24 um 15:36 schrieb Max Tottenham:
On 01/25, Paul Menzel wrote:
Am 25.01.24 um 12:35 schrieb Max Tottenham via SeaBIOS:
[…]
src/hw/blockcmd.c | 61 +++++++++++++++++++++++++++++++++++------------ src/hw/blockcmd.h | 27 +++++++++++++++++++++ 2 files changed, 73 insertions(+), 15 deletions(-)
diff --git a/src/hw/blockcmd.c b/src/hw/blockcmd.c index 6b6fea97..9b6a32bf 100644 --- a/src/hw/blockcmd.c +++ b/src/hw/blockcmd.c @@ -66,6 +66,23 @@ cdb_test_unit_ready(struct disk_op_s *op) return process_op(op); }
+static int +cdb_read_capacity16(struct disk_op_s *op, struct cdbres_read_capacity_16 *data) +{
- struct cdb_sai_read_capacity_16 cmd;
- memset(&cmd, 0, sizeof(cmd));
- cmd.command = CDB_CMD_SERVICE_ACTION_IN;
- cmd.flags = CDB_CMD_SAI_READ_CAPACITY_16;
- cmd.len = cpu_to_be32(sizeof(struct cdbres_read_capacity_16));
- op->command = CMD_SCSI;
- op->count = 1;
- op->buf_fl = data;
- op->cdbcmd = &cmd;
- op->blocksize = sizeof(*data);
- return process_op(op);
+}
- // Request capacity static int cdb_read_capacity(struct disk_op_s *op, struct cdbres_read_capacity *data)
@@ -111,13 +128,23 @@ scsi_fill_cmd(struct disk_op_s *op, void *cdbcmd, int maxcdb) switch (op->command) { case CMD_READ: case CMD_WRITE: ;
struct cdb_rwdata_10 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10
: CDB_CMD_WRITE_10);
cmd->lba = cpu_to_be32(op->lba);
cmd->count = cpu_to_be16(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
if (op->lba < 0xFFFFFFFFULL) {
struct cdb_rwdata_10 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10
: CDB_CMD_WRITE_10);
cmd->lba = cpu_to_be32(op->lba);
cmd->count = cpu_to_be16(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
} else {
struct cdb_rwdata_16 *cmd = cdbcmd;
memset(cmd, 0, maxcdb);
cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_16
: CDB_CMD_WRITE_16);
cmd->lba = cpu_to_be64(op->lba);
cmd->count = cpu_to_be32(op->count);
return GET_FLATPTR(op->drive_fl->blksize);
}
It’s bikeshedding, but maybe the code in the branches could be factored into separate functions, and then the return also wouldn’t need to be duplicated.
I missed this comment earlier - Perhaps rather than functions we could just extract the return statement out of each of the branches e.g. do the following?:
if (op->lba < 0xFFFFFFFFULL) { struct cdb_rwdata_10 *cmd = cdbcmd; memset(cmd, 0, maxcdb); cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_10 : CDB_CMD_WRITE_10); cmd->lba = cpu_to_be32(op->lba); cmd->count = cpu_to_be16(op->count); } else { struct cdb_rwdata_16 *cmd = cdbcmd; memset(cmd, 0, maxcdb); cmd->command = (op->command == CMD_READ ? CDB_CMD_READ_16 : CDB_CMD_WRITE_16); cmd->lba = cpu_to_be64(op->lba); cmd->count = cpu_to_be32(op->count); } return GET_FLATPTR(op->drive_fl->blksize);
Seems odd to me to make two separate functions for such a small 'fill this struct with data' ?
Yes, agreed.
[…]
Kind regards,
Paul