This is an interesting enough setting to add a dprintf() for.
Signed-off-by: John Levon john.levon@nutanix.com --- src/fw/pciinit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index bb44dc29..0395fdbf 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1197,8 +1197,10 @@ pci_setup(void) } }
- if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver4G) + if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver4G) { + dprintf(1, "enabling 64-bit pci mmio window\n"); pci_pad_mem64 = 1; + }
dprintf(1, "=== PCI bus & bridge init ===\n"); if (pci_probe_host() != 0) {
qemu_cfg_e820() reports RamSize* at debug level 1. Do the same in qemu_early_e820().
Signed-off-by: John Levon john.levon@nutanix.com Reviewed-by: Paul Menzel pmenzel@molgen.mpg.de --- src/fw/paravirt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index e5d4eca0..0ff5d0a4 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -784,7 +784,7 @@ static int qemu_early_e820(void) } }
- dprintf(3, "qemu/e820: RamSize: 0x%08x\n", RamSize); - dprintf(3, "qemu/e820: RamSizeOver4G: 0x%016llx\n", RamSizeOver4G); + dprintf(1, "qemu/e820: RamSize: 0x%08x\n", RamSize); + dprintf(1, "qemu/e820: RamSizeOver4G: 0x%016llx\n", RamSizeOver4G); return 1; }
Signed-off-by: John Levon john.levon@nutanix.com --- src/fw/paravirt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 0ff5d0a4..3ad9094b 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -28,6 +28,8 @@ #include "xen.h" // xen_biostable_setup #include "stacks.h" // yield
+#define MEM_4G (0x100000000ULL) + // Amount of continuous ram under 4Gig u32 RamSize; // Amount of continuous ram >4Gig @@ -589,7 +591,7 @@ qemu_cfg_e820(void) | ((u32)rtc_read(CMOS_MEM_HIGHMEM_MID) << 24) | ((u64)rtc_read(CMOS_MEM_HIGHMEM_HIGH) << 32)); RamSizeOver4G = high; - e820_add(0x100000000ull, high, E820_RAM); + e820_add(MEM_4G, high, E820_RAM); dprintf(1, "RamSizeOver4G: 0x%016llx [cmos]\n", RamSizeOver4G); }
@@ -772,14 +774,14 @@ static int qemu_early_e820(void) e820_add(table.address, table.length, table.type); dprintf(1, "qemu/e820: addr 0x%016llx len 0x%016llx [RAM]\n", table.address, table.length); - if (table.address < 0x100000000LL) { + if (table.address < MEM_4G) { // below 4g if (RamSize < table.address + table.length) RamSize = table.address + table.length; } else { // above 4g - if (RamSizeOver4G < table.address + table.length - 0x100000000LL) - RamSizeOver4G = table.address + table.length - 0x100000000LL; + if (RamSizeOver4G < table.address + table.length - MEM_4G) + RamSizeOver4G = table.address + table.length - MEM_4G; } } }
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
virtio_balloon virtio2: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_net virtio0: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_scsi virtio1: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 Gave up waiting for root device. Common problems: - Boot args (cat /proc/cmdline) - Check rootdelay= (did the system wait long enough?) - Check root= (did the system wait for the right device?) - Missing modules (cat /proc/modules; ls /dev) ALERT! /dev/disk/by-uuid/86859879-3f17-443d-a226-077c435291e2 does not exist. Dropping to a shell!
Be a bit more conservative, and only enable the window by default when the ram size extends beyond 64G - a 32-bit guest using PAE cannot address beyond that anyway. Due to the mmio window this translates to an effective working configuration limit of 62G/63G, depending on machine type.
Fixes: 96a8d130 ("be less conservative with the 64bit pci io window") Signed-off-by: John Levon john.levon@nutanix.com --- src/fw/paravirt.c | 28 ++++++++++++++++++++++++---- src/fw/paravirt.h | 1 + src/fw/pciinit.c | 6 +++++- 3 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 3ad9094b..5b0f191b 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -29,11 +29,14 @@ #include "stacks.h" // yield
#define MEM_4G (0x100000000ULL) +#define MEM_64G (16 * 0x100000000ULL)
// Amount of continuous ram under 4Gig u32 RamSize; // Amount of continuous ram >4Gig u64 RamSizeOver4G; +// Amount of continuous ram >64Gig +u64 RamSizeOver64G; // physical address space bits u8 CPUPhysBits; // 64bit processor @@ -591,8 +594,12 @@ qemu_cfg_e820(void) | ((u32)rtc_read(CMOS_MEM_HIGHMEM_MID) << 24) | ((u64)rtc_read(CMOS_MEM_HIGHMEM_HIGH) << 32)); RamSizeOver4G = high; + RamSizeOver64G = 0; + if (high + MEM_4G > MEM_64G) + RamSizeOver64G = high + MEM_4G - MEM_64G; e820_add(MEM_4G, high, E820_RAM); dprintf(1, "RamSizeOver4G: 0x%016llx [cmos]\n", RamSizeOver4G); + dprintf(1, "RamSizeOver64G: 0x%016llx [cmos]\n", RamSizeOver64G); }
// Populate romfile entries for legacy fw_cfg ports (that predate the @@ -774,19 +781,32 @@ static int qemu_early_e820(void) e820_add(table.address, table.length, table.type); dprintf(1, "qemu/e820: addr 0x%016llx len 0x%016llx [RAM]\n", table.address, table.length); + // address below 4g? if (table.address < MEM_4G) { - // below 4g if (RamSize < table.address + table.length) RamSize = table.address + table.length; } else { - // above 4g - if (RamSizeOver4G < table.address + table.length - MEM_4G) - RamSizeOver4G = table.address + table.length - MEM_4G; + u64 table_end = table.address + table.length; + + /* + * Note that this would ignore any span that crosses the 4G + * boundary. For RamSizeOver64G, we do account for any spans + * that cross the 64G boundary. + */ + if (RamSizeOver4G < table_end - MEM_4G) + RamSizeOver4G = table_end - MEM_4G; + + // crosses 64G ? + if (table_end > MEM_64G) { + if (RamSizeOver64G < table_end - MEM_64G) + RamSizeOver64G = table_end - MEM_64G; + } } } }
dprintf(1, "qemu/e820: RamSize: 0x%08x\n", RamSize); dprintf(1, "qemu/e820: RamSizeOver4G: 0x%016llx\n", RamSizeOver4G); + dprintf(1, "qemu/e820: RamSizeOver64G: 0x%016llx\n", RamSizeOver64G); return 1; } diff --git a/src/fw/paravirt.h b/src/fw/paravirt.h index 62a2cd07..b56e96e8 100644 --- a/src/fw/paravirt.h +++ b/src/fw/paravirt.h @@ -30,6 +30,7 @@ typedef struct QemuCfgDmaAccess {
extern u32 RamSize; extern u64 RamSizeOver4G; +extern u64 RamSizeOver64G; extern int PlatformRunningOn; extern u8 CPUPhysBits; extern u8 CPULongMode; diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index 0395fdbf..1247eb12 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1197,7 +1197,11 @@ pci_setup(void) } }
- if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver4G) { + /* + * Only enable this if we exceed 64G, as some older 32-bit Linux VMs cannot + * handle the 64-bit window correctly. + */ + if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver64G) { dprintf(1, "enabling 64-bit pci mmio window\n"); pci_pad_mem64 = 1; }
On Wed, 19 Jun 2024 11:21:14 +0100 John Levon john.levon@nutanix.com wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
virtio_balloon virtio2: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_net virtio0: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_scsi virtio1: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1
The above doesn't exactly indicate the 64-bit MMIO window as the culprit
Can you provide more data on what exactly goes wrong and where?
Does adding 'realloc' option to guest kernel CLI help?
Gave up waiting for root device. Common problems:
- Boot args (cat /proc/cmdline)
- Check rootdelay= (did the system wait long enough?)
- Check root= (did the system wait for the right device?)
- Missing modules (cat /proc/modules; ls /dev)
ALERT! /dev/disk/by-uuid/86859879-3f17-443d-a226-077c435291e2 does not exist. Dropping to a shell!
Be a bit more conservative, and only enable the window by default when the ram size extends beyond 64G - a 32-bit guest using PAE cannot address beyond that anyway. Due to the mmio window this translates to an effective working configuration limit of 62G/63G, depending on machine type.
Fixes: 96a8d130 ("be less conservative with the 64bit pci io window") Signed-off-by: John Levon john.levon@nutanix.com
src/fw/paravirt.c | 28 ++++++++++++++++++++++++---- src/fw/paravirt.h | 1 + src/fw/pciinit.c | 6 +++++- 3 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 3ad9094b..5b0f191b 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -29,11 +29,14 @@ #include "stacks.h" // yield
#define MEM_4G (0x100000000ULL) +#define MEM_64G (16 * 0x100000000ULL)
// Amount of continuous ram under 4Gig u32 RamSize; // Amount of continuous ram >4Gig u64 RamSizeOver4G; +// Amount of continuous ram >64Gig +u64 RamSizeOver64G; // physical address space bits u8 CPUPhysBits; // 64bit processor @@ -591,8 +594,12 @@ qemu_cfg_e820(void) | ((u32)rtc_read(CMOS_MEM_HIGHMEM_MID) << 24) | ((u64)rtc_read(CMOS_MEM_HIGHMEM_HIGH) << 32)); RamSizeOver4G = high;
+    RamSizeOver64G = 0;
+    if (high + MEM_4G > MEM_64G)
+        RamSizeOver64G = high + MEM_4G - MEM_64G;
     e820_add(MEM_4G, high, E820_RAM);
     dprintf(1, "RamSizeOver4G: 0x%016llx [cmos]\n", RamSizeOver4G);
+    dprintf(1, "RamSizeOver64G: 0x%016llx [cmos]\n", RamSizeOver64G);
}
// Populate romfile entries for legacy fw_cfg ports (that predate the @@ -774,19 +781,32 @@ static int qemu_early_e820(void) e820_add(table.address, table.length, table.type); dprintf(1, "qemu/e820: addr 0x%016llx len 0x%016llx [RAM]\n", table.address, table.length);
// address below 4g? if (table.address < MEM_4G) {
// below 4g if (RamSize < table.address + table.length) RamSize = table.address + table.length; } else {
// above 4g
if (RamSizeOver4G < table.address + table.length - MEM_4G)
RamSizeOver4G = table.address + table.length - MEM_4G;
u64 table_end = table.address + table.length;
/*
* Note that this would ignore any span that crosses the 4G
* boundary. For RamSizeOver64G, we do account for any spans
* that cross the 64G boundary.
*/
if (RamSizeOver4G < table_end - MEM_4G)
RamSizeOver4G = table_end - MEM_4G;
// crosses 64G ?
if (table_end > MEM_64G) {
if (RamSizeOver64G < table_end - MEM_64G)
RamSizeOver64G = table_end - MEM_64G;
} } }
}
dprintf(1, "qemu/e820: RamSize: 0x%08x\n", RamSize); dprintf(1, "qemu/e820: RamSizeOver4G: 0x%016llx\n", RamSizeOver4G);
dprintf(1, "qemu/e820: RamSizeOver64G: 0x%016llx\n", RamSizeOver64G); return 1;
} diff --git a/src/fw/paravirt.h b/src/fw/paravirt.h index 62a2cd07..b56e96e8 100644 --- a/src/fw/paravirt.h +++ b/src/fw/paravirt.h @@ -30,6 +30,7 @@ typedef struct QemuCfgDmaAccess {
extern u32 RamSize; extern u64 RamSizeOver4G; +extern u64 RamSizeOver64G; extern int PlatformRunningOn; extern u8 CPUPhysBits; extern u8 CPULongMode; diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index 0395fdbf..1247eb12 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1197,7 +1197,11 @@ pci_setup(void) } }
- if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver4G) {
- /*
* Only enable this if we exceed 64G, as some older 32-bit Linux VMs cannot
* handle the 64-bit window correctly.
*/
- if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver64G) { dprintf(1, "enabling 64-bit pci mmio window\n"); pci_pad_mem64 = 1; }
On Thu, Jun 20, 2024 at 04:00:05PM +0200, Igor Mammedov wrote:
Regardless of which way is chosen some users will suffer one way or another. My vote would be to keep current behavior so 'modern' guests would work without issues.
The Linux kernel policy is "no regressions", I cannot say it better than Linus himself (if you'll excuse the shouting):
https://lkml.org/lkml/2018/8/3/621
This is exactly what happened here - we updated seabios and things stopped working. It's unfortunate that the long tail of legacy exists, and we all wish it didn't, but it does.
On Thu, Jun 20, 2024 at 04:09:24PM +0200, Igor Mammedov wrote:
On Wed, 19 Jun 2024 11:21:14 +0100 John Levon john.levon@nutanix.com wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
virtio_balloon virtio2: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_net virtio0: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_scsi virtio1: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1
The above doesn't exactly indicate the 64-bit MMIO window as the culprit
Can you provide more data on what exactly goes wrong and where?
Sorry, no idea, and I don't think it's a useful exercise to debug old Linux kernels.
Does adding 'realloc' option to guest kernel CLI help?
I'll try this and get back to you.
This isn't a practical solution in general though IMO, it's not reasonable to ask our downstream customers (and their downstream customers and so on) to figure out how to do this across what could well be thousands of VMs minimum.
regards john
On Thu, 20 Jun 2024 15:43:33 +0100 John Levon john.levon@nutanix.com wrote:
On Thu, Jun 20, 2024 at 04:00:05PM +0200, Igor Mammedov wrote:
Regardless of which way is chosen some users will suffer one way or another. My vote would be to keep current behavior so 'modern' guests would work without issues.
The Linux kernel policy is "no regressions", I cannot say it better than Linus himself (if you'll excuse the shouting):
well, you just upgraded the 'hardware' for a legacy OS; there are no guarantees that it will continue to work without any changes.
with this patch there will be a regression the other way around, affecting not-so-old OSes.
This is exactly what happened here - we updated seabios and things stopped working. It's unfortunate that the long tail of legacy exists, and we all wish it didn't, but it does.
as was pointed out earlier it's not qemu/seabios domain to guess what OS will be running and tune its behavior to that.
That's up to the upper layers to tune knobs/guest, since they can be aware of what the guest OS actually is.
Here goes another workaround option: use old SeaBIOS for broken OSes.
On Thu, Jun 20, 2024 at 04:09:24PM +0200, Igor Mammedov wrote:
On Wed, 19 Jun 2024 11:21:14 +0100 John Levon john.levon@nutanix.com wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
virtio_balloon virtio2: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_net virtio0: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_scsi virtio1: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1
The above doesn't exactly indicate the 64-bit MMIO window as the culprit
Can you provide more data on what exactly goes wrong and where?
Sorry, no idea, and I don't think it's a useful exercise to debug old Linux kernels.
well, I've just successfully installed RHEL5.11 and RHEL6.10 from i386 ISOs on virtio root disk + 64Gb RAM (upstream QEMU), and they booted to command prompt without any issues.
The justification "my OS stopped seeing the root disk for some unclear reason" might work for a closed-source OS, but for Linux there should be a more convincing story for introducing a breaking change. So far there is no evidence that it's not a guest issue/bug.
Does adding 'realloc' option to guest kernel CLI help?
I'll try this and get back to you.
This isn't a practical solution in general though IMO, it's not reasonable to ask our downstream customers (and their downstream customers and so on) to figure out how to do this across what could well be thousands of VMs minimum
(if amending guest is not an option then there are at least 2 possible workarounds on host side)
But the same goes the other way around for those who rely on hotplug (kubevirt comes to mind). There are no universal defaults, and I'd rather keep the mainstream happy when it comes to defaults, while downstream can take care of supporting corner cases and migration issues to new host infrastructure.
my 2cents, anyways it's up to maintainers to decide.
regards john
On Thu, Jun 20, 2024 at 05:47:25PM +0200, Igor Mammedov wrote:
well, you just upgraded the 'hardware' for a legacy OS; there are no guarantees that it will continue to work without any changes.
Are you saying that seabios doesn't attempt to provide a stable virtual hardware platform at all? Is that written down somewhere?
with this patch there will be regression other way around affecting not so old OSes.
It is certainly unfortunate that it took us so long to rediscover this (sorry), but again - the Linux policy is *very* clear on this - you don't get to introduce a regression for something that worked previously in order to support something new that didn't previously work.
If the policy of seabios differs from Linux, that's fine - it's your project - but clearly stating that seabios doesn't do this in the documentation would be useful, so we know not to update to newer versions ever.
Here goes another workaround option: use old SeaBIOS for broken OSes.
Your "broken OSes" are production systems for many people.
Justification 'my OS stopped seeing root disk' for some unclear reason might work for close sourced OS but for Linux there should be more convincing story for a introducing breaking change.
It is seabios that introduced the breaking change, not an old Linux version.
regards john
On Thu, Jun 20, 2024 at 04:57:35PM GMT, John Levon wrote:
On Thu, Jun 20, 2024 at 05:47:25PM +0200, Igor Mammedov wrote:
well you just upgraded 'hardware' for legacy OS, there is no guaranties that it will continue to work without any changes.
Are you saying that seabios doesn't attempt to provide a stable virtual hardware platform at all? Is that written down somewhere?
Well, seabios behavior did change in the past and probably will continue to change to make modern workloads work better.
We try to not break existing stuff, this is why the heuristic is there in the first place. The idea is: If there is no memory above 4G it is more likely that the OS in the VM does not support long mode or PAE paging mode, so better don't place pci bars above 4G.
Obviously it is not always working out as planned ...
It is certainly unfortunate that it took us so long to rediscover this (sorry), but again - the Linux policy is *very* clear on this - you don't get to introduce a regression for something that worked previously in order to support something new that didn't previously work.
Well. You are comparing apples with oranges here. Linux has a much easier job, it can simply add new interfaces (typically syscalls) for new stuff. seabios is stuck with software interfaces which are fixed and for the most part have been defined in the last century ...
take care, Gerd
Hi,
well, I've just successfully installed RHEL5.11 and RHEL6.10 from i386 ISOs on virtio root disk + 64Gb RAM (upstream QEMU), and they booted to command prompt without any issues.
Oh, interesting. Apparently the physical address space is the same in 32-bit and long mode, i.e. on x86-64 hardware PAE paging mode can map physical addresses above 64G.
rhel-6-32 kraxel ~# uname -a Linux rhel-6-32.lab.home.kraxel.org 2.6.32-754.35.1.el6.i686 #1 SMP Wed Sep 16 06:48:16 EDT 2020 i686 i686 i386 GNU/Linux rhel-6-32 kraxel ~# cat /proc/iomem [ ... ] fffc0000-ffffffff : reserved 100000000-17fffffff : System RAM 7000000000-77ffffffff : PCI Bus 0000:00 7000000000-7000003fff : 0000:00:02.0 7000004000-7000007fff : 0000:00:03.0 7000004000-7000007fff : virtio-pci [ ... ]
So the 64G / 36 phys-bits apparently simply happens to be the limit implemented by 32-bit processors (and early 64-bit processors from intel too). It's NOT a hard limit for PAE paging mode.
So I'm wondering why the 32-bit guests are running into trouble in the first place?
Also using the 64G as indicator for the heuristics doesn't look that useful any more ...
take care, Gerd
On Wed, Jun 19, 2024 at 11:21:14AM GMT, John Levon wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
Well. Why people would use *that* ubuntu version is not clear to me. It's *loooooong* out of support. Even the LTS version from that year (16.04) is not supported any more. But it is at least available for download still, so I gave it a spin.
Turns out it apparently can't deal with PCI bars mapped above 16TB (aka 44 phys-bits). Test patch below.
take care, Gerd
diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index bb44dc296047..a43876a931c9 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1189,11 +1189,16 @@ pci_setup(void)
if (CPUPhysBits) { pci_mem64_top = 1LL << CPUPhysBits; - if (CPUPhysBits > 46) { - // Old linux kernels have trouble dealing with more than 46 - // phys-bits, so avoid that for now. Seems to be a bug in the - // virtio-pci driver. Reported: centos-7, ubuntu-18.04 - pci_mem64_top = 1LL << 46; + if (CPUPhysBits > 44) { + // Old linux kernels have trouble dealing with more than 44/46 + // phys-bits. Seems to be a bug in the virtio-pci driver. + // 46: centos-7, ubuntu-18.04 + // 44: ubuntu-16.04 + // Limit the used address space to mitigate the bug, except we are + // running in a guest with more than 1TB of memory installed. + if (RamSizeOver4G < (1LL << 40)) { + pci_mem64_top = 1LL << 44; + } } }
On Fri, Jun 21, 2024 at 02:05:17PM +0200, Gerd Hoffmann wrote:
On Wed, Jun 19, 2024 at 11:21:14AM GMT, John Levon wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
Turns out it apparently can't deal with PCI bars mapped above 16TB (aka 44 phys-bits). Test patch below.
Thanks for the patch, I can confirm this also works with Ubuntu 14.04 (oldest we had to hand) as well as a couple of 32-bit Windows VMs. This is a much better fix!
Even the LTS version from that year (16.04) is not supported any more.
Even 14.04 is not yet end of life. If you're prepared to pay, they'll still support you. https://wiki.ubuntu.com/Releases
Well. Why people would use *that* ubuntu version is not clear to me. It's *loooooong* out of support.
You're in the IT dept of a large corporation. You have some critical application running on some old Dell server with Ubuntu 16.04. A move to virtualization has been mandated across the org, so you need to decommission that server. The application was built by some contractor - before your time - and the source code was long lost, due to a misadventure with a misconfigured array - again before your time.
You've tried to use a newer version, but the application depends on lots of libraries that didn't take compatibility seriously (like, say, GNOME), so it simply can't run on newer versions. You've tried for some time to work around this by building and installing dependencies but you're not an expert on dynamic linkers, and could never get that last C++ symbol to resolve.
There's no funding to build a new replacement for the app. You're aware that the OS is out of full support, so you do your best to lock down any network access and mitigate the relevant CVEs.
Now you try to upgrade your virtualization cluster, and your VM doesn't boot any more.
This kind of situation is very common. The long tail is long :(
regards john
On Fri, Jun 21, 2024 at 01:37:24PM +0100, John Levon wrote:
On Fri, Jun 21, 2024 at 02:05:17PM +0200, Gerd Hoffmann wrote:
On Wed, Jun 19, 2024 at 11:21:14AM GMT, John Levon wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
Turns out it apparently can't deal with PCI bars mapped above 16TB (aka 44 phys-bits). Test patch below.
Thanks for the patch, I can confirm this also works with Ubuntu 14.04 (oldest we had to hand) as well as a couple of 32-bit Windows VMs. This is a much better fix!
Thanks for tracking this down and testing. I agree this looks like an improved fix.
-Kevin
On Fri, Jun 21, 2024 at 01:37:24PM GMT, John Levon wrote:
On Fri, Jun 21, 2024 at 02:05:17PM +0200, Gerd Hoffmann wrote:
On Wed, Jun 19, 2024 at 11:21:14AM GMT, John Levon wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
Well. Why people would use *that* ubuntu version is not clear to me. It's *loooooong* out of support.
You're in the IT dept of a large corporation. You have some critical application running on some old Dell server with Ubuntu 16.04.
I was specifically referring to the "16.10" listed above, i.e. running critical stuff on non-LTS distros in the first place.
16.04-LTS is a much better choice, and that there are plenty of things which can happen in real life which can delay or even prevent moving applications to a newer LTS version is pretty clear.
take care, Gerd
On Fri, 21 Jun 2024 14:05:17 +0200 Gerd Hoffmann kraxel@redhat.com wrote:
On Wed, Jun 19, 2024 at 11:21:14AM GMT, John Levon wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
Well. Why people would use *that* ubuntu version is not clear to me. It's *loooooong* out of support. Even the LTS version from that year (16.04) is not supported any more. But it is at least available for download still, so I gave it a spin.
Turns out it apparently can't deal with PCI bars mapped above 16TB (aka 44 phys-bits). Test patch below.
take care, Gerd
diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index bb44dc296047..a43876a931c9 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1189,11 +1189,16 @@ pci_setup(void)
if (CPUPhysBits) { pci_mem64_top = 1LL << CPUPhysBits;
if (CPUPhysBits > 46) {
// Old linux kernels have trouble dealing with more than 46
// phys-bits, so avoid that for now. Seems to be a bug in the
// virtio-pci driver. Reported: centos-7, ubuntu-18.04
pci_mem64_top = 1LL << 46;
if (CPUPhysBits > 44) {
// Old linux kernels have trouble dealing with more than 44/46
// phys-bits. Seems to be a bug in the virtio-pci driver.
// 46: centos-7, ubuntu-18.04
// 44: ubuntu-16.04
// Limit the used address space to mitigate the bug, except we are
// running in a guest with more than 1TB of memory installed.
Is it possible to fix those broken drivers (centos-7 for example) and ditch this heuristic altogether? The rest of downstream can pick it up from there if they care about their customers.
if (RamSizeOver4G < (1LL << 40)) {
pci_mem64_top = 1LL << 44;
}} }
SeaBIOS mailing list -- seabios@seabios.org To unsubscribe send an email to seabios-leave@seabios.org
Hi,
Dne 21. 06. 24 v 15:20 Igor Mammedov napsal(a):
// Old linux kernels have trouble dealing with more than 44/46
// phys-bits. Seems to be a bug in the virtio-pci driver.
// 46: centos-7, ubuntu-18.04
// 44: ubuntu-16.04
// Limit the used address space to mitigate the bug, except we are
// running in a guest with more than 1TB of memory installed.
Is it possible to fix those broken drivers (centos-7 for example) and ditch this heuristic altogether?
Does this code ever runs in some baremetal use cases as well?
Or at least, meybe this could be applied only when virtio devices are present (maybe even just when transitional devices are present) ?
Thanks, Rudolf
On Fri, Jun 21, 2024 at 10:29:45PM +0200, Rudolf Marek wrote:
Hi,
Dne 21. 06. 24 v 15:20 Igor Mammedov napsal(a):
// Old linux kernels have trouble dealing with more than 44/46
// phys-bits. Seems to be a bug in the virtio-pci driver.
// 46: centos-7, ubuntu-18.04
// 44: ubuntu-16.04
// Limit the used address space to mitigate the bug, except we are
// running in a guest with more than 1TB of memory installed.
Is it possible to fix those broken drivers (centos-7 for example) and ditch this heuristic altogether?
Does this code ever runs in some baremetal use cases as well?
Or at least, meybe this could be applied only when virtio devices are present (maybe even just when transitional devices are present) ?
This code is only used on qemu (and derivatives) - specifically CONFIG_QEMU must be true. When running on coreboot, SeaBIOS expects coreboot to map all the PCI devices.
Cheers, -Kevin
On Fri, Jun 21, 2024 at 03:20:19PM GMT, Igor Mammedov wrote:
diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index bb44dc296047..a43876a931c9 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1189,11 +1189,16 @@ pci_setup(void)
if (CPUPhysBits) { pci_mem64_top = 1LL << CPUPhysBits;
if (CPUPhysBits > 46) {
// Old linux kernels have trouble dealing with more than 46
// phys-bits, so avoid that for now. Seems to be a bug in the
// virtio-pci driver. Reported: centos-7, ubuntu-18.04
pci_mem64_top = 1LL << 46;
if (CPUPhysBits > 44) {
// Old linux kernels have trouble dealing with more than 44/46
// phys-bits. Seems to be a bug in the virtio-pci driver.
// 46: centos-7, ubuntu-18.04
// 44: ubuntu-16.04
// Limit the used address space to mitigate the bug, except we are
// running in a guest with more than 1TB of memory installed.
Is it possible to fix those broken drivers (centos-7 for example) and ditch this heuristic altogether? The rest of downstream can pick it up from there if they care about their customers.
Some further testing showed that this is not version-specific but arch specific. Old 32-bit kernels fail >44, old 64-bit kernels fail >46.
Note that 44 = 32 + 12, i.e. this could be pfn (page frame number) hitting MAX_UINT32. Should that be the case the fix is probably not easy (didn't check the kernel source though).
Also note that releasing a kernel fix is not enough, you also have to respin install media. Distros which are *that* old typically don't get regular install media updates any more ...
In short: The idea to fix distros and drop the heuristic is IMHO not realistic.
take care, Gerd