This is an interesting enough setting to add a dprintf() for.
Signed-off-by: John Levon john.levon@nutanix.com --- src/fw/pciinit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index bb44dc29..0395fdbf 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1197,8 +1197,10 @@ pci_setup(void) } }
- if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver4G) + if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver4G) { + dprintf(1, "enabling 64-bit pci mmio window\n"); pci_pad_mem64 = 1; + }
dprintf(1, "=== PCI bus & bridge init ===\n"); if (pci_probe_host() != 0) {
qemu_cfg_e820() reports RamSize* at debug level 1. Do the same in qemu_early_e820().
Signed-off-by: John Levon john.levon@nutanix.com Reviewed-by: Paul Menzel pmenzel@molgen.mpg.de --- src/fw/paravirt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index e5d4eca0..0ff5d0a4 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -784,7 +784,7 @@ static int qemu_early_e820(void) } }
- dprintf(3, "qemu/e820: RamSize: 0x%08x\n", RamSize); - dprintf(3, "qemu/e820: RamSizeOver4G: 0x%016llx\n", RamSizeOver4G); + dprintf(1, "qemu/e820: RamSize: 0x%08x\n", RamSize); + dprintf(1, "qemu/e820: RamSizeOver4G: 0x%016llx\n", RamSizeOver4G); return 1; }
Signed-off-by: John Levon john.levon@nutanix.com --- src/fw/paravirt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 0ff5d0a4..3ad9094b 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -28,6 +28,8 @@ #include "xen.h" // xen_biostable_setup #include "stacks.h" // yield
+#define MEM_4G (0x100000000ULL) + // Amount of continuous ram under 4Gig u32 RamSize; // Amount of continuous ram >4Gig @@ -589,7 +591,7 @@ qemu_cfg_e820(void) | ((u32)rtc_read(CMOS_MEM_HIGHMEM_MID) << 24) | ((u64)rtc_read(CMOS_MEM_HIGHMEM_HIGH) << 32)); RamSizeOver4G = high; - e820_add(0x100000000ull, high, E820_RAM); + e820_add(MEM_4G, high, E820_RAM); dprintf(1, "RamSizeOver4G: 0x%016llx [cmos]\n", RamSizeOver4G); }
@@ -772,14 +774,14 @@ static int qemu_early_e820(void) e820_add(table.address, table.length, table.type); dprintf(1, "qemu/e820: addr 0x%016llx len 0x%016llx [RAM]\n", table.address, table.length); - if (table.address < 0x100000000LL) { + if (table.address < MEM_4G) { // below 4g if (RamSize < table.address + table.length) RamSize = table.address + table.length; } else { // above 4g - if (RamSizeOver4G < table.address + table.length - 0x100000000LL) - RamSizeOver4G = table.address + table.length - 0x100000000LL; + if (RamSizeOver4G < table.address + table.length - MEM_4G) + RamSizeOver4G = table.address + table.length - MEM_4G; } } }
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
virtio_balloon virtio2: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_net virtio0: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_scsi virtio1: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 Gave up waiting for root device. Common problems: - Boot args (cat /proc/cmdline) - Check rootdelay= (did the system wait long enough?) - Check root= (did the system wait for the right device?) - Missing modules (cat /proc/modules; ls /dev) ALERT! /dev/disk/by-uuid/86859879-3f17-443d-a226-077c435291e2 does not exist. Dropping to a shell!
Be a bit more conservative, and only enable the window by default when the ram size extends beyond 64G - a 32-bit guest using PAE cannot address beyond that anyway. Due to the mmio window this translates to an effective working configuration limit of 62G/63G, depending on machine type.
Fixes: 96a8d130 ("be less conservative with the 64bit pci io window") Signed-off-by: John Levon john.levon@nutanix.com --- src/fw/paravirt.c | 28 ++++++++++++++++++++++++---- src/fw/paravirt.h | 1 + src/fw/pciinit.c | 6 +++++- 3 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 3ad9094b..5b0f191b 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -29,11 +29,14 @@ #include "stacks.h" // yield
#define MEM_4G (0x100000000ULL) +#define MEM_64G (16 * 0x100000000ULL)
// Amount of continuous ram under 4Gig u32 RamSize; // Amount of continuous ram >4Gig u64 RamSizeOver4G; +// Amount of continuous ram >64Gig +u64 RamSizeOver64G; // physical address space bits u8 CPUPhysBits; // 64bit processor @@ -591,8 +594,12 @@ qemu_cfg_e820(void) | ((u32)rtc_read(CMOS_MEM_HIGHMEM_MID) << 24) | ((u64)rtc_read(CMOS_MEM_HIGHMEM_HIGH) << 32)); RamSizeOver4G = high; + RamSizeOver64G = 0; + if (high + MEM_4G > MEM_64G) + RamSizeOver64G = high + MEM_4G - MEM_64G; e820_add(MEM_4G, high, E820_RAM); dprintf(1, "RamSizeOver4G: 0x%016llx [cmos]\n", RamSizeOver4G); + dprintf(1, "RamSizeOver64G: 0x%016llx [cmos]\n", RamSizeOver64G); }
// Populate romfile entries for legacy fw_cfg ports (that predate the @@ -774,19 +781,32 @@ static int qemu_early_e820(void) e820_add(table.address, table.length, table.type); dprintf(1, "qemu/e820: addr 0x%016llx len 0x%016llx [RAM]\n", table.address, table.length); + // address below 4g? if (table.address < MEM_4G) { - // below 4g if (RamSize < table.address + table.length) RamSize = table.address + table.length; } else { - // above 4g - if (RamSizeOver4G < table.address + table.length - MEM_4G) - RamSizeOver4G = table.address + table.length - MEM_4G; + u64 table_end = table.address + table.length; + + /* + * Note that this would ignore any span that crosses the 4G + * boundary. For RamSizeOver64G, we do account for any spans + * that cross the 64G boundary. + */ + if (RamSizeOver4G < table_end - MEM_4G) + RamSizeOver4G = table_end - MEM_4G; + + // crosses 64G ? + if (table_end > MEM_64G) { + if (RamSizeOver64G < table_end - MEM_64G) + RamSizeOver64G = table_end - MEM_64G; + } } } }
dprintf(1, "qemu/e820: RamSize: 0x%08x\n", RamSize); dprintf(1, "qemu/e820: RamSizeOver4G: 0x%016llx\n", RamSizeOver4G); + dprintf(1, "qemu/e820: RamSizeOver64G: 0x%016llx\n", RamSizeOver64G); return 1; } diff --git a/src/fw/paravirt.h b/src/fw/paravirt.h index 62a2cd07..b56e96e8 100644 --- a/src/fw/paravirt.h +++ b/src/fw/paravirt.h @@ -30,6 +30,7 @@ typedef struct QemuCfgDmaAccess {
extern u32 RamSize; extern u64 RamSizeOver4G; +extern u64 RamSizeOver64G; extern int PlatformRunningOn; extern u8 CPUPhysBits; extern u8 CPULongMode; diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index 0395fdbf..1247eb12 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1197,7 +1197,11 @@ pci_setup(void) } }
- if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver4G) { + /* + * Only enable this if we exceed 64G, as some older 32-bit Linux VMs cannot + * handle the 64-bit window correctly. + */ + if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver64G) { dprintf(1, "enabling 64-bit pci mmio window\n"); pci_pad_mem64 = 1; }
On Wed, 19 Jun 2024 11:21:14 +0100 John Levon john.levon@nutanix.com wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
virtio_balloon virtio2: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_net virtio0: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_scsi virtio1: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1
The above doesn't exactly indicate the 64-bit MMIO window as the culprit
Can you provide more data on what exactly goes wrong and where?
Does adding 'realloc' option to guest kernel CLI help?
Gave up waiting for root device. Common problems:
- Boot args (cat /proc/cmdline)
- Check rootdelay= (did the system wait long enough?)
- Check root= (did the system wait for the right device?)
- Missing modules (cat /proc/modules; ls /dev)
ALERT! /dev/disk/by-uuid/86859879-3f17-443d-a226-077c435291e2 does not exist. Dropping to a shell!
Be a bit more conservative, and only enable the window by default when the ram size extends beyond 64G - a 32-bit guest using PAE cannot address beyond that anyway. Due to the mmio window this translates to an effective working configuration limit of 62G/63G, depending on machine type.
Fixes: 96a8d130 ("be less conservative with the 64bit pci io window") Signed-off-by: John Levon john.levon@nutanix.com
src/fw/paravirt.c | 28 ++++++++++++++++++++++++---- src/fw/paravirt.h | 1 + src/fw/pciinit.c | 6 +++++- 3 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c index 3ad9094b..5b0f191b 100644 --- a/src/fw/paravirt.c +++ b/src/fw/paravirt.c @@ -29,11 +29,14 @@ #include "stacks.h" // yield
#define MEM_4G (0x100000000ULL) +#define MEM_64G (16 * 0x100000000ULL)
// Amount of continuous ram under 4Gig u32 RamSize; // Amount of continuous ram >4Gig u64 RamSizeOver4G; +// Amount of continuous ram >64Gig +u64 RamSizeOver64G; // physical address space bits u8 CPUPhysBits; // 64bit processor @@ -591,8 +594,12 @@ qemu_cfg_e820(void) | ((u32)rtc_read(CMOS_MEM_HIGHMEM_MID) << 24) | ((u64)rtc_read(CMOS_MEM_HIGHMEM_HIGH) << 32)); RamSizeOver4G = high;
+    RamSizeOver64G = 0;
+    if (high + MEM_4G > MEM_64G)
+        RamSizeOver64G = high + MEM_4G - MEM_64G;
     e820_add(MEM_4G, high, E820_RAM);
     dprintf(1, "RamSizeOver4G: 0x%016llx [cmos]\n", RamSizeOver4G);
+    dprintf(1, "RamSizeOver64G: 0x%016llx [cmos]\n", RamSizeOver64G);
}
// Populate romfile entries for legacy fw_cfg ports (that predate the @@ -774,19 +781,32 @@ static int qemu_early_e820(void) e820_add(table.address, table.length, table.type); dprintf(1, "qemu/e820: addr 0x%016llx len 0x%016llx [RAM]\n", table.address, table.length);
// address below 4g? if (table.address < MEM_4G) {
// below 4g if (RamSize < table.address + table.length) RamSize = table.address + table.length; } else {
// above 4g
if (RamSizeOver4G < table.address + table.length - MEM_4G)
RamSizeOver4G = table.address + table.length - MEM_4G;
u64 table_end = table.address + table.length;
/*
* Note that this would ignore any span that crosses the 4G
* boundary. For RamSizeOver64G, we do account for any spans
* that cross the 64G boundary.
*/
if (RamSizeOver4G < table_end - MEM_4G)
RamSizeOver4G = table_end - MEM_4G;
// crosses 64G ?
if (table_end > MEM_64G) {
if (RamSizeOver64G < table_end - MEM_64G)
RamSizeOver64G = table_end - MEM_64G;
} } }
}
dprintf(1, "qemu/e820: RamSize: 0x%08x\n", RamSize); dprintf(1, "qemu/e820: RamSizeOver4G: 0x%016llx\n", RamSizeOver4G);
dprintf(1, "qemu/e820: RamSizeOver64G: 0x%016llx\n", RamSizeOver64G); return 1;
} diff --git a/src/fw/paravirt.h b/src/fw/paravirt.h index 62a2cd07..b56e96e8 100644 --- a/src/fw/paravirt.h +++ b/src/fw/paravirt.h @@ -30,6 +30,7 @@ typedef struct QemuCfgDmaAccess {
extern u32 RamSize; extern u64 RamSizeOver4G; +extern u64 RamSizeOver64G; extern int PlatformRunningOn; extern u8 CPUPhysBits; extern u8 CPULongMode; diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index 0395fdbf..1247eb12 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1197,7 +1197,11 @@ pci_setup(void) } }
- if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver4G) {
- /*
* Only enable this if we exceed 64G, as some older 32-bit Linux VMs cannot
* handle the 64-bit window correctly.
*/
- if (CPUPhysBits >= 36 && CPULongMode && RamSizeOver64G) { dprintf(1, "enabling 64-bit pci mmio window\n"); pci_pad_mem64 = 1; }
On Thu, Jun 20, 2024 at 04:00:05PM +0200, Igor Mammedov wrote:
Regardless of which way is chosen some users will suffer one way or another. My vote would be to keep current behavior so 'modern' guests would work without issues.
The Linux kernel policy is "no regressions", I cannot say it better than Linus himself (if you'll excuse the shouting):
https://lkml.org/lkml/2018/8/3/621
This is exactly what happened here - we updated seabios and things stopped working. It's unfortunate that the long tail of legacy exists, and we all wish it didn't, but it does.
On Thu, Jun 20, 2024 at 04:09:24PM +0200, Igor Mammedov wrote:
On Wed, 19 Jun 2024 11:21:14 +0100 John Levon john.levon@nutanix.com wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
virtio_balloon virtio2: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_net virtio0: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_scsi virtio1: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1
The above doesn't exactly indicate the 64-bit MMIO window as the culprit
Can you provide more data on what exactly goes wrong and where?
Sorry, no idea, and I don't think it's a useful exercise to debug old Linux kernels.
Does adding 'realloc' option to guest kernel CLI help?
I'll try this and get back to you.
This isn't a practical solution in general though IMO, it's not reasonable to ask our downstream customers (and their downstream customers and so on) to figure out how to do this across what could well be thousands of VMs minimum.
regards john
On Thu, 20 Jun 2024 15:43:33 +0100 John Levon john.levon@nutanix.com wrote:
On Thu, Jun 20, 2024 at 04:00:05PM +0200, Igor Mammedov wrote:
Regardless of which way is chosen some users will suffer one way or another. My vote would be to keep current behavior so 'modern' guests would work without issues.
The Linux kernel policy is "no regressions", I cannot say it better than Linus himself (if you'll excuse the shouting):
well, you just upgraded the 'hardware' for a legacy OS; there are no guarantees that it will continue to work without any changes.
with this patch there will be a regression the other way around, affecting not-so-old OSes.
This is exactly what happened here - we updated seabios and things stopped working. It's unfortunate that the long tail of legacy exists, and we all wish it didn't, but it does.
as was pointed out earlier it's not qemu/seabios domain to guess what OS will be running and tune its behavior to that.
That's up to the upper layers to tune knobs/guest, since they can be aware of what the guest OS actually is.
Here goes another workaround option: use old SeaBIOS for broken OSes.
On Thu, Jun 20, 2024 at 04:09:24PM +0200, Igor Mammedov wrote:
On Wed, 19 Jun 2024 11:21:14 +0100 John Levon john.levon@nutanix.com wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
virtio_balloon virtio2: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_net virtio0: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1 virtio_scsi virtio1: virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1
The above doesn't exactly indicate the 64-bit MMIO window as the culprit
Can you provide more data on what exactly goes wrong and where?
Sorry, no idea, and I don't think it's a useful exercise to debug old Linux kernels.
well, I've just successfully installed RHEL5.11 and RHEL6.10 from i386 ISOs on virtio root disk + 64Gb RAM (upstream QEMU), and they booted to command prompt without any issues.
The justification "my OS stopped seeing the root disk for some unclear reason" might work for a closed-source OS, but for Linux there should be a more convincing story for introducing a breaking change. So far there is no evidence that it's not a guest issue/bug.
Does adding 'realloc' option to guest kernel CLI help?
I'll try this and get back to you.
This isn't a practical solution in general though IMO, it's not reasonable to ask our downstream customers (and their downstream customers and so on) to figure out how to do this across what could well be thousands of VMs minimum
(if amending guest is not an option then there are at least 2 possible workarounds on host side)
But the same goes the other way around for those who rely on hotplug (kubevirt comes to mind). There are no universal defaults, and I'd rather keep the mainstream happy when it comes to defaults, while downstream can take care of supporting corner cases and migration issues to new host infrastructure.
my 2cents, anyways it's up to maintainers to decide.
regards john
On Thu, Jun 20, 2024 at 05:47:25PM +0200, Igor Mammedov wrote:
well, you just upgraded the 'hardware' for a legacy OS; there are no guarantees that it will continue to work without any changes.
Are you saying that seabios doesn't attempt to provide a stable virtual hardware platform at all? Is that written down somewhere?
with this patch there will be regression other way around affecting not so old OSes.
It is certainly unfortunate that it took us so long to rediscover this (sorry), but again - the Linux policy is *very* clear on this - you don't get to introduce a regression for something that worked previously in order to support something new that didn't previously work.
If the policy of seabios differs from Linux, that's fine - it's your project - but clearly stating that seabios doesn't do this in the documentation would be useful, so we know not to update to newer versions ever.
Here goes another workaround option: use old SeaBIOS for broken OSes.
Your "broken OSes" are production systems for many people.
Justification 'my OS stopped seeing root disk' for some unclear reason might work for close sourced OS but for Linux there should be more convincing story for a introducing breaking change.
It is seabios that introduced the breaking change, not an old Linux version.
regards john
On Thu, Jun 20, 2024 at 04:57:35PM GMT, John Levon wrote:
On Thu, Jun 20, 2024 at 05:47:25PM +0200, Igor Mammedov wrote:
well you just upgraded 'hardware' for legacy OS, there is no guaranties that it will continue to work without any changes.
Are you saying that seabios doesn't attempt to provide a stable virtual hardware platform at all? Is that written down somewhere?
Well, seabios behavior did change in the past and probably will continue to change to make modern workloads work better.
We try to not break existing stuff, this is why the heuristic is there in the first place. The idea is: If there is no memory above 4G it is more likely that the OS in the VM does not support long mode or PAE paging mode, so better don't place pci bars above 4G.
Obviously it is not always working out as planned ...
It is certainly unfortunate that it took us so long to rediscover this (sorry), but again - the Linux policy is *very* clear on this - you don't get to introduce a regression for something that worked previously in order to support something new that didn't previously work.
Well. You are comparing apples with oranges here. Linux has a much easier job, it can simply add new interfaces (typically syscalls) for new stuff. seabios is stuck with software interfaces which are fixed and for the most part have been defined in the last century ...
take care, Gerd
Hi,
well, I've just successfully installed RHEL5.11 and RHEL6.10 from i386 ISOs on virtio root disk + 64Gb RAM (upstream QEMU), and they booted to command prompt without any issues.
Oh, interesting. Apparently the physical address space is the same in 32-bit and long mode, i.e. on x86-64 hardware PAE paging mode can map physical addresses above 64G.
rhel-6-32 kraxel ~# uname -a Linux rhel-6-32.lab.home.kraxel.org 2.6.32-754.35.1.el6.i686 #1 SMP Wed Sep 16 06:48:16 EDT 2020 i686 i686 i386 GNU/Linux rhel-6-32 kraxel ~# cat /proc/iomem [ ... ] fffc0000-ffffffff : reserved 100000000-17fffffff : System RAM 7000000000-77ffffffff : PCI Bus 0000:00 7000000000-7000003fff : 0000:00:02.0 7000004000-7000007fff : 0000:00:03.0 7000004000-7000007fff : virtio-pci [ ... ]
So the 64G / 36 phys-bits apparently simply happens to be the limit implemented by 32-bit processors (and early 64-bit processors from intel too). It's NOT a hard limit for PAE paging mode.
So I'm wondering why the 32-bit guests are running into trouble in the first place?
Also using the 64G as indicator for the heuristics doesn't look that useful any more ...
take care, Gerd
On Wed, Jun 19, 2024 at 11:21:14AM GMT, John Levon wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
Well. Why people would use *that* ubuntu version is not clear to me. It's *loooooong* out of support. Even the LTS version from that year (16.04) is not supported any more. But it is at least available for download still, so I gave it a spin.
Turns out it apparently can't deal with PCI bars mapped above 16TB (aka 44 phys-bits). Test patch below.
take care, Gerd
diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index bb44dc296047..a43876a931c9 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1189,11 +1189,16 @@ pci_setup(void)
if (CPUPhysBits) { pci_mem64_top = 1LL << CPUPhysBits; - if (CPUPhysBits > 46) { - // Old linux kernels have trouble dealing with more than 46 - // phys-bits, so avoid that for now. Seems to be a bug in the - // virtio-pci driver. Reported: centos-7, ubuntu-18.04 - pci_mem64_top = 1LL << 46; + if (CPUPhysBits > 44) { + // Old linux kernels have trouble dealing with more than 44/46 + // phys-bits. Seems to be a bug in the virtio-pci driver. + // 46: centos-7, ubuntu-18.04 + // 44: ubuntu-16.04 + // Limit the used address space to mitigate the bug, except we are + // running in a guest with more than 1TB of memory installed. + if (RamSizeOver4G < (1LL << 40)) { + pci_mem64_top = 1LL << 44; + } } }
On Fri, Jun 21, 2024 at 02:05:17PM +0200, Gerd Hoffmann wrote:
On Wed, Jun 19, 2024 at 11:21:14AM GMT, John Levon wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
Turns out it apparently can't deal with PCI bars mapped above 16TB (aka 44 phys-bits). Test patch below.
Thanks for the patch, I can confirm this also works with Ubuntu 14.04 (oldest we had to hand) as well as a couple of 32-bit Windows VMs. This is a much better fix!
Even the LTS version from that year (16.04) is not supported any more.
Even 14.04 is not yet end of life. If you're prepared to pay, they'll still support you. https://wiki.ubuntu.com/Releases
Well. Why people would use *that* ubuntu version is not clear to me. It's *loooooong* out of support.
You're in the IT dept of a large corporation. You have some critical application running on some old Dell server with Ubuntu 16.04. A move to virtualization has been mandated across the org, so you need to decommission that server. The application was built by some contractor - before your time - and the source code was long lost, due to a misadventure with a misconfigured array - again before your time.
You've tried to use a newer version, but the application depends on lots of libraries that didn't take compatibility seriously (like, say, GNOME), so it simply can't run on newer versions. You've tried for some time to work around this by building and installing dependencies but you're not an expert on dynamic linkers, and could never get that last C++ symbol to resolve.
There's no funding to build a new replacement for the app. You're aware that the OS is out of full support, so you do your best to lock down any network access and mitigate the relevant CVEs.
Now you try to upgrade your virtualization cluster, and your VM doesn't boot any more.
This kind of situation is very common. The long tail is long :(
regards john
On Fri, Jun 21, 2024 at 01:37:24PM +0100, John Levon wrote:
On Fri, Jun 21, 2024 at 02:05:17PM +0200, Gerd Hoffmann wrote:
On Wed, Jun 19, 2024 at 11:21:14AM GMT, John Levon wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
Turns out it apparently can't deal with PCI bars mapped above 16TB (aka 44 phys-bits). Test patch below.
Thanks for the patch, I can confirm this also works with Ubuntu 14.04 (oldest we had to hand) as well as a couple of 32-bit Windows VMs. This is a much better fix!
Thanks for tracking this down and testing. I agree this looks like an improved fix.
-Kevin
On Fri, Jun 21, 2024 at 01:37:24PM GMT, John Levon wrote:
On Fri, Jun 21, 2024 at 02:05:17PM +0200, Gerd Hoffmann wrote:
On Wed, Jun 19, 2024 at 11:21:14AM GMT, John Levon wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
Well. Why people would use *that* ubuntu version is not clear to me. It's *loooooong* out of support.
You're in the IT dept of a large corporation. You have some critical application running on some old Dell server with Ubuntu 16.04.
I was specifically referring to the "16.10" listed above, i.e. running critical stuff on non-LTS distros in the first place.
16.04-LTS is a much better choice, and that there are plenty of things which can happen in real life which can delay or even prevent moving applications to a newer LTS version is pretty clear.
take care, Gerd
On Fri, 21 Jun 2024 14:05:17 +0200 Gerd Hoffmann kraxel@redhat.com wrote:
On Wed, Jun 19, 2024 at 11:21:14AM GMT, John Levon wrote:
Older 32-bit Linux VMs (including Ubuntu 16.10) have issues with the 64-bit pci io window, failing during boot with errors like:
Well. Why people would use *that* ubuntu version is not clear to me. It's *loooooong* out of support. Even the LTS version from that year (16.04) is not supported any more. But it is at least available for download still, so I gave it a spin.
Turns out it apparently can't deal with PCI bars mapped above 16TB (aka 44 phys-bits). Test patch below.
take care, Gerd
diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index bb44dc296047..a43876a931c9 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1189,11 +1189,16 @@ pci_setup(void)
if (CPUPhysBits) { pci_mem64_top = 1LL << CPUPhysBits;
if (CPUPhysBits > 46) {
// Old linux kernels have trouble dealing with more than 46
// phys-bits, so avoid that for now. Seems to be a bug in the
// virtio-pci driver. Reported: centos-7, ubuntu-18.04
pci_mem64_top = 1LL << 46;
if (CPUPhysBits > 44) {
// Old linux kernels have trouble dealing with more than 44/46
// phys-bits. Seems to be a bug in the virtio-pci driver.
// 46: centos-7, ubuntu-18.04
// 44: ubuntu-16.04
// Limit the used address space to mitigate the bug, except we are
// running in a guest with more than 1TB of memory installed.
Is it possible to fix those broken drivers (centos-7 for example) and ditch this heuristic altogether? The rest of downstream can pick it up from there if they care about their customers.
if (RamSizeOver4G < (1LL << 40)) {
pci_mem64_top = 1LL << 44;
}} }
SeaBIOS mailing list -- seabios@seabios.org To unsubscribe send an email to seabios-leave@seabios.org
Hi,
Dne 21. 06. 24 v 15:20 Igor Mammedov napsal(a):
// Old linux kernels have trouble dealing with more than 44/46
// phys-bits. Seems to be a bug in the virtio-pci driver.
// 46: centos-7, ubuntu-18.04
// 44: ubuntu-16.04
// Limit the used address space to mitigate the bug, except we are
// running in a guest with more than 1TB of memory installed.
Is it possible to fix those broken drivers (centos-7 for example) and ditch this heuristic altogether?
Does this code ever runs in some baremetal use cases as well?
Or at least, meybe this could be applied only when virtio devices are present (maybe even just when transitional devices are present) ?
Thanks, Rudolf
On Fri, Jun 21, 2024 at 10:29:45PM +0200, Rudolf Marek wrote:
Hi,
Dne 21. 06. 24 v 15:20 Igor Mammedov napsal(a):
// Old linux kernels have trouble dealing with more than 44/46
// phys-bits. Seems to be a bug in the virtio-pci driver.
// 46: centos-7, ubuntu-18.04
// 44: ubuntu-16.04
// Limit the used address space to mitigate the bug, except we are
// running in a guest with more than 1TB of memory installed.
Is it possible to fix those broken drivers (centos-7 for example) and ditch this heuristic altogether?
Does this code ever runs in some baremetal use cases as well?
Or at least, meybe this could be applied only when virtio devices are present (maybe even just when transitional devices are present) ?
This code is only used on qemu (and derivatives) - specifically CONFIG_QEMU must be true. When running on coreboot, SeaBIOS expects coreboot to map all the PCI devices.
Cheers, -Kevin
On Fri, Jun 21, 2024 at 03:20:19PM GMT, Igor Mammedov wrote:
diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c index bb44dc296047..a43876a931c9 100644 --- a/src/fw/pciinit.c +++ b/src/fw/pciinit.c @@ -1189,11 +1189,16 @@ pci_setup(void)
if (CPUPhysBits) { pci_mem64_top = 1LL << CPUPhysBits;
if (CPUPhysBits > 46) {
// Old linux kernels have trouble dealing with more than 46
// phys-bits, so avoid that for now. Seems to be a bug in the
// virtio-pci driver. Reported: centos-7, ubuntu-18.04
pci_mem64_top = 1LL << 46;
if (CPUPhysBits > 44) {
// Old linux kernels have trouble dealing with more than 44/46
// phys-bits. Seems to be a bug in the virtio-pci driver.
// 46: centos-7, ubuntu-18.04
// 44: ubuntu-16.04
// Limit the used address space to mitigate the bug, except we are
// running in a guest with more than 1TB of memory installed.
Is it possible to fix those broken drivers (centos-7 for example) and ditch this heuristic altogether? The rest of downstream can pick it up from there if they care about their customers.
Some further testing showed that this is not version-specific but arch specific. Old 32-bit kernels fail >44, old 64-bit kernels fail >46.
Note that 44 = 32 + 12, i.e. this could be pfn (page frame number) hitting MAX_UINT32. Should that be the case the fix is probably not easy (didn't check the kernel source though).
Also note that releasing a kernel fix is not enough, you also have to respin install media. Distros which are *that* old typically don't get regular install media updates any more ...
In short: The idea to fix distros and drop the heuristic is IMHO not realistic.
take care, Gerd