This reset issue was reported on the QEMU issue tracker at https://gitlab.com/qemu-project/qemu/-/issues/766
A reset with QEMU -machine q35 -accel tcg leads to a reset loop, and with -machine q35 -accel kvm the reset only works because KVM ignores the read-only semantics of the C-F segments.
Details about the issue are in "reset: force standard PCI configuration access".
Volker Rümelin (2): pci: refactor the pci_config_*() functions reset: force standard PCI configuration access
src/fw/shadow.c | 16 +++++---- src/hw/pci.c | 86 ++++++++++++++++++++++++++++++++++++++++++------- src/hw/pci.h | 19 ++++++++++- 3 files changed, 101 insertions(+), 20 deletions(-)
Split out the Standard PCI Configuration Access Mechanism pci_ioconfig_*() functions from the pci_config_*() functions. The standard PCI CAM functions will be used in the next patch.
Signed-off-by: Volker Rümelin vr_qemu@t-online.de --- src/hw/pci.c | 54 ++++++++++++++++++++++++++++++++++++++++------------ src/hw/pci.h | 12 +++++++++++- 2 files changed, 53 insertions(+), 13 deletions(-)
diff --git a/src/hw/pci.c b/src/hw/pci.c index 3df1dae..f13cbde 100644 --- a/src/hw/pci.c +++ b/src/hw/pci.c @@ -26,63 +26,93 @@ static u32 ioconfig_cmd(u16 bdf, u32 addr) return 0x80000000 | (bdf << 8) | (addr & 0xfc); }
+void pci_ioconfig_writel(u16 bdf, u32 addr, u32 val) +{ + outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); + outl(val, PORT_PCI_DATA); +} + void pci_config_writel(u16 bdf, u32 addr, u32 val) { if (!MODESEGMENT && mmconfig) { writel(mmconfig_addr(bdf, addr), val); } else { - outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); - outl(val, PORT_PCI_DATA); + pci_ioconfig_writel(bdf, addr, val); } }
+void pci_ioconfig_writew(u16 bdf, u32 addr, u16 val) +{ + outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); + outw(val, PORT_PCI_DATA + (addr & 2)); +} + void pci_config_writew(u16 bdf, u32 addr, u16 val) { if (!MODESEGMENT && mmconfig) { writew(mmconfig_addr(bdf, addr), val); } else { - outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); - outw(val, PORT_PCI_DATA + (addr & 2)); + pci_ioconfig_writew(bdf, addr, val); } }
+void pci_ioconfig_writeb(u16 bdf, u32 addr, u8 val) +{ + outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); + outb(val, PORT_PCI_DATA + (addr & 3)); +} + void pci_config_writeb(u16 bdf, u32 addr, u8 val) { if (!MODESEGMENT && mmconfig) { writeb(mmconfig_addr(bdf, addr), val); } else { - outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); - outb(val, PORT_PCI_DATA + (addr & 3)); + pci_ioconfig_writeb(bdf, addr, val); } }
+u32 pci_ioconfig_readl(u16 bdf, u32 addr) +{ + outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); + return inl(PORT_PCI_DATA); +} + u32 pci_config_readl(u16 bdf, u32 addr) { if (!MODESEGMENT && mmconfig) { return readl(mmconfig_addr(bdf, addr)); } else { - outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); - return inl(PORT_PCI_DATA); + return pci_ioconfig_readl(bdf, addr); } }
+u16 pci_ioconfig_readw(u16 bdf, u32 addr) +{ + outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); + return inw(PORT_PCI_DATA + (addr & 2)); +} + u16 pci_config_readw(u16 bdf, u32 addr) { if (!MODESEGMENT && mmconfig) { return readw(mmconfig_addr(bdf, addr)); } else { - outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); - return inw(PORT_PCI_DATA + (addr & 2)); + return pci_ioconfig_readw(bdf, addr); } }
+u8 pci_ioconfig_readb(u16 bdf, u32 addr) +{ + outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); + return inb(PORT_PCI_DATA + (addr & 3)); +} + u8 pci_config_readb(u16 bdf, u32 addr) { if (!MODESEGMENT && mmconfig) { return readb(mmconfig_addr(bdf, addr)); } else { - outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); - return inb(PORT_PCI_DATA + (addr & 3)); + return pci_ioconfig_readb(bdf, addr); } }
diff --git a/src/hw/pci.h b/src/hw/pci.h index 01c51f7..ee6acaf 100644 --- a/src/hw/pci.h +++ b/src/hw/pci.h @@ -32,6 +32,15 @@ static inline u16 pci_bus_devfn_to_bdf(int bus, u16 devfn) { ; BDF >= 0 \ ; BDF=pci_next(BDF, (BUS)))
+// standard PCI configuration access mechanism +void pci_ioconfig_writel(u16 bdf, u32 addr, u32 val); +void pci_ioconfig_writew(u16 bdf, u32 addr, u16 val); +void pci_ioconfig_writeb(u16 bdf, u32 addr, u8 val); +u32 pci_ioconfig_readl(u16 bdf, u32 addr); +u16 pci_ioconfig_readw(u16 bdf, u32 addr); +u8 pci_ioconfig_readb(u16 bdf, u32 addr); + +// PCI configuration access using either PCI CAM or PCIe ECAM void pci_config_writel(u16 bdf, u32 addr, u32 val); void pci_config_writew(u16 bdf, u32 addr, u16 val); void pci_config_writeb(u16 bdf, u32 addr, u8 val); @@ -39,9 +48,10 @@ u32 pci_config_readl(u16 bdf, u32 addr); u16 pci_config_readw(u16 bdf, u32 addr); u8 pci_config_readb(u16 bdf, u32 addr); void pci_config_maskw(u16 bdf, u32 addr, u16 off, u16 on); -void pci_enable_mmconfig(u64 addr, const char *name); u8 pci_find_capability(u16 bdf, u8 cap_id, u8 cap); int pci_next(int bdf, int bus); + +void pci_enable_mmconfig(u64 addr, const char *name); int pci_probe_host(void); void pci_reboot(void);
After a reset of a QEMU -machine q35 guest, the PCI Express Enhanced Configuration Mechanism is disabled and the variable mmconfig no longer matches the configuration register MCHBAR of the Q35 chipset. Until the variable mmconfig is reset to 0, all pci_config_*() functions no longer work.
The variable mmconfig is located in the read-only F-segment. To reset it, the F-segment must first be made writable, which requires the pci_config_*() functions, but they do not work while mmconfig is stale.
Replace all pci_config_*() calls with Standard PCI Configuration Mechanism pci_ioconfig_*() calls until mmconfig is reset to 0.
This fixes the error output

    In resume (status=0)
    In 32bit resume
    Attempting a hard reboot
    Unable to unlock ram - bridge not found

and a reset loop with QEMU -accel tcg.
Signed-off-by: Volker Rümelin vr_qemu@t-online.de --- src/fw/shadow.c | 16 +++++++++------- src/hw/pci.c | 32 ++++++++++++++++++++++++++++++++ src/hw/pci.h | 7 +++++++ 3 files changed, 48 insertions(+), 7 deletions(-)
diff --git a/src/fw/shadow.c b/src/fw/shadow.c index 4c627a8..0722df2 100644 --- a/src/fw/shadow.c +++ b/src/fw/shadow.c @@ -32,8 +32,8 @@ __make_bios_writable_intel(u16 bdf, u32 pam0) { // Read in current PAM settings from pci config space union pamdata_u pamdata; - pamdata.data32[0] = pci_config_readl(bdf, ALIGN_DOWN(pam0, 4)); - pamdata.data32[1] = pci_config_readl(bdf, ALIGN_DOWN(pam0, 4) + 4); + pamdata.data32[0] = pci_ioconfig_readl(bdf, ALIGN_DOWN(pam0, 4)); + pamdata.data32[1] = pci_ioconfig_readl(bdf, ALIGN_DOWN(pam0, 4) + 4); u8 *pam = &pamdata.data8[pam0 & 0x03];
// Make ram from 0xc0000-0xf0000 writable @@ -46,8 +46,8 @@ __make_bios_writable_intel(u16 bdf, u32 pam0) pam[0] = 0x30;
// Write PAM settings back to pci config space - pci_config_writel(bdf, ALIGN_DOWN(pam0, 4), pamdata.data32[0]); - pci_config_writel(bdf, ALIGN_DOWN(pam0, 4) + 4, pamdata.data32[1]); + pci_ioconfig_writel(bdf, ALIGN_DOWN(pam0, 4), pamdata.data32[0]); + pci_ioconfig_writel(bdf, ALIGN_DOWN(pam0, 4) + 4, pamdata.data32[1]);
if (!ram_present) // Copy bios. @@ -59,7 +59,7 @@ __make_bios_writable_intel(u16 bdf, u32 pam0) static void make_bios_writable_intel(u16 bdf, u32 pam0) { - int reg = pci_config_readb(bdf, pam0); + int reg = pci_ioconfig_readb(bdf, pam0); if (!(reg & 0x10)) { // QEMU doesn't fully implement the piix shadow capabilities - // if ram isn't backing the bios segment when shadowing is @@ -125,8 +125,8 @@ make_bios_writable(void) // At this point, statically allocated variables can't be written, // so do this search manually. int bdf; - foreachbdf(bdf, 0) { - u32 vendev = pci_config_readl(bdf, PCI_VENDOR_ID); + pci_ioconfig_foreachbdf(bdf, 0) { + u32 vendev = pci_ioconfig_readl(bdf, PCI_VENDOR_ID); u16 vendor = vendev & 0xffff, device = vendev >> 16; if (vendor == PCI_VENDOR_ID_INTEL && device == PCI_DEVICE_ID_INTEL_82441) { @@ -183,10 +183,12 @@ qemu_reboot(void) apm_shutdown(); } make_bios_writable(); + pci_disable_mmconfig(); HaveRunPost = 3; } else { // Copy the BIOS making sure to only reset HaveRunPost at end make_bios_writable(); + pci_disable_mmconfig(); u32 cstart = SYMBOL(code32flat_start), cend = SYMBOL(code32flat_end); memcpy((void*)cstart, flash + cstart, hrp - cstart); memcpy((void*)hrp + 4, flash + hrp + 4, cend - (hrp + 4)); diff --git a/src/hw/pci.c b/src/hw/pci.c index f13cbde..ccf208a 100644 --- a/src/hw/pci.c +++ b/src/hw/pci.c @@ -133,6 +133,11 @@ pci_enable_mmconfig(u64 addr, const char *name) mmconfig = addr; }
+void pci_disable_mmconfig(void) +{ + mmconfig = 0; +} + u8 pci_find_capability(u16 bdf, u8 cap_id, u8 cap) { int i; @@ -157,6 +162,33 @@ u8 pci_find_capability(u16 bdf, u8 cap_id, u8 cap) return 0; }
+// Helper function for pci_ioconfig_foreachbdf() macro - return next device +int pci_ioconfig_next(int bdf, int bus) +{ + if (pci_bdf_to_fn(bdf) == 0 + && (pci_ioconfig_readb(bdf, PCI_HEADER_TYPE) & 0x80) == 0) + // Last found device wasn't a multi-function device - skip to + // the next device. + bdf += 8; + else + bdf += 1; + + for (;;) { + if (pci_bdf_to_bus(bdf) != bus) + return -1; + + u16 v = pci_ioconfig_readw(bdf, PCI_VENDOR_ID); + if (v != 0x0000 && v != 0xffff) + // Device is present. + return bdf; + + if (pci_bdf_to_fn(bdf) == 0) + bdf += 8; + else + bdf += 1; + } +} + // Helper function for foreachbdf() macro - return next device int pci_next(int bdf, int bus) diff --git a/src/hw/pci.h b/src/hw/pci.h index ee6acaf..ec45fca 100644 --- a/src/hw/pci.h +++ b/src/hw/pci.h @@ -27,6 +27,11 @@ static inline u16 pci_bus_devfn_to_bdf(int bus, u16 devfn) { return (bus << 8) | devfn; }
+#define pci_ioconfig_foreachbdf(BDF, BUS) \ + for (BDF=pci_ioconfig_next(pci_bus_devfn_to_bdf((BUS), 0)-1, (BUS)) \ + ; BDF >= 0 \ + ; BDF=pci_ioconfig_next(BDF, (BUS))) + #define foreachbdf(BDF, BUS) \ for (BDF=pci_next(pci_bus_devfn_to_bdf((BUS), 0)-1, (BUS)) \ ; BDF >= 0 \ @@ -39,6 +44,7 @@ void pci_ioconfig_writeb(u16 bdf, u32 addr, u8 val); u32 pci_ioconfig_readl(u16 bdf, u32 addr); u16 pci_ioconfig_readw(u16 bdf, u32 addr); u8 pci_ioconfig_readb(u16 bdf, u32 addr); +int pci_ioconfig_next(int bdf, int bus);
// PCI configuration access using either PCI CAM or PCIe ECAM void pci_config_writel(u16 bdf, u32 addr, u32 val); @@ -52,6 +58,7 @@ u8 pci_find_capability(u16 bdf, u8 cap_id, u8 cap); int pci_next(int bdf, int bus);
void pci_enable_mmconfig(u64 addr, const char *name); +void pci_disable_mmconfig(void); int pci_probe_host(void); void pci_reboot(void);
> After a reset of a QEMU -machine q35 guest, the PCI Express Enhanced Configuration Mechanism is disabled and the variable mmconfig no longer matches the configuration register MCHBAR

Sorry, I used the wrong register name. It's of course PCIEXBAR.

> of the Q35 chipset. Until the variable mmconfig is reset to 0, all pci_config_*() functions no longer work.
The variable mmconfig is located in the read-only F-segment. To reset it the pci_config_*() functions are needed, but they do not work.
Replace all pci_config_*() calls with Standard PCI Configuration Mechanism pci_ioconfig_*() calls until mmconfig is reset to 0.
This fixes
In resume (status=0) In 32bit resume Attempting a hard reboot Unable to unlock ram - bridge not found
and a reset loop with QEMU -accel tcg.
Signed-off-by: Volker Rümelin <vr_qemu@t-online.de>
src/fw/shadow.c | 16 +++++++++------- src/hw/pci.c | 32 ++++++++++++++++++++++++++++++++ src/hw/pci.h | 7 +++++++ 3 files changed, 48 insertions(+), 7 deletions(-)
diff --git a/src/fw/shadow.c b/src/fw/shadow.c index 4c627a8..0722df2 100644 --- a/src/fw/shadow.c +++ b/src/fw/shadow.c @@ -32,8 +32,8 @@ __make_bios_writable_intel(u16 bdf, u32 pam0) { // Read in current PAM settings from pci config space union pamdata_u pamdata;
- pamdata.data32[0] = pci_config_readl(bdf, ALIGN_DOWN(pam0, 4));
- pamdata.data32[1] = pci_config_readl(bdf, ALIGN_DOWN(pam0, 4) + 4);
+ pamdata.data32[0] = pci_ioconfig_readl(bdf, ALIGN_DOWN(pam0, 4));
+ pamdata.data32[1] = pci_ioconfig_readl(bdf, ALIGN_DOWN(pam0, 4) + 4); u8 *pam = &pamdata.data8[pam0 & 0x03];
// Make ram from 0xc0000-0xf0000 writable
@@ -46,8 +46,8 @@ __make_bios_writable_intel(u16 bdf, u32 pam0) pam[0] = 0x30;
// Write PAM settings back to pci config space
- pci_config_writel(bdf, ALIGN_DOWN(pam0, 4), pamdata.data32[0]);
- pci_config_writel(bdf, ALIGN_DOWN(pam0, 4) + 4, pamdata.data32[1]);
+ pci_ioconfig_writel(bdf, ALIGN_DOWN(pam0, 4), pamdata.data32[0]);
+ pci_ioconfig_writel(bdf, ALIGN_DOWN(pam0, 4) + 4, pamdata.data32[1]);
if (!ram_present) // Copy bios.
@@ -59,7 +59,7 @@ __make_bios_writable_intel(u16 bdf, u32 pam0) static void make_bios_writable_intel(u16 bdf, u32 pam0) {
- int reg = pci_config_readb(bdf, pam0);
+ int reg = pci_ioconfig_readb(bdf, pam0); if (!(reg & 0x10)) { // QEMU doesn't fully implement the piix shadow capabilities - // if ram isn't backing the bios segment when shadowing is
@@ -125,8 +125,8 @@ make_bios_writable(void) // At this point, statically allocated variables can't be written, // so do this search manually. int bdf;
- foreachbdf(bdf, 0) {
- u32 vendev = pci_config_readl(bdf, PCI_VENDOR_ID);
+ pci_ioconfig_foreachbdf(bdf, 0) {
+ u32 vendev = pci_ioconfig_readl(bdf, PCI_VENDOR_ID); u16 vendor = vendev & 0xffff, device = vendev >> 16; if (vendor == PCI_VENDOR_ID_INTEL && device == PCI_DEVICE_ID_INTEL_82441) {
@@ -183,10 +183,12 @@ qemu_reboot(void) apm_shutdown(); } make_bios_writable();
+ pci_disable_mmconfig(); HaveRunPost = 3; } else { // Copy the BIOS making sure to only reset HaveRunPost at end make_bios_writable();
+ pci_disable_mmconfig(); u32 cstart = SYMBOL(code32flat_start), cend = SYMBOL(code32flat_end); memcpy((void*)cstart, flash + cstart, hrp - cstart); memcpy((void*)hrp + 4, flash + hrp + 4, cend - (hrp + 4));
diff --git a/src/hw/pci.c b/src/hw/pci.c index f13cbde..ccf208a 100644 --- a/src/hw/pci.c +++ b/src/hw/pci.c @@ -133,6 +133,11 @@ pci_enable_mmconfig(u64 addr, const char *name) mmconfig = addr; }
+void pci_disable_mmconfig(void) +{
+ mmconfig = 0;
+}
+
 u8 pci_find_capability(u16 bdf, u8 cap_id, u8 cap) { int i;
@@ -157,6 +162,33 @@ u8 pci_find_capability(u16 bdf, u8 cap_id, u8 cap) return 0; }
+// Helper function for pci_ioconfig_foreachbdf() macro - return next device +int pci_ioconfig_next(int bdf, int bus) +{
+    if (pci_bdf_to_fn(bdf) == 0
+        && (pci_ioconfig_readb(bdf, PCI_HEADER_TYPE) & 0x80) == 0)
+        // Last found device wasn't a multi-function device - skip to
+        // the next device.
+        bdf += 8;
+    else
+        bdf += 1;
+
+    for (;;) {
+        if (pci_bdf_to_bus(bdf) != bus)
+            return -1;
+
+        u16 v = pci_ioconfig_readw(bdf, PCI_VENDOR_ID);
+        if (v != 0x0000 && v != 0xffff)
+            // Device is present.
+            return bdf;
+
+        if (pci_bdf_to_fn(bdf) == 0)
+            bdf += 8;
+        else
+            bdf += 1;
+    }
+}
+
 // Helper function for foreachbdf() macro - return next device int pci_next(int bdf, int bus)
diff --git a/src/hw/pci.h b/src/hw/pci.h index ee6acaf..ec45fca 100644 --- a/src/hw/pci.h +++ b/src/hw/pci.h @@ -27,6 +27,11 @@ static inline u16 pci_bus_devfn_to_bdf(int bus, u16 devfn) { return (bus << 8) | devfn; }
+#define pci_ioconfig_foreachbdf(BDF, BUS) \
+    for (BDF=pci_ioconfig_next(pci_bus_devfn_to_bdf((BUS), 0)-1, (BUS)) \
+         ; BDF >= 0 \
+         ; BDF=pci_ioconfig_next(BDF, (BUS)))
+
 #define foreachbdf(BDF, BUS) \ for (BDF=pci_next(pci_bus_devfn_to_bdf((BUS), 0)-1, (BUS)) \ ; BDF >= 0 \
@@ -39,6 +44,7 @@ void pci_ioconfig_writeb(u16 bdf, u32 addr, u8 val); u32 pci_ioconfig_readl(u16 bdf, u32 addr); u16 pci_ioconfig_readw(u16 bdf, u32 addr); u8 pci_ioconfig_readb(u16 bdf, u32 addr); +int pci_ioconfig_next(int bdf, int bus);
// PCI configuration access using either PCI CAM or PCIe ECAM void pci_config_writel(u16 bdf, u32 addr, u32 val); @@ -52,6 +58,7 @@ u8 pci_find_capability(u16 bdf, u8 cap_id, u8 cap); int pci_next(int bdf, int bus);
void pci_enable_mmconfig(u64 addr, const char *name); +void pci_disable_mmconfig(void); int pci_probe_host(void); void pci_reboot(void);
On Sun, Mar 20, 2022 at 10:45:00AM +0100, Volker Rümelin wrote:
> This reset issue was reported on the QEMU issue tracker at https://gitlab.com/qemu-project/qemu/-/issues/766
> A reset with QEMU -machine q35 -accel tcg leads to a reset loop and with -machine q35 -accel kvm the reset only works because KVM ignores the read-only semantics of the C-F segments.
> Details about the issue are in "reset: force standard PCI configuration access".
> Volker Rümelin (2): pci: refactor the pci_config_*() functions reset: force standard PCI configuration access
Series looks good to me.
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
take care, Gerd