This is a follow up to an email chain that in November:
http://www.seabios.org/pipermail/seabios/2015-November/009887.html
It was possible for the SeaBIOS code to get confused if an external reboot request occurs while seabios is already in the process of handling a boot or reboot.
This two patch series attempts to make the SeaBIOS reboot code more robust. With these patches the code maintains an invariant - if HaveRunPost is false then the BIOS code will be in a pristine state and it is okay to run the "post" boot logic; if HaveRunPost is true then the reboot logic must be run before starting the "post" boot logic. The reboot handler should always return the BIOS code to a pristine state prior to clearing HaveRunPost.
-Kevin
Kevin O'Connor (2): resume: Make KVM soft reboot loop detection more flexible post: Always set HaveRunPost prior to setting any other global variable
src/fw/csm.c | 1 + src/fw/shadow.c | 17 +++++++++++++++-- src/fw/xen.c | 1 + src/post.c | 20 ++++++++++++++++---- src/resume.c | 9 --------- src/util.h | 1 + 6 files changed, 34 insertions(+), 15 deletions(-)
Move the check for soft reboot loops from resume.c to shadow.c and directly check for the case where the copy of the BIOS in flash appears to be a memory alias instead. This prevents a hang if an external reboot request occurs during the BIOS memcpy.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/fw/shadow.c | 15 +++++++++++++-- src/resume.c | 9 --------- 2 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/src/fw/shadow.c b/src/fw/shadow.c index ee87d36..4486884 100644 --- a/src/fw/shadow.c +++ b/src/fw/shadow.c @@ -163,7 +163,18 @@ qemu_prep_reset(void) return; // QEMU doesn't map 0xc0000-0xfffff back to the original rom on a // reset, so do that manually before invoking a hard reset. + void *cstart = VSYMBOL(code32flat_start), *cend = VSYMBOL(code32flat_end); + void *hrp = &HaveRunPost; + if (readl(hrp + BIOS_SRC_OFFSET)) { + // Some old versions of KVM don't store a pristine copy of the + // BIOS in high memory. Try to shutdown the machine instead. + dprintf(1, "Unable to hard-reboot machine - attempting shutdown.\n"); + apm_shutdown(); + } + // Copy the BIOS making sure to only reset HaveRunPost at end make_bios_writable(); - memcpy(VSYMBOL(code32flat_start), VSYMBOL(code32flat_start) + BIOS_SRC_OFFSET - , SYMBOL(code32flat_end) - SYMBOL(code32flat_start)); + memcpy(cstart, cstart + BIOS_SRC_OFFSET, hrp - cstart); + memcpy(hrp + 4, hrp + 4 + BIOS_SRC_OFFSET, cend - (hrp + 4)); + barrier(); + HaveRunPost = 0; } diff --git a/src/resume.c b/src/resume.c index a5465d8..afeadcf 100644 --- a/src/resume.c +++ b/src/resume.c @@ -114,19 +114,10 @@ s3_resume(void) farcall16big(&br); }
-u8 HaveAttemptedReboot VARLOW; - // Attempt to invoke a hard-reboot. static void tryReboot(void) { - if (HaveAttemptedReboot) { - // Hard reboot has failed - try to shutdown machine. - dprintf(1, "Unable to hard-reboot machine - attempting shutdown.\n"); - apm_shutdown(); - } - HaveAttemptedReboot = 1; - dprintf(1, "Attempting a hard reboot\n");
// Setup for reset on qemu.
The HaveRunPost flag controls whether post or reboot handling is entered on a reset signal. The flag needs to be set before any other global variable because an external reboot signal could occur at any time. (If any global variable is modified prior to setting HaveRunPost then the code might enter post with global variables in a dirty state.)
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/fw/csm.c | 1 + src/fw/shadow.c | 2 ++ src/fw/xen.c | 1 + src/post.c | 20 ++++++++++++++++---- src/util.h | 1 + 5 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/src/fw/csm.c b/src/fw/csm.c index 7cadd12..b01f181 100644 --- a/src/fw/csm.c +++ b/src/fw/csm.c @@ -289,6 +289,7 @@ handle_csm(struct bregs *regs)
dprintf(3, "handle_csm regs %p AX=%04x\n", regs, regs->ax);
+ code_mutable_preinit(); pic_irqmask_write(PICMask);
switch(regs->ax) { diff --git a/src/fw/shadow.c b/src/fw/shadow.c index 4486884..bdb5c5b 100644 --- a/src/fw/shadow.c +++ b/src/fw/shadow.c @@ -123,12 +123,14 @@ make_bios_writable(void) if (vendor == PCI_VENDOR_ID_INTEL && device == PCI_DEVICE_ID_INTEL_82441) { make_bios_writable_intel(bdf, I440FX_PAM0); + code_mutable_preinit(); ShadowBDF = bdf; return; } if (vendor == PCI_VENDOR_ID_INTEL && device == PCI_DEVICE_ID_INTEL_Q35_MCH) { make_bios_writable_intel(bdf, Q35_HOST_BRIDGE_PAM0); + code_mutable_preinit(); ShadowBDF = bdf; return; } diff --git a/src/fw/xen.c b/src/fw/xen.c index 3f19ef2..a215b9e 100644 --- a/src/fw/xen.c +++ b/src/fw/xen.c @@ -71,6 +71,7 @@ void xen_preinit(void) signature, base); if (strcmp(signature, "XenVMMXenVMM") == 0) { /* Set debug_io_port first, so the following messages work. */ + code_mutable_preinit(); DebugOutputPort = 0xe9; debug_banner(); dprintf(1, "\nFound Xen hypervisor signature at %x\n", base); diff --git a/src/post.c b/src/post.c index 49c22b8..e5fa4be 100644 --- a/src/post.c +++ b/src/post.c @@ -41,10 +41,6 @@ ivt_init(void) { dprintf(3, "init ivt\n");
- // Setup reset-vector entry point (controls legacy reboots). - HaveRunPost = 1; - rtc_write(CMOS_RESET_CODE, 0); - // Initialize all vectors to the default handler. int i; for (i=0; i<256; i++) @@ -304,10 +300,26 @@ reloc_preinit(void *f, void *arg) func(arg); }
+// Runs after all code is present and prior to any modifications +void +code_mutable_preinit(void) +{ + if (HaveRunPost) + // Already run + return; + // Setup reset-vector entry point (controls legacy reboots). + rtc_write(CMOS_RESET_CODE, 0); + barrier(); + HaveRunPost = 1; + barrier(); +} + // Setup for code relocation and then relocate. void VISIBLE32INIT dopost(void) { + code_mutable_preinit(); + // Detect ram and setup internal malloc. qemu_preinit(); coreboot_preinit(); diff --git a/src/util.h b/src/util.h index 76db57f..43f2199 100644 --- a/src/util.h +++ b/src/util.h @@ -223,6 +223,7 @@ void device_hardware_setup(void); void prepareboot(void); void startBoot(void); void reloc_preinit(void *f, void *arg); +void code_mutable_preinit(void);
// serial.c void serial_setup(void);
On Tue, Jan 12, 2016 at 02:57:25PM -0500, Kevin O'Connor wrote:
This is a follow up to an email chain that in November:
http://www.seabios.org/pipermail/seabios/2015-November/009887.html
It was possible for the SeaBIOS code to get confused if an external reboot request occurs while seabios is already in the process of handling a boot or reboot.
This two patch series attempts to make the SeaBIOS reboot code more robust. With these patches the code maintains an invariant - if HaveRunPost is false then the BIOS code will be in a pristine state and it is okay to run the "post" boot logic; if HaveRunPost is true then the reboot logic must be run before starting the "post" boot logic. The reboot handler should always return the BIOS code to a pristine state prior to clearing HaveRunPost.
FYI, I committed this series.
-Kevin
On Tue, Jan 12, 2016 at 02:57:25PM -0500, Kevin O'Connor wrote:
This is a follow up to an email chain that in November:
http://www.seabios.org/pipermail/seabios/2015-November/009887.html
It was possible for the SeaBIOS code to get confused if an external reboot request occurs while seabios is already in the process of handling a boot or reboot.
This two patch series attempts to make the SeaBIOS reboot code more robust. With these patches the code maintains an invariant - if HaveRunPost is false then the BIOS code will be in a pristine state and it is okay to run the "post" boot logic; if HaveRunPost is true then the reboot logic must be run before starting the "post" boot logic. The reboot handler should always return the BIOS code to a pristine state prior to clearing HaveRunPost.
FYI, I committed this series.
Thanks, they are very useful !
Regards, -Gonglei
This is a follow up to an email chain that in November:
http://www.seabios.org/pipermail/seabios/2015-November/009887.html
It was possible for the SeaBIOS code to get confused if an external reboot request occurs while seabios is already in the process of handling a boot or reboot.
This two patch series attempts to make the SeaBIOS reboot code more robust. With these patches the code maintains an invariant - if HaveRunPost is false then the BIOS code will be in a pristine state and it is okay to run the "post" boot logic; if HaveRunPost is true then the reboot logic must be run before starting the "post" boot logic. The reboot handler should always return the BIOS code to a pristine state prior to clearing HaveRunPost.
Kevin, thanks for your patch. I've tested yesterday for several days. And it works very well solving all the reset racing problems i've met.
-Kevin
Kevin O'Connor (2): resume: Make KVM soft reboot loop detection more flexible post: Always set HaveRunPost prior to setting any other global variable
src/fw/csm.c | 1 + src/fw/shadow.c | 17 +++++++++++++++-- src/fw/xen.c | 1 + src/post.c | 20 ++++++++++++++++---- src/resume.c | 9 --------- src/util.h | 1 + 6 files changed, 34 insertions(+), 15 deletions(-)
-- 2.5.0