This series extends the existing "vgafixup.py" assembler patching mechanism to also patch out the "leal" instruction that old versions of x86emu don't support. With this series in place, fc11 and fc12 can now boot into graphics when using qemu with "std" vga.
Because the assembler munging is getting a bit complicated, the first patch adds a compile time option to disable all of it. This allows one to build the vgabios without any of the workarounds.
The out-of-line emulate_leal function does have a bit of overhead - one x86 instruction is replaced with over 20 instructions. The last patch in the series optimizes the common case so that most leal instructions are replaced with only 4-6 instructions.
Also at: https://github.com/KevinOConnor/seabios/tree/testing
-Kevin
Kevin O'Connor (3): vgabios: Add config option for assembler fixups vgabios: Emulate "leal" instruction vgabios: Optimize leal instruction fixup
Makefile | 21 +++++++++--------- scripts/vgafixup.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++--- vgasrc/Kconfig | 10 +++++++++ vgasrc/vgaentry.S | 49 +++++++++++++++++++++++------------------- 4 files changed, 107 insertions(+), 35 deletions(-)
Add a kconfig build option (CONFIG_VGA_FIXUP_ASM) to allow users to build the vgabios without the complex assembler fixups that work around emulator bugs.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- Makefile | 21 +++++++++++---------- vgasrc/Kconfig | 10 ++++++++++ vgasrc/vgaentry.S | 4 ++++ 3 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/Makefile b/Makefile index a84a5f7..a4d945c 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ COMMONCFLAGS := -I$(OUT) -Isrc -Os -MD -g \ -Wall -Wno-strict-aliasing -Wold-style-definition \ $(call cc-option,$(CC),-Wtype-limits,) \ -m32 -march=i386 -mregparm=3 -mpreferred-stack-boundary=2 \ - -minline-all-stringops \ + -minline-all-stringops -fomit-frame-pointer \ -freg-struct-return -ffreestanding -fno-delete-null-pointer-checks \ -ffunction-sections -fdata-sections -fno-common -fno-merge-constants COMMONCFLAGS += $(call cc-option,$(CC),-nopie,) @@ -64,15 +64,14 @@ COMMONCFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) COMMONCFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) COMMA := ,
-CFLAGS32FLAT := $(COMMONCFLAGS) -DMODE16=0 -DMODESEGMENT=0 -fomit-frame-pointer +CFLAGS32FLAT := $(COMMONCFLAGS) -DMODE16=0 -DMODESEGMENT=0 CFLAGSSEG := $(COMMONCFLAGS) -DMODESEGMENT=1 -fno-defer-pop \ $(call cc-option,$(CC),-fno-jump-tables,-DMANUAL_NO_JUMP_TABLE) \ $(call cc-option,$(CC),-fno-tree-switch-conversion,) -CFLAGS32SEG := $(CFLAGSSEG) -DMODE16=0 -fomit-frame-pointer -CFLAGS16INC := $(CFLAGSSEG) -DMODE16=1 \ +CFLAGS32SEG := $(CFLAGSSEG) -DMODE16=0 +CFLAGS16 := $(CFLAGSSEG) -DMODE16=1 \ $(call cc-option,$(CC),-m16,-Wa$(COMMA)src/code16gcc.s) \ $(call cc-option,$(CC),--param large-stack-frame=4,-fno-inline) -CFLAGS16 := $(CFLAGS16INC) -fomit-frame-pointer
# Run with "make V=1" to see the actual compile commands ifdef V @@ -210,23 +209,25 @@ SRCVGA=src/output.c src/string.c src/hw/pci.c src/hw/serialio.c \ vgasrc/clext.c vgasrc/bochsvga.c vgasrc/geodevga.c \ src/fw/coreboot.c vgasrc/cbvga.c
-CFLAGS16VGA = $(CFLAGS16INC) -Isrc - -$(OUT)vgaccode16.raw.s: $(OUT)autoconf.h $(patsubst %.c, $(OUT)%.o,$(SRCVGA)) ; $(call whole-compile, $(CFLAGS16VGA) -S, $(SRCVGA),$@) +ifeq "$(CONFIG_VGA_FIXUP_ASM)" "y" +$(OUT)vgaccode16.raw.s: $(OUT)autoconf.h $(patsubst %.c, $(OUT)%.o,$(SRCVGA)) ; $(call whole-compile, $(filter-out -fomit-frame-pointer,$(CFLAGS16)) -fno-omit-frame-pointer -S -Isrc, $(SRCVGA),$@)
$(OUT)vgaccode16.o: $(OUT)vgaccode16.raw.s scripts/vgafixup.py @echo " Fixup VGA rom assembler" $(Q)$(PYTHON) ./scripts/vgafixup.py $< $(OUT)vgaccode16.s $(Q)$(AS) --32 src/code16gcc.s $(OUT)vgaccode16.s -o $@ +else +$(OUT)vgaccode16.o: $(OUT)autoconf.h $(patsubst %.c, $(OUT)%.o,$(SRCVGA)) ; $(call whole-compile, $(CFLAGS16) -Isrc, $(SRCVGA),$@) +endif
$(OUT)vgaentry.o: vgasrc/vgaentry.S $(OUT)autoconf.h $(OUT)asm-offsets.h @echo " Compiling (16bit) $@" - $(Q)$(CC) $(CFLAGS16VGA) -c -D__ASSEMBLY__ $< -o $@ + $(Q)$(CC) $(CFLAGS16) -c -D__ASSEMBLY__ $< -o $@
$(OUT)vgarom.o: $(OUT)vgaccode16.o $(OUT)vgaentry.o $(OUT)vgasrc/vgalayout.lds scripts/buildversion.sh @echo " Linking $@" $(Q)./scripts/buildversion.sh $(OUT)vgaversion.c VAR16 - $(Q)$(CC) $(CFLAGS16VGA) -c $(OUT)vgaversion.c -o $(OUT)vgaversion.o + $(Q)$(CC) $(CFLAGS16) -c $(OUT)vgaversion.c -o $(OUT)vgaversion.o $(Q)$(LD) --gc-sections -T $(OUT)vgasrc/vgalayout.lds $(OUT)vgaccode16.o $(OUT)vgaentry.o $(OUT)vgaversion.o -o $@
$(OUT)vgabios.bin.raw: $(OUT)vgarom.o diff --git a/vgasrc/Kconfig b/vgasrc/Kconfig index 400e8da..27a24c9 100644 --- a/vgasrc/Kconfig +++ b/vgasrc/Kconfig @@ -90,6 +90,16 @@ menu "VGA ROM" Support emulating text mode features when only a framebuffer is available.
+ config VGA_FIXUP_ASM + bool "Fixup assembler to work with broken emulators" + default y + help + This option will cause the build to attempt to avoid + certain x86 machine instructions that are known to confuse + some emulators. In particular, it works around + deficiencies in the Windows vgabios emulator and the + x86emu vgabios emulator (frequently used in Xorg). + config VGA_ALLOCATE_EXTRA_STACK depends on BUILD_VGABIOS bool "Allocate an internal stack for 16bit interrupt entry point" diff --git a/vgasrc/vgaentry.S b/vgasrc/vgaentry.S index e0ab954..7ca550d 100644 --- a/vgasrc/vgaentry.S +++ b/vgasrc/vgaentry.S @@ -64,6 +64,7 @@ x86emu_fault: // This macro implements a call while avoiding instructions // that old versions of x86emu have problems with. .macro VGA_CALLL cfunc +#if CONFIG_VGA_FIXUP_ASM // Make sure leal instruction works. movl $0x8000, %ecx leal (%ecx, %ecx, 1), %ecx @@ -72,6 +73,9 @@ x86emu_fault: // Use callw instead of calll push %ax callw \cfunc +#else + calll \cfunc +#endif .endm
// This macro is the same as ENTRY_ARG except VGA_CALLL is used.
Dear Kevin,
Am Freitag, den 10.04.2015, 11:07 -0400 schrieb Kevin O'Connor:
Add a kconfig build option (CONFIG_VGA_FIXUP_ASM) to allow users to build the vgabios without the complex assembler fixups that work around emulator bugs.
Signed-off-by: Kevin O'Connor kevin@koconnor.net
Makefile | 21 +++++++++++---------- vgasrc/Kconfig | 10 ++++++++++ vgasrc/vgaentry.S | 4 ++++ 3 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/Makefile b/Makefile index a84a5f7..a4d945c 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ COMMONCFLAGS := -I$(OUT) -Isrc -Os -MD -g \ -Wall -Wno-strict-aliasing -Wold-style-definition \ $(call cc-option,$(CC),-Wtype-limits,) \ -m32 -march=i386 -mregparm=3 -mpreferred-stack-boundary=2 \
- -minline-all-stringops \
+ -minline-all-stringops -fomit-frame-pointer \
Could you elaborate on why `-fomit-frame-pointer` causes issues and needs to be moved around?
-freg-struct-return -ffreestanding -fno-delete-null-pointer-checks \ -ffunction-sections -fdata-sections -fno-common -fno-merge-constants
COMMONCFLAGS += $(call cc-option,$(CC),-nopie,) @@ -64,15 +64,14 @@ COMMONCFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) COMMONCFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) COMMA := ,
-CFLAGS32FLAT := $(COMMONCFLAGS) -DMODE16=0 -DMODESEGMENT=0 -fomit-frame-pointer +CFLAGS32FLAT := $(COMMONCFLAGS) -DMODE16=0 -DMODESEGMENT=0 CFLAGSSEG := $(COMMONCFLAGS) -DMODESEGMENT=1 -fno-defer-pop \ $(call cc-option,$(CC),-fno-jump-tables,-DMANUAL_NO_JUMP_TABLE) \ $(call cc-option,$(CC),-fno-tree-switch-conversion,) -CFLAGS32SEG := $(CFLAGSSEG) -DMODE16=0 -fomit-frame-pointer -CFLAGS16INC := $(CFLAGSSEG) -DMODE16=1 \ +CFLAGS32SEG := $(CFLAGSSEG) -DMODE16=0 +CFLAGS16 := $(CFLAGSSEG) -DMODE16=1 \ $(call cc-option,$(CC),-m16,-Wa$(COMMA)src/code16gcc.s) \ $(call cc-option,$(CC),--param large-stack-frame=4,-fno-inline) -CFLAGS16 := $(CFLAGS16INC) -fomit-frame-pointer
[…]
+ config VGA_FIXUP_ASM
bool "Fixup assembler to work with broken emulators"
default y
help
This option will cause the build to attempt to avoid
certain x86 machine instructions that are known to confuse
some emulators. In particular, it works around
deficiencies in the Windows vgabios emulator and the
Maybe write "Microsoft Windows" here instead of just "Windows"?
x86emu vgabios emulator (frequently used in Xorg).
- config VGA_ALLOCATE_EXTRA_STACK depends on BUILD_VGABIOS bool "Allocate an internal stack for 16bit interrupt entry point"
[…]
Thanks,
Paul
On Sat, Apr 11, 2015 at 08:58:20AM +0200, Paul Menzel wrote:
Am Freitag, den 10.04.2015, 11:07 -0400 schrieb Kevin O'Connor:
Add a kconfig build option (CONFIG_VGA_FIXUP_ASM) to allow users to build the vgabios without the complex assembler fixups that work around emulator bugs.
Signed-off-by: Kevin O'Connor kevin@koconnor.net
Makefile | 21 +++++++++++---------- vgasrc/Kconfig | 10 ++++++++++ vgasrc/vgaentry.S | 4 ++++ 3 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/Makefile b/Makefile index a84a5f7..a4d945c 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ COMMONCFLAGS := -I$(OUT) -Isrc -Os -MD -g \ -Wall -Wno-strict-aliasing -Wold-style-definition \ $(call cc-option,$(CC),-Wtype-limits,) \ -m32 -march=i386 -mregparm=3 -mpreferred-stack-boundary=2 \
- -minline-all-stringops \
+ -minline-all-stringops -fomit-frame-pointer \
could you elaborate why `-fomit-frame-pointer` causes issues and needs to be moved around?
The x86 vgabios interpreter on some versions of Windows gets confused when addresses relative to %esp are used (e.g., "movl 4(%esp), %eax"). See commits 9887ecb7 and 41c6061d. In practice, -fno-omit-frame-pointer causes gcc to not emit these addresses.
+ config VGA_FIXUP_ASM
bool "Fixup assembler to work with broken emulators"
default y
help
This option will cause the build to attempt to avoid
certain x86 machine instructions that are known to confuse
some emulators. In particular, it works around
deficiencies in the Windows vgabios emulator and the
Use Microsoft Windows?
Was that unclear? I prefer not to call out company names in defect descriptions.
-Kevin
Emulate the "leal" instruction so that the vgabios can run on older versions of x86emu. (This removes the previous "leal" trap.)
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- scripts/vgafixup.py | 36 +++++++++++++++++++++++++++++++++--- vgasrc/vgaentry.S | 45 +++++++++++++++++++++++---------------------- 2 files changed, 56 insertions(+), 25 deletions(-)
diff --git a/scripts/vgafixup.py b/scripts/vgafixup.py index a981bbf..e144382 100644 --- a/scripts/vgafixup.py +++ b/scripts/vgafixup.py @@ -7,8 +7,8 @@
# The x86emu code widely used in Linux distributions when running Xorg # in vesamode is known to have issues with "retl", "leavel", "entryl", -# and some variants of "calll". This code modifies those instructions -# (ret and leave) that are known to be generated by gcc to avoid +# "leal", and some variants of "calll". This code modifies those +# instructions that are known to be generated by gcc to avoid # triggering the x86emu bugs.
# It is also known that the Windows vgabios emulator has issues with @@ -16,7 +16,35 @@ # worked around by not using the gcc parameter "-fomit-frame-pointer" # when compiling.
-import sys +import sys, re + +# leal parameter regex - example string: -3(%edx,%eax,8), %eax +re_leal = re.compile( + r'^\s*(?P<offset>[^(]*?)\s*' + r'(\s*(?P<base>[^,)]*?)\s*(?:,\s*(?P<index>[^,)]*?)\s*)?' + r'(?:,\s*(?P<scale>[^,)]*?)\s*)?)\s*' + r',\s*(?P<dest>.*?)\s*$') + +# Find an alternate set of instructions for a given "leal" instruction +def handle_leal(sline): + m = re_leal.match(sline[5:]) + if m is None or m.group('index') == '%esp': + print("Invalid leal instruction: %s" % (sline,)) + sys.exit(-1) + offset, base, index, scale, dest = m.group( + 'offset', 'base', 'index', 'scale', 'dest') + if not offset: + offset = '0' + if not base: + base = '$0' + if not index: + index = '$0' + if not scale: + scale = '1' + scale = {'1': 0, '2': 1, '4': 2, '8': 3}[scale] + return ('pushl %s ; pushl %s ; pushl $%s ; pushw $%d' + ' ; callw emulate_leal ; popl %s\n' % ( + base, index, offset, scale, dest))
def main(): infilename, outfilename = sys.argv[1:] @@ -30,6 +58,8 @@ def main(): out.append('movl %ebp, %esp ; popl %ebp\n') elif sline.startswith('call'): out.append('pushw %ax ; callw' + sline[4:] + '\n') + elif sline.startswith('leal'): + out.append(handle_leal(sline)) else: out.append(line) infile.close() diff --git a/vgasrc/vgaentry.S b/vgasrc/vgaentry.S index 7ca550d..8b84f36 100644 --- a/vgasrc/vgaentry.S +++ b/vgasrc/vgaentry.S @@ -45,33 +45,34 @@ _rom_header_signature: * Entry points ****************************************************************/
- // Force a fault if found to be running on broken x86emu versions. - DECLFUNC x86emu_fault -msg: .ascii "SeaVGABIOS: x86emu leal trap!\n" -x86emu_fault: -#if CONFIG_DEBUG_IO - movw %cs:DebugOutputPort, %dx - movw $msg, %si -1: movb %cs:(%si), %al - outb %al, (%dx) - incw %si - cmpw $x86emu_fault, %si - jl 1b -#endif -1: hlt - jmp 1b + // Emulate a leal instruction (see scripts/vgafixup.py) + // On entry stack contains: base, index, offset, scale + DECLFUNC emulate_leal + .global emulate_leal +emulate_leal: + pushl %ebp + movl %esp, %ebp + pushfl + pushl %eax + pushl %ecx + + movb 6(%ebp), %cl + movl 12(%ebp), %eax + shll %cl, %eax + addl 8(%ebp), %eax + addl %eax, 16(%ebp) + + popl %ecx + popl %eax + popfl + popl %ebp + retw $10
// This macro implements a call while avoiding instructions // that old versions of x86emu have problems with. .macro VGA_CALLL cfunc #if CONFIG_VGA_FIXUP_ASM - // Make sure leal instruction works. - movl $0x8000, %ecx - leal (%ecx, %ecx, 1), %ecx - cmpl $0x10000, %ecx - jne x86emu_fault - // Use callw instead of calll - push %ax + pushw %ax callw \cfunc #else calll \cfunc
The majority of gcc generated leal instructions don't require a full out-of-line function call. Detect common cases where the calculation can be performed inline.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- scripts/vgafixup.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-)
diff --git a/scripts/vgafixup.py b/scripts/vgafixup.py index e144382..cee5842 100644 --- a/scripts/vgafixup.py +++ b/scripts/vgafixup.py @@ -33,8 +33,12 @@ def handle_leal(sline): sys.exit(-1) offset, base, index, scale, dest = m.group( 'offset', 'base', 'index', 'scale', 'dest') + if dest == '%esp': + # If destination is %esp then just use 16bit leaw instead + return 'leaw %s\n' % (sline[5:].replace('%e', '%'),) if not offset: offset = '0' + offset = int(offset, 0) if not base: base = '$0' if not index: @@ -42,7 +46,28 @@ def handle_leal(sline): if not scale: scale = '1' scale = {'1': 0, '2': 1, '4': 2, '8': 3}[scale] - return ('pushl %s ; pushl %s ; pushl $%s ; pushw $%d' + if not scale and (index == '$0' or base == dest or base == '%esp'): + # Rearrange base and index if it will permit an inline form + base, index = index, base + if base == index: + scale = 1 + base = '$0' + if base != dest and base != '%esp': + # Calculate result directly in dest register + out = [] + if index != dest: + out.append('movl %s, %s' % (index, dest)) + out.append('pushfl') + if scale: + out.append('shll $%d, %s' % (scale, dest)) + if offset: + out.append('addl $%d, %s' % (offset, dest)) + if base != '$0': + out.append('addl %s, %s' % (base, dest)) + out.append('popfl\n') + return ' ; '.join(out) + # Use default out-of-line calculation + return ('pushl %s ; pushl %s ; pushl $%d ; pushw $%d' ' ; callw emulate_leal ; popl %s\n' % ( base, index, offset, scale, dest))
@@ -60,6 +85,7 @@ def main(): out.append('pushw %ax ; callw' + sline[4:] + '\n') elif sline.startswith('leal'): out.append(handle_leal(sline)) + #print "-> %s\n %s" % (sline, out[-1].strip()) else: out.append(line) infile.close()
The majority of gcc generated leal instructions don't require a full out-of-line function call. Detect common cases where the calculation can be performed inline.
Signed-off-by: Kevin O'Connor kevin@koconnor.net ---
This version is a bit easier to read and is safer in some corner cases.
--- scripts/vgafixup.py | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-)
diff --git a/scripts/vgafixup.py b/scripts/vgafixup.py index e144382..9faea0b 100644 --- a/scripts/vgafixup.py +++ b/scripts/vgafixup.py @@ -25,6 +25,21 @@ re_leal = re.compile( r'(?:,\s*(?P<scale>[^,)]*?)\s*)?)\s*' r',\s*(?P<dest>.*?)\s*$')
+# Produce instructions to calculate "leal" directly in dest register +def handle_leal_inline(offset, base, index, scale, dest): + out = [] + if index != dest: + out.append('movl %s, %s' % (index, dest)) + out.append('pushfl') + if scale: + out.append('shll $%d, %s' % (scale, dest)) + if offset: + out.append('addl $%d, %s' % (offset, dest)) + if base != '$0': + out.append('addl %s, %s' % (base, dest)) + out.append('popfl\n') + return ' ; '.join(out) + # Find an alternate set of instructions for a given "leal" instruction def handle_leal(sline): m = re_leal.match(sline[5:]) @@ -33,16 +48,27 @@ def handle_leal(sline): sys.exit(-1) offset, base, index, scale, dest = m.group( 'offset', 'base', 'index', 'scale', 'dest') + if dest == '%esp': + # If destination is %esp then just use 16bit leaw instead + return 'leaw %s\n' % (sline[5:].replace('%e', '%'),) if not offset: offset = '0' + offset = int(offset, 0) if not base: base = '$0' - if not index: - index = '$0' if not scale: scale = '1' scale = {'1': 0, '2': 1, '4': 2, '8': 3}[scale] - return ('pushl %s ; pushl %s ; pushl $%s ; pushw $%d' + if not index: + return handle_leal_inline(offset, '$0', base, 0, dest) + if base != dest and base != '%esp': + return handle_leal_inline(offset, base, index, scale, dest) + if not scale and index != dest and index != '%esp': + return handle_leal_inline(offset, index, base, 0, dest) + if not scale and index == base: + return handle_leal_inline(offset, '$0', index, 1, dest) + # Use default out-of-line calculation + return ('pushl %s ; pushl %s ; pushl $%d ; pushw $%d' ' ; callw emulate_leal ; popl %s\n' % ( base, index, offset, scale, dest))
@@ -60,6 +86,7 @@ def main(): out.append('pushw %ax ; callw' + sline[4:] + '\n') elif sline.startswith('leal'): out.append(handle_leal(sline)) + #print "-> %s\n %s" % (sline, out[-1].strip()) else: out.append(line) infile.close()