We have lots of bit-for-bit identical code in the various stage0 implementations. Factor out the 16-bit code together with the associated protected mode activation into a shared file.
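To illustrate the resulting shape (a sketch only, not a verbatim copy of any of the patched files), each per-CPU stage0.S keeps its CPU-specific definitions and 32 bit code and pulls the shared 16 bit entry code, GDTs and reset vector back in at the end. For i586 it ends up roughly like this; the two #defines and the protected_stage0 label are taken from the existing i586 file, the ordering shown here is only illustrative:

    /* arch/x86/i586/stage0.S after the cleanup (abridged sketch) */
    #define CACHE_RAM_CODE_SEG 0x18
    #define CACHE_RAM_DATA_SEG 0x20

            .code32
    protected_stage0:
            /* CPU-specific protected mode and CAR setup stays here. */

    /* Shared 16 bit entry code, GDTs and reset vector. */
    #include "../stage0_common.S"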
I'm open to moving even more common code out, but this is a well-defined start. If you think the stage0_common.S name is bad/evil/dumb, you're welcome to suggest an alternative. No problem from my side.
Groundwork for this cleanup was laid in r902, r905 and r907.
Boot tested on qemu. Build tested on i586, geodelx, k8.
The diffstat is most enlightening:

 amd/stage0.S     |  145 ---------------------------------------------
 geodelx/stage0.S |  145 ---------------------------------------------
 i586/stage0.S    |  145 ---------------------------------------------
 stage0_common.S  |  145 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 148 insertions(+), 432 deletions(-)
Signed-off-by: Carl-Daniel Hailfinger <c-d.hailfinger.devel.2006@gmx.net>
Acked-by: Carl-Daniel Hailfinger <c-d.hailfinger.devel.2006@gmx.net>
Index: corebootv3-arch_x86_cleanup/arch/x86/geodelx/stage0.S
===================================================================
--- corebootv3-arch_x86_cleanup/arch/x86/geodelx/stage0.S	(Revision 907)
+++ corebootv3-arch_x86_cleanup/arch/x86/geodelx/stage0.S	(Arbeitskopie)
@@ -28,112 +28,7 @@
 #include <macros.h>
 #include <amd_geodelx.h>
-	.code16
-	.globl _stage0
-_stage0:
-	cli
-	/* Save the BIST result. */
-	movl %eax, %ebp;
-
-	/* thanks to kmliu@sis.com.tw for this TLB fix */
-	/* IMMEDIATELY invalidate the translation lookaside buffer (TLB) before
-	 * executing any further code. Even though paging is disabled we
-	 * could still get false address translations due to the TLB if we
-	 * didn't invalidate it.
-	 */
-	xorl %eax, %eax
-	movl %eax, %cr3	/* Invalidate TLB. */
-
-	/* Switch to protected mode. */
-
-	/* NOTE: With GNU assembler version 2.15.94.0.2.2 (i386-redhat-linux)
-	 * using BFD version 2.15.94.0.2.2 20041220 this works fine without all
-	 * the ld hackery and other things. So leave it as is with this comment.
-	 */
-
-	data32 lgdt %cs:gdtptr
-
-	movl %cr0, %eax
-	andl $0x7FFAFFD1, %eax	/* PG, AM, WP, NE, TS, EM, MP = 0 */
-	orl $0x60000001, %eax	/* CD, NW, PE = 1 */
-	movl %eax, %cr0
-
-	/* Restore BIST result. */
-	movl %ebp, %eax
-
-	// port80_post(0x23)
-
-	/* Now we are in protected mode. Jump to a 32 bit code segment. */
-	data32 ljmp $ROM_CODE_SEG, $protected_stage0
-
-	/* I am leaving this weird jump in here in the event that future gas
-	 * bugs force it to be used.
-	 */
-	/* .byte 0x66 */
-	.code32
-	/* ljmp $ROM_CODE_SEG, $protected_stage0 */
-
-	/* .code16 */
-	.align 4
-	.globl gdt16
-gdt16 = . - _stage0
-gdt16x:
-	.word gdt16xend - gdt16x -1	/* Compute the table limit. */
-	.long gdt16x
-	.word 0
-
-	/* selgdt 0x08, flat code segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x9b, 0xcf, 0x00
-
-	/* selgdt 0x10, flat data segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x93, 0xcf, 0x00
-gdt16xend:
-
-	/* From now on we are 32 bit. */
-	.code32
-
-	/* We have two gdts where we could have one. That is ok.
-	 *
-	 * Let's not worry about this -- optimizing gdt is pointless since
-	 * we're only in it for a little bit.
-	 *
-	 * Btw. note the trick below: The GDT points to ITSELF, and the first
-	 * good descriptor is at offset 8. So you word-align the table, and
-	 * then because you chose 8, you get a nice 64-bit aligned GDT entry,
-	 * which is good as this is the size of the entry.
-	 * Just in case you ever wonder why people do this.
-	 */
-	.align 4
-	.globl gdtptr
-	.globl gdt_limit
-gdt_limit = gdt_end - gdt - 1	/* Compute the table limit. */
-
-gdt:
-gdtptr:
-	.word gdt_end - gdt -1	/* Compute the table limit. */
-	.long gdt	/* We know the offset. */
-	.word 0
-
-	/* selgdt 0x08, flat code segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x9b, 0xcf, 0x00
-
-	/* selgdt 0x10, flat data segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x93, 0xcf, 0x00
-
-	/* selgdt 0x18, flat code segment for CAR */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x9b, 0xcf, 0x00
-
-	/* selgdt 0x20, flat data segment for CAR */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x93, 0xcf, 0x00
-gdt_end:
-
 /* When we come here we are in protected mode. We expand the stack
  * and copy the data segment from ROM to the memory.
  *
@@ -379,42 +274,4 @@
 	jmp stage1_main /* We will not go back. */
-/* Reset vector. */
-
-/*
- * RVECTOR: Size of reset vector, default is 0x10.
- * RESRVED: Size of vpd code, default is 0xf0.
- * BOOTBLK: Size of bootblock code, default is 0x1f00 (8k-256b).
- */
-
-SEGMENT_SIZE = 0x10000
-RVECTOR = 0x00010
-
-/* Due to YET ANOTHER BUG in GNU bintools, you can NOT have a code16 here.
- * I think we should leave it this way forever, as the bugs come and
- * go -- and come again.
- *
- * .code16
- * .section ".rom.text"
- */
-.section ".reset", "ax"
-	.globl _resetjump
-_resetjump:
-	/* GNU bintools bugs again. This jumps to stage0 - 2. Sigh. */
-	/* jmp _stage0 */
-	.byte 0xe9
-	.int _stage0 - ( . + 2 )
-
-	/* Note: The above jump is hand coded to work around bugs in binutils.
-	 * 5 bytes are used for a 3 byte instruction. This works because x86
-	 * is little endian and allows us to use supported 32 bit relocations
-	 * instead of the weird 16 bit relocations that binutils does not
-	 * handle consistenly between versions because they are used so rarely.
-	 */
-.byte 0
-
-/* Date? ID string? We might want to put something else in here. */
-.ascii DATE
-
-/* Checksum. */
-/* .word 0 */
+#include "../stage0_common.S"
Index: corebootv3-arch_x86_cleanup/arch/x86/i586/stage0.S
===================================================================
--- corebootv3-arch_x86_cleanup/arch/x86/i586/stage0.S	(Revision 907)
+++ corebootv3-arch_x86_cleanup/arch/x86/i586/stage0.S	(Arbeitskopie)
@@ -32,112 +32,7 @@
 #define CACHE_RAM_CODE_SEG 0x18
 #define CACHE_RAM_DATA_SEG 0x20
-	.code16
-	.globl _stage0
-_stage0:
-	cli
-	/* Save the BIST result. */
-	movl %eax, %ebp;
-
-	/* thanks to kmliu@sis.com.tw for this TLB fix */
-	/* IMMEDIATELY invalidate the translation lookaside buffer (TLB) before
-	 * executing any further code. Even though paging is disabled we
-	 * could still get false address translations due to the TLB if we
-	 * didn't invalidate it.
-	 */
-	xorl %eax, %eax
-	movl %eax, %cr3	/* Invalidate TLB. */
-
-	/* Switch to protected mode. */
-
-	/* NOTE: With GNU assembler version 2.15.94.0.2.2 (i386-redhat-linux)
-	 * using BFD version 2.15.94.0.2.2 20041220 this works fine without all
-	 * the ld hackery and other things. So leave it as is with this comment.
-	 */
-
-	data32 lgdt %cs:gdtptr
-
-	movl %cr0, %eax
-	andl $0x7FFAFFD1, %eax	/* PG, AM, WP, NE, TS, EM, MP = 0 */
-	orl $0x60000001, %eax	/* CD, NW, PE = 1 */
-	movl %eax, %cr0
-
-	/* Restore BIST result. */
-	movl %ebp, %eax
-
-	// port80_post(0x23)
-
-	/* Now we are in protected mode. Jump to a 32 bit code segment. */
-	data32 ljmp $ROM_CODE_SEG, $protected_stage0
-
-	/* I am leaving this weird jump in here in the event that future gas
-	 * bugs force it to be used.
-	 */
-	/* .byte 0x66 */
-	.code32
-	/* ljmp $ROM_CODE_SEG, $protected_stage0 */
-
-	/* .code16 */
-	.align 4
-	.globl gdt16
-gdt16 = . - _stage0
-gdt16x:
-	.word gdt16xend - gdt16x -1	/* Compute the table limit. */
-	.long gdt16x
-	.word 0
-
-	/* selgdt 0x08, flat code segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x9b, 0xcf, 0x00
-
-	/* selgdt 0x10, flat data segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x93, 0xcf, 0x00
-gdt16xend:
-
-	/* From now on we are 32 bit. */
-	.code32
-
-	/* We have two gdts where we could have one. That is ok.
-	 *
-	 * Let's not worry about this -- optimizing gdt is pointless since
-	 * we're only in it for a little bit.
-	 *
-	 * Btw. note the trick below: The GDT points to ITSELF, and the first
-	 * good descriptor is at offset 8. So you word-align the table, and
-	 * then because you chose 8, you get a nice 64-bit aligned GDT entry,
-	 * which is good as this is the size of the entry.
-	 * Just in case you ever wonder why people do this.
-	 */
-	.align 4
-	.globl gdtptr
-	.globl gdt_limit
-gdt_limit = gdt_end - gdt - 1	/* Compute the table limit. */
-
-gdt:
-gdtptr:
-	.word gdt_end - gdt -1	/* Compute the table limit. */
-	.long gdt	/* We know the offset. */
-	.word 0
-
-	/* selgdt 0x08, flat code segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x9b, 0xcf, 0x00
-
-	/* selgdt 0x10, flat data segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x93, 0xcf, 0x00
-
-	/* selgdt 0x18, flat code segment for CAR */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x9b, 0xcf, 0x00
-
-	/* selgdt 0x20, flat data segment for CAR */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x93, 0xcf, 0x00
-gdt_end:
-
 /* When we come here we are in protected mode. We expand the stack
  * and copy the data segment from ROM to the memory.
  *
@@ -465,42 +360,4 @@
 	.long 0x20C, 0x20D, 0x20E, 0x20F
 	.long 0x000 /* NULL, end of table */
-/* Reset vector. */
-
-/*
- * RVECTOR: Size of reset vector, default is 0x10.
- * RESRVED: Size of vpd code, default is 0xf0.
- * BOOTBLK: Size of bootblock code, default is 0x1f00 (8k-256b).
- */
-
-SEGMENT_SIZE = 0x10000
-RVECTOR = 0x00010
-
-/* Due to YET ANOTHER BUG in GNU bintools, you can NOT have a code16 here.
- * I think we should leave it this way forever, as the bugs come and
- * go -- and come again.
- *
- * .code16
- * .section ".rom.text"
- */
-.section ".reset", "ax"
-	.globl _resetjump
-_resetjump:
-	/* GNU bintools bugs again. This jumps to stage0 - 2. Sigh. */
-	/* jmp _stage0 */
-	.byte 0xe9
-	.int _stage0 - ( . + 2 )
-
-	/* Note: The above jump is hand coded to work around bugs in binutils.
-	 * 5 bytes are used for a 3 byte instruction. This works because x86
-	 * is little endian and allows us to use supported 32 bit relocations
-	 * instead of the weird 16 bit relocations that binutils does not
-	 * handle consistenly between versions because they are used so rarely.
-	 */
-.byte 0
-
-/* Date? ID string? We might want to put something else in here. */
-.ascii DATE
-
-/* Checksum. */
-/* .word 0 */
+#include "../stage0_common.S"
Index: corebootv3-arch_x86_cleanup/arch/x86/stage0_common.S
===================================================================
--- corebootv3-arch_x86_cleanup/arch/x86/stage0_common.S	(Revision 0)
+++ corebootv3-arch_x86_cleanup/arch/x86/stage0_common.S	(Revision 0)
@@ -0,0 +1,145 @@
+	.code16
+	.globl _stage0
+_stage0:
+	cli
+
+	/* Save the BIST result. */
+	movl %eax, %ebp;
+
+	/* thanks to kmliu@sis.com.tw for this TLB fix */
+	/* IMMEDIATELY invalidate the translation lookaside buffer (TLB) before
+	 * executing any further code. Even though paging is disabled we
+	 * could still get false address translations due to the TLB if we
+	 * didn't invalidate it.
+	 */
+	xorl %eax, %eax
+	movl %eax, %cr3	/* Invalidate TLB. */
+
+	/* Switch to protected mode. */
+
+	/* NOTE: With GNU assembler version 2.15.94.0.2.2 (i386-redhat-linux)
+	 * using BFD version 2.15.94.0.2.2 20041220 this works fine without all
+	 * the ld hackery and other things. So leave it as is with this comment.
+	 */
+
+	data32 lgdt %cs:gdtptr
+
+	movl %cr0, %eax
+	andl $0x7FFAFFD1, %eax	/* PG, AM, WP, NE, TS, EM, MP = 0 */
+	orl $0x60000001, %eax	/* CD, NW, PE = 1 */
+	movl %eax, %cr0
+
+	/* Restore BIST result. */
+	movl %ebp, %eax
+
+	// port80_post(0x23)
+
+	/* Now we are in protected mode. Jump to a 32 bit code segment. */
+	data32 ljmp $ROM_CODE_SEG, $protected_stage0
+
+	/* I am leaving this weird jump in here in the event that future gas
+	 * bugs force it to be used.
+	 */
+	/* .byte 0x66 */
+	.code32
+	/* ljmp $ROM_CODE_SEG, $protected_stage0 */
+
+	/* .code16 */
+	.align 4
+	.globl gdt16
+gdt16 = . - _stage0
+gdt16x:
+	.word gdt16xend - gdt16x -1	/* Compute the table limit. */
+	.long gdt16x
+	.word 0
+
+	/* selgdt 0x08, flat code segment */
+	.word 0xffff, 0x0000
+	.byte 0x00, 0x9b, 0xcf, 0x00
+
+	/* selgdt 0x10, flat data segment */
+	.word 0xffff, 0x0000
+	.byte 0x00, 0x93, 0xcf, 0x00
+gdt16xend:
+
+	/* From now on we are 32 bit. */
+	.code32
+
+	/* We have two gdts where we could have one. That is ok.
+	 *
+	 * Let's not worry about this -- optimizing gdt is pointless since
+	 * we're only in it for a little bit.
+	 *
+	 * Btw. note the trick below: The GDT points to ITSELF, and the first
+	 * good descriptor is at offset 8. So you word-align the table, and
+	 * then because you chose 8, you get a nice 64-bit aligned GDT entry,
+	 * which is good as this is the size of the entry.
+	 * Just in case you ever wonder why people do this.
+	 */
+	.align 4
+	.globl gdtptr
+	.globl gdt_limit
+gdt_limit = gdt_end - gdt - 1	/* Compute the table limit. */
+
+gdt:
+gdtptr:
+	.word gdt_end - gdt -1	/* Compute the table limit. */
+	.long gdt	/* We know the offset. */
+	.word 0
+
+	/* selgdt 0x08, flat code segment */
+	.word 0xffff, 0x0000
+	.byte 0x00, 0x9b, 0xcf, 0x00
+
+	/* selgdt 0x10, flat data segment */
+	.word 0xffff, 0x0000
+	.byte 0x00, 0x93, 0xcf, 0x00
+
+	/* selgdt 0x18, flat code segment for CAR */
+	.word 0xffff, 0x0000
+	.byte 0x00, 0x9b, 0xcf, 0x00
+
+	/* selgdt 0x20, flat data segment for CAR */
+	.word 0xffff, 0x0000
+	.byte 0x00, 0x93, 0xcf, 0x00
+gdt_end:
+
+/* Reset vector. */
+
+/*
+ * RVECTOR: Size of reset vector, default is 0x10.
+ * RESRVED: Size of vpd code, default is 0xf0.
+ * BOOTBLK: Size of bootblock code, default is 0x1f00 (8k-256b).
+ */
+
+SEGMENT_SIZE = 0x10000
+RVECTOR = 0x00010
+
+/* Due to YET ANOTHER BUG in GNU bintools, you can NOT have a code16 here.
+ * I think we should leave it this way forever, as the bugs come and
+ * go -- and come again.
+ *
+ * .code16
+ * .section ".rom.text"
+ */
+.section ".reset", "ax"
+	.globl _resetjump
+_resetjump:
+	/* GNU bintools bugs again. This jumps to stage0 - 2. Sigh. */
+	/* jmp _stage0 */
+	.byte 0xe9
+	.int _stage0 - ( . + 2 )
+
+	/* Note: The above jump is hand coded to work around bugs in binutils.
+	 * 5 bytes are used for a 3 byte instruction. This works because x86
+	 * is little endian and allows us to use supported 32 bit relocations
+	 * instead of the weird 16 bit relocations that binutils does not
+	 * handle consistenly between versions because they are used so rarely.
+	 */
+.byte 0
+
+/* Date? ID string? We might want to put something else in here. */
+.ascii DATE
+
+/* Checksum. */
+/* .word 0 */
Index: corebootv3-arch_x86_cleanup/arch/x86/amd/stage0.S
===================================================================
--- corebootv3-arch_x86_cleanup/arch/x86/amd/stage0.S	(Revision 907)
+++ corebootv3-arch_x86_cleanup/arch/x86/amd/stage0.S	(Arbeitskopie)
@@ -38,112 +38,7 @@
 #define CacheSizeAPStack 0x400 /* 1K */
 #endif
-	.code16
-	.globl _stage0
-_stage0:
-	cli
-	/* Save the BIST result. */
-	movl %eax, %ebp;
-
-	/* thanks to kmliu@sis.com.tw for this TLB fix */
-	/* IMMEDIATELY invalidate the translation lookaside buffer (TLB) before
-	 * executing any further code. Even though paging is disabled we
-	 * could still get false address translations due to the TLB if we
-	 * didn't invalidate it.
-	 */
-	xorl %eax, %eax
-	movl %eax, %cr3	/* Invalidate TLB. */
-
-	/* Switch to protected mode. */
-
-	/* NOTE: With GNU assembler version 2.15.94.0.2.2 (i386-redhat-linux)
-	 * using BFD version 2.15.94.0.2.2 20041220 this works fine without all
-	 * the ld hackery and other things. So leave it as is with this comment.
-	 */
-
-	data32 lgdt %cs:gdtptr
-
-	movl %cr0, %eax
-	andl $0x7FFAFFD1, %eax	/* PG, AM, WP, NE, TS, EM, MP = 0 */
-	orl $0x60000001, %eax	/* CD, NW, PE = 1 */
-	movl %eax, %cr0
-
-	/* Restore BIST result. */
-	movl %ebp, %eax
-
-	// port80_post(0x23)
-
-	/* Now we are in protected mode. Jump to a 32 bit code segment. */
-	data32 ljmp $ROM_CODE_SEG, $protected_stage0
-
-	/* I am leaving this weird jump in here in the event that future gas
-	 * bugs force it to be used.
-	 */
-	/* .byte 0x66 */
-	.code32
-	/* ljmp $ROM_CODE_SEG, $protected_stage0 */
-
-	/* .code16 */
-	.align 4
-	.globl gdt16
-gdt16 = . - _stage0
-gdt16x:
-	.word gdt16xend - gdt16x -1	/* Compute the table limit. */
-	.long gdt16x
-	.word 0
-
-	/* selgdt 0x08, flat code segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x9b, 0xcf, 0x00
-
-	/* selgdt 0x10, flat data segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x93, 0xcf, 0x00
-gdt16xend:
-
-	/* From now on we are 32 bit. */
-	.code32
-
-	/* We have two gdts where we could have one. That is ok.
-	 *
-	 * Let's not worry about this -- optimizing gdt is pointless since
-	 * we're only in it for a little bit.
-	 *
-	 * Btw. note the trick below: The GDT points to ITSELF, and the first
-	 * good descriptor is at offset 8. So you word-align the table, and
-	 * then because you chose 8, you get a nice 64-bit aligned GDT entry,
-	 * which is good as this is the size of the entry.
-	 * Just in case you ever wonder why people do this.
-	 */
-	.align 4
-	.globl gdtptr
-	.globl gdt_limit
-gdt_limit = gdt_end - gdt - 1	/* Compute the table limit. */
-
-gdt:
-gdtptr:
-	.word gdt_end - gdt -1	/* Compute the table limit. */
-	.long gdt	/* We know the offset. */
-	.word 0
-
-	/* selgdt 0x08, flat code segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x9b, 0xcf, 0x00
-
-	/* selgdt 0x10, flat data segment */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x93, 0xcf, 0x00
-
-	/* selgdt 0x18, flat code segment for CAR */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x9b, 0xcf, 0x00
-
-	/* selgdt 0x20, flat data segment for CAR */
-	.word 0xffff, 0x0000
-	.byte 0x00, 0x93, 0xcf, 0x00
-gdt_end:
-
 /* When we come here we are in protected mode. We expand the stack
  * and copy the data segment from ROM to the memory.
  *
@@ -468,42 +363,4 @@
 	.long 0xC001001A, 0xC001001D
 	.long 0x000 /* NULL, end of table */
-/* Reset vector. */
-
-/*
- * RVECTOR: Size of reset vector, default is 0x10.
- * RESRVED: Size of vpd code, default is 0xf0.
- * BOOTBLK: Size of bootblock code, default is 0x1f00 (8k-256b).
- */
-
-SEGMENT_SIZE = 0x10000
-RVECTOR = 0x00010
-
-/* Due to YET ANOTHER BUG in GNU bintools, you can NOT have a code16 here.
- * I think we should leave it this way forever, as the bugs come and
- * go -- and come again.
- *
- * .code16
- * .section ".rom.text"
- */
-.section ".reset", "ax"
-	.globl _resetjump
-_resetjump:
-	/* GNU bintools bugs again. This jumps to stage0 - 2. Sigh. */
-	/* jmp _stage0 */
-	.byte 0xe9
-	.int _stage0 - ( . + 2 )
-
-	/* Note: The above jump is hand coded to work around bugs in binutils.
-	 * 5 bytes are used for a 3 byte instruction. This works because x86
-	 * is little endian and allows us to use supported 32 bit relocations
-	 * instead of the weird 16 bit relocations that binutils does not
-	 * handle consistenly between versions because they are used so rarely.
-	 */
-.byte 0
-
-/* Date? ID string? We might want to put something else in here. */
-.ascii DATE
-
-/* Checksum. */
-/* .word 0 */
+#include "../stage0_common.S"