I was curious to see if SeaBIOS could run its 32bit code with PAE paging enabled. So, I put together some test code, and so far it seems to work.
The patch below changes "transition32" to always enable PAE (if PAE has been detected as available). This means that all SeaBIOS 32bit "flat mode" code will end up running with paging enabled. Even the SMI handlers run with it on. The code is also available at: https://github.com/KevinOConnor/seabios/tree/testing
The test code currently just creates an "identity mapping" of all memory in the first 4GB. However, it should be possible to setup page table entries to enable access to all ram/device memory - even above 4GB.
I don't plan to commit the patch in its current state. In order for it to be useful, the identity mapping would need to be replaced with something more dynamic. That would likely require updating all the seabios driver code to use "ioremap()" before accessing any device memory.
Some further notes:
* The reason why PAE is interesting (instead of standard i386 paging) is that it allows for 64bit mappings and because one can set it up with just a single level page directory of 2MB pages. The single level page directory makes maintaining it much easier.
* Since SeaBIOS doesn't touch much memory in practice, I think it would be possible to implement an ioremap() with a simple hashing scheme. That way, ioremap() wouldn't need any more storage beyond the 16KB page directory itself.
* SeaBIOS's malloc code could also be updated to remap pages, which would make it possible for it to relocate itself above 4GB and to store data above 4GB. That's likely not all that useful, but I think it would be a little amusing for a 16bit bios to fully support 64bit memory.
* I haven't done any performance tests. It's unclear what the performance impact of enabling paging on every 32bit entry point would be.
-Kevin
commit 6ffd9a0b0852c83850cc531a97d3f576bfc93e21 Author: Kevin O'Connor kevin@koconnor.net Date: Mon Sep 28 13:55:29 2015 -0400
Experimental support for PAE paging
Signed-off-by: Kevin O'Connor kevin@koconnor.net
diff --git a/Makefile b/Makefile index 3a0d2e8..4ee5eb4 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ SRCBOTH=misc.c stacks.c output.c string.c block.c cdrom.c disk.c mouse.c kbd.c \ hw/lsi-scsi.c hw/esp-scsi.c hw/megasas.c SRC16=$(SRCBOTH) SRC32FLAT=$(SRCBOTH) post.c memmap.c malloc.c romfile.c x86.c optionroms.c \ - pmm.c font.c boot.c bootsplash.c jpeg.c bmp.c tcgbios.c sha1.c \ + pmm.c font.c boot.c bootsplash.c jpeg.c bmp.c tcgbios.c sha1.c paging.c \ hw/ahci.c hw/pvscsi.c hw/usb-xhci.c hw/usb-hub.c hw/sdcard.c \ fw/coreboot.c fw/lzmadecode.c fw/multiboot.c fw/csm.c fw/biostables.c \ fw/paravirt.c fw/shadow.c fw/pciinit.c fw/smm.c fw/smp.c fw/mtrr.c fw/xen.c \ diff --git a/src/Kconfig b/src/Kconfig index b873cd3..c28f671 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ -122,6 +122,13 @@ endchoice selected, the memory is instead allocated from the "9-segment" (0x90000-0xa0000).
+config PAGING
+    bool "PAE paging support"
+    default y
+    help
+        Enable paging in 32bit mode on processors that support PAE
+        paging.
+
 config ROM_SIZE
     int "ROM size (in KB)"
     default 0
diff --git a/src/paging.c b/src/paging.c
new file mode 100644
index 0000000..022fad0
--- /dev/null
+++ b/src/paging.c
@@ -0,0 +1,63 @@
+// CPU page table support
+//
+// Copyright (C) 2015  Kevin O'Connor <kevin@koconnor.net>
+//
+// This file may be distributed under the terms of the GNU LGPLv3 license.
+
+#include "malloc.h" // memalign_high
+#include "memmap.h" // PAGE_SIZE
+#include "output.h" // dprintf
+#include "x86.h" // cpuid
+
+u32 PageDirPtr VARFSEG; // physical address of PDPT; 0 = paging not enabled
+
+#define PAE_MAP_SIZE (2*1024*1024)
+
+#define PG_P  (1<<0)
+#define PG_RW (1<<1)
+#define PG_PS (1<<7)
+
+// Fill a PAE page directory (2048 x 2MB entries) with a 1:1 map of 0-4GB.
+static void
+fill_page_directory(u64 *dir)
+{
+    // Build dummy identity mapping for first 4GB
+    u32 i;
+    for (i=0; i<2048; i++)
+        dir[i] = i*PAE_MAP_SIZE | PG_P | PG_RW | PG_PS;
+}
+
+// Detect PAE support and enable it if present
+void
+paging_preinit(void)
+{
+    if (!CONFIG_PAGING)
+        return;
+    // Check if CPU supports PAE
+    u32 eax, ebx, ecx, edx, cpuid_features = 0;
+    cpuid(0, &eax, &ebx, &ecx, &edx);
+    if (!eax)
+        return;
+    cpuid(1, &eax, &ebx, &ecx, &cpuid_features);
+    if (!(cpuid_features & (1<<6))) // PAE is CPUID.01H:EDX bit 6, not EAX
+        return;
+
+    dprintf(1, "PAE support found\n");
+    u64 *pdp = memalign_high(32, sizeof(u64)*4); // PDPT must be 32-byte aligned
+    u64 *dir = memalign_high(PAGE_SIZE, sizeof(u64)*2048);
+    if (!pdp || !dir) {
+        warn_noalloc();
+        free(pdp);
+        free(dir);
+        return;
+    }
+    int i;
+    for (i=0; i<4; i++)
+        pdp[i] = ((u32)dir + i*4096) | PG_P; // one PDPT entry per 4KB dir page
+    fill_page_directory(dir);
+
+    // Enable PAE
+    PageDirPtr = (u32)pdp;
+    cr3_write(PageDirPtr);
+    cr4_mask(0, CR4_PAE);
+    cr0_mask(0, CR0_PG);
+}
diff --git a/src/post.c b/src/post.c
index e19b06c..d0f52fe 100644
--- a/src/post.c
+++ b/src/post.c
@@ -315,6 +315,7 @@ dopost(void)
     qemu_preinit();
     coreboot_preinit();
     malloc_preinit();
+    paging_preinit();
     // Relocate initialization code and call maininit().
     reloc_preinit(maininit, NULL);
diff --git a/src/romlayout.S b/src/romlayout.S
index 823188b..3545612 100644
--- a/src/romlayout.S
+++ b/src/romlayout.S
@@ -51,12 +51,26 @@ transition32_nmi_off:
     orl $CR0_PE, %ecx
     movl %ecx, %cr0

+#if CONFIG_PAGING
+    movl %cs:PageDirPtr, %ecx   // set by paging_preinit() when PAE is usable
+    cmpl $0, %ecx
+    jz 1f                       // zero -> no PAE tables, leave paging off
+    // Enable PAE paging
+    movl %ecx, %cr3
+    movl %cr4, %ecx
+    orl $CR4_PAE, %ecx
+    movl %ecx, %cr4             // CR4.PAE must be set before CR0.PG
+    movl %cr0, %ecx
+    orl $CR0_PG, %ecx
+    movl %ecx, %cr0
+#endif
+
     // start 32bit protected mode code
-    ljmpl $SEG32_MODE32_CS, $(BUILD_BIOS_ADDR + 1f)
+1:  ljmpl $SEG32_MODE32_CS, $(BUILD_BIOS_ADDR + 2f)  // labels renumbered; 1f now used above

     .code32
     // init data segments
-1:  movl $SEG32_MODE32_DS, %ecx
+2:  movl $SEG32_MODE32_DS, %ecx
     movw %cx, %ds
     movw %cx, %es
     movw %cx, %ss
@@ -97,14 +111,25 @@ transition16big:
     .code16
     // Disable protected mode
 1:  movl %cr0, %ecx
-    andl $~CR0_PE, %ecx
+    andl $~(CR0_PE|CR0_PG), %ecx  // clear PG together with PE when leaving 32bit mode
     movl %ecx, %cr0

+#if CONFIG_PAGING
+    cmpl $0, %cs:PageDirPtr
+    jz 2f                       // paging was never enabled; nothing to undo
+    // Disable PAE paging
+    movl %cr4, %ecx
+    andl $~CR4_PAE, %ecx
+    movl %ecx, %cr4
+    xorl %ecx, %ecx
+    movl %ecx, %cr3             // drop PDPT pointer (cr3 write also flushes TLB)
+#endif
+
     // far jump to flush CPU queue after transition to real mode
-    ljmpw $SEG_BIOS, $2f
+2:  ljmpw $SEG_BIOS, $3f

     // restore IDT to normal real-mode defaults
-2:  lidtw %cs:rmode_IDT_info
+3:  lidtw %cs:rmode_IDT_info
     // Clear segment registers
     xorw %cx, %cx
diff --git a/src/stacks.c b/src/stacks.c
index fa9c7db..555a2f5 100644
--- a/src/stacks.c
+++ b/src/stacks.c
@@ -28,7 +28,7 @@ struct {
     u8 cmosindex;
     u8 a20;
     u16 ss, fs, gs;
-    u32 cr0;
+    u32 cr0, cr3, cr4;  // cr3/cr4 preserve the caller's paging state across call32
     struct descloc_s gdt;
 } Call16Data VARLOW;

@@ -48,6 +48,10 @@ call32_prep(u8 method)
         // Called in 16bit protected mode?!
         return -1;
     SET_LOW(Call16Data.cr0, cr0);
+    if (CONFIG_PAGING && GET_GLOBAL(PageDirPtr)) {
+        // Save the caller's cr3/cr4 so call32_post() can restore them
+        SET_LOW(Call16Data.cr3, cr3_read());
+        SET_LOW(Call16Data.cr4, cr4_read());
+    }

     // Backup fs/gs and gdt
     SET_LOW(Call16Data.fs, GET_SEG(FS));
@@ -98,6 +102,14 @@ call32_post(void)
     u32 cr0_caching = GET_LOW(Call16Data.cr0) & (CR0_CD|CR0_NW);
     if (cr0_caching)
         cr0_mask(CR0_CD|CR0_NW, cr0_caching);
+    if (CONFIG_PAGING && GET_GLOBAL(PageDirPtr)) {
+        // Restore CR4.PAE and cr3 captured in call32_prep().
+        // NOTE(review): zero values are skipped, so a caller that ran
+        // without paging keeps SeaBIOS's own tables active here - confirm
+        // that is the intent.
+        u32 cr4_pae = GET_LOW(Call16Data.cr4) & CR4_PAE;
+        if (cr4_pae)
+            cr4_mask(CR4_PAE, cr4_pae);
+        u32 cr3 = GET_LOW(Call16Data.cr3);
+        if (cr3)
+            cr3_write(cr3);
+    }
 }
     // Restore cmos index register
diff --git a/src/util.h b/src/util.h
index 327abeb..25a03ce 100644
--- a/src/util.h
+++ b/src/util.h
@@ -203,6 +203,10 @@ void vgarom_setup(void);
 void s3_resume_vga(void);
 extern int ScreenAndDebug;

+// paging.c
+extern u32 PageDirPtr;  // address of PAE page-directory-pointer table (0 = paging off)
+void paging_preinit(void);
+
 // pcibios.c
 void handle_1ab1(struct bregs *regs);
 void bios32_init(void);
diff --git a/src/x86.h b/src/x86.h
index 19d404f..b9b256d 100644
--- a/src/x86.h
+++ b/src/x86.h
@@ -14,6 +14,8 @@
 #define CR0_NW (1<<29) // Not Write-through
 #define CR0_PE (1<<0)  // Protection enable

+#define CR4_PAE (1<<5) // Physical Address Extension enable
+
 // PORT_A20 bitdefs
 #define PORT_A20 0x0092
 #define A20_ENABLE_BIT 0x02
@@ -92,6 +94,27 @@ static inline u16 cr0_vm86_read(void) {
     return cr0;
 }

+// Read the CR3 page-table base register.
+static inline u32 cr3_read(void) {
+    u32 cr3;
+    // NOTE(review): asm not marked volatile (matches cr0_read() style);
+    // the compiler could reuse a cached value across a cr3_write() - confirm.
+    asm("movl %%cr3, %0" : "=r"(cr3));
+    return cr3;
+}
+static inline void cr3_write(u32 cr3) {
+    asm("movl %0, %%cr3" : : "r"(cr3));
+}
+
+static inline u32 cr4_read(void) {
+    u32 cr4;
+    asm("movl %%cr4, %0" : "=r"(cr4));
+    return cr4;
+}
+static inline void cr4_write(u32 cr4) {
+    asm("movl %0, %%cr4" : : "r"(cr4));
+}
+// Clear the bits in 'off', then set the bits in 'on', in CR4.
+static inline void cr4_mask(u32 off, u32 on) {
+    cr4_write((cr4_read() & ~off) | on);
+}
+
 static inline u64 rdmsr(u32 index)
 {
     u64 ret;
Hi Kevin,
- The reason why PAE is interesting (instead of standard i386 paging) is that it allows for 64bit mappings and because one can set it up with just a single level page directory of 2MB pages. The single level page directory makes maintaining it much easier.
There is a performance penalty if you do it for first 2MB of phys memory on some P6/XeonP4 CPUs. Others could trigger undefined behaviour.
The reason for this is the special regions in the 640KB-1MB range covered by various MTRRs. See 11.11.9 Large Page Size Considerations in the Intel manual for the details.
Thanks Rudolf
On Tue, Sep 29, 2015 at 10:19:19PM +0200, Rudolf Marek wrote:
Hi Kevin,
- The reason why PAE is interesting (instead of standard i386 paging) is that it allows for 64bit mappings and because one can set it up with just a single level page directory of 2MB pages. The single level page directory makes maintaining it much easier.
There is a performance penalty if you do it for first 2MB of phys memory on some P6/XeonP4 CPUs. Others could trigger undefined behaviour.
The reason for this is the special regions in the 640KB-1MB range covered by various MTRRs. See 11.11.9 Large Page Size Considerations in the Intel manual for the details.
Thanks - good catch. It looks like that would require using 4KB pages for identity mapping the first 2MB of RAM.
-Kevin