This patch series reduces the overall stack usage of the 16bit code.
The biggest change is the enhancement of the "extra stack" in the e-segment to be re-entrant. The "extra stack" was previously used only by the resume code and disk code. It has been made more flexible so that it can be used by other callers. After this series, SeaBIOS will never expose the extra stack to any external code - on any irq enable or external call point, SeaBIOS will jump back to the original stack first. Because the extra stack is only used by SeaBIOS code now, the stack space in use can be tracked and re-entrant callers can be handled safely.
All of the hardware irq handlers have been updated to jump into the extra stack upon being called. This makes the stack space used by SeaBIOS-based hardware irq handlers virtually zero.
-Kevin
Kevin O'Connor (13): Rename wait_irq to yield_toirq. Don't restrict VISIBLEx C functions to only x mode. Rename call16 to farcall16. Move farcall16 code from util.c to stacks.c and reorg stacks.c. Replace 32bit->16bit farcall system with regular calls. Make the extra stack re-entrant and "hop back" to check for irqs. Automatically hop off the extra stack when far calling 16bit code. Run all hardware irq handlers on the extra stack. Use the extra stack for 16bit USB and PS2 keyboard/mouse commands. Remove "noinline" declarations from keyboard/mouse driver code. Set noinline on kbd.c interface functions that take stack variable pointers. Make sure to set dependency to segment registers in inline asm. Rework disk.c:fillLCHS to avoid using pointer parameters.
src/apm.c | 15 +-- src/asm-offsets.c | 1 + src/boot.c | 6 +- src/clock.c | 22 +++-- src/disk.c | 40 ++++---- src/disk.h | 1 + src/floppy.c | 8 +- src/kbd.c | 30 +++--- src/misc.c | 14 ++- src/mouse.c | 63 ++++++----- src/optionroms.c | 2 +- src/output.c | 9 -- src/post.c | 3 + src/ps2port.c | 8 +- src/resume.c | 2 +- src/romlayout.S | 153 ++++++++++++++++++--------- src/stacks.c | 285 ++++++++++++++++++++++++++++++++++----------------- src/system.c | 6 +- src/types.h | 16 ++-- src/usb-hid.c | 2 +- src/usb.c | 2 +- src/util.c | 62 ++---------- src/util.h | 47 +++------ tools/checkstack.py | 2 +- 24 files changed, 439 insertions(+), 360 deletions(-)
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/apm.c | 4 ++-- src/boot.c | 2 +- src/clock.c | 2 +- src/floppy.c | 4 ++-- src/kbd.c | 2 +- src/stacks.c | 2 +- src/util.c | 2 +- src/util.h | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/src/apm.c b/src/apm.c index c497dbe..5e45b77 100644 --- a/src/apm.c +++ b/src/apm.c @@ -9,7 +9,7 @@ #include "farptr.h" // GET_VAR #include "bregs.h" // struct bregs #include "ioport.h" // outb -#include "util.h" // wait_irq +#include "util.h" // dprintf #include "config.h" // CONFIG_* #include "biosvar.h" // GET_GLOBAL
@@ -94,7 +94,7 @@ handle_155304(struct bregs *regs) static void handle_155305(struct bregs *regs) { - wait_irq(); + yield_toirq(); set_success(regs); }
diff --git a/src/boot.c b/src/boot.c index 7676eb7..91663a2 100644 --- a/src/boot.c +++ b/src/boot.c @@ -627,7 +627,7 @@ boot_fail(void) for (;;) { if (retrytime != (u32)-1 && check_timer(end)) break; - wait_irq(); + yield_toirq(); } printf("Rebooting.\n"); struct bregs br; diff --git a/src/clock.c b/src/clock.c index c66c3f0..97d5301 100644 --- a/src/clock.c +++ b/src/clock.c @@ -612,7 +612,7 @@ handle_1586(struct bregs *regs) return; } while (!statusflag) - wait_irq(); + yield_toirq(); set_success(regs); }
diff --git a/src/floppy.c b/src/floppy.c index 383744a..72bc79b 100644 --- a/src/floppy.c +++ b/src/floppy.c @@ -9,7 +9,7 @@ #include "disk.h" // DISK_RET_SUCCESS #include "config.h" // CONFIG_FLOPPY #include "biosvar.h" // SET_BDA -#include "util.h" // wait_irq +#include "util.h" // dprintf #include "cmos.h" // inb_cmos #include "pic.h" // eoi_pic1 #include "bregs.h" // struct bregs @@ -192,7 +192,7 @@ wait_floppy_irq(void) v = GET_BDA(floppy_recalibration_status); if (v & FRS_TIMEOUT) break; - // Could use wait_irq() here, but that causes issues on + // Could use yield_toirq() here, but that causes issues on // bochs, so use yield() instead. yield(); } diff --git a/src/kbd.c b/src/kbd.c index 1977c5d..fdb61d4 100644 --- a/src/kbd.c +++ b/src/kbd.c @@ -87,7 +87,7 @@ dequeue_key(struct bregs *regs, int incr, int extended) regs->flags |= F_ZF; return; } - wait_irq(); + yield_toirq(); }
u8 ascii_code = GET_FARVAR(SEG_BDA, *(u8*)(buffer_head+0)); diff --git a/src/stacks.c b/src/stacks.c index 0371330..7b29b8e 100644 --- a/src/stacks.c +++ b/src/stacks.c @@ -128,7 +128,7 @@ ASM16(
// Wait for next irq to occur. void -wait_irq(void) +yield_toirq(void) { if (MODESEGMENT) { asm volatile("sti ; hlt ; cli ; cld": : :"memory"); diff --git a/src/util.c b/src/util.c index 53ef84d..c0c40e3 100644 --- a/src/util.c +++ b/src/util.c @@ -335,6 +335,6 @@ get_keystroke(int msec) return get_raw_keystroke(); if (check_timer(end)) return -1; - wait_irq(); + yield_toirq(); } } diff --git a/src/util.h b/src/util.h index c323449..dece637 100644 --- a/src/util.h +++ b/src/util.h @@ -236,7 +236,7 @@ extern struct thread_info MainThread; extern int CanPreempt; struct thread_info *getCurThread(void); void yield(void); -void wait_irq(void); +void yield_toirq(void); void run_thread(void (*func)(void*), void *data); void wait_threads(void); struct mutex_s { u32 isLocked; };
Since SeaBIOS has been updated to use "_cfuncx_" prefixes when calling cross-mode C functions, there is no reason to restrict an exported C function to only the given mode.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/apm.c | 11 ++--------- src/romlayout.S | 4 ++-- src/types.h | 16 ++++++++-------- 3 files changed, 12 insertions(+), 19 deletions(-)
diff --git a/src/apm.c b/src/apm.c index 5e45b77..54696e9 100644 --- a/src/apm.c +++ b/src/apm.c @@ -223,15 +223,8 @@ handle_1553(struct bregs *regs) } }
-void VISIBLE16 -handle_apm16(struct bregs *regs) -{ - debug_enter(regs, DEBUG_HDL_apm); - handle_1553(regs); -} - -void VISIBLE32SEG -handle_apm32(struct bregs *regs) +void VISIBLE16 VISIBLE32SEG +handle_apm(struct bregs *regs) { debug_enter(regs, DEBUG_HDL_apm); handle_1553(regs); diff --git a/src/romlayout.S b/src/romlayout.S index 399f596..c95e8bc 100644 --- a/src/romlayout.S +++ b/src/romlayout.S @@ -308,7 +308,7 @@ entry_pnp_real: entry_apm16: pushfw // save flags pushl %eax // dummy - ENTRY_ARG handle_apm16 + ENTRY_ARG handle_apm addw $4, %sp // pop dummy popfw // restore flags lretw @@ -321,7 +321,7 @@ entry_apm32: pushl %cs // Move second descriptor after %cs to %gs addl $16, (%esp) popl %gs - ENTRY_ARG_ESP _cfunc32seg_handle_apm32 + ENTRY_ARG_ESP _cfunc32seg_handle_apm popl %gs popfl lretl diff --git a/src/types.h b/src/types.h index 0f83697..b10f3b3 100644 --- a/src/types.h +++ b/src/types.h @@ -44,11 +44,11 @@ extern void __force_link_error__only_in_16bit(void) __noreturn; // Notes a function as externally visible in the 16bit code chunk. # define VISIBLE16 __VISIBLE // Notes a function as externally visible in the 32bit flat code chunk. -# define VISIBLE32FLAT __section(".discard.func32flat." UNIQSEC) noinline +# define VISIBLE32FLAT // Notes a 32bit flat function that will only be called during init. -# define VISIBLE32INIT VISIBLE32FLAT +# define VISIBLE32INIT // Notes a function as externally visible in the 32bit segmented code chunk. -# define VISIBLE32SEG __section(".discard.func32seg." UNIQSEC) noinline +# define VISIBLE32SEG // Designate a variable as (only) visible to 16bit code. # define VAR16 __section(".data16." UNIQSEC) // Designate a variable as visible to 16bit, 32bit, and assembler code. 
@@ -72,9 +72,9 @@ extern void __force_link_error__only_in_16bit(void) __noreturn; # define ASSERT32SEG() __force_link_error__only_in_32bit_segmented() # define ASSERT32FLAT() __force_link_error__only_in_32bit_flat() #elif MODESEGMENT == 1 -# define VISIBLE16 __section(".discard.func16." UNIQSEC) noinline -# define VISIBLE32FLAT __section(".discard.func32flat." UNIQSEC) noinline -# define VISIBLE32INIT VISIBLE32FLAT +# define VISIBLE16 +# define VISIBLE32FLAT +# define VISIBLE32INIT # define VISIBLE32SEG __VISIBLE # define VAR16 __section(".discard.var16." UNIQSEC) # define VAR16VISIBLE VAR16 __VISIBLE __weak @@ -89,10 +89,10 @@ extern void __force_link_error__only_in_16bit(void) __noreturn; # define ASSERT32SEG() do { } while (0) # define ASSERT32FLAT() __force_link_error__only_in_32bit_flat() #else -# define VISIBLE16 __section(".discard.func16." UNIQSEC) noinline +# define VISIBLE16 # define VISIBLE32FLAT __section(".text.runtime." UNIQSEC) __VISIBLE # define VISIBLE32INIT __section(".text.init." UNIQSEC) __VISIBLE -# define VISIBLE32SEG __section(".discard.func32seg." UNIQSEC) noinline +# define VISIBLE32SEG # define VAR16 __section(".discard.var16." UNIQSEC) # define VAR16VISIBLE VAR16 __VISIBLE __weak # define VAR16EXPORT VAR16VISIBLE
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/boot.c | 4 ++-- src/optionroms.c | 2 +- src/resume.c | 2 +- src/romlayout.S | 18 +++++++++--------- src/stacks.c | 4 ++-- src/util.c | 12 ++++++------ src/util.h | 4 ++-- 7 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/src/boot.c b/src/boot.c index 91663a2..3f6375b 100644 --- a/src/boot.c +++ b/src/boot.c @@ -530,7 +530,7 @@ call_boot_entry(struct segoff_s bootsegip, u8 bootdrv) // Set the magic number in ax and the boot drive in dl. br.dl = bootdrv; br.ax = 0xaa55; - call16(&br); + farcall16(&br); }
// Boot from a disk (either floppy or harddrive) @@ -633,7 +633,7 @@ boot_fail(void) struct bregs br; memset(&br, 0, sizeof(br)); br.code = SEGOFF(SEG_BIOS, (u32)reset_vector); - call16big(&br); + farcall16big(&br); }
// Determine next boot method and attempt a boot using it. diff --git a/src/optionroms.c b/src/optionroms.c index d5783b9..bd2f977 100644 --- a/src/optionroms.c +++ b/src/optionroms.c @@ -38,7 +38,7 @@ __callrom(struct rom_header *rom, u16 offset, u16 bdf) br.di = get_pnp_offset(); br.code = SEGOFF(seg, offset); start_preempt(); - call16big(&br); + farcall16big(&br); finish_preempt();
debug_serial_setup(); diff --git a/src/resume.c b/src/resume.c index 911ad96..f1a96ac 100644 --- a/src/resume.c +++ b/src/resume.c @@ -120,7 +120,7 @@ s3_resume(void) memset(&br, 0, sizeof(br)); dprintf(1, "Jump to resume vector (%x)\n", s3_resume_vector); br.code = FLATPTR_TO_SEGOFF((void*)s3_resume_vector); - call16big(&br); + farcall16big(&br); }
// Attempt to invoke a hard-reboot. diff --git a/src/romlayout.S b/src/romlayout.S index c95e8bc..666f763 100644 --- a/src/romlayout.S +++ b/src/romlayout.S @@ -127,11 +127,11 @@ transition16big: movl %ecx, %eax jmpl *%edx
-// Call a 16bit function from 16bit mode with a specified cpu register state +// Far call a 16bit function from 16bit mode with a specified cpu register state // %eax = address of struct bregs // Clobbers: %e[bcd]x, %e[ds]i, flags - DECLFUNC __call16 -__call16: + DECLFUNC __farcall16 +__farcall16: // Save %eax, %ebp pushl %ebp pushl %eax @@ -183,22 +183,22 @@ __call16:
retl
-// Call a 16bit function from 32bit mode. +// Far call a 16bit function from 32bit mode. // %eax = address of struct bregs // Clobbers: %e[bcd]x, %e[ds]i, flags, segment registers, idt/gdt - DECLFUNC __call16_from32 - .global __call16big_from32 + DECLFUNC __farcall16_from32 + .global __farcall16big_from32 .code32 -__call16_from32: +__farcall16_from32: movl $1f, %edx jmp transition16 -__call16big_from32: +__farcall16big_from32: movl $1f, %edx jmp transition16big
// Make call. .code16gcc -1: calll __call16 +1: calll __farcall16 // Return via transition32 movl $(2f + BUILD_BIOS_ADDR), %edx jmp transition32 diff --git a/src/stacks.c b/src/stacks.c index 7b29b8e..48512b0 100644 --- a/src/stacks.c +++ b/src/stacks.c @@ -114,7 +114,7 @@ check_irqs(void) br.flags = F_IF; br.code.seg = SEG_BIOS; br.code.offset = (u32)&trampoline_checkirqs; - call16big(&br); + farcall16big(&br); }
// 16bit trampoline for waiting for an irq from 32bit mode. @@ -144,7 +144,7 @@ yield_toirq(void) br.flags = 0; br.code.seg = SEG_BIOS; br.code.offset = (u32)&trampoline_waitirq; - call16big(&br); + farcall16big(&br); }
diff --git a/src/util.c b/src/util.c index c0c40e3..c7a22fc 100644 --- a/src/util.c +++ b/src/util.c @@ -32,17 +32,17 @@ cpuid(u32 index, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) // Call a function with a specified register state. Note that on // return, the interrupt enable/disable flag may be altered. inline void -call16(struct bregs *callregs) +farcall16(struct bregs *callregs) { if (!MODESEGMENT && getesp() > BUILD_STACK_ADDR) panic("call16 with invalid stack\n"); asm volatile( #if MODE16 == 1 - "calll __call16\n" + "calll __farcall16\n" "cli\n" "cld" #else - "calll __call16_from32" + "calll __farcall16_from32" #endif : "+a" (callregs), "+m" (*callregs) : @@ -50,13 +50,13 @@ call16(struct bregs *callregs) }
inline void -call16big(struct bregs *callregs) +farcall16big(struct bregs *callregs) { ASSERT32FLAT(); if (getesp() > BUILD_STACK_ADDR) panic("call16 with invalid stack\n"); asm volatile( - "calll __call16big_from32" + "calll __farcall16big_from32" : "+a" (callregs), "+m" (*callregs) : : "ebx", "ecx", "edx", "esi", "edi", "cc", "memory"); @@ -70,7 +70,7 @@ __call16_int(struct bregs *callregs, u16 offset) else callregs->code.seg = SEG_BIOS; callregs->code.offset = offset; - call16(callregs); + farcall16(callregs); }
diff --git a/src/util.h b/src/util.h index dece637..5357841 100644 --- a/src/util.h +++ b/src/util.h @@ -198,8 +198,8 @@ struct descloc_s { // util.c void cpuid(u32 index, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); struct bregs; -inline void call16(struct bregs *callregs); -inline void call16big(struct bregs *callregs); +inline void farcall16(struct bregs *callregs); +inline void farcall16big(struct bregs *callregs); inline void __call16_int(struct bregs *callregs, u16 offset); #define call16_int(nr, callregs) do { \ extern void irq_trampoline_ ##nr (); \
No code changes - just code movement.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/stacks.c | 237 ++++++++++++++++++++++++++++++++++------------------------ src/util.c | 49 ------------ src/util.h | 20 +++--- 3 files changed, 150 insertions(+), 156 deletions(-)
diff --git a/src/stacks.c b/src/stacks.c index 48512b0..da5daa9 100644 --- a/src/stacks.c +++ b/src/stacks.c @@ -8,20 +8,44 @@ #include "util.h" // dprintf #include "bregs.h" // CR0_PE
-// Thread info - stored at bottom of each thread stack - don't change -// without also updating the inline assembler below. -struct thread_info { - struct thread_info *next; - void *stackpos; - struct thread_info **pprev; -}; -struct thread_info VAR32FLATVISIBLE MainThread = { - &MainThread, NULL, &MainThread.next -}; + +/**************************************************************** + * Extra 16bit stack + ****************************************************************/ + +// Space for a stack for 16bit code. +u8 ExtraStack[BUILD_EXTRA_STACK_SIZE+1] VARLOW __aligned(8); + +// Switch to the extra stack and call a function. +inline u32 +stack_hop(u32 eax, u32 edx, void *func) +{ + ASSERT16(); + u16 stack_seg = SEG_LOW, bkup_ss; + u32 bkup_esp; + asm volatile( + // Backup current %ss/%esp values. + "movw %%ss, %w3\n" + "movl %%esp, %4\n" + // Copy stack seg to %ds/%ss and set %esp + "movw %w6, %%ds\n" + "movw %w6, %%ss\n" + "movl %5, %%esp\n" + // Call func + "calll *%2\n" + // Restore segments and stack + "movw %w3, %%ds\n" + "movw %w3, %%ss\n" + "movl %4, %%esp" + : "+a" (eax), "+d" (edx), "+c" (func), "=&r" (bkup_ss), "=&r" (bkup_esp) + : "i" (&ExtraStack[BUILD_EXTRA_STACK_SIZE]), "r" (stack_seg) + : "cc", "memory"); + return eax; +}
/**************************************************************** - * Low level helpers + * 16bit / 32bit calling ****************************************************************/
static inline void sgdt(struct descloc_s *desc) { @@ -88,98 +112,48 @@ call32(void *func, u32 eax, u32 errret) return eax; }
-// 16bit trampoline for enabling irqs from 32bit mode. -ASM16( - " .global trampoline_checkirqs\n" - "trampoline_checkirqs:\n" - " rep ; nop\n" - " lretw" - ); - -static void -check_irqs(void) +// Call a function with a specified register state. Note that on +// return, the interrupt enable/disable flag may be altered. +inline void +farcall16(struct bregs *callregs) { - if (MODESEGMENT) { - asm volatile( - "sti\n" - "nop\n" - "rep ; nop\n" - "cli\n" - "cld\n" - : : :"memory"); - return; - } - extern void trampoline_checkirqs(); - struct bregs br; - br.flags = F_IF; - br.code.seg = SEG_BIOS; - br.code.offset = (u32)&trampoline_checkirqs; - farcall16big(&br); + if (!MODESEGMENT && getesp() > BUILD_STACK_ADDR) + panic("call16 with invalid stack\n"); + asm volatile( +#if MODE16 == 1 + "calll __farcall16\n" + "cli\n" + "cld" +#else + "calll __farcall16_from32" +#endif + : "+a" (callregs), "+m" (*callregs) + : + : "ebx", "ecx", "edx", "esi", "edi", "cc", "memory"); }
-// 16bit trampoline for waiting for an irq from 32bit mode. -ASM16( - " .global trampoline_waitirq\n" - "trampoline_waitirq:\n" - " sti\n" - " hlt\n" - " lretw" - ); - -// Wait for next irq to occur. -void -yield_toirq(void) +inline void +farcall16big(struct bregs *callregs) { - if (MODESEGMENT) { - asm volatile("sti ; hlt ; cli ; cld": : :"memory"); - return; - } - if (CONFIG_THREADS && MainThread.next != &MainThread) { - // Threads still active - do a yield instead. - yield(); - return; - } - extern void trampoline_waitirq(); - struct bregs br; - br.flags = 0; - br.code.seg = SEG_BIOS; - br.code.offset = (u32)&trampoline_waitirq; - farcall16big(&br); + ASSERT32FLAT(); + if (getesp() > BUILD_STACK_ADDR) + panic("call16 with invalid stack\n"); + asm volatile( + "calll __farcall16big_from32" + : "+a" (callregs), "+m" (*callregs) + : + : "ebx", "ecx", "edx", "esi", "edi", "cc", "memory"); }
- -/**************************************************************** - * Extra 16bit stack - ****************************************************************/ - -// Space for a stack for 16bit code. -u8 ExtraStack[BUILD_EXTRA_STACK_SIZE+1] VARLOW __aligned(8); - -// Switch to the extra stack and call a function. -inline u32 -stack_hop(u32 eax, u32 edx, void *func) +inline void +__call16_int(struct bregs *callregs, u16 offset) { - ASSERT16(); - u16 stack_seg = SEG_LOW, bkup_ss; - u32 bkup_esp; - asm volatile( - // Backup current %ss/%esp values. - "movw %%ss, %w3\n" - "movl %%esp, %4\n" - // Copy stack seg to %ds/%ss and set %esp - "movw %w6, %%ds\n" - "movw %w6, %%ss\n" - "movl %5, %%esp\n" - // Call func - "calll *%2\n" - // Restore segments and stack - "movw %w3, %%ds\n" - "movw %w3, %%ss\n" - "movl %4, %%esp" - : "+a" (eax), "+d" (edx), "+c" (func), "=&r" (bkup_ss), "=&r" (bkup_esp) - : "i" (&ExtraStack[BUILD_EXTRA_STACK_SIZE]), "r" (stack_seg) - : "cc", "memory"); - return eax; + if (MODESEGMENT) + callregs->code.seg = GET_SEG(CS); + else + callregs->code.seg = SEG_BIOS; + callregs->code.offset = offset; + farcall16(callregs); }
@@ -187,8 +161,17 @@ stack_hop(u32 eax, u32 edx, void *func) * Threads ****************************************************************/
+// Thread info - stored at bottom of each thread stack - don't change +// without also updating the inline assembler below. +struct thread_info { + struct thread_info *next; + void *stackpos; + struct thread_info **pprev; +}; +struct thread_info VAR32FLATVISIBLE MainThread = { + &MainThread, NULL, &MainThread.next +}; #define THREADSTACKSIZE 4096 -int VAR16VISIBLE CanPreempt;
// Return the 'struct thread_info' for the currently running thread. struct thread_info * @@ -221,6 +204,35 @@ switch_next(struct thread_info *cur) : "ebx", "edx", "esi", "edi", "cc", "memory"); }
+// 16bit trampoline for enabling irqs from 32bit mode. +ASM16( + " .global trampoline_checkirqs\n" + "trampoline_checkirqs:\n" + " rep ; nop\n" + " lretw" + ); + +static void +check_irqs(void) +{ + if (MODESEGMENT) { + asm volatile( + "sti\n" + "nop\n" + "rep ; nop\n" + "cli\n" + "cld\n" + : : :"memory"); + return; + } + extern void trampoline_checkirqs(); + struct bregs br; + br.flags = F_IF; + br.code.seg = SEG_BIOS; + br.code.offset = (u32)&trampoline_checkirqs; + farcall16big(&br); +} + // Briefly permit irqs to occur. void yield(void) @@ -239,6 +251,36 @@ yield(void) switch_next(cur); }
+// 16bit trampoline for waiting for an irq from 32bit mode. +ASM16( + " .global trampoline_waitirq\n" + "trampoline_waitirq:\n" + " sti\n" + " hlt\n" + " lretw" + ); + +// Wait for next irq to occur. +void +yield_toirq(void) +{ + if (MODESEGMENT) { + asm volatile("sti ; hlt ; cli ; cld": : :"memory"); + return; + } + if (CONFIG_THREADS && MainThread.next != &MainThread) { + // Threads still active - do a yield instead. + yield(); + return; + } + extern void trampoline_waitirq(); + struct bregs br; + br.flags = 0; + br.code.seg = SEG_BIOS; + br.code.offset = (u32)&trampoline_waitirq; + farcall16big(&br); +} + // Last thing called from a thread (called on "next" stack). static void __end_thread(struct thread_info *old) @@ -332,6 +374,7 @@ mutex_unlock(struct mutex_s *mutex) * Thread preemption ****************************************************************/
+int VAR16VISIBLE CanPreempt; static u32 PreemptCount;
// Turn on RTC irqs and arrange for them to check the 32bit threads. diff --git a/src/util.c b/src/util.c index c7a22fc..0e69913 100644 --- a/src/util.c +++ b/src/util.c @@ -26,55 +26,6 @@ cpuid(u32 index, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
/**************************************************************** - * 16bit calls - ****************************************************************/ - -// Call a function with a specified register state. Note that on -// return, the interrupt enable/disable flag may be altered. -inline void -farcall16(struct bregs *callregs) -{ - if (!MODESEGMENT && getesp() > BUILD_STACK_ADDR) - panic("call16 with invalid stack\n"); - asm volatile( -#if MODE16 == 1 - "calll __farcall16\n" - "cli\n" - "cld" -#else - "calll __farcall16_from32" -#endif - : "+a" (callregs), "+m" (*callregs) - : - : "ebx", "ecx", "edx", "esi", "edi", "cc", "memory"); -} - -inline void -farcall16big(struct bregs *callregs) -{ - ASSERT32FLAT(); - if (getesp() > BUILD_STACK_ADDR) - panic("call16 with invalid stack\n"); - asm volatile( - "calll __farcall16big_from32" - : "+a" (callregs), "+m" (*callregs) - : - : "ebx", "ecx", "edx", "esi", "edi", "cc", "memory"); -} - -inline void -__call16_int(struct bregs *callregs, u16 offset) -{ - if (MODESEGMENT) - callregs->code.seg = GET_SEG(CS); - else - callregs->code.seg = SEG_BIOS; - callregs->code.offset = offset; - farcall16(callregs); -} - - -/**************************************************************** * String ops ****************************************************************/
diff --git a/src/util.h b/src/util.h index 5357841..f4a5ac8 100644 --- a/src/util.h +++ b/src/util.h @@ -197,14 +197,6 @@ struct descloc_s {
// util.c void cpuid(u32 index, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); -struct bregs; -inline void farcall16(struct bregs *callregs); -inline void farcall16big(struct bregs *callregs); -inline void __call16_int(struct bregs *callregs, u16 offset); -#define call16_int(nr, callregs) do { \ - extern void irq_trampoline_ ##nr (); \ - __call16_int((callregs), (u32)&irq_trampoline_ ##nr ); \ - } while (0) u8 checksum_far(u16 buf_seg, void *buf_far, u32 len); u8 checksum(void *buf, u32 len); size_t strlen(const char *s); @@ -229,11 +221,18 @@ void nullTrailingSpace(char *buf); int get_keystroke(int msec);
// stacks.c -u32 call32(void *func, u32 eax, u32 errret); extern u8 ExtraStack[]; inline u32 stack_hop(u32 eax, u32 edx, void *func); +u32 call32(void *func, u32 eax, u32 errret); +struct bregs; +inline void farcall16(struct bregs *callregs); +inline void farcall16big(struct bregs *callregs); +inline void __call16_int(struct bregs *callregs, u16 offset); +#define call16_int(nr, callregs) do { \ + extern void irq_trampoline_ ##nr (); \ + __call16_int((callregs), (u32)&irq_trampoline_ ##nr ); \ + } while (0) extern struct thread_info MainThread; -extern int CanPreempt; struct thread_info *getCurThread(void); void yield(void); void yield_toirq(void); @@ -242,6 +241,7 @@ void wait_threads(void); struct mutex_s { u32 isLocked; }; void mutex_lock(struct mutex_s *mutex); void mutex_unlock(struct mutex_s *mutex); +extern int CanPreempt; void start_preempt(void); void finish_preempt(void); int wait_preempt(void);
Instead of always "far calling" to 16bit mode, use a regular call. When actually needing to "far call", transition to the 16bit C code that does far calling. This reduces the overhead of the check_irqs and wait_irq code.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/romlayout.S | 16 ++++---- src/stacks.c | 123 ++++++++++++++++++++++++++++-------------------------- 2 files changed, 72 insertions(+), 67 deletions(-)
diff --git a/src/romlayout.S b/src/romlayout.S index 666f763..676658f 100644 --- a/src/romlayout.S +++ b/src/romlayout.S @@ -183,22 +183,22 @@ __farcall16:
retl
-// Far call a 16bit function from 32bit mode. -// %eax = address of struct bregs -// Clobbers: %e[bcd]x, %e[ds]i, flags, segment registers, idt/gdt - DECLFUNC __farcall16_from32 - .global __farcall16big_from32 +// Call a 16bit SeaBIOS function from SeaBIOS 32bit C code. +// %ebx = calling function +// Clobbers: %ecx, %edx, flags, segment registers, idt/gdt + DECLFUNC __call16 + .global __call16big .code32 -__farcall16_from32: +__call16: movl $1f, %edx jmp transition16 -__farcall16big_from32: +__call16big: movl $1f, %edx jmp transition16big
// Make call. .code16gcc -1: calll __farcall16 +1: calll *%ebx // Return via transition32 movl $(2f + BUILD_BIOS_ADDR), %edx jmp transition32 diff --git a/src/stacks.c b/src/stacks.c index da5daa9..044d9ea 100644 --- a/src/stacks.c +++ b/src/stacks.c @@ -112,39 +112,68 @@ call32(void *func, u32 eax, u32 errret) return eax; }
-// Call a function with a specified register state. Note that on -// return, the interrupt enable/disable flag may be altered. -inline void -farcall16(struct bregs *callregs) +// Call a 16bit SeaBIOS function from a 32bit SeaBIOS function. +static inline u32 +call16(u32 eax, void *func) { - if (!MODESEGMENT && getesp() > BUILD_STACK_ADDR) + ASSERT32FLAT(); + if (getesp() > BUILD_STACK_ADDR) panic("call16 with invalid stack\n"); asm volatile( -#if MODE16 == 1 + "calll __call16" + : "+a" (eax) + : "b" ((u32)func - BUILD_BIOS_ADDR) + : "ecx", "edx", "cc", "memory"); + return eax; +} + +static inline u32 +call16big(u32 eax, void *func) +{ + ASSERT32FLAT(); + if (getesp() > BUILD_STACK_ADDR) + panic("call16big with invalid stack\n"); + asm volatile( + "calll __call16big" + : "+a" (eax) + : "b" ((u32)func - BUILD_BIOS_ADDR) + : "ecx", "edx", "cc", "memory"); + return eax; +} + +// Far call 16bit code with a specified register state. +void VISIBLE16 +_farcall16(struct bregs *callregs) +{ + ASSERT16(); + asm volatile( "calll __farcall16\n" "cli\n" "cld" -#else - "calll __farcall16_from32" -#endif : "+a" (callregs), "+m" (*callregs) : : "ebx", "ecx", "edx", "esi", "edi", "cc", "memory"); }
inline void +farcall16(struct bregs *callregs) +{ + if (MODE16) { + _farcall16(callregs); + return; + } + extern void _cfunc16__farcall16(void); + call16((u32)callregs, _cfunc16__farcall16); +} + +inline void farcall16big(struct bregs *callregs) { - ASSERT32FLAT(); - if (getesp() > BUILD_STACK_ADDR) - panic("call16 with invalid stack\n"); - asm volatile( - "calll __farcall16big_from32" - : "+a" (callregs), "+m" (*callregs) - : - : "ebx", "ecx", "edx", "esi", "edi", "cc", "memory"); + extern void _cfunc16__farcall16(void); + call16big((u32)callregs, _cfunc16__farcall16); }
+// Invoke a 16bit software interrupt. inline void __call16_int(struct bregs *callregs, u16 offset) { @@ -204,68 +233,48 @@ switch_next(struct thread_info *cur) : "ebx", "edx", "esi", "edi", "cc", "memory"); }
-// 16bit trampoline for enabling irqs from 32bit mode. -ASM16( - " .global trampoline_checkirqs\n" - "trampoline_checkirqs:\n" - " rep ; nop\n" - " lretw" - ); - -static void +// Low-level irq enable. +void VISIBLE16 check_irqs(void) { - if (MODESEGMENT) { - asm volatile( - "sti\n" - "nop\n" - "rep ; nop\n" - "cli\n" - "cld\n" - : : :"memory"); - return; - } - extern void trampoline_checkirqs(); - struct bregs br; - br.flags = F_IF; - br.code.seg = SEG_BIOS; - br.code.offset = (u32)&trampoline_checkirqs; - farcall16big(&br); + asm volatile("sti ; nop ; rep ; nop ; cli ; cld" : : :"memory"); }
// Briefly permit irqs to occur. void yield(void) { - if (MODESEGMENT || !CONFIG_THREADS) { + if (MODESEGMENT) { // Just directly check irqs. check_irqs(); return; } + extern void _cfunc16_check_irqs(void); + if (!CONFIG_THREADS) { + call16big(0, _cfunc16_check_irqs); + return; + } struct thread_info *cur = getCurThread(); if (cur == &MainThread) // Permit irqs to fire - check_irqs(); + call16big(0, _cfunc16_check_irqs);
// Switch to the next thread switch_next(cur); }
-// 16bit trampoline for waiting for an irq from 32bit mode. -ASM16( - " .global trampoline_waitirq\n" - "trampoline_waitirq:\n" - " sti\n" - " hlt\n" - " lretw" - ); +void VISIBLE16 +wait_irq(void) +{ + asm volatile("sti ; hlt ; cli ; cld": : :"memory"); +}
// Wait for next irq to occur. void yield_toirq(void) { if (MODESEGMENT) { - asm volatile("sti ; hlt ; cli ; cld": : :"memory"); + wait_irq(); return; } if (CONFIG_THREADS && MainThread.next != &MainThread) { @@ -273,12 +282,8 @@ yield_toirq(void) yield(); return; } - extern void trampoline_waitirq(); - struct bregs br; - br.flags = 0; - br.code.seg = SEG_BIOS; - br.code.offset = (u32)&trampoline_waitirq; - farcall16big(&br); + extern void _cfunc16_wait_irq(void); + call16big(0, _cfunc16_wait_irq); }
// Last thing called from a thread (called on "next" stack).
When on the extra stack and it's necessary to check for irqs, switch back to the original caller's stack to check for irqs. Make the extra stack re-entrant, so that a new user of the extra stack won't collide with an existing user.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/post.c | 3 +++ src/stacks.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ src/util.h | 2 +- 3 files changed, 56 insertions(+), 7 deletions(-)
diff --git a/src/post.c b/src/post.c index aa29151..d2f40f4 100644 --- a/src/post.c +++ b/src/post.c @@ -95,6 +95,9 @@ init_bda(void)
add_e820((u32)MAKE_FLATPTR(ebda_seg, 0), GET_EBDA(ebda_seg, size) * 1024 , E820_RESERVED); + + // Init extra stack + StackPos = (void*)(&ExtraStack[BUILD_EXTRA_STACK_SIZE] - _datalow_base); }
static void diff --git a/src/stacks.c b/src/stacks.c index 044d9ea..febd8bc 100644 --- a/src/stacks.c +++ b/src/stacks.c @@ -15,14 +15,24 @@
// Space for a stack for 16bit code. u8 ExtraStack[BUILD_EXTRA_STACK_SIZE+1] VARLOW __aligned(8); +u8 *StackPos VARLOW; + +// Test if currently on the extra stack +static inline int +on_extra_stack(void) +{ + return MODE16 && GET_SEG(SS) == SEG_LOW && getesp() > (u32)ExtraStack; +}
// Switch to the extra stack and call a function. inline u32 stack_hop(u32 eax, u32 edx, void *func) { + if (on_extra_stack()) + return ((u32 (*)(u32, u32))func)(eax, edx); ASSERT16(); - u16 stack_seg = SEG_LOW, bkup_ss; - u32 bkup_esp; + u16 stack_seg = SEG_LOW; + u32 bkup_ss, bkup_esp; asm volatile( // Backup current %ss/%esp values. "movw %%ss, %w3\n" @@ -31,14 +41,51 @@ stack_hop(u32 eax, u32 edx, void *func) "movw %w6, %%ds\n" "movw %w6, %%ss\n" "movl %5, %%esp\n" + "pushl %3\n" + "pushl %4\n" // Call func "calll *%2\n" + "popl %4\n" + "popl %3\n" // Restore segments and stack "movw %w3, %%ds\n" "movw %w3, %%ss\n" "movl %4, %%esp" : "+a" (eax), "+d" (edx), "+c" (func), "=&r" (bkup_ss), "=&r" (bkup_esp) - : "i" (&ExtraStack[BUILD_EXTRA_STACK_SIZE]), "r" (stack_seg) + : "m" (StackPos), "r" (stack_seg) + : "cc", "memory"); + return eax; +} + +// Switch back to original caller's stack and call a function. +static u32 +stack_hop_back(u32 eax, u32 edx, void *func) +{ + if (!on_extra_stack()) + return ((u32 (*)(u32, u32))func)(eax, edx); + ASSERT16(); + u16 bkup_ss; + u32 bkup_stack_pos, temp; + asm volatile( + // Backup stack_pos and current %ss/%esp + "movl %6, %4\n" + "movw %%ss, %w3\n" + "movl %%esp, %6\n" + // Restore original callers' %ss/%esp + "movl -4(%4), %5\n" + "movl %5, %%ss\n" + "movl %%ds:-8(%4), %%esp\n" + "movl %5, %%ds\n" + // Call func + "calll *%2\n" + // Restore %ss/%esp and stack_pos + "movw %w3, %%ds\n" + "movw %w3, %%ss\n" + "movl %6, %%esp\n" + "movl %4, %6" + : "+a" (eax), "+d" (edx), "+c" (func), "=&r" (bkup_ss) + , "=&r" (bkup_stack_pos), "=&r" (temp), "+m" (StackPos) + : : "cc", "memory"); return eax; } @@ -245,8 +292,7 @@ void yield(void) { if (MODESEGMENT) { - // Just directly check irqs. 
- check_irqs(); + stack_hop_back(0, 0, check_irqs); return; } extern void _cfunc16_check_irqs(void); @@ -274,7 +320,7 @@ void yield_toirq(void) { if (MODESEGMENT) { - wait_irq(); + stack_hop_back(0, 0, wait_irq); return; } if (CONFIG_THREADS && MainThread.next != &MainThread) { diff --git a/src/util.h b/src/util.h index f4a5ac8..a4fabd5 100644 --- a/src/util.h +++ b/src/util.h @@ -221,7 +221,7 @@ void nullTrailingSpace(char *buf); int get_keystroke(int msec);
// stacks.c -extern u8 ExtraStack[]; +extern u8 ExtraStack[], *StackPos; inline u32 stack_hop(u32 eax, u32 edx, void *func); u32 call32(void *func, u32 eax, u32 errret); struct bregs;
Update the low level __farcall16 code to support a 'struct bregs' in a segment other than the stack segment.
Automatically hop back from the extra stack on any farcall16() calls.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/romlayout.S | 69 +++++++++++++++++++++++++++++------------------------- src/stacks.c | 7 ++--- 2 files changed, 40 insertions(+), 36 deletions(-)
diff --git a/src/romlayout.S b/src/romlayout.S index 676658f..147cd3b 100644 --- a/src/romlayout.S +++ b/src/romlayout.S @@ -128,56 +128,61 @@ transition16big: jmpl *%edx
// Far call a 16bit function from 16bit mode with a specified cpu register state -// %eax = address of struct bregs +// %es:%eax = address of struct bregs // Clobbers: %e[bcd]x, %e[ds]i, flags DECLFUNC __farcall16 __farcall16: // Save %eax, %ebp pushl %ebp pushl %eax + pushl %es
// Setup for iretw call pushw %cs - pushw $1f // return point - pushw BREGS_flags(%eax) // flags - pushl BREGS_code(%eax) // CS:IP + pushw $1f // return point + pushw %es:BREGS_flags(%eax) // flags + pushl %es:BREGS_code(%eax) // CS:IP
// Load calling registers. - movl BREGS_edi(%eax), %edi - movl BREGS_esi(%eax), %esi - movl BREGS_ebp(%eax), %ebp - movl BREGS_ebx(%eax), %ebx - movl BREGS_edx(%eax), %edx - movl BREGS_ecx(%eax), %ecx - movw BREGS_es(%eax), %es - movw BREGS_ds(%eax), %ds - movl %ss:BREGS_eax(%eax), %eax + movl %es:BREGS_edi(%eax), %edi + movl %es:BREGS_esi(%eax), %esi + movl %es:BREGS_ebp(%eax), %ebp + movl %es:BREGS_ebx(%eax), %ebx + movl %es:BREGS_edx(%eax), %edx + movl %es:BREGS_ecx(%eax), %ecx + movw %es:BREGS_ds(%eax), %ds + pushl %es:BREGS_eax(%eax) + movw %es:BREGS_es(%eax), %es + popl %eax
// Invoke call - iretw // XXX - just do a lcalll + iretw // XXX - just do a lcalll 1: - // Store flags, eax, ecx + // Store flags, es, eax pushfw + cli + cld + pushw %es pushl %eax - movl 0x06(%esp), %eax - movl %ecx, %ss:BREGS_ecx(%eax) - movw %ds, %ss:BREGS_ds(%eax) - movw %ss, %cx - movw %cx, %ds // Restore %ds == %ss - popl %ecx - movl %ecx, BREGS_eax(%eax) - popw %cx - movw %cx, BREGS_flags(%eax) + movw 0x08(%esp), %es + movl 0x0c(%esp), %eax + popl %es:BREGS_eax(%eax) + popw %es:BREGS_es(%eax) + popw %es:BREGS_flags(%eax)
// Store remaining registers - movw %es, BREGS_es(%eax) - movl %edi, BREGS_edi(%eax) - movl %esi, BREGS_esi(%eax) - movl %ebp, BREGS_ebp(%eax) - movl %ebx, BREGS_ebx(%eax) - movl %edx, BREGS_edx(%eax) - - // Remove %eax, restore %ebp + movl %edi, %es:BREGS_edi(%eax) + movl %esi, %es:BREGS_esi(%eax) + movl %ebp, %es:BREGS_ebp(%eax) + movl %ebx, %es:BREGS_ebx(%eax) + movl %edx, %es:BREGS_edx(%eax) + movl %ecx, %es:BREGS_ecx(%eax) + movw %ds, %es:BREGS_ds(%eax) + movw %ss, %cx + movw %cx, %ds // Restore %ds == %ss + + // Remove %es/%eax, restore %ebp + popl %eax popl %eax popl %ebp
diff --git a/src/stacks.c b/src/stacks.c index febd8bc..cfdd68d 100644 --- a/src/stacks.c +++ b/src/stacks.c @@ -195,10 +195,8 @@ _farcall16(struct bregs *callregs) ASSERT16(); asm volatile( "calll __farcall16\n" - "cli\n" - "cld" : "+a" (callregs), "+m" (*callregs) - : + : "m" (__segment_ES) : "ebx", "ecx", "edx", "esi", "edi", "cc", "memory"); }
@@ -206,7 +204,8 @@ inline void farcall16(struct bregs *callregs) { if (MODE16) { - _farcall16(callregs); + SET_SEG(ES, GET_SEG(SS)); + stack_hop_back((u32)callregs, 0, _farcall16); return; } extern void _cfunc16__farcall16(void);
Jump into the extra stack for all hardware irq handlers. This reduces the overall stack requirements of SeaBIOS.
Replace all users of call16_simpint with call16_int. Only the hardware irq handlers used the old call, and they need to use the new call to ensure the extra stack is properly re-entrant.
Also, pass in a 'struct bregs' to the hardware irq handlers now. Previously this was avoided in order to save stack space. Now that the extra stack is used, that is no longer an issue.
Note that should an old OS invoke a hardware irq in 16bit protected mode, then this patch could break that OS. However, the chances of this causing a regression seem small as several existing hardware irq handlers already do not work in 16bit protected mode.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/asm-offsets.c | 1 + src/clock.c | 20 ++++++++++------ src/disk.c | 4 +- src/floppy.c | 4 +- src/kbd.c | 14 ++++++----- src/misc.c | 14 ++++++----- src/mouse.c | 57 ++++++++++++++++++++++++++---------------------- src/output.c | 9 ------- src/ps2port.c | 8 +++--- src/romlayout.S | 60 +++++++++++++++++++++++++++++++++++++++++++++----- src/stacks.c | 4 +- src/util.h | 25 +------------------- tools/checkstack.py | 2 +- 13 files changed, 127 insertions(+), 95 deletions(-)
diff --git a/src/asm-offsets.c b/src/asm-offsets.c index b98f3b5..576bf34 100644 --- a/src/asm-offsets.c +++ b/src/asm-offsets.c @@ -20,4 +20,5 @@ void foo(void) OFFSET(BREGS_edi, bregs, edi); OFFSET(BREGS_flags, bregs, flags); OFFSET(BREGS_code, bregs, code); + DEFINE(BREGS_size, sizeof(struct bregs)); } diff --git a/src/clock.c b/src/clock.c index 97d5301..55dde2e 100644 --- a/src/clock.c +++ b/src/clock.c @@ -516,9 +516,9 @@ handle_1a(struct bregs *regs)
// INT 08h System Timer ISR Entry Point void VISIBLE16 -handle_08(void) +handle_08(struct bregs *regs) { - debug_isr(DEBUG_ISR_08); + debug_enter(regs, DEBUG_ISR_08);
floppy_tick();
@@ -536,8 +536,10 @@ handle_08(void) usb_check_event();
// chain to user timer tick INT #0x1c - u32 eax=0, flags; - call16_simpint(0x1c, &eax, &flags); + struct bregs br; + memset(&br, 0, sizeof(br)); + br.flags = F_IF; + call16_int(0x1c, &br);
eoi_pic1(); } @@ -657,9 +659,9 @@ handle_1583(struct bregs *regs)
// int70h: IRQ8 - CMOS RTC void VISIBLE16 -handle_70(void) +handle_70(struct bregs *regs) { - debug_isr(DEBUG_ISR_70); + debug_enter(regs, DEBUG_ISR_70);
// Check which modes are enabled and have occurred. u8 registerB = inb_cmos(CMOS_STATUS_B); @@ -669,8 +671,10 @@ handle_70(void) goto done; if (registerC & RTC_B_AIE) { // Handle Alarm Interrupt. - u32 eax=0, flags; - call16_simpint(0x4a, &eax, &flags); + struct bregs br; + memset(&br, 0, sizeof(br)); + br.flags = F_IF; + call16_int(0x4a, &br); } if (!(registerC & RTC_B_PIE)) goto done; diff --git a/src/disk.c b/src/disk.c index 080d6cd..ed54e97 100644 --- a/src/disk.c +++ b/src/disk.c @@ -883,9 +883,9 @@ handle_13(struct bregs *regs)
// record completion in BIOS task complete flag void VISIBLE16 -handle_76(void) +handle_76(struct bregs *regs) { - debug_isr(DEBUG_ISR_76); + debug_enter(regs, DEBUG_ISR_76); SET_BDA(disk_interrupt_flag, 0xff); eoi_pic2(); } diff --git a/src/floppy.c b/src/floppy.c index 72bc79b..5400bb0 100644 --- a/src/floppy.c +++ b/src/floppy.c @@ -582,9 +582,9 @@ process_floppy_op(struct disk_op_s *op)
// INT 0Eh Diskette Hardware ISR Entry Point void VISIBLE16 -handle_0e(void) +handle_0e(struct bregs *regs) { - debug_isr(DEBUG_ISR_0e); + debug_enter(regs, DEBUG_ISR_0e); if (! CONFIG_FLOPPY) goto done;
diff --git a/src/kbd.c b/src/kbd.c index fdb61d4..586d57e 100644 --- a/src/kbd.c +++ b/src/kbd.c @@ -379,7 +379,7 @@ static struct scaninfo { };
// Handle a scancode read from the ps2 port. Note that "noinline" is -// used to make sure the call to call16_simpint in process_key doesn't +// used to make sure the call to call16_int in process_key doesn't // have the overhead of this function's stack. static void noinline __process_key(u8 scancode) @@ -562,12 +562,14 @@ process_key(u8 key)
if (CONFIG_KBD_CALL_INT15_4F) { // allow for keyboard intercept - u32 eax = (0x4f << 8) | key; - u32 flags; - call16_simpint(0x15, &eax, &flags); - if (!(flags & F_CF)) + struct bregs br; + memset(&br, 0, sizeof(br)); + br.eax = (0x4f << 8) | key; + br.flags = F_IF|F_CF; + call16_int(0x15, &br); + if (!(br.flags & F_CF)) return; - key = eax; + key = br.eax; } __process_key(key); } diff --git a/src/misc.c b/src/misc.c index 9db49e3..d0d6665 100644 --- a/src/misc.c +++ b/src/misc.c @@ -55,9 +55,9 @@ handle_10(struct bregs *regs)
// NMI handler void VISIBLE16 -handle_02(void) +handle_02(struct bregs *regs) { - debug_isr(DEBUG_ISR_02); + debug_enter(regs, DEBUG_ISR_02); }
void @@ -71,17 +71,19 @@ mathcp_setup(void)
// INT 75 - IRQ13 - MATH COPROCESSOR EXCEPTION void VISIBLE16 -handle_75(void) +handle_75(struct bregs *regs) { - debug_isr(DEBUG_ISR_75); + debug_enter(regs, DEBUG_ISR_75);
// clear irq13 outb(0, PORT_MATH_CLEAR); // clear interrupt eoi_pic2(); // legacy nmi call - u32 eax=0, flags; - call16_simpint(0x02, &eax, &flags); + struct bregs br; + memset(&br, 0, sizeof(br)); + br.flags = F_IF; + call16_int(0x02, &br); }
diff --git a/src/mouse.c b/src/mouse.c index 237c8ff..93e4ed2 100644 --- a/src/mouse.c +++ b/src/mouse.c @@ -6,8 +6,7 @@ // This file may be distributed under the terms of the GNU LGPLv3 license.
#include "biosvar.h" // GET_EBDA -#include "util.h" // debug_isr -#include "pic.h" // eoi_pic2 +#include "util.h" // dprintf #include "bregs.h" // struct bregs #include "ps2port.h" // ps2_mouse_command #include "usb-hid.h" // usb_mouse_command @@ -273,34 +272,12 @@ handle_15c2(struct bregs *regs) } }
-void noinline -process_mouse(u8 data) +static void +invoke_mouse_handler(u16 ebda_seg) { - if (!CONFIG_MOUSE) - return; - - u16 ebda_seg = get_ebda_seg(); - u8 mouse_flags_1 = GET_EBDA(ebda_seg, mouse_flag1); - u8 mouse_flags_2 = GET_EBDA(ebda_seg, mouse_flag2); - - if (! (mouse_flags_2 & 0x80)) - // far call handler not installed - return; - - u8 package_count = mouse_flags_2 & 0x07; - u8 index = mouse_flags_1 & 0x07; - SET_EBDA(ebda_seg, mouse_data[index], data); - - if ((index+1) < package_count) { - mouse_flags_1++; - SET_EBDA(ebda_seg, mouse_flag1, mouse_flags_1); - return; - } - u16 status = GET_EBDA(ebda_seg, mouse_data[0]); u16 X = GET_EBDA(ebda_seg, mouse_data[1]); u16 Y = GET_EBDA(ebda_seg, mouse_data[2]); - SET_EBDA(ebda_seg, mouse_flag1, 0);
struct segoff_s func = GET_EBDA(ebda_seg, far_call_pointer); dprintf(16, "mouse farcall s=%04x x=%04x y=%04x func=%04x:%04x\n" @@ -325,3 +302,31 @@ process_mouse(u8 data) : : "edi", "esi", "cc", "memory"); } + +void noinline +process_mouse(u8 data) +{ + if (!CONFIG_MOUSE) + return; + + u16 ebda_seg = get_ebda_seg(); + u8 mouse_flags_1 = GET_EBDA(ebda_seg, mouse_flag1); + u8 mouse_flags_2 = GET_EBDA(ebda_seg, mouse_flag2); + + if (! (mouse_flags_2 & 0x80)) + // far call handler not installed + return; + + u8 package_count = mouse_flags_2 & 0x07; + u8 index = mouse_flags_1 & 0x07; + SET_EBDA(ebda_seg, mouse_data[index], data); + + if ((index+1) < package_count) { + mouse_flags_1++; + SET_EBDA(ebda_seg, mouse_flag1, mouse_flags_1); + return; + } + + SET_EBDA(ebda_seg, mouse_flag1, 0); + stack_hop_back(ebda_seg, 0, invoke_mouse_handler); +} diff --git a/src/output.c b/src/output.c index 37c4942..1fe5d91 100644 --- a/src/output.c +++ b/src/output.c @@ -487,15 +487,6 @@ dump_regs(struct bregs *regs) , regs->code.seg, regs->code.offset, regs->flags); }
-// Report entry to an Interrupt Service Routine (ISR). -void -__debug_isr(const char *fname) -{ - puts_cs(&debuginfo, fname); - putc(&debuginfo, '\n'); - debug_serial_flush(); -} - // Function called on handler startup. void __debug_enter(struct bregs *regs, const char *fname) diff --git a/src/ps2port.c b/src/ps2port.c index 15bce8e..c835e14 100644 --- a/src/ps2port.c +++ b/src/ps2port.c @@ -357,12 +357,12 @@ ps2_mouse_command(int command, u8 *param)
// INT74h : PS/2 mouse hardware interrupt void VISIBLE16 -handle_74(void) +handle_74(struct bregs *regs) { if (! CONFIG_PS2PORT) return;
- debug_isr(DEBUG_ISR_74); + debug_enter(regs, DEBUG_ISR_74);
u8 v = inb(PORT_PS2_STATUS); if ((v & (I8042_STR_OBF|I8042_STR_AUXDATA)) @@ -384,12 +384,12 @@ done:
// INT09h : Keyboard Hardware Service Entry Point void VISIBLE16 -handle_09(void) +handle_09(struct bregs *regs) { if (! CONFIG_PS2PORT) return;
- debug_isr(DEBUG_ISR_09); + debug_enter(regs, DEBUG_ISR_09);
// read key from keyboard controller u8 v = inb(PORT_PS2_STATUS); diff --git a/src/romlayout.S b/src/romlayout.S index 147cd3b..82be760 100644 --- a/src/romlayout.S +++ b/src/romlayout.S @@ -219,12 +219,15 @@ __call16big: lretw .endm
+ IRQ_TRAMPOLINE 02 IRQ_TRAMPOLINE 10 IRQ_TRAMPOLINE 13 IRQ_TRAMPOLINE 15 IRQ_TRAMPOLINE 16 IRQ_TRAMPOLINE 18 IRQ_TRAMPOLINE 19 + IRQ_TRAMPOLINE 1c + IRQ_TRAMPOLINE 4a
/**************************************************************** @@ -386,10 +389,55 @@ entry_elf: * Interrupt entry points ****************************************************************/
- // Main entry point for interrupts without args - DECLFUNC irqentry -irqentry: - ENTRY_ST + // Main entry point for interrupts handled on extra stack + DECLFUNC hwirqentry +irqentry_extrastack: + cli + cld + pushw %ds + pushl %eax + movl $_datalow_seg, %eax + movl %eax, %ds + movl StackPos, %eax + subl $BREGS_size+12, %eax + popl BREGS_eax(%eax) + popw BREGS_ds(%eax) + movl %edi, BREGS_edi(%eax) + movl %esi, BREGS_esi(%eax) + movl %ebp, BREGS_ebp(%eax) + movl %ebx, BREGS_ebx(%eax) + movl %edx, BREGS_edx(%eax) + movl %ecx, BREGS_ecx(%eax) + movw %es, BREGS_es(%eax) + popl %ecx + popl BREGS_code(%eax) + popw BREGS_flags(%eax) + + movw %ss, BREGS_size+8(%eax) + movzwl %sp, %edx + movl %edx, BREGS_size+4(%eax) + movl %esp, BREGS_size+0(%eax) + movw %ds, %dx + movw %dx, %ss + movl %eax, %esp + calll *%ecx + + movl %esp, %eax + movw BREGS_size+8(%eax), %ss + movl BREGS_size+0(%eax), %esp + movl BREGS_edi(%eax), %edi + movl BREGS_esi(%eax), %esi + movl BREGS_ebp(%eax), %ebp + movl BREGS_ebx(%eax), %ebx + movl BREGS_edx(%eax), %edx + movl BREGS_ecx(%eax), %ecx + movw BREGS_es(%eax), %es + pushw BREGS_flags(%eax) + pushl BREGS_code(%eax) + pushw BREGS_ds(%eax) + pushl BREGS_eax(%eax) + popl %eax + popw %ds iretw
// Main entry point for interrupts with args @@ -398,12 +446,12 @@ irqentryarg: ENTRY_ARG_ST iretw
- // Define an entry point for an interrupt (no args passed). + // Define an entry point for hardware interrupts. .macro IRQ_ENTRY num .global entry_\num entry_\num : pushl $ handle_\num - jmp irqentry + jmp irqentry_extrastack .endm
.macro DECL_IRQ_ENTRY num diff --git a/src/stacks.c b/src/stacks.c index cfdd68d..2804e47 100644 --- a/src/stacks.c +++ b/src/stacks.c @@ -25,7 +25,7 @@ on_extra_stack(void) }
// Switch to the extra stack and call a function. -inline u32 +u32 stack_hop(u32 eax, u32 edx, void *func) { if (on_extra_stack()) @@ -58,7 +58,7 @@ stack_hop(u32 eax, u32 edx, void *func) }
// Switch back to original caller's stack and call a function. -static u32 +u32 stack_hop_back(u32 eax, u32 edx, void *func) { if (!on_extra_stack()) diff --git a/src/util.h b/src/util.h index a4fabd5..0d41785 100644 --- a/src/util.h +++ b/src/util.h @@ -159,23 +159,6 @@ static inline u8 readb(const void *addr) { return *(volatile const u8 *)addr; }
-#define call16_simpint(nr, peax, pflags) do { \ - ASSERT16(); \ - asm volatile( \ - "pushl %%ebp\n" \ - "sti\n" \ - "stc\n" \ - "int %2\n" \ - "pushfl\n" \ - "popl %1\n" \ - "cli\n" \ - "cld\n" \ - "popl %%ebp" \ - : "+a"(*peax), "=c"(*pflags) \ - : "i"(nr) \ - : "ebx", "edx", "esi", "edi", "cc", "memory"); \ - } while (0) - // GDT bits #define GDT_CODE (0x9bULL << 40) // Code segment - P,R,A bits also set #define GDT_DATA (0x93ULL << 40) // Data segment - W,A bits also set @@ -222,7 +205,8 @@ int get_keystroke(int msec);
// stacks.c extern u8 ExtraStack[], *StackPos; -inline u32 stack_hop(u32 eax, u32 edx, void *func); +u32 stack_hop(u32 eax, u32 edx, void *func); +u32 stack_hop_back(u32 eax, u32 edx, void *func); u32 call32(void *func, u32 eax, u32 errret); struct bregs; inline void farcall16(struct bregs *callregs); @@ -260,7 +244,6 @@ char * znprintf(size_t size, const char *fmt, ...) void __dprintf(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); void __debug_enter(struct bregs *regs, const char *fname); -void __debug_isr(const char *fname); void __debug_stub(struct bregs *regs, int lineno, const char *fname); void __warn_invalid(struct bregs *regs, int lineno, const char *fname); void __warn_unimplemented(struct bregs *regs, int lineno, const char *fname); @@ -282,10 +265,6 @@ void hexdump(const void *d, int len); if ((lvl) && (lvl) <= CONFIG_DEBUG_LEVEL) \ __debug_enter((regs), __func__); \ } while (0) -#define debug_isr(lvl) do { \ - if ((lvl) && (lvl) <= CONFIG_DEBUG_LEVEL) \ - __debug_isr(__func__); \ - } while (0) #define debug_stub(regs) \ __debug_stub((regs), __LINE__, __func__) #define warn_invalid(regs) \ diff --git a/tools/checkstack.py b/tools/checkstack.py index 717de2d..23b7c8e 100755 --- a/tools/checkstack.py +++ b/tools/checkstack.py @@ -13,7 +13,7 @@ import sys import re
# Functions that change stacks -STACKHOP = ['__send_disk_op'] +STACKHOP = ['stack_hop', 'stack_hop_back'] # List of functions we can assume are never called. #IGNORE = ['panic', '__dprintf'] IGNORE = ['panic']
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/kbd.c | 6 +++--- src/mouse.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/kbd.c b/src/kbd.c index 586d57e..0da13a1 100644 --- a/src/kbd.c +++ b/src/kbd.c @@ -110,12 +110,12 @@ dequeue_key(struct bregs *regs, int incr, int extended) SET_BDA(kbd_buf_head, buffer_head); }
-static inline int +static int kbd_command(int command, u8 *param) { if (usb_kbd_active()) - return usb_kbd_command(command, param); - return ps2_kbd_command(command, param); + return stack_hop(command, (u32)param, usb_kbd_command); + return stack_hop(command, (u32)param, ps2_kbd_command); }
// read keyboard input diff --git a/src/mouse.c b/src/mouse.c index 93e4ed2..ece69db 100644 --- a/src/mouse.c +++ b/src/mouse.c @@ -21,12 +21,12 @@ mouse_setup(void) SETBITS_BDA(equipment_list_flags, 0x04); }
-static inline int +static int mouse_command(int command, u8 *param) { if (usb_mouse_active()) - return usb_mouse_command(command, param); - return ps2_mouse_command(command, param); + return stack_hop(command, (u32)param, usb_mouse_command); + return stack_hop(command, (u32)param, ps2_mouse_command); }
#define RET_SUCCESS 0x00
Now that the extra stack is used for keyboard and mouse driver code, there is no reason to set noinline (which was done to try and conserve stack space).
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/kbd.c | 6 ++---- src/mouse.c | 2 +- src/usb-hid.c | 2 +- src/usb.c | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/src/kbd.c b/src/kbd.c index 0da13a1..0aa8988 100644 --- a/src/kbd.c +++ b/src/kbd.c @@ -378,10 +378,8 @@ static struct scaninfo { { 0x8600, 0x8800, 0x8a00, 0x8c00, none }, /* F12 */ };
-// Handle a scancode read from the ps2 port. Note that "noinline" is -// used to make sure the call to call16_int in process_key doesn't -// have the overhead of this function's stack. -static void noinline +// Handle a ps2 style scancode read from the keyboard. +static void __process_key(u8 scancode) { u8 flags0 = GET_BDA(kbd_flag0); diff --git a/src/mouse.c b/src/mouse.c index ece69db..e4b25e0 100644 --- a/src/mouse.c +++ b/src/mouse.c @@ -303,7 +303,7 @@ invoke_mouse_handler(u16 ebda_seg) : "edi", "esi", "cc", "memory"); }
-void noinline +void process_mouse(u8 data) { if (!CONFIG_MOUSE) diff --git a/src/usb-hid.c b/src/usb-hid.c index a4fe4ae..8c4b803 100644 --- a/src/usb-hid.c +++ b/src/usb-hid.c @@ -224,7 +224,7 @@ struct usbkeyinfo { struct usbkeyinfo LastUSBkey VARLOW;
// Process USB keyboard data. -static void noinline +static void handle_key(struct keyevent *data) { dprintf(9, "Got key %x %x\n", data->modifiers, data->keys[0]); diff --git a/src/usb.c b/src/usb.c index 1391f0e..bde7a58 100644 --- a/src/usb.c +++ b/src/usb.c @@ -70,7 +70,7 @@ usb_send_bulk(struct usb_pipe *pipe_fl, int dir, void *data, int datasize) } }
-int noinline +int usb_poll_intr(struct usb_pipe *pipe_fl, void *data) { switch (GET_LOWFLAT(pipe_fl->type)) {
Some versions of gcc have been found to inline these functions, which causes the calling functions to use a very large amount of stack space. Since these functions are called from 16bit mode, their stack space usage is very sensitive.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/kbd.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/kbd.c b/src/kbd.c index 0aa8988..e9ea594 100644 --- a/src/kbd.c +++ b/src/kbd.c @@ -165,7 +165,7 @@ handle_1609(struct bregs *regs) }
// GET KEYBOARD ID -static void +static void noinline handle_160a(struct bregs *regs) { u8 param[2]; @@ -231,7 +231,7 @@ handle_16XX(struct bregs *regs) warn_unimplemented(regs); }
-static void +static void noinline set_leds(void) { u8 shift_flags = (GET_BDA(kbd_flag0) >> 4) & 0x07;
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/system.c | 6 +++--- src/util.c | 11 ++++++----- 2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/src/system.c b/src/system.c index 898c3cc..6658e22 100644 --- a/src/system.c +++ b/src/system.c @@ -162,8 +162,8 @@ handle_1587(struct bregs *regs) // Restore %ds (from %ss) " movw %%ss, %%ax\n" " movw %%ax, %%ds\n" - : "+c"(count), "+S"(si) - : : "eax", "di", "cc"); // XXX - also clobbers %es + : "+c"(count), "+S"(si), "+m" (__segment_ES) + : : "eax", "di", "cc");
set_a20(prev_a20_enable);
@@ -222,7 +222,7 @@ handle_1589(struct bregs *regs) " movw %%ax, %%ds\n" " movw %%ax, %%ss\n" : - : "S"(gdt_far) + : "S"(gdt_far), "m" (__segment_ES) : "eax", "cc"); }
diff --git a/src/util.c b/src/util.c index 0e69913..dcc0b71 100644 --- a/src/util.c +++ b/src/util.c @@ -93,7 +93,7 @@ memset_far(u16 d_seg, void *d_far, u8 c, size_t len) asm volatile( "rep stosb %%es:(%%di)" : "+c"(len), "+D"(d_far) - : "a"(c) + : "a"(c), "m" (__segment_ES) : "cc", "memory"); }
@@ -105,7 +105,7 @@ memset16_far(u16 d_seg, void *d_far, u16 c, size_t len) asm volatile( "rep stosw %%es:(%%di)" : "+c"(len), "+D"(d_far) - : "a"(c) + : "a"(c), "m" (__segment_ES) : "cc", "memory"); }
@@ -137,7 +137,7 @@ memcpy_far(u16 d_seg, void *d_far, u16 s_seg, const void *s_far, size_t len) "rep movsb (%%si),%%es:(%%di)\n" "movw %w0, %%ds" : "=&r"(bkup_ds), "+c"(len), "+S"(s_far), "+D"(d_far) - : "r"(s_seg) + : "r"(s_seg), "m" (__segment_ES) : "cc", "memory"); }
@@ -166,7 +166,7 @@ memcpy(void *d1, const void *s1, size_t len) asm volatile( "rep movsb (%%esi),%%es:(%%edi)" : "+c"(len), "+S"(s1), "+D"(d) - : : "cc", "memory"); + : "m" (__segment_ES) : "cc", "memory"); return d1; } // Common case - use 4-byte copy @@ -174,7 +174,7 @@ memcpy(void *d1, const void *s1, size_t len) asm volatile( "rep movsl (%%esi),%%es:(%%edi)" : "+c"(len), "+S"(s1), "+D"(d) - : : "cc", "memory"); + : "m" (__segment_ES) : "cc", "memory"); return d1; }
@@ -183,6 +183,7 @@ memcpy(void *d1, const void *s1, size_t len) void iomemcpy(void *d, const void *s, u32 len) { + ASSERT32FLAT(); yield(); while (len > 3) { u32 copylen = len;
The compiler does a better job when passing parameters by value.
Signed-off-by: Kevin O'Connor kevin@koconnor.net --- src/disk.c | 36 +++++++++++++++++++----------------- src/disk.h | 1 + 2 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/src/disk.c b/src/disk.c index ed54e97..3ca5697 100644 --- a/src/disk.c +++ b/src/disk.c @@ -55,23 +55,25 @@ __disk_stub(struct bregs *regs, int lineno, const char *fname) __disk_stub((regs), __LINE__, __func__)
// Get the cylinders/heads/sectors for the given drive. -static void -fillLCHS(struct drive_s *drive_g, u16 *nlc, u16 *nlh, u16 *nlspt) +static struct chs_s +getLCHS(struct drive_s *drive_g) { + struct chs_s res = { }; if (CONFIG_CDROM_EMU && drive_g == GLOBALFLAT2GLOBAL(GET_GLOBAL(cdemu_drive_gf))) { // Emulated drive - get info from CDEmu. (It's not possible to // populate the geometry directly in the driveid because the // geometry is only known after the bios segment is made // read-only). - *nlc = GET_LOW(CDEmu.lchs.cylinders); - *nlh = GET_LOW(CDEmu.lchs.heads); - *nlspt = GET_LOW(CDEmu.lchs.spt); - return; + res.cylinders = GET_LOW(CDEmu.lchs.cylinders); + res.heads = GET_LOW(CDEmu.lchs.heads); + res.spt = GET_LOW(CDEmu.lchs.spt); + return res; } - *nlc = GET_GLOBAL(drive_g->lchs.cylinders); - *nlh = GET_GLOBAL(drive_g->lchs.heads); - *nlspt = GET_GLOBAL(drive_g->lchs.spt); + res.cylinders = GET_GLOBAL(drive_g->lchs.cylinders); + res.heads = GET_GLOBAL(drive_g->lchs.heads); + res.spt = GET_GLOBAL(drive_g->lchs.spt); + return res; }
// Perform read/write/verify using old-style chs accesses @@ -94,8 +96,8 @@ basic_access(struct bregs *regs, struct drive_s *drive_g, u16 command) } dop.count = count;
- u16 nlc, nlh, nlspt; - fillLCHS(drive_g, &nlc, &nlh, &nlspt); + struct chs_s chs = getLCHS(drive_g); + u16 nlc=chs.cylinders, nlh=chs.heads, nlspt=chs.spt;
// sanity check on cyl heads, sec if (cylinder >= nlc || head >= nlh || sector > nlspt) { @@ -206,8 +208,8 @@ disk_1305(struct bregs *regs, struct drive_s *drive_g) { debug_stub(regs);
- u16 nlc, nlh, nlspt; - fillLCHS(drive_g, &nlc, &nlh, &nlspt); + struct chs_s chs = getLCHS(drive_g); + u16 nlh=chs.heads, nlspt=chs.spt;
u8 num_sectors = regs->al; u8 head = regs->dh; @@ -232,8 +234,8 @@ static void noinline disk_1308(struct bregs *regs, struct drive_s *drive_g) { // Get logical geometry from table - u16 nlc, nlh, nlspt; - fillLCHS(drive_g, &nlc, &nlh, &nlspt); + struct chs_s chs = getLCHS(drive_g); + u16 nlc=chs.cylinders, nlh=chs.heads, nlspt=chs.spt; nlc--; nlh--; u8 count; @@ -340,8 +342,8 @@ disk_1315(struct bregs *regs, struct drive_s *drive_g) // Hard drive
// Get logical geometry from table - u16 nlc, nlh, nlspt; - fillLCHS(drive_g, &nlc, &nlh, &nlspt); + struct chs_s chs = getLCHS(drive_g); + u16 nlc=chs.cylinders, nlh=chs.heads, nlspt=chs.spt;
// Compute sector count seen by int13 u32 lba = (u32)(nlc - 1) * (u32)nlh * (u32)nlspt; diff --git a/src/disk.h b/src/disk.h index 68e866d..6776ee6 100644 --- a/src/disk.h +++ b/src/disk.h @@ -186,6 +186,7 @@ struct chs_s { u16 heads; // # heads u16 cylinders; // # cylinders u16 spt; // # sectors / track + u16 pad; };
// ElTorito Device Emulation data