Author: wmb
Date: 2008-08-27 08:35:16 +0200 (Wed, 27 Aug 2008)
New Revision: 890
Added:
cpu/x86/float387.fth
Log:
Checked in x86 floating point routines.
Added: cpu/x86/float387.fth
===================================================================
--- cpu/x86/float387.fth (rev 0)
+++ cpu/x86/float387.fth 2008-08-27 06:35:16 UTC (rev 890)
@@ -0,0 +1,734 @@
+\ Forth Floating point package for i387 floating point coprocessor.
+\
+\ Implements the Forth Vendors Group proposed Forth Floating Point Standard
+\ for a 386/387 or 486 system.
+\ Uses the coprocessor's internal floating point stack.
+\ All floating point numbers are IEEE double-precision format.
+
+\needs error-output alias error-output noop
+\needs restore-output alias restore-output noop
+
+also assembler definitions
+: fop ( byte1 byte2 -- ) swap asm8, asm8, ;
+previous definitions
+
+hex
+only forth definitions
+vocabulary floating
+only forth also hidden also floating also definitions
+
+
+d# 16 constant f#places
+8 constant /f
+/f constant f#bytes
+
+\ Clear the task-switched bit. We really should save and restore
+\ the FP environment.
+
+use-postfix-assembler
+
+label fp-ts-handler
+\ ax push
+\ cr0 ax mov
+\ 8 # al and
+\ ax pop
+\ 0<> if
+ 0f 06 fop \ CLTS
+ iret
+\ else
+\ ax pop
+\ 7 # push
+\ save-state #) jmp
+\ then
+end-code
+
+\ \ nuser xx
+\ label fp-exc-handler
+\ ax push
+\ ax ax xor
+\ df e0 fop \ FNSTSW AX
+\ ax 'user xx mov
+\ ax pop
+\ db e2 fop \ FNCLEX
+\ iret
+\ end-code
+
+code (finit) ( -- ) db e3 fop c; \ FNINIT
+
+: finit ( -- )
+[ifdef] pm-vector!
+ fp-ts-handler cs@ d# 7 pm-vector!
+\ fp-exc-handler d# 16 set-vector
+[then]
+ (finit)
+;
+finit
+
+
+
+d# 28 buffer: fpenvbuf
+code fstenv ( -- )
+ 'user fpenvbuf ax mov
+ 9b d9 fop 0 [ax] /6 mem,
+ 0 [ax] /5 d9 esc \ fstcw 0 [ax] restore cw
+c;
+: .ftags ( -- )
+ fstenv fpenvbuf 8 + w@
+ ??cr ." 01234567" cr
+ 8 0 do
+ dup 3 and " +0X-" drop + c@ emit
+ 2 >>
+ loop
+ drop cr
+;
+
+code fcw! ( cw -- )
+ sp ax mov
+ wait 0 [ax] /5 d9 esc \ fstcw 0 [ax]
+ ax pop
+c;
+code fcw@ ( -- cw )
+ ax ax xor
+ ax push
+ sp ax mov
+ 0 [ax] /7 d9 esc \ fldcw 0 [ax]
+c;
+\ \ : +exc cr0@ h# 20 or cr0! fcw@ 1 not and fcw! ;
+
+
+code (fxam) ( -- type )
+ ax ax xor
+ d9 e5 fop \ FXAM
+ 9b asm8, df e0 fop \ FWAIT FSTSW AX
+ ax push
+c;
+: fntype ( -- type sign )
+ (fxam)
+ \ C3 bit - #14 C2 bit - #10 C0 bit - #8
+ dup d# 12 >> 4 and over d# 9 >> 2 and or over d# 8 >> 1 and or
+ \ C1 bit - #9
+ swap h# 20 and 0<>
+;
+string-array fntypes
+ ," Unsupported"
+ ," NAN"
+ ," Normal"
+ ," Infinity"
+ ," Zero"
+ ," Empty"
+ ," Denormal"
+end-string-array
+: .ftype ( -- )
+ fntype if ." +" else ." -" then
+ fntypes ".
+;
+
+code fstatus ( -- n )
+ ax ax xor \ Clear high word
+ df e0 fop \ FNSTSW AX
+ ax push
+c;
+: fdepth ( -- n )
+ fstatus d# 11 >> 7 and dup if 8 swap - then
+;
+
+: binaryfop: ( byte2 -- )
+ [compile] code
+ [ also assembler ] de over fop [compile] c; [ previous ] drop
+;
+
+c1 binaryfop: f+
+c9 binaryfop: f*
+
+\ d0 binaryfop: fcom \ Version that doesn't pop the stack
+d9 binaryfop: fcmp
+
+\ The Intel 486 manual would have you believe that these should be e1 and
+\ f1 (reverse subtract and reverse divide), but e9 and f9 actually work.
+e1 binaryfop: frsub
+e9 binaryfop: f-
+
+f1 binaryfop: frdiv
+f9 binaryfop: f/
+
+: unaryfop: ( byte2 -- )
+ [compile] code
+ [ also assembler ] d9 over fop [compile] c; [ previous ] drop
+
+;
+
+alias fcon: unaryfop: ( byte2 -- )
+
+e0 unaryfop: fnegate
+e1 unaryfop: fabs
+\ e2 unaryfop: fclex
+\ e3 ----- (but finit is db e3)
+\ e4 unaryfop: ftst \ See unaryfcmp
+\ e5 unaryfop: fxam \ See (fxam)
+\ e6 -----
+\ e7 -----
+
+e8 fcon: 1E0
+e9 fcon: log2(10)
+ea fcon: log2(e)
+eb fcon: pi
+ec fcon: log10(2)
+ed fcon: ln(2)
+ee fcon: 0E0
+\ ef -----
+
+f0 unaryfop: f2xm1
+f1 unaryfop: fyl2x
+f2 unaryfop: fptan \ Accurate only if |arg| < 2^63
+f3 unaryfop: fpatan ( rnum rdenom -- ratan )
+
+f4 unaryfop: fxtract ( r1 -- rexponent rsignificand )
+f5 unaryfop: fprem1
+\ f6 unaryfop: fdecstp
+\ f7 unaryfop: fincstp
+f8 unaryfop: fprem
+
+\ f9 unaryfop: fyl2xp1
+fa unaryfop: fsqrt
+fb unaryfop: fsincos
+fc unaryfop: frndint
+fd unaryfop: fscale
+fe unaryfop: fsin
+ff unaryfop: fcos
+
+
+
+
+\ 0c unaryfop: fasin
+\ 1c unaryfop: facos
+
+\ 0d unaryfop: fatanh
+\ 02 unaryfop: fsinh
+\ 19 unaryfop: fcosh
+\ 09 unaryfop: ftanh
+
+\ See page 17-5 for information about comparisons
+also assembler definitions
+: xsetif ( dest set-cond -- )
+ h# 0f asm8,
+ ( set-cond ) asm8, 0 r/m,
+;
+previous definitions
+: binaryfcmp: ( extension-field -- ) ( Later: f1 f2 -- flag )
+ [compile] code
+ [ also assembler ]
+ bx bx xor
+ de d9 fop \ FCOMPP
+ 9b asm8, df e0 fop \ FWAIT FSTSW AX
+ sahf
+ bl over xsetif bx neg bx push
+ [compile] c;
+ [ previous ]
+ drop
+;
+
+94 binaryfcmp: f=
+95 binaryfcmp: f<>
+97 binaryfcmp: f<
+96 binaryfcmp: f>=
+93 binaryfcmp: f<=
+92 binaryfcmp: f>
+
+: unaryfcmp: ( extension-field -- ) ( Later: f1 -- flag )
+ [compile] code
+ [ also assembler ]
+ bx bx xor
+ d9 e4 fop \ FTST
+ 9b asm8, df e0 fop \ FWAIT FSTSW AX
+ sahf
+ bl over xsetif bx neg bx push
+ dd c0 fop d9 f7 fop \ FFREE ST FINCSTP (fdrop)
+ [compile] c;
+ [ previous ]
+ drop
+;
+
+94 unaryfcmp: f0=
+95 unaryfcmp: f0<>
+92 unaryfcmp: f0<
+93 unaryfcmp: f0>=
+96 unaryfcmp: f0<=
+97 unaryfcmp: f0>
+
+code fix ( f -- l )
+ ax push \ Make room on stack
+ sp ax mov
+ 0 [ax] /3 db esc \ fistp 0 [ax]
+ wait
+c;
+code float ( l -- f )
+ sp ax mov
+ 0 [ax] /0 db esc \ fild 0 [ax]
+ wait
+ ax pop \ Discard top of stack
+c;
+code f! ( f addr -- )
+ ax pop
+ 0 [ax] /3 dd esc \ FST 0 [ax].64
+ wait
+c;
+code f@ ( addr -- f )
+ ax pop
+ 0 [ax] /0 dd esc \ FLD 0 [ax].64
+ wait
+c;
+
+code fdrop ( f -- )
+ dd c0 fop d9 f7 fop \ FFREE ST FINCSTP (fdrop)
+c;
+code fswap ( f1 f2 -- f2 f1 )
+ d9 c9 fop \ FXCH ST(1)
+c;
+
+code fover ( f1 f2 -- f1 f2 f1 )
+ d9 c1 fop \ FLD ST(1)
+c;
+code fdup ( f -- f f )
+ d9 c0 fop \ FLD ST(0)
+c;
+
+\ Gack! This will be hard to implement if we let the stack spill into memory!
+code fpick ( f x x x n -- f x x x f )
+ ax pop
+ h# c1 asm8, ax /4 r/m, 8 asm8, \ SHL AX,#8
+ h# c3c0d9 # ax add \ Create FLD ST(n) , RET instruction
+ ax push \ Put it on the stack
+ sp call \ Execute it
+ ax pop
+c;
+
+code frot ( f1 f2 f3 -- f2 f3 f1 )
+ d9 c9 fop \ FXCH ST(1) f1 f2 f3 -> f1 f3 f2
+ d9 ca fop \ FXCH ST(2) f1 f3 f2 -> f2 f3 f1
+c;
+
+code f-rot ( f1 f2 f3 -- f3 f1 f2 )
+ d9 ca fop \ FXCH ST(2) f1 f2 f3 -> f3 f2 f1
+ d9 c9 fop \ FXCH ST(1) f3 f2 f1 -> f3 f1 f2
+c;
+
+code fpop ( f -- l l )
+ ax push
+ ax push
+ sp ax mov
+ 0 [ax] /3 dd esc \ FST 0 [ax].64
+ wait
+c;
+
+code fpush ( l l -- f )
+ sp ax mov
+ 0 [ax] /0 dd esc \ FLD 0 [ax].64
+ wait
+ ax pop
+ ax pop
+c;
+
+: ftan ( r1 -- r2 ) fptan fdrop ;
+: fatan ( r1 -- r2 ) 1E0 fpatan ;
+
+\ We could do this without increasing the stack depth if we used a
+\ memory location for the constant "1E0"
+: 1/f ( r1 -- r2 ) 1E0 frdiv ;
+
+: flog2 ( r1 -- r2 ) 1E0 fswap fyl2x ;
+: flog ( r1 -- r2 ) log2(10) 1/f fswap fyl2x ;
+: fln ( r1 -- r2 ) log2(e) 1/f fswap fyl2x ;
+
+code ftrunc ( r1 -- r2 )
+ ax push \ Make room on stack
+ sp ax mov
+ 0 [ax] /7 d9 esc \ fnstcw 0 [ax]
+ op: 0 [ax] bx mov \ Get cw into bx
+ h# c00 # bx or \ Truncate mode
+ op: bx 2 [ax] mov \ Get cw into ax
+ 2 [ax] /5 d9 esc \ fnldcw 2 [ax] truncate toward 0 mode
+ d9 fc fop \ frndint
+ 0 [ax] /5 d9 esc \ fnldcw 0 [ax] restore previous mode
+ ax pop \ Clean up stack
+c;
+code ffceil ( r1 -- r2 )
+ ax push \ Make room on stack
+ sp ax mov
+ 0 [ax] /7 d9 esc \ fnstcw 0 [ax]
+ op: 0 [ax] bx mov \ Get cw into bx
+ h# 800 # bx or \ Truncate mode
+ op: bx 2 [ax] mov \ Get cw into ax
+ 2 [ax] /5 d9 esc \ fnldcw 2 [ax] truncate toward 0 mode
+ d9 fc fop \ frndint
+ 0 [ax] /5 d9 esc \ fnldcw 0 [ax] restore previous mode
+ ax pop \ Clean up stack
+c;
+code fffloor ( -- )
+ ax push \ Make room on stack
+ sp ax mov
+ 0 [ax] /7 d9 esc \ fnstcw 0 [ax]
+ op: 0 [ax] bx mov \ Get cw into bx
+ h# 400 # bx or \ Truncate mode
+ op: bx 2 [ax] mov \ Get cw into ax
+ 2 [ax] /5 d9 esc \ fnldcw 2 [ax] truncate toward 0 mode
+ d9 fc fop \ frndint
+ 0 [ax] /5 d9 esc \ fnldcw 0 [ax] restore previous mode
+ ax pop \ Clean up stack
+c;
+: int ( r -- l ) ftrunc fix ;
+: fceil ( r -- l ) ffceil fix ;
+: ffloor ( r -- l ) fffloor fix ;
+
+h# 3f800000 constant ione
+
+code falog2 ( r1 -- r2 )
+ \ fdup
+ d9 c0 fop \ FLD ST(0) ( fdup )
+
+ \ ftrunc
+ ax push \ Make room on stack
+ sp ax mov
+ 0 [ax] /7 d9 esc \ fnstcw 0 [ax]
+ op: 0 [ax] bx mov \ Get cw into bx
+ h# c00 # bx or \ Truncate mode
+ op: bx 2 [ax] mov \ Get cw into ax
+ 2 [ax] /5 d9 esc \ fnldcw 2 [ax] truncate toward 0 mode
+ d9 fc fop \ frndint
+ 0 [ax] /5 d9 esc \ fnldcw 0 [ax] restore previous mode
+ ax pop \ Clean up stack
+
+ \ fswap
+ d9 c9 fop \ FXCH ST(1) ( integer-part r1 )
+
+ \ fover f-
+ d8 e1 fop \ FSUB ST, ST(i) ( integer fraction )
+
+ \ f2xm1
+ d9 f0 fop \ F2XM1 ( integer alog-1 )
+
+ \ 1E0 f+
+ d8 asm8, 'body ione #) /0 r/m, \ FADD m32real ione
+
+ \ fscale
+ d9 fd fop \ FSCALE
+
+ \ fswap fdrop
+ dd d9 fop \ FSTP ST(1)
+c;
+
+\ : falog2 ( r1 -- r2 )
+\ fdup ftrunc fswap fover f- ( integer-part fractional-part )
+\ f2xm1 1E0 f+ fscale ( integer-part result )
+\ fswap fdrop
+\ ;
+: falog ( r1 -- r2 ) log2(10) f* falog2 ;
+: faln ( r1 -- r2 ) log2(e) f* falog2 ;
+
+: f** ( r1 r2 -- r3 ) fswap fyl2x falog2 ;
+
+: f, ( f -- ) here /f allot f! ;
+
+: fvariable create /f allot ;
+
+: fconstant ( fp -- )
+ create here /f allot f!
+ ;code [apf] /0 dd esc \ FLD [apf].64
+c;
+
+
+1E0 fdup f+ pi f* 1/f fconstant 1/twopi
+log2(e) falog2 fconstant (e)
+
+code (flit) ( -- fp )
+ 0 [ip] /0 dd esc \ FLD 0 [ip].64
+ wait
+ /f # ip add
+c;
+
+: fliteral ( fp -- ) compile (flit) f, ; immediate
+
+\ Intermediate steps in the development of floating point I/O
+\ These words aren't needed after fliteral? is working
+\
+\ : >f ( l -- fscaled )
+\ float dpl @ 0> if td 10 float dpl @ float f** f/ then
+\ ;
+\
+\ Example: 1.3 E 4 puts the floating point number 13000 on the float stack
+\ Works inside of colon definitions too.
+\ : E \ exponent ( l -- fscaled )
+\ state @ if
+\ \ If we're compiling, we have to grab the number from the code stream
+\ here /l - /token - token@
+\ ['] (llit) = if
+\ here /l - l@ /l /token + negate allot
+\ else
+\ ." E must be preceded by a number containing a decimal point"
+\ cr abort
+\ then
+\ then
+\ >f bl word number float falog f*
+\ state @ if [compile] fliteral then
+\ ; immediate
+
+variable #places
+: places ( n -- ) f#places min #places ! ;
+2 places
+
+d# 10 buffer: fstrbuf \ BCD buffer used for floating to ASCII conversion
+
+\ Read a nibble from the BCD conversion buffer
+: bcd@ ( offset -- )
+ 2 /mod fstrbuf + ( offset adr )
+ c@ ( offset byte )
+ swap if 4 >> then h# f and
+;
+
+\ Write a nibble into the BCD conversion buffer
+: bcd! ( digit offset -- )
+ swap h# 0f and swap
+ 2 /mod fstrbuf + ( digit offset adr )
+ dup >r c@ ( digit offset byte ) ( r: adr )
+
+ \ Merge the new digit into the appropriate nibble
+ swap if ( digit byte )
+ h# 0f and swap 4 <<
+ else
+ h# f0 and
+ then
+ or r> c!
+;
+
+\ Simple non-destructive printing of top of stack for debugging
+\ : f..
+\ fdup d# 100 float f* fint
+\ dup abs <# # # ascii . hold #s nlswap sign #>
+\ type space
+\ ;
+
+code fbstp ( r adr -- ) ax pop 0 [ax] /6 df esc c;
+code fbld ( adr -- r ) ax pop 0 [ax] /4 df esc c;
+
+: fpack ( #places r -- ) float falog f* fstrbuf fbstp ; \ Scale by 10^#places
+
+\ Number of digits to the left of the decimal point
+: #idigits ( r -- n )
+ fdup f0= if 0 else fabs flog ffloor 1+ then
+;
+
+fvariable fnum
+
+code fsave ( -- )
+ d# 108 # rp sub
+ wait dd asm8, 0 [rp] /6 mem,
+c;
+code frestore ( -- )
+ wait dd asm8, 0 [rp] /4 mem, wait
+ d# 108 # rp add
+c;
+\ Append a decimal digit, or a '?' if the result is indefinite
+: fdigit ( index -- )
+ d# 19 bcd@ 1 and if drop ascii ? else bcd@ ascii 0 + then hold
+;
+: ?- ( -- ) d# 19 bcd@ h# 8 and if ascii - hold then ;
+\ Convert floating point number to a string in exponential notation
+: (e.) ( r -- adr len )
+ fpop fsave fpush
+ base @ >r decimal
+ fdup #idigits
+ #places @ over - fpack
+ dup abs <# #s nlswap sign ascii E hold
+ #places @ 1 max 0 do i fdigit loop
+ ascii . hold
+ ?-
+ #>
+ r> base !
+ frestore
+;
+
+: e. ( r -- ) (e.) type space ; \ Display in exponential notation
+
+\ Convert floating point number to a string in floating point notation
+: (f.) ( r -- adr len )
+\ fdup #idigits #places @ negate d# 18 over + within if
+ fpop fsave fpush
+ fdup #idigits d# 18 #places @ - < if
+ #places @ fpack
+ frestore
+ 0 <#
+ #places @ 0 ?do i fdigit loop
+ ascii . hold
+ \ Find leading nonzero digit
+ d# 19 bcd@ 1 and if
+ #places @
+ else
+ #places @ dup d# 17 min d# 17 do
+ i bcd@ 0<> if drop i leave then
+ -1 +loop ( nonzero-digit )
+ then
+ 1+ #places @ do i fdigit loop
+ ?-
+ u#>
+ else
+ fpop frestore fpush
+ (e.)
+ then
+;
+
+: f. ( f -- ) (f.) type space ; \ Display in floating point notation
+
+: fclear ( -- ) fdepth 0 ?do fdrop loop ;
+
+: (f.s) ( -- )
+ 0 fdepth 1- do i fpick f. -1 +loop
+;
+
+: f.s ( -- )
+ fdepth 0= if ." Empty" exit then
+ (f.s)
+;
+
+\ Floating point input conversion
+
+hidden definitions
+
+\ Variables used by the number scanner
+variable exponent
+variable point-seen
+variable #digits
+
+: getexponent ( adr len -- adr 0 )
+ dup if ( adr len )
+ push-decimal
+ 0. 2swap >number ( d adr' len' )
+ pop-base
+ nip or abort" bad exponent" ( n )
+ else ( adr len )
+ nip ( 0 )
+ then ( exponent )
+ exponent @ - exponent !
+ exponent @ d# -4932 d# 4932 between 0= abort" Exponent out of range"
+ 0 0
+;
+
+: nextdigit ( adr -- adr' char ) 1- dup c@ ascii 0 - ;
+
+\ Store digits of input number in the conversion buffer, from right to left
+: putdigits ( adr -- )
+ 1- \ Back up over 'E' ( adr' )
+
+ \ Post-decimal digits
+
+ exponent @ 0 ?do nextdigit i bcd! loop ( adr' )
+
+ point-seen @ if 1- then \ Skip decimal point
+
+ \ Pre-decimal digits
+
+ #digits @ exponent @ ?do nextdigit i bcd! loop ( adr' )
+ drop
+;
+
+\ Convert a string to a floating point number
+: $scanfloat ( $ -- r )
+ fsave
+ fstrbuf d# 10 erase
+ point-seen off 0 exponent ! #digits off
+ begin dup while ( adr len )
+ over 1+ swap 1- ( adr adr' len' )
+ rot c@ case
+ ascii - of d# 19 bcd@ 8 or d# 19 bcd! endof
+ ascii + of endof
+ ascii . of point-seen on endof
+ ascii , of point-seen on endof
+ ascii E of over putdigits getexponent endof
+ ( digit ) 1 #digits +!
+ point-seen @ if 1 exponent +! then
+ endcase
+ repeat ( adr len )
+ 2drop
+
+ \ The digits have been packed into the BCD buffer, and "exponent"
+ \ contains the base 10 exponent. Finish the conversion.
+
+ fstrbuf fbld exponent @ float falog f*
+ fpop frestore fpush
+;
+
+: fdigit? ( char -- flag ) \ True if char is a valid floating point digit
+ dup ascii 0 ascii 9 between ( char flag )
+ over ascii E = or
+ over ascii . = or
+ over ascii , = or
+ over ascii + = or
+ swap ascii - = or
+;
+
+forth definitions
+
+: $fnumber? ( $ -- false | f true )
+ base @ d# 10 <> if 2drop false exit then
+ 2dup bounds ?do ( $ )
+ i c@ fdigit? 0= if 2drop false unloop exit then
+ loop ( $ )
+ $scanfloat true
+;
+
+hidden definitions
+: ?fstack ( -- )
+ fdepth 0< if
+ ." Floating Point Stack Underflow"
+ fclear
+ then
+ fdepth h# 20 >= if
+ ." Floating Point Stack Overflow"
+ fclear
+ then
+ prompt
+;
+
+: $fhandle-literal? ( $ -- handled? )
+ 2>r 2r@ $dnumber? ?dup if ( n 1 | d 2 )
+ (do-literal) ( n | d | )
+ 2r> 2drop true exit
+ then ( r: $ )
+ 2r> $fnumber? if ( f )
+ state @ if ( f )
+ [compile] fliteral ( )
+ then ( f | )
+ true exit
+ then
+ false
+;
+: .flit ( ip -- ip' ) ta1+ dup f@ f. /f + ;
+: skip-flit ( ip -- ip' ) ta1+ /f + ;
+: install-fliteral ( -- )
+ ['] ?fstack ['] prompt ['] do-prompt (patch
+
+ ['] $fhandle-literal? is $handle-literal?
+
+ ['] (flit) ['] .flit ['] skip-flit install-decomp
+ [compile] [
+;
+: remove-fliteral ( -- )
+ ['] prompt ['] ?fstack ['] do-prompt (patch
+ ['] ($handle-literal?) is $handle-literal?
+ [compile] [
+;
+
+forth definitions
+decimal
+install-fliteral
+
+forth definitions
+warning @ warning off
+: (cold-hook ( -- )
+ (cold-hook finit
+ only forth floating also forth also definitions
+;
+warning !
+' (cold-hook is cold-hook
+only forth floating also forth also definitions
+
+[ifdef] use-prefix-assembler use-prefix-assembler [then]
+decimal