Author: wmb Date: Wed Aug 31 10:27:55 2011 New Revision: 2494 URL: http://tracker.coreboot.org/trac/openfirmware/changeset/2494
Log: XO-1.75 - Implemented an upsampling filter using iwMMX instructions and used it to upsample the audio for the startup jingle, thus eliminating the aliasing that was occurring when trying to play 8 kHz audio directly. The XO-1.75's codec can play at 8 kHz, but it doesn't do antialiasing at that frequency, so the sound is bad.
Added: cpu/arm/firfilter.fth cpu/arm/iwmmx.fth Modified: cpu/arm/olpc/1.75/devices.fth cpu/arm/olpc/1.75/sound.fth cpu/x86/adpcm.fth
Added: cpu/arm/firfilter.fth
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ cpu/arm/firfilter.fth Wed Aug 31 10:27:55 2011 (r2494)
@@ -0,0 +1,182 @@
+\ See license at end of file
+purpose: Polyphase audio interpolation (upsampling) filter
+
+\needs enable-iwmmx  fload ${BP}/cpu/arm/iwmmx.fth
+
+d# 15 constant mulscale  \ Bits for fractional multipliers
+
+\ Parse a decimal fraction written with nine fractional digits and compile it as a 16-bit multiplier scaled by 2^mulscale
+: mul:  ( "coef" -- )
+   safe-parse-word  push-decimal $number drop pop-base  ( n )   \ NOTE(review): the flag from $number is dropped unchecked
+   1 mulscale lshift  d# 1,000,000,000 */                ( n' )  \ n' = n * 2^mulscale / 10^9
+   w,
+;
+
+\ This is a polyphase interpolation filter that uses 16 taps per phase.
+\ The upsampling ratio is equal to the number of phases.
+\ The number of phases and the array of filter weights are arguments,
+\ so the code could be used for different length filters, given suitable
+\ weight arrays.
+
+\ Taps/phase=16
+\ Stride=2 (mono)
+
+code 16tap-upsample  ( 'out 'in /in 'weights #phases -- )
+   ldmia   sp!,{r0,r1,r2,r3}  \ r0:'weights r1:/in r2:'in r3:'out tos:#phases
+
+   mov     r4,#15             \ Multiplier scale factor (same value as mulscale)
+   tmcr    wcgr0,r4
+
+\ wr0-4   - unaligned samples (five doublewords as loaded from memory)
+\ wr5-8   - aligned samples (the 16-sample window for the current input position)
+\ wr15    - accum
+\ wr9,10  - weights
+\ wcgr0,1 - output scale shift count, alignment shift count
+
+   and     r7,r2,#7           \ Alignment shift count
+   bic     r2,r2,#7           \ r2 - aligned address
+
+   wldrd   wr0,[r2],#8
+   wldrd   wr1,[r2],#8
+   wldrd   wr2,[r2],#8
+   wldrd   wr3,[r2],#8
+   wldrd   wr4,[r2],#8
+
+   begin
+      \ r4: inner loop phase counter
+      \ r2: aligned source address
+      tmcr      wcgr1,r7        \ wcgr1: alignment shift count
+      walignr1  wr5,wr0,wr1     \ Shift samples into place
+      walignr1  wr6,wr1,wr2     \ Shift samples into place
+      walignr1  wr7,wr2,wr3     \ Shift samples into place
+      walignr1  wr8,wr3,wr4     \ Shift samples into place
+
+      mov     r6,r0             \ Restore weights pointer
+      mov     r4,tos            \ Restore phase counter
+      begin
+         wldrd   wr9,[r6],#8    \ W Get the first four weights
+
+         wldrd   wr10,[r6],#8   \ W Get the second four weights
+         wmacsz  wr15,wr5,wr9   \ First multiply-accumulate, pipelined
+
+         wldrd   wr9,[r6],#8    \ W Get the third four weights
+         wmacs   wr15,wr6,wr10  \ Second multiply-accumulate, pipelined
+
+         wldrd   wr10,[r6],#8   \ W Get the fourth four weights
+         wmacs   wr15,wr7,wr9   \ Third multiply-accumulate, pipelined
+
+         wmacs   wr15,wr8,wr10  \ Fourth multiply-accumulate (stalls?)
+
+         wrordg  wr15,wr15,wcgr0  \ Scale the output sample by the multiplier fraction point
+
+         wstrh   wr15,[r3]      \ Store the output sample
+         inc     r3,#2          \ This cannot be combined with the preceding because STC requires a word offset
+
+         decs    r4,#1          \ Decrement phase counter
+      0= until
+
+      inc     r7,#2             \ Increment alignment counter
+      ands    r7,r7,#7          \ Check for next word needed
+      0=  if
+         wor     wr0,wr1,wr1    \ Shift samples
+         wor     wr1,wr2,wr2
+         wor     wr2,wr3,wr3
+         wor     wr3,wr4,wr4
+         wldrd   wr4,[r2],#8    \ Get next group of input samples
+      then
+
+      decs    r1,#2             \ Decrement input length by the sample size
+   0<= until
+
+   pop     tos,sp
+c;
+
+\ Filter coefficients for 6x upsampling. The following filter was
+\ computed with GNU Octave using the program shown at the end of the array.
+\ The transition band is centered on Fs/6. The stopband attenuation is 78dB.
+\ The -3dB point is about 95% of Fs/6.
+\ The coefficients are ordered by phase for easy addressing in the inner loop.
+\ The coefficients are in reverse order so they can be accessed by an
+\ incrementing pointer - the conventional convolutional filter algorithm
+\ runs the data and filter tap pointers in opposite directions but the code
+\ above runs both pointers forward for ease of use with MMX instructions.
+
+d# 6 constant #phases
+
+create weights-6phase
+\ Phase 0
+mul: -0.000631880 mul: 0.002025107 mul: -0.004881022 mul: 0.010022601
+mul: -0.018817755 mul: 0.034801841 mul: -0.074732552 mul: 0.976279469
+mul: 0.080446668 mul: -0.031397096 mul: 0.015062052 mul: -0.007235622
+mul: 0.003214694 mul: -0.001237886 mul: 0.000375061 mul: 0.000190788
+
+\ Phase 1
+mul: -0.001092645 mul: 0.003821226 mul: -0.009784496 mul: 0.021106533
+mul: -0.041296857 mul: 0.078608864 mul: -0.166749609 mul: 0.885823376
+mul: 0.283737941 mul: -0.105580171 mul: 0.052425455 mul: -0.026661852
+mul: 0.012716422 mul: -0.005327045 mul: 0.001786043 mul: -0.000511063
+
+\ Phase 2
+mul: -0.001079175 mul: 0.004090325 mul: -0.010977550 mul: 0.024446032
+mul: -0.048873931 mul: 0.093827768 mul: -0.194040773 mul: 0.720544802
+mul: 0.508384704 mul: -0.167272365 mul: 0.082398233 mul: -0.042537000
+mul: 0.020831489 mul: -0.009060710 mul: 0.003214599 mul: -0.000795594
+
+\ Phase 3
+mul: -0.000795594 mul: 0.003214599 mul: -0.009060710 mul: 0.020831489
+mul: -0.042537000 mul: 0.082398233 mul: -0.167272365 mul: 0.508384704
+mul: 0.720544802 mul: -0.194040773 mul: 0.093827768 mul: -0.048873931
+mul: 0.024446032 mul: -0.010977550 mul: 0.004090325 mul: -0.001079175
+
+\ Phase 4
+mul: -0.000511063 mul: 0.001786043 mul: -0.005327045 mul: 0.012716422
+mul: -0.026661852 mul: 0.052425455 mul: -0.105580171 mul: 0.283737941
+mul: 0.885823376 mul: -0.166749609 mul: 0.078608864 mul: -0.041296857
+mul: 0.021106533 mul: -0.009784496 mul: 0.003821226 mul: -0.001092645
+
+\ Phase 5
+mul: 0.000190788 mul: 0.000375061 mul: -0.001237886 mul: 0.003214694
+mul: -0.007235622 mul: 0.015062052 mul: -0.031397096 mul: 0.080446668
+mul: 0.976279469 mul: -0.074732552 mul: 0.034801841 mul: -0.018817755
+mul: 0.010022601 mul: -0.004881022 mul: 0.002025107 mul: -0.000631880
+
+0 [if]
+\ This Matlab/Octave code computes the weights.
+\ The 95 is one less than the filter length 96 = 6 * 16
+weights = remez(95, [0 .12 .215 1], [1 1 0 0]);
+for phase=1:6
+  for tap=16:-1:1
+    printf("mul: %.9f ", 5.9 * weights((tap-1)*6+phase));
+  end
+  printf("\n");
+end
+[then]
+
+: upsample6  ( src-adr /src dst-adr -- )  \ 6x upsample /src bytes of 16-bit samples at src-adr into dst-adr
+   enable-iwmmx                                    ( src-adr /src dst-adr )
+   -rot  weights-6phase #phases  16tap-upsample    ( )
+;
+
+\ LICENSE_BEGIN
+\ Copyright (c) 2011 FirmWorks
+\
+\ Permission is hereby granted, free of charge, to any person obtaining
+\ a copy of this software and associated documentation files (the
+\ "Software"), to deal in the Software without restriction, including
+\ without limitation the rights to use, copy, modify, merge, publish,
+\ distribute, sublicense, and/or sell copies of the Software, and to
+\ permit persons to whom the Software is furnished to do so, subject to
+\ the following conditions:
+\
+\ The above copyright notice and this permission notice shall be
+\ included in all copies or substantial portions of the Software.
+\
+\ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+\ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+\ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+\ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+\ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+\ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+\ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+\
+\ LICENSE_END
Added: cpu/arm/iwmmx.fth
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ cpu/arm/iwmmx.fth Wed Aug 31 10:27:55 2011 (r2494)
@@ -0,0 +1,120 @@
+\ See license at end of file
+purpose: Intel Wireless MMX support
+
+: enable-iwmmx  ( -- )  \ Sets the low four bits of the coprocessor access register
+   coprocessor-access@  h# f or  coprocessor-access!
+;
+
+d# 16 8 *  d# 8 4 *  +  buffer: iwmmx-buf   \ 16 doubleword wR registers followed by 8 word-sized registers
+code get-iwmmx  ( adr -- )   \ Store wr0-15, wcgr0-3, wcid, wcon, wcssf, wcasf at adr (h# a0 bytes)
+   wstrd  wr0,[tos],#8       \ wR registers occupy offsets 0 .. h# 7f
+   wstrd  wr1,[tos],#8
+   wstrd  wr2,[tos],#8
+   wstrd  wr3,[tos],#8
+   wstrd  wr4,[tos],#8
+   wstrd  wr5,[tos],#8
+   wstrd  wr6,[tos],#8
+   wstrd  wr7,[tos],#8
+   wstrd  wr8,[tos],#8
+   wstrd  wr9,[tos],#8
+   wstrd  wr10,[tos],#8
+   wstrd  wr11,[tos],#8
+   wstrd  wr12,[tos],#8
+   wstrd  wr13,[tos],#8
+   wstrd  wr14,[tos],#8
+   wstrd  wr15,[tos],#8
+   wstrw  wcgr0,[tos],#4     \ wCGR registers occupy offsets h# 80 .. h# 8f
+   wstrw  wcgr1,[tos],#4
+   wstrw  wcgr2,[tos],#4
+   wstrw  wcgr3,[tos],#4
+   wstrw  wcid,[tos],#4      \ Control registers occupy offsets h# 90 .. h# 9f
+   wstrw  wcon,[tos],#4
+   wstrw  wcssf,[tos],#4
+   wstrw  wcasf,[tos],#4
+   pop    tos,sp             \ Discard adr, as the stack diagram promises
+c;
+: dump-iwmmx  ( -- )   \ Snapshot the iwMMX registers into iwmmx-buf and display them in hex
+   iwmmx-buf get-iwmmx
+   push-hex
+   d# 16 0  do                               \ wR registers, four per output line
+      i 4 bounds  do
+         iwmmx-buf i 8 * + d@ d# 17 ud.r
+      loop
+      cr
+   4 +loop
+   space
+   ." wCGRs: "
+   4 0  do
+      iwmmx-buf h# 80 + i la+ l@ d# 9 u.r    \ h# 80 = 16 8 * : first word after the wR registers
+   loop
+   cr
+   ." cid, con, cssf, casf: "
+   4 0  do
+      iwmmx-buf h# 90 + i la+ l@ d# 9 u.r    \ h# 90 : first word after the four wCGR words
+   loop
+   cr
+   pop-base
+;
+
+0 [if]  \ These code words are not expected to be used much; they are examples
+: mcr  0e00.0010 {cond} amode-copr  ;
+: mrc  0e10.0010 {cond} amode-copr  ;
+
+code wcid@  ( -- n )  psh tos,sp  tmrc tos,wcid  c;  \ Coprocessor ID
+
+code wcon@  ( -- n )  psh tos,sp  tmrc tos,wcon  c;  \ Control
+code wcon!  ( n -- )  tmcr wcon,tos  pop tos,sp  c;
+
+code wcasf@  ( -- n )  psh tos,sp  tmrc tos,wcasf  c;  \ Arithmetic flags
+code wcasf!  ( n -- )  tmcr wcasf,tos  pop tos,sp  c;
+
+code wcssf@  ( -- n )  psh tos,sp  tmrc tos,wcssf  c;  \ Saturation flags
+code wcssf!  ( n -- )  tmcr wcssf,tos  pop tos,sp  c;
+
+code wcgr0@  ( -- n )  psh tos,sp  tmrc tos,wcgr0  c;  \ General registers for constants
+code wcgr0!  ( n -- )  tmcr wcgr0,tos  pop tos,sp  c;
+code wcgr1@  ( -- n )  psh tos,sp  tmrc tos,wcgr1  c;
+code wcgr1!  ( n -- )  tmcr wcgr1,tos  pop tos,sp  c;
+code wcgr2@  ( -- n )  psh tos,sp  tmrc tos,wcgr2  c;
+code wcgr2!  ( n -- )  tmcr wcgr2,tos  pop tos,sp  c;
+code wcgr3@  ( -- n )  psh tos,sp  tmrc tos,wcgr3  c;
+code wcgr3!  ( n -- )  tmcr wcgr3,tos  pop tos,sp  c;
+[then]
+
+
+
+
+code wtest   \ Experimental: loads a doubleword from the memory stack into wr1 and moves it to r3,r4
+   wldrd  wr1,[sp],#8
+   tmrrc  r3,r4,wr1
+   psh    r3,sp
+   mov    r4,tos      \ NOTE(review): overwrites r4 with tos just before c; - possibly "mov tos,r4" was intended; confirm
+c;
+
+code firstep  ( adr1 adr2 -- adr1' adr2' d.acc )   \ One 16-element multiply-accumulate of the halfword arrays at adr1 and adr2
+   ldr     r0,[sp]          \ r0: adr1 (adr2 is in tos)
+
+   wldrd   wr0,[r0],#8
+   wldrd   wr1,[tos],#8
+
+   wldrd   wr2,[r0],#8
+   wmacsz  wr4,wr1,wr0      \ First product group; wmacsz starts the sum from zero
+   wldrd   wr3,[tos],#8
+
+   wldrd   wr0,[r0],#8
+   wmacs   wr4,wr2,wr3
+   wldrd   wr1,[tos],#8
+
+   wldrd   wr2,[r0],#8
+   wmacs   wr4,wr1,wr0
+   wldrd   wr3,[tos],#8
+
+   wmacs   wr4,wr2,wr3
+
+   str     r0,[sp]          \ Write back adr1' (adr1 advanced by h# 20 bytes)
+   psh     tos,sp           \ Push adr2' (adr2 advanced by h# 20 bytes)
+
+   tmrrc   r3,tos,wr4       \ d.acc: low word to r3, high word to tos
+   psh     r3,sp
+c;
+
Modified: cpu/arm/olpc/1.75/devices.fth ============================================================================== --- cpu/arm/olpc/1.75/devices.fth Wed Aug 31 10:24:22 2011 (r2493) +++ cpu/arm/olpc/1.75/devices.fth Wed Aug 31 10:27:55 2011 (r2494) @@ -351,6 +351,8 @@
fload ${BP}/dev/olpc/mmp2camera/loadpkg.fth
+fload ${BP}/cpu/arm/firfilter.fth + fload ${BP}/cpu/x86/adpcm.fth \ ADPCM decoding d# 32 is playback-volume
Modified: cpu/arm/olpc/1.75/sound.fth ============================================================================== --- cpu/arm/olpc/1.75/sound.fth Wed Aug 31 10:24:22 2011 (r2493) +++ cpu/arm/olpc/1.75/sound.fth Wed Aug 31 10:27:55 2011 (r2494) @@ -187,7 +187,7 @@
: copy-in  ( -- )
    in-len /audio-buf min  ( this-len )
-   my-in-desc 2 la+ l@ in-adr third move ( this-len )
+   my-in-desc 2 la+ l@  in-adr  third  move  ( this-len )
    in-adr over + to in-adr  ( this-len )
    in-len over - to in-len  ( this-len )
    drop  ( )
@@ -477,6 +477,10 @@
    dup set-ctlr-sample-rate
    set-codec-sample-rate
 ;
+: set-get-sample-rate  ( rate -- actual-rate )  \ Discards the requested rate; always selects and reports 48000
+   drop d# 48000           ( actual-rate )
+   dup set-sample-rate     ( actual-rate )
+;
\ This is called from "record" in "mic-test" in "selftest" : set-record-gain ( db -- )
Modified: cpu/x86/adpcm.fth ============================================================================== --- cpu/x86/adpcm.fth Wed Aug 31 10:24:22 2011 (r2493) +++ cpu/x86/adpcm.fth Wed Aug 31 10:27:55 2011 (r2494) @@ -234,8 +234,22 @@ : wav-blk-size ( -- blk-size ) wav-fmt-adr dup if h# 14 + le-w@ then ;
: set-volume  ( -- )  " set-volume" $call-audio  ;
-: set-sample-rate  ( -- )
-   wav-fmt-adr ?dup  if  h# c + le-l@ " set-sample-rate" $call-audio  then
+0 value src-sample-rate   \ Rate most recently requested through try-set-sample-rate
+0 value dst-sample-rate   \ Rate reported by set-get-sample-rate, or the requested rate when that method is unavailable
+: try-set-sample-rate  ( desired-rate -- )   \ Prefer the audio node's set-get-sample-rate; fall back to set-sample-rate
+   dup to src-sample-rate
+   dup " set-get-sample-rate" ['] $call-audio catch  if  ( desired x x x )   \ The call threw; x x x are leftover arguments
+      3drop                                 ( desired )
+      dup " set-sample-rate" $call-audio    ( desired )
+   else                                     ( desired actual )
+      nip                                   ( actual )
+   then                                     ( actual )
+   to dst-sample-rate                       ( )
+;
+: wav-set-sample-rate  ( -- )   \ Apply the rate read at offset h# c from wav-fmt-adr; does nothing if wav-fmt-adr is 0
+   wav-fmt-adr ?dup  if
+      h# c + le-l@  try-set-sample-rate     ( )
+   then
 ;
0 value out-move @@ -243,35 +257,37 @@ \ Collapse a sample array with "#output-ch" channels/sample into a smaller \ array with "wav-in-#ch" channels/sample, discarding the excess channels.
-: condense-pcm  ( adr in-len -- )
-   wav-in-#ch #output-ch - /w* to in-skip  ( adr in-len )
-   #output-ch /w* to out-move  ( adr in-len )
-   over swap  bounds  ?do  ( out )
-      i over out-move move  ( out )
-      out-move +  ( out' )
-   in-skip +loop  drop  ( )
+: condense-pcm  ( adr in-len -- adr out-len )   \ In place: keep the first #output-ch channels of every input frame
+   2dup #output-ch wav-in-#ch */ 2swap      ( adr out-len adr in-len )
+   wav-in-#ch #output-ch - /w* to in-skip   ( adr out-len adr in-len )   \ Bytes of excess channels per input frame
+   #output-ch /w* to out-move               ( adr out-len adr in-len )   \ Bytes kept per frame
+   over swap  bounds  ?do                   ( adr out-len out )
+      i over out-move move                  ( adr out-len out )
+      out-move +                            ( adr out-len out' )
+   out-move in-skip + +loop  drop           ( adr out-len )   \ Step one whole input frame: kept bytes plus skipped bytes
 ;
\ Spread a sample array with "wav-in-#ch" channels/sample into a larger \ array with "#output-ch" channels/sample, zeroing the new channels.
-: expand-pcm  ( adr in-len -- )
-   #output-ch wav-in-#ch - /w* to out-skip  ( adr in-len )
-   wav-in-#ch /w* to out-move  ( adr in-len )
-   2dup wav-in-#ch / #output-ch *  ( adr in-len adr out-len )
-   + -rot  ( out-adr in-start in-len )
-   over + out-move -  do  ( out-adr )
-      out-skip - dup out-skip erase  ( out-adr' )
-      out-move - i over out-move move  ( out-adr' )
-   out-move negate +loop  ( out-adr )
-   drop
+: expand-pcm  ( adr in-len -- adr out-len )   \ In place: widen each frame from wav-in-#ch to #output-ch channels
+   2dup #output-ch wav-in-#ch */ 2swap       ( adr out-len adr in-len )
+   #output-ch wav-in-#ch - /w* to out-skip   ( adr out-len adr in-len )   \ Bytes of zero fill added per frame
+   wav-in-#ch /w* to out-move                ( adr out-len adr in-len )   \ Bytes copied per frame
+   2dup wav-in-#ch / #output-ch *            ( adr out-len adr in-len adr out-len )
+   + -rot                                    ( adr out-len out-adr in-start in-len )   \ out-adr starts at the end of the output
+   over + out-move -  do                     ( adr out-len out-adr )   \ Index starts at the last input frame and moves toward adr
+      out-skip - dup out-skip erase          ( adr out-len out-adr' )
+      out-move - i over out-move move        ( adr out-len out-adr' )
+   out-move negate +loop                     ( adr out-len out-adr )
+   drop                                      ( adr out-len )
 ;
\ Given a sample array of the form L0, R0, L1, R1, ..., copy the left \ channel into the right, giving L0, L0, L1, L1, etc. This is \ particularly useful when the R samples are initially 0.
-: mono16>stereo16  ( adr len -- )  bounds  ?do  i w@  i wa1+ w!  /l +loop  ;
+: mono16>stereo16  ( adr len -- adr len )  2dup  bounds  ?do  i w@  i wa1+ w!  /l +loop  ;  \ adr len are returned unchanged so the result can be passed straight on
: play-wait ( -- ) " write-done" $call-audio ;
@@ -287,27 +303,55 @@
0 value pcm-base
-: play-raw-pcm  ( adr -- error? )
-   wav-in-#ch 0=  if  drop true exit  then  ( adr )
-
-   \ Allocate DMA memory for the decoded output
-   wav-data-adr 4 - le-l@                   ( adr in-len )
-   dup wav-in-#ch / #output-ch *  to /pcm-output  ( adr in-len )
-   /pcm-output " dma-alloc" $call-audio to pcm-base  ( adr in-len )
-
-   tuck pcm-base swap move  ( in-len )
-
-   #output-ch wav-in-#ch <  if  pcm-base over condense-pcm  then  \ Skip extra channel data
-   #output-ch wav-in-#ch >  if  pcm-base over expand-pcm  then  \ Spread out channel data
-   #output-ch 2 = wav-in-#ch 1 =  and  if  pcm-base over 2* mono16>stereo16  then  \ Stereo from mono
-   drop
+: allocate-playback-buffer  ( -- in-len )   \ Check the rate conversion, then size and allocate the output buffer
+   dst-sample-rate src-sample-rate      ( num denom )
+   2dup =  if                           ( num denom )
+      false                             ( num denom error? )
+   else                                 ( num denom )
+[ifdef] upsample6
+      2dup 6 * <>                       ( num denom error? )   \ Only an exact 6x ratio can be converted
+[else]
+      true                              ( num denom error? )
+[then]
+   then                                 ( num denom error? )
+   abort" Unsupported sample rate conversion"  ( num denom )
+
+   2>r                                  ( r: num denom )
+   wav-data-adr 4 - le-l@               ( in-len r: num denom )   \ Length word stored just before the data
+   dup 2r> */                           ( in-len out-len )   \ Scale by the rate ratio
+   #output-ch wav-in-#ch */             ( in-len out-len' )   \ Scale by the channel-count ratio
+   to /pcm-output                       ( in-len )
+   /pcm-output " dma-alloc" $call-audio to pcm-base  ( in-len )
+;
+
+: move-or-upsample  ( adr -- adr' len )   \ Copy, or 6x upsample, the data at adr into pcm-base
+   allocate-playback-buffer             ( adr in-len )
+
+   src-sample-rate dst-sample-rate =  if  ( adr in-len )
+      tuck pcm-base swap move           ( in-len )
+   else                                 ( adr in-len )
+[ifdef] upsample6
+      dup 6 * -rot                      ( out-len adr in-len )
+      pcm-base upsample6                ( out-len )
+[then]
+   then                                 ( len )   \ Without upsample6 the else path is not reached: allocate-playback-buffer aborts first
+   pcm-base swap                        ( adr' len )
+;
+: play-raw-pcm  ( -- error? )   \ Play the uncompressed PCM data at wav-data-adr; true if wav-in-#ch is 0
+   wav-in-#ch 0=  if  true exit  then   ( )
+
+   wav-data-adr move-or-upsample        ( adr len )
+
+   #output-ch wav-in-#ch <  if  condense-pcm  then  ( adr len' )  \ Skip extra channel data
+   #output-ch wav-in-#ch >  if  expand-pcm  then  ( adr len' )  \ Spread out channel data
+   #output-ch 2 = wav-in-#ch 1 =  and  if  mono16>stereo16  then  ( adr len )  \ Stereo from mono

-   pcm-base /pcm-output (play-pcm)
-   false
+   (play-pcm)                           ( )
+   false                                ( error? )
 ;
-: play-ima-adpcm ( adr -- error? ) - wav-fact-adr 0= if drop true exit then +: play-ima-adpcm ( -- error? ) + wav-fact-adr 0= if true exit then
wav-#sample #output-ch * /w* to /pcm-output
@@ -315,11 +359,12 @@ /pcm-output " dma-alloc" $call-audio to pcm-base
pcm-base /pcm-output erase ( in ) - pcm-base wav-#sample wav-in-#ch wav-blk-size adpcm-decoder ( ) - #output-ch 2 = wav-in-#ch 1 = and if ( ) - pcm-base /pcm-output mono16>stereo16 ( ) - then ( ) - pcm-base /pcm-output (play-pcm) ( ) + wav-data-adr pcm-base wav-#sample wav-in-#ch wav-blk-size adpcm-decoder ( ) + pcm-base /pcm-output ( adr len ) + #output-ch 2 = wav-in-#ch 1 = and if ( adr len ) + mono16>stereo16 ( adr len ) + then ( adr len ) + (play-pcm) ( ) false ( error? ) ;
@@ -340,14 +385,15 @@ then
playback-volume set-volume - set-sample-rate + wav-set-sample-rate
wav-cc case - 1 of wav-data-adr play-raw-pcm endof - h# 11 of wav-data-adr play-ima-adpcm endof + 1 of play-raw-pcm endof + h# 11 of play-ima-adpcm endof ( default ) ." Cannot play .wav format type: " dup .wav-cc true swap cr endcase \ audio-ih close-dev + free-wav ;
: ($play-wav) ( file-str -- )
openfirmware@openfirmware.info