page 132,60,1,1 opt nomd,mex ;******************************************* ;Motorola Austin DSP Operation June 30,1988 ;******************************************* ;DSP56000/1 ;Port to Memory FFT - 64 point ;File name: D-56.asm ;************************************************************************** ; Maximum sample rate: 121.9 us at 20.5 MHZ/ 92.56 us at 27.0 MHz ; Memory Size: Prog: 254 words ; Data: 450 words ; Number of clock cycles: 2499 (1249 instruction cycles) ; Clock Frequency: 20.5MHz/27.0MHz ; Instruction cycle time: 97.5ns / 74.1ns ;************************************************************************** ; fftreald macro points,data,odata,coef,ptr1,ptr2 fftreald ident 1,0 ; ; Radix 2 Decimation in Time In-Place Fast Fourier Transform Routine ; ; Real input data - normally ordered ; Real data in Y memory, 2 buffers: one being filled, the other one being processed ; Complex output data - normally ordered ; Real data in X memory ; Imaginary data in Y memory ; Coefficient lookup table ; -Cosine value in X memory ; -Sine value in Y memory ; ; Macro Call - fftreald points,data,outdata,coef,ptr1,ptr2 ; ; points number of points (2-32768, power of 2) ; data start of data buffer ; outdata output data buffer ; coef start of sine/cosine table ; ptr1 memory location of pointer to input data block 1 ; ptr2 memory location of pointer to input data block 2 ; ; Alters Data ALU Registers ; x1 x0 y1 y0 ; a2 a1 a0 a ; b2 b1 b0 b ; ; Alters Address Registers ; r0 n0 m0 ; r1 n1 m1 ; r2 n2 m2 ; r3 n3 m3 ; r4 n4 m4 ; r5 n5 m5 ; r6 n6 m6 ; r7 n7 m7 ; Alters Program Control Registers ; pc sr ; ; Uses 8 locations on System Stack ; _intdata equ $0 ;internal data space at 0 ; ;Check r7 to see if input buffer is filled ; strt move #points,b ;input buffer length loop move r7,a ;get input data pointer sub a,b ;subtract buffer length from current input location move x:ptr1,a ;move input data base addres into a cmp a,b ;see if equal jne loop ;if not, go back ; ; when ready, swap pointers of buffer to be loaded and buffer to be processed ; move x:ptr1,a move x:ptr2,b move b,x:ptr1 move a,x:ptr2 ; ; main fft routine ; move x:ptr2,r2 ;initialize input pointers move #4,n2 ;initial offset for r2 move r2,r0 ;input pointer for real passes move (r2)+n2 ;update external input pointer for complex passes move #points/4,n0 ;initialize input and output offsets move #points-1,m0 ;initialize address modifiers for modulo N move r0,r4 ;set up butterfly pointers in and out move (r0)+n0 ; move r0,r5 ; move (r0)+n0 ; move r0,r1 ; move r4,r0 ; move m0,m1 ;modulo N for remaining pointers in and out move m0,m4 ; move m0,m5 ; ; ; Do first and second Radix 2 FFT passes: all have real input. First and second ; passes are combined using four-point butterflies. move y:(r0)+n0,a ;get ar move y:(r0)+n0,y1 ;get br move y:(r0)+n0,b ;get cr add a,b y:(r0)+n0,y0 ;(ar+cr),get dr subl b,a ;cr'=(ar-cr) do n0,_twopass ;do all four point butterflies tfr y0,a a,x:(r1) ;get dr,save cr' sub y1,a (r0)+ ;ci'=(dr-br) tfr y1,a a,y:(r1)+ ;get br,save ci' add y0,a y:(r0)+n0,x1 ;(br+dr),get ar add b,a y:(r0)+n0,y1 ;ar'=(ar+cr)+(br+dr),get br subl a,b a,y:(r4)+ ;br'=(ar+cr)-(br+dr),save ar' tfr x1,a b,x0 y:(r0)+n0,b ;get ar,move br',get cr add a,b y:(r0)+n0,y0 ;(ar+cr),get dr subl b,a x0,x:(r5)+ ;cr'=(ar-cr),save br' _twopass ; ; Do next real-input FFT (RFFT) passes. Each RFFT butterfly is a four-point in, ; 3-point out. The fourth point is not computed since it is later obtained by ;using the conjugate symmetry property of the RFFT. ; move #points/8,n5 ;spacing, for 1024 spacing=128 do #@cvi(@log(points)/@log(2)-2.5),_next ;7 passes for 1024 pts move #data,r5 ;point to data move n5,n0 ;same offset move r5,r0 ;ar pointer move (r5)+n5 ;+1/4 move r5,r4 ;br pointer move (r5)+n5 ;+1/2 move r5,r1 ;ci pointer move (r5)+n5 ;+3/4 move y:(r0)+n0,a ;get ar move y:(r0)-n0,b ;get br add a,b ;ar'=(ar+br) do n0,_nextpass ;do for all p subl b,a x:(r5)+,b b,y:(r0)+ ;br'=(ar-br),get dr,save ar' neg b a,x:(r4)+ y:(r0)+n0,a ;ci'=-dr,save br',get ar move b,x0 y:(r0)-n0,b ;move ci',get br add a,b x0,y:(r1)+ ;ar'=(ar+br),save ci' _nextpass move n5,a ;get bflys/pass lsr a ;/2 move a1,n5 ;put back _next ; ; special RFFT pass: real input, (4-point). Complex output: stored in normal ; order, 4-th output stored as complex conjugate of 3rd output. ; ; move #data,r0 ;input pointer move #odata,r4 ;output pointer move #points/2,n4 ;output pointer offset move #0,m4 ;bit reverse output move y:(r0)+,a ;get ar move y:(r0)+,b ;get br add a,b x:(r0)+,x0 ;ar'=ar+br, get cr move b,x:(r4)+n4 ;save ar' subl b,a x:(r0),b ;br'=ar-br, get dr neg b b,y0 a,x:(r4)+n4 ;ci'=-dr, save dr, save br' move x0,x:(r4) ;save cr' move b,y:(r4)+n4 ;save ci' move x0,x:(r4) ;save cr' move y0,y:(r4)+n4 ;save cr,ci'* ; ; do first 2-point complex fft with conjugate storage ; initialization move r2,r0 ;r0 points to external data move #-1,m2 ;linear addr. for external input data pointer move #4,n2 ;offset for external input data pointer move #points/8,r3 ;coefficient base offset -->r3 move (r2)+n2 ;update external input data pointer lua (r0)+,r1 ;initialize input pointer b lua (r3)+n3,r6 ;initialize twiddle factor pointer move #points/4,n4 ;offset for output counter a move r4,n3 ;initialization of conjugate pointer move #odata+points,r3 ; move r4,r5 ;initialize output pointer b move (r3)-n3 ;initialize conjugate pointer move #odata,n3 ; lua (r4)+n4,r5 ;initialize output pointer b move (r3)+n3 ;initialize conjugate pointer move n4,n5 ;initialize offset for output pointer b move #0,m4 ;bit-reversed addressing for output ptr a move (r5)+n5 ;initialize output pointer b move #0,m5 ;bit-reversed addressing for output ptr b move #0,m3 ;bit-reversed addressing for conjug. ptr. move #points/2,n3 ;offset for conjugate pointer move y:(r0),b ;initialize butterfly move (r3)+n3 ;future output pointer a move r3,ssh ;save future output pointer a -->stack move (r3)-n3 ;reinit. conjugate pointer ; ; butterfly with conjugate storage ; move x:(r1),x1 y:(r6),y0 mac x1,y0,b x:(r6),x0 y:(r1),y1 macr -x0,y1,b y:(r0),a neg b b,y:(r4) move b,y:(r3)-n3 addl b,a x:(r0),b neg a a,y:(r5) move a,y:(r3)+n3 mac -x1,x0,b x:(r0),a macr -y1,y0,b subl b,a b,x:(r4)+n4 move b,x:(r3)-n3 move a,x:(r5)+n5 move a,x:(r3)-n3 ; end of butterfly ; ; initialize pointers for complex fft's ; move #coef,n3 ;initialize coefficient base move #-1,m3 move m3,m4 ;output pointer a has linear addr. move m3,m5 ;output pointer b has linear addr. move ssh,r4 ;initialize next external output pointer a position move #2,m2 ;initialize butterflies per group move #1,n4 ;initialize number of passes-1 per FFT ; ; do all the complex fft's that are necessary (up to N/4-point) ; do #@cvi(@log(points)/@log(2)-2.5),_end_fft ;7 for 1024 pt (4- pt....256- pt) ; ; initialize pointers in each fft move r4,ssh ;push output data address onto stack move r2,r0 ;get external data input address for first pass move #points/8,r3 ;update coefficient offset move m2,n1 ;initialize butterflies per group move #1,n2 ;initialize groups per pass ; ; complex fft passes are triple nested do-loops, with last pass split out do n4,_end_pass ;do all passes but last in this fft ; ; initialize pointers in each pass move n4,ssh ;put number of passes-1 in FFT on stack move #_intdata,r4 ;initialize A output pointer move n1,r5 move n1,n0 ;initialize pointer offsets lua (r5)-,n7 move n1,n4 move n1,r6 lua (r0)+n0,r1 ;initialize B input pointer lua (r4)+n4,r5 ;initialize B output pointer lua (r6)+,n4 move n4,n5 lua (r3)+n3,r6 ;initialize W input pointer move n4,n0 ; ; initialize butterfly input move x:(r1),x1 y:(r6),y0 ;lookup -sine value move y:(r0),b ;imag. input a mac x1,y0,b x:(r6)+n6,x0 y:(r1)+,y1 ;cos., imag. input b macr -x0,y1,b y:(r0),a ; ; ; butterflies do n2,_end_grp ;do for all groups do n7,_end_bfy ;do every butterfly in this group subl b,a x:(r0),b b,y:(r4) mac -x1,x0,b x:(r0)+,a a,y:(r5) macr -y1,y0,b x:(r1),x1 subl b,a b,x:(r4)+ y:(r0),b mac x1,y0,b y:(r1)+,y1 ;Radix 2 DIT butterfly kernel macr -x0,y1,b a,x:(r5)+ y:(r0),a ;with constant twiddle factor _end_bfy move (r1)+n1 subl b,a x:(r0),b b,y:(r4) mac -x1,x0,b x:(r0)+n0,a a,y:(r5) macr -y1,y0,b x:(r1),x1 y:(r6),y0 ;lookup -sine value subl b,a b,x:(r4)+n4 y:(r0),b mac x1,y0,b x:(r6)+n6,x0 y:(r1)+,y1 macr -x0,y1,b a,x:(r5)+n5 y:(r0),a ;with constant twiddle factor _end_grp move n1,b1 lsr b n2,a1 ;divide butterflies per group by two lsl a b1,n1 ;multiply groups per pass by two move r3,b1 move ssh,n4 ;get number of passes-1 back from stack lsr b a1,n2 ;divide coefficient offset by two move b1,r3 move #_intdata,r0 ;intermediate passes use internal input data _end_pass ; ; Do last FFT pass and move output data off-chip to external data memory. ; The output data is stored in normal order. At the same time, data is stored for ; the next output block using conjugate properties and a "reverse counter" ; ; initialize pointers move n7,r1 move ssh,r4 move (r1)+ move n4,ssh ;put #passes-1 in this fft back on stack move r1,n0 move r1,n1 ;correct pointer offset for last pass move r1,n4 move r1,n5 move #points/4,n4 ;offset for output pointer A lua (r0)+,r1 ;initialize B input pointer lua (r4)+n4,r5 ;initialize B output pointer, first step move n4,n5 ;offset for output pointer B lua (r3)+n3,r6 ;initialize W input pointer move (r5)+n5 ;initialize B output pointer, second step move #0,m4 ;bit-reversed addressing for output pointer A move r4,n3 ;initialization of conjugate pointer move #odata+points,r3 ; ; initialize butterfly move y:(r0),b ;initialization of first butterfly move (r3)-n3 ;initialization of conjugate pointer move #odata,n3 ; move x:(r1),x1 y:(r6),y0 ;initialization of first butterfly move (r3)+n3 ;initialization of conjugate pointer move #0,m3 ;bit-reversed addressing for conjugate ptr move #points/2,n3 ;correct offset for conjugate pointer move m4,m5 ;bit-reversed addressing for output pointer B move (r3)+n3 move r3,ssh ;put next output ptr a initialization on stack move (r3)-n3 ;reset conjugate pointer ; ; final butterfly with conjugate reverse storage of next block do n2,_lastpass mac x1,y0,b x:(r6)+n6,x0 y:(r1)+n1,y1 ;Radix 2 DIT butterfly kernel macr -x0,y1,b y:(r0),a ;with one butterfly per group neg b b,y:(r4) ;and changing twiddle factor move b,y:(r3)-n3 ;with conjugate storage addl b,a x:(r0),b neg a a,y:(r5) move a,y:(r3)+n3 mac -x1,x0,b x:(r0)+n0,a macr -y1,y0,b x:(r1),x1 y:(r6),y0 move b,x:(r4)+n4 subl b,a b,x:(r3)-n3 move a,x:(r5)+n5 y:(r0),b move a,x:(r3)-n3 _lastpass ; ; update pointers move ssh,r4 ;get updated output ptr a from stck for next fft move #coef,n3 ;n3 points to coeff. base address again move #-1,m3 ;linear addressing for r3 again move n6,n2 ;get fft data input offset move m2,a ;initial data offset-->a move ssh,r6 ;get #passes in FFT-1 back from stack lsl a ;initial data offset * 2 -->a move #-1,m6 ;r6 increments linearly in next instruction move #-1,m2 ;external data pointer uses linear addressing lua (r6)+,n4 ;increment #passes-1 -->n4 move #0,m6 ;r6 increments bit-reversed again move a1,r6 ;new initial data offset-->r6 lsl a ;2*#points in this fft -->a move a1,n2 ;offset for new external input data move m0,m4 ;initialize output pointers again for modulo addr. move m0,m5 ; move (r2)+n2 ;point to next complex fft input data block move r6,m2 ;new initial data offset for next FFT-->m2 _end_fft ; ; when fft is finished, jump back to see if data collection for next fft is completed jmp strt endm ; ; ; org p:$8 movep y:$ffff,y:(r7)+ ;data collection upon interrupt org p:$100 move #16,a move a,x:$d0 ;store pointer to data block 1 move #80,a move a,x:$d1 ;store pointer to data block 2 move #127,m7 ;set r7 for modulo addressing ; ; call fft macro fftreald 64,16,144,210,208,209 end