#include #include #include #include typedef float Number; /***************************************************************************** * Fast Fourier transform * Replaces data[1..2*n] by its discrete Fourier transform, if isign is * input as 1; or replaces data[1..2*n] by nn times its inverse discrete * Fourier transform, if isign is input as -1. * data is a complex array of length nn, or equivalently, a real array of * length nn, or, equivalently, a real array of length 2*nn. nn MUST be an * integer power of 2 (this is not checked for!). If tehy don't, just pad your * data with zeros up to the next power of two. * * The extended version uses inline method developed by Todd Veldhuizen and * compares to a similar program from "Numerical Recipes". * C.A. Bertulani, 06/14/2000 */ /***************************************************************************** * four1() implements a Cooley-Tukey FFT on N points, where N is a power of 2. * "Numerical Recipes in C", 2nd Edition (Teukolsky,Press,Vetterling,Flannery). * * It serves as the basis for the inline version found below. All of the * variable names have been preserved in the inline version for comparison * purposes. Also, the line numbers are referred to in the corresponding * template classes. ****************************************************************************/ #define SWAP(a,b) tempr=(a); (a)=(b); (b) = tempr #define M_PI 3.1415926535 // Replaces data[1..2*nn] by its discrete Fourier transform. data[] is a // complex array void four1(Number data[], unsigned long nn, int isign) { unsigned long n, mmax, m, j, istep, i; double wtemp, wr, wpr, wpi, wi, theta; /* Double precision for the */ Number tempr, tempi; /* trigonometric recurrences. */ n = nn << 1; j=1; for (i=1; i < n; i+= 2) { // This is the bit reversal // 1 if (j > i) // section of the routine // 2 { // 3 SWAP(data[j], data[i]); // Exchange two complex // 4 SWAP(data[j+1], data[i+1]); // numbers. // 5 } // 6 m = n >> 1; // 7 while (m >= 2 && j > m) { // 8 j -= m; // 9 m >>= 1; // 10 } // 11 j += m; // 12 }; // 13 // 14 mmax=2; // 15 // Here begins the Danielson-Lanczos section of routine // 16 while (n > mmax) { // 17 istep = mmax << 1; // 18 theta = isign * (6.28318530717959/mmax); // Initialize the // 19 wtemp = sin(0.5*theta); // trigonometric recurence. // 20 wpr = -2.0*wtemp*wtemp; // 21 wpi = sin(theta); // 22 wr = 1.0; // 23 wi = 0.0; // 24 // 25 for (m=1; m < mmax; m += 2) { // Here are the two nested // 26 // inner loops. // 27 for (i=m; i <= n; i += istep) { // 28 // 29 j=i+mmax; // This is the Danielson-Lanczos // 30 tempr = wr*data[j] - wi*data[j+1]; // formula. // 31 tempi = wr * data[j+1] + wi*data[j]; // 32 // 33 // 34 data[j] = data[i] - tempr; // 35 data[j+1] = data[i+1] - tempi; // 36 data[i] += tempr; // 37 data[i+1] += tempi; // 38 } // 39 // 40 wr = (wtemp=wr)*wpr-wi*wpi+wr; // Trigonometric // 41 wi=wi*wpr+wtemp*wpi+wi; // recurrence. // 42 } // 43 mmax=istep; // 44 } // 45 } /***************************************************************************** * * Now begins the inline FFT implementation. * * The way to read this code is from the very bottom (starting with FFTServer) * upwards. This is because the classes are declared in order from the * optimizations in the innermost loops, to the outer loops, and finally * FFTServer itself. * * Why are floats passed as const float&? This eliminates some unnecessary * instructions under some compilers. It may be better to pass floats as 'float' * for some specific compilers. * * Here is a summary of the classes involved: * * Sine Calculates sin(2*Pi*I/N) * Cos Calculates cos(2*Pi*I/N) * bitNum Calculates 16-log2(N), where N is a power of 2 * reverseBits Reverses the order of the bits in I * FFTSwap2 Implements swapping of elements, for array reordering * FFTSwap Ditto * FFTReorder Implements array reordering * FFTCalcTempR Squeezes out an optimization when the weights are (1,0) * FFTCalcTempI Ditto * FFTInnerLoop Implements the inner loop (lines 28-39 in four1) * FFTMLoop Implements the middle loop (lines 26, 41-42 in four1) * FFTOuterLoop Implements the outer loop (line 17 in four1) * FFTServer The FFT Server class ****************************************************************************/ /* * First, compile-time trig functions sin(x) and cos(x). */ template class Sine { public: static inline Number sin() { // This is a series expansion for sin(I*2*M_PI/N) // Since all of these quantities are known at compile time, it gets // simplified to a single constant, which can be included in the code: // mov dword ptr es:[bx],large 03F3504F3h return (I*2*M_PI/N)*(1-(I*2*M_PI/N)*(I*2*M_PI/N)/2/3*(1-(I*2*M_PI/N)* (I*2*M_PI/N)/4/5*(1-(I*2*M_PI/N)*(I*2*M_PI/N)/6/7*(1-(I*2*M_PI/N)* (I*2*M_PI/N)/8/9*(1-(I*2*M_PI/N)*(I*2*M_PI/N)/10/11*(1-(I*2*M_PI/N)* (I*2*M_PI/N)/12/13*(1-(I*2*M_PI/N)*(I*2*M_PI/N)/14/15* (1-(I*2*M_PI/N)*(I*2*M_PI/N)/16/17* (1-(I*2*M_PI/N)*(I*2*M_PI/N)/18/19*(1-(I*2*M_PI/N)* (I*2*M_PI/N)/20/21)))))))))); } }; template class Cosine { public: static inline Number cos() { // This is a series expansion for cos(I*2*M_PI/N) // Since all of these quantities are known at compile time, it gets // simplified to a single number. return 1-(I*2*M_PI/N)*(I*2*M_PI/N)/2*(1-(I*2*M_PI/N)*(I*2*M_PI/N)/3/4* (1-(I*2*M_PI/N)*(I*2*M_PI/N)/5/6*(1-(I*2*M_PI/N)*(I*2*M_PI/N)/7/8* (1-(I*2*M_PI/N)*(I*2*M_PI/N)/9/10*(1-(I*2*M_PI/N)*(I*2*M_PI/N)/11/12* (1-(I*2*M_PI/N)*(I*2*M_PI/N)/13/14*(1-(I*2*M_PI/N)*(I*2*M_PI/N)/15/16* (1-(I*2*M_PI/N)*(I*2*M_PI/N)/17/18*(1-(I*2*M_PI/N)*(I*2*M_PI/N)/19/20* (1-(I*2*M_PI/N)*(I*2*M_PI/N)/21/22*(1-(I*2*M_PI/N)*(I*2*M_PI/N)/23/24 ))))))))))); } }; /* * The next few classes (bitNum, reverseBits, FFTSwap, FFTReorder) implement * the bit-reversal reordering of the input data set (lines 1-14). However, * the algorithm used here bears no resemblence to that in the routine above. */ /* * bitNum returns 16-log2(N); this is used to implement the bit-reversal * swapping portion of the routine. There's a nice recursive * implementation for this, but some compilers give a 'Too many template * instances' warning. So use specialization instead. Note that N must be * a power of 2. */ template class bitNum { public: enum _b { nbits = 0 }; }; class bitNum<1U> { public: enum _b { nbits = 16 }; }; class bitNum<2U> { public: enum _b { nbits = 15 }; }; class bitNum<4U> { public: enum _b { nbits = 14 }; }; class bitNum<8U> { public: enum _b { nbits = 13 }; }; class bitNum<16U> { public: enum _b { nbits = 12 }; }; class bitNum<32U> { public: enum _b { nbits = 11 }; }; class bitNum<64U> { public: enum _b { nbits = 10 }; }; class bitNum<128U> { public: enum _b { nbits = 9 }; }; class bitNum<256U> { public: enum _b { nbits = 8 }; }; class bitNum<512U> { public: enum _b { nbits = 7 }; }; class bitNum<1024U> { public: enum _b { nbits = 6 }; }; class bitNum<2048U> { public: enum _b { nbits = 5 }; }; class bitNum<4096U> { public: enum _b { nbits = 4 }; }; class bitNum<8192U> { public: enum _b { nbits = 3 }; }; class bitNum<16384U> { public: enum _b { nbits = 2 }; }; class bitNum<32768U> { public: enum _b { nbits = 1 }; }; /* * reverseBits::reverse is the number I with the order of its bits * reversed. For example, * * <8,3> 3 is 011, so reverse is 110 or 6 * <8,1> 1 is 001, so reverse is 100 or 8 * * The enum types are used as temporary variables. */ template class reverseBits { public: enum _r4 { I4 = (((unsigned)I & 0xaaaa) >> 1) | (((unsigned)I & 0x5555) << 1) }; enum _r3 { I3 = (((unsigned)I4 & 0xcccc) >> 2) | (((unsigned)I4 & 0x3333) << 2) }; enum _r2 { I2 = (((unsigned)I3 & 0xf0f0) >> 4) | (((unsigned)I3 & 0x0f0f) << 4) }; enum _r1 { I1 = (((unsigned)I2 & 0xff00) >> 8) | (((unsigned)I2 & 0x00ff) << 8) }; enum _r { reverse = ((unsigned)I1 >> bitNum::nbits) }; }; /* * FFTSwap::swap(Number* array) swaps the (complex) elements I and R of * array. It does this only if I < R, to avoid swapping a pair twice or * an element with itself. */ template class FFTSwap2 { public: static inline void swap(Number* array) { register Number temp = array[2*I]; array[2*I] = array[2*R]; array[2*R] = temp; temp = array[2*I+1]; array[2*I+1] = array[2*R+1]; array[2*R+1] = temp; } }; class FFTSwap2<0U,0U> { public: static inline void swap(Number* array) { } }; template class FFTSwap { public: enum _go { go = (I < R) }; static inline void swap(Number* array) { // Only swap if I < R FFTSwap2::swap(array); } }; /* * FFTReorder::reorder(Number* array) reorders the complex elements * of array[] using bit reversal. */ template class FFTReorder { public: enum _go { go = (I+1 < N) }; // Loop from I=0 to N. static inline void reorder(Number* array) { // Swap the I'th element if necessary FFTSwap::reverse>::swap(array); // Loop FFTReorder::reorder(array); } }; // Base case to terminate the recursive loop class FFTReorder<0U,0U> { public: static inline void reorder(Number* array) { } }; /* * FFTCalcTempR and FFTCalcTempI are provided to squeeze out an optimization * for the case when the weights are (1,0), on lines 31-32 above. */ template class FFTCalcTempR { public: static inline Number tempr(const Number& wr, const Number& wi, Number* array, const int j) { return wr * array[j] - wi * array[j+1]; } }; class FFTCalcTempR<1U> { public: static inline Number tempr(const Number& wr, const Number& wi, Number* array, const int j) { // For M == 1, (wr,wi) = (1,0), so specialization leads to a nice // optimization: return array[j]; } }; template class FFTCalcTempI { public: static inline Number tempi(const Number& wr, const Number& wi, Number* array, const int j) { return wr * array[j+1] + wi * array[j]; } }; class FFTCalcTempI<1U> { public: static inline Number tempi(const Number& wr, const Number& wi, Number* array, const int j) { return array[j+1]; } }; /* * FFTInnerLoop implements lines 28-39. */ template class FFTInnerLoop { public: // Loop break condition enum _go { go = (I+2*MMAX <= 2*N) }; static inline void loop(const Number& wr, const Number& wi, Number* array) { const int j = I+MMAX; // Known at compile time Number tempr = FFTCalcTempR::tempr(wr,wi,array,j); Number tempi = FFTCalcTempI::tempi(wr,wi,array,j); array[j] = array[I] - tempr; array[j+1] = array[I+1] - tempi; array[I] += tempr; array[I+1] += tempi; // Loop FFTInnerLoop::loop(wr,wi,array); } }; // Specialization to provide base case for loop recursion class FFTInnerLoop<0U,0U,0U,0U> { public: static inline void loop(const Number& wr, const Number& wi, Number* array) { } }; /* * FFTMLoop implements the middle loop (lines 26, 41-42). However, instead * of using a trigonometric recurrence to compute the weights wi and wr, they * are computed at compile time using the Sine and Cosine classes. This way, * the weights are inserted into the code, eg. * mov dword ptr [bp-360],large 03F6C835Eh // Cosine of something or other */ template class FFTMLoop { public: enum _go { go = (M+2 < MMAX) }; static inline void loop(Number* array) { Number wr = Cosine::cos(); Number wi = Sine::sin(); // Call the inner loop FFTInnerLoop::loop(wr,wi,array); // Loop FFTMLoop::loop(array); } }; // Specialization for base case to terminate recursive loop class FFTMLoop<0U,0U,0U> { public: static inline void loop(Number* array) { } }; /* * FFTOuterLoop implements the outer loop (Line 17). */ template class FFTOuterLoop { public: // Loop break condition enum _go { go = ((MMAX << 1) < (2*N)) }; static inline void loop(Number* array) { // Invoke the middle loop FFTMLoop::loop(array); // Loop FFTOuterLoop::loop(array); } }; // Specialization for base case to terminate recursive loop class FFTOuterLoop<0U,0U> { public: static inline void loop(Number* array) { } }; /* * And finally, the FFT server itself. * * Input is an array of float precision complex numbers, stored in (real,imag) * pairs, starting at array[1]. ie. for an N point FFT, need to use arrays of * float[2*N+1]. The element array[0] is not used, to be compatible with * the Numerical Recipes implementation. N MUST be a power of 2. */ template class FFTServer { public: static void FFT(Number* array); }; template void FFTServer::FFT(Number* array) { // Reorder the array, using bit-reversal. Confusingly, the // reorder portion expects the data to start at 0, rather than 1. FFTReorder::reorder(array+1); // Now do the FFT FFTOuterLoop::loop(array); } /* * Test program to take an N-point FFT. */ int main() { void four1(Number [], unsigned long , int); const unsigned N = 8; Number data[2*N]; Number data2[2*N]; int i; for (i=0; i < 2*N; ++i){ data[i] = i; data[i] = i; } FFTServer::FFT(data-1); // Original four1() used arrays starting // at 1 instead of 0 cout << "Transformed data:" << endl; for (i=0; i < N; ++i) cout << setw(10) << setprecision(5) << data[2*i] << "\t" << data[2*i+1] << "I" << endl; cout << "Compare with Numerical Recipes:" << endl; four1(data2, N , 1); for (i=0; i < N; ++i) cout << setw(10) << setprecision(5) << data[2*i] << "\t" << data[2*i+1] << "I" << endl; return 0; }