/******************************************************************** F A S T F O U R I E R T R A N S F O R M P R O G R A M S by Wang Jian-Sheng 4 Nov 1998, added fft2D(), 11 Apr 2003 --------------------------------------------------------------------- Reference: "Computational Frameworks for the Fast Fourier Transform", Charles Van Loan, SIAM, 1992. There are many FFT algorithms, the most important ones are COOLEY-TUKEY: in place, bit reversal STOCKHAM AUTOSORT: additional memory size of input data MIXED RADIX: 20% less operations comparing to Cooley-Tukey PRIME FACTOR: arbitrary length n We use a combination of the Stockham autosort algorithm 1.7.2, page 57, and multirow Cooley-Tukey (3.1.7), page 124, of the reference above. The discrete Fourier transform is defined by y[k] = sum_(j=0,n-1) x[j] exp(-2 Pi sqrt[-1] j k/n), k=0,1,...,n-1. The factor (1/n) is not included. If y[]<-x[]; fft(x,n,1); fft(x,n,-1); then y[]==x[]/n is true. Three dimensional transform is generalized straightforwardly. Interface and usage: 1D Fourier transform Use: fft(x, n, flag) x : an array of structure type complex; n : the size of data, must be a power of 2; flag : 1 for forward transform, -1 for inverse transform. 3D Fourier transform Use : fft3D(x, n1, n2, n3, flag) x : 1D array of type complex representing 3D array; mapping through C convention, i.e., (i,j,k) -> k + n3*j + n2*n3*i; n1, n2, n3 : dimensions in three directions; flag : same as in 1D. 2D FFT is similar but with n1 and n2 only. **********************************************************************/ /* Data type and new names for flexibility: real: Basic data type for floating point computations (typedef double real;) complex: Structure for complex numbers, real and imaginary parts are referred as c.Re, c.Im. (typedef struct { real Re; real Im; } complex;) */ /* Inclusion of standard C libraries */ #include #include #include #include #define REALSIZE 8 /* in units of byte */ typedef double real; /* can be long double, double, or float */ typedef struct { real Re; real Im; } complex; /* for complex number */ /* Mathematical functions and constants */ #undef M_PI #if (REALSIZE==16) #define sin sinl #define cos cosl #define fabs fabsl #define M_PI 3.1415926535897932384626433832795L #else #define M_PI 3.1415926535897932385E0 #endif void fft(complex x[], int n, int flag); void fft2D(complex x[], int n1, int n2, int flag); void fft3D(complex x[], int n1, int n2, int n3, int flag); /*----------------------------------------------------------------------*/ /* Truncated Stockham algorithm for multi-column vector, X(n1,n2) <- F_n1 X(n1,n2) x[] input data of size n, viewed as n1 = n/n2 by n2 dimensional array, flag =+1 forward transform, -1 for backward transform, y[] is working space, which must be n in size. This function is supposed to be internal (static), not used by application. Note that the terminology of column or row respect to algorithms in the Loan's book is reversed, because we use row major convention of C. */ static void stockham(complex x[], int n, int flag, int n2, complex y[]) { complex *y_orig, *tmp; int i, j, k, k2, Ls, r, jrs; int half, m, m2; real wr, wi, tr, ti; y_orig = y; r = half = n >> 1; Ls = 1; /* Ls=L/2 is the L star */ while(r >= n2) { /* loops log2(n/n2) times */ tmp = x; /* swap pointers, y is always old */ x = y; /* x is always for new data */ y = tmp; m = 0; /* m runs over first half of the array */ m2 = half; /* m2 for second half, n2=n/2 */ for(j = 0; j < Ls; ++j) { wr = cos(M_PI*j/Ls); /* real and imaginary part */ wi = -flag * sin(M_PI*j/Ls); /* of the omega */ jrs = j*(r+r); for(k = jrs; k < jrs+r; ++k) { /* "butterfly" operation */ k2 = k + r; tr = wr*y[k2].Re - wi*y[k2].Im; /* complex multiply, w*y */ ti = wr*y[k2].Im + wi*y[k2].Re; x[m].Re = y[k].Re + tr; x[m].Im = y[k].Im + ti; x[m2].Re = y[k].Re - tr; x[m2].Im = y[k].Im - ti; ++m; ++m2; } } r >>= 1; Ls <<= 1; }; if (y != y_orig) { /* copy back to permanent memory */ for(i = 0; i < n; ++i) { /* if it is not already there */ y[i] = x[i]; /* performed only if log2(n/n2) is odd */ } } assert(Ls == n/n2); /* ensure n is a power of 2 */ assert(1 == n || m2 == n); /* check array index within bound */ } /* The Cooley-Tukey multiple column algorithm, see page 124 of Loan. x[] is input data, overwritten by output, viewed as n/n2 by n2 array. flag = 1 for forward and -1 for backward transform. */ void cooley_tukey(complex x[], int n, int flag, int n2) { complex c; int i, j, k, m, p, n1; int Ls, ks, ms, jm, dk; real wr, wi, tr, ti; n1 = n/n2; /* do bit reversal permutation */ for(k = 0; k < n1; ++k) { /* This is algorithms 1.5.1 and 1.5.2. */ j = 0; m = k; p = 1; /* p = 2^q, q used in the book */ while(p < n1) { j = 2*j + (m&1); m >>= 1; p <<= 1; } assert(p == n1); /* make sure n1 is a power of two */ if(j > k) { for(i = 0; i < n2; ++i) { /* swap k <-> j row */ c = x[k*n2+i]; /* for all columns */ x[k*n2+i] = x[j*n2+i]; x[j*n2+i] = c; } } } /* This is (3.1.7), page 124 */ p = 1; while(p < n1) { Ls = p; p <<= 1; jm = 0; /* jm is j*n2 */ dk = p*n2; for(j = 0; j < Ls; ++j) { wr = cos(M_PI*j/Ls); /* real and imaginary part */ wi = -flag * sin(M_PI*j/Ls); /* of the omega */ for(k = jm; k < n; k += dk) { /* "butterfly" */ ks = k + Ls*n2; for(i = 0; i < n2; ++i) { /* for each row */ m = k + i; ms = ks + i; tr = wr*x[ms].Re - wi*x[ms].Im; ti = wr*x[ms].Im + wi*x[ms].Re; x[ms].Re = x[m].Re - tr; x[ms].Im = x[m].Im - ti; x[m].Re += tr; x[m].Im += ti; } } jm += n2; } } } /* 1D Fourier transform: Simply call stockham with proper arguments. Allocated working space of size n dynamically. */ void fft(complex x[], int n, int flag) { complex *y; assert(1 == flag || -1 == flag); y = (complex *) malloc( n*sizeof(complex) ); assert(NULL != y); stockham(x, n, flag, 1, y); free(y); } /* 3D Fourier transform: The index for x[m] is mapped to (i,j,k) by m = k + n3*j + n3*n2*i, i.e. the row major convention of C. All indices start from 0. This algorithm requires working space of n2*n3. Stockham is efficient, good stride feature, but takes extra memory same size as input data; Cooley-Tukey is in place, so we take a compromise of the two. */ void fft2D(complex x[], int n1, int n2, int flag) { complex *y; int i, n; assert(1 == flag || -1 == flag); n = n1*n2; y = (complex *) malloc( n2*sizeof(complex) ); assert(NULL != y); for(i=0; i < n; i += n2) { /* FFT in y */ stockham(x+i, n2, flag, 1, y); } free(y); cooley_tukey(x, n, flag, n2); /* FFT in x */ } void fft3D(complex x[], int n1, int n2, int n3, int flag) { complex *y; int i, n, n23; assert(1 == flag || -1 == flag); n23 = n2*n3; n = n1*n23; y = (complex *) malloc( n23*sizeof(complex) ); assert(NULL != y); for(i=0; i < n; i += n3) { /* FFT in z */ stockham(x+i, n3, flag, 1, y); } for(i=0; i < n; i += n23) { /* FFT in y */ stockham(x+i, n23, flag, n3, y); } free(y); cooley_tukey(x, n, flag, n23); /* FFT in x */ }