cuFFT是NVIDIA提供的CUDA快速傅里叶变换(FFT)库,它由两个独立的库cuFFT和cuFFTW组成。cuFFT库旨在充分发挥NVIDIA GPU的性能;cuFFTW库则是一个移植工具,让FFTW用户只需少量改动即可在NVIDIA GPU上运行原有代码。FFT(Fast Fourier Transform)是一种分治算法,能够高效地计算复数或实数数据集的离散傅里叶变换,是计算物理和通用信号处理领域最重要、应用最广泛的数值算法之一。cuFFT库提供了一套简单的FFT接口,用户可以借助GPU的浮点算力和并行能力快速加速FFT计算。
示例代码:对一维数据执行复数到复数(C2C)的正向傅里叶变换,然后对得到的频域结果执行逆变换。
#define NX 256
#define BATCH 1

/*
 * 1D complex-to-complex cuFFT example (host code, meant to sit inside a
 * void function): allocate a device buffer of NX*BATCH complex samples,
 * create a 1D C2C plan, run a forward and then an inverse transform in
 * place, wait for completion, and release the plan and the buffer.
 *
 * Every error path releases whatever has been acquired so far, so an
 * early return does not leak the device buffer or the cuFFT plan.
 */
cufftHandle plan;
cufftComplex *data;

/* Check cudaMalloc's own status: cudaGetLastError() could also report
 * (and clear) an unrelated earlier error. */
if (cudaMalloc((void**)&data, sizeof(cufftComplex)*NX*BATCH) != cudaSuccess){
	fprintf(stderr, "Cuda error: Failed to allocate\n");
	return;
}

if (cufftPlan1d(&plan, NX, CUFFT_C2C, BATCH) != CUFFT_SUCCESS){
	fprintf(stderr, "CUFFT error: Plan creation failed");
	cudaFree(data);
	return;
}

/* ... (elided in the original example; presumably fills `data` with the
 * input signal -- confirm against the full sample) ... */

/* Note:
 *  Identical pointers to input and output arrays implies in-place transformation
 */

if (cufftExecC2C(plan, data, data, CUFFT_FORWARD) != CUFFT_SUCCESS){
	fprintf(stderr, "CUFFT error: ExecC2C Forward failed");
	cufftDestroy(plan);
	cudaFree(data);
	return;
}

if (cufftExecC2C(plan, data, data, CUFFT_INVERSE) != CUFFT_SUCCESS){
	fprintf(stderr, "CUFFT error: ExecC2C Inverse failed");
	cufftDestroy(plan);
	cudaFree(data);
	return;
}

/*
 *  Results may not be immediately available so block device until all
 *  tasks have completed
 */

if (cudaDeviceSynchronize() != cudaSuccess){
	fprintf(stderr, "Cuda error: Failed to synchronize\n");
	cufftDestroy(plan);
	cudaFree(data);
	return;
}

/*
 *  Divide by number of elements in data set to get back original data
 */

/* ... (normalization step elided in the original example) ... */

cufftDestroy(plan);
cudaFree(data);