/*- * Copyright (c) 2005 Colin Percival * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted providing that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Redistributions of source code must ensure that the list of * copyright notices is complete, and the lack of a copyright notice * corresponding to a copyrightable contribution or modification may * be taken as an affirmative statement that said contribution or * modification has been placed in the public domain. * 4. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Colin Percival. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifdef RCSID static const char rcsid[] = "$Id: fftconv.c,v 1.1 2005/07/12 07:42:10 cperciva Exp $"; #endif #include #include #include #include "fftconv.h" /**T \section{FFT normalization} The FFT and inverse FFT given in fft.c are unnormalized, i.e., the length-$N$ FFT followed by a length-$N$ inverse FFT leaves the output equal to $N$ times the input. To remedy this, the function {\em tricl\_fftconv\_scale}($DAT$, $n$) should be called at some point. As in fft.c, $n$ must satisfy $0 \leq n \leq 29$, and $DAT$ must be an array of $2^n$ complex values ($2^{n + 1}$ doubles). */ void tricl_fftconv_scale(double * DAT, int n) { double s; int32_t i; assert(0 <= n && n <= 29); s = ldexp(1.0, -n); for (i = 0; i < 2 << n; i++) DAT[i] = DAT[i] * s; } /**T \section{Pointwise complex products} The function {\em tricl\_fftconv\_mulpw}($DAT1$, $DAT2$, $n$) computes the product of $2^n$ pairs of complex values from $DAT1$ and $DAT2$ and writes the resulting values into $DAT1$. As usual, $n$ must satisfy $0 \leq n \leq 29$, and $DAT1$ and $DAT2$ must be non-overlapping arrays of $2^n$ complex values ($2^{n + 1}$ doubles). */ void tricl_fftconv_mulpw(double * __restrict DAT1, double * __restrict DAT2, int n) { double xr, xi; int i; assert(0 <= n && n <= 29); for (i = 0; i < 1 << n; i++) { xr = DAT1[i * 2]; xi = DAT1[i * 2 + 1]; DAT1[i * 2] = xr * DAT2[i * 2] - xi * DAT2[i * 2 + 1]; DAT1[i * 2 + 1] = xr * DAT2[i * 2 + 1] + xi * DAT2[i * 2]; } } /**T The function {\em tricl\_fftconv\_sqrpw}($DAT$, $n$) squares $2^n$ complex values from $DAT$ and writes the resulting values into $DAT$. As usual, $n$ must satisfy $0 \leq n \leq 29$, and $DAT$ must be an array of $2^n$ complex values ($2^{n + 1}$ doubles). */ void tricl_fftconv_sqrpw(double * DAT, int n) { double xr, xi; int i; assert(0 <= n && n <= 29); for (i = 0; i < 1 << n; i++) { xr = DAT[i * 2]; xi = DAT[i * 2 + 1]; DAT[i * 2] = xr * xr - xi * xi; DAT[i * 2 + 1] = 2 * xr * xi; } } /**T \section{FFT convolution} To compute a length-$2^n$ convolution of two vectors $X$ and $Y$: \begin{verbatim} tricl_fft_makelut(LUT, n); tricl_fft_fft(X, n, LUT); tricl_fft_fft(Y, n, LUT); tricl_fftconv_mulpw(X, Y, n); tricl_fft_ifft(X, n, LUT); tricl_fftconv_scale(X, n); \end{verbatim} although the call to tricl\_fftconv\_scale can be performed at any point in the process, and on either $X$ or $Y$. \begin{theorem} When computed in this manner, the convolution $z$ of two length-$2^n$ complex vectors $x$ and $y$ will satisfy $$ \abs{z^\prime - z}_\infty < \abs{x} \cdot \abs{y} \cdot \left((1 + \epsilon)^{3 n} (1 + \epsilon \sqrt{5})^{3 n + 1} (1 + 1.5 \epsilon)^{3 n} - 1\right) < \abs{x} \cdot \abs{y} \cdot (14.3 n + 2.3) \epsilon $$ where $\epsilon = 2^{-53}$ is the maximum relative error in double-precision floating-point arithmetic. \end{theorem} \begin{proof} The FFT used is a split-radix FFT, not a radix-2 FFT, but the argument from Theorem 5.1 of \cite{Per03} still applies (the only difference as far as error bounds are concerned is that the split-radix FFT has fewer complex multiplications; but this reduction does not affect the worst case). From \cite{rootsc} we note that we can take $\beta = 1.5 \epsilon$ to complete the proof. \end{proof} \bibliographystyle{amsplain} \begin{thebibliography}{X} \bibitem{Per03} C.~Percival, \textit{Rapid multiplication modulo the sum and difference of highly composite numbers}, Math. Comp. \textbf{72} (2003), 387--395. \bibitem{rootsc} C.~Percival, \textit{roots.c}, in \textit{TRICL}, {\tt http://www.daemonology.net/tricl/}, (2005). \end{thebibliography} */