shun_iwasawa a35b8f
/*
shun_iwasawa a35b8f
Copyright (c) 2003-2010, Mark Borgerding
shun_iwasawa a35b8f
shun_iwasawa a35b8f
All rights reserved.
shun_iwasawa a35b8f
shun_iwasawa a35b8f
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
shun_iwasawa a35b8f
    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
shun_iwasawa a35b8f
    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
shun_iwasawa a35b8f
shun_iwasawa a35b8f
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
shun_iwasawa a35b8f
*/
shun_iwasawa a35b8f
shun_iwasawa a35b8f
shun_iwasawa a35b8f
#include "_kiss_fft_guts.h"
shun_iwasawa a35b8f
/* The guts header contains all the multiplication and addition macros that are defined for
shun_iwasawa a35b8f
 fixed or floating point complex numbers.  It also delares the kf_ internal functions.
shun_iwasawa a35b8f
 */
shun_iwasawa a35b8f
shun_iwasawa a35b8f
static void kf_bfly2(
shun_iwasawa a35b8f
        kiss_fft_cpx * Fout,
shun_iwasawa a35b8f
        const size_t fstride,
shun_iwasawa a35b8f
        const kiss_fft_cfg st,
shun_iwasawa a35b8f
        int m
shun_iwasawa a35b8f
        )
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    kiss_fft_cpx * Fout2;
shun_iwasawa a35b8f
    kiss_fft_cpx * tw1 = st->twiddles;
shun_iwasawa a35b8f
    kiss_fft_cpx t;
shun_iwasawa a35b8f
    Fout2 = Fout + m;
shun_iwasawa a35b8f
    do{
shun_iwasawa a35b8f
        C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        C_MUL (t,  *Fout2 , *tw1);
shun_iwasawa a35b8f
        tw1 += fstride;
shun_iwasawa a35b8f
        C_SUB( *Fout2 ,  *Fout , t );
shun_iwasawa a35b8f
        C_ADDTO( *Fout ,  t );
shun_iwasawa a35b8f
        ++Fout2;
shun_iwasawa a35b8f
        ++Fout;
shun_iwasawa a35b8f
    }while (--m);
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
static void kf_bfly4(
shun_iwasawa a35b8f
        kiss_fft_cpx * Fout,
shun_iwasawa a35b8f
        const size_t fstride,
shun_iwasawa a35b8f
        const kiss_fft_cfg st,
shun_iwasawa a35b8f
        const size_t m
shun_iwasawa a35b8f
        )
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    kiss_fft_cpx *tw1,*tw2,*tw3;
shun_iwasawa a35b8f
    kiss_fft_cpx scratch[6];
shun_iwasawa a35b8f
    size_t k=m;
shun_iwasawa a35b8f
    const size_t m2=2*m;
shun_iwasawa a35b8f
    const size_t m3=3*m;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    tw3 = tw2 = tw1 = st->twiddles;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    do {
shun_iwasawa a35b8f
        C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        C_MUL(scratch[0],Fout[m] , *tw1 );
shun_iwasawa a35b8f
        C_MUL(scratch[1],Fout[m2] , *tw2 );
shun_iwasawa a35b8f
        C_MUL(scratch[2],Fout[m3] , *tw3 );
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        C_SUB( scratch[5] , *Fout, scratch[1] );
shun_iwasawa a35b8f
        C_ADDTO(*Fout, scratch[1]);
shun_iwasawa a35b8f
        C_ADD( scratch[3] , scratch[0] , scratch[2] );
shun_iwasawa a35b8f
        C_SUB( scratch[4] , scratch[0] , scratch[2] );
shun_iwasawa a35b8f
        C_SUB( Fout[m2], *Fout, scratch[3] );
shun_iwasawa a35b8f
        tw1 += fstride;
shun_iwasawa a35b8f
        tw2 += fstride*2;
shun_iwasawa a35b8f
        tw3 += fstride*3;
shun_iwasawa a35b8f
        C_ADDTO( *Fout , scratch[3] );
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        if(st->inverse) {
shun_iwasawa a35b8f
            Fout[m].r = scratch[5].r - scratch[4].i;
shun_iwasawa a35b8f
            Fout[m].i = scratch[5].i + scratch[4].r;
shun_iwasawa a35b8f
            Fout[m3].r = scratch[5].r + scratch[4].i;
shun_iwasawa a35b8f
            Fout[m3].i = scratch[5].i - scratch[4].r;
shun_iwasawa a35b8f
        }else{
shun_iwasawa a35b8f
            Fout[m].r = scratch[5].r + scratch[4].i;
shun_iwasawa a35b8f
            Fout[m].i = scratch[5].i - scratch[4].r;
shun_iwasawa a35b8f
            Fout[m3].r = scratch[5].r - scratch[4].i;
shun_iwasawa a35b8f
            Fout[m3].i = scratch[5].i + scratch[4].r;
shun_iwasawa a35b8f
        }
shun_iwasawa a35b8f
        ++Fout;
shun_iwasawa a35b8f
    }while(--k);
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
static void kf_bfly3(
shun_iwasawa a35b8f
         kiss_fft_cpx * Fout,
shun_iwasawa a35b8f
         const size_t fstride,
shun_iwasawa a35b8f
         const kiss_fft_cfg st,
shun_iwasawa a35b8f
         size_t m
shun_iwasawa a35b8f
         )
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
     size_t k=m;
shun_iwasawa a35b8f
     const size_t m2 = 2*m;
shun_iwasawa a35b8f
     kiss_fft_cpx *tw1,*tw2;
shun_iwasawa a35b8f
     kiss_fft_cpx scratch[5];
shun_iwasawa a35b8f
     kiss_fft_cpx epi3;
shun_iwasawa a35b8f
     epi3 = st->twiddles[fstride*m];
shun_iwasawa a35b8f
shun_iwasawa a35b8f
     tw1=tw2=st->twiddles;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
     do{
shun_iwasawa a35b8f
         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
         C_MUL(scratch[1],Fout[m] , *tw1);
shun_iwasawa a35b8f
         C_MUL(scratch[2],Fout[m2] , *tw2);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
         C_ADD(scratch[3],scratch[1],scratch[2]);
shun_iwasawa a35b8f
         C_SUB(scratch[0],scratch[1],scratch[2]);
shun_iwasawa a35b8f
         tw1 += fstride;
shun_iwasawa a35b8f
         tw2 += fstride*2;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
shun_iwasawa a35b8f
         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
         C_MULBYSCALAR( scratch[0] , epi3.i );
shun_iwasawa a35b8f
shun_iwasawa a35b8f
         C_ADDTO(*Fout,scratch[3]);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
         Fout[m2].r = Fout[m].r + scratch[0].i;
shun_iwasawa a35b8f
         Fout[m2].i = Fout[m].i - scratch[0].r;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
         Fout[m].r -= scratch[0].i;
shun_iwasawa a35b8f
         Fout[m].i += scratch[0].r;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
         ++Fout;
shun_iwasawa a35b8f
     }while(--k);
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
static void kf_bfly5(
shun_iwasawa a35b8f
        kiss_fft_cpx * Fout,
shun_iwasawa a35b8f
        const size_t fstride,
shun_iwasawa a35b8f
        const kiss_fft_cfg st,
shun_iwasawa a35b8f
        int m
shun_iwasawa a35b8f
        )
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
shun_iwasawa a35b8f
    int u;
shun_iwasawa a35b8f
    kiss_fft_cpx scratch[13];
shun_iwasawa a35b8f
    kiss_fft_cpx * twiddles = st->twiddles;
shun_iwasawa a35b8f
    kiss_fft_cpx *tw;
shun_iwasawa a35b8f
    kiss_fft_cpx ya,yb;
shun_iwasawa a35b8f
    ya = twiddles[fstride*m];
shun_iwasawa a35b8f
    yb = twiddles[fstride*2*m];
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    Fout0=Fout;
shun_iwasawa a35b8f
    Fout1=Fout0+m;
shun_iwasawa a35b8f
    Fout2=Fout0+2*m;
shun_iwasawa a35b8f
    Fout3=Fout0+3*m;
shun_iwasawa a35b8f
    Fout4=Fout0+4*m;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    tw=st->twiddles;
shun_iwasawa a35b8f
    for ( u=0; u
shun_iwasawa a35b8f
        C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
shun_iwasawa a35b8f
        scratch[0] = *Fout0;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
shun_iwasawa a35b8f
        C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
shun_iwasawa a35b8f
        C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
shun_iwasawa a35b8f
        C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        C_ADD( scratch[7],scratch[1],scratch[4]);
shun_iwasawa a35b8f
        C_SUB( scratch[10],scratch[1],scratch[4]);
shun_iwasawa a35b8f
        C_ADD( scratch[8],scratch[2],scratch[3]);
shun_iwasawa a35b8f
        C_SUB( scratch[9],scratch[2],scratch[3]);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        Fout0->r += scratch[7].r + scratch[8].r;
shun_iwasawa a35b8f
        Fout0->i += scratch[7].i + scratch[8].i;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
shun_iwasawa a35b8f
        scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
shun_iwasawa a35b8f
        scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        C_SUB(*Fout1,scratch[5],scratch[6]);
shun_iwasawa a35b8f
        C_ADD(*Fout4,scratch[5],scratch[6]);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
shun_iwasawa a35b8f
        scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
shun_iwasawa a35b8f
        scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
shun_iwasawa a35b8f
        scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        C_ADD(*Fout2,scratch[11],scratch[12]);
shun_iwasawa a35b8f
        C_SUB(*Fout3,scratch[11],scratch[12]);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
/* perform the butterfly for one stage of a mixed radix FFT */
shun_iwasawa a35b8f
static void kf_bfly_generic(
shun_iwasawa a35b8f
        kiss_fft_cpx * Fout,
shun_iwasawa a35b8f
        const size_t fstride,
shun_iwasawa a35b8f
        const kiss_fft_cfg st,
shun_iwasawa a35b8f
        int m,
shun_iwasawa a35b8f
        int p
shun_iwasawa a35b8f
        )
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    int u,k,q1,q;
shun_iwasawa a35b8f
    kiss_fft_cpx * twiddles = st->twiddles;
shun_iwasawa a35b8f
    kiss_fft_cpx t;
shun_iwasawa a35b8f
    int Norig = st->nfft;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    for ( u=0; u
shun_iwasawa a35b8f
        k=u;
shun_iwasawa a35b8f
        for ( q1=0 ; q1
shun_iwasawa a35b8f
            scratch[q1] = Fout[ k  ];
shun_iwasawa a35b8f
            C_FIXDIV(scratch[q1],p);
shun_iwasawa a35b8f
            k += m;
shun_iwasawa a35b8f
        }
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        k=u;
shun_iwasawa a35b8f
        for ( q1=0 ; q1
shun_iwasawa a35b8f
            int twidx=0;
shun_iwasawa a35b8f
            Fout[ k ] = scratch[0];
shun_iwasawa a35b8f
            for (q=1;q
shun_iwasawa a35b8f
                twidx += fstride * k;
shun_iwasawa a35b8f
                if (twidx>=Norig) twidx-=Norig;
shun_iwasawa a35b8f
                C_MUL(t,scratch[q] , twiddles[twidx] );
shun_iwasawa a35b8f
                C_ADDTO( Fout[ k ] ,t);
shun_iwasawa a35b8f
            }
shun_iwasawa a35b8f
            k += m;
shun_iwasawa a35b8f
        }
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    KISS_FFT_TMP_FREE(scratch);
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
static
shun_iwasawa a35b8f
void kf_work(
shun_iwasawa a35b8f
        kiss_fft_cpx * Fout,
shun_iwasawa a35b8f
        const kiss_fft_cpx * f,
shun_iwasawa a35b8f
        const size_t fstride,
shun_iwasawa a35b8f
        int in_stride,
shun_iwasawa a35b8f
        int * factors,
shun_iwasawa a35b8f
        const kiss_fft_cfg st
shun_iwasawa a35b8f
        )
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    kiss_fft_cpx * Fout_beg=Fout;
shun_iwasawa a35b8f
    const int p=*factors++; /* the radix  */
shun_iwasawa a35b8f
    const int m=*factors++; /* stage's fft length/p */
shun_iwasawa a35b8f
    const kiss_fft_cpx * Fout_end = Fout + p*m;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
#ifdef _OPENMP
shun_iwasawa a35b8f
    // use openmp extensions at the 
shun_iwasawa a35b8f
    // top-level (not recursive)
shun_iwasawa a35b8f
    if (fstride==1 && p<=5)
shun_iwasawa a35b8f
    {
shun_iwasawa a35b8f
        int k;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        // execute the p different work units in different threads
shun_iwasawa a35b8f
#       pragma omp parallel for
shun_iwasawa a35b8f
        for (k=0;k
shun_iwasawa a35b8f
            kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
shun_iwasawa a35b8f
        // all threads have joined by this point
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        switch (p) {
shun_iwasawa a35b8f
            case 2: kf_bfly2(Fout,fstride,st,m); break;
shun_iwasawa a35b8f
            case 3: kf_bfly3(Fout,fstride,st,m); break; 
shun_iwasawa a35b8f
            case 4: kf_bfly4(Fout,fstride,st,m); break;
shun_iwasawa a35b8f
            case 5: kf_bfly5(Fout,fstride,st,m); break; 
shun_iwasawa a35b8f
            default: kf_bfly_generic(Fout,fstride,st,m,p); break;
shun_iwasawa a35b8f
        }
shun_iwasawa a35b8f
        return;
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
#endif
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    if (m==1) {
shun_iwasawa a35b8f
        do{
shun_iwasawa a35b8f
            *Fout = *f;
shun_iwasawa a35b8f
            f += fstride*in_stride;
shun_iwasawa a35b8f
        }while(++Fout != Fout_end );
shun_iwasawa a35b8f
    }else{
shun_iwasawa a35b8f
        do{
shun_iwasawa a35b8f
            // recursive call:
shun_iwasawa a35b8f
            // DFT of size m*p performed by doing
shun_iwasawa a35b8f
            // p instances of smaller DFTs of size m, 
shun_iwasawa a35b8f
            // each one takes a decimated version of the input
shun_iwasawa a35b8f
            kf_work( Fout , f, fstride*p, in_stride, factors,st);
shun_iwasawa a35b8f
            f += fstride*in_stride;
shun_iwasawa a35b8f
        }while( (Fout += m) != Fout_end );
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    Fout=Fout_beg;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    // recombine the p smaller DFTs 
shun_iwasawa a35b8f
    switch (p) {
shun_iwasawa a35b8f
        case 2: kf_bfly2(Fout,fstride,st,m); break;
shun_iwasawa a35b8f
        case 3: kf_bfly3(Fout,fstride,st,m); break; 
shun_iwasawa a35b8f
        case 4: kf_bfly4(Fout,fstride,st,m); break;
shun_iwasawa a35b8f
        case 5: kf_bfly5(Fout,fstride,st,m); break; 
shun_iwasawa a35b8f
        default: kf_bfly_generic(Fout,fstride,st,m,p); break;
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
/*  facbuf is populated by p1,m1,p2,m2, ...
shun_iwasawa a35b8f
    where 
shun_iwasawa a35b8f
    p[i] * m[i] = m[i-1]
shun_iwasawa a35b8f
    m0 = n                  */
shun_iwasawa a35b8f
static 
shun_iwasawa a35b8f
void kf_factor(int n,int * facbuf)
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    int p=4;
shun_iwasawa a35b8f
    double floor_sqrt;
shun_iwasawa a35b8f
    floor_sqrt = floor( sqrt((double)n) );
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    /*factor out powers of 4, powers of 2, then any remaining primes */
shun_iwasawa a35b8f
    do {
shun_iwasawa a35b8f
        while (n % p) {
shun_iwasawa a35b8f
            switch (p) {
shun_iwasawa a35b8f
                case 4: p = 2; break;
shun_iwasawa a35b8f
                case 2: p = 3; break;
shun_iwasawa a35b8f
                default: p += 2; break;
shun_iwasawa a35b8f
            }
shun_iwasawa a35b8f
            if (p > floor_sqrt)
shun_iwasawa a35b8f
                p = n;          /* no more factors, skip to end */
shun_iwasawa a35b8f
        }
shun_iwasawa a35b8f
        n /= p;
shun_iwasawa a35b8f
        *facbuf++ = p;
shun_iwasawa a35b8f
        *facbuf++ = n;
shun_iwasawa a35b8f
    } while (n > 1);
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
/*
shun_iwasawa a35b8f
 *
shun_iwasawa a35b8f
 * User-callable function to allocate all necessary storage space for the fft.
shun_iwasawa a35b8f
 *
shun_iwasawa a35b8f
 * The return value is a contiguous block of memory, allocated with malloc.  As such,
shun_iwasawa a35b8f
 * It can be freed with free(), rather than a kiss_fft-specific function.
shun_iwasawa a35b8f
 * */
shun_iwasawa a35b8f
kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    kiss_fft_cfg st=NULL;
shun_iwasawa a35b8f
    size_t memneeded = sizeof(struct kiss_fft_state)
shun_iwasawa a35b8f
        + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    if ( lenmem==NULL ) {
shun_iwasawa a35b8f
        st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
shun_iwasawa a35b8f
    }else{
shun_iwasawa a35b8f
        if (mem != NULL && *lenmem >= memneeded)
shun_iwasawa a35b8f
            st = (kiss_fft_cfg)mem;
shun_iwasawa a35b8f
        *lenmem = memneeded;
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    if (st) {
shun_iwasawa a35b8f
        int i;
shun_iwasawa a35b8f
        st->nfft=nfft;
shun_iwasawa a35b8f
        st->inverse = inverse_fft;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        for (i=0;i
shun_iwasawa a35b8f
            const double pi=3.141592653589793238462643383279502884197169399375105820974944;
shun_iwasawa a35b8f
            double phase = -2*pi*i / nfft;
shun_iwasawa a35b8f
            if (st->inverse)
shun_iwasawa a35b8f
                phase *= -1;
shun_iwasawa a35b8f
            kf_cexp(st->twiddles+i, phase );
shun_iwasawa a35b8f
        }
shun_iwasawa a35b8f
shun_iwasawa a35b8f
        kf_factor(nfft,st->factors);
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    return st;
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
shun_iwasawa a35b8f
void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    if (fin == fout) {
shun_iwasawa a35b8f
        //NOTE: this is not really an in-place FFT algorithm.
shun_iwasawa a35b8f
        //It just performs an out-of-place FFT into a temp buffer
shun_iwasawa a35b8f
        kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
shun_iwasawa a35b8f
        kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
shun_iwasawa a35b8f
        memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
shun_iwasawa a35b8f
        KISS_FFT_TMP_FREE(tmpbuf);
shun_iwasawa a35b8f
    }else{
shun_iwasawa a35b8f
        kf_work( fout, fin, 1,in_stride, st->factors,st );
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    kiss_fft_stride(cfg,fin,fout,1);
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
shun_iwasawa a35b8f
void kiss_fft_cleanup(void)
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    // nothing needed any more
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
int kiss_fft_next_fast_size(int n)
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    while(1) {
shun_iwasawa a35b8f
        int m=n;
shun_iwasawa a35b8f
        while ( (m%2) == 0 ) m/=2;
shun_iwasawa a35b8f
        while ( (m%3) == 0 ) m/=3;
shun_iwasawa a35b8f
        while ( (m%5) == 0 ) m/=5;
shun_iwasawa a35b8f
        if (m<=1)
shun_iwasawa a35b8f
            break; /* n is completely factorable by twos, threes, and fives */
shun_iwasawa a35b8f
        n++;
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    return n;
shun_iwasawa a35b8f
}