/*
Copyright (c) 2003-2004, Mark Borgerding

All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
shun_iwasawa a35b8f
shun_iwasawa a35b8f
#include "kiss_fftr.h"
shun_iwasawa a35b8f
#include "_kiss_fft_guts.h"
shun_iwasawa a35b8f
shun_iwasawa a35b8f
struct kiss_fftr_state{
shun_iwasawa a35b8f
    kiss_fft_cfg substate;
shun_iwasawa a35b8f
    kiss_fft_cpx * tmpbuf;
shun_iwasawa a35b8f
    kiss_fft_cpx * super_twiddles;
shun_iwasawa a35b8f
#ifdef USE_SIMD
shun_iwasawa a35b8f
    void * pad;
shun_iwasawa a35b8f
#endif
shun_iwasawa a35b8f
};
shun_iwasawa a35b8f
shun_iwasawa a35b8f
kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    int i;
shun_iwasawa a35b8f
    kiss_fftr_cfg st = NULL;
shun_iwasawa a35b8f
    size_t subsize, memneeded;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    if (nfft & 1) {
shun_iwasawa a35b8f
        fprintf(stderr,"Real FFT optimization must be even.\n");
shun_iwasawa a35b8f
        return NULL;
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    nfft >>= 1;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
shun_iwasawa a35b8f
    memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    if (lenmem == NULL) {
shun_iwasawa a35b8f
        st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
shun_iwasawa a35b8f
    } else {
shun_iwasawa a35b8f
        if (*lenmem >= memneeded)
shun_iwasawa a35b8f
            st = (kiss_fftr_cfg) mem;
shun_iwasawa a35b8f
        *lenmem = memneeded;
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    if (!st)
shun_iwasawa a35b8f
        return NULL;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
shun_iwasawa a35b8f
    st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
shun_iwasawa a35b8f
    st->super_twiddles = st->tmpbuf + nfft;
shun_iwasawa a35b8f
    kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    for (i = 0; i < nfft/2; ++i) {
shun_iwasawa a35b8f
        double phase =
shun_iwasawa a35b8f
            -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5);
shun_iwasawa a35b8f
        if (inverse_fft)
shun_iwasawa a35b8f
            phase *= -1;
shun_iwasawa a35b8f
        kf_cexp (st->super_twiddles+i,phase);
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    return st;
shun_iwasawa a35b8f
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
/*
 * Forward FFT of a real signal.
 *
 * st        Configuration from kiss_fftr_alloc() created with
 *           inverse_fft == 0. Passing an inverse configuration prints a
 *           usage error on stderr and terminates the process via exit(1).
 * timedata  Real input samples. They are handed to the complex FFT
 *           reinterpreted as packed (real, imag) pairs, i.e. even-indexed
 *           samples act as real parts and odd-indexed ones as imaginary
 *           parts.
 * freqdata  Output spectrum. Bins 0 .. substate->nfft are written, so the
 *           buffer must hold substate->nfft + 1 complex values.
 */
void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
{
    int bin;
    int half_len;
    kiss_fft_cpx dc_packed;

    if (st->substate->inverse != 0) {
        fprintf(stderr,"kiss fft usage error: improper alloc\n");
        exit(1);
    }

    half_len = st->substate->nfft;

    /* One complex FFT transforms the even and odd samples in parallel. */
    kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );

    /*
     * Bin 0 of the packed spectrum carries two real sums:
     *   real part = sum of the even-numbered input samples
     *   imag part = sum of the odd-numbered input samples
     * Their sum is the total of the input, i.e. the DC bin; their difference
     * is the input dotted with [1,-1,1,-1,...], i.e. the Nyquist bin.
     */
    dc_packed = st->tmpbuf[0];
    C_FIXDIV(dc_packed,2);
    CHECK_OVERFLOW_OP(dc_packed.r ,+, dc_packed.i);
    CHECK_OVERFLOW_OP(dc_packed.r ,-, dc_packed.i);
    freqdata[0].r = dc_packed.r + dc_packed.i;
    freqdata[half_len].r = dc_packed.r - dc_packed.i;
    /* DC and Nyquist bins are written with a zero imaginary part. */
#ifdef USE_SIMD
    freqdata[half_len].i = freqdata[0].i = _mm_set1_ps(0);
#else
    freqdata[half_len].i = freqdata[0].i = 0;
#endif

    /* Each pass resolves one bin and its mirror image (half_len - bin). */
    for (bin = 1; bin <= half_len / 2; ++bin) {
        const int mirror = half_len - bin;
        kiss_fft_cpx z_bin = st->tmpbuf[bin];
        kiss_fft_cpx z_mirror_conj;
        kiss_fft_cpx sum, diff, rotated;

        /* Complex conjugate of the mirrored packed bin. */
        z_mirror_conj.r =  st->tmpbuf[mirror].r;
        z_mirror_conj.i = -st->tmpbuf[mirror].i;
        C_FIXDIV(z_bin,2);
        C_FIXDIV(z_mirror_conj,2);

        C_ADD( sum, z_bin , z_mirror_conj );
        C_SUB( diff, z_bin , z_mirror_conj );
        /* Rotate the difference term by the precomputed factor for this bin. */
        C_MUL( rotated , diff , st->super_twiddles[bin-1]);

        freqdata[bin].r = HALF_OF(sum.r + rotated.r);
        freqdata[bin].i = HALF_OF(sum.i + rotated.i);
        freqdata[mirror].r = HALF_OF(sum.r - rotated.r);
        freqdata[mirror].i = HALF_OF(rotated.i - sum.i);
    }
}
shun_iwasawa a35b8f
shun_iwasawa a35b8f
/*
 * Inverse of kiss_fftr(): rebuild a real signal from its half spectrum.
 *
 * st        Configuration from kiss_fftr_alloc() created with a non-zero
 *           inverse_fft. Passing a forward configuration prints a usage
 *           error on stderr and terminates the process via exit(1).
 * freqdata  Input spectrum, bins 0 .. substate->nfft. Only the real parts of
 *           bin 0 and bin substate->nfft are read; their imaginary parts are
 *           ignored.
 * timedata  Output buffer for the real samples. The complex inverse FFT
 *           writes into it as packed (real, imag) pairs.
 */
void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
{
    int bin;
    int half_len;

    if (!st->substate->inverse) {
        fprintf (stderr, "kiss fft usage error: improper alloc\n");
        exit (1);
    }

    half_len = st->substate->nfft;

    /* Fold the DC and Nyquist bins back into packed bin 0
     * (real = their sum, imag = their difference). */
    st->tmpbuf[0].r = freqdata[0].r + freqdata[half_len].r;
    st->tmpbuf[0].i = freqdata[0].r - freqdata[half_len].r;
    C_FIXDIV(st->tmpbuf[0],2);

    /* Each pass rebuilds one packed bin and its mirror (half_len - bin). */
    for (bin = 1; bin <= half_len / 2; ++bin) {
        const int mirror = half_len - bin;
        kiss_fft_cpx upper = freqdata[bin];
        kiss_fft_cpx lower_conj;
        kiss_fft_cpx even_part, diff, odd_part;

        /* Complex conjugate of the mirrored input bin. */
        lower_conj.r =  freqdata[mirror].r;
        lower_conj.i = -freqdata[mirror].i;
        C_FIXDIV( upper , 2 );
        C_FIXDIV( lower_conj , 2 );

        C_ADD (even_part, upper, lower_conj);
        C_SUB (diff, upper, lower_conj);
        C_MUL (odd_part, diff, st->super_twiddles[bin-1]);
        C_ADD (st->tmpbuf[bin], even_part, odd_part);
        C_SUB (st->tmpbuf[mirror], even_part, odd_part);
        /* The mirrored packed bin is stored conjugated. */
#ifdef USE_SIMD
        st->tmpbuf[mirror].i *= _mm_set1_ps(-1.0);
#else
        st->tmpbuf[mirror].i *= -1;
#endif
    }
    kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
}