/*
Copyright (c) 2003-2004, Mark Borgerding

All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "kiss_fftnd.h"
#include "_kiss_fft_guts.h"

struct kiss_fftnd_state{
shun_iwasawa a35b8f
    int dimprod; /* dimsum would be mighty tasty right now */
shun_iwasawa a35b8f
    int ndims; 
shun_iwasawa a35b8f
    int *dims;
shun_iwasawa a35b8f
    kiss_fft_cfg *states; /* cfg states for each dimension */
shun_iwasawa a35b8f
    kiss_fft_cpx * tmpbuf; /*buffer capable of hold the entire input */
shun_iwasawa a35b8f
};
shun_iwasawa a35b8f
shun_iwasawa a35b8f
kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem)
shun_iwasawa a35b8f
{
shun_iwasawa a35b8f
    kiss_fftnd_cfg st = NULL;
shun_iwasawa a35b8f
    int i;
shun_iwasawa a35b8f
    int dimprod=1;
shun_iwasawa a35b8f
    size_t memneeded = sizeof(struct kiss_fftnd_state);
shun_iwasawa a35b8f
    char * ptr;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    for (i=0;i
shun_iwasawa a35b8f
        size_t sublen=0;
shun_iwasawa a35b8f
        kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);
shun_iwasawa a35b8f
        memneeded += sublen;   /* st->states[i] */
shun_iwasawa a35b8f
        dimprod *= dims[i];
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    memneeded += sizeof(int) * ndims;/*  st->dims */
shun_iwasawa a35b8f
    memneeded += sizeof(void*) * ndims;/* st->states  */
shun_iwasawa a35b8f
    memneeded += sizeof(kiss_fft_cpx) * dimprod; /* st->tmpbuf */
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    if (lenmem == NULL) {/* allocate for the caller*/
shun_iwasawa a35b8f
        st = (kiss_fftnd_cfg) malloc (memneeded);
shun_iwasawa a35b8f
    } else { /* initialize supplied buffer if big enough */
shun_iwasawa a35b8f
        if (*lenmem >= memneeded)
shun_iwasawa a35b8f
            st = (kiss_fftnd_cfg) mem;
shun_iwasawa a35b8f
        *lenmem = memneeded; /*tell caller how big struct is (or would be) */
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    if (!st)
shun_iwasawa a35b8f
        return NULL; /*malloc failed or buffer too small */
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    st->dimprod = dimprod;
shun_iwasawa a35b8f
    st->ndims = ndims;
shun_iwasawa a35b8f
    ptr=(char*)(st+1);
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    st->states = (kiss_fft_cfg *)ptr;
shun_iwasawa a35b8f
    ptr += sizeof(void*) * ndims;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    st->dims = (int*)ptr;
shun_iwasawa a35b8f
    ptr += sizeof(int) * ndims;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    st->tmpbuf = (kiss_fft_cpx*)ptr;
shun_iwasawa a35b8f
    ptr += sizeof(kiss_fft_cpx) * dimprod;
shun_iwasawa a35b8f
shun_iwasawa a35b8f
    for (i=0;i
shun_iwasawa a35b8f
        size_t len;
shun_iwasawa a35b8f
        st->dims[i] = dims[i];
shun_iwasawa a35b8f
        kiss_fft_alloc (st->dims[i], inverse_fft, NULL, &len);
shun_iwasawa a35b8f
        st->states[i] = kiss_fft_alloc (st->dims[i], inverse_fft, ptr,&len);
shun_iwasawa a35b8f
        ptr += len;
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    /*
shun_iwasawa a35b8f
Hi there!
shun_iwasawa a35b8f
shun_iwasawa a35b8f
If you're looking at this particular code, it probably means you've got a brain-dead bounds checker 
shun_iwasawa a35b8f
that thinks the above code overwrites the end of the array.
shun_iwasawa a35b8f
shun_iwasawa a35b8f
It doesn't.
shun_iwasawa a35b8f
shun_iwasawa a35b8f
-- Mark 
shun_iwasawa a35b8f
shun_iwasawa a35b8f
P.S.
shun_iwasawa a35b8f
The below code might give you some warm fuzzies and help convince you.
shun_iwasawa a35b8f
       */
shun_iwasawa a35b8f
    if ( ptr - (char*)st != (int)memneeded ) {
shun_iwasawa a35b8f
        fprintf(stderr,
shun_iwasawa a35b8f
                "################################################################################\n"
shun_iwasawa a35b8f
                "Internal error! Memory allocation miscalculation\n"
shun_iwasawa a35b8f
                "################################################################################\n"
shun_iwasawa a35b8f
               );
shun_iwasawa a35b8f
    }
shun_iwasawa a35b8f
    return st;
shun_iwasawa a35b8f
}

/*
 This works by tackling one dimension at a time.

 In effect,
 Each stage starts out by reshaping the matrix into a DixSi 2d matrix.
 A Di-sized fft is taken of each column, transposing the matrix as it goes.

Here's a 3-d example:
Take a 2x3x4 matrix, laid out in memory as a contiguous buffer
 [ [ [ a b c d ] [ e f g h ] [ i j k l ] ]
   [ [ m n o p ] [ q r s t ] [ u v w x ] ] ]

Stage 0 ( D=2): treat the buffer as a 2x12 matrix
   [ [a b ... k l]
     [m n ... w x] ]

   FFT each column with size 2.
   Transpose the matrix at the same time using kiss_fft_stride.

   [ [ a+m a-m ]
     [ b+n b-n]
     ...
     [ k+w k-w ]
     [ l+x l-x ] ]

   Note fft([x y]) == [x+y x-y]

Stage 1 ( D=3) treats the buffer (the output of stage D=2) as a 3x8 matrix,
   [ [ a+m a-m b+n b-n c+o c-o d+p d-p ]
     [ e+q e-q f+r f-r g+s g-s h+t h-t ]
     [ i+u i-u j+v j-v k+w k-w l+x l-x ] ]

   And perform FFTs (size=3) on each of the columns as above, transposing
   the matrix as it goes.  The output of stage 1 is
       (Legend: ap = [ a+m e+q i+u ]
                am = [ a-m e-q i-u ]
                sum(x) is the DC term, i.e. fft(x)[0] )

   [ [ sum(ap) fft(ap)[1] fft(ap)[2] ]
     [ sum(am) fft(am)[1] fft(am)[2] ]
     [ sum(bp) fft(bp)[1] fft(bp)[2] ]
     [ sum(bm) fft(bm)[1] fft(bm)[2] ]
     [ sum(cp) fft(cp)[1] fft(cp)[2] ]
     [ sum(cm) fft(cm)[1] fft(cm)[2] ]
     [ sum(dp) fft(dp)[1] fft(dp)[2] ]
     [ sum(dm) fft(dm)[1] fft(dm)[2] ]  ]

Stage 2 ( D=4) treats this buffer as a 4*6 matrix,
   [ [ sum(ap) fft(ap)[1] fft(ap)[2] sum(am) fft(am)[1] fft(am)[2] ]
     [ sum(bp) fft(bp)[1] fft(bp)[2] sum(bm) fft(bm)[1] fft(bm)[2] ]
     [ sum(cp) fft(cp)[1] fft(cp)[2] sum(cm) fft(cm)[1] fft(cm)[2] ]
     [ sum(dp) fft(dp)[1] fft(dp)[2] sum(dm) fft(dm)[1] fft(dm)[2] ]  ]

   Then FFTs each column, transposing as it goes.

   The resulting matrix is the 3d FFT of the 2x3x4 input matrix.

   Note as a sanity check that the first element of the final
   stage's output (DC term) is
   sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] )
   , i.e. the summation of all 24 input elements.

*/
/*
 * Run the N-dimensional FFT described by st.
 *
 * st    state created by kiss_fftnd_alloc
 * fin   input samples, st->dimprod elements
 * fout  output samples, st->dimprod elements
 *
 * One pass is made per dimension.  Each pass reads one buffer and writes the
 * other, alternating between fout and st->tmpbuf, and the starting buffer is
 * chosen from the parity of ndims so that the final pass writes into fout.
 * fin == fout is handled: when the first pass would write straight into fout
 * (odd ndims) the input is first copied to st->tmpbuf and read from there.
 */
void kiss_fftnd(kiss_fftnd_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
    int i,k;
    const kiss_fft_cpx * bufin=fin;
    kiss_fft_cpx * bufout;

    /*arrange it so the last bufout == fout*/
    if ( st->ndims & 1 ) {
        bufout = fout;
        if (fin==fout) {
            /* the first pass may not read and write the same buffer */
            memcpy( st->tmpbuf, fin, sizeof(kiss_fft_cpx) * st->dimprod );
            bufin = st->tmpbuf;
        }
    }else
        bufout = st->tmpbuf;

    for ( k=0; k < st->ndims; ++k) {
        int curdim = st->dims[k];
        int stride = st->dimprod / curdim;

        /* View bufin as a curdim x stride matrix.  Column i is read with a
           step of stride elements and its transform is stored at
           bufout + i*curdim, i.e. as row i of the transposed matrix. */
        for ( i=0 ; i<stride ; ++i ) {
            kiss_fft_stride( st->states[k], bufin+i , bufout+i*curdim, stride );
        }

        /*toggle back and forth between the two buffers*/
        if (bufout == st->tmpbuf){
            bufout = fout;
            bufin = st->tmpbuf;
        }else{
            bufout = st->tmpbuf;
            bufin = fout;
        }
    }
}