/* hb_12.c -- from the fftw3 source package */

/*
 * Copyright (c) 2003 Matteo Frigo
 * Copyright (c) 2003 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Sat Jul  5 22:11:36 EDT 2003 */

#include "codelet-rdft.h"

/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 12 -dif -name hb_12 -include hb.h */

/*
 * This function contains 118 FP additions, 60 FP multiplications,
 * (or, 88 additions, 30 multiplications, 30 fused multiply/add),
 * 39 stack variables, and 48 memory accesses
 */
/*
 * Generator Id's : 
 * $Id: algsimp.ml,v 1.7 2003/03/15 20:29:42 stevenj Exp $
 * $Id: fft.ml,v 1.2 2003/03/15 20:29:42 stevenj Exp $
 * $Id: gen_hc2hc.ml,v 1.9 2003/04/17 19:25:50 athena Exp $
 */

#include "hb.h"

/*
 * hb_12: generated size-12 codelet (see the genfft command line above:
 * gen_hc2hc, -n 12, -dif, -sign 1).
 *
 * Parameters, as used by the code below:
 *   rio  - base pointer indexed upward:   rio[WS(ios, k)],  k = 0..11
 *   iio  - base pointer indexed downward: iio[-WS(ios, k)], k = 0..11
 *   W    - twiddle table; 22 values (W[0]..W[21]) are consumed per iteration
 *   ios  - stride passed to WS() to form the element offsets
 *   m    - controls the trip count: the loop starts at i = m - 2 and steps
 *          by -2 while i > 0
 *   dist - per-iteration pointer step: rio advances by +dist, iio by -dist
 *
 * Each iteration loads all 24 inputs (12 via rio, 12 via iio) before the
 * first store, then overwrites the same 24 locations, so the transform is
 * performed in place.
 *
 * Returns W advanced past the twiddles consumed (22 per iteration).
 *
 * NOTE(review): R, E, stride, DK, WS, FMA, FNMS and FMS come from the
 * included project headers and are not visible here.  The comments below
 * assume FMA(a,b,c) = a*b + c, FNMS(a,b,c) = c - a*b and
 * FMS(a,b,c) = a*b - c -- confirm against the header definitions.
 */
static const R *hb_12(R *rio, R *iio, const R *W, stride ios, int m, int dist)
{
     /* Constants for the radix-3 stages: 1/2 and sqrt(3)/2. */
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     int i;
     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 22) {
        /* Results of the four 3-input stages, kept live for the combine steps. */
        E T5, Tt, T12, T1M, T1i, T1U, Tl, TM, T1c, T1Y, T1s, T1Q, Ta, Ty, T15;
        E T1N, T1l, T1V, Tg, TH, T19, T1X, T1p, T1P;
        /* Stage A: loads rio[0,4,8] and iio[0,-4,-8]. */
        {
             E T1, Tp, T4, T1g, Ts, T11, T10, T1h;
             T1 = rio[0];
             Tp = iio[0];
             {
                E T2, T3, Tq, Tr;
                T2 = rio[WS(ios, 4)];
                T3 = iio[-WS(ios, 8)];
                T4 = T2 + T3;
                T1g = KP866025403 * (T2 - T3);
                Tq = rio[WS(ios, 8)];
                Tr = iio[-WS(ios, 4)];
                Ts = Tq - Tr;
                T11 = KP866025403 * (Tq + Tr);
             }
             T5 = T1 + T4;
             Tt = Tp - Ts;
             T10 = FNMS(KP500000000, T4, T1);
             T12 = T10 - T11;
             T1M = T10 + T11;
             T1h = FMA(KP500000000, Ts, Tp);
             T1i = T1g + T1h;
             T1U = T1h - T1g;
        }
        /* Stage B: loads rio[1,5,9] and iio[-1,-5,-9]. */
        {
             E Th, TL, Tk, T1a, TK, T1r, T1b, T1q;
             Th = iio[-WS(ios, 9)];
             TL = rio[WS(ios, 9)];
             {
                E Ti, Tj, TI, TJ;
                Ti = rio[WS(ios, 1)];
                Tj = rio[WS(ios, 5)];
                Tk = Ti + Tj;
                T1a = KP866025403 * (Ti - Tj);
                TI = iio[-WS(ios, 5)];
                TJ = iio[-WS(ios, 1)];
                TK = TI + TJ;
                T1r = KP866025403 * (TI - TJ);
             }
             Tl = Th + Tk;
             TM = TK - TL;
             T1b = FMA(KP500000000, TK, TL);
             T1c = T1a - T1b;
             T1Y = T1a + T1b;
             T1q = FNMS(KP500000000, Tk, Th);
             T1s = T1q + T1r;
             T1Q = T1q - T1r;
        }
        /* Stage C: loads rio[2,6,10] and iio[-2,-6,-10]. */
        {
             E T6, Tx, T9, T1j, Tw, T14, T13, T1k;
             T6 = iio[-WS(ios, 6)];
             Tx = rio[WS(ios, 6)];
             {
                E T7, T8, Tu, Tv;
                T7 = iio[-WS(ios, 10)];
                T8 = rio[WS(ios, 2)];
                T9 = T7 + T8;
                T1j = KP866025403 * (T7 - T8);
                Tu = rio[WS(ios, 10)];
                Tv = iio[-WS(ios, 2)];
                Tw = Tu - Tv;
                T14 = KP866025403 * (Tu + Tv);
             }
             Ta = T6 + T9;
             Ty = Tw + Tx;
             T13 = FNMS(KP500000000, T9, T6);
             T15 = T13 + T14;
             T1N = T13 - T14;
             T1k = FMS(KP500000000, Tw, Tx);
             T1l = T1j + T1k;
             T1V = T1k - T1j;
        }
        /* Stage D: loads rio[3,7,11] and iio[-3,-7,-11]; last loads of the iteration. */
        {
             E Tc, TD, Tf, T17, TG, T1o, T18, T1n;
             Tc = rio[WS(ios, 3)];
             TD = iio[-WS(ios, 3)];
             {
                E Td, Te, TE, TF;
                Td = iio[-WS(ios, 7)];
                Te = iio[-WS(ios, 11)];
                Tf = Td + Te;
                T17 = KP866025403 * (Td - Te);
                TE = rio[WS(ios, 7)];
                TF = rio[WS(ios, 11)];
                TG = TE + TF;
                T1o = KP866025403 * (TE - TF);
             }
             Tg = Tc + Tf;
             TH = TD - TG;
             T18 = FMA(KP500000000, TG, TD);
             T19 = T17 + T18;
             T1X = T18 - T17;
             T1n = FNMS(KP500000000, Tf, Tc);
             T1p = T1n + T1o;
             T1P = T1n - T1o;
        }
        /*
         * From here on only stores happen.  Every output pair except the
         * first is multiplied by a twiddle pair: output k (k = 1..11) is
         * written to rio[WS(ios, k)] and iio[-WS(ios, 11 - k)] using
         * W[2*(k-1)] and W[2*(k-1)+1].
         */
        /* Output 0 (no twiddle) and output 6 (W[10], W[11]). */
        {
             E Tb, Tm, TU, TW, TX, TY, TT, TV;
             Tb = T5 + Ta;
             Tm = Tg + Tl;
             TU = Tb - Tm;
             TW = Tt - Ty;
             TX = TH + TM;
             TY = TW - TX;
             rio[0] = Tb + Tm;
             iio[-WS(ios, 11)] = TW + TX;
             TT = W[10];
             TV = W[11];
             rio[WS(ios, 6)] = FNMS(TV, TY, TT * TU);
             iio[-WS(ios, 5)] = FMA(TV, TU, TT * TY);
        }
        /* Outputs 5 (W[8], W[9]) and 11 (W[20], W[21]). */
        {
             E T28, T2g, T2c, T2e;
             {
                E T26, T27, T2a, T2b;
                T26 = T1M - T1N;
                T27 = T1X + T1Y;
                T28 = T26 - T27;
                T2g = T26 + T27;
                T2a = T1U - T1V;
                T2b = T1P - T1Q;
                T2c = T2a + T2b;
                T2e = T2a - T2b;
             }
             {
                E T25, T29, T2d, T2f;
                T25 = W[8];
                T29 = W[9];
                rio[WS(ios, 5)] = FNMS(T29, T2c, T25 * T28);
                iio[-WS(ios, 6)] = FMA(T25, T2c, T29 * T28);
                T2d = W[20];
                T2f = W[21];
                iio[0] = FMA(T2d, T2e, T2f * T2g);
                rio[WS(ios, 11)] = FNMS(T2f, T2e, T2d * T2g);
             }
        }
        /* Outputs 9 (W[16], W[17]) and 3 (W[4], W[5]). */
        {
             E TA, TS, TO, TQ;
             {
                E To, Tz, TC, TN;
                To = Tg - Tl;
                Tz = Tt + Ty;
                TA = To + Tz;
                TS = Tz - To;
                TC = T5 - Ta;
                TN = TH - TM;
                TO = TC - TN;
                TQ = TC + TN;
             }
             {
                E Tn, TB, TP, TR;
                Tn = W[16];
                TB = W[17];
                iio[-WS(ios, 2)] = FMA(Tn, TA, TB * TO);
                rio[WS(ios, 9)] = FNMS(TB, TA, Tn * TO);
                TP = W[4];
                TR = W[5];
                rio[WS(ios, 3)] = FNMS(TR, TS, TP * TQ);
                iio[-WS(ios, 8)] = FMA(TP, TS, TR * TQ);
             }
        }
        /* Outputs 2 (W[2], W[3]) and 8 (W[14], W[15]). */
        {
             E T1S, T22, T20, T24;
             {
                E T1O, T1R, T1W, T1Z;
                T1O = T1M + T1N;
                T1R = T1P + T1Q;
                T1S = T1O - T1R;
                T22 = T1O + T1R;
                T1W = T1U + T1V;
                T1Z = T1X - T1Y;
                T20 = T1W - T1Z;
                T24 = T1W + T1Z;
             }
             {
                E T1L, T1T, T21, T23;
                T1L = W[2];
                T1T = W[3];
                rio[WS(ios, 2)] = FNMS(T1T, T20, T1L * T1S);
                iio[-WS(ios, 9)] = FMA(T1T, T1S, T1L * T20);
                T21 = W[14];
                T23 = W[15];
                rio[WS(ios, 8)] = FNMS(T23, T24, T21 * T22);
                iio[-WS(ios, 3)] = FMA(T23, T22, T21 * T24);
             }
        }
        /* Outputs 10 (W[18], W[19]) and 4 (W[6], W[7]). */
        {
             E T1C, T1I, T1G, T1K;
             {
                E T1A, T1B, T1E, T1F;
                T1A = T12 + T15;
                T1B = T1p + T1s;
                T1C = T1A - T1B;
                T1I = T1A + T1B;
                T1E = T1i + T1l;
                T1F = T19 + T1c;
                T1G = T1E - T1F;
                T1K = T1E + T1F;
             }
             {
                E T1z, T1D, T1H, T1J;
                T1z = W[18];
                T1D = W[19];
                rio[WS(ios, 10)] = FNMS(T1D, T1G, T1z * T1C);
                iio[-WS(ios, 1)] = FMA(T1D, T1C, T1z * T1G);
                T1H = W[6];
                T1J = W[7];
                rio[WS(ios, 4)] = FNMS(T1J, T1K, T1H * T1I);
                iio[-WS(ios, 7)] = FMA(T1J, T1I, T1H * T1K);
             }
        }
        /* Outputs 1 (W[0], W[1]) and 7 (W[12], W[13]). */
        {
             E T1e, T1y, T1u, T1w;
             {
                E T16, T1d, T1m, T1t;
                T16 = T12 - T15;
                T1d = T19 - T1c;
                T1e = T16 - T1d;
                T1y = T16 + T1d;
                T1m = T1i - T1l;
                T1t = T1p - T1s;
                T1u = T1m + T1t;
                T1w = T1m - T1t;
             }
             {
                E TZ, T1f, T1v, T1x;
                TZ = W[0];
                T1f = W[1];
                rio[WS(ios, 1)] = FNMS(T1f, T1u, TZ * T1e);
                iio[-WS(ios, 10)] = FMA(TZ, T1u, T1f * T1e);
                T1v = W[12];
                T1x = W[13];
                iio[-WS(ios, 4)] = FMA(T1v, T1w, T1x * T1y);
                rio[WS(ios, 7)] = FNMS(T1x, T1w, T1v * T1y);
             }
        }
     }
     /* W now points just past the last twiddle consumed by the loop. */
     return W;
}

/*
 * Twiddle-factor instruction table referenced by desc below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared in project
 * headers not visible here; presumably the first entry requests the full
 * twiddle set for size 12 and the second terminates the list -- confirm.
 */
static const tw_instr twinstr[] = {
     {TW_FULL, 0, 12},
     {TW_NEXT, 1, 0}
};

/*
 * Codelet descriptor handed to the planner at registration time.
 * The leading 12 matches the transform size and {88, 30, 30, 0} matches the
 * operation counts quoted in the header comment of this file (88 additions,
 * 30 multiplications, 30 fused multiply/adds).
 * NOTE(review): the hc2hc_desc field layout, GENUS and the meaning of the
 * trailing zeros are defined in project headers -- confirm there.
 */
static const hc2hc_desc desc = { 12, "hb_12", twinstr, {88, 30, 30, 0}, &GENUS, 0, 0, 0 };

/*
 * Registration hook: passes the hb_12 kernel together with its descriptor
 * to the planner through X(khc2hc_dif_register).
 */
void X(codelet_hb_12)(planner *p)
{
     X(khc2hc_dif_register)(p, hb_12, &desc);
}

/* Listing page generated by Doxygen 1.6.0 */