#include "m_apm_lc.h"
#ifndef MM_PI_2
#define MM_PI_2 1.570796326794896619231321691639751442098584699687
#endif
#ifndef WR5000
#define WR5000 0.707106781186547524400844362104849039284835937688
#endif
#ifndef RDFT_LOOP_DIV
#define RDFT_LOOP_DIV 64
#endif
extern void M_fast_mul_fft(UCHAR *, UCHAR *, UCHAR *, int);
extern void M_rdft(int, int, double *);
extern void M_bitrv2(int, double *);
extern void M_cftfsub(int, double *);
extern void M_cftbsub(int, double *);
extern void M_rftfsub(int, double *);
extern void M_rftbsub(int, double *);
extern void M_cft1st(int, double *);
extern void M_cftmdl(int, int, double *);
static double *M_aa_array, *M_bb_array;
static int M_size = -1;
static char *M_fft_error_msg = "\'M_fast_mul_fft\', Out of memory";
void M_free_all_fft()
{
if (M_size > 0)
{
MAPM_FREE(M_aa_array);
MAPM_FREE(M_bb_array);
M_size = -1;
}
}
void M_fast_mul_fft(UCHAR *ww, UCHAR *uu, UCHAR *vv, int nbytes)
{
int mflag, i, j, nn2, nn;
double carry, nnr, dtemp, *a, *b;
UCHAR *w0;
unsigned long ul;
if (M_size < 0)
{
if (M_get_sizeof_int() == 2)
M_size = 516;
else
M_size = 8200;
M_aa_array = (double *)MAPM_MALLOC(M_size * sizeof(double));
M_bb_array = (double *)MAPM_MALLOC(M_size * sizeof(double));
if ((M_aa_array == NULL) || (M_bb_array == NULL))
{
M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg);
}
}
nn = nbytes;
nn2 = nbytes >> 1;
if (nn > M_size)
{
mflag = TRUE;
a = (double *)MAPM_MALLOC((nn + 8) * sizeof(double));
b = (double *)MAPM_MALLOC((nn + 8) * sizeof(double));
if ((a == NULL) || (b == NULL))
{
M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg);
}
}
else
{
mflag = FALSE;
a = M_aa_array;
b = M_bb_array;
}
i = 0;
for (j=0; j < nn2; j++)
{
a[j] = (double)((int)uu[i] * 100 + uu[i+1]);
b[j] = (double)((int)vv[i] * 100 + vv[i+1]);
i += 2;
}
for (j=nn2; j < nn; j++)
{
a[j] = 0.0;
b[j] = 0.0;
}
M_rdft(nn, 1, a);
M_rdft(nn, 1, b);
b[0] *= a[0];
b[1] *= a[1];
for (j=3; j <= nn; j += 2)
{
dtemp = b[j-1];
b[j-1] = dtemp * a[j-1] - b[j] * a[j];
b[j] = dtemp * a[j] + b[j] * a[j-1];
}
M_rdft(nn, -1, b);
carry = 0.0;
j = nn;
nnr = 2.0 / (double)nn;
while (1)
{
dtemp = b[--j] * nnr + carry + 0.5;
ul = (unsigned long)(dtemp * 1.0E-4);
carry = (double)ul;
b[j] = dtemp - carry * 10000.0;
if (j == 0)
break;
}
w0 = ww;
M_get_div_rem((int)ul, w0, (w0 + 1));
for (j=0; j <= (nn - 2); j++)
{
w0 += 2;
M_get_div_rem((int)b[j], w0, (w0 + 1));
}
if (mflag)
{
MAPM_FREE(b);
MAPM_FREE(a);
}
}
void M_rdft(int n, int isgn, double *a)
{
double xi;
if (isgn >= 0) {
if (n > 4) {
M_bitrv2(n, a);
M_cftfsub(n, a);
M_rftfsub(n, a);
} else if (n == 4) {
M_cftfsub(n, a);
}
xi = a[0] - a[1];
a[0] += a[1];
a[1] = xi;
} else {
a[1] = 0.5 * (a[0] - a[1]);
a[0] -= a[1];
if (n > 4) {
M_rftbsub(n, a);
M_bitrv2(n, a);
M_cftbsub(n, a);
} else if (n == 4) {
M_cftfsub(n, a);
}
}
}
void M_bitrv2(int n, double *a)
{
int j0, k0, j1, k1, l, m, i, j, k;
double xr, xi, yr, yi;
l = n >> 2;
m = 2;
while (m < l) {
l >>= 1;
m <<= 1;
}
if (m == l) {
j0 = 0;
for (k0 = 0; k0 < m; k0 += 2) {
k = k0;
for (j = j0; j < j0 + k0; j += 2) {
xr = a[j];
xi = a[j + 1];
yr = a[k];
yi = a[k + 1];
a[j] = yr;
a[j + 1] = yi;
a[k] = xr;
a[k + 1] = xi;
j1 = j + m;
k1 = k + 2 * m;
xr = a[j1];
xi = a[j1 + 1];
yr = a[k1];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 1] = xi;
j1 += m;
k1 -= m;
xr = a[j1];
xi = a[j1 + 1];
yr = a[k1];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 1] = xi;
j1 += m;
k1 += 2 * m;
xr = a[j1];
xi = a[j1 + 1];
yr = a[k1];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 1] = xi;
for (i = n >> 1; i > (k ^= i); i >>= 1);
}
j1 = j0 + k0 + m;
k1 = j1 + m;
xr = a[j1];
xi = a[j1 + 1];
yr = a[k1];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 1] = xi;
for (i = n >> 1; i > (j0 ^= i); i >>= 1);
}
} else {
j0 = 0;
for (k0 = 2; k0 < m; k0 += 2) {
for (i = n >> 1; i > (j0 ^= i); i >>= 1);
k = k0;
for (j = j0; j < j0 + k0; j += 2) {
xr = a[j];
xi = a[j + 1];
yr = a[k];
yi = a[k + 1];
a[j] = yr;
a[j + 1] = yi;
a[k] = xr;
a[k + 1] = xi;
j1 = j + m;
k1 = k + m;
xr = a[j1];
xi = a[j1 + 1];
yr = a[k1];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 1] = xi;
for (i = n >> 1; i > (k ^= i); i >>= 1);
}
}
}
}
void M_cftfsub(int n, double *a)
{
int j, j1, j2, j3, l;
double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
l = 2;
if (n > 8) {
M_cft1st(n, a);
l = 8;
while ((l << 2) < n) {
M_cftmdl(n, l, a);
l <<= 2;
}
}
if ((l << 2) == n) {
for (j = 0; j < l; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
a[j2] = x0r - x2r;
a[j2 + 1] = x0i - x2i;
a[j1] = x1r - x3i;
a[j1 + 1] = x1i + x3r;
a[j3] = x1r + x3i;
a[j3 + 1] = x1i - x3r;
}
} else {
for (j = 0; j < l; j += 2) {
j1 = j + l;
x0r = a[j] - a[j1];
x0i = a[j + 1] - a[j1 + 1];
a[j] += a[j1];
a[j + 1] += a[j1 + 1];
a[j1] = x0r;
a[j1 + 1] = x0i;
}
}
}
void M_cftbsub(int n, double *a)
{
int j, j1, j2, j3, l;
double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
l = 2;
if (n > 8) {
M_cft1st(n, a);
l = 8;
while ((l << 2) < n) {
M_cftmdl(n, l, a);
l <<= 2;
}
}
if ((l << 2) == n) {
for (j = 0; j < l; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = -a[j + 1] - a[j1 + 1];
x1r = a[j] - a[j1];
x1i = -a[j + 1] + a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i - x2i;
a[j2] = x0r - x2r;
a[j2 + 1] = x0i + x2i;
a[j1] = x1r - x3i;
a[j1 + 1] = x1i - x3r;
a[j3] = x1r + x3i;
a[j3 + 1] = x1i + x3r;
}
} else {
for (j = 0; j < l; j += 2) {
j1 = j + l;
x0r = a[j] - a[j1];
x0i = -a[j + 1] + a[j1 + 1];
a[j] += a[j1];
a[j + 1] = -a[j + 1] - a[j1 + 1];
a[j1] = x0r;
a[j1 + 1] = x0i;
}
}
}
void M_cft1st(int n, double *a)
{
int j, kj, kr;
double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
x0r = a[0] + a[2];
x0i = a[1] + a[3];
x1r = a[0] - a[2];
x1i = a[1] - a[3];
x2r = a[4] + a[6];
x2i = a[5] + a[7];
x3r = a[4] - a[6];
x3i = a[5] - a[7];
a[0] = x0r + x2r;
a[1] = x0i + x2i;
a[4] = x0r - x2r;
a[5] = x0i - x2i;
a[2] = x1r - x3i;
a[3] = x1i + x3r;
a[6] = x1r + x3i;
a[7] = x1i - x3r;
wn4r = WR5000;
x0r = a[8] + a[10];
x0i = a[9] + a[11];
x1r = a[8] - a[10];
x1i = a[9] - a[11];
x2r = a[12] + a[14];
x2i = a[13] + a[15];
x3r = a[12] - a[14];
x3i = a[13] - a[15];
a[8] = x0r + x2r;
a[9] = x0i + x2i;
a[12] = x2i - x0i;
a[13] = x0r - x2r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[10] = wn4r * (x0r - x0i);
a[11] = wn4r * (x0r + x0i);
x0r = x3i + x1r;
x0i = x3r - x1i;
a[14] = wn4r * (x0i - x0r);
a[15] = wn4r * (x0i + x0r);
ew = MM_PI_2 / n;
kr = 0;
for (j = 16; j < n; j += 16) {
for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1);
wk1r = cos(ew * kr);
wk1i = sin(ew * kr);
wk2r = 1 - 2 * wk1i * wk1i;
wk2i = 2 * wk1i * wk1r;
wk3r = wk1r - 2 * wk2i * wk1i;
wk3i = 2 * wk2i * wk1r - wk1i;
x0r = a[j] + a[j + 2];
x0i = a[j + 1] + a[j + 3];
x1r = a[j] - a[j + 2];
x1i = a[j + 1] - a[j + 3];
x2r = a[j + 4] + a[j + 6];
x2i = a[j + 5] + a[j + 7];
x3r = a[j + 4] - a[j + 6];
x3i = a[j + 5] - a[j + 7];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j + 4] = wk2r * x0r - wk2i * x0i;
a[j + 5] = wk2r * x0i + wk2i * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j + 2] = wk1r * x0r - wk1i * x0i;
a[j + 3] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j + 6] = wk3r * x0r - wk3i * x0i;
a[j + 7] = wk3r * x0i + wk3i * x0r;
x0r = wn4r * (wk1r - wk1i);
wk1i = wn4r * (wk1r + wk1i);
wk1r = x0r;
wk3r = wk1r - 2 * wk2r * wk1i;
wk3i = 2 * wk2r * wk1r - wk1i;
x0r = a[j + 8] + a[j + 10];
x0i = a[j + 9] + a[j + 11];
x1r = a[j + 8] - a[j + 10];
x1i = a[j + 9] - a[j + 11];
x2r = a[j + 12] + a[j + 14];
x2i = a[j + 13] + a[j + 15];
x3r = a[j + 12] - a[j + 14];
x3i = a[j + 13] - a[j + 15];
a[j + 8] = x0r + x2r;
a[j + 9] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j + 12] = -wk2i * x0r - wk2r * x0i;
a[j + 13] = -wk2i * x0i + wk2r * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j + 10] = wk1r * x0r - wk1i * x0i;
a[j + 11] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j + 14] = wk3r * x0r - wk3i * x0i;
a[j + 15] = wk3r * x0i + wk3i * x0r;
}
}
void M_cftmdl(int n, int l, double *a)
{
int j, j1, j2, j3, k, kj, kr, m, m2;
double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
m = l << 2;
for (j = 0; j < l; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
a[j2] = x0r - x2r;
a[j2 + 1] = x0i - x2i;
a[j1] = x1r - x3i;
a[j1 + 1] = x1i + x3r;
a[j3] = x1r + x3i;
a[j3 + 1] = x1i - x3r;
}
wn4r = WR5000;
for (j = m; j < l + m; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
a[j2] = x2i - x0i;
a[j2 + 1] = x0r - x2r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1] = wn4r * (x0r - x0i);
a[j1 + 1] = wn4r * (x0r + x0i);
x0r = x3i + x1r;
x0i = x3r - x1i;
a[j3] = wn4r * (x0i - x0r);
a[j3 + 1] = wn4r * (x0i + x0r);
}
ew = MM_PI_2 / n;
kr = 0;
m2 = 2 * m;
for (k = m2; k < n; k += m2) {
for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1);
wk1r = cos(ew * kr);
wk1i = sin(ew * kr);
wk2r = 1 - 2 * wk1i * wk1i;
wk2i = 2 * wk1i * wk1r;
wk3r = wk1r - 2 * wk2i * wk1i;
wk3i = 2 * wk2i * wk1r - wk1i;
for (j = k; j < l + k; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j2] = wk2r * x0r - wk2i * x0i;
a[j2 + 1] = wk2r * x0i + wk2i * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1] = wk1r * x0r - wk1i * x0i;
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j3] = wk3r * x0r - wk3i * x0i;
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
}
x0r = wn4r * (wk1r - wk1i);
wk1i = wn4r * (wk1r + wk1i);
wk1r = x0r;
wk3r = wk1r - 2 * wk2r * wk1i;
wk3i = 2 * wk2r * wk1r - wk1i;
for (j = k + m; j < l + (k + m); j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j2] = -wk2i * x0r - wk2r * x0i;
a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1] = wk1r * x0r - wk1i * x0i;
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j3] = wk3r * x0r - wk3i * x0i;
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
}
}
}
void M_rftfsub(int n, double *a)
{
int i, i0, j, k;
double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
ec = 2 * MM_PI_2 / n;
wkr = 0;
wki = 0;
wdi = cos(ec);
wdr = sin(ec);
wdi *= wdr;
wdr *= wdr;
w1r = 1 - 2 * wdr;
w1i = 2 * wdi;
ss = 2 * w1i;
i = n >> 1;
while (1) {
i0 = i - 4 * RDFT_LOOP_DIV;
if (i0 < 4) {
i0 = 4;
}
for (j = i - 4; j >= i0; j -= 4) {
k = n - j;
xr = a[j + 2] - a[k - 2];
xi = a[j + 3] + a[k - 1];
yr = wdr * xr - wdi * xi;
yi = wdr * xi + wdi * xr;
a[j + 2] -= yr;
a[j + 3] -= yi;
a[k - 2] += yr;
a[k - 1] -= yi;
wkr += ss * wdi;
wki += ss * (0.5 - wdr);
xr = a[j] - a[k];
xi = a[j + 1] + a[k + 1];
yr = wkr * xr - wki * xi;
yi = wkr * xi + wki * xr;
a[j] -= yr;
a[j + 1] -= yi;
a[k] += yr;
a[k + 1] -= yi;
wdr += ss * wki;
wdi += ss * (0.5 - wkr);
}
if (i0 == 4) {
break;
}
wkr = 0.5 * sin(ec * i0);
wki = 0.5 * cos(ec * i0);
wdr = 0.5 - (wkr * w1r - wki * w1i);
wdi = wkr * w1i + wki * w1r;
wkr = 0.5 - wkr;
i = i0;
}
xr = a[2] - a[n - 2];
xi = a[3] + a[n - 1];
yr = wdr * xr - wdi * xi;
yi = wdr * xi + wdi * xr;
a[2] -= yr;
a[3] -= yi;
a[n - 2] += yr;
a[n - 1] -= yi;
}
void M_rftbsub(int n, double *a)
{
int i, i0, j, k;
double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
ec = 2 * MM_PI_2 / n;
wkr = 0;
wki = 0;
wdi = cos(ec);
wdr = sin(ec);
wdi *= wdr;
wdr *= wdr;
w1r = 1 - 2 * wdr;
w1i = 2 * wdi;
ss = 2 * w1i;
i = n >> 1;
a[i + 1] = -a[i + 1];
while (1) {
i0 = i - 4 * RDFT_LOOP_DIV;
if (i0 < 4) {
i0 = 4;
}
for (j = i - 4; j >= i0; j -= 4) {
k = n - j;
xr = a[j + 2] - a[k - 2];
xi = a[j + 3] + a[k - 1];
yr = wdr * xr + wdi * xi;
yi = wdr * xi - wdi * xr;
a[j + 2] -= yr;
a[j + 3] = yi - a[j + 3];
a[k - 2] += yr;
a[k - 1] = yi - a[k - 1];
wkr += ss * wdi;
wki += ss * (0.5 - wdr);
xr = a[j] - a[k];
xi = a[j + 1] + a[k + 1];
yr = wkr * xr + wki * xi;
yi = wkr * xi - wki * xr;
a[j] -= yr;
a[j + 1] = yi - a[j + 1];
a[k] += yr;
a[k + 1] = yi - a[k + 1];
wdr += ss * wki;
wdi += ss * (0.5 - wkr);
}
if (i0 == 4) {
break;
}
wkr = 0.5 * sin(ec * i0);
wki = 0.5 * cos(ec * i0);
wdr = 0.5 - (wkr * w1r - wki * w1i);
wdi = wkr * w1i + wki * w1r;
wkr = 0.5 - wkr;
i = i0;
}
xr = a[2] - a[n - 2];
xi = a[3] + a[n - 1];
yr = wdr * xr + wdi * xi;
yi = wdr * xi - wdi * xr;
a[2] -= yr;
a[3] = yi - a[3];
a[n - 2] += yr;
a[n - 1] = yi - a[n - 1];
a[1] = -a[1];
}