#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "bn_arch.h"
#include "bn_local.h"
#include "bn_internal.h"
#ifndef HAVE_BN_SQR_COMBA4
void
bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
BN_ULONG c2, c1, c0;
bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]);
bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]);
bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]);
bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]);
bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &r[4]);
bn_mul2_mulw_addtw(a[3], a[2], 0, c2, c1, &c2, &c1, &r[5]);
bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &r[7], &r[6]);
}
#endif
#ifndef HAVE_BN_SQR_COMBA6
void
bn_sqr_comba6(BN_ULONG *r, const BN_ULONG *a)
{
BN_ULONG c2, c1, c0;
bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]);
bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]);
bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]);
bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]);
bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[4], a[0], c2, c1, c0, &c2, &c1, &r[4]);
bn_mul2_mulw_addtw(a[5], a[0], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[4], a[1], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[3], a[2], c2, c1, c0, &c2, &c1, &r[5]);
bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[4], a[2], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[5], a[1], c2, c1, c0, &c2, &c1, &r[6]);
bn_mul2_mulw_addtw(a[5], a[2], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[4], a[3], c2, c1, c0, &c2, &c1, &r[7]);
bn_mulw_addtw(a[4], a[4], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[5], a[3], c2, c1, c0, &c2, &c1, &r[8]);
bn_mul2_mulw_addtw(a[5], a[4], 0, c2, c1, &c2, &c1, &r[9]);
bn_mulw_addtw(a[5], a[5], 0, c2, c1, &c2, &r[11], &r[10]);
}
#endif
#ifndef HAVE_BN_SQR_COMBA8
void
bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
BN_ULONG c2, c1, c0;
bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]);
bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]);
bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]);
bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]);
bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[4], a[0], c2, c1, c0, &c2, &c1, &r[4]);
bn_mul2_mulw_addtw(a[5], a[0], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[4], a[1], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[3], a[2], c2, c1, c0, &c2, &c1, &r[5]);
bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[4], a[2], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[5], a[1], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[6], a[0], c2, c1, c0, &c2, &c1, &r[6]);
bn_mul2_mulw_addtw(a[7], a[0], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[6], a[1], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[5], a[2], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[4], a[3], c2, c1, c0, &c2, &c1, &r[7]);
bn_mulw_addtw(a[4], a[4], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[5], a[3], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[6], a[2], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[7], a[1], c2, c1, c0, &c2, &c1, &r[8]);
bn_mul2_mulw_addtw(a[7], a[2], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[6], a[3], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[5], a[4], c2, c1, c0, &c2, &c1, &r[9]);
bn_mulw_addtw(a[5], a[5], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[6], a[4], c2, c1, c0, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[7], a[3], c2, c1, c0, &c2, &c1, &r[10]);
bn_mul2_mulw_addtw(a[7], a[4], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[6], a[5], c2, c1, c0, &c2, &c1, &r[11]);
bn_mulw_addtw(a[6], a[6], 0, c2, c1, &c2, &c1, &c0);
bn_mul2_mulw_addtw(a[7], a[5], c2, c1, c0, &c2, &c1, &r[12]);
bn_mul2_mulw_addtw(a[7], a[6], 0, c2, c1, &c2, &c1, &r[13]);
bn_mulw_addtw(a[7], a[7], 0, c2, c1, &c2, &r[15], &r[14]);
}
#endif
#ifndef HAVE_BN_SQR_WORDS
static void
bn_sqr_add_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
BN_ULONG x3, x2, x1, x0;
BN_ULONG carry = 0;
assert(n >= 0);
if (n <= 0)
return;
while (n & ~3) {
bn_mulw(a[0], a[0], &x1, &x0);
bn_mulw(a[1], a[1], &x3, &x2);
bn_qwaddqw(x3, x2, x1, x0, r[3], r[2], r[1], r[0], carry,
&carry, &r[3], &r[2], &r[1], &r[0]);
bn_mulw(a[2], a[2], &x1, &x0);
bn_mulw(a[3], a[3], &x3, &x2);
bn_qwaddqw(x3, x2, x1, x0, r[7], r[6], r[5], r[4], carry,
&carry, &r[7], &r[6], &r[5], &r[4]);
a += 4;
r += 8;
n -= 4;
}
while (n) {
bn_mulw_addw_addw(a[0], a[0], r[0], carry, &carry, &r[0]);
bn_addw(r[1], carry, &carry, &r[1]);
a++;
r += 2;
n--;
}
}
void
bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int a_len)
{
const BN_ULONG *ap;
BN_ULONG *rp;
BN_ULONG w;
int r_len;
int n;
if (a_len <= 0)
return;
ap = a;
w = ap[0];
ap++;
rp = r;
r_len = a_len * 2;
rp[0] = rp[r_len - 1] = 0;
rp++;
n = a_len - 1;
if (n > 0) {
rp[n] = bn_mulw_words(rp, ap, n, w);
}
rp += 2;
n--;
while (n > 0) {
w = ap[0];
ap++;
rp[n] = bn_mulw_add_words(rp, ap, n, w);
rp += 2;
n--;
}
bn_add_words(r, r, r, r_len);
bn_sqr_add_words(r, a, a_len);
}
#endif
static int
bn_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
{
bn_sqr_words(r->d, a->d, a->top);
return 1;
}
int
BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
{
BIGNUM *rr;
int r_len;
int ret = 1;
BN_CTX_start(ctx);
if (a->top < 1) {
BN_zero(r);
goto done;
}
if ((rr = r) == a)
rr = BN_CTX_get(ctx);
if (rr == NULL)
goto err;
if ((r_len = a->top * 2) < a->top)
goto err;
if (!bn_wexpand(rr, r_len))
goto err;
if (a->top == 4) {
bn_sqr_comba4(rr->d, a->d);
} else if (a->top == 6) {
bn_sqr_comba6(rr->d, a->d);
} else if (a->top == 8) {
bn_sqr_comba8(rr->d, a->d);
} else {
if (!bn_sqr(rr, a, ctx))
goto err;
}
rr->top = r_len;
bn_correct_top(rr);
rr->neg = 0;
if (!bn_copy(r, rr))
goto err;
done:
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
LCRYPTO_ALIAS(BN_sqr);