#include <openssl/bn.h>
#include "bn_arch.h"
#include "bn_local.h"
#ifndef HEADER_BN_INTERNAL_H
#define HEADER_BN_INTERNAL_H
/*
 * Prototypes for out-of-line word-level helpers. Their definitions are not
 * part of this header, so the notes below are inferred from names and
 * signatures only - confirm against the implementations.
 */

/* Presumably the count of leading zero bits in w; bn_clzw() falls back to it. */
int bn_word_clz(BN_ULONG w);
/* Presumably the number of significant bits in bn - TODO confirm. */
int bn_bitsize(const BIGNUM *bn);

/*
 * Word-array addition and subtraction writing to r. NOTE(review): the
 * BN_ULONG return value looks like the final carry/borrow - confirm. These
 * two take the length as an int, unlike the size_t based helpers below.
 */
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int num);
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int num);

/*
 * Has no result array, only a BN_ULONG return value - presumably the borrow
 * of a - b over n words (TODO confirm).
 */
BN_ULONG bn_sub_words_borrow(const BN_ULONG *a, const BN_ULONG *b, size_t n);

/*
 * Add/subtract variants taking an additional mask word. NOTE(review): how the
 * mask is applied (e.g. to the words of b) is not visible here - confirm.
 */
BN_ULONG bn_add_words_masked(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
BN_ULONG mask, size_t n);
BN_ULONG bn_sub_words_masked(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
BN_ULONG mask, size_t n);

/*
 * Modular word-array operations with modulus m of n words. The mul/sqr forms
 * also take a scratch array t and a word m0. NOTE(review): m0 is presumably a
 * Montgomery constant derived from m - confirm.
 */
void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, size_t n);
void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, size_t n);
void bn_mod_mul_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, BN_ULONG *t, BN_ULONG m0, size_t n);
void bn_mod_sqr_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *m,
BN_ULONG *t, BN_ULONG m0, size_t n);

/*
 * Montgomery multiplication and reduction on word arrays; both take the
 * length as an int (n_len). Note that bn_montgomery_reduce_words() takes a
 * non-const a, so it may modify its input - TODO confirm.
 */
void bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap,
const BN_ULONG *bp, const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0,
int n_len);
void bn_montgomery_reduce_words(BN_ULONG *r, BN_ULONG *a, const BN_ULONG *n,
BN_ULONG n0, int n_len);
#ifndef HAVE_BN_CT_NE_ZERO
/*
 * bn_ct_ne_zero() returns 1 if w is non-zero and 0 if w is zero, using only
 * word arithmetic (no comparison or branch appears in the source).
 */
static inline int
bn_ct_ne_zero(BN_ULONG w)
{
	BN_ULONG below, top;

	/*
	 * For w == 0, w - 1 wraps to all ones, so ~(w - 1) is zero and the OR
	 * is zero. For any other w, either w or ~(w - 1) has its top bit set.
	 */
	below = w - 1;
	top = (w | ~below) >> (BN_BITS2 - 1);

	return top;
}
#endif
#ifndef HAVE_BN_CT_NE_ZERO_MASK
/*
 * bn_ct_ne_zero_mask() returns a word with all bits set if w is non-zero and
 * a zero word if w is zero.
 */
static inline BN_ULONG
bn_ct_ne_zero_mask(BN_ULONG w)
{
	int nonzero;

	nonzero = bn_ct_ne_zero(w);

	/* 0 - 1 converts to an all ones word, 0 - 0 stays zero. */
	return 0 - nonzero;
}
#endif
#ifndef HAVE_BN_CT_EQ_ZERO
/*
 * bn_ct_eq_zero() returns 1 if w is zero and 0 if w is non-zero; it is the
 * complement of bn_ct_ne_zero().
 */
static inline int
bn_ct_eq_zero(BN_ULONG w)
{
	int nonzero;

	nonzero = bn_ct_ne_zero(w);

	return 1 - nonzero;
}
#endif
#ifndef HAVE_BN_CT_EQ_ZERO_MASK
/*
 * bn_ct_eq_zero_mask() returns a word with all bits set if w is zero and a
 * zero word if w is non-zero.
 */
static inline BN_ULONG
bn_ct_eq_zero_mask(BN_ULONG w)
{
	int is_zero;

	is_zero = bn_ct_eq_zero(w);

	/* 0 - 1 converts to an all ones word, 0 - 0 stays zero. */
	return 0 - is_zero;
}
#endif
#ifndef HAVE_BN_CLZW
/*
 * bn_clzw() is the generic fallback used when no HAVE_BN_CLZW implementation
 * is provided; it simply forwards to the out-of-line bn_word_clz().
 */
static inline int
bn_clzw(BN_ULONG w)
{
	int zeros;

	zeros = bn_word_clz(w);

	return zeros;
}
#endif
#ifdef BN_ULLONG
#ifndef HAVE_BN_ADDW
#define HAVE_BN_ADDW
/*
 * bn_addw() computes (r1:r0) = a + b using the double word type BN_ULLONG.
 * The high word written to out_r1 is the carry from the addition.
 */
static inline void
bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	const BN_ULLONG sum = (BN_ULLONG)a + (BN_ULLONG)b;
	const BN_ULONG hi = sum >> BN_BITS2;
	const BN_ULONG lo = sum & BN_MASK2;

	*out_r1 = hi;
	*out_r0 = lo;
}
#endif
#ifndef HAVE_BN_ADDW_ADDW
#define HAVE_BN_ADDW_ADDW
/*
 * bn_addw_addw() computes (r1:r0) = a + b + c using the double word type
 * BN_ULLONG. The high word written to out_r1 is the accumulated carry.
 */
static inline void
bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
    BN_ULONG *out_r0)
{
	const BN_ULLONG sum = (BN_ULLONG)a + (BN_ULLONG)b + (BN_ULLONG)c;
	const BN_ULONG hi = sum >> BN_BITS2;
	const BN_ULONG lo = sum & BN_MASK2;

	*out_r1 = hi;
	*out_r0 = lo;
}
#endif
#ifndef HAVE_BN_MULW
#define HAVE_BN_MULW
/*
 * bn_mulw() computes the double word product (r1:r0) = a * b using the
 * double word type BN_ULLONG.
 */
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	const BN_ULLONG product = (BN_ULLONG)a * (BN_ULLONG)b;
	const BN_ULONG hi = product >> BN_BITS2;
	const BN_ULONG lo = product & BN_MASK2;

	*out_r1 = hi;
	*out_r0 = lo;
}
#endif
#ifndef HAVE_BN_MULW_ADDW
#define HAVE_BN_MULW_ADDW
/*
 * bn_mulw_addw() computes (r1:r0) = a * b + c using the double word type
 * BN_ULLONG, where a, b and c are single words.
 */
static inline void
bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
    BN_ULONG *out_r0)
{
	const BN_ULLONG acc = (BN_ULLONG)a * (BN_ULLONG)b + (BN_ULLONG)c;
	const BN_ULONG hi = acc >> BN_BITS2;
	const BN_ULONG lo = acc & BN_MASK2;

	*out_r1 = hi;
	*out_r0 = lo;
}
#endif
#ifndef HAVE_BN_MULW_ADDW_ADDW
#define HAVE_BN_MULW_ADDW_ADDW
/*
 * bn_mulw_addw_addw() computes (r1:r0) = a * b + c + d using the double word
 * type BN_ULLONG, where a, b, c and d are single words.
 */
static inline void
bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
    BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	const BN_ULLONG acc = (BN_ULLONG)a * (BN_ULLONG)b + (BN_ULLONG)c +
	    (BN_ULLONG)d;
	const BN_ULONG hi = acc >> BN_BITS2;
	const BN_ULONG lo = acc & BN_MASK2;

	*out_r1 = hi;
	*out_r0 = lo;
}
#endif
#endif
#ifndef HAVE_BN_ADDW
/*
 * bn_addw() computes (r1:r0) = a + b using single word arithmetic only.
 * out_r0 receives the wrapped sum and out_r1 receives the carry (0 or 1).
 */
static inline void
bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG any = a | b;
	BN_ULONG both = a & b;
	BN_ULONG sum = a + b;
	BN_ULONG carry;

	/*
	 * A carry leaves the top bit when both inputs have it set, or when at
	 * least one input has it set and the sum does not.
	 */
	carry = (both | (any & ~sum)) >> (BN_BITS2 - 1);

	*out_r1 = carry;
	*out_r0 = sum;
}
#endif
#ifndef HAVE_BN_ADDW_ADDW
/*
 * bn_addw_addw() computes (r1:r0) = a + b + c by chaining two bn_addw()
 * calls and summing their carries into the high word.
 */
static inline void
bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
    BN_ULONG *out_r0)
{
	BN_ULONG hi, lo, extra;

	/* a + b first, then fold c into the low word. */
	bn_addw(a, b, &hi, &lo);
	bn_addw(lo, c, &extra, &lo);

	*out_r1 = hi + extra;
	*out_r0 = lo;
}
#endif
#ifndef HAVE_BN_QWADDQW
/*
 * bn_qwaddqw() computes
 * (carry:r3:r2:r1:r0) = (a3:a2:a1:a0) + (b3:b2:b1:b0) + carry,
 * rippling the carry from the least significant word pair (a0, b0) up to the
 * most significant pair (a3, b3).
 */
static inline void
bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
    BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
    BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG k0, k1, k2, k3;
	BN_ULONG s0, s1, s2, s3;

	bn_addw_addw(a0, b0, carry, &k0, &s0);
	bn_addw_addw(a1, b1, k0, &k1, &s1);
	bn_addw_addw(a2, b2, k1, &k2, &s2);
	bn_addw_addw(a3, b3, k2, &k3, &s3);

	*out_carry = k3;
	*out_r3 = s3;
	*out_r2 = s2;
	*out_r1 = s1;
	*out_r0 = s0;
}
#endif
#ifndef HAVE_BN_SUBW
/*
 * bn_subw() computes r0 = a - b using single word arithmetic only.
 * out_r0 receives the wrapped difference and out_borrow receives the borrow
 * (0 or 1).
 */
static inline void
bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
{
	BN_ULONG diff = a - b;
	BN_ULONG not_a = ~a;
	BN_ULONG wrapped;

	/* The borrow is the top bit of the expression below. */
	wrapped = ((diff | (b & not_a)) & (b | not_a)) >> (BN_BITS2 - 1);

	*out_borrow = wrapped;
	*out_r0 = diff;
}
#endif
#ifndef HAVE_BN_SUBW_SUBW
/*
 * bn_subw_subw() computes r0 = a - b - c by chaining two bn_subw() calls.
 * out_borrow receives the sum of the two individual borrows.
 */
static inline void
bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
    BN_ULONG *out_r0)
{
	BN_ULONG first, second, diff;

	/* a - b first, then take c away from the intermediate difference. */
	bn_subw(a, b, &first, &diff);
	bn_subw(diff, c, &second, &diff);

	*out_borrow = first + second;
	*out_r0 = diff;
}
#endif
#ifndef HAVE_BN_QWSUBQW
/*
 * bn_qwsubqw() computes
 * (r3:r2:r1:r0) = (a3:a2:a1:a0) - (b3:b2:b1:b0) - borrow,
 * rippling the borrow from the least significant word pair (a0, b0) up to
 * the most significant pair (a3, b3). The final borrow goes to out_borrow.
 */
static inline void
bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
    BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
    BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG k0, k1, k2, k3;
	BN_ULONG d0, d1, d2, d3;

	bn_subw_subw(a0, b0, borrow, &k0, &d0);
	bn_subw_subw(a1, b1, k0, &k1, &d1);
	bn_subw_subw(a2, b2, k1, &k2, &d2);
	bn_subw_subw(a3, b3, k2, &k3, &d3);

	*out_borrow = k3;
	*out_r3 = d3;
	*out_r2 = d2;
	*out_r1 = d1;
	*out_r0 = d0;
}
#endif
#ifndef HAVE_BN_MULW
#if 1
/*
 * bn_mulw() computes the double word product (r1:r0) = a * b using single
 * word arithmetic only. Each input is split into half words, the four
 * partial products are formed, and the two cross products are folded into
 * the result with explicit carry handling.
 */
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG a_hi = a >> BN_BITS4;
	BN_ULONG a_lo = a & BN_MASK2l;
	BN_ULONG b_hi = b >> BN_BITS4;
	BN_ULONG b_lo = b & BN_MASK2l;
	BN_ULONG hi, lo, cross, carry;

	/* High and low partial products land directly in the result words. */
	hi = a_hi * b_hi;
	lo = a_lo * b_lo;

	/* (a_hi * b_lo) << BN_BITS4 straddles both result words. */
	cross = a_hi * b_lo;
	bn_addw(lo, cross << BN_BITS4, &carry, &lo);
	hi += (cross >> BN_BITS4) + carry;

	/* (b_hi * a_lo) << BN_BITS4 straddles both result words. */
	cross = b_hi * a_lo;
	bn_addw(lo, cross << BN_BITS4, &carry, &lo);
	hi += (cross >> BN_BITS4) + carry;

	*out_r1 = hi;
	*out_r0 = lo;
}
#else
/*
 * Alternate bn_mulw(): computes (r1:r0) = a * b by accumulating the partial
 * products into four half word sized accumulators (q3:q2:q1:q0), propagating
 * overflow upwards after each cross product, then recombining them.
 */
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG a_hi, a_lo, b_hi, b_lo, hi, lo, cross;
	BN_ULONG q0, q1, q2, q3;

	a_hi = a >> BN_BITS4;
	a_lo = a & BN_MASK2l;
	b_hi = b >> BN_BITS4;
	b_lo = b & BN_MASK2l;

	hi = a_hi * b_hi;
	lo = a_lo * b_lo;

	/* Spread the high and low partial products over the accumulators. */
	q0 = lo & BN_MASK2l;
	q1 = lo >> BN_BITS4;
	q2 = hi & BN_MASK2l;
	q3 = hi >> BN_BITS4;

	/* Add (a_hi * b_lo) << BN_BITS4 and renormalize q1 and q2. */
	cross = a_hi * b_lo;
	q1 += cross & BN_MASK2l;
	q2 += (cross >> BN_BITS4) + (q1 >> BN_BITS4);
	q1 &= BN_MASK2l;
	q3 += q2 >> BN_BITS4;
	q2 &= BN_MASK2l;

	/* Add (b_hi * a_lo) << BN_BITS4 and renormalize q1 and q2. */
	cross = b_hi * a_lo;
	q1 += cross & BN_MASK2l;
	q2 += (cross >> BN_BITS4) + (q1 >> BN_BITS4);
	q1 &= BN_MASK2l;
	q3 += q2 >> BN_BITS4;
	q2 &= BN_MASK2l;

	*out_r1 = q2 | (q3 << BN_BITS4);
	*out_r0 = q0 | (q1 << BN_BITS4);
}
#endif
#endif
#ifndef HAVE_BN_MULW_LO
/*
 * bn_mulw_lo() returns the low word of a * b, i.e. the product truncated to
 * a single word.
 */
static inline BN_ULONG
bn_mulw_lo(BN_ULONG a, BN_ULONG b)
{
	BN_ULONG product;

	product = a * b;

	return product;
}
#endif
#ifndef HAVE_BN_MULW_HI
/*
 * bn_mulw_hi() returns the high word of the double word product a * b; the
 * low word computed by bn_mulw() is ignored.
 */
static inline BN_ULONG
bn_mulw_hi(BN_ULONG a, BN_ULONG b)
{
	BN_ULONG hi, lo;

	bn_mulw(a, b, &hi, &lo);

	return hi;
}
#endif
#ifndef HAVE_BN_MULW_ADDW
/*
 * bn_mulw_addw() computes (r1:r0) = a * b + c, where a, b and c are single
 * words. The product is formed with bn_mulw() and c is then added to its low
 * word, with the carry folded into the high word.
 */
static inline void
bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
    BN_ULONG *out_r0)
{
	BN_ULONG hi, lo, carry;

	bn_mulw(a, b, &hi, &lo);
	bn_addw(lo, c, &carry, &lo);

	*out_r1 = hi + carry;
	*out_r0 = lo;
}
#endif
#ifndef HAVE_BN_MULW_ADDW_ADDW
/*
 * bn_mulw_addw_addw() computes (r1:r0) = a * b + c + d, where a, b, c and d
 * are single words. It builds on bn_mulw_addw() and then adds d to the low
 * word, with the carry folded into the high word.
 */
static inline void
bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
    BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG hi, lo, carry;

	bn_mulw_addw(a, b, c, &hi, &lo);
	bn_addw(lo, d, &carry, &lo);

	*out_r1 = hi + carry;
	*out_r0 = lo;
}
#endif
#ifndef HAVE_BN_MULW_ADDTW
/*
 * bn_mulw_addtw() computes (r2:r1:r0) = a * b + (c2:c1:c0), where a and b
 * are single words and (c2:c1:c0) is a triple word. Any carry out of the top
 * word is not reported; r2 wraps.
 */
static inline void
bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
    BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG prod_hi, lo, mid, carry;

	/* a * b + c0 yields the low word and a high word still to be added. */
	bn_mulw_addw(a, b, c0, &prod_hi, &lo);

	/* Fold that high word into c1; its carry goes into c2. */
	bn_addw(c1, prod_hi, &carry, &mid);

	*out_r2 = c2 + carry;
	*out_r1 = mid;
	*out_r0 = lo;
}
#endif
#ifndef HAVE_BN_MUL2_MULW_ADDTW
/*
 * bn_mul2_mulw_addtw() computes (r2:r1:r0) = 2 * a * b + (c2:c1:c0), where
 * a and b are single words and (c2:c1:c0) is a triple word. The double word
 * product a * b is computed once and added to the accumulator twice. Any
 * carry out of the top word is not reported; r2 wraps.
 */
static inline void
bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
    BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG prod_hi, prod_lo;
	BN_ULONG top, mid, lo;
	BN_ULONG carry, overflow;

	bn_mulw(a, b, &prod_hi, &prod_lo);

	/* First addition of the product: (top:mid:lo) = (c2:c1:c0) + a * b. */
	bn_addw(c0, prod_lo, &carry, &lo);
	bn_addw(c1, prod_hi + carry, &top, &mid);
	/* The carry out of the top word is discarded. */
	bn_addw(c2, top, &overflow, &top);

	/* Second addition of the product: (top:mid:lo) += a * b. */
	bn_addw(lo, prod_lo, &carry, &lo);
	bn_addw(mid, prod_hi + carry, &carry, &mid);
	top += carry;

	*out_r2 = top;
	*out_r1 = mid;
	*out_r0 = lo;
}
#endif
#ifndef HAVE_BN_QWMULW_ADDW
/*
 * bn_qwmulw_addw() computes
 * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + c,
 * where b and c are single words. Each step multiplies one word of a by b,
 * adds the running carry, and passes the new high word on as the next carry.
 */
static inline void
bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
    BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
    BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG k0, k1, k2, k3;
	BN_ULONG w0, w1, w2, w3;

	bn_mulw_addw(a0, b, c, &k0, &w0);
	bn_mulw_addw(a1, b, k0, &k1, &w1);
	bn_mulw_addw(a2, b, k1, &k2, &w2);
	bn_mulw_addw(a3, b, k2, &k3, &w3);

	*out_r4 = k3;
	*out_r3 = w3;
	*out_r2 = w2;
	*out_r1 = w1;
	*out_r0 = w0;
}
#endif
#ifndef HAVE_BN_QWMULW_ADDQW_ADDW
/*
 * bn_qwmulw_addqw_addw() computes
 * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + (c3:c2:c1:c0) + d,
 * where b and d are single words. Each step multiplies one word of a by b,
 * adds the matching word of c and the running carry, and passes the new high
 * word on as the next carry.
 */
static inline void
bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
    BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
    BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
    BN_ULONG *out_r0)
{
	BN_ULONG k0, k1, k2, k3;
	BN_ULONG w0, w1, w2, w3;

	bn_mulw_addw_addw(a0, b, c0, d, &k0, &w0);
	bn_mulw_addw_addw(a1, b, c1, k0, &k1, &w1);
	bn_mulw_addw_addw(a2, b, c2, k1, &k2, &w2);
	bn_mulw_addw_addw(a3, b, c3, k2, &k3, &w3);

	*out_r4 = k3;
	*out_r3 = w3;
	*out_r2 = w2;
	*out_r1 = w1;
	*out_r0 = w0;
}
#endif
#endif