#include "gfmult.h"
#define REV_POLY_REDUCT 0xe1
static const uint8_t nib_rev[] = {
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
};
static inline struct gf128
gf128_mulalpha(struct gf128 v)
{
uint64_t mask;
mask = !!(v.v[1] & 1);
mask = ~(mask - 1);
v.v[1] = (v.v[1] >> 1) | ((v.v[0] & 1) << 63);
v.v[0] = (v.v[0] >> 1) ^ ((mask & REV_POLY_REDUCT) << 56);
return v;
}
void
gf128_genmultable(struct gf128 h, struct gf128table *t)
{
struct gf128 tbl[16];
int i;
tbl[0] = MAKE_GF128(0, 0);
tbl[1] = h;
for (i = 2; i < 16; i += 2) {
tbl[i] = gf128_mulalpha(tbl[i / 2]);
tbl[i + 1] = gf128_add(tbl[i], h);
}
for (i = 0; i < 16; i++) {
t->a[nib_rev[i]] = tbl[i].v[0] >> 32;
t->b[nib_rev[i]] = tbl[i].v[0];
t->c[nib_rev[i]] = tbl[i].v[1] >> 32;
t->d[nib_rev[i]] = tbl[i].v[1];
}
}
void
gf128_genmultable4(struct gf128 h, struct gf128table4 *t)
{
struct gf128 h2, h3, h4;
gf128_genmultable(h, &t->tbls[0]);
h2 = gf128_mul(h, &t->tbls[0]);
gf128_genmultable(h2, &t->tbls[1]);
h3 = gf128_mul(h, &t->tbls[1]);
gf128_genmultable(h3, &t->tbls[2]);
h4 = gf128_mul(h2, &t->tbls[1]);
gf128_genmultable(h4, &t->tbls[3]);
}
static inline struct gf128
readrow(struct gf128table *tbl, unsigned bits)
{
struct gf128 r;
bits = bits % 16;
r.v[0] = ((uint64_t)tbl->a[bits] << 32) | tbl->b[bits];
r.v[1] = ((uint64_t)tbl->c[bits] << 32) | tbl->d[bits];
return r;
}
static uint16_t reduction[] = {
0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
};
static struct gf128
gfmultword(uint64_t word, struct gf128 x, struct gf128table *tbl)
{
struct gf128 row;
unsigned bits;
unsigned redbits;
int i;
for (i = 0; i < 64; i += 4) {
bits = word % 16;
row = readrow(tbl, bits);
redbits = x.v[1] % 16;
x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
x.v[0] >>= 4;
x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
word >>= 4;
x = gf128_add(x, row);
}
return x;
}
static struct gf128
gfmultword4(uint64_t worda, uint64_t wordb, uint64_t wordc, uint64_t wordd,
struct gf128 x, struct gf128table4 *tbl)
{
struct gf128 rowa, rowb, rowc, rowd;
unsigned bitsa, bitsb, bitsc, bitsd;
unsigned redbits;
int i;
for (i = 0; i < 64; i += 4) {
bitsa = worda % 16;
bitsb = wordb % 16;
bitsc = wordc % 16;
bitsd = wordd % 16;
rowa = readrow(&tbl->tbls[3], bitsa);
rowb = readrow(&tbl->tbls[2], bitsb);
rowc = readrow(&tbl->tbls[1], bitsc);
rowd = readrow(&tbl->tbls[0], bitsd);
redbits = x.v[1] % 16;
x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
x.v[0] >>= 4;
x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
worda >>= 4;
wordb >>= 4;
wordc >>= 4;
wordd >>= 4;
x = gf128_add(x, gf128_add(rowa, gf128_add(rowb,
gf128_add(rowc, rowd))));
}
return x;
}
struct gf128
gf128_mul(struct gf128 v, struct gf128table *tbl)
{
struct gf128 ret;
ret = MAKE_GF128(0, 0);
ret = gfmultword(v.v[1], ret, tbl);
ret = gfmultword(v.v[0], ret, tbl);
return ret;
}
struct gf128
gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, struct gf128 d,
struct gf128table4 *tbl)
{
struct gf128 tmp;
tmp = MAKE_GF128(0, 0);
tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
return tmp;
}
struct gf128
gf128_mul4b(struct gf128 r, const uint8_t *v, struct gf128table4 *tbl)
{
struct gf128 a, b, c, d;
struct gf128 tmp;
tmp = MAKE_GF128(0, 0);
a = gf128_add(r, gf128_read(&v[0*16]));
b = gf128_read(&v[1*16]);
c = gf128_read(&v[2*16]);
d = gf128_read(&v[3*16]);
tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
return tmp;
}