#include <sys/types.h>
#include <sys/endian.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>
#include <crypto/aesni/aesni_os.h>
#include <crypto/aesni/aesencdec.h>
#define AESNI_ENC(d, k, nr) aesni_enc(nr-1, (const __m128i*)k, d)
#include <wmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
static inline __m128i
xor_and_encrypt(__m128i a, __m128i b, const unsigned char *k, int nr)
{
__m128i retval = _mm_xor_si128(a, b);
retval = AESNI_ENC(retval, k, nr);
return (retval);
}
/*
 * Write the low-order bytes of 'value' big-endian into the tail of
 * *block, filling the last (offset - 1) bytes.  Used to stamp the
 * message length into B_0 and the block counter into the A_i counter
 * blocks (RFC 3610), where offset == L + 1.
 *
 * Fix: the index was an 'int' compared against the size_t expression
 * (sizeof(*block) - offset); the signed index was implicitly converted
 * to unsigned in the comparison (-Wsign-compare).  Use size_t for the
 * index so the comparison is performed in a single, obvious type.
 */
static void
append_int(size_t value, __m128i *block, size_t offset)
{
	size_t indx = sizeof(*block) - 1;
	uint8_t *bp = (uint8_t *)block;

	/* Fill bytes [sizeof(*block) - offset + 1, sizeof(*block) - 1]. */
	while (indx > (sizeof(*block) - offset)) {
		bp[indx] = value & 0xff;
		indx--;
		value >>= 8;
	}
}
/*
 * Begin the CCM CBC-MAC (RFC 3610): build the B_0 block, encrypt it,
 * then fold in the associated data (if any) one 16-byte block at a
 * time.  Returns the running MAC state; the caller continues the MAC
 * over the payload blocks.
 *
 * auth_data/auth_len - associated (authenticated-only) data
 * nonce/nonce_len    - per-message nonce
 * key/nr             - expanded AES key schedule and round count
 * data_len           - payload length, encoded into B_0
 * tag_len            - authentication tag length, encoded into B_0
 */
static __m128i
cbc_mac_start(const unsigned char *auth_data, size_t auth_len,
const unsigned char *nonce, size_t nonce_len,
const unsigned char *key, int nr,
size_t data_len, size_t tag_len)
{
	__m128i cbc_block, staging_block;
	uint8_t *byte_ptr;
	/* L = number of bytes used to encode the message length. */
	int L = sizeof(__m128i) - 1 - nonce_len;

	cbc_block = _mm_setzero_si128();
	byte_ptr = (uint8_t*)&cbc_block;
	/*
	 * B_0 flags octet (RFC 3610 section 2.2):
	 *   bit 6     - Adata: set when associated data is present,
	 *   bits 5..3 - M' = (tag_len - 2) / 2,
	 *   bits 2..0 - L' = L - 1.
	 */
	byte_ptr[0] = ((auth_len > 0) ? 1 : 0) * 64 |
	(((tag_len - 2) / 2) * 8) |
	(L - 1);
	bcopy(nonce, byte_ptr + 1, nonce_len);
	/* Big-endian message length in the last L bytes of B_0. */
	append_int(data_len, &cbc_block, L+1);
	/* X_1 = E(K, B_0). */
	cbc_block = AESNI_ENC(cbc_block, key, nr);
	if (auth_len != 0) {
		uint32_t auth_amt;	/* bytes used by the AAD length encoding */
		size_t copy_amt;
		const uint8_t *auth_ptr = auth_data;

		staging_block = _mm_setzero_si128();
		KASSERT(auth_len < (1ULL << 32),
		("%s: auth_len (%zu) larger than 4GB",
		__FUNCTION__, auth_len));
		/*
		 * RFC 3610 section 2.2 AAD length encoding: lengths below
		 * 0xFF00 use a plain 2-byte big-endian length; larger
		 * (< 2^32) lengths use the 0xFFFE marker followed by a
		 * 4-byte length.  (The 2^64 form is not supported here.)
		 */
		if (auth_len < ((1 << 16) - (1 << 8))) {
			be16enc(&staging_block, auth_len);
			auth_amt = 2;
		} else if (auth_len < (1ULL << 32)) {
			be16enc(&staging_block, 0xfffe);
			be32enc((char*)&staging_block + 2, auth_len);
			auth_amt = 6;
		} else
			panic("%s: auth len too large", __FUNCTION__);
		/*
		 * The first MAC block holds the length encoding plus as
		 * much of the AAD as fits in the remaining bytes.
		 */
		copy_amt = MIN(auth_len,
		sizeof(staging_block) - auth_amt);
		byte_ptr = (uint8_t*)&staging_block;
		bcopy(auth_ptr, &byte_ptr[auth_amt], copy_amt);
		auth_ptr += copy_amt;
		cbc_block = xor_and_encrypt(cbc_block, staging_block, key, nr);
		/* Remaining AAD, zero-padded on the final partial block. */
		while (auth_ptr < auth_data + auth_len) {
			copy_amt = MIN((auth_data + auth_len) - auth_ptr,
			sizeof(staging_block));
			if (copy_amt < sizeof(staging_block))
				bzero(&staging_block, sizeof(staging_block));
			bcopy(auth_ptr, &staging_block, copy_amt);
			cbc_block = xor_and_encrypt(cbc_block, staging_block,
			key, nr);
			auth_ptr += copy_amt;
		}
	}
	return (cbc_block);
}
/*
 * Encrypt 'nbytes' of plaintext at 'in' into 'out' with AES-CCM
 * (CTR-mode encryption combined with a CBC-MAC, RFC 3610) and write
 * the 'tag_length'-byte authentication tag to 'tag'.
 *
 * in/out      - plaintext in, ciphertext out
 * addt/abytes - associated (authenticated-only) data and its length
 * nonce/nlen  - per-message nonce; must be 7..13 bytes
 * key/nr      - expanded AES key schedule and round count
 */
void
AES_CCM_encrypt(const unsigned char *in, unsigned char *out,
const unsigned char *addt, const unsigned char *nonce,
unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
int tag_length, const unsigned char *key, int nr)
{
	int L;
	int counter = 1;	/* counter 0 (A_0) is reserved for the tag */
	size_t copy_amt, total = 0;
	uint8_t *byte_ptr;
	__m128i s0, rolling_mac, s_x, staging_block;

	/* CCM (RFC 3610) requires a nonce of 7..13 bytes. */
	if (nlen < 7 || nlen > 13)
		panic("%s: bad nonce length %d", __FUNCTION__, nlen);
	/* L = width in bytes of the counter / message-length field. */
	L = sizeof(__m128i) - 1 - nlen;
	s0 = _mm_setzero_si128();
	/* CBC-MAC over B_0 and the AAD; payload blocks are folded in below. */
	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
	key, nr, nbytes, tag_length);
	/* A_i counter-block template: flags octet (L - 1), then nonce. */
	byte_ptr = (uint8_t*)&s0;
	byte_ptr[0] = L - 1;
	bcopy(nonce, &byte_ptr[1], nlen);
	/* s_x is the working counter block; s0 stays as A_0. */
	bcopy(&s0, &s_x, sizeof(s0));
	while (total < nbytes) {
		copy_amt = MIN(nbytes - total, sizeof(staging_block));
		bcopy(in+total, &staging_block, copy_amt);
		/* Zero-pad a final partial block before MACing it. */
		if (copy_amt < sizeof(staging_block)) {
			byte_ptr = (uint8_t*)&staging_block;
			bzero(&byte_ptr[copy_amt],
			sizeof(staging_block) - copy_amt);
		}
		/* MAC the plaintext block... */
		rolling_mac = xor_and_encrypt(rolling_mac, staging_block,
		key, nr);
		/* ...then CTR-encrypt it: C_i = P_i ^ E(K, A_i). */
		append_int(counter++, &s_x, L+1);
		__m128i X = AESNI_ENC(s_x, key, nr);
		staging_block = _mm_xor_si128(staging_block, X);
		bcopy(&staging_block, out+total, copy_amt);
		total += copy_amt;
	}
	/* Tag = CBC-MAC ^ E(K, A_0); truncated to tag_length bytes. */
	s0 = AESNI_ENC(s0, key, nr);
	staging_block = _mm_xor_si128(s0, rolling_mac);
	bcopy(&staging_block, tag, tag_length);
	/* Scrub key-stream and MAC state from the stack. */
	explicit_bzero(&s0, sizeof(s0));
	explicit_bzero(&staging_block, sizeof(staging_block));
	explicit_bzero(&s_x, sizeof(s_x));
	explicit_bzero(&rolling_mac, sizeof(rolling_mac));
}
/*
 * CTR-decrypt and/or CBC-MAC 'nbytes' of ciphertext at 'in'.
 *
 * AES_CCM_decrypt() calls this in two passes: first with out == NULL
 * to compute the MAC over the recovered plaintext without releasing
 * it, then - only after the tag has verified - with macp == NULL to
 * actually write the plaintext to 'out'.
 *
 * s0 is the A_0 counter-block template (flags + nonce, counter bytes
 * zero); the block counter starts at 1.
 */
static void
decrypt_loop(const unsigned char *in, unsigned char *out, size_t nbytes,
__m128i s0, size_t nonce_length, __m128i *macp,
const unsigned char *key, int nr)
{
	size_t total = 0;
	__m128i s_x = s0, mac_block;
	int counter = 1;
	/* L = width in bytes of the counter / message-length field. */
	const size_t L = sizeof(__m128i) - 1 - nonce_length;
	__m128i pad_block, staging_block;

	if (macp != NULL)
		mac_block = *macp;
	while (total < nbytes) {
		size_t copy_amt = MIN(nbytes - total, sizeof(staging_block));
		/* Zero-fill so a final partial block is zero-padded. */
		if (copy_amt < sizeof(staging_block)) {
			staging_block = _mm_setzero_si128();
		}
		bcopy(in+total, &staging_block, copy_amt);
		/* Key stream for this block: E(K, A_i). */
		append_int(counter++, &s_x, L+1);
		pad_block = AESNI_ENC(s_x, key, nr);
		/*
		 * Truncate the key stream on a partial block so the XOR
		 * below leaves the zero padding intact for the MAC.
		 */
		if (copy_amt < sizeof(staging_block)) {
			uint8_t *end_of_buffer = (uint8_t*)&pad_block;
			bzero(end_of_buffer + copy_amt,
			sizeof(pad_block) - copy_amt);
		}
		/* Recover the plaintext block: P_i = C_i ^ E(K, A_i). */
		staging_block = _mm_xor_si128(staging_block, pad_block);
		if (out)
			bcopy(&staging_block, out+total, copy_amt);
		/* MAC is computed over the plaintext. */
		if (macp)
			mac_block = xor_and_encrypt(mac_block, staging_block,
			key, nr);
		total += copy_amt;
	}
	if (macp)
		*macp = mac_block;
	/* Scrub key-stream and plaintext/MAC state from the stack. */
	explicit_bzero(&pad_block, sizeof(pad_block));
	explicit_bzero(&staging_block, sizeof(staging_block));
	explicit_bzero(&mac_block, sizeof(mac_block));
}
/*
 * Verify and decrypt an AES-CCM message (RFC 3610).  Returns 1 when
 * the authentication tag matches, 0 otherwise; 'out' receives the
 * plaintext only after the tag has been verified, so unauthenticated
 * plaintext is never released.
 *
 * in/out      - ciphertext in, plaintext out
 * addt/abytes - associated (authenticated-only) data and its length
 * nonce/nlen  - per-message nonce; must be 7..13 bytes
 * tag         - expected authentication tag, 'tag_length' bytes
 * key/nr      - expanded AES key schedule and round count
 */
int
AES_CCM_decrypt(const unsigned char *in, unsigned char *out,
const unsigned char *addt, const unsigned char *nonce,
const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
int tag_length, const unsigned char *key, int nr)
{
	int L;
	__m128i s0, rolling_mac, staging_block;
	uint8_t *byte_ptr;

	/*
	 * RFC 3610 requires a 7..13 byte nonce (L = 15 - nlen must lie
	 * in [2, 8]).  The previous 0..15 check disagreed with
	 * AES_CCM_encrypt() and admitted invalid lengths (e.g.
	 * nlen == 15 gives L == 0, so the flags octet L - 1 underflows
	 * and no length field fits in the counter block).
	 */
	if (nlen < 7 || nlen > 13)
		panic("%s: bad nonce length %d", __FUNCTION__, nlen);
	/* L = width in bytes of the counter / message-length field. */
	L = sizeof(__m128i) - 1 - nlen;
	s0 = _mm_setzero_si128();
	/* CBC-MAC over B_0 and the AAD. */
	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
	key, nr, nbytes, tag_length);
	/* A_0 counter-block template: flags octet (L - 1), then nonce. */
	byte_ptr = (uint8_t*)&s0;
	byte_ptr[0] = L-1;
	bcopy(nonce, &byte_ptr[1], nlen);
	/* Pass 1: MAC the recovered plaintext without writing it out. */
	decrypt_loop(in, NULL, nbytes, s0, nlen, &rolling_mac, key, nr);
	/* Expected tag = CBC-MAC ^ E(K, A_0); compare in constant time. */
	staging_block = _mm_xor_si128(AESNI_ENC(s0, key, nr), rolling_mac);
	if (timingsafe_bcmp(&staging_block, tag, tag_length) != 0) {
		return (0);
	}
	/* Pass 2: tag verified - decrypt for real. */
	decrypt_loop(in, out, nbytes, s0, nlen, NULL, key, nr);
	return (1);
}