GSWAP8
len_blk[0] = GSWAP8((uint64_t)ctx->aad_len * 8);
len_blk[1] = GSWAP8((uint64_t)len * 8);
len_blk[0] = GSWAP8((uint64_t)ctx->aad_len * 8);
len_blk[1] = GSWAP8((uint64_t)len * 8);
t[0] = GSWAP8(a[0]);
t[1] = GSWAP8(a[1]);
a[1] = GSWAP8((t[1] >> 1) ^ (t[0] << 63));
a[0] = GSWAP8((t[0] >> 1) ^ mask);
((uint64_t *)out)[0] = GSWAP8(((uint64_t *)in)[1]);
((uint64_t *)out)[1] = GSWAP8(((uint64_t *)in)[0]);
tmp[0] = GSWAP8(tmp[0]);
tmp[1] = GSWAP8(tmp[1]);