u64_a1
/*
 * NOTE(review): the enclosing function is not visible in this excerpt;
 * the comments below describe only what these statements do as written.
 *
 * XOR the first two u64_a1 words read from inp into the two words of
 * ctx->cmac.
 */
ctx->cmac.u[0] ^= ((u64_a1 *)inp)[0];
ctx->cmac.u[1] ^= ((u64_a1 *)inp)[1];
/*
 * Store scratch XOR inp, word by word, into the first two u64_a1 words
 * of out.  scratch itself is not modified here.
 */
((u64_a1 *)out)[0] = scratch.u[0] ^ ((u64_a1 *)inp)[0];
((u64_a1 *)out)[1] = scratch.u[1] ^ ((u64_a1 *)inp)[1];
/*
 * Each statement does two things in one expression: the inner assignment
 * stores scratch XOR inp into a word of out, and the value of that
 * assignment (the word just stored) is then XORed into the matching word
 * of ctx->cmac.
 * NOTE(review): the enclosing function is not visible in this excerpt.
 */
ctx->cmac.u[0] ^= (((u64_a1 *)out)[0]
= scratch.u[0] ^ ((u64_a1 *)inp)[0]);
ctx->cmac.u[1] ^= (((u64_a1 *)out)[1]
= scratch.u[1] ^ ((u64_a1 *)inp)[1]);
/*
 * NOTE(review): the enclosing function is not visible, and this excerpt
 * presumably omits statements between the load and store pairs below
 * (e.g. something that transforms scratch) - confirm against the full file.
 */
/* Load two u64_a1 words from inp, XORed with tweak1, into scratch. */
scratch.u[0] = ((u64_a1 *)inp)[0] ^ tweak1.u[0];
scratch.u[1] = ((u64_a1 *)inp)[1] ^ tweak1.u[1];
/* Store scratch XOR tweak to out; scratch is left unchanged. */
((u64_a1 *)out)[0] = scratch.u[0] ^ tweak.u[0];
((u64_a1 *)out)[1] = scratch.u[1] ^ tweak.u[1];
/* Load two u64_a1 words from inp, XORed with tweak, into scratch. */
scratch.u[0] = ((u64_a1 *)inp)[0] ^ tweak.u[0];
scratch.u[1] = ((u64_a1 *)inp)[1] ^ tweak.u[1];
/*
 * Unlike the store above, "^=" also updates scratch in place: scratch
 * becomes scratch XOR tweak, and that updated value is written to out.
 */
((u64_a1 *)out)[0] = scratch.u[0] ^= tweak.u[0];
((u64_a1 *)out)[1] = scratch.u[1] ^= tweak.u[1];
/*
 * NOTE(review): the enclosing function is not visible, and this excerpt
 * presumably omits statements between the load and store pairs below -
 * confirm against the full file.
 */
/* scratch = first two u64_a1 words of inp, each XORed with tweak1. */
scratch.u[0] = ((u64_a1 *)inp)[0] ^ tweak1.u[0];
scratch.u[1] = ((u64_a1 *)inp)[1] ^ tweak1.u[1];
/* out = scratch XOR tweak; scratch keeps its value. */
((u64_a1 *)out)[0] = scratch.u[0] ^ tweak.u[0];
((u64_a1 *)out)[1] = scratch.u[1] ^ tweak.u[1];
/* scratch = first two u64_a1 words of inp, each XORed with tweak. */
scratch.u[0] = ((u64_a1 *)inp)[0] ^ tweak.u[0];
scratch.u[1] = ((u64_a1 *)inp)[1] ^ tweak.u[1];
/*
 * The compound assignment first folds tweak into scratch, then the
 * resulting scratch word is what gets stored to out.
 */
((u64_a1 *)out)[0] = scratch.u[0] ^= tweak.u[0];
((u64_a1 *)out)[1] = scratch.u[1] ^= tweak.u[1];
/*
 * C1..C7: table lookups into Cx selected by one byte of K.
 *
 * Cn(K, i) reads byte (i) * 8 + n of K.c, doubles it, and uses the result
 * as an index into an array of u64_a1 whose base is (8 - n) bytes past the
 * start of Cx.c.  The expansion is an lvalue of type u64_a1.
 *
 * Cx and u64_a1 are not defined here; they must be in scope at the point
 * of use.  At most one of the base addresses Cx.c + 1 .. Cx.c + 7 can be
 * 8-byte aligned.
 * NOTE(review): this relies on u64_a1 being declared so that unaligned
 * accesses are permitted - confirm against its typedef.
 *
 * Both K and i are parenthesized in the expansion so that expression
 * arguments (for example *ptr or a + b) bind as intended.
 */
#define C1(K, i) (((u64_a1 *)(Cx.c + 7))[2 * (K).c[(i) * 8 + 1]])
#define C2(K, i) (((u64_a1 *)(Cx.c + 6))[2 * (K).c[(i) * 8 + 2]])
#define C3(K, i) (((u64_a1 *)(Cx.c + 5))[2 * (K).c[(i) * 8 + 3]])
#define C4(K, i) (((u64_a1 *)(Cx.c + 4))[2 * (K).c[(i) * 8 + 4]])
#define C5(K, i) (((u64_a1 *)(Cx.c + 3))[2 * (K).c[(i) * 8 + 5]])
#define C6(K, i) (((u64_a1 *)(Cx.c + 2))[2 * (K).c[(i) * 8 + 6]])
#define C7(K, i) (((u64_a1 *)(Cx.c + 1))[2 * (K).c[(i) * 8 + 7]])