gcm_gmult_4bit
ctx->gmult = gcm_gmult_4bit;
/*
 * Prototype for gcm_gmult_4bit; the definition is not visible at this point.
 *
 * Xi     - declared as an array of two uint64_t; not const-qualified.
 * Htable - declared as an array of 16 u128 entries; const-qualified, so the
 *          function does not write through this parameter.
 *
 * Returns nothing.
 *
 * The array bounds in the parameter list are not enforced by the compiler
 * (array parameters are adjusted to pointers), so they only document the
 * sizes the caller is expected to provide.
 *
 * NOTE(review): the name suggests a GCM/GHASH field multiplication driven by
 * a 4-bit lookup table, with the result written back into Xi -- confirm
 * against the definition.
 */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);