mul_lower
/*
 * Forward declaration of the file-local helper mul_lower(); the definition is
 * not visible in this section. It takes two 64-bit unsigned operands and
 * returns a 64-bit unsigned result. Call sites in this file combine it as
 * a + b + 2 * mul_lower(a, b).
 * NOTE(review): the name suggests it multiplies only the low halves of x and
 * y -- confirm against the definition.
 */
static ossl_inline uint64_t mul_lower(uint64_t x, uint64_t y);
a = a + b + 2 * mul_lower(a, b); \
c = c + d + 2 * mul_lower(c, d); \
a = a + b + 2 * mul_lower(a, b); \
c = c + d + 2 * mul_lower(c, d); \