U8TOU32
st->r[0] = U8TOU32(&key[0]) & 0x0fffffff;
st->r[1] = U8TOU32(&key[4]) & 0x0ffffffc;
st->r[2] = U8TOU32(&key[8]) & 0x0ffffffc;
st->r[3] = U8TOU32(&key[12]) & 0x0ffffffc;
h0 = (u32)(d0 = (u64)h0 + U8TOU32(inp + 0));
h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + U8TOU32(inp + 4));
h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + U8TOU32(inp + 8));
h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + U8TOU32(inp + 12));
ctx->nonce[0] = U8TOU32(&key[16]);
ctx->nonce[1] = U8TOU32(&key[20]);
ctx->nonce[2] = U8TOU32(&key[24]);
ctx->nonce[3] = U8TOU32(&key[28]);
r0.u = EXP(52 + 0) | (U8TOU32(&key[0]) & 0x0fffffff);
r1.u = EXP(52 + 32) | (U8TOU32(&key[4]) & 0x0ffffffc);
r2.u = EXP(52 + 64) | (U8TOU32(&key[8]) & 0x0ffffffc);
r3.u = EXP(52 + 96) | (U8TOU32(&key[12]) & 0x0ffffffc);
in0.u = EXP(52 + 0) | U8TOU32(&inp[0]);
in1.u = EXP(52 + 32) | U8TOU32(&inp[4]);
in2.u = EXP(52 + 64) | U8TOU32(&inp[8]);
in3.u = EXP(52 + 96) | U8TOU32(&inp[12]) | pad;
in0.u = EXP(52 + 0) | U8TOU32(&inp[0]);
in1.u = EXP(52 + 32) | U8TOU32(&inp[4]);
in2.u = EXP(52 + 64) | U8TOU32(&inp[8]);
in3.u = EXP(52 + 96) | U8TOU32(&inp[12]) | pad;
#ifndef U8TOU32
ctx->nonce[0] = U8TOU32(&key[16]);
ctx->nonce[1] = U8TOU32(&key[20]);
ctx->nonce[2] = U8TOU32(&key[24]);
ctx->nonce[3] = U8TOU32(&key[28]);