PUTU32
PUTU32(ciphertext, tmp[0]);
PUTU32(ciphertext + 4, tmp[1]);
PUTU32(ciphertext + 8, tmp[2]);
PUTU32(ciphertext + 12, tmp[3]);
PUTU32(plaintext, tmp[0]);
PUTU32(plaintext + 4, tmp[1]);
PUTU32(plaintext + 8, tmp[2]);
PUTU32(plaintext + 12, tmp[3]);
PUTU32(pt , s0);
PUTU32(pt + 4, s1);
PUTU32(pt + 8, s2);
PUTU32(pt + 12, s3);
PUTU32(ct , s0);
PUTU32(ct + 4, s1);
PUTU32(ct + 8, s2);
PUTU32(ct + 12, s3);
PUTU32(out, s0);
PUTU32(out + 4, s1);
PUTU32(out + 8, s2);
PUTU32(out + 12, s3);
PUTU32(out, s0);
PUTU32(out + 4, s1);
PUTU32(out + 8, s2);
PUTU32(out + 12, s3);
PUTU32(ciphertext, s2);
PUTU32(ciphertext + 4, s3);
PUTU32(ciphertext + 8, s0);
PUTU32(ciphertext + 12, s1);
PUTU32(plaintext, s2);
PUTU32(plaintext + 4, s3);
PUTU32(plaintext + 8, s0);
PUTU32(plaintext + 12, s1);
PUTU32(blocks[i].c + 60, len);
PUTU32(blocks[i].c + 124, len);
PUTU32(blocks[i].c + 0, ctx->A[i]);
PUTU32(blocks[i].c + 4, ctx->B[i]);
PUTU32(blocks[i].c + 8, ctx->C[i]);
PUTU32(blocks[i].c + 12, ctx->D[i]);
PUTU32(blocks[i].c + 16, ctx->E[i]);
PUTU32(blocks[i].c + 60, (64 + 20) * 8);
PUTU32(out + 0, ctx->A[i]);
PUTU32(out + 4, ctx->B[i]);
PUTU32(out + 8, ctx->C[i]);
PUTU32(out + 12, ctx->D[i]);
PUTU32(out + 16, ctx->E[i]);
PUTU32(blocks[i].c + 60, len);
PUTU32(blocks[i].c + 124, len);
PUTU32(blocks[i].c + 0, ctx->A[i]);
PUTU32(blocks[i].c + 4, ctx->B[i]);
PUTU32(blocks[i].c + 8, ctx->C[i]);
PUTU32(blocks[i].c + 12, ctx->D[i]);
PUTU32(blocks[i].c + 16, ctx->E[i]);
PUTU32(blocks[i].c + 20, ctx->F[i]);
PUTU32(blocks[i].c + 24, ctx->G[i]);
PUTU32(blocks[i].c + 28, ctx->H[i]);
PUTU32(blocks[i].c + 60, (64 + 32) * 8);
PUTU32(out + 0, ctx->A[i]);
PUTU32(out + 4, ctx->B[i]);
PUTU32(out + 8, ctx->C[i]);
PUTU32(out + 12, ctx->D[i]);
PUTU32(out + 16, ctx->E[i]);
PUTU32(out + 20, ctx->F[i]);
PUTU32(out + 24, ctx->G[i]);
PUTU32(out + 28, ctx->H[i]);
PUTU32(ivec + 12, ctr32);
PUTU32(ivec + 12, ctr32);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(p, v);
PUTU32(p + 4, v);
PUTU32(p + 8, v);
PUTU32(p + 12, v);
PUTU32(p, v);
PUTU32(p + 4, v);
PUTU32(p + 8, v);
PUTU32(p + 12, v);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(ctx->Yi.c + 12, ctr);
PUTU32(blocks[i].c + 60, len);
PUTU32(blocks[i].c + 124, len);
PUTU32(blocks[i].c + 0, mctx->A[i]);
PUTU32(blocks[i].c + 4, mctx->B[i]);
PUTU32(blocks[i].c + 8, mctx->C[i]);
PUTU32(blocks[i].c + 12, mctx->D[i]);
PUTU32(blocks[i].c + 16, mctx->E[i]);
PUTU32(blocks[i].c + 60, (64 + 20) * 8);
PUTU32(out + 0, mctx->A[i]);
PUTU32(out + 4, mctx->B[i]);
PUTU32(out + 8, mctx->C[i]);
PUTU32(out + 12, mctx->D[i]);
PUTU32(out + 16, mctx->E[i]);
PUTU32(blocks[i].c + 60, len);
PUTU32(blocks[i].c + 124, len);
PUTU32(blocks[i].c + 0, mctx->A[i]);
PUTU32(blocks[i].c + 4, mctx->B[i]);
PUTU32(blocks[i].c + 8, mctx->C[i]);
PUTU32(blocks[i].c + 12, mctx->D[i]);
PUTU32(blocks[i].c + 16, mctx->E[i]);
PUTU32(blocks[i].c + 20, mctx->F[i]);
PUTU32(blocks[i].c + 24, mctx->G[i]);
PUTU32(blocks[i].c + 28, mctx->H[i]);
PUTU32(blocks[i].c + 60, (64 + 32) * 8);
PUTU32(out + 0, mctx->A[i]);
PUTU32(out + 4, mctx->B[i]);
PUTU32(out + 8, mctx->C[i]);
PUTU32(out + 12, mctx->D[i]);
PUTU32(out + 16, mctx->E[i]);
PUTU32(out + 20, mctx->F[i]);
PUTU32(out + 24, mctx->G[i]);
PUTU32(out + 28, mctx->H[i]);
PUTU32(ctr->V + 12, ctr32);
PUTU32(ciphertext, tmp[0]);
PUTU32(ciphertext+4, tmp[1]);
PUTU32(ciphertext+8, tmp[2]);
PUTU32(ciphertext+12, tmp[3]);
PUTU32(plaintext, tmp[0]);
PUTU32(plaintext+4, tmp[1]);
PUTU32(plaintext+8, tmp[2]);
PUTU32(plaintext+12, tmp[3]);
PUTU32(ct , s0);
PUTU32(ct + 4, s1);
PUTU32(ct + 8, s2);
PUTU32(ct + 12, s3);
PUTU32(pt , s0);
PUTU32(pt + 4, s1);
PUTU32(pt + 8, s2);
PUTU32(pt + 12, s3);