BSWAP4
#ifdef BSWAP4
blocks[i].d[15] = BSWAP4(len);
#ifdef BSWAP4
blocks[i].d[31] = BSWAP4(len);
#ifdef BSWAP4
blocks[i].d[0] = BSWAP4(ctx->A[i]);
blocks[i].d[1] = BSWAP4(ctx->B[i]);
blocks[i].d[2] = BSWAP4(ctx->C[i]);
blocks[i].d[3] = BSWAP4(ctx->D[i]);
blocks[i].d[4] = BSWAP4(ctx->E[i]);
blocks[i].d[15] = BSWAP4((64 + 20) * 8);
#ifdef BSWAP4
bitlen = BSWAP4(bitlen);
#ifdef BSWAP4
pmac->u[0] = BSWAP4(pmac->u[0]);
pmac->u[1] = BSWAP4(pmac->u[1]);
pmac->u[2] = BSWAP4(pmac->u[2]);
pmac->u[3] = BSWAP4(pmac->u[3]);
pmac->u[4] = BSWAP4(pmac->u[4]);
#ifdef BSWAP4
blocks[i].d[15] = BSWAP4(len);
#ifdef BSWAP4
blocks[i].d[31] = BSWAP4(len);
#ifdef BSWAP4
blocks[i].d[0] = BSWAP4(ctx->A[i]);
blocks[i].d[1] = BSWAP4(ctx->B[i]);
blocks[i].d[2] = BSWAP4(ctx->C[i]);
blocks[i].d[3] = BSWAP4(ctx->D[i]);
blocks[i].d[4] = BSWAP4(ctx->E[i]);
blocks[i].d[5] = BSWAP4(ctx->F[i]);
blocks[i].d[6] = BSWAP4(ctx->G[i]);
blocks[i].d[7] = BSWAP4(ctx->H[i]);
blocks[i].d[15] = BSWAP4((64 + 32) * 8);
#ifdef BSWAP4
bitlen = BSWAP4(bitlen);
#ifdef BSWAP4
pmac->u[0] = BSWAP4(pmac->u[0]);
pmac->u[1] = BSWAP4(pmac->u[1]);
pmac->u[2] = BSWAP4(pmac->u[2]);
pmac->u[3] = BSWAP4(pmac->u[3]);
pmac->u[4] = BSWAP4(pmac->u[4]);
pmac->u[5] = BSWAP4(pmac->u[5]);
pmac->u[6] = BSWAP4(pmac->u[6]);
pmac->u[7] = BSWAP4(pmac->u[7]);
#ifdef BSWAP4
ctr = BSWAP4(ctx->Yi.d[3]);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctr = BSWAP4(ctx->Yi.d[3]);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctr = BSWAP4(ctx->Yi.d[3]);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
#define GETU32(p) BSWAP4(*(const u32 *)(p))
#define PUTU32(p, v) *(u32 *)(p) = BSWAP4(v)
#ifdef BSWAP4
ctr = BSWAP4(ctx->Xi.d[3]);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctr = BSWAP4(ctx->Yi.d[3]);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
#if defined(BSWAP4) && !defined(STRICT_ALIGNMENT)
#define GETU32(p) BSWAP4(*(const u32 *)(p))
#define PUTU32(p, v) *(u32 *)(p) = BSWAP4(v)
#ifdef BSWAP4
blocks[i].d[15] = BSWAP4(len);
#ifdef BSWAP4
blocks[i].d[31] = BSWAP4(len);
#ifdef BSWAP4
blocks[i].d[0] = BSWAP4(mctx->A[i]);
blocks[i].d[1] = BSWAP4(mctx->B[i]);
blocks[i].d[2] = BSWAP4(mctx->C[i]);
blocks[i].d[3] = BSWAP4(mctx->D[i]);
blocks[i].d[4] = BSWAP4(mctx->E[i]);
blocks[i].d[15] = BSWAP4((64 + 20) * 8);
#ifdef BSWAP4
bitlen = BSWAP4(bitlen);
#ifdef BSWAP4
pmac->u[0] = BSWAP4(pmac->u[0]);
pmac->u[1] = BSWAP4(pmac->u[1]);
pmac->u[2] = BSWAP4(pmac->u[2]);
pmac->u[3] = BSWAP4(pmac->u[3]);
pmac->u[4] = BSWAP4(pmac->u[4]);
#ifdef BSWAP4
blocks[i].d[15] = BSWAP4(len);
#ifdef BSWAP4
blocks[i].d[31] = BSWAP4(len);
#ifdef BSWAP4
blocks[i].d[0] = BSWAP4(mctx->A[i]);
blocks[i].d[1] = BSWAP4(mctx->B[i]);
blocks[i].d[2] = BSWAP4(mctx->C[i]);
blocks[i].d[3] = BSWAP4(mctx->D[i]);
blocks[i].d[4] = BSWAP4(mctx->E[i]);
blocks[i].d[5] = BSWAP4(mctx->F[i]);
blocks[i].d[6] = BSWAP4(mctx->G[i]);
blocks[i].d[7] = BSWAP4(mctx->H[i]);
blocks[i].d[15] = BSWAP4((64 + 32) * 8);
#ifdef BSWAP4
bitlen = BSWAP4(bitlen);
#ifdef BSWAP4
pmac->u[0] = BSWAP4(pmac->u[0]);
pmac->u[1] = BSWAP4(pmac->u[1]);
pmac->u[2] = BSWAP4(pmac->u[2]);
pmac->u[3] = BSWAP4(pmac->u[3]);
pmac->u[4] = BSWAP4(pmac->u[4]);
pmac->u[5] = BSWAP4(pmac->u[5]);
pmac->u[6] = BSWAP4(pmac->u[6]);
pmac->u[7] = BSWAP4(pmac->u[7]);