LIMBPERM
scarry = scarry + a->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)];
a->limb[LIMBPERM(i)] = scarry & LIMB_MASK(LIMBPERM(i));
scarry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
carry = carry + a->limb[LIMBPERM(i)] + (scarry_0 & MODULUS->limb[LIMBPERM(i)]);
a->limb[LIMBPERM(i)] = carry & LIMB_MASK(LIMBPERM(i));
carry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
ret |= c->limb[LIMBPERM(i)];
buffer |= ((dword_t)red->limb[LIMBPERM(j)]) << fill;
fill += LIMB_PLACE_VALUE(LIMBPERM(j));
while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < nbytes) {
x->limb[LIMBPERM(i)] = (word_t)((i < NLIMBS - 1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer);
fill -= LIMB_PLACE_VALUE(LIMBPERM(i));
buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i));
scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8 * sizeof(word_t));