felem_square
void (*felem_square)(void *out, const void *in),
felem_square(tmp, ftmp);
felem_square(tmp2, ftmp3);
felem_square(tmp, z2);
felem_square(tmp, in);
felem_square(tmp, in);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp4);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp4);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, z_in);
felem_square(tmp, y_in);
felem_square(tmp, alpha);
felem_square(tmp, ftmp);
felem_square(tmp2, gamma);
felem_square(tmp, z2);
felem_square(tmp, z1);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp2);
felem_square(tmp, z_in);
felem_square(tmp, y_in);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp5);
felem_square(tmp, ftmp);
felem_square(tmp, z2);
felem_square(tmp, in);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, alpha); /* tmp[i] < 2^115 */
felem_square(tmp, ftmp); /* tmp[i] < 2^115 */
felem_square(tmp2, gamma); /* tmp2[i] < 2^115 */
felem_square(tmp, ftmp5); /* tmp[i] < 2^117 */
felem_square(tmp, ftmp5); /* tmp[i] < 2^117 */
felem_square(tmp, z2);
felem_square(tmp, z_in);
felem_square(tmp, y_in);
felem_square(tmp, alpha);
felem_square(tmp, ftmp);
felem_square(tmp2, gamma);
felem_square(tmp, z1);
felem_square(tmp, z2);
felem_square(tmp, ftmp5);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp5);
felem_square(tmp, z2);
felem_square(tmp, in);
felem_square(tmp, in);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp);
felem_square(tmp, ftmp2);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp4);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(tmp, ftmp3);
felem_square(Z(i), tmp_felem(num)); /* 1/(Z^2) */
void (*felem_square)(void *out,