widemul
accum2 += widemul(a[j - i], b[i]);
accum1 += widemul(aa[j - i], bb[i]);
accum0 += widemul(a[8 + j - i], b[8 + i]);
accum0 -= widemul(a[8 + j - i], b[i]);
accum2 += widemul(aa[8 + j - i], bb[i]);
accum1 += widemul(a[16 + j - i], b[8 + i]);
accum0 += widemul(b, a[i]);
accum8 += widemul(b, a[i + 8]);
accum2 = widemul(a[0], a[3]);
accum0 = widemul(aa[0], aa[3]);
accum1 = widemul(a[4], a[7]);
accum2 += widemul(a[1], a[2]);
accum0 += widemul(aa[1], aa[2]);
accum1 += widemul(a[5], a[6]);
accum0 += widemul(2 * aa[1], aa[3]);
accum1 += widemul(2 * a[5], a[7]);
accum0 += widemul(aa[2], aa[2]);
accum0 -= widemul(2 * a[1], a[3]);
accum1 += widemul(a[6], a[6]);
accum2 = widemul(a[0], a[0]);
accum0 -= widemul(a[2], a[2]);
accum1 += widemul(aa[0], aa[0]);
accum0 += widemul(a[4], a[4]);
accum2 = widemul(2 * aa[2], aa[3]);
accum0 -= widemul(2 * a[2], a[3]);
accum1 += widemul(2 * a[6], a[7]);
accum2 = widemul(2 * a[0], a[1]);
accum1 += widemul(2 * aa[0], aa[1]);
accum0 += widemul(2 * a[4], a[5]);
accum2 = widemul(aa[3], aa[3]);
accum0 -= widemul(a[3], a[3]);
accum1 += widemul(a[7], a[7]);
accum2 = widemul(2 * a[0], a[2]);
accum1 += widemul(2 * aa[0], aa[2]);
accum0 += widemul(2 * a[4], a[6]);
accum2 += widemul(a[1], a[1]);
accum1 += widemul(aa[1], aa[1]);
accum0 += widemul(a[5], a[5]);
accum2 += widemul(a[j], b[i - j]);
accum1 += widemul(aa[j], bb[i - j]);
accum0 += widemul(a[j + 4], b[i - j + 4]);
accum2 += widemul(a[j], b[i + 8 - j]);
accum1 += widemul(aa[j], bbb[i + 4 - j]);
accum0 += widemul(a[j + 4], bb[i + 4 - j]);
accum0 += widemul(b, a[i]);
accum4 += widemul(b, a[i + 4]);