fwd_rnd
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
round(fwd_rnd, b1, b0, kp + 3 * N_COLS);
round(fwd_rnd, b0, b1, kp + 4 * N_COLS);
round(fwd_rnd, b1, b0, kp + 5 * N_COLS);
round(fwd_rnd, b0, b1, kp + 6 * N_COLS);
round(fwd_rnd, b1, b0, kp + 7 * N_COLS);
round(fwd_rnd, b0, b1, kp + 8 * N_COLS);
round(fwd_rnd, b1, b0, kp + 9 * N_COLS);
round(fwd_rnd, b1, b0, kp);
round(fwd_rnd, b0, b1, kp);
round(fwd_rnd, b1, b0, kp);
round(fwd_rnd, b1, b0, kp);