copy_sse
copy_sse<0>(__m128i* destination, const __m128i* source)
constexpr static void (*sValue)(__m128i*, const __m128i*) = copy_sse<N>;
copy_sse<4>(to, from);
copy_sse<1>(toEnd, fromEnd);
copy_sse<N - 1>(destination + 1, source + 1);