// ----------------------------------------------------------------------------
// bignum_mul_8_16_alt: multiply two 8-word numbers into a 16-word result.
//
// x86-64 assembly for the GNU assembler, passed through the C preprocessor
// first (it relies on #include / #define / #if).
// ----------------------------------------------------------------------------

// Supplies the S2N_BN_* and _CET_ENDBR macros used in this file.
#include "s2n_bignum_internal.h"

// Intel operand order (destination first), registers written without '%'.
.intel_syntax noprefix

// NOTE(review): these two macros are defined in the header above; by their
// names they set the symbol's linkage/visibility -- confirm there.
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_8_16_alt)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_8_16_alt)

// Everything that follows is code.
.text
// Pointer arguments. z (destination) and x (first source) stay in the
// registers they occupy at the point the multiply proper begins.
#define z rdi
#define x rsi

// Second source pointer. It is in rdx when the multiply proper begins, and
// the function copies it to rcx first, because every mul overwrites rdx
// with the high half of its product.
#define y rcx

// Three-word accumulator window. The roles (low / high / carry word) rotate
// among these registers after each result word is stored.
#define t0 r8
#define t1 r9
#define t2 r10
// ----------------------------------------------------------------------------
// Multiply-accumulate building blocks.
//
// Each macro forms the 128-bit product srca * srcb in rdx:rax and adds it
// into an accumulator made of register operands, listed most significant
// word first. srca is any operand valid as a source for "mov rax, ..."; srcb
// is a memory operand (it is given a QWORD PTR size). All three clobber rax,
// rdx and the flags.
// ----------------------------------------------------------------------------

// Two-word form: (hi,lo) += srca * srcb. A carry out of hi is left in CF and
// not recorded anywhere, so use this only where that carry is known to be
// zero or is consumed by the very next instruction.
#define combads(hi,lo,srca,srcb)                \
        mov     rax, srca;                      \
        mul     QWORD PTR srcb;                 \
        add     lo, rax;                        \
        adc     hi, rdx

// Three-word form: (top,hi,lo) += srca * srcb, folding the carry out of hi
// into top.
#define combadd(top,hi,lo,srca,srcb)            \
        combads(hi,lo,srca,srcb);               \
        adc     top, 0

// Three-word form for use when top is zero on entry: "adc top, top" then
// computes 0 + 0 + CF, i.e. the same value "adc top, 0" would give.
#define combadz(top,hi,lo,srca,srcb)            \
        combads(hi,lo,srca,srcb);               \
        adc     top, top
// ----------------------------------------------------------------------------
// bignum_mul_8_16_alt:  z := x * y
//
// Reads eight 64-bit words from x and eight from y (byte offsets 0..56) and
// stores all sixteen 64-bit words of their product at z (byte offsets
// 0..120), least significant word first.
//
// In:     rdi = z, rsi = x, rdx = y
//         (with WINDOWS_ABI set: rcx = z, rdx = x, r8 = y, moved on entry)
// Out:    the sixteen words at z; no register result is set up
// Clobb:  rax, rcx, rdx, r8, r9, r10, flags
// Stack:  none, apart from saving rdi/rsi under WINDOWS_ABI
//
// Method: the product is built one result word ("column") at a time. For
// column k every partial product x[i]*y[j] with i + j = k is added into a
// three-word accumulator (c,h,l); l is then stored as z[k], and (c,h)
// becomes the low two words of the accumulator for column k+1. The code is
// straight-line: no branches and no data-dependent addresses.
//
// Aliasing: words of z are stored while x and y are still being read, so z
// must not overlap x or y. x and y are only read and may coincide.
// ----------------------------------------------------------------------------
S2N_BN_SYMBOL(bignum_mul_8_16_alt):
// NOTE(review): _CET_ENDBR is defined outside this file; presumably the
// indirect-branch landing pad for CET-enabled builds -- confirm in header.
_CET_ENDBR

// Windows x64 entry shim: rdi and rsi are callee-saved in that convention,
// so preserve them, then move the three arguments to where the body expects.
#if WINDOWS_ABI
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
mov rdx, r8
#endif

// Free rdx for use by mul: from here on y lives in rcx.
mov y, rdx

// Column 0: x[0]*y[0]. Low half is z[0]; high half seeds the accumulator.
mov rax, [x]
mul QWORD PTR [y]
mov [z], rax
mov t0, rdx
xor t1, t1

// Column 1: accumulator (c,h,l) = (t2,t1,t0). t1 is zero going in, so the
// first product uses the two-word form; the second may carry into t2.
xor t2, t2
combads(t1,t0,[x],[y+8])
combadz(t2,t1,t0,[x+8],[y])
mov [z+8], t0

// Column 2: (c,h,l) = (t0,t2,t1). The just-stored register is recycled as
// the new (zeroed) carry word; this rotation repeats for every column.
xor t0, t0
combadz(t0,t2,t1,[x],[y+16])
combadd(t0,t2,t1,[x+8],[y+8])
combadd(t0,t2,t1,[x+16],[y])
mov [z+16], t1

// Column 3: (c,h,l) = (t1,t0,t2)
xor t1, t1
combadz(t1,t0,t2,[x],[y+24])
combadd(t1,t0,t2,[x+8],[y+16])
combadd(t1,t0,t2,[x+16],[y+8])
combadd(t1,t0,t2,[x+24],[y])
mov [z+24], t2

// Column 4: (c,h,l) = (t2,t1,t0)
xor t2, t2
combadz(t2,t1,t0,[x],[y+32])
combadd(t2,t1,t0,[x+8],[y+24])
combadd(t2,t1,t0,[x+16],[y+16])
combadd(t2,t1,t0,[x+24],[y+8])
combadd(t2,t1,t0,[x+32],[y])
mov [z+32], t0

// Column 5: (c,h,l) = (t0,t2,t1)
xor t0, t0
combadz(t0,t2,t1,[x],[y+40])
combadd(t0,t2,t1,[x+8],[y+32])
combadd(t0,t2,t1,[x+16],[y+24])
combadd(t0,t2,t1,[x+24],[y+16])
combadd(t0,t2,t1,[x+32],[y+8])
combadd(t0,t2,t1,[x+40],[y])
mov [z+40], t1

// Column 6: (c,h,l) = (t1,t0,t2)
xor t1, t1
combadz(t1,t0,t2,[x],[y+48])
combadd(t1,t0,t2,[x+8],[y+40])
combadd(t1,t0,t2,[x+16],[y+32])
combadd(t1,t0,t2,[x+24],[y+24])
combadd(t1,t0,t2,[x+32],[y+16])
combadd(t1,t0,t2,[x+40],[y+8])
combadd(t1,t0,t2,[x+48],[y])
mov [z+48], t2

// Column 7: (c,h,l) = (t2,t1,t0). The widest column: all eight products.
xor t2, t2
combadz(t2,t1,t0,[x],[y+56])
combadd(t2,t1,t0,[x+8],[y+48])
combadd(t2,t1,t0,[x+16],[y+40])
combadd(t2,t1,t0,[x+24],[y+32])
combadd(t2,t1,t0,[x+32],[y+24])
combadd(t2,t1,t0,[x+40],[y+16])
combadd(t2,t1,t0,[x+48],[y+8])
combadd(t2,t1,t0,[x+56],[y])
mov [z+56], t0

// Column 8: (c,h,l) = (t0,t2,t1). From here the columns shrink: x[0] (and
// then successively more low words) no longer has a partner in y.
xor t0, t0
combadz(t0,t2,t1,[x+8],[y+56])
combadd(t0,t2,t1,[x+16],[y+48])
combadd(t0,t2,t1,[x+24],[y+40])
combadd(t0,t2,t1,[x+32],[y+32])
combadd(t0,t2,t1,[x+40],[y+24])
combadd(t0,t2,t1,[x+48],[y+16])
combadd(t0,t2,t1,[x+56],[y+8])
mov [z+64], t1

// Column 9: (c,h,l) = (t1,t0,t2)
xor t1, t1
combadz(t1,t0,t2,[x+16],[y+56])
combadd(t1,t0,t2,[x+24],[y+48])
combadd(t1,t0,t2,[x+32],[y+40])
combadd(t1,t0,t2,[x+40],[y+32])
combadd(t1,t0,t2,[x+48],[y+24])
combadd(t1,t0,t2,[x+56],[y+16])
mov [z+72], t2

// Column 10: (c,h,l) = (t2,t1,t0)
xor t2, t2
combadz(t2,t1,t0,[x+24],[y+56])
combadd(t2,t1,t0,[x+32],[y+48])
combadd(t2,t1,t0,[x+40],[y+40])
combadd(t2,t1,t0,[x+48],[y+32])
combadd(t2,t1,t0,[x+56],[y+24])
mov [z+80], t0

// Column 11: (c,h,l) = (t0,t2,t1)
xor t0, t0
combadz(t0,t2,t1,[x+32],[y+56])
combadd(t0,t2,t1,[x+40],[y+48])
combadd(t0,t2,t1,[x+48],[y+40])
combadd(t0,t2,t1,[x+56],[y+32])
mov [z+88], t1

// Column 12: (c,h,l) = (t1,t0,t2)
xor t1, t1
combadz(t1,t0,t2,[x+40],[y+56])
combadd(t1,t0,t2,[x+48],[y+48])
combadd(t1,t0,t2,[x+56],[y+40])
mov [z+96], t2

// Column 13: (c,h,l) = (t2,t1,t0)
xor t2, t2
combadz(t2,t1,t0,[x+48],[y+56])
combadd(t2,t1,t0,[x+56],[y+48])
mov [z+104], t0

// Column 14: only x[7]*y[7] remains, added onto the carried-in (t2,t1).
// The two-word form suffices: a 512-bit by 512-bit product fits in 1024
// bits, so nothing can carry out of t2.
combads(t2,t1,[x+56],[y+56])
mov [z+112], t1

// Column 15: the final high word.
mov [z+120], t2

// Undo the Windows x64 entry shim (reverse order of the pushes).
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
// On Linux/ELF, emit an empty .note.GNU-stack section: the GNU toolchain's
// marker that this object does not need an executable stack.
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif