#include <machine/asm.h>
/*
 * timingsafe_bcmp -- compare two buffers without data-dependent timing.
 *
 * ABI:   AAPCS64, leaf function, no stack use.
 * In:    x0 = first buffer, x1 = second buffer, x2 = length in bytes
 * Out:   w0 = 0 if all compared bytes are equal, non-zero otherwise
 * Clobb: x0-x2, x9-x12, v0-v3, v16, NZCV
 *
 * Every branch below tests the length only, never the buffer contents.
 * Differences are accumulated by XOR-ing corresponding chunks and OR-ing
 * the results together.
 *
 * For lengths up to 32 bytes, a length in (k, 2k] is handled by loading
 * the first k and the last k bytes of each buffer.  The two chunks may
 * overlap, which is harmless because the results are only OR-ed.
 */
ENTRY(timingsafe_bcmp)
	cmp	x2, #32
	bhi	.Lbulk			// more than 32 bytes
	cmp	x2, #16
	bhi	.Lvec17to32		// 17..32 bytes
	cmp	x2, #8
	bhi	.Lquad9to16		// 9..16 bytes
	cmp	x2, #4
	bhi	.Lword5to8		// 5..8 bytes
	cmp	x2, #2
	bhi	.Lhalf3to4		// 3..4 bytes
	cbnz	x2, .Lbyte1to2		// 1..2 bytes

	/* zero length: buffers trivially equal */
	mov	w0, #0
	ret

	/* 1..2 bytes: first byte and last byte */
.Lbyte1to2:
	sub	x2, x2, #1		// x2 = offset of the last byte
	ldrb	w9, [x0]
	ldrb	w10, [x1]
	ldrb	w11, [x0, x2]
	ldrb	w12, [x1, x2]
	eor	w9, w9, w10
	eor	w11, w11, w12
	orr	w0, w9, w11
	ret

	/* 3..4 bytes: first and last halfword */
.Lhalf3to4:
	sub	x2, x2, #2		// x2 = offset of the last halfword
	ldrh	w9, [x0]
	ldrh	w10, [x1]
	ldrh	w11, [x0, x2]
	ldrh	w12, [x1, x2]
	eor	w9, w9, w10
	eor	w11, w11, w12
	orr	w0, w9, w11
	ret

	/* 5..8 bytes: first and last word */
.Lword5to8:
	sub	x2, x2, #4		// x2 = offset of the last word
	ldr	w9, [x0]
	ldr	w10, [x1]
	ldr	w11, [x0, x2]
	ldr	w12, [x1, x2]
	eor	w9, w9, w10
	eor	w11, w11, w12
	orr	w0, w9, w11
	ret

	/* 9..16 bytes: first and last doubleword */
.Lquad9to16:
	sub	x2, x2, #8		// x2 = offset of the last doubleword
	ldr	x9, [x0]
	ldr	x10, [x1]
	ldr	x11, [x0, x2]
	ldr	x12, [x1, x2]
	eor	x9, x9, x10
	eor	x11, x11, x12
	orr	x0, x9, x11
	orr	x0, x0, x0, lsr #32	// fold high half into w0, the returned part
	ret

	/* 17..32 bytes: first and last 16-byte vector */
.Lvec17to32:
	sub	x2, x2, #16		// x2 = offset of the last 16 bytes
	ldr	q0, [x0]
	ldr	q1, [x1]
	ldr	q2, [x0, x2]
	ldr	q3, [x1, x2]
	eor	v0.16b, v0.16b, v1.16b
	eor	v2.16b, v2.16b, v3.16b
	orr	v0.16b, v0.16b, v2.16b
	umaxv	s0, v0.4s		// non-zero iff any 32-bit lane is non-zero
	fmov	w0, s0
	ret

	/*
	 * More than 32 bytes: 32 bytes per step, v16 accumulates differences.
	 * After the first step x2 is biased to (bytes left) - 32, so that on
	 * reaching .Llast32 it is <= 0 and x0 + x2 / x1 + x2 address the
	 * final 32 bytes of each buffer.
	 */
.Lbulk:
	ldp	q0, q1, [x0], #32
	ldp	q2, q3, [x1], #32
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v16.16b, v0.16b, v1.16b
	subs	x2, x2, #64
	bls	.Llast32		// at most 32 bytes left

.Lloop:
	ldp	q0, q1, [x0], #32
	ldp	q2, q3, [x1], #32
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v0.16b, v0.16b, v1.16b
	orr	v16.16b, v16.16b, v0.16b
	subs	x2, x2, #32
	bhi	.Lloop			// still more than 32 bytes left

	/* final 32 bytes; may overlap data that was already compared */
.Llast32:
	add	x0, x0, x2
	add	x1, x1, x2
	ldp	q0, q1, [x0]
	ldp	q2, q3, [x1]
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v0.16b, v0.16b, v1.16b
	orr	v16.16b, v16.16b, v0.16b
	umaxv	s16, v16.4s		// non-zero iff any 32-bit lane is non-zero
	fmov	w0, s16
	ret
END(timingsafe_bcmp)