#include <machine/asm.h>
#include <machine/param.h>
/*
 * Export the implementation under two names: __strcmp is the symbol
 * defined in this file, and strcmp is a weak alias bound to it, so a
 * strong definition of strcmp elsewhere takes precedence at link time.
 */
.weak strcmp
.set strcmp, __strcmp
/* Everything that follows is code. */
.text
/*
 * __strcmp -- compare two NUL-terminated byte strings 16 bytes at a time
 * using Advanced SIMD.  Also reachable through the weak alias strcmp.
 *
 * In:    x0 = first string (s1), x1 = second string (s2)
 * Out:   w0 = (first differing byte of s1) - (corresponding byte of s2),
 *             both bytes zero-extended; 0 if the strings are equal.
 * Uses:  x2-x15, v0-v6, condition flags.  No stack is used and no other
 *        function is called.
 *
 * Mask convention used throughout: a byte-wise compare result (0x00 or
 * 0xff per lane) is narrowed with "shrn #4" into 64 bits, giving one
 * 4-bit nibble per byte lane.  "rbit" + "clz" then counts trailing zero
 * bits, and ">> 2" turns that bit position into a byte index.
 *
 * Outline:
 *   1. Compare the first 16 bytes ("heads") of both strings, taking care
 *      not to read across a page boundary unnecessarily.
 *   2. Continue in a loop that walks one string in aligned 16-byte
 *      chunks while the other string is read at the matching (generally
 *      unaligned) address.  The string with the larger offset inside its
 *      16-byte chunk is the one walked aligned; .Lswapped is the mirror
 *      image of the fall-through path with the string roles exchanged.
 */
ENTRY(__strcmp)
/* x8 = s1 rounded down to 16 bytes, x9 = s1 & 15 (offset in chunk). */
bic x8, x0, #0xf
and x9, x0, #0xf
/* x10 = s2 rounded down to 16 bytes, x11 = s2 & 15. */
bic x10, x1, #0xf
and x11, x1, #0xf
/* x13 = all ones, used below to build "at or after string start" masks. */
mov x13, #-1
/*
 * Page-crossing test: x3 collects the bits that change when 16 is added
 * to either pointer.  If bit PAGE_SHIFT does not change for either one,
 * a 16-byte read at the pointer itself stays within the page and the
 * fast path at .Lbegin is taken.
 */
add x3, x0, #16
add x4, x1, #16
eor x3, x3, x0
eor x4, x4, x1
orr x3, x3, x4
tbz w3, #PAGE_SHIFT, .Lbegin
/*
 * Slow head path: load the aligned chunks that contain the string
 * starts (an aligned 16-byte load cannot cross a page).
 */
ldr q0, [x8]
ldr q2, [x10]
/*
 * x3 / x4 = nibble masks selecting the lanes at or after the start of
 * s1 / s2 inside the aligned chunk: all-ones shifted left by 4*offset.
 */
lsl x14, x9, #2
lsl x15, x11, #2
lsl x3, x13, x14
lsl x4, x13, x15
/* x5 / x6 = nibble masks of NUL bytes in the aligned s1 / s2 chunk. */
cmeq v5.16b, v0.16b, #0
cmeq v6.16b, v2.16b, #0
shrn v5.8b, v5.8h, #4
shrn v6.8b, v6.8h, #4
fmov x5, d5
fmov x6, d6
/* x2 = address of the shift_data permutation table. */
adrp x2, shift_data
add x2, x2, :lo12:shift_data
/*
 * s1: if there is no NUL at or after the string start within the
 * aligned chunk, the string continues into the next chunk, so go and
 * load the true (unaligned) head instead.
 */
tst x5, x3
b.eq 0f
/*
 * s1 ends inside the aligned chunk: build the head from the aligned
 * data.  The 16 indices read at shift_data + offset move the string
 * start to lane 0; the 0xff filler indices are out of range for TBL
 * and therefore yield zero bytes after the end of the chunk.
 */
ldr q4, [x2, x9]
tbl v0.16b, {v0.16b}, v4.16b
b 1f
.p2align 4
0:
/* s1 does not end in its aligned chunk: load the head directly. */
ldr q0, [x0]
1:
/* Same decision for s2; its head ends up in v4 either way. */
tst x6, x4
b.eq 0f
ldr q4, [x2, x11]
tbl v4.16b, {v2.16b}, v4.16b
b 1f
.p2align 4
.Lbegin:
/* Fast path: neither head read touches the next page. */
ldr q0, [x0]
0:
ldr q4, [x1]
1:
/*
 * Head comparison: v0 = s1 head, v4 = s2 head.
 * x5 = nibble mask of lanes where s1 has a NUL or the two heads differ.
 */
cmeq v2.16b, v0.16b, #0
cmeq v4.16b, v0.16b, v4.16b
orn v2.16b, v2.16b, v4.16b
shrn v2.8b, v2.8h, #4
fmov x5, d2
cbnz x5, .Lhead_mismatch
/*
 * Heads are equal and contain no NUL.  Load the second aligned chunk of
 * each string (q2 from s1, q3 from s2).
 */
ldr q2, [x8, #16]
ldr q3, [x10, #16]
/*
 * x9 = (s1 & 15) - (s2 & 15).  If s1 has the smaller offset (unsigned
 * borrow), use the mirrored code at .Lswapped.
 */
subs x9, x9, x11
b.lo .Lswapped
/*
 * s1 offset >= s2 offset: s1 is walked in aligned chunks.
 * q0 = the 16 bytes of s2 that correspond to s1's second aligned chunk;
 * x12 is only needed as the address for this one load.
 */
sub x12, x10, x9
ldr q0, [x12, #16]!
/*
 * From here on x8 is the only moving pointer; the others are offsets:
 *   x10 = (aligned s2) - (aligned s1), so x8 + x10 is the aligned s2
 *         chunk parallel to x8,
 *   x11 = x10 - x9 = s2 - s1, so x8 + x11 is the s2 data that
 *         corresponds to the s1 chunk at x8.
 */
sub x10, x10, x8
sub x11, x10, x9
/*
 * First iteration, peeled because its data is already loaded:
 * x6 = NUL mask of the aligned s2 chunk, x5 = match mask of the s1
 * chunk against the corresponding s2 bytes.
 */
cmeq v1.16b, v3.16b, #0
cmeq v0.16b, v0.16b, v2.16b
/* x8 now addresses the s1 chunk being compared. */
add x8, x8, #16
shrn v1.8b, v1.8h, #4
fmov x6, d1
shrn v0.8b, v0.8h, #4
fmov x5, d0
/* NUL check first; .Lnulfound expects x5 to be the un-inverted match mask. */
cbnz x6, .Lnulfound
/* Invert to get the mismatch mask expected by .Lmismatch. */
mvn x5, x5
cbnz x5, .Lmismatch
add x8, x8, #16
/*
 * Main loop, unrolled twice.  Each half:
 *   q0 = s2 bytes corresponding to the s1 chunk at x8,
 *   q1 = aligned s2 chunk parallel to x8 (only inspected for NUL),
 *   q2 = aligned s1 chunk at x8.
 * Leave on a NUL in the aligned s2 chunk or on any mismatch.
 */
.p2align 4
0:
ldr q0, [x8, x11]
ldr q1, [x8, x10]
ldr q2, [x8]
cmeq v1.16b, v1.16b, #0
cmeq v0.16b, v0.16b, v2.16b
shrn v1.8b, v1.8h, #4
fmov x6, d1
shrn v0.8b, v0.8h, #4
fmov x5, d0
cbnz x6, .Lnulfound
mvn x5, x5
cbnz x5, .Lmismatch
add x8, x8, #16
/* Second half of the unrolled loop. */
ldr q0, [x8, x11]
ldr q1, [x8, x10]
ldr q2, [x8]
/*
 * x8 is advanced before the checks in this half, so both exits below
 * (.Lnulfound2 and the fall-through into .Lmismatch) step it back.
 */
add x8, x8, #16
cmeq v1.16b, v1.16b, #0
cmeq v0.16b, v0.16b, v2.16b
shrn v1.8b, v1.8h, #4
fmov x6, d1
shrn v0.8b, v0.8h, #4
fmov x5, d0
cbnz x6, .Lnulfound2
mvn x5, x5
cbz x5, 0b
/* Mismatch in the second half: undo the early increment of x8. */
sub x8, x8, #16
/*
 * In: x5 = nibble mask whose lowest set bit marks the deciding byte,
 *     x8 = s1 chunk that was compared, x11 = s2 - s1.
 * Returns s1[byte] - s2[byte].
 */
.Lmismatch:
/* x2 = byte index of the lowest set nibble in x5. */
rbit x2, x5
clz x2, x2
lsr x2, x2, #2
/* x11 = address of the corresponding s2 data. */
add x11, x8, x11
ldrb w4, [x8, x2]
ldrb w5, [x11, x2]
sub w0, w4, w5
ret
.p2align 4
.Lnulfound2:
/* NUL seen in the second loop half: undo the early increment of x8. */
sub x8, x8, #16
/*
 * A NUL was found in the aligned s2 chunk at x8 + x10.
 * In: x6 = NUL mask of that chunk, x5 = un-inverted match mask for the
 *     s1 chunk at x8, x9 = offset difference (0..15 on this path).
 */
.Lnulfound:
mov x7, x9
/* Keep the unshifted NUL mask for the tail comparison below. */
mov x4, x6
/* x7 = (x9 & 15) * 4: the offset difference expressed in mask bits. */
ubfiz x7, x7, #2, #4
/*
 * The aligned s2 chunk starts x9 bytes into the compared window, so
 * shifting the NUL mask left re-indexes it to that window; NULs that
 * lie beyond the window are shifted out.
 */
lsl x6, x6, x7
/* x5 = (NUL inside the window) | (mismatch inside the window). */
orn x5, x6, x5
cbnz x5, .Lmismatch
/*
 * The compared window matched completely and the NUL lies beyond it.
 * Compare the aligned s2 chunk (q1) against the s1 bytes that
 * correspond to it (q0, at x8 + x9).
 */
ldr q0, [x8, x9]
ldr q1, [x8, x10]
cmeq v1.16b, v0.16b, v1.16b
shrn v1.8b, v1.8h, #4
fmov x5, d1
/* x5 = (NUL in s2 chunk) | (mismatch); x5 = index of the first such byte. */
orn x5, x4, x5
rbit x2, x5
clz x2, x2
lsr x5, x2, #2
/* Turn the offsets back into addresses: x10 = s2 chunk, x8 = s1 data. */
add x10, x10, x8
add x8, x8, x9
ldrb w4, [x8, x5]
ldrb w5, [x10, x5]
sub w0, w4, w5
ret
.p2align 4
/*
 * The heads differ or s1 ends within its head.
 * In: x5 = nibble mask from the head comparison; x0 / x1 are still the
 *     original string pointers.
 */
.Lhead_mismatch:
rbit x2, x5
clz x2, x2
lsr x2, x2, #2
ldrb w4, [x0, x2]
ldrb w5, [x1, x2]
sub w0, w4, w5
ret
.p2align 4
/*
 * Mirror image of the path above for (s1 & 15) < (s2 & 15): s2 is
 * walked in aligned chunks through x10 and s1 is read at the
 * corresponding address.  On entry x9 = offset difference (negative),
 * q2 / q3 = second aligned chunk of s1 / s2.
 */
.Lswapped:
/* q0 = the 16 bytes of s1 that correspond to s2's second aligned chunk. */
add x12, x8, x9
ldr q0, [x12, #16]!
/*
 *   x8  = (aligned s1) - (aligned s2), so x10 + x8 is the aligned s1
 *         chunk parallel to x10,
 *   x11 = x8 + x9 = s1 - s2, so x10 + x11 is the s1 data that
 *         corresponds to the s2 chunk at x10,
 *   x9  = negated, i.e. the now positive offset difference.
 */
sub x8, x8, x10
add x11, x8, x9
neg x9, x9
/* Peeled first iteration: x6 = NUL mask of aligned s1, x5 = match mask. */
cmeq v1.16b, v2.16b, #0
cmeq v0.16b, v0.16b, v3.16b
add x10, x10, #16
shrn v1.8b, v1.8h, #4
fmov x6, d1
shrn v0.8b, v0.8h, #4
fmov x5, d0
cbnz x6, .Lnulfounds
mvn x5, x5
cbnz x5, .Lmismatchs
add x10, x10, #16
/*
 * Main loop of the swapped variant, unrolled twice.  Each half:
 *   q0 = s1 bytes corresponding to the s2 chunk at x10,
 *   q1 = aligned s1 chunk parallel to x10 (only inspected for NUL),
 *   q2 = aligned s2 chunk at x10.
 */
.p2align 4
0:
ldr q0, [x10, x11]
ldr q1, [x10, x8]
ldr q2, [x10]
cmeq v1.16b, v1.16b, #0
cmeq v0.16b, v0.16b, v2.16b
shrn v1.8b, v1.8h, #4
fmov x6, d1
shrn v0.8b, v0.8h, #4
fmov x5, d0
cbnz x6, .Lnulfounds
mvn x5, x5
cbnz x5, .Lmismatchs
add x10, x10, #16
/* Second half of the unrolled loop. */
ldr q0, [x10, x11]
ldr q1, [x10, x8]
ldr q2, [x10]
/* Early increment; both exits from this half step x10 back. */
add x10, x10, #16
cmeq v1.16b, v1.16b, #0
cmeq v0.16b, v0.16b, v2.16b
shrn v1.8b, v1.8h, #4
fmov x6, d1
shrn v0.8b, v0.8h, #4
fmov x5, d0
cbnz x6, .Lnulfound2s
mvn x5, x5
cbz x5, 0b
sub x10, x10, #16
/*
 * In: x5 = nibble mask whose lowest set bit marks the deciding byte,
 *     x10 = s2 chunk that was compared, x11 = s1 - s2.
 */
.Lmismatchs:
rbit x2, x5
clz x2, x2
lsr x2, x2, #2
add x11, x10, x11
/*
 * w4 is the s2 byte and w5 the s1 byte here, hence the reversed
 * operand order so that the result is still s1[byte] - s2[byte].
 */
ldrb w4, [x10, x2]
ldrb w5, [x11, x2]
sub w0, w5, w4
ret
.p2align 4
.Lnulfound2s:
/* NUL seen in the second loop half: undo the early increment of x10. */
sub x10, x10, #16
/*
 * A NUL was found in the aligned s1 chunk at x10 + x8.
 * In: x6 = NUL mask of that chunk, x5 = un-inverted match mask for the
 *     s2 chunk at x10, x9 = offset difference (positive after the neg).
 */
.Lnulfounds:
mov x7, x9
mov x4, x6
/* Re-index the NUL mask to the compared window, as in .Lnulfound. */
ubfiz x7, x7, #2, #4
lsl x6, x6, x7
orn x5, x6, x5
cbnz x5, .Lmismatchs
/*
 * Window matched and the NUL lies beyond it: compare the aligned s1
 * chunk (q1) with the s2 bytes that correspond to it (q0, at x10 + x9).
 */
ldr q0, [x10, x9]
ldr q1, [x10, x8]
cmeq v1.16b, v0.16b, v1.16b
shrn v1.8b, v1.8h, #4
fmov x5, d1
orn x5, x4, x5
rbit x2, x5
clz x2, x2
lsr x5, x2, #2
/* x11 = aligned s1 chunk, x10 = corresponding s2 data. */
add x11, x10, x8
add x10, x10, x9
/* w4 = s2 byte, w5 = s1 byte; result is s1[byte] - s2[byte]. */
ldrb w4, [x10, x5]
ldrb w5, [x11, x5]
sub w0, w5, w4
ret
END(__strcmp)
/*
 * TBL permutation table used by __strcmp when a string head is built
 * from an aligned load.  A 16-byte read at shift_data + k (k = 0..15)
 * yields the indices k..15 followed by k bytes of 0xff.  Indices 0..15
 * select source lanes; 0xff is out of range for a one-register TBL and
 * therefore produces a zero byte.
 */
.section .rodata
.balign 16
shift_data:
/* Identity lane indices 0..15. */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
/* Sixteen out-of-range indices. */
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.size shift_data, .-shift_data