#include <machine/asm.h>
.weak strlcpy
.set strlcpy, __strlcpy
.text
/*
 * size_t __strlcpy(char *dst, const char *src, size_t dstsize)
 *
 * ABI:  AAPCS64, GNU as syntax, leaf function (tail-calls strlen when
 *       dstsize == 0).
 * In:   x0 = dst, x1 = src, x2 = dstsize
 * Out:  x0 = length of src, not counting the terminating NUL.  At most
 *       dstsize - 1 bytes are copied and dst is NUL terminated whenever
 *       dstsize != 0.
 * Clobbers: x1, x2, x4-x12, x16, x17, v1-v3, flags.
 *       x18 (platform register) is deliberately left untouched.
 *
 * Register roles after the prologue:
 *   x9  = original dst
 *   x10 = src rounded down to a 16-byte boundary (chunk cursor)
 *   x11 = src & 15 (offset of src[0] inside its first chunk)
 *   x2  = limit (dstsize - 1), rebased as the copy proceeds
 *
 * NUL detection: cmeq turns every NUL byte lane into 0xff, shrn #4 packs
 * the 128-bit result into 64 bits (4 bits per byte lane), and
 * rbit + clz + lsr #2 yields the index of the first NUL lane (16 if none).
 */
ENTRY(__strlcpy)
	subs	x2, x2, #1		// x2 = limit = dstsize - 1
	b.lo	.L0			// dstsize == 0: only the length is wanted

	mov	x9, x0			// keep dst for the short-copy tails
	bic	x10, x1, #0xf		// x10 = 16-byte aligned src
	and	x11, x1, #0xf		// x11 = misalignment of src

	ldr	q1, [x10]		// aligned chunk holding src[0]
	cmeq	v1.16b, v1.16b, #0	// mark NUL lanes

	mov	x8, #-1
	lsl	x12, x11, #2		// 4 mask bits per byte lane
	lsl	x8, x8, x12		// mask selecting lanes at/after src[0]

	shrn	v1.8b, v1.8h, #4	// pack compare result into 64 bits
	fmov	x5, d1

	ands	x5, x5, x8		// drop lanes in front of the string
	b.ne	.Lhead_nul		// string ends inside the first chunk

	ldr	q3, [x10, #16]		// second aligned chunk
	ldr	q2, [x1]		// unaligned 16 bytes starting at src[0]
	mov	x8, #32
	sub	x8, x8, x11		// x8 = bytes from src to end of chunk 2

	cmeq	v1.16b, v3.16b, #0	// NUL lanes of the second chunk

	subs	x2, x2, x8		// x2 = limit left past the second chunk
	b.ls	.Lhead_buf_end		// limit ends within the first two chunks

	/* Limit reaches past the second chunk; does the string? */
	shrn	v1.8b, v1.8h, #4
	fmov	x5, d1
	cbnz	x5, .Lsecond_nul	// string ends inside the second chunk

	/* Neither string nor limit ended within the first two chunks. */
	ldr	q1, [x10, #32]		// preload the third chunk
	str	q2, [x0]		// store the unaligned head
	sub	x0, x0, x11		// from now on x0 tracks x10
	str	q3, [x0, #16]		// store the second chunk
	add	x10, x10, #32		// advance src cursor
	add	x0, x0, #32		// advance dst cursor
	subs	x2, x2, #16		// x2 = limit left past chunk q1
	b.ls	1f			// 16 bytes or fewer left: go to the tail

	/*
	 * Main loop, unrolled twice.  On entry q1 is the chunk at [x10],
	 * x0 is the matching dst position and x2 is the limit left past it.
	 */
	.p2align 4
0:
	cmeq	v2.16b, v1.16b, #0
	shrn	v2.8b, v2.8h, #4
	fmov	x5, d2

	cbnz	x5, 3f			// NUL in this chunk

	str	q1, [x0]
	ldr	q1, [x10, #16]		// next chunk

	cmp	x2, #16			// more than one full chunk left?
	b.ls	2f			// no: finish with the bounded tail

	add	x10, x10, #32		// advance both cursors
	add	x0, x0, #32

	cmeq	v2.16b, v1.16b, #0
	shrn	v2.8b, v2.8h, #4
	fmov	x5, d2
	cbnz	x5, 4f			// NUL in the second half of the round

	str	q1, [x0, #-16]
	ldr	q1, [x10]		// next chunk

	subs	x2, x2, #32		// still more than one chunk left?
	b.hi	0b

1:
	sub	x10, x10, #16		// rebase so that q1 is at [x10, #16]
	add	x2, x2, #16
	sub	x0, x0, #16

	/*
	 * 1..16 bytes of limit left; q1 is the chunk at [x10, #16] and has
	 * not been checked for NUL yet.
	 */
2:
	cmeq	v2.16b, v1.16b, #0
	shrn	v2.8b, v2.8h, #4
	fmov	x4, d2

	mov	x6, #0xf
	mov	x7, x4			// keep the real NUL mask for the length

	lsl	x5, x2, #2		// place a fake match at the limit
	lsl	x5, x6, x5
	cmp	x2, #16			// register shift wraps at 64: no fake
	csel	x5, x5, xzr, lo		// match when a whole chunk still fits
	orr	x8, x4, x5		// limit acts like a terminator

	rbit	x8, x8			// count trailing zero bits
	clz	x8, x8
	lsr	x8, x8, #2		// x8 = min(NUL index, limit)

	add	x0, x0, x8

	ldr	q1, [x10, x8]		// last 16 bytes ending at the cut-off
	str	q1, [x0]
	strb	wzr, [x0, #16]		// terminate dst

	/* Keep scanning src for the return value. */
	cbnz	x7, 1f			// NUL was already seen in this chunk

	.p2align 4
0:
	ldr	q1, [x10, #32]
	cmeq	v1.16b, v1.16b, #0
	shrn	v1.8b, v1.8h, #4
	fmov	x7, d1
	cbnz	x7, 2f

	ldr	q1, [x10, #48]
	cmeq	v1.16b, v1.16b, #0
	shrn	v1.8b, v1.8h, #4
	fmov	x7, d1
	add	x10, x10, #32
	cbz	x7, 0b

1:	sub	x10, x10, #16		// NUL chunk sits at [x10, #16] here
2:	rbit	x8, x7
	clz	x8, x8
	lsr	x8, x8, #2		// index of NUL within its chunk

	sub	x10, x10, x1
	add	x0, x10, #32
	add	x0, x0, x8		// x0 = strlen(src)

	ret

4:
	sub	x10, x10, #16		// undo the mid-round advance
	sub	x0, x0, #16

	/* String ended inside chunk [x10] and the limit has not. */
3:
	rbit	x8, x5
	clz	x8, x8
	lsr	x8, x8, #2		// index of NUL

	add	x0, x0, x8		// dst position of the NUL
	add	x10, x10, x8		// src position of the NUL

	ldr	q1, [x10, #-15]		// 16 bytes ending with the NUL
	str	q1, [x0, #-15]
	sub	x0, x10, x1		// x0 = strlen(src)

	ret

	/*
	 * The limit ends within the first two chunks and the first chunk
	 * has no NUL.  v1 holds the cmeq result of the second chunk.
	 */
.Lhead_buf_end:
	shrn	v1.8b, v1.8h, #4
	fmov	x8, d1

	add	x2, x2, #32		// x2 = limit counted from x10

	mov	x7, x8			// keep the NUL mask for the length

	cmp	x2, #16			// does the limit end in the first chunk?
	b.lo	0f

	rbit	x8, x8
	clz	x8, x8
	lsr	x8, x8, #2
	add	x8, x8, #16		// NUL position counted from x10

	cmp	x8, x2
	csel	x8, x8, x2, lo		// min(NUL position, limit)
	b	1f
0:
	mov	x8, x2			// limit ends before any NUL can occur
1:

	sub	x8, x8, x11		// x8 = number of bytes to copy
	strb	wzr, [x9, x8]		// terminate dst

	/* Keep scanning src for the return value. */
	cbnz	x7, 1f			// NUL already seen in the second chunk

	.p2align 4
0:
	ldr	q1, [x10, #32]
	cmeq	v1.16b, v1.16b, #0
	shrn	v1.8b, v1.8h, #4
	fmov	x7, d1
	cbnz	x7, 2f

	ldr	q1, [x10, #48]
	cmeq	v1.16b, v1.16b, #0
	shrn	v1.8b, v1.8h, #4
	fmov	x7, d1
	add	x10, x10, #32
	cbz	x7, 0b

1:	sub	x10, x10, #16		// NUL chunk sits at [x10, #16] here
2:	rbit	x6, x7
	clz	x6, x6
	lsr	x6, x6, #2		// index of NUL within its chunk

	sub	x10, x10, x1
	add	x0, x10, #32
	add	x0, x0, x6		// x0 = strlen(src)

	add	x4, x9, x8		// x4 = dst + count
	add	x5, x1, x8		// x5 = src + count

	b	.L1732

	/* String ends inside the second chunk; x8 = 32 - x11 on entry. */
.Lsecond_nul:
	add	x2, x2, x8		// restore the limit

	rbit	x8, x5
	clz	x8, x8
	lsr	x5, x8, #2		// index of NUL in the second chunk

	sub	x8, x11, #16
	sub	x0, x5, x8		// x0 = strlen(src) = index + 16 - x11

	cmp	x0, x2
	csel	x8, x2, x0, hi		// x8 = min(limit, strlen(src))

	add	x4, x9, x8		// x4 = dst + count
	add	x5, x1, x8		// x5 = src + count

	strb	wzr, [x4]		// terminate dst

	/*
	 * Short copies: x8 = count, x4/x5 = one past the end in dst/src.
	 * Each case copies a leading and a trailing piece that may overlap.
	 */

	/* 16..32 bytes */
.L1732:
	cmp	x8, #16
	b.lo	.L0816
	ldp	x16, x17, [x1]
	ldp	x12, x1, [x5, #-16]	// x1 is dead from here on
	stp	x16, x17, [x9]
	stp	x12, x1, [x4, #-16]
	ret

	/* String ends inside the first chunk; x5 = masked NUL bits. */
.Lhead_nul:
	rbit	x8, x5
	clz	x8, x8
	lsr	x8, x8, #2		// index of NUL within the chunk

	sub	x0, x8, x11		// x0 = strlen(src)
	cmp	x0, x2
	csel	x8, x2, x0, hi		// x8 = min(limit, strlen(src))

	add	x4, x9, x8		// x4 = dst + count
	add	x5, x1, x8		// x5 = src + count
	strb	wzr, [x4]		// terminate dst

	/* 8..15 bytes */
.L0816:
	tbz	x8, #3, .L0407
	ldr	x16, [x1]
	ldr	x17, [x5, #-8]
	str	x16, [x9]
	str	x17, [x4, #-8]
	ret

	/* 4..7 bytes */
	.p2align 4
.L0407:
	cmp	x8, #3
	b.ls	.L0203
	ldr	w16, [x1]
	ldr	w17, [x5, #-4]		// w17, not w18: x18 is platform-reserved
	str	w16, [x9]
	str	w17, [x4, #-4]
	ret

	/* 2..3 bytes */
.L0203:
	tbz	x8, 1, .L0001
	ldrh	w16, [x1]
	ldrh	w17, [x5, #-2]
	strh	w16, [x9]
	strh	w17, [x4, #-2]
	ret

	/* 0..1 bytes: the NUL store below wins when count == 0 */
.L0001:
	ldrb	w16, [x1]
	strb	w16, [x9]
	strb	wzr, [x4]
	ret

	/* dstsize == 0: write nothing and return strlen(src) */
.L0:
	mov	x0, x1
	b	strlen			// tail call, strlen returns to our caller
END(__strlcpy)