#include "SYS.h"
#include "proc64_id.h"
/*
 * LABEL(s) pastes the suffix s onto ".strlen", so every branch target in
 * this routine gets a name of the form ".strlen<suffix>".
 */
#define LABEL(s) .strlen##s
/*
 * strlen: count the bytes that precede the first zero byte of a string.
 *
 * Syntax: AT&T (source, destination), run through the C preprocessor.
 *
 * In:     %rdi = address of the first byte of the string (s)
 * Out:    %rax = offset of the first zero byte relative to s
 * Writes: %rax, %rcx, %rdx, %rsi, %rdi (negated at the exit), %xmm0, flags
 * Stack:  not touched
 *
 * Method: memory is examined 16 bytes at a time, always at 16-byte
 * aligned addresses.  pcmpeqb against an all-zero %xmm0 turns each zero
 * byte of the block into 0xff, and pmovmskb collects the top bit of each
 * byte into a 16-bit mask in %edx; bit i set means byte i of the block
 * is zero.  If s is not 16-byte aligned, the first block starts below s,
 * so bytes located before s are read as well; their mask bits are
 * shifted out before the mask is tested.
 *
 * ENTRY and SET_SIZE are macros defined outside this file (presumably in
 * SYS.h -- confirm).
 */
ENTRY(strlen)
/* %rsi = scan pointer, %rcx = s & 15 (misalignment within a 16-byte block) */
mov %rdi, %rsi
mov %rsi, %rcx
/* %xmm0 = 0, the value every byte is compared against */
pxor %xmm0, %xmm0
and $15, %rcx
/* s already 16-byte aligned: go straight to the main loop */
jz LABEL(align16_loop)
/*
 * Unaligned start.  Round %rsi down to the enclosing 16-byte block and
 * test that block.  %rsi is then set to s + 16 so that the exit code,
 * which computes %rsi - 16 - s, yields 0 as the base offset.
 */
LABEL(unalign16):
and $0xfffffffffffffff0, %rsi
pcmpeqb (%rsi), %xmm0
lea 16(%rdi), %rsi
pmovmskb %xmm0, %edx
/*
 * Drop the mask bits of the %cl bytes that lie before s.  After the
 * shift, bit i of %edx corresponds to byte s[i].
 */
shr %cl, %edx
test %edx, %edx
jnz LABEL(exit)
/* %rsi = s + 16 - misalignment, i.e. the first 16-byte boundary above s */
sub %rcx, %rsi
/*
 * A zero byte located before s may have left 0xff bytes in %xmm0 even
 * though the shifted mask was 0, so clear it again before the loop.
 */
pxor %xmm0, %xmm0
.p2align 4
/*
 * Main loop, unrolled four times.  Invariants at the top of each step:
 * %rsi is 16-byte aligned and %xmm0 is all zero.  %xmm0 needs no
 * clearing between steps: when no byte matched, the compare result
 * left in %xmm0 is itself all zero.  %rsi is advanced before the mask
 * is tested, so at LABEL(exit) it points 16 bytes past the start of
 * the block that contains the zero byte.
 */
LABEL(align16_loop):
pcmpeqb (%rsi), %xmm0
pmovmskb %xmm0, %edx
add $16, %rsi
test %edx, %edx
jnz LABEL(exit)
pcmpeqb (%rsi), %xmm0
pmovmskb %xmm0, %edx
add $16, %rsi
test %edx, %edx
jnz LABEL(exit)
pcmpeqb (%rsi), %xmm0
pmovmskb %xmm0, %edx
add $16, %rsi
test %edx, %edx
jnz LABEL(exit)
pcmpeqb (%rsi), %xmm0
pmovmskb %xmm0, %edx
add $16, %rsi
test %edx, %edx
/* fourth block clean as well: start another round of four */
jz LABEL(align16_loop)
/* the fourth step falls through the alignment padding into the exit */
.p2align 4
/*
 * A zero byte was found; %edx is the non-zero mask of its block.
 * %rdi becomes -s so that a single lea can form %rsi - 16 - s, the
 * offset relative to s that mask bit 0 stands for.
 */
LABEL(exit):
neg %rdi
/*
 * Choose how the lowest set mask bit is located: with bsf when the
 * USE_BSF bit is set in .memops_method, with a chain of bit tests
 * otherwise.  Both symbols are defined outside this file (presumably
 * via proc64_id.h -- confirm).
 */
testl $USE_BSF, .memops_method(%rip)
jz LABEL(AMD_exit)
/* result = base offset + index of the lowest set bit of the mask */
lea -16(%rdi, %rsi), %rax
bsf %edx, %ecx
lea (%rax, %rcx),%rax
ret
.p2align 4
/*
 * Exit path that avoids bsf.  %rax is loaded with the base offset, then
 * mask bits are tested from bit 0 upwards and the index of the first set
 * bit is added by the matching exit_tailN.
 */
LABEL(AMD_exit):
lea -16(%rdi, %rsi), %rax
/* low mask byte empty: the zero byte is among bytes 8..15 of the block */
test %dl, %dl
jz LABEL(exit_high)
test $0x01, %dl
jnz LABEL(exit_tail0)
test $0x02, %dl
jnz LABEL(exit_tail1)
/* alignment directive inside the test chain; any padding is executed */
.p2align 4
test $0x04, %dl
jnz LABEL(exit_tail2)
test $0x08, %dl
jnz LABEL(exit_tail3)
test $0x10, %dl
jnz LABEL(exit_tail4)
test $0x20, %dl
jnz LABEL(exit_tail5)
test $0x40, %dl
jnz LABEL(exit_tail6)
/* %dl is non-zero and bits 0..6 are clear, so bit 7 is the one set */
add $7, %rax
ret
.p2align 4
/*
 * Bytes 8..15: add 8 to the base offset, then run the same chain on %dh
 * (mask bits 8..15) so the exit_tailN labels can be shared.
 */
LABEL(exit_high):
add $8, %rax
test $0x01, %dh
jnz LABEL(exit_tail0)
test $0x02, %dh
jnz LABEL(exit_tail1)
test $0x04, %dh
jnz LABEL(exit_tail2)
test $0x08, %dh
jnz LABEL(exit_tail3)
test $0x10, %dh
jnz LABEL(exit_tail4)
test $0x20, %dh
jnz LABEL(exit_tail5)
test $0x40, %dh
jnz LABEL(exit_tail6)
/*
 * The mask is non-zero, only 16 bits wide, and %dl is zero, so with
 * bits 8..14 clear the set bit must be bit 15.
 */
add $7, %rax
ret
.p2align 4
/*
 * exit_tailN: the zero byte is N bytes past the offset already in %rax.
 * For N = 0, %rax is already the result.
 * NOTE(review): the xor below only clears %ecx and does not affect the
 * returned %rax; its purpose is not evident from this file.
 */
LABEL(exit_tail0):
xor %ecx, %ecx
ret
.p2align 4
LABEL(exit_tail1):
add $1, %rax
ret
.p2align 4
LABEL(exit_tail2):
add $2, %rax
ret
.p2align 4
LABEL(exit_tail3):
add $3, %rax
ret
.p2align 4
LABEL(exit_tail4):
add $4, %rax
ret
.p2align 4
LABEL(exit_tail5):
add $5, %rax
ret
.p2align 4
LABEL(exit_tail6):
add $6, %rax
ret
SET_SIZE(strlen)