#include <machine/asm.h>
#include "amd64_archlevel.h"
/*
 * Alignment used in front of hot loop heads: align to 2^4 = 16 bytes,
 * filling the gap with 0x90 bytes (the one-byte x86 NOP).
 */
#define ALIGN_TEXT .p2align 4,0x90
/*
 * Variant list for strlen: names the two implementations defined in
 * this file, "scalar" and "baseline".
 *
 * NOTE(review): ARCHFUNCS/ARCHFUNC/ENDARCHFUNCS come from
 * amd64_archlevel.h, which is not visible here; presumably they build
 * the table used to pick a variant at run time -- confirm against that
 * header.
 */
ARCHFUNCS(strlen)
ARCHFUNC(strlen, scalar)
ARCHFUNC(strlen, baseline)
ENDARCHFUNCS(strlen)
/*
 * strlen, scalar variant: scans the string one aligned 8-byte word at
 * a time using general-purpose registers only.
 *
 * In:       %rdi = address of the first byte of the string
 * Out:      %rax = number of bytes preceding the first NUL byte
 * Clobbers: %rcx, %rdx, %rdi, %r8, %r9, %r10, %r11, flags
 *
 * Zero-byte test: for a 64-bit word x,
 *     (x - 0x0101010101010101) & ~x & 0x8080808080808080
 * is non-zero iff some byte of x is zero.  Bytes above the first zero
 * byte may be flagged spuriously (borrow propagation), but the lowest
 * set bit always lies in the first zero byte, and on little-endian
 * x86 that is the byte at the lowest address.
 */
ARCHENTRY(strlen, scalar)
/* r8 = -0x0101010101010101, so adding it subtracts 1 from every byte */
movabsq $0xfefefefefefefeff,%r8
/* r9 = mask selecting the top bit of every byte */
movabsq $0x8080808080808080,%r9
/* r10 = original string pointer, kept for the final subtraction */
movq %rdi,%r10
/* rcx = copy of the pointer; its low 3 bits become the shift count below */
movq %rdi,%rcx
/* already 8-byte aligned?  then enter the main loop directly */
testb $7,%dil
jz 2f
/*
 * Misaligned start: round the pointer down to the enclosing aligned
 * word and load all of it.  The bytes that lie before the string are
 * forced to 0xff so that they can never be mistaken for a NUL.
 */
andq $~7,%rdi
movq (%rdi),%r11
/*
 * rcx = pointer * 8.  A 64-bit shift uses only the low 6 bits of %cl,
 * so the effective count is (pointer & 7) * 8, i.e. the number of
 * bits occupied by the bytes preceding the string.
 */
shlq $3,%rcx
movq $-1,%rdx
shlq %cl,%rdx
/* rdx = ones in exactly the bits of the bytes preceding the string */
notq %rdx
orq %rdx,%r11
/* zero-byte test on the patched first word */
leaq (%r11,%r8),%rcx
notq %r11
andq %r11,%rcx
andq %r9,%rcx
/* NUL already in the first word */
jnz 3f
/*
 * Main loop: one aligned word per iteration.  Label 1 advances the
 * pointer; label 2 is the entry point for an already aligned string.
 */
ALIGN_TEXT
1:
/* lea rather than add: advances %rdi without touching the flags */
leaq 8(%rdi),%rdi
2:
movq (%rdi),%r11
leaq (%r11,%r8),%rcx
notq %r11
andq %r11,%rcx
andq %r9,%rcx
/* no zero byte in this word: keep scanning */
jz 1b
/*
 * Found: %rcx has bit 7 set in (at least) the first zero byte and
 * %rdi points at the word that contains it.
 */
3:
/* bit index of the lowest flag, divided by 8 = byte offset in the word */
bsfq %rcx,%rcx
shrq $3,%rcx
/* length = (word address + byte offset) - original pointer */
leaq (%rcx,%rdi),%rax
subq %r10,%rax
ret
ARCHEND(strlen, scalar)
/*
 * strlen, baseline variant: scans the string in aligned 16-byte chunks
 * with SSE byte compares (pcmpeqb against zero + pmovmskb).
 *
 * In:       %rdi = address of the first byte of the string
 * Out:      %rax = number of bytes preceding the first NUL byte
 * Clobbers: %rax, %rcx, %rsi, %rdi, %xmm1, flags
 *
 * The first chunk is the aligned 16-byte block containing the start of
 * the string; matches in the bytes before the string are shifted out
 * of the mask.  The main loop is unrolled twice (32 bytes/iteration).
 *
 * Both tzcnt instructions are only reached with a non-zero mask in
 * %eax.  NOTE(review): on CPUs without BMI1, tzcnt is expected to
 * execute as bsf, which yields the same result for non-zero input --
 * confirm against the instruction set reference.
 */
ARCHENTRY(strlen, baseline)
mov %rdi, %rcx
pxor %xmm1, %xmm1
and $~0xf, %rdi # align string
pcmpeqb (%rdi), %xmm1 # compare head (with junk before string)
mov %rcx, %rsi # string pointer copy for later
and $0xf, %ecx # amount of bytes rdi is past 16 byte alignment
pmovmskb %xmm1, %eax
add $32, %rdi # advance to next iteration
shr %cl, %eax # clear out matches in junk bytes
test %eax, %eax # any match? (can't use ZF from SHR as CL=0 is possible)
jnz 2f
/*
 * Main loop.  On entry to label 1, -16(%rdi) is the next unexamined
 * aligned chunk; each iteration checks -16(%rdi) and then (%rdi).
 */
ALIGN_TEXT
1: pxor %xmm1, %xmm1
pcmpeqb -16(%rdi), %xmm1 # find NUL bytes
pmovmskb %xmm1, %eax
test %eax, %eax # were any NUL bytes present?
jnz 3f
/* same check, unrolled once more, for the chunk at (%rdi) */
pxor %xmm1, %xmm1
pcmpeqb (%rdi), %xmm1
pmovmskb %xmm1, %eax
add $32, %rdi # advance to next iteration
test %eax, %eax
jz 1b
/*
 * Match in the second chunk: %rdi has already been advanced by 32, so
 * step back by 16 to restore the invariant that the matching chunk is
 * at -16(%rdi), as label 3 expects.
 */
sub $16, %rdi # undo half the advancement
3: tzcnt %eax, %eax # find the first NUL byte
sub %rsi, %rdi # string length until beginning of (%rdi)
lea -16(%rdi, %rax, 1), %rax # that plus loc. of NUL byte: full string length
ret
/*
 * Match in the head chunk: the mask was shifted right by the
 * misalignment, so bit 0 corresponds to the first byte of the string
 * and the index of the lowest set bit is the length itself.
 */
2: tzcnt %eax, %eax # compute string length
ret
ARCHEND(strlen, baseline)
.section .note.GNU-stack,"",%progbits