#include <machine/asm.h>
/* Phase 2 of the multi-phase path copies BLOCK_SIZE (64) bytes per loop. */
#define BLOCK_SIZE_BITS 6
#define BLOCK_SIZE (1 << BLOCK_SIZE_BITS)
#define BLOCK_SIZE_MASK (BLOCK_SIZE - 1)
/*
 * Source/destination pointers whose low bits (under this mask) differ
 * can never be mutually aligned and take the byte-at-a-time path.
 */
#ifndef ALIGN_MASK
#define ALIGN_MASK 0x7
#endif
/* Copies of at least this many bytes use the three-phase block path. */
#define MULTI_PHASE_THRESHOLD 512
/*
 * By default this file builds as __bcopy (weak alias: bcopy); compiled
 * with -DMEMMOVE it builds as __memmove (weak alias: memmove).  FN_NAME
 * may also be pre-defined by an including wrapper to reuse this body
 * under another name (see the FN_PHASE2 hook below).
 */
#ifndef FN_NAME
#ifdef MEMMOVE
#define FN_NAME __memmove
WEAK_REFERENCE(__memmove, memmove);
#else
#define FN_NAME __bcopy
WEAK_REFERENCE(__bcopy, bcopy);
#endif
#endif
/*
 * FN_NAME: overlap-safe block copy for PowerPC64.
 *
 * memmove variant:  %r3 = dst, %r4 = src, %r5 = len; returns dst in %r3.
 * bcopy variant:    %r3 = src, %r4 = dst, %r5 = len (swapped on entry).
 * After the entry shuffle, %r3 = dst and %r4 = src in both variants.
 *
 * Strategy:
 *   - return immediately if src == dst or len == 0;
 *   - if src and dst differ in their low ALIGN_MASK bits, they can never
 *     be mutually aligned: copy one byte at a time (forward, or backward
 *     from the last byte when src < dst, so overlapping regions are safe);
 *   - len < MULTI_PHASE_THRESHOLD: ".Lsingle_phase" copies 16 bytes per
 *     iteration with ld/std pairs, then a bytewise tail;
 *   - otherwise three phases: bytewise copy to 16-byte-align the source,
 *     unrolled BLOCK_SIZE-byte blocks, then the single-phase tail.
 *
 * Scratch values live in the protected area below the stack pointer
 * (negative offsets from %r1); callee-saved %r14-%r21 used by phase 2
 * are spilled there as well.  NOTE(review): relies on the ABI's
 * guaranteed region below %r1 being large enough for offsets down to
 * -112 -- confirm against the targeted ELF ABI.
 */
ENTRY(FN_NAME)
cmpld %r3, %r4 /* no work if the two pointers are identical */
beqlr-
cmpdi %r5, 0 /* no work for a zero-length copy */
beqlr-
#ifdef MEMMOVE
std %r3, -8(%r1) /* memmove returns dst: stash it for .Ldone */
#else
mr %r0, %r3 /* bcopy(src, dst, len): swap args so that */
mr %r3, %r4 /* %r3 = dst, %r4 = src from here on */
mr %r4, %r0
#endif
andi. %r8, %r3, ALIGN_MASK /* r8 = dst & ALIGN_MASK */
andi. %r7, %r4, ALIGN_MASK /* r7 = src & ALIGN_MASK */
cmpd %r7, %r8
bne .Lunaligned /* mutually misaligned: bytewise only */
cmpldi %r5, MULTI_PHASE_THRESHOLD
bge .Lmulti_phase /* large copy: three-phase block path */
b .Lfast_copy
/*
 * Mutually misaligned pointers: copy len single bytes via CTR, stepping
 * by %r0 = +1 (forward) or -1 (backward, when the regions may overlap
 * with src below dst).
 */
.Lunaligned:
cmpd %r4, %r3
blt .Lbackward_unaligned /* src < dst: copy tail-first */
li %r0, 1 /* forward step */
mtctr %r5
b .Lsingle_1_loop
.Lbackward_unaligned:
add %r3, %r3, %r5 /* point both at their last byte: */
addi %r3, %r3, -1 /* dst + len - 1 */
add %r4, %r4, %r5
addi %r4, %r4, -1 /* src + len - 1 */
li %r0, -1 /* backward step */
mtctr %r5
b .Lsingle_1_loop
/* Mutually aligned, short copy: pick direction by overlap. */
.Lfast_copy:
cmpd %r4, %r3
blt .Lbackward_align /* src < dst: copy tail-first */
/* Bytewise until src is 16-byte aligned (dst then shares src mod 8). */
.align 5
.Lalign:
andi. %r0, %r4, 15
beq .Lsingle_copy
lbz %r0, 0(%r4)
addi %r4, %r4, 1
stb %r0, 0(%r3)
addi %r3, %r3, 1
addi %r5, %r5, -1 /* one byte of len consumed */
cmpdi %r5, 0
beq- .Ldone /* copy exhausted while aligning */
b .Lalign
.Lbackward_align:
add %r3, %r3, %r5 /* point one past the end; the lbzu/stbu */
add %r4, %r4, %r5 /* below pre-decrement before each access */
.align 5
.Lbackward_align_loop:
andi. %r0, %r4, 15
beq .Lbackward_single_copy
lbzu %r0, -1(%r4)
addi %r5, %r5, -1
stbu %r0, -1(%r3)
cmpdi %r5, 0
beq- .Ldone
b .Lbackward_align_loop
/*
 * Single-phase setup.  Register roles for .Lsingle_phase:
 *   %r0 = +/-1 bytewise step, %r8 = +/-16 per-16-byte-block step,
 *   %r9 = pointer bias so the ld/std pair at offsets 0 and 8 covers the
 *         correct 16 bytes (0 forward, -15 backward).
 */
.Lsingle_copy:
li %r0, 1
li %r8, 16
li %r9, 0
b .Lsingle_phase
.Lbackward_single_copy:
li %r0, -1
li %r8, -16
li %r9, -15
addi %r3, %r3, -1 /* step back from one-past-end to last byte */
addi %r4, %r4, -1
/* Copy len bytes: len/16 iterations of 16 bytes, then len%16 bytes. */
.Lsingle_phase:
srdi. %r6, %r5, 4 /* r6 = number of 16-byte chunks */
beq .Lsingle_1 /* fewer than 16 bytes: tail only */
add %r3, %r3, %r9 /* apply direction bias (see above) */
add %r4, %r4, %r9
mtctr %r6
.align 5
.Lsingle_16_loop:
ld %r6, 0(%r4) /* 16 bytes per iteration via two */
ld %r7, 8(%r4) /* doubleword loads/stores */
add %r4, %r4, %r8
std %r6, 0(%r3)
std %r7, 8(%r3)
add %r3, %r3, %r8
bdnz .Lsingle_16_loop
sub %r3, %r3, %r9 /* undo the direction bias */
sub %r4, %r4, %r9
.Lsingle_1:
andi. %r6, %r5, 0x0f /* r6 = remaining tail bytes */
beq .Ldone
mtctr %r6
.align 5
.Lsingle_1_loop:
lbz %r6, 0(%r4)
add %r4, %r4, %r0 /* step by +/-1 */
stb %r6, 0(%r3)
add %r3, %r3, %r0
bdnz .Lsingle_1_loop
.Ldone:
#ifdef MEMMOVE
ld %r3, -8(%r1) /* restore saved dst as the return value */
#endif
blr
/*
 * Multi-phase (len >= MULTI_PHASE_THRESHOLD):
 *   phase 1: bytewise copy of r7 = 16 - (src & 15) bytes to 16-align src
 *            (NOTE(review): r7 is 16, not 0, when src is already aligned,
 *            so phase 1 always copies at least one byte -- the phase 2/3
 *            counts below are derived from that same r7, so this is
 *            consistent, merely a mildly pessimal special case);
 *   phase 2: r10 = (len - r7) >> BLOCK_SIZE_BITS unrolled 64-byte blocks;
 *   phase 3: r9 = (len - r7) & BLOCK_SIZE_MASK tail bytes via
 *            .Lsingle_phase.  For the backward direction the phase 1 and
 *            phase 3 counts swap roles (the tail is copied first).
 *
 * Red-zone scratch layout: -16 / -24 hold the %r8 / %r9 values that
 * phase 3 feeds to .Lsingle_phase; -32 / -40 / -48 hold the phase 1
 * count, phase 2 block count, and phase 3 count; -56..-112 spill
 * %r14-%r21 around phase 2.
 */
.Lmulti_phase:
andi. %r6, %r4, 15 /* r6 = src & 15 */
subfic %r7, %r6, 16 /* r7 = 16 - r6 = phase-1 byte count */
sub %r8, %r5, %r7 /* r8 = len remaining after phase 1 */
andi. %r9, %r8, BLOCK_SIZE_MASK /* r9 = tail bytes for phase 3 */
srdi %r10, %r8, BLOCK_SIZE_BITS /* r10 = 64-byte blocks for phase 2 */
cmpd %r4, %r3
blt .Lbackward_multi_copy /* src < dst: run the phases backward */
std %r7, -32(%r1) /* phase 1 count */
std %r10, -40(%r1) /* phase 2 block count */
std %r9, -48(%r1) /* phase 3 count */
li %r0, 1 /* forward bytewise step */
li %r5, BLOCK_SIZE /* forward per-block pointer step */
li %r7, 0 /* ldx/stdx offsets for the four */
li %r8, 16 /* doubleword pairs of a 64-byte block */
li %r9, 32
li %r10, 48
std %r8, -16(%r1) /* phase-3 .Lsingle_phase r8 = +16 */
std %r7, -24(%r1) /* phase-3 .Lsingle_phase r9 = 0 */
b .Lphase1
.Lbackward_multi_copy:
std %r9, -32(%r1) /* backward: phase 1 copies the tail ... */
std %r10, -40(%r1) /* ... phase 2 count unchanged ... */
std %r7, -48(%r1) /* ... phase 3 copies the head bytes */
li %r0, -1 /* backward bytewise step */
add %r6, %r5, %r0 /* r6 = len - 1 */
li %r5, -BLOCK_SIZE /* backward per-block pointer step */
add %r3, %r3, %r6 /* point both at their last byte */
add %r4, %r4, %r6
li %r7, -15 /* mirrored ldx/stdx offsets so each */
li %r8, -31 /* pair still covers 16 bytes below */
li %r9, -47 /* the current position */
li %r10, -63
add %r6, %r7, %r0 /* r6 = -16 */
std %r6, -16(%r1) /* phase-3 .Lsingle_phase r8 = -16 */
std %r7, -24(%r1) /* phase-3 .Lsingle_phase r9 = -15 */
/* Phase 1: bytewise copy until the source is 16-byte aligned. */
.Lphase1:
ld %r6, -32(%r1)
cmpldi %r6, 0
beq+ .Lphase2 /* zero only possible on the backward path */
mtctr %r6
.align 5
.Lphase1_loop:
lbz %r6, 0(%r4)
add %r4, %r4, %r0
stb %r6, 0(%r3)
add %r3, %r3, %r0
bdnz .Lphase1_loop
/* Phase 2: unrolled 64-byte blocks (overridable via FN_PHASE2). */
.Lphase2:
ld %r6, -40(%r1)
cmpldi %r6, 0
beq .Lphase3
#ifdef FN_PHASE2
FN_PHASE2 /* hook: alternate inner loop (e.g. vector) */
#else
std %r14, -56(%r1) /* spill callee-saved scratch for the loop */
std %r15, -64(%r1)
std %r16, -72(%r1)
std %r17, -80(%r1)
std %r18, -88(%r1)
std %r19, -96(%r1)
std %r20, -104(%r1)
std %r21, -112(%r1)
addi %r18, %r7, 8 /* second-doubleword offsets: base + 8 */
addi %r19, %r8, 8
addi %r20, %r9, 8
addi %r21, %r10, 8
mtctr %r6
.align 5
.Lphase2_loop:
ldx %r14, %r7, %r4 /* first 32 bytes of the block ... */
ldx %r15, %r18, %r4
ldx %r16, %r8, %r4
ldx %r17, %r19, %r4
stdx %r14, %r7, %r3
stdx %r15, %r18, %r3
stdx %r16, %r8, %r3
stdx %r17, %r19, %r3
ldx %r14, %r9, %r4 /* ... then the second 32 bytes */
ldx %r15, %r20, %r4
ldx %r16, %r10, %r4
ldx %r17, %r21, %r4
stdx %r14, %r9, %r3
stdx %r15, %r20, %r3
stdx %r16, %r10, %r3
stdx %r17, %r21, %r3
add %r4, %r4, %r5 /* advance by +/-BLOCK_SIZE */
add %r3, %r3, %r5
bdnz .Lphase2_loop
ld %r14, -56(%r1) /* restore callee-saved registers */
ld %r15, -64(%r1)
ld %r16, -72(%r1)
ld %r17, -80(%r1)
ld %r18, -88(%r1)
ld %r19, -96(%r1)
ld %r20, -104(%r1)
ld %r21, -112(%r1)
#endif
/* Phase 3: remaining bytes via the single-phase copier (%r0 still set). */
.Lphase3:
ld %r5, -48(%r1) /* residual length */
ld %r8, -16(%r1) /* +/-16 block step for .Lsingle_phase */
ld %r9, -24(%r1) /* pointer bias for .Lsingle_phase */
b .Lsingle_phase
END(FN_NAME)
.section .note.GNU-stack,"",%progbits