root/arch/csky/abiv2/memmove.S
/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/linkage.h>
#include "sysdep.h"

        .weak memmove
/*
 * void *memmove(void *dest, const void *src, size_t len)
 *
 * In:   r0 = dest, r1 = src, r2 = len
 * Out:  r0 = dest (the entry value, parked in r12 while copying)
 * Uses: r1-r3, r12, r13, r18 and the condition bit; additionally
 *       r20-r23 when not built for __CK860__.
 *
 * When the unsigned difference (dest - src) is >= len, control is
 * handed to memcpy.  Otherwise both pointers are advanced to the end of
 * their buffers and the data is copied from the highest address down:
 *   1. single bytes until the dest end pointer is 4-byte aligned,
 *   2. 16-byte blocks,
 *   3. remaining whole words,
 *   4. remaining bytes.
 *
 * LABLE_ALIGN, PRE_BNEZAD and BNEZAD are not defined in this file
 * (presumably in sysdep.h).  As used here, PRE_BNEZAD/BNEZAD together
 * count the given register down and branch back while it is non-zero.
 *
 * NOTE(review): cmplti is a signed compare per the C-SKY ISA, so a len
 * of 0x80000000 or more would take the byte-only path -- confirm such
 * lengths cannot reach this routine.
 */
ENTRY(__memmove)
ENTRY(memmove)
        /* r3 = dest - src; if r3 >= len (unsigned) let memcpy do it.  */
        subu    r3, r0, r1
        cmphs   r3, r2
        bt      memcpy

        /* Keep the original dest for the return value, then move r0/r1
           to one past the end of each buffer for the backward copy.  */
        mov     r12, r0
        addu    r0, r0, r2
        addu    r1, r1, r2

        /* Fewer than 4 bytes in total: byte copy only.  */
        cmplti  r2, 4
        bt      .L_copy_by_byte

        /* r13 = low two bits of the dest end pointer.  */
        andi    r13, r0, 3
        /* Non-zero means the dest end is not 4-byte aligned yet.  */
        bnez    r13, .L_dest_not_aligned
        /* Only dest is aligned from here on; src alignment is never
           checked because hardware can handle unaligned access
           directly.  */
.L_dest_aligned:
        /* r18 = len >> 4, the number of 16-byte blocks to copy.  */
        zext    r18, r2, 31, 4
        /* No full block: go straight to the word loop.  */
        bez     r18, .L_len_less_16bytes

        /* len >= 16 bytes */
        LABLE_ALIGN
.L_len_larger_16bytes:
        /* Step both pointers down by one block, then copy that block.  */
        subi    r1, 16
        subi    r0, 16
#if defined(__CK860__)
        /* One word at a time through r3, highest word first.  There is
           no PRE_BNEZAD on this path; presumably the CK860 BNEZAD
           decrements the counter itself -- confirm in sysdep.h.  */
        ldw     r3, (r1, 12)
        stw     r3, (r0, 12)
        ldw     r3, (r1, 8)
        stw     r3, (r0, 8)
        ldw     r3, (r1, 4)
        stw     r3, (r0, 4)
        ldw     r3, (r1, 0)
        stw     r3, (r0, 0)
#else
        /* Load the whole block into r20-r23 before storing any of it.  */
        ldw     r20, (r1, 0)
        ldw     r21, (r1, 4)
        ldw     r22, (r1, 8)
        ldw     r23, (r1, 12)
        stw     r20, (r0, 0)
        stw     r21, (r0, 4)
        stw     r22, (r0, 8)
        stw     r23, (r0, 12)
        PRE_BNEZAD (r18)
#endif
        BNEZAD (r18, .L_len_larger_16bytes)

.L_len_less_16bytes:
        /* r18 = bits 3..2 of len: whole words left after the blocks.  */
        zext    r18, r2, 3, 2
        bez     r18, .L_copy_by_byte
.L_len_less_16bytes_loop:
        subi    r1, 4
        subi    r0, 4
        ldw     r3, (r1, 0)
        PRE_BNEZAD (r18)
        stw     r3, (r0, 0)
        BNEZAD (r18, .L_len_less_16bytes_loop)

        /* Copy the trailing len & 3 bytes.  When entered from one of
           the "len < 4" checks this is everything that is left.  */
.L_copy_by_byte:
        zext    r18, r2, 1, 0
        bez     r18, .L_return
.L_copy_by_byte_loop:
        subi    r1, 1
        subi    r0, 1
        ldb     r3, (r1, 0)
        PRE_BNEZAD (r18)
        stb     r3, (r0, 0)
        BNEZAD (r18, .L_copy_by_byte_loop)

.L_return:
        /* Return the dest pointer that was passed in.  */
        mov     r0, r12
        rts

        /* The dest end is not aligned: copy r13 (1..3) single bytes so
           that it becomes 4-byte aligned.  */
.L_dest_not_aligned:
        /* Take those bytes off the remaining length up front; len is at
           least 4 here, so r2 stays positive.  */
        sub     r2, r13
.L_dest_not_aligned_loop:
        subi    r1, 1
        subi    r0, 1
        /* Makes the dest align.  */
        ldb     r3, (r1, 0)
        PRE_BNEZAD (r13)
        stb     r3, (r0, 0)
        BNEZAD (r13, .L_dest_not_aligned_loop)
        /* Fewer than 4 bytes left: finish with the byte loop.  */
        cmplti  r2, 4
        bt      .L_copy_by_byte
        /* Dest is aligned now; src alignment is not examined.  Rejoin
           the block/word copy path.  */
        jbr     .L_dest_aligned
ENDPROC(memmove)
ENDPROC(__memmove)