root/arch/alpha/lib/memmove.S
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/memmove.S
 *
 * Barely optimized memmove routine for Alpha EV5.
 *
 * This is hand-massaged output from the original memcpy.c.  We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 */
#include <linux/export.h>
        .set noat                       /* assembler macros may not use $at as scratch */
        .set noreorder                  /* ask that instructions stay in the order written */
        .text

/*
 * memmove(dest, src, n): copy n bytes from src to dest.  The two regions
 * are allowed to overlap.
 *
 * Register use, as established by the code below:
 *   $16 = dest, $17 = src, $18 = n on entry; $18 is then counted down
 *         and holds the number of bytes still to copy
 *   $0  = return value, set once to the original dest and never
 *         written again
 *   $4  = running destination pointer, $5 = running source pointer
 *   $1, $2, $3, $6 = scratch
 *   $26 = return address used by ret; $27 and $29 are used by ldgp
 *
 * Strategy:
 *   1. If the regions do not overlap, branch to memcpy (defined
 *      elsewhere) with the arguments untouched.  n == 0 always
 *      satisfies the no-overlap test, so n > 0 on every path that
 *      stays in this routine.
 *   2. Otherwise, if dest <= src copy upward (ascending addresses,
 *      $memmove_up); if dest > src copy downward, starting from
 *      dest + n and src + n.
 *   3. In either direction, when src and dest agree in their low three
 *      address bits: copy single bytes until the destination pointer is
 *      8-byte aligned, then whole quadwords with ldq/stq, then the
 *      remaining 0..7 bytes one at a time.  When they do not agree,
 *      every byte is copied individually.
 *
 * A single byte is stored by read-modify-write of the quadword that
 * contains it: ldq_u fetches the enclosing source and destination
 * quadwords, extbl pulls out the source byte, insbl moves it to the
 * destination byte lane, mskbl clears that lane in the old destination
 * quadword, bis merges the two and stq_u writes the result back.
 *
 * The remaining count in $18 is tested with signed branches
 * (ble/blt/bge/bgt).
 *
 * The unop/nop/fnop instructions do no useful work.
 * NOTE(review): they look like padding that keeps the hand-arranged
 * groups of four instructions aligned for EV5 -- confirm before
 * removing or moving any of them.
 */
        .align 4
        .globl memmove
        .ent memmove
memmove:
        ldgp $29, 0($27)                /* set up the global pointer ($29) from $27 */
        unop
        nop
        .prologue 1

        /*
         * Overlap test.  $4 = dest + n and $5 = src + n double as the
         * starting pointers of the downward copy further below.
         */
        addq $16,$18,$4
        addq $17,$18,$5
        cmpule $4,$17,$1                /*  dest + n <= src  */
        cmpule $5,$16,$2                /*  dest >= src + n  */

        bis $1,$2,$1                    /* $1 != 0: the regions do not overlap */
        mov $16,$0                      /* return value = original dest */
        xor $16,$17,$2                  /* low 3 bits are zero iff src/dest co-aligned */
        /*
         * No overlap: hand the whole job to memcpy.
         * NOTE(review): per the GNU as Alpha relocation docs, !samegp
         * asserts that the target shares our GP so that its GP load can
         * be skipped -- confirm against the assembler manual.
         */
        bne $1,memcpy                   !samegp

        and $2,7,$2                     /* Test for src/dest co-alignment.  */
        and $16,7,$1                    /* $1 = dest & 7, consumed by the upward path */
        cmpule $16,$17,$3               /* $3 = (dest <= src), unsigned compare */
        bne $3,$memmove_up              /* dest <= src: copy upward */

        /* dest > src with overlap: copy downward from the buffer ends. */
        and $4,7,$1                     /* $1 = (dest + n) & 7 */
        bne $2,$misaligned_dn           /* not co-aligned: byte-at-a-time only */
        unop
        /* End of dest already 8-byte aligned: no head bytes to copy. */
        beq $1,$skip_aligned_byte_loop_head_dn

        /*
         * Downward head loop: step both pointers back by one and copy
         * that byte, until $4 is 8-byte aligned or no bytes remain.
         */
$aligned_byte_loop_head_dn:
        lda $4,-1($4)                   /* --dest pointer */
        lda $5,-1($5)                   /* --src pointer */
        unop
        ble $18,$egress                 /* nothing left to copy: done */

        ldq_u $3,0($5)                  /* quadword containing the source byte */
        ldq_u $2,0($4)                  /* quadword containing the destination byte */
        lda $18,-1($18)                 /* one byte fewer remaining */
        extbl $3,$5,$1                  /* $1 = source byte (lane chosen by $5) */

        insbl $1,$4,$1                  /* move it to the lane chosen by $4 */
        mskbl $2,$4,$2                  /* clear that lane in the old dest quadword */
        bis $1,$2,$1                    /* merge new byte with the other seven */
        and $4,7,$6                     /* $6 == 0 once $4 is 8-byte aligned */

        stq_u $1,0($4)                  /* write the merged quadword back */
        bne $6,$aligned_byte_loop_head_dn

        /*
         * Bias the count by -8 so that $18 >= 0 means "at least one full
         * quadword remains".
         */
$skip_aligned_byte_loop_head_dn:
        lda $18,-8($18)
        blt $18,$skip_aligned_word_loop_dn

        /* Downward quadword loop: both pointers are 8-byte aligned here. */
$aligned_word_loop_dn:
        ldq $1,-8($5)                   /* load the quadword just below src pointer */
        nop
        lda $5,-8($5)                   /* src pointer -= 8 */
        lda $18,-8($18)                 /* (biased) count -= 8 */

        stq $1,-8($4)                   /* store it just below dest pointer */
        nop
        lda $4,-8($4)                   /* dest pointer -= 8 */
        bge $18,$aligned_word_loop_dn   /* another full quadword remains */

$skip_aligned_word_loop_dn:
        lda $18,8($18)                  /* undo the bias: $18 = leftover bytes */
        bgt $18,$byte_loop_tail_dn      /* copy the leftovers byte by byte */
        unop
        ret $31,($26),1                 /* nothing left: return, $0 = dest */

        .align 4
        /* Downward copy when src and dest are not co-aligned. */
$misaligned_dn:
        nop
        fnop
        unop
        beq $18,$egress                 /* zero bytes: done */

        /*
         * Downward byte loop.  The loads address the byte just below each
         * pointer (displacement -1) before the pointers are decremented;
         * extbl/insbl/mskbl and the store then use the decremented values.
         */
$byte_loop_tail_dn:
        ldq_u $3,-1($5)                 /* quadword containing the next source byte */
        ldq_u $2,-1($4)                 /* quadword containing the next dest byte */
        lda $5,-1($5)                   /* --src pointer */
        lda $4,-1($4)                   /* --dest pointer */

        lda $18,-1($18)                 /* one byte fewer remaining */
        extbl $3,$5,$1                  /* $1 = source byte */
        insbl $1,$4,$1                  /* position it for the dest lane */
        mskbl $2,$4,$2                  /* clear the dest lane */

        bis $1,$2,$1                    /* merge */
        stq_u $1,0($4)                  /* write the merged quadword back */
        bgt $18,$byte_loop_tail_dn      /* more bytes remain */
        br $egress

        /* dest <= src with overlap: copy upward from the buffer starts. */
$memmove_up:
        mov $16,$4                      /* dest pointer = dest */
        mov $17,$5                      /* src pointer = src */
        bne $2,$misaligned_up           /* not co-aligned: byte-at-a-time only */
        /* $1 = dest & 7 from above; zero means dest is already aligned. */
        beq $1,$skip_aligned_byte_loop_head_up

        /*
         * Upward head loop: copy one byte and advance both pointers,
         * until $4 is 8-byte aligned or no bytes remain.
         */
$aligned_byte_loop_head_up:
        unop
        ble $18,$egress                 /* nothing left to copy: done */
        ldq_u $3,0($5)                  /* quadword containing the source byte */
        ldq_u $2,0($4)                  /* quadword containing the destination byte */

        lda $18,-1($18)                 /* one byte fewer remaining */
        extbl $3,$5,$1                  /* $1 = source byte */
        insbl $1,$4,$1                  /* position it for the dest lane */
        mskbl $2,$4,$2                  /* clear the dest lane */

        bis $1,$2,$1                    /* merge */
        lda $5,1($5)                    /* ++src pointer */
        stq_u $1,0($4)                  /* store before the dest pointer moves */
        lda $4,1($4)                    /* ++dest pointer */

        and $4,7,$6                     /* $6 == 0 once $4 is 8-byte aligned */
        bne $6,$aligned_byte_loop_head_up

        /*
         * Bias the count by -8 so that $18 >= 0 means "at least one full
         * quadword remains".
         */
$skip_aligned_byte_loop_head_up:
        lda $18,-8($18)
        blt $18,$skip_aligned_word_loop_up

        /* Upward quadword loop: both pointers are 8-byte aligned here. */
$aligned_word_loop_up:
        ldq $1,0($5)                    /* load the quadword at src pointer */
        nop
        lda $5,8($5)                    /* src pointer += 8 */
        lda $18,-8($18)                 /* (biased) count -= 8 */

        stq $1,0($4)                    /* store it at dest pointer */
        nop
        lda $4,8($4)                    /* dest pointer += 8 */
        bge $18,$aligned_word_loop_up   /* another full quadword remains */

$skip_aligned_word_loop_up:
        lda $18,8($18)                  /* undo the bias: $18 = leftover bytes */
        bgt $18,$byte_loop_tail_up      /* copy the leftovers byte by byte */
        unop
        ret $31,($26),1                 /* nothing left: return, $0 = dest */

        .align 4
        /* Upward copy when src and dest are not co-aligned. */
$misaligned_up:
        nop
        fnop
        unop
        beq $18,$egress                 /* zero bytes: done */

        /* Upward byte loop: copy one byte, then advance both pointers. */
$byte_loop_tail_up:
        ldq_u $3,0($5)                  /* quadword containing the source byte */
        ldq_u $2,0($4)                  /* quadword containing the destination byte */
        lda $18,-1($18)                 /* one byte fewer remaining */
        extbl $3,$5,$1                  /* $1 = source byte */

        insbl $1,$4,$1                  /* position it for the dest lane */
        mskbl $2,$4,$2                  /* clear the dest lane */
        bis $1,$2,$1                    /* merge */
        stq_u $1,0($4)                  /* write the merged quadword back */

        lda $5,1($5)                    /* ++src pointer */
        lda $4,1($4)                    /* ++dest pointer */
        nop
        bgt $18,$byte_loop_tail_up      /* more bytes remain; else fall into $egress */

        /* Common exit: $0 still holds the original dest. */
$egress:
        ret $31,($26),1                 /* return to the address in $26 */
        nop                             /* no branch targets these three nops */
        nop
        nop

        .end memmove
        /*
         * EXPORT_SYMBOL is provided by <linux/export.h>, included at the
         * top of this file; presumably it makes memmove available to
         * loadable modules -- verify against that header.
         */
        EXPORT_SYMBOL(memmove)