#include "DEFS.h"
/*
 * memcpy / memmove / bcopy -- three entry points sharing one copy engine.
 *
 * On entry:
 *	memcpy, memmove:  a0 = to,   a1 = from, a2 = len
 *	bcopy:            a0 = from, a1 = to,   a2 = len
 *
 * memcpy and memmove first swap a0/a1 so that the shared code below always
 * sees a0 = from, a1 = to.  The original "to" pointer is left in v0 and is
 * never modified afterwards, so it is what the caller gets back.  bcopy
 * does not set v0.
 *
 * memcpy always copies forward.  memmove and bcopy copy backward, one byte
 * at a time, when from < to < from + len; otherwise they copy forward.
 *
 * The length tests use signed compares (slt / ble), so a length with the
 * sign bit set is treated as "nothing to copy".
 *
 * Registers written: v0 (memcpy/memmove only), v1, a0-a3, t0-t2.
 * No stack is used.
 *
 * The whole routine is assembled with .set noreorder: the instruction
 * following each branch or jump is its delay slot and executes whether or
 * not the branch is taken.  Do not reorder instructions around branches.
 *
 * LWHI/LWLO/SWHI and LDHI/LDLO/SDHI are defined outside this file;
 * presumably they are the endian-appropriate unaligned load/store
 * left/right instructions -- confirm against the shared asm header.
 */
LEAF(memcpy, 0)
	.set	noreorder
	move	v0, a0			# swap from and to; v0 = to (return value)
	move	a0, a1
	move	a1, v0
	/* No overlap test here: memcpy unconditionally takes the forward path. */
	j	.Lforward		# do forward copy
	slt	t2, a2, 12		# (delay slot) t2 = (len < 12), small copy

ALEAF(memmove)
	.set	noreorder
	move	v0, a0			# swap from and to; v0 = to (return value)
	move	a0, a1
	move	a1, v0
	/* falls through into bcopy with a0 = from, a1 = to */

ALEAF(bcopy)
	.set	noreorder
	PTR_ADDU t0, a0, a2		# t0 = from + len (end of source region)
	sltu	t1, a1, t0		# t1 = (to < from + len)
	sltu	t2, a0, a1		# t2 = (from < to)
	and	t1, t1, t2		# t1 = true if from < to < (from+len)
	beq	t1, zero, .Lforward	# non overlapping, do forward copy
	slt	t2, a2, 12		# (delay slot) t2 = (len < 12), small copy

	/*
	 * Overlapping with from < to: copy bytes from the end towards the
	 * start so that source bytes are read before they are overwritten.
	 */
	ble	a2, zero, 2f		# nothing to do for len <= 0
	PTR_ADDU t1, a1, a2		# (delay slot) t1 = end of to region
1:
	lb	v1, -1(t0)		# copy bytes backwards,
	PTR_SUBU t0, t0, 1		# rare case, so do it the slow way
	PTR_SUBU t1, t1, 1
	bne	t0, a0, 1b		# until the source start is reached
	sb	v1, 0(t1)		# (delay slot) store the byte just loaded
2:
	j	ra
	nop

	/*
	 * Forward copy.  On entry: a0 = from, a1 = to, a2 = len,
	 * t2 = nonzero when len < 12 (go straight to the byte loop).
	 */
.Lforward:
#ifdef _STANDALONE
	/* Word (4-byte) at a time variant. */
	bne	t2, zero, .Lsmallcpy	# do a small bcopy
	xor	v1, a0, a1		# (delay slot) compare low two bits of addresses
	and	v1, v1, 3
	PTR_SUBU a3, zero, a1		# compute # bytes to word align address
	beq	v1, zero, .Laligned	# addresses can be word aligned
	and	a3, a3, 3		# (delay slot) a3 = (-to) & 3

	/* Source and destination differ in alignment: align the destination. */
	beq	a3, zero, 1f		# destination already word aligned
	PTR_SUBU a2, a2, a3		# (delay slot) subtract from remaining count
	LWHI	v1, 0(a0)		# get next 4 bytes (unaligned)
	LWLO	v1, 3(a0)
	PTR_ADDU a0, a0, a3
	SWHI	v1, 0(a1)		# store 1, 2, or 3 bytes to align a1
	PTR_ADDU a1, a1, a3
1:
	and	v1, a2, 3		# v1 = leftover bytes after whole words
	PTR_SUBU a3, a2, v1		# a3 = byte count covered by whole words
	move	a2, v1			# a2 = bytes left for the byte loop
	PTR_ADDU a3, a3, a0		# compute ending address
2:
	LWHI	v1, 0(a0)		# copy words a0 unaligned, a1 aligned
	LWLO	v1, 3(a0)
	PTR_ADDU a0, a0, 4
	sw	v1, 0(a1)
	PTR_ADDU a1, a1, 4
	bne	a0, a3, 2b
	nop				# We have to do this mmu-bug.
	b	.Lsmallcpy
	nop

	/* Source and destination share the same low two address bits. */
.Laligned:
	beq	a3, zero, 1f		# already word aligned
	PTR_SUBU a2, a2, a3		# (delay slot) subtract from remaining count
	LWHI	v1, 0(a0)		# copy 1, 2, or 3 bytes to align
	PTR_ADDU a0, a0, a3
	SWHI	v1, 0(a1)
	PTR_ADDU a1, a1, a3
1:
	and	v1, a2, 3		# compute number of whole words left
	PTR_SUBU a3, a2, v1		# a3 = byte count covered by whole words
	move	a2, v1			# a2 = bytes left for the byte loop
	PTR_ADDU a3, a3, a0		# compute ending address
2:
	lw	v1, 0(a0)		# copy words
	PTR_ADDU a0, a0, 4
	sw	v1, 0(a1)
	bne	a0, a3, 2b
	PTR_ADDU a1, a1, 4		# (delay slot) advance destination
	/* falls through to .Lsmallcpy for the remaining 0-3 bytes */
#else
	/* Doubleword (8-byte) at a time variant. */
	bne	t2, zero, .Lsmallcpy	# do a small bcopy
	xor	v1, a0, a1		# (delay slot) compare low three bits of addresses
	and	v1, v1, 7
	PTR_SUBU a3, zero, a1		# compute # bytes to dword align address
	beq	v1, zero, .Laligned	# addresses can be dword aligned
	and	a3, a3, 7		# (delay slot) a3 = (-to) & 7

	/* Source and destination differ in alignment: align the destination. */
	beq	a3, zero, 1f		# destination already dword aligned
	PTR_SUBU a2, a2, a3		# (delay slot) subtract from remaining count
	LDHI	v1, 0(a0)		# get next 8 bytes (unaligned)
	LDLO	v1, 7(a0)
	PTR_ADDU a0, a0, a3
	SDHI	v1, 0(a1)		# store 1-7 bytes to align a1
	PTR_ADDU a1, a1, a3
1:
	and	v1, a2, 7		# v1 = leftover bytes after whole dwords
	PTR_SUBU a3, a2, v1		# a3 = byte count covered by whole dwords
	beq	a3, zero, .Lsmallcpy	# no whole dword left, finish bytewise
	move	a2, v1			# (delay slot) a2 = bytes left for the byte loop
	PTR_ADDU a3, a3, a0		# compute ending address
2:
	LDHI	v1, 0(a0)		# copy dwords a0 unaligned, a1 aligned
	LDLO	v1, 7(a0)
	PTR_ADDU a0, a0, 8
	sd	v1, 0(a1)
	PTR_ADDU a1, a1, 8
	bne	a0, a3, 2b
	nop				# We have to do this mmu-bug.
	b	.Lsmallcpy
	nop

	/* Source and destination share the same low three address bits. */
.Laligned:
	beq	a3, zero, 1f		# already dword aligned
	PTR_SUBU a2, a2, a3		# (delay slot) subtract from remaining count
	LDHI	v1, 0(a0)		# copy 1-7 bytes to align
	PTR_ADDU a0, a0, a3
	SDHI	v1, 0(a1)
	PTR_ADDU a1, a1, a3
1:
	and	v1, a2, 7		# compute number of whole dwords left
	PTR_SUBU a3, a2, v1		# a3 = byte count covered by whole dwords
	beq	a3, zero, .Lsmallcpy	# no whole dword left, finish bytewise
	move	a2, v1			# (delay slot) a2 = bytes left for the byte loop
	PTR_ADDU a3, a3, a0		# compute ending address
2:
	ld	v1, 0(a0)		# copy dwords
	PTR_ADDU a0, a0, 8
	sd	v1, 0(a1)
	bne	a0, a3, 2b
	PTR_ADDU a1, a1, 8		# (delay slot) advance destination
	/* falls through to .Lsmallcpy for the remaining 0-7 bytes */
#endif

	/* Byte-at-a-time forward copy of the a2 bytes that remain. */
.Lsmallcpy:
	ble	a2, zero, 2f		# nothing left to copy
	PTR_ADDU a3, a2, a0		# (delay slot) compute ending address
1:
	lbu	v1, 0(a0)		# copy bytes
	PTR_ADDU a0, a0, 1
	sb	v1, 0(a1)
	bne	a0, a3, 1b
	PTR_ADDU a1, a1, 1		# MMU BUG ? can not do -1(a1) at 0x80000000!!
2:
	j	ra
	nop
END(memcpy)