#include <sys/param.h>
#include <sys/errno.h>
#include <sys/asm_linkage.h>
#include <sys/vtrace.h>
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/asi.h>
#include <sys/fsr.h>
#include <sys/privregs.h>
#include <sys/fpras_impl.h>
#include "assym.h"
/*
 * Tunables and helper macros shared by the copy routines in this file.
 */

/* Copies of this many bytes or fewer take the small (leaf-routine) path. */
#define VIS_COPY_THRESHOLD 256
/* Counts <= SHORTCOPY are moved one byte at a time. */
#define SHORTCOPY 3
/* Counts above CHKSIZE (but <= VIS_COPY_THRESHOLD) use the medium loops. */
#define CHKSIZE 39
/*
 * Flag bits OR'd into the low bits of the saved t_lofault value:
 *   FPUSED_FLAG - fp registers were saved and must be restored on error
 *   TRAMP_FLAG  - a pre-existing lofault handler must be re-invoked on error
 *   MASK_FLAGS  - mask covering both flag bits
 */
#define FPUSED_FLAG 1
#define TRAMP_FLAG 2
#define MASK_FLAGS 3
/* Prefetch distances, in units of VIS_BLOCKSIZE, for the block-copy loops. */
#define CHEETAH_PREFETCH 8
#define CHEETAH_2ND_PREFETCH 5
/* Size of a VIS block load/store (stda/ldda with ASI_BLK_P). */
#define VIS_BLOCKSIZE 64
/*
 * Stack frame must accommodate a VIS_BLOCKSIZE-aligned save area large
 * enough for two blocks of fp registers, plus 2 * 8 bytes for the saved
 * %fprs and %gsr values.
 */
#define HWCOPYFRAMESIZE ((VIS_BLOCKSIZE * (2 + 1)) + (2 * 8))
#define SAVED_FPREGS_OFFSET (VIS_BLOCKSIZE * 3)
#define SAVED_FPREGS_ADJUST ((VIS_BLOCKSIZE * 2) - 1)
#define SAVED_FPRS_OFFSET (SAVED_FPREGS_OFFSET + 8)
#define SAVED_GSR_OFFSET (SAVED_FPRS_OFFSET + 8)
/*
 * Zero fp quads 1 and 3 (%f0-%f14 and %f32-%f46) by fzero'ing %f0/%f2
 * and propagating the zeros with faddd/fmuld, so no stale data leaks
 * through the fp registers when they were not in use on entry.
 */
#define FZEROQ1Q3 \
fzero %f0 ;\
fzero %f2 ;\
faddd %f0, %f2, %f4 ;\
fmuld %f0, %f2, %f6 ;\
faddd %f0, %f2, %f8 ;\
fmuld %f0, %f2, %f10 ;\
faddd %f0, %f2, %f12 ;\
fmuld %f0, %f2, %f14 ;\
faddd %f0, %f2, %f32 ;\
fmuld %f0, %f2, %f34 ;\
faddd %f0, %f2, %f36 ;\
fmuld %f0, %f2, %f38 ;\
faddd %f0, %f2, %f40 ;\
fmuld %f0, %f2, %f42 ;\
faddd %f0, %f2, %f44 ;\
fmuld %f0, %f2, %f46
/*
 * Zero fp quads 2 and 4 (%f16-%f30 and %f48-%f62); same technique as
 * FZEROQ1Q3 but seeded from %f16/%f18.
 */
#define FZEROQ2Q4 \
fzero %f16 ;\
fzero %f18 ;\
faddd %f16, %f18, %f20 ;\
fmuld %f16, %f18, %f22 ;\
faddd %f16, %f18, %f24 ;\
fmuld %f16, %f18, %f26 ;\
faddd %f16, %f18, %f28 ;\
fmuld %f16, %f18, %f30 ;\
faddd %f16, %f18, %f48 ;\
fmuld %f16, %f18, %f50 ;\
faddd %f16, %f18, %f52 ;\
fmuld %f16, %f18, %f54 ;\
faddd %f16, %f18, %f56 ;\
fmuld %f16, %f18, %f58 ;\
faddd %f16, %f18, %f60 ;\
fmuld %f16, %f18, %f62
/*
 * Block-store fp quads 1 and 3 to the VIS_BLOCKSIZE-aligned save area
 * carved out below %fp.  tmp1 is a scratch register; the membar orders
 * the block stores before any subsequent fp use.
 */
#define BST_FPQ1Q3_TOSTACK(tmp1) \
;\
add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
and tmp1, -VIS_BLOCKSIZE, tmp1 ;\
stda %f0, [tmp1]ASI_BLK_P ;\
add tmp1, VIS_BLOCKSIZE, tmp1 ;\
stda %f32, [tmp1]ASI_BLK_P ;\
membar #Sync
/* Block-load fp quads 1 and 3 back from the stack save area. */
#define BLD_FPQ1Q3_FROMSTACK(tmp1) \
;\
add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
and tmp1, -VIS_BLOCKSIZE, tmp1 ;\
ldda [tmp1]ASI_BLK_P, %f0 ;\
add tmp1, VIS_BLOCKSIZE, tmp1 ;\
ldda [tmp1]ASI_BLK_P, %f32 ;\
membar #Sync
/* Block-store fp quads 2 and 4 to the stack save area. */
#define BST_FPQ2Q4_TOSTACK(tmp1) \
;\
add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
and tmp1, -VIS_BLOCKSIZE, tmp1 ;\
stda %f16, [tmp1]ASI_BLK_P ;\
add tmp1, VIS_BLOCKSIZE, tmp1 ;\
stda %f48, [tmp1]ASI_BLK_P ;\
membar #Sync
/* Block-load fp quads 2 and 4 back from the stack save area. */
#define BLD_FPQ2Q4_FROMSTACK(tmp1) \
;\
add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
and tmp1, -VIS_BLOCKSIZE, tmp1 ;\
ldda [tmp1]ASI_BLK_P, %f16 ;\
add tmp1, VIS_BLOCKSIZE, tmp1 ;\
ldda [tmp1]ASI_BLK_P, %f48 ;\
membar #Sync
/*
 * Keep this thread from migrating while the fp registers hold copy data.
 * If the thread has no lwp, just bump t_preempt directly; otherwise call
 * thread_nomigrate().  Clobbers %o0 and %o1.  label1/label2 are numeric
 * local labels supplied by the caller.
 */
#define FP_NOMIGRATE(label1, label2) \
ldn [THREAD_REG + T_LWP], %o0 ;\
brz,a,pn %o0, label1##f ;\
ldsb [THREAD_REG + T_PREEMPT], %o1 ;\
call thread_nomigrate ;\
nop ;\
ba label2##f ;\
nop ;\
label1: ;\
inc %o1 ;\
stb %o1, [THREAD_REG + T_PREEMPT] ;\
label2:
/*
 * Undo FP_NOMIGRATE.  For lwp-less threads, decrement t_preempt and, if
 * it drops to zero with a kernel preemption pending (cpu_kprunrun set),
 * call kpreempt().  Clobbers %o0 and %o1.
 */
#define FP_ALLOWMIGRATE(label1, label2) \
ldn [THREAD_REG + T_LWP], %o0 ;\
brz,a,pn %o0, label1##f ;\
ldsb [THREAD_REG + T_PREEMPT], %o1 ;\
call thread_allowmigrate ;\
nop ;\
ba label2##f ;\
nop ;\
label1: ;\
dec %o1 ;\
brnz,pn %o1, label2##f ;\
stb %o1, [THREAD_REG + T_PREEMPT] ;\
ldn [THREAD_REG + T_CPU], %o0 ;\
ldub [%o0 + CPU_KPRUNRUN], %o0 ;\
brz,pt %o0, label2##f ;\
nop ;\
call kpreempt ;\
rdpr %pil, %o0 ;\
label2:
.seg ".text"
.align 4
/*
 * kcopy(from %o0, to %o1, count %o2)
 *
 * Copy with fault protection: a lofault handler is installed in
 * t_lofault before copying, so a fault during the copy returns an
 * error code (taken from %g1 -- presumably set by the trap handler;
 * NOTE(review): confirm against the trap code) instead of panicking.
 * Returns 0 on success.
 *
 * Path selection: counts <= VIS_COPY_THRESHOLD take the leaf-routine
 * small path; larger counts are compared against the per-alignment
 * hw_copy_limit_{1,2,4,8} globals (0 disables the HW path) to choose
 * between the small path and the windowed block-copy path (.do_copy,
 * shared with bcopy).
 */
ENTRY(kcopy)
cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
bleu,pt %ncc, .kcopy_small ! go to larger cases
xor %o0, %o1, %o3 ! are src, dst alignable?
btst 7, %o3 !
bz,pt %ncc, .kcopy_8 ! check for longword alignment
nop
btst 1, %o3 !
bz,pt %ncc, .kcopy_2 ! check for half-word
nop
sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_1)], %o3
tst %o3
bz,pn %icc, .kcopy_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .kcopy_small ! go to small copy
nop
ba,pt %ncc, .kcopy_more ! otherwise go to large copy
nop
.kcopy_2:
btst 3, %o3 !
bz,pt %ncc, .kcopy_4 ! check for word alignment
nop
sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_2)], %o3
tst %o3
bz,pn %icc, .kcopy_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .kcopy_small ! go to small copy
nop
ba,pt %ncc, .kcopy_more ! otherwise go to large copy
nop
.kcopy_4:
! already checked longword, must be word aligned
sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_4)], %o3
tst %o3
bz,pn %icc, .kcopy_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .kcopy_small ! go to small copy
nop
ba,pt %ncc, .kcopy_more ! otherwise go to large copy
nop
.kcopy_8:
sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_8)], %o3
tst %o3
bz,pn %icc, .kcopy_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .kcopy_small ! go to small copy
nop
ba,pt %ncc, .kcopy_more ! otherwise go to large copy
nop
.kcopy_small:
! Leaf path: install .sm_copyerr and fall into the bcopy small-copy
! common code.  %o4 carries the saved t_lofault (no TRAMP_FLAG here,
! so a fault just returns the error to our caller).
sethi %hi(.sm_copyerr), %o5 ! sm_copyerr is lofault value
or %o5, %lo(.sm_copyerr), %o5
ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler
membar #Sync ! sync error barrier
ba,pt %ncc, .sm_do_copy ! common code
stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault
.kcopy_more:
! Windowed path: get a frame with room for the fp save area, install
! .copyerr, and fall into the bcopy block-copy common code (.do_copy).
save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
sethi %hi(.copyerr), %l7 ! copyerr is lofault value
or %l7, %lo(.copyerr), %l7
ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler
membar #Sync ! sync error barrier
ba,pt %ncc, .do_copy ! common code
stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault
/*
 * We got here because of a fault during a bcopy_more/kcopy_more copy.
 * Errno value is in %g1.  While restoring fp state, .copyerr2 guards
 * against a second fault (which would be fatal).
 */
.copyerr:
set .copyerr2, %l0
membar #Sync ! sync error barrier
stn %l0, [THREAD_REG + T_LOFAULT] ! set t_lofault
btst FPUSED_FLAG, %l6
bz %ncc, 1f
and %l6, TRAMP_FLAG, %l0 ! copy trampoline flag to %l0
ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
wr %o2, 0, %gsr
ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
btst FPRS_FEF, %o3
bz,pt %icc, 4f
nop
! fp was live on entry: reload the caller's fp registers
BLD_FPQ1Q3_FROMSTACK(%o2)
ba,pt %ncc, 1f
wr %o3, 0, %fprs ! restore fprs
4:
! fp was not in use on entry: scrub the registers we dirtied
FZEROQ1Q3
wr %o3, 0, %fprs ! restore fprs
!
! Need to cater for the different expectations of kcopy
! and bcopy. kcopy will *always* set a t_lofault handler
! If it fires, we're expected to just return the error code
! and *not* to invoke any existing error handler. As far as
! bcopy is concerned, we only set t_lofault if there was an
! existing lofault handler. In that case we're expected to
! invoke the previously existing handler after resetting the
! t_lofault value.
!
1:
andn %l6, MASK_FLAGS, %l6 ! turn trampoline flag off
membar #Sync ! sync error barrier
stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
FP_ALLOWMIGRATE(5, 6)
btst TRAMP_FLAG, %l0
bnz,pn %ncc, 3f
nop
ret
restore %g1, 0, %o0 ! return error code from %g1
3:
!
! We're here via bcopy. There *must* have been an error handler
! in place otherwise we would have died a nasty death already.
!
jmp %l6 ! goto real handler
restore %g0, 0, %o0 ! dispose of copy window
/* A fault while restoring fp state is unrecoverable: panic. */
fp_panic_msg:
.asciz "Unable to restore fp state after copy operation"
.align 4
.copyerr2:
set fp_panic_msg, %o0
call panic
nop
/*
 * We got here because of a fault in .copyerr (or a small-path fault).
 * Errno value is in %g1.  If TRAMP_FLAG is set in the saved %o4,
 * bounce to the pre-existing handler; otherwise return the error.
 */
.sm_copyerr:
1:
btst TRAMP_FLAG, %o4
membar #Sync
andn %o4, TRAMP_FLAG, %o4
bnz,pn %ncc, 3f
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g1, %o0
3:
jmp %o4 ! goto real handler
mov %g0, %o0 !
SET_SIZE(kcopy)
/*
 * bcopy(from %o0, to %o1, count %o2)
 *
 * Copy a block of storage, returning 0 (the small paths return 0
 * explicitly; the large path goes through bcopy_more).  Unlike kcopy,
 * a lofault handler is installed only if one already existed, in
 * which case a fault trampolines to that pre-existing handler.
 *
 * Path selection mirrors kcopy: small counts take the leaf routines
 * below; larger counts consult hw_copy_limit_{1,2,4,8} (0 disables
 * the HW path) before entering .bcopy_more.
 */
ENTRY(bcopy)
cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
bleu,pt %ncc, .bcopy_small ! go to larger cases
xor %o0, %o1, %o3 ! are src, dst alignable?
btst 7, %o3 !
bz,pt %ncc, .bcopy_8 ! check for longword alignment
nop
btst 1, %o3 !
bz,pt %ncc, .bcopy_2 ! check for half-word
nop
sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_1)], %o3
tst %o3
bz,pn %icc, .bcopy_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .bcopy_small ! go to small copy
nop
ba,pt %ncc, .bcopy_more ! otherwise go to large copy
nop
.bcopy_2:
btst 3, %o3 !
bz,pt %ncc, .bcopy_4 ! check for word alignment
nop
sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_2)], %o3
tst %o3
bz,pn %icc, .bcopy_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .bcopy_small ! go to small copy
nop
ba,pt %ncc, .bcopy_more ! otherwise go to large copy
nop
.bcopy_4:
! already checked longword, must be word aligned
sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_4)], %o3
tst %o3
bz,pn %icc, .bcopy_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .bcopy_small ! go to small copy
nop
ba,pt %ncc, .bcopy_more ! otherwise go to large copy
nop
.bcopy_8:
sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_8)], %o3
tst %o3
bz,pn %icc, .bcopy_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .bcopy_small ! go to small copy
nop
ba,pt %ncc, .bcopy_more ! otherwise go to large copy
nop
.align 16
.bcopy_small:
! Install .sm_copyerr only if a handler already exists; mark the
! saved value with TRAMP_FLAG so the error path bounces to it.
ldn [THREAD_REG + T_LOFAULT], %o4 ! save t_lofault
tst %o4
bz,pt %icc, .sm_do_copy
nop
sethi %hi(.sm_copyerr), %o5
or %o5, %lo(.sm_copyerr), %o5
membar #Sync ! sync error barrier
stn %o5, [THREAD_REG + T_LOFAULT] ! install new vector
or %o4, TRAMP_FLAG, %o4 ! error should trampoline
.sm_do_copy:
! Common small-copy code (also entered from kcopy).  Dispatch on
! count: <= SHORTCOPY bytes, <= CHKSIZE medium, else .bc_med; the
! medium path further dispatches on mutual src/dst alignment.
cmp %o2, SHORTCOPY ! check for really short case
bleu,pt %ncc, .bc_sm_left !
cmp %o2, CHKSIZE ! check for medium length cases
bgu,pn %ncc, .bc_med !
or %o0, %o1, %o3 ! prepare alignment check
andcc %o3, 0x3, %g0 ! test for alignment
bz,pt %ncc, .bc_sm_word ! branch to word aligned case
.bc_sm_movebytes:
sub %o2, 3, %o2 ! adjust count to allow cc zero test
.bc_sm_notalign4:
! unaligned byte loop, unrolled x4
ldub [%o0], %o3 ! read byte
stb %o3, [%o1] ! write byte
subcc %o2, 4, %o2 ! reduce count by 4
ldub [%o0 + 1], %o3 ! repeat for a total of 4 bytes
add %o0, 4, %o0 ! advance SRC by 4
stb %o3, [%o1 + 1]
ldub [%o0 - 2], %o3
add %o1, 4, %o1 ! advance DST by 4
stb %o3, [%o1 - 2]
ldub [%o0 - 1], %o3
bgt,pt %ncc, .bc_sm_notalign4 ! loop til 3 or fewer bytes remain
stb %o3, [%o1 - 1]
add %o2, 3, %o2 ! restore count
.bc_sm_left:
! move the final 0-3 bytes
tst %o2
bz,pt %ncc, .bc_sm_exit ! check for zero length
deccc %o2 ! reduce count for cc test
ldub [%o0], %o3 ! move one byte
bz,pt %ncc, .bc_sm_exit
stb %o3, [%o1]
ldub [%o0 + 1], %o3 ! move another byte
deccc %o2 ! check for more
bz,pt %ncc, .bc_sm_exit
stb %o3, [%o1 + 1]
ldub [%o0 + 2], %o3 ! move final byte
stb %o3, [%o1 + 2]
membar #Sync ! sync error barrier
andn %o4, TRAMP_FLAG, %o4
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return 0
.align 16
nop ! instruction alignment
! see discussion at start of file
.bc_sm_words:
! word loop, unrolled x2 (8 bytes per iteration)
lduw [%o0], %o3 ! read word
.bc_sm_wordx:
subcc %o2, 8, %o2 ! update count
stw %o3, [%o1] ! write word
add %o0, 8, %o0 ! update SRC
lduw [%o0 - 4], %o3 ! read word
add %o1, 8, %o1 ! update DST
bgt,pt %ncc, .bc_sm_words ! loop til done
stw %o3, [%o1 - 4] ! write word
addcc %o2, 7, %o2 ! restore count
bz,pt %ncc, .bc_sm_exit
deccc %o2
bz,pt %ncc, .bc_sm_byte
.bc_sm_half:
! halfword loop for the remaining 2-byte chunks
subcc %o2, 2, %o2 ! reduce count by 2
add %o0, 2, %o0 ! advance SRC by 2
lduh [%o0 - 2], %o3 ! read half word
add %o1, 2, %o1 ! advance DST by 2
bgt,pt %ncc, .bc_sm_half ! loop til done
sth %o3, [%o1 - 2] ! write half word
addcc %o2, 1, %o2 ! restore count
bz,pt %ncc, .bc_sm_exit
nop
.bc_sm_byte:
! final single byte, then exit
ldub [%o0], %o3
stb %o3, [%o1]
membar #Sync ! sync error barrier
andn %o4, TRAMP_FLAG, %o4
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return 0
.bc_sm_word:
! word-aligned small copy: one word, then up to 3 trailing bytes
subcc %o2, 4, %o2 ! update count
bgt,pt %ncc, .bc_sm_wordx
lduw [%o0], %o3 ! read word
addcc %o2, 3, %o2 ! restore count
bz,pt %ncc, .bc_sm_exit
stw %o3, [%o1] ! write word
deccc %o2 ! reduce count for cc test
ldub [%o0 + 4], %o3 ! load one byte
bz,pt %ncc, .bc_sm_exit
stb %o3, [%o1 + 4] ! store one byte
ldub [%o0 + 5], %o3 ! load second byte
deccc %o2
bz,pt %ncc, .bc_sm_exit
stb %o3, [%o1 + 5] ! store second byte
ldub [%o0 + 6], %o3 ! load third byte
stb %o3, [%o1 + 6] ! store third byte
.bc_sm_exit:
! common exit: restore t_lofault (stripping TRAMP_FLAG) and return 0
membar #Sync ! sync error barrier
andn %o4, TRAMP_FLAG, %o4
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return 0
.align 16
.bc_med:
! medium-length copy: dispatch on mutual src/dst alignment
xor %o0, %o1, %o3 ! setup alignment check
btst 1, %o3
bnz,pt %ncc, .bc_sm_movebytes ! unaligned
nop
btst 3, %o3
bnz,pt %ncc, .bc_med_half ! halfword aligned
nop
btst 7, %o3
bnz,pt %ncc, .bc_med_word ! word aligned
nop
.bc_med_long:
btst 3, %o0 ! check for
bz,pt %ncc, .bc_med_long1 ! word alignment
nop
.bc_med_long0:
! byte-at-a-time until SRC is word aligned
ldub [%o0], %o3 ! load one byte
inc %o0
stb %o3,[%o1] ! store byte
inc %o1
btst 3, %o0
bnz,pt %ncc, .bc_med_long0
dec %o2
.bc_med_long1: ! word aligned
btst 7, %o0 ! check for long word
bz,pt %ncc, .bc_med_long2
nop
lduw [%o0], %o3 ! load word
add %o0, 4, %o0 ! advance SRC by 4
stw %o3, [%o1] ! store word
add %o1, 4, %o1 ! advance DST by 4
sub %o2, 4, %o2 ! reduce count by 4
!
! Now long word aligned and have at least 32 bytes to move
!
.bc_med_long2:
sub %o2, 31, %o2 ! adjust count to allow cc zero test
.bc_med_lmove:
! long-word loop, unrolled x4 (32 bytes per iteration)
ldx [%o0], %o3 ! read long word
stx %o3, [%o1] ! write long word
subcc %o2, 32, %o2 ! reduce count by 32
ldx [%o0 + 8], %o3 ! repeat for a total for 4 long words
add %o0, 32, %o0 ! advance SRC by 32
stx %o3, [%o1 + 8]
ldx [%o0 - 16], %o3
add %o1, 32, %o1 ! advance DST by 32
stx %o3, [%o1 - 16]
ldx [%o0 - 8], %o3
bgt,pt %ncc, .bc_med_lmove ! loop til 31 or fewer bytes left
stx %o3, [%o1 - 8]
addcc %o2, 24, %o2 ! restore count to long word offset
ble,pt %ncc, .bc_med_lextra ! check for more long words to move
nop
.bc_med_lword:
ldx [%o0], %o3 ! read long word
subcc %o2, 8, %o2 ! reduce count by 8
stx %o3, [%o1] ! write long word
add %o0, 8, %o0 ! advance SRC by 8
bgt,pt %ncc, .bc_med_lword ! loop til 7 or fewer bytes left
add %o1, 8, %o1 ! advance DST by 8
.bc_med_lextra:
! 0-7 bytes remain; finish in the small-copy tails
addcc %o2, 7, %o2 ! restore rest of count
bz,pt %ncc, .bc_sm_exit ! if zero, then done
deccc %o2
bz,pt %ncc, .bc_sm_byte
nop
ba,pt %ncc, .bc_sm_half
nop
.align 16
.bc_med_word:
btst 3, %o0 ! check for
bz,pt %ncc, .bc_med_word1 ! word alignment
nop
.bc_med_word0:
! byte-at-a-time until SRC is word aligned
ldub [%o0], %o3 ! load one byte
inc %o0
stb %o3,[%o1] ! store byte
inc %o1
btst 3, %o0
bnz,pt %ncc, .bc_med_word0
dec %o2
!
! Now word aligned and have at least 36 bytes to move
!
.bc_med_word1:
sub %o2, 15, %o2 ! adjust count to allow cc zero test
.bc_med_wmove:
! word loop, unrolled x4 (16 bytes per iteration)
lduw [%o0], %o3 ! read word
stw %o3, [%o1] ! write word
subcc %o2, 16, %o2 ! reduce count by 16
lduw [%o0 + 4], %o3 ! repeat for a total for 4 words
add %o0, 16, %o0 ! advance SRC by 16
stw %o3, [%o1 + 4]
lduw [%o0 - 8], %o3
add %o1, 16, %o1 ! advance DST by 16
stw %o3, [%o1 - 8]
lduw [%o0 - 4], %o3
bgt,pt %ncc, .bc_med_wmove ! loop til 15 or fewer bytes left
stw %o3, [%o1 - 4]
addcc %o2, 12, %o2 ! restore count to word offset
ble,pt %ncc, .bc_med_wextra ! check for more words to move
nop
.bc_med_word2:
lduw [%o0], %o3 ! read word
subcc %o2, 4, %o2 ! reduce count by 4
stw %o3, [%o1] ! write word
add %o0, 4, %o0 ! advance SRC by 4
bgt,pt %ncc, .bc_med_word2 ! loop til 3 or fewer bytes left
add %o1, 4, %o1 ! advance DST by 4
.bc_med_wextra:
! 0-3 bytes remain; finish in the small-copy tails
addcc %o2, 3, %o2 ! restore rest of count
bz,pt %ncc, .bc_sm_exit ! if zero, then done
deccc %o2
bz,pt %ncc, .bc_sm_byte
nop
ba,pt %ncc, .bc_sm_half
nop
.align 16
.bc_med_half:
btst 1, %o0 ! check for
bz,pt %ncc, .bc_med_half1 ! half word alignment
nop
ldub [%o0], %o3 ! load one byte
inc %o0
stb %o3,[%o1] ! store byte
inc %o1
dec %o2
!
! Now half word aligned and have at least 38 bytes to move
!
.bc_med_half1:
sub %o2, 7, %o2 ! adjust count to allow cc zero test
.bc_med_hmove:
! halfword loop, unrolled x4 (8 bytes per iteration)
lduh [%o0], %o3 ! read half word
sth %o3, [%o1] ! write half word
subcc %o2, 8, %o2 ! reduce count by 8
lduh [%o0 + 2], %o3 ! repeat for a total for 4 halfwords
add %o0, 8, %o0 ! advance SRC by 8
sth %o3, [%o1 + 2]
lduh [%o0 - 4], %o3
add %o1, 8, %o1 ! advance DST by 8
sth %o3, [%o1 - 4]
lduh [%o0 - 2], %o3
bgt,pt %ncc, .bc_med_hmove ! loop til 7 or fewer bytes left
sth %o3, [%o1 - 2]
addcc %o2, 7, %o2 ! restore count
bz,pt %ncc, .bc_sm_exit
deccc %o2
bz,pt %ncc, .bc_sm_byte
nop
ba,pt %ncc, .bc_sm_half
nop
SET_SIZE(bcopy)
/*
 * bcopy_more: large-copy path shared by bcopy (entered here) and kcopy
 * (which branches straight to .do_copy with its own frame/handler).
 *
 * Saves fp state (%fprs, %gsr, and fp quads 1&3 if fp was live) on the
 * stack, pins the thread to its CPU, aligns DST to a VIS_BLOCKSIZE
 * boundary, then streams the data through %f0-%f14/%f32-%f46 using
 * faligndata (misaligned src) or fsrc1 (8-byte-aligned src) and
 * 64-byte block stores, with prefetching.  Returns 0 in %o0.
 */
ENTRY(bcopy_more)
.bcopy_more:
save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
ldn [THREAD_REG + T_LOFAULT], %l6 ! save t_lofault
tst %l6
bz,pt %ncc, .do_copy
nop
sethi %hi(.copyerr), %o2
or %o2, %lo(.copyerr), %o2
membar #Sync ! sync error barrier
stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector
!
! We've already captured whether t_lofault was zero on entry.
! We need to mark ourselves as being from bcopy since both
! kcopy and bcopy use the same code path. If TRAMP_FLAG is set
! and the saved lofault was zero, we won't reset lofault on
! returning.
!
or %l6, TRAMP_FLAG, %l6
.do_copy:
FP_NOMIGRATE(6, 7)
rd %fprs, %o2 ! check for unused fp
st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
btst FPRS_FEF, %o2
bz,a,pt %icc, .do_blockcopy
wr %g0, FPRS_FEF, %fprs
! fp is live: save quads 1&3 before we clobber them
BST_FPQ1Q3_TOSTACK(%o2)
.do_blockcopy:
rd %gsr, %o2
stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
or %l6, FPUSED_FLAG, %l6 ! error path must restore fp
! Register roles for the block-copy loop (window input regs):
#define REALSRC %i0
#define DST %i1
#define CNT %i2
#define SRC %i3
#define TMP %i5
andcc DST, VIS_BLOCKSIZE - 1, TMP
bz,pt %ncc, 2f
neg TMP
add TMP, VIS_BLOCKSIZE, TMP
! TMP = bytes required to align DST on FP_BLOCK boundary
! Using SRC as a tmp here
cmp TMP, 3
bleu,pt %ncc, 1f
sub CNT,TMP,CNT ! adjust main count
sub TMP, 3, TMP ! adjust for end of loop test
.bc_blkalign:
ldub [REALSRC], SRC ! move 4 bytes per loop iteration
stb SRC, [DST]
subcc TMP, 4, TMP
ldub [REALSRC + 1], SRC
add REALSRC, 4, REALSRC
stb SRC, [DST + 1]
ldub [REALSRC - 2], SRC
add DST, 4, DST
stb SRC, [DST - 2]
ldub [REALSRC - 1], SRC
bgu,pt %ncc, .bc_blkalign
stb SRC, [DST - 1]
addcc TMP, 3, TMP ! restore count adjustment
bz,pt %ncc, 2f ! no bytes left?
nop
1: ldub [REALSRC], SRC
inc REALSRC
inc DST
deccc TMP
bgu %ncc, 1b
stb SRC, [DST - 1]
2:
! Prime the faligndata pipeline: load the first block and set the
! alignment offset from REALSRC's low bits.
andn REALSRC, 0x7, SRC
alignaddr REALSRC, %g0, %g0
! SRC - 8-byte aligned
! DST - 64-byte aligned
prefetch [SRC], #one_read
prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
ldd [SRC], %f0
#if CHEETAH_PREFETCH > 4
prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
#endif
ldd [SRC + 0x08], %f2
#if CHEETAH_PREFETCH > 5
prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
#endif
ldd [SRC + 0x10], %f4
#if CHEETAH_PREFETCH > 6
prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
#endif
faligndata %f0, %f2, %f32
ldd [SRC + 0x18], %f6
#if CHEETAH_PREFETCH > 7
prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
#endif
faligndata %f2, %f4, %f34
ldd [SRC + 0x20], %f8
faligndata %f4, %f6, %f36
ldd [SRC + 0x28], %f10
faligndata %f6, %f8, %f38
ldd [SRC + 0x30], %f12
faligndata %f8, %f10, %f40
ldd [SRC + 0x38], %f14
faligndata %f10, %f12, %f42
ldd [SRC + VIS_BLOCKSIZE], %f0
sub CNT, VIS_BLOCKSIZE, CNT
add SRC, VIS_BLOCKSIZE, SRC
add REALSRC, VIS_BLOCKSIZE, REALSRC
ba,a,pt %ncc, 1f
nop
.align 16
1:
! Steady-state loop: align+store one block while loading the next.
ldd [SRC + 0x08], %f2
faligndata %f12, %f14, %f44
ldd [SRC + 0x10], %f4
faligndata %f14, %f0, %f46
stda %f32, [DST]ASI_BLK_P
ldd [SRC + 0x18], %f6
faligndata %f0, %f2, %f32
ldd [SRC + 0x20], %f8
faligndata %f2, %f4, %f34
ldd [SRC + 0x28], %f10
faligndata %f4, %f6, %f36
ldd [SRC + 0x30], %f12
faligndata %f6, %f8, %f38
ldd [SRC + 0x38], %f14
faligndata %f8, %f10, %f40
sub CNT, VIS_BLOCKSIZE, CNT
ldd [SRC + VIS_BLOCKSIZE], %f0
faligndata %f10, %f12, %f42
prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
add DST, VIS_BLOCKSIZE, DST
prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
add REALSRC, VIS_BLOCKSIZE, REALSRC
cmp CNT, VIS_BLOCKSIZE + 8
bgu,pt %ncc, 1b
add SRC, VIS_BLOCKSIZE, SRC
! exactly one block remains and source is 8-byte aligned:
! only if REALSRC & 0x7 is 0
cmp CNT, VIS_BLOCKSIZE
bne %ncc, 3f
andcc REALSRC, 0x7, %g0
bz,pt %ncc, 2f
nop
3:
! drain the pipeline: store the last aligned block, then finish
! any remaining bytes one at a time
faligndata %f12, %f14, %f44
faligndata %f14, %f0, %f46
stda %f32, [DST]ASI_BLK_P
add DST, VIS_BLOCKSIZE, DST
ba,pt %ncc, 3f
nop
2:
! aligned tail: copy the final two blocks with fsrc1 moves
ldd [SRC + 0x08], %f2
fsrc1 %f12, %f44
ldd [SRC + 0x10], %f4
fsrc1 %f14, %f46
stda %f32, [DST]ASI_BLK_P
ldd [SRC + 0x18], %f6
fsrc1 %f0, %f32
ldd [SRC + 0x20], %f8
fsrc1 %f2, %f34
ldd [SRC + 0x28], %f10
fsrc1 %f4, %f36
ldd [SRC + 0x30], %f12
fsrc1 %f6, %f38
ldd [SRC + 0x38], %f14
fsrc1 %f8, %f40
sub CNT, VIS_BLOCKSIZE, CNT
add DST, VIS_BLOCKSIZE, DST
add SRC, VIS_BLOCKSIZE, SRC
add REALSRC, VIS_BLOCKSIZE, REALSRC
fsrc1 %f10, %f42
fsrc1 %f12, %f44
fsrc1 %f14, %f46
stda %f32, [DST]ASI_BLK_P
add DST, VIS_BLOCKSIZE, DST
ba,a,pt %ncc, .bcb_exit
nop
3: tst CNT
bz,a,pt %ncc, .bcb_exit
nop
5: ldub [REALSRC], TMP
inc REALSRC
inc DST
deccc CNT
bgu %ncc, 5b
stb TMP, [DST - 1]
.bcb_exit:
! optional fp-ras verification of the copy (see fpras_impl.h),
! then restore %gsr/%fprs and either reload or zero the fp quads
membar #Sync
FPRAS_INTERVAL(FPRAS_BCOPY, 0, %l5, %o2, %o3, %o4, %o5, 8)
FPRAS_REWRITE_TYPE2Q1(0, %l5, %o2, %o3, 8, 9)
FPRAS_CHECK(FPRAS_BCOPY, %l5, 9) ! outputs lost
ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
wr %o2, 0, %gsr
ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
btst FPRS_FEF, %o3
bz,pt %icc, 4f
nop
BLD_FPQ1Q3_FROMSTACK(%o2)
ba,pt %ncc, 2f
wr %o3, 0, %fprs ! restore fprs
4:
FZEROQ1Q3
wr %o3, 0, %fprs ! restore fprs
2:
membar #Sync ! sync error barrier
andn %l6, MASK_FLAGS, %l6
stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
FP_ALLOWMIGRATE(5, 6)
ret
restore %g0, 0, %o0 ! return 0
SET_SIZE(bcopy_more)
/*
 * ovbcopy(from %o0, to %o1, count %o2)
 *
 * Block copy with possibly overlapping source and destination.
 * If the regions do not actually overlap (count <= |from - to|),
 * tail-call bcopy; otherwise copy byte-by-byte, forwards when
 * from >= to and backwards when from < to, so the overlap region
 * is never clobbered before it is read.
 */
ENTRY(ovbcopy)
tst %o2 ! check count
bgu,a %ncc, 1f ! nothing to do or bad arguments
subcc %o0, %o1, %o3 ! difference of from and to address
retl ! return
nop
1:
bneg,a %ncc, 2f
neg %o3 ! if < 0, make it positive
2: cmp %o2, %o3 ! cmp size and abs(from - to)
bleu %ncc, bcopy ! if size <= abs(diff): use bcopy,
.empty ! no overlap
cmp %o0, %o1 ! compare from and to addresses
blu %ncc, .ov_bkwd ! if from < to, copy backwards
nop
!
! Copy forwards.
!
.ov_fwd:
ldub [%o0], %o3 ! read from address
inc %o0 ! inc from address
stb %o3, [%o1] ! write to address
deccc %o2 ! dec count
bgu %ncc, .ov_fwd ! loop till done
inc %o1 ! inc to address
retl ! return
nop
!
! Copy backwards.
!
.ov_bkwd:
deccc %o2 ! dec count
ldub [%o0 + %o2], %o3 ! get byte at end of src
bgu %ncc, .ov_bkwd ! loop till done
stb %o3, [%o1 + %o2] ! delay slot, store at end of dst
retl ! return
nop
SET_SIZE(ovbcopy)
/*
 * hwblkpagecopy(src %i0, dst %i1)
 *
 * Copy one page (PAGESIZE bytes) using 64-byte VIS block loads and
 * stores through fp quads 1 and 3.  Saves the caller's fp registers
 * on the stack if fp was live, and either restores or zeroes them on
 * exit.  No lofault protection is installed here.  Uses the
 * REALSRC/DST/CNT/SRC register aliases defined for the block-copy
 * code above.
 */
ENTRY(hwblkpagecopy)
! get another window w/space for three aligned blocks of saved fpregs
save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
! %i0 - source address (arg)
! %i1 - destination address (arg)
! %i2 - length of region (not arg)
! %l0 - saved fprs
! %l1 - pointer to saved fpregs
rd %fprs, %l0 ! check for unused fp
btst FPRS_FEF, %l0
bz,a,pt %icc, 1f
wr %g0, FPRS_FEF, %fprs
! fp is live: save quads 1&3 before we clobber them
BST_FPQ1Q3_TOSTACK(%l1)
1: set PAGESIZE, CNT
mov REALSRC, SRC
! prime the pipeline: prefetch ahead and load the first block
prefetch [SRC], #one_read
prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
ldd [SRC], %f0
#if CHEETAH_PREFETCH > 4
prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
#endif
ldd [SRC + 0x08], %f2
#if CHEETAH_PREFETCH > 5
prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
#endif
ldd [SRC + 0x10], %f4
#if CHEETAH_PREFETCH > 6
prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
#endif
fsrc1 %f0, %f32
ldd [SRC + 0x18], %f6
#if CHEETAH_PREFETCH > 7
prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
#endif
fsrc1 %f2, %f34
ldd [SRC + 0x20], %f8
fsrc1 %f4, %f36
ldd [SRC + 0x28], %f10
fsrc1 %f6, %f38
ldd [SRC + 0x30], %f12
fsrc1 %f8, %f40
ldd [SRC + 0x38], %f14
fsrc1 %f10, %f42
ldd [SRC + VIS_BLOCKSIZE], %f0
sub CNT, VIS_BLOCKSIZE, CNT
add SRC, VIS_BLOCKSIZE, SRC
ba,a,pt %ncc, 2f
nop
.align 16
2:
! steady-state loop: store one block while loading the next
ldd [SRC + 0x08], %f2
fsrc1 %f12, %f44
ldd [SRC + 0x10], %f4
fsrc1 %f14, %f46
stda %f32, [DST]ASI_BLK_P
ldd [SRC + 0x18], %f6
fsrc1 %f0, %f32
ldd [SRC + 0x20], %f8
fsrc1 %f2, %f34
ldd [SRC + 0x28], %f10
fsrc1 %f4, %f36
ldd [SRC + 0x30], %f12
fsrc1 %f6, %f38
ldd [SRC + 0x38], %f14
fsrc1 %f8, %f40
ldd [SRC + VIS_BLOCKSIZE], %f0
fsrc1 %f10, %f42
prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
sub CNT, VIS_BLOCKSIZE, CNT
add DST, VIS_BLOCKSIZE, DST
cmp CNT, VIS_BLOCKSIZE + 8
prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
bgu,pt %ncc, 2b
add SRC, VIS_BLOCKSIZE, SRC
! trailing block
ldd [SRC + 0x08], %f2
fsrc1 %f12, %f44
ldd [SRC + 0x10], %f4
fsrc1 %f14, %f46
stda %f32, [DST]ASI_BLK_P
ldd [SRC + 0x18], %f6
fsrc1 %f0, %f32
ldd [SRC + 0x20], %f8
fsrc1 %f2, %f34
ldd [SRC + 0x28], %f10
fsrc1 %f4, %f36
ldd [SRC + 0x30], %f12
fsrc1 %f6, %f38
ldd [SRC + 0x38], %f14
fsrc1 %f8, %f40
sub CNT, VIS_BLOCKSIZE, CNT
add DST, VIS_BLOCKSIZE, DST
add SRC, VIS_BLOCKSIZE, SRC
fsrc1 %f10, %f42
fsrc1 %f12, %f44
fsrc1 %f14, %f46
stda %f32, [DST]ASI_BLK_P
! optional fp-ras verification, then restore/zero fp state
membar #Sync
FPRAS_INTERVAL(FPRAS_PGCOPY, 1, %l5, %o2, %o3, %o4, %o5, 8)
FPRAS_REWRITE_TYPE1(1, %l5, %f32, %o2, 9)
FPRAS_CHECK(FPRAS_PGCOPY, %l5, 9) ! lose outputs
btst FPRS_FEF, %l0
bz,pt %icc, 2f
nop
BLD_FPQ1Q3_FROMSTACK(%l3)
ba 3f
nop
2: FZEROQ1Q3
3: wr %l0, 0, %fprs ! restore fprs
ret
restore %g0, 0, %o0 ! return 0
SET_SIZE(hwblkpagecopy)
/*
 * Register aliases for the copyin/copyout paths.  The SAVE_* locals
 * preserve the original arguments across the windowed (large) copy so
 * copyio_fault can hand them to the real lofault handler; the
 * SM_SAVE_* set does the same for the leaf (small) paths, which have
 * no register window of their own.
 */
#define SAVE_SRC %l1
#define SAVE_DST %l2
#define SAVE_COUNT %l3
#define SM_SAVE_SRC %g4
#define SM_SAVE_DST %g5
#define SM_SAVE_COUNT %o5
#define ERRNO %l5
#define REAL_LOFAULT %l4
/*
 * copyio_fault: fault handler for the windowed copyin/copyout paths.
 * Entered with the error number in %g1 (preserved in ERRNO).  Restores
 * fp state (quads 2&4) if FPUSED_FLAG is set in %l6, restores the old
 * t_lofault, re-allows migration, then reloads the original src/dst/
 * count arguments and jumps to the real lofault handler (REAL_LOFAULT).
 */
ENTRY(copyio_fault)
membar #Sync
mov %g1,ERRNO ! save errno in ERRNO
btst FPUSED_FLAG, %l6
bz %ncc, 1f
nop
ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
wr %o2, 0, %gsr ! restore gsr
ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
btst FPRS_FEF, %o3
bz,pt %icc, 4f
nop
! fp was live on entry: reload the caller's fp registers
BLD_FPQ2Q4_FROMSTACK(%o2)
ba,pt %ncc, 1f
wr %o3, 0, %fprs ! restore fprs
4:
! fp was not in use on entry: scrub the registers we dirtied
FZEROQ2Q4
wr %o3, 0, %fprs ! restore fprs
1:
andn %l6, FPUSED_FLAG, %l6
membar #Sync
stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
FP_ALLOWMIGRATE(5, 6)
! hand the original arguments to the real handler
mov SAVE_SRC, %i0
mov SAVE_DST, %i1
jmp REAL_LOFAULT
mov SAVE_COUNT, %i2
SET_SIZE(copyio_fault)
ENTRY(copyout)
cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
bleu,pt %ncc, .copyout_small ! go to larger cases
xor %o0, %o1, %o3 ! are src, dst alignable?
btst 7, %o3 !
bz,pt %ncc, .copyout_8 ! check for longword alignment
nop
btst 1, %o3 !
bz,pt %ncc, .copyout_2 ! check for half-word
nop
sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_1)], %o3
tst %o3
bz,pn %icc, .copyout_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyout_small ! go to small copy
nop
ba,pt %ncc, .copyout_more ! otherwise go to large copy
nop
.copyout_2:
btst 3, %o3 !
bz,pt %ncc, .copyout_4 ! check for word alignment
nop
sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_2)], %o3
tst %o3
bz,pn %icc, .copyout_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyout_small ! go to small copy
nop
ba,pt %ncc, .copyout_more ! otherwise go to large copy
nop
.copyout_4:
! already checked longword, must be word aligned
sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_4)], %o3
tst %o3
bz,pn %icc, .copyout_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyout_small ! go to small copy
nop
ba,pt %ncc, .copyout_more ! otherwise go to large copy
nop
.copyout_8:
sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_8)], %o3
tst %o3
bz,pn %icc, .copyout_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyout_small ! go to small copy
nop
ba,pt %ncc, .copyout_more ! otherwise go to large copy
nop
.align 16
nop ! instruction alignment
! see discussion at start of file
.copyout_small:
sethi %hi(.sm_copyout_err), %o5 ! .sm_copyout_err is lofault
or %o5, %lo(.sm_copyout_err), %o5
ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler
membar #Sync ! sync error barrier
stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault
.sm_do_copyout:
mov %o0, SM_SAVE_SRC
mov %o1, SM_SAVE_DST
cmp %o2, SHORTCOPY ! check for really short case
bleu,pt %ncc, .co_sm_left !
mov %o2, SM_SAVE_COUNT
cmp %o2, CHKSIZE ! check for medium length cases
bgu,pn %ncc, .co_med !
or %o0, %o1, %o3 ! prepare alignment check
andcc %o3, 0x3, %g0 ! test for alignment
bz,pt %ncc, .co_sm_word ! branch to word aligned case
.co_sm_movebytes:
sub %o2, 3, %o2 ! adjust count to allow cc zero test
.co_sm_notalign4:
ldub [%o0], %o3 ! read byte
subcc %o2, 4, %o2 ! reduce count by 4
stba %o3, [%o1]ASI_USER ! write byte
inc %o1 ! advance DST by 1
ldub [%o0 + 1], %o3 ! repeat for a total of 4 bytes
add %o0, 4, %o0 ! advance SRC by 4
stba %o3, [%o1]ASI_USER
inc %o1 ! advance DST by 1
ldub [%o0 - 2], %o3
stba %o3, [%o1]ASI_USER
inc %o1 ! advance DST by 1
ldub [%o0 - 1], %o3
stba %o3, [%o1]ASI_USER
bgt,pt %ncc, .co_sm_notalign4 ! loop til 3 or fewer bytes remain
inc %o1 ! advance DST by 1
add %o2, 3, %o2 ! restore count
.co_sm_left:
tst %o2
bz,pt %ncc, .co_sm_exit ! check for zero length
nop
ldub [%o0], %o3 ! load one byte
deccc %o2 ! reduce count for cc test
bz,pt %ncc, .co_sm_exit
stba %o3,[%o1]ASI_USER ! store one byte
ldub [%o0 + 1], %o3 ! load second byte
deccc %o2
inc %o1
bz,pt %ncc, .co_sm_exit
stba %o3,[%o1]ASI_USER ! store second byte
ldub [%o0 + 2], %o3 ! load third byte
inc %o1
stba %o3,[%o1]ASI_USER ! store third byte
membar #Sync ! sync error barrier
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return 0
.align 16
.co_sm_words:
lduw [%o0], %o3 ! read word
.co_sm_wordx:
subcc %o2, 8, %o2 ! update count
stwa %o3, [%o1]ASI_USER ! write word
add %o0, 8, %o0 ! update SRC
lduw [%o0 - 4], %o3 ! read word
add %o1, 4, %o1 ! update DST
stwa %o3, [%o1]ASI_USER ! write word
bgt,pt %ncc, .co_sm_words ! loop til done
add %o1, 4, %o1 ! update DST
addcc %o2, 7, %o2 ! restore count
bz,pt %ncc, .co_sm_exit
nop
deccc %o2
bz,pt %ncc, .co_sm_byte
.co_sm_half:
subcc %o2, 2, %o2 ! reduce count by 2
lduh [%o0], %o3 ! read half word
add %o0, 2, %o0 ! advance SRC by 2
stha %o3, [%o1]ASI_USER ! write half word
bgt,pt %ncc, .co_sm_half ! loop til done
add %o1, 2, %o1 ! advance DST by 2
addcc %o2, 1, %o2 ! restore count
bz,pt %ncc, .co_sm_exit
nop
.co_sm_byte:
ldub [%o0], %o3
stba %o3, [%o1]ASI_USER
membar #Sync ! sync error barrier
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return 0
.align 16
.co_sm_word:
subcc %o2, 4, %o2 ! update count
bgt,pt %ncc, .co_sm_wordx
lduw [%o0], %o3 ! read word
addcc %o2, 3, %o2 ! restore count
bz,pt %ncc, .co_sm_exit
stwa %o3, [%o1]ASI_USER ! write word
deccc %o2 ! reduce count for cc test
ldub [%o0 + 4], %o3 ! load one byte
add %o1, 4, %o1
bz,pt %ncc, .co_sm_exit
stba %o3, [%o1]ASI_USER ! store one byte
ldub [%o0 + 5], %o3 ! load second byte
deccc %o2
inc %o1
bz,pt %ncc, .co_sm_exit
stba %o3, [%o1]ASI_USER ! store second byte
ldub [%o0 + 6], %o3 ! load third byte
inc %o1
stba %o3, [%o1]ASI_USER ! store third byte
.co_sm_exit:
membar #Sync ! sync error barrier
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return 0
.align 16
.co_med:
xor %o0, %o1, %o3 ! setup alignment check
btst 1, %o3
bnz,pt %ncc, .co_sm_movebytes ! unaligned
nop
btst 3, %o3
bnz,pt %ncc, .co_med_half ! halfword aligned
nop
btst 7, %o3
bnz,pt %ncc, .co_med_word ! word aligned
nop
.co_med_long:
btst 3, %o0 ! check for
bz,pt %ncc, .co_med_long1 ! word alignment
nop
.co_med_long0:
ldub [%o0], %o3 ! load one byte
inc %o0
stba %o3,[%o1]ASI_USER ! store byte
inc %o1
btst 3, %o0
bnz,pt %ncc, .co_med_long0
dec %o2
.co_med_long1: ! word aligned
btst 7, %o0 ! check for long word
bz,pt %ncc, .co_med_long2
nop
lduw [%o0], %o3 ! load word
add %o0, 4, %o0 ! advance SRC by 4
stwa %o3, [%o1]ASI_USER ! store word
add %o1, 4, %o1 ! advance DST by 4
sub %o2, 4, %o2 ! reduce count by 4
!
! Now long word aligned and have at least 32 bytes to move
!
.co_med_long2:
sub %o2, 31, %o2 ! adjust count to allow cc zero test
sub %o1, 8, %o1 ! adjust pointer to allow store in
! branch delay slot instead of add
.co_med_lmove:
add %o1, 8, %o1 ! advance DST by 8
ldx [%o0], %o3 ! read long word
subcc %o2, 32, %o2 ! reduce count by 32
stxa %o3, [%o1]ASI_USER ! write long word
add %o1, 8, %o1 ! advance DST by 8
ldx [%o0 + 8], %o3 ! repeat for a total for 4 long words
add %o0, 32, %o0 ! advance SRC by 32
stxa %o3, [%o1]ASI_USER
ldx [%o0 - 16], %o3
add %o1, 8, %o1 ! advance DST by 8
stxa %o3, [%o1]ASI_USER
ldx [%o0 - 8], %o3
add %o1, 8, %o1 ! advance DST by 8
bgt,pt %ncc, .co_med_lmove ! loop til 31 or fewer bytes left
stxa %o3, [%o1]ASI_USER
add %o1, 8, %o1 ! advance DST by 8
addcc %o2, 24, %o2 ! restore count to long word offset
ble,pt %ncc, .co_med_lextra ! check for more long words to move
nop
.co_med_lword:
ldx [%o0], %o3 ! read long word
subcc %o2, 8, %o2 ! reduce count by 8
stxa %o3, [%o1]ASI_USER ! write long word
add %o0, 8, %o0 ! advance SRC by 8
bgt,pt %ncc, .co_med_lword ! loop til 7 or fewer bytes left
add %o1, 8, %o1 ! advance DST by 8
.co_med_lextra:
addcc %o2, 7, %o2 ! restore rest of count
bz,pt %ncc, .co_sm_exit ! if zero, then done
deccc %o2
bz,pt %ncc, .co_sm_byte
nop
ba,pt %ncc, .co_sm_half
nop
.align 16
nop ! instruction alignment
! see discussion at start of file
.co_med_word:
btst 3, %o0 ! check for
bz,pt %ncc, .co_med_word1 ! word alignment
nop
.co_med_word0:
ldub [%o0], %o3 ! load one byte
inc %o0
stba %o3,[%o1]ASI_USER ! store byte
inc %o1
btst 3, %o0
bnz,pt %ncc, .co_med_word0
dec %o2
!
! Now word aligned and have at least 36 bytes to move
!
.co_med_word1:
sub %o2, 15, %o2 ! adjust count to allow cc zero test
.co_med_wmove:
lduw [%o0], %o3 ! read word
subcc %o2, 16, %o2 ! reduce count by 16
stwa %o3, [%o1]ASI_USER ! write word
add %o1, 4, %o1 ! advance DST by 4
lduw [%o0 + 4], %o3 ! repeat for a total for 4 words
add %o0, 16, %o0 ! advance SRC by 16
stwa %o3, [%o1]ASI_USER
add %o1, 4, %o1 ! advance DST by 4
lduw [%o0 - 8], %o3
stwa %o3, [%o1]ASI_USER
add %o1, 4, %o1 ! advance DST by 4
lduw [%o0 - 4], %o3
stwa %o3, [%o1]ASI_USER
bgt,pt %ncc, .co_med_wmove ! loop til 15 or fewer bytes left
add %o1, 4, %o1 ! advance DST by 4
addcc %o2, 12, %o2 ! restore count to word offset
ble,pt %ncc, .co_med_wextra ! check for more words to move
nop
.co_med_word2:
lduw [%o0], %o3 ! read word
subcc %o2, 4, %o2 ! reduce count by 4
stwa %o3, [%o1]ASI_USER ! write word
add %o0, 4, %o0 ! advance SRC by 4
bgt,pt %ncc, .co_med_word2 ! loop til 3 or fewer bytes left
add %o1, 4, %o1 ! advance DST by 4
.co_med_wextra:
addcc %o2, 3, %o2 ! restore rest of count
bz,pt %ncc, .co_sm_exit ! if zero, then done
deccc %o2
bz,pt %ncc, .co_sm_byte
nop
ba,pt %ncc, .co_sm_half
nop
.align 16
nop ! instruction alignment
nop ! see discussion at start of file
nop
.co_med_half:
btst 1, %o0 ! check for
bz,pt %ncc, .co_med_half1 ! half word alignment
nop
ldub [%o0], %o3 ! load one byte
inc %o0
stba %o3,[%o1]ASI_USER ! store byte
inc %o1
dec %o2
!
! Now half word aligned and have at least 38 bytes to move
!
.co_med_half1:
sub %o2, 7, %o2 ! adjust count to allow cc zero test
.co_med_hmove:
lduh [%o0], %o3 ! read half word
subcc %o2, 8, %o2 ! reduce count by 8
stha %o3, [%o1]ASI_USER ! write half word
add %o1, 2, %o1 ! advance DST by 2
lduh [%o0 + 2], %o3 ! repeat for a total for 4 halfwords
add %o0, 8, %o0 ! advance SRC by 8
stha %o3, [%o1]ASI_USER
add %o1, 2, %o1 ! advance DST by 2
lduh [%o0 - 4], %o3
stha %o3, [%o1]ASI_USER
add %o1, 2, %o1 ! advance DST by 2
lduh [%o0 - 2], %o3
stha %o3, [%o1]ASI_USER
bgt,pt %ncc, .co_med_hmove ! loop til 7 or fewer bytes left
add %o1, 2, %o1 ! advance DST by 2
addcc %o2, 7, %o2 ! restore count
bz,pt %ncc, .co_sm_exit
deccc %o2
bz,pt %ncc, .co_sm_byte
nop
ba,pt %ncc, .co_sm_half
nop
.sm_copyout_err:
membar #Sync
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
mov SM_SAVE_SRC, %o0
mov SM_SAVE_DST, %o1
mov SM_SAVE_COUNT, %o2
ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
tst %o3
bz,pt %ncc, 3f ! if not, return error
nop
ldn [%o3 + CP_COPYOUT], %o5 ! if handler, invoke it with
jmp %o5 ! original arguments
nop
3:
retl
or %g0, -1, %o0 ! return error value
SET_SIZE(copyout)
!
! copyout_more: non-leaf continuation of copyout() used when the copy is
! large enough (relative to VIS_COPY_THRESHOLD / hw_copy_limit_*) to be
! worth the overhead of a register window, FP-register save/restore and
! VIS block stores to user space (ASI_BLK_AIUS).
! On success returns 0; on fault, falls into .copyout_err which either
! invokes the thread's copyops handler or returns -1.
!
ENTRY(copyout_more)
.copyout_more:
save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
set .copyout_err, REAL_LOFAULT
!
! Common block-copy code shared with xcopyout: REAL_LOFAULT already
! holds the caller-specific error exit.
!
.do_copyout:
set copyio_fault, %l7 ! .copyio_fault is lofault val
ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler
membar #Sync ! sync error barrier
stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault
mov %i0, SAVE_SRC
mov %i1, SAVE_DST
mov %i2, SAVE_COUNT
FP_NOMIGRATE(6, 7) ! stay on this CPU while FP regs are live
rd %fprs, %o2 ! check for unused fp
st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
btst FPRS_FEF, %o2
bz,a,pt %icc, .do_blockcopyout
wr %g0, FPRS_FEF, %fprs ! (delay, annulled if FP in use) enable FP
! FP was in use: save the quadrant-2/4 FP registers we will clobber
BST_FPQ2Q4_TOSTACK(%o2)
.do_blockcopyout:
rd %gsr, %o2
stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
or %l6, FPUSED_FLAG, %l6 ! tell fault path FP state was saved
andcc DST, VIS_BLOCKSIZE - 1, TMP
mov ASI_USER, %asi
bz,pt %ncc, 2f ! DST already 64-byte aligned?
neg TMP
add TMP, VIS_BLOCKSIZE, TMP
! TMP = bytes required to align DST on FP_BLOCK boundary
! Using SRC as a tmp here
cmp TMP, 3
bleu,pt %ncc, 1f
sub CNT,TMP,CNT ! adjust main count
sub TMP, 3, TMP ! adjust for end of loop test
.co_blkalign:
ldub [REALSRC], SRC ! move 4 bytes per loop iteration
stba SRC, [DST]%asi
subcc TMP, 4, TMP
ldub [REALSRC + 1], SRC
add REALSRC, 4, REALSRC
stba SRC, [DST + 1]%asi
ldub [REALSRC - 2], SRC
add DST, 4, DST
stba SRC, [DST - 2]%asi
ldub [REALSRC - 1], SRC
bgu,pt %ncc, .co_blkalign
stba SRC, [DST - 1]%asi
addcc TMP, 3, TMP ! restore count adjustment
bz,pt %ncc, 2f ! no bytes left?
nop
1: ldub [REALSRC], SRC ! trailing 1-3 alignment bytes
inc REALSRC
inc DST
deccc TMP
bgu %ncc, 1b
stba SRC, [DST - 1]%asi
2:
andn REALSRC, 0x7, SRC ! SRC = REALSRC rounded down to 8 bytes
alignaddr REALSRC, %g0, %g0 ! set GSR.align for faligndata below
! SRC - 8-byte aligned
! DST - 64-byte aligned
! Prime the software pipeline: prefetch ahead, load the first block
! into %f16-%f30 and pre-align the first seven doublewords into
! %f48-%f58 before entering the main loop.
prefetch [SRC], #one_read
prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
ldd [SRC], %f16
#if CHEETAH_PREFETCH > 4
prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
#endif
ldd [SRC + 0x08], %f18
#if CHEETAH_PREFETCH > 5
prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
#endif
ldd [SRC + 0x10], %f20
#if CHEETAH_PREFETCH > 6
prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
#endif
faligndata %f16, %f18, %f48
ldd [SRC + 0x18], %f22
#if CHEETAH_PREFETCH > 7
prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
#endif
faligndata %f18, %f20, %f50
ldd [SRC + 0x20], %f24
faligndata %f20, %f22, %f52
ldd [SRC + 0x28], %f26
faligndata %f22, %f24, %f54
ldd [SRC + 0x30], %f28
faligndata %f24, %f26, %f56
ldd [SRC + 0x38], %f30
faligndata %f26, %f28, %f58
ldd [SRC + VIS_BLOCKSIZE], %f16
sub CNT, VIS_BLOCKSIZE, CNT
add SRC, VIS_BLOCKSIZE, SRC
add REALSRC, VIS_BLOCKSIZE, REALSRC
ba,a,pt %ncc, 1f
nop
.align 16
!
! Main loop: each iteration loads the next 64-byte block, finishes
! aligning the previous one, and block-stores it to user space.
!
1:
ldd [SRC + 0x08], %f18
faligndata %f28, %f30, %f60
ldd [SRC + 0x10], %f20
faligndata %f30, %f16, %f62
stda %f48, [DST]ASI_BLK_AIUS ! 64-byte block store to user space
ldd [SRC + 0x18], %f22
faligndata %f16, %f18, %f48
ldd [SRC + 0x20], %f24
faligndata %f18, %f20, %f50
ldd [SRC + 0x28], %f26
faligndata %f20, %f22, %f52
ldd [SRC + 0x30], %f28
faligndata %f22, %f24, %f54
ldd [SRC + 0x38], %f30
faligndata %f24, %f26, %f56
sub CNT, VIS_BLOCKSIZE, CNT
ldd [SRC + VIS_BLOCKSIZE], %f16
faligndata %f26, %f28, %f58
prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
add DST, VIS_BLOCKSIZE, DST
prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
add REALSRC, VIS_BLOCKSIZE, REALSRC
cmp CNT, VIS_BLOCKSIZE + 8
bgu,pt %ncc, 1b
add SRC, VIS_BLOCKSIZE, SRC
! Loop drained.  If exactly one block remains AND the source was
! 8-byte aligned we can copy it with fsrc1 (no realignment needed);
! only if REALSRC & 0x7 is 0
cmp CNT, VIS_BLOCKSIZE
bne %ncc, 3f
andcc REALSRC, 0x7, %g0
bz,pt %ncc, 2f
nop
!
! Unaligned tail: flush the last pipelined block, then fall through to
! the byte loop at 3: below for the remaining CNT bytes.
!
3:
faligndata %f28, %f30, %f60
faligndata %f30, %f16, %f62
stda %f48, [DST]ASI_BLK_AIUS
add DST, VIS_BLOCKSIZE, DST
ba,pt %ncc, 3f
nop
!
! Aligned final block: copy it via fsrc1 register moves, leaving CNT 0.
!
2:
ldd [SRC + 0x08], %f18
fsrc1 %f28, %f60
ldd [SRC + 0x10], %f20
fsrc1 %f30, %f62
stda %f48, [DST]ASI_BLK_AIUS
ldd [SRC + 0x18], %f22
fsrc1 %f16, %f48
ldd [SRC + 0x20], %f24
fsrc1 %f18, %f50
ldd [SRC + 0x28], %f26
fsrc1 %f20, %f52
ldd [SRC + 0x30], %f28
fsrc1 %f22, %f54
ldd [SRC + 0x38], %f30
fsrc1 %f24, %f56
sub CNT, VIS_BLOCKSIZE, CNT
add DST, VIS_BLOCKSIZE, DST
add SRC, VIS_BLOCKSIZE, SRC
add REALSRC, VIS_BLOCKSIZE, REALSRC
fsrc1 %f26, %f58
fsrc1 %f28, %f60
fsrc1 %f30, %f62
stda %f48, [DST]ASI_BLK_AIUS
add DST, VIS_BLOCKSIZE, DST
ba,a,pt %ncc, 4f
nop
! Byte loop for any remaining tail bytes (CNT < VIS_BLOCKSIZE)
3: tst CNT
bz,a %ncc, 4f
nop
5: ldub [REALSRC], TMP
inc REALSRC
inc DST
deccc CNT
bgu %ncc, 5b
stba TMP, [DST - 1]%asi
4:
!
! Common exit: run the fp-ras integrity check, restore %gsr/%fprs and
! the saved FP registers (or zero them if FP was unused), restore the
! caller's t_lofault and return 0.
!
.copyout_exit:
membar #Sync
FPRAS_INTERVAL(FPRAS_COPYOUT, 0, %l5, %o2, %o3, %o4, %o5, 8)
FPRAS_REWRITE_TYPE2Q2(0, %l5, %o2, %o3, 8, 9)
FPRAS_CHECK(FPRAS_COPYOUT, %l5, 9) ! lose outputs
ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
wr %o2, 0, %gsr ! restore gsr
ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
btst FPRS_FEF, %o3
bz,pt %icc, 4f
nop
BLD_FPQ2Q4_FROMSTACK(%o2)
ba,pt %ncc, 1f
wr %o3, 0, %fprs ! restore fprs
4:
FZEROQ2Q4 ! FP was unused: scrub our quadrants instead
wr %o3, 0, %fprs ! restore fprs
1:
membar #Sync
andn %l6, FPUSED_FLAG, %l6
stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
FP_ALLOWMIGRATE(5, 6)
ret
restore %g0, 0, %o0 ! return 0
!
! Fault exit (reached via copyio_fault/REAL_LOFAULT): hand off to the
! installed copyops handler if any, else return -1.
!
.copyout_err:
ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
tst %o4
bz,pt %ncc, 2f ! if not, return error
nop
ldn [%o4 + CP_COPYOUT], %g2 ! if handler, invoke it with
jmp %g2 ! original arguments
restore %g0, 0, %g0 ! dispose of copy window
2:
ret
restore %g0, -1, %o0 ! return error value
SET_SIZE(copyout_more)
!
! xcopyout: like copyout() but, on fault, returns an errno value
! rather than -1 (the value is picked up from %g1, which the fault
! path is expected to have set -- see .xcopyout_err/.sm_xcopyout_err).
! Dispatches to the small leaf copy or the large VIS block copy based
! on length, mutual alignment of src/dst, and the hw_copy_limit_*
! tunables (a zero limit disables the HW/VIS path entirely).
!
ENTRY(xcopyout)
cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
bleu,pt %ncc, .xcopyout_small ! go to larger cases
xor %o0, %o1, %o3 ! are src, dst alignable?
btst 7, %o3 !
bz,pt %ncc, .xcopyout_8 ! check for longword alignment
nop
btst 1, %o3 !
bz,pt %ncc, .xcopyout_2 ! check for half-word
nop
sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_1)], %o3
tst %o3
bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .xcopyout_small ! go to small copy
nop
ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
nop
.xcopyout_2:
btst 3, %o3 !
bz,pt %ncc, .xcopyout_4 ! check for word alignment
nop
sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_2)], %o3
tst %o3
bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .xcopyout_small ! go to small copy
nop
ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
nop
.xcopyout_4:
! already checked longword, must be word aligned
sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_4)], %o3
tst %o3
bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .xcopyout_small ! go to small copy
nop
ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
nop
.xcopyout_8:
sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_8)], %o3
tst %o3
bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .xcopyout_small ! go to small copy
nop
ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
nop
! Small copy: reuse copyout's leaf path with xcopyout's fault handler
.xcopyout_small:
sethi %hi(.sm_xcopyout_err), %o5 ! .sm_xcopyout_err is lofault
or %o5, %lo(.sm_xcopyout_err), %o5
ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler
membar #Sync ! sync error barrier
ba,pt %ncc, .sm_do_copyout ! common code
stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault
! Large copy: reuse copyout's VIS block path with our error exit
.xcopyout_more:
save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
sethi %hi(.xcopyout_err), REAL_LOFAULT
ba,pt %ncc, .do_copyout ! common code
or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
! Fault exit for the large-copy path
.xcopyout_err:
ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
tst %o4
bz,pt %ncc, 2f ! if not, return error
nop
ldn [%o4 + CP_XCOPYOUT], %g2 ! if handler, invoke it with
jmp %g2 ! original arguments
restore %g0, 0, %g0 ! dispose of copy window
2:
ret
restore ERRNO, 0, %o0 ! return errno value
! Fault exit for the small-copy (leaf) path
.sm_xcopyout_err:
membar #Sync
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
mov SM_SAVE_SRC, %o0 ! rebuild original arguments
mov SM_SAVE_DST, %o1
mov SM_SAVE_COUNT, %o2
ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
tst %o3
bz,pt %ncc, 3f ! if not, return error
nop
ldn [%o3 + CP_XCOPYOUT], %o5 ! if handler, invoke it with
jmp %o5 ! original arguments
nop
3:
retl
or %g1, 0, %o0 ! return errno value
SET_SIZE(xcopyout)
!
! xcopyout_little: copy %o2 bytes from kernel address %o0 to user
! address %o1 through the user secondary little-endian ASI
! (ASI_AIUSL), writing the bytes in reverse order: the last source
! byte is stored at the first destination byte.  Returns 0 on
! success; faults vector to .xcopyio_err (shared with xcopyin_little).
!
ENTRY(xcopyout_little)
sethi %hi(.xcopyio_err), %o5
or %o5, %lo(.xcopyio_err), %o5
ldn [THREAD_REG + T_LOFAULT], %o4
membar #Sync ! sync error barrier
stn %o5, [THREAD_REG + T_LOFAULT]
mov %o4, %o5 ! keep old lofault for the exit path
subcc %g0, %o2, %o3 ! %o3 = -count (loop index toward 0)
add %o0, %o2, %o0
bz,pn %ncc, 2f ! check for zero bytes
sub %o2, 1, %o4
add %o0, %o4, %o0 ! start w/last byte
add %o1, %o2, %o1
ldub [%o0 + %o3], %o4 ! fetch last source byte first
1: stba %o4, [%o1 + %o3]ASI_AIUSL
inccc %o3
sub %o0, 2, %o0 ! get next byte (net -1 since %o3 advanced)
bcc,a,pt %ncc, 1b
ldub [%o0 + %o3], %o4 ! (delay, annulled on exit) next byte
2:
membar #Sync ! sync error barrier
stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return (0)
SET_SIZE(xcopyout_little)
!
! copyin: copy %o2 bytes from user address %o0 to kernel address %o1.
! Returns 0 on success, -1 (or the copyops handler's result) on fault.
! Small/aligned cases are handled here as a leaf routine reading user
! memory with ld*a ASI_USER and plain kernel stores; large copies
! branch to copyin_more for the VIS block path.  The hw_copy_limit_*
! tunables gate the HW path per mutual src/dst alignment (zero
! disables it).
!
ENTRY(copyin)
cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
bleu,pt %ncc, .copyin_small ! go to larger cases
xor %o0, %o1, %o3 ! are src, dst alignable?
btst 7, %o3 !
bz,pt %ncc, .copyin_8 ! check for longword alignment
nop
btst 1, %o3 !
bz,pt %ncc, .copyin_2 ! check for half-word
nop
sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_1)], %o3
tst %o3
bz,pn %icc, .copyin_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyin_small ! go to small copy
nop
ba,pt %ncc, .copyin_more ! otherwise go to large copy
nop
.copyin_2:
btst 3, %o3 !
bz,pt %ncc, .copyin_4 ! check for word alignment
nop
sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_2)], %o3
tst %o3
bz,pn %icc, .copyin_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyin_small ! go to small copy
nop
ba,pt %ncc, .copyin_more ! otherwise go to large copy
nop
.copyin_4:
! already checked longword, must be word aligned
sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_4)], %o3
tst %o3
bz,pn %icc, .copyin_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyin_small ! go to small copy
nop
ba,pt %ncc, .copyin_more ! otherwise go to large copy
nop
.copyin_8:
sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_8)], %o3
tst %o3
bz,pn %icc, .copyin_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyin_small ! go to small copy
nop
ba,pt %ncc, .copyin_more ! otherwise go to large copy
nop
.align 16
nop ! instruction alignment
! see discussion at start of file
!
! Leaf small-copy path.  Installs .sm_copyin_err as lofault (no
! trampoline), saves the original args for the error path, then picks
! a strategy by count and alignment.
!
.copyin_small:
sethi %hi(.sm_copyin_err), %o5 ! .sm_copyin_err is lofault
or %o5, %lo(.sm_copyin_err), %o5
ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofault, no tramp
membar #Sync ! sync error barrier
stn %o5, [THREAD_REG + T_LOFAULT]
.sm_do_copyin:
mov %o0, SM_SAVE_SRC
mov %o1, SM_SAVE_DST
cmp %o2, SHORTCOPY ! check for really short case
bleu,pt %ncc, .ci_sm_left !
mov %o2, SM_SAVE_COUNT
cmp %o2, CHKSIZE ! check for medium length cases
bgu,pn %ncc, .ci_med !
or %o0, %o1, %o3 ! prepare alignment check
andcc %o3, 0x3, %g0 ! test for alignment
bz,pt %ncc, .ci_sm_word ! branch to word aligned case
! NB: the sub below sits in the branch delay slot, so the count is
! adjusted by 3 on BOTH paths; .ci_sm_word compensates with addcc +3.
.ci_sm_movebytes:
sub %o2, 3, %o2 ! adjust count to allow cc zero test
.ci_sm_notalign4:
lduba [%o0]ASI_USER, %o3 ! read byte
subcc %o2, 4, %o2 ! reduce count by 4
stb %o3, [%o1] ! write byte
add %o0, 1, %o0 ! advance SRC by 1
lduba [%o0]ASI_USER, %o3 ! repeat for a total of 4 bytes
add %o0, 1, %o0 ! advance SRC by 1
stb %o3, [%o1 + 1]
add %o1, 4, %o1 ! advance DST by 4
lduba [%o0]ASI_USER, %o3
add %o0, 1, %o0 ! advance SRC by 1
stb %o3, [%o1 - 2]
lduba [%o0]ASI_USER, %o3
add %o0, 1, %o0 ! advance SRC by 1
bgt,pt %ncc, .ci_sm_notalign4 ! loop til 3 or fewer bytes remain
stb %o3, [%o1 - 1]
add %o2, 3, %o2 ! restore count
! 0-3 residual bytes
.ci_sm_left:
tst %o2
bz,pt %ncc, .ci_sm_exit
nop
lduba [%o0]ASI_USER, %o3 ! load one byte
deccc %o2 ! reduce count for cc test
bz,pt %ncc, .ci_sm_exit
stb %o3,[%o1] ! store one byte
inc %o0
lduba [%o0]ASI_USER, %o3 ! load second byte
deccc %o2
bz,pt %ncc, .ci_sm_exit
stb %o3,[%o1 + 1] ! store second byte
inc %o0
lduba [%o0]ASI_USER, %o3 ! load third byte
stb %o3,[%o1 + 2] ! store third byte
membar #Sync ! sync error barrier
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return 0
.align 16
! Word-aligned path: move 2 words per iteration
.ci_sm_words:
lduwa [%o0]ASI_USER, %o3 ! read word
.ci_sm_wordx:
subcc %o2, 8, %o2 ! update count
stw %o3, [%o1] ! write word
add %o0, 4, %o0 ! update SRC
add %o1, 8, %o1 ! update DST
lduwa [%o0]ASI_USER, %o3 ! read word
add %o0, 4, %o0 ! update SRC
bgt,pt %ncc, .ci_sm_words ! loop til done
stw %o3, [%o1 - 4] ! write word
addcc %o2, 7, %o2 ! restore count
bz,pt %ncc, .ci_sm_exit
nop
deccc %o2
bz,pt %ncc, .ci_sm_byte
! Halfword tail loop (entry also used by the medium paths below)
.ci_sm_half:
subcc %o2, 2, %o2 ! reduce count by 2
lduha [%o0]ASI_USER, %o3 ! read half word
add %o0, 2, %o0 ! advance SRC by 2
add %o1, 2, %o1 ! advance DST by 2
bgt,pt %ncc, .ci_sm_half ! loop til done
sth %o3, [%o1 - 2] ! write half word
addcc %o2, 1, %o2 ! restore count
bz,pt %ncc, .ci_sm_exit
nop
! Final single byte
.ci_sm_byte:
lduba [%o0]ASI_USER, %o3
stb %o3, [%o1]
membar #Sync ! sync error barrier
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return 0
.align 16
! Short word-aligned copy (count already reduced by 3 in delay slot)
.ci_sm_word:
subcc %o2, 4, %o2 ! update count
bgt,pt %ncc, .ci_sm_wordx
lduwa [%o0]ASI_USER, %o3 ! read word
addcc %o2, 3, %o2 ! restore count
bz,pt %ncc, .ci_sm_exit
stw %o3, [%o1] ! write word
deccc %o2 ! reduce count for cc test
add %o0, 4, %o0
lduba [%o0]ASI_USER, %o3 ! load one byte
bz,pt %ncc, .ci_sm_exit
stb %o3, [%o1 + 4] ! store one byte
inc %o0
lduba [%o0]ASI_USER, %o3 ! load second byte
deccc %o2
bz,pt %ncc, .ci_sm_exit
stb %o3, [%o1 + 5] ! store second byte
inc %o0
lduba [%o0]ASI_USER, %o3 ! load third byte
stb %o3, [%o1 + 6] ! store third byte
! Common small-copy success exit
.ci_sm_exit:
membar #Sync ! sync error barrier
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return 0
.align 16
!
! Medium-length copies (CHKSIZE < count <= VIS_COPY_THRESHOLD):
! pick the widest access size the mutual alignment permits.
!
.ci_med:
xor %o0, %o1, %o3 ! setup alignment check
btst 1, %o3
bnz,pt %ncc, .ci_sm_movebytes ! unaligned
nop
btst 3, %o3
bnz,pt %ncc, .ci_med_half ! halfword aligned
nop
btst 7, %o3
bnz,pt %ncc, .ci_med_word ! word aligned
nop
! src/dst mutually longword-alignable: align SRC up to 8 bytes first
.ci_med_long:
btst 3, %o0 ! check for
bz,pt %ncc, .ci_med_long1 ! word alignment
nop
.ci_med_long0:
lduba [%o0]ASI_USER, %o3 ! load one byte
inc %o0
stb %o3,[%o1] ! store byte
inc %o1
btst 3, %o0
bnz,pt %ncc, .ci_med_long0
dec %o2
.ci_med_long1: ! word aligned
btst 7, %o0 ! check for long word
bz,pt %ncc, .ci_med_long2
nop
lduwa [%o0]ASI_USER, %o3 ! load word
add %o0, 4, %o0 ! advance SRC by 4
stw %o3, [%o1] ! store word
add %o1, 4, %o1 ! advance DST by 4
sub %o2, 4, %o2 ! reduce count by 4
!
! Now long word aligned and have at least 32 bytes to move
!
.ci_med_long2:
sub %o2, 31, %o2 ! adjust count to allow cc zero test
.ci_med_lmove:
ldxa [%o0]ASI_USER, %o3 ! read long word
subcc %o2, 32, %o2 ! reduce count by 32
stx %o3, [%o1] ! write long word
add %o0, 8, %o0 ! advance SRC by 8
ldxa [%o0]ASI_USER, %o3 ! repeat for a total for 4 long words
add %o0, 8, %o0 ! advance SRC by 8
stx %o3, [%o1 + 8]
add %o1, 32, %o1 ! advance DST by 32
ldxa [%o0]ASI_USER, %o3
add %o0, 8, %o0 ! advance SRC by 8
stx %o3, [%o1 - 16]
ldxa [%o0]ASI_USER, %o3
add %o0, 8, %o0 ! advance SRC by 8
bgt,pt %ncc, .ci_med_lmove ! loop til 31 or fewer bytes left
stx %o3, [%o1 - 8]
addcc %o2, 24, %o2 ! restore count to long word offset
ble,pt %ncc, .ci_med_lextra ! check for more long words to move
nop
.ci_med_lword:
ldxa [%o0]ASI_USER, %o3 ! read long word
subcc %o2, 8, %o2 ! reduce count by 8
stx %o3, [%o1] ! write long word
add %o0, 8, %o0 ! advance SRC by 8
bgt,pt %ncc, .ci_med_lword ! loop til 7 or fewer bytes left
add %o1, 8, %o1 ! advance DST by 8
.ci_med_lextra:
addcc %o2, 7, %o2 ! restore rest of count
bz,pt %ncc, .ci_sm_exit ! if zero, then done
deccc %o2
bz,pt %ncc, .ci_sm_byte
nop
ba,pt %ncc, .ci_sm_half
nop
.align 16
nop ! instruction alignment
! see discussion at start of file
! src/dst mutually word-alignable
.ci_med_word:
btst 3, %o0 ! check for
bz,pt %ncc, .ci_med_word1 ! word alignment
nop
.ci_med_word0:
lduba [%o0]ASI_USER, %o3 ! load one byte
inc %o0
stb %o3,[%o1] ! store byte
inc %o1
btst 3, %o0
bnz,pt %ncc, .ci_med_word0
dec %o2
!
! Now word aligned and have at least 36 bytes to move
!
.ci_med_word1:
sub %o2, 15, %o2 ! adjust count to allow cc zero test
.ci_med_wmove:
lduwa [%o0]ASI_USER, %o3 ! read word
subcc %o2, 16, %o2 ! reduce count by 16
stw %o3, [%o1] ! write word
add %o0, 4, %o0 ! advance SRC by 4
lduwa [%o0]ASI_USER, %o3 ! repeat for a total for 4 words
add %o0, 4, %o0 ! advance SRC by 4
stw %o3, [%o1 + 4]
add %o1, 16, %o1 ! advance DST by 16
lduwa [%o0]ASI_USER, %o3
add %o0, 4, %o0 ! advance SRC by 4
stw %o3, [%o1 - 8]
lduwa [%o0]ASI_USER, %o3
add %o0, 4, %o0 ! advance SRC by 4
bgt,pt %ncc, .ci_med_wmove ! loop til 15 or fewer bytes left
stw %o3, [%o1 - 4]
addcc %o2, 12, %o2 ! restore count to word offset
ble,pt %ncc, .ci_med_wextra ! check for more words to move
nop
.ci_med_word2:
lduwa [%o0]ASI_USER, %o3 ! read word
subcc %o2, 4, %o2 ! reduce count by 4
stw %o3, [%o1] ! write word
add %o0, 4, %o0 ! advance SRC by 4
bgt,pt %ncc, .ci_med_word2 ! loop til 3 or fewer bytes left
add %o1, 4, %o1 ! advance DST by 4
.ci_med_wextra:
addcc %o2, 3, %o2 ! restore rest of count
bz,pt %ncc, .ci_sm_exit ! if zero, then done
deccc %o2
bz,pt %ncc, .ci_sm_byte
nop
ba,pt %ncc, .ci_sm_half
nop
.align 16
nop ! instruction alignment
! see discussion at start of file
! src/dst mutually halfword-alignable
.ci_med_half:
btst 1, %o0 ! check for
bz,pt %ncc, .ci_med_half1 ! half word alignment
nop
lduba [%o0]ASI_USER, %o3 ! load one byte
inc %o0
stb %o3,[%o1] ! store byte
inc %o1
dec %o2
!
! Now half word aligned and have at least 38 bytes to move
!
.ci_med_half1:
sub %o2, 7, %o2 ! adjust count to allow cc zero test
.ci_med_hmove:
lduha [%o0]ASI_USER, %o3 ! read half word
subcc %o2, 8, %o2 ! reduce count by 8
sth %o3, [%o1] ! write half word
add %o0, 2, %o0 ! advance SRC by 2
lduha [%o0]ASI_USER, %o3 ! repeat for a total for 4 halfwords
add %o0, 2, %o0 ! advance SRC by 2
sth %o3, [%o1 + 2]
add %o1, 8, %o1 ! advance DST by 8
lduha [%o0]ASI_USER, %o3
add %o0, 2, %o0 ! advance SRC by 2
sth %o3, [%o1 - 4]
lduha [%o0]ASI_USER, %o3
add %o0, 2, %o0 ! advance SRC by 2
bgt,pt %ncc, .ci_med_hmove ! loop til 7 or fewer bytes left
sth %o3, [%o1 - 2]
addcc %o2, 7, %o2 ! restore count
bz,pt %ncc, .ci_sm_exit
deccc %o2
bz,pt %ncc, .ci_sm_byte
nop
ba,pt %ncc, .ci_sm_half
nop
!
! Fault exit for the leaf path: restore the original t_lofault and
! arguments, then dispatch to the copyops handler or return -1.
!
.sm_copyin_err:
membar #Sync
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
mov SM_SAVE_SRC, %o0
mov SM_SAVE_DST, %o1
mov SM_SAVE_COUNT, %o2
ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
tst %o3
bz,pt %ncc, 3f ! if not, return error
nop
ldn [%o3 + CP_COPYIN], %o5 ! if handler, invoke it with
jmp %o5 ! original arguments
nop
3:
retl
or %g0, -1, %o0 ! return errno value
SET_SIZE(copyin)
!
! copyin_more: non-leaf continuation of copyin() for large copies.
! Mirror image of copyout_more: reads user memory with ld*a through
! %asi (= ASI_USER) and VIS-aligns it, then block-stores to kernel
! memory with ASI_BLK_P.  Saves/restores %fprs, %gsr and the FP
! quadrants it uses; faults vector through copyio_fault to
! REAL_LOFAULT (.copyin_err here).
!
ENTRY(copyin_more)
.copyin_more:
save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
set .copyin_err, REAL_LOFAULT
!
! Common block-copy code shared with xcopyin: REAL_LOFAULT already
! holds the caller-specific error exit.
!
.do_copyin:
set copyio_fault, %l7 ! .copyio_fault is lofault val
ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler
membar #Sync ! sync error barrier
stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault
mov %i0, SAVE_SRC
mov %i1, SAVE_DST
mov %i2, SAVE_COUNT
FP_NOMIGRATE(6, 7) ! stay on this CPU while FP regs are live
rd %fprs, %o2 ! check for unused fp
st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
btst FPRS_FEF, %o2
bz,a,pt %icc, .do_blockcopyin
wr %g0, FPRS_FEF, %fprs ! (delay, annulled if FP in use) enable FP
! FP was in use: save the quadrant-2/4 FP registers we will clobber
BST_FPQ2Q4_TOSTACK(%o2)
.do_blockcopyin:
rd %gsr, %o2
stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
or %l6, FPUSED_FLAG, %l6 ! tell fault path FP state was saved
andcc DST, VIS_BLOCKSIZE - 1, TMP
mov ASI_USER, %asi
bz,pt %ncc, 2f ! DST already 64-byte aligned?
neg TMP
add TMP, VIS_BLOCKSIZE, TMP
! TMP = bytes required to align DST on FP_BLOCK boundary
! Using SRC as a tmp here
cmp TMP, 3
bleu,pt %ncc, 1f
sub CNT,TMP,CNT ! adjust main count
sub TMP, 3, TMP ! adjust for end of loop test
.ci_blkalign:
lduba [REALSRC]%asi, SRC ! move 4 bytes per loop iteration
stb SRC, [DST]
subcc TMP, 4, TMP
lduba [REALSRC + 1]%asi, SRC
add REALSRC, 4, REALSRC
stb SRC, [DST + 1]
lduba [REALSRC - 2]%asi, SRC
add DST, 4, DST
stb SRC, [DST - 2]
lduba [REALSRC - 1]%asi, SRC
bgu,pt %ncc, .ci_blkalign
stb SRC, [DST - 1]
addcc TMP, 3, TMP ! restore count adjustment
bz,pt %ncc, 2f ! no bytes left?
nop
1: lduba [REALSRC]%asi, SRC ! trailing 1-3 alignment bytes
inc REALSRC
inc DST
deccc TMP
bgu %ncc, 1b
stb SRC, [DST - 1]
2:
andn REALSRC, 0x7, SRC ! SRC = REALSRC rounded down to 8 bytes
alignaddr REALSRC, %g0, %g0 ! set GSR.align for faligndata below
! SRC - 8-byte aligned
! DST - 64-byte aligned
! Prime the software pipeline: prefetch ahead, load the first block
! into %f16-%f30 and pre-align the first seven doublewords into
! %f48-%f58 before entering the main loop.
prefetcha [SRC]%asi, #one_read
prefetcha [SRC + (1 * VIS_BLOCKSIZE)]%asi, #one_read
prefetcha [SRC + (2 * VIS_BLOCKSIZE)]%asi, #one_read
prefetcha [SRC + (3 * VIS_BLOCKSIZE)]%asi, #one_read
ldda [SRC]%asi, %f16
#if CHEETAH_PREFETCH > 4
prefetcha [SRC + (4 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
ldda [SRC + 0x08]%asi, %f18
#if CHEETAH_PREFETCH > 5
prefetcha [SRC + (5 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
ldda [SRC + 0x10]%asi, %f20
#if CHEETAH_PREFETCH > 6
prefetcha [SRC + (6 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
faligndata %f16, %f18, %f48
ldda [SRC + 0x18]%asi, %f22
#if CHEETAH_PREFETCH > 7
prefetcha [SRC + (7 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
faligndata %f18, %f20, %f50
ldda [SRC + 0x20]%asi, %f24
faligndata %f20, %f22, %f52
ldda [SRC + 0x28]%asi, %f26
faligndata %f22, %f24, %f54
ldda [SRC + 0x30]%asi, %f28
faligndata %f24, %f26, %f56
ldda [SRC + 0x38]%asi, %f30
faligndata %f26, %f28, %f58
ldda [SRC + VIS_BLOCKSIZE]%asi, %f16
sub CNT, VIS_BLOCKSIZE, CNT
add SRC, VIS_BLOCKSIZE, SRC
add REALSRC, VIS_BLOCKSIZE, REALSRC
ba,a,pt %ncc, 1f
nop
.align 16
!
! Main loop: each iteration loads the next 64-byte user block,
! finishes aligning the previous one, and block-stores it to kernel
! memory.
!
1:
ldda [SRC + 0x08]%asi, %f18
faligndata %f28, %f30, %f60
ldda [SRC + 0x10]%asi, %f20
faligndata %f30, %f16, %f62
stda %f48, [DST]ASI_BLK_P ! 64-byte block store to kernel memory
ldda [SRC + 0x18]%asi, %f22
faligndata %f16, %f18, %f48
ldda [SRC + 0x20]%asi, %f24
faligndata %f18, %f20, %f50
ldda [SRC + 0x28]%asi, %f26
faligndata %f20, %f22, %f52
ldda [SRC + 0x30]%asi, %f28
faligndata %f22, %f24, %f54
ldda [SRC + 0x38]%asi, %f30
faligndata %f24, %f26, %f56
sub CNT, VIS_BLOCKSIZE, CNT
ldda [SRC + VIS_BLOCKSIZE]%asi, %f16
faligndata %f26, %f28, %f58
prefetcha [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8]%asi, #one_read
add DST, VIS_BLOCKSIZE, DST
prefetcha [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)]%asi, #one_read
add REALSRC, VIS_BLOCKSIZE, REALSRC
cmp CNT, VIS_BLOCKSIZE + 8
bgu,pt %ncc, 1b
add SRC, VIS_BLOCKSIZE, SRC
! Loop drained.  If exactly one block remains AND the source was
! 8-byte aligned we can copy it with fsrc1 (no realignment needed);
! only if REALSRC & 0x7 is 0
cmp CNT, VIS_BLOCKSIZE
bne %ncc, 3f
andcc REALSRC, 0x7, %g0
bz,pt %ncc, 2f
nop
!
! Unaligned tail: flush the last pipelined block, then fall through to
! the byte loop at 3: below for the remaining CNT bytes.
!
3:
faligndata %f28, %f30, %f60
faligndata %f30, %f16, %f62
stda %f48, [DST]ASI_BLK_P
add DST, VIS_BLOCKSIZE, DST
ba,pt %ncc, 3f
nop
!
! Aligned final block: copy it via fsrc1 register moves, leaving CNT 0.
!
2:
ldda [SRC + 0x08]%asi, %f18
fsrc1 %f28, %f60
ldda [SRC + 0x10]%asi, %f20
fsrc1 %f30, %f62
stda %f48, [DST]ASI_BLK_P
ldda [SRC + 0x18]%asi, %f22
fsrc1 %f16, %f48
ldda [SRC + 0x20]%asi, %f24
fsrc1 %f18, %f50
ldda [SRC + 0x28]%asi, %f26
fsrc1 %f20, %f52
ldda [SRC + 0x30]%asi, %f28
fsrc1 %f22, %f54
ldda [SRC + 0x38]%asi, %f30
fsrc1 %f24, %f56
sub CNT, VIS_BLOCKSIZE, CNT
add DST, VIS_BLOCKSIZE, DST
add SRC, VIS_BLOCKSIZE, SRC
add REALSRC, VIS_BLOCKSIZE, REALSRC
fsrc1 %f26, %f58
fsrc1 %f28, %f60
fsrc1 %f30, %f62
stda %f48, [DST]ASI_BLK_P
add DST, VIS_BLOCKSIZE, DST
ba,a,pt %ncc, 4f
nop
! Byte loop for any remaining tail bytes (CNT < VIS_BLOCKSIZE)
3: tst CNT
bz,a %ncc, 4f
nop
5: lduba [REALSRC]ASI_USER, TMP
inc REALSRC
inc DST
deccc CNT
bgu %ncc, 5b
stb TMP, [DST - 1]
4:
!
! Common exit: run the fp-ras integrity check, restore %gsr/%fprs and
! the saved FP registers (or zero them if FP was unused), restore the
! caller's t_lofault and return 0.
!
.copyin_exit:
membar #Sync
FPRAS_INTERVAL(FPRAS_COPYIN, 1, %l5, %o2, %o3, %o4, %o5, 8)
FPRAS_REWRITE_TYPE1(1, %l5, %f48, %o2, 9)
FPRAS_CHECK(FPRAS_COPYIN, %l5, 9) ! lose outputs
ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
wr %o2, 0, %gsr
ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
btst FPRS_FEF, %o3
bz,pt %icc, 4f
nop
BLD_FPQ2Q4_FROMSTACK(%o2)
ba,pt %ncc, 1f
wr %o3, 0, %fprs ! restore fprs
4:
FZEROQ2Q4 ! FP was unused: scrub our quadrants instead
wr %o3, 0, %fprs ! restore fprs
1:
membar #Sync ! sync error barrier
andn %l6, FPUSED_FLAG, %l6
stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
FP_ALLOWMIGRATE(5, 6)
ret
restore %g0, 0, %o0 ! return 0
!
! Fault exit (reached via copyio_fault/REAL_LOFAULT): hand off to the
! installed copyops handler if any, else return -1.
!
.copyin_err:
ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
tst %o4
bz,pt %ncc, 2f ! if not, return error
nop
ldn [%o4 + CP_COPYIN], %g2 ! if handler, invoke it with
jmp %g2 ! original arguments
restore %g0, 0, %g0 ! dispose of copy window
2:
ret
restore %g0, -1, %o0 ! return error value
SET_SIZE(copyin_more)
!
! xcopyin: like copyin() but, on fault, returns an errno value rather
! than -1 (the value is picked up from %g1 / ERRNO, which the fault
! path is expected to have set).  Dispatches to the small leaf copy or
! the large VIS block copy based on length, mutual src/dst alignment,
! and the hw_copy_limit_* tunables (a zero limit disables the HW path).
!
ENTRY(xcopyin)
cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
bleu,pt %ncc, .xcopyin_small ! go to larger cases
xor %o0, %o1, %o3 ! are src, dst alignable?
btst 7, %o3 !
bz,pt %ncc, .xcopyin_8 ! check for longword alignment
nop
btst 1, %o3 !
bz,pt %ncc, .xcopyin_2 ! check for half-word
nop
sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_1)], %o3
tst %o3
bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .xcopyin_small ! go to small copy
nop
ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
nop
.xcopyin_2:
btst 3, %o3 !
bz,pt %ncc, .xcopyin_4 ! check for word alignment
nop
sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_2)], %o3
tst %o3
bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .xcopyin_small ! go to small copy
nop
ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
nop
.xcopyin_4:
! already checked longword, must be word aligned
sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_4)], %o3
tst %o3
bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .xcopyin_small ! go to small copy
nop
ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
nop
.xcopyin_8:
sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_8)], %o3
tst %o3
bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .xcopyin_small ! go to small copy
nop
ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
nop
! Small copy: reuse copyin's leaf path with xcopyin's fault handler
.xcopyin_small:
sethi %hi(.sm_xcopyin_err), %o5 ! .sm_xcopyin_err is lofault value
or %o5, %lo(.sm_xcopyin_err), %o5
ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofaul
membar #Sync ! sync error barrier
ba,pt %ncc, .sm_do_copyin ! common code
stn %o5, [THREAD_REG + T_LOFAULT]
! Large copy: reuse copyin's VIS block path with our error exit
.xcopyin_more:
save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
sethi %hi(.xcopyin_err), REAL_LOFAULT ! .xcopyin_err is lofault value
ba,pt %ncc, .do_copyin
or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
! Fault exit for the large-copy path
.xcopyin_err:
ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
tst %o4
bz,pt %ncc, 2f ! if not, return error
nop
ldn [%o4 + CP_XCOPYIN], %g2 ! if handler, invoke it with
jmp %g2 ! original arguments
restore %g0, 0, %g0 ! dispose of copy window
2:
ret
restore ERRNO, 0, %o0 ! return errno value
! Fault exit for the small-copy (leaf) path
.sm_xcopyin_err:
membar #Sync
stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
mov SM_SAVE_SRC, %o0 ! rebuild original arguments
mov SM_SAVE_DST, %o1
mov SM_SAVE_COUNT, %o2
ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
tst %o3
bz,pt %ncc, 3f ! if not, return error
nop
ldn [%o3 + CP_XCOPYIN], %o5 ! if handler, invoke it with
jmp %o5 ! original arguments
nop
3:
retl
or %g1, 0, %o0 ! return errno value
SET_SIZE(xcopyin)
!
! xcopyin_little: copy %o2 bytes from user address %o0 to kernel
! address %o1, reading through the user secondary little-endian ASI
! (ASI_AIUSL) and writing the bytes in reverse order: the last source
! byte is stored at the first destination byte.  Returns 0 on success.
! Faults vector to .xcopyio_err below, which is shared with
! xcopyout_little and returns the errno from %g1.
!
ENTRY(xcopyin_little)
sethi %hi(.xcopyio_err), %o5
or %o5, %lo(.xcopyio_err), %o5
ldn [THREAD_REG + T_LOFAULT], %o4
membar #Sync ! sync error barrier
stn %o5, [THREAD_REG + T_LOFAULT]
mov %o4, %o5 ! keep old lofault for the exit path
subcc %g0, %o2, %o3 ! %o3 = -count (loop index toward 0)
add %o0, %o2, %o0
bz,pn %ncc, 2f ! check for zero bytes
sub %o2, 1, %o4
add %o0, %o4, %o0 ! start w/last byte
add %o1, %o2, %o1
lduba [%o0 + %o3]ASI_AIUSL, %o4 ! fetch last source byte first
1: stb %o4, [%o1 + %o3]
inccc %o3
sub %o0, 2, %o0 ! get next byte (net -1 since %o3 advanced)
bcc,a,pt %ncc, 1b
lduba [%o0 + %o3]ASI_AIUSL, %o4 ! (delay, annulled on exit) next byte
2:
membar #Sync ! sync error barrier
stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g0, %o0 ! return (0)
! Shared fault exit for xcopyin_little and xcopyout_little
.xcopyio_err:
membar #Sync ! sync error barrier
stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
retl
mov %g1, %o0 ! return errno from %g1
SET_SIZE(xcopyin_little)
!
! copyin_noerr: copyin() variant for callers that have arranged their
! own fault handling -- on a fault it does not return an error here
! but transfers control to the previously installed t_lofault routine
! (via .copyio_noerr / .sm_copyio_noerr below).  Size/alignment
! dispatch is identical to copyin().
!
ENTRY(copyin_noerr)
cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
bleu,pt %ncc, .copyin_ne_small ! go to larger cases
xor %o0, %o1, %o3 ! are src, dst alignable?
btst 7, %o3 !
bz,pt %ncc, .copyin_ne_8 ! check for longword alignment
nop
btst 1, %o3 !
bz,pt %ncc, .copyin_ne_2 ! check for half-word
nop
sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_1)], %o3
tst %o3
bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyin_ne_small ! go to small copy
nop
ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
nop
.copyin_ne_2:
btst 3, %o3 !
bz,pt %ncc, .copyin_ne_4 ! check for word alignment
nop
sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_2)], %o3
tst %o3
bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyin_ne_small ! go to small copy
nop
ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
nop
.copyin_ne_4:
! already checked longword, must be word aligned
sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_4)], %o3
tst %o3
bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyin_ne_small ! go to small copy
nop
ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
nop
.copyin_ne_8:
sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
ld [%o3 + %lo(hw_copy_limit_8)], %o3
tst %o3
bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
cmp %o2, %o3 ! if length <= limit
bleu,pt %ncc, .copyin_ne_small ! go to small copy
nop
ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
nop
! Small copy: if the caller installed no lofault, just run the leaf
! path; otherwise interpose .sm_copyio_noerr so we can restore and
! chain to the caller's handler on fault.
.copyin_ne_small:
ldn [THREAD_REG + T_LOFAULT], %o4
tst %o4
bz,pn %ncc, .sm_do_copyin
nop
sethi %hi(.sm_copyio_noerr), %o5
or %o5, %lo(.sm_copyio_noerr), %o5
membar #Sync ! sync error barrier
ba,pt %ncc, .sm_do_copyin
stn %o5, [THREAD_REG + T_LOFAULT] ! set/save t_lofault
.copyin_noerr_more:
save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
sethi %hi(.copyio_noerr), REAL_LOFAULT
ba,pt %ncc, .do_copyin
or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
! Large-copy fault exit: chain to the saved handler in %l6
! (NOTE(review): flag bits appear to be cleared by copyio_fault
! before control reaches here -- confirm against copyio_fault)
.copyio_noerr:
jmp %l6
restore %g0,0,%g0 ! (delay) dispose of copy window
! Small-copy fault exit: restore the caller's t_lofault (%o4) and
! jump to it
.sm_copyio_noerr:
membar #Sync
stn %o4, [THREAD_REG + T_LOFAULT] ! restore t_lofault
jmp %o4
nop
SET_SIZE(copyin_noerr)
! copyout_noerr(const void *kaddr, void *uaddr, size_t count)
!
! Copy count bytes from kernel to user space; like copyin_noerr, no
! error return of its own - a fault chains to the caller's existing
! t_lofault (via the shared .sm_copyio_noerr / .copyio_noerr stubs
! defined under copyin_noerr).  Dispatch is identical in structure:
! small/unaligned requests use the leaf loops, large aligned ones the
! block-copy path, gated by the hw_copy_limit_N tunables (0 = HW path
! disabled for that alignment class).
ENTRY(copyout_noerr)
	cmp	%o2, VIS_COPY_THRESHOLD	! check for leaf rtn case
	bleu,pt	%ncc, .copyout_ne_small	! go to small cases
	xor	%o0, %o1, %o3		! are src, dst alignable?
	btst	7, %o3			! low 3 bits of src^dst
	bz,pt	%ncc, .copyout_ne_8	! check for longword alignment
	nop
	btst	1, %o3			! odd bit set -> only byte-alignable
	bz,pt	%ncc, .copyout_ne_2	! check for half-word
	nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop
.copyout_ne_2:
	btst	3, %o3			! halfword-alignable at least
	bz,pt	%ncc, .copyout_ne_4	! check for word alignment
	nop
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop
.copyout_ne_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop
.copyout_ne_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop
.copyout_ne_small:
	ldn	[THREAD_REG + T_LOFAULT], %o4	! caller's existing handler
	tst	%o4
	bz,pn	%ncc, .sm_do_copyout	! none set: copy with no handler
	nop
	sethi	%hi(.sm_copyio_noerr), %o5	! else interpose shared stub
	or	%o5, %lo(.sm_copyio_noerr), %o5
	membar	#Sync			! sync error barrier
	ba,pt	%ncc, .sm_do_copyout
	stn	%o5, [THREAD_REG + T_LOFAULT]	! (delay) set/save t_lofault
.copyout_noerr_more:
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	ba,pt	%ncc, .do_copyout
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
SET_SIZE(copyout_noerr)
! hwblkclr(void *addr, size_t len)
!
! Zero a region using VIS block stores when profitable.  The region
! must be 64-byte (VIS_BLOCKSIZE) aligned, at least 256 bytes, and a
! multiple of 64 bytes; anything else is punted to bzero() and we
! return 1 so the caller knows block operations were not used.
! Returns 0 when the block-store path was taken.  In-use FP registers
! (%d0-%d14) are saved to an aligned stack block and restored.
!
! %i0 - start address
! %i1 - length of region (multiple of 64)
! %l0 - saved fprs
! %l1 - pointer to saved %d0 block
! %l2 - saved curthread->t_lwp
ENTRY(hwblkclr)
	! get another window w/space for one aligned block of saved fpregs
	save	%sp, -SA(MINFRAME + 2*VIS_BLOCKSIZE), %sp
	! Must be block-aligned
	andcc	%i0, (VIS_BLOCKSIZE-1), %g0
	bnz,pn	%ncc, 1f
	nop
	! ... and must be 256 bytes or more
	cmp	%i1, 256
	blu,pn	%ncc, 1f
	nop
	! ... and length must be a multiple of VIS_BLOCKSIZE
	andcc	%i1, (VIS_BLOCKSIZE-1), %g0
	bz,pn	%ncc, 2f
	nop
1:	! punt, call bzero but notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	mov	%i1, %o1		! (delay) bzero(addr, len)
	ret
	restore	%g0, 1, %o0		! return (1) - did not use block operations
2:	rd	%fprs, %l0		! check for unused fp
	btst	FPRS_FEF, %l0
	bz,pt	%icc, 1f		! FP not in use: skip the save
	nop
	! save in-use fpregs on stack
	membar	#Sync
	add	%fp, STACK_BIAS - 65, %l1	! -65 leaves room to round down
	and	%l1, -VIS_BLOCKSIZE, %l1	! 64-byte align the save area
	stda	%d0, [%l1]ASI_BLK_P
1:	membar	#StoreStore|#StoreLoad|#LoadStore
	wr	%g0, FPRS_FEF, %fprs	! enable FP
	wr	%g0, ASI_BLK_P, %asi	! %asi = block-store ASI for stda below
	! Clear block
	fzero	%d0
	fzero	%d2
	fzero	%d4
	fzero	%d6
	fzero	%d8
	fzero	%d10
	fzero	%d12
	fzero	%d14
	mov	256, %i3		! main loop clears 256 bytes per pass
	ba,pt	%ncc, .pz_doblock
	nop
.pz_blkstart:
	! stda	%d0, [%i0 + 192]%asi	! in dly slot of branch that got us here
	stda	%d0, [%i0 + 128]%asi
	stda	%d0, [%i0 + 64]%asi
	stda	%d0, [%i0]%asi
.pz_zinst:
	add	%i0, %i3, %i0		! advance by bytes cleared this pass
	sub	%i1, %i3, %i1
.pz_doblock:
	cmp	%i1, 256
	bgeu,a	%ncc, .pz_blkstart	! >= 256 left: clear four blocks
	stda	%d0, [%i0 + 192]%asi	! (annulled delay) first of the four
	cmp	%i1, 64
	blu	%ncc, .pz_finish	! < 64 left: done (len is 64-multiple)
	andn	%i1, (64-1), %i3	! (delay) %i3 = remaining whole-block bytes
	srl	%i3, 4, %i2		! using blocks, 1 instr / 16 words
	set	.pz_zinst, %i4		! each 4-byte stda clears 64 data bytes,
	sub	%i4, %i2, %i4		! so back up %i3/16 instruction bytes into
	jmp	%i4			! the stda run: exactly %i3/64 stores run
	nop
.pz_finish:
	membar	#Sync
	btst	FPRS_FEF, %l0		! were fpregs saved earlier?
	bz,a	.pz_finished
	wr	%l0, 0, %fprs		! (annulled delay) restore fprs
	! restore fpregs from stack
	ldda	[%l1]ASI_BLK_P, %d0
	membar	#Sync
	wr	%l0, 0, %fprs		! restore fprs
.pz_finished:
	ret
	restore	%g0, 0, %o0		! return (0) - block operations used
SET_SIZE(hwblkclr)
! hw_pa_bcopy32(uint64_t src_pa, uint64_t dst_pa)
!
! Copy 32 bytes between physical addresses via ASI_MEM, with
! interrupts disabled for the duration so the copy cannot be torn by
! an interrupt on this CPU.  All four source doublewords are loaded
! before any store, so a 32-byte overlap still copies the old data.
! NOTE(review): assumes both addresses are 8-byte aligned - confirm
! with callers.
ENTRY_NP(hw_pa_bcopy32)
	rdpr	%pstate, %g1		! save current pstate
	andn	%g1, PSTATE_IE, %g2
	wrpr	%g0, %g2, %pstate	! disable interrupts
	rdpr	%pstate, %g0		! read back - presumably serializes the
					! wrpr before the loads; confirm vs PRM
	ldxa	[%o0]ASI_MEM, %o2	! load 4 doublewords from src PA
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o3
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o4
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o5
	stxa	%g0, [%o1]ASI_DC_INVAL	! invalidate dst line in D$ (per ASI
					! name) before the physical stores
	membar	#Sync
	stxa	%o2, [%o1]ASI_MEM	! store 4 doublewords to dst PA
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM
	retl
	wrpr	%g0, %g1, %pstate	! (delay) restore pstate/interrupts
SET_SIZE(hw_pa_bcopy32)
! Tunables for the hardware (block-move) copy paths.  use_hw_bcopy and
! use_hw_bzero are not referenced in this chunk - presumably consulted
! by the bcopy/bzero entry points elsewhere in the file.
DGDEF(use_hw_bcopy)
	.word	1			! nonzero: hw bcopy permitted
DGDEF(use_hw_bzero)
	.word	1			! nonzero: hw bzero permitted
! Minimum copy length before the HW path is taken, per src/dst
! alignment class (see copyin_noerr dispatch above); 0 disables the HW
! path for that class.  They start at 0 - presumably tuned at startup.
DGDEF(hw_copy_limit_1)
	.word	0			! byte-aligned copies
DGDEF(hw_copy_limit_2)
	.word	0			! halfword-aligned copies
DGDEF(hw_copy_limit_4)
	.word	0			! word-aligned copies
DGDEF(hw_copy_limit_8)
	.word	0			! doubleword-aligned copies
	.align	64
	.section ".text"