#include <machine/asm.h>
#include <machine/psl.h>
/*
 * Vector assist: emulate AltiVec floating-point instructions with the
 * scalar FPU.  The comparisons against 0x1p-126 (smallest normal
 * single) below suggest this exists to give denormal operands and
 * results correct values that the vector unit would flush.
 *
 * The x_ROTATE triples are rlwinm (shift, mask-begin, mask-end)
 * arguments that move a 5-bit register field of the trapping
 * instruction word (in %r3) into bits 24-28, producing field * 8:
 * the byte offset of an 8-byte entry in the dispatch tables at the
 * end of this file.  Fields: vD = insn bits 6-10, vA = 11-15,
 * vB = 16-20, vC = 21-25.
 */
#define VD_ROTATE 14, 24, 28
#define VA_ROTATE 19, 24, 28
#define VB_ROTATE 24, 24, 28
#define VC_ROTATE 29, 24, 28
/*
 * Move the 5-bit UIMM field (insn bits 11-15) into bits 4-8,
 * i.e. UIMM << 23 -- the exponent field of an IEEE single.
 */
#define UI_ROTATE 7, 4, 8
/*
 * High 16 bits of the IEEE single encoding of 1.0f (0x3f800000);
 * addis'ing this onto UIMM << 23 builds 2^UIMM as a float.
 */
#define FLOAT_1_IS 0x3f80
.rodata
.balign 4
.Lzero: .float 0
.Lone: .float 1
.Ln126: .float 126
.Ltwo63: .float 0x1p63
.Ltwo126: .float 0x1p126
.Lmin: .float 0x1p-126	/* smallest normal single */
.text
/*
 * Stack frame layout (s_size bytes):
 *   64..127  save area for %f24-%f31
 *   16..63   three 16-byte scratch slots for vector operands;
 *            s_va also holds the result written back to vD
 */
#define s_size 128
#define s_f31 120
#define s_f30 112
#define s_f29 104
#define s_f28 96
#define s_f27 88
#define s_f26 80
#define s_f25 72
#define s_f24 64
#define s_vc 48
#define s_vb 32
#define s_va 16
/*
 * vecast_asm: common entry.
 * In:  %r3 = the trapping AltiVec instruction word
 *      %r4 = address of the vecast_* handler for its opcode
 * Enables the FPU and the vector unit, saves %f24-%f31 and the FPSCR,
 * clears the FPSCR, spills operand vB to s_vb, then jumps to the
 * handler with %r4 = 4 (32-bit elements per vector).
 * Live across the jump, relied on by the handlers and vecast_finish:
 *   %r0 = saved LR, %r5 = saved MSR, %r6 = &.Lzero (constant-pool
 *   base), %r9 = retguard cookie, %f30 = +0.0, %f31 = saved FPSCR.
 */
ENTRY(vecast_asm)
mflr %r0	/* %r0 = return address, restored in vecast_finish */
RETGUARD_SETUP_LATE(vecast_asm, %r9, %r0)
stwu %r1, -s_size(%r1)	/* allocate frame */
mfmsr %r5	/* %r5 = MSR to restore on exit */
rlwinm %r6, %r5, 0, 17, 15	/* clear bit 16 (PSL_EE? -- NOTE(review):
				   looks like interrupts stay off while we
				   borrow the FPU/vector state; confirm */
oris %r6, %r6, PSL_VEC >> 16	/* enable the vector unit */
ori %r6, %r6, PSL_FP	/* and the scalar FPU */
mtmsr %r6
isync
stfd %f31, s_f31(%r1)	/* save the FPRs used as scratch below */
stfd %f30, s_f30(%r1)
stfd %f29, s_f29(%r1)
stfd %f28, s_f28(%r1)
stfd %f27, s_f27(%r1)
stfd %f26, s_f26(%r1)
stfd %f25, s_f25(%r1)
stfd %f24, s_f24(%r1)
mffs %f31	/* %f31 = caller's FPSCR */
lis %r6, .Lzero@ha
la %r6, .Lzero@l(%r6)	/* %r6 = constant-pool base, kept live */
lfs %f30, 0(%r6)	/* %f30 = +0.0; vlogefp/vctsxs/vctuxs rely on it */
mtfsf 255, %f30	/* clear FPSCR: round-to-nearest, no enabled traps */
rlwinm %r7, %r3, VB_ROTATE	/* %r7 = vB * 8 */
la %r8, s_vb(%r1)
bl vecast_store_vector	/* spill VR[vB] to s_vb for all handlers */
mtctr %r4	/* handler address */
li %r4, 4	/* element count handed to the handler */
bctr
/*
 * vaddfp: vD = vA + vB element-wise, computed with scalar fadds.
 * vB is already in s_vb (common prologue); spill vA, add in place
 * over s_va, and let vecast_finish write s_va back to VR[vD].
 */
.globl vecast_vaddfp
vecast_vaddfp:
rlwinm %r7, %r3, VA_ROTATE	/* %r7 = vA * 8 */
la %r8, s_va(%r1)
bl vecast_store_vector	/* spill VR[vA] to s_va */
mtctr %r4	/* 4 elements */
la %r7, (s_va - 4)(%r1)	/* lfsu pre-increments: start one word low */
1: lfsu %f30, 4(%r7)	/* %f30 = va[i], %r7 -> va[i] */
lfs %f29, (s_vb - s_va)(%r7)	/* %f29 = vb[i] (fixed slot offset) */
fadds %f30, %f30, %f29
stfs %f30, 0(%r7)	/* result back over s_va */
bdnz 1b
b vecast_finish
/*
 * vsubfp: vD = vA - vB element-wise, computed with scalar fsubs.
 * Same shape as vecast_vaddfp: result accumulates in s_va.
 */
.globl vecast_vsubfp
vecast_vsubfp:
rlwinm %r7, %r3, VA_ROTATE	/* %r7 = vA * 8 */
la %r8, s_va(%r1)
bl vecast_store_vector	/* spill VR[vA] to s_va */
mtctr %r4	/* 4 elements */
la %r7, (s_va - 4)(%r1)	/* lfsu pre-increments */
1: lfsu %f30, 4(%r7)	/* %f30 = va[i] */
lfs %f29, (s_vb - s_va)(%r7)	/* %f29 = vb[i] */
fsubs %f30, %f30, %f29
stfs %f30, 0(%r7)	/* result back over s_va */
bdnz 1b
b vecast_finish
/*
 * vmaddfp: vD = vA * vC + vB element-wise.  Spills vA and vC, then
 * fmadds (fD = fA*fC + fB) matches the vector operand order exactly.
 */
.globl vecast_vmaddfp
vecast_vmaddfp:
rlwinm %r7, %r3, VA_ROTATE	/* %r7 = vA * 8 */
la %r8, s_va(%r1)
bl vecast_store_vector	/* spill VR[vA] to s_va */
rlwinm %r7, %r3, VC_ROTATE	/* %r7 = vC * 8 */
la %r8, s_vc(%r1)
bl vecast_store_vector	/* spill VR[vC] to s_vc */
mtctr %r4	/* 4 elements */
la %r7, (s_va - 4)(%r1)	/* lfsu pre-increments */
1: lfsu %f30, 4(%r7)	/* %f30 = va[i] */
lfs %f29, (s_vb - s_va)(%r7)	/* %f29 = vb[i] */
lfs %f28, (s_vc - s_va)(%r7)	/* %f28 = vc[i] */
fmadds %f30, %f30, %f28, %f29	/* va[i]*vc[i] + vb[i] */
stfs %f30, 0(%r7)	/* result back over s_va */
bdnz 1b
b vecast_finish
/*
 * vnmsubfp: vD = -(vA * vC - vB) element-wise, via scalar fnmsubs
 * (fD = -(fA*fC - fB)).  Same layout as vecast_vmaddfp.
 */
.globl vecast_vnmsubfp
vecast_vnmsubfp:
rlwinm %r7, %r3, VA_ROTATE	/* %r7 = vA * 8 */
la %r8, s_va(%r1)
bl vecast_store_vector	/* spill VR[vA] to s_va */
rlwinm %r7, %r3, VC_ROTATE	/* %r7 = vC * 8 */
la %r8, s_vc(%r1)
bl vecast_store_vector	/* spill VR[vC] to s_vc */
mtctr %r4	/* 4 elements */
la %r7, (s_va - 4)(%r1)	/* lfsu pre-increments */
1: lfsu %f30, 4(%r7)	/* %f30 = va[i] */
lfs %f29, (s_vb - s_va)(%r7)	/* %f29 = vb[i] */
lfs %f28, (s_vc - s_va)(%r7)	/* %f28 = vc[i] */
fnmsubs %f30, %f30, %f28, %f29	/* -(va[i]*vc[i] - vb[i]) */
stfs %f30, 0(%r7)	/* result back over s_va */
bdnz 1b
b vecast_finish
/*
 * vrefp: vD = reciprocal estimate of vB, element-wise, using scalar
 * fres on the s_vb copy; results land in s_va for vecast_finish.
 */
.globl vecast_vrefp
vecast_vrefp:
mtctr %r4	/* 4 elements */
la %r7, (s_vb - 4)(%r1)	/* lfsu pre-increments over s_vb */
1: lfsu %f30, 4(%r7)	/* %f30 = vb[i] */
fres %f30, %f30	/* scalar reciprocal estimate */
stfs %f30, (s_va - s_vb)(%r7)	/* result into s_va slot */
bdnz 1b
b vecast_finish
/*
 * vrsqrtefp: vD ~= 1/sqrt(vB).  Inputs below 0x1p-126 (denormal,
 * zero, or negative per the fsel sign test) are prescaled by 0x1p126
 * and a per-element correction of 0x1p63 is recorded, because
 * 1/sqrt(x * 2^126) = 2^-63 * 1/sqrt(x); in-range inputs keep a
 * correction of 1.0.  The real vrsqrtefp then runs on the scaled
 * inputs and each estimate is multiplied by its correction.
 */
.globl vecast_vrsqrtefp
vecast_vrsqrtefp:
lfs %f30, (.Lone - .Lzero)(%r6)	/* 1.0 */
lfs %f29, (.Ltwo63 - .Lzero)(%r6)	/* 0x1p63 */
lfs %f28, (.Ltwo126 - .Lzero)(%r6)	/* 0x1p126 */
lfs %f27, (.Lmin - .Lzero)(%r6)	/* 0x1p-126 */
mtctr %r4	/* 4 elements */
la %r7, (s_vb - 4)(%r1)	/* lfsu pre-increments over s_vb */
1: lfsu %f26, 4(%r7)	/* x = vb[i] */
fmuls %f25, %f26, %f28	/* x * 2^126 */
fsubs %f24, %f26, %f27	/* x - 0x1p-126: its sign selects below */
fsel %f26, %f24, %f26, %f25	/* x >= min normal ? x : scaled x */
stfs %f26, 0(%r7)	/* (possibly scaled) input back into s_vb */
fsel %f25, %f24, %f30, %f29	/* correction: 1.0 or 2^63 */
stfs %f25, (s_va - s_vb)(%r7)	/* corrections into s_va */
bdnz 1b
la %r7, s_vc(%r1)
la %r8, s_vb(%r1)
stvx %v31, 0, %r7	/* preserve %v31 in s_vc */
lvx %v31, 0, %r8
vrsqrtefp %v31, %v31	/* hardware estimate on the scaled inputs */
stvx %v31, 0, %r8	/* estimates back to s_vb */
lvx %v31, 0, %r7	/* restore %v31 */
mtctr %r4	/* 4 elements */
la %r7, (s_va - 4)(%r1)
1: lfsu %f30, 4(%r7)	/* correction[i] */
lfs %f29, (s_vb - s_va)(%r7)	/* estimate[i] */
fmuls %f30, %f29, %f30	/* corrected estimate */
stfs %f30, 0(%r7)	/* result to s_va for vecast_finish */
bdnz 1b
b vecast_finish
/*
 * vlogefp: vD ~= log2(vB).  Inputs below 0x1p-126 are prescaled by
 * 0x1p126 with a per-element bias of 126 recorded, since
 * log2(x * 2^126) = log2(x) + 126; in-range inputs record a bias of
 * 0.0, which comes from %f30 still holding +0.0 from the common
 * prologue (deliberately not reloaded here).  After the hardware
 * vlogefp, the bias is subtracted from each estimate.
 */
.globl vecast_vlogefp
vecast_vlogefp:
lfs %f29, (.Ln126 - .Lzero)(%r6)	/* 126.0 */
lfs %f28, (.Ltwo126 - .Lzero)(%r6)	/* 0x1p126 */
lfs %f27, (.Lmin - .Lzero)(%r6)	/* 0x1p-126 */
mtctr %r4	/* 4 elements */
la %r7, (s_vb - 4)(%r1)	/* lfsu pre-increments over s_vb */
1: lfsu %f26, 4(%r7)	/* x = vb[i] */
fmuls %f25, %f26, %f28	/* x * 2^126 */
fsubs %f24, %f26, %f27	/* x - 0x1p-126: sign selects below */
fsel %f26, %f24, %f26, %f25	/* x >= min normal ? x : scaled x */
stfs %f26, 0(%r7)	/* (possibly scaled) input back into s_vb */
fsel %f25, %f24, %f30, %f29	/* bias: 0.0 (%f30) or 126.0 */
stfs %f25, (s_va - s_vb)(%r7)	/* biases into s_va */
bdnz 1b
la %r7, s_vc(%r1)
la %r8, s_vb(%r1)
stvx %v31, 0, %r7	/* preserve %v31 in s_vc */
lvx %v31, 0, %r8
vlogefp %v31, %v31	/* hardware estimate on the scaled inputs */
stvx %v31, 0, %r8	/* estimates back to s_vb */
lvx %v31, 0, %r7	/* restore %v31 */
mtctr %r4	/* 4 elements */
la %r7, (s_va - 4)(%r1)
1: lfsu %f30, 4(%r7)	/* bias[i] */
lfs %f29, (s_vb - s_va)(%r7)	/* estimate[i] */
fsubs %f30, %f29, %f30	/* estimate - bias */
stfs %f30, 0(%r7)	/* result to s_va for vecast_finish */
bdnz 1b
b vecast_finish
/*
 * vexptefp: vD ~= 2^vB.  Inputs below -126 would produce denormal
 * results, so they are biased up by +126 and the estimate is scaled
 * back by 0x1p-126 afterwards (2^(x+126) * 2^-126 = 2^x).  Inputs
 * below -252 are left unbiased (second fsel): the product underflows
 * to zero either way.  In-range inputs use a scale of 1.0.
 */
.globl vecast_vexptefp
vecast_vexptefp:
lfs %f30, (.Lone - .Lzero)(%r6)	/* 1.0 */
lfs %f29, (.Ln126 - .Lzero)(%r6)	/* 126.0 */
lfs %f28, (.Lmin - .Lzero)(%r6)	/* 0x1p-126 */
mtctr %r4	/* 4 elements */
la %r7, (s_vb - 4)(%r1)	/* lfsu pre-increments over s_vb */
1: lfsu %f27, 4(%r7)	/* x = vb[i] */
fadds %f26, %f27, %f29	/* x + 126 */
fadds %f25, %f26, %f29	/* x + 252 */
fsel %f24, %f26, %f27, %f26	/* x+126 >= 0 ? x : x+126 */
fsel %f24, %f25, %f24, %f27	/* but if x < -252, keep original x */
stfs %f24, 0(%r7)	/* (possibly biased) input back into s_vb */
fsel %f27, %f26, %f30, %f28	/* scale: 1.0 or 0x1p-126 */
stfs %f27, (s_va - s_vb)(%r7)	/* scales into s_va */
bdnz 1b
la %r7, s_vc(%r1)
la %r8, s_vb(%r1)
stvx %v31, 0, %r7	/* preserve %v31 in s_vc */
lvx %v31, 0, %r8
vexptefp %v31, %v31	/* hardware estimate on the biased inputs */
stvx %v31, 0, %r8	/* estimates back to s_vb */
lvx %v31, 0, %r7	/* restore %v31 */
mtctr %r4	/* 4 elements */
la %r7, (s_va - 4)(%r1)
1: lfsu %f30, 4(%r7)	/* scale[i] */
lfs %f29, (s_vb - s_va)(%r7)	/* estimate[i] */
fmuls %f30, %f29, %f30	/* estimate * scale */
stfs %f30, 0(%r7)	/* result to s_va for vecast_finish */
bdnz 1b
b vecast_finish
/*
 * vctsxs: vD = signed int32 of vB * 2^UIMM, saturated.  Build 2^UIMM
 * as a float by adding UIMM << 23 to the bit pattern of 1.0f, then
 * pre-multiply each element with the scalar FPU; elements with
 * |x| < 0x1p-126 are replaced by +0.0 (%f30, still zero from the
 * common prologue) so denormal inputs convert as zero.  The real
 * vctsxs then runs with a shift of 0 on the products in s_va.
 */
.globl vecast_vctsxs
vecast_vctsxs:
lfs %f29, (.Lmin - .Lzero)(%r6)	/* 0x1p-126 */
rlwinm %r7, %r3, UI_ROTATE	/* UIMM << 23 */
addis %r7, %r7, FLOAT_1_IS	/* + bits of 1.0f = 2^UIMM encoding */
stw %r7, s_va(%r1)	/* bounce through memory... */
lfs %f28, s_va(%r1)	/* ...to get %f28 = 2^UIMM */
mtctr %r4	/* 4 elements */
la %r7, (s_vb - 4)(%r1)	/* lfsu pre-increments over s_vb */
1: lfsu %f27, 4(%r7)	/* x = vb[i] */
fmuls %f26, %f27, %f28	/* x * 2^UIMM */
fabs %f27, %f27	/* |x| */
fsubs %f27, %f27, %f29	/* |x| - 0x1p-126: sign selects below */
fsel %f26, %f27, %f26, %f30	/* denormal x converts as 0 */
stfs %f26, (s_va - s_vb)(%r7)	/* products into s_va */
bdnz 1b
la %r7, s_vc(%r1)
la %r8, s_va(%r1)
stvx %v31, 0, %r7	/* preserve %v31 in s_vc */
lvx %v31, 0, %r8
vctsxs %v31, %v31, 0	/* convert the pre-scaled values */
stvx %v31, 0, %r8	/* results to s_va for vecast_finish */
lvx %v31, 0, %r7	/* restore %v31 */
b vecast_finish
/*
 * vctuxs: vD = unsigned int32 of vB * 2^UIMM, saturated.  Identical
 * scheme to vecast_vctsxs above, ending in the unsigned hardware
 * convert; falls through into vecast_finish.
 */
.globl vecast_vctuxs
vecast_vctuxs:
lfs %f29, (.Lmin - .Lzero)(%r6)	/* 0x1p-126 */
rlwinm %r7, %r3, UI_ROTATE	/* UIMM << 23 */
addis %r7, %r7, FLOAT_1_IS	/* + bits of 1.0f = 2^UIMM encoding */
stw %r7, s_va(%r1)	/* bounce through memory... */
lfs %f28, s_va(%r1)	/* ...to get %f28 = 2^UIMM */
mtctr %r4	/* 4 elements */
la %r7, (s_vb - 4)(%r1)	/* lfsu pre-increments over s_vb */
1: lfsu %f27, 4(%r7)	/* x = vb[i] */
fmuls %f26, %f27, %f28	/* x * 2^UIMM */
fabs %f27, %f27	/* |x| */
fsubs %f27, %f27, %f29	/* |x| - 0x1p-126: sign selects below */
fsel %f26, %f27, %f26, %f30	/* denormal x converts as 0 (%f30 = +0.0) */
stfs %f26, (s_va - s_vb)(%r7)	/* products into s_va */
bdnz 1b
la %r7, s_vc(%r1)
la %r8, s_va(%r1)
stvx %v31, 0, %r7	/* preserve %v31 in s_vc */
lvx %v31, 0, %r8
vctuxs %v31, %v31, 0	/* convert the pre-scaled values */
stvx %v31, 0, %r8	/* results to s_va for vecast_finish */
lvx %v31, 0, %r7	/* restore %v31 */
/*
 * vecast_finish: common epilogue.  Copy the 16-byte result at s_va
 * into VR[vD] via a computed jump into a table of lvx+branch pairs,
 * then restore the FPSCR, %f24-%f31, the original MSR and the frame,
 * and return through retguard.  vecast_vctuxs falls through to here;
 * every other handler branches here.  Expects %r0 = saved LR,
 * %r5 = saved MSR, %r9 = retguard cookie from vecast_asm.
 */
vecast_finish:
rlwinm %r7, %r3, VD_ROTATE	/* %r7 = vD * 8 */
addis %r7, %r7, 1f@ha
addi %r7, %r7, 1f@l	/* address of vD's table entry */
mtctr %r7
la %r8, s_va(%r1)	/* source of the lvx below */
bctr
/* Each entry is 8 bytes (lvx + b), matching the *8 index above. */
#define M(n) lvx %v##n, 0, %r8; b 2f
1: M( 0); M( 1); M( 2); M( 3); M( 4); M( 5); M( 6); M( 7)
M( 8); M( 9); M(10); M(11); M(12); M(13); M(14); M(15)
M(16); M(17); M(18); M(19); M(20); M(21); M(22); M(23)
M(24); M(25); M(26); M(27); M(28); M(29); M(30); M(31)
#undef M
2: mtlr %r0	/* restore return address */
mtfsf 255, %f31	/* restore the caller's FPSCR */
lfd %f24, s_f24(%r1)	/* restore scratch FPRs */
lfd %f25, s_f25(%r1)
lfd %f26, s_f26(%r1)
lfd %f27, s_f27(%r1)
lfd %f28, s_f28(%r1)
lfd %f29, s_f29(%r1)
lfd %f30, s_f30(%r1)
lfd %f31, s_f31(%r1)
mtmsr %r5	/* restore original MSR (FP/VEC/interrupt state) */
isync
addi %r1, %r1, s_size	/* pop frame */
RETGUARD_CHECK(vecast_asm, %r9, %r0)
blr
/*
 * vecast_store_vector: spill one vector register to memory.
 * In:  %r7 = vector register number * 8 (pre-scaled by the caller's
 *            rlwinm), %r8 = 16-byte destination buffer.
 * Dispatches into a table of 8-byte stvx+branch entries, one per
 * register.  Clobbers %r7, ctr, %r11, %r12 (own retguard pair).
 */
vecast_store_vector:
RETGUARD_SETUP(vecast_store_vector, %r11, %r12)
addis %r7, %r7, 1f@ha
addi %r7, %r7, 1f@l	/* address of the register's table entry */
mtctr %r7
bctr
/* Each entry is 8 bytes (stvx + b), matching the *8 index. */
#define M(n) stvx %v##n, 0, %r8; b 2f
1: M( 0); M( 1); M( 2); M( 3); M( 4); M( 5); M( 6); M( 7)
M( 8); M( 9); M(10); M(11); M(12); M(13); M(14); M(15)
M(16); M(17); M(18); M(19); M(20); M(21); M(22); M(23)
M(24); M(25); M(26); M(27); M(28); M(29); M(30); M(31)
#undef M
2: RETGUARD_CHECK(vecast_store_vector, %r11, %r12)
blr