#include <sys/asm_linkage.h>
#include <sys/trap.h>
#include <sys/stack.h>
#include <sys/privregs.h>
#include <sys/regset.h>
#include <sys/vis.h>
#include <sys/machthread.h>
#include <sys/machtrap.h>
#include <sys/machsig.h>
! ---------------------------------------------------------------
! Section setup and exported constant pool.
! These are IEEE-754 double-precision constants used by the
! Montgomery multiplication routines in this file.  SPARC is
! big-endian, so each double is emitted as two .word values:
! high word (sign/exponent/upper mantissa) first, then low word.
! Values per the C listing below (lines 18-22 of mont_mulf.c):
!   TwoTo16      = 65536.0            (2^16)
!   TwoToMinus16 = 1.0/65536.0        (2^-16)
!   Zero         = 0.0
!   TwoTo32      = 65536.0 * 65536.0  (2^32)
!   TwoToMinus32 = 1.0/(2^32)         (2^-32)
! ---------------------------------------------------------------
.section ".text",#alloc,#execinstr
.file "mont_mulf.s"
.section ".bss",#alloc,#write
Bbss.bss:
.section ".data",#alloc,#write
Ddata.data:
.section ".rodata",#alloc
!
! CONSTANT POOL
!
Drodata.rodata:
.global TwoTo16
.align 8
!
! CONSTANT POOL
!
.global TwoTo16
TwoTo16:
.word 1089470464 ! 0x40f00000 = high word of 65536.0 (2^16)
.word 0
.type TwoTo16,#object
.size TwoTo16,8
.global TwoToMinus16
!
! CONSTANT POOL
!
.global TwoToMinus16
TwoToMinus16:
.word 1055916032 ! 0x3ef00000 = high word of 1/65536.0 (2^-16)
.word 0
.type TwoToMinus16,#object
.size TwoToMinus16,8
.global Zero
!
! CONSTANT POOL
!
.global Zero
Zero:
.word 0 ! 0.0 (both words zero)
.word 0
.type Zero,#object
.size Zero,8
.global TwoTo32
!
! CONSTANT POOL
!
.global TwoTo32
TwoTo32:
.word 1106247680 ! 0x41f00000 = high word of 2^32
.word 0
.type TwoTo32,#object
.size TwoTo32,8
.global TwoToMinus32
!
! CONSTANT POOL
!
.global TwoToMinus32
TwoToMinus32:
.word 1039138816 ! 0x3df00000 = high word of 2^-32
.word 0
.type TwoToMinus32,#object
.size TwoToMinus32,8
.section ".text",#alloc,#execinstr
! Declare SPARC V9 application registers %g2/%g3 as scratch so the
! assembler does not warn about their use below.
.register %g3,#scratch
.register %g2,#scratch
.align 32
! FILE mont_mulf.c
! 1 !
! 6 !#pragma ident "@(#)mont_mulf.c 1.2 01/09/24 SMI"
! 9 !
! 15 !#include <sys/types.h>
! 16 !#include <math.h>
! 18 !static const double TwoTo16 = 65536.0;
! 19 !static const double TwoToMinus16 = 1.0/65536.0;
! 20 !static const double Zero = 0.0;
! 21 !static const double TwoTo32 = 65536.0 * 65536.0;
! 22 !static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
! 24 !#ifdef RF_INLINE_MACROS
! 26 !double upper32(double);
! 27 !double lower32(double, double);
! 28 !double mod(double, double, double);
! 30 !#else
! 32 !static double
! 33 !upper32(double x)
! 34 !{
! 35 ! return (floor(x * TwoToMinus32));
! 36 !}
! 39 !
! 40 !static double
! 41 !lower32(double x, double y)
! 42 !{
! 43 ! return (x - TwoTo32 * floor(x * TwoToMinus32));
! 44 !}
! 46 !static double
! 47 !mod(double x, double oneoverm, double m)
! 48 !{
! 49 ! return (x - m * floor(x * oneoverm));
! 50 !}
! 52 !#endif
! 55 !static void
! 56 !cleanup(double *dt, int from, int tlen)
! 57 !{
!
! SUBROUTINE cleanup
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
! ---------------------------------------------------------------
! static void cleanup(double *dt, int from, int tlen)
!
! In:  %o0 = dt, %o1 = from, %o2 = tlen   (SPARC V9 ABI)
! For i = 2*from .. 2*tlen-1 in steps of 2, rewrites each pair of
! doubles so that dt[i] holds only its low 32 bits plus the carry
! (upper 32 bits) propagated from the previous pair — see the
! inlined C listing comments (lines 63-69).  The split is done in
! FP registers: fdtox truncates to a 64-bit int, fmovs overwrites
! the high word with zero (from Zero) to isolate the low 32 bits
! (lower32), and fitod converts the high 32 bits (upper32) back
! to double for the next iteration's carry.
! Leaf routine (retl); clobbers %o1-%o5, %g1-%g5, %f0-%f18.
! ---------------------------------------------------------------
cleanup:
sra %o1,0,%o4 ! sign-extend 'from' to 64 bits
sra %o2,0,%o5 ! sign-extend 'tlen'
! 58 ! int i;
! 59 ! double tmp, tmp1, x, x1;
! 61 ! tmp = tmp1 = Zero;
sll %o5,1,%g5 ! %g5 = 2*tlen (loop bound)
! 63 ! for (i = 2 * from; i < 2 * tlen; i += 2) {
sll %o4,1,%g3 ! %g3 = i = 2*from
cmp %g3,%g5
bge,pn %icc,.L77000188 ! empty range: return immediately
sethi %hi(Zero),%o3
.L77000197:
ldd [%o3+%lo(Zero)],%f8 ! %f8 = 0.0, used as the zero high word below
sra %g3,0,%o1
sub %g5,1,%g2 ! %g2 = 2*tlen - 1 (loop-exit compare value)
sllx %o1,3,%g4
! 64 ! x = dt[i];
ldd [%g4+%o0],%f10 ! preload dt[i] for the pipelined loop
add %g4,%o0,%g1 ! %g1 = &dt[i]
fmovd %f8,%f18 ! tmp  = 0.0
fmovd %f8,%f16 ! tmp1 = 0.0
! 65 ! x1 = dt[i + 1];
! 66 ! dt[i] = lower32(x, Zero) + tmp;
.L900000110:
fdtox %f10,%f0 ! (int64)x
ldd [%g1+8],%f12 ! x1 = dt[i+1]
! 67 ! dt[i + 1] = lower32(x1, Zero) + tmp1;
! 68 ! tmp = upper32(x);
! 69 ! tmp1 = upper32(x1);
add %g3,2,%g3
cmp %g3,%g2
fdtox %f12,%f2 ! (int64)x1
fmovd %f0,%f4 ! keep full 64-bit int of x for upper32 below
fmovs %f8,%f0 ! zero the high 32 bits -> low word only (lower32(x))
fmovs %f8,%f2 ! same for x1
fxtod %f0,%f0 ! (double)lower32(x)
fxtod %f2,%f2 ! (double)lower32(x1)
fdtox %f12,%f6 ! redo (int64)x1; high half used by fitod below
faddd %f0,%f18,%f10 ! lower32(x) + tmp
std %f10,[%g1] ! dt[i]
faddd %f2,%f16,%f14 ! lower32(x1) + tmp1
std %f14,[%g1+8] ! dt[i+1]
fitod %f4,%f18 ! tmp  = upper32(x)  (high 32 bits as double)
add %g1,16,%g1
fitod %f6,%f16 ! tmp1 = upper32(x1)
ble,a,pt %icc,.L900000110
ldd [%g1],%f10 ! annulled delay slot: preload next dt[i]
.L77000188:
retl ! Result =
nop
.type cleanup,2
.size cleanup,(.-cleanup)
.section ".text",#alloc,#execinstr
.align 8
.skip 24
.align 32
! 70 ! }
! 71 !}
! 75 !#ifdef _KERNEL
! 76 !
! 79 !uint64_t
! 80 !double2uint64_t(double* d)
! 81 !{
! 82 ! uint64_t x;
! 83 ! uint64_t exp;
! 84 ! uint64_t man;
! 86 ! x = *((uint64_t *)d);
!
! SUBROUTINE double2uint64_t
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
! ---------------------------------------------------------------
! uint64_t double2uint64_t(double *d)
!
! In:  %o0 = pointer to a double (read as raw 64-bit pattern)
! Out: %o0 = the integer value encoded by the double
! Decodes the IEEE-754 fields directly (see C listing lines 86-94):
! if the bit pattern is 0 returns 0, otherwise extracts the
! exponent (bits 62-52, biased by 1023), ORs the implicit leading
! 1 (0x10000000000000) into the 52-bit mantissa, and shifts right
! by (52 - exp).
! NOTE(review): the sign bit is not masked out of the exponent
! field, and exp <= 52 is assumed — presumably inputs are
! non-negative doubles with integral values below 2^53; confirm
! against callers.
! Leaf routine (retl); clobbers %o1-%o5, %g1-%g5.
! ---------------------------------------------------------------
.global double2uint64_t
double2uint64_t:
ldx [%o0],%o2 ! raw bits of *d
! 87 ! if (x == 0) {
cmp %o2,0
bne,pn %xcc,.L900000206
sethi %hi(0xfff00000),%o5 ! delay slot: start building mantissa mask
.L77000202:
retl ! Result = %o0
! 88 ! return (0ULL);
or %g0,0,%o0 ! delay slot: return 0
! 89 ! }
! 90 ! exp = (x >> 52) - 1023;
! 91 ! man = (x & 0xfffffffffffffULL) | 0x10000000000000ULL;
! 92 ! x = man >> (52 - exp);
! 94 ! return (x);
.L900000206:
sllx %o5,32,%o4 ! %o4 = 0xfff00000_00000000 (sign+exponent mask)
srlx %o2,52,%o0 ! %o0 = biased exponent field
sethi %hi(0x40000000),%o1
or %g0,1023,%g5 ! exponent bias
sllx %o1,22,%g4 ! %g4 = 0x0010000000000000 (implicit leading 1)
xor %o4,-1,%o3 ! %o3 = mantissa mask 0x000fffff_ffffffff
sub %g5,%o0,%g3 ! %g3 = -(exp) = 1023 - biased_exp
and %o2,%o3,%g2 ! mantissa bits
or %g2,%g4,%o5 ! man = mantissa | implicit 1
add %g3,52,%g1 ! %g1 = 52 - exp
retl ! Result = %o0
srlx %o5,%g1,%o0 ! delay slot: x = man >> (52 - exp)
.type double2uint64_t,2
.size double2uint64_t,(.-double2uint64_t)
.section ".text",#alloc,#execinstr
.align 8
.skip 24
.align 32
! 95 !}
! 96 !#else
! 97 !
! 100 !uint64_t
! 101 !double2uint64_t(double* d)
! 102 !{
! 103 ! return ((int64_t)(*d));
! 104 !}
! 105 !#endif
! 107 !
! 108 !void
! 109 !conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
! 110 !{
!
! SUBROUTINE conv_d16_to_i32
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
! ---------------------------------------------------------------
! void conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp,
!                      int ilen)
!
! In:  %i0 = i32 (out array), %i1 = d16 (doubles holding 16-bit
!      digit pairs), %i2 = tmp (unused here), %i3 = ilen
! Recombines pairs of doubles from d16 into 32-bit words of i32
! with carry propagation (C listing lines 116-134):
!   t1 += a & 0xffffffff;  t = a >> 32;
!   t1 += (b & 0xffff) << 16;  t += (b >> 16) + (t1 >> 32);
!   i32[i] = t1 & 0xffffffff;
! The calls to double2uint64_t() are fully inlined: each double
! is decoded by extracting exponent/mantissa and shifting, with
! an explicit zero check (bit pattern 0 -> value 0), exactly as
! in double2uint64_t above.
! Uses a register window (save/restore), so %i/%l regs are local.
! ---------------------------------------------------------------
.global conv_d16_to_i32
conv_d16_to_i32:
save %sp,-176,%sp
! 111 ! int i;
! 112 ! int64_t t, t1,
! 113 ! a, b, c, d;
! 114 !
! 115 !
! 116 ! t1 = 0;
! 117 ! a = double2uint64_t(&(d16[0]));
ldx [%i1],%o0 ! raw bits of d16[0]
ldx [%i1+8],%i2 ! raw bits of d16[1]
cmp %o0,0
bne,pn %xcc,.L77000216
or %g0,0,%i4 ! delay slot: a = 0 (if zero pattern)
.L77000215:
ba .L900000316
cmp %i2,0 ! delay slot: test b's pattern
! inlined double2uint64_t for a = d16[0]
.L77000216:
srlx %o0,52,%o5 ! exponent field
sethi %hi(0xfff00000),%i4
sllx %i4,32,%o2 ! sign+exponent mask
sethi %hi(0x40000000),%o7
sllx %o7,22,%o3 ! implicit leading-1 bit
or %g0,1023,%o4
xor %o2,-1,%g5 ! mantissa mask
sub %o4,%o5,%l0 ! 1023 - biased_exp
and %o0,%g5,%o1
add %l0,52,%l1 ! 52 - exp
or %o1,%o3,%g4 ! man
! 118 ! b = double2uint64_t(&(d16[1]));
cmp %i2,0
srlx %g4,%l1,%i4 ! a = man >> (52 - exp)
.L900000316:
bne,pn %xcc,.L77000222
sub %i3,1,%l3 ! delay slot: %l3 = ilen - 1
.L77000221:
or %g0,0,%i2 ! b = 0
ba .L900000315
or %g0,0,%o3 ! delay slot: t = 0
! inlined double2uint64_t for b = d16[1]
.L77000222:
srlx %i2,52,%l6
sethi %hi(0xfff00000),%g4
sllx %g4,32,%i5
sethi %hi(0x40000000),%l5
xor %i5,-1,%l4
or %g0,1023,%l2
and %i2,%l4,%l7
sllx %l5,22,%i2
sub %l2,%l6,%g1
or %l7,%i2,%g3
add %g1,52,%g2
or %g0,0,%o3 ! t = 0
srlx %g3,%g2,%i2 ! b
! 119 ! for (i = 0; i < ilen - 1; i++) {
.L900000315:
cmp %l3,0
ble,pn %icc,.L77000210 ! ilen <= 1: skip loop
or %g0,0,%l4 ! delay slot: i = 0
.L77000245:
! hoist loop-invariant masks and constants into registers
sethi %hi(0xfff00000),%l7
or %g0,-1,%l6
sllx %l7,32,%l3
srl %l6,0,%l6 ! %l6 = 0xffffffff
sethi %hi(0x40000000),%l1
sethi %hi(0xfc00),%l2
xor %l3,-1,%l7 ! mantissa mask
sllx %l1,22,%l3 ! implicit-1 bit
sub %i3,2,%l5 ! loop bound = ilen - 2
add %l2,1023,%l2 ! %l2 = 0xffff
or %g0,2,%g2 ! index of c within d16
or %g0,%i0,%g1 ! %g1 = output pointer
! 120 ! c = double2uint64_t(&(d16[2 * i + 2]));
.L77000208:
sra %g2,0,%g3
add %g2,1,%o2
sllx %g3,3,%i3
! 121 ! t1 += a & 0xffffffff;
! 122 ! t = (a >> 32);
! 123 ! d = double2uint64_t(&(d16[2 * i + 3]));
sra %o2,0,%g5
ldx [%i1+%i3],%o5 ! raw bits of c
sllx %g5,3,%o0
and %i4,%l6,%g4 ! a & 0xffffffff
ldx [%i1+%o0],%i3 ! raw bits of d
cmp %o5,0
bne,pn %xcc,.L77000228
and %i2,%l2,%i5 ! delay slot: b & 0xffff
.L77000227:
or %g0,0,%l1 ! c = 0
ba .L900000314
add %o3,%g4,%o0 ! delay slot: t1 += a & 0xffffffff
! inlined double2uint64_t for c
.L77000228:
srlx %o5,52,%o7
and %o5,%l7,%o5
or %g0,52,%l0
sub %o7,1023,%o4
or %o5,%l3,%l1
sub %l0,%o4,%o1
srlx %l1,%o1,%l1 ! c
add %o3,%g4,%o0 ! t1 += a & 0xffffffff
.L900000314:
srax %i4,32,%g3 ! t = a >> 32 (arithmetic, as in the C source)
cmp %i3,0
bne,pn %xcc,.L77000234
sllx %i5,16,%g5 ! delay slot: (b & 0xffff) << 16
.L77000233:
or %g0,0,%o2 ! d = 0
ba .L900000313
add %o0,%g5,%o7 ! delay slot: t1 += (b & 0xffff) << 16
! inlined double2uint64_t for d
.L77000234:
srlx %i3,52,%o2
and %i3,%l7,%i4
sub %o2,1023,%o1
or %g0,52,%g4
sub %g4,%o1,%i5
or %i4,%l3,%i3
srlx %i3,%i5,%o2 ! d
! 124 ! t1 += (b & 0xffff) << 16;
add %o0,%g5,%o7
! 125 ! t += (b >> 16) + (t1 >> 32);
.L900000313:
srax %i2,16,%l0 ! b >> 16
srax %o7,32,%o4 ! t1 >> 32 (carry)
add %l0,%o4,%o3
! 126 ! i32[i] = t1 & 0xffffffff;
! 127 ! t1 = t;
! 128 ! a = c;
! 129 ! b = d;
add %l4,1,%l4 ! i++
and %o7,%l6,%o5
add %g3,%o3,%o3 ! t (becomes next t1)
st %o5,[%g1] ! i32[i]
or %g0,%l1,%i4 ! a = c
or %g0,%o2,%i2 ! b = d
add %g2,2,%g2
cmp %l4,%l5
ble,pt %icc,.L77000208
add %g1,4,%g1 ! delay slot: advance output pointer
! 130 ! }
! 131 ! t1 += a & 0xffffffff;
! 132 ! t = (a >> 32);
! 133 ! t1 += (b & 0xffff) << 16;
! 134 ! i32[i] = t1 & 0xffffffff;
! epilogue: fold the final a/b pair into the last output word
.L77000210:
sra %l4,0,%l4
sethi %hi(0xfc00),%i1
add %o3,%i4,%l2 ! t1 + (a & ...): full a used; upper bits fall out of the 32-bit store
add %i1,1023,%i5 ! 0xffff
and %i2,%i5,%l5 ! b & 0xffff
sllx %l4,2,%i2
sllx %l5,16,%l6
add %l2,%l6,%l7
st %l7,[%i0+%i2] ! i32[i] (store truncates to 32 bits)
ret ! Result =
restore %g0,%g0,%g0
.type conv_d16_to_i32,2
.size conv_d16_to_i32,(.-conv_d16_to_i32)
.section ".text",#alloc,#execinstr
.align 8
!
! CONSTANT POOL
!
! 1127219200 = 0x43300000.  {0x43300000, 0} is the double 2^52.
! The word is stored again at offset 8 so the int->double trick in
! conv_i32_to_d32 can build {0x43300000, n} (= 2^52 + n) in an FP
! register pair and subtract 2^52 to get (double)n.
___const_seg_900000401:
.word 1127219200,0 ! double 2^52 (subtrahend)
.word 1127219200 ! high word alone, paired with a loaded int32
.type ___const_seg_900000401,1
.size ___const_seg_900000401,(.-___const_seg_900000401)
.align 8
.skip 24
.align 32
! 135 !}
! 138 !void
! 139 !conv_i32_to_d32(double *d32, uint32_t *i32, int len)
! 140 !{
!
! SUBROUTINE conv_i32_to_d32
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
! ---------------------------------------------------------------
! void conv_i32_to_d32(double *d32, uint32_t *i32, int len)
!
! In:  %o0 = d32, %o1 = i32, %o2 = len
! d32[i] = (double)i32[i] for each i, using the classic exponent
! trick: load i32[i] into the low word of an FP register pair
! whose high word is 0x43300000, so the pair reads as the double
! 2^52 + i32[i]; subtracting 2^52 (%f16) leaves (double)i32[i].
! len >= 10 takes an 8x-unrolled software-pipelined loop with
! prefetching; shorter inputs use the simple loop at .L900000409.
! Leaf routine (retl); clobbers %o0-%o5, %g4-%g5, many %f regs.
! ---------------------------------------------------------------
.global conv_i32_to_d32
conv_i32_to_d32:
orcc %g0,%o2,%o2 ! set condition codes on len
! 141 ! int i;
! 143 !#pragma pipeloop(0)
! 144 ! for (i = 0; i < len; i++)
ble,pn %icc,.L77000254 ! len <= 0: nothing to do
sub %o2,1,%o3 ! delay slot: %o3 = len - 1
.L77000263:
or %g0,%o0,%o2
! 145 ! d32[i] = (double)(i32[i]);
add %o3,1,%o5
or %g0,0,%g5 ! i = 0
cmp %o5,10
bl,pn %icc,.L77000261 ! short input: scalar loop
sethi %hi(___const_seg_900000401),%g4
! unrolled-loop prologue: prefetch streams and prime the pipeline
.L900000407:
prefetch [%o1],0
prefetch [%o0],22
sethi %hi(___const_seg_900000401+8),%o4
or %g0,%o0,%o2
prefetch [%o1+64],0
add %o1,8,%o0
sub %o3,7,%o5 ! unrolled-loop bound
prefetch [%o2+64],22
or %g0,2,%g5
prefetch [%o2+128],22
prefetch [%o2+192],22
prefetch [%o1+128],0
ld [%o4+%lo(___const_seg_900000401+8)],%f2 ! high word 0x43300000
ldd [%g4+%lo(___const_seg_900000401)],%f16 ! %f16 = 2^52
fmovs %f2,%f0
prefetch [%o2+256],22
prefetch [%o2+320],22
ld [%o1],%f3 ! i32[0] -> low word of %f2 pair
prefetch [%o1+192],0
ld [%o1+4],%f1 ! i32[1] -> low word of %f0 pair
! main loop: 8 conversions per iteration, loads/stores interleaved
.L900000405:
prefetch [%o0+188],0
fsubd %f2,%f16,%f22 ! (2^52 + n) - 2^52 = (double)n
add %g5,8,%g5
add %o0,32,%o0
ld [%o4+%lo(___const_seg_900000401+8)],%f4
std %f22,[%o2]
cmp %g5,%o5
ld [%o0-32],%f5
fsubd %f0,%f16,%f24
add %o2,64,%o2
fmovs %f4,%f0
std %f24,[%o2-56]
ld [%o0-28],%f1
fsubd %f4,%f16,%f26
fmovs %f0,%f6
prefetch [%o2+312],22
std %f26,[%o2-48]
ld [%o0-24],%f7
fsubd %f0,%f16,%f28
fmovs %f6,%f8
std %f28,[%o2-40]
ld [%o0-20],%f9
fsubd %f6,%f16,%f30
fmovs %f8,%f10
std %f30,[%o2-32]
ld [%o0-16],%f11
prefetch [%o2+344],22
fsubd %f8,%f16,%f48
fmovs %f10,%f12
std %f48,[%o2-24]
ld [%o0-12],%f13
fsubd %f10,%f16,%f50
fmovs %f12,%f2
std %f50,[%o2-16]
ld [%o0-8],%f3
fsubd %f12,%f16,%f52
fmovs %f2,%f0
std %f52,[%o2-8]
ble,pt %icc,.L900000405
ld [%o0-4],%f1 ! annulled delay slot: next input word
! pipeline drain: two conversions still in flight
.L900000408:
fsubd %f2,%f16,%f18
add %o2,16,%o2
cmp %g5,%o3
std %f18,[%o2-16]
fsubd %f0,%f16,%f20
or %g0,%o0,%o1
bg,pn %icc,.L77000254
std %f20,[%o2-8] ! delay slot
! scalar tail / short-input loop: one conversion per iteration
.L77000261:
ld [%o1],%f15
.L900000409:
sethi %hi(___const_seg_900000401+8),%o4
ldd [%g4+%lo(___const_seg_900000401)],%f16
add %g5,1,%g5
ld [%o4+%lo(___const_seg_900000401+8)],%f14
add %o1,4,%o1
cmp %g5,%o3
fsubd %f14,%f16,%f54
std %f54,[%o2]
add %o2,8,%o2
ble,a,pt %icc,.L900000409
ld [%o1],%f15 ! annulled delay slot: next input word
.L77000254:
retl ! Result =
nop
.type conv_i32_to_d32,2
.size conv_i32_to_d32,(.-conv_i32_to_d32)
.section ".text",#alloc,#execinstr
.align 8
!
! CONSTANT POOL
!
! Same layout as ___const_seg_900000401: the double 2^52 followed
! by its high word 0x43300000 alone, for the int->double
! conversion trick used by conv_i32_to_d16.
___const_seg_900000501:
.word 1127219200,0 ! double 2^52 (subtrahend)
.word 1127219200 ! high word alone, paired with a loaded int32
.type ___const_seg_900000501,1
.size ___const_seg_900000501,(.-___const_seg_900000501)
.align 8
.skip 24
.align 32
! 146 !}
! 149 !void
! 150 !conv_i32_to_d16(double *d16, uint32_t *i32, int len)
! 151 !{
!
! SUBROUTINE conv_i32_to_d16
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
! ---------------------------------------------------------------
! void conv_i32_to_d16(double *d16, uint32_t *i32, int len)
!
! In:  %i0 = d16, %i1 = i32, %i2 = len
! Splits each 32-bit word into two 16-bit digits stored as
! doubles (C listing lines 157-159):
!   d16[2*i]   = (double)(i32[i] & 0xffff)
!   d16[2*i+1] = (double)(i32[i] >> 16)
! Conversion uses the 2^52 trick (___const_seg_900000501): the
! 16-bit halves are staged through fixed stack slots
! ([%sp+2239..2335] in the unrolled loop, [%sp+2367/2399] in the
! tail), reloaded into the low word of an FP pair whose high word
! is 0x43300000, then 2^52 (%f20) is subtracted.
! len >= 4 takes a 2-words-per-iteration pipelined loop with
! prefetching; otherwise the scalar loop at .L900000510 runs.
! ---------------------------------------------------------------
.global conv_i32_to_d16
conv_i32_to_d16:
save %sp,-368,%sp ! extra frame space for the staging slots
orcc %g0,%i2,%i2
! 152 ! int i;
! 153 ! uint32_t a;
! 155 !#pragma pipeloop(0)
! 156 ! for (i = 0; i < len; i++) {
ble,pn %icc,.L77000272 ! len <= 0: return
sub %i2,1,%l6 ! delay slot: %l6 = len - 1
.L77000281:
sethi %hi(0xfc00),%i3
! 157 ! a = i32[i];
or %g0,%i2,%l1
add %i3,1023,%i4 ! %i4 = 0xffff mask
cmp %i2,4
or %g0,%i1,%l7 ! %l7 = input pointer
or %g0,%i0,%i2 ! %i2 = output pointer
or %g0,0,%i5
or %g0,0,%i3
bl,pn %icc,.L77000279 ! short input: scalar loop
sethi %hi(___const_seg_900000501),%i1
! unrolled-loop prologue: prefetch and stage the first two words
.L900000508:
prefetch [%i0+8],22
prefetch [%i0+72],22
or %g0,%i0,%l2
! 158 ! d16[2 * i] = (double)(a & 0xffff);
sethi %hi(___const_seg_900000501+8),%l1
prefetch [%i0+136],22
sub %l6,1,%i0
or %g0,0,%i3
prefetch [%i2+200],22
or %g0,2,%i5
prefetch [%i2+264],22
prefetch [%i2+328],22
prefetch [%i2+392],22
ld [%l7],%l3
ld [%l7+4],%l4
ldd [%i1+%lo(___const_seg_900000501)],%f20 ! %f20 = 2^52
! 159 ! d16[2 * i + 1] = (double)(a >> 16);
srl %l3,16,%o1 ! high digit of word 0
and %l3,%i4,%o3 ! low digit of word 0
st %o3,[%sp+2335]
srl %l4,16,%g4 ! high digit of word 1
and %l4,%i4,%o0 ! low digit of word 1
st %o0,[%sp+2303]
add %l7,8,%l7
st %o1,[%sp+2271]
st %g4,[%sp+2239]
prefetch [%i2+456],22
prefetch [%i2+520],22
! main loop: converts 2 input words (4 output doubles) while
! staging the digits of the next 2 words through the stack slots
.L900000506:
prefetch [%l2+536],22
add %i5,2,%i5
add %l2,32,%l2
ld [%l7],%g2
cmp %i5,%i0
add %l7,8,%l7
ld [%sp+2335],%f9 ! staged low digit -> low word of %f8 pair
add %i3,4,%i3
ld [%l1+%lo(___const_seg_900000501+8)],%f8 ! 0x43300000 high word
ld [%sp+2271],%f11
and %g2,%i4,%g3
fmovs %f8,%f10
st %g3,[%sp+2335]
fsubd %f8,%f20,%f28 ! (double)low digit
std %f28,[%l2-32]
srl %g2,16,%g1
st %g1,[%sp+2271]
fsubd %f10,%f20,%f30 ! (double)high digit
std %f30,[%l2-24]
ld [%l7-4],%l0
ld [%sp+2303],%f13
ld [%l1+%lo(___const_seg_900000501+8)],%f12
ld [%sp+2239],%f15
and %l0,%i4,%l5
fmovs %f12,%f14
st %l5,[%sp+2303]
fsubd %f12,%f20,%f44
std %f44,[%l2-16]
srl %l0,16,%o5
st %o5,[%sp+2239]
fsubd %f14,%f20,%f46
ble,pt %icc,.L900000506
std %f46,[%l2-8] ! delay slot: last store of the iteration
! pipeline drain: convert the 4 digits still staged on the stack
.L900000509:
ld [%l1+%lo(___const_seg_900000501+8)],%f0
cmp %i5,%l6
add %i3,4,%i3
ld [%sp+2335],%f1
ld [%sp+2303],%f5
fmovs %f0,%f2
ld [%sp+2271],%f3
fmovs %f0,%f4
ld [%sp+2239],%f7
fmovs %f0,%f6
fsubd %f0,%f20,%f22
std %f22,[%l2]
fsubd %f2,%f20,%f24
std %f24,[%l2+8]
fsubd %f4,%f20,%f26
std %f26,[%l2+16]
fsubd %f6,%f20,%f20
bg,pn %icc,.L77000272
std %f20,[%l2+24] ! delay slot
! scalar tail / short-input loop: one input word per iteration
.L77000279:
ld [%l7],%l2
.L900000510:
and %l2,%i4,%o4 ! low digit
st %o4,[%sp+2399]
srl %l2,16,%o2 ! high digit
st %o2,[%sp+2367]
sethi %hi(___const_seg_900000501+8),%l1
sra %i3,0,%i0
ld [%l1+%lo(___const_seg_900000501+8)],%f16
sllx %i0,3,%o1 ! offset of d16[2*i]
add %i3,1,%o3
ldd [%i1+%lo(___const_seg_900000501)],%f20
sra %o3,0,%l3
add %i5,1,%i5
ld [%sp+2399],%f17
sllx %l3,3,%o0 ! offset of d16[2*i+1]
add %l7,4,%l7
fmovs %f16,%f18
cmp %i5,%l6
add %i3,2,%i3
fsubd %f16,%f20,%f48
std %f48,[%i2+%o1]
ld [%sp+2367],%f19
fsubd %f18,%f20,%f50
std %f50,[%i2+%o0]
ble,a,pt %icc,.L900000510
ld [%l7],%l2 ! annulled delay slot: next input word
.L77000272:
ret ! Result =
restore %g0,%g0,%g0
.type conv_i32_to_d16,2
.size conv_i32_to_d16,(.-conv_i32_to_d16)
.section ".text",#alloc,#execinstr
.align 8
!
! CONSTANT POOL
!
! Same layout as ___const_seg_900000401: the double 2^52 followed
! by its high word 0x43300000 alone, for the int->double trick in
! conv_i32_to_d32_and_d16's tail loops.
___const_seg_900000601:
.word 1127219200,0 ! double 2^52 (subtrahend)
.word 1127219200 ! high word alone, paired with a loaded int32
.type ___const_seg_900000601,1
.size ___const_seg_900000601,(.-___const_seg_900000601)
.align 8
.skip 24
.align 32
! 160 ! }
! 161 !}
! 163 !#ifdef RF_INLINE_MACROS
! 165 !void
! 166 !i16_to_d16_and_d32x4(const double *,
! 167 ! const double *,
! 168 ! const double *,
! 169 ! double *,
! 170 ! double *,
! 171 ! float *);
! 172 !
! 174 !#else
! 177 !
! 178 !static void
! 179 !i16_to_d16_and_d32x4(const double *dummy1,
! 180 ! const double *dummy2,
! 181 ! const double *dummy3,
! 182 ! double *result16,
! 183 ! double *result32,
! 184 ! float *src)
! 185 !
! 186 !{
! 187 ! uint32_t *i32;
! 188 ! uint32_t a, b, c, d;
! 190 ! i32 = (uint32_t *)src;
! 191 ! a = i32[0];
! 192 ! b = i32[1];
! 193 ! c = i32[2];
! 194 ! d = i32[3];
! 195 ! result16[0] = (double)(a & 0xffff);
! 196 ! result16[1] = (double)(a >> 16);
! 197 ! result32[0] = (double)a;
! 198 ! result16[2] = (double)(b & 0xffff);
! 199 ! result16[3] = (double)(b >> 16);
! 200 ! result32[1] = (double)b;
! 201 ! result16[4] = (double)(c & 0xffff);
! 202 ! result16[5] = (double)(c >> 16);
! 203 ! result32[2] = (double)c;
! 204 ! result16[6] = (double)(d & 0xffff);
! 205 ! result16[7] = (double)(d >> 16);
! 206 ! result32[3] = (double)d;
! 207 !}
! 209 !#endif
! 212 !void
! 213 !conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len)
! 214 !{
!
! SUBROUTINE conv_i32_to_d32_and_d16
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
! ---------------------------------------------------------------
! void conv_i32_to_d32_and_d16(double *d32, double *d16,
!                              uint32_t *i32, int len)
!
! In:  %i0 = d32, %i1 = d16, %i2 = i32, %i3 = len
! Produces both representations in one pass (C listing 219-228):
!   d32[i]     = (double)i32[i]
!   d16[2*i]   = (double)(i32[i] & 0xffff)
!   d16[2*i+1] = (double)(i32[i] >> 16)
! Main loop (.L900000615) is the inlined i16_to_d16_and_d32x4:
! 4 words per iteration.  Each word is placed in the low half of
! an FP pair whose other half is Zero, converted with fxtod
! (-> d32), then split: multiply by TwoToMinus16, truncate with
! fdtox, fxtod back (-> high 16-bit digit), multiply by TwoTo16
! and subtract from the full value (-> low 16-bit digit).
! Remainder (< 4 words) falls to .L77000289 onward, which uses
! the 2^52 trick via ___const_seg_900000601, with a 4x-unrolled
! variant for >= 5 leftover words and a scalar tail.
! ---------------------------------------------------------------
.global conv_i32_to_d32_and_d16
conv_i32_to_d32_and_d16:
save %sp,-368,%sp ! frame includes staging slots for the tail
! 215 ! int i;
! 216 ! uint32_t a;
! 218 !#pragma pipeloop(0)
! 219 ! for (i = 0; i < len - 3; i += 4) {
! 220 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
! 221 ! &(d16[2*i]), &(d32[i]),
! 222 ! (float *)(&(i32[i])));
! 223 ! }
! 224 ! for (; i < len; i++) {
! 225 ! a = i32[i];
! 226 ! d32[i] = (double)(i32[i]);
! 227 ! d16[2 * i] = (double)(a & 0xffff);
! 228 ! d16[2 * i + 1] = (double)(a >> 16);
sub %i3,3,%i4
cmp %i4,0
ble,pn %icc,.L77000289 ! fewer than 4 words: straight to tail
or %g0,0,%i5 ! delay slot: i = 0
.L77000306:
sethi %hi(Zero),%g3
sethi %hi(TwoToMinus16),%g2
sethi %hi(TwoTo16),%o5
ldd [%g3+%lo(Zero)],%f2 ! %f2 = 0.0 (high halves for loads)
sub %i3,4,%o4 ! unrolled-loop bound
or %g0,0,%o3
or %g0,%i0,%l6 ! %l6 = d32 cursor
or %g0,%i2,%l5 ! %l5 = i32 cursor
! 4-wide loop: i16_to_d16_and_d32x4 inlined
.L900000615:
fmovd %f2,%f26
ld [%l5],%f27 ! word 0 into low half (pair = {0, n0})
sra %o3,0,%o0
add %i5,4,%i5
fmovd %f2,%f28
ld [%l5+4],%f29 ! word 1
sllx %o0,3,%g5
cmp %i5,%o4
fmovd %f2,%f30
ld [%l5+8],%f31 ! word 2
add %i1,%g5,%g4 ! %g4 = &d16[2*i]
add %o3,8,%o3
ld [%l5+12],%f3 ! word 3 (into %f2 pair)
fxtod %f26,%f26 ! (double)n0
ldd [%g2+%lo(TwoToMinus16)],%f32
fxtod %f28,%f28 ! (double)n1
add %l5,16,%l5
fxtod %f30,%f30 ! (double)n2
ldd [%o5+%lo(TwoTo16)],%f34
fxtod %f2,%f2 ! (double)n3
std %f2,[%l6+24] ! d32[i+3]
fmuld %f32,%f26,%f36 ! n0 * 2^-16
std %f26,[%l6] ! d32[i]
fmuld %f32,%f28,%f38
std %f28,[%l6+8] ! d32[i+1]
fmuld %f32,%f30,%f40
std %f30,[%l6+16] ! d32[i+2]
fmuld %f32,%f2,%f42
add %l6,32,%l6
fdtox %f36,%f36 ! truncate -> high 16-bit digit of n0
fdtox %f38,%f38
fdtox %f40,%f40
fdtox %f42,%f42
fxtod %f36,%f36
std %f36,[%g4+8] ! d16[2i+1] = high digit of n0
fxtod %f38,%f38
std %f38,[%g4+24] ! d16[2i+3]
fxtod %f40,%f40
std %f40,[%g4+40] ! d16[2i+5]
fxtod %f42,%f42
std %f42,[%g4+56] ! d16[2i+7]
fmuld %f36,%f34,%f36 ! high digit * 2^16
fmuld %f38,%f34,%f38
fmuld %f40,%f34,%f40
fmuld %f42,%f34,%f42
fsubd %f26,%f36,%f36 ! n0 - high*2^16 = low digit
std %f36,[%i1+%g5] ! d16[2i]
fsubd %f28,%f38,%f38
std %f38,[%g4+16] ! d16[2i+2]
fsubd %f30,%f40,%f40
std %f40,[%g4+32] ! d16[2i+4]
fsubd %f2,%f42,%f42
std %f42,[%g4+48] ! d16[2i+6]
ble,a,pt %icc,.L900000615
ldd [%g3+%lo(Zero)],%f2 ! annulled delay slot: reload 0.0
! tail: 0-3 (or all, if len < 4) remaining words
.L77000289:
cmp %i5,%i3
bge,pn %icc,.L77000294 ! done
sethi %hi(0xfc00),%l0
.L77000307:
sra %i5,0,%l2
sll %i5,1,%i4
sllx %l2,3,%l1
sllx %l2,2,%o1
sub %i3,%i5,%l3 ! remaining count
add %l0,1023,%l0 ! %l0 = 0xffff mask
add %l1,%i0,%l1 ! %l1 = &d32[i]
add %o1,%i2,%i2 ! %i2 = &i32[i]
cmp %l3,5
bl,pn %icc,.L77000291 ! < 5 left: scalar loop
sethi %hi(___const_seg_900000601),%l7
! unrolled tail prologue: prefetch, stage first word's digits
.L900000612:
prefetch [%l1],22
prefetch [%l1+64],22
sra %i4,0,%l6
sethi %hi(___const_seg_900000601+8),%l2
prefetch [%l1+128],22
add %l6,-2,%l5
sub %i3,3,%i0
prefetch [%l1+192],22
sllx %l5,3,%o4
add %i5,1,%i5
add %i1,%o4,%o3
or %g0,%i3,%g1
ld [%i2],%l4
prefetch [%o3+16],22
add %o3,16,%l3 ! %l3 = d16 cursor
add %i2,4,%i2
prefetch [%o3+80],22
srl %l4,16,%o1
and %l4,%l0,%o0
prefetch [%o3+144],22
st %o1,[%sp+2271] ! stage high digit
st %o0,[%sp+2239] ! stage low digit
ldd [%l7+%lo(___const_seg_900000601)],%f32 ! %f32 = 2^52
ld [%l2+%lo(___const_seg_900000601+8)],%f0 ! 0x43300000
prefetch [%o3+208],22
prefetch [%o3+272],22
prefetch [%o3+336],22
! unrolled tail loop: 4 words per iteration via the 2^52 trick
.L900000610:
prefetch [%l1+192],22
add %i5,4,%i5
add %l3,64,%l3
ld [%l2+%lo(___const_seg_900000601+8)],%f8
cmp %i5,%i0
ld [%i2],%g5
add %i2,16,%i2
add %l1,32,%l1
add %i4,8,%i4
ld [%i2-20],%f7 ! full word into low half -> d32 value
srl %g5,16,%i3
fmovs %f8,%f6
st %i3,[%sp+2335]
and %g5,%l0,%g4
st %g4,[%sp+2303]
fsubd %f6,%f32,%f40 ! (double)word -> d32
ld [%sp+2239],%f9
ld [%sp+2271],%f1
fmovs %f8,%f12
std %f40,[%l1-32]
fsubd %f8,%f32,%f42 ! (double)low digit
std %f42,[%l3-64]
fsubd %f0,%f32,%f44 ! (double)high digit
std %f44,[%l3-56]
fmovs %f12,%f10
ld [%i2-12],%g2
ld [%i2-16],%f1
srl %g2,16,%g3
fmovs %f12,%f0
prefetch [%l3+320],22
st %g3,[%sp+2271]
and %g2,%l0,%l6
st %l6,[%sp+2239]
fsubd %f0,%f32,%f46
ld [%sp+2303],%f11
ld [%sp+2335],%f13
fmovs %f12,%f18
std %f46,[%l1-24]
fsubd %f10,%f32,%f48
std %f48,[%l3-48]
fsubd %f12,%f32,%f50
std %f50,[%l3-40]
fmovs %f18,%f16
ld [%i2-8],%o5
ld [%i2-12],%f15
srl %o5,16,%l5
fmovs %f18,%f14
st %l5,[%sp+2335]
and %o5,%l0,%o4
st %o4,[%sp+2303]
fsubd %f14,%f32,%f52
ld [%sp+2239],%f17
ld [%sp+2271],%f19
prefetch [%l3+352],22
fmovs %f18,%f24
std %f52,[%l1-16]
fsubd %f16,%f32,%f54
std %f54,[%l3-32]
fsubd %f18,%f32,%f56
std %f56,[%l3-24]
fmovs %f24,%f22
ld [%i2-4],%l4
ld [%i2-8],%f21
srl %l4,16,%o3
fmovs %f24,%f20
st %o3,[%sp+2271]
and %l4,%l0,%o2
st %o2,[%sp+2239]
fsubd %f20,%f32,%f58
ld [%sp+2303],%f23
ld [%sp+2335],%f25
fmovs %f24,%f0
std %f58,[%l1-8]
fsubd %f22,%f32,%f60
std %f60,[%l3-16]
fsubd %f24,%f32,%f62
bl,pt %icc,.L900000610
std %f62,[%l3-8] ! delay slot: last store of the iteration
! unrolled-tail drain: one word's three outputs still in flight
.L900000613:
ld [%l2+%lo(___const_seg_900000601+8)],%f4
add %l1,8,%l1
cmp %i5,%g1
ld [%i2-4],%f3
or %g0,%g1,%i3
add %i4,2,%i4
ld [%sp+2239],%f5
fmovs %f4,%f2
ld [%sp+2271],%f1
fsubd %f2,%f32,%f34
std %f34,[%l1-8]
fsubd %f4,%f32,%f36
std %f36,[%l3]
fsubd %f0,%f32,%f38
bge,pn %icc,.L77000294
std %f38,[%l3+8] ! delay slot
! scalar tail: one word -> one d32 + two d16 entries
.L77000291:
ld [%i2],%o2
.L900000614:
ldd [%l7+%lo(___const_seg_900000601)],%f32
srl %o2,16,%l3
sra %i4,0,%i0
st %l3,[%sp+2367] ! stage high digit
and %o2,%l0,%g1
sethi %hi(___const_seg_900000601+8),%l2
st %g1,[%sp+2399] ! stage low digit
sllx %i0,3,%o0 ! offset of d16[2*i]
add %i4,1,%l4
ld [%l2+%lo(___const_seg_900000601+8)],%f4
sra %l4,0,%o1
add %i5,1,%i5
ld [%i2],%f5 ! full word -> d32 value
sllx %o1,3,%g5 ! offset of d16[2*i+1]
cmp %i5,%i3
ld [%sp+2367],%f9
add %i2,4,%i2
add %i4,2,%i4
fmovs %f4,%f6
fsubd %f4,%f32,%f44 ! (double)word
std %f44,[%l1] ! d32[i]
ld [%sp+2399],%f7
fmovs %f6,%f8
add %l1,8,%l1
fsubd %f8,%f32,%f48 ! (double)high digit
fsubd %f6,%f32,%f46 ! (double)low digit
std %f46,[%i1+%o0] ! d16[2i]
std %f48,[%i1+%g5] ! d16[2i+1]
bl,a,pt %icc,.L900000614
ld [%i2],%o2 ! annulled delay slot: next word
.L77000294:
ret ! Result =
restore %g0,%g0,%g0
.type conv_i32_to_d32_and_d16,2
.size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
.section ".text",#alloc,#execinstr
.align 32
! 229 ! }
! 230 !}
! 232 !extern long long c1, c2, c3, c4;
! 234 !static void
! 235 !adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
! 236 !{
!
! SUBROUTINE adjust_montf_result
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
! ---------------------------------------------------------------
! static void adjust_montf_result(uint32_t *i32, uint32_t *nint,
!                                 int len)
! (file-local: no .global directive)
!
! In:  %o0 = i32 (result, len+1 words), %o1 = nint, %o2 = len
! Montgomery final conditional subtraction: if the result exceeds
! the modulus — i.e. the extra top word i32[len] is nonzero, or
! scanning from the most significant word the first differing
! word has i32[i] > nint[i] — subtract nint from i32 in place,
! propagating the borrow through acc >> 32 (C listing 240-252).
! Word order: index 0 is least significant (the compare scans
! downward from len-1; the subtraction runs upward from 0).
! The subtraction loop is 8x unrolled for len >= 10, with a
! scalar loop (.L900000711) for short inputs and the tail.
! Leaf routine (retl); clobbers %o0-%o5, %g1-%g5.
! ---------------------------------------------------------------
adjust_montf_result:
sra %o2,0,%g2 ! sign-extend len
or %g0,%o0,%o4 ! %o4 = i32 base
! 237 ! int64_t acc;
! 238 ! int i;
! 240 ! if (i32[len] > 0) {
sllx %g2,2,%g3
ld [%o0+%g3],%o0 ! top word i32[len]
cmp %o0,0
bleu,pn %icc,.L77000316 ! zero: fall through to word compare
or %g0,%o1,%o5 ! delay slot: %o5 = nint base
! 241 ! i = -1;
.L77000315:
sub %g2,1,%g3
ba .L900000712 ! top word set: subtract unconditionally
cmp %g2,0 ! delay slot
! 242 ! } else {
! 243 ! for (i = len - 1; i >= 0; i--) {
.L77000316:
subcc %g2,1,%g3
bneg,pn %icc,.L77000340 ! len == 0
or %g0,%g3,%o3 ! delay slot: i = len - 1
.L77000348:
sra %g3,0,%o1
sllx %o1,2,%g1
! 244 ! if (i32[i] != nint[i]) break;
ld [%g1+%o5],%g4 ! nint[i]
add %g1,%o4,%o2 ! &i32[i]
add %g1,%o5,%o1 ! &nint[i]
! scan downward for the first word where i32[i] != nint[i]
.L900000713:
ld [%o2],%o0
cmp %o0,%g4
bne,pn %icc,.L77000324
sub %o2,4,%o2 ! delay slot: step i32 pointer down
.L77000320:
sub %o1,4,%o1
subcc %o3,1,%o3
bpos,a,pt %icc,.L900000713
ld [%o1],%g4 ! annulled delay slot: next nint word
.L900000706:
ba .L900000712 ! all words equal (i < 0): subtract
cmp %g2,0 ! delay slot
! first differing word found: subtract only if i32[i] > nint[i]
.L77000324:
sra %o3,0,%o0
sllx %o0,2,%g1
ld [%o5+%g1],%o3 ! nint[i]
ld [%o4+%g1],%g5 ! i32[i]
cmp %g5,%o3
bleu,pt %icc,.L77000332 ! i32 < nint: result already reduced
nop
! 245 ! }
! 246 ! }
! 247 ! if ((i < 0) || (i32[i] > nint[i])) {
! 248 ! acc = 0;
! 249 ! for (i = 0; i < len; i++) {
.L77000340:
cmp %g2,0
.L900000712:
ble,pn %icc,.L77000332 ! len <= 0: nothing to subtract
or %g0,%g2,%o3 ! delay slot
.L77000347:
or %g0,0,%o0 ! i = 0
! 250 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
cmp %o3,10
bl,pn %icc,.L77000341 ! short: scalar loop
or %g0,0,%g2 ! delay slot: acc carry = 0
! 8x-unrolled subtraction prologue
.L900000709:
prefetch [%o4],22
prefetch [%o4+64],22
! 251 ! i32[i] = acc & 0xffffffff;
! 252 ! acc = acc >> 32;
add %o5,4,%o1 ! %o1 = nint cursor
add %o4,8,%o2 ! %o2 = i32 cursor
prefetch [%o4+128],22
sub %o3,8,%o5 ! unrolled bound
or %g0,2,%o0
prefetch [%o4+192],22
prefetch [%o4+256],22
prefetch [%o4+320],22
prefetch [%o4+384],22
ld [%o2-4],%g5
prefetch [%o2+440],22
prefetch [%o2+504],22
ld [%o4],%g4
ld [%o1-4],%o4
sub %g4,%o4,%o3 ! acc = i32[0] - nint[0]
st %o3,[%o2-8]
srax %o3,32,%g4 ! borrow = acc >> 32 (arithmetic: -1 or 0)
! main unrolled loop: 8 word subtractions with borrow chaining
.L900000707:
add %o0,8,%o0
add %o2,32,%o2
ld [%o1],%g1
prefetch [%o2+496],22
cmp %o0,%o5
add %o1,32,%o1
sub %g5,%g1,%g5
add %g5,%g4,%o4 ! + previous borrow
ld [%o2-32],%g4
st %o4,[%o2-36]
srax %o4,32,%g1
ld [%o1-28],%o3
sub %g4,%o3,%g2
add %g2,%g1,%g5
ld [%o2-28],%o3
st %g5,[%o2-32]
srax %g5,32,%g4
ld [%o1-24],%o4
sub %o3,%o4,%g1
add %g1,%g4,%g2
ld [%o2-24],%o3
st %g2,[%o2-28]
srax %g2,32,%g5
ld [%o1-20],%o4
sub %o3,%o4,%g4
add %g4,%g5,%g1
ld [%o2-20],%o4
st %g1,[%o2-24]
srax %g1,32,%o3
ld [%o1-16],%g2
sub %o4,%g2,%g5
add %g5,%o3,%g1
ld [%o2-16],%g4
st %g1,[%o2-20]
srax %g1,32,%o4
ld [%o1-12],%g2
sub %g4,%g2,%o3
add %o3,%o4,%g5
ld [%o2-12],%g2
st %g5,[%o2-16]
srax %g5,32,%g4
ld [%o1-8],%g1
sub %g2,%g1,%o4
add %o4,%g4,%o3
ld [%o2-8],%g2
st %o3,[%o2-12]
srax %o3,32,%g5
ld [%o1-4],%g1
sub %g2,%g1,%g4
add %g4,%g5,%o4
ld [%o2-4],%g5
st %o4,[%o2-8]
ble,pt %icc,.L900000707
srax %o4,32,%g4 ! delay slot: carry for next iteration
! drain one in-flight subtraction, then hand off to scalar loop
.L900000710:
ld [%o1],%o3
add %o1,4,%o5
or %g0,%o2,%o4
cmp %o0,%g3
sub %g5,%o3,%g2
add %g2,%g4,%g1
st %g1,[%o2-4]
bg,pn %icc,.L77000332
srax %g1,32,%g2 ! delay slot: final borrow
! scalar loop: one word subtraction per iteration (also the tail)
.L77000341:
ld [%o4],%g5
.L900000711:
ld [%o5],%o2
add %g2,%g5,%g4 ! acc = borrow + i32[i]
add %o0,1,%o0
cmp %o0,%g3
add %o5,4,%o5
sub %g4,%o2,%o1 ! acc -= nint[i]
st %o1,[%o4] ! i32[i] = acc & 0xffffffff (store truncates)
srax %o1,32,%g2 ! acc >>= 32
add %o4,4,%o4
ble,a,pt %icc,.L900000711
ld [%o4],%g5 ! annulled delay slot: next i32 word
.L77000332:
retl ! Result =
nop
.type adjust_montf_result,2
.size adjust_montf_result,(.-adjust_montf_result)
.section ".text",#alloc,#execinstr
.align 32
! 253 ! }
! 254 ! }
! 255 !}
! 257 !
! 308 !
! 313 !void mont_mulf_noconv(uint32_t *result,
! 314 ! double *dm1, double *dm2, double *dt,
! 315 ! double *dn, uint32_t *nint,
! 316 ! int nlen, double dn0)
! 317 !{
!
! SUBROUTINE mont_mulf_noconv
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global mont_mulf_noconv
mont_mulf_noconv:
save %sp,-176,%sp
ldx [%fp+2223],%g1
sethi %hi(Zero),%l5
or %g0,%i2,%l0
! 318 ! int i, j, jj;
! 319 ! double digit, m2j, a, b;
! 320 ! double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
! 322 ! pdm1 = &(dm1[0]);
! 323 ! pdm2 = &(dm2[0]);
! 324 ! pdn = &(dn[0]);
! 325 ! pdm2[2 * nlen] = Zero;
ldd [%l5+%lo(Zero)],%f0
or %g0,%i0,%i2
sll %g1,1,%o3
! 327 ! if (nlen != 16) {
cmp %g1,16
sra %o3,0,%i0
sllx %i0,3,%o0
or %g0,%i5,%i0
bne,pn %icc,.L77000476
std %f0,[%l0+%o0]
.L77000488:
sethi %hi(TwoToMinus16),%o2
sethi %hi(TwoTo16),%l3
! 328 ! for (i = 0; i < 4 * nlen + 2; i++)
! 329 ! dt[i] = Zero;
! 330 ! a = dt[0] = pdm1[0] * pdm2[0];
! 331 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
! 333 ! pdtj = &(dt[0]);
! 334 ! for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
! 335 ! m2j = pdm2[j];
! 336 ! a = pdtj[0] + pdn[0] * digit;
! 337 ! b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
! 338 ! pdtj[1] = b;
! 340 !#pragma pipeloop(0)
! 341 ! for (i = 1; i < nlen; i++) {
! 342 ! pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
! 343 ! }
! 344 ! if (jj == 15) {
! 345 ! cleanup(dt, j / 2 + 1, 2 * nlen + 1);
! 346 ! jj = 0;
! 347 ! }
! 349 ! digit = mod(lower32(b, Zero) * dn0,
! 350 ! TwoToMinus16, TwoTo16);
! 351 ! }
! 352 ! } else {
! 353 ! a = dt[0] = pdm1[0] * pdm2[0];
ldd [%i1],%f40
! 355 ! dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
! 356 ! dt[59] = dt[58] = dt[57] = dt[56] = dt[55] =
! 357 ! dt[54] = dt[53] = dt[52] = dt[51] = dt[50] =
! 358 ! dt[49] = dt[48] = dt[47] = dt[46] = dt[45] =
! 359 ! dt[44] = dt[43] = dt[42] = dt[41] = dt[40] =
! 360 ! dt[39] = dt[38] = dt[37] = dt[36] = dt[35] =
! 361 ! dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
! 362 ! dt[29] = dt[28] = dt[27] = dt[26] = dt[25] =
! 363 ! dt[24] = dt[23] = dt[22] = dt[21] = dt[20] =
! 364 ! dt[19] = dt[18] = dt[17] = dt[16] = dt[15] =
! 365 ! dt[14] = dt[13] = dt[12] = dt[11] = dt[10] =
! 366 ! dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] =
! 367 ! dt[3] = dt[2] = dt[1] = Zero;
! 369 ! pdn_0 = pdn[0];
! 370 ! pdm1_0 = pdm1[0];
! 372 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
! 373 ! pdtj = &(dt[0]);
or %g0,%i3,%o3
! 375 ! for (j = 0; j < 32; j++, pdtj++) {
or %g0,0,%l1
ldd [%l0],%f42
ldd [%o2+%lo(TwoToMinus16)],%f44
ldd [%l3+%lo(TwoTo16)],%f46
std %f0,[%i3+8]
fmuld %f40,%f42,%f38
std %f38,[%i3]
std %f0,[%i3+16]
std %f0,[%i3+24]
std %f0,[%i3+32]
fdtox %f38,%f4
std %f0,[%i3+40]
std %f0,[%i3+48]
std %f0,[%i3+56]
fmovs %f0,%f4
std %f0,[%i3+64]
std %f0,[%i3+72]
fxtod %f4,%f52
std %f0,[%i3+80]
std %f0,[%i3+88]
std %f0,[%i3+96]
std %f0,[%i3+104]
fmuld %f52,%f14,%f60
std %f0,[%i3+112]
std %f0,[%i3+120]
std %f0,[%i3+128]
std %f0,[%i3+136]
fmuld %f60,%f44,%f62
std %f0,[%i3+144]
std %f0,[%i3+152]
std %f0,[%i3+160]
std %f0,[%i3+168]
fdtox %f62,%f32
std %f0,[%i3+176]
std %f0,[%i3+184]
std %f0,[%i3+192]
std %f0,[%i3+200]
fxtod %f32,%f50
std %f0,[%i3+208]
std %f0,[%i3+216]
std %f0,[%i3+224]
std %f0,[%i3+232]
fmuld %f50,%f46,%f34
std %f0,[%i3+240]
std %f0,[%i3+248]
std %f0,[%i3+256]
std %f0,[%i3+264]
fsubd %f60,%f34,%f40
std %f0,[%i3+272]
std %f0,[%i3+280]
std %f0,[%i3+288]
std %f0,[%i3+296]
std %f0,[%i3+304]
std %f0,[%i3+312]
std %f0,[%i3+320]
std %f0,[%i3+328]
std %f0,[%i3+336]
std %f0,[%i3+344]
std %f0,[%i3+352]
std %f0,[%i3+360]
std %f0,[%i3+368]
sub %g1,1,%l3
add %i3,8,%o7
std %f0,[%i3+376]
std %f0,[%i3+384]
std %f0,[%i3+392]
std %f0,[%i3+400]
std %f0,[%i3+408]
std %f0,[%i3+416]
std %f0,[%i3+424]
std %f0,[%i3+432]
std %f0,[%i3+440]
std %f0,[%i3+448]
std %f0,[%i3+456]
std %f0,[%i3+464]
std %f0,[%i3+472]
std %f0,[%i3+480]
std %f0,[%i3+488]
std %f0,[%i3+496]
std %f0,[%i3+504]
std %f0,[%i3+512]
std %f0,[%i3+520]
!BEGIN HAND CODED PART
! cheetah schedule, no even-odd trick
add %i3,%g0,%o5
fmovd %f40,%f0
fmovd %f14,%f2
fmovd %f44,%f8
sethi %hi(TwoTo32),%l5
fmovd %f46,%f10
sethi %hi(TwoToMinus32),%g5
ldd [%i3],%f6
ldd [%l0],%f4
ldd [%i1],%f40
ldd [%i1+8],%f42
ldd [%i1+16],%f52
ldd [%i1+48],%f54
ldd [%i1+56],%f36
ldd [%i1+64],%f56
ldd [%i1+104],%f48
ldd [%i1+112],%f58
ldd [%i4],%f44
ldd [%i4+8],%f46
ldd [%i4+104],%f50
ldd [%i4+112],%f60
.L99999999:
!1
ldd [%i1+24],%f20
fmuld %f0,%f44,%f12
!2
ldd [%i4+24],%f22
fmuld %f42,%f4,%f16
!3
ldd [%i1+40],%f24
fmuld %f46,%f0,%f18
!4
ldd [%i4+40],%f26
fmuld %f20,%f4,%f20
!5
ldd [%l0+8],%f38
faddd %f12,%f6,%f12
fmuld %f22,%f0,%f22
!6
add %l0,8,%l0
ldd [%i4+56],%f30
fmuld %f24,%f4,%f24
!7
ldd [%i1+72],%f32
faddd %f16,%f18,%f16
fmuld %f26,%f0,%f26
!8
ldd [%i3+16],%f18
fmuld %f40,%f38,%f14
!9
ldd [%i4+72],%f34
faddd %f20,%f22,%f20
fmuld %f8,%f12,%f12
!10
ldd [%i3+48],%f22
fmuld %f36,%f4,%f28
!11
ldd [%i3+8],%f6
faddd %f16,%f18,%f16
fmuld %f30,%f0,%f30
!12
std %f16,[%i3+16]
faddd %f24,%f26,%f24
fmuld %f32,%f4,%f32
!13
ldd [%i3+80],%f26
faddd %f12,%f14,%f12
fmuld %f34,%f0,%f34
!14
ldd [%i1+88],%f16
faddd %f20,%f22,%f20
!15
ldd [%i4+88],%f18
faddd %f28,%f30,%f28
!16
ldd [%i3+112],%f30
faddd %f32,%f34,%f32
!17
ldd [%i3+144],%f34
faddd %f12,%f6,%f6
fmuld %f16,%f4,%f16
!18
std %f20,[%i3+48]
faddd %f24,%f26,%f24
fmuld %f18,%f0,%f18
!19
std %f24,[%i3+80]
faddd %f28,%f30,%f28
fmuld %f48,%f4,%f20
!20
std %f28,[%i3+112]
faddd %f32,%f34,%f32
fmuld %f50,%f0,%f22
!21
ldd [%i1+120],%f24
fdtox %f6,%f12
!22
std %f32,[%i3+144]
faddd %f16,%f18,%f16
!23
ldd [%i4+120],%f26
!24
ldd [%i3+176],%f18
faddd %f20,%f22,%f20
fmuld %f24,%f4,%f24
!25
ldd [%i4+16],%f30
fmovs %f11,%f12
!26
ldd [%i1+32],%f32
fmuld %f26,%f0,%f26
!27
ldd [%i4+32],%f34
fmuld %f52,%f4,%f28
!28
ldd [%i3+208],%f22
faddd %f16,%f18,%f16
fmuld %f30,%f0,%f30
!29
std %f16,[%i3+176]
fxtod %f12,%f12
fmuld %f32,%f4,%f32
!30
ldd [%i4+48],%f18
faddd %f24,%f26,%f24
fmuld %f34,%f0,%f34
!31
ldd [%i3+240],%f26
faddd %f20,%f22,%f20
!32
std %f20,[%i3+208]
faddd %f28,%f30,%f28
fmuld %f54,%f4,%f16
!33
ldd [%i3+32],%f30
fmuld %f12,%f2,%f14
!34
ldd [%i4+64],%f22
faddd %f32,%f34,%f32
fmuld %f18,%f0,%f18
!35
ldd [%i3+64],%f34
faddd %f24,%f26,%f24
!36
std %f24,[%i3+240]
faddd %f28,%f30,%f28
fmuld %f56,%f4,%f20
!37
std %f28,[%i3+32]
fmuld %f14,%f8,%f12
!38
ldd [%i1+80],%f24
faddd %f32,%f34,%f34 ! yes, tmp52!
fmuld %f22,%f0,%f22
!39
ldd [%i4+80],%f26
faddd %f16,%f18,%f16
!40
ldd [%i1+96],%f28
fmuld %f58,%f4,%f32
!41
ldd [%i4+96],%f30
fdtox %f12,%f12
fmuld %f24,%f4,%f24
!42
std %f34,[%i3+64] ! yes, tmp52!
faddd %f20,%f22,%f20
fmuld %f26,%f0,%f26
!43
ldd [%i3+96],%f18
fmuld %f28,%f4,%f28
!44
ldd [%i3+128],%f22
fmovd %f38,%f4
fmuld %f30,%f0,%f30
!45
fxtod %f12,%f12
fmuld %f60,%f0,%f34
!46
add %i3,8,%i3
faddd %f24,%f26,%f24
!47
ldd [%i3+160-8],%f26
faddd %f16,%f18,%f16
!48
std %f16,[%i3+96-8]
faddd %f28,%f30,%f28
!49
ldd [%i3+192-8],%f30
faddd %f32,%f34,%f32
fmuld %f12,%f10,%f12
!50
ldd [%i3+224-8],%f34
faddd %f20,%f22,%f20
!51
std %f20,[%i3+128-8]
faddd %f24,%f26,%f24
!52
add %l1,1,%l1
std %f24,[%i3+160-8]
faddd %f28,%f30,%f28
!53
cmp %l1,15
std %f28,[%i3+192-8]
fsubd %f14,%f12,%f0
!54
faddd %f32,%f34,%f32
ble,pt %icc,.L99999999
std %f32,[%i3+224-8]
!
ldd [%g5+%lo(TwoToMinus32)],%f8
!
ldd [%i3+8],%f16
!
ldd [%i3+16],%f20
!
fmuld %f8,%f16,%f18
ldd [%i3+24],%f24
!
fmuld %f8,%f20,%f22
ldd [%i3+32],%f28
!
fmuld %f8,%f24,%f26
ldd [%l5+%lo(TwoTo32)],%f10
!
fmuld %f8,%f28,%f30
!
fdtox %f18,%f18
!
fdtox %f22,%f22
!
fdtox %f26,%f26
ldd [%i3+40],%f32
!
fdtox %f30,%f30
ldd [%i3+48],%f56
!
fxtod %f18,%f18
fmuld %f8,%f32,%f34
ldd [%i3+56],%f36
!
fxtod %f22,%f22
fmuld %f8,%f56,%f58
ldd [%i3+64],%f38
!
fxtod %f26,%f26
fmuld %f8,%f36,%f60
!
fxtod %f30,%f30
fmuld %f8,%f38,%f62
!
fdtox %f34,%f34
fmuld %f10,%f18,%f40
!
fdtox %f58,%f58
fmuld %f10,%f22,%f42
!
fdtox %f60,%f60
fmuld %f10,%f26,%f44
!
fdtox %f62,%f62
fmuld %f10,%f30,%f46
!
fxtod %f34,%f34
!
fxtod %f58,%f58
!
fxtod %f60,%f60
!
fxtod %f62,%f62
!
fsubd %f16,%f40,%f40
fmuld %f10,%f34,%f48
!
fsubd %f20,%f42,%f42
fmuld %f10,%f58,%f50
!
fsubd %f24,%f44,%f44
fmuld %f10,%f60,%f52
!
fsubd %f28,%f46,%f46
fmuld %f10,%f62,%f54
!
std %f40,[%i3+8]
!
std %f42,[%i3+16]
!
faddd %f18,%f44,%f44
std %f44,[%i3+24]
!
faddd %f22,%f46,%f46
std %f46,[%i3+32]
!
fsubd %f32,%f48,%f48
ldd [%i3+64+8],%f16
!
fsubd %f56,%f50,%f50
ldd [%i3+64+16],%f20
!
fsubd %f36,%f52,%f52
ldd [%i3+64+24],%f24
!
fsubd %f38,%f54,%f54
ldd [%i3+64+32],%f28
!
faddd %f26,%f48,%f48
fmuld %f8,%f16,%f18
std %f48,[%i3+40]
!
faddd %f30,%f50,%f50
fmuld %f8,%f20,%f22
std %f50,[%i3+48]
!
faddd %f34,%f52,%f52
fmuld %f8,%f24,%f26
std %f52,[%i3+56]
!
faddd %f58,%f54,%f54
fmuld %f8,%f28,%f30
std %f54,[%i3+64]
!
fdtox %f18,%f18
!
fdtox %f22,%f22
!
fdtox %f26,%f26
ldd [%i3+64+40],%f32
!
fdtox %f30,%f30
ldd [%i3+64+48],%f56
!
fxtod %f18,%f18
fmuld %f8,%f32,%f34
ldd [%i3+64+56],%f36
!
fxtod %f22,%f22
fmuld %f8,%f56,%f58
ldd [%i3+64+64],%f38
!
fxtod %f26,%f26
fmuld %f8,%f36,%f12
!
fxtod %f30,%f30
fmuld %f8,%f38,%f14
!
fdtox %f34,%f34
fmuld %f10,%f18,%f40
!
fdtox %f58,%f58
fmuld %f10,%f22,%f42
!
fdtox %f12,%f12
fmuld %f10,%f26,%f44
!
fdtox %f14,%f14
fmuld %f10,%f30,%f46
!
fxtod %f34,%f34
!
fxtod %f58,%f58
!
fxtod %f12,%f12
!
fxtod %f14,%f14
!
fsubd %f16,%f40,%f40
fmuld %f10,%f34,%f48
!
fsubd %f20,%f42,%f42
fmuld %f10,%f58,%f50
!
fsubd %f24,%f44,%f44
fmuld %f10,%f12,%f52
!
fsubd %f28,%f46,%f46
fmuld %f10,%f14,%f54
!
faddd %f60,%f40,%f40
std %f40,[%i3+64+8]
!
faddd %f62,%f42,%f42
std %f42,[%i3+64+16]
!
faddd %f18,%f44,%f44
std %f44,[%i3+64+24]
!
faddd %f22,%f46,%f46
std %f46,[%i3+64+32]
!
fsubd %f32,%f48,%f48
ldd [%i3+64+64+8],%f16
!
fsubd %f56,%f50,%f50
ldd [%i3+64+64+16],%f20
!
fsubd %f36,%f52,%f52
ldd [%i3+64+64+24],%f24
!
fsubd %f38,%f54,%f54
ldd [%i3+64+64+32],%f28
!
faddd %f26,%f48,%f48
fmuld %f8,%f16,%f18
std %f48,[%i3+64+40]
!
faddd %f30,%f50,%f50
fmuld %f8,%f20,%f22
std %f50,[%i3+64+48]
!
faddd %f34,%f52,%f52
fmuld %f8,%f24,%f26
std %f52,[%i3+64+56]
!
faddd %f58,%f54,%f54
fmuld %f8,%f28,%f30
std %f54,[%i3+64+64]
!
fdtox %f18,%f18
!
fdtox %f22,%f22
!
fdtox %f26,%f26
ldd [%i3+64+64+40],%f32
!
fdtox %f30,%f30
ldd [%i3+64+64+48],%f56
!
fxtod %f18,%f18
fmuld %f8,%f32,%f34
ldd [%i3+64+64+56],%f36
!
fxtod %f22,%f22
fmuld %f8,%f56,%f58
ldd [%i3+64+64+64],%f38
!
fxtod %f26,%f26
fmuld %f8,%f36,%f60
!
fxtod %f30,%f30
fmuld %f8,%f38,%f62
!
fdtox %f34,%f34
fmuld %f10,%f18,%f40
!
fdtox %f58,%f58
fmuld %f10,%f22,%f42
!
fdtox %f60,%f60
fmuld %f10,%f26,%f44
!
fdtox %f62,%f62
fmuld %f10,%f30,%f46
!
fxtod %f34,%f34
!
fxtod %f58,%f58
!
fxtod %f60,%f60
!
fxtod %f62,%f62
!
fsubd %f16,%f40,%f40
fmuld %f10,%f34,%f48
!
fsubd %f20,%f42,%f42
fmuld %f10,%f58,%f50
!
fsubd %f24,%f44,%f44
fmuld %f10,%f60,%f52
!
fsubd %f28,%f46,%f46
fmuld %f10,%f62,%f54
!
faddd %f12,%f40,%f40
std %f40,[%i3+64+64+8]
!
faddd %f14,%f42,%f42
std %f42,[%i3+64+64+16]
!
faddd %f18,%f44,%f44
std %f44,[%i3+64+64+24]
!
faddd %f22,%f46,%f46
std %f46,[%i3+64+64+32]
!
fsubd %f32,%f48,%f48
ldd [%i3+64+64+64+8],%f16
!
fsubd %f56,%f50,%f50
ldd [%i3+64+64+64+16],%f20
!
fsubd %f36,%f52,%f52
ldd [%i3+64+64+64+24],%f24
!
fsubd %f38,%f54,%f54
ldd [%i3+64+64+64+32],%f28
!
faddd %f26,%f48,%f48
fmuld %f8,%f16,%f18
std %f48,[%i3+64+64+40]
!
faddd %f30,%f50,%f50
fmuld %f8,%f20,%f22
std %f50,[%i3+64+64+48]
!
faddd %f34,%f52,%f52
fmuld %f8,%f24,%f26
std %f52,[%i3+64+64+56]
!
faddd %f58,%f54,%f54
fmuld %f8,%f28,%f30
std %f54,[%i3+64+64+64]
!
fdtox %f18,%f18
!
fdtox %f22,%f22
!
fdtox %f26,%f26
ldd [%i3+64+64+64+40],%f32
!
fdtox %f30,%f30
ldd [%i3+64+64+64+48],%f56
!
fxtod %f18,%f18
fmuld %f8,%f32,%f34
ldd [%i3+64+64+64+56],%f36
!
fxtod %f22,%f22
fmuld %f8,%f56,%f58
ldd [%i3+64+64+64+64],%f38
!
fxtod %f26,%f26
fmuld %f8,%f36,%f12
!
fxtod %f30,%f30
fmuld %f8,%f38,%f14
!
fdtox %f34,%f34
fmuld %f10,%f18,%f40
!
fdtox %f58,%f58
fmuld %f10,%f22,%f42
!
fdtox %f12,%f12
fmuld %f10,%f26,%f44
!
fdtox %f14,%f14
fmuld %f10,%f30,%f46
!
sethi %hi(TwoToMinus16),%g5
fxtod %f34,%f34
!
sethi %hi(TwoTo16),%l5
fxtod %f58,%f58
!
fxtod %f12,%f12
!
fxtod %f14,%f14
!
fsubd %f16,%f40,%f16
fmuld %f10,%f34,%f48
ldd [%g5+%lo(TwoToMinus16)],%f8
!
fsubd %f20,%f42,%f20
fmuld %f10,%f58,%f50
ldd [%i1],%f40 ! should be %f40
!
fsubd %f24,%f44,%f24
fmuld %f10,%f12,%f52
ldd [%i1+8],%f42 ! should be %f42
!
fsubd %f28,%f46,%f28
fmuld %f10,%f14,%f54
ldd [%i4],%f44 ! should be %f44
!
faddd %f60,%f16,%f16
std %f16,[%i3+64+64+64+8]
!
faddd %f62,%f20,%f20
std %f20,[%i3+64+64+64+16]
!
faddd %f18,%f24,%f24
std %f24,[%i3+64+64+64+24]
!
faddd %f22,%f28,%f28
std %f28,[%i3+64+64+64+32]
!
fsubd %f32,%f48,%f32
ldd [%i4+8],%f46 ! should be %f46
!
fsubd %f56,%f50,%f56
ldd [%i1+104],%f48 ! should be %f48
!
fsubd %f36,%f52,%f36
ldd [%i4+104],%f50 ! should be %f50
!
fsubd %f38,%f54,%f38
ldd [%i1+16],%f52 ! should be %f52
!
faddd %f26,%f32,%f32
std %f32,[%i3+64+64+64+40]
!
faddd %f30,%f56,%f56
std %f56,[%i3+64+64+64+48]
!
faddd %f34,%f36,%f36
std %f36,[%i3+64+64+64+56]
!
faddd %f58,%f38,%f38
std %f38,[%i3+64+64+64+64]
!
std %f12,[%i3+64+64+64+64+8]
!
std %f14,[%i3+64+64+64+64+16]
!
ldd [%l5+%lo(TwoTo16)],%f10
ldd [%i1+48],%f54
ldd [%i1+56],%f36
ldd [%i1+64],%f56
ldd [%i1+112],%f58
ldd [%i4+104],%f50
ldd [%i4+112],%f60
.L99999998:
!1
ldd [%i1+24],%f20
fmuld %f0,%f44,%f12
!2
ldd [%i4+24],%f22
fmuld %f42,%f4,%f16
!3
ldd [%i1+40],%f24
fmuld %f46,%f0,%f18
!4
ldd [%i4+40],%f26
fmuld %f20,%f4,%f20
!5
ldd [%l0+8],%f38
faddd %f12,%f6,%f12
fmuld %f22,%f0,%f22
!6
add %l0,8,%l0
ldd [%i4+56],%f30
fmuld %f24,%f4,%f24
!7
ldd [%i1+72],%f32
faddd %f16,%f18,%f16
fmuld %f26,%f0,%f26
!8
ldd [%i3+16],%f18
fmuld %f40,%f38,%f14
!9
ldd [%i4+72],%f34
faddd %f20,%f22,%f20
fmuld %f8,%f12,%f12
!10
ldd [%i3+48],%f22
fmuld %f36,%f4,%f28
!11
ldd [%i3+8],%f6
faddd %f16,%f18,%f16
fmuld %f30,%f0,%f30
!12
std %f16,[%i3+16]
faddd %f24,%f26,%f24
fmuld %f32,%f4,%f32
!13
ldd [%i3+80],%f26
faddd %f12,%f14,%f12
fmuld %f34,%f0,%f34
!14
ldd [%i1+88],%f16
faddd %f20,%f22,%f20
!15
ldd [%i4+88],%f18
faddd %f28,%f30,%f28
!16
ldd [%i3+112],%f30
faddd %f32,%f34,%f32
!17
ldd [%i3+144],%f34
faddd %f12,%f6,%f6
fmuld %f16,%f4,%f16
!18
std %f20,[%i3+48]
faddd %f24,%f26,%f24
fmuld %f18,%f0,%f18
!19
std %f24,[%i3+80]
faddd %f28,%f30,%f28
fmuld %f48,%f4,%f20
!20
std %f28,[%i3+112]
faddd %f32,%f34,%f32
fmuld %f50,%f0,%f22
!21
ldd [%i1+120],%f24
fdtox %f6,%f12
!22
std %f32,[%i3+144]
faddd %f16,%f18,%f16
!23
ldd [%i4+120],%f26
!24
ldd [%i3+176],%f18
faddd %f20,%f22,%f20
fmuld %f24,%f4,%f24
!25
ldd [%i4+16],%f30
fmovs %f11,%f12
!26
ldd [%i1+32],%f32
fmuld %f26,%f0,%f26
!27
ldd [%i4+32],%f34
fmuld %f52,%f4,%f28
!28
ldd [%i3+208],%f22
faddd %f16,%f18,%f16
fmuld %f30,%f0,%f30
!29
std %f16,[%i3+176]
fxtod %f12,%f12
fmuld %f32,%f4,%f32
!30
ldd [%i4+48],%f18
faddd %f24,%f26,%f24
fmuld %f34,%f0,%f34
!31
ldd [%i3+240],%f26
faddd %f20,%f22,%f20
!32
std %f20,[%i3+208]
faddd %f28,%f30,%f28
fmuld %f54,%f4,%f16
!33
ldd [%i3+32],%f30
fmuld %f12,%f2,%f14
!34
ldd [%i4+64],%f22
faddd %f32,%f34,%f32
fmuld %f18,%f0,%f18
!35
ldd [%i3+64],%f34
faddd %f24,%f26,%f24
!36
std %f24,[%i3+240]
faddd %f28,%f30,%f28
fmuld %f56,%f4,%f20
!37
std %f28,[%i3+32]
fmuld %f14,%f8,%f12
!38
ldd [%i1+80],%f24
faddd %f32,%f34,%f34 ! yes, tmp52!
fmuld %f22,%f0,%f22
!39
ldd [%i4+80],%f26
faddd %f16,%f18,%f16
!40
ldd [%i1+96],%f28
fmuld %f58,%f4,%f32
!41
ldd [%i4+96],%f30
fdtox %f12,%f12
fmuld %f24,%f4,%f24
!42
std %f34,[%i3+64] ! yes, tmp52!
faddd %f20,%f22,%f20
fmuld %f26,%f0,%f26
!43
ldd [%i3+96],%f18
fmuld %f28,%f4,%f28
!44
ldd [%i3+128],%f22
fmovd %f38,%f4
fmuld %f30,%f0,%f30
!45
fxtod %f12,%f12
fmuld %f60,%f0,%f34
!46
add %i3,8,%i3
faddd %f24,%f26,%f24
!47
ldd [%i3+160-8],%f26
faddd %f16,%f18,%f16
!48
std %f16,[%i3+96-8]
faddd %f28,%f30,%f28
!49
ldd [%i3+192-8],%f30
faddd %f32,%f34,%f32
fmuld %f12,%f10,%f12
!50
ldd [%i3+224-8],%f34
faddd %f20,%f22,%f20
!51
std %f20,[%i3+128-8]
faddd %f24,%f26,%f24
!52
add %l1,1,%l1
std %f24,[%i3+160-8]
faddd %f28,%f30,%f28
!53
cmp %l1,31
std %f28,[%i3+192-8]
fsubd %f14,%f12,%f0
!54
faddd %f32,%f34,%f32
ble,pt %icc,.L99999998
std %f32,[%i3+224-8]
!55
std %f6,[%i3]
add %o5,%g0,%i3
!END HAND CODED PART
.L900000828:
ba .L900000852
ldx [%i3+%o0],%l1
! 406 ! }
! 407 ! }
! 409 ! conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1);
! 411 !
! 413 ! adjust_montf_result(result, nint, nlen);
.L77000476:
sll %g1,2,%l3
sethi %hi(TwoTo16),%g5
add %l3,2,%l2
cmp %l2,0
ble,pn %icc,.L77000482
sethi %hi(TwoToMinus16),%o2
.L77000514:
add %l3,2,%l2
add %l3,1,%o4
or %g0,0,%l3
cmp %l2,8
bl,pn %icc,.L77000477
or %g0,%i3,%l1
.L900000831:
prefetch [%i3],22
sub %o4,7,%l4
or %g0,0,%l3
or %g0,%i3,%l1
.L900000829:
prefetch [%l1+528],22
std %f0,[%l1]
add %l3,8,%l3
add %l1,64,%l1
std %f0,[%l1-56]
cmp %l3,%l4
std %f0,[%l1-48]
std %f0,[%l1-40]
prefetch [%l1+496],22
std %f0,[%l1-32]
std %f0,[%l1-24]
std %f0,[%l1-16]
ble,pt %icc,.L900000829
std %f0,[%l1-8]
.L900000832:
cmp %l3,%o4
bg,pn %icc,.L77000482
nop
.L77000477:
add %l3,1,%l3
.L900000851:
std %f0,[%l1]
cmp %l3,%o4
add %l1,8,%l1
ble,pt %icc,.L900000851
add %l3,1,%l3
.L77000482:
ldd [%i1],%f40
cmp %o3,0
sub %g1,1,%l3
ldd [%l0],%f42
ldd [%o2+%lo(TwoToMinus16)],%f36
ldd [%g5+%lo(TwoTo16)],%f38
fmuld %f40,%f42,%f52
fdtox %f52,%f8
fmovs %f0,%f8
fxtod %f8,%f62
fmuld %f62,%f14,%f60
fmuld %f60,%f36,%f32
fdtox %f32,%f50
fxtod %f50,%f34
fmuld %f34,%f38,%f46
fsubd %f60,%f46,%f40
ble,pn %icc,.L77000378
std %f52,[%i3]
.L77000509:
add %o3,1,%g5
sll %g5,1,%o2
or %g0,0,%l1
ldd [%i4],%f42
sub %o3,1,%o3
or %g0,0,%o5
or %g0,%i3,%l2
add %i4,8,%o1
add %i1,8,%g5
.L900000848:
fmuld %f40,%f42,%f34
ldd [%l0+8],%f32
cmp %g1,1
ldd [%i1],%f50
ldd [%l2],%f46
ldd [%l2+8],%f44
fmuld %f50,%f32,%f60
ldd [%l0],%f42
faddd %f46,%f34,%f48
faddd %f44,%f60,%f58
fmuld %f36,%f48,%f54
faddd %f58,%f54,%f34
ble,pn %icc,.L77000368
std %f34,[%l2+8]
.L77000507:
or %g0,1,%l5
or %g0,2,%l4
or %g0,%g5,%g4
cmp %l3,12
bl,pn %icc,.L77000481
or %g0,%o1,%g3
.L900000839:
prefetch [%i1+8],0
prefetch [%i1+72],0
add %i4,40,%l6
add %i1,40,%l7
prefetch [%l2+16],0
or %g0,%l2,%o7
sub %l3,7,%i5
prefetch [%l2+80],0
add %l2,80,%g2
or %g0,2,%l4
prefetch [%i1+136],0
or %g0,5,%l5
prefetch [%i1+200],0
prefetch [%l2+144],0
ldd [%i4+8],%f52
ldd [%i4+16],%f44
ldd [%i4+24],%f56
fmuld %f40,%f52,%f48
fmuld %f40,%f44,%f46
fmuld %f40,%f56,%f44
ldd [%l2+48],%f56
prefetch [%l2+208],0
prefetch [%l2+272],0
prefetch [%l2+336],0
prefetch [%l2+400],0
ldd [%i1+8],%f32
ldd [%i1+16],%f60
ldd [%i1+24],%f50
fmuld %f42,%f32,%f62
ldd [%i1+32],%f32
fmuld %f42,%f60,%f58
ldd [%l2+16],%f52
ldd [%l2+32],%f54
faddd %f62,%f48,%f60
fmuld %f42,%f50,%f48
faddd %f58,%f46,%f62
ldd [%i4+32],%f46
ldd [%l2+64],%f58
.L900000837:
prefetch [%l7+192],0
fmuld %f40,%f46,%f46
faddd %f60,%f52,%f60
ldd [%l6],%f52
std %f60,[%g2-64]
fmuld %f42,%f32,%f50
add %l5,8,%l5
ldd [%l7],%f60
faddd %f48,%f44,%f48
cmp %l5,%i5
ldd [%g2],%f32
add %g2,128,%g2
prefetch [%g2+256],0
fmuld %f40,%f52,%f52
faddd %f62,%f54,%f44
ldd [%l6+8],%f54
std %f44,[%g2-176]
fmuld %f42,%f60,%f44
add %l6,64,%l6
ldd [%l7+8],%f60
faddd %f50,%f46,%f50
add %l7,64,%l7
add %l4,16,%l4
ldd [%g2-112],%f46
fmuld %f40,%f54,%f54
faddd %f48,%f56,%f62
ldd [%l6-48],%f56
std %f62,[%g2-160]
fmuld %f42,%f60,%f48
ldd [%l7-48],%f60
faddd %f44,%f52,%f52
ldd [%g2-96],%f30
prefetch [%g2+288],0
fmuld %f40,%f56,%f56
faddd %f50,%f58,%f62
ldd [%l6-40],%f58
std %f62,[%g2-144]
fmuld %f42,%f60,%f50
ldd [%l7-40],%f62
faddd %f48,%f54,%f54
ldd [%g2-80],%f28
prefetch [%l7+160],0
fmuld %f40,%f58,%f48
faddd %f52,%f32,%f44
ldd [%l6-32],%f58
std %f44,[%g2-128]
fmuld %f42,%f62,%f44
ldd [%l7-32],%f60
faddd %f50,%f56,%f56
ldd [%g2-64],%f52
prefetch [%g2+320],0
fmuld %f40,%f58,%f50
faddd %f54,%f46,%f32
ldd [%l6-24],%f62
std %f32,[%g2-112]
fmuld %f42,%f60,%f46
ldd [%l7-24],%f60
faddd %f44,%f48,%f48
ldd [%g2-48],%f54
fmuld %f40,%f62,%f26
faddd %f56,%f30,%f32
ldd [%l6-16],%f58
std %f32,[%g2-96]
fmuld %f42,%f60,%f30
ldd [%l7-16],%f32
faddd %f46,%f50,%f60
ldd [%g2-32],%f56
prefetch [%g2+352],0
fmuld %f40,%f58,%f44
faddd %f48,%f28,%f62
ldd [%l6-8],%f46
std %f62,[%g2-80]
fmuld %f42,%f32,%f48
ldd [%l7-8],%f32
faddd %f30,%f26,%f62
ble,pt %icc,.L900000837
ldd [%g2-16],%f58
.L900000840:
fmuld %f40,%f46,%f46
faddd %f62,%f54,%f62
std %f62,[%g2-48]
cmp %l5,%l3
fmuld %f42,%f32,%f50
faddd %f48,%f44,%f48
or %g0,%l7,%g4
or %g0,%l6,%g3
faddd %f60,%f52,%f60
std %f60,[%g2-64]
or %g0,%o7,%l2
add %l4,8,%l4
faddd %f50,%f46,%f54
faddd %f48,%f56,%f56
std %f56,[%g2-32]
faddd %f54,%f58,%f58
bg,pn %icc,.L77000368
std %f58,[%g2-16]
.L77000481:
ldd [%g4],%f44
.L900000850:
ldd [%g3],%f48
fmuld %f42,%f44,%f58
sra %l4,0,%l7
add %l5,1,%l5
sllx %l7,3,%g2
add %g4,8,%g4
ldd [%l2+%g2],%f56
cmp %l5,%l3
add %l4,2,%l4
fmuld %f40,%f48,%f54
add %g3,8,%g3
faddd %f58,%f54,%f52
faddd %f52,%f56,%f62
std %f62,[%l2+%g2]
ble,a,pt %icc,.L900000850
ldd [%g4],%f44
.L77000368:
cmp %o5,15
bne,pn %icc,.L77000483
srl %l1,31,%g4
.L77000478:
add %l1,%g4,%l4
sra %l4,1,%o7
add %o7,1,%o4
sll %o4,1,%l6
cmp %l6,%o2
bge,pn %icc,.L77000392
fmovd %f0,%f42
.L77000508:
sra %l6,0,%l4
sllx %l4,3,%g2
fmovd %f0,%f32
sub %o2,1,%l5
ldd [%g2+%i3],%f40
add %g2,%i3,%g3
.L900000849:
fdtox %f40,%f10
ldd [%g3+8],%f52
add %l6,2,%l6
cmp %l6,%l5
fdtox %f52,%f2
fmovd %f10,%f30
fmovs %f0,%f10
fmovs %f0,%f2
fxtod %f10,%f10
fxtod %f2,%f2
fdtox %f52,%f28
faddd %f10,%f32,%f56
std %f56,[%g3]
faddd %f2,%f42,%f62
std %f62,[%g3+8]
fitod %f30,%f32
add %g3,16,%g3
fitod %f28,%f42
ble,a,pt %icc,.L900000849
ldd [%g3],%f40
.L77000392:
or %g0,0,%o5
.L77000483:
fdtox %f34,%f6
add %l1,1,%l1
cmp %l1,%o3
add %o5,1,%o5
add %l2,8,%l2
add %l0,8,%l0
fmovs %f0,%f6
fxtod %f6,%f46
fmuld %f46,%f14,%f56
fmuld %f56,%f36,%f44
fdtox %f44,%f48
fxtod %f48,%f58
fmuld %f58,%f38,%f54
fsubd %f56,%f54,%f40
ble,a,pt %icc,.L900000848
ldd [%i4],%f42
.L77000378:
ldx [%i3+%o0],%l1
.L900000852:
add %i3,%o0,%l4
ldx [%l4+8],%i1
cmp %l1,0
bne,pn %xcc,.L77000403
or %g0,0,%g5
.L77000402:
or %g0,0,%i3
ba .L900000847
cmp %i1,0
.L77000403:
srlx %l1,52,%o5
sethi %hi(0xfff00000),%i3
sllx %i3,32,%o2
sethi %hi(0x40000000),%o0
sllx %o0,22,%o4
or %g0,1023,%l0
xor %o2,-1,%o3
sub %l0,%o5,%o7
and %l1,%o3,%l1
add %o7,52,%i4
or %l1,%o4,%o1
cmp %i1,0
srlx %o1,%i4,%i3
.L900000847:
bne,pn %xcc,.L77000409
or %g0,0,%o7
.L77000408:
ba .L900000846
cmp %g1,0
.L77000409:
srlx %i1,52,%l2
sethi %hi(0xfff00000),%o7
sllx %o7,32,%i4
sethi %hi(0x40000000),%i5
sllx %i5,22,%l6
or %g0,1023,%l5
xor %i4,-1,%o1
sub %l5,%l2,%g2
and %i1,%o1,%l7
add %g2,52,%g3
or %l7,%l6,%g4
cmp %g1,0
srlx %g4,%g3,%o7
.L900000846:
ble,pn %icc,.L77000397
or %g0,0,%l5
.L77000510:
sethi %hi(0xfff00000),%g4
sllx %g4,32,%o0
or %g0,-1,%i5
srl %i5,0,%l7
sethi %hi(0x40000000),%i1
sllx %i1,22,%l6
sethi %hi(0xfc00),%i4
xor %o0,-1,%g2
add %i4,1023,%l2
or %g0,2,%g4
or %g0,%i2,%g3
.L77000395:
sra %g4,0,%o2
add %g4,1,%o3
sllx %o2,3,%o0
sra %o3,0,%o5
ldx [%l4+%o0],%o4
sllx %o5,3,%l0
and %i3,%l7,%o1
ldx [%l4+%l0],%i4
cmp %o4,0
bne,pn %xcc,.L77000415
and %o7,%l2,%i5
.L77000414:
or %g0,0,%l1
ba .L900000845
add %g5,%o1,%i1
.L77000415:
srlx %o4,52,%o3
and %o4,%g2,%l1
or %g0,52,%o0
sub %o3,1023,%l0
or %l1,%l6,%o4
sub %o0,%l0,%o5
srlx %o4,%o5,%l1
add %g5,%o1,%i1
.L900000845:
srax %i3,32,%g5
cmp %i4,0
bne,pn %xcc,.L77000421
sllx %i5,16,%o2
.L77000420:
or %g0,0,%o4
ba .L900000844
add %i1,%o2,%o5
.L77000421:
srlx %i4,52,%o4
or %g0,52,%o0
sub %o4,1023,%o3
and %i4,%g2,%i3
or %i3,%l6,%o5
sub %o0,%o3,%l0
srlx %o5,%l0,%o4
add %i1,%o2,%o5
.L900000844:
srax %o7,16,%i4
srax %o5,32,%i5
add %i4,%i5,%o1
add %l5,1,%l5
and %o5,%l7,%i1
add %g5,%o1,%g5
st %i1,[%g3]
or %g0,%l1,%i3
or %g0,%o4,%o7
add %g4,2,%g4
cmp %l5,%l3
ble,pt %icc,.L77000395
add %g3,4,%g3
.L77000397:
sethi %hi(0xfc00),%l4
sra %l5,0,%i5
add %l4,1023,%i1
add %g5,%i3,%l5
and %o7,%i1,%g5
sllx %g5,16,%l2
sllx %i5,2,%l7
sra %g1,0,%g2
add %l5,%l2,%l6
st %l6,[%i2+%l7]
sllx %g2,2,%g3
ld [%i2+%g3],%g4
cmp %g4,0
bgu,pn %icc,.L77000486
cmp %l3,0
.L77000427:
bl,pn %icc,.L77000486
or %g0,%l3,%i5
.L77000512:
sra %l3,0,%o5
sllx %o5,2,%l7
ld [%l7+%i0],%o5
add %l7,%i2,%o1
add %l7,%i0,%i4
.L900000843:
ld [%o1],%i1
cmp %i1,%o5
bne,pn %icc,.L77000435
sub %o1,4,%o1
.L77000431:
sub %i4,4,%i4
subcc %i5,1,%i5
bpos,a,pt %icc,.L900000843
ld [%i4],%o5
.L900000827:
ba .L900000842
cmp %g1,0
.L77000435:
sra %i5,0,%o0
sllx %o0,2,%l1
ld [%i0+%l1],%i3
ld [%i2+%l1],%l0
cmp %l0,%i3
bleu,pt %icc,.L77000379
nop
.L77000486:
cmp %g1,0
.L900000842:
ble,pn %icc,.L77000379
add %l3,1,%g3
.L77000511:
or %g0,0,%l5
cmp %g3,10
bl,pn %icc,.L77000487
or %g0,0,%g1
.L900000835:
prefetch [%i2],22
add %i0,4,%l2
prefetch [%i2+64],22
add %i2,8,%o5
sub %l3,7,%i0
prefetch [%i2+128],22
or %g0,2,%l5
prefetch [%i2+192],22
prefetch [%i2+256],22
prefetch [%i2+320],22
prefetch [%i2+384],22
ld [%l2-4],%l7
ld [%o5-4],%l6
prefetch [%o5+440],22
prefetch [%o5+504],22
ld [%i2],%i2
sub %i2,%l7,%g3
st %g3,[%o5-8]
srax %g3,32,%l7
.L900000833:
add %l5,8,%l5
add %o5,32,%o5
ld [%l2],%i5
prefetch [%o5+496],22
cmp %l5,%i0
add %l2,32,%l2
sub %l6,%i5,%g5
add %g5,%l7,%o0
ld [%o5-32],%l4
st %o0,[%o5-36]
srax %o0,32,%i3
ld [%l2-28],%i1
sub %l4,%i1,%i4
add %i4,%i3,%o1
ld [%o5-28],%o3
st %o1,[%o5-32]
srax %o1,32,%l1
ld [%l2-24],%o2
sub %o3,%o2,%g2
add %g2,%l1,%o7
ld [%o5-24],%l0
st %o7,[%o5-28]
srax %o7,32,%l6
ld [%l2-20],%o4
sub %l0,%o4,%g1
add %g1,%l6,%l7
ld [%o5-20],%i2
st %l7,[%o5-24]
srax %l7,32,%g4
ld [%l2-16],%g3
sub %i2,%g3,%i5
add %i5,%g4,%g5
ld [%o5-16],%i1
st %g5,[%o5-20]
srax %g5,32,%l4
ld [%l2-12],%o0
sub %i1,%o0,%i3
add %i3,%l4,%i4
ld [%o5-12],%o2
st %i4,[%o5-16]
srax %i4,32,%o3
ld [%l2-8],%o1
sub %o2,%o1,%l1
add %l1,%o3,%g2
ld [%o5-8],%o4
st %g2,[%o5-12]
srax %g2,32,%l0
ld [%l2-4],%o7
sub %o4,%o7,%l6
add %l6,%l0,%g1
ld [%o5-4],%l6
st %g1,[%o5-8]
ble,pt %icc,.L900000833
srax %g1,32,%l7
.L900000836:
ld [%l2],%l0
add %l2,4,%i0
or %g0,%o5,%i2
cmp %l5,%l3
sub %l6,%l0,%l6
add %l6,%l7,%g1
st %g1,[%o5-4]
bg,pn %icc,.L77000379
srax %g1,32,%g1
.L77000487:
ld [%i2],%o4
.L900000841:
ld [%i0],%i3
add %g1,%o4,%l0
add %l5,1,%l5
cmp %l5,%l3
add %i0,4,%i0
sub %l0,%i3,%l6
st %l6,[%i2]
srax %l6,32,%g1
add %i2,4,%i2
ble,a,pt %icc,.L900000841
ld [%i2],%o4
.L77000379:
ret ! Result =
restore %g0,%g0,%g0
.type mont_mulf_noconv,2
.size mont_mulf_noconv,(.-mont_mulf_noconv)
! Begin Disassembling Debug Info
.xstabs ".stab.index","V=10.0;DBG_GEN=4.14.14;cd;backend;Xa;O;R=Sun C 5.5 Patch 112760-07 2004/02/03",60,0,0,0
.xstabs ".stab.index","/workspace/ferenc/algorithms/bignum/unified/mont_mulf; /ws/onnv-tools/SUNWspro/SOS8/prod/bin/cc -D_KERNEL -DRF_INLINE_MACROS -fast -xarch=v9 -xO5 -xstrconst -xdepend -Xa -xchip=ultra3 -xcode=abs32 -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -W0,-xp -c conv_v9.il -o mont_mulf.o mont_mulf.c",52,0,0,0
! End Disassembling Debug Info
! Begin Disassembling Ident
.ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE)
.ident "@(#)mont_mulf.c\t1.2\t01/09/24 SMI" ! (/tmp/acompAAApja4Fx:8)
.ident "@(#)types.h\t1.74\t03/08/07 SMI" ! (/tmp/acompAAApja4Fx:9)
.ident "@(#)isa_defs.h\t1.20\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:10)
.ident "@(#)feature_tests.h\t1.18\t99/07/26 SMI" ! (/tmp/acompAAApja4Fx:11)
.ident "@(#)machtypes.h\t1.13\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:12)
.ident "@(#)inttypes.h\t1.2\t98/01/16 SMI" ! (/tmp/acompAAApja4Fx:13)
.ident "@(#)int_types.h\t1.6\t97/08/20 SMI" ! (/tmp/acompAAApja4Fx:14)
.ident "@(#)int_limits.h\t1.6\t99/08/06 SMI" ! (/tmp/acompAAApja4Fx:15)
.ident "@(#)int_const.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:16)
.ident "@(#)int_fmtio.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:17)
.ident "@(#)types32.h\t1.4\t98/02/13 SMI" ! (/tmp/acompAAApja4Fx:18)
.ident "@(#)select.h\t1.17\t01/08/15 SMI" ! (/tmp/acompAAApja4Fx:19)
.ident "@(#)math.h\t2.11\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:20)
.ident "@(#)math_iso.h\t1.2\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:21)
.ident "@(#)floatingpoint.h\t2.5\t99/06/22 SMI" ! (/tmp/acompAAApja4Fx:22)
.ident "@(#)stdio_tag.h\t1.3\t98/04/20 SMI" ! (/tmp/acompAAApja4Fx:23)
.ident "@(#)ieeefp.h\t2.8 99/10/29" ! (/tmp/acompAAApja4Fx:24)
.ident "acomp: Sun C 5.5 Patch 112760-07 2004/02/03" ! (/tmp/acompAAApja4Fx:57)
.ident "iropt: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (/tmp/acompAAApja4Fx:58)
.ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE)
! End Disassembling Ident
!
! FZERO -- zero the entire visible floating-point register file.
! %f0 and %f2 are cleared directly with fzero; every remaining
! double-precision register (%f4 .. %f62) is then written with either
! faddd %f0,%f2 (0.0 + 0.0) or fmuld %f0,%f2 (0.0 * 0.0), both of which
! produce +0.0.  Used by big_restorefp() so sensitive bignum
! intermediates left in the FP registers by the Montgomery-multiply
! code above cannot leak to the next FP user.  Clobbers all FP regs.
! (No comments may appear inside the macro body: after cpp joins the
! continuation lines, a "!" would comment out the rest of the macro.)
!
#define FZERO \
fzero %f0 ;\
fzero %f2 ;\
faddd %f0, %f2, %f4 ;\
fmuld %f0, %f2, %f6 ;\
faddd %f0, %f2, %f8 ;\
fmuld %f0, %f2, %f10 ;\
faddd %f0, %f2, %f12 ;\
fmuld %f0, %f2, %f14 ;\
faddd %f0, %f2, %f16 ;\
fmuld %f0, %f2, %f18 ;\
faddd %f0, %f2, %f20 ;\
fmuld %f0, %f2, %f22 ;\
faddd %f0, %f2, %f24 ;\
fmuld %f0, %f2, %f26 ;\
faddd %f0, %f2, %f28 ;\
fmuld %f0, %f2, %f30 ;\
faddd %f0, %f2, %f32 ;\
fmuld %f0, %f2, %f34 ;\
faddd %f0, %f2, %f36 ;\
fmuld %f0, %f2, %f38 ;\
faddd %f0, %f2, %f40 ;\
fmuld %f0, %f2, %f42 ;\
faddd %f0, %f2, %f44 ;\
fmuld %f0, %f2, %f46 ;\
faddd %f0, %f2, %f48 ;\
fmuld %f0, %f2, %f50 ;\
faddd %f0, %f2, %f52 ;\
fmuld %f0, %f2, %f54 ;\
faddd %f0, %f2, %f56 ;\
fmuld %f0, %f2, %f58 ;\
faddd %f0, %f2, %f60 ;\
fmuld %f0, %f2, %f62
#include "assym.h"
!
! void big_savefp(fp_save_area *)
!
! Save the current floating-point state into the caller-supplied save
! area so the FP-based Montgomery-multiply code above may use the FP
! registers.  Offsets FPU_FPRS/FPU_FSR come from assym.h; the save-area
! layout is presumably the kernel FP-state structure -- confirm against
! the assym.h used to build this file.
! In:    %o0 = pointer to FP save area
! Out:   save area holds %fprs and %fsr, plus all FP registers when the
!        FP unit was already live (FPRS_FEF was set)
! Clobb: %o2, %o4, %fprs (FPRS_FEF is left enabled on return)
!
ENTRY(big_savefp)
rd %fprs, %o2 ! %o2 = caller's %fprs
st %o2, [%o0 + FPU_FPRS] ! remember it for big_restorefp()
andcc %o2, FPRS_FEF, %g0 ! is FPRS_FEF set?
bnz,a,pt %icc, .fregs_save ! yes, FP regs are live: go save them
nop
wr %g0, FPRS_FEF, %fprs ! else, set the bit so FP ops may be used
stx %fsr, [%o0 + FPU_FSR] ! store %fsr
retl
nop
.fregs_save:
BSTORE_FPREGS(%o0, %o4) ! save all FP registers (macro, %o4 = scratch)
stx %fsr, [%o0 + FPU_FSR] ! store %fsr
retl
nop
SET_SIZE(big_savefp)
!
! void big_restorefp(fp_save_area *)
!
! Undo big_savefp().  If the saved %fprs shows the FP unit was live
! (FPRS_FEF set), reload every FP register from the save area;
! otherwise zero the whole FP register file via FZERO -- so the bignum
! intermediates computed above cannot leak -- and disable the FPU.
! Either way %fsr and %fprs are restored to their saved values.
! In:    %o0 = pointer to FP save area filled in by big_savefp()
! Clobb: %o1, %o2, %fsr, %fprs, and the FP registers
!
ENTRY(big_restorefp)
ldx [%o0 + FPU_FSR], %fsr ! restore %fsr
ld [%o0 + FPU_FPRS], %o1 ! %o1 = saved %fprs
andcc %o1, FPRS_FEF, %g0 ! is FPRS_FEF set in saved %fprs?
bnz,pt %icc, .fregs_restore ! yes, go to restore
nop
FZERO ! zero out to avoid leaks
wr %g0, 0, %fprs ! FP was off before: disable it again
retl
nop
.fregs_restore:
BLOAD_FPREGS(%o0, %o2) ! reload all FP registers (macro, %o2 = scratch)
wr %o1, 0, %fprs ! restore the saved %fprs
retl
nop
SET_SIZE(big_restorefp)