#pragma ident "%Z%%M% %I% %E% SMI"
.section ".text",#alloc,#execinstr
.file "mont_mulf_asm_v8plus.s"
.section ".rodata",#alloc
.align 8
!
! CONSTANT POOL
!
! IEEE-754 double-precision constants, emitted as two 32-bit words
! (high word first, low word zero).  The decimal .word values decode as:
!   1089470464 = 0x40F00000 -> 65536.0  (2^16)
TwoTo16:
.word 1089470464
.word 0
.type TwoTo16,#object
.size TwoTo16,8
!
! CONSTANT POOL
!
!   1055916032 = 0x3EF00000 -> 1.0/65536.0  (2^-16)
TwoToMinus16:
.word 1055916032
.word 0
.type TwoToMinus16,#object
.size TwoToMinus16,8
!
! CONSTANT POOL
!
!   0x00000000:00000000 -> 0.0
! NOTE: code below (conv_i32_to_d32_and_d16, mont_mulf_noconv) addresses
! the neighbouring constants relative to Zero (e.g. [Zero-8], [Zero-16]),
! so the ordering of these five objects is load-bearing.
Zero:
.word 0
.word 0
.type Zero,#object
.size Zero,8
!
! CONSTANT POOL
!
!   1106247680 = 0x41F00000 -> 65536.0*65536.0  (2^32)
TwoTo32:
.word 1106247680
.word 0
.type TwoTo32,#object
.size TwoTo32,8
!
! CONSTANT POOL
!
!   1039138816 = 0x3DF00000 -> 1.0/(65536.0*65536.0)  (2^-32)
TwoToMinus32:
.word 1039138816
.word 0
.type TwoToMinus32,#object
.size TwoToMinus32,8
.section ".text",#alloc,#execinstr
.align 4
!
! SUBROUTINE conv_d16_to_i32
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
!-----------------------------------------------------------------------
! conv_d16_to_i32
! C prototype (from the interleaved source listing below):
!   void conv_d16_to_i32(unsigned int *i32, double *d16,
!                        long long *tmp, int ilen)
! After the SAVE, arguments arrive in the input registers:
!   %i0 = i32 (output words), %i1 = d16 (input 16-bit "digits" stored
!   as doubles), %i2 = tmp (not referenced in this routine), %i3 = ilen.
! Recombines pairs of digit doubles (a,b) into 32-bit output words with
! 64-bit carry propagation:  t1 += (a & 0xffffffff) + ((b & 0xffff)<<16),
! emit low 32 bits, carry the rest.  Each double is converted to int64
! with fdtox and bounced through the stack (std/ldx), because v8plus has
! no direct FP<->integer register move.
!-----------------------------------------------------------------------
.global conv_d16_to_i32
conv_d16_to_i32:
save %sp,-128,%sp
! FILE mont_mulf.c
! 1 !#define RF_INLINE_MACROS
! 3 !static const double TwoTo16=65536.0;
! 4 !static const double TwoToMinus16=1.0/65536.0;
! 5 !static const double Zero=0.0;
! 6 !static const double TwoTo32=65536.0*65536.0;
! 7 !static const double TwoToMinus32=1.0/(65536.0*65536.0);
! 9 !#ifdef RF_INLINE_MACROS
! 11 !double upper32(double);
! 12 !double lower32(double, double);
! 13 !double mod(double, double, double);
! 15 !#else
! 17 !static double upper32(double x)
! 18 !{
! 19 ! return floor(x*TwoToMinus32);
! 20 !}
! 22 !static double lower32(double x, double y)
! 23 !{
! 24 ! return x-TwoTo32*floor(x*TwoToMinus32);
! 25 !}
! 27 !static double mod(double x, double oneoverm, double m)
! 28 !{
! 29 ! return x-m*floor(x*oneoverm);
! 30 !}
! 32 !#endif
! 35 !static void cleanup(double *dt, int from, int tlen)
! 36 !{
! 37 ! int i;
! 38 ! double tmp,tmp1,x,x1;
! 40 ! tmp=tmp1=Zero;
! 41 !
! 52 !
! 53 ! for(i=2*from;i<2*tlen;i+=2)
! 54 ! {
! 55 ! x=dt[i];
! 56 ! x1=dt[i+1];
! 57 ! dt[i]=lower32(x,Zero)+tmp;
! 58 ! dt[i+1]=lower32(x1,Zero)+tmp1;
! 59 ! tmp=upper32(x);
! 60 ! tmp1=upper32(x1);
! 61 ! }
! 62 !
! 63 !}
! 66 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
! 67 !{
! 68 !int i;
! 69 !long long t, t1, a, b, c, d;
! 71 ! t1=0;
! 72 ! a=(long long)d16[0];
ldd [%i1],%f0
or %g0,%i1,%o0
! 73 ! b=(long long)d16[1];
! 74 ! for(i=0; i<ilen-1; i++)
sub %i3,1,%g2
cmp %g2,0
or %g0,0,%o4
fdtox %f0,%f0
std %f0,[%sp+120]
or %g0,0,%o7
or %g0,%i3,%o1
sub %i3,2,%o2
ldd [%o0+8],%f0
! %o1 = 0xfc00; +1023 below makes 0xffff, the 16-bit digit mask
sethi %hi(0xfc00),%o1
add %o2,1,%g3
add %o1,1023,%o1
or %g0,%i0,%o5
fdtox %f0,%f0
std %f0,[%sp+112]
ldx [%sp+112],%g1
ldx [%sp+120],%g4
ble,pt %icc,.L900000117
sethi %hi(0xfc00),%g2
! %o3 = 0xffffffff, the 32-bit output-word mask
or %g0,-1,%g2
cmp %g3,3
srl %g2,0,%o3
bl,pn %icc,.L77000134
or %g0,%o0,%g2
! 75 ! {
! 76 ! c=(long long)d16[2*i+2];
ldd [%o0+16],%f0
! 77 ! t1+=a&0xffffffff;
! 78 ! t=(a>>32);
! 79 ! d=(long long)d16[2*i+3];
! 80 ! t1+=(b&0xffff)<<16;
! 81 ! t+=(b>>16)+(t1>>32);
! 82 ! i32[i]=t1&0xffffffff;
! 83 ! t1=t;
! 84 ! a=c;
! 85 ! b=d;
! loop prologue: compute/store the first word while the next pair of
! doubles is converted (software pipelining)
add %o0,16,%g2
and %g1,%o1,%o0
sllx %o0,16,%g3
and %g4,%o3,%o0
add %o0,%g3,%o4
fdtox %f0,%f0
std %f0,[%sp+104]
and %o4,%o3,%g5
ldd [%g2+8],%f2
add %o5,4,%o5
srax %o4,32,%o4
stx %o4,[%sp+112]
fdtox %f2,%f0
std %f0,[%sp+96]
srax %g1,16,%o0
ldx [%sp+112],%o7
srax %g4,32,%o4
add %o0,%o7,%g4
or %g0,1,%o7
ldx [%sp+104],%g3
add %o4,%g4,%o4
ldx [%sp+96],%g1
st %g5,[%o5-4]
or %g0,%g3,%g4
! pipelined main loop: one 32-bit word per iteration, carry in %o4
.L900000112:
ldd [%g2+16],%f0
add %o7,1,%o7
add %o5,4,%o5
cmp %o7,%o2
add %g2,16,%g2
fdtox %f0,%f0
std %f0,[%sp+104]
ldd [%g2+8],%f0
fdtox %f0,%f0
std %f0,[%sp+96]
and %g1,%o1,%g3
sllx %g3,16,%g5
and %g4,%o3,%g3
add %g3,%g5,%g3
srax %g1,16,%g1
add %g3,%o4,%g3
srax %g3,32,%o4
stx %o4,[%sp+112]
ldx [%sp+104],%g5
srax %g4,32,%o4
ldx [%sp+112],%g4
add %g1,%g4,%g4
ldx [%sp+96],%g1
add %o4,%g4,%o4
and %g3,%o3,%g3
or %g0,%g5,%g4
ble,pt %icc,.L900000112
st %g3,[%o5-4]
.L900000115:
ba .L900000117
sethi %hi(0xfc00),%g2
! short-trip-count version of the same loop (fewer than 3 iterations)
.L77000134:
ldd [%g2+16],%f0
.L900000116:
and %g4,%o3,%o0
and %g1,%o1,%g3
fdtox %f0,%f0
add %o4,%o0,%o0
std %f0,[%sp+104]
add %o7,1,%o7
sllx %g3,16,%o4
ldd [%g2+24],%f2
add %g2,16,%g2
add %o0,%o4,%o0
cmp %o7,%o2
and %o0,%o3,%g3
fdtox %f2,%f0
std %f0,[%sp+96]
srax %o0,32,%o0
stx %o0,[%sp+112]
srax %g4,32,%o4
ldx [%sp+96],%o0
srax %g1,16,%g5
ldx [%sp+112],%g4
ldx [%sp+104],%g1
st %g3,[%o5]
add %g5,%g4,%g4
add %o5,4,%o5
add %o4,%g4,%o4
or %g0,%g1,%g4
or %g0,%o0,%g1
ble,a,pt %icc,.L900000116
ldd [%g2+16],%f0
.L77000127:
! 86 ! }
! 87 ! t1+=a&0xffffffff;
! 88 ! t=(a>>32);
! 89 ! t1+=(b&0xffff)<<16;
! 90 ! i32[i]=t1&0xffffffff;
sethi %hi(0xfc00),%g2
! epilogue: flush the final word (no further carries needed)
.L900000117:
or %g0,-1,%g3
add %g2,1023,%g2
srl %g3,0,%g3
and %g1,%g2,%g2
and %g4,%g3,%g4
sllx %g2,16,%g2
add %o4,%g4,%g4
add %g4,%g2,%g2
sll %o7,2,%g4
and %g2,%g3,%g2
st %g2,[%i0+%g4]
ret ! Result =
restore %g0,%g0,%g0
.type conv_d16_to_i32,2
.size conv_d16_to_i32,(.-conv_d16_to_i32)
.section ".text",#alloc,#execinstr
.align 8
!
! CONSTANT POOL
!
! 1127219200 = 0x43300000: high word of the double 2^52
! (4503599627370496.0).  Placing a 32-bit unsigned integer in the low
! word of this bit pattern and subtracting the pattern itself (fsubd)
! yields that integer as a double -- the classic int->double conversion
! trick used by the fmovs/fsubd sequences in the routine below.
.L_const_seg_900000201:
.word 1127219200,0
.align 4
.skip 16
!
! SUBROUTINE conv_i32_to_d32
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
!-----------------------------------------------------------------------
! conv_i32_to_d32
! C prototype (from the interleaved source listing below):
!   void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
! Leaf-style routine (no SAVE): %o0 = d32, %o1 = i32, %o2 = len.
! The caller's %o7 is preserved in %g2 around the "call .+8" used to
! form the GOT address (PIC), then restored before any branch out.
! Each 32-bit word is converted via the 2^52 bit-pattern trick
! (.L_const_seg_900000201): fmovs writes the 0x43300000 high word above
! the loaded integer, fsubd removes the bias.  The main loop is unrolled
! five-wide; .L900000211 handles the remainder one word at a time.
!-----------------------------------------------------------------------
.global conv_i32_to_d32
conv_i32_to_d32:
or %g0,%o7,%g2
or %g0,%o1,%g4
.L900000210:
call .+8
sethi %hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g3
! 92 !}
! 94 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
! 95 !{
! 96 !int i;
! 98 !#pragma pipeloop(0)
! 99 ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
or %g0,0,%o5
add %g3,%lo(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g3
or %g0,%o0,%g5
add %g3,%o7,%g1
orcc %g0,%o2,%g3
ble,pt %icc,.L77000140
or %g0,%g2,%o7
sethi %hi(.L_const_seg_900000201),%g2
add %g2,%lo(.L_const_seg_900000201),%g2
sub %o2,1,%g3
ld [%g1+%g2],%g2
cmp %o2,9
bl,pn %icc,.L77000144
ldd [%g2],%f8
! %f8 = 2^52 bias pattern; loop carries 5 conversions in flight
add %o1,16,%g4
sub %o2,5,%g1
ld [%o1],%f7
or %g0,4,%o5
ld [%o1+4],%f5
ld [%o1+8],%f3
fmovs %f8,%f6
ld [%o1+12],%f1
.L900000205:
ld [%g4],%f11
add %o5,5,%o5
add %g4,20,%g4
fsubd %f6,%f8,%f6
std %f6,[%g5]
cmp %o5,%g1
add %g5,40,%g5
fmovs %f8,%f4
ld [%g4-16],%f7
fsubd %f4,%f8,%f12
fmovs %f8,%f2
std %f12,[%g5-32]
ld [%g4-12],%f5
fsubd %f2,%f8,%f12
fmovs %f8,%f0
std %f12,[%g5-24]
ld [%g4-8],%f3
fsubd %f0,%f8,%f12
fmovs %f8,%f10
std %f12,[%g5-16]
ld [%g4-4],%f1
fsubd %f10,%f8,%f10
fmovs %f8,%f6
ble,pt %icc,.L900000205
std %f10,[%g5-8]
.L900000208:
! drain the four conversions still in flight after the unrolled loop
fmovs %f8,%f4
add %g5,32,%g5
cmp %o5,%g3
fmovs %f8,%f2
fmovs %f8,%f0
fsubd %f6,%f8,%f6
std %f6,[%g5-32]
fsubd %f4,%f8,%f4
std %f4,[%g5-24]
fsubd %f2,%f8,%f2
std %f2,[%g5-16]
fsubd %f0,%f8,%f0
bg,pn %icc,.L77000140
std %f0,[%g5-8]
.L77000144:
! remainder loop: one word per iteration
ld [%g4],%f1
.L900000211:
ldd [%g2],%f8
add %o5,1,%o5
add %g4,4,%g4
cmp %o5,%g3
fmovs %f8,%f0
fsubd %f0,%f8,%f0
std %f0,[%g5]
add %g5,8,%g5
ble,a,pt %icc,.L900000211
ld [%g4],%f1
.L77000140:
retl ! Result =
nop
.type conv_i32_to_d32,2
.size conv_i32_to_d32,(.-conv_i32_to_d32)
.section ".text",#alloc,#execinstr
.align 8
!
! CONSTANT POOL
!
! 1127219200 = 0x43300000: high word of the double 2^52 -- bias pattern
! for the int->double conversion trick (see fmovs/fsubd pairs below).
.L_const_seg_900000301:
.word 1127219200,0
.align 4
.skip 16
!
! SUBROUTINE conv_i32_to_d16
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
!-----------------------------------------------------------------------
! conv_i32_to_d16
! C prototype (from the interleaved source listing below):
!   void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
! After the SAVE: %i0 = d16 (out), %i1 = i32 (in), %i2 = len.
! Splits each 32-bit word a into two 16-bit "digit" doubles:
!   d16[2*i]   = (double)(a & 0xffff)   (mask built as 0xfc00+1023)
!   d16[2*i+1] = (double)(a >> 16)
! Conversion uses the 2^52 bias pattern (.L_const_seg_900000301): the
! halfword is stored to the stack, reloaded into the low FP word under
! the 0x43300000 high word, and fsubd removes the bias.
! .L900000306 is the unrolled/pipelined path; .L900000311 the short one.
!-----------------------------------------------------------------------
.global conv_i32_to_d16
conv_i32_to_d16:
save %sp,-104,%sp
.L900000310:
call .+8
sethi %hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
orcc %g0,%i2,%o0
add %g3,%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
! 100 !}
! 103 !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
! 104 !{
! 105 !int i;
! 106 !unsigned int a;
! 108 !#pragma pipeloop(0)
! 109 ! for(i=0;i<len;i++)
ble,pt %icc,.L77000150
add %g3,%o7,%o2
! 110 ! {
! 111 ! a=i32[i];
! 112 ! d16[2*i]=(double)(a&0xffff);
! 113 ! d16[2*i+1]=(double)(a>>16);
sethi %hi(.L_const_seg_900000301),%g2
sub %o0,1,%o5
add %g2,%lo(.L_const_seg_900000301),%o1
ld [%o2+%o1],%o3
sethi %hi(0xfc00),%o0
add %o5,1,%g2
or %g0,0,%g1
cmp %g2,3
ldd [%o3],%f0
or %g0,%i1,%o7
add %o0,1023,%o4
or %g0,%i0,%g3
bl,pn %icc,.L77000154
add %o7,4,%o0
ld [%o0-4],%o1
or %g0,%o0,%o7
or %g0,1,%g1
and %o1,%o4,%o0
.L900000306:
st %o0,[%sp+96]
add %g1,1,%g1
add %g3,16,%g3
cmp %g1,%o5
add %o7,4,%o7
ld [%sp+96],%f3
fmovs %f0,%f2
fsubd %f2,%f0,%f2
srl %o1,16,%o0
std %f2,[%g3-16]
st %o0,[%sp+92]
ld [%sp+92],%f3
ld [%o7-4],%o1
fmovs %f0,%f2
fsubd %f2,%f0,%f2
and %o1,%o4,%o0
ble,pt %icc,.L900000306
std %f2,[%g3-8]
.L900000309:
! epilogue: convert and store the final low/high halfword pair
st %o0,[%sp+96]
fmovs %f0,%f2
add %g3,16,%g3
srl %o1,16,%o0
ld [%sp+96],%f3
fsubd %f2,%f0,%f2
std %f2,[%g3-16]
st %o0,[%sp+92]
fmovs %f0,%f2
ld [%sp+92],%f3
fsubd %f2,%f0,%f0
std %f0,[%g3-8]
ret ! Result =
restore %g0,%g0,%g0
.L77000154:
! short-trip-count loop: one input word (two output doubles) per pass
ld [%o7],%o0
.L900000311:
and %o0,%o4,%o1
st %o1,[%sp+96]
add %g1,1,%g1
ldd [%o3],%f0
srl %o0,16,%o0
add %o7,4,%o7
cmp %g1,%o5
fmovs %f0,%f2
ld [%sp+96],%f3
fsubd %f2,%f0,%f2
std %f2,[%g3]
st %o0,[%sp+92]
fmovs %f0,%f2
ld [%sp+92],%f3
fsubd %f2,%f0,%f0
std %f0,[%g3+8]
add %g3,16,%g3
ble,a,pt %icc,.L900000311
ld [%o7],%o0
.L77000150:
ret ! Result =
restore %g0,%g0,%g0
.type conv_i32_to_d16,2
.size conv_i32_to_d16,(.-conv_i32_to_d16)
.section ".text",#alloc,#execinstr
.align 8
!
! CONSTANT POOL
!
! 1127219200 = 0x43300000: high word of the double 2^52 -- bias pattern
! for the int->double conversion trick used by the remainder loop below.
.L_const_seg_900000401:
.word 1127219200,0
.align 4
.skip 16
!
! SUBROUTINE conv_i32_to_d32_and_d16
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
!-----------------------------------------------------------------------
! conv_i32_to_d32_and_d16
! C prototype (from the interleaved source listing below):
!   void conv_i32_to_d32_and_d16(double *d32, double *d16,
!                                unsigned int *i32, int len)
! After the SAVE: %i0 = d32, %i1 = d16, %i2 = i32, %i3 = len.
! For each input word a it produces both representations:
!   d32[i] = (double)a;  d16[2*i] = (double)(a&0xffff);
!   d16[2*i+1] = (double)(a>>16)
! Main loop (.L900000415) handles 4 words per pass.  It addresses the
! constant pool relative to Zero (loaded via the GOT), relying on the
! .rodata layout above: [%o1] = Zero, [%o1-8] = TwoToMinus16,
! [%o1-16] = TwoTo16.  Ints become doubles via fxtod (the high FP word
! is Zero's 0 bits, so the 64-bit pattern equals the unsigned int);
! the high digit is extracted with x*2^-16 truncated via fdtox/fxtod,
! and the low digit as x - 2^16*hi.
! The scalar tail (.L900000409/.L900000414) uses the 2^52 bias trick
! with .L_const_seg_900000401 instead.
!-----------------------------------------------------------------------
.global conv_i32_to_d32_and_d16
conv_i32_to_d32_and_d16:
save %sp,-104,%sp
.L900000413:
call .+8
sethi %hi(_GLOBAL_OFFSET_TABLE_-(.L900000413-.)),%g4
! 114 ! }
! 115 !}
! 118 !void i16_to_d16_and_d32x4(const double * ,
! 119 ! const double * , const double * ,
! 120 ! double * , double * ,
! 121 ! float *
! 122 ! unsigned int* converted to float* */);
! 126 !void conv_i32_to_d32_and_d16(double *d32, double *d16,
! 127 ! unsigned int *i32, int len)
! 128 !{
! 129 !int i;
! 130 !unsigned int a;
! 132 !#pragma pipeloop(0)
! 133 ! for(i=0;i<len-3;i+=4)
sub %i3,3,%g2
cmp %g2,0
add %g4,%lo(_GLOBAL_OFFSET_TABLE_-(.L900000413-.)),%g4
or %g0,%i2,%g5
! 134 ! {
! 135 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
! 136 ! &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
sethi %hi(Zero),%g2
add %g4,%o7,%o2
or %g0,0,%g1
or %g0,%i0,%i4
add %g2,%lo(Zero),%g2
ble,pt %icc,.L900000416
cmp %g1,%i3
or %g0,%g5,%o4
ld [%o2+%g2],%o1
sub %i3,4,%o3
or %g0,0,%o7
or %g0,0,%o5
or %g0,%o4,%g4
! 4-wide vector loop: %f2 = Zero, %f0 = TwoToMinus16, %f16 = TwoTo16
.L900000415:
ldd [%o1],%f2
add %i4,%o7,%g2
add %i1,%o5,%g3
ldd [%o1-8],%f0
add %g1,4,%g1
add %o4,16,%o4
fmovd %f2,%f14
ld [%g4],%f15
cmp %g1,%o3
fmovd %f2,%f10
ld [%g4+4],%f11
fmovd %f2,%f6
ld [%g4+8],%f7
ld [%g4+12],%f3
fxtod %f14,%f14
fxtod %f10,%f10
ldd [%o1-16],%f16
fxtod %f6,%f6
std %f14,[%i4+%o7]
add %o7,32,%o7
fxtod %f2,%f2
fmuld %f0,%f14,%f12
std %f10,[%g2+8]
fmuld %f0,%f10,%f8
std %f6,[%g2+16]
fmuld %f0,%f6,%f4
std %f2,[%g2+24]
fmuld %f0,%f2,%f0
fdtox %f12,%f12
fdtox %f8,%f8
fdtox %f4,%f4
fdtox %f0,%f0
fxtod %f12,%f12
std %f12,[%g3+8]
fxtod %f8,%f8
std %f8,[%g3+24]
fxtod %f4,%f4
std %f4,[%g3+40]
fxtod %f0,%f0
fmuld %f12,%f16,%f12
std %f0,[%g3+56]
fmuld %f8,%f16,%f8
fmuld %f4,%f16,%f4
fmuld %f0,%f16,%f0
fsubd %f14,%f12,%f12
std %f12,[%i1+%o5]
fsubd %f10,%f8,%f8
std %f8,[%g3+16]
add %o5,64,%o5
fsubd %f6,%f4,%f4
std %f4,[%g3+32]
fsubd %f2,%f0,%f0
std %f0,[%g3+48]
ble,pt %icc,.L900000415
or %g0,%o4,%g4
.L77000159:
! 137 ! }
! 138 ! for(;i<len;i++)
cmp %g1,%i3
.L900000416:
bge,pt %icc,.L77000164
nop
! 139 ! {
! 140 ! a=i32[i];
! 141 ! d32[i]=(double)(i32[i]);
! 142 ! d16[2*i]=(double)(a&0xffff);
! 143 ! d16[2*i+1]=(double)(a>>16);
! scalar tail: recompute running pointers from index %g1, then use the
! 2^52 bias pattern (%f0) for each conversion
sethi %hi(.L_const_seg_900000401),%g2
add %g2,%lo(.L_const_seg_900000401),%o1
sethi %hi(0xfc00),%o0
ld [%o2+%o1],%o2
sll %g1,2,%o3
sub %i3,%g1,%g3
sll %g1,3,%g2
add %o0,1023,%o4
ldd [%o2],%f0
add %g5,%o3,%o0
cmp %g3,3
add %i4,%g2,%o3
sub %i3,1,%o1
sll %g1,4,%g4
bl,pn %icc,.L77000161
add %i1,%g4,%o5
ld [%o0],%f3
add %o3,8,%o3
ld [%o0],%o7
add %o5,16,%o5
add %g1,1,%g1
fmovs %f0,%f2
add %o0,4,%o0
and %o7,%o4,%g2
fsubd %f2,%f0,%f2
std %f2,[%o3-8]
srl %o7,16,%o7
st %g2,[%sp+96]
fmovs %f0,%f2
ld [%sp+96],%f3
fsubd %f2,%f0,%f2
std %f2,[%o5-16]
st %o7,[%sp+92]
fmovs %f0,%f2
ld [%sp+92],%f3
fsubd %f2,%f0,%f2
std %f2,[%o5-8]
.L900000409:
ld [%o0],%f3
add %g1,2,%g1
add %o5,32,%o5
ld [%o0],%o7
cmp %g1,%o1
add %o3,16,%o3
fmovs %f0,%f2
fsubd %f2,%f0,%f2
std %f2,[%o3-16]
and %o7,%o4,%g2
st %g2,[%sp+96]
ld [%sp+96],%f3
fmovs %f0,%f2
fsubd %f2,%f0,%f2
srl %o7,16,%o7
std %f2,[%o5-32]
st %o7,[%sp+92]
ld [%sp+92],%f3
fmovs %f0,%f2
fsubd %f2,%f0,%f2
std %f2,[%o5-24]
add %o0,4,%o0
ld [%o0],%f3
ld [%o0],%o7
fmovs %f0,%f2
fsubd %f2,%f0,%f2
std %f2,[%o3-8]
and %o7,%o4,%g2
st %g2,[%sp+96]
ld [%sp+96],%f3
fmovs %f0,%f2
fsubd %f2,%f0,%f2
srl %o7,16,%o7
std %f2,[%o5-16]
st %o7,[%sp+92]
ld [%sp+92],%f3
fmovs %f0,%f2
fsubd %f2,%f0,%f2
std %f2,[%o5-8]
bl,pt %icc,.L900000409
add %o0,4,%o0
.L900000412:
cmp %g1,%i3
bge,pn %icc,.L77000164
nop
.L77000161:
! final scalar loop: one word -> one d32 + two d16 entries per pass
ld [%o0],%f3
.L900000414:
ldd [%o2],%f0
add %g1,1,%g1
ld [%o0],%o1
add %o0,4,%o0
cmp %g1,%i3
fmovs %f0,%f2
and %o1,%o4,%o7
fsubd %f2,%f0,%f2
std %f2,[%o3]
srl %o1,16,%o1
st %o7,[%sp+96]
add %o3,8,%o3
fmovs %f0,%f2
ld [%sp+96],%f3
fsubd %f2,%f0,%f2
std %f2,[%o5]
st %o1,[%sp+92]
fmovs %f0,%f2
ld [%sp+92],%f3
fsubd %f2,%f0,%f0
std %f0,[%o5+8]
add %o5,16,%o5
bl,a,pt %icc,.L900000414
ld [%o0],%f3
.L77000164:
ret ! Result =
restore %g0,%g0,%g0
.type conv_i32_to_d32_and_d16,2
.size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
.section ".text",#alloc,#execinstr
.align 4
!
! SUBROUTINE adjust_montf_result
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
!-----------------------------------------------------------------------
! adjust_montf_result
! C prototype (from the interleaved source listing below):
!   void adjust_montf_result(unsigned int *i32, unsigned int *nint,
!                            int len)
! Leaf routine (no SAVE): %o0 = i32 (the Montgomery result, len+1
! words), %o1 = nint (the modulus), %o2 = len.
! Final conditional subtraction of Montgomery multiplication: if the
! result has overflowed into word [len], or compares >= the modulus
! (scanning from the most significant word down), subtract the modulus
! once, propagating the borrow through 64-bit arithmetic (srax ...,32).
!-----------------------------------------------------------------------
.global adjust_montf_result
adjust_montf_result:
! 144 ! }
! 145 !}
! 148 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
! 149 !{
! 150 !long long acc;
! 151 !int i;
! 153 ! if(i32[len]>0) i=-1;
sll %o2,2,%g1
or %g0,-1,%g3
ld [%o0+%g1],%g1
cmp %g1,0
bleu,pn %icc,.L77000175
or %g0,%o1,%o3
ba .L900000511
cmp %g3,0
.L77000175:
! 154 ! else
! 155 ! {
! 156 ! for(i=len-1; i>=0; i--)
subcc %o2,1,%g3
bneg,pt %icc,.L900000511
cmp %g3,0
sll %g3,2,%g1
add %o0,%g1,%g2
add %o1,%g1,%g1
! 157 ! {
! 158 ! if(i32[i]!=nint[i]) break;
! scan downward for the first word where result and modulus differ
ld [%g1],%g5
.L900000510:
ld [%g2],%o5
sub %g1,4,%g1
sub %g2,4,%g2
cmp %o5,%g5
bne,pn %icc,.L77000182
nop
subcc %g3,1,%g3
bpos,a,pt %icc,.L900000510
ld [%g1],%g5
.L77000182:
! 159 ! }
! 160 ! }
! 161 ! if((i<0)||(i32[i]>nint[i]))
cmp %g3,0
.L900000511:
bl,pn %icc,.L77000198
sll %g3,2,%g2
ld [%o1+%g2],%g1
ld [%o0+%g2],%g2
cmp %g2,%g1
bleu,pt %icc,.L77000191
nop
.L77000198:
! 162 ! {
! 163 ! acc=0;
! 164 ! for(i=0;i<len;i++)
cmp %o2,0
ble,pt %icc,.L77000191
nop
or %g0,-1,%g2
sub %o2,1,%g4
srl %g2,0,%g3
or %g0,0,%g5
or %g0,0,%o5
or %g0,%o0,%o4
cmp %o2,3
add %o1,4,%g2
bl,pn %icc,.L77000199
add %o0,8,%g1
! 165 ! {
! 166 ! acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
! subtraction loop (pipelined): %g5/%o5 carry the signed borrow,
! %g3 = 0xffffffff word mask
ld [%o0],%o2
or %g0,%g2,%o3
ld [%o1],%o1
or %g0,%g1,%o4
! 167 ! i32[i]=acc&0xffffffff;
! 168 ! acc=acc>>32;
or %g0,2,%o5
ld [%o0+4],%g1
sub %o2,%o1,%o2
or %g0,%o2,%g5
and %o2,%g3,%o2
st %o2,[%o0]
srax %g5,32,%g5
.L900000505:
ld [%o3],%o2
add %o5,1,%o5
add %o3,4,%o3
cmp %o5,%g4
add %o4,4,%o4
sub %g1,%o2,%g1
add %g1,%g5,%g5
and %g5,%g3,%o2
ld [%o4-4],%g1
st %o2,[%o4-8]
ble,pt %icc,.L900000505
srax %g5,32,%g5
.L900000508:
ld [%o3],%g2
sub %g1,%g2,%g1
add %g1,%g5,%g1
and %g1,%g3,%g2
retl ! Result =
st %g2,[%o4-4]
.L77000199:
! short-trip-count subtraction loop
ld [%o4],%g1
.L900000509:
ld [%o3],%g2
add %g5,%g1,%g1
add %o5,1,%o5
add %o3,4,%o3
cmp %o5,%g4
sub %g1,%g2,%g1
and %g1,%g3,%g2
st %g2,[%o4]
add %o4,4,%o4
srax %g1,32,%g5
ble,a,pt %icc,.L900000509
ld [%o4],%g1
.L77000191:
retl ! Result =
nop
.type adjust_montf_result,2
.size adjust_montf_result,(.-adjust_montf_result)
.section ".text",#alloc,#execinstr
.align 4
.skip 16
!
! SUBROUTINE mont_mulf_noconv
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
!-----------------------------------------------------------------------
! mont_mulf_noconv
! C prototype (from the interleaved source listing below):
!   void mont_mulf_noconv(unsigned int *result,
!                         double *dm1, double *dm2, double *dt,
!                         double *dn, unsigned int *nint,
!                         int nlen, double dn0)
! After the SAVE: %i0 = result, %i1 = dm1, %i2 = dm2, %i3 = dt,
! %i4 = dn, %i5 = nint; the 7th and 8th args come off the caller frame:
! nlen from [%fp+92], dn0 from [%fp+96] (kept in %f16).
! FP Montgomery multiplication over arrays of 16-bit digit doubles.
! Two code paths:
!   * general nlen: zero dt[0..4*nlen+1], then the j-loop at
!     .L900000652 with an inner i-loop, calling the inline cleanup
!     (.L900000653) every 30 iterations (jj==30) to renormalize digits;
!   * nlen == 16 (.L77000289): dt[1..65] cleared by straight-line
!     stores, then one fully-unrolled 16-digit inner loop
!     (.L99999999) iterated 32 times.
! Afterwards conv_d16_to_i32 (.L900000651) and adjust_montf_result
! (from .L77000241) are inlined to produce the final integer result.
! The mod(lower32(b,Zero)*dn0, 2^-16, 2^16) digit recurrence appears as
! the fxtod/fmuld/fdtox/fsubd chains producing %f22 (general path) /
! %f0 via %f12 (unrolled path).
!-----------------------------------------------------------------------
.global mont_mulf_noconv
mont_mulf_noconv:
save %sp,-144,%sp
.L900000644:
call .+8
sethi %hi(_GLOBAL_OFFSET_TABLE_-(.L900000644-.)),%g4
! 169 ! }
! 170 ! }
! 171 !}
! 175 !void cleanup(double *dt, int from, int tlen);
! 177 !
! 183 !void mont_mulf_noconv(unsigned int *result,
! 184 ! double *dm1, double *dm2, double *dt,
! 185 ! double *dn, unsigned int *nint,
! 186 ! int nlen, double dn0)
! 187 !{
! 188 ! int i, j, jj;
! 189 ! int tmp;
! 190 ! double digit, m2j, nextm2j, a, b;
! 191 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
! 193 ! pdm1=&(dm1[0]);
! 194 ! pdm2=&(dm2[0]);
! 195 ! pdn=&(dn[0]);
! 196 ! pdm2[2*nlen]=Zero;
sethi %hi(Zero),%g2
ld [%fp+92],%o0
add %g4,%lo(_GLOBAL_OFFSET_TABLE_-(.L900000644-.)),%g4
add %g2,%lo(Zero),%g2
ldd [%fp+96],%f2
add %g4,%o7,%o3
st %i0,[%fp+68]
or %g0,%i3,%o1
ld [%o3+%g2],%g3
sll %o0,4,%g2
or %g0,%i1,%g4
fmovd %f2,%f16
st %i5,[%fp+88]
or %g0,%o1,%g5
or %g0,%i2,%o2
ldd [%g3],%f0
or %g0,%o0,%g1
! 198 ! if (nlen!=16)
cmp %o0,16
be,pn %icc,.L77000289
std %f0,[%o2+%g2]
! 199 ! {
! 200 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
sll %o0,2,%g2
or %g0,%i4,%i0
sll %o0,1,%o7
add %g2,2,%o2
cmp %o2,0
or %g0,%i2,%i1
ble,a,pt %icc,.L900000658
ldd [%g4],%f0
! 202 ! a=dt[0]=pdm1[0]*pdm2[0];
! 203 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
! 205 ! pdtj=&(dt[0]);
! 206 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
! 207 ! {
! 208 ! m2j=pdm2[j];
! 209 ! a=pdtj[0]+pdn[0]*digit;
! 210 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
! 211 ! pdtj[1]=b;
! 213 !#pragma pipeloop(0)
! 214 ! for(i=1;i<nlen;i++)
! 215 ! {
! 216 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
! 217 ! }
! 218 ! if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
! 219 !
! 220 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
! 221 ! }
! 222 ! }
! 223 ! else
! 224 ! {
! 225 ! a=dt[0]=pdm1[0]*pdm2[0];
! 227 ! dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
! 228 ! dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
! 229 ! dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
! 230 ! dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
! 231 ! dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
! 232 ! dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
! 233 ! dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
! 234 ! dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
! 235 ! dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
! 236 ! dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
! 237 ! dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
! 239 ! pdn_0=pdn[0];
! 240 ! pdm1_0=pdm1[0];
! 242 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
! 243 ! pdtj=&(dt[0]);
! 245 ! for(j=0;j<32;j++,pdtj++)
! 246 ! {
! 248 ! m2j=pdm2[j];
! 249 ! a=pdtj[0]+pdn_0*digit;
! 250 ! b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
! 251 ! pdtj[1]=b;
! 253 !
! 259 ! pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
! 260 ! pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
! 261 ! pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
! 262 ! pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
! 263 ! pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
! 264 ! pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
! 265 ! pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
! 266 ! pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
! 267 ! pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
! 268 ! pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
! 269 ! pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
! 270 ! pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
! 271 ! pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
! 272 ! pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
! 273 ! pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
! 274 !
! 275 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
! 276 ! }
! 277 ! }
! 279 ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
! 281 ! adjust_montf_result(result,nint,nlen);
! ---- general path: zero dt[0..4*nlen+1] ----
add %g2,2,%o0
add %g2,1,%o2
cmp %o0,3
bl,pn %icc,.L77000279
or %g0,1,%o0
add %o1,8,%o1
or %g0,1,%o3
std %f0,[%g5]
.L900000628:
std %f0,[%o1]
add %o3,2,%o3
add %o1,16,%o1
cmp %o3,%g2
ble,pt %icc,.L900000628
std %f0,[%o1-8]
.L900000631:
cmp %o3,%o2
bg,pn %icc,.L77000284
add %o3,1,%o0
.L77000279:
std %f0,[%o1]
.L900000657:
ldd [%g3],%f0
cmp %o0,%o2
add %o1,8,%o1
add %o0,1,%o0
ble,a,pt %icc,.L900000657
std %f0,[%o1]
.L77000284:
ldd [%g4],%f0
! ---- general path: compute dt[0] and the initial digit (%f22) ----
.L900000658:
ldd [%i2],%f2
add %o7,1,%o2
cmp %o7,0
sll %o2,1,%o0
sub %o7,1,%o1
fmuld %f0,%f2,%f0
std %f0,[%g5]
sub %g1,1,%o7
ldd [%g3],%f6
or %g0,%o7,%i2
or %g0,0,%l0
ldd [%g3-8],%f2
or %g0,0,%i5
or %g0,%o1,%o5
fdtox %f0,%f0
ldd [%g3-16],%f4
or %g0,%o0,%o3
add %i1,8,%o4
or %g0,0,%i4
fmovs %f6,%f0
fxtod %f0,%f0
fmuld %f0,%f16,%f0
fmuld %f0,%f2,%f2
fdtox %f2,%f2
fxtod %f2,%f2
fmuld %f2,%f4,%f2
fsubd %f0,%f2,%f22
ble,pt %icc,.L900000651
sll %g1,4,%g2
! ---- general path: outer j-loop; %f22 = current digit ----
ldd [%i0],%f0
.L900000652:
fmuld %f0,%f22,%f8
ldd [%g4],%f0
cmp %g1,1
ldd [%o4+%i4],%f6
add %g4,8,%o0
or %g0,1,%o1
ldd [%i3],%f2
add %i3,16,%l1
fmuld %f0,%f6,%f6
ldd [%g3-8],%f4
faddd %f2,%f8,%f2
ldd [%i3+8],%f0
ldd [%i1+%i4],%f20
faddd %f0,%f6,%f0
fmuld %f2,%f4,%f2
faddd %f0,%f2,%f18
std %f18,[%i3+8]
ble,pt %icc,.L900000656
srl %i5,31,%g2
cmp %i2,7
add %i0,8,%g2
bl,pn %icc,.L77000281
add %g2,24,%o2
! inner i-loop, unrolled 3-wide: pdtj[2*i] += pdm1[i]*m2j + pdn[i]*digit
ldd [%g4+8],%f2
add %g4,40,%o0
ldd [%g4+16],%f6
or %g0,%o2,%g2
add %i3,48,%l1
ldd [%g2-24],%f0
fmuld %f2,%f20,%f2
sub %i2,2,%o2
ldd [%g2-16],%f8
fmuld %f6,%f20,%f10
or %g0,5,%o1
ldd [%g4+24],%f14
fmuld %f0,%f22,%f4
ldd [%i3+16],%f0
ldd [%g2-8],%f6
ldd [%g4+32],%f12
faddd %f2,%f4,%f4
ldd [%i3+32],%f2
.L900000640:
ldd [%g2],%f24
add %o1,3,%o1
add %g2,24,%g2
fmuld %f8,%f22,%f8
ldd [%l1],%f28
cmp %o1,%o2
add %o0,24,%o0
ldd [%o0-24],%f26
faddd %f0,%f4,%f0
add %l1,48,%l1
faddd %f10,%f8,%f10
fmuld %f14,%f20,%f4
std %f0,[%l1-80]
ldd [%g2-16],%f8
fmuld %f6,%f22,%f6
ldd [%l1-32],%f0
ldd [%o0-16],%f14
faddd %f2,%f10,%f2
faddd %f4,%f6,%f10
fmuld %f12,%f20,%f4
std %f2,[%l1-64]
ldd [%g2-8],%f6
fmuld %f24,%f22,%f24
ldd [%l1-16],%f2
ldd [%o0-8],%f12
faddd %f28,%f10,%f10
std %f10,[%l1-48]
fmuld %f26,%f20,%f10
ble,pt %icc,.L900000640
faddd %f4,%f24,%f4
.L900000643:
! drain the 3-wide inner-loop pipeline
fmuld %f8,%f22,%f28
ldd [%g2],%f24
faddd %f0,%f4,%f26
fmuld %f12,%f20,%f8
add %l1,32,%l1
cmp %o1,%i2
fmuld %f14,%f20,%f14
ldd [%l1-32],%f4
add %g2,8,%g2
faddd %f10,%f28,%f12
fmuld %f6,%f22,%f6
ldd [%l1-16],%f0
fmuld %f24,%f22,%f10
std %f26,[%l1-64]
faddd %f2,%f12,%f2
std %f2,[%l1-48]
faddd %f14,%f6,%f6
faddd %f8,%f10,%f2
faddd %f4,%f6,%f4
std %f4,[%l1-32]
faddd %f0,%f2,%f0
bg,pn %icc,.L77000213
std %f0,[%l1-16]
.L77000281:
! inner i-loop, scalar version
ldd [%o0],%f0
.L900000655:
ldd [%g2],%f4
fmuld %f0,%f20,%f2
add %o1,1,%o1
ldd [%l1],%f0
add %o0,8,%o0
add %g2,8,%g2
fmuld %f4,%f22,%f4
cmp %o1,%i2
faddd %f2,%f4,%f2
faddd %f0,%f2,%f0
std %f0,[%l1]
add %l1,16,%l1
ble,a,pt %icc,.L900000655
ldd [%o0],%f0
.L77000213:
srl %i5,31,%g2
.L900000656:
! jj==30 -> run the inline cleanup loop over dt, reset jj (%l0)
cmp %l0,30
bne,a,pt %icc,.L900000654
fdtox %f18,%f0
add %i5,%g2,%g2
sub %o3,1,%o2
sra %g2,1,%o0
ldd [%g3],%f0
add %o0,1,%g2
sll %g2,1,%o0
fmovd %f0,%f2
sll %g2,4,%o1
cmp %o0,%o3
bge,pt %icc,.L77000215
or %g0,0,%l0
add %g5,%o1,%o1
ldd [%o1],%f6
.L900000653:
fdtox %f6,%f10
ldd [%o1+8],%f4
add %o0,2,%o0
ldd [%g3],%f12
fdtox %f6,%f6
cmp %o0,%o2
fdtox %f4,%f8
fdtox %f4,%f4
fmovs %f12,%f10
fmovs %f12,%f8
fxtod %f10,%f10
fxtod %f8,%f8
faddd %f10,%f2,%f2
std %f2,[%o1]
faddd %f8,%f0,%f0
std %f0,[%o1+8]
add %o1,16,%o1
fitod %f6,%f2
fitod %f4,%f0
ble,a,pt %icc,.L900000653
ldd [%o1],%f6
.L77000233:
or %g0,0,%l0
.L77000215:
fdtox %f18,%f0
.L900000654:
! next digit: %f22 = mod(lower32(b,Zero)*dn0, 2^-16, 2^16)
ldd [%g3],%f6
add %i5,1,%i5
add %i4,8,%i4
ldd [%g3-8],%f2
add %l0,1,%l0
add %i3,8,%i3
fmovs %f6,%f0
ldd [%g3-16],%f4
cmp %i5,%o5
fxtod %f0,%f0
fmuld %f0,%f16,%f0
fmuld %f0,%f2,%f2
fdtox %f2,%f2
fxtod %f2,%f2
fmuld %f2,%f4,%f2
fsubd %f0,%f2,%f22
ble,a,pt %icc,.L900000652
ldd [%i0],%f0
.L900000627:
ba .L900000651
sll %g1,4,%g2
! ---- nlen == 16 fast path: clear dt[1..65] with straight-line
! stores while computing dt[0] and the initial digit ----
.L77000289:
ldd [%o2],%f6
or %g0,%o1,%o4
or %g0,0,%o3
ldd [%g4],%f4
std %f0,[%o1+8]
std %f0,[%o1+16]
fmuld %f4,%f6,%f4
std %f4,[%o1]
std %f0,[%o1+24]
std %f0,[%o1+32]
fdtox %f4,%f4
std %f0,[%o1+40]
std %f0,[%o1+48]
std %f0,[%o1+56]
std %f0,[%o1+64]
std %f0,[%o1+72]
std %f0,[%o1+80]
std %f0,[%o1+88]
std %f0,[%o1+96]
std %f0,[%o1+104]
std %f0,[%o1+112]
std %f0,[%o1+120]
std %f0,[%o1+128]
std %f0,[%o1+136]
std %f0,[%o1+144]
std %f0,[%o1+152]
std %f0,[%o1+160]
std %f0,[%o1+168]
fmovs %f0,%f4
std %f0,[%o1+176]
or %g0,0,%o0
std %f0,[%o1+184]
fxtod %f4,%f4
std %f0,[%o1+192]
std %f0,[%o1+200]
std %f0,[%o1+208]
fmuld %f4,%f2,%f2
std %f0,[%o1+216]
std %f0,[%o1+224]
std %f0,[%o1+232]
std %f0,[%o1+240]
std %f0,[%o1+248]
std %f0,[%o1+256]
std %f0,[%o1+264]
std %f0,[%o1+272]
std %f0,[%o1+280]
std %f0,[%o1+288]
std %f0,[%o1+296]
std %f0,[%o1+304]
std %f0,[%o1+312]
std %f0,[%o1+320]
std %f0,[%o1+328]
std %f0,[%o1+336]
std %f0,[%o1+344]
std %f0,[%o1+352]
std %f0,[%o1+360]
std %f0,[%o1+368]
std %f0,[%o1+376]
std %f0,[%o1+384]
std %f0,[%o1+392]
std %f0,[%o1+400]
std %f0,[%o1+408]
std %f0,[%o1+416]
std %f0,[%o1+424]
std %f0,[%o1+432]
std %f0,[%o1+440]
std %f0,[%o1+448]
std %f0,[%o1+456]
std %f0,[%o1+464]
std %f0,[%o1+472]
std %f0,[%o1+480]
std %f0,[%o1+488]
std %f0,[%o1+496]
std %f0,[%o1+504]
std %f0,[%o1+512]
std %f0,[%o1+520]
ldd [%g3-8],%f0
ldd [%g3-16],%f8
fmuld %f2,%f0,%f6
ldd [%i4],%f4
ldd [%g4],%f0
fdtox %f6,%f6
fxtod %f6,%f6
fmuld %f6,%f8,%f6
fsubd %f2,%f6,%f2
fmuld %f4,%f2,%f12
! ---- nlen == 16: fully-unrolled 16-digit inner loop, run 32 times;
! the !N comments number the original scheduled instruction groups ----
.L900000650:
fmovd %f2,%f0
fmovd %f16,%f18
ldd [%i4],%f2
ldd [%o4],%f8
ldd [%g4],%f10
ldd [%g3-8],%f14
ldd [%g3-16],%f16
ldd [%i2],%f24
ldd [%g4+8],%f26
ldd [%g4+16],%f40
ldd [%g4+48],%f46
ldd [%g4+56],%f30
ldd [%g4+64],%f54
ldd [%g4+104],%f34
ldd [%g4+112],%f58
ldd [%i4+8],%f28
ldd [%i4+104],%f38
ldd [%i4+112],%f60
.L99999999:
!1
ldd [%g4+24],%f32
fmuld %f0,%f2,%f4
!2
ldd [%i4+24],%f36
fmuld %f26,%f24,%f20
!3
ldd [%g4+40],%f42
fmuld %f28,%f0,%f22
!4
ldd [%i4+40],%f44
fmuld %f32,%f24,%f32
!5
ldd [%i2+8],%f6
faddd %f4,%f8,%f4
fmuld %f36,%f0,%f36
!6
add %i2,8,%i2
ldd [%i4+56],%f50
fmuld %f42,%f24,%f42
!7
ldd [%g4+72],%f52
faddd %f20,%f22,%f20
fmuld %f44,%f0,%f44
!8
ldd [%o4+16],%f22
fmuld %f10,%f6,%f12
!9
ldd [%i4+72],%f56
faddd %f32,%f36,%f32
fmuld %f14,%f4,%f4
!10
ldd [%o4+48],%f36
fmuld %f30,%f24,%f48
!11
ldd [%o4+8],%f8
faddd %f20,%f22,%f20
fmuld %f50,%f0,%f50
!12
std %f20,[%o4+16]
faddd %f42,%f44,%f42
fmuld %f52,%f24,%f52
!13
ldd [%o4+80],%f44
faddd %f4,%f12,%f4
fmuld %f56,%f0,%f56
!14
ldd [%g4+88],%f20
faddd %f32,%f36,%f32
!15
ldd [%i4+88],%f22
faddd %f48,%f50,%f48
!16
ldd [%o4+112],%f50
faddd %f52,%f56,%f52
!17
ldd [%o4+144],%f56
faddd %f4,%f8,%f8
fmuld %f20,%f24,%f20
!18
std %f32,[%o4+48]
faddd %f42,%f44,%f42
fmuld %f22,%f0,%f22
!19
std %f42,[%o4+80]
faddd %f48,%f50,%f48
fmuld %f34,%f24,%f32
!20
std %f48,[%o4+112]
faddd %f52,%f56,%f52
fmuld %f38,%f0,%f36
!21
ldd [%g4+120],%f42
fdtox %f8,%f4
!22
std %f52,[%o4+144]
faddd %f20,%f22,%f20
!23
ldd [%i4+120],%f44
!24
ldd [%o4+176],%f22
faddd %f32,%f36,%f32
fmuld %f42,%f24,%f42
!25
ldd [%i4+16],%f50
fmovs %f17,%f4
!26
ldd [%g4+32],%f52
fmuld %f44,%f0,%f44
!27
ldd [%i4+32],%f56
fmuld %f40,%f24,%f48
!28
ldd [%o4+208],%f36
faddd %f20,%f22,%f20
fmuld %f50,%f0,%f50
!29
std %f20,[%o4+176]
fxtod %f4,%f4
fmuld %f52,%f24,%f52
!30
ldd [%i4+48],%f22
faddd %f42,%f44,%f42
fmuld %f56,%f0,%f56
!31
ldd [%o4+240],%f44
faddd %f32,%f36,%f32
!32
std %f32,[%o4+208]
faddd %f48,%f50,%f48
fmuld %f46,%f24,%f20
!33
ldd [%o4+32],%f50
fmuld %f4,%f18,%f12
!34
ldd [%i4+64],%f36
faddd %f52,%f56,%f52
fmuld %f22,%f0,%f22
!35
ldd [%o4+64],%f56
faddd %f42,%f44,%f42
!36
std %f42,[%o4+240]
faddd %f48,%f50,%f48
fmuld %f54,%f24,%f32
!37
std %f48,[%o4+32]
fmuld %f12,%f14,%f4
!38
ldd [%g4+80],%f42
faddd %f52,%f56,%f56 ! yes, tmp52!
fmuld %f36,%f0,%f36
!39
ldd [%i4+80],%f44
faddd %f20,%f22,%f20
!40
ldd [%g4+96],%f48
fmuld %f58,%f24,%f52
!41
ldd [%i4+96],%f50
fdtox %f4,%f4
fmuld %f42,%f24,%f42
!42
std %f56,[%o4+64] ! yes, tmp52!
faddd %f32,%f36,%f32
fmuld %f44,%f0,%f44
!43
ldd [%o4+96],%f22
fmuld %f48,%f24,%f48
!44
ldd [%o4+128],%f36
fmovd %f6,%f24
fmuld %f50,%f0,%f50
!45
fxtod %f4,%f4
fmuld %f60,%f0,%f56
!46
add %o4,8,%o4
faddd %f42,%f44,%f42
!47
ldd [%o4+160-8],%f44
faddd %f20,%f22,%f20
!48
std %f20,[%o4+96-8]
faddd %f48,%f50,%f48
!49
ldd [%o4+192-8],%f50
faddd %f52,%f56,%f52
fmuld %f4,%f16,%f4
!50
ldd [%o4+224-8],%f56
faddd %f32,%f36,%f32
!51
std %f32,[%o4+128-8]
faddd %f42,%f44,%f42
!52
add %o3,1,%o3
std %f42,[%o4+160-8]
faddd %f48,%f50,%f48
!53
cmp %o3,31
std %f48,[%o4+192-8]
fsubd %f12,%f4,%f0
!54
faddd %f52,%f56,%f52
ble,pt %icc,.L99999999
std %f52,[%o4+224-8]
!55
std %f8,[%o4]
.L77000285:
sll %g1,4,%g2
! ---- inlined conv_d16_to_i32: collapse dt[2*nlen..] digit doubles
! into 32-bit result words (same algorithm as the standalone routine,
! %o1 = 0xffff digit mask, %o2 = 0xffffffff word mask) ----
.L900000651:
ldd [%g5+%g2],%f0
add %g5,%g2,%i1
or %g0,0,%o4
ld [%fp+68],%o0
or %g0,0,%i0
cmp %g1,0
fdtox %f0,%f0
std %f0,[%sp+120]
sethi %hi(0xfc00),%o1
or %g0,%o0,%o3
sub %g1,1,%g4
ldd [%i1+8],%f0
or %g0,%o0,%g5
add %o1,1023,%o1
fdtox %f0,%f0
std %f0,[%sp+112]
ldx [%sp+112],%o5
ldx [%sp+120],%o7
ble,pt %icc,.L900000649
sethi %hi(0xfc00),%g2
or %g0,-1,%g2
cmp %g1,3
srl %g2,0,%o2
bl,pn %icc,.L77000286
or %g0,%i1,%g2
ldd [%i1+16],%f0
and %o5,%o1,%o0
add %i1,16,%g2
sllx %o0,16,%g3
and %o7,%o2,%o0
fdtox %f0,%f0
std %f0,[%sp+104]
add %o0,%g3,%o4
ldd [%i1+24],%f2
srax %o5,16,%o0
add %o3,4,%g5
stx %o0,[%sp+128]
and %o4,%o2,%o0
or %g0,1,%i0
stx %o0,[%sp+112]
srax %o4,32,%o0
fdtox %f2,%f0
stx %o0,[%sp+136]
srax %o7,32,%o4
std %f0,[%sp+96]
ldx [%sp+136],%o7
ldx [%sp+128],%o0
ldx [%sp+104],%g3
add %o0,%o7,%o0
ldx [%sp+112],%o7
add %o4,%o0,%o4
ldx [%sp+96],%o5
st %o7,[%o3]
or %g0,%g3,%o7
.L900000632:
ldd [%g2+16],%f0
add %i0,1,%i0
add %g5,4,%g5
cmp %i0,%g4
add %g2,16,%g2
fdtox %f0,%f0
std %f0,[%sp+104]
ldd [%g2+8],%f0
fdtox %f0,%f0
std %f0,[%sp+96]
and %o5,%o1,%g3
sllx %g3,16,%g3
stx %g3,[%sp+120]
and %o7,%o2,%g3
stx %o7,[%sp+128]
ldx [%sp+120],%o7
add %g3,%o7,%g3
ldx [%sp+128],%o7
srax %o5,16,%o5
add %g3,%o4,%g3
srax %g3,32,%o4
stx %o4,[%sp+112]
srax %o7,32,%o4
ldx [%sp+112],%o7
add %o5,%o7,%o7
ldx [%sp+96],%o5
add %o4,%o7,%o4
and %g3,%o2,%g3
ldx [%sp+104],%o7
ble,pt %icc,.L900000632
st %g3,[%g5-4]
.L900000635:
ba .L900000649
sethi %hi(0xfc00),%g2
.L77000286:
ldd [%g2+16],%f0
.L900000648:
and %o7,%o2,%o0
and %o5,%o1,%g3
fdtox %f0,%f0
add %o4,%o0,%o0
std %f0,[%sp+104]
add %i0,1,%i0
sllx %g3,16,%o4
ldd [%g2+24],%f2
add %g2,16,%g2
add %o0,%o4,%o4
cmp %i0,%g4
srax %o5,16,%o0
stx %o0,[%sp+112]
and %o4,%o2,%g3
srax %o4,32,%o5
fdtox %f2,%f0
std %f0,[%sp+96]
srax %o7,32,%o4
ldx [%sp+112],%o7
add %o7,%o5,%o7
ldx [%sp+104],%o5
add %o4,%o7,%o4
ldx [%sp+96],%o0
st %g3,[%g5]
or %g0,%o5,%o7
add %g5,4,%g5
or %g0,%o0,%o5
ble,a,pt %icc,.L900000648
ldd [%g2+16],%f0
.L77000236:
sethi %hi(0xfc00),%g2
.L900000649:
! flush the final word of the inlined conversion
or %g0,-1,%o0
add %g2,1023,%g2
ld [%fp+88],%o1
srl %o0,0,%g3
and %o5,%g2,%g2
and %o7,%g3,%g4
sllx %g2,16,%g2
add %o4,%g4,%g4
add %g4,%g2,%g2
sll %i0,2,%g4
and %g2,%g3,%g2
st %g2,[%o3+%g4]
! ---- inlined adjust_montf_result: conditional final subtraction of
! the modulus (nint, reloaded from [%fp+88]) from the result ----
sll %g1,2,%g2
ld [%o3+%g2],%g2
cmp %g2,0
bleu,pn %icc,.L77000241
or %g0,-1,%o5
ba .L900000647
cmp %o5,0
.L77000241:
subcc %g1,1,%o5
bneg,pt %icc,.L900000647
cmp %o5,0
sll %o5,2,%g2
add %o1,%g2,%o0
add %o3,%g2,%o4
ld [%o0],%g2
.L900000646:
ld [%o4],%g3
sub %o0,4,%o0
sub %o4,4,%o4
cmp %g3,%g2
bne,pn %icc,.L77000244
nop
subcc %o5,1,%o5
bpos,a,pt %icc,.L900000646
ld [%o0],%g2
.L77000244:
cmp %o5,0
.L900000647:
bl,pn %icc,.L77000287
sll %o5,2,%g2
ld [%o1+%g2],%g3
ld [%o3+%g2],%g2
cmp %g2,%g3
bleu,pt %icc,.L77000224
nop
.L77000287:
cmp %g1,0
ble,pt %icc,.L77000224
nop
sub %g1,1,%o7
or %g0,-1,%g2
srl %g2,0,%o4
add %o7,1,%o0
or %g0,%o1,%o2
or %g0,0,%o5
or %g0,0,%g1
cmp %o0,3
add %o1,4,%o0
bl,pn %icc,.L77000288
add %o3,8,%o1
ld [%o0-4],%g3
or %g0,%o1,%o3
or %g0,%o0,%o2
ld [%o1-8],%g2
or %g0,2,%g1
ld [%o3-4],%o0
sub %g2,%g3,%g2
or %g0,%g2,%o5
and %g2,%o4,%g2
st %g2,[%o3-8]
srax %o5,32,%o5
.L900000636:
ld [%o2],%g2
add %g1,1,%g1
add %o2,4,%o2
cmp %g1,%o7
add %o3,4,%o3
sub %o0,%g2,%o0
add %o0,%o5,%o5
and %o5,%o4,%g2
ld [%o3-4],%o0
st %g2,[%o3-8]
ble,pt %icc,.L900000636
srax %o5,32,%o5
.L900000639:
ld [%o2],%o1
sub %o0,%o1,%o0
add %o0,%o5,%o0
and %o0,%o4,%o1
st %o1,[%o3-4]
ret ! Result =
restore %g0,%g0,%g0
.L77000288:
ld [%o3],%o0
.L900000645:
ld [%o2],%o1
add %o5,%o0,%o0
add %g1,1,%g1
add %o2,4,%o2
cmp %g1,%o7
sub %o0,%o1,%o0
and %o0,%o4,%o1
st %o1,[%o3]
add %o3,4,%o3
srax %o0,32,%o5
ble,a,pt %icc,.L900000645
ld [%o3],%o0
.L77000224:
ret ! Result =
restore %g0,%g0,%g0
.type mont_mulf_noconv,2
.size mont_mulf_noconv,(.-mont_mulf_noconv)
! Begin Disassembling Stabs
.xstabs ".stab.index","Xa ; O ; P ; V=3.1 ; R=WorkShop Compilers 5.0 99/02/25 C 5.0 patch 107289-01",60,0,0,0 ! (/tmp/acompAAAhNaOly:1)
.xstabs ".stab.index","/home/ferenc/venus/userland/rsa; /usr/dist/pkgs/devpro,v5.0/5.x-sparc/SC5.0/bin/cc -fast -xarch=v8plus -xO5 -xstrconst -xdepend -Xa -xchip=ultra2 -KPIC -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -c proba.il -o mont_mulf.o mont_mulf.c -W0,-xp",52,0,0,0 ! (/tmp/acompAAAhNaOly:2)
! End Disassembling Stabs
! Begin Disassembling Ident
.ident "cg: WorkShop Compilers 5.0 99/04/15 Compiler Common 5.0 Patch 107357-02" ! (NO SOURCE LINE)
.ident "acomp: WorkShop Compilers 5.0 99/02/25 C 5.0 patch 107289-01" ! (/tmp/acompAAAhNaOly:31)
! End Disassembling Ident