root/usr/src/lib/libmvec/common/vis/__vexpf.S
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "__vexpf.S"

#include "libm.h"

        RO_DATA
        .align  64
!!  2^(i/256) - ((i & 0xf0) << 44), i = [0, 255]
.CONST_TBL:
        .word   0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf
        .word   0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281
        .word   0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc
        .word   0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1
        .word   0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89
        .word   0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836
        .word   0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0
        .word   0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919
        .word   0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85
        .word   0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec
        .word   0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5
        .word   0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e
        .word   0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6
        .word   0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab
        .word   0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e
        .word   0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2
        .word   0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0
        .word   0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f
        .word   0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c
        .word   0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b
        .word   0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027
        .word   0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d
        .word   0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819
        .word   0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1
        .word   0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a
        .word   0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75
        .word   0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29
        .word   0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70
        .word   0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13
        .word   0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f
        .word   0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589
        .word   0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b
        .word   0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd
        .word   0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32
        .word   0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d
        .word   0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b
        .word   0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a
        .word   0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef
        .word   0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4
        .word   0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173
        .word   0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175
        .word   0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024
        .word   0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a
        .word   0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4
        .word   0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232
        .word   0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237
        .word   0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2
        .word   0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7
        .word   0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114
        .word   0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff
        .word   0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee
        .word   0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef
        .word   0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27
        .word   0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2
        .word   0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf
        .word   0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc
        .word   0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03
        .word   0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93
        .word   0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71
        .word   0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4
        .word   0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd
        .word   0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7
        .word   0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6
        .word   0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538
        .word   0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e
        .word   0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645
        .word   0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5
        .word   0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87
        .word   0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a
        .word   0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd
        .word   0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09
        .word   0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6
        .word   0x3feea114, 0x73eb0187, 0x3feeb17b, 0x0976cfdb
        .word   0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0
        .word   0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491
        .word   0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9
        .word   0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7
        .word   0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21
        .word   0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436
        .word   0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f
        .word   0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778
        .word   0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9
        .word   0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a
        .word   0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2
        .word   0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5
        .word   0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3
        .word   0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2
        .word   0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d
        .word   0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5
        .word   0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e
        .word   0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb
        .word   0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8
        .word   0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052
        .word   0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59
        .word   0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba
        .word   0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774
        .word   0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff
        .word   0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952
        .word   0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1
        .word   0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a
        .word   0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4
        .word   0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f
        .word   0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207
        .word   0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d
        .word   0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c
        .word   0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22
        .word   0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933
        .word   0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db
        .word   0x3fefb720, 0xdcef9069, 0x3fefcb0f, 0x2e6d1675
        .word   0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74
        .word   0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968
        .word   0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6
        .word   0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3
        .word   0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075
        .word   0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315
        .word   0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658
        .word   0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17
        .word   0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12
        .word   0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76
        .word   0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740
        .word   0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e
        .word   0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510
        .word   0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a
        .word   0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274
        .word   0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8
        .word   0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89
        .word   0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514
        .word   0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9

        .word   0x7149f2ca, 0x0da24260  ! 1.0e30f, 1.0e-30f
        .word   0x3ecebfbe, 0x9d182250  ! KA2 = 3.66556671660783833261e-06
        .word   0x3f662e43, 0xe2528362  ! KA1 = 2.70760782821392980564e-03
        .word   0x40771547, 0x652b82fe  ! K256ONLN2 = 369.3299304675746271
        .word   0x42aeac4f, 0x42b17218  ! THRESHOLD = 87.3365402f
                                        ! THRESHOLDL = 88.7228394f
! local storage indices

#define tmp0            STACK_BIAS-32
#define tmp1            STACK_BIAS-28
#define tmp2            STACK_BIAS-24
#define tmp3            STACK_BIAS-20
#define tmp4            STACK_BIAS-16
#define tmp5            STACK_BIAS-12
#define tmp6            STACK_BIAS-8
#define tmp7            STACK_BIAS-4

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps            0x20

#define I5_THRESHOLD    %i5
#define G1_CONST_TBL    %g5
#define G5_CONST        %g1

#define F62_K256ONLN2   %f62
#define F60_KA2         %f60
#define F58_KA1         %f58

#define THRESHOLDL      %f0

! register use
! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey

! i5  0x42aeac4f (87.3365402f)

! g5  CONST_TBL  (macro G1_CONST_TBL)
! g1  0x7fffffff (macro G5_CONST)

! f62 K256ONLN2 = 369.3299304675746271
! f60 KA2 = 3.66556671660783833261e-06
! f58 KA1 = 2.70760782821392980564e-03


!               !!!!!  Algorithm  !!!!!
!
!  double y, dtmp, drez;
!  int k, sign, Xi;
!  float X, Y;
!  int THRESHOLD = 0x42aeac4f; /* 87.3365402f */
!  float THRESHOLDL = 88.7228394f;
!  double KA2 = 3.66556671660783833261e-06;
!  double KA1 = 2.70760782821392980564e-03;
!  double K256ONLN2 = 369.3299304675746271;
!  char *CONST_TBL;
!
!  X  = px[0];
!  Xi = ((int*)px)[0];
!  ax = Xi & 0x7fffffff;
!
!  if (ax > THRESHOLD) {
!    sign = ((unsigned)Xi >> 29) & 4;
!    if (ax >= 0x7f800000) {      /* Inf or NaN */
!      if (ax > 0x7f800000) {     /* NaN */
!        Y = X * X;               /* NaN -> NaN */
!        return Y;
!      }
!      Y = (sign) ? zero : X;     /* +Inf -> +Inf , -Inf -> zero */
!      return Y;
!    }
!
!    if ( X < 0.0f || X >= THRESHOLDL ) {
!      Y = ((float*)(CONST_TBL + 2048 + sign))[0];
!         /* Xi >= THRESHOLDL : Y = 1.0e+30f */
!         /* Xi < -THRESHOLD  : Y = 1.0e-30f */
!      Y =  Y * Y;
!         /* Xi >= THRESHOLDL : +Inf + overflow  */
!         /* Xi < -THRESHOLD  : +0 + underflow */
!      return Y;
!    }
!  }
!  vis_write_gsr(12 << 3);
!  y = (double) X;
!  y = K256ONLN2 * y;
!  k = (int) y;
!  dtmp = (double) k;
!  y -= dtmp;
!  dtmp = y * KA2;
!  dtmp += KA1;
!  y *= dtmp;
!    /* net effect of the three lines above: y = (y * KA2 + KA1) * y */
!  ((int*)&drez)[0] = k;
!  ((int*)&drez)[1] = 0;
!  ((float*)&drez)[0] = vis_fpackfix(drez);
!  k &= 255;
!  k <<= 3;
!  dtmp = ((double*)(CONST_TBL + k))[0];
!  drez = vis_fpadd32(drez,dtmp);
!  y *= drez;
!  y += drez;
!  Y = (float) y;
!
!
!  fstod %f16,%f40                      ! y = (double) X
!  fmuld F62_K256ONLN2,%f40,%f40        ! y *= K256ONLN2
!  fdtoi %f40,%f16                      ! k = (int) y
!  st  %f16,[%fp+tmp0]                  ! store k
!  fitod %f16,%f34                      ! dtmp = (double) k
!  fpackfix  %f16,%f16                  ! ((float*)&drez)[0] = vis_fpackfix(drez)
!  fsubd %f40,%f34,%f40                 ! y -= dtmp
!  fmuld F60_KA2,%f40,%f34              ! dtmp = y * KA2
!  faddd F58_KA1,%f34,%f34              ! dtmp += KA1
!  ld  [%fp+tmp0],%o0                   ! load k
!  fmuld %f34,%f40,%f40                 ! y *= dtmp
!  and %o0,255,%o0                      ! k &= 255
!  sll  %o0,3,%o0                       ! k <<= 3
!  ldd [G1_CONST_TBL+%o0],%f34          ! dtmp = ((double*)(CONST_TBL + k))[0]
!  fpadd32 %f16,%f34,%f34               ! drez = vis_fpadd32(drez,dtmp)
!  fmuld %f34,%f40,%f40                 ! y *= drez
!  faddd %f34,%f40,%f40                 ! y += drez
!  fdtos %f40,%f26                      ! (float) y
!--------------------------------------------------------------------

        ENTRY(__vexpf)
        ! Entry and one-time setup.
        ! On entry: %i0 = n, %i1 = x, %i2 = stridex, %i3 = y, %i4 = stridey.
        ! Sets up the constant table pointer, %asi, %gsr, byte strides and
        ! the polynomial constants, then picks the 8-wide loop or the tail.
        save    %sp,-SA(MINFRAME)-tmps,%sp      ! new frame with tmps bytes for tmp0..tmp7
        PIC_SETUP(l7)
        PIC_SET(l7,.CONST_TBL,g5)               ! table base; %g5 is used as G1_CONST_TBL below

        wr      %g0,0x82,%asi           ! set %asi for non-faulting loads
        wr      %g0,0x60,%gsr           ! 0x60 = 12 << 3, cf. vis_write_gsr(12 << 3)

        sll     %i2,2,%i2               ! stridex <<= 2 (stride in bytes for 4-byte loads)
        sll     %i4,2,%i4               ! stridey <<= 2 (stride in bytes for 4-byte stores)

        ! Constants live right after the 2048-byte table:
        ! 2056 KA2, 2064 KA1, 2072 K256ONLN2, 2080 THRESHOLD, 2084 THRESHOLDL.
        ! The sethi/add pair interleaved with the loads builds the mask
        ! 0x7ffffc00 + 1023 = 0x7fffffff.
        ldd     [G1_CONST_TBL+2056],F60_KA2
        sethi   %hi(0x7ffffc00),G5_CONST
        ldd     [G1_CONST_TBL+2064],F58_KA1
        add     G5_CONST,1023,G5_CONST
        ldd     [G1_CONST_TBL+2072],F62_K256ONLN2
        ld      [G1_CONST_TBL+2080],I5_THRESHOLD
        ld      [G1_CONST_TBL+2084],THRESHOLDL

        subcc   %i0,8,%i0               ! n -= 8
        bneg,pn %icc,.tail              ! fewer than 8 elements: tail only
        fzeros  %f3                     ! delay slot (always runs): %f3 = 0.0f

.main_loop_preload:
! Prime the 8-lane pipeline for .main_loop.
! On exit: %l0-%l7 = ax for lanes (0)-(7); %f16,%f2,%f4,%f6 = X for lanes
! (0)-(3); %o0-%o3 = addresses of lanes (4)-(7); %i1 = address of the
! element 8 strides past lane (0).
! The interleaved fzeros clear the odd register of each lane pair
! (%f5 .. %f17), matching ((int*)&drez)[1] = 0 in the algorithm; the
! pair for lane (1) uses %f3, cleared before this label is reached.

! preload 8 elements and get absolute values
        ld      [%i1],%l0               ! (0) Xi = ((int*)px)[0]
        fzeros  %f5                     ! (2) low word of the %f4 pair = 0
        ld      [%i1],%f16              ! (0) X = px[0]
        fzeros  %f7                     ! (3) low word of the %f6 pair = 0
        add     %i1,%i2,%o5             ! px += stridex
        ld      [%o5],%l1               ! (1) Xi = ((int*)px)[0]
        and     %l0,G5_CONST,%l0        ! (0) ax = Xi & 0x7fffffff
        fzeros  %f9                     ! (4) low word of the %f8 pair = 0
        ld      [%o5],%f2               ! (1) X = px[0]
        fzeros  %f11                    ! (5) low word of the %f10 pair = 0
        add     %o5,%i2,%i1             ! px += stridex
        ld      [%i1],%l2               ! (2) Xi = ((int*)px)[0]
        and     %l1,G5_CONST,%l1        ! (1) ax = Xi & 0x7fffffff
        fzeros  %f13                    ! (6) low word of the %f12 pair = 0
        ld      [%i1],%f4               ! (2) X = px[0]
        fzeros  %f15                    ! (7) low word of the %f14 pair = 0
        add     %i1,%i2,%o5             ! px += stridex
        ld      [%o5],%l3               ! (3) Xi = ((int*)px)[0]
        and     %l2,G5_CONST,%l2        ! (2) ax = Xi & 0x7fffffff
        fzeros  %f17                    ! (0) low word of the %f16 pair = 0
        ld      [%o5],%f6               ! (3) X = px[0]
        add     %o5,%i2,%o0             ! px += stridex
        ld      [%o0],%l4               ! (4) Xi = ((int*)px)[0]
        and     %l3,G5_CONST,%l3        ! (3) ax = Xi & 0x7fffffff
        add     %o0,%i2,%o1             ! px += stridex
        ld      [%o1],%l5               ! (5) Xi = ((int*)px)[0]
        add     %o1,%i2,%o2             ! px += stridex
        ld      [%o2],%l6               ! (6) Xi = ((int*)px)[0]
        and     %l4,G5_CONST,%l4        ! (4) ax = Xi & 0x7fffffff
        add     %o2,%i2,%o3             ! px += stridex
        ld      [%o3],%l7               ! (7) Xi = ((int*)px)[0]
        add     %o3,%i2,%i1             ! px += stridex
        and     %l5,G5_CONST,%l5        ! (5) ax = Xi & 0x7fffffff
        and     %l6,G5_CONST,%l6        ! (6) ax = Xi & 0x7fffffff
        ba      .main_loop
        and     %l7,G5_CONST,%l7        ! (7) ax = Xi & 0x7fffffff

        .align  16
.main_loop:
! One pass computes 8 results, lanes (0)-(7), with the per-lane steps of
! the algorithm interleaved.  The numbers in parentheses name the lane;
! (8)-(15) are the elements being fetched for the next pass.
! On entry: %l0-%l7 = ax for lanes (0)-(7); %f16,%f2,%f4,%f6 = X for
! lanes (0)-(3); %o0-%o3 = addresses of lanes (4)-(7), whose X values are
! loaded in the delay slots below; %i1 = address of element (8);
! %i3 = py for lane (0).
! A lane with ax > THRESHOLD leaves through its .specN handler; the
! matching .specN_cont label is where a handler re-enters.
        cmp     %l0,I5_THRESHOLD
        bg,pn   %icc,.spec0             ! (0) if (ax > THRESHOLD)
        lda     [%o0]%asi,%f8           ! (4) X = px[0]
        fstod   %f16,%f40               ! (0) y = (double) X
.spec0_cont:
        cmp     %l1,I5_THRESHOLD
        bg,pn   %icc,.spec1             ! (1) if (ax > THRESHOLD)
        lda     [%o1]%asi,%f10          ! (5) X = px[0]
        fstod   %f2,%f42                ! (1) y = (double) X
.spec1_cont:
        cmp     %l2,I5_THRESHOLD
        bg,pn   %icc,.spec2             ! (2) if (ax > THRESHOLD)
        lda     [%o2]%asi,%f12          ! (6) X = px[0]
        fstod   %f4,%f44                ! (2) y = (double) X
.spec2_cont:
        cmp     %l3,I5_THRESHOLD
        bg,pn   %icc,.spec3             ! (3) if (ax > THRESHOLD)
        lda     [%o3]%asi,%f14          ! (7) X = px[0]
        fstod   %f6,%f46                ! (3) y = (double) X
.spec3_cont:
        cmp     %l4,I5_THRESHOLD
        bg,pn   %icc,.spec4             ! (4) if (ax > THRESHOLD)
        fmuld   F62_K256ONLN2,%f40,%f40 ! (0) y *= K256ONLN2
        fstod   %f8,%f48                ! (4) y = (double) X
.spec4_cont:
        cmp     %l5,I5_THRESHOLD
        bg,pn   %icc,.spec5             ! (5) if (ax > THRESHOLD)
        fmuld   F62_K256ONLN2,%f42,%f42 ! (1) y *= K256ONLN2
        fstod   %f10,%f50               ! (5) y = (double) X
.spec5_cont:
        cmp     %l6,I5_THRESHOLD
        bg,pn   %icc,.spec6             ! (6) if (ax > THRESHOLD)
        fmuld   F62_K256ONLN2,%f44,%f44 ! (2) y *= K256ONLN2
        fstod   %f12,%f52               ! (6) y = (double) X
.spec6_cont:
        cmp     %l7,I5_THRESHOLD
        bg,pn   %icc,.spec7             ! (7) if (ax > THRESHOLD)
        fmuld   F62_K256ONLN2,%f46,%f46 ! (3) y *= K256ONLN2
        fstod   %f14,%f54               ! (7) y = (double) X
.spec7_cont:
        fdtoi   %f40,%f16               ! (0) k = (int) y
        st      %f16,[%fp+tmp0]         ! (0) store k
        fmuld   F62_K256ONLN2,%f48,%f48 ! (4) y *= K256ONLN2

        fdtoi   %f42,%f2                ! (1) k = (int) y
        st      %f2,[%fp+tmp1]          ! (1) store k
        fmuld   F62_K256ONLN2,%f50,%f50 ! (5) y *= K256ONLN2

        fdtoi   %f44,%f4                ! (2) k = (int) y
        st      %f4,[%fp+tmp2]          ! (2) store k
        fmuld   F62_K256ONLN2,%f52,%f52 ! (6) y *= K256ONLN2

        fdtoi   %f46,%f6                ! (3) k = (int) y
        st      %f6,[%fp+tmp3]          ! (3) store k
        fmuld   F62_K256ONLN2,%f54,%f54 ! (7) y *= K256ONLN2

        fdtoi   %f48,%f8                ! (4) k = (int) y
        st      %f8,[%fp+tmp4]          ! (4) store k

        fdtoi   %f50,%f10               ! (5) k = (int) y
        st      %f10,[%fp+tmp5]         ! (5) store k

        fitod   %f16,%f34               ! (0) dtmp = (double) k
        fpackfix        %f16,%f16       ! (0) ((float*)&drez)[0] = vis_fpackfix(drez)
        nop                             ! NOTE(review): presumably scheduling padding
        nop

        fdtoi   %f52,%f12               ! (6) k = (int) y
        st      %f12,[%fp+tmp6]         ! (6) store k

        fdtoi   %f54,%f14               ! (7) k = (int) y
        st      %f14,[%fp+tmp7]         ! (7) store k

        lda     [%i1]%asi,%l0           ! (8) Xi = ((int*)px)[0]
        add     %i1,%i2,%o5             ! px += stridex
        fitod   %f2,%f18                ! (1) dtmp = (double) k
        fpackfix        %f2,%f2         ! (1) ((float*)&drez)[0] = vis_fpackfix(drez)

        lda     [%o5]%asi,%l1           ! (9) Xi = ((int*)px)[0]
        add     %o5,%i2,%i1             ! px += stridex
        fitod   %f4,%f20                ! (2) dtmp = (double) k
        fpackfix        %f4,%f4         ! (2) ((float*)&drez)[0] = vis_fpackfix(drez)

        lda     [%i1]%asi,%l2           ! (10) Xi = ((int*)px)[0]
        add     %i1,%i2,%o5             ! px += stridex
        fitod   %f6,%f22                ! (3) dtmp = (double) k
        fpackfix        %f6,%f6         ! (3) ((float*)&drez)[0] = vis_fpackfix(drez)

        lda     [%o5]%asi,%l3           ! (11) Xi = ((int*)px)[0]
        add     %o5,%i2,%i1             ! px += stridex
        fitod   %f8,%f24                ! (4) dtmp = (double) k
        fpackfix        %f8,%f8         ! (4) ((float*)&drez)[0] = vis_fpackfix(drez)

        fitod   %f10,%f26               ! (5) dtmp = (double) k
        fpackfix        %f10,%f10       ! (5) ((float*)&drez)[0] = vis_fpackfix(drez)

        fitod   %f12,%f28               ! (6) dtmp = (double) k
        fpackfix        %f12,%f12       ! (6) ((float*)&drez)[0] = vis_fpackfix(drez)

        fitod   %f14,%f30               ! (7) dtmp = (double) k
        fpackfix        %f14,%f14       ! (7) ((float*)&drez)[0] = vis_fpackfix(drez)

        ld      [%fp+tmp0],%o0          ! (0) load k
        and     %l0,G5_CONST,%l0        ! (8) ax = Xi & 0x7fffffff
        fsubd   %f40,%f34,%f40          ! (0) y -= dtmp

        ld      [%fp+tmp1],%o1          ! (1) load k
        and     %l1,G5_CONST,%l1        ! (9) ax = Xi & 0x7fffffff
        fsubd   %f42,%f18,%f42          ! (1) y -= dtmp

        ld      [%fp+tmp2],%o2          ! (2) load k
        and     %l2,G5_CONST,%l2        ! (10) ax = Xi & 0x7fffffff
        and     %o0,255,%o0             ! (0) k &= 255
        fsubd   %f44,%f20,%f44          ! (2) y -= dtmp

        ld      [%fp+tmp3],%o3          ! (3) load k
        and     %o1,255,%o1             ! (1) k &= 255
        fsubd   %f46,%f22,%f46          ! (3) y -= dtmp

        sll     %o0,3,%o0               ! (0) k <<= 3
        sll     %o1,3,%o1               ! (1) k <<= 3
        fmuld   F60_KA2,%f40,%f34       ! (0) dtmp = y * KA2
        fsubd   %f48,%f24,%f48          ! (4) y -= dtmp

        and     %l3,G5_CONST,%l3        ! (11) ax = Xi & 0x7fffffff
        and     %o2,255,%o2             ! (2) k &= 255
        fmuld   F60_KA2,%f42,%f18       ! (1) dtmp = y * KA2
        fsubd   %f50,%f26,%f50          ! (5) y -= dtmp

        sll     %o2,3,%o2               ! (2) k <<= 3
        fmuld   F60_KA2,%f44,%f20       ! (2) dtmp = y * KA2
        fsubd   %f52,%f28,%f52          ! (6) y -= dtmp

        ld      [%fp+tmp4],%o4          ! (4) load k
        and     %o3,255,%o3             ! (3) k &= 255
        fmuld   F60_KA2,%f46,%f22       ! (3) dtmp = y * KA2
        fsubd   %f54,%f30,%f54          ! (7) y -= dtmp

        ld      [%fp+tmp5],%o5          ! (5) load k
        sll     %o3,3,%o3               ! (3) k <<= 3
        fmuld   F60_KA2,%f48,%f24       ! (4) dtmp = y * KA2
        faddd   F58_KA1,%f34,%f34       ! (0) dtmp += KA1

        ld      [%fp+tmp6],%o7          ! (6) load k
        and     %o4,255,%o4             ! (4) k &= 255
        fmuld   F60_KA2,%f50,%f26       ! (5) dtmp = y * KA2
        faddd   F58_KA1,%f18,%f18       ! (1) dtmp += KA1

        ld      [%fp+tmp7],%l4          ! (7) load k
        and     %o5,255,%o5             ! (5) k &= 255
        fmuld   F60_KA2,%f52,%f28       ! (6) dtmp = y * KA2
        faddd   F58_KA1,%f20,%f20       ! (2) dtmp += KA1

        sll     %o5,3,%o5               ! (5) k <<= 3
        fmuld   F60_KA2,%f54,%f30       ! (7) dtmp = y * KA2
        faddd   F58_KA1,%f22,%f22       ! (3) dtmp += KA1

        fmuld   %f34,%f40,%f40          ! (0) y *= dtmp
        ldd     [G1_CONST_TBL+%o0],%f34 ! (0) dtmp = ((double*)(CONST_TBL + k))[0]
        and     %l4,255,%l4             ! (7) k &= 255
        faddd   F58_KA1,%f24,%f24       ! (4) dtmp += KA1

        fmuld   %f18,%f42,%f42          ! (1) y *= dtmp
        ldd     [G1_CONST_TBL+%o1],%f18 ! (1) dtmp = ((double*)(CONST_TBL + k))[0]
        sll     %l4,3,%l4               ! (7) k <<= 3
        faddd   F58_KA1,%f26,%f26       ! (5) dtmp += KA1

        fmuld   %f20,%f44,%f44          ! (2) y *= dtmp
        ldd     [G1_CONST_TBL+%o2],%f20 ! (2) dtmp = ((double*)(CONST_TBL + k))[0]
        faddd   F58_KA1,%f28,%f28       ! (6) dtmp += KA1

        fmuld   %f22,%f46,%f46          ! (3) y *= dtmp
        ldd     [G1_CONST_TBL+%o3],%f22 ! (3) dtmp = ((double*)(CONST_TBL + k))[0]
        sll     %o4,3,%o4               ! (4) k <<= 3
        faddd   F58_KA1,%f30,%f30       ! (7) dtmp += KA1

        fmuld   %f24,%f48,%f48          ! (4) y *= dtmp
        ldd     [G1_CONST_TBL+%o4],%f24 ! (4) dtmp = ((double*)(CONST_TBL + k))[0]
        and     %o7,255,%o7             ! (6) k &= 255
        fpadd32 %f16,%f34,%f34          ! (0) drez = vis_fpadd32(drez,dtmp)

        fmuld   %f26,%f50,%f50          ! (5) y *= dtmp
        ldd     [G1_CONST_TBL+%o5],%f26 ! (5) dtmp = ((double*)(CONST_TBL + k))[0]
        sll     %o7,3,%o7               ! (6) k <<= 3
        fpadd32 %f2,%f18,%f18           ! (1) drez = vis_fpadd32(drez,dtmp)

        fmuld   %f28,%f52,%f52          ! (6) y *= dtmp
        ldd     [G1_CONST_TBL+%o7],%f28 ! (6) dtmp = ((double*)(CONST_TBL + k))[0]
        sll     %i2,2,%o0               ! 4 * stridex (%i1 is now at element (12))
        fpadd32 %f4,%f20,%f20           ! (2) drez = vis_fpadd32(drez,dtmp)

        fmuld   %f30,%f54,%f54          ! (7) y *= dtmp
        ldd     [G1_CONST_TBL+%l4],%f30 ! (7) dtmp = ((double*)(CONST_TBL + k))[0]
        sub     %i1,%o0,%o0             ! %o0 = address of element (8)
        fpadd32 %f6,%f22,%f22           ! (3) drez = vis_fpadd32(drez,dtmp)

        lda     [%i1]%asi,%l4           ! (12) Xi = ((int*)px)[0]
        add     %i1,%i2,%o1             ! px += stridex
        fpadd32 %f8,%f24,%f24           ! (4) drez = vis_fpadd32(drez,dtmp)
        fmuld   %f34,%f40,%f40          ! (0) y *= drez

        lda     [%o1]%asi,%l5           ! (13) Xi = ((int*)px)[0]
        add     %o1,%i2,%o2             ! px += stridex
        fpadd32 %f10,%f26,%f26          ! (5)  drez = vis_fpadd32(drez,dtmp)
        fmuld   %f18,%f42,%f42          ! (1)  y *= drez

        lda     [%o2]%asi,%l6           ! (14) Xi = ((int*)px)[0]
        add     %o2,%i2,%o3             ! px += stridex
        fpadd32 %f12,%f28,%f28          ! (6)  drez = vis_fpadd32(drez,dtmp)
        fmuld   %f20,%f44,%f44          ! (2)  y *= drez

        lda     [%o3]%asi,%l7           ! (15) Xi = ((int*)px)[0]
        add     %o3,%i2,%i1             ! px += stridex
        fpadd32 %f14,%f30,%f30          ! (7)  drez = vis_fpadd32(drez,dtmp)
        fmuld   %f22,%f46,%f46          ! (3)  y *= drez

        lda     [%o0]%asi,%f16          ! (8)  X = px[0]
        add     %o0,%i2,%o5             ! address of element (9)
        fmuld   %f24,%f48,%f48          ! (4)  y *= drez
        faddd   %f34,%f40,%f40          ! (0)  y += drez

        lda     [%o5]%asi,%f2           ! (9)  X = px[0]
        add     %o5,%i2,%o0             ! address of element (10)
        fmuld   %f26,%f50,%f50          ! (5)  y *= drez
        faddd   %f18,%f42,%f42          ! (1)  y += drez

        lda     [%o0]%asi,%f4           ! (10) X = px[0]
        add     %o0,%i2,%o5             ! address of element (11)
        fmuld   %f28,%f52,%f52          ! (6)  y *= drez
        faddd   %f20,%f44,%f44          ! (2)  y += drez

        lda     [%o5]%asi,%f6           ! (11) X = px[0]
        add     %o5,%i2,%o0             ! address of element (12), loaded next pass
        fmuld   %f30,%f54,%f54          ! (7)  y *= drez
        faddd   %f22,%f46,%f46          ! (3)  y += drez

        and     %l4,G5_CONST,%l4        ! (12) ax = Xi & 0x7fffffff
        faddd   %f24,%f48,%f48          ! (4)  y += drez

        and     %l5,G5_CONST,%l5        ! (13) ax = Xi & 0x7fffffff
        faddd   %f26,%f50,%f50          ! (5)  y += drez

        and     %l6,G5_CONST,%l6        ! (14) ax = Xi & 0x7fffffff
        faddd   %f28,%f52,%f52          ! (6)  y += drez

        and     %l7,G5_CONST,%l7        ! (15) ax = Xi & 0x7fffffff
        faddd   %f30,%f54,%f54          ! (7)  y += drez

        fdtos   %f40,%f26               ! (0) (float) y
        st      %f26,[%i3]              ! (0) py[0] = Y
        add     %i3,%i4,%o4             ! py += stridey

        fdtos   %f42,%f18               ! (1) (float) y
        st      %f18,[%o4]              ! (1) py[0] = Y
        add     %o4,%i4,%i3             ! py += stridey

        fdtos   %f44,%f20               ! (2) (float) y
        st      %f20,[%i3]              ! (2) py[0] = Y
        add     %i3,%i4,%o4             ! py += stridey

        fdtos   %f46,%f22               ! (3) (float) y
        st      %f22,[%o4]              ! (3) py[0] = Y
        add     %o4,%i4,%i3             ! py += stridey

        fdtos   %f48,%f24               ! (4) (float) y
        st      %f24,[%i3]              ! (4) py[0] = Y
        subcc   %i0,8,%i0               ! n -= 8; tested by the bpos below
        add     %i3,%i4,%o4             ! py += stridey

        fdtos   %f50,%f26               ! (5) (float) y
        st      %f26,[%o4]              ! (5) py[0] = Y
        add     %o4,%i4,%o5             ! py += stridey
        add     %i4,%i4,%o7             ! 2 * stridey

        fdtos   %f52,%f28               ! (6) (float) y
        st      %f28,[%o5]              ! (6) py[0] = Y
        add     %o5,%i4,%o4             ! py += stridey
        add     %o5,%o7,%i3             ! py for lane (0) of the next pass

        fdtos   %f54,%f30               ! (7) (float) y
        st      %f30,[%o4]              ! (7) py[0] = Y
        bpos,pt %icc,.main_loop         ! another full group of 8 remains
        nop
.after_main_loop:
        ! %i1 runs 8 elements ahead of the next unprocessed one; step it back.
        sll     %i2,3,%o2               ! 8 * stridex
        sub     %i1,%o2,%i1

.tail:
! Entry to the scalar tail: handles the 0..7 elements the 8-wide loop did
! not consume.  %i0 arrives biased by -8; %i1 = address of the next input
! element, %i3 = address of the next output element.
!
! The first element is fetched with ordinary (faulting) loads, so they
! must not sit in the delay slot of the exit branch: that branch is not
! annulled, so its delay slot runs even when the branch is taken.  With
! nothing left to process the load would touch px for n == 0, or one
! element past the input for n a multiple of 8.
        add     %i0,8,%i0               ! undo the -8 bias: elements left
        subcc   %i0,1,%i0               ! elements left - 1
        bneg,pn %icc,.exit              ! nothing left
        nop                             ! keep the loads out of the delay slot

        ld      [%i1],%l0               ! Xi = ((int*)px)[0]
        ld      [%i1],%f2               ! X = px[0]
        add     %i1,%i2,%i1             ! px += stridex

.tail_loop:
! Scalar version of the algorithm, one element per pass.
! On entry: %l0 = Xi and %f2 = X for the current element, %i1 already
! points at the following element, %i3 = py, %i0 = elements left - 1.
! The odd half of the %f2 pair is %f3, which holds zero.
        and     %l0,G5_CONST,%l1        ! ax = Xi & 0x7fffffff
        cmp     %l1,I5_THRESHOLD
        bg,pn   %icc,.tail_spec         ! if (ax > THRESHOLD)
        nop
.tail_spec_cont:
        fstod   %f2,%f40                ! y = (double) X
        fmuld   F62_K256ONLN2,%f40,%f40 ! y *= K256ONLN2
        fdtoi   %f40,%f2                ! k = (int) y
        st      %f2,[%fp+tmp0]          ! store k
        fitod   %f2,%f16                ! dtmp = (double) k
        fpackfix        %f2,%f2         ! ((float*)&drez)[0] = vis_fpackfix(drez)
        fsubd   %f40,%f16,%f40          ! y -= dtmp
        fmuld   F60_KA2,%f40,%f16       ! dtmp = y * KA2
        faddd   F58_KA1,%f16,%f16       ! dtmp += KA1
        ld      [%fp+tmp0],%o0          ! load k
        fmuld   %f16,%f40,%f40          ! y *= dtmp
        and     %o0,255,%o0             ! k &= 255
        sll     %o0,3,%o0               ! k <<= 3
        ldd     [G1_CONST_TBL+%o0],%f16 ! dtmp = ((double*)(CONST_TBL + k))[0]
        fpadd32 %f2,%f16,%f16           ! drez = vis_fpadd32(drez,dtmp)
        lda     [%i1]%asi,%l0           ! next Xi, non-faulting load
        fmuld   %f16,%f40,%f40          ! y *= drez
        lda     [%i1]%asi,%f2           ! next X, non-faulting load
        faddd   %f16,%f40,%f40          ! y += drez
        add     %i1,%i2,%i1             ! px += stridex
        fdtos   %f40,%f16               ! Y = (float) y
        st      %f16,[%i3]              ! py[0] = Y
        add     %i3,%i4,%i3             ! py += stridey
        subcc   %i0,1,%i0               ! one element done
        bpos,pt %icc,.tail_loop         ! more elements remain
        nop

.exit:
        ret
        restore

.tail_spec:
! Tail-loop handler for ax > THRESHOLD.
! On entry: %l0 = Xi (sign intact), %l1 = ax, %f2 = X, %f3 = 0.0f.
! Stores the result for NaN, Inf, overflow and underflow itself and then
! resumes the tail loop; a finite X with 0 <= X < THRESHOLDL goes back to
! .tail_spec_cont for the normal computation.
        sethi   %hi(0x7f800000),%o4
        cmp     %l1,%o4
        bl,pt   %icc,.tail_spec_out_of_range    ! ax < 0x7f800000: finite
        nop

        srl     %l0,29,%l0              ! sign bit of Xi moves to bit 2 (icc untouched)
        ble,pn  %icc,.tail_spec_inf     ! still the cmp above: ax == 0x7f800000
        andcc   %l0,4,%g0               ! delay slot: Z set when sign bit is clear

! NaN -> NaN

        fmuls   %f2,%f2,%f2             ! Y = X * X
        ba      .tail_spec_exit
        st      %f2,[%i3]               ! delay slot: py[0] = Y

.tail_spec_inf:
        be,a,pn %icc,.tail_spec_exit    ! sign clear: +Inf -> +Inf
        st      %f2,[%i3]               ! annulled delay slot: runs only when taken

        ba      .tail_spec_exit         ! sign set: -Inf -> zero
        st      %f3,[%i3]

.tail_spec_out_of_range:
        fcmpes  %fcc0,%f2,%f3
        fcmpes  %fcc1,%f2,THRESHOLDL
        fbl,pn  %fcc0,1f                ! if ( X < 0.0f )
        nop
        fbl,pt  %fcc1,.tail_spec_cont   ! if ( X < THRESHOLDL )
        nop
1:
        srl     %l0,29,%l0
        and     %l0,4,%l0               ! sign = 0 or 4
        add     %l0,2048,%l0            ! offset of 1.0e30f (sign 0) or 1.0e-30f (sign 4)
        ld      [G1_CONST_TBL+%l0],%f2
        fmuls   %f2,%f2,%f2             ! Y * Y: overflow or underflow
        st      %f2,[%i3]

.tail_spec_exit:
        lda     [%i1]%asi,%l0           ! next Xi, non-faulting load
        lda     [%i1]%asi,%f2           ! next X, non-faulting load
        add     %i1,%i2,%i1             ! px += stridex

        subcc   %i0,1,%i0               ! one element done
        bpos,pt %icc,.tail_loop         ! more elements remain
        add     %i3,%i4,%i3             ! delay slot: py += stridey
        ba      .exit
        nop

        .align  16
.spec0:
! Main-loop handler for lane (0) with ax > THRESHOLD.
! On entry: %l0 = ax for lane (0), %f16 = X, %f3 = 0.0f; %i1 is 8 strides
! past the address of lane (0).
! A finite X with 0 <= X < THRESHOLDL returns to .spec0_cont.  Otherwise
! the result is stored here, lanes (1)-(7) are shifted down one position
! and the main loop restarts with one element consumed.
        sethi   %hi(0x7f800000),%o5
        cmp     %l0,%o5
        bl,pt   %icc,.spec0_out_of_range        ! ax < 0x7f800000: finite
        sll     %i2,3,%o4               ! delay slot: 8 * stridex

        ble,pn  %icc,.spec0_inf         ! still the cmp above: ax == 0x7f800000
        sub     %i1,%o4,%o4             ! delay slot: address of lane (0)

! NaN -> NaN

        fmuls   %f16,%f16,%f16          ! Y = X * X
        ba      .spec0_exit
        st      %f16,[%i3]              ! delay slot: py[0] = Y

.spec0_inf:
        ld      [%o4],%l0               ! reload Xi (%l0 held ax, sign masked off)
        srl     %l0,29,%l0
        andcc   %l0,4,%l0               ! sign = 0 or 4
        be,a,pn %icc,.spec0_exit        ! sign clear: +Inf -> +Inf
        st      %f16,[%i3]              ! annulled delay slot: runs only when taken

        ba      .spec0_exit             ! sign set: -Inf -> zero
        st      %f3,[%i3]

.spec0_out_of_range:
        fcmpes  %fcc0,%f16,%f3
        fcmpes  %fcc1,%f16,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f16,%f40                       ! (0) y = (double) X
        fbl,a,pt        %fcc1,.spec0_cont       ! if ( X < THRESHOLDL )
        fstod   %f16,%f40                       ! (0) y = (double) X
1:
        sub     %i1,%o4,%o4             ! %o4 was 8 * stridex: address of lane (0)
        ld      [%o4],%l0               ! reload Xi
        srl     %l0,29,%l0
        and     %l0,4,%l0               ! sign = 0 or 4
        add     %l0,2048,%l0            ! offset of 1.0e30f (sign 0) or 1.0e-30f (sign 4)
        ld      [G1_CONST_TBL+%l0],%f16
        fmuls   %f16,%f16,%f16          ! Y * Y: overflow or underflow
        st      %f16,[%i3]

.spec0_exit:
        ! Shift the pipeline down by one lane: (1)-(7) become (0)-(6).
        fmovs   %f2,%f16
        mov     %l1,%l0
        fmovs   %f4,%f2
        mov     %l2,%l1
        fmovs   %f6,%f4
        mov     %l3,%l2
        fmovs   %f8,%f6                 ! %f8 was loaded in the delay slot of the bg
        mov     %l4,%l3
        mov     %l5,%l4
        mov     %l6,%l5
        mov     %l7,%l6
        lda     [%i1]%asi,%l7           ! Xi for the new lane (7), non-faulting load
        add     %i1,%i2,%i1             ! px += stridex
        mov     %o1,%o0                 ! lane addresses shift down as well
        mov     %o2,%o1
        mov     %o3,%o2
        and     %l7,G5_CONST,%l7        ! ax = Xi & 0x7fffffff
        add     %o2,%i2,%o3             ! address of the new lane (7)

        subcc   %i0,1,%i0               ! one element consumed
        bpos,pt %icc,.main_loop         ! a full group of 8 is still available
        add     %i3,%i4,%i3             ! delay slot: py += stridey
        ba      .after_main_loop
        nop

        .align  16
! Special-value handler for pipeline slot 1 (X in %f2, compare word in %l1).
! If X turns out to be an ordinary in-range value the pipeline resumes at
! .spec1_cont.  Otherwise slot 0, which is already in flight as a double in
! %f40, is finished and stored first, and then slot 1 is classified as
! finite-out-of-range / infinity / NaN.
.spec1:
        sethi   %hi(0x7f800000),%o5     ! single-precision +Inf bit pattern
        cmp     %l1,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: skip the float compares
        nop
        fcmpes  %fcc0,%f2,%f3
        fcmpes  %fcc1,%f2,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f2,%f42                        ! (1) y = (double) X
        fbl,a,pt        %fcc1,.spec1_cont       ! if ( X < THRESHOLDL )
        fstod   %f2,%f42                        ! (1) y = (double) X
1:
! Finish slot 0: scale y, split off integer part k, evaluate the
! two-coefficient polynomial in the remainder, scale by table[k & 255].
! NOTE(review): the effect of fpackfix / fpadd32 depends on the GSR scale
! factor set outside this chunk; presumably it folds k into the exponent.
        fmuld   F62_K256ONLN2,%f40,%f40
        fdtoi   %f40,%f16
        st      %f16,[%fp+tmp0]         ! bounce k through memory to reach an integer register
        fitod   %f16,%f34
        fpackfix        %f16,%f16
        fsubd   %f40,%f34,%f40          ! y -= (double) k
        fmuld   F60_KA2,%f40,%f34
        faddd   F58_KA1,%f34,%f34
        ld      [%fp+tmp0],%o0
        fmuld   %f34,%f40,%f40          ! p = (KA2 * y + KA1) * y
        and     %o0,255,%o0
        sll     %o0,3,%o0               ! byte offset of table entry k & 255
        ldd     [G1_CONST_TBL+%o0],%f34
        fpadd32 %f16,%f34,%f34
        fmuld   %f34,%f40,%f40
        faddd   %f34,%f40,%f40          ! result = T + T * p
        fdtos   %f40,%f26
        st      %f26,[%i3]
        add     %i3,%i4,%i3

! Classify slot 1 (%o5 still holds 0x7f800000).
        cmp     %l1,%o5
        bl,pt   %icc,.spec1_out_of_range
        sll     %i2,3,%o4               ! delay slot: %o4 = 8 * stride

        ble,pn  %icc,.spec1_inf
        sub     %i1,%o4,%o4             ! delay slot: %o4 = %i1 - 8 strides

! NaN -> NaN

        fmuls   %f2,%f2,%f2
        ba      .spec1_exit
        st      %f2,[%i3]

! Slot 1 is an infinity.  %o4 arrives as %i1 - 8 strides; adding one stride
! gives the address the raw word is re-read from.  Sign clear -> store X
! (%f2); sign set -> store %f3.
.spec1_inf:
        add     %o4,%i2,%o4             ! %o4 = %i1 - 7 strides
        ld      [%o4],%l0
        srl     %l0,29,%l0
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec1_exit
        st      %f2,[%i3]               ! annulled delay slot: only when sign bit clear

        ba      .spec1_exit
        st      %f3,[%i3]

! Slot 1 is finite but out of range.  %o4 arrives as 8 * stride; the raw
! word is re-read from %i1 - 7 strides and its sign selects one of two
! constants stored right after the 256-entry table; the square of that
! constant is the result.
! NOTE(review): constants are presumably huge / tiny - confirm in table.
.spec1_out_of_range:
        sub     %i1,%o4,%o4
        add     %o4,%i2,%o4
        ld      [%o4],%l0
        srl     %l0,29,%l0
        and     %l0,4,%l0               ! 0 when sign clear, 4 when sign set
        add     %l0,2048,%l0
        ld      [G1_CONST_TBL+%l0],%f2
        fmuls   %f2,%f2,%f2
        st      %f2,[%i3]

! Two elements (slots 0 and 1) have been consumed: move slots 2..7 down by
! two, load two new words into %l6 / %l7, rebuild %o0-%o3 and restart the
! main loop if the count %i0 stays non-negative after subtracting 2.
.spec1_exit:
        fmovs   %f4,%f16
        mov     %l2,%l0
        fmovs   %f6,%f2
        mov     %l3,%l1
        fmovs   %f8,%f4
        mov     %l4,%l2
        fmovs   %f10,%f6
        mov     %l5,%l3
        mov     %l6,%l4
        mov     %l7,%l5
        lda     [%i1]%asi,%l6
        add     %i1,%i2,%i1
        lda     [%i1]%asi,%l7
        add     %i1,%i2,%i1
        and     %l6,G5_CONST,%l6
        and     %l7,G5_CONST,%l7
        mov     %o2,%o0
        mov     %o3,%o1
        add     %o1,%i2,%o2
        add     %o2,%i2,%o3

        subcc   %i0,2,%i0
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot: step output pointer past the slot 1 result
        ba      .after_main_loop
        nop

        .align  16
! Special-value handler for pipeline slot 2 (X in %f4, compare word in %l2).
! An ordinary in-range X resumes at .spec2_cont.  Otherwise slots 0 and 1
! (doubles in %f40 / %f42) are finished and stored, then slot 2 is
! classified as finite-out-of-range / infinity / NaN.
.spec2:
        sethi   %hi(0x7f800000),%o5     ! single-precision +Inf bit pattern
        cmp     %l2,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: skip the float compares
        nop
        fcmpes  %fcc0,%f4,%f3
        fcmpes  %fcc1,%f4,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f4,%f44                        ! (2) y = (double) X
        fbl,a,pt        %fcc1,.spec2_cont       ! if ( X < THRESHOLDL )
        fstod   %f4,%f44                        ! (2) y = (double) X
1:
! Finish slots 0 and 1, interleaved: scale y, k = (int) y, y -= k,
! p = (KA2 * y + KA1) * y, T = table[k & 255] adjusted by fpadd32,
! result = T + T * p.
! NOTE(review): fpackfix / fpadd32 behaviour depends on the GSR scale
! factor set outside this chunk.
        fmuld   F62_K256ONLN2,%f40,%f40

        fmuld   F62_K256ONLN2,%f42,%f42

        fdtoi   %f40,%f16
        st      %f16,[%fp+tmp0]

        fdtoi   %f42,%f2
        st      %f2,[%fp+tmp1]

        fitod   %f16,%f34
        fpackfix        %f16,%f16

        fitod   %f2,%f18
        fpackfix        %f2,%f2

        fsubd   %f40,%f34,%f40

        fsubd   %f42,%f18,%f42

        fmuld   F60_KA2,%f40,%f34

        fmuld   F60_KA2,%f42,%f18

        faddd   F58_KA1,%f34,%f34

        faddd   F58_KA1,%f18,%f18

        ld      [%fp+tmp0],%o0
        fmuld   %f34,%f40,%f40

        ld      [%fp+tmp1],%o1
        fmuld   %f18,%f42,%f42

        and     %o0,255,%o0

        and     %o1,255,%o1

        sll     %o0,3,%o0

        sll     %o1,3,%o1

        ldd     [G1_CONST_TBL+%o0],%f34

        ldd     [G1_CONST_TBL+%o1],%f18

        fpadd32 %f16,%f34,%f34

        fpadd32 %f2,%f18,%f18

        fmuld   %f34,%f40,%f40

        fmuld   %f18,%f42,%f42

        faddd   %f34,%f40,%f40

        faddd   %f18,%f42,%f42

        fdtos   %f40,%f26
        st      %f26,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f42,%f18
        st      %f18,[%o4]
        add     %o4,%i4,%i3

! Classify slot 2.  %o5 is consumed by the cmp before being reused.
        cmp     %l2,%o5
        sll     %i2,1,%o5               ! %o5 = 2 * stride
        bl,pt   %icc,.spec2_out_of_range
        sll     %i2,2,%o4               ! delay slot: %o4 = 4 * stride

        ble,pn  %icc,.spec2_inf
        add     %o4,%o5,%o4             ! delay slot: %o4 = 6 * stride

! NaN -> NaN

        fmuls   %f4,%f4,%f4
        ba      .spec2_exit
        st      %f4,[%i3]

! Slot 2 is an infinity.  %o4 arrives as 6 * stride; the raw word is re-read
! from %i1 - 6 strides.  Sign clear -> store X (%f4); sign set -> store %f3.
.spec2_inf:
        sub     %i1,%o4,%o4
        ld      [%o4],%l0
        srl     %l0,29,%l0
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec2_exit
        st      %f4,[%i3]               ! annulled delay slot: only when sign bit clear

        ba      .spec2_exit
        st      %f3,[%i3]

! Slot 2 is finite but out of range.  On entry %o4 = 4 * stride and
! %o5 = 2 * stride; the raw word is re-read from %i1 - 6 strides and its
! sign selects one of the two constants stored after the 256-entry table.
! NOTE(review): constants are presumably huge / tiny - confirm in table.
.spec2_out_of_range:
        add     %o4,%o5,%o4
        sub     %i1,%o4,%o4
        ld      [%o4],%l0
        srl     %l0,29,%l0
        and     %l0,4,%l0               ! 0 when sign clear, 4 when sign set
        add     %l0,2048,%l0
        ld      [G1_CONST_TBL+%l0],%f2
        fmuls   %f2,%f2,%f2             ! squared constant is the stored result
        st      %f2,[%i3]

! Three elements (slots 0..2) have been consumed: move slots 3..7 down by
! three, rebuild %o0-%o3 starting from the old %o3, load three new words
! into %l5..%l7 and restart the main loop if the count %i0 stays
! non-negative after subtracting 3.
.spec2_exit:
        fmovs   %f6,%f16
        mov     %l3,%l0
        mov     %o3,%o0
        fmovs   %f8,%f2
        mov     %l4,%l1
        add     %o0,%i2,%o1
        fmovs   %f10,%f4
        mov     %l5,%l2
        add     %o1,%i2,%o2
        fmovs   %f12,%f6
        mov     %l6,%l3
        mov     %l7,%l4
        lda     [%i1]%asi,%l5
        add     %i1,%i2,%i1
        add     %o2,%i2,%o3
        lda     [%i1]%asi,%l6
        add     %i1,%i2,%i1
        lda     [%i1]%asi,%l7
        add     %i1,%i2,%i1
        and     %l5,G5_CONST,%l5
        and     %l6,G5_CONST,%l6
        and     %l7,G5_CONST,%l7

        subcc   %i0,3,%i0
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot: step output pointer past the slot 2 result
        ba      .after_main_loop
        nop
! Special-value handler for pipeline slot 3 (X in %f6, compare word in %l3).
! An ordinary in-range X resumes at .spec3_cont.  Otherwise slots 0..2
! (doubles in %f40 / %f42 / %f44) are finished and stored, then slot 3 is
! classified as finite-out-of-range / infinity / NaN.
.spec3:
        sethi   %hi(0x7f800000),%o5     ! single-precision +Inf bit pattern
        cmp     %l3,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: skip the float compares
        nop
        fcmpes  %fcc0,%f6,%f3
        fcmpes  %fcc1,%f6,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f6,%f46                        ! (3) y = (double) X
        fbl,a,pt        %fcc1,.spec3_cont       ! if ( X < THRESHOLDL )
        fstod   %f6,%f46                        ! (3) y = (double) X
1:
! Finish slots 0..2, interleaved: scale y, k = (int) y, y -= k,
! p = (KA2 * y + KA1) * y, T = table[k & 255] adjusted by fpadd32,
! result = T + T * p.
! NOTE(review): fpackfix / fpadd32 behaviour depends on the GSR scale
! factor set outside this chunk.
        fmuld   F62_K256ONLN2,%f40,%f40

        fmuld   F62_K256ONLN2,%f42,%f42

        fmuld   F62_K256ONLN2,%f44,%f44

        fdtoi   %f40,%f16
        st      %f16,[%fp+tmp0]

        fdtoi   %f42,%f2
        st      %f2,[%fp+tmp1]

        fdtoi   %f44,%f4
        st      %f4,[%fp+tmp2]

        fitod   %f16,%f34
        fpackfix        %f16,%f16

        fitod   %f2,%f18
        fpackfix        %f2,%f2

        fitod   %f4,%f20
        fpackfix        %f4,%f4

        fsubd   %f40,%f34,%f40

        fsubd   %f42,%f18,%f42

        fsubd   %f44,%f20,%f44

        fmuld   F60_KA2,%f40,%f34

        fmuld   F60_KA2,%f42,%f18

        fmuld   F60_KA2,%f44,%f20

        faddd   F58_KA1,%f34,%f34

        faddd   F58_KA1,%f18,%f18

        faddd   F58_KA1,%f20,%f20

        ld      [%fp+tmp0],%o0
        fmuld   %f34,%f40,%f40

        ld      [%fp+tmp1],%o1
        fmuld   %f18,%f42,%f42

        ld      [%fp+tmp2],%o2
        fmuld   %f20,%f44,%f44

        and     %o0,255,%o0
        and     %o1,255,%o1

        and     %o2,255,%o2
        sll     %o0,3,%o0

        sll     %o1,3,%o1
        sll     %o2,3,%o2

        ldd     [G1_CONST_TBL+%o0],%f34

        ldd     [G1_CONST_TBL+%o1],%f18

        ldd     [G1_CONST_TBL+%o2],%f20

        fpadd32 %f16,%f34,%f34

        fpadd32 %f2,%f18,%f18

        fpadd32 %f4,%f20,%f20

        fmuld   %f34,%f40,%f40

        fmuld   %f18,%f42,%f42

        fmuld   %f20,%f44,%f44

        faddd   %f34,%f40,%f40

        faddd   %f18,%f42,%f42

        faddd   %f20,%f44,%f44

        fdtos   %f40,%f26
        st      %f26,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f42,%f18
        st      %f18,[%o4]
        add     %o4,%i4,%i3

        fdtos   %f44,%f20
        st      %f20,[%i3]
        add     %i3,%i4,%i3

! Classify slot 3 (%o5 still holds 0x7f800000).
        cmp     %l3,%o5
        bl,pt   %icc,.spec3_out_of_range
        sll     %i2,2,%o4               ! delay slot: %o4 = 4 * stride

        ble,pn  %icc,.spec3_inf
        add     %o4,%i2,%o4             ! delay slot: %o4 = 5 * stride

! NaN -> NaN

        fmuls   %f6,%f6,%f6
        ba      .spec3_exit
        st      %f6,[%i3]

! Slot 3 is an infinity.  %o4 arrives as 5 * stride; the raw word is re-read
! from %i1 - 5 strides.  Sign clear -> store X (%f6); sign set -> store %f3.
.spec3_inf:
        sub     %i1,%o4,%o4
        ld      [%o4],%l0
        srl     %l0,29,%l0
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec3_exit
        st      %f6,[%i3]               ! annulled delay slot: only when sign bit clear

        ba      .spec3_exit
        st      %f3,[%i3]

! Slot 3 is finite but out of range.  On entry %o4 = 4 * stride; the raw
! word is re-read from %i1 - 5 strides and its sign selects one of the two
! constants stored after the 256-entry table.
! NOTE(review): constants are presumably huge / tiny - confirm in table.
.spec3_out_of_range:
        add     %o4,%i2,%o4
        sub     %i1,%o4,%o4
        ld      [%o4],%l0
        srl     %l0,29,%l0
        and     %l0,4,%l0               ! 0 when sign clear, 4 when sign set
        add     %l0,2048,%l0
        ld      [G1_CONST_TBL+%l0],%f2
        fmuls   %f2,%f2,%f2             ! squared constant is the stored result
        st      %f2,[%i3]

! Four elements (slots 0..3) have been consumed: move slots 4..7 down to
! 0..3, then load four new words into %l4..%l7 while recording their
! addresses in %o0..%o3, leaving %i1 one stride past the last of them.
! Restart the main loop if the count %i0 stays non-negative after
! subtracting 4.
.spec3_exit:
        fmovs   %f8,%f16
        mov     %l4,%l0
        fmovs   %f10,%f2
        mov     %l5,%l1
        fmovs   %f12,%f4
        mov     %l6,%l2
        fmovs   %f14,%f6
        mov     %l7,%l3
        mov     %i1,%o0
        lda     [%o0]%asi,%l4
        add     %o0,%i2,%o1
        lda     [%o1]%asi,%l5
        add     %o1,%i2,%o2
        lda     [%o2]%asi,%l6
        add     %o2,%i2,%o3
        lda     [%o3]%asi,%l7
        add     %o3,%i2,%i1
        and     %l4,G5_CONST,%l4
        and     %l5,G5_CONST,%l5
        and     %l6,G5_CONST,%l6
        and     %l7,G5_CONST,%l7

        subcc   %i0,4,%i0
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot: step output pointer past the slot 3 result
        ba      .after_main_loop
        nop

        .align  16
! Special-value handler for pipeline slot 4 (X in %f8, compare word in %l4).
! An ordinary in-range X resumes at .spec4_cont.  Otherwise slots 0..3
! (doubles in %f40..%f46) are finished and stored, then slot 4 is
! classified as finite-out-of-range / infinity / NaN.
.spec4:
        sethi   %hi(0x7f800000),%o5     ! single-precision +Inf bit pattern
        cmp     %l4,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: skip the float compares
        nop
        fcmpes  %fcc0,%f8,%f3
        fcmpes  %fcc1,%f8,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f8,%f48                        ! (4) y = (double) X
        fbl,a,pt        %fcc1,.spec4_cont       ! if ( X < THRESHOLDL )
        fstod   %f8,%f48                        ! (4) y = (double) X
1:
! Finish slots 0..3.  Only %f42..%f46 are scaled by K256ONLN2 here.
! NOTE(review): %f40 is presumably already scaled by the main loop before
! this handler is reached - confirm against .main_loop.
! NOTE(review): fpackfix / fpadd32 behaviour depends on the GSR scale
! factor set outside this chunk.
        fmuld   F62_K256ONLN2,%f42,%f42

        fmuld   F62_K256ONLN2,%f44,%f44

        fmuld   F62_K256ONLN2,%f46,%f46

        fdtoi   %f40,%f16
        st      %f16,[%fp+tmp0]

        fdtoi   %f42,%f2
        st      %f2,[%fp+tmp1]

        fdtoi   %f44,%f4
        st      %f4,[%fp+tmp2]

        fdtoi   %f46,%f6
        st      %f6,[%fp+tmp3]

        fitod   %f16,%f34
        fpackfix        %f16,%f16

        fitod   %f2,%f18
        fpackfix        %f2,%f2

        fitod   %f4,%f20
        fpackfix        %f4,%f4

        fitod   %f6,%f22
        fpackfix        %f6,%f6

        fsubd   %f40,%f34,%f40

        fsubd   %f42,%f18,%f42

        fsubd   %f44,%f20,%f44

        fsubd   %f46,%f22,%f46

        fmuld   F60_KA2,%f40,%f34

        fmuld   F60_KA2,%f42,%f18

        fmuld   F60_KA2,%f44,%f20

        fmuld   F60_KA2,%f46,%f22

        faddd   F58_KA1,%f34,%f34

        faddd   F58_KA1,%f18,%f18

        faddd   F58_KA1,%f20,%f20

        faddd   F58_KA1,%f22,%f22

        ld      [%fp+tmp0],%o0
        fmuld   %f34,%f40,%f40

        ld      [%fp+tmp1],%o1
        fmuld   %f18,%f42,%f42

        ld      [%fp+tmp2],%o2
        fmuld   %f20,%f44,%f44

        ld      [%fp+tmp3],%o3
        fmuld   %f22,%f46,%f46

        and     %o0,255,%o0
        and     %o1,255,%o1

        and     %o2,255,%o2
        and     %o3,255,%o3

        sll     %o0,3,%o0
        sll     %o1,3,%o1

        sll     %o2,3,%o2
        sll     %o3,3,%o3

        ldd     [G1_CONST_TBL+%o0],%f34

        ldd     [G1_CONST_TBL+%o1],%f18

        ldd     [G1_CONST_TBL+%o2],%f20

        ldd     [G1_CONST_TBL+%o3],%f22

        fpadd32 %f16,%f34,%f34

        fpadd32 %f2,%f18,%f18

        fpadd32 %f4,%f20,%f20

        fpadd32 %f6,%f22,%f22

        fmuld   %f34,%f40,%f40

        fmuld   %f18,%f42,%f42

        fmuld   %f20,%f44,%f44

        fmuld   %f22,%f46,%f46

        faddd   %f34,%f40,%f40

        faddd   %f18,%f42,%f42

        faddd   %f20,%f44,%f44

        faddd   %f22,%f46,%f46

        fdtos   %f40,%f26
        st      %f26,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f42,%f18
        st      %f18,[%o4]
        add     %o4,%i4,%i3

        fdtos   %f44,%f20
        st      %f20,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f46,%f22
        st      %f22,[%o4]
        add     %o4,%i4,%i3

! Classify slot 4 (%o5 still holds 0x7f800000).
        cmp     %l4,%o5
        bl,pt   %icc,.spec4_out_of_range
        sll     %i2,2,%o4               ! delay slot: %o4 = 4 * stride

        ble,pn  %icc,.spec4_inf
        sub     %i1,%o4,%o4             ! delay slot: %o4 = %i1 - 4 strides

! NaN -> NaN

        fmuls   %f8,%f8,%f8
        ba      .spec4_exit
        st      %f8,[%i3]

! Slot 4 is an infinity.  %o4 already holds %i1 - 4 strides; the raw word is
! re-read from there.  Sign clear -> store X (%f8); sign set -> store %f3.
.spec4_inf:
        ld      [%o4],%l0
        srl     %l0,29,%l0
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec4_exit
        st      %f8,[%i3]               ! annulled delay slot: only when sign bit clear

        ba      .spec4_exit
        st      %f3,[%i3]

! Slot 4 is finite but out of range.  On entry %o4 = 4 * stride; the raw
! word is re-read from %i1 - 4 strides and its sign selects one of the two
! constants stored after the 256-entry table.
! NOTE(review): constants are presumably huge / tiny - confirm in table.
.spec4_out_of_range:
        sub     %i1,%o4,%o4
        ld      [%o4],%l0
        srl     %l0,29,%l0
        and     %l0,4,%l0               ! 0 when sign clear, 4 when sign set
        add     %l0,2048,%l0
        ld      [G1_CONST_TBL+%l0],%f2
        fmuls   %f2,%f2,%f2             ! squared constant is the stored result
        st      %f2,[%i3]

! Five elements (slots 0..4) have been consumed: move slots 5..7 down to
! 0..2, load slot 3 from [%i1] in both integer (%l3) and float (%f6) form,
! then load four more words into %l4..%l7 while recording their addresses
! in %o0..%o3.  Restart the main loop if the count %i0 stays non-negative
! after subtracting 5.
.spec4_exit:
        fmovs   %f10,%f16
        mov     %l5,%l0
        fmovs   %f12,%f2
        mov     %l6,%l1
        fmovs   %f14,%f4
        mov     %l7,%l2
        lda     [%i1]%asi,%l3
        lda     [%i1]%asi,%f6
        add     %i1,%i2,%o0
        lda     [%o0]%asi,%l4
        add     %o0,%i2,%o1
        lda     [%o1]%asi,%l5
        add     %o1,%i2,%o2
        lda     [%o2]%asi,%l6
        add     %o2,%i2,%o3
        lda     [%o3]%asi,%l7
        add     %o3,%i2,%i1
        and     %l3,G5_CONST,%l3
        and     %l4,G5_CONST,%l4
        and     %l5,G5_CONST,%l5
        and     %l6,G5_CONST,%l6
        and     %l7,G5_CONST,%l7

        subcc   %i0,5,%i0
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot: step output pointer past the slot 4 result
        ba      .after_main_loop
        nop

        .align 16
! Special-value handler for pipeline slot 5 (X in %f10, compare word in %l5).
! An ordinary in-range X resumes at .spec5_cont.  Otherwise slots 0..4
! (doubles in %f40..%f48) are finished and stored, then slot 5 is
! classified as finite-out-of-range / infinity / NaN.
.spec5:
        sethi   %hi(0x7f800000),%o5     ! single-precision +Inf bit pattern
        cmp     %l5,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: skip the float compares
        nop
        fcmpes  %fcc0,%f10,%f3
        fcmpes  %fcc1,%f10,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f10,%f50                       ! (5) y = (double) X
        fbl,a,pt        %fcc1,.spec5_cont       ! if ( X < THRESHOLDL )
        fstod   %f10,%f50                       ! (5) y = (double) X
1:
! Finish slots 0..4.  Only %f44..%f48 are scaled by K256ONLN2 here.
! NOTE(review): %f40 / %f42 are presumably already scaled by the main loop
! - confirm against .main_loop.
! NOTE(review): fpackfix / fpadd32 behaviour depends on the GSR scale
! factor set outside this chunk.
! %o4 carries the slot 4 table index; %o5 is left untouched so that it
! still holds 0x7f800000 for the classification below.
        fmuld   F62_K256ONLN2,%f44,%f44

        fmuld   F62_K256ONLN2,%f46,%f46

        fdtoi   %f40,%f16
        st      %f16,[%fp+tmp0]
        fmuld   F62_K256ONLN2,%f48,%f48

        fdtoi   %f42,%f2
        st      %f2,[%fp+tmp1]

        fdtoi   %f44,%f4
        st      %f4,[%fp+tmp2]

        fdtoi   %f46,%f6
        st      %f6,[%fp+tmp3]

        fdtoi   %f48,%f8
        st      %f8,[%fp+tmp4]

        fitod   %f16,%f34
        fpackfix        %f16,%f16

        fitod   %f2,%f18
        fpackfix        %f2,%f2

        fitod   %f4,%f20
        fpackfix        %f4,%f4

        fitod   %f6,%f22
        fpackfix        %f6,%f6

        fitod   %f8,%f24
        fpackfix        %f8,%f8

        ld      [%fp+tmp0],%o0
        fsubd   %f40,%f34,%f40

        ld      [%fp+tmp1],%o1
        fsubd   %f42,%f18,%f42

        ld      [%fp+tmp2],%o2
        and     %o0,255,%o0
        fsubd   %f44,%f20,%f44

        ld      [%fp+tmp3],%o3
        and     %o1,255,%o1
        fsubd   %f46,%f22,%f46

        sll     %o0,3,%o0
        sll     %o1,3,%o1
        fmuld   F60_KA2,%f40,%f34
        fsubd   %f48,%f24,%f48

        and     %o2,255,%o2
        fmuld   F60_KA2,%f42,%f18

        sll     %o2,3,%o2
        fmuld   F60_KA2,%f44,%f20

        ld      [%fp+tmp4],%o4
        and     %o3,255,%o3
        fmuld   F60_KA2,%f46,%f22

        sll     %o3,3,%o3
        fmuld   F60_KA2,%f48,%f24
        faddd   F58_KA1,%f34,%f34

        and     %o4,255,%o4
        faddd   F58_KA1,%f18,%f18

        faddd   F58_KA1,%f20,%f20

        faddd   F58_KA1,%f22,%f22

        fmuld   %f34,%f40,%f40
        ldd     [G1_CONST_TBL+%o0],%f34
        faddd   F58_KA1,%f24,%f24

        fmuld   %f18,%f42,%f42
        ldd     [G1_CONST_TBL+%o1],%f18

        fmuld   %f20,%f44,%f44
        ldd     [G1_CONST_TBL+%o2],%f20

        fmuld   %f22,%f46,%f46
        ldd     [G1_CONST_TBL+%o3],%f22
        sll     %o4,3,%o4

        fmuld   %f24,%f48,%f48
        ldd     [G1_CONST_TBL+%o4],%f24
        fpadd32 %f16,%f34,%f34

        fpadd32 %f2,%f18,%f18

        fpadd32 %f4,%f20,%f20

        fpadd32 %f6,%f22,%f22

        fpadd32 %f8,%f24,%f24
        fmuld   %f34,%f40,%f40

        fmuld   %f18,%f42,%f42

        fmuld   %f20,%f44,%f44

        fmuld   %f22,%f46,%f46

        fmuld   %f24,%f48,%f48
        faddd   %f34,%f40,%f40

        faddd   %f18,%f42,%f42

        faddd   %f20,%f44,%f44

        faddd   %f22,%f46,%f46

        faddd   %f24,%f48,%f48

        fdtos   %f40,%f26
        st      %f26,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f42,%f18
        st      %f18,[%o4]
        add     %o4,%i4,%i3

        fdtos   %f44,%f20
        st      %f20,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f46,%f22
        st      %f22,[%o4]
        add     %o4,%i4,%i3

        fdtos   %f48,%f24
        st      %f24,[%i3]
        add     %i3,%i4,%i3

! Classify slot 5 (%o5 still holds 0x7f800000).
        cmp     %l5,%o5
        bl,pt   %icc,.spec5_out_of_range
        sll     %i2,2,%o4               ! delay slot: %o4 = 4 * stride

        ble,pn  %icc,.spec5_inf
        sub     %o4,%i2,%o4             ! delay slot: %o4 = 3 * stride

! NaN -> NaN

        fmuls   %f10,%f10,%f10
        ba      .spec5_exit
        st      %f10,[%i3]

! Slot 5 is an infinity.  %o4 arrives as 3 * stride; the raw word is re-read
! from %i1 - 3 strides.  Sign clear -> store X (%f10); sign set -> store %f3.
.spec5_inf:
        sub     %i1,%o4,%o4
        ld      [%o4],%l0
        srl     %l0,29,%l0
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec5_exit
        st      %f10,[%i3]              ! annulled delay slot: only when sign bit clear

        ba      .spec5_exit
        st      %f3,[%i3]

! Slot 5 is finite but out of range.  On entry %o4 = 4 * stride; the raw
! word is re-read from %i1 - 3 strides and its sign selects one of the two
! constants stored after the 256-entry table.
! NOTE(review): constants are presumably huge / tiny - confirm in table.
.spec5_out_of_range:
        sub     %o4,%i2,%o4
        sub     %i1,%o4,%o4
        ld      [%o4],%l0
        srl     %l0,29,%l0
        and     %l0,4,%l0               ! 0 when sign clear, 4 when sign set
        add     %l0,2048,%l0
        ld      [G1_CONST_TBL+%l0],%f2
        fmuls   %f2,%f2,%f2             ! squared constant is the stored result
        st      %f2,[%i3]

! Six elements (slots 0..5) have been consumed: move slots 6 and 7 down to
! 0 and 1, load slots 2 and 3 in both integer and float form, then load
! four more words into %l4..%l7 while recording their addresses in
! %o0..%o3.  Restart the main loop if the count %i0 stays non-negative
! after subtracting 6.
.spec5_exit:
        fmovs   %f12,%f16
        mov     %l6,%l0
        fmovs   %f14,%f2
        mov     %l7,%l1
        lda     [%i1]%asi,%l2
        lda     [%i1]%asi,%f4
        add     %i1,%i2,%i1
        lda     [%i1]%asi,%l3
        lda     [%i1]%asi,%f6
        add     %i1,%i2,%o0
        lda     [%o0]%asi,%l4
        add     %o0,%i2,%o1
        lda     [%o1]%asi,%l5
        add     %o1,%i2,%o2
        lda     [%o2]%asi,%l6
        add     %o2,%i2,%o3
        lda     [%o3]%asi,%l7
        add     %o3,%i2,%i1
        and     %l2,G5_CONST,%l2
        and     %l3,G5_CONST,%l3
        and     %l4,G5_CONST,%l4
        and     %l5,G5_CONST,%l5
        and     %l6,G5_CONST,%l6
        and     %l7,G5_CONST,%l7

        subcc   %i0,6,%i0
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot: step output pointer past the slot 5 result
        ba      .after_main_loop
        nop
! Special-value handler for pipeline slot 6 (X in %f12, compare word in %l6).
! An ordinary in-range X resumes at .spec6_cont.  Otherwise slots 0..5
! (doubles in %f40..%f50) are finished and stored, then slot 6 is
! classified as finite-out-of-range / infinity / NaN.
.spec6:
        sethi   %hi(0x7f800000),%o5     ! single-precision +Inf bit pattern
        cmp     %l6,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: skip the float compares
        nop
        fcmpes  %fcc0,%f12,%f3
        fcmpes  %fcc1,%f12,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f12,%f52                       ! (6) y = (double) X
        fbl,a,pt        %fcc1,.spec6_cont       ! if ( X < THRESHOLDL )
        fstod   %f12,%f52                       ! (6) y = (double) X
1:
! Finish slots 0..5.  Only %f46..%f50 are scaled by K256ONLN2 here.
! NOTE(review): %f40..%f44 are presumably already scaled by the main loop
! - confirm against .main_loop.
! NOTE(review): fpackfix / fpadd32 behaviour depends on the GSR scale
! factor set outside this chunk.
! %o5 is reused as the slot 5 table index, so the Inf pattern is rebuilt
! with sethi before the classification below.
        fmuld   F62_K256ONLN2,%f46,%f46

        fdtoi   %f40,%f16
        st      %f16,[%fp+tmp0]
        fmuld   F62_K256ONLN2,%f48,%f48

        fdtoi   %f42,%f2
        st      %f2,[%fp+tmp1]
        fmuld   F62_K256ONLN2,%f50,%f50

        fdtoi   %f44,%f4
        st      %f4,[%fp+tmp2]

        fdtoi   %f46,%f6
        st      %f6,[%fp+tmp3]

        fdtoi   %f48,%f8
        st      %f8,[%fp+tmp4]

        fdtoi   %f50,%f10
        st      %f10,[%fp+tmp5]

        fitod   %f16,%f34
        fpackfix        %f16,%f16

        fitod   %f2,%f18
        fpackfix        %f2,%f2

        fitod   %f4,%f20
        fpackfix        %f4,%f4

        fitod   %f6,%f22
        fpackfix        %f6,%f6

        fitod   %f8,%f24
        fpackfix        %f8,%f8

        fitod   %f10,%f26
        fpackfix        %f10,%f10

        ld      [%fp+tmp0],%o0
        fsubd   %f40,%f34,%f40

        ld      [%fp+tmp1],%o1
        fsubd   %f42,%f18,%f42

        ld      [%fp+tmp2],%o2
        and     %o0,255,%o0
        fsubd   %f44,%f20,%f44

        ld      [%fp+tmp3],%o3
        and     %o1,255,%o1
        fsubd   %f46,%f22,%f46

        sll     %o0,3,%o0
        sll     %o1,3,%o1
        fmuld   F60_KA2,%f40,%f34
        fsubd   %f48,%f24,%f48

        and     %o2,255,%o2
        fmuld   F60_KA2,%f42,%f18
        fsubd   %f50,%f26,%f50

        sll     %o2,3,%o2
        fmuld   F60_KA2,%f44,%f20

        ld      [%fp+tmp4],%o4
        and     %o3,255,%o3
        fmuld   F60_KA2,%f46,%f22

        ld      [%fp+tmp5],%o5
        sll     %o3,3,%o3
        fmuld   F60_KA2,%f48,%f24
        faddd   F58_KA1,%f34,%f34

        and     %o4,255,%o4
        fmuld   F60_KA2,%f50,%f26
        faddd   F58_KA1,%f18,%f18

        and     %o5,255,%o5
        faddd   F58_KA1,%f20,%f20

        sll     %o5,3,%o5
        faddd   F58_KA1,%f22,%f22

        fmuld   %f34,%f40,%f40
        ldd     [G1_CONST_TBL+%o0],%f34
        faddd   F58_KA1,%f24,%f24

        fmuld   %f18,%f42,%f42
        ldd     [G1_CONST_TBL+%o1],%f18
        faddd   F58_KA1,%f26,%f26

        fmuld   %f20,%f44,%f44
        ldd     [G1_CONST_TBL+%o2],%f20

        fmuld   %f22,%f46,%f46
        ldd     [G1_CONST_TBL+%o3],%f22
        sll     %o4,3,%o4

        fmuld   %f24,%f48,%f48
        ldd     [G1_CONST_TBL+%o4],%f24
        fpadd32 %f16,%f34,%f34

        fmuld   %f26,%f50,%f50
        ldd     [G1_CONST_TBL+%o5],%f26
        fpadd32 %f2,%f18,%f18

        fpadd32 %f4,%f20,%f20

        fpadd32 %f6,%f22,%f22

        fpadd32 %f8,%f24,%f24
        fmuld   %f34,%f40,%f40

        fpadd32 %f10,%f26,%f26
        fmuld   %f18,%f42,%f42

        fmuld   %f20,%f44,%f44

        fmuld   %f22,%f46,%f46

        fmuld   %f24,%f48,%f48
        faddd   %f34,%f40,%f40

        fmuld   %f26,%f50,%f50
        faddd   %f18,%f42,%f42

        faddd   %f20,%f44,%f44

        faddd   %f22,%f46,%f46

        faddd   %f24,%f48,%f48

        faddd   %f26,%f50,%f50

        fdtos   %f40,%f26
        st      %f26,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f42,%f18
        st      %f18,[%o4]
        add     %o4,%i4,%i3

        fdtos   %f44,%f20
        st      %f20,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f46,%f22
        st      %f22,[%o4]
        add     %o4,%i4,%i3

        fdtos   %f48,%f24
        st      %f24,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f50,%f26
        st      %f26,[%o4]
        add     %o4,%i4,%i3

! Classify slot 6.
        sethi   %hi(0x7f800000),%o5     ! rebuilt: %o5 was used as a table index above
        cmp     %l6,%o5
        bl,pt   %icc,.spec6_out_of_range
        sll     %i2,1,%o4               ! delay slot: %o4 = 2 * stride

        ble,pn  %icc,.spec6_inf
        sub     %i1,%o4,%o4             ! delay slot: %o4 = %i1 - 2 strides

! NaN -> NaN

        fmuls   %f12,%f12,%f12
        ba      .spec6_exit
        st      %f12,[%i3]

! Slot 6 is an infinity.  %o4 already holds %i1 - 2 strides; the raw word is
! re-read from there.  Sign clear -> store X (%f12); sign set -> store %f3.
.spec6_inf:
        ld      [%o4],%l0
        srl     %l0,29,%l0
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec6_exit
        st      %f12,[%i3]              ! annulled delay slot: only when sign bit clear

        ba      .spec6_exit
        st      %f3,[%i3]

! Slot 6 is finite but out of range.  On entry %o4 = 2 * stride; the raw
! word is re-read from %i1 - 2 strides and its sign selects one of the two
! constants stored after the 256-entry table.
! NOTE(review): constants are presumably huge / tiny - confirm in table.
.spec6_out_of_range:
        sub     %i1,%o4,%o4
        ld      [%o4],%l0
        srl     %l0,29,%l0
        and     %l0,4,%l0               ! 0 when sign clear, 4 when sign set
        add     %l0,2048,%l0
        ld      [G1_CONST_TBL+%l0],%f2
        fmuls   %f2,%f2,%f2             ! squared constant is the stored result
        st      %f2,[%i3]

! Seven elements (slots 0..6) have been consumed: move slot 7 down to 0,
! load slots 1..3 in both integer and float form, then load four more words
! into %l4..%l7 while recording their addresses in %o0..%o3.  Restart the
! main loop if the count %i0 stays non-negative after subtracting 7.
.spec6_exit:
        fmovs   %f14,%f16
        mov     %l7,%l0
        lda     [%i1]%asi,%l1
        lda     [%i1]%asi,%f2
        add     %i1,%i2,%i1
        lda     [%i1]%asi,%l2
        lda     [%i1]%asi,%f4
        add     %i1,%i2,%i1
        lda     [%i1]%asi,%l3
        lda     [%i1]%asi,%f6
        add     %i1,%i2,%o0
        lda     [%o0]%asi,%l4
        add     %o0,%i2,%o1
        lda     [%o1]%asi,%l5
        add     %o1,%i2,%o2
        lda     [%o2]%asi,%l6
        add     %o2,%i2,%o3
        lda     [%o3]%asi,%l7
        add     %o3,%i2,%i1
        and     %l1,G5_CONST,%l1
        and     %l2,G5_CONST,%l2
        and     %l3,G5_CONST,%l3
        and     %l4,G5_CONST,%l4
        and     %l5,G5_CONST,%l5
        and     %l6,G5_CONST,%l6
        and     %l7,G5_CONST,%l7

        subcc   %i0,7,%i0
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot: step output pointer past the slot 6 result
        ba      .after_main_loop
        nop

        .align  16
! Special-value handler for pipeline slot 7 (X in %f14, compare word in %l7).
! An ordinary in-range X resumes at .spec7_cont.  Otherwise slots 0..6
! (doubles in %f40..%f52) are finished and stored, then slot 7 is
! classified as finite-out-of-range / infinity / NaN.
.spec7:
        sethi   %hi(0x7f800000),%o5     ! single-precision +Inf bit pattern
        cmp     %l7,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: skip the float compares
        nop
        fcmpes  %fcc0,%f14,%f3
        fcmpes  %fcc1,%f14,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f14,%f54                       ! (7) y = (double) X
        fbl,a,pt        %fcc1,.spec7_cont       ! if ( X < THRESHOLDL )
        fstod   %f14,%f54                       ! (7) y = (double) X
1:
! Finish slots 0..6.  Only %f48..%f52 are scaled by K256ONLN2 here.
! NOTE(review): %f40..%f46 are presumably already scaled by the main loop
! - confirm against .main_loop.
! NOTE(review): fpackfix / fpadd32 behaviour depends on the GSR scale
! factor set outside this chunk.
! %o5 and %o7 are reused as table indices for slots 5 and 6, so the Inf
! pattern is rebuilt with sethi before the classification below.
        fdtoi   %f40,%f16
        st      %f16,[%fp+tmp0]
        fmuld   F62_K256ONLN2,%f48,%f48

        fdtoi   %f42,%f2
        st      %f2,[%fp+tmp1]
        fmuld   F62_K256ONLN2,%f50,%f50

        fdtoi   %f44,%f4
        st      %f4,[%fp+tmp2]
        fmuld   F62_K256ONLN2,%f52,%f52

        fdtoi   %f46,%f6
        st      %f6,[%fp+tmp3]

        fdtoi   %f48,%f8
        st      %f8,[%fp+tmp4]

        fdtoi   %f50,%f10
        st      %f10,[%fp+tmp5]

        fdtoi   %f52,%f12
        st      %f12,[%fp+tmp6]

        fitod   %f16,%f34
        fpackfix        %f16,%f16

        fitod   %f2,%f18
        fpackfix        %f2,%f2

        fitod   %f4,%f20
        fpackfix        %f4,%f4

        fitod   %f6,%f22
        fpackfix        %f6,%f6

        fitod   %f8,%f24
        fpackfix        %f8,%f8

        fitod   %f10,%f26
        fpackfix        %f10,%f10

        fitod   %f12,%f28
        fpackfix        %f12,%f12

        ld      [%fp+tmp0],%o0
        fsubd   %f40,%f34,%f40

        ld      [%fp+tmp1],%o1
        fsubd   %f42,%f18,%f42

        ld      [%fp+tmp2],%o2
        and     %o0,255,%o0
        fsubd   %f44,%f20,%f44

        ld      [%fp+tmp3],%o3
        and     %o1,255,%o1
        fsubd   %f46,%f22,%f46

        sll     %o0,3,%o0
        sll     %o1,3,%o1
        fmuld   F60_KA2,%f40,%f34
        fsubd   %f48,%f24,%f48

        and     %o2,255,%o2
        fmuld   F60_KA2,%f42,%f18
        fsubd   %f50,%f26,%f50

        sll     %o2,3,%o2
        fmuld   F60_KA2,%f44,%f20
        fsubd   %f52,%f28,%f52

        ld      [%fp+tmp4],%o4
        and     %o3,255,%o3
        fmuld   F60_KA2,%f46,%f22

        ld      [%fp+tmp5],%o5
        sll     %o3,3,%o3
        fmuld   F60_KA2,%f48,%f24
        faddd   F58_KA1,%f34,%f34

        ld      [%fp+tmp6],%o7
        and     %o4,255,%o4
        fmuld   F60_KA2,%f50,%f26
        faddd   F58_KA1,%f18,%f18

        and     %o5,255,%o5
        fmuld   F60_KA2,%f52,%f28
        faddd   F58_KA1,%f20,%f20

        sll     %o5,3,%o5
        faddd   F58_KA1,%f22,%f22

        fmuld   %f34,%f40,%f40
        ldd     [G1_CONST_TBL+%o0],%f34
        faddd   F58_KA1,%f24,%f24

        fmuld   %f18,%f42,%f42
        ldd     [G1_CONST_TBL+%o1],%f18
        faddd   F58_KA1,%f26,%f26

        fmuld   %f20,%f44,%f44
        ldd     [G1_CONST_TBL+%o2],%f20
        faddd   F58_KA1,%f28,%f28

        fmuld   %f22,%f46,%f46
        ldd     [G1_CONST_TBL+%o3],%f22
        sll     %o4,3,%o4

        fmuld   %f24,%f48,%f48
        ldd     [G1_CONST_TBL+%o4],%f24
        and     %o7,255,%o7
        fpadd32 %f16,%f34,%f34

        fmuld   %f26,%f50,%f50
        ldd     [G1_CONST_TBL+%o5],%f26
        sll     %o7,3,%o7
        fpadd32 %f2,%f18,%f18

        fmuld   %f28,%f52,%f52
        ldd     [G1_CONST_TBL+%o7],%f28
        fpadd32 %f4,%f20,%f20

        fpadd32 %f6,%f22,%f22

        fpadd32 %f8,%f24,%f24
        fmuld   %f34,%f40,%f40

        fpadd32 %f10,%f26,%f26
        fmuld   %f18,%f42,%f42

        fpadd32 %f12,%f28,%f28
        fmuld   %f20,%f44,%f44

        fmuld   %f22,%f46,%f46

        fmuld   %f24,%f48,%f48
        faddd   %f34,%f40,%f40

        fmuld   %f26,%f50,%f50
        faddd   %f18,%f42,%f42

        fmuld   %f28,%f52,%f52
        faddd   %f20,%f44,%f44

        faddd   %f22,%f46,%f46

        faddd   %f24,%f48,%f48

        faddd   %f26,%f50,%f50

        faddd   %f28,%f52,%f52

        fdtos   %f40,%f26
        st      %f26,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f42,%f18
        st      %f18,[%o4]
        add     %o4,%i4,%i3

        fdtos   %f44,%f20
        st      %f20,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f46,%f22
        st      %f22,[%o4]
        add     %o4,%i4,%i3

        fdtos   %f48,%f24
        st      %f24,[%i3]
        add     %i3,%i4,%o4

        fdtos   %f50,%f26
        st      %f26,[%o4]
        add     %o4,%i4,%i3

        fdtos   %f52,%f28
        st      %f28,[%i3]
        add     %i3,%i4,%i3

! Classify slot 7.
        sethi   %hi(0x7f800000),%o5     ! rebuilt: %o5 was used as a table index above
        cmp     %l7,%o5
        bl,pt   %icc,.spec7_out_of_range
        sub     %i1,%i2,%o4             ! delay slot: %o4 = %i1 - 1 stride

        ble,pn  %icc,.spec7_inf
        ld      [%o4],%l0               ! delay slot: raw word, consumed by .spec7_inf

! NaN -> NaN

        fmuls   %f14,%f14,%f14
        ba      .spec7_exit
        st      %f14,[%i3]

! Slot 7 is an infinity.  %l0 already holds the raw word, loaded in the
! delay slot of the branch that reaches this label.  Sign clear -> store X
! (%f14); sign set -> store %f3.
.spec7_inf:
        srl     %l0,29,%l0
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec7_exit
        st      %f14,[%i3]              ! annulled delay slot: only when sign bit clear

        ba      .spec7_exit
        st      %f3,[%i3]

! Slot 7 is finite but out of range.  On entry %o4 = %i1 - 1 stride; the raw
! word is re-read from there and its sign selects one of the two constants
! stored after the 256-entry table.
! NOTE(review): constants are presumably huge / tiny - confirm in table.
.spec7_out_of_range:
        ld      [%o4],%l0
        srl     %l0,29,%l0
        and     %l0,4,%l0               ! 0 when sign clear, 4 when sign set
        add     %l0,2048,%l0
        ld      [G1_CONST_TBL+%l0],%f2
        fmuls   %f2,%f2,%f2             ! squared constant is the stored result
        st      %f2,[%i3]

! All eight slots have been consumed, so nothing is shifted: subtract 8 from
! the count %i0 and either refill the pipeline at .main_loop_preload or
! hand the remainder to .tail.
.spec7_exit:
        subcc   %i0,8,%i0
        bpos,pt %icc,.main_loop_preload
        add     %i3,%i4,%i3             ! delay slot: step output pointer past the slot 7 result

        ba      .tail
        nop
        SET_SIZE(__vexpf)