root/usr/src/lib/libmvec/common/vis/__vrhypotf.S
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "__vrhypotf.S"

#include "libm.h"

        RO_DATA
        .align  64

! Lookup table of 128 (double,double) pairs, indexed by 16-byte entries
! (si0 = (ibase0 >> 10) & 0x7f0, see the algorithm below).  The index is
! taken from the high word of hyp0 = x*x + y*y: six leading mantissa bits
! select m, and the lowest exponent bit selects the first half (even
! binary exponent, entries scaled by 1/(2*sqrt(2))) or the second half
! (odd exponent, entries scaled by 1/4) — TODO(review): confirm the
! even/odd-exponent split against the full main loop.
.CONST_TBL:
! i = [0,63]
! TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
! TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
! TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
! TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));

        .word   0x3ff00000, 0x00000000, 0x3fd6a09e, 0x667f3bcd,
        .word   0x3fef81f8, 0x1f81f820, 0x3fd673e3, 0x2ef63a03,
        .word   0x3fef07c1, 0xf07c1f08, 0x3fd6482d, 0x37a5a3d2,
        .word   0x3fee9131, 0xabf0b767, 0x3fd61d72, 0xb7978671,
        .word   0x3fee1e1e, 0x1e1e1e1e, 0x3fd5f3aa, 0x673fa911,
        .word   0x3fedae60, 0x76b981db, 0x3fd5cacb, 0x7802f342,
        .word   0x3fed41d4, 0x1d41d41d, 0x3fd5a2cd, 0x8c69d61a,
        .word   0x3fecd856, 0x89039b0b, 0x3fd57ba8, 0xb0ee01b9,
        .word   0x3fec71c7, 0x1c71c71c, 0x3fd55555, 0x55555555,
        .word   0x3fec0e07, 0x0381c0e0, 0x3fd52fcc, 0x468d6b54,
        .word   0x3febacf9, 0x14c1bad0, 0x3fd50b06, 0xa8fc6b70,
        .word   0x3feb4e81, 0xb4e81b4f, 0x3fd4e6fd, 0xf33cf032,
        .word   0x3feaf286, 0xbca1af28, 0x3fd4c3ab, 0xe93bcf74,
        .word   0x3fea98ef, 0x606a63be, 0x3fd4a10a, 0x97af7b92,
        .word   0x3fea41a4, 0x1a41a41a, 0x3fd47f14, 0x4fe17f9f,
        .word   0x3fe9ec8e, 0x951033d9, 0x3fd45dc3, 0xa3c34fa3,
        .word   0x3fe99999, 0x9999999a, 0x3fd43d13, 0x6248490f,
        .word   0x3fe948b0, 0xfcd6e9e0, 0x3fd41cfe, 0x93ff5199,
        .word   0x3fe8f9c1, 0x8f9c18fa, 0x3fd3fd80, 0x77e70577,
        .word   0x3fe8acb9, 0x0f6bf3aa, 0x3fd3de94, 0x8077db58,
        .word   0x3fe86186, 0x18618618, 0x3fd3c036, 0x50e00e03,
        .word   0x3fe81818, 0x18181818, 0x3fd3a261, 0xba6d7a37,
        .word   0x3fe7d05f, 0x417d05f4, 0x3fd38512, 0xba21f51e,
        .word   0x3fe78a4c, 0x8178a4c8, 0x3fd36845, 0x766eec92,
        .word   0x3fe745d1, 0x745d1746, 0x3fd34bf6, 0x3d156826,
        .word   0x3fe702e0, 0x5c0b8170, 0x3fd33021, 0x8127c0e0,
        .word   0x3fe6c16c, 0x16c16c17, 0x3fd314c3, 0xd92a9e91,
        .word   0x3fe68168, 0x16816817, 0x3fd2f9d9, 0xfd52fd50,
        .word   0x3fe642c8, 0x590b2164, 0x3fd2df60, 0xc5df2c9e,
        .word   0x3fe60581, 0x60581606, 0x3fd2c555, 0x2988e428,
        .word   0x3fe5c988, 0x2b931057, 0x3fd2abb4, 0x3c0eb0f4,
        .word   0x3fe58ed2, 0x308158ed, 0x3fd2927b, 0x2cd320f5,
        .word   0x3fe55555, 0x55555555, 0x3fd279a7, 0x4590331c,
        .word   0x3fe51d07, 0xeae2f815, 0x3fd26135, 0xe91daf55,
        .word   0x3fe4e5e0, 0xa72f0539, 0x3fd24924, 0x92492492,
        .word   0x3fe4afd6, 0xa052bf5b, 0x3fd23170, 0xd2be638a,
        .word   0x3fe47ae1, 0x47ae147b, 0x3fd21a18, 0x51ff630a,
        .word   0x3fe446f8, 0x6562d9fb, 0x3fd20318, 0xcc6a8f5d,
        .word   0x3fe41414, 0x14141414, 0x3fd1ec70, 0x124e98f9,
        .word   0x3fe3e22c, 0xbce4a902, 0x3fd1d61c, 0x070ae7d3,
        .word   0x3fe3b13b, 0x13b13b14, 0x3fd1c01a, 0xa03be896,
        .word   0x3fe38138, 0x13813814, 0x3fd1aa69, 0xe4f2777f,
        .word   0x3fe3521c, 0xfb2b78c1, 0x3fd19507, 0xecf5b9e9,
        .word   0x3fe323e3, 0x4a2b10bf, 0x3fd17ff2, 0xe00ec3ee,
        .word   0x3fe2f684, 0xbda12f68, 0x3fd16b28, 0xf55d72d4,
        .word   0x3fe2c9fb, 0x4d812ca0, 0x3fd156a8, 0x72b5ef62,
        .word   0x3fe29e41, 0x29e4129e, 0x3fd1426f, 0xac0654db,
        .word   0x3fe27350, 0xb8812735, 0x3fd12e7d, 0x02c40253,
        .word   0x3fe24924, 0x92492492, 0x3fd11ace, 0xe560242a,
        .word   0x3fe21fb7, 0x8121fb78, 0x3fd10763, 0xcec30b26,
        .word   0x3fe1f704, 0x7dc11f70, 0x3fd0f43a, 0x45cdedad,
        .word   0x3fe1cf06, 0xada2811d, 0x3fd0e150, 0xdce2b60c,
        .word   0x3fe1a7b9, 0x611a7b96, 0x3fd0cea6, 0x317186dc,
        .word   0x3fe18118, 0x11811812, 0x3fd0bc38, 0xeb8ba412,
        .word   0x3fe15b1e, 0x5f75270d, 0x3fd0aa07, 0xbd7b7488,
        .word   0x3fe135c8, 0x1135c811, 0x3fd09811, 0x63615499,
        .word   0x3fe11111, 0x11111111, 0x3fd08654, 0xa2d4f6db,
        .word   0x3fe0ecf5, 0x6be69c90, 0x3fd074d0, 0x4a8b1438,
        .word   0x3fe0c971, 0x4fbcda3b, 0x3fd06383, 0x31ff307a,
        .word   0x3fe0a681, 0x0a6810a7, 0x3fd0526c, 0x39213bfa,
        .word   0x3fe08421, 0x08421084, 0x3fd0418a, 0x4806de7d,
        .word   0x3fe0624d, 0xd2f1a9fc, 0x3fd030dc, 0x4ea03a72,
        .word   0x3fe04104, 0x10410410, 0x3fd02061, 0x446ffa9a,
        .word   0x3fe02040, 0x81020408, 0x3fd01018, 0x28467ee9,
        .word   0x3ff00000, 0x00000000, 0x3fd00000, 0x00000000,
        .word   0x3fef81f8, 0x1f81f820, 0x3fcfc0bd, 0x88a0f1d9,
        .word   0x3fef07c1, 0xf07c1f08, 0x3fcf82ec, 0x882c0f9b,
        .word   0x3fee9131, 0xabf0b767, 0x3fcf467f, 0x2814b0cc,
        .word   0x3fee1e1e, 0x1e1e1e1e, 0x3fcf0b68, 0x48d2af1c,
        .word   0x3fedae60, 0x76b981db, 0x3fced19b, 0x75e78957,
        .word   0x3fed41d4, 0x1d41d41d, 0x3fce990c, 0xdad55ed2,
        .word   0x3fecd856, 0x89039b0b, 0x3fce61b1, 0x38f18adc,
        .word   0x3fec71c7, 0x1c71c71c, 0x3fce2b7d, 0xddfefa66,
        .word   0x3fec0e07, 0x0381c0e0, 0x3fcdf668, 0x9b7e6350,
        .word   0x3febacf9, 0x14c1bad0, 0x3fcdc267, 0xbea45549,
        .word   0x3feb4e81, 0xb4e81b4f, 0x3fcd8f72, 0x08e6b82d,
        .word   0x3feaf286, 0xbca1af28, 0x3fcd5d7e, 0xa914b937,
        .word   0x3fea98ef, 0x606a63be, 0x3fcd2c85, 0x34ed6d86,
        .word   0x3fea41a4, 0x1a41a41a, 0x3fccfc7d, 0xa32a9213,
        .word   0x3fe9ec8e, 0x951033d9, 0x3fcccd60, 0x45f5d358,
        .word   0x3fe99999, 0x9999999a, 0x3fcc9f25, 0xc5bfedd9,
        .word   0x3fe948b0, 0xfcd6e9e0, 0x3fcc71c7, 0x1c71c71c,
        .word   0x3fe8f9c1, 0x8f9c18fa, 0x3fcc453d, 0x90f057a2,
        .word   0x3fe8acb9, 0x0f6bf3aa, 0x3fcc1982, 0xb2ece47b,
        .word   0x3fe86186, 0x18618618, 0x3fcbee90, 0x56fb9c39,
        .word   0x3fe81818, 0x18181818, 0x3fcbc460, 0x92eb3118,
        .word   0x3fe7d05f, 0x417d05f4, 0x3fcb9aed, 0xba588347,
        .word   0x3fe78a4c, 0x8178a4c8, 0x3fcb7232, 0x5b79db11,
        .word   0x3fe745d1, 0x745d1746, 0x3fcb4a29, 0x3c1d9550,
        .word   0x3fe702e0, 0x5c0b8170, 0x3fcb22cd, 0x56d87d7e,
        .word   0x3fe6c16c, 0x16c16c17, 0x3fcafc19, 0xd8606169,
        .word   0x3fe68168, 0x16816817, 0x3fcad60a, 0x1d0fb394,
        .word   0x3fe642c8, 0x590b2164, 0x3fcab099, 0xae8f539a,
        .word   0x3fe60581, 0x60581606, 0x3fca8bc4, 0x41a3d02c,
        .word   0x3fe5c988, 0x2b931057, 0x3fca6785, 0xb41bacf7,
        .word   0x3fe58ed2, 0x308158ed, 0x3fca43da, 0x0adc6899,
        .word   0x3fe55555, 0x55555555, 0x3fca20bd, 0x700c2c3e,
        .word   0x3fe51d07, 0xeae2f815, 0x3fc9fe2c, 0x315637ee,
        .word   0x3fe4e5e0, 0xa72f0539, 0x3fc9dc22, 0xbe484458,
        .word   0x3fe4afd6, 0xa052bf5b, 0x3fc9ba9d, 0xa6c73588,
        .word   0x3fe47ae1, 0x47ae147b, 0x3fc99999, 0x9999999a,
        .word   0x3fe446f8, 0x6562d9fb, 0x3fc97913, 0x63068b54,
        .word   0x3fe41414, 0x14141414, 0x3fc95907, 0xeb87ab44,
        .word   0x3fe3e22c, 0xbce4a902, 0x3fc93974, 0x368cfa31,
        .word   0x3fe3b13b, 0x13b13b14, 0x3fc91a55, 0x6151761c,
        .word   0x3fe38138, 0x13813814, 0x3fc8fba8, 0xa1bf6f96,
        .word   0x3fe3521c, 0xfb2b78c1, 0x3fc8dd6b, 0x4563a009,
        .word   0x3fe323e3, 0x4a2b10bf, 0x3fc8bf9a, 0xb06e1af3,
        .word   0x3fe2f684, 0xbda12f68, 0x3fc8a234, 0x5cc04426,
        .word   0x3fe2c9fb, 0x4d812ca0, 0x3fc88535, 0xd90703c6,
        .word   0x3fe29e41, 0x29e4129e, 0x3fc8689c, 0xc7e07e7d,
        .word   0x3fe27350, 0xb8812735, 0x3fc84c66, 0xdf0ca4c2,
        .word   0x3fe24924, 0x92492492, 0x3fc83091, 0xe6a7f7e7,
        .word   0x3fe21fb7, 0x8121fb78, 0x3fc8151b, 0xb86fee1d,
        .word   0x3fe1f704, 0x7dc11f70, 0x3fc7fa02, 0x3f1068d1,
        .word   0x3fe1cf06, 0xada2811d, 0x3fc7df43, 0x7579b9b5,
        .word   0x3fe1a7b9, 0x611a7b96, 0x3fc7c4dd, 0x663ebb88,
        .word   0x3fe18118, 0x11811812, 0x3fc7aace, 0x2afa8b72,
        .word   0x3fe15b1e, 0x5f75270d, 0x3fc79113, 0xebbd7729,
        .word   0x3fe135c8, 0x1135c811, 0x3fc777ac, 0xde80baea,
        .word   0x3fe11111, 0x11111111, 0x3fc75e97, 0x46a0b098,
        .word   0x3fe0ecf5, 0x6be69c90, 0x3fc745d1, 0x745d1746,
        .word   0x3fe0c971, 0x4fbcda3b, 0x3fc72d59, 0xc45f1fc5,
        .word   0x3fe0a681, 0x0a6810a7, 0x3fc7152e, 0x9f44f01f,
        .word   0x3fe08421, 0x08421084, 0x3fc6fd4e, 0x79325467,
        .word   0x3fe0624d, 0xd2f1a9fc, 0x3fc6e5b7, 0xd16657e1,
        .word   0x3fe04104, 0x10410410, 0x3fc6ce69, 0x31d5858d,
        .word   0x3fe02040, 0x81020408, 0x3fc6b761, 0x2ec892f6

! Auxiliary constants, loaded at TBL+TBL_SHIFT (2048 = size of the table
! above).  The DC*/DA*/SCALE values are bit masks/operands for the VIS
! fand/for/fpsub32/fmul8x16 steps; KA0..KA3 are the polynomial
! coefficients for the 1/sqrt correction (see algorithm comment below).
        .word   0x000fffff, 0xffffffff  ! DC0: mantissa mask of a double
        .word   0x3ff00000, 0           ! DC1: high word of 1.0 (OR-ed in to force exponent to 0)
        .word   0x7fffc000, 0           ! DC2: sign+exponent+upper mantissa bits, extracts h_hi0
        .word   0x7fe00000, 0           ! DA0: double exponent field mask
        .word   0x60000000, 0           ! DA1: bias used by fpsub32 to build the 2^(-exp/2) scale
        .word   0x80808080, 0x3f800000  ! SCALE (fmul8x16 operand, halves each 16-bit lane), FONE = 1.0f
        .word   0x3fefffff, 0xfee7f18f  ! KA0 =  9.99999997962321453275e-01
        .word   0xbfdfffff, 0xfe07e52f  ! KA1 = -4.99999998166077580600e-01
        .word   0x3fd80118, 0x0ca296d9  ! KA2 = 3.75066768969515586277e-01
        .word   0xbfd400fc, 0x0bbb8e78  ! KA3 = -3.12560092408808548438e-01

! Frequently-compared integer constants, kept in global registers
! (materialized once in the prologue via sethi/add).
#define _0x7f800000     %o0
#define _0x7fffffff     %o7
! Base address of .CONST_TBL (set up by PIC_SET in the prologue).
#define TBL             %l2

! Byte offset from TBL to the auxiliary constants:
! 128 table entries * 2 doubles * 8 bytes = 2048.
#define TBL_SHIFT       2048

! Element strides, pre-scaled by 4 (sizeof (float)) in the prologue.
#define stridex         %l3
#define stridey         %l4
#define stridez         %l5
! Remaining element count (reloaded from tmp_counter on each pass).
#define counter         %i0

! Double FP registers dedicated to the constants loaded from the table.
#define DA0             %f52
#define DA1             %f44
#define SCALE           %f6

#define DC0             %f46
#define DC1             %f8
#define FZERO           %f9
#define DC2             %f50

! Polynomial coefficients for the 1/sqrt correction term.
#define KA3             %f56
#define KA2             %f58
#define KA1             %f60
#define KA0             %f54

! Stack-frame temporaries (offsets relative to %fp + STACK_BIAS).
#define tmp_counter     STACK_BIAS-0x04
#define tmp_px          STACK_BIAS-0x20
#define tmp_py          STACK_BIAS-0x18

! FP<->int transfer slots used to extract the high word of hyp0.
! NOTE(review): ftmp0/ftmp2 and ftmp1/ftmp3 intentionally share slots;
! the software pipeline appears to space their uses far enough apart —
! confirm against the full main loop before changing.
#define ftmp0           STACK_BIAS-0x10
#define ftmp1           STACK_BIAS-0x0c
#define ftmp2           STACK_BIAS-0x10
#define ftmp3           STACK_BIAS-0x0c
#define ftmp4           STACK_BIAS-0x08

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps            0x20

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!      !!!!!   algorithm   !!!!!
!  x0 = *px;
!  ax = *(int*)px;
!
!  y0 = *py;
!  ay = *(int*)py;
!
!  ax &= 0x7fffffff;
!  ay &= 0x7fffffff;
!
!  px += stridex;
!  py += stridey;
!
!  if ( ax >= 0x7f800000 || ay >= 0x7f800000 )
!  {
!    *pz = fabsf(x0) * fabsf(y0);
!    if( ax == 0x7f800000 ) *pz = 0.0f;
!    else if( ay == 0x7f800000 ) *pz = 0.0f;
!    pz += stridez;
!    continue;
!  }
!
!  if ( ay == 0 )
!  {
!    if ( ax == 0 )
!    {
!      *pz = 1.0f / 0.0f;
!      pz += stridez;
!      continue;
!    }
!  }
!
!  hyp0 = x0 * (double)x0;
!  dtmp0 = y0 * (double)y0;
!  hyp0 += dtmp0;
!
!  ibase0 = ((int*)&hyp0)[0];
!
!  dbase0 = vis_fand(hyp0,DA0);
!  dbase0 = vis_fmul8x16(SCALE, dbase0);
!  dbase0 = vis_fpsub32(DA1,dbase0);
!
!  hyp0 = vis_fand(hyp0,DC0);
!  hyp0 = vis_for(hyp0,DC1);
!  h_hi0 = vis_fand(hyp0,DC2);
!
!  ibase0 >>= 10;
!  si0 = ibase0 & 0x7f0;
!  xx0 = ((double*)((char*)TBL + si0))[0];
!
!  dtmp1 = hyp0 - h_hi0;
!  xx0 = dtmp1 * xx0;
!  res0 = ((double*)((char*)TBL + si0))[1];
!  dtmp2 = KA3 * xx0;
!  dtmp2 += KA2;
!  dtmp2 *= xx0;
!  dtmp2 += KA1;
!  dtmp2 *= xx0;
!  dtmp2 += KA0;
!  res0 *= dtmp2;
!  res0 *= dbase0;
!  ftmp0 = (float)res0;
!  *pz = ftmp0;
!  pz += stridez;
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

        ENTRY(__vrhypotf)
        save    %sp,-SA(MINFRAME)-tmps,%sp
        PIC_SETUP(l7)
        PIC_SET(l7,.CONST_TBL,l2)
        wr      %g0,0x82,%asi

#ifdef __sparcv9
        ldx     [%fp+STACK_BIAS+176],stridez
#else
        ld      [%fp+STACK_BIAS+92],stridez
#endif

        stx     %i1,[%fp+tmp_px]
        sll     %i2,2,stridex

        stx     %i3,[%fp+tmp_py]
        sll     %i4,2,stridey

        st      %i0,[%fp+tmp_counter]
        sll     stridez,2,stridez
        mov     %i5,%o1

        ldd     [TBL+TBL_SHIFT],DC0
        ldd     [TBL+TBL_SHIFT+8],DC1
        ldd     [TBL+TBL_SHIFT+16],DC2
        ldd     [TBL+TBL_SHIFT+24],DA0
        ldd     [TBL+TBL_SHIFT+32],DA1
        ldd     [TBL+TBL_SHIFT+40],SCALE
        ldd     [TBL+TBL_SHIFT+48],KA0

        ldd     [TBL+TBL_SHIFT+56],KA1
        sethi   %hi(0x7f800000),%o0

        ldd     [TBL+TBL_SHIFT+64],KA2
        sethi   %hi(0x7ffffc00),%o7

        ldd     [TBL+TBL_SHIFT+72],KA3
        add     %o7,1023,%o7

.begin:
        ld      [%fp+tmp_counter],counter
        ldx     [%fp+tmp_px],%o4
        ldx     [%fp+tmp_py],%i2
        st      %g0,[%fp+tmp_counter]
.begin1:
        cmp     counter,0
        ble,pn  %icc,.exit
        nop

        lda     [%i2]0x82,%l6           ! (3_0) ay = *(int*)py;

        lda     [%o4]0x82,%i5           ! (3_0) ax = *(int*)px;

        lda     [%i2]0x82,%f2           ! (3_0) y0 = *py;
        and     %l6,_0x7fffffff,%l6     ! (3_0) ay &= 0x7fffffff;

        and     %i5,_0x7fffffff,%i5     ! (3_0) ax &= 0x7fffffff;
        cmp     %l6,_0x7f800000         ! (3_0) ay ? 0x7f800000
        bge,pn  %icc,.spec0             ! (3_0) if ( ay >= 0x7f800000 )
        lda     [%o4]0x82,%f4           ! (3_0) x0 = *px;

        cmp     %i5,_0x7f800000         ! (3_0) ax ? 0x7f800000
        bge,pn  %icc,.spec0             ! (3_0) if ( ax >= 0x7f800000 )
        nop

        cmp     %l6,0                   ! (3_0)
        be,pn   %icc,.spec1             ! (3_0) if ( ay == 0 )
        fsmuld  %f4,%f4,%f36            ! (3_0) hyp0 = x0 * (double)x0;
.cont_spec1:
        lda     [%i2+stridey]0x82,%l6   ! (4_0) ay = *(int*)py;

        fsmuld  %f2,%f2,%f62            ! (3_0) dtmp0 = y0 * (double)y0;
        lda     [stridex+%o4]0x82,%i5   ! (4_0) ax = *(int*)px;

        add     %o4,stridex,%l0         ! px += stridex

        add     %i2,stridey,%i2         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (4_0) ay &= 0x7fffffff;

        and     %i5,_0x7fffffff,%i5     ! (4_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (4_0) y0 = *py;

        faddd   %f36,%f62,%f20          ! (3_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (4_0) ay ? 0x7f800000

        bge,pn  %icc,.update0           ! (4_0) if ( ay >= 0x7f800000 )
        lda     [stridex+%o4]0x82,%f4   ! (4_0) x0 = *px;
.cont0:
        cmp     %i5,_0x7f800000         ! (4_0) ax ? 0x7f800000
        bge,pn  %icc,.update1           ! (4_0) if ( ax >= 0x7f800000 )
        st      %f20,[%fp+ftmp4]        ! (3_0) ibase0 = ((int*)&hyp0)[0];
.cont1:
        cmp     %l6,0                   ! (4_1) ay ? 0
        be,pn   %icc,.update2           ! (4_1) if ( ay == 0 )
        fsmuld  %f4,%f4,%f38            ! (4_1) hyp0 = x0 * (double)x0;
.cont2:
        lda     [%i2+stridey]0x82,%l6   ! (0_0) ay = *(int*)py;

        fsmuld  %f2,%f2,%f62            ! (4_1) dtmp0 = y0 * (double)y0;
        lda     [%l0+stridex]0x82,%i5   ! (0_0) ax = *(int*)px;

        add     %l0,stridex,%i1         ! px += stridex

        add     %i2,stridey,%i2         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (0_0) ay &= 0x7fffffff;

        and     %i5,_0x7fffffff,%i5     ! (0_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (0_0) y0 = *py;

        cmp     %l6,_0x7f800000         ! (0_0) ay ? 0x7f800000
        bge,pn  %icc,.update3           ! (0_0) if ( ay >= 0x7f800000 )
        faddd   %f38,%f62,%f12          ! (4_1) hyp0 += dtmp0;
.cont3:
        lda     [%i1]0x82,%f4           ! (0_0) x0 = *px;

        cmp     %i5,_0x7f800000         ! (0_0) ax ? 0x7f800000
        bge,pn  %icc,.update4           ! (0_0) if ( ax >= 0x7f800000 )
        st      %f12,[%fp+ftmp0]        ! (4_1) ibase0 = ((int*)&hyp0)[0];
.cont4:
        cmp     %l6,0                   ! (0_0) ay ? 0
        be,pn   %icc,.update5           ! (0_0) if ( ay == 0 )
        fsmuld  %f4,%f4,%f38            ! (0_0) hyp0 = x0 * (double)x0;
.cont5:
        lda     [%i2+stridey]0x82,%l6   ! (1_0) ay = *(int*)py;

        fsmuld  %f2,%f2,%f62            ! (0_0) dtmp0 = y0 * (double)y0;
        lda     [%i1+stridex]0x82,%i5   ! (1_0) ax = *(int*)px;

        add     %i1,stridex,%g5         ! px += stridex

        add     %i2,stridey,%o3         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (1_0) ay &= 0x7fffffff;
        fand    %f20,DC0,%f30           ! (3_1) hyp0 = vis_fand(hyp0,DC0);

        and     %i5,_0x7fffffff,%i5     ! (1_0) ax &= 0x7fffffff;
        lda     [%o3]0x82,%f2           ! (1_0) y0 = *py;

        faddd   %f38,%f62,%f14          ! (0_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (1_0) ay ? 0x7f800000

        lda     [%g5]0x82,%f4           ! (1_0) x0 = *px;
        bge,pn  %icc,.update6           ! (1_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (3_1) hyp0 = vis_for(hyp0,DC1);
.cont6:
        cmp     %i5,_0x7f800000         ! (1_0) ax ? 0x7f800000
        bge,pn  %icc,.update7           ! (1_0) if ( ax >= 0x7f800000 )
        ld      [%fp+ftmp4],%l1         ! (3_1) ibase0 = ((int*)&hyp0)[0];
.cont7:
        st      %f14,[%fp+ftmp1]        ! (0_0) ibase0 = ((int*)&hyp0)[0];

        cmp     %l6,0                   ! (1_0) ay ? 0
        be,pn   %icc,.update8           ! (1_0) if ( ay == 0 )
        fand    %f28,DC2,%f30           ! (3_1) h_hi0 = vis_fand(hyp0,DC2);
.cont8:
        fsmuld  %f4,%f4,%f38            ! (1_0) hyp0 = x0 * (double)x0;
        sra     %l1,10,%o5              ! (3_1) ibase0 >>= 10;

        and     %o5,2032,%o4            ! (3_1) si0 = ibase0 & 0x7f0;
        lda     [%o3+stridey]0x82,%l6   ! (2_0) ay = *(int*)py;

        fsmuld  %f2,%f2,%f62            ! (1_0) dtmp0 = y0 * (double)y0;
        add     %o4,TBL,%l7             ! (3_1) (char*)TBL + si0
        lda     [stridex+%g5]0x82,%i5   ! (2_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (3_1) dtmp1 = hyp0 - h_hi0;

        add     %g5,stridex,%i4         ! px += stridex
        ldd     [TBL+%o4],%f42          ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];

        and     %l6,_0x7fffffff,%l6     ! (2_0) ay &= 0x7fffffff;
        add     %o3,stridey,%i2         ! py += stridey
        fand    %f12,DC0,%f30           ! (4_1) hyp0 = vis_fand(hyp0,DC0);

        and     %i5,_0x7fffffff,%i5     ! (2_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (2_0) y0 = *py;

        faddd   %f38,%f62,%f16          ! (1_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (2_0) ay ? 0x7f800000
        fmuld   %f28,%f42,%f26          ! (3_1) xx0 = dtmp1 * xx0;

        lda     [stridex+%g5]0x82,%f4   ! (2_0) x0 = *px;
        bge,pn  %icc,.update9           ! (2_0) if ( ay >= 0x7f800000
        for     %f30,DC1,%f28           ! (4_1) hyp0 = vis_for(hyp0,DC1);
.cont9:
        cmp     %i5,_0x7f800000         ! (2_0) ax ? 0x7f800000
        bge,pn  %icc,.update10          ! (2_0) if ( ax >= 0x7f800000 )
        ld      [%fp+ftmp0],%i3         ! (4_1) ibase0 = ((int*)&hyp0)[0];
.cont10:
        st      %f16,[%fp+ftmp2]        ! (1_0) ibase0 = ((int*)&hyp0)[0];

        fmuld   KA3,%f26,%f34           ! (3_1) dtmp2 = KA3 * xx0;
        cmp     %l6,0                   ! (2_0) ay ? 0
        be,pn   %icc,.update11          ! (2_0) if ( ay == 0 )
        fand    %f28,DC2,%f30           ! (4_1) h_hi0 = vis_fand(hyp0,DC2);
.cont11:
        fsmuld  %f4,%f4,%f36            ! (2_0) hyp0 = x0 * (double)x0;
        sra     %i3,10,%i3              ! (4_1) ibase0 >>= 10;

        and     %i3,2032,%i3            ! (4_1) si0 = ibase0 & 0x7f0;
        lda     [%i2+stridey]0x82,%l6   ! (3_0) ay = *(int*)py;

        fsmuld  %f2,%f2,%f62            ! (2_0) dtmp0 = y0 * (double)y0;
        add     %i3,TBL,%i3             ! (4_1) (char*)TBL + si0
        lda     [%i4+stridex]0x82,%i5   ! (3_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (4_1) dtmp1 = hyp0 - h_hi0;

        add     %i4,stridex,%o4         ! px += stridex
        ldd     [%i3],%f42              ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (3_1) dtmp2 += KA2;

        add     %i2,stridey,%i2         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (3_0) ay &= 0x7fffffff;
        fand    %f14,DC0,%f30           ! (0_0) hyp0 = vis_fand(hyp0,DC0);

        and     %i5,_0x7fffffff,%i5     ! (3_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (3_0) y0 = *py;

        faddd   %f36,%f62,%f18          ! (2_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (3_0) ay ? 0x7f800000
        fmuld   %f28,%f42,%f32          ! (4_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f26,%f10          ! (3_1) dtmp2 *= xx0;
        lda     [%o4]0x82,%f4           ! (3_0) x0 = *px;
        bge,pn  %icc,.update12          ! (3_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (0_0) hyp0 = vis_for(hyp0,DC1);
.cont12:
        cmp     %i5,_0x7f800000         ! (3_0) ax ? 0x7f800000
        bge,pn  %icc,.update13          ! (3_0) if ( ax >= 0x7f800000 )
        ld      [%fp+ftmp1],%i1         ! (0_0) ibase0 = ((int*)&hyp0)[0];
.cont13:
        st      %f18,[%fp+ftmp3]        ! (2_0) ibase0 = ((int*)&hyp0)[0];

        fmuld   KA3,%f32,%f34           ! (4_1) dtmp2 = KA3 * xx0;
        cmp     %l6,0                   ! (3_0)
        be,pn   %icc,.update14          ! (3_0) if ( ay == 0 )
        fand    %f28,DC2,%f30           ! (0_0) h_hi0 = vis_fand(hyp0,DC2);
.cont14:
        fsmuld  %f4,%f4,%f36            ! (3_0) hyp0 = x0 * (double)x0;
        sra     %i1,10,%l1              ! (0_0) ibase0 >>= 10;
        faddd   %f10,KA1,%f40           ! (3_1) dtmp2 += KA1;

        and     %l1,2032,%o5            ! (0_0) si0 = ibase0 & 0x7f0;
        lda     [%i2+stridey]0x82,%l6   ! (4_0) ay = *(int*)py;

        fsmuld  %f2,%f2,%f62            ! (3_0) dtmp0 = y0 * (double)y0;
        add     %o5,TBL,%l1             ! (0_0) (char*)TBL + si0
        lda     [stridex+%o4]0x82,%i5   ! (4_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (0_0) dtmp1 = hyp0 - h_hi0;

        add     %o4,stridex,%l0         ! px += stridex
        ldd     [TBL+%o5],%f42          ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (4_1) dtmp2 += KA2;

        fmuld   %f40,%f26,%f40          ! (3_1) dtmp2 *= xx0;
        add     %i2,stridey,%i2         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (4_0) ay &= 0x7fffffff;
        fand    %f16,DC0,%f30           ! (1_0) hyp0 = vis_fand(hyp0,DC0);

        and     %i5,_0x7fffffff,%i5     ! (4_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (4_0) y0 = *py;
        fand    %f20,DA0,%f24           ! (3_1) dbase0 = vis_fand(hyp0,DA0);

        faddd   %f36,%f62,%f20          ! (3_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (4_0) ay ? 0x7f800000
        ldd     [%l7+8],%f36            ! (3_1) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f26          ! (0_0) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f32,%f10          ! (4_1) dtmp2 *= xx0;
        lda     [stridex+%o4]0x82,%f4   ! (4_0) x0 = *px;
        bge,pn  %icc,.update15          ! (4_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (1_0) hyp0 = vis_for(hyp0,DC1);
.cont15:
        fmul8x16        SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %i5,_0x7f800000         ! (4_0) ax ? 0x7f800000
        ld      [%fp+ftmp2],%i1         ! (1_0) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f62           ! (3_1) dtmp2 += KA0;

        bge,pn  %icc,.update16          ! (4_0) if ( ax >= 0x7f800000 )
        st      %f20,[%fp+ftmp4]        ! (3_0) ibase0 = ((int*)&hyp0)[0];
.cont16:
        fmuld   KA3,%f26,%f34           ! (0_0) dtmp2 = KA3 * xx0;
        fand    %f28,DC2,%f30           ! (1_0) h_hi0 = vis_fand(hyp0,DC2);

        mov     %o1,%i4
        cmp     counter,5
        bl,pn   %icc,.tail
        nop

        ba      .main_loop
        sub     counter,5,counter

        .align  16
.main_loop:
        fsmuld  %f4,%f4,%f38            ! (4_1) hyp0 = x0 * (double)x0;
        sra     %i1,10,%o2              ! (1_1) ibase0 >>= 10;
        cmp     %l6,0                   ! (4_1) ay ? 0
        faddd   %f10,KA1,%f40           ! (4_2) dtmp2 += KA1;

        fmuld   %f36,%f62,%f36          ! (3_2) res0 *= dtmp2;
        and     %o2,2032,%o2            ! (1_1) si0 = ibase0 & 0x7f0;
        lda     [%i2+stridey]0x82,%l6   ! (0_0) ay = *(int*)py;
        fpsub32 DA1,%f24,%f24           ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);

        fsmuld  %f2,%f2,%f62            ! (4_1) dtmp0 = y0 * (double)y0;
        add     %o2,TBL,%o2             ! (1_1) (char*)TBL + si0
        lda     [%l0+stridex]0x82,%o1   ! (0_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (1_1) dtmp1 = hyp0 - h_hi0;

        add     %l0,stridex,%i1         ! px += stridex
        ldd     [%o2],%f42              ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
        be,pn   %icc,.update17          ! (4_1) if ( ay == 0 )
        faddd   %f34,KA2,%f10           ! (0_1) dtmp2 += KA2;
.cont17:
        fmuld   %f40,%f32,%f40          ! (4_2) dtmp2 *= xx0;
        add     %i2,stridey,%i2         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (0_0) ay &= 0x7fffffff;
        fand    %f18,DC0,%f30           ! (2_1) hyp0 = vis_fand(hyp0,DC0);

        fmuld   %f36,%f24,%f32          ! (3_2) res0 *= dbase0;
        and     %o1,_0x7fffffff,%o1     ! (0_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (0_0) y0 = *py;
        fand    %f12,DA0,%f24           ! (4_2) dbase0 = vis_fand(hyp0,DA0);

        faddd   %f38,%f62,%f12          ! (4_1) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (0_0) ay ? 0x7f800000
        ldd     [%i3+8],%f62            ! (4_2) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f36          ! (1_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f26,%f10          ! (0_1) dtmp2 *= xx0;
        lda     [%i1]0x82,%f4           ! (0_0) x0 = *px;
        bge,pn  %icc,.update18          ! (0_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (2_1) hyp0 = vis_for(hyp0,DC1);
.cont18:
        fmul8x16        SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %o1,_0x7f800000         ! (0_0) ax ? 0x7f800000
        ld      [%fp+ftmp3],%l0         ! (2_1) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f42           ! (4_2) dtmp2 += KA0;

        add     %i4,stridez,%i3         ! pz += stridez
        st      %f12,[%fp+ftmp0]        ! (4_1) ibase0 = ((int*)&hyp0)[0];
        bge,pn  %icc,.update19          ! (0_0) if ( ax >= 0x7f800000 )
        fdtos   %f32,%f1                ! (3_2) ftmp0 = (float)res0;
.cont19:
        fmuld   KA3,%f36,%f34           ! (1_1) dtmp2 = KA3 * xx0;
        cmp     %l6,0                   ! (0_0) ay ? 0
        st      %f1,[%i4]               ! (3_2) *pz = ftmp0;
        fand    %f28,DC2,%f30           ! (2_1) h_hi0 = vis_fand(hyp0,DC2);

        fsmuld  %f4,%f4,%f38            ! (0_0) hyp0 = x0 * (double)x0;
        sra     %l0,10,%i4              ! (2_1) ibase0 >>= 10;
        be,pn   %icc,.update20          ! (0_0) if ( ay == 0 )
        faddd   %f10,KA1,%f40           ! (0_1) dtmp2 += KA1;
.cont20:
        fmuld   %f62,%f42,%f32          ! (4_2) res0 *= dtmp2;
        and     %i4,2032,%g1            ! (2_1) si0 = ibase0 & 0x7f0;
        lda     [%i2+stridey]0x82,%l6   ! (1_0) ay = *(int*)py;
        fpsub32 DA1,%f24,%f24           ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);

        fsmuld  %f2,%f2,%f62            ! (0_0) dtmp0 = y0 * (double)y0;
        add     %g1,TBL,%l0             ! (2_1) (char*)TBL + si0
        lda     [%i1+stridex]0x82,%i5   ! (1_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (2_1) dtmp1 = hyp0 - h_hi0;

        nop
        add     %i1,stridex,%g5         ! px += stridex
        ldd     [TBL+%g1],%f42          ! (2_1) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (1_1) dtmp2 += KA2;

        fmuld   %f40,%f26,%f40          ! (0_1) dtmp2 *= xx0;
        add     %i2,stridey,%o3         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (1_0) ay &= 0x7fffffff;
        fand    %f20,DC0,%f30           ! (3_1) hyp0 = vis_fand(hyp0,DC0);

        fmuld   %f32,%f24,%f26          ! (4_2) res0 *= dbase0;
        and     %i5,_0x7fffffff,%i5     ! (1_0) ax &= 0x7fffffff;
        lda     [%o3]0x82,%f2           ! (1_0) y0 = *py;
        fand    %f14,DA0,%f24           ! (0_1) dbase0 = vis_fand(hyp0,DA0);

        faddd   %f38,%f62,%f14          ! (0_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (1_0) ay ? 0x7f800000
        ldd     [%l1+8],%f62            ! (0_1) res0 = ((double*)((char*)arr + si0))[1];
! -------------------------------------------------------------------
! Software-pipelined main loop, part 1 (continued from above).
! Five iterations are in flight at once; the "(i_j)" tag on each
! instruction names pipeline element i and iteration age j.  Integer
! checks on ax/ay branch to an .updateNN handler when an argument is
! Inf/NaN/zero, then rejoin at the matching .contNN label.  Branch
! delay slots are deliberately filled with FP work — do not reorder.
! -------------------------------------------------------------------
        fmuld   %f28,%f42,%f32          ! (2_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f36,%f10          ! (1_1) dtmp2 *= xx0;
        lda     [%g5]0x82,%f4           ! (1_0) x0 = *px;
        bge,pn  %icc,.update21          ! (1_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (3_1) hyp0 = vis_for(hyp0,DC1);
.cont21:
        fmul8x16        SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %i5,_0x7f800000         ! (1_0) ax ? 0x7f800000
        ld      [%fp+ftmp4],%l1         ! (3_1) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f42           ! (0_1) dtmp2 += KA0;

        add     %i3,stridez,%o1         ! pz += stridez
        st      %f14,[%fp+ftmp1]        ! (0_0) ibase0 = ((int*)&hyp0)[0];
        bge,pn  %icc,.update22          ! (1_0) if ( ax >= 0x7f800000 )
        fdtos   %f26,%f1                ! (4_2) ftmp0 = (float)res0;
.cont22:
        fmuld   KA3,%f32,%f34           ! (2_1) dtmp2 = KA3 * xx0;
        cmp     %l6,0                   ! (1_0) ay ? 0
        st      %f1,[%i3]               ! (4_2) *pz = ftmp0;
        fand    %f28,DC2,%f30           ! (3_1) h_hi0 = vis_fand(hyp0,DC2);

        fsmuld  %f4,%f4,%f38            ! (1_0) hyp0 = x0 * (double)x0;
        sra     %l1,10,%o5              ! (3_1) ibase0 >>= 10;
        be,pn   %icc,.update23          ! (1_0) if ( ay == 0 )
        faddd   %f10,KA1,%f40           ! (1_1) dtmp2 += KA1;
.cont23:
        fmuld   %f62,%f42,%f26          ! (0_1) res0 *= dtmp2;
        and     %o5,2032,%o4            ! (3_1) si0 = ibase0 & 0x7f0;
        lda     [%o3+stridey]0x82,%l6   ! (2_0) ay = *(int*)py;
        fpsub32 DA1,%f24,%f24           ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);

        fsmuld  %f2,%f2,%f62            ! (1_0) dtmp0 = y0 * (double)y0;
        add     %o4,TBL,%l7             ! (3_1) (char*)TBL + si0
        lda     [stridex+%g5]0x82,%i5   ! (2_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (3_1) dtmp1 = hyp0 - h_hi0;

        nop
        add     %g5,stridex,%i4         ! px += stridex
        ldd     [TBL+%o4],%f42          ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (2_1) dtmp2 += KA2;

        fmuld   %f40,%f36,%f40          ! (1_1) dtmp2 *= xx0;
        and     %l6,_0x7fffffff,%l6     ! (2_0) ay &= 0x7fffffff;
        add     %o3,stridey,%i2         ! py += stridey
        fand    %f12,DC0,%f30           ! (4_1) hyp0 = vis_fand(hyp0,DC0);

        fmuld   %f26,%f24,%f36          ! (0_1) res0 *= dbase0;
        and     %i5,_0x7fffffff,%i5     ! (2_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (2_0) y0 = *py;
        fand    %f16,DA0,%f24           ! (1_1) dbase0 = vis_fand(hyp0,DA0);

        faddd   %f38,%f62,%f16          ! (1_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (2_0) ay ? 0x7f800000
        ldd     [%o2+8],%f38            ! (1_1) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f26          ! (3_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f32,%f10          ! (2_1) dtmp2 *= xx0;
        lda     [stridex+%g5]0x82,%f4   ! (2_0) x0 = *px;
        bge,pn  %icc,.update24          ! (2_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (4_1) hyp0 = vis_for(hyp0,DC1);
.cont24:
        fmul8x16        SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %i5,_0x7f800000         ! (2_0) ax ? 0x7f800000
        ld      [%fp+ftmp0],%i3         ! (4_1) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f62           ! (1_1) dtmp2 += KA0;

        add     %o1,stridez,%g1         ! pz += stridez
        st      %f16,[%fp+ftmp2]        ! (1_0) ibase0 = ((int*)&hyp0)[0];
        bge,pn  %icc,.update25          ! (2_0) if ( ax >= 0x7f800000 )
        fdtos   %f36,%f1                ! (0_1) ftmp0 = (float)res0;
.cont25:
        fmuld   KA3,%f26,%f34           ! (3_1) dtmp2 = KA3 * xx0;
        cmp     %l6,0                   ! (2_0) ay ? 0
        st      %f1,[%o1]               ! (0_1) *pz = ftmp0;
        fand    %f28,DC2,%f30           ! (4_1) h_hi0 = vis_fand(hyp0,DC2);

        fsmuld  %f4,%f4,%f36            ! (2_0) hyp0 = x0 * (double)x0;
        sra     %i3,10,%i3              ! (4_1) ibase0 >>= 10;
        be,pn   %icc,.update26          ! (2_0) if ( ay == 0 )
        faddd   %f10,KA1,%f40           ! (2_1) dtmp2 += KA1;
.cont26:
        fmuld   %f38,%f62,%f38          ! (1_1) res0 *= dtmp2;
        and     %i3,2032,%i3            ! (4_1) si0 = ibase0 & 0x7f0;
        lda     [%i2+stridey]0x82,%l6   ! (3_0) ay = *(int*)py;
        fpsub32 DA1,%f24,%f24           ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);

        fsmuld  %f2,%f2,%f62            ! (2_0) dtmp0 = y0 * (double)y0;
        add     %i3,TBL,%i3             ! (4_1) (char*)TBL + si0
        lda     [%i4+stridex]0x82,%i5   ! (3_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (4_1) dtmp1 = hyp0 - h_hi0;

        nop
        add     %i4,stridex,%o4         ! px += stridex
        ldd     [%i3],%f42              ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (3_1) dtmp2 += KA2;

! -------------------------------------------------------------------
! Software-pipelined main loop, part 2: final two stage groups of the
! iteration.  Processes 5 elements per trip (counter -= 5) and loops
! back to .main_loop while the count stays non-negative; the fand in
! the bpos delay slot belongs to the next trip's pipeline state.
! -------------------------------------------------------------------
        fmuld   %f40,%f32,%f40          ! (2_1) dtmp2 *= xx0;
        add     %i2,stridey,%i2         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (3_0) ay &= 0x7fffffff;
        fand    %f14,DC0,%f30           ! (0_0) hyp0 = vis_fand(hyp0,DC0);

        fmuld   %f38,%f24,%f38          ! (1_1) res0 *= dbase0;
        and     %i5,_0x7fffffff,%i5     ! (3_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (3_0) y0 = *py;
        fand    %f18,DA0,%f24           ! (2_1) dbase0 = vis_fand(hyp0,DA0);

        faddd   %f36,%f62,%f18          ! (2_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (3_0) ay ? 0x7f800000
        ldd     [%l0+8],%f62            ! (2_1) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f32          ! (4_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f26,%f10          ! (3_1) dtmp2 *= xx0;
        lda     [%o4]0x82,%f4           ! (3_0) x0 = *px;
        bge,pn  %icc,.update27          ! (3_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (0_0) hyp0 = vis_for(hyp0,DC1);
.cont27:
        fmul8x16        SCALE,%f24,%f24 ! (2_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %i5,_0x7f800000         ! (3_0) ax ? 0x7f800000
        ld      [%fp+ftmp1],%i1         ! (0_0) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f42           ! (2_1) dtmp2 += KA0;

        add     %g1,stridez,%o3         ! pz += stridez
        st      %f18,[%fp+ftmp3]        ! (2_0) ibase0 = ((int*)&hyp0)[0];
        bge,pn  %icc,.update28          ! (3_0) if ( ax >= 0x7f800000 )
        fdtos   %f38,%f1                ! (1_1) ftmp0 = (float)res0;
.cont28:
        fmuld   KA3,%f32,%f34           ! (4_1) dtmp2 = KA3 * xx0;
        cmp     %l6,0                   ! (3_0) ay ? 0
        st      %f1,[%g1]               ! (1_1) *pz = ftmp0;
        fand    %f28,DC2,%f30           ! (0_0) h_hi0 = vis_fand(hyp0,DC2);

        fsmuld  %f4,%f4,%f36            ! (3_0) hyp0 = x0 * (double)x0;
        sra     %i1,10,%l1              ! (0_0) ibase0 >>= 10;
        be,pn   %icc,.update29          ! (3_0) if ( ay == 0 )
        faddd   %f10,KA1,%f40           ! (3_1) dtmp2 += KA1;
.cont29:
        fmuld   %f62,%f42,%f38          ! (2_1) res0 *= dtmp2;
        and     %l1,2032,%o5            ! (0_0) si0 = ibase0 & 0x7f0;
        lda     [%i2+stridey]0x82,%l6   ! (4_0) ay = *(int*)py;
        fpsub32 DA1,%f24,%f24           ! (2_1) dbase0 = vis_fpsub32(DA1,dbase0);

        fsmuld  %f2,%f2,%f62            ! (3_0) dtmp0 = y0 * (double)y0;
        add     %o5,TBL,%l1             ! (0_0) (char*)TBL + si0
        lda     [stridex+%o4]0x82,%i5   ! (4_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (0_0) dtmp1 = hyp0 - h_hi0;

        add     %o3,stridez,%i4         ! pz += stridez
        add     %o4,stridex,%l0         ! px += stridex
        ldd     [TBL+%o5],%f42          ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (4_1) dtmp2 += KA2;

        fmuld   %f40,%f26,%f40          ! (3_1) dtmp2 *= xx0;
        add     %i2,stridey,%i2         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (4_0) ay &= 0x7fffffff;
        fand    %f16,DC0,%f30           ! (1_0) hyp0 = vis_fand(hyp0,DC0);

        fmuld   %f38,%f24,%f38          ! (2_1) res0 *= dbase0;
        and     %i5,_0x7fffffff,%i5     ! (4_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (4_0) y0 = *py;
        fand    %f20,DA0,%f24           ! (3_1) dbase0 = vis_fand(hyp0,DA0);

        faddd   %f36,%f62,%f20          ! (3_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (4_0) ay ? 0x7f800000
        ldd     [%l7+8],%f36            ! (3_1) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f26          ! (0_0) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f32,%f10          ! (4_1) dtmp2 *= xx0;
        lda     [stridex+%o4]0x82,%f4   ! (4_0) x0 = *px;
        bge,pn  %icc,.update30          ! (4_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (1_0) hyp0 = vis_for(hyp0,DC1);
.cont30:
        fmul8x16        SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %i5,_0x7f800000         ! (4_0) ax ? 0x7f800000
        ld      [%fp+ftmp2],%i1         ! (1_0) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f62           ! (3_1) dtmp2 += KA0;

        bge,pn  %icc,.update31          ! (4_0) if ( ax >= 0x7f800000 )
        st      %f20,[%fp+ftmp4]        ! (3_0) ibase0 = ((int*)&hyp0)[0];
.cont31:
        subcc   counter,5,counter       ! counter -= 5;
        fdtos   %f38,%f1                ! (2_1) ftmp0 = (float)res0;

        fmuld   KA3,%f26,%f34           ! (0_0) dtmp2 = KA3 * xx0;
        st      %f1,[%o3]               ! (2_1) *pz = ftmp0;
        bpos,pt %icc,.main_loop
        fand    %f28,DC2,%f30           ! (1_0) h_hi0 = vis_fand(hyp0,DC2);

        add     counter,5,counter       ! undo the speculative -= 5 before draining

! -------------------------------------------------------------------
! Pipeline drain: fewer than 5 elements remain.  Each stanza between
! the "subcc counter,1,counter / bneg .begin" guards completes one of
! the partially-processed iterations still in flight and stores its
! result, then falls through to finish the next-oldest one.  %o1
! tracks the current pz for the restart at .begin.
! -------------------------------------------------------------------
.tail:
        subcc   counter,1,counter
        bneg    .begin
        mov     %i4,%o1

        sra     %i1,10,%o2              ! (1_1) ibase0 >>= 10;
        faddd   %f10,KA1,%f40           ! (4_2) dtmp2 += KA1;

        fmuld   %f36,%f62,%f36          ! (3_2) res0 *= dtmp2;
        and     %o2,2032,%o2            ! (1_1) si0 = ibase0 & 0x7f0;
        fpsub32 DA1,%f24,%f24           ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);

        add     %o2,TBL,%o2             ! (1_1) (char*)TBL + si0
        fsubd   %f28,%f30,%f28          ! (1_1) dtmp1 = hyp0 - h_hi0;

        ldd     [%o2],%f42              ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (0_1) dtmp2 += KA2;

        fmuld   %f40,%f32,%f40          ! (4_2) dtmp2 *= xx0;

        fmuld   %f36,%f24,%f32          ! (3_2) res0 *= dbase0;
        fand    %f12,DA0,%f24           ! (4_2) dbase0 = vis_fand(hyp0,DA0);

        ldd     [%i3+8],%f62            ! (4_2) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f36          ! (1_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f26,%f10          ! (0_1) dtmp2 *= xx0;

        fmul8x16        SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
        faddd   %f40,KA0,%f42           ! (4_2) dtmp2 += KA0;

        add     %i4,stridez,%i3         ! pz += stridez
        fdtos   %f32,%f1                ! (3_2) ftmp0 = (float)res0;

        fmuld   KA3,%f36,%f34           ! (1_1) dtmp2 = KA3 * xx0;
        st      %f1,[%i4]               ! (3_2) *pz = ftmp0;

        subcc   counter,1,counter
        bneg    .begin
        mov     %i3,%o1

        faddd   %f10,KA1,%f40           ! (0_1) dtmp2 += KA1;

        fmuld   %f62,%f42,%f32          ! (4_2) res0 *= dtmp2;
        fpsub32 DA1,%f24,%f24           ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);


        faddd   %f34,KA2,%f10           ! (1_1) dtmp2 += KA2;

        fmuld   %f40,%f26,%f40          ! (0_1) dtmp2 *= xx0;

        fmuld   %f32,%f24,%f26          ! (4_2) res0 *= dbase0;
        fand    %f14,DA0,%f24           ! (0_1) dbase0 = vis_fand(hyp0,DA0);

        ldd     [%l1+8],%f62            ! (0_1) res0 = ((double*)((char*)arr + si0))[1];

        fmuld   %f10,%f36,%f10          ! (1_1) dtmp2 *= xx0;

        fmul8x16        SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        faddd   %f40,KA0,%f42           ! (0_1) dtmp2 += KA0;

        add     %i3,stridez,%o1         ! pz += stridez
        fdtos   %f26,%f1                ! (4_2) ftmp0 = (float)res0;

        st      %f1,[%i3]               ! (4_2) *pz = ftmp0;

        subcc   counter,1,counter
        bneg    .begin
        nop

        faddd   %f10,KA1,%f40           ! (1_1) dtmp2 += KA1;

        fmuld   %f62,%f42,%f26          ! (0_1) res0 *= dtmp2;
        fpsub32 DA1,%f24,%f24           ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);

        fmuld   %f40,%f36,%f40          ! (1_1) dtmp2 *= xx0;

        fmuld   %f26,%f24,%f36          ! (0_1) res0 *= dbase0;
        fand    %f16,DA0,%f24           ! (1_1) dbase0 = vis_fand(hyp0,DA0);

        ldd     [%o2+8],%f38            ! (1_1) res0 = ((double*)((char*)arr + si0))[1];

        fmul8x16        SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        faddd   %f40,KA0,%f62           ! (1_1) dtmp2 += KA0;

        add     %o1,stridez,%g1         ! pz += stridez
        fdtos   %f36,%f1                ! (0_1) ftmp0 = (float)res0;

        st      %f1,[%o1]               ! (0_1) *pz = ftmp0;

        subcc   counter,1,counter
        bneg    .begin
        mov     %g1,%o1

        fmuld   %f38,%f62,%f38          ! (1_1) res0 *= dtmp2;
        fpsub32 DA1,%f24,%f24           ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);

        fmuld   %f38,%f24,%f38          ! (1_1) res0 *= dbase0;

        fdtos   %f38,%f1                ! (1_1) ftmp0 = (float)res0;
        st      %f1,[%g1]               ! (1_1) *pz = ftmp0;

        ba      .begin
        add     %g1,stridez,%o1         ! pz += stridez

! -------------------------------------------------------------------
! .spec0: scalar special-case path for |x| or |y| >= 0x7f800000
! (Inf or NaN).  If either operand is exactly Inf the result is 0.0f
! (the annulled-branch "be,a" skips the multiply); otherwise the
! fmuls propagates the NaN into the stored result.  Then advance the
! pointers, consume one element, and restart the vector loop.
! -------------------------------------------------------------------
        .align  16
.spec0:
        fabss   %f2,%f2                 ! fabsf(y0);

        fabss   %f4,%f4                 ! fabsf(x0);

        fcmps   %f2,%f4                 ! raise invalid if either is SNaN

        cmp     %l6,_0x7f800000         ! ay ? 0x7f800000
        be,a    1f                      ! if( ay == 0x7f800000 )
        st      %g0,[%o1]               ! *pz = 0.0f;

        cmp     %i5,_0x7f800000         ! ax ? 0x7f800000
        be,a    1f                      ! if( ax == 0x7f800000 )
        st      %g0,[%o1]               ! *pz = 0.0f;

        fmuls   %f2,%f4,%f2             ! fabsf(x0) * fabsf(y0);
        st      %f2,[%o1]               ! *pz = fabsf(x0) * fabsf(y0);
1:
        add     %o4,stridex,%o4         ! px += stridex;
        add     %i2,stridey,%i2         ! py += stridey;

        add     %o1,stridez,%o1         ! pz += stridez;
        ba      .begin1
        sub     counter,1,counter       ! counter--;

! -------------------------------------------------------------------
! .spec1: scalar special-case path for ay == 0.  If ax != 0 the
! element is an ordinary value and rejoins the common path; if both
! arguments are zero, rhypot(0,0) = +Inf: the fdivs computes
! 1.0f/0.0f on purpose so the division-by-zero exception is raised
! exactly as the scalar function would raise it.
! -------------------------------------------------------------------
        .align  16
.spec1:
        cmp     %i5,0                   ! ax ? 0
        bne,pt  %icc,.cont_spec1        ! if ( ax != 0 )
        nop

        add     %o4,stridex,%o4         ! px += stridex;
        add     %i2,stridey,%i2         ! py += stridey;

        fdivs   %f7,%f9,%f2             ! 1.0f / 0.0f, raises div-by-zero
        st      %f2,[%o1]               ! *pz = 1.0f / 0.0f;

        add     %o1,stridez,%o1         ! pz += stridez;
        ba      .begin1
        sub     counter,1,counter       ! counter--;

! -------------------------------------------------------------------
! .update0 - .update31: out-of-range fix-up handlers for the
! pipelined loop.  All follow one template: .updateN serves the
! pipeline stage that has already consumed N in-flight elements.
! If counter <= N there is nothing beyond the offending element, so
! only a harmless dummy operand (a table constant, loaded in the
! branch delay slot) is substituted and the loop continues.
! Otherwise the vector is truncated at the bad element: the
! remaining count and the current px/py are parked in
! tmp_counter/tmp_px/tmp_py for reprocessing from .begin, and the
! loop finishes just the N good elements.  The .updateN+2 variants
! (ay == 0 case) first rejoin immediately when ax != 0, since
! hypot(x,0) is an ordinary value.
! -------------------------------------------------------------------
        .align  16
.update0:
        cmp     counter,1
        ble     .cont0
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont0
        mov     1,counter

        .align  16
.update1:
        cmp     counter,1
        ble     .cont1
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont1
        mov     1,counter

        .align  16
.update2:
        cmp     %i5,0
        bne     .cont2

        cmp     counter,1
        ble     .cont2
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont2
        mov     1,counter

        .align  16
.update3:
        cmp     counter,2
        ble     .cont3
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont3
        mov     2,counter

        .align  16
.update4:
        cmp     counter,2
        ble     .cont4
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont4
        mov     2,counter

        .align  16
.update5:
        cmp     %i5,0
        bne     .cont5

        cmp     counter,2
        ble     .cont5
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont5
        mov     2,counter

        .align  16
.update6:
        cmp     counter,3
        ble     .cont6
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont6
        mov     3,counter

        .align  16
.update7:
        cmp     counter,3
        ble     .cont7
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont7
        mov     3,counter

        .align  16
.update8:
        cmp     %i5,0
        bne     .cont8

        cmp     counter,3
        ble     .cont8
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont8
        mov     3,counter

! .update9 - .update17: same fix-up template (see the block comment
! above .update0), for the stages that keep 4 and 5 good elements.
        .align  16
.update9:
        cmp     counter,4
        ble     .cont9
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont9
        mov     4,counter

        .align  16
.update10:
        cmp     counter,4
        ble     .cont10
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont10
        mov     4,counter

        .align  16
.update11:
        cmp     %i5,0
        bne     .cont11

        cmp     counter,4
        ble     .cont11
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont11
        mov     4,counter

        .align  16
.update12:
        cmp     counter,5
        ble     .cont12
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont12
        mov     5,counter

        .align  16
.update13:
        cmp     counter,5
        ble     .cont13
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont13
        mov     5,counter

        .align  16
.update14:
        cmp     %i5,0
        bne     .cont14

        cmp     counter,5
        ble     .cont14
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont14
        mov     5,counter

        .align  16
.update15:
        cmp     counter,6
        ble     .cont15
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont15
        mov     6,counter

        .align  16
.update16:
        cmp     counter,6
        ble     .cont16
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont16
        mov     6,counter

! NOTE(review): unlike its neighbors .update15/.update16 (threshold 6,
! table-constant substitute), this handler keeps only 1 element and
! substitutes DC1 into %f62 — it appears to serve a different branch
! site (its caller is outside this view); verify against the loop
! prologue before changing.
        .align  16
.update17:
        cmp     %i5,0
        bne     .cont17

        cmp     counter,1
        ble     .cont17
        fmovd   DC1,%f62

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont17
        mov     1,counter

! .update18 - .update23: same fix-up template (see the block comment
! above .update0), for the stages that keep 2 and 3 good elements.
        .align  16
.update18:
        cmp     counter,2
        ble     .cont18
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont18
        mov     2,counter

        .align  16
.update19:
        cmp     counter,2
        ble     .cont19
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont19
        mov     2,counter

        .align  16
.update20:
! NOTE(review): the sibling "ay == 0" handlers (.update2/5/8/...) test
! ax in %i5 here, but this one tests %o1, which holds pz in the main
! loop.  The branch into .update20 is outside this view, so ax may
! legitimately live in %o1 at that site — confirm before changing.
        cmp     %o1,0
        bne     .cont20

        cmp     counter,2
        ble     .cont20
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont20
        mov     2,counter

        .align  16
.update21:
        cmp     counter,3
        ble     .cont21
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont21
        mov     3,counter

        .align  16
.update22:
        cmp     counter,3
        ble     .cont22
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont22
        mov     3,counter

        .align  16
.update23:
        cmp     %i5,0
        bne     .cont23

        cmp     counter,3
        ble     .cont23
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont23
        mov     3,counter

! .update24 - .update31: same fix-up template (see the block comment
! above .update0), for the stages that keep 4, 5 and 6 good elements.
        .align  16
.update24:
        cmp     counter,4
        ble     .cont24
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont24
        mov     4,counter

        .align  16
.update25:
        cmp     counter,4
        ble     .cont25
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont25
        mov     4,counter

        .align  16
.update26:
        cmp     %i5,0
        bne     .cont26

        cmp     counter,4
        ble     .cont26
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont26
        mov     4,counter

        .align  16
.update27:
        cmp     counter,5
        ble     .cont27
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont27
        mov     5,counter

        .align  16
.update28:
        cmp     counter,5
        ble     .cont28
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont28
        mov     5,counter

        .align  16
.update29:
        cmp     %i5,0
        bne     .cont29

        cmp     counter,5
        ble     .cont29
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont29
        mov     5,counter

        .align  16
.update30:
        cmp     counter,6
        ble     .cont30
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont30
        mov     6,counter

        .align  16
.update31:
        cmp     counter,6
        ble     .cont31
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont31
        mov     6,counter

        .align  16
.exit:
        ret                             ! return to caller
        restore                         ! pop register window in the delay slot
        SET_SIZE(__vrhypotf)