root/usr/src/lib/libmvec/common/vis/__vatan2.S
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "__vatan2.S"

#include "libm.h"

        RO_DATA
! Constant pool.  Each entry is one double written as two 32-bit words,
! high word first.  The entry code loads the entries with ldd at byte
! offsets 0x00 through 0x48 from this label, so the order must not change.
        .align  64
constants:
        .word   0x3ff921fb,0x54442d18   ! pio2     pi/2 rounded to double
        .word   0x3c91a626,0x33145c07   ! pio2_lo  low-order correction to pio2
        .word   0xbfd55555,0x555554ee   ! p1       close to -1/3
        .word   0x3fc99999,0x997a1559   ! p2       close to  1/5
        .word   0xbfc24923,0x158dfe02   ! p3       close to -1/7
        .word   0x3fbc639d,0x0ed1347b   ! p4       near      1/9
        .word   0xffffffff,0x00000000   ! mask     keeps the high 32 bits of a double
        .word   0x3fc00000,0x00000000   ! twom3    2**-3
        .word   0x46d00000,0x00000000   ! two110   2**110
        .word   0x3fe921fb,0x54442d18   ! pio4     pi/4 rounded to double

! local storage indices
!
! Offsets, relative to %fp, of the scratch slots inside the tmps bytes that
! the save at entry reserves.  Keep comments off the define lines: trailing
! text there would become part of the macro expansion.

! xscl, yscl: used by the .small paths to move the high word of a rescaled
! operand from an FP register to an integer register
#define xscl            STACK_BIAS-0x8
#define yscl            STACK_BIAS-0x10
! twom3, two110, pio4: stack copies of the rarely used constants
#define twom3           STACK_BIAS-0x18
#define two110          STACK_BIAS-0x20
#define pio4            STACK_BIAS-0x28
! junk: dummy destination for pipeline stages that hold no real element
#define junk            STACK_BIAS-0x30
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps            0x30

! register use

! i0  n
! i1  y
! i2  stridey
! i3  x
! i4  stridex
! i5  z

! l0  k0
! l1  k1
! l2  k2
! l3  hx
! l4  pz0
! l5  pz1
! l6  pz2
! l7  stridez

! the following are 64-bit registers in both V8+ and V9

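! g1  __vlibm_TBL_atan2
! g5  scratch (0x80000000 sign mask, then hx - 0x7fe00000)

! o0  hy
! o1  0x00004000
! o2  0x1420
! o3  0x7fe00000
! o4  0x03600000
! o5  0x00100000
! o7  scratch (e.g. hy while swapping hx and hy, then hx - hy - 0x03600000)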

! f0  y0
! f2  x0
! f4  t0
! f6  ah0
! f8  al0
! f10 y1
! f12 x1
! f14 t1
! f16 ah1
! f18 al1
! f20 y2
! f22 x2
! f24 t2
! f26 ah2
! f28 al2
! f30 scratch (stage 0 temporaries)
! f32 scratch (stage 1 temporaries)
! f34 scratch (stage 2 temporaries)
! f36 sx0
! f38 sx1
! f40 sx2
! f42 sy0
! f44 sy1
! f46 sy2

! Names for the FP registers that hold loop-invariant constants.  All but
! signbit are loaded from the constants table at entry; signbit is built
! there with fzero followed by fnegd, i.e. a double with only the sign bit
! set (-0.0).
#define mask    %f48
#define signbit %f50
#define pio2    %f52
#define pio2_lo %f54
#define p1      %f56
#define p2      %f58
#define p3      %f60
#define p4      %f62

        ENTRY(__vatan2)
!
! Vector atan2: for each of n elements, read one double from y and one
! from x, and store one double result through z, advancing each pointer
! by its own stride.
!
! In:   %i0 = n        element count
!       %i1 = y        %i2 = stridey
!       %i3 = x        %i4 = stridex
!       %i5 = z        stridez is the seventh argument, read from the
!                      caller frame below
!       Strides are counted in doubles; they are scaled by 8 below.
! Out:  results are stored through z.  Nothing is placed in a return
!       register, so the routine is presumably void - TODO confirm
!       against the C prototype.
!
! NOTE(review): the first element is loaded with ordinary (faulting) loads
! and processed before n is ever tested, so this code appears to assume
! n >= 1 - confirm that callers guarantee it.
!
! The loop is software pipelined three deep.  The amount of extra
! indentation in front of a mnemonic appears to mark the pipeline stage it
! belongs to: none = stage 0 (f0-f8, l0, l4), one space = stage 1
! (f10-f18, l1, l5), two spaces = stage 2 (f20-f28, l2, l6).
!
! new register window; the frame includes tmps bytes of scratch space
        save    %sp,-SA(MINFRAME)-tmps,%sp
! macros from libm.h; afterwards o0 is used as the base of the constants
! table and o1 as the address of __vlibm_TBL_atan2
        PIC_SETUP(l7)
        PIC_SET(l7,constants,o0)
        PIC_SET(l7,__vlibm_TBL_atan2,o1)
        wr      %g0,0x82,%asi           ! set %asi for non-faulting loads
! keep the table base in g1; o1 is reused for a constant below
        mov     %o1, %g1
! l7 = stridez, the seventh argument, fetched from the caller frame
#ifdef __sparcv9
        ldx     [%fp+STACK_BIAS+0xb0],%l7
#else
        ld      [%fp+0x5c],%l7
#endif
        ldd     [%o0+0x00],pio2         ! load/set up constants
        ldd     [%o0+0x08],pio2_lo
        ldd     [%o0+0x10],p1
        ldd     [%o0+0x18],p2
        ldd     [%o0+0x20],p3
        ldd     [%o0+0x28],p4
        ldd     [%o0+0x30],mask
! signbit = -0.0: zero the register, then flip its sign
        fzero   signbit
        fnegd   signbit,signbit
! integer constants used by the range tests and the table index
! (see the register map for o1-o5)
        sethi   %hi(0x00004000),%o1
        sethi   %hi(0x1420),%o2
        or      %o2,%lo(0x1420),%o2
        sethi   %hi(0x7fe00000),%o3
        sethi   %hi(0x03600000),%o4
        sethi   %hi(0x00100000),%o5
        ldd     [%o0+0x38],%f0          ! copy rarely used constants to stack
        ldd     [%o0+0x40],%f2
        ldd     [%o0+0x48],%f4
        std     %f0,[%fp+twom3]
        std     %f2,[%fp+two110]
        std     %f4,[%fp+pio4]
        sll     %i2,3,%i2               ! scale strides
        sll     %i4,3,%i4
        sll     %l7,3,%l7
! The first trip through .loop completes and stores a stage-2 element
! that does not exist yet.  Zero its registers and point its destination
! l6 at the junk slot so that store is harmless.
        fzero   %f20                    ! loop prologue
        fzero   %f22
        fzero   %f24
        fzero   %f26
        fzero   %f46
        add     %fp,junk,%l6
! load the first element: doubles into f0 and f8, high words into o0 and l3
        ld      [%i1],%f0               ! *y
        ld      [%i1+4],%f1
        ld      [%i3],%f8               ! *x
        ld      [%i3+4],%f9
        ld      [%i1],%o0               ! hy
        ba      .loop
        ld      [%i3],%l3               ! hx
! 16-byte aligned
        .align  16
!
! Top of the software-pipelined loop.  On entry:
!   f0, f8   = y and x of the element entering stage 0
!   o0, l3   = high words of that y and x
!   f20-f26, f46, l6 = partial sums, sign and destination of the element
!              still in stage 2 (dummies on the first trip)
! One trip stores three results (the pending stage-2 element, then the
! stage-0 and stage-1 elements) and decrements n three times.
!
.loop:
! stage 0: t0 = |y|; l4 = destination of this element
        fabsd   %f0,%f4
        mov     %i5,%l4
        add     %i1,%i2,%i1             ! y += stridey

        fabsd   %f8,%f2
        add     %i3,%i4,%i3             ! x += stridex
        add     %i5,%l7,%i5             ! z += stridez

! f42 = sign bit of y, f36 = sign bit of x; g5 = mask for the integer sign
        fand    %f0,signbit,%f42
        sethi   %hi(0x80000000),%g5

        fand    %f8,signbit,%f36
        andn    %o0,%g5,%o0
        andn    %l3,%g5,%l3

! fcc0 = compare |y| with |x|; it stays live for the rest of stage 0
        fcmpd   %fcc0,%f4,%f2

        fmovd   %f4,%f0

! after the swap f0 holds the smaller magnitude and f2 the larger
        fmovdg  %fcc0,%f2,%f0           ! swap if |y| > |x|

        fmovdg  %fcc0,%f4,%f2
        mov     %o0,%o7
         lda    [%i1]%asi,%f10          ! preload next argument

! stage 2: continue summing the pending result
          faddd %f26,%f20,%f26
         lda    [%i1+4]%asi,%f11

          faddd %f22,%f24,%f22
! swap the high words the same way: o0 = smaller, l3 = larger
        movg    %fcc0,%l3,%o0

        movg    %fcc0,%o7,%l3

        fbu,pn  %fcc0,.nan0             ! if x or y is nan
! delay slot
         lda    [%i3]%asi,%f18

        sub     %l3,%o0,%l0             ! hx - hy
        sub     %l3,%o3,%g5
         fabsd  %f10,%f14
         lda    [%i3+4]%asi,%f19

        sub     %l0,%o4,%o7
          faddd %f22,%f26,%f26

! The and of the two differences is negative only when both are, i.e. when
! hx < 0x7fe00000 and hx - hy < 0x03600000; otherwise take the big path.
        andcc   %g5,%o7,%g0
        bge,pn  %icc,.big0              ! if |x| or |x/y| is big
! delay slot
        nop

! hy < 0x00100000 means the smaller operand is zero or subnormal
         fabsd  %f18,%f12
        cmp     %o0,%o5
        bl,pn   %icc,.small0            ! if |y| is small
! delay slot
         lda    [%i1]%asi,%o0

! k = hx - hy + 0x4000; count this element and leave for .last1 if it was
! the last one
        add     %l0,%o1,%l0             ! k
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last1
! delay slot
         lda    [%i3]%asi,%l3

! Stage 0 continues with k in l0 while stage 1 starts on the element
! preloaded into f10 and f18 (high words in o0 and l3).  Also reached from
! the stage-0 out-of-line paths and from .last1.
.cont1:
! stage 0: k >> 10 starts the table byte offset; l5 = stage-1 destination;
! stage 2: apply the sign and store the pending result through l6
        srl     %l0,10,%l0
         mov    %i5,%l5
          fxor  %f26,%f46,%f26
          st    %f26,[%l6]

! stage 1: f44 = sign bit of y; stage 0: clear the low five offset bits
         fand   %f10,signbit,%f44
        andn    %l0,0x1f,%l0
         add    %i1,%i2,%i1
          st    %f27,[%l6+4]

! stage 1: f38 = sign bit of x; stage 0: cap the offset at 0x1420
         fand   %f18,signbit,%f38
        cmp     %l0,%o2
        movg    %icc,%o2,%l0

! stage 1: fcc1 = compare |y| with |x|
         fcmpd  %fcc1,%f14,%f12
         add    %i3,%i4,%i3
         add    %i5,%l7,%i5

! stage 0: l0 = address of the selected table entry
         fmovd  %f14,%f10
        add     %l0,%g1,%l0
         sethi  %hi(0x80000000),%g5

! stage 0: f4 = t, the factor stored at entry+0x10;
! f6 = x with its low 32 bits cleared (x is the larger magnitude)
        ldd     [%l0+0x10],%f4
        fand    %f2,mask,%f6
         andn   %o0,%g5,%o0
         andn   %l3,%g5,%l3

! stage 1: swap so that f10 is the smaller magnitude and f12 the larger
         fmovdg %fcc1,%f12,%f10

         fmovdg %fcc1,%f14,%f12
         mov    %o0,%o7
          lda   [%i1]%asi,%f20

! stage 0: f30 = low part of x; f6 = (high part of x) * t
        fsubd   %f2,%f6,%f30
        fmuld   %f6,%f4,%f6
         movg   %fcc1,%l3,%o0

! stage 0: f8 = y * t
        fmuld   %f0,%f4,%f8
         movg   %fcc1,%o7,%l3

! The stage-0 operations placed after each of the branches below are
! repeated at the corresponding branch target.
          lda   [%i1+4]%asi,%f21
         fbu,pn %fcc1,.nan1
! delay slot
         nop

          lda   [%i3]%asi,%f28
         sub    %l3,%o0,%l1
         sub    %l3,%o3,%g5

! stage 0: f30 = (low part of x) * t; f4 = y - (high part of x) * t
          lda   [%i3+4]%asi,%f29
        fmuld   %f30,%f4,%f30
        fsubd   %f0,%f6,%f4
         sub    %l1,%o4,%o7

! stage 1: same big test as in stage 0
          fabsd %f20,%f24
         andcc  %g5,%o7,%g0
         bge,pn %icc,.big1
! delay slot
         nop

! stage 0: f8 = x + y * t (the denominator); stage 1: small test
        faddd   %f2,%f8,%f8
         cmp    %o0,%o5
         bl,pn  %icc,.small1
! delay slot
          lda   [%i1]%asi,%o0

! stage 1: k = hx - hy + 0x4000; count this element
          fabsd %f28,%f22
         add    %l1,%o1,%l1
         addcc  %i0,-1,%i0
          lda   [%i3]%asi,%l3

! stage 0: f4 = y - x * t (the numerator); leave for .last2 if stage 1
! took the last element
        fsubd   %f4,%f30,%f4
         srl    %l1,10,%l1
         ble,pn %icc,.last2
! delay slot
          mov   %i5,%l6

! Stage 2 starts on the element preloaded into f20 and f28 (high words in
! o0 and l3, destination l6) while stages 0 and 1 continue.  Also reached
! from the stage-1 out-of-line paths and from .last2.
.cont2:
! stage 2: f46 = sign bit of y; stage 1: finish the table offset
          fand  %f20,signbit,%f46
         andn   %l1,0x1f,%l1
          add   %i1,%i2,%i1

          fand  %f28,signbit,%f40
         cmp    %l1,%o2
         movg   %icc,%o2,%l1

! stage 2: fcc2 = compare |y| with |x|
          fcmpd %fcc2,%f24,%f22
          add   %i3,%i4,%i3
          add   %i5,%l7,%i5

! stage 0: f4 = (y - x*t) / (x + y*t), the reduced argument
        fdivd   %f4,%f8,%f4
          fmovd %f24,%f20
         add    %l1,%g1,%l1
          sethi %hi(0x80000000),%g5

! stage 1: load t and split x, as stage 0 did in .cont1
         ldd    [%l1+0x10],%f14
         fand   %f12,mask,%f16
          andn  %o0,%g5,%o0
          andn  %l3,%g5,%l3

          fmovdg %fcc2,%f22,%f20

          fmovdg %fcc2,%f24,%f22
          mov   %o0,%o7

         fsubd  %f12,%f16,%f32
         fmuld  %f16,%f14,%f16
          movg  %fcc2,%l3,%o0

! stage 0: begin the constant term; f8 starts as -pio2_lo
        fnegd   pio2_lo,%f8             ! al
         fmuld  %f10,%f14,%f18
          movg  %fcc2,%o7,%l3

! The stage-0 and stage-1 operations placed after each of the branches
! below are repeated at the corresponding branch target.
        fzero   %f0
          fbu,pn %fcc2,.nan2
! delay slot
          nop

! stage 0: f0 = -0.0 if the operands were swapped, else +0.0
        fmovdg  %fcc0,signbit,%f0
          sub   %l3,%o0,%l2
          sub   %l3,%o3,%g5

         fmuld  %f32,%f14,%f32
         fsubd  %f10,%f16,%f14
          sub   %l2,%o4,%o7

         faddd  %f12,%f18,%f18
          andcc %g5,%o7,%g0
          bge,pn %icc,.big2
! delay slot
          nop

! stage 0: f36 = sign of x, flipped if the operands were swapped
        fxor    %f36,%f0,%f36
          cmp   %o0,%o5
          bl,pn %icc,.small2
! delay slot
          nop

! All three stages are now in flight.  For each stage, with z the reduced
! argument, the code below forms
!   z + p1*z^3 + p2*z^5 + p3*z^7 + p4*z^9
! plus a low-part sum (al) and a high-part sum (ah), and finally xors in
! the result sign.  al and ah combine plus or minus pio2_lo / pio2, chosen
! by the sign of x and by whether the operands were swapped, with the
! table words at entry+0x08 and entry+0x00 (presumably the low and high
! parts of atan(t) - TODO confirm against the table source).
! Also reached from the stage-2 out-of-line paths.
.cont3:
! stage 0: al starts as -0.0 when swapped, else stays -pio2_lo;
! stage 2: k = hx - hy + 0x4000
        fmovdg  %fcc0,signbit,%f8
          add   %l2,%o1,%l2

         fsubd  %f14,%f32,%f14
          srl   %l2,10,%l2

        fxor    %f36,pio2_lo,%f30       ! al
          andn  %l2,0x1f,%l2

        fxor    %f36,pio2,%f0           ! ah
          cmp   %l2,%o2
          movg  %icc,%o2,%l2

! f42 becomes the sign applied to the final stage-0 result
        fxor    %f42,%f36,%f42          ! sy

        faddd   %f8,%f30,%f8
        ldd     [%l0+0x8],%f30
          add   %l2,%g1,%l2

! stage 1: reduced argument z1
         fdivd  %f14,%f18,%f14
         fzero  %f10

          ldd   [%l2+0x10],%f24
          fand  %f22,mask,%f26

         fmovdg %fcc1,signbit,%f10

! stage 0: f36 = z^2; al += table word at entry+0x08
        fmuld   %f4,%f4,%f36
        faddd   %f8,%f30,%f8

          fsubd %f22,%f26,%f34
          fmuld %f26,%f24,%f26

          fmuld %f20,%f24,%f28
         fxor   %f38,%f10,%f38

! stage 0: f6 = p3*z
        fmuld   %f4,p3,%f6
         fnegd  pio2_lo,%f18

! stage 0: f2 = p2*z^2
        fmuld   %f36,p2,%f2
         fmovdg %fcc1,signbit,%f18

! stage 0: f36 = z^3
        fmuld   %f36,%f4,%f36
         fxor   %f38,pio2,%f10

          fmuld %f34,%f24,%f34
          fsubd %f20,%f26,%f24

          faddd %f22,%f28,%f28

! stage 0: f2 = p1 + p2*z^2
        faddd   %f2,p1,%f2

! stage 0: f30 = p4*z^3
        fmuld   %f36,p4,%f30
         fxor   %f38,pio2_lo,%f32

          fsubd %f24,%f34,%f24

         fxor   %f44,%f38,%f44

! stage 0: f2 = p1*z^3 + p2*z^5
        fmuld   %f36,%f2,%f2
         faddd  %f18,%f32,%f18
         ldd    [%l1+0x8],%f32

! stage 0: f36 = z^6; f30 = p3*z + p4*z^3
        fmuld   %f36,%f36,%f36
        faddd   %f6,%f30,%f30

! stage 2: reduced argument z2
          fdivd %f24,%f28,%f24
          fzero %f20

          fmovdg %fcc2,signbit,%f20

! stage 0: add al into the polynomial sum
        faddd   %f2,%f8,%f2

         fmuld  %f14,%f14,%f38
         faddd  %f18,%f32,%f18

! stage 0: f36 = p3*z^7 + p4*z^9
        fmuld   %f36,%f30,%f36
          fxor  %f40,%f20,%f40

! stage 0: f6 starts as -pio2, or -0.0 when the operands were swapped
        fnegd   pio2,%f6                ! ah
         fmuld  %f14,p3,%f16

        fmovdg  %fcc0,signbit,%f6

         fmuld  %f38,p2,%f12
          fnegd pio2_lo,%f28

        faddd   %f2,%f36,%f2
         fmuld  %f38,%f14,%f38

! stage 0: f6 += signed pio2, giving 0, plus or minus pio2, or -pi;
! then fetch the table word at entry+0x00
        faddd   %f6,%f0,%f6
        ldd     [%l0],%f0

          fmovdg %fcc2,signbit,%f28

         faddd  %f12,p1,%f12

         fmuld  %f38,p4,%f32
          fxor  %f40,pio2_lo,%f34

          fxor  %f40,pio2,%f20

! stage 0: add z itself to the polynomial sum
        faddd   %f2,%f4,%f2

         fmuld  %f38,%f12,%f12
          fxor  %f46,%f40,%f46

         fmuld  %f38,%f38,%f38
         faddd  %f16,%f32,%f32

          faddd %f28,%f34,%f28
          ldd   [%l2+0x8],%f34

! stage 0: ah += table word; f0, f8, o0 and l3 are then free to receive
! the element that will enter stage 0 on the next trip
        faddd   %f6,%f0,%f6
        lda     [%i1]%asi,%f0           ! preload next argument

         faddd  %f12,%f18,%f12
        lda     [%i1+4]%asi,%f1

          fmuld %f24,%f24,%f40
        lda     [%i3]%asi,%f8

         fmuld  %f38,%f32,%f38
          faddd %f28,%f34,%f28
        lda     [%i3+4]%asi,%f9

         fnegd  pio2,%f16
          fmuld %f24,p3,%f26
        lda     [%i1]%asi,%o0

         fmovdg %fcc1,signbit,%f16
        lda     [%i3]%asi,%l3

          fmuld %f40,p2,%f22

         faddd  %f12,%f38,%f12
          fmuld %f40,%f24,%f40

! stage 0: total = polynomial sum + ah
        faddd   %f2,%f6,%f6

         faddd  %f16,%f10,%f16
         ldd    [%l1],%f10

          faddd %f22,p1,%f22

         faddd  %f12,%f14,%f12
          fmuld %f40,p4,%f34

! stage 0: apply the sign and store the result through l4
        fxor    %f6,%f42,%f6
        st      %f6,[%l4]

         faddd  %f16,%f10,%f16
        st      %f7,[%l4+4]

          fmuld %f40,%f22,%f22

          fmuld %f40,%f40,%f40
          faddd %f26,%f34,%f34

          fnegd pio2,%f26

         faddd  %f12,%f16,%f16

          faddd %f22,%f28,%f22

          fmuld %f40,%f34,%f40
          fmovdg %fcc2,signbit,%f26

! -

! stage 1: apply the sign and store the result through l5
         fxor   %f16,%f44,%f16
         st     %f16,[%l5]

          faddd %f26,%f20,%f26
         st     %f17,[%l5+4]
        addcc   %i0,-1,%i0

! loop while elements remain; the delay slot fetches the stage-2 table
! word at entry+0x00, which is added at the top of .loop or just below
          faddd %f22,%f40,%f22
        bg,pt   %icc,.loop
! delay slot
          ldd   [%l2],%f20


! loop exit: finish the stage-2 sums that .loop would have completed
          faddd %f26,%f20,%f26
          faddd %f22,%f24,%f22
          faddd %f22,%f26,%f26
! common exit: apply the sign, store the stage-2 result through l6, return
.done_from_special0:
          fxor  %f26,%f46,%f26
          st    %f26,[%l6]
          st    %f27,[%l6+4]
          ret
          restore



        .align  16
! The stage-0 element was the last one.  Give stage 1 a dummy element
! (y = x = pio2, high word 0x3ff921fb) and point z at the junk slot so
! that stage 1 stores there, then rejoin the pipeline at .cont1.
.last1:
        fmovd   pio2,%f10               ! set up dummy arguments
        fmovd   pio2,%f18
        fabsd   %f10,%f14
        fabsd   %f18,%f12
        sethi   %hi(0x3ff921fb),%o0
        or      %o0,%lo(0x3ff921fb),%o0
        mov     %o0,%l3
        ba,pt   %icc,.cont1
! delay slot
        add     %fp,junk,%i5



        .align  16
! The stage-1 element was the last one.  Give stage 2 a dummy element
! (y = x = pio2, high word 0x3ff921fb) and make its destination l6 the
! junk slot, then rejoin the pipeline at .cont2.
.last2:
        fmovd   pio2,%f20
        fmovd   pio2,%f28
        fabsd   %f20,%f24
        fabsd   %f28,%f22
        sethi   %hi(0x3ff921fb),%o0
        or      %o0,%lo(0x3ff921fb),%o0
        mov     %o0,%l3
        ba,pt   %icc,.cont2
! delay slot
        add     %fp,junk,%l6



        .align  16
! Stage 0 saw an unordered compare: x or y is NaN.
! First finish the stage-2 add that .loop performs after its fbu branch.
.nan0:
          faddd %f22,%f26,%f26
! Entry from .special0, where that stage-2 add has already been done.
! Repeat the stage-1 preloads that .loop performs after the branch, then
! produce the product of the two operands as the result; it is NaN
! because at least one operand is.
.nan0_from_special0:
         fabsd  %f10,%f14
         lda    [%i3+4]%asi,%f19
         fabsd  %f18,%f12
         lda    [%i1]%asi,%o0
         lda    [%i3]%asi,%l3
        ba,pt   %icc,.special0
! delay slot
        fmuld   %f0,%f2,%f6


        .align  16
! Stage 0: hx >= 0x7fe00000 or hx - hy >= 0x03600000.
! On entry g5 = hx - 0x7fe00000, l0 = hx - hy, and f8 still holds the
! original x.  The first three instructions repeat the stage-1 preloads
! that .loop performs after the branch.
.big0:
         fabsd  %f18,%f12
         lda    [%i1]%asi,%o0
         lda    [%i3]%asi,%l3
        cmp     %g5,%o5
        bge,pn  %icc,.return_ah0        ! if hx >= 0x7ff00000
! delay slot
        nop
        cmp     %l0,%o4
        bge,pn  %icc,1f                 ! if hx - hy >= 0x03600000
! delay slot
        nop
! x is large but finite and the ratio is moderate: scale both operands
! by 2**-3 and rejoin the normal path with k = hx - hy + 0x4000
        ldd     [%fp+twom3],%f6
        fmuld   %f0,%f6,%f0
        fmuld   %f2,%f6,%f2
        add     %l0,%o1,%l0
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last1
! delay slot
        nop
        ba,pt   %icc,.cont1
! delay slot
        nop
! The ratio of the magnitudes is huge.  If the operands were swapped, or
! if the original x compares below -0.0, only the constant term is
! returned; otherwise the result is the quotient itself.
1:
        fbg,pn  %fcc0,.return_ah0
! delay slot
        nop
        fcmpd   %fcc3,%f8,signbit
        fbl,pn  %fcc3,.return_ah0
! delay slot
        nop
        ba,pt   %icc,.special0
! delay slot
        fdivd   %f0,%f2,%f6


        .align  16
! Stage 0: the high word of the smaller operand is below 0x00100000, so
! that operand is zero or subnormal.  The first instruction repeats the
! stage-1 preload that .loop performs after the branch.
.small0:
         lda    [%i3]%asi,%l3
! a smaller operand equal to zero needs only the constant term
        fcmpd   %fcc3,%f0,signbit
        fbe,pt  %fcc3,.return_ah0
! delay slot
        nop
! otherwise scale both operands by 2**110, fetch their new high words
! through the stack, and recompute k from them
        ldd     [%fp+two110],%f6
        fmuld   %f0,%f6,%f0
        fmuld   %f2,%f6,%f2
        st      %f0,[%fp+yscl]
        ld      [%fp+yscl],%o7
        st      %f2,[%fp+xscl]
        ld      [%fp+xscl],%l0
        sub     %l0,%o7,%l0
        add     %l0,%o1,%l0
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last1
! delay slot
        nop
        ba,pt   %icc,.cont1
! delay slot
        nop


        .align  16
! Stage 0 result is the constant term alone.  It is built as in the main
! path: f36 = sign of x, flipped if the operands were swapped; f6 = signed
! pio2 added to -pio2 (or to -0.0 when swapped); f42 = final sign.
! Expects g5 = hx - 0x7fe00000 and l0 = hx - hy.
.return_ah0:
        fzero   %f0
        fmovdg  %fcc0,signbit,%f0
        fxor    %f36,%f0,%f36
        fxor    %f36,pio2,%f0
        fxor    %f42,%f36,%f42
        fnegd   pio2,%f6
        fmovdg  %fcc0,signbit,%f6
        faddd   %f6,%f0,%f6
! o7 = hy - 0x7fe00000
        sub     %g5,%l0,%o7
        cmp     %o7,%o5
        bl,pt   %icc,1f                 ! if hy < 0x7ff00000
! delay slot
        nop
! hy >= 0x7ff00000 as well: add pi/4 to the constant term
        ldd     [%fp+pio4],%f0
        faddd   %f6,%f0,%f6
1:
! the converted value in f4 is not read afterwards; presumably this is
! done only for its effect on the FP exception flags - TODO confirm
        fdtoi   %f6,%f4
! Store a stage-0 result computed outside the pipeline, then restart
! stage 0 on the element already preloaded into the stage-1 registers.
.special0:
        fxor    %f6,%f42,%f6
        st      %f6,[%l4]
        st      %f7,[%l4+4]
        addcc   %i0,-1,%i0
        ble,pn  %icc,.done_from_special0
! delay slot
        nop
! move the preloaded element from the stage-1 registers into the stage-0
! registers and repeat the front end of .loop for it
        fmovd   %f10,%f0
        fmovd   %f18,%f8
        fmovd   %f14,%f4
        fmovd   %f12,%f2
        mov     %i5,%l4
        add     %i1,%i2,%i1
        add     %i3,%i4,%i3
        add     %i5,%l7,%i5
        fand    %f0,signbit,%f42
        sethi   %hi(0x80000000),%g5
        fand    %f8,signbit,%f36
        andn    %o0,%g5,%o0
        andn    %l3,%g5,%l3
        fcmpd   %fcc0,%f4,%f2
        fmovd   %f4,%f0
        fmovdg  %fcc0,%f2,%f0
        fmovdg  %fcc0,%f4,%f2
        mov     %o0,%o7
        movg    %fcc0,%l3,%o0
        movg    %fcc0,%o7,%l3
! preload the following element into the stage-1 registers
         lda    [%i1]%asi,%f10
         lda    [%i1+4]%asi,%f11
        fbu,pn  %fcc0,.nan0_from_special0
! delay slot
         lda    [%i3]%asi,%f18
         fabsd  %f10,%f14
         lda    [%i3+4]%asi,%f19
! same big and small tests as in .loop
        sub     %l3,%o0,%l0
        sub     %l3,%o3,%g5
        sub     %l0,%o4,%o7
        andcc   %g5,%o7,%g0
        bge,pn  %icc,.big0
! delay slot
        nop
         fabsd  %f18,%f12
        cmp     %o0,%o5
        bl,pn   %icc,.small0
! delay slot
         lda    [%i1]%asi,%o0
        add     %l0,%o1,%l0
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last1
! delay slot
         lda    [%i3]%asi,%l3
        ba,pt   %icc,.cont1
! delay slot
        nop



        .align  16
! Stage 1 saw an unordered compare: x or y is NaN.
! First finish the stage-0 operations that .cont1 performs after its fbu
! branch.
.nan1:
        fmuld   %f30,%f4,%f30
        fsubd   %f0,%f6,%f4
        faddd   %f2,%f8,%f8
        fsubd   %f4,%f30,%f4
! Entry from .special1, where no stage-0 work is pending.  Repeat the
! stage-2 preloads, then produce the product of the two operands as the
! stage-1 result; it is NaN because at least one operand is.
.nan1_from_special1:
         lda    [%i3]%asi,%f28
         lda    [%i3+4]%asi,%f29
         fabsd  %f20,%f24
         lda    [%i1]%asi,%o0
         fabsd  %f28,%f22
         lda    [%i3]%asi,%l3
         mov    %i5,%l6
        ba,pt   %icc,.special1
! delay slot
        fmuld   %f10,%f12,%f16


        .align  16
! Stage 1: hx >= 0x7fe00000 or hx - hy >= 0x03600000.
! First finish the stage-0 operations that .cont1 performs after its bge
! branch.
.big1:
        faddd   %f2,%f8,%f8
        fsubd   %f4,%f30,%f4
! Entry from .special1, where no stage-0 work is pending.  On entry
! g5 = hx - 0x7fe00000, l1 = hx - hy, and f18 still holds the original x.
.big1_from_special1:
         lda    [%i1]%asi,%o0
         fabsd  %f28,%f22
         lda    [%i3]%asi,%l3
         mov    %i5,%l6
        cmp     %g5,%o5
        bge,pn  %icc,.return_ah1
! delay slot
        nop
        cmp     %l1,%o4
        bge,pn  %icc,1f
! delay slot
        nop
! x is large but finite and the ratio is moderate: scale both operands
! by 2**-3 and rejoin the normal path with (hx - hy + 0x4000) >> 10
        ldd     [%fp+twom3],%f16
        fmuld   %f10,%f16,%f10
        fmuld   %f12,%f16,%f12
        add     %l1,%o1,%l1
        srl     %l1,10,%l1
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last2
! delay slot
        nop
        ba,pt   %icc,.cont2
! delay slot
        nop
! The ratio of the magnitudes is huge.  If the operands were swapped, or
! if the original x compares below -0.0, only the constant term is
! returned; otherwise the result is the quotient itself.
1:
        fbg,pn  %fcc1,.return_ah1
! delay slot
        nop
        fcmpd   %fcc3,%f18,signbit
        fbl,pn  %fcc3,.return_ah1
! delay slot
        nop
        ba,pt   %icc,.special1
! delay slot
        fdivd   %f10,%f12,%f16


        .align  16
! Stage 1: the smaller operand is zero or subnormal.
! First finish the stage-0 operation that .cont1 performs after its bl
! branch.
.small1:
        fsubd   %f4,%f30,%f4
! Entry from .special1, where no stage-0 work is pending.
.small1_from_special1:
         fabsd  %f28,%f22
         lda    [%i3]%asi,%l3
         mov    %i5,%l6
! a smaller operand equal to zero needs only the constant term
        fcmpd   %fcc3,%f10,signbit
        fbe,pt  %fcc3,.return_ah1
! delay slot
        nop
! otherwise scale both operands by 2**110, fetch their new high words
! through the stack, and recompute the table offset from them
        ldd     [%fp+two110],%f16
        fmuld   %f10,%f16,%f10
        fmuld   %f12,%f16,%f12
        st      %f10,[%fp+yscl]
        ld      [%fp+yscl],%o7
        st      %f12,[%fp+xscl]
        ld      [%fp+xscl],%l1
        sub     %l1,%o7,%l1
        add     %l1,%o1,%l1
        srl     %l1,10,%l1
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last2
! delay slot
        nop
        ba,pt   %icc,.cont2
! delay slot
        nop


        .align  16
! Stage 1 result is the constant term alone.  It is built as in the main
! path: f38 = sign of x, flipped if the operands were swapped; f16 =
! signed pio2 added to -pio2 (or to -0.0 when swapped); f44 = final sign.
! Expects g5 = hx - 0x7fe00000 and l1 = hx - hy.
.return_ah1:
        fzero   %f10
        fmovdg  %fcc1,signbit,%f10
        fxor    %f38,%f10,%f38
        fxor    %f38,pio2,%f10
        fxor    %f44,%f38,%f44
        fnegd   pio2,%f16
        fmovdg  %fcc1,signbit,%f16
        faddd   %f16,%f10,%f16
! o7 = hy - 0x7fe00000; skip the pi/4 adjustment if hy < 0x7ff00000
        sub     %g5,%l1,%o7
        cmp     %o7,%o5
        bl,pt   %icc,1f
! delay slot
        nop
        ldd     [%fp+pio4],%f10
        faddd   %f16,%f10,%f16
1:
! the converted value in f14 is not read afterwards; presumably this is
! done only for its effect on the FP exception flags - TODO confirm
        fdtoi   %f16,%f14
! Store a stage-1 result computed outside the pipeline, then restart
! stage 1 on the element already preloaded into the stage-2 registers.
.special1:
        fxor    %f16,%f44,%f16
        st      %f16,[%l5]
        st      %f17,[%l5+4]
        addcc   %i0,-1,%i0
        bg,pn   %icc,1f
! delay slot
        nop
! no elements remain: substitute a dummy element whose result goes to
! the junk slot
        fmovd   pio2,%f20               ! set up dummy argument
        fmovd   pio2,%f28
        fabsd   %f20,%f24
        fabsd   %f28,%f22
        sethi   %hi(0x3ff921fb),%o0
        or      %o0,%lo(0x3ff921fb),%o0
        mov     %o0,%l3
        add     %fp,junk,%i5
1:
! move the element from the stage-2 registers into the stage-1 registers
! and repeat the stage-1 front end of .cont1 for it
        fmovd   %f20,%f10
        fmovd   %f28,%f18
        fmovd   %f24,%f14
        fmovd   %f22,%f12
        mov     %i5,%l5
        add     %i1,%i2,%i1
        add     %i3,%i4,%i3
        add     %i5,%l7,%i5
        fand    %f10,signbit,%f44
        sethi   %hi(0x80000000),%g5
        fand    %f18,signbit,%f38
        andn    %o0,%g5,%o0
        andn    %l3,%g5,%l3
        fcmpd   %fcc1,%f14,%f12
        fmovd   %f14,%f10
        fmovdg  %fcc1,%f12,%f10
        fmovdg  %fcc1,%f14,%f12
        mov     %o0,%o7
        movg    %fcc1,%l3,%o0
        movg    %fcc1,%o7,%l3
! preload the following element into the stage-2 registers
         lda    [%i1]%asi,%f20
         lda    [%i1+4]%asi,%f21
        fbu,pn  %fcc1,.nan1_from_special1
! delay slot
        nop
         lda    [%i3]%asi,%f28
         lda    [%i3+4]%asi,%f29
         fabsd  %f20,%f24
! same big and small tests as in .cont1
        sub     %l3,%o0,%l1
        sub     %l3,%o3,%g5
        sub     %l1,%o4,%o7
        andcc   %g5,%o7,%g0
        bge,pn  %icc,.big1_from_special1
! delay slot
        nop
        cmp     %o0,%o5
        bl,pn   %icc,.small1_from_special1
! delay slot
         lda    [%i1]%asi,%o0
         fabsd  %f28,%f22
         lda    [%i3]%asi,%l3
        add     %l1,%o1,%l1
        srl     %l1,10,%l1
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last2
! delay slot
         mov    %i5,%l6
        ba,pt   %icc,.cont2
! delay slot
        nop



        .align  16
! Stage 2 saw an unordered compare: x or y is NaN.
! First finish the stage-0 and stage-1 operations that .cont2 performs
! after its fbu branch.
.nan2:
        fmovdg  %fcc0,signbit,%f0
         fmuld  %f32,%f14,%f32
         fsubd  %f10,%f16,%f14
         faddd  %f12,%f18,%f18
        fxor    %f36,%f0,%f36
! Entry from .special2, where no earlier-stage work is pending.  The
! stage-2 result is the product of the two operands; it is NaN because
! at least one operand is.
.nan2_from_special2:
        ba,pt   %icc,.special2
! delay slot
        fmuld   %f20,%f22,%f26


        .align  16
! Stage 2: hx >= 0x7fe00000 or hx - hy >= 0x03600000.
! First finish the stage-0 operation that .cont2 performs after its bge
! branch.
.big2:
        fxor    %f36,%f0,%f36
! Entry from .special2, where no stage-0 work is pending.  On entry
! g5 = hx - 0x7fe00000, l2 = hx - hy, and f28 still holds the original x.
.big2_from_special2:
        cmp     %g5,%o5
        bge,pn  %icc,.return_ah2
! delay slot
        nop
        cmp     %l2,%o4
        bge,pn  %icc,1f
! delay slot
        nop
! x is large but finite and the ratio is moderate: scale both operands
! by 2**-3 and rejoin the normal path; .cont3 forms the table offset
! from l2
        ldd     [%fp+twom3],%f26
        fmuld   %f20,%f26,%f20
        fmuld   %f22,%f26,%f22
        ba,pt   %icc,.cont3
! delay slot
        nop
! The ratio of the magnitudes is huge.  If the operands were swapped, or
! if the original x compares below -0.0, only the constant term is
! returned; otherwise the result is the quotient itself.
1:
        fbg,pn  %fcc2,.return_ah2
! delay slot
        nop
        fcmpd   %fcc3,%f28,signbit
        fbl,pn  %fcc3,.return_ah2
! delay slot
        nop
        ba,pt   %icc,.special2
! delay slot
        fdivd   %f20,%f22,%f26


        .align  16
! Stage 2: the smaller operand is zero or subnormal.  No earlier-stage
! work is pending here because the bl branch in .cont2 has a nop delay
! slot and falls straight into .cont3.
.small2:
! a smaller operand equal to zero needs only the constant term
        fcmpd   %fcc3,%f20,signbit
        fbe,pt  %fcc3,.return_ah2
! delay slot
        nop
! otherwise scale both operands by 2**110, fetch their new high words
! through the stack, and leave hx - hy in l2 for .cont3
        ldd     [%fp+two110],%f26
        fmuld   %f20,%f26,%f20
        fmuld   %f22,%f26,%f22
        st      %f20,[%fp+yscl]
        ld      [%fp+yscl],%o7
        st      %f22,[%fp+xscl]
        ld      [%fp+xscl],%l2
        sub     %l2,%o7,%l2
        ba,pt   %icc,.cont3
! delay slot
        nop


        .align  16
! Stage 2 result is the constant term alone.  It is built as in the main
! path: f40 = sign of x, flipped if the operands were swapped; f26 =
! signed pio2 added to -pio2 (or to -0.0 when swapped); f46 = final sign.
! Expects g5 = hx - 0x7fe00000 and l2 = hx - hy.
.return_ah2:
        fzero   %f20
        fmovdg  %fcc2,signbit,%f20
        fxor    %f40,%f20,%f40
        fxor    %f40,pio2,%f20
        fxor    %f46,%f40,%f46
        fnegd   pio2,%f26
        fmovdg  %fcc2,signbit,%f26
        faddd   %f26,%f20,%f26
! o7 = hy - 0x7fe00000; skip the pi/4 adjustment if hy < 0x7ff00000
        sub     %g5,%l2,%o7
        cmp     %o7,%o5
        bl,pt   %icc,1f
! delay slot
        nop
        ldd     [%fp+pio4],%f20
        faddd   %f26,%f20,%f26
1:
! the converted value in f24 is not read afterwards; presumably this is
! done only for its effect on the FP exception flags - TODO confirm
        fdtoi   %f26,%f24
! Store a stage-2 result computed outside the pipeline, then restart
! stage 2 on the next element, loading it directly from memory.
.special2:
        fxor    %f26,%f46,%f26
        st      %f26,[%l6]
        st      %f27,[%l6+4]
        addcc   %i0,-1,%i0
        bg,pn   %icc,1f
! delay slot
        nop
! no elements remain: substitute a dummy element (operands pio2, signs
! cleared, hx - hy = 0) whose result goes to the junk slot
        fmovd   pio2,%f20               ! set up dummy argument
        fmovd   pio2,%f22
        fzero   %f40
        fzero   %f46
        mov     0,%l2
        ba,pt   %icc,.cont3
! delay slot
        add     %fp,junk,%l6
1:
! load the next element and repeat the stage-2 front end of .cont2 for it
        lda     [%i1]%asi,%f20
        lda     [%i1+4]%asi,%f21
        lda     [%i3]%asi,%f28
        lda     [%i3+4]%asi,%f29
        fabsd   %f20,%f24
        lda     [%i1]%asi,%o0
        fabsd   %f28,%f22
        lda     [%i3]%asi,%l3
        mov     %i5,%l6
        fand    %f20,signbit,%f46
        add     %i1,%i2,%i1
        fand    %f28,signbit,%f40
        fcmpd   %fcc2,%f24,%f22
        add     %i3,%i4,%i3
        add     %i5,%l7,%i5
        fmovd   %f24,%f20
        sethi   %hi(0x80000000),%g5
        andn    %o0,%g5,%o0
        andn    %l3,%g5,%l3
        fmovdg  %fcc2,%f22,%f20
        fmovdg  %fcc2,%f24,%f22
        mov     %o0,%o7
        movg    %fcc2,%l3,%o0
        movg    %fcc2,%o7,%l3
        fbu,pn  %fcc2,.nan2_from_special2
! delay slot
        nop
! same big and small tests as in .cont2
        sub     %l3,%o0,%l2
        sub     %l3,%o3,%g5
        sub     %l2,%o4,%o7
        andcc   %g5,%o7,%g0
        bge,pn  %icc,.big2_from_special2
! delay slot
        nop
        cmp     %o0,%o5
        bl,pn   %icc,.small2
! delay slot
        nop
        ba,pt   %icc,.cont3
! delay slot
        nop

        SET_SIZE(__vatan2)