root/usr/src/lib/libmvec/common/__vatan.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/isa_defs.h>
#include "libm_inlines.h"

#ifdef _LITTLE_ENDIAN
#define HI(x)   *(1+(int*)x)
#define LO(x)   *(unsigned*)x
#else
#define HI(x)   *(int*)x
#define LO(x)   *(1+(unsigned*)x)
#endif

#ifdef __RESTRICT
#define restrict _Restrict
#else
#define restrict
#endif

void
__vatan(int n, double * restrict x, int stridex, double * restrict y, int stridey)
{
  double  f, z, ans = 0.0L, ansu, ansl, tmp, poly, conup, conlo, dummy;
  double  f1,   ans1, ansu1, ansl1, tmp1, poly1, conup1, conlo1;
  double  f2,   ans2, ansu2, ansl2, tmp2, poly2, conup2, conlo2;
  int index, sign, intf, intflo, intz, argcount;
  int index1, sign1 = 0;
  int index2, sign2 = 0;
  double *yaddr,*yaddr1 = 0,*yaddr2 = 0;
  extern const double __vlibm_TBL_atan1[];
  extern double fabs(double);

/*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
 *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */

/* define dummy names for readability.  Use parray to help compiler optimize loads */
#define p3    parray[0]
#define p2    parray[1]
#define p1    parray[2]

  static const double parray[] = {
   -1.428029046844299722E-01,           /* p[3]         */
    1.999999917247000615E-01,           /* p[2]         */
   -3.333333333329292858E-01,           /* p[1]         */
    1.0,                                /* not used for p[0], though            */
   -1.0,                                /* used to flip sign of answer          */
  };

  if (n <= 0) return;           /* if no. of elements is 0 or neg, do nothing */
  do
  {
  LOOP0:

    f        = fabs(*x);                        /* fetch argument               */
    intf     = HI(x);                   /* upper half of x, as integer  */
    intflo   = LO(x);                   /* lower half of x, as integer  */
    sign     = intf &  0x80000000;              /* sign of argument             */
    intf     = intf & ~0x80000000;              /* abs(upper argument)          */

    if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
    {
      if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
      {
        ans   = f - f;                          /* return NaN if x=NaN*/
      }
      else if (intf < 0x3e300000)               /* avoid underflow for small arg */
      {
        dummy = 1.0e37 + f;
        dummy = dummy;
        ans   = f;
      }
      else if (intf > 0x43600000)               /* avoid underflow for big arg  */
      {
        index = 2;
        ans   = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low   */
      }
      *y      = (sign) ? -ans: ans;             /* store answer, with sign bit  */
      x      += stridex;
      y      += stridey;
      argcount = 0;                             /* initialize argcount          */
      if (--n <=0) break;                       /* we are done                  */
      goto LOOP0;                               /* otherwise, examine next arg  */
    }

    index    = 0;                               /* points to 0,0 in table       */
    if (intf > 0x40500000)                      /* if (|x| > 64                 */
    { f = -1.0/f;
      index  = 2;                               /* point to pi/2 upper, lower   */
    }
    else if (intf >= 0x3f900000)                /* if |x| >= (1/64)...          */
    {
      intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper        */
      HI(&z)  = intz;                           /* store as a double (z)        */
      LO(&z)  = 0;                              /* ...lower                     */
      f      = (f - z)/(1.0 + f*z);             /* get reduced argument         */
      index  = (intz - 0x3f900000) >> 15;       /* (index >> 16) << 1)          */
      index  = index + 4;                       /* skip over 0,0,pi/2,pi/2      */
    }
    yaddr    = y;                               /* address to store this answer */
    x       += stridex;                         /* point to next arg            */
    y       += stridey;                         /* point to next result         */
    argcount = 1;                               /* we now have 1 good argument  */
    if (--n <=0)
    {
      f1      = 0.0;                            /* put dummy values in args 1,2 */
      f2      = 0.0;
      index1  = 0;
      index2  = 0;
      goto UNROLL3;                             /* finish up with 1 good arg    */
    }

    /*--------------------------------------------------------------------------*/
    /*--------------------------------------------------------------------------*/
    /*--------------------------------------------------------------------------*/

  LOOP1:

    f1       = fabs(*x);                        /* fetch argument               */
    intf     = HI(x);                   /* upper half of x, as integer  */
    intflo   = LO(x);                   /* lower half of x, as integer  */
    sign1    = intf &  0x80000000;              /* sign of argument             */
    intf     = intf & ~0x80000000;              /* abs(upper argument)          */

    if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
    {
      if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
      {
        ans   = f1 - f1;                        /* return NaN if x=NaN*/
      }
      else if (intf < 0x3e300000)               /* avoid underflow for small arg */
      {
        dummy = 1.0e37 + f1;
        dummy = dummy;
        ans   = f1;
      }
      else if (intf > 0x43600000)               /* avoid underflow for big arg  */
      {
        index1 = 2;
        ans   = __vlibm_TBL_atan1[index1] + __vlibm_TBL_atan1[index1+1];/* pi/2 up + pi/2 low   */
      }
      *y      = (sign1) ? -ans: ans;            /* store answer, with sign bit  */
      x      += stridex;
      y      += stridey;
      argcount = 1;                             /* we still have 1 good arg     */
      if (--n <=0)
      {
        f1      = 0.0;                          /* put dummy values in args 1,2 */
        f2      = 0.0;
        index1  = 0;
        index2  = 0;
        goto UNROLL3;                           /* finish up with 1 good arg    */
      }
      goto LOOP1;                               /* otherwise, examine next arg  */
    }

    index1   = 0;                               /* points to 0,0 in table       */
    if (intf > 0x40500000)                      /* if (|x| > 64                 */
    { f1 = -1.0/f1;
      index1 = 2;                               /* point to pi/2 upper, lower   */
    }
    else if (intf >= 0x3f900000)                /* if |x| >= (1/64)...          */
    {
      intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper        */
      HI(&z) = intz;                            /* store as a double (z)        */
      LO(&z) = 0;                               /* ...lower                     */
      f1     = (f1 - z)/(1.0 + f1*z);           /* get reduced argument         */
      index1 = (intz - 0x3f900000) >> 15;       /* (index >> 16) << 1)          */
      index1 = index1 + 4;                      /* skip over 0,0,pi/2,pi/2      */
    }
    yaddr1   = y;                               /* address to store this answer */
    x       += stridex;                         /* point to next arg            */
    y       += stridey;                         /* point to next result         */
    argcount = 2;                               /* we now have 2 good arguments */
    if (--n <=0)
    {
      f2      = 0.0;                            /* put dummy value in arg 2 */
      index2  = 0;
      goto UNROLL3;                             /* finish up with 2 good args   */
    }

    /*--------------------------------------------------------------------------*/
    /*--------------------------------------------------------------------------*/
    /*--------------------------------------------------------------------------*/

  LOOP2:

    f2       = fabs(*x);                        /* fetch argument               */
    intf     = HI(x);                   /* upper half of x, as integer  */
    intflo   = LO(x);                   /* lower half of x, as integer  */
    sign2    = intf &  0x80000000;              /* sign of argument             */
    intf     = intf & ~0x80000000;              /* abs(upper argument)          */

    if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
    {
      if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
      {
        ans   = f2 - f2;                        /* return NaN if x=NaN*/
      }
      else if (intf < 0x3e300000)               /* avoid underflow for small arg */
      {
        dummy = 1.0e37 + f2;
        dummy = dummy;
        ans   = f2;
      }
      else if (intf > 0x43600000)               /* avoid underflow for big arg  */
      {
        index2 = 2;
        ans   = __vlibm_TBL_atan1[index2] + __vlibm_TBL_atan1[index2+1];/* pi/2 up + pi/2 low   */
      }
      *y      = (sign2) ? -ans: ans;            /* store answer, with sign bit  */
      x      += stridex;
      y      += stridey;
      argcount = 2;                             /* we still have 2 good args    */
      if (--n <=0)
      {
        f2      = 0.0;                          /* put dummy value in arg 2 */
        index2  = 0;
        goto UNROLL3;                           /* finish up with 2 good args   */
      }
      goto LOOP2;                               /* otherwise, examine next arg  */
    }

    index2   = 0;                               /* points to 0,0 in table       */
    if (intf > 0x40500000)                      /* if (|x| > 64                 */
    { f2 = -1.0/f2;
      index2 = 2;                               /* point to pi/2 upper, lower   */
    }
    else if (intf >= 0x3f900000)                /* if |x| >= (1/64)...          */
    {
      intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper        */
      HI(&z) = intz;                            /* store as a double (z)        */
      LO(&z) = 0;                               /* ...lower                     */
      f2     = (f2 - z)/(1.0 + f2*z);           /* get reduced argument         */
      index2 = (intz - 0x3f900000) >> 15;       /* (index >> 16) << 1)          */
      index2 = index2 + 4;                      /* skip over 0,0,pi/2,pi/2      */
    }
    yaddr2   = y;                               /* address to store this answer */
    x       += stridex;                         /* point to next arg            */
    y       += stridey;                         /* point to next result         */
    argcount = 3;                               /* we now have 3 good arguments */


/* here is the 3 way unrolled section,
   note, we may actually only have
   1,2, or 3 'real' arguments at this point
*/

UNROLL3:

    conup    = __vlibm_TBL_atan1[index ];       /* upper table                  */
    conup1   = __vlibm_TBL_atan1[index1];       /* upper table                  */
    conup2   = __vlibm_TBL_atan1[index2];       /* upper table                  */

    conlo    = __vlibm_TBL_atan1[index +1];     /* lower table                  */
    conlo1   = __vlibm_TBL_atan1[index1+1];     /* lower table                  */
    conlo2   = __vlibm_TBL_atan1[index2+1];     /* lower table                  */

    tmp      = f *f ;
    tmp1     = f1*f1;
    tmp2     = f2*f2;

    poly     = f *((p3*tmp  + p2)*tmp  + p1)*tmp ;
    poly1    = f1*((p3*tmp1 + p2)*tmp1 + p1)*tmp1;
    poly2    = f2*((p3*tmp2 + p2)*tmp2 + p1)*tmp2;

    ansu     = conup  + f ;                     /* compute atan(f)  upper       */
    ansu1    = conup1 + f1;                     /* compute atan(f)  upper       */
    ansu2    = conup2 + f2;                     /* compute atan(f)  upper       */

    ansl     = (((conup  - ansu) + f) + poly) + conlo ;
    ansl1    = (((conup1 - ansu1) + f1) + poly1) + conlo1;
    ansl2    = (((conup2 - ansu2) + f2) + poly2) + conlo2;

    ans      = ansu  + ansl ;
    ans1     = ansu1 + ansl1;
    ans2     = ansu2 + ansl2;

/* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */

   *yaddr    = sign ? -ans: ans;                /* this one is always good      */
   if (argcount < 3) break;                     /* end loop and finish up       */
     *yaddr1   = sign1 ? -ans1: ans1;
     *yaddr2   = sign2 ? -ans2: ans2;

  }  while (--n > 0);

 if (argcount == 2)
   {  *yaddr1  = sign1 ? -ans1: ans1;
   }
}