root/usr/src/lib/libmvec/common/vis/__vexp.S
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "__vexp.S"

#include "libm.h"

        RO_DATA

/********************************************************************
 * vexp() algorithm is from mopt:f_exp.c.  Basics are included here
 * to supplement comments within this file.  vexp() has been unrolled
 * to a depth of 3.  Only element 0 is documented.
 *
 * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by
 *      2^44 to allow *2^k w/o shifting within the FP registers.  These
 *      had to be removed for CHEETAH to avoid the fdtox of a very large
 *      number, which would trap to kernel (2^52).
 *
 * Let  x = (k + j/256)ln2 + r
 * then exp(x) = exp((k+j/256)*ln2) * exp(r)
 *             = 2^k * 2^(j/256) * exp(r)
 * where exp(r) is computed by polynomial approximation
 *      exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3
 *             = 1 + r*(1+r*(B1+r*(B2+r*B3)))
 *      let
 *      p = r*(1+r*(B1+r*(B2+r*B3)))    ! notice, not quite exp(r)
 *      q = 2^(j/256) rounded to double (high 64 bits)
 *      t = 2^(j/256) - q (low bits)    ! both from _TBL_exp_z[] (TBL here)
 *      then
 *      2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p )
 *      then actual computation is 2^k * ( q + ( t + q*p ) )
 *
 ********************************************************************/

        .align  16
/*
 * TBL: 256 entries of 16 bytes each, indexed by j = 0..255.  Every entry
 * is a pair of IEEE doubles, written as two .word lines with the most
 * significant word first:
 *      +0      q = 2^(j/256) rounded to double
 *      +8      t = low-order correction term, so that q + t carries
 *                  extra precision
 * Entry 0 is { 1.0, 0.0 }.  The main loop masks the byte offset with
 * 0xff0 and loads q from [TBL + 16*j] and t from [TBL + 16*j + 8].
 */
TBL:
        .word   0x3ff00000,0x00000000
        .word   0x00000000,0x00000000
        .word   0x3ff00b1a,0xfa5abcbf
        .word   0xbc84f6b2,0xa7609f71
        .word   0x3ff0163d,0xa9fb3335
        .word   0x3c9b6129,0x9ab8cdb7
        .word   0x3ff02168,0x143b0281
        .word   0xbc82bf31,0x0fc54eb6
        .word   0x3ff02c9a,0x3e778061
        .word   0xbc719083,0x535b085d
        .word   0x3ff037d4,0x2e11bbcc
        .word   0x3c656811,0xeeade11a
        .word   0x3ff04315,0xe86e7f85
        .word   0xbc90a31c,0x1977c96e
        .word   0x3ff04e5f,0x72f654b1
        .word   0x3c84c379,0x3aa0d08c
        .word   0x3ff059b0,0xd3158574
        .word   0x3c8d73e2,0xa475b465
        .word   0x3ff0650a,0x0e3c1f89
        .word   0xbc95cb7b,0x5799c396
        .word   0x3ff0706b,0x29ddf6de
        .word   0xbc8c91df,0xe2b13c26
        .word   0x3ff07bd4,0x2b72a836
        .word   0x3c832334,0x54458700
        .word   0x3ff08745,0x18759bc8
        .word   0x3c6186be,0x4bb284ff
        .word   0x3ff092bd,0xf66607e0
        .word   0xbc968063,0x800a3fd1
        .word   0x3ff09e3e,0xcac6f383
        .word   0x3c914878,0x18316136
        .word   0x3ff0a9c7,0x9b1f3919
        .word   0x3c85d16c,0x873d1d38
        .word   0x3ff0b558,0x6cf9890f
        .word   0x3c98a62e,0x4adc610a
        .word   0x3ff0c0f1,0x45e46c85
        .word   0x3c94f989,0x06d21cef
        .word   0x3ff0cc92,0x2b7247f7
        .word   0x3c901edc,0x16e24f71
        .word   0x3ff0d83b,0x23395dec
        .word   0xbc9bc14d,0xe43f316a
        .word   0x3ff0e3ec,0x32d3d1a2
        .word   0x3c403a17,0x27c57b53
        .word   0x3ff0efa5,0x5fdfa9c5
        .word   0xbc949db9,0xbc54021b
        .word   0x3ff0fb66,0xaffed31b
        .word   0xbc6b9bed,0xc44ebd7b
        .word   0x3ff10730,0x28d7233e
        .word   0x3c8d46eb,0x1692fdd5
        .word   0x3ff11301,0xd0125b51
        .word   0xbc96c510,0x39449b3a
        .word   0x3ff11edb,0xab5e2ab6
        .word   0xbc9ca454,0xf703fb72
        .word   0x3ff12abd,0xc06c31cc
        .word   0xbc51b514,0xb36ca5c7
        .word   0x3ff136a8,0x14f204ab
        .word   0xbc67108f,0xba48dcf0
        .word   0x3ff1429a,0xaea92de0
        .word   0xbc932fbf,0x9af1369e
        .word   0x3ff14e95,0x934f312e
        .word   0xbc8b91e8,0x39bf44ab
        .word   0x3ff15a98,0xc8a58e51
        .word   0x3c82406a,0xb9eeab0a
        .word   0x3ff166a4,0x5471c3c2
        .word   0x3c58f23b,0x82ea1a32
        .word   0x3ff172b8,0x3c7d517b
        .word   0xbc819041,0xb9d78a76
        .word   0x3ff17ed4,0x8695bbc0
        .word   0x3c709e3f,0xe2ac5a64
        .word   0x3ff18af9,0x388c8dea
        .word   0xbc911023,0xd1970f6c
        .word   0x3ff19726,0x58375d2f
        .word   0x3c94aadd,0x85f17e08
        .word   0x3ff1a35b,0xeb6fcb75
        .word   0x3c8e5b4c,0x7b4968e4
        .word   0x3ff1af99,0xf8138a1c
        .word   0x3c97bf85,0xa4b69280
        .word   0x3ff1bbe0,0x84045cd4
        .word   0xbc995386,0x352ef607
        .word   0x3ff1c82f,0x95281c6b
        .word   0x3c900977,0x8010f8c9
        .word   0x3ff1d487,0x3168b9aa
        .word   0x3c9e016e,0x00a2643c
        .word   0x3ff1e0e7,0x5eb44027
        .word   0xbc96fdd8,0x088cb6de
        .word   0x3ff1ed50,0x22fcd91d
        .word   0xbc91df98,0x027bb78c
        .word   0x3ff1f9c1,0x8438ce4d
        .word   0xbc9bf524,0xa097af5c
        .word   0x3ff2063b,0x88628cd6
        .word   0x3c8dc775,0x814a8494
        .word   0x3ff212be,0x3578a819
        .word   0x3c93592d,0x2cfcaac9
        .word   0x3ff21f49,0x917ddc96
        .word   0x3c82a97e,0x9494a5ee
        .word   0x3ff22bdd,0xa27912d1
        .word   0x3c8d34fb,0x5577d69e
        .word   0x3ff2387a,0x6e756238
        .word   0x3c99b07e,0xb6c70573
        .word   0x3ff2451f,0xfb82140a
        .word   0x3c8acfcc,0x911ca996
        .word   0x3ff251ce,0x4fb2a63f
        .word   0x3c8ac155,0xbef4f4a4
        .word   0x3ff25e85,0x711ece75
        .word   0x3c93e1a2,0x4ac31b2c
        .word   0x3ff26b45,0x65e27cdd
        .word   0x3c82bd33,0x9940e9d9
        .word   0x3ff2780e,0x341ddf29
        .word   0x3c9e067c,0x05f9e76c
        .word   0x3ff284df,0xe1f56381
        .word   0xbc9a4c3a,0x8c3f0d7e
        .word   0x3ff291ba,0x7591bb70
        .word   0xbc82cc72,0x28401cbc
        .word   0x3ff29e9d,0xf51fdee1
        .word   0x3c8612e8,0xafad1255
        .word   0x3ff2ab8a,0x66d10f13
        .word   0xbc995743,0x191690a7
        .word   0x3ff2b87f,0xd0dad990
        .word   0xbc410adc,0xd6381aa4
        .word   0x3ff2c57e,0x39771b2f
        .word   0xbc950145,0xa6eb5124
        .word   0x3ff2d285,0xa6e4030b
        .word   0x3c900247,0x54db41d5
        .word   0x3ff2df96,0x1f641589
        .word   0x3c9d16cf,0xfbbce198
        .word   0x3ff2ecaf,0xa93e2f56
        .word   0x3c71ca0f,0x45d52383
        .word   0x3ff2f9d2,0x4abd886b
        .word   0xbc653c55,0x532bda93
        .word   0x3ff306fe,0x0a31b715
        .word   0x3c86f46a,0xd23182e4
        .word   0x3ff31432,0xedeeb2fd
        .word   0x3c8959a3,0xf3f3fcd0
        .word   0x3ff32170,0xfc4cd831
        .word   0x3c8a9ce7,0x8e18047c
        .word   0x3ff32eb8,0x3ba8ea32
        .word   0xbc9c45e8,0x3cb4f318
        .word   0x3ff33c08,0xb26416ff
        .word   0x3c932721,0x843659a6
        .word   0x3ff34962,0x66e3fa2d
        .word   0xbc835a75,0x930881a4
        .word   0x3ff356c5,0x5f929ff1
        .word   0xbc8b5cee,0x5c4e4628
        .word   0x3ff36431,0xa2de883b
        .word   0xbc8c3144,0xa06cb85e
        .word   0x3ff371a7,0x373aa9cb
        .word   0xbc963aea,0xbf42eae2
        .word   0x3ff37f26,0x231e754a
        .word   0xbc99f5ca,0x9eceb23c
        .word   0x3ff38cae,0x6d05d866
        .word   0xbc9e958d,0x3c9904bd
        .word   0x3ff39a40,0x1b7140ef
        .word   0xbc99a9a5,0xfc8e2934
        .word   0x3ff3a7db,0x34e59ff7
        .word   0xbc75e436,0xd661f5e3
        .word   0x3ff3b57f,0xbfec6cf4
        .word   0x3c954c66,0xe26fff18
        .word   0x3ff3c32d,0xc313a8e5
        .word   0xbc9efff8,0x375d29c3
        .word   0x3ff3d0e5,0x44ede173
        .word   0x3c7fe8d0,0x8c284c71
        .word   0x3ff3dea6,0x4c123422
        .word   0x3c8ada09,0x11f09ebc
        .word   0x3ff3ec70,0xdf1c5175
        .word   0xbc8af663,0x7b8c9bca
        .word   0x3ff3fa45,0x04ac801c
        .word   0xbc97d023,0xf956f9f3
        .word   0x3ff40822,0xc367a024
        .word   0x3c8bddf8,0xb6f4d048
        .word   0x3ff4160a,0x21f72e2a
        .word   0xbc5ef369,0x1c309278
        .word   0x3ff423fb,0x2709468a
        .word   0xbc98462d,0xc0b314dd
        .word   0x3ff431f5,0xd950a897
        .word   0xbc81c7dd,0xe35f7998
        .word   0x3ff43ffa,0x3f84b9d4
        .word   0x3c8880be,0x9704c002
        .word   0x3ff44e08,0x6061892d
        .word   0x3c489b7a,0x04ef80d0
        .word   0x3ff45c20,0x42a7d232
        .word   0xbc686419,0x82fb1f8e
        .word   0x3ff46a41,0xed1d0057
        .word   0x3c9c944b,0xd1648a76
        .word   0x3ff4786d,0x668b3237
        .word   0xbc9c20f0,0xed445733
        .word   0x3ff486a2,0xb5c13cd0
        .word   0x3c73c1a3,0xb69062f0
        .word   0x3ff494e1,0xe192aed2
        .word   0xbc83b289,0x5e499ea0
        .word   0x3ff4a32a,0xf0d7d3de
        .word   0x3c99cb62,0xf3d1be56
        .word   0x3ff4b17d,0xea6db7d7
        .word   0xbc8125b8,0x7f2897f0
        .word   0x3ff4bfda,0xd5362a27
        .word   0x3c7d4397,0xafec42e2
        .word   0x3ff4ce41,0xb817c114
        .word   0x3c905e29,0x690abd5d
        .word   0x3ff4dcb2,0x99fddd0d
        .word   0x3c98ecdb,0xbc6a7833
        .word   0x3ff4eb2d,0x81d8abff
        .word   0xbc95257d,0x2e5d7a52
        .word   0x3ff4f9b2,0x769d2ca7
        .word   0xbc94b309,0xd25957e3
        .word   0x3ff50841,0x7f4531ee
        .word   0x3c7a249b,0x49b7465f
        .word   0x3ff516da,0xa2cf6642
        .word   0xbc8f7685,0x69bd93ee
        .word   0x3ff5257d,0xe83f4eef
        .word   0xbc7c998d,0x43efef71
        .word   0x3ff5342b,0x569d4f82
        .word   0xbc807abe,0x1db13cac
        .word   0x3ff542e2,0xf4f6ad27
        .word   0x3c87926d,0x192d5f7e
        .word   0x3ff551a4,0xca5d920f
        .word   0xbc8d689c,0xefede59a
        .word   0x3ff56070,0xdde910d2
        .word   0xbc90fb6e,0x168eebf0
        .word   0x3ff56f47,0x36b527da
        .word   0x3c99bb2c,0x011d93ad
        .word   0x3ff57e27,0xdbe2c4cf
        .word   0xbc90b98c,0x8a57b9c4
        .word   0x3ff58d12,0xd497c7fd
        .word   0x3c8295e1,0x5b9a1de8
        .word   0x3ff59c08,0x27ff07cc
        .word   0xbc97e2ce,0xe467e60f
        .word   0x3ff5ab07,0xdd485429
        .word   0x3c96324c,0x054647ad
        .word   0x3ff5ba11,0xfba87a03
        .word   0xbc9b77a1,0x4c233e1a
        .word   0x3ff5c926,0x8a5946b7
        .word   0x3c3c4b1b,0x816986a2
        .word   0x3ff5d845,0x90998b93
        .word   0xbc9cd6a7,0xa8b45642
        .word   0x3ff5e76f,0x15ad2148
        .word   0x3c9ba6f9,0x3080e65e
        .word   0x3ff5f6a3,0x20dceb71
        .word   0xbc89eadd,0xe3cdcf92
        .word   0x3ff605e1,0xb976dc09
        .word   0xbc93e242,0x9b56de47
        .word   0x3ff6152a,0xe6cdf6f4
        .word   0x3c9e4b3e,0x4ab84c27
        .word   0x3ff6247e,0xb03a5585
        .word   0xbc9383c1,0x7e40b497
        .word   0x3ff633dd,0x1d1929fd
        .word   0x3c984710,0xbeb964e5
        .word   0x3ff64346,0x34ccc320
        .word   0xbc8c483c,0x759d8932
        .word   0x3ff652b9,0xfebc8fb7
        .word   0xbc9ae3d5,0xc9a73e08
        .word   0x3ff66238,0x82552225
        .word   0xbc9bb609,0x87591c34
        .word   0x3ff671c1,0xc70833f6
        .word   0xbc8e8732,0x586c6134
        .word   0x3ff68155,0xd44ca973
        .word   0x3c6038ae,0x44f73e65
        .word   0x3ff690f4,0xb19e9538
        .word   0x3c8804bd,0x9aeb445c
        .word   0x3ff6a09e,0x667f3bcd
        .word   0xbc9bdd34,0x13b26456
        .word   0x3ff6b052,0xfa75173e
        .word   0x3c7a38f5,0x2c9a9d0e
        .word   0x3ff6c012,0x750bdabf
        .word   0xbc728956,0x67ff0b0d
        .word   0x3ff6cfdc,0xddd47645
        .word   0x3c9c7aa9,0xb6f17309
        .word   0x3ff6dfb2,0x3c651a2f
        .word   0xbc6bbe3a,0x683c88ab
        .word   0x3ff6ef92,0x98593ae5
        .word   0xbc90b974,0x9e1ac8b2
        .word   0x3ff6ff7d,0xf9519484
        .word   0xbc883c0f,0x25860ef6
        .word   0x3ff70f74,0x66f42e87
        .word   0x3c59d644,0xd45aa65f
        .word   0x3ff71f75,0xe8ec5f74
        .word   0xbc816e47,0x86887a99
        .word   0x3ff72f82,0x86ead08a
        .word   0xbc920aa0,0x2cd62c72
        .word   0x3ff73f9a,0x48a58174
        .word   0xbc90a8d9,0x6c65d53c
        .word   0x3ff74fbd,0x35d7cbfd
        .word   0x3c9047fd,0x618a6e1c
        .word   0x3ff75feb,0x564267c9
        .word   0xbc902459,0x57316dd3
        .word   0x3ff77024,0xb1ab6e09
        .word   0x3c9b7877,0x169147f8
        .word   0x3ff78069,0x4fde5d3f
        .word   0x3c9866b8,0x0a02162c
        .word   0x3ff790b9,0x38ac1cf6
        .word   0x3c9349a8,0x62aadd3e
        .word   0x3ff7a114,0x73eb0187
        .word   0xbc841577,0xee04992f
        .word   0x3ff7b17b,0x0976cfdb
        .word   0xbc9bebb5,0x8468dc88
        .word   0x3ff7c1ed,0x0130c132
        .word   0x3c9f124c,0xd1164dd6
        .word   0x3ff7d26a,0x62ff86f0
        .word   0x3c91bddb,0xfb72b8b4
        .word   0x3ff7e2f3,0x36cf4e62
        .word   0x3c705d02,0xba15797e
        .word   0x3ff7f387,0x8491c491
        .word   0xbc807f11,0xcf9311ae
        .word   0x3ff80427,0x543e1a12
        .word   0xbc927c86,0x626d972b
        .word   0x3ff814d2,0xadd106d9
        .word   0x3c946437,0x0d151d4d
        .word   0x3ff82589,0x994cce13
        .word   0xbc9d4c1d,0xd41532d8
        .word   0x3ff8364c,0x1eb941f7
        .word   0x3c999b9a,0x31df2bd5
        .word   0x3ff8471a,0x4623c7ad
        .word   0xbc88d684,0xa341cdfb
        .word   0x3ff857f4,0x179f5b21
        .word   0xbc5ba748,0xf8b216d0
        .word   0x3ff868d9,0x9b4492ec
        .word   0x3ca01c83,0xb21584a3
        .word   0x3ff879ca,0xd931a436
        .word   0x3c85d2d7,0xd2db47bc
        .word   0x3ff88ac7,0xd98a6699
        .word   0x3c9994c2,0xf37cb53a
        .word   0x3ff89bd0,0xa478580f
        .word   0x3c9d5395,0x4475202a
        .word   0x3ff8ace5,0x422aa0db
        .word   0x3c96e9f1,0x56864b27
        .word   0x3ff8be05,0xbad61778
        .word   0x3c9ecb5e,0xfc43446e
        .word   0x3ff8cf32,0x16b5448c
        .word   0xbc70d55e,0x32e9e3aa
        .word   0x3ff8e06a,0x5e0866d9
        .word   0xbc97114a,0x6fc9b2e6
        .word   0x3ff8f1ae,0x99157736
        .word   0x3c85cc13,0xa2e3976c
        .word   0x3ff902fe,0xd0282c8a
        .word   0x3c9592ca,0x85fe3fd2
        .word   0x3ff9145b,0x0b91ffc6
        .word   0xbc9dd679,0x2e582524
        .word   0x3ff925c3,0x53aa2fe2
        .word   0xbc83455f,0xa639db7f
        .word   0x3ff93737,0xb0cdc5e5
        .word   0xbc675fc7,0x81b57ebc
        .word   0x3ff948b8,0x2b5f98e5
        .word   0xbc8dc3d6,0x797d2d99
        .word   0x3ff95a44,0xcbc8520f
        .word   0xbc764b7c,0x96a5f039
        .word   0x3ff96bdd,0x9a7670b3
        .word   0xbc5ba596,0x7f19c896
        .word   0x3ff97d82,0x9fde4e50
        .word   0xbc9d185b,0x7c1b85d0
        .word   0x3ff98f33,0xe47a22a2
        .word   0x3c7cabda,0xa24c78ed
        .word   0x3ff9a0f1,0x70ca07ba
        .word   0xbc9173bd,0x91cee632
        .word   0x3ff9b2bb,0x4d53fe0d
        .word   0xbc9dd84e,0x4df6d518
        .word   0x3ff9c491,0x82a3f090
        .word   0x3c7c7c46,0xb071f2be
        .word   0x3ff9d674,0x194bb8d5
        .word   0xbc9516be,0xa3dd8233
        .word   0x3ff9e863,0x19e32323
        .word   0x3c7824ca,0x78e64c6e
        .word   0x3ff9fa5e,0x8d07f29e
        .word   0xbc84a9ce,0xaaf1face
        .word   0x3ffa0c66,0x7b5de565
        .word   0xbc935949,0x5d1cd533
        .word   0x3ffa1e7a,0xed8eb8bb
        .word   0x3c9c6618,0xee8be70e
        .word   0x3ffa309b,0xec4a2d33
        .word   0x3c96305c,0x7ddc36ab
        .word   0x3ffa42c9,0x80460ad8
        .word   0xbc9aa780,0x589fb120
        .word   0x3ffa5503,0xb23e255d
        .word   0xbc9d2f6e,0xdb8d41e1
        .word   0x3ffa674a,0x8af46052
        .word   0x3c650f56,0x30670366
        .word   0x3ffa799e,0x1330b358
        .word   0x3c9bcb7e,0xcac563c6
        .word   0x3ffa8bfe,0x53c12e59
        .word   0xbc94f867,0xb2ba15a8
        .word   0x3ffa9e6b,0x5579fdbf
        .word   0x3c90fac9,0x0ef7fd31
        .word   0x3ffab0e5,0x21356eba
        .word   0x3c889c31,0xdae94544
        .word   0x3ffac36b,0xbfd3f37a
        .word   0xbc8f9234,0xcae76cd0
        .word   0x3ffad5ff,0x3a3c2774
        .word   0x3c97ef3b,0xb6b1b8e4
        .word   0x3ffae89f,0x995ad3ad
        .word   0x3c97a1cd,0x345dcc81
        .word   0x3ffafb4c,0xe622f2ff
        .word   0xbc94b2fc,0x0f315ecc
        .word   0x3ffb0e07,0x298db666
        .word   0xbc9bdef5,0x4c80e425
        .word   0x3ffb20ce,0x6c9a8952
        .word   0x3c94dd02,0x4a0756cc
        .word   0x3ffb33a2,0xb84f15fb
        .word   0xbc62805e,0x3084d708
        .word   0x3ffb4684,0x15b749b1
        .word   0xbc7f763d,0xe9df7c90
        .word   0x3ffb5972,0x8de5593a
        .word   0xbc9c71df,0xbbba6de3
        .word   0x3ffb6c6e,0x29f1c52a
        .word   0x3c92a8f3,0x52883f6e
        .word   0x3ffb7f76,0xf2fb5e47
        .word   0xbc75584f,0x7e54ac3b
        .word   0x3ffb928c,0xf22749e4
        .word   0xbc9b7216,0x54cb65c6
        .word   0x3ffba5b0,0x30a1064a
        .word   0xbc9efcd3,0x0e54292e
        .word   0x3ffbb8e0,0xb79a6f1f
        .word   0xbc3f52d1,0xc9696205
        .word   0x3ffbcc1e,0x904bc1d2
        .word   0x3c823dd0,0x7a2d9e84
        .word   0x3ffbdf69,0xc3f3a207
        .word   0xbc3c2623,0x60ea5b52
        .word   0x3ffbf2c2,0x5bd71e09
        .word   0xbc9efdca,0x3f6b9c73
        .word   0x3ffc0628,0x6141b33d
        .word   0xbc8d8a5a,0xa1fbca34
        .word   0x3ffc199b,0xdd85529c
        .word   0x3c811065,0x895048dd
        .word   0x3ffc2d1c,0xd9fa652c
        .word   0xbc96e516,0x17c8a5d7
        .word   0x3ffc40ab,0x5fffd07a
        .word   0x3c9b4537,0xe083c60a
        .word   0x3ffc5447,0x78fafb22
        .word   0x3c912f07,0x2493b5af
        .word   0x3ffc67f1,0x2e57d14b
        .word   0x3c92884d,0xff483cad
        .word   0x3ffc7ba8,0x8988c933
        .word   0xbc8e76bb,0xbe255559
        .word   0x3ffc8f6d,0x9406e7b5
        .word   0x3c71acbc,0x48805c44
        .word   0x3ffca340,0x5751c4db
        .word   0xbc87f2be,0xd10d08f4
        .word   0x3ffcb720,0xdcef9069
        .word   0x3c7503cb,0xd1e949db
        .word   0x3ffccb0f,0x2e6d1675
        .word   0xbc7d220f,0x86009093
        .word   0x3ffcdf0b,0x555dc3fa
        .word   0xbc8dd83b,0x53829d72
        .word   0x3ffcf315,0x5b5bab74
        .word   0xbc9a08e9,0xb86dff57
        .word   0x3ffd072d,0x4a07897c
        .word   0xbc9cbc37,0x43797a9c
        .word   0x3ffd1b53,0x2b08c968
        .word   0x3c955636,0x219a36ee
        .word   0x3ffd2f87,0x080d89f2
        .word   0xbc9d487b,0x719d8578
        .word   0x3ffd43c8,0xeacaa1d6
        .word   0x3c93db53,0xbf5a1614
        .word   0x3ffd5818,0xdcfba487
        .word   0x3c82ed02,0xd75b3706
        .word   0x3ffd6c76,0xe862e6d3
        .word   0x3c5fe87a,0x4a8165a0
        .word   0x3ffd80e3,0x16c98398
        .word   0xbc911ec1,0x8beddfe8
        .word   0x3ffd955d,0x71ff6075
        .word   0x3c9a052d,0xbb9af6be
        .word   0x3ffda9e6,0x03db3285
        .word   0x3c9c2300,0x696db532
        .word   0x3ffdbe7c,0xd63a8315
        .word   0xbc9b76f1,0x926b8be4
        .word   0x3ffdd321,0xf301b460
        .word   0x3c92da57,0x78f018c2
        .word   0x3ffde7d5,0x641c0658
        .word   0xbc9ca552,0x8e79ba8f
        .word   0x3ffdfc97,0x337b9b5f
        .word   0xbc91a5cd,0x4f184b5c
        .word   0x3ffe1167,0x6b197d17
        .word   0xbc72b529,0xbd5c7f44
        .word   0x3ffe2646,0x14f5a129
        .word   0xbc97b627,0x817a1496
        .word   0x3ffe3b33,0x3b16ee12
        .word   0xbc99f4a4,0x31fdc68a
        .word   0x3ffe502e,0xe78b3ff6
        .word   0x3c839e89,0x80a9cc8f
        .word   0x3ffe6539,0x24676d76
        .word   0xbc863ff8,0x7522b734
        .word   0x3ffe7a51,0xfbc74c83
        .word   0x3c92d522,0xca0c8de2
        .word   0x3ffe8f79,0x77cdb740
        .word   0xbc910894,0x80b054b1
        .word   0x3ffea4af,0xa2a490da
        .word   0xbc9e9c23,0x179c2893
        .word   0x3ffeb9f4,0x867cca6e
        .word   0x3c94832f,0x2293e4f2
        .word   0x3ffecf48,0x2d8e67f1
        .word   0xbc9c93f3,0xb411ad8c
        .word   0x3ffee4aa,0xa2188510
        .word   0x3c91c68d,0xa487568d
        .word   0x3ffefa1b,0xee615a27
        .word   0x3c9dc7f4,0x86a4b6b0
        .word   0x3fff0f9c,0x1cb6412a
        .word   0xbc932200,0x65181d45
        .word   0x3fff252b,0x376bba97
        .word   0x3c93a1a5,0xbf0d8e43
        .word   0x3fff3ac9,0x48dd7274
        .word   0xbc795a5a,0x3ed837de
        .word   0x3fff5076,0x5b6e4540
        .word   0x3c99d3e1,0x2dd8a18b
        .word   0x3fff6632,0x798844f8
        .word   0x3c9fa37b,0x3539343e
        .word   0x3fff7bfd,0xad9cbe14
        .word   0xbc9dbb12,0xd006350a
        .word   0x3fff91d8,0x02243c89
        .word   0xbc612ea8,0xa779f689
        .word   0x3fffa7c1,0x819e90d8
        .word   0x3c874853,0xf3a5931e
        .word   0x3fffbdba,0x3692d514
        .word   0xbc796773,0x15098eb6
        .word   0x3fffd3c2,0x2b8f71f1
        .word   0x3c62eb74,0x966579e7
        .word   0x3fffe9d9,0x6b2a23d9
        .word   0x3c74a603,0x7442fde3

        .align  16
! Scalar constants, one double (8 bytes) per .word line, high word first.
! They are loaded once at function entry, in this order, into the
! floating-point registers f36 through f62.  The byte offset of each
! entry is named by the offset defines that follow this table.
constants:
        .word   0x3ef00000,0x00000000   ! ox3ef: added to the high word with fpadd32 in .small (exponent field += 0x3ef)
        .word   0x40862e42,0xfefa39ef   ! thresh: about 709.7827
        .word   0x01000000,0x00000000   ! tiny: 2^(-1007)
        .word   0x7f000000,0x00000000   ! huge: 2^1009
        .word   0x80000000,0x00000000   ! signbit: mask selecting the sign bit of a double
        .word   0x43f00000,0x00000000 ! scaling 2^12 two96 (value is 2^64 = 2^52 * 2^12)
        .word   0xfff00000,0x00000000   ! neginf: -infinity pattern; also used with fand as a sign+exponent-field mask
        .word   0x3ff00000,0x00000000   ! one: 1.0
        .word   0x3fdfffff,0xfffffff6   ! B1: polynomial coefficient, about 1/2
        .word   0x3fc55555,0x721a1d14   ! B2: polynomial coefficient, about 1/6
        .word   0x3fa55555,0x6e0896af   ! B3: polynomial coefficient, about 1/24
        .word   0x41371547,0x652b82fe ! scaling 2^12 invln2_256 (256/ln2 * 2^12)
        .word   0x3ea62e42,0xfee00000 ! scaling 2^(-12) ln2_256h (high part of ln2/256 * 2^(-12))
        .word   0x3caa39ef,0x35793c76 ! scaling 2^(-12) ln2_256l (low part of ln2/256 * 2^(-12))

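        ! base set w/o scaling: unscaled counterparts of the four constants
        ! above that are marked as scaled by 2^12 or 2^(-12); kept for reference
        ! .word 0x43300000,0x00000000 ! unscaled two96 (2^52)
        ! .word 0x40771547,0x652b82fe ! unscaled invln2_256 (256/ln2)
        ! .word 0x3f662e42,0xfee00000 ! unscaled ln2_256h (high part of ln2/256)
        ! .word 0x3d6a39ef,0x35793c76 ! unscaled ln2_256l (low part of ln2/256)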

! Byte offsets of each 8-byte entry within the constants table, in table
! order.  They are used as [base+offset] in the ldd sequence at function
! entry, so they must stay in step with the layout of that table.
! Do not append comments to the define lines themselves: the text would
! become part of the macro body.
#define ox3ef           0x0
#define thresh          0x8
#define tiny            0x10
#define huge            0x18
#define signbit         0x20
#define two96           0x28
#define neginf          0x30
#define one             0x38
#define B1OFF           0x40
#define B2OFF           0x48
#define B3OFF           0x50
#define invln2_256      0x58
#define ln2_256h        0x60
#define ln2_256l        0x68

! local storage indices
!
! Offsets relative to the frame pointer of the temporaries reserved by the
! save instruction at function entry (which subtracts tmps in addition to
! the minimum frame).
!   m0, m1, m2  4-byte slots; the fdtoi result for each of the three
!               unrolled elements is stored here and reloaded into an
!               integer register.
!   jnk         scratch area; the three output pointers are preset to it
!               before the first pass so that the stores of the previous
!               pass results land in scratch instead of in y.

#define m2              STACK_BIAS-0x4
#define m1              STACK_BIAS-0x8
#define m0              STACK_BIAS-0xc
#define jnk             STACK_BIAS-0x20
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps            0x20

! register use

! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey
! i5  0x80000000

! g1  TBL

! l0  m0
! l1  m1
! l2  m2
! l3  j0,oy0
! l4  j1,oy1
! l5  j2,oy2
! l6  0x3e300000
! l7  0x40862e41

! o0  py0
! o1  py1
! o2  py2
! o3  scratch
! o4  scratch
! o5  0x40874910
! o7  0x7ff00000

! f0  x0
! f2
! f4
! f6
! f8
! f10 x1
! f12
! f14
! f16
! f18
! f20 x2
! f22
! f24
! f26
! f28
! f30
! f32
! f34
! f36 0x3ef0...
! f38 thresh
! f40 tiny
! f42 huge
! f44 signbit
! f46 two96
! f48 neginf
! f50 one
! f52 B1
! f54 B2
! f56 B3
! f58 invln2_256
! f60 ln2_256h
! f62 ln2_256l
! Symbolic names for the floating-point registers that hold the entries of
! the constants table.  Each register is loaded once at function entry by
! an ldd from the matching table offset, in the same order as listed here.
! Keep comments off the define lines: trailing text would be substituted
! into every instruction that uses the alias.
#define BOUNDRY %f36
#define THRESH %f38
#define TINY %f40
#define HUGE %f42
#define SIGNBIT %f44
#define TWO96 %f46
#define NEGINF %f48
#define ONE %f50
#define B1 %f52
#define B2 %f54
#define B3 %f56
#define INVLN2_256 %f58
#define LN2_256H %f60
#define LN2_256L %f62

        ENTRY(__vexp)
        save    %sp,-SA(MINFRAME)-tmps,%sp
        PIC_SETUP(l7)
        PIC_SET(l7,constants,o3)
        PIC_SET(l7,TBL,o0)
        mov     %o0,%g1
        wr      %g0,0x82,%asi           ! set %asi for non-faulting loads

        sethi   %hi(0x80000000),%i5
        sethi   %hi(0x3e300000),%l6
        sethi   %hi(0x40862e41),%l7
        or      %l7,%lo(0x40862e41),%l7
        sethi   %hi(0x40874910),%o5
        or      %o5,%lo(0x40874910),%o5
        sethi   %hi(0x7ff00000),%o7
        ldd     [%o3+ox3ef],BOUNDRY
        ldd     [%o3+thresh],THRESH
        ldd     [%o3+tiny],TINY
        ldd     [%o3+huge],HUGE
        ldd     [%o3+signbit],SIGNBIT
        ldd     [%o3+two96],TWO96
        ldd     [%o3+neginf],NEGINF
        ldd     [%o3+one],ONE
        ldd     [%o3+B1OFF],B1
        ldd     [%o3+B2OFF],B2
        ldd     [%o3+B3OFF],B3
        ldd     [%o3+invln2_256],INVLN2_256
        ldd     [%o3+ln2_256h],LN2_256H
        ldd     [%o3+ln2_256l],LN2_256L
        sll     %i2,3,%i2               ! scale strides
        sll     %i4,3,%i4
        add     %fp,jnk,%l3             ! precondition loop
        add     %fp,jnk,%l4
        add     %fp,jnk,%l5
        ld      [%i1],%l0               ! hx = *x
        ld      [%i1],%f0
        ld      [%i1+4],%f1
        andn    %l0,%i5,%l0             ! hx &= ~0x80000000
        ba      .loop0
        add     %i1,%i2,%i1             ! x += stridex

        .align  16
! -- 16 byte aligned
.loop0:
        lda     [%i1]%asi,%l1           ! preload next argument
        sub     %l0,%l6,%o3
        sub     %l7,%l0,%o4
        fand    %f0,SIGNBIT,%f2         ! get sign bit

        lda     [%i1]%asi,%f10
        orcc    %o3,%o4,%g0
        mov     %i3,%o0                 ! py0 = y
        bl,pn   %icc,.range0            ! if hx < 0x3e300000 or > 0x40862e41

! delay slot
        lda     [%i1+4]%asi,%f11
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.endloop1

! delay slot
        andn    %l1,%i5,%l1
        add     %i1,%i2,%i1             ! x += stridex
        for     %f2,TWO96,%f2           ! used to strip least sig bits
        fmuld   %f0,INVLN2_256,%f4      ! x/ (ln2/256)  , creating k

.loop1:
        lda     [%i1]%asi,%l2           ! preload next argument
        sub     %l1,%l6,%o3
        sub     %l7,%l1,%o4
        fand    %f10,SIGNBIT,%f12

        lda     [%i1]%asi,%f20
        orcc    %o3,%o4,%g0
        mov     %i3,%o1                 ! py1 = y
        bl,pn   %icc,.range1            ! if hx < 0x3e300000 or > 0x40862e41

! delay slot
        lda     [%i1+4]%asi,%f21
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.endloop2

! delay slot
        andn    %l2,%i5,%l2
        add     %i1,%i2,%i1             ! x += stridex
        for     %f12,TWO96,%f12
        fmuld   %f10,INVLN2_256,%f14

.loop2:
        sub     %l2,%l6,%o3
        sub     %l7,%l2,%o4
        fand    %f20,SIGNBIT,%f22
        fmuld   %f20,INVLN2_256,%f24            ! okay to put this here; for alignment

        orcc    %o3,%o4,%g0
        bl,pn   %icc,.range2            ! if hx < 0x3e300000 or > 0x40862e41
! delay slot
        for     %f22,TWO96,%f22
        faddd   %f4,%f2,%f4             ! creating k+j/256, sra to zero bits

.cont:
        faddd   %f14,%f12,%f14
        mov     %i3,%o2                 ! py2 = y

        faddd   %f24,%f22,%f24
        add     %i3,%i4,%i3             ! y += stridey

        ! BUBBLE USIII

        fsubd   %f4,%f2,%f8             ! creating k+j/256: sll
        st      %f6,[%l3]               ! store previous loop x0

        fsubd   %f14,%f12,%f18
        st      %f7,[%l3+4]             ! store previous loop x0

        fsubd   %f24,%f22,%f28
        st      %f16,[%l4]

        ! BUBBLE USIII

        fmuld   %f8,LN2_256H,%f2        ! closest LN2_256 to x
        st      %f17,[%l4+4]

        fmuld   %f18,LN2_256H,%f12
        st      %f26,[%l5]

        fmuld   %f28,LN2_256H,%f22
        st      %f27,[%l5+4]

        ! BUBBLE USIII

        fsubd   %f0,%f2,%f0             ! r = x - p*LN2_256H
        fmuld   %f8,LN2_256L,%f4        ! closest LN2_256 to x , added prec

        fsubd   %f10,%f12,%f10
        fmuld   %f18,LN2_256L,%f14

        fsubd   %f20,%f22,%f20
        fmuld   %f28,LN2_256L,%f24

        ! BUBBLE USIII

        fsubd   %f0,%f4,%f0             ! r -= p*LN2_256L

        fsubd   %f10,%f14,%f10

        fsubd   %f20,%f24,%f20

!!!!!!!!!!!!!!!!!!! New polynomial reorder starts here

        ! Alternate polynomial grouping allowing non-sequential calc of p
        ! OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) )
        ! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3) ) ]
        !
        ! let               SLi        Ri           SRi         be accumulators

        fmuld   %f0,B3,%f2      ! SR1 = r1 * B3
        fdtoi   %f8,%f8                         ! convert k+j/256 to int
        st      %f8,[%fp+m0]                    ! store k, to shift return/use

        fmuld   %f10,B3,%f12    ! SR2 = r2 * B3
        fdtoi   %f18,%f18                       ! convert k+j/256 to int
        st      %f18,[%fp+m1]                   ! store k, to shift return/use

        fmuld   %f20,B3,%f22    ! SR3 = r3 * B3
        fdtoi   %f28,%f28                       ! convert k+j/256 to int
        st      %f28,[%fp+m2]                   ! store k, to shift return/use

        fmuld   %f0,%f0,%f4     ! R1 = r1 * r1

        fmuld   %f10,%f10,%f14  ! R2 = r2 * r2
        faddd   %f2,B2,%f2      ! SR1 += B2

        fmuld   %f20,%f20,%f24  ! R3 = r3 * r3
        faddd   %f12,B2,%f12    ! SR2 += B2

        faddd   %f22,B2,%f22    ! SR3 += B2
        fmuld   %f0,B1,%f6      ! SL1 = r1 * B1

        fmuld   %f10,B1,%f32    ! SL2 = r2 * B1
        fand    %f8,NEGINF,%f8
        ! best here for RAW BYPASS
        ld      [%fp+m0],%l0                    ! get nonshifted k into intreg

        fmuld   %f20,B1,%f34    ! SL3 = r3 * B1
        fand    %f18,NEGINF,%f18
        ld      [%fp+m1],%l1                    ! get nonshifted k into intreg

        fmuld   %f4,%f2,%f4     ! R1 = R1 * SR1
        fand    %f28,NEGINF,%f28
        ld      [%fp+m2],%l2                    ! get nonshifted k into intreg

        fmuld   %f14,%f12,%f14  ! R2 = R2 * SR2
        faddd   %f6,ONE,%f6     ! SL1 += 1

        fmuld   %f24,%f22,%f24  ! R3 = R3 * SR3
        faddd   %f32,ONE,%f32   ! SL2 += 1
        sra     %l0,8,%l3                       ! shift k tobe offset 256-8byte

        faddd   %f34,ONE,%f34   ! SL3 += 1
        sra     %l1,8,%l4                       ! shift k tobe offset 256-8byte
        sra     %l2,8,%l5                       ! shift k tobe offset 256-8byte

        ! BUBBLE in USIII
        and     %l3,0xff0,%l3
        and     %l4,0xff0,%l4



        faddd   %f6,%f4,%f6     ! R1 = SL1 + R1
        ldd     [%g1+%l3],%f4                   ! tbl[j]
        add     %l3,8,%l3                       ! inc j
        and     %l5,0xff0,%l5


        faddd   %f32,%f14,%f32  ! R2 = SL2 + R2
        ldd     [%g1+%l4],%f14                  ! tbl[j]
        add     %l4,8,%l4                       ! inc j
        sra     %l0,20,%o3

        faddd   %f34,%f24,%f34  ! R3 = SL3 + R3
        ldd     [%g1+%l5],%f24                  ! tbl[j]
        add     %l5,8,%l5                       ! inc j
        sra     %l1,20,%l1

        ! BUBBLE in USIII
        ldd     [%g1+%l4],%f16          ! tbl[j+1]
        add     %o3,1021,%o3            ! inc j

        fmuld   %f0,%f6,%f0     ! p1 = r1 * R1
        ldd     [%g1+%l3],%f6           ! tbl[j+1]
        add     %l1,1021,%l1            ! inc j
        sra     %l2,20,%l2

        fmuld   %f10,%f32,%f10  ! p2 = r2 * R2
        ldd     [%g1+%l5],%f26          ! tbl[j+1]
        add     %l2,1021,%l2            ! inc j

        fmuld   %f20,%f34,%f20  ! p3 = r3 * R3





!!!!!!!!!!!!!!!!!!! poly-reorder - ends here

        fmuld   %f0,%f4,%f0             ! start exp(x) = exp(r) * tbl[j]
        mov     %o0,%l3

        fmuld   %f10,%f14,%f10
        mov     %o1,%l4

        fmuld   %f20,%f24,%f20
        mov     %o2,%l5

        faddd   %f0,%f6,%f6             ! cont exp(x) : apply tbl[j] high bits
        lda     [%i1]%asi,%l0           ! preload next argument

        faddd   %f10,%f16,%f16
        lda     [%i1]%asi,%f0

        faddd   %f20,%f26,%f26
        lda     [%i1+4]%asi,%f1

        faddd   %f6,%f4,%f6             ! cont exp(x) : apply tbl[j+1] low bits
        add     %i1,%i2,%i1             ! x += stridex

        faddd   %f16,%f14,%f16
        andn    %l0,%i5,%l0
        or      %o3,%l1,%o4

! -- 16 byte aligned
        orcc    %o4,%l2,%o4
        bl,pn   %icc,.small
! delay slot
        faddd   %f26,%f24,%f26

        fpadd32 %f6,%f8,%f6             ! done exp(x) : apply 2^k
        fpadd32 %f16,%f18,%f16


        addcc   %i0,-1,%i0
        bg,pn   %icc,.loop0
! delay slot
        fpadd32 %f26,%f28,%f26

        ba,pt   %icc,.endloop0
! delay slot
        nop


        .align  16
! .small: slow-path tail of the main loop.  Entered from the bl that
! follows the orcc of %o3, %l1 and %l2 (each is a result exponent k
! plus 1021), i.e. when at least one of the three is negative.
! Every result still gets its 2^k applied exactly as on the fast path;
! a result whose biased k is negative is additionally adjusted with
! BOUNDRY and multiplied by TINY.
! NOTE(review): presumably BOUNDRY lifts the exponent back into the
! normal range and TINY scales the value back down so that the final
! product underflows correctly - confirm against the constant table.
.small:
        tst     %o3                     ! element 0: k + 1021 < 0 ?
        bge,pt  %icc,1f
! delay slot (not annulled, so executed on both paths)
        fpadd32 %f6,%f8,%f6             ! apply 2^k
        fpadd32 %f6,BOUNDRY,%f6         ! underflow path only
        fmuld   %f6,TINY,%f6
1:
        tst     %l1                     ! element 1: k + 1021 < 0 ?
        bge,pt  %icc,1f
! delay slot (not annulled, so executed on both paths)
        fpadd32 %f16,%f18,%f16          ! apply 2^k
        fpadd32 %f16,BOUNDRY,%f16       ! underflow path only
        fmuld   %f16,TINY,%f16
1:
        tst     %l2                     ! element 2: k + 1021 < 0 ?
        bge,pt  %icc,1f
! delay slot (not annulled, so executed on both paths)
        fpadd32 %f26,%f28,%f26          ! apply 2^k
        fpadd32 %f26,BOUNDRY,%f26       ! underflow path only
        fmuld   %f26,TINY,%f26
1:
        addcc   %i0,-1,%i0              ! same loop control as the fast path
        bg,pn   %icc,.loop0             ! more to do while %i0 stays positive
! delay slot
        nop
        ba,pt   %icc,.endloop0          ! otherwise flush the results and return
! delay slot
        nop


! .endloop2: loop epilogue for the argument pending in %f10.  Computes
! exp() for that single element without interleaving it with other
! elements, stores the result through %o1 and then falls through into
! .endloop1.  Reached (at least) from .range1 and .range2 when the
! element count runs out.
!
!   n = x * INVLN2_256, rounded by adding and subtracting %f12   (%f18)
!   r = x - n*LN2_256H - n*LN2_256L                              (%f10)
!   p = r*(1 + r*(B1 + r*(B2 + r*B3)))                           (%f10)
!   y = (p*T0 + T1) + T0, then the exponent is adjusted by k
!
! T0 and T1 are the two doubles of the 16-byte table entry selected by
! bits 12-19 of the integer n; k is the signed top 12 bits of n.
! NOTE(review): %f12 is prepared before this label; presumably it holds
! the sign of x so that the TWO96 rounding constant matches it - confirm.
! NOTE(review): presumably T0 is the high part of 2^(j/256) and T1 its
! low-order correction, as described in the file header - confirm.
.endloop2:
        for     %f12,TWO96,%f12         ! OR TWO96 into %f12: rounding constant
        fmuld   %f10,INVLN2_256,%f14    ! x * INVLN2_256
        faddd   %f14,%f12,%f14          ! add the constant ...
        fsubd   %f14,%f12,%f18          ! ... and take it off again: n (double)
        fmuld   %f18,LN2_256H,%f12
        fsubd   %f10,%f12,%f10          ! r = x - n*LN2_256H
        fmuld   %f18,LN2_256L,%f14
        fsubd   %f10,%f14,%f10          ! r -= n*LN2_256L
        fmuld   %f10,B3,%f12            ! r*B3
        fdtoi   %f18,%f18               ! n as a 32-bit integer
        st      %f18,[%fp+m1]           ! pass n through the stack slot ...
        fmuld   %f10,%f10,%f14          ! r*r
        faddd   %f12,B2,%f12            ! B2 + r*B3
        fmuld   %f10,B1,%f32            ! r*B1
        fand    %f18,NEGINF,%f18        ! keep the bits under the NEGINF mask (2^k term)
        ld      [%fp+m1],%l1            ! ... into an integer register
        fmuld   %f14,%f12,%f14          ! r^2 * (B2 + r*B3)
        faddd   %f32,ONE,%f32           ! 1 + r*B1
        sra     %l1,8,%o4
        and     %o4,0xff0,%o4           ! byte offset of the 16-byte table entry
        faddd   %f32,%f14,%f32          ! 1 + r*(B1 + r*(B2 + r*B3))
        ldd     [%g1+%o4],%f14          ! T0: first double of the entry
        add     %o4,8,%o4
        sra     %l1,20,%l1              ! k = signed top 12 bits of n
        ldd     [%g1+%o4],%f30          ! T1: second double of the entry
        addcc   %l1,1021,%l1            ! icc negative when k < -1021
        fmuld   %f10,%f32,%f10          ! p
        fmuld   %f10,%f14,%f10          ! p*T0
        faddd   %f10,%f30,%f30          ! p*T0 + T1
        faddd   %f30,%f14,%f30          ! (p*T0 + T1) + T0
        bge,pt  %icc,1f                 ! usual case: no extra scaling needed
! delay slot (not annulled, so executed on both paths)
        fpadd32 %f30,%f18,%f30          ! apply 2^k
        fpadd32 %f30,BOUNDRY,%f30       ! k < -1021 only: same fix-up as .small
        fmuld   %f30,TINY,%f30
1:
        st      %f30,[%o1]              ! result written as two 32-bit words
        st      %f31,[%o1+4]

! .endloop1: loop epilogue for the argument pending in %f0.  Computes
! exp() for that single element without interleaving it with other
! elements, stores the result through %o0 and then falls through into
! .endloop0.  Reached by falling out of .endloop2 and (at least) from
! .range0 and .range1 when the element count runs out.
!
!   n = x * INVLN2_256, rounded by adding and subtracting %f2    (%f8)
!   r = x - n*LN2_256H - n*LN2_256L                              (%f0)
!   p = r*(1 + r*(B1 + r*(B2 + r*B3)))                           (%f0)
!   y = (p*T0 + T1) + T0, then the exponent is adjusted by k
!
! T0 and T1 are the two doubles of the 16-byte table entry selected by
! bits 12-19 of the integer n; k is the signed top 12 bits of n.
! NOTE(review): %f2 is prepared before this label; presumably it holds
! the sign of x so that the TWO96 rounding constant matches it - confirm.
.endloop1:
        for     %f2,TWO96,%f2           ! OR TWO96 into %f2: rounding constant
        fmuld   %f0,INVLN2_256,%f4      ! x * INVLN2_256
        faddd   %f4,%f2,%f4             ! add the constant ...
        fsubd   %f4,%f2,%f8             ! ... and take it off again: n (double)
        fmuld   %f8,LN2_256H,%f2
        fsubd   %f0,%f2,%f0             ! r = x - n*LN2_256H
        fmuld   %f8,LN2_256L,%f4
        fsubd   %f0,%f4,%f0             ! r -= n*LN2_256L
        fmuld   %f0,B3,%f2              ! r*B3
        fdtoi   %f8,%f8                 ! n as a 32-bit integer
        st      %f8,[%fp+m0]            ! pass n through the stack slot ...
        fmuld   %f0,%f0,%f4             ! r*r
        faddd   %f2,B2,%f2              ! B2 + r*B3
        fmuld   %f0,B1,%f32             ! r*B1
        fand    %f8,NEGINF,%f8          ! keep the bits under the NEGINF mask (2^k term)
        ld      [%fp+m0],%l0            ! ... into an integer register
        fmuld   %f4,%f2,%f4             ! r^2 * (B2 + r*B3)
        faddd   %f32,ONE,%f32           ! 1 + r*B1
        sra     %l0,8,%o4
        and     %o4,0xff0,%o4           ! byte offset of the 16-byte table entry
        faddd   %f32,%f4,%f32           ! 1 + r*(B1 + r*(B2 + r*B3))
        ldd     [%g1+%o4],%f4           ! T0: first double of the entry
        add     %o4,8,%o4
        sra     %l0,20,%o3              ! k = signed top 12 bits of n
        ldd     [%g1+%o4],%f30          ! T1: second double of the entry
        addcc   %o3,1021,%o3            ! icc negative when k < -1021
        fmuld   %f0,%f32,%f0            ! p
        fmuld   %f0,%f4,%f0             ! p*T0
        faddd   %f0,%f30,%f30           ! p*T0 + T1
        faddd   %f30,%f4,%f30           ! (p*T0 + T1) + T0
        bge,pt  %icc,1f                 ! usual case: no extra scaling needed
! delay slot (not annulled, so executed on both paths)
        fpadd32 %f30,%f8,%f30           ! apply 2^k
        fpadd32 %f30,BOUNDRY,%f30       ! k < -1021 only: same fix-up as .small
        fmuld   %f30,TINY,%f30
1:
        st      %f30,[%o0]              ! result written as two 32-bit words
        st      %f31,[%o0+4]

! .endloop0: common exit.  Writes out the three results of the last
! completed main-loop pass, which are still held in %f6, %f16 and %f26,
! through %l3-%l5 (the copies of %o0-%o2 taken inside the loop), then
! returns to the caller.
! NOTE(review): each double is stored as two single words, presumably
! because y is only guaranteed to be 4-byte aligned - confirm.
.endloop0:
        st      %f6,[%l3]               ! result 0
        st      %f7,[%l3+4]
        st      %f16,[%l4]              ! result 1
        st      %f17,[%l4+4]
        st      %f26,[%l5]              ! result 2
        st      %f27,[%l5+4]
        ret
        restore                         ! delay slot: release the register window


! .range0: special-case handler for the element-0 argument.
! On entry %f0 holds x and %l0 holds its high word with the %i5 bits
! cleared; %l6, %o5 and %o7 are the thresholds tested below.  The branch
! that enters here lies outside this section.
!
!   %l0 < %l6            : x is tiny, result is x + 1
!   %l0 <= %o5           : x > THRESH gives HUGE*HUGE, otherwise x is
!                          usable after all and rejoins the pipeline
!   %o5 < %l0 < %o7      : finite but out of range, HUGE*HUGE or
!                          TINY*TINY depending on x < ONE
!   %l0 >= %o7           : inf or NaN, result is x*x except that
!                          x == NEGINF gives zero*zero
!
! After a special result is stored at 3:, the argument already
! preloaded for element 1 (%l1, %f10) is moved into the element-0
! registers and the loop is re-entered at .loop0.
.range0:
        cmp     %l0,%l6
        bl,a,pt %icc,3f                 ! if x is tiny
! delay slot, annulled if branch not taken
        faddd   %f0,ONE,%f4             ! result = x + 1

        cmp     %l0,%o5
        bg,pt   %icc,1f                 ! if x is huge, inf, nan
! delay slot
        nop

        fcmpd   %fcc0,%f0,THRESH
        fbg,a,pt %fcc0,3f               ! if x is huge and positive
! delay slot, annulled if branch not taken
        fmuld   HUGE,HUGE,%f4           ! result = HUGE*HUGE

! x is near the extremes but within range; return to the loop
        addcc   %i0,-1,%i0              ! one element fewer to go
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.endloop1          ! nothing left: finish x in the epilogue
! delay slot (not annulled, so executed on both paths)
        andn    %l1,%i5,%l1             ! clear the %i5 bits of the next high word
        add     %i1,%i2,%i1             ! x += stridex
        for     %f2,TWO96,%f2           ! first steps of the reduction, as in .endloop1
        ba,pt   %icc,.loop1
! delay slot
        fmuld   %f0,INVLN2_256,%f4

1:
        cmp     %l0,%o7
        bl,pn   %icc,2f                 ! if x is finite
! delay slot
        nop
! x is inf or NaN
        fzero   %f4
        fcmpd   %fcc0,%f0,NEGINF
        fmovdne %fcc0,%f0,%f4           ! zero is kept only when x == NEGINF
        ba,pt   %icc,3f
! delay slot
        fmuld   %f4,%f4,%f4             ! x*x or zero*zero
2:
! x is finite but beyond the %o5 threshold
        fmovd   HUGE,%f4
        fcmpd   %fcc0,%f0,ONE
        fmovdl  %fcc0,TINY,%f4          ! x < ONE selects TINY
        fmuld   %f4,%f4,%f4             ! huge*huge or tiny*tiny
3:
        st      %f4,[%o0]               ! store the special result (high word)
        andn    %l1,%i5,%l0             ! next high word becomes element 0
        add     %i1,%i2,%i1             ! x += stridex
        fmovd   %f10,%f0                ! next argument becomes element 0
        st      %f5,[%o0+4]             ! store the special result (low word)
        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0             ! more to do while %i0 stays positive
! delay slot (not annulled, so executed on both paths)
        add     %i3,%i4,%i3             ! y += stridey
        ba,pt   %icc,.endloop0
! delay slot
        nop


! .range1: special-case handler for the element-1 argument.
! On entry %f10 holds x and %l1 holds its high word with the %i5 bits
! cleared; %l6, %o5 and %o7 are the thresholds tested below.  The branch
! that enters here lies outside this section.
!
!   %l1 < %l6            : x is tiny, result is x + 1
!   %l1 <= %o5           : x > THRESH gives HUGE*HUGE, otherwise x is
!                          usable after all and rejoins the pipeline
!   %o5 < %l1 < %o7      : finite but out of range, HUGE*HUGE or
!                          TINY*TINY depending on x < ONE
!   %l1 >= %o7           : inf or NaN, result is x*x except that
!                          x == NEGINF gives zero*zero
!
! After a special result is stored at 3:, the argument already
! preloaded for element 2 (%l2, %f20) is moved into the element-1
! registers and the loop is re-entered at .loop1.
.range1:
        cmp     %l1,%l6
        bl,a,pt %icc,3f                 ! if x is tiny
! delay slot, annulled if branch not taken
        faddd   %f10,ONE,%f14           ! result = x + 1

        cmp     %l1,%o5
        bg,pt   %icc,1f                 ! if x is huge, inf, nan
! delay slot
        nop

        fcmpd   %fcc0,%f10,THRESH
        fbg,a,pt %fcc0,3f               ! if x is huge and positive
! delay slot, annulled if branch not taken
        fmuld   HUGE,HUGE,%f14          ! result = HUGE*HUGE

! x is near the extremes but within range; return to the loop
        addcc   %i0,-1,%i0              ! one element fewer to go
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.endloop2          ! nothing left: finish x in the epilogue
! delay slot (not annulled, so executed on both paths)
        andn    %l2,%i5,%l2             ! clear the %i5 bits of the next high word
        add     %i1,%i2,%i1             ! x += stridex
        for     %f12,TWO96,%f12         ! first steps of the reduction, as in .endloop2
        ba,pt   %icc,.loop2
! delay slot
        fmuld   %f10,INVLN2_256,%f14

1:
        cmp     %l1,%o7
        bl,pn   %icc,2f                 ! if x is finite
! delay slot
        nop
! x is inf or NaN
        fzero   %f14
        fcmpd   %fcc0,%f10,NEGINF
        fmovdne %fcc0,%f10,%f14         ! zero is kept only when x == NEGINF
        ba,pt   %icc,3f
! delay slot
        fmuld   %f14,%f14,%f14          ! x*x or zero*zero
2:
! x is finite but beyond the %o5 threshold
        fmovd   HUGE,%f14
        fcmpd   %fcc0,%f10,ONE
        fmovdl  %fcc0,TINY,%f14         ! x < ONE selects TINY
        fmuld   %f14,%f14,%f14          ! huge*huge or tiny*tiny
3:
        st      %f14,[%o1]              ! store the special result (high word)
        andn    %l2,%i5,%l1             ! next high word becomes element 1
        add     %i1,%i2,%i1             ! x += stridex
        fmovd   %f20,%f10               ! next argument becomes element 1
        st      %f15,[%o1+4]            ! store the special result (low word)
        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop1             ! more to do while %i0 stays positive
! delay slot (not annulled, so executed on both paths)
        add     %i3,%i4,%i3             ! y += stridey
        ba,pt   %icc,.endloop1
! delay slot
        nop


! .range2: special-case handler for the element-2 argument.
! On entry %f20 holds x and %l2 holds its high word with the %i5 bits
! cleared; %l6, %o5 and %o7 are the thresholds tested below.  The branch
! that enters here lies outside this section.
!
!   %l2 < %l6            : x is tiny, result is x + 1
!   %l2 <= %o5           : x > THRESH gives HUGE*HUGE, otherwise x is
!                          usable after all and control returns to .cont
!   %o5 < %l2 < %o7      : finite but out of range, HUGE*HUGE or
!                          TINY*TINY depending on x < ONE
!   %l2 >= %o7           : inf or NaN, result is x*x except that
!                          x == NEGINF gives zero*zero
!
! Unlike .range0 and .range1 there is no preloaded successor to shift
! down: after the special result is stored through %i3 at 3:, the next
! argument is loaded from [%i1] straight into the element-2 registers
! and the loop is re-entered at .loop2.
.range2:
        cmp     %l2,%l6
        bl,a,pt %icc,3f                 ! if x is tiny
! delay slot, annulled if branch not taken
        faddd   %f20,ONE,%f24           ! result = x + 1

        cmp     %l2,%o5
        bg,pt   %icc,1f                 ! if x is huge, inf, nan
! delay slot
        nop

        fcmpd   %fcc0,%f20,THRESH
        fbg,a,pt %fcc0,3f               ! if x is huge and positive
! delay slot, annulled if branch not taken
        fmuld   HUGE,HUGE,%f24          ! result = HUGE*HUGE

! x is near the extremes but within range; return to the loop
        ba,pt   %icc,.cont
! delay slot
! NOTE(review): presumably this repeats the element-0 work that the
! main path performs at the point where it branched here - confirm.
        faddd   %f4,%f2,%f4

1:
        cmp     %l2,%o7
        bl,pn   %icc,2f                 ! if x is finite
! delay slot
        nop
! x is inf or NaN
        fzero   %f24
        fcmpd   %fcc0,%f20,NEGINF
        fmovdne %fcc0,%f20,%f24         ! zero is kept only when x == NEGINF
        ba,pt   %icc,3f
! delay slot
        fmuld   %f24,%f24,%f24          ! x*x or zero*zero
2:
! x is finite but beyond the %o5 threshold
        fmovd   HUGE,%f24
        fcmpd   %fcc0,%f20,ONE
        fmovdl  %fcc0,TINY,%f24         ! x < ONE selects TINY
        fmuld   %f24,%f24,%f24          ! huge*huge or tiny*tiny
3:
        st      %f24,[%i3]              ! store the special result via the y pointer
        st      %f25,[%i3+4]
        lda     [%i1]%asi,%l2           ! preload next argument
        lda     [%i1]%asi,%f20          ! same argument, as a double in %f20/%f21
        lda     [%i1+4]%asi,%f21
        andn    %l2,%i5,%l2             ! clear the %i5 bits of its high word
        add     %i1,%i2,%i1             ! x += stridex
        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop2             ! more to do while %i0 stays positive
! delay slot (not annulled, so executed on both paths)
        add     %i3,%i4,%i3             ! y += stridey
        ba,pt   %icc,.endloop2
! delay slot
        nop

        SET_SIZE(__vexp)