root/sys/dev/syscons/plasma/fp16.c
/*-
 * Copyright (c) 2015 Dag-Erling Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifdef _KERNEL
#include <sys/libkern.h>
#else
#include <stdio.h>
#include <strings.h>
#endif

#include "fp16.h"

/*
 * Compute the quare root of x, using Newton's method with 2^(log2(x)/2)
 * as the initial estimate.
 */
fp16_t
fp16_sqrt(fp16_t x)
{
        fp16_t y, delta;
        signed int log2x;

        /* special case */
        if (x == 0)
                return (0);

        /* shift toward 0 by half the logarithm */
        log2x = flsl(x) - 1;
        if (log2x >= 16) {
                y = x >> (log2x - 16) / 2;
        } else {
#if 0
                y = x << (16 - log2x) / 2;
#else
                /* XXX for now, return 0 for anything < 1 */
                return (0);
#endif
        }
        while (y > 0) {
                /* delta = y^2 / 2y */
                delta = fp16_div(fp16_sub(fp16_mul(y, y), x), y * 2);
                if (delta == 0)
                        break;
                y = fp16_sub(y, delta);
        }
        return (y);
}

static fp16_t fp16_sin_table[256] = {
            0,    402,    804,   1206,   1608,   2010,   2412,   2814,
         3215,   3617,   4018,   4420,   4821,   5222,   5622,   6023,
         6423,   6823,   7223,   7623,   8022,   8421,   8819,   9218,
         9616,  10013,  10410,  10807,  11204,  11600,  11995,  12390,
        12785,  13179,  13573,  13966,  14359,  14751,  15142,  15533,
        15923,  16313,  16702,  17091,  17479,  17866,  18253,  18638,
        19024,  19408,  19792,  20175,  20557,  20938,  21319,  21699,
        22078,  22456,  22833,  23210,  23586,  23960,  24334,  24707,
        25079,  25450,  25820,  26189,  26557,  26925,  27291,  27656,
        28020,  28383,  28745,  29105,  29465,  29824,  30181,  30538,
        30893,  31247,  31600,  31952,  32302,  32651,  32999,  33346,
        33692,  34036,  34379,  34721,  35061,  35400,  35738,  36074,
        36409,  36743,  37075,  37406,  37736,  38064,  38390,  38716,
        39039,  39362,  39682,  40002,  40319,  40636,  40950,  41263,
        41575,  41885,  42194,  42501,  42806,  43110,  43412,  43712,
        44011,  44308,  44603,  44897,  45189,  45480,  45768,  46055,
        46340,  46624,  46906,  47186,  47464,  47740,  48015,  48288,
        48558,  48828,  49095,  49360,  49624,  49886,  50146,  50403,
        50660,  50914,  51166,  51416,  51665,  51911,  52155,  52398,
        52639,  52877,  53114,  53348,  53581,  53811,  54040,  54266,
        54491,  54713,  54933,  55152,  55368,  55582,  55794,  56004,
        56212,  56417,  56621,  56822,  57022,  57219,  57414,  57606,
        57797,  57986,  58172,  58356,  58538,  58718,  58895,  59070,
        59243,  59414,  59583,  59749,  59913,  60075,  60235,  60392,
        60547,  60700,  60850,  60998,  61144,  61288,  61429,  61568,
        61705,  61839,  61971,  62100,  62228,  62353,  62475,  62596,
        62714,  62829,  62942,  63053,  63162,  63268,  63371,  63473,
        63571,  63668,  63762,  63854,  63943,  64030,  64115,  64197,
        64276,  64353,  64428,  64501,  64571,  64638,  64703,  64766,
        64826,  64884,  64939,  64992,  65043,  65091,  65136,  65179,
        65220,  65258,  65294,  65327,  65358,  65386,  65412,  65436,
        65457,  65475,  65491,  65505,  65516,  65524,  65531,  65534,
};

/*
 * Compute the sine of theta.
 */
fp16_t
fp16_sin(fp16_t theta)
{
        unsigned int i;

        i = 1024 * (theta % FP16_2PI) / FP16_2PI;
        switch (i / 256) {
        case 0:
                return (fp16_sin_table[i % 256]);
        case 1:
                return (fp16_sin_table[255 - i % 256]);
        case 2:
                return (-fp16_sin_table[i % 256]);
        case 3:
                return (-fp16_sin_table[255 - i % 256]);
        default:
                /* inconceivable! */
                return (0);
        }
}

/*
 * Compute the cosine of theta.
 */
fp16_t
fp16_cos(fp16_t theta)
{
        unsigned int i;

        i = 1024 * (theta % FP16_2PI) / FP16_2PI;
        switch (i / 256) {
        case 0:
                return (fp16_sin_table[255 - i % 256]);
        case 1:
                return (-fp16_sin_table[i % 256]);
        case 2:
                return (-fp16_sin_table[255 - i % 256]);
        case 3:
                return (fp16_sin_table[i % 256]);
        default:
                /* inconceivable! */
                return (0);
        }
}