root/lib/math/div64.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
 *
 * Based on former do_div() implementation from asm-parisc/div64.h:
 *      Copyright (C) 1999 Hewlett-Packard Co
 *      Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
 *
 *
 * Generic C version of 64bit/32bit division and modulo, with
 * 64bit result and 32bit remainder.
 *
 * The fast case for (n>>32 == 0) is handled inline by do_div().
 *
 * Code generated for this function might be very inefficient
 * for some CPUs. __div64_32() can be overridden by linking arch-specific
 * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S
 * or by defining a preprocessor macro in arch/include/asm/div64.h.
 */

#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/math.h>
#include <linux/math64.h>
#include <linux/minmax.h>
#include <linux/log2.h>

/* Not needed on 64bit architectures */
#if BITS_PER_LONG == 32

#ifndef __div64_32
uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
{
        uint64_t rem = *n;
        uint64_t b = base;
        uint64_t res, d = 1;
        uint32_t high = rem >> 32;

        /* Reduce the thing a bit first */
        res = 0;
        if (high >= base) {
                high /= base;
                res = (uint64_t) high << 32;
                rem -= (uint64_t) (high*base) << 32;
        }

        while ((int64_t)b > 0 && b < rem) {
                b = b+b;
                d = d+d;
        }

        do {
                if (rem >= b) {
                        rem -= b;
                        res += d;
                }
                b >>= 1;
                d >>= 1;
        } while (d);

        *n = res;
        return rem;
}
EXPORT_SYMBOL(__div64_32);
#endif

#ifndef div_s64_rem
s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
{
        u64 quotient;

        if (dividend < 0) {
                quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
                *remainder = -*remainder;
                if (divisor > 0)
                        quotient = -quotient;
        } else {
                quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
                if (divisor < 0)
                        quotient = -quotient;
        }
        return quotient;
}
EXPORT_SYMBOL(div_s64_rem);
#endif

/*
 * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
 * @dividend:   64bit dividend
 * @divisor:    64bit divisor
 * @remainder:  64bit remainder
 *
 * This implementation is a comparable to algorithm used by div64_u64.
 * But this operation, which includes math for calculating the remainder,
 * is kept distinct to avoid slowing down the div64_u64 operation on 32bit
 * systems.
 */
#ifndef div64_u64_rem
u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
{
        u32 high = divisor >> 32;
        u64 quot;

        if (high == 0) {
                u32 rem32;
                quot = div_u64_rem(dividend, divisor, &rem32);
                *remainder = rem32;
        } else {
                int n = fls(high);
                quot = div_u64(dividend >> n, divisor >> n);

                if (quot != 0)
                        quot--;

                *remainder = dividend - quot * divisor;
                if (*remainder >= divisor) {
                        quot++;
                        *remainder -= divisor;
                }
        }

        return quot;
}
EXPORT_SYMBOL(div64_u64_rem);
#endif

/*
 * div64_u64 - unsigned 64bit divide with 64bit divisor
 * @dividend:   64bit dividend
 * @divisor:    64bit divisor
 *
 * This implementation is a modified version of the algorithm proposed
 * by the book 'Hacker's Delight'.  The original source and full proof
 * can be found here and is available for use without restriction.
 *
 * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
 */
#ifndef div64_u64
u64 div64_u64(u64 dividend, u64 divisor)
{
        u32 high = divisor >> 32;
        u64 quot;

        if (high == 0) {
                quot = div_u64(dividend, divisor);
        } else {
                int n = fls(high);
                quot = div_u64(dividend >> n, divisor >> n);

                if (quot != 0)
                        quot--;
                if ((dividend - quot * divisor) >= divisor)
                        quot++;
        }

        return quot;
}
EXPORT_SYMBOL(div64_u64);
#endif

#ifndef div64_s64
s64 div64_s64(s64 dividend, s64 divisor)
{
        s64 quot, t;

        quot = div64_u64(abs(dividend), abs(divisor));
        t = (dividend ^ divisor) >> 63;

        return (quot ^ t) - t;
}
EXPORT_SYMBOL(div64_s64);
#endif

#endif /* BITS_PER_LONG == 32 */

/*
 * Iterative div/mod for use when dividend is not expected to be much
 * bigger than divisor.
 */
#ifndef iter_div_u64_rem
u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
{
        return __iter_div_u64_rem(dividend, divisor, remainder);
}
EXPORT_SYMBOL(iter_div_u64_rem);
#endif

#if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64)

#define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c)

#if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64)
static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
{
        /* native 64x64=128 bits multiplication */
        u128 prod = (u128)a * b + c;

        *p_lo = prod;
        return prod >> 64;
}
#else
static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
{
        /* perform a 64x64=128 bits multiplication in 32bit chunks */
        u64 x, y, z;

        /* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */
        x = mul_add(a, b, c);
        y = mul_add(a, b >> 32, c >> 32);
        y = add_u64_u32(y, x >> 32);
        z = mul_add(a >> 32, b >> 32, y >> 32);
        y = mul_add(a >> 32, b, y);
        *p_lo = (y << 32) + (u32)x;
        return add_u64_u32(z, y >> 32);
}
#endif

#ifndef BITS_PER_ITER
#define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16)
#endif

#if BITS_PER_ITER == 32
#define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c)
#define add_u64_long(a, b) ((a) + (b))
#else
#undef BITS_PER_ITER
#define BITS_PER_ITER 16
static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c)
{
        u64 n_lo = mul_add(a, b, c);
        u64 n_med = mul_add(a >> 32, b, c >> 32);

        n_med = add_u64_u32(n_med, n_lo >> 32);
        *p_lo = n_med << 32 | (u32)n_lo;
        return n_med >> 32;
}

#define add_u64_long(a, b) add_u64_u32(a, b)
#endif

u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d)
{
        unsigned long d_msig, q_digit;
        unsigned int reps, d_z_hi;
        u64 quotient, n_lo, n_hi;
        u32 overflow;

        n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c);

        if (!n_hi)
                return div64_u64(n_lo, d);

        if (unlikely(n_hi >= d)) {
                /* trigger runtime exception if divisor is zero */
                if (d == 0) {
                        unsigned long zero = 0;

                        OPTIMIZER_HIDE_VAR(zero);
                        return ~0UL/zero;
                }
                /* overflow: result is unrepresentable in a u64 */
                return ~0ULL;
        }

        /* Left align the divisor, shifting the dividend to match */
        d_z_hi = __builtin_clzll(d);
        if (d_z_hi) {
                d <<= d_z_hi;
                n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi);
                n_lo <<= d_z_hi;
        }

        reps = 64 / BITS_PER_ITER;
        /* Optimise loop count for small dividends */
        if (!(u32)(n_hi >> 32)) {
                reps -= 32 / BITS_PER_ITER;
                n_hi = n_hi << 32 | n_lo >> 32;
                n_lo <<= 32;
        }
#if BITS_PER_ITER == 16
        if (!(u32)(n_hi >> 48)) {
                reps--;
                n_hi = add_u64_u32(n_hi << 16, n_lo >> 48);
                n_lo <<= 16;
        }
#endif

        /* Invert the dividend so we can use add instead of subtract. */
        n_lo = ~n_lo;
        n_hi = ~n_hi;

        /*
         * Get the most significant BITS_PER_ITER bits of the divisor.
         * This is used to get a low 'guestimate' of the quotient digit.
         */
        d_msig = (d >> (64 - BITS_PER_ITER)) + 1;

        /*
         * Now do a 'long division' with BITS_PER_ITER bit 'digits'.
         * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits.
         * The worst case is dividing ~0 by 0x8000 which requires two subtracts.
         */
        quotient = 0;
        while (reps--) {
                q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig;
                /* Shift 'n' left to align with the product q_digit * d */
                overflow = n_hi >> (64 - BITS_PER_ITER);
                n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER));
                n_lo <<= BITS_PER_ITER;
                /* Add product to negated divisor */
                overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi);
                /* Adjust for the q_digit 'guestimate' being low */
                while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) {
                        q_digit++;
                        n_hi += d;
                        overflow += n_hi < d;
                }
                quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit);
        }

        /*
         * The above only ensures the remainder doesn't overflow,
         * it can still be possible to add (aka subtract) another copy
         * of the divisor.
         */
        if ((n_hi + d) > n_hi)
                quotient++;
        return quotient;
}
#if !defined(test_mul_u64_add_u64_div_u64)
EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
#endif
#endif