root/arch/arm/lib/div64.S
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:     Nicolas Pitre
 *  Created:    Oct 5, 2003
 *  Copyright:  Monta Vista Software, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *      xh-xl   = dividend (clobbered)
 *      r4      = divisor (preserved)
 *
 * Output values:
 *      yh-yl   = result
 *      xh      = remainder
 *
 * Clobbered regs: xl, ip (the condition flags are modified as well)
 */

ENTRY(__do_div64)
UNWIND(.fnstart)

        @ Road map of the numeric local labels used below:
        @   9     divisor is 0 or 1 (0 falls through into Ldiv0_64)
        @   8     divisor is a power of 2: pure shifting
        @   1, 2  compute the upper quotient word (yh)
        @   3..7  compute the lower quotient word (yl)
        @ On every return path xh holds the remainder.

        @ Test for easy paths first.
        subs    ip, r4, #1              @ ip = divisor - 1, flags updated
        bls     9f                      @ divisor is 0 or 1
        tst     ip, r4                  @ (divisor - 1) & divisor
        beq     8f                      @ divisor is power of 2

        @ See if we need to handle upper 32-bit result.
        @ If the high dividend word is below the divisor, the upper
        @ quotient word is zero and only the lower part needs work.
        cmp     xh, r4
        mov     yh, #0                  @ mov leaves the cmp flags intact
        blo     3f

        @ Align divisor with upper part of dividend.
        @ The aligned divisor is stored in yl preserving the original.
        @ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

        @ xh >= r4 here, so clz(r4) - clz(xh) is a non-negative shift
        @ count that lines up the top set bit of the divisor with the
        @ top set bit of xh.
        clz     yl, r4
        clz     ip, xh
        sub     yl, yl, ip              @ yl = shift count
        mov     ip, #1
        mov     ip, ip, lsl yl          @ ip = quotient bit mask for that shift
        mov     yl, r4, lsl yl          @ yl = divisor shifted into place

#else

        @ No clz available: shift divisor and bit mask left one step at
        @ a time while the shifted divisor is below both 0x80000000 and xh.
        mov     yl, r4
        mov     ip, #1
1:      cmp     yl, #0x80000000
        cmpcc   yl, xh
        movcc   yl, yl, lsl #1
        movcc   ip, ip, lsl #1
        bcc     1b

#endif

        @ The division loop for needed upper bit positions.
        @ Break out early if dividend reaches 0.
        @ Each pass tests one quotient bit (mask in ip) against the
        @ shifted divisor in yl.  The loop ends when the mask has been
        @ shifted out to zero, or when the subtraction leaves xh at zero
        @ (movsne is then skipped, so Z stays set for the bne).
2:      cmp     xh, yl
        orrcs   yh, yh, ip              @ xh >= yl: set this quotient bit
        subscs  xh, xh, yl              @ ... and subtract, updating Z
        movsne  ip, ip, lsr #1          @ next bit position; Z set when none left
        mov     yl, yl, lsr #1          @ does not touch the flags
        bne     2b

        @ See if we need to handle lower 32-bit result.
        @ The first cmp always leaves carry set, so the lo condition
        @ below can only hold when xh is zero and xl is below the
        @ divisor.  In that case the low quotient word is 0 and xl is
        @ the remainder.
3:      cmp     xh, #0
        mov     yl, #0
        cmpeq   xl, r4
        movlo   xh, xl
        retlo   lr

        @ The division loop for lower bit positions.
        @ Here we shift remainder bits leftwards rather than moving the
        @ divisor for comparisons, considering the carry-out bit as well.
        @ ip is the mask of the quotient bit being produced, starting
        @ with bit 31.
        mov     ip, #0x80000000
4:      movs    xl, xl, lsl #1          @ top bit of xl goes to carry
        adcs    xh, xh, xh              @ xh = 2 * xh + carry, sets C and Z
        beq     6f                      @ xh became zero: special handling
        cmpcc   xh, r4                  @ no carry-out: compare with divisor
5:      orrcs   yl, yl, ip              @ carry-out or xh >= divisor: bit is 1
        subcs   xh, xh, r4
        movs    ip, ip, lsr #1          @ next bit position
        bne     4b
        ret     lr

        @ The top part of remainder became zero.  If carry is set
        @ (the 33rd bit) this is a false positive so resume the loop.
        @ Otherwise, if lower part is also null then we are done.
6:      bcs     5b
        cmp     xl, #0
        reteq   lr

        @ We still have remainder bits in the low part.  Bring them up.
        @ All quotient bits skipped over this way stay 0; ip is moved
        @ by the same number of positions as xl.

#if __LINUX_ARM_ARCH__ >= 5

        clz     xh, xl                  @ we know xh is zero here so...
        add     xh, xh, #1              @ count that shifts the top set bit out
        mov     xl, xl, lsl xh
        mov     ip, ip, lsr xh

#else

        @ Same effect without clz: shift until a set bit drops out of
        @ xl into the carry.
7:      movs    xl, xl, lsl #1
        mov     ip, ip, lsr #1
        bcc     7b

#endif

        @ Current remainder is now 1.  It is worthless to compare with
        @ divisor at this point since divisor can not be smaller than 3 here.
        @ If possible, branch for another shift in the division loop.
        @ If no bit position left then we are done.
        movs    ip, ip, lsr #1
        mov     xh, #1                  @ the bit just shifted out of xl
        bne     4b
        ret     lr

8:      @ Division by a power of 2: determine what that divisor order is
        @ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

        clz     ip, r4
        rsb     ip, ip, #31             @ ip = 31 - clz = log2(divisor)

#else

        @ Binary search for the position of the single set bit:
        @ narrow it down by 16, 8 and 4 bit steps, accumulating in ip.
        mov     yl, r4
        cmp     r4, #(1 << 16)
        mov     ip, #0
        movhs   yl, yl, lsr #16
        movhs   ip, #16

        cmp     yl, #(1 << 8)
        movhs   yl, yl, lsr #8
        addhs   ip, ip, #8

        cmp     yl, #(1 << 4)
        movhs   yl, yl, lsr #4
        addhs   ip, ip, #4

        @ yl is now 1, 2, 4 or 8: add 0, 1, 2 or 3 respectively.
        cmp     yl, #(1 << 2)
        addhi   ip, ip, #3
        addls   ip, ip, yl, lsr #1

#endif

        @ Quotient = dividend >> ip (as a 64-bit shift across xh:xl),
        @ remainder = low ip bits of xl.
        mov     yh, xh, lsr ip
        mov     yl, xl, lsr ip
        rsb     ip, ip, #32             @ ip = 32 - order
        @ Merge the bits that cross from xh into the low quotient word.
        @ The second variant shifts xh in place first; that is harmless
        @ because xh is reloaded from xl right after.
 ARM(   orr     yl, yl, xh, lsl ip      )
 THUMB( lsl     xh, xh, ip              )
 THUMB( orr     yl, yl, xh              )
        mov     xh, xl, lsl ip          @ drop the quotient bits of xl ...
        mov     xh, xh, lsr ip          @ ... leaving the remainder in xh
        ret     lr

        @ eq -> division by 1: obvious enough...
        @ (quotient = dividend, remainder = 0).  When the divisor is 0
        @ the eq condition fails, all four instructions are skipped and
        @ execution continues into Ldiv0_64 below.
9:      moveq   yl, xl
        moveq   yh, xh
        moveq   xh, #0
        reteq   lr
UNWIND(.fnend)

        @ Separate unwind region for the division by zero tail, which
        @ is the only part of this routine that touches the stack.
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
        @ Division by 0:
        @ Save lr (stack pointer moved down by 8) around the call to
        @ __div0, which is defined outside this file.
        str     lr, [sp, #-8]!
        bl      __div0

        @ as wrong as it could be...
        @ If __div0 returns, hand back a zero quotient and remainder.
        mov     yl, #0
        mov     yh, #0
        mov     xh, #0
        ldr     pc, [sp], #8            @ return: pop saved lr into pc

UNWIND(.fnend)
ENDPROC(__do_div64)