root/lib/crypto/arm/aes-cipher-core.S
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

        .text
        .align          5

        /*
         * Named registers for the values that are live on entry to the
         * do_crypt macro body (rk, rounds, in, out) and for the lookup table
         * base address (ttab).
         */
        rk              .req    r0
        rounds          .req    r1
        in              .req    r2
        out             .req    r3
        ttab            .req    ip

        /*
         * Scratch registers used by the round macros.  t1 is the same
         * register as 'in' and t2 is the same register as 'out', so both
         * pointers are overwritten as soon as the first round runs.  do_crypt
         * copes with this by reading all of the input before the first round
         * and by reloading 'out' from the stack before storing the result.
         * t0 is lr, which do_crypt saves on entry and restores into pc on
         * return.
         */
        t0              .req    lr
        t1              .req    r2
        t2              .req    r3

        /*
         * __select - pick byte number 'idx' (0 = least significant) of 'in'.
         *
         * On ARMv7 and later the byte is extracted with UBFX and ends up in
         * bits [7:0] of 'out'.  On older architectures it is only masked with
         * AND, so it stays at bit position 8 * idx; the __load macro
         * compensates for that when it forms the table address.
         */
        .macro          __select, out, in, idx
        .if             __LINUX_ARM_ARCH__ >= 7
        ubfx            \out, \in, #(8 * \idx), #8
        .else
        and             \out, \in, #0xff << (8 * \idx)
        .endif
        .endm

        /*
         * __load - table lookup using an index prepared by __select.
         *
         *   out - destination register
         *   in  - register holding the byte selected by __select
         *   idx - byte number that was selected (tells us where it sits)
         *   sz  - log2 of the table entry size, used to scale the index
         *   op  - optional suffix appended to 'ldr' (e.g. b for a byte load)
         *
         * When the index already sits in bits [7:0] (ARMv7+, or byte 0 on any
         * architecture) it is scaled up with a left shift.  Otherwise it is
         * still at bit position 8 * idx, and a single right shift by
         * 8 * idx - sz both moves it down and scales it.
         */
        .macro          __load, out, in, idx, sz, op
        .if             __LINUX_ARM_ARCH__ >= 7 || \idx <= 0
        ldr\op          \out, [ttab, \in, lsl #\sz]
        .else
        ldr\op          \out, [ttab, \in, lsr #(8 * \idx) - \sz]
        .endif
        .endm

        /*
         * __hround - half of an AES round: produce two output columns.
         *
         *   out0, out1   - destination registers for the two columns
         *   in0 .. in3   - the four input columns
         *   t3, t4       - two extra scratch registers supplied by the caller
         *   enc          - non-zero selects the byte pattern used by fround,
         *                  zero the pattern used by iround
         *   sz, op       - entry-size shift and 'ldr' suffix, see __load
         *   oldcpsr      - optional; when given, restore_irqs is expanded
         *                  with it once the last table load has been issued
         *
         * With T[] the table at ttab and bN(x) byte N of x, the result is
         *
         *   out0 = T[b0(in0)] ^ ror(T[b1(in1)], 24) ^ ror(T[b2(in2)], 16)
         *                     ^ ror(T[b3(in3)], 8) ^ rk[0]
         *
         *   out1 = T[b0(a)] ^ ror(T[b1(b)], 24) ^ ror(T[b2(c)], 16)
         *                   ^ ror(T[b3(d)], 8) ^ rk[1]
         *
         * where (a, b, c, d) is (in1, in2, in3, in0) if enc is non-zero and
         * (in3, in0, in1, in2) otherwise.
         *
         * Clobbers t0, t1, t2, t3 and t4, and advances rk by two words.
         * No instruction in here sets the condition flags (restore_irqs is
         * defined elsewhere and is only expanded when oldcpsr is given).
         */
        .macro          __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
        /* First two terms of out0. */
        __select        \out0, \in0, 0
        __select        t0, \in1, 1
        __load          \out0, \out0, 0, \sz, \op
        __load          t0, t0, 1, \sz, \op

        /* First two terms of out1; the source columns depend on enc. */
        .if             \enc
        __select        \out1, \in1, 0
        __select        t1, \in2, 1
        .else
        __select        \out1, \in3, 0
        __select        t1, \in0, 1
        .endif
        __load          \out1, \out1, 0, \sz, \op
        __select        t2, \in2, 2
        __load          t1, t1, 1, \sz, \op
        __load          t2, t2, 2, \sz, \op

        /* t0 has been consumed after this and is reused below. */
        eor             \out0, \out0, t0, ror #24

        /*
         * Remaining terms.  Byte 3 of in3 is picked out before t3 and t4 are
         * written: the callers hand in registers for t3/t4 that also serve
         * as input columns (in the second fround half, t3 is in3 itself), so
         * this is the last point at which those inputs are still intact.
         */
        __select        t0, \in3, 3
        .if             \enc
        __select        \t3, \in3, 2
        __select        \t4, \in0, 3
        .else
        __select        \t3, \in1, 2
        __select        \t4, \in2, 3
        .endif
        __load          \t3, \t3, 2, \sz, \op
        __load          t0, t0, 3, \sz, \op
        __load          \t4, \t4, 3, \sz, \op

        .ifnb           \oldcpsr
        /*
         * This is the final round and we're done with all data-dependent table
         * lookups, so we can safely re-enable interrupts.
         */
        restore_irqs    \oldcpsr
        .endif

        /*
         * Fold everything together.  t1 and t2 are consumed by the first two
         * XORs and are then reloaded with the next two round key words,
         * which are mixed in last.
         */
        eor             \out1, \out1, t1, ror #24
        eor             \out0, \out0, t2, ror #16
        ldm             rk!, {t1, t2}
        eor             \out1, \out1, \t3, ror #16
        eor             \out0, \out0, t0, ror #8
        eor             \out1, \out1, \t4, ror #8
        eor             \out0, \out0, t1
        eor             \out1, \out1, t2
        .endm

        /*
         * fround - one full round using the enc = 1 byte pattern of __hround.
         *
         *   out0 .. out3 - destination columns
         *   in0 .. in3   - source columns (overwritten, see below)
         *   sz           - table entry-size shift, defaults to 2
         *   op           - optional 'ldr' suffix, forwarded to __hround
         *   oldcpsr      - optional; forwarded to the second half only, so
         *                  that restore_irqs is expanded after the very last
         *                  table load of the round
         *
         * The first half uses the not-yet-written out2/out3 as scratch.  The
         * second half uses in1/in2 as scratch, which destroys those inputs.
         * Consumes four round key words from rk.
         */
        .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
        __hround        \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
        __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
        .endm

        /*
         * iround - one full round using the enc = 0 byte pattern of __hround.
         *
         * Same arguments as fround.  Compared to fround, the in1 and in3
         * columns are passed to __hround in swapped positions, and the second
         * half uses in1/in0 as scratch, which destroys those inputs.
         * oldcpsr, if given, is forwarded to the second half only.
         * Consumes four round key words from rk.
         */
        .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
        __hround        \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
        __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
        .endm

        /*
         * do_crypt - complete body of a one-block AES routine.
         *
         * Macro arguments:
         *   round - round macro to expand (fround or iround)
         *   ttab  - symbol of the lookup table used for all but the last round
         *           (inside this macro, the parameter is referenced with a
         *           backslash, while a bare 'ttab' is the register alias)
         *   ltab  - optional symbol of a separate table for the last round;
         *           if blank, the last round indexes the main table at a
         *           one-byte offset instead
         *   bsz   - entry-size shift passed to the last round
         *
         * Register arguments on entry: rk (r0), rounds (r1), in (r2),
         * out (r3).  r3-r11 and lr are saved on entry and restored on return;
         * the expansion ends with the return itself.
         *
         * NOTE(review): the loop entry logic below yields the right number of
         * rounds for round counts of 10, 12 and 14 - confirm that callers
         * never pass anything else.
         */
        .macro          do_crypt, round, ttab, ltab, bsz
        /*
         * r3 ('out') is saved as well, at the lowest address, because the
         * round macros overwrite it via its t2 alias.
         */
        push            {r3-r11, lr}

        // Load keys first, to reduce latency in case they're not cached yet.
        ldm             rk!, {r8-r11}

        /*
         * Read the whole input block now: 'in' shares a register with t1 and
         * does not survive the first round.
         */
        ldr             r4, [in]
        ldr             r5, [in, #4]
        ldr             r6, [in, #8]
        ldr             r7, [in, #12]

        /*
         * NOTE(review): rev_l comes from asm/assembler.h; presumably it
         * byte-swaps the first operand using the second as scratch - confirm.
         */
#ifdef CONFIG_CPU_BIG_ENDIAN
        rev_l           r4, t0
        rev_l           r5, t0
        rev_l           r6, t0
        rev_l           r7, t0
#endif

        /* Initial AddRoundKey: state ^= first four round key words. */
        eor             r4, r4, r8
        eor             r5, r5, r9
        eor             r6, r6, r10
        eor             r7, r7, r11

        mov_l           ttab, \ttab
        /*
         * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
         * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
         * intended to make cache-timing attacks more difficult.  They may not
         * be fully prevented, however; see the paper
         * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
         * ("Cache-timing attacks on AES") for a discussion of the many
         * difficulties involved in writing truly constant-time AES software.
         *
         * The loads touch one word every 32 bytes.  Their results are
         * discarded: r8-r11 held the round key words that were already
         * consumed above and are overwritten by the first round anyway.
         */
         save_and_disable_irqs  t0
        .set            i, 0
        .rept           1024 / 128
        ldr             r8, [ttab, #i + 0]
        ldr             r9, [ttab, #i + 32]
        ldr             r10, [ttab, #i + 64]
        ldr             r11, [ttab, #i + 96]
        .set            i, i + 128
        .endr
        /* t0 is scratch for the rounds, so park the saved state on the stack. */
        push            {t0}            // oldcpsr

        /*
         * Run all rounds except the last one.  The state ping-pongs between
         * r4-r7 and r8-r11.  The loop body performs four rounds per trip; if
         * bit 1 of the round count is set (10 and 14, but not 12), the first
         * trip is entered at 1 so that it skips two of them.
         */
        tst             rounds, #2
        bne             1f

0:      \round          r8, r9, r10, r11, r4, r5, r6, r7
        \round          r4, r5, r6, r7, r8, r9, r10, r11

        /*
         * The flags set by subs are tested by bls only after another round
         * has been expanded; this works because the round macros contain no
         * flag-setting instructions.
         */
1:      subs            rounds, rounds, #4
        \round          r8, r9, r10, r11, r4, r5, r6, r7
        bls             2f
        \round          r4, r5, r6, r7, r8, r9, r10, r11
        b               0b

        /*
         * Select the table for the last round.  Without a separate table,
         * the base is moved up by one byte so that the byte loads of the last
         * round (op = b) fetch the byte at offset 1 of each main table entry.
         * NOTE(review): presumably that byte is the plain S-box output -
         * confirm against the table definition.
         */
2:      .ifb            \ltab
        add             ttab, ttab, #1
        .else
        mov_l           ttab, \ltab
        // Prefetch inverse S-box for final round; see explanation above
        .set            i, 0
        .rept           256 / 64
        ldr             t0, [ttab, #i + 0]
        ldr             t1, [ttab, #i + 32]
        .set            i, i + 64
        .endr
        .endif

        /*
         * The round counter is no longer needed, so its register takes the
         * saved interrupt state, which the last round hands to restore_irqs
         * right after its final table lookup.
         */
        pop             {rounds}        // oldcpsr
        \round          r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
        rev_l           r4, t0
        rev_l           r5, t0
        rev_l           r6, t0
        rev_l           r7, t0
#endif

        /*
         * 'out' was destroyed via its t2 alias.  The push/pop of oldcpsr
         * balanced out, so sp points at the r3 slot saved on entry.
         */
        ldr             out, [sp]

        str             r4, [out]
        str             r5, [out, #4]
        str             r6, [out, #8]
        str             r7, [out, #12]

        /* Restore the saved registers and return (saved lr goes into pc). */
        pop             {r3-r11, pc}

        /*
         * Emit the literal pool here, behind the return.
         * NOTE(review): presumably needed for mov_l on configurations where
         * it expands to a literal load - confirm in asm/assembler.h.
         */
        .align          3
        .ltorg
        .endm

/*
 * __aes_arm_encrypt - process one 16-byte block with the forward rounds.
 *
 * Register arguments: r0 = rk (round key pointer), r1 = rounds,
 * r2 = in, r3 = out.
 * NOTE(review): presumably called from C with a matching four-argument
 * prototype - confirm against the declaration.
 *
 * All rounds use aes_enc_tab with 4-byte entries (shift of 2).  No separate
 * last-round table is given, so the last round does byte loads from the
 * same table at a one-byte offset (see do_crypt).
 */
ENTRY(__aes_arm_encrypt)
        do_crypt        fround, aes_enc_tab,, 2
ENDPROC(__aes_arm_encrypt)

        /*
         * __aes_arm_decrypt - process one 16-byte block with the inverse
         * rounds.
         *
         * Register arguments: r0 = rk (round key pointer), r1 = rounds,
         * r2 = in, r3 = out.
         * NOTE(review): presumably called from C with a matching four-argument
         * prototype - confirm against the declaration.
         *
         * All but the last round use aes_dec_tab.  The last round does byte
         * loads from crypto_aes_inv_sbox, indexed without scaling (shift of
         * 0).
         */
        .align          5
ENTRY(__aes_arm_decrypt)
        do_crypt        iround, aes_dec_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)