/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/mm/cache-v7.S
 *
 *  Copyright (C) 2001 Deep Blue Solutions Ltd.
 *  Copyright (C) 2005 ARM Ltd.
 *
 *  This is the "shell" of the ARMv7 processor support.
 */
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>
#include <asm/hardware/cache-b15-rac.h>

#include "proc-macros.S"

.arch armv7-a

#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
.globl icache_size
        .data
        .align  2
icache_size:
        .long   64                      @ conservative default; lowered at boot
                                        @ to the smallest I-cache line size seen
        .text
#endif
/*
 * The secondary kernel init calls v7_flush_dcache_all before it enables
 * the L1; however, the L1 comes out of reset in an undefined state, so
 * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
 * of cache lines with uninitialized data and uninitialized tags to get
 * written out to memory, which does really unpleasant things to the main
 * processor.  We fix this by performing an invalidate, rather than a
 * clean + invalidate, before jumping into the kernel.
 *
 * This function needs to be called both at secondary core startup and
 * on primary core resume.
 */
ENTRY(v7_invalidate_l1)
        mov     r0, #0
        mcr     p15, 2, r0, c0, c0, 0   @ select L1 data cache in CSSELR
        isb
        mrc     p15, 1, r0, c0, c0, 0   @ read cache geometry from CCSIDR

        movw    r3, #0x3ff
        and     r3, r3, r0, lsr #3      @ 'Associativity' in CCSIDR[12:3]
        clz     r1, r3                  @ WayShift
        mov     r2, #1
        mov     r3, r3, lsl r1          @ NumWays-1 shifted into bits [31:...]
        movs    r1, r2, lsl r1          @ #1 shifted left by same amount
        moveq   r1, #1                  @ r1 needs value > 0 even if only 1 way

        and     r2, r0, #0x7
        add     r2, r2, #4              @ SetShift

1:      movw    ip, #0x7fff
        and     r0, ip, r0, lsr #13     @ 'NumSets' in CCSIDR[27:13]

2:      mov     ip, r0, lsl r2          @ NumSet << SetShift
        orr     ip, ip, r3              @ Reg = (Way << WayShift) | (Set << SetShift)
        mcr     p15, 0, ip, c7, c6, 2   @ DCISW: invalidate D-cache line by set/way
        subs    r0, r0, #1              @ Set--
        bpl     2b
        subs    r3, r3, r1              @ Way--
        bcc     3f
        mrc     p15, 1, r0, c0, c0, 0   @ re-read cache geometry from CCSIDR
        b       1b
3:      dsb     st
        isb
        ret     lr
ENDPROC(v7_invalidate_l1)
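
/*
 * For reference, the loop above is equivalent to the following C sketch.
 * This is an illustrative model only, not kernel code: read_ccsidr() and
 * dcisw() are hypothetical stand-ins for the MRC/MCR instructions above,
 * and the single-way case (handled by the moveq above) is omitted.
 *
 *      static void v7_invalidate_l1_model(void)
 *      {
 *              unsigned int ccsidr = read_ccsidr();    // L1 D-cache geometry
 *              unsigned int ways = ((ccsidr >> 3) & 0x3ff) + 1;
 *              unsigned int sets = ((ccsidr >> 13) & 0x7fff) + 1;
 *              unsigned int set_shift = (ccsidr & 0x7) + 4;  // log2(line bytes)
 *              unsigned int way_shift = __builtin_clz(ways - 1);
 *
 *              for (unsigned int way = 0; way < ways; way++)
 *                      for (unsigned int set = 0; set < sets; set++)
 *                              dcisw((way << way_shift) | (set << set_shift));
 *      }
 *
 * E.g. a 4-way cache gives way_shift = clz(3) = 30, so the way index lands
 * in bits [31:30], matching the DCISW operand layout (level bits [3:1] are
 * zero here because this is L1, i.e. level 0).
 */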

/*
 *      v7_flush_icache_all()
 *
 *      Flush the whole I-cache.
 *
 *      Registers:
 *      r0 - set to 0
 */
SYM_TYPED_FUNC_START(v7_flush_icache_all)
        mov     r0, #0
        ALT_SMP(mcr     p15, 0, r0, c7, c1, 0)          @ invalidate I-cache inner shareable
        ALT_UP(mcr      p15, 0, r0, c7, c5, 0)          @ I+BTB cache invalidate
        ret     lr
SYM_FUNC_END(v7_flush_icache_all)

/*
 *      v7_flush_dcache_louis()
 *
 *      Flush the D-cache up to the Level of Unification Inner Shareable
 *
 *      Corrupted registers: r0-r6, r9-r10
 */
ENTRY(v7_flush_dcache_louis)
        dmb                                     @ ensure ordering with previous memory accesses
        mrc     p15, 1, r0, c0, c0, 1           @ read clidr, r0 = clidr
ALT_SMP(mov     r3, r0, lsr #20)                @ move LoUIS into position
ALT_UP( mov     r3, r0, lsr #26)                @ move LoUU into position
        ands    r3, r3, #7 << 1                 @ extract LoU*2 field from clidr
        bne     start_flush_levels              @ LoU != 0, start flushing
#ifdef CONFIG_ARM_ERRATA_643719
ALT_SMP(mrc     p15, 0, r2, c0, c0, 0)          @ read main ID register
ALT_UP( ret     lr)                             @ LoUU is zero, so nothing to do
        movw    r1, #:lower16:(0x410fc090 >> 4) @ ID of ARM Cortex A9 r0p?
        movt    r1, #:upper16:(0x410fc090 >> 4)
        teq     r1, r2, lsr #4                  @ test for errata affected core and if so...
        moveq   r3, #1 << 1                     @   fix LoUIS value
        beq     start_flush_levels              @   start flushing cache levels
#endif
        ret     lr
ENDPROC(v7_flush_dcache_louis)
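
/*
 * For reference, the CLIDR fields used above: LoUU lives in CLIDR[29:27]
 * and LoUIS in CLIDR[23:21].  Shifting right by 26 (resp. 20) and masking
 * with (7 << 1) yields the field already multiplied by two, the
 * "level * 2" form that start_flush_levels expects in r3.  As a rough C
 * model (illustrative only; read_clidr() is a hypothetical stand-in):
 *
 *      unsigned int clidr = read_clidr();
 *      unsigned int lou_x2 = is_smp ? (clidr >> 20) & (7 << 1)
 *                                   : (clidr >> 26) & (7 << 1);
 *      if (lou_x2)
 *              start_flush_levels(lou_x2);     // flush levels 0 .. lou_x2/2 - 1
 */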

/*
 *      v7_flush_dcache_all()
 *
 *      Flush the whole D-cache.
 *
 *      Corrupted registers: r0-r6, r9-r10
 */
ENTRY(v7_flush_dcache_all)
        dmb                                     @ ensure ordering with previous memory accesses
        mrc     p15, 1, r0, c0, c0, 1           @ read clidr
        mov     r3, r0, lsr #23                 @ move LoC into position
        ands    r3, r3, #7 << 1                 @ extract LoC*2 from clidr
        beq     finished                        @ if LoC is 0, no cleaning needed
start_flush_levels:
        mov     r10, #0                         @ start clean at cache level 0
flush_levels:
        add     r2, r10, r10, lsr #1            @ work out 3x current cache level
        mov     r1, r0, lsr r2                  @ extract cache type bits from clidr
        and     r1, r1, #7                      @ mask off the bits for current cache only
        cmp     r1, #2                          @ see what cache we have at this level
        blt     skip                            @ skip if no cache, or just i-cache
#ifdef CONFIG_PREEMPTION
        save_and_disable_irqs_notrace r9        @ make CSSELR/CCSIDR reads atomic
#endif
        mcr     p15, 2, r10, c0, c0, 0          @ select current cache level in CSSELR
        isb                                     @ ISB to sync the new CSSELR/CCSIDR
        mrc     p15, 1, r1, c0, c0, 0           @ read the new CCSIDR
#ifdef CONFIG_PREEMPTION
        restore_irqs_notrace r9
#endif
        and     r2, r1, #7                      @ extract the LineSize field
        add     r2, r2, #4                      @ SetShift = LineSize + 4 = log2(line bytes)
        movw    r4, #0x3ff
        ands    r4, r4, r1, lsr #3              @ r4 = max way index (NumWays - 1)
        clz     r5, r4                          @ WayShift: bit position of way field
        movw    r6, #0x7fff
        and     r1, r6, r1, lsr #13             @ r1 = max set index (NumSets - 1)
        mov     r6, #1
        movne   r4, r4, lsl r5                  @ way index shifted into bits [31:...]
        movne   r6, r6, lsl r5                  @ 1 shifted left by same amount
loop1:
        mov     r9, r1                          @ create working copy of max index
loop2:
        mov     r5, r9, lsl r2                  @ factor set number into r5
        orr     r5, r5, r4                      @ factor way number into r5
        orr     r5, r5, r10                     @ factor cache level into r5
        mcr     p15, 0, r5, c7, c14, 2          @ clean & invalidate by set/way
        subs    r9, r9, #1                      @ decrement the index
        bge     loop2
        subs    r4, r4, r6                      @ decrement the way
        bcs     loop1
skip:
        add     r10, r10, #2                    @ increment cache number
        cmp     r3, r10
#ifdef CONFIG_ARM_ERRATA_814220
        dsb
#endif
        bgt     flush_levels
finished:
        mov     r10, #0                         @ switch back to cache level 0
        mcr     p15, 2, r10, c0, c0, 0          @ select current cache level in cssr
        dsb     st
        isb
        ret     lr
ENDPROC(v7_flush_dcache_all)
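
/*
 * The level walk above, as a rough C model (illustrative only; the helpers
 * are hypothetical stand-ins for the CP15 accesses, and the single-way and
 * preemption details are omitted):
 *
 *      static void flush_levels_model(unsigned int lou_x2)
 *      {
 *              unsigned int clidr = read_clidr();
 *
 *              for (unsigned int lvl = 0; (lvl << 1) < lou_x2; lvl++) {
 *                      if (((clidr >> (3 * lvl)) & 7) < 2)
 *                              continue;               // no cache or I-cache only
 *                      write_csselr(lvl << 1);         // select D/unified cache
 *                      unsigned int ccsidr = read_ccsidr();
 *                      unsigned int set_shift = (ccsidr & 7) + 4;
 *                      unsigned int ways = ((ccsidr >> 3) & 0x3ff) + 1;
 *                      unsigned int sets = ((ccsidr >> 13) & 0x7fff) + 1;
 *                      unsigned int way_shift = __builtin_clz(ways - 1);
 *
 *                      for (unsigned int way = 0; way < ways; way++)
 *                              for (unsigned int set = 0; set < sets; set++)
 *                                      dccisw((way << way_shift) |
 *                                             (set << set_shift) | (lvl << 1));
 *              }
 *      }
 */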

/*
 *      v7_flush_kern_cache_all()
 *
 *      Flush the entire cache system.
 *      The data cache flush is achieved using atomic clean / invalidate
 *      operations by set/way, working outwards from the L1 cache.
 *      The instruction cache is invalidated back to the point of
 *      unification in a single instruction.
 */
SYM_TYPED_FUNC_START(v7_flush_kern_cache_all)
        stmfd   sp!, {r4-r6, r9-r10, lr}
        bl      v7_flush_dcache_all
        mov     r0, #0
        ALT_SMP(mcr     p15, 0, r0, c7, c1, 0)  @ invalidate I-cache inner shareable
        ALT_UP(mcr      p15, 0, r0, c7, c5, 0)  @ I+BTB cache invalidate
        ldmfd   sp!, {r4-r6, r9-r10, lr}
        ret     lr
SYM_FUNC_END(v7_flush_kern_cache_all)

/*
 *      v7_flush_kern_cache_louis(void)
 *
 *      Flush the data cache up to the Level of Unification Inner Shareable.
 *      Invalidate the I-cache to the point of unification.
 */
SYM_TYPED_FUNC_START(v7_flush_kern_cache_louis)
        stmfd   sp!, {r4-r6, r9-r10, lr}
        bl      v7_flush_dcache_louis
        mov     r0, #0
        ALT_SMP(mcr     p15, 0, r0, c7, c1, 0)  @ invalidate I-cache inner shareable
        ALT_UP(mcr      p15, 0, r0, c7, c5, 0)  @ I+BTB cache invalidate
        ldmfd   sp!, {r4-r6, r9-r10, lr}
        ret     lr
SYM_FUNC_END(v7_flush_kern_cache_louis)

/*
 *      v7_flush_user_cache_all()
 *
 *      Flush user-space cache entries for a particular address space.
 *      This is a no-op on ARMv7; the entry point exists to satisfy the
 *      cpu_cache_fns interface.
 *
 *      - mm    - mm_struct describing address space
 */
SYM_TYPED_FUNC_START(v7_flush_user_cache_all)
        ret     lr
SYM_FUNC_END(v7_flush_user_cache_all)

/*
 *      v7_flush_user_cache_range(start, end, flags)
 *
 *      Flush a range of cache entries in the specified address space.
 *      This is a no-op on ARMv7 (see v7_flush_user_cache_all above).
 *
 *      - start - start address (may not be aligned)
 *      - end   - end address (exclusive, may not be aligned)
 *      - flags - vm_area_struct flags describing address space
 *
 *      It is assumed that:
 *      - we have a VIPT cache.
 */
SYM_TYPED_FUNC_START(v7_flush_user_cache_range)
        ret     lr
SYM_FUNC_END(v7_flush_user_cache_range)

/*
 *      v7_coherent_kern_range(start,end)
 *
 *      Ensure that the I and D caches are coherent within specified
 *      region.  This is typically used when code has been written to
 *      a memory region, and will be executed.
 *
 *      - start   - virtual start address of region
 *      - end     - virtual end address of region
 *
 *      It is assumed that:
 *      - the Icache does not read data from the write buffer
 */
SYM_TYPED_FUNC_START(v7_coherent_kern_range)
#ifdef CONFIG_CFI /* Fallthrough if !CFI */
        b       v7_coherent_user_range
#endif
SYM_FUNC_END(v7_coherent_kern_range)

/*
 *      v7_coherent_user_range(start,end)
 *
 *      Ensure that the I and D caches are coherent within specified
 *      region.  This is typically used when code has been written to
 *      a memory region, and will be executed.
 *
 *      - start   - virtual start address of region
 *      - end     - virtual end address of region
 *
 *      It is assumed that:
 *      - the Icache does not read data from the write buffer
 */
SYM_TYPED_FUNC_START(v7_coherent_user_range)
 UNWIND(.fnstart                )
        dcache_line_size r2, r3
        sub     r3, r2, #1
        bic     r12, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
        ALT_SMP(W(dsb))
        ALT_UP(W(nop))
#endif
1:
 USER(  mcr     p15, 0, r12, c7, c11, 1 )       @ clean D line to the point of unification
        add     r12, r12, r2
        cmp     r12, r1
        blo     1b
        dsb     ishst
#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
        ldr     r3, =icache_size
        ldr     r2, [r3, #0]
#else
        icache_line_size r2, r3
#endif
        sub     r3, r2, #1
        bic     r12, r0, r3
2:
 USER(  mcr     p15, 0, r12, c7, c5, 1  )       @ invalidate I line
        add     r12, r12, r2
        cmp     r12, r1
        blo     2b
        mov     r0, #0
        ALT_SMP(mcr     p15, 0, r0, c7, c1, 6)  @ invalidate BTB Inner Shareable
        ALT_UP(mcr      p15, 0, r0, c7, c5, 6)  @ invalidate BTB
        dsb     ishst
        isb
        ret     lr

/*
 * Fault handling for the cache operation above. If the virtual address in r0
 * isn't mapped, fail with -EFAULT.
 */
9001:
#ifdef CONFIG_ARM_ERRATA_775420
        dsb
#endif
        mov     r0, #-EFAULT
        ret     lr
 UNWIND(.fnend          )
SYM_FUNC_END(v7_coherent_user_range)
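
/*
 * A rough C model of v7_coherent_user_range (illustrative only; the helpers
 * are hypothetical stand-ins for the CP15 operations, and the -EFAULT path
 * corresponds to the USER() annotated instructions above faulting):
 *
 *      static int coherent_user_range_model(unsigned long start,
 *                                           unsigned long end)
 *      {
 *              unsigned long line = dcache_line_size();
 *              unsigned long addr;
 *
 *              for (addr = start & ~(line - 1); addr < end; addr += line)
 *                      dccmvau(addr);          // clean D line to PoU, may fault
 *              dsb_ishst();
 *              line = icache_line_size();      // or the icache_size workaround
 *              for (addr = start & ~(line - 1); addr < end; addr += line)
 *                      icimvau(addr);          // invalidate I line, may fault
 *              bpiall();                       // invalidate branch predictor
 *              dsb_ishst();
 *              isb();
 *              return 0;                       // -EFAULT if a line faulted
 *      }
 */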

/*
 *      v7_flush_kern_dcache_area(void *addr, size_t size)
 *
 *      Ensure that the data held in the page kaddr is written back
 *      to the page in question.
 *
 *      - addr  - kernel address
 *      - size  - region size
 */
SYM_TYPED_FUNC_START(v7_flush_kern_dcache_area)
        dcache_line_size r2, r3
        add     r1, r0, r1
        sub     r3, r2, #1
        bic     r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
        ALT_SMP(W(dsb))
        ALT_UP(W(nop))
#endif
1:
        mcr     p15, 0, r0, c7, c14, 1          @ clean & invalidate D line / unified line
        add     r0, r0, r2
        cmp     r0, r1
        blo     1b
        dsb     st
        ret     lr
SYM_FUNC_END(v7_flush_kern_dcache_area)
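
/*
 * As a C sketch (illustrative only; dccimvac() is a hypothetical stand-in
 * for the clean & invalidate MCR above):
 *
 *      static void flush_kern_dcache_area_model(unsigned long addr, size_t size)
 *      {
 *              unsigned long line = dcache_line_size();
 *              unsigned long end = addr + size;
 *
 *              for (addr &= ~(line - 1); addr < end; addr += line)
 *                      dccimvac(addr);         // clean & invalidate to PoC
 *              dsb_st();
 *      }
 */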

/*
 *      v7_dma_inv_range(start,end)
 *
 *      Invalidate the data cache within the specified region; we will
 *      be performing a DMA operation in this region and we want to
 *      purge old data in the cache.
 *
 *      - start   - virtual start address of region
 *      - end     - virtual end address of region
 */
v7_dma_inv_range:
        dcache_line_size r2, r3
        sub     r3, r2, #1
        tst     r0, r3
        bic     r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
        ALT_SMP(W(dsb))
        ALT_UP(W(nop))
#endif
        mcrne   p15, 0, r0, c7, c14, 1          @ clean & invalidate D / U line
        addne   r0, r0, r2

        tst     r1, r3
        bic     r1, r1, r3
        mcrne   p15, 0, r1, c7, c14, 1          @ clean & invalidate D / U line
        cmp     r0, r1
1:
        mcrlo   p15, 0, r0, c7, c6, 1           @ invalidate D / U line
        addlo   r0, r0, r2
        cmplo   r0, r1
        blo     1b
        dsb     st
        ret     lr
ENDPROC(v7_dma_inv_range)
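
/*
 * The boundary handling above is the subtle part: a partially covered line
 * at either end of the buffer may still hold live data belonging to an
 * adjacent object, so it is cleaned & invalidated rather than just
 * invalidated.  As a rough C model (illustrative only; the helpers are
 * hypothetical):
 *
 *      static void dma_inv_range_model(unsigned long start, unsigned long end)
 *      {
 *              unsigned long line = dcache_line_size();
 *              unsigned long mask = line - 1;
 *
 *              if (start & mask) {             // unaligned head
 *                      start &= ~mask;
 *                      dccimvac(start);        // clean & invalidate D line
 *                      start += line;
 *              }
 *              if (end & mask) {               // unaligned tail
 *                      end &= ~mask;
 *                      dccimvac(end);          // clean & invalidate D line
 *              }
 *              for (; start < end; start += line)
 *                      dcimvac(start);         // invalidate D line
 *              dsb_st();
 *      }
 */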

/*
 *      v7_dma_clean_range(start,end)
 *      - start   - virtual start address of region
 *      - end     - virtual end address of region
 */
v7_dma_clean_range:
        dcache_line_size r2, r3
        sub     r3, r2, #1
        bic     r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
        ALT_SMP(W(dsb))
        ALT_UP(W(nop))
#endif
1:
        mcr     p15, 0, r0, c7, c10, 1          @ clean D / U line
        add     r0, r0, r2
        cmp     r0, r1
        blo     1b
        dsb     st
        ret     lr
ENDPROC(v7_dma_clean_range)

/*
 *      v7_dma_flush_range(start,end)
 *      - start   - virtual start address of region
 *      - end     - virtual end address of region
 */
SYM_TYPED_FUNC_START(v7_dma_flush_range)
        dcache_line_size r2, r3
        sub     r3, r2, #1
        bic     r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
        ALT_SMP(W(dsb))
        ALT_UP(W(nop))
#endif
1:
        mcr     p15, 0, r0, c7, c14, 1          @ clean & invalidate D / U line
        add     r0, r0, r2
        cmp     r0, r1
        blo     1b
        dsb     st
        ret     lr
SYM_FUNC_END(v7_dma_flush_range)
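
/*
 * For reference, the three range primitives above map onto these CP15 data
 * cache maintenance operations, all by MVA to the point of coherency:
 *
 *      v7_dma_inv_range    ->  DCIMVAC   (c7, c6, 1)   invalidate
 *      v7_dma_clean_range  ->  DCCMVAC   (c7, c10, 1)  clean
 *      v7_dma_flush_range  ->  DCCIMVAC  (c7, c14, 1)  clean & invalidate
 *
 * (v7_dma_inv_range additionally cleans partially covered boundary lines,
 * as described above.)
 */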

/*
 *      v7_dma_map_area(start, size, dir)
 *      - start - kernel virtual start address
 *      - size  - size of region
 *      - dir   - DMA direction
 */
SYM_TYPED_FUNC_START(v7_dma_map_area)
        add     r1, r1, r0
        teq     r2, #DMA_FROM_DEVICE
        beq     v7_dma_inv_range
        b       v7_dma_clean_range
SYM_FUNC_END(v7_dma_map_area)

/*
 *      v7_dma_unmap_area(start, size, dir)
 *      - start - kernel virtual start address
 *      - size  - size of region
 *      - dir   - DMA direction
 */
SYM_TYPED_FUNC_START(v7_dma_unmap_area)
        add     r1, r1, r0
        teq     r2, #DMA_TO_DEVICE
        bne     v7_dma_inv_range
        ret     lr
SYM_FUNC_END(v7_dma_unmap_area)
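
/*
 * Direction handling, as a C sketch (illustrative only; the model functions
 * mirror v7_dma_inv_range / v7_dma_clean_range above):
 *
 *      void dma_map_area_model(unsigned long start, size_t size, int dir)
 *      {
 *              if (dir == DMA_FROM_DEVICE)     // device writes: drop stale lines
 *                      dma_inv_range(start, start + size);
 *              else                            // device reads: push dirty lines out
 *                      dma_clean_range(start, start + size);
 *      }
 *
 *      void dma_unmap_area_model(unsigned long start, size_t size, int dir)
 *      {
 *              if (dir != DMA_TO_DEVICE)       // CPU will read: discard speculation
 *                      dma_inv_range(start, start + size);
 *      }
 */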