arch/arm/mm/fault-armv.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/arch/arm/mm/fault-armv.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Modifications for ARM processor (c) 1995-2002 Russell King
 */
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/gfp.h>

#include <asm/bugs.h>
#include <asm/cacheflush.h>
#include <asm/cachetype.h>
#include <asm/tlbflush.h>

#include "mm.h"

static pteval_t shared_pte_mask = L_PTE_MT_BUFFERABLE;

#if __LINUX_ARM_ARCH__ < 6
/*
 * We take the easy way out of this problem - we make the
 * PTE uncacheable.  However, we leave the write buffer on.
 *
 * Note that the pte lock held when calling update_mmu_cache must also
 * guard the pte (somewhere else in the same mm) that we modify here.
 * Therefore those configurations which might call adjust_pte (those
 * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock.
 */
static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address,
        unsigned long pfn, pte_t *ptep)
{
        pte_t entry = *ptep;
        int ret;

        /*
         * If this page is present, it's actually being shared.
         */
        ret = pte_present(entry);

        /*
         * If this page isn't present, or is already set up to
         * fault (i.e., is old), we can safely ignore any issues.
         */
        if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) {
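                /*
                 * The page is present and still cacheable here: flush its
                 * lines from the CPU and outer caches, switch this PTE to
                 * the shared (uncacheable) memory type, and invalidate the
                 * stale TLB entry.
                 */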
                flush_cache_page(vma, address, pfn);
                outer_flush_range((pfn << PAGE_SHIFT),
                                  (pfn << PAGE_SHIFT) + PAGE_SIZE);
                pte_val(entry) &= ~L_PTE_MT_MASK;
                pte_val(entry) |= shared_pte_mask;
                set_pte_at(vma->vm_mm, address, ptep, entry);
                flush_tlb_page(vma, address);
        }

        return ret;
}

static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
                      unsigned long pfn, bool need_lock)
{
        spinlock_t *ptl;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        pmd_t pmdval;
        int ret;

        pgd = pgd_offset(vma->vm_mm, address);
        if (pgd_none_or_clear_bad(pgd))
                return 0;

        p4d = p4d_offset(pgd, address);
        if (p4d_none_or_clear_bad(p4d))
                return 0;

        pud = pud_offset(p4d, address);
        if (pud_none_or_clear_bad(pud))
                return 0;

        pmd = pmd_offset(pud, address);
        if (pmd_none_or_clear_bad(pmd))
                return 0;

again:
        /*
         * This is called while another page table is mapped, so we
         * must use the nested version.  This also means we need to
         * open-code the spin-locking.
         */
        pte = pte_offset_map_rw_nolock(vma->vm_mm, pmd, address, &pmdval, &ptl);
        if (!pte)
                return 0;

        if (need_lock) {
                /*
                 * Use nested version here to indicate that we are already
                 * holding one similar spinlock.
                 */
                spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
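                /*
                 * With the lock held, recheck that the PMD still points at
                 * the page table we mapped; if it changed under us the PTE
                 * pointer is stale, so unmap and retry.
                 */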
                if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
                        pte_unmap_unlock(pte, ptl);
                        goto again;
                }
        }

        ret = do_adjust_pte(vma, address, pfn, pte);

        if (need_lock)
                spin_unlock(ptl);
        pte_unmap(pte);

        return ret;
}

static void
make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
              unsigned long addr, pte_t *ptep, unsigned long pfn)
{
        const unsigned long pmd_start_addr = ALIGN_DOWN(addr, PMD_SIZE);
        const unsigned long pmd_end_addr = pmd_start_addr + PMD_SIZE;
        struct mm_struct *mm = vma->vm_mm;
        struct vm_area_struct *mpnt;
        unsigned long offset;
        pgoff_t pgoff;
        int aliases = 0;

        pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);

        /*
         * If we have any shared mappings that are in the same mm
         * space, then we need to handle them specially to maintain
         * cache coherency.
         */
        flush_dcache_mmap_lock(mapping);
        vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
                /*
                 * If we are using split PTE locks, then we need to take the pte
                 * lock. Otherwise we are using the shared mm->page_table_lock,
                 * which is already held, so we must not take it again.
                 */
                bool need_lock = IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS);
                unsigned long mpnt_addr;

                /*
                 * If this VMA is not in our MM, we can ignore it.
                 * Note that we intentionally mask out the VMA
                 * that we are fixing up.
                 */
                if (mpnt->vm_mm != mm || mpnt == vma)
                        continue;
                if (!(mpnt->vm_flags & VM_MAYSHARE))
                        continue;
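                /*
                 * Convert the shared file offset back into the address at
                 * which this aliasing VMA maps the page.
                 */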
                offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
                mpnt_addr = mpnt->vm_start + offset;

                /* Avoid deadlocks by not grabbing the same PTE lock again. */
                if (mpnt_addr >= pmd_start_addr && mpnt_addr < pmd_end_addr)
                        need_lock = false;
                aliases += adjust_pte(mpnt, mpnt_addr, pfn, need_lock);
        }
        flush_dcache_mmap_unlock(mapping);
        if (aliases)
                do_adjust_pte(vma, addr, pfn, ptep);
}

/*
 * Take care of architecture-specific things when placing a new PTE into
 * a page table, or changing an existing PTE.  Basically, there are two
 * things that we need to take care of:
 *
 *  1. If PG_dcache_clean is not set for the page, we need to ensure
 *     that any cache entries for the kernel's virtual memory
 *     range are written back to the page.
 *  2. If we have multiple shared mappings of the same space in
 *     an object, we need to deal with the cache aliasing issues.
 *
 * Note that the pte lock will be held.
 */
void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
                unsigned long addr, pte_t *ptep, unsigned int nr)
{
        unsigned long pfn = pte_pfn(*ptep);
        struct address_space *mapping;
        struct folio *folio;

        if (!pfn_valid(pfn))
                return;

        /*
         * The zero page is never written to, so never has any dirty
         * cache lines, and therefore never needs to be flushed.
         */
        if (is_zero_pfn(pfn))
                return;

        folio = page_folio(pfn_to_page(pfn));
        mapping = folio_flush_mapping(folio);
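        /*
         * If the folio was not already marked clean, write back any dirty
         * kernel-space cache lines for it now (and record it as clean).
         */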
        if (!test_and_set_bit(PG_dcache_clean, &folio->flags.f))
                __flush_dcache_folio(mapping, folio);
        if (mapping) {
                if (cache_is_vivt())
                        make_coherent(mapping, vma, addr, ptep, pfn);
                else if (vma->vm_flags & VM_EXEC)
                        __flush_icache_all();
        }
}
#endif  /* __LINUX_ARM_ARCH__ < 6 */

/*
 * Check whether the write buffer has physical address aliasing
 * issues.  If it has, we need to avoid them for the case where
 * we have several shared mappings of the same object in user
 * space.
 */
static int __init check_writebuffer(unsigned long *p1, unsigned long *p2)
{
        register unsigned long zero = 0, one = 1, val;

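        /*
         * p1 and p2 are two virtual aliases of the same physical word.
         * Write through one alias, then write through the other, and read
         * the first back.  If the write buffer resolves the physical
         * alias correctly the second store is observed and val reads back
         * as zero; a non-zero result means the buffered writes were not
         * ordered by physical address.
         */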
        local_irq_disable();
        mb();
        *p1 = one;
        mb();
        *p2 = zero;
        mb();
        val = *p1;
        mb();
        local_irq_enable();
        return val != zero;
}

void __init check_writebuffer_bugs(void)
{
        struct page *page;
        const char *reason;
        unsigned long v = 1;

        pr_info("CPU: Testing write buffer coherency: ");

        page = alloc_page(GFP_KERNEL);
        if (page) {
                unsigned long *p1, *p2;
                pgprot_t prot = __pgprot_modify(PAGE_KERNEL,
                                        L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE);

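                /*
                 * Map the same page at two different virtual addresses,
                 * both with the bufferable memory type, to create the
                 * physical aliasing situation that check_writebuffer()
                 * exercises.
                 */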
                p1 = vmap(&page, 1, VM_IOREMAP, prot);
                p2 = vmap(&page, 1, VM_IOREMAP, prot);

                if (p1 && p2) {
                        v = check_writebuffer(p1, p2);
                        reason = "enabling work-around";
                } else {
                        reason = "unable to map memory\n";
                }

                vunmap(p1);
                vunmap(p2);
                put_page(page);
        } else {
                reason = "unable to grab page\n";
        }

        if (v) {
                pr_cont("failed, %s\n", reason);
                shared_pte_mask = L_PTE_MT_UNCACHED;
        } else {
                pr_cont("ok\n");
        }
}