root/arch/powerpc/platforms/powernv/pci-ioda-tce.c
// SPDX-License-Identifier: GPL-2.0+
/*
 * TCE helpers for IODA PCI/PCIe on PowerNV platforms
 *
 * Copyright 2018 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/iommu.h>

#include <asm/iommu.h>
#include <asm/tce.h>
#include "pci.h"

/*
 * Build a bitmask of the TCE page sizes supported by @phb.
 *
 * Every cell of the "ibm,supported-tce-sizes" property on the PHB's
 * device node is treated as a page shift and sets bit <shift> in the
 * result. Cells that cannot be read, or whose shift does not fit in an
 * unsigned long, are ignored.
 *
 * If the property is missing or empty, a default mask is returned instead:
 * 4K and 64K, plus 16M and 256M when the CPU has CPU_FTR_ARCH_207S but
 * not CPU_FTR_ARCH_300.
 */
unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
{
        struct pci_controller *hose = phb->hose;
        struct device_node *dn = hose->dn;
        unsigned long mask = 0;
        int i, rc, count;
        u32 val;

        count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
        if (count <= 0) {
                mask = SZ_4K | SZ_64K;
                /* Add 16M and 256M for POWER8 by default */
                if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
                                !cpu_has_feature(CPU_FTR_ARCH_300))
                        mask |= SZ_16M | SZ_256M;
                return mask;
        }

        for (i = 0; i < count; i++) {
                rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
                                                i, &val);
                if (rc)
                        continue;

                /*
                 * Shifting by the type width or more is undefined, so a
                 * bogus firmware value must not reach the shift below.
                 */
                if (val >= 8 * sizeof(mask))
                        continue;

                mask |= 1UL << val;
        }

        return mask;
}

/*
 * Initialise the basic fields of @tbl for a TCE table stored at @tce_mem.
 *
 * @tce_size is shifted right by 3 to obtain it_size, i.e. it is divided
 * by the 8-byte entry size. @dma_offset is converted to a page index
 * using @page_shift. The block size is fixed at 16, the table type at
 * TCE_PCI, and it_index/it_busno are reset to 0.
 */
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
                void *tce_mem, u64 tce_size,
                u64 dma_offset, unsigned int page_shift)
{
        /* Fixed attributes */
        tbl->it_type = TCE_PCI;
        tbl->it_blocksize = 16;
        tbl->it_busno = 0;
        tbl->it_index = 0;

        /* Location and geometry of the table */
        tbl->it_base = (unsigned long)tce_mem;
        tbl->it_size = tce_size >> 3;
        tbl->it_page_shift = page_shift;
        /* Must follow the it_page_shift assignment: it reads it back */
        tbl->it_offset = dma_offset >> tbl->it_page_shift;
}

/*
 * Allocate and zero one level of a TCE table on node @nid.
 *
 * @shift is log2 of the level size in bytes; the page allocation order is
 * @shift - PAGE_SHIFT. The allocation uses GFP_ATOMIC | __GFP_NOWARN and a
 * failure is reported with pr_err().
 *
 * Returns the address of the zeroed level, or NULL on allocation failure.
 */
static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
{
        __be64 *level;
        struct page *pg;

        pg = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
                        shift - PAGE_SHIFT);
        if (pg) {
                level = page_address(pg);
                memset(level, 0, 1UL << shift);
                return level;
        }

        pr_err("Failed to allocate a TCE memory, level shift=%d\n",
                        shift);
        return NULL;
}

static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
                unsigned long size, unsigned int levels);

/*
 * Return a pointer to entry @idx of a (possibly multi-level) TCE table.
 *
 * @tbl:   table to walk
 * @user:  walk tbl->it_userspace instead of tbl->it_base
 * @idx:   entry index relative to the start of the table
 * @alloc: allocate missing intermediate levels on the way down
 *
 * The walk descends tbl->it_indirect_levels levels; each level consumes
 * ilog2(tbl->it_level_size) bits of @idx. When a level is missing and
 * @alloc is false, or when allocating it fails, NULL is returned.
 *
 * A newly allocated level is published with cmpxchg(), so concurrent
 * walkers may race to populate the same slot; the loser frees its own
 * allocation and continues with the winner's level.
 */
static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
{
        __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
        int  level = tbl->it_indirect_levels;
        const long shift = ilog2(tbl->it_level_size);
        unsigned long mask = (tbl->it_level_size - 1) << (level * shift);

        while (level) {
                int n = (idx & mask) >> (level * shift);
                unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));

                if (!tce) {
                        __be64 *tmp2;

                        if (!alloc)
                                return NULL;

                        /* Level size in bytes is it_level_size * 8 */
                        tmp2 = pnv_alloc_tce_level(tbl->it_nid,
                                        ilog2(tbl->it_level_size) + 3);
                        if (!tmp2)
                                return NULL;

                        tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
                        oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
                                        cpu_to_be64(tce)));
                        if (oldtce) {
                                /*
                                 * Somebody else installed this level first.
                                 * Our copy was never published and is still
                                 * all zeroes, so free just that one level.
                                 * The size argument is a number of entries
                                 * (not a shift) so that the freeing order
                                 * matches the allocation order.
                                 */
                                pnv_pci_ioda2_table_do_free_pages(tmp2,
                                        tbl->it_level_size, 0);
                                tce = oldtce;
                        }
                }

                /* Strip the permission bits to get the next level address */
                tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
                idx &= ~mask;
                mask >>= shift;
                --level;
        }

        return tmp + idx;
}

int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
                unsigned long uaddr, enum dma_data_direction direction,
                unsigned long attrs)
{
        u64 proto_tce = iommu_direction_to_tce_perm(direction);
        u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
        long i;

        if (proto_tce & TCE_PCI_WRITE)
                proto_tce |= TCE_PCI_READ;

        for (i = 0; i < npages; i++) {
                unsigned long newtce = proto_tce |
                        ((rpn + i) << tbl->it_page_shift);
                unsigned long idx = index - tbl->it_offset + i;

                *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
        }

        return 0;
}

#ifdef CONFIG_IOMMU_API
/*
 * Atomically replace the TCE at table index @index.
 *
 * On entry *@hpa and *@direction describe the new entry; *@hpa must be
 * aligned to the table's IOMMU page size (enforced with BUG_ON). On
 * return they describe the entry that was replaced.
 *
 * When *@direction is DMA_NONE no table levels are allocated: if the
 * entry's level does not exist, *@hpa is set to 0 and 0 is returned
 * without touching *@direction. For any other direction missing levels
 * are allocated, and -ENOMEM is returned if that fails.
 */
int pnv_tce_xchg(struct iommu_table *tbl, long index,
                unsigned long *hpa, enum dma_data_direction *direction)
{
        u64 proto_tce = iommu_direction_to_tce_perm(*direction);
        unsigned long newtce = *hpa | proto_tce, oldtce;
        unsigned long idx = index - tbl->it_offset;
        __be64 *ptce;

        BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));

        /* Write permission implies read permission */
        if (newtce & TCE_PCI_WRITE)
                newtce |= TCE_PCI_READ;

        if (*direction == DMA_NONE) {
                /* Clearing: nothing to do if the level was never created */
                ptce = pnv_tce(tbl, false, idx, false);
                if (!ptce) {
                        *hpa = 0;
                        return 0;
                }
        } else {
                ptce = pnv_tce(tbl, false, idx, true);
                if (!ptce)
                        return -ENOMEM;
        }

        oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
        *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
        *direction = iommu_tce_direction(oldtce);

        return 0;
}

/*
 * Return a pointer to the entry for table index @index in the userspace
 * view of @tbl (tbl->it_userspace), optionally allocating missing levels
 * when @alloc is true.
 *
 * Warns once and returns NULL if the table has no userspace view.
 */
__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
{
        long idx;

        if (WARN_ON_ONCE(!tbl->it_userspace))
                return NULL;

        idx = index - tbl->it_offset;

        return pnv_tce(tbl, true, idx, alloc);
}
#endif

/*
 * Clear @npages TCEs starting at table index @index.
 *
 * Table levels are never allocated here. When the level holding an entry
 * does not exist, every remaining entry of that level is skipped, since
 * none of them can be populated.
 */
void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
{
        const unsigned long level_mask = tbl->it_level_size - 1;
        long i;

        for (i = 0; i < npages; i++) {
                unsigned long idx = index - tbl->it_offset + i;
                __be64 *ptce = pnv_tce(tbl, false, idx, false);

                if (ptce) {
                        *ptce = cpu_to_be64(0);
                        continue;
                }

                /*
                 * Skip the rest of the level. The distance is derived from
                 * the table index rather than the loop counter so that a
                 * range which does not start on a level boundary does not
                 * jump over entries at the start of the next level.
                 */
                i += (idx | level_mask) - idx;
        }
}

/*
 * Read the TCE at table index @index in CPU byte order.
 *
 * Returns 0 when the level holding the entry does not exist; no levels
 * are allocated.
 */
unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
{
        __be64 *entry;

        entry = pnv_tce(tbl, false, index - tbl->it_offset, false);

        return entry ? be64_to_cpu(*entry) : 0;
}

/*
 * Free one TCE table level and, recursively, the levels it points to.
 *
 * @addr:   address of the level; the TCE permission bits are masked off
 *          before use, so they may still be set in the low bits
 * @size:   number of 8-byte entries in the level
 * @levels: number of indirect levels below this one; 0 means the level
 *          holds no pointers to further levels
 *
 * Entries with neither TCE_PCI_READ nor TCE_PCI_WRITE set are treated as
 * empty and are not followed.
 */
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
                unsigned long size, unsigned int levels)
{
        const unsigned long level_addr = (unsigned long) addr &
                        ~(TCE_PCI_READ | TCE_PCI_WRITE);
        u64 *entries = (u64 *) level_addr;
        unsigned long n;

        /* Release the children first; the loop is a no-op for a leaf */
        for (n = 0; levels && n < size; ++n) {
                unsigned long child = be64_to_cpu(entries[n]);

                if (child & (TCE_PCI_READ | TCE_PCI_WRITE))
                        pnv_pci_ioda2_table_do_free_pages(__va(child), size,
                                        levels - 1);
        }

        free_pages(level_addr, get_order(size << 3));
}

/*
 * Free all memory backing @tbl: the table at it_base and, if present,
 * the userspace view at it_userspace.
 *
 * Nothing is done when it_size is 0. The per-level entry count handed to
 * the freeing helper is it_level_size for an indirect table and it_size
 * for a single-level one.
 */
void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
{
        unsigned long entries;

        if (!tbl->it_size)
                return;

        if (tbl->it_indirect_levels)
                entries = tbl->it_level_size;
        else
                entries = tbl->it_size;

        pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, entries,
                        tbl->it_indirect_levels);

        if (!tbl->it_userspace)
                return;

        pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, entries,
                        tbl->it_indirect_levels);
}

static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
                unsigned int levels, unsigned long limit,
                unsigned long *current_offset, unsigned long *total_allocated)
{
        __be64 *addr, *tmp;
        unsigned long allocated = 1UL << shift;
        unsigned int entries = 1UL << (shift - 3);
        long i;

        addr = pnv_alloc_tce_level(nid, shift);
        *total_allocated += allocated;

        --levels;
        if (!levels) {
                *current_offset += allocated;
                return addr;
        }

        for (i = 0; i < entries; ++i) {
                tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
                                levels, limit, current_offset, total_allocated);
                if (!tmp)
                        break;

                addr[i] = cpu_to_be64(__pa(tmp) |
                                TCE_PCI_READ | TCE_PCI_WRITE);

                if (*current_offset >= limit)
                        break;
        }

        return addr;
}

/*
 * Allocate the memory for a TCE table and initialise @tbl to describe it.
 *
 * @nid:                  node to allocate from
 * @bus_offset:           bus address at which the DMA window starts
 * @page_shift:           log2 of the IOMMU page size
 * @window_size:          DMA window size in bytes; must be a power of two
 * @levels:               number of table levels,
 *                        1..POWERNV_IOMMU_MAX_LEVELS
 * @alloc_userspace_copy: also allocate a second table of the same shape,
 *                        stored in tbl->it_userspace
 * @tbl:                  table descriptor to fill in
 *
 * Only the top level is allocated here (the allocation helper is called
 * for a single level); tbl->it_indirect_levels is set to @levels - 1.
 *
 * Returns 0 on success, -EINVAL for unsupported parameters and -ENOMEM
 * when an allocation fails. @tbl is only written on success.
 */
long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
                __u32 page_shift, __u64 window_size, __u32 levels,
                bool alloc_userspace_copy, struct iommu_table *tbl)
{
        void *addr, *uas = NULL;
        unsigned long offset = 0, level_shift, total_allocated = 0;
        unsigned long total_allocated_uas = 0;
        unsigned long tce_table_size;
        unsigned int window_shift, entries_shift, table_shift;

        if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
                return -EINVAL;

        if (!is_power_of_2(window_size))
                return -EINVAL;

        /*
         * Derive the sizes only after window_size is known to be a non-zero
         * power of two, so that neither the log2 nor the shift below is
         * evaluated on a bogus value.
         * NOTE(review): page_shift > window_shift is not rejected here;
         * presumably callers never pass that - confirm.
         */
        window_shift = ilog2(window_size);
        entries_shift = window_shift - page_shift;
        table_shift = max_t(unsigned int, entries_shift + 3, PAGE_SHIFT);
        tce_table_size = 1UL << table_shift;

        /* Adjust direct table size from window_size and levels */
        entries_shift = (entries_shift + levels - 1) / levels;
        level_shift = entries_shift + 3;
        level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);

        if ((level_shift - 3) * levels + page_shift >= 55)
                return -EINVAL;

        /* Allocate TCE table */
        addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
                        1, tce_table_size, &offset, &total_allocated);

        /* addr==NULL means that the first level allocation failed */
        if (!addr)
                return -ENOMEM;

        /*
         * First level was allocated but some lower level failed as
         * we did not allocate as much as we wanted,
         * release partially allocated table.
         */
        if (levels == 1 && offset < tce_table_size)
                goto free_tces_exit;

        /* Allocate userspace view of the TCE table */
        if (alloc_userspace_copy) {
                offset = 0;
                uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
                                1, tce_table_size, &offset,
                                &total_allocated_uas);
                if (!uas)
                        goto free_tces_exit;
                /* The userspace view must match the hardware table */
                if (levels == 1 && (offset < tce_table_size ||
                                total_allocated_uas != total_allocated))
                        goto free_uas_exit;
        }

        /* Setup linux iommu table */
        pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
                        page_shift);
        tbl->it_level_size = 1ULL << (level_shift - 3);
        tbl->it_indirect_levels = levels - 1;
        tbl->it_userspace = uas;
        tbl->it_nid = nid;

        pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
                        window_size, tce_table_size, bus_offset, tbl->it_base,
                        tbl->it_userspace, 1, levels);

        return 0;

free_uas_exit:
        pnv_pci_ioda2_table_do_free_pages(uas,
                        1ULL << (level_shift - 3), levels - 1);
free_tces_exit:
        pnv_pci_ioda2_table_do_free_pages(addr,
                        1ULL << (level_shift - 3), levels - 1);

        return -ENOMEM;
}

/*
 * Undo the association between @tbl and @table_group.
 *
 * First the link entry for @table_group is removed from the table's
 * it_group_list and queued for freeing with kfree_rcu(). Then the slot in
 * table_group->tables[] that points at @tbl is cleared and the table
 * reference is dropped with iommu_tce_table_put().
 *
 * Does nothing if either argument is NULL. Warns if the link or the slot
 * cannot be found; when the link is missing the slot is left untouched.
 */
void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
                struct iommu_table_group *table_group)
{
        struct iommu_table_group_link *link;
        bool unlinked = false;
        bool cleared = false;
        long slot;

        if (!tbl || !table_group)
                return;

        /* Remove link to a group from table's list of attached groups */
        rcu_read_lock();
        list_for_each_entry_rcu(link, &tbl->it_group_list, next) {
                if (link->table_group != table_group)
                        continue;

                list_del_rcu(&link->next);
                kfree_rcu(link, rcu);
                unlinked = true;
                break;
        }
        rcu_read_unlock();

        if (WARN_ON(!unlinked))
                return;

        /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
        for (slot = 0; slot < IOMMU_TABLE_GROUP_MAX_TABLES; ++slot) {
                if (table_group->tables[slot] != tbl)
                        continue;

                iommu_tce_table_put(tbl);
                table_group->tables[slot] = NULL;
                cleared = true;
                break;
        }
        WARN_ON(!cleared);
}

/*
 * Associate @tbl with @table_group.
 *
 * A link entry is allocated on @node and added to the table's
 * it_group_list, and a reference to @tbl obtained with
 * iommu_tce_table_get() is stored in table_group->tables[@num].
 *
 * Returns 0 on success, -EINVAL (with a warning) if @tbl or @table_group
 * is NULL, and -ENOMEM if the link entry cannot be allocated.
 */
long pnv_pci_link_table_and_group(int node, int num,
                struct iommu_table *tbl,
                struct iommu_table_group *table_group)
{
        struct iommu_table_group_link *link;

        if (WARN_ON(!tbl || !table_group))
                return -EINVAL;

        link = kzalloc_node(sizeof(*link), GFP_KERNEL, node);
        if (!link)
                return -ENOMEM;

        link->table_group = table_group;
        list_add_rcu(&link->next, &tbl->it_group_list);

        table_group->tables[num] = iommu_tce_table_get(tbl);

        return 0;
}