root/include/linux/generic_pt/iommu.h
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __GENERIC_PT_IOMMU_H
#define __GENERIC_PT_IOMMU_H

#include <linux/generic_pt/common.h>
#include <linux/iommu.h>
#include <linux/mm_types.h>

struct iommu_iotlb_gather;
struct pt_iommu_ops;
struct pt_iommu_driver_ops;
struct iommu_dirty_bitmap;

/**
 * DOC: IOMMU Radix Page Table
 *
 * The IOMMU implementation of the Generic Page Table provides an ops struct
 * that is useful to go with an iommu_domain to serve the DMA API, IOMMUFD and
 * the generic map/unmap interface.
 *
 * This interface uses a caller provided locking approach. The caller must have
 * a VA range lock concept that prevents concurrent threads from calling ops on
 * the same VA. Generally the range lock must be at least as large as a single
 * map call.
 */

/**
 * struct pt_iommu - Base structure for IOMMU page tables
 *
 * The format-specific struct will include this as the first member.
 */
struct pt_iommu {
        /**
         * @domain: The core IOMMU domain. The driver should use a union to
         * overlay this memory with its previously existing domain struct to
         * create an alias.
         */
        struct iommu_domain domain;

        /**
         * @ops: Function pointers to access the API
         */
        const struct pt_iommu_ops *ops;

        /**
         * @driver_ops: Function pointers provided by the HW driver to help
         * manage HW details like caches.
         */
        const struct pt_iommu_driver_ops *driver_ops;

        /**
         * @nid: Node ID to use for table memory allocations. The IOMMU driver
         * may want to set the NID to the device's NID, if there are multiple
         * table walkers.
         */
        int nid;

        /**
         * @iommu_device: Device pointer used for any DMA cache flushing when
         * PT_FEAT_DMA_INCOHERENT. This is the iommu device that created the
         * page table which must have dma ops that perform cache flushing.
         */
        struct device *iommu_device;
};

/**
 * struct pt_iommu_info - Details about the IOMMU page table
 *
 * Returned from pt_iommu_ops->get_info()
 */
struct pt_iommu_info {
        /**
         * @pgsize_bitmap: A bitmask where each set bit indicates
         * a page size that can be natively stored in the page table.
         */
        u64 pgsize_bitmap;
};

struct pt_iommu_ops {
        /**
         * @set_dirty: Make the iova write dirty
         * @iommu_table: Table to manipulate
         * @iova: IO virtual address to start
         *
         * This is only used by iommufd testing. It makes the iova dirty so that
         * read_and_clear_dirty() will see it as dirty. Unlike all the other ops
         * this one is safe to call without holding any locking. It may return
         * -EAGAIN if there is a race.
         */
        int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova);

        /**
         * @get_info: Return the pt_iommu_info structure
         * @iommu_table: Table to query
         *
         * Return some basic static information about the page table.
         */
        void (*get_info)(struct pt_iommu *iommu_table,
                         struct pt_iommu_info *info);

        /**
         * @deinit: Undo a format specific init operation
         * @iommu_table: Table to destroy
         *
         * Release all of the memory. The caller must have already removed the
         * table from all HW access and all caches.
         */
        void (*deinit)(struct pt_iommu *iommu_table);
};

/**
 * struct pt_iommu_driver_ops - HW IOTLB cache flushing operations
 *
 * The IOMMU driver should implement these using container_of(iommu_table) to
 * get to it's iommu_domain derived structure. All ops can be called in atomic
 * contexts as they are buried under DMA API calls.
 */
struct pt_iommu_driver_ops {
        /**
         * @change_top: Update the top of table pointer
         * @iommu_table: Table to operate on
         * @top_paddr: New CPU physical address of the top pointer
         * @top_level: IOMMU PT level of the new top
         *
         * Called under the get_top_lock() spinlock. The driver must update all
         * HW references to this domain with a new top address and
         * configuration. On return mappings placed in the new top must be
         * reachable by the HW.
         *
         * top_level encodes the level in IOMMU PT format, level 0 is the
         * smallest page size increasing from there. This has to be translated
         * to any HW specific format. During this call the new top will not be
         * visible to any other API.
         *
         * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
         * enabled.
         */
        void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
                           unsigned int top_level);

        /**
         * @get_top_lock: lock to hold when changing the table top
         * @iommu_table: Table to operate on
         *
         * Return a lock to hold when changing the table top page table from
         * being stored in HW. The lock will be held prior to calling
         * change_top() and released once the top is fully visible.
         *
         * Typically this would be a lock that protects the iommu_domain's
         * attachment list.
         *
         * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
         * enabled.
         */
        spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table);
};

static inline void pt_iommu_deinit(struct pt_iommu *iommu_table)
{
        /*
         * It is safe to call pt_iommu_deinit() before an init, or if init
         * fails. The ops pointer will only become non-NULL if deinit needs to be
         * run.
         */
        if (iommu_table->ops)
                iommu_table->ops->deinit(iommu_table);
}

/**
 * struct pt_iommu_cfg - Common configuration values for all formats
 */
struct pt_iommu_cfg {
        /**
         * @features: Features required. Only these features will be turned on.
         * The feature list should reflect what the IOMMU HW is capable of.
         */
        unsigned int features;
        /**
         * @hw_max_vasz_lg2: Maximum VA the IOMMU HW can support. This will
         * imply the top level of the table.
         */
        u8 hw_max_vasz_lg2;
        /**
         * @hw_max_oasz_lg2: Maximum OA the IOMMU HW can support. The format
         * might select a lower maximum OA.
         */
        u8 hw_max_oasz_lg2;
};

/* Generate the exported function signatures from iommu_pt.h */
#define IOMMU_PROTOTYPES(fmt)                                                  \
        phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
                                                  dma_addr_t iova);            \
        int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain,            \
                                       unsigned long iova, phys_addr_t paddr,  \
                                       size_t pgsize, size_t pgcount,          \
                                       int prot, gfp_t gfp, size_t *mapped);   \
        size_t pt_iommu_##fmt##_unmap_pages(                                   \
                struct iommu_domain *domain, unsigned long iova,               \
                size_t pgsize, size_t pgcount,                                 \
                struct iommu_iotlb_gather *iotlb_gather);                      \
        int pt_iommu_##fmt##_read_and_clear_dirty(                             \
                struct iommu_domain *domain, unsigned long iova, size_t size,  \
                unsigned long flags, struct iommu_dirty_bitmap *dirty);        \
        int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table,                \
                                  const struct pt_iommu_##fmt##_cfg *cfg,      \
                                  gfp_t gfp);                                  \
        void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table,            \
                                      struct pt_iommu_##fmt##_hw_info *info)
#define IOMMU_FORMAT(fmt, member)       \
        struct pt_iommu_##fmt {         \
                struct pt_iommu iommu;  \
                struct pt_##fmt member; \
        };                              \
        IOMMU_PROTOTYPES(fmt)

/*
 * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the
 * iommu_pt
 */
#define IOMMU_PT_DOMAIN_OPS(fmt)                        \
        .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \
        .map_pages = &pt_iommu_##fmt##_map_pages,       \
        .unmap_pages = &pt_iommu_##fmt##_unmap_pages
#define IOMMU_PT_DIRTY_OPS(fmt) \
        .read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty

/*
 * The driver should setup its domain struct like
 *      union {
 *              struct iommu_domain domain;
 *              struct pt_iommu_xxx xx;
 *      };
 * PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain);
 *
 * Which creates an alias between driver_domain.domain and
 * driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing
 * driver_domain.domain users.
 */
#define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb) \
        static_assert(offsetof(s, pt_iommu_memb.domain) ==   \
                      offsetof(s, domain_memb))

struct pt_iommu_amdv1_cfg {
        struct pt_iommu_cfg common;
        unsigned int starting_level;
};

struct pt_iommu_amdv1_hw_info {
        u64 host_pt_root;
        u8 mode;
};

IOMMU_FORMAT(amdv1, amdpt);

/* amdv1_mock is used by the iommufd selftest */
#define pt_iommu_amdv1_mock pt_iommu_amdv1
#define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg
struct pt_iommu_amdv1_mock_hw_info;
IOMMU_PROTOTYPES(amdv1_mock);

struct pt_iommu_vtdss_cfg {
        struct pt_iommu_cfg common;
        /* 4 is a 57 bit 5 level table */
        unsigned int top_level;
};

struct pt_iommu_vtdss_hw_info {
        u64 ssptptr;
        u8 aw;
};

IOMMU_FORMAT(vtdss, vtdss_pt);

struct pt_iommu_x86_64_cfg {
        struct pt_iommu_cfg common;
        /* 4 is a 57 bit 5 level table */
        unsigned int top_level;
};

struct pt_iommu_x86_64_hw_info {
        u64 gcr3_pt;
        u8 levels;
};

IOMMU_FORMAT(x86_64, x86_64_pt);

#undef IOMMU_PROTOTYPES
#undef IOMMU_FORMAT
#endif