root/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"

#include "gen8_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gtt.h"

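/*
 * Encode a page-directory entry: present + writable, with the PPAT cache
 * attributes selected from the cache level.
 */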
static u64 gen8_pde_encode(const dma_addr_t addr,
                           const enum i915_cache_level level)
{
        u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

        if (level != I915_CACHE_NONE)
                pde |= PPAT_CACHED_PDE;
        else
                pde |= PPAT_UNCACHED;

        return pde;
}

static u64 gen8_pte_encode(dma_addr_t addr,
                           unsigned int pat_index,
                           u32 flags)
{
        gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

        if (unlikely(flags & PTE_READ_ONLY))
                pte &= ~GEN8_PAGE_RW;

        /*
         * For pre-gen12 platforms pat_index is the same as enum
         * i915_cache_level, so the switch-case here is still valid.
         * See translation table defined by LEGACY_CACHELEVEL.
         */
        switch (pat_index) {
        case I915_CACHE_NONE:
                pte |= PPAT_UNCACHED;
                break;
        case I915_CACHE_WT:
                pte |= PPAT_DISPLAY_ELLC;
                break;
        default:
                pte |= PPAT_CACHED;
                break;
        }

        return pte;
}

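/*
 * Gen12 PTEs carry a PAT index rather than a cache level. The index bits are
 * scattered across the PTE (with bit 3 only defined for the MTL layout), and
 * pages resident in device local-memory must also set the LM bit.
 */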
static u64 gen12_pte_encode(dma_addr_t addr,
                            unsigned int pat_index,
                            u32 flags)
{
        gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

        if (unlikely(flags & PTE_READ_ONLY))
                pte &= ~GEN8_PAGE_RW;

        if (flags & PTE_LM)
                pte |= GEN12_PPGTT_PTE_LM;

        if (pat_index & BIT(0))
                pte |= GEN12_PPGTT_PTE_PAT0;

        if (pat_index & BIT(1))
                pte |= GEN12_PPGTT_PTE_PAT1;

        if (pat_index & BIT(2))
                pte |= GEN12_PPGTT_PTE_PAT2;

        if (pat_index & BIT(3))
                pte |= MTL_PPGTT_PTE_PAT3;

        return pte;
}

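/*
 * When running under a paravirtualised vGPU, notify the host of ppgtt
 * creation/teardown by publishing the page-directory address(es) through the
 * vgtif shared page and then raising a g2v message.
 */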
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
        struct drm_i915_private *i915 = ppgtt->vm.i915;
        struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
        enum vgt_g2v_type msg;
        int i;

        if (create)
                atomic_inc(px_used(ppgtt->pd)); /* never remove */
        else
                atomic_dec(px_used(ppgtt->pd));

        mutex_lock(&i915->vgpu.lock);

        if (i915_vm_is_4lvl(&ppgtt->vm)) {
                const u64 daddr = px_dma(ppgtt->pd);

                intel_uncore_write(uncore,
                                   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
                intel_uncore_write(uncore,
                                   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

                msg = create ?
                        VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
                        VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
        } else {
                for (i = 0; i < GEN8_3LVL_PDPES; i++) {
                        const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

                        intel_uncore_write(uncore,
                                           vgtif_reg(pdp[i].lo),
                                           lower_32_bits(daddr));
                        intel_uncore_write(uncore,
                                           vgtif_reg(pdp[i].hi),
                                           upper_32_bits(daddr));
                }

                msg = create ?
                        VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
                        VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
        }

        /* g2v_notify atomically (via hv trap) consumes the message packet. */
        intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);

        mutex_unlock(&i915->vgpu.lock);
}

/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))

#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)

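/*
 * Return the number of entries [start, end) occupies at this level, storing
 * the first index in *idx. The count is clamped so that a caller iterating a
 * single page directory never walks off its end.
 */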
static unsigned int
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
        const int shift = gen8_pd_shift(lvl);
        const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

        GEM_BUG_ON(start >= end);
        end += ~mask >> gen8_pd_shift(1);

        *idx = i915_pde_index(start, shift);
        if ((start ^ end) & mask)
                return GEN8_PDES - *idx;
        else
                return i915_pde_index(end, shift) - *idx;
}

static bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
        const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

        GEM_BUG_ON(start >= end);
        return (start ^ end) & mask && (start & ~mask) == 0;
}

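/* Number of PTEs [start, end) covers within a single page table. */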
static unsigned int gen8_pt_count(u64 start, u64 end)
{
        GEM_BUG_ON(start >= end);
        if ((start ^ end) >> gen8_pd_shift(1))
                return GEN8_PDES - (start & (GEN8_PDES - 1));
        else
                return end - start;
}

static unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
{
        unsigned int shift = __gen8_pte_shift(vm->top);

        return (vm->total + (1ull << shift) - 1) >> shift;
}

static struct i915_page_directory *
gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
{
        struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);

        if (vm->top == 2)
                return ppgtt->pd;
        else
                return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
}

static struct i915_page_directory *
gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
{
        return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
}

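/*
 * Depth-first teardown of a page-directory subtree: only the non-leaf levels
 * recurse, after which the directory/table itself is released.
 */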
static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
                                 struct i915_page_directory *pd,
                                 int count, int lvl)
{
        if (lvl) {
                void **pde = pd->entry;

                do {
                        if (!*pde)
                                continue;

                        __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
                } while (pde++, --count);
        }

        free_px(vm, &pd->pt, lvl);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

        if (vm->rsvd.obj)
                i915_gem_object_put(vm->rsvd.obj);

        if (intel_vgpu_active(vm->i915))
                gen8_ppgtt_notify_vgt(ppgtt, false);

        if (ppgtt->pd)
                __gen8_ppgtt_cleanup(vm, ppgtt->pd,
                                     gen8_pd_top_count(vm), vm->top);

        free_scratch(vm);
}

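/*
 * Return the PTEs of [start, end) to scratch. A fully-populated child that
 * lies entirely inside the range is unhooked and torn down without
 * descending; for compact (64K) page tables, 16 entries share one slot, so
 * the PTE index and count are scaled down accordingly.
 */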
static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
                              struct i915_page_directory * const pd,
                              u64 start, const u64 end, int lvl)
{
        const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
        unsigned int idx, len;

        GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

        len = gen8_pd_range(start, end, lvl--, &idx);
        GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
                  __func__, vm, lvl + 1, start, end,
                  idx, len, atomic_read(px_used(pd)));
        GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

        do {
                struct i915_page_table *pt = pd->entry[idx];

                if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
                    gen8_pd_contains(start, end, lvl)) {
                        GTT_TRACE("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
                                  __func__, vm, lvl + 1, idx, start, end);
                        clear_pd_entry(pd, idx, scratch);
                        __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
                        start += (u64)I915_PDES << gen8_pd_shift(lvl);
                        continue;
                }

                if (lvl) {
                        start = __gen8_ppgtt_clear(vm, as_pd(pt),
                                                   start, end, lvl);
                } else {
                        unsigned int count;
                        unsigned int pte = gen8_pd_index(start, 0);
                        unsigned int num_ptes;
                        u64 *vaddr;

                        count = gen8_pt_count(start, end);
                        GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
                                  __func__, vm, lvl, start, end,
                                  gen8_pd_index(start, 0), count,
                                  atomic_read(&pt->used));
                        GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

                        num_ptes = count;
                        if (pt->is_compact) {
                                GEM_BUG_ON(num_ptes % 16);
                                GEM_BUG_ON(pte % 16);
                                num_ptes /= 16;
                                pte /= 16;
                        }

                        vaddr = px_vaddr(pt);
                        memset64(vaddr + pte,
                                 vm->scratch[0]->encode,
                                 num_ptes);

                        atomic_sub(count, &pt->used);
                        start += count;
                }

                if (release_pd_entry(pd, idx, pt, scratch))
                        free_px(vm, pt, lvl);
        } while (idx++, --len);

        return start;
}

static void gen8_ppgtt_clear(struct i915_address_space *vm,
                             u64 start, u64 length)
{
        GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
        GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
        GEM_BUG_ON(range_overflows(start, length, vm->total));

        start >>= GEN8_PTE_SHIFT;
        length >>= GEN8_PTE_SHIFT;
        GEM_BUG_ON(length == 0);

        __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
                           start, start + length, vm->top);
}

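/*
 * Populate the page-table tree for [start, end) from the tables preallocated
 * in @stash. pd->lock is dropped while initialising a new entry; if another
 * thread installed the same entry meanwhile, theirs wins and ours remains in
 * the stash for the next level/index.
 */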
static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
                               struct i915_vm_pt_stash *stash,
                               struct i915_page_directory * const pd,
                               u64 * const start, const u64 end, int lvl)
{
        unsigned int idx, len;

        GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

        len = gen8_pd_range(*start, end, lvl--, &idx);
        GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
                  __func__, vm, lvl + 1, *start, end,
                  idx, len, atomic_read(px_used(pd)));
        GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));

        spin_lock(&pd->lock);
        GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
        do {
                struct i915_page_table *pt = pd->entry[idx];

                if (!pt) {
                        spin_unlock(&pd->lock);

                        GTT_TRACE("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
                                  __func__, vm, lvl + 1, idx);

                        pt = stash->pt[!!lvl];
                        __i915_gem_object_pin_pages(pt->base);

                        fill_px(pt, vm->scratch[lvl]->encode);

                        spin_lock(&pd->lock);
                        if (likely(!pd->entry[idx])) {
                                stash->pt[!!lvl] = pt->stash;
                                atomic_set(&pt->used, 0);
                                set_pd_entry(pd, idx, pt);
                        } else {
                                pt = pd->entry[idx];
                        }
                }

                if (lvl) {
                        atomic_inc(&pt->used);
                        spin_unlock(&pd->lock);

                        __gen8_ppgtt_alloc(vm, stash,
                                           as_pd(pt), start, end, lvl);

                        spin_lock(&pd->lock);
                        atomic_dec(&pt->used);
                        GEM_BUG_ON(!atomic_read(&pt->used));
                } else {
                        unsigned int count = gen8_pt_count(*start, end);

                        GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
                                  __func__, vm, lvl, *start, end,
                                  gen8_pd_index(*start, 0), count,
                                  atomic_read(&pt->used));

                        atomic_add(count, &pt->used);
                        /* All other pdes may be simultaneously removed */
                        GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
                        *start += count;
                }
        } while (idx++, --len);
        spin_unlock(&pd->lock);
}

static void gen8_ppgtt_alloc(struct i915_address_space *vm,
                             struct i915_vm_pt_stash *stash,
                             u64 start, u64 length)
{
        GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
        GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
        GEM_BUG_ON(range_overflows(start, length, vm->total));

        start >>= GEN8_PTE_SHIFT;
        length >>= GEN8_PTE_SHIFT;
        GEM_BUG_ON(length == 0);

        __gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
                           &start, start + length, vm->top);
}

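/*
 * Invoke @fn on every leaf page table backing [start, end). Each child's
 * used count is raised to keep it alive while pd->lock is dropped around the
 * recursion/callback; the range is expected to be fully populated.
 */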
static void __gen8_ppgtt_foreach(struct i915_address_space *vm,
                                 struct i915_page_directory *pd,
                                 u64 *start, u64 end, int lvl,
                                 void (*fn)(struct i915_address_space *vm,
                                            struct i915_page_table *pt,
                                            void *data),
                                 void *data)
{
        unsigned int idx, len;

        len = gen8_pd_range(*start, end, lvl--, &idx);

        spin_lock(&pd->lock);
        do {
                struct i915_page_table *pt = pd->entry[idx];

                atomic_inc(&pt->used);
                spin_unlock(&pd->lock);

                if (lvl) {
                        __gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl,
                                             fn, data);
                } else {
                        fn(vm, pt, data);
                        *start += gen8_pt_count(*start, end);
                }

                spin_lock(&pd->lock);
                atomic_dec(&pt->used);
        } while (idx++, --len);
        spin_unlock(&pd->lock);
}

static void gen8_ppgtt_foreach(struct i915_address_space *vm,
                               u64 start, u64 length,
                               void (*fn)(struct i915_address_space *vm,
                                          struct i915_page_table *pt,
                                          void *data),
                               void *data)
{
        start >>= GEN8_PTE_SHIFT;
        length >>= GEN8_PTE_SHIFT;

        __gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd,
                             &start, start + length, vm->top,
                             fn, data);
}

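/*
 * Write 4K PTEs for the sg iterator starting at page index @idx, flushing
 * each page table as it fills. Returns the index to resume from, or 0 once
 * the iterator is exhausted.
 */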
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
                      struct i915_page_directory *pdp,
                      struct sgt_dma *iter,
                      u64 idx,
                      unsigned int pat_index,
                      u32 flags)
{
        struct i915_page_directory *pd;
        const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags);
        gen8_pte_t *vaddr;

        pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
        vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
        do {
                GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
                vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;

                iter->dma += I915_GTT_PAGE_SIZE;
                if (iter->dma >= iter->max) {
                        iter->sg = __sg_next(iter->sg);
                        if (!iter->sg || sg_dma_len(iter->sg) == 0) {
                                idx = 0;
                                break;
                        }

                        iter->dma = sg_dma_address(iter->sg);
                        iter->max = iter->dma + sg_dma_len(iter->sg);
                }

                if (gen8_pd_index(++idx, 0) == 0) {
                        if (gen8_pd_index(idx, 1) == 0) {
                                /* Limited by sg length for 3lvl */
                                if (gen8_pd_index(idx, 2) == 0)
                                        break;

                                pd = pdp->entry[gen8_pd_index(idx, 2)];
                        }

                        drm_clflush_virt_range(vaddr, PAGE_SIZE);
                        vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
                }
        } while (1);
        drm_clflush_virt_range(vaddr, PAGE_SIZE);

        return idx;
}

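/*
 * XeHP huge-page insertion: use 2M PDEs when alignment and length allow,
 * the compact 64K page-table layout for large-enough local-memory chunks,
 * or the PS64 TLB hint for 64K-contiguous pieces; otherwise fall back to
 * plain 4K PTEs.
 */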
static void
xehp_ppgtt_insert_huge(struct i915_address_space *vm,
                       struct i915_vma_resource *vma_res,
                       struct sgt_dma *iter,
                       unsigned int pat_index,
                       u32 flags)
{
        const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
        unsigned int rem = sg_dma_len(iter->sg);
        u64 start = vma_res->start;
        u64 end = start + vma_res->vma_size;

        GEM_BUG_ON(!i915_vm_is_4lvl(vm));

        do {
                struct i915_page_directory * const pdp =
                        gen8_pdp_for_page_address(vm, start);
                struct i915_page_directory * const pd =
                        i915_pd_entry(pdp, __gen8_pte_index(start, 2));
                struct i915_page_table *pt =
                        i915_pt_entry(pd, __gen8_pte_index(start, 1));
                gen8_pte_t encode = pte_encode;
                unsigned int page_size;
                gen8_pte_t *vaddr;
                u16 index, max, nent, i;

                max = I915_PDES;
                nent = 1;

                if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
                    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
                    rem >= I915_GTT_PAGE_SIZE_2M &&
                    !__gen8_pte_index(start, 0)) {
                        index = __gen8_pte_index(start, 1);
                        encode |= GEN8_PDE_PS_2M;
                        page_size = I915_GTT_PAGE_SIZE_2M;

                        vaddr = px_vaddr(pd);
                } else {
                        index = __gen8_pte_index(start, 0);
                        page_size = I915_GTT_PAGE_SIZE;

                        if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
                                /*
                                 * Device local-memory on these platforms should
                                 * always use 64K pages or larger (including GTT
                                 * alignment), therefore if we know the whole
                                 * page-table needs to be filled we can always
                                 * safely use the compact-layout. Otherwise fall
                                 * back to the TLB hint with PS64. If this is
                                 * system memory we only bother with PS64.
                                 */
                                if ((encode & GEN12_PPGTT_PTE_LM) &&
                                    end - start >= SZ_2M && !index) {
                                        index = __gen8_pte_index(start, 0) / 16;
                                        page_size = I915_GTT_PAGE_SIZE_64K;

                                        max /= 16;

                                        vaddr = px_vaddr(pd);
                                        vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;

                                        pt->is_compact = true;
                                } else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
                                           rem >= I915_GTT_PAGE_SIZE_64K &&
                                           !(index % 16)) {
                                        encode |= GEN12_PTE_PS64;
                                        page_size = I915_GTT_PAGE_SIZE_64K;
                                        nent = 16;
                                }
                        }

                        vaddr = px_vaddr(pt);
                }

                do {
                        GEM_BUG_ON(rem < page_size);

                        for (i = 0; i < nent; i++) {
                                vaddr[index++] =
                                        encode | (iter->dma + i *
                                                  I915_GTT_PAGE_SIZE);
                        }

                        start += page_size;
                        iter->dma += page_size;
                        rem -= page_size;
                        if (iter->dma >= iter->max) {
                                iter->sg = __sg_next(iter->sg);
                                if (!iter->sg)
                                        break;

                                rem = sg_dma_len(iter->sg);
                                if (!rem)
                                        break;

                                iter->dma = sg_dma_address(iter->sg);
                                iter->max = iter->dma + rem;

                                if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
                                        break;
                        }
                } while (rem >= page_size && index < max);

                drm_clflush_virt_range(vaddr, PAGE_SIZE);
                vma_res->page_sizes_gtt |= page_size;
        } while (iter->sg && sg_dma_len(iter->sg));
}

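/*
 * Pre-XeHP huge-page insertion: use 2M PDEs where possible; otherwise write
 * 4K PTEs while tracking in maybe_64K whether the completed page table can
 * retroactively be marked as 64K via GEN8_PDE_IPS_64K.
 */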
static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
                                   struct i915_vma_resource *vma_res,
                                   struct sgt_dma *iter,
                                   unsigned int pat_index,
                                   u32 flags)
{
        const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
        unsigned int rem = sg_dma_len(iter->sg);
        u64 start = vma_res->start;

        GEM_BUG_ON(!i915_vm_is_4lvl(vm));

        do {
                struct i915_page_directory * const pdp =
                        gen8_pdp_for_page_address(vm, start);
                struct i915_page_directory * const pd =
                        i915_pd_entry(pdp, __gen8_pte_index(start, 2));
                gen8_pte_t encode = pte_encode;
                unsigned int maybe_64K = -1;
                unsigned int page_size;
                gen8_pte_t *vaddr;
                u16 index;

                if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
                    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
                    rem >= I915_GTT_PAGE_SIZE_2M &&
                    !__gen8_pte_index(start, 0)) {
                        index = __gen8_pte_index(start, 1);
                        encode |= GEN8_PDE_PS_2M;
                        page_size = I915_GTT_PAGE_SIZE_2M;

                        vaddr = px_vaddr(pd);
                } else {
                        struct i915_page_table *pt =
                                i915_pt_entry(pd, __gen8_pte_index(start, 1));

                        index = __gen8_pte_index(start, 0);
                        page_size = I915_GTT_PAGE_SIZE;

                        if (!index &&
                            vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
                            IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
                            (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
                             rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
                                maybe_64K = __gen8_pte_index(start, 1);

                        vaddr = px_vaddr(pt);
                }

                do {
                        GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
                        vaddr[index++] = encode | iter->dma;

                        start += page_size;
                        iter->dma += page_size;
                        rem -= page_size;
                        if (iter->dma >= iter->max) {
                                iter->sg = __sg_next(iter->sg);
                                if (!iter->sg)
                                        break;

                                rem = sg_dma_len(iter->sg);
                                if (!rem)
                                        break;

                                iter->dma = sg_dma_address(iter->sg);
                                iter->max = iter->dma + rem;

                                if (maybe_64K != -1 && index < I915_PDES &&
                                    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
                                      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
                                       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
                                        maybe_64K = -1;

                                if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
                                        break;
                        }
                } while (rem >= page_size && index < I915_PDES);

                drm_clflush_virt_range(vaddr, PAGE_SIZE);

                /*
                 * Is it safe to mark the 2M block as 64K? -- Either we have
                 * filled the whole page-table with 64K entries, or we have
                 * filled part of it, reached the end of the sg table and have
                 * enough padding.
                 */
                if (maybe_64K != -1 &&
                    (index == I915_PDES ||
                     (i915_vm_has_scratch_64K(vm) &&
                      !iter->sg && IS_ALIGNED(vma_res->start +
                                              vma_res->node_size,
                                              I915_GTT_PAGE_SIZE_2M)))) {
                        vaddr = px_vaddr(pd);
                        vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
                        drm_clflush_virt_range(vaddr, PAGE_SIZE);
                        page_size = I915_GTT_PAGE_SIZE_64K;

                        /*
                         * We write all 4K page entries, even when using 64K
                         * pages. In order to verify that the HW isn't cheating
                         * by using the 4K PTE instead of the 64K PTE, we want
                         * to remove all the surplus entries. If the HW skipped
                         * the 64K PTE, it will read/write into the scratch page
                         * instead - which we detect as missing results during
                         * selftests.
                         */
                        if (I915_SELFTEST_ONLY(vm->scrub_64K)) {
                                u16 i;

                                encode = vm->scratch[0]->encode;
                                vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));

                                for (i = 1; i < index; i += 16)
                                        memset64(vaddr + i, encode, 15);

                                drm_clflush_virt_range(vaddr, PAGE_SIZE);
                        }
                }

                vma_res->page_sizes_gtt |= page_size;
        } while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert(struct i915_address_space *vm,
                              struct i915_vma_resource *vma_res,
                              unsigned int pat_index,
                              u32 flags)
{
        struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
        struct sgt_dma iter = sgt_dma(vma_res);

        if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
                if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 55))
                        xehp_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
                else
                        gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
        } else  {
                u64 idx = vma_res->start >> GEN8_PTE_SHIFT;

                do {
                        struct i915_page_directory * const pdp =
                                gen8_pdp_for_page_index(vm, idx);

                        idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
                                                    pat_index, flags);
                } while (idx);

                vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
        }
}

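/* insert_page() backend: encode and write a single PTE at @offset. */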
static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
                                    dma_addr_t addr,
                                    u64 offset,
                                    unsigned int pat_index,
                                    u32 flags)
{
        u64 idx = offset >> GEN8_PTE_SHIFT;
        struct i915_page_directory * const pdp =
                gen8_pdp_for_page_index(vm, idx);
        struct i915_page_directory *pd =
                i915_pd_entry(pdp, gen8_pd_index(idx, 2));
        struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
        gen8_pte_t *vaddr;

        GEM_BUG_ON(pt->is_compact);

        vaddr = px_vaddr(pt);
        vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags);
        drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}

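/*
 * Single-page insertion into local memory on 64K-page platforms uses the
 * compact page-table layout: the table is flipped to compact mode on first
 * use, and 16 PTE slots collapse into one, hence the /16 index and the 64K
 * alignment requirements.
 */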
static void xehp_ppgtt_insert_entry_lm(struct i915_address_space *vm,
                                       dma_addr_t addr,
                                       u64 offset,
                                       unsigned int pat_index,
                                       u32 flags)
{
        u64 idx = offset >> GEN8_PTE_SHIFT;
        struct i915_page_directory * const pdp =
                gen8_pdp_for_page_index(vm, idx);
        struct i915_page_directory *pd =
                i915_pd_entry(pdp, gen8_pd_index(idx, 2));
        struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
        gen8_pte_t *vaddr;

        GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
        GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));

        /* XXX: we don't strictly need to use this layout */

        if (!pt->is_compact) {
                vaddr = px_vaddr(pd);
                vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
                pt->is_compact = true;
        }

        vaddr = px_vaddr(pt);
        vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
}

static void xehp_ppgtt_insert_entry(struct i915_address_space *vm,
                                    dma_addr_t addr,
                                    u64 offset,
                                    unsigned int pat_index,
                                    u32 flags)
{
        if (flags & PTE_LM)
                return xehp_ppgtt_insert_entry_lm(vm, addr, offset,
                                                  pat_index, flags);

        return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags);
}

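/*
 * Build the scratch hierarchy: scratch[0] is the data page, and every
 * scratch[i] above it is a table whose entries all point at scratch[i - 1],
 * so reads of unpopulated VA safely land in the scratch page.
 */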
static int gen8_init_scratch(struct i915_address_space *vm)
{
        u32 pte_flags;
        int ret;
        int i;

        /*
         * If everybody agrees not to write into the scratch page,
         * we can reuse it for all vm, keeping contexts and processes separate.
         */
        if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
                struct i915_address_space *clone = vm->gt->vm;

                GEM_BUG_ON(!clone->has_read_only);

                vm->scratch_order = clone->scratch_order;
                for (i = 0; i <= vm->top; i++)
                        vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);

                return 0;
        }

        ret = setup_scratch_page(vm);
        if (ret)
                return ret;

        pte_flags = vm->has_read_only;
        if (i915_gem_object_is_lmem(vm->scratch[0]))
                pte_flags |= PTE_LM;

        vm->scratch[0]->encode =
                vm->pte_encode(px_dma(vm->scratch[0]),
                               i915_gem_get_pat_index(vm->i915,
                                                      I915_CACHE_NONE),
                               pte_flags);

        for (i = 1; i <= vm->top; i++) {
                struct drm_i915_gem_object *obj;

                obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
                if (IS_ERR(obj)) {
                        ret = PTR_ERR(obj);
                        goto free_scratch;
                }

                ret = map_pt_dma(vm, obj);
                if (ret) {
                        i915_gem_object_put(obj);
                        goto free_scratch;
                }

                fill_px(obj, vm->scratch[i - 1]->encode);
                obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);

                vm->scratch[i] = obj;
        }

        return 0;

free_scratch:
        while (i--)
                i915_gem_object_put(vm->scratch[i]);
        vm->scratch[0] = NULL;
        return ret;
}

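/*
 * A 3-level (32b) vm is addressed through exactly four PDP registers, so all
 * four top-level page directories are allocated and kept pinned up front.
 */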
static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
        struct i915_address_space *vm = &ppgtt->vm;
        struct i915_page_directory *pd = ppgtt->pd;
        unsigned int idx;

        GEM_BUG_ON(vm->top != 2);
        GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);

        for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
                struct i915_page_directory *pde;
                int err;

                pde = alloc_pd(vm);
                if (IS_ERR(pde))
                        return PTR_ERR(pde);

                err = map_pt_dma(vm, pde->pt.base);
                if (err) {
                        free_pd(vm, pde);
                        return err;
                }

                fill_px(pde, vm->scratch[1]->encode);
                set_pd_entry(pd, idx, pde);
                atomic_inc(px_used(pde)); /* keep pinned */
        }
        wmb();

        return 0;
}

static struct i915_page_directory *
gen8_alloc_top_pd(struct i915_address_space *vm)
{
        const unsigned int count = gen8_pd_top_count(vm);
        struct i915_page_directory *pd;
        int err;

        GEM_BUG_ON(count > I915_PDES);

        pd = __alloc_pd(count);
        if (unlikely(!pd))
                return ERR_PTR(-ENOMEM);

        pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
        if (IS_ERR(pd->pt.base)) {
                err = PTR_ERR(pd->pt.base);
                pd->pt.base = NULL;
                goto err_pd;
        }

        err = map_pt_dma(vm, pd->pt.base);
        if (err)
                goto err_pd;

        fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
        atomic_inc(px_used(pd)); /* mark as pinned */
        return pd;

err_pd:
        free_pd(vm, pd);
        return ERR_PTR(err);
}

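/*
 * Wa_16018031267: reserve a pinned, GPU-only filler page high in the vm and
 * carve it out of vm->total so that nothing else can be placed there.
 */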
static int gen8_init_rsvd(struct i915_address_space *vm)
{
        struct drm_i915_private *i915 = vm->i915;
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        int ret;

        if (!intel_gt_needs_wa_16018031267(vm->gt))
                return 0;

        /* The memory will be used only by the GPU. */
        obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
                                          I915_BO_ALLOC_VOLATILE |
                                          I915_BO_ALLOC_GPU_ONLY);
        if (IS_ERR(obj))
                obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto unref;
        }

        ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
        if (ret)
                goto unref;

        vm->rsvd.vma = i915_vma_make_unshrinkable(vma);
        vm->rsvd.obj = obj;
        vm->total -= vma->node.size;
        return 0;
unref:
        i915_gem_object_put(obj);
        return ret;
}

/*
 * GEN8 legacy ppgtt programming is accomplished through at most 4 PDP
 * registers, with a net effect resembling a 2-level page table in normal x86
 * terms. Each PDP represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of
 * legacy 32b address space.
 */
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
                                     unsigned long lmem_pt_obj_flags)
{
        struct i915_page_directory *pd;
        struct i915_ppgtt *ppgtt;
        int err;

        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
        if (!ppgtt)
                return ERR_PTR(-ENOMEM);

        ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
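        /*
         * A 4-level vm has a PML4 on top (top = 3); legacy 32b vms stop at
         * the PDPs (top = 2). pd_shift is the VA span of one page table:
         * 512 PTEs * 4K = 2M.
         */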
        ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
        ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));

        /*
         * From bdw, there is hw support for read-only pages in the PPGTT.
         *
         * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
         * for now.
         *
         * Gen12 has inherited the same read-only fault issue from gen11.
         */
        ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);

        if (HAS_LMEM(gt->i915))
                ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
        else
                ppgtt->vm.alloc_pt_dma = alloc_pt_dma;

        /*
         * Using SMEM here instead of LMEM has the advantage of not reserving
         * high performance memory for a "never" used filler page. It also
         * removes the device access that would be required to initialise the
         * scratch page, reducing pressure on an even scarcer resource.
         */
        ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;

        if (GRAPHICS_VER(gt->i915) >= 12)
                ppgtt->vm.pte_encode = gen12_pte_encode;
        else
                ppgtt->vm.pte_encode = gen8_pte_encode;

        ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
        ppgtt->vm.insert_entries = gen8_ppgtt_insert;
        if (HAS_64K_PAGES(gt->i915))
                ppgtt->vm.insert_page = xehp_ppgtt_insert_entry;
        else
                ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
        ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
        ppgtt->vm.clear_range = gen8_ppgtt_clear;
        ppgtt->vm.foreach = gen8_ppgtt_foreach;
        ppgtt->vm.cleanup = gen8_ppgtt_cleanup;

        err = gen8_init_scratch(&ppgtt->vm);
        if (err)
                goto err_put;

        pd = gen8_alloc_top_pd(&ppgtt->vm);
        if (IS_ERR(pd)) {
                err = PTR_ERR(pd);
                goto err_put;
        }
        ppgtt->pd = pd;

        if (!i915_vm_is_4lvl(&ppgtt->vm)) {
                err = gen8_preallocate_top_level_pdp(ppgtt);
                if (err)
                        goto err_put;
        }

        if (intel_vgpu_active(gt->i915))
                gen8_ppgtt_notify_vgt(ppgtt, true);

        err = gen8_init_rsvd(&ppgtt->vm);
        if (err)
                goto err_put;

        return ppgtt;

err_put:
        i915_vm_put(&ppgtt->vm);
        return ERR_PTR(err);
}