root/sys/dev/pci/drm/i915/i915_vma_resource.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "i915_sw_fence.h"
#include "i915_vma_resource.h"
#include "i915_drv.h"
#include "intel_memory_region.h"

#include "gt/intel_gtt.h"

/*
 * Backing allocator for struct i915_vma_resource objects. It is set up by
 * i915_vma_resource_module_init() and torn down by
 * i915_vma_resource_module_exit().
 *
 * NOTE(review): the __linux__ branches in this file use this name as if it
 * were a kmem_cache pointer, which does not match this declaration; they
 * look like reference-only code on this platform - confirm before enabling.
 */
static struct pool slab_vma_resources;

/**
 * DOC:
 * We use a per-vm interval tree to keep track of vma_resources
 * scheduled for unbind but not yet unbound. The tree is protected by
 * the vm mutex, and nodes are removed just after the unbind fence signals.
 * The removal takes the vm mutex from a kernel thread which we need to
 * keep in mind so that we don't grab the mutex and try to wait for all
 * pending unbinds to complete, because that will temporarily block many
 * of the workqueue threads, and people will get angry.
 *
 * We should consider using a single ordered fence per VM instead but that
 * requires ordering the unbinds and might introduce unnecessary waiting
 * for unrelated unbinds. Amount of code will probably be roughly the same
 * due to the simplicity of using the interval tree interface.
 *
 * Another drawback of this interval tree is that the complexity of insertion
 * and removal of fences increases as O(ln(pending_unbinds)) instead of
 * O(1) for a single fence without interval tree.
 */
/*
 * Inclusive bounds of the interval a vma resource is keyed on: the range
 * starting at ->start and spanning ->node_size bytes, widened by ->guard
 * on both sides.
 */
#define VMA_RES_START(_node) ((_node)->start - (_node)->guard)
#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1)
#ifdef __linux__
/* Instantiate the static vma_res_itree_*() helpers keyed on that interval. */
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
                     u64, __subtree_last,
                     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
#else
static struct i915_vma_resource *
vma_res_itree_iter_first(struct rb_root_cached *root, uint64_t start,
    uint64_t last)
{
        struct i915_vma_resource *node;
        struct rb_node *rb;

        for (rb = rb_first_cached(root); rb; rb = rb_next(rb)) {
                node = rb_entry(rb, typeof(*node), rb);
                if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last)
                        return node;
        }
        return NULL;
}

static struct i915_vma_resource *
vma_res_itree_iter_next(struct i915_vma_resource *node, uint64_t start,
    uint64_t last)
{
        struct rb_node *rb = &node->rb;

        for (rb = rb_next(rb); rb; rb = rb_next(rb)) {
                node = rb_entry(rb, typeof(*node), rb);
                if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last)
                        return node;
        }
        return NULL;
}

/* Unlink @node from the tree rooted at @root. */
static void
vma_res_itree_remove(struct i915_vma_resource *node,
    struct rb_root_cached *root)
{
        struct rb_node *victim = &node->rb;

        rb_erase_cached(victim, root);
}

/*
 * Link @node into the tree rooted at @root, ordered by ->start. Nodes with
 * an equal ->start are placed to the right of the existing ones.
 *
 * The descent records whether the new node ends up as the leftmost one and
 * hands that to rb_insert_color_cached(), as the cached-rbtree interface
 * expects. Passing a constant "false" is only correct with an
 * implementation that ignores the hint; with one that honours it, the
 * cached first node would never be set and rb_first_cached() would miss
 * every inserted resource.
 */
static void
vma_res_itree_insert(struct i915_vma_resource *node,
    struct rb_root_cached *root)
{
        struct rb_node **link = &root->rb_root.rb_node;
        struct rb_node *parent = NULL;
        bool leftmost = true;

        while (*link) {
                struct i915_vma_resource *cur;

                parent = *link;
                cur = rb_entry(parent, struct i915_vma_resource, rb);

                if (node->start < cur->start) {
                        link = &(*link)->rb_left;
                } else {
                        link = &(*link)->rb_right;
                        /* Any step to the right rules out being leftmost. */
                        leftmost = false;
                }
        }

        rb_link_node(&node->rb, parent, link);
        rb_insert_color_cached(&node->rb, root, leftmost);
}
#endif

/* Callbacks for the unbind dma-fence. */

/**
 * i915_vma_resource_alloc - Allocate a vma resource
 *
 * The object is requested zero-filled from the vma resource allocator.
 *
 * Return: A pointer to a cleared struct i915_vma_resource, or
 * ERR_PTR(-ENOMEM) if the allocator returned NULL.
 */
struct i915_vma_resource *i915_vma_resource_alloc(void)
{
        struct i915_vma_resource *vma_res;

#ifdef __linux__
        vma_res = kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL);
#else
        vma_res = pool_get(&slab_vma_resources, PR_WAITOK | PR_ZERO);
#endif
        if (!vma_res)
                return ERR_PTR(-ENOMEM);

        return vma_res;
}

/**
 * i915_vma_resource_free - Free a vma resource
 * @vma_res: The vma resource to free. May be NULL, in which case the
 * call does nothing.
 *
 * Hands the object back to the allocator it was obtained from.
 */
void i915_vma_resource_free(struct i915_vma_resource *vma_res)
{
        if (!vma_res)
                return;

#ifdef __linux__
        kmem_cache_free(slab_vma_resources, vma_res);
#else
        pool_put(&slab_vma_resources, vma_res);
#endif
}

/* dma_fence_ops callback: fixed driver name for unbind fences; @fence is unused. */
static const char *get_driver_name(struct dma_fence *fence)
{
        static const char driver_name[] = "vma unbind fence";

        return driver_name;
}

/* dma_fence_ops callback: fixed timeline name for unbind fences; @fence is unused. */
static const char *get_timeline_name(struct dma_fence *fence)
{
        static const char timeline_name[] = "unbound";

        return timeline_name;
}

static void unbind_fence_free_rcu(struct rcu_head *head)
{
        struct i915_vma_resource *vma_res =
                container_of(head, typeof(*vma_res), unbind_fence.rcu);

        i915_vma_resource_free(vma_res);
}

static void unbind_fence_release(struct dma_fence *fence)
{
        struct i915_vma_resource *vma_res =
                container_of(fence, typeof(*vma_res), unbind_fence);

        i915_sw_fence_fini(&vma_res->chain);

        call_rcu(&fence->rcu, unbind_fence_free_rcu);
}

/* Operations table for the unbind fence embedded in each vma resource. */
static const struct dma_fence_ops unbind_fence_ops = {
        /* Frees the enclosing vma resource once the fence is released. */
        .release = unbind_fence_release,
        /* Constant identification strings. */
        .get_timeline_name = get_timeline_name,
        .get_driver_name = get_driver_name,
};

/*
 * Drop one hold on @vma_res. Only the caller that drops the last hold
 * continues past the first check: it signals the unbind fence, releases the
 * runtime-pm wakeref if one is recorded, unlinks the resource from the vm's
 * pending_unbind tree if it is linked there, and puts the pages_rsgt
 * reference if one is set.
 */
static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
{
        struct i915_address_space *vm;

        /* Other holds remain; the last one to go does the work below. */
        if (!refcount_dec_and_test(&vma_res->hold_count))
                return;

        dma_fence_signal(&vma_res->unbind_fence);

        /* Keep a local copy: vma_res->vm is cleared below but still needed. */
        vm = vma_res->vm;
        if (vma_res->wakeref)
                intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref);

        vma_res->vm = NULL;
        /*
         * i915_vma_resource_unbind() clears the rb node instead of inserting
         * it for immediate unbinds, so those skip the locked removal.
         */
        if (!RB_EMPTY_NODE(&vma_res->rb)) {
                mutex_lock(&vm->mutex);
                vma_res_itree_remove(vma_res, &vm->pending_unbind);
                mutex_unlock(&vm->mutex);
        }

        if (vma_res->bi.pages_rsgt)
                i915_refct_sgt_put(vma_res->bi.pages_rsgt);
}

/**
 * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind
 * fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold.
 *
 * Ends the signalling annotation identified by @lockdep_cookie and then
 * drops one hold on @vma_res; dropping the last hold signals the unbind
 * fence.
 *
 * The function may leave a dma_fence critical section.
 */
void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
                              bool lockdep_cookie)
{
        dma_fence_end_signalling(lockdep_cookie);

        /*
         * With lock proving enabled, take and immediately release the
         * resource lock so that the lock checker sees this path as one that
         * may acquire it, whether or not the unhold below ends up doing so.
         */
        if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
                unsigned long irq_flags;

                /* Inefficient open-coded might_lock_irqsave() */
                spin_lock_irqsave(&vma_res->lock, irq_flags);
                spin_unlock_irqrestore(&vma_res->lock, irq_flags);
        }

        __i915_vma_resource_unhold(vma_res);
}

/**
 * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: Pointer to a bool serving as a lockdep cookie that should
 * be given as an argument to the pairing i915_vma_resource_unhold. It is
 * written only when the hold succeeds.
 *
 * A hold can only be taken while the hold count is still non-zero. If
 * returning true, the function enters a dma_fence signalling critical
 * section if not in one already.
 *
 * Return: true if holding successful, false if not.
 */
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
                            bool *lockdep_cookie)
{
        if (!refcount_inc_not_zero(&vma_res->hold_count))
                return false;

        *lockdep_cookie = dma_fence_begin_signalling();

        return true;
}

/*
 * Perform the unbind for the vma resource embedding @work. It is either
 * called directly or run from a workqueue by
 * i915_vma_resource_fence_notify().
 *
 * Unless skip_pte_rewrite is set, the resource's unbind_vma op is invoked
 * inside a dma-fence signalling annotation. Afterwards one hold and one
 * resource reference are dropped.
 */
static void i915_vma_resource_unbind_work(struct work_struct *work)
{
        struct i915_vma_resource *vma_res =
                container_of(work, typeof(*vma_res), work);
        /* Read before the unhold below, which may clear vma_res->vm. */
        struct i915_address_space *vm = vma_res->vm;
        bool lockdep_cookie;

        lockdep_cookie = dma_fence_begin_signalling();
        if (likely(!vma_res->skip_pte_rewrite))
                vma_res->ops->unbind_vma(vm, vma_res);

        dma_fence_end_signalling(lockdep_cookie);
        __i915_vma_resource_unhold(vma_res);
        i915_vma_resource_put(vma_res);
}

/*
 * Notifier for the sw fence chain embedded in a vma resource.
 *
 * FENCE_COMPLETE: take a reference on the unbind fence and run the unbind,
 * either inline when immediate_unbind is set or from system_unbound_wq
 * otherwise.
 * FENCE_FREE: drop a resource reference.
 *
 * Always returns NOTIFY_DONE.
 */
static int
i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
                               enum i915_sw_fence_notify state)
{
        struct i915_vma_resource *vma_res =
                container_of(fence, typeof(*vma_res), chain);
        struct dma_fence *unbind_fence =
                &vma_res->unbind_fence;

        switch (state) {
        case FENCE_COMPLETE:
                /*
                 * Presumably balanced by the i915_vma_resource_put() at the
                 * end of i915_vma_resource_unbind_work() - confirm against
                 * the definition of i915_vma_resource_put().
                 */
                dma_fence_get(unbind_fence);
                if (vma_res->immediate_unbind) {
                        i915_vma_resource_unbind_work(&vma_res->work);
                } else {
                        INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
                        queue_work(system_unbound_wq, &vma_res->work);
                }
                break;
        case FENCE_FREE:
                /* Pairs with the "Reference for the sw fence" taken at unbind. */
                i915_vma_resource_put(vma_res);
                break;
        }

        return NOTIFY_DONE;
}

/**
 * i915_vma_resource_unbind - Unbind a vma resource
 * @vma_res: The vma resource to unbind.
 * @tlb: pointer to vma->obj->mm.tlb associated with the resource
 *       to be stored at vma_res->tlb. When not-NULL, it will be used
 *       to do TLB cache invalidation before freeing a VMA resource.
 *       Used only for async unbind.
 *
 * At this point this function does little more than publish a fence that
 * signals immediately unless signaling is held back.
 *
 * If the sw fence chain has at most one pending count when called, the
 * resource is marked for immediate unbind and is not entered into the vm's
 * pending_unbind tree; otherwise it is inserted into that tree before the
 * chain is committed.
 *
 * Return: A pointer to the unbind dma-fence embedded in @vma_res, which
 * signals when unbinding is complete.
 */
struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
                                           u32 *tlb)
{
        struct i915_address_space *vm = vma_res->vm;

        vma_res->tlb = tlb;

        /* Reference for the sw fence */
        i915_vma_resource_get(vma_res);

        /* Caller must already have a wakeref in this case. */
        if (vma_res->needs_wakeref)
                vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm);

        if (atomic_read(&vma_res->chain.pending) <= 1) {
                /* Mark the node unlinked so the unhold path skips removal. */
                RB_CLEAR_NODE(&vma_res->rb);
                vma_res->immediate_unbind = 1;
        } else {
                vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind);
        }

        i915_sw_fence_commit(&vma_res->chain);

        return &vma_res->unbind_fence;
}

/**
 * __i915_vma_resource_init - Initialize a vma resource.
 * @vma_res: The vma resource to initialize
 *
 * Initializes the private members of a vma resource: the lock, the unbind
 * dma-fence (which uses that lock), the hold count and the sw fence chain.
 */
void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
{
        mtx_init(&vma_res->lock, IPL_TTY);
        dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
                       &vma_res->lock, 0, 0);
        /* Start with a single hold; the unbind fence signals when it drops to 0. */
        refcount_set(&vma_res->hold_count, 1);
        i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
}

/*
 * Widen the inclusive range [*start, *end] by one GTT page on each side
 * when @vm uses cache coloring. A *start of zero is left as is. Without
 * cache coloring the range is returned unchanged.
 */
static void
i915_vma_resource_color_adjust_range(struct i915_address_space *vm,
                                     u64 *start,
                                     u64 *end)
{
        if (!i915_vm_has_cache_coloring(vm))
                return;

        if (*start != 0)
                *start -= I915_GTT_PAGE_SIZE;

        *end += I915_GTT_PAGE_SIZE;
}

/**
 * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a
 * certain vm range.
 * @vm: The vm to look at.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptible.
 *
 * Waits, one by one, for the unbind fence of every pending unbind whose
 * interval overlaps the requested range (after adjusting the range for
 * cache coloring).
 *
 * The function needs to be called with the vm lock held.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
                                    u64 offset,
                                    u64 size,
                                    bool intr)
{
        u64 last = offset + size - 1;
        struct i915_vma_resource *node;

        lockdep_assert_held(&vm->mutex);
        might_sleep();

        i915_vma_resource_color_adjust_range(vm, &offset, &last);

        for (node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
             node;
             node = vma_res_itree_iter_next(node, offset, last)) {
                int err = dma_fence_wait(&node->unbind_fence, intr);

                /* Stop at the first failed wait. */
                if (err)
                        return err;
        }

        return 0;
}

/**
 * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm,
 * releasing the vm lock while waiting.
 * @vm: The vm to look at.
 *
 * The function may not be called with the vm lock held.
 * Typically this is called at vm destruction to finish any pending
 * unbind operations. The vm mutex is released while waiting to avoid
 * stalling kernel workqueues trying to grab the mutex.
 *
 * Each pass looks up the first node of the pending_unbind tree under the
 * mutex and waits for its fence outside the mutex; the loop ends once a
 * pass finds the tree empty.
 */
void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
{
        struct i915_vma_resource *node;
        struct dma_fence *fence;

        do {
                fence = NULL;
                mutex_lock(&vm->mutex);
                /* The full address range: any pending node matches. */
                node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
                                                U64_MAX);
                /*
                 * Take our own fence reference so the fence can be waited on
                 * after the mutex is dropped. NOTE(review): fence may be NULL
                 * here if no reference could be obtained; the loop then
                 * simply retries - confirm dma_fence_get_rcu() semantics.
                 */
                if (node)
                        fence = dma_fence_get_rcu(&node->unbind_fence);
                mutex_unlock(&vm->mutex);

                if (fence) {
                        /*
                         * The wait makes sure the node eventually removes
                         * itself from the tree.
                         */
                        dma_fence_wait(fence, false);
                        dma_fence_put(fence);
                }
        } while (node);
}

/**
 * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
 * pending unbinds in a certain range of a vm.
 * @vm: The vm to look at.
 * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptible.
 * @gfp: Allocation mode for memory allocations.
 *
 * The function makes @sw_fence await all pending unbinds in a certain
 * vm range before calling the complete notifier. To be able to await
 * each individual unbind, the function needs to allocate memory using
 * the @gfp allocation mode. If that fails, the function will instead
 * wait for the unbind fence to signal, using @intr to judge whether to
 * wait interruptible or not. Note that @gfp should ideally be selected so
 * as to avoid any expensive memory allocation stalls and rather fail and
 * synchronize itself. For now the vm mutex is required when calling this
 * function, which means that @gfp can't call into direct reclaim. In reality
 * this means that during heavy memory pressure, we will sync in this
 * function.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
                                     struct i915_sw_fence *sw_fence,
                                     u64 offset,
                                     u64 size,
                                     bool intr,
                                     gfp_t gfp)
{
        u64 last = offset + size - 1;
        struct i915_vma_resource *node;

        lockdep_assert_held(&vm->mutex);
        might_alloc(gfp);
        might_sleep();

        i915_vma_resource_color_adjust_range(vm, &offset, &last);

        for (node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
             node;
             node = vma_res_itree_iter_next(node, offset, last)) {
                int err;

                /* Preferred path: let @sw_fence await the unbind fence. */
                if (i915_sw_fence_await_dma_fence(sw_fence,
                                                  &node->unbind_fence,
                                                  0, gfp) >= 0)
                        continue;

                /* Could not set up the await: wait for the fence right here. */
                err = dma_fence_wait(&node->unbind_fence, intr);
                if (err)
                        return err;
        }

        return 0;
}

/*
 * Module teardown: destroy the vma resource allocator created by
 * i915_vma_resource_module_init().
 */
void i915_vma_resource_module_exit(void)
{
#ifdef __linux__
        kmem_cache_destroy(slab_vma_resources);
#else
        pool_destroy(&slab_vma_resources);
#endif
}

/*
 * Module setup: create the allocator that backs
 * i915_vma_resource_alloc() / i915_vma_resource_free().
 *
 * Return: Zero on success. Only the __linux__ branch has a failure path,
 * returning -ENOMEM when the cache cannot be created.
 */
int __init i915_vma_resource_module_init(void)
{
#ifdef __linux__
        slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN);
        if (!slab_vma_resources)
                return -ENOMEM;
#else
        /* Items are sized for one struct i915_vma_resource; pool is named "svmar". */
        pool_init(&slab_vma_resources, sizeof(struct i915_vma_resource),
            0, IPL_NONE, 0, "svmar", NULL);
#endif

        return 0;
}