root/drivers/gpu/drm/xe/xe_tile_sriov_vf.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "regs/xe_gtt_defs.h"

#include "xe_assert.h"
#include "xe_ggtt.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_wopcm.h"

/* Allocate the two GGTT nodes later used to balloon out the ranges not owned by this VF. */
static int vf_init_ggtt_balloons(struct xe_tile *tile)
{
	struct xe_ggtt_node **balloon = tile->sriov.vf.ggtt_balloon;
	struct xe_ggtt *ggtt = tile->mem.ggtt;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	balloon[0] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(balloon[0]))
		return PTR_ERR(balloon[0]);

	balloon[1] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(balloon[1])) {
		/* unwind the first node so nothing is left half-initialized */
		xe_ggtt_node_fini(balloon[0]);
		return PTR_ERR(balloon[1]);
	}

	return 0;
}

/**
 * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range.
 * @tile: the &xe_tile struct instance
 *
 * Return: 0 on success or a negative error code on failure.
 */
/**
 * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range.
 * @tile: the &xe_tile struct instance
 *
 * Reserves the GGTT ranges below and above the VF's assigned block so that
 * regular allocations can only land inside the VF-owned range.
 *
 * Return: 0 on success or a negative error code on failure.
 */
static int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile)
{
	u64 ggtt_base = tile->sriov.vf.self_config.ggtt_base;
	u64 ggtt_size = tile->sriov.vf.self_config.ggtt_size;
	struct xe_device *xe = tile_to_xe(tile);
	u64 wopcm = xe_wopcm_size(xe);
	int err;

	xe_tile_assert(tile, IS_SRIOV_VF(xe));
	xe_tile_assert(tile, ggtt_size);
	lockdep_assert_held(&tile->mem.ggtt->lock);

	/*
	 * VF can only use part of the GGTT as allocated by the PF:
	 *
	 *      WOPCM                                  GUC_GGTT_TOP
	 *      |<------------ Total GGTT size ------------------>|
	 *
	 *           VF GGTT base -->|<- size ->|
	 *
	 *      +--------------------+----------+-----------------+
	 *      |////////////////////|   block  |\\\\\\\\\\\\\\\\\|
	 *      +--------------------+----------+-----------------+
	 *
	 *      |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->|
	 */

	/* the PF-provided range must fit entirely within [WOPCM, GUC_GGTT_TOP] */
	if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP ||
	    ggtt_size > GUC_GGTT_TOP - ggtt_base) {
		xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n",
			     tile->id, ggtt_base, ggtt_base + ggtt_size - 1);
		return -ERANGE;
	}

	/* balloon[0] covers [WOPCM, ggtt_base), skipped when empty */
	if (ggtt_base != wopcm) {
		err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0],
							 wopcm, ggtt_base);
		if (err)
			return err;
	}

	/* balloon[1] covers [ggtt_base + ggtt_size, GUC_GGTT_TOP), skipped when empty */
	if (ggtt_base + ggtt_size != GUC_GGTT_TOP) {
		err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1],
							 ggtt_base + ggtt_size, GUC_GGTT_TOP);
		if (err) {
			/* roll back the lower balloon to leave a clean state */
			xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
			return err;
		}
	}

	return 0;
}

/* Take the GGTT lock and insert the balloon nodes. */
static int vf_balloon_ggtt(struct xe_tile *tile)
{
	int err;

	mutex_lock(&tile->mem.ggtt->lock);
	err = xe_tile_sriov_vf_balloon_ggtt_locked(tile);
	mutex_unlock(&tile->mem.ggtt->lock);

	return err;
}

/**
 * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes.
 * @tile: the &xe_tile struct instance
 */
/**
 * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes.
 * @tile: the &xe_tile struct instance
 *
 * Removes both balloon nodes, upper range first, making the full mappable
 * GGTT range available again. Caller must hold the GGTT lock.
 */
void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile)
{
	int i;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	for (i = 1; i >= 0; i--)
		xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[i]);
}

static void vf_deballoon_ggtt(struct xe_tile *tile)
{
        mutex_lock(&tile->mem.ggtt->lock);
        xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
        mutex_unlock(&tile->mem.ggtt->lock);
}

/* Release the balloon nodes allocated by vf_init_ggtt_balloons(). */
static void vf_fini_ggtt_balloons(struct xe_tile *tile)
{
	int i;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	for (i = 1; i >= 0; i--)
		xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[i]);
}

/* drmm action: undo xe_tile_sriov_vf_prepare_ggtt() on device teardown. */
static void cleanup_ggtt(struct drm_device *drm, void *arg)
{
	struct xe_tile *tile = arg;

	/* balloons must be removed before their nodes are freed */
	vf_deballoon_ggtt(tile);
	vf_fini_ggtt_balloons(tile);
}

/**
 * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
/**
 * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration.
 * @tile: the &xe_tile
 *
 * Allocates the balloon nodes, inserts them around the VF-owned GGTT range,
 * and registers a managed action to undo both at device teardown.
 *
 * This function is for VF use only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);
	int err;

	err = vf_init_ggtt_balloons(tile);
	if (err)
		return err;

	err = vf_balloon_ggtt(tile);
	if (err)
		goto err_fini;

	return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile);

err_fini:
	vf_fini_ggtt_balloons(tile);
	return err;
}

/**
 * DOC: GGTT nodes shifting during VF post-migration recovery
 *
 * The first fixup applied to the VF KMD structures as part of post-migration
 * recovery is shifting nodes within &xe_ggtt instance. The nodes are moved
 * from range previously assigned to this VF, into newly provisioned area.
 * The changes include balloons, which are resized accordingly.
 *
 * The balloon nodes are there to eliminate unavailable ranges from use: one
 * reserves the GGTT area below the range for current VF, and another one
 * reserves area above.
 *
 * Below is a GGTT layout of example VF, with a certain address range assigned to
 * said VF, and inaccessible areas above and below:
 *
 *  0                                                                        4GiB
 *  |<--------------------------- Total GGTT size ----------------------------->|
 *      WOPCM                                                         GUC_TOP
 *      |<-------------- Area mappable by xe_ggtt instance ---------------->|
 *
 *  +---+---------------------------------+----------+----------------------+---+
 *  |\\\|/////////////////////////////////|  VF mem  |//////////////////////|\\\|
 *  +---+---------------------------------+----------+----------------------+---+
 *
 * Hardware enforced access rules before migration:
 *
 *  |<------- inaccessible for VF ------->|<VF owned>|<-- inaccessible for VF ->|
 *
 * GGTT nodes used for tracking allocations:
 *
 *      |<---------- balloon ------------>|<- nodes->|<----- balloon ------>|
 *
 * After the migration, GGTT area assigned to the VF might have shifted, either
 * to lower or to higher address. But we expect the total size and extra areas to
 * be identical, as migration can only happen between matching platforms.
 * Below is an example of GGTT layout of the VF after migration. Content of the
 * GGTT for VF has been moved to a new area, and we receive its address from GuC:
 *
 *  +---+----------------------+----------+---------------------------------+---+
 *  |\\\|//////////////////////|  VF mem  |/////////////////////////////////|\\\|
 *  +---+----------------------+----------+---------------------------------+---+
 *
 * Hardware enforced access rules after migration:
 *
 *  |<- inaccessible for VF -->|<VF owned>|<------- inaccessible for VF ------->|
 *
 * So the VF has a new slice of GGTT assigned, and during migration process, the
 * memory content was copied to that new area. But the &xe_ggtt nodes are still
 * tracking allocations using the old addresses. The nodes within VF owned area
 * have to be shifted, and balloon nodes need to be resized to properly mask out
 * areas not owned by the VF.
 *
 * Fixed &xe_ggtt nodes used for tracking allocations:
 *
 *     |<------ balloon ------>|<- nodes->|<----------- balloon ----------->|
 *
 * Due to use of GPU profiles, we do not expect the old and new GGTT areas to
 * overlap; but our node shifting will fix addresses properly regardless.
 */

/**
 * xe_tile_sriov_vf_fixup_ggtt_nodes_locked - Shift GGTT allocations to match assigned range.
 * @tile: the &xe_tile struct instance
 * @shift: the shift value
 *
 * Since Global GTT is not virtualized, each VF has an assigned range
 * within the global space. This range might have changed during migration,
 * which requires all memory addresses pointing to GGTT to be shifted.
 */
/**
 * xe_tile_sriov_vf_fixup_ggtt_nodes_locked - Shift GGTT allocations to match assigned range.
 * @tile: the &xe_tile struct instance
 * @shift: the shift value
 *
 * Since Global GTT is not virtualized, each VF has an assigned range
 * within the global space. This range might have changed during migration,
 * which requires all memory addresses pointing to GGTT to be shifted.
 *
 * Caller must hold the GGTT lock.
 */
void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;
	int err;

	lockdep_assert_held(&ggtt->lock);

	xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
	xe_ggtt_shift_nodes_locked(ggtt, shift);

	/*
	 * Previously the re-balloon result was silently discarded; on failure
	 * the VF-inaccessible ranges would stay open for allocation with no
	 * trace in the log. We cannot propagate the error from this void
	 * fixup path, but at least report it.
	 */
	err = xe_tile_sriov_vf_balloon_ggtt_locked(tile);
	if (err)
		xe_sriov_err(tile_to_xe(tile),
			     "tile%u: Failed to re-balloon GGTT after migration (%pe)\n",
			     tile->id, ERR_PTR(err));
}

/**
 * xe_tile_sriov_vf_lmem - VF LMEM configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: size of the LMEM assigned to VF.
 */
/**
 * xe_tile_sriov_vf_lmem - VF LMEM configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: size of the LMEM assigned to VF.
 */
u64 xe_tile_sriov_vf_lmem(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	return tile->sriov.vf.self_config.lmem_size;
}

/**
 * xe_tile_sriov_vf_lmem_store - Store VF LMEM configuration
 * @tile: the &xe_tile
 * @lmem_size: VF LMEM size to store
 *
 * This function is for VF use only.
 */
/**
 * xe_tile_sriov_vf_lmem_store - Store VF LMEM configuration
 * @tile: the &xe_tile
 * @lmem_size: VF LMEM size to store
 *
 * This function is for VF use only.
 */
void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	tile->sriov.vf.self_config.lmem_size = lmem_size;
}

/**
 * xe_tile_sriov_vf_ggtt - VF GGTT configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: size of the GGTT assigned to VF.
 */
/**
 * xe_tile_sriov_vf_ggtt - VF GGTT configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: size of the GGTT assigned to VF.
 */
u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	return tile->sriov.vf.self_config.ggtt_size;
}

/**
 * xe_tile_sriov_vf_ggtt_store - Store VF GGTT configuration
 * @tile: the &xe_tile
 * @ggtt_size: VF GGTT size to store
 *
 * This function is for VF use only.
 */
/**
 * xe_tile_sriov_vf_ggtt_store - Store VF GGTT configuration
 * @tile: the &xe_tile
 * @ggtt_size: VF GGTT size to store
 *
 * This function is for VF use only.
 */
void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	tile->sriov.vf.self_config.ggtt_size = ggtt_size;
}

/**
 * xe_tile_sriov_vf_ggtt_base - VF GGTT base configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: base of the GGTT assigned to VF.
 */
/**
 * xe_tile_sriov_vf_ggtt_base - VF GGTT base configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: base of the GGTT assigned to VF.
 */
u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	return tile->sriov.vf.self_config.ggtt_base;
}

/**
 * xe_tile_sriov_vf_ggtt_base_store - Store VF GGTT base configuration
 * @tile: the &xe_tile
 * @ggtt_base: VF GGTT base to store
 *
 * This function is for VF use only.
 */
/**
 * xe_tile_sriov_vf_ggtt_base_store - Store VF GGTT base configuration
 * @tile: the &xe_tile
 * @ggtt_base: VF GGTT base to store
 *
 * This function is for VF use only.
 */
void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_base)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	tile->sriov.vf.self_config.ggtt_base = ggtt_base;
}