root/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_icm_pool.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "dr_types.h"

#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
#define DR_ICM_POOL_STE_HOT_MEM_PERCENT 25
#define DR_ICM_POOL_MODIFY_HDR_PTRN_HOT_MEM_PERCENT 50
#define DR_ICM_POOL_MODIFY_ACTION_HOT_MEM_PERCENT 90

struct mlx5dr_icm_hot_chunk {
        struct mlx5dr_icm_buddy_mem *buddy_mem;
        unsigned int seg;
        enum mlx5dr_icm_chunk_size size;
};

struct mlx5dr_icm_pool {
        enum mlx5dr_icm_type icm_type;
        enum mlx5dr_icm_chunk_size max_log_chunk_sz;
        struct mlx5dr_domain *dmn;
        struct kmem_cache *chunks_kmem_cache;

        /* memory management */
        struct mutex mutex; /* protect the ICM pool and ICM buddy */
        struct list_head buddy_mem_list;

        /* Hardware may be accessing this memory but at some future,
         * undetermined time, it might cease to do so.
         * sync_ste command sets them free.
         */
        struct mlx5dr_icm_hot_chunk *hot_chunks_arr;
        u32 hot_chunks_num;
        u64 hot_memory_size;
        /* hot memory size threshold for triggering sync */
        u64 th;
};

struct mlx5dr_icm_dm {
        u32 obj_id;
        enum mlx5_sw_icm_type type;
        phys_addr_t addr;
        size_t length;
};

struct mlx5dr_icm_mr {
        u32 mkey;
        struct mlx5dr_icm_dm dm;
        struct mlx5dr_domain *dmn;
        size_t length;
        u64 icm_start_addr;
};

static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
                                 u32 pd, u64 length, u64 start_addr, int mode,
                                 u32 *mkey)
{
        u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
        void *mkc;

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

        MLX5_SET(mkc, mkc, access_mode_1_0, mode);
        MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
        MLX5_SET(mkc, mkc, lw, 1);
        MLX5_SET(mkc, mkc, lr, 1);
        if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) {
                MLX5_SET(mkc, mkc, rw, 1);
                MLX5_SET(mkc, mkc, rr, 1);
        }

        MLX5_SET64(mkc, mkc, len, length);
        MLX5_SET(mkc, mkc, pd, pd);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
        MLX5_SET64(mkc, mkc, start_addr, start_addr);

        return mlx5_core_create_mkey(mdev, mkey, in, inlen);
}

u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk)
{
        u32 offset = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type);

        return (u64)offset * chunk->seg;
}

u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk)
{
        return chunk->buddy_mem->icm_mr->mkey;
}

u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk)
{
        u32 size = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type);

        return (u64)chunk->buddy_mem->icm_mr->icm_start_addr + size * chunk->seg;
}

u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk)
{
        return mlx5dr_icm_pool_chunk_size_to_byte(chunk->size,
                        chunk->buddy_mem->pool->icm_type);
}

u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk)
{
        return mlx5dr_icm_pool_chunk_size_to_entries(chunk->size);
}

static struct mlx5dr_icm_mr *
dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
{
        struct mlx5_core_dev *mdev = pool->dmn->mdev;
        enum mlx5_sw_icm_type dm_type = 0;
        struct mlx5dr_icm_mr *icm_mr;
        size_t log_align_base = 0;
        int err;

        icm_mr = kvzalloc_obj(*icm_mr);
        if (!icm_mr)
                return NULL;

        icm_mr->dmn = pool->dmn;

        icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
                                                               pool->icm_type);

        switch (pool->icm_type) {
        case DR_ICM_TYPE_STE:
                dm_type = MLX5_SW_ICM_TYPE_STEERING;
                log_align_base = ilog2(icm_mr->dm.length);
                break;
        case DR_ICM_TYPE_MODIFY_ACTION:
                dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
                /* Align base is 64B */
                log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
                break;
        case DR_ICM_TYPE_MODIFY_HDR_PTRN:
                dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN;
                /* Align base is 64B */
                log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
                break;
        default:
                WARN_ON(pool->icm_type);
        }

        icm_mr->dm.type = dm_type;

        err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length,
                                   log_align_base, 0, &icm_mr->dm.addr,
                                   &icm_mr->dm.obj_id);
        if (err) {
                mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err);
                goto free_icm_mr;
        }

        /* Register device memory */
        err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn,
                                    icm_mr->dm.length,
                                    icm_mr->dm.addr,
                                    MLX5_MKC_ACCESS_MODE_SW_ICM,
                                    &icm_mr->mkey);
        if (err) {
                mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err (%d)\n", err);
                goto free_dm;
        }

        icm_mr->icm_start_addr = icm_mr->dm.addr;

        if (icm_mr->icm_start_addr & (BIT(log_align_base) - 1)) {
                mlx5dr_err(pool->dmn, "Failed to get Aligned ICM mem (asked: %zu)\n",
                           log_align_base);
                goto free_mkey;
        }

        return icm_mr;

free_mkey:
        mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
free_dm:
        mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
                               icm_mr->dm.addr, icm_mr->dm.obj_id);
free_icm_mr:
        kvfree(icm_mr);
        return NULL;
}

static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
{
        struct mlx5_core_dev *mdev = icm_mr->dmn->mdev;
        struct mlx5dr_icm_dm *dm = &icm_mr->dm;

        mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
        mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0,
                               dm->addr, dm->obj_id);
        kvfree(icm_mr);
}

static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy)
{
        /* We support only one type of STE size, both for ConnectX-5 and later
         * devices. Once the support for match STE which has a larger tag is
         * added (32B instead of 16B), the STE size for devices later than
         * ConnectX-5 needs to account for that.
         */
        return DR_STE_SIZE_REDUCED;
}

static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset)
{
        int num_of_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk);
        struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
        int ste_size = dr_icm_buddy_get_ste_size(buddy);
        int index = offset / DR_STE_SIZE;

        chunk->ste_arr = &buddy->ste_arr[index];
        chunk->miss_list = &buddy->miss_list[index];
        chunk->hw_ste_arr = buddy->hw_ste_arr + index * ste_size;

        memset(chunk->hw_ste_arr, 0, num_of_entries * ste_size);
        memset(chunk->ste_arr, 0,
               num_of_entries * sizeof(chunk->ste_arr[0]));
}

static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
{
        int num_of_entries =
                mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz);

        buddy->ste_arr = kvzalloc_objs(struct mlx5dr_ste, num_of_entries);
        if (!buddy->ste_arr)
                return -ENOMEM;

        /* Preallocate full STE size on non-ConnectX-5 devices since
         * we need to support both full and reduced with the same cache.
         */
        buddy->hw_ste_arr = kvcalloc(num_of_entries,
                                     dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL);
        if (!buddy->hw_ste_arr)
                goto free_ste_arr;

        buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL);
        if (!buddy->miss_list)
                goto free_hw_ste_arr;

        return 0;

free_hw_ste_arr:
        kvfree(buddy->hw_ste_arr);
free_ste_arr:
        kvfree(buddy->ste_arr);
        return -ENOMEM;
}

static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
{
        kvfree(buddy->ste_arr);
        kvfree(buddy->hw_ste_arr);
        kvfree(buddy->miss_list);
}

static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
{
        struct mlx5dr_icm_buddy_mem *buddy;
        struct mlx5dr_icm_mr *icm_mr;

        icm_mr = dr_icm_pool_mr_create(pool);
        if (!icm_mr)
                return -ENOMEM;

        buddy = kvzalloc_obj(*buddy);
        if (!buddy)
                goto free_mr;

        if (mlx5dr_buddy_init(buddy, pool->max_log_chunk_sz))
                goto err_free_buddy;

        buddy->icm_mr = icm_mr;
        buddy->pool = pool;

        if (pool->icm_type == DR_ICM_TYPE_STE) {
                /* Reduce allocations by preallocating and reusing the STE structures */
                if (dr_icm_buddy_init_ste_cache(buddy))
                        goto err_cleanup_buddy;
        }

        /* add it to the -start- of the list in order to search in it first */
        list_add(&buddy->list_node, &pool->buddy_mem_list);

        pool->dmn->num_buddies[pool->icm_type]++;

        return 0;

err_cleanup_buddy:
        mlx5dr_buddy_cleanup(buddy);
err_free_buddy:
        kvfree(buddy);
free_mr:
        dr_icm_pool_mr_destroy(icm_mr);
        return -ENOMEM;
}

static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy)
{
        enum mlx5dr_icm_type icm_type = buddy->pool->icm_type;

        dr_icm_pool_mr_destroy(buddy->icm_mr);

        mlx5dr_buddy_cleanup(buddy);

        if (icm_type == DR_ICM_TYPE_STE)
                dr_icm_buddy_cleanup_ste_cache(buddy);

        buddy->pool->dmn->num_buddies[icm_type]--;

        kvfree(buddy);
}

static void
dr_icm_chunk_init(struct mlx5dr_icm_chunk *chunk,
                  struct mlx5dr_icm_pool *pool,
                  enum mlx5dr_icm_chunk_size chunk_size,
                  struct mlx5dr_icm_buddy_mem *buddy_mem_pool,
                  unsigned int seg)
{
        int offset;

        chunk->seg = seg;
        chunk->size = chunk_size;
        chunk->buddy_mem = buddy_mem_pool;

        if (pool->icm_type == DR_ICM_TYPE_STE) {
                offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg;
                dr_icm_chunk_ste_init(chunk, offset);
        }

        buddy_mem_pool->used_memory += mlx5dr_icm_pool_get_chunk_byte_size(chunk);
}

static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool)
{
        return pool->hot_memory_size > pool->th;
}

static void dr_icm_pool_clear_hot_chunks_arr(struct mlx5dr_icm_pool *pool)
{
        struct mlx5dr_icm_hot_chunk *hot_chunk;
        u32 i, num_entries;

        for (i = 0; i < pool->hot_chunks_num; i++) {
                hot_chunk = &pool->hot_chunks_arr[i];
                num_entries = mlx5dr_icm_pool_chunk_size_to_entries(hot_chunk->size);
                mlx5dr_buddy_free_mem(hot_chunk->buddy_mem,
                                      hot_chunk->seg, ilog2(num_entries));
                hot_chunk->buddy_mem->used_memory -=
                        mlx5dr_icm_pool_chunk_size_to_byte(hot_chunk->size,
                                                           pool->icm_type);
        }

        pool->hot_chunks_num = 0;
        pool->hot_memory_size = 0;
}

static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool)
{
        struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;
        int err;

        err = mlx5dr_cmd_sync_steering(pool->dmn->mdev);
        if (err) {
                mlx5dr_err(pool->dmn, "Failed to sync to HW (err: %d)\n", err);
                return err;
        }

        dr_icm_pool_clear_hot_chunks_arr(pool);

        list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) {
                if (!buddy->used_memory && pool->icm_type == DR_ICM_TYPE_STE)
                        dr_icm_buddy_destroy(buddy);
        }

        return 0;
}

static int dr_icm_handle_buddies_get_mem(struct mlx5dr_icm_pool *pool,
                                         enum mlx5dr_icm_chunk_size chunk_size,
                                         struct mlx5dr_icm_buddy_mem **buddy,
                                         unsigned int *seg)
{
        struct mlx5dr_icm_buddy_mem *buddy_mem_pool;
        bool new_mem = false;
        int err;

alloc_buddy_mem:
        /* find the next free place from the buddy list */
        list_for_each_entry(buddy_mem_pool, &pool->buddy_mem_list, list_node) {
                err = mlx5dr_buddy_alloc_mem(buddy_mem_pool,
                                             chunk_size, seg);
                if (!err)
                        goto found;

                if (WARN_ON(new_mem)) {
                        /* We have new memory pool, first in the list */
                        mlx5dr_err(pool->dmn,
                                   "No memory for order: %d\n",
                                   chunk_size);
                        goto out;
                }
        }

        /* no more available allocators in that pool, create new */
        err = dr_icm_buddy_create(pool);
        if (err) {
                mlx5dr_err(pool->dmn,
                           "Failed creating buddy for order %d\n",
                           chunk_size);
                goto out;
        }

        /* mark we have new memory, first in list */
        new_mem = true;
        goto alloc_buddy_mem;

found:
        *buddy = buddy_mem_pool;
out:
        return err;
}

/* Allocate an ICM chunk, each chunk holds a piece of ICM memory and
 * also memory used for HW STE management for optimizations.
 */
struct mlx5dr_icm_chunk *
mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
                       enum mlx5dr_icm_chunk_size chunk_size)
{
        struct mlx5dr_icm_chunk *chunk = NULL;
        struct mlx5dr_icm_buddy_mem *buddy;
        unsigned int seg;
        int ret;

        if (chunk_size > pool->max_log_chunk_sz)
                return NULL;

        mutex_lock(&pool->mutex);
        /* find mem, get back the relevant buddy pool and seg in that mem */
        ret = dr_icm_handle_buddies_get_mem(pool, chunk_size, &buddy, &seg);
        if (ret)
                goto out;

        chunk = kmem_cache_alloc(pool->chunks_kmem_cache, GFP_KERNEL);
        if (!chunk)
                goto out_err;

        dr_icm_chunk_init(chunk, pool, chunk_size, buddy, seg);

        goto out;

out_err:
        mlx5dr_buddy_free_mem(buddy, seg, chunk_size);
out:
        mutex_unlock(&pool->mutex);
        return chunk;
}

void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk)
{
        struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
        struct mlx5dr_icm_pool *pool = buddy->pool;
        struct mlx5dr_icm_hot_chunk *hot_chunk;
        struct kmem_cache *chunks_cache;

        chunks_cache = pool->chunks_kmem_cache;

        /* move the chunk to the waiting chunks array, AKA "hot" memory */
        mutex_lock(&pool->mutex);

        pool->hot_memory_size += mlx5dr_icm_pool_get_chunk_byte_size(chunk);

        hot_chunk = &pool->hot_chunks_arr[pool->hot_chunks_num++];
        hot_chunk->buddy_mem = chunk->buddy_mem;
        hot_chunk->seg = chunk->seg;
        hot_chunk->size = chunk->size;

        kmem_cache_free(chunks_cache, chunk);

        /* Check if we have chunks that are waiting for sync-ste */
        if (dr_icm_pool_is_sync_required(pool))
                dr_icm_pool_sync_all_buddy_pools(pool);

        mutex_unlock(&pool->mutex);
}

struct mlx5dr_ste_htbl *mlx5dr_icm_pool_alloc_htbl(struct mlx5dr_icm_pool *pool)
{
        return kmem_cache_alloc(pool->dmn->htbls_kmem_cache, GFP_KERNEL);
}

void mlx5dr_icm_pool_free_htbl(struct mlx5dr_icm_pool *pool, struct mlx5dr_ste_htbl *htbl)
{
        kmem_cache_free(pool->dmn->htbls_kmem_cache, htbl);
}

struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
                                               enum mlx5dr_icm_type icm_type)
{
        u32 num_of_chunks, entry_size;
        struct mlx5dr_icm_pool *pool;
        u32 max_hot_size = 0;

        pool = kvzalloc_obj(*pool);
        if (!pool)
                return NULL;

        pool->dmn = dmn;
        pool->icm_type = icm_type;
        pool->chunks_kmem_cache = dmn->chunks_kmem_cache;

        INIT_LIST_HEAD(&pool->buddy_mem_list);
        mutex_init(&pool->mutex);

        switch (icm_type) {
        case DR_ICM_TYPE_STE:
                pool->max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
                max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
                                                                  pool->icm_type) *
                               DR_ICM_POOL_STE_HOT_MEM_PERCENT / 100;
                break;
        case DR_ICM_TYPE_MODIFY_ACTION:
                pool->max_log_chunk_sz = dmn->info.max_log_action_icm_sz;
                max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
                                                                  pool->icm_type) *
                               DR_ICM_POOL_MODIFY_ACTION_HOT_MEM_PERCENT / 100;
                break;
        case DR_ICM_TYPE_MODIFY_HDR_PTRN:
                pool->max_log_chunk_sz = dmn->info.max_log_modify_hdr_pattern_icm_sz;
                max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
                                                                  pool->icm_type) *
                               DR_ICM_POOL_MODIFY_HDR_PTRN_HOT_MEM_PERCENT / 100;
                break;
        default:
                WARN_ON(icm_type);
        }

        entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type);

        num_of_chunks = DIV_ROUND_UP(max_hot_size, entry_size) + 1;
        pool->th = max_hot_size;

        pool->hot_chunks_arr = kvzalloc_objs(struct mlx5dr_icm_hot_chunk,
                                             num_of_chunks);
        if (!pool->hot_chunks_arr)
                goto free_pool;

        return pool;

free_pool:
        kvfree(pool);
        return NULL;
}

void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool)
{
        struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;

        dr_icm_pool_clear_hot_chunks_arr(pool);

        list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node)
                dr_icm_buddy_destroy(buddy);

        kvfree(pool->hot_chunks_arr);
        mutex_destroy(&pool->mutex);
        kvfree(pool);
}