drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd */

#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>
#include "mlx5_core.h"
#include "dev.h"
#include "sf/vhca_event.h"
#include "sf/sf.h"
#include "sf/mlx5_ifc_vhca_event.h"
#include "ecpf.h"
#define CREATE_TRACE_POINTS
#include "diag/dev_tracepoint.h"

struct mlx5_sf_dev_table {
        struct xarray devices;
        phys_addr_t base_address;
        u64 sf_bar_length;
        struct workqueue_struct *active_wq;
        struct work_struct work;
        u8 stop_active_wq:1;
        struct mlx5_core_dev *dev;
};

struct mlx5_sf_dev_active_work_ctx {
        struct work_struct work;
        struct mlx5_vhca_state_event event;
        struct mlx5_sf_dev_table *table;
        int sf_index;
};

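/* SF auxiliary devices can only be created when the device exposes the SF
 * capability and supports vhca state change events.
 */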
static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev)
{
        return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev);
}

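/* Report whether any SF auxiliary device is currently instantiated on this
 * parent device.
 */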
bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
{
        struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;

        return table && !xa_empty(&table->devices);
}

static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf)
{
        struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev);
        struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);

        return sysfs_emit(buf, "%u\n", sf_dev->sfnum);
}
static DEVICE_ATTR_RO(sfnum);

static struct attribute *sf_device_attrs[] = {
        &dev_attr_sfnum.attr,
        NULL,
};

static const struct attribute_group sf_attr_group = {
        .attrs = sf_device_attrs,
};

static const struct attribute_group *sf_attr_groups[2] = {
        &sf_attr_group,
        NULL
};

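/* Release callback of the SF auxiliary device; runs once the last reference
 * to the device is dropped.
 */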
static void mlx5_sf_dev_release(struct device *device)
{
        struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev);
        struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);

        mlx5_adev_idx_free(adev->id);
        kfree(sf_dev);
}

static void mlx5_sf_dev_remove_aux(struct mlx5_core_dev *dev,
                                   struct mlx5_sf_dev *sf_dev)
{
        int id;

        id = sf_dev->adev.id;
        trace_mlx5_sf_dev_del(dev, sf_dev, id);

        auxiliary_device_delete(&sf_dev->adev);
        auxiliary_device_uninit(&sf_dev->adev);
}

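/* Create and register an auxiliary device for the SF at the given index so
 * that the SF driver can bind to it. The SF BAR area is carved out of the
 * parent PCI BAR at a fixed per-SF stride, and the device is tracked in the
 * table's xarray keyed by sf_index.
 */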
static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id, u32 sfnum)
{
        struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
        struct mlx5_sf_dev *sf_dev;
        struct pci_dev *pdev;
        int err;
        int id;

        id = mlx5_adev_idx_alloc();
        if (id < 0) {
                err = id;
                goto add_err;
        }

        sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL);
        if (!sf_dev) {
                mlx5_adev_idx_free(id);
                err = -ENOMEM;
                goto add_err;
        }
        pdev = dev->pdev;
        sf_dev->adev.id = id;
        sf_dev->adev.name = MLX5_SF_DEV_ID_NAME;
        sf_dev->adev.dev.release = mlx5_sf_dev_release;
        sf_dev->adev.dev.parent = &pdev->dev;
        sf_dev->adev.dev.groups = sf_attr_groups;
        sf_dev->sfnum = sfnum;
        sf_dev->parent_mdev = dev;
        sf_dev->fn_id = fn_id;

        sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length);

        trace_mlx5_sf_dev_add(dev, sf_dev, id);

        err = auxiliary_device_init(&sf_dev->adev);
        if (err) {
                mlx5_adev_idx_free(id);
                kfree(sf_dev);
                goto add_err;
        }

        err = auxiliary_device_add(&sf_dev->adev);
        if (err) {
                auxiliary_device_uninit(&sf_dev->adev);
                goto add_err;
        }

        err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL);
        if (err)
                goto xa_err;
        return;

xa_err:
        mlx5_sf_dev_remove_aux(dev, sf_dev);
add_err:
        mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n",
                      sf_index, sfnum, err);
}

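/* Drop the SF from the table and remove its auxiliary device. */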
static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev, u16 sf_index)
{
        struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;

        xa_erase(&table->devices, sf_index);
        mlx5_sf_dev_remove_aux(dev, sf_dev);
}

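/* vhca state change handler. Translate the event's function_id into an SF
 * index and create or destroy the corresponding SF auxiliary device based on
 * the new vhca state.
 */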
static int
mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_code, void *data)
{
        struct mlx5_core_dev *dev = container_of(nb, struct mlx5_core_dev,
                                                 priv.sf_dev_nb);
        struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
        const struct mlx5_vhca_state_event *event = data;
        struct mlx5_sf_dev *sf_dev;
        u16 max_functions;
        u16 sf_index;
        u16 base_id;

        if (!table)
                return 0;

        max_functions = mlx5_sf_max_functions(dev);
        if (!max_functions)
                return 0;

        base_id = mlx5_sf_start_function_id(dev);
        if (event->function_id < base_id || event->function_id >= (base_id + max_functions))
                return 0;

        sf_index = event->function_id - base_id;
        sf_dev = xa_load(&table->devices, sf_index);
        switch (event->new_vhca_state) {
        case MLX5_VHCA_STATE_INVALID:
        case MLX5_VHCA_STATE_ALLOCATED:
                if (sf_dev)
                        mlx5_sf_dev_del(dev, sf_dev, sf_index);
                break;
        case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
                if (sf_dev)
                        mlx5_sf_dev_del(dev, sf_dev, sf_index);
                else
                        mlx5_core_err(dev,
                                      "SF DEV: teardown state for invalid dev index=%d sfnum=0x%x\n",
                                      sf_index, event->sw_function_id);
                break;
        case MLX5_VHCA_STATE_ACTIVE:
                if (!sf_dev)
                        mlx5_sf_dev_add(dev, sf_index, event->function_id,
                                        event->sw_function_id);
                break;
        default:
                break;
        }
        return 0;
}

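/* Request vhca state change events for all SF functions of this device. */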
static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table)
{
        struct mlx5_core_dev *dev = table->dev;
        u16 max_functions;
        u16 function_id;
        int err = 0;
        int i;

        max_functions = mlx5_sf_max_functions(dev);
        function_id = mlx5_sf_start_function_id(dev);
        /* Arm each vhca context to report vhca state change events */
        for (i = 0; i < max_functions; i++) {
                err = mlx5_vhca_event_arm(dev, function_id);
                if (err)
                        return err;

                function_id++;
        }
        return 0;
}

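/* Work handler which probes a single SF that was found to be already active.
 * Used when SFs are created externally.
 */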
static void mlx5_sf_dev_add_active_work(struct work_struct *_work)
{
        struct mlx5_sf_dev_active_work_ctx *work_ctx;

        work_ctx = container_of(_work, struct mlx5_sf_dev_active_work_ctx, work);
        if (work_ctx->table->stop_active_wq)
                goto out;
        /* Don't probe a device which is already probed */
        if (!xa_load(&work_ctx->table->devices, work_ctx->sf_index))
                mlx5_sf_dev_add(work_ctx->table->dev, work_ctx->sf_index,
                                work_ctx->event.function_id, work_ctx->event.sw_function_id);
        /* There is a race where the SF may become inactive after the
         * query above: e.g. the query returns that the SF is active,
         * and afterwards the eswitch manager sets it to inactive.
         * This case cannot be handled in SW, since the SF is probed on
         * one system and inactivated on a different one.
         * If the inactivation happens after the SF performs init_hca(),
         * the SF will fully probe and then be removed. If it happens
         * before init_hca(), the SF probe will fail.
         */
out:
        kfree(work_ctx);
}

/* In case SFs are generated externally, probe active SFs */
static void mlx5_sf_dev_queue_active_works(struct work_struct *_work)
{
        struct mlx5_sf_dev_table *table = container_of(_work, struct mlx5_sf_dev_table, work);
        u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
        struct mlx5_sf_dev_active_work_ctx *work_ctx;
        struct mlx5_core_dev *dev = table->dev;
        u16 max_functions;
        u16 function_id;
        u16 sw_func_id;
        int err = 0;
        int wq_idx;
        u8 state;
        int i;

        max_functions = mlx5_sf_max_functions(dev);
        function_id = mlx5_sf_start_function_id(dev);
        for (i = 0; i < max_functions; i++, function_id++) {
                if (table->stop_active_wq)
                        return;
                err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out));
                if (err)
                        /* A failure of a specific vhca doesn't mean the
                         * others will fail as well.
                         */
                        continue;
                state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
                if (state != MLX5_VHCA_STATE_ACTIVE)
                        continue;

                sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id);
                work_ctx = kzalloc(sizeof(*work_ctx), GFP_KERNEL);
                if (!work_ctx)
                        return;

                INIT_WORK(&work_ctx->work, &mlx5_sf_dev_add_active_work);
                work_ctx->event.function_id = function_id;
                work_ctx->event.sw_function_id = sw_func_id;
                work_ctx->table = table;
                work_ctx->sf_index = i;
                wq_idx = work_ctx->event.function_id % MLX5_DEV_MAX_WQS;
                mlx5_vhca_events_work_enqueue(dev, wq_idx, &work_ctx->work);
        }
}

/* In case SFs are generated externally, probe active SFs */
static int mlx5_sf_dev_create_active_works(struct mlx5_sf_dev_table *table)
{
        if (MLX5_CAP_GEN(table->dev, eswitch_manager))
                return 0; /* SFs are managed locally */

        /* Use a workqueue to probe active SFs, which are in large
         * quantity and may take up to minutes to probe.
         */
        table->active_wq = create_singlethread_workqueue("mlx5_active_sf");
        if (!table->active_wq)
                return -ENOMEM;
        INIT_WORK(&table->work, &mlx5_sf_dev_queue_active_works);
        queue_work(table->active_wq, &table->work);
        return 0;
}

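/* Stop the scan for already-active SFs: flag queued work items to bail out,
 * then drain and destroy the workqueue.
 */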
static void mlx5_sf_dev_destroy_active_works(struct mlx5_sf_dev_table *table)
{
        if (table->active_wq) {
                table->stop_active_wq = true;
                destroy_workqueue(table->active_wq);
        }
}

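/* Register the vhca state change notifier on the parent device; skipped when
 * the device itself is an SF.
 */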
int mlx5_sf_dev_notifier_init(struct mlx5_core_dev *dev)
{
        if (mlx5_core_is_sf(dev))
                return 0;

        dev->priv.sf_dev_nb.notifier_call = mlx5_sf_dev_state_change_handler;
        return mlx5_vhca_event_notifier_register(dev, &dev->priv.sf_dev_nb);
}

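/* Create the SF device table: compute the per-SF BAR stride, start the scan
 * for already-active SFs and arm vhca state change events.
 */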
void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
{
        struct mlx5_sf_dev_table *table;
        int err;

        if (!mlx5_sf_dev_supported(dev))
                return;

        table = kzalloc(sizeof(*table), GFP_KERNEL);
        if (!table) {
                err = -ENOMEM;
                goto table_err;
        }

        table->dev = dev;
        table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12);
        table->base_address = pci_resource_start(dev->pdev, 2);
        xa_init(&table->devices);
        dev->priv.sf_dev_table = table;

        err = mlx5_sf_dev_create_active_works(table);
        if (err)
                goto add_active_err;

        err = mlx5_sf_dev_vhca_arm_all(table);
        if (err)
                goto arm_err;
        return;

arm_err:
        mlx5_sf_dev_destroy_active_works(table);
        mlx5_vhca_event_work_queues_flush(dev);
add_active_err:
        kfree(table);
        dev->priv.sf_dev_table = NULL;
table_err:
        mlx5_core_err(dev, "SF DEV table create err = %d\n", err);
}

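/* Remove all SF auxiliary devices that are still registered in the table. */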
static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table)
{
        struct mlx5_sf_dev *sf_dev;
        unsigned long index;

        xa_for_each(&table->devices, index, sf_dev) {
                xa_erase(&table->devices, index);
                mlx5_sf_dev_remove_aux(table->dev, sf_dev);
        }
}

void mlx5_sf_dev_notifier_cleanup(struct mlx5_core_dev *dev)
{
        if (mlx5_core_is_sf(dev))
                return;

        mlx5_vhca_event_notifier_unregister(dev, &dev->priv.sf_dev_nb);
}

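/* Tear down the SF device table: stop the active-SF scan, remove the
 * remaining SF auxiliary devices and free the table.
 */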
void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
{
        struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;

        if (!table)
                return;

        mlx5_sf_dev_destroy_active_works(table);

        /* Now that the event handler is not running, it is safe to destroy
         * the SF devices without racing.
         */
        mlx5_sf_dev_destroy_all(table);

        WARN_ON(!xa_empty(&table->devices));
        kfree(table);
        dev->priv.sf_dev_table = NULL;
}