root/drivers/hwtracing/ptt/hisi_ptt.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Driver for HiSilicon PCIe tune and trace device
 *
 * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
 * Author: Yicong Yang <yangyicong@hisilicon.com>
 */

#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/cpuhotplug.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/vmalloc.h>

#include "hisi_ptt.h"

/* Dynamic CPU hotplug state used by PTT */
static enum cpuhp_state hisi_ptt_pmu_online;

static bool hisi_ptt_wait_tuning_finish(struct hisi_ptt *hisi_ptt)
{
        u32 val;

        return !readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TUNING_INT_STAT,
                                   val, !(val & HISI_PTT_TUNING_INT_STAT_MASK),
                                   HISI_PTT_WAIT_POLL_INTERVAL_US,
                                   HISI_PTT_WAIT_TUNE_TIMEOUT_US);
}

static ssize_t hisi_ptt_tune_attr_show(struct device *dev,
                                       struct device_attribute *attr,
                                       char *buf)
{
        struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
        struct dev_ext_attribute *ext_attr;
        struct hisi_ptt_tune_desc *desc;
        u32 reg;
        u16 val;

        ext_attr = container_of(attr, struct dev_ext_attribute, attr);
        desc = ext_attr->var;

        mutex_lock(&hisi_ptt->tune_lock);

        reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
        reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
        reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
                          desc->event_code);
        writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);

        /* Write all 1 to indicates it's the read process */
        writel(~0U, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);

        if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
                mutex_unlock(&hisi_ptt->tune_lock);
                return -ETIMEDOUT;
        }

        reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
        reg &= HISI_PTT_TUNING_DATA_VAL_MASK;
        val = FIELD_GET(HISI_PTT_TUNING_DATA_VAL_MASK, reg);

        mutex_unlock(&hisi_ptt->tune_lock);
        return sysfs_emit(buf, "%u\n", val);
}

static ssize_t hisi_ptt_tune_attr_store(struct device *dev,
                                        struct device_attribute *attr,
                                        const char *buf, size_t count)
{
        struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
        struct dev_ext_attribute *ext_attr;
        struct hisi_ptt_tune_desc *desc;
        u32 reg;
        u16 val;

        ext_attr = container_of(attr, struct dev_ext_attribute, attr);
        desc = ext_attr->var;

        if (kstrtou16(buf, 10, &val))
                return -EINVAL;

        mutex_lock(&hisi_ptt->tune_lock);

        reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
        reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
        reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
                          desc->event_code);
        writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
        writel(FIELD_PREP(HISI_PTT_TUNING_DATA_VAL_MASK, val),
               hisi_ptt->iobase + HISI_PTT_TUNING_DATA);

        if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
                mutex_unlock(&hisi_ptt->tune_lock);
                return -ETIMEDOUT;
        }

        mutex_unlock(&hisi_ptt->tune_lock);
        return count;
}

#define HISI_PTT_TUNE_ATTR(_name, _val, _show, _store)                  \
        static struct hisi_ptt_tune_desc _name##_desc = {               \
                .name = #_name,                                         \
                .event_code = (_val),                                   \
        };                                                              \
        static struct dev_ext_attribute hisi_ptt_##_name##_attr = {     \
                .attr   = __ATTR(_name, 0600, _show, _store),           \
                .var    = &_name##_desc,                                \
        }

#define HISI_PTT_TUNE_ATTR_COMMON(_name, _val)          \
        HISI_PTT_TUNE_ATTR(_name, _val,                 \
                           hisi_ptt_tune_attr_show,     \
                           hisi_ptt_tune_attr_store)

/*
 * The value of the tuning event are composed of two parts: main event code
 * in BIT[0,15] and subevent code in BIT[16,23]. For example, qox_tx_cpl is
 * a subevent of 'Tx path QoS control' which for tuning the weight of Tx
 * completion TLPs. See hisi_ptt.rst documentation for more information.
 */
#define HISI_PTT_TUNE_QOS_TX_CPL                (0x4 | (3 << 16))
#define HISI_PTT_TUNE_QOS_TX_NP                 (0x4 | (4 << 16))
#define HISI_PTT_TUNE_QOS_TX_P                  (0x4 | (5 << 16))
#define HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL        (0x5 | (6 << 16))
#define HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL        (0x5 | (7 << 16))

HISI_PTT_TUNE_ATTR_COMMON(qos_tx_cpl, HISI_PTT_TUNE_QOS_TX_CPL);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_np, HISI_PTT_TUNE_QOS_TX_NP);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_p, HISI_PTT_TUNE_QOS_TX_P);
HISI_PTT_TUNE_ATTR_COMMON(rx_alloc_buf_level, HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL);
HISI_PTT_TUNE_ATTR_COMMON(tx_alloc_buf_level, HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL);

static struct attribute *hisi_ptt_tune_attrs[] = {
        &hisi_ptt_qos_tx_cpl_attr.attr.attr,
        &hisi_ptt_qos_tx_np_attr.attr.attr,
        &hisi_ptt_qos_tx_p_attr.attr.attr,
        &hisi_ptt_rx_alloc_buf_level_attr.attr.attr,
        &hisi_ptt_tx_alloc_buf_level_attr.attr.attr,
        NULL,
};

static struct attribute_group hisi_ptt_tune_group = {
        .name   = "tune",
        .attrs  = hisi_ptt_tune_attrs,
};

static u16 hisi_ptt_get_filter_val(u16 devid, bool is_port)
{
        if (is_port)
                return BIT(HISI_PCIE_CORE_PORT_ID(devid & 0xff));

        return devid;
}

static bool hisi_ptt_wait_trace_hw_idle(struct hisi_ptt *hisi_ptt)
{
        u32 val;

        return !readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_STS,
                                          val, val & HISI_PTT_TRACE_IDLE,
                                          HISI_PTT_WAIT_POLL_INTERVAL_US,
                                          HISI_PTT_WAIT_TRACE_TIMEOUT_US);
}

static void hisi_ptt_wait_dma_reset_done(struct hisi_ptt *hisi_ptt)
{
        u32 val;

        readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS,
                                  val, !val, HISI_PTT_RESET_POLL_INTERVAL_US,
                                  HISI_PTT_RESET_TIMEOUT_US);
}

static void hisi_ptt_trace_end(struct hisi_ptt *hisi_ptt)
{
        writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);

        /* Mask the interrupt on the end */
        writel(HISI_PTT_TRACE_INT_MASK_ALL, hisi_ptt->iobase + HISI_PTT_TRACE_INT_MASK);

        hisi_ptt->trace_ctrl.started = false;
}

static int hisi_ptt_trace_start(struct hisi_ptt *hisi_ptt)
{
        struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
        u32 val;
        int i;

        /* Check device idle before start trace */
        if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt)) {
                pci_err(hisi_ptt->pdev, "Failed to start trace, the device is still busy\n");
                return -EBUSY;
        }

        ctrl->started = true;

        /* Reset the DMA before start tracing */
        val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
        val |= HISI_PTT_TRACE_CTRL_RST;
        writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);

        hisi_ptt_wait_dma_reset_done(hisi_ptt);

        val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
        val &= ~HISI_PTT_TRACE_CTRL_RST;
        writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);

        /* Reset the index of current buffer */
        hisi_ptt->trace_ctrl.buf_index = 0;

        /* Zero the trace buffers */
        for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
                memset(ctrl->trace_buf[i].addr, 0, HISI_PTT_TRACE_BUF_SIZE);

        /* Clear the interrupt status */
        writel(HISI_PTT_TRACE_INT_STAT_MASK, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
        writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_INT_MASK);

        /* Set the trace control register */
        val = FIELD_PREP(HISI_PTT_TRACE_CTRL_TYPE_SEL, ctrl->type);
        val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_RXTX_SEL, ctrl->direction);
        val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_DATA_FORMAT, ctrl->format);
        val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_TARGET_SEL, hisi_ptt->trace_ctrl.filter);
        if (!hisi_ptt->trace_ctrl.is_port)
                val |= HISI_PTT_TRACE_CTRL_FILTER_MODE;

        /* Start the Trace */
        val |= HISI_PTT_TRACE_CTRL_EN;
        writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);

        return 0;
}

static int hisi_ptt_update_aux(struct hisi_ptt *hisi_ptt, int index, bool stop)
{
        struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
        struct perf_output_handle *handle = &ctrl->handle;
        struct perf_event *event = handle->event;
        struct hisi_ptt_pmu_buf *buf;
        size_t size;
        void *addr;

        buf = perf_get_aux(handle);
        if (!buf || !handle->size)
                return -EINVAL;

        addr = ctrl->trace_buf[ctrl->buf_index].addr;

        /*
         * If we're going to stop, read the size of already traced data from
         * HISI_PTT_TRACE_WR_STS. Otherwise we're coming from the interrupt,
         * the data size is always HISI_PTT_TRACE_BUF_SIZE.
         */
        if (stop) {
                u32 reg;

                reg = readl(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS);
                size = FIELD_GET(HISI_PTT_TRACE_WR_STS_WRITE, reg);
        } else {
                size = HISI_PTT_TRACE_BUF_SIZE;
        }

        memcpy(buf->base + buf->pos, addr, size);
        buf->pos += size;

        /*
         * Always commit the data to the AUX buffer in time to make sure
         * userspace got enough time to consume the data.
         *
         * If we're not going to stop, apply a new one and check whether
         * there's enough room for the next trace.
         */
        perf_aux_output_end(handle, size);
        if (!stop) {
                buf = perf_aux_output_begin(handle, event);
                if (!buf)
                        return -EINVAL;

                buf->pos = handle->head % buf->length;
                if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
                        perf_aux_output_end(handle, 0);
                        return -EINVAL;
                }
        }

        return 0;
}

static irqreturn_t hisi_ptt_isr(int irq, void *context)
{
        struct hisi_ptt *hisi_ptt = context;
        u32 status, buf_idx;

        status = readl(hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
        if (!(status & HISI_PTT_TRACE_INT_STAT_MASK))
                return IRQ_NONE;

        buf_idx = ffs(status) - 1;

        /* Clear the interrupt status of buffer @buf_idx */
        writel(status, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);

        /*
         * Update the AUX buffer and cache the current buffer index,
         * as we need to know this and save the data when the trace
         * is ended out of the interrupt handler. End the trace
         * if the updating fails.
         */
        if (hisi_ptt_update_aux(hisi_ptt, buf_idx, false))
                hisi_ptt_trace_end(hisi_ptt);
        else
                hisi_ptt->trace_ctrl.buf_index = (buf_idx + 1) % HISI_PTT_TRACE_BUF_CNT;

        return IRQ_HANDLED;
}

static void hisi_ptt_irq_free_vectors(void *pdev)
{
        pci_free_irq_vectors(pdev);
}

static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt)
{
        struct pci_dev *pdev = hisi_ptt->pdev;
        int ret;

        ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
        if (ret < 0) {
                pci_err(pdev, "failed to allocate irq vector, ret = %d\n", ret);
                return ret;
        }

        ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_irq_free_vectors, pdev);
        if (ret < 0)
                return ret;

        hisi_ptt->trace_irq = pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ);
        ret = devm_request_irq(&pdev->dev, hisi_ptt->trace_irq, hisi_ptt_isr,
                                IRQF_NOBALANCING | IRQF_NO_THREAD, DRV_NAME,
                                hisi_ptt);
        if (ret) {
                pci_err(pdev, "failed to request irq %d, ret = %d\n",
                        hisi_ptt->trace_irq, ret);
                return ret;
        }

        return 0;
}

static void hisi_ptt_del_free_filter(struct hisi_ptt *hisi_ptt,
                                      struct hisi_ptt_filter_desc *filter)
{
        if (filter->is_port)
                hisi_ptt->port_mask &= ~hisi_ptt_get_filter_val(filter->devid, true);

        list_del(&filter->list);
        kfree(filter->name);
        kfree(filter);
}

static struct hisi_ptt_filter_desc *
hisi_ptt_alloc_add_filter(struct hisi_ptt *hisi_ptt, u16 devid, bool is_port)
{
        struct hisi_ptt_filter_desc *filter;
        u8 devfn = devid & 0xff;
        char *filter_name;

        filter_name = kasprintf(GFP_KERNEL, "%04x:%02x:%02x.%d", pci_domain_nr(hisi_ptt->pdev->bus),
                                 PCI_BUS_NUM(devid), PCI_SLOT(devfn), PCI_FUNC(devfn));
        if (!filter_name) {
                pci_err(hisi_ptt->pdev, "failed to allocate name for filter %04x:%02x:%02x.%d\n",
                        pci_domain_nr(hisi_ptt->pdev->bus), PCI_BUS_NUM(devid),
                        PCI_SLOT(devfn), PCI_FUNC(devfn));
                return NULL;
        }

        filter = kzalloc_obj(*filter);
        if (!filter) {
                pci_err(hisi_ptt->pdev, "failed to add filter for %s\n",
                        filter_name);
                kfree(filter_name);
                return NULL;
        }

        filter->name = filter_name;
        filter->is_port = is_port;
        filter->devid = devid;

        if (filter->is_port) {
                list_add_tail(&filter->list, &hisi_ptt->port_filters);

                /* Update the available port mask */
                hisi_ptt->port_mask |= hisi_ptt_get_filter_val(filter->devid, true);
        } else {
                list_add_tail(&filter->list, &hisi_ptt->req_filters);
        }

        return filter;
}

static ssize_t hisi_ptt_filter_show(struct device *dev, struct device_attribute *attr,
                                    char *buf)
{
        struct hisi_ptt_filter_desc *filter;
        unsigned long filter_val;

        filter = container_of(attr, struct hisi_ptt_filter_desc, attr);
        filter_val = hisi_ptt_get_filter_val(filter->devid, filter->is_port) |
                     (filter->is_port ? HISI_PTT_PMU_FILTER_IS_PORT : 0);

        return sysfs_emit(buf, "0x%05lx\n", filter_val);
}

static int hisi_ptt_create_rp_filter_attr(struct hisi_ptt *hisi_ptt,
                                          struct hisi_ptt_filter_desc *filter)
{
        struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;

        sysfs_attr_init(&filter->attr.attr);
        filter->attr.attr.name = filter->name;
        filter->attr.attr.mode = 0400; /* DEVICE_ATTR_ADMIN_RO */
        filter->attr.show = hisi_ptt_filter_show;

        return sysfs_add_file_to_group(kobj, &filter->attr.attr,
                                       HISI_PTT_RP_FILTERS_GRP_NAME);
}

static void hisi_ptt_remove_rp_filter_attr(struct hisi_ptt *hisi_ptt,
                                          struct hisi_ptt_filter_desc *filter)
{
        struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;

        sysfs_remove_file_from_group(kobj, &filter->attr.attr,
                                     HISI_PTT_RP_FILTERS_GRP_NAME);
}

static int hisi_ptt_create_req_filter_attr(struct hisi_ptt *hisi_ptt,
                                           struct hisi_ptt_filter_desc *filter)
{
        struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;

        sysfs_attr_init(&filter->attr.attr);
        filter->attr.attr.name = filter->name;
        filter->attr.attr.mode = 0400; /* DEVICE_ATTR_ADMIN_RO */
        filter->attr.show = hisi_ptt_filter_show;

        return sysfs_add_file_to_group(kobj, &filter->attr.attr,
                                       HISI_PTT_REQ_FILTERS_GRP_NAME);
}

static void hisi_ptt_remove_req_filter_attr(struct hisi_ptt *hisi_ptt,
                                           struct hisi_ptt_filter_desc *filter)
{
        struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;

        sysfs_remove_file_from_group(kobj, &filter->attr.attr,
                                     HISI_PTT_REQ_FILTERS_GRP_NAME);
}

static int hisi_ptt_create_filter_attr(struct hisi_ptt *hisi_ptt,
                                       struct hisi_ptt_filter_desc *filter)
{
        int ret;

        if (filter->is_port)
                ret = hisi_ptt_create_rp_filter_attr(hisi_ptt, filter);
        else
                ret = hisi_ptt_create_req_filter_attr(hisi_ptt, filter);

        if (ret)
                pci_err(hisi_ptt->pdev, "failed to create sysfs attribute for filter %s\n",
                        filter->name);

        return ret;
}

static void hisi_ptt_remove_filter_attr(struct hisi_ptt *hisi_ptt,
                                        struct hisi_ptt_filter_desc *filter)
{
        if (filter->is_port)
                hisi_ptt_remove_rp_filter_attr(hisi_ptt, filter);
        else
                hisi_ptt_remove_req_filter_attr(hisi_ptt, filter);
}

static void hisi_ptt_remove_all_filter_attributes(void *data)
{
        struct hisi_ptt_filter_desc *filter;
        struct hisi_ptt *hisi_ptt = data;

        mutex_lock(&hisi_ptt->filter_lock);

        list_for_each_entry(filter, &hisi_ptt->req_filters, list)
                hisi_ptt_remove_filter_attr(hisi_ptt, filter);

        list_for_each_entry(filter, &hisi_ptt->port_filters, list)
                hisi_ptt_remove_filter_attr(hisi_ptt, filter);

        hisi_ptt->sysfs_inited = false;
        mutex_unlock(&hisi_ptt->filter_lock);
}

static int hisi_ptt_init_filter_attributes(struct hisi_ptt *hisi_ptt)
{
        struct hisi_ptt_filter_desc *filter;
        int ret;

        mutex_lock(&hisi_ptt->filter_lock);

        /*
         * Register the reset callback in the first stage. In reset we traverse
         * the filters list to remove the sysfs attributes so the callback can
         * be called safely even without below filter attributes creation.
         */
        ret = devm_add_action(&hisi_ptt->pdev->dev,
                              hisi_ptt_remove_all_filter_attributes,
                              hisi_ptt);
        if (ret)
                goto out;

        list_for_each_entry(filter, &hisi_ptt->port_filters, list) {
                ret = hisi_ptt_create_filter_attr(hisi_ptt, filter);
                if (ret)
                        goto out;
        }

        list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
                ret = hisi_ptt_create_filter_attr(hisi_ptt, filter);
                if (ret)
                        goto out;
        }

        hisi_ptt->sysfs_inited = true;
out:
        mutex_unlock(&hisi_ptt->filter_lock);
        return ret;
}

static void hisi_ptt_update_filters(struct work_struct *work)
{
        struct delayed_work *delayed_work = to_delayed_work(work);
        struct hisi_ptt_filter_update_info info;
        struct hisi_ptt_filter_desc *filter;
        struct hisi_ptt *hisi_ptt;

        hisi_ptt = container_of(delayed_work, struct hisi_ptt, work);

        if (!mutex_trylock(&hisi_ptt->filter_lock)) {
                schedule_delayed_work(&hisi_ptt->work, HISI_PTT_WORK_DELAY_MS);
                return;
        }

        while (kfifo_get(&hisi_ptt->filter_update_kfifo, &info)) {
                if (info.is_add) {
                        /*
                         * Notify the users if failed to add this filter, others
                         * still work and available. See the comments in
                         * hisi_ptt_init_filters().
                         */
                        filter = hisi_ptt_alloc_add_filter(hisi_ptt, info.devid, info.is_port);
                        if (!filter)
                                continue;

                        /*
                         * If filters' sysfs entries hasn't been initialized,
                         * then we're still at probe stage. Add the filters to
                         * the list and later hisi_ptt_init_filter_attributes()
                         * will create sysfs attributes for all the filters.
                         */
                        if (hisi_ptt->sysfs_inited &&
                            hisi_ptt_create_filter_attr(hisi_ptt, filter)) {
                                hisi_ptt_del_free_filter(hisi_ptt, filter);
                                continue;
                        }
                } else {
                        struct hisi_ptt_filter_desc *tmp;
                        struct list_head *target_list;

                        target_list = info.is_port ? &hisi_ptt->port_filters :
                                      &hisi_ptt->req_filters;

                        list_for_each_entry_safe(filter, tmp, target_list, list)
                                if (filter->devid == info.devid) {
                                        if (hisi_ptt->sysfs_inited)
                                                hisi_ptt_remove_filter_attr(hisi_ptt, filter);

                                        hisi_ptt_del_free_filter(hisi_ptt, filter);
                                        break;
                                }
                }
        }

        mutex_unlock(&hisi_ptt->filter_lock);
}

/*
 * A PCI bus notifier is used here for dynamically updating the filter
 * list.
 */
static int hisi_ptt_notifier_call(struct notifier_block *nb, unsigned long action,
                                  void *data)
{
        struct hisi_ptt *hisi_ptt = container_of(nb, struct hisi_ptt, hisi_ptt_nb);
        struct hisi_ptt_filter_update_info info;
        struct pci_dev *pdev, *root_port;
        struct device *dev = data;
        u32 port_devid;

        pdev = to_pci_dev(dev);
        root_port = pcie_find_root_port(pdev);
        if (!root_port)
                return 0;

        port_devid = pci_dev_id(root_port);
        if (port_devid < hisi_ptt->lower_bdf ||
            port_devid > hisi_ptt->upper_bdf)
                return 0;

        info.is_port = pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT;
        info.devid = pci_dev_id(pdev);

        switch (action) {
        case BUS_NOTIFY_ADD_DEVICE:
                info.is_add = true;
                break;
        case BUS_NOTIFY_DEL_DEVICE:
                info.is_add = false;
                break;
        default:
                return 0;
        }

        /*
         * The FIFO size is 16 which is sufficient for almost all the cases,
         * since each PCIe core will have most 8 Root Ports (typically only
         * 1~4 Root Ports). On failure log the failed filter and let user
         * handle it.
         */
        if (kfifo_in_spinlocked(&hisi_ptt->filter_update_kfifo, &info, 1,
                                &hisi_ptt->filter_update_lock))
                schedule_delayed_work(&hisi_ptt->work, 0);
        else
                pci_warn(hisi_ptt->pdev,
                         "filter update fifo overflow for target %s\n",
                         pci_name(pdev));

        return 0;
}

static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
{
        struct pci_dev *root_port = pcie_find_root_port(pdev);
        struct hisi_ptt_filter_desc *filter;
        struct hisi_ptt *hisi_ptt = data;
        u32 port_devid;

        if (!root_port)
                return 0;

        port_devid = pci_dev_id(root_port);
        if (port_devid < hisi_ptt->lower_bdf ||
            port_devid > hisi_ptt->upper_bdf)
                return 0;

        /*
         * We won't fail the probe if filter allocation failed here. The filters
         * should be partial initialized and users would know which filter fails
         * through the log. Other functions of PTT device are still available.
         */
        filter = hisi_ptt_alloc_add_filter(hisi_ptt, pci_dev_id(pdev),
                                            pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT);
        if (!filter)
                return -ENOMEM;

        return 0;
}

static void hisi_ptt_release_filters(void *data)
{
        struct hisi_ptt_filter_desc *filter, *tmp;
        struct hisi_ptt *hisi_ptt = data;

        list_for_each_entry_safe(filter, tmp, &hisi_ptt->req_filters, list)
                hisi_ptt_del_free_filter(hisi_ptt, filter);

        list_for_each_entry_safe(filter, tmp, &hisi_ptt->port_filters, list)
                hisi_ptt_del_free_filter(hisi_ptt, filter);
}

static int hisi_ptt_config_trace_buf(struct hisi_ptt *hisi_ptt)
{
        struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
        struct device *dev = &hisi_ptt->pdev->dev;
        int i;

        ctrl->trace_buf = devm_kcalloc(dev, HISI_PTT_TRACE_BUF_CNT,
                                       sizeof(*ctrl->trace_buf), GFP_KERNEL);
        if (!ctrl->trace_buf)
                return -ENOMEM;

        for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; ++i) {
                ctrl->trace_buf[i].addr = dmam_alloc_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
                                                             &ctrl->trace_buf[i].dma,
                                                             GFP_KERNEL);
                if (!ctrl->trace_buf[i].addr)
                        return -ENOMEM;
        }

        /* Configure the trace DMA buffer */
        for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++) {
                writel(lower_32_bits(ctrl->trace_buf[i].dma),
                       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_LO_0 +
                       i * HISI_PTT_TRACE_ADDR_STRIDE);
                writel(upper_32_bits(ctrl->trace_buf[i].dma),
                       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_HI_0 +
                       i * HISI_PTT_TRACE_ADDR_STRIDE);
        }
        writel(HISI_PTT_TRACE_BUF_SIZE, hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_SIZE);

        return 0;
}

static int hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
{
        struct pci_dev *pdev = hisi_ptt->pdev;
        struct pci_bus *bus;
        int ret;
        u32 reg;

        INIT_DELAYED_WORK(&hisi_ptt->work, hisi_ptt_update_filters);
        INIT_KFIFO(hisi_ptt->filter_update_kfifo);
        spin_lock_init(&hisi_ptt->filter_update_lock);

        INIT_LIST_HEAD(&hisi_ptt->port_filters);
        INIT_LIST_HEAD(&hisi_ptt->req_filters);
        mutex_init(&hisi_ptt->filter_lock);

        ret = hisi_ptt_config_trace_buf(hisi_ptt);
        if (ret)
                return ret;

        /*
         * The device range register provides the information about the root
         * ports which the RCiEP can control and trace. The RCiEP and the root
         * ports which it supports are on the same PCIe core, with same domain
         * number but maybe different bus number. The device range register
         * will tell us which root ports we can support, Bit[31:16] indicates
         * the upper BDF numbers of the root port, while Bit[15:0] indicates
         * the lower.
         */
        reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
        hisi_ptt->upper_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
        hisi_ptt->lower_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);

        bus = pci_find_bus(pci_domain_nr(pdev->bus), PCI_BUS_NUM(hisi_ptt->upper_bdf));
        if (bus)
                pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);

        ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_release_filters, hisi_ptt);
        if (ret)
                return ret;

        hisi_ptt->trace_ctrl.on_cpu = -1;
        return 0;
}

static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
                            char *buf)
{
        struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
        const cpumask_t *cpumask = cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev));

        return cpumap_print_to_pagebuf(true, buf, cpumask);
}
static DEVICE_ATTR_RO(cpumask);

static struct attribute *hisi_ptt_cpumask_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL
};

static const struct attribute_group hisi_ptt_cpumask_attr_group = {
        .attrs = hisi_ptt_cpumask_attrs,
};

/*
 * Bit 19 indicates the filter type, 1 for Root Port filter and 0 for Requester
 * filter. Bit[15:0] indicates the filter value, for Root Port filter it's
 * a bit mask of desired ports and for Requester filter it's the Requester ID
 * of the desired PCIe function. Bit[18:16] is reserved for extension.
 *
 * See hisi_ptt.rst documentation for detailed information.
 */
PMU_FORMAT_ATTR(filter,         "config:0-19");
PMU_FORMAT_ATTR(direction,      "config:20-23");
PMU_FORMAT_ATTR(type,           "config:24-31");
PMU_FORMAT_ATTR(format,         "config:32-35");

static struct attribute *hisi_ptt_pmu_format_attrs[] = {
        &format_attr_filter.attr,
        &format_attr_direction.attr,
        &format_attr_type.attr,
        &format_attr_format.attr,
        NULL
};

static struct attribute_group hisi_ptt_pmu_format_group = {
        .name = "format",
        .attrs = hisi_ptt_pmu_format_attrs,
};

static ssize_t hisi_ptt_filter_multiselect_show(struct device *dev,
                                                struct device_attribute *attr,
                                                char *buf)
{
        struct dev_ext_attribute *ext_attr;

        ext_attr = container_of(attr, struct dev_ext_attribute, attr);
        return sysfs_emit(buf, "%s\n", (char *)ext_attr->var);
}

static struct dev_ext_attribute root_port_filters_multiselect = {
        .attr = {
                .attr = { .name = "multiselect", .mode = 0400 },
                .show = hisi_ptt_filter_multiselect_show,
        },
        .var = "1",
};

static struct attribute *hisi_ptt_pmu_root_ports_attrs[] = {
        &root_port_filters_multiselect.attr.attr,
        NULL
};

static struct attribute_group hisi_ptt_pmu_root_ports_group = {
        .name = HISI_PTT_RP_FILTERS_GRP_NAME,
        .attrs = hisi_ptt_pmu_root_ports_attrs,
};

static struct dev_ext_attribute requester_filters_multiselect = {
        .attr = {
                .attr = { .name = "multiselect", .mode = 0400 },
                .show = hisi_ptt_filter_multiselect_show,
        },
        .var = "0",
};

static struct attribute *hisi_ptt_pmu_requesters_attrs[] = {
        &requester_filters_multiselect.attr.attr,
        NULL
};

static struct attribute_group hisi_ptt_pmu_requesters_group = {
        .name = HISI_PTT_REQ_FILTERS_GRP_NAME,
        .attrs = hisi_ptt_pmu_requesters_attrs,
};

static const struct attribute_group *hisi_ptt_pmu_groups[] = {
        &hisi_ptt_cpumask_attr_group,
        &hisi_ptt_pmu_format_group,
        &hisi_ptt_tune_group,
        &hisi_ptt_pmu_root_ports_group,
        &hisi_ptt_pmu_requesters_group,
        NULL
};

static int hisi_ptt_trace_valid_direction(u32 val)
{
        /*
         * The direction values have different effects according to the data
         * format (specified in the parentheses). TLP set A/B means different
         * set of TLP types. See hisi_ptt.rst documentation for more details.
         */
        static const u32 hisi_ptt_trace_available_direction[] = {
                0,      /* inbound(4DW) or reserved(8DW) */
                1,      /* outbound(4DW) */
                2,      /* {in, out}bound(4DW) or inbound(8DW), TLP set A */
                3,      /* {in, out}bound(4DW) or inbound(8DW), TLP set B */
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_direction); i++) {
                if (val == hisi_ptt_trace_available_direction[i])
                        return 0;
        }

        return -EINVAL;
}

static int hisi_ptt_trace_valid_type(u32 val)
{
        /* Different types can be set simultaneously */
        static const u32 hisi_ptt_trace_available_type[] = {
                1,      /* posted_request */
                2,      /* non-posted_request */
                4,      /* completion */
        };
        int i;

        if (!val)
                return -EINVAL;

        /*
         * Walk the available list and clear the valid bits of
         * the config. If there is any resident bit after the
         * walk then the config is invalid.
         */
        for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_type); i++)
                val &= ~hisi_ptt_trace_available_type[i];

        if (val)
                return -EINVAL;

        return 0;
}

static int hisi_ptt_trace_valid_format(u32 val)
{
        static const u32 hisi_ptt_trace_availble_format[] = {
                0,      /* 4DW */
                1,      /* 8DW */
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_availble_format); i++) {
                if (val == hisi_ptt_trace_availble_format[i])
                        return 0;
        }

        return -EINVAL;
}

static int hisi_ptt_trace_valid_filter(struct hisi_ptt *hisi_ptt, u64 config)
{
        unsigned long val, port_mask = hisi_ptt->port_mask;
        struct hisi_ptt_filter_desc *filter;
        int ret = 0;

        hisi_ptt->trace_ctrl.is_port = FIELD_GET(HISI_PTT_PMU_FILTER_IS_PORT, config);
        val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, config);

        /*
         * Port filters are defined as bit mask. For port filters, check
         * the bits in the @val are within the range of hisi_ptt->port_mask
         * and whether it's empty or not, otherwise user has specified
         * some unsupported root ports.
         *
         * For Requester ID filters, walk the available filter list to see
         * whether we have one matched.
         */
        mutex_lock(&hisi_ptt->filter_lock);
        if (!hisi_ptt->trace_ctrl.is_port) {
                list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
                        if (val == hisi_ptt_get_filter_val(filter->devid, filter->is_port))
                                goto out;
                }
        } else if (bitmap_subset(&val, &port_mask, BITS_PER_LONG)) {
                goto out;
        }

        ret = -EINVAL;
out:
        mutex_unlock(&hisi_ptt->filter_lock);
        return ret;
}

static void hisi_ptt_pmu_init_configs(struct hisi_ptt *hisi_ptt, struct perf_event *event)
{
        struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
        u32 val;

        val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, event->attr.config);
        hisi_ptt->trace_ctrl.filter = val;

        val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
        ctrl->direction = val;

        val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
        ctrl->type = val;

        val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
        ctrl->format = val;
}

static int hisi_ptt_pmu_event_init(struct perf_event *event)
{
        struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
        int ret;
        u32 val;

        if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
                return -ENOENT;

        if (event->cpu < 0) {
                dev_dbg(event->pmu->dev, "Per-task mode not supported\n");
                return -EOPNOTSUPP;
        }

        if (event->attach_state & PERF_ATTACH_TASK)
                return -EOPNOTSUPP;

        ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config);
        if (ret < 0)
                return ret;

        val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
        ret = hisi_ptt_trace_valid_direction(val);
        if (ret < 0)
                return ret;

        val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
        ret = hisi_ptt_trace_valid_type(val);
        if (ret < 0)
                return ret;

        val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
        return hisi_ptt_trace_valid_format(val);
}

static void *hisi_ptt_pmu_setup_aux(struct perf_event *event, void **pages,
                                    int nr_pages, bool overwrite)
{
        struct hisi_ptt_pmu_buf *buf;
        struct page **pagelist;
        int i;

        if (overwrite) {
                dev_warn(event->pmu->dev, "Overwrite mode is not supported\n");
                return NULL;
        }

        /* If the pages size less than buffers, we cannot start trace */
        if (nr_pages < HISI_PTT_TRACE_TOTAL_BUF_SIZE / PAGE_SIZE)
                return NULL;

        buf = kzalloc_obj(*buf);
        if (!buf)
                return NULL;

        pagelist = kzalloc_objs(*pagelist, nr_pages);
        if (!pagelist)
                goto err;

        for (i = 0; i < nr_pages; i++)
                pagelist[i] = virt_to_page(pages[i]);

        buf->base = vmap(pagelist, nr_pages, VM_MAP, PAGE_KERNEL);
        if (!buf->base) {
                kfree(pagelist);
                goto err;
        }

        buf->nr_pages = nr_pages;
        buf->length = nr_pages * PAGE_SIZE;
        buf->pos = 0;

        kfree(pagelist);
        return buf;
err:
        kfree(buf);
        return NULL;
}

static void hisi_ptt_pmu_free_aux(void *aux)
{
        struct hisi_ptt_pmu_buf *buf = aux;

        vunmap(buf->base);
        kfree(buf);
}

static void hisi_ptt_pmu_start(struct perf_event *event, int flags)
{
        struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
        struct perf_output_handle *handle = &hisi_ptt->trace_ctrl.handle;
        struct hw_perf_event *hwc = &event->hw;
        struct device *dev = event->pmu->dev;
        struct hisi_ptt_pmu_buf *buf;
        int cpu = event->cpu;
        int ret;

        hwc->state = 0;

        /* Serialize the perf process if user specified several CPUs */
        spin_lock(&hisi_ptt->pmu_lock);
        if (hisi_ptt->trace_ctrl.started) {
                dev_dbg(dev, "trace has already started\n");
                goto stop;
        }

        /*
         * Handle the interrupt on the same cpu which starts the trace to avoid
         * context mismatch. Otherwise we'll trigger the WARN from the perf
         * core in event_function_local(). If CPU passed is offline we'll fail
         * here, just log it since we can do nothing here.
         */
        ret = irq_set_affinity(hisi_ptt->trace_irq, cpumask_of(cpu));
        if (ret)
                dev_warn(dev, "failed to set the affinity of trace interrupt\n");

        hisi_ptt->trace_ctrl.on_cpu = cpu;

        buf = perf_aux_output_begin(handle, event);
        if (!buf) {
                dev_dbg(dev, "aux output begin failed\n");
                goto stop;
        }

        buf->pos = handle->head % buf->length;

        hisi_ptt_pmu_init_configs(hisi_ptt, event);

        ret = hisi_ptt_trace_start(hisi_ptt);
        if (ret) {
                dev_dbg(dev, "trace start failed, ret = %d\n", ret);
                perf_aux_output_end(handle, 0);
                goto stop;
        }

        spin_unlock(&hisi_ptt->pmu_lock);
        return;
stop:
        event->hw.state |= PERF_HES_STOPPED;
        spin_unlock(&hisi_ptt->pmu_lock);
}

static void hisi_ptt_pmu_stop(struct perf_event *event, int flags)
{
        struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        if (hwc->state & PERF_HES_STOPPED)
                return;

        spin_lock(&hisi_ptt->pmu_lock);
        if (hisi_ptt->trace_ctrl.started) {
                hisi_ptt_trace_end(hisi_ptt);

                if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt))
                        dev_warn(event->pmu->dev, "Device is still busy\n");

                hisi_ptt_update_aux(hisi_ptt, hisi_ptt->trace_ctrl.buf_index, true);
        }
        spin_unlock(&hisi_ptt->pmu_lock);

        hwc->state |= PERF_HES_STOPPED;
        perf_event_update_userpage(event);
        hwc->state |= PERF_HES_UPTODATE;
}

static int hisi_ptt_pmu_add(struct perf_event *event, int flags)
{
        struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int cpu = event->cpu;

        /* Only allow the cpus on the device's node to add the event */
        if (!cpumask_test_cpu(cpu, cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev))))
                return 0;

        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

        if (flags & PERF_EF_START) {
                hisi_ptt_pmu_start(event, PERF_EF_RELOAD);
                if (hwc->state & PERF_HES_STOPPED)
                        return -EINVAL;
        }

        return 0;
}

static void hisi_ptt_pmu_del(struct perf_event *event, int flags)
{
        hisi_ptt_pmu_stop(event, PERF_EF_UPDATE);
}

static void hisi_ptt_pmu_read(struct perf_event *event)
{
}

static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node)
{
        cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node);
}

static void hisi_ptt_unregister_pmu(void *pmu)
{
        perf_pmu_unregister(pmu);
}

static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
{
        u16 core_id, sicl_id;
        char *pmu_name;
        u32 reg;
        int ret;

        ret = cpuhp_state_add_instance_nocalls(hisi_ptt_pmu_online,
                                               &hisi_ptt->hotplug_node);
        if (ret)
                return ret;

        ret = devm_add_action_or_reset(&hisi_ptt->pdev->dev,
                                       hisi_ptt_remove_cpuhp_instance,
                                       &hisi_ptt->hotplug_node);
        if (ret)
                return ret;

        mutex_init(&hisi_ptt->tune_lock);
        spin_lock_init(&hisi_ptt->pmu_lock);

        hisi_ptt->hisi_ptt_pmu = (struct pmu) {
                .module         = THIS_MODULE,
                .parent         = &hisi_ptt->pdev->dev,
                .capabilities   = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_NO_EXCLUDE,
                .task_ctx_nr    = perf_sw_context,
                .attr_groups    = hisi_ptt_pmu_groups,
                .event_init     = hisi_ptt_pmu_event_init,
                .setup_aux      = hisi_ptt_pmu_setup_aux,
                .free_aux       = hisi_ptt_pmu_free_aux,
                .start          = hisi_ptt_pmu_start,
                .stop           = hisi_ptt_pmu_stop,
                .add            = hisi_ptt_pmu_add,
                .del            = hisi_ptt_pmu_del,
                .read           = hisi_ptt_pmu_read,
        };

        reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
        core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
        sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);

        pmu_name = devm_kasprintf(&hisi_ptt->pdev->dev, GFP_KERNEL, "hisi_ptt%u_%u",
                                  sicl_id, core_id);
        if (!pmu_name)
                return -ENOMEM;

        ret = perf_pmu_register(&hisi_ptt->hisi_ptt_pmu, pmu_name, -1);
        if (ret)
                return ret;

        return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
                                        hisi_ptt_unregister_pmu,
                                        &hisi_ptt->hisi_ptt_pmu);
}

static void hisi_ptt_unregister_filter_update_notifier(void *data)
{
        struct hisi_ptt *hisi_ptt = data;

        bus_unregister_notifier(&pci_bus_type, &hisi_ptt->hisi_ptt_nb);

        /* Cancel any work that has been queued */
        cancel_delayed_work_sync(&hisi_ptt->work);
}

/* Register the bus notifier for dynamically updating the filter list */
static int hisi_ptt_register_filter_update_notifier(struct hisi_ptt *hisi_ptt)
{
        int ret;

        hisi_ptt->hisi_ptt_nb.notifier_call = hisi_ptt_notifier_call;
        ret = bus_register_notifier(&pci_bus_type, &hisi_ptt->hisi_ptt_nb);
        if (ret)
                return ret;

        return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
                                        hisi_ptt_unregister_filter_update_notifier,
                                        hisi_ptt);
}

/*
 * The DMA of PTT trace can only use direct mappings due to some
 * hardware restriction. Check whether there is no IOMMU or the
 * policy of the IOMMU domain is passthrough, otherwise the trace
 * cannot work.
 *
 * The PTT device is supposed to behind an ARM SMMUv3, which
 * should have passthrough the device by a quirk.
 */
static int hisi_ptt_check_iommu_mapping(struct pci_dev *pdev)
{
        struct iommu_domain *iommu_domain;

        iommu_domain = iommu_get_domain_for_dev(&pdev->dev);
        if (!iommu_domain || iommu_domain->type == IOMMU_DOMAIN_IDENTITY)
                return 0;

        return -EOPNOTSUPP;
}

static int hisi_ptt_probe(struct pci_dev *pdev,
                          const struct pci_device_id *id)
{
        struct hisi_ptt *hisi_ptt;
        int ret;

        ret = hisi_ptt_check_iommu_mapping(pdev);
        if (ret) {
                pci_err(pdev, "requires direct DMA mappings\n");
                return ret;
        }

        hisi_ptt = devm_kzalloc(&pdev->dev, sizeof(*hisi_ptt), GFP_KERNEL);
        if (!hisi_ptt)
                return -ENOMEM;

        hisi_ptt->pdev = pdev;
        pci_set_drvdata(pdev, hisi_ptt);

        ret = pcim_enable_device(pdev);
        if (ret) {
                pci_err(pdev, "failed to enable device, ret = %d\n", ret);
                return ret;
        }

        ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
        if (ret) {
                pci_err(pdev, "failed to remap io memory, ret = %d\n", ret);
                return ret;
        }

        hisi_ptt->iobase = pcim_iomap_table(pdev)[2];

        ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
        if (ret) {
                pci_err(pdev, "failed to set 64 bit dma mask, ret = %d\n", ret);
                return ret;
        }

        pci_set_master(pdev);

        ret = hisi_ptt_register_irq(hisi_ptt);
        if (ret)
                return ret;

        ret = hisi_ptt_init_ctrls(hisi_ptt);
        if (ret) {
                pci_err(pdev, "failed to init controls, ret = %d\n", ret);
                return ret;
        }

        ret = hisi_ptt_register_filter_update_notifier(hisi_ptt);
        if (ret)
                pci_warn(pdev, "failed to register filter update notifier, ret = %d", ret);

        ret = hisi_ptt_register_pmu(hisi_ptt);
        if (ret) {
                pci_err(pdev, "failed to register PMU device, ret = %d", ret);
                return ret;
        }

        ret = hisi_ptt_init_filter_attributes(hisi_ptt);
        if (ret) {
                pci_err(pdev, "failed to init sysfs filter attributes, ret = %d", ret);
                return ret;
        }

        return 0;
}

static const struct pci_device_id hisi_ptt_id_tbl[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12e) },
        { }
};
MODULE_DEVICE_TABLE(pci, hisi_ptt_id_tbl);

static struct pci_driver hisi_ptt_driver = {
        .name = DRV_NAME,
        .id_table = hisi_ptt_id_tbl,
        .probe = hisi_ptt_probe,
};

static int hisi_ptt_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
        struct hisi_ptt *hisi_ptt;
        struct device *dev;
        int target, src;

        hisi_ptt = hlist_entry_safe(node, struct hisi_ptt, hotplug_node);
        src = hisi_ptt->trace_ctrl.on_cpu;
        dev = hisi_ptt->hisi_ptt_pmu.dev;

        if (!hisi_ptt->trace_ctrl.started || src != cpu)
                return 0;

        target = cpumask_any_but(cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev)), cpu);
        if (target >= nr_cpu_ids) {
                dev_err(dev, "no available cpu for perf context migration\n");
                return 0;
        }

        perf_pmu_migrate_context(&hisi_ptt->hisi_ptt_pmu, src, target);

        /*
         * Also make sure the interrupt bind to the migrated CPU as well. Warn
         * the user on failure here.
         */
        if (irq_set_affinity(hisi_ptt->trace_irq, cpumask_of(target)))
                dev_warn(dev, "failed to set the affinity of trace interrupt\n");

        hisi_ptt->trace_ctrl.on_cpu = target;
        return 0;
}

static int __init hisi_ptt_init(void)
{
        int ret;

        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRV_NAME, NULL,
                                      hisi_ptt_cpu_teardown);
        if (ret < 0)
                return ret;
        hisi_ptt_pmu_online = ret;

        ret = pci_register_driver(&hisi_ptt_driver);
        if (ret)
                cpuhp_remove_multi_state(hisi_ptt_pmu_online);

        return ret;
}
module_init(hisi_ptt_init);

static void __exit hisi_ptt_exit(void)
{
        pci_unregister_driver(&hisi_ptt_driver);
        cpuhp_remove_multi_state(hisi_ptt_pmu_online);
}
module_exit(hisi_ptt_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
MODULE_DESCRIPTION("Driver for HiSilicon PCIe tune and trace device");