root/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * isst_tpmi_core.c: SST TPMI interface core
 *
 * Copyright (c) 2023, Intel Corporation.
 * All Rights Reserved.
 *
 * This information will be useful to understand flows:
 * In the current generation of platforms, TPMI is supported via OOB
 * PCI device. This PCI device has one instance per CPU package.
 * There is a unique TPMI ID for SST. Each TPMI ID also has multiple
 * entries, representing per power domain information.
 *
 * There is one dev file for complete SST information and control, same as
 * the prior generation of hardware. User space doesn't need to know how the
 * information is presented by the hardware. The TPMI core module implements
 * the hardware mapping.
 */

/* Prefix for the device messages (dev_err(), dev_info(), ...) printed by this file */
#define dev_fmt(fmt) "tpmi_sst: " fmt

#include <linux/auxiliary_bus.h>
#include <linux/delay.h>
#include <linux/intel_tpmi.h>
#include <linux/intel_vsec.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/minmax.h>
#include <linux/module.h>
#include <asm/msr.h>
#include <uapi/linux/isst_if.h>

#include "isst_tpmi_core.h"
#include "isst_if_common.h"

/*
 * SST hardware interface version supported by this driver. A different major
 * version is rejected, a newer minor version is only reported.
 */
#define ISST_MAJOR_VERSION      0
#define ISST_MINOR_VERSION      3

/*
 * Scaling factor to use when a value read from MMIO is already in a
 * standard unit and doesn't need to be multiplied.
 */
#define SST_MUL_FACTOR_NONE    1

/* Scaling factor of 100 to convert between a frequency ratio and a frequency */
#define SST_MUL_FACTOR_FREQ    100

/* All SST registers are 64 bits (8 bytes) in size */
#define SST_REG_SIZE   8

/**
 * struct sst_header -  SST main header
 * @interface_version:  Version number for this interface
 * @cap_mask:           Bitmask of the supported sub features. 1=the sub feature is enabled.
 *                      0=disabled.
 *                      Bit[8]= SST_CP enable (1), disable (0)
 *                      bit[9]= SST_PP enable (1), disable (0)
 *                      other bits are reserved for future use.
 *                      The bit numbers above are positions in the 64-bit header
 *                      register. As this field is the second byte of the
 *                      register, they are bit 0 and bit 1 of @cap_mask.
 * @cp_offset:          Qword (8 bytes) offset to the SST_CP register bank.
 *                      sst_main() converts it in place to a byte offset.
 * @pp_offset:          Qword (8 bytes) offset to the SST_PP register bank.
 *                      sst_main() converts it in place to a byte offset.
 * @reserved:           Reserved for future use
 *
 * This register allows SW to discover SST capability and the offsets to SST-CP
 * and SST-PP register banks. The structure is filled by storing the 64-bit
 * register value read from the SST MMIO base.
 */
struct sst_header {
        u8 interface_version;
        u8 cap_mask;
        u8 cp_offset;
        u8 pp_offset;
        u32 reserved;
} __packed;

/**
 * struct cp_header -   SST-CP (core-power) header
 * @feature_id:         0=SST-CP, 1=SST-PP, 2=SST-BF, 3=SST-TF
 * @feature_rev:        Interface Version number for this SST feature
 * @ratio_unit:         Frequency ratio unit. 00: 100MHz. All others are reserved
 * @reserved:           Reserved for future use
 *
 * This structure is used to store the SST-CP header. It is packed to the same
 * format as defined in the specifications and is filled with the 64-bit value
 * read at the start of the SST-CP register bank.
 */
struct cp_header {
        u64 feature_id :4;
        u64 feature_rev :8;
        u64 ratio_unit :2;
        u64 reserved :50;
} __packed;

/**
 * struct pp_header -   SST-PP (Perf profile) header
 * @feature_id:         0=SST-CP, 1=SST-PP, 2=SST-BF, 3=SST-TF
 * @feature_rev:        Interface Version number for this SST feature
 * @level_en_mask:      SST-PP level enable/disable fuse mask. The highest bit
 *                      set in this mask is used as the maximum PP level.
 * @allowed_level_mask: Allowed level mask used for dynamic config level switching
 * @reserved0:          Reserved for future use
 * @ratio_unit:         Frequency ratio unit. 00: 100MHz. All others are reserved
 * @block_size:         Size of PP block in Qword unit (8 bytes)
 * @dynamic_switch:     If set (1), dynamic switching of SST PP is supported
 * @memory_ratio_unit:  Memory Controller frequency ratio unit. 00: 100MHz, others reserved
 * @reserved1:          Reserved for future use
 *
 * This structure is used to store the SST-PP header. It is packed to the same
 * format as defined in the specifications and is filled with the 64-bit value
 * read at the start of the SST-PP register bank.
 */
struct pp_header {
        u64 feature_id :4;
        u64 feature_rev :8;
        u64 level_en_mask :8;
        u64 allowed_level_mask :8;
        u64 reserved0 :4;
        u64 ratio_unit :2;
        u64 block_size :8;
        u64 dynamic_switch :1;
        u64 memory_ratio_unit :2;
        u64 reserved1 :19;
} __packed;

/**
 * struct feature_offset -      Offsets to SST-PP features
 * @pp_offset:          Qword offset within PP level for the SST_PP register bank
 * @bf_offset:          Qword offset within PP level for the SST_BF register bank
 * @tf_offset:          Qword offset within PP level for the SST_TF register bank
 * @reserved:           Reserved for future use
 *
 * This structure is used to store the offsets of the SST features in the
 * register bank. It is packed to the same format as defined in the
 * specifications. The offsets are kept in Qword units, users multiply them by
 * 8 to get a byte offset.
 */
struct feature_offset {
        u64 pp_offset :8;
        u64 bf_offset :8;
        u64 tf_offset :8;
        u64 reserved :40;
} __packed;

/**
 * struct levels_offset -       Offsets to each SST PP level
 * @sst_pp_level0_offset:       Qword offset to the register block of PP level 0
 * @sst_pp_level1_offset:       Qword offset to the register block of PP level 1
 * @sst_pp_level2_offset:       Qword offset to the register block of PP level 2
 * @sst_pp_level3_offset:       Qword offset to the register block of PP level 3
 * @sst_pp_level4_offset:       Qword offset to the register block of PP level 4
 * @reserved:                   Reserved for future use
 *
 * This structure describes the offsets of the SST PP levels in the register
 * bank, one byte per level. It is packed to the same format as defined in the
 * specifications.
 */
struct levels_offset {
        u64 sst_pp_level0_offset :8;
        u64 sst_pp_level1_offset :8;
        u64 sst_pp_level2_offset :8;
        u64 sst_pp_level3_offset :8;
        u64 sst_pp_level4_offset :8;
        u64 reserved :24;
} __packed;

/**
 * struct pp_control_offset -   Fields of the SST PP control register
 * @perf_level:         A SST-PP level that SW intends to switch to
 * @perf_level_lock:    SST-PP level select lock. 0 - unlocked. 1 - locked till next reset
 * @resvd0:             Reserved for future use
 * @current_state:      Bit mask to control the enable(1)/disable(0) state of each feature
 *                      of the current PP level, bit 0 = BF, bit 1 = TF, bit 2-7 = reserved
 * @reserved:           Reserved for future use
 *
 * This structure is used to store the SST PP control fields of the register
 * bank. It is packed to the same format as defined in the specifications.
 */
struct pp_control_offset {
        u64 perf_level :3;
        u64 perf_level_lock :1;
        u64 resvd0 :4;
        u64 current_state :8;
        u64 reserved :48;
} __packed;

/**
 * struct pp_status_offset -    Fields of the SST PP status register
 * @sst_pp_level:       Returns the current SST-PP level
 * @sst_pp_lock:        Returns the lock bit setting of perf_level_lock in pp_control_offset
 * @error_type:         Returns last error of SST-PP level change request. 0: no error,
 *                      1: level change not allowed, others: reserved
 * @feature_state:      Bit mask to indicate the enable(1)/disable(0) state of each feature of the
 *                      current PP level. bit 0 = BF, bit 1 = TF, bit 2-7 reserved
 * @reserved0:          Reserved for future use
 * @feature_error_type: Returns last error of the specific feature. Three error_type bits per
 *                      feature. i.e. ERROR_TYPE[2:0] for BF, ERROR_TYPE[5:3] for TF, etc.
 *                      0x0: no error, 0x1: The specific feature is not supported by the hardware.
 *                      0x2-0x6: Reserved. 0x7: feature state change is not allowed.
 * @reserved1:          Reserved for future use
 *
 * This structure is used to store the SST PP status fields of the register
 * bank. It is packed to the same format as defined in the specifications.
 */
struct pp_status_offset {
        u64 sst_pp_level :3;
        u64 sst_pp_lock :1;
        u64 error_type :4;
        u64 feature_state :8;
        u64 reserved0 :16;
        u64 feature_error_type : 24;
        u64 reserved1 :8;
} __packed;

/**
 * struct perf_level -  Used to store perf level and mmio offset
 * @mmio_offset:        mmio offset for a perf level, in bytes from the SST
 *                      base mmio address
 * @level:              perf level for this offset
 *
 * This structure is used to store the final mmio offset of each perf level
 * from the SST base mmio offset.
 */
struct perf_level {
        int mmio_offset;
        int level;
};

/**
 * struct tpmi_per_power_domain_info -  Store per power_domain SST info
 * @package_id:         Package id for this power_domain
 * @power_domain_id:    Power domain id, Each entry from the SST-TPMI instance is a power_domain.
 * @max_level:          Maximum PP level possible for this power_domain
 * @ratio_unit:         Ratio unit for converting to MHz
 * @avx_levels:         Number of AVX levels
 * @pp_block_size:      Block size from PP header
 * @sst_header:         Store SST header for this power_domain
 * @cp_header:          Store SST-CP header for this power_domain
 * @pp_header:          Store SST-PP header for this power_domain
 * @perf_levels:        Pointer to each perf level to map level to mmio offset
 * @feature_offsets:    Store feature offsets for each PP-level
 * @control_offset:     Store the control offset for each PP-level
 * @status_offset:      Store the status offset for each PP-level
 * @sst_base:           Mapped SST base IO memory. An instance without a mapped
 *                      base is treated as not present.
 * @auxdev:             Auxiliary device instance enumerated this instance
 * @saved_sst_cp_control: Save SST-CP control configuration to store restore for suspend/resume
 * @saved_clos_configs: Save SST-CP CLOS configuration to store restore for suspend/resume
 * @saved_clos_assocs:  Save SST-CP CLOS association to store restore for suspend/resume
 * @saved_pp_control:   Save SST-PP control information to store restore for suspend/resume
 * @write_blocked:      Write operation is blocked, so can't change SST state
 *
 * This structure is used to store the complete SST information for a power_domain. This
 * information is used to serve read/write requests of any SST IOCTL. Each physical CPU package
 * can have multiple power_domains. Each power domain describes its own SST information and has
 * its own controls.
 */
struct tpmi_per_power_domain_info {
        int package_id;
        int power_domain_id;
        int max_level;
        int ratio_unit;
        int avx_levels;
        int pp_block_size;
        struct sst_header sst_header;
        struct cp_header cp_header;
        struct pp_header pp_header;
        struct perf_level *perf_levels;
        struct feature_offset feature_offsets;
        struct pp_control_offset control_offset;
        struct pp_status_offset status_offset;
        void __iomem *sst_base;
        struct auxiliary_device *auxdev;
        u64 saved_sst_cp_control;
        u64 saved_clos_configs[4];
        u64 saved_clos_assocs[4];
        u64 saved_pp_control;
        bool write_blocked;
};

/* Maximum number of partitions supported for a package */
#define SST_MAX_PARTITIONS      2

/**
 * struct tpmi_sst_struct -     Store sst info for a package
 * @package_id:                 Package id for this aux device instance
 * @number_of_power_domains:    Number of power_domains pointed by power_domain_info pointer,
 *                              one count for each partition
 * @power_domain_info:          Pointer to power domains information, one array for each
 *                              partition
 * @cdie_mask:                  Mask of compute dies present in a partition from hardware.
 *                              This mask is not present in the version 1 information header.
 * @io_dies:                    Number of IO dies in a partition. This will be 0 for TPMI
 *                              version 1 information header.
 * @partition_mask:             Mask of all partitions.
 * @partition_mask_current:     Current partition mask as some may have been unbound.
 *
 * This structure is used to store the full SST information for a package.
 * Each package has one or multiple OOB PCI devices. Each package can contain multiple
 * power domains.
 */
struct tpmi_sst_struct {
        int package_id;
        struct tpmi_per_power_domain_info *power_domain_info[SST_MAX_PARTITIONS];
        u16 cdie_mask[SST_MAX_PARTITIONS];
        u8 number_of_power_domains[SST_MAX_PARTITIONS];
        u8 io_dies[SST_MAX_PARTITIONS];
        u8 partition_mask;
        u8 partition_mask_current;
};

/**
 * struct tpmi_sst_common_struct -      Store all SST instances
 * @max_index:          Maximum instances currently present. Package ids above
 *                      this value are rejected on lookup.
 * @sst_inst:           Pointer to per package instance, indexed by package id
 *
 * Stores every SST Package instance.
 */
struct tpmi_sst_common_struct {
        int max_index;
        struct tpmi_sst_struct **sst_inst;
};

/*
 * Each IOCTL request is processed under this lock. Also used to protect
 * registration functions and common data structures.
 */
static DEFINE_MUTEX(isst_tpmi_dev_lock);

/* Usage count to track the number of TPMI SST instances registered to this core. */
static int isst_core_usage_count;

/* Stores complete SST information for every package and power_domain */
static struct tpmi_sst_common_struct isst_common;

/* Number of AVX levels assigned to every power domain */
#define SST_MAX_AVX_LEVELS      3

/*
 * Byte offsets from the start of the SST-PP register bank. The register at
 * SST_PP_OFFSET_0 holds the feature offsets and the register at
 * SST_PP_OFFSET_1 holds the offset of each PP level, using
 * SST_PP_OFFSET_SIZE bits for each level.
 */
#define SST_PP_OFFSET_0         8
#define SST_PP_OFFSET_1         16
#define SST_PP_OFFSET_SIZE      8

static int sst_add_perf_profiles(struct auxiliary_device *auxdev,
                                 struct tpmi_per_power_domain_info *pd_info,
                                 int levels)
{
        struct device *dev = &auxdev->dev;
        u64 perf_level_offsets;
        int i;

        pd_info->perf_levels = devm_kcalloc(dev, levels, sizeof(struct perf_level), GFP_KERNEL);
        if (!pd_info->perf_levels)
                return 0;

        pd_info->ratio_unit = pd_info->pp_header.ratio_unit;
        pd_info->avx_levels = SST_MAX_AVX_LEVELS;
        pd_info->pp_block_size = pd_info->pp_header.block_size;

        /* Read PP Offset 0: Get feature offset with PP level */
        *((u64 *)&pd_info->feature_offsets) = readq(pd_info->sst_base +
                                                    pd_info->sst_header.pp_offset +
                                                    SST_PP_OFFSET_0);

        perf_level_offsets = readq(pd_info->sst_base + pd_info->sst_header.pp_offset +
                                   SST_PP_OFFSET_1);

        for (i = 0; i < levels; ++i) {
                u64 offset;

                offset = perf_level_offsets & (0xffULL << (i * SST_PP_OFFSET_SIZE));
                offset >>= (i * 8);
                offset &= 0xff;
                offset *= 8; /* Convert to byte from QWORD offset */
                pd_info->perf_levels[i].mmio_offset = pd_info->sst_header.pp_offset + offset;
        }

        return 0;
}

static int sst_main(struct auxiliary_device *auxdev, struct tpmi_per_power_domain_info *pd_info)
{
        struct device *dev = &auxdev->dev;
        int i, mask, levels;

        *((u64 *)&pd_info->sst_header) = readq(pd_info->sst_base);
        pd_info->sst_header.cp_offset *= 8;
        pd_info->sst_header.pp_offset *= 8;

        if (pd_info->sst_header.interface_version == TPMI_VERSION_INVALID)
                return -ENODEV;

        if (TPMI_MAJOR_VERSION(pd_info->sst_header.interface_version) != ISST_MAJOR_VERSION) {
                dev_err(dev, "SST: Unsupported major version:%lx\n",
                        TPMI_MAJOR_VERSION(pd_info->sst_header.interface_version));
                return -ENODEV;
        }

        if (TPMI_MINOR_VERSION(pd_info->sst_header.interface_version) > ISST_MINOR_VERSION)
                dev_info(dev, "SST: Ignore: Unsupported minor version:%lx\n",
                         TPMI_MINOR_VERSION(pd_info->sst_header.interface_version));

        /* Read SST CP Header */
        *((u64 *)&pd_info->cp_header) = readq(pd_info->sst_base + pd_info->sst_header.cp_offset);

        /* Read PP header */
        *((u64 *)&pd_info->pp_header) = readq(pd_info->sst_base + pd_info->sst_header.pp_offset);

        mask = 0x01;
        levels = 0;
        for (i = 0; i < 8; ++i) {
                if (pd_info->pp_header.level_en_mask & mask)
                        levels = i;
                mask <<= 1;
        }
        pd_info->max_level = levels;
        sst_add_perf_profiles(auxdev, pd_info, levels + 1);

        return 0;
}

/* Sum the number of power domains over every partition of a package. */
static u8 isst_instance_count(struct tpmi_sst_struct *sst_inst)
{
        /* Partition mask starts from bit 0 and contains 1s only */
        u8 nr_partitions = hweight8(sst_inst->partition_mask);
        u8 total = 0;
        u8 part = 0;

        while (part < nr_partitions) {
                total += sst_inst->number_of_power_domains[part];
                part++;
        }

        return total;
}

/**
 * map_cdies() - Map user domain ID to compute domain ID
 * @sst_inst: TPMI Instance
 * @id: User domain ID
 * @partition: Resolved partition, only written when a match is found
 *
 * Helper for map_partition_power_domain_id(). Find the first partition whose
 * hardware provided cdie_mask has the bit for @id set and return the position
 * of @id relative to the lowest compute die of that partition.
 *
 * Return: %-EINVAL on error, otherwise mapped domain ID >= 0.
 */
static int map_cdies(struct tpmi_sst_struct *sst_inst, u8 id, u8 *partition)
{
        u8 nr_partitions = hweight8(sst_inst->partition_mask);
        u8 part;

        for (part = 0; part < nr_partitions; part++) {
                u16 cdies = sst_inst->cdie_mask[part];

                if (cdies & BIT(id)) {
                        *partition = part;
                        /* ffs() is 1 based, hence the + 1 */
                        return id - ffs(cdies) + 1;
                }
        }

        return -EINVAL;
}

/**
 * map_partition_power_domain_id() - Map user domain ID to partition domain ID
 * @sst_inst: TPMI Instance
 * @id: User domain ID
 * @partition: Resolved partition
 *
 * In a partitioned system a CPU package has two separate MMIO ranges (Under
 * two PCI devices). But the CPU package compute die/power domain IDs are
 * unique in a package. User space can get compute die/power domain ID from
 * CPUID and MSR 0x54 for a CPU. So, those IDs need to be preserved even if
 * they are present in two different partitions with its own order.
 *
 * For example for command ISST_IF_COUNT_TPMI_INSTANCES, the valid_mask
 * is 111111b for a 4 compute and 2 IO dies system. This is presented as
 * provided by the hardware in a non-partitioned system with the following
 * order:
 *      I1-I0-C3-C2-C1-C0
 * Here: "C": for compute and "I" for IO die.
 * Compute dies are always present first in TPMI instances, as they have
 * to map to the real power domain/die ID of a system. In a non-partitioned
 * system there is no way to identify compute and IO die boundaries from
 * this driver without reading each CPU's mapping.
 *
 * The same order needs to be preserved, even if those compute dies are
 * distributed among multiple partitions. For example:
 * Partition 0 can contain: I0-C1-C0
 * Partition 1 can contain: I1-C3-C2
 *
 * This will require a conversion of user space IDs to the actual index into
 * array of stored power domains for each partition. For the above example
 * this function will return partition and index as follows:
 *
 * =============        =========       =====   ========
 * User space ID        Partition       Index   Die type
 * =============        =========       =====   ========
 * 0                    0               0       Compute
 * 1                    0               1       Compute
 * 2                    1               0       Compute
 * 3                    1               1       Compute
 * 4                    0               2       IO
 * 5                    1               2       IO
 * =============        =========       =====   ========
 *
 * Return: %-EINVAL on error, otherwise mapped domain ID >= 0.
 */
static int map_partition_power_domain_id(struct tpmi_sst_struct *sst_inst, u8 id, u8 *partition)
{
        u8 i, io_start_id, max_part;

        *partition = 0;

        /* If any PCI device for partition is unbound, treat this as failure */
        if (sst_inst->partition_mask != sst_inst->partition_mask_current)
                return -EINVAL;

        max_part = hweight8(sst_inst->partition_mask);

        /* The per partition arrays can't be indexed without a valid partition count */
        if (!max_part || max_part > SST_MAX_PARTITIONS)
                return -EINVAL;

        /* IO Index begin here */
        io_start_id = fls(sst_inst->cdie_mask[max_part - 1]);

        if (id < io_start_id)
                return map_cdies(sst_inst, id, partition);

        /* IO dies of a partition are stored after the compute dies of the partition */
        for (i = 0; i < max_part; i++) {
                u8 io_id;

                io_id = id - io_start_id;
                if (io_id < sst_inst->io_dies[i]) {
                        u8 cdie_range;

                        cdie_range = fls(sst_inst->cdie_mask[i]) - ffs(sst_inst->cdie_mask[i]) + 1;
                        *partition = i;
                        return cdie_range + io_id;
                }
                io_start_id += sst_inst->io_dies[i];
        }

        return -EINVAL;
}

/*
 * Map a package and power_domain id to SST information structure unique for a power_domain.
 * The caller should call under isst_tpmi_dev_lock.
 */
static struct tpmi_per_power_domain_info *get_instance(int pkg_id, int power_domain_id)
{
        struct tpmi_per_power_domain_info *power_domain_info;
        struct tpmi_sst_struct *sst_inst;
        u8 part;

        if (!in_range(pkg_id, 0, topology_max_packages()) || pkg_id > isst_common.max_index)
                return NULL;

        sst_inst = isst_common.sst_inst[pkg_id];
        if (!sst_inst)
                return NULL;

        power_domain_id = map_partition_power_domain_id(sst_inst, power_domain_id, &part);
        if (power_domain_id < 0)
                return NULL;

        power_domain_info = &sst_inst->power_domain_info[part][power_domain_id];

        if (power_domain_info && !power_domain_info->sst_base)
                return NULL;

        return power_domain_info;
}

static bool disable_dynamic_sst_features(void)
{
        u64 value;

        if (!static_cpu_has(X86_FEATURE_HWP))
                return true;

        rdmsrq(MSR_PM_ENABLE, value);
        return !(value & 0x1);
}

/*
 * Read a bit field of a SST-CP register into "name".
 * The register is at byte "offset" from the start of the SST-CP bank, the
 * field begins at bit "start" and is "width" bits wide. The field value is
 * multiplied by "mult_factor" before it is stored. "name_str" is not used.
 *
 * This expands to a compound statement and requires a local variable called
 * power_domain_info in the calling scope.
 */
#define _read_cp_info(name_str, name, offset, start, width, mult_factor)\
{\
        u64 val, mask;\
        \
        val = readq(power_domain_info->sst_base + power_domain_info->sst_header.cp_offset +\
                        (offset));\
        mask = GENMASK_ULL(((start) + (width) - 1), (start));\
        val &= mask; \
        val >>= (start);\
        name = (val * (mult_factor));\
}

/*
 * Update a bit field of a SST-CP register from "name" with a read, modify
 * and write sequence. "name" is divided by "div_factor" first. The result is
 * limited to "width" bits, so that a too large value can't modify the fields
 * next to the one addressed. Other arguments are the same as for
 * _read_cp_info().
 *
 * This expands to a compound statement and requires a local variable called
 * power_domain_info in the calling scope.
 */
#define _write_cp_info(name_str, name, offset, start, width, div_factor)\
{\
        u64 val, mask;\
        \
        val = readq(power_domain_info->sst_base +\
                    power_domain_info->sst_header.cp_offset + (offset));\
        mask = GENMASK_ULL(((start) + (width) - 1), (start));\
        val &= ~mask;\
        val |= ((u64)((name) / (div_factor)) << (start)) & mask;\
        writeq(val, power_domain_info->sst_base + power_domain_info->sst_header.cp_offset +\
                (offset));\
}

/* Byte offsets of the SST-CP control and status registers from the SST-CP bank */
#define SST_CP_CONTROL_OFFSET   8
#define SST_CP_STATUS_OFFSET    16

/* Core-power enable field, same position in the control and status register */
#define SST_CP_ENABLE_START             0
#define SST_CP_ENABLE_WIDTH             1

/* Core-power priority type field, same position in the control and status register */
#define SST_CP_PRIORITY_TYPE_START      1
#define SST_CP_PRIORITY_TYPE_WIDTH      1

/*
 * Get or set the SST-CP (core-power) enable state and priority type of a
 * power domain for the struct isst_core_power request pointed by argp.
 *
 * A set request needs CAP_SYS_ADMIN, is refused while writes are blocked and
 * fails when dynamic SST features are disabled. A get request reads the
 * status register and copies the result back to user space.
 *
 * Return: 0 on success, -EFAULT on a user copy failure or when a set is
 * requested with dynamic SST features disabled, -EINVAL for an unknown power
 * domain, -EPERM when the set is not permitted.
 */
static long isst_if_core_power_state(void __user *argp)
{
        struct tpmi_per_power_domain_info *power_domain_info;
        struct isst_core_power core_power;

        if (copy_from_user(&core_power, argp, sizeof(core_power)))
                return -EFAULT;

        if (core_power.get_set && disable_dynamic_sst_features())
                return -EFAULT;

        power_domain_info = get_instance(core_power.socket_id, core_power.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        if (!core_power.get_set) {
                /* get: the current state is reported from the status register */
                _read_cp_info("cp_enable", core_power.enable, SST_CP_STATUS_OFFSET,
                              SST_CP_ENABLE_START, SST_CP_ENABLE_WIDTH, SST_MUL_FACTOR_NONE);
                _read_cp_info("cp_prio_type", core_power.priority_type, SST_CP_STATUS_OFFSET,
                              SST_CP_PRIORITY_TYPE_START, SST_CP_PRIORITY_TYPE_WIDTH,
                              SST_MUL_FACTOR_NONE);
                core_power.supported = !!(power_domain_info->sst_header.cap_mask & BIT(0));

                if (copy_to_user(argp, &core_power, sizeof(core_power)))
                        return -EFAULT;

                return 0;
        }

        /* set: the requested state is programmed to the control register */
        if (power_domain_info->write_blocked || !capable(CAP_SYS_ADMIN))
                return -EPERM;

        _write_cp_info("cp_enable", core_power.enable, SST_CP_CONTROL_OFFSET,
                       SST_CP_ENABLE_START, SST_CP_ENABLE_WIDTH, SST_MUL_FACTOR_NONE);
        _write_cp_info("cp_prio_type", core_power.priority_type, SST_CP_CONTROL_OFFSET,
                       SST_CP_PRIORITY_TYPE_START, SST_CP_PRIORITY_TYPE_WIDTH,
                       SST_MUL_FACTOR_NONE);

        return 0;
}

/*
 * Byte offset of the CLOS 0 config register from the SST-CP bank. The config
 * register of CLOS n is n * SST_REG_SIZE bytes after it.
 */
#define SST_CLOS_CONFIG_0_OFFSET        24

/* Proportional priority field of a CLOS config register */
#define SST_CLOS_CONFIG_PRIO_START      4
#define SST_CLOS_CONFIG_PRIO_WIDTH      4

/* Minimum frequency ratio field of a CLOS config register */
#define SST_CLOS_CONFIG_MIN_START       8
#define SST_CLOS_CONFIG_MIN_WIDTH       8

/* Maximum frequency ratio field of a CLOS config register */
#define SST_CLOS_CONFIG_MAX_START       16
#define SST_CLOS_CONFIG_MAX_WIDTH       8

/*
 * Get or set the minimum frequency, maximum frequency and proportional
 * priority of a CLOS for the struct isst_clos_param request pointed by argp.
 *
 * Frequencies are exchanged with user space in MHz and converted to/from the
 * hardware ratio with SST_MUL_FACTOR_FREQ. A set request needs CAP_SYS_ADMIN
 * and is refused while writes are blocked.
 *
 * Return: 0 on success, -EFAULT on a user copy failure, -EINVAL for an
 * unknown power domain or CLOS, -EPERM when the set is not permitted.
 */
static long isst_if_clos_param(void __user *argp)
{
        struct tpmi_per_power_domain_info *power_domain_info;
        struct isst_clos_param clos_param;

        if (copy_from_user(&clos_param, argp, sizeof(clos_param)))
                return -EFAULT;

        power_domain_info = get_instance(clos_param.socket_id, clos_param.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        /*
         * The CLOS index selects the register to access. The CLOS config
         * registers are followed by the CLOS association registers, so an
         * index beyond the number of CLOS configs would access an unrelated
         * register.
         */
        if (clos_param.clos >= ARRAY_SIZE(power_domain_info->saved_clos_configs))
                return -EINVAL;

        if (clos_param.get_set) {
                if (power_domain_info->write_blocked || !capable(CAP_SYS_ADMIN))
                        return -EPERM;

                _write_cp_info("clos.min_freq", clos_param.min_freq_mhz,
                               (SST_CLOS_CONFIG_0_OFFSET + clos_param.clos * SST_REG_SIZE),
                               SST_CLOS_CONFIG_MIN_START, SST_CLOS_CONFIG_MIN_WIDTH,
                               SST_MUL_FACTOR_FREQ);
                _write_cp_info("clos.max_freq", clos_param.max_freq_mhz,
                               (SST_CLOS_CONFIG_0_OFFSET + clos_param.clos * SST_REG_SIZE),
                               SST_CLOS_CONFIG_MAX_START, SST_CLOS_CONFIG_MAX_WIDTH,
                               SST_MUL_FACTOR_FREQ);
                _write_cp_info("clos.prio", clos_param.prop_prio,
                               (SST_CLOS_CONFIG_0_OFFSET + clos_param.clos * SST_REG_SIZE),
                               SST_CLOS_CONFIG_PRIO_START, SST_CLOS_CONFIG_PRIO_WIDTH,
                               SST_MUL_FACTOR_NONE);
        } else {
                /* get */
                _read_cp_info("clos.min_freq", clos_param.min_freq_mhz,
                                (SST_CLOS_CONFIG_0_OFFSET + clos_param.clos * SST_REG_SIZE),
                                SST_CLOS_CONFIG_MIN_START, SST_CLOS_CONFIG_MIN_WIDTH,
                                SST_MUL_FACTOR_FREQ)
                _read_cp_info("clos.max_freq", clos_param.max_freq_mhz,
                                (SST_CLOS_CONFIG_0_OFFSET + clos_param.clos * SST_REG_SIZE),
                                SST_CLOS_CONFIG_MAX_START, SST_CLOS_CONFIG_MAX_WIDTH,
                                SST_MUL_FACTOR_FREQ)
                _read_cp_info("clos.prio", clos_param.prop_prio,
                                (SST_CLOS_CONFIG_0_OFFSET + clos_param.clos * SST_REG_SIZE),
                                SST_CLOS_CONFIG_PRIO_START, SST_CLOS_CONFIG_PRIO_WIDTH,
                                SST_MUL_FACTOR_NONE)

                if (copy_to_user(argp, &clos_param, sizeof(clos_param)))
                        return -EFAULT;
        }

        return 0;
}

/*
 * Byte offset of the first CLOS association register from the SST-CP bank.
 * Each register holds the CLOS of SST_CLOS_ASSOC_CPUS_PER_REG CPUs, using
 * SST_CLOS_ASSOC_BITS_PER_CPU bits for each CPU.
 */
#define SST_CLOS_ASSOC_0_OFFSET         56
#define SST_CLOS_ASSOC_CPUS_PER_REG     16
#define SST_CLOS_ASSOC_BITS_PER_CPU     4

/*
 * Get or set the CLOS association of one or more CPUs for the struct
 * isst_if_clos_assoc_cmds request pointed by argp.
 *
 * Commands are processed in order and processing stops at the first failure,
 * commands before the failing one stay applied. Only requests with
 * punit_cpu_map set are supported. A set request needs CAP_SYS_ADMIN and is
 * refused while writes are blocked.
 *
 * Return: 0 on success, -EFAULT on a user copy failure, -EINVAL for an
 * invalid command count, package, power domain, CPU or CLOS, -EOPNOTSUPP
 * when punit_cpu_map is not set, -EPERM when the set is not permitted.
 */
static long isst_if_clos_assoc(void __user *argp)
{
        struct isst_if_clos_assoc_cmds assoc_cmds;
        unsigned char __user *ptr;
        int i;

        /* Each multi command has u16 command count as the first field */
        if (copy_from_user(&assoc_cmds, argp, sizeof(assoc_cmds)))
                return -EFAULT;

        if (!assoc_cmds.cmd_count || assoc_cmds.cmd_count > ISST_IF_CMD_LIMIT)
                return -EINVAL;

        ptr = argp + offsetof(struct isst_if_clos_assoc_cmds, assoc_info);
        for (i = 0; i < assoc_cmds.cmd_count; ++i) {
                struct tpmi_per_power_domain_info *power_domain_info;
                struct isst_if_clos_assoc clos_assoc;
                int offset, shift, punit_cpu_no;
                u64 val, mask, clos;

                if (copy_from_user(&clos_assoc, ptr, sizeof(clos_assoc)))
                        return -EFAULT;

                /* Valid package IDs are 0 to topology_max_packages() - 1 */
                if (clos_assoc.socket_id >= topology_max_packages())
                        return -EINVAL;

                /* The CPU number is used as is, which needs a punit CPU number */
                if (!assoc_cmds.punit_cpu_map)
                        return -EOPNOTSUPP;

                punit_cpu_no = clos_assoc.logical_cpu;
                if (punit_cpu_no < 0)
                        return -EINVAL;

                clos = clos_assoc.clos;

                /*
                 * Use the common lookup, which also rejects packages without
                 * a registered instance and power domains without mapped MMIO.
                 */
                power_domain_info = get_instance(clos_assoc.socket_id,
                                                 clos_assoc.power_domain_id);
                if (!power_domain_info)
                        return -EINVAL;

                if (assoc_cmds.get_set && (power_domain_info->write_blocked ||
                                           !capable(CAP_SYS_ADMIN)))
                        return -EPERM;

                /* A CLOS not fitting in the per CPU field would change the next CPU */
                if (assoc_cmds.get_set && clos > GENMASK(SST_CLOS_ASSOC_BITS_PER_CPU - 1, 0))
                        return -EINVAL;

                /*
                 * NOTE(review): punit_cpu_no is not checked against the number
                 * of CLOS association registers. The valid upper limit can't
                 * be determined from here - confirm and add a range check.
                 */
                offset = SST_CLOS_ASSOC_0_OFFSET +
                                (punit_cpu_no / SST_CLOS_ASSOC_CPUS_PER_REG) * SST_REG_SIZE;
                shift = punit_cpu_no % SST_CLOS_ASSOC_CPUS_PER_REG;
                shift *= SST_CLOS_ASSOC_BITS_PER_CPU;

                val = readq(power_domain_info->sst_base +
                                power_domain_info->sst_header.cp_offset + offset);
                if (assoc_cmds.get_set) {
                        mask = GENMASK_ULL((shift + SST_CLOS_ASSOC_BITS_PER_CPU - 1), shift);
                        val &= ~mask;
                        val |= (clos << shift);
                        writeq(val, power_domain_info->sst_base +
                                        power_domain_info->sst_header.cp_offset + offset);
                } else {
                        val >>= shift;
                        clos_assoc.clos = val & GENMASK(SST_CLOS_ASSOC_BITS_PER_CPU - 1, 0);
                        if (copy_to_user(ptr, &clos_assoc, sizeof(clos_assoc)))
                                return -EFAULT;
                }

                ptr += sizeof(clos_assoc);
        }

        return 0;
}

#define _read_pp_info(name_str, name, offset, start, width, mult_factor)\
{\
        u64 val, _mask;\
        \
        val = readq(power_domain_info->sst_base + power_domain_info->sst_header.pp_offset +\
                    (offset));\
        _mask = GENMASK_ULL((start + width - 1), start);\
        val &= _mask;\
        val >>= start;\
        name = (val * mult_factor);\
}

#define _write_pp_info(name_str, name, offset, start, width, div_factor)\
{\
        u64 val, _mask;\
        \
        val = readq(power_domain_info->sst_base + power_domain_info->sst_header.pp_offset +\
                    (offset));\
        _mask = GENMASK((start + width - 1), start);\
        val &= ~_mask;\
        val |= (name / div_factor) << start;\
        writeq(val, power_domain_info->sst_base + power_domain_info->sst_header.pp_offset +\
              (offset));\
}

/*
 * _read_bf_level_info() - read one bit field from the BF area of a perf level
 * @name_str:    field name string; not used by the macro body
 * @name:        lvalue that receives the scaled field value
 * @level:       index into power_domain_info->perf_levels[]; not range checked here
 * @offset:      byte offset of the register inside the BF area
 * @start:       bit position of the least significant bit of the field
 * @width:       width of the field in bits
 * @mult_factor: multiplier applied to the extracted value
 *
 * The register address is sst_base + perf_levels[level].mmio_offset +
 * feature_offsets.bf_offset * 8 + offset. Requires a "power_domain_info"
 * pointer in the calling scope. Expands to a brace-enclosed block, so existing
 * call sites may omit the trailing semicolon.
 */
#define _read_bf_level_info(name_str, name, level, offset, start, width, mult_factor)\
{\
        u64 val, _mask;\
        \
        val = readq(power_domain_info->sst_base +\
                    power_domain_info->perf_levels[level].mmio_offset +\
                (power_domain_info->feature_offsets.bf_offset * 8) + (offset));\
        _mask = GENMASK_ULL(((start) + (width) - 1), (start));\
        val &= _mask; \
        val >>= (start);\
        name = (val * (mult_factor));\
}

/*
 * _read_tf_level_info() - read one bit field from the TF area of a perf level
 * @name_str:    field name string; not used by the macro body
 * @name:        lvalue that receives the scaled field value
 * @level:       index into power_domain_info->perf_levels[]; not range checked here
 * @offset:      byte offset of the register inside the TF area
 * @start:       bit position of the least significant bit of the field
 * @width:       width of the field in bits
 * @mult_factor: multiplier applied to the extracted value
 *
 * The register address is sst_base + perf_levels[level].mmio_offset +
 * feature_offsets.tf_offset * 8 + offset. Requires a "power_domain_info"
 * pointer in the calling scope. Expands to a brace-enclosed block, so existing
 * call sites may omit the trailing semicolon.
 */
#define _read_tf_level_info(name_str, name, level, offset, start, width, mult_factor)\
{\
        u64 val, _mask;\
        \
        val = readq(power_domain_info->sst_base +\
                    power_domain_info->perf_levels[level].mmio_offset +\
                (power_domain_info->feature_offsets.tf_offset * 8) + (offset));\
        _mask = GENMASK_ULL(((start) + (width) - 1), (start));\
        val &= _mask; \
        val >>= (start);\
        name = (val * (mult_factor));\
}

/* Byte offset of the PP status register, added to sst_header.pp_offset by _read_pp_info() */
#define SST_PP_STATUS_OFFSET    32

/* Perf level field, bits [2:0]; used with both the PP status and PP control registers */
#define SST_PP_LEVEL_START      0
#define SST_PP_LEVEL_WIDTH      3

/* Lock flag, bit 3 of the PP status register */
#define SST_PP_LOCK_START       3
#define SST_PP_LOCK_WIDTH       1

/* Feature state field, bits [15:8]; used with both the PP status and PP control registers */
#define SST_PP_FEATURE_STATE_START      8
#define SST_PP_FEATURE_STATE_WIDTH      8

/* BF supported flag: bit 12 of the register at offset 0 of a level's BF area */
#define SST_BF_FEATURE_SUPPORTED_START  12
#define SST_BF_FEATURE_SUPPORTED_WIDTH  1

/* TF supported flag: bit 12 of the register at offset 0 of a level's TF area */
#define SST_TF_FEATURE_SUPPORTED_START  12
#define SST_TF_FEATURE_SUPPORTED_WIDTH  1

/*
 * isst_if_get_perf_level() - handler for ISST_IF_PERF_LEVELS
 * @argp: user pointer to a struct isst_perf_level_info; socket_id and
 *        power_domain_id select the instance, the other members are filled in
 *
 * Reports the level limit, level mask and feature revision kept in the power
 * domain structure, the current level, lock bit and feature state read from
 * the PP status register, and one BF and one TF support bit for every level
 * that is set in level_en_mask.
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be copied in or
 * out, -EINVAL if no instance matches the socket/power domain pair.
 */
static int isst_if_get_perf_level(void __user *argp)
{
        struct tpmi_per_power_domain_info *power_domain_info;
        struct isst_perf_level_info perf_level;
        unsigned long enabled_levels;
        u8 lvl, supported;

        if (copy_from_user(&perf_level, argp, sizeof(perf_level)))
                return -EFAULT;

        power_domain_info = get_instance(perf_level.socket_id, perf_level.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        /* Values taken from the power domain structure, no MMIO access needed */
        perf_level.max_level = power_domain_info->max_level;
        perf_level.level_mask = power_domain_info->pp_header.level_en_mask;
        perf_level.feature_rev = power_domain_info->pp_header.feature_rev;
        perf_level.enabled = !!(power_domain_info->sst_header.cap_mask & BIT(1));

        /* Live state from the PP status register */
        _read_pp_info("current_level", perf_level.current_level, SST_PP_STATUS_OFFSET,
                      SST_PP_LEVEL_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE)
        _read_pp_info("locked", perf_level.locked, SST_PP_STATUS_OFFSET,
                      SST_PP_LOCK_START, SST_PP_LOCK_WIDTH, SST_MUL_FACTOR_NONE)
        _read_pp_info("feature_state", perf_level.feature_state, SST_PP_STATUS_OFFSET,
                      SST_PP_FEATURE_STATE_START, SST_PP_FEATURE_STATE_WIDTH,
                      SST_MUL_FACTOR_NONE)

        /* for_each_set_bit() needs an unsigned long to iterate over */
        enabled_levels = perf_level.level_mask;

        /* One BF support bit per enabled level; the macro stores into "supported" */
        perf_level.sst_bf_support = 0;
        for_each_set_bit(lvl, &enabled_levels, BITS_PER_BYTE) {
                _read_bf_level_info("bf_support", supported, lvl, 0,
                                    SST_BF_FEATURE_SUPPORTED_START,
                                    SST_BF_FEATURE_SUPPORTED_WIDTH, SST_MUL_FACTOR_NONE);

                if (supported)
                        perf_level.sst_bf_support |= BIT(lvl);
        }

        /* One TF support bit per enabled level; the macro stores into "supported" */
        perf_level.sst_tf_support = 0;
        for_each_set_bit(lvl, &enabled_levels, BITS_PER_BYTE) {
                _read_tf_level_info("tf_support", supported, lvl, 0,
                                    SST_TF_FEATURE_SUPPORTED_START,
                                    SST_TF_FEATURE_SUPPORTED_WIDTH, SST_MUL_FACTOR_NONE);

                if (supported)
                        perf_level.sst_tf_support |= BIT(lvl);
        }

        if (copy_to_user(argp, &perf_level, sizeof(perf_level)))
                return -EFAULT;

        return 0;
}

/* Byte offset of the PP control register, added to sst_header.pp_offset by _write_pp_info() */
#define SST_PP_CONTROL_OFFSET           24
/* Sleep, in milliseconds, before each poll of the status register */
#define SST_PP_LEVEL_CHANGE_TIME_MS     5
/* Number of additional status polls after the first one */
#define SST_PP_LEVEL_CHANGE_RETRY_COUNT 3

/*
 * isst_if_set_perf_level() - handler for ISST_IF_PERF_SET_LEVEL
 * @argp: user pointer to a struct isst_perf_level_control
 *
 * Writes the requested level to the PP control register, polls the PP status
 * register until the new level is reported (first poll plus up to
 * SST_PP_LEVEL_CHANGE_RETRY_COUNT retries, SST_PP_LEVEL_CHANGE_TIME_MS apart)
 * and then clears the feature state field in the control register.
 *
 * Return: 0 on success;
 * -EFAULT if disable_dynamic_sst_features() is true, the user buffer cannot
 * be copied, or the status register never reports the new level;
 * -EINVAL for an unknown power domain, a level above max_level, a level not
 * in allowed_level_mask, or a level equal to the current one;
 * -EPERM if writes are blocked or the caller lacks CAP_SYS_ADMIN.
 */
static int isst_if_set_perf_level(void __user *argp)
{
        struct isst_perf_level_control perf_level;
        struct tpmi_per_power_domain_info *power_domain_info;
        int level, retry = 0;

        if (disable_dynamic_sst_features())
                return -EFAULT;

        if (copy_from_user(&perf_level, argp, sizeof(perf_level)))
                return -EFAULT;

        power_domain_info = get_instance(perf_level.socket_id, perf_level.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        if (power_domain_info->write_blocked || !capable(CAP_SYS_ADMIN))
                return -EPERM;

        /*
         * The level comes straight from user space. Bound it first, as the
         * other level based handlers do, so that BIT() below is never asked
         * to shift by an arbitrary user controlled amount.
         */
        if (perf_level.level > power_domain_info->max_level)
                return -EINVAL;

        if (!(power_domain_info->pp_header.allowed_level_mask & BIT(perf_level.level)))
                return -EINVAL;

        _read_pp_info("current_level", level, SST_PP_STATUS_OFFSET,
                      SST_PP_LEVEL_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE)

        /* If the requested new level is same as the current level, reject */
        if (perf_level.level == level)
                return -EINVAL;

        _write_pp_info("perf_level", perf_level.level, SST_PP_CONTROL_OFFSET,
                       SST_PP_LEVEL_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE)

        /* It is possible that firmware is busy (although unlikely), so retry */
        do {
                /* Give time to FW to process */
                msleep(SST_PP_LEVEL_CHANGE_TIME_MS);

                _read_pp_info("current_level", level, SST_PP_STATUS_OFFSET,
                              SST_PP_LEVEL_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE)

                /* Check if the new level is active */
                if (perf_level.level == level)
                        break;

        } while (retry++ < SST_PP_LEVEL_CHANGE_RETRY_COUNT);

        /* If the level change didn't happen, return fault */
        if (perf_level.level != level)
                return -EFAULT;

        /* Reset the feature state on level change */
        _write_pp_info("perf_feature", 0, SST_PP_CONTROL_OFFSET,
                       SST_PP_FEATURE_STATE_START, SST_PP_FEATURE_STATE_WIDTH,
                       SST_MUL_FACTOR_NONE)

        /* Give time to FW to process */
        msleep(SST_PP_LEVEL_CHANGE_TIME_MS);

        return 0;
}

/*
 * isst_if_set_perf_feature() - handler for ISST_IF_PERF_SET_FEATURE
 * @argp: user pointer to a struct isst_perf_feature_control
 *
 * Stores the requested feature value in the feature state field of the PP
 * control register of the selected power domain.
 *
 * Return: 0 on success; -EFAULT if disable_dynamic_sst_features() is true or
 * the user buffer cannot be copied; -EINVAL for an unknown power domain;
 * -EPERM if writes are blocked or the caller lacks CAP_SYS_ADMIN.
 */
static int isst_if_set_perf_feature(void __user *argp)
{
        struct tpmi_per_power_domain_info *power_domain_info;
        struct isst_perf_feature_control perf_feature;

        if (disable_dynamic_sst_features())
                return -EFAULT;

        if (copy_from_user(&perf_feature, argp, sizeof(perf_feature)))
                return -EFAULT;

        power_domain_info = get_instance(perf_feature.socket_id,
                                         perf_feature.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        /* Register writes need both an unblocked interface and a privileged caller */
        if (power_domain_info->write_blocked || !capable(CAP_SYS_ADMIN))
                return -EPERM;

        _write_pp_info("perf_feature", perf_feature.feature, SST_PP_CONTROL_OFFSET,
                       SST_PP_FEATURE_STATE_START, SST_PP_FEATURE_STATE_WIDTH,
                       SST_MUL_FACTOR_NONE)

        return 0;
}

/*
 * _read_pp_level_info() - read one bit field from the PP area of a perf level
 * @name_str:    field name string; not used by the macro body
 * @name:        lvalue that receives the scaled field value
 * @level:       index into power_domain_info->perf_levels[]; not range checked here
 * @offset:      byte offset of the register inside the PP area
 * @start:       bit position of the least significant bit of the field
 * @width:       width of the field in bits
 * @mult_factor: multiplier applied to the extracted value
 *
 * The register address is sst_base + perf_levels[level].mmio_offset +
 * feature_offsets.pp_offset * 8 + offset. Requires a "power_domain_info"
 * pointer in the calling scope. Expands to a brace-enclosed block, so existing
 * call sites may omit the trailing semicolon.
 */
#define _read_pp_level_info(name_str, name, level, offset, start, width, mult_factor)\
{\
        u64 val, _mask;\
        \
        val = readq(power_domain_info->sst_base +\
                    power_domain_info->perf_levels[level].mmio_offset +\
                (power_domain_info->feature_offsets.pp_offset * 8) + (offset));\
        _mask = GENMASK_ULL(((start) + (width) - 1), (start));\
        val &= _mask; \
        val >>= (start);\
        name = (val * (mult_factor));\
}

/*
 * Byte offsets of the 64-bit registers inside a perf level's PP area, as
 * passed to _read_pp_level_info().
 */
#define SST_PP_INFO_0_OFFSET    0
#define SST_PP_INFO_1_OFFSET    8
#define SST_PP_INFO_2_OFFSET    16
#define SST_PP_INFO_3_OFFSET    24

/* SST_PP_INFO_4_OFFSET to SST_PP_INFO_9_OFFSET are trl levels */
#define SST_PP_INFO_4_OFFSET    32

#define SST_PP_INFO_10_OFFSET   80
#define SST_PP_INFO_11_OFFSET   88
#define SST_PP_INFO_12_OFFSET   96

/* Fields read from SST_PP_INFO_0: P1 ratios for SSE, AVX2, AVX512 and AMX */
#define SST_PP_P1_SSE_START     0
#define SST_PP_P1_SSE_WIDTH     8

#define SST_PP_P1_AVX2_START    8
#define SST_PP_P1_AVX2_WIDTH    8

#define SST_PP_P1_AVX512_START  16
#define SST_PP_P1_AVX512_WIDTH  8

#define SST_PP_P1_AMX_START     24
#define SST_PP_P1_AMX_WIDTH     8

/* Fields read from SST_PP_INFO_1: TDP, T_PROCHOT, max memory frequency, cooling type */
#define SST_PP_TDP_START        32
#define SST_PP_TDP_WIDTH        15

#define SST_PP_T_PROCHOT_START  47
#define SST_PP_T_PROCHOT_WIDTH  8

#define SST_PP_MAX_MEMORY_FREQ_START    55
#define SST_PP_MAX_MEMORY_FREQ_WIDTH    7

#define SST_PP_COOLING_TYPE_START       62
#define SST_PP_COOLING_TYPE_WIDTH       2

/* TRL registers (SST_PP_INFO_4 onwards): one 8-bit ratio per bucket */
#define SST_PP_TRL_0_RATIO_0_START      0
#define SST_PP_TRL_0_RATIO_0_WIDTH      8

/* SST_PP_INFO_10: one 8-bit core count per bucket */
#define SST_PP_TRL_CORES_BUCKET_0_START 0
#define SST_PP_TRL_CORES_BUCKET_0_WIDTH 8

/* Fields read from SST_PP_INFO_11: P0/P1/Pn/Pm core ratios, then fabric ratios */
#define SST_PP_CORE_RATIO_P0_START      0
#define SST_PP_CORE_RATIO_P0_WIDTH      8

#define SST_PP_CORE_RATIO_P1_START      8
#define SST_PP_CORE_RATIO_P1_WIDTH      8

#define SST_PP_CORE_RATIO_PN_START      16
#define SST_PP_CORE_RATIO_PN_WIDTH      8

#define SST_PP_CORE_RATIO_PM_START      24
#define SST_PP_CORE_RATIO_PM_WIDTH      8

#define SST_PP_CORE_RATIO_P0_FABRIC_START       32
#define SST_PP_CORE_RATIO_P0_FABRIC_WIDTH       8

#define SST_PP_CORE_RATIO_P1_FABRIC_START       40
#define SST_PP_CORE_RATIO_P1_FABRIC_WIDTH       8

#define SST_PP_CORE_RATIO_PM_FABRIC_START       48
#define SST_PP_CORE_RATIO_PM_FABRIC_WIDTH       8

/* Fields of SST_PP_INFO_12: ratios of the second fabric */
#define SST_PP_CORE_RATIO_P0_FABRIC_1_START     0
#define SST_PP_CORE_RATIO_P0_FABRIC_1_WIDTH     8

#define SST_PP_CORE_RATIO_P1_FABRIC_1_START     8
#define SST_PP_CORE_RATIO_P1_FABRIC_1_WIDTH     8

#define SST_PP_CORE_RATIO_PM_FABRIC_1_START     16
#define SST_PP_CORE_RATIO_PM_FABRIC_1_WIDTH     8

/*
 * isst_if_get_perf_level_info() - handler for ISST_IF_GET_PERF_LEVEL_INFO
 * @argp: user pointer to a struct isst_perf_level_data_info; socket_id,
 *        power_domain_id and level select what is read
 *
 * Fills in the per-level data from the PP area of the requested level: base
 * frequencies, TDP, Tjunction max, memory frequency, cooling type, the TRL
 * table, bucket core counts and the P0/P1/Pn/Pm core and fabric frequencies.
 * Ratio fields are multiplied by SST_MUL_FACTOR_FREQ to produce the *_mhz
 * members.
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be copied in or
 * out, -EINVAL for an unknown power domain or a level that is above max_level
 * or not set in level_en_mask.
 */
static int isst_if_get_perf_level_info(void __user *argp)
{
        struct tpmi_per_power_domain_info *power_domain_info;
        struct isst_perf_level_data_info perf_level;
        int trl, bucket;

        if (copy_from_user(&perf_level, argp, sizeof(perf_level)))
                return -EFAULT;

        power_domain_info = get_instance(perf_level.socket_id, perf_level.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        /* The range check runs first, so BIT() only sees a level within max_level */
        if (perf_level.level > power_domain_info->max_level ||
            !(power_domain_info->pp_header.level_en_mask & BIT(perf_level.level)))
                return -EINVAL;

        /* SST_PP_INFO_0: the SSE P1 ratio is reported both raw and as a frequency */
        _read_pp_level_info("tdp_ratio", perf_level.tdp_ratio, perf_level.level,
                            SST_PP_INFO_0_OFFSET, SST_PP_P1_SSE_START,
                            SST_PP_P1_SSE_WIDTH, SST_MUL_FACTOR_NONE)
        _read_pp_level_info("base_freq_mhz", perf_level.base_freq_mhz, perf_level.level,
                            SST_PP_INFO_0_OFFSET, SST_PP_P1_SSE_START,
                            SST_PP_P1_SSE_WIDTH, SST_MUL_FACTOR_FREQ)
        _read_pp_level_info("base_freq_avx2_mhz", perf_level.base_freq_avx2_mhz,
                            perf_level.level, SST_PP_INFO_0_OFFSET,
                            SST_PP_P1_AVX2_START, SST_PP_P1_AVX2_WIDTH,
                            SST_MUL_FACTOR_FREQ)
        _read_pp_level_info("base_freq_avx512_mhz", perf_level.base_freq_avx512_mhz,
                            perf_level.level, SST_PP_INFO_0_OFFSET,
                            SST_PP_P1_AVX512_START, SST_PP_P1_AVX512_WIDTH,
                            SST_MUL_FACTOR_FREQ)
        _read_pp_level_info("base_freq_amx_mhz", perf_level.base_freq_amx_mhz,
                            perf_level.level, SST_PP_INFO_0_OFFSET,
                            SST_PP_P1_AMX_START, SST_PP_P1_AMX_WIDTH,
                            SST_MUL_FACTOR_FREQ)

        /* SST_PP_INFO_1: power and thermal limits */
        _read_pp_level_info("thermal_design_power_w", perf_level.thermal_design_power_w,
                            perf_level.level, SST_PP_INFO_1_OFFSET, SST_PP_TDP_START,
                            SST_PP_TDP_WIDTH, SST_MUL_FACTOR_NONE)
        /* The register holds 1/8th watt units; report whole watts */
        perf_level.thermal_design_power_w /= 8;
        _read_pp_level_info("tjunction_max_c", perf_level.tjunction_max_c,
                            perf_level.level, SST_PP_INFO_1_OFFSET,
                            SST_PP_T_PROCHOT_START, SST_PP_T_PROCHOT_WIDTH,
                            SST_MUL_FACTOR_NONE)
        _read_pp_level_info("max_memory_freq_mhz", perf_level.max_memory_freq_mhz,
                            perf_level.level, SST_PP_INFO_1_OFFSET,
                            SST_PP_MAX_MEMORY_FREQ_START, SST_PP_MAX_MEMORY_FREQ_WIDTH,
                            SST_MUL_FACTOR_FREQ)
        _read_pp_level_info("cooling_type", perf_level.cooling_type, perf_level.level,
                            SST_PP_INFO_1_OFFSET, SST_PP_COOLING_TYPE_START,
                            SST_PP_COOLING_TYPE_WIDTH, SST_MUL_FACTOR_NONE)

        /* One register per TRL level starting at SST_PP_INFO_4, one field per bucket */
        for (trl = 0; trl < TRL_MAX_LEVELS; ++trl) {
                int trl_reg = SST_PP_INFO_4_OFFSET + (trl * SST_PP_TRL_0_RATIO_0_WIDTH);

                for (bucket = 0; bucket < TRL_MAX_BUCKETS; ++bucket) {
                        int ratio_bit = bucket * SST_PP_TRL_0_RATIO_0_WIDTH;

                        _read_pp_level_info("trl*_bucket*_freq_mhz",
                                            perf_level.trl_freq_mhz[trl][bucket],
                                            perf_level.level, trl_reg, ratio_bit,
                                            SST_PP_TRL_0_RATIO_0_WIDTH,
                                            SST_MUL_FACTOR_FREQ)
                }
        }

        /* SST_PP_INFO_10: core count of every bucket */
        for (bucket = 0; bucket < TRL_MAX_BUCKETS; ++bucket) {
                int count_bit = SST_PP_TRL_CORES_BUCKET_0_WIDTH * bucket;

                _read_pp_level_info("bucket*_core_count",
                                    perf_level.bucket_core_counts[bucket],
                                    perf_level.level, SST_PP_INFO_10_OFFSET, count_bit,
                                    SST_PP_TRL_CORES_BUCKET_0_WIDTH, SST_MUL_FACTOR_NONE)
        }

        perf_level.max_buckets = TRL_MAX_BUCKETS;
        perf_level.max_trl_levels = TRL_MAX_LEVELS;

        /* SST_PP_INFO_11: core ratios followed by the fabric ratios */
        _read_pp_level_info("p0_freq_mhz", perf_level.p0_freq_mhz, perf_level.level,
                            SST_PP_INFO_11_OFFSET, SST_PP_CORE_RATIO_P0_START,
                            SST_PP_CORE_RATIO_P0_WIDTH, SST_MUL_FACTOR_FREQ)
        _read_pp_level_info("p1_freq_mhz", perf_level.p1_freq_mhz, perf_level.level,
                            SST_PP_INFO_11_OFFSET, SST_PP_CORE_RATIO_P1_START,
                            SST_PP_CORE_RATIO_P1_WIDTH, SST_MUL_FACTOR_FREQ)
        _read_pp_level_info("pn_freq_mhz", perf_level.pn_freq_mhz, perf_level.level,
                            SST_PP_INFO_11_OFFSET, SST_PP_CORE_RATIO_PN_START,
                            SST_PP_CORE_RATIO_PN_WIDTH, SST_MUL_FACTOR_FREQ)
        _read_pp_level_info("pm_freq_mhz", perf_level.pm_freq_mhz, perf_level.level,
                            SST_PP_INFO_11_OFFSET, SST_PP_CORE_RATIO_PM_START,
                            SST_PP_CORE_RATIO_PM_WIDTH, SST_MUL_FACTOR_FREQ)
        _read_pp_level_info("p0_fabric_freq_mhz", perf_level.p0_fabric_freq_mhz,
                            perf_level.level, SST_PP_INFO_11_OFFSET,
                            SST_PP_CORE_RATIO_P0_FABRIC_START,
                            SST_PP_CORE_RATIO_P0_FABRIC_WIDTH, SST_MUL_FACTOR_FREQ)
        _read_pp_level_info("p1_fabric_freq_mhz", perf_level.p1_fabric_freq_mhz,
                            perf_level.level, SST_PP_INFO_11_OFFSET,
                            SST_PP_CORE_RATIO_P1_FABRIC_START,
                            SST_PP_CORE_RATIO_P1_FABRIC_WIDTH, SST_MUL_FACTOR_FREQ)
        _read_pp_level_info("pm_fabric_freq_mhz", perf_level.pm_fabric_freq_mhz,
                            perf_level.level, SST_PP_INFO_11_OFFSET,
                            SST_PP_CORE_RATIO_PM_FABRIC_START,
                            SST_PP_CORE_RATIO_PM_FABRIC_WIDTH, SST_MUL_FACTOR_FREQ)

        if (copy_to_user(argp, &perf_level, sizeof(perf_level)))
                return -EFAULT;

        return 0;
}

/*
 * isst_if_get_perf_level_fabric_info() - handler for ISST_IF_GET_PERF_LEVEL_FABRIC_INFO
 * @argp: user pointer to a struct isst_perf_level_fabric_info; socket_id,
 *        power_domain_id and level select what is read
 *
 * Reports the P0, P1 and Pm fabric frequencies of two fabrics. The first
 * fabric is read from SST_PP_INFO_11 starting at the P0 fabric field, the
 * second one from SST_PP_INFO_12 starting at bit 0. The three fields of a
 * fabric are adjacent and all SST_PP_CORE_RATIO_P0_FABRIC_WIDTH bits wide.
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be copied in or
 * out, -EINVAL for an unknown power domain, a level above max_level, a PP
 * feature revision below 2, or a level not set in level_en_mask.
 */
static int isst_if_get_perf_level_fabric_info(void __user *argp)
{
        struct tpmi_per_power_domain_info *power_domain_info;
        struct isst_perf_level_fabric_info perf_level_fabric;
        int field_bits = SST_PP_CORE_RATIO_P0_FABRIC_WIDTH;
        int first_bit = SST_PP_CORE_RATIO_P0_FABRIC_START;
        int reg = SST_PP_INFO_11_OFFSET;
        int fabric;

        if (copy_from_user(&perf_level_fabric, argp, sizeof(perf_level_fabric)))
                return -EFAULT;

        power_domain_info = get_instance(perf_level_fabric.socket_id,
                                         perf_level_fabric.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        /* The range check runs first, so BIT() only sees a level within max_level */
        if (perf_level_fabric.level > power_domain_info->max_level ||
            power_domain_info->pp_header.feature_rev < 2 ||
            !(power_domain_info->pp_header.level_en_mask & BIT(perf_level_fabric.level)))
                return -EINVAL;

        /* For revision 2, maximum number of fabrics is 2 */
        perf_level_fabric.max_fabrics = 2;

        for (fabric = 0; fabric < perf_level_fabric.max_fabrics; fabric++) {
                int p0_bit = first_bit;
                int p1_bit = p0_bit + field_bits;
                int pm_bit = p1_bit + field_bits;

                _read_pp_level_info("p0_fabric_freq_mhz",
                                    perf_level_fabric.p0_fabric_freq_mhz[fabric],
                                    perf_level_fabric.level, reg, p0_bit, field_bits,
                                    SST_MUL_FACTOR_FREQ)
                _read_pp_level_info("p1_fabric_freq_mhz",
                                    perf_level_fabric.p1_fabric_freq_mhz[fabric],
                                    perf_level_fabric.level, reg, p1_bit, field_bits,
                                    SST_MUL_FACTOR_FREQ)
                _read_pp_level_info("pm_fabric_freq_mhz",
                                    perf_level_fabric.pm_fabric_freq_mhz[fabric],
                                    perf_level_fabric.level, reg, pm_bit, field_bits,
                                    SST_MUL_FACTOR_FREQ)

                /* Any following fabric is read from the next register, from bit 0 */
                reg = SST_PP_INFO_12_OFFSET;
                first_bit = SST_PP_CORE_RATIO_P0_FABRIC_1_START;
        }

        if (copy_to_user(argp, &perf_level_fabric, sizeof(perf_level_fabric)))
                return -EFAULT;

        return 0;
}

/*
 * NOTE(review): the FUSED/RSLVD core count fields are not referenced by the
 * function below; confirm which register they belong to before using them.
 */
#define SST_PP_FUSED_CORE_COUNT_START   0
#define SST_PP_FUSED_CORE_COUNT_WIDTH   8

#define SST_PP_RSLVD_CORE_COUNT_START   8
#define SST_PP_RSLVD_CORE_COUNT_WIDTH   8

/* The resolved core mask occupies the whole 64-bit register at SST_PP_INFO_2 */
#define SST_PP_RSLVD_CORE_MASK_START    0
#define SST_PP_RSLVD_CORE_MASK_WIDTH    64

/*
 * isst_if_get_perf_level_mask() - handler for ISST_IF_GET_PERF_LEVEL_CPU_MASK
 * @argp: user pointer to a struct isst_perf_level_cpu_mask; socket_id,
 *        power_domain_id and level select what is read
 *
 * Returns the 64-bit resolved core mask of the requested perf level in
 * cpumask.mask. Only requests with punit_cpu_map set are supported.
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be copied in or
 * out, -EINVAL for an unknown power domain or a level above max_level,
 * -EOPNOTSUPP if punit_cpu_map is not set in the request.
 */
static int isst_if_get_perf_level_mask(void __user *argp)
{
        /* Kept static as in the original; the ioctl entry point takes isst_tpmi_dev_lock */
        static struct isst_perf_level_cpu_mask cpumask;
        struct tpmi_per_power_domain_info *power_domain_info;
        u64 mask;

        if (copy_from_user(&cpumask, argp, sizeof(cpumask)))
                return -EFAULT;

        power_domain_info = get_instance(cpumask.socket_id, cpumask.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        /*
         * The level is user controlled and indexes perf_levels[] inside the
         * read macro, so bound it the same way the other per-level handlers do.
         */
        if (cpumask.level > power_domain_info->max_level)
                return -EINVAL;

        _read_pp_level_info("mask", mask, cpumask.level, SST_PP_INFO_2_OFFSET,
                            SST_PP_RSLVD_CORE_MASK_START, SST_PP_RSLVD_CORE_MASK_WIDTH,
                            SST_MUL_FACTOR_NONE)

        cpumask.mask = mask;

        if (!cpumask.punit_cpu_map)
                return -EOPNOTSUPP;

        if (copy_to_user(argp, &cpumask, sizeof(cpumask)))
                return -EFAULT;

        return 0;
}

/* Byte offsets of the 64-bit registers inside a perf level's BF area */
#define SST_BF_INFO_0_OFFSET    0
#define SST_BF_INFO_1_OFFSET    8

/* Fields of SST_BF_INFO_0 */
#define SST_BF_P1_HIGH_START    13
#define SST_BF_P1_HIGH_WIDTH    8

#define SST_BF_P1_LOW_START     21
#define SST_BF_P1_LOW_WIDTH     8

#define SST_BF_T_PROHOT_START   38
#define SST_BF_T_PROHOT_WIDTH   8

#define SST_BF_TDP_START        46
#define SST_BF_TDP_WIDTH        15

/*
 * isst_if_get_base_freq_info() - handler for ISST_IF_GET_BASE_FREQ_INFO
 * @argp: user pointer to a struct isst_base_freq_info; socket_id,
 *        power_domain_id and level select what is read
 *
 * Reads the high and low base frequencies, Tjunction max and TDP of the
 * requested level from SST_BF_INFO_0. The two ratios are multiplied by
 * SST_MUL_FACTOR_FREQ; the TDP is converted from 1/8th watt units to watts.
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be copied in or
 * out, -EINVAL for an unknown power domain or a level above max_level.
 */
static int isst_if_get_base_freq_info(void __user *argp)
{
        struct tpmi_per_power_domain_info *power_domain_info;
        static struct isst_base_freq_info base_freq;

        if (copy_from_user(&base_freq, argp, sizeof(base_freq)))
                return -EFAULT;

        power_domain_info = get_instance(base_freq.socket_id, base_freq.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        /* The level indexes perf_levels[] inside the read macro */
        if (base_freq.level > power_domain_info->max_level)
                return -EINVAL;

        _read_bf_level_info("p1_high", base_freq.high_base_freq_mhz, base_freq.level,
                            SST_BF_INFO_0_OFFSET, SST_BF_P1_HIGH_START,
                            SST_BF_P1_HIGH_WIDTH, SST_MUL_FACTOR_FREQ)
        _read_bf_level_info("p1_low", base_freq.low_base_freq_mhz, base_freq.level,
                            SST_BF_INFO_0_OFFSET, SST_BF_P1_LOW_START,
                            SST_BF_P1_LOW_WIDTH, SST_MUL_FACTOR_FREQ)
        _read_bf_level_info("BF-TJ", base_freq.tjunction_max_c, base_freq.level,
                            SST_BF_INFO_0_OFFSET, SST_BF_T_PROHOT_START,
                            SST_BF_T_PROHOT_WIDTH, SST_MUL_FACTOR_NONE)
        _read_bf_level_info("BF-tdp", base_freq.thermal_design_power_w, base_freq.level,
                            SST_BF_INFO_0_OFFSET, SST_BF_TDP_START,
                            SST_BF_TDP_WIDTH, SST_MUL_FACTOR_NONE)
        /* The register holds 1/8th watt units; report whole watts */
        base_freq.thermal_design_power_w /= 8;

        if (copy_to_user(argp, &base_freq, sizeof(base_freq)))
                return -EFAULT;

        return 0;
}

/* The high priority core mask occupies the whole 64-bit register at SST_BF_INFO_1 */
#define P1_HI_CORE_MASK_START   0
#define P1_HI_CORE_MASK_WIDTH   64

/*
 * isst_if_get_base_freq_mask() - handler for ISST_IF_GET_BASE_FREQ_CPU_MASK
 * @argp: user pointer to a struct isst_perf_level_cpu_mask; socket_id,
 *        power_domain_id and level select what is read
 *
 * Returns the 64-bit mask stored at SST_BF_INFO_1 of the requested perf
 * level in cpumask.mask. Only requests with punit_cpu_map set are supported.
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be copied in or
 * out, -EINVAL for an unknown power domain or a level above max_level,
 * -EOPNOTSUPP if punit_cpu_map is not set in the request.
 */
static int isst_if_get_base_freq_mask(void __user *argp)
{
        /* Kept static as in the original; the ioctl entry point takes isst_tpmi_dev_lock */
        static struct isst_perf_level_cpu_mask cpumask;
        struct tpmi_per_power_domain_info *power_domain_info;
        u64 mask;

        if (copy_from_user(&cpumask, argp, sizeof(cpumask)))
                return -EFAULT;

        power_domain_info = get_instance(cpumask.socket_id, cpumask.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        /*
         * The level is user controlled and indexes perf_levels[] inside the
         * read macro, so bound it the same way the other per-level handlers do.
         */
        if (cpumask.level > power_domain_info->max_level)
                return -EINVAL;

        _read_bf_level_info("BF-cpumask", mask, cpumask.level, SST_BF_INFO_1_OFFSET,
                            P1_HI_CORE_MASK_START, P1_HI_CORE_MASK_WIDTH,
                            SST_MUL_FACTOR_NONE)

        cpumask.mask = mask;

        if (!cpumask.punit_cpu_map)
                return -EOPNOTSUPP;

        if (copy_to_user(argp, &cpumask, sizeof(cpumask)))
                return -EFAULT;

        return 0;
}

/*
 * isst_if_get_tpmi_instance_count() - handler for ISST_IF_COUNT_TPMI_INSTANCES
 * @argp: user pointer to a struct isst_tpmi_instance_count; socket_id selects
 *        the package
 *
 * Reports the number of power domain instances of a package and a bit mask
 * of the instances that have a mapped MMIO region. If no instance is valid,
 * or the package has no SST instance registered, the count is reported as 0.
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be copied in or
 * out, -EINVAL if socket_id is not below topology_max_packages().
 */
static int isst_if_get_tpmi_instance_count(void __user *argp)
{
        struct isst_tpmi_instance_count tpmi_inst;
        struct tpmi_sst_struct *sst_inst;
        int i;

        if (copy_from_user(&tpmi_inst, argp, sizeof(tpmi_inst)))
                return -EFAULT;

        if (tpmi_inst.socket_id >= topology_max_packages())
                return -EINVAL;

        sst_inst = isst_common.sst_inst[tpmi_inst.socket_id];

        /*
         * The per package entry is NULL until a device is added for the
         * package and again after the last partition is removed. Report an
         * empty package instead of handing a NULL pointer to the helpers.
         */
        tpmi_inst.count = sst_inst ? isst_instance_count(sst_inst) : 0;

        tpmi_inst.valid_mask = 0;
        for (i = 0; i < tpmi_inst.count; i++) {
                struct tpmi_per_power_domain_info *pd_info;
                u8 part;
                int pd;

                pd = map_partition_power_domain_id(sst_inst, i, &part);
                if (pd < 0)
                        continue;

                /* A removed partition leaves its power domain array pointer NULL */
                if (!sst_inst->power_domain_info[part])
                        continue;

                pd_info = &sst_inst->power_domain_info[part][pd];
                if (pd_info->sst_base)
                        tpmi_inst.valid_mask |= BIT(i);
        }

        if (!tpmi_inst.valid_mask)
                tpmi_inst.count = 0;

        if (copy_to_user(argp, &tpmi_inst, sizeof(tpmi_inst)))
                return -EFAULT;

        return 0;
}

/*
 * Byte offsets of the 64-bit registers inside a perf level's TF area, as
 * passed to _read_tf_level_info(). INFO_0 holds the feature revision and the
 * LP clip ratios, INFO_1 the 8-bit per-bucket core counts, INFO_2 is the
 * first TRL register and INFO_8 holds 16-bit per-bucket module counts.
 */
#define SST_TF_INFO_0_OFFSET    0
#define SST_TF_INFO_1_OFFSET    8
#define SST_TF_INFO_2_OFFSET    16
#define SST_TF_INFO_8_OFFSET    64
/* Number of 16-bit bucket fields read from SST_TF_INFO_8 */
#define SST_TF_INFO_8_BUCKETS   3

/* One LP clip ratio is reported per TRL level */
#define SST_TF_MAX_LP_CLIP_RATIOS       TRL_MAX_LEVELS

/* Fields of SST_TF_INFO_0 */
#define SST_TF_FEATURE_REV_START        4
#define SST_TF_FEATURE_REV_WIDTH        8

/* First LP clip ratio; the following ratios are adjacent, each WIDTH bits wide */
#define SST_TF_LP_CLIP_RATIO_0_START    16
#define SST_TF_LP_CLIP_RATIO_0_WIDTH    8

/* Layout of the first field of a TRL register; the other buckets follow in WIDTH steps */
#define SST_TF_RATIO_0_START    0
#define SST_TF_RATIO_0_WIDTH    8

/* Layout of the first core count field in SST_TF_INFO_1 */
#define SST_TF_NUM_CORE_0_START 0
#define SST_TF_NUM_CORE_0_WIDTH 8

/* Layout of the first module count field in SST_TF_INFO_8 */
#define SST_TF_NUM_MOD_0_START  0
#define SST_TF_NUM_MOD_0_WIDTH  16

/*
 * isst_if_get_turbo_freq_info() - handler for ISST_IF_GET_TURBO_FREQ_INFO
 * @argp: user pointer to a struct isst_turbo_freq_info; socket_id,
 *        power_domain_id and level select what is read
 *
 * Reports the LP clip frequencies, the TRL table and the per-bucket counts
 * of the requested level from its TF area. For a TF feature revision of 2 or
 * more the 16-bit counts in SST_TF_INFO_8 are used when at least one of them
 * is non-zero; otherwise the 8-bit counts in SST_TF_INFO_1 are reported.
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be copied in or
 * out, -EINVAL for an unknown power domain or a level above max_level.
 */
static int isst_if_get_turbo_freq_info(void __user *argp)
{
        struct tpmi_per_power_domain_info *power_domain_info;
        static struct isst_turbo_freq_info turbo_freq;
        bool mod_counts_present = false;
        u8 feature_rev;
        int idx, bucket;

        if (copy_from_user(&turbo_freq, argp, sizeof(turbo_freq)))
                return -EFAULT;

        power_domain_info = get_instance(turbo_freq.socket_id, turbo_freq.power_domain_id);
        if (!power_domain_info)
                return -EINVAL;

        /* The level indexes perf_levels[] inside the read macro */
        if (turbo_freq.level > power_domain_info->max_level)
                return -EINVAL;

        turbo_freq.max_buckets = TRL_MAX_BUCKETS;
        turbo_freq.max_trl_levels = TRL_MAX_LEVELS;
        turbo_freq.max_clip_freqs = SST_TF_MAX_LP_CLIP_RATIOS;

        _read_tf_level_info("feature_rev", feature_rev, turbo_freq.level,
                            SST_TF_INFO_0_OFFSET, SST_TF_FEATURE_REV_START,
                            SST_TF_FEATURE_REV_WIDTH, SST_MUL_FACTOR_NONE)

        /* LP clip ratios sit next to each other in SST_TF_INFO_0 */
        for (idx = 0; idx < turbo_freq.max_clip_freqs; ++idx) {
                int clip_bit = SST_TF_LP_CLIP_RATIO_0_START +
                               (idx * SST_TF_LP_CLIP_RATIO_0_WIDTH);

                _read_tf_level_info("lp_clip*", turbo_freq.lp_clip_freq_mhz[idx],
                                    turbo_freq.level, SST_TF_INFO_0_OFFSET, clip_bit,
                                    SST_TF_LP_CLIP_RATIO_0_WIDTH, SST_MUL_FACTOR_FREQ)
        }

        /* One register per TRL level starting at SST_TF_INFO_2, one field per bucket */
        for (idx = 0; idx < TRL_MAX_LEVELS; ++idx) {
                int trl_reg = SST_TF_INFO_2_OFFSET + (idx * SST_TF_RATIO_0_WIDTH);

                for (bucket = 0; bucket < TRL_MAX_BUCKETS; ++bucket) {
                        int ratio_bit = bucket * SST_TF_RATIO_0_WIDTH;

                        _read_tf_level_info("cydn*_bucket_*_trl",
                                            turbo_freq.trl_freq_mhz[idx][bucket],
                                            turbo_freq.level, trl_reg, ratio_bit,
                                            SST_TF_RATIO_0_WIDTH, SST_MUL_FACTOR_FREQ)
                }
        }

        memset(turbo_freq.bucket_core_counts, 0, sizeof(turbo_freq.bucket_core_counts));

        /* Newer revisions may provide 16-bit counts in SST_TF_INFO_8 */
        if (feature_rev >= 2) {
                for (bucket = 0; bucket < SST_TF_INFO_8_BUCKETS; ++bucket) {
                        int mod_bit = SST_TF_NUM_MOD_0_WIDTH * bucket;

                        _read_tf_level_info("bucket_*_mod_count",
                                            turbo_freq.bucket_core_counts[bucket],
                                            turbo_freq.level, SST_TF_INFO_8_OFFSET, mod_bit,
                                            SST_TF_NUM_MOD_0_WIDTH, SST_MUL_FACTOR_NONE)

                        if (turbo_freq.bucket_core_counts[bucket])
                                mod_counts_present = true;
                }
        }

        /* Nothing usable in SST_TF_INFO_8: use the 8-bit counts in SST_TF_INFO_1 */
        if (!mod_counts_present) {
                for (bucket = 0; bucket < TRL_MAX_BUCKETS; ++bucket) {
                        int core_bit = SST_TF_NUM_CORE_0_WIDTH * bucket;

                        _read_tf_level_info("bucket_*_core_count",
                                            turbo_freq.bucket_core_counts[bucket],
                                            turbo_freq.level, SST_TF_INFO_1_OFFSET, core_bit,
                                            SST_TF_NUM_CORE_0_WIDTH, SST_MUL_FACTOR_NONE)
                }
        }

        if (copy_to_user(argp, &turbo_freq, sizeof(turbo_freq)))
                return -EFAULT;

        return 0;
}

static long isst_if_def_ioctl(struct file *file, unsigned int cmd,
                              unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        long ret = -ENOTTY;

        mutex_lock(&isst_tpmi_dev_lock);
        switch (cmd) {
        case ISST_IF_COUNT_TPMI_INSTANCES:
                ret = isst_if_get_tpmi_instance_count(argp);
                break;
        case ISST_IF_CORE_POWER_STATE:
                ret = isst_if_core_power_state(argp);
                break;
        case ISST_IF_CLOS_PARAM:
                ret = isst_if_clos_param(argp);
                break;
        case ISST_IF_CLOS_ASSOC:
                ret = isst_if_clos_assoc(argp);
                break;
        case ISST_IF_PERF_LEVELS:
                ret = isst_if_get_perf_level(argp);
                break;
        case ISST_IF_PERF_SET_LEVEL:
                ret = isst_if_set_perf_level(argp);
                break;
        case ISST_IF_PERF_SET_FEATURE:
                ret = isst_if_set_perf_feature(argp);
                break;
        case ISST_IF_GET_PERF_LEVEL_INFO:
                ret = isst_if_get_perf_level_info(argp);
                break;
        case ISST_IF_GET_PERF_LEVEL_FABRIC_INFO:
                ret = isst_if_get_perf_level_fabric_info(argp);
                break;
        case ISST_IF_GET_PERF_LEVEL_CPU_MASK:
                ret = isst_if_get_perf_level_mask(argp);
                break;
        case ISST_IF_GET_BASE_FREQ_INFO:
                ret = isst_if_get_base_freq_info(argp);
                break;
        case ISST_IF_GET_BASE_FREQ_CPU_MASK:
                ret = isst_if_get_base_freq_mask(argp);
                break;
        case ISST_IF_GET_TURBO_FREQ_INFO:
                ret = isst_if_get_turbo_freq_info(argp);
                break;
        default:
                break;
        }
        mutex_unlock(&isst_tpmi_dev_lock);

        return ret;
}

/* NOTE(review): not referenced by the function that follows; confirm it is still used */
#define TPMI_SST_AUTO_SUSPEND_DELAY_MS  2000

/*
 * tpmi_sst_dev_add() - register the SST power domains of one TPMI auxiliary device
 * @auxdev: auxiliary device that exposes the SST TPMI resources of one
 *          partition of a package
 *
 * Maps every resource of the device, lets sst_main() validate and parse it,
 * and attaches the resulting power domain array to the per package
 * tpmi_sst_struct, which is allocated on first use and shared between the
 * partitions of a package. Runs under isst_tpmi_dev_lock from the package
 * instance lookup onwards.
 *
 * Return: 0 on success; -ENODEV if firmware blocks reads or no resource
 * yields a valid instance; -EINVAL for missing platform data, an out of range
 * package or partition id, or a device without resources; -ENOMEM on
 * allocation failure; or the error from devm_ioremap_resource().
 */
int tpmi_sst_dev_add(struct auxiliary_device *auxdev)
{
        struct tpmi_per_power_domain_info *pd_info;
        bool read_blocked = 0, write_blocked = 0;
        struct oobmsm_plat_info *plat_info;
        struct device *dev = &auxdev->dev;
        struct tpmi_sst_struct *tpmi_sst;
        u8 i, num_resources, io_die_cnt;
        int ret, pkg = 0, inst = 0;
        bool first_enum = false;
        u16 cdie_mask;
        u8 partition;

        /* On failure both flags keep their initial value of "not blocked" */
        ret = tpmi_get_feature_status(auxdev, TPMI_ID_SST, &read_blocked, &write_blocked);
        if (ret)
                dev_info(dev, "Can't read feature status: ignoring read/write blocked status\n");

        if (read_blocked) {
                dev_info(dev, "Firmware has blocked reads, exiting\n");
                return -ENODEV;
        }

        plat_info = tpmi_get_platform_data(auxdev);
        if (!plat_info) {
                dev_err(dev, "No platform info\n");
                return -EINVAL;
        }

        /* pkg indexes isst_common.sst_inst[] below */
        pkg = plat_info->package_id;
        if (pkg >= topology_max_packages()) {
                dev_err(dev, "Invalid package id :%x\n", pkg);
                return -EINVAL;
        }

        /* partition indexes the per partition arrays of tpmi_sst below */
        partition = plat_info->partition;
        if (partition >= SST_MAX_PARTITIONS) {
                dev_err(&auxdev->dev, "Invalid partition :%x\n", partition);
                return -EINVAL;
        }

        num_resources = tpmi_get_resource_count(auxdev);

        if (!num_resources)
                return -EINVAL;

        mutex_lock(&isst_tpmi_dev_lock);

        if (isst_common.sst_inst[pkg]) {
                tpmi_sst = isst_common.sst_inst[pkg];
        } else {
                /*
                 * tpmi_sst instance is for a package. So needs to be
                 * allocated only once for both partitions. We can't use
                 * devm_* allocation here as each partition is a
                 * different device, which can be unbound.
                 */
                tpmi_sst = kzalloc_obj(*tpmi_sst);
                if (!tpmi_sst) {
                        ret = -ENOMEM;
                        goto unlock_exit;
                }
                first_enum = true;
        }

        /* ret may still hold the tpmi_get_feature_status() error, which is ignored */
        ret = 0;

        pd_info = devm_kcalloc(dev, num_resources, sizeof(*pd_info), GFP_KERNEL);
        if (!pd_info) {
                ret = -ENOMEM;
                goto unlock_free;
        }

        /* Get the IO die count, if cdie_mask is present */
        if (plat_info->cdie_mask) {
                u8 cdie_range;

                cdie_mask = plat_info->cdie_mask;
                /* Span from the lowest to the highest set bit, inclusive */
                cdie_range = fls(cdie_mask) - ffs(cdie_mask) + 1;
                io_die_cnt = num_resources - cdie_range;
        } else {
                /*
                 * This is a synthetic mask, careful when assuming that
                 * they are compute dies only.
                 */
                /*
                 * NOTE(review): cdie_mask is a u16, so this truncates if
                 * num_resources can exceed 16 - confirm the bound.
                 */
                cdie_mask = (1 << num_resources) - 1;
                io_die_cnt = 0;
        }

        for (i = 0; i < num_resources; ++i) {
                struct resource *res;

                res = tpmi_get_resource_at_index(auxdev, i);
                if (!res) {
                        pd_info[i].sst_base = NULL;
                        continue;
                }

                pd_info[i].package_id = pkg;
                pd_info[i].power_domain_id = i;
                pd_info[i].auxdev = auxdev;
                pd_info[i].write_blocked = write_blocked;
                pd_info[i].sst_base = devm_ioremap_resource(dev, res);
                if (IS_ERR(pd_info[i].sst_base)) {
                        ret = PTR_ERR(pd_info[i].sst_base);
                        goto unlock_free;
                }

                if (sst_main(auxdev, &pd_info[i])) {
                        /*
                         * This entry is not valid, hardware can partially
                         * populate dies. In this case MMIO will have 0xFFs.
                         * Also possible some pre-production hardware has
                         * invalid data. But don't fail and continue to use
                         * other dies with valid data.
                         */
                        devm_iounmap(dev, pd_info[i].sst_base);
                        pd_info[i].sst_base = NULL;
                        continue;
                }

                /* Count the power domains that sst_main() accepted */
                ++inst;
        }

        if (!inst) {
                ret = -ENODEV;
                goto unlock_free;
        }

        tpmi_sst->package_id = pkg;

        tpmi_sst->power_domain_info[partition] = pd_info;
        tpmi_sst->number_of_power_domains[partition] = num_resources;
        tpmi_sst->cdie_mask[partition] = cdie_mask;
        tpmi_sst->io_dies[partition] = io_die_cnt;
        tpmi_sst->partition_mask |= BIT(partition);
        tpmi_sst->partition_mask_current |= BIT(partition);

        auxiliary_set_drvdata(auxdev, tpmi_sst);

        if (isst_common.max_index < pkg)
                isst_common.max_index = pkg;
        isst_common.sst_inst[pkg] = tpmi_sst;

        /* The success path falls through here with ret == 0, so nothing is freed */
unlock_free:
        /* Only free the package instance if this call allocated it */
        if (ret && first_enum)
                kfree(tpmi_sst);
unlock_exit:
        mutex_unlock(&isst_tpmi_dev_lock);

        return ret;
}
EXPORT_SYMBOL_NS_GPL(tpmi_sst_dev_add, "INTEL_TPMI_SST");

/**
 * tpmi_sst_dev_remove() - Detach one partition from its package instance
 * @auxdev: Auxiliary device that is going away
 *
 * Under isst_tpmi_dev_lock, drop the power domain table pointer of the
 * partition reported by the platform data of @auxdev and clear that
 * partition's bit in partition_mask_current. When no partition bit is left,
 * the package instance is removed from isst_common.sst_inst[] and freed.
 *
 * Nothing is done when no platform data can be obtained for @auxdev.
 */
void tpmi_sst_dev_remove(struct auxiliary_device *auxdev)
{
        struct oobmsm_plat_info *plat_info = tpmi_get_platform_data(auxdev);
        struct tpmi_sst_struct *tpmi_sst;

        if (!plat_info)
                return;

        tpmi_sst = auxiliary_get_drvdata(auxdev);

        mutex_lock(&isst_tpmi_dev_lock);

        tpmi_sst->partition_mask_current &= ~BIT(plat_info->partition);
        tpmi_sst->power_domain_info[plat_info->partition] = NULL;

        /* Free the package instance once all partitions are removed */
        if (tpmi_sst->partition_mask_current == 0) {
                isst_common.sst_inst[tpmi_sst->package_id] = NULL;
                kfree(tpmi_sst);
        }

        mutex_unlock(&isst_tpmi_dev_lock);
}
EXPORT_SYMBOL_NS_GPL(tpmi_sst_dev_remove, "INTEL_TPMI_SST");

/*
 * Bits tested against sst_header.cap_mask by the suspend/resume handlers
 * below: the CP bit gates save/restore of the SST-CP control, CLOS config
 * and CLOS association registers, the PP bit gates save/restore of the
 * SST-PP control register.
 *
 * NOTE(review): the struct sst_header documentation describes these as
 * bit[8]/bit[9]; presumably that numbering is relative to the whole header
 * register while cap_mask itself starts at bit 8 - confirm.
 */
#define SST_PP_CAP_CP_ENABLE    BIT(0)
#define SST_PP_CAP_PP_ENABLE    BIT(1)

void tpmi_sst_dev_suspend(struct auxiliary_device *auxdev)
{
        struct tpmi_sst_struct *tpmi_sst = auxiliary_get_drvdata(auxdev);
        struct tpmi_per_power_domain_info *power_domain_info, *pd_info;
        struct oobmsm_plat_info *plat_info;
        void __iomem *cp_base;
        int num_resources, i;

        plat_info = tpmi_get_platform_data(auxdev);
        if (!plat_info)
                return;

        power_domain_info = tpmi_sst->power_domain_info[plat_info->partition];
        num_resources = tpmi_sst->number_of_power_domains[plat_info->partition];

        for (i = 0; i < num_resources; i++) {
                pd_info = &power_domain_info[i];
                if (!pd_info || !pd_info->sst_base)
                        continue;

                if (!(pd_info->sst_header.cap_mask & SST_PP_CAP_CP_ENABLE))
                        goto process_pp_suspend;

                cp_base = pd_info->sst_base + pd_info->sst_header.cp_offset;
                pd_info->saved_sst_cp_control = readq(cp_base + SST_CP_CONTROL_OFFSET);
                memcpy_fromio(pd_info->saved_clos_configs, cp_base + SST_CLOS_CONFIG_0_OFFSET,
                              sizeof(pd_info->saved_clos_configs));
                memcpy_fromio(pd_info->saved_clos_assocs, cp_base + SST_CLOS_ASSOC_0_OFFSET,
                              sizeof(pd_info->saved_clos_assocs));

process_pp_suspend:
                if (!(pd_info->sst_header.cap_mask & SST_PP_CAP_PP_ENABLE))
                        continue;

                pd_info->saved_pp_control = readq(pd_info->sst_base +
                                                  pd_info->sst_header.pp_offset +
                                                  SST_PP_CONTROL_OFFSET);
        }
}
EXPORT_SYMBOL_NS_GPL(tpmi_sst_dev_suspend, "INTEL_TPMI_SST");

/**
 * tpmi_sst_dev_resume() - Restore SST register state of one partition
 * @auxdev: Auxiliary device being resumed
 *
 * Counterpart of tpmi_sst_dev_suspend(). For every power domain of the
 * partition that @auxdev belongs to and that has a mapped MMIO base, write
 * back the values saved in the per domain structure:
 *  - the SST-CP control register plus the CLOS config and CLOS association
 *    register arrays, when SST_PP_CAP_CP_ENABLE is set in cap_mask;
 *  - the SST-PP control register, when SST_PP_CAP_PP_ENABLE is set in
 *    cap_mask.
 *
 * Domains without a mapped base are skipped. Nothing is done when no
 * platform data can be obtained for @auxdev.
 */
void tpmi_sst_dev_resume(struct auxiliary_device *auxdev)
{
        struct tpmi_sst_struct *tpmi_sst = auxiliary_get_drvdata(auxdev);
        struct tpmi_per_power_domain_info *power_domain_info, *pd_info;
        struct oobmsm_plat_info *plat_info;
        void __iomem *cp_base;
        int num_resources, i;

        plat_info = tpmi_get_platform_data(auxdev);
        if (!plat_info)
                return;

        power_domain_info = tpmi_sst->power_domain_info[plat_info->partition];
        num_resources = tpmi_sst->number_of_power_domains[plat_info->partition];

        for (i = 0; i < num_resources; i++) {
                pd_info = &power_domain_info[i];
                if (!pd_info || !pd_info->sst_base)
                        continue;

                if (!(pd_info->sst_header.cap_mask & SST_PP_CAP_CP_ENABLE))
                        goto process_pp_resume;

                cp_base = pd_info->sst_base + pd_info->sst_header.cp_offset;
                writeq(pd_info->saved_sst_cp_control, cp_base + SST_CP_CONTROL_OFFSET);
                memcpy_toio(cp_base + SST_CLOS_CONFIG_0_OFFSET, pd_info->saved_clos_configs,
                            sizeof(pd_info->saved_clos_configs));
                memcpy_toio(cp_base + SST_CLOS_ASSOC_0_OFFSET, pd_info->saved_clos_assocs,
                            sizeof(pd_info->saved_clos_assocs));

process_pp_resume:
                if (!(pd_info->sst_header.cap_mask & SST_PP_CAP_PP_ENABLE))
                        continue;

                /*
                 * Restore through this domain's own mapping (pd_info), the
                 * same one the value was read from on suspend. Using the
                 * array head (power_domain_info) here would redirect every
                 * write to domain 0, and domain 0 may have no mapping at all.
                 */
                writeq(pd_info->saved_pp_control, pd_info->sst_base +
                       pd_info->sst_header.pp_offset + SST_PP_CONTROL_OFFSET);
        }
}
EXPORT_SYMBOL_NS_GPL(tpmi_sst_dev_resume, "INTEL_TPMI_SST");

/* API version passed in cb.api_version when tpmi_sst_init() registers the TPMI callbacks */
#define ISST_TPMI_API_VERSION   0x03

/**
 * tpmi_sst_init() - Take a reference on the SST TPMI core
 *
 * The first caller allocates the per package instance table
 * (isst_common.sst_inst, one slot per possible package) and registers the
 * ISST_IF_DEV_TPMI callbacks with the common ISST char device layer. Later
 * callers only increment the usage count. All of this is serialized by
 * isst_tpmi_dev_lock.
 *
 * Return: 0 on success, -ENOMEM when the instance table cannot be allocated,
 * or the error returned by isst_if_cdev_register(). On failure the usage
 * count is not incremented and no instance table is left behind.
 */
int tpmi_sst_init(void)
{
        struct isst_if_cmd_cb cb;
        int ret = 0;

        mutex_lock(&isst_tpmi_dev_lock);

        if (isst_core_usage_count) {
                ++isst_core_usage_count;
                goto init_done;
        }

        isst_common.sst_inst = kzalloc_objs(*isst_common.sst_inst,
                                            topology_max_packages());
        if (!isst_common.sst_inst) {
                ret = -ENOMEM;
                goto init_done;
        }

        memset(&cb, 0, sizeof(cb));
        cb.cmd_size = sizeof(struct isst_if_io_reg);
        cb.offset = offsetof(struct isst_if_io_regs, io_reg);
        cb.cmd_callback = NULL;
        cb.api_version = ISST_TPMI_API_VERSION;
        cb.def_ioctl = isst_if_def_ioctl;
        cb.owner = THIS_MODULE;
        ret = isst_if_cdev_register(ISST_IF_DEV_TPMI, &cb);
        if (ret) {
                kfree(isst_common.sst_inst);
                /* Do not leave a dangling pointer in the shared state */
                isst_common.sst_inst = NULL;
        } else {
                ++isst_core_usage_count;
        }
init_done:
        mutex_unlock(&isst_tpmi_dev_lock);
        return ret;
}
EXPORT_SYMBOL_NS_GPL(tpmi_sst_init, "INTEL_TPMI_SST");

void tpmi_sst_exit(void)
{
        mutex_lock(&isst_tpmi_dev_lock);
        if (isst_core_usage_count)
                --isst_core_usage_count;

        if (!isst_core_usage_count) {
                isst_if_cdev_unregister(ISST_IF_DEV_TPMI);
                kfree(isst_common.sst_inst);
        }
        mutex_unlock(&isst_tpmi_dev_lock);
}
EXPORT_SYMBOL_NS_GPL(tpmi_sst_exit, "INTEL_TPMI_SST");

MODULE_IMPORT_NS("INTEL_TPMI");
MODULE_IMPORT_NS("INTEL_TPMI_POWER_DOMAIN");

MODULE_DESCRIPTION("ISST TPMI interface module");
MODULE_LICENSE("GPL");