#include <linux/clk.h>
#include <linux/devfreq.h>
#include <linux/math64.h>
#include <linux/platform_device.h>
#include <linux/pm_opp.h>

#include <drm/drm_managed.h>

#include <subdev/clk.h>
#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "priv.h"
#include "gk20a_devfreq.h"
#include "gk20a.h"
#include "gp10b.h"
#define PMU_BUSY_CYCLES_NORM_MAX 1000U
#define PWR_PMU_IDLE_COUNTER_TOTAL 0U
#define PWR_PMU_IDLE_COUNTER_BUSY 4U
#define PWR_PMU_IDLE_COUNT_REG_OFFSET 0x0010A508U
#define PWR_PMU_IDLE_COUNT_REG_SIZE 16U
#define PWR_PMU_IDLE_COUNT_MASK 0x7FFFFFFFU
#define PWR_PMU_IDLE_COUNT_RESET_VALUE (0x1U << 31U)
#define PWR_PMU_IDLE_INTR_REG_OFFSET 0x0010A9E8U
#define PWR_PMU_IDLE_INTR_ENABLE_VALUE 0U
#define PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET 0x0010A9ECU
#define PWR_PMU_IDLE_INTR_STATUS_MASK 0x00000001U
#define PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE 0x1U
#define PWR_PMU_IDLE_THRESHOLD_REG_OFFSET 0x0010A8A0U
#define PWR_PMU_IDLE_THRESHOLD_REG_SIZE 4U
#define PWR_PMU_IDLE_THRESHOLD_MAX_VALUE 0x7FFFFFFFU
#define PWR_PMU_IDLE_CTRL_REG_OFFSET 0x0010A50CU
#define PWR_PMU_IDLE_CTRL_REG_SIZE 16U
#define PWR_PMU_IDLE_CTRL_VALUE_MASK 0x3U
#define PWR_PMU_IDLE_CTRL_VALUE_BUSY 0x2U
#define PWR_PMU_IDLE_CTRL_VALUE_ALWAYS 0x3U
#define PWR_PMU_IDLE_CTRL_FILTER_MASK (0x1U << 2)
#define PWR_PMU_IDLE_CTRL_FILTER_DISABLED 0x0U
#define PWR_PMU_IDLE_MASK_REG_OFFSET 0x0010A504U
#define PWR_PMU_IDLE_MASK_REG_SIZE 16U
#define PWR_PMU_IDLE_MASK_GR_ENABLED 0x1U
#define PWR_PMU_IDLE_MASK_CE_2_ENABLED 0x200000U
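
/**
 * struct gk20a_devfreq - devfreq instance for the Tegra-integrated GPU
 * @devfreq: devfreq device handle
 * @regs: mapped GPU register aperture holding the PMU idle counters
 * @gov_data: tunables handed to the simple_ondemand governor
 * @busy_time: busy time of the sampling window that just ended
 * @total_time: wall time of the sampling window that just ended
 * @time_last_update: timestamp of the previous utilization sample
 */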
struct gk20a_devfreq {
struct devfreq *devfreq;
void __iomem *regs;
struct devfreq_simple_ondemand_data gov_data;
ktime_t busy_time;
ktime_t total_time;
ktime_t time_last_update;
};
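
/* Look up the per-chip devfreq instance hanging off the nvkm clk subdev. */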
static struct gk20a_devfreq *dev_to_gk20a_devfreq(struct device *dev)
{
struct nouveau_drm *drm = dev_get_drvdata(dev);
struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
struct nvkm_clk *base = nvkm_clk(subdev);

	switch (drm->nvkm->chipset) {
	case 0x13b:
		return gp10b_clk(base)->devfreq;
	default:
		return gk20a_clk(base)->devfreq;
	}
}
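
/*
 * Program the PMU idle counters: the TOTAL counter counts every cycle
 * (CTRL_VALUE_ALWAYS) while the BUSY counter counts only cycles on which a
 * unit selected in the idle mask (GR and CE2) is busy (CTRL_VALUE_BUSY).
 * The TOTAL counter's threshold is armed at the maximum count; the
 * interrupt line itself stays masked and the latched status bit is instead
 * polled in gk20a_devfreq_update_utilization() to detect a saturated
 * window.
 */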
static void gk20a_pmu_init_perfmon_counter(struct gk20a_devfreq *gdevfreq)
{
u32 data;
writel(PWR_PMU_IDLE_INTR_ENABLE_VALUE,
gdevfreq->regs + PWR_PMU_IDLE_INTR_REG_OFFSET);
writel(PWR_PMU_IDLE_THRESHOLD_MAX_VALUE,
gdevfreq->regs + PWR_PMU_IDLE_THRESHOLD_REG_OFFSET +
(PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_THRESHOLD_REG_SIZE));
data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
(PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE));
data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK);
data |= PWR_PMU_IDLE_CTRL_VALUE_ALWAYS | PWR_PMU_IDLE_CTRL_FILTER_DISABLED;
writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
(PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE));
	writel(PWR_PMU_IDLE_MASK_GR_ENABLED | PWR_PMU_IDLE_MASK_CE_2_ENABLED,
gdevfreq->regs + PWR_PMU_IDLE_MASK_REG_OFFSET +
(PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_MASK_REG_SIZE));
data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
(PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE));
data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK);
data |= PWR_PMU_IDLE_CTRL_VALUE_BUSY | PWR_PMU_IDLE_CTRL_FILTER_DISABLED;
writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
(PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE));
}
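
/* Read the low 31 bits of an idle counter; bit 31 is write-one-to-reset. */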
static u32 gk20a_pmu_read_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id)
{
u32 ret;
ret = readl(gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET +
(counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE));
return ret & PWR_PMU_IDLE_COUNT_MASK;
}
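
/* Reset an idle counter by writing its reset bit (bit 31). */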
static void gk20a_pmu_reset_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id)
{
writel(PWR_PMU_IDLE_COUNT_RESET_VALUE, gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET +
(counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE));
}
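
/*
 * The idle-threshold status bit latches once a counter reaches its
 * programmed threshold; it is polled rather than serviced as an interrupt.
 */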
static u32 gk20a_pmu_read_idle_intr_status(struct gk20a_devfreq *gdevfreq)
{
u32 ret;
ret = readl(gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET);
return ret & PWR_PMU_IDLE_INTR_STATUS_MASK;
}
static void gk20a_pmu_clear_idle_intr_status(struct gk20a_devfreq *gdevfreq)
{
writel(PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE,
gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET);
}
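
/*
 * Sample the busy/total counters and convert them into the busy_time /
 * total_time pair devfreq expects. The load is first normalized to
 * PMU_BUSY_CYCLES_NORM_MAX:
 *
 *	norm = busy_cycles * 1000 / total_cycles
 *
 * and then scaled onto the elapsed wall time. If the threshold status bit
 * latched (a counter hit its limit) the window is treated as fully busy.
 * Only the busy/total ratio matters to the governor, so it is harmless
 * that total_time is kept in microseconds (from ktime_us_delta()) while
 * devfreq nominally expects nanoseconds.
 */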
static void gk20a_devfreq_update_utilization(struct gk20a_devfreq *gdevfreq)
{
ktime_t now, last;
u64 busy_cycles, total_cycles;
u32 norm, intr_status;
now = ktime_get();
last = gdevfreq->time_last_update;
gdevfreq->total_time = ktime_us_delta(now, last);
busy_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
total_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
intr_status = gk20a_pmu_read_idle_intr_status(gdevfreq);
gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
	if (intr_status) {
norm = PMU_BUSY_CYCLES_NORM_MAX;
gk20a_pmu_clear_idle_intr_status(gdevfreq);
} else if (total_cycles == 0ULL || busy_cycles > total_cycles) {
norm = PMU_BUSY_CYCLES_NORM_MAX;
} else {
norm = (u32)div64_u64(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX,
total_cycles);
}
gdevfreq->busy_time = div_u64(gdevfreq->total_time * norm, PMU_BUSY_CYCLES_NORM_MAX);
gdevfreq->time_last_update = now;
}
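
/*
 * devfreq ->target hook: pick the slowest pstate whose GPC clock satisfies
 * the requested frequency (falling back to the fastest one) and request it
 * for both the AC and DC user states.
 */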
static int gk20a_devfreq_target(struct device *dev, unsigned long *freq,
u32 flags)
{
struct nouveau_drm *drm = dev_get_drvdata(dev);
struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
struct nvkm_clk *base = nvkm_clk(subdev);
struct nvkm_pstate *pstates = base->func->pstates;
int nr_pstates = base->func->nr_pstates;
int i, ret;
for (i = 0; i < nr_pstates - 1; i++)
if (pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV >= *freq)
break;
ret = nvkm_clk_ustate(base, pstates[i].pstate, 0);
ret |= nvkm_clk_ustate(base, pstates[i].pstate, 1);
if (ret) {
nvkm_error(subdev, "cannot update clock\n");
return ret;
}
*freq = pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV;
return 0;
}
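
/* devfreq ->get_cur_freq hook: read back the current GPC clock rate. */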
static int gk20a_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
struct nouveau_drm *drm = dev_get_drvdata(dev);
struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
struct nvkm_clk *base = nvkm_clk(subdev);
*freq = nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV;
return 0;
}
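
/* Zero the sampling state and restart the idle counters for a new window. */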
static void gk20a_devfreq_reset(struct gk20a_devfreq *gdevfreq)
{
gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
gk20a_pmu_clear_idle_intr_status(gdevfreq);
gdevfreq->busy_time = 0;
gdevfreq->total_time = 0;
gdevfreq->time_last_update = ktime_get();
}
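
/*
 * devfreq ->get_dev_status hook: report the current frequency and the
 * busy/total times of the window that just ended, then reset the counters
 * so the next poll starts a fresh window.
 */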
static int gk20a_devfreq_get_dev_status(struct device *dev,
struct devfreq_dev_status *status)
{
struct nouveau_drm *drm = dev_get_drvdata(dev);
struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);
gk20a_devfreq_get_cur_freq(dev, &status->current_frequency);
gk20a_devfreq_update_utilization(gdevfreq);
status->busy_time = ktime_to_ns(gdevfreq->busy_time);
status->total_time = ktime_to_ns(gdevfreq->total_time);
gk20a_devfreq_reset(gdevfreq);
	/* Guard the percentage against a division by zero on a tiny window. */
	NV_DEBUG(drm, "busy %lu total %lu %lu %% freq %lu MHz\n",
		 status->busy_time, status->total_time,
		 status->total_time >= 100 ? status->busy_time / (status->total_time / 100) : 0,
		 status->current_frequency / 1000 / 1000);
return 0;
}
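
/*
 * Poll every 50ms. The delayed (rather than deferrable) timer keeps the
 * samples coming on schedule even when the CPU is otherwise idle.
 */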
static struct devfreq_dev_profile gk20a_devfreq_profile = {
.timer = DEVFREQ_TIMER_DELAYED,
.polling_ms = 50,
.target = gk20a_devfreq_target,
.get_cur_freq = gk20a_devfreq_get_cur_freq,
.get_dev_status = gk20a_devfreq_get_dev_status,
};
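
/*
 * Register the devfreq device: expose one OPP per pstate, prime the PMU
 * idle counters, and attach the simple_ondemand governor with a 45%
 * upthreshold and 5% downdifferential.
 */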
int gk20a_devfreq_init(struct nvkm_clk *base, struct gk20a_devfreq **gdevfreq)
{
struct nvkm_device *device = base->subdev.device;
struct nouveau_drm *drm = dev_get_drvdata(device->dev);
struct nvkm_device_tegra *tdev = device->func->tegra(device);
struct nvkm_pstate *pstates = base->func->pstates;
int nr_pstates = base->func->nr_pstates;
struct gk20a_devfreq *new_gdevfreq;
int i;
new_gdevfreq = drmm_kzalloc(drm->dev, sizeof(struct gk20a_devfreq), GFP_KERNEL);
if (!new_gdevfreq)
return -ENOMEM;
new_gdevfreq->regs = tdev->regs;
	for (i = 0; i < nr_pstates; i++)
		dev_pm_opp_add(device->dev,
			       pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV, 0);
gk20a_pmu_init_perfmon_counter(new_gdevfreq);
gk20a_devfreq_reset(new_gdevfreq);
gk20a_devfreq_profile.initial_freq =
nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV;
new_gdevfreq->gov_data.upthreshold = 45;
new_gdevfreq->gov_data.downdifferential = 5;
new_gdevfreq->devfreq = devm_devfreq_add_device(device->dev,
&gk20a_devfreq_profile,
DEVFREQ_GOV_SIMPLE_ONDEMAND,
&new_gdevfreq->gov_data);
if (IS_ERR(new_gdevfreq->devfreq))
return PTR_ERR(new_gdevfreq->devfreq);
*gdevfreq = new_gdevfreq;
return 0;
}
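
/*
 * PM helpers: pause and resume devfreq polling across suspend. The NULL
 * checks let them run safely before gk20a_devfreq_init() has completed.
 */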
int gk20a_devfreq_resume(struct device *dev)
{
struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);
if (!gdevfreq || !gdevfreq->devfreq)
return 0;
return devfreq_resume_device(gdevfreq->devfreq);
}
int gk20a_devfreq_suspend(struct device *dev)
{
struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);
if (!gdevfreq || !gdevfreq->devfreq)
return 0;
return devfreq_suspend_device(gdevfreq->devfreq);
}