root/sys/dev/pci/drm/amd/display/dc/dml2/dml2_policy.c
/* SPDX-License-Identifier: MIT */
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */

#include "dml2_policy.h"

/*
 * Fill in the two missing clocks of a partially specified state so that
 * DCFCLK, FCLK and DRAM speed all correspond to the same derated bandwidth.
 *
 * The first clock found to be greater than zero, tested in the order
 * DCFCLK, FCLK, DRAM speed, is used as the anchor. Its derated bandwidth is
 * computed from the bounding box (bus/channel widths and the
 * pct_ideal_*_bw_after_urgent percentages) and the other two clocks are
 * overwritten with the values that give that same bandwidth on their own
 * path. If none of the three clocks is greater than zero, @entry is left
 * unchanged.
 */
static void get_optimal_ntuple(
        const struct soc_bounding_box_st *socbb,
        struct soc_state_bounding_box_st *entry)
{
        float anchor_bw;

        if (entry->dcfclk_mhz > 0) {
                /* Anchor on DCFCLK: bandwidth available on the SDP path. */
                anchor_bw = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes *
                        ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));

                entry->fabricclk_mhz = anchor_bw /
                        (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
                entry->dram_speed_mts = anchor_bw /
                        (socbb->num_chans * socbb->dram_channel_width_bytes *
                         ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
                return;
        }

        if (entry->fabricclk_mhz > 0) {
                /* Anchor on FCLK: bandwidth available on the fabric path. */
                anchor_bw = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes *
                        ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));

                entry->dcfclk_mhz = anchor_bw /
                        (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));
                entry->dram_speed_mts = anchor_bw /
                        (socbb->num_chans * socbb->dram_channel_width_bytes *
                         ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
                return;
        }

        if (entry->dram_speed_mts > 0) {
                /* Anchor on DRAM speed: bandwidth available from memory. */
                anchor_bw = (float)(entry->dram_speed_mts * socbb->num_chans *
                        socbb->dram_channel_width_bytes *
                        ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));

                entry->fabricclk_mhz = anchor_bw /
                        (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
                entry->dcfclk_mhz = anchor_bw /
                        (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));
        }
}

/*
 * Return the bandwidth a state can actually deliver: the smallest of its
 * derated DRAM, fabric and SDP bandwidths.
 *
 * Each bandwidth is the state's clock (or DRAM transfer rate) multiplied by
 * the matching width from the bounding box and scaled by the corresponding
 * pct_ideal_*_bw_after_urgent percentage. The result is presumably in
 * MBytes/s, as the function name indicates.
 */
static float calculate_net_bw_in_mbytes_sec(const struct soc_bounding_box_st *socbb,
        struct soc_state_bounding_box_st *entry)
{
        const float dram_bw = (float)(entry->dram_speed_mts *  socbb->num_chans *
                socbb->dram_channel_width_bytes *
                ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
        const float fabric_bw = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes *
                ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
        const float sdp_bw = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes *
                ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));

        /* Start from DRAM and let a strictly smaller path take over. */
        const float dram_or_fabric = (fabric_bw < dram_bw) ? fabric_bw : dram_bw;

        return (sdp_bw < dram_or_fabric) ? sdp_bw : dram_or_fabric;
}

/*
 * Insert @entry into @table, keeping the table ordered by ascending net
 * bandwidth (as computed by calculate_net_bw_in_mbytes_sec()).
 *
 * @entry is first completed in place by get_optimal_ntuple(), so the caller's
 * copy is modified. The new state is placed before the first existing state
 * whose net bandwidth is greater than or equal to its own; later states are
 * shifted up by one slot. The stored copy has its DCFCLK, FCLK and DRAM speed
 * truncated to whole numbers.
 *
 * If the table is already full the entry is dropped and the table is left
 * unchanged, rather than writing past the end of state_array.
 */
static void insert_entry_into_table_sorted(const struct soc_bounding_box_st *socbb,
        struct soc_states_st *table,
        struct soc_state_bounding_box_st *entry)
{
        /*
         * NOTE(review): the capacity is taken from the declared size of
         * state_array, which relies on it being a fixed-size array member of
         * struct soc_states_st - confirm against the struct definition.
         */
        const unsigned int capacity = sizeof(table->state_array) / sizeof(table->state_array[0]);
        int index = 0;
        int i = 0;
        float net_bw_of_new_state = 0;

        get_optimal_ntuple(socbb, entry);

        /* No free slot: shifting or storing would run off the array. */
        if (table->num_states >= capacity)
                return;

        if (table->num_states > 0) {
                net_bw_of_new_state = calculate_net_bw_in_mbytes_sec(socbb, entry);

                /* Skip every existing state with strictly lower bandwidth. */
                while (index < (int) table->num_states &&
                        net_bw_of_new_state > calculate_net_bw_in_mbytes_sec(socbb, &table->state_array[index]))
                        index++;

                /* Open a gap at the insertion point. */
                for (i = table->num_states; i > index; i--)
                        table->state_array[i] = table->state_array[i - 1];
        }

        table->state_array[index] = *entry;
        /* Clocks are stored as whole numbers; the fractional part is discarded. */
        table->state_array[index].dcfclk_mhz = (int)entry->dcfclk_mhz;
        table->state_array[index].fabricclk_mhz = (int)entry->fabricclk_mhz;
        table->state_array[index].dram_speed_mts = (int)entry->dram_speed_mts;
        table->num_states++;
}

/*
 * Remove the state at @index from @table, shifting all later states down by
 * one slot and zeroing the slot that becomes unused.
 *
 * Does nothing when the table is empty or @index does not refer to a valid
 * state. Without the range check an out-of-range index would still shrink
 * the table and wipe its last valid state.
 */
static void remove_entry_from_table_at_index(struct soc_states_st *table,
        unsigned int index)
{
        unsigned int i;

        if (table->num_states == 0 || index >= table->num_states)
                return;

        /* Close the gap left by the removed state. */
        for (i = index; i + 1 < table->num_states; i++)
                table->state_array[i] = table->state_array[i + 1];

        /* Clear the now-unused trailing slot. */
        table->num_states--;
        memset(&table->state_array[table->num_states], 0, sizeof(struct soc_state_bounding_box_st));
}

/*
 * Build a synthetic table of SoC states in p->out_states from the input
 * states in p->in_states, the DCFCLK STA list in p->dcfclk_stas_mhz and the
 * bounding box p->in_bbox. s->entry is used as working storage.
 *
 * Steps:
 *  1. Scan every slot of the input state array for the maximum of each clock
 *     and count the slots with a non-zero FCLK / DRAM speed.
 *  2. Seed a template state from input state 0, using the maximum DISPCLK,
 *     DPPCLK, DTBCLK and PHYCLK, and DSCCLK = max DISPCLK / 3.
 *  3. Insert one bandwidth-balanced state per DCFCLK STA, per UCLK DPM and
 *     per FCLK DPM (or only the last FCLK DPM when there are at most two).
 *  4. Drop states needing a clock above the input maximum, round UCLK (and
 *     FCLK when it has more than two DPMs) up to an input DPM, clamp FCLK and
 *     DCFCLK to the values of input state 0, and remove neighbouring
 *     duplicates.
 *
 * NOTE(review): the FCLK/UCLK counts are used as "the first N input states";
 * this presumably relies on the non-zero entries being packed at the front
 * of the input array in ascending order - not verified here.
 *
 * Returns 0 on success, or -1 if the input states contain no non-zero
 * DCFCLK, DISPCLK, DPPCLK, PHYCLK or DTBCLK.
 */
int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s,
        struct dml2_policy_build_synthetic_soc_states_params *p)
{
        int i, j;
        unsigned int min_fclk_mhz = p->in_states->state_array[0].fabricclk_mhz;
        unsigned int min_dcfclk_mhz = p->in_states->state_array[0].dcfclk_mhz;
        unsigned int min_socclk_mhz = p->in_states->state_array[0].socclk_mhz;

        int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0,
                max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0,
                max_uclk_mhz = 0, max_socclk_mhz = 0;

        int num_uclk_dpms = 0, num_fclk_dpms = 0;

        // Find the maximum of every clock and count the FCLK/UCLK DPM levels
        for (i = 0; i < __DML_MAX_STATE_ARRAY_SIZE__; i++) {
                if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz)
                        max_dcfclk_mhz = (int) p->in_states->state_array[i].dcfclk_mhz;
                if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz)
                        max_fclk_mhz = (int) p->in_states->state_array[i].fabricclk_mhz;
                if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz)
                        max_socclk_mhz = (int) p->in_states->state_array[i].socclk_mhz;
                if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz)
                        max_uclk_mhz = (int) p->in_states->state_array[i].dram_speed_mts;
                if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz)
                        max_dispclk_mhz = (int) p->in_states->state_array[i].dispclk_mhz;
                if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz)
                        max_dppclk_mhz = (int) p->in_states->state_array[i].dppclk_mhz;
                if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz)
                        max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz;
                if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz)
                        max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz;

                if (p->in_states->state_array[i].fabricclk_mhz > 0)
                        num_fclk_dpms++;
                if (p->in_states->state_array[i].dram_speed_mts > 0)
                        num_uclk_dpms++;
        }

        // These clocks are mandatory; without them no state can be built
        if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dppclk_mhz || !max_phyclk_mhz || !max_dtbclk_mhz)
                return -1;

        p->out_states->num_states = 0;

        // Template state: input state 0 with the display clocks at their maximum
        s->entry = p->in_states->state_array[0];

        s->entry.dispclk_mhz = max_dispclk_mhz;
        s->entry.dppclk_mhz = max_dppclk_mhz;
        s->entry.dtbclk_mhz = max_dtbclk_mhz;
        s->entry.phyclk_mhz = max_phyclk_mhz;
        s->entry.dscclk_mhz = max_dispclk_mhz / 3;

        // Insert all the DCFCLK STAs first
        for (i = 0; i < p->num_dcfclk_stas; i++) {
                s->entry.dcfclk_mhz = p->dcfclk_stas_mhz[i];
                s->entry.fabricclk_mhz = 0;
                s->entry.dram_speed_mts = 0;
                // The first STA keeps the SOCCLK of input state 0
                if (i > 0)
                        s->entry.socclk_mhz = max_socclk_mhz;

                insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
        }

        // Insert the UCLK DPMS
        for (i = 0; i < num_uclk_dpms; i++) {
                s->entry.dcfclk_mhz = 0;
                s->entry.fabricclk_mhz = 0;
                s->entry.dram_speed_mts = p->in_states->state_array[i].dram_speed_mts;
                if (i == 0)
                        s->entry.socclk_mhz = min_socclk_mhz;
                else
                        s->entry.socclk_mhz = max_socclk_mhz;

                insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
        }

        if (num_fclk_dpms > 2) {
                // Insert FCLK DPMs (if present)
                for (i = 0; i < num_fclk_dpms; i++) {
                        s->entry.dcfclk_mhz = 0;
                        s->entry.fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz;
                        s->entry.dram_speed_mts = 0;

                        insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
                }
        } else if (num_fclk_dpms > 0) {
                // Add max FCLK (the last counted FCLK DPM). Skipped when the input
                // has no FCLK DPM at all, which would otherwise index slot -1.
                s->entry.dcfclk_mhz = 0;
                s->entry.fabricclk_mhz = p->in_states->state_array[num_fclk_dpms - 1].fabricclk_mhz;
                s->entry.dram_speed_mts = 0;

                insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
        }

        // Remove states that require higher clocks than are supported
        for (i = p->out_states->num_states - 1; i >= 0; i--) {
                if (p->out_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz ||
                        p->out_states->state_array[i].fabricclk_mhz > max_fclk_mhz ||
                        p->out_states->state_array[i].dram_speed_mts > max_uclk_mhz)
                        remove_entry_from_table_at_index(p->out_states, i);
        }

        // At this point, the table contains all "points of interest" based on
        // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
        // ratios (by derate) are exact.

        // Round up UCLK to DPMs
        for (i = p->out_states->num_states - 1; i >= 0; i--) {
                for (j = 0; j < num_uclk_dpms; j++) {
                        if (p->in_states->state_array[j].dram_speed_mts >= p->out_states->state_array[i].dram_speed_mts) {
                                p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[j].dram_speed_mts;
                                break;
                        }
                }
        }

        // If FCLK is coarse grained, round up to next DPMs
        if (num_fclk_dpms > 2) {
                for (i = p->out_states->num_states - 1; i >= 0; i--) {
                        for (j = 0; j < num_fclk_dpms; j++) {
                                if (p->in_states->state_array[j].fabricclk_mhz >= p->out_states->state_array[i].fabricclk_mhz) {
                                        p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[j].fabricclk_mhz;
                                        break;
                                }
                        }
                }
        }

        // Clamp to min FCLK/DCFCLK
        for (i = p->out_states->num_states - 1; i >= 0; i--) {
                if (p->out_states->state_array[i].fabricclk_mhz < min_fclk_mhz)
                        p->out_states->state_array[i].fabricclk_mhz = min_fclk_mhz;
                if (p->out_states->state_array[i].dcfclk_mhz < min_dcfclk_mhz)
                        p->out_states->state_array[i].dcfclk_mhz = min_dcfclk_mhz;
        }

        // Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
        i = 0;
        while (i < (int) p->out_states->num_states - 1) {
                if (p->out_states->state_array[i].dcfclk_mhz == p->out_states->state_array[i + 1].dcfclk_mhz &&
                        p->out_states->state_array[i].fabricclk_mhz == p->out_states->state_array[i + 1].fabricclk_mhz &&
                        p->out_states->state_array[i].dram_speed_mts == p->out_states->state_array[i + 1].dram_speed_mts) {
                        // Stay on the same index: the next state has moved into it
                        remove_entry_from_table_at_index(p->out_states, i);
                } else {
                        i++;
                }
        }

        return 0;
}

/*
 * Initialise @policy with the default (unoptimized) mode evaluation policy.
 *
 * Every plane gets the same per-plane settings, then the global flags are
 * set. For the DCN35, DCN36 and DCN351 projects a few of the global flags
 * are overridden afterwards.
 */
void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy)
{
        int plane;

        /* Per-plane defaults. */
        for (plane = 0; plane < __DML_NUM_PLANES__; plane++) {
                /* TOREVIEW: Is this still needed?  When is MPCC useful for pstate given CRB? */
                policy->MPCCombineUse[plane] = dml_mpc_as_needed_for_voltage;
                policy->ODMUse[plane] = dml_odm_use_policy_combine_as_needed;
                policy->ImmediateFlipRequirement[plane] = dml_immediate_flip_required;
                policy->AllowForPStateChangeOrStutterInVBlank[plane] = dml_prefetch_support_uclk_fclk_and_stutter_if_possible;
        }

        /*
         * Global defaults, initialised as per spreadsheet. These may need to
         * be reviewed and changed later on as per Jun's earlier comments.
         * The flags marked TOREVIEW have a meaning that is still to be
         * clarified.
         */
        policy->UseUnboundedRequesting = dml_unbounded_requesting_enable;
        policy->UseMinimumRequiredDCFCLK = false;
        policy->DRAMClockChangeRequirementFinal = true; /* TOREVIEW */
        policy->FCLKChangeRequirementFinal = true; /* TOREVIEW */
        policy->USRRetrainingRequiredFinal = true;
        policy->EnhancedPrefetchScheduleAccelerationFinal = true; /* TOREVIEW */
        policy->NomDETInKByteOverrideEnable = false;
        policy->NomDETInKByteOverrideValue = 0;
        policy->DCCProgrammingAssumesScanDirectionUnknownFinal = true;
        policy->SynchronizeTimingsFinal = true;
        policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = true;
        policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = true; /* TOREVIEW */
        policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = true; /* TOREVIEW */

        /* Project-specific overrides of the defaults above. */
        switch (project) {
        case dml_project_dcn35:
        case dml_project_dcn36:
        case dml_project_dcn351:
                policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false;
                policy->EnhancedPrefetchScheduleAccelerationFinal = 0;
                policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/
                policy->UseOnlyMaxPrefetchModes = 1;
                break;
        default:
                break;
        }
}