root/src/add-ons/accelerants/radeon/pll.c
/*
        Copyright (c) 2002-2004, Thomas Kurschel


        Part of Radeon accelerant

        Takes care of PLL
*/


#include "radeon_accelerant.h"

#include "pll_regs.h"
#include "pll_access.h"
#include "utils.h"
#include <stdlib.h>
#include "set_mode.h"


// poll the reference divider register of the PLL belonging to the given
// CRTC (first PLL for crtc_idx == 0, second PLL otherwise) until the
// RADEON_PPLL_ATOMIC_UPDATE_R bit reads as zero.
// the number of polls is capped at 10000: waiting forever would be unsafe,
// and some r300 never leave this state (reported by XFree86); when the cap
// is hit we return silently, i.e. the caller cannot tell whether the bit
// was really cleared
static void Radeon_PLLWaitForReadUpdateComplete(
        accelerator_info *ai, int crtc_idx )
{
        int polls_left = 10000;

        while( polls_left-- > 0 ) {
                if( (Radeon_INPLL( ai->regs, ai->si->asic,
                                crtc_idx == 0 ? RADEON_PPLL_REF_DIV : RADEON_P2PLL_REF_DIV )
                        & RADEON_PPLL_ATOMIC_UPDATE_R) == 0 )
                        break;
        }
}

// trigger an atomic update of the PLL belonging to the given CRTC:
// first wait (bounded) until RADEON_PPLL_ATOMIC_UPDATE_R reads as zero,
// then set RADEON_PPLL_ATOMIC_UPDATE_W in the reference divider register
// of that PLL, leaving all other bits of the register as they are
static void Radeon_PLLWriteUpdate(
        accelerator_info *ai, int crtc_idx )
{
        Radeon_PLLWaitForReadUpdateComplete( ai, crtc_idx );

        if( crtc_idx == 0 ) {
                // first PLL
                Radeon_OUTPLLP( ai->regs, ai->si->asic,
                        RADEON_PPLL_REF_DIV,
                        RADEON_PPLL_ATOMIC_UPDATE_W,
                        ~RADEON_PPLL_ATOMIC_UPDATE_W );
        } else {
                // second PLL
                Radeon_OUTPLLP( ai->regs, ai->si->asic,
                        RADEON_P2PLL_REF_DIV,
                        RADEON_PPLL_ATOMIC_UPDATE_W,
                        ~RADEON_PPLL_ATOMIC_UPDATE_W );
        }
}

// calculate PLL dividers
//
// exhaustively tries every combination of extra post divider, post divider
// and reference divider that keeps the PLL within its limits, derives the
// matching feedback divider and keeps the combination that fits best
//
// pll - info about PLL; frequencies are in 10 kHz units, both divider
//       tables must be terminated by an entry whose divider is 0
// freq - wished frequency in Hz
// fixed_post_div - if != 0, fixed divider to be used
// dividers - filled with proper dividers
//
// if no combination satisfies the limits (or fixed_post_div is not part of
// pll->post_divs), the fall-back values "first table entries, ref = 1,
// feedback = 1, freq = 1" are reported; there is no error return
void Radeon_CalcPLLDividers(
        const pll_info *pll, uint32 freq, uint fixed_post_div, pll_dividers *dividers )
{
        // the PLL gets the reference
        //              pll_in = ref_freq / ref_div
        // this must be within pll_in_min..pll_in_max
        // the VCO of the PLL has the frequency
        //              vco = pll_in * feedback_div * extra_feedback_div
        //                  = ref_freq / ref_div * feedback_div * extra_feedback_div
        // where extra_feedback_div is hard-wired
        // this must be within vco_min..vco_max
        // the pixel clock is calculated as
        //              pll_out = vco / post_div / extra_post_div
        //                      = ref_freq * feedback_div * extra_feedback_div / (ref_div * post_div * extra_post_div)
        // where extra_post_div _may_ be choosable between 1 and 2

        // synonyms are:
        //              ref_div = M
        //              feedback_div = N
        //              post_div = P

        int
                min_post_div_idx, max_post_div_idx,
                post_div_idx, extra_post_div_idx,
                best_post_div_idx, best_extra_post_div_idx;

        uint32
                best_ref_div, best_feedback_div, best_freq;
        int32
                best_error, best_vco_dev;

        // larger than any error we can encounter, so the first valid
        // combination always wins
        best_error = 999999999;

        // make compiler happy; these are also the values reported if
        // no valid combination is found
        best_post_div_idx = 0;
        best_extra_post_div_idx = 0;
        best_ref_div = 1;
        best_feedback_div = 1;
        best_freq = 1;
        best_vco_dev = 1;

        if( fixed_post_div == 0 ) {
                // all post dividers are allowed: search the entire table
                min_post_div_idx = 0;
                for(
                        max_post_div_idx = 0;
                        pll->post_divs[max_post_div_idx].divider != 0;
                        ++max_post_div_idx )
                        ;
                --max_post_div_idx;
        } else {
                // locate the requested divider; stop at the table terminator so
                // an unsupported divider cannot make us run past the end of the table
                for(
                        min_post_div_idx = 0;
                        pll->post_divs[min_post_div_idx].divider != 0
                        && pll->post_divs[min_post_div_idx].divider != fixed_post_div;
                        ++min_post_div_idx )
                        ;

                if( pll->post_divs[min_post_div_idx].divider == 0 ) {
                        // requested divider is not supported by this PLL:
                        // make the search range empty, so the fall-back values are reported
                        min_post_div_idx = 0;
                        max_post_div_idx = -1;
                } else {
                        max_post_div_idx = min_post_div_idx;
                }

                //SHOW_FLOW( 2, "idx of fixed post divider: %d", min_post_div_idx );
        }

        // post dividers are quite restrictive, so they provide little search space only
        for( extra_post_div_idx = 0; pll->extra_post_divs[extra_post_div_idx].divider != 0; ++extra_post_div_idx ) {
                for( post_div_idx = min_post_div_idx; post_div_idx <= max_post_div_idx; ++post_div_idx ) {
                        uint32 ref_div;
                        uint32 post_div =
                                pll->post_divs[post_div_idx].divider
                                * pll->extra_post_divs[extra_post_div_idx].divider;

                        // post divider determines VCO frequency, so determine and verify it;
                        // freq is in Hz, everything else is in 10 kHz units
                        // we use 10 kHz units as long as possible to avoid uint32 overflows
                        uint32 vco = (freq / 10000) * post_div;

                        //SHOW_FLOW( 2, "post_div=%d, vco=%d", post_div, vco );

                        if( vco < pll->vco_min || vco > pll->vco_max )
                                continue;

                        // we can either iterate through feedback or reference dividers;
                        // usually, there are fewer possible reference dividers, so I picked them
                        for( ref_div = pll->min_ref_div; ref_div <= pll->max_ref_div; ++ref_div ) {
                                uint32 feedback_div, cur_freq;
                                int32 error, vco_dev;

                                // this implies the frequency of the lock unit
                                uint32 pll_in = pll->ref_freq / ref_div;

                                if( pll_in < pll->pll_in_min || pll_in > pll->pll_in_max )
                                        continue;

                                // well, only one variable is left
                                // timing is almost certainly valid, time to use Hz units
                                feedback_div = RoundDiv64(
                                        (int64)freq * ref_div * post_div,
                                        pll->ref_freq * 10000 * pll->extra_feedback_div);

                                if( feedback_div < pll->min_feedback_div ||
                                        feedback_div > pll->max_feedback_div )
                                        continue;

                                // let's see what we've got
                                cur_freq = RoundDiv64(
                                        (int64)pll->ref_freq * 10000 * feedback_div * pll->extra_feedback_div,
                                        ref_div * post_div );

                                // absolute error in terms of output clock
                                error = abs( (int32)cur_freq - (int32)freq );
                                // deviation from perfect VCO clock
                                vco_dev = abs( (int32)vco - (int32)(pll->best_vco) );

                                // if there is no optimal VCO frequency, choose setting with less error;
                                // if there is an optimal VCO frequency, choose new settings if
                                // - error is reduced significantly (100 Hz or more), or
                                // - output frequency is almost the same (less than 100 Hz difference) but
                                //       VCO frequency is closer to best frequency
                                if( (pll->best_vco == 0 && error < best_error) ||
                                        (pll->best_vco != 0 &&
                                         (error < best_error - 100 ||
                                         (abs( error - best_error ) < 100 && vco_dev < best_vco_dev ))))
                                {
                                        //SHOW_FLOW( 2, "got freq=%d, best_freq=%d", freq, cur_freq );
                                        best_post_div_idx = post_div_idx;
                                        best_extra_post_div_idx = extra_post_div_idx;
                                        best_ref_div = ref_div;
                                        best_feedback_div = feedback_div;
                                        best_freq = cur_freq;
                                        best_error = error;
                                        best_vco_dev = vco_dev;
                                }
                        }
                }
        }

        dividers->post_code = pll->post_divs[best_post_div_idx].code;
        dividers->post = pll->post_divs[best_post_div_idx].divider;
        // best_extra_post_div_idx is an index into the extra post divider
        // table, so the extra divider and its code must be taken from there
        dividers->extra_post_code = pll->extra_post_divs[best_extra_post_div_idx].code;
        dividers->extra_post = pll->extra_post_divs[best_extra_post_div_idx].divider;
        dividers->ref = best_ref_div;
        dividers->feedback = best_feedback_div;
        dividers->freq = best_freq;

        /*SHOW_FLOW( 2, "post_code=%d, post=%d, extra_post_code=%d, extra_post=%d, ref=%d, feedback=%d, freq=%d",
                dividers->post_code, dividers->post, dividers->extra_post_code,
                dividers->extra_post, dividers->ref, dividers->feedback, dividers->freq );*/
}


// with a TV timing given, find a corresponding CRT timing.
// both timing must meet at the end of a frame, but as the PLL has a
// limited frequency granularity, you don't really get a CRT timing
// with precisely the same frame rate; the solution is to tweak the CRT
// image a bit by making it wider/taller/smaller until the frame rate
// drift is under a given threshold;
// we follow two aims:
//      - primary, keep frame rate in sync
//      - secondary, only tweak as much as unavoidable
//
// pll - description of the CRT PLL, passed on to Radeon_CalcPLLDividers
// tv_v_total, tv_h_total, tv_frame_size_adjust - size of a TV frame; the
//      number of pixels per TV frame is tv_v_total * tv_h_total + tv_frame_size_adjust
// freq - TV clock; the CRT clock derived from it is handed to
//      Radeon_CalcPLLDividers as a frequency in Hz
// mode - original CRT mode; only timing.v_total and timing.h_total are read
// max_v_tweak, max_h_tweak - maximum amount that may be added to or
//      subtracted from the vertical/horizontal total
// max_frame_rate_drift - largest accepted value of the drift computed below
// fixed_post_div - passed on to Radeon_CalcPLLDividers
// dividers - overwritten for every candidate tried; on success it holds the
//      dividers of the accepted candidate
// tweaked_mode - on success, timing.pixel_clock, timing.v_total and
//      timing.h_total are written; no other field is touched
//      (presumably the caller initializes it from mode - TODO confirm)
//
// NOTE(review): if no candidate is within max_frame_rate_drift, the function
// returns without writing tweaked_mode, and dividers holds the result of the
// last candidate tried; the caller has no way to detect this
void Radeon_MatchCRTPLL(
        const pll_info *pll,
        uint32 tv_v_total, uint32 tv_h_total, uint32 tv_frame_size_adjust, uint32 freq,
        const display_mode *mode, uint32 max_v_tweak, uint32 max_h_tweak,
        uint32 max_frame_rate_drift, uint32 fixed_post_div,
        pll_dividers *dividers,
        display_mode *tweaked_mode )
{
        uint32 v_tweak;
        int32 v_tweak_dir;
        uint32 pix_per_tv_frame;

        SHOW_FLOW( 2, "fixed post divider: %d", fixed_post_div );

        // number of TV pixels per frame
        // NOTE(review): this is used as a divisor below; a value of zero
        // (and likewise freq == 0) is not guarded against
        pix_per_tv_frame = tv_v_total * tv_h_total + tv_frame_size_adjust;

        // starting with original data we tweak total horizontal and vertical size
        // more and more until we find a proper CRT clock frequency;
        // for each tweak amount, shrinking (-1) is tried before growing (+1);
        // for a tweak amount of 0 both directions describe the same size, so
        // that candidate is evaluated twice
        for( v_tweak = 0; v_tweak <= max_v_tweak; ++v_tweak ) {
                for( v_tweak_dir = -1; v_tweak_dir <= 1; v_tweak_dir += 2 ) {
                        uint32 h_tweak;
                        int32 h_tweak_dir;

                        // unsigned arithmetic: for direction -1 the product wraps,
                        // so the sum is the total minus the tweak (modulo 2^32)
                        uint32 v_total = mode->timing.v_total + v_tweak * v_tweak_dir;

                        for( h_tweak = 0; h_tweak <= max_h_tweak; ++h_tweak ) {
                                for( h_tweak_dir = -1; h_tweak_dir <= 1; h_tweak_dir += 2 ) {
                                        uint32 pix_per_crt_frame, frame_rate_drift;
                                        uint32 crt_freq;
                                        uint32 abs_crt_error;

                                        uint32 h_total = mode->timing.h_total + h_tweak * h_tweak_dir;

                                        // number of CRT pixels per frame
                                        pix_per_crt_frame = v_total * h_total;

                                        // frame rate must be:
                                        //      frame_rate = freq / pix_per_tv_half_frame
                                        // because of interlace, we must use half frames
                                        //      pix_per_tv_half_frame = pix_per_tv_frame / 2
                                        // to get a CRT image with the same frame rate, we get
                                        //      crt_freq = frame_rate * pix_per_crt_frame
                                        //               = freq / (pix_per_tv_frame / 2) * pix_per_crt_frame
                                        // formula is reordered as usual to improve accuracy
                                        // (the product is formed in 64 bits, the quotient is
                                        // truncated to 32 bits)
                                        crt_freq = (uint64)freq * pix_per_crt_frame * 2 / pix_per_tv_frame;

                                        Radeon_CalcPLLDividers( pll, crt_freq, fixed_post_div, dividers );

                                        // get absolute CRT clock error per second
                                        abs_crt_error = abs( (int32)(dividers->freq) - (int32)crt_freq );

                                        //SHOW_INFO( 2, "wished=%d, is=%d", crt_freq, dividers->freq );

                                        // convert it to relative CRT clock error:
                                        //      rel_error = abs_crt_error / crt_freq
                                        // now to absolute TV clock error per second:
                                        //      abs_tv_error = rel_error * tv_freq
                                        // and finally to TV clock error per frame:
                                        //      frame_rate_drift = abs_tv_error / frame_rate
                                        //                       = abs_crt_error / crt_freq * tv_freq / frame_rate
                                        // this can be simplified by using:
                                        //      tv_freq = pix_per_tv_frame * frame_rate
                                        // so we get:
                                        //      frame_rate_drift = abs_crt_error / crt_freq * pix_per_tv_frame * frame_rate / frame_rate
                                        //                       = abs_crt_error / crt_freq * pix_per_tv_frame
                                        // NOTE(review): the derivation above divides by crt_freq,
                                        // whereas the statement below divides by freq (the TV
                                        // clock) - confirm which one is intended before touching
                                        // this, as thresholds may have been tuned to the code
                                        frame_rate_drift = (uint64)abs_crt_error * pix_per_tv_frame / freq;

                                        // if drift is within threshold, we take this setting and stop
                                        // searching (later iteration will increasingly tweak screen size,
                                        // and we don't really want that)
                                        if( frame_rate_drift <= max_frame_rate_drift ) {
                                                SHOW_INFO( 2, "frame_rate_drift=%d, crt_freq=%d, v_total=%d, h_total=%d",
                                                        frame_rate_drift, crt_freq, v_total, h_total );

                                                // note that the wished clock is stored, not the
                                                // clock the PLL really generates (dividers->freq)
                                                tweaked_mode->timing.pixel_clock = crt_freq;
                                                tweaked_mode->timing.v_total = v_total;
                                                tweaked_mode->timing.h_total = h_total;
                                                return;
                                        }
                                }
                        }
                }
    }
}


// table to map divider to register value;
// used as post divider table of the CRT PLLs (see Radeon_CalcCRTPLLDividers
// and Radeon_GetTVCRTPLLConfiguration).
// each entry is { divider, register code } (assuming pll_divider_map
// declares its members in this order - TODO confirm against the header);
// the entry with divider 0 terminates the table, Radeon_CalcPLLDividers
// relies on it to find the end
static pll_divider_map post_divs[] = {
        {  1, 0 },
        {  2, 1 },
        {  4, 2 },
        {  8, 3 },
        {  3, 4 },
//      { 16, 5 },      // at least for pll2 of M6, this value is reserved
        {  6, 6 },
        { 12, 7 },
        {  0, 0 }
};


// normal PLLs have no extra post divider, so the only choice is a
// divider of 1; the entry with divider 0 terminates the table
static pll_divider_map extra_post_divs[] = {
        { 1, 1 },
        { 0, 0 }
};


// extra post-divider provided by Rage Theatre (divide by 1 or by 2);
// selected by Radeon_GetTVPLLConfiguration if the TV encoder is not the
// internal one; the entry with divider 0 terminates the table
static pll_divider_map external_extra_post_divs[] = {
        { 1, 0 },
        { 2, 1 },
        { 0, 0 }
};


// post-dividers of Rage Theatre: every divider from 1 to 15 is available
// and its code is identical to the divider itself;
// the entry with divider 0 terminates the table
static pll_divider_map tv_post_divs[] = {
        {  1, 1 },
        {  2, 2 },
        {  3, 3 },
        {  4, 4 },
        {  5, 5 },
        {  6, 6 },
        {  7, 7 },
        {  8, 8 },
        {  9, 9 },
        { 10, 10 },
        { 11, 11 },
        { 12, 12 },
        { 13, 13 },
        { 14, 14 },
        { 15, 15 },
        {  0, 0 }
};


// get PLL parameters of TV PLL
// general_pll - only the reference frequency is taken from here
// pll - filled with the description of the TV PLL
// internal_encoder - true if the internal TV encoder is used; only an
//      external encoder offers a selectable extra post divider
void Radeon_GetTVPLLConfiguration( const general_pll_info *general_pll, pll_info *pll,
        bool internal_encoder )
{
        // divider tables
        pll->post_divs = tv_post_divs;
        if( internal_encoder )
                pll->extra_post_divs = extra_post_divs;
        else
                pll->extra_post_divs = external_extra_post_divs;

        // frequencies
        pll->ref_freq = general_pll->ref_freq;
        pll->vco_min = 10000;
        pll->vco_max = 25000;
        pll->best_vco = 21000;

        // input of the lock unit:
        // for the lower limit, the original code uses 330kHz if PAL is
        // requested and the quartz is 27 MHz, but I don't see how these
        // circumstances can affect the minimal PLL input frequency
        // (a lower limit of 40 was used here before);
        // the original code doesn't define an upper limit at all
        pll->pll_in_min = 20;
        pll->pll_in_max = 100;

        // reference divider; I'm not sure about the upper limit
        pll->min_ref_div = 4;
        pll->max_ref_div = 0x3ff;

        // feedback divider
        pll->extra_feedback_div = 1;
        pll->min_feedback_div = 4;
        pll->max_feedback_div = 0x7ff;
}


// get PLL parameters of CRT PLL used in conjunction with TV-out
// general_pll - provides reference frequency and upper VCO limit
// pll - filled with the description of the CRT PLL
// internal_tv_encoder - true if the internal TV encoder is used; this
//      only affects the preferred VCO frequency
void Radeon_GetTVCRTPLLConfiguration( const general_pll_info *general_pll, pll_info *pll,
        bool internal_tv_encoder )
{
        // divider tables are those of a normal CRT PLL
        pll->post_divs = post_divs;
        pll->extra_post_divs = extra_post_divs;

        pll->ref_freq = general_pll->ref_freq;

        // VCO limits: in sample code, they are set in a strange way; the
        // first shot was to use both BIOS provided limits
        // (general_pll->min_pll_freq and general_pll->max_pll_freq), but:
        // in sample code, they use a variable post divider during
        // calculation, but use a fixed post divider for programming - the
        // variable post divider is multiplied to the feedback divider;
        // because of the fixed post divider (3), the VCO always runs far out
        // of its stable frequency range, so we have to hack the lower limit
        pll->vco_min = 4000;
        pll->vco_max = general_pll->max_pll_freq;

        if( internal_tv_encoder )
                pll->best_vco = 17500;
        else
                pll->best_vco = 21000;

        // input of the lock unit
        pll->pll_in_min = 20;
        pll->pll_in_max = 100;

        // reference divider: in sample code, lower limit is 4, but in
        // register spec they say everything but 0/1
        pll->min_ref_div = 2;
        pll->max_ref_div = 0x3ff;

        // feedback divider
        pll->extra_feedback_div = 1;
        pll->min_feedback_div = 4;
        pll->max_feedback_div = 0x7ff;
}


// calc PLL dividers for CRT
// builds the description of a normal CRT PLL (VCO limits as provided by
// general_pll, no preferred VCO frequency, free choice of post divider)
// and lets Radeon_CalcPLLDividers find the dividers for the pixel clock
// mode->timing.pixel_clock must be in Hz because required accuracy in TV-Out mode
void Radeon_CalcCRTPLLDividers(
        const general_pll_info *general_pll, const display_mode *mode, pll_dividers *dividers )
{
        pll_info crt_pll;

        // divider tables
        crt_pll.post_divs = post_divs;
        crt_pll.extra_post_divs = extra_post_divs;

        // frequencies; best_vco == 0 means "no preferred VCO frequency"
        crt_pll.ref_freq = general_pll->ref_freq;
        crt_pll.vco_min = general_pll->min_pll_freq;
        crt_pll.vco_max = general_pll->max_pll_freq;
        crt_pll.best_vco = 0;

        // input of the lock unit
        crt_pll.pll_in_min = 40;
        crt_pll.pll_in_max = 100;

        // reference divider
        crt_pll.min_ref_div = 2;
        crt_pll.max_ref_div = 0x3ff;

        // feedback divider
        crt_pll.extra_feedback_div = 1;
        crt_pll.min_feedback_div = 4;
        crt_pll.max_feedback_div = 0x7ff;

        SHOW_FLOW( 2, "freq=%ld", mode->timing.pixel_clock );

        // a fixed post divider of 0 leaves the choice to the search
        Radeon_CalcPLLDividers( &crt_pll, mode->timing.pixel_clock, 0, dividers );
}


// calculate PLL registers
// mode - only timing.h_total is read
// dividers - dividers as determined by Radeon_CalcPLLDividers
// values - filled with the register contents and the values they are based on
// mode->timing.pixel_clock must be in Hz because required accuracy in TV-Out mode
// (old: freq is in 10kHz)
void Radeon_CalcPLLRegisters(
        const display_mode *mode, const pll_dividers *dividers, pll_regs *values )
{
        // contents of the registers written by Radeon_ProgramPLL:
        // the feedback divider sits in the lower bits of ppll_div_3, the
        // code of the post divider starts at bit 16
        values->ppll_ref_div = dividers->ref;
        values->ppll_div_3 = dividers->feedback | (dividers->post_code << 16);
        // this is mad: the PLL controls the horizontal length in sub-byte precision!
        values->htotal_cntl = mode->timing.h_total & 7;

        // plain dividers and frequencies they lead to
        values->feedback_div = dividers->feedback;
        values->post_div = dividers->post;
        values->dot_clock_freq = dividers->freq;
        values->pll_output_freq = dividers->freq * dividers->post;

        SHOW_FLOW( 2, "dot_clock_freq=%ld, pll_output_freq=%ld, ref_div=%d, feedback_div=%d, post_div=%d",
                values->dot_clock_freq, values->pll_output_freq,
                values->ppll_ref_div, values->feedback_div, values->post_div );
}

// write values into PLL registers
// ai - accelerant info; provides the register base and the ASIC type
// crtc_idx - 0 programs the first PLL (PPLL registers), everything else
//      the second one (P2PLL registers)
// values - register contents as calculated by Radeon_CalcPLLRegisters;
//      only ppll_ref_div, ppll_div_3 and htotal_cntl are used
//
// the order of the register accesses below is part of the programming
// sequence and must not be changed
// (Radeon_OUTPLLP is defined elsewhere; presumably it keeps the register
// bits selected by its last argument and ORs in the value - TODO confirm)
void Radeon_ProgramPLL(
        accelerator_info *ai, int crtc_idx, pll_regs *values )
{
        vuint8 *regs = ai->regs;
        radeon_type asic = ai->si->asic;

        SHOW_FLOW0( 2, "" );

        // use some other PLL for pixel clock source to not fiddling with PLL
        // while somebody is using it
    Radeon_OUTPLLP( regs, asic, crtc_idx == 0 ? RADEON_VCLK_ECP_CNTL : RADEON_PIXCLKS_CNTL,
        RADEON_VCLK_SRC_CPU_CLK, ~RADEON_VCLK_SRC_SEL_MASK );

        // put the PLL into reset and enable atomic updates
    Radeon_OUTPLLP( regs, asic,
                crtc_idx == 0 ? RADEON_PPLL_CNTL : RADEON_P2PLL_CNTL,
            RADEON_PPLL_RESET
            | RADEON_PPLL_ATOMIC_UPDATE_EN
            | RADEON_PPLL_VGA_ATOMIC_UPDATE_EN,
            ~(RADEON_PPLL_RESET
                | RADEON_PPLL_ATOMIC_UPDATE_EN
                | RADEON_PPLL_VGA_ATOMIC_UPDATE_EN) );

        // select divider 3 (well, only required for first PLL)
    OUTREGP( regs, RADEON_CLOCK_CNTL_INDEX,
            RADEON_PLL_DIV_SEL_DIV3,
            ~RADEON_PLL_DIV_SEL_MASK );

        // apply the chip-specific workaround that is required after the
        // PLL index register has been written
        RADEONPllErrataAfterIndex(regs, asic);

        if( ai->si->new_pll && crtc_idx == 0 ) {
                // starting with r300, the reference divider of the first PLL was
                // moved to another bit position; at the old location, you only
                // find the "BIOS suggested divider"; no clue why they did that
                Radeon_OUTPLLP( regs, asic,
                RADEON_PPLL_REF_DIV,
                values->ppll_ref_div << RADEON_PPLL_REF_DIV_ACC_SHIFT,
                ~RADEON_PPLL_REF_DIV_ACC_MASK );
        } else {
                // old layout (and second PLL): reference divider in the low bits
            Radeon_OUTPLLP( regs, asic,
                crtc_idx == 0 ? RADEON_PPLL_REF_DIV : RADEON_P2PLL_REF_DIV,
                values->ppll_ref_div,
                ~RADEON_PPLL_REF_DIV_MASK );
    }

        // the same value is written twice on purpose, once per bit field:
        // first with the mask of the feedback divider...
    Radeon_OUTPLLP( regs, asic,
        crtc_idx == 0 ? RADEON_PPLL_DIV_3 : RADEON_P2PLL_DIV_0,
        values->ppll_div_3,
        ~RADEON_PPLL_FB3_DIV_MASK );

        // ...then with the mask of the post divider
    Radeon_OUTPLLP( regs, asic,
        crtc_idx == 0 ? RADEON_PPLL_DIV_3 : RADEON_P2PLL_DIV_0,
        values->ppll_div_3,
        ~RADEON_PPLL_POST3_DIV_MASK );

        // trigger the atomic update and wait (bounded) for it
    Radeon_PLLWriteUpdate( ai, crtc_idx );
    Radeon_PLLWaitForReadUpdateComplete( ai, crtc_idx );

        // unlike the writes above, this one replaces the entire register
    Radeon_OUTPLL( regs, asic,
        crtc_idx == 0 ? RADEON_HTOTAL_CNTL : RADEON_HTOTAL2_CNTL,
        values->htotal_cntl );

        // clear the reset, sleep and atomic update enable bits again
        Radeon_OUTPLLP( regs, asic,
                crtc_idx == 0 ? RADEON_PPLL_CNTL : RADEON_P2PLL_CNTL, 0,
                ~(RADEON_PPLL_RESET
                | RADEON_PPLL_SLEEP
                | RADEON_PPLL_ATOMIC_UPDATE_EN
                | RADEON_PPLL_VGA_ATOMIC_UPDATE_EN) );

        // there is no way to check whether PLL has settled, so wait a bit
        // (snooze() takes microseconds in the BeOS/Haiku API, i.e. about 5 ms)
        snooze( 5000 );

        // use PLL for pixel clock again
    Radeon_OUTPLLP( regs, asic,
        crtc_idx == 0 ? RADEON_VCLK_ECP_CNTL : RADEON_PIXCLKS_CNTL,
        RADEON_VCLK_SRC_PPLL_CLK, ~RADEON_VCLK_SRC_SEL_MASK );
}