// root/src/add-ons/kernel/drivers/graphics/radeon/CP_setup.c
/*
        Copyright (c) 2002, Thomas Kurschel


        Part of Radeon accelerant

        CP initialization/sync/cleanup.

        It also handles command buffer synchronization.

        non-local memory is used as following:
        - 2048 dwords for ring buffer
	- 253 indirect buffers of 4 KB each (1024 dwords)
        - 8 dwords for returned data (i.e. current read ptr)
          & 6 dwords for "scratch registers"

        usage of scratch registers:
        - reg 0 = reached engine.count

        with a granularity of 4 KByte, we need 2+253+1=256 blocks, which is exactly 1 MB
*/

#include "radeon_driver.h"
#include "CPMicroCode.h"
#include "mmio.h"
#include "cp_regs.h"
#include "pll_regs.h"
#include "rbbm_regs.h"
#include "buscntrl_regs.h"
#include "config_regs.h"
#include "memcntrl_regs.h"
#include "utils.h"
#include "pll_access.h"

#include "log_coll.h"
#include "log_enum.h"

#include <string.h>

#if 0

// macros for user-space

// allocate memory of (asize)*4 bytes;
// on success (res == B_OK), *handle and *offset are set
// NOTE(review): this dead (#if 0) branch contained three bugs, fixed below:
// - the if-body lacked braces, so *(offset) was written even on failure
// - am.memory_type was hard-coded to mt_nonlocal, ignoring the mem_type
//   argument (the kernel-space variant below honours it)
// - FREE_MEM assigned "offset" to fm.handle instead of its "handle" argument
#define ALLOC_MEM( asize, mem_type, aglobal, handle, offset ) \
	{ \
		radeon_alloc_mem am; \
\
		am.magic = RADEON_PRIVATE_DATA_MAGIC; \
		am.size = (asize) * 4; \
		am.memory_type = (mem_type); \
		am.global = (aglobal); \
\
		res = ioctl( ai->fd, RADEON_ALLOC_MEM, &am ); \
		if( res == B_OK ) { \
			*(handle) = am.handle; \
			*(offset) = am.offset; \
		} \
	}

// get address as seen by CPU of allocated memory
#define MEM2CPU( mem ) \
	((uint32 *)(ai->mapped_memory[(mem).memory_type].data + (mem).offset))

// get graphics card's virtual address of allocated memory
#define MEM2GC( mem ) ((mem).offset + si->memory[(mem).memory_type].virtual_addr_start)

// free memory identified by its handle
#define FREE_MEM( mem_type, handle ) \
	{ \
		radeon_free_mem fm; \
\
		fm.magic = RADEON_PRIVATE_DATA_MAGIC; \
		fm.memory_type = mem_type; \
		fm.handle = (handle); \
\
		ioctl( ai->fd, RADEON_FREE_MEM, &fm ); \
	}

#else

// macros for kernel-space

// allocate memory
// if memory_type is non-local, it is replaced with default non-local type;
// result is stored in a local "res" (status_t) that must be in scope
#define ALLOC_MEM( asize, mem_type, aglobal, handle, offset ) \
	if( mem_type == mt_nonlocal ) \
		mem_type = di->si->nonlocal_type; \
	res = mem_alloc( di->memmgr[mem_type], asize, NULL, handle, offset );

// get address as seen by program to access allocated memory
// (memory_type must _not_ be non-local, see ALLOC_MEM)
#define MEM2CPU( memory_type, offset ) \
	((uint8 *)(memory_type == mt_local ? di->si->local_mem : \
	(memory_type == mt_PCI ? di->pci_gart.buffer.ptr : di->agp_gart.buffer.ptr)) \
	+ (offset))

// get graphics card's virtual address of allocated memory
// (memory_type must _not_ be non-local, see ALLOC_MEM)
#define MEM2GC( memory_type, offset ) \
	(di->si->memory[(memory_type)].virtual_addr_start + (offset))

// free memory
// if memory_type is non-local, it is replaced with default non-local type
#define FREE_MEM( mem_type, handle ) \
	mem_free( \
		di->memmgr[ mem_type == mt_nonlocal ? di->si->nonlocal_type : mem_type], \
		handle, NULL );

#endif


void Radeon_DiscardAllIndirectBuffers( device_info *di );

#define RADEON_SCRATCH_REG_OFFSET       32


void Radeon_FlushPixelCache( device_info *di );

// wait until engine is idle;
// acquire_lock -	true, if lock must be hold
//			false, if lock is already acquired
// keep_lock -		true, keep lock on exit (only valid if acquire_lock is true)
void Radeon_WaitForIdle( device_info *di, bool acquire_lock, bool keep_lock )
{
	if( acquire_lock )
		ACQUIRE_BEN( di->si->cp.lock );

	// drain the command FIFO first
	Radeon_WaitForFifo( di, 64 );

	for( ;; ) {
		// poll the status register for up to one second
		bigtime_t timeout = system_time() + 1000000;

		do {
			if( (INREG( di->regs, RADEON_RBBM_STATUS )
					& RADEON_RBBM_ACTIVE) == 0 ) {
				Radeon_FlushPixelCache( di );

				if( acquire_lock && !keep_lock )
					RELEASE_BEN( di->si->cp.lock );

				return;
			}

			snooze( 1 );
		} while( system_time() < timeout );

		// engine seems stuck - dump some state, log it,
		// soft-reset the engine and try again
		SHOW_ERROR( 3,
			"Engine didn't become idle (rbbm_status=%" B_PRIx32 ", "
			"cp_stat=%" B_PRIx32 ", "
			"tlb_address=%" B_PRIx32 ", "
			"tlb_data=%" B_PRIx32 ")",
			INREG( di->regs, RADEON_RBBM_STATUS ),
			INREG( di->regs, RADEON_CP_STAT ),
			INREG( di->regs, RADEON_AIC_TLB_ADDR ),
			INREG( di->regs, RADEON_AIC_TLB_DATA ));

		LOG( di->si->log, _Radeon_WaitForIdle );

		Radeon_ResetEngine( di );
	}
}


// wait until "entries" FIFO entries are empty
// lock must be hold
void Radeon_WaitForFifo( device_info *di, int entries )
{
	for( ;; ) {
		// poll the free-slot count for up to one second
		bigtime_t timeout = system_time() + 1000000;

		do {
			int free_slots = INREG( di->regs, RADEON_RBBM_STATUS )
				& RADEON_RBBM_FIFOCNT_MASK;

			if( free_slots >= entries )
				return;

			snooze( 1 );
		} while( system_time() < timeout );

		// FIFO never drained - log and soft-reset, then retry
		LOG( di->si->log, _Radeon_WaitForFifo );

		Radeon_ResetEngine( di );
	}
}

// flush pixel cache of graphics card
void Radeon_FlushPixelCache( device_info *di )
{
	bigtime_t timeout;

	// request a flush of the 2D destination cache
	OUTREGP( di->regs, RADEON_RB2D_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL,
		~RADEON_RB2D_DC_FLUSH_ALL );

	// wait (up to one second) until the cache reports not-busy
	timeout = system_time() + 1000000;

	while( system_time() < timeout ) {
		if( (INREG( di->regs, RADEON_RB2D_DSTCACHE_CTLSTAT )
			 & RADEON_RB2D_DC_BUSY) == 0 )
			return;

		snooze( 1 );
	}

	LOG( di->si->log, _Radeon_FlushPixelCache );

	SHOW_ERROR0( 0, "pixel cache didn't become empty" );
}

// reset graphics card's engine
// lock must be hold
//
// Performs a soft-reset of the CP and the 2D/3D units. The register
// sequence below (save clocks, force clocks on, pulse reset bits,
// restore clocks, reset host path) is order-sensitive; deviations
// from the DRM driver are noted inline.
void Radeon_ResetEngine( device_info *di )
{
	vuint8 *regs = di->regs;
	shared_info *si = di->si;
	uint32 clock_cntl_index, mclk_cntl, rbbm_soft_reset, host_path_cntl;
	uint32 cur_read_ptr;

	SHOW_FLOW0( 3, "" );

	Radeon_FlushPixelCache( di );

	// save registers that get modified during the reset
	clock_cntl_index = INREG( regs, RADEON_CLOCK_CNTL_INDEX );
	RADEONPllErrataAfterIndex( regs, di->asic );	// drm has no errata here!
	mclk_cntl = Radeon_INPLL( regs, di->asic, RADEON_MCLK_CNTL );

	// enable clock of units to be reset
	Radeon_OUTPLL( regs, di->asic, RADEON_MCLK_CNTL, mclk_cntl |
      RADEON_FORCEON_MCLKA |
      RADEON_FORCEON_MCLKB |
      RADEON_FORCEON_YCLKA |
      RADEON_FORCEON_YCLKB |
      RADEON_FORCEON_MC |
      RADEON_FORCEON_AIC );

	// do the reset: assert the reset bits of all engine units ...
    host_path_cntl = INREG( regs, RADEON_HOST_PATH_CNTL );
	rbbm_soft_reset = INREG( regs, RADEON_RBBM_SOFT_RESET );

	OUTREG( regs, RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
			RADEON_SOFT_RESET_CP |
			RADEON_SOFT_RESET_HI |
			RADEON_SOFT_RESET_SE |
			RADEON_SOFT_RESET_RE |
			RADEON_SOFT_RESET_PP |
			RADEON_SOFT_RESET_E2 |
			RADEON_SOFT_RESET_RB ) );
	// read-back for flushing
	INREG( regs, RADEON_RBBM_SOFT_RESET);
	// ... then de-assert them again
	OUTREG( regs, RADEON_RBBM_SOFT_RESET, rbbm_soft_reset &
		~( RADEON_SOFT_RESET_CP |
		   RADEON_SOFT_RESET_HI |
		   RADEON_SOFT_RESET_SE |
		   RADEON_SOFT_RESET_RE |
		   RADEON_SOFT_RESET_PP |
		   RADEON_SOFT_RESET_E2 |
		   RADEON_SOFT_RESET_RB ) );
	// read-back for flushing
	INREG( regs, RADEON_RBBM_SOFT_RESET);

	// pulse the host data path soft-reset as well
    OUTREG( regs, RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET );
    INREG( regs, RADEON_HOST_PATH_CNTL );
    OUTREG( regs, RADEON_HOST_PATH_CNTL, host_path_cntl );

	// restore the saved registers
	Radeon_OUTPLL( regs, di->asic, RADEON_MCLK_CNTL, mclk_cntl );
	OUTREG( regs, RADEON_CLOCK_CNTL_INDEX, clock_cntl_index );
	//RADEONPllErrataAfterIndex( regs, di->asic ); // drm doesn't do this here!
	OUTREG( regs, RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);

	if ( di->acc_dma )
	{
		// reset ring buffer: set write pointer equal to read pointer,
		// so the CP sees an empty ring
		cur_read_ptr = INREG( regs, RADEON_CP_RB_RPTR );
		OUTREG( regs, RADEON_CP_RB_WPTR, cur_read_ptr );

		//if( si->cp.ring.head ) {
		// during init, there are no feedback data
		if( si->cp.feedback.mem_handle != 0 ) {
			// keep CPU-visible head copy and software tail consistent
			// with the hardware pointers
			*(uint32 *)MEM2CPU( si->cp.feedback.mem_type, si->cp.feedback.head_mem_offset) =
				cur_read_ptr;
			//	*si->cp.ring.head = cur_read_ptr;
			si->cp.ring.tail = cur_read_ptr;
		}

		// mark all buffers as being finished
		Radeon_DiscardAllIndirectBuffers( di );
	}

	// bump engine generation counter (see file header: scratch reg 0
	// tracks the reached engine.count)
	++si->engine.count;
	return;
}


// upload Micro-Code of CP
static void loadMicroEngineRAMData( device_info *di )
{
	const uint32 (*ucode)[2];
	int idx;

	SHOW_FLOW0( 3, "" );

	// pick the microcode image matching the ASIC family
	switch( di->asic ) {
	case rt_r300:
	case rt_rv350:
	case rt_r350:
	case rt_rv380:
	case rt_r420:
		ucode = r300_cp_microcode;
		break;
	case rt_r200:
		ucode = r200_cp_microcode;
		break;
	case rt_rs100:
	default:
		ucode = radeon_cp_microcode;
	}

	Radeon_WaitForIdle( di, false, false );

	// write all 256 entries via the auto-incrementing RAM port,
	// high dword before low dword
	OUTREG( di->regs, RADEON_CP_ME_RAM_ADDR, 0 );

	for( idx = 0; idx < 256; ++idx ) {
		OUTREG( di->regs, RADEON_CP_ME_RAM_DATAH, ucode[idx][1] );
		OUTREG( di->regs, RADEON_CP_ME_RAM_DATAL, ucode[idx][0] );
	}
}

// create the CP ring buffer and point the CP at it
// aring_size - size of ring in dwords
static status_t initRingBuffer( device_info *di, int aring_size )
{
	shared_info *si = di->si;
	CP_info *cp = &si->cp;
	vuint8 *regs = di->regs;
	memory_type_e memory_type;
	uint32 mem_offset;
	status_t res;

	memset( &cp->ring, 0, sizeof( cp->ring ));

	// ring and indirect buffers can be either in AGP or PCI GART
	// (it seems that they cannot be in graphics memory, at least
	//  I had serious coherency problems when I tried that)
	memory_type = mt_nonlocal;

	ALLOC_MEM( aring_size * 4, memory_type, true,
		&cp->ring.mem_handle, &mem_offset );

	if( res != B_OK ) {
		SHOW_ERROR0( 0, "Cannot allocate ring buffer" );
		return res;
	}

	// publish the buffer's location in shared info
	cp->ring.mem_type = memory_type;
	cp->ring.mem_offset = mem_offset;
	cp->ring.vm_base = MEM2GC( memory_type, mem_offset );
	cp->ring.size = aring_size;
	cp->ring.tail_mask = aring_size - 1;

	// tell the CP the ring's graphics-card virtual address
	OUTREG( regs, RADEON_CP_RB_BASE, cp->ring.vm_base );
	SHOW_INFO( 3, "CP buffer address=%" B_PRIx32, cp->ring.vm_base );

	// set ring buffer size
	// (it's log2 of qwords)
	OUTREG( regs, RADEON_CP_RB_CNTL, radeon_log2( cp->ring.size / 2 ));
	SHOW_INFO( 3, "CP buffer size mask=%d", radeon_log2( cp->ring.size / 2 ) );

	// set write pointer delay to zero;
	// we assume that memory synchronization is done correctly by the MoBo,
	// and Radeon_SendCP contains a hack that hopefully fixes such problems
	OUTREG( regs, RADEON_CP_RB_WPTR_DELAY, 0 );

	// start with a zeroed ring
	memset( MEM2CPU( cp->ring.mem_type, cp->ring.mem_offset), 0, cp->ring.size * 4 );

	// read and write pointer at zero: ring is empty
	OUTREG( regs, RADEON_CP_RB_RPTR, 0 );
	OUTREG( regs, RADEON_CP_RB_WPTR, 0 );
	cp->ring.tail = 0;

	return B_OK;
}

// tear down the CP ring buffer
static void uninitRingBuffer( device_info *di )
{
	vuint8 *regs = di->regs;

	// abort any activity
	Radeon_ResetEngine( di );

	// disable both primary and indirect CP queues,
	// reading back so the write is flushed
	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	INREG( regs, RADEON_CP_CSQ_CNTL );

	FREE_MEM( mt_nonlocal, di->si->cp.ring.mem_handle );
}

// create the CP feedback area (read-pointer copy + scratch registers)
static status_t initCPFeedback( device_info *di )
{
	CP_info *cp = &di->si->cp;
	vuint8 *regs = di->regs;
	memory_type_e memory_type;
	uint32 base_offset;
	status_t res;

	// status information should be in PCI memory, so CPU can
	// poll it without locking the bus (PCI memory is the only
	// cachable memory available)
	memory_type = mt_PCI;

	ALLOC_MEM( RADEON_SCRATCH_REG_OFFSET + 0x40, memory_type, true,
		&cp->feedback.mem_handle, &base_offset );

	if( res != B_OK ) {
		SHOW_ERROR0( 0, "Cannot allocate buffers for status information" );
		return res;
	}

	// first part of the block holds the CP read pointer copy
	cp->feedback.mem_type = memory_type;
	cp->feedback.head_mem_offset = base_offset;
	cp->feedback.head_vm_address =
		MEM2GC( memory_type, cp->feedback.head_mem_offset );
	OUTREG( regs, RADEON_CP_RB_RPTR_ADDR, cp->feedback.head_vm_address );
	SHOW_INFO( 3, "CP read pointer buffer==%" B_PRIx32,
		cp->feedback.head_vm_address );

	// the remainder holds the scratch registers
	cp->feedback.scratch_mem_offset = base_offset + RADEON_SCRATCH_REG_OFFSET;
	cp->feedback.scratch_vm_start =
		MEM2GC( memory_type, cp->feedback.scratch_mem_offset );
	OUTREG( regs, RADEON_SCRATCH_ADDR, cp->feedback.scratch_vm_start );
	// enable write-back of all six scratch registers
	OUTREG( regs, RADEON_SCRATCH_UMSK, 0x3f );

	// start with a clean feedback area
	*(uint32 *)MEM2CPU( cp->feedback.mem_type, cp->feedback.head_mem_offset) = 0;
	memset( MEM2CPU( cp->feedback.mem_type, cp->feedback.scratch_mem_offset), 0, 0x40 );

	return B_OK;
}

// shut down CP feedback and release its memory
static void uninitCPFeedback( device_info *di )
{
	vuint8 *regs = di->regs;

	// mask all scratch registers first, so the CP cannot write
	// into the buffer we are about to free
	OUTREG( regs, RADEON_SCRATCH_UMSK, 0x0 );

	FREE_MEM( mt_PCI, di->si->cp.feedback.mem_handle );
}

// allocate the pool of indirect buffers and set up its free list
// returns: B_OK on success, or the allocation's error code
static status_t initIndirectBuffers( device_info *di )
{
	CP_info *cp = &di->si->cp;
	uint32 offset;
	memory_type_e memory_type;
	int i;
	status_t res;

	memory_type = mt_nonlocal;

	ALLOC_MEM( NUM_INDIRECT_BUFFERS * INDIRECT_BUFFER_SIZE * 4, memory_type,
		true, &cp->buffers.mem_handle, &offset );

	if( res != B_OK ) {
		SHOW_ERROR0( 0, "Cannot allocate indirect buffers" );
		// propagate the real error code (was: return B_ERROR),
		// consistent with initRingBuffer()/initCPFeedback()
		return res;
	}

	cp->buffers.mem_type = memory_type;
	cp->buffers.mem_offset = offset;
	cp->buffers.vm_start = MEM2GC( memory_type, cp->buffers.mem_offset );

	// chain all buffers into one list, terminated by -1
	for( i = 0; i < NUM_INDIRECT_BUFFERS - 1; ++i ) {
		cp->buffers.buffers[i].next = i + 1;
	}

	cp->buffers.buffers[i].next = -1;

	// all buffers start out free; "used" queue is empty
	cp->buffers.free_list = 0;
	cp->buffers.oldest = -1;
	cp->buffers.newest = -1;
	cp->buffers.active_state = -1;
	cp->buffers.cur_tag = 0;

	memset( MEM2CPU( cp->buffers.mem_type, cp->buffers.mem_offset), 0,
		NUM_INDIRECT_BUFFERS * INDIRECT_BUFFER_SIZE * 4 );

	return B_OK;
}

// free the single allocation that backs all indirect buffers
static void uninitIndirectBuffers( device_info *di )
{
        FREE_MEM( mt_nonlocal, di->si->cp.buffers.mem_handle );
}

// initialize CP so it's ready for BM
// returns: B_OK on success, error code otherwise;
// on failure, all partially-created resources are released again
status_t Radeon_InitCP( device_info *di )
{
	thread_id thid;
    thread_info thinfo;
	status_t res;

	SHOW_FLOW0( 3, "" );

	// this is _really_ necessary so functions like ResetEngine() know
	// that the CP is not set up yet
	memset( &di->si->cp, 0, sizeof( di->si->cp ));

	if( (res = INIT_BEN( di->si->cp.lock, "Radeon CP" )) < 0 )
		return res;

	// HACK: change owner of benaphore semaphore to team of calling thread;
	// reason: user code cannot acquire kernel semaphores, but the accelerant
	// is in user space; interestingly, it's enough to change the semaphore's
	// owner to _any_ non-system team (that's the only security check done by
	// the kernel)
	thid = find_thread( NULL );
    get_thread_info( thid, &thinfo );
    set_sem_owner( di->si->cp.lock.sem, thinfo.team );

	// init raw CP
	if ( di->acc_dma ) loadMicroEngineRAMData( di );

	// do soft-reset
	Radeon_ResetEngine( di );

	// after warm-reset, the CP may still be active and thus react to
	// register writes during initialization unpredictably, so we better
	// stop it first
	OUTREG( di->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	// read-back for flushing
	INREG( di->regs, RADEON_CP_CSQ_CNTL );

	// reset CP to make disabling active
	Radeon_ResetEngine( di );

	if ( di->acc_dma )
	{
		// create ring buffer, feedback area and indirect buffers,
		// unwinding via the goto chain below on any failure
		res = initRingBuffer( di, CP_RING_SIZE );
		if( res < 0 )
			goto err4;

		res = initCPFeedback( di );
		if( res < 0 )
			goto err3;

		res = initIndirectBuffers( di );
		if( res < 0 )
			goto err2;

		// tell CP to use BM
		Radeon_WaitForIdle( di, false, false );

		// enable direct and indirect CP bus mastering
		OUTREG( di->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM );

		// allow bus mastering in general
		OUTREGP( di->regs, RADEON_BUS_CNTL, 0, ~RADEON_BUS_MASTER_DIS );
	}


	// don't allow mixing of 2D/3D/scratch/wait_until commands
	// (in fact, this doesn't seem to make any difference as we do a
	// manual sync in all these cases anyway)
	OUTREG( di->regs, RADEON_ISYNC_CNTL,
		RADEON_ISYNC_ANY2D_IDLE3D |
		RADEON_ISYNC_ANY3D_IDLE2D |
		RADEON_ISYNC_WAIT_IDLEGUI |
		RADEON_ISYNC_CPSCRATCH_IDLEGUI );

	SHOW_FLOW( 3, "bus_cntl=%" B_PRIx32, INREG( di->regs, RADEON_BUS_CNTL ));

	SHOW_FLOW0( 3, "Done" );

	return B_OK;

	// error path: tear down in reverse order of creation
//err:
//	uninitIndirectBuffers( ai );
err2:
	uninitCPFeedback( di );
err3:
	uninitRingBuffer( di );
err4:
	DELETE_BEN( di->si->cp.lock );
	return res;
}


// shutdown CP, freeing any memory
void Radeon_UninitCP( device_info *di )
{
	vuint8 *regs = di->regs;

	// abort any pending commands
	Radeon_ResetEngine( di );

	// disable CP bus mastering, reading back so the write is flushed
	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	INREG( regs, RADEON_CP_CSQ_CNTL );

	// release DMA resources in reverse order of creation
	if( di->acc_dma ) {
		uninitRingBuffer( di );
		uninitCPFeedback( di );
		uninitIndirectBuffers( di );
	}

	DELETE_BEN( di->si->cp.lock );
}


// mark all indirect buffers as being free;
// this should only be called after a reset;
// lock must be hold
void Radeon_DiscardAllIndirectBuffers( device_info *di )
{
	CP_info *cp = &di->si->cp;

	// during init, there is no indirect buffer
	if( cp->buffers.mem_handle == 0 )
		return;

	// walk the "used" queue front-to-back, pushing each entry
	// onto the free list
	while( cp->buffers.oldest != -1 ) {
		int cur = cp->buffers.oldest;
		indirect_buffer *buffer = &cp->buffers.buffers[cur];
		int next;

		SHOW_FLOW( 0, "%d", cp->buffers.oldest );

		// unlink from "used" queue
		next = buffer->next;

		if( next == -1 )
			cp->buffers.newest = -1;

		// prepend to free list
		buffer->next = cp->buffers.free_list;
		cp->buffers.free_list = cur;

		cp->buffers.oldest = next;
	}
}

// Let's hide this in here, as this file already includes lots of lovely
// register headers... (does it belong here, or in the accelerant?)
// For now we assume that, once dynamic clocks are turned on, they take care
// of themselves from then on, so doing this at driver init - after a valid
// detection, of course - seems sensible.
void Radeon_SetDynamicClock( device_info *di, int mode)
{
    vuint8 *regs = di->regs;
    radeon_type asic = di->asic;
    uint32 tmp;

    switch(mode) {
        case 0: /* Turn everything OFF (ForceON to everything)*/
                if ( di->num_crtc != 2 ) {
                        tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
                        tmp |= (RADEON_SCLK_FORCE_CP   | RADEON_SCLK_FORCE_HDP |
                                RADEON_SCLK_FORCE_DISP1 | RADEON_SCLK_FORCE_TOP |
                                RADEON_SCLK_FORCE_E2   | RADEON_SCLK_FORCE_SE  |
                                RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_VIP |
                                RADEON_SCLK_FORCE_RE   | RADEON_SCLK_FORCE_PB  |
                                RADEON_SCLK_FORCE_TAM  | RADEON_SCLK_FORCE_TDM |
                                RADEON_SCLK_FORCE_RB);
                        Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
                } else if (asic == rt_rv350) {
                        /* for RV350/M10, no delays are required. */
                        tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
                        tmp |= (R300_SCLK_FORCE_TCL |
                                R300_SCLK_FORCE_GA  |
                                R300_SCLK_FORCE_CBA);
                        Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);

                        tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
                        tmp |= (RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP      |
                                RADEON_SCLK_FORCE_HDP   | RADEON_SCLK_FORCE_DISP1   |
                                RADEON_SCLK_FORCE_TOP   | RADEON_SCLK_FORCE_E2      |
                                R300_SCLK_FORCE_VAP     | RADEON_SCLK_FORCE_IDCT    |
                                RADEON_SCLK_FORCE_VIP   | R300_SCLK_FORCE_SR        |
                                R300_SCLK_FORCE_PX      | R300_SCLK_FORCE_TX        |
                                R300_SCLK_FORCE_US      | RADEON_SCLK_FORCE_TV_SCLK |
                                R300_SCLK_FORCE_SU      | RADEON_SCLK_FORCE_OV0);
                        Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

                        tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
                        tmp |= RADEON_SCLK_MORE_FORCEON;
                        Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);

                        tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
                        tmp |= (RADEON_FORCEON_MCLKA |
                                RADEON_FORCEON_MCLKB |
                                RADEON_FORCEON_YCLKA |
                                RADEON_FORCEON_YCLKB |
                                RADEON_FORCEON_MC);
                        Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);

                        tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
                        tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb  |
                                RADEON_PIXCLK_DAC_ALWAYS_ONb |
                        R300_DISP_DAC_PIXCLK_DAC_BLANK_OFF);
                        Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);

                        tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
                        tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb         |
                                RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
                                RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
                                R300_DVOCLK_ALWAYS_ONb            |
                                RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
                                RADEON_PIXCLK_GV_ALWAYS_ONb       |
                                R300_PIXCLK_DVO_ALWAYS_ONb        |
                                RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
                                RADEON_PIXCLK_TMDS_ALWAYS_ONb     |
                                R300_PIXCLK_TRANS_ALWAYS_ONb      |
                                R300_PIXCLK_TVO_ALWAYS_ONb        |
                                R300_P2G2CLK_ALWAYS_ONb            |
                                R300_P2G2CLK_DAC_ALWAYS_ONb           |
                                R300_DISP_DAC_PIXCLK_DAC2_BLANK_OFF);
                        Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
                }  else {
                        tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
                        tmp |= (RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_E2);
                        tmp |= RADEON_SCLK_FORCE_SE;

                        if ( di->num_crtc != 2 ) {
                                tmp |= ( RADEON_SCLK_FORCE_RB    |
                                RADEON_SCLK_FORCE_TDM   |
                                RADEON_SCLK_FORCE_TAM   |
                                RADEON_SCLK_FORCE_PB    |
                                RADEON_SCLK_FORCE_RE    |
                                RADEON_SCLK_FORCE_VIP   |
                                RADEON_SCLK_FORCE_IDCT  |
                                RADEON_SCLK_FORCE_TOP   |
                                RADEON_SCLK_FORCE_DISP1 |
                                RADEON_SCLK_FORCE_DISP2 |
                                RADEON_SCLK_FORCE_HDP    );
                        } else if ((asic == rt_r300) || (asic == rt_r350)) {
                                tmp |= ( RADEON_SCLK_FORCE_HDP   |
                                        RADEON_SCLK_FORCE_DISP1 |
                                        RADEON_SCLK_FORCE_DISP2 |
                                        RADEON_SCLK_FORCE_TOP   |
                                        RADEON_SCLK_FORCE_IDCT  |
                                        RADEON_SCLK_FORCE_VIP);
                        }
                        Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

                        snooze(16000);

                        if ((asic == rt_r300) || (asic == rt_r350)) {
                                tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
                                tmp |= ( R300_SCLK_FORCE_TCL |
                                        R300_SCLK_FORCE_GA  |
                                        R300_SCLK_FORCE_CBA);
                                Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);
                                snooze(16000);
                        }

                        if (di->is_igp) {
                                tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
                                tmp &= ~(RADEON_FORCEON_MCLKA |
                                        RADEON_FORCEON_YCLKA);
                                Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);
                                snooze(16000);
                        }

                        if ((asic == rt_rv200) ||
                                (asic == rt_rv250) ||
                                (asic == rt_rv280)) {
                                tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
                                tmp |= RADEON_SCLK_MORE_FORCEON;
                                Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);
                                snooze(16000);
                        }

                        tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
                        tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb         |
                                RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
                                RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
                                RADEON_PIXCLK_GV_ALWAYS_ONb       |
                                RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb |
                                RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
                                RADEON_PIXCLK_TMDS_ALWAYS_ONb);

                        Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
                        snooze(16000);

                        tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
                        tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb  |
                                RADEON_PIXCLK_DAC_ALWAYS_ONb);
                        Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);
                }
                SHOW_FLOW0( 3, "Dynamic Clock Scaling Disabled" );
                break;
        case 1:
                if ( di->num_crtc != 2 ) {
                        tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
                        if ((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) > RADEON_CFG_ATI_REV_A13) {
                                tmp &= ~(RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_RB);
                        }
                        tmp &= ~(RADEON_SCLK_FORCE_HDP  | RADEON_SCLK_FORCE_DISP1 |
                                RADEON_SCLK_FORCE_TOP  | RADEON_SCLK_FORCE_SE   |
                                RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_RE   |
                                RADEON_SCLK_FORCE_PB   | RADEON_SCLK_FORCE_TAM  |
                                RADEON_SCLK_FORCE_TDM);
                        Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
                } else if ((asic == rt_r300)
                                || (asic == rt_r350)
                                || (asic == rt_rv350)) {
                        if (asic == rt_rv350) {
                                tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
                                tmp &= ~(R300_SCLK_FORCE_TCL |
                                        R300_SCLK_FORCE_GA  |
                                        R300_SCLK_FORCE_CBA);
                                tmp |=  (R300_SCLK_TCL_MAX_DYN_STOP_LAT |
                                        R300_SCLK_GA_MAX_DYN_STOP_LAT  |
                                        R300_SCLK_CBA_MAX_DYN_STOP_LAT);
                                Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);

                                tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
                                tmp &= ~(RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP      |
                                        RADEON_SCLK_FORCE_HDP   | RADEON_SCLK_FORCE_DISP1   |
                                        RADEON_SCLK_FORCE_TOP   | RADEON_SCLK_FORCE_E2      |
                                        R300_SCLK_FORCE_VAP     | RADEON_SCLK_FORCE_IDCT    |
                                        RADEON_SCLK_FORCE_VIP   | R300_SCLK_FORCE_SR        |
                                        R300_SCLK_FORCE_PX      | R300_SCLK_FORCE_TX        |
                                        R300_SCLK_FORCE_US      | RADEON_SCLK_FORCE_TV_SCLK |
                                        R300_SCLK_FORCE_SU      | RADEON_SCLK_FORCE_OV0);
                                        tmp |=  RADEON_DYN_STOP_LAT_MASK;
                                Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

                                tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
                                tmp &= ~RADEON_SCLK_MORE_FORCEON;
                                tmp |=  RADEON_SCLK_MORE_MAX_DYN_STOP_LAT;
                                Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);

                                tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
                                tmp |= (RADEON_PIXCLK_ALWAYS_ONb |
                                        RADEON_PIXCLK_DAC_ALWAYS_ONb);
                                Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);

                                tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
                                tmp |= (RADEON_PIX2CLK_ALWAYS_ONb         |
                                        RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
                                        RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
                                        R300_DVOCLK_ALWAYS_ONb            |
                                        RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
                                        RADEON_PIXCLK_GV_ALWAYS_ONb       |
                                        R300_PIXCLK_DVO_ALWAYS_ONb        |
                                        RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
                                        RADEON_PIXCLK_TMDS_ALWAYS_ONb     |
                                        R300_PIXCLK_TRANS_ALWAYS_ONb      |
                                        R300_PIXCLK_TVO_ALWAYS_ONb        |
                                        R300_P2G2CLK_ALWAYS_ONb           |
                                        R300_P2G2CLK_DAC_ALWAYS_ONb);
                                Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);

                                tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_MISC);
                                tmp |= (RADEON_MC_MCLK_DYN_ENABLE |
                                        RADEON_IO_MCLK_DYN_ENABLE);
                                Radeon_OUTPLL(regs, asic, RADEON_MCLK_MISC, tmp);

                                tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
                                tmp |= (RADEON_FORCEON_MCLKA |
                                        RADEON_FORCEON_MCLKB);

                                tmp &= ~(RADEON_FORCEON_YCLKA  |
                                        RADEON_FORCEON_YCLKB  |
                                        RADEON_FORCEON_MC);

                                /* Some releases of vbios have set DISABLE_MC_MCLKA
                                and DISABLE_MC_MCLKB bits in the vbios table.  Setting these
                                bits will cause H/W hang when reading video memory with dynamic clocking
                                enabled. */
                                if ((tmp & R300_DISABLE_MC_MCLKA) &&
                                (tmp & R300_DISABLE_MC_MCLKB)) {
                                        /* If both bits are set, then check the active channels */
                                        tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
                                        if (di->ram.width == 64) {
                                                if (INREG( regs, RADEON_MEM_CNTL) & R300_MEM_USE_CD_CH_ONLY)
                                                tmp &= ~R300_DISABLE_MC_MCLKB;
                                                else
                                                tmp &= ~R300_DISABLE_MC_MCLKA;
                                        } else {
                                                tmp &= ~(R300_DISABLE_MC_MCLKA |
                                                R300_DISABLE_MC_MCLKB);
                                        }
                                }

                                Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);
                        } else {
                                tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
                                tmp &= ~(R300_SCLK_FORCE_VAP);
                                tmp |= RADEON_SCLK_FORCE_CP;
                                Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
                                snooze(15000);

                                tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
                                tmp &= ~(R300_SCLK_FORCE_TCL |
                                R300_SCLK_FORCE_GA  |
                                R300_SCLK_FORCE_CBA);
                                Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);
                        }
                } else {
                        tmp = Radeon_INPLL(regs, asic, RADEON_CLK_PWRMGT_CNTL);

                        tmp &= ~(RADEON_ACTIVE_HILO_LAT_MASK     |
                                RADEON_DISP_DYN_STOP_LAT_MASK   |
                                RADEON_DYN_STOP_MODE_MASK);

                        tmp |= (RADEON_ENGIN_DYNCLK_MODE |
                        (0x01 << RADEON_ACTIVE_HILO_LAT_SHIFT));
                        Radeon_OUTPLL(regs, asic, RADEON_CLK_PWRMGT_CNTL, tmp);
                        snooze(15000);

                        tmp = Radeon_INPLL(regs, asic, RADEON_CLK_PIN_CNTL);
                        tmp |= RADEON_SCLK_DYN_START_CNTL;
                        Radeon_OUTPLL(regs, asic, RADEON_CLK_PIN_CNTL, tmp);
                        snooze(15000);

                        /* When DRI is enabled, setting DYN_STOP_LAT to zero can cause some R200
                        to lockup randomly, leave them as set by BIOS.
                        */
                        tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
                        /*tmp &= RADEON_SCLK_SRC_SEL_MASK;*/
                        tmp &= ~RADEON_SCLK_FORCEON_MASK;

                        /*RAGE_6::A11 A12 A12N1 A13, RV250::A11 A12, R300*/
                        if (((asic == rt_rv250) &&
                                ((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
                                  RADEON_CFG_ATI_REV_A13)) ||
                                ((asic == rt_rv100) &&
                                ((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <=
                                  RADEON_CFG_ATI_REV_A13)))
                        {
                                tmp |= RADEON_SCLK_FORCE_CP;
                                tmp |= RADEON_SCLK_FORCE_VIP;
                        }

                        Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

                        if ((asic == rt_rv200) ||
                                (asic == rt_rv250) ||
                                (asic == rt_rv280)) {
                                tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
                                tmp &= ~RADEON_SCLK_MORE_FORCEON;

                                /* RV200::A11 A12 RV250::A11 A12 */
                                if (((asic == rt_rv200) ||
                                         (asic == rt_rv250)) &&
                                        ((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
                                          RADEON_CFG_ATI_REV_A13))
                                {
                                        tmp |= RADEON_SCLK_MORE_FORCEON;
                                }
                                Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);
                                snooze(15000);
                        }

                        /* RV200::A11 A12, RV250::A11 A12 */
                        if (((asic == rt_rv200) ||
                                 (asic == rt_rv250)) &&
                                ((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
                                  RADEON_CFG_ATI_REV_A13))
                        {
                                tmp = Radeon_INPLL(regs, asic, RADEON_PLL_PWRMGT_CNTL);
                                tmp |= RADEON_TCL_BYPASS_DISABLE;
                                Radeon_OUTPLL(regs, asic, RADEON_PLL_PWRMGT_CNTL, tmp);
                        }
                        snooze(15000);

                        /*enable dynamic mode for display clocks (PIXCLK and PIX2CLK)*/
                        tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
                        tmp |=  (RADEON_PIX2CLK_ALWAYS_ONb         |
                                RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
                                RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
                                RADEON_PIXCLK_GV_ALWAYS_ONb       |
                                RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb |
                                RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
                                RADEON_PIXCLK_TMDS_ALWAYS_ONb);

                        Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
                        snooze(15000);

                        tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
                        tmp |= (RADEON_PIXCLK_ALWAYS_ONb  |
                                RADEON_PIXCLK_DAC_ALWAYS_ONb);

                        Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);
                        snooze(15000);
                }
                SHOW_FLOW0( 3, "Dynamic Clock Scaling Enabled" );
                break;
        default:
                break;
        }
}