#include "radeon_accelerant.h"
#include "mmio.h"
#include "buscntrl_regs.h"
#include "utils.h"
#include <sys/ioctl.h>
#include "CP.h"
#include "log_coll.h"
#include "log_enum.h"
#include <string.h>
/*
 * Compute how many dwords can currently be written to the command ring
 * and cache the result in cp->ring.space.
 *
 * The head position is read from the feedback area in mapped memory
 * (feedback.head_mem_offset); the tail is the driver-side cp->ring.tail.
 *
 * Returns the free space in dwords.
 */
static uint getAvailRingBuffer( accelerator_info *ai )
{
	CP_info *cp = &ai->si->cp;
	uint32 *head_ptr =
		(uint32 *)(ai->mapped_memory[cp->feedback.mem_type].data + cp->feedback.head_mem_offset);
	int avail = *head_ptr - cp->ring.tail;

	/* head at or behind tail: the free region wraps around the ring end */
	if( avail <= 0 )
		avail += cp->ring.size;

	/* one slot is always kept unused - presumably so that a completely
	   full ring cannot be mistaken for an empty one (head == tail) */
	avail -= 1;

	SHOW_FLOW( 3, "head=%ld, tail=%ld, space=%ld",
		*head_ptr,
		cp->ring.tail, avail );

	LOG1( si->log, _GetAvailRingBufferQueue, avail );

	cp->ring.space = avail;

	return avail;
}
/*
 * Move every submitted indirect buffer whose tag has already been
 * processed from the "submitted" list (oldest..newest) to the free list.
 *
 * The tag processed so far is read from slot 1 of the scratch feedback
 * area in mapped memory.
 */
void Radeon_FreeIndirectBuffers( accelerator_info *ai )
{
	CP_info *cp = &ai->si->cp;
	uint32 *scratch =
		(uint32 *)(ai->mapped_memory[cp->feedback.mem_type].data + cp->feedback.scratch_mem_offset);
	int32 cur_processed_tag = scratch[1];

	SHOW_FLOW( 3, "processed_tag=%d", cur_processed_tag );

	while( cp->buffers.oldest != -1 ) {
		indirect_buffer *head = cp->buffers.buffers + cp->buffers.oldest;
		int successor;

		SHOW_FLOW( 3, "oldset buffer's tag: %d", head->send_tag );

		/* signed difference keeps the comparison correct across tag
		   wrap-around; stop at the first buffer not yet processed */
		if( (int32)(cur_processed_tag - head->send_tag) < 0 )
			break;

		SHOW_FLOW( 3, "mark %d as being free", head->send_tag );

		successor = head->next;

		/* push the buffer onto the free list ... */
		head->next = cp->buffers.free_list;
		cp->buffers.free_list = cp->buffers.oldest;

		/* ... and unlink it from the submitted list */
		cp->buffers.oldest = successor;

		/* that was the last submitted buffer: the list is empty now */
		if( successor == -1 )
			cp->buffers.newest = -1;
	}
}
/*
 * Wait until at least one indirect buffer is on the free list.
 *
 * Expects cp->lock to be held on entry; the lock is dropped while
 * waiting and held again on return.
 *
 * If no buffer becomes free within 100000 time units of system_time()
 * (microseconds on BeOS/Haiku), an error is reported and the engine is
 * reset. The function then returns without re-checking the free list.
 */
static void Radeon_WaitForFreeIndirectBuffers( accelerator_info *ai )
{
	CP_info *cp = &ai->si->cp;
	bigtime_t start_time;

	SHOW_FLOW0( 3, "" );

	start_time = system_time();

	for( ;; ) {
		bigtime_t elapsed;

		/* recycle whatever the engine has finished in the meantime */
		Radeon_FreeIndirectBuffers( ai );

		if( cp->buffers.free_list >= 0 )
			return;

		elapsed = system_time() - start_time;

		if( elapsed > 100000 )
			break;

		RELEASE_BEN( cp->lock );

		/* back-off: short spins at first, then sleeps that grow with
		   the time already spent waiting */
		if( elapsed <= 5000 )
			Radeon_Spin( 1 );
		else
			snooze( elapsed / 10 );

		ACQUIRE_BEN( cp->lock );
	}

	SHOW_ERROR0( 0, "All buffers are in use and engine doesn't finish any of them" );

	/* the lock is not held across the reset call */
	RELEASE_BEN( cp->lock );
	Radeon_ResetEngine( ai );
	ACQUIRE_BEN( cp->lock );
}
/*
 * Take one indirect buffer off the free list and return its index.
 * If the free list is empty, waits for a buffer to be released first.
 *
 * ai        - accelerant instance data
 * keep_lock - ignored; cp->lock is always released before returning
 *
 * NOTE(review): if the free list is still empty after the wait (timeout
 * followed by engine reset), index -1 is used to access the buffer
 * array - confirm that Radeon_ResetEngine() always refills the list.
 */
int Radeon_AllocIndirectBuffer( accelerator_info *ai, bool keep_lock )
{
	CP_info *cp = &ai->si->cp;
	int idx;

	(void)keep_lock;

	SHOW_FLOW0( 3, "" );

	ACQUIRE_BEN( cp->lock );

	if( cp->buffers.free_list == -1 )
		Radeon_WaitForFreeIndirectBuffers( ai );

	/* pop the head of the free list */
	idx = cp->buffers.free_list;
	cp->buffers.free_list = cp->buffers.buffers[idx].next;

	RELEASE_BEN( cp->lock );

	SHOW_FLOW( 3, "got %d", idx );

	return idx;
}
/*
 * Put a single indirect buffer back onto the free list.
 *
 * ai         - accelerant instance data
 * buffer_idx - index of the buffer to release
 * never_used - if false, Radeon_WaitForIdle() is called before the
 *              buffer is released (presumably so that the engine no
 *              longer reads from it - confirm)
 */
void Radeon_FreeIndirectBuffer( accelerator_info *ai, int buffer_idx, bool never_used )
{
	CP_info *cp = &ai->si->cp;

	SHOW_FLOW( 3, "buffer_idx=%d, never_used=%d", buffer_idx, never_used );

	if( never_used ) {
		/* nothing to wait for */
	} else {
		Radeon_WaitForIdle( ai, false );
	}

	/* push the buffer onto the head of the free list */
	ACQUIRE_BEN( cp->lock );

	cp->buffers.buffers[buffer_idx].next = cp->buffers.free_list;
	cp->buffers.free_list = buffer_idx;

	RELEASE_BEN( cp->lock );

	SHOW_FLOW0( 3, "done" );
}
/* Forward declaration: used by WRITE_RB_START below, defined later in this file. */
void Radeon_WaitForRingBufferSpace( accelerator_info *ai, uint num_dwords );
/*
 * Ring buffer write helpers. They must be used as a group:
 *
 *   WRITE_RB_START( n );  WRITE_RB( v ); ...  WRITE_RB_FINISH;
 *
 * WRITE_RB_START opens a brace scope that WRITE_RB_FINISH closes, so the
 * two must be paired within the same function. Both expect the variables
 * "ai" (accelerator_info *) and "cp" (CP_info *) to be in scope.
 *
 * WRITE_RB_START( num_dwords ):
 *   - evaluates num_dwords once and keeps it in ring_tail_increment
 *   - calls Radeon_WaitForRingBufferSpace() if the cached cp->ring.space
 *     is smaller than the requested number of dwords
 *   - sets up the locals ring_start (address of the ring in mapped
 *     memory), ring_tail and ring_tail_mask used by WRITE_RB
 */
#define WRITE_RB_START( num_dwords ) \
{ \
uint32 *ring_start; \
uint32 ring_tail, ring_tail_mask; \
uint32 ring_tail_increment = (num_dwords); \
if( cp->ring.space < ring_tail_increment ) \
Radeon_WaitForRingBufferSpace( ai, ring_tail_increment ); \
ring_start = \
(uint32 *)(ai->mapped_memory[cp->ring.mem_type].data + cp->ring.mem_offset); \
\
ring_tail = cp->ring.tail; \
ring_tail_mask = cp->ring.tail_mask;
/*
 * WRITE_RB( value ):
 *   evaluates value once, stores it at the local tail position and
 *   advances the tail, wrapping it with ring_tail_mask.
 */
#define WRITE_RB( value ) \
{ \
uint32 val = (value); \
SHOW_FLOW( 3, "@%d: %x", ring_tail, val ); \
ring_start[ring_tail++] = val; \
ring_tail &= ring_tail_mask; \
}
/*
 * WRITE_RB_FINISH:
 *   stores the local tail back into cp->ring.tail, reduces cp->ring.space
 *   by the dword count given to WRITE_RB_START (not by the number of
 *   WRITE_RB calls actually made - the two must match) and closes the
 *   scope opened by WRITE_RB_START.
 */
#define WRITE_RB_FINISH \
cp->ring.tail = ring_tail; \
cp->ring.space -= ring_tail_increment; \
}
/*
 * Submit an indirect buffer to the command processor via the ring buffer.
 *
 * ai                - accelerant instance data
 * buffer_idx        - index of the indirect buffer to execute
 * buffer_size       - number of dwords in that buffer; if odd, one
 *                     RADEON_CP_PACKET2 dword is appended to make it even
 * state_buffer_idx  - index of the state buffer belonging to the commands
 * state_buffer_size - number of dwords in the state buffer; 0 (or less)
 *                     means no state buffer
 * has_lock          - ignored; cp->lock is always acquired here
 *
 * The state buffer is only submitted when it differs from the one that
 * is currently active (cp->buffers.active_state). After the buffer
 * reference, a write of a fresh tag to RADEON_SCRATCH_REG1 is queued;
 * Radeon_FreeIndirectBuffers() compares against that tag to find out
 * which buffers have been processed.
 */
void Radeon_SendIndirectBuffer( accelerator_info *ai,
	int buffer_idx, int buffer_size,
	int state_buffer_idx, int state_buffer_size, bool has_lock )
{
	CP_info *cp = &ai->si->cp;
	bool need_stateupdate;

	SHOW_FLOW( 3, "buffer_idx=%d, buffer_size=%d, state_buffer_idx=%d, state_buffer_size=%d",
		buffer_idx, buffer_size, state_buffer_idx, state_buffer_size );

	/* pad odd-sized buffers with one extra dword so the size is even
	   (presumably a requirement of the CP - confirm against hardware docs) */
	if( (buffer_size & 1) != 0 ) {
		SHOW_FLOW( 3, "buffer has uneven size (%d)", buffer_size );
		Radeon_GetIndirectBufferPtr( ai, buffer_idx )[buffer_size] = RADEON_CP_PACKET2;
		buffer_size += 1;
	}

	ACQUIRE_BEN( cp->lock );
	(void)has_lock;

	need_stateupdate =
		state_buffer_size > 0 && state_buffer_idx != cp->buffers.active_state;

	/* 3 dwords for the buffer reference + 2 for the tag write,
	   plus 3 more if the state buffer must be submitted first */
	WRITE_RB_START( 5 + (need_stateupdate ? 3 : 0) );

	if( need_stateupdate ) {
		SHOW_FLOW0( 3, "update state" );

		WRITE_RB( CP_PACKET0( RADEON_CP_IB_BASE, 2 ));
		WRITE_RB( cp->buffers.vm_start +
			state_buffer_idx * INDIRECT_BUFFER_SIZE * sizeof( uint32 ));
		WRITE_RB( state_buffer_size );

		cp->buffers.active_state = state_buffer_idx;
	}

	WRITE_RB( CP_PACKET0( RADEON_CP_IB_BASE, 2 ));
	WRITE_RB( cp->buffers.vm_start + buffer_idx * INDIRECT_BUFFER_SIZE * sizeof( uint32 ));
	WRITE_RB( buffer_size );

	/* queue the tag write that marks this buffer as processed */
	WRITE_RB( CP_PACKET0( RADEON_SCRATCH_REG1, 1 ));
	WRITE_RB( cp->buffers.buffers[buffer_idx].send_tag = (int32)++cp->buffers.cur_tag );

	SHOW_FLOW( 3, "Assigned tag %d", cp->buffers.buffers[buffer_idx].send_tag );

	WRITE_RB_FINISH;

	/* append the buffer to the list of submitted buffers.
	   An empty list is marked by newest == -1; index 0 is a valid buffer,
	   so the test must be against -1 and not "> 0" - otherwise the list
	   would be truncated whenever buffer 0 is the newest entry and the
	   dropped buffers would never be returned to the free list. */
	if( cp->buffers.newest != -1 )
		cp->buffers.buffers[cp->buffers.newest].next = buffer_idx;
	else
		cp->buffers.oldest = buffer_idx;

	cp->buffers.newest = buffer_idx;
	cp->buffers.buffers[buffer_idx].next = -1;

	/* make sure all ring buffer writes are globally visible before the
	   write pointer is updated: a locked no-op add is a full memory
	   barrier on x86, the "memory" clobber stops compiler reordering */
#ifdef __i386__
	__asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
#endif

	/* dummy register read before the write pointer update (presumably
	   to flush posted writes on the bus - confirm) */
	INREG( ai->regs, RADEON_CP_RB_RPTR );

	/* publish the new tail so the CP starts processing the new commands */
	OUTREG( ai->regs, RADEON_CP_RB_WPTR, cp->ring.tail );

	RELEASE_BEN( cp->lock );

	SHOW_FLOW0( 3, "done" );
}
/*
 * Forget the given state buffer as the active one, so that
 * Radeon_SendIndirectBuffer() submits it again on next use.
 *
 * Radeon_WaitForIdle() is called first; the active-state marker is then
 * reset to -1 under cp->lock if it refers to state_buffer_idx.
 */
void Radeon_InvalidateStateBuffer( accelerator_info *ai, int state_buffer_idx )
{
	CP_info *cp = &ai->si->cp;

	Radeon_WaitForIdle( ai, false );

	ACQUIRE_BEN( cp->lock );

	/* only reset the marker if it refers to the buffer in question */
	if( state_buffer_idx == cp->buffers.active_state ) {
		cp->buffers.active_state = -1;
	}

	RELEASE_BEN( cp->lock );
}
/*
 * Wait until the ring buffer has room for at least num_dwords dwords.
 *
 * Expects cp->lock to be held on entry; the lock is dropped while
 * waiting and held again on return.
 *
 * The wait is abandoned after 100000 time units of system_time()
 * (microseconds on BeOS/Haiku). The timeout is not reported: the
 * function returns normally even if the space never became available.
 */
void Radeon_WaitForRingBufferSpace( accelerator_info *ai, uint num_dwords )
{
	CP_info *cp = &ai->si->cp;
	bigtime_t start_time = system_time();

	for( ;; ) {
		bigtime_t elapsed;

		/* also refreshes the cached cp->ring.space */
		if( getAvailRingBuffer( ai ) >= num_dwords )
			break;

		elapsed = system_time() - start_time;

		if( elapsed > 100000 )
			break;

		RELEASE_BEN( cp->lock );

		/* back-off: short spins at first, then sleeps that grow with
		   the time already spent waiting */
		if( elapsed <= 5000 )
			Radeon_Spin( 1 );
		else
			snooze( elapsed / 10 );

		ACQUIRE_BEN( cp->lock );
	}
}