#include <linux/iosys-map.h>
#include <linux/rwsem.h>
#include <drm/drm_print.h>
#include <drm/panthor_drm.h>
#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
/*
 * Size of a heap context as seen by this driver. The per-context stride in
 * the pool BO is this value aligned up to the GPU L2 cache line size (see
 * panthor_heap_ctx_stride()).
 */
#define HEAP_CONTEXT_SIZE 32

/**
 * struct panthor_heap_chunk_header - Header found at the beginning of each
 * heap chunk BO.
 */
struct panthor_heap_chunk_header {
	/**
	 * @next: Link to the previous chunk, written only for chunks
	 * allocated at heap-creation time: page-aligned GPU VA in the upper
	 * bits, chunk size in 4k-page units in the low 12 bits (see
	 * panthor_alloc_heap_chunk()). Zero otherwise.
	 */
	u64 next;

	/**
	 * @unknown: Remaining header words; zero-initialized on allocation,
	 * never written by this driver.
	 */
	u32 unknown[14];
};
/**
 * struct panthor_heap_chunk - Bookkeeping for one allocated heap chunk.
 */
struct panthor_heap_chunk {
	/** @node: Used to insert the chunk in the panthor_heap::chunks list. */
	struct list_head node;

	/** @bo: Kernel BO backing this chunk (GPU-mapped, CPU vmap on demand). */
	struct panthor_kernel_bo *bo;
};
/**
 * struct panthor_heap - Driver-side state for one tiler heap context.
 */
struct panthor_heap {
	/** @chunks: List of allocated chunks, newest first (list_add at head). */
	struct list_head chunks;

	/** @lock: Serializes insertion/removal on @chunks and @chunk_count updates. */
	struct mutex lock;

	/** @chunk_size: Size of each chunk, in bytes. */
	u32 chunk_size;

	/** @max_chunks: Maximum number of chunks this heap may hold. */
	u32 max_chunks;

	/**
	 * @target_in_flight: In-flight render pass count above which
	 * panthor_heap_grow() refuses to allocate a new chunk.
	 */
	u32 target_in_flight;

	/** @chunk_count: Number of chunks currently allocated. */
	u32 chunk_count;
};
/* Upper bound on heap contexts per pool; also sizes the contexts BO. */
#define MAX_HEAPS_PER_POOL 128

/**
 * struct panthor_heap_pool - Pool of heap contexts sharing one GPU BO.
 */
struct panthor_heap_pool {
	/** @refcount: Pool refcount; released through panthor_heap_pool_release(). */
	struct kref refcount;

	/** @ptdev: Device owning this pool. */
	struct panthor_device *ptdev;

	/**
	 * @vm: VM the heap BOs are mapped in. Cleared (under @lock held in
	 * write mode) by panthor_heap_pool_destroy() to fail concurrent
	 * panthor_heap_create() calls.
	 */
	struct panthor_vm *vm;

	/** @lock: Protects @xa and the @vm pointer. */
	struct rw_semaphore lock;

	/** @xa: Heap contexts indexed by handle, limited to MAX_HEAPS_PER_POOL. */
	struct xarray xa;

	/** @gpu_contexts: BO holding the GPU-visible heap contexts. */
	struct panthor_kernel_bo *gpu_contexts;

	/**
	 * @size: Accounted size, in bytes: all chunk BOs plus the
	 * @gpu_contexts BO itself.
	 */
	atomic_t size;
};
/*
 * Distance between two consecutive heap contexts in the pool BO:
 * HEAP_CONTEXT_SIZE rounded up to the GPU L2 cache line size.
 */
static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
{
	u32 line_sz = GPU_L2_FEATURES_LINE_SIZE(ptdev->gpu_info.l2_features);

	return ALIGN(HEAP_CONTEXT_SIZE, line_sz);
}
/* Byte offset of heap context @id inside the pool's contexts BO. */
static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
{
	return id * panthor_heap_ctx_stride(pool->ptdev);
}
static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
{
return pool->gpu_contexts->kmap +
panthor_get_heap_ctx_offset(pool, id);
}
/*
 * Unlink @chunk from @heap, fix up the pool size accounting, then release
 * the backing BO and the chunk descriptor.
 */
static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
				    struct panthor_heap *heap,
				    struct panthor_heap_chunk *chunk)
{
	mutex_lock(&heap->lock);
	heap->chunk_count--;
	list_del(&chunk->node);
	mutex_unlock(&heap->lock);

	atomic_sub(heap->chunk_size, &pool->size);

	panthor_kernel_bo_destroy(chunk->bo);
	kfree(chunk);
}
/*
 * Allocate one chunk for @heap and insert it at the head of the chunk list.
 *
 * @initial_chunk distinguishes chunks allocated at heap-creation time from
 * chunks allocated through panthor_heap_grow(): only initial chunks get
 * their header's @next field pointing at the previously allocated chunk;
 * runtime-allocated chunks keep a zeroed header.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
				    struct panthor_heap *heap,
				    bool initial_chunk)
{
	struct panthor_heap_chunk *chunk;
	struct panthor_heap_chunk_header *hdr;
	int ret;

	chunk = kmalloc_obj(*chunk);
	if (!chunk)
		return -ENOMEM;

	/* GPU-mapped, not CPU-mmapable from userspace, no-exec mapping. */
	chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size,
					     DRM_PANTHOR_BO_NO_MMAP,
					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					     PANTHOR_VM_KERNEL_AUTO_VA,
					     "Tiler heap chunk");
	if (IS_ERR(chunk->bo)) {
		ret = PTR_ERR(chunk->bo);
		goto err_free_chunk;
	}

	/* Map the chunk only long enough to initialize its header. */
	ret = panthor_kernel_bo_vmap(chunk->bo);
	if (ret)
		goto err_destroy_bo;

	hdr = chunk->bo->kmap;
	memset(hdr, 0, sizeof(*hdr));

	if (initial_chunk && !list_empty(&heap->chunks)) {
		struct panthor_heap_chunk *prev_chunk;
		u64 prev_gpuva;

		/*
		 * The list head holds the most recently allocated chunk, so
		 * @next links this chunk back to the one allocated just
		 * before it: page-aligned GPU VA in the upper bits, chunk
		 * size in 4k-page units in the low 12 bits.
		 */
		prev_chunk = list_first_entry(&heap->chunks,
					      struct panthor_heap_chunk,
					      node);

		prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	}

	panthor_kernel_bo_vunmap(chunk->bo);

	mutex_lock(&heap->lock);
	list_add(&chunk->node, &heap->chunks);
	heap->chunk_count++;
	mutex_unlock(&heap->lock);

	atomic_add(heap->chunk_size, &pool->size);

	return 0;

err_destroy_bo:
	panthor_kernel_bo_destroy(chunk->bo);

err_free_chunk:
	kfree(chunk);

	return ret;
}
/* Release every chunk still attached to @heap. */
static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
				     struct panthor_heap *heap)
{
	struct panthor_heap_chunk *pos, *next;

	list_for_each_entry_safe(pos, next, &heap->chunks, node)
		panthor_free_heap_chunk(pool, heap, pos);
}
/*
 * Allocate @chunk_count initial chunks for @heap. On failure, the chunks
 * allocated so far are left on the heap list for the caller to clean up
 * (see panthor_heap_create()'s error path).
 */
static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
				     struct panthor_heap *heap,
				     u32 chunk_count)
{
	u32 i;

	for (i = 0; i < chunk_count; i++) {
		int ret = panthor_alloc_heap_chunk(pool, heap, true);

		if (ret)
			return ret;
	}

	return 0;
}
/*
 * Remove the heap at @handle from the pool and free all its resources.
 * Caller must hold pool->lock in write mode.
 */
static int
panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
{
	struct panthor_heap *heap = xa_erase(&pool->xa, handle);

	if (!heap)
		return -EINVAL;

	panthor_free_heap_chunks(pool, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);
	return 0;
}
/**
 * panthor_heap_destroy() - Destroy a heap context
 * @pool: Pool this context belongs to.
 * @handle: Handle returned by panthor_heap_create().
 *
 * Return: 0 on success, -EINVAL if @handle names no live heap.
 */
int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
{
	int ret;

	down_write(&pool->lock);
	ret = panthor_heap_destroy_locked(pool, handle);
	up_write(&pool->lock);

	return ret;
}
/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunks allocated at creation time. Must be
 * at least one and no more than @max_chunks.
 * @chunk_size: Size of each chunk. Must be page-aligned and in the
 * [128k:8M] range.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: In-flight render pass limit enforced by
 * panthor_heap_grow().
 * @heap_ctx_gpu_va: Returns the GPU address of the allocated heap context.
 * @first_chunk_gpu_va: Returns the GPU address of the first chunk.
 *
 * Return: a positive heap handle on success, a negative error code otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
			u32 initial_chunk_count,
			u32 chunk_size,
			u32 max_chunks,
			u32 target_in_flight,
			u64 *heap_ctx_gpu_va,
			u64 *first_chunk_gpu_va)
{
	struct panthor_heap *heap;
	struct panthor_heap_chunk *first_chunk;
	struct panthor_vm *vm;
	int ret = 0;
	u32 id;

	if (initial_chunk_count == 0)
		return -EINVAL;

	if (initial_chunk_count > max_chunks)
		return -EINVAL;

	if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
	    chunk_size < SZ_128K || chunk_size > SZ_8M)
		return -EINVAL;

	/*
	 * Grab a VM reference under the pool lock so the VM can't go away
	 * while we allocate chunks below without holding the pool lock.
	 * pool->vm is NULL once pool destruction has started.
	 */
	down_read(&pool->lock);
	vm = panthor_vm_get(pool->vm);
	up_read(&pool->lock);

	if (!vm)
		return -EINVAL;

	heap = kzalloc_obj(*heap);
	if (!heap) {
		ret = -ENOMEM;
		goto err_put_vm;
	}

	mutex_init(&heap->lock);
	INIT_LIST_HEAD(&heap->chunks);
	heap->chunk_size = chunk_size;
	heap->max_chunks = max_chunks;
	heap->target_in_flight = target_in_flight;

	/* Chunk allocation happens outside the pool lock. */
	ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count);
	if (ret)
		goto err_free_heap;

	/* Chunks are inserted at the head, so this is the last one allocated. */
	first_chunk = list_first_entry(&heap->chunks,
				       struct panthor_heap_chunk,
				       node);
	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

	down_write(&pool->lock);
	/* Re-check under the write lock: destruction may have started. */
	if (!pool->vm) {
		ret = -EINVAL;
	} else {
		ret = xa_alloc(&pool->xa, &id, heap,
			       XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
		if (!ret) {
			/* Hand the GPU a zeroed context slot. */
			void *gpu_ctx = panthor_get_heap_ctx(pool, id);

			memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
			*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
					   panthor_get_heap_ctx_offset(pool, id);
		}
	}
	up_write(&pool->lock);

	if (ret)
		goto err_free_heap;

	panthor_vm_put(vm);
	return id;

err_free_heap:
	panthor_free_heap_chunks(pool, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);

err_put_vm:
	panthor_vm_put(vm);
	return ret;
}
/**
 * panthor_heap_return_chunk() - Return an unused heap chunk
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @chunk_gpu_va: The chunk VA to return; low 12 bits are ignored.
 *
 * Looks up the heap identified by @heap_gpu_va, removes the chunk whose BO
 * base matches the page-aligned @chunk_gpu_va from its chunk list, and frees
 * that chunk.
 *
 * Return: 0 on success, -EINVAL if the heap or chunk can't be found.
 */
int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
			      u64 heap_gpu_va,
			      u64 chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Chunks are matched on their page-aligned base address. */
	chunk_gpu_va &= GENMASK_ULL(63, 12);

	mutex_lock(&heap->lock);
	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
		if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
			removed = chunk;
			list_del(&chunk->node);
			heap->chunk_count--;
			atomic_sub(heap->chunk_size, &pool->size);
			break;
		}
	}
	mutex_unlock(&heap->lock);

	if (removed) {
		/*
		 * Use @removed: once the loop runs to completion without a
		 * break, the cursor no longer points at a valid entry, so
		 * the freed chunk must be referenced through @removed only.
		 * BO destruction happens outside heap->lock.
		 */
		panthor_kernel_bo_destroy(removed->bo);
		kfree(removed);
		ret = 0;
	} else {
		ret = -EINVAL;
	}

out_unlock:
	up_read(&pool->lock);
	return ret;
}
/**
 * panthor_heap_grow() - Make a heap context grow.
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @renderpasses_in_flight: Number of render passes currently in-flight.
 * @pending_frag_count: Number of fragment jobs waiting for
 * execution/completion. Currently unused by this implementation.
 * @new_chunk_gpu_va: Returns the new chunk: page-aligned GPU VA in the
 * upper bits, chunk size in 4k-page units in the low 12 bits.
 *
 * Return: 0 on success, -EINVAL if the heap can't be found, -ENOMEM when
 * the heap is already at @max_chunks or @renderpasses_in_flight exceeds
 * the heap's @target_in_flight (or the chunk allocation itself failed).
 */
int panthor_heap_grow(struct panthor_heap_pool *pool,
		      u64 heap_gpu_va,
		      u32 renderpasses_in_flight,
		      u32 pending_frag_count,
		      u64 *new_chunk_gpu_va)
{
	/* Derive the heap handle from the context VA. */
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Refuse to grow past the configured limits. */
	if (renderpasses_in_flight > heap->target_in_flight ||
	    heap->chunk_count >= heap->max_chunks) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ret = panthor_alloc_heap_chunk(pool, heap, false);
	if (ret)
		goto out_unlock;

	/* The freshly allocated chunk sits at the head of the list. */
	chunk = list_first_entry(&heap->chunks,
				 struct panthor_heap_chunk,
				 node);
	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	ret = 0;

out_unlock:
	up_read(&pool->lock);
	return ret;
}
/* Last reference dropped: tear down the xarray and free the pool. */
static void panthor_heap_pool_release(struct kref *ref)
{
	struct panthor_heap_pool *pool;

	pool = container_of(ref, struct panthor_heap_pool, refcount);
	xa_destroy(&pool->xa);
	kfree(pool);
}
/**
 * panthor_heap_pool_put() - Release a heap pool reference
 * @pool: Pool to release the reference on. Can be NULL.
 */
void panthor_heap_pool_put(struct panthor_heap_pool *pool)
{
	if (!pool)
		return;

	kref_put(&pool->refcount, panthor_heap_pool_release);
}
struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool)
{
if (pool)
kref_get(&pool->refcount);
return pool;
}
/**
 * panthor_heap_pool_create() - Create a heap pool
 * @ptdev: Device.
 * @vm: The VM this pool is bound to.
 *
 * The pool's contexts BO is sized for MAX_HEAPS_PER_POOL contexts at the
 * device's context stride, rounded up to 4096 bytes.
 *
 * Return: A valid pointer on success, an ERR_PTR() otherwise.
 */
struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
{
	size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
			      panthor_heap_ctx_stride(ptdev),
			      4096);
	struct panthor_heap_pool *pool;
	int ret = 0;

	pool = kzalloc_obj(*pool);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	/*
	 * NOTE(review): pool->vm is stored without taking a VM reference
	 * here — presumably the caller guarantees the VM outlives the pool;
	 * confirm against the caller.
	 */
	pool->vm = vm;
	pool->ptdev = ptdev;
	init_rwsem(&pool->lock);
	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
	kref_init(&pool->refcount);

	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
						      DRM_PANTHOR_BO_NO_MMAP,
						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
						      PANTHOR_VM_KERNEL_AUTO_VA,
						      "Heap pool");
	if (IS_ERR(pool->gpu_contexts)) {
		ret = PTR_ERR(pool->gpu_contexts);
		goto err_destroy_pool;
	}

	ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
	if (ret)
		goto err_destroy_pool;

	/* The contexts BO itself counts toward the pool size. */
	atomic_add(pool->gpu_contexts->obj->size, &pool->size);

	return pool;

err_destroy_pool:
	/* panthor_heap_pool_destroy() copes with a partially set up pool. */
	panthor_heap_pool_destroy(pool);

	return ERR_PTR(ret);
}
/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy. Can be NULL.
 *
 * Destroys all remaining heap contexts (warning if any are still alive),
 * releases the contexts BO, and drops the pool's own reference. The pool
 * struct itself is freed only once all other references are gone (see
 * panthor_heap_pool_release()).
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
	struct panthor_heap *heap;
	unsigned long i;

	if (!pool)
		return;

	down_write(&pool->lock);
	xa_for_each(&pool->xa, i, heap)
		drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

	/* gpu_contexts may be NULL/ERR_PTR when creation failed part-way. */
	if (!IS_ERR_OR_NULL(pool->gpu_contexts)) {
		atomic_sub(pool->gpu_contexts->obj->size, &pool->size);
		panthor_kernel_bo_destroy(pool->gpu_contexts);
	}

	/* Makes concurrent panthor_heap_create() calls fail with -EINVAL. */
	pool->vm = NULL;
	up_write(&pool->lock);

	panthor_heap_pool_put(pool);
}
/**
 * panthor_heap_pool_size() - Get the accounted size of a heap pool
 * @pool: Pool to query. Can be NULL.
 *
 * Return: Bytes accounted to the pool (chunks plus contexts BO), or 0 if
 * @pool is NULL.
 */
size_t panthor_heap_pool_size(struct panthor_heap_pool *pool)
{
	return pool ? atomic_read(&pool->size) : 0;
}