#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/panthor_drm.h>
#include <linux/build_bug.h>
#include <linux/cleanup.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/dma-resv.h>
#include <linux/firmware.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/iosys-map.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/rcupdate.h>
#include "panthor_devfreq.h"
#include "panthor_device.h"
#include "panthor_fw.h"
#include "panthor_gem.h"
#include "panthor_gpu.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
#include "panthor_sched.h"
/* Delay, in milliseconds, used when (re)arming a queue's timeout work. */
#define JOB_TIMEOUT_MS 5000
/* Highest CSG slot priority csg_slot_prog_locked() accepts. */
#define MAX_CSG_PRIO 0xf
/*
 * Number of u64 words in 64 bytes.
 * NOTE(review): presumably 64 is the cache line size and one instruction is
 * one u64 - not used in this part of the file, confirm against the users.
 */
#define NUM_INSTRS_PER_CACHE_LINE (64 / sizeof(u64))
/*
 * NOTE(review): not used in this part of the file; presumably the maximum
 * number of instructions emitted per job - confirm against the users.
 */
#define MAX_INSTRS_PER_JOB 24
/* Forward declaration: struct panthor_group is defined further down. */
struct panthor_group;
/**
 * struct panthor_csg_slot - Software state attached to a command stream group slot
 * @group: Group currently bound to this slot. Set by group_bind_locked() and
 *	   reset to NULL by group_unbind_locked().
 * @priority: Slot priority, refreshed from the FW endpoint request by
 *	      csg_slot_sync_priority_locked().
 */
struct panthor_csg_slot {
struct panthor_group *group;
u8 priority;
};
/**
 * enum panthor_csg_priority - Group priority levels
 *
 * Values are used as array indices for the per-priority group lists
 * (scheduler runnable/idle lists and the tick context lists), hence the
 * trailing PANTHOR_CSG_PRIORITY_COUNT entry which is the number of levels,
 * not a valid priority.
 */
enum panthor_csg_priority {
PANTHOR_CSG_PRIORITY_LOW = 0,
PANTHOR_CSG_PRIORITY_MEDIUM,
PANTHOR_CSG_PRIORITY_HIGH,
PANTHOR_CSG_PRIORITY_RT,
PANTHOR_CSG_PRIORITY_COUNT,
};
/**
 * struct panthor_scheduler - Device-wide scheduler state
 * @ptdev: Device this scheduler belongs to.
 * @wq: Workqueue used by group_queue_work(), sched_queue_work() and
 *	sched_queue_delayed_work().
 * @heap_alloc_wq: Workqueue the per-group tiler OOM work is queued on.
 * @tick_work: Delayed work queued with sched_queue_delayed_work(sched, tick, ...).
 * @sync_upd_work: Work queued with sched_queue_work(sched, sync_upd).
 * @fw_events_work: Work processing the events accumulated in @fw_events.
 * @fw_events: Bitmask of pending FW events. Bits are OR-ed in by
 *	       panthor_sched_report_fw_events() and consumed (exchanged with 0)
 *	       by process_fw_events_work().
 * @resched_target: NOTE(review): not used in this part of the file; presumably
 *		    the time of the next scheduled tick - confirm.
 * @last_tick: NOTE(review): not used in this part of the file - confirm meaning.
 * @tick_period: NOTE(review): not used in this part of the file - confirm meaning/unit.
 * @lock: Mutex the *_locked() helpers assert is held; taken around CSG slot
 *	  and FW interface manipulation.
 * @groups: Group lists.
 * @groups.runnable: One list per priority level.
 *		     NOTE(review): presumably runnable, non-idle groups - confirm.
 * @groups.idle: One list per priority level.
 *		 NOTE(review): presumably runnable but idle groups - confirm.
 * @groups.waiting: Groups having at least one queue blocked on a SYNC_WAIT
 *		    are moved here by cs_slot_sync_queue_state_locked().
 * @csg_slots: Per-slot software state, indexed by CSG slot id.
 * @csg_slot_count: Number of usable entries in @csg_slots; slot ids are
 *		    checked against it before use.
 * @cs_slot_count: NOTE(review): not used in this part of the file; presumably
 *		   the number of CS slots per CSG - confirm.
 * @as_slot_count: Maximum number of distinct VMs a tick context may pick.
 * @used_csg_slot_count: NOTE(review): not used in this part of the file - confirm.
 * @sb_slot_count: NOTE(review): not used in this part of the file - confirm.
 * @might_have_idle_groups: Set when a CSG idle event is processed.
 * @pm: Power management state.
 * @pm.has_ref: NOTE(review): not used in this part of the file; presumably
 *		true while a runtime PM reference is held - confirm.
 * @reset: Reset related state.
 * @reset.lock: NOTE(review): not used in this part of the file - confirm what
 *		it protects.
 * @reset.in_progress: While non-zero, sched_queue_work() and
 *		       sched_queue_delayed_work() do not queue anything.
 * @reset.stopped_groups: NOTE(review): not used in this part of the file - confirm.
 */
struct panthor_scheduler {
struct panthor_device *ptdev;
struct workqueue_struct *wq;
struct workqueue_struct *heap_alloc_wq;
struct delayed_work tick_work;
struct work_struct sync_upd_work;
struct work_struct fw_events_work;
atomic_t fw_events;
u64 resched_target;
u64 last_tick;
u64 tick_period;
struct mutex lock;
struct {
struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT];
struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT];
struct list_head waiting;
} groups;
struct panthor_csg_slot csg_slots[MAX_CSGS];
u32 csg_slot_count;
u32 cs_slot_count;
u32 as_slot_count;
u32 used_csg_slot_count;
u32 sb_slot_count;
bool might_have_idle_groups;
struct {
bool has_ref;
} pm;
struct {
struct mutex lock;
atomic_t in_progress;
struct list_head stopped_groups;
} reset;
};
/**
 * struct panthor_syncobj_32b - Synchronization object with a 32-bit sequence number
 * @seqno: Sequence number.
 * @status: Status word. NOTE(review): encoding not visible here - confirm.
 *
 * Its size is the length synced by panthor_queue_get_syncwait_obj() when the
 * queue waits on a 32-bit object, so the layout must not be changed.
 */
struct panthor_syncobj_32b {
u32 seqno;
u32 status;
};
/**
 * struct panthor_syncobj_64b - Synchronization object with a 64-bit sequence number
 * @seqno: Sequence number.
 * @status: Status word. NOTE(review): encoding not visible here - confirm.
 * @pad: Padding, brings the structure size to 16 bytes.
 *
 * Its size is the length synced by panthor_queue_get_syncwait_obj() when the
 * queue waits on a 64-bit object, so the layout must not be changed.
 */
struct panthor_syncobj_64b {
u64 seqno;
u32 status;
u32 pad;
};
/**
 * struct panthor_queue - Command stream queue belonging to a group
 * @scheduler: drm_sched instance of this queue; its timeout_wq is where
 *	       @timeout.work is queued.
 * @entity: drm_sched entity of this queue.
 * @name: Heap-allocated name, released with kfree() in group_free_queue().
 * @timeout: Job timeout state.
 * @timeout.work: Delayed work armed with JOB_TIMEOUT_MS (or the saved
 *		  remaining time on resume).
 * @timeout.remaining: MAX_SCHEDULE_TIMEOUT while the timeout is not suspended
 *		       (see queue_timeout_is_suspended()); otherwise the number
 *		       of jiffies left when the timeout was suspended.
 * @doorbell_id: Doorbell used by the queue. Set to csg_id + 1 when the group
 *		 is bound, and to -1 (0xff) when it is unbound.
 * @priority: Value programmed in the CS_CONFIG priority field.
 *	      CSF_MAX_QUEUE_PRIO (defined right below it) is GENMASK(3, 0).
 * @ringbuf: Ring buffer BO; its GPU VA and size are programmed in the CS slot.
 * @iface: Ring buffer input/output interface.
 * @iface.mem: BO backing the interface.
 * @iface.input: Input interface (insert/extract fields are accessed here).
 * @iface.output: Output interface (extract field is read here).
 * @iface.input_fw_va: Address of the input interface programmed in the CS slot.
 * @iface.output_fw_va: Address of the output interface programmed in the CS slot.
 * @syncwait: Description of the sync object the queue is blocked on, filled
 *	      by cs_slot_sync_queue_state_locked().
 * @syncwait.gpu_va: GPU VA of the sync object.
 * @syncwait.ref: Reference value to compare against.
 * @syncwait.gt: True when the wait condition is "greater than".
 * @syncwait.sync64: True when the sync object is a 64-bit one.
 * @syncwait.obj: GEM object containing the sync object, cached by
 *		  panthor_queue_get_syncwait_obj().
 * @syncwait.offset: Offset of the sync object in @syncwait.obj.
 * @syncwait.kmap: Kernel mapping of @syncwait.obj, NULL when not mapped.
 * @fence_ctx: Fence context state.
 * @fence_ctx.lock: Spinlock held when walking @fence_ctx.in_flight_jobs and
 *		    when manipulating the timeout state.
 * @fence_ctx.id: NOTE(review): not used in this part of the file; presumably
 *		  the dma_fence context id - confirm.
 * @fence_ctx.seqno: NOTE(review): not used in this part of the file;
 *		     presumably the fence sequence number allocator - confirm.
 * @fence_ctx.last_fence: Fence reference dropped in group_free_queue().
 * @fence_ctx.in_flight_jobs: List of struct panthor_job (linked through
 *			      panthor_job::node).
 * @profiling: Profiling state.
 * @profiling.slots: BO destroyed in group_free_queue().
 *		     NOTE(review): presumably holds per-job profiling samples - confirm.
 * @profiling.slot_count: NOTE(review): not used in this part of the file - confirm.
 * @profiling.seqno: NOTE(review): not used in this part of the file - confirm.
 */
struct panthor_queue {
struct drm_gpu_scheduler scheduler;
struct drm_sched_entity entity;
char *name;
struct {
struct delayed_work work;
unsigned long remaining;
} timeout;
u8 doorbell_id;
u8 priority;
#define CSF_MAX_QUEUE_PRIO GENMASK(3, 0)
struct panthor_kernel_bo *ringbuf;
struct {
struct panthor_kernel_bo *mem;
struct panthor_fw_ringbuf_input_iface *input;
const struct panthor_fw_ringbuf_output_iface *output;
u32 input_fw_va;
u32 output_fw_va;
} iface;
struct {
u64 gpu_va;
u64 ref;
bool gt;
bool sync64;
struct drm_gem_object *obj;
u64 offset;
void *kmap;
} syncwait;
struct {
spinlock_t lock;
u64 id;
atomic64_t seqno;
struct dma_fence *last_fence;
struct list_head in_flight_jobs;
} fence_ctx;
struct {
struct panthor_kernel_bo *slots;
u32 slot_count;
u32 seqno;
} profiling;
};
/**
 * enum panthor_group_state - Scheduling group state
 *
 * csg_slot_sync_state_locked() derives the state from the CSG state
 * acknowledged by the FW: START/RESUME map to ACTIVE, TERMINATE to TERMINATED,
 * SUSPEND to SUSPENDED and anything else to UNKNOWN_STATE. Groups in the
 * TERMINATED or UNKNOWN_STATE state are not runnable (see group_can_run()).
 * NOTE(review): PANTHOR_CS_GROUP_CREATED is presumably the initial state -
 * the assignment is not visible in this part of the file.
 */
enum panthor_group_state {
PANTHOR_CS_GROUP_CREATED,
PANTHOR_CS_GROUP_ACTIVE,
PANTHOR_CS_GROUP_SUSPENDED,
PANTHOR_CS_GROUP_TERMINATED,
PANTHOR_CS_GROUP_UNKNOWN_STATE,
};
/**
 * struct panthor_group - Scheduling group
 * @refcount: Reference count, see group_get()/group_put(). group_release()
 *	      runs when it drops to zero.
 * @ptdev: Device the group belongs to.
 * @vm: VM the group is attached to; the reference is dropped in
 *	group_release_work().
 * @compute_core_mask: Programmed as the CSG allow_compute mask.
 * @fragment_core_mask: Programmed as the CSG allow_fragment mask.
 * @tiler_core_mask: Programmed as the CSG allow_other mask.
 * @max_compute_cores: Compute field of the CSG endpoint request.
 * @max_fragment_cores: Fragment field of the CSG endpoint request.
 * @max_tiler_cores: Tiler field of the CSG endpoint request.
 * @priority: Group priority, used to index the per-priority group lists.
 * @blocked_queues: Bitmask of queues blocked on a SYNC_WAIT with no pending
 *		    scoreboard entry.
 * @idle_queues: Bitmask of unblocked queues with an empty ring buffer and no
 *		 pending scoreboard entry.
 * @fatal_lock: NOTE(review): not used in this part of the file; presumably
 *		protects @fatal_queues - confirm.
 * @fatal_queues: Bitmask of queues that hit a fatal event. A non-zero value
 *		  makes the group non-runnable.
 * @tiler_oom: Bitmask of queues with a pending tiler OOM event, consumed by
 *	       group_tiler_oom_work().
 * @queue_count: Number of valid entries in @queues.
 * @queues: Queues of this group.
 * @csg_id: CSG slot the group is bound to, or -1 when it is not bound.
 * @destroyed: A destroyed group is not runnable.
 * @timedout: Set when a progress timer event is reported for the group;
 *	      makes the group non-runnable.
 * @innocent: NOTE(review): not used in this part of the file - confirm meaning.
 * @syncobjs: BO destroyed in group_release_work().
 *	      NOTE(review): presumably holds the per-queue sync objects - confirm.
 * @fdinfo: fdinfo related data.
 * @fdinfo.data: NOTE(review): not used in this part of the file - confirm.
 * @fdinfo.lock: NOTE(review): not used in this part of the file; presumably
 *		 protects @fdinfo.data - confirm.
 * @fdinfo.kbo_sizes: NOTE(review): not used in this part of the file - confirm.
 * @task_info: Identity of a task associated with the group, printed in
 *	       fault/fatal/timeout messages.
 * @task_info.pid: Task pid.
 * @task_info.comm: Task command name.
 * @state: Group state, see &enum panthor_group_state.
 * @suspend_buf: Suspend buffer; its GPU VA (or 0 when NULL) is programmed in
 *		 the CSG slot.
 * @protm_suspend_buf: Protected-mode suspend buffer; same handling as
 *		       @suspend_buf.
 * @sync_upd_work: Work queued with group_queue_work(group, sync_upd).
 * @tiler_oom_work: Work handling the events recorded in @tiler_oom.
 * @term_work: NOTE(review): not used in this part of the file - confirm.
 * @release_work: Work freeing the group once its refcount dropped to zero.
 * @run_node: Node used to insert the group in the scheduler/tick-context
 *	      per-priority lists.
 * @wait_node: Node used to insert the group in the scheduler waiting list.
 */
struct panthor_group {
struct kref refcount;
struct panthor_device *ptdev;
struct panthor_vm *vm;
u64 compute_core_mask;
u64 fragment_core_mask;
u64 tiler_core_mask;
u8 max_compute_cores;
u8 max_fragment_cores;
u8 max_tiler_cores;
u8 priority;
u32 blocked_queues;
u32 idle_queues;
spinlock_t fatal_lock;
u32 fatal_queues;
atomic_t tiler_oom;
u32 queue_count;
struct panthor_queue *queues[MAX_CS_PER_CSG];
int csg_id;
bool destroyed;
bool timedout;
bool innocent;
struct panthor_kernel_bo *syncobjs;
struct {
struct panthor_gpu_usage data;
spinlock_t lock;
size_t kbo_sizes;
} fdinfo;
struct {
pid_t pid;
char comm[TASK_COMM_LEN];
} task_info;
enum panthor_group_state state;
struct panthor_kernel_bo *suspend_buf;
struct panthor_kernel_bo *protm_suspend_buf;
struct work_struct sync_upd_work;
struct work_struct tiler_oom_work;
struct work_struct term_work;
struct work_struct release_work;
struct list_head run_node;
struct list_head wait_node;
};
/**
 * struct panthor_job_profiling_data - Before/after samples taken around a job
 * @cycles: Cycle counter samples.
 * @cycles.before: Sample taken before the job.
 * @cycles.after: Sample taken after the job.
 * @time: Timestamp samples.
 * @time.before: Sample taken before the job.
 * @time.after: Sample taken after the job.
 *
 * NOTE(review): the producers/consumers of this structure are not in this
 * part of the file; the description above is derived from the field names.
 */
struct panthor_job_profiling_data {
struct {
u64 before;
u64 after;
} cycles;
struct {
u64 before;
u64 after;
} time;
};
/**
 * group_queue_work() - Queue a group work
 * @group: Group owning the work. Evaluated several times, so it must not
 *	   have side effects.
 * @wname: Work name; the work queued is the @wname ## _work member of @group.
 *
 * Takes a group reference before queuing the work on the scheduler workqueue
 * and drops it right away if queue_work() reports the work was already
 * queued. When the work is queued, the reference belongs to the work handler.
 */
#define group_queue_work(group, wname) \
do { \
group_get(group); \
if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \
group_put(group); \
} while (0)
/**
 * sched_queue_work() - Queue a scheduler work
 * @sched: Scheduler. Evaluated several times, so it must not have side effects.
 * @wname: Work name; the work queued is the @wname ## _work member of @sched.
 *
 * The work is queued on the scheduler workqueue, unless a reset is in
 * progress or pending, in which case nothing is done.
 */
#define sched_queue_work(sched, wname) \
do { \
if (!atomic_read(&(sched)->reset.in_progress) && \
!panthor_device_reset_is_pending((sched)->ptdev)) \
queue_work((sched)->wq, &(sched)->wname ## _work); \
} while (0)
/**
 * sched_queue_delayed_work() - Queue (or re-arm) a scheduler delayed work
 * @sched: Scheduler. Evaluated several times, so it must not have side effects.
 * @wname: Work name; the work queued is the @wname ## _work member of @sched.
 * @delay: Delay in jiffies, passed as is to mod_delayed_work().
 *
 * The delayed work is (re)armed on the scheduler workqueue, unless a reset is
 * in progress or pending, in which case nothing is done.
 *
 * Every use of @sched is parenthesized so that any pointer expression can be
 * passed safely.
 */
#define sched_queue_delayed_work(sched, wname, delay) \
	do { \
		if (!atomic_read(&(sched)->reset.in_progress) && \
		    !panthor_device_reset_is_pending((sched)->ptdev)) \
			mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \
	} while (0)
/*
 * NOTE(review): not used in this part of the file; presumably the maximum
 * number of groups a pool can hold - confirm against the users.
 */
#define MAX_GROUPS_PER_POOL 128
/*
 * XArray mark.
 * NOTE(review): presumably flags pool entries whose group is fully
 * registered - not used in this part of the file, confirm.
 */
#define GROUP_REGISTERED XA_MARK_1
/**
 * struct panthor_group_pool - Group pool
 * @xa: XArray storing the groups of the pool.
 *	NOTE(review): indexing scheme not visible in this part of the file.
 */
struct panthor_group_pool {
struct xarray xa;
};
/**
 * struct panthor_job - Job submitted to a queue
 * @base: drm_sched job this structure extends.
 * @refcount: Reference count.
 * @group: Group of the queue the job was submitted to.
 * @queue_idx: Index of the target queue in the group's queue array.
 * @call_info: Description of the command stream to call.
 * @call_info.start: NOTE(review): presumably the GPU address of the command
 *		     stream - not used in this part of the file, confirm.
 * @call_info.size: NOTE(review): presumably the command stream size - confirm.
 * @call_info.latest_flush: NOTE(review): not used in this part of the file -
 *			    confirm meaning.
 * @ringbuf: Position of the job in the queue ring buffer.
 * @ringbuf.start: Start position; compared against the queue extract pointer
 *		   when a fault is reported.
 * @ringbuf.end: End position; a job is considered past when the extract
 *		 pointer is greater than or equal to it.
 * @node: Node used to insert the job in the queue in-flight job list.
 * @done_fence: Fence attached to the job; gets an -EINVAL error when the job
 *		is being executed while a fault is reported.
 * @profiling: Profiling information.
 * @profiling.mask: NOTE(review): not used in this part of the file - confirm.
 * @profiling.slot: NOTE(review): not used in this part of the file - confirm.
 */
struct panthor_job {
struct drm_sched_job base;
struct kref refcount;
struct panthor_group *group;
u32 queue_idx;
struct {
u64 start;
u32 size;
u32 latest_flush;
} call_info;
struct {
u64 start;
u64 end;
} ringbuf;
struct list_head node;
struct dma_fence *done_fence;
struct {
u32 mask;
u32 slot;
} profiling;
};
static void
panthor_queue_put_syncwait_obj(struct panthor_queue *queue)
{
if (queue->syncwait.kmap) {
struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap);
drm_gem_vunmap(queue->syncwait.obj, &map);
queue->syncwait.kmap = NULL;
}
drm_gem_object_put(queue->syncwait.obj);
queue->syncwait.obj = NULL;
}
/**
 * panthor_queue_get_syncwait_obj() - Get a CPU pointer to the sync object a queue waits on
 * @group: Group the queue belongs to (provides the device and the VM).
 * @queue: Queue whose syncwait.gpu_va must be resolved.
 *
 * When no mapping is cached, the BO backing syncwait.gpu_va is looked up in
 * the group VM and mapped in kernel space; the object and mapping are cached
 * in @queue for later calls. In both cases the sync object range is
 * flushed/invalidated from the CPU caches before the pointer is returned.
 *
 * Return: kernel address of the sync object, or NULL if the BO lookup or the
 * mapping failed (the cached object is released in that case).
 */
static void *
panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue)
{
	struct panthor_device *ptdev = group->ptdev;
	struct panthor_gem_object *bo;
	struct iosys_map map;
	int ret;

	if (!queue->syncwait.kmap) {
		/* Nothing cached yet: resolve the GPU VA and map the BO. */
		bo = panthor_vm_get_bo_for_va(group->vm,
					      queue->syncwait.gpu_va,
					      &queue->syncwait.offset);
		if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo)))
			goto err_put_syncwait_obj;

		queue->syncwait.obj = &bo->base.base;
		ret = drm_gem_vmap(queue->syncwait.obj, &map);
		if (drm_WARN_ON(&ptdev->base, ret))
			goto err_put_syncwait_obj;

		queue->syncwait.kmap = map.vaddr;
		if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap))
			goto err_put_syncwait_obj;
	} else {
		/* Reuse the cached mapping, we only need the BO for the sync. */
		bo = container_of(queue->syncwait.obj,
				  struct panthor_gem_object, base.base);
	}

	panthor_gem_sync(&bo->base.base,
			 DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE,
			 queue->syncwait.offset,
			 queue->syncwait.sync64 ?
			 sizeof(struct panthor_syncobj_64b) :
			 sizeof(struct panthor_syncobj_32b));

	return queue->syncwait.kmap + queue->syncwait.offset;

err_put_syncwait_obj:
	panthor_queue_put_syncwait_obj(queue);
	return NULL;
}
/**
 * group_free_queue() - Release all resources attached to a queue
 * @group: Group the queue belongs to (unused by the function itself).
 * @queue: Queue to free. NULL and error pointers are ignored.
 *
 * The timeout work is disabled first, then the drm_sched entity and
 * scheduler are torn down if they were initialized, and finally the name,
 * the sync-wait object, the BOs, the last fence and the queue itself are
 * released.
 */
static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue)
{
	/* Slots that were never populated have nothing to release. */
	if (!queue || IS_ERR(queue))
		return;

	/* Make sure the timeout handler can't run while we tear things down. */
	disable_delayed_work_sync(&queue->timeout.work);

	/* A zero fence context means the entity was never initialized. */
	if (queue->entity.fence_context)
		drm_sched_entity_destroy(&queue->entity);

	/* Same for the scheduler: ops is only set once it's initialized. */
	if (queue->scheduler.ops)
		drm_sched_fini(&queue->scheduler);

	kfree(queue->name);

	panthor_queue_put_syncwait_obj(queue);

	panthor_kernel_bo_destroy(queue->ringbuf);
	panthor_kernel_bo_destroy(queue->iface.mem);
	panthor_kernel_bo_destroy(queue->profiling.slots);

	dma_fence_put(queue->fence_ctx.last_fence);

	kfree(queue);
}
/**
 * group_release_work() - Work freeing a group whose refcount dropped to zero
 * @work: The release_work member of the group to free.
 *
 * Waits for an RCU grace period, then frees the queues, the group BOs, drops
 * the VM reference and frees the group.
 */
static void group_release_work(struct work_struct *work)
{
	struct panthor_group *group =
		container_of(work, struct panthor_group, release_work);
	u32 idx = 0;

	/* Let RCU readers finish before the queues go away. */
	synchronize_rcu();

	while (idx < group->queue_count)
		group_free_queue(group, group->queues[idx++]);

	panthor_kernel_bo_destroy(group->suspend_buf);
	panthor_kernel_bo_destroy(group->protm_suspend_buf);
	panthor_kernel_bo_destroy(group->syncobjs);

	panthor_vm_put(group->vm);
	kfree(group);
}
/**
 * group_release() - kref release callback of a group
 * @kref: The refcount member of the group being released.
 *
 * Warns if the group is still bound to a CSG slot or still sitting in a
 * run/wait list, then defers the actual destruction to group_release_work()
 * on the cleanup workqueue.
 */
static void group_release(struct kref *kref)
{
	struct panthor_group *group =
		container_of(kref, struct panthor_group, refcount);

	drm_WARN_ON(&group->ptdev->base, group->csg_id >= 0);
	drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node));
	drm_WARN_ON(&group->ptdev->base, !list_empty(&group->wait_node));

	queue_work(panthor_cleanup_wq, &group->release_work);
}
/**
 * group_put() - Drop a group reference
 * @group: Group to release a reference on. Can be NULL.
 *
 * group_release() is called when the last reference is dropped.
 */
static void group_put(struct panthor_group *group)
{
	if (!group)
		return;

	kref_put(&group->refcount, group_release);
}
static struct panthor_group *
group_get(struct panthor_group *group)
{
if (group)
kref_get(&group->refcount);
return group;
}
/**
 * group_bind_locked() - Bind a group to a CSG slot
 * @group: Group to bind. Must not be bound already (csg_id == -1).
 * @csg_id: Slot to bind the group to. Must be free.
 *
 * Activates the group VM, takes a group reference on behalf of the slot,
 * records the slot in the group and assigns the same doorbell (csg_id + 1)
 * to all queues of the group.
 *
 * Must be called with the scheduler lock held.
 *
 * Return: 0 on success, -EINVAL if the group or slot is in an unexpected
 * state, or the error returned by panthor_vm_active().
 */
static int
group_bind_locked(struct panthor_group *group, u32 csg_id)
{
	struct panthor_device *ptdev = group->ptdev;
	u32 q;
	int err;

	lockdep_assert_held(&ptdev->scheduler->lock);

	if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS ||
			ptdev->scheduler->csg_slots[csg_id].group))
		return -EINVAL;

	err = panthor_vm_active(group->vm);
	if (err)
		return err;

	/* The slot owns a reference for as long as the group is bound. */
	group_get(group);
	group->csg_id = csg_id;

	/* All queues of the group share the doorbell attached to the slot. */
	for (q = 0; q < group->queue_count; q++)
		group->queues[q]->doorbell_id = csg_id + 1;

	ptdev->scheduler->csg_slots[csg_id].group = group;
	return 0;
}
/**
 * group_unbind_locked() - Unbind a group from the CSG slot it is bound to
 * @group: Group to unbind. Must be bound and must not be active.
 *
 * Idles the group VM, resets the pending tiler OOM events, invalidates the
 * queue doorbells, frees the slot and drops the reference the slot was
 * holding.
 *
 * Must be called with the scheduler lock held.
 *
 * Return: 0 on success, -EINVAL if the group is not bound or still active.
 */
static int
group_unbind_locked(struct panthor_group *group)
{
	struct panthor_device *ptdev = group->ptdev;
	struct panthor_csg_slot *bound_slot;
	u32 q;

	lockdep_assert_held(&ptdev->scheduler->lock);

	if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS))
		return -EINVAL;

	if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE))
		return -EINVAL;

	/* Grab the slot before csg_id is invalidated. */
	bound_slot = &ptdev->scheduler->csg_slots[group->csg_id];

	panthor_vm_idle(group->vm);
	group->csg_id = -1;

	/* Forget pending tiler OOM events now that the group left its slot. */
	atomic_set(&group->tiler_oom, 0);
	cancel_work(&group->tiler_oom_work);

	for (q = 0; q < group->queue_count; q++)
		group->queues[q]->doorbell_id = -1;

	bound_slot->group = NULL;

	/* Release the reference taken in group_bind_locked(). */
	group_put(group);
	return 0;
}
/**
 * group_is_idle() - Check whether a group has nothing to execute
 * @group: Group to check.
 *
 * Return: true if every queue of the group is flagged idle or blocked.
 */
static bool
group_is_idle(struct panthor_group *group)
{
	return hweight32(group->idle_queues | group->blocked_queues) ==
	       group->queue_count;
}
static bool
group_can_run(struct panthor_group *group)
{
return group->state != PANTHOR_CS_GROUP_TERMINATED &&
group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
!group->destroyed && group->fatal_queues == 0 &&
!group->timedout;
}
/**
 * queue_timeout_is_suspended() - Check whether the timeout of a queue is suspended
 * @queue: Queue to check.
 *
 * timeout.remaining holds MAX_SCHEDULE_TIMEOUT while the timeout is not
 * suspended, any other value means a remaining time has been saved.
 *
 * Return: true if the timeout is suspended.
 */
static bool
queue_timeout_is_suspended(struct panthor_queue *queue)
{
	const bool not_suspended =
		queue->timeout.remaining == MAX_SCHEDULE_TIMEOUT;

	return !not_suspended;
}
/**
 * queue_reset_timeout_locked() - Restart the timeout of a queue
 * @queue: Queue whose timeout must be re-armed.
 *
 * Re-arms the timeout work with a full JOB_TIMEOUT_MS period. Nothing is
 * done when the timeout is suspended.
 *
 * Must be called with queue->fence_ctx.lock held.
 */
static void
queue_reset_timeout_locked(struct panthor_queue *queue)
{
	lockdep_assert_held(&queue->fence_ctx.lock);

	if (queue_timeout_is_suspended(queue))
		return;

	mod_delayed_work(queue->scheduler.timeout_wq,
			 &queue->timeout.work,
			 msecs_to_jiffies(JOB_TIMEOUT_MS));
}
/**
 * queue_suspend_timeout_locked() - Suspend the timeout of a queue
 * @queue: Queue whose timeout must be suspended.
 *
 * Cancels the timeout work and saves the time that was left in
 * queue->timeout.remaining (clamped to JOB_TIMEOUT_MS). A full period is
 * saved when the timer was not pending or when there is no in-flight job.
 *
 * Nothing is done if the timeout is already suspended, or if the first
 * in-flight job belongs to a runnable, idle group whose queue is blocked: in
 * that case the timer is left running.
 *
 * Must be called with queue->fence_ctx.lock held.
 */
static void
queue_suspend_timeout_locked(struct panthor_queue *queue)
{
	struct panthor_job *first_job;
	unsigned long expires, now, remaining, full_period;

	lockdep_assert_held(&queue->fence_ctx.lock);

	if (queue_timeout_is_suspended(queue))
		return;

	first_job = list_first_entry_or_null(&queue->fence_ctx.in_flight_jobs,
					     struct panthor_job, node);
	if (first_job) {
		struct panthor_group *group = first_job->group;

		/* Blocked queue in an idle group: keep the timer running. */
		if (group && group_can_run(group) &&
		    (group->blocked_queues & BIT(first_job->queue_idx)) &&
		    group_is_idle(group))
			return;
	}

	/* Sample the expiry date before the timer gets cancelled. */
	now = jiffies;
	expires = queue->timeout.work.timer.expires;
	full_period = msecs_to_jiffies(JOB_TIMEOUT_MS);

	if (!cancel_delayed_work(&queue->timeout.work) || !first_job)
		remaining = full_period;
	else if (time_after(expires, now))
		remaining = expires - now;
	else
		remaining = 0;

	if (WARN_ON_ONCE(remaining > full_period))
		remaining = full_period;

	queue->timeout.remaining = remaining;
}
/**
 * queue_suspend_timeout() - Suspend the timeout of a queue
 * @queue: Queue whose timeout must be suspended.
 *
 * Wrapper around queue_suspend_timeout_locked() taking care of acquiring
 * and releasing queue->fence_ctx.lock.
 */
static void
queue_suspend_timeout(struct panthor_queue *queue)
{
	spin_lock(&queue->fence_ctx.lock);

	queue_suspend_timeout_locked(queue);

	spin_unlock(&queue->fence_ctx.lock);
}
/**
 * queue_resume_timeout() - Resume the timeout of a queue
 * @queue: Queue whose timeout must be resumed.
 *
 * If the timeout is suspended, the timeout work is re-armed with the time
 * saved when it was suspended, and the queue is flagged as not suspended
 * again. Otherwise nothing is done.
 */
static void
queue_resume_timeout(struct panthor_queue *queue)
{
	spin_lock(&queue->fence_ctx.lock);

	if (!queue_timeout_is_suspended(queue))
		goto out_unlock;

	mod_delayed_work(queue->scheduler.timeout_wq,
			 &queue->timeout.work,
			 queue->timeout.remaining);

	/* MAX_SCHEDULE_TIMEOUT is the "not suspended" marker. */
	queue->timeout.remaining = MAX_SCHEDULE_TIMEOUT;

out_unlock:
	spin_unlock(&queue->fence_ctx.lock);
}
/**
 * cs_slot_prog_locked() - Program a command stream slot
 * @ptdev: Device.
 * @csg_id: CSG slot the queue's group is bound to.
 * @cs_id: Index of the queue/CS slot.
 *
 * Resynchronizes the ring buffer input extract pointer with the output one,
 * programs the ring buffer location, interface addresses, priority and
 * doorbell in the CS input interface, and requests a START with idle
 * notifications enabled. The queue timeout is resumed if the ring buffer is
 * not empty.
 *
 * Must be called with the scheduler lock held.
 */
static void
cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
{
	struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id];
	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
	const u32 start_req = CS_IDLE_SYNC_WAIT | CS_IDLE_EMPTY | CS_STATE_START;
	const u32 start_req_mask = CS_IDLE_SYNC_WAIT | CS_IDLE_EMPTY | CS_STATE_MASK;

	lockdep_assert_held(&ptdev->scheduler->lock);

	/* Restart from where the FW left off. */
	queue->iface.input->extract = queue->iface.output->extract;
	drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);

	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
	cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
	cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
	cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
				  CS_CONFIG_DOORBELL(queue->doorbell_id);
	cs_iface->input->ack_irq_mask = ~0;

	panthor_fw_update_reqs(cs_iface, req, start_req, start_req_mask);

	/* Pending work in the ring buffer: the timeout must tick again. */
	if (queue->iface.input->insert != queue->iface.input->extract)
		queue_resume_timeout(queue);
}
/**
 * cs_slot_reset_locked() - Reset a command stream slot
 * @ptdev: Device.
 * @csg_id: CSG slot the queue's group is bound to.
 * @cs_id: Index of the queue/CS slot.
 *
 * Requests the STOP state on the CS slot and suspends the timeout of the
 * matching queue.
 *
 * Must be called with the scheduler lock held.
 *
 * Return: always 0.
 */
static int
cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
{
	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
	struct panthor_queue *queue =
		ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id];

	lockdep_assert_held(&ptdev->scheduler->lock);

	panthor_fw_update_reqs(cs_iface, req, CS_STATE_STOP, CS_STATE_MASK);
	queue_suspend_timeout(queue);

	return 0;
}
/**
 * csg_slot_sync_priority_locked() - Refresh the cached priority of a CSG slot
 * @ptdev: Device.
 * @csg_id: CSG slot to refresh.
 *
 * Reads the endpoint request of the slot and stores its priority field in
 * the software slot state.
 *
 * Must be called with the scheduler lock held.
 */
static void
csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
{
	struct panthor_fw_csg_iface *csg_iface;
	u64 ep_req;

	lockdep_assert_held(&ptdev->scheduler->lock);

	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
	ep_req = panthor_fw_csg_endpoint_req_get(ptdev, csg_iface);

	ptdev->scheduler->csg_slots[csg_id].priority = CSG_EP_REQ_PRIORITY_GET(ep_req);
}
/**
 * cs_slot_sync_queue_state_locked() - Update the idle/blocked state of a queue
 * @ptdev: Device.
 * @csg_id: CSG slot the queue's group is bound to.
 * @cs_id: Index of the queue/CS slot.
 *
 * Uses the blocked reason reported by the FW:
 * - unblocked: the queue is flagged idle if its ring buffer is empty and no
 *   scoreboard entry is pending;
 * - blocked on a sync wait: the group is added to the scheduler waiting list
 *   if needed, the queue is flagged blocked if no scoreboard entry is
 *   pending, and the sync-wait description (address, reference value,
 *   condition, width) is saved in the queue;
 * - anything else: nothing is done.
 */
static void
cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
{
	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
	struct panthor_queue *queue = group->queues[cs_id];
	struct panthor_fw_cs_iface *cs_iface =
		panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id);
	u32 blocked_reason = cs_iface->output->status_blocked_reason;
	u32 status_wait;

	if (blocked_reason == CS_STATUS_BLOCKED_REASON_UNBLOCKED) {
		if (queue->iface.input->insert == queue->iface.output->extract &&
		    cs_iface->output->status_scoreboards == 0)
			group->idle_queues |= BIT(cs_id);

		return;
	}

	if (blocked_reason != CS_STATUS_BLOCKED_REASON_SYNC_WAIT)
		return;

	/* Make sure the group is tracked in the waiting list. */
	if (list_empty(&group->wait_node)) {
		list_move_tail(&group->wait_node,
			       &group->ptdev->scheduler->groups.waiting);
	}

	/* Only flag the queue as blocked if no scoreboard entry is pending. */
	if (!cs_iface->output->status_scoreboards)
		group->blocked_queues |= BIT(cs_id);

	queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr;
	queue->syncwait.ref = cs_iface->output->status_wait_sync_value;

	status_wait = cs_iface->output->status_wait;
	queue->syncwait.gt = (status_wait & CS_STATUS_WAIT_SYNC_COND_MASK) ==
			     CS_STATUS_WAIT_SYNC_COND_GT;
	queue->syncwait.sync64 = false;

	if (status_wait & CS_STATUS_WAIT_SYNC_64B) {
		/* 64-bit object: the upper half of the reference is separate. */
		u64 ref_hi = cs_iface->output->status_wait_sync_value_hi;

		queue->syncwait.sync64 = true;
		queue->syncwait.ref |= ref_hi << 32;
	}
}
/**
 * csg_slot_sync_queues_state_locked() - Update the state of all queues of a bound group
 * @ptdev: Device.
 * @csg_id: CSG slot the group is bound to.
 *
 * Clears the idle/blocked queue masks of the group and rebuilds them by
 * syncing every queue of the group.
 *
 * Must be called with the scheduler lock held.
 */
static void
csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id)
{
	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
	u32 cs_id;

	lockdep_assert_held(&ptdev->scheduler->lock);

	group->blocked_queues = 0;
	group->idle_queues = 0;

	for (cs_id = 0; cs_id < group->queue_count; cs_id++) {
		if (!group->queues[cs_id])
			continue;

		cs_slot_sync_queue_state_locked(ptdev, csg_id, cs_id);
	}
}
/**
 * csg_slot_sync_state_locked() - Synchronize the group state with the CSG slot state
 * @ptdev: Device.
 * @csg_id: CSG slot to synchronize.
 *
 * Translates the CSG state acknowledged by the FW into a group state and
 * applies the transition, if any:
 * - an unknown FW state schedules a device reset;
 * - entering the suspended state refreshes the idle/blocked state of the
 *   queues;
 * - leaving the active state resets all CS slots of the group.
 *
 * Nothing is done if no group is bound to the slot.
 *
 * Must be called with the scheduler lock held.
 */
static void
csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
{
	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
	struct panthor_fw_csg_iface *csg_iface;
	struct panthor_group *group;
	enum panthor_group_state new_state, old_state;
	u32 csg_state;

	lockdep_assert_held(&ptdev->scheduler->lock);

	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
	group = csg_slot->group;
	if (!group)
		return;

	old_state = group->state;
	csg_state = csg_iface->output->ack & CSG_STATE_MASK;
	switch (csg_state) {
	case CSG_STATE_START:
	case CSG_STATE_RESUME:
		new_state = PANTHOR_CS_GROUP_ACTIVE;
		break;
	case CSG_STATE_TERMINATE:
		new_state = PANTHOR_CS_GROUP_TERMINATED;
		break;
	case CSG_STATE_SUSPEND:
		new_state = PANTHOR_CS_GROUP_SUSPENDED;
		break;
	default:
		/* Both values are u32, and log lines must be newline-terminated. */
		drm_err(&ptdev->base, "Invalid state on CSG %u (state=%u)\n",
			csg_id, csg_state);
		new_state = PANTHOR_CS_GROUP_UNKNOWN_STATE;
		break;
	}

	if (old_state == new_state)
		return;

	/* The FW reported something we don't understand: reset the device. */
	if (new_state == PANTHOR_CS_GROUP_UNKNOWN_STATE)
		panthor_device_schedule_reset(ptdev);

	if (new_state == PANTHOR_CS_GROUP_SUSPENDED)
		csg_slot_sync_queues_state_locked(ptdev, csg_id);

	if (old_state == PANTHOR_CS_GROUP_ACTIVE) {
		u32 i;

		/* The group is no longer running, stop all its CS slots. */
		for (i = 0; i < group->queue_count; i++) {
			if (group->queues[i])
				cs_slot_reset_locked(ptdev, csg_id, i);
		}
	}

	group->state = new_state;
}
/**
 * csg_slot_prog_locked() - Program a CSG slot with the group bound to it
 * @ptdev: Device.
 * @csg_id: CSG slot to program.
 * @priority: Slot priority, at most MAX_CSG_PRIO.
 *
 * Programs every CS slot backing a queue of the group, then the CSG input
 * interface (core masks, endpoint request, address space, suspend buffers,
 * IRQ mask) and finally toggles the doorbell request bits of the programmed
 * queues.
 *
 * Nothing is done if the slot is free or if the group is already active.
 *
 * Must be called with the scheduler lock held.
 *
 * Return: 0 on success, -EINVAL if @priority or @csg_id is out of range.
 */
static int
csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority)
{
	struct panthor_fw_csg_iface *csg_iface;
	struct panthor_group *group;
	u32 programmed_queues = 0;
	u64 ep_req;
	u32 cs_id;

	lockdep_assert_held(&ptdev->scheduler->lock);

	if (priority > MAX_CSG_PRIO)
		return -EINVAL;

	if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS))
		return -EINVAL;

	group = ptdev->scheduler->csg_slots[csg_id].group;
	if (!group)
		return 0;

	if (group->state == PANTHOR_CS_GROUP_ACTIVE)
		return 0;

	csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id);

	for (cs_id = 0; cs_id < group->queue_count; cs_id++) {
		if (!group->queues[cs_id])
			continue;

		cs_slot_prog_locked(ptdev, csg_id, cs_id);
		programmed_queues |= BIT(cs_id);
	}

	csg_iface->input->allow_compute = group->compute_core_mask;
	csg_iface->input->allow_fragment = group->fragment_core_mask;
	csg_iface->input->allow_other = group->tiler_core_mask;

	ep_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) |
		 CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) |
		 CSG_EP_REQ_TILER(group->max_tiler_cores) |
		 CSG_EP_REQ_PRIORITY(priority);
	panthor_fw_csg_endpoint_req_set(ptdev, csg_iface, ep_req);

	csg_iface->input->config = panthor_vm_as(group->vm);

	/* A missing suspend buffer is advertised as address 0. */
	csg_iface->input->suspend_buf = group->suspend_buf ?
		panthor_kernel_bo_gpuva(group->suspend_buf) : 0;
	csg_iface->input->protm_suspend_buf = group->protm_suspend_buf ?
		panthor_kernel_bo_gpuva(group->protm_suspend_buf) : 0;

	csg_iface->input->ack_irq_mask = ~0;
	panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, programmed_queues);

	return 0;
}
/**
 * cs_slot_process_fatal_event_locked() - Handle a CS_FATAL event
 * @ptdev: Device.
 * @csg_id: CSG slot reporting the event.
 * @cs_id: CS slot reporting the event.
 *
 * Flags the queue as fatal in the group bound to the slot (if any), then
 * either schedules a device reset and cancels the tick work when the
 * exception is unrecoverable, or schedules an immediate tick otherwise.
 * The exception details are logged.
 *
 * Must be called with the scheduler lock held.
 */
static void
cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
				   u32 csg_id, u32 cs_id)
{
	struct panthor_scheduler *sched = ptdev->scheduler;
	struct panthor_group *group = sched->csg_slots[csg_id].group;
	struct panthor_fw_cs_iface *cs_iface;
	bool unrecoverable;
	u64 fatal_info;
	u32 fatal;

	lockdep_assert_held(&sched->lock);

	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
	fatal = cs_iface->output->fatal;
	fatal_info = cs_iface->output->fatal_info;

	if (group) {
		drm_warn(&ptdev->base, "CS_FATAL: pid=%d, comm=%s\n",
			 group->task_info.pid, group->task_info.comm);

		group->fatal_queues |= BIT(cs_id);
	}

	unrecoverable = CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE;
	if (!unrecoverable) {
		sched_queue_delayed_work(sched, tick, 0);
	} else {
		/* Only a reset can help here, no point in ticking meanwhile. */
		panthor_device_schedule_reset(ptdev);
		cancel_delayed_work(&sched->tick_work);
	}

	drm_warn(&ptdev->base,
		 "CSG slot %d CS slot: %d\n"
		 "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
		 "CS_FATAL.EXCEPTION_DATA: 0x%x\n"
		 "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n",
		 csg_id, cs_id,
		 (unsigned int)CS_EXCEPTION_TYPE(fatal),
		 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)),
		 (unsigned int)CS_EXCEPTION_DATA(fatal),
		 fatal_info);
}
/**
 * cs_slot_process_fault_event_locked() - Handle a CS_FAULT event
 * @ptdev: Device.
 * @csg_id: CSG slot reporting the event.
 * @cs_id: CS slot reporting the event.
 *
 * If a queue is attached to the CS slot, every in-flight job whose ring
 * buffer range contains the current extract pointer gets an -EINVAL error
 * set on its done fence. The exception details are logged.
 *
 * Must be called with the scheduler lock held.
 */
static void
cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
				   u32 csg_id, u32 cs_id)
{
	struct panthor_scheduler *sched = ptdev->scheduler;
	struct panthor_group *group = sched->csg_slots[csg_id].group;
	struct panthor_queue *queue = NULL;
	struct panthor_fw_cs_iface *cs_iface;
	u64 fault_info;
	u32 fault;

	if (group && cs_id < group->queue_count)
		queue = group->queues[cs_id];

	lockdep_assert_held(&sched->lock);

	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
	fault = cs_iface->output->fault;
	fault_info = cs_iface->output->fault_info;

	if (queue) {
		u64 extract = queue->iface.output->extract;
		struct panthor_job *cur;

		spin_lock(&queue->fence_ctx.lock);
		list_for_each_entry(cur, &queue->fence_ctx.in_flight_jobs, node) {
			/* Extract pointer is past this job, check the next one. */
			if (extract >= cur->ringbuf.end)
				continue;

			/* Extract pointer didn't reach this job, we're done. */
			if (extract < cur->ringbuf.start)
				break;

			dma_fence_set_error(cur->done_fence, -EINVAL);
		}
		spin_unlock(&queue->fence_ctx.lock);
	}

	if (group) {
		drm_warn(&ptdev->base, "CS_FAULT: pid=%d, comm=%s\n",
			 group->task_info.pid, group->task_info.comm);
	}

	drm_warn(&ptdev->base,
		 "CSG slot %d CS slot: %d\n"
		 "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
		 "CS_FAULT.EXCEPTION_DATA: 0x%x\n"
		 "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n",
		 csg_id, cs_id,
		 (unsigned int)CS_EXCEPTION_TYPE(fault),
		 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)),
		 (unsigned int)CS_EXCEPTION_DATA(fault),
		 fault_info);
}
/**
 * group_process_tiler_oom() - Handle a tiler out-of-memory event on a queue
 * @group: Group the event was reported on.
 * @cs_id: Index of the queue/CS slot that reported the event.
 *
 * The heap state is sampled with the scheduler lock held, the heap is grown
 * with the lock released, and the lock is re-taken to hand the new chunk to
 * the FW and acknowledge the event. If the group got unbound from its slot
 * while the heap was being grown, the new chunk is returned to the heap.
 *
 * A failure other than -ENOMEM to grow the heap flags the queue as fatal
 * and schedules a tick. On -ENOMEM the event is still acknowledged, with a
 * NULL chunk address.
 *
 * Return: 0 if the event was handled or if the group is not bound to a slot
 * anymore, a negative error code if growing the heap failed with anything
 * other than -ENOMEM.
 */
static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id)
{
	struct panthor_device *ptdev = group->ptdev;
	struct panthor_scheduler *sched = ptdev->scheduler;
	u32 renderpasses_in_flight, pending_frag_count;
	struct panthor_heap_pool *heaps = NULL;
	u64 heap_address, new_chunk_va = 0;
	u32 vt_start, vt_end, frag_end;
	int ret, csg_id;

	mutex_lock(&sched->lock);
	csg_id = group->csg_id;
	if (csg_id >= 0) {
		struct panthor_fw_cs_iface *cs_iface;

		cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
		heaps = panthor_vm_get_heap_pool(group->vm, false);
		heap_address = cs_iface->output->heap_address;
		vt_start = cs_iface->output->heap_vt_start;
		vt_end = cs_iface->output->heap_vt_end;
		frag_end = cs_iface->output->heap_frag_end;
		renderpasses_in_flight = vt_start - frag_end;
		pending_frag_count = vt_end - frag_end;
	}
	mutex_unlock(&sched->lock);

	/* The group is not bound to a slot anymore, nothing to do. */
	if (unlikely(csg_id < 0))
		return 0;

	if (IS_ERR(heaps) || frag_end > vt_end || vt_end >= vt_start) {
		ret = -EINVAL;
	} else {
		/* The heap is grown without the scheduler lock held. */
		ret = panthor_heap_grow(heaps, heap_address,
					renderpasses_in_flight,
					pending_frag_count, &new_chunk_va);
	}

	if (ret && ret != -ENOMEM) {
		drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
		group->fatal_queues |= BIT(cs_id);
		sched_queue_delayed_work(sched, tick, 0);
		goto out_put_heap_pool;
	}

	mutex_lock(&sched->lock);
	csg_id = group->csg_id;
	if (csg_id >= 0) {
		struct panthor_fw_csg_iface *csg_iface;
		struct panthor_fw_cs_iface *cs_iface;

		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
		cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);

		cs_iface->input->heap_start = new_chunk_va;
		cs_iface->input->heap_end = new_chunk_va;

		/* Acknowledge the event and let the FW know about it. */
		panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM);
		panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id));
		panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
	}
	mutex_unlock(&sched->lock);

	/* The group was unbound while we were allocating: give the chunk back. */
	if (unlikely(csg_id < 0 && new_chunk_va))
		panthor_heap_return_chunk(heaps, heap_address, new_chunk_va);

	ret = 0;

out_put_heap_pool:
	/*
	 * heaps may be an error pointer (see the IS_ERR() check above), in
	 * which case there is no reference to drop and the value must not be
	 * handed to panthor_heap_pool_put().
	 */
	if (!IS_ERR(heaps))
		panthor_heap_pool_put(heaps);

	return ret;
}
/**
 * group_tiler_oom_work() - Work handling the pending tiler OOM events of a group
 * @work: The tiler_oom_work member of the group.
 *
 * Atomically grabs and clears the pending event mask, processes the events
 * one queue at a time (lowest index first) and drops the group reference
 * that was taken when the work was queued.
 */
static void group_tiler_oom_work(struct work_struct *work)
{
	struct panthor_group *group =
		container_of(work, struct panthor_group, tiler_oom_work);
	u32 pending = atomic_xchg(&group->tiler_oom, 0);

	/* pending &= pending - 1 clears the lowest set bit. */
	for (; pending; pending &= pending - 1)
		group_process_tiler_oom(group, ffs(pending) - 1);

	group_put(group);
}
/**
 * cs_slot_process_tiler_oom_event_locked() - Record a tiler OOM event and defer its handling
 * @ptdev: Device.
 * @csg_id: CSG slot reporting the event.
 * @cs_id: CS slot reporting the event.
 *
 * Flags the queue in the group pending tiler OOM mask and queues the group
 * tiler OOM work on the heap allocation workqueue. A group reference is
 * taken on behalf of the work, and dropped right away if the work was
 * already queued.
 *
 * Must be called with the scheduler lock held.
 */
static void
cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev,
				       u32 csg_id, u32 cs_id)
{
	struct panthor_scheduler *sched = ptdev->scheduler;
	struct panthor_group *group = sched->csg_slots[csg_id].group;
	bool queued;

	lockdep_assert_held(&sched->lock);

	if (drm_WARN_ON(&ptdev->base, !group))
		return;

	atomic_or(BIT(cs_id), &group->tiler_oom);

	/* The work goes to heap_alloc_wq, not to the scheduler workqueue. */
	group_get(group);
	queued = queue_work(sched->heap_alloc_wq, &group->tiler_oom_work);
	if (!queued)
		group_put(group);
}
/**
 * cs_slot_process_irq_locked() - Process the events pending on a CS slot
 * @ptdev: Device.
 * @csg_id: CSG slot the CS slot belongs to.
 * @cs_id: CS slot to process.
 *
 * Dispatches the FATAL, FAULT and TILER_OOM events (bits that differ between
 * the request and acknowledge registers), then acknowledges the FATAL and
 * FAULT ones. TILER_OOM is not acknowledged here.
 *
 * Must be called with the scheduler lock held.
 *
 * Return: true if a FAULT or TILER_OOM event was pending.
 */
static bool cs_slot_process_irq_locked(struct panthor_device *ptdev,
				       u32 csg_id, u32 cs_id)
{
	struct panthor_fw_cs_iface *cs_iface;
	u32 req, ack, pending;

	lockdep_assert_held(&ptdev->scheduler->lock);

	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
	req = cs_iface->input->req;
	ack = cs_iface->output->ack;
	pending = (req ^ ack) & CS_EVT_MASK;

	if (pending & CS_FATAL)
		cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id);

	if (pending & CS_FAULT)
		cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id);

	if (pending & CS_TILER_OOM)
		cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id);

	/* TILER_OOM is left out of the acknowledged bits on purpose. */
	panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT);

	return !!(pending & (CS_FAULT | CS_TILER_OOM));
}
/**
 * csg_slot_process_idle_event_locked() - Handle a CSG idle event
 * @ptdev: Device.
 * @csg_id: CSG slot reporting the event (unused).
 *
 * Records that some groups might be idle and schedules an immediate tick.
 *
 * Must be called with the scheduler lock held.
 */
static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id)
{
	struct panthor_scheduler *sched = ptdev->scheduler;

	lockdep_assert_held(&sched->lock);

	sched->might_have_idle_groups = true;

	sched_queue_delayed_work(sched, tick, 0);
}
/**
 * csg_slot_sync_update_locked() - Handle a CSG sync update event
 * @ptdev: Device.
 * @csg_id: CSG slot reporting the event.
 *
 * Queues the sync update work of the group bound to the slot, if any, and
 * the scheduler-wide sync update work.
 *
 * Must be called with the scheduler lock held.
 */
static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
					u32 csg_id)
{
	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;

	lockdep_assert_held(&ptdev->scheduler->lock);

	if (group) {
		group_queue_work(group, sync_upd);
	}

	sched_queue_work(ptdev->scheduler, sync_upd);
}
/**
 * csg_slot_process_progress_timer_event_locked() - Handle a CSG progress timer event
 * @ptdev: Device.
 * @csg_id: CSG slot reporting the event.
 *
 * Flags the group bound to the slot as timed out (a warning is emitted if
 * no group is bound), logs the event and schedules an immediate tick.
 *
 * Must be called with the scheduler lock held.
 */
static void
csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id)
{
	struct panthor_scheduler *sched = ptdev->scheduler;
	struct panthor_group *group;

	lockdep_assert_held(&sched->lock);

	group = sched->csg_slots[csg_id].group;
	if (!drm_WARN_ON(&ptdev->base, !group)) {
		drm_warn(&ptdev->base, "CSG_PROGRESS_TIMER_EVENT: pid=%d, comm=%s\n",
			 group->task_info.pid, group->task_info.comm);

		group->timedout = true;
	}

	drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id);

	sched_queue_delayed_work(sched, tick, 0);
}
/**
 * sched_process_csg_irq_locked() - Process the events pending on a CSG slot
 * @ptdev: Device.
 * @csg_id: CSG slot to process. Must be below the scheduler csg_slot_count.
 *
 * Handles the CSG-level events (idle, progress timer, sync update) and the
 * events of every CS slot flagged in the CS IRQ request/acknowledge pair,
 * then rings the CSG doorbell.
 *
 * Must be called with the scheduler lock held.
 */
static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id)
{
u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events;
struct panthor_fw_csg_iface *csg_iface;
u32 ring_cs_db_mask = 0;
lockdep_assert_held(&ptdev->scheduler->lock);
if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
return;
csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
/* Snapshot the request/acknowledge registers once, all decisions below are based on it. */
req = READ_ONCE(csg_iface->input->req);
ack = READ_ONCE(csg_iface->output->ack);
cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req);
cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack);
/* Pending CSG events are the event bits that differ between req and ack. */
csg_events = (req ^ ack) & CSG_EVT_MASK;
/* Nothing pending at the CSG or CS level. */
if (req == ack && cs_irq_req == cs_irq_ack)
return;
/*
 * The CS IRQs are acknowledged before the per-CS state is examined.
 * NOTE(review): presumably so that an interrupt raised by the FW while a
 * CS is being serviced is not lost - confirm against the FW interface spec.
 */
csg_iface->input->cs_irq_ack = cs_irq_req;
/* Acknowledge the CSG events handled below. */
panthor_fw_update_reqs(csg_iface, req, ack,
CSG_SYNC_UPDATE |
CSG_IDLE |
CSG_PROGRESS_TIMER_EVENT);
if (csg_events & CSG_IDLE)
csg_slot_process_idle_event_locked(ptdev, csg_id);
if (csg_events & CSG_PROGRESS_TIMER_EVENT)
csg_slot_process_progress_timer_event_locked(ptdev, csg_id);
/* Process the CS slots flagged in the snapshot, lowest index first. */
cs_irqs = cs_irq_req ^ cs_irq_ack;
while (cs_irqs) {
u32 cs_id = ffs(cs_irqs) - 1;
/* Remember the CS slots whose doorbell request bit must be toggled. */
if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id))
ring_cs_db_mask |= BIT(cs_id);
cs_irqs &= ~BIT(cs_id);
}
if (csg_events & CSG_SYNC_UPDATE)
csg_slot_sync_update_locked(ptdev, csg_id);
if (ring_cs_db_mask)
panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask);
/* The CSG doorbell is rung in all cases once we get here. */
panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
}
static void sched_process_idle_event_locked(struct panthor_device *ptdev)
{
struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
lockdep_assert_held(&ptdev->scheduler->lock);
panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE);
sched_queue_delayed_work(ptdev->scheduler, tick, 0);
}
/**
 * sched_process_global_irq_locked() - Process the events pending on the global interface
 * @ptdev: Device.
 *
 * Only the GLB_IDLE event is handled here.
 *
 * Must be called with the scheduler lock held.
 */
static void sched_process_global_irq_locked(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	u32 req, ack;

	lockdep_assert_held(&ptdev->scheduler->lock);

	/* Two separate statements to keep the req-then-ack read order. */
	req = READ_ONCE(glb_iface->input->req);
	ack = READ_ONCE(glb_iface->output->ack);

	if (((req ^ ack) & GLB_EVT_MASK) & GLB_IDLE)
		sched_process_idle_event_locked(ptdev);
}
/**
 * process_fw_events_work() - Work processing the FW events reported to the scheduler
 * @work: The fw_events_work member of the scheduler.
 *
 * Atomically grabs and clears the pending event mask, then, with the
 * scheduler lock held, processes the global interface events (if flagged)
 * followed by the events of every flagged CSG slot, lowest index first.
 */
static void process_fw_events_work(struct work_struct *work)
{
	struct panthor_scheduler *sched =
		container_of(work, struct panthor_scheduler, fw_events_work);
	u32 pending = atomic_xchg(&sched->fw_events, 0);
	struct panthor_device *ptdev = sched->ptdev;

	mutex_lock(&sched->lock);

	if (pending & JOB_INT_GLOBAL_IF) {
		sched_process_global_irq_locked(ptdev);
		pending &= ~JOB_INT_GLOBAL_IF;
	}

	/* The remaining bits are CSG slot indices. */
	for (; pending; pending &= pending - 1)
		sched_process_csg_irq_locked(ptdev, ffs(pending) - 1);

	mutex_unlock(&sched->lock);
}
/**
 * panthor_sched_report_fw_events() - Report FW events to the scheduler
 * @ptdev: Device.
 * @events: Bitmask of events to report.
 *
 * The events are accumulated in the scheduler pending event mask and the
 * work processing them is queued (unless a reset is in progress or
 * pending). Nothing is done if the device has no scheduler.
 */
void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
{
	struct panthor_scheduler *sched = ptdev->scheduler;

	if (!sched)
		return;

	atomic_or(events, &sched->fw_events);
	sched_queue_work(sched, fw_events);
}
/**
 * fence_get_driver_name() - dma_fence_ops::get_driver_name implementation
 * @fence: Fence (unused).
 *
 * Return: the "panthor" constant string.
 */
static const char *fence_get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "panthor";

	return driver_name;
}
/**
 * queue_fence_get_timeline_name() - dma_fence_ops::get_timeline_name implementation
 * @fence: Fence (unused).
 *
 * Return: the "queue-fence" constant string.
 */
static const char *queue_fence_get_timeline_name(struct dma_fence *fence)
{
	static const char timeline_name[] = "queue-fence";

	return timeline_name;
}
/*
 * Fence operations providing the driver name ("panthor") and timeline name
 * ("queue-fence"). No other callback is provided.
 * NOTE(review): presumably used for the fences attached to queue jobs - the
 * fence initialization is not in this part of the file.
 */
static const struct dma_fence_ops panthor_queue_fence_ops = {
.get_driver_name = fence_get_driver_name,
.get_timeline_name = queue_fence_get_timeline_name,
};
/**
 * struct panthor_csg_slots_upd_ctx - Context used to batch CSG slot update requests
 * @update_mask: Bitmask of CSG slots that have a request queued.
 * @timedout_mask: Bitmask of CSG slots whose request was not acknowledged in
 *		   time, filled by csgs_upd_ctx_apply_locked().
 * @requests: Per-slot request, indexed by CSG slot id.
 * @requests.value: Value to apply to the slot request register, only the
 *		    bits set in @requests.mask are meaningful.
 * @requests.mask: Bits of the slot request register to update.
 */
struct panthor_csg_slots_upd_ctx {
u32 update_mask;
u32 timedout_mask;
struct {
u32 value;
u32 mask;
} requests[MAX_CSGS];
};
/**
 * csgs_upd_ctx_init() - Initialize a CSG slots update context
 * @ctx: Context to initialize.
 *
 * The whole context is zeroed: no request queued, no timeout recorded.
 */
static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx)
{
	memset(ctx, 0, sizeof(struct panthor_csg_slots_upd_ctx));
}
/**
 * csgs_upd_ctx_queue_reqs() - Queue a request on a CSG slot
 * @ptdev: Device.
 * @ctx: Update context the request is added to.
 * @csg_id: Target CSG slot. Must be below the scheduler csg_slot_count.
 * @value: Request value.
 * @mask: Bits of @value to apply. Must not be zero.
 *
 * The request is merged with the ones already queued for the slot: bits
 * covered by @mask are overwritten, the others are preserved. Nothing is
 * sent to the FW until csgs_upd_ctx_apply_locked() is called.
 */
static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev,
				    struct panthor_csg_slots_upd_ctx *ctx,
				    u32 csg_id, u32 value, u32 mask)
{
	u32 merged;

	if (drm_WARN_ON(&ptdev->base, !mask))
		return;

	if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
		return;

	merged = ctx->requests[csg_id].value & ~mask;
	merged |= value & mask;

	ctx->requests[csg_id].value = merged;
	ctx->requests[csg_id].mask |= mask;
	ctx->update_mask |= BIT(csg_id);
}
/**
 * csgs_upd_ctx_apply_locked() - Apply the requests queued in an update context
 * @ptdev: Device.
 * @ctx: Update context to apply.
 *
 * The requests are first written to all the slots involved, then the
 * doorbells of those slots are rung at once, and finally the
 * acknowledgment of each slot is waited for (up to the timeout value of 100
 * passed to panthor_fw_csg_wait_acks()). The software state (priority,
 * group state, queues state) is refreshed based on what was acknowledged.
 *
 * Slots that did not acknowledge their request in time are flagged in
 * ctx->timedout_mask.
 *
 * Must be called with the scheduler lock held.
 *
 * Return: 0 on success (or if there was nothing to apply), -ETIMEDOUT if at
 * least one slot timed out.
 */
static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev,
				     struct panthor_csg_slots_upd_ctx *ctx)
{
	struct panthor_scheduler *sched = ptdev->scheduler;
	u32 update_slots = ctx->update_mask;

	lockdep_assert_held(&sched->lock);

	if (!ctx->update_mask)
		return 0;

	/* Step 1: write all the requests. */
	while (update_slots) {
		struct panthor_fw_csg_iface *csg_iface;
		u32 csg_id = ffs(update_slots) - 1;

		update_slots &= ~BIT(csg_id);
		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
		panthor_fw_update_reqs(csg_iface, req,
				       ctx->requests[csg_id].value,
				       ctx->requests[csg_id].mask);
	}

	/* Step 2: notify the FW, all slots at once. */
	panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask);

	/* Step 3: wait for the acknowledgments and sync the SW state. */
	update_slots = ctx->update_mask;
	while (update_slots) {
		struct panthor_fw_csg_iface *csg_iface;
		u32 csg_id = ffs(update_slots) - 1;
		u32 req_mask = ctx->requests[csg_id].mask, acked;
		int ret;

		update_slots &= ~BIT(csg_id);
		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);

		ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100);

		if (acked & CSG_ENDPOINT_CONFIG)
			csg_slot_sync_priority_locked(ptdev, csg_id);

		if (acked & CSG_STATE_MASK)
			csg_slot_sync_state_locked(ptdev, csg_id);

		if (acked & CSG_STATUS_UPDATE)
			csg_slot_sync_queues_state_locked(ptdev, csg_id);

		/*
		 * Only report a timeout if the request is still pending when
		 * re-reading the registers after the wait failed.
		 */
		if (ret && acked != req_mask &&
		    ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) {
			/* csg_id is a u32, and log lines must be newline-terminated. */
			drm_err(&ptdev->base, "CSG %u update request timedout\n", csg_id);
			ctx->timedout_mask |= BIT(csg_id);
		}
	}

	if (ctx->timedout_mask)
		return -ETIMEDOUT;

	return 0;
}
/**
 * struct panthor_sched_tick_ctx - Context built during a scheduler tick
 * @old_groups: Per-priority lists of groups that were bound to a CSG slot
 *		when the tick started, see tick_ctx_insert_old_group().
 * @groups: Per-priority lists of groups picked for this tick, see
 *	    tick_ctx_pick_groups_from_list().
 * @idle_group_count: Number of picked groups that are idle.
 * @group_count: Number of picked groups. The context is full when it reaches
 *		 the scheduler csg_slot_count.
 * @vms: Distinct VMs used by the picked groups.
 * @as_count: Number of valid entries in @vms. Picking stops adding new VMs
 *	      once it reaches the scheduler as_slot_count.
 * @immediate_tick: NOTE(review): not used in this part of the file;
 *		    presumably requests another tick right away - confirm.
 * @stop_tick: Initialized to true by tick_ctx_init() and cleared when a
 *	       non-idle group is picked while another group of the same
 *	       priority has already been picked.
 * @csg_upd_failed_mask: NOTE(review): not used in this part of the file;
 *			 presumably the CSG slots whose update failed - confirm.
 */
struct panthor_sched_tick_ctx {
struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT];
struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT];
u32 idle_group_count;
u32 group_count;
struct panthor_vm *vms[MAX_CS_PER_CSG];
u32 as_count;
bool immediate_tick;
bool stop_tick;
u32 csg_upd_failed_mask;
};
static bool
tick_ctx_is_full(const struct panthor_scheduler *sched,
const struct panthor_sched_tick_ctx *ctx)
{
return ctx->group_count == sched->csg_slot_count;
}
/**
 * tick_ctx_pick_groups_from_list() - Pick groups from a list and add them to a tick context
 * @sched: Scheduler.
 * @ctx: Tick context the groups are added to.
 * @queue: List of groups (linked through run_node) to pick from.
 * @skip_idle_groups: Leave idle groups in @queue.
 * @owned_by_tick_ctx: True if the tick context already owns a reference on
 *		       the groups of @queue; a reference is taken on each
 *		       picked group otherwise.
 *
 * Groups are moved, in list order, to the ctx->groups list matching their
 * priority until the context is full. Groups that can't run, and groups
 * that would require one more VM than there are address space slots, are
 * left in @queue.
 */
static void
tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
			       struct panthor_sched_tick_ctx *ctx,
			       struct list_head *queue,
			       bool skip_idle_groups,
			       bool owned_by_tick_ctx)
{
	struct panthor_group *group, *next;

	if (tick_ctx_is_full(sched, ctx))
		return;

	list_for_each_entry_safe(group, next, queue, run_node) {
		struct list_head *dst;
		u32 vm_idx = 0;
		bool new_vm;

		if (!group_can_run(group))
			continue;

		if (skip_idle_groups && group_is_idle(group))
			continue;

		/* Is the group VM already used by a picked group? */
		while (vm_idx < ctx->as_count && ctx->vms[vm_idx] != group->vm)
			vm_idx++;

		new_vm = vm_idx == ctx->as_count;

		/* No address space slot left for a new VM. */
		if (new_vm && ctx->as_count == sched->as_slot_count)
			continue;

		if (!owned_by_tick_ctx)
			group_get(group);

		dst = &ctx->groups[group->priority];
		ctx->group_count++;

		/*
		 * A second non-idle group at the same priority level means
		 * the tick can't be stopped.
		 */
		if (group_is_idle(group))
			ctx->idle_group_count++;
		else if (!list_empty(dst))
			ctx->stop_tick = false;

		list_move_tail(&group->run_node, dst);

		if (new_vm)
			ctx->vms[ctx->as_count++] = group->vm;

		if (tick_ctx_is_full(sched, ctx))
			return;
	}
}
/**
 * tick_ctx_insert_old_group() - Insert a currently-bound group in the old_groups list
 * @sched: Scheduler owning the CSG slots.
 * @ctx: Tick context.
 * @group: Group bound to a CSG slot (group->csg_id indexes sched->csg_slots).
 *
 * The group is placed right before the first list entry whose CSG slot
 * priority is lower than its own, or at the tail if there is none.
 */
static void
tick_ctx_insert_old_group(struct panthor_scheduler *sched,
			  struct panthor_sched_tick_ctx *ctx,
			  struct panthor_group *group)
{
	struct panthor_csg_slot *slot = &sched->csg_slots[group->csg_id];
	struct list_head *insert_before = &ctx->old_groups[group->priority];
	struct panthor_group *iter;

	list_for_each_entry(iter, &ctx->old_groups[slot->group->priority], run_node) {
		struct panthor_csg_slot *iter_slot = &sched->csg_slots[iter->csg_id];

		/* Found the first entry with a lower slot priority. */
		if (slot->priority > iter_slot->priority) {
			insert_before = &iter->run_node;
			break;
		}
	}

	/* list_add_tail() on a node inserts right before that node. */
	list_add_tail(&group->run_node, insert_before);
}
/**
 * tick_ctx_init() - Prepare a tick context from the current CSG slot state
 * @sched: Scheduler.
 * @ctx: Tick context to initialise.
 *
 * Takes a reference on every group bound to a CSG slot, inserts it in
 * ctx->old_groups and queues a CSG_STATUS_UPDATE request for its slot. If
 * applying the requests fails, a device reset is scheduled and the
 * timed-out slots are recorded in ctx->csg_upd_failed_mask.
 */
static void
tick_ctx_init(struct panthor_scheduler *sched,
	      struct panthor_sched_tick_ctx *ctx)
{
	struct panthor_device *ptdev = sched->ptdev;
	struct panthor_csg_slots_upd_ctx upd_ctx;
	u32 prio, slot;

	memset(ctx, 0, sizeof(*ctx));
	csgs_upd_ctx_init(&upd_ctx);
	ctx->stop_tick = true;

	for (prio = 0; prio < ARRAY_SIZE(ctx->groups); prio++) {
		INIT_LIST_HEAD(&ctx->groups[prio]);
		INIT_LIST_HEAD(&ctx->old_groups[prio]);
	}

	for (slot = 0; slot < sched->csg_slot_count; slot++) {
		struct panthor_group *group = sched->csg_slots[slot].group;
		struct panthor_fw_csg_iface *csg_iface;

		/* Unused slot. */
		if (!group)
			continue;

		csg_iface = panthor_fw_get_csg_iface(ptdev, slot);
		group_get(group);

		if (panthor_vm_has_unhandled_faults(group->vm)) {
			/* Give the IRQ handler a chance to flag the faulty queues. */
			sched_process_csg_irq_locked(ptdev, slot);

			/* Nothing was flagged: mark every queue as fatal. */
			if (!group->fatal_queues)
				group->fatal_queues |= GENMASK(group->queue_count - 1, 0);
		}

		tick_ctx_insert_old_group(sched, ctx, group);
		csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, slot,
					csg_iface->output->ack ^ CSG_STATUS_UPDATE,
					CSG_STATUS_UPDATE);
	}

	if (!csgs_upd_ctx_apply_locked(ptdev, &upd_ctx))
		return;

	panthor_device_schedule_reset(ptdev);
	ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
}
static void
group_term_post_processing(struct panthor_group *group)
{
struct panthor_job *job, *tmp;
LIST_HEAD(faulty_jobs);
bool cookie;
u32 i = 0;
if (drm_WARN_ON(&group->ptdev->base, group_can_run(group)))
return;
cookie = dma_fence_begin_signalling();
for (i = 0; i < group->queue_count; i++) {
struct panthor_queue *queue = group->queues[i];
struct panthor_syncobj_64b *syncobj;
int err;
if (group->fatal_queues & BIT(i))
err = -EINVAL;
else if (group->timedout)
err = -ETIMEDOUT;
else
err = -ECANCELED;
if (!queue)
continue;
spin_lock(&queue->fence_ctx.lock);
list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
list_move_tail(&job->node, &faulty_jobs);
dma_fence_set_error(job->done_fence, err);
dma_fence_signal_locked(job->done_fence);
}
spin_unlock(&queue->fence_ctx.lock);
syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
syncobj->status = ~0;
syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
sched_queue_work(group->ptdev->scheduler, sync_upd);
}
dma_fence_end_signalling(cookie);
list_for_each_entry_safe(job, tmp, &faulty_jobs, node) {
list_del_init(&job->node);
panthor_job_put(&job->base);
}
}
/**
 * group_term_work() - Work item running the group termination post-processing
 * @work: The term_work member embedded in a panthor_group.
 *
 * Drops one group reference once the post-processing is done.
 */
static void group_term_work(struct work_struct *work)
{
	struct panthor_group *group;

	group = container_of(work, struct panthor_group, term_work);

	group_term_post_processing(group);
	group_put(group);
}
/**
 * tick_ctx_cleanup() - Release whatever is left in a tick context
 * @sched: Scheduler.
 * @ctx: Tick context to clean up.
 *
 * Groups that can no longer run are unlinked and handed to their term work.
 * Runnable groups still bound to a slot are simply unlinked, and unbound
 * ones go back to the scheduler idle/runnable lists. One reference is
 * dropped per group found in the context.
 */
static void
tick_ctx_cleanup(struct panthor_scheduler *sched,
		 struct panthor_sched_tick_ctx *ctx)
{
	struct panthor_device *ptdev = sched->ptdev;
	struct panthor_group *grp, *next;
	u32 prio;

	for (prio = 0; prio < ARRAY_SIZE(ctx->old_groups); prio++) {
		list_for_each_entry_safe(grp, next, &ctx->old_groups[prio], run_node) {
			/* Without an update failure, only dead groups may remain here. */
			drm_WARN_ON(&ptdev->base, !ctx->csg_upd_failed_mask &&
				    group_can_run(grp));

			if (!group_can_run(grp)) {
				list_del_init(&grp->run_node);
				list_del_init(&grp->wait_node);
				group_queue_work(grp, term);
			} else if (grp->csg_id < 0) {
				list_move(&grp->run_node,
					  group_is_idle(grp) ?
					  &sched->groups.idle[grp->priority] :
					  &sched->groups.runnable[grp->priority]);
			} else {
				list_del_init(&grp->run_node);
			}

			group_put(grp);
		}
	}

	for (prio = 0; prio < ARRAY_SIZE(ctx->groups); prio++) {
		/* Without an update failure, the picked lists must be empty. */
		drm_WARN_ON(&ptdev->base,
			    !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[prio]));

		list_for_each_entry_safe(grp, next, &ctx->groups[prio], run_node) {
			if (grp->csg_id < 0) {
				list_move(&grp->run_node,
					  group_is_idle(grp) ?
					  &sched->groups.idle[grp->priority] :
					  &sched->groups.runnable[grp->priority]);
			} else {
				list_del_init(&grp->run_node);
			}

			group_put(grp);
		}
	}
}
/**
 * tick_ctx_apply() - Program the CSG slots according to a tick context
 * @sched: Scheduler.
 * @ctx: Tick context holding the picked groups (ctx->groups) and the groups
 * that were bound but not picked again (what remains in ctx->old_groups).
 *
 * Runs in two firmware request rounds: first suspend/terminate the evicted
 * groups and update the slot priority of groups that stay bound, then bind
 * and start/resume the newly picked groups on the freed slots. If a round
 * times out, a device reset is scheduled, the timed-out slots are recorded
 * in ctx->csg_upd_failed_mask and the function returns early, leaving the
 * lists for tick_ctx_cleanup().
 */
static void
tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx)
{
struct panthor_group *group, *tmp;
struct panthor_device *ptdev = sched->ptdev;
struct panthor_csg_slot *csg_slot;
int prio, new_csg_prio = MAX_CSG_PRIO, i;
u32 free_csg_slots = 0;
struct panthor_csg_slots_upd_ctx upd_ctx;
int ret;
csgs_upd_ctx_init(&upd_ctx);
/* Walk priorities from highest to lowest so slot priorities are handed out in that order. */
for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
/* Groups left in old_groups are evicted: terminate if they can't run, suspend otherwise. */
list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
bool term = !group_can_run(group);
int csg_id = group->csg_id;
if (drm_WARN_ON(&ptdev->base, csg_id < 0))
continue;
csg_slot = &sched->csg_slots[csg_id];
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND,
CSG_STATE_MASK);
}
/* Picked groups that are already bound only need a slot priority update. */
list_for_each_entry(group, &ctx->groups[prio], run_node) {
struct panthor_fw_csg_iface *csg_iface;
int csg_id = group->csg_id;
/* Not bound yet: reserve a priority value, the slot is programmed in the second round. */
if (csg_id < 0) {
new_csg_prio--;
continue;
}
csg_slot = &sched->csg_slots[csg_id];
csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
/* Slot priority already matches, nothing to request. */
if (csg_slot->priority == new_csg_prio) {
new_csg_prio--;
continue;
}
panthor_fw_csg_endpoint_req_update(ptdev, csg_iface,
CSG_EP_REQ_PRIORITY(new_csg_prio),
CSG_EP_REQ_PRIORITY_MASK);
/* Toggle the CSG_ENDPOINT_CONFIG bit relative to the current ack value. */
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
CSG_ENDPOINT_CONFIG);
new_csg_prio--;
}
}
ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
if (ret) {
panthor_device_schedule_reset(ptdev);
ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
return;
}
/* Process pending CSG interrupts for the evicted groups, then unbind them from their slot. */
for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
if (group->csg_id >= 0)
sched_process_csg_irq_locked(ptdev, group->csg_id);
group_unbind_locked(group);
}
}
/* Build the mask of CSG slots that have no group attached. */
for (i = 0; i < sched->csg_slot_count; i++) {
if (!sched->csg_slots[i].group)
free_csg_slots |= BIT(i);
}
/* Second round: bind the newly picked groups, restarting the priority countdown. */
csgs_upd_ctx_init(&upd_ctx);
new_csg_prio = MAX_CSG_PRIO;
for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
list_for_each_entry(group, &ctx->groups[prio], run_node) {
int csg_id = group->csg_id;
struct panthor_fw_csg_iface *csg_iface;
/* Already bound: handled in the first round, just consume its priority value. */
if (csg_id >= 0) {
new_csg_prio--;
continue;
}
/* Lowest-numbered free slot; ffs() returns 0 when the mask is empty, giving -1. */
csg_id = ffs(free_csg_slots) - 1;
if (drm_WARN_ON(&ptdev->base, csg_id < 0))
break;
csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
csg_slot = &sched->csg_slots[csg_id];
group_bind_locked(group, csg_id);
csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--);
/* Suspended groups are resumed, any other state gets a fresh start. */
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
group->state == PANTHOR_CS_GROUP_SUSPENDED ?
CSG_STATE_RESUME : CSG_STATE_START,
CSG_STATE_MASK);
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
CSG_ENDPOINT_CONFIG);
free_csg_slots &= ~BIT(csg_id);
}
}
ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
if (ret) {
panthor_device_schedule_reset(ptdev);
ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
return;
}
for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
/* Scheduled groups leave the context; a group flagged destroyed triggers an immediate re-tick. */
list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) {
list_del_init(&group->run_node);
if (group->destroyed)
ctx->immediate_tick = true;
group_put(group);
}
/*
 * Evicted groups that can still run go back to the scheduler idle or
 * runnable list. Those that cannot are left in old_groups (still
 * holding their reference) for tick_ctx_cleanup() to deal with.
 */
list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) {
if (!group_can_run(group))
continue;
if (group_is_idle(group))
list_move_tail(&group->run_node, &sched->groups.idle[prio]);
else
list_move_tail(&group->run_node, &sched->groups.runnable[prio]);
group_put(group);
}
}
sched->used_csg_slot_count = ctx->group_count;
sched->might_have_idle_groups = ctx->idle_group_count > 0;
}
/**
 * tick_ctx_update_resched_target() - Update the next tick target after a tick
 * @sched: Scheduler whose resched_target is updated.
 * @ctx: Tick context that was just applied.
 *
 * Return: the delay, in jiffies, between sched->last_tick and the next
 * tick, or U64_MAX when the periodic tick should be stopped.
 */
static u64
tick_ctx_update_resched_target(struct panthor_scheduler *sched,
			       const struct panthor_sched_tick_ctx *ctx)
{
	u64 next_tick;

	/* Periodic tick not needed: park the target. */
	if (ctx->stop_tick) {
		sched->resched_target = U64_MAX;
		return U64_MAX;
	}

	next_tick = sched->last_tick + sched->tick_period;

	/*
	 * Replace the target if it already elapsed or if it lies further
	 * away than one tick period from the last tick.
	 */
	if (time_before64(sched->resched_target, sched->last_tick) ||
	    time_before64(next_tick, sched->resched_target))
		sched->resched_target = next_tick;

	return sched->resched_target - sched->last_tick;
}
/**
 * tick_work() - Scheduler tick: re-evaluate which groups occupy the CSG slots
 * @work: The tick_work.work member embedded in the panthor_scheduler.
 *
 * Builds a tick context from the currently bound groups, picks the groups
 * that should run next (non-idle groups first, then idle ones if slots are
 * left), applies the result to the CSG slots, updates devfreq and runtime PM
 * bookkeeping, and re-arms itself unless the tick can be stopped.
 */
static void tick_work(struct work_struct *work)
{
struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
tick_work.work);
struct panthor_device *ptdev = sched->ptdev;
struct panthor_sched_tick_ctx ctx;
u64 resched_target = sched->resched_target;
u64 remaining_jiffies = 0, resched_delay;
u64 now = get_jiffies_64();
int prio, ret, cookie;
bool full_tick;
if (!drm_dev_enter(&ptdev->base, &cookie))
return;
ret = panthor_device_resume_and_get(ptdev);
if (drm_WARN_ON(&ptdev->base, ret))
goto out_dev_exit;
/* Tick was stopped: work out where the next tick would have been. */
if (resched_target == U64_MAX)
resched_target = sched->last_tick + sched->tick_period;
if (time_before64(now, resched_target))
remaining_jiffies = resched_target - now;
/* A full tick is one that runs at or after its target time. */
full_tick = remaining_jiffies == 0;
mutex_lock(&sched->lock);
if (panthor_device_reset_is_pending(sched->ptdev))
goto out_unlock;
tick_ctx_init(sched, &ctx);
if (ctx.csg_upd_failed_mask)
goto out_cleanup_ctx;
/*
 * Tick forced before its target: keep the non-idle groups that are
 * already bound, and only let runnable groups of RT priority join from
 * the scheduler lists.
 */
if (!full_tick) {
for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
prio >= 0 && !tick_ctx_is_full(sched, &ctx);
prio--) {
tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio],
true, true);
if (prio == PANTHOR_CSG_PRIORITY_RT) {
tick_ctx_pick_groups_from_list(sched, &ctx,
&sched->groups.runnable[prio],
true, false);
}
}
}
/* Pick non-idle groups, highest priority first. */
for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
prio >= 0 && !tick_ctx_is_full(sched, &ctx);
prio--) {
struct panthor_group *old_highest_prio_group =
list_first_entry_or_null(&ctx.old_groups[prio],
struct panthor_group, run_node);
/* Temporarily take the head of the old list out so it is picked last (rotation). */
if (old_highest_prio_group)
list_del(&old_highest_prio_group->run_node);
/* Remaining previously-bound groups first, then runnable groups from the scheduler. */
tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true);
tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio],
true, false);
/* Put the former head back and give it a chance if there is room left. */
if (old_highest_prio_group) {
list_add_tail(&old_highest_prio_group->run_node, &ctx.old_groups[prio]);
tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio],
true, true);
}
}
/* Slots still free: fill them with idle groups, already-bound ones first. */
for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
prio >= 0 && !tick_ctx_is_full(sched, &ctx);
prio--) {
tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true);
tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio],
false, false);
}
tick_ctx_apply(sched, &ctx);
if (ctx.csg_upd_failed_mask)
goto out_cleanup_ctx;
/*
 * The scheduler holds at most one extra runtime PM reference, tracked by
 * pm.has_ref: dropped when every picked group is idle, taken otherwise.
 */
if (ctx.idle_group_count == ctx.group_count) {
panthor_devfreq_record_idle(sched->ptdev);
if (sched->pm.has_ref) {
pm_runtime_put_autosuspend(ptdev->base.dev);
sched->pm.has_ref = false;
}
} else {
panthor_devfreq_record_busy(sched->ptdev);
if (!sched->pm.has_ref) {
pm_runtime_get(ptdev->base.dev);
sched->pm.has_ref = true;
}
}
sched->last_tick = now;
resched_delay = tick_ctx_update_resched_target(sched, &ctx);
if (ctx.immediate_tick)
resched_delay = 0;
/* U64_MAX means the tick is stopped: do not re-arm. */
if (resched_delay != U64_MAX)
sched_queue_delayed_work(sched, tick, resched_delay);
out_cleanup_ctx:
tick_ctx_cleanup(sched, &ctx);
out_unlock:
mutex_unlock(&sched->lock);
/* Balance the panthor_device_resume_and_get() done at the top. */
pm_runtime_mark_last_busy(ptdev->base.dev);
pm_runtime_put_autosuspend(ptdev->base.dev);
out_dev_exit:
drm_dev_exit(cookie);
}
/**
 * panthor_queue_eval_syncwait() - Evaluate the sync-wait condition a queue is blocked on
 * @group: Group owning the queue.
 * @queue_idx: Index of the queue in @group.
 *
 * Return: 1 if the wait condition is satisfied (the syncwait object is
 * released in that case), 0 if it is not, -EINVAL if the sync object could
 * not be retrieved.
 */
static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx)
{
	struct panthor_queue *queue = group->queues[queue_idx];
	union {
		struct panthor_syncobj_64b sync64;
		struct panthor_syncobj_32b sync32;
	} *syncobj;
	bool satisfied;
	u64 current_seqno;

	syncobj = panthor_queue_get_syncwait_obj(group, queue);
	if (!syncobj)
		return -EINVAL;

	/* The sync object is either 64-bit or 32-bit wide. */
	if (queue->syncwait.sync64)
		current_seqno = syncobj->sync64.seqno;
	else
		current_seqno = syncobj->sync32.seqno;

	/* Either a "greater than" or a "less than or equal" wait. */
	satisfied = queue->syncwait.gt ?
		    current_seqno > queue->syncwait.ref :
		    current_seqno <= queue->syncwait.ref;

	if (satisfied)
		panthor_queue_put_syncwait_obj(queue);

	return satisfied;
}
/**
 * sync_upd_work() - Re-evaluate the sync-wait conditions of all waiting groups
 * @work: The sync_upd_work member embedded in the panthor_scheduler.
 *
 * Queues whose wait condition is now satisfied are cleared from
 * blocked_queues. An unbound group with newly unblocked queues is moved to
 * the runnable list of its priority, and an immediate tick is requested if
 * that priority is RT. Groups with no blocked queue left are removed from
 * the waiting list.
 */
static void sync_upd_work(struct work_struct *work)
{
	struct panthor_scheduler *sched = container_of(work,
						      struct panthor_scheduler,
						      sync_upd_work);
	struct panthor_group *group, *next;
	bool immediate_tick = false;

	mutex_lock(&sched->lock);
	list_for_each_entry_safe(group, next, &sched->groups.waiting, wait_node) {
		u32 pending = group->blocked_queues;
		u32 unblocked = 0;

		/* Test each blocked queue, lowest index first. */
		while (pending) {
			u32 cs_id = ffs(pending) - 1;
			int ret = panthor_queue_eval_syncwait(group, cs_id);

			pending &= ~BIT(cs_id);
			drm_WARN_ON(&group->ptdev->base, ret < 0);
			if (ret)
				unblocked |= BIT(cs_id);
		}

		group->blocked_queues &= ~unblocked;

		/* Only groups not bound to a CSG slot need to be requeued. */
		if (unblocked && group->csg_id < 0) {
			list_move(&group->run_node,
				  &sched->groups.runnable[group->priority]);
			if (group->priority == PANTHOR_CSG_PRIORITY_RT)
				immediate_tick = true;
		}

		if (!group->blocked_queues)
			list_del_init(&group->wait_node);
	}
	mutex_unlock(&sched->lock);

	if (immediate_tick)
		sched_queue_delayed_work(sched, tick, 0);
}
static void sched_resume_tick(struct panthor_device *ptdev)
{
struct panthor_scheduler *sched = ptdev->scheduler;
u64 delay_jiffies, now;
drm_WARN_ON(&ptdev->base, sched->resched_target != U64_MAX);
now = get_jiffies_64();
sched->resched_target = sched->last_tick + sched->tick_period;
if (sched->used_csg_slot_count == sched->csg_slot_count &&
time_before64(now, sched->resched_target))
delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX);
else
delay_jiffies = 0;
sched_queue_delayed_work(sched, tick, delay_jiffies);
}
/**
 * group_schedule_locked() - Tell the scheduler that queues of a group got new work
 * @group: Group to schedule.
 * @queue_mask: Mask of queues that received work.
 *
 * Clears @queue_mask from the group's idle queues, moves the group to the
 * runnable list if it just stopped being idle, and kicks or resumes the
 * scheduler tick as needed. Does nothing if the group can't run or if all
 * queues in @queue_mask are blocked. List and tick handling are skipped
 * while a reset is in progress.
 */
static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
{
	struct panthor_device *ptdev = group->ptdev;
	struct panthor_scheduler *sched = ptdev->scheduler;
	bool idle_before;

	if (!group_can_run(group))
		return;

	/* Every updated queue is blocked: nothing the scheduler can do. */
	if (!(queue_mask & ~group->blocked_queues))
		return;

	idle_before = group_is_idle(group);
	group->idle_queues &= ~queue_mask;

	/* Leave the lists alone while a reset is in progress. */
	if (atomic_read(&sched->reset.in_progress))
		return;

	if (idle_before && !group_is_idle(group))
		list_move_tail(&group->run_node,
			       &sched->groups.runnable[group->priority]);

	/* RT groups, or possibly idle groups on slots: tick right away. */
	if (group->priority == PANTHOR_CSG_PRIORITY_RT ||
	    sched->might_have_idle_groups) {
		sched_queue_delayed_work(sched, tick, 0);
		return;
	}

	/* Tick is stopped: restart it. */
	if (sched->resched_target == U64_MAX) {
		sched_resume_tick(ptdev);
		return;
	}

	/* Tick is running: only force it if some CSG slots are unused. */
	if (sched->used_csg_slot_count < sched->csg_slot_count)
		sched_queue_delayed_work(sched, tick, 0);
}
/**
 * queue_stop() - Stop a queue's timeout work and its DRM scheduler
 * @queue: Queue to stop.
 * @bad_job: Job passed to drm_sched_stop() as the bad job, or NULL.
 */
static void queue_stop(struct panthor_queue *queue,
		       struct panthor_job *bad_job)
{
	disable_delayed_work_sync(&queue->timeout.work);

	if (bad_job)
		drm_sched_stop(&queue->scheduler, &bad_job->base);
	else
		drm_sched_stop(&queue->scheduler, NULL);
}
static void queue_start(struct panthor_queue *queue)
{
struct panthor_job *job;
list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
job->base.s_fence->parent = dma_fence_get(job->done_fence);
enable_delayed_work(&queue->timeout.work);
drm_sched_start(&queue->scheduler, 0);
}
/**
 * panthor_group_stop() - Stop all queues of a group and park it on the stopped list
 * @group: Group to stop.
 *
 * Must be called with the scheduler reset lock held. Takes a group
 * reference; panthor_group_start() drops one.
 */
static void panthor_group_stop(struct panthor_group *group)
{
	struct panthor_scheduler *sched = group->ptdev->scheduler;
	u32 qidx = 0;

	lockdep_assert_held(&sched->reset.lock);

	while (qidx < group->queue_count)
		queue_stop(group->queues[qidx++], NULL);

	group_get(group);
	list_move_tail(&group->run_node, &sched->reset.stopped_groups);
}
/**
 * panthor_group_start() - Restart the queues of a group and requeue it
 * @group: Group to start.
 *
 * Must be called with the scheduler reset lock held. A runnable group goes
 * back to the idle or runnable list of its priority; a group that can't run
 * is unlinked and handed to its term work. Drops one group reference.
 */
static void panthor_group_start(struct panthor_group *group)
{
	struct panthor_scheduler *sched = group->ptdev->scheduler;
	u32 qidx;

	lockdep_assert_held(&group->ptdev->scheduler->reset.lock);

	for (qidx = 0; qidx < group->queue_count; qidx++)
		queue_start(group->queues[qidx]);

	if (!group_can_run(group)) {
		list_del_init(&group->run_node);
		list_del_init(&group->wait_node);
		group_queue_work(group, term);
	} else if (group_is_idle(group)) {
		list_move_tail(&group->run_node,
			       &sched->groups.idle[group->priority]);
	} else {
		list_move_tail(&group->run_node,
			       &sched->groups.runnable[group->priority]);
	}

	group_put(group);
}
/**
 * panthor_sched_report_mmu_fault() - Request an immediate scheduler tick after an MMU fault
 * @ptdev: Device.
 *
 * Does nothing if the scheduler is not set on the device.
 */
void panthor_sched_report_mmu_fault(struct panthor_device *ptdev)
{
	if (!ptdev->scheduler)
		return;

	sched_queue_delayed_work(ptdev->scheduler, tick, 0);
}
void panthor_sched_prepare_for_vm_destruction(struct panthor_device *ptdev)
{
flush_work(&ptdev->scheduler->tick_work.work);
}
/**
 * panthor_sched_resume() - Kick the scheduler after a resume
 * @ptdev: Device.
 *
 * Queues an immediate scheduler tick.
 */
void panthor_sched_resume(struct panthor_device *ptdev)
{
	struct panthor_scheduler *sched = ptdev->scheduler;

	sched_queue_delayed_work(sched, tick, 0);
}
/**
 * panthor_sched_suspend() - Evict every group from the CSG slots
 * @ptdev: Device.
 *
 * Requests suspension (or termination, for groups that can't run) of all
 * bound groups. Slots that time out are escalated to a termination request,
 * and forced to the terminated state if that times out too. Successfully
 * handled slots get a cache flush and a sync update. Finally every group is
 * unbound; runnable ones are put on the idle list, the others are handed to
 * their term work.
 */
void panthor_sched_suspend(struct panthor_device *ptdev)
{
struct panthor_scheduler *sched = ptdev->scheduler;
struct panthor_csg_slots_upd_ctx upd_ctx;
u32 suspended_slots;
u32 i;
mutex_lock(&sched->lock);
csgs_upd_ctx_init(&upd_ctx);
/* Queue a suspend request for runnable groups, a terminate request for the others. */
for (i = 0; i < sched->csg_slot_count; i++) {
struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
if (csg_slot->group) {
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
group_can_run(csg_slot->group) ?
CSG_STATE_SUSPEND : CSG_STATE_TERMINATE,
CSG_STATE_MASK);
}
}
suspended_slots = upd_ctx.update_mask;
/* Return value intentionally unused: failures are read from timedout_mask below. */
csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
suspended_slots &= ~upd_ctx.timedout_mask;
if (upd_ctx.timedout_mask) {
u32 slot_mask = upd_ctx.timedout_mask;
drm_err(&ptdev->base, "CSG suspend failed, escalating to termination");
csgs_upd_ctx_init(&upd_ctx);
while (slot_mask) {
u32 csg_id = ffs(slot_mask) - 1;
struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
/* The group was still runnable before the failure: flag it innocent. */
if (group_can_run(csg_slot->group))
csg_slot->group->innocent = true;
/* A failed suspend/terminate marks the group as timed out. */
csg_slot->group->timedout = true;
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
CSG_STATE_TERMINATE,
CSG_STATE_MASK);
slot_mask &= ~BIT(csg_id);
}
csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
/* Slots where even the terminate request timed out. */
slot_mask = upd_ctx.timedout_mask;
while (slot_mask) {
u32 csg_id = ffs(slot_mask) - 1;
struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
struct panthor_group *group = csg_slot->group;
/* Force the software state to terminated and reset the CS slots by hand. */
if (group->state != PANTHOR_CS_GROUP_TERMINATED) {
group->state = PANTHOR_CS_GROUP_TERMINATED;
for (i = 0; i < group->queue_count; i++) {
if (group->queues[i])
cs_slot_reset_locked(ptdev, csg_id, i);
}
}
slot_mask &= ~BIT(csg_id);
}
}
/*
 * For slots whose first request was acked: flush caches, then sync the
 * slot state. If the flush fails, mark those groups terminated instead.
 */
if (suspended_slots) {
bool flush_caches_failed = false;
u32 slot_mask = suspended_slots;
if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0))
flush_caches_failed = true;
while (slot_mask) {
u32 csg_id = ffs(slot_mask) - 1;
struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
if (flush_caches_failed)
csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
else
csg_slot_sync_update_locked(ptdev, csg_id);
slot_mask &= ~BIT(csg_id);
}
}
/* Unbind every group and requeue or terminate it. */
for (i = 0; i < sched->csg_slot_count; i++) {
struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
struct panthor_group *group = csg_slot->group;
if (!group)
continue;
/* Hold a reference across the unbind; it is dropped at the end of the iteration. */
group_get(group);
if (group->csg_id >= 0)
sched_process_csg_irq_locked(ptdev, group->csg_id);
group_unbind_locked(group);
drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node));
if (group_can_run(group)) {
list_add(&group->run_node,
&sched->groups.idle[group->priority]);
} else {
list_del_init(&group->wait_node);
group_queue_work(group, term);
}
group_put(group);
}
mutex_unlock(&sched->lock);
}
/**
 * panthor_sched_pre_reset() - Quiesce the scheduler before a GPU reset
 * @ptdev: Device.
 *
 * Flags the reset as in progress, cancels the sync-update and tick works,
 * suspends all bound groups, then stops every group found on the runnable
 * and idle lists. All of this is done under the reset lock.
 */
void panthor_sched_pre_reset(struct panthor_device *ptdev)
{
	struct panthor_scheduler *sched = ptdev->scheduler;
	struct panthor_group *grp, *next;
	u32 prio;

	mutex_lock(&sched->reset.lock);
	atomic_set(&sched->reset.in_progress, true);

	cancel_work_sync(&sched->sync_upd_work);
	cancel_delayed_work_sync(&sched->tick_work);

	panthor_sched_suspend(ptdev);

	for (prio = 0; prio < ARRAY_SIZE(sched->groups.runnable); prio++) {
		struct list_head *head = &sched->groups.runnable[prio];

		list_for_each_entry_safe(grp, next, head, run_node)
			panthor_group_stop(grp);
	}

	for (prio = 0; prio < ARRAY_SIZE(sched->groups.idle); prio++) {
		struct list_head *head = &sched->groups.idle[prio];

		list_for_each_entry_safe(grp, next, head, run_node)
			panthor_group_stop(grp);
	}

	mutex_unlock(&sched->reset.lock);
}
/**
 * panthor_sched_post_reset() - Restart the scheduler after a GPU reset
 * @ptdev: Device.
 * @reset_failed: True if the reset failed.
 *
 * Restarts every group stopped by panthor_sched_pre_reset(), marking them
 * terminated first when the reset failed, then clears the in-progress flag.
 * A tick and a sync update are queued only when the reset succeeded.
 */
void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
{
	struct panthor_scheduler *sched = ptdev->scheduler;
	struct panthor_group *grp, *next;

	mutex_lock(&sched->reset.lock);

	list_for_each_entry_safe(grp, next, &sched->reset.stopped_groups, run_node) {
		if (reset_failed)
			grp->state = PANTHOR_CS_GROUP_TERMINATED;

		panthor_group_start(grp);
	}

	atomic_set(&sched->reset.in_progress, false);
	mutex_unlock(&sched->reset.lock);

	if (reset_failed)
		return;

	sched_queue_delayed_work(sched, tick, 0);
	sched_queue_work(sched, sync_upd);
}
/**
 * update_fdinfo_stats() - Fold a job's profiling sample into its group's fdinfo counters
 * @job: Completed job with a non-zero profiling mask.
 *
 * The cycle and/or time deltas recorded in the job's profiling slot are
 * added to the group counters under the group's fdinfo lock.
 */
static void update_fdinfo_stats(struct panthor_job *job)
{
	struct panthor_group *group = job->group;
	struct panthor_queue *queue = group->queues[job->queue_idx];
	struct panthor_gpu_usage *usage = &group->fdinfo.data;
	struct panthor_job_profiling_data *slot_array = queue->profiling.slots->kmap;
	struct panthor_job_profiling_data *sample = &slot_array[job->profiling.slot];

	scoped_guard(spinlock, &group->fdinfo.lock) {
		if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_CYCLES) {
			usage->cycles += sample->cycles.after - sample->cycles.before;
		}

		if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP) {
			usage->time += sample->time.after - sample->time.before;
		}
	}
}
/**
 * panthor_fdinfo_gather_group_samples() - Collect per-group usage counters into a file's stats
 * @pfile: File whose groups are walked.
 *
 * For every registered group in the pool, the group's cycle and time
 * counters are added to pfile->stats and reset to zero, under the group's
 * fdinfo lock. Does nothing when the group pool is NULL or an error
 * pointer.
 */
void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile)
{
	struct panthor_group_pool *pool = pfile->groups;
	struct panthor_group *group;
	unsigned long id;

	if (IS_ERR_OR_NULL(pool))
		return;

	xa_lock(&pool->xa);
	xa_for_each_marked(&pool->xa, id, group, GROUP_REGISTERED) {
		guard(spinlock)(&group->fdinfo.lock);

		/* Transfer, then clear, each counter. */
		pfile->stats.cycles += group->fdinfo.data.cycles;
		group->fdinfo.data.cycles = 0;

		pfile->stats.time += group->fdinfo.data.time;
		group->fdinfo.data.time = 0;
	}
	xa_unlock(&pool->xa);
}
static bool queue_check_job_completion(struct panthor_queue *queue)
{
struct panthor_syncobj_64b *syncobj = NULL;
struct panthor_job *job, *job_tmp;
bool cookie, progress = false;
LIST_HEAD(done_jobs);
cookie = dma_fence_begin_signalling();
spin_lock(&queue->fence_ctx.lock);
list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
if (!syncobj) {
struct panthor_group *group = job->group;
syncobj = group->syncobjs->kmap +
(job->queue_idx * sizeof(*syncobj));
}
if (syncobj->seqno < job->done_fence->seqno)
break;
list_move_tail(&job->node, &done_jobs);
dma_fence_signal_locked(job->done_fence);
}
if (list_empty(&queue->fence_ctx.in_flight_jobs)) {
queue_suspend_timeout_locked(queue);
progress = true;
} else if (!list_empty(&done_jobs)) {
queue_reset_timeout_locked(queue);
progress = true;
}
spin_unlock(&queue->fence_ctx.lock);
dma_fence_end_signalling(cookie);
list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
if (job->profiling.mask)
update_fdinfo_stats(job);
list_del_init(&job->node);
panthor_job_put(&job->base);
}
return progress;
}
/**
 * group_sync_upd_work() - Check job completion on every queue of a group
 * @work: The sync_upd_work member embedded in a panthor_group.
 *
 * Drops one group reference when done.
 */
static void group_sync_upd_work(struct work_struct *work)
{
	struct panthor_group *group =
		container_of(work, struct panthor_group, sync_upd_work);
	bool cookie = dma_fence_begin_signalling();
	u32 qidx;

	for (qidx = 0; qidx < group->queue_count; qidx++) {
		/* Queue slots may be unpopulated. */
		if (group->queues[qidx])
			queue_check_job_completion(group->queues[qidx]);
	}

	dma_fence_end_signalling(cookie);
	group_put(group);
}
/**
 * struct panthor_job_ringbuf_instrs - Instruction sequence to copy into a queue ring buffer
 */
struct panthor_job_ringbuf_instrs {
/* 64-bit instruction words; sized for MAX_INSTRS_PER_JOB entries. */
u64 buffer[MAX_INSTRS_PER_JOB];
/* Number of valid entries in @buffer. */
u32 count;
};
/**
 * struct panthor_job_instr - One entry of the per-job instruction template
 */
struct panthor_job_instr {
/*
 * Profiling flags this instruction depends on. Zero means the
 * instruction is always emitted; otherwise it is emitted only when the
 * job's profile mask has at least one of these bits set.
 */
u32 profile_mask;
/* Encoded 64-bit instruction word. */
u64 instr;
};
/*
 * JOB_INSTR() - Initializer for a struct panthor_job_instr entry.
 * __prof: value for the profile_mask field.
 * __instr: value for the instr field.
 */
#define JOB_INSTR(__prof, __instr) \
{ \
.profile_mask = __prof, \
.instr = __instr, \
}
/**
 * copy_instrs_to_ringbuf() - Copy a job's instruction sequence into the queue ring buffer
 * @queue: Queue owning the ring buffer.
 * @job: Job providing the start position (job->ringbuf.start).
 * @instrs: Instructions to copy; the count is rounded up to a whole number
 * of cache lines before the copy.
 *
 * The copy wraps around to the beginning of the ring buffer when the
 * sequence does not fit between the start offset and the end of the buffer.
 */
static void
copy_instrs_to_ringbuf(struct panthor_queue *queue,
		       struct panthor_job *job,
		       struct panthor_job_ringbuf_instrs *instrs)
{
	u64 ring_bytes = panthor_kernel_bo_size(queue->ringbuf);
	/* The mask turns the start position into an offset within the buffer. */
	u64 offset = job->ringbuf.start & (ring_bytes - 1);
	u64 total_bytes, head_bytes;

	instrs->count = ALIGN(instrs->count, NUM_INSTRS_PER_CACHE_LINE);
	total_bytes = instrs->count * sizeof(u64);
	WARN_ON(total_bytes > ring_bytes);

	/* First chunk: from the start offset up to the end of the buffer. */
	head_bytes = min(ring_bytes - offset, total_bytes);
	memcpy(queue->ringbuf->kmap + offset, instrs->buffer, head_bytes);

	if (head_bytes == total_bytes)
		return;

	/* Second chunk: whatever is left, written from the buffer start. */
	memcpy(queue->ringbuf->kmap,
	       &instrs->buffer[head_bytes / sizeof(u64)],
	       total_bytes - head_bytes);
}
/**
 * struct panthor_job_cs_params - Values plugged into the per-job instruction template
 *
 * Filled by get_job_cs_params() and consumed by prepare_job_instrs().
 */
struct panthor_job_cs_params {
/* Profiling flags enabled for the job; selects the optional instructions. */
u32 profile_mask;
/* Register indices: addr_reg is the first preserved-range boundary register, val_reg is addr_reg + 2. */
u64 addr_reg; u64 val_reg;
/* Register indices used for profiling; set equal to addr_reg and val_reg respectively. */
u64 cycle_reg; u64 time_reg;
/* GPU addresses of the queue's sync object and of the job's profiling slot. */
u64 sync_addr; u64 times_addr;
/* Start address and size of the command stream to call (job->call_info). */
u64 cs_start; u64 cs_size;
/* Latest flush value from job->call_info, and a mask with one bit per sb slot. */
u32 last_flush; u32 waitall_mask;
};
/**
 * get_job_cs_params() - Gather the parameters needed to build a job's instruction sequence
 * @job: Job to extract the parameters from.
 * @params: Filled with register indices, GPU addresses, call information
 * and the profiling mask for @job.
 */
static void
get_job_cs_params(struct panthor_job *job, struct panthor_job_cs_params *params)
{
	struct panthor_group *group = job->group;
	struct panthor_queue *queue = group->queues[job->queue_idx];
	struct panthor_device *ptdev = group->ptdev;
	struct panthor_scheduler *sched = ptdev->scheduler;

	/* Profiling and call information come straight from the job. */
	params->profile_mask = job->profiling.mask;
	params->cs_start = job->call_info.start;
	params->cs_size = job->call_info.size;
	params->last_flush = job->call_info.latest_flush;

	/* Registers: the profiling registers alias the address/value ones. */
	params->addr_reg = ptdev->csif_info.cs_reg_count -
			   ptdev->csif_info.unpreserved_cs_reg_count;
	params->val_reg = params->addr_reg + 2;
	params->cycle_reg = params->addr_reg;
	params->time_reg = params->val_reg;

	/* GPU addresses of the queue sync object and of the profiling slot. */
	params->sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
			    job->queue_idx * sizeof(struct panthor_syncobj_64b);
	params->times_addr = panthor_kernel_bo_gpuva(queue->profiling.slots) +
			     (job->profiling.slot * sizeof(struct panthor_job_profiling_data));

	/* One bit per sb slot. */
	params->waitall_mask = GENMASK(sched->sb_slot_count - 1, 0);
}
/* Instruction emitted regardless of the job's profiling mask. */
#define JOB_INSTR_ALWAYS(instr) \
JOB_INSTR(PANTHOR_DEVICE_PROFILING_DISABLED, (instr))
/* Instruction emitted only when timestamp profiling is enabled for the job. */
#define JOB_INSTR_TIMESTAMP(instr) \
JOB_INSTR(PANTHOR_DEVICE_PROFILING_TIMESTAMP, (instr))
/* Instruction emitted only when cycle-count profiling is enabled for the job. */
#define JOB_INSTR_CYCLES(instr) \
JOB_INSTR(PANTHOR_DEVICE_PROFILING_CYCLES, (instr))
/**
 * prepare_job_instrs() - Build the instruction sequence wrapping a job's command stream
 * @params: Parameters plugged into the instruction template.
 * @instrs: Output buffer; on return, count is a multiple of
 * NUM_INSTRS_PER_CACHE_LINE and the padding entries are zeroed.
 *
 * Template entries tagged with a profiling flag are skipped unless that flag
 * is set in params->profile_mask.
 *
 * NOTE(review): the per-instruction remarks below describe the encoded
 * fields only (opcode in bits 63:56). The instruction names are not visible
 * here and should be checked against the CSF instruction set documentation.
 */
static void
prepare_job_instrs(const struct panthor_job_cs_params *params,
struct panthor_job_ringbuf_instrs *instrs)
{
const struct panthor_job_instr instr_seq[] = {
/* Opcode 2: val_reg with last_flush as immediate. */
JOB_INSTR_ALWAYS((2ull << 56) | (params->val_reg << 48) | params->last_flush),
/* Opcode 36: references val_reg, flags 0x233 (presumably a cache flush — TODO confirm). */
JOB_INSTR_ALWAYS((36ull << 56) | (0ull << 48) | (params->val_reg << 40) |
(0 << 16) | 0x233),
/* Opcode 1: cycle_reg with the address of cycles.before in the profiling slot. */
JOB_INSTR_CYCLES((1ull << 56) | (params->cycle_reg << 48) |
(params->times_addr +
offsetof(struct panthor_job_profiling_data, cycles.before))),
/* Opcode 40 with selector 1 in bit 32: presumably stores the cycle counter at that address. */
JOB_INSTR_CYCLES((40ull << 56) | (params->cycle_reg << 40) | (1ll << 32)),
/* Opcode 1: time_reg with the address of time.before in the profiling slot. */
JOB_INSTR_TIMESTAMP((1ull << 56) | (params->time_reg << 48) |
(params->times_addr +
offsetof(struct panthor_job_profiling_data, time.before))),
/* Opcode 40 with selector 0 in bit 32: presumably stores the timestamp at that address. */
JOB_INSTR_TIMESTAMP((40ull << 56) | (params->time_reg << 40) | (0ll << 32)),
/* Opcode 1: addr_reg with the command stream start address. */
JOB_INSTR_ALWAYS((1ull << 56) | (params->addr_reg << 48) | params->cs_start),
/* Opcode 2: val_reg with the command stream size. */
JOB_INSTR_ALWAYS((2ull << 56) | (params->val_reg << 48) | params->cs_size),
/* Opcode 3 with mask bit 0 set at bit 16 (presumably a wait on slot 0 — TODO confirm). */
JOB_INSTR_ALWAYS((3ull << 56) | (1 << 16)),
/* Opcode 32: references addr_reg and val_reg (presumably the call into the user command stream). */
JOB_INSTR_ALWAYS((32ull << 56) | (params->addr_reg << 40) |
(params->val_reg << 32)),
/* Same profiling pair as above, targeting cycles.after. */
JOB_INSTR_CYCLES((1ull << 56) | (params->cycle_reg << 48) |
(params->times_addr +
offsetof(struct panthor_job_profiling_data, cycles.after))),
JOB_INSTR_CYCLES((40ull << 56) | (params->cycle_reg << 40) | (1ll << 32)),
/* Same profiling pair as above, targeting time.after. */
JOB_INSTR_TIMESTAMP((1ull << 56) | (params->time_reg << 48) |
(params->times_addr +
offsetof(struct panthor_job_profiling_data, time.after))),
JOB_INSTR_TIMESTAMP((40ull << 56) | (params->time_reg << 40) | (0ll << 32)),
/* Opcode 1: addr_reg with the sync object address. */
JOB_INSTR_ALWAYS((1ull << 56) | (params->addr_reg << 48) | params->sync_addr),
/* Opcode 1: val_reg with immediate 1. */
JOB_INSTR_ALWAYS((1ull << 56) | (params->val_reg << 48) | 1),
/* Opcode 3 with waitall_mask at bit 16 (presumably a wait on all slots — TODO confirm). */
JOB_INSTR_ALWAYS((3ull << 56) | (params->waitall_mask << 16)),
/* Opcode 51: references addr_reg and val_reg (presumably adds val_reg to the sync object). */
JOB_INSTR_ALWAYS((51ull << 56) | (0ull << 48) | (params->addr_reg << 40) |
(params->val_reg << 32) | (0 << 16) | 1),
/* Opcode 47, no operands. */
JOB_INSTR_ALWAYS((47ull << 56)),
};
u32 pad;
instrs->count = 0;
/* The buffer must cover a whole number of 64-byte cache lines. */
static_assert(sizeof(instrs->buffer) % 64 == 0,
"panthor_job_ringbuf_instrs::buffer is not aligned on a cacheline");
/* The buffer must hold exactly the full template rounded up to a cache line. */
static_assert(ALIGN(ARRAY_SIZE(instr_seq), NUM_INSTRS_PER_CACHE_LINE) ==
ARRAY_SIZE(instrs->buffer),
"instr_seq vs panthor_job_ringbuf_instrs::buffer size mismatch");
for (u32 i = 0; i < ARRAY_SIZE(instr_seq); i++) {
/* Skip profiling instructions whose flag is not enabled for this job. */
if (instr_seq[i].profile_mask &&
!(instr_seq[i].profile_mask & params->profile_mask))
continue;
instrs->buffer[instrs->count++] = instr_seq[i].instr;
}
/* Zero-fill up to the next cache-line boundary and report the padded count. */
pad = ALIGN(instrs->count, NUM_INSTRS_PER_CACHE_LINE);
memset(&instrs->buffer[instrs->count], 0,
(pad - instrs->count) * sizeof(instrs->buffer[0]));
instrs->count = pad;
}
/**
 * calc_job_credits() - Compute the number of ring buffer instruction slots a job uses
 * @profile_mask: Profiling flags enabled for the job.
 *
 * Builds the instruction sequence with dummy parameters: only the profiling
 * mask influences how many instructions are emitted.
 *
 * Return: the cache-line-aligned instruction count for @profile_mask.
 */
static u32 calc_job_credits(u32 profile_mask)
{
	struct panthor_job_ringbuf_instrs instrs;
	struct panthor_job_cs_params params = {
		.profile_mask = profile_mask,
	};

	prepare_job_instrs(&params, &instrs);
	return instrs.count;
}
/**
 * queue_run_job() - drm_sched run_job() hook: push a job to its queue ring buffer
 * @sched_job: DRM scheduler job embedded in a panthor_job.
 *
 * Return: a reference to the job's done fence, or an ERR_PTR() if the
 * device could not be resumed or the group can no longer run (-ECANCELED).
 */
static struct dma_fence *
queue_run_job(struct drm_sched_job *sched_job)
{
struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
struct panthor_group *group = job->group;
struct panthor_queue *queue = group->queues[job->queue_idx];
struct panthor_device *ptdev = group->ptdev;
struct panthor_scheduler *sched = ptdev->scheduler;
struct panthor_job_ringbuf_instrs instrs;
struct panthor_job_cs_params cs_params;
struct dma_fence *done_fence;
int ret;
/*
 * Empty command stream: nothing to execute, so the job borrows the
 * queue's last fence as its done fence.
 */
if (!job->call_info.size) {
job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);
return dma_fence_get(job->done_fence);
}
ret = panthor_device_resume_and_get(ptdev);
if (drm_WARN_ON(&ptdev->base, ret))
return ERR_PTR(ret);
mutex_lock(&sched->lock);
if (!group_can_run(group)) {
done_fence = ERR_PTR(-ECANCELED);
goto out_unlock;
}
dma_fence_init(job->done_fence,
&panthor_queue_fence_ops,
&queue->fence_ctx.lock,
queue->fence_ctx.id,
atomic64_inc_return(&queue->fence_ctx.seqno));
/* Allocate the next profiling slot, wrapping at slot_count. */
job->profiling.slot = queue->profiling.seqno++;
if (queue->profiling.seqno == queue->profiling.slot_count)
queue->profiling.seqno = 0;
job->ringbuf.start = queue->iface.input->insert;
get_job_cs_params(job, &cs_params);
prepare_job_instrs(&cs_params, &instrs);
copy_instrs_to_ringbuf(queue, job, &instrs);
job->ringbuf.end = job->ringbuf.start + (instrs.count * sizeof(u64));
/* The in-flight list holds its own reference on the job. */
panthor_job_get(&job->base);
spin_lock(&queue->fence_ctx.lock);
list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs);
spin_unlock(&queue->fence_ctx.lock);
/* Order the ring buffer writes before the insert pointer update below. */
wmb();
queue->iface.input->extract = queue->iface.output->extract;
queue->iface.input->insert = job->ringbuf.end;
if (group->csg_id < 0) {
/* Group not bound to a CSG slot: let the scheduler know it has work. */
group_schedule_locked(group, BIT(job->queue_idx));
} else {
u32 queue_mask = BIT(job->queue_idx);
/*
 * The tick needs restarting if the group was fully idle, this queue
 * was idle and not blocked, and the tick is currently stopped.
 */
bool resume_tick = group_is_idle(group) &&
(group->idle_queues & queue_mask) &&
!(group->blocked_queues & queue_mask) &&
sched->resched_target == U64_MAX;
group->idle_queues &= ~queue_mask;
if (resume_tick)
sched_resume_tick(ptdev);
/* Ring the queue doorbell. */
gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
/* Take the scheduler's runtime PM reference if the queue can make progress. */
if (!sched->pm.has_ref &&
!(group->blocked_queues & BIT(job->queue_idx))) {
pm_runtime_get(ptdev->base.dev);
sched->pm.has_ref = true;
}
queue_resume_timeout(queue);
panthor_devfreq_record_busy(sched->ptdev);
}
/* The job's done fence becomes the queue's last fence. */
dma_fence_put(queue->fence_ctx.last_fence);
queue->fence_ctx.last_fence = dma_fence_get(job->done_fence);
done_fence = dma_fence_get(job->done_fence);
out_unlock:
mutex_unlock(&sched->lock);
/* Balance the panthor_device_resume_and_get() above. */
pm_runtime_mark_last_busy(ptdev->base.dev);
pm_runtime_put_autosuspend(ptdev->base.dev);
return done_fence;
}
/**
 * queue_timedout_job() - drm_sched timedout_job() hook
 * @sched_job: Job that timed out.
 *
 * Flags the group as timed out. A group bound to a CSG slot is left to the
 * scheduler tick, which is queued immediately; an unbound group is unlinked
 * from the scheduler lists and handed to its term work. The queue is
 * stopped around the update and restarted afterwards.
 *
 * Return: DRM_GPU_SCHED_STAT_RESET.
 */
static enum drm_gpu_sched_stat
queue_timedout_job(struct drm_sched_job *sched_job)
{
	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
	struct panthor_group *group = job->group;
	struct panthor_queue *queue = group->queues[job->queue_idx];
	struct panthor_device *ptdev = group->ptdev;
	struct panthor_scheduler *sched = ptdev->scheduler;

	drm_warn(&ptdev->base, "job timeout: pid=%d, comm=%s, seqno=%llu\n",
		 group->task_info.pid, group->task_info.comm, job->done_fence->seqno);

	/* Timeouts are not expected while a reset is in progress. */
	drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress));

	queue_stop(queue, job);

	mutex_lock(&sched->lock);
	group->timedout = true;
	if (group->csg_id < 0) {
		/* Unbound: make sure the scheduler can't pick it any more. */
		list_del_init(&group->run_node);
		list_del_init(&group->wait_node);
		group_queue_work(group, term);
	} else {
		sched_queue_delayed_work(ptdev->scheduler, tick, 0);
	}
	mutex_unlock(&sched->lock);

	queue_start(queue);

	return DRM_GPU_SCHED_STAT_RESET;
}
/**
 * queue_free_job() - drm_sched .free_job hook for panthor queues
 * @sched_job: Job being released by the scheduler.
 *
 * Cleans up the drm_sched part of the job first, then drops one reference on
 * the enclosing panthor job.
 */
static void queue_free_job(struct drm_sched_job *sched_job)
{
drm_sched_job_cleanup(sched_job);
panthor_job_put(sched_job);
}
/*
 * drm_sched backend callbacks installed on every queue scheduler created by
 * group_create_queue().
 */
static const struct drm_sched_backend_ops panthor_queue_sched_ops = {
.run_job = queue_run_job,
.timedout_job = queue_timedout_job,
.free_job = queue_free_job,
};
/**
 * calc_profiling_ringbuf_num_slots() - Number of profiling slots for a queue
 * @ptdev: Device (not used by the computation).
 * @cs_ringbuf_size: Size of the command-stream ring buffer, in bytes.
 *
 * Finds the smallest credit count calc_job_credits() reports for any single
 * profiling flag bit up to the highest bit of PANTHOR_DEVICE_PROFILING_ALL,
 * and returns how many jobs of that size (credits * sizeof(u64) bytes each)
 * fit in the ring buffer, rounded up.
 *
 * Return: Number of profiling slots to allocate.
 */
static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev,
					    u32 cs_ringbuf_size)
{
	u32 flag_count = fls(PANTHOR_DEVICE_PROFILING_ALL);
	u32 smallest_job = U32_MAX;
	u32 bit = 0;

	while (bit < flag_count) {
		smallest_job = min(smallest_job, calc_job_credits(BIT(bit)));
		bit++;
	}

	return DIV_ROUND_UP(cs_ringbuf_size, smallest_job * sizeof(u64));
}
/**
 * queue_timeout_work() - Delayed-work handler for a queue's timeout
 * @work: Work item embedded in the queue (timeout.work).
 *
 * Reports a fault to the queue's drm scheduler when
 * queue_check_job_completion() reports no progress.
 */
static void queue_timeout_work(struct work_struct *work)
{
	struct panthor_queue *queue =
		container_of(work, struct panthor_queue, timeout.work.work);

	if (queue_check_job_completion(queue))
		return;

	drm_sched_fault(&queue->scheduler);
}
/**
 * group_create_queue() - Allocate and initialise one queue of a group
 * @group: Group the queue belongs to; its ptdev and vm are used for the
 *         allocations made here.
 * @args: Queue creation arguments.
 * @drm_client_id: DRM client ID, only used to build the queue name.
 * @gid: Group ID, only used to build the queue name.
 * @qid: Queue index, only used to build the queue name.
 *
 * Validates @args, then allocates the ring buffer, the FW interface memory,
 * the profiling slots and the queue name, and finally initialises the drm
 * scheduler and its entity. Any failure after the queue object has been
 * allocated goes through group_free_queue() on the partially built queue.
 *
 * Return: The new queue, or an ERR_PTR() (-EINVAL for rejected arguments,
 * -ENOMEM for failed allocations, or the error returned by a helper).
 */
static struct panthor_queue *
group_create_queue(struct panthor_group *group,
const struct drm_panthor_queue_create *args,
u64 drm_client_id, u32 gid, u32 qid)
{
struct drm_sched_init_args sched_args = {
.ops = &panthor_queue_sched_ops,
.submit_wq = group->ptdev->scheduler->wq,
.num_rqs = 1,
/* Credit limit is the ring buffer size expressed in u64 units. */
.credit_limit = args->ringbuf_size / sizeof(u64),
/*
 * NOTE(review): the drm_sched timeout is MAX_SCHEDULE_TIMEOUT; presumably
 * timeouts are driven through queue->timeout.work instead - confirm.
 */
.timeout = MAX_SCHEDULE_TIMEOUT,
.timeout_wq = group->ptdev->reset.wq,
.dev = group->ptdev->base.dev,
};
struct drm_gpu_scheduler *drm_sched;
struct panthor_queue *queue;
int ret;
/* All padding fields must be zero. */
if (args->pad[0] || args->pad[1] || args->pad[2])
return ERR_PTR(-EINVAL);
/* The ring buffer size must be a power of two in the [4K, 64K] range. */
if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K ||
!is_power_of_2(args->ringbuf_size))
return ERR_PTR(-EINVAL);
if (args->priority > CSF_MAX_QUEUE_PRIO)
return ERR_PTR(-EINVAL);
queue = kzalloc_obj(*queue);
if (!queue)
return ERR_PTR(-ENOMEM);
queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
INIT_DELAYED_WORK(&queue->timeout.work, queue_timeout_work);
queue->fence_ctx.id = dma_fence_context_alloc(1);
spin_lock_init(&queue->fence_ctx.lock);
INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);
queue->priority = args->priority;
/* Command-stream ring buffer, mapped in the group's VM and in the kernel. */
queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm,
args->ringbuf_size,
DRM_PANTHOR_BO_NO_MMAP,
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
PANTHOR_VM_KERNEL_AUTO_VA,
"CS ring buffer");
if (IS_ERR(queue->ringbuf)) {
ret = PTR_ERR(queue->ringbuf);
goto err_free_queue;
}
ret = panthor_kernel_bo_vmap(queue->ringbuf);
if (ret)
goto err_free_queue;
/* Input/output interface memory of the queue. */
queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
&queue->iface.input,
&queue->iface.output,
&queue->iface.input_fw_va,
&queue->iface.output_fw_va);
if (IS_ERR(queue->iface.mem)) {
ret = PTR_ERR(queue->iface.mem);
goto err_free_queue;
}
/* Profiling slots: one panthor_job_profiling_data entry per slot. */
queue->profiling.slot_count =
calc_profiling_ringbuf_num_slots(group->ptdev, args->ringbuf_size);
queue->profiling.slots =
panthor_kernel_bo_create(group->ptdev, group->vm,
queue->profiling.slot_count *
sizeof(struct panthor_job_profiling_data),
DRM_PANTHOR_BO_NO_MMAP,
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
PANTHOR_VM_KERNEL_AUTO_VA,
"Group job stats");
if (IS_ERR(queue->profiling.slots)) {
ret = PTR_ERR(queue->profiling.slots);
goto err_free_queue;
}
ret = panthor_kernel_bo_vmap(queue->profiling.slots);
if (ret)
goto err_free_queue;
/* The name combines the DRM client ID, the group ID and the queue index. */
queue->name = kasprintf(GFP_KERNEL, "panthor-queue-%llu-%u-%u", drm_client_id, gid, qid);
if (!queue->name) {
ret = -ENOMEM;
goto err_free_queue;
}
sched_args.name = queue->name;
ret = drm_sched_init(&queue->scheduler, &sched_args);
if (ret)
goto err_free_queue;
/* The entity is bound to this queue's scheduler only. */
drm_sched = &queue->scheduler;
ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL);
if (ret)
goto err_free_queue;
return queue;
err_free_queue:
/*
 * NOTE(review): members may still be NULL or hold an ERR_PTR() here;
 * this relies on group_free_queue() coping with that - confirm.
 */
group_free_queue(group, queue);
return ERR_PTR(ret);
}
/**
 * group_init_task_info() - Record the creator of a group
 * @group: Group to annotate.
 *
 * Stores the pid and comm of the current task's thread-group leader in
 * group->task_info.
 */
static void group_init_task_info(struct panthor_group *group)
{
	struct task_struct *leader = current->group_leader;

	get_task_comm(group->task_info.comm, leader);
	group->task_info.pid = leader->pid;
}
/**
 * add_group_kbo_sizes() - Account a group's kernel BOs in its fdinfo
 * @ptdev: Device the group is expected to belong to.
 * @group: Group whose kernel BO sizes are summed.
 *
 * Adds the sizes of the suspend buffers, the sync objects BO and, for each
 * queue, the ring buffer, interface memory and profiling slots to
 * group->fdinfo.kbo_sizes. Warns and bails out if @group is NULL/ERR_PTR or
 * does not belong to @ptdev.
 */
static void add_group_kbo_sizes(struct panthor_device *ptdev,
				struct panthor_group *group)
{
	int i;

	if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(group)) ||
	    drm_WARN_ON(&ptdev->base, ptdev != group->ptdev))
		return;

	/* Group-wide buffers. */
	group->fdinfo.kbo_sizes += group->suspend_buf->obj->size;
	group->fdinfo.kbo_sizes += group->protm_suspend_buf->obj->size;
	group->fdinfo.kbo_sizes += group->syncobjs->obj->size;

	/* Per-queue buffers. */
	for (i = 0; i < group->queue_count; i++) {
		struct panthor_queue *queue = group->queues[i];

		group->fdinfo.kbo_sizes += queue->ringbuf->obj->size;
		group->fdinfo.kbo_sizes += queue->iface.mem->obj->size;
		group->fdinfo.kbo_sizes += queue->profiling.slots->obj->size;
	}
}
/* Upper bound of the group ID range passed to xa_alloc() for a group pool. */
#define MAX_GROUPS_PER_POOL 128
/**
 * panthor_group_create() - Create a scheduling group and its queues
 * @pfile: File the group is created on; provides the device, group pool and
 *         VM pool.
 * @group_args: Group creation arguments.
 * @queue_args: Array of queue creation arguments, indexed up to
 *              @group_args->queues.count.
 * @drm_client_id: DRM client ID, forwarded to group_create_queue().
 *
 * Validates the arguments, allocates the group and its buffers, reserves a
 * group ID in the pool, creates the queues, inserts the group in the idle
 * list (or stops it if a reset is in progress) and finally marks the ID as
 * GROUP_REGISTERED.
 *
 * Return: The group ID on success, a negative error code otherwise.
 */
int panthor_group_create(struct panthor_file *pfile,
const struct drm_panthor_group_create *group_args,
const struct drm_panthor_queue_create *queue_args,
u64 drm_client_id)
{
struct panthor_device *ptdev = pfile->ptdev;
struct panthor_group_pool *gpool = pfile->groups;
struct panthor_scheduler *sched = ptdev->scheduler;
struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
struct panthor_group *group = NULL;
u32 gid, i, suspend_size;
int ret;
if (group_args->pad)
return -EINVAL;
if (group_args->priority >= PANTHOR_CSG_PRIORITY_COUNT)
return -EINVAL;
/* Requested core masks must be subsets of what the GPU reports present. */
if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) ||
(group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) ||
(group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present))
return -EINVAL;
/* Each max_*_cores value can't exceed the number of bits set in its mask. */
if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores ||
hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores ||
hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores)
return -EINVAL;
/*
 * NOTE(review): group_args->queues.count is not range-checked in this
 * function; presumably the caller validates it - confirm.
 */
group = kzalloc_obj(*group);
if (!group)
return -ENOMEM;
spin_lock_init(&group->fatal_lock);
kref_init(&group->refcount);
group->state = PANTHOR_CS_GROUP_CREATED;
/* A negative csg_id is what the rest of this file tests for "no slot". */
group->csg_id = -1;
group->ptdev = ptdev;
group->max_compute_cores = group_args->max_compute_cores;
group->compute_core_mask = group_args->compute_core_mask;
group->max_fragment_cores = group_args->max_fragment_cores;
group->fragment_core_mask = group_args->fragment_core_mask;
group->max_tiler_cores = group_args->max_tiler_cores;
group->tiler_core_mask = group_args->tiler_core_mask;
group->priority = group_args->priority;
INIT_LIST_HEAD(&group->wait_node);
INIT_LIST_HEAD(&group->run_node);
INIT_WORK(&group->term_work, group_term_work);
INIT_WORK(&group->sync_upd_work, group_sync_upd_work);
INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
INIT_WORK(&group->release_work, group_release_work);
group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
if (!group->vm) {
ret = -EINVAL;
goto err_put_group;
}
/* Suspend buffer sizes come from the CSG interface control block. */
suspend_size = csg_iface->control->suspend_size;
group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
if (IS_ERR(group->suspend_buf)) {
ret = PTR_ERR(group->suspend_buf);
/* Don't leave an ERR_PTR() behind for the release path. */
group->suspend_buf = NULL;
goto err_put_group;
}
suspend_size = csg_iface->control->protm_suspend_size;
group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
if (IS_ERR(group->protm_suspend_buf)) {
ret = PTR_ERR(group->protm_suspend_buf);
group->protm_suspend_buf = NULL;
goto err_put_group;
}
/* One panthor_syncobj_64b per queue. */
group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
group_args->queues.count *
sizeof(struct panthor_syncobj_64b),
DRM_PANTHOR_BO_NO_MMAP,
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
PANTHOR_VM_KERNEL_AUTO_VA,
"Group sync objects");
if (IS_ERR(group->syncobjs)) {
ret = PTR_ERR(group->syncobjs);
goto err_put_group;
}
ret = panthor_kernel_bo_vmap(group->syncobjs);
if (ret)
goto err_put_group;
memset(group->syncobjs->kmap, 0,
group_args->queues.count * sizeof(struct panthor_syncobj_64b));
/*
 * Reserve the group ID now so it can be used in the queue names. The entry
 * is not marked GROUP_REGISTERED yet, so group_from_handle() and
 * panthor_group_destroy() won't pick it up until the end of this function.
 */
ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
if (ret)
goto err_put_group;
for (i = 0; i < group_args->queues.count; i++) {
group->queues[i] = group_create_queue(group, &queue_args[i], drm_client_id, gid, i);
if (IS_ERR(group->queues[i])) {
ret = PTR_ERR(group->queues[i]);
group->queues[i] = NULL;
goto err_erase_gid;
}
/* Only count queues that were fully created. */
group->queue_count++;
}
/* All queues start idle. NOTE(review): assumes queue_count >= 1 - confirm. */
group->idle_queues = GENMASK(group->queue_count - 1, 0);
mutex_lock(&sched->reset.lock);
if (atomic_read(&sched->reset.in_progress)) {
panthor_group_stop(group);
} else {
mutex_lock(&sched->lock);
list_add_tail(&group->run_node,
&sched->groups.idle[group->priority]);
mutex_unlock(&sched->lock);
}
mutex_unlock(&sched->reset.lock);
add_group_kbo_sizes(group->ptdev, group);
spin_lock_init(&group->fdinfo.lock);
group_init_task_info(group);
/* From here on the group can be looked up through its handle. */
xa_set_mark(&gpool->xa, gid, GROUP_REGISTERED);
return gid;
err_erase_gid:
xa_erase(&gpool->xa, gid);
err_put_group:
group_put(group);
return ret;
}
/**
 * panthor_group_destroy() - Remove a group from a file's group pool
 * @pfile: File owning the group pool.
 * @group_handle: Handle of the group to destroy.
 *
 * Erases the handle from the pool, flags the group as destroyed and, with
 * the reset and scheduler locks held, either schedules an immediate tick
 * (group has a non-negative csg_id) or, when no reset is in progress,
 * unlinks the group and queues its termination work. The reference obtained
 * from the pool is dropped before returning.
 *
 * Return: 0 on success, -EINVAL if the handle is not marked
 * GROUP_REGISTERED or no entry is stored at that index.
 */
int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
{
	struct panthor_group_pool *gpool = pfile->groups;
	struct panthor_scheduler *sched = pfile->ptdev->scheduler;
	struct panthor_group *group;

	if (!xa_get_mark(&gpool->xa, group_handle, GROUP_REGISTERED))
		return -EINVAL;

	group = xa_erase(&gpool->xa, group_handle);
	if (!group)
		return -EINVAL;

	mutex_lock(&sched->reset.lock);
	mutex_lock(&sched->lock);

	group->destroyed = true;
	if (group->csg_id < 0) {
		if (!atomic_read(&sched->reset.in_progress)) {
			/* Not on a slot and no reset: unlink and terminate. */
			list_del_init(&group->run_node);
			list_del_init(&group->wait_node);
			group_queue_work(group, term);
		}
	} else {
		sched_queue_delayed_work(sched, tick, 0);
	}

	mutex_unlock(&sched->lock);
	mutex_unlock(&sched->reset.lock);

	group_put(group);

	return 0;
}
/**
 * group_from_handle() - Look up a registered group by handle
 * @pool: Group pool to search.
 * @group_handle: Handle to look up.
 *
 * Searches, with the xarray lock held, for an entry marked GROUP_REGISTERED
 * at exactly @group_handle and passes it through group_get().
 *
 * Return: Whatever group_get() returns for the entry found; callers in this
 * file treat NULL as "no such group".
 */
static struct panthor_group *group_from_handle(struct panthor_group_pool *pool,
					       unsigned long group_handle)
{
	struct panthor_group *found;

	xa_lock(&pool->xa);
	/* max == start index restricts the search to this single handle. */
	found = xa_find(&pool->xa, &group_handle, group_handle, GROUP_REGISTERED);
	found = group_get(found);
	xa_unlock(&pool->xa);

	return found;
}
/**
 * panthor_group_get_state() - Report the state flags of a group
 * @pfile: File owning the group pool.
 * @get_state: In: group_handle and pad (must be zero). Out: state flags and
 *             fatal_queues; every other field is zeroed.
 *
 * Return: 0 on success, -EINVAL if pad is set or the handle is unknown.
 */
int panthor_group_get_state(struct panthor_file *pfile,
			    struct drm_panthor_group_get_state *get_state)
{
	struct panthor_scheduler *sched = pfile->ptdev->scheduler;
	struct panthor_group *group;

	if (get_state->pad)
		return -EINVAL;

	group = group_from_handle(pfile->groups, get_state->group_handle);
	if (!group)
		return -EINVAL;

	/* The handle has been consumed; start from an all-zero reply. */
	memset(get_state, 0, sizeof(*get_state));

	mutex_lock(&sched->lock);
	if (group->fatal_queues) {
		get_state->fatal_queues = group->fatal_queues;
		get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT;
	}

	if (group->timedout)
		get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT;

	if (group->innocent)
		get_state->state |= DRM_PANTHOR_GROUP_STATE_INNOCENT;
	mutex_unlock(&sched->lock);

	group_put(group);

	return 0;
}
/**
 * panthor_group_pool_create() - Allocate the group pool of a file
 * @pfile: File to attach the new pool to.
 *
 * Allocates a zeroed pool, initialises its xarray with XA_FLAGS_ALLOC1 and
 * stores it in @pfile->groups.
 *
 * Return: 0 on success, -ENOMEM if the allocation fails.
 */
int panthor_group_pool_create(struct panthor_file *pfile)
{
	struct panthor_group_pool *pool = kzalloc_obj(*pool);

	if (!pool)
		return -ENOMEM;

	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1);
	pfile->groups = pool;

	return 0;
}
void panthor_group_pool_destroy(struct panthor_file *pfile)
{
struct panthor_group_pool *gpool = pfile->groups;
struct panthor_group *group;
unsigned long i;
if (IS_ERR_OR_NULL(gpool))
return;
xa_for_each(&gpool->xa, i, group)
panthor_group_destroy(pfile, i);
xa_destroy(&gpool->xa);
kfree(gpool);
pfile->groups = NULL;
}
/**
 * panthor_fdinfo_gather_group_mem_info() - Add group kernel BO sizes to stats
 * @pfile: File whose groups are inspected.
 * @stats: Memory stats to update.
 *
 * For each group marked GROUP_REGISTERED, adds its fdinfo.kbo_sizes to
 * @stats->resident, and also to @stats->active when the group has a
 * non-negative csg_id. The pool's xarray lock is held while iterating.
 */
void
panthor_fdinfo_gather_group_mem_info(struct panthor_file *pfile,
				     struct drm_memory_stats *stats)
{
	struct panthor_group_pool *pool = pfile->groups;
	struct panthor_group *grp;
	unsigned long idx;

	if (IS_ERR_OR_NULL(pool))
		return;

	xa_lock(&pool->xa);
	xa_for_each_marked(&pool->xa, idx, grp, GROUP_REGISTERED) {
		if (grp->csg_id >= 0)
			stats->active += grp->fdinfo.kbo_sizes;

		stats->resident += grp->fdinfo.kbo_sizes;
	}
	xa_unlock(&pool->xa);
}
/**
 * job_release() - kref release callback for a panthor job
 * @ref: Reference counter embedded in the job.
 *
 * Warns if the job is still linked in a list, cleans up the drm_sched job
 * when it has a scheduler fence, releases the done fence, drops the group
 * reference and frees the job.
 */
static void job_release(struct kref *ref)
{
	struct panthor_job *job = container_of(ref, struct panthor_job, refcount);

	drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node));

	if (job->base.s_fence)
		drm_sched_job_cleanup(&job->base);

	/*
	 * A fence without ops (or no fence at all) goes through
	 * dma_fence_free(); presumably that is a fence that was allocated but
	 * never initialised. Anything else is a regular reference drop.
	 */
	if (!job->done_fence || !job->done_fence->ops)
		dma_fence_free(job->done_fence);
	else
		dma_fence_put(job->done_fence);

	group_put(job->group);
	kfree(job);
}
/**
 * panthor_job_get() - Take a reference on a panthor job
 * @sched_job: drm_sched job embedded in the panthor job; may be NULL.
 *
 * Return: @sched_job, unchanged.
 */
struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job)
{
	struct panthor_job *job;

	if (!sched_job)
		return sched_job;

	job = container_of(sched_job, struct panthor_job, base);
	kref_get(&job->refcount);

	return sched_job;
}
/**
 * panthor_job_put() - Drop a reference on a panthor job
 * @sched_job: drm_sched job embedded in the panthor job; may be NULL, in
 *             which case nothing is done.
 *
 * The job is released through job_release() when the last reference goes
 * away.
 */
void panthor_job_put(struct drm_sched_job *sched_job)
{
	struct panthor_job *job;

	/*
	 * Check for NULL before deriving the container, so container_of() is
	 * never applied to a NULL pointer (same shape as panthor_job_get()).
	 */
	if (!sched_job)
		return;

	job = container_of(sched_job, struct panthor_job, base);
	kref_put(&job->refcount, job_release);
}
/**
 * panthor_job_vm() - Get the VM a job runs on
 * @sched_job: drm_sched job embedded in the panthor job; must not be NULL.
 *
 * Return: The VM of the group the job belongs to.
 */
struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct panthor_job, base)->group->vm;
}
/**
 * panthor_job_create() - Create a job targeting one queue of a group
 * @pfile: File the submission comes from; provides the group pool and the
 *         device profiling mask.
 * @group_handle: Handle of the group to submit to.
 * @qsubmit: Queue submission arguments.
 * @drm_client_id: DRM client ID, forwarded to drm_sched_job_init().
 *
 * Return: The drm_sched job embedded in the new panthor job, or an ERR_PTR()
 * (-EINVAL for rejected arguments, -ENOMEM for failed allocations, or the
 * error returned by drm_sched_job_init()).
 */
struct drm_sched_job *
panthor_job_create(struct panthor_file *pfile,
u16 group_handle,
const struct drm_panthor_queue_submit *qsubmit,
u64 drm_client_id)
{
struct panthor_group_pool *gpool = pfile->groups;
struct panthor_job *job;
u32 credits;
int ret;
if (qsubmit->pad)
return ERR_PTR(-EINVAL);
/* stream_size and stream_addr must be both zero or both non-zero. */
if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0))
return ERR_PTR(-EINVAL);
/* The address must be 64-byte aligned and the size a multiple of 8 bytes. */
if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7))
return ERR_PTR(-EINVAL);
/* Bits 30:24 of latest_flush must be zero. */
if (qsubmit->latest_flush & GENMASK(30, 24))
return ERR_PTR(-EINVAL);
job = kzalloc_obj(*job);
if (!job)
return ERR_PTR(-ENOMEM);
/* From here on, errors drop this initial reference (see err_put_job). */
kref_init(&job->refcount);
job->queue_idx = qsubmit->queue_index;
job->call_info.size = qsubmit->stream_size;
job->call_info.start = qsubmit->stream_addr;
job->call_info.latest_flush = qsubmit->latest_flush;
INIT_LIST_HEAD(&job->node);
job->group = group_from_handle(gpool, group_handle);
if (!job->group) {
ret = -EINVAL;
goto err_put_job;
}
if (!group_can_run(job->group)) {
ret = -EINVAL;
goto err_put_job;
}
/* The queue index must designate an existing queue of the group. */
if (job->queue_idx >= job->group->queue_count ||
!job->group->queues[job->queue_idx]) {
ret = -EINVAL;
goto err_put_job;
}
/* A done fence is only allocated for non-empty command streams. */
if (job->call_info.size) {
job->done_fence = kzalloc_obj(*job->done_fence);
if (!job->done_fence) {
ret = -ENOMEM;
goto err_put_job;
}
}
/* The credit count depends on the device profiling mask at creation time. */
job->profiling.mask = pfile->ptdev->profile_mask;
credits = calc_job_credits(job->profiling.mask);
if (credits == 0) {
ret = -EINVAL;
goto err_put_job;
}
ret = drm_sched_job_init(&job->base,
&job->group->queues[job->queue_idx]->entity,
credits, job->group, drm_client_id);
if (ret)
goto err_put_job;
return &job->base;
err_put_job:
/* Drops the reference taken by kref_init(); job_release() does the cleanup. */
panthor_job_put(&job->base);
return ERR_PTR(ret);
}
void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job)
{
struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
}
/**
 * panthor_sched_unplug() - Quiesce the scheduler on device unplug
 * @ptdev: Device whose scheduler is being unplugged.
 *
 * Disables the tick, FW events and sync update works (waiting for any
 * running instance), then drops the runtime PM reference held by the
 * scheduler, if any.
 */
void panthor_sched_unplug(struct panthor_device *ptdev)
{
struct panthor_scheduler *sched = ptdev->scheduler;
disable_delayed_work_sync(&sched->tick_work);
disable_work_sync(&sched->fw_events_work);
disable_work_sync(&sched->sync_upd_work);
/* pm.has_ref is tested and cleared with the scheduler lock held. */
mutex_lock(&sched->lock);
if (sched->pm.has_ref) {
pm_runtime_put(ptdev->base.dev);
sched->pm.has_ref = false;
}
mutex_unlock(&sched->lock);
}
/**
 * panthor_sched_fini() - Release scheduler resources
 * @ddev: DRM device, used for the warnings.
 * @res: The panthor_scheduler to clean up.
 *
 * Registered as a drmm action by panthor_sched_init(), which also calls it
 * directly when workqueue allocation fails. Returns early if @res is NULL or
 * its csg_slot_count is zero. Otherwise destroys the workqueues that were
 * allocated and warns about any group list that is not empty.
 */
static void panthor_sched_fini(struct drm_device *ddev, void *res)
{
	struct panthor_scheduler *sched = res;
	int prio = PANTHOR_CSG_PRIORITY_COUNT;

	if (!sched)
		return;

	if (!sched->csg_slot_count)
		return;

	if (sched->wq)
		destroy_workqueue(sched->wq);

	if (sched->heap_alloc_wq)
		destroy_workqueue(sched->heap_alloc_wq);

	/* Walk priorities from highest to lowest. */
	while (--prio >= 0) {
		drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio]));
		drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio]));
	}

	drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting));
}
/**
 * panthor_sched_init() - Allocate and initialise the device scheduler
 * @ptdev: Device to initialise the scheduler for.
 *
 * Sizes the scheduler from the FW interface and GPU info, initialises its
 * works, locks and lists, allocates its workqueues and registers
 * panthor_sched_fini() as a drmm action before publishing the scheduler in
 * @ptdev->scheduler.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EINVAL if no address
 * space other than AS0 is present, or the error returned by a drmm helper.
 */
int panthor_sched_init(struct panthor_device *ptdev)
{
struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0);
struct panthor_scheduler *sched;
u32 gpu_as_count, num_groups;
int prio, ret;
sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL);
if (!sched)
return -ENOMEM;
/*
 * The number of usable CSG slots is the FW-reported group count, clamped
 * to both MAX_CSGS and MAX_CSG_PRIO + 1.
 */
num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num);
num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups);
/*
 * Count the address spaces present, leaving bit 0 out.
 * NOTE(review): AS0 is presumably reserved (the error message expects at
 * least 2 AS in total) - confirm.
 */
gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1));
if (!gpu_as_count) {
/* NOTE(review): this message has no trailing newline. */
drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)",
gpu_as_count + 1);
return -EINVAL;
}
sched->ptdev = ptdev;
sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features);
sched->csg_slot_count = num_groups;
sched->cs_slot_count = csg_iface->control->stream_num;
sched->as_slot_count = gpu_as_count;
/* Mirror the slot counts in the device's csif_info. */
ptdev->csif_info.csg_slot_count = sched->csg_slot_count;
ptdev->csif_info.cs_slot_count = sched->cs_slot_count;
ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count;
sched->last_tick = 0;
/* U64_MAX is the value queue_run_job() compares against before resuming the tick. */
sched->resched_target = U64_MAX;
sched->tick_period = msecs_to_jiffies(10);
INIT_DELAYED_WORK(&sched->tick_work, tick_work);
INIT_WORK(&sched->sync_upd_work, sync_upd_work);
INIT_WORK(&sched->fw_events_work, process_fw_events_work);
ret = drmm_mutex_init(&ptdev->base, &sched->lock);
if (ret)
return ret;
for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
INIT_LIST_HEAD(&sched->groups.runnable[prio]);
INIT_LIST_HEAD(&sched->groups.idle[prio]);
}
INIT_LIST_HEAD(&sched->groups.waiting);
ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock);
if (ret)
return ret;
INIT_LIST_HEAD(&sched->reset.stopped_groups);
/* Two separate workqueues; only the scheduler one is WQ_MEM_RECLAIM. */
sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0);
sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (!sched->wq || !sched->heap_alloc_wq) {
/*
 * The drmm action is not registered yet, so clean up by hand.
 * NOTE(review): panthor_sched_fini() returns early when csg_slot_count is
 * zero, which would skip destroying a workqueue that was allocated - confirm
 * group_num can't be zero.
 */
panthor_sched_fini(&ptdev->base, sched);
/* NOTE(review): this message has no trailing newline. */
drm_err(&ptdev->base, "Failed to allocate the workqueues");
return -ENOMEM;
}
/* On failure, the _or_reset variant runs panthor_sched_fini() itself. */
ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched);
if (ret)
return ret;
ptdev->scheduler = sched;
return 0;
}