drm/panthor: Report innocent group kill

Groups can be killed during a reset even though they did nothing wrong.
That usually happens when the FW is put in a bad state by other groups,
resulting in group suspension failures when the reset happens.

If we end up in that situation, flag the group as innocent and report
its innocence through the new DRM_PANTHOR_GROUP_STATE_INNOCENT flag.

Bump the minor driver version to reflect the uAPI change.

Changes in v4:
- Add an entry to the driver version changelog
- Add R-bs

Changes in v3:
- Actually report innocence to userspace

Changes in v2:
- New patch

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241211080500.2349505-1-boris.brezillon@collabora.com
This commit is contained in:
Boris Brezillon 2024-12-11 09:05:00 +01:00 committed by ChenJiali
commit 2ebad648cd
3 changed files with 29 additions and 1 deletions

View file

@ -1451,6 +1451,7 @@ static const struct file_operations panthor_drm_driver_fops = {
* - 1.1 - adds DEV_QUERY_TIMESTAMP_INFO query
* - 1.2 - adds DEV_QUERY_GROUP_PRIORITIES_INFO query
* - adds PANTHOR_GROUP_PRIORITY_REALTIME priority
* - 1.3 - adds DRM_PANTHOR_GROUP_STATE_INNOCENT flag
*/
static const struct drm_driver panthor_drm_driver = {
.driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ |
@ -1464,7 +1465,7 @@ static const struct drm_driver panthor_drm_driver = {
.desc = "Panthor DRM driver",
.date = "20230801",
.major = 1,
.minor = 2,
.minor = 3,
.gem_create_object = panthor_gem_create_object,
.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,

View file

@ -610,6 +610,16 @@ struct panthor_group {
*/
bool timedout;
/**
* @innocent: True when the group becomes unusable because the group suspension
* failed during a reset.
*
* Sometimes the FW is put in a bad state by other groups, causing the group
* suspension performed in the reset path to fail. In that case, we consider the
* group innocent.
*/
bool innocent;
/**
* @syncobjs: Pool of per-queue synchronization objects.
*
@ -2676,6 +2686,12 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
u32 csg_id = ffs(slot_mask) - 1;
struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
/* If the group was still usable before that point, we consider
* it innocent.
*/
if (group_can_run(csg_slot->group))
csg_slot->group->innocent = true;
/* We consider group suspension failures as fatal and flag the
* group as unusable by setting timedout=true.
*/
@ -3517,6 +3533,8 @@ int panthor_group_get_state(struct panthor_file *pfile,
get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT;
get_state->fatal_queues = group->fatal_queues;
}
if (group->innocent)
get_state->state |= DRM_PANTHOR_GROUP_STATE_INNOCENT;
mutex_unlock(&sched->lock);
group_put(group);

View file

@ -918,6 +918,15 @@ enum drm_panthor_group_state_flags {
* When a group ends up with this flag set, no jobs can be submitted to its queues.
*/
DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1,
/**
* @DRM_PANTHOR_GROUP_STATE_INNOCENT: Group was killed during a reset caused by other
* groups.
*
* This flag can only be set if DRM_PANTHOR_GROUP_STATE_TIMEDOUT is set and
* DRM_PANTHOR_GROUP_STATE_FATAL_FAULT is not.
*/
DRM_PANTHOR_GROUP_STATE_INNOCENT = 1 << 2,
};
/**