Skip to content

Commit

Permalink
amd/amdgpu: Reduce unnecessary repetitive GPU resets
Browse files Browse the repository at this point in the history
In the multi-GPU case, once one GPU has started
resetting all GPUs in the hive, the other GPUs do not
need to trigger a GPU reset again.

Signed-off-by: YiPeng Chai <[email protected]>
Reviewed-by: Hawking Zhang <[email protected]>
  • Loading branch information
YiPeng Chai authored and Hawking Zhang committed Sep 21, 2024
1 parent 2f767b9 commit 8809b4b
Showing 1 changed file with 20 additions and 1 deletion.
21 changes: 20 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Original file line number Diff line number Diff line change
Expand Up @@ -4030,8 +4030,27 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
}

if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) {
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
int hive_ras_recovery = 0;

if (hive) {
hive_ras_recovery = atomic_read(&hive->ras_recovery);
amdgpu_put_xgmi_hive(hive);
}
/* In the case of multiple GPUs, after a GPU has started
* resetting all GPUs on hive, other GPUs do not need to
* trigger GPU reset again.
*/
if (!hive_ras_recovery)
amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
else
atomic_set(&ras->in_recovery, 0);
} else {
flush_work(&ras->recovery_work);
amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
}

return 0;
}

Expand Down

0 comments on commit 8809b4b

Please sign in to comment.