tree:   https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
head:   9d49ed9ca93b8c564033c1d6808017bc9052b5db
commit: 13202496bf8a62ad53279167d3db440cecb7ff18 [7512/8469] drm/amd/amdgpu implement tdr advanced mode
config: alpha-randconfig-r015-20210329 (attached as .config)
compiler: alpha-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=13202496bf8a62ad53279167d3db440cecb7ff18
        git remote add linux-next https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
        git fetch --no-tags linux-next master
        git checkout 13202496bf8a62ad53279167d3db440cecb7ff18
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=alpha

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c: In function 'amdgpu_device_suspend':
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:3720:6: warning: variable 'r' set but not used [-Wunused-but-set-variable]
    3720 |  int r;
         |      ^
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c: At top level:
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:4578:6: warning: no previous prototype for 'amdgpu_device_recheck_guilty_jobs' [-Wmissing-prototypes]
    4578 | void amdgpu_device_recheck_guilty_jobs(struct amdgpu_device *adev,
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

vim +/amdgpu_device_recheck_guilty_jobs +4578 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

  4577	
> 4578	void amdgpu_device_recheck_guilty_jobs(struct amdgpu_device *adev,
  4579					       struct amdgpu_hive_info *hive,
  4580					       struct list_head *device_list_handle,
  4581					       bool *need_full_reset)
  4582	{
  4583		int i, r = 0;
  4584	
  4585		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
  4586			struct amdgpu_ring *ring = adev->rings[i];
  4587			int ret = 0;
  4588			struct drm_sched_job *s_job;
  4589	
  4590			if (!ring || !ring->sched.thread)
  4591				continue;
  4592	
  4593			s_job = list_first_entry_or_null(&ring->sched.pending_list,
  4594					struct drm_sched_job, list);
  4595			if (s_job == NULL)
  4596				continue;
  4597	
  4598			/* clear job's guilty and depend the folowing step to decide the real one */
  4599			drm_sched_reset_karma(s_job);
  4600			drm_sched_resubmit_jobs_ext(&ring->sched, 1);
  4601	
  4602			ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
  4603			if (ret == 0) { /* timeout */
  4604				DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
  4605							ring->sched.name, s_job->id);
  4606	
  4607				/* set guilty */
  4608				drm_sched_increase_karma(s_job);
  4609	retry:
  4610				/* do hw reset */
  4611				if (amdgpu_sriov_vf(adev)) {
  4612					amdgpu_virt_fini_data_exchange(adev);
  4613					r = amdgpu_device_reset_sriov(adev, false);
  4614					if (r)
  4615						adev->asic_reset_res = r;
  4616				} else {
  4617					r  = amdgpu_do_asic_reset(hive, device_list_handle,
  4618							need_full_reset, false);
  4619					if (r && r == -EAGAIN)
  4620						goto retry;
  4621				}
  4622	
  4623				/*
  4624				 * add reset counter so that the following
  4625				 * resubmitted job could flush vmid
  4626				 */
  4627				atomic_inc(&adev->gpu_reset_counter);
  4628				continue;
  4629			}
  4630	
  4631			/* got the hw fence, signal finished fence */
  4632			atomic_dec(ring->sched.score);
  4633			dma_fence_get(&s_job->s_fence->finished);
  4634			dma_fence_signal(&s_job->s_fence->finished);
  4635			dma_fence_put(&s_job->s_fence->finished);
  4636	
  4637			/* remove node from list and free the job */
  4638			spin_lock(&ring->sched.job_list_lock);
  4639			list_del_init(&s_job->list);
  4640			spin_unlock(&ring->sched.job_list_lock);
  4641			ring->sched.ops->free_job(s_job);
  4642		}
  4643	}
  4644	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org