[Public]

> -----Original Message-----
> From: Kuehling, Felix
> Sent: November 30, 2022 6:55 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 27/29] drm/amdkfd: add debug queue snapshot operation
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
> > Allow the debugger to get a snapshot of a specified number of queues
> > containing various queue property information that is copied to the
> > debugger.
> >
> > Since the debugger doesn't know how many queues exist at any given time,
> > allow the debugger to pass a requested number of snapshots of 0 to get
> > the actual number of potential snapshots, which it can then use for a
> > subsequent snapshot request for the actual information.
> >
> > To prevent future ABI breakage, pass in the requested entry_size.
> > The KFD will return its own entry_size in case the debugger still wants
> > to log the information in a core dump on a sizing failure.
> >
> > Also allow the debugger to clear exceptions when doing a snapshot.
> >
> > v2: change buf_size arg to num_queues for clarity.
> > fix minimum entry size calculation.
> >
> > Signed-off-by: Jonathan Kim
>
> Two nit-picks inline.
>
> > ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  6 +++
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 41 +++++++++++++++++++
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  4 ++
> >  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  5 +++
> >  .../amd/amdkfd/kfd_process_queue_manager.c    | 40 ++++++++++++++++++
> >  5 files changed, 96 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index 2c8f107237ee..cea393350980 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -2961,6 +2961,12 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
> >  			&args->query_exception_info.info_size);
> >  		break;
> >  	case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
> > +		r = pqm_get_queue_snapshot(&target->pqm,
> > +				args->queue_snapshot.exception_mask,
> > +				(void __user *)args->queue_snapshot.snapshot_buf_ptr,
> > +				&args->queue_snapshot.num_queues,
> > +				&args->queue_snapshot.entry_size);
> > +		break;
> >  	case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
> >  		pr_warn("Debug op %i not supported yet\n", args->op);
> >  		r = -EACCES;
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index 589efbefc8dc..51f8c5676c56 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -2950,6 +2950,47 @@ int suspend_queues(struct kfd_process *p,
> >  	return total_suspended;
> >  }
> >
> > +static uint32_t set_queue_type_for_user(struct queue_properties *q_props)
> > +{
> > +	switch (q_props->type) {
> > +	case KFD_QUEUE_TYPE_COMPUTE:
> > +		return q_props->format == KFD_QUEUE_FORMAT_PM4
> > +			? KFD_IOC_QUEUE_TYPE_COMPUTE
> > +			: KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
> > +	case KFD_QUEUE_TYPE_SDMA:
> > +		return KFD_IOC_QUEUE_TYPE_SDMA;
> > +	case KFD_QUEUE_TYPE_SDMA_XGMI:
> > +		return KFD_IOC_QUEUE_TYPE_SDMA_XGMI;
> > +	default:
> > +		WARN_ONCE(true, "queue type not recognized!");
> > +		return 0xffffffff;
> > +	};
> > +}
> > +
> > +void set_queue_snapshot_entry(struct device_queue_manager *dqm,
> > +			      struct queue *q,
> > +			      uint64_t exception_clear_mask,
> > +			      struct kfd_queue_snapshot_entry *qss_entry)
>
> The dqm parameter is not needed. The function can get this from
> q->device->dqm. It's also only needed for dqm locking, and I'm not sure
> that's even necessary. Aren't the event_mutex and target process mutex
> held by the caller enough to protect the exception_status and other
> queue properties?

I can't really remember why we took the device lock during the
experimental phase, to be honest, but I think you're right. The process
event lock should protect the exception status against interrupt
writes, and the process lock should protect everything else (property
updates, destruction, etc.).

Thanks,

Jon
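If we drop the dqm parameter and the locking, the helper could look
something like this (untested sketch; it assumes the caller holds the
target process lock and event_mutex, and the prototype in
kfd_device_queue_manager.h would change to match):

	/* Sketch only: same field copies as the patch below, minus the
	 * dqm parameter and dqm_lock/dqm_unlock. Caller must hold the
	 * target process lock and event_mutex.
	 */
	void set_queue_snapshot_entry(struct queue *q,
				      uint64_t exception_clear_mask,
				      struct kfd_queue_snapshot_entry *qss_entry)
	{
		qss_entry->ring_base_address = q->properties.queue_address;
		qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr;
		qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr;
		qss_entry->ctx_save_restore_address =
					q->properties.ctx_save_restore_area_address;
		qss_entry->ctx_save_restore_area_size =
					q->properties.ctx_save_restore_area_size;
		qss_entry->exception_status = q->properties.exception_status;
		qss_entry->queue_id = q->properties.queue_id;
		qss_entry->gpu_id = q->device->id;
		qss_entry->ring_size = (uint32_t)q->properties.queue_size;
		qss_entry->queue_type = set_queue_type_for_user(&q->properties);
		/* Clearing the mask here is still safe under the process lock. */
		q->properties.exception_status &= ~exception_clear_mask;
	}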
> > +{
> > +	dqm_lock(dqm);
> > +
> > +	qss_entry->ring_base_address = q->properties.queue_address;
> > +	qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr;
> > +	qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr;
> > +	qss_entry->ctx_save_restore_address =
> > +				q->properties.ctx_save_restore_area_address;
> > +	qss_entry->ctx_save_restore_area_size =
> > +				q->properties.ctx_save_restore_area_size;
> > +	qss_entry->exception_status = q->properties.exception_status;
> > +	qss_entry->queue_id = q->properties.queue_id;
> > +	qss_entry->gpu_id = q->device->id;
> > +	qss_entry->ring_size = (uint32_t)q->properties.queue_size;
> > +	qss_entry->queue_type = set_queue_type_for_user(&q->properties);
> > +	q->properties.exception_status &= ~exception_clear_mask;
> > +
> > +	dqm_unlock(dqm);
> > +}
> > +
> >  int debug_lock_and_unmap(struct device_queue_manager *dqm)
> >  {
> >  	int r;
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > index 12643528684c..094705b932fc 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > @@ -297,6 +297,10 @@ int resume_queues(struct kfd_process *p,
> >  		  bool resume_all_queues,
> >  		  uint32_t num_queues,
> >  		  uint32_t *usr_queue_id_array);
> > +void set_queue_snapshot_entry(struct device_queue_manager *dqm,
> > +			      struct queue *q,
> > +			      uint64_t exception_clear_mask,
> > +			      struct kfd_queue_snapshot_entry *qss_entry);
> >  int debug_lock_and_unmap(struct device_queue_manager *dqm);
> >  int debug_map_and_unlock(struct device_queue_manager *dqm);
> >  int debug_refresh_runlist(struct device_queue_manager *dqm);
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > index aee4fe20e676..ebd701143981 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > @@ -1302,6 +1302,11 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
> >  		       void __user *ctl_stack,
> >  		       u32 *ctl_stack_used_size,
> >  		       u32 *save_area_used_size);
> > +int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
> > +			   uint64_t exception_clear_mask,
> > +			   void __user *buf,
> > +			   int *num_qss_entries,
> > +			   uint32_t *entry_size);
> >
> >  int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
> >  			      uint64_t fence_value,
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > index 15db83c9a585..30df1046c30b 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > @@ -569,6 +569,46 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
> >  					       save_area_used_size);
> >  }
> >
> > +int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
> > +			   uint64_t exception_clear_mask,
> > +			   void __user *buf,
> > +			   int *num_qss_entries,
> > +			   uint32_t *entry_size)
> > +{
> > +	struct process_queue_node *pqn;
> > +	uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
> > +	int r = 0;
> > +
> > +	*num_qss_entries = 0;
> > +	if (!(*entry_size))
> > +		return -EINVAL;
> > +
> > +	*entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
> > +	mutex_lock(&pqm->process->event_mutex);
> > +
> > +	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
> > +		if (!pqn->q)
> > +			continue;
> > +
> > +		if (*num_qss_entries < tmp_qss_entries) {
> > +			struct kfd_queue_snapshot_entry src = {0};
>
> It's safer to use memset here. This initialization may not initialize
> padding, so it doesn't guarantee that no uninitialized data leaks from
> kernel mode to user mode.
>
> Regards,
>   Felix
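In context, that would look something like this (sketch of Felix's
suggestion):

			struct kfd_queue_snapshot_entry src;

			memset(&src, 0, sizeof(src));

memset() zeroes the whole object, padding bytes included, so
copy_to_user() can never copy stale kernel stack bytes to user mode; a
"= {0}" initializer only guarantees that the named members are zeroed,
and the compiler may leave padding untouched.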
> > +
> > +			set_queue_snapshot_entry(pqn->q->device->dqm,
> > +						 pqn->q, exception_clear_mask,
> > +						 &src);
> > +
> > +			if (copy_to_user(buf, &src, *entry_size)) {
> > +				r = -EFAULT;
> > +				break;
> > +			}
> > +			buf += tmp_entry_size;
> > +		}
> > +		*num_qss_entries += 1;
> > +	}
> > +
> > +	mutex_unlock(&pqm->process->event_mutex);
> > +	return r;
> > +}
> > +
> >  static int get_queue_data_sizes(struct kfd_process_device *pdd,
> >  				struct queue *q,
> >  				uint32_t *mqd_size,
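As an aside, the two-call pattern from the commit message (query the
count with num_queues == 0, then fetch the entries) would look roughly
like this on the debugger side. This is a hypothetical user-mode
sketch, not tested code: it assumes the uapi names from this series
(struct kfd_ioctl_dbg_trap_args with a queue_snapshot member,
KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT, and an AMDKFD_IOC_DBG_TRAP ioctl)
and a kfd_fd already opened on /dev/kfd:

	#include <errno.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/types.h>
	#include "kfd_ioctl.h"	/* uapi header from this series */

	/* Hypothetical sketch: fetch a queue snapshot of a debugged process. */
	static int get_queue_snapshot(int kfd_fd, pid_t target_pid,
				      struct kfd_queue_snapshot_entry **out,
				      uint32_t *out_count)
	{
		struct kfd_ioctl_dbg_trap_args args;
		struct kfd_queue_snapshot_entry *entries;
		uint32_t n;

		/* First call: num_queues == 0 only queries the queue count. */
		memset(&args, 0, sizeof(args));
		args.pid = target_pid;
		args.op = KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT;
		args.queue_snapshot.exception_mask = 0;	/* clear nothing yet */
		args.queue_snapshot.num_queues = 0;
		args.queue_snapshot.entry_size = sizeof(*entries);
		if (ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args) == -1)
			return -errno;

		n = args.queue_snapshot.num_queues;
		if (!n) {
			*out = NULL;
			*out_count = 0;
			return 0;
		}

		/* Second call: fetch the actual entries. */
		entries = calloc(n, sizeof(*entries));
		if (!entries)
			return -ENOMEM;
		args.queue_snapshot.snapshot_buf_ptr = (uint64_t)(uintptr_t)entries;
		args.queue_snapshot.num_queues = n;
		if (ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args) == -1) {
			free(entries);
			return -errno;
		}

		/* The KFD writes back the current total, which can exceed n if
		 * queues were created in between; at most n were copied. On a
		 * sizing mismatch it also writes back its own entry_size, and
		 * only min(our size, KFD's size) bytes per entry are valid.
		 */
		*out = entries;
		*out_count = args.queue_snapshot.num_queues < n ?
			     args.queue_snapshot.num_queues : n;
		return 0;
	}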