Thanks very much! Please review again. Rico ________________________________ From: Tuikov, Luben Sent: Wednesday, October 16, 2019 1:59 To: Yin, Tianci (Rico) ; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org Cc: Koenig, Christian ; Deucher, Alexander Subject: Re: [PATCH 8/8] drm/amdgpu/psp: add psp memory training implementation On 2019-10-13 11:21 p.m., Tianci Yin wrote: > From: "Tianci.Yin" > > add memory training implementation code to save resume time. > > Change-Id: I625794a780b11d824ab57ef39cc33b872c6dc6c9 > Reviewed-by: Alex Deucher > Signed-off-by: Tianci.Yin > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 9 ++ > drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 159 ++++++++++++++++++++++++ > 3 files changed, 169 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 8704f93cabf2..c2b776fd82b5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -151,6 +151,7 @@ extern uint amdgpu_sdma_phase_quantum; > extern char *amdgpu_disable_cu; > extern char *amdgpu_virtual_display; > extern uint amdgpu_pp_feature_mask; > +extern uint amdgpu_force_long_training; > extern int amdgpu_job_hang_limit; > extern int amdgpu_lbpw; > extern int amdgpu_compute_multipipe; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index da7cbee25c61..c7d086569acb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -127,6 +127,7 @@ char *amdgpu_disable_cu = NULL; > char *amdgpu_virtual_display = NULL; > /* OverDrive(bit 14) disabled by default*/ > uint amdgpu_pp_feature_mask = 0xffffbfff; > +uint amdgpu_force_long_training = 0; > int amdgpu_job_hang_limit = 0; > int amdgpu_lbpw = -1; > int amdgpu_compute_multipipe = -1; > @@ -390,6 +391,14 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); > MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))"); > module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444); > > +/** > + * DOC: forcelongtraining (uint) > + * Force long memory training in resume. > + * The default is zero, indicates short training in resume. > + */ > +MODULE_PARM_DESC(forcelongtraining, "force memory long training"); > +module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444); > + > /** > * DOC: pcie_gen_cap (uint) > * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h. > diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > index 2ba0f68ced10..b7efaa3e913c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > @@ -902,6 +902,162 @@ static int psp_v11_0_rlc_autoload_start(struct psp_context *psp) > return psp_rlc_autoload_start(psp); > } > > +static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) > +{ > + int ret = 0; > + int i = 0; > + uint32_t data_32 = 0; NAK! Leave all of those integer variables uninitialized. > + struct amdgpu_device *adev = psp->adev; > + > + data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20); > + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32); > + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg); > + > + /*max 5s*/ > + while (i < 50) { > + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), > + 0x80000000, 0x80000000, false); > + if (ret == 0) > + break; > + i++; > + } NAK! For-loop please: for (i = 0; i < 50; i++) { ret = ...; } Regards, Luben > + DRM_DEBUG("%s training %s, cost %d * %dms.\n", > + (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long", > + (ret == 0) ? "succeed" : "failed", > + i, adev->usec_timeout/1000); > + return ret; > +} > + > +static int psp_v11_0_memory_training_fini(struct psp_context *psp) > +{ > + int ret = 0; > + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; > + > + ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; > + if(ctx->sys_cache) { > + kfree(ctx->sys_cache); > + ctx->sys_cache = NULL; > + } > + > + return ret; > +} > + > +static int psp_v11_0_memory_training_init(struct psp_context *psp) > +{ > + int ret = 0; > + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; > + > + if(ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) { > + DRM_DEBUG("memory training does not support!\n"); > + return 0; > + } > + > + ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL); > + if(ctx->sys_cache == NULL) { > + DRM_ERROR("alloc mem_train_ctx.sys_cache failed(%d)!\n", ret); > + ret = -ENOMEM; > + goto Err_out; > + } > + > + DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", > + ctx->train_data_size, > + ctx->p2c_train_data_offset, > + ctx->c2p_train_data_offset); > + ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS; > + return 0; > + > +Err_out: > + psp_v11_0_memory_training_fini(psp); > + return ret; > +} > + > +/* > + * save and restore proces > + */ > +static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) > +{ > + int ret = 0; > + uint32_t p2c_header[4]; > + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; > + uint32_t *pcache = (uint32_t*)ctx->sys_cache; > + > + if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { > + DRM_DEBUG("Memory training does not support.\n"); > + return 0; > + } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) { > + DRM_ERROR("Please check initialization failure.\n"); > + return -EINVAL; > + } > + > + if (psp_v11_0_is_sos_alive(psp)) { > + DRM_DEBUG("sos is alive, skip memory training.\n"); > + return 0; > + } > + > + amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); > + DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", > + pcache[0], pcache[1], pcache[2], pcache[3], > + p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); > + > + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { > + DRM_DEBUG("short training depend on restore.\n"); > + ops |= PSP_MEM_TRAIN_RESTORE; > + } > + > + if ((ops & PSP_MEM_TRAIN_RESTORE) && > + pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { > + DRM_DEBUG("sys_cache[0] is invalid, restore depend on save.\n"); > + ops |= PSP_MEM_TRAIN_SAVE; > + } > + > + if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE && > + !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE && > + pcache[3] == p2c_header[3])) { > + DRM_DEBUG("sys_cache is invalid or out-of-date, need save training data to sys_cache.\n"); > + ops |= PSP_MEM_TRAIN_SAVE; > + } > + > + if ((ops & PSP_MEM_TRAIN_SAVE) && > + p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { > + DRM_DEBUG("p2c_header[0] is invalid, save depend on long training.\n"); > + ops |= PSP_MEM_TRAIN_SEND_LONG_MSG; > + } > + > + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { > + ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG; > + ops |= PSP_MEM_TRAIN_SAVE; > + } > + > + DRM_DEBUG("mem training ops:%x.\n", ops); > + > + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { > + ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); > + if (ret) { > + DRM_ERROR("send long training msg failed.\n"); > + return ret; > + } > + } > + > + if (ops & PSP_MEM_TRAIN_SAVE) { > + amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false); > + } > + > + if (ops & PSP_MEM_TRAIN_RESTORE) { > + amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true); > + } > + > + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { > + ret = psp_v11_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ? > + PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN); > + if (ret) { > + DRM_ERROR("send training msg failed.\n"); > + return ret; > + } > + } > + ctx->training_cnt++; > + return ret; > +} > + > static const struct psp_funcs psp_v11_0_funcs = { > .init_microcode = psp_v11_0_init_microcode, > .bootloader_load_kdb = psp_v11_0_bootloader_load_kdb, > @@ -922,6 +1078,9 @@ static const struct psp_funcs psp_v11_0_funcs = { > .ras_trigger_error = psp_v11_0_ras_trigger_error, > .ras_cure_posion = psp_v11_0_ras_cure_posion, > .rlc_autoload_start = psp_v11_0_rlc_autoload_start, > + .mem_training_init = psp_v11_0_memory_training_init, > + .mem_training_fini = psp_v11_0_memory_training_fini, > + .mem_training = psp_v11_0_memory_training, > }; > > void psp_v11_0_set_psp_funcs(struct psp_context *psp) >