* [PATCH 1/3] habanalabs: move relevant datapath work outside cs lock @ 2021-03-21 20:11 Oded Gabbay 2021-03-21 20:11 ` [PATCH 2/3] habanalabs: support legacy and new pll indexes Oded Gabbay 2021-03-21 20:11 ` [PATCH 3/3] habanalabs: improve utilization calculation Oded Gabbay 0 siblings, 2 replies; 5+ messages in thread From: Oded Gabbay @ 2021-03-21 20:11 UTC (permalink / raw) To: linux-kernel; +Cc: Ofir Bitton From: Ofir Bitton <obitton@habana.ai> In order to shorten the time cs lock is being held, we move any possible work outside of the cs lock. Signed-off-by: Ofir Bitton <obitton@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org> --- .../habanalabs/common/command_submission.c | 86 +++++++++++-------- drivers/misc/habanalabs/common/device.c | 13 ++- drivers/misc/habanalabs/common/habanalabs.h | 4 + 3 files changed, 68 insertions(+), 35 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 720588aed28b..ba6d3e317255 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -84,6 +84,38 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask) return 0; } +static void sob_reset_work(struct work_struct *work) +{ + struct hl_cs_compl *hl_cs_cmpl = + container_of(work, struct hl_cs_compl, sob_reset_work); + struct hl_device *hdev = hl_cs_cmpl->hdev; + + /* + * A signal CS can get completion while the corresponding wait + * for signal CS is on its way to the PQ. The wait for signal CS + * will get stuck if the signal CS incremented the SOB to its + * max value and there are no pending (submitted) waits on this + * SOB. + * We do the following to void this situation: + * 1. 
The wait for signal CS must get a ref for the signal CS as + * soon as possible in cs_ioctl_signal_wait() and put it + * before being submitted to the PQ but after it incremented + * the SOB refcnt in init_signal_wait_cs(). + * 2. Signal/Wait for signal CS will decrement the SOB refcnt + * here. + * These two measures guarantee that the wait for signal CS will + * reset the SOB upon completion rather than the signal CS and + * hence the above scenario is avoided. + */ + kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + + if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) + hdev->asic_funcs->reset_sob_group(hdev, + hl_cs_cmpl->sob_group); + + kfree(hl_cs_cmpl); +} + static void hl_fence_release(struct kref *kref) { struct hl_fence *fence = @@ -109,28 +141,9 @@ static void hl_fence_release(struct kref *kref) hl_cs_cmpl->hw_sob->sob_id, hl_cs_cmpl->sob_val); - /* - * A signal CS can get completion while the corresponding wait - * for signal CS is on its way to the PQ. The wait for signal CS - * will get stuck if the signal CS incremented the SOB to its - * max value and there are no pending (submitted) waits on this - * SOB. - * We do the following to void this situation: - * 1. The wait for signal CS must get a ref for the signal CS as - * soon as possible in cs_ioctl_signal_wait() and put it - * before being submitted to the PQ but after it incremented - * the SOB refcnt in init_signal_wait_cs(). - * 2. Signal/Wait for signal CS will decrement the SOB refcnt - * here. - * These two measures guarantee that the wait for signal CS will - * reset the SOB upon completion rather than the signal CS and - * hence the above scenario is avoided. 
- */ - kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work); - if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) - hdev->asic_funcs->reset_sob_group(hdev, - hl_cs_cmpl->sob_group); + return; } free: @@ -670,9 +683,23 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, goto free_cs; } + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); + if (!cs->jobs_in_queue_cnt) + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); + + if (!cs->jobs_in_queue_cnt) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); + rc = -ENOMEM; + goto free_cs_cmpl; + } + cs_cmpl->hdev = hdev; cs_cmpl->type = cs->type; spin_lock_init(&cs_cmpl->lock); + INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work); cs->fence = &cs_cmpl->base_fence; spin_lock(&ctx->cs_lock); @@ -702,19 +729,6 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, goto free_fence; } - cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, - sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); - if (!cs->jobs_in_queue_cnt) - cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, - sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); - - if (!cs->jobs_in_queue_cnt) { - atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); - atomic64_inc(&cntr->out_of_mem_drop_cnt); - rc = -ENOMEM; - goto free_fence; - } - /* init hl_fence */ hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); @@ -737,6 +751,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, free_fence: spin_unlock(&ctx->cs_lock); + kfree(cs->jobs_in_queue_cnt); +free_cs_cmpl: kfree(cs_cmpl); free_cs: kfree(cs); @@ -759,6 +775,8 @@ void hl_cs_rollback_all(struct hl_device *hdev) int i; struct hl_cs *cs, *tmp; + flush_workqueue(hdev->sob_reset_wq); + /* flush all completions before iterating over the CS mirror list 
in * order to avoid a race with the release functions */ diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index e22df6824bc3..53bc5ccb612f 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -368,11 +368,19 @@ static int device_early_init(struct hl_device *hdev) goto free_cq_wq; } + hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0); + if (!hdev->sob_reset_wq) { + dev_err(hdev->dev, + "Failed to allocate SOB reset workqueue\n"); + rc = -ENOMEM; + goto free_eq_wq; + } + hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), GFP_KERNEL); if (!hdev->hl_chip_info) { rc = -ENOMEM; - goto free_eq_wq; + goto free_sob_reset_wq; } hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE, @@ -418,6 +426,8 @@ static int device_early_init(struct hl_device *hdev) kfree(hdev->idle_busy_ts_arr); free_chip_info: kfree(hdev->hl_chip_info); +free_sob_reset_wq: + destroy_workqueue(hdev->sob_reset_wq); free_eq_wq: destroy_workqueue(hdev->eq_wq); free_cq_wq: @@ -454,6 +464,7 @@ static void device_early_fini(struct hl_device *hdev) kfree(hdev->idle_busy_ts_arr); kfree(hdev->hl_chip_info); + destroy_workqueue(hdev->sob_reset_wq); destroy_workqueue(hdev->eq_wq); destroy_workqueue(hdev->device_reset_work.wq); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 2dcefd6485e5..65f34918faed 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -528,6 +528,7 @@ struct hl_fence { /** * struct hl_cs_compl - command submission completion object. + * @sob_reset_work: workqueue object to run SOB reset flow. * @base_fence: hl fence object. * @lock: spinlock to protect fence. * @hdev: habanalabs device structure. @@ -538,6 +539,7 @@ struct hl_fence { * @sob_group: the SOB group that is used in this collective wait CS. 
*/ struct hl_cs_compl { + struct work_struct sob_reset_work; struct hl_fence base_fence; spinlock_t lock; struct hl_device *hdev; @@ -1905,6 +1907,7 @@ struct hl_mmu_funcs { * @cq_wq: work queues of completion queues for executing work in process * context. * @eq_wq: work queue of event queue for executing work in process context. + * @sob_reset_wq: work queue for sob reset executions. * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. * @cs_mirror_list: CS mirror list for TDR. @@ -2022,6 +2025,7 @@ struct hl_device { struct hl_user_interrupt common_user_interrupt; struct workqueue_struct **cq_wq; struct workqueue_struct *eq_wq; + struct workqueue_struct *sob_reset_wq; struct hl_ctx *kernel_ctx; struct hl_hw_queue *kernel_queues; struct list_head cs_mirror_list; -- 2.25.1 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/3] habanalabs: support legacy and new pll indexes 2021-03-21 20:11 [PATCH 1/3] habanalabs: move relevant datapath work outside cs lock Oded Gabbay @ 2021-03-21 20:11 ` Oded Gabbay 2021-04-15 14:17 ` Nathan Chancellor 2021-03-21 20:11 ` [PATCH 3/3] habanalabs: improve utilization calculation Oded Gabbay 1 sibling, 1 reply; 5+ messages in thread From: Oded Gabbay @ 2021-03-21 20:11 UTC (permalink / raw) To: linux-kernel; +Cc: Ohad Sharabi From: Ohad Sharabi <osharabi@habana.ai> In order to use minimum of hard coded values common to LKD and F/W a dynamic method to work with PLLs is introduced in this patch. Formerly asic specific PLL numbering is now common for all asics. To be backward compatible a bit in dev status is defined, if the bit is not set LKD will keep working with old PLL numbering. Signed-off-by: Ohad Sharabi <osharabi@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org> --- drivers/misc/habanalabs/common/firmware_if.c | 49 ++++++++++++++++++- drivers/misc/habanalabs/common/habanalabs.h | 14 ++++-- drivers/misc/habanalabs/common/sysfs.c | 24 ++++++--- drivers/misc/habanalabs/gaudi/gaudi.c | 33 +++++++++++++ drivers/misc/habanalabs/goya/goya.c | 26 ++++++++++ .../misc/habanalabs/include/common/cpucp_if.h | 41 ++++++++++++++++ .../habanalabs/include/common/hl_boot_if.h | 6 +++ .../habanalabs/include/gaudi/gaudi_fw_if.h | 14 ------ .../misc/habanalabs/include/goya/goya_fw_if.h | 11 ----- 9 files changed, 182 insertions(+), 36 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 2a58edaf984a..092691a8917d 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -539,18 +539,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) return rc; } -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, +int get_used_pll_index(struct 
hl_device *hdev, enum pll_index input_pll_index, + enum pll_index *pll_index) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u8 pll_byte, pll_bit_off; + bool dynamic_pll; + + if (input_pll_index >= PLL_MAX) { + dev_err(hdev->dev, "PLL index %d is out of range\n", + input_pll_index); + return -EINVAL; + } + + dynamic_pll = prop->fw_security_status_valid && + (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN); + + if (!dynamic_pll) { + /* + * in case we are working with legacy FW (each asic has unique + * PLL numbering) extract the legacy numbering + */ + *pll_index = hdev->legacy_pll_map[input_pll_index]; + return 0; + } + + /* PLL map is a u8 array */ + pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3]; + pll_bit_off = input_pll_index & 0x7; + + if (!(pll_byte & BIT(pll_bit_off))) { + dev_err(hdev->dev, "PLL index %d is not supported\n", + input_pll_index); + return -EINVAL; + } + + *pll_index = input_pll_index; + + return 0; +} + +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, u16 *pll_freq_arr) { struct cpucp_packet pkt; + enum pll_index used_pll_idx; u64 result; int rc; + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return rc; + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_type = __cpu_to_le16(pll_index); + pkt.pll_type = __cpu_to_le16((u16)used_pll_idx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 65f34918faed..dc8126b270d1 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1946,6 +1946,8 @@ struct hl_mmu_funcs { * @aggregated_cs_counters: aggregated cs counters among all contexts * @mmu_priv: device-specific MMU data. * @mmu_func: device-related MMU functions. 
+ * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and + * static (asic specific) PLL indexes. * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This @@ -2070,6 +2072,8 @@ struct hl_device { struct hl_mmu_priv mmu_priv; struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; + enum pll_index *legacy_pll_map; + atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; @@ -2383,7 +2387,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy); -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, +int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, + enum pll_index *pll_index); +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, u16 *pll_freq_arr); int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, @@ -2404,8 +2410,10 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, int hl_pci_init(struct hl_device *hdev); void hl_pci_fini(struct hl_device *hdev); -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); +long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, + bool curr); +void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, + u64 freq); int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value); int hl_set_temperature(struct hl_device *hdev, diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index f37634cf8b65..c7ac5dc0cda4 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -9,12 +9,18 @@ #include 
<linux/pci.h> -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) +long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, + bool curr) { struct cpucp_packet pkt; + u32 used_pll_idx; u64 result; int rc; + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return rc; + memset(&pkt, 0, sizeof(pkt)); if (curr) @@ -23,7 +29,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) else pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = cpu_to_le32(pll_index); + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); @@ -31,23 +37,29 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) if (rc) { dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n", - pll_index, rc); + used_pll_idx, rc); return rc; } return (long) result; } -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) +void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, + u64 freq) { struct cpucp_packet pkt; + u32 used_pll_idx; int rc; + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return; + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = cpu_to_le32(pll_index); + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); pkt.value = cpu_to_le64(freq); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -56,7 +68,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) if (rc) dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n", - pll_index, rc); + used_pll_idx, rc); } u64 hl_get_max_power(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index a65ae0dbdb92..8fa190fde462 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ 
b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -105,6 +105,36 @@ #define GAUDI_PLL_MAX 10 +/* + * this enum kept here for compatibility with old FW (in which each asic has + * unique PLL numbering) + */ +enum gaudi_pll_index { + GAUDI_CPU_PLL = 0, + GAUDI_PCI_PLL, + GAUDI_SRAM_PLL, + GAUDI_HBM_PLL, + GAUDI_NIC_PLL, + GAUDI_DMA_PLL, + GAUDI_MESH_PLL, + GAUDI_MME_PLL, + GAUDI_TPC_PLL, + GAUDI_IF_PLL, +}; + +static enum pll_index gaudi_pll_map[PLL_MAX] = { + [CPU_PLL] = GAUDI_CPU_PLL, + [PCI_PLL] = GAUDI_PCI_PLL, + [SRAM_PLL] = GAUDI_SRAM_PLL, + [HBM_PLL] = GAUDI_HBM_PLL, + [NIC_PLL] = GAUDI_NIC_PLL, + [DMA_PLL] = GAUDI_DMA_PLL, + [MESH_PLL] = GAUDI_MESH_PLL, + [MME_PLL] = GAUDI_MME_PLL, + [TPC_PLL] = GAUDI_TPC_PLL, + [IF_PLL] = GAUDI_IF_PLL, +}; + static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", @@ -1588,6 +1618,9 @@ static int gaudi_sw_init(struct hl_device *hdev) hdev->asic_specific = gaudi; + /* store legacy PLL map */ + hdev->legacy_pll_map = gaudi_pll_map; + /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 9e7531167c73..f3b3145b206f 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -118,6 +118,29 @@ #define IS_MME_IDLE(mme_arch_sts) \ (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) +/* + * this enum kept here for compatibility with old FW (in which each asic has + * unique PLL numbering) + */ +enum goya_pll_index { + GOYA_CPU_PLL = 0, + GOYA_IC_PLL, + GOYA_MC_PLL, + GOYA_MME_PLL, + GOYA_PCI_PLL, + GOYA_EMMC_PLL, + GOYA_TPC_PLL, +}; + +static enum pll_index goya_pll_map[PLL_MAX] = { + [CPU_PLL] = GOYA_CPU_PLL, + [IC_PLL] = GOYA_IC_PLL, + [MC_PLL] = GOYA_MC_PLL, + 
[MME_PLL] = GOYA_MME_PLL, + [PCI_PLL] = GOYA_PCI_PLL, + [EMMC_PLL] = GOYA_EMMC_PLL, + [TPC_PLL] = GOYA_TPC_PLL, +}; static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = { "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3", @@ -853,6 +876,9 @@ static int goya_sw_init(struct hl_device *hdev) hdev->asic_specific = goya; + /* store legacy PLL map */ + hdev->legacy_pll_map = goya_pll_map; + /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0); diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index 6ba480a316ce..e745c78dd8fd 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -28,6 +28,9 @@ #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6 #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0 +#define PLL_MAP_MAX_BITS 128 +#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) + /* * info of the pkt queue pointers in the first async occurrence */ @@ -473,6 +476,42 @@ enum cpucp_pll_type_attributes { cpucp_pll_pci, }; +/* + * PLL enumeration table used for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations + * at the end of the table. + * Changing the order of entries or removing entries is not allowed. 
+ */ +enum pll_index { + CPU_PLL = 0, + PCI_PLL = 1, + NIC_PLL = 2, + DMA_PLL = 3, + MESH_PLL = 4, + MME_PLL = 5, + TPC_PLL = 6, + IF_PLL = 7, + SRAM_PLL = 8, + NS_DCORE_PLL = 9, + MESH_DCORE_PLL = 10, + HBM_PLL = 11, + TPC_DCORE_PLL = 12, + VIDEO_DCORE_PLL = 13, + SRAM_DCORE_PLL = 14, + NIC_PHY_DCORE_PLL = 15, + MSS_DCORE_PLL = 16, + DMA_DCORE_PLL = 17, + SIF_PLL = 18, + DDR_PLL = 19, + VID_PLL = 20, + BANK_PLL = 21, + MMU_PLL = 22, + IC_PLL = 23, + MC_PLL = 24, + EMMC_PLL = 25, + PLL_MAX +}; + /* Event Queue Packets */ struct eq_generic_event { @@ -547,6 +586,7 @@ struct cpucp_security_info { * @dram_size: available DRAM size. * @card_name: card name that will be displayed in HWMON subsystem on the host * @sec_info: security information + * @pll_map: Bit map of supported PLLs for current ASIC version. */ struct cpucp_info { struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; @@ -568,6 +608,7 @@ struct cpucp_info { __u8 pad[7]; struct cpucp_security_info sec_info; __le32 reserved6; + uint8_t pll_map[PLL_MAP_LEN]; }; struct cpucp_mac_addr { diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index d17185b6aea9..1717874ff306 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -179,6 +179,11 @@ * configured and is ready for use. * Initialized in: ppboot * + * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. + * FW sends to host a bitmap of supported + * PLLs. + * Initialized in: linux + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. 
* This is a main indication that the * running FW populates the device status @@ -206,6 +211,7 @@ #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15) #define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16) #define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << 17) +#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 19) #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) enum cpu_boot_status { diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h index 25acd9e87e20..a9f51f9f9e92 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h @@ -20,20 +20,6 @@ #define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */ #define LINUX_FW_OFFSET 0x800000 /* 8MB in HBM */ -enum gaudi_pll_index { - CPU_PLL = 0, - PCI_PLL, - SRAM_PLL, - HBM_PLL, - NIC_PLL, - DMA_PLL, - MESH_PLL, - MME_PLL, - TPC_PLL, - IF_PLL, - PLL_MAX -}; - enum gaudi_nic_axi_error { RXB, RXE, diff --git a/drivers/misc/habanalabs/include/goya/goya_fw_if.h b/drivers/misc/habanalabs/include/goya/goya_fw_if.h index daf8d8cd14be..bc05f86c73ac 100644 --- a/drivers/misc/habanalabs/include/goya/goya_fw_if.h +++ b/drivers/misc/habanalabs/include/goya/goya_fw_if.h @@ -15,17 +15,6 @@ #define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */ #define LINUX_FW_OFFSET 0x800000 /* 8MB in DDR */ -enum goya_pll_index { - CPU_PLL = 0, - IC_PLL, - MC_PLL, - MME_PLL, - PCI_PLL, - EMMC_PLL, - TPC_PLL, - PLL_MAX -}; - #define GOYA_PLL_FREQ_LOW 50000000 /* 50 MHz */ #endif /* GOYA_FW_IF_H */ -- 2.25.1 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 2/3] habanalabs: support legacy and new pll indexes 2021-03-21 20:11 ` [PATCH 2/3] habanalabs: support legacy and new pll indexes Oded Gabbay @ 2021-04-15 14:17 ` Nathan Chancellor 2021-04-17 9:10 ` Oded Gabbay 0 siblings, 1 reply; 5+ messages in thread From: Nathan Chancellor @ 2021-04-15 14:17 UTC (permalink / raw) To: Oded Gabbay; +Cc: linux-kernel, Ohad Sharabi On Sun, Mar 21, 2021 at 10:11:29PM +0200, Oded Gabbay wrote: > From: Ohad Sharabi <osharabi@habana.ai> > > In order to use minimum of hard coded values common to LKD and F/W > a dynamic method to work with PLLs is introduced in this patch. > Formerly asic specific PLL numbering is now common for all asics. > To be backward compatible a bit in dev status is defined, if the bit is > not set LKD will keep working with old PLL numbering. > > Signed-off-by: Ohad Sharabi <osharabi@habana.ai> > Reviewed-by: Oded Gabbay <ogabbay@kernel.org> > Signed-off-by: Oded Gabbay <ogabbay@kernel.org> > --- > drivers/misc/habanalabs/common/firmware_if.c | 49 ++++++++++++++++++- > drivers/misc/habanalabs/common/habanalabs.h | 14 ++++-- > drivers/misc/habanalabs/common/sysfs.c | 24 ++++++--- > drivers/misc/habanalabs/gaudi/gaudi.c | 33 +++++++++++++ > drivers/misc/habanalabs/goya/goya.c | 26 ++++++++++ > .../misc/habanalabs/include/common/cpucp_if.h | 41 ++++++++++++++++ > .../habanalabs/include/common/hl_boot_if.h | 6 +++ > .../habanalabs/include/gaudi/gaudi_fw_if.h | 14 ------ > .../misc/habanalabs/include/goya/goya_fw_if.h | 11 ----- > 9 files changed, 182 insertions(+), 36 deletions(-) > > diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c > index 2a58edaf984a..092691a8917d 100644 > --- a/drivers/misc/habanalabs/common/firmware_if.c > +++ b/drivers/misc/habanalabs/common/firmware_if.c > @@ -539,18 +539,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) > return rc; > } > > -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, 
u16 pll_index, > +int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, > + enum pll_index *pll_index) > +{ > + struct asic_fixed_properties *prop = &hdev->asic_prop; > + u8 pll_byte, pll_bit_off; > + bool dynamic_pll; > + > + if (input_pll_index >= PLL_MAX) { > + dev_err(hdev->dev, "PLL index %d is out of range\n", > + input_pll_index); > + return -EINVAL; > + } > + > + dynamic_pll = prop->fw_security_status_valid && > + (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN); > + > + if (!dynamic_pll) { > + /* > + * in case we are working with legacy FW (each asic has unique > + * PLL numbering) extract the legacy numbering > + */ > + *pll_index = hdev->legacy_pll_map[input_pll_index]; > + return 0; > + } > + > + /* PLL map is a u8 array */ > + pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3]; > + pll_bit_off = input_pll_index & 0x7; > + > + if (!(pll_byte & BIT(pll_bit_off))) { > + dev_err(hdev->dev, "PLL index %d is not supported\n", > + input_pll_index); > + return -EINVAL; > + } > + > + *pll_index = input_pll_index; > + > + return 0; > +} > + > +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, > u16 *pll_freq_arr) > { > struct cpucp_packet pkt; > + enum pll_index used_pll_idx; > u64 result; > int rc; > > + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); > + if (rc) > + return rc; > + > memset(&pkt, 0, sizeof(pkt)); > > pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET << > CPUCP_PKT_CTL_OPCODE_SHIFT); > - pkt.pll_type = __cpu_to_le16(pll_index); > + pkt.pll_type = __cpu_to_le16((u16)used_pll_idx); > > rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), > HL_CPUCP_INFO_TIMEOUT_USEC, &result); > diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h > index 65f34918faed..dc8126b270d1 100644 > --- a/drivers/misc/habanalabs/common/habanalabs.h > +++ b/drivers/misc/habanalabs/common/habanalabs.h > @@ -1946,6 +1946,8 @@ 
struct hl_mmu_funcs { > * @aggregated_cs_counters: aggregated cs counters among all contexts > * @mmu_priv: device-specific MMU data. > * @mmu_func: device-related MMU functions. > + * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and > + * static (asic specific) PLL indexes. > * @dram_used_mem: current DRAM memory consumption. > * @timeout_jiffies: device CS timeout value. > * @max_power: the max power of the device, as configured by the sysadmin. This > @@ -2070,6 +2072,8 @@ struct hl_device { > struct hl_mmu_priv mmu_priv; > struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; > > + enum pll_index *legacy_pll_map; > + > atomic64_t dram_used_mem; > u64 timeout_jiffies; > u64 max_power; > @@ -2383,7 +2387,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, > struct hl_info_pci_counters *counters); > int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, > u64 *total_energy); > -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, > +int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, > + enum pll_index *pll_index); > +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, > u16 *pll_freq_arr); > int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power); > int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, > @@ -2404,8 +2410,10 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, > int hl_pci_init(struct hl_device *hdev); > void hl_pci_fini(struct hl_device *hdev); > > -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); > -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); > +long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, > + bool curr); > +void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, > + u64 freq); > int hl_get_temperature(struct hl_device *hdev, > int sensor_index, u32 attr, long *value); > int hl_set_temperature(struct hl_device *hdev, > diff 
--git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c > index f37634cf8b65..c7ac5dc0cda4 100644 > --- a/drivers/misc/habanalabs/common/sysfs.c > +++ b/drivers/misc/habanalabs/common/sysfs.c > @@ -9,12 +9,18 @@ > > #include <linux/pci.h> > > -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) > +long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, > + bool curr) > { > struct cpucp_packet pkt; > + u32 used_pll_idx; > u64 result; > int rc; > > + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); > + if (rc) > + return rc; > + > memset(&pkt, 0, sizeof(pkt)); > > if (curr) > @@ -23,7 +29,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) > else > pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << > CPUCP_PKT_CTL_OPCODE_SHIFT); > - pkt.pll_index = cpu_to_le32(pll_index); > + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); > > rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), > 0, &result); > @@ -31,23 +37,29 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) > if (rc) { > dev_err(hdev->dev, > "Failed to get frequency of PLL %d, error %d\n", > - pll_index, rc); > + used_pll_idx, rc); > return rc; > } > > return (long) result; > } > > -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) > +void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, > + u64 freq) > { > struct cpucp_packet pkt; > + u32 used_pll_idx; > int rc; > > + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); > + if (rc) > + return; > + > memset(&pkt, 0, sizeof(pkt)); > > pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << > CPUCP_PKT_CTL_OPCODE_SHIFT); > - pkt.pll_index = cpu_to_le32(pll_index); > + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); > pkt.value = cpu_to_le64(freq); > > rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), > @@ -56,7 +68,7 @@ void hl_set_frequency(struct hl_device 
*hdev, u32 pll_index, u64 freq) > if (rc) > dev_err(hdev->dev, > "Failed to set frequency to PLL %d, error %d\n", > - pll_index, rc); > + used_pll_idx, rc); > } > > u64 hl_get_max_power(struct hl_device *hdev) > diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c > index a65ae0dbdb92..8fa190fde462 100644 > --- a/drivers/misc/habanalabs/gaudi/gaudi.c > +++ b/drivers/misc/habanalabs/gaudi/gaudi.c > @@ -105,6 +105,36 @@ > > #define GAUDI_PLL_MAX 10 > > +/* > + * this enum kept here for compatibility with old FW (in which each asic has > + * unique PLL numbering > + */ > +enum gaudi_pll_index { > + GAUDI_CPU_PLL = 0, > + GAUDI_PCI_PLL, > + GAUDI_SRAM_PLL, > + GAUDI_HBM_PLL, > + GAUDI_NIC_PLL, > + GAUDI_DMA_PLL, > + GAUDI_MESH_PLL, > + GAUDI_MME_PLL, > + GAUDI_TPC_PLL, > + GAUDI_IF_PLL, > +}; > + > +static enum pll_index gaudi_pll_map[PLL_MAX] = { > + [CPU_PLL] = GAUDI_CPU_PLL, > + [PCI_PLL] = GAUDI_PCI_PLL, > + [SRAM_PLL] = GAUDI_SRAM_PLL, > + [HBM_PLL] = GAUDI_HBM_PLL, > + [NIC_PLL] = GAUDI_NIC_PLL, > + [DMA_PLL] = GAUDI_DMA_PLL, > + [MESH_PLL] = GAUDI_MESH_PLL, > + [MME_PLL] = GAUDI_MME_PLL, > + [TPC_PLL] = GAUDI_TPC_PLL, > + [IF_PLL] = GAUDI_IF_PLL, > +}; > + These maps end up introducing quite a few instances of -Wenum-conversion, which is visible by default with clang but still happens when explicitly enabled with GCC: drivers/misc/habanalabs/gaudi/gaudi.c:129:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [HBM_PLL] = GAUDI_HBM_PLL, ^~~~~~~~~~~~~ drivers/misc/habanalabs/gaudi/gaudi.c:128:15: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [SRAM_PLL] = GAUDI_SRAM_PLL, ^~~~~~~~~~~~~~ drivers/misc/habanalabs/gaudi/gaudi.c:135:13: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum 
pll_index' [-Wenum-conversion] [IF_PLL] = GAUDI_IF_PLL, ^~~~~~~~~~~~ drivers/misc/habanalabs/gaudi/gaudi.c:134:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [TPC_PLL] = GAUDI_TPC_PLL, ^~~~~~~~~~~~~ drivers/misc/habanalabs/gaudi/gaudi.c:133:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [MME_PLL] = GAUDI_MME_PLL, ^~~~~~~~~~~~~ drivers/misc/habanalabs/gaudi/gaudi.c:132:15: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [MESH_PLL] = GAUDI_MESH_PLL, ^~~~~~~~~~~~~~ drivers/misc/habanalabs/gaudi/gaudi.c:131:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [DMA_PLL] = GAUDI_DMA_PLL, ^~~~~~~~~~~~~ drivers/misc/habanalabs/gaudi/gaudi.c:130:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [NIC_PLL] = GAUDI_NIC_PLL, ^~~~~~~~~~~~~ drivers/misc/habanalabs/gaudi/gaudi.c:127:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [PCI_PLL] = GAUDI_PCI_PLL, ^~~~~~~~~~~~~ drivers/misc/habanalabs/gaudi/gaudi.c:126:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [CPU_PLL] = GAUDI_CPU_PLL, ^~~~~~~~~~~~~ 10 warnings generated. 
drivers/misc/habanalabs/goya/goya.c:141:15: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [EMMC_PLL] = GOYA_EMMC_PLL, ^~~~~~~~~~~~~ drivers/misc/habanalabs/goya/goya.c:138:13: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [MC_PLL] = GOYA_MC_PLL, ^~~~~~~~~~~ drivers/misc/habanalabs/goya/goya.c:137:13: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [IC_PLL] = GOYA_IC_PLL, ^~~~~~~~~~~ drivers/misc/habanalabs/goya/goya.c:142:14: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [TPC_PLL] = GOYA_TPC_PLL, ^~~~~~~~~~~~ drivers/misc/habanalabs/goya/goya.c:139:14: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [MME_PLL] = GOYA_MME_PLL, ^~~~~~~~~~~~ drivers/misc/habanalabs/goya/goya.c:140:14: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [PCI_PLL] = GOYA_PCI_PLL, ^~~~~~~~~~~~ drivers/misc/habanalabs/goya/goya.c:136:14: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] [CPU_PLL] = GOYA_CPU_PLL, ^~~~~~~~~~~~ 7 warnings generated. I have come up with two solutions but neither seems particularly great. 1. Change the type of the map then add an explicit cast. 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 8730b691ec61..5958824a68f4 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -122,7 +122,7 @@ enum gaudi_pll_index { GAUDI_IF_PLL, }; -static enum pll_index gaudi_pll_map[PLL_MAX] = { +static enum gaudi_pll_index gaudi_pll_map[PLL_MAX] = { [CPU_PLL] = GAUDI_CPU_PLL, [PCI_PLL] = GAUDI_PCI_PLL, [SRAM_PLL] = GAUDI_SRAM_PLL, @@ -1653,7 +1653,7 @@ static int gaudi_sw_init(struct hl_device *hdev) hdev->asic_specific = gaudi; /* store legacy PLL map */ - hdev->legacy_pll_map = gaudi_pll_map; + hdev->legacy_pll_map = (enum pll_index *)gaudi_pll_map; /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index e27338f4aad2..0325c9755485 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -132,7 +132,7 @@ enum goya_pll_index { GOYA_TPC_PLL, }; -static enum pll_index goya_pll_map[PLL_MAX] = { +static enum goya_pll_index goya_pll_map[PLL_MAX] = { [CPU_PLL] = GOYA_CPU_PLL, [IC_PLL] = GOYA_IC_PLL, [MC_PLL] = GOYA_MC_PLL, @@ -898,7 +898,7 @@ static int goya_sw_init(struct hl_device *hdev) hdev->asic_specific = goya; /* store legacy PLL map */ - hdev->legacy_pll_map = goya_pll_map; + hdev->legacy_pll_map = (enum pll_index *)goya_pll_map; /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 2. Just throw away the type, these values are really just used as integers. 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 44e89da30b4a..fd550a0a4bff 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2071,7 +2071,7 @@ struct hl_device { struct hl_mmu_priv mmu_priv; struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; - enum pll_index *legacy_pll_map; + int *legacy_pll_map; atomic64_t dram_used_mem; u64 timeout_jiffies; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 8730b691ec61..99319f2ef52d 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -122,7 +122,7 @@ enum gaudi_pll_index { GAUDI_IF_PLL, }; -static enum pll_index gaudi_pll_map[PLL_MAX] = { +static int gaudi_pll_map[PLL_MAX] = { [CPU_PLL] = GAUDI_CPU_PLL, [PCI_PLL] = GAUDI_PCI_PLL, [SRAM_PLL] = GAUDI_SRAM_PLL, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index e27338f4aad2..db5c948f0580 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -132,7 +132,7 @@ enum goya_pll_index { GOYA_TPC_PLL, }; -static enum pll_index goya_pll_map[PLL_MAX] = { +static int goya_pll_map[PLL_MAX] = { [CPU_PLL] = GOYA_CPU_PLL, [IC_PLL] = GOYA_IC_PLL, [MC_PLL] = GOYA_MC_PLL, Cheers, Nathan > static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { > "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", > "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", > @@ -1588,6 +1618,9 @@ static int gaudi_sw_init(struct hl_device *hdev) > > hdev->asic_specific = gaudi; > > + /* store legacy PLL map */ > + hdev->legacy_pll_map = gaudi_pll_map; > + > /* Create DMA pool for small allocations */ > hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), > &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); > diff --git a/drivers/misc/habanalabs/goya/goya.c 
b/drivers/misc/habanalabs/goya/goya.c > index 9e7531167c73..f3b3145b206f 100644 > --- a/drivers/misc/habanalabs/goya/goya.c > +++ b/drivers/misc/habanalabs/goya/goya.c > @@ -118,6 +118,29 @@ > #define IS_MME_IDLE(mme_arch_sts) \ > (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) > > +/* > + * this enum kept here for compatibility with old FW (in which each asic has > + * unique PLL numbering > + */ > +enum goya_pll_index { > + GOYA_CPU_PLL = 0, > + GOYA_IC_PLL, > + GOYA_MC_PLL, > + GOYA_MME_PLL, > + GOYA_PCI_PLL, > + GOYA_EMMC_PLL, > + GOYA_TPC_PLL, > +}; > + > +static enum pll_index goya_pll_map[PLL_MAX] = { > + [CPU_PLL] = GOYA_CPU_PLL, > + [IC_PLL] = GOYA_IC_PLL, > + [MC_PLL] = GOYA_MC_PLL, > + [MME_PLL] = GOYA_MME_PLL, > + [PCI_PLL] = GOYA_PCI_PLL, > + [EMMC_PLL] = GOYA_EMMC_PLL, > + [TPC_PLL] = GOYA_TPC_PLL, > +}; > > static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = { > "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3", > @@ -853,6 +876,9 @@ static int goya_sw_init(struct hl_device *hdev) > > hdev->asic_specific = goya; > > + /* store legacy PLL map */ > + hdev->legacy_pll_map = goya_pll_map; > + > /* Create DMA pool for small allocations */ > hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), > &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0); > diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h > index 6ba480a316ce..e745c78dd8fd 100644 > --- a/drivers/misc/habanalabs/include/common/cpucp_if.h > +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h > @@ -28,6 +28,9 @@ > #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6 > #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0 > > +#define PLL_MAP_MAX_BITS 128 > +#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) > + > /* > * info of the pkt queue pointers in the first async occurrence > */ > @@ -473,6 +476,42 @@ enum cpucp_pll_type_attributes { > cpucp_pll_pci, > }; > > +/* > + * PLL enumeration table used for all 
ASICs and future SW versions. > + * For future ASIC-LKD compatibility, we can only add new enumerations. > + * at the end of the table. > + * Changing the order of entries or removing entries is not allowed. > + */ > +enum pll_index { > + CPU_PLL = 0, > + PCI_PLL = 1, > + NIC_PLL = 2, > + DMA_PLL = 3, > + MESH_PLL = 4, > + MME_PLL = 5, > + TPC_PLL = 6, > + IF_PLL = 7, > + SRAM_PLL = 8, > + NS_DCORE_PLL = 9, > + MESH_DCORE_PLL = 10, > + HBM_PLL = 11, > + TPC_DCORE_PLL = 12, > + VIDEO_DCORE_PLL = 13, > + SRAM_DCORE_PLL = 14, > + NIC_PHY_DCORE_PLL = 15, > + MSS_DCORE_PLL = 16, > + DMA_DCORE_PLL = 17, > + SIF_PLL = 18, > + DDR_PLL = 19, > + VID_PLL = 20, > + BANK_PLL = 21, > + MMU_PLL = 22, > + IC_PLL = 23, > + MC_PLL = 24, > + EMMC_PLL = 25, > + PLL_MAX > +}; > + > /* Event Queue Packets */ > > struct eq_generic_event { > @@ -547,6 +586,7 @@ struct cpucp_security_info { > * @dram_size: available DRAM size. > * @card_name: card name that will be displayed in HWMON subsystem on the host > * @sec_info: security information > + * @pll_map: Bit map of supported PLLs for current ASIC version. > */ > struct cpucp_info { > struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; > @@ -568,6 +608,7 @@ struct cpucp_info { > __u8 pad[7]; > struct cpucp_security_info sec_info; > __le32 reserved6; > + uint8_t pll_map[PLL_MAP_LEN]; > }; > > struct cpucp_mac_addr { > diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h > index d17185b6aea9..1717874ff306 100644 > --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h > +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h > @@ -179,6 +179,11 @@ > * configured and is ready for use. > * Initialized in: ppboot > * > + * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. > + * FW sends to host a bitmap of supported > + * PLLs. > + * Initialized in: linux > + * > * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. 
> * This is a main indication that the > * running FW populates the device status > @@ -206,6 +211,7 @@ > #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15) > #define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16) > #define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << 17) > +#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 19) > #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) > > enum cpu_boot_status { > diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h > index 25acd9e87e20..a9f51f9f9e92 100644 > --- a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h > +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h > @@ -20,20 +20,6 @@ > #define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */ > #define LINUX_FW_OFFSET 0x800000 /* 8MB in HBM */ > > -enum gaudi_pll_index { > - CPU_PLL = 0, > - PCI_PLL, > - SRAM_PLL, > - HBM_PLL, > - NIC_PLL, > - DMA_PLL, > - MESH_PLL, > - MME_PLL, > - TPC_PLL, > - IF_PLL, > - PLL_MAX > -}; > - > enum gaudi_nic_axi_error { > RXB, > RXE, > diff --git a/drivers/misc/habanalabs/include/goya/goya_fw_if.h b/drivers/misc/habanalabs/include/goya/goya_fw_if.h > index daf8d8cd14be..bc05f86c73ac 100644 > --- a/drivers/misc/habanalabs/include/goya/goya_fw_if.h > +++ b/drivers/misc/habanalabs/include/goya/goya_fw_if.h > @@ -15,17 +15,6 @@ > #define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */ > #define LINUX_FW_OFFSET 0x800000 /* 8MB in DDR */ > > -enum goya_pll_index { > - CPU_PLL = 0, > - IC_PLL, > - MC_PLL, > - MME_PLL, > - PCI_PLL, > - EMMC_PLL, > - TPC_PLL, > - PLL_MAX > -}; > - > #define GOYA_PLL_FREQ_LOW 50000000 /* 50 MHz */ > > #endif /* GOYA_FW_IF_H */ > -- > 2.25.1 > ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 2/3] habanalabs: support legacy and new pll indexes 2021-04-15 14:17 ` Nathan Chancellor @ 2021-04-17 9:10 ` Oded Gabbay 0 siblings, 0 replies; 5+ messages in thread From: Oded Gabbay @ 2021-04-17 9:10 UTC (permalink / raw) To: Nathan Chancellor Cc: Oded Gabbay, Linux-Kernel@Vger. Kernel. Org, Ohad Sharabi Thanks Nathan, I already have a pending patch that fixes this issue: https://lkml.org/lkml/2021/4/17/73 On Thu, Apr 15, 2021 at 5:17 PM Nathan Chancellor <nathan@kernel.org> wrote: > > On Sun, Mar 21, 2021 at 10:11:29PM +0200, Oded Gabbay wrote: > > From: Ohad Sharabi <osharabi@habana.ai> > > > > In order to use minimum of hard coded values common to LKD and F/W > > a dynamic method to work with PLLs is introduced in this patch. > > Formerly asic specific PLL numbering is now common for all asics. > > To be backward compatible a bit in dev status is defined, if the bit is > > not set LKD will keep working with old PLL numbering. > > > > Signed-off-by: Ohad Sharabi <osharabi@habana.ai> > > Reviewed-by: Oded Gabbay <ogabbay@kernel.org> > > Signed-off-by: Oded Gabbay <ogabbay@kernel.org> > > --- > > drivers/misc/habanalabs/common/firmware_if.c | 49 ++++++++++++++++++- > > drivers/misc/habanalabs/common/habanalabs.h | 14 ++++-- > > drivers/misc/habanalabs/common/sysfs.c | 24 ++++++--- > > drivers/misc/habanalabs/gaudi/gaudi.c | 33 +++++++++++++ > > drivers/misc/habanalabs/goya/goya.c | 26 ++++++++++ > > .../misc/habanalabs/include/common/cpucp_if.h | 41 ++++++++++++++++ > > .../habanalabs/include/common/hl_boot_if.h | 6 +++ > > .../habanalabs/include/gaudi/gaudi_fw_if.h | 14 ------ > > .../misc/habanalabs/include/goya/goya_fw_if.h | 11 ----- > > 9 files changed, 182 insertions(+), 36 deletions(-) > > > > diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c > > index 2a58edaf984a..092691a8917d 100644 > > --- a/drivers/misc/habanalabs/common/firmware_if.c > > +++ 
b/drivers/misc/habanalabs/common/firmware_if.c > > @@ -539,18 +539,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) > > return rc; > > } > > > > -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, > > +int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, > > + enum pll_index *pll_index) > > +{ > > + struct asic_fixed_properties *prop = &hdev->asic_prop; > > + u8 pll_byte, pll_bit_off; > > + bool dynamic_pll; > > + > > + if (input_pll_index >= PLL_MAX) { > > + dev_err(hdev->dev, "PLL index %d is out of range\n", > > + input_pll_index); > > + return -EINVAL; > > + } > > + > > + dynamic_pll = prop->fw_security_status_valid && > > + (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN); > > + > > + if (!dynamic_pll) { > > + /* > > + * in case we are working with legacy FW (each asic has unique > > + * PLL numbering) extract the legacy numbering > > + */ > > + *pll_index = hdev->legacy_pll_map[input_pll_index]; > > + return 0; > > + } > > + > > + /* PLL map is a u8 array */ > > + pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3]; > > + pll_bit_off = input_pll_index & 0x7; > > + > > + if (!(pll_byte & BIT(pll_bit_off))) { > > + dev_err(hdev->dev, "PLL index %d is not supported\n", > > + input_pll_index); > > + return -EINVAL; > > + } > > + > > + *pll_index = input_pll_index; > > + > > + return 0; > > +} > > + > > +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, > > u16 *pll_freq_arr) > > { > > struct cpucp_packet pkt; > > + enum pll_index used_pll_idx; > > u64 result; > > int rc; > > > > + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); > > + if (rc) > > + return rc; > > + > > memset(&pkt, 0, sizeof(pkt)); > > > > pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET << > > CPUCP_PKT_CTL_OPCODE_SHIFT); > > - pkt.pll_type = __cpu_to_le16(pll_index); > > + pkt.pll_type = __cpu_to_le16((u16)used_pll_idx); > > > > rc = 
hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), > > HL_CPUCP_INFO_TIMEOUT_USEC, &result); > > diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h > > index 65f34918faed..dc8126b270d1 100644 > > --- a/drivers/misc/habanalabs/common/habanalabs.h > > +++ b/drivers/misc/habanalabs/common/habanalabs.h > > @@ -1946,6 +1946,8 @@ struct hl_mmu_funcs { > > * @aggregated_cs_counters: aggregated cs counters among all contexts > > * @mmu_priv: device-specific MMU data. > > * @mmu_func: device-related MMU functions. > > + * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and > > + * static (asic specific) PLL indexes. > > * @dram_used_mem: current DRAM memory consumption. > > * @timeout_jiffies: device CS timeout value. > > * @max_power: the max power of the device, as configured by the sysadmin. This > > @@ -2070,6 +2072,8 @@ struct hl_device { > > struct hl_mmu_priv mmu_priv; > > struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; > > > > + enum pll_index *legacy_pll_map; > > + > > atomic64_t dram_used_mem; > > u64 timeout_jiffies; > > u64 max_power; > > @@ -2383,7 +2387,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, > > struct hl_info_pci_counters *counters); > > int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, > > u64 *total_energy); > > -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, > > +int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, > > + enum pll_index *pll_index); > > +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, > > u16 *pll_freq_arr); > > int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power); > > int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, > > @@ -2404,8 +2410,10 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, > > int hl_pci_init(struct hl_device *hdev); > > void hl_pci_fini(struct hl_device *hdev); > > > > -long 
hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); > > -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); > > +long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, > > + bool curr); > > +void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, > > + u64 freq); > > int hl_get_temperature(struct hl_device *hdev, > > int sensor_index, u32 attr, long *value); > > int hl_set_temperature(struct hl_device *hdev, > > diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c > > index f37634cf8b65..c7ac5dc0cda4 100644 > > --- a/drivers/misc/habanalabs/common/sysfs.c > > +++ b/drivers/misc/habanalabs/common/sysfs.c > > @@ -9,12 +9,18 @@ > > > > #include <linux/pci.h> > > > > -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) > > +long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, > > + bool curr) > > { > > struct cpucp_packet pkt; > > + u32 used_pll_idx; > > u64 result; > > int rc; > > > > + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); > > + if (rc) > > + return rc; > > + > > memset(&pkt, 0, sizeof(pkt)); > > > > if (curr) > > @@ -23,7 +29,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) > > else > > pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << > > CPUCP_PKT_CTL_OPCODE_SHIFT); > > - pkt.pll_index = cpu_to_le32(pll_index); > > + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); > > > > rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), > > 0, &result); > > @@ -31,23 +37,29 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) > > if (rc) { > > dev_err(hdev->dev, > > "Failed to get frequency of PLL %d, error %d\n", > > - pll_index, rc); > > + used_pll_idx, rc); > > return rc; > > } > > > > return (long) result; > > } > > > > -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) > > +void hl_set_frequency(struct hl_device 
*hdev, enum pll_index pll_index, > > + u64 freq) > > { > > struct cpucp_packet pkt; > > + u32 used_pll_idx; > > int rc; > > > > + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); > > + if (rc) > > + return; > > + > > memset(&pkt, 0, sizeof(pkt)); > > > > pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << > > CPUCP_PKT_CTL_OPCODE_SHIFT); > > - pkt.pll_index = cpu_to_le32(pll_index); > > + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); > > pkt.value = cpu_to_le64(freq); > > > > rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), > > @@ -56,7 +68,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) > > if (rc) > > dev_err(hdev->dev, > > "Failed to set frequency to PLL %d, error %d\n", > > - pll_index, rc); > > + used_pll_idx, rc); > > } > > > > u64 hl_get_max_power(struct hl_device *hdev) > > diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c > > index a65ae0dbdb92..8fa190fde462 100644 > > --- a/drivers/misc/habanalabs/gaudi/gaudi.c > > +++ b/drivers/misc/habanalabs/gaudi/gaudi.c > > @@ -105,6 +105,36 @@ > > > > #define GAUDI_PLL_MAX 10 > > > > +/* > > + * this enum kept here for compatibility with old FW (in which each asic has > > + * unique PLL numbering > > + */ > > +enum gaudi_pll_index { > > + GAUDI_CPU_PLL = 0, > > + GAUDI_PCI_PLL, > > + GAUDI_SRAM_PLL, > > + GAUDI_HBM_PLL, > > + GAUDI_NIC_PLL, > > + GAUDI_DMA_PLL, > > + GAUDI_MESH_PLL, > > + GAUDI_MME_PLL, > > + GAUDI_TPC_PLL, > > + GAUDI_IF_PLL, > > +}; > > + > > +static enum pll_index gaudi_pll_map[PLL_MAX] = { > > + [CPU_PLL] = GAUDI_CPU_PLL, > > + [PCI_PLL] = GAUDI_PCI_PLL, > > + [SRAM_PLL] = GAUDI_SRAM_PLL, > > + [HBM_PLL] = GAUDI_HBM_PLL, > > + [NIC_PLL] = GAUDI_NIC_PLL, > > + [DMA_PLL] = GAUDI_DMA_PLL, > > + [MESH_PLL] = GAUDI_MESH_PLL, > > + [MME_PLL] = GAUDI_MME_PLL, > > + [TPC_PLL] = GAUDI_TPC_PLL, > > + [IF_PLL] = GAUDI_IF_PLL, > > +}; > > + > > These maps end up introducing quite a few instances of > 
-Wenum-conversion, which is visible by default with clang but still > happens when explicitly enabled with GCC: > > drivers/misc/habanalabs/gaudi/gaudi.c:129:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [HBM_PLL] = GAUDI_HBM_PLL, > ^~~~~~~~~~~~~ > drivers/misc/habanalabs/gaudi/gaudi.c:128:15: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [SRAM_PLL] = GAUDI_SRAM_PLL, > ^~~~~~~~~~~~~~ > drivers/misc/habanalabs/gaudi/gaudi.c:135:13: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [IF_PLL] = GAUDI_IF_PLL, > ^~~~~~~~~~~~ > drivers/misc/habanalabs/gaudi/gaudi.c:134:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [TPC_PLL] = GAUDI_TPC_PLL, > ^~~~~~~~~~~~~ > drivers/misc/habanalabs/gaudi/gaudi.c:133:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [MME_PLL] = GAUDI_MME_PLL, > ^~~~~~~~~~~~~ > drivers/misc/habanalabs/gaudi/gaudi.c:132:15: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [MESH_PLL] = GAUDI_MESH_PLL, > ^~~~~~~~~~~~~~ > drivers/misc/habanalabs/gaudi/gaudi.c:131:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [DMA_PLL] = GAUDI_DMA_PLL, > ^~~~~~~~~~~~~ > drivers/misc/habanalabs/gaudi/gaudi.c:130:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [NIC_PLL] = GAUDI_NIC_PLL, > ^~~~~~~~~~~~~ > 
drivers/misc/habanalabs/gaudi/gaudi.c:127:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [PCI_PLL] = GAUDI_PCI_PLL, > ^~~~~~~~~~~~~ > drivers/misc/habanalabs/gaudi/gaudi.c:126:14: warning: implicit conversion from enumeration type 'enum gaudi_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [CPU_PLL] = GAUDI_CPU_PLL, > ^~~~~~~~~~~~~ > 10 warnings generated. > drivers/misc/habanalabs/goya/goya.c:141:15: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [EMMC_PLL] = GOYA_EMMC_PLL, > ^~~~~~~~~~~~~ > drivers/misc/habanalabs/goya/goya.c:138:13: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [MC_PLL] = GOYA_MC_PLL, > ^~~~~~~~~~~ > drivers/misc/habanalabs/goya/goya.c:137:13: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [IC_PLL] = GOYA_IC_PLL, > ^~~~~~~~~~~ > drivers/misc/habanalabs/goya/goya.c:142:14: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [TPC_PLL] = GOYA_TPC_PLL, > ^~~~~~~~~~~~ > drivers/misc/habanalabs/goya/goya.c:139:14: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [MME_PLL] = GOYA_MME_PLL, > ^~~~~~~~~~~~ > drivers/misc/habanalabs/goya/goya.c:140:14: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different enumeration type 'enum pll_index' [-Wenum-conversion] > [PCI_PLL] = GOYA_PCI_PLL, > ^~~~~~~~~~~~ > drivers/misc/habanalabs/goya/goya.c:136:14: warning: implicit conversion from enumeration type 'enum goya_pll_index' to different 
enumeration type 'enum pll_index' [-Wenum-conversion] > [CPU_PLL] = GOYA_CPU_PLL, > ^~~~~~~~~~~~ > 7 warnings generated. > > I have come up with two solutions but neither seems particularly great. > > 1. Change the type of the map then add an explicit cast. > > diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c > index 8730b691ec61..5958824a68f4 100644 > --- a/drivers/misc/habanalabs/gaudi/gaudi.c > +++ b/drivers/misc/habanalabs/gaudi/gaudi.c > @@ -122,7 +122,7 @@ enum gaudi_pll_index { > GAUDI_IF_PLL, > }; > > -static enum pll_index gaudi_pll_map[PLL_MAX] = { > +static enum gaudi_pll_index gaudi_pll_map[PLL_MAX] = { > [CPU_PLL] = GAUDI_CPU_PLL, > [PCI_PLL] = GAUDI_PCI_PLL, > [SRAM_PLL] = GAUDI_SRAM_PLL, > @@ -1653,7 +1653,7 @@ static int gaudi_sw_init(struct hl_device *hdev) > hdev->asic_specific = gaudi; > > /* store legacy PLL map */ > - hdev->legacy_pll_map = gaudi_pll_map; > + hdev->legacy_pll_map = (enum pll_index *)gaudi_pll_map; > > /* Create DMA pool for small allocations */ > hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), > diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c > index e27338f4aad2..0325c9755485 100644 > --- a/drivers/misc/habanalabs/goya/goya.c > +++ b/drivers/misc/habanalabs/goya/goya.c > @@ -132,7 +132,7 @@ enum goya_pll_index { > GOYA_TPC_PLL, > }; > > -static enum pll_index goya_pll_map[PLL_MAX] = { > +static enum goya_pll_index goya_pll_map[PLL_MAX] = { > [CPU_PLL] = GOYA_CPU_PLL, > [IC_PLL] = GOYA_IC_PLL, > [MC_PLL] = GOYA_MC_PLL, > @@ -898,7 +898,7 @@ static int goya_sw_init(struct hl_device *hdev) > hdev->asic_specific = goya; > > /* store legacy PLL map */ > - hdev->legacy_pll_map = goya_pll_map; > + hdev->legacy_pll_map = (enum pll_index *)goya_pll_map; > > /* Create DMA pool for small allocations */ > hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), > > > 2. Just throw away the type, these values are really just used as > integers. 
> > diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h > index 44e89da30b4a..fd550a0a4bff 100644 > --- a/drivers/misc/habanalabs/common/habanalabs.h > +++ b/drivers/misc/habanalabs/common/habanalabs.h > @@ -2071,7 +2071,7 @@ struct hl_device { > struct hl_mmu_priv mmu_priv; > struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; > > - enum pll_index *legacy_pll_map; > + int *legacy_pll_map; > > atomic64_t dram_used_mem; > u64 timeout_jiffies; > diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c > index 8730b691ec61..99319f2ef52d 100644 > --- a/drivers/misc/habanalabs/gaudi/gaudi.c > +++ b/drivers/misc/habanalabs/gaudi/gaudi.c > @@ -122,7 +122,7 @@ enum gaudi_pll_index { > GAUDI_IF_PLL, > }; > > -static enum pll_index gaudi_pll_map[PLL_MAX] = { > +static int gaudi_pll_map[PLL_MAX] = { > [CPU_PLL] = GAUDI_CPU_PLL, > [PCI_PLL] = GAUDI_PCI_PLL, > [SRAM_PLL] = GAUDI_SRAM_PLL, > diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c > index e27338f4aad2..db5c948f0580 100644 > --- a/drivers/misc/habanalabs/goya/goya.c > +++ b/drivers/misc/habanalabs/goya/goya.c > @@ -132,7 +132,7 @@ enum goya_pll_index { > GOYA_TPC_PLL, > }; > > -static enum pll_index goya_pll_map[PLL_MAX] = { > +static int goya_pll_map[PLL_MAX] = { > [CPU_PLL] = GOYA_CPU_PLL, > [IC_PLL] = GOYA_IC_PLL, > [MC_PLL] = GOYA_MC_PLL, > > Cheers, > Nathan > > > static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { > > "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", > > "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", > > @@ -1588,6 +1618,9 @@ static int gaudi_sw_init(struct hl_device *hdev) > > > > hdev->asic_specific = gaudi; > > > > + /* store legacy PLL map */ > > + hdev->legacy_pll_map = gaudi_pll_map; > > + > > /* Create DMA pool for small allocations */ > > hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), > > &hdev->pdev->dev, 
GAUDI_DMA_POOL_BLK_SIZE, 8, 0); > > diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c > > index 9e7531167c73..f3b3145b206f 100644 > > --- a/drivers/misc/habanalabs/goya/goya.c > > +++ b/drivers/misc/habanalabs/goya/goya.c > > @@ -118,6 +118,29 @@ > > #define IS_MME_IDLE(mme_arch_sts) \ > > (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) > > > > +/* > > + * this enum kept here for compatibility with old FW (in which each asic has > > + * unique PLL numbering > > + */ > > +enum goya_pll_index { > > + GOYA_CPU_PLL = 0, > > + GOYA_IC_PLL, > > + GOYA_MC_PLL, > > + GOYA_MME_PLL, > > + GOYA_PCI_PLL, > > + GOYA_EMMC_PLL, > > + GOYA_TPC_PLL, > > +}; > > + > > +static enum pll_index goya_pll_map[PLL_MAX] = { > > + [CPU_PLL] = GOYA_CPU_PLL, > > + [IC_PLL] = GOYA_IC_PLL, > > + [MC_PLL] = GOYA_MC_PLL, > > + [MME_PLL] = GOYA_MME_PLL, > > + [PCI_PLL] = GOYA_PCI_PLL, > > + [EMMC_PLL] = GOYA_EMMC_PLL, > > + [TPC_PLL] = GOYA_TPC_PLL, > > +}; > > > > static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = { > > "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3", > > @@ -853,6 +876,9 @@ static int goya_sw_init(struct hl_device *hdev) > > > > hdev->asic_specific = goya; > > > > + /* store legacy PLL map */ > > + hdev->legacy_pll_map = goya_pll_map; > > + > > /* Create DMA pool for small allocations */ > > hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), > > &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0); > > diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h > > index 6ba480a316ce..e745c78dd8fd 100644 > > --- a/drivers/misc/habanalabs/include/common/cpucp_if.h > > +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h > > @@ -28,6 +28,9 @@ > > #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6 > > #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0 > > > > +#define PLL_MAP_MAX_BITS 128 > > +#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) > > + > > /* > > * 
info of the pkt queue pointers in the first async occurrence > > */ > > @@ -473,6 +476,42 @@ enum cpucp_pll_type_attributes { > > cpucp_pll_pci, > > }; > > > > +/* > > + * PLL enumeration table used for all ASICs and future SW versions. > > + * For future ASIC-LKD compatibility, we can only add new enumerations. > > + * at the end of the table. > > + * Changing the order of entries or removing entries is not allowed. > > + */ > > +enum pll_index { > > + CPU_PLL = 0, > > + PCI_PLL = 1, > > + NIC_PLL = 2, > > + DMA_PLL = 3, > > + MESH_PLL = 4, > > + MME_PLL = 5, > > + TPC_PLL = 6, > > + IF_PLL = 7, > > + SRAM_PLL = 8, > > + NS_DCORE_PLL = 9, > > + MESH_DCORE_PLL = 10, > > + HBM_PLL = 11, > > + TPC_DCORE_PLL = 12, > > + VIDEO_DCORE_PLL = 13, > > + SRAM_DCORE_PLL = 14, > > + NIC_PHY_DCORE_PLL = 15, > > + MSS_DCORE_PLL = 16, > > + DMA_DCORE_PLL = 17, > > + SIF_PLL = 18, > > + DDR_PLL = 19, > > + VID_PLL = 20, > > + BANK_PLL = 21, > > + MMU_PLL = 22, > > + IC_PLL = 23, > > + MC_PLL = 24, > > + EMMC_PLL = 25, > > + PLL_MAX > > +}; > > + > > /* Event Queue Packets */ > > > > struct eq_generic_event { > > @@ -547,6 +586,7 @@ struct cpucp_security_info { > > * @dram_size: available DRAM size. > > * @card_name: card name that will be displayed in HWMON subsystem on the host > > * @sec_info: security information > > + * @pll_map: Bit map of supported PLLs for current ASIC version. 
> > */ > > struct cpucp_info { > > struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; > > @@ -568,6 +608,7 @@ struct cpucp_info { > > __u8 pad[7]; > > struct cpucp_security_info sec_info; > > __le32 reserved6; > > + uint8_t pll_map[PLL_MAP_LEN]; > > }; > > > > struct cpucp_mac_addr { > > diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h > > index d17185b6aea9..1717874ff306 100644 > > --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h > > +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h > > @@ -179,6 +179,11 @@ > > * configured and is ready for use. > > * Initialized in: ppboot > > * > > + * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. > > + * FW sends to host a bitmap of supported > > + * PLLs. > > + * Initialized in: linux > > + * > > * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. > > * This is a main indication that the > > * running FW populates the device status > > @@ -206,6 +211,7 @@ > > #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15) > > #define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16) > > #define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << 17) > > +#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 19) > > #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) > > > > enum cpu_boot_status { > > diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h > > index 25acd9e87e20..a9f51f9f9e92 100644 > > --- a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h > > +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h > > @@ -20,20 +20,6 @@ > > #define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */ > > #define LINUX_FW_OFFSET 0x800000 /* 8MB in HBM */ > > > > -enum gaudi_pll_index { > > - CPU_PLL = 0, > > - PCI_PLL, > > - SRAM_PLL, > > - HBM_PLL, > > - NIC_PLL, > > - DMA_PLL, > > - MESH_PLL, > > - MME_PLL, > > - TPC_PLL, > > - IF_PLL, > > - PLL_MAX > > -}; > > - > > enum gaudi_nic_axi_error { > > RXB, > > 
RXE, > > diff --git a/drivers/misc/habanalabs/include/goya/goya_fw_if.h b/drivers/misc/habanalabs/include/goya/goya_fw_if.h > > index daf8d8cd14be..bc05f86c73ac 100644 > > --- a/drivers/misc/habanalabs/include/goya/goya_fw_if.h > > +++ b/drivers/misc/habanalabs/include/goya/goya_fw_if.h > > @@ -15,17 +15,6 @@ > > #define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */ > > #define LINUX_FW_OFFSET 0x800000 /* 8MB in DDR */ > > > > -enum goya_pll_index { > > - CPU_PLL = 0, > > - IC_PLL, > > - MC_PLL, > > - MME_PLL, > > - PCI_PLL, > > - EMMC_PLL, > > - TPC_PLL, > > - PLL_MAX > > -}; > > - > > #define GOYA_PLL_FREQ_LOW 50000000 /* 50 MHz */ > > > > #endif /* GOYA_FW_IF_H */ > > -- > > 2.25.1 > > ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 3/3] habanalabs: improve utilization calculation 2021-03-21 20:11 [PATCH 1/3] habanalabs: move relevant datapath work outside cs lock Oded Gabbay 2021-03-21 20:11 ` [PATCH 2/3] habanalabs: support legacy and new pll indexes Oded Gabbay @ 2021-03-21 20:11 ` Oded Gabbay 1 sibling, 0 replies; 5+ messages in thread From: Oded Gabbay @ 2021-03-21 20:11 UTC (permalink / raw) To: linux-kernel; +Cc: Koby Elbaz From: Koby Elbaz <kelbaz@habana.ai> The new approach is based on the notion that the device's current power consumption, relative to its idle (DC) and maximum power, is proportional to the device's true utilization. Utilization info ranges between [0,100]%. Currently, dc_power values are hard-coded. Signed-off-by: Koby Elbaz <kelbaz@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org> --- .../habanalabs/common/command_submission.c | 18 --- drivers/misc/habanalabs/common/device.c | 121 ++---------------- drivers/misc/habanalabs/common/habanalabs.h | 25 +--- .../misc/habanalabs/common/habanalabs_ioctl.c | 11 +- drivers/misc/habanalabs/common/hw_queue.c | 8 -- drivers/misc/habanalabs/gaudi/gaudi.c | 20 ++- drivers/misc/habanalabs/gaudi/gaudiP.h | 3 + drivers/misc/habanalabs/goya/goya.c | 1 + drivers/misc/habanalabs/goya/goyaP.h | 2 + 9 files changed, 40 insertions(+), 169 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index ba6d3e317255..21a60b7c2091 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -505,24 +505,6 @@ static void cs_do_release(struct kref *ref) goto out; } - hdev->asic_funcs->hw_queues_lock(hdev); - - hdev->cs_active_cnt--; - if (!hdev->cs_active_cnt) { - struct hl_device_idle_busy_ts *ts; - - ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; - ts->busy_to_idle_ts = ktime_get(); - - if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) - hdev->idle_busy_ts_idx 
= 0; - } else if (hdev->cs_active_cnt < 0) { - dev_crit(hdev->dev, "CS active cnt %d is negative\n", - hdev->cs_active_cnt); - } - - hdev->asic_funcs->hw_queues_unlock(hdev); - /* Need to update CI for all queue jobs that does not get completion */ hl_hw_queue_update_ci(cs); diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 53bc5ccb612f..49f0ceac4b81 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -383,17 +383,9 @@ static int device_early_init(struct hl_device *hdev) goto free_sob_reset_wq; } - hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE, - sizeof(struct hl_device_idle_busy_ts), - (GFP_KERNEL | __GFP_ZERO)); - if (!hdev->idle_busy_ts_arr) { - rc = -ENOMEM; - goto free_chip_info; - } - rc = hl_mmu_if_set_funcs(hdev); if (rc) - goto free_idle_busy_ts_arr; + goto free_chip_info; hl_cb_mgr_init(&hdev->kernel_cb_mgr); @@ -422,8 +414,6 @@ static int device_early_init(struct hl_device *hdev) free_cb_mgr: hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); -free_idle_busy_ts_arr: - kfree(hdev->idle_busy_ts_arr); free_chip_info: kfree(hdev->hl_chip_info); free_sob_reset_wq: @@ -461,7 +451,6 @@ static void device_early_fini(struct hl_device *hdev) hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); - kfree(hdev->idle_busy_ts_arr); kfree(hdev->hl_chip_info); destroy_workqueue(hdev->sob_reset_wq); @@ -582,100 +571,24 @@ static void device_late_fini(struct hl_device *hdev) hdev->late_init_done = false; } -uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms) +int hl_device_utilization(struct hl_device *hdev, u32 *utilization) { - struct hl_device_idle_busy_ts *ts; - ktime_t zero_ktime, curr = ktime_get(); - u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx; - s64 period_us, last_start_us, last_end_us, last_busy_time_us, - total_busy_time_us = 0, total_busy_time_ms; - - zero_ktime = ktime_set(0, 0); - period_us = period_ms * USEC_PER_MSEC; - ts 
= &hdev->idle_busy_ts_arr[last_index]; - - /* check case that device is currently in idle */ - if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) && - !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) { - - last_index--; - /* Handle case idle_busy_ts_idx was 0 */ - if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) - last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; - - ts = &hdev->idle_busy_ts_arr[last_index]; - } - - while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) { - /* Check if we are in last sample case. i.e. if the sample - * begun before the sampling period. This could be a real - * sample or 0 so need to handle both cases - */ - last_start_us = ktime_to_us( - ktime_sub(curr, ts->idle_to_busy_ts)); - - if (last_start_us > period_us) { - - /* First check two cases: - * 1. If the device is currently busy - * 2. If the device was idle during the whole sampling - * period - */ - - if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) { - /* Check if the device is currently busy */ - if (ktime_compare(ts->idle_to_busy_ts, - zero_ktime)) - return 100; - - /* We either didn't have any activity or we - * reached an entry which is 0. 
Either way, - * exit and return what was accumulated so far - */ - break; - } - - /* If sample has finished, check it is relevant */ - last_end_us = ktime_to_us( - ktime_sub(curr, ts->busy_to_idle_ts)); - - if (last_end_us > period_us) - break; - - /* It is relevant so add it but with adjustment */ - last_busy_time_us = ktime_to_us( - ktime_sub(ts->busy_to_idle_ts, - ts->idle_to_busy_ts)); - total_busy_time_us += last_busy_time_us - - (last_start_us - period_us); - break; - } - - /* Check if the sample is finished or still open */ - if (ktime_compare(ts->busy_to_idle_ts, zero_ktime)) - last_busy_time_us = ktime_to_us( - ktime_sub(ts->busy_to_idle_ts, - ts->idle_to_busy_ts)); - else - last_busy_time_us = ktime_to_us( - ktime_sub(curr, ts->idle_to_busy_ts)); - - total_busy_time_us += last_busy_time_us; + u64 max_power, curr_power, dc_power; + int rc; - last_index--; - /* Handle case idle_busy_ts_idx was 0 */ - if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) - last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; + max_power = hdev->asic_prop.max_power_default; + dc_power = hdev->asic_prop.dc_power_default; + rc = hl_fw_cpucp_power_get(hdev, &curr_power); - ts = &hdev->idle_busy_ts_arr[last_index]; + if (rc) + return rc; - overlap_cnt++; - } + curr_power = clamp(curr_power, dc_power, max_power); - total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us, - USEC_PER_MSEC); + *utilization = (u32)(((curr_power - dc_power) * 100) / + (max_power - dc_power)); - return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms); + return 0; } /* @@ -1110,14 +1023,6 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_reset(hdev, &hdev->completion_queue[i]); - hdev->idle_busy_ts_idx = 0; - hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0); - hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0); - - if (hdev->cs_active_cnt) - dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n", - 
hdev->cs_active_cnt); - mutex_lock(&hdev->fpriv_list_lock); /* Make sure the context switch phase will run again */ diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index dc8126b270d1..ddb65639f518 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -61,8 +61,6 @@ #define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */ -#define HL_IDLE_BUSY_TS_ARR_SIZE 4096 - #define HL_COMMON_USER_INTERRUPT_ID 0xFFF /* Memory */ @@ -391,6 +389,7 @@ struct hl_mmu_properties { * @dram_size: DRAM total size. * @dram_pci_bar_size: size of PCI bar towards DRAM. * @max_power_default: max power of the device after reset + * @dc_power_default: power consumed by the device in mode idle. * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page * fault. * @pcie_dbi_base_address: Base address of the PCIE_DBI block. @@ -463,6 +462,7 @@ struct asic_fixed_properties { u64 dram_size; u64 dram_pci_bar_size; u64 max_power_default; + u64 dc_power_default; u64 dram_size_for_default_page_mapping; u64 pcie_dbi_base_address; u64 pcie_aux_dbi_reg_addr; @@ -1760,16 +1760,6 @@ struct hl_device_reset_work { struct hl_device *hdev; }; -/** - * struct hl_device_idle_busy_ts - used for calculating device utilization rate. - * @idle_to_busy_ts: timestamp where device changed from idle to busy. - * @busy_to_idle_ts: timestamp where device changed from busy to idle. - */ -struct hl_device_idle_busy_ts { - ktime_t idle_to_busy_ts; - ktime_t busy_to_idle_ts; -}; - /** * struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop * information. @@ -1941,8 +1931,6 @@ struct hl_mmu_funcs { * when a user opens the device * @fpriv_list_lock: protects the fpriv_list * @compute_ctx: current compute context executing. 
- * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy - * and vice-versa * @aggregated_cs_counters: aggregated cs counters among all contexts * @mmu_priv: device-specific MMU data. * @mmu_func: device-related MMU functions. @@ -1960,13 +1948,10 @@ struct hl_mmu_funcs { * @curr_pll_profile: current PLL profile. * @card_type: Various ASICs have several card types. This indicates the card * type of the current device. - * @cs_active_cnt: number of active command submissions on this device (active - * means already in H/W queues) * @major: habanalabs kernel driver major. * @high_pll: high PLL profile frequency. * @soft_reset_cnt: number of soft reset since the driver was loaded. * @hard_reset_cnt: number of hard reset since the driver was loaded. - * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr * @clk_throttling_reason: bitmask represents the current clk throttling reasons * @id: device minor. * @id_control: minor of the control device @@ -2065,8 +2050,6 @@ struct hl_device { struct hl_ctx *compute_ctx; - struct hl_device_idle_busy_ts *idle_busy_ts_arr; - struct hl_cs_counters_atomic aggregated_cs_counters; struct hl_mmu_priv mmu_priv; @@ -2081,12 +2064,10 @@ struct hl_device { atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; enum cpucp_card_types card_type; - int cs_active_cnt; u32 major; u32 high_pll; u32 soft_reset_cnt; u32 hard_reset_cnt; - u32 idle_busy_ts_idx; u32 clk_throttling_reason; u16 id; u16 id_control; @@ -2275,7 +2256,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags); void hl_hpriv_get(struct hl_fpriv *hpriv); int hl_hpriv_put(struct hl_fpriv *hpriv); int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); -uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms); +int hl_device_utilization(struct hl_device *hdev, u32 *utilization); int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sensors_arr); diff --git 
a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 9fc429b82a92..33841c272eb6 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -226,19 +226,14 @@ static int device_utilization(struct hl_device *hdev, struct hl_info_args *args) struct hl_info_device_utilization device_util = {0}; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; if ((!max_size) || (!out)) return -EINVAL; - if ((args->period_ms < 100) || (args->period_ms > 1000) || - (args->period_ms % 100)) { - dev_err(hdev->dev, - "period %u must be between 100 - 1000 and must be divisible by 100\n", - args->period_ms); + rc = hl_device_utilization(hdev, &device_util.utilization); + if (rc) return -EINVAL; - } - - device_util.utilization = hl_device_utilization(hdev, args->period_ms); return copy_to_user(out, &device_util, min((size_t) max_size, sizeof(device_util))) ? 
-EFAULT : 0; diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 0f335182267f..4acc25dccad3 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -635,14 +635,6 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) spin_unlock(&hdev->cs_mirror_lock); - if (!hdev->cs_active_cnt++) { - struct hl_device_idle_busy_ts *ts; - - ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx]; - ts->busy_to_idle_ts = ktime_set(0, 0); - ts->idle_to_busy_ts = ktime_get(); - } - list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) switch (job->queue_type) { case QUEUE_TYPE_EXT: diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 8fa190fde462..f273b792bc5d 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -426,6 +426,19 @@ get_collective_mode(struct hl_device *hdev, u32 queue_id) return HL_COLLECTIVE_NOT_SUPPORTED; } +static inline void set_default_power_values(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (hdev->card_type == cpucp_card_type_pmc) { + prop->max_power_default = MAX_POWER_DEFAULT_PMC; + prop->dc_power_default = DC_POWER_DEFAULT_PMC; + } else { + prop->max_power_default = MAX_POWER_DEFAULT_PCI; + prop->dc_power_default = DC_POWER_DEFAULT_PCI; + } +} + static int gaudi_get_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -537,7 +550,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->num_of_events = GAUDI_EVENT_SIZE; prop->tpc_enabled_mask = TPC_ENABLED_MASK; - prop->max_power_default = MAX_POWER_DEFAULT_PCI; + set_default_power_values(hdev); prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; @@ -7796,10 +7809,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) hdev->card_type = 
le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); - if (hdev->card_type == cpucp_card_type_pci) - prop->max_power_default = MAX_POWER_DEFAULT_PCI; - else if (hdev->card_type == cpucp_card_type_pmc) - prop->max_power_default = MAX_POWER_DEFAULT_PMC; + set_default_power_values(hdev); hdev->max_power = prop->max_power_default; diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 50bb4ad570fd..5929be81ec23 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -47,6 +47,9 @@ #define MAX_POWER_DEFAULT_PCI 200000 /* 200W */ #define MAX_POWER_DEFAULT_PMC 350000 /* 350W */ +#define DC_POWER_DEFAULT_PCI 60000 /* 60W */ +#define DC_POWER_DEFAULT_PMC 60000 /* 60W */ + #define GAUDI_CPU_TIMEOUT_USEC 30000000 /* 30s */ #define TPC_ENABLED_MASK 0xFF diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index f3b3145b206f..09b423455439 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -469,6 +469,7 @@ int goya_get_fixed_properties(struct hl_device *hdev) prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT; prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE; prop->max_power_default = MAX_POWER_DEFAULT; + prop->dc_power_default = DC_POWER_DEFAULT; prop->tpc_enabled_mask = TPC_ENABLED_MASK; prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 23fe099ed218..ef8c6c8b5c8d 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -49,6 +49,8 @@ #define MAX_POWER_DEFAULT 200000 /* 200W */ +#define DC_POWER_DEFAULT 20000 /* 20W */ + #define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */ #define GOYA_DEFAULT_CARD_NAME "HL1000" -- 2.25.1 ^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2021-04-17 9:11 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2021-03-21 20:11 [PATCH 1/3] habanalabs: move relevant datapath work outside cs lock Oded Gabbay 2021-03-21 20:11 ` [PATCH 2/3] habanalabs: support legacy and new pll indexes Oded Gabbay 2021-04-15 14:17 ` Nathan Chancellor 2021-04-17 9:10 ` Oded Gabbay 2021-03-21 20:11 ` [PATCH 3/3] habanalabs: improve utilization calculation Oded Gabbay
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).