linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/3] habanalabs: Update the device idle check
@ 2019-07-01 13:59 Tomer Tayar
  2019-07-01 13:59 ` [PATCH v2 3/3] habanalabs: Add busy engines bitmask to HW idle IOCTL Tomer Tayar
  2019-07-01 13:59 ` [PATCH v2 2/3] habanalabs: Add debugfs node for engines status Tomer Tayar
  0 siblings, 2 replies; 4+ messages in thread
From: Tomer Tayar @ 2019-07-01 13:59 UTC (permalink / raw)
  To: oded.gabbay; +Cc: linux-kernel

The patch updates the device idle check:
- Add reading the DMA core status register, because it is possible that
  a QMAN has finished its work but the DMA itself is still running.
- Remove the MME shadow status check, as the MME ARCH status register
  includes the status of all MME shadows.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
---
 drivers/misc/habanalabs/goya/goya.c           |  10 +-
 .../include/goya/asic_reg/dma_ch_0_masks.h    | 418 ++++++++++++++++++
 .../include/goya/asic_reg/goya_regs.h         |   1 +
 3 files changed, 425 insertions(+), 4 deletions(-)
 create mode 100644 drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index ce127a6f606f..8653aa914724 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4893,17 +4893,22 @@ int goya_armcp_info_get(struct hl_device *hdev)
 
 static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
 {
-	u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
+	u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg,
+		dma_core_sts;
 	int i;
 
 	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
 
 	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
 		dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset;
+		dma_core_sts = mmDMA_CH_0_STS0 + i * offset;
 
 		if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
 				DMA_QM_IDLE_MASK)
 			return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);
+
+		if (RREG32(dma_core_sts) & DMA_CH_0_STS0_DMA_BUSY_MASK)
+			return HL_ENG_BUSY(buf, size, "DMA%d_CORE", i);
 	}
 
 	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
@@ -4938,9 +4943,6 @@ static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
 			MME_ARCH_IDLE_MASK)
 		return HL_ENG_BUSY(buf, size, "MME_ARCH");
 
-	if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
-		return HL_ENG_BUSY(buf, size, "MME");
-
 	return true;
 }
 
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h
new file mode 100644
index 000000000000..028143408401
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ **       DO NOT EDIT BELOW        **
+ ************************************/
+
+#ifndef ASIC_REG_DMA_CH_0_MASKS_H_
+#define ASIC_REG_DMA_CH_0_MASKS_H_
+
+/*
+ *****************************************
+ *   DMA_CH_0 (Prototype: DMA_CH)
+ *****************************************
+ */
+
+/* DMA_CH_0_CFG0 */
+#define DMA_CH_0_CFG0_RD_MAX_OUTSTAND_SHIFT                          0
+#define DMA_CH_0_CFG0_RD_MAX_OUTSTAND_MASK                           0x3FF
+#define DMA_CH_0_CFG0_WR_MAX_OUTSTAND_SHIFT                          16
+#define DMA_CH_0_CFG0_WR_MAX_OUTSTAND_MASK                           0xFFF0000
+
+/* DMA_CH_0_CFG1 */
+#define DMA_CH_0_CFG1_RD_BUF_MAX_SIZE_SHIFT                          0
+#define DMA_CH_0_CFG1_RD_BUF_MAX_SIZE_MASK                           0x3FF
+
+/* DMA_CH_0_ERRMSG_ADDR_LO */
+#define DMA_CH_0_ERRMSG_ADDR_LO_VAL_SHIFT                            0
+#define DMA_CH_0_ERRMSG_ADDR_LO_VAL_MASK                             0xFFFFFFFF
+
+/* DMA_CH_0_ERRMSG_ADDR_HI */
+#define DMA_CH_0_ERRMSG_ADDR_HI_VAL_SHIFT                            0
+#define DMA_CH_0_ERRMSG_ADDR_HI_VAL_MASK                             0xFFFFFFFF
+
+/* DMA_CH_0_ERRMSG_WDATA */
+#define DMA_CH_0_ERRMSG_WDATA_VAL_SHIFT                              0
+#define DMA_CH_0_ERRMSG_WDATA_VAL_MASK                               0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_ADDR_LO */
+#define DMA_CH_0_RD_COMP_ADDR_LO_VAL_SHIFT                           0
+#define DMA_CH_0_RD_COMP_ADDR_LO_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_ADDR_HI */
+#define DMA_CH_0_RD_COMP_ADDR_HI_VAL_SHIFT                           0
+#define DMA_CH_0_RD_COMP_ADDR_HI_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_WDATA */
+#define DMA_CH_0_RD_COMP_WDATA_VAL_SHIFT                             0
+#define DMA_CH_0_RD_COMP_WDATA_VAL_MASK                              0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_ADDR_LO */
+#define DMA_CH_0_WR_COMP_ADDR_LO_VAL_SHIFT                           0
+#define DMA_CH_0_WR_COMP_ADDR_LO_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_ADDR_HI */
+#define DMA_CH_0_WR_COMP_ADDR_HI_VAL_SHIFT                           0
+#define DMA_CH_0_WR_COMP_ADDR_HI_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_WDATA */
+#define DMA_CH_0_WR_COMP_WDATA_VAL_SHIFT                             0
+#define DMA_CH_0_WR_COMP_WDATA_VAL_MASK                              0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_SRC_ADDR_LO */
+#define DMA_CH_0_LDMA_SRC_ADDR_LO_VAL_SHIFT                          0
+#define DMA_CH_0_LDMA_SRC_ADDR_LO_VAL_MASK                           0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_SRC_ADDR_HI */
+#define DMA_CH_0_LDMA_SRC_ADDR_HI_VAL_SHIFT                          0
+#define DMA_CH_0_LDMA_SRC_ADDR_HI_VAL_MASK                           0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_DST_ADDR_LO */
+#define DMA_CH_0_LDMA_DST_ADDR_LO_VAL_SHIFT                          0
+#define DMA_CH_0_LDMA_DST_ADDR_LO_VAL_MASK                           0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_DST_ADDR_HI */
+#define DMA_CH_0_LDMA_DST_ADDR_HI_VAL_SHIFT                          0
+#define DMA_CH_0_LDMA_DST_ADDR_HI_VAL_MASK                           0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_TSIZE */
+#define DMA_CH_0_LDMA_TSIZE_VAL_SHIFT                                0
+#define DMA_CH_0_LDMA_TSIZE_VAL_MASK                                 0xFFFFFFFF
+
+/* DMA_CH_0_COMIT_TRANSFER */
+#define DMA_CH_0_COMIT_TRANSFER_PCI_UPS_WKORDR_SHIFT                 0
+#define DMA_CH_0_COMIT_TRANSFER_PCI_UPS_WKORDR_MASK                  0x1
+#define DMA_CH_0_COMIT_TRANSFER_RD_COMP_EN_SHIFT                     1
+#define DMA_CH_0_COMIT_TRANSFER_RD_COMP_EN_MASK                      0x2
+#define DMA_CH_0_COMIT_TRANSFER_WR_COMP_EN_SHIFT                     2
+#define DMA_CH_0_COMIT_TRANSFER_WR_COMP_EN_MASK                      0x4
+#define DMA_CH_0_COMIT_TRANSFER_NOSNOOP_SHIFT                        3
+#define DMA_CH_0_COMIT_TRANSFER_NOSNOOP_MASK                         0x8
+#define DMA_CH_0_COMIT_TRANSFER_SRC_ADDR_INC_DIS_SHIFT               4
+#define DMA_CH_0_COMIT_TRANSFER_SRC_ADDR_INC_DIS_MASK                0x10
+#define DMA_CH_0_COMIT_TRANSFER_DST_ADDR_INC_DIS_SHIFT               5
+#define DMA_CH_0_COMIT_TRANSFER_DST_ADDR_INC_DIS_MASK                0x20
+#define DMA_CH_0_COMIT_TRANSFER_MEM_SET_SHIFT                        6
+#define DMA_CH_0_COMIT_TRANSFER_MEM_SET_MASK                         0x40
+#define DMA_CH_0_COMIT_TRANSFER_MOD_TENSOR_SHIFT                     15
+#define DMA_CH_0_COMIT_TRANSFER_MOD_TENSOR_MASK                      0x8000
+#define DMA_CH_0_COMIT_TRANSFER_CTL_SHIFT                            16
+#define DMA_CH_0_COMIT_TRANSFER_CTL_MASK                             0xFFFF0000
+
+/* DMA_CH_0_STS0 */
+#define DMA_CH_0_STS0_DMA_BUSY_SHIFT                                 0
+#define DMA_CH_0_STS0_DMA_BUSY_MASK                                  0x1
+#define DMA_CH_0_STS0_RD_STS_CTX_FULL_SHIFT                          1
+#define DMA_CH_0_STS0_RD_STS_CTX_FULL_MASK                           0x2
+#define DMA_CH_0_STS0_WR_STS_CTX_FULL_SHIFT                          2
+#define DMA_CH_0_STS0_WR_STS_CTX_FULL_MASK                           0x4
+
+/* DMA_CH_0_STS1 */
+#define DMA_CH_0_STS1_RD_STS_CTX_CNT_SHIFT                           0
+#define DMA_CH_0_STS1_RD_STS_CTX_CNT_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_STS2 */
+#define DMA_CH_0_STS2_WR_STS_CTX_CNT_SHIFT                           0
+#define DMA_CH_0_STS2_WR_STS_CTX_CNT_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_STS3 */
+#define DMA_CH_0_STS3_RD_STS_TRN_CNT_SHIFT                           0
+#define DMA_CH_0_STS3_RD_STS_TRN_CNT_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_STS4 */
+#define DMA_CH_0_STS4_WR_STS_TRN_CNT_SHIFT                           0
+#define DMA_CH_0_STS4_WR_STS_TRN_CNT_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_SRC_ADDR_LO_STS */
+#define DMA_CH_0_SRC_ADDR_LO_STS_VAL_SHIFT                           0
+#define DMA_CH_0_SRC_ADDR_LO_STS_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_SRC_ADDR_HI_STS */
+#define DMA_CH_0_SRC_ADDR_HI_STS_VAL_SHIFT                           0
+#define DMA_CH_0_SRC_ADDR_HI_STS_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_SRC_TSIZE_STS */
+#define DMA_CH_0_SRC_TSIZE_STS_VAL_SHIFT                             0
+#define DMA_CH_0_SRC_TSIZE_STS_VAL_MASK                              0xFFFFFFFF
+
+/* DMA_CH_0_DST_ADDR_LO_STS */
+#define DMA_CH_0_DST_ADDR_LO_STS_VAL_SHIFT                           0
+#define DMA_CH_0_DST_ADDR_LO_STS_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_DST_ADDR_HI_STS */
+#define DMA_CH_0_DST_ADDR_HI_STS_VAL_SHIFT                           0
+#define DMA_CH_0_DST_ADDR_HI_STS_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_DST_TSIZE_STS */
+#define DMA_CH_0_DST_TSIZE_STS_VAL_SHIFT                             0
+#define DMA_CH_0_DST_TSIZE_STS_VAL_MASK                              0xFFFFFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_EN */
+#define DMA_CH_0_RD_RATE_LIM_EN_VAL_SHIFT                            0
+#define DMA_CH_0_RD_RATE_LIM_EN_VAL_MASK                             0x1
+
+/* DMA_CH_0_RD_RATE_LIM_RST_TOKEN */
+#define DMA_CH_0_RD_RATE_LIM_RST_TOKEN_VAL_SHIFT                     0
+#define DMA_CH_0_RD_RATE_LIM_RST_TOKEN_VAL_MASK                      0xFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_SAT */
+#define DMA_CH_0_RD_RATE_LIM_SAT_VAL_SHIFT                           0
+#define DMA_CH_0_RD_RATE_LIM_SAT_VAL_MASK                            0xFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_TOUT */
+#define DMA_CH_0_RD_RATE_LIM_TOUT_VAL_SHIFT                          0
+#define DMA_CH_0_RD_RATE_LIM_TOUT_VAL_MASK                           0x7FFFFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_EN */
+#define DMA_CH_0_WR_RATE_LIM_EN_VAL_SHIFT                            0
+#define DMA_CH_0_WR_RATE_LIM_EN_VAL_MASK                             0x1
+
+/* DMA_CH_0_WR_RATE_LIM_RST_TOKEN */
+#define DMA_CH_0_WR_RATE_LIM_RST_TOKEN_VAL_SHIFT                     0
+#define DMA_CH_0_WR_RATE_LIM_RST_TOKEN_VAL_MASK                      0xFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_SAT */
+#define DMA_CH_0_WR_RATE_LIM_SAT_VAL_SHIFT                           0
+#define DMA_CH_0_WR_RATE_LIM_SAT_VAL_MASK                            0xFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_TOUT */
+#define DMA_CH_0_WR_RATE_LIM_TOUT_VAL_SHIFT                          0
+#define DMA_CH_0_WR_RATE_LIM_TOUT_VAL_MASK                           0x7FFFFFFF
+
+/* DMA_CH_0_CFG2 */
+#define DMA_CH_0_CFG2_FORCE_WORD_SHIFT                               0
+#define DMA_CH_0_CFG2_FORCE_WORD_MASK                                0x1
+
+/* DMA_CH_0_TDMA_CTL */
+#define DMA_CH_0_TDMA_CTL_DTYPE_SHIFT                                0
+#define DMA_CH_0_TDMA_CTL_DTYPE_MASK                                 0x7
+
+/* DMA_CH_0_TDMA_SRC_BASE_ADDR_LO */
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_LO_VAL_SHIFT                     0
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_LO_VAL_MASK                      0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_BASE_ADDR_HI */
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_HI_VAL_SHIFT                     0
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_HI_VAL_MASK                      0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_0 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_0_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_0_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_0 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_0_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_0_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_0 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_0_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_0_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_0 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_0_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_SRC_STRIDE_0_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_1 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_1_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_1_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_1 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_1_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_1_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_1 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_1_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_1_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_1 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_1_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_SRC_STRIDE_1_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_2 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_2_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_2_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_2 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_2_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_2_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_2 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_2_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_2_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_2 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_2_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_SRC_STRIDE_2_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_3 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_3_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_3_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_3 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_3_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_3_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_3 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_3_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_3_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_3 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_3_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_SRC_STRIDE_3_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_4 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_4_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_4_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_4 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_4_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_4_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_4 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_4_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_4_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_4 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_4_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_SRC_STRIDE_4_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_BASE_ADDR_LO */
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_LO_VAL_SHIFT                     0
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_LO_VAL_MASK                      0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_BASE_ADDR_HI */
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_HI_VAL_SHIFT                     0
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_HI_VAL_MASK                      0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_0 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_0_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_0_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_0 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_0_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_0_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_0 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_0_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_0_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_0 */
+#define DMA_CH_0_TDMA_DST_STRIDE_0_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_DST_STRIDE_0_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_1 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_1_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_1_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_1 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_1_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_1_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_1 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_1_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_1_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_1 */
+#define DMA_CH_0_TDMA_DST_STRIDE_1_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_DST_STRIDE_1_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_2 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_2_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_2_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_2 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_2_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_2_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_2 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_2_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_2_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_2 */
+#define DMA_CH_0_TDMA_DST_STRIDE_2_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_DST_STRIDE_2_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_3 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_3_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_3_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_3 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_3_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_3_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_3 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_3_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_3_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_3 */
+#define DMA_CH_0_TDMA_DST_STRIDE_3_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_DST_STRIDE_3_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_4 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_4_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_4_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_4 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_4_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_4_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_4 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_4_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_4_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_4 */
+#define DMA_CH_0_TDMA_DST_STRIDE_4_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_DST_STRIDE_4_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_MEM_INIT_BUSY */
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_DATA_SHIFT                        0
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_DATA_MASK                         0xFF
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_MD_SHIFT                          8
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_MD_MASK                           0x100
+
+#endif /* ASIC_REG_DMA_CH_0_MASKS_H_ */
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
index 506e71e201e1..19b0f0ef1d0b 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
@@ -88,6 +88,7 @@
 #include "psoc_global_conf_masks.h"
 #include "dma_macro_masks.h"
 #include "dma_qm_0_masks.h"
+#include "dma_ch_0_masks.h"
 #include "tpc0_qm_masks.h"
 #include "tpc0_cmdq_masks.h"
 #include "mme_qm_masks.h"
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2 2/3] habanalabs: Add debugfs node for engines status
  2019-07-01 13:59 [PATCH v2 1/3] habanalabs: Update the device idle check Tomer Tayar
  2019-07-01 13:59 ` [PATCH v2 3/3] habanalabs: Add busy engines bitmask to HW idle IOCTL Tomer Tayar
@ 2019-07-01 13:59 ` Tomer Tayar
  1 sibling, 0 replies; 4+ messages in thread
From: Tomer Tayar @ 2019-07-01 13:59 UTC (permalink / raw)
  To: oded.gabbay; +Cc: linux-kernel

Command submissions sent to the device are composed of command buffers
which are targeted to different device engines, like DMA and compute
entities. When a command submission gets stuck, knowing in which engine
the stuck is, is crucial for debugging.
This patch adds a debugfs node that exports this information, by
displaying the engines' various registers that assemble their idle/busy
status.
The information retrieval is based on the is_device_idle ASIC function.
The printout in this function, of the first detected busy engine, is
removed because it becomes redundant in the presence of the more
elaborated info of the new debugfs node.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
---
Changes in v2:
- Move the idle check updates into a separate patch.
- Update Documentation/.../debugfs-driver-habanalabs with the new
  debugfs node.

 .../ABI/testing/debugfs-driver-habanalabs     |   7 +
 drivers/misc/habanalabs/debugfs.c             |  12 ++
 drivers/misc/habanalabs/goya/goya.c           | 129 ++++++++++++------
 drivers/misc/habanalabs/habanalabs.h          |   8 +-
 drivers/misc/habanalabs/habanalabs_ioctl.c    |   2 +-
 5 files changed, 105 insertions(+), 53 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 18191c2becab..f0ac14b70ecb 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -51,6 +51,13 @@ Description:    Enables the root user to set the device to specific state.
                 Valid values are "disable", "enable", "suspend", "resume".
                 User can read this property to see the valid values
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/engines
+Date:           Jul 2019
+KernelVersion:  5.3
+Contact:        oded.gabbay@gmail.com
+Description:    Displays the status registers values of the device engines and
+                their derived idle status
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
 Date:           Jan 2019
 KernelVersion:  5.1
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 17974919b760..6a5dfb14eca1 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -500,6 +500,17 @@ static ssize_t mmu_write(struct file *file, const char __user *buf,
 	return -EINVAL;
 }
 
+static int engines_show(struct seq_file *s, void *data)
+{
+	struct hl_debugfs_entry *entry = s->private;
+	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+	struct hl_device *hdev = dev_entry->hdev;
+
+	hdev->asic_funcs->is_device_idle(hdev, s);
+
+	return 0;
+}
+
 static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -893,6 +904,7 @@ static const struct hl_info_list hl_debugfs_list[] = {
 	{"userptr", userptr_show, NULL},
 	{"vm", vm_show, NULL},
 	{"mmu", mmu_show, mmu_write},
+	{"engines", engines_show, NULL}
 };
 
 static int hl_debugfs_open(struct inode *inode, struct file *file)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 8653aa914724..41e97531f300 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -15,6 +15,7 @@
 #include <linux/hwmon.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/iommu.h>
+#include <linux/seq_file.h>
 
 /*
  * GOYA security scheme:
@@ -90,6 +91,30 @@
 #define GOYA_CB_POOL_CB_CNT		512
 #define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */
 
+#define IS_QM_IDLE(engine, qm_glbl_sts0) \
+	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
+#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
+#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
+#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)
+
+#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
+	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
+			engine##_CMDQ_IDLE_MASK)
+#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
+	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
+#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
+	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
+
+#define IS_DMA_IDLE(dma_core_sts0) \
+	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
+
+#define IS_TPC_IDLE(tpc_cfg_sts) \
+	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
+
+#define IS_MME_IDLE(mme_arch_sts) \
+	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
+
+
 static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
 		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
 		"goya cq 4", "goya cpu eq"
@@ -2796,7 +2821,6 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
 	dma_addr_t fence_dma_addr;
 	struct hl_cb *cb;
 	u32 tmp, timeout;
-	char buf[16] = {};
 	int rc;
 
 	if (hdev->pldm)
@@ -2804,10 +2828,9 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
 	else
 		timeout = HL_DEVICE_TIMEOUT_USEC;
 
-	if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) {
+	if (!hdev->asic_funcs->is_device_idle(hdev, NULL)) {
 		dev_err_ratelimited(hdev->dev,
-			"Can't send KMD job on QMAN0 because %s is busy\n",
-			buf);
+			"Can't send KMD job on QMAN0 because the device is not idle\n");
 		return -EBUSY;
 	}
 
@@ -4891,59 +4914,75 @@ int goya_armcp_info_get(struct hl_device *hdev)
 	return 0;
 }
 
-static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
+static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
 {
-	u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg,
-		dma_core_sts;
+	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
+	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
+	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
+		mme_arch_sts;
+	bool is_idle = true, is_eng_idle;
+	u64 offset;
 	int i;
 
+	if (s)
+		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
+				"---  -------  ------------  -------------\n");
+
 	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
 
 	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
-		dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset;
-		dma_core_sts = mmDMA_CH_0_STS0 + i * offset;
+		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
+		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
+		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
+				IS_DMA_IDLE(dma_core_sts0);
+		is_idle &= is_eng_idle;
 
-		if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
-				DMA_QM_IDLE_MASK)
-			return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);
-
-		if (RREG32(dma_core_sts) & DMA_CH_0_STS0_DMA_BUSY_MASK)
-			return HL_ENG_BUSY(buf, size, "DMA%d_CORE", i);
+		if (s)
+			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
+					qm_glbl_sts0, dma_core_sts0);
 	}
 
+	if (s)
+		seq_puts(s,
+			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
+			"---  -------  ------------  --------------  ----------\n");
+
 	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
 
 	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
-		tpc_qm_reg = mmTPC0_QM_GLBL_STS0 + i * offset;
-		tpc_cmdq_reg = mmTPC0_CMDQ_GLBL_STS0 + i * offset;
-		tpc_cfg_reg = mmTPC0_CFG_STATUS + i * offset;
-
-		if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
-				TPC_QM_IDLE_MASK)
-			return HL_ENG_BUSY(buf, size, "TPC%d_QM", i);
-
-		if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
-				TPC_CMDQ_IDLE_MASK)
-			return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i);
-
-		if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
-				TPC_CFG_IDLE_MASK)
-			return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i);
-	}
-
-	if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
-			MME_QM_IDLE_MASK)
-		return HL_ENG_BUSY(buf, size, "MME_QM");
-
-	if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
-			MME_CMDQ_IDLE_MASK)
-		return HL_ENG_BUSY(buf, size, "MME_CMDQ");
-
-	if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
-			MME_ARCH_IDLE_MASK)
-		return HL_ENG_BUSY(buf, size, "MME_ARCH");
-
-	return true;
+		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
+		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
+		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
+		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
+				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
+				IS_TPC_IDLE(tpc_cfg_sts);
+		is_idle &= is_eng_idle;
+
+		if (s)
+			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
+				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
+	}
+
+	if (s)
+		seq_puts(s,
+			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
+			"---  -------  ------------  --------------  -----------\n");
+
+	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
+	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
+	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
+	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
+			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
+			IS_MME_IDLE(mme_arch_sts);
+	is_idle &= is_eng_idle;
+
+	if (s) {
+		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
+				cmdq_glbl_sts0, mme_arch_sts);
+		seq_puts(s, "\n");
+	}
+
+	return is_idle;
 }
 
 static void goya_hw_queues_lock(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 5e4a631b3d88..2c9ea61099b4 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -557,7 +557,7 @@ struct hl_asic_funcs {
 			u32 asid, u64 va, u64 size);
 	int (*send_heartbeat)(struct hl_device *hdev);
 	int (*debug_coresight)(struct hl_device *hdev, void *data);
-	bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size);
+	bool (*is_device_idle)(struct hl_device *hdev, struct seq_file *s);
 	int (*soft_reset_late_init)(struct hl_device *hdev);
 	void (*hw_queues_lock)(struct hl_device *hdev);
 	void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -1112,12 +1112,6 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 	(cond) ? 0 : -ETIMEDOUT; \
 })
 
-#define HL_ENG_BUSY(buf, size, fmt, ...) ({ \
-		if (buf) \
-			snprintf(buf, size, fmt, ##__VA_ARGS__); \
-		false; \
-	})
-
 struct hwmon_chip_info;
 
 /**
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index c641c7eb6f7c..b04585af27ad 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -119,7 +119,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL, 0);
+	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL);
 
 	return copy_to_user(out, &hw_idle,
 		min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2 3/3] habanalabs: Add busy engines bitmask to HW idle IOCTL
  2019-07-01 13:59 [PATCH v2 1/3] habanalabs: Update the device idle check Tomer Tayar
@ 2019-07-01 13:59 ` Tomer Tayar
  2019-07-03  7:31   ` Oded Gabbay
  2019-07-01 13:59 ` [PATCH v2 2/3] habanalabs: Add debugfs node for engines status Tomer Tayar
  1 sibling, 1 reply; 4+ messages in thread
From: Tomer Tayar @ 2019-07-01 13:59 UTC (permalink / raw)
  To: oded.gabbay; +Cc: linux-kernel

The information which is currently provided as a response to the
"HL_INFO_HW_IDLE" IOCTL is merely a general boolean value.
This patch extends it and provides also a bitmask that indicates which
of the device engines are busy.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
---
 drivers/misc/habanalabs/debugfs.c          |  2 +-
 drivers/misc/habanalabs/goya/goya.c        | 11 ++++++--
 drivers/misc/habanalabs/habanalabs.h       |  3 ++-
 drivers/misc/habanalabs/habanalabs_ioctl.c |  3 ++-
 include/uapi/misc/habanalabs.h             | 30 +++++++++++++++++++++-
 5 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 6a5dfb14eca1..18e499c900c7 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -506,7 +506,7 @@ static int engines_show(struct seq_file *s, void *data)
 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 	struct hl_device *hdev = dev_entry->hdev;
 
-	hdev->asic_funcs->is_device_idle(hdev, s);
+	hdev->asic_funcs->is_device_idle(hdev, NULL, s);
 
 	return 0;
 }
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 41e97531f300..75294ec65257 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -2828,7 +2828,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
 	else
 		timeout = HL_DEVICE_TIMEOUT_USEC;
 
-	if (!hdev->asic_funcs->is_device_idle(hdev, NULL)) {
+	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
 		dev_err_ratelimited(hdev->dev,
 			"Can't send KMD job on QMAN0 because the device is not idle\n");
 		return -EBUSY;
@@ -4914,7 +4914,8 @@ int goya_armcp_info_get(struct hl_device *hdev)
 	return 0;
 }
 
-static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
+static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
+				struct seq_file *s)
 {
 	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
 	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
@@ -4937,6 +4938,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
 				IS_DMA_IDLE(dma_core_sts0);
 		is_idle &= is_eng_idle;
 
+		if (mask)
+			*mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
 		if (s)
 			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
 					qm_glbl_sts0, dma_core_sts0);
@@ -4958,6 +4961,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
 				IS_TPC_IDLE(tpc_cfg_sts);
 		is_idle &= is_eng_idle;
 
+		if (mask)
+			*mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
 		if (s)
 			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
 				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
@@ -4976,6 +4981,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
 			IS_MME_IDLE(mme_arch_sts);
 	is_idle &= is_eng_idle;
 
+	if (mask)
+		*mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
 	if (s) {
 		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
 				cmdq_glbl_sts0, mme_arch_sts);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 2c9ea61099b4..10da9940ee0d 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -557,7 +557,8 @@ struct hl_asic_funcs {
 			u32 asid, u64 va, u64 size);
 	int (*send_heartbeat)(struct hl_device *hdev);
 	int (*debug_coresight)(struct hl_device *hdev, void *data);
-	bool (*is_device_idle)(struct hl_device *hdev, struct seq_file *s);
+	bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
+				struct seq_file *s);
 	int (*soft_reset_late_init)(struct hl_device *hdev);
 	void (*hw_queues_lock)(struct hl_device *hdev);
 	void (*hw_queues_unlock)(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index b04585af27ad..07127576b3e8 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -119,7 +119,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL);
+	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
+					&hw_idle.busy_engines_mask, NULL);
 
 	return copy_to_user(out, &hw_idle,
 		min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 204ab9b4ae67..3956c226ca35 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -45,6 +45,30 @@ enum goya_queue_id {
 	GOYA_QUEUE_ID_SIZE
 };
 
+/*
+ * Engine Numbering
+ *
+ * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle'
+ */
+
+enum goya_engine_id {
+	GOYA_ENGINE_ID_DMA_0 = 0,
+	GOYA_ENGINE_ID_DMA_1,
+	GOYA_ENGINE_ID_DMA_2,
+	GOYA_ENGINE_ID_DMA_3,
+	GOYA_ENGINE_ID_DMA_4,
+	GOYA_ENGINE_ID_MME_0,
+	GOYA_ENGINE_ID_TPC_0,
+	GOYA_ENGINE_ID_TPC_1,
+	GOYA_ENGINE_ID_TPC_2,
+	GOYA_ENGINE_ID_TPC_3,
+	GOYA_ENGINE_ID_TPC_4,
+	GOYA_ENGINE_ID_TPC_5,
+	GOYA_ENGINE_ID_TPC_6,
+	GOYA_ENGINE_ID_TPC_7,
+	GOYA_ENGINE_ID_SIZE
+};
+
 enum hl_device_status {
 	HL_DEVICE_STATUS_OPERATIONAL,
 	HL_DEVICE_STATUS_IN_RESET,
@@ -86,7 +110,11 @@ struct hl_info_dram_usage {
 
 struct hl_info_hw_idle {
 	__u32 is_idle;
-	__u32 pad;
+	/*
+	 * Bitmask of busy engines.
+	 * Bits definition is according to `enum <chip>_enging_id'.
+	 */
+	__u32 busy_engines_mask;
 };
 
 struct hl_info_device_status {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 3/3] habanalabs: Add busy engines bitmask to HW idle IOCTL
  2019-07-01 13:59 ` [PATCH v2 3/3] habanalabs: Add busy engines bitmask to HW idle IOCTL Tomer Tayar
@ 2019-07-03  7:31   ` Oded Gabbay
  0 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2019-07-03  7:31 UTC (permalink / raw)
  To: Tomer Tayar; +Cc: linux-kernel

On Mon, Jul 1, 2019 at 4:59 PM Tomer Tayar <ttayar@habana.ai> wrote:
>
> The information which is currently provided as a response to the
> "HL_INFO_HW_IDLE" IOCTL is merely a general boolean value.
> This patch extends it and provides also a bitmask that indicates which
> of the device engines are busy.
>
> Signed-off-by: Tomer Tayar <ttayar@habana.ai>
> ---
>  drivers/misc/habanalabs/debugfs.c          |  2 +-
>  drivers/misc/habanalabs/goya/goya.c        | 11 ++++++--
>  drivers/misc/habanalabs/habanalabs.h       |  3 ++-
>  drivers/misc/habanalabs/habanalabs_ioctl.c |  3 ++-
>  include/uapi/misc/habanalabs.h             | 30 +++++++++++++++++++++-
>  5 files changed, 43 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
> index 6a5dfb14eca1..18e499c900c7 100644
> --- a/drivers/misc/habanalabs/debugfs.c
> +++ b/drivers/misc/habanalabs/debugfs.c
> @@ -506,7 +506,7 @@ static int engines_show(struct seq_file *s, void *data)
>         struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
>         struct hl_device *hdev = dev_entry->hdev;
>
> -       hdev->asic_funcs->is_device_idle(hdev, s);
> +       hdev->asic_funcs->is_device_idle(hdev, NULL, s);
>
>         return 0;
>  }
> diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
> index 41e97531f300..75294ec65257 100644
> --- a/drivers/misc/habanalabs/goya/goya.c
> +++ b/drivers/misc/habanalabs/goya/goya.c
> @@ -2828,7 +2828,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
>         else
>                 timeout = HL_DEVICE_TIMEOUT_USEC;
>
> -       if (!hdev->asic_funcs->is_device_idle(hdev, NULL)) {
> +       if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
>                 dev_err_ratelimited(hdev->dev,
>                         "Can't send KMD job on QMAN0 because the device is not idle\n");
>                 return -EBUSY;
> @@ -4914,7 +4914,8 @@ int goya_armcp_info_get(struct hl_device *hdev)
>         return 0;
>  }
>
> -static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
> +static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
> +                               struct seq_file *s)
>  {
>         const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
>         const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
> @@ -4937,6 +4938,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
>                                 IS_DMA_IDLE(dma_core_sts0);
>                 is_idle &= is_eng_idle;
>
> +               if (mask)
> +                       *mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
>                 if (s)
>                         seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
>                                         qm_glbl_sts0, dma_core_sts0);
> @@ -4958,6 +4961,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
>                                 IS_TPC_IDLE(tpc_cfg_sts);
>                 is_idle &= is_eng_idle;
>
> +               if (mask)
> +                       *mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
>                 if (s)
>                         seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
>                                 qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
> @@ -4976,6 +4981,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
>                         IS_MME_IDLE(mme_arch_sts);
>         is_idle &= is_eng_idle;
>
> +       if (mask)
> +               *mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
>         if (s) {
>                 seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
>                                 cmdq_glbl_sts0, mme_arch_sts);
> diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
> index 2c9ea61099b4..10da9940ee0d 100644
> --- a/drivers/misc/habanalabs/habanalabs.h
> +++ b/drivers/misc/habanalabs/habanalabs.h
> @@ -557,7 +557,8 @@ struct hl_asic_funcs {
>                         u32 asid, u64 va, u64 size);
>         int (*send_heartbeat)(struct hl_device *hdev);
>         int (*debug_coresight)(struct hl_device *hdev, void *data);
> -       bool (*is_device_idle)(struct hl_device *hdev, struct seq_file *s);
> +       bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
> +                               struct seq_file *s);
>         int (*soft_reset_late_init)(struct hl_device *hdev);
>         void (*hw_queues_lock)(struct hl_device *hdev);
>         void (*hw_queues_unlock)(struct hl_device *hdev);
> diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
> index b04585af27ad..07127576b3e8 100644
> --- a/drivers/misc/habanalabs/habanalabs_ioctl.c
> +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
> @@ -119,7 +119,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
>         if ((!max_size) || (!out))
>                 return -EINVAL;
>
> -       hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL);
> +       hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
> +                                       &hw_idle.busy_engines_mask, NULL);
>
>         return copy_to_user(out, &hw_idle,
>                 min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
> diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
> index 204ab9b4ae67..3956c226ca35 100644
> --- a/include/uapi/misc/habanalabs.h
> +++ b/include/uapi/misc/habanalabs.h
> @@ -45,6 +45,30 @@ enum goya_queue_id {
>         GOYA_QUEUE_ID_SIZE
>  };
>
> +/*
> + * Engine Numbering
> + *
> + * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle'
> + */
> +
> +enum goya_engine_id {
> +       GOYA_ENGINE_ID_DMA_0 = 0,
> +       GOYA_ENGINE_ID_DMA_1,
> +       GOYA_ENGINE_ID_DMA_2,
> +       GOYA_ENGINE_ID_DMA_3,
> +       GOYA_ENGINE_ID_DMA_4,
> +       GOYA_ENGINE_ID_MME_0,
> +       GOYA_ENGINE_ID_TPC_0,
> +       GOYA_ENGINE_ID_TPC_1,
> +       GOYA_ENGINE_ID_TPC_2,
> +       GOYA_ENGINE_ID_TPC_3,
> +       GOYA_ENGINE_ID_TPC_4,
> +       GOYA_ENGINE_ID_TPC_5,
> +       GOYA_ENGINE_ID_TPC_6,
> +       GOYA_ENGINE_ID_TPC_7,
> +       GOYA_ENGINE_ID_SIZE
> +};
> +
>  enum hl_device_status {
>         HL_DEVICE_STATUS_OPERATIONAL,
>         HL_DEVICE_STATUS_IN_RESET,
> @@ -86,7 +110,11 @@ struct hl_info_dram_usage {
>
>  struct hl_info_hw_idle {
>         __u32 is_idle;
> -       __u32 pad;
> +       /*
> +        * Bitmask of busy engines.
> +        * Bits definition is according to `enum <chip>_enging_id'.
> +        */
> +       __u32 busy_engines_mask;
>  };
>
>  struct hl_info_device_status {
> --
> 2.17.1
>

This patch-set is:
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-07-03  7:31 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-01 13:59 [PATCH v2 1/3] habanalabs: Update the device idle check Tomer Tayar
2019-07-01 13:59 ` [PATCH v2 3/3] habanalabs: Add busy engines bitmask to HW idle IOCTL Tomer Tayar
2019-07-03  7:31   ` Oded Gabbay
2019-07-01 13:59 ` [PATCH v2 2/3] habanalabs: Add debugfs node for engines status Tomer Tayar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).