All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] radeon: Deinline indirect register accessor functions
@ 2015-05-20 11:02 Denys Vlasenko
  2015-05-28 19:21 ` Deucher, Alexander
  0 siblings, 1 reply; 2+ messages in thread
From: Denys Vlasenko @ 2015-05-20 11:02 UTC (permalink / raw)
  To: Christian König; +Cc: Denys Vlasenko, Alex Deucher, linux-kernel

This patch deinlines indirect register accessor functions.

These functions perform two MMIO accesses, framed by spin lock/unlock.
Spin lock/unlock by itself takes more than 50 cycles in the ideal case
(if the lock is exclusively cached on the current CPU).

With this .config: http://busybox.net/~vda/kernel_config,
after deinlining, these functions have the following sizes and
callsite counts:

r600_uvd_ctx_rreg: 111 bytes, 4 callsites
r600_uvd_ctx_wreg: 113 bytes, 5 callsites
eg_pif_phy0_rreg: 106 bytes, 13 callsites
eg_pif_phy0_wreg: 108 bytes, 13 callsites
eg_pif_phy1_rreg: 107 bytes, 13 callsites
eg_pif_phy1_wreg: 108 bytes, 13 callsites
rv370_pcie_rreg: 111 bytes, 21 callsites
rv370_pcie_wreg: 113 bytes, 24 callsites
r600_rcu_rreg: 111 bytes, 16 callsites
r600_rcu_wreg: 113 bytes, 25 callsites
cik_didt_rreg: 106 bytes, 10 callsites
cik_didt_wreg: 107 bytes, 10 callsites
tn_smc_rreg: 106 bytes, 126 callsites
tn_smc_wreg: 107 bytes, 116 callsites
eg_cg_rreg: 107 bytes, 20 callsites
eg_cg_wreg: 108 bytes, 52 callsites

Functions r100_mm_rreg() and r100_mm_wreg() have a fast path and
a locked (slow) path. This patch deinlines only the slow path.

r100_mm_rreg_slow: 78 bytes, 2083 callsites
r100_mm_wreg_slow: 81 bytes, 3570 callsites

Reduction in code size is more than 65,000 bytes:

    text     data      bss       dec     hex filename
85740176 22294680 20627456 128662312 7ab3b28 vmlinux.before
85674192 22294776 20627456 128598664 7aa4288 vmlinux

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: linux-kernel@vger.kernel.org
---
Changes in v2: only partially deinline r100_mm_r/wreg

Changes in v3: move deinlined functions into files which correspond to
particular hw. Explain why these functions aren't inlined.

 drivers/gpu/drm/radeon/cik.c       |  25 +++++
 drivers/gpu/drm/radeon/evergreen.c |  69 ++++++++++++
 drivers/gpu/drm/radeon/ni.c        |  25 +++++
 drivers/gpu/drm/radeon/r100.c      |  22 ++++
 drivers/gpu/drm/radeon/r300.c      |  25 +++++
 drivers/gpu/drm/radeon/r600.c      |  47 ++++++++
 drivers/gpu/drm/radeon/radeon.h    | 225 +++++--------------------------------
 7 files changed, 241 insertions(+), 197 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 3e670d3..7fe99ce 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -141,6 +141,31 @@ static void cik_fini_cg(struct radeon_device *rdev);
 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
 					  bool enable);
 
+/*
+ * Indirect registers accessor
+ */
+u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
+{
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
+	WREG32(CIK_DIDT_IND_INDEX, (reg));
+	r = RREG32(CIK_DIDT_IND_DATA);
+	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
+	return r;
+}
+
+void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
+	WREG32(CIK_DIDT_IND_INDEX, (reg));
+	WREG32(CIK_DIDT_IND_DATA, (v));
+	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
+}
+
 /* get temperature in millidegrees */
 int ci_get_temp(struct radeon_device *rdev)
 {
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 973df06..1e78c1f 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -35,6 +35,75 @@
 #include "evergreen_blit_shaders.h"
 #include "radeon_ucode.h"
 
+/*
+ * Indirect registers accessor
+ */
+u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg)
+{
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&rdev->cg_idx_lock, flags);
+	WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
+	r = RREG32(EVERGREEN_CG_IND_DATA);
+	spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
+	return r;
+}
+
+void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rdev->cg_idx_lock, flags);
+	WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
+	WREG32(EVERGREEN_CG_IND_DATA, (v));
+	spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
+}
+
+u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg)
+{
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+	WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
+	r = RREG32(EVERGREEN_PIF_PHY0_DATA);
+	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+	return r;
+}
+
+void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+	WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
+	WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
+	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+}
+
+u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg)
+{
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+	WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
+	r = RREG32(EVERGREEN_PIF_PHY1_DATA);
+	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+	return r;
+}
+
+void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+	WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
+	WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
+	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+}
+
 static const u32 crtc_offsets[6] =
 {
 	EVERGREEN_CRTC0_REGISTER_OFFSET,
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index dab0081..e9bf710 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -36,6 +36,31 @@
 #include "radeon_ucode.h"
 #include "clearstate_cayman.h"
 
+/*
+ * Indirect registers accessor
+ */
+u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg)
+{
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&rdev->smc_idx_lock, flags);
+	WREG32(TN_SMC_IND_INDEX_0, (reg));
+	r = RREG32(TN_SMC_IND_DATA_0);
+	spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
+	return r;
+}
+
+void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rdev->smc_idx_lock, flags);
+	WREG32(TN_SMC_IND_INDEX_0, (reg));
+	WREG32(TN_SMC_IND_DATA_0, (v));
+	spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
+}
+
 static const u32 tn_rlc_save_restore_register_list[] =
 {
 	0x98fc,
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 04f2514..238b13f 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4090,6 +4090,28 @@ int r100_init(struct radeon_device *rdev)
 	return 0;
 }
 
+uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
+{
+	unsigned long flags;
+	uint32_t ret;
+
+	spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
+	writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+	ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+	return ret;
+}
+
+void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
+	writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+	writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+}
+
 u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
 {
 	if (reg < rdev->rio_mem_size)
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 08d68f3..718b12b 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -50,6 +50,31 @@
  */
 
 /*
+ * Indirect registers accessor
+ */
+uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	unsigned long flags;
+	uint32_t r;
+
+	spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
+	WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
+	r = RREG32(RADEON_PCIE_DATA);
+	spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
+	return r;
+}
+
+void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
+	WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
+	WREG32(RADEON_PCIE_DATA, (v));
+	spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
+}
+
+/*
  * rv370,rv380 PCIE GART
  */
 static int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 2fcad34..b667409 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -108,6 +108,53 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev);
 extern int evergreen_rlc_resume(struct radeon_device *rdev);
 extern void rv770_set_clk_bypass_mode(struct radeon_device *rdev);
 
+/*
+ * Indirect registers accessor
+ */
+u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg)
+{
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
+	WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
+	r = RREG32(R600_RCU_DATA);
+	spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
+	return r;
+}
+
+void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
+	WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
+	WREG32(R600_RCU_DATA, (v));
+	spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
+}
+
+u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg)
+{
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
+	WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
+	r = RREG32(R600_UVD_CTX_DATA);
+	spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
+	return r;
+}
+
+void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
+	WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
+	WREG32(R600_UVD_CTX_DATA, (v));
+	spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
+}
+
 /**
  * r600_get_xclk - get the xclk
  *
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5587603..ffc621b 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2465,38 +2465,24 @@ int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
 #define RADEON_MIN_MMIO_SIZE 0x10000
 
+uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg);
+void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
 				    bool always_indirect)
 {
 	/* The mmio size is 64kb at minimum. Allows the if to be optimized out. */
 	if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
 		return readl(((void __iomem *)rdev->rmmio) + reg);
-	else {
-		unsigned long flags;
-		uint32_t ret;
-
-		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
-		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
-		ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
-		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
-
-		return ret;
-	}
+	else
+		return r100_mm_rreg_slow(rdev, reg);
 }
-
 static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
 				bool always_indirect)
 {
 	if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
 		writel(v, ((void __iomem *)rdev->rmmio) + reg);
-	else {
-		unsigned long flags;
-
-		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
-		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
-		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
-		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
-	}
+	else
+		r100_mm_wreg_slow(rdev, reg, v);
 }
 
 u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
@@ -2580,184 +2566,29 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f)
 #define WDOORBELL32(index, v) cik_mm_wdoorbell(rdev, (index), (v))
 
 /*
- * Indirect registers accessor
+ * Indirect registers accessors.
+ * They used to be inlined, but this increases code size by ~65 kbytes.
+ * Since each performs a pair of MMIO ops
+ * within a spin_lock_irqsave/spin_unlock_irqrestore region,
+ * the cost of call+ret is almost negligible. MMIO and locking
+ * costs several dozens of cycles each at best, call+ret is ~5 cycles.
  */
-static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
-{
-	unsigned long flags;
-	uint32_t r;
-
-	spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
-	WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
-	r = RREG32(RADEON_PCIE_DATA);
-	spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
-	return r;
-}
-
-static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
-	WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
-	WREG32(RADEON_PCIE_DATA, (v));
-	spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
-}
-
-static inline u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg)
-{
-	unsigned long flags;
-	u32 r;
-
-	spin_lock_irqsave(&rdev->smc_idx_lock, flags);
-	WREG32(TN_SMC_IND_INDEX_0, (reg));
-	r = RREG32(TN_SMC_IND_DATA_0);
-	spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
-	return r;
-}
-
-static inline void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rdev->smc_idx_lock, flags);
-	WREG32(TN_SMC_IND_INDEX_0, (reg));
-	WREG32(TN_SMC_IND_DATA_0, (v));
-	spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
-}
-
-static inline u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg)
-{
-	unsigned long flags;
-	u32 r;
-
-	spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
-	WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
-	r = RREG32(R600_RCU_DATA);
-	spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
-	return r;
-}
-
-static inline void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
-	WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
-	WREG32(R600_RCU_DATA, (v));
-	spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
-}
-
-static inline u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg)
-{
-	unsigned long flags;
-	u32 r;
-
-	spin_lock_irqsave(&rdev->cg_idx_lock, flags);
-	WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
-	r = RREG32(EVERGREEN_CG_IND_DATA);
-	spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
-	return r;
-}
-
-static inline void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rdev->cg_idx_lock, flags);
-	WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
-	WREG32(EVERGREEN_CG_IND_DATA, (v));
-	spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
-}
-
-static inline u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg)
-{
-	unsigned long flags;
-	u32 r;
-
-	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
-	WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
-	r = RREG32(EVERGREEN_PIF_PHY0_DATA);
-	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
-	return r;
-}
-
-static inline void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
-	WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
-	WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
-	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
-}
-
-static inline u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg)
-{
-	unsigned long flags;
-	u32 r;
-
-	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
-	WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
-	r = RREG32(EVERGREEN_PIF_PHY1_DATA);
-	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
-	return r;
-}
-
-static inline void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
-	WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
-	WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
-	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
-}
-
-static inline u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg)
-{
-	unsigned long flags;
-	u32 r;
-
-	spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
-	WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
-	r = RREG32(R600_UVD_CTX_DATA);
-	spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
-	return r;
-}
-
-static inline void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
-	WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
-	WREG32(R600_UVD_CTX_DATA, (v));
-	spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
-}
-
-
-static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
-{
-	unsigned long flags;
-	u32 r;
-
-	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
-	WREG32(CIK_DIDT_IND_INDEX, (reg));
-	r = RREG32(CIK_DIDT_IND_DATA);
-	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
-	return r;
-}
-
-static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
-	WREG32(CIK_DIDT_IND_INDEX, (reg));
-	WREG32(CIK_DIDT_IND_DATA, (v));
-	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
-}
+uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
+void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg);
+void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg);
+void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg);
+void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg);
+void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg);
+void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg);
+void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg);
+void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 
 void r100_pll_errata_after_index(struct radeon_device *rdev);
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* RE: [PATCH v3] radeon: Deinline indirect register accessor functions
  2015-05-20 11:02 [PATCH v3] radeon: Deinline indirect register accessor functions Denys Vlasenko
@ 2015-05-28 19:21 ` Deucher, Alexander
  0 siblings, 0 replies; 2+ messages in thread
From: Deucher, Alexander @ 2015-05-28 19:21 UTC (permalink / raw)
  To: Denys Vlasenko, Koenig, Christian; +Cc: linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 20056 bytes --]

> -----Original Message-----
> From: Denys Vlasenko [mailto:dvlasenk@redhat.com]
> Sent: Wednesday, May 20, 2015 7:03 AM
> To: Koenig, Christian
> Cc: Denys Vlasenko; Deucher, Alexander; linux-kernel@vger.kernel.org
> Subject: [PATCH v3] radeon: Deinline indirect register accessor functions
> 
> This patch deinlines indirect register accessor functions.
> 
> These functions perform two mmio accesses, framed by spin lock/unlock.
> Spin lock/unlock by itself takes more than 50 cycles in ideal case
> (if lock is exclusively cached on current CPU).
> 
> With this .config: http://busybox.net/~vda/kernel_config,
> after uninlining these functions have sizes and callsite counts
> as follows:
> 
> r600_uvd_ctx_rreg: 111 bytes, 4 callsites
> r600_uvd_ctx_wreg: 113 bytes, 5 callsites
> eg_pif_phy0_rreg: 106 bytes, 13 callsites
> eg_pif_phy0_wreg: 108 bytes, 13 callsites
> eg_pif_phy1_rreg: 107 bytes, 13 callsites
> eg_pif_phy1_wreg: 108 bytes, 13 callsites
> rv370_pcie_rreg: 111 bytes, 21 callsites
> rv370_pcie_wreg: 113 bytes, 24 callsites
> r600_rcu_rreg: 111 bytes, 16 callsites
> r600_rcu_wreg: 113 bytes, 25 callsites
> cik_didt_rreg: 106 bytes, 10 callsites
> cik_didt_wreg: 107 bytes, 10 callsites
> tn_smc_rreg: 106 bytes, 126 callsites
> tn_smc_wreg: 107 bytes, 116 callsites
> eg_cg_rreg: 107 bytes, 20 callsites
> eg_cg_wreg: 108 bytes, 52 callsites
> 
> Functions r100_mm_rreg() and r100_mm_rreg() have a fast path and
> a locked (slow) path. This patch deinlines only slow path.
> 
> r100_mm_rreg_slow: 78 bytes, 2083 callsites
> r100_mm_wreg_slow: 81 bytes, 3570 callsites
> 
> Reduction in code size is more than 65,000 bytes:
> 
>     text     data      bss       dec     hex filename
> 85740176 22294680 20627456 128662312 7ab3b28 vmlinux.before
> 85674192 22294776 20627456 128598664 7aa4288 vmlinux
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: linux-kernel@vger.kernel.org

Applied.  Thanks!

Alex

> ---
> Changes in v2: only partially deinline r100_mm_r/wreg
> 
> Changes in v3: move deinlined functions into files which correspond to
> particular hw. Explain why these functions aren't inlined.
> 
>  drivers/gpu/drm/radeon/cik.c       |  25 +++++
>  drivers/gpu/drm/radeon/evergreen.c |  69 ++++++++++++
>  drivers/gpu/drm/radeon/ni.c        |  25 +++++
>  drivers/gpu/drm/radeon/r100.c      |  22 ++++
>  drivers/gpu/drm/radeon/r300.c      |  25 +++++
>  drivers/gpu/drm/radeon/r600.c      |  47 ++++++++
>  drivers/gpu/drm/radeon/radeon.h    | 225 +++++--------------------------------
>  7 files changed, 241 insertions(+), 197 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
> index 3e670d3..7fe99ce 100644
> --- a/drivers/gpu/drm/radeon/cik.c
> +++ b/drivers/gpu/drm/radeon/cik.c
> @@ -141,6 +141,31 @@ static void cik_fini_cg(struct radeon_device *rdev);
>  static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
>  					  bool enable);
> 
> +/*
> + * Indirect registers accessor
> + */
> +u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
> +{
> +	unsigned long flags;
> +	u32 r;
> +
> +	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
> +	WREG32(CIK_DIDT_IND_INDEX, (reg));
> +	r = RREG32(CIK_DIDT_IND_DATA);
> +	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
> +	return r;
> +}
> +
> +void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
> +	WREG32(CIK_DIDT_IND_INDEX, (reg));
> +	WREG32(CIK_DIDT_IND_DATA, (v));
> +	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
> +}
> +
>  /* get temperature in millidegrees */
>  int ci_get_temp(struct radeon_device *rdev)
>  {
> diff --git a/drivers/gpu/drm/radeon/evergreen.c
> b/drivers/gpu/drm/radeon/evergreen.c
> index 973df06..1e78c1f 100644
> --- a/drivers/gpu/drm/radeon/evergreen.c
> +++ b/drivers/gpu/drm/radeon/evergreen.c
> @@ -35,6 +35,75 @@
>  #include "evergreen_blit_shaders.h"
>  #include "radeon_ucode.h"
> 
> +/*
> + * Indirect registers accessor
> + */
> +u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg)
> +{
> +	unsigned long flags;
> +	u32 r;
> +
> +	spin_lock_irqsave(&rdev->cg_idx_lock, flags);
> +	WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
> +	r = RREG32(EVERGREEN_CG_IND_DATA);
> +	spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
> +	return r;
> +}
> +
> +void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rdev->cg_idx_lock, flags);
> +	WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
> +	WREG32(EVERGREEN_CG_IND_DATA, (v));
> +	spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
> +}
> +
> +u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg)
> +{
> +	unsigned long flags;
> +	u32 r;
> +
> +	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
> +	WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
> +	r = RREG32(EVERGREEN_PIF_PHY0_DATA);
> +	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
> +	return r;
> +}
> +
> +void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
> +	WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
> +	WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
> +	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
> +}
> +
> +u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg)
> +{
> +	unsigned long flags;
> +	u32 r;
> +
> +	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
> +	WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
> +	r = RREG32(EVERGREEN_PIF_PHY1_DATA);
> +	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
> +	return r;
> +}
> +
> +void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
> +	WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
> +	WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
> +	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
> +}
> +
>  static const u32 crtc_offsets[6] =
>  {
>  	EVERGREEN_CRTC0_REGISTER_OFFSET,
> diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
> index dab0081..e9bf710 100644
> --- a/drivers/gpu/drm/radeon/ni.c
> +++ b/drivers/gpu/drm/radeon/ni.c
> @@ -36,6 +36,31 @@
>  #include "radeon_ucode.h"
>  #include "clearstate_cayman.h"
> 
> +/*
> + * Indirect registers accessor
> + */
> +u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg)
> +{
> +	unsigned long flags;
> +	u32 r;
> +
> +	spin_lock_irqsave(&rdev->smc_idx_lock, flags);
> +	WREG32(TN_SMC_IND_INDEX_0, (reg));
> +	r = RREG32(TN_SMC_IND_DATA_0);
> +	spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
> +	return r;
> +}
> +
> +void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rdev->smc_idx_lock, flags);
> +	WREG32(TN_SMC_IND_INDEX_0, (reg));
> +	WREG32(TN_SMC_IND_DATA_0, (v));
> +	spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
> +}
> +
>  static const u32 tn_rlc_save_restore_register_list[] =
>  {
>  	0x98fc,
> diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
> index 04f2514..238b13f 100644
> --- a/drivers/gpu/drm/radeon/r100.c
> +++ b/drivers/gpu/drm/radeon/r100.c
> @@ -4090,6 +4090,28 @@ int r100_init(struct radeon_device *rdev)
>  	return 0;
>  }
> 
> +uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
> +{
> +	unsigned long flags;
> +	uint32_t ret;
> +
> +	spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> +	writel(reg, ((void __iomem *)rdev->rmmio) +
> RADEON_MM_INDEX);
> +	ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
> +	spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> +	return ret;
> +}
> +
> +void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg,
> uint32_t v)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> +	writel(reg, ((void __iomem *)rdev->rmmio) +
> RADEON_MM_INDEX);
> +	writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
> +	spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> +}
> +
>  u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
>  {
>  	if (reg < rdev->rio_mem_size)
> diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
> index 08d68f3..718b12b 100644
> --- a/drivers/gpu/drm/radeon/r300.c
> +++ b/drivers/gpu/drm/radeon/r300.c
> @@ -50,6 +50,31 @@
>   */
> 
>  /*
> + * Indirect registers accessor
> + */
> +uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
> +{
> +	unsigned long flags;
> +	uint32_t r;
> +
> +	spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
> +	WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
> +	r = RREG32(RADEON_PCIE_DATA);
> +	spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
> +	return r;
> +}
> +
> +void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
> +	WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
> +	WREG32(RADEON_PCIE_DATA, (v));
> +	spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
> +}
> +
> +/*
>   * rv370,rv380 PCIE GART
>   */
>  static int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);
> diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
> index 2fcad34..b667409 100644
> --- a/drivers/gpu/drm/radeon/r600.c
> +++ b/drivers/gpu/drm/radeon/r600.c
> @@ -108,6 +108,53 @@ static void r600_pcie_gen2_enable(struct
> radeon_device *rdev);
>  extern int evergreen_rlc_resume(struct radeon_device *rdev);
>  extern void rv770_set_clk_bypass_mode(struct radeon_device *rdev);
> 
> +/*
> + * Indirect registers accessor
> + */
> +u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg)
> +{
> +	unsigned long flags;
> +	u32 r;
> +
> +	spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
> +	WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
> +	r = RREG32(R600_RCU_DATA);
> +	spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
> +	return r;
> +}
> +
> +void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
> +	WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
> +	WREG32(R600_RCU_DATA, (v));
> +	spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
> +}
> +
> +u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg)
> +{
> +	unsigned long flags;
> +	u32 r;
> +
> +	spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
> +	WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
> +	r = RREG32(R600_UVD_CTX_DATA);
> +	spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
> +	return r;
> +}
> +
> +void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
> +	WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
> +	WREG32(R600_UVD_CTX_DATA, (v));
> +	spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
> +}
> +
>  /**
>   * r600_get_xclk - get the xclk
>   *
> diff --git a/drivers/gpu/drm/radeon/radeon.h
> b/drivers/gpu/drm/radeon/radeon.h
> index 5587603..ffc621b 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2465,38 +2465,24 @@ int radeon_gpu_wait_for_idle(struct
> radeon_device *rdev);
> 
>  #define RADEON_MIN_MMIO_SIZE 0x10000
> 
> +uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg);
> +void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg,
> uint32_t v);
>  static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t
> reg,
>  				    bool always_indirect)
>  {
>  	/* The mmio size is 64kb at minimum. Allows the if to be optimized
> out. */
>  	if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) &&
> !always_indirect)
>  		return readl(((void __iomem *)rdev->rmmio) + reg);
> -	else {
> -		unsigned long flags;
> -		uint32_t ret;
> -
> -		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> -		writel(reg, ((void __iomem *)rdev->rmmio) +
> RADEON_MM_INDEX);
> -		ret = readl(((void __iomem *)rdev->rmmio) +
> RADEON_MM_DATA);
> -		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> -
> -		return ret;
> -	}
> +	else
> +		return r100_mm_rreg_slow(rdev, reg);
>  }
> -
>  static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg,
> uint32_t v,
>  				bool always_indirect)
>  {
>  	if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) &&
> !always_indirect)
>  		writel(v, ((void __iomem *)rdev->rmmio) + reg);
> -	else {
> -		unsigned long flags;
> -
> -		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> -		writel(reg, ((void __iomem *)rdev->rmmio) +
> RADEON_MM_INDEX);
> -		writel(v, ((void __iomem *)rdev->rmmio) +
> RADEON_MM_DATA);
> -		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> -	}
> +	else
> +		r100_mm_wreg_slow(rdev, reg, v);
>  }
> 
>  u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
> @@ -2580,184 +2566,29 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f)
>  #define WDOORBELL32(index, v) cik_mm_wdoorbell(rdev, (index), (v))
> 
>  /*
> - * Indirect registers accessor
> + * Indirect registers accessors.
> + * They used to be inlined, but this increases code size by ~65 kbytes.
> + * Since each performs a pair of MMIO ops
> + * within a spin_lock_irqsave/spin_unlock_irqrestore region,
> + * the cost of call+ret is almost negligible. MMIO and locking
> + * cost several dozen cycles each at best; call+ret is ~5 cycles.
>   */
> -static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t
> reg)
> -{
> -	unsigned long flags;
> -	uint32_t r;
> -
> -	spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
> -	WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
> -	r = RREG32(RADEON_PCIE_DATA);
> -	spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
> -	return r;
> -}
> -
> -static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg,
> uint32_t v)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
> -	WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
> -	WREG32(RADEON_PCIE_DATA, (v));
> -	spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
> -}
> -
> -static inline u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg)
> -{
> -	unsigned long flags;
> -	u32 r;
> -
> -	spin_lock_irqsave(&rdev->smc_idx_lock, flags);
> -	WREG32(TN_SMC_IND_INDEX_0, (reg));
> -	r = RREG32(TN_SMC_IND_DATA_0);
> -	spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
> -	return r;
> -}
> -
> -static inline void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&rdev->smc_idx_lock, flags);
> -	WREG32(TN_SMC_IND_INDEX_0, (reg));
> -	WREG32(TN_SMC_IND_DATA_0, (v));
> -	spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
> -}
> -
> -static inline u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg)
> -{
> -	unsigned long flags;
> -	u32 r;
> -
> -	spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
> -	WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
> -	r = RREG32(R600_RCU_DATA);
> -	spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
> -	return r;
> -}
> -
> -static inline void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
> -	WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
> -	WREG32(R600_RCU_DATA, (v));
> -	spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
> -}
> -
> -static inline u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg)
> -{
> -	unsigned long flags;
> -	u32 r;
> -
> -	spin_lock_irqsave(&rdev->cg_idx_lock, flags);
> -	WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
> -	r = RREG32(EVERGREEN_CG_IND_DATA);
> -	spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
> -	return r;
> -}
> -
> -static inline void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&rdev->cg_idx_lock, flags);
> -	WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
> -	WREG32(EVERGREEN_CG_IND_DATA, (v));
> -	spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
> -}
> -
> -static inline u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg)
> -{
> -	unsigned long flags;
> -	u32 r;
> -
> -	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
> -	WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
> -	r = RREG32(EVERGREEN_PIF_PHY0_DATA);
> -	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
> -	return r;
> -}
> -
> -static inline void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg,
> u32 v)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
> -	WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
> -	WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
> -	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
> -}
> -
> -static inline u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg)
> -{
> -	unsigned long flags;
> -	u32 r;
> -
> -	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
> -	WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
> -	r = RREG32(EVERGREEN_PIF_PHY1_DATA);
> -	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
> -	return r;
> -}
> -
> -static inline void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg,
> u32 v)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&rdev->pif_idx_lock, flags);
> -	WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
> -	WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
> -	spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
> -}
> -
> -static inline u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg)
> -{
> -	unsigned long flags;
> -	u32 r;
> -
> -	spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
> -	WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
> -	r = RREG32(R600_UVD_CTX_DATA);
> -	spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
> -	return r;
> -}
> -
> -static inline void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg,
> u32 v)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
> -	WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
> -	WREG32(R600_UVD_CTX_DATA, (v));
> -	spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
> -}
> -
> -
> -static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
> -{
> -	unsigned long flags;
> -	u32 r;
> -
> -	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
> -	WREG32(CIK_DIDT_IND_INDEX, (reg));
> -	r = RREG32(CIK_DIDT_IND_DATA);
> -	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
> -	return r;
> -}
> -
> -static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
> -	WREG32(CIK_DIDT_IND_INDEX, (reg));
> -	WREG32(CIK_DIDT_IND_DATA, (v));
> -	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
> -}
> +uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
> +void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
> +u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg);
> +void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v);
> +u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg);
> +void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v);
> +u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg);
> +void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v);
> +u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg);
> +void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v);
> +u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg);
> +void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v);
> +u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg);
> +void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v);
> +u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg);
> +void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v);
> 
>  void r100_pll_errata_after_index(struct radeon_device *rdev);
> 
> --
> 1.8.1.4

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-05-28 19:53 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-20 11:02 [PATCH v3] radeon: Deinline indirect register accessor functions Denys Vlasenko
2015-05-28 19:21 ` Deucher, Alexander

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.