* [PATCH v4 01/12] arm-cci: Define CCI counter period
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 02/12] arm-cci: Refactor pmu_write_counter Suzuki K. Poulose
` (10 subsequent siblings)
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
Instead of hard coding the period we program on the PMU
counters, define a symbol.
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index ee47e6b..3786879 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -85,6 +85,14 @@ static const struct of_device_id arm_cci_matches[] = {
#define CCI_PMU_CNTR_MASK ((1ULL << 32) -1)
#define CCI_PMU_CNTR_LAST(cci_pmu) (cci_pmu->num_cntrs - 1)
+/*
+ * The CCI PMU counters have a period of 2^32. To account for the
+ * possiblity of extreme interrupt latency we program for a period of
+ * half that. Hopefully we can handle the interrupt before another 2^31
+ * events occur and the counter overtakes its previous value.
+ */
+#define CCI_CNTR_PERIOD (1UL << 31)
+
#define CCI_PMU_MAX_HW_CNTRS(model) \
((model)->num_hw_cntrs + (model)->fixed_hw_cntrs)
@@ -797,15 +805,8 @@ static void pmu_read(struct perf_event *event)
void pmu_event_set_period(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- /*
- * The CCI PMU counters have a period of 2^32. To account for the
- * possiblity of extreme interrupt latency we program for a period of
- * half that. Hopefully we can handle the interrupt before another 2^31
- * events occur and the counter overtakes its previous value.
- */
- u64 val = 1ULL << 31;
- local64_set(&hwc->prev_count, val);
- pmu_write_counter(event, val);
+ local64_set(&hwc->prev_count, CCI_CNTR_PERIOD);
+ pmu_write_counter(event, CCI_CNTR_PERIOD);
}
static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4 02/12] arm-cci: Refactor pmu_write_counter
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 01/12] arm-cci: Define CCI counter period Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 03/12] arm-cci: Group writes to counter Suzuki K. Poulose
` (9 subsequent siblings)
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
Refactor pmu_write_counter to add __pmu_write_counter() which
will actually write to the counter once the event is validated.
This can be used by hooks specific to CCI PMU model to program
the counter, where the event is already validated.
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 3786879..ce0d3ef 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -767,16 +767,22 @@ static u32 pmu_read_counter(struct perf_event *event)
return value;
}
+static void __pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx)
+{
+ pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
+}
+
static void pmu_write_counter(struct perf_event *event, u32 value)
{
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
struct hw_perf_event *hw_counter = &event->hw;
int idx = hw_counter->idx;
- if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
+ if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
- else
- pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
+ return;
+ }
+ __pmu_write_counter(cci_pmu, value, idx);
}
static u64 pmu_event_update(struct perf_event *event)
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4 03/12] arm-cci: Group writes to counter
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 01/12] arm-cci: Define CCI counter period Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 02/12] arm-cci: Refactor pmu_write_counter Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 04/12] arm-cci: Fix the flags for pmu_start called from pmu_add Suzuki K. Poulose
` (8 subsequent siblings)
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
Add a helper to group the writes to the PMU counters; this will be
used by the transaction hooks.
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index ce0d3ef..f6b8717 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -785,6 +785,21 @@ static void pmu_write_counter(struct perf_event *event, u32 value)
__pmu_write_counter(cci_pmu, value, idx);
}
+/* Write a value to a given set of counters */
+static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask, u32 value)
+{
+ int i;
+
+ for_each_set_bit(i, mask, cci_pmu->num_cntrs)
+ __pmu_write_counter(cci_pmu, value, i);
+}
+
+static void __maybe_unused
+pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask, u32 value)
+{
+ __pmu_write_counters(cci_pmu, mask, value);
+}
+
static u64 pmu_event_update(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4 04/12] arm-cci: Fix the flags for pmu_start called from pmu_add
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
` (2 preceding siblings ...)
2015-12-17 17:49 ` [PATCH v4 03/12] arm-cci: Group writes to counter Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 05/12] arm-cci: PMU: Add support for transactions Suzuki K. Poulose
` (7 subsequent siblings)
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
The cci PMU always reprograms the counter value in pmu->start()
irrespective of the mode it is called from, making sure that
the hwc->state is PERF_HES_UPTODATE.
When pmu->add() is called with PERF_EF_START, we invoke
pmu->start() with PERF_EF_RELOAD removing the PERF_EF_START.
This makes it impossible to detect where the pmu->start()
is called from, i.e., PERF_EF_START or a real PERF_EF_RELOAD.
This patch fixes the issue by passing the right flags down
to pmu->start() when called from pmu->add().
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index f6b8717..f00cbce 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -1020,7 +1020,7 @@ static int cci_pmu_add(struct perf_event *event, int flags)
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
if (flags & PERF_EF_START)
- cci_pmu_start(event, PERF_EF_RELOAD);
+ cci_pmu_start(event, flags);
/* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4 05/12] arm-cci: PMU: Add support for transactions
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
` (3 preceding siblings ...)
2015-12-17 17:49 ` [PATCH v4 04/12] arm-cci: Fix the flags for pmu_start called from pmu_add Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 18:42 ` Peter Zijlstra
2015-12-17 17:49 ` [PATCH v4 06/12] arm-cci: Refactor CCI PMU enable/disable methods Suzuki K. Poulose
` (6 subsequent siblings)
11 siblings, 1 reply; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm,
Suzuki K. Poulose, peterz
This patch adds the transaction hooks for CCI PMU, which can be
later exploited to amortise the cost of writing the counters for
CCI-500 PMU.
We keep track of only the 'ADD' transactions. While we are in a
transaction, we keep track of the indices allocated for the events
and delay the following operations until the transaction is committed.
1) Programming the event on the counter
2) Enabling the counter
3) Setting the period for the event.
Additionally to prevent pmu->del() from updating bogus values from
an event added in the transaction (since we haven't set the period
on the event before the transaction is committed), we mark the state
of the event as PERF_HES_STOPPED in pmu->start(). This will be cleared
once the transaction is committed.
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Punit Agrawal <punit.agrawal@arm.com>
Cc: peterz@infradead.org
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 114 insertions(+), 5 deletions(-)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index f00cbce..ec3d4fd 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -115,6 +115,8 @@ struct cci_pmu_hw_events {
struct perf_event **events;
unsigned long *used_mask;
raw_spinlock_t pmu_lock;
+ unsigned long txn_flags;
+ unsigned long *txn_mask;
};
struct cci_pmu;
@@ -965,12 +967,25 @@ static void cci_pmu_start(struct perf_event *event, int pmu_flags)
raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
- /* Configure the counter unless you are counting a fixed event */
- if (!pmu_fixed_hw_idx(cci_pmu, idx))
- pmu_set_event(cci_pmu, idx, hwc->config_base);
+ /*
+ * If we got here from pmu->add(PERF_EF_START) while we are in a
+ * transaction, we note down the index and write to the counters
+ * in a batch when we commit the transaction. see cci_pmu_commit_txn().
+ * Also, mark this one as STOPPED until we commit the transaction
+ * to avoid reading bogus values in pmu->del() if the transaction
+ * fails later.
+ */
+ if ((pmu_flags & PERF_EF_START) && (hw_events->txn_flags == PERF_PMU_TXN_ADD)) {
+ hwc->state = PERF_HES_STOPPED;
+ set_bit(idx, hw_events->txn_mask);
+ } else {
+ /* Configure the counter unless you are counting a fixed event */
+ if (!pmu_fixed_hw_idx(cci_pmu, idx))
+ pmu_set_event(cci_pmu, idx, hwc->config_base);
- pmu_event_set_period(event);
- pmu_enable_counter(cci_pmu, idx);
+ pmu_event_set_period(event);
+ pmu_enable_counter(cci_pmu, idx);
+ }
raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
}
@@ -981,6 +996,10 @@ static void cci_pmu_stop(struct perf_event *event, int pmu_flags)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ /*
+ * If the counter was never started, e.g a failed transaction
+ * do nothing.
+ */
if (hwc->state & PERF_HES_STOPPED)
return;
@@ -1200,6 +1219,87 @@ static int cci_pmu_event_init(struct perf_event *event)
return err;
}
+static void cci_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
+{
+ struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
+ struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+
+ WARN_ON_ONCE(hw_events->txn_flags);
+
+ hw_events->txn_flags = txn_flags;
+ memset(hw_events->txn_mask, 0,
+ BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long));
+}
+
+/*
+ * Completing the transaction involves :
+ *
+ * 1) Updating the period for each event in the transaction.
+ * - Updating the event->hw.prev_count for each event.
+ * - Writing the period to all the counters allocated for
+ * the transaction.
+ * 2) Program the events to the counters
+ * 3) Changing the event->hw.state from PERF_HES_STOPPED, now that
+ * we are committing the event.
+ * 4) Enable the counter
+ */
+static int cci_pmu_complete_txn(struct cci_pmu *cci_pmu)
+{
+ int i, rc = 0;
+ unsigned long flags;
+ struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+
+ raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+
+ /* Set event period for all the counters in this txn */
+ pmu_write_counters(cci_pmu, hw_events->txn_mask, CCI_CNTR_PERIOD);
+
+ for_each_set_bit(i, hw_events->txn_mask, cci_pmu->num_cntrs) {
+ struct perf_event *event = hw_events->events[i];
+
+ if (!event) {
+ WARN_ON_ONCE(1);
+ rc = -EFAULT;
+ goto unlock;
+ }
+
+ local64_set(&event->hw.prev_count, CCI_CNTR_PERIOD);
+ if (!pmu_fixed_hw_idx(cci_pmu, i))
+ pmu_set_event(cci_pmu, i, event->hw.config_base);
+ event->hw.state = 0;
+ pmu_enable_counter(cci_pmu, i);
+ }
+
+unlock:
+ raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+
+ return rc;
+}
+
+static int cci_pmu_commit_txn(struct pmu *pmu)
+{
+ int rc = 0;
+ struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
+ struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+
+ WARN_ON_ONCE(!hw_events->txn_flags);
+
+ if (hw_events->txn_flags == PERF_PMU_TXN_ADD)
+ rc = cci_pmu_complete_txn(cci_pmu);
+
+ if (!rc)
+ hw_events->txn_flags = 0;
+ return rc;
+}
+
+static void cci_pmu_cancel_txn(struct pmu *pmu)
+{
+ struct cci_pmu_hw_events *hw_events = &to_cci_pmu(pmu)->hw_events;
+
+ WARN_ON_ONCE(!hw_events->txn_flags);
+ hw_events->txn_flags = 0;
+}
+
static ssize_t pmu_cpumask_attr_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1257,6 +1357,9 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
.pmu_enable = cci_pmu_enable,
.pmu_disable = cci_pmu_disable,
.event_init = cci_pmu_event_init,
+ .start_txn = cci_pmu_start_txn,
+ .commit_txn = cci_pmu_commit_txn,
+ .cancel_txn = cci_pmu_cancel_txn,
.add = cci_pmu_add,
.del = cci_pmu_del,
.start = cci_pmu_start,
@@ -1463,6 +1566,12 @@ static struct cci_pmu *cci_pmu_alloc(struct platform_device *pdev)
if (!cci_pmu->hw_events.used_mask)
return ERR_PTR(-ENOMEM);
+ cci_pmu->hw_events.txn_mask = devm_kcalloc(&pdev->dev,
+ BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)),
+ sizeof(*cci_pmu->hw_events.txn_mask),
+ GFP_KERNEL);
+ if (!cci_pmu->hw_events.txn_mask)
+ return ERR_PTR(-ENOMEM);
return cci_pmu;
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH v4 05/12] arm-cci: PMU: Add support for transactions
2015-12-17 17:49 ` [PATCH v4 05/12] arm-cci: PMU: Add support for transactions Suzuki K. Poulose
@ 2015-12-17 18:42 ` Peter Zijlstra
2015-12-18 10:28 ` Suzuki K. Poulose
0 siblings, 1 reply; 22+ messages in thread
From: Peter Zijlstra @ 2015-12-17 18:42 UTC (permalink / raw)
To: Suzuki K. Poulose
Cc: linux-arm-kernel, linux-kernel, mark.rutland, punit.agrawal, arm
On Thu, Dec 17, 2015 at 05:49:12PM +0000, Suzuki K. Poulose wrote:
> This patch adds the transaction hooks for CCI PMU, which can be
> later exploited to amortise the cost of writing the counters for
> CCI-500 PMU.
>
> We keep track of only the 'ADD' transactions. While we are in a
> transaction, we keep track of the indices allocated for the events
> and delay the following operations until the transaction is committed.
> 1) Programming the event on the counter
> 2) Enabling the counter
> 3) Setting the period for the event.
So that's not really what the txn interface is for; it's meant to
amortize event scheduling.
The above doesn't look like it has a failure case, in which case you can
achieve the same thing more simply using pmu::pmu_{dis,en}able().
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v4 05/12] arm-cci: PMU: Add support for transactions
2015-12-17 18:42 ` Peter Zijlstra
@ 2015-12-18 10:28 ` Suzuki K. Poulose
2015-12-18 10:42 ` Peter Zijlstra
0 siblings, 1 reply; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-18 10:28 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-arm-kernel, linux-kernel, mark.rutland, punit.agrawal, arm
On 17/12/15 18:42, Peter Zijlstra wrote:
> On Thu, Dec 17, 2015 at 05:49:12PM +0000, Suzuki K. Poulose wrote:
>> We keep track of only the 'ADD' transactions. While we are in a
>> transaction, we keep track of the indices allocated for the events
>> and delay the following operations until the transaction is committed.
>> 1) Programming the event on the counter
>> 2) Enabling the counter
>> 3) Setting the period for the event.
>
> So that's not really what the txn interface is for, its meant to
> amortize event scheduling.
OK
>
> The above doesn't look like it has a failure case, in which case you can
> achieve the same simpler, using pmu::pmu_{dis,en}able().
>
I thought about that, but was not sure if pmu->stop() is guaranteed to be
called on all the events scheduled on the PMU when we pmu::pmu_disable().
Is it ?
Thanks for the quick response.
Suzuki
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v4 05/12] arm-cci: PMU: Add support for transactions
2015-12-18 10:28 ` Suzuki K. Poulose
@ 2015-12-18 10:42 ` Peter Zijlstra
2015-12-18 10:58 ` Suzuki K. Poulose
0 siblings, 1 reply; 22+ messages in thread
From: Peter Zijlstra @ 2015-12-18 10:42 UTC (permalink / raw)
To: Suzuki K. Poulose
Cc: linux-arm-kernel, linux-kernel, mark.rutland, punit.agrawal, arm
On Fri, Dec 18, 2015 at 10:28:23AM +0000, Suzuki K. Poulose wrote:
> On 17/12/15 18:42, Peter Zijlstra wrote:
> >The above doesn't look like it has a failure case, in which case you can
> >achieve the same simpler, using pmu::pmu_{dis,en}able().
> >
>
> I thought about that, but was not sure if pmu->stop() is guaranteed to be
> called on all the events scheduled on the PMU when we pmu::pmu_disable().
> Is it ?
Not by core code, but you get to implement your pmu::pmu_disable() call,
and if that's what you need, you can make it do that.
Examples:
On some x86 hardware we indeed have to poke at each counter control
register and clear the ENable bit, which is the same what
pmu::stop(.flags=0) would do.
But other x86 hardware has a global disable switch, which is much
cheaper than poking at the individual counter control registers one by
one. In this case we only update the counter control register if it
needs updates (typically in the pmu_enable path).
Yet other x86 hardware can auto disable this global state on interrupt,
which saves us yet another machine register poke.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v4 05/12] arm-cci: PMU: Add support for transactions
2015-12-18 10:42 ` Peter Zijlstra
@ 2015-12-18 10:58 ` Suzuki K. Poulose
2015-12-18 11:47 ` Peter Zijlstra
0 siblings, 1 reply; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-18 10:58 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-arm-kernel, linux-kernel, mark.rutland, punit.agrawal, arm
On 18/12/15 10:42, Peter Zijlstra wrote:
> On Fri, Dec 18, 2015 at 10:28:23AM +0000, Suzuki K. Poulose wrote:
>> On 17/12/15 18:42, Peter Zijlstra wrote:
>> I thought about that, but was not sure if pmu->stop() is guaranteed to be
>> called on all the events scheduled on the PMU when we pmu::pmu_disable().
>> Is it ?
>
> Not by core code, but you get to implement your pmu::pmu_disable() call,
> and if that's what you need, you can make it do that.
OK.
>
> Examples:
>
> On some x86 hardware we indeed have to poke at each counter control
> register and clear the ENable bit, which is the same what
> pmu::stop(.flags=0) would do.
We have a global Enable/Disable for the CCI PMU and that's what we use
currently. To be able to reprogram the counters with the event period
(we program the counter with a specific count in pmu::start() and in the
overflow irq handler; not to be confused with the sampling period, which
is not supported), we need to be sure that the counter value has been updated.
Maybe we could check the event->hw->state to see if we need to reprogram it.
Thanks
Suzuki
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v4 05/12] arm-cci: PMU: Add support for transactions
2015-12-18 10:58 ` Suzuki K. Poulose
@ 2015-12-18 11:47 ` Peter Zijlstra
2015-12-21 10:55 ` Suzuki K. Poulose
0 siblings, 1 reply; 22+ messages in thread
From: Peter Zijlstra @ 2015-12-18 11:47 UTC (permalink / raw)
To: Suzuki K. Poulose
Cc: linux-arm-kernel, linux-kernel, mark.rutland, punit.agrawal, arm
On Fri, Dec 18, 2015 at 10:58:17AM +0000, Suzuki K. Poulose wrote:
> We have a global Enable/Disable for CCI PMU and thats what we use
> currently. To be able to reprogram the counters with the event period
> (we program the counter with a specific count in pmu::start() and at
> overflow irq handler, not to be confused with the sampling period, which
> is not supported), we need to be sure that the counter value has been updated.
>
> May be we could check the event->hw->state to see if we need to reprogram it.
Right, have a look at arch/x86/kernel/cpu/perf_event.c:x86_pmu_enable()
If there's new events, it does two loops over the events.
The first loop does stop(PERF_EF_UPDATE) any counter that got moved.
The second loop does start(PERF_EF_RELOAD) on moved and new events.
The PERF_HES_ARCH bit is used to preserve the stopped state of counters
that were programmed but temporarily stopped.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v4 05/12] arm-cci: PMU: Add support for transactions
2015-12-18 11:47 ` Peter Zijlstra
@ 2015-12-21 10:55 ` Suzuki K. Poulose
2016-01-05 13:37 ` Peter Zijlstra
0 siblings, 1 reply; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-21 10:55 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-arm-kernel, linux-kernel, mark.rutland, punit.agrawal, arm
On Fri, Dec 18, 2015 at 12:47:51PM +0100, Peter Zijlstra wrote:
> On Fri, Dec 18, 2015 at 10:58:17AM +0000, Suzuki K. Poulose wrote:
>
> > We have a global Enable/Disable for CCI PMU and thats what we use
> > currently. To be able to reprogram the counters with the event period
> > (we program the counter with a specific count in pmu::start() and at
> > overflow irq handler, not to be confused with the sampling period, which
> > is not supported), we need to be sure that the counter value has been updated.
> >
> > May be we could check the event->hw->state to see if we need to reprogram it.
>
> Right, have a look at arch/x86/kernel/cpu/perf_event.c:x86_pmu_enable()
>
Thanks for that hint. Here is what I came up with. We don't reschedule
the events; all we need to do is group the writes to the counters. Hence
we could as well add a flag for those events which need programming
and perform the write in pmu::pmu_enable().
----8>-----
arm-cci PMU: Delay counter writes to pmu_enable
Delay setting the event periods for enabled events to pmu::pmu_enable().
We mark event->hw.state with PERF_HES_ARCH for the events that we know
have their counts recorded and have been started. Since we reprogram the
counters every time before counting, we can set the counters for all the
event counters which are !STOPPED && ARCH.
Grouping the writes to counters can amortise the cost of the operation
on PMUs where it is expensive (e.g., CCI-500).
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Punit Agrawal <punit.agrawal@arm.com>
Cc: peterz@infradead.org
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 42 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 40 insertions(+), 2 deletions(-)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 0189f3a..c768ee4 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -916,6 +916,40 @@ static void hw_perf_event_destroy(struct perf_event *event)
}
}
+/*
+ * Program the CCI PMU counters which have PERF_HES_ARCH set
+ * with the event period and mark them ready before we enable
+ * PMU.
+ */
+void cci_pmu_update_counters(struct cci_pmu *cci_pmu)
+{
+ int i;
+ unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)];
+
+ memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long));
+
+ for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) {
+ struct hw_perf_event *hwe;
+
+ if (!cci_pmu->hw_events.events[i]) {
+ WARN_ON(1);
+ continue;
+ }
+
+ hwe = &cci_pmu->hw_events.events[i]->hw;
+ /* Leave the events which are not counting */
+ if (hwe->state & PERF_HES_STOPPED)
+ continue;
+ if (hwe->state & PERF_HES_ARCH) {
+ set_bit(i, mask);
+ hwe->state &= ~PERF_HES_ARCH;
+ local64_set(&hwe->prev_count, CCI_CNTR_PERIOD);
+ }
+ }
+
+ pmu_write_counters(cci_pmu, mask, CCI_CNTR_PERIOD);
+}
+
static void cci_pmu_enable(struct pmu *pmu)
{
struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
@@ -927,6 +961,7 @@ static void cci_pmu_enable(struct pmu *pmu)
return;
raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+ cci_pmu_update_counters(cci_pmu);
__cci_pmu_enable();
raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
@@ -980,8 +1015,11 @@ static void cci_pmu_start(struct perf_event *event, int pmu_flags)
/* Configure the counter unless you are counting a fixed event */
if (!pmu_fixed_hw_idx(cci_pmu, idx))
pmu_set_event(cci_pmu, idx, hwc->config_base);
-
- pmu_event_set_period(event);
+ /*
+ * Mark this counter, so that we can program the
+ * counter with the event_period. see cci_pmu_enable()
+ */
+ hwc->state = PERF_HES_ARCH;
pmu_enable_counter(cci_pmu, idx);
raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
--
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH v4 05/12] arm-cci: PMU: Add support for transactions
2015-12-21 10:55 ` Suzuki K. Poulose
@ 2016-01-05 13:37 ` Peter Zijlstra
2016-01-05 13:43 ` Suzuki K. Poulose
0 siblings, 1 reply; 22+ messages in thread
From: Peter Zijlstra @ 2016-01-05 13:37 UTC (permalink / raw)
To: Suzuki K. Poulose
Cc: linux-arm-kernel, linux-kernel, mark.rutland, punit.agrawal, arm
On Mon, Dec 21, 2015 at 10:55:29AM +0000, Suzuki K. Poulose wrote:
> Thanks for that hint. Here is what I cam up with. We don't reschedule
> the events, all we need to do is group the writes to the counters. Hence
> we could as well add a flag for those events which need programming
> and perform the write in pmu::pmu_enable().
I'm still somewhat confused..
> Grouping the writes to counters can ammortise the cost of the operation
> on PMUs where it is expensive (e.g, CCI-500).
This rationale makes me think you want to reduce the number of counter
writes, not batch them per-se.
So why are you unconditionally writing all counters, instead of only
those that changed?
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v4 05/12] arm-cci: PMU: Add support for transactions
2016-01-05 13:37 ` Peter Zijlstra
@ 2016-01-05 13:43 ` Suzuki K. Poulose
2016-01-05 14:53 ` Peter Zijlstra
0 siblings, 1 reply; 22+ messages in thread
From: Suzuki K. Poulose @ 2016-01-05 13:43 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-arm-kernel, linux-kernel, mark.rutland, punit.agrawal, arm
On 05/01/16 13:37, Peter Zijlstra wrote:
> On Mon, Dec 21, 2015 at 10:55:29AM +0000, Suzuki K. Poulose wrote:
>> Thanks for that hint. Here is what I cam up with. We don't reschedule
>> the events, all we need to do is group the writes to the counters. Hence
>> we could as well add a flag for those events which need programming
>> and perform the write in pmu::pmu_enable().
>
> I'm still somewhat confused..
>
>> Grouping the writes to counters can ammortise the cost of the operation
>> on PMUs where it is expensive (e.g, CCI-500).
>
> This rationale makes me think you want to reduce the number of counter
> writes, not batch them per-se.
>
> So why are you unconditionally writing all counters, instead of only
> those that changed?
>
The ARM CCI PMU reprograms all the counters with a specific value (2^31)
to account for high interrupt latencies in recording the counters that
overflowed. So, pmu_stop() updates the counter and pmu_start() resets
the counter to the above value, always.
Now, writing to a single counter requires
1) Stopping and disabling all the counters in HW (So that step 3 doesn't
interfere with the other counters)
2) Program the target counter with invalid event and enable the counter.
3) Enable the PMU and then write to the counter.
4) Reset everything back to normal.
So, the approach here is to delay the writes to the counters as much as possible
and batch them. So that we don't have to repeat steps 1 & 4 for every single
counter.
Does it help ?
Thanks
Suzuki
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v4 05/12] arm-cci: PMU: Add support for transactions
2016-01-05 13:43 ` Suzuki K. Poulose
@ 2016-01-05 14:53 ` Peter Zijlstra
0 siblings, 0 replies; 22+ messages in thread
From: Peter Zijlstra @ 2016-01-05 14:53 UTC (permalink / raw)
To: Suzuki K. Poulose
Cc: linux-arm-kernel, linux-kernel, mark.rutland, punit.agrawal, arm
On Tue, Jan 05, 2016 at 01:43:30PM +0000, Suzuki K. Poulose wrote:
> On 05/01/16 13:37, Peter Zijlstra wrote:
> >On Mon, Dec 21, 2015 at 10:55:29AM +0000, Suzuki K. Poulose wrote:
> >>Thanks for that hint. Here is what I cam up with. We don't reschedule
> >>the events, all we need to do is group the writes to the counters. Hence
> >>we could as well add a flag for those events which need programming
> >>and perform the write in pmu::pmu_enable().
> >
> >I'm still somewhat confused..
> >
> >>Grouping the writes to counters can ammortise the cost of the operation
> >>on PMUs where it is expensive (e.g, CCI-500).
> >
> >This rationale makes me think you want to reduce the number of counter
> >writes, not batch them per-se.
> >
> >So why are you unconditionally writing all counters, instead of only
> >those that changed?
> >
>
> The ARM CCI PMU reprograms all the counters with a specific value (2^31)
> to account for high interrupt latencies in recording the counters that
> overflowed. So, pmu_stop() updates the counter and pmu_start() resets
> the counter to the above value, always.
>
> Now, writing to a single counter requires
>
> 1) Stopping and disabling all the counters in HW (So that step 3 doesn't
> interfere with the other counters)
> 2) Program the target counter with invalid event and enable the counter.
> 3) Enable the PMU and then write to the counter.
> 4) Reset everything back to normal.
>
>
> So, the approach here is to delay the writes to the counters as much as possible
> and batch them. So that we don't have to repeat steps 1 & 4 for every single
> counter.
>
> Does it help ?
Yes, thanks!
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 06/12] arm-cci: Refactor CCI PMU enable/disable methods
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
` (4 preceding siblings ...)
2015-12-17 17:49 ` [PATCH v4 05/12] arm-cci: PMU: Add support for transactions Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 07/12] arm-cci: Get the status of a counter Suzuki K. Poulose
` (5 subsequent siblings)
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
This patch refactors the CCI PMU driver code a little bit to
make it easier to share the code for enabling/disabling the CCI
PMU. This will be used by the hooks to work around the special cases
where writing to a counter is not always that easy (e.g., CCI-500).
No functional changes.
Cc: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 32 ++++++++++++++++++++++----------
1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index ec3d4fd..e2824a7 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -677,6 +677,26 @@ static u32 pmu_get_max_counters(void)
CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT;
}
+/* Should be called with cci_pmu->hw_events->pmu_lock held */
+static void __cci_pmu_enable(void)
+{
+ u32 val;
+
+ /* Enable all the PMU counters. */
+ val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
+ writel(val, cci_ctrl_base + CCI_PMCR);
+}
+
+/* Should be called with cci_pmu->hw_events->pmu_lock held */
+static void __cci_pmu_disable(void)
+{
+ u32 val;
+
+ /* Disable all the PMU counters. */
+ val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
+ writel(val, cci_ctrl_base + CCI_PMCR);
+}
+
static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event)
{
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
@@ -904,16 +924,12 @@ static void cci_pmu_enable(struct pmu *pmu)
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs);
unsigned long flags;
- u32 val;
if (!enabled)
return;
raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
-
- /* Enable all the PMU counters. */
- val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
- writel(val, cci_ctrl_base + CCI_PMCR);
+ __cci_pmu_enable();
raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
}
@@ -923,13 +939,9 @@ static void cci_pmu_disable(struct pmu *pmu)
struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
unsigned long flags;
- u32 val;
raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
-
- /* Disable all the PMU counters. */
- val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
- writel(val, cci_ctrl_base + CCI_PMCR);
+ __cci_pmu_disable();
raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4 07/12] arm-cci: Get the status of a counter
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
` (5 preceding siblings ...)
2015-12-17 17:49 ` [PATCH v4 06/12] arm-cci: Refactor CCI PMU enable/disable methods Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 08/12] arm-cci: Add routines to save/restore all counters Suzuki K. Poulose
` (4 subsequent siblings)
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
Add helper routines to get the counter status and the event
programmed on it.
Cc: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index e2824a7..7f77f65 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -662,11 +662,23 @@ static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx)
pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL);
}
+static bool __maybe_unused
+pmu_counter_is_enabled(struct cci_pmu *cci_pmu, int idx)
+{
+ return (pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR_CTRL) & 0x1) != 0;
+}
+
static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event)
{
pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL);
}
+static u32 __maybe_unused
+pmu_get_event(struct cci_pmu *cci_pmu, int idx)
+{
+ return pmu_read_register(cci_pmu, idx, CCI_PMU_EVT_SEL);
+}
+
/*
* Returns the number of programmable counters actually implemented
* by the cci
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4 08/12] arm-cci: Add routines to save/restore all counters
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
` (6 preceding siblings ...)
2015-12-17 17:49 ` [PATCH v4 07/12] arm-cci: Get the status of a counter Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 09/12] arm-cci: Provide hook for writing to PMU counters Suzuki K. Poulose
` (3 subsequent siblings)
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
Add helper routines to disable the counter controls for
all the counters on the CCI PMU and to restore them later,
preserving the original state in a caller-provided mask.
Cc: Punit Agrawal <punit.agrawal@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 7f77f65..7207def 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -679,6 +679,45 @@ pmu_get_event(struct cci_pmu *cci_pmu, int idx)
return pmu_read_register(cci_pmu, idx, CCI_PMU_EVT_SEL);
}
+
+/*
+ * For all counters on the CCI-PMU, disable any 'enabled' counters,
+ * saving the changed counters in the mask, so that we can restore
+ * it later using pmu_restore_counters. The mask is private to the
+ * caller. We cannot rely on the used_mask maintained by the CCI_PMU
+ * as it only tells us if the counter is assigned to perf_event or not.
+ * The state of the perf_event cannot be locked by the PMU layer, hence
+ * we check the individual counter status (which can be locked by
+ * cci_pm->hw_events->pmu_lock).
+ *
+ * @mask should be initialised by the caller.
+ */
+static void __maybe_unused
+pmu_save_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+ int i;
+
+ for (i = 0; i < cci_pmu->num_cntrs; i++) {
+ if (pmu_counter_is_enabled(cci_pmu, i)) {
+ set_bit(i, mask);
+ pmu_disable_counter(cci_pmu, i);
+ }
+ }
+}
+
+/*
+ * Restore the status of the counters. Reversal of the pmu_save_counters().
+ * For each counter set in the mask, enable the counter back.
+ */
+static void __maybe_unused
+pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+ int i;
+
+ for_each_set_bit(i, mask, cci_pmu->num_cntrs)
+ pmu_enable_counter(cci_pmu, i);
+}
+
/*
* Returns the number of programmable counters actually implemented
* by the cci
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4 09/12] arm-cci: Provide hook for writing to PMU counters
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
` (7 preceding siblings ...)
2015-12-17 17:49 ` [PATCH v4 08/12] arm-cci: Add routines to save/restore all counters Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 10/12] arm-cci: CCI-500: Work around PMU counter writes Suzuki K. Poulose
` (2 subsequent siblings)
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
Add a hook for writing to CCI PMU counters. This callback
can be used for CCI models which require some extra work
to program the PMU counter values. To accommodate group writes
and single counter writes, the callback accepts a bitmask
of the counter indices which need to be programmed with the
given value.
Cc: Punit Agrawal <punit.agrawal@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 7207def..43f2523 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -136,6 +136,7 @@ struct cci_pmu_model {
struct event_range event_ranges[CCI_IF_MAX];
int (*validate_hw_event)(struct cci_pmu *, unsigned long);
int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long);
+ void (*write_counters)(struct cci_pmu *, unsigned long *, u32 val);
};
static struct cci_pmu_model cci_pmu_models[];
@@ -855,7 +856,15 @@ static void pmu_write_counter(struct perf_event *event, u32 value)
dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
return;
}
- __pmu_write_counter(cci_pmu, value, idx);
+
+ if (cci_pmu->model->write_counters) {
+ unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)];
+
+ memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long));
+ set_bit(idx, mask);
+ cci_pmu->model->write_counters(cci_pmu, mask, value);
+ } else
+ __pmu_write_counter(cci_pmu, value, idx);
}
/* Write a value to a given set of counters */
@@ -870,7 +879,10 @@ static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask, u
static void __maybe_unused
pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask, u32 value)
{
- __pmu_write_counters(cci_pmu, mask, value);
+ if (cci_pmu->model->write_counters)
+ cci_pmu->model->write_counters(cci_pmu, mask, value);
+ else
+ __pmu_write_counters(cci_pmu, mask, value);
}
static u64 pmu_event_update(struct perf_event *event)
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4 10/12] arm-cci: CCI-500: Work around PMU counter writes
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
` (8 preceding siblings ...)
2015-12-17 17:49 ` [PATCH v4 09/12] arm-cci: Provide hook for writing to PMU counters Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 11/12] arm-cci500: Rearrange PMU driver for code sharing with CCI-550 PMU Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 12/12] arm-cci: CoreLink CCI-550 PMU driver Suzuki K. Poulose
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
The CCI PMU driver sets the event counter to half of the maximum
value (2^31) it can count before we start the counters via
pmu_event_set_period(). This is done to give us the best chance to
handle the overflow interrupt, taking care of extreme interrupt latencies.
However, CCI-500 comes with advanced power saving schemes, which
disable the clock to the event counters unless the counters are enabled to
count (PMCR.CEN). This prevents the driver from writing the period to the
counters before starting them. Also, there is no way we can reset the
individual event counter to 0 (PMCR.RST resets all the counters, losing
their current readings). However the value of the counter is preserved and
could be read back, when the counters are not enabled.
So we cannot reliably use the counters and compute the number of events
generated during the sampling period since we don't have the value of the
counter at start.
This patch works around this issue by changing writes to the counter
with the following steps.
1) Disable all the counters (remembering any counters which were enabled)
2) Save the current event and program the target counter to count an
invalid event, which by spec is guaranteed not to generate any events.
3) Enable the target counter.
4) Enable the CCI PMU
5) Write to the target counter.
6) Disable the CCI PMU and the target counter
7) Restore the event back on the target counter.
8) Restore the status of all the counters
Cc: Punit Agrawal <punit.agrawal@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/arm-cci.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 43f2523..b0a64cf 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -846,6 +846,57 @@ static void __pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx)
pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
}
+#ifdef CONFIG_ARM_CCI500_PMU
+
+/*
+ * CCI-500 has advanced power saving policies, which could gate the
+ * clocks to the PMU counters, which makes the writes to them ineffective.
+ * The only way to write to those counters is when the global counters
+ * are enabled and the particular counter is enabled.
+ *
+ * So we do the following :
+ *
+ * 1) Disable all the PMU counters, saving their current state
+ * 2) Save the programmed event, and write an invalid event code
+ * to the event control register for the counter, so that the
+ * counters are not modified.
+ * 3) Enable the counter control for the counter.
+ * 4) Enable the global PMU profiling
+ * 5) Set the counter value
+ * 6) Disable the counter, global PMU.
+ * 7) Restore the event in the target counter
+ * 8) Restore the status of the rest of the counters.
+ *
+ * We choose an event code which has very little chances of getting
+ * assigned a valid code for step(2). We use the highest possible
+ * event code (0x1f) for the master interface 0.
+ */
+#define CCI500_INVALID_EVENT ((CCI500_PORT_M0 << CCI500_PMU_EVENT_SOURCE_SHIFT) | \
+ (CCI500_PMU_EVENT_CODE_MASK << CCI500_PMU_EVENT_CODE_SHIFT))
+static void cci500_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask, u32 value)
+{
+ unsigned long saved_mask[BITS_TO_LONGS(cci_pmu->num_cntrs)];
+ u32 event;
+ int i;
+
+ pmu_save_counters(cci_pmu, saved_mask);
+
+ for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
+ event = pmu_get_event(cci_pmu, i);
+ pmu_set_event(cci_pmu, i, CCI500_INVALID_EVENT);
+ pmu_enable_counter(cci_pmu, i);
+ __cci_pmu_enable();
+ __pmu_write_counter(cci_pmu, value, i);
+ __cci_pmu_disable();
+ pmu_disable_counter(cci_pmu, i);
+ pmu_set_event(cci_pmu, i, event);
+ }
+
+ pmu_restore_counters(cci_pmu, saved_mask);
+}
+
+#endif /* CONFIG_ARM_CCI500_PMU */
+
static void pmu_write_counter(struct perf_event *event, u32 value)
{
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
@@ -1550,6 +1601,7 @@ static struct cci_pmu_model cci_pmu_models[] = {
},
},
.validate_hw_event = cci500_validate_hw_event,
+ .write_counters = cci500_pmu_write_counters,
},
#endif
};
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4 11/12] arm-cci500: Rearrange PMU driver for code sharing with CCI-550 PMU
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
` (9 preceding siblings ...)
2015-12-17 17:49 ` [PATCH v4 10/12] arm-cci: CCI-500: Work around PMU counter writes Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
2015-12-17 17:49 ` [PATCH v4 12/12] arm-cci: CoreLink CCI-550 PMU driver Suzuki K. Poulose
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
CCI-550 PMU shares most of the CCI-500 PMU attributes including the
event format, PMU event codes. The only difference is an additional
master interface (MI6 - 0xe). Hence we share the driver code for both,
except for a model specific event validate method.
This patch renames the common CCI500 symbols to CCI5xx, including the
Kconfig symbol.
No functional changes to the PMU driver.
Acked-by: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
drivers/bus/Kconfig | 2 +-
drivers/bus/arm-cci.c | 218 +++++++++++++++++++++++++------------------------
2 files changed, 112 insertions(+), 108 deletions(-)
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 116b363..3793f4e 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -34,7 +34,7 @@ config ARM_CCI400_PORT_CTRL
Low level power management driver for CCI400 cache coherent
interconnect for ARM platforms.
-config ARM_CCI500_PMU
+config ARM_CCI5xx_PMU
bool "ARM CCI500 PMU support"
depends on (ARM && CPU_V7) || ARM64
depends on PERF_EVENTS
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index b0a64cf..99a9d57 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -52,7 +52,7 @@ static const struct of_device_id arm_cci_matches[] = {
#ifdef CONFIG_ARM_CCI400_COMMON
{.compatible = "arm,cci-400", .data = CCI400_PORTS_DATA },
#endif
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
{ .compatible = "arm,cci-500", },
#endif
{},
@@ -100,7 +100,7 @@ static const struct of_device_id arm_cci_matches[] = {
enum {
CCI_IF_SLAVE,
CCI_IF_MASTER,
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
CCI_IF_GLOBAL,
#endif
CCI_IF_MAX,
@@ -164,7 +164,7 @@ enum cci_models {
CCI400_R0,
CCI400_R1,
#endif
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
CCI500_R0,
#endif
CCI_MODEL_MAX
@@ -434,73 +434,67 @@ static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev
}
#endif /* CONFIG_ARM_CCI400_PMU */
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
/*
- * CCI500 provides 8 independent event counters that can count
- * any of the events available.
- *
- * CCI500 PMU event id is an 9-bit value made of two parts.
+ * CCI5xx PMU event id is an 9-bit value made of two parts.
* bits [8:5] - Source for the event
- * 0x0-0x6 - Slave interfaces
- * 0x8-0xD - Master interfaces
- * 0xf - Global Events
- * 0x7,0xe - Reserved
- *
* bits [4:0] - Event code (specific to type of interface)
+ *
+ *
*/
/* Port ids */
-#define CCI500_PORT_S0 0x0
-#define CCI500_PORT_S1 0x1
-#define CCI500_PORT_S2 0x2
-#define CCI500_PORT_S3 0x3
-#define CCI500_PORT_S4 0x4
-#define CCI500_PORT_S5 0x5
-#define CCI500_PORT_S6 0x6
-
-#define CCI500_PORT_M0 0x8
-#define CCI500_PORT_M1 0x9
-#define CCI500_PORT_M2 0xa
-#define CCI500_PORT_M3 0xb
-#define CCI500_PORT_M4 0xc
-#define CCI500_PORT_M5 0xd
-
-#define CCI500_PORT_GLOBAL 0xf
-
-#define CCI500_PMU_EVENT_MASK 0x1ffUL
-#define CCI500_PMU_EVENT_SOURCE_SHIFT 0x5
-#define CCI500_PMU_EVENT_SOURCE_MASK 0xf
-#define CCI500_PMU_EVENT_CODE_SHIFT 0x0
-#define CCI500_PMU_EVENT_CODE_MASK 0x1f
-
-#define CCI500_PMU_EVENT_SOURCE(event) \
- ((event >> CCI500_PMU_EVENT_SOURCE_SHIFT) & CCI500_PMU_EVENT_SOURCE_MASK)
-#define CCI500_PMU_EVENT_CODE(event) \
- ((event >> CCI500_PMU_EVENT_CODE_SHIFT) & CCI500_PMU_EVENT_CODE_MASK)
-
-#define CCI500_SLAVE_PORT_MIN_EV 0x00
-#define CCI500_SLAVE_PORT_MAX_EV 0x1f
-#define CCI500_MASTER_PORT_MIN_EV 0x00
-#define CCI500_MASTER_PORT_MAX_EV 0x06
-#define CCI500_GLOBAL_PORT_MIN_EV 0x00
-#define CCI500_GLOBAL_PORT_MAX_EV 0x0f
-
-
-#define CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \
- CCI_EXT_ATTR_ENTRY(_name, cci500_pmu_global_event_show, \
+#define CCI5xx_PORT_S0 0x0
+#define CCI5xx_PORT_S1 0x1
+#define CCI5xx_PORT_S2 0x2
+#define CCI5xx_PORT_S3 0x3
+#define CCI5xx_PORT_S4 0x4
+#define CCI5xx_PORT_S5 0x5
+#define CCI5xx_PORT_S6 0x6
+
+#define CCI5xx_PORT_M0 0x8
+#define CCI5xx_PORT_M1 0x9
+#define CCI5xx_PORT_M2 0xa
+#define CCI5xx_PORT_M3 0xb
+#define CCI5xx_PORT_M4 0xc
+#define CCI5xx_PORT_M5 0xd
+
+#define CCI5xx_PORT_GLOBAL 0xf
+
+#define CCI5xx_PMU_EVENT_MASK 0x1ffUL
+#define CCI5xx_PMU_EVENT_SOURCE_SHIFT 0x5
+#define CCI5xx_PMU_EVENT_SOURCE_MASK 0xf
+#define CCI5xx_PMU_EVENT_CODE_SHIFT 0x0
+#define CCI5xx_PMU_EVENT_CODE_MASK 0x1f
+
+#define CCI5xx_PMU_EVENT_SOURCE(event) \
+ ((event >> CCI5xx_PMU_EVENT_SOURCE_SHIFT) & CCI5xx_PMU_EVENT_SOURCE_MASK)
+#define CCI5xx_PMU_EVENT_CODE(event) \
+ ((event >> CCI5xx_PMU_EVENT_CODE_SHIFT) & CCI5xx_PMU_EVENT_CODE_MASK)
+
+#define CCI5xx_SLAVE_PORT_MIN_EV 0x00
+#define CCI5xx_SLAVE_PORT_MAX_EV 0x1f
+#define CCI5xx_MASTER_PORT_MIN_EV 0x00
+#define CCI5xx_MASTER_PORT_MAX_EV 0x06
+#define CCI5xx_GLOBAL_PORT_MIN_EV 0x00
+#define CCI5xx_GLOBAL_PORT_MAX_EV 0x0f
+
+
+#define CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \
+ CCI_EXT_ATTR_ENTRY(_name, cci5xx_pmu_global_event_show, \
(unsigned long) _config)
-static ssize_t cci500_pmu_global_event_show(struct device *dev,
+static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
struct device_attribute *attr, char *buf);
-static struct attribute *cci500_pmu_format_attrs[] = {
+static struct attribute *cci5xx_pmu_format_attrs[] = {
CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"),
NULL,
};
-static struct attribute *cci500_pmu_event_attrs[] = {
+static struct attribute *cci5xx_pmu_event_attrs[] = {
/* Slave events */
CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0),
CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1),
@@ -545,64 +539,73 @@ static struct attribute *cci500_pmu_event_attrs[] = {
CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6),
/* Global events */
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE),
- CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE),
+ CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
NULL
};
-static ssize_t cci500_pmu_global_event_show(struct device *dev,
+static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct dev_ext_attribute *eattr = container_of(attr,
struct dev_ext_attribute, attr);
/* Global events have single fixed source code */
return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n",
- (unsigned long)eattr->var, CCI500_PORT_GLOBAL);
+ (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL);
}
+/*
+ * CCI500 provides 8 independent event counters that can count
+ * any of the events available.
+ * CCI500 PMU event source ids
+ * 0x0-0x6 - Slave interfaces
+ * 0x8-0xD - Master interfaces
+ * 0xf - Global Events
+ * 0x7,0xe - Reserved
+ */
static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
unsigned long hw_event)
{
- u32 ev_source = CCI500_PMU_EVENT_SOURCE(hw_event);
- u32 ev_code = CCI500_PMU_EVENT_CODE(hw_event);
+ u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
+ u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
int if_type;
- if (hw_event & ~CCI500_PMU_EVENT_MASK)
+ if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
return -ENOENT;
switch (ev_source) {
- case CCI500_PORT_S0:
- case CCI500_PORT_S1:
- case CCI500_PORT_S2:
- case CCI500_PORT_S3:
- case CCI500_PORT_S4:
- case CCI500_PORT_S5:
- case CCI500_PORT_S6:
+ case CCI5xx_PORT_S0:
+ case CCI5xx_PORT_S1:
+ case CCI5xx_PORT_S2:
+ case CCI5xx_PORT_S3:
+ case CCI5xx_PORT_S4:
+ case CCI5xx_PORT_S5:
+ case CCI5xx_PORT_S6:
if_type = CCI_IF_SLAVE;
break;
- case CCI500_PORT_M0:
- case CCI500_PORT_M1:
- case CCI500_PORT_M2:
- case CCI500_PORT_M3:
- case CCI500_PORT_M4:
- case CCI500_PORT_M5:
+ case CCI5xx_PORT_M0:
+ case CCI5xx_PORT_M1:
+ case CCI5xx_PORT_M2:
+ case CCI5xx_PORT_M3:
+ case CCI5xx_PORT_M4:
+ case CCI5xx_PORT_M5:
if_type = CCI_IF_MASTER;
break;
- case CCI500_PORT_GLOBAL:
+ case CCI5xx_PORT_GLOBAL:
if_type = CCI_IF_GLOBAL;
break;
default:
@@ -615,7 +618,8 @@ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
return -ENOENT;
}
-#endif /* CONFIG_ARM_CCI500_PMU */
+
+#endif /* CONFIG_ARM_CCI5xx_PMU */
static ssize_t cci_pmu_format_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -846,7 +850,7 @@ static void __pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx)
pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
}
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
/*
* CCI-500 has advanced power saving policies, which could gate the
@@ -871,9 +875,9 @@ static void __pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx)
* assigned a valid code for step(2). We use the highest possible
* event code (0x1f) for the master interface 0.
*/
-#define CCI500_INVALID_EVENT ((CCI500_PORT_M0 << CCI500_PMU_EVENT_SOURCE_SHIFT) | \
- (CCI500_PMU_EVENT_CODE_MASK << CCI500_PMU_EVENT_CODE_SHIFT))
-static void cci500_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask, u32 value)
+#define CCI5xx_INVALID_EVENT ((CCI5xx_PORT_M0 << CCI5xx_PMU_EVENT_SOURCE_SHIFT) | \
+ (CCI5xx_PMU_EVENT_CODE_MASK << CCI5xx_PMU_EVENT_CODE_SHIFT))
+static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask, u32 value)
{
unsigned long saved_mask[BITS_TO_LONGS(cci_pmu->num_cntrs)];
u32 event;
@@ -883,7 +887,7 @@ static void cci500_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *ma
for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
event = pmu_get_event(cci_pmu, i);
- pmu_set_event(cci_pmu, i, CCI500_INVALID_EVENT);
+ pmu_set_event(cci_pmu, i, CCI5xx_INVALID_EVENT);
pmu_enable_counter(cci_pmu, i);
__cci_pmu_enable();
__pmu_write_counter(cci_pmu, value, i);
@@ -895,7 +899,7 @@ static void cci500_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *ma
pmu_restore_counters(cci_pmu, saved_mask);
}
-#endif /* CONFIG_ARM_CCI500_PMU */
+#endif /* CONFIG_ARM_CCI5xx_PMU */
static void pmu_write_counter(struct perf_event *event, u32 value)
{
@@ -1578,30 +1582,30 @@ static struct cci_pmu_model cci_pmu_models[] = {
.get_event_idx = cci400_get_event_idx,
},
#endif
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
[CCI500_R0] = {
.name = "CCI_500",
.fixed_hw_cntrs = 0,
.num_hw_cntrs = 8,
.cntr_size = SZ_64K,
- .format_attrs = cci500_pmu_format_attrs,
- .event_attrs = cci500_pmu_event_attrs,
+ .format_attrs = cci5xx_pmu_format_attrs,
+ .event_attrs = cci5xx_pmu_event_attrs,
.event_ranges = {
[CCI_IF_SLAVE] = {
- CCI500_SLAVE_PORT_MIN_EV,
- CCI500_SLAVE_PORT_MAX_EV,
+ CCI5xx_SLAVE_PORT_MIN_EV,
+ CCI5xx_SLAVE_PORT_MAX_EV,
},
[CCI_IF_MASTER] = {
- CCI500_MASTER_PORT_MIN_EV,
- CCI500_MASTER_PORT_MAX_EV,
+ CCI5xx_MASTER_PORT_MIN_EV,
+ CCI5xx_MASTER_PORT_MAX_EV,
},
[CCI_IF_GLOBAL] = {
- CCI500_GLOBAL_PORT_MIN_EV,
- CCI500_GLOBAL_PORT_MAX_EV,
+ CCI5xx_GLOBAL_PORT_MIN_EV,
+ CCI5xx_GLOBAL_PORT_MAX_EV,
},
},
.validate_hw_event = cci500_validate_hw_event,
- .write_counters = cci500_pmu_write_counters,
+ .write_counters = cci5xx_pmu_write_counters,
},
#endif
};
@@ -1621,7 +1625,7 @@ static const struct of_device_id arm_cci_pmu_matches[] = {
.data = &cci_pmu_models[CCI400_R1],
},
#endif
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
{
.compatible = "arm,cci-500-pmu,r0",
.data = &cci_pmu_models[CCI500_R0],
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4 12/12] arm-cci: CoreLink CCI-550 PMU driver
2015-12-17 17:49 [PATCHv4 00/12] arm-cci: PMU updates Suzuki K. Poulose
` (10 preceding siblings ...)
2015-12-17 17:49 ` [PATCH v4 11/12] arm-cci500: Rearrange PMU driver for code sharing with CCI-550 PMU Suzuki K. Poulose
@ 2015-12-17 17:49 ` Suzuki K. Poulose
11 siblings, 0 replies; 22+ messages in thread
From: Suzuki K. Poulose @ 2015-12-17 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Cc: linux-kernel, mark.rutland, punit.agrawal, arm, Suzuki K. Poulose
Add ARM CoreLink CCI-550 cache coherent interconnect PMU
driver support. The CCI-550 PMU shares all the attributes of CCI-500
PMU, except for an additional master interface (MI-6 - 0xe).
CCI-550 requires the same work around as for CCI-500 to
write to the PMU counter.
Acked-by: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
---
Documentation/devicetree/bindings/arm/cci.txt | 2 +
drivers/bus/Kconfig | 8 +--
drivers/bus/arm-cci.c | 85 ++++++++++++++++++++++++-
3 files changed, 90 insertions(+), 5 deletions(-)
diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt
index aef1d20..a1a5a7e 100644
--- a/Documentation/devicetree/bindings/arm/cci.txt
+++ b/Documentation/devicetree/bindings/arm/cci.txt
@@ -34,6 +34,7 @@ specific to ARM.
Definition: must contain one of the following:
"arm,cci-400"
"arm,cci-500"
+ "arm,cci-550"
- reg
Usage: required
@@ -101,6 +102,7 @@ specific to ARM.
"arm,cci-400-pmu" - DEPRECATED, permitted only where OS has
secure acces to CCI registers
"arm,cci-500-pmu,r0"
+ "arm,cci-550-pmu,r0"
- reg:
Usage: required
Value type: Integer cells. A register entry, expressed
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 3793f4e..54c030b 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -35,14 +35,14 @@ config ARM_CCI400_PORT_CTRL
interconnect for ARM platforms.
config ARM_CCI5xx_PMU
- bool "ARM CCI500 PMU support"
+ bool "ARM CCI-500/CCI-550 PMU support"
depends on (ARM && CPU_V7) || ARM64
depends on PERF_EVENTS
select ARM_CCI_PMU
help
- Support for PMU events monitoring on the ARM CCI-500 cache coherent
- interconnect. CCI-500 provides 8 independent event counters, which
- can count events pertaining to the slave/master interfaces as well
+ Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache
+ coherent interconnects. Both of them provide 8 independent event counters,
+ which can count events pertaining to the slave/master interfaces as well
as the internal events to the CCI.
If unsure, say Y
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 99a9d57..99ae553 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -54,6 +54,7 @@ static const struct of_device_id arm_cci_matches[] = {
#endif
#ifdef CONFIG_ARM_CCI5xx_PMU
{ .compatible = "arm,cci-500", },
+ { .compatible = "arm,cci-550", },
#endif
{},
};
@@ -166,6 +167,7 @@ enum cci_models {
#endif
#ifdef CONFIG_ARM_CCI5xx_PMU
CCI500_R0,
+ CCI550_R0,
#endif
CCI_MODEL_MAX
};
@@ -459,6 +461,7 @@ static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev
#define CCI5xx_PORT_M3 0xb
#define CCI5xx_PORT_M4 0xc
#define CCI5xx_PORT_M5 0xd
+#define CCI5xx_PORT_M6 0xe
#define CCI5xx_PORT_GLOBAL 0xf
@@ -619,6 +622,58 @@ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
return -ENOENT;
}
+/*
+ * CCI550 provides 8 independent event counters that can count
+ * any of the events available.
+ * CCI550 PMU event source ids
+ * 0x0-0x6 - Slave interfaces
+ * 0x8-0xe - Master interfaces
+ * 0xf - Global Events
+ * 0x7 - Reserved
+ */
+static int cci550_validate_hw_event(struct cci_pmu *cci_pmu,
+ unsigned long hw_event)
+{
+ u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
+ u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
+ int if_type;
+
+ if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
+ return -ENOENT;
+
+ switch (ev_source) {
+ case CCI5xx_PORT_S0:
+ case CCI5xx_PORT_S1:
+ case CCI5xx_PORT_S2:
+ case CCI5xx_PORT_S3:
+ case CCI5xx_PORT_S4:
+ case CCI5xx_PORT_S5:
+ case CCI5xx_PORT_S6:
+ if_type = CCI_IF_SLAVE;
+ break;
+ case CCI5xx_PORT_M0:
+ case CCI5xx_PORT_M1:
+ case CCI5xx_PORT_M2:
+ case CCI5xx_PORT_M3:
+ case CCI5xx_PORT_M4:
+ case CCI5xx_PORT_M5:
+ case CCI5xx_PORT_M6:
+ if_type = CCI_IF_MASTER;
+ break;
+ case CCI5xx_PORT_GLOBAL:
+ if_type = CCI_IF_GLOBAL;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
+ ev_code <= cci_pmu->model->event_ranges[if_type].max)
+ return hw_event;
+
+ return -ENOENT;
+}
+
#endif /* CONFIG_ARM_CCI5xx_PMU */
static ssize_t cci_pmu_format_show(struct device *dev,
@@ -853,7 +908,7 @@ static void __pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx)
#ifdef CONFIG_ARM_CCI5xx_PMU
/*
- * CCI-500 has advanced power saving policies, which could gate the
+ * CCI-500/CCI-550 have advanced power saving policies, which could gate the
* clocks to the PMU counters, which makes the writes to them ineffective.
* The only way to write to those counters is when the global counters
* are enabled and the particular counter is enabled.
@@ -1607,6 +1662,30 @@ static struct cci_pmu_model cci_pmu_models[] = {
.validate_hw_event = cci500_validate_hw_event,
.write_counters = cci5xx_pmu_write_counters,
},
+ [CCI550_R0] = {
+ .name = "CCI_550",
+ .fixed_hw_cntrs = 0,
+ .num_hw_cntrs = 8,
+ .cntr_size = SZ_64K,
+ .format_attrs = cci5xx_pmu_format_attrs,
+ .event_attrs = cci5xx_pmu_event_attrs,
+ .event_ranges = {
+ [CCI_IF_SLAVE] = {
+ CCI5xx_SLAVE_PORT_MIN_EV,
+ CCI5xx_SLAVE_PORT_MAX_EV,
+ },
+ [CCI_IF_MASTER] = {
+ CCI5xx_MASTER_PORT_MIN_EV,
+ CCI5xx_MASTER_PORT_MAX_EV,
+ },
+ [CCI_IF_GLOBAL] = {
+ CCI5xx_GLOBAL_PORT_MIN_EV,
+ CCI5xx_GLOBAL_PORT_MAX_EV,
+ },
+ },
+ .validate_hw_event = cci550_validate_hw_event,
+ .write_counters = cci5xx_pmu_write_counters,
+ },
#endif
};
@@ -1630,6 +1709,10 @@ static const struct of_device_id arm_cci_pmu_matches[] = {
.compatible = "arm,cci-500-pmu,r0",
.data = &cci_pmu_models[CCI500_R0],
},
+ {
+ .compatible = "arm,cci-550-pmu,r0",
+ .data = &cci_pmu_models[CCI550_R0],
+ },
#endif
{},
};
--
1.7.9.5
^ permalink raw reply related [flat|nested] 22+ messages in thread