All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
@ 2017-02-19 18:51 ` Anurup M
  0 siblings, 0 replies; 12+ messages in thread
From: Anurup M @ 2017-02-19 18:51 UTC (permalink / raw)
  To: mark.rutland, will.deacon
  Cc: linux-kernel, linux-arm-kernel, anurup.m, zhangshaokun,
	tanxiaojun, xuwei5, sanil.kumar, john.garry, gabriele.paoloni,
	shiju.jose, huangdaode, linuxarm, dikshit.n, shyju.pv,
	anurupvasu

The L3 cache PMU uses an N-N SPI interrupt, which is not supported
by the mainline kernel. So use an hrtimer to poll and update the event
counters to avoid an overflow condition for the L3 cache PMU.
An interval of 10 seconds is used for the hrtimer.
The time interval can be configured through sysfs.

Signed-off-by: Dikshit N <dikshit.n@huawei.com>
Signed-off-by: Anurup M <anurup.m@huawei.com>
---
 drivers/perf/hisilicon/hisi_uncore_l3c.c | 44 +++++++++++++++
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 95 ++++++++++++++++++++++++++++++++
 drivers/perf/hisilicon/hisi_uncore_pmu.h | 17 ++++++
 3 files changed, 156 insertions(+)

diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c.c b/drivers/perf/hisilicon/hisi_uncore_l3c.c
index 5c6bea0..d211020 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c.c
@@ -20,6 +20,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/bitmap.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -53,6 +55,22 @@ enum armv8_hisi_l3c_counters {
 #define L3C_CNT0_REG_OFF 0x170
 #define L3C_EVENT_EN 0x1000000
 
+/*
+ * Default timer interval to poll and avoid counter overflow.
+ * CPU speed = 2.4GHz, therefore access time = 0.4ns
+ * L1 cache - 2 way set associative
+ * L2  - 16 way set associative
+ * L3  - 16 way set associative. L3 cache has 4 banks.
+ *
+ * Overflow time = 2^31 * (access time L1 + access time L2 + access time L3)
+ * = 2^31 * ((2 * 0.4ns) + (16 * 0.4ns) + (4 * 16 * 0.4ns)) = 70 seconds
+ *
+ * L3 cache is also used by devices like PCIe, SAS etc. at
+ * the same time. So the overflow time could be even smaller.
+ * To be on the safe side we use a timer interval of 10 seconds.
+ */
+#define L3C_HRTIMER_INTERVAL (10LL * MSEC_PER_SEC)
+
 #define GET_MODULE_ID(hwmod_data) hwmod_data->l3c_hwcfg.module_id
 #define GET_BANK_SEL(hwmod_data) hwmod_data->l3c_hwcfg.bank_select
 
@@ -467,6 +485,18 @@ static const struct attribute_group hisi_l3c_attr_group = {
 
 static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
 
+static DEVICE_ATTR(hrtimer_interval, 0644, hisi_hrtimer_interval_sysfs_show,
+					hisi_hrtimer_interval_sysfs_store);
+
+static struct attribute *hisi_l3c_hrtimer_interval_attrs[] = {
+	&dev_attr_hrtimer_interval.attr,
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_hrtimer_interval_attr_group = {
+	.attrs = hisi_l3c_hrtimer_interval_attrs,
+};
+
 static struct attribute *hisi_l3c_cpumask_attrs[] = {
 	&dev_attr_cpumask.attr,
 	NULL,
@@ -481,6 +511,7 @@ static const struct attribute_group *hisi_l3c_pmu_attr_groups[] = {
 	&hisi_l3c_format_group,
 	&hisi_l3c_events_group,
 	&hisi_l3c_cpumask_attr_group,
+	&hisi_l3c_hrtimer_interval_attr_group,
 	NULL,
 };
 
@@ -496,6 +527,15 @@ static struct hisi_uncore_ops hisi_uncore_l3c_ops = {
 	.write_counter = hisi_l3c_write_counter,
 };
 
+/* Initialize hrtimer to poll for avoiding counter overflow */
+static void hisi_l3c_hrtimer_init(struct hisi_pmu *l3c_pmu)
+{
+	INIT_LIST_HEAD(&l3c_pmu->active_list);
+	l3c_pmu->ops->start_hrtimer = hisi_hrtimer_start;
+	l3c_pmu->ops->stop_hrtimer = hisi_hrtimer_stop;
+	hisi_hrtimer_init(l3c_pmu, L3C_HRTIMER_INTERVAL);
+}
+
 static int hisi_l3c_pmu_init(struct hisi_pmu *l3c_pmu,
 				struct hisi_djtag_client *client)
 {
@@ -505,6 +545,7 @@ static int hisi_l3c_pmu_init(struct hisi_pmu *l3c_pmu,
 
 	l3c_pmu->num_events = HISI_HWEVENT_L3C_EVENT_MAX;
 	l3c_pmu->num_counters = HISI_IDX_L3C_COUNTER_MAX;
+	l3c_pmu->num_active = 0;
 	l3c_pmu->scl_id = hisi_djtag_get_sclid(client);
 
 	l3c_pmu->name = kasprintf(GFP_KERNEL, "hisi_l3c%u_%u",
@@ -515,6 +556,9 @@ static int hisi_l3c_pmu_init(struct hisi_pmu *l3c_pmu,
 	/* Pick one core to use for cpumask attributes */
 	cpumask_set_cpu(smp_processor_id(), &l3c_pmu->cpu);
 
+	/* Use hrtimer to poll for avoiding counter overflow */
+	hisi_l3c_hrtimer_init(l3c_pmu);
+
 	return 0;
 }
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 200e673e..377e1bc 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -66,6 +66,83 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev,
 	return cpumap_print_to_pagebuf(true, buf, &hisi_pmu->cpu);
 }
 
+/*
+ * sysfs hrtimer_interval attributes
+ */
+ssize_t hisi_hrtimer_interval_sysfs_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
+
+	if (hisi_pmu->hrt_duration)
+		return sprintf(buf, "%llu\n",
+			       hisi_pmu->hrt_duration);
+	return 0;
+}
+
+ssize_t hisi_hrtimer_interval_sysfs_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
+
+	if (kstrtoull(buf, 0, &hisi_pmu->hrt_duration) < 0)
+		return -EINVAL;
+	return count;
+}
+
+/* The counter overflow IRQ is not supported for some PMUs;
+ * use an hrtimer to periodically poll and avoid overflow.
+ */
+static enum hrtimer_restart hisi_hrtimer_callback(struct hrtimer *hrtimer)
+{
+	struct hisi_pmu *hisi_pmu = container_of(hrtimer,
+						 struct hisi_pmu, hrtimer);
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	unsigned long flags;
+
+	/* Return if no active events */
+	if (!hisi_pmu->num_active)
+		return HRTIMER_NORESTART;
+
+	local_irq_save(flags);
+
+	/* Update event count for each active event */
+	list_for_each_entry(event, &hisi_pmu->active_list, active_entry) {
+		hwc = &event->hw;
+		/* Read hardware counter and update the Perf event counter */
+		hisi_pmu->ops->event_update(event, hwc, GET_CNTR_IDX(hwc));
+	}
+
+	local_irq_restore(flags);
+	hrtimer_forward_now(hrtimer, ms_to_ktime(hisi_pmu->hrt_duration));
+	return HRTIMER_RESTART;
+}
+
+void hisi_hrtimer_init(struct hisi_pmu *hisi_pmu, u64 timer_interval)
+{
+	/* hr timer clock initialization */
+	hrtimer_init(&hisi_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hisi_pmu->hrtimer.function = &hisi_hrtimer_callback;
+	hisi_pmu->hrt_duration = timer_interval;
+}
+
+void hisi_hrtimer_start(struct hisi_pmu *hisi_pmu)
+{
+	hrtimer_start(&hisi_pmu->hrtimer,
+			ms_to_ktime(hisi_pmu->hrt_duration),
+			HRTIMER_MODE_REL_PINNED);
+}
+
+void hisi_hrtimer_stop(struct hisi_pmu *hisi_pmu)
+{
+	hrtimer_cancel(&hisi_pmu->hrtimer);
+}
+
 /* djtag read interface - Call djtag driver to access SoC registers */
 int hisi_djtag_readreg(int module_id, int bank, u32 offset,
 				struct hisi_djtag_client *client, u32 *value)
@@ -268,6 +345,15 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags)
 						(u32)prev_raw_count);
 	}
 
+	/* Start hrtimer when the first event is started in this PMU */
+	if (hisi_pmu->ops->start_hrtimer) {
+		hisi_pmu->num_active++;
+		list_add_tail(&event->active_entry, &hisi_pmu->active_list);
+
+		if (hisi_pmu->num_active == 1)
+			hisi_pmu->ops->start_hrtimer(hisi_pmu);
+	}
+
 	hisi_uncore_pmu_enable_event(event);
 	perf_event_update_userpage(event);
 }
@@ -281,6 +367,15 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
 	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 	hwc->state |= PERF_HES_STOPPED;
 
+	/* Stop hrtimer when the last event is stopped in this PMU */
+	if (hisi_pmu->ops->stop_hrtimer) {
+		hisi_pmu->num_active--;
+		list_del(&event->active_entry);
+
+		if (hisi_pmu->num_active == 0)
+			hisi_pmu->ops->stop_hrtimer(hisi_pmu);
+	}
+
 	if (hwc->state & PERF_HES_UPTODATE)
 		return;
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 785618b..4a92585 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -77,13 +77,20 @@ struct hisi_uncore_ops {
 	void (*disable_counter)(struct hisi_pmu *, int);
 	void (*start_counters)(struct hisi_pmu *);
 	void (*stop_counters)(struct hisi_pmu *);
+	void (*start_hrtimer)(struct hisi_pmu *);
+	void (*stop_hrtimer)(struct hisi_pmu *);
 };
 
 /* Generic pmu struct for different pmu types */
 struct hisi_pmu {
 	const char *name;
 	struct perf_event **hw_perf_events;
+	struct list_head active_list; /* Active events list */
 	struct hisi_uncore_ops *ops;
+	struct hrtimer hrtimer; /* hrtimer to handle the
+				 * counter overflow
+				 */
+	u64 hrt_duration; /* hrtimer timeout */
 	struct device *dev;
 	void *hwmod_data; /* Hardware module specific data */
 	cpumask_t cpu;
@@ -92,6 +99,7 @@ struct hisi_pmu {
 	u32 scl_id;
 	int num_counters;
 	int num_events;
+	int num_active;
 };
 
 void hisi_uncore_pmu_read(struct perf_event *event);
@@ -111,6 +119,15 @@ ssize_t hisi_format_sysfs_show(struct device *dev,
 				  struct device_attribute *attr, char *buf);
 ssize_t hisi_cpumask_sysfs_show(struct device *dev,
 				struct device_attribute *attr, char *buf);
+ssize_t hisi_hrtimer_interval_sysfs_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf);
+ssize_t hisi_hrtimer_interval_sysfs_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count);
+void hisi_hrtimer_init(struct hisi_pmu *hisi_pmu, u64 timer_interval);
+void hisi_hrtimer_start(struct hisi_pmu *hisi_pmu);
+void hisi_hrtimer_stop(struct hisi_pmu *hisi_pmu);
 int hisi_djtag_readreg(int module_id, int bank, u32 offset,
 				struct hisi_djtag_client *client,
 							u32 *value);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
@ 2017-02-19 18:51 ` Anurup M
  0 siblings, 0 replies; 12+ messages in thread
From: Anurup M @ 2017-02-19 18:51 UTC (permalink / raw)
  To: linux-arm-kernel

The L3 cache PMU uses an N-N SPI interrupt, which is not supported
by the mainline kernel. So use an hrtimer to poll and update the event
counters to avoid an overflow condition for the L3 cache PMU.
An interval of 10 seconds is used for the hrtimer.
The time interval can be configured through sysfs.

Signed-off-by: Dikshit N <dikshit.n@huawei.com>
Signed-off-by: Anurup M <anurup.m@huawei.com>
---
 drivers/perf/hisilicon/hisi_uncore_l3c.c | 44 +++++++++++++++
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 95 ++++++++++++++++++++++++++++++++
 drivers/perf/hisilicon/hisi_uncore_pmu.h | 17 ++++++
 3 files changed, 156 insertions(+)

diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c.c b/drivers/perf/hisilicon/hisi_uncore_l3c.c
index 5c6bea0..d211020 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c.c
@@ -20,6 +20,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/bitmap.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -53,6 +55,22 @@ enum armv8_hisi_l3c_counters {
 #define L3C_CNT0_REG_OFF 0x170
 #define L3C_EVENT_EN 0x1000000
 
+/*
+ * Default timer interval to poll and avoid counter overflow.
+ * CPU speed = 2.4GHz, therefore access time = 0.4ns
+ * L1 cache - 2 way set associative
+ * L2  - 16 way set associative
+ * L3  - 16 way set associative. L3 cache has 4 banks.
+ *
+ * Overflow time = 2^31 * (access time L1 + access time L2 + access time L3)
+ * = 2^31 * ((2 * 0.4ns) + (16 * 0.4ns) + (4 * 16 * 0.4ns)) = 70 seconds
+ *
+ * L3 cache is also used by devices like PCIe, SAS etc. at
+ * the same time. So the overflow time could be even smaller.
+ * To be on the safe side we use a timer interval of 10 seconds.
+ */
+#define L3C_HRTIMER_INTERVAL (10LL * MSEC_PER_SEC)
+
 #define GET_MODULE_ID(hwmod_data) hwmod_data->l3c_hwcfg.module_id
 #define GET_BANK_SEL(hwmod_data) hwmod_data->l3c_hwcfg.bank_select
 
@@ -467,6 +485,18 @@ static const struct attribute_group hisi_l3c_attr_group = {
 
 static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
 
+static DEVICE_ATTR(hrtimer_interval, 0644, hisi_hrtimer_interval_sysfs_show,
+					hisi_hrtimer_interval_sysfs_store);
+
+static struct attribute *hisi_l3c_hrtimer_interval_attrs[] = {
+	&dev_attr_hrtimer_interval.attr,
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_hrtimer_interval_attr_group = {
+	.attrs = hisi_l3c_hrtimer_interval_attrs,
+};
+
 static struct attribute *hisi_l3c_cpumask_attrs[] = {
 	&dev_attr_cpumask.attr,
 	NULL,
@@ -481,6 +511,7 @@ static const struct attribute_group *hisi_l3c_pmu_attr_groups[] = {
 	&hisi_l3c_format_group,
 	&hisi_l3c_events_group,
 	&hisi_l3c_cpumask_attr_group,
+	&hisi_l3c_hrtimer_interval_attr_group,
 	NULL,
 };
 
@@ -496,6 +527,15 @@ static struct hisi_uncore_ops hisi_uncore_l3c_ops = {
 	.write_counter = hisi_l3c_write_counter,
 };
 
+/* Initialize hrtimer to poll for avoiding counter overflow */
+static void hisi_l3c_hrtimer_init(struct hisi_pmu *l3c_pmu)
+{
+	INIT_LIST_HEAD(&l3c_pmu->active_list);
+	l3c_pmu->ops->start_hrtimer = hisi_hrtimer_start;
+	l3c_pmu->ops->stop_hrtimer = hisi_hrtimer_stop;
+	hisi_hrtimer_init(l3c_pmu, L3C_HRTIMER_INTERVAL);
+}
+
 static int hisi_l3c_pmu_init(struct hisi_pmu *l3c_pmu,
 				struct hisi_djtag_client *client)
 {
@@ -505,6 +545,7 @@ static int hisi_l3c_pmu_init(struct hisi_pmu *l3c_pmu,
 
 	l3c_pmu->num_events = HISI_HWEVENT_L3C_EVENT_MAX;
 	l3c_pmu->num_counters = HISI_IDX_L3C_COUNTER_MAX;
+	l3c_pmu->num_active = 0;
 	l3c_pmu->scl_id = hisi_djtag_get_sclid(client);
 
 	l3c_pmu->name = kasprintf(GFP_KERNEL, "hisi_l3c%u_%u",
@@ -515,6 +556,9 @@ static int hisi_l3c_pmu_init(struct hisi_pmu *l3c_pmu,
 	/* Pick one core to use for cpumask attributes */
 	cpumask_set_cpu(smp_processor_id(), &l3c_pmu->cpu);
 
+	/* Use hrtimer to poll for avoiding counter overflow */
+	hisi_l3c_hrtimer_init(l3c_pmu);
+
 	return 0;
 }
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 200e673e..377e1bc 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -66,6 +66,83 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev,
 	return cpumap_print_to_pagebuf(true, buf, &hisi_pmu->cpu);
 }
 
+/*
+ * sysfs hrtimer_interval attributes
+ */
+ssize_t hisi_hrtimer_interval_sysfs_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
+
+	if (hisi_pmu->hrt_duration)
+		return sprintf(buf, "%llu\n",
+			       hisi_pmu->hrt_duration);
+	return 0;
+}
+
+ssize_t hisi_hrtimer_interval_sysfs_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
+
+	if (kstrtoull(buf, 0, &hisi_pmu->hrt_duration) < 0)
+		return -EINVAL;
+	return count;
+}
+
+/* The counter overflow IRQ is not supported for some PMUs;
+ * use an hrtimer to periodically poll and avoid overflow.
+ */
+static enum hrtimer_restart hisi_hrtimer_callback(struct hrtimer *hrtimer)
+{
+	struct hisi_pmu *hisi_pmu = container_of(hrtimer,
+						 struct hisi_pmu, hrtimer);
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	unsigned long flags;
+
+	/* Return if no active events */
+	if (!hisi_pmu->num_active)
+		return HRTIMER_NORESTART;
+
+	local_irq_save(flags);
+
+	/* Update event count for each active event */
+	list_for_each_entry(event, &hisi_pmu->active_list, active_entry) {
+		hwc = &event->hw;
+		/* Read hardware counter and update the Perf event counter */
+		hisi_pmu->ops->event_update(event, hwc, GET_CNTR_IDX(hwc));
+	}
+
+	local_irq_restore(flags);
+	hrtimer_forward_now(hrtimer, ms_to_ktime(hisi_pmu->hrt_duration));
+	return HRTIMER_RESTART;
+}
+
+void hisi_hrtimer_init(struct hisi_pmu *hisi_pmu, u64 timer_interval)
+{
+	/* hr timer clock initialization */
+	hrtimer_init(&hisi_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hisi_pmu->hrtimer.function = &hisi_hrtimer_callback;
+	hisi_pmu->hrt_duration = timer_interval;
+}
+
+void hisi_hrtimer_start(struct hisi_pmu *hisi_pmu)
+{
+	hrtimer_start(&hisi_pmu->hrtimer,
+			ms_to_ktime(hisi_pmu->hrt_duration),
+			HRTIMER_MODE_REL_PINNED);
+}
+
+void hisi_hrtimer_stop(struct hisi_pmu *hisi_pmu)
+{
+	hrtimer_cancel(&hisi_pmu->hrtimer);
+}
+
 /* djtag read interface - Call djtag driver to access SoC registers */
 int hisi_djtag_readreg(int module_id, int bank, u32 offset,
 				struct hisi_djtag_client *client, u32 *value)
@@ -268,6 +345,15 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags)
 						(u32)prev_raw_count);
 	}
 
+	/* Start hrtimer when the first event is started in this PMU */
+	if (hisi_pmu->ops->start_hrtimer) {
+		hisi_pmu->num_active++;
+		list_add_tail(&event->active_entry, &hisi_pmu->active_list);
+
+		if (hisi_pmu->num_active == 1)
+			hisi_pmu->ops->start_hrtimer(hisi_pmu);
+	}
+
 	hisi_uncore_pmu_enable_event(event);
 	perf_event_update_userpage(event);
 }
@@ -281,6 +367,15 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
 	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 	hwc->state |= PERF_HES_STOPPED;
 
+	/* Stop hrtimer when the last event is stopped in this PMU */
+	if (hisi_pmu->ops->stop_hrtimer) {
+		hisi_pmu->num_active--;
+		list_del(&event->active_entry);
+
+		if (hisi_pmu->num_active == 0)
+			hisi_pmu->ops->stop_hrtimer(hisi_pmu);
+	}
+
 	if (hwc->state & PERF_HES_UPTODATE)
 		return;
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 785618b..4a92585 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -77,13 +77,20 @@ struct hisi_uncore_ops {
 	void (*disable_counter)(struct hisi_pmu *, int);
 	void (*start_counters)(struct hisi_pmu *);
 	void (*stop_counters)(struct hisi_pmu *);
+	void (*start_hrtimer)(struct hisi_pmu *);
+	void (*stop_hrtimer)(struct hisi_pmu *);
 };
 
 /* Generic pmu struct for different pmu types */
 struct hisi_pmu {
 	const char *name;
 	struct perf_event **hw_perf_events;
+	struct list_head active_list; /* Active events list */
 	struct hisi_uncore_ops *ops;
+	struct hrtimer hrtimer; /* hrtimer to handle the
+				 * counter overflow
+				 */
+	u64 hrt_duration; /* hrtimer timeout */
 	struct device *dev;
 	void *hwmod_data; /* Hardware module specific data */
 	cpumask_t cpu;
@@ -92,6 +99,7 @@ struct hisi_pmu {
 	u32 scl_id;
 	int num_counters;
 	int num_events;
+	int num_active;
 };
 
 void hisi_uncore_pmu_read(struct perf_event *event);
@@ -111,6 +119,15 @@ ssize_t hisi_format_sysfs_show(struct device *dev,
 				  struct device_attribute *attr, char *buf);
 ssize_t hisi_cpumask_sysfs_show(struct device *dev,
 				struct device_attribute *attr, char *buf);
+ssize_t hisi_hrtimer_interval_sysfs_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf);
+ssize_t hisi_hrtimer_interval_sysfs_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count);
+void hisi_hrtimer_init(struct hisi_pmu *hisi_pmu, u64 timer_interval);
+void hisi_hrtimer_start(struct hisi_pmu *hisi_pmu);
+void hisi_hrtimer_stop(struct hisi_pmu *hisi_pmu);
 int hisi_djtag_readreg(int module_id, int bank, u32 offset,
 				struct hisi_djtag_client *client,
 							u32 *value);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
  2017-02-19 18:51 ` Anurup M
@ 2017-02-20 11:09   ` Mark Rutland
  -1 siblings, 0 replies; 12+ messages in thread
From: Mark Rutland @ 2017-02-20 11:09 UTC (permalink / raw)
  To: Anurup M
  Cc: will.deacon, linux-kernel, linux-arm-kernel, anurup.m,
	zhangshaokun, tanxiaojun, xuwei5, sanil.kumar, john.garry,
	gabriele.paoloni, shiju.jose, huangdaode, linuxarm, dikshit.n,
	shyju.pv

On Sun, Feb 19, 2017 at 01:51:03PM -0500, Anurup M wrote:
> The L3 cache PMU use N-N SPI interrupt which has no support
> in kernel mainline.

Could you elaborate on what you mean by this?

I don't understand what is meant here. How exactly are the interrupts
wired up in HW, and what exactly is not supported by Linux?

> So use hrtimer to poll and update event
> counter to avoid overflow condition for L3 cache PMU.
> A interval of 10 seconds is used for the hrtimer.
> The time interval can be configured in the sysfs.

I'm not too keen on giving userspace the ability to control this, since
it gives an awful lot of rope for userspace to tie around itself.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
@ 2017-02-20 11:09   ` Mark Rutland
  0 siblings, 0 replies; 12+ messages in thread
From: Mark Rutland @ 2017-02-20 11:09 UTC (permalink / raw)
  To: linux-arm-kernel

On Sun, Feb 19, 2017 at 01:51:03PM -0500, Anurup M wrote:
> The L3 cache PMU use N-N SPI interrupt which has no support
> in kernel mainline.

Could you elaborate on what you mean by this?

I don't understand what is meant here. How exactly are the interrupts
wired up in HW, and what exactly is not supported by Linux?

> So use hrtimer to poll and update event
> counter to avoid overflow condition for L3 cache PMU.
> A interval of 10 seconds is used for the hrtimer.
> The time interval can be configured in the sysfs.

I'm not too keen on giving userspace the ability to control this, since
it gives an awful lot of rope for userspace to tie around itself.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
  2017-02-20 11:09   ` Mark Rutland
@ 2017-02-21  7:07     ` Anurup M
  -1 siblings, 0 replies; 12+ messages in thread
From: Anurup M @ 2017-02-21  7:07 UTC (permalink / raw)
  To: Mark Rutland, marc.zyngier
  Cc: will.deacon, linux-kernel, linux-arm-kernel, anurup.m,
	zhangshaokun, tanxiaojun, xuwei5, sanil.kumar, john.garry,
	gabriele.paoloni, shiju.jose, huangdaode, linuxarm, dikshit.n,
	shyju.pv, majun258, Shameerali Kolothum Thodi

Adding Marc.

On Monday 20 February 2017 04:39 PM, Mark Rutland wrote:
> On Sun, Feb 19, 2017 at 01:51:03PM -0500, Anurup M wrote:
>> The L3 cache PMU use N-N SPI interrupt which has no support
>> in kernel mainline.
> Could you elaborate on what you mean by this?
>
> I don't understand what is meant here. How exactly are the interrupts
> wired up in HW, and what exactly is not supported by Linux?

In HW the L3C overflow IRQ is wired as SPI which use N-N model.
But according to ARM GIC V2 specification, the peripheral(hardware) 
interrupts should use 1-N model.
N-N model is used by SGIs. In GIC V3 spec I could not find any 
description of N-N model.
So I think the N-N model for SPI will not be supported.

Hi Marc,
     Does the ARM GIC support the N-N model for SPIs? Please share your comments.

>> So use hrtimer to poll and update event
>> counter to avoid overflow condition for L3 cache PMU.
>> A interval of 10 seconds is used for the hrtimer.
>> The time interval can be configured in the sysfs.
> I'm not too keen on giving userspace the ability to control this, since
> it gives an awful lot of rope for userspace to tie around itself.

My intention was to let the system user choose the interval based
on the system usage.

If we do not provide this facility, then we always set the worst case 
overflow interval?
I am trying to understand it better.

Thanks,
Anurup

> Thanks,
> Mark.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
@ 2017-02-21  7:07     ` Anurup M
  0 siblings, 0 replies; 12+ messages in thread
From: Anurup M @ 2017-02-21  7:07 UTC (permalink / raw)
  To: linux-arm-kernel

Adding Marc.

On Monday 20 February 2017 04:39 PM, Mark Rutland wrote:
> On Sun, Feb 19, 2017 at 01:51:03PM -0500, Anurup M wrote:
>> The L3 cache PMU use N-N SPI interrupt which has no support
>> in kernel mainline.
> Could you elaborate on what you mean by this?
>
> I don't understand what is meant here. How exactly are the interrupts
> wired up in HW, and what exactly is not supported by Linux?

In HW the L3C overflow IRQ is wired as SPI which use N-N model.
But according to ARM GIC V2 specification, the peripheral(hardware) 
interrupts should use 1-N model.
N-N model is used by SGIs. In GIC V3 spec I could not find any 
description of N-N model.
So I think the N-N model for SPI will not be supported.

Hi Marc,
     Does the ARM GIC support the N-N model for SPIs? Please share your comments.

>> So use hrtimer to poll and update event
>> counter to avoid overflow condition for L3 cache PMU.
>> A interval of 10 seconds is used for the hrtimer.
>> The time interval can be configured in the sysfs.
> I'm not too keen on giving userspace the ability to control this, since
> it gives an awful lot of rope for userspace to tie around itself.

My intention was to let the system user choose the interval based
on the system usage.

If we do not provide this facility, then we always set the worst case 
overflow interval?
I am trying to understand it better.

Thanks,
Anurup

> Thanks,
> Mark.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
  2017-02-21  7:07     ` Anurup M
@ 2017-02-21  9:43       ` Marc Zyngier
  -1 siblings, 0 replies; 12+ messages in thread
From: Marc Zyngier @ 2017-02-21  9:43 UTC (permalink / raw)
  To: Anurup M, Mark Rutland
  Cc: will.deacon, linux-kernel, linux-arm-kernel, anurup.m,
	zhangshaokun, tanxiaojun, xuwei5, sanil.kumar, john.garry,
	gabriele.paoloni, shiju.jose, huangdaode, linuxarm, dikshit.n,
	shyju.pv, majun258, Shameerali Kolothum Thodi

On 21/02/17 07:07, Anurup M wrote:
> Adding Marc.
> 
> On Monday 20 February 2017 04:39 PM, Mark Rutland wrote:
>> On Sun, Feb 19, 2017 at 01:51:03PM -0500, Anurup M wrote:
>>> The L3 cache PMU use N-N SPI interrupt which has no support
>>> in kernel mainline.
>> Could you elaborate on what you mean by this?
>>
>> I don't understand what is meant here. How exactly are the interrupts
>> wired up in HW, and what exactly is not supported by Linux?
> 
> In HW the L3C overflow IRQ is wired as SPI which use N-N model.
> But according to ARM GIC V2 specification, the peripheral(hardware) 
> interrupts should use 1-N model.
> N-N model is used by SGIs. In GIC V3 spec I could not find any 
> description of N-N model.
> So I think the N-N model for SPI will not be supported.
> 
> Hi Marc,
>      Does ARM GIC support N-N module for SPI? Please share your comments.

There is no support for this kind of broadcast IRQs in any published
version of the GIC architecture. The semantics of such interrupts are
just crazy, and I'm really glad we don't support them.

What we could support is 1-of-N, but that's
(1) inefficient,
(2) impossible to virtualize correctly,
(3) only possible with GICv2.

So what you're getting is interrupts targeted at a single CPU, and
that's it.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
@ 2017-02-21  9:43       ` Marc Zyngier
  0 siblings, 0 replies; 12+ messages in thread
From: Marc Zyngier @ 2017-02-21  9:43 UTC (permalink / raw)
  To: linux-arm-kernel

On 21/02/17 07:07, Anurup M wrote:
> Adding Marc.
> 
> On Monday 20 February 2017 04:39 PM, Mark Rutland wrote:
>> On Sun, Feb 19, 2017 at 01:51:03PM -0500, Anurup M wrote:
>>> The L3 cache PMU use N-N SPI interrupt which has no support
>>> in kernel mainline.
>> Could you elaborate on what you mean by this?
>>
>> I don't understand what is meant here. How exactly are the interrupts
>> wired up in HW, and what exactly is not supported by Linux?
> 
> In HW the L3C overflow IRQ is wired as SPI which use N-N model.
> But according to ARM GIC V2 specification, the peripheral(hardware) 
> interrupts should use 1-N model.
> N-N model is used by SGIs. In GIC V3 spec I could not find any 
> description of N-N model.
> So I think the N-N model for SPI will not be supported.
> 
> Hi Marc,
>      Does ARM GIC support N-N module for SPI? Please share your comments.

There is no support for this kind of broadcast IRQs in any published
version of the GIC architecture. The semantics of such interrupts are
just crazy, and I'm really glad we don't support them.

What we could support is 1-of-N, but that's
(1) inefficient,
(2) impossible to virtualize correctly,
(3) only possible with GICv2.

So what you're getting is interrupts targeted at a single CPU, and
that's it.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
  2017-02-20 11:09   ` Mark Rutland
@ 2017-02-21 12:09     ` Will Deacon
  -1 siblings, 0 replies; 12+ messages in thread
From: Will Deacon @ 2017-02-21 12:09 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Anurup M, linux-kernel, linux-arm-kernel, anurup.m, zhangshaokun,
	tanxiaojun, xuwei5, sanil.kumar, john.garry, gabriele.paoloni,
	shiju.jose, huangdaode, linuxarm, dikshit.n, shyju.pv

On Mon, Feb 20, 2017 at 11:09:43AM +0000, Mark Rutland wrote:
> On Sun, Feb 19, 2017 at 01:51:03PM -0500, Anurup M wrote:
> > The L3 cache PMU use N-N SPI interrupt which has no support
> > in kernel mainline.
> 
> Could you elaborate on what you mean by this?
> 
> I don't understand what is meant here. How exactly are the interrupts
> wired up in HW, and what exactly is not supported by Linux?
> 
> > So use hrtimer to poll and update event
> > counter to avoid overflow condition for L3 cache PMU.
> > A interval of 10 seconds is used for the hrtimer.
> > The time interval can be configured in the sysfs.
> 
> I'm not too keen on giving userspace the ability to control this, since
> it gives an awful lot of rope for userspace to tie around itself.

Agreed. I'd also go a step further and say that for PMUs with either
terminally broken interrupts (like this one) or just missing interrupts
(like the CPU PMU on raspberry pi iirc), then the perf core should take
care of an hrtimer in an attempt to generate samples often enough. We
already have PERF_PMU_CAP_NO_INTERRUPT, but it currently just disables
sampling events.

The fiddly part is knowing how to program the timer, and I think you'd
need the PMU driver to provide an upper-bound on events per nanosecond.
I'm pretty sure that would be highly unreliable (especially for shared
resources such as the L3), at which point, is it worth the hassle?

Will

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
@ 2017-02-21 12:09     ` Will Deacon
  0 siblings, 0 replies; 12+ messages in thread
From: Will Deacon @ 2017-02-21 12:09 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Feb 20, 2017 at 11:09:43AM +0000, Mark Rutland wrote:
> On Sun, Feb 19, 2017 at 01:51:03PM -0500, Anurup M wrote:
> > The L3 cache PMU use N-N SPI interrupt which has no support
> > in kernel mainline.
> 
> Could you elaborate on what you mean by this?
> 
> I don't understand what is meant here. How exactly are the interrupts
> wired up in HW, and what exactly is not supported by Linux?
> 
> > So use hrtimer to poll and update event
> > counter to avoid overflow condition for L3 cache PMU.
> > A interval of 10 seconds is used for the hrtimer.
> > The time interval can be configured in the sysfs.
> 
> I'm not too keen on giving userspace the ability to control this, since
> it gives an awful lot of rope for userspace to tie around itself.

Agreed. I'd also go a step further and say that for PMUs with either
terminally broken interrupts (like this one) or just missing interrupts
(like the CPU PMU on raspberry pi iirc), then the perf core should take
care of an hrtimer in an attempt to generate samples often enough. We
already have PERF_PMU_CAP_NO_INTERRUPT, but it currently just disables
sampling events.

The fiddly part is knowing how to program the timer, and I think you'd
need the PMU driver to provide an upper-bound on events per nanosecond.
I'm pretty sure that would be highly unreliable (especially for shared
resources such as the L3), at which point, is it worth the hassle?

Will

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
  2017-02-21 12:09     ` Will Deacon
@ 2017-02-24  3:11       ` Anurup M
  -1 siblings, 0 replies; 12+ messages in thread
From: Anurup M @ 2017-02-24  3:11 UTC (permalink / raw)
  To: Will Deacon, Mark Rutland
  Cc: linux-kernel, linux-arm-kernel, anurup.m, zhangshaokun,
	tanxiaojun, xuwei5, sanil.kumar, john.garry, gabriele.paoloni,
	shiju.jose, huangdaode, linuxarm, dikshit.n, shyju.pv



On Tuesday 21 February 2017 05:39 PM, Will Deacon wrote:
> On Mon, Feb 20, 2017 at 11:09:43AM +0000, Mark Rutland wrote:
>> On Sun, Feb 19, 2017 at 01:51:03PM -0500, Anurup M wrote:
>>> The L3 cache PMU use N-N SPI interrupt which has no support
>>> in kernel mainline.
>> Could you elaborate on what you mean by this?
>>
>> I don't understand what is meant here. How exactly are the interrupts
>> wired up in HW, and what exactly is not supported by Linux?
>>
>>> So use hrtimer to poll and update event
>>> counter to avoid overflow condition for L3 cache PMU.
>>> A interval of 10 seconds is used for the hrtimer.
>>> The time interval can be configured in the sysfs.
>> I'm not too keen on giving userspace the ability to control this, since
>> it gives an awful lot of rope for userspace to tie around itself.
> Agreed. I'd also go a step further and say that for PMUs with either
> terminally broken interrupts (like this one) or just missing interrupts
> (like the CPU PMU on raspberry pi iirc), then the perf core should take
> care of an hrtimer in an attempt to generate samples often enough. We
> already have PERF_PMU_CAP_NO_INTERRUPT, but it currently just disables
> sampling events.
>
> The fiddly part is knowing how to program the timer, and I think you'd
> need the PMU driver to provide an upper-bound on events per nanosecond.
> I'm pretty sure that would be highly unreliable (especially for shared
> resources such as the L3), at which point, is it worth the hassle?

Agreed, it is difficult for a user to arrive at an interval for a shared
resource like the L3 cache. So I shall remove this facility exposed to the
user.

I shall use a realistic and safer upper bound as the hrtimer interval for
the uncore units which do not support IRQ.

Thanks,
Anurup

> Will

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow
@ 2017-02-24  3:11       ` Anurup M
  0 siblings, 0 replies; 12+ messages in thread
From: Anurup M @ 2017-02-24  3:11 UTC (permalink / raw)
  To: linux-arm-kernel



On Tuesday 21 February 2017 05:39 PM, Will Deacon wrote:
> On Mon, Feb 20, 2017 at 11:09:43AM +0000, Mark Rutland wrote:
>> On Sun, Feb 19, 2017 at 01:51:03PM -0500, Anurup M wrote:
>>> The L3 cache PMU use N-N SPI interrupt which has no support
>>> in kernel mainline.
>> Could you elaborate on what you mean by this?
>>
>> I don't understand what is meant here. How exactly are the interrupts
>> wired up in HW, and what exactly is not supported by Linux?
>>
>>> So use hrtimer to poll and update event
>>> counter to avoid overflow condition for L3 cache PMU.
>>> A interval of 10 seconds is used for the hrtimer.
>>> The time interval can be configured in the sysfs.
>> I'm not too keen on giving userspace the ability to control this, since
>> it gives an awful lot of rope for userspace to tie around itself.
> Agreed. I'd also go a step further and say that for PMUs with either
> terminally broken interrupts (like this one) or just missing interrupts
> (like the CPU PMU on raspberry pi iirc), then the perf core should take
> care of an hrtimer in an attempt to generate samples often enough. We
> already have PERF_PMU_CAP_NO_INTERRUPT, but it currently just disables
> sampling events.
>
> The fiddly part is knowing how to program the timer, and I think you'd
> need the PMU driver to provide an upper-bound on events per nanosecond.
> I'm pretty sure that would be highly unreliable (especially for shared
> resources such as the L3), at which point, is it worth the hassle?

Agreed, it is difficult for a user to arrive at an interval for a shared
resource like the L3 cache. So I shall remove this facility exposed to the
user.

I shall use a realistic and safer upper bound as the hrtimer interval for
the uncore units which do not support IRQ.

Thanks,
Anurup

> Will

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-02-24  3:19 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-19 18:51 [PATCH v4 08/11] drivers: perf: hisi: use poll method to avoid L3C counter overflow Anurup M
2017-02-19 18:51 ` Anurup M
2017-02-20 11:09 ` Mark Rutland
2017-02-20 11:09   ` Mark Rutland
2017-02-21  7:07   ` Anurup M
2017-02-21  7:07     ` Anurup M
2017-02-21  9:43     ` Marc Zyngier
2017-02-21  9:43       ` Marc Zyngier
2017-02-21 12:09   ` Will Deacon
2017-02-21 12:09     ` Will Deacon
2017-02-24  3:11     ` Anurup M
2017-02-24  3:11       ` Anurup M

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.