linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 net-next 0/2] use bulk reads for ocelot statistics
@ 2022-01-25  7:15 Colin Foster
  2022-01-25  7:15 ` [PATCH v3 net-next 1/2] net: mscc: ocelot: add ability to perform bulk reads Colin Foster
  2022-01-25  7:15 ` [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats Colin Foster
  0 siblings, 2 replies; 8+ messages in thread
From: Colin Foster @ 2022-01-25  7:15 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: Jakub Kicinski, David S. Miller, UNGLinuxDriver,
	Alexandre Belloni, Claudiu Manoil, Vladimir Oltean

Ocelot loops over memory regions to gather stats on different ports.
These regions are mostly continuous, and are ordered. This patch set
uses that information to break the stats reads into regions that can get
read in bulk.

The motivation is for general cleanup, but also for SPI. Performing two
back-to-back reads on a SPI bus requires toggling the CS line, holding,
re-toggling the CS line, sending 3 address bytes, sending N padding
bytes, then actually performing the read. Bulk reads could reduce almost
all of that overhead, but require that the reads are performed via
regmap_bulk_read.

v1 > v2: reword commit messages
v2 > v3: correctly mark this for net-next when sending

Colin Foster (2):
  net: mscc: ocelot: add ability to perform bulk reads
  net: mscc: ocelot: use bulk reads for stats

 drivers/net/ethernet/mscc/ocelot.c    | 76 ++++++++++++++++++++++-----
 drivers/net/ethernet/mscc/ocelot_io.c | 13 +++++
 include/soc/mscc/ocelot.h             | 12 +++++
 3 files changed, 88 insertions(+), 13 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 net-next 1/2] net: mscc: ocelot: add ability to perform bulk reads
  2022-01-25  7:15 [PATCH v3 net-next 0/2] use bulk reads for ocelot statistics Colin Foster
@ 2022-01-25  7:15 ` Colin Foster
  2022-01-25  7:15 ` [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats Colin Foster
  1 sibling, 0 replies; 8+ messages in thread
From: Colin Foster @ 2022-01-25  7:15 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: Jakub Kicinski, David S. Miller, UNGLinuxDriver,
	Alexandre Belloni, Claudiu Manoil, Vladimir Oltean

Regmap supports bulk register reads. Ocelot does not. This patch adds
support for Ocelot to invoke bulk regmap reads. That will allow any driver
that performs consecutive reads over memory regions to optimize that
access.

Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
---
 drivers/net/ethernet/mscc/ocelot_io.c | 13 +++++++++++++
 include/soc/mscc/ocelot.h             |  4 ++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c
index 7390fa3980ec..2067382d0ee1 100644
--- a/drivers/net/ethernet/mscc/ocelot_io.c
+++ b/drivers/net/ethernet/mscc/ocelot_io.c
@@ -10,6 +10,19 @@
 
 #include "ocelot.h"
 
+int __ocelot_bulk_read_ix(struct ocelot *ocelot, u32 reg, u32 offset, void *buf,
+			  int count)
+{
+	u16 target = reg >> TARGET_OFFSET;
+
+	WARN_ON(!target);
+
+	return regmap_bulk_read(ocelot->targets[target],
+				ocelot->map[target][reg & REG_MASK] + offset,
+				buf, count);
+}
+EXPORT_SYMBOL_GPL(__ocelot_bulk_read_ix);
+
 u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset)
 {
 	u16 target = reg >> TARGET_OFFSET;
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 62cd61d4142e..b66e5abe04a7 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -744,6 +744,8 @@ struct ocelot_policer {
 	u32 burst; /* bytes */
 };
 
+#define ocelot_bulk_read_rix(ocelot, reg, ri, buf, count) __ocelot_bulk_read_ix(ocelot, reg, reg##_RSZ * (ri), buf, count)
+
 #define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
 #define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi))
 #define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri))
@@ -786,6 +788,8 @@ struct ocelot_policer {
 u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
 void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
 void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg);
+int __ocelot_bulk_read_ix(struct ocelot *ocelot, u32 reg, u32 offset, void *buf,
+			  int count);
 u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
 void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset);
 void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats
  2022-01-25  7:15 [PATCH v3 net-next 0/2] use bulk reads for ocelot statistics Colin Foster
  2022-01-25  7:15 ` [PATCH v3 net-next 1/2] net: mscc: ocelot: add ability to perform bulk reads Colin Foster
@ 2022-01-25  7:15 ` Colin Foster
  2022-01-27  2:34   ` Jakub Kicinski
  2022-01-31 10:22   ` Vladimir Oltean
  1 sibling, 2 replies; 8+ messages in thread
From: Colin Foster @ 2022-01-25  7:15 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: Jakub Kicinski, David S. Miller, UNGLinuxDriver,
	Alexandre Belloni, Claudiu Manoil, Vladimir Oltean

Create and utilize bulk regmap reads instead of single access for gathering
stats. The background reading of statistics happens frequently, and over
a few contiguous memory regions.

High speed PCIe buses and MMIO access will probably see negligible
performance increase. Lower speed buses like SPI and I2C could see
significant performance increase, since the bus configuration and register
access times account for a large percentage of data transfer time.

Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
---
 drivers/net/ethernet/mscc/ocelot.c | 76 +++++++++++++++++++++++++-----
 include/soc/mscc/ocelot.h          |  8 ++++
 2 files changed, 71 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 455293aa6343..bf466eaeba3d 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1737,32 +1737,40 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
 }
 EXPORT_SYMBOL(ocelot_get_strings);
 
-static void ocelot_update_stats(struct ocelot *ocelot)
+static int ocelot_update_stats(struct ocelot *ocelot)
 {
-	int i, j;
+	struct ocelot_stats_region *region;
+	int i, j, err = 0;
 
 	mutex_lock(&ocelot->stats_lock);
 
 	for (i = 0; i < ocelot->num_phys_ports; i++) {
+		unsigned int idx = 0;
 		/* Configure the port to read the stats from */
 		ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
 
-		for (j = 0; j < ocelot->num_stats; j++) {
-			u32 val;
-			unsigned int idx = i * ocelot->num_stats + j;
+		list_for_each_entry(region, &ocelot->stats_regions, node) {
+			err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
+						   region->offset, region->buf,
+						   region->count);
+			if (err)
+				goto out;
 
-			val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
-					      ocelot->stats_layout[j].offset);
+			for (j = 0; j < region->count; j++) {
+				if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX))
+					ocelot->stats[idx + j] += (u64)1 << 32;
 
-			if (val < (ocelot->stats[idx] & U32_MAX))
-				ocelot->stats[idx] += (u64)1 << 32;
+				ocelot->stats[idx + j] = (ocelot->stats[idx + j] &
+							~(u64)U32_MAX) + region->buf[j];
+			}
 
-			ocelot->stats[idx] = (ocelot->stats[idx] &
-					      ~(u64)U32_MAX) + val;
+			idx += region->count;
 		}
 	}
 
+out:
 	mutex_unlock(&ocelot->stats_lock);
+	return err;
 }
 
 static void ocelot_check_stats_work(struct work_struct *work)
@@ -1779,10 +1787,11 @@ static void ocelot_check_stats_work(struct work_struct *work)
 
 void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
 {
-	int i;
+	int i, err;
 
 	/* check and update now */
-	ocelot_update_stats(ocelot);
+	err = ocelot_update_stats(ocelot);
+	WARN_ONCE(err, "Error %d updating ethtool stats\n", err);
 
 	/* Copy all counters */
 	for (i = 0; i < ocelot->num_stats; i++)
@@ -1799,6 +1808,43 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
 }
 EXPORT_SYMBOL(ocelot_get_sset_count);
 
+static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
+{
+	struct ocelot_stats_region *region = NULL;
+	unsigned int last;
+	int i;
+
+	INIT_LIST_HEAD(&ocelot->stats_regions);
+
+	for (i = 0; i < ocelot->num_stats; i++) {
+		if (region && ocelot->stats_layout[i].offset == last + 1) {
+			region->count++;
+		} else {
+			region = devm_kzalloc(ocelot->dev, sizeof(*region),
+					      GFP_KERNEL);
+			if (!region)
+				return -ENOMEM;
+
+			region->offset = ocelot->stats_layout[i].offset;
+			region->count = 1;
+			list_add_tail(&region->node, &ocelot->stats_regions);
+		}
+
+		last = ocelot->stats_layout[i].offset;
+	}
+
+	list_for_each_entry(region, &ocelot->stats_regions, node) {
+		region->buf = devm_kzalloc(ocelot->dev,
+					   region->count * sizeof(*region->buf),
+					   GFP_KERNEL);
+
+		if (!region->buf)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 int ocelot_get_ts_info(struct ocelot *ocelot, int port,
 		       struct ethtool_ts_info *info)
 {
@@ -2799,6 +2845,10 @@ int ocelot_init(struct ocelot *ocelot)
 				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
 				 ANA_CPUQ_8021_CFG, i);
 
+	ret = ocelot_prepare_stats_regions(ocelot);
+	if (ret)
+		return ret;
+
 	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
 	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
 			   OCELOT_STATS_CHECK_DELAY);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index b66e5abe04a7..837450fdea57 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -542,6 +542,13 @@ struct ocelot_stat_layout {
 	char name[ETH_GSTRING_LEN];
 };
 
+struct ocelot_stats_region {
+	struct list_head node;
+	u32 offset;
+	int count;
+	u32 *buf;
+};
+
 enum ocelot_tag_prefix {
 	OCELOT_TAG_PREFIX_DISABLED	= 0,
 	OCELOT_TAG_PREFIX_NONE,
@@ -673,6 +680,7 @@ struct ocelot {
 	struct regmap_field		*regfields[REGFIELD_MAX];
 	const u32 *const		*map;
 	const struct ocelot_stat_layout	*stats_layout;
+	struct list_head		stats_regions;
 	unsigned int			num_stats;
 
 	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats
  2022-01-25  7:15 ` [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats Colin Foster
@ 2022-01-27  2:34   ` Jakub Kicinski
  2022-01-27 16:30     ` Colin Foster
  2022-01-31 10:22   ` Vladimir Oltean
  1 sibling, 1 reply; 8+ messages in thread
From: Jakub Kicinski @ 2022-01-27  2:34 UTC (permalink / raw)
  To: Colin Foster
  Cc: linux-kernel, netdev, David S. Miller, UNGLinuxDriver,
	Alexandre Belloni, Claudiu Manoil, Vladimir Oltean

On Mon, 24 Jan 2022 23:15:31 -0800 Colin Foster wrote:
> Create and utilize bulk regmap reads instead of single access for gathering
> stats. The background reading of statistics happens frequently, and over
> a few contiguous memory regions.
> 
> High speed PCIe buses and MMIO access will probably see negligible
> performance increase. Lower speed buses like SPI and I2C could see
> significant performance increase, since the bus configuration and register
> access times account for a large percentage of data transfer time.
> 
> Signed-off-by: Colin Foster <colin.foster@in-advantage.com>

> +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> +{
> +	struct ocelot_stats_region *region = NULL;
> +	unsigned int last;
> +	int i;
> +
> +	INIT_LIST_HEAD(&ocelot->stats_regions);
> +
> +	for (i = 0; i < ocelot->num_stats; i++) {
> +		if (region && ocelot->stats_layout[i].offset == last + 1) {
> +			region->count++;
> +		} else {
> +			region = devm_kzalloc(ocelot->dev, sizeof(*region),
> +					      GFP_KERNEL);
> +			if (!region)
> +				return -ENOMEM;
> +
> +			region->offset = ocelot->stats_layout[i].offset;
> +			region->count = 1;
> +			list_add_tail(&region->node, &ocelot->stats_regions);
> +		}
> +
> +		last = ocelot->stats_layout[i].offset;
> +	}
> +
> +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> +		region->buf = devm_kzalloc(ocelot->dev,
> +					   region->count * sizeof(*region->buf),
> +					   GFP_KERNEL);

devm_kcalloc()

> +

unnecessary new line

> +		if (!region->buf)
> +			return -ENOMEM;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats
  2022-01-27  2:34   ` Jakub Kicinski
@ 2022-01-27 16:30     ` Colin Foster
  0 siblings, 0 replies; 8+ messages in thread
From: Colin Foster @ 2022-01-27 16:30 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: linux-kernel, netdev, David S. Miller, UNGLinuxDriver,
	Alexandre Belloni, Claudiu Manoil, Vladimir Oltean

Hi Jakub,

On Wed, Jan 26, 2022 at 06:34:36PM -0800, Jakub Kicinski wrote:
> On Mon, 24 Jan 2022 23:15:31 -0800 Colin Foster wrote:
> > Create and utilize bulk regmap reads instead of single access for gathering
> > stats. The background reading of statistics happens frequently, and over
> > a few contiguous memory regions.
> > 
> > High speed PCIe buses and MMIO access will probably see negligible
> > performance increase. Lower speed buses like SPI and I2C could see
> > significant performance increase, since the bus configuration and register
> > access times account for a large percentage of data transfer time.
> > 
> > Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
> 
> > +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> > +{
> > +	struct ocelot_stats_region *region = NULL;
> > +	unsigned int last;
> > +	int i;
> > +
> > +	INIT_LIST_HEAD(&ocelot->stats_regions);
> > +
> > +	for (i = 0; i < ocelot->num_stats; i++) {
> > +		if (region && ocelot->stats_layout[i].offset == last + 1) {
> > +			region->count++;
> > +		} else {
> > +			region = devm_kzalloc(ocelot->dev, sizeof(*region),
> > +					      GFP_KERNEL);
> > +			if (!region)
> > +				return -ENOMEM;
> > +
> > +			region->offset = ocelot->stats_layout[i].offset;
> > +			region->count = 1;
> > +			list_add_tail(&region->node, &ocelot->stats_regions);
> > +		}
> > +
> > +		last = ocelot->stats_layout[i].offset;
> > +	}
> > +
> > +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> > +		region->buf = devm_kzalloc(ocelot->dev,
> > +					   region->count * sizeof(*region->buf),
> > +					   GFP_KERNEL);
> 
> devm_kcalloc()

Good catch. I'll probably give it another day for review and resubmit
tomorrow.

> 
> > +
> 
> unnecessary new line
> 
> > +		if (!region->buf)
> > +			return -ENOMEM;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats
  2022-01-25  7:15 ` [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats Colin Foster
  2022-01-27  2:34   ` Jakub Kicinski
@ 2022-01-31 10:22   ` Vladimir Oltean
  2022-01-31 15:51     ` Colin Foster
  2022-02-08  4:35     ` Colin Foster
  1 sibling, 2 replies; 8+ messages in thread
From: Vladimir Oltean @ 2022-01-31 10:22 UTC (permalink / raw)
  To: Colin Foster
  Cc: linux-kernel, netdev, Jakub Kicinski, David S. Miller,
	UNGLinuxDriver, Alexandre Belloni, Claudiu Manoil

On Mon, Jan 24, 2022 at 11:15:31PM -0800, Colin Foster wrote:
> Create and utilize bulk regmap reads instead of single access for gathering
> stats. The background reading of statistics happens frequently, and over
> a few contiguous memory regions.
> 
> High speed PCIe buses and MMIO access will probably see negligible
> performance increase. Lower speed buses like SPI and I2C could see
> significant performance increase, since the bus configuration and register
> access times account for a large percentage of data transfer time.
> 
> Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
> ---
>  drivers/net/ethernet/mscc/ocelot.c | 76 +++++++++++++++++++++++++-----
>  include/soc/mscc/ocelot.h          |  8 ++++
>  2 files changed, 71 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> index 455293aa6343..bf466eaeba3d 100644
> --- a/drivers/net/ethernet/mscc/ocelot.c
> +++ b/drivers/net/ethernet/mscc/ocelot.c
> @@ -1737,32 +1737,40 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
>  }
>  EXPORT_SYMBOL(ocelot_get_strings);
>  
> -static void ocelot_update_stats(struct ocelot *ocelot)
> +static int ocelot_update_stats(struct ocelot *ocelot)
>  {
> -	int i, j;
> +	struct ocelot_stats_region *region;
> +	int i, j, err = 0;
>  
>  	mutex_lock(&ocelot->stats_lock);
>  
>  	for (i = 0; i < ocelot->num_phys_ports; i++) {
> +		unsigned int idx = 0;

It is usual to leave a blank line between variable declarations and code.

>  		/* Configure the port to read the stats from */
>  		ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
>  
> -		for (j = 0; j < ocelot->num_stats; j++) {
> -			u32 val;
> -			unsigned int idx = i * ocelot->num_stats + j;
> +		list_for_each_entry(region, &ocelot->stats_regions, node) {
> +			err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,

I'd be tempted to pass SYS << TARGET_OFFSET here.

> +						   region->offset, region->buf,
> +						   region->count);
> +			if (err)
> +				goto out;
>  
> -			val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> -					      ocelot->stats_layout[j].offset);
> +			for (j = 0; j < region->count; j++) {
> +				if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX))
> +					ocelot->stats[idx + j] += (u64)1 << 32;
>  
> -			if (val < (ocelot->stats[idx] & U32_MAX))
> -				ocelot->stats[idx] += (u64)1 << 32;
> +				ocelot->stats[idx + j] = (ocelot->stats[idx + j] &
> +							~(u64)U32_MAX) + region->buf[j];
> +			}
>  
> -			ocelot->stats[idx] = (ocelot->stats[idx] &
> -					      ~(u64)U32_MAX) + val;
> +			idx += region->count;
>  		}
>  	}
>  
> +out:
>  	mutex_unlock(&ocelot->stats_lock);
> +	return err;
>  }
>  
>  static void ocelot_check_stats_work(struct work_struct *work)
> @@ -1779,10 +1787,11 @@ static void ocelot_check_stats_work(struct work_struct *work)
>  
>  void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
>  {
> -	int i;
> +	int i, err;
>  
>  	/* check and update now */
> -	ocelot_update_stats(ocelot);
> +	err = ocelot_update_stats(ocelot);
> +	WARN_ONCE(err, "Error %d updating ethtool stats\n", err);
>  
>  	/* Copy all counters */
>  	for (i = 0; i < ocelot->num_stats; i++)
> @@ -1799,6 +1808,43 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
>  }
>  EXPORT_SYMBOL(ocelot_get_sset_count);
>  
> +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> +{
> +	struct ocelot_stats_region *region = NULL;
> +	unsigned int last;
> +	int i;
> +
> +	INIT_LIST_HEAD(&ocelot->stats_regions);
> +
> +	for (i = 0; i < ocelot->num_stats; i++) {
> +		if (region && ocelot->stats_layout[i].offset == last + 1) {
> +			region->count++;
> +		} else {
> +			region = devm_kzalloc(ocelot->dev, sizeof(*region),
> +					      GFP_KERNEL);
> +			if (!region)
> +				return -ENOMEM;
> +
> +			region->offset = ocelot->stats_layout[i].offset;
> +			region->count = 1;
> +			list_add_tail(&region->node, &ocelot->stats_regions);
> +		}
> +
> +		last = ocelot->stats_layout[i].offset;
> +	}
> +
> +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> +		region->buf = devm_kzalloc(ocelot->dev,
> +					   region->count * sizeof(*region->buf),
> +					   GFP_KERNEL);
> +
> +		if (!region->buf)
> +			return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
>  int ocelot_get_ts_info(struct ocelot *ocelot, int port,
>  		       struct ethtool_ts_info *info)
>  {
> @@ -2799,6 +2845,10 @@ int ocelot_init(struct ocelot *ocelot)
>  				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
>  				 ANA_CPUQ_8021_CFG, i);
>  
> +	ret = ocelot_prepare_stats_regions(ocelot);
> +	if (ret)
> +		return ret;
> +

Destroy ocelot->stats_queue and ocelot->owq.

>  	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
>  	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
>  			   OCELOT_STATS_CHECK_DELAY);
> diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
> index b66e5abe04a7..837450fdea57 100644
> --- a/include/soc/mscc/ocelot.h
> +++ b/include/soc/mscc/ocelot.h
> @@ -542,6 +542,13 @@ struct ocelot_stat_layout {
>  	char name[ETH_GSTRING_LEN];
>  };
>  
> +struct ocelot_stats_region {
> +	struct list_head node;
> +	u32 offset;
> +	int count;
> +	u32 *buf;
> +};
> +
>  enum ocelot_tag_prefix {
>  	OCELOT_TAG_PREFIX_DISABLED	= 0,
>  	OCELOT_TAG_PREFIX_NONE,
> @@ -673,6 +680,7 @@ struct ocelot {
>  	struct regmap_field		*regfields[REGFIELD_MAX];
>  	const u32 *const		*map;
>  	const struct ocelot_stat_layout	*stats_layout;
> +	struct list_head		stats_regions;
>  	unsigned int			num_stats;
>  
>  	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
> -- 
> 2.25.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats
  2022-01-31 10:22   ` Vladimir Oltean
@ 2022-01-31 15:51     ` Colin Foster
  2022-02-08  4:35     ` Colin Foster
  1 sibling, 0 replies; 8+ messages in thread
From: Colin Foster @ 2022-01-31 15:51 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: linux-kernel, netdev, Jakub Kicinski, David S. Miller,
	UNGLinuxDriver, Alexandre Belloni, Claudiu Manoil

Hi Vladimir,

Thanks for the feedback. I already submitted v4, so I'll put these into
v5 this week.

On Mon, Jan 31, 2022 at 10:22:55AM +0000, Vladimir Oltean wrote:
> On Mon, Jan 24, 2022 at 11:15:31PM -0800, Colin Foster wrote:
> > Create and utilize bulk regmap reads instead of single access for gathering
> > stats. The background reading of statistics happens frequently, and over
> > a few contiguous memory regions.
> > 
> > High speed PCIe buses and MMIO access will probably see negligible
> > performance increase. Lower speed buses like SPI and I2C could see
> > significant performance increase, since the bus configuration and register
> > access times account for a large percentage of data transfer time.
> > 
> > Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
> > ---
> >  drivers/net/ethernet/mscc/ocelot.c | 76 +++++++++++++++++++++++++-----
> >  include/soc/mscc/ocelot.h          |  8 ++++
> >  2 files changed, 71 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> > index 455293aa6343..bf466eaeba3d 100644
> > --- a/drivers/net/ethernet/mscc/ocelot.c
> > +++ b/drivers/net/ethernet/mscc/ocelot.c
> > @@ -1737,32 +1737,40 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
> >  }
> >  EXPORT_SYMBOL(ocelot_get_strings);
> >  
> > -static void ocelot_update_stats(struct ocelot *ocelot)
> > +static int ocelot_update_stats(struct ocelot *ocelot)
> >  {
> > -	int i, j;
> > +	struct ocelot_stats_region *region;
> > +	int i, j, err = 0;
> >  
> >  	mutex_lock(&ocelot->stats_lock);
> >  
> >  	for (i = 0; i < ocelot->num_phys_ports; i++) {
> > +		unsigned int idx = 0;
> 
> It is usual to leave a blank line between variable declarations and code.
> 
> >  		/* Configure the port to read the stats from */
> >  		ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
> >  
> > -		for (j = 0; j < ocelot->num_stats; j++) {
> > -			u32 val;
> > -			unsigned int idx = i * ocelot->num_stats + j;
> > +		list_for_each_entry(region, &ocelot->stats_regions, node) {
> > +			err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> 
> I'd be tempted to pass SYS << TARGET_OFFSET here.

I'll take another look at this.

> 
> > +						   region->offset, region->buf,
> > +						   region->count);
> > +			if (err)
> > +				goto out;
> >  
> > -			val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> > -					      ocelot->stats_layout[j].offset);
> > +			for (j = 0; j < region->count; j++) {
> > +				if (region->buf[j] < (ocelot->stats[idx + j] & U32_MAX))
> > +					ocelot->stats[idx + j] += (u64)1 << 32;
> >  
> > -			if (val < (ocelot->stats[idx] & U32_MAX))
> > -				ocelot->stats[idx] += (u64)1 << 32;
> > +				ocelot->stats[idx + j] = (ocelot->stats[idx + j] &
> > +							~(u64)U32_MAX) + region->buf[j];
> > +			}
> >  
> > -			ocelot->stats[idx] = (ocelot->stats[idx] &
> > -					      ~(u64)U32_MAX) + val;
> > +			idx += region->count;
> >  		}
> >  	}
> >  
> > +out:
> >  	mutex_unlock(&ocelot->stats_lock);
> > +	return err;
> >  }
> >  
> >  static void ocelot_check_stats_work(struct work_struct *work)
> > @@ -1779,10 +1787,11 @@ static void ocelot_check_stats_work(struct work_struct *work)
> >  
> >  void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
> >  {
> > -	int i;
> > +	int i, err;
> >  
> >  	/* check and update now */
> > -	ocelot_update_stats(ocelot);
> > +	err = ocelot_update_stats(ocelot);
> > +	WARN_ONCE(err, "Error %d updating ethtool stats\n", err);
> >  
> >  	/* Copy all counters */
> >  	for (i = 0; i < ocelot->num_stats; i++)
> > @@ -1799,6 +1808,43 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
> >  }
> >  EXPORT_SYMBOL(ocelot_get_sset_count);
> >  
> > +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> > +{
> > +	struct ocelot_stats_region *region = NULL;
> > +	unsigned int last;
> > +	int i;
> > +
> > +	INIT_LIST_HEAD(&ocelot->stats_regions);
> > +
> > +	for (i = 0; i < ocelot->num_stats; i++) {
> > +		if (region && ocelot->stats_layout[i].offset == last + 1) {
> > +			region->count++;
> > +		} else {
> > +			region = devm_kzalloc(ocelot->dev, sizeof(*region),
> > +					      GFP_KERNEL);
> > +			if (!region)
> > +				return -ENOMEM;
> > +
> > +			region->offset = ocelot->stats_layout[i].offset;
> > +			region->count = 1;
> > +			list_add_tail(&region->node, &ocelot->stats_regions);
> > +		}
> > +
> > +		last = ocelot->stats_layout[i].offset;
> > +	}
> > +
> > +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> > +		region->buf = devm_kzalloc(ocelot->dev,
> > +					   region->count * sizeof(*region->buf),
> > +					   GFP_KERNEL);
> > +
> > +		if (!region->buf)
> > +			return -ENOMEM;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  int ocelot_get_ts_info(struct ocelot *ocelot, int port,
> >  		       struct ethtool_ts_info *info)
> >  {
> > @@ -2799,6 +2845,10 @@ int ocelot_init(struct ocelot *ocelot)
> >  				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
> >  				 ANA_CPUQ_8021_CFG, i);
> >  
> > +	ret = ocelot_prepare_stats_regions(ocelot);
> > +	if (ret)
> > +		return ret;
> > +
> 
> Destroy ocelot->stats_queue and ocelot->owq.

Wow, good catch!

> 
> >  	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
> >  	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
> >  			   OCELOT_STATS_CHECK_DELAY);
> > diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
> > index b66e5abe04a7..837450fdea57 100644
> > --- a/include/soc/mscc/ocelot.h
> > +++ b/include/soc/mscc/ocelot.h
> > @@ -542,6 +542,13 @@ struct ocelot_stat_layout {
> >  	char name[ETH_GSTRING_LEN];
> >  };
> >  
> > +struct ocelot_stats_region {
> > +	struct list_head node;
> > +	u32 offset;
> > +	int count;
> > +	u32 *buf;
> > +};
> > +
> >  enum ocelot_tag_prefix {
> >  	OCELOT_TAG_PREFIX_DISABLED	= 0,
> >  	OCELOT_TAG_PREFIX_NONE,
> > @@ -673,6 +680,7 @@ struct ocelot {
> >  	struct regmap_field		*regfields[REGFIELD_MAX];
> >  	const u32 *const		*map;
> >  	const struct ocelot_stat_layout	*stats_layout;
> > +	struct list_head		stats_regions;
> >  	unsigned int			num_stats;
> >  
> >  	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
> > -- 
> > 2.25.1
> >

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats
  2022-01-31 10:22   ` Vladimir Oltean
  2022-01-31 15:51     ` Colin Foster
@ 2022-02-08  4:35     ` Colin Foster
  1 sibling, 0 replies; 8+ messages in thread
From: Colin Foster @ 2022-02-08  4:35 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: linux-kernel, netdev, Jakub Kicinski, David S. Miller,
	UNGLinuxDriver, Alexandre Belloni, Claudiu Manoil

Hi Vladimir,

I'm sending out v5 shortly. Sorry I seem to have forgotten to respond.
All changes made, except the SYS << TARGET_OFFSET. The macros for *_rix
use reg##_RSZ for expansion.

On Mon, Jan 31, 2022 at 10:22:55AM +0000, Vladimir Oltean wrote:
> On Mon, Jan 24, 2022 at 11:15:31PM -0800, Colin Foster wrote:
[ ... ]
> >  	mutex_lock(&ocelot->stats_lock);
> >  
> >  	for (i = 0; i < ocelot->num_phys_ports; i++) {
> > +		unsigned int idx = 0;
> 
> It is usual to leave a blank line between variable declarations and code.
> 
> >  		/* Configure the port to read the stats from */
> >  		ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
> >  
> > -		for (j = 0; j < ocelot->num_stats; j++) {
> > -			u32 val;
> > -			unsigned int idx = i * ocelot->num_stats + j;
> > +		list_for_each_entry(region, &ocelot->stats_regions, node) {
> > +			err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> 
> I'd be tempted to pass SYS << TARGET_OFFSET here.

Expands to SYS_COUNT_RX_OCTETS_RSZ, defined in
include/soc/mscc/ocelot_sys.h.

> 
> > +						   region->offset, region->buf,
> > +						   region->count);
[ ... ]
> > @@ -2799,6 +2845,10 @@ int ocelot_init(struct ocelot *ocelot)
> >  				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
> >  				 ANA_CPUQ_8021_CFG, i);
> >  
> > +	ret = ocelot_prepare_stats_regions(ocelot);
> > +	if (ret)
> > +		return ret;
> > +
> 
> Destroy ocelot->stats_queue and ocelot->owq.
> 
> >  	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
> >  	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
> >  			   OCELOT_STATS_CHECK_DELAY);
> > diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
> > index b66e5abe04a7..837450fdea57 100644
> > --- a/include/soc/mscc/ocelot.h
> > +++ b/include/soc/mscc/ocelot.h
> > @@ -542,6 +542,13 @@ struct ocelot_stat_layout {
> >  	char name[ETH_GSTRING_LEN];
> >  };
> >  
> > +struct ocelot_stats_region {
> > +	struct list_head node;
> > +	u32 offset;
> > +	int count;
> > +	u32 *buf;
> > +};
> > +
> >  enum ocelot_tag_prefix {
> >  	OCELOT_TAG_PREFIX_DISABLED	= 0,
> >  	OCELOT_TAG_PREFIX_NONE,
> > @@ -673,6 +680,7 @@ struct ocelot {
> >  	struct regmap_field		*regfields[REGFIELD_MAX];
> >  	const u32 *const		*map;
> >  	const struct ocelot_stat_layout	*stats_layout;
> > +	struct list_head		stats_regions;
> >  	unsigned int			num_stats;
> >  
> >  	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
> > -- 
> > 2.25.1
> >

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-02-08  4:35 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-25  7:15 [PATCH v3 net-next 0/2] use bulk reads for ocelot statistics Colin Foster
2022-01-25  7:15 ` [PATCH v3 net-next 1/2] net: mscc: ocelot: add ability to perform bulk reads Colin Foster
2022-01-25  7:15 ` [PATCH v3 net-next 2/2] net: mscc: ocelot: use bulk reads for stats Colin Foster
2022-01-27  2:34   ` Jakub Kicinski
2022-01-27 16:30     ` Colin Foster
2022-01-31 10:22   ` Vladimir Oltean
2022-01-31 15:51     ` Colin Foster
2022-02-08  4:35     ` Colin Foster

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).