All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/5] be2net fixes v2
@ 2011-08-23  5:41 Sathya Perla
  2011-08-23  5:41 ` [PATCH net-next 1/5] be2net: Fix race in posting rx buffers Sathya Perla
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Sathya Perla @ 2011-08-23  5:41 UTC (permalink / raw)
  To: netdev

Incorporated Eric's feedback on [3/5] be2net: fix erx->rx_drops_no_frags wrap around
Pls apply. Thanks.

Sathya Perla (5):
  be2net: Fix race in posting rx buffers.
  be2net: get rid of memory mapped pci-cfg space address
  be2net: fix erx->rx_drops_no_frags wrap around
  be2net: increase FW update completion timeout
  be2net: remove unused variable

 drivers/net/ethernet/emulex/benet/be.h      |    2 -
 drivers/net/ethernet/emulex/benet/be_cmds.c |    2 +-
 drivers/net/ethernet/emulex/benet/be_main.c |   51 +++++++++++++++------------
 3 files changed, 29 insertions(+), 26 deletions(-)

-- 
1.7.4


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH net-next 1/5] be2net: Fix race in posting rx buffers.
  2011-08-23  5:41 [PATCH net-next 0/5] be2net fixes v2 Sathya Perla
@ 2011-08-23  5:41 ` Sathya Perla
  2011-08-23  5:41 ` [PATCH net-next 2/5] be2net: get rid of memory mapped pci-cfg space address Sathya Perla
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Sathya Perla @ 2011-08-23  5:41 UTC (permalink / raw)
  To: netdev

There is a possibility of be_post_rx_frags() being called simultaneously from
both be_worker() (when rx_post_starved) and be_poll_rx() (when rxq->used is 0).
This can be avoided by posting rx buffers only when some completions have been
reaped.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index ef62594..09eb699 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1862,7 +1862,7 @@ loop_continue:
 	}
 
 	/* Refill the queue */
-	if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM)
+	if (work_done && atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM)
 		be_post_rx_frags(rxo, GFP_ATOMIC);
 
 	/* All consumed */
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 2/5] be2net: get rid of memory mapped pci-cfg space address
  2011-08-23  5:41 [PATCH net-next 0/5] be2net fixes v2 Sathya Perla
  2011-08-23  5:41 ` [PATCH net-next 1/5] be2net: Fix race in posting rx buffers Sathya Perla
@ 2011-08-23  5:41 ` Sathya Perla
  2011-08-23  5:41 ` [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around Sathya Perla
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Sathya Perla @ 2011-08-23  5:41 UTC (permalink / raw)
  To: netdev

Get rid of adapter->pcicfg and its use. Use pci_config_read/write_dword()
instead.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be.h      |    1 -
 drivers/net/ethernet/emulex/benet/be_main.c |   27 ++++++++-------------------
 2 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 12b5b51..868d7f4 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -298,7 +298,6 @@ struct be_adapter {
 
 	u8 __iomem *csr;
 	u8 __iomem *db;		/* Door Bell */
-	u8 __iomem *pcicfg;	/* PCI config space */
 
 	struct mutex mbox_lock; /* For serializing mbox cmds to BE card */
 	struct be_dma_mem mbox_mem;
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 09eb699..2375c0c 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -141,13 +141,15 @@ static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
 
 static void be_intr_set(struct be_adapter *adapter, bool enable)
 {
-	u8 __iomem *addr = adapter->pcicfg + PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET;
-	u32 reg = ioread32(addr);
-	u32 enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
+	u32 reg, enabled;
 
 	if (adapter->eeh_err)
 		return;
 
+	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
+				&reg);
+	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
+
 	if (!enabled && enable)
 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 	else if (enabled && !enable)
@@ -155,7 +157,8 @@ static void be_intr_set(struct be_adapter *adapter, bool enable)
 	else
 		return;
 
-	iowrite32(reg, addr);
+	pci_write_config_dword(adapter->pdev,
+			PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
 }
 
 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
@@ -2951,14 +2954,12 @@ static void be_unmap_pci_bars(struct be_adapter *adapter)
 		iounmap(adapter->csr);
 	if (adapter->db)
 		iounmap(adapter->db);
-	if (adapter->pcicfg && be_physfn(adapter))
-		iounmap(adapter->pcicfg);
 }
 
 static int be_map_pci_bars(struct be_adapter *adapter)
 {
 	u8 __iomem *addr;
-	int pcicfg_reg, db_reg;
+	int db_reg;
 
 	if (lancer_chip(adapter)) {
 		addr = ioremap_nocache(pci_resource_start(adapter->pdev, 0),
@@ -2978,10 +2979,8 @@ static int be_map_pci_bars(struct be_adapter *adapter)
 	}
 
 	if (adapter->generation == BE_GEN2) {
-		pcicfg_reg = 1;
 		db_reg = 4;
 	} else {
-		pcicfg_reg = 0;
 		if (be_physfn(adapter))
 			db_reg = 4;
 		else
@@ -2993,16 +2992,6 @@ static int be_map_pci_bars(struct be_adapter *adapter)
 		goto pci_map_err;
 	adapter->db = addr;
 
-	if (be_physfn(adapter)) {
-		addr = ioremap_nocache(
-				pci_resource_start(adapter->pdev, pcicfg_reg),
-				pci_resource_len(adapter->pdev, pcicfg_reg));
-		if (addr == NULL)
-			goto pci_map_err;
-		adapter->pcicfg = addr;
-	} else
-		adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
-
 	return 0;
 pci_map_err:
 	be_unmap_pci_bars(adapter);
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around
  2011-08-23  5:41 [PATCH net-next 0/5] be2net fixes v2 Sathya Perla
  2011-08-23  5:41 ` [PATCH net-next 1/5] be2net: Fix race in posting rx buffers Sathya Perla
  2011-08-23  5:41 ` [PATCH net-next 2/5] be2net: get rid of memory mapped pci-cfg space address Sathya Perla
@ 2011-08-23  5:41 ` Sathya Perla
  2011-08-23  6:41   ` Eric Dumazet
  2011-08-23  5:41 ` [PATCH net-next 4/5] be2net: increase FW update completion timeout Sathya Perla
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Sathya Perla @ 2011-08-23  5:41 UTC (permalink / raw)
  To: netdev

The rx_drops_no_frags HW counter for RSS rings is 16bits in HW and can
wraparound often. Maintain a 32-bit accumulator in the driver to prevent
frequent wraparound.

Also, incorporated Eric's feedback to use ACCESS_ONCE() for the accumulator
write.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |   22 +++++++++++++++++++---
 1 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 2375c0c..fb2eda0 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -378,6 +378,18 @@ static void populate_lancer_stats(struct be_adapter *adapter)
 				pport_stats->rx_drops_too_many_frags_lo;
 }
 
+static void accumulate_16bit_val(u32 *acc, u16 val)
+{
+#define lo(x)			(x & 0xFFFF)
+#define hi(x)			(x & 0xFFFF0000)
+	bool wrapped = val < lo(*acc);
+	u32 newacc = hi(*acc) + val;
+
+	if (wrapped)
+		newacc += 65536;
+	ACCESS_ONCE(*acc) = newacc;
+}
+
 void be_parse_stats(struct be_adapter *adapter)
 {
 	struct be_erx_stats_v1 *erx = be_erx_stats_from_cmd(adapter);
@@ -394,9 +406,13 @@ void be_parse_stats(struct be_adapter *adapter)
 	}
 
 	/* as erx_v1 is longer than v0, ok to use v1 defn for v0 access */
-	for_all_rx_queues(adapter, rxo, i)
-		rx_stats(rxo)->rx_drops_no_frags =
-			erx->rx_drops_no_fragments[rxo->q.id];
+	for_all_rx_queues(adapter, rxo, i) {
+		/* below erx HW counter can actually wrap around after
+		 * 65535. Driver accumulates a 32-bit value
+		 */
+		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
+				(u16)erx->rx_drops_no_fragments[rxo->q.id]);
+	}
 }
 
 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 4/5] be2net: increase FW update completion timeout
  2011-08-23  5:41 [PATCH net-next 0/5] be2net fixes v2 Sathya Perla
                   ` (2 preceding siblings ...)
  2011-08-23  5:41 ` [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around Sathya Perla
@ 2011-08-23  5:41 ` Sathya Perla
  2011-08-23  5:41 ` [PATCH net-next 5/5] be2net: remove unused variable Sathya Perla
  2011-08-25  0:08 ` [PATCH net-next 0/5] be2net fixes v2 David Miller
  5 siblings, 0 replies; 16+ messages in thread
From: Sathya Perla @ 2011-08-23  5:41 UTC (permalink / raw)
  To: netdev

Flashing some of the PHYs can take longer thus increasing the total flash
update time to a max of 40s.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index bec039d..bebeee6 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -1939,7 +1939,7 @@ int be_cmd_write_flashrom(struct be_adapter *adapter, struct be_dma_mem *cmd,
 	spin_unlock_bh(&adapter->mcc_lock);
 
 	if (!wait_for_completion_timeout(&adapter->flash_compl,
-			msecs_to_jiffies(12000)))
+			msecs_to_jiffies(40000)))
 		status = -1;
 	else
 		status = adapter->flash_status;
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 5/5] be2net: remove unused variable
  2011-08-23  5:41 [PATCH net-next 0/5] be2net fixes v2 Sathya Perla
                   ` (3 preceding siblings ...)
  2011-08-23  5:41 ` [PATCH net-next 4/5] be2net: increase FW update completion timeout Sathya Perla
@ 2011-08-23  5:41 ` Sathya Perla
  2011-08-25  0:08 ` [PATCH net-next 0/5] be2net fixes v2 David Miller
  5 siblings, 0 replies; 16+ messages in thread
From: Sathya Perla @ 2011-08-23  5:41 UTC (permalink / raw)
  To: netdev


Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be.h |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 868d7f4..c5f0516 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -347,7 +347,6 @@ struct be_adapter {
 	u32 beacon_state;	/* for set_phys_id */
 
 	bool eeh_err;
-	bool link_up;
 	u32 port_num;
 	bool promiscuous;
 	bool wol;
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around
  2011-08-23  5:41 ` [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around Sathya Perla
@ 2011-08-23  6:41   ` Eric Dumazet
  2011-08-23  7:06     ` Sathya.Perla
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2011-08-23  6:41 UTC (permalink / raw)
  To: Sathya Perla; +Cc: netdev

Le mardi 23 août 2011 à 11:11 +0530, Sathya Perla a écrit :
> The rx_drops_no_frags HW counter for RSS rings is 16bits in HW and can
> wraparound often. Maintain a 32-bit accumulator in the driver to prevent
> frequent wraparound.
> 
> Also, incorporated Eric's feedback to use ACCESS_ONCE() for the accumulator
> write.
> 
> Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
> ---
>  drivers/net/ethernet/emulex/benet/be_main.c |   22 +++++++++++++++++++---
>  1 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
> index 2375c0c..fb2eda0 100644
> --- a/drivers/net/ethernet/emulex/benet/be_main.c
> +++ b/drivers/net/ethernet/emulex/benet/be_main.c
> @@ -378,6 +378,18 @@ static void populate_lancer_stats(struct be_adapter *adapter)
>  				pport_stats->rx_drops_too_many_frags_lo;
>  }
>  
> +static void accumulate_16bit_val(u32 *acc, u16 val)
> +{
> +#define lo(x)			(x & 0xFFFF)
> +#define hi(x)			(x & 0xFFFF0000)
> +	bool wrapped = val < lo(*acc);
> +	u32 newacc = hi(*acc) + val;
> +
> +	if (wrapped)
> +		newacc += 65536;
> +	ACCESS_ONCE(*acc) = newacc;
> +}
> +

I still feel something is wrong here :

>  void be_parse_stats(struct be_adapter *adapter)
>  {
>  	struct be_erx_stats_v1 *erx = be_erx_stats_from_cmd(adapter);
> @@ -394,9 +406,13 @@ void be_parse_stats(struct be_adapter *adapter)
>  	}
>  
>  	/* as erx_v1 is longer than v0, ok to use v1 defn for v0 access */
> -	for_all_rx_queues(adapter, rxo, i)
> -		rx_stats(rxo)->rx_drops_no_frags =
> -			erx->rx_drops_no_fragments[rxo->q.id];

previous code was not doing a sum_of_all_queues.

It only gave the final erx->rx_drops_no_fragments[rxo->q.id], not taking
into account previous rx_stats(rxo)->rx_drops_no_frags value.

Your changelog is about wrap around, while the bug might have be
different (No real sum)

Now you say : previous value is meaningfull, and we add to it 16bits
values.

> +	for_all_rx_queues(adapter, rxo, i) {
> +		/* below erx HW counter can actually wrap around after
> +		 * 65535. Driver accumulates a 32-bit value
> +		 */
> +		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
> +				(u16)erx->rx_drops_no_fragments[rxo->q.id]);
> +	}
>  }
>  

Arent multiple calls to be_parse_stats() will have wrong final
rx_drops_no_frags value ?

Or are the rx_drops_no_fragments[rxo->q.id] cleared when read ?

I am afraid that if HW maintains 16bit values, then the only way is to
also have a 16bit accumulator. You cannot detect wraparounds without
also keeping a copy of previous 16bit samples.

u16 accum = 0;
or_all_rx_queues(adapter, rxo, i) {
	accum += erx->rx_drops_no_fragments[rxo->q.id];
}
rx_stats(rxo)->rx_drops_no_frags = accum;



^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around
  2011-08-23  6:41   ` Eric Dumazet
@ 2011-08-23  7:06     ` Sathya.Perla
  2011-08-23  7:29       ` Eric Dumazet
  0 siblings, 1 reply; 16+ messages in thread
From: Sathya.Perla @ 2011-08-23  7:06 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev

>-----Original Message-----
>From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
>Sent: Tuesday, August 23, 2011 12:11 PM
>
>Le mardi 23 août 2011 à 11:11 +0530, Sathya Perla a écrit :
>> The rx_drops_no_frags HW counter for RSS rings is 16bits in HW and can
>> wraparound often. Maintain a 32-bit accumulator in the driver to prevent
>> frequent wraparound.
>>
>> Also, incorporated Eric's feedback to use ACCESS_ONCE() for the
>accumulator
>> write.
<...>
>>
>> +static void accumulate_16bit_val(u32 *acc, u16 val)
>> +{
>> +#define lo(x)			(x & 0xFFFF)
>> +#define hi(x)			(x & 0xFFFF0000)
>> +	bool wrapped = val < lo(*acc);
>> +	u32 newacc = hi(*acc) + val;
>> +
>> +	if (wrapped)
>> +		newacc += 65536;
>> +	ACCESS_ONCE(*acc) = newacc;
>> +}
>> +
>
>I still feel something is wrong here :
>
>>  void be_parse_stats(struct be_adapter *adapter)
>>  {
>>  	struct be_erx_stats_v1 *erx = be_erx_stats_from_cmd(adapter);
>> @@ -394,9 +406,13 @@ void be_parse_stats(struct be_adapter *adapter)
>>  	}
>>
>>  	/* as erx_v1 is longer than v0, ok to use v1 defn for v0 access */
>> -	for_all_rx_queues(adapter, rxo, i)
>> -		rx_stats(rxo)->rx_drops_no_frags =
>> -			erx->rx_drops_no_fragments[rxo->q.id];
>
>previous code was not doing a sum_of_all_queues.
Yes, the new code still *does not* do a sum of all queues. The code just 
parses per-queue stats. For each queue, the 16 bit
HW stats value is taken and stored in a 32-bit *per-queue* variable.
The comment that "driver accumulates a 32-bit val" may be misleading. The
code here is not doing a sum of the per-queue stat.

+       for_all_rx_queues(adapter, rxo, i) {
+               /* below erx HW counter can actually wrap around after
+                * 65535. Driver accumulates a 32-bit value
+                */
+               accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
+                               (u16)erx->rx_drops_no_fragments[rxo->q.id]);
+       }

>
>It only gave the final erx->rx_drops_no_fragments[rxo->q.id], not taking
>into account previous rx_stats(rxo)->rx_drops_no_frags value.
>
>Your changelog is about wrap around, while the bug might have be
>different (No real sum)
>
>Now you say : previous value is meaningfull, and we add to it 16bits
>values.
>
>> +	for_all_rx_queues(adapter, rxo, i) {
>> +		/* below erx HW counter can actually wrap around after
>> +		 * 65535. Driver accumulates a 32-bit value
>> +		 */
>> +		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
>> +				(u16)erx->rx_drops_no_fragments[rxo->q.id]);
>> +	}
>>  }
>>
>
>Arent multiple calls to be_parse_stats() will have wrong final
>rx_drops_no_frags value ?
>
>Or are the rx_drops_no_fragments[rxo->q.id] cleared when read ?

The following logic should take care of that:
	u32 newacc = hi(*acc) + val;
Only the upper 16 bits of the accumulator are retained and the new-val
of the 16-bit stat is used for the lower 16 bits. So, even if I call this
routine 10 times with the same value for val, the accumulator value will not change.

Say: accumulator is 0x00010001
Say: new 16-bit hw counter value is now 2 (it was previously 1 and had wraped-aroud before that)
accumulate(acc, 2) ==> newacc = 0x00010000 + 2 = 0x00010002
any number of calls to accumulate(acc, 2) will still produce acc = 0x00010002

>
>I am afraid that if HW maintains 16bit values, then the only way is to
>also have a 16bit accumulator. You cannot detect wraparounds without
>also keeping a copy of previous 16bit samples.

I don't agree:
Say: accumulator is 0x0000FFFF
Say: new 16-bit hw counter value is now 0 (after a wrap-aroud)
accumulate(acc, 0) ==> newacc = 0x00000000 + 0 = 0x00000000
After the wraparound check newacc = 0x00010000

>
>u16 accum = 0;
>or_all_rx_queues(adapter, rxo, i) {
>	accum += erx->rx_drops_no_fragments[rxo->q.id];
>}
>rx_stats(rxo)->rx_drops_no_frags = accum;
>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around
  2011-08-23  7:06     ` Sathya.Perla
@ 2011-08-23  7:29       ` Eric Dumazet
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Dumazet @ 2011-08-23  7:29 UTC (permalink / raw)
  To: Sathya.Perla; +Cc: netdev

Le mardi 23 août 2011 à 00:06 -0700, Sathya.Perla@Emulex.Com a écrit :
> >-----Original Message-----
> >From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> >Sent: Tuesday, August 23, 2011 12:11 PM
> >
> >Le mardi 23 août 2011 à 11:11 +0530, Sathya Perla a écrit :
> >> The rx_drops_no_frags HW counter for RSS rings is 16bits in HW and can
> >> wraparound often. Maintain a 32-bit accumulator in the driver to prevent
> >> frequent wraparound.
> >>
> >> Also, incorporated Eric's feedback to use ACCESS_ONCE() for the
> >accumulator
> >> write.
> <...>
> >>
> >> +static void accumulate_16bit_val(u32 *acc, u16 val)
> >> +{
> >> +#define lo(x)			(x & 0xFFFF)
> >> +#define hi(x)			(x & 0xFFFF0000)
> >> +	bool wrapped = val < lo(*acc);
> >> +	u32 newacc = hi(*acc) + val;
> >> +
> >> +	if (wrapped)
> >> +		newacc += 65536;
> >> +	ACCESS_ONCE(*acc) = newacc;
> >> +}
> >> +
> >
> >I still feel something is wrong here :
> >
> >>  void be_parse_stats(struct be_adapter *adapter)
> >>  {
> >>  	struct be_erx_stats_v1 *erx = be_erx_stats_from_cmd(adapter);
> >> @@ -394,9 +406,13 @@ void be_parse_stats(struct be_adapter *adapter)
> >>  	}
> >>
> >>  	/* as erx_v1 is longer than v0, ok to use v1 defn for v0 access */
> >> -	for_all_rx_queues(adapter, rxo, i)
> >> -		rx_stats(rxo)->rx_drops_no_frags =
> >> -			erx->rx_drops_no_fragments[rxo->q.id];
> >
> >previous code was not doing a sum_of_all_queues.
> Yes, the new code still *does not* do a sum of all queues. The code just 
> parses per-queue stats. For each queue, the 16 bit
> HW stats value is taken and stored in a 32-bit *per-queue* variable.
> The comment that "driver accumulates a 32-bit val" may be misleading. The
> code here is not doing a sum of the per-queue stat.

Ah ok, I now see the code intent, and everything seems fine now, thanks
for explaining.



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 0/5] be2net fixes v2
  2011-08-23  5:41 [PATCH net-next 0/5] be2net fixes v2 Sathya Perla
                   ` (4 preceding siblings ...)
  2011-08-23  5:41 ` [PATCH net-next 5/5] be2net: remove unused variable Sathya Perla
@ 2011-08-25  0:08 ` David Miller
  5 siblings, 0 replies; 16+ messages in thread
From: David Miller @ 2011-08-25  0:08 UTC (permalink / raw)
  To: sathya.perla; +Cc: netdev

From: Sathya Perla <sathya.perla@emulex.com>
Date: Tue, 23 Aug 2011 11:11:50 +0530

> Incorporated Eric's feedback on [3/5] be2net: fix erx->rx_drops_no_frags wrap around
> Pls apply. Thanks.
> 
> Sathya Perla (5):
>   be2net: Fix race in posting rx buffers.
>   be2net: get rid of memory mapped pci-cfg space address
>   be2net: fix erx->rx_drops_no_frags wrap around
>   be2net: increase FW update completion timeout
>   be2net: remove unused variable

Series applied, thanks.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around
  2011-08-22 12:44         ` Eric Dumazet
@ 2011-08-22 17:22           ` Sathya.Perla
  0 siblings, 0 replies; 16+ messages in thread
From: Sathya.Perla @ 2011-08-22 17:22 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev

OK, thanks Eric. I'll re-spin this patch series ...

-Sathya
________________________________________
From: Eric Dumazet [eric.dumazet@gmail.com]
Sent: Monday, August 22, 2011 6:14 PM
To: Perla, Sathya
Cc: netdev@vger.kernel.org
Subject: RE: [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around

Le lundi 22 août 2011 à 05:15 -0700, Sathya.Perla@Emulex.Com a écrit :
> >-----Original Message-----
> >From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> >Sent: Monday, August 22, 2011 5:18 PM
> >
> >Le lundi 22 août 2011 à 13:43 +0200, Eric Dumazet a écrit :
> >
> >> This adds a race for lockless SNMP readers (in be_get_stats64())
> >>
> >> They can see the 32bit value going backward.
> >>
> >> You have to be very careful to read the *acc once, and write it once.
> >
> >The "write once" is the only requirement.
> >
> >Something like :
> >
> >u32 newval = hi(*acc) + val;
> >
> >if (wrapped)
> >     newval += 65536;
> >ACCESS_ONCE(*acc) = newval;
> >
>
> Eric,
>
> I'm wondering why you'd need ACCESS_ONCE() here? Wouldn't using the temp variable (newval) as you suggested,
> ensure that the reader doesn't see a half-baked value?
>

If you write :

u32 newval = hi(*acc) + val;
if (wrapped)
        newval += 65536;
*acc = newval;


C compiler (gcc) is free to eliminate the temp variable and to generate
same code than :

*acc = hi(*acc) + val;
if (wrapped)
        *acc += 65536;

ACCESS_ONCE() here is the clean way (and self explanatory/documented) to
keep compiler to write to *acc twice.




^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around
  2011-08-22 12:15       ` Sathya.Perla
@ 2011-08-22 12:44         ` Eric Dumazet
  2011-08-22 17:22           ` Sathya.Perla
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2011-08-22 12:44 UTC (permalink / raw)
  To: Sathya.Perla; +Cc: netdev

Le lundi 22 août 2011 à 05:15 -0700, Sathya.Perla@Emulex.Com a écrit :
> >-----Original Message-----
> >From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> >Sent: Monday, August 22, 2011 5:18 PM
> >
> >Le lundi 22 août 2011 à 13:43 +0200, Eric Dumazet a écrit :
> >
> >> This adds a race for lockless SNMP readers (in be_get_stats64())
> >>
> >> They can see the 32bit value going backward.
> >>
> >> You have to be very careful to read the *acc once, and write it once.
> >
> >The "write once" is the only requirement.
> >
> >Something like :
> >
> >u32 newval = hi(*acc) + val;
> >
> >if (wrapped)
> >	newval += 65536;
> >ACCESS_ONCE(*acc) = newval;
> >
> 
> Eric,
> 
> I'm wondering why you'd need ACCESS_ONCE() here? Wouldn't using the temp variable (newval) as you suggested,
> ensure that the reader doesn't see a half-baked value?
> 

If you write :

u32 newval = hi(*acc) + val;
if (wrapped)
	newval += 65536;
*acc = newval;


C compiler (gcc) is free to eliminate the temp variable and to generate
same code than :

*acc = hi(*acc) + val;
if (wrapped)
	*acc += 65536;

ACCESS_ONCE() here is the clean way (and self explanatory/documented) to
keep compiler to write to *acc twice.




^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around
  2011-08-22 11:47     ` Eric Dumazet
@ 2011-08-22 12:15       ` Sathya.Perla
  2011-08-22 12:44         ` Eric Dumazet
  0 siblings, 1 reply; 16+ messages in thread
From: Sathya.Perla @ 2011-08-22 12:15 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev

>-----Original Message-----
>From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
>Sent: Monday, August 22, 2011 5:18 PM
>
>Le lundi 22 août 2011 à 13:43 +0200, Eric Dumazet a écrit :
>
>> This adds a race for lockless SNMP readers (in be_get_stats64())
>>
>> They can see the 32bit value going backward.
>>
>> You have to be very careful to read the *acc once, and write it once.
>
>The "write once" is the only requirement.
>
>Something like :
>
>u32 newval = hi(*acc) + val;
>
>if (wrapped)
>	newval += 65536;
>ACCESS_ONCE(*acc) = newval;
>

Eric,

I'm wondering why you'd need ACCESS_ONCE() here? Wouldn't using the temp variable (newval) as you suggested,
ensure that the reader doesn't see a half-baked value?


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around
  2011-08-22 11:43   ` Eric Dumazet
@ 2011-08-22 11:47     ` Eric Dumazet
  2011-08-22 12:15       ` Sathya.Perla
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2011-08-22 11:47 UTC (permalink / raw)
  To: Sathya Perla; +Cc: netdev

Le lundi 22 août 2011 à 13:43 +0200, Eric Dumazet a écrit :

> This adds a race for lockless SNMP readers (in be_get_stats64())
> 
> They can see the 32bit value going backward.
> 
> You have to be very careful to read the *acc once, and write it once.

The "write once" is the only requirement.

Something like :

u32 newval = hi(*acc) + val;

if (wrapped)
	newval += 65536;
ACCESS_ONCE(*acc) = newval;




^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around
  2011-08-22 11:13 ` [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around Sathya Perla
@ 2011-08-22 11:43   ` Eric Dumazet
  2011-08-22 11:47     ` Eric Dumazet
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2011-08-22 11:43 UTC (permalink / raw)
  To: Sathya Perla; +Cc: netdev

Le lundi 22 août 2011 à 16:43 +0530, Sathya Perla a écrit :
> The rx_drops_no_frags HW counter for RSS rings is 16bits in HW and can
> wraparound often. Maintain a 32-bit accumulator in the driver to prevent
> frequent wraparound.
> 
> Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
> ---
>  drivers/net/ethernet/emulex/benet/be_main.c |   21 ++++++++++++++++++---
>  1 files changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
> index 2375c0c..4e588d8 100644
> --- a/drivers/net/ethernet/emulex/benet/be_main.c
> +++ b/f
> @@ -378,6 +378,17 @@ static void populate_lancer_stats(struct be_adapter *adapter)
>  				pport_stats->rx_drops_too_many_frags_lo;
>  }
>  
> +static void accumulate_16bit_val(u32 *acc, u16 val)
> +{
> +#define lo(x)			(x & 0xFFFF)
> +#define hi(x)			(x & 0xFFFF0000)
> +	bool wrapped = val < lo(*acc);
> +
> +	*acc = hi(*acc) + val;
> +	if (wrapped)
> +		*acc += 65536;
> +}
> +
>  

This adds a race for lockless SNMP readers (in be_get_stats64())

They can see the 32bit value going backward.

You have to be very careful to read the *acc once, and write it once.





^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around
  2011-08-22 11:13 [PATCH net-next 0/5] be2net: fixes Sathya Perla
@ 2011-08-22 11:13 ` Sathya Perla
  2011-08-22 11:43   ` Eric Dumazet
  0 siblings, 1 reply; 16+ messages in thread
From: Sathya Perla @ 2011-08-22 11:13 UTC (permalink / raw)
  To: netdev

The rx_drops_no_frags HW counter for RSS rings is 16bits in HW and can
wraparound often. Maintain a 32-bit accumulator in the driver to prevent
frequent wraparound.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |   21 ++++++++++++++++++---
 1 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 2375c0c..4e588d8 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -378,6 +378,17 @@ static void populate_lancer_stats(struct be_adapter *adapter)
 				pport_stats->rx_drops_too_many_frags_lo;
 }
 
+static void accumulate_16bit_val(u32 *acc, u16 val)
+{
+#define lo(x)			(x & 0xFFFF)
+#define hi(x)			(x & 0xFFFF0000)
+	bool wrapped = val < lo(*acc);
+
+	*acc = hi(*acc) + val;
+	if (wrapped)
+		*acc += 65536;
+}
+
 void be_parse_stats(struct be_adapter *adapter)
 {
 	struct be_erx_stats_v1 *erx = be_erx_stats_from_cmd(adapter);
@@ -394,9 +405,13 @@ void be_parse_stats(struct be_adapter *adapter)
 	}
 
 	/* as erx_v1 is longer than v0, ok to use v1 defn for v0 access */
-	for_all_rx_queues(adapter, rxo, i)
-		rx_stats(rxo)->rx_drops_no_frags =
-			erx->rx_drops_no_fragments[rxo->q.id];
+	for_all_rx_queues(adapter, rxo, i) {
+		/* below erx HW counter can actually wrap around after
+		 * 65535. Driver accumulates a 32-bit value
+		 */
+		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
+				(u16)erx->rx_drops_no_fragments[rxo->q.id]);
+	}
 }
 
 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2011-08-25  0:08 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-23  5:41 [PATCH net-next 0/5] be2net fixes v2 Sathya Perla
2011-08-23  5:41 ` [PATCH net-next 1/5] be2net: Fix race in posting rx buffers Sathya Perla
2011-08-23  5:41 ` [PATCH net-next 2/5] be2net: get rid of memory mapped pci-cfg space address Sathya Perla
2011-08-23  5:41 ` [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around Sathya Perla
2011-08-23  6:41   ` Eric Dumazet
2011-08-23  7:06     ` Sathya.Perla
2011-08-23  7:29       ` Eric Dumazet
2011-08-23  5:41 ` [PATCH net-next 4/5] be2net: increase FW update completion timeout Sathya Perla
2011-08-23  5:41 ` [PATCH net-next 5/5] be2net: remove unused variable Sathya Perla
2011-08-25  0:08 ` [PATCH net-next 0/5] be2net fixes v2 David Miller
  -- strict thread matches above, loose matches on Subject: below --
2011-08-22 11:13 [PATCH net-next 0/5] be2net: fixes Sathya Perla
2011-08-22 11:13 ` [PATCH net-next 3/5] be2net: fix erx->rx_drops_no_frags wrap around Sathya Perla
2011-08-22 11:43   ` Eric Dumazet
2011-08-22 11:47     ` Eric Dumazet
2011-08-22 12:15       ` Sathya.Perla
2011-08-22 12:44         ` Eric Dumazet
2011-08-22 17:22           ` Sathya.Perla

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.