* [PATCH net-next] mlx4: do not use rwlock in fast path
@ 2017-02-09 17:10 Eric Dumazet
  2017-02-14 16:28 ` David Miller
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Eric Dumazet @ 2017-02-09 17:10 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Tariq Toukan, Shawn Bohrer

From: Eric Dumazet <edumazet@google.com>

Using a reader-writer lock in fast path is silly, when we can
instead use RCU or a seqlock.

For mlx4 hwstamp clock, a seqlock is the way to go, removing
two atomic operations and false sharing. 

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_clock.c |   35 ++++++++--------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h  |    2 
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index 504461a464c581bf77b5cca127680f262222..e7b81a305469e64b97f68bc0e2bcb064b78f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -62,12 +62,13 @@ void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
 			    struct skb_shared_hwtstamps *hwts,
 			    u64 timestamp)
 {
-	unsigned long flags;
+	unsigned int seq;
 	u64 nsec;
 
-	read_lock_irqsave(&mdev->clock_lock, flags);
-	nsec = timecounter_cyc2time(&mdev->clock, timestamp);
-	read_unlock_irqrestore(&mdev->clock_lock, flags);
+	do {
+		seq = read_seqbegin(&mdev->clock_lock);
+		nsec = timecounter_cyc2time(&mdev->clock, timestamp);
+	} while (read_seqretry(&mdev->clock_lock, seq));
 
 	memset(hwts, 0, sizeof(struct skb_shared_hwtstamps));
 	hwts->hwtstamp = ns_to_ktime(nsec);
@@ -95,9 +96,9 @@ void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev)
 	unsigned long flags;
 
 	if (timeout) {
-		write_lock_irqsave(&mdev->clock_lock, flags);
+		write_seqlock_irqsave(&mdev->clock_lock, flags);
 		timecounter_read(&mdev->clock);
-		write_unlock_irqrestore(&mdev->clock_lock, flags);
+		write_sequnlock_irqrestore(&mdev->clock_lock, flags);
 		mdev->last_overflow_check = jiffies;
 	}
 }
@@ -128,10 +129,10 @@ static int mlx4_en_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
 	adj *= delta;
 	diff = div_u64(adj, 1000000000ULL);
 
-	write_lock_irqsave(&mdev->clock_lock, flags);
+	write_seqlock_irqsave(&mdev->clock_lock, flags);
 	timecounter_read(&mdev->clock);
 	mdev->cycles.mult = neg_adj ? mult - diff : mult + diff;
-	write_unlock_irqrestore(&mdev->clock_lock, flags);
+	write_sequnlock_irqrestore(&mdev->clock_lock, flags);
 
 	return 0;
 }
@@ -149,9 +150,9 @@ static int mlx4_en_phc_adjtime(struct ptp_clock_info *ptp, s64 delta)
 						ptp_clock_info);
 	unsigned long flags;
 
-	write_lock_irqsave(&mdev->clock_lock, flags);
+	write_seqlock_irqsave(&mdev->clock_lock, flags);
 	timecounter_adjtime(&mdev->clock, delta);
-	write_unlock_irqrestore(&mdev->clock_lock, flags);
+	write_sequnlock_irqrestore(&mdev->clock_lock, flags);
 
 	return 0;
 }
@@ -172,9 +173,9 @@ static int mlx4_en_phc_gettime(struct ptp_clock_info *ptp,
 	unsigned long flags;
 	u64 ns;
 
-	write_lock_irqsave(&mdev->clock_lock, flags);
+	write_seqlock_irqsave(&mdev->clock_lock, flags);
 	ns = timecounter_read(&mdev->clock);
-	write_unlock_irqrestore(&mdev->clock_lock, flags);
+	write_sequnlock_irqrestore(&mdev->clock_lock, flags);
 
 	*ts = ns_to_timespec64(ns);
 
@@ -198,9 +199,9 @@ static int mlx4_en_phc_settime(struct ptp_clock_info *ptp,
 	unsigned long flags;
 
 	/* reset the timecounter */
-	write_lock_irqsave(&mdev->clock_lock, flags);
+	write_seqlock_irqsave(&mdev->clock_lock, flags);
 	timecounter_init(&mdev->clock, &mdev->cycles, ns);
-	write_unlock_irqrestore(&mdev->clock_lock, flags);
+	write_sequnlock_irqrestore(&mdev->clock_lock, flags);
 
 	return 0;
 }
@@ -266,7 +267,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
 	if (mdev->ptp_clock)
 		return;
 
-	rwlock_init(&mdev->clock_lock);
+	seqlock_init(&mdev->clock_lock);
 
 	memset(&mdev->cycles, 0, sizeof(mdev->cycles));
 	mdev->cycles.read = mlx4_en_read_clock;
@@ -276,10 +277,10 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
 		clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
 	mdev->nominal_c_mult = mdev->cycles.mult;
 
-	write_lock_irqsave(&mdev->clock_lock, flags);
+	write_seqlock_irqsave(&mdev->clock_lock, flags);
 	timecounter_init(&mdev->clock, &mdev->cycles,
 			 ktime_to_ns(ktime_get_real()));
-	write_unlock_irqrestore(&mdev->clock_lock, flags);
+	write_sequnlock_irqrestore(&mdev->clock_lock, flags);
 
 	/* Calculate period in seconds to call the overflow watchdog - to make
 	 * sure counter is checked at least once every wrap around.
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 52f157cea7765ad6907c59aead357a158848..59f67cdd27d65125138c746fbb615d945d53 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -417,9 +417,9 @@ struct mlx4_en_dev {
 	u32                     priv_pdn;
 	spinlock_t              uar_lock;
 	u8			mac_removed[MLX4_MAX_PORTS + 1];
-	rwlock_t		clock_lock;
 	u32			nominal_c_mult;
 	struct cyclecounter	cycles;
+	seqlock_t		clock_lock;
 	struct timecounter	clock;
 	unsigned long		last_overflow_check;
 	unsigned long		overflow_period;
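
To illustrate the change, here is a self-contained userspace sketch of the
seqlock protocol the patch switches to (C11 atomics with the default seq_cst
ordering for simplicity; the kernel implementation uses finer-grained
barriers, and all names below are invented for the example):

	/*
	 * Userspace sketch of the seqlock pattern: a writer makes the
	 * sequence counter odd while it updates the data, and readers
	 * retry if they observe an odd or changed counter.
	 */
	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	static atomic_uint seq;                 /* even = stable, odd = write in progress */
	static atomic_uint_fast64_t cycle_last; /* the protected "timecounter" state */
	static atomic_uint_fast64_t nsec_base;

	/* Writer: bump seq to odd, update the state, bump back to even. */
	static void clock_update(uint64_t cycles, uint64_t nsec)
	{
		atomic_fetch_add(&seq, 1);
		atomic_store(&cycle_last, cycles);
		atomic_store(&nsec_base, nsec);
		atomic_fetch_add(&seq, 1);
	}

	/* Reader: no stores, no atomic RMW; retries only if a writer ran. */
	static uint64_t cyc2time(uint64_t timestamp)
	{
		unsigned int s;
		uint64_t nsec;

		do {
			s = atomic_load(&seq);
			nsec = atomic_load(&nsec_base) +
			       (timestamp - atomic_load(&cycle_last));
		} while ((s & 1) || atomic_load(&seq) != s);

		return nsec;
	}

	int main(void)
	{
		clock_update(1000, 500);
		printf("%llu\n", (unsigned long long)cyc2time(1500));
		return 0;
	}

In the common case a reader makes a single pass: two loads of the sequence
counter bracketing the data reads, with no stores and no atomic
read-modify-write, unlike read_lock()/read_unlock().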


* Re: [PATCH net-next] mlx4: do not use rwlock in fast path
  2017-02-09 17:10 [PATCH net-next] mlx4: do not use rwlock in fast path Eric Dumazet
@ 2017-02-14 16:28 ` David Miller
  2017-02-15  9:51   ` Tariq Toukan
  2017-02-15 17:10 ` David Miller
  2018-06-27 12:11 ` Tariq Toukan
  2 siblings, 1 reply; 6+ messages in thread
From: David Miller @ 2017-02-14 16:28 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, tariqt, sbohrer

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 09 Feb 2017 09:10:04 -0800

> From: Eric Dumazet <edumazet@google.com>
> 
> Using a reader-writer lock in fast path is silly, when we can
> instead use RCU or a seqlock.
> 
> For mlx4 hwstamp clock, a seqlock is the way to go, removing
> two atomic operations and false sharing. 
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Tariq Toukan <tariqt@mellanox.com>

Tariq or someone else at Mellanox please review, this patch has been
rotting for 5 days in patchwork.

Thank you.


* Re: [PATCH net-next] mlx4: do not use rwlock in fast path
  2017-02-14 16:28 ` David Miller
@ 2017-02-15  9:51   ` Tariq Toukan
  0 siblings, 0 replies; 6+ messages in thread
From: Tariq Toukan @ 2017-02-15  9:51 UTC (permalink / raw)
  To: David Miller, eric.dumazet; +Cc: netdev, tariqt, sbohrer



On 14/02/2017 6:28 PM, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Thu, 09 Feb 2017 09:10:04 -0800
>
>> From: Eric Dumazet <edumazet@google.com>
>>
>> Using a reader-writer lock in fast path is silly, when we can
>> instead use RCU or a seqlock.
>>
>> For mlx4 hwstamp clock, a seqlock is the way to go, removing
>> two atomic operations and false sharing.
>>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> Cc: Tariq Toukan <tariqt@mellanox.com>
> Tariq or someone else at Mellanox please review, this patch has been
> rotting for 5 days in patchwork.
>
> Thank you.
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Thanks.


* Re: [PATCH net-next] mlx4: do not use rwlock in fast path
  2017-02-09 17:10 [PATCH net-next] mlx4: do not use rwlock in fast path Eric Dumazet
  2017-02-14 16:28 ` David Miller
@ 2017-02-15 17:10 ` David Miller
  2018-06-27 12:11 ` Tariq Toukan
  2 siblings, 0 replies; 6+ messages in thread
From: David Miller @ 2017-02-15 17:10 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, tariqt, sbohrer

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 09 Feb 2017 09:10:04 -0800

> From: Eric Dumazet <edumazet@google.com>
> 
> Using a reader-writer lock in fast path is silly, when we can
> instead use RCU or a seqlock.
> 
> For mlx4 hwstamp clock, a seqlock is the way to go, removing
> two atomic operations and false sharing. 
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Applied, thanks Eric.


* Re: [PATCH net-next] mlx4: do not use rwlock in fast path
  2017-02-09 17:10 [PATCH net-next] mlx4: do not use rwlock in fast path Eric Dumazet
  2017-02-14 16:28 ` David Miller
  2017-02-15 17:10 ` David Miller
@ 2018-06-27 12:11 ` Tariq Toukan
  2018-06-27 13:49   ` Eric Dumazet
  2 siblings, 1 reply; 6+ messages in thread
From: Tariq Toukan @ 2018-06-27 12:11 UTC (permalink / raw)
  To: Eric Dumazet, David Miller
  Cc: netdev, Tariq Toukan, Shawn Bohrer, Shay Agroskin, Eran Ben Elisha



On 09/02/2017 7:10 PM, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> Using a reader-writer lock in fast path is silly, when we can
> instead use RCU or a seqlock.
> 
> For mlx4 hwstamp clock, a seqlock is the way to go, removing
> two atomic operations and false sharing.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Tariq Toukan <tariqt@mellanox.com>
> ---
>   drivers/net/ethernet/mellanox/mlx4/en_clock.c |   35 ++++++++--------
>   drivers/net/ethernet/mellanox/mlx4/mlx4_en.h  |    2
>   2 files changed, 19 insertions(+), 18 deletions(-)
> 

Hi Eric,

When my peer Shay modified mlx5 to adopt this same locking scheme, he
noticed a degradation in packet rate. He then went back and tested mlx4,
and saw a degradation introduced by this patch as well.

Perf numbers (single ring):

mlx4:
with rw-lock: ~8.54M pps
with seq-lock: ~8.51M pps

mlx5:
With rw-lock: ~14.94M pps
With seq-lock: ~14.48M pps

Actually, this can be explained by the analysis below.
In short, the number of readers is significantly larger than the number
of writers, so optimizing the reader path should give better numbers.
The issue is that a read/write lock might cause writer starvation.
Maybe RCU fits best here? (A rough shape for that is sketched after the
analysis below.)

Degradation analysis:
The patch changes the type of the lock that protects reads and updates
of the (struct mlx4_en_dev).clock variable.
This variable is used to convert the hw timestamp into skb->hwtstamps.
It is read for each transmitted/received packet, and updated only via
the ptp module and some periodic overflow work we have (at most 10
times per second).
Meaning there are far more readers than writers, and it's best to
optimize the reader path.
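
For illustration, an RCU conversion could look roughly like the sketch
below (hypothetical and untested; the struct and function names are
invented, and writer serialization is only sketched):

	/*
	 * Publish an immutable snapshot of the timecounter on every
	 * update, so the per-packet reader pays only
	 * rcu_read_lock()/rcu_read_unlock().
	 */
	#include <linux/mutex.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/timecounter.h>
	#include <linux/types.h>

	struct mlx4_clock_snap {
		struct timecounter clock;
		struct rcu_head rcu;
	};

	/* Per-packet fast path: no atomics, no retry loop. */
	static u64 mlx4_snap_cyc2time(struct mlx4_clock_snap __rcu **snapp,
				      u64 ts)
	{
		struct mlx4_clock_snap *snap;
		u64 nsec;

		rcu_read_lock();
		snap = rcu_dereference(*snapp);
		nsec = timecounter_cyc2time(&snap->clock, ts);
		rcu_read_unlock();
		return nsec;
	}

	/* Slow path (ptp ops, overflow work): copy, modify, publish. */
	static int mlx4_snap_adjtime(struct mlx4_clock_snap __rcu **snapp,
				     struct mutex *writer_lock, s64 delta)
	{
		struct mlx4_clock_snap *old, *new;

		new = kmalloc(sizeof(*new), GFP_KERNEL);
		if (!new)
			return -ENOMEM;

		mutex_lock(writer_lock);
		old = rcu_dereference_protected(*snapp,
						lockdep_is_held(writer_lock));
		*new = *old;
		timecounter_adjtime(&new->clock, delta);
		rcu_assign_pointer(*snapp, new);
		mutex_unlock(writer_lock);

		kfree_rcu(old, rcu);
		return 0;
	}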

Best,
Tariq


* Re: [PATCH net-next] mlx4: do not use rwlock in fast path
  2018-06-27 12:11 ` Tariq Toukan
@ 2018-06-27 13:49   ` Eric Dumazet
  0 siblings, 0 replies; 6+ messages in thread
From: Eric Dumazet @ 2018-06-27 13:49 UTC (permalink / raw)
  To: Tariq Toukan, Eric Dumazet, David Miller
  Cc: netdev, Shawn Bohrer, Shay Agroskin, Eran Ben Elisha



On 06/27/2018 05:11 AM, Tariq Toukan wrote:
> 
> 
> On 09/02/2017 7:10 PM, Eric Dumazet wrote:
>> From: Eric Dumazet <edumazet@google.com>
>>
>> Using a reader-writer lock in fast path is silly, when we can
>> instead use RCU or a seqlock.
>>
>> For mlx4 hwstamp clock, a seqlock is the way to go, removing
>> two atomic operations and false sharing.
>>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> Cc: Tariq Toukan <tariqt@mellanox.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx4/en_clock.c |   35 ++++++++--------
>>   drivers/net/ethernet/mellanox/mlx4/mlx4_en.h  |    2
>>   2 files changed, 19 insertions(+), 18 deletions(-)
>>
> 
> Hi Eric,
> 
> When my peer Shay modified mlx5 to adopt this same locking scheme, he noticed a degradation in packet rate. He then went back and tested mlx4, and saw a degradation introduced by this patch as well.
> 
> Perf numbers (single ring):
> 
> mlx4:
> with rw-lock: ~8.54M pps
> with seq-lock: ~8.51M pps
> 
> mlx5:
> With rw-lock: ~14.94M pps
> With seq-lock: ~14.48M pps
> 
> Actually, this can be explained by the analysis below.
> In short, the number of readers is significantly larger than the number of writers, so optimizing the reader path should give better numbers. The issue is that a read/write lock might cause writer starvation. Maybe RCU fits best here?
> 
> Degradation analysis:
> The patch changes the type of the lock that protects reads and updates of the (struct mlx4_en_dev).clock variable.
> This variable is used to convert the hw timestamp into skb->hwtstamps.
> It is read for each transmitted/received packet, and updated only via the ptp module and some periodic overflow work we have (at most 10 times per second).
> Meaning there are far more readers than writers, and it's best to optimize the reader path.
>

Hi Tariq

Are you sure you enabled timestamps in your tests?

mlx4_en_fill_hwtstamps() is _really_ called 8,540,000 times per second,
meaning the same number of read_lock_irqsave()/read_unlock_irqrestore()
pairs is performed?

You have a pretty damn good CPU, it seems.

seqlock has no cost for a reader [1], other than reading one integer value and testing it.
[1] If this value never changes (and is on a clean cache line).

Really this looks like ring->hwtstamp_rx_filter != HWTSTAMP_FILTER_ALL in your tests.
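
If in doubt, a minimal sketch like the following (standard SIOCSHWTSTAMP
ioctl; "eth0" is a placeholder for your interface, and this program is
an assumed test setup, not something from this thread) can be used to
make sure the per-packet timestamping path is actually exercised:

	#include <linux/net_tstamp.h>
	#include <linux/sockios.h>
	#include <net/if.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main(void)
	{
		/* Request RX timestamps for all packets -- this is what
		 * makes the driver set hwtstamp_rx_filter to
		 * HWTSTAMP_FILTER_ALL. */
		struct hwtstamp_config cfg = {
			.tx_type   = HWTSTAMP_TX_ON,
			.rx_filter = HWTSTAMP_FILTER_ALL,
		};
		struct ifreq ifr;
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (fd < 0)
			return 1;
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
		ifr.ifr_data = (char *)&cfg;

		if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
			perror("SIOCSHWTSTAMP");
		close(fd);
		return 0;
	}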

The numbers you gave amount to one cycle of difference per packet
(1/8.51M - 1/8.54M is about 0.4 nanosecond), so I really doubt adding
back the heavy read_lock_irqsave()/read_unlock_irqrestore() could be
faster.

Conceptually, a seqlock is a form of RCU; it really does optimize the
reader path.

Thanks

