All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] net: sfc: fix memory leak due to ptp channel
@ 2022-05-02 17:02 Taehee Yoo
  2022-05-03 13:37 ` Taehee Yoo
  0 siblings, 1 reply; 2+ messages in thread
From: Taehee Yoo @ 2022-05-02 17:02 UTC (permalink / raw)
  To: davem, kuba, pabeni, edumazet, netdev, ecree.xilinx,
	habetsm.xilinx, bhutchings, smhodgson
  Cc: ap420073

It fixes memory leak in ring buffer change logic.

When ring buffer size is changed(ethtool -G eth0 rx 4096), sfc driver
works like below.
1. stop all channels and remove ring buffers.
2. allocates new buffer array.
3. allocates rx buffers.
4. start channels.

While the above steps are working, it skips some steps if the channel
doesn't have a ->copy callback function.
Due to ptp channel doesn't have ->copy callback, these above steps are
skipped for ptp channel.
It eventually makes some problems.
a. ptp channel's ring buffer size is not changed, it works only
   1024(default).
b. memory leak.

The reason for memory leak is to use the wrong ring buffer values.
There are some values, they are related to ring buffer size.
a. efx->rxq_entries
 - This is a global value of rx queue size.
b. rx_queue->ptr_mask
 - used for access ring buffer as circular ring.
 - roundup_pow_of_two(efx->rxq_entries) - 1
c. rx_queue->max_fill
 - efx->rxq_entries - EFX_RXD_HEAD_ROOM

These all values should be based on ring buffer size consistently.
But ptp channel's values are not.
a. efx->rxq_entries
 - This is global(for sfc) value, always new ring buffer size.
b. rx_queue->ptr_mask
 - This is always 1023(default).
c. rx_queue->max_fill
 - This is new ring buffer size - EFX_RXD_HEAD_ROOM.

Let's assume we set 4096 for rx ring buffer,

                      normal channel     ptp channel
efx->rxq_entries      4096               4096
rx_queue->ptr_mask    4095               1023
rx_queue->max_fill    4086               4086

sfc driver allocates rx ring buffers based on these values.
When it allocates ptp channel's ring buffer, 4086 ring buffers are
allocated then, these buffers are attached to the allocated array.
But ptp channel's ring buffer array size is still 1024(default)
and ptr_mask is still 1023 too.
So, about 3K ring buffers will be overwritten to the array.
This is the reason for memory leak.

Test commands:
   ethtool -G <interface name> rx 4096
   while :
   do
       ip link set <interface name> up
       ip link set <interface name> down
   done

In order to avoid this problem, it adds ->copy callback to ptp channel
type.
So that rx_queue->ptr_mask value will be updated correctly.

Fixes: 7c236c43b838 ("sfc: Add support for IEEE-1588 PTP")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
---
 drivers/net/ethernet/sfc/efx_channels.c |  7 ++++++-
 drivers/net/ethernet/sfc/ptp.c          | 13 ++++++++++++-
 drivers/net/ethernet/sfc/ptp.h          |  1 +
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
index 377df8b7f015..40df910aa140 100644
--- a/drivers/net/ethernet/sfc/efx_channels.c
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -867,7 +867,9 @@ static void efx_set_xdp_channels(struct efx_nic *efx)
 
 int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 {
-	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
+	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel,
+			   *ptp_channel = efx_ptp_channel(efx);
+	struct efx_ptp_data *ptp_data = efx->ptp_data;
 	unsigned int i, next_buffer_table = 0;
 	u32 old_rxq_entries, old_txq_entries;
 	int rc, rc2;
@@ -938,6 +940,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 
 	efx_set_xdp_channels(efx);
 out:
+	efx->ptp_data = NULL;
 	/* Destroy unused channel structures */
 	for (i = 0; i < efx->n_channels; i++) {
 		channel = other_channel[i];
@@ -948,6 +951,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 		}
 	}
 
+	efx->ptp_data = ptp_data;
 	rc2 = efx_soft_enable_interrupts(efx);
 	if (rc2) {
 		rc = rc ? rc : rc2;
@@ -966,6 +970,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 	efx->txq_entries = old_txq_entries;
 	for (i = 0; i < efx->n_channels; i++)
 		swap(efx->channel[i], other_channel[i]);
+	efx_ptp_update_channel(efx, ptp_channel);
 	goto out;
 }
 
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index f0ef515e2ade..2cf714f92655 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -45,6 +45,7 @@
 #include "farch_regs.h"
 #include "tx.h"
 #include "nic.h" /* indirectly includes ptp.h */
+#include "efx_channels.h"
 
 /* Maximum number of events expected to make up a PTP event */
 #define	MAX_EVENT_FRAGS			3
@@ -541,6 +542,11 @@ struct efx_channel *efx_ptp_channel(struct efx_nic *efx)
 	return efx->ptp_data ? efx->ptp_data->channel : NULL;
 }
 
+void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel)
+{
+	efx->ptp_data->channel = channel;
+}
+
 static u32 last_sync_timestamp_major(struct efx_nic *efx)
 {
 	struct efx_channel *channel = efx_ptp_channel(efx);
@@ -1443,6 +1449,11 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel)
 	int rc = 0;
 	unsigned int pos;
 
+	if (efx->ptp_data) {
+		efx->ptp_data->channel = channel;
+		return 0;
+	}
+
 	ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL);
 	efx->ptp_data = ptp;
 	if (!efx->ptp_data)
@@ -2176,7 +2187,7 @@ static const struct efx_channel_type efx_ptp_channel_type = {
 	.pre_probe		= efx_ptp_probe_channel,
 	.post_remove		= efx_ptp_remove_channel,
 	.get_name		= efx_ptp_get_channel_name,
-	/* no copy operation; there is no need to reallocate this channel */
+	.copy                   = efx_copy_channel,
 	.receive_skb		= efx_ptp_rx,
 	.want_txqs		= efx_ptp_want_txqs,
 	.keep_eventq		= false,
diff --git a/drivers/net/ethernet/sfc/ptp.h b/drivers/net/ethernet/sfc/ptp.h
index 9855e8c9e544..7b1ef7002b3f 100644
--- a/drivers/net/ethernet/sfc/ptp.h
+++ b/drivers/net/ethernet/sfc/ptp.h
@@ -16,6 +16,7 @@ struct ethtool_ts_info;
 int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
 void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
 struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
+void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel);
 void efx_ptp_remove(struct efx_nic *efx);
 int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
 int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH net] net: sfc: fix memory leak due to ptp channel
  2022-05-02 17:02 [PATCH net] net: sfc: fix memory leak due to ptp channel Taehee Yoo
@ 2022-05-03 13:37 ` Taehee Yoo
  0 siblings, 0 replies; 2+ messages in thread
From: Taehee Yoo @ 2022-05-03 13:37 UTC (permalink / raw)
  To: davem, kuba, pabeni, edumazet, netdev, ecree.xilinx, habetsm.xilinx

On 5/3/22 02:02, Taehee Yoo wrote:
 > It fixes memory leak in ring buffer change logic.
 >
 > When ring buffer size is changed(ethtool -G eth0 rx 4096), sfc driver
 > works like below.
 > 1. stop all channels and remove ring buffers.
 > 2. allocates new buffer array.
 > 3. allocates rx buffers.
 > 4. start channels.
 >
 > While the above steps are working, it skips some steps if the channel
 > doesn't have a ->copy callback function.
 > Due to ptp channel doesn't have ->copy callback, these above steps are
 > skipped for ptp channel.
 > It eventually makes some problems.
 > a. ptp channel's ring buffer size is not changed, it works only
 >     1024(default).
 > b. memory leak.
 >
 > The reason for memory leak is to use the wrong ring buffer values.
 > There are some values, they are related to ring buffer size.
 > a. efx->rxq_entries
 >   - This is a global value of rx queue size.
 > b. rx_queue->ptr_mask
 >   - used for access ring buffer as circular ring.
 >   - roundup_pow_of_two(efx->rxq_entries) - 1
 > c. rx_queue->max_fill
 >   - efx->rxq_entries - EFX_RXD_HEAD_ROOM
 >
 > These all values should be based on ring buffer size consistently.
 > But ptp channel's values are not.
 > a. efx->rxq_entries
 >   - This is global(for sfc) value, always new ring buffer size.
 > b. rx_queue->ptr_mask
 >   - This is always 1023(default).
 > c. rx_queue->max_fill
 >   - This is new ring buffer size - EFX_RXD_HEAD_ROOM.
 >
 > Let's assume we set 4096 for rx ring buffer,
 >
 >                        normal channel     ptp channel
 > efx->rxq_entries      4096               4096
 > rx_queue->ptr_mask    4095               1023
 > rx_queue->max_fill    4086               4086
 >
 > sfc driver allocates rx ring buffers based on these values.
 > When it allocates ptp channel's ring buffer, 4086 ring buffers are
 > allocated then, these buffers are attached to the allocated array.
 > But ptp channel's ring buffer array size is still 1024(default)
 > and ptr_mask is still 1023 too.
 > So, about 3K ring buffers will be overwritten to the array.
 > This is the reason for memory leak.
 >
 > Test commands:
 >     ethtool -G <interface name> rx 4096
 >     while :
 >     do
 >         ip link set <interface name> up
 >         ip link set <interface name> down
 >     done
 >
 > In order to avoid this problem, it adds ->copy callback to ptp channel
 > type.
 > So that rx_queue->ptr_mask value will be updated correctly.
 >
 > Fixes: 7c236c43b838 ("sfc: Add support for IEEE-1588 PTP")
 > Signed-off-by: Taehee Yoo <ap420073@gmail.com>
 > ---
 >   drivers/net/ethernet/sfc/efx_channels.c |  7 ++++++-
 >   drivers/net/ethernet/sfc/ptp.c          | 13 ++++++++++++-
 >   drivers/net/ethernet/sfc/ptp.h          |  1 +
 >   3 files changed, 19 insertions(+), 2 deletions(-)
 >
 > diff --git a/drivers/net/ethernet/sfc/efx_channels.c 
b/drivers/net/ethernet/sfc/efx_channels.c
 > index 377df8b7f015..40df910aa140 100644
 > --- a/drivers/net/ethernet/sfc/efx_channels.c
 > +++ b/drivers/net/ethernet/sfc/efx_channels.c
 > @@ -867,7 +867,9 @@ static void efx_set_xdp_channels(struct efx_nic *efx)
 >
 >   int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 
txq_entries)
 >   {
 > -	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
 > +	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel,
 > +			   *ptp_channel = efx_ptp_channel(efx);
 > +	struct efx_ptp_data *ptp_data = efx->ptp_data;
 >   	unsigned int i, next_buffer_table = 0;
 >   	u32 old_rxq_entries, old_txq_entries;
 >   	int rc, rc2;
 > @@ -938,6 +940,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 
rxq_entries, u32 txq_entries)
 >
 >   	efx_set_xdp_channels(efx);
 >   out:
 > +	efx->ptp_data = NULL;
 >   	/* Destroy unused channel structures */
 >   	for (i = 0; i < efx->n_channels; i++) {
 >   		channel = other_channel[i];
 > @@ -948,6 +951,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 
rxq_entries, u32 txq_entries)
 >   		}
 >   	}
 >
 > +	efx->ptp_data = ptp_data;
 >   	rc2 = efx_soft_enable_interrupts(efx);
 >   	if (rc2) {
 >   		rc = rc ? rc : rc2;
 > @@ -966,6 +970,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 
rxq_entries, u32 txq_entries)
 >   	efx->txq_entries = old_txq_entries;
 >   	for (i = 0; i < efx->n_channels; i++)
 >   		swap(efx->channel[i], other_channel[i]);
 > +	efx_ptp_update_channel(efx, ptp_channel);
 >   	goto out;
 >   }
 >
 > diff --git a/drivers/net/ethernet/sfc/ptp.c 
b/drivers/net/ethernet/sfc/ptp.c
 > index f0ef515e2ade..2cf714f92655 100644
 > --- a/drivers/net/ethernet/sfc/ptp.c
 > +++ b/drivers/net/ethernet/sfc/ptp.c
 > @@ -45,6 +45,7 @@
 >   #include "farch_regs.h"
 >   #include "tx.h"
 >   #include "nic.h" /* indirectly includes ptp.h */
 > +#include "efx_channels.h"
 >
 >   /* Maximum number of events expected to make up a PTP event */
 >   #define	MAX_EVENT_FRAGS			3
 > @@ -541,6 +542,11 @@ struct efx_channel *efx_ptp_channel(struct 
efx_nic *efx)
 >   	return efx->ptp_data ? efx->ptp_data->channel : NULL;
 >   }
 >
 > +void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel 
*channel)
 > +{
 > +	efx->ptp_data->channel = channel;

I missed that efx->ptp_data can be a NULL.
If so, it makes the kernel panic.
So I will send a v2 patch tomorrow after some tests.

Thanks,
Taehee Yoo

 > +}
 > +
 >   static u32 last_sync_timestamp_major(struct efx_nic *efx)
 >   {
 >   	struct efx_channel *channel = efx_ptp_channel(efx);
 > @@ -1443,6 +1449,11 @@ int efx_ptp_probe(struct efx_nic *efx, struct 
efx_channel *channel)
 >   	int rc = 0;
 >   	unsigned int pos;
 >
 > +	if (efx->ptp_data) {
 > +		efx->ptp_data->channel = channel;
 > +		return 0;
 > +	}
 > +
 >   	ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL);
 >   	efx->ptp_data = ptp;
 >   	if (!efx->ptp_data)
 > @@ -2176,7 +2187,7 @@ static const struct efx_channel_type 
efx_ptp_channel_type = {
 >   	.pre_probe		= efx_ptp_probe_channel,
 >   	.post_remove		= efx_ptp_remove_channel,
 >   	.get_name		= efx_ptp_get_channel_name,
 > -	/* no copy operation; there is no need to reallocate this channel */
 > +	.copy                   = efx_copy_channel,
 >   	.receive_skb		= efx_ptp_rx,
 >   	.want_txqs		= efx_ptp_want_txqs,
 >   	.keep_eventq		= false,
 > diff --git a/drivers/net/ethernet/sfc/ptp.h 
b/drivers/net/ethernet/sfc/ptp.h
 > index 9855e8c9e544..7b1ef7002b3f 100644
 > --- a/drivers/net/ethernet/sfc/ptp.h
 > +++ b/drivers/net/ethernet/sfc/ptp.h
 > @@ -16,6 +16,7 @@ struct ethtool_ts_info;
 >   int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
 >   void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
 >   struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
 > +void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel 
*channel);
 >   void efx_ptp_remove(struct efx_nic *efx);
 >   int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
 >   int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-05-03 13:37 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-02 17:02 [PATCH net] net: sfc: fix memory leak due to ptp channel Taehee Yoo
2022-05-03 13:37 ` Taehee Yoo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.