linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* stmmac ethernet in kernel 4.4: coalescing related pauses?
@ 2016-11-23 10:51 Pavel Machek
  2016-11-24  8:55 ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses Pavel Machek
  2016-11-28 13:07 ` stmmac ethernet in kernel 4.4: coalescing related pauses? Lino Sanfilippo
  0 siblings, 2 replies; 64+ messages in thread
From: Pavel Machek @ 2016-11-23 10:51 UTC (permalink / raw)
  To: peppe.cavallaro, netdev, kernel list


[-- Attachment #1.1: Type: text/plain, Size: 933 bytes --]

Hi!

I'm debugging strange delays during transmit in stmmac driver. They
seem to be present in 4.4 kernel (and older kernels, too). Workload is
burst of udp packets being sent, pause, burst of udp packets, ...

Test code is attached, I use these parameters for testing:

./udp-test raw 10.0.0.6 1234 1000 100 30

The delays seem to be related to coalescing:

drivers/net/ethernet/stmicro/stmmac/common.h
#define STMMAC_COAL_TX_TIMER    40000
#define STMMAC_MAX_COAL_TX_TICK 100000
#define STMMAC_TX_MAX_FRAMES    256

If I lower the parameters, delays are gone, but I get netdev watchdog
backtrace followed by broken driver.

Any ideas what is going on there?

[I'm currently trying to get newer kernels working on affected
hardware.]

Best regards,

									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #1.2: CMakeLists.txt --]
[-- Type: text/plain, Size: 589 bytes --]

cmake_minimum_required(VERSION 2.8.7)
project(streaming)

find_package(Boost REQUIRED COMPONENTS system)

set(SOURCES 
	udp-test.cpp)

add_executable(udp-test ${SOURCES})

if (BUILD_TESTS)
  enable_testing()
endif()

set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11")

set_property(TARGET udp-test PROPERTY CXX_STANDARD 11)
target_link_libraries(udp-test boost_system pthread )

[-- Attachment #1.3: udp-test.cpp --]
[-- Type: text/x-c++src, Size: 6995 bytes --]

#include <boost/asio.hpp>
#include <boost/asio/steady_timer.hpp>
#include <iostream>


namespace asio = boost::asio;

class UdpSendTest
{
	public:
		UdpSendTest(asio::io_service& io_service, const std::string& dest_ip, int dest_port, int packet_size, int packet_count, int interval)
			: io_service_(io_service),
	   		timer_(io_service),
			udp_socket_(io_service),
			dest_ip_(std::move(dest_ip)),
			dest_port_(dest_port),
			packet_size_(packet_size),
			packet_count_(packet_count),
			period_(interval)
		{
		}

		void start()
		{
			timer_.expires_from_now(period_);
			timer_.async_wait(std::bind(&UdpSendTest::handleTimer, this, std::placeholders::_1));
			try
			{
				udp_socket_.connect(asio::ip::udp::endpoint(boost::asio::ip::address::from_string(dest_ip_), dest_port_));
			}
			catch(boost::system::system_error e)
			{
				std::cerr<< "Could not connect:"<<e.what()<<std::endl;
			}
		}
	private:
		static_assert(std::chrono::steady_clock::is_steady, "steady_clock does not use the monotonic system clock. Please use a toolchain with full support for std::chrono!");

		void sendPackets()
		{
			std::vector<unsigned char> buffer(packet_size_,0);
			
			for (int i=0; i<packet_count_; i++)
			{
				if (buffer.size() > 1)
				{
					buffer[0] = i / 255;
					buffer[1] = i % 255;
				}
				else
					buffer[0]=i%255;

				auto t0 = std::chrono::steady_clock::now();
				try
				{
					udp_socket_.send(asio::buffer(buffer));
				}
				catch(boost::system::system_error& error)
				{
					std::cerr<<"Could not send UDP packet, reason: "<<error.what()<<std::endl;
				}
				auto delta_t = std::chrono::steady_clock::now() - t0;
				auto delta_t_us = std::chrono::duration_cast<std::chrono::microseconds>(delta_t).count();
				if (delta_t_us > 10000)
				{
					std::cout<<"Sending UDP packet took >10ms: "<<delta_t_us<<"us"<<std::endl;
				}
				if (delta_t_us > period_.count() * 1000)
				{
					std::cout<<"This would lead to a lost frame!"<<std::endl;
				}
			}
		}

		void handleTimer(boost::system::error_code ec)
		{
			if (ec)
			{
				std::cerr<<"Timer interrupted, exiting!"<<std::endl;
				return;
			}

			sendPackets();
			timer_.expires_at(timer_.expires_at() + period_);
			timer_.async_wait(std::bind(&UdpSendTest::handleTimer, this, std::placeholders::_1));

		}

		asio::io_service& io_service_;
		asio::steady_timer timer_;

		asio::ip::udp::socket udp_socket_;

		std::string dest_ip_;
		int dest_port_;
		int packet_size_;
		int packet_count_;
		std::chrono::milliseconds period_;
};

class UdpSendTestLowlevel
{
	public:
		UdpSendTestLowlevel(asio::io_service& io_service, const std::string& dest_ip, int dest_port, int packet_size, int packet_count, int interval)
			: io_service_(io_service),
	   		timer_(io_service),
			dest_ip_(std::move(dest_ip)),
			dest_port_(dest_port),
			packet_size_(packet_size),
			packet_count_(packet_count),
			period_(interval)
		{
		}

		void start()
		{
			timer_.expires_from_now(period_);
			timer_.async_wait(std::bind(&UdpSendTestLowlevel::handleTimer, this, std::placeholders::_1));
			socket_fd_ = socket(AF_INET, SOCK_DGRAM, 0);
			if (socket_fd_ < 0)
				std::cerr<<"could not create socket: " <<strerror(errno)<<std::endl;

			auto h = gethostbyname(dest_ip_.c_str());
			if (h == nullptr)
				std::cerr<<"Could not find host: "<<dest_ip_<<std::endl;

			server_addr_.sin_family = h->h_addrtype;
			memcpy((char*)&server_addr_.sin_addr.s_addr, h->h_addr_list[0], h->h_length);
			server_addr_.sin_port = htons(dest_port_);

			client_addr_.sin_family = AF_INET;
			client_addr_.sin_addr.s_addr = htonl(INADDR_ANY);
			client_addr_.sin_port = htons(0);
			auto rc = bind(socket_fd_,reinterpret_cast<sockaddr*>(&client_addr_), sizeof(client_addr_));
			if (rc < 0)
				std::cerr<<"Could not open Port"<<std::endl;
		}
	private:
		static_assert(std::chrono::steady_clock::is_steady, "steady_clock does not use the monotonic system clock. Please use a toolchain with full support for std::chrono!");

		void sendPackets()
		{
			std::vector<unsigned char> buffer(packet_size_,0);
			
			for (int i=0; i<packet_count_; i++)
			{
				if (buffer.size() > 1)
				{
					buffer[0] = i / 255;
					buffer[1] = i % 255;
				}
				else
					buffer[0]=i%255;

				auto t0 = std::chrono::steady_clock::now();
				auto rc = sendto(socket_fd_, buffer.data(), buffer.size(), 0, (sockaddr* )&server_addr_, sizeof(server_addr_));
				if (rc<0)
					std::cerr<<"Could not send UDP packet"<<std::endl;
				auto delta_t = std::chrono::steady_clock::now() - t0;
				auto delta_t_us = std::chrono::duration_cast<std::chrono::microseconds>(delta_t).count();
				if (delta_t_us > 10000)
				{
					std::cout<<"Sending UDP packet took >10ms: "<<delta_t_us<<"us"<<std::endl;
				}
				if (delta_t_us > period_.count() * 1000)
				{
					std::cout<<"This would lead to a lost frame!"<<std::endl;
				}
			}
		}

		void handleTimer(boost::system::error_code ec)
		{
			if (ec)
			{
				std::cerr<<"Timer interrupted, exiting!"<<std::endl;
				return;
			}

			sendPackets();
			timer_.expires_at(timer_.expires_at() + period_);
			timer_.async_wait(std::bind(&UdpSendTestLowlevel::handleTimer, this, std::placeholders::_1));

		}

		asio::io_service& io_service_;
		asio::steady_timer timer_;

		std::string dest_ip_;
		int dest_port_;
		int packet_size_;
		int packet_count_;
		std::chrono::milliseconds period_;

		int socket_fd_;
		struct sockaddr_in client_addr_, server_addr_;
};
int main(int argc, char** argv)
{
	if (argc < 7)
	{
		std::cout<<"usage: "<<argv[0]<<" [boost|raw] dest_ip dest_port packet_size packet_count interval_ms"<<std::endl;
		return 1;
	}
	if (std::atoi(argv[4])<1)
	{
		std::cerr<<"Please select a packet size > 0 bytes!"<<std::endl;
		return 1;
	}
	asio::io_service io_service;
	
	std::string mode(argv[1]);

	int dest_port = std::atoi(argv[3]);
	int packet_size = std::atoi(argv[4]);
	int packet_count = std::atoi(argv[5]);
	int frame_interval = std::atoi(argv[6]);

	int bytes_per_sec = packet_size * packet_count *(1000.f/frame_interval);
	int bytes_per_sec_2 = (packet_size+12) * packet_count *(1000.f/frame_interval);
	std::cout<<"Sending "<<packet_count<<" packets ("<<packet_size<<"b each) at an interval of "<<frame_interval<<"ms, expected data rate:"<<bytes_per_sec <<"b/s ("<<bytes_per_sec_2<<"b/s incl udp overhead)"<<std::endl;
	if (bytes_per_sec_2 > 1000 * 1000 * 100)
		std::cerr<<"Warning: trying to transmit > 100Mb/s"<<std::endl;

	if (mode == "boost")
	{
	
		UdpSendTest u(io_service,
				argv[2],dest_port, packet_size, packet_count, frame_interval);
		u.start();
		io_service.run();
	}
	else
	{
		UdpSendTestLowlevel u(io_service,
				argv[2],dest_port, packet_size, packet_count, frame_interval);
		u.start();
		io_service.run();
	}

	return 0;
}

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-11-23 10:51 stmmac ethernet in kernel 4.4: coalescing related pauses? Pavel Machek
@ 2016-11-24  8:55 ` Pavel Machek
  2016-11-24 10:29   ` Pavel Machek
  2016-11-24 16:04   ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses David Miller
  2016-11-28 13:07 ` stmmac ethernet in kernel 4.4: coalescing related pauses? Lino Sanfilippo
  1 sibling, 2 replies; 64+ messages in thread
From: Pavel Machek @ 2016-11-24  8:55 UTC (permalink / raw)
  To: peppe.cavallaro, netdev, kernel list

[-- Attachment #1: Type: text/plain, Size: 1150 bytes --]

Hi!

> I'm debugging strange delays during transmit in stmmac driver. They
> seem to be present in 4.4 kernel (and older kernels, too). Workload is
> burst of udp packets being sent, pause, burst of udp packets, ...
> 
> Test code is attached, I use these parameters for testing:
> 
> ./udp-test raw 10.0.0.6 1234 1000 100 30
> 
> The delays seem to be related to coalescing:
> 
> drivers/net/ethernet/stmicro/stmmac/common.h
> #define STMMAC_COAL_TX_TIMER    40000
> #define STMMAC_MAX_COAL_TX_TICK 100000
> #define STMMAC_TX_MAX_FRAMES    256
> 
> If I lower the parameters, delays are gone, but I get netdev watchdog
> backtrace followed by broken driver.
> 
> Any ideas what is going on there?

4.9-rc6 still has the delays. With the

#define STMMAC_COAL_TX_TIMER 1000
#define STMMAC_TX_MAX_FRAMES 2

settings, delays go away, and driver still works. (It fails fairly
fast in 4.4). Good news. But the question still is: what is going on
there?

Best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-11-24  8:55 ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses Pavel Machek
@ 2016-11-24 10:29   ` Pavel Machek
  2016-11-24 10:36     ` Pavel Machek
  2016-11-24 16:04   ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses David Miller
  1 sibling, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-11-24 10:29 UTC (permalink / raw)
  To: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier

[-- Attachment #1: Type: text/plain, Size: 3938 bytes --]

Hi!

What is going on with stmmac_tso_xmit() vs. stmmac_xmit()? One seems
to be copy of another, with subtle differences -- like calling
netif_queue_stopped() under spin_lock(&priv->tx_lock), or not.

What is going on with all these likely()s? Likely new hardware owners
will not be happy... or anyone running a lot of jumbo frames. (Perhaps
CPU's branch prediction can do better job here, without explicit hints?)

     if (unlikely(is_jumbo) && likely(priv->synopsys_id <
                                              DWMAC_CORE_4_00)) {

Are you sure this is okay?

        if (unlikely(netif_queue_stopped(priv->dev) &&
                     stmmac_tx_avail(priv) > STMMAC_TX_THRESH)) {
		netif_tx_lock(priv->dev);
                if (netif_queue_stopped(priv->dev) &&
                    stmmac_tx_avail(priv) > STMMAC_TX_THRESH) {

---

Fix english in comments.

Signed-off-by: Pavel Machek <pavel@ucw.cz>

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1f9ec02..e5a5a05 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1747,11 +1747,11 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane)
 		priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0);
 
-	/*  set TX ring length */
+	/* Set TX ring length */
 	if (priv->hw->dma->set_tx_ring_len)
 		priv->hw->dma->set_tx_ring_len(priv->ioaddr,
 					       (DMA_TX_SIZE - 1));
-	/*  set RX ring length */
+	/* Set RX ring length */
 	if (priv->hw->dma->set_rx_ring_len)
 		priv->hw->dma->set_rx_ring_len(priv->ioaddr,
 					       (DMA_RX_SIZE - 1));
@@ -2212,7 +2212,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->tx_skbuff[first_entry] = skb;
 
 	enh_desc = priv->plat->enh_desc;
-	/* To program the descriptors according to the size of the frame */
+	/* Program the descriptors according to the size of the frame */
 	if (enh_desc)
 		is_jumbo = priv->hw->mode->is_jumbo_frm(skb->len, enh_desc);
 
@@ -2665,7 +2665,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
  *  @budget : maximum number of packets that the current CPU can receive from
  *	      all interfaces.
  *  Description :
- *  To look at the incoming frames and clear the tx resources.
+ *  Look at the incoming frames and clear the tx resources.
  */
 static int stmmac_poll(struct napi_struct *napi, int budget)
 {
@@ -2828,7 +2828,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 		return IRQ_NONE;
 	}
 
-	/* To handle GMAC own interrupts */
+	/* Handle GMAC own interrupts */
 	if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) {
 		int status = priv->hw->mac->host_irq_status(priv->hw,
 							    &priv->xstats);
@@ -2853,7 +2853,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 		}
 	}
 
-	/* To handle DMA interrupts */
+	/* Handle DMA interrupts */
 	stmmac_dma_interrupt(priv);
 
 	return IRQ_HANDLED;
@@ -3145,7 +3145,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 
 	priv->hw = mac;
 
-	/* To use the chained or ring mode */
+	/* Use the chained or ring mode */
 	if (priv->synopsys_id >= DWMAC_CORE_4_00) {
 		priv->hw->mode = &dwmac4_ring_mode_ops;
 	} else {
@@ -3191,7 +3191,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 	} else
 		pr_info(" No HW DMA feature register supported");
 
-	/* To use alternate (extended), normal or GMAC4 descriptor structures */
+	/* Use alternate (extended), normal or GMAC4 descriptor structures */
 	if (priv->synopsys_id >= DWMAC_CORE_4_00)
 		priv->hw->desc = &dwmac4_desc_ops;
 	else



-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-11-24 10:29   ` Pavel Machek
@ 2016-11-24 10:36     ` Pavel Machek
  2016-11-24 10:46       ` [PATCH] stmmac ethernet: unify locking Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-11-24 10:36 UTC (permalink / raw)
  To: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier

[-- Attachment #1: Type: text/plain, Size: 3392 bytes --]

Hi!

> What is going on with all these likely()s? Likely new hardware owners
> will not be happy... or anyone running a lot of jumbo frames. (Perhaps
> CPU's branch prediction can do better job here, without explicit hints?)
> 
>      if (unlikely(is_jumbo) && likely(priv->synopsys_id <
>                                               DWMAC_CORE_4_00)) {

    Fix english, remove misleading unlikely's.

Signed-off-by: Pavel Machek <pavel@denx.de>

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index e5a5a05..0363db3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2003,7 +2003,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Compute header lengths */
 	proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 
-	/* Desc availability based on threshold should be enough safe */
+	/* Desc availability based on threshold should be safe enough */
 	if (unlikely(stmmac_tx_avail(priv) <
 		(((skb->len - proto_hdr_len) / TSO_MAX_BUFF_SIZE + 1)))) {
 		if (!netif_queue_stopped(dev)) {
@@ -2216,8 +2216,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (enh_desc)
 		is_jumbo = priv->hw->mode->is_jumbo_frm(skb->len, enh_desc);
 
-	if (unlikely(is_jumbo) && likely(priv->synopsys_id <
-					 DWMAC_CORE_4_00)) {
+	if (unlikely(is_jumbo) && priv->synopsys_id < DWMAC_CORE_4_00) {
 		entry = priv->hw->mode->jumbo_frm(priv, skb, csum_insertion);
 		if (unlikely(entry < 0))
 			goto dma_map_err;
@@ -2242,7 +2241,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		priv->tx_skbuff[entry] = NULL;
 
-		if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
+		if (priv->synopsys_id >= DWMAC_CORE_4_00) {
 			desc->des0 = des;
 			priv->tx_skbuff_dma[entry].buf = desc->des0;
 		} else {
@@ -2319,7 +2318,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (dma_mapping_error(priv->device, des))
 			goto dma_map_err;
 
-		if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
+		if (priv->synopsys_id >= DWMAC_CORE_4_00) {
 			first->des0 = des;
 			priv->tx_skbuff_dma[first_entry].buf = first->des0;
 		} else {
@@ -2438,7 +2437,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 				break;
 			}
 
-			if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
+			if (priv->synopsys_id >= DWMAC_CORE_4_00) {
 				p->des0 = priv->rx_skbuff_dma[entry];
 				p->des1 = 0;
 			} else {
@@ -2455,7 +2454,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 		}
 		wmb();
 
-		if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
+		if (priv->synopsys_id >= DWMAC_CORE_4_00)
 			priv->hw->desc->init_rx_desc(p, priv->use_riwt, 0, 0);
 		else
 			priv->hw->desc->set_rx_owner(p);
@@ -2545,7 +2544,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 			int frame_len;
 			unsigned int des;
 
-			if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
+			if (priv->synopsys_id >= DWMAC_CORE_4_00)
 				des = p->des0;
 			else
 				des = p->des2;


-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* [PATCH] stmmac ethernet: unify locking
  2016-11-24 10:36     ` Pavel Machek
@ 2016-11-24 10:46       ` Pavel Machek
  2016-11-24 11:05         ` [PATCH] stmmac ethernet: remove cut & paste code Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-11-24 10:46 UTC (permalink / raw)
  To: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier

[-- Attachment #1: Type: text/plain, Size: 965 bytes --]


Make locking match in both _xmit functions.
   
Signed-off-by: Pavel Machek <pavel@denx.de>

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0363db3..1cff258 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2185,12 +2185,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	spin_lock(&priv->tx_lock);
 
 	if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
-		spin_unlock(&priv->tx_lock);
 		if (!netif_queue_stopped(dev)) {
 			netif_stop_queue(dev);
 			/* This is a hard error, log it. */
 			pr_err("%s: Tx Ring full when queue awake\n", __func__);
 		}
+		spin_unlock(&priv->tx_lock);
 		return NETDEV_TX_BUSY;
 	}
 

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* [PATCH] stmmac ethernet: remove cut & paste code
  2016-11-24 10:46       ` [PATCH] stmmac ethernet: unify locking Pavel Machek
@ 2016-11-24 11:05         ` Pavel Machek
  2016-11-24 20:05           ` Joe Perches
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-11-24 11:05 UTC (permalink / raw)
  To: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier

[-- Attachment #1: Type: text/plain, Size: 3963 bytes --]


Remove duplicate code from _tx routines.
    
Signed-off-by: Pavel Machek <pavel@denx.de>

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1cff258..5cf9cef 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1960,6 +1960,38 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
 	}
 }
 
+static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int nfrags, struct dma_desc *desc)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
+		if (netif_msg_hw(priv))
+			pr_debug("%s: stop transmitted packets\n", __func__);
+		netif_stop_queue(dev);
+	}
+
+	dev->stats.tx_bytes += skb->len;
+
+	/* According to the coalesce parameter the IC bit for the latest
+	 * segment is reset and the timer re-started to clean the tx status.
+	 * This approach takes care about the fragments: desc is the first
+	 * element in case of no SG.
+	 */
+	priv->tx_count_frames += nfrags + 1;
+	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
+		mod_timer(&priv->txtimer,
+			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
+	} else {
+		priv->tx_count_frames = 0;
+		priv->hw->desc->set_tx_ic(desc);
+		priv->xstats.tx_set_ic_bit++;
+	}
+
+	if (!priv->hwts_tx_en)
+		skb_tx_timestamp(skb);
+}
+
+
 /**
  *  stmmac_tso_xmit - Tx entry point of the driver for oversized frames (TSO)
  *  @skb : the socket buffer
@@ -2081,30 +2113,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
 
-	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
-		if (netif_msg_hw(priv))
-			pr_debug("%s: stop transmitted packets\n", __func__);
-		netif_stop_queue(dev);
-	}
-
-	dev->stats.tx_bytes += skb->len;
+	stmmac_xmit_common(skb, dev, nfrags, desc);
+	
 	priv->xstats.tx_tso_frames++;
 	priv->xstats.tx_tso_nfrags += nfrags;
 
-	/* Manage tx mitigation */
-	priv->tx_count_frames += nfrags + 1;
-	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
-		mod_timer(&priv->txtimer,
-			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-	} else {
-		priv->tx_count_frames = 0;
-		priv->hw->desc->set_tx_ic(desc);
-		priv->xstats.tx_set_ic_bit++;
-	}
-
-	if (!priv->hwts_tx_en)
-		skb_tx_timestamp(skb);
-
 	if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
 		     priv->hwts_tx_en)) {
 		/* declare that device is doing timestamping */
@@ -2280,31 +2293,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		print_pkt(skb->data, skb->len);
 	}
 
-	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
-		if (netif_msg_hw(priv))
-			pr_debug("%s: stop transmitted packets\n", __func__);
-		netif_stop_queue(dev);
-	}
-
-	dev->stats.tx_bytes += skb->len;
-
-	/* According to the coalesce parameter the IC bit for the latest
-	 * segment is reset and the timer re-started to clean the tx status.
-	 * This approach takes care about the fragments: desc is the first
-	 * element in case of no SG.
-	 */
-	priv->tx_count_frames += nfrags + 1;
-	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
-		mod_timer(&priv->txtimer,
-			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-	} else {
-		priv->tx_count_frames = 0;
-		priv->hw->desc->set_tx_ic(desc);
-		priv->xstats.tx_set_ic_bit++;
-	}
-
-	if (!priv->hwts_tx_en)
-		skb_tx_timestamp(skb);
+	stmmac_xmit_common(skb, dev, nfrags, desc);
 
 	/* Ready to fill the first descriptor and set the OWN bit w/o any
 	 * problems because all the descriptors are actually ready to be


-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-11-24  8:55 ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses Pavel Machek
  2016-11-24 10:29   ` Pavel Machek
@ 2016-11-24 16:04   ` David Miller
  2016-11-24 21:25     ` Pavel Machek
                       ` (6 more replies)
  1 sibling, 7 replies; 64+ messages in thread
From: David Miller @ 2016-11-24 16:04 UTC (permalink / raw)
  To: pavel; +Cc: peppe.cavallaro, netdev, linux-kernel

From: Pavel Machek <pavel@ucw.cz>
Date: Thu, 24 Nov 2016 09:55:06 +0100

> Hi!
> 
>> I'm debugging strange delays during transmit in stmmac driver. They
>> seem to be present in 4.4 kernel (and older kernels, too). Workload is
>> burst of udp packets being sent, pause, burst of udp packets, ...
>> 
>> Test code is attached, I use these parameters for testing:
>> 
>> ./udp-test raw 10.0.0.6 1234 1000 100 30
>> 
>> The delays seem to be related to coalescing:
>> 
>> drivers/net/ethernet/stmicro/stmmac/common.h
>> #define STMMAC_COAL_TX_TIMER    40000
>> #define STMMAC_MAX_COAL_TX_TICK 100000
>> #define STMMAC_TX_MAX_FRAMES    256
>> 
>> If I lower the parameters, delays are gone, but I get netdev watchdog
>> backtrace followed by broken driver.
>> 
>> Any ideas what is going on there?
> 
> 4.9-rc6 still has the delays. With the
> 
> #define STMMAC_COAL_TX_TIMER 1000
> #define STMMAC_TX_MAX_FRAMES 2
> 
> settings, delays go away, and driver still works. (It fails fairly
> fast in 4.4). Good news. But the question still is: what is going on
> there?

256 packets looks way too large for being a trigger for aborting the
TX coalescing timer.

Looking more deeply into this, the driver is using non-highres timers
to implement the TX coalescing.  This simply cannot work.

1 HZ, which is the lowest granularity of non-highres timers in the
kernel, is variable as well as already too large of a delay for
effective TX coalescing.

I seriously think that the TX coalescing support should be ripped out
or disabled entirely until it is implemented properly in this driver.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac ethernet: remove cut & paste code
  2016-11-24 11:05         ` [PATCH] stmmac ethernet: remove cut & paste code Pavel Machek
@ 2016-11-24 20:05           ` Joe Perches
  2016-11-24 21:44             ` Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: Joe Perches @ 2016-11-24 20:05 UTC (permalink / raw)
  To: Pavel Machek, peppe.cavallaro, netdev, kernel list, ezequiel,
	sonic.zhang, fabrice.gasnier

On Thu, 2016-11-24 at 12:05 +0100, Pavel Machek wrote:
> Remove duplicate code from _tx routines.

trivia:

> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
[]
> @@ -1960,6 +1960,38 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
>  	}
>  }
>  
> +static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int nfrags, struct dma_desc *desc)
> +{
> +	struct stmmac_priv *priv = netdev_priv(dev);
> +
> +	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
> +		if (netif_msg_hw(priv))
> +			pr_debug("%s: stop transmitted packets\n", __func__);

		netif_dbg(priv, hw, dev, "%s: stop transmitted packets\n",
			  __func__);

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-11-24 16:04   ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses David Miller
@ 2016-11-24 21:25     ` Pavel Machek
  2016-12-02  8:24       ` Giuseppe CAVALLARO
  2016-11-28 11:55     ` [PATCH] stmmac: fix comments, make debug output consistent Pavel Machek
                       ` (5 subsequent siblings)
  6 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-11-24 21:25 UTC (permalink / raw)
  To: David Miller; +Cc: peppe.cavallaro, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1831 bytes --]

Hi!

> >> I'm debugging strange delays during transmit in stmmac driver. They
> >> seem to be present in 4.4 kernel (and older kernels, too). Workload is
> >> burst of udp packets being sent, pause, burst of udp packets, ...
...
> > 4.9-rc6 still has the delays. With the
> > 
> > #define STMMAC_COAL_TX_TIMER 1000
> > #define STMMAC_TX_MAX_FRAMES 2
> > 
> > settings, delays go away, and driver still works. (It fails fairly
> > fast in 4.4). Good news. But the question still is: what is going on
> > there?
> 
> 256 packets looks way too large for being a trigger for aborting the
> TX coalescing timer.
> 
> Looking more deeply into this, the driver is using non-highres timers
> to implement the TX coalescing.  This simply cannot work.
> 
> 1 HZ, which is the lowest granularity of non-highres timers in the
> kernel, is variable as well as already too large of a delay for
> effective TX coalescing.
> 
> I seriously think that the TX coalescing support should be ripped out
> or disabled entirely until it is implemented properly in this
> driver.

Ok, I'd disable coalescing, but could not figure it out till. What is
generic way to do that?

It seems only thing stmmac_tx_timer() does is calling
stmmac_tx_clean(), which reclaims tx_skbuff[] entries. It should be
possible to do that explicitely, without delay, but it stops working
completely if I attempt to do that.

On a side note, stmmac_poll() does stmmac_enable_dma_irq() while
stmmac_dma_interrupt() disables interrupts. But I don't see any
protection between the two, so IMO it could race and we'd end up
without polling or interrupts...

Thanks and best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac ethernet: remove cut & paste code
  2016-11-24 20:05           ` Joe Perches
@ 2016-11-24 21:44             ` Pavel Machek
  2016-11-24 22:27               ` Joe Perches
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-11-24 21:44 UTC (permalink / raw)
  To: Joe Perches
  Cc: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier

[-- Attachment #1: Type: text/plain, Size: 1196 bytes --]

On Thu 2016-11-24 12:05:25, Joe Perches wrote:
> On Thu, 2016-11-24 at 12:05 +0100, Pavel Machek wrote:
> > Remove duplicate code from _tx routines.
> 
> trivia:
> 
> > diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> []
> > @@ -1960,6 +1960,38 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
> >  	}
> >  }
> >  
> > +static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int nfrags, struct dma_desc *desc)
> > +{
> > +	struct stmmac_priv *priv = netdev_priv(dev);
> > +
> > +	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
> > +		if (netif_msg_hw(priv))
> > +			pr_debug("%s: stop transmitted packets\n", __func__);
> 
> 		netif_dbg(priv, hw, dev, "%s: stop transmitted packets\n",
> 			  __func__);

Not now. Modifying the code while de-duplicating would be bad idea.

(And it looks like the driver has rather more serious problems than
printk style...)

Thanks,									
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac ethernet: remove cut & paste code
  2016-11-24 21:44             ` Pavel Machek
@ 2016-11-24 22:27               ` Joe Perches
  2016-11-28 11:50                 ` Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: Joe Perches @ 2016-11-24 22:27 UTC (permalink / raw)
  To: Pavel Machek
  Cc: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier

On Thu, 2016-11-24 at 22:44 +0100, Pavel Machek wrote:
> On Thu 2016-11-24 12:05:25, Joe Perches wrote:
> > On Thu, 2016-11-24 at 12:05 +0100, Pavel Machek wrote:
> > > Remove duplicate code from _tx routines.
> > 
> > trivia:
> > 
> > > diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> > 
> > []
> > > @@ -1960,6 +1960,38 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
> > >  	}
> > >  }
> > >  
> > > +static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int nfrags, struct dma_desc *desc)
> > > +{
> > > +	struct stmmac_priv *priv = netdev_priv(dev);
> > > +
> > > +	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
> > > +		if (netif_msg_hw(priv))
> > > +			pr_debug("%s: stop transmitted packets\n", __func__);
> > 
> > 		netif_dbg(priv, hw, dev, "%s: stop transmitted packets\n",
> > 			  __func__);
> 
> Not now. Modifying the code while de-duplicating would be bad idea.

Too many people think overly granular patches are the
best and only way to make changes.

Deduplication and consolidation can happen simultaneously.

> (And it looks like the driver has rather more serious problems than
> printk style...)

Probably so.

cheers, Joe

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac ethernet: remove cut & paste code
  2016-11-24 22:27               ` Joe Perches
@ 2016-11-28 11:50                 ` Pavel Machek
  2016-11-28 14:24                   ` Joe Perches
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-11-28 11:50 UTC (permalink / raw)
  To: Joe Perches
  Cc: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier

[-- Attachment #1: Type: text/plain, Size: 1737 bytes --]

On Thu 2016-11-24 14:27:13, Joe Perches wrote:
> On Thu, 2016-11-24 at 22:44 +0100, Pavel Machek wrote:
> > On Thu 2016-11-24 12:05:25, Joe Perches wrote:
> > > On Thu, 2016-11-24 at 12:05 +0100, Pavel Machek wrote:
> > > > Remove duplicate code from _tx routines.
> > > 
> > > trivia:
> > > 
> > > > diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> > > 
> > > []
> > > > @@ -1960,6 +1960,38 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
> > > >  	}
> > > >  }
> > > >  
> > > > +static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int nfrags, struct dma_desc *desc)
> > > > +{
> > > > +	struct stmmac_priv *priv = netdev_priv(dev);
> > > > +
> > > > +	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
> > > > +		if (netif_msg_hw(priv))
> > > > +			pr_debug("%s: stop transmitted packets\n", __func__);
> > > 
> > > 		netif_dbg(priv, hw, dev, "%s: stop transmitted packets\n",
> > > 			  __func__);
> > 
> > Not now. Modifying the code while de-duplicating would be bad idea.
> 
> Too many people think overly granular patches are the
> best and only way to make changes.

> Deduplication and consolidation can happen simultaneously.

Can, but should not at this point. Please take a look at the driver in
question before commenting on trivial printk style.

Feel free to do your favourite cleanup on whole tree, or per-driver
basis. Doing it on per-message basis would be wrong thing to do.

Thanks,
									Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH] stmmac: fix comments, make debug output consistent
  2016-11-24 16:04   ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses David Miller
  2016-11-24 21:25     ` Pavel Machek
@ 2016-11-28 11:55     ` Pavel Machek
  2016-11-30  0:53       ` David Miller
  2016-11-28 12:13     ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses Pavel Machek
                       ` (4 subsequent siblings)
  6 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-11-28 11:55 UTC (permalink / raw)
  To: David Miller, alexandre.torgue
  Cc: peppe.cavallaro, netdev, linux-kernel, Andrew Morton

[-- Attachment #1: Type: text/plain, Size: 3421 bytes --]


Fix comments, add some new, and make debugfs output consistent.
    
Signed-off-by: Pavel Machek <pavel@denx.de>

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index a61de04..6074d97 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -44,6 +44,7 @@
 #define	DWMAC_CORE_4_00	0x40
 #define STMMAC_CHAN0	0	/* Always supported and default for all chips */
 
+/* These need to be power of two, and >= 4 */
 #define DMA_TX_SIZE 512
 #define DMA_RX_SIZE 512
 #define STMMAC_GET_ENTRY(x, size)	((x + 1) & (size - 1))
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9dfdbe0..f7133d0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -108,8 +108,8 @@ module_param(eee_timer, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
 #define STMMAC_LPI_T(x) (jiffies + msecs_to_jiffies(x))
 
-/* By default the driver will use the ring mode to manage tx and rx descriptors
- * but passing this value so user can force to use the chain instead of the ring
+/* By default the driver will use the ring mode to manage tx and rx descriptors,
+ * but allow user to force to use the chain instead of the ring
  */
 static unsigned int chain_mode;
 module_param(chain_mode, int, S_IRUGO);
@@ -2966,6 +2966,8 @@ static int stmmac_sysfs_ring_open(struct inode *inode, struct file *file)
 	return single_open(file, stmmac_sysfs_ring_read, inode->i_private);
 }
 
+/* Debugfs files, should appear in /sys/kernel/debug/stmmaceth/eth0 */
+
 static const struct file_operations stmmac_rings_status_fops = {
 	.owner = THIS_MODULE,
 	.open = stmmac_sysfs_ring_open,
@@ -2988,11 +2990,11 @@ static int stmmac_sysfs_dma_cap_read(struct seq_file *seq, void *v)
 	seq_printf(seq, "\tDMA HW features\n");
 	seq_printf(seq, "==============================\n");
 
-	seq_printf(seq, "\t10/100 Mbps %s\n",
+	seq_printf(seq, "\t10/100 Mbps: %s\n",
 		   (priv->dma_cap.mbps_10_100) ? "Y" : "N");
-	seq_printf(seq, "\t1000 Mbps %s\n",
+	seq_printf(seq, "\t1000 Mbps: %s\n",
 		   (priv->dma_cap.mbps_1000) ? "Y" : "N");
-	seq_printf(seq, "\tHalf duple %s\n",
+	seq_printf(seq, "\tHalf duplex: %s\n",
 		   (priv->dma_cap.half_duplex) ? "Y" : "N");
 	seq_printf(seq, "\tHash Filter: %s\n",
 		   (priv->dma_cap.hash_filter) ? "Y" : "N");
@@ -3010,9 +3012,9 @@ static int stmmac_sysfs_dma_cap_read(struct seq_file *seq, void *v)
 		   (priv->dma_cap.rmon) ? "Y" : "N");
 	seq_printf(seq, "\tIEEE 1588-2002 Time Stamp: %s\n",
 		   (priv->dma_cap.time_stamp) ? "Y" : "N");
-	seq_printf(seq, "\tIEEE 1588-2008 Advanced Time Stamp:%s\n",
+	seq_printf(seq, "\tIEEE 1588-2008 Advanced Time Stamp: %s\n",
 		   (priv->dma_cap.atime_stamp) ? "Y" : "N");
-	seq_printf(seq, "\t802.3az - Energy-Efficient Ethernet (EEE) %s\n",
+	seq_printf(seq, "\t802.3az - Energy-Efficient Ethernet (EEE): %s\n",
 		   (priv->dma_cap.eee) ? "Y" : "N");
 	seq_printf(seq, "\tAV features: %s\n", (priv->dma_cap.av) ? "Y" : "N");
 	seq_printf(seq, "\tChecksum Offload in TX: %s\n",

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-11-24 16:04   ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses David Miller
  2016-11-24 21:25     ` Pavel Machek
  2016-11-28 11:55     ` [PATCH] stmmac: fix comments, make debug output consistent Pavel Machek
@ 2016-11-28 12:13     ` Pavel Machek
  2016-11-28 12:17     ` [PATCH] stmmac: reduce code duplication getting basic descriptors Pavel Machek
                       ` (3 subsequent siblings)
  6 siblings, 0 replies; 64+ messages in thread
From: Pavel Machek @ 2016-11-28 12:13 UTC (permalink / raw)
  To: David Miller, alexandre.torgue; +Cc: peppe.cavallaro, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1893 bytes --]

Hi!

> >> drivers/net/ethernet/stmicro/stmmac/common.h
> >> #define STMMAC_COAL_TX_TIMER    40000
> >> #define STMMAC_MAX_COAL_TX_TICK 100000
> >> #define STMMAC_TX_MAX_FRAMES    256
> >> 
> >> If I lower the parameters, delays are gone, but I get netdev watchdog
> >> backtrace followed by broken driver.
> >> 
> >> Any ideas what is going on there?
> > 
> > 4.9-rc6 still has the delays. With the
> > 
> > #define STMMAC_COAL_TX_TIMER 1000
> > #define STMMAC_TX_MAX_FRAMES 2
> > 
> > settings, delays go away, and driver still works. (It fails fairly
> > fast in 4.4). Good news. But the question still is: what is going on
> > there?
> 
> 256 packets looks way too large for being a trigger for aborting the
> TX coalescing timer.
> 
> Looking more deeply into this, the driver is using non-highres timers
> to implement the TX coalescing.  This simply cannot work.
> 
> 1 HZ, which is the lowest granularity of non-highres timers in the
> kernel, is variable as well as already too large of a delay for
> effective TX coalescing.
> 
> I seriously think that the TX coalescing support should be ripped out
> or disabled entirely until it is implemented properly in this
> driver.

Hmm. I still don't understand how the coalescing is supposed to do any
good in this driver.

As long as transmits happen, it seems driver is not recycling used DMA
descriptors. When the transmits stops, it waits for 40msec, then
recycles them. We'll run out of DMA descriptors in 5msec at 100mbit
speeds.

Giuseppe Cavallaro, Alexandre Torgue: could you comment here? Can you
apply the cleanup patches? The driver is marked as supported, but I
see no reactions on email, and bugzilla is down :-(.

Thanks,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH] stmmac: reduce code duplication getting basic descriptors
  2016-11-24 16:04   ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses David Miller
                       ` (2 preceding siblings ...)
  2016-11-28 12:13     ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses Pavel Machek
@ 2016-11-28 12:17     ` Pavel Machek
  2016-11-28 15:25       ` kbuild test robot
  2016-12-02 14:09       ` Alexandre Torgue
  2016-11-30 11:44     ` [PATCH] stmmac: simplify flag assignment Pavel Machek
                       ` (2 subsequent siblings)
  6 siblings, 2 replies; 64+ messages in thread
From: Pavel Machek @ 2016-11-28 12:17 UTC (permalink / raw)
  To: David Miller, Andrew Morton, alexandre.torgue
  Cc: peppe.cavallaro, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 3052 bytes --]


Remove code duplication getting basic descriptors.
    
Signed-off-by: Pavel Machek <pavel@denx.de>

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f7133d0..ed20668 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -929,6 +929,17 @@ static int stmmac_set_bfsize(int mtu, int bufsize)
 	return ret;
 }
 
+static inline struct dma_desc *stmmac_tx_desc(struct stmmac_priv *priv, int i)
+{
+	struct dma_desc *p;
+	if (priv->extend_desc)
+		p = &((priv->dma_etx + i)->basic);
+	else
+		p = priv->dma_tx + i;
+	return p;
+}
+
+
 /**
  * stmmac_clear_descriptors - clear descriptors
  * @priv: driver private structure
@@ -1078,11 +1089,7 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
 
 	/* TX INITIALIZATION */
 	for (i = 0; i < DMA_TX_SIZE; i++) {
-		struct dma_desc *p;
-		if (priv->extend_desc)
-			p = &((priv->dma_etx + i)->basic);
-		else
-			p = priv->dma_tx + i;
+		struct dma_desc *p = stmmac_tx_desc(priv, i);
 
 		if (priv->synopsys_id >= DWMAC_CORE_4_00) {
 			p->des0 = 0;
@@ -1129,12 +1136,7 @@ static void dma_free_tx_skbufs(struct stmmac_priv *priv)
 	int i;
 
 	for (i = 0; i < DMA_TX_SIZE; i++) {
-		struct dma_desc *p;
-
-		if (priv->extend_desc)
-			p = &((priv->dma_etx + i)->basic);
-		else
-			p = priv->dma_tx + i;
+		struct dma_desc *p = stmmac_tx_desc(priv, i);
 
 		if (priv->tx_skbuff_dma[i].buf) {
 			if (priv->tx_skbuff_dma[i].map_as_page)
@@ -1314,14 +1316,9 @@ static void __stmmac_tx_clean(struct stmmac_priv *priv)
 
 	while (entry != priv->cur_tx) {
 		struct sk_buff *skb = priv->tx_skbuff[entry];
-		struct dma_desc *p;
+		struct dma_desc *p = stmmac_tx_desc(priv, entry);
 		int status;
 
-		if (priv->extend_desc)
-			p = (struct dma_desc *)(priv->dma_etx + entry);
-		else
-			p = priv->dma_tx + entry;
-
 		status = priv->hw->desc->tx_status(&priv->dev->stats,
 						      &priv->xstats, p,
 						      priv->ioaddr);
@@ -2227,11 +2224,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
 
-	if (likely(priv->extend_desc))
-		desc = (struct dma_desc *)(priv->dma_etx + entry);
-	else
-		desc = priv->dma_tx + entry;
-
+	desc = stmmac_tx_desc(priv, entry);
 	first = desc;
 
 	priv->tx_skbuff[first_entry] = skb;
@@ -2254,10 +2247,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
 
-		if (likely(priv->extend_desc))
-			desc = (struct dma_desc *)(priv->dma_etx + entry);
-		else
-			desc = priv->dma_tx + entry;
+		desc = stmmac_tx_desc(priv, entry);
 
 		des = skb_frag_dma_map(priv->device, frag, 0, len,
 				       DMA_TO_DEVICE);

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
  2016-11-23 10:51 stmmac ethernet in kernel 4.4: coalescing related pauses? Pavel Machek
  2016-11-24  8:55 ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses Pavel Machek
@ 2016-11-28 13:07 ` Lino Sanfilippo
  2016-11-28 14:54   ` David Miller
  1 sibling, 1 reply; 64+ messages in thread
From: Lino Sanfilippo @ 2016-11-28 13:07 UTC (permalink / raw)
  To: Pavel Machek, peppe.cavallaro, netdev, kernel list

Hi Pavel,

On 23.11.2016 11:51, Pavel Machek wrote:

> I'm debugging strange delays during transmit in stmmac driver. They
> seem to be present in 4.4 kernel (and older kernels, too). Workload is
> burst of udp packets being sent, pause, burst of udp packets, ...
>
> Test code is attached, I use these parameters for testing:
>
> ./udp-test raw 10.0.0.6 1234 1000 100 30
>
> The delays seem to be related to coalescing:
>
> drivers/net/ethernet/stmicro/stmmac/common.h
> #define STMMAC_COAL_TX_TIMER    40000
> #define STMMAC_MAX_COAL_TX_TICK 100000
> #define STMMAC_TX_MAX_FRAMES    256
>
> If I lower the parameters, delays are gone, but I get netdev watchdog
> backtrace followed by broken driver.
>
> Any ideas what is going on there?
>
> [I'm currently trying to get newer kernels working on affected
> hardware.]
>
> Best regards,
>
> 									Pavel

I once encountered a similar behaviour with a driver. The reason was that the socket
queue limit was temporarily exhausted because the irq handler did not free the tx skbs
fast enough (that driver also used irq coalescing).
Calling skb_orphan() in the xmit handler made this issue disappear.

Regards,
Lino  

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac ethernet: remove cut & paste code
  2016-11-28 11:50                 ` Pavel Machek
@ 2016-11-28 14:24                   ` Joe Perches
  2016-11-28 14:35                     ` Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: Joe Perches @ 2016-11-28 14:24 UTC (permalink / raw)
  To: Pavel Machek
  Cc: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier

On Mon, 2016-11-28 at 12:50 +0100, Pavel Machek wrote:
> On Thu 2016-11-24 14:27:13, Joe Perches wrote:
> > On Thu, 2016-11-24 at 22:44 +0100, Pavel Machek wrote:
> > > On Thu 2016-11-24 12:05:25, Joe Perches wrote:
> > > > On Thu, 2016-11-24 at 12:05 +0100, Pavel Machek wrote:
> > > > > Remove duplicate code from _tx routines.
> > > > 
> > > > trivia:
> > > > 
> > > > > diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> > > > 
> > > > []
> > > > > @@ -1960,6 +1960,38 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
> > > > >  	}
> > > > >  }
> > > > >  
> > > > > +static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int nfrags, struct dma_desc *desc)
> > > > > +{
> > > > > +	struct stmmac_priv *priv = netdev_priv(dev);
> > > > > +
> > > > > +	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
> > > > > +		if (netif_msg_hw(priv))
> > > > > +			pr_debug("%s: stop transmitted packets\n", __func__);
> > > > 
> > > > 		netif_dbg(priv, hw, dev, "%s: stop transmitted packets\n",
> > > > 			  __func__);
> > > 
> > > Not now. Modifying the code while de-duplicating would be bad idea.
> > 
> > Too many people think overly granular patches are the
> > best and only way to make changes.
> > Deduplication and consolidation can happen simultaneously.
> 
> Can, but should not at this point. Please take a look at the driver in
> question before commenting on trivial printk style.

I had.

It's perfectly acceptable and already uses netif_<level> properly.

This consolidation now introduces the _only_ instance where it is
now improperly using a netif_msg_<type> then single pr_<level>
function sequence that should be consolidated into netif_dbg.

Every other use of netif_msg_<level> then either emits multiple
lines or is used in an if/else.

cheers, Joe

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac ethernet: remove cut & paste code
  2016-11-28 14:24                   ` Joe Perches
@ 2016-11-28 14:35                     ` Pavel Machek
  2016-11-28 16:03                       ` Joe Perches
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-11-28 14:35 UTC (permalink / raw)
  To: Joe Perches
  Cc: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier

[-- Attachment #1: Type: text/plain, Size: 2695 bytes --]

On Mon 2016-11-28 06:24:28, Joe Perches wrote:
> On Mon, 2016-11-28 at 12:50 +0100, Pavel Machek wrote:
> > On Thu 2016-11-24 14:27:13, Joe Perches wrote:
> > > On Thu, 2016-11-24 at 22:44 +0100, Pavel Machek wrote:
> > > > On Thu 2016-11-24 12:05:25, Joe Perches wrote:
> > > > > On Thu, 2016-11-24 at 12:05 +0100, Pavel Machek wrote:
> > > > > > Remove duplicate code from _tx routines.
> > > > > 
> > > > > trivia:
> > > > > 
> > > > > > diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> > > > > 
> > > > > []
> > > > > > @@ -1960,6 +1960,38 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
> > > > > >  	}
> > > > > >  }
> > > > > >  
> > > > > > +static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int nfrags, struct dma_desc *desc)
> > > > > > +{
> > > > > > +	struct stmmac_priv *priv = netdev_priv(dev);
> > > > > > +
> > > > > > +	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
> > > > > > +		if (netif_msg_hw(priv))
> > > > > > +			pr_debug("%s: stop transmitted packets\n", __func__);
> > > > > 
> > > > > 		netif_dbg(priv, hw, dev, "%s: stop transmitted packets\n",
> > > > > 			  __func__);
> > > > 
> > > > Not now. Modifying the code while de-duplicating would be bad idea.
> > > 
> > > Too many people think overly granular patches are the
> > > best and only way to make changes.
> > > Deduplication and consolidation can happen simultaneously.
> > 
> > Can, but should not at this point. Please take a look at the driver in
> > question before commenting on trivial printk style.
> 
> I had.
> 
> It's perfectly acceptable and already uses netif_<level> properly.
> 
> This consolidation now introduces the _only_ instance where it is
> now improperly using a netif_msg_<type> then single pr_<level>
> function sequence that should be consolidated into netif_dbg.

> Every other use of netif_msg_<level> then either emits multiple
> lines or is used in an if/else.

Are you looking at right driver? I don't see single use of
netif_msg_<level>, but see this at stmmac_main.c:756. Code is actually
pretty consistent using pr_*.

                                if (netif_msg_link(priv))
                                        pr_warn("%s: Speed (%d) not 10/100\n",
                                                dev->name, phydev->speed);

Anyway, I'm moving code around, if you want to do trivial cleanups, do
them yourself.
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
  2016-11-28 13:07 ` stmmac ethernet in kernel 4.4: coalescing related pauses? Lino Sanfilippo
@ 2016-11-28 14:54   ` David Miller
  2016-11-28 15:31     ` Eric Dumazet
  2016-11-28 15:33     ` Lino Sanfilippo
  0 siblings, 2 replies; 64+ messages in thread
From: David Miller @ 2016-11-28 14:54 UTC (permalink / raw)
  To: lsanfil; +Cc: pavel, peppe.cavallaro, netdev, linux-kernel

From: Lino Sanfilippo <lsanfil@marvell.com>
Date: Mon, 28 Nov 2016 14:07:51 +0100

> Calling skb_orphan() in the xmit handler made this issue disappear.

This is not the way to handle this problem.

The solution is to free the SKBs in a timely manner after the
chip has transmitted the frame.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac: reduce code duplication getting basic descriptors
  2016-11-28 12:17     ` [PATCH] stmmac: reduce code duplication getting basic descriptors Pavel Machek
@ 2016-11-28 15:25       ` kbuild test robot
  2016-12-02 14:09       ` Alexandre Torgue
  1 sibling, 0 replies; 64+ messages in thread
From: kbuild test robot @ 2016-11-28 15:25 UTC (permalink / raw)
  To: Pavel Machek
  Cc: kbuild-all, David Miller, Andrew Morton, alexandre.torgue,
	peppe.cavallaro, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1828 bytes --]

Hi Pavel,

[auto build test WARNING on net-next/master]
[also build test WARNING on v4.9-rc7 next-20161128]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Pavel-Machek/stmmac-reduce-code-duplication-getting-basic-descriptors/20161128-204339
config: x86_64-randconfig-s1-11282147 (attached as .config)
compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/stmicro/stmmac/stmmac_main.c: In function 'dma_free_tx_skbufs':
>> drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:1137: warning: unused variable 'p'
   drivers/net/ethernet/stmicro/stmmac/stmmac_main.o: warning: objtool: stmmac_resume()+0x125: function has unreachable instruction

vim +/p +1137 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

  1121		return ret;
  1122	}
  1123	
  1124	static void dma_free_rx_skbufs(struct stmmac_priv *priv)
  1125	{
  1126		int i;
  1127	
  1128		for (i = 0; i < DMA_RX_SIZE; i++)
  1129			stmmac_free_rx_buffers(priv, i);
  1130	}
  1131	
  1132	static void dma_free_tx_skbufs(struct stmmac_priv *priv)
  1133	{
  1134		int i;
  1135	
  1136		for (i = 0; i < DMA_TX_SIZE; i++) {
> 1137			struct dma_desc *p = stmmac_tx_desc(priv, i);
  1138	
  1139			if (priv->tx_skbuff_dma[i].buf) {
  1140				if (priv->tx_skbuff_dma[i].map_as_page)
  1141					dma_unmap_page(priv->device,
  1142						       priv->tx_skbuff_dma[i].buf,
  1143						       priv->tx_skbuff_dma[i].len,
  1144						       DMA_TO_DEVICE);
  1145				else

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 31122 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
  2016-11-28 14:54   ` David Miller
@ 2016-11-28 15:31     ` Eric Dumazet
  2016-11-28 15:57       ` Lino Sanfilippo
  2016-11-30 10:28       ` Pavel Machek
  2016-11-28 15:33     ` Lino Sanfilippo
  1 sibling, 2 replies; 64+ messages in thread
From: Eric Dumazet @ 2016-11-28 15:31 UTC (permalink / raw)
  To: David Miller; +Cc: lsanfil, pavel, peppe.cavallaro, netdev, linux-kernel

On Mon, 2016-11-28 at 09:54 -0500, David Miller wrote:
> From: Lino Sanfilippo <lsanfil@marvell.com>
> Date: Mon, 28 Nov 2016 14:07:51 +0100
> 
> > Calling skb_orphan() in the xmit handler made this issue disappear.
> 
> This is not the way to handle this problem.
> 
> The solution is to free the SKBs in a timely manner after the
> chip has transmitted the frame.

Note that the 'pauses' described by Pavel are also caused by a too small
SO_SNDBUF value on the UDP socket.

An immediate fix, with no kernel change is to increase it.

echo 1000000 >/proc/sys/net/core/wmem_default

or

val = 1000000;
setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
  2016-11-28 14:54   ` David Miller
  2016-11-28 15:31     ` Eric Dumazet
@ 2016-11-28 15:33     ` Lino Sanfilippo
  1 sibling, 0 replies; 64+ messages in thread
From: Lino Sanfilippo @ 2016-11-28 15:33 UTC (permalink / raw)
  To: David Miller; +Cc: pavel, peppe.cavallaro, netdev, linux-kernel

Hi,

On 28.11.2016 15:54, David Miller wrote:
> From: Lino Sanfilippo <lsanfil@marvell.com>
> Date: Mon, 28 Nov 2016 14:07:51 +0100
>
>> Calling skb_orphan() in the xmit handler made this issue disappear.
>
> This is not the way to handle this problem.
>

I agree, its not a clean solution. But if the use of skb_orphan makes the delays
disappear, it can at least be a hint that socket queue limits are involved.

Regards,
Lino

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
  2016-11-28 15:31     ` Eric Dumazet
@ 2016-11-28 15:57       ` Lino Sanfilippo
  2016-11-28 16:30         ` David Miller
  2016-11-30 10:28       ` Pavel Machek
  1 sibling, 1 reply; 64+ messages in thread
From: Lino Sanfilippo @ 2016-11-28 15:57 UTC (permalink / raw)
  To: Eric Dumazet, David Miller; +Cc: pavel, peppe.cavallaro, netdev, linux-kernel



On 28.11.2016 16:31, Eric Dumazet wrote:
> On Mon, 2016-11-28 at 09:54 -0500, David Miller wrote:
>> From: Lino Sanfilippo <lsanfil@marvell.com>
>> Date: Mon, 28 Nov 2016 14:07:51 +0100
>>
>>> Calling skb_orphan() in the xmit handler made this issue disappear.
>>
>> This is not the way to handle this problem.
>>
>> The solution is to free the SKBs in a timely manner after the
>> chip has transmitted the frame.
>
> Note that the 'pauses' described by Pavel are also caused by a too small
> SO_SNDBUF value on the UDP socket.
>
> An immediate fix, with no kernel change is to increase it.
>
> echo 1000000 >/proc/sys/net/core/wmem_default
>
> or
>
> val = 1000000;
> setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
>

I wonder if the best fix would be indeed to deactivate irq coalescing completely.
Does it make any sense at all to use it if a driver uses NAPI already?

Regards,
Lino

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac ethernet: remove cut & paste code
  2016-11-28 14:35                     ` Pavel Machek
@ 2016-11-28 16:03                       ` Joe Perches
  0 siblings, 0 replies; 64+ messages in thread
From: Joe Perches @ 2016-11-28 16:03 UTC (permalink / raw)
  To: Pavel Machek
  Cc: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier

On Mon, 2016-11-28 at 15:35 +0100, Pavel Machek wrote:
> On Mon 2016-11-28 06:24:28, Joe Perches wrote:
> > On Mon, 2016-11-28 at 12:50 +0100, Pavel Machek wrote:
> > > On Thu 2016-11-24 14:27:13, Joe Perches wrote:
> > > > On Thu, 2016-11-24 at 22:44 +0100, Pavel Machek wrote:
> > > > > On Thu 2016-11-24 12:05:25, Joe Perches wrote:
> > > > > > On Thu, 2016-11-24 at 12:05 +0100, Pavel Machek wrote:
> > > > > > > Remove duplicate code from _tx routines.
> > > > > > 
> > > > > > trivia:
> > > > > > 
> > > > > > > diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> > > > > > 
> > > > > > []
> > > > > > > @@ -1960,6 +1960,38 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
> > > > > > >  	}
> > > > > > >  }
> > > > > > >  
> > > > > > > +static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int nfrags, struct dma_desc *desc)
> > > > > > > +{
> > > > > > > +	struct stmmac_priv *priv = netdev_priv(dev);
> > > > > > > +
> > > > > > > +	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
> > > > > > > +		if (netif_msg_hw(priv))
> > > > > > > +			pr_debug("%s: stop transmitted packets\n", __func__);
> > > > > > 
> > > > > > 		netif_dbg(priv, hw, dev, "%s: stop transmitted packets\n",
> > > > > > 			  __func__);
> > > > > 
> > > > > Not now. Modifying the code while de-duplicating would be bad idea.
> > > > 
> > > > Too many people think overly granular patches are the
> > > > best and only way to make changes.
> > > > Deduplication and consolidation can happen simultaneously.
> > > 
> > > Can, but should not at this point. Please take a look at the driver in
> > > question before commenting on trivial printk style.
> > 
> > I had.
> > 
> > It's perfectly acceptable and already uses netif_<level> properly.
> > 
> > This consolidation now introduces the _only_ instance where it is
> > now improperly using a netif_msg_<type> then single pr_<level>
> > function sequence that should be consolidated into netif_dbg.
> > Every other use of netif_msg_<level> then either emits multiple
> > lines or is used in an if/else.
> 
> Are you looking at right driver?

Yes and I think you should make changes against -next
and not Linus' where this is:

b3e51069627e2 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c (LABBE Corentin            2016-11-16 20:09:41 +0100  755)                              netif_warn(priv, link, priv->dev,
b3e51069627e2 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c (LABBE Corentin            2016-11-16 20:09:41 +0100  756)                                         "Speed (%d) not 10/100\n",
b3e51069627e2 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c (LABBE Corentin            2016-11-16 20:09:41 +0100  757)                                         phydev->speed);

>  I don't see single use of
> netif_msg_<level>, but see this at stmmac_main.c:756. Code is actually
> pretty consistent using pr_*.
> 
>                                 if (netif_msg_link(priv))
>                                         pr_warn("%s: Speed (%d) not 10/100\n",
>                                                 dev->name, phydev->speed);
> 
> Anyway, I'm moving code around, if you want to do trivial cleanups, do
> them yourself.

cheers, Joe

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
  2016-11-28 15:57       ` Lino Sanfilippo
@ 2016-11-28 16:30         ` David Miller
  2016-11-28 17:01           ` Lino Sanfilippo
  0 siblings, 1 reply; 64+ messages in thread
From: David Miller @ 2016-11-28 16:30 UTC (permalink / raw)
  To: lsanfil; +Cc: eric.dumazet, pavel, peppe.cavallaro, netdev, linux-kernel

From: Lino Sanfilippo <lsanfil@marvell.com>
Date: Mon, 28 Nov 2016 16:57:35 +0100

> I wonder if the best fix would be indeed to deactivate irq coalescing
> completely.
> Does it make any sense at all to use it if a driver uses NAPI already?

It absolutely does make sense, when it is implemented and functions
properly.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
  2016-11-28 16:30         ` David Miller
@ 2016-11-28 17:01           ` Lino Sanfilippo
  0 siblings, 0 replies; 64+ messages in thread
From: Lino Sanfilippo @ 2016-11-28 17:01 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, pavel, peppe.cavallaro, netdev, linux-kernel



On 28.11.2016 17:30, David Miller wrote:
> From: Lino Sanfilippo <lsanfil@marvell.com>
> Date: Mon, 28 Nov 2016 16:57:35 +0100
>
>> I wonder if the best fix would be indeed to deactivate irq coalescing
>> completely.
>> Does it make any sense at all to use it if a driver uses NAPI already?
>
> It absolutely does make sense, when it is implemented and functions
> properly.
>

Interesting. I always thought both (NAPI and irq coalescing) are essentially doing the same thing only
one time in software and one time with hw support. Did I misunderstand NAPI?

Regards,
Lino

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac: fix comments, make debug output consistent
  2016-11-28 11:55     ` [PATCH] stmmac: fix comments, make debug output consistent Pavel Machek
@ 2016-11-30  0:53       ` David Miller
  0 siblings, 0 replies; 64+ messages in thread
From: David Miller @ 2016-11-30  0:53 UTC (permalink / raw)
  To: pavel; +Cc: alexandre.torgue, peppe.cavallaro, netdev, linux-kernel, akpm

From: Pavel Machek <pavel@ucw.cz>
Date: Mon, 28 Nov 2016 12:55:59 +0100

> Fix comments, add some new, and make debugfs output consistent.
>     
> Signed-off-by: Pavel Machek <pavel@denx.de>

Applied to net-next, thanks.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
  2016-11-28 15:31     ` Eric Dumazet
  2016-11-28 15:57       ` Lino Sanfilippo
@ 2016-11-30 10:28       ` Pavel Machek
  1 sibling, 0 replies; 64+ messages in thread
From: Pavel Machek @ 2016-11-30 10:28 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, lsanfil, peppe.cavallaro, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1090 bytes --]

On Mon 2016-11-28 07:31:43, Eric Dumazet wrote:
> On Mon, 2016-11-28 at 09:54 -0500, David Miller wrote:
> > From: Lino Sanfilippo <lsanfil@marvell.com>
> > Date: Mon, 28 Nov 2016 14:07:51 +0100
> > 
> > > Calling skb_orphan() in the xmit handler made this issue disappear.
> > 
> > This is not the way to handle this problem.
> > 
> > The solution is to free the SKBs in a timely manner after the
> > chip has transmitted the frame.
> 
> Note that the 'pauses' described by Pavel are also caused by a too small
> SO_SNDBUF value on the UDP socket.
> 
> An immediate fix, with no kernel change is to increase it.
> 
> echo 1000000 >/proc/sys/net/core/wmem_default

Thanks a lot. For the record, that works around the problem, too. (Or
at least helps a lot; it may be possible that problem still remains if
continuous stream of packets is going to trigger this, if I read the
sources correctly.)

Best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH] stmmac: simplify flag assignment
  2016-11-24 16:04   ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses David Miller
                       ` (3 preceding siblings ...)
  2016-11-28 12:17     ` [PATCH] stmmac: reduce code duplication getting basic descriptors Pavel Machek
@ 2016-11-30 11:44     ` Pavel Machek
  2016-12-01 20:23       ` David Miller
  2016-12-02  8:27       ` [PATCH] stmmac: simplify flag assignment Giuseppe CAVALLARO
  2016-12-01 10:32     ` [PATCH] stmmac: cleanup documenation, make it match reality Pavel Machek
  2016-12-05 12:27     ` [PATCH] stmmac: disable tx coalescing Pavel Machek
  6 siblings, 2 replies; 64+ messages in thread
From: Pavel Machek @ 2016-11-30 11:44 UTC (permalink / raw)
  To: David Miller; +Cc: peppe.cavallaro, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 933 bytes --]


Simplify flag assignment.
    
Signed-off-by: Pavel Machek <pavel@denx.de>

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ed20668..0b706a7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2771,12 +2771,8 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev,
 		features &= ~NETIF_F_CSUM_MASK;
 
 	/* Disable tso if asked by ethtool */
-	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
-		if (features & NETIF_F_TSO)
-			priv->tso = true;
-		else
-			priv->tso = false;
-	}
+	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen))
+		priv->tso = !!(features & NETIF_F_TSO);
 
 	return features;
 }


-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* [PATCH] stmmac: cleanup documenation, make it match reality
  2016-11-24 16:04   ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses David Miller
                       ` (4 preceding siblings ...)
  2016-11-30 11:44     ` [PATCH] stmmac: simplify flag assignment Pavel Machek
@ 2016-12-01 10:32     ` Pavel Machek
  2016-12-03 20:07       ` David Miller
  2016-12-05 12:27     ` [PATCH] stmmac: disable tx coalescing Pavel Machek
  6 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-12-01 10:32 UTC (permalink / raw)
  To: David Miller, Andrew Morton; +Cc: peppe.cavallaro, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 5951 bytes --]


Fix english in documentation, make documentation match reality, remove
options that were removed from code.
    
Signed-off-by: Pavel Machek <pavel@denx.de>

diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index e226f89..014f4f7 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -28,8 +28,6 @@ CONFIG_STMMAC_PCI: is to enable the pci driver.
 2) Driver parameters list:
 	debug: message level (0: no output, 16: all);
 	phyaddr: to manually provide the physical address to the PHY device;
-	dma_rxsize: DMA rx ring size;
-	dma_txsize: DMA tx ring size;
 	buf_sz: DMA buffer size;
 	tc: control the HW FIFO threshold;
 	watchdog: transmit timeout (in milliseconds);
@@ -40,31 +38,31 @@ CONFIG_STMMAC_PCI: is to enable the pci driver.
 
 3) Command line options
 Driver parameters can be also passed in command line by using:
-	stmmaceth=dma_rxsize:128,dma_txsize:512
+	stmmaceth=watchdog:100,chain_mode=1
 
 4) Driver information and notes
 
 4.1) Transmit process
 The xmit method is invoked when the kernel needs to transmit a packet; it sets
-the descriptors in the ring and informs the DMA engine that there is a packet
+the descriptors in the ring and informs the DMA engine, that there is a packet
 ready to be transmitted.
 By default, the driver sets the NETIF_F_SG bit in the features field of the
-net_device structure enabling the scatter-gather feature. This is true on
+net_device structure, enabling the scatter-gather feature. This is true on
 chips and configurations where the checksum can be done in hardware.
-Once the controller has finished transmitting the packet, napi will be
+Once the controller has finished transmitting the packet, timer will be
 scheduled to release the transmit resources.
 
 4.2) Receive process
 When one or more packets are received, an interrupt happens. The interrupts
-are not queued so the driver has to scan all the descriptors in the ring during
+are not queued, so the driver has to scan all the descriptors in the ring during
 the receive process.
-This is based on NAPI so the interrupt handler signals only if there is work
+This is based on NAPI, so the interrupt handler signals only if there is work
 to be done, and it exits.
 Then the poll method will be scheduled at some future point.
 The incoming packets are stored, by the DMA, in a list of pre-allocated socket
 buffers in order to avoid the memcpy (zero-copy).
 
-4.3) Interrupt Mitigation
+4.3) Interrupt mitigation
 The driver is able to mitigate the number of its DMA interrupts
 using NAPI for the reception on chips older than the 3.50.
 New chips have an HW RX-Watchdog used for this mitigation.
@@ -88,19 +86,20 @@ the list, hence creating the explicit chaining in the descriptor itself,
 whereas such explicit chaining is not possible in RING mode.
 
 4.5.1) Extended descriptors
-	The extended descriptors give us information about the Ethernet payload
-	when it is carrying PTP packets or TCP/UDP/ICMP over IP.
-	These are not available on GMAC Synopsys chips older than the 3.50.
-	At probe time the driver will decide if these can be actually used.
-	This support also is mandatory for PTPv2 because the extra descriptors
-	are used for saving the hardware timestamps and Extended Status.
+The extended descriptors give us information about the Ethernet payload
+when it is carrying PTP packets or TCP/UDP/ICMP over IP.
+These are not available on GMAC Synopsys chips older than the 3.50.
+At probe time the driver will decide if these can be actually used.
+This support also is mandatory for PTPv2 because the extra descriptors
+are used for saving the hardware timestamps and Extended Status.
 
 4.6) Ethtool support
 Ethtool is supported.
 
 For example, driver statistics (including RMON), internal errors can be taken
 using:
-  # ethtool -S ethX command
+  # ethtool -S ethX
+command
 
 4.7) Jumbo and Segmentation Offloading
 Jumbo frames are supported and tested for the GMAC.
@@ -275,11 +274,11 @@ Please see the following document:
 	Documentation/devicetree/bindings/net/stmmac.txt
 
 4.11) This is a summary of the content of some relevant files:
- o stmmac_main.c: to implement the main network device driver;
- o stmmac_mdio.c: to provide mdio functions;
- o stmmac_pci: this the PCI driver;
- o stmmac_platform.c: this the platform driver (OF supported)
- o stmmac_ethtool.c: to implement the ethtool support;
+ o stmmac_main.c: implements the main network device driver;
+ o stmmac_mdio.c: provides MDIO functions;
+ o stmmac_pci: this is the PCI driver;
+ o stmmac_platform.c: this the platform driver (OF supported);
+ o stmmac_ethtool.c: implements the ethtool support;
  o stmmac.h: private driver structure;
  o common.h: common definitions and VFTs;
  o mmc_core.c/mmc.h: Management MAC Counters;
@@ -381,12 +380,12 @@ In addition to the basic timestamp features mentioned in IEEE 1588-2002
 Timestamps, new GMAC cores support the advanced timestamp features.
 IEEE 1588-2008 that can be enabled when configure the Kernel.
 
-8) SGMII/RGMII supports
+8) SGMII/RGMII support
 New GMAC devices provide own way to manage RGMII/SGMII.
 This information is available at run-time by looking at the
 HW capability register. This means that the stmmac can manage
-auto-negotiation and link status w/o using the PHYLIB stuff
+auto-negotiation and link status w/o using the PHYLIB stuff.
 In fact, the HW provides a subset of extended registers to
 restart the ANE, verify Full/Half duplex mode and Speed.
-Also thanks to these registers it is possible to look at the
+Thanks to these registers, it is possible to look at the
 Auto-negotiated Link Parter Ability.
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac: simplify flag assignment
  2016-11-30 11:44     ` [PATCH] stmmac: simplify flag assignment Pavel Machek
@ 2016-12-01 20:23       ` David Miller
  2016-12-01 22:48         ` stmmac: turn coalescing / NAPI off in stmmac Pavel Machek
  2016-12-02  8:27       ` [PATCH] stmmac: simplify flag assignment Giuseppe CAVALLARO
  1 sibling, 1 reply; 64+ messages in thread
From: David Miller @ 2016-12-01 20:23 UTC (permalink / raw)
  To: pavel; +Cc: peppe.cavallaro, netdev, linux-kernel

From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 30 Nov 2016 12:44:31 +0100

> 
> Simplify flag assignment.
>     
> Signed-off-by: Pavel Machek <pavel@denx.de>
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index ed20668..0b706a7 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -2771,12 +2771,8 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev,
>  		features &= ~NETIF_F_CSUM_MASK;
>  
>  	/* Disable tso if asked by ethtool */
> -	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
> -		if (features & NETIF_F_TSO)
> -			priv->tso = true;
> -		else
> -			priv->tso = false;
> -	}
> +	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen))
> +		priv->tso = !!(features & NETIF_F_TSO);
>  

Pavel, this really seems arbitrary.

Whilst I really appreciate you're looking into this driver a bit because
of some issues you are trying to resolve, I'd like to ask that you not
start bombarding me with nit-pick cleanups here and there and instead
concentrate on the real bug or issue.

Thanks in advance.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* stmmac: turn coalescing / NAPI off in stmmac
  2016-12-01 20:23       ` David Miller
@ 2016-12-01 22:48         ` Pavel Machek
  2016-12-02  8:39           ` Giuseppe CAVALLARO
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-12-01 22:48 UTC (permalink / raw)
  To: David Miller, alexandre.torgue; +Cc: peppe.cavallaro, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 7554 bytes --]


> > @@ -2771,12 +2771,8 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev,
> >  		features &= ~NETIF_F_CSUM_MASK;
> >  
> >  	/* Disable tso if asked by ethtool */
> > -	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
> > -		if (features & NETIF_F_TSO)
> > -			priv->tso = true;
> > -		else
> > -			priv->tso = false;
> > -	}
> > +	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen))
> > +		priv->tso = !!(features & NETIF_F_TSO);
> >  
> 
> Pavel, this really seems arbitrary.
> 
> Whilst I really appreciate you're looking into this driver a bit because
> of some issues you are trying to resolve, I'd like to ask that you not
> start bombarding me with nit-pick cleanups here and there and instead
> concentrate on the real bug or issue.

Well, fixing clean code is easier than fixing strange code... Plus I
was hoping to make the mainainers to talk. The driver is listed as
supported after all.

Anyway... since you asked. I belive I have way to disable NAPI / tx
coalescing in the driver. Unfortunately, locking is missing on the rx
path, and needs to be extended to _irqsave variant on tx path.

So patch currently looks like this (hand edited, can't be
applied, got it working few hours ago). Does it look acceptable?

I'd prefer this to go after the patch that pulls common code to single
place, so that single place needs to be patched. Plus I guess I should
add ifdefs, so that more advanced NAPI / tx coalescing code can be
reactivated when it is fixed. Trivial fixes can go on top. Does that
sound like a plan?

Which tree do you want patches against?

https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/ ?

Best regards,
								Pavel


diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0b706a7..c0016c8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1395,9 +1397,10 @@ static void __stmmac_tx_clean(struct stmmac_priv *priv)
 
 static void stmmac_tx_clean(struct stmmac_priv *priv)
 {
-	spin_lock(&priv->tx_lock);
+	unsigned long flags;
+	spin_lock_irqsave(&priv->tx_lock, flags);
 	__stmmac_tx_clean(priv);
-	spin_unlock(&priv->tx_lock);	
+	spin_unlock_irqrestore(&priv->tx_lock, flags);	
 }
 
 static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv)
@@ -1441,6 +1444,8 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
 	netif_wake_queue(priv->dev);
 }
 
+static int stmmac_rx(struct stmmac_priv *priv, int limit);
+
 /**
  * stmmac_dma_interrupt - DMA ISR
  * @priv: driver private structure
@@ -1452,10 +1457,17 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 {
 	int status;
 	int rxfifosz = priv->plat->rx_fifo_size;
+	unsigned long flags;
 
 	status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
 	if (likely((status & handle_rx)) || (status & handle_tx)) {
+		int r;
+		spin_lock_irqsave(&priv->tx_lock, flags);
+		r = stmmac_rx(priv, 999);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);		
+#if 0
 		if (likely(napi_schedule_prep(&priv->napi))) {
 			//pr_err("napi: schedule\n");
 			stmmac_disable_dma_irq(priv);
@@ -1463,7 +1475,8 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 		} else
 			pr_err("napi: schedule failed\n");
 #endif
+		stmmac_tx_clean(priv);
 	}
 	if (unlikely(status & tx_hard_error_bump_tc)) {
 		/* Try to bump up the dma threshold on this failure */
@@ -1638,7 +1651,7 @@ static void stmmac_tx_timer(unsigned long data)
 {
 	struct stmmac_priv *priv = (struct stmmac_priv *)data;
 
-	stmmac_tx_clean(priv);
+	//stmmac_tx_clean(priv);
 }
 
 /**
@@ -1990,6 +2003,8 @@ static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int
 	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
 		mod_timer(&priv->txtimer,
 			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
+		priv->hw->desc->set_tx_ic(desc);
+		priv->xstats.tx_set_ic_bit++;
 	} else {
 		priv->tx_count_frames = 0;
 		priv->hw->desc->set_tx_ic(desc);
@@ -2038,8 +2053,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct dma_desc *desc, *first, *mss_desc = NULL;
 	u8 proto_hdr_len;
 	int i;
+	unsigned long flags;
 
-	spin_lock(&priv->tx_lock);
+	spin_lock_irqsave(&priv->tx_lock, flags);
 
 	/* Compute header lengths */
 	proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -2052,7 +2068,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 			/* This is a hard error, log it. */
 			pr_err("%s: Tx Ring full when queue awake\n", __func__);
 		}
-		spin_unlock(&priv->tx_lock);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -2168,11 +2184,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
 				       STMMAC_CHAN0);
 
-	spin_unlock(&priv->tx_lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 	return NETDEV_TX_OK;
 
 dma_map_err:
-	spin_unlock(&priv->tx_lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 	dev_err(priv->device, "Tx dma map failed\n");
 	dev_kfree_skb(skb);
 	priv->dev->stats.tx_dropped++;
@@ -2197,6 +2213,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct dma_desc *desc, *first;
 	unsigned int enh_desc;
 	unsigned int des;
+	unsigned int flags;
 
 	/* Manage oversized TCP frames for GMAC4 device */
 	if (skb_is_gso(skb) && priv->tso) {
@@ -2204,7 +2221,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 			return stmmac_tso_xmit(skb, dev);
 	}
 
-	spin_lock(&priv->tx_lock);
+	spin_lock_irqsave(&priv->tx_lock, flags);
 
 	if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
 		if (!netif_queue_stopped(dev)) {
@@ -2212,7 +2229,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 			/* This is a hard error, log it. */
 			pr_err("%s: Tx Ring full when queue awake\n", __func__);
 		}
-		spin_unlock(&priv->tx_lock);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -2347,11 +2364,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
 					       STMMAC_CHAN0);
 
-	spin_unlock(&priv->tx_lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 	return NETDEV_TX_OK;
 
 dma_map_err:
-	spin_unlock(&priv->tx_lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 	dev_err(priv->device, "Tx dma map failed\n");
 	dev_kfree_skb(skb);
 	priv->dev->stats.tx_dropped++;
@@ -2634,7 +2651,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 			else
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-			napi_gro_receive(&priv->napi, skb);
+			//napi_gro_receive(&priv->napi, skb);
+			netif_rx(skb);
 
 			priv->dev->stats.rx_packets++;
 			priv->dev->stats.rx_bytes += frame_len;
@@ -2662,6 +2680,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 	struct stmmac_priv *priv = container_of(napi, struct stmmac_priv, napi);
 	int work_done;
 
+	BUG();
 	priv->xstats.napi_poll++;
 	stmmac_tx_clean(priv);
 



-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-11-24 21:25     ` Pavel Machek
@ 2016-12-02  8:24       ` Giuseppe CAVALLARO
  2016-12-02  8:41         ` Giuseppe CAVALLARO
                           ` (2 more replies)
  0 siblings, 3 replies; 64+ messages in thread
From: Giuseppe CAVALLARO @ 2016-12-02  8:24 UTC (permalink / raw)
  To: Pavel Machek, David Miller; +Cc: netdev, linux-kernel


Hello


On 11/24/2016 10:25 PM, Pavel Machek wrote:
> Hi!
>
>>>> I'm debugging strange delays during transmit in stmmac driver. They
>>>> seem to be present in 4.4 kernel (and older kernels, too). Workload is
>>>> burst of udp packets being sent, pause, burst of udp packets, ...
> ...
>>> 4.9-rc6 still has the delays. With the
>>>
>>> #define STMMAC_COAL_TX_TIMER 1000
>>> #define STMMAC_TX_MAX_FRAMES 2
>>>
>>> settings, delays go away, and driver still works. (It fails fairly
>>> fast in 4.4). Good news. But the question still is: what is going on
>>> there?
>>
>> 256 packets looks way too large for being a trigger for aborting the
>> TX coalescing timer.
>>
>> Looking more deeply into this, the driver is using non-highres timers
>> to implement the TX coalescing.  This simply cannot work.
>>
>> 1 HZ, which is the lowest granularity of non-highres timers in the
>> kernel, is variable as well as already too large of a delay for
>> effective TX coalescing.
>>
>> I seriously think that the TX coalescing support should be ripped out
>> or disabled entirely until it is implemented properly in this
>> driver.
>
> Ok, I'd disable coalescing, but could not figure it out till. What is
> generic way to do that?
>
> It seems only thing stmmac_tx_timer() does is calling
> stmmac_tx_clean(), which reclaims tx_skbuff[] entries. It should be
> possible to do that explicitely, without delay, but it stops working
> completely if I attempt to do that.
>
> On a side note, stmmac_poll() does stmmac_enable_dma_irq() while
> stmmac_dma_interrupt() disables interrupts. But I don't see any
> protection between the two, so IMO it could race and we'd end up
> without polling or interrupts...


the idea behind the TX mitigation is to mix the interrupt and
timer and this approach gave us real benefit in terms
of performances and CPU usage (especially on SH4-200/SH4-300 platforms
based).
In the ring, some descriptors can raise the irq (according to a
threshold) and set the IC bit. In this path, the NAPI  poll will be
scheduled.
But there is a timer that can run (and we experimented that no high
resolution is needed) to clear the tx resources.
Concerning the lock protection, we had reviewed long time ago and
IIRC, no raise condition should be present. Open to review it, again!

So, welcome any other schema and testing on platforms supported.


Hoping this summary can help.

Peppe


>
> Thanks and best regards,
> 									Pavel
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac: simplify flag assignment
  2016-11-30 11:44     ` [PATCH] stmmac: simplify flag assignment Pavel Machek
  2016-12-01 20:23       ` David Miller
@ 2016-12-02  8:27       ` Giuseppe CAVALLARO
  1 sibling, 0 replies; 64+ messages in thread
From: Giuseppe CAVALLARO @ 2016-12-02  8:27 UTC (permalink / raw)
  To: Pavel Machek, David Miller; +Cc: netdev, linux-kernel, Alexandre TORGUE

+ Alex

On 11/30/2016 12:44 PM, Pavel Machek wrote:
>
> Simplify flag assignment.
>
> Signed-off-by: Pavel Machek <pavel@denx.de>
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index ed20668..0b706a7 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -2771,12 +2771,8 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev,
>  		features &= ~NETIF_F_CSUM_MASK;
>
>  	/* Disable tso if asked by ethtool */
> -	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
> -		if (features & NETIF_F_TSO)
> -			priv->tso = true;
> -		else
> -			priv->tso = false;
> -	}
> +	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen))
> +		priv->tso = !!(features & NETIF_F_TSO);
>
>  	return features;
>  }
>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac: turn coalescing / NAPI off in stmmac
  2016-12-01 22:48         ` stmmac: turn coalescing / NAPI off in stmmac Pavel Machek
@ 2016-12-02  8:39           ` Giuseppe CAVALLARO
  2016-12-02 10:42             ` Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: Giuseppe CAVALLARO @ 2016-12-02  8:39 UTC (permalink / raw)
  To: Pavel Machek, David Miller, alexandre.torgue; +Cc: netdev, linux-kernel

On 12/1/2016 11:48 PM, Pavel Machek wrote:
>
>>> @@ -2771,12 +2771,8 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev,
>>>  		features &= ~NETIF_F_CSUM_MASK;
>>>
>>>  	/* Disable tso if asked by ethtool */
>>> -	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
>>> -		if (features & NETIF_F_TSO)
>>> -			priv->tso = true;
>>> -		else
>>> -			priv->tso = false;
>>> -	}
>>> +	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen))
>>> +		priv->tso = !!(features & NETIF_F_TSO);
>>>
>>
>> Pavel, this really seems arbitrary.
>>
>> Whilst I really appreciate you're looking into this driver a bit because
>> of some issues you are trying to resolve, I'd like to ask that you not
>> start bombarding me with nit-pick cleanups here and there and instead
>> concentrate on the real bug or issue.
>
> Well, fixing clean code is easier than fixing strange code... Plus I
> was hoping to make the mainainers to talk. The driver is listed as
> supported after all.

Absolutely, I am available to support you, better I can.
So no problem to clarify strange or complex parts of the driver
and find/try new solutions to enhance it.

> Anyway... since you asked. I belive I have way to disable NAPI / tx
> coalescing in the driver. Unfortunately, locking is missing on the rx
> path, and needs to be extended to _irqsave variant on tx path.

I have just replied to a previous thread about that...

To be honest, I have in the box just a patch to fix lock on lpi
as I had discussed in this mailing list some week ago.
I will provide it asap.

>
> So patch currently looks like this (hand edited, can't be
> applied, got it working few hours ago). Does it look acceptable?
>
> I'd prefer this to go after the patch that pulls common code to single
> place, so that single place needs to be patched. Plus I guess I should
> add ifdefs, so that more advanced NAPI / tx coalescing code can be
> reactivated when it is fixed. Trivial fixes can go on top. Does that
> sound like a plan?

Hmm, what I find strange is that, just this code is running since a
long time on several platforms and Chip versions. No raise condition
have been found or lock protection problems (also proving look
mechanisms).

I'd like to avoid to break old compatibilities and having the same
performances but if there are some bugs I can support to review
and test. Indeed, this year we have added the 4.x but some parts
of the code (for TSO) should be self-contained. So I cannot image
regressions on common part of the code... I let Alex to do a double
check.

Pavel, I ask you sorry if I missed some problems so, if you can
(as D. Miller asked) to send us a cover letter + all patches
I will try to reply soon. I can do also some tests if you ask
me that! I could run on 3.x and 4.x but I cannot promise you
benchmarks.

> Which tree do you want patches against?
>
> https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/ ?

I think that bug fixing should be on top of net.git but I let Miller
to decide.

Best Regards
Peppe

>
> Best regards,
> 								Pavel
>
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index 0b706a7..c0016c8 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -1395,9 +1397,10 @@ static void __stmmac_tx_clean(struct stmmac_priv *priv)
>
>  static void stmmac_tx_clean(struct stmmac_priv *priv)
>  {
> -	spin_lock(&priv->tx_lock);
> +	unsigned long flags;
> +	spin_lock_irqsave(&priv->tx_lock, flags);
>  	__stmmac_tx_clean(priv);
> -	spin_unlock(&priv->tx_lock);	
> +	spin_unlock_irqrestore(&priv->tx_lock, flags);	
>  }
>
>  static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv)
> @@ -1441,6 +1444,8 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
>  	netif_wake_queue(priv->dev);
>  }
>
> +static int stmmac_rx(struct stmmac_priv *priv, int limit);
> +
>  /**
>   * stmmac_dma_interrupt - DMA ISR
>   * @priv: driver private structure
> @@ -1452,10 +1457,17 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
>  {
>  	int status;
>  	int rxfifosz = priv->plat->rx_fifo_size;
> +	unsigned long flags;
>
>  	status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
>  	if (likely((status & handle_rx)) || (status & handle_tx)) {
> +		int r;
> +		spin_lock_irqsave(&priv->tx_lock, flags);
> +		r = stmmac_rx(priv, 999);
> +		spin_unlock_irqrestore(&priv->tx_lock, flags);		
> +#if 0
>  		if (likely(napi_schedule_prep(&priv->napi))) {
>  			//pr_err("napi: schedule\n");
>  			stmmac_disable_dma_irq(priv);
> @@ -1463,7 +1475,8 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
>  		} else
>  			pr_err("napi: schedule failed\n");
>  #endif
> +		stmmac_tx_clean(priv);
>  	}
>  	if (unlikely(status & tx_hard_error_bump_tc)) {
>  		/* Try to bump up the dma threshold on this failure */
> @@ -1638,7 +1651,7 @@ static void stmmac_tx_timer(unsigned long data)
>  {
>  	struct stmmac_priv *priv = (struct stmmac_priv *)data;
>
> -	stmmac_tx_clean(priv);
> +	//stmmac_tx_clean(priv);
>  }
>
>  /**
> @@ -1990,6 +2003,8 @@ static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int
>  	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
>  		mod_timer(&priv->txtimer,
>  			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
> +		priv->hw->desc->set_tx_ic(desc);
> +		priv->xstats.tx_set_ic_bit++;
>  	} else {
>  		priv->tx_count_frames = 0;
>  		priv->hw->desc->set_tx_ic(desc);
> @@ -2038,8 +2053,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>  	struct dma_desc *desc, *first, *mss_desc = NULL;
>  	u8 proto_hdr_len;
>  	int i;
> +	unsigned long flags;
>
> -	spin_lock(&priv->tx_lock);
> +	spin_lock_irqsave(&priv->tx_lock, flags);
>
>  	/* Compute header lengths */
>  	proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
> @@ -2052,7 +2068,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>  			/* This is a hard error, log it. */
>  			pr_err("%s: Tx Ring full when queue awake\n", __func__);
>  		}
> -		spin_unlock(&priv->tx_lock);
> +		spin_unlock_irqrestore(&priv->tx_lock, flags);
>  		return NETDEV_TX_BUSY;
>  	}
>
> @@ -2168,11 +2184,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>  	priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
>  				       STMMAC_CHAN0);
>
> -	spin_unlock(&priv->tx_lock);
> +	spin_unlock_irqrestore(&priv->tx_lock, flags);
>  	return NETDEV_TX_OK;
>
>  dma_map_err:
> -	spin_unlock(&priv->tx_lock);
> +	spin_unlock_irqrestore(&priv->tx_lock, flags);
>  	dev_err(priv->device, "Tx dma map failed\n");
>  	dev_kfree_skb(skb);
>  	priv->dev->stats.tx_dropped++;
> @@ -2197,6 +2213,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>  	struct dma_desc *desc, *first;
>  	unsigned int enh_desc;
>  	unsigned int des;
> +	unsigned int flags;
>
>  	/* Manage oversized TCP frames for GMAC4 device */
>  	if (skb_is_gso(skb) && priv->tso) {
> @@ -2204,7 +2221,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>  			return stmmac_tso_xmit(skb, dev);
>  	}
>
> -	spin_lock(&priv->tx_lock);
> +	spin_lock_irqsave(&priv->tx_lock, flags);
>
>  	if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
>  		if (!netif_queue_stopped(dev)) {
> @@ -2212,7 +2229,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>  			/* This is a hard error, log it. */
>  			pr_err("%s: Tx Ring full when queue awake\n", __func__);
>  		}
> -		spin_unlock(&priv->tx_lock);
> +		spin_unlock_irqrestore(&priv->tx_lock, flags);
>  		return NETDEV_TX_BUSY;
>  	}
>
> @@ -2347,11 +2364,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>  		priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
>  					       STMMAC_CHAN0);
>
> -	spin_unlock(&priv->tx_lock);
> +	spin_unlock_irqrestore(&priv->tx_lock, flags);
>  	return NETDEV_TX_OK;
>
>  dma_map_err:
> -	spin_unlock(&priv->tx_lock);
> +	spin_unlock_irqrestore(&priv->tx_lock, flags);
>  	dev_err(priv->device, "Tx dma map failed\n");
>  	dev_kfree_skb(skb);
>  	priv->dev->stats.tx_dropped++;
> @@ -2634,7 +2651,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
>  			else
>  				skb->ip_summed = CHECKSUM_UNNECESSARY;
>
> -			napi_gro_receive(&priv->napi, skb);
> +			//napi_gro_receive(&priv->napi, skb);
> +			netif_rx(skb);
>
>  			priv->dev->stats.rx_packets++;
>  			priv->dev->stats.rx_bytes += frame_len;
> @@ -2662,6 +2680,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
>  	struct stmmac_priv *priv = container_of(napi, struct stmmac_priv, napi);
>  	int work_done;
>
> +	BUG();
>  	priv->xstats.napi_poll++;
>  	stmmac_tx_clean(priv);
>
>
>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02  8:24       ` Giuseppe CAVALLARO
@ 2016-12-02  8:41         ` Giuseppe CAVALLARO
  2016-12-02  8:45         ` Pavel Machek
  2016-12-05 11:56         ` Pavel Machek
  2 siblings, 0 replies; 64+ messages in thread
From: Giuseppe CAVALLARO @ 2016-12-02  8:41 UTC (permalink / raw)
  To: Pavel Machek, David Miller; +Cc: netdev, linux-kernel, lsanfil

+ Lino

On 12/2/2016 9:24 AM, Giuseppe CAVALLARO wrote:
>
> Hello
>
>
> On 11/24/2016 10:25 PM, Pavel Machek wrote:
>> Hi!
>>
>>>>> I'm debugging strange delays during transmit in stmmac driver. They
>>>>> seem to be present in 4.4 kernel (and older kernels, too). Workload is
>>>>> burst of udp packets being sent, pause, burst of udp packets, ...
>> ...
>>>> 4.9-rc6 still has the delays. With the
>>>>
>>>> #define STMMAC_COAL_TX_TIMER 1000
>>>> #define STMMAC_TX_MAX_FRAMES 2
>>>>
>>>> settings, delays go away, and driver still works. (It fails fairly
>>>> fast in 4.4). Good news. But the question still is: what is going on
>>>> there?
>>>
>>> 256 packets looks way too large for being a trigger for aborting the
>>> TX coalescing timer.
>>>
>>> Looking more deeply into this, the driver is using non-highres timers
>>> to implement the TX coalescing.  This simply cannot work.
>>>
>>> 1 HZ, which is the lowest granularity of non-highres timers in the
>>> kernel, is variable as well as already too large of a delay for
>>> effective TX coalescing.
>>>
>>> I seriously think that the TX coalescing support should be ripped out
>>> or disabled entirely until it is implemented properly in this
>>> driver.
>>
>> Ok, I'd disable coalescing, but could not figure it out till. What is
>> generic way to do that?
>>
>> It seems only thing stmmac_tx_timer() does is calling
>> stmmac_tx_clean(), which reclaims tx_skbuff[] entries. It should be
>> possible to do that explicitely, without delay, but it stops working
>> completely if I attempt to do that.
>>
>> On a side note, stmmac_poll() does stmmac_enable_dma_irq() while
>> stmmac_dma_interrupt() disables interrupts. But I don't see any
>> protection between the two, so IMO it could race and we'd end up
>> without polling or interrupts...
>
>
> the idea behind the TX mitigation is to mix the interrupt and
> timer and this approach gave us real benefit in terms
> of performances and CPU usage (especially on SH4-200/SH4-300 platforms
> based).
> In the ring, some descriptors can raise the irq (according to a
> threshold) and set the IC bit. In this path, the NAPI  poll will be
> scheduled.
> But there is a timer that can run (and we experimented that no high
> resolution is needed) to clear the tx resources.
> Concerning the lock protection, we had reviewed long time ago and
> IIRC, no raise condition should be present. Open to review it, again!
>
> So, welcome any other schema and testing on platforms supported.
>
>
> Hoping this summary can help.
>
> Peppe
>
>
>>
>> Thanks and best regards,
>>                                     Pavel
>>
>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02  8:24       ` Giuseppe CAVALLARO
  2016-12-02  8:41         ` Giuseppe CAVALLARO
@ 2016-12-02  8:45         ` Pavel Machek
  2016-12-02  9:43           ` Giuseppe CAVALLARO
  2016-12-02 14:05           ` Aw: " Lino Sanfilippo
  2016-12-05 11:56         ` Pavel Machek
  2 siblings, 2 replies; 64+ messages in thread
From: Pavel Machek @ 2016-12-02  8:45 UTC (permalink / raw)
  To: Giuseppe CAVALLARO, alexandre.torgue; +Cc: David Miller, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 2436 bytes --]

Hi!

> >>1 HZ, which is the lowest granularity of non-highres timers in the
> >>kernel, is variable as well as already too large of a delay for
> >>effective TX coalescing.
> >>
> >>I seriously think that the TX coalescing support should be ripped out
> >>or disabled entirely until it is implemented properly in this
> >>driver.
> >
> >Ok, I'd disable coalescing, but could not figure it out till. What is
> >generic way to do that?
> >
> >It seems only thing stmmac_tx_timer() does is calling
> >stmmac_tx_clean(), which reclaims tx_skbuff[] entries. It should be
> >possible to do that explicitely, without delay, but it stops working
> >completely if I attempt to do that.
> >
> >On a side note, stmmac_poll() does stmmac_enable_dma_irq() while
> >stmmac_dma_interrupt() disables interrupts. But I don't see any
> >protection between the two, so IMO it could race and we'd end up
> >without polling or interrupts...
> 
> 
> the idea behind the TX mitigation is to mix the interrupt and
> timer and this approach gave us real benefit in terms
> of performances and CPU usage (especially on SH4-200/SH4-300 platforms
> based).

Well, if you have a workload that sends and receive packets, it tends
to work ok, as you do tx_clean() in stmmac_poll(). My workload is not
like that -- it is "sending packets at 3MB/sec, receiving none". So
the stmmac_tx_timer() is rescheduled and rescheduled and rescheduled,
and then we run out of transmit descriptors, and then 40msec passes,
and then we clean them. Bad.

And that's why low-res timers do not cut it.

> In the ring, some descriptors can raise the irq (according to a
> threshold) and set the IC bit. In this path, the NAPI  poll will be
> scheduled.

Not NAPI poll but stmmac_tx_timer(), right? 

> But there is a timer that can run (and we experimented that no high
> resolution is needed) to clear the tx resources.
> Concerning the lock protection, we had reviewed long time ago and
> IIRC, no raise condition should be present. Open to review it,
> again!

Well, I certainly like the fact that we are talking :-).

And yes, I have some questions.

There's nothing that protect stmmac_poll() from running concurently
with stmmac_dma_interrupt(), right?

Best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02  8:45         ` Pavel Machek
@ 2016-12-02  9:43           ` Giuseppe CAVALLARO
  2016-12-02 12:32             ` Pavel Machek
  2016-12-05 10:15             ` Pavel Machek
  2016-12-02 14:05           ` Aw: " Lino Sanfilippo
  1 sibling, 2 replies; 64+ messages in thread
From: Giuseppe CAVALLARO @ 2016-12-02  9:43 UTC (permalink / raw)
  To: Pavel Machek, alexandre.torgue; +Cc: David Miller, netdev, linux-kernel

Hi Pavel

On 12/2/2016 9:45 AM, Pavel Machek wrote:
> Hi!
>
>>>> 1 HZ, which is the lowest granularity of non-highres timers in the
>>>> kernel, is variable as well as already too large of a delay for
>>>> effective TX coalescing.
>>>>
>>>> I seriously think that the TX coalescing support should be ripped out
>>>> or disabled entirely until it is implemented properly in this
>>>> driver.
>>>
>>> Ok, I'd disable coalescing, but could not figure it out till. What is
>>> generic way to do that?
>>>
>>> It seems only thing stmmac_tx_timer() does is calling
>>> stmmac_tx_clean(), which reclaims tx_skbuff[] entries. It should be
>>> possible to do that explicitely, without delay, but it stops working
>>> completely if I attempt to do that.
>>>
>>> On a side note, stmmac_poll() does stmmac_enable_dma_irq() while
>>> stmmac_dma_interrupt() disables interrupts. But I don't see any
>>> protection between the two, so IMO it could race and we'd end up
>>> without polling or interrupts...
>>
>>
>> the idea behind the TX mitigation is to mix the interrupt and
>> timer and this approach gave us real benefit in terms
>> of performances and CPU usage (especially on SH4-200/SH4-300 platforms
>> based).
>
> Well, if you have a workload that sends and receive packets, it tends
> to work ok, as you do tx_clean() in stmmac_poll(). My workload is not
> like that -- it is "sending packets at 3MB/sec, receiving none". So
> the stmmac_tx_timer() is rescheduled and rescheduled and rescheduled,
> and then we run out of transmit descriptors, and then 40msec passes,
> and then we clean them. Bad.
>
> And that's why low-res timers do not cut it.

in that case, I expect that the tuning of the driver could help you.
I mean, by using ethtool, it could be enough to set the IC bit on all
the descriptors. You should touch the tx_coal_frames.

Then you can use ethtool -S to monitor the status.

We had experimented this tuning on STB IP where just datagrams
had to send externally. To be honest, although we had seen
better results w/o any timer, we kept this approach enabled
because the timer was fast enough to cover our tests on SH4 boxes.

FYI, stmmac doesn't implement adaptive algo.

>
>> In the ring, some descriptors can raise the irq (according to a
>> threshold) and set the IC bit. In this path, the NAPI  poll will be
>> scheduled.
>
> Not NAPI poll but stmmac_tx_timer(), right?

in the xmit according the the threshold the timer is started or the
interrupt is set inside the descriptor.
Then stmmac_tx_clean will be always called and, if you see the flow,
no irqlock protection is needed!

>
>> But there is a timer that can run (and we experimented that no high
>> resolution is needed) to clear the tx resources.
>> Concerning the lock protection, we had reviewed long time ago and
>> IIRC, no raise condition should be present. Open to review it,
>> again!
>
> Well, I certainly like the fact that we are talking :-).
>
> And yes, I have some questions.
>
> There's nothing that protect stmmac_poll() from running concurently
> with stmmac_dma_interrupt(), right?

This is not necessary.

Best Regards
peppe

>
> Best regards,
> 									Pavel
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac: turn coalescing / NAPI off in stmmac
  2016-12-02  8:39           ` Giuseppe CAVALLARO
@ 2016-12-02 10:42             ` Pavel Machek
  2016-12-02 15:31               ` Giuseppe CAVALLARO
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-12-02 10:42 UTC (permalink / raw)
  To: Giuseppe CAVALLARO; +Cc: David Miller, alexandre.torgue, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 2372 bytes --]

Hi!

> >Anyway... since you asked. I belive I have way to disable NAPI / tx
> >coalescing in the driver. Unfortunately, locking is missing on the rx
> >path, and needs to be extended to _irqsave variant on tx path.
> 
> I have just replied to a previous thread about that...

Yeah, please reply to David's mail where he describes why it can't
work.

> >So patch currently looks like this (hand edited, can't be
> >applied, got it working few hours ago). Does it look acceptable?
> >
> >I'd prefer this to go after the patch that pulls common code to single
> >place, so that single place needs to be patched. Plus I guess I should
> >add ifdefs, so that more advanced NAPI / tx coalescing code can be
> >reactivated when it is fixed. Trivial fixes can go on top. Does that
> >sound like a plan?
> 
> Hmm, what I find strange is that, just this code is running since a
> long time on several platforms and Chip versions. No raise condition
> have been found or lock protection problems (also proving look
> mechanisms).

Well, it works better for me when I disable CONFIG_SMP. It is normal
that locking problems are hard to reproduce :-(.

> Pavel, I ask you sorry if I missed some problems so, if you can
> (as D. Miller asked) to send us a cover letter + all patches
> I will try to reply soon. I can do also some tests if you ask
> me that! I could run on 3.x and 4.x but I cannot promise you
> benchmarks.

Actually... I have questions here. David normally pulls from you (can
I have a address of your git tree?).

Could you apply these to your git?

[PATCH] stmmac ethernet: unify locking
[PATCH] stmmac: simplify flag assignment
[PATCH] stmmac: cleanup documenation, make it match reality

They are rather trivial and independend, I'm not sure what cover
letter would say, besides "simple fixes".

Then I can re-do the reset on top of that...

> >Which tree do you want patches against?
> >
> >https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/ ?
> 
> I think that bug fixing should be on top of net.git but I let Miller
> to decide.

Hmm. It is "only" a performance problem (40msec delays).. I guess
-next is better target.

Best regards,
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02  9:43           ` Giuseppe CAVALLARO
@ 2016-12-02 12:32             ` Pavel Machek
  2016-12-02 13:51               ` Giuseppe CAVALLARO
  2016-12-05 10:15             ` Pavel Machek
  1 sibling, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-12-02 12:32 UTC (permalink / raw)
  To: Giuseppe CAVALLARO; +Cc: alexandre.torgue, David Miller, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 3728 bytes --]

Hi!

> >Well, if you have a workload that sends and receive packets, it tends
> >to work ok, as you do tx_clean() in stmmac_poll(). My workload is not
> >like that -- it is "sending packets at 3MB/sec, receiving none". So
> >the stmmac_tx_timer() is rescheduled and rescheduled and rescheduled,
> >and then we run out of transmit descriptors, and then 40msec passes,
> >and then we clean them. Bad.
> >
> >And that's why low-res timers do not cut it.
> 
> in that case, I expect that the tuning of the driver could help you.
> I mean, by using ethtool, it could be enough to set the IC bit on all
> the descriptors. You should touch the tx_coal_frames.
> 
> Then you can use ethtool -S to monitor the status.

Yes, I did something similar. Unfortnunately that meant crash within
minutes, at least with 4.4 kernel. (If you know what was fixed between
4.4 and 4.9, that would be helpful).

> We had experimented this tuning on STB IP where just datagrams
> had to send externally. To be honest, although we had seen
> better results w/o any timer, we kept this approach enabled
> because the timer was fast enough to cover our tests on SH4 boxes.

Please reply to David, and explain how it is supposed to
work... because right now it does not. 40 msec delays are not
acceptable in default configuration.

> >>In the ring, some descriptors can raise the irq (according to a
> >>threshold) and set the IC bit. In this path, the NAPI  poll will be
> >>scheduled.
> >
> >Not NAPI poll but stmmac_tx_timer(), right?
> 
> in the xmit according the the threshold the timer is started or the
> interrupt is set inside the descriptor.
> Then stmmac_tx_clean will be always called and, if you see the flow,
> no irqlock protection is needed!

Agreed that no irqlock protection is needed if we rely on napi and timers.

> >>Concerning the lock protection, we had reviewed long time ago and
> >>IIRC, no raise condition should be present. Open to review it,
> >>again!
...
> >There's nothing that protect stmmac_poll() from running concurently
> >with stmmac_dma_interrupt(), right?
> 
> This is not necessary.

dma_interrupt accesses shared priv->xstats; variables are of type
unsigned long (not atomic_t), yet they are accesssed from interrupt
context and from stmmac_ethtool without any locking. That can result
in broken statistics AFAICT.

Please take another look. As far as I can tell, you can have two cpus
at #1 and #2 in the code, at the same time. It looks like napi_... has
some atomic opertions inside so that looks safe at the first look. But
I'm not sure if they also include enough memory barriers to make it
safe...?


static void stmmac_dma_interrupt(struct stmmac_priv *priv)
{
...
        status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
        if (likely((status & handle_rx)) || (status & handle_tx)) {
                if (likely(napi_schedule_prep(&priv->napi))) {
#1
                        stmmac_disable_dma_irq(priv);
                        __napi_schedule(&priv->napi);
                }
        }


static int stmmac_poll(struct napi_struct *napi, int budget)
{
        struct stmmac_priv *priv = container_of(napi, struct stmmac_priv, napi);
        int work_done = 0;

        priv->xstats.napi_poll++;
        stmmac_tx_clean(priv);

        work_done = stmmac_rx(priv, budget);
	if (work_done < budget) {
                napi_complete(napi);
#2
	        stmmac_enable_dma_irq(priv);
        }
        return work_done;
}


Best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02 12:32             ` Pavel Machek
@ 2016-12-02 13:51               ` Giuseppe CAVALLARO
  2016-12-02 14:26                 ` Alexandre Torgue
  2016-12-05 12:01                 ` Pavel Machek
  0 siblings, 2 replies; 64+ messages in thread
From: Giuseppe CAVALLARO @ 2016-12-02 13:51 UTC (permalink / raw)
  To: Pavel Machek
  Cc: alexandre.torgue, David Miller, netdev, linux-kernel, Alexandre TORGUE

On 12/2/2016 1:32 PM, Pavel Machek wrote:
> Hi!
>
>>> Well, if you have a workload that sends and receive packets, it tends
>>> to work ok, as you do tx_clean() in stmmac_poll(). My workload is not
>>> like that -- it is "sending packets at 3MB/sec, receiving none". So
>>> the stmmac_tx_timer() is rescheduled and rescheduled and rescheduled,
>>> and then we run out of transmit descriptors, and then 40msec passes,
>>> and then we clean them. Bad.
>>>
>>> And that's why low-res timers do not cut it.
>>
>> in that case, I expect that the tuning of the driver could help you.
>> I mean, by using ethtool, it could be enough to set the IC bit on all
>> the descriptors. You should touch the tx_coal_frames.
>>
>> Then you can use ethtool -S to monitor the status.
>
> Yes, I did something similar. Unfortnunately that meant crash within
> minutes, at least with 4.4 kernel. (If you know what was fixed between
> 4.4 and 4.9, that would be helpful).

4.4 has no GMAC4 support.
Alex, do you remember any patches to fix that?

>> We had experimented this tuning on STB IP where just datagrams
>> had to send externally. To be honest, although we had seen
>> better results w/o any timer, we kept this approach enabled
>> because the timer was fast enough to cover our tests on SH4 boxes.
>
> Please reply to David, and explain how it is supposed to
> work... because right now it does not. 40 msec delays are not
> acceptable in default configuration.

I mean, that on UP and SMP system this schema helped
to improve the performance saving CPU on my side and this has been
tested since a long time (~4 years).
I tested something similar to yours where unidirectional traffic
with limited throughput was needed and I can confirm you that
tuning/removing coalesce parameters this helped. The tuning I decided
to keep in the driver was suitable in several user cases and if now
you have problems or you want to review it I can just confirm that
there are no problems on my side. If you want to simply the logic
around the tx process and remove timer on official driver I can accept
that. I will just ask you uto double check if the throughput and
CPU usage when request max throughput (better if on GiGa setup) has
no regressions.
Otherwise we could start thinking about adaptive schema if feasible.

>>>> In the ring, some descriptors can raise the irq (according to a
>>>> threshold) and set the IC bit. In this path, the NAPI  poll will be
>>>> scheduled.
>>>
>>> Not NAPI poll but stmmac_tx_timer(), right?
>>
>> in the xmit according the the threshold the timer is started or the
>> interrupt is set inside the descriptor.
>> Then stmmac_tx_clean will be always called and, if you see the flow,
>> no irqlock protection is needed!
>
> Agreed that no irqlock protection is needed if we rely on napi and timers.

ok

>
>>>> Concerning the lock protection, we had reviewed long time ago and
>>>> IIRC, no raise condition should be present. Open to review it,
>>>> again!
> ...
>>> There's nothing that protect stmmac_poll() from running concurently
>>> with stmmac_dma_interrupt(), right?
>>
>> This is not necessary.
>
> dma_interrupt accesses shared priv->xstats; variables are of type
> unsigned long (not atomic_t), yet they are accesssed from interrupt
> context and from stmmac_ethtool without any locking. That can result
> in broken statistics AFAICT.

ok we can check this and welcome patches and I'd prefer to
remove xstats from critical part of the code like ISR (that
comes from old story of the driver).

>
> Please take another look. As far as I can tell, you can have two cpus
> at #1 and #2 in the code, at the same time. It looks like napi_... has
> some atomic opertions inside so that looks safe at the first look. But
> I'm not sure if they also include enough memory barriers to make it
> safe...?

Although I have never reproduced related issues on SMP platforms
due to reordering of memory operations but, as said above, welcome
review on this especially if you are seeing problems when manage NAPI.

FYI, the only memory barrier you will see in the driver are about the
OWN_BIT setting till now.

> static void stmmac_dma_interrupt(struct stmmac_priv *priv)
> {
> ...
>         status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
>         if (likely((status & handle_rx)) || (status & handle_tx)) {
>                 if (likely(napi_schedule_prep(&priv->napi))) {
> #1
>                         stmmac_disable_dma_irq(priv);
>                         __napi_schedule(&priv->napi);
>                 }
>         }
>
>
> static int stmmac_poll(struct napi_struct *napi, int budget)
> {
>         struct stmmac_priv *priv = container_of(napi, struct stmmac_priv, napi);
>         int work_done = 0;
>
>         priv->xstats.napi_poll++;
>         stmmac_tx_clean(priv);
>
>         work_done = stmmac_rx(priv, budget);
> 	if (work_done < budget) {
>                 napi_complete(napi);
> #2
> 	        stmmac_enable_dma_irq(priv);
>         }

hmm, I have to check (and refresh my memory) but the driver
uses the napi_schedule_prep.

Regards

Peppe

>         return work_done;
> }
>
>
> Best regards,
> 									Pavel
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Aw: Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02  8:45         ` Pavel Machek
  2016-12-02  9:43           ` Giuseppe CAVALLARO
@ 2016-12-02 14:05           ` Lino Sanfilippo
  2016-12-07 12:31             ` [RFC] " Pavel Machek
  1 sibling, 1 reply; 64+ messages in thread
From: Lino Sanfilippo @ 2016-12-02 14:05 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Giuseppe CAVALLARO, alexandre.torgue, David Miller, netdev, linux-kernel

Hi,


> 
> There's nothing that protect stmmac_poll() from running concurently
> with stmmac_dma_interrupt(), right?
> 

could it be that there is also another issue concerned locking?:
The tx completion handler takes the xmit_lock in case that the
netif_queue is stopped. This is AFAICS unnecessary, since both
xmit and completion handler are already synchronized by the private
tx lock. But it is IMHO also dangerous:

In the xmit handler we have the locking order
1. xmit_lock
2. private tx lock

while in the completion handler its the reverse:

1. private tx lock
2. xmit lock.

Do I miss something?


Regards,
Lino

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac: reduce code duplication getting basic descriptors
  2016-11-28 12:17     ` [PATCH] stmmac: reduce code duplication getting basic descriptors Pavel Machek
  2016-11-28 15:25       ` kbuild test robot
@ 2016-12-02 14:09       ` Alexandre Torgue
  1 sibling, 0 replies; 64+ messages in thread
From: Alexandre Torgue @ 2016-12-02 14:09 UTC (permalink / raw)
  To: Pavel Machek, David Miller, Andrew Morton
  Cc: peppe.cavallaro, netdev, linux-kernel

Hi Pavel,

On 11/28/2016 01:17 PM, Pavel Machek wrote:
>
> Remove code duplication getting basic descriptors.

I agree with your patch, it will make code easier to understand.
After fix kbuild issue you can add my Acked-by;

Regards
Alex

>
> Signed-off-by: Pavel Machek <pavel@denx.de>
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index f7133d0..ed20668 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -929,6 +929,17 @@ static int stmmac_set_bfsize(int mtu, int bufsize)
>  	return ret;
>  }
>
> +static inline struct dma_desc *stmmac_tx_desc(struct stmmac_priv *priv, int i)
> +{
> +	struct dma_desc *p;
> +	if (priv->extend_desc)
> +		p = &((priv->dma_etx + i)->basic);
> +	else
> +		p = priv->dma_tx + i;
> +	return p;
> +}
> +
> +
>  /**
>   * stmmac_clear_descriptors - clear descriptors
>   * @priv: driver private structure
> @@ -1078,11 +1089,7 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
>
>  	/* TX INITIALIZATION */
>  	for (i = 0; i < DMA_TX_SIZE; i++) {
> -		struct dma_desc *p;
> -		if (priv->extend_desc)
> -			p = &((priv->dma_etx + i)->basic);
> -		else
> -			p = priv->dma_tx + i;
> +		struct dma_desc *p = stmmac_tx_desc(priv, i);
>
>  		if (priv->synopsys_id >= DWMAC_CORE_4_00) {
>  			p->des0 = 0;
> @@ -1129,12 +1136,7 @@ static void dma_free_tx_skbufs(struct stmmac_priv *priv)
>  	int i;
>
>  	for (i = 0; i < DMA_TX_SIZE; i++) {
> -		struct dma_desc *p;
> -
> -		if (priv->extend_desc)
> -			p = &((priv->dma_etx + i)->basic);
> -		else
> -			p = priv->dma_tx + i;
> +		struct dma_desc *p = stmmac_tx_desc(priv, i);
>
>  		if (priv->tx_skbuff_dma[i].buf) {
>  			if (priv->tx_skbuff_dma[i].map_as_page)
> @@ -1314,14 +1316,9 @@ static void __stmmac_tx_clean(struct stmmac_priv *priv)
>
>  	while (entry != priv->cur_tx) {
>  		struct sk_buff *skb = priv->tx_skbuff[entry];
> -		struct dma_desc *p;
> +		struct dma_desc *p = stmmac_tx_desc(priv, entry);
>  		int status;
>
> -		if (priv->extend_desc)
> -			p = (struct dma_desc *)(priv->dma_etx + entry);
> -		else
> -			p = priv->dma_tx + entry;
> -
>  		status = priv->hw->desc->tx_status(&priv->dev->stats,
>  						      &priv->xstats, p,
>  						      priv->ioaddr);
> @@ -2227,11 +2224,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>
>  	csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
>
> -	if (likely(priv->extend_desc))
> -		desc = (struct dma_desc *)(priv->dma_etx + entry);
> -	else
> -		desc = priv->dma_tx + entry;
> -
> +	desc = stmmac_tx_desc(priv, entry);
>  	first = desc;
>
>  	priv->tx_skbuff[first_entry] = skb;
> @@ -2254,10 +2247,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>
>  		entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
>
> -		if (likely(priv->extend_desc))
> -			desc = (struct dma_desc *)(priv->dma_etx + entry);
> -		else
> -			desc = priv->dma_tx + entry;
> +		desc = stmmac_tx_desc(priv, entry);
>
>  		des = skb_frag_dma_map(priv->device, frag, 0, len,
>  				       DMA_TO_DEVICE);
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02 13:51               ` Giuseppe CAVALLARO
@ 2016-12-02 14:26                 ` Alexandre Torgue
  2016-12-02 15:19                   ` Giuseppe CAVALLARO
  2016-12-05 12:01                 ` Pavel Machek
  1 sibling, 1 reply; 64+ messages in thread
From: Alexandre Torgue @ 2016-12-02 14:26 UTC (permalink / raw)
  To: Giuseppe CAVALLARO, Pavel Machek; +Cc: David Miller, netdev, linux-kernel

Hi Pavel and Peppe,

On 12/02/2016 02:51 PM, Giuseppe CAVALLARO wrote:
> On 12/2/2016 1:32 PM, Pavel Machek wrote:
>> Hi!
>>
>>>> Well, if you have a workload that sends and receive packets, it tends
>>>> to work ok, as you do tx_clean() in stmmac_poll(). My workload is not
>>>> like that -- it is "sending packets at 3MB/sec, receiving none". So
>>>> the stmmac_tx_timer() is rescheduled and rescheduled and rescheduled,
>>>> and then we run out of transmit descriptors, and then 40msec passes,
>>>> and then we clean them. Bad.
>>>>
>>>> And that's why low-res timers do not cut it.
>>>
>>> in that case, I expect that the tuning of the driver could help you.
>>> I mean, by using ethtool, it could be enough to set the IC bit on all
>>> the descriptors. You should touch the tx_coal_frames.
>>>
>>> Then you can use ethtool -S to monitor the status.
>>
>> Yes, I did something similar. Unfortnunately that meant crash within
>> minutes, at least with 4.4 kernel. (If you know what was fixed between
>> 4.4 and 4.9, that would be helpful).
>
> 4.4 has no GMAC4 support.
> Alex, do you remember any patches to fix that?

No sorry Peppe.

Pavel,

Sorry but I'm a little bit confused. I'm dropped in some mails without 
historic. I see cleanup, coalescence issue and TSO question.
What is your main issue? Are you working on gmac4 or 3.x ?
Can you refresh a little bit the story please ?

Regards
Alex
>
>>> We had experimented this tuning on STB IP where just datagrams
>>> had to send externally. To be honest, although we had seen
>>> better results w/o any timer, we kept this approach enabled
>>> because the timer was fast enough to cover our tests on SH4 boxes.
>>
>> Please reply to David, and explain how it is supposed to
>> work... because right now it does not. 40 msec delays are not
>> acceptable in default configuration.
>
> I mean, that on UP and SMP system this schema helped
> to improve the performance saving CPU on my side and this has been
> tested since a long time (~4 years).
> I tested something similar to yours where unidirectional traffic
> with limited throughput was needed and I can confirm you that
> tuning/removing coalesce parameters this helped. The tuning I decided
> to keep in the driver was suitable in several user cases and if now
> you have problems or you want to review it I can just confirm that
> there are no problems on my side. If you want to simply the logic
> around the tx process and remove timer on official driver I can accept
> that. I will just ask you uto double check if the throughput and
> CPU usage when request max throughput (better if on GiGa setup) has
> no regressions.
> Otherwise we could start thinking about adaptive schema if feasible.
>
>>>>> In the ring, some descriptors can raise the irq (according to a
>>>>> threshold) and set the IC bit. In this path, the NAPI  poll will be
>>>>> scheduled.
>>>>
>>>> Not NAPI poll but stmmac_tx_timer(), right?
>>>
>>> in the xmit according the the threshold the timer is started or the
>>> interrupt is set inside the descriptor.
>>> Then stmmac_tx_clean will be always called and, if you see the flow,
>>> no irqlock protection is needed!
>>
>> Agreed that no irqlock protection is needed if we rely on napi and
>> timers.
>
> ok
>
>>
>>>>> Concerning the lock protection, we had reviewed long time ago and
>>>>> IIRC, no raise condition should be present. Open to review it,
>>>>> again!
>> ...
>>>> There's nothing that protect stmmac_poll() from running concurently
>>>> with stmmac_dma_interrupt(), right?
>>>
>>> This is not necessary.
>>
>> dma_interrupt accesses shared priv->xstats; variables are of type
>> unsigned long (not atomic_t), yet they are accesssed from interrupt
>> context and from stmmac_ethtool without any locking. That can result
>> in broken statistics AFAICT.
>
> ok we can check this and welcome patches and I'd prefer to
> remove xstats from critical part of the code like ISR (that
> comes from old story of the driver).
>
>>
>> Please take another look. As far as I can tell, you can have two cpus
>> at #1 and #2 in the code, at the same time. It looks like napi_... has
>> some atomic opertions inside so that looks safe at the first look. But
>> I'm not sure if they also include enough memory barriers to make it
>> safe...?
>
> Although I have never reproduced related issues on SMP platforms
> due to reordering of memory operations but, as said above, welcome
> review on this especially if you are seeing problems when manage NAPI.
>
> FYI, the only memory barrier you will see in the driver are about the
> OWN_BIT setting till now.
>
>> static void stmmac_dma_interrupt(struct stmmac_priv *priv)
>> {
>> ...
>>         status = priv->hw->dma->dma_interrupt(priv->ioaddr,
>> &priv->xstats);
>>         if (likely((status & handle_rx)) || (status & handle_tx)) {
>>                 if (likely(napi_schedule_prep(&priv->napi))) {
>> #1
>>                         stmmac_disable_dma_irq(priv);
>>                         __napi_schedule(&priv->napi);
>>                 }
>>         }
>>
>>
>> static int stmmac_poll(struct napi_struct *napi, int budget)
>> {
>>         struct stmmac_priv *priv = container_of(napi, struct
>> stmmac_priv, napi);
>>         int work_done = 0;
>>
>>         priv->xstats.napi_poll++;
>>         stmmac_tx_clean(priv);
>>
>>         work_done = stmmac_rx(priv, budget);
>>     if (work_done < budget) {
>>                 napi_complete(napi);
>> #2
>>             stmmac_enable_dma_irq(priv);
>>         }
>
> hmm, I have to check (and refresh my memory) but the driver
> uses the napi_schedule_prep.
>
> Regards
>
> Peppe
>
>>         return work_done;
>> }
>>
>>
>> Best regards,
>>                                     Pavel
>>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02 14:26                 ` Alexandre Torgue
@ 2016-12-02 15:19                   ` Giuseppe CAVALLARO
  2016-12-05 12:14                     ` Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: Giuseppe CAVALLARO @ 2016-12-02 15:19 UTC (permalink / raw)
  To: Alexandre Torgue, Pavel Machek; +Cc: David Miller, netdev, linux-kernel

Hi Alex

On 12/2/2016 3:26 PM, Alexandre Torgue wrote:
>> 4.4 has no GMAC4 support.
>> Alex, do you remember any patches to fix that?
>
> No sorry Peppe.
>
> Pavel,
>
> Sorry but I'm a little bit confused. I'm dropped in some mails without
> historic. I see cleanup, coalescence issue and TSO question.
> What is your main issue? Are you working on gmac4 or 3.x ?
> Can you refresh a little bit the story please ?

let me try to do a sum, please Pavel feel free to correct me.

There are some open points about the tx mitigation schema
that we are trying to detail and eventually tune or change
(but keeping the same performance on other user-case).

In particular, the test case that is raising problem is
an unicast tx bench.
I suggested Pavel to tune coalesce (IC bit settings) via
ethtool and monitor stats but he is getting problems (maybe
due to lock).

IIUC problems are mainly on new kernel and not on 4.4 where
the gmac4 is missing. Please Pavel, could you confirm?

Then, there are some open points about lock protections
for xstat and Pavel is getting some problem on SMP.
I do think that we need to review that. This also could
improve the code in critical parts.

Also there are some other discussion about the lock
protection on NAPI still under discussion. I have not
clear if in this case Pavel is getting strange behavior.

Regards
Peppe

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac: turn coalescing / NAPI off in stmmac
  2016-12-02 10:42             ` Pavel Machek
@ 2016-12-02 15:31               ` Giuseppe CAVALLARO
  2016-12-05 11:45                 ` Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: Giuseppe CAVALLARO @ 2016-12-02 15:31 UTC (permalink / raw)
  To: Pavel Machek; +Cc: David Miller, alexandre.torgue, netdev, linux-kernel

Hi Pavel

On 12/2/2016 11:42 AM, Pavel Machek wrote:
> Hi!
>
>>> Anyway... since you asked. I belive I have way to disable NAPI / tx
>>> coalescing in the driver. Unfortunately, locking is missing on the rx
>>> path, and needs to be extended to _irqsave variant on tx path.
>>
>> I have just replied to a previous thread about that...
>
> Yeah, please reply to David's mail where he describes why it can't
> work.

let me to re-check the mails :-) I can try to provide you
more details about what I experimented

>
>>> So patch currently looks like this (hand edited, can't be
>>> applied, got it working few hours ago). Does it look acceptable?
>>>
>>> I'd prefer this to go after the patch that pulls common code to single
>>> place, so that single place needs to be patched. Plus I guess I should
>>> add ifdefs, so that more advanced NAPI / tx coalescing code can be
>>> reactivated when it is fixed. Trivial fixes can go on top. Does that
>>> sound like a plan?
>>
>> Hmm, what I find strange is that, just this code is running since a
>> long time on several platforms and Chip versions. No raise condition
>> have been found or lock protection problems (also proving look
>> mechanisms).
>
> Well, it works better for me when I disable CONFIG_SMP. It is normal
> that locking problems are hard to reproduce :-(.

can you share me the test, maybe I can try to reproduce on ARM box.
Are you using 3.x or 4.x GMAC?

>> Pavel, I ask you sorry if I missed some problems so, if you can
>> (as D. Miller asked) to send us a cover letter + all patches
>> I will try to reply soon. I can do also some tests if you ask
>> me that! I could run on 3.x and 4.x but I cannot promise you
>> benchmarks.
>
> Actually... I have questions here. David normally pulls from you (can
> I have a address of your git tree?).

No I send the patches to the mailing list.

>
> Could you apply these to your git?
>
> [PATCH] stmmac ethernet: unify locking
> [PATCH] stmmac: simplify flag assignment
> [PATCH] stmmac: cleanup documenation, make it match reality
>
> They are rather trivial and independend, I'm not sure what cover
> letter would say, besides "simple fixes".
>
> Then I can re-do the reset on top of that...
>
>>> Which tree do you want patches against?
>>>
>>> https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/ ?
>>
>> I think that bug fixing should be on top of net.git but I let Miller
>> to decide.
>
> Hmm. It is "only" a performance problem (40msec delays).. I guess
> -next is better target.

ok, maybe if you resend these with a cover-letter I can try to
contribute on reviewing (in case of I have missed some detail).

Best Regards
Peppe

>
> Best regards,
> 								Pavel
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac: cleanup documenation, make it match reality
  2016-12-01 10:32     ` [PATCH] stmmac: cleanup documenation, make it match reality Pavel Machek
@ 2016-12-03 20:07       ` David Miller
  0 siblings, 0 replies; 64+ messages in thread
From: David Miller @ 2016-12-03 20:07 UTC (permalink / raw)
  To: pavel; +Cc: akpm, peppe.cavallaro, netdev, linux-kernel

From: Pavel Machek <pavel@ucw.cz>
Date: Thu, 1 Dec 2016 11:32:18 +0100

> Fix english in documentation, make documentation match reality, remove
> options that were removed from code.
>     
> Signed-off-by: Pavel Machek <pavel@denx.de>

Applied.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02  9:43           ` Giuseppe CAVALLARO
  2016-12-02 12:32             ` Pavel Machek
@ 2016-12-05 10:15             ` Pavel Machek
  2016-12-05 11:40               ` Lino Sanfilippo
  1 sibling, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-12-05 10:15 UTC (permalink / raw)
  To: Giuseppe CAVALLARO; +Cc: alexandre.torgue, David Miller, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 799 bytes --]

Hi!

> >>In the ring, some descriptors can raise the irq (according to a
> >>threshold) and set the IC bit. In this path, the NAPI  poll will be
> >>scheduled.
> >
> >Not NAPI poll but stmmac_tx_timer(), right?
> 
> in the xmit according the the threshold the timer is started or the
> interrupt is set inside the descriptor.
> Then stmmac_tx_clean will be always called and, if you see the flow,
> no irqlock protection is needed!

Actually, I was wrong. irqlock protection is needed, since
stmmac_tx_clean() is called from timer, and that's interrupt context,
as you can confirm using BUG_ON(in_interrupt());

Best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-05 10:15             ` Pavel Machek
@ 2016-12-05 11:40               ` Lino Sanfilippo
  2016-12-05 22:02                 ` Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: Lino Sanfilippo @ 2016-12-05 11:40 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Giuseppe CAVALLARO, alexandre.torgue, David Miller, netdev, linux-kernel

Hi,

> 
> Actually, I was wrong. irqlock protection is needed, since
> stmmac_tx_clean() is called from timer, and that's interrupt context,
> as you can confirm using BUG_ON(in_interrupt());
> 

in_interrupt() can mean both softirq and hardirq context. In this case it
means softirq. So I guess you were right before, and no irq locking is needed.

Regards,
Lino

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac: turn coalescing / NAPI off in stmmac
  2016-12-02 15:31               ` Giuseppe CAVALLARO
@ 2016-12-05 11:45                 ` Pavel Machek
  0 siblings, 0 replies; 64+ messages in thread
From: Pavel Machek @ 2016-12-05 11:45 UTC (permalink / raw)
  To: Giuseppe CAVALLARO; +Cc: David Miller, alexandre.torgue, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1816 bytes --]

Hi!

> >>>So patch currently looks like this (hand edited, can't be
> >>>applied, got it working few hours ago). Does it look acceptable?
> >>>
> >>>I'd prefer this to go after the patch that pulls common code to single
> >>>place, so that single place needs to be patched. Plus I guess I should
> >>>add ifdefs, so that more advanced NAPI / tx coalescing code can be
> >>>reactivated when it is fixed. Trivial fixes can go on top. Does that
> >>>sound like a plan?
> >>
> >>Hmm, what I find strange is that, just this code is running since a
> >>long time on several platforms and Chip versions. No raise condition
> >>have been found or lock protection problems (also proving look
> >>mechanisms).
> >
> >Well, it works better for me when I disable CONFIG_SMP. It is normal
> >that locking problems are hard to reproduce :-(.
> 
> can you share me the test, maybe I can try to reproduce on ARM box.
> Are you using 3.x or 4.x GMAC?

I'm using board similar to altera socfpga. 3.70a, as far as I can
tell.

                gmac0: ethernet@ff700000 {
                        compatible = "altr,socfpga-stmmac", "snps,dwmac-3.70a", "snps,dwmac\
";

> >>Pavel, I ask you sorry if I missed some problems so, if you can
> >>(as D. Miller asked) to send us a cover letter + all patches
> >>I will try to reply soon. I can do also some tests if you ask
> >>me that! I could run on 3.x and 4.x but I cannot promise you
> >>benchmarks.
> >
> >Actually... I have questions here. David normally pulls from you (can
> >I have a address of your git tree?).
> 
> No I send the patches to the mailing list.

Aha, ok.

Best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02  8:24       ` Giuseppe CAVALLARO
  2016-12-02  8:41         ` Giuseppe CAVALLARO
  2016-12-02  8:45         ` Pavel Machek
@ 2016-12-05 11:56         ` Pavel Machek
  2 siblings, 0 replies; 64+ messages in thread
From: Pavel Machek @ 2016-12-05 11:56 UTC (permalink / raw)
  To: Giuseppe CAVALLARO; +Cc: David Miller, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1896 bytes --]

Hi!

> the idea behind the TX mitigation is to mix the interrupt and
> timer and this approach gave us real benefit in terms
> of performances and CPU usage (especially on SH4-200/SH4-300 platforms
> based).
> In the ring, some descriptors can raise the irq (according to a
> threshold) and set the IC bit. In this path, the NAPI  poll will be
> scheduled.
> But there is a timer that can run (and we experimented that no high
> resolution is needed) to clear the tx resources.

I'm sorry, but it is just broken. It could have never worked. If it
appered it did, you did not test it right.

First, low-res timers have resolution down to one per second (see
David's email). It is not acceptable to delay transmits for 40msec,
and certainly not acceptable to delay them for 1000msec.

Second, the logic is wrong:

        if (likely(priv->tx_coal_frames > priv->tx_count_frames))
	                mod_timer(&priv->txtimer,
 				STMMAC_COAL_TIMER(priv->tx_coal_timer));
	                else {
 				priv->tx_count_frames = 0;
 				priv->hw->desc->set_tx_ic(desc);
 				priv->xstats.tx_set_ic_bit++;
			}
									

doing tx_clean() after set number of packets, or set time after the
first packet is transmitted would make sense. But that's not what the
code does. As long as packets are being transmitted, you move the
timer into the future.. so that finally you run out of the place, then
wait for timer (!) and only then you do the cleaning.

Third, times are wrong by order of magnitude. AFAICT cleaning should
be at around 5msec at 100mbit speeds, and at around .5msec at
gigabit. You have 40msec there. (Perhaps that is not too important if
the logic is fixed, as described above).

Best regards,
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02 13:51               ` Giuseppe CAVALLARO
  2016-12-02 14:26                 ` Alexandre Torgue
@ 2016-12-05 12:01                 ` Pavel Machek
  1 sibling, 0 replies; 64+ messages in thread
From: Pavel Machek @ 2016-12-05 12:01 UTC (permalink / raw)
  To: Giuseppe CAVALLARO; +Cc: alexandre.torgue, David Miller, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1870 bytes --]

Hi!

> >>We had experimented this tuning on STB IP where just datagrams
> >>had to send externally. To be honest, although we had seen
> >>better results w/o any timer, we kept this approach enabled
> >>because the timer was fast enough to cover our tests on SH4 boxes.
> >
> >Please reply to David, and explain how it is supposed to
> >work... because right now it does not. 40 msec delays are not
> >acceptable in default configuration.
> 
> I mean, that on UP and SMP system this schema helped
> to improve the performance saving CPU on my side and this has been
> tested since a long time (~4 years).
> I tested something similar to yours where unidirectional traffic
> with limited throughput was needed and I can confirm you that
> tuning/removing coalesce parameters this helped. The tuning I decided
> to keep in the driver was suitable in several user cases and if now
> you have problems or you want to review it I can just confirm that
> there are no problems on my side. If you want to simply the logic
> around the tx process and remove timer on official driver I can accept
> that. I will just ask you uto double check if the throughput and
> CPU usage when request max throughput (better if on GiGa setup) has
> no regressions.
> Otherwise we could start thinking about adaptive schema if feasible.

Ok, so you see the issue. Good.

See the other email to description how it could be fixed... the logic
is broken. How it was not discovered for 4 years is mystery to me.
 
> >Agreed that no irqlock protection is needed if we rely on napi and timers.
> 
> ok

Actually I was wrong there. Another reason to disable tx coalescing
until it is fixed.

Best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02 15:19                   ` Giuseppe CAVALLARO
@ 2016-12-05 12:14                     ` Pavel Machek
  0 siblings, 0 replies; 64+ messages in thread
From: Pavel Machek @ 2016-12-05 12:14 UTC (permalink / raw)
  To: Giuseppe CAVALLARO; +Cc: Alexandre Torgue, David Miller, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1588 bytes --]

Hi!

> >Sorry but I'm a little bit confused. I'm dropped in some mails without
> >historic. I see cleanup, coalescence issue and TSO question.
> >What is your main issue? Are you working on gmac4 or 3.x ?
> >Can you refresh a little bit the story please ?
> 
> let me try to do a sum, please Pavel feel free to correct me.
> 
> There are some open points about the tx mitigation schema
> that we are trying to detail and eventually tune or change
> (but keeping the same performance on other user-case).
> 
> In particular, the test case that is raising problem is
> an unicast tx bench.
> I suggested Pavel to tune coalesce (IC bit settings) via
> ethtool and monitor stats but he is getting problems (maybe
> due to lock).
> 
> IIUC problems are mainly on new kernel and not on 4.4 where
> the gmac4 is missing. Please Pavel, could you confirm?

Actually no, it is the other way around. I can get 4.9 to work with
some tuning. 4.4 likes to crash when tx coalesce is enabled with
shorter than 40 msec timeout. (It crashes with default settings, too,
but that takes too long to reproduce.)

> Also there are some other discussion about the lock
> protection on NAPI still under discussion. I have not
> clear if in this case Pavel is getting strange behavior.

Yep, locking is broken in more than one place. I believe I understand
what some problems are. Let me prepare the patches.

Best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH] stmmac: disable tx coalescing
  2016-11-24 16:04   ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses David Miller
                       ` (5 preceding siblings ...)
  2016-12-01 10:32     ` [PATCH] stmmac: cleanup documenation, make it match reality Pavel Machek
@ 2016-12-05 12:27     ` Pavel Machek
  2016-12-11 19:07       ` Pavel Machek
  6 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-12-05 12:27 UTC (permalink / raw)
  To: David Miller
  Cc: peppe.cavallaro, netdev, linux-kernel, alexandre.torgue, LinoSanfilippo

[-- Attachment #1: Type: text/plain, Size: 1926 bytes --]


Tx coalescing in stmmac is broken in more than one way, so disable it
for now.
    
First, low-res timers have resolution down to one per second. It is
not acceptable to delay transmits for 40msec, and certainly not
acceptable to delay them for 1000msec.
    
Second, the logic is wrong:
    
    if (likely(priv->tx_coal_frames > priv->tx_count_frames))
     	mod_timer(&priv->txtimer,
     	          STMMAC_COAL_TIMER(priv->tx_coal_timer));
    ...
    
doing tx_clean() after set number of packets, or set time after the
first packet is transmitted would make sense. But that's not what the
code does. As long as packets are being transmitted, you move the
timer into the future.. so that finally you run out of the place, then
wait for timer (!) and only then you do the cleaning.

Third, tx_cleanup is not safe to call from interrupt (tx_lock is not
irqsave), but that's exactly what coalescing code does.

Signed-off-by: Pavel Machek <pavel@denx.de>

---

This is stable candidate, afaict. It is broken in 4.4, too.

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 3ced2e1..32ce148 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -244,11 +244,11 @@ struct stmmac_extra_stats {
 /* Max/Min RI Watchdog Timer count value */
 #define MAX_DMA_RIWT		0xff
 #define MIN_DMA_RIWT		0x20
-/* Tx coalesce parameters */
+/* Tx coalesce parameters. Set STMMAC_TX_FRAMES to 0 to disable coalescing. */
 #define STMMAC_COAL_TX_TIMER	40000
 #define STMMAC_MAX_COAL_TX_TICK	100000
 #define STMMAC_TX_MAX_FRAMES	256
-#define STMMAC_TX_FRAMES	64
+#define STMMAC_TX_FRAMES	0
 
 /* Rx IPC status */
 enum rx_frame_status {

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-05 11:40               ` Lino Sanfilippo
@ 2016-12-05 22:02                 ` Pavel Machek
  2016-12-05 22:37                   ` Lino Sanfilippo
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-12-05 22:02 UTC (permalink / raw)
  To: Lino Sanfilippo
  Cc: Giuseppe CAVALLARO, alexandre.torgue, David Miller, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 836 bytes --]

Hi!

> > 
> > Actually, I was wrong. irqlock protection is needed, since
> > stmmac_tx_clean() is called from timer, and that's interrupt context,
> > as you can confirm using BUG_ON(in_interrupt());
> > 
> 
> in_interrupt() can mean both softirq and hardirq context. In this case it
> means softirq. So I guess you were right before, and no irq locking is needed.

Are you absolutely sure? Because my testing seems to indicate
otherwise (but I may have made a mistake).

According to

https://www.kernel.org/pub/linux/kernel/people/rusty/kernel-locking/c214.html

we need spin_lock_bh at minimum, as we are locking user context
against timer.

Best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-05 22:02                 ` Pavel Machek
@ 2016-12-05 22:37                   ` Lino Sanfilippo
  2016-12-05 22:40                     ` Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: Lino Sanfilippo @ 2016-12-05 22:37 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Giuseppe CAVALLARO, alexandre.torgue, David Miller, netdev, linux-kernel

Hi Pavel,

On 05.12.2016 23:02, Pavel Machek wrote:
> 
> we need spin_lock_bh at minimum, as we are locking user context
> against timer.
> 
> Best regards,
> 									Pavel
> 

I was referring to stmmac_tx_clean() which AFAICS is only called from softirq context,
(one time in the timer handler and one time in napi poll handler) so a spin_lock() should
be sufficient. I cant see how this is called from userspace. If it were, a spin_lock_bh() had
to be used, of course.

Regards,
Lino 

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-05 22:37                   ` Lino Sanfilippo
@ 2016-12-05 22:40                     ` Pavel Machek
  2016-12-05 22:54                       ` Lino Sanfilippo
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-12-05 22:40 UTC (permalink / raw)
  To: Lino Sanfilippo
  Cc: Giuseppe CAVALLARO, alexandre.torgue, David Miller, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 874 bytes --]

On Mon 2016-12-05 23:37:09, Lino Sanfilippo wrote:
> Hi Pavel,
> 
> On 05.12.2016 23:02, Pavel Machek wrote:
> > 
> > we need spin_lock_bh at minimum, as we are locking user context
> > against timer.
> > 
> > Best regards,
> > 									Pavel
> > 
> 
> I was referring to stmmac_tx_clean() which AFAICS is only called from softirq context,
> (one time in the timer handler and one time in napi poll handler) so a spin_lock() should
> be sufficient. I cant see how this is called from userspace. If it were, a spin_lock_bh() had
> to be used, of course.

stmmac_tx_clean() shares lock with stmmac_tx() -- and that's process
context as far as I can tell. So... spin_lock_bh() at
minimum... right?

									Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-05 22:40                     ` Pavel Machek
@ 2016-12-05 22:54                       ` Lino Sanfilippo
  2016-12-05 23:11                         ` Lino Sanfilippo
  0 siblings, 1 reply; 64+ messages in thread
From: Lino Sanfilippo @ 2016-12-05 22:54 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Giuseppe CAVALLARO, alexandre.torgue, David Miller, netdev, linux-kernel

On 05.12.2016 23:40, Pavel Machek wrote:
> On Mon 2016-12-05 23:37:09, Lino Sanfilippo wrote:
>> Hi Pavel,
>> 
>> On 05.12.2016 23:02, Pavel Machek wrote:
>> > 
>> > we need spin_lock_bh at minimum, as we are locking user context
>> > against timer.
>> > 
>> > Best regards,
>> > 									Pavel
>> > 
>> 
>> I was referring to stmmac_tx_clean() which AFAICS is only called from softirq context,
>> (one time in the timer handler and one time in napi poll handler) so a spin_lock() should
>> be sufficient. I cant see how this is called from userspace. If it were, a spin_lock_bh() had
>> to be used, of course.
> 
> stmmac_tx_clean() shares lock with stmmac_tx() -- and that's process
> context as far as I can tell. So... spin_lock_bh() at
> minimum... right?
> 
> 									Pavel
> 

You mean stmmac_xmit()? Thats also softirq AFAICT, its the TX softirq....

Regards,
Lino

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-05 22:54                       ` Lino Sanfilippo
@ 2016-12-05 23:11                         ` Lino Sanfilippo
  0 siblings, 0 replies; 64+ messages in thread
From: Lino Sanfilippo @ 2016-12-05 23:11 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Giuseppe CAVALLARO, alexandre.torgue, David Miller, netdev, linux-kernel


> 
> You mean stmmac_xmit()? Thats also softirq AFAICT, its the TX softirq....
> 
> Regards,
> Lino
> 
> 

Hmm. netdevices.txt says:

ndo_start_xmit:
	...

	Context: Process with BHs disabled or BH (timer),
	         will be called with interrupts disabled by netconsole.

	...

If this is correct it can indeed be process context, too. However BHs are already
disabled.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [RFC] Re: Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-02 14:05           ` Aw: " Lino Sanfilippo
@ 2016-12-07 12:31             ` Pavel Machek
  2016-12-07 13:18               ` Lino Sanfilippo
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-12-07 12:31 UTC (permalink / raw)
  To: Lino Sanfilippo
  Cc: Giuseppe CAVALLARO, alexandre.torgue, David Miller, netdev, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 4485 bytes --]

On Fri 2016-12-02 15:05:21, Lino Sanfilippo wrote:
> Hi,
> 
> 
> > 
> > There's nothing that protect stmmac_poll() from running concurently
> > with stmmac_dma_interrupt(), right?
> > 
> 
> could it be that there is also another issue concerned locking?:
> The tx completion handler takes the xmit_lock in case that the
> netif_queue is stopped. This is AFAICS unnecessary, since both
> xmit and completion handler are already synchronized by the private
> tx lock. But it is IMHO also dangerous:
> 
> In the xmit handler we have the locking order
> 1. xmit_lock
> 2. private tx lock
> 
> while in the completion handler its the reverse:
> 
> 1. private tx lock
> 2. xmit lock.
> 
> Do I miss something?

No, it seems you are right. Something like this?

Hmm. And can priv->tx_lock be removed, as we already rely on
netif_tx_lock?

(I copied the "lock already held" annotations from forcedeth. I hope
they are right....)

Best regards,
								Pavel


commit a6f21255dfc11fcadc5062dfd0c5f3d77ca4f634
Author: Pavel <pavel@ucw.cz>
Date:   Wed Dec 7 13:29:15 2016 +0100

    Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
    
    The tx completion handler takes the xmit_lock in case that the
    netif_queue is stopped. This is AFAICS unnecessary, since both
    xmit and completion handler are already synchronized by the private
    tx lock. But it is IMHO also dangerous:
    
    In the xmit handler we have the locking order
    1. xmit_lock
    2. private tx lock
    
    while in the completion handler its the reverse:
    
    1. private tx lock
    2. xmit lock.
    
    Signed-off-by: Pavel Machek <pavel@denx.de>

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 982c952..5df9bb3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1380,14 +1380,9 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 
 	if (unlikely(netif_queue_stopped(priv->dev) &&
 		     stmmac_tx_avail(priv) > STMMAC_TX_THRESH)) {
-		netif_tx_lock(priv->dev);
-		if (netif_queue_stopped(priv->dev) &&
-		    stmmac_tx_avail(priv) > STMMAC_TX_THRESH) {
-			netif_dbg(priv, tx_done, priv->dev,
-				  "%s: restart transmit\n", __func__);
-			netif_wake_queue(priv->dev);
-		}
-		netif_tx_unlock(priv->dev);
+		netif_dbg(priv, tx_done, priv->dev,
+			  "%s: restart transmit\n", __func__);
+		netif_wake_queue(priv->dev);
 	}
 
 	if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) {
@@ -1630,7 +1625,9 @@ static void stmmac_tx_timer(unsigned long data)
 {
 	struct stmmac_priv *priv = (struct stmmac_priv *)data;
 
+	netif_tx_lock_bh(priv->dev);
 	stmmac_tx_clean(priv);
+	netif_tx_unlock_bh(priv->dev);
 }
 
 /**
@@ -1994,7 +1991,8 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
  *   --------
  *
  * mss is fixed when enable tso, so w/o programming the TDES3 ctx field.
- */
+ *
+ *  Called with netif_tx_lock held.                                                                 */
 static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	u32 pay_len, mss;
@@ -2174,6 +2172,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
  *  Description : this is the tx entry point of the driver.
  *  It programs the chain or the ring and supports oversized frames
  *  and SG feature.
+ *  Called with netif_tx_lock held. 
  */
 static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -2684,7 +2683,9 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 	int work_done = 0;
 
 	priv->xstats.napi_poll++;
+	netif_tx_lock_bh(priv->dev);
 	stmmac_tx_clean(priv);
+	netif_tx_unlock_bh(priv->dev);
 
 	work_done = stmmac_rx(priv, budget);
 	if (work_done < budget) {
@@ -2701,6 +2702,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
  *   complete within a reasonable time. The driver will mark the error in the
  *   netdev structure and arrange for the device to be reset to a sane state
  *   in order to transmit a new packet.
+ * Called with netif_tx_lock held.
  */
 static void stmmac_tx_timeout(struct net_device *dev)
 {


-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: [RFC] Re: Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
  2016-12-07 12:31             ` [RFC] " Pavel Machek
@ 2016-12-07 13:18               ` Lino Sanfilippo
  0 siblings, 0 replies; 64+ messages in thread
From: Lino Sanfilippo @ 2016-12-07 13:18 UTC (permalink / raw)
  To: Pavel Machek, Lino Sanfilippo
  Cc: Giuseppe CAVALLARO, alexandre.torgue, David Miller, netdev, linux-kernel

Hi,

On 07.12.2016 13:31, Pavel Machek wrote:
> On Fri 2016-12-02 15:05:21, Lino Sanfilippo wrote:
>> Hi,
>>
>>
>>>
>>> There's nothing that protect stmmac_poll() from running concurently
>>> with stmmac_dma_interrupt(), right?
>>>
>>
>> could it be that there is also another issue concerned locking?:
>> The tx completion handler takes the xmit_lock in case that the
>> netif_queue is stopped. This is AFAICS unnecessary, since both
>> xmit and completion handler are already synchronized by the private
>> tx lock. But it is IMHO also dangerous:
>>
>> In the xmit handler we have the locking order
>> 1. xmit_lock
>> 2. private tx lock
>>
>> while in the completion handler its the reverse:
>>
>> 1. private tx lock
>> 2. xmit lock.
>>
>> Do I miss something?
>
> No, it seems you are right. Something like this?
>
> Hmm. And can priv->tx_lock be removed, as we already rely on
> netif_tx_lock?
>

David wanted indeed the private lock to be removed completely. See
this thread.

https://marc.info/?l=linux-netdev&m=148072001900620&w=2

If you can wait a bit, I already have patches prepared to fix this issue in both
drivers. I want to send them as soon as I am at home (a few hours).

Regards,
Lino

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac: disable tx coalescing
  2016-12-05 12:27     ` [PATCH] stmmac: disable tx coalescing Pavel Machek
@ 2016-12-11 19:07       ` Pavel Machek
  2016-12-11 19:31         ` David Miller
  0 siblings, 1 reply; 64+ messages in thread
From: Pavel Machek @ 2016-12-11 19:07 UTC (permalink / raw)
  To: David Miller
  Cc: peppe.cavallaro, netdev, linux-kernel, alexandre.torgue, LinoSanfilippo

[-- Attachment #1: Type: text/plain, Size: 2325 bytes --]

Hi!

David, ping? Can I get you to apply this one?

As you noticed, tx coalescing is completely broken in that driver, and
not easy to repair. This is simplest way to disable it. It can still
be re-enabled from userspace, so code can be fixed in future.

Best regards,
								Pavel

> 
> Tx coalescing in stmmac is broken in more than one way, so disable it
> for now.
>     
> First, low-res timers have resolution down to one per second. It is
> not acceptable to delay transmits for 40msec, and certainly not
> acceptable to delay them for 1000msec.
>     
> Second, the logic is wrong:
>     
>     if (likely(priv->tx_coal_frames > priv->tx_count_frames))
>      	mod_timer(&priv->txtimer,
>      	          STMMAC_COAL_TIMER(priv->tx_coal_timer));
>     ...
>     
> doing tx_clean() after set number of packets, or set time after the
> first packet is transmitted would make sense. But that's not what the
> code does. As long as packets are being transmitted, you move the
> timer into the future.. so that finally you run out of the place, then
> wait for timer (!) and only then you do the cleaning.
> 
> Third, tx_cleanup is not safe to call from interrupt (tx_lock is not
> irqsave), but that's exactly what coalescing code does.
> 
> Signed-off-by: Pavel Machek <pavel@denx.de>
> 
> ---
> 
> This is stable candidate, afaict. It is broken in 4.4, too.
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
> index 3ced2e1..32ce148 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/common.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/common.h
> @@ -244,11 +244,11 @@ struct stmmac_extra_stats {
>  /* Max/Min RI Watchdog Timer count value */
>  #define MAX_DMA_RIWT		0xff
>  #define MIN_DMA_RIWT		0x20
> -/* Tx coalesce parameters */
> +/* Tx coalesce parameters. Set STMMAC_TX_FRAMES to 0 to disable coalescing. */
>  #define STMMAC_COAL_TX_TIMER	40000
>  #define STMMAC_MAX_COAL_TX_TICK	100000
>  #define STMMAC_TX_MAX_FRAMES	256
> -#define STMMAC_TX_FRAMES	64
> +#define STMMAC_TX_FRAMES	0
>  
>  /* Rx IPC status */
>  enum rx_frame_status {
> 



-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac: disable tx coalescing
  2016-12-11 19:07       ` Pavel Machek
@ 2016-12-11 19:31         ` David Miller
  2016-12-11 19:57           ` Pavel Machek
  0 siblings, 1 reply; 64+ messages in thread
From: David Miller @ 2016-12-11 19:31 UTC (permalink / raw)
  To: pavel
  Cc: peppe.cavallaro, netdev, linux-kernel, alexandre.torgue, LinoSanfilippo

From: Pavel Machek <pavel@ucw.cz>
Date: Sun, 11 Dec 2016 20:07:50 +0100

> David, ping? Can I get you to apply this one?
> 
> As you noticed, tx coalescing is completely broken in that driver, and
> not easy to repair. This is simplest way to disable it. It can still
> be re-enabled from userspace, so code can be fixed in future.

Sorry I'm not applying this, I want you and the driver maintainer
to reach a real solution.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] stmmac: disable tx coalescing
  2016-12-11 19:31         ` David Miller
@ 2016-12-11 19:57           ` Pavel Machek
  0 siblings, 0 replies; 64+ messages in thread
From: Pavel Machek @ 2016-12-11 19:57 UTC (permalink / raw)
  To: David Miller
  Cc: peppe.cavallaro, netdev, linux-kernel, alexandre.torgue, LinoSanfilippo

[-- Attachment #1: Type: text/plain, Size: 1744 bytes --]

On Sun 2016-12-11 14:31:13, David Miller wrote:
> From: Pavel Machek <pavel@ucw.cz>
> Date: Sun, 11 Dec 2016 20:07:50 +0100
> 
> > David, ping? Can I get you to apply this one?
> > 
> > As you noticed, tx coalescing is completely broken in that driver, and
> > not easy to repair. This is simplest way to disable it. It can still
> > be re-enabled from userspace, so code can be fixed in future.
> 
> Sorry I'm not applying this, I want you and the driver maintainer
> to reach a real solution.

This is what you said about this driver:

# > 4.9-rc6 still has the delays. With the
# >
# > #define STMMAC_COAL_TX_TIMER 1000
# > #define STMMAC_TX_MAX_FRAMES 2
# >
# > settings, delays go away, and driver still works. (It fails fairly
# > fast in 4.4). Good news. But the question still is: what is going on
# > there?
# 
# 256 packets looks way too large for being a trigger for aborting the
# TX coalescing timer.
# 
# Looking more deeply into this, the driver is using non-highres timers
# to implement the TX coalescing.  This simply cannot work.
# 
# 1 HZ, which is the lowest granularity of non-highres timers in the
# kernel, is variable as well as already too large of a delay for
# effective TX coalescing.
# 
# I seriously think that the TX coalescing support should be ripped out
# or disabled entirely until it is implemented properly in this driver.

I'm doing just that -- disabling it entirely until it is done
properly.

Unfortunately, maintainer does not think delays are huge problem. So I
need your help here.

Thanks,									
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 64+ messages in thread

end of thread, other threads:[~2016-12-11 19:57 UTC | newest]

Thread overview: 64+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-23 10:51 stmmac ethernet in kernel 4.4: coalescing related pauses? Pavel Machek
2016-11-24  8:55 ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses Pavel Machek
2016-11-24 10:29   ` Pavel Machek
2016-11-24 10:36     ` Pavel Machek
2016-11-24 10:46       ` [PATCH] stmmac ethernet: unify locking Pavel Machek
2016-11-24 11:05         ` [PATCH] stmmac ethernet: remove cut & paste code Pavel Machek
2016-11-24 20:05           ` Joe Perches
2016-11-24 21:44             ` Pavel Machek
2016-11-24 22:27               ` Joe Perches
2016-11-28 11:50                 ` Pavel Machek
2016-11-28 14:24                   ` Joe Perches
2016-11-28 14:35                     ` Pavel Machek
2016-11-28 16:03                       ` Joe Perches
2016-11-24 16:04   ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses David Miller
2016-11-24 21:25     ` Pavel Machek
2016-12-02  8:24       ` Giuseppe CAVALLARO
2016-12-02  8:41         ` Giuseppe CAVALLARO
2016-12-02  8:45         ` Pavel Machek
2016-12-02  9:43           ` Giuseppe CAVALLARO
2016-12-02 12:32             ` Pavel Machek
2016-12-02 13:51               ` Giuseppe CAVALLARO
2016-12-02 14:26                 ` Alexandre Torgue
2016-12-02 15:19                   ` Giuseppe CAVALLARO
2016-12-05 12:14                     ` Pavel Machek
2016-12-05 12:01                 ` Pavel Machek
2016-12-05 10:15             ` Pavel Machek
2016-12-05 11:40               ` Lino Sanfilippo
2016-12-05 22:02                 ` Pavel Machek
2016-12-05 22:37                   ` Lino Sanfilippo
2016-12-05 22:40                     ` Pavel Machek
2016-12-05 22:54                       ` Lino Sanfilippo
2016-12-05 23:11                         ` Lino Sanfilippo
2016-12-02 14:05           ` Aw: " Lino Sanfilippo
2016-12-07 12:31             ` [RFC] " Pavel Machek
2016-12-07 13:18               ` Lino Sanfilippo
2016-12-05 11:56         ` Pavel Machek
2016-11-28 11:55     ` [PATCH] stmmac: fix comments, make debug output consistent Pavel Machek
2016-11-30  0:53       ` David Miller
2016-11-28 12:13     ` stmmac ethernet in kernel 4.9-rc6: coalescing related pauses Pavel Machek
2016-11-28 12:17     ` [PATCH] stmmac: reduce code duplication getting basic descriptors Pavel Machek
2016-11-28 15:25       ` kbuild test robot
2016-12-02 14:09       ` Alexandre Torgue
2016-11-30 11:44     ` [PATCH] stmmac: simplify flag assignment Pavel Machek
2016-12-01 20:23       ` David Miller
2016-12-01 22:48         ` stmmac: turn coalescing / NAPI off in stmmac Pavel Machek
2016-12-02  8:39           ` Giuseppe CAVALLARO
2016-12-02 10:42             ` Pavel Machek
2016-12-02 15:31               ` Giuseppe CAVALLARO
2016-12-05 11:45                 ` Pavel Machek
2016-12-02  8:27       ` [PATCH] stmmac: simplify flag assignment Giuseppe CAVALLARO
2016-12-01 10:32     ` [PATCH] stmmac: cleanup documenation, make it match reality Pavel Machek
2016-12-03 20:07       ` David Miller
2016-12-05 12:27     ` [PATCH] stmmac: disable tx coalescing Pavel Machek
2016-12-11 19:07       ` Pavel Machek
2016-12-11 19:31         ` David Miller
2016-12-11 19:57           ` Pavel Machek
2016-11-28 13:07 ` stmmac ethernet in kernel 4.4: coalescing related pauses? Lino Sanfilippo
2016-11-28 14:54   ` David Miller
2016-11-28 15:31     ` Eric Dumazet
2016-11-28 15:57       ` Lino Sanfilippo
2016-11-28 16:30         ` David Miller
2016-11-28 17:01           ` Lino Sanfilippo
2016-11-30 10:28       ` Pavel Machek
2016-11-28 15:33     ` Lino Sanfilippo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).