All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC 1/2] eal: add nanosleep based delay function
       [not found] ` <CGME20180831124402eucas1p120ba1cdc8c3f6bc6c5bd09b5d3ede771@eucas1p1.samsung.com>
@ 2018-08-31 12:45   ` Ilya Maximets
  0 siblings, 0 replies; 7+ messages in thread
From: Ilya Maximets @ 2018-08-31 12:45 UTC (permalink / raw)
  To: dev
  Cc: Jingjing Wu, Konstantin Ananyev, Wenzhuo Lu, Beilei Xing,
	Qi Zhang, Xiao Wang, Bruce Richardson, Ilya Maximets

Add a new rte_delay_us_sleep() function that uses nanosleep().
This function can be used by applications to not implement
their own nanosleep() based callback and by internal DPDK
code if CPU non-blocking delay needed.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---
 lib/librte_eal/common/eal_common_timer.c      | 19 +++++++
 .../common/include/generic/rte_cycles.h       | 11 ++++
 lib/librte_eal/rte_eal_version.map            |  1 +
 test/test/autotest_data.py                    |  6 +++
 test/test/meson.build                         |  1 +
 test/test/test_cycles.c                       | 51 ++++++++++++++-----
 6 files changed, 75 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c
index 2e2b770fb..4a00525ca 100644
--- a/lib/librte_eal/common/eal_common_timer.c
+++ b/lib/librte_eal/common/eal_common_timer.c
@@ -7,9 +7,11 @@
 #include <unistd.h>
 #include <inttypes.h>
 #include <sys/types.h>
+#include <time.h>
 #include <errno.h>
 
 #include <rte_common.h>
+#include <rte_compat.h>
 #include <rte_log.h>
 #include <rte_cycles.h>
 #include <rte_pause.h>
@@ -31,6 +33,23 @@ rte_delay_us_block(unsigned int us)
 		rte_pause();
 }
 
+void __rte_experimental
+rte_delay_us_sleep(unsigned int us)
+{
+	struct timespec wait[2];
+	int ind = 0;
+
+	wait[0].tv_sec = 0;
+	if (us >= US_PER_S) {
+		wait[0].tv_sec = us / US_PER_S;
+		us -= wait[0].tv_sec * US_PER_S;
+	}
+	wait[0].tv_nsec = 1000 * us;
+
+	while (nanosleep(&wait[ind], &wait[1 - ind]) == EINTR)
+		ind = 1 - ind;
+}
+
 uint64_t
 rte_get_tsc_hz(void)
 {
diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h
index 0ff1af504..ac379e878 100644
--- a/lib/librte_eal/common/include/generic/rte_cycles.h
+++ b/lib/librte_eal/common/include/generic/rte_cycles.h
@@ -13,6 +13,7 @@
  */
 
 #include <stdint.h>
+#include <rte_compat.h>
 #include <rte_debug.h>
 #include <rte_atomic.h>
 
@@ -157,6 +158,16 @@ rte_delay_ms(unsigned ms)
  */
 void rte_delay_us_block(unsigned int us);
 
+/**
+ * Delay function that uses system sleep.
+ * Does not block the CPU core.
+ *
+ * @param us
+ *   Number of microseconds to wait.
+ */
+void __rte_experimental
+rte_delay_us_sleep(unsigned int us);
+
 /**
  * Replace rte_delay_us with user defined function.
  *
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 344a43d32..d68777ce0 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -270,6 +270,7 @@ EXPERIMENTAL {
 	rte_class_register;
 	rte_class_unregister;
 	rte_ctrl_thread_create;
+	rte_delay_us_sleep;
 	rte_dev_event_callback_register;
 	rte_dev_event_callback_unregister;
 	rte_dev_event_monitor_start;
diff --git a/test/test/autotest_data.py b/test/test/autotest_data.py
index f68d9b111..874d0cb53 100644
--- a/test/test/autotest_data.py
+++ b/test/test/autotest_data.py
@@ -278,6 +278,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Sleep delay",
+        "Command": "delay_us_sleep_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Rawdev autotest",
         "Command": "rawdev_autotest",
diff --git a/test/test/meson.build b/test/test/meson.build
index b1dd6eca2..0078aea30 100644
--- a/test/test/meson.build
+++ b/test/test/meson.build
@@ -143,6 +143,7 @@ test_names = [
 	'cryptodev_dpaa_sec_autotest',
 	'cycles_autotest',
 	'debug_autotest',
+	'delay_us_sleep_autotest',
 	'devargs_autotest',
 	'distributor_autotest',
 	'distributor_perf_autotest',
diff --git a/test/test/test_cycles.c b/test/test/test_cycles.c
index 149f60b07..c78e6a5b1 100644
--- a/test/test/test_cycles.c
+++ b/test/test/test_cycles.c
@@ -23,6 +23,30 @@
  *   of cycles is correct with regard to the frequency of the timer.
  */
 
+static int
+check_wait_one_second(void)
+{
+	uint64_t cycles, prev_cycles;
+	uint64_t hz = rte_get_timer_hz();
+	uint64_t max_inc = (hz / 100); /* 10 ms max between 2 reads */
+
+	/* check that waiting 1 second is precise */
+	prev_cycles = rte_get_timer_cycles();
+	rte_delay_us(1000000);
+	cycles = rte_get_timer_cycles();
+
+	if ((uint64_t)(cycles - prev_cycles) > (hz + max_inc)) {
+		printf("delay_us is not accurate: too long\n");
+		return -1;
+	}
+	if ((uint64_t)(cycles - prev_cycles) < (hz - max_inc)) {
+		printf("delay_us is not accurate: too short\n");
+		return -1;
+	}
+
+	return 0;
+}
+
 static int
 test_cycles(void)
 {
@@ -43,24 +67,23 @@ test_cycles(void)
 		prev_cycles = cycles;
 	}
 
-	/* check that waiting 1 second is precise */
-	prev_cycles = rte_get_timer_cycles();
-	rte_delay_us(1000000);
-	cycles = rte_get_timer_cycles();
+	return check_wait_one_second();
+}
 
-	if ((uint64_t)(cycles - prev_cycles) > (hz + max_inc)) {
-		printf("delay_us is not accurate: too long\n");
-		return -1;
-	}
-	if ((uint64_t)(cycles - prev_cycles) < (hz - max_inc)) {
-		printf("delay_us is not accurate: too short\n");
-		return -1;
-	}
+REGISTER_TEST_COMMAND(cycles_autotest, test_cycles);
 
-	return 0;
+/*
+ * One second precision test with rte_delay_us_sleep.
+ */
+
+static int
+test_delay_us_sleep(void)
+{
+	rte_delay_us_callback_register(rte_delay_us_sleep);
+	return check_wait_one_second();
 }
 
-REGISTER_TEST_COMMAND(cycles_autotest, test_cycles);
+REGISTER_TEST_COMMAND(delay_us_sleep_autotest, test_delay_us_sleep);
 
 /*
  * rte_delay_us_callback test
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [RFC 2/2] drivers/net: use sleep delay by default for intel NICs
       [not found] ` <CGME20180831124404eucas1p20daff43600dfe450c9106616f886eab4@eucas1p2.samsung.com>
@ 2018-08-31 12:45   ` Ilya Maximets
  2018-09-03 15:14     ` Wiles, Keith
  0 siblings, 1 reply; 7+ messages in thread
From: Ilya Maximets @ 2018-08-31 12:45 UTC (permalink / raw)
  To: dev
  Cc: Jingjing Wu, Konstantin Ananyev, Wenzhuo Lu, Beilei Xing,
	Qi Zhang, Xiao Wang, Bruce Richardson, Ilya Maximets

NICs uses different delays up to a second during their
configuration. It makes no sense to busy-wait so long wasting
CPU cycles and preventing any other threads to execute on the
same CPU core. These busy polling are the rudiments that came
from the kernel drivers where you can not sleep in interrupt
context, but as we're in userspace, we're able and should
sleep to allow other threads to run.
Delays never called on rx/tx path, so this should not affect
performance.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---
 drivers/net/avf/Makefile             | 1 +
 drivers/net/avf/base/avf_osdep.h     | 4 ++--
 drivers/net/e1000/Makefile           | 1 +
 drivers/net/e1000/base/e1000_osdep.h | 2 +-
 drivers/net/i40e/base/i40e_osdep.h   | 6 +++---
 drivers/net/ifc/base/ifcvf_osdep.h   | 2 +-
 drivers/net/ixgbe/base/ixgbe_osdep.h | 2 +-
 7 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/avf/Makefile b/drivers/net/avf/Makefile
index 3f815bbc4..8ee707529 100644
--- a/drivers/net/avf/Makefile
+++ b/drivers/net/avf/Makefile
@@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_avf.a
 
 CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
 LDLIBS += -lrte_bus_pci
diff --git a/drivers/net/avf/base/avf_osdep.h b/drivers/net/avf/base/avf_osdep.h
index 9ef45968e..442a5acd0 100644
--- a/drivers/net/avf/base/avf_osdep.h
+++ b/drivers/net/avf/base/avf_osdep.h
@@ -93,8 +93,8 @@ typedef uint64_t        u64;
 #define avf_memset(a, b, c, d) memset((a), (b), (c))
 #define avf_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
 
-#define avf_usec_delay(x) rte_delay_us(x)
-#define avf_msec_delay(x) rte_delay_us(1000*(x))
+#define avf_usec_delay(x) rte_delay_us_sleep(x)
+#define avf_msec_delay(x) avf_usec_delay(1000 * (x))
 
 #define AVF_PCI_REG(reg)		rte_read32(reg)
 #define AVF_PCI_REG_ADDR(a, reg) \
diff --git a/drivers/net/e1000/Makefile b/drivers/net/e1000/Makefile
index 9c87e883b..0ed627656 100644
--- a/drivers/net/e1000/Makefile
+++ b/drivers/net/e1000/Makefile
@@ -10,6 +10,7 @@ LIB = librte_pmd_e1000.a
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
 LDLIBS += -lrte_bus_pci
diff --git a/drivers/net/e1000/base/e1000_osdep.h b/drivers/net/e1000/base/e1000_osdep.h
index b8868049f..5958ea157 100644
--- a/drivers/net/e1000/base/e1000_osdep.h
+++ b/drivers/net/e1000/base/e1000_osdep.h
@@ -48,7 +48,7 @@
 
 #include "../e1000_logs.h"
 
-#define DELAY(x) rte_delay_us(x)
+#define DELAY(x) rte_delay_us_sleep(x)
 #define usec_delay(x) DELAY(x)
 #define usec_delay_irq(x) DELAY(x)
 #define msec_delay(x) DELAY(1000*(x))
diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
index 8e5c593c9..a6072e153 100644
--- a/drivers/net/i40e/base/i40e_osdep.h
+++ b/drivers/net/i40e/base/i40e_osdep.h
@@ -233,9 +233,9 @@ struct i40e_spinlock {
 #define i40e_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
 
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
-#define DELAY(x) rte_delay_us(x)
-#define i40e_usec_delay(x) rte_delay_us(x)
-#define i40e_msec_delay(x) rte_delay_us(1000*(x))
+#define DELAY(x) rte_delay_us_sleep(x)
+#define i40e_usec_delay(x) DELAY(x)
+#define i40e_msec_delay(x) DELAY(1000 * (x))
 #define udelay(x) DELAY(x)
 #define msleep(x) DELAY(1000*(x))
 #define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
diff --git a/drivers/net/ifc/base/ifcvf_osdep.h b/drivers/net/ifc/base/ifcvf_osdep.h
index cf151ef52..6aef25ea4 100644
--- a/drivers/net/ifc/base/ifcvf_osdep.h
+++ b/drivers/net/ifc/base/ifcvf_osdep.h
@@ -17,7 +17,7 @@
 #define DEBUGOUT(S, args...)    RTE_LOG(DEBUG, PMD, S, ##args)
 #define STATIC                  static
 
-#define msec_delay	rte_delay_ms
+#define msec_delay(x)	rte_delay_us_sleep(1000 * (x))
 
 #define IFCVF_READ_REG8(reg)		rte_read8(reg)
 #define IFCVF_WRITE_REG8(val, reg)	rte_write8((val), (reg))
diff --git a/drivers/net/ixgbe/base/ixgbe_osdep.h b/drivers/net/ixgbe/base/ixgbe_osdep.h
index bb5dfd2af..94ede9bc2 100644
--- a/drivers/net/ixgbe/base/ixgbe_osdep.h
+++ b/drivers/net/ixgbe/base/ixgbe_osdep.h
@@ -51,7 +51,7 @@
 
 #define ASSERT(x) if(!(x)) rte_panic("IXGBE: x")
 
-#define DELAY(x) rte_delay_us(x)
+#define DELAY(x) rte_delay_us_sleep(x)
 #define usec_delay(x) DELAY(x)
 #define msec_delay(x) DELAY(1000*(x))
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC 2/2] drivers/net: use sleep delay by default for intel NICs
  2018-08-31 12:45   ` [RFC 2/2] drivers/net: use sleep delay by default for intel NICs Ilya Maximets
@ 2018-09-03 15:14     ` Wiles, Keith
  2018-09-03 15:33       ` Ilya Maximets
  0 siblings, 1 reply; 7+ messages in thread
From: Wiles, Keith @ 2018-09-03 15:14 UTC (permalink / raw)
  To: Ilya Maximets
  Cc: dev, Wu, Jingjing, Ananyev, Konstantin, Lu, Wenzhuo, Xing,
	Beilei, Zhang, Qi Z, Wang, Xiao W, Richardson, Bruce



> On Aug 31, 2018, at 1:45 PM, Ilya Maximets <i.maximets@samsung.com> wrote:
> 
> NICs uses different delays up to a second during their
> configuration. It makes no sense to busy-wait so long wasting
> CPU cycles and preventing any other threads to execute on the
> same CPU core. These busy polling are the rudiments that came
> from the kernel drivers where you can not sleep in interrupt
> context, but as we're in userspace, we're able and should
> sleep to allow other threads to run.
> Delays never called on rx/tx path, so this should not affect
> performance.

I have a question, the only thread being effected by the busy wait is the thread assigned to the core or the master lcore in the case of rte_eal_init(). It should not effect other threads running on other cores, right? If you do have other threads running in the same code then I see the need, please help me understand the use and how it is effecting DPDK.
> 
> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
> ---
> drivers/net/avf/Makefile             | 1 +
> drivers/net/avf/base/avf_osdep.h     | 4 ++--
> drivers/net/e1000/Makefile           | 1 +
> drivers/net/e1000/base/e1000_osdep.h | 2 +-
> drivers/net/i40e/base/i40e_osdep.h   | 6 +++---
> drivers/net/ifc/base/ifcvf_osdep.h   | 2 +-
> drivers/net/ixgbe/base/ixgbe_osdep.h | 2 +-
> 7 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/avf/Makefile b/drivers/net/avf/Makefile
> index 3f815bbc4..8ee707529 100644
> --- a/drivers/net/avf/Makefile
> +++ b/drivers/net/avf/Makefile
> @@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
> LIB = librte_pmd_avf.a
> 
> CFLAGS += -O3
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
> LDLIBS += -lrte_bus_pci
> diff --git a/drivers/net/avf/base/avf_osdep.h b/drivers/net/avf/base/avf_osdep.h
> index 9ef45968e..442a5acd0 100644
> --- a/drivers/net/avf/base/avf_osdep.h
> +++ b/drivers/net/avf/base/avf_osdep.h
> @@ -93,8 +93,8 @@ typedef uint64_t        u64;
> #define avf_memset(a, b, c, d) memset((a), (b), (c))
> #define avf_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
> 
> -#define avf_usec_delay(x) rte_delay_us(x)
> -#define avf_msec_delay(x) rte_delay_us(1000*(x))
> +#define avf_usec_delay(x) rte_delay_us_sleep(x)
> +#define avf_msec_delay(x) avf_usec_delay(1000 * (x))
> 
> #define AVF_PCI_REG(reg)		rte_read32(reg)
> #define AVF_PCI_REG_ADDR(a, reg) \
> diff --git a/drivers/net/e1000/Makefile b/drivers/net/e1000/Makefile
> index 9c87e883b..0ed627656 100644
> --- a/drivers/net/e1000/Makefile
> +++ b/drivers/net/e1000/Makefile
> @@ -10,6 +10,7 @@ LIB = librte_pmd_e1000.a
> 
> CFLAGS += -O3
> CFLAGS += $(WERROR_FLAGS)
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
> LDLIBS += -lrte_bus_pci
> diff --git a/drivers/net/e1000/base/e1000_osdep.h b/drivers/net/e1000/base/e1000_osdep.h
> index b8868049f..5958ea157 100644
> --- a/drivers/net/e1000/base/e1000_osdep.h
> +++ b/drivers/net/e1000/base/e1000_osdep.h
> @@ -48,7 +48,7 @@
> 
> #include "../e1000_logs.h"
> 
> -#define DELAY(x) rte_delay_us(x)
> +#define DELAY(x) rte_delay_us_sleep(x)
> #define usec_delay(x) DELAY(x)
> #define usec_delay_irq(x) DELAY(x)
> #define msec_delay(x) DELAY(1000*(x))
> diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
> index 8e5c593c9..a6072e153 100644
> --- a/drivers/net/i40e/base/i40e_osdep.h
> +++ b/drivers/net/i40e/base/i40e_osdep.h
> @@ -233,9 +233,9 @@ struct i40e_spinlock {
> #define i40e_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
> 
> #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
> -#define DELAY(x) rte_delay_us(x)
> -#define i40e_usec_delay(x) rte_delay_us(x)
> -#define i40e_msec_delay(x) rte_delay_us(1000*(x))
> +#define DELAY(x) rte_delay_us_sleep(x)
> +#define i40e_usec_delay(x) DELAY(x)
> +#define i40e_msec_delay(x) DELAY(1000 * (x))
> #define udelay(x) DELAY(x)
> #define msleep(x) DELAY(1000*(x))
> #define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
> diff --git a/drivers/net/ifc/base/ifcvf_osdep.h b/drivers/net/ifc/base/ifcvf_osdep.h
> index cf151ef52..6aef25ea4 100644
> --- a/drivers/net/ifc/base/ifcvf_osdep.h
> +++ b/drivers/net/ifc/base/ifcvf_osdep.h
> @@ -17,7 +17,7 @@
> #define DEBUGOUT(S, args...)    RTE_LOG(DEBUG, PMD, S, ##args)
> #define STATIC                  static
> 
> -#define msec_delay	rte_delay_ms
> +#define msec_delay(x)	rte_delay_us_sleep(1000 * (x))
> 
> #define IFCVF_READ_REG8(reg)		rte_read8(reg)
> #define IFCVF_WRITE_REG8(val, reg)	rte_write8((val), (reg))
> diff --git a/drivers/net/ixgbe/base/ixgbe_osdep.h b/drivers/net/ixgbe/base/ixgbe_osdep.h
> index bb5dfd2af..94ede9bc2 100644
> --- a/drivers/net/ixgbe/base/ixgbe_osdep.h
> +++ b/drivers/net/ixgbe/base/ixgbe_osdep.h
> @@ -51,7 +51,7 @@
> 
> #define ASSERT(x) if(!(x)) rte_panic("IXGBE: x")
> 
> -#define DELAY(x) rte_delay_us(x)
> +#define DELAY(x) rte_delay_us_sleep(x)
> #define usec_delay(x) DELAY(x)
> #define msec_delay(x) DELAY(1000*(x))
> 
> -- 
> 2.17.1
> 

Regards,
Keith

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC 2/2] drivers/net: use sleep delay by default for intel NICs
  2018-09-03 15:14     ` Wiles, Keith
@ 2018-09-03 15:33       ` Ilya Maximets
  2018-09-03 15:39         ` Wiles, Keith
  0 siblings, 1 reply; 7+ messages in thread
From: Ilya Maximets @ 2018-09-03 15:33 UTC (permalink / raw)
  To: Wiles, Keith
  Cc: dev, Wu, Jingjing, Ananyev, Konstantin, Lu, Wenzhuo, Xing,
	Beilei, Zhang, Qi Z, Wang, Xiao W, Richardson, Bruce

On 03.09.2018 18:14, Wiles, Keith wrote:
> 
> 
>> On Aug 31, 2018, at 1:45 PM, Ilya Maximets <i.maximets@samsung.com> wrote:
>>
>> NICs uses different delays up to a second during their
>> configuration. It makes no sense to busy-wait so long wasting
>> CPU cycles and preventing any other threads to execute on the
>> same CPU core. These busy polling are the rudiments that came
>> from the kernel drivers where you can not sleep in interrupt
>> context, but as we're in userspace, we're able and should
>> sleep to allow other threads to run.
>> Delays never called on rx/tx path, so this should not affect
>> performance.
> 
> I have a question, the only thread being effected by the busy wait is the thread assigned to the core or the master lcore in the case of rte_eal_init(). It should not effect other threads running on other cores, right? If you do have other threads running in the same code then I see the need, please help me understand the use and how it is effecting DPDK.

Originally, this patch comes from this issue:
   http://mails.dpdk.org/archives/dev/2018-August/110640.html

non-EAL threads is one of the answers.
For example, main thread in OVS periodically checks the link statuses
and hangs there busy waiting on different NIC configuration operations.
Also, main OVS tread is responsible for port configuration and
re-configuration. There are few other in-dpdk threads like interrupts
handling thread (alarm handling thread). vhost_thread is working on
the same lcores and wants some time to work too.

In case of hyperthreading busy-waiting will significantly affect
threads on the siblings.

Best regards, Ilya Maximets.

>>
>> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
>> ---
>> drivers/net/avf/Makefile             | 1 +
>> drivers/net/avf/base/avf_osdep.h     | 4 ++--
>> drivers/net/e1000/Makefile           | 1 +
>> drivers/net/e1000/base/e1000_osdep.h | 2 +-
>> drivers/net/i40e/base/i40e_osdep.h   | 6 +++---
>> drivers/net/ifc/base/ifcvf_osdep.h   | 2 +-
>> drivers/net/ixgbe/base/ixgbe_osdep.h | 2 +-
>> 7 files changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/avf/Makefile b/drivers/net/avf/Makefile
>> index 3f815bbc4..8ee707529 100644
>> --- a/drivers/net/avf/Makefile
>> +++ b/drivers/net/avf/Makefile
>> @@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>> LIB = librte_pmd_avf.a
>>
>> CFLAGS += -O3
>> +CFLAGS += -DALLOW_EXPERIMENTAL_API
>> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
>> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
>> LDLIBS += -lrte_bus_pci
>> diff --git a/drivers/net/avf/base/avf_osdep.h b/drivers/net/avf/base/avf_osdep.h
>> index 9ef45968e..442a5acd0 100644
>> --- a/drivers/net/avf/base/avf_osdep.h
>> +++ b/drivers/net/avf/base/avf_osdep.h
>> @@ -93,8 +93,8 @@ typedef uint64_t        u64;
>> #define avf_memset(a, b, c, d) memset((a), (b), (c))
>> #define avf_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
>>
>> -#define avf_usec_delay(x) rte_delay_us(x)
>> -#define avf_msec_delay(x) rte_delay_us(1000*(x))
>> +#define avf_usec_delay(x) rte_delay_us_sleep(x)
>> +#define avf_msec_delay(x) avf_usec_delay(1000 * (x))
>>
>> #define AVF_PCI_REG(reg)		rte_read32(reg)
>> #define AVF_PCI_REG_ADDR(a, reg) \
>> diff --git a/drivers/net/e1000/Makefile b/drivers/net/e1000/Makefile
>> index 9c87e883b..0ed627656 100644
>> --- a/drivers/net/e1000/Makefile
>> +++ b/drivers/net/e1000/Makefile
>> @@ -10,6 +10,7 @@ LIB = librte_pmd_e1000.a
>>
>> CFLAGS += -O3
>> CFLAGS += $(WERROR_FLAGS)
>> +CFLAGS += -DALLOW_EXPERIMENTAL_API
>> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
>> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
>> LDLIBS += -lrte_bus_pci
>> diff --git a/drivers/net/e1000/base/e1000_osdep.h b/drivers/net/e1000/base/e1000_osdep.h
>> index b8868049f..5958ea157 100644
>> --- a/drivers/net/e1000/base/e1000_osdep.h
>> +++ b/drivers/net/e1000/base/e1000_osdep.h
>> @@ -48,7 +48,7 @@
>>
>> #include "../e1000_logs.h"
>>
>> -#define DELAY(x) rte_delay_us(x)
>> +#define DELAY(x) rte_delay_us_sleep(x)
>> #define usec_delay(x) DELAY(x)
>> #define usec_delay_irq(x) DELAY(x)
>> #define msec_delay(x) DELAY(1000*(x))
>> diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
>> index 8e5c593c9..a6072e153 100644
>> --- a/drivers/net/i40e/base/i40e_osdep.h
>> +++ b/drivers/net/i40e/base/i40e_osdep.h
>> @@ -233,9 +233,9 @@ struct i40e_spinlock {
>> #define i40e_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
>>
>> #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
>> -#define DELAY(x) rte_delay_us(x)
>> -#define i40e_usec_delay(x) rte_delay_us(x)
>> -#define i40e_msec_delay(x) rte_delay_us(1000*(x))
>> +#define DELAY(x) rte_delay_us_sleep(x)
>> +#define i40e_usec_delay(x) DELAY(x)
>> +#define i40e_msec_delay(x) DELAY(1000 * (x))
>> #define udelay(x) DELAY(x)
>> #define msleep(x) DELAY(1000*(x))
>> #define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
>> diff --git a/drivers/net/ifc/base/ifcvf_osdep.h b/drivers/net/ifc/base/ifcvf_osdep.h
>> index cf151ef52..6aef25ea4 100644
>> --- a/drivers/net/ifc/base/ifcvf_osdep.h
>> +++ b/drivers/net/ifc/base/ifcvf_osdep.h
>> @@ -17,7 +17,7 @@
>> #define DEBUGOUT(S, args...)    RTE_LOG(DEBUG, PMD, S, ##args)
>> #define STATIC                  static
>>
>> -#define msec_delay	rte_delay_ms
>> +#define msec_delay(x)	rte_delay_us_sleep(1000 * (x))
>>
>> #define IFCVF_READ_REG8(reg)		rte_read8(reg)
>> #define IFCVF_WRITE_REG8(val, reg)	rte_write8((val), (reg))
>> diff --git a/drivers/net/ixgbe/base/ixgbe_osdep.h b/drivers/net/ixgbe/base/ixgbe_osdep.h
>> index bb5dfd2af..94ede9bc2 100644
>> --- a/drivers/net/ixgbe/base/ixgbe_osdep.h
>> +++ b/drivers/net/ixgbe/base/ixgbe_osdep.h
>> @@ -51,7 +51,7 @@
>>
>> #define ASSERT(x) if(!(x)) rte_panic("IXGBE: x")
>>
>> -#define DELAY(x) rte_delay_us(x)
>> +#define DELAY(x) rte_delay_us_sleep(x)
>> #define usec_delay(x) DELAY(x)
>> #define msec_delay(x) DELAY(1000*(x))
>>
>> -- 
>> 2.17.1
>>
> 
> Regards,
> Keith
> 
> 
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC 2/2] drivers/net: use sleep delay by default for intel NICs
  2018-09-03 15:33       ` Ilya Maximets
@ 2018-09-03 15:39         ` Wiles, Keith
  2018-09-03 15:50           ` Ilya Maximets
  0 siblings, 1 reply; 7+ messages in thread
From: Wiles, Keith @ 2018-09-03 15:39 UTC (permalink / raw)
  To: Ilya Maximets
  Cc: dev, Wu, Jingjing, Ananyev, Konstantin, Lu, Wenzhuo, Xing,
	Beilei, Zhang, Qi Z, Wang, Xiao W, Richardson, Bruce



> On Sep 3, 2018, at 4:33 PM, Ilya Maximets <i.maximets@samsung.com> wrote:
> 
> On 03.09.2018 18:14, Wiles, Keith wrote:
>> 
>> 
>>> On Aug 31, 2018, at 1:45 PM, Ilya Maximets <i.maximets@samsung.com> wrote:
>>> 
>>> NICs uses different delays up to a second during their
>>> configuration. It makes no sense to busy-wait so long wasting
>>> CPU cycles and preventing any other threads to execute on the
>>> same CPU core. These busy polling are the rudiments that came
>>> from the kernel drivers where you can not sleep in interrupt
>>> context, but as we're in userspace, we're able and should
>>> sleep to allow other threads to run.
>>> Delays never called on rx/tx path, so this should not affect
>>> performance.
>> 
>> I have a question, the only thread being effected by the busy wait is the thread assigned to the core or the master lcore in the case of rte_eal_init(). It should not effect other threads running on other cores, right? If you do have other threads running in the same code then I see the need, please help me understand the use and how it is effecting DPDK.
> 
> Originally, this patch comes from this issue:
>   http://mails.dpdk.org/archives/dev/2018-August/110640.html
> 
> non-EAL threads is one of the answers.
> For example, main thread in OVS periodically checks the link statuses
> and hangs there busy waiting on different NIC configuration operations.

We have link state get no wait why is that not working instead of wait polling the line state?

> Also, main OVS tread is responsible for port configuration and
> re-configuration. There are few other in-dpdk threads like interrupts
> handling thread (alarm handling thread). vhost_thread is working on
> the same lcores and wants some time to work too.
> 
> In case of hyperthreading busy-waiting will significantly affect
> threads on the siblings.
> 
> Best regards, Ilya Maximets.
> 
>>> 
>>> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
>>> ---
>>> drivers/net/avf/Makefile             | 1 +
>>> drivers/net/avf/base/avf_osdep.h     | 4 ++--
>>> drivers/net/e1000/Makefile           | 1 +
>>> drivers/net/e1000/base/e1000_osdep.h | 2 +-
>>> drivers/net/i40e/base/i40e_osdep.h   | 6 +++---
>>> drivers/net/ifc/base/ifcvf_osdep.h   | 2 +-
>>> drivers/net/ixgbe/base/ixgbe_osdep.h | 2 +-
>>> 7 files changed, 10 insertions(+), 8 deletions(-)
>>> 
>>> diff --git a/drivers/net/avf/Makefile b/drivers/net/avf/Makefile
>>> index 3f815bbc4..8ee707529 100644
>>> --- a/drivers/net/avf/Makefile
>>> +++ b/drivers/net/avf/Makefile
>>> @@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>>> LIB = librte_pmd_avf.a
>>> 
>>> CFLAGS += -O3
>>> +CFLAGS += -DALLOW_EXPERIMENTAL_API
>>> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
>>> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
>>> LDLIBS += -lrte_bus_pci
>>> diff --git a/drivers/net/avf/base/avf_osdep.h b/drivers/net/avf/base/avf_osdep.h
>>> index 9ef45968e..442a5acd0 100644
>>> --- a/drivers/net/avf/base/avf_osdep.h
>>> +++ b/drivers/net/avf/base/avf_osdep.h
>>> @@ -93,8 +93,8 @@ typedef uint64_t        u64;
>>> #define avf_memset(a, b, c, d) memset((a), (b), (c))
>>> #define avf_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
>>> 
>>> -#define avf_usec_delay(x) rte_delay_us(x)
>>> -#define avf_msec_delay(x) rte_delay_us(1000*(x))
>>> +#define avf_usec_delay(x) rte_delay_us_sleep(x)
>>> +#define avf_msec_delay(x) avf_usec_delay(1000 * (x))
>>> 
>>> #define AVF_PCI_REG(reg)		rte_read32(reg)
>>> #define AVF_PCI_REG_ADDR(a, reg) \
>>> diff --git a/drivers/net/e1000/Makefile b/drivers/net/e1000/Makefile
>>> index 9c87e883b..0ed627656 100644
>>> --- a/drivers/net/e1000/Makefile
>>> +++ b/drivers/net/e1000/Makefile
>>> @@ -10,6 +10,7 @@ LIB = librte_pmd_e1000.a
>>> 
>>> CFLAGS += -O3
>>> CFLAGS += $(WERROR_FLAGS)
>>> +CFLAGS += -DALLOW_EXPERIMENTAL_API
>>> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
>>> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
>>> LDLIBS += -lrte_bus_pci
>>> diff --git a/drivers/net/e1000/base/e1000_osdep.h b/drivers/net/e1000/base/e1000_osdep.h
>>> index b8868049f..5958ea157 100644
>>> --- a/drivers/net/e1000/base/e1000_osdep.h
>>> +++ b/drivers/net/e1000/base/e1000_osdep.h
>>> @@ -48,7 +48,7 @@
>>> 
>>> #include "../e1000_logs.h"
>>> 
>>> -#define DELAY(x) rte_delay_us(x)
>>> +#define DELAY(x) rte_delay_us_sleep(x)
>>> #define usec_delay(x) DELAY(x)
>>> #define usec_delay_irq(x) DELAY(x)
>>> #define msec_delay(x) DELAY(1000*(x))
>>> diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
>>> index 8e5c593c9..a6072e153 100644
>>> --- a/drivers/net/i40e/base/i40e_osdep.h
>>> +++ b/drivers/net/i40e/base/i40e_osdep.h
>>> @@ -233,9 +233,9 @@ struct i40e_spinlock {
>>> #define i40e_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
>>> 
>>> #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
>>> -#define DELAY(x) rte_delay_us(x)
>>> -#define i40e_usec_delay(x) rte_delay_us(x)
>>> -#define i40e_msec_delay(x) rte_delay_us(1000*(x))
>>> +#define DELAY(x) rte_delay_us_sleep(x)
>>> +#define i40e_usec_delay(x) DELAY(x)
>>> +#define i40e_msec_delay(x) DELAY(1000 * (x))
>>> #define udelay(x) DELAY(x)
>>> #define msleep(x) DELAY(1000*(x))
>>> #define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
>>> diff --git a/drivers/net/ifc/base/ifcvf_osdep.h b/drivers/net/ifc/base/ifcvf_osdep.h
>>> index cf151ef52..6aef25ea4 100644
>>> --- a/drivers/net/ifc/base/ifcvf_osdep.h
>>> +++ b/drivers/net/ifc/base/ifcvf_osdep.h
>>> @@ -17,7 +17,7 @@
>>> #define DEBUGOUT(S, args...)    RTE_LOG(DEBUG, PMD, S, ##args)
>>> #define STATIC                  static
>>> 
>>> -#define msec_delay	rte_delay_ms
>>> +#define msec_delay(x)	rte_delay_us_sleep(1000 * (x))
>>> 
>>> #define IFCVF_READ_REG8(reg)		rte_read8(reg)
>>> #define IFCVF_WRITE_REG8(val, reg)	rte_write8((val), (reg))
>>> diff --git a/drivers/net/ixgbe/base/ixgbe_osdep.h b/drivers/net/ixgbe/base/ixgbe_osdep.h
>>> index bb5dfd2af..94ede9bc2 100644
>>> --- a/drivers/net/ixgbe/base/ixgbe_osdep.h
>>> +++ b/drivers/net/ixgbe/base/ixgbe_osdep.h
>>> @@ -51,7 +51,7 @@
>>> 
>>> #define ASSERT(x) if(!(x)) rte_panic("IXGBE: x")
>>> 
>>> -#define DELAY(x) rte_delay_us(x)
>>> +#define DELAY(x) rte_delay_us_sleep(x)
>>> #define usec_delay(x) DELAY(x)
>>> #define msec_delay(x) DELAY(1000*(x))
>>> 
>>> -- 
>>> 2.17.1
>>> 
>> 
>> Regards,
>> Keith

Regards,
Keith

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC 2/2] drivers/net: use sleep delay by default for intel NICs
  2018-09-03 15:39         ` Wiles, Keith
@ 2018-09-03 15:50           ` Ilya Maximets
  2018-09-03 16:04             ` Wiles, Keith
  0 siblings, 1 reply; 7+ messages in thread
From: Ilya Maximets @ 2018-09-03 15:50 UTC (permalink / raw)
  To: Wiles, Keith
  Cc: dev, Wu, Jingjing, Ananyev, Konstantin, Lu, Wenzhuo, Xing,
	Beilei, Zhang, Qi Z, Wang, Xiao W, Richardson, Bruce

On 03.09.2018 18:39, Wiles, Keith wrote:
> 
> 
>> On Sep 3, 2018, at 4:33 PM, Ilya Maximets <i.maximets@samsung.com> wrote:
>>
>> On 03.09.2018 18:14, Wiles, Keith wrote:
>>>
>>>
>>>> On Aug 31, 2018, at 1:45 PM, Ilya Maximets <i.maximets@samsung.com> wrote:
>>>>
>>>> NICs uses different delays up to a second during their
>>>> configuration. It makes no sense to busy-wait so long wasting
>>>> CPU cycles and preventing any other threads to execute on the
>>>> same CPU core. These busy polling are the rudiments that came
>>>> from the kernel drivers where you can not sleep in interrupt
>>>> context, but as we're in userspace, we're able and should
>>>> sleep to allow other threads to run.
>>>> Delays never called on rx/tx path, so this should not affect
>>>> performance.
>>>
>>> I have a question, the only thread being effected by the busy wait is the thread assigned to the core or the master lcore in the case of rte_eal_init(). It should not effect other threads running on other cores, right? If you do have other threads running in the same code then I see the need, please help me understand the use and how it is effecting DPDK.
>>
>> Originally, this patch comes from this issue:
>>   http://mails.dpdk.org/archives/dev/2018-August/110640.html
>>
>> non-EAL threads is one of the answers.
>> For example, main thread in OVS periodically checks the link statuses
>> and hangs there busy waiting on different NIC configuration operations.
> 
> We have link state get no wait why is that not working instead of wait polling the line state?

Yes, and OVS uses it, but ixgbe driver contains the workaround for a
fiber links configuration issue that calls ixgbe_setup_link, that
busy waits trying to configure multispeed fiber. You may found
details in the patch I mentioned. I moved this code to the separate
alarm thread, but it will eat CPU cycles anyway preventing others
from working.

> 
>> Also, main OVS tread is responsible for port configuration and
>> re-configuration. There are few other in-dpdk threads like interrupts
>> handling thread (alarm handling thread). vhost_thread is working on
>> the same lcores and wants some time to work too.
>>
>> In case of hyperthreading busy-waiting will significantly affect
>> threads on the siblings.
>>
>> Best regards, Ilya Maximets.
>>
>>>>
>>>> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
>>>> ---
>>>> drivers/net/avf/Makefile             | 1 +
>>>> drivers/net/avf/base/avf_osdep.h     | 4 ++--
>>>> drivers/net/e1000/Makefile           | 1 +
>>>> drivers/net/e1000/base/e1000_osdep.h | 2 +-
>>>> drivers/net/i40e/base/i40e_osdep.h   | 6 +++---
>>>> drivers/net/ifc/base/ifcvf_osdep.h   | 2 +-
>>>> drivers/net/ixgbe/base/ixgbe_osdep.h | 2 +-
>>>> 7 files changed, 10 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/net/avf/Makefile b/drivers/net/avf/Makefile
>>>> index 3f815bbc4..8ee707529 100644
>>>> --- a/drivers/net/avf/Makefile
>>>> +++ b/drivers/net/avf/Makefile
>>>> @@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>>>> LIB = librte_pmd_avf.a
>>>>
>>>> CFLAGS += -O3
>>>> +CFLAGS += -DALLOW_EXPERIMENTAL_API
>>>> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
>>>> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
>>>> LDLIBS += -lrte_bus_pci
>>>> diff --git a/drivers/net/avf/base/avf_osdep.h b/drivers/net/avf/base/avf_osdep.h
>>>> index 9ef45968e..442a5acd0 100644
>>>> --- a/drivers/net/avf/base/avf_osdep.h
>>>> +++ b/drivers/net/avf/base/avf_osdep.h
>>>> @@ -93,8 +93,8 @@ typedef uint64_t        u64;
>>>> #define avf_memset(a, b, c, d) memset((a), (b), (c))
>>>> #define avf_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
>>>>
>>>> -#define avf_usec_delay(x) rte_delay_us(x)
>>>> -#define avf_msec_delay(x) rte_delay_us(1000*(x))
>>>> +#define avf_usec_delay(x) rte_delay_us_sleep(x)
>>>> +#define avf_msec_delay(x) avf_usec_delay(1000 * (x))
>>>>
>>>> #define AVF_PCI_REG(reg)		rte_read32(reg)
>>>> #define AVF_PCI_REG_ADDR(a, reg) \
>>>> diff --git a/drivers/net/e1000/Makefile b/drivers/net/e1000/Makefile
>>>> index 9c87e883b..0ed627656 100644
>>>> --- a/drivers/net/e1000/Makefile
>>>> +++ b/drivers/net/e1000/Makefile
>>>> @@ -10,6 +10,7 @@ LIB = librte_pmd_e1000.a
>>>>
>>>> CFLAGS += -O3
>>>> CFLAGS += $(WERROR_FLAGS)
>>>> +CFLAGS += -DALLOW_EXPERIMENTAL_API
>>>> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
>>>> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
>>>> LDLIBS += -lrte_bus_pci
>>>> diff --git a/drivers/net/e1000/base/e1000_osdep.h b/drivers/net/e1000/base/e1000_osdep.h
>>>> index b8868049f..5958ea157 100644
>>>> --- a/drivers/net/e1000/base/e1000_osdep.h
>>>> +++ b/drivers/net/e1000/base/e1000_osdep.h
>>>> @@ -48,7 +48,7 @@
>>>>
>>>> #include "../e1000_logs.h"
>>>>
>>>> -#define DELAY(x) rte_delay_us(x)
>>>> +#define DELAY(x) rte_delay_us_sleep(x)
>>>> #define usec_delay(x) DELAY(x)
>>>> #define usec_delay_irq(x) DELAY(x)
>>>> #define msec_delay(x) DELAY(1000*(x))
>>>> diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
>>>> index 8e5c593c9..a6072e153 100644
>>>> --- a/drivers/net/i40e/base/i40e_osdep.h
>>>> +++ b/drivers/net/i40e/base/i40e_osdep.h
>>>> @@ -233,9 +233,9 @@ struct i40e_spinlock {
>>>> #define i40e_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
>>>>
>>>> #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
>>>> -#define DELAY(x) rte_delay_us(x)
>>>> -#define i40e_usec_delay(x) rte_delay_us(x)
>>>> -#define i40e_msec_delay(x) rte_delay_us(1000*(x))
>>>> +#define DELAY(x) rte_delay_us_sleep(x)
>>>> +#define i40e_usec_delay(x) DELAY(x)
>>>> +#define i40e_msec_delay(x) DELAY(1000 * (x))
>>>> #define udelay(x) DELAY(x)
>>>> #define msleep(x) DELAY(1000*(x))
>>>> #define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
>>>> diff --git a/drivers/net/ifc/base/ifcvf_osdep.h b/drivers/net/ifc/base/ifcvf_osdep.h
>>>> index cf151ef52..6aef25ea4 100644
>>>> --- a/drivers/net/ifc/base/ifcvf_osdep.h
>>>> +++ b/drivers/net/ifc/base/ifcvf_osdep.h
>>>> @@ -17,7 +17,7 @@
>>>> #define DEBUGOUT(S, args...)    RTE_LOG(DEBUG, PMD, S, ##args)
>>>> #define STATIC                  static
>>>>
>>>> -#define msec_delay	rte_delay_ms
>>>> +#define msec_delay(x)	rte_delay_us_sleep(1000 * (x))
>>>>
>>>> #define IFCVF_READ_REG8(reg)		rte_read8(reg)
>>>> #define IFCVF_WRITE_REG8(val, reg)	rte_write8((val), (reg))
>>>> diff --git a/drivers/net/ixgbe/base/ixgbe_osdep.h b/drivers/net/ixgbe/base/ixgbe_osdep.h
>>>> index bb5dfd2af..94ede9bc2 100644
>>>> --- a/drivers/net/ixgbe/base/ixgbe_osdep.h
>>>> +++ b/drivers/net/ixgbe/base/ixgbe_osdep.h
>>>> @@ -51,7 +51,7 @@
>>>>
>>>> #define ASSERT(x) if(!(x)) rte_panic("IXGBE: x")
>>>>
>>>> -#define DELAY(x) rte_delay_us(x)
>>>> +#define DELAY(x) rte_delay_us_sleep(x)
>>>> #define usec_delay(x) DELAY(x)
>>>> #define msec_delay(x) DELAY(1000*(x))
>>>>
>>>> -- 
>>>> 2.17.1
>>>>
>>>
>>> Regards,
>>> Keith
> 
> Regards,
> Keith
> 
> 
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC 2/2] drivers/net: use sleep delay by default for intel NICs
  2018-09-03 15:50           ` Ilya Maximets
@ 2018-09-03 16:04             ` Wiles, Keith
  0 siblings, 0 replies; 7+ messages in thread
From: Wiles, Keith @ 2018-09-03 16:04 UTC (permalink / raw)
  To: Ilya Maximets
  Cc: dev, Wu, Jingjing, Ananyev, Konstantin, Lu, Wenzhuo, Xing,
	Beilei, Zhang, Qi Z, Wang, Xiao W, Richardson, Bruce



> On Sep 3, 2018, at 4:50 PM, Ilya Maximets <i.maximets@samsung.com> wrote:
> 
> On 03.09.2018 18:39, Wiles, Keith wrote:
>> 
>> 
>>> On Sep 3, 2018, at 4:33 PM, Ilya Maximets <i.maximets@samsung.com> wrote:
>>> 
>>> On 03.09.2018 18:14, Wiles, Keith wrote:
>>>> 
>>>> 
>>>>> On Aug 31, 2018, at 1:45 PM, Ilya Maximets <i.maximets@samsung.com> wrote:
>>>>> 
>>>>> NICs uses different delays up to a second during their
>>>>> configuration. It makes no sense to busy-wait so long wasting
>>>>> CPU cycles and preventing any other threads to execute on the
>>>>> same CPU core. These busy polling are the rudiments that came
>>>>> from the kernel drivers where you can not sleep in interrupt
>>>>> context, but as we're in userspace, we're able and should
>>>>> sleep to allow other threads to run.
>>>>> Delays never called on rx/tx path, so this should not affect
>>>>> performance.
>>>> 
>>>> I have a question, the only thread being effected by the busy wait is the thread assigned to the core or the master lcore in the case of rte_eal_init(). It should not effect other threads running on other cores, right? If you do have other threads running in the same code then I see the need, please help me understand the use and how it is effecting DPDK.
>>> 
>>> Originally, this patch comes from this issue:
>>>  http://mails.dpdk.org/archives/dev/2018-August/110640.html
>>> 
>>> non-EAL threads is one of the answers.
>>> For example, main thread in OVS periodically checks the link statuses
>>> and hangs there busy waiting on different NIC configuration operations.
>> 
>> We have link state get no wait why is that not working instead of wait polling the line state?
> 
> Yes, and OVS uses it, but ixgbe driver contains the workaround for a
> fiber links configuration issue that calls ixgbe_setup_link, that
> busy waits trying to configure multispeed fiber. You may found
> details in the patch I mentioned. I moved this code to the separate
> alarm thread, but it will eat CPU cycles anyway preventing others
> from working.

OK. I see.

I think using nanosleep is ok as long as that works on all of the platforms FreeBSD, Linux, Windows will be coming I assume later. This should not be a show stopper unless it does not work on FreeBSD, but I am pretty it does as I do not have access to look.

I will look at the rest of the patch.

> 
>> 
>>> Also, main OVS tread is responsible for port configuration and
>>> re-configuration. There are few other in-dpdk threads like interrupts
>>> handling thread (alarm handling thread). vhost_thread is working on
>>> the same lcores and wants some time to work too.
>>> 
>>> In case of hyperthreading busy-waiting will significantly affect
>>> threads on the siblings.
>>> 
>>> Best regards, Ilya Maximets.
>>> 
>>>>> 
>>>>> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
>>>>> ---
>>>>> drivers/net/avf/Makefile             | 1 +
>>>>> drivers/net/avf/base/avf_osdep.h     | 4 ++--
>>>>> drivers/net/e1000/Makefile           | 1 +
>>>>> drivers/net/e1000/base/e1000_osdep.h | 2 +-
>>>>> drivers/net/i40e/base/i40e_osdep.h   | 6 +++---
>>>>> drivers/net/ifc/base/ifcvf_osdep.h   | 2 +-
>>>>> drivers/net/ixgbe/base/ixgbe_osdep.h | 2 +-
>>>>> 7 files changed, 10 insertions(+), 8 deletions(-)
>>>>> 
>>>>> diff --git a/drivers/net/avf/Makefile b/drivers/net/avf/Makefile
>>>>> index 3f815bbc4..8ee707529 100644
>>>>> --- a/drivers/net/avf/Makefile
>>>>> +++ b/drivers/net/avf/Makefile
>>>>> @@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>>>>> LIB = librte_pmd_avf.a
>>>>> 
>>>>> CFLAGS += -O3
>>>>> +CFLAGS += -DALLOW_EXPERIMENTAL_API
>>>>> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
>>>>> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
>>>>> LDLIBS += -lrte_bus_pci
>>>>> diff --git a/drivers/net/avf/base/avf_osdep.h b/drivers/net/avf/base/avf_osdep.h
>>>>> index 9ef45968e..442a5acd0 100644
>>>>> --- a/drivers/net/avf/base/avf_osdep.h
>>>>> +++ b/drivers/net/avf/base/avf_osdep.h
>>>>> @@ -93,8 +93,8 @@ typedef uint64_t        u64;
>>>>> #define avf_memset(a, b, c, d) memset((a), (b), (c))
>>>>> #define avf_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
>>>>> 
>>>>> -#define avf_usec_delay(x) rte_delay_us(x)
>>>>> -#define avf_msec_delay(x) rte_delay_us(1000*(x))
>>>>> +#define avf_usec_delay(x) rte_delay_us_sleep(x)
>>>>> +#define avf_msec_delay(x) avf_usec_delay(1000 * (x))
>>>>> 
>>>>> #define AVF_PCI_REG(reg)		rte_read32(reg)
>>>>> #define AVF_PCI_REG_ADDR(a, reg) \
>>>>> diff --git a/drivers/net/e1000/Makefile b/drivers/net/e1000/Makefile
>>>>> index 9c87e883b..0ed627656 100644
>>>>> --- a/drivers/net/e1000/Makefile
>>>>> +++ b/drivers/net/e1000/Makefile
>>>>> @@ -10,6 +10,7 @@ LIB = librte_pmd_e1000.a
>>>>> 
>>>>> CFLAGS += -O3
>>>>> CFLAGS += $(WERROR_FLAGS)
>>>>> +CFLAGS += -DALLOW_EXPERIMENTAL_API
>>>>> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
>>>>> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
>>>>> LDLIBS += -lrte_bus_pci
>>>>> diff --git a/drivers/net/e1000/base/e1000_osdep.h b/drivers/net/e1000/base/e1000_osdep.h
>>>>> index b8868049f..5958ea157 100644
>>>>> --- a/drivers/net/e1000/base/e1000_osdep.h
>>>>> +++ b/drivers/net/e1000/base/e1000_osdep.h
>>>>> @@ -48,7 +48,7 @@
>>>>> 
>>>>> #include "../e1000_logs.h"
>>>>> 
>>>>> -#define DELAY(x) rte_delay_us(x)
>>>>> +#define DELAY(x) rte_delay_us_sleep(x)
>>>>> #define usec_delay(x) DELAY(x)
>>>>> #define usec_delay_irq(x) DELAY(x)
>>>>> #define msec_delay(x) DELAY(1000*(x))
>>>>> diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
>>>>> index 8e5c593c9..a6072e153 100644
>>>>> --- a/drivers/net/i40e/base/i40e_osdep.h
>>>>> +++ b/drivers/net/i40e/base/i40e_osdep.h
>>>>> @@ -233,9 +233,9 @@ struct i40e_spinlock {
>>>>> #define i40e_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
>>>>> 
>>>>> #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
>>>>> -#define DELAY(x) rte_delay_us(x)
>>>>> -#define i40e_usec_delay(x) rte_delay_us(x)
>>>>> -#define i40e_msec_delay(x) rte_delay_us(1000*(x))
>>>>> +#define DELAY(x) rte_delay_us_sleep(x)
>>>>> +#define i40e_usec_delay(x) DELAY(x)
>>>>> +#define i40e_msec_delay(x) DELAY(1000 * (x))
>>>>> #define udelay(x) DELAY(x)
>>>>> #define msleep(x) DELAY(1000*(x))
>>>>> #define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
>>>>> diff --git a/drivers/net/ifc/base/ifcvf_osdep.h b/drivers/net/ifc/base/ifcvf_osdep.h
>>>>> index cf151ef52..6aef25ea4 100644
>>>>> --- a/drivers/net/ifc/base/ifcvf_osdep.h
>>>>> +++ b/drivers/net/ifc/base/ifcvf_osdep.h
>>>>> @@ -17,7 +17,7 @@
>>>>> #define DEBUGOUT(S, args...)    RTE_LOG(DEBUG, PMD, S, ##args)
>>>>> #define STATIC                  static
>>>>> 
>>>>> -#define msec_delay	rte_delay_ms
>>>>> +#define msec_delay(x)	rte_delay_us_sleep(1000 * (x))
>>>>> 
>>>>> #define IFCVF_READ_REG8(reg)		rte_read8(reg)
>>>>> #define IFCVF_WRITE_REG8(val, reg)	rte_write8((val), (reg))
>>>>> diff --git a/drivers/net/ixgbe/base/ixgbe_osdep.h b/drivers/net/ixgbe/base/ixgbe_osdep.h
>>>>> index bb5dfd2af..94ede9bc2 100644
>>>>> --- a/drivers/net/ixgbe/base/ixgbe_osdep.h
>>>>> +++ b/drivers/net/ixgbe/base/ixgbe_osdep.h
>>>>> @@ -51,7 +51,7 @@
>>>>> 
>>>>> #define ASSERT(x) if(!(x)) rte_panic("IXGBE: x")
>>>>> 
>>>>> -#define DELAY(x) rte_delay_us(x)
>>>>> +#define DELAY(x) rte_delay_us_sleep(x)
>>>>> #define usec_delay(x) DELAY(x)
>>>>> #define msec_delay(x) DELAY(1000*(x))
>>>>> 
>>>>> -- 
>>>>> 2.17.1
>>>>> 
>>>> 
>>>> Regards,
>>>> Keith
>> 
>> Regards,
>> Keith

Regards,
Keith

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-09-03 16:06 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20180831124517.27619-1-i.maximets@samsung.com>
     [not found] ` <CGME20180831124402eucas1p120ba1cdc8c3f6bc6c5bd09b5d3ede771@eucas1p1.samsung.com>
2018-08-31 12:45   ` [RFC 1/2] eal: add nanosleep based delay function Ilya Maximets
     [not found] ` <CGME20180831124404eucas1p20daff43600dfe450c9106616f886eab4@eucas1p2.samsung.com>
2018-08-31 12:45   ` [RFC 2/2] drivers/net: use sleep delay by default for intel NICs Ilya Maximets
2018-09-03 15:14     ` Wiles, Keith
2018-09-03 15:33       ` Ilya Maximets
2018-09-03 15:39         ` Wiles, Keith
2018-09-03 15:50           ` Ilya Maximets
2018-09-03 16:04             ` Wiles, Keith

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.