* [PATCH] kni: fast data availability check in thread_single loop
@ 2016-12-29 23:23 Sergey Vyazmitinov
  2017-01-11 17:29 ` Ferruh Yigit
  0 siblings, 1 reply; 7+ messages in thread
From: Sergey Vyazmitinov @ 2016-12-29 23:23 UTC (permalink / raw)
  To: ferruh.yigit; +Cc: dev, Sergey Vyazmitinov

This allows a significant reduction in packet processing latency.

Signed-off-by: Sergey Vyazmitinov <s.vyazmitinov@brain4net.com>
---
 .../linuxapp/eal/include/exec-env/rte_kni_common.h |  6 ++++
 lib/librte_eal/linuxapp/kni/kni_misc.c             | 33 ++++++++++++++++------
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 09713b0..8183a8e 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -109,6 +109,12 @@ struct rte_kni_fifo {
 	void *volatile buffer[];     /**< The buffer contains mbuf pointers */
 };
 
+static inline int
+kni_fifo_empty(struct rte_kni_fifo *fifo)
+{
+	return fifo->write == fifo->read;
+}
+
 /*
  * The kernel image of the rte_mbuf struct, with only the relevant fields.
  * Padding is necessary to assure the offsets of these fields
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 497db9b..4bf9bfa 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -45,6 +45,7 @@ MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("Kernel Module for managing kni devices");
 
 #define KNI_RX_LOOP_NUM 1000
+#define KNI_RX_DATA_LOOP_NUM 2500
 
 #define KNI_MAX_DEVICES 32
 
@@ -129,25 +130,39 @@ static struct pernet_operations kni_net_ops = {
 #endif
 };
 
-static int
-kni_thread_single(void *data)
+static inline void
+kni_thread_single_rx_data_loop(struct kni_net *knet)
 {
-	struct kni_net *knet = data;
-	int j;
 	struct kni_dev *dev;
+	int i;
 
-	while (!kthread_should_stop()) {
-		down_read(&knet->kni_list_lock);
-		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
-			list_for_each_entry(dev, &knet->kni_list_head, list) {
+	for (i = 0; i < KNI_RX_DATA_LOOP_NUM; ++i) {
+		list_for_each_entry(dev, &knet->kni_list_head, list) {
+			/* Burst dequeue from rx_q */
+			if (!kni_fifo_empty((struct rte_kni_fifo *)dev->rx_q)) {
 #ifdef RTE_KNI_VHOST
 				kni_chk_vhost_rx(dev);
 #else
 				kni_net_rx(dev);
 #endif
-				kni_net_poll_resp(dev);
 			}
 		}
+	}
+	list_for_each_entry(dev, &knet->kni_list_head, list) {
+		kni_net_poll_resp(dev);
+	}
+}
+
+static int
+kni_thread_single(void *data)
+{
+	struct kni_net *knet = data;
+	int j;
+
+	while (!kthread_should_stop()) {
+		down_read(&knet->kni_list_lock);
+		for (j = 0; j < KNI_RX_LOOP_NUM; j++)
+			kni_thread_single_rx_data_loop(knet);
 		up_read(&knet->kni_list_lock);
 #ifdef RTE_KNI_PREEMPT_DEFAULT
 		/* reschedule out for a while */
-- 
2.7.4

* Re: [PATCH] kni: fast data availability check in thread_single loop
  2016-12-29 23:23 [PATCH] kni: fast data availability check in thread_single loop Sergey Vyazmitinov
@ 2017-01-11 17:29 ` Ferruh Yigit
  2017-01-18 11:05   ` Sergey Vyazmitinov
  0 siblings, 1 reply; 7+ messages in thread
From: Ferruh Yigit @ 2017-01-11 17:29 UTC (permalink / raw)
  To: Sergey Vyazmitinov; +Cc: dev

On 12/29/2016 11:23 PM, Sergey Vyazmitinov wrote:
> This allows a significant reduction in packet processing latency.
> 
> Signed-off-by: Sergey Vyazmitinov <s.vyazmitinov@brain4net.com>
> ---
>  .../linuxapp/eal/include/exec-env/rte_kni_common.h |  6 ++++
>  lib/librte_eal/linuxapp/kni/kni_misc.c             | 33 ++++++++++++++++------
>  2 files changed, 30 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> index 09713b0..8183a8e 100644
> --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> @@ -109,6 +109,12 @@ struct rte_kni_fifo {
>  	void *volatile buffer[];     /**< The buffer contains mbuf pointers */
>  };
>  
> +static inline int
> +kni_fifo_empty(struct rte_kni_fifo *fifo)
> +{
> +	return fifo->write == fifo->read;
> +}
> +
>  /*
>   * The kernel image of the rte_mbuf struct, with only the relevant fields.
>   * Padding is necessary to assure the offsets of these fields
> diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c
> index 497db9b..4bf9bfa 100644
> --- a/lib/librte_eal/linuxapp/kni/kni_misc.c
> +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
> @@ -45,6 +45,7 @@ MODULE_AUTHOR("Intel Corporation");
>  MODULE_DESCRIPTION("Kernel Module for managing kni devices");
>  
>  #define KNI_RX_LOOP_NUM 1000
> +#define KNI_RX_DATA_LOOP_NUM 2500
>  
>  #define KNI_MAX_DEVICES 32
>  
> @@ -129,25 +130,39 @@ static struct pernet_operations kni_net_ops = {
>  #endif
>  };
>  
> -static int
> -kni_thread_single(void *data)
> +static inline void
> +kni_thread_single_rx_data_loop(struct kni_net *knet)
>  {
> -	struct kni_net *knet = data;
> -	int j;
>  	struct kni_dev *dev;
> +	int i;
>  
> -	while (!kthread_should_stop()) {
> -		down_read(&knet->kni_list_lock);
> -		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
> -			list_for_each_entry(dev, &knet->kni_list_head, list) {
> +	for (i = 0; i < KNI_RX_DATA_LOOP_NUM; ++i) {

When there are multiple KNI interfaces, and let's assume there is traffic
too, this will behave like:

KNI1x2500 data_packets + KNI2x2500 data_packets .... KNI10x2500

After data packets, KNI1 resp_packet + KNI2 resp_packets ...

Won't this scenario also cause latency? And perhaps jitter, depending on
each KNI interface's traffic load?

This may be good for some use cases, but I am not sure it is good for all.

> +		list_for_each_entry(dev, &knet->kni_list_head, list) {
> +			/* Burst dequeue from rx_q */
> +			if (!kni_fifo_empty((struct rte_kni_fifo *)dev->rx_q)) {

Do we need this check, since the first thing kni_net_rx_normal() does is
check whether there is an item in the queue?
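
For reference, kni_net_rx_normal() already begins roughly like the sketch
below (paraphrased from memory, so names and types are approximate):

    /* sketch of the start of kni_net_rx_normal() in kni_net.c */
    static void
    kni_net_rx_normal(struct kni_dev *kni)
    {
        unsigned int num_rq;

        /* number of entries currently waiting in rx_q */
        num_rq = kni_fifo_count(kni->rx_q);
        if (num_rq == 0)
            return; /* same condition kni_fifo_empty() tests */

        /* ... dequeue the mbufs and hand skbs to the kernel stack ... */
    }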

>  #ifdef RTE_KNI_VHOST
>  				kni_chk_vhost_rx(dev);
>  #else
>  				kni_net_rx(dev);
>  #endif
> -				kni_net_poll_resp(dev);
>  			}
>  		}
> +	}
> +	list_for_each_entry(dev, &knet->kni_list_head, list) {
> +		kni_net_poll_resp(dev);
> +	}
> +}
> +
> +static int
> +kni_thread_single(void *data)
> +{
> +	struct kni_net *knet = data;
> +	int j;
> +
> +	while (!kthread_should_stop()) {
> +		down_read(&knet->kni_list_lock);
> +		for (j = 0; j < KNI_RX_LOOP_NUM; j++)
> +			kni_thread_single_rx_data_loop(knet);
>  		up_read(&knet->kni_list_lock);
>  #ifdef RTE_KNI_PREEMPT_DEFAULT
>  		/* reschedule out for a while */
> 

* Re: [PATCH] kni: fast data availability check in thread_single loop
  2017-01-11 17:29 ` Ferruh Yigit
@ 2017-01-18 11:05   ` Sergey Vyazmitinov
  2017-01-18 12:35     ` Ferruh Yigit
  2017-01-18 13:11     ` Jay Rolette
  0 siblings, 2 replies; 7+ messages in thread
From: Sergey Vyazmitinov @ 2017-01-18 11:05 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: dev

On Thu, Jan 12, 2017 at 12:29 AM, Ferruh Yigit <ferruh.yigit@intel.com>
 wrote:

> On 12/29/2016 11:23 PM, Sergey Vyazmitinov wrote:
> > This allows a significant reduction in packet processing latency.
> >
> > Signed-off-by: Sergey Vyazmitinov <s.vyazmitinov@brain4net.com>
> > ---
> >  .../linuxapp/eal/include/exec-env/rte_kni_common.h |  6 ++++
> >  lib/librte_eal/linuxapp/kni/kni_misc.c             | 33
> ++++++++++++++++------
> >  2 files changed, 30 insertions(+), 9 deletions(-)
> >
> > diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> > index 09713b0..8183a8e 100644
> > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> > @@ -109,6 +109,12 @@ struct rte_kni_fifo {
> >       void *volatile buffer[];     /**< The buffer contains mbuf
> pointers */
> >  };
> >
> > +static inline int
> > +kni_fifo_empty(struct rte_kni_fifo *fifo)
> > +{
> > +     return fifo->write == fifo->read;
> > +}
> > +
> >  /*
> >   * The kernel image of the rte_mbuf struct, with only the relevant
> fields.
> >   * Padding is necessary to assure the offsets of these fields
> > diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c
> b/lib/librte_eal/linuxapp/kni/kni_misc.c
> > index 497db9b..4bf9bfa 100644
> > --- a/lib/librte_eal/linuxapp/kni/kni_misc.c
> > +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
> > @@ -45,6 +45,7 @@ MODULE_AUTHOR("Intel Corporation");
> >  MODULE_DESCRIPTION("Kernel Module for managing kni devices");
> >
> >  #define KNI_RX_LOOP_NUM 1000
> > +#define KNI_RX_DATA_LOOP_NUM 2500
> >
> >  #define KNI_MAX_DEVICES 32
> >
> > @@ -129,25 +130,39 @@ static struct pernet_operations kni_net_ops = {
> >  #endif
> >  };
> >
> > -static int
> > -kni_thread_single(void *data)
> > +static inline void
> > +kni_thread_single_rx_data_loop(struct kni_net *knet)
> >  {
> > -     struct kni_net *knet = data;
> > -     int j;
> >       struct kni_dev *dev;
> > +     int i;
> >
> > -     while (!kthread_should_stop()) {
> > -             down_read(&knet->kni_list_lock);
> > -             for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
> > -                     list_for_each_entry(dev, &knet->kni_list_head,
> list) {
> > +     for (i = 0; i < KNI_RX_DATA_LOOP_NUM; ++i) {
>
> When there are multiple KNI interfaces, and let's assume there is traffic
> too, this will behave like:
>
> KNI1x2500 data_packets + KNI2x2500 data_packets .... KNI10x2500
>
> After data packets, KNI1 resp_packet + KNI2 resp_packets ...
>
> Won't this scenario also cause latency? And perhaps jitter, depending on
> each KNI interface's traffic load?
>
> This may be good for some use cases, but I am not sure it is good for all.
>
We can decrease KNI_RX_DATA_LOOP_NUM to some reasonable value.
I can run tests to find a lower bound.
Also, the point is to check quickly for new data in the interface rx queue.
Maybe it would be better to add some kind of break after several kni_net_rx
calls; without them the loop ends very quickly.
Anyway, this patch decreases average latency in my case from 4.5ms to
0.011ms in a ping test with 100000 packets.
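
For illustration, such a break could look roughly like the sketch below
(only the idea, untested; KNI_RX_BURST_MAX is an invented name and is not
part of this patch):

    #define KNI_RX_BURST_MAX 32 /* hypothetical per-device cap */

    list_for_each_entry(dev, &knet->kni_list_head, list) {
        int burst;

        /* stop after a bounded number of bursts so that one busy
         * interface cannot starve the others */
        for (burst = 0; burst < KNI_RX_BURST_MAX; burst++) {
            if (kni_fifo_empty((struct rte_kni_fifo *)dev->rx_q))
                break;
            kni_net_rx(dev);
        }
        kni_net_poll_resp(dev);
    }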

>
> > +             list_for_each_entry(dev, &knet->kni_list_head, list) {
> > +                     /* Burst dequeue from rx_q */
> > +                     if (!kni_fifo_empty((struct rte_kni_fifo
> *)dev->rx_q)) {
>
> Do we need this check, since first thing in kni_net_rx_normal() is
> checking if there is item in the queue?
>
> You are right. Without that check the latency is even lower.

>  #ifdef RTE_KNI_VHOST
> >                               kni_chk_vhost_rx(dev);
> >  #else
> >                               kni_net_rx(dev);
> >  #endif
> > -                             kni_net_poll_resp(dev);
> >                       }
> >               }
> > +     }
> > +     list_for_each_entry(dev, &knet->kni_list_head, list) {
> > +             kni_net_poll_resp(dev);
> > +     }
> > +}
> > +
> > +static int
> > +kni_thread_single(void *data)
> > +{
> > +     struct kni_net *knet = data;
> > +     int j;
> > +
> > +     while (!kthread_should_stop()) {
> > +             down_read(&knet->kni_list_lock);
> > +             for (j = 0; j < KNI_RX_LOOP_NUM; j++)
> > +                     kni_thread_single_rx_data_loop(knet);
> >               up_read(&knet->kni_list_lock);
> >  #ifdef RTE_KNI_PREEMPT_DEFAULT
> >               /* reschedule out for a while */
> >
>
>

* Re: [PATCH] kni: fast data availability check in thread_single loop
  2017-01-18 11:05   ` Sergey Vyazmitinov
@ 2017-01-18 12:35     ` Ferruh Yigit
  2017-01-18 13:11     ` Jay Rolette
  1 sibling, 0 replies; 7+ messages in thread
From: Ferruh Yigit @ 2017-01-18 12:35 UTC (permalink / raw)
  To: Sergey Vyazmitinov; +Cc: dev

On 1/18/2017 11:05 AM, Sergey Vyazmitinov wrote:
> 
> 
> On Thu, Jan 12, 2017 at 12:29 AM, Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> 

<...>

>     &knet->kni_list_head, list) {
>     > +     for (i = 0; i < KNI_RX_DATA_LOOP_NUM; ++i) {
> 
>     When there are multiple KNI interfaces, and let's assume there is traffic
>     too, this will behave like:
> 
>     KNI1x2500 data_packets + KNI2x2500 data_packets .... KNI10x2500
> 
>     After data packets, KNI1 resp_packet + KNI2 resp_packets ...
> 
>     Won't this scenario also cause latency? And perhaps jitter, depending on
>     each KNI interface's traffic load?
> 
>     This may be good for some use cases, but I am not sure it is good
>     for all.
> 
> We can decrease KNI_RX_DATA_LOOP_NUM to some reasonable value.
> I can run tests to find a lower bound.

I believe the effect of these changes differs per use case and per interface
load; it may not work as well for everybody.

> Also, the point is to check quickly for new data in the interface rx queue.
> Maybe it would be better to add some kind of break after several kni_net_rx
> calls; without them the loop ends very quickly.
> Anyway, this patch decreases average latency in my case from 4.5ms to

4.5ms is too slow; could there be some other issue?

> 0.011ms in a ping test with 100000 packets.

<...>

* Re: [PATCH] kni: fast data availability check in thread_single loop
  2017-01-18 11:05   ` Sergey Vyazmitinov
  2017-01-18 12:35     ` Ferruh Yigit
@ 2017-01-18 13:11     ` Jay Rolette
  2017-03-10 12:59       ` Thomas Monjalon
  1 sibling, 1 reply; 7+ messages in thread
From: Jay Rolette @ 2017-01-18 13:11 UTC (permalink / raw)
  To: Sergey Vyazmitinov; +Cc: Ferruh Yigit, DPDK

On Wed, Jan 18, 2017 at 5:05 AM, Sergey Vyazmitinov <
s.vyazmitinov@brain4net.com> wrote:

> On Thu, Jan 12, 2017 at 12:29 AM, Ferruh Yigit <ferruh.yigit@intel.com>
>  wrote:
>
> > On 12/29/2016 11:23 PM, Sergey Vyazmitinov wrote:
> > > This allows a significant reduction in packet processing latency.
> > >
> > > Signed-off-by: Sergey Vyazmitinov <s.vyazmitinov@brain4net.com>
> > > ---
> > >  .../linuxapp/eal/include/exec-env/rte_kni_common.h |  6 ++++
> > >  lib/librte_eal/linuxapp/kni/kni_misc.c             | 33
> > ++++++++++++++++------
> > >  2 files changed, 30 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/linuxapp/eal/
> include/exec-env/rte_kni_common.h
> > b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> > > index 09713b0..8183a8e 100644
> > > --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> > > +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
> > > @@ -109,6 +109,12 @@ struct rte_kni_fifo {
> > >       void *volatile buffer[];     /**< The buffer contains mbuf
> > pointers */
> > >  };
> > >
> > > +static inline int
> > > +kni_fifo_empty(struct rte_kni_fifo *fifo)
> > > +{
> > > +     return fifo->write == fifo->read;
> > > +}
> > > +
> > >  /*
> > >   * The kernel image of the rte_mbuf struct, with only the relevant
> > fields.
> > >   * Padding is necessary to assure the offsets of these fields
> > > diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c
> > b/lib/librte_eal/linuxapp/kni/kni_misc.c
> > > index 497db9b..4bf9bfa 100644
> > > --- a/lib/librte_eal/linuxapp/kni/kni_misc.c
> > > +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
> > > @@ -45,6 +45,7 @@ MODULE_AUTHOR("Intel Corporation");
> > >  MODULE_DESCRIPTION("Kernel Module for managing kni devices");
> > >
> > >  #define KNI_RX_LOOP_NUM 1000
> > > +#define KNI_RX_DATA_LOOP_NUM 2500
> > >
> > >  #define KNI_MAX_DEVICES 32
> > >
> > > @@ -129,25 +130,39 @@ static struct pernet_operations kni_net_ops = {
> > >  #endif
> > >  };
> > >
> > > -static int
> > > -kni_thread_single(void *data)
> > > +static inline void
> > > +kni_thread_single_rx_data_loop(struct kni_net *knet)
> > >  {
> > > -     struct kni_net *knet = data;
> > > -     int j;
> > >       struct kni_dev *dev;
> > > +     int i;
> > >
> > > -     while (!kthread_should_stop()) {
> > > -             down_read(&knet->kni_list_lock);
> > > -             for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
> > > -                     list_for_each_entry(dev, &knet->kni_list_head,
> > list) {
> > > +     for (i = 0; i < KNI_RX_DATA_LOOP_NUM; ++i) {
> >
> > When there are multiple KNI interfaces, and let's assume there is traffic
> > too, this will behave like:
> >
> > KNI1x2500 data_packets + KNI2x2500 data_packets .... KNI10x2500
> >
> > After data packets, KNI1 resp_packet + KNI2 resp_packets ...
> >
> > Won't this scenario also cause latency? And perhaps jitter, depending on
> > each KNI interface's traffic load?
> >
> > This may be good for some use cases, but I am not sure it is good for
> > all.
> >
> We can decrease KNI_RX_DATA_LOOP_NUM to some reasonable value.
> I can run tests to find a lower bound.
> Also, the point is to check quickly for new data in the interface rx queue.
> Maybe it would be better to add some kind of break after several kni_net_rx
> calls; without them the loop ends very quickly.
> Anyway, this patch decreases average latency in my case from 4.5ms to
> 0.011ms in a ping test with 100000 packets.
>

If you were seeing latency of 4.5ms, then it is more likely a different
issue.

At the end of the loop where KNI is reading packets from the queue, it
calls *schedule_timeout_interruptible()* with (by default) a 5us timeout.
However, that call just guarantees that the thread will sleep for AT LEAST
5us.

For most x86 Linux distros, HZ = 250 in the kernel, which works out to 4ms.
I'm reasonably certain the latency you are seeing is because the KNI thread
is sleeping and not getting woken up like you might expect.
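
For reference, the tail of the kthread loop looks roughly like this
(simplified from kni_misc.c; the constant name is from memory, so treat
the details as approximate):

    #define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */

    while (!kthread_should_stop()) {
        down_read(&knet->kni_list_lock);
        /* ... poll every KNI device's rx_q, as discussed above ... */
        up_read(&knet->kni_list_lock);
    #ifdef RTE_KNI_PREEMPT_DEFAULT
        /* usecs_to_jiffies(5) rounds up to one jiffy; with HZ = 250 a
         * jiffy is 4ms, so this "5us" nap can park the thread for up
         * to ~4ms before it polls the FIFOs again. */
        schedule_timeout_interruptible(
            usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
    #endif
    }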

When you increased the number of loops happening before the sleep, you
increased how long KNI spends before it sleeps and it happened to be long
enough in your particular test to change your average latency. If you ran
your test for a few minutes and built a histogram of ping times, I bet
you'll see ~4ms of latency pop up regularly.

More details from when I dug into this behavior previously:
http://dpdk.org/ml/archives/dev/2015-June/018858.html

Jay



>
> >
> > > +             list_for_each_entry(dev, &knet->kni_list_head, list) {
> > > +                     /* Burst dequeue from rx_q */
> > > +                     if (!kni_fifo_empty((struct rte_kni_fifo
> > *)dev->rx_q)) {
> >
> > Do we need this check, since first thing in kni_net_rx_normal() is
> > checking if there is item in the queue?
> >
> > You are right. Without that check the latency is even lower.
>
> >  #ifdef RTE_KNI_VHOST
> > >                               kni_chk_vhost_rx(dev);
> > >  #else
> > >                               kni_net_rx(dev);
> > >  #endif
> > > -                             kni_net_poll_resp(dev);
> > >                       }
> > >               }
> > > +     }
> > > +     list_for_each_entry(dev, &knet->kni_list_head, list) {
> > > +             kni_net_poll_resp(dev);
> > > +     }
> > > +}
> > > +
> > > +static int
> > > +kni_thread_single(void *data)
> > > +{
> > > +     struct kni_net *knet = data;
> > > +     int j;
> > > +
> > > +     while (!kthread_should_stop()) {
> > > +             down_read(&knet->kni_list_lock);
> > > +             for (j = 0; j < KNI_RX_LOOP_NUM; j++)
> > > +                     kni_thread_single_rx_data_loop(knet);
> > >               up_read(&knet->kni_list_lock);
> > >  #ifdef RTE_KNI_PREEMPT_DEFAULT
> > >               /* reschedule out for a while */
> > >
> >
> >
>

* Re: [PATCH] kni: fast data availability check in thread_single loop
  2017-01-18 13:11     ` Jay Rolette
@ 2017-03-10 12:59       ` Thomas Monjalon
  2017-03-10 13:16         ` Jay Rolette
  0 siblings, 1 reply; 7+ messages in thread
From: Thomas Monjalon @ 2017-03-10 12:59 UTC (permalink / raw)
  To: Jay Rolette, Sergey Vyazmitinov, Ferruh Yigit; +Cc: dev

2017-01-18 07:11, Jay Rolette:
> On Wed, Jan 18, 2017 at 5:05 AM, Sergey Vyazmitinov <
> s.vyazmitinov@brain4net.com> wrote:
> 
> > On Thu, Jan 12, 2017 at 12:29 AM, Ferruh Yigit <ferruh.yigit@intel.com>
> >  wrote:
> >
> > > On 12/29/2016 11:23 PM, Sergey Vyazmitinov wrote:
> > > > This allows a significant reduction in packet processing latency.
> > > >
> > > > Signed-off-by: Sergey Vyazmitinov <s.vyazmitinov@brain4net.com>
[...]
> > > > --- a/lib/librte_eal/linuxapp/kni/kni_misc.c
> > > > +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
> > > > @@ -45,6 +45,7 @@ MODULE_AUTHOR("Intel Corporation");
> > > >  MODULE_DESCRIPTION("Kernel Module for managing kni devices");
> > > >
> > > >  #define KNI_RX_LOOP_NUM 1000
> > > > +#define KNI_RX_DATA_LOOP_NUM 2500
> > > >
> > > >  #define KNI_MAX_DEVICES 32
> > > >
> > > > @@ -129,25 +130,39 @@ static struct pernet_operations kni_net_ops = {
> > > >  #endif
> > > >  };
> > > >
> > > > -static int
> > > > -kni_thread_single(void *data)
> > > > +static inline void
> > > > +kni_thread_single_rx_data_loop(struct kni_net *knet)
> > > >  {
> > > > -     struct kni_net *knet = data;
> > > > -     int j;
> > > >       struct kni_dev *dev;
> > > > +     int i;
> > > >
> > > > -     while (!kthread_should_stop()) {
> > > > -             down_read(&knet->kni_list_lock);
> > > > -             for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
> > > > -                     list_for_each_entry(dev, &knet->kni_list_head,
> > > list) {
> > > > +     for (i = 0; i < KNI_RX_DATA_LOOP_NUM; ++i) {
> > >
> > > When there are multiple KNI interfaces, and let's assume there is traffic
> > > too, this will behave like:
> > >
> > > KNI1x2500 data_packets + KNI2x2500 data_packets .... KNI10x2500
> > >
> > > After data packets, KNI1 resp_packet + KNI2 resp_packets ...
> > >
> > > Won't this scenario also cause latency? And perhaps jitter, depending on
> > > each KNI interface's traffic load?
> > >
> > > This may be good for some use cases, but I am not sure it is good for
> > > all.
> > >
> > We can decrease KNI_RX_DATA_LOOP_NUM to some reasonable value.
> > I can run tests to find a lower bound.
> > Also, the point is to check quickly for new data in the interface rx queue.
> > Maybe it would be better to add some kind of break after several kni_net_rx
> > calls; without them the loop ends very quickly.
> > Anyway, this patch decreases average latency in my case from 4.5ms to
> > 0.011ms in a ping test with 100000 packets.
> >
> 
> If you were seeing latency of 4.5ms, then it is more likely a different
> issue.
> 
> At the end of the loop where KNI is reading packets from the queue, it
> calls *schedule_timeout_interruptible()* with (by default) a 5us timeout.
> However, that call just guarantees that the thread will sleep for AT LEAST
> 5us.
> 
> For most x86 Linux distros, HZ = 250 in the kernel, which works out to 4ms.
> I'm reasonably certain the latency you are seeing is because the KNI thread
> is sleeping and not getting woken up like you might expect.
> 
> When you increased the number of loops happening before the sleep, you
> increased how long KNI spends before it sleeps and it happened to be long
> enough in your particular test to change your average latency. If you ran
> your test for a few minutes and built a histogram of ping times, I bet
> you'll see ~4ms of latency pop up regularly.
> 
> More details from when I dug into this behavior previously:
> http://dpdk.org/ml/archives/dev/2015-June/018858.html

No answer in this discussion.
Should we close it in patchwork?

* Re: [PATCH] kni: fast data availability check in thread_single loop
  2017-03-10 12:59       ` Thomas Monjalon
@ 2017-03-10 13:16         ` Jay Rolette
  0 siblings, 0 replies; 7+ messages in thread
From: Jay Rolette @ 2017-03-10 13:16 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Sergey Vyazmitinov, Ferruh Yigit, DPDK

On Fri, Mar 10, 2017 at 6:59 AM, Thomas Monjalon <thomas.monjalon@6wind.com>
wrote:

> 2017-01-18 07:11, Jay Rolette:
> > On Wed, Jan 18, 2017 at 5:05 AM, Sergey Vyazmitinov <
> > s.vyazmitinov@brain4net.com> wrote:
> >
> > > On Thu, Jan 12, 2017 at 12:29 AM, Ferruh Yigit <ferruh.yigit@intel.com
> >
> > >  wrote:
> > >
> > > > On 12/29/2016 11:23 PM, Sergey Vyazmitinov wrote:
> > > > > This allows a significant reduction in packet processing latency.
> > > > >
> > > > > Signed-off-by: Sergey Vyazmitinov <s.vyazmitinov@brain4net.com>
> [...]
> > > > > --- a/lib/librte_eal/linuxapp/kni/kni_misc.c
> > > > > +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
> > > > > @@ -45,6 +45,7 @@ MODULE_AUTHOR("Intel Corporation");
> > > > >  MODULE_DESCRIPTION("Kernel Module for managing kni devices");
> > > > >
> > > > >  #define KNI_RX_LOOP_NUM 1000
> > > > > +#define KNI_RX_DATA_LOOP_NUM 2500
> > > > >
> > > > >  #define KNI_MAX_DEVICES 32
> > > > >
> > > > > @@ -129,25 +130,39 @@ static struct pernet_operations kni_net_ops
> = {
> > > > >  #endif
> > > > >  };
> > > > >
> > > > > -static int
> > > > > -kni_thread_single(void *data)
> > > > > +static inline void
> > > > > +kni_thread_single_rx_data_loop(struct kni_net *knet)
> > > > >  {
> > > > > -     struct kni_net *knet = data;
> > > > > -     int j;
> > > > >       struct kni_dev *dev;
> > > > > +     int i;
> > > > >
> > > > > -     while (!kthread_should_stop()) {
> > > > > -             down_read(&knet->kni_list_lock);
> > > > > -             for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
> > > > > -                     list_for_each_entry(dev,
> &knet->kni_list_head,
> > > > list) {
> > > > > +     for (i = 0; i < KNI_RX_DATA_LOOP_NUM; ++i) {
> > > >
> > > > When there are multiple KNI interfaces, and let's assume there is
> > > > traffic too, this will behave like:
> > > >
> > > > KNI1x2500 data_packets + KNI2x2500 data_packets .... KNI10x2500
> > > >
> > > > After data packets, KNI1 resp_packet + KNI2 resp_packets ...
> > > >
> > > > Won't this scenario also cause latency? And perhaps jitter, depending
> > > > on each KNI interface's traffic load?
> > > >
> > > > This may be good for some use cases, but I am not sure it is good for
> > > > all.
> > > >
> > > We can decrease KNI_RX_DATA_LOOP_NUM to some reasonable value.
> > > I can run tests to find a lower bound.
> > > Also, the point is to check quickly for new data in the interface rx queue.
> > > Maybe it would be better to add some kind of break after several kni_net_rx
> > > calls; without them the loop ends very quickly.
> > > Anyway, this patch decreases average latency in my case from 4.5ms to
> > > 0.011ms in a ping test with 100000 packets.
> > >
> >
> > If you were seeing latency of 4.5ms, then it is more likely a different
> > issue.
> >
> > At the end of the loop where KNI is reading packets from the queue, it
> > calls *schedule_timeout_interruptible()* with (by default) a 5us
> timeout.
> > However, that call just guarantees that the thread will sleep for AT
> LEAST
> > 5us.
> >
> > For most x86 Linux distros, HZ = 250 in the kernel, which works out to
> 4ms.
> > I'm reasonably certain the latency you are seeing is because the KNI
> thread
> > is sleeping and not getting woken up like you might expect.
> >
> > When you increased the number of loops happening before the sleep, you
> > increased how long KNI spends before it sleeps and it happened to be long
> > enough in your particular test to change your average latency. If you ran
> > your test for a few minutes and built a histogram of ping times, I bet
> > you'll see ~4ms of latency pop up regularly.
> >
> > More details from when I dug into this behavior previously:
> > http://dpdk.org/ml/archives/dev/2015-June/018858.html
>
> No answer in this discussion.
> Should we close it in patchwork?
>

I don't believe we should merge the patch.

Jay
