* [PATCH net] ipmr: remove cache_resolve_queue_len
@ 2019-09-03  8:43 Hangbin Liu
  2019-09-03  9:15 ` Nikolay Aleksandrov
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Hangbin Liu @ 2019-09-03  8:43 UTC (permalink / raw)
  To: netdev
  Cc: Phil Karn, Sukumar Gopalakrishnan, David S . Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI, Hangbin Liu

This is a re-post of a previous patch written by David Miller[1].

Phil Karn reported[2] that on busy networks with lots of unresolved
multicast routing entries, the creation of new multicast group routes
can be extremely slow and unreliable.

The reason is that we hard-code the number of multicast route entries
with unresolved source addresses (cache_resolve_queue_len) to 10. If
some multicast routes never resolve and the number of unresolved
source addresses keeps growing, no new multicast route cache entries
can be created.

To resolve this issue, we need to either add a sysctl entry to make
cache_resolve_queue_len configurable, or just remove
cache_resolve_queue_len directly, as we already have the socket
receive queue limits of the mrouted socket, as pointed out by David.

From my side, I'd prefer to remove cache_resolve_queue_len instead
of creating two more (IPv4 and IPv6) sysctl entries.

[1] https://lkml.org/lkml/2018/7/22/11
[2] https://lkml.org/lkml/2018/7/21/343

Reported-by: Phil Karn <karn@ka9q.net>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 include/linux/mroute_base.h |  2 --
 net/ipv4/ipmr.c             | 27 ++++++++++++++++++---------
 net/ipv6/ip6mr.c            | 10 +++-------
 3 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 34de06b426ef..50fb89533029 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -235,7 +235,6 @@ struct mr_table_ops {
  * @mfc_hash: Hash table of all resolved routes for easy lookup
  * @mfc_cache_list: list of resovled routes for possible traversal
  * @maxvif: Identifier of highest value vif currently in use
- * @cache_resolve_queue_len: current size of unresolved queue
  * @mroute_do_assert: Whether to inform userspace on wrong ingress
  * @mroute_do_pim: Whether to receive IGMP PIMv1
  * @mroute_reg_vif_num: PIM-device vif index
@@ -252,7 +251,6 @@ struct mr_table {
 	struct rhltable		mfc_hash;
 	struct list_head	mfc_cache_list;
 	int			maxvif;
-	atomic_t		cache_resolve_queue_len;
 	bool			mroute_do_assert;
 	bool			mroute_do_pim;
 	bool			mroute_do_wrvifwhole;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c07bc82cbbe9..6c5278b45afb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -744,8 +744,6 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 	struct sk_buff *skb;
 	struct nlmsgerr *e;
 
-	atomic_dec(&mrt->cache_resolve_queue_len);
-
 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
 		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = skb_pull(skb,
@@ -1133,9 +1131,11 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 	}
 
 	if (!found) {
+		bool was_empty;
+
 		/* Create a new entry if allowable */
-		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
-		    (c = ipmr_cache_alloc_unres()) == NULL) {
+		c = ipmr_cache_alloc_unres();
+		if (!c) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -1161,11 +1161,11 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 			return err;
 		}
 
-		atomic_inc(&mrt->cache_resolve_queue_len);
+		was_empty = list_empty(&mrt->mfc_unres_queue);
 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
 		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
 
-		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
+		if (was_empty)
 			mod_timer(&mrt->ipmr_expire_timer,
 				  c->_c.mfc_un.unres.expires);
 	}
@@ -1272,7 +1272,6 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 		if (uc->mfc_origin == c->mfc_origin &&
 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 			list_del(&_uc->list);
-			atomic_dec(&mrt->cache_resolve_queue_len);
 			found = true;
 			break;
 		}
@@ -1328,7 +1327,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags)
 	}
 
 	if (flags & MRT_FLUSH_MFC) {
-		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+		if (!list_empty(&mrt->mfc_unres_queue)) {
 			spin_lock_bh(&mfc_unres_lock);
 			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
 				list_del(&c->list);
@@ -2750,9 +2749,19 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return ipmr_mfc_delete(tbl, &mfcc, parent);
 }
 
+static int queue_count(struct mr_table *mrt)
+{
+	struct list_head *pos;
+	int count = 0;
+
+	list_for_each(pos, &mrt->mfc_unres_queue)
+		count++;
+	return count;
+}
+
 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
 {
-	u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
+	u32 queue_len = queue_count(mrt);
 
 	if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
 	    nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e80d36c5073d..d02f0f903559 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -768,8 +768,6 @@ static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 	struct net *net = read_pnet(&mrt->net);
 	struct sk_buff *skb;
 
-	atomic_dec(&mrt->cache_resolve_queue_len);
-
 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 		if (ipv6_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = skb_pull(skb,
@@ -1148,8 +1146,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
-		    (c = ip6mr_cache_alloc_unres()) == NULL) {
+		c = ip6mr_cache_alloc_unres();
+		if (!c) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -1176,7 +1174,6 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 			return err;
 		}
 
-		atomic_inc(&mrt->cache_resolve_queue_len);
 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 
@@ -1468,7 +1465,6 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
 			list_del(&_uc->list);
-			atomic_dec(&mrt->cache_resolve_queue_len);
 			found = true;
 			break;
 		}
@@ -1526,7 +1522,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags)
 	}
 
 	if (flags & MRT6_FLUSH_MFC) {
-		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+		if (!list_empty(&mrt->mfc_unres_queue)) {
 			spin_lock_bh(&mfc_unres_lock);
 			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
 				list_del(&c->list);
-- 
2.19.2



* Re: [PATCH net] ipmr: remove cache_resolve_queue_len
  2019-09-03  8:43 [PATCH net] ipmr: remove cache_resolve_queue_len Hangbin Liu
@ 2019-09-03  9:15 ` Nikolay Aleksandrov
  2019-09-03 12:55   ` Hangbin Liu
  2019-09-04  3:34 ` [PATCHv2 net-next] " Hangbin Liu
  2019-09-06  7:36 ` [PATCHv3 net-next] ipmr: remove hard code cache_resolve_queue_len limit Hangbin Liu
  2 siblings, 1 reply; 10+ messages in thread
From: Nikolay Aleksandrov @ 2019-09-03  9:15 UTC (permalink / raw)
  To: Hangbin Liu, netdev
  Cc: Phil Karn, Sukumar Gopalakrishnan, David S . Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI

On 03/09/2019 11:43, Hangbin Liu wrote:
> This is a re-post of a previous patch written by David Miller[1].
> 
> Phil Karn reported[2] that on busy networks with lots of unresolved
> multicast routing entries, the creation of new multicast group routes
> can be extremely slow and unreliable.
> 
> The reason is that we hard-code the number of multicast route entries
> with unresolved source addresses (cache_resolve_queue_len) to 10. If
> some multicast routes never resolve and the number of unresolved
> source addresses keeps growing, no new multicast route cache entries
> can be created.
> 
> To resolve this issue, we need to either add a sysctl entry to make
> cache_resolve_queue_len configurable, or just remove
> cache_resolve_queue_len directly, as we already have the socket
> receive queue limits of the mrouted socket, as pointed out by David.
> 
> From my side, I'd prefer to remove cache_resolve_queue_len instead
> of creating two more (IPv4 and IPv6) sysctl entries.
> 
> [1] https://lkml.org/lkml/2018/7/22/11
> [2] https://lkml.org/lkml/2018/7/21/343
> 
> Reported-by: Phil Karn <karn@ka9q.net>
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
>  include/linux/mroute_base.h |  2 --
>  net/ipv4/ipmr.c             | 27 ++++++++++++++++++---------
>  net/ipv6/ip6mr.c            | 10 +++-------
>  3 files changed, 21 insertions(+), 18 deletions(-)
> 

Hi,
IMO this is definitely net-next material. A few more comments below.

Note that hosts will automatically have this limit lifted to about 270
entries with current defaults; some might be surprised if they have
higher limits set and could be left with queues allowing for thousands
of entries in a linked list.
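
(A rough estimate of where ~270 comes from, assuming current defaults:
once the hard-coded 10 is gone, the effective bound is the mrouted
socket's receive buffer, because every new unresolved entry must first
queue an IGMPMSG_NOCACHE upcall on that socket, and entry creation fails
when the upcall cannot be queued. With net.core.rmem_default = 212992
bytes and an upcall skb truesize on the order of 768 bytes:

	entries ~= rmem_default / upcall skb truesize
	        ~= 212992 / 768
	        ~= 277

The exact figure depends on the skb truesize and on any rmem override
set on the mrouted socket.)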

> diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
> index 34de06b426ef..50fb89533029 100644
> --- a/include/linux/mroute_base.h
> +++ b/include/linux/mroute_base.h
> @@ -235,7 +235,6 @@ struct mr_table_ops {
>   * @mfc_hash: Hash table of all resolved routes for easy lookup
>   * @mfc_cache_list: list of resovled routes for possible traversal
>   * @maxvif: Identifier of highest value vif currently in use
> - * @cache_resolve_queue_len: current size of unresolved queue
>   * @mroute_do_assert: Whether to inform userspace on wrong ingress
>   * @mroute_do_pim: Whether to receive IGMP PIMv1
>   * @mroute_reg_vif_num: PIM-device vif index
> @@ -252,7 +251,6 @@ struct mr_table {
>  	struct rhltable		mfc_hash;
>  	struct list_head	mfc_cache_list;
>  	int			maxvif;
> -	atomic_t		cache_resolve_queue_len;
>  	bool			mroute_do_assert;
>  	bool			mroute_do_pim;
>  	bool			mroute_do_wrvifwhole;
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index c07bc82cbbe9..6c5278b45afb 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -744,8 +744,6 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
>  	struct sk_buff *skb;
>  	struct nlmsgerr *e;
>  
> -	atomic_dec(&mrt->cache_resolve_queue_len);
> -
>  	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
>  		if (ip_hdr(skb)->version == 0) {
>  			struct nlmsghdr *nlh = skb_pull(skb,
> @@ -1133,9 +1131,11 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
>  	}
>  
>  	if (!found) {
> +		bool was_empty;
> +
>  		/* Create a new entry if allowable */
> -		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
> -		    (c = ipmr_cache_alloc_unres()) == NULL) {
> +		c = ipmr_cache_alloc_unres();
> +		if (!c) {
>  			spin_unlock_bh(&mfc_unres_lock);
>  
>  			kfree_skb(skb);
> @@ -1161,11 +1161,11 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
>  			return err;
>  		}
>  
> -		atomic_inc(&mrt->cache_resolve_queue_len);
> +		was_empty = list_empty(&mrt->mfc_unres_queue);
>  		list_add(&c->_c.list, &mrt->mfc_unres_queue);
>  		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
>  
> -		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
> +		if (was_empty)
>  			mod_timer(&mrt->ipmr_expire_timer,
>  				  c->_c.mfc_un.unres.expires);
>  	}
> @@ -1272,7 +1272,6 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
>  		if (uc->mfc_origin == c->mfc_origin &&
>  		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
>  			list_del(&_uc->list);
> -			atomic_dec(&mrt->cache_resolve_queue_len);
>  			found = true;
>  			break;
>  		}
> @@ -1328,7 +1327,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags)
>  	}
>  
>  	if (flags & MRT_FLUSH_MFC) {
> -		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
> +		if (!list_empty(&mrt->mfc_unres_queue)) {
>  			spin_lock_bh(&mfc_unres_lock);
>  			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
>  				list_del(&c->list);
> @@ -2750,9 +2749,19 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
>  		return ipmr_mfc_delete(tbl, &mfcc, parent);
>  }
>  
> +static int queue_count(struct mr_table *mrt)
> +{
> +	struct list_head *pos;
> +	int count = 0;
> +
> +	list_for_each(pos, &mrt->mfc_unres_queue)
> +		count++;
> +	return count;
> +}

I don't think we hold the mfc_unres_lock here while walking
the unresolved list below in ipmr_fill_table().

> +
>  static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
>  {
> -	u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
> +	u32 queue_len = queue_count(mrt);
>  
>  	if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
>  	    nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
> diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
> index e80d36c5073d..d02f0f903559 100644
> --- a/net/ipv6/ip6mr.c
> +++ b/net/ipv6/ip6mr.c
> @@ -768,8 +768,6 @@ static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
>  	struct net *net = read_pnet(&mrt->net);
>  	struct sk_buff *skb;
>  
> -	atomic_dec(&mrt->cache_resolve_queue_len);
> -
>  	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
>  		if (ipv6_hdr(skb)->version == 0) {
>  			struct nlmsghdr *nlh = skb_pull(skb,
> @@ -1148,8 +1146,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
>  		 *	Create a new entry if allowable
>  		 */
>  
> -		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
> -		    (c = ip6mr_cache_alloc_unres()) == NULL) {
> +		c = ip6mr_cache_alloc_unres();
> +		if (!c) {
>  			spin_unlock_bh(&mfc_unres_lock);
>  
>  			kfree_skb(skb);
> @@ -1176,7 +1174,6 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
>  			return err;
>  		}
>  
> -		atomic_inc(&mrt->cache_resolve_queue_len);
>  		list_add(&c->_c.list, &mrt->mfc_unres_queue);
>  		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
>  
> @@ -1468,7 +1465,6 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
>  		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
>  		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
>  			list_del(&_uc->list);
> -			atomic_dec(&mrt->cache_resolve_queue_len);
>  			found = true;
>  			break;
>  		}
> @@ -1526,7 +1522,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags)
>  	}
>  
>  	if (flags & MRT6_FLUSH_MFC) {
> -		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
> +		if (!list_empty(&mrt->mfc_unres_queue)) {
>  			spin_lock_bh(&mfc_unres_lock);
>  			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
>  				list_del(&c->list);
> 



* Re: [PATCH net] ipmr: remove cache_resolve_queue_len
  2019-09-03  9:15 ` Nikolay Aleksandrov
@ 2019-09-03 12:55   ` Hangbin Liu
  2019-09-03 13:18     ` Nikolay Aleksandrov
  0 siblings, 1 reply; 10+ messages in thread
From: Hangbin Liu @ 2019-09-03 12:55 UTC (permalink / raw)
  To: Nikolay Aleksandrov
  Cc: netdev, Phil Karn, Sukumar Gopalakrishnan, David S . Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI

Hi Nikolay,

Thanks for the feedback, see comments below.

On Tue, Sep 03, 2019 at 12:15:34PM +0300, Nikolay Aleksandrov wrote:
> On 03/09/2019 11:43, Hangbin Liu wrote:
> > This is a re-post of a previous patch written by David Miller[1].
> > 
> > Phil Karn reported[2] that on busy networks with lots of unresolved
> > multicast routing entries, the creation of new multicast group routes
> > can be extremely slow and unreliable.
> > 
> > The reason is that we hard-code the number of multicast route entries
> > with unresolved source addresses (cache_resolve_queue_len) to 10. If
> > some multicast routes never resolve and the number of unresolved
> > source addresses keeps growing, no new multicast route cache entries
> > can be created.
> > 
> > To resolve this issue, we need to either add a sysctl entry to make
> > cache_resolve_queue_len configurable, or just remove
> > cache_resolve_queue_len directly, as we already have the socket
> > receive queue limits of the mrouted socket, as pointed out by David.
> > 
> > From my side, I'd prefer to remove cache_resolve_queue_len instead
> > of creating two more (IPv4 and IPv6) sysctl entries.
> > 
> > [1] https://lkml.org/lkml/2018/7/22/11
> > [2] https://lkml.org/lkml/2018/7/21/343
> > 
> > Reported-by: Phil Karn <karn@ka9q.net>
> > Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> > ---
> >  include/linux/mroute_base.h |  2 --
> >  net/ipv4/ipmr.c             | 27 ++++++++++++++++++---------
> >  net/ipv6/ip6mr.c            | 10 +++-------
> >  3 files changed, 21 insertions(+), 18 deletions(-)
> > 
> 
> Hi,
> IMO this is definitely net-next material. A few more comments below.

I thought this was a bug fix, but it looks more suitable for net-next, as you said.
> 
> Note that hosts will automatically have this limit lifted to about 270
> entries with current defaults; some might be surprised if they have
> higher limits set and could be left with queues allowing for thousands
> of entries in a linked list.

I think this is just a cache list whose entries should expire soon. Cache
creation would also fail if there is no buffer space.
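
(For context, the expiry works roughly as follows; this is a simplified
paraphrase of ipmr_expire_process(), not verbatim kernel code. Unresolved
entries are created with an expiry of about 10 seconds, and a per-table
timer walks the queue and destroys anything past its deadline:

	/* simplified sketch of the unresolved-queue expiry walk */
	unsigned long expires = jiffies + 10 * HZ;

	spin_lock(&mfc_unres_lock);
	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(jiffies, c->mfc_un.unres.expires)) {
			/* never resolved in time: unlink the entry
			 * and free its queued skbs */
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		} else if (time_before(c->mfc_un.unres.expires, expires)) {
			/* remember the earliest remaining deadline */
			expires = c->mfc_un.unres.expires;
		}
	}
	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, expires);
	spin_unlock(&mfc_unres_lock);

So entries that never resolve are reclaimed by the timer rather than
accumulating indefinitely.)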

But if you like, I can write a patch with a sysctl parameter.
> 
> > +static int queue_count(struct mr_table *mrt)
> > +{
> > +	struct list_head *pos;
> > +	int count = 0;
> > +
> > +	list_for_each(pos, &mrt->mfc_unres_queue)
> > +		count++;
> > +	return count;
> > +}
> 
> I don't think we hold the mfc_unres_lock here while walking
> the unresolved list below in ipmr_fill_table().

ah, yes, I will fix this.

Thanks
Hangbin
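
(The sysctl variant was never posted; as a minimal sketch of what such a
knob might have looked like — the sysctl name and the netns field below
are invented for illustration, not an actual kernel interface:

	/* hypothetical: make the unresolved-queue limit tunable
	 * instead of the hard-coded 10 */
	static struct ctl_table ipmr_unres_table[] = {
		{
			.procname	= "ipmr_max_unres_queue_len",
			.data		= &init_net.ipv4.sysctl_ipmr_unres_qlen,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ }
	};

with ipmr_cache_unresolved() comparing cache_resolve_queue_len against
net->ipv4.sysctl_ipmr_unres_qlen rather than the literal 10.)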


* Re: [PATCH net] ipmr: remove cache_resolve_queue_len
  2019-09-03 12:55   ` Hangbin Liu
@ 2019-09-03 13:18     ` Nikolay Aleksandrov
  0 siblings, 0 replies; 10+ messages in thread
From: Nikolay Aleksandrov @ 2019-09-03 13:18 UTC (permalink / raw)
  To: Hangbin Liu
  Cc: netdev, Phil Karn, Sukumar Gopalakrishnan, David S . Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI

On 03/09/2019 15:55, Hangbin Liu wrote:
> Hi Nikolay,
> 
> Thanks for the feedback, see comments below.
> 
> On Tue, Sep 03, 2019 at 12:15:34PM +0300, Nikolay Aleksandrov wrote:
>> On 03/09/2019 11:43, Hangbin Liu wrote:
>>> This is a re-post of a previous patch written by David Miller[1].
>>>
>>> Phil Karn reported[2] that on busy networks with lots of unresolved
>>> multicast routing entries, the creation of new multicast group routes
>>> can be extremely slow and unreliable.
>>>
>>> The reason is that we hard-code the number of multicast route entries
>>> with unresolved source addresses (cache_resolve_queue_len) to 10. If
>>> some multicast routes never resolve and the number of unresolved
>>> source addresses keeps growing, no new multicast route cache entries
>>> can be created.
>>>
>>> To resolve this issue, we need to either add a sysctl entry to make
>>> cache_resolve_queue_len configurable, or just remove
>>> cache_resolve_queue_len directly, as we already have the socket
>>> receive queue limits of the mrouted socket, as pointed out by David.
>>>
>>> From my side, I'd prefer to remove cache_resolve_queue_len instead
>>> of creating two more (IPv4 and IPv6) sysctl entries.
>>>
>>> [1] https://lkml.org/lkml/2018/7/22/11
>>> [2] https://lkml.org/lkml/2018/7/21/343
>>>
>>> Reported-by: Phil Karn <karn@ka9q.net>
>>> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
>>> ---
>>>  include/linux/mroute_base.h |  2 --
>>>  net/ipv4/ipmr.c             | 27 ++++++++++++++++++---------
>>>  net/ipv6/ip6mr.c            | 10 +++-------
>>>  3 files changed, 21 insertions(+), 18 deletions(-)
>>>
>>
>> Hi,
>> IMO this is definitely net-next material. A few more comments below.
> 
> I thought this was a bug fix, but it looks more suitable for net-next, as you said.
>>
>> Note that hosts will automatically have this limit lifted to about 270
>> entries with current defaults; some might be surprised if they have
>> higher limits set and could be left with queues allowing for thousands
>> of entries in a linked list.
> 
> I think this is just a cache list whose entries should expire soon. Cache
> creation would also fail if there is no buffer space.
> 
> But if you like, I can write a patch with a sysctl parameter.

I wasn't suggesting the sysctl; I'm actually against it, as I've said before
when commenting on these patches. I just wanted to note that some setups will
end up with the possibility of thousands of entries in a linked list after
updating. Unless we change the data structure, I guess we just have to live
with it; I wouldn't want to add a sysctl or another hard limit that would
have to be removed after a while. In the long run we might update the
algorithm used for unres entries.

So to make it clear: I'm fine with this patch as it is; it is definitely an
improvement IMO.

>>
>>> +static int queue_count(struct mr_table *mrt)
>>> +{
>>> +	struct list_head *pos;
>>> +	int count = 0;
>>> +
>>> +	list_for_each(pos, &mrt->mfc_unres_queue)
>>> +		count++;
>>> +	return count;
>>> +}
>>
>> I don't think we hold the mfc_unres_lock here while walking
>> the unresolved list below in ipmr_fill_table().
> 
> ah, yes, I will fix this.
> 
> Thanks
> Hangbin
> 



* [PATCHv2 net-next] ipmr: remove cache_resolve_queue_len
  2019-09-03  8:43 [PATCH net] ipmr: remove cache_resolve_queue_len Hangbin Liu
  2019-09-03  9:15 ` Nikolay Aleksandrov
@ 2019-09-04  3:34 ` Hangbin Liu
  2019-09-04  7:50   ` Eric Dumazet
  2019-09-06  7:36 ` [PATCHv3 net-next] ipmr: remove hard code cache_resolve_queue_len limit Hangbin Liu
  2 siblings, 1 reply; 10+ messages in thread
From: Hangbin Liu @ 2019-09-04  3:34 UTC (permalink / raw)
  To: netdev
  Cc: Phil Karn, Sukumar Gopalakrishnan, David S . Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI, Hangbin Liu

This is a re-post of a previous patch written by David Miller[1].

Phil Karn reported[2] that on busy networks with lots of unresolved
multicast routing entries, the creation of new multicast group routes
can be extremely slow and unreliable.

The reason is that we hard-code the number of multicast route entries
with unresolved source addresses (cache_resolve_queue_len) to 10. If
some multicast routes never resolve and the number of unresolved
source addresses keeps growing, no new multicast route cache entries
can be created.

To resolve this issue, we need to either add a sysctl entry to make
cache_resolve_queue_len configurable, or just remove
cache_resolve_queue_len directly, as we already have the socket
receive queue limits of the mrouted socket, as pointed out by David.

From my side, I'd prefer to remove cache_resolve_queue_len instead
of creating two more (IPv4 and IPv6) sysctl entries.

[1] https://lkml.org/lkml/2018/7/22/11
[2] https://lkml.org/lkml/2018/7/21/343

v2: hold the mfc_unres_lock while walking the unresolved list in
queue_count(), as Nikolay Aleksandrov reminded.

Reported-by: Phil Karn <karn@ka9q.net>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 include/linux/mroute_base.h |  2 --
 net/ipv4/ipmr.c             | 30 +++++++++++++++++++++---------
 net/ipv6/ip6mr.c            | 10 +++-------
 3 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 34de06b426ef..50fb89533029 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -235,7 +235,6 @@ struct mr_table_ops {
  * @mfc_hash: Hash table of all resolved routes for easy lookup
  * @mfc_cache_list: list of resovled routes for possible traversal
  * @maxvif: Identifier of highest value vif currently in use
- * @cache_resolve_queue_len: current size of unresolved queue
  * @mroute_do_assert: Whether to inform userspace on wrong ingress
  * @mroute_do_pim: Whether to receive IGMP PIMv1
  * @mroute_reg_vif_num: PIM-device vif index
@@ -252,7 +251,6 @@ struct mr_table {
 	struct rhltable		mfc_hash;
 	struct list_head	mfc_cache_list;
 	int			maxvif;
-	atomic_t		cache_resolve_queue_len;
 	bool			mroute_do_assert;
 	bool			mroute_do_pim;
 	bool			mroute_do_wrvifwhole;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c07bc82cbbe9..4d67c64d94a4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -744,8 +744,6 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 	struct sk_buff *skb;
 	struct nlmsgerr *e;
 
-	atomic_dec(&mrt->cache_resolve_queue_len);
-
 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
 		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = skb_pull(skb,
@@ -1133,9 +1131,11 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 	}
 
 	if (!found) {
+		bool was_empty;
+
 		/* Create a new entry if allowable */
-		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
-		    (c = ipmr_cache_alloc_unres()) == NULL) {
+		c = ipmr_cache_alloc_unres();
+		if (!c) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -1161,11 +1161,11 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 			return err;
 		}
 
-		atomic_inc(&mrt->cache_resolve_queue_len);
+		was_empty = list_empty(&mrt->mfc_unres_queue);
 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
 		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
 
-		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
+		if (was_empty)
 			mod_timer(&mrt->ipmr_expire_timer,
 				  c->_c.mfc_un.unres.expires);
 	}
@@ -1272,7 +1272,6 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 		if (uc->mfc_origin == c->mfc_origin &&
 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 			list_del(&_uc->list);
-			atomic_dec(&mrt->cache_resolve_queue_len);
 			found = true;
 			break;
 		}
@@ -1328,7 +1327,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags)
 	}
 
 	if (flags & MRT_FLUSH_MFC) {
-		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+		if (!list_empty(&mrt->mfc_unres_queue)) {
 			spin_lock_bh(&mfc_unres_lock);
 			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
 				list_del(&c->list);
@@ -2750,9 +2749,22 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return ipmr_mfc_delete(tbl, &mfcc, parent);
 }
 
+static int queue_count(struct mr_table *mrt)
+{
+	struct list_head *pos;
+	int count = 0;
+
+	spin_lock_bh(&mfc_unres_lock);
+	list_for_each(pos, &mrt->mfc_unres_queue)
+		count++;
+	spin_unlock_bh(&mfc_unres_lock);
+
+	return count;
+}
+
 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
 {
-	u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
+	u32 queue_len = queue_count(mrt);
 
 	if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
 	    nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e80d36c5073d..d02f0f903559 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -768,8 +768,6 @@ static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 	struct net *net = read_pnet(&mrt->net);
 	struct sk_buff *skb;
 
-	atomic_dec(&mrt->cache_resolve_queue_len);
-
 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 		if (ipv6_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = skb_pull(skb,
@@ -1148,8 +1146,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
-		    (c = ip6mr_cache_alloc_unres()) == NULL) {
+		c = ip6mr_cache_alloc_unres();
+		if (!c) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -1176,7 +1174,6 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 			return err;
 		}
 
-		atomic_inc(&mrt->cache_resolve_queue_len);
 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 
@@ -1468,7 +1465,6 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
 			list_del(&_uc->list);
-			atomic_dec(&mrt->cache_resolve_queue_len);
 			found = true;
 			break;
 		}
@@ -1526,7 +1522,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags)
 	}
 
 	if (flags & MRT6_FLUSH_MFC) {
-		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+		if (!list_empty(&mrt->mfc_unres_queue)) {
 			spin_lock_bh(&mfc_unres_lock);
 			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
 				list_del(&c->list);
-- 
2.19.2



* Re: [PATCHv2 net-next] ipmr: remove cache_resolve_queue_len
  2019-09-04  3:34 ` [PATCHv2 net-next] " Hangbin Liu
@ 2019-09-04  7:50   ` Eric Dumazet
  2019-09-05  3:37     ` Hangbin Liu
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2019-09-04  7:50 UTC (permalink / raw)
  To: Hangbin Liu, netdev
  Cc: Phil Karn, Sukumar Gopalakrishnan, David S . Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI



On 9/4/19 5:34 AM, Hangbin Liu wrote:
> This is a re-post of a previous patch written by David Miller[1].
> 
> Phil Karn reported[2] that on busy networks with lots of unresolved
> multicast routing entries, the creation of new multicast group routes
> can be extremely slow and unreliable.
> 
> The reason is that we hard-code the number of multicast route entries
> with unresolved source addresses (cache_resolve_queue_len) to 10. If
> some multicast routes never resolve and the number of unresolved
> source addresses keeps growing, no new multicast route cache entries
> can be created.
> 
> To resolve this issue, we need to either add a sysctl entry to make
> cache_resolve_queue_len configurable, or just remove
> cache_resolve_queue_len directly, as we already have the socket
> receive queue limits of the mrouted socket, as pointed out by David.
> 
> From my side, I'd prefer to remove cache_resolve_queue_len instead
> of creating two more (IPv4 and IPv6) sysctl entries.

> +static int queue_count(struct mr_table *mrt)
> +{
> +	struct list_head *pos;
> +	int count = 0;
> +
> +	spin_lock_bh(&mfc_unres_lock);
> +	list_for_each(pos, &mrt->mfc_unres_queue)
> +		count++;
> +	spin_unlock_bh(&mfc_unres_lock);
> +
> +	return count;
> +}

I guess that even if we remove the limit on the number of items, we probably
should keep the atomic counter (no code churn, and the patch is much easier
to review...).

Your patch could really be a one-liner [1].

Eventually we could replace this linear list with an RB-tree, so that we are
on the safe side.

[1]
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c07bc82cbbe96d53d05c1665b2f03faa055f1084..313470f6bb148326b4afbc00d265b6a1e40d93bd 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1134,8 +1134,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 
        if (!found) {
                /* Create a new entry if allowable */
-               if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
-                   (c = ipmr_cache_alloc_unres()) == NULL) {
+               c = ipmr_cache_alloc_unres();
+               if (!c) {
                        spin_unlock_bh(&mfc_unres_lock);
 
                        kfree_skb(skb);
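
(As a rough illustration of the RB-tree idea — hypothetical, not from any
posted patch; the rb_root parameter, the unres_node member and the key
encoding are all invented for illustration:

	/* hypothetical: unresolved entries in an rb-tree keyed by
	 * (origin, mcastgrp), so lookup is O(log n) instead of a
	 * linear walk of mfc_unres_queue */
	static struct mfc_cache *ipmr_unres_find(struct rb_root *root,
						 __be32 origin, __be32 mcastgrp)
	{
		u64 key = ((u64)ntohl(origin) << 32) | ntohl(mcastgrp);
		struct rb_node *node = root->rb_node;

		while (node) {
			struct mfc_cache *c = rb_entry(node, struct mfc_cache,
						       _c.unres_node);
			u64 ckey = ((u64)ntohl(c->mfc_origin) << 32) |
				   ntohl(c->mfc_mcastgrp);

			if (key < ckey)
				node = node->rb_left;
			else if (key > ckey)
				node = node->rb_right;
			else
				return c;
		}
		return NULL;
	}

Insert and removal would use rb_link_node()/rb_insert_color() and
rb_erase() in the usual way.)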


* Re: [PATCHv2 net-next] ipmr: remove cache_resolve_queue_len
  2019-09-04  7:50   ` Eric Dumazet
@ 2019-09-05  3:37     ` Hangbin Liu
  0 siblings, 0 replies; 10+ messages in thread
From: Hangbin Liu @ 2019-09-05  3:37 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, Phil Karn, Sukumar Gopalakrishnan, David S . Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI

On Wed, Sep 04, 2019 at 09:50:15AM +0200, Eric Dumazet wrote:
> > +static int queue_count(struct mr_table *mrt)
> > +{
> > +	struct list_head *pos;
> > +	int count = 0;
> > +
> > +	spin_lock_bh(&mfc_unres_lock);
> > +	list_for_each(pos, &mrt->mfc_unres_queue)
> > +		count++;
> > +	spin_unlock_bh(&mfc_unres_lock);
> > +
> > +	return count;
> > +}
> 
> I guess that even if we remove the limit on the number of items, we probably
> should keep the atomic counter (no code churn, and the patch is much easier
> to review...).
> 
> Your patch could really be a one-liner [1].
> 
> Eventually we could replace this linear list with an RB-tree, so that we are
> on the safe side.
> 
> [1]
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index c07bc82cbbe96d53d05c1665b2f03faa055f1084..313470f6bb148326b4afbc00d265b6a1e40d93bd 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -1134,8 +1134,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
>  
>         if (!found) {
>                 /* Create a new entry if allowable */
> -               if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
> -                   (c = ipmr_cache_alloc_unres()) == NULL) {
> +               c = ipmr_cache_alloc_unres();
> +               if (!c) {
>                         spin_unlock_bh(&mfc_unres_lock);
>  
>                         kfree_skb(skb);

Hmm, that looks clearer and easier to review.

Hi David, Alexey,

What do you think? If you also agree, I could post a new version of the patch.

Thanks
Hangbin


* [PATCHv3 net-next] ipmr: remove hard code cache_resolve_queue_len limit
  2019-09-03  8:43 [PATCH net] ipmr: remove cache_resolve_queue_len Hangbin Liu
  2019-09-03  9:15 ` Nikolay Aleksandrov
  2019-09-04  3:34 ` [PATCHv2 net-next] " Hangbin Liu
@ 2019-09-06  7:36 ` Hangbin Liu
  2019-09-06 10:08   ` Nikolay Aleksandrov
  2019-09-07 15:49   ` David Miller
  2 siblings, 2 replies; 10+ messages in thread
From: Hangbin Liu @ 2019-09-06  7:36 UTC (permalink / raw)
  To: netdev
  Cc: Phil Karn, Sukumar Gopalakrishnan, David S . Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI, Eric Dumazet, Hangbin Liu

This is a re-post of a previous patch written by David Miller[1].

Phil Karn reported[2] that on busy networks with lots of unresolved
multicast routing entries, the creation of new multicast group routes
can be extremely slow and unreliable.

The reason is that we hard-code the number of multicast route entries
with unresolved source addresses (cache_resolve_queue_len) to 10. If
some multicast routes never resolve and the number of unresolved
source addresses keeps growing, no new multicast route cache entries
can be created.

To resolve this issue, we need to either add a sysctl entry to make
cache_resolve_queue_len configurable, or just remove the
cache_resolve_queue_len limit directly, as we already have the socket
receive queue limits of the mrouted socket, as pointed out by David.

From my side, I'd prefer to remove the cache_resolve_queue_len limit
instead of creating two more (IPv4 and IPv6) sysctl entries.

[1] https://lkml.org/lkml/2018/7/22/11
[2] https://lkml.org/lkml/2018/7/21/343

v3: instead of removing cache_resolve_queue_len entirely, only remove
the hard-coded limit when allocating the unresolved cache, as Eric
Dumazet suggested, so we don't need to re-count it in other places.

v2: hold the mfc_unres_lock while walking the unresolved list in
queue_count(), as Nikolay Aleksandrov reminded.

Reported-by: Phil Karn <karn@ka9q.net>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 net/ipv4/ipmr.c  | 4 ++--
 net/ipv6/ip6mr.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c07bc82cbbe9..313470f6bb14 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1134,8 +1134,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 
 	if (!found) {
 		/* Create a new entry if allowable */
-		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
-		    (c = ipmr_cache_alloc_unres()) == NULL) {
+		c = ipmr_cache_alloc_unres();
+		if (!c) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e80d36c5073d..857a89ad4d6c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1148,8 +1148,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
-		    (c = ip6mr_cache_alloc_unres()) == NULL) {
+		c = ip6mr_cache_alloc_unres();
+		if (!c) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
-- 
2.19.2



* Re: [PATCHv3 net-next] ipmr: remove hard code cache_resolve_queue_len limit
  2019-09-06  7:36 ` [PATCHv3 net-next] ipmr: remove hard code cache_resolve_queue_len limit Hangbin Liu
@ 2019-09-06 10:08   ` Nikolay Aleksandrov
  2019-09-07 15:49   ` David Miller
  1 sibling, 0 replies; 10+ messages in thread
From: Nikolay Aleksandrov @ 2019-09-06 10:08 UTC (permalink / raw)
  To: Hangbin Liu, netdev
  Cc: Phil Karn, Sukumar Gopalakrishnan, David S . Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI, Eric Dumazet

On 06/09/2019 10:36, Hangbin Liu wrote:
> This is a re-post of a previous patch written by David Miller[1].
> 
> Phil Karn reported[2] that on busy networks with lots of unresolved
> multicast routing entries, the creation of new multicast group routes
> can be extremely slow and unreliable.
> 
> The reason is that we hard-code the number of multicast route entries
> with unresolved source addresses (cache_resolve_queue_len) to 10. If
> some multicast routes never resolve and the number of unresolved
> source addresses keeps growing, no new multicast route cache entries
> can be created.
> 
> To resolve this issue, we need to either add a sysctl entry to make
> cache_resolve_queue_len configurable, or just remove the
> cache_resolve_queue_len limit directly, as we already have the socket
> receive queue limits of the mrouted socket, as pointed out by David.
> 
> From my side, I'd prefer to remove the cache_resolve_queue_len limit
> instead of creating two more (IPv4 and IPv6) sysctl entries.
> 
> [1] https://lkml.org/lkml/2018/7/22/11
> [2] https://lkml.org/lkml/2018/7/21/343
> 
> v3: instead of removing cache_resolve_queue_len entirely, only remove
> the hard-coded limit when allocating the unresolved cache, as Eric
> Dumazet suggested, so we don't need to re-count it in other places.
> 
> v2: hold the mfc_unres_lock while walking the unresolved list in
> queue_count(), as Nikolay Aleksandrov reminded.
> 
> Reported-by: Phil Karn <karn@ka9q.net>
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
>  net/ipv4/ipmr.c  | 4 ++--
>  net/ipv6/ip6mr.c | 4 ++--
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 

Please CC all interested parties who have reviewed/commented on previous versions.

I'd also add a Suggested-by tag such as:
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>

Looks good to me, thanks!
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>


* Re: [PATCHv3 net-next] ipmr: remove hard code cache_resolve_queue_len limit
  2019-09-06  7:36 ` [PATCHv3 net-next] ipmr: remove hard code cache_resolve_queue_len limit Hangbin Liu
  2019-09-06 10:08   ` Nikolay Aleksandrov
@ 2019-09-07 15:49   ` David Miller
  1 sibling, 0 replies; 10+ messages in thread
From: David Miller @ 2019-09-07 15:49 UTC (permalink / raw)
  To: liuhangbin; +Cc: netdev, karn, sukumarg1973, kuznet, yoshfuji, eric.dumazet

From: Hangbin Liu <liuhangbin@gmail.com>
Date: Fri,  6 Sep 2019 15:36:01 +0800

> This is a re-post of a previous patch written by David Miller[1].
> 
> Phil Karn reported[2] that on busy networks with lots of unresolved
> multicast routing entries, the creation of new multicast group routes
> can be extremely slow and unreliable.
> 
> The reason is that we hard-code the number of multicast route entries
> with unresolved source addresses (cache_resolve_queue_len) to 10. If
> some multicast routes never resolve and the number of unresolved
> source addresses keeps growing, no new multicast route cache entries
> can be created.
> 
> To resolve this issue, we need to either add a sysctl entry to make
> cache_resolve_queue_len configurable, or just remove the
> cache_resolve_queue_len limit directly, as we already have the socket
> receive queue limits of the mrouted socket, as pointed out by David.
> 
> From my side, I'd prefer to remove the cache_resolve_queue_len limit
> instead of creating two more (IPv4 and IPv6) sysctl entries.
> 
> [1] https://lkml.org/lkml/2018/7/22/11
> [2] https://lkml.org/lkml/2018/7/21/343
> 
> v3: instead of removing cache_resolve_queue_len entirely, only remove
> the hard-coded limit when allocating the unresolved cache, as Eric
> Dumazet suggested, so we don't need to re-count it in other places.
> 
> v2: hold the mfc_unres_lock while walking the unresolved list in
> queue_count(), as Nikolay Aleksandrov reminded.
> 
> Reported-by: Phil Karn <karn@ka9q.net>
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>

Applied to net-next.

