Re: hard-coded limit on unresolved multicast route cache in ipv4/ipmr.c causes slow, unreliable creation of multicast routes on busy networks

From: Hangbin Liu <liuhangbin@gmail.com>
To: Sukumar Gopalakrishnan <sukumarg1973@gmail.com>
Cc: davem@davemloft.net, karn@ka9q.net, kuznet@ms2.inr.ac.ru,
	yoshfuji@linux-ipv6.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: hard-coded limit on unresolved multicast route cache in ipv4/ipmr.c causes slow, unreliable creation of multicast routes on busy networks
Date: Tue, 4 Dec 2018 14:51:01 +0800	[thread overview]
Message-ID: <20181204065100.GT24677@leo.usersys.redhat.com> (raw)
In-Reply-To: <CADiZnkSy=rFq5xLs6RcgJDihQ1Vwo2WBBY9Fi_5jOHr8XupukQ@mail.gmail.com>

On Mon, Nov 26, 2018 at 10:44:49AM +0530, Sukumar Gopalakrishnan wrote:
> Hi,
> 
>  There is a patch to make this queue length configurable. Is the patch mentioned
> below going to be applied?
> 
> http://lkml.iu.edu/hypermail/linux/kernel/1810.3/02344.html

It looks like this topic is stuck again.

> 
> Regards,
> Sukumar
> 
> On Tue, Nov 20, 2018 at 2:55 PM Hangbin Liu <liuhangbin@gmail.com> wrote:
> 
>     Hi David,
> 
>     On Sat, Jul 21, 2018 at 10:03:09PM -0700, David Miller wrote:
>     > Yeah that limit is bogus for several reasons.
>     ...
>     >
>     > Therefore, it probably is safe and correct to remove this
>     > cache_resolve_queue_len altogether.
>     >
>     > Something like this:
>     >
>     > diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
>     > index d633f737b3c6..b166465d7c05 100644
>     > --- a/include/linux/mroute_base.h
>     > +++ b/include/linux/mroute_base.h
>     > @@ -234,7 +234,6 @@ struct mr_table_ops {
>     >   * @mfc_hash: Hash table of all resolved routes for easy lookup
>     >   * @mfc_cache_list: list of resovled routes for possible traversal
>     >   * @maxvif: Identifier of highest value vif currently in use
>     > - * @cache_resolve_queue_len: current size of unresolved queue
>     >   * @mroute_do_assert: Whether to inform userspace on wrong ingress
>     >   * @mroute_do_pim: Whether to receive IGMP PIMv1
>     >   * @mroute_reg_vif_num: PIM-device vif index
>     > @@ -251,7 +250,6 @@ struct mr_table {
>     >       struct rhltable         mfc_hash;
>     >       struct list_head        mfc_cache_list;
>     >       int                     maxvif;
>     > -     atomic_t                cache_resolve_queue_len;
>     >       bool                    mroute_do_assert;
>     >       bool                    mroute_do_pim;
>     >       int                     mroute_reg_vif_num;
>     > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
>     > index 9f79b9803a16..c007cf9bfe82 100644
>     > --- a/net/ipv4/ipmr.c
>     > +++ b/net/ipv4/ipmr.c
>     > @@ -747,8 +747,6 @@ static void ipmr_destroy_unres(struct mr_table *mrt,
>     struct mfc_cache *c)
>     >       struct sk_buff *skb;
>     >       struct nlmsgerr *e;
>     > 
>     > -     atomic_dec(&mrt->cache_resolve_queue_len);
>     > -
>     >       while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
>     >               if (ip_hdr(skb)->version == 0) {
>     >                       struct nlmsghdr *nlh = skb_pull(skb,
>     > @@ -1135,9 +1133,11 @@ static int ipmr_cache_unresolved(struct mr_table
>     *mrt, vifi_t vifi,
>     >       }
>     > 
>     >       if (!found) {
>     > +             bool was_empty;
>     > +
>     >               /* Create a new entry if allowable */
>     > -             if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
>     > -                 (c = ipmr_cache_alloc_unres()) == NULL) {
>     > +             c = ipmr_cache_alloc_unres();
>     > +             if (!c) {
>     >                       spin_unlock_bh(&mfc_unres_lock);
>     > 
>     >                       kfree_skb(skb);
>     > @@ -1163,11 +1163,11 @@ static int ipmr_cache_unresolved(struct mr_table
>     *mrt, vifi_t vifi,
>     >                       return err;
>     >               }
>     > 
>     > -             atomic_inc(&mrt->cache_resolve_queue_len);
>     > +             was_empty = list_empty(&mrt->mfc_unres_queue);
>     >               list_add(&c->_c.list, &mrt->mfc_unres_queue);
>     >               mroute_netlink_event(mrt, c, RTM_NEWROUTE);
>     > 
>     > -             if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
>     > +             if (was_empty)
>     >                       mod_timer(&mrt->ipmr_expire_timer,
>     >                                 c->_c.mfc_un.unres.expires);
> 
>     ipmr_expire_process() and ipmr_do_expire_process() call mod_timer() when
>     !list_empty(&mrt->mfc_unres_queue). Should the check here also be !was_empty?
> 
>     BTW, do you have any plans to apply this patch to the kernel?
> 
>     Regards
>     Hangbin
> 
>     >       }
>     > @@ -1274,7 +1274,6 @@ static int ipmr_mfc_add(struct net *net, struct
>     mr_table *mrt,
>     >               if (uc->mfc_origin == c->mfc_origin &&
>     >                   uc->mfc_mcastgrp == c->mfc_mcastgrp) {
>     >                       list_del(&_uc->list);
>     > -                     atomic_dec(&mrt->cache_resolve_queue_len);
>     >                       found = true;
>     >                       break;
>     >               }
>     > @@ -1322,7 +1321,7 @@ static void mroute_clean_tables(struct mr_table
>     *mrt, bool all)
>     >               mr_cache_put(c);
>     >       }
>     > 
>     > -     if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
>     > +     if (!list_empty(&mrt->mfc_unres_queue)) {
>     >               spin_lock_bh(&mfc_unres_lock);
>     >               list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue,
>     list) {
>     >                       list_del(&c->list);
>     > @@ -2648,9 +2647,19 @@ static int ipmr_rtm_route(struct sk_buff *skb,
>     struct nlmsghdr *nlh,
>     >               return ipmr_mfc_delete(tbl, &mfcc, parent);
>     >  }
>     > 
>     > +static int queue_count(struct mr_table *mrt)
>     > +{
>     > +     struct list_head *pos;
>     > +     int count = 0;
>     > +     
>     > +     list_for_each(pos, &mrt->mfc_unres_queue)
>     > +             count++;
>     > +     return count;
>     > +}
>     > +
>     >  static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
>     >  {
>     > -     u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
>     > +     u32 queue_len = queue_count(mrt);
>     > 
>     >       if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
>     >           nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
>     > diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
>     > index 0d0f0053bb11..75e9c5a3e7ea 100644
>     > --- a/net/ipv6/ip6mr.c
>     > +++ b/net/ipv6/ip6mr.c
>     > @@ -759,8 +759,6 @@ static void ip6mr_destroy_unres(struct mr_table *mrt,
>     struct mfc6_cache *c)
>     >       struct net *net = read_pnet(&mrt->net);
>     >       struct sk_buff *skb;
>     > 
>     > -     atomic_dec(&mrt->cache_resolve_queue_len);
>     > -
>     >       while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL)
>     {
>     >               if (ipv6_hdr(skb)->version == 0) {
>     >                       struct nlmsghdr *nlh = skb_pull(skb,
>     > @@ -1139,8 +1137,8 @@ static int ip6mr_cache_unresolved(struct mr_table
>     *mrt, mifi_t mifi,
>     >                *      Create a new entry if allowable
>     >                */
>     > 
>     > -             if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
>     > -                 (c = ip6mr_cache_alloc_unres()) == NULL) {
>     > +             c = ip6mr_cache_alloc_unres();
>     > +             if (!c) {
>     >                       spin_unlock_bh(&mfc_unres_lock);
>     > 
>     >                       kfree_skb(skb);
>     > @@ -1167,7 +1165,6 @@ static int ip6mr_cache_unresolved(struct mr_table
>     *mrt, mifi_t mifi,
>     >                       return err;
>     >               }
>     > 
>     > -             atomic_inc(&mrt->cache_resolve_queue_len);
>     >               list_add(&c->_c.list, &mrt->mfc_unres_queue);
>     >               mr6_netlink_event(mrt, c, RTM_NEWROUTE);
>     > 
>     > @@ -1455,7 +1452,6 @@ static int ip6mr_mfc_add(struct net *net, struct
>     mr_table *mrt,
>     >               if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
>     >                   ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp))
>     {
>     >                       list_del(&_uc->list);
>     > -                     atomic_dec(&mrt->cache_resolve_queue_len);
>     >                       found = true;
>     >                       break;
>     >               }
>     > @@ -1502,7 +1498,7 @@ static void mroute_clean_tables(struct mr_table
>     *mrt, bool all)
>     >               mr_cache_put(c);
>     >       }
>     > 
>     > -     if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
>     > +     if (!list_empty(&mrt->mfc_unres_queue)) {
>     >               spin_lock_bh(&mfc_unres_lock);
>     >               list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue,
>     list) {
>     >                       list_del(&c->list);
>