netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Vlad Yasevich <vyasevich@gmail.com>
To: Xin Long <lucien.xin@gmail.com>
Cc: network dev <netdev@vger.kernel.org>,
	linux-sctp@vger.kernel.org,
	Marcelo Ricardo Leitner <mleitner@redhat.com>,
	Vlad Yasevich <vyasevic@redhat.com>,
	daniel@iogearbox.net, davem <davem@davemloft.net>
Subject: Re: [PATCH net-next 1/5] sctp: add the rhashtable apis for sctp global transport hashtable
Date: Thu, 7 Jan 2016 15:28:03 -0500	[thread overview]
Message-ID: <568ECA53.304@gmail.com> (raw)
In-Reply-To: <CADvbK_eu13b9RQ9eMLkBJZyTuQLOwThyuyzgDpwg9RbupXBN5A@mail.gmail.com>

On 01/06/2016 12:01 PM, Xin Long wrote:
> On Wed, Jan 6, 2016 at 2:38 AM, Vlad Yasevich <vyasevich@gmail.com> wrote:
>> On 12/30/2015 10:50 AM, Xin Long wrote:
>>> transport hashtable will replace the association hashtable to do the
>>> lookup for transport, and then get association by t->asoc. rhashtable
>>> apis will be used because it's resizable, scalable and uses rcu.
>>>
>>> lport + rport + paddr will be the base hashkey to locate the chain,
>>> with net to protect one netns from another, then plus the laddr to
>>> compare to get the target.
>>>
>>> this patch will provide the lookup functions:
>>> - sctp_epaddr_lookup_transport
>>> - sctp_addrs_lookup_transport
>>>
>>> hash/unhash functions:
>>> - sctp_hash_transport
>>> - sctp_unhash_transport
>>>
>>> init/destroy functions:
>>> - sctp_transport_hashtable_init
>>> - sctp_transport_hashtable_destroy
>>>
>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
>>> Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
>>> ---
>>>  include/net/sctp/sctp.h    |  11 ++++
>>>  include/net/sctp/structs.h |   5 ++
>>>  net/sctp/input.c           | 131 +++++++++++++++++++++++++++++++++++++++++++++
>>>  3 files changed, 147 insertions(+)
>>>
>>> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
>>> index ce13cf2..7bbdfba 100644
>>> --- a/include/net/sctp/sctp.h
>>> +++ b/include/net/sctp/sctp.h
>>> @@ -143,6 +143,17 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
>>>                                struct sctp_transport *t);
>>>  void sctp_backlog_migrate(struct sctp_association *assoc,
>>>                         struct sock *oldsk, struct sock *newsk);
>>> +int sctp_transport_hashtable_init(void);
>>> +void sctp_transport_hashtable_destroy(void);
>>> +void sctp_hash_transport(struct sctp_transport *t);
>>> +void sctp_unhash_transport(struct sctp_transport *t);
>>> +struct sctp_transport *sctp_addrs_lookup_transport(
>>> +                             struct net *net,
>>> +                             const union sctp_addr *laddr,
>>> +                             const union sctp_addr *paddr);
>>> +struct sctp_transport *sctp_epaddr_lookup_transport(
>>> +                             const struct sctp_endpoint *ep,
>>> +                             const union sctp_addr *paddr);
>>>
>>>  /*
>>>   * sctp/proc.c
>>> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
>>> index eea9bde..4ab87d0 100644
>>> --- a/include/net/sctp/structs.h
>>> +++ b/include/net/sctp/structs.h
>>> @@ -48,6 +48,7 @@
>>>  #define __sctp_structs_h__
>>>
>>>  #include <linux/ktime.h>
>>> +#include <linux/rhashtable.h>
>>>  #include <linux/socket.h>    /* linux/in.h needs this!!    */
>>>  #include <linux/in.h>                /* We get struct sockaddr_in. */
>>>  #include <linux/in6.h>               /* We get struct in6_addr     */
>>> @@ -123,6 +124,8 @@ extern struct sctp_globals {
>>>       struct sctp_hashbucket *assoc_hashtable;
>>>       /* This is the sctp port control hash.  */
>>>       struct sctp_bind_hashbucket *port_hashtable;
>>> +     /* This is the hash of all transports. */
>>> +     struct rhashtable transport_hashtable;
>>>
>>>       /* Sizes of above hashtables. */
>>>       int ep_hashsize;
>>> @@ -147,6 +150,7 @@ extern struct sctp_globals {
>>>  #define sctp_assoc_hashtable         (sctp_globals.assoc_hashtable)
>>>  #define sctp_port_hashsize           (sctp_globals.port_hashsize)
>>>  #define sctp_port_hashtable          (sctp_globals.port_hashtable)
>>> +#define sctp_transport_hashtable     (sctp_globals.transport_hashtable)
>>>  #define sctp_checksum_disable                (sctp_globals.checksum_disable)
>>>
>>>  /* SCTP Socket type: UDP or TCP style. */
>>> @@ -753,6 +757,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet)
>>>  struct sctp_transport {
>>>       /* A list of transports. */
>>>       struct list_head transports;
>>> +     struct rhash_head node;
>>>
>>>       /* Reference counting. */
>>>       atomic_t refcnt;
>>> diff --git a/net/sctp/input.c b/net/sctp/input.c
>>> index b6493b3..bac8278 100644
>>> --- a/net/sctp/input.c
>>> +++ b/net/sctp/input.c
>>> @@ -782,6 +782,137 @@ hit:
>>>       return ep;
>>>  }
>>>
>>> +/* rhashtable for transport */
>>> +struct sctp_hash_cmp_arg {
>>> +     const union sctp_addr           *laddr;
>>> +     const union sctp_addr           *paddr;
>>> +     const struct net                *net;
>>> +};
>>> +
>>> +static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
>>> +                             const void *ptr)
>>> +{
>>> +     const struct sctp_hash_cmp_arg *x = arg->key;
>>> +     const struct sctp_transport *t = ptr;
>>> +     struct sctp_association *asoc = t->asoc;
>>> +     const struct net *net = x->net;
>>> +
>>> +     if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
>>> +             return 1;
>>> +     if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
>>> +             return 1;
>>> +     if (!net_eq(sock_net(asoc->base.sk), net))
>>> +             return 1;
>>> +     if (!sctp_bind_addr_match(&asoc->base.bind_addr,
>>> +                               x->laddr, sctp_sk(asoc->base.sk)))
>>> +             return 1;
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
>>> +{
>>> +     const struct sctp_transport *t = data;
>>> +     const union sctp_addr *paddr = &t->ipaddr;
>>> +     const struct net *net = sock_net(t->asoc->base.sk);
>>> +     u16 lport = htons(t->asoc->base.bind_addr.port);
>>> +     u32 addr;
>>> +
>>> +     if (paddr->sa.sa_family == AF_INET6)
>>> +             addr = jhash(&paddr->v6.sin6_addr, 16, seed);
>>> +     else
>>> +             addr = paddr->v4.sin_addr.s_addr;
>>> +
>>> +     return  jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
>>> +                          (__force __u32)lport, net_hash_mix(net), seed);
>>> +}
>>> +
>>> +static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
>>> +{
>>> +     const struct sctp_hash_cmp_arg *x = data;
>>> +     const union sctp_addr *paddr = x->paddr;
>>> +     const struct net *net = x->net;
>>> +     u16 lport = x->laddr->v4.sin_port;
>>> +     u32 addr;
>>> +
>>> +     if (paddr->sa.sa_family == AF_INET6)
>>> +             addr = jhash(&paddr->v6.sin6_addr, 16, seed);
>>> +     else
>>> +             addr = paddr->v4.sin_addr.s_addr;
>>> +
>>> +     return  jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
>>> +                          (__force __u32)lport, net_hash_mix(net), seed);
>>> +}
>>> +
>>> +static const struct rhashtable_params sctp_hash_params = {
>>> +     .head_offset            = offsetof(struct sctp_transport, node),
>>> +     .hashfn                 = sctp_hash_key,
>>> +     .obj_hashfn             = sctp_hash_obj,
>>> +     .obj_cmpfn              = sctp_hash_cmp,
>>> +     .automatic_shrinking    = true,
>>> +};
>>> +
>>> +int sctp_transport_hashtable_init(void)
>>> +{
>>> +     return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params);
>>> +}
>>> +
>>> +void sctp_transport_hashtable_destroy(void)
>>> +{
>>> +     rhashtable_destroy(&sctp_transport_hashtable);
>>> +}
>>> +
>>> +void sctp_hash_transport(struct sctp_transport *t)
>>> +{
>>> +     struct sctp_sockaddr_entry *addr;
>>> +     struct sctp_hash_cmp_arg arg;
>>> +
>>> +     addr = list_entry(t->asoc->base.bind_addr.address_list.next,
>>> +                       struct sctp_sockaddr_entry, list);
>>> +     arg.laddr = &addr->a;
>>> +     arg.paddr = &t->ipaddr;
>>> +     arg.net   = sock_net(t->asoc->base.sk);
>>> +
>>> +reinsert:
>>> +     if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg,
>>> +                                      &t->node, sctp_hash_params) == -EBUSY)
>>> +             goto reinsert;
>>> +}
>>> +
>>> +void sctp_unhash_transport(struct sctp_transport *t)
>>> +{
>>> +     rhashtable_remove_fast(&sctp_transport_hashtable, &t->node,
>>> +                            sctp_hash_params);
>>> +}
>>> +
>>> +struct sctp_transport *sctp_addrs_lookup_transport(
>>> +                             struct net *net,
>>> +                             const union sctp_addr *laddr,
>>> +                             const union sctp_addr *paddr)
>>> +{
>>> +     struct sctp_hash_cmp_arg arg = {
>>> +             .laddr = laddr,
>>> +             .paddr = paddr,
>>> +             .net   = net,
>>> +     };
>>> +
>>> +     return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
>>> +                                   sctp_hash_params);
>>> +}
>>> +
>>> +struct sctp_transport *sctp_epaddr_lookup_transport(
>>> +                             const struct sctp_endpoint *ep,
>>> +                             const union sctp_addr *paddr)
>>> +{
>>> +     struct sctp_sockaddr_entry *addr;
>>> +     struct net *net = sock_net(ep->base.sk);
>>> +
>>> +     addr = list_entry(ep->base.bind_addr.address_list.next,
>>> +                       struct sctp_sockaddr_entry, list);
>>> +
>>> +     return sctp_addrs_lookup_transport(net, &addr->a, paddr);
>>> +}
>>> +
>>
>> I don't think that this is right, mainly because not all endpoint
>> addresses will be copied to the association bind_addr list.  As a result,
>> you may actually have an association on this endpoint to a given
>> peer, but it may not be using the first address from the endpoint.
>>
>> What might work is to pick an endpoint address that would be usable within
>> the scope of the peer address.
> it's not that easy, does it make sense to you if I change
> 
>      if (!sctp_bind_addr_match(&asoc->base.bind_addr,
>                                x->laddr, sctp_sk(asoc->base.sk)))
> 
> to
>      if (!sctp_bind_addr_match(&asoc->ep->base.bind_addr,
>                                x->laddr, sctp_sk(asoc->base.sk)))
> 
> in sctp_hash_cmp() ?
> 

No, the problem then becomes the accept/peel-off path.  The asoc->ep
linkage isn't protected in the lookup path and thus can race against
accept call moving the assoc from one endpoint to another.
This is why we've used the association list as it is under rcu protection.

We might be able to get away with making asoc->ep pointer rcu protected...
Need to think a bit more about it.

-vlad

  parent reply	other threads:[~2016-01-07 20:28 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-30 15:50 [PATCH net-next 0/5] sctp: use transport hashtable to replace association's with rhashtable Xin Long
2015-12-30 15:50 ` [PATCH net-next 1/5] sctp: add the rhashtable apis for sctp global transport hashtable Xin Long
2015-12-30 15:50   ` [PATCH net-next 2/5] sctp: apply rhashtable api to send/recv path Xin Long
2015-12-30 15:50     ` [PATCH net-next 3/5] sctp: apply rhashtable api to sctp procfs Xin Long
2015-12-30 15:50       ` [PATCH net-next 4/5] sctp: drop the old assoc hashtable of sctp Xin Long
2015-12-30 15:50         ` [PATCH net-next 5/5] sctp: remove the local_bh_disable/enable in sctp_endpoint_lookup_assoc Xin Long
2016-01-05 19:07     ` [PATCH net-next 2/5] sctp: apply rhashtable api to send/recv path Vlad Yasevich
2016-01-06 16:18       ` Xin Long
2016-01-06 17:42       ` mleitner
2016-01-11 15:00         ` Vlad Yasevich
2015-12-30 16:57   ` [PATCH net-next 1/5] sctp: add the rhashtable apis for sctp global transport hashtable Eric Dumazet
2015-12-30 17:50     ` David Miller
2016-01-11  9:32       ` Herbert Xu
2016-01-11 16:33         ` Marcelo Ricardo Leitner
2016-01-11 18:08           ` Vlad Yasevich
2016-01-11 18:19             ` Marcelo Ricardo Leitner
2015-12-30 17:41   ` Marcelo Ricardo Leitner
2016-01-05 10:10     ` Xin Long
2016-01-11  9:22       ` Herbert Xu
2016-01-05 18:38   ` Vlad Yasevich
2016-01-06 17:01     ` Xin Long
2016-01-06 18:19       ` Marcelo Ricardo Leitner
2016-01-07 17:23         ` Marcelo Ricardo Leitner
2016-01-07 20:28       ` Vlad Yasevich [this message]
2016-01-11  9:30   ` Herbert Xu
2016-01-11 16:00     ` mleitner
2016-01-11 17:20       ` Vlad Yasevich
2016-01-11 18:09         ` mleitner
2016-01-11 21:35           ` David Miller
2016-01-11 21:31         ` David Miller
2015-12-30 17:19 ` [PATCH net-next 0/5] sctp: use transport hashtable to replace association's with rhashtable Eric Dumazet
2015-12-30 17:32   ` Marcelo Ricardo Leitner
2015-12-30 19:11     ` Eric Dumazet
2015-12-30 20:44       ` David Miller
2015-12-30 21:57         ` Eric Dumazet
2015-12-30 22:29           ` Marcelo Ricardo Leitner
2015-12-30 17:52   ` David Miller
2015-12-30 19:03     ` Eric Dumazet
2015-12-30 20:40       ` David Miller
2016-01-04 22:30 ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=568ECA53.304@gmail.com \
    --to=vyasevich@gmail.com \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=linux-sctp@vger.kernel.org \
    --cc=lucien.xin@gmail.com \
    --cc=mleitner@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=vyasevic@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).