All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kishen Maloor <kishen.maloor@intel.com>
To: Paolo Abeni <pabeni@redhat.com>, <mptcp@lists.linux.dev>
Subject: Re: [PATCH mptcp-next v2 08/21] mptcp: attempt to add listening sockets for announced addrs
Date: Tue, 18 Jan 2022 17:20:22 -0800	[thread overview]
Message-ID: <d2c95daa-5ad1-68d9-1072-4220c3931838@intel.com> (raw)
In-Reply-To: <c187fa2042e25cdc85aff2dd6687781b2511d86a.camel@redhat.com>

Hi Paolo, Matthieu,

On 1/14/22 7:54 AM, Paolo Abeni wrote:
> Hello,
> 
> On Wed, 2022-01-12 at 17:15 -0500, Kishen Maloor wrote:
>> When ADD_ADDR announcements use the port associated with an
>> active subflow, this change ensures that a listening socket is
>> bound to the announced address and port for subsequently
>> receiving MP_JOINs from the remote end. In case there's
>> a recorded lsk bound to that address+port, it is reused.
>> But if a listening socket for this address is already held by the
>> application then no further action is taken.
>>
>> When a listening socket is created, it is stored in
>> struct mptcp_pm_add_entry and released accordingly.
>>
>> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/203
>>
>> v2: fixed formatting
>>
>> Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
> 
> should be either:
> 
> """
> <changelog>
> 
> <tags>
> """
> 
> or:
> 
> """
> <tags>
> ---
> <changelog>
> """
> 
> we usually keep the changelog outside the commit message for
> development history before landing on netdev, that is:

Thanks! I shall reflect this change in the related commit messages.

> 
> """
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/203
> Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
> ---
> v2: fixed formatting
> """
> 
>> ---
>>  net/mptcp/pm_netlink.c | 56 ++++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 54 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
>> index 779ec9d375f0..e2211f3b8c8c 100644
>> --- a/net/mptcp/pm_netlink.c
>> +++ b/net/mptcp/pm_netlink.c
>> @@ -43,6 +43,7 @@ struct mptcp_pm_add_entry {
>>  	struct mptcp_addr_info	addr;
>>  	struct timer_list	add_timer;
>>  	struct mptcp_sock	*sock;
>> +	struct mptcp_local_lsk	*lsk_ref;
>>  	u8			retrans_times;
>>  };
>>  
>> @@ -66,6 +67,10 @@ struct pm_nl_pernet {
>>  #define MPTCP_PM_ADDR_MAX	8
>>  #define ADD_ADDR_RETRANS_MAX	3
>>  
>> +static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
>> +					    struct mptcp_pm_addr_entry *entry,
>> +					    struct socket **lsk);
>> +
>>  static bool addresses_equal(const struct mptcp_addr_info *a,
>>  			    const struct mptcp_addr_info *b, bool use_port)
>>  {
>> @@ -438,7 +443,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
>>  }
>>  
>>  static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
>> -				     struct mptcp_pm_addr_entry *entry)
>> +				     struct mptcp_pm_addr_entry *entry,
>> +				     struct mptcp_local_lsk *lsk_ref)
>>  {
>>  	struct mptcp_pm_add_entry *add_entry = NULL;
>>  	struct sock *sk = (struct sock *)msk;
>> @@ -458,6 +464,10 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
>>  	add_entry->addr = entry->addr;
>>  	add_entry->sock = msk;
>>  	add_entry->retrans_times = 0;
>> +	add_entry->lsk_ref = lsk_ref;
>> +
>> +	if (lsk_ref)
>> +		lsk_list_add_ref(lsk_ref);
>>  
>>  	timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
>>  	sk_reset_timer(sk, &add_entry->add_timer,
>> @@ -470,8 +480,11 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
>>  {
>>  	struct mptcp_pm_add_entry *entry, *tmp;
>>  	struct sock *sk = (struct sock *)msk;
>> +	struct pm_nl_pernet *pernet;
>>  	LIST_HEAD(free_list);
>>  
>> +	pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
>> +
>>  	pr_debug("msk=%p", msk);
>>  
>>  	spin_lock_bh(&msk->pm.lock);
>> @@ -480,6 +493,8 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
>>  
>>  	list_for_each_entry_safe(entry, tmp, &free_list, list) {
>>  		sk_stop_timer_sync(sk, &entry->add_timer);
>> +		if (entry->lsk_ref)
>> +			lsk_list_release(pernet, entry->lsk_ref);
>>  		kfree(entry);
>>  	}
>>  }
>> @@ -570,13 +585,16 @@ lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *add
>>  }
>>  
>>  static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
>> +	__must_hold(&msk->pm.lock)
>>  {
>> +	struct mptcp_local_lsk *lsk_ref = NULL;
>>  	struct sock *sk = (struct sock *)msk;
>>  	struct mptcp_pm_addr_entry *local;
>>  	unsigned int add_addr_signal_max;
>>  	unsigned int local_addr_max;
>>  	struct pm_nl_pernet *pernet;
>>  	unsigned int subflows_max;
>> +	struct socket *lsk;
>>  
>>  	pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
>>  
>> @@ -607,12 +625,39 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
>>  		local = select_signal_address(pernet, msk);
>>  
>>  		if (local) {
>> -			if (mptcp_pm_alloc_anno_list(msk, local)) {
>> +			if (!local->addr.port) {
>> +				local->addr.port =
>> +					((struct inet_sock *)inet_sk
>> +					 ((struct sock *)msk))->inet_sport;
>> +
>> +				lsk_ref = lsk_list_find(pernet, &local->addr);
>> +
>> +				if (!lsk_ref) {
>> +					spin_unlock_bh(&msk->pm.lock);
>> +
>> +					mptcp_pm_nl_create_listen_socket(sk, local, &lsk);
>> +
>> +					spin_lock_bh(&msk->pm.lock);
>> +
>> +					if (lsk)
>> +						lsk_ref = lsk_list_add(pernet, &local->addr, lsk);
>> +
>> +					if (lsk && !lsk_ref)
>> +						sock_release(lsk);
> 
> Let's suppose a user-space application listens on 2 different addresses
> (A, B) and does:
> 
> """
> s1 = socket()
> bind(s1, A)
> listen(s1)
>  // at this point incoming MPTCP connection can be established on s1
>  // and ADD_ADDR sub-options could be sent back 
> 
> s2 = socket()
> bind(s2, B)
> listen(s2)
> """
> 
> If there is a signal endpoint on B, the above listen can race with the
> mptcp_pm_nl_create_listen_socket() call, leading to hard-to-track
> startup issues for user-space applications.
> 
> I really think we want at least a configuration option, off by default,
> for this feature. Some specific self-test would be a plus.

Looking at your example above, assuming both A and B are bound to the same port
then yes, a race such as you suggest could occur. 

But if you consider bug #203, it arose only because there was no listener in
the application (and unexpectedly so). So if the path manager creates a listener (at the time 
of the address advertisement) to facilitate MPJs then that would have the usual
side effects of creating listeners (in general).

For example, I think this clash could also occur with the existing code in the kernel PM when using a
port-based endpoint, if the app happens to bind a socket to that specific addr+port.

The other scenario where the path manager needs to always establish a listener is when
running alongside an MPTCP client.

We could certainly add an "attempt to create lsk" option to the ADD_ADDR netlink commands,
as I believe you've both suggested, but perhaps we should think further about the guidance
regarding usage of this option.

For example, if creating lsks is not the default behavior, then bug #203 would persist
unless the entity that issues the ADD_ADDR command exercises this option.

> 
> It will help reviewing, splitting this series in at least 2 chunks:
> * pre-reqs up to ~this patch
> * user-space PM specific stuff
> 
> Side note: it would be nice to reduce the level of indentation, e.g.
> by factoring out part of the inner code into some helper.
> 
>> +				}
>> +
>> +				local->addr.port = 0;
>> +			}
>> +
>> +			if (mptcp_pm_alloc_anno_list(msk, local, lsk_ref)) {
>>  				__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
>>  				msk->pm.add_addr_signaled++;
>>  				mptcp_pm_announce_addr(msk, &local->addr, false);
>>  				mptcp_pm_nl_addr_send_ack(msk);
>>  			}
>> +
>> +			if (lsk_ref)
>> +				lsk_list_release(pernet, lsk_ref);
> 
> Probably not very relevant, but something like:
> 
> 	rcu_read_lock()
> 	lsk_ref = __lsk_list_find();
> 	if (lsk_ref)
> 		if (mptcp_pm_alloc_anno_list(...))
> 	rcu_read_unlock()
> 
> would save a pair of possibly contended atomic operations in the common
> case.
> 
> Thanks!
> 
> Paolo
> 


  parent reply	other threads:[~2022-01-19  1:20 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-12 22:15 [PATCH mptcp-next v2 00/21] mptcp: support userspace path management Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 01/21] mptcp: do not restrict subflows with non-kernel PMs Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 02/21] mptcp: store remote id from MP_JOIN SYN/ACK in local ctx Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 03/21] mptcp: reflect remote port (not 0) in ANNOUNCED events Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 04/21] mptcp: establish subflows from either end of connection Kishen Maloor
2022-01-14 22:43   ` Mat Martineau
2022-01-17  8:59     ` Paolo Abeni
2022-01-19  1:26       ` Kishen Maloor
2022-01-19 12:01         ` Paolo Abeni
2022-01-19 17:59           ` Kishen Maloor
2022-01-19 18:59             ` Mat Martineau
2022-01-12 22:15 ` [PATCH mptcp-next v2 05/21] mptcp: netlink: store per namespace list of refcounted listen socks Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 06/21] mptcp: netlink: store lsk ref in mptcp_pm_addr_entry Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 07/21] mptcp: netlink: process IPv6 addrs in creating listening sockets Kishen Maloor
2022-01-14 15:27   ` Paolo Abeni
2022-01-14 22:09     ` Mat Martineau
2022-01-19  1:25       ` Kishen Maloor
2022-01-19 19:14         ` Mat Martineau
2022-01-12 22:15 ` [PATCH mptcp-next v2 08/21] mptcp: attempt to add listening sockets for announced addrs Kishen Maloor
2022-01-14 15:54   ` Paolo Abeni
2022-01-14 17:47     ` Matthieu Baerts
2022-01-14 22:26     ` Mat Martineau
2022-01-19  1:20     ` Kishen Maloor [this message]
2022-01-12 22:15 ` [PATCH mptcp-next v2 09/21] mptcp: allow ADD_ADDR reissuance by userspace PMs Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 10/21] mptcp: handle local addrs announced " Kishen Maloor
2022-01-14 17:11   ` Paolo Abeni
2022-01-19  1:24     ` Kishen Maloor
2022-01-19 19:20       ` Mat Martineau
2022-01-19 20:27         ` Kishen Maloor
2022-01-19 20:44           ` Mat Martineau
2022-01-19 21:30             ` Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 11/21] mptcp: read attributes of addr entries managed " Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 12/21] mptcp: netlink: split mptcp_pm_parse_addr into two functions Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 13/21] mptcp: netlink: Add MPTCP_PM_CMD_ANNOUNCE Kishen Maloor
2022-01-14 17:04   ` Paolo Abeni
2022-01-19  1:21     ` Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 14/21] mptcp: selftests: support MPTCP_PM_CMD_ANNOUNCE Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 15/21] mptcp: netlink: Add MPTCP_PM_CMD_REMOVE Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 16/21] mptcp: selftests: support MPTCP_PM_CMD_REMOVE Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 17/21] mptcp: netlink: allow userspace-driven subflow establishment Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 18/21] mptcp: selftests: support MPTCP_PM_CMD_SUBFLOW_CREATE Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 19/21] mptcp: selftests: support MPTCP_PM_CMD_SUBFLOW_DESTROY Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 20/21] mptcp: selftests: capture netlink events Kishen Maloor
2022-01-12 22:15 ` [PATCH mptcp-next v2 21/21] selftests: mptcp: functional tests for the userspace PM type Kishen Maloor
2022-01-12 22:35   ` selftests: mptcp: functional tests for the userspace PM type: Build Failure MPTCP CI

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d2c95daa-5ad1-68d9-1072-4220c3931838@intel.com \
    --to=kishen.maloor@intel.com \
    --cc=mptcp@lists.linux.dev \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.