All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [MPTCP] [PATCH] Revert tcp_skb_cb to its original size
@ 2017-11-09 14:55 Rao Shoaib
  0 siblings, 0 replies; 4+ messages in thread
From: Rao Shoaib @ 2017-11-09 14:55 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 9968 bytes --]



On 11/08/2017 06:32 PM, Christoph Paasch wrote:
> Hello Rao,
>
> On 08/11/17 - 13:19:49, Shoaib Rao wrote:
>> Christoph,
>>
>> Following is a patch based on branch mptcp_v0.91.
>> I have looked into the issue that you pointed out. It is same as the partial ACK issue. If there is no partial ack everything will work and this patch covers all the non partial ack cases.
>>
>> For the partial ACK case the fix is very simple. We can send the packet out without any mapping and the previous mapping should cover it. We can also update the mapping i.e. care a sub mapping.
> Indeed, you can omit the DSS-option in these segments.
>
> One other case is when you get SACK'd bytes. In that case the size of the
> segment also gets reduced and the DSS-mapping length will be wrong.
>
> Basically, all you need to store in this skb is the original data_length of
> the segment. Are there 16 bytes somewhere that we could use?
>
>
> Christoph
No, all we need is a bit in the skb to mark the skb to not send out the 
mapping.  I am looking at other options too.

Shoaib
>
>> The Receiver code (detect mapping()) is very implementation based. I will change it to accept two more mapping, sub mapping that does not violate an existing mapping and a super mapping.
>>
>> I will provide that patch later. Also note that the fields used in this patch are specific to this release.
>>
>> Shoaib
>>
>> ---
>>   include/linux/skbuff.h   |  2 +-
>>   include/net/tcp.h        | 32 +++++++++-------
>>   net/mptcp/mptcp_output.c | 97 ++++++++++++++++++++++++++++++------------------
>>   3 files changed, 80 insertions(+), 51 deletions(-)
>>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index f66cd5e..ca2e26a 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -540,7 +540,7 @@ struct sk_buff {
>>   	 * want to keep them across layers you have to do a skb_clone()
>>   	 * first. This is owned by whoever has the skb queued ATM.
>>   	 */
>> -	char			cb[56] __aligned(8);
>> +	char			cb[48] __aligned(8);
>>   
>>   	unsigned long		_skb_refdst;
>>   	void			(*destructor)(struct sk_buff *skb);
>> diff --git a/include/net/tcp.h b/include/net/tcp.h
>> index 655ecd4..b476e86 100644
>> --- a/include/net/tcp.h
>> +++ b/include/net/tcp.h
>> @@ -842,14 +842,18 @@ struct tcp_skb_cb {
>>   		__u32		tcp_gso_segs;
>>   	};
>>   
>> -#ifdef CONFIG_MPTCP
>> -	__u8		mptcp_flags;	/* flags for the MPTCP layer    */
>> -	__u8		dss_off;	/* Number of 4-byte words until
>> -					 * seq-number */
>> -#endif
>>   	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/
>>   
>> -	__u8		sacked;		/* State flags for SACK/FACK.	*/
>> +	/* Below union is fine as the skb will use one or the other
>> +	 * The SKB in the rtx queue will set sacked and does not care
>> +	 * about dss_off.
>> +	 * Eventually dss_off will not be needed.
>> +	 */
>> +	
>> +	union {
>> +		__u8		sacked;	/* State flags for SACK/FACK.	*/
>> +		__u8            dss_off;
>> +	};
>>   #define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
>>   #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
>>   #define TCPCB_LOST		0x04	/* SKB is lost			*/
>> @@ -860,8 +864,14 @@ struct tcp_skb_cb {
>>   				TCPCB_REPAIRED)
>>   
>>   	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
>> -	/* 1 byte hole */
>> -	__u32		ack_seq;	/* Sequence number ACK'd	*/
>> +
>> +	__u8		mptcp_flags;
>> +	union {
>> +		__u32		ack_seq; /* Sequence number ACK'd	*/
>> +		__u32		mptcp_data_seq;
>> +		__u32		path_mask;
>> +		
>> +	};
>>   	union {
>>   		union {
>>   			struct inet_skb_parm	h4;
>> @@ -869,12 +879,6 @@ struct tcp_skb_cb {
>>   			struct inet6_skb_parm	h6;
>>   #endif
>>   		} header;	/* For incoming frames		*/
>> -#ifdef CONFIG_MPTCP
>> -		union {			/* For MPTCP outgoing frames */
>> -			__u32 path_mask; /* paths that tried to send this skb */
>> -			__u32 dss[6];	/* DSS options */
>> -		};
>> -#endif
>>   	};
>>   };
>>   
>> diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c
>> index 691ef6f..5318459 100644
>> --- a/net/mptcp/mptcp_output.c
>> +++ b/net/mptcp/mptcp_output.c
>> @@ -57,34 +57,13 @@ EXPORT_SYMBOL(mptcp_sub_len_remove_addr_align);
>>   /* get the data-seq and end-data-seq and store them again in the
>>    * tcp_skb_cb
>>    */
>> +/* Rao: May need work */
>>   static bool mptcp_reconstruct_mapping(struct sk_buff *skb)
>>   {
>> -	const struct mp_dss *mpdss = (struct mp_dss *)TCP_SKB_CB(skb)->dss;
>> -	u32 *p32;
>> -	u16 *p16;
>> -
>>   	if (!mptcp_is_data_seq(skb))
>>   		return false;
>>   
>> -	if (!mpdss->M)
>> -		return false;
>> -
>> -	/* Move the pointer to the data-seq */
>> -	p32 = (u32 *)mpdss;
>> -	p32++;
>> -	if (mpdss->A) {
>> -		p32++;
>> -		if (mpdss->a)
>> -			p32++;
>> -	}
>> -
>> -	TCP_SKB_CB(skb)->seq = ntohl(*p32);
>> -
>> -	/* Get the data_len to calculate the end_data_seq */
>> -	p32++;
>> -	p32++;
>> -	p16 = (u16 *)p32;
>> -	TCP_SKB_CB(skb)->end_seq = ntohs(*p16) + TCP_SKB_CB(skb)->seq;
>> +	TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->mptcp_data_seq;
>>   
>>   	return true;
>>   }
>> @@ -182,7 +161,7 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk
>>   	/* Segment goes back to the MPTCP-layer. So, we need to zero the
>>   	 * path_mask/dss.
>>   	 */
>> -	memset(TCP_SKB_CB(skb)->dss, 0 , mptcp_dss_len);
>> +	TCP_SKB_CB(skb)->path_mask = 0;
>>   
>>   	/* We need to find out the path-mask from the meta-write-queue
>>   	 * to properly select a subflow.
>> @@ -319,25 +298,30 @@ combine:
>>   	}
>>   }
>>   
>> -static int mptcp_write_dss_mapping(const struct tcp_sock *tp, const struct sk_buff *skb,
>> -				   __be32 *ptr)
>> +
>> +static int mptcp_write_dss_mapping(const struct tcp_sock *tp,
>> +				   const struct sk_buff *skb, __be32 *ptr)
>>   {
>>   	const struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>>   	__be32 *start = ptr;
>>   	__u16 data_len;
>>   
>> -	*ptr++ = htonl(tcb->seq); /* data_seq */
>> +	*ptr++ = htonl(tcb->mptcp_data_seq); /* data_seq */
>>   
>>   	/* If it's a non-data DATA_FIN, we set subseq to 0 (draft v7) */
>>   	if (mptcp_is_data_fin(skb) && skb->len == 0)
>>   		*ptr++ = 0; /* subseq */
>>   	else
>> -		*ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* subseq */
>> +		*ptr++ = htonl(tcb->seq - tp->mptcp->snt_isn); /* subseq */
>>   
>> -	if (tcb->mptcp_flags & MPTCPHDR_INF)
>> +	if (tcb->mptcp_flags & MPTCPHDR_INF) {
>>   		data_len = 0;
>> -	else
>> +	} else {
>>   		data_len = tcb->end_seq - tcb->seq;
>> +		/* mptcp_entail_skb adds one for FIN */
>> +		if (tcb->tcp_flags & TCPHDR_FIN)
>> +			data_len -= 1;
>> +	}
>>   
>>   	if (tp->mpcb->dss_csum && data_len) {
>>   		__be16 *p16 = (__be16 *)ptr;
>> @@ -395,6 +379,47 @@ static int mptcp_write_dss_data_ack(const struct tcp_sock *tp, const struct sk_b
>>    * To avoid this we save the initial DSS mapping which allows us to
>>    * send the same DSS mapping even for fragmented retransmits.
>>    */
>> +#if 0
>> +static int mptcp_write_dss_mapping(const struct tcp_sock *tp, const struct sk_buff *skb,
>> +				   __be32 *ptr)
>> +{
>> +	const struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>> +	__be32 *start = ptr;
>> +	__u16 data_len;
>> +
>> +	*ptr++ = htonl(tcb->seq); /* data_seq */
>> +
>> +	/* If it's a non-data DATA_FIN, we set subseq to 0 (draft v7) */
>> +	if (mptcp_is_data_fin(skb) && skb->len == 0)
>> +		*ptr++ = 0; /* subseq */
>> +	else
>> +		*ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* subseq */
>> +
>> +	if (tcb->mptcp_flags & MPTCPHDR_INF)
>> +		data_len = 0;
>> +	else
>> +		data_len = tcb->end_seq - tcb->seq;
>> +
>> +	if (tp->mpcb->dss_csum && data_len) {
>> +		__be16 *p16 = (__be16 *)ptr;
>> +		__be32 hdseq = mptcp_get_highorder_sndbits(skb, tp->mpcb);
>> +		__wsum csum;
>> +
>> +		*ptr = htonl(((data_len) << 16) |
>> +			     (TCPOPT_EOL << 8) |
>> +			     (TCPOPT_EOL));
>> +		csum = csum_partial(ptr - 2, 12, skb->csum);
>> +		p16++;
>> +		*p16++ = csum_fold(csum_partial(&hdseq, sizeof(hdseq), csum));
>> +	} else {
>> +		*ptr++ = htonl(((data_len) << 16) |
>> +			       (TCPOPT_NOP << 8) |
>> +			       (TCPOPT_NOP));
>> +	}
>> +
>> +	return ptr - start;
>> +}
>> +
>>   static void mptcp_save_dss_data_seq(const struct tcp_sock *tp, struct sk_buff *skb)
>>   {
>>   	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>> @@ -424,6 +449,7 @@ static int mptcp_write_dss_data_seq(const struct tcp_sock *tp, struct sk_buff *s
>>   
>>   	return mptcp_dss_len/sizeof(*ptr);
>>   }
>> +#endif
>>   
>>   static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject)
>>   {
>> @@ -469,8 +495,8 @@ static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject)
>>   	if (mptcp_is_data_fin(subskb))
>>   		mptcp_combine_dfin(subskb, meta_sk, sk);
>>   
>> -	mptcp_save_dss_data_seq(tp, subskb);
>> -
>> +	TCP_SKB_CB(subskb)->mptcp_flags |= MPTCPHDR_SEQ;
>> +	tcb->mptcp_data_seq = tcb->seq;
>>   	tcb->seq = tp->write_seq;
>>   
>>   	/* Take into account seg len */
>> @@ -1197,10 +1223,9 @@ void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
>>   	}
>>   
>>   	if (OPTION_DATA_ACK & opts->mptcp_options) {
>> -		if (!mptcp_is_data_seq(skb))
>> -			ptr += mptcp_write_dss_data_ack(tp, skb, ptr);
>> -		else
>> -			ptr += mptcp_write_dss_data_seq(tp, skb, ptr);
>> +		ptr += mptcp_write_dss_data_ack(tp, skb, ptr);
>> +		if (mptcp_is_data_seq(skb))
>> +			ptr += mptcp_write_dss_mapping(tp, skb, ptr);
>>   	}
>>   	if (unlikely(OPTION_MP_PRIO & opts->mptcp_options)) {
>>   		struct mp_prio *mpprio = (struct mp_prio *)ptr;
>> -- 
>> 2.7.4
>>
>> _______________________________________________
>> mptcp mailing list
>> mptcp(a)lists.01.org
>> https://lists.01.org/mailman/listinfo/mptcp


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [MPTCP] [PATCH] Revert tcp_skb_cb to its original size
@ 2017-11-13 17:10 Rao Shoaib
  0 siblings, 0 replies; 4+ messages in thread
From: Rao Shoaib @ 2017-11-13 17:10 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 16068 bytes --]



On 11/09/2017 06:55 AM, Rao Shoaib wrote:
>
>
> On 11/08/2017 06:32 PM, Christoph Paasch wrote:
>> Hello Rao,
>>
>> On 08/11/17 - 13:19:49, Shoaib Rao wrote:
>>> Christoph,
>>>
>>> Following is a patch based on branch mptcp_v0.91.
>>> I have looked into the issue that you pointed out. It is same as the 
>>> partial ACK issue. If there is no partial ack everything will work 
>>> and this patch covers all the non partial ack cases.
>>>
>>> For the partial ACK case the fix is very simple. We can send the 
>>> packet out without any mapping and the previous mapping should cover 
>>> it. We can also update the mapping i.e. care a sub mapping.
>> Indeed, you can omit the DSS-option in these segments.
>>
>> One other case is when you get SACK'd bytes. In that case the size of 
>> the
>> segment also gets reduced and the DSS-mapping length will be wrong.
>>
>> Basically, all you need to store in this skb is the original 
>> data_length of
>> the segment. Are there 16 bytes somewhere that we could use?
>>
>>
>> Christoph
> No, all we need is a bit in the skb to mark the skb to not send out 
> the mapping.  I am looking at other options too.
>
> Shoaib

FYI. The issue has been resolved and tested including SACK. A simple way 
to resolve the issue would have been for the receiver to consider the 
data-seq as a hint for the mapping to be applied. Such a mapping would 
have had to exist and cover the sub-flow sequence numbers.


   360  IP 192.168.1.32.34274 > 192.168.1.3.ssh: Flags [P.], seq 
468:504, ack 15441, win 551, options [nop,nop,TS val 56864061 ecr 
3089143032,mptcp dss ack 518271459 seq 1105958085 subseq 3310 len 36 
csum 0xda75], length 36
   361  IP 192.168.1.3.ssh > 192.168.1.32.34274: Flags [.], ack 504, win 
291, options [nop,nop,TS val 3091810840 ecr 56864061,mptcp dss ack 
1105958121], length 0
   362  IP 192.168.1.3.ssh > 192.168.1.32.34274: Flags [P.], seq 
15441:15477, ack 504, win 291, options [nop,nop,TS val 3091810898 ecr 
56864061,mptcp dss ack 1105958121 seq 518271459 subseq 20366 len 36 csum 
0x71cc], length 36
   363  IP 192.168.1.32.34274 > 192.168.1.3.ssh: Flags [.], ack 15462, 
win 551, options [nop,nop,TS val 56864075 ecr 3091810898,mptcp dss ack 
518271459], length 0
   364  IP 192.168.1.32.34274 > 192.168.1.3.ssh: Flags [P.], seq 
504:540, ack 15462, win 551, options [nop,nop,TS val 56864083 ecr 
3091810898,mptcp dss ack 518271459 seq 1105958121 subseq 3346 len 36 
csum 0xf34f], length 36
   365  IP 192.168.1.3.ssh > 192.168.1.32.34274: Flags [.], ack 540, win 
291, options [nop,nop,TS val 3091810960 ecr 56864083,mptcp dss ack 
1105958157], length 0
   366  IP 192.168.1.3.ssh > 192.168.1.32.34274: Flags [P.], seq 
15477:15513, ack 540, win 291, options [nop,nop,TS val 3091810995 ecr 
56864083,mptcp dss ack 1105958157 seq 518271495 subseq 20402 len 36 csum 
0x7184], length 36
   367  IP 192.168.1.32.34274 > 192.168.1.3.ssh: Flags [.], ack 15462, 
win 553, options [nop,nop,TS val 56864099 ecr 3091810898,nop,nop,sack 1 
{15477:15498},mptcp dss ack 518271459], length 0
   368  IP 192.168.1.32.34274 > 192.168.1.3.ssh: Flags [P.], seq 
540:576, ack 15462, win 553, options [nop,nop,TS val 56864117 ecr 
3091810898,mptcp dss ack 518271459 seq 1105958157 subseq 3382 len 36 
csum 0x38c], length 36
   369  IP 192.168.1.3.ssh > 192.168.1.32.34274: Flags [P.], seq 
15477:15513, ack 576, win 291, options [nop,nop,TS val 3091811085 ecr 
56864117,mptcp dss ack 1105958157 seq 518271495 subseq 20402 len 36 csum 
0x7184], length 36
   370  IP 192.168.1.32.34274 > 192.168.1.3.ssh: Flags [.], ack 15462, 
win 553, options [nop,nop,TS val 56864122 ecr 3091810898,nop,nop,sack 2 
{15477:15498}{15477:15498},mptcp dss ack 518271459], length 0
   371  IP 192.168.1.3.ssh > 192.168.1.32.34274: Flags [P.], seq 
15462:15513, ack 576, win 291, options [nop,nop,TS val 3091811321 ecr 
56864122,mptcp dss ack 1105958193 seq 518271480 subseq 20387 len 51 csum 
0x7193], length 51
   372  IP 192.168.1.32.34274 > 192.168.1.3.ssh: Flags [.], ack 15498, 
win 553, options [nop,nop,TS val 56864181 ecr 3091811321,nop,nop,sack 1 
{15477:15498},mptcp dss ack 518271531], length 0
   373  IP 192.168.1.3.ssh > 192.168.1.32.34274: Flags [P.], seq 
15498:15513, ack 576, win 291, options [nop,nop,TS val 3091811809 ecr 
56864181,mptcp dss ack 1105958193 seq 518271516 subseq 20423 len 15 csum 
0x716f], length 15
   374  IP 192.168.1.32.34274 > 192.168.1.3.ssh: Flags [.], ack 15513, 
win 553, options [nop,nop,TS val 56864303 ecr 3091811809,mptcp dss ack 
518271531], length 0

Shoaib

>>
>>> The Receiver code (detect mapping()) is very implementation based. I 
>>> will change it to accept two more mapping, sub mapping that does not 
>>> violate an existing mapping and a super mapping.
>>>
>>> I will provide that patch later. Also note that the fields used in 
>>> this patch are specific to this release.
>>>
>>> Shoaib
>>>
>>> ---
>>>   include/linux/skbuff.h   |  2 +-
>>>   include/net/tcp.h        | 32 +++++++++-------
>>>   net/mptcp/mptcp_output.c | 97 
>>> ++++++++++++++++++++++++++++++------------------
>>>   3 files changed, 80 insertions(+), 51 deletions(-)
>>>
>>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>>> index f66cd5e..ca2e26a 100644
>>> --- a/include/linux/skbuff.h
>>> +++ b/include/linux/skbuff.h
>>> @@ -540,7 +540,7 @@ struct sk_buff {
>>>        * want to keep them across layers you have to do a skb_clone()
>>>        * first. This is owned by whoever has the skb queued ATM.
>>>        */
>>> -    char            cb[56] __aligned(8);
>>> +    char            cb[48] __aligned(8);
>>>         unsigned long        _skb_refdst;
>>>       void            (*destructor)(struct sk_buff *skb);
>>> diff --git a/include/net/tcp.h b/include/net/tcp.h
>>> index 655ecd4..b476e86 100644
>>> --- a/include/net/tcp.h
>>> +++ b/include/net/tcp.h
>>> @@ -842,14 +842,18 @@ struct tcp_skb_cb {
>>>           __u32        tcp_gso_segs;
>>>       };
>>>   -#ifdef CONFIG_MPTCP
>>> -    __u8        mptcp_flags;    /* flags for the MPTCP layer    */
>>> -    __u8        dss_off;    /* Number of 4-byte words until
>>> -                     * seq-number */
>>> -#endif
>>>       __u8        tcp_flags;    /* TCP header flags. (tcp[13])    */
>>>   -    __u8        sacked;        /* State flags for SACK/FACK.    */
>>> +    /* Below union is fine as the skb will use one or the other
>>> +     * The SKB in the rtx queue will set sacked and does not care
>>> +     * about dss_off.
>>> +     * Eventually dss_off will not be needed.
>>> +     */
>>> +
>>> +    union {
>>> +        __u8        sacked;    /* State flags for SACK/FACK.    */
>>> +        __u8            dss_off;
>>> +    };
>>>   #define TCPCB_SACKED_ACKED    0x01    /* SKB ACK'd by a SACK 
>>> block    */
>>>   #define TCPCB_SACKED_RETRANS    0x02    /* SKB 
>>> retransmitted        */
>>>   #define TCPCB_LOST        0x04    /* SKB is lost */
>>> @@ -860,8 +864,14 @@ struct tcp_skb_cb {
>>>                   TCPCB_REPAIRED)
>>>         __u8        ip_dsfield;    /* IPv4 tos or IPv6 dsfield    */
>>> -    /* 1 byte hole */
>>> -    __u32        ack_seq;    /* Sequence number ACK'd    */
>>> +
>>> +    __u8        mptcp_flags;
>>> +    union {
>>> +        __u32        ack_seq; /* Sequence number ACK'd    */
>>> +        __u32        mptcp_data_seq;
>>> +        __u32        path_mask;
>>> +
>>> +    };
>>>       union {
>>>           union {
>>>               struct inet_skb_parm    h4;
>>> @@ -869,12 +879,6 @@ struct tcp_skb_cb {
>>>               struct inet6_skb_parm    h6;
>>>   #endif
>>>           } header;    /* For incoming frames        */
>>> -#ifdef CONFIG_MPTCP
>>> -        union {            /* For MPTCP outgoing frames */
>>> -            __u32 path_mask; /* paths that tried to send this skb */
>>> -            __u32 dss[6];    /* DSS options */
>>> -        };
>>> -#endif
>>>       };
>>>   };
>>>   diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c
>>> index 691ef6f..5318459 100644
>>> --- a/net/mptcp/mptcp_output.c
>>> +++ b/net/mptcp/mptcp_output.c
>>> @@ -57,34 +57,13 @@ EXPORT_SYMBOL(mptcp_sub_len_remove_addr_align);
>>>   /* get the data-seq and end-data-seq and store them again in the
>>>    * tcp_skb_cb
>>>    */
>>> +/* Rao: May need work */
>>>   static bool mptcp_reconstruct_mapping(struct sk_buff *skb)
>>>   {
>>> -    const struct mp_dss *mpdss = (struct mp_dss 
>>> *)TCP_SKB_CB(skb)->dss;
>>> -    u32 *p32;
>>> -    u16 *p16;
>>> -
>>>       if (!mptcp_is_data_seq(skb))
>>>           return false;
>>>   -    if (!mpdss->M)
>>> -        return false;
>>> -
>>> -    /* Move the pointer to the data-seq */
>>> -    p32 = (u32 *)mpdss;
>>> -    p32++;
>>> -    if (mpdss->A) {
>>> -        p32++;
>>> -        if (mpdss->a)
>>> -            p32++;
>>> -    }
>>> -
>>> -    TCP_SKB_CB(skb)->seq = ntohl(*p32);
>>> -
>>> -    /* Get the data_len to calculate the end_data_seq */
>>> -    p32++;
>>> -    p32++;
>>> -    p16 = (u16 *)p32;
>>> -    TCP_SKB_CB(skb)->end_seq = ntohs(*p16) + TCP_SKB_CB(skb)->seq;
>>> +    TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->mptcp_data_seq;
>>>         return true;
>>>   }
>>> @@ -182,7 +161,7 @@ static void __mptcp_reinject_data(struct sk_buff 
>>> *orig_skb, struct sock *meta_sk
>>>       /* Segment goes back to the MPTCP-layer. So, we need to zero the
>>>        * path_mask/dss.
>>>        */
>>> -    memset(TCP_SKB_CB(skb)->dss, 0 , mptcp_dss_len);
>>> +    TCP_SKB_CB(skb)->path_mask = 0;
>>>         /* We need to find out the path-mask from the meta-write-queue
>>>        * to properly select a subflow.
>>> @@ -319,25 +298,30 @@ combine:
>>>       }
>>>   }
>>>   -static int mptcp_write_dss_mapping(const struct tcp_sock *tp, 
>>> const struct sk_buff *skb,
>>> -                   __be32 *ptr)
>>> +
>>> +static int mptcp_write_dss_mapping(const struct tcp_sock *tp,
>>> +                   const struct sk_buff *skb, __be32 *ptr)
>>>   {
>>>       const struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>>>       __be32 *start = ptr;
>>>       __u16 data_len;
>>>   -    *ptr++ = htonl(tcb->seq); /* data_seq */
>>> +    *ptr++ = htonl(tcb->mptcp_data_seq); /* data_seq */
>>>         /* If it's a non-data DATA_FIN, we set subseq to 0 (draft 
>>> v7) */
>>>       if (mptcp_is_data_fin(skb) && skb->len == 0)
>>>           *ptr++ = 0; /* subseq */
>>>       else
>>> -        *ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* 
>>> subseq */
>>> +        *ptr++ = htonl(tcb->seq - tp->mptcp->snt_isn); /* subseq */
>>>   -    if (tcb->mptcp_flags & MPTCPHDR_INF)
>>> +    if (tcb->mptcp_flags & MPTCPHDR_INF) {
>>>           data_len = 0;
>>> -    else
>>> +    } else {
>>>           data_len = tcb->end_seq - tcb->seq;
>>> +        /* mptcp_entail_skb adds one for FIN */
>>> +        if (tcb->tcp_flags & TCPHDR_FIN)
>>> +            data_len -= 1;
>>> +    }
>>>         if (tp->mpcb->dss_csum && data_len) {
>>>           __be16 *p16 = (__be16 *)ptr;
>>> @@ -395,6 +379,47 @@ static int mptcp_write_dss_data_ack(const 
>>> struct tcp_sock *tp, const struct sk_b
>>>    * To avoid this we save the initial DSS mapping which allows us to
>>>    * send the same DSS mapping even for fragmented retransmits.
>>>    */
>>> +#if 0
>>> +static int mptcp_write_dss_mapping(const struct tcp_sock *tp, const 
>>> struct sk_buff *skb,
>>> +                   __be32 *ptr)
>>> +{
>>> +    const struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>>> +    __be32 *start = ptr;
>>> +    __u16 data_len;
>>> +
>>> +    *ptr++ = htonl(tcb->seq); /* data_seq */
>>> +
>>> +    /* If it's a non-data DATA_FIN, we set subseq to 0 (draft v7) */
>>> +    if (mptcp_is_data_fin(skb) && skb->len == 0)
>>> +        *ptr++ = 0; /* subseq */
>>> +    else
>>> +        *ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* 
>>> subseq */
>>> +
>>> +    if (tcb->mptcp_flags & MPTCPHDR_INF)
>>> +        data_len = 0;
>>> +    else
>>> +        data_len = tcb->end_seq - tcb->seq;
>>> +
>>> +    if (tp->mpcb->dss_csum && data_len) {
>>> +        __be16 *p16 = (__be16 *)ptr;
>>> +        __be32 hdseq = mptcp_get_highorder_sndbits(skb, tp->mpcb);
>>> +        __wsum csum;
>>> +
>>> +        *ptr = htonl(((data_len) << 16) |
>>> +                 (TCPOPT_EOL << 8) |
>>> +                 (TCPOPT_EOL));
>>> +        csum = csum_partial(ptr - 2, 12, skb->csum);
>>> +        p16++;
>>> +        *p16++ = csum_fold(csum_partial(&hdseq, sizeof(hdseq), csum));
>>> +    } else {
>>> +        *ptr++ = htonl(((data_len) << 16) |
>>> +                   (TCPOPT_NOP << 8) |
>>> +                   (TCPOPT_NOP));
>>> +    }
>>> +
>>> +    return ptr - start;
>>> +}
>>> +
>>>   static void mptcp_save_dss_data_seq(const struct tcp_sock *tp, 
>>> struct sk_buff *skb)
>>>   {
>>>       struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>>> @@ -424,6 +449,7 @@ static int mptcp_write_dss_data_seq(const struct 
>>> tcp_sock *tp, struct sk_buff *s
>>>         return mptcp_dss_len/sizeof(*ptr);
>>>   }
>>> +#endif
>>>     static bool mptcp_skb_entail(struct sock *sk, struct sk_buff 
>>> *skb, int reinject)
>>>   {
>>> @@ -469,8 +495,8 @@ static bool mptcp_skb_entail(struct sock *sk, 
>>> struct sk_buff *skb, int reinject)
>>>       if (mptcp_is_data_fin(subskb))
>>>           mptcp_combine_dfin(subskb, meta_sk, sk);
>>>   -    mptcp_save_dss_data_seq(tp, subskb);
>>> -
>>> +    TCP_SKB_CB(subskb)->mptcp_flags |= MPTCPHDR_SEQ;
>>> +    tcb->mptcp_data_seq = tcb->seq;
>>>       tcb->seq = tp->write_seq;
>>>         /* Take into account seg len */
>>> @@ -1197,10 +1223,9 @@ void mptcp_options_write(__be32 *ptr, struct 
>>> tcp_sock *tp,
>>>       }
>>>         if (OPTION_DATA_ACK & opts->mptcp_options) {
>>> -        if (!mptcp_is_data_seq(skb))
>>> -            ptr += mptcp_write_dss_data_ack(tp, skb, ptr);
>>> -        else
>>> -            ptr += mptcp_write_dss_data_seq(tp, skb, ptr);
>>> +        ptr += mptcp_write_dss_data_ack(tp, skb, ptr);
>>> +        if (mptcp_is_data_seq(skb))
>>> +            ptr += mptcp_write_dss_mapping(tp, skb, ptr);
>>>       }
>>>       if (unlikely(OPTION_MP_PRIO & opts->mptcp_options)) {
>>>           struct mp_prio *mpprio = (struct mp_prio *)ptr;
>>> -- 
>>> 2.7.4
>>>
>>> _______________________________________________
>>> mptcp mailing list
>>> mptcp(a)lists.01.org
>>> https://lists.01.org/mailman/listinfo/mptcp
>
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [MPTCP] [PATCH] Revert tcp_skb_cb to its original size
@ 2017-11-09  2:32 Christoph Paasch
  0 siblings, 0 replies; 4+ messages in thread
From: Christoph Paasch @ 2017-11-09  2:32 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 9415 bytes --]

Hello Rao,

On 08/11/17 - 13:19:49, Shoaib Rao wrote:
> Christoph,
> 
> Following is a patch based on branch mptcp_v0.91.
> I have looked into the issue that you pointed out. It is same as the partial ACK issue. If there is no partial ack everything will work and this patch covers all the non partial ack cases.
> 
> For the partial ACK case the fix is very simple. We can send the packet out without any mapping and the previous mapping should cover it. We can also update the mapping i.e. care a sub mapping.

Indeed, you can omit the DSS-option in these segments.

One other case is when you get SACK'd bytes. In that case the size of the
segment also gets reduced and the DSS-mapping length will be wrong.

Basically, all you need to store in this skb is the original data_length of
the segment. Are there 16 bytes somewhere that we could use?


Christoph

> The Receiver code (detect mapping()) is very implementation based. I will change it to accept two more mapping, sub mapping that does not violate an existing mapping and a super mapping.
> 
> I will provide that patch later. Also note that the fields used in this patch are specific to this release.
> 
> Shoaib
> 
> ---
>  include/linux/skbuff.h   |  2 +-
>  include/net/tcp.h        | 32 +++++++++-------
>  net/mptcp/mptcp_output.c | 97 ++++++++++++++++++++++++++++++------------------
>  3 files changed, 80 insertions(+), 51 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index f66cd5e..ca2e26a 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -540,7 +540,7 @@ struct sk_buff {
>  	 * want to keep them across layers you have to do a skb_clone()
>  	 * first. This is owned by whoever has the skb queued ATM.
>  	 */
> -	char			cb[56] __aligned(8);
> +	char			cb[48] __aligned(8);
>  
>  	unsigned long		_skb_refdst;
>  	void			(*destructor)(struct sk_buff *skb);
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 655ecd4..b476e86 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -842,14 +842,18 @@ struct tcp_skb_cb {
>  		__u32		tcp_gso_segs;
>  	};
>  
> -#ifdef CONFIG_MPTCP
> -	__u8		mptcp_flags;	/* flags for the MPTCP layer    */
> -	__u8		dss_off;	/* Number of 4-byte words until
> -					 * seq-number */
> -#endif
>  	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/
>  
> -	__u8		sacked;		/* State flags for SACK/FACK.	*/
> +	/* Below union is fine as the skb will use one or the other
> +	 * The SKB in the rtx queue will set sacked and does not care
> +	 * about dss_off.
> +	 * Eventually dss_off will not be needed.
> +	 */
> +	 
> +	union {
> +		__u8		sacked;	/* State flags for SACK/FACK.	*/
> +		__u8            dss_off;
> +	};
>  #define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
>  #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
>  #define TCPCB_LOST		0x04	/* SKB is lost			*/
> @@ -860,8 +864,14 @@ struct tcp_skb_cb {
>  				TCPCB_REPAIRED)
>  
>  	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
> -	/* 1 byte hole */
> -	__u32		ack_seq;	/* Sequence number ACK'd	*/
> +
> +	__u8		mptcp_flags;
> +	union {
> +		__u32		ack_seq; /* Sequence number ACK'd	*/
> +		__u32		mptcp_data_seq;
> +		__u32		path_mask;
> +		
> +	};
>  	union {
>  		union {
>  			struct inet_skb_parm	h4;
> @@ -869,12 +879,6 @@ struct tcp_skb_cb {
>  			struct inet6_skb_parm	h6;
>  #endif
>  		} header;	/* For incoming frames		*/
> -#ifdef CONFIG_MPTCP
> -		union {			/* For MPTCP outgoing frames */
> -			__u32 path_mask; /* paths that tried to send this skb */
> -			__u32 dss[6];	/* DSS options */
> -		};
> -#endif
>  	};
>  };
>  
> diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c
> index 691ef6f..5318459 100644
> --- a/net/mptcp/mptcp_output.c
> +++ b/net/mptcp/mptcp_output.c
> @@ -57,34 +57,13 @@ EXPORT_SYMBOL(mptcp_sub_len_remove_addr_align);
>  /* get the data-seq and end-data-seq and store them again in the
>   * tcp_skb_cb
>   */
> +/* Rao: May need work */
>  static bool mptcp_reconstruct_mapping(struct sk_buff *skb)
>  {
> -	const struct mp_dss *mpdss = (struct mp_dss *)TCP_SKB_CB(skb)->dss;
> -	u32 *p32;
> -	u16 *p16;
> -
>  	if (!mptcp_is_data_seq(skb))
>  		return false;
>  
> -	if (!mpdss->M)
> -		return false;
> -
> -	/* Move the pointer to the data-seq */
> -	p32 = (u32 *)mpdss;
> -	p32++;
> -	if (mpdss->A) {
> -		p32++;
> -		if (mpdss->a)
> -			p32++;
> -	}
> -
> -	TCP_SKB_CB(skb)->seq = ntohl(*p32);
> -
> -	/* Get the data_len to calculate the end_data_seq */
> -	p32++;
> -	p32++;
> -	p16 = (u16 *)p32;
> -	TCP_SKB_CB(skb)->end_seq = ntohs(*p16) + TCP_SKB_CB(skb)->seq;
> +	TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->mptcp_data_seq;
>  
>  	return true;
>  }
> @@ -182,7 +161,7 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk
>  	/* Segment goes back to the MPTCP-layer. So, we need to zero the
>  	 * path_mask/dss.
>  	 */
> -	memset(TCP_SKB_CB(skb)->dss, 0 , mptcp_dss_len);
> +	TCP_SKB_CB(skb)->path_mask = 0;
>  
>  	/* We need to find out the path-mask from the meta-write-queue
>  	 * to properly select a subflow.
> @@ -319,25 +298,30 @@ combine:
>  	}
>  }
>  
> -static int mptcp_write_dss_mapping(const struct tcp_sock *tp, const struct sk_buff *skb,
> -				   __be32 *ptr)
> +
> +static int mptcp_write_dss_mapping(const struct tcp_sock *tp,
> +				   const struct sk_buff *skb, __be32 *ptr)
>  {
>  	const struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>  	__be32 *start = ptr;
>  	__u16 data_len;
>  
> -	*ptr++ = htonl(tcb->seq); /* data_seq */
> +	*ptr++ = htonl(tcb->mptcp_data_seq); /* data_seq */
>  
>  	/* If it's a non-data DATA_FIN, we set subseq to 0 (draft v7) */
>  	if (mptcp_is_data_fin(skb) && skb->len == 0)
>  		*ptr++ = 0; /* subseq */
>  	else
> -		*ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* subseq */
> +		*ptr++ = htonl(tcb->seq - tp->mptcp->snt_isn); /* subseq */
>  
> -	if (tcb->mptcp_flags & MPTCPHDR_INF)
> +	if (tcb->mptcp_flags & MPTCPHDR_INF) {
>  		data_len = 0;
> -	else
> +	} else {
>  		data_len = tcb->end_seq - tcb->seq;
> +		/* mptcp_entail_skb adds one for FIN */
> +		if (tcb->tcp_flags & TCPHDR_FIN)
> +			data_len -= 1;
> +	}
>  
>  	if (tp->mpcb->dss_csum && data_len) {
>  		__be16 *p16 = (__be16 *)ptr;
> @@ -395,6 +379,47 @@ static int mptcp_write_dss_data_ack(const struct tcp_sock *tp, const struct sk_b
>   * To avoid this we save the initial DSS mapping which allows us to
>   * send the same DSS mapping even for fragmented retransmits.
>   */
> +#if 0
> +static int mptcp_write_dss_mapping(const struct tcp_sock *tp, const struct sk_buff *skb,
> +				   __be32 *ptr)
> +{
> +	const struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
> +	__be32 *start = ptr;
> +	__u16 data_len;
> +
> +	*ptr++ = htonl(tcb->seq); /* data_seq */
> +
> +	/* If it's a non-data DATA_FIN, we set subseq to 0 (draft v7) */
> +	if (mptcp_is_data_fin(skb) && skb->len == 0)
> +		*ptr++ = 0; /* subseq */
> +	else
> +		*ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* subseq */
> +
> +	if (tcb->mptcp_flags & MPTCPHDR_INF)
> +		data_len = 0;
> +	else
> +		data_len = tcb->end_seq - tcb->seq;
> +
> +	if (tp->mpcb->dss_csum && data_len) {
> +		__be16 *p16 = (__be16 *)ptr;
> +		__be32 hdseq = mptcp_get_highorder_sndbits(skb, tp->mpcb);
> +		__wsum csum;
> +
> +		*ptr = htonl(((data_len) << 16) |
> +			     (TCPOPT_EOL << 8) |
> +			     (TCPOPT_EOL));
> +		csum = csum_partial(ptr - 2, 12, skb->csum);
> +		p16++;
> +		*p16++ = csum_fold(csum_partial(&hdseq, sizeof(hdseq), csum));
> +	} else {
> +		*ptr++ = htonl(((data_len) << 16) |
> +			       (TCPOPT_NOP << 8) |
> +			       (TCPOPT_NOP));
> +	}
> +
> +	return ptr - start;
> +}
> +
>  static void mptcp_save_dss_data_seq(const struct tcp_sock *tp, struct sk_buff *skb)
>  {
>  	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
> @@ -424,6 +449,7 @@ static int mptcp_write_dss_data_seq(const struct tcp_sock *tp, struct sk_buff *s
>  
>  	return mptcp_dss_len/sizeof(*ptr);
>  }
> +#endif
>  
>  static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject)
>  {
> @@ -469,8 +495,8 @@ static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject)
>  	if (mptcp_is_data_fin(subskb))
>  		mptcp_combine_dfin(subskb, meta_sk, sk);
>  
> -	mptcp_save_dss_data_seq(tp, subskb);
> -
> +	TCP_SKB_CB(subskb)->mptcp_flags |= MPTCPHDR_SEQ;
> +	tcb->mptcp_data_seq = tcb->seq;
>  	tcb->seq = tp->write_seq;
>  
>  	/* Take into account seg len */
> @@ -1197,10 +1223,9 @@ void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
>  	}
>  
>  	if (OPTION_DATA_ACK & opts->mptcp_options) {
> -		if (!mptcp_is_data_seq(skb))
> -			ptr += mptcp_write_dss_data_ack(tp, skb, ptr);
> -		else
> -			ptr += mptcp_write_dss_data_seq(tp, skb, ptr);
> +		ptr += mptcp_write_dss_data_ack(tp, skb, ptr);
> +		if (mptcp_is_data_seq(skb))
> +			ptr += mptcp_write_dss_mapping(tp, skb, ptr);
>  	}
>  	if (unlikely(OPTION_MP_PRIO & opts->mptcp_options)) {
>  		struct mp_prio *mpprio = (struct mp_prio *)ptr;
> -- 
> 2.7.4
> 
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [MPTCP] [PATCH] Revert tcp_skb_cb to its original size
@ 2017-11-08 21:19 Shoaib Rao
  0 siblings, 0 replies; 4+ messages in thread
From: Shoaib Rao @ 2017-11-08 21:19 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 8332 bytes --]

Christoph,

Following is a patch based on branch mptcp_v0.91.
I have looked into the issue that you pointed out. It is same as the partial ACK issue. If there is no partial ack everything will work and this patch covers all the non partial ack cases.

For the partial ACK case the fix is very simple. We can send the packet out without any mapping and the previous mapping should cover it. We can also update the mapping i.e. care a sub mapping.

The Receiver code (detect mapping()) is very implementation based. I will change it to accept two more mapping, sub mapping that does not violate an existing mapping and a super mapping.

I will provide that patch later. Also note that the fields used in this patch are specific to this release.

Shoaib

---
 include/linux/skbuff.h   |  2 +-
 include/net/tcp.h        | 32 +++++++++-------
 net/mptcp/mptcp_output.c | 97 ++++++++++++++++++++++++++++++------------------
 3 files changed, 80 insertions(+), 51 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f66cd5e..ca2e26a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -540,7 +540,7 @@ struct sk_buff {
 	 * want to keep them across layers you have to do a skb_clone()
 	 * first. This is owned by whoever has the skb queued ATM.
 	 */
-	char			cb[56] __aligned(8);
+	char			cb[48] __aligned(8);
 
 	unsigned long		_skb_refdst;
 	void			(*destructor)(struct sk_buff *skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 655ecd4..b476e86 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -842,14 +842,18 @@ struct tcp_skb_cb {
 		__u32		tcp_gso_segs;
 	};
 
-#ifdef CONFIG_MPTCP
-	__u8		mptcp_flags;	/* flags for the MPTCP layer    */
-	__u8		dss_off;	/* Number of 4-byte words until
-					 * seq-number */
-#endif
 	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/
 
-	__u8		sacked;		/* State flags for SACK/FACK.	*/
+	/* Below union is fine as the skb will use one or the other
+	 * The SKB in the rtx queue will set sacked and does not care
+	 * about dss_off.
+	 * Eventually dss_off will not be needed.
+	 */
+	 
+	union {
+		__u8		sacked;	/* State flags for SACK/FACK.	*/
+		__u8            dss_off;
+	};
 #define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
 #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
 #define TCPCB_LOST		0x04	/* SKB is lost			*/
@@ -860,8 +864,14 @@ struct tcp_skb_cb {
 				TCPCB_REPAIRED)
 
 	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
-	/* 1 byte hole */
-	__u32		ack_seq;	/* Sequence number ACK'd	*/
+
+	__u8		mptcp_flags;
+	union {
+		__u32		ack_seq; /* Sequence number ACK'd	*/
+		__u32		mptcp_data_seq;
+		__u32		path_mask;
+		
+	};
 	union {
 		union {
 			struct inet_skb_parm	h4;
@@ -869,12 +879,6 @@ struct tcp_skb_cb {
 			struct inet6_skb_parm	h6;
 #endif
 		} header;	/* For incoming frames		*/
-#ifdef CONFIG_MPTCP
-		union {			/* For MPTCP outgoing frames */
-			__u32 path_mask; /* paths that tried to send this skb */
-			__u32 dss[6];	/* DSS options */
-		};
-#endif
 	};
 };
 
diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c
index 691ef6f..5318459 100644
--- a/net/mptcp/mptcp_output.c
+++ b/net/mptcp/mptcp_output.c
@@ -57,34 +57,13 @@ EXPORT_SYMBOL(mptcp_sub_len_remove_addr_align);
 /* get the data-seq and end-data-seq and store them again in the
  * tcp_skb_cb
  */
+/* Rao: May need work */
 static bool mptcp_reconstruct_mapping(struct sk_buff *skb)
 {
-	const struct mp_dss *mpdss = (struct mp_dss *)TCP_SKB_CB(skb)->dss;
-	u32 *p32;
-	u16 *p16;
-
 	if (!mptcp_is_data_seq(skb))
 		return false;
 
-	if (!mpdss->M)
-		return false;
-
-	/* Move the pointer to the data-seq */
-	p32 = (u32 *)mpdss;
-	p32++;
-	if (mpdss->A) {
-		p32++;
-		if (mpdss->a)
-			p32++;
-	}
-
-	TCP_SKB_CB(skb)->seq = ntohl(*p32);
-
-	/* Get the data_len to calculate the end_data_seq */
-	p32++;
-	p32++;
-	p16 = (u16 *)p32;
-	TCP_SKB_CB(skb)->end_seq = ntohs(*p16) + TCP_SKB_CB(skb)->seq;
+	TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->mptcp_data_seq;
 
 	return true;
 }
@@ -182,7 +161,7 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk
 	/* Segment goes back to the MPTCP-layer. So, we need to zero the
 	 * path_mask/dss.
 	 */
-	memset(TCP_SKB_CB(skb)->dss, 0 , mptcp_dss_len);
+	TCP_SKB_CB(skb)->path_mask = 0;
 
 	/* We need to find out the path-mask from the meta-write-queue
 	 * to properly select a subflow.
@@ -319,25 +298,30 @@ combine:
 	}
 }
 
-static int mptcp_write_dss_mapping(const struct tcp_sock *tp, const struct sk_buff *skb,
-				   __be32 *ptr)
+
+static int mptcp_write_dss_mapping(const struct tcp_sock *tp,
+				   const struct sk_buff *skb, __be32 *ptr)
 {
 	const struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 	__be32 *start = ptr;
 	__u16 data_len;
 
-	*ptr++ = htonl(tcb->seq); /* data_seq */
+	*ptr++ = htonl(tcb->mptcp_data_seq); /* data_seq */
 
 	/* If it's a non-data DATA_FIN, we set subseq to 0 (draft v7) */
 	if (mptcp_is_data_fin(skb) && skb->len == 0)
 		*ptr++ = 0; /* subseq */
 	else
-		*ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* subseq */
+		*ptr++ = htonl(tcb->seq - tp->mptcp->snt_isn); /* subseq */
 
-	if (tcb->mptcp_flags & MPTCPHDR_INF)
+	if (tcb->mptcp_flags & MPTCPHDR_INF) {
 		data_len = 0;
-	else
+	} else {
 		data_len = tcb->end_seq - tcb->seq;
+		/* mptcp_entail_skb adds one for FIN */
+		if (tcb->tcp_flags & TCPHDR_FIN)
+			data_len -= 1;
+	}
 
 	if (tp->mpcb->dss_csum && data_len) {
 		__be16 *p16 = (__be16 *)ptr;
@@ -395,6 +379,47 @@ static int mptcp_write_dss_data_ack(const struct tcp_sock *tp, const struct sk_b
  * To avoid this we save the initial DSS mapping which allows us to
  * send the same DSS mapping even for fragmented retransmits.
  */
+#if 0
+static int mptcp_write_dss_mapping(const struct tcp_sock *tp, const struct sk_buff *skb,
+				   __be32 *ptr)
+{
+	const struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+	__be32 *start = ptr;
+	__u16 data_len;
+
+	*ptr++ = htonl(tcb->seq); /* data_seq */
+
+	/* If it's a non-data DATA_FIN, we set subseq to 0 (draft v7) */
+	if (mptcp_is_data_fin(skb) && skb->len == 0)
+		*ptr++ = 0; /* subseq */
+	else
+		*ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* subseq */
+
+	if (tcb->mptcp_flags & MPTCPHDR_INF)
+		data_len = 0;
+	else
+		data_len = tcb->end_seq - tcb->seq;
+
+	if (tp->mpcb->dss_csum && data_len) {
+		__be16 *p16 = (__be16 *)ptr;
+		__be32 hdseq = mptcp_get_highorder_sndbits(skb, tp->mpcb);
+		__wsum csum;
+
+		*ptr = htonl(((data_len) << 16) |
+			     (TCPOPT_EOL << 8) |
+			     (TCPOPT_EOL));
+		csum = csum_partial(ptr - 2, 12, skb->csum);
+		p16++;
+		*p16++ = csum_fold(csum_partial(&hdseq, sizeof(hdseq), csum));
+	} else {
+		*ptr++ = htonl(((data_len) << 16) |
+			       (TCPOPT_NOP << 8) |
+			       (TCPOPT_NOP));
+	}
+
+	return ptr - start;
+}
+
 static void mptcp_save_dss_data_seq(const struct tcp_sock *tp, struct sk_buff *skb)
 {
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -424,6 +449,7 @@ static int mptcp_write_dss_data_seq(const struct tcp_sock *tp, struct sk_buff *s
 
 	return mptcp_dss_len/sizeof(*ptr);
 }
+#endif
 
 static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject)
 {
@@ -469,8 +495,8 @@ static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject)
 	if (mptcp_is_data_fin(subskb))
 		mptcp_combine_dfin(subskb, meta_sk, sk);
 
-	mptcp_save_dss_data_seq(tp, subskb);
-
+	TCP_SKB_CB(subskb)->mptcp_flags |= MPTCPHDR_SEQ;
+	tcb->mptcp_data_seq = tcb->seq;
 	tcb->seq = tp->write_seq;
 
 	/* Take into account seg len */
@@ -1197,10 +1223,9 @@ void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 	}
 
 	if (OPTION_DATA_ACK & opts->mptcp_options) {
-		if (!mptcp_is_data_seq(skb))
-			ptr += mptcp_write_dss_data_ack(tp, skb, ptr);
-		else
-			ptr += mptcp_write_dss_data_seq(tp, skb, ptr);
+		ptr += mptcp_write_dss_data_ack(tp, skb, ptr);
+		if (mptcp_is_data_seq(skb))
+			ptr += mptcp_write_dss_mapping(tp, skb, ptr);
 	}
 	if (unlikely(OPTION_MP_PRIO & opts->mptcp_options)) {
 		struct mp_prio *mpprio = (struct mp_prio *)ptr;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-11-13 17:10 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-09 14:55 [MPTCP] [PATCH] Revert tcp_skb_cb to its original size Rao Shoaib
  -- strict thread matches above, loose matches on Subject: below --
2017-11-13 17:10 Rao Shoaib
2017-11-09  2:32 Christoph Paasch
2017-11-08 21:19 Shoaib Rao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.