All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] ebpf: fix bpf_msg_pull_data
@ 2018-08-30  0:07 Tushar Dave
  2018-08-30  0:21 ` Tushar Dave
  0 siblings, 1 reply; 6+ messages in thread
From: Tushar Dave @ 2018-08-30  0:07 UTC (permalink / raw)
  To: john.fastabend, ast, daniel, davem, netdev; +Cc: sowmini.varadhan

While doing some preliminary testing it is found that bpf helper
bpf_msg_pull_data does not calculate the data and data_end offset
correctly. Fix it!

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
 net/core/filter.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index c25eb36..3eeb3d6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2285,7 +2285,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
 BPF_CALL_4(bpf_msg_pull_data,
 	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
 {
-	unsigned int len = 0, offset = 0, copy = 0;
+	unsigned int len = 0, offset = 0, copy = 0, off = 0;
 	struct scatterlist *sg = msg->sg_data;
 	int first_sg, last_sg, i, shift;
 	unsigned char *p, *to, *from;
@@ -2299,22 +2299,30 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
 	i = msg->sg_start;
 	do {
 		len = sg[i].length;
-		offset += len;
 		if (start < offset + len)
 			break;
+		offset += len;
 		i++;
 		if (i == MAX_SKB_FRAGS)
 			i = 0;
-	} while (i != msg->sg_end);
+	} while (i <= msg->sg_end);
 
+	/* return error if start is out of range */
 	if (unlikely(start >= offset + len))
 		return -EINVAL;
 
-	if (!msg->sg_copy[i] && bytes <= len)
-		goto out;
+	/* return error if i is last entry in sglist and end is out of range */
+	if (msg->sg_copy[i] && end > offset + len)
+		return -EINVAL;
 
 	first_sg = i;
 
+	/* if i is not last entry in sg list and end (i.e start + bytes) is
+	 * within this sg[i] then goto out and calculate data and data_end
+	 */
+	if (!msg->sg_copy[i] && end <= offset + len)
+		goto out;
+
 	/* At this point we need to linearize multiple scatterlist
 	 * elements or a single shared page. Either way we need to
 	 * copy into a linear buffer exclusively owned by BPF. Then
@@ -2330,9 +2338,14 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
 		i++;
 		if (i == MAX_SKB_FRAGS)
 			i = 0;
-		if (bytes < copy)
+		if (end < copy)
 			break;
-	} while (i != msg->sg_end);
+	} while (i <= msg->sg_end);
+
+	/* return error if i is last entry in sglist and end is out of range */
+	if (i > msg->sg_end && end > offset + copy)
+		return -EINVAL;
+
 	last_sg = i;
 
 	if (unlikely(copy < end - start))
@@ -2342,23 +2355,22 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
 	if (unlikely(!page))
 		return -ENOMEM;
 	p = page_address(page);
-	offset = 0;
 
 	i = first_sg;
 	do {
 		from = sg_virt(&sg[i]);
 		len = sg[i].length;
-		to = p + offset;
+		to = p + off;
 
 		memcpy(to, from, len);
-		offset += len;
+		off += len;
 		sg[i].length = 0;
 		put_page(sg_page(&sg[i]));
 
 		i++;
 		if (i == MAX_SKB_FRAGS)
 			i = 0;
-	} while (i != last_sg);
+	} while (i < last_sg);
 
 	sg[first_sg].length = copy;
 	sg_set_page(&sg[first_sg], page, copy, 0);
@@ -2380,7 +2392,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
 		else
 			move_from = i + shift;
 
-		if (move_from == msg->sg_end)
+		if (move_from > msg->sg_end)
 			break;
 
 		sg[i] = sg[move_from];
@@ -2396,7 +2408,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
 	if (msg->sg_end < 0)
 		msg->sg_end += MAX_SKB_FRAGS;
 out:
-	msg->data = sg_virt(&sg[i]) + start - offset;
+	msg->data = sg_virt(&sg[first_sg]) + start - offset;
 	msg->data_end = msg->data + bytes;
 
 	return 0;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net] ebpf: fix bpf_msg_pull_data
  2018-08-30  0:07 [PATCH net] ebpf: fix bpf_msg_pull_data Tushar Dave
@ 2018-08-30  0:21 ` Tushar Dave
  2018-08-30  7:20   ` Daniel Borkmann
  0 siblings, 1 reply; 6+ messages in thread
From: Tushar Dave @ 2018-08-30  0:21 UTC (permalink / raw)
  To: john.fastabend, ast, daniel, davem, netdev; +Cc: sowmini.varadhan



On 08/29/2018 05:07 PM, Tushar Dave wrote:
> While doing some preliminary testing it is found that bpf helper
> bpf_msg_pull_data does not calculate the data and data_end offset
> correctly. Fix it!
> 
> Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
> Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
> Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
> ---
>   net/core/filter.c | 38 +++++++++++++++++++++++++-------------
>   1 file changed, 25 insertions(+), 13 deletions(-)
> 
> diff --git a/net/core/filter.c b/net/core/filter.c
> index c25eb36..3eeb3d6 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2285,7 +2285,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>   BPF_CALL_4(bpf_msg_pull_data,
>   	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
>   {
> -	unsigned int len = 0, offset = 0, copy = 0;
> +	unsigned int len = 0, offset = 0, copy = 0, off = 0;
>   	struct scatterlist *sg = msg->sg_data;
>   	int first_sg, last_sg, i, shift;
>   	unsigned char *p, *to, *from;
> @@ -2299,22 +2299,30 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>   	i = msg->sg_start;
>   	do {
>   		len = sg[i].length;
> -		offset += len;
>   		if (start < offset + len)
>   			break;
> +		offset += len;
>   		i++;
>   		if (i == MAX_SKB_FRAGS)
>   			i = 0;
> -	} while (i != msg->sg_end);
> +	} while (i <= msg->sg_end);
>   
> +	/* return error if start is out of range */
>   	if (unlikely(start >= offset + len))
>   		return -EINVAL;
>   
> -	if (!msg->sg_copy[i] && bytes <= len)
> -		goto out;
> +	/* return error if i is last entry in sglist and end is out of range */
> +	if (msg->sg_copy[i] && end > offset + len)
> +		return -EINVAL;
>   
>   	first_sg = i;
>   
> +	/* if i is not last entry in sg list and end (i.e start + bytes) is
> +	 * within this sg[i] then goto out and calculate data and data_end
> +	 */
> +	if (!msg->sg_copy[i] && end <= offset + len)
> +		goto out;
> +
>   	/* At this point we need to linearize multiple scatterlist
>   	 * elements or a single shared page. Either way we need to
>   	 * copy into a linear buffer exclusively owned by BPF. Then
> @@ -2330,9 +2338,14 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>   		i++;
>   		if (i == MAX_SKB_FRAGS)
>   			i = 0;
> -		if (bytes < copy)
> +		if (end < copy)
>   			break;
> -	} while (i != msg->sg_end);
> +	} while (i <= msg->sg_end);
> +
> +	/* return error if i is last entry in sglist and end is out of range */
> +	if (i > msg->sg_end && end > offset + copy)
> +		return -EINVAL;
> +
>   	last_sg = i;
>   
>   	if (unlikely(copy < end - start))
> @@ -2342,23 +2355,22 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>   	if (unlikely(!page))
>   		return -ENOMEM;
>   	p = page_address(page);
> -	offset = 0;
>   
>   	i = first_sg;
>   	do {
>   		from = sg_virt(&sg[i]);
>   		len = sg[i].length;
> -		to = p + offset;
> +		to = p + off;
>   
>   		memcpy(to, from, len);
> -		offset += len;
> +		off += len;
>   		sg[i].length = 0;
>   		put_page(sg_page(&sg[i]));
>   
>   		i++;
>   		if (i == MAX_SKB_FRAGS)
>   			i = 0;
> -	} while (i != last_sg);
> +	} while (i < last_sg);
>   
>   	sg[first_sg].length = copy;
>   	sg_set_page(&sg[first_sg], page, copy, 0);
> @@ -2380,7 +2392,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>   		else
>   			move_from = i + shift;
>   
> -		if (move_from == msg->sg_end)
> +		if (move_from > msg->sg_end)
>   			break;
>   
>   		sg[i] = sg[move_from];
> @@ -2396,7 +2408,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>   	if (msg->sg_end < 0)
>   		msg->sg_end += MAX_SKB_FRAGS;
>   out:
> -	msg->data = sg_virt(&sg[i]) + start - offset;
> +	msg->data = sg_virt(&sg[first_sg]) + start - offset;
>   	msg->data_end = msg->data + bytes;
>   
>   	return 0;
> 

Please discard this patch. I just noticed that Daniel Borkmann sent 
some similar fixes for bpf_msg_pull_data.


-Tushar

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] ebpf: fix bpf_msg_pull_data
  2018-08-30  0:21 ` Tushar Dave
@ 2018-08-30  7:20   ` Daniel Borkmann
  2018-08-31  8:37     ` Tushar Dave
  0 siblings, 1 reply; 6+ messages in thread
From: Daniel Borkmann @ 2018-08-30  7:20 UTC (permalink / raw)
  To: Tushar Dave, john.fastabend, ast, davem, netdev; +Cc: sowmini.varadhan

On 08/30/2018 02:21 AM, Tushar Dave wrote:
> On 08/29/2018 05:07 PM, Tushar Dave wrote:
>> While doing some preliminary testing it is found that bpf helper
>> bpf_msg_pull_data does not calculate the data and data_end offset
>> correctly. Fix it!
>>
>> Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
>> Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
>> Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
>> ---
>>   net/core/filter.c | 38 +++++++++++++++++++++++++-------------
>>   1 file changed, 25 insertions(+), 13 deletions(-)
>>
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index c25eb36..3eeb3d6 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -2285,7 +2285,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>   BPF_CALL_4(bpf_msg_pull_data,
>>          struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
>>   {
>> -    unsigned int len = 0, offset = 0, copy = 0;
>> +    unsigned int len = 0, offset = 0, copy = 0, off = 0;
>>       struct scatterlist *sg = msg->sg_data;
>>       int first_sg, last_sg, i, shift;
>>       unsigned char *p, *to, *from;
>> @@ -2299,22 +2299,30 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>       i = msg->sg_start;
>>       do {
>>           len = sg[i].length;
>> -        offset += len;
>>           if (start < offset + len)
>>               break;
>> +        offset += len;
>>           i++;
>>           if (i == MAX_SKB_FRAGS)
>>               i = 0;
>> -    } while (i != msg->sg_end);
>> +    } while (i <= msg->sg_end);

I don't think this condition is correct, msg operates as a scatterlist ring,
so sg_end may very well be < current i when there's a wrap-around in the
traversal ... more below.

>>   +    /* return error if start is out of range */
>>       if (unlikely(start >= offset + len))
>>           return -EINVAL;
>>   -    if (!msg->sg_copy[i] && bytes <= len)
>> -        goto out;
>> +    /* return error if i is last entry in sglist and end is out of range */
>> +    if (msg->sg_copy[i] && end > offset + len)
>> +        return -EINVAL;
>>         first_sg = i;
>>   +    /* if i is not last entry in sg list and end (i.e start + bytes) is
>> +     * within this sg[i] then goto out and calculate data and data_end
>> +     */
>> +    if (!msg->sg_copy[i] && end <= offset + len)
>> +        goto out;
>> +
>>       /* At this point we need to linearize multiple scatterlist
>>        * elements or a single shared page. Either way we need to
>>        * copy into a linear buffer exclusively owned by BPF. Then
>> @@ -2330,9 +2338,14 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>           i++;
>>           if (i == MAX_SKB_FRAGS)
>>               i = 0;
>> -        if (bytes < copy)
>> +        if (end < copy)
>>               break;
>> -    } while (i != msg->sg_end);
>> +    } while (i <= msg->sg_end);
>> +
>> +    /* return error if i is last entry in sglist and end is out of range */
>> +    if (i > msg->sg_end && end > offset + copy)
>> +        return -EINVAL;
>> +
>>       last_sg = i;
>>         if (unlikely(copy < end - start))
>> @@ -2342,23 +2355,22 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>       if (unlikely(!page))
>>           return -ENOMEM;
>>       p = page_address(page);
>> -    offset = 0;
>>         i = first_sg;
>>       do {
>>           from = sg_virt(&sg[i]);
>>           len = sg[i].length;
>> -        to = p + offset;
>> +        to = p + off;
>>             memcpy(to, from, len);
>> -        offset += len;
>> +        off += len;
>>           sg[i].length = 0;
>>           put_page(sg_page(&sg[i]));
>>             i++;
>>           if (i == MAX_SKB_FRAGS)
>>               i = 0;
>> -    } while (i != last_sg);
>> +    } while (i < last_sg);
>>         sg[first_sg].length = copy;
>>       sg_set_page(&sg[first_sg], page, copy, 0);
>> @@ -2380,7 +2392,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>           else
>>               move_from = i + shift;
>>   -        if (move_from == msg->sg_end)
>> +        if (move_from > msg->sg_end)
>>               break;
>>             sg[i] = sg[move_from];
>> @@ -2396,7 +2408,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>       if (msg->sg_end < 0)
>>           msg->sg_end += MAX_SKB_FRAGS;
>>   out:
>> -    msg->data = sg_virt(&sg[i]) + start - offset;
>> +    msg->data = sg_virt(&sg[first_sg]) + start - offset;
>>       msg->data_end = msg->data + bytes;
>>         return 0;
>>
> 
> Please discard this patch. I just noticed that Daniel Borkmann sent some similar fixes for bpf_msg_pull_data.

Yeah I've been looking at these recently as well, please make sure you test
with the below fixes included to see if there's anything left:

https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=5b24109b0563d45094c470684c1f8cea1af269f8
https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=0e06b227c5221dd51b5569de93f3b9f532be4a32
https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=2e43f95dd8ee62bc8bf57f2afac37fbd70c8d565
https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=a8cf76a9023bc6709b1361d06bb2fae5227b9d68

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] ebpf: fix bpf_msg_pull_data
  2018-08-30  7:20   ` Daniel Borkmann
@ 2018-08-31  8:37     ` Tushar Dave
  2018-08-31 12:15       ` Daniel Borkmann
  0 siblings, 1 reply; 6+ messages in thread
From: Tushar Dave @ 2018-08-31  8:37 UTC (permalink / raw)
  To: Daniel Borkmann, john.fastabend, ast, davem, netdev; +Cc: sowmini.varadhan



On 08/30/2018 12:20 AM, Daniel Borkmann wrote:
> On 08/30/2018 02:21 AM, Tushar Dave wrote:
>> On 08/29/2018 05:07 PM, Tushar Dave wrote:
>>> While doing some preliminary testing it is found that bpf helper
>>> bpf_msg_pull_data does not calculate the data and data_end offset
>>> correctly. Fix it!
>>>
>>> Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
>>> Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
>>> Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
>>> ---
>>>    net/core/filter.c | 38 +++++++++++++++++++++++++-------------
>>>    1 file changed, 25 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>> index c25eb36..3eeb3d6 100644
>>> --- a/net/core/filter.c
>>> +++ b/net/core/filter.c
>>> @@ -2285,7 +2285,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>    BPF_CALL_4(bpf_msg_pull_data,
>>>           struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
>>>    {
>>> -    unsigned int len = 0, offset = 0, copy = 0;
>>> +    unsigned int len = 0, offset = 0, copy = 0, off = 0;
>>>        struct scatterlist *sg = msg->sg_data;
>>>        int first_sg, last_sg, i, shift;
>>>        unsigned char *p, *to, *from;
>>> @@ -2299,22 +2299,30 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>        i = msg->sg_start;
>>>        do {
>>>            len = sg[i].length;
>>> -        offset += len;
>>>            if (start < offset + len)
>>>                break;
>>> +        offset += len;
>>>            i++;
>>>            if (i == MAX_SKB_FRAGS)
>>>                i = 0;
>>> -    } while (i != msg->sg_end);
>>> +    } while (i <= msg->sg_end);
> 
> I don't think this condition is correct, msg operates as a scatterlist ring,
> so sg_end may very well be < current i when there's a wrap-around in the
> traversal ... more below.

I'm wondering then how this is suppose to work in case sg list is not
ring! For RDS, We have sg list that is not a ring. More below.

> 
>>>    +    /* return error if start is out of range */
>>>        if (unlikely(start >= offset + len))
>>>            return -EINVAL;
>>>    -    if (!msg->sg_copy[i] && bytes <= len)
>>> -        goto out;
>>> +    /* return error if i is last entry in sglist and end is out of range */
>>> +    if (msg->sg_copy[i] && end > offset + len)
>>> +        return -EINVAL;
>>>          first_sg = i;
>>>    +    /* if i is not last entry in sg list and end (i.e start + bytes) is
>>> +     * within this sg[i] then goto out and calculate data and data_end
>>> +     */
>>> +    if (!msg->sg_copy[i] && end <= offset + len)
>>> +        goto out;
>>> +
>>>        /* At this point we need to linearize multiple scatterlist
>>>         * elements or a single shared page. Either way we need to
>>>         * copy into a linear buffer exclusively owned by BPF. Then
>>> @@ -2330,9 +2338,14 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>            i++;
>>>            if (i == MAX_SKB_FRAGS)
>>>                i = 0;
>>> -        if (bytes < copy)
>>> +        if (end < copy)
>>>                break;
>>> -    } while (i != msg->sg_end);
>>> +    } while (i <= msg->sg_end);
>>> +
>>> +    /* return error if i is last entry in sglist and end is out of range */
>>> +    if (i > msg->sg_end && end > offset + copy)
>>> +        return -EINVAL;
>>> +
>>>        last_sg = i;
>>>          if (unlikely(copy < end - start))
>>> @@ -2342,23 +2355,22 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>        if (unlikely(!page))
>>>            return -ENOMEM;
>>>        p = page_address(page);
>>> -    offset = 0;
>>>          i = first_sg;
>>>        do {
>>>            from = sg_virt(&sg[i]);
>>>            len = sg[i].length;
>>> -        to = p + offset;
>>> +        to = p + off;
>>>              memcpy(to, from, len);
>>> -        offset += len;
>>> +        off += len;
>>>            sg[i].length = 0;
>>>            put_page(sg_page(&sg[i]));
>>>              i++;
>>>            if (i == MAX_SKB_FRAGS)
>>>                i = 0;
>>> -    } while (i != last_sg);
>>> +    } while (i < last_sg);
>>>          sg[first_sg].length = copy;
>>>        sg_set_page(&sg[first_sg], page, copy, 0);
>>> @@ -2380,7 +2392,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>            else
>>>                move_from = i + shift;
>>>    -        if (move_from == msg->sg_end)
>>> +        if (move_from > msg->sg_end)
>>>                break;
>>>              sg[i] = sg[move_from];
>>> @@ -2396,7 +2408,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>        if (msg->sg_end < 0)
>>>            msg->sg_end += MAX_SKB_FRAGS;
>>>    out:
>>> -    msg->data = sg_virt(&sg[i]) + start - offset;
>>> +    msg->data = sg_virt(&sg[first_sg]) + start - offset;
>>>        msg->data_end = msg->data + bytes;
>>>          return 0;
>>>
>>
>> Please discard this patch. I just noticed that Daniel Borkmann sent some similar fixes for bpf_msg_pull_data.
> 
> Yeah I've been looking at these recently as well, please make sure you test
> with the below fixes included to see if there's anything left:

I tested the latest net tree which has all the fixes you mentioned and I
am still seeing issues.

As I already mentioned before on RFC v3 thread, we need to be careful
reusing 'offset' while linearizing multiple scatterlist
elements.
Variable 'offset' is used to calculate the 'msg->data'
i.e. msg->data = sg_virt(&sg[first_sg]) + start - offset"

We should use different variable name (e.g. off) while linearizing
multiple scatterlist elements or a single shared page so that we don't
overwrite 'offset' that has correct value already been calculated.

A code change for this would look like:

diff --git a/net/core/filter.c b/net/core/filter.c
index 60a29ca..076ca09 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2326,7 +2326,7 @@ struct sock *do_msg_redirect_map(struct 
sk_msg_buff *msg)
  BPF_CALL_4(bpf_msg_pull_data,
            struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
  {
-       unsigned int len = 0, offset = 0, copy = 0;
+       unsigned int len = 0, offset = 0, copy = 0, off;
         int bytes = end - start, bytes_sg_total;
         struct scatterlist *sg = msg->sg_data;
         int first_sg, last_sg, i, shift;
@@ -2354,6 +2354,8 @@ struct sock *do_msg_redirect_map(struct 
sk_msg_buff *msg)
          * account for the headroom.
          */
         bytes_sg_total = start - offset + bytes;

         if (!msg->sg_copy[i] && bytes_sg_total <= len)
                 goto out;

@@ -2382,18 +2384,18 @@ struct sock *do_msg_redirect_map(struct 
sk_msg_buff *msg)
         if (unlikely(!page))
                 return -ENOMEM;
         p = page_address(page);
-       offset = 0;
+       off = 0;

         i = first_sg;
         do {
                 from = sg_virt(&sg[i]);
                 len = sg[i].length;
-               to = p + offset;
+               to = p + off;

                 memcpy(to, from, len);
-               offset += len;
+               off += len;
                 sg[i].length = 0;
                 put_page(sg_page(&sg[i]));

                 sk_msg_iter_var(i);
         } while (i != last_sg);


Apart from above bug fix, I see issues in below 2 cases:

case 1:
=======
For things like RDS, where sg list is not a ring how to know end of packet?

For example I have RDS packet of length 8192 Bytes which is in
scatterlist like
sge[0].lenghth = 1400
sge[1].length = 1448
sge[2].length = 1448
sge[3].length = 1448
sge[4].length = 1448
sge[5].length = 1000

Now when I execute "bpf_msg_pull_data(msg, 8190, 8196, 0)" on above RDS
packet, I expect bpf_msg_pull_data to return error because end is out of
bound e.g. 8196 > 8192. Instead current code wraps it to sge index 0!
That is if I dump first 6 bytes starting with 'start' I get byte value
at offset 8190, 8191 from sge[5] and value at offset 8192, 8193, 8194
8195 bytes from sge[0] from offset 0 ,1 ,2 ,3 respectively - which is
not expected!


case 2:
=======
Same example of RDS packet as above, when I execute
"bpf_msg_pull_data(msg, 7191, 8192, 0)"
I get -EINVAL which is *wrong*.

Debugging this, I found, right before we enter below do while loop the 
values of variables are:
i = 4
first_sg = 4
last_sg= 5
start=7191
bytes_sg_total=2448
offset=5744
copy=0
len=1448

      do {
                 copy += sg[i].length;
                 sk_msg_iter_var(i);
                 if (bytes_sg_total <= copy)
                         break;
         } while (i != msg->sg_end);
         last_sg = i;

However, above loop execute only one time because after one execution,
i=5 and msg->sg_end=5. That makes condition "while (i != msg->sg_end)"
false. So when loop exit, the values are:
i = 5
first_sg=4
last_sg= 5
start=7191
bytes_sg_total=2448 <<<<<<
offset=5744
copy=1448 <<<<<<
len=1448

And that makes below condition true
         if (unlikely(bytes_sg_total > copy))
                 return -EINVAL;


There may be one or more similar corner scenarios like case 1 and 2
which can be fixed however I am not sure how can we fix so that we can
get it right for sg ring and sg list?

Thanks.
-Tushar
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=5b24109b0563d45094c470684c1f8cea1af269f8
> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=0e06b227c5221dd51b5569de93f3b9f532be4a32
> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=2e43f95dd8ee62bc8bf57f2afac37fbd70c8d565
> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=a8cf76a9023bc6709b1361d06bb2fae5227b9d68
> 
> Thanks,
> Daniel
> 

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net] ebpf: fix bpf_msg_pull_data
  2018-08-31  8:37     ` Tushar Dave
@ 2018-08-31 12:15       ` Daniel Borkmann
  2018-08-31 21:41         ` Tushar Dave
  0 siblings, 1 reply; 6+ messages in thread
From: Daniel Borkmann @ 2018-08-31 12:15 UTC (permalink / raw)
  To: Tushar Dave, john.fastabend, ast, davem, netdev; +Cc: sowmini.varadhan

On 08/31/2018 10:37 AM, Tushar Dave wrote:
> On 08/30/2018 12:20 AM, Daniel Borkmann wrote:
>> On 08/30/2018 02:21 AM, Tushar Dave wrote:
>>> On 08/29/2018 05:07 PM, Tushar Dave wrote:
>>>> While doing some preliminary testing it is found that bpf helper
>>>> bpf_msg_pull_data does not calculate the data and data_end offset
>>>> correctly. Fix it!
>>>>
>>>> Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
>>>> Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
>>>> Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
>>>> ---
>>>>    net/core/filter.c | 38 +++++++++++++++++++++++++-------------
>>>>    1 file changed, 25 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>>> index c25eb36..3eeb3d6 100644
>>>> --- a/net/core/filter.c
>>>> +++ b/net/core/filter.c
>>>> @@ -2285,7 +2285,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>    BPF_CALL_4(bpf_msg_pull_data,
>>>>           struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
>>>>    {
>>>> -    unsigned int len = 0, offset = 0, copy = 0;
>>>> +    unsigned int len = 0, offset = 0, copy = 0, off = 0;
>>>>        struct scatterlist *sg = msg->sg_data;
>>>>        int first_sg, last_sg, i, shift;
>>>>        unsigned char *p, *to, *from;
>>>> @@ -2299,22 +2299,30 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>        i = msg->sg_start;
>>>>        do {
>>>>            len = sg[i].length;
>>>> -        offset += len;
>>>>            if (start < offset + len)
>>>>                break;
>>>> +        offset += len;
>>>>            i++;
>>>>            if (i == MAX_SKB_FRAGS)
>>>>                i = 0;
>>>> -    } while (i != msg->sg_end);
>>>> +    } while (i <= msg->sg_end);
>>
>> I don't think this condition is correct, msg operates as a scatterlist ring,
>> so sg_end may very well be < current i when there's a wrap-around in the
>> traversal ... more below.
> 
> I'm wondering then how this is suppose to work in case sg list is not
> ring! For RDS, We have sg list that is not a ring. More below.
> 
>>
>>>>    +    /* return error if start is out of range */
>>>>        if (unlikely(start >= offset + len))
>>>>            return -EINVAL;
>>>>    -    if (!msg->sg_copy[i] && bytes <= len)
>>>> -        goto out;
>>>> +    /* return error if i is last entry in sglist and end is out of range */
>>>> +    if (msg->sg_copy[i] && end > offset + len)
>>>> +        return -EINVAL;
>>>>          first_sg = i;
>>>>    +    /* if i is not last entry in sg list and end (i.e start + bytes) is
>>>> +     * within this sg[i] then goto out and calculate data and data_end
>>>> +     */
>>>> +    if (!msg->sg_copy[i] && end <= offset + len)
>>>> +        goto out;
>>>> +
>>>>        /* At this point we need to linearize multiple scatterlist
>>>>         * elements or a single shared page. Either way we need to
>>>>         * copy into a linear buffer exclusively owned by BPF. Then
>>>> @@ -2330,9 +2338,14 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>            i++;
>>>>            if (i == MAX_SKB_FRAGS)
>>>>                i = 0;
>>>> -        if (bytes < copy)
>>>> +        if (end < copy)
>>>>                break;
>>>> -    } while (i != msg->sg_end);
>>>> +    } while (i <= msg->sg_end);
>>>> +
>>>> +    /* return error if i is last entry in sglist and end is out of range */
>>>> +    if (i > msg->sg_end && end > offset + copy)
>>>> +        return -EINVAL;
>>>> +
>>>>        last_sg = i;
>>>>          if (unlikely(copy < end - start))
>>>> @@ -2342,23 +2355,22 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>        if (unlikely(!page))
>>>>            return -ENOMEM;
>>>>        p = page_address(page);
>>>> -    offset = 0;
>>>>          i = first_sg;
>>>>        do {
>>>>            from = sg_virt(&sg[i]);
>>>>            len = sg[i].length;
>>>> -        to = p + offset;
>>>> +        to = p + off;
>>>>              memcpy(to, from, len);
>>>> -        offset += len;
>>>> +        off += len;
>>>>            sg[i].length = 0;
>>>>            put_page(sg_page(&sg[i]));
>>>>              i++;
>>>>            if (i == MAX_SKB_FRAGS)
>>>>                i = 0;
>>>> -    } while (i != last_sg);
>>>> +    } while (i < last_sg);
>>>>          sg[first_sg].length = copy;
>>>>        sg_set_page(&sg[first_sg], page, copy, 0);
>>>> @@ -2380,7 +2392,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>            else
>>>>                move_from = i + shift;
>>>>    -        if (move_from == msg->sg_end)
>>>> +        if (move_from > msg->sg_end)
>>>>                break;
>>>>              sg[i] = sg[move_from];
>>>> @@ -2396,7 +2408,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>        if (msg->sg_end < 0)
>>>>            msg->sg_end += MAX_SKB_FRAGS;
>>>>    out:
>>>> -    msg->data = sg_virt(&sg[i]) + start - offset;
>>>> +    msg->data = sg_virt(&sg[first_sg]) + start - offset;
>>>>        msg->data_end = msg->data + bytes;
>>>>          return 0;
>>>>
>>>
>>> Please discard this patch. I just noticed that Daniel Borkmann sent some similar fixes for bpf_msg_pull_data.
>>
>> Yeah I've been looking at these recently as well, please make sure you test
>> with the below fixes included to see if there's anything left:
> 
> I tested the latest net tree which has all the fixes you mentioned and I
> am still seeing issues.
> 
> As I already mentioned before on RFC v3 thread, we need to be careful
> reusing 'offset' while linearizing multiple scatterlist
> elements.
> Variable 'offset' is used to calculate the 'msg->data'
> i.e. msg->data = sg_virt(&sg[first_sg]) + start - offset"
> 
> We should use different variable name (e.g. off) while linearizing
> multiple scatterlist elements or a single shared page so that we don't
> overwrite 'offset' that has correct value already been calculated.

Agree, sigh, that's also buggy, please submit the below as proper patch,
thanks!

> A code change for this would look like:
> 
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 60a29ca..076ca09 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2326,7 +2326,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>  BPF_CALL_4(bpf_msg_pull_data,
>            struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
>  {
> -       unsigned int len = 0, offset = 0, copy = 0;
> +       unsigned int len = 0, offset = 0, copy = 0, off;

Nit: probably better name it 'poffset' since this is relative to p.

>         int bytes = end - start, bytes_sg_total;
>         struct scatterlist *sg = msg->sg_data;
>         int first_sg, last_sg, i, shift;
> @@ -2354,6 +2354,8 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>          * account for the headroom.
>          */
>         bytes_sg_total = start - offset + bytes;
> 
>         if (!msg->sg_copy[i] && bytes_sg_total <= len)
>                 goto out;
> 
> @@ -2382,18 +2384,18 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>         if (unlikely(!page))
>                 return -ENOMEM;
>         p = page_address(page);
> -       offset = 0;
> +       off = 0;
> 
>         i = first_sg;
>         do {
>                 from = sg_virt(&sg[i]);
>                 len = sg[i].length;
> -               to = p + offset;
> +               to = p + off;
> 
>                 memcpy(to, from, len);
> -               offset += len;
> +               off += len;
>                 sg[i].length = 0;
>                 put_page(sg_page(&sg[i]));
> 
>                 sk_msg_iter_var(i);
>         } while (i != last_sg);
> 
> 
> Apart from above bug fix, I see issues in below 2 cases:

Ok.

> case 1:
> =======
> For things like RDS, where sg list is not a ring how to know end of packet?
> 
> For example I have RDS packet of length 8192 Bytes which is in
> scatterlist like
> sge[0].lenghth = 1400
> sge[1].length = 1448
> sge[2].length = 1448
> sge[3].length = 1448
> sge[4].length = 1448
> sge[5].length = 1000
> 
> Now when I execute "bpf_msg_pull_data(msg, 8190, 8196, 0)" on above RDS
> packet, I expect bpf_msg_pull_data to return error because end is out of
> bound e.g. 8196 > 8192. Instead current code wraps it to sge index 0!
> That is if I dump first 6 bytes starting with 'start' I get byte value
> at offset 8190, 8191 from sge[5] and value at offset 8192, 8193, 8194
> 8195 bytes from sge[0] from offset 0 ,1 ,2 ,3 respectively - which is
> not expected!

For the sg ring case which is what is upstream today, it will find start_sg
as 5 and last_sg as 6. And therefore bail out in bytes_sg_total > copy test
since 1004 > 1000. sge[5] offset is 7192 with 1000 bytes len, so start is at
offset 998 from there and end at 1004. So I get -EINVAL from the mentioned
test.

Is msg->sg_start and msg->sg_end set properly in your RDS case? As long as
there's no wrap-around the helper should be usable also in RDS case. I'm not
sure though why you see above oob.

> case 2:
> =======
> Same example of RDS packet as above, when I execute
> "bpf_msg_pull_data(msg, 7191, 8192, 0)"
> I get -EINVAL which is *wrong*.
> 
> Debugging this, I found, right before we enter below do while loop the values of variables are:
> i = 4
> first_sg = 4
> last_sg= 5
> start=7191
> bytes_sg_total=2448
> offset=5744
> copy=0
> len=1448
> 
>      do {
>                 copy += sg[i].length;
>                 sk_msg_iter_var(i);
>                 if (bytes_sg_total <= copy)
>                         break;
>         } while (i != msg->sg_end);
>         last_sg = i;
> 
> However, above loop execute only one time because after one execution,
> i=5 and msg->sg_end=5. That makes condition "while (i != msg->sg_end)"
> false. So when loop exit, the values are:
> i = 5
> first_sg=4
> last_sg= 5
> start=7191
> bytes_sg_total=2448 <<<<<<
> offset=5744
> copy=1448 <<<<<<
> len=1448
> 
> And that makes below condition true
>         if (unlikely(bytes_sg_total > copy))
>                 return -EINVAL;

Hmm, is your msg->sg_end correct? I'm getting start_sg of 4 and last_sg of 6.
So it breaks on the bytes_sg_total <= copy test, and then it merges sges 4 and 5
into 4 with 2448 length while dropping sge 5. How do you set up sk_msg_buff?

> There may be one or more similar corner scenarios like case 1 and 2
> which can be fixed however I am not sure how can we fix so that we can
> get it right for sg ring and sg list?

Imho, it should be sufficient to have invariant of msg->sg_start < msg->sg_end
and having msg->sg_start point to the starting sge while msg->sg_end to the
last sge slot + 1.

> Thanks.
> -Tushar
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=5b24109b0563d45094c470684c1f8cea1af269f8
>> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=0e06b227c5221dd51b5569de93f3b9f532be4a32
>> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=2e43f95dd8ee62bc8bf57f2afac37fbd70c8d565
>> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=a8cf76a9023bc6709b1361d06bb2fae5227b9d68
>>
>> Thanks,
>> Daniel
>>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] ebpf: fix bpf_msg_pull_data
  2018-08-31 12:15       ` Daniel Borkmann
@ 2018-08-31 21:41         ` Tushar Dave
  0 siblings, 0 replies; 6+ messages in thread
From: Tushar Dave @ 2018-08-31 21:41 UTC (permalink / raw)
  To: Daniel Borkmann, john.fastabend, ast, davem, netdev; +Cc: sowmini.varadhan



On 08/31/2018 05:15 AM, Daniel Borkmann wrote:
> On 08/31/2018 10:37 AM, Tushar Dave wrote:
>> On 08/30/2018 12:20 AM, Daniel Borkmann wrote:
>>> On 08/30/2018 02:21 AM, Tushar Dave wrote:
>>>> On 08/29/2018 05:07 PM, Tushar Dave wrote:
>>>>> While doing some preliminary testing it is found that bpf helper
>>>>> bpf_msg_pull_data does not calculate the data and data_end offset
>>>>> correctly. Fix it!
>>>>>
>>>>> Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
>>>>> Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
>>>>> Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
>>>>> ---
>>>>>     net/core/filter.c | 38 +++++++++++++++++++++++++-------------
>>>>>     1 file changed, 25 insertions(+), 13 deletions(-)
>>>>>
>>>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>>>> index c25eb36..3eeb3d6 100644
>>>>> --- a/net/core/filter.c
>>>>> +++ b/net/core/filter.c
>>>>> @@ -2285,7 +2285,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>>     BPF_CALL_4(bpf_msg_pull_data,
>>>>>            struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
>>>>>     {
>>>>> -    unsigned int len = 0, offset = 0, copy = 0;
>>>>> +    unsigned int len = 0, offset = 0, copy = 0, off = 0;
>>>>>         struct scatterlist *sg = msg->sg_data;
>>>>>         int first_sg, last_sg, i, shift;
>>>>>         unsigned char *p, *to, *from;
>>>>> @@ -2299,22 +2299,30 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>>         i = msg->sg_start;
>>>>>         do {
>>>>>             len = sg[i].length;
>>>>> -        offset += len;
>>>>>             if (start < offset + len)
>>>>>                 break;
>>>>> +        offset += len;
>>>>>             i++;
>>>>>             if (i == MAX_SKB_FRAGS)
>>>>>                 i = 0;
>>>>> -    } while (i != msg->sg_end);
>>>>> +    } while (i <= msg->sg_end);
>>>
>>> I don't think this condition is correct, msg operates as a scatterlist ring,
>>> so sg_end may very well be < current i when there's a wrap-around in the
>>> traversal ... more below.
>>
>> I'm wondering then how this is suppose to work in case sg list is not
>> ring! For RDS, We have sg list that is not a ring. More below.
>>
>>>
>>>>>     +    /* return error if start is out of range */
>>>>>         if (unlikely(start >= offset + len))
>>>>>             return -EINVAL;
>>>>>     -    if (!msg->sg_copy[i] && bytes <= len)
>>>>> -        goto out;
>>>>> +    /* return error if i is last entry in sglist and end is out of range */
>>>>> +    if (msg->sg_copy[i] && end > offset + len)
>>>>> +        return -EINVAL;
>>>>>           first_sg = i;
>>>>>     +    /* if i is not last entry in sg list and end (i.e start + bytes) is
>>>>> +     * within this sg[i] then goto out and calculate data and data_end
>>>>> +     */
>>>>> +    if (!msg->sg_copy[i] && end <= offset + len)
>>>>> +        goto out;
>>>>> +
>>>>>         /* At this point we need to linearize multiple scatterlist
>>>>>          * elements or a single shared page. Either way we need to
>>>>>          * copy into a linear buffer exclusively owned by BPF. Then
>>>>> @@ -2330,9 +2338,14 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>>             i++;
>>>>>             if (i == MAX_SKB_FRAGS)
>>>>>                 i = 0;
>>>>> -        if (bytes < copy)
>>>>> +        if (end < copy)
>>>>>                 break;
>>>>> -    } while (i != msg->sg_end);
>>>>> +    } while (i <= msg->sg_end);
>>>>> +
>>>>> +    /* return error if i is last entry in sglist and end is out of range */
>>>>> +    if (i > msg->sg_end && end > offset + copy)
>>>>> +        return -EINVAL;
>>>>> +
>>>>>         last_sg = i;
>>>>>           if (unlikely(copy < end - start))
>>>>> @@ -2342,23 +2355,22 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>>         if (unlikely(!page))
>>>>>             return -ENOMEM;
>>>>>         p = page_address(page);
>>>>> -    offset = 0;
>>>>>           i = first_sg;
>>>>>         do {
>>>>>             from = sg_virt(&sg[i]);
>>>>>             len = sg[i].length;
>>>>> -        to = p + offset;
>>>>> +        to = p + off;
>>>>>               memcpy(to, from, len);
>>>>> -        offset += len;
>>>>> +        off += len;
>>>>>             sg[i].length = 0;
>>>>>             put_page(sg_page(&sg[i]));
>>>>>               i++;
>>>>>             if (i == MAX_SKB_FRAGS)
>>>>>                 i = 0;
>>>>> -    } while (i != last_sg);
>>>>> +    } while (i < last_sg);
>>>>>           sg[first_sg].length = copy;
>>>>>         sg_set_page(&sg[first_sg], page, copy, 0);
>>>>> @@ -2380,7 +2392,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>>             else
>>>>>                 move_from = i + shift;
>>>>>     -        if (move_from == msg->sg_end)
>>>>> +        if (move_from > msg->sg_end)
>>>>>                 break;
>>>>>               sg[i] = sg[move_from];
>>>>> @@ -2396,7 +2408,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>>>>         if (msg->sg_end < 0)
>>>>>             msg->sg_end += MAX_SKB_FRAGS;
>>>>>     out:
>>>>> -    msg->data = sg_virt(&sg[i]) + start - offset;
>>>>> +    msg->data = sg_virt(&sg[first_sg]) + start - offset;
>>>>>         msg->data_end = msg->data + bytes;
>>>>>           return 0;
>>>>>
>>>>
>>>> Please discard this patch. I just noticed that Daniel Borkmann sent some similar fixes for bpf_msg_pull_data.
>>>
>>> Yeah I've been looking at these recently as well, please make sure you test
>>> with the below fixes included to see if there's anything left:
>>
>> I tested the latest net tree which has all the fixes you mentioned and I
>> am still seeing issues.
>>
>> As I already mentioned before on RFC v3 thread, we need to be careful
>> reusing 'offset' while linearizing multiple scatterlist
>> elements.
>> Variable 'offset' is used to calculate the 'msg->data'
>> i.e. msg->data = sg_virt(&sg[first_sg]) + start - offset"
>>
>> We should use different variable name (e.g. off) while linearizing
>> multiple scatterlist elements or a single shared page so that we don't
>> overwrite 'offset' that has correct value already been calculated.
> 
> Agree, sigh, that's also buggy, please submit the below as proper patch,
> thanks!

Sent. More below.

> 
>> A code change for this would look like:
>>
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 60a29ca..076ca09 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -2326,7 +2326,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>   BPF_CALL_4(bpf_msg_pull_data,
>>             struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
>>   {
>> -       unsigned int len = 0, offset = 0, copy = 0;
>> +       unsigned int len = 0, offset = 0, copy = 0, off;
> 
> Nit: probably better name it 'poffset' since this is relative to p.
> 
>>          int bytes = end - start, bytes_sg_total;
>>          struct scatterlist *sg = msg->sg_data;
>>          int first_sg, last_sg, i, shift;
>> @@ -2354,6 +2354,8 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>           * account for the headroom.
>>           */
>>          bytes_sg_total = start - offset + bytes;
>>
>>          if (!msg->sg_copy[i] && bytes_sg_total <= len)
>>                  goto out;
>>
>> @@ -2382,18 +2384,18 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>          if (unlikely(!page))
>>                  return -ENOMEM;
>>          p = page_address(page);
>> -       offset = 0;
>> +       off = 0;
>>
>>          i = first_sg;
>>          do {
>>                  from = sg_virt(&sg[i]);
>>                  len = sg[i].length;
>> -               to = p + offset;
>> +               to = p + off;
>>
>>                  memcpy(to, from, len);
>> -               offset += len;
>> +               off += len;
>>                  sg[i].length = 0;
>>                  put_page(sg_page(&sg[i]));
>>
>>                  sk_msg_iter_var(i);
>>          } while (i != last_sg);
>>
>>
>> Apart from above bug fix, I see issues in below 2 cases:
> 
> Ok.
> 
>> case 1:
>> =======
>> For things like RDS, where sg list is not a ring how to know end of packet?
>>
>> For example I have RDS packet of length 8192 Bytes which is in
>> scatterlist like
>> sge[0].lenghth = 1400
>> sge[1].length = 1448
>> sge[2].length = 1448
>> sge[3].length = 1448
>> sge[4].length = 1448
>> sge[5].length = 1000
>>
>> Now when I execute "bpf_msg_pull_data(msg, 8190, 8196, 0)" on above RDS
>> packet, I expect bpf_msg_pull_data to return error because end is out of
>> bound e.g. 8196 > 8192. Instead current code wraps it to sge index 0!
>> That is if I dump first 6 bytes starting with 'start' I get byte value
>> at offset 8190, 8191 from sge[5] and value at offset 8192, 8193, 8194
>> 8195 bytes from sge[0] from offset 0 ,1 ,2 ,3 respectively - which is
>> not expected!
> 
> For the sg ring case which is what is upstream today, it will find start_sg
> as 5 and last_sg as 6. And therefore bail out in bytes_sg_total > copy test
> since 1004 > 1000. sge[5] offset is 7192 with 1000 bytes len, so start is at
> offset 998 from there and end at 1004. So I get -EINVAL from the mentioned
> test.
> 
> Is msg->sg_start and msg->sg_end set properly in your RDS case? As long as
> there's no wrap-around the helper should be usable also in RDS case. I'm not
> sure though why you see above oob.
> 
>> case 2:
>> =======
>> Same example of RDS packet as above, when I execute
>> "bpf_msg_pull_data(msg, 7191, 8192, 0)"
>> I get -EINVAL which is *wrong*.
>>
>> Debugging this, I found, right before we enter below do while loop the values of variables are:
>> i = 4
>> first_sg = 4
>> last_sg= 5
>> start=7191
>> bytes_sg_total=2448
>> offset=5744
>> copy=0
>> len=1448
>>
>>       do {
>>                  copy += sg[i].length;
>>                  sk_msg_iter_var(i);
>>                  if (bytes_sg_total <= copy)
>>                          break;
>>          } while (i != msg->sg_end);
>>          last_sg = i;
>>
>> However, above loop execute only one time because after one execution,
>> i=5 and msg->sg_end=5. That makes condition "while (i != msg->sg_end)"
>> false. So when loop exit, the values are:
>> i = 5
>> first_sg=4
>> last_sg= 5
>> start=7191
>> bytes_sg_total=2448 <<<<<<
>> offset=5744
>> copy=1448 <<<<<<
>> len=1448
>>
>> And that makes below condition true
>>          if (unlikely(bytes_sg_total > copy))
>>                  return -EINVAL;
> 
> Hmm, is your msg->sg_end correct? I'm getting start_sg of 4 and last_sg of 6.
> So it breaks on the bytes_sg_total <= copy test, and then it merges sges 4 and 5
> into 4 with 2448 length while dropping sge 5. How do you set up sk_msg_buff?
> 
>> There may be one or more similar corner scenarios like case 1 and 2
>> which can be fixed however I am not sure how can we fix so that we can
>> get it right for sg ring and sg list?
> 
> Imho, it should be sufficient to have invariant of msg->sg_start < msg->sg_end
> and having msg->sg_start point to the starting sge while msg->sg_end to the
> last sge slot + 1.

Yup! I was thinking same :)
For RDS, I have "msg->sg_end = sg_nents(sg) - 1". That makes msg->sg_end
= 5 for above example, and therefore -EINVAL.
I changed "msg->sg_end = sg_nents(sg)" and it works without errors (i.e.
equivalent to last sge slot + 1)

I have ran tests with various RDS packet size pulling data with
different start and end values, and all of the tests passed without any
issues.


BTW, I'll send RFC v4 for RDS as soon as all bpf_msg_pull_data fixes
will get pulled into net-next tree.

Thanks.

-Tushar
> 
>> Thanks.
>> -Tushar
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=5b24109b0563d45094c470684c1f8cea1af269f8
>>> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=0e06b227c5221dd51b5569de93f3b9f532be4a32
>>> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=2e43f95dd8ee62bc8bf57f2afac37fbd70c8d565
>>> https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/commit/net/core/filter.c?id=a8cf76a9023bc6709b1361d06bb2fae5227b9d68
>>>
>>> Thanks,
>>> Daniel
>>>
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-09-01  1:52 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-30  0:07 [PATCH net] ebpf: fix bpf_msg_pull_data Tushar Dave
2018-08-30  0:21 ` Tushar Dave
2018-08-30  7:20   ` Daniel Borkmann
2018-08-31  8:37     ` Tushar Dave
2018-08-31 12:15       ` Daniel Borkmann
2018-08-31 21:41         ` Tushar Dave

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.