All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs
@ 2018-11-21  0:18 Vlad Dumitrescu
  2018-11-21  0:49 ` Eric Dumazet
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Vlad Dumitrescu @ 2018-11-21  0:18 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, netdev
  Cc: Eric Dumazet, Willem de Bruijn, Vlad Dumitrescu

This could be used to rate limit egress traffic in concert with a qdisc
which supports Earliest Departure Time, such as FQ.

Signed-off-by: Vlad Dumitrescu <vladum@google.com>
---
 include/uapi/linux/bpf.h                    |  1 +
 net/core/filter.c                           | 26 +++++++++++++++++++++
 tools/include/uapi/linux/bpf.h              |  1 +
 tools/testing/selftests/bpf/test_verifier.c |  4 ++++
 4 files changed, 32 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c1554aa074659..23e2031a43d43 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2468,6 +2468,7 @@ struct __sk_buff {
 
 	__u32 data_meta;
 	struct bpf_flow_keys *flow_keys;
+	__u64 tstamp;
 };
 
 struct bpf_tunnel_key {
diff --git a/net/core/filter.c b/net/core/filter.c
index f6ca38a7d4332..c45155c8e519c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (size != sizeof(struct bpf_flow_keys *))
 			return false;
 		break;
+	case bpf_ctx_range(struct __sk_buff, tstamp):
+		if (size != sizeof(__u64))
+			return false;
+		break;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_end):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -5624,6 +5629,7 @@ static bool cg_skb_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	case bpf_ctx_range(struct __sk_buff, data):
 	case bpf_ctx_range(struct __sk_buff, data_end):
@@ -5665,6 +5671,7 @@ static bool lwt_is_valid_access(int off, int size,
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -5874,6 +5881,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 		case bpf_ctx_range(struct __sk_buff, priority):
 		case bpf_ctx_range(struct __sk_buff, tc_classid):
 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+		case bpf_ctx_range(struct __sk_buff, tstamp):
 			break;
 		default:
 			return false;
@@ -6093,6 +6101,7 @@ static bool sk_skb_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -6179,6 +6188,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -6488,6 +6498,22 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
 				      si->src_reg, off);
 		break;
+
+	case offsetof(struct __sk_buff, tstamp):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_DW,
+					      si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     tstamp, 8,
+							     target_size));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_DW,
+					      si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     tstamp, 8,
+							     target_size));
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c1554aa074659..23e2031a43d43 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2468,6 +2468,7 @@ struct __sk_buff {
 
 	__u32 data_meta;
 	struct bpf_flow_keys *flow_keys;
+	__u64 tstamp;
 };
 
 struct bpf_tunnel_key {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 54d16fbdef8b9..10b04a52904e0 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2446,6 +2446,10 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, tc_index)),
 			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
 				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tstamp)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tstamp)),
 			BPF_EXIT_INSN(),
 		},
 		.errstr_unpriv = "",
-- 
2.19.1.1215.g8438c0b245-goog

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs
  2018-11-21  0:18 [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs Vlad Dumitrescu
@ 2018-11-21  0:49 ` Eric Dumazet
  2018-11-21  2:01   ` Willem de Bruijn
  2018-11-21  2:40 ` Alexei Starovoitov
  2018-11-22 19:39 ` [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs Vlad Dumitrescu
  2 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2018-11-21  0:49 UTC (permalink / raw)
  To: Vlad Dumitrescu, Alexei Starovoitov, Daniel Borkmann, netdev
  Cc: Eric Dumazet, Willem de Bruijn



On 11/20/2018 04:18 PM, Vlad Dumitrescu wrote:
> This could be used to rate limit egress traffic in concert with a qdisc
> which supports Earliest Departure Time, such as FQ.
> 
> Signed-off-by: Vlad Dumitrescu <vladum@google.com>
> ---
>  include/uapi/linux/bpf.h                    |  1 +
>  net/core/filter.c                           | 26 +++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h              |  1 +
>  tools/testing/selftests/bpf/test_verifier.c |  4 ++++
>  4 files changed, 32 insertions(+)
>

Awesome, thanks Vlad

Note that this can also be used to implement a delay (a la netem).

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs
  2018-11-21  0:49 ` Eric Dumazet
@ 2018-11-21  2:01   ` Willem de Bruijn
  0 siblings, 0 replies; 10+ messages in thread
From: Willem de Bruijn @ 2018-11-21  2:01 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Vlad Dumitrescu, Alexei Starovoitov, Daniel Borkmann,
	Network Development, Eric Dumazet, Willem de Bruijn

On Tue, Nov 20, 2018 at 8:22 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 11/20/2018 04:18 PM, Vlad Dumitrescu wrote:
> > This could be used to rate limit egress traffic in concert with a qdisc
> > which supports Earliest Departure Time, such as FQ.
> >
> > Signed-off-by: Vlad Dumitrescu <vladum@google.com>
> > ---
> >  include/uapi/linux/bpf.h                    |  1 +
> >  net/core/filter.c                           | 26 +++++++++++++++++++++
> >  tools/include/uapi/linux/bpf.h              |  1 +
> >  tools/testing/selftests/bpf/test_verifier.c |  4 ++++
> >  4 files changed, 32 insertions(+)
> >
>
> Awesome, thanks Vlad
>
> Note that this also can be used to implement a delay (a la netem).
>
> Acked-by: Eric Dumazet <edumazet@google.com>

Acked-by: Willem de Bruijn <willemb@google.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs
  2018-11-21  0:18 [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs Vlad Dumitrescu
  2018-11-21  0:49 ` Eric Dumazet
@ 2018-11-21  2:40 ` Alexei Starovoitov
  2018-11-21 13:08   ` Eric Dumazet
  2018-11-22 19:39 ` [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs Vlad Dumitrescu
  2 siblings, 1 reply; 10+ messages in thread
From: Alexei Starovoitov @ 2018-11-21  2:40 UTC (permalink / raw)
  To: Vlad Dumitrescu
  Cc: Alexei Starovoitov, Daniel Borkmann, netdev, Eric Dumazet,
	Willem de Bruijn

On Tue, Nov 20, 2018 at 07:18:48PM -0500, Vlad Dumitrescu wrote:
> This could be used to rate limit egress traffic in concert with a qdisc
> which supports Earliest Departure Time, such as FQ.
> 
> Signed-off-by: Vlad Dumitrescu <vladum@google.com>
> ---
>  include/uapi/linux/bpf.h                    |  1 +
>  net/core/filter.c                           | 26 +++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h              |  1 +
>  tools/testing/selftests/bpf/test_verifier.c |  4 ++++
>  4 files changed, 32 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c1554aa074659..23e2031a43d43 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2468,6 +2468,7 @@ struct __sk_buff {
>  
>  	__u32 data_meta;
>  	struct bpf_flow_keys *flow_keys;
> +	__u64 tstamp;
>  };
>  
>  struct bpf_tunnel_key {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index f6ca38a7d4332..c45155c8e519c 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
>  		if (size != sizeof(struct bpf_flow_keys *))
>  			return false;
>  		break;
> +	case bpf_ctx_range(struct __sk_buff, tstamp):
> +		if (size != sizeof(__u64))
> +			return false;
> +		break;
>  	default:
>  		/* Only narrow read access allowed for now. */
>  		if (type == BPF_WRITE) {
> @@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size,
>  	case bpf_ctx_range(struct __sk_buff, data_end):
>  	case bpf_ctx_range(struct __sk_buff, flow_keys):
>  	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
> +	case bpf_ctx_range(struct __sk_buff, tstamp):
>  		return false;
>  	}
>  
> @@ -5624,6 +5629,7 @@ static bool cg_skb_is_valid_access(int off, int size,
>  	case bpf_ctx_range(struct __sk_buff, tc_classid):
>  	case bpf_ctx_range(struct __sk_buff, data_meta):
>  	case bpf_ctx_range(struct __sk_buff, flow_keys):
> +	case bpf_ctx_range(struct __sk_buff, tstamp):
>  		return false;

looks good to me.

Any particular reason you decided to disable it for cg_skb?
It seems to me the same EDT approach will work from
cgroup-bpf skb hooks just as well, and then we can have a neat
way of controlling traffic per-container instead of globally via tc-clsbpf.
If you're already on cgroup v2 it will save you a lot of classifier
cycles, since you'd be able to group apps by cgroup
instead of relying on IP only.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs
  2018-11-21  2:40 ` Alexei Starovoitov
@ 2018-11-21 13:08   ` Eric Dumazet
  2018-11-21 18:48     ` Vlad Dumitrescu
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2018-11-21 13:08 UTC (permalink / raw)
  To: Alexei Starovoitov, Vlad Dumitrescu
  Cc: Alexei Starovoitov, Daniel Borkmann, netdev, Eric Dumazet,
	Willem de Bruijn



On 11/20/2018 06:40 PM, Alexei Starovoitov wrote:

> 
> looks good to me.
> 
> Any particular reason you decided to disable it for cg_skb ?
> It seems to me the same EDT approach will work from
> cgroup-bpf skb hooks just as well and then we can have neat
> way of controlling traffic per-container instead of tc-clsbpf global.
> If you're already on cgroup v2 it will save you a lot of classifier
> cycles, since you'd be able to group apps by cgroup
> instead of relying on ip only.

Vlad first wrote a complete version, but we felt explaining the _why_
was probably harder.

No particular reason, other than having to write more tests perhaps.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs
  2018-11-21 13:08   ` Eric Dumazet
@ 2018-11-21 18:48     ` Vlad Dumitrescu
  2018-11-21 22:46       ` Alexei Starovoitov
  2018-11-21 22:57       ` Daniel Borkmann
  0 siblings, 2 replies; 10+ messages in thread
From: Vlad Dumitrescu @ 2018-11-21 18:48 UTC (permalink / raw)
  To: eric.dumazet, alexei.starovoitov
  Cc: Vlad Dumitrescu, ast, Daniel Borkmann, netdev, edumazet, willemb

On Wed, Nov 21, 2018 at 5:08 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 11/20/2018 06:40 PM, Alexei Starovoitov wrote:
>
> >
> > looks good to me.
> >
> > Any particular reason you decided to disable it for cg_skb ?
> > It seems to me the same EDT approach will work from
> > cgroup-bpf skb hooks just as well and then we can have neat
> > way of controlling traffic per-container instead of tc-clsbpf global.
> > If you're already on cgroup v2 it will save you a lot of classifier
> > cycles, since you'd be able to group apps by cgroup
> > instead of relying on ip only.
>
> Vlad first wrote a complete version, but we felt explaining the _why_
> was probably harder.
>
> No particular reason, other than having to write more tests perhaps.

This sounds reasonable to me. I can prepare a v2.

Any concerns regarding capabilities? For example data and data_end are
only available to CAP_SYS_ADMIN. Note that enforcement of this would
be done by a global component later in the pipeline (e.g., FQ qdisc).

Any opinions on sk_filter, lwt, and sk_skb before I send v2?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs
  2018-11-21 18:48     ` Vlad Dumitrescu
@ 2018-11-21 22:46       ` Alexei Starovoitov
  2018-11-21 22:57       ` Daniel Borkmann
  1 sibling, 0 replies; 10+ messages in thread
From: Alexei Starovoitov @ 2018-11-21 22:46 UTC (permalink / raw)
  To: Vlad Dumitrescu
  Cc: eric.dumazet, Vlad Dumitrescu, ast, Daniel Borkmann, netdev,
	edumazet, willemb

On Wed, Nov 21, 2018 at 10:48:21AM -0800, Vlad Dumitrescu wrote:
> On Wed, Nov 21, 2018 at 5:08 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> >
> >
> > On 11/20/2018 06:40 PM, Alexei Starovoitov wrote:
> >
> > >
> > > looks good to me.
> > >
> > > Any particular reason you decided to disable it for cg_skb ?
> > > It seems to me the same EDT approach will work from
> > > cgroup-bpf skb hooks just as well and then we can have neat
> > > way of controlling traffic per-container instead of tc-clsbpf global.
> > > If you're already on cgroup v2 it will save you a lot of classifier
> > > cycles, since you'd be able to group apps by cgroup
> > > instead of relying on ip only.
> >
> > Vlad first wrote a complete version, but we felt explaining the _why_
> > was probably harder.
> >
> > No particular reason, other than having to write more tests perhaps.
> 
> This sounds reasonable to me. I can prepare a v2.

thank you

> Any concerns regarding capabilities? For example data and data_end are
> only available to CAP_SYS_ADMIN. Note that enforcement of this would
> be done by a global component later in the pipeline (e.g., FQ qdisc).

I'd require CAP_SYS_ADMIN for now, since I'm not sure whether arbitrary
tstamp values will be acceptable to FQ.

> Any opinions on sk_filter, lwt, and sk_skb before I send v2?

sk_filter is not appealing, since it's too late in the stack.
lwt could be interesting, but I'd wait until the first user appears.
sk_skb would be useful, but it requires more work.
We'll follow up on sk_skb with our own patches.

Thanks!

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs
  2018-11-21 18:48     ` Vlad Dumitrescu
  2018-11-21 22:46       ` Alexei Starovoitov
@ 2018-11-21 22:57       ` Daniel Borkmann
  1 sibling, 0 replies; 10+ messages in thread
From: Daniel Borkmann @ 2018-11-21 22:57 UTC (permalink / raw)
  To: Vlad Dumitrescu, eric.dumazet, alexei.starovoitov
  Cc: Vlad Dumitrescu, ast, netdev, edumazet, willemb

On 11/21/2018 07:48 PM, Vlad Dumitrescu wrote:
> On Wed, Nov 21, 2018 at 5:08 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> On 11/20/2018 06:40 PM, Alexei Starovoitov wrote:
>>>
>>> looks good to me.
>>>
>>> Any particular reason you decided to disable it for cg_skb ?
>>> It seems to me the same EDT approach will work from
>>> cgroup-bpf skb hooks just as well and then we can have neat
>>> way of controlling traffic per-container instead of tc-clsbpf global.
>>> If you're already on cgroup v2 it will save you a lot of classifier
>>> cycles, since you'd be able to group apps by cgroup
>>> instead of relying on ip only.
>>
>> Vlad first wrote a complete version, but we felt explaining the _why_
>> was probably harder.
>>
>> No particular reason, other than having to write more tests perhaps.
> 
> This sounds reasonable to me. I can prepare a v2.
> 
> Any concerns regarding capabilities? For example data and data_end are
> only available to CAP_SYS_ADMIN. Note that enforcement of this would
> be done by a global component later in the pipeline (e.g., FQ qdisc).

cg_skb_is_valid_access() has the CAP_SYS_ADMIN enforcement for direct
packet access, since cg_skb programs can also run unprivileged. It makes
sense to do the same for skb->tstamp, for the STX_MEM part at least.

> Any opinions on sk_filter, lwt, and sk_skb before I send v2?

I'd probably leave that out for the time being if there is no concrete
use at this point.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs
  2018-11-21  0:18 [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs Vlad Dumitrescu
  2018-11-21  0:49 ` Eric Dumazet
  2018-11-21  2:40 ` Alexei Starovoitov
@ 2018-11-22 19:39 ` Vlad Dumitrescu
  2018-11-22 23:49   ` Alexei Starovoitov
  2 siblings, 1 reply; 10+ messages in thread
From: Vlad Dumitrescu @ 2018-11-22 19:39 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, netdev
  Cc: Eric Dumazet, Willem de Bruijn, Vlad Dumitrescu

This could be used to rate limit egress traffic in concert with a qdisc
which supports Earliest Departure Time, such as FQ.

Write access from cg skb progs is allowed only with CAP_SYS_ADMIN, since
the value will be used by downstream qdiscs. It might make sense to relax this.

Changes v1 -> v2:
  - allow access from cg skb, write only with CAP_SYS_ADMIN

Signed-off-by: Vlad Dumitrescu <vladum@google.com>
---
 include/uapi/linux/bpf.h                    |  1 +
 net/core/filter.c                           | 29 +++++++++++++++++++++
 tools/include/uapi/linux/bpf.h              |  1 +
 tools/testing/selftests/bpf/test_verifier.c | 29 +++++++++++++++++++++
 4 files changed, 60 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c1554aa074659..23e2031a43d43 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2468,6 +2468,7 @@ struct __sk_buff {
 
 	__u32 data_meta;
 	struct bpf_flow_keys *flow_keys;
+	__u64 tstamp;
 };
 
 struct bpf_tunnel_key {
diff --git a/net/core/filter.c b/net/core/filter.c
index f6ca38a7d4332..65dc13aeca7c4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (size != sizeof(struct bpf_flow_keys *))
 			return false;
 		break;
+	case bpf_ctx_range(struct __sk_buff, tstamp):
+		if (size != sizeof(__u64))
+			return false;
+		break;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_end):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -5638,6 +5643,10 @@ static bool cg_skb_is_valid_access(int off, int size,
 		case bpf_ctx_range(struct __sk_buff, priority):
 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 			break;
+		case bpf_ctx_range(struct __sk_buff, tstamp):
+			if (!capable(CAP_SYS_ADMIN))
+				return false;
+			break;
 		default:
 			return false;
 		}
@@ -5665,6 +5674,7 @@ static bool lwt_is_valid_access(int off, int size,
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -5874,6 +5884,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 		case bpf_ctx_range(struct __sk_buff, priority):
 		case bpf_ctx_range(struct __sk_buff, tc_classid):
 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+		case bpf_ctx_range(struct __sk_buff, tstamp):
 			break;
 		default:
 			return false;
@@ -6093,6 +6104,7 @@ static bool sk_skb_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -6179,6 +6191,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -6488,6 +6501,22 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
 				      si->src_reg, off);
 		break;
+
+	case offsetof(struct __sk_buff, tstamp):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_DW,
+					      si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     tstamp, 8,
+							     target_size));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_DW,
+					      si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     tstamp, 8,
+							     target_size));
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c1554aa074659..23e2031a43d43 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2468,6 +2468,7 @@ struct __sk_buff {
 
 	__u32 data_meta;
 	struct bpf_flow_keys *flow_keys;
+	__u64 tstamp;
 };
 
 struct bpf_tunnel_key {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 54d16fbdef8b9..537a8f91af02d 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2446,6 +2446,10 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, tc_index)),
 			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
 				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tstamp)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tstamp)),
 			BPF_EXIT_INSN(),
 		},
 		.errstr_unpriv = "",
@@ -5297,6 +5301,31 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R2 leaks addr into helper function",
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
 	},
+	{
+		"write tstamp from CGROUP_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tstamp)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "invalid bpf_context access off=152 size=8",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"read tstamp from CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tstamp)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
 	{
 		"multiple registers share map_lookup_elem result",
 		.insns = {
-- 
2.19.1.1215.g8438c0b245-goog

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs
  2018-11-22 19:39 ` [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs Vlad Dumitrescu
@ 2018-11-22 23:49   ` Alexei Starovoitov
  0 siblings, 0 replies; 10+ messages in thread
From: Alexei Starovoitov @ 2018-11-22 23:49 UTC (permalink / raw)
  To: Vlad Dumitrescu
  Cc: Alexei Starovoitov, Daniel Borkmann, netdev, Eric Dumazet,
	Willem de Bruijn

On Thu, Nov 22, 2018 at 02:39:16PM -0500, Vlad Dumitrescu wrote:
> This could be used to rate limit egress traffic in concert with a qdisc
> which supports Earliest Departure Time, such as FQ.
> 
> Write access from cg skb progs only with CAP_SYS_ADMIN, since the value
> will be used by downstream qdiscs. It might make sense to relax this.
> 
> Changes v1 -> v2:
>   - allow access from cg skb, write only with CAP_SYS_ADMIN
> 
> Signed-off-by: Vlad Dumitrescu <vladum@google.com>

Applied to bpf-next.
I copied Eric's and Willem's Acks from v1, since v2 is essentially the same.
Thanks everyone!

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2018-11-23 10:31 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-21  0:18 [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs Vlad Dumitrescu
2018-11-21  0:49 ` Eric Dumazet
2018-11-21  2:01   ` Willem de Bruijn
2018-11-21  2:40 ` Alexei Starovoitov
2018-11-21 13:08   ` Eric Dumazet
2018-11-21 18:48     ` Vlad Dumitrescu
2018-11-21 22:46       ` Alexei Starovoitov
2018-11-21 22:57       ` Daniel Borkmann
2018-11-22 19:39 ` [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs Vlad Dumitrescu
2018-11-22 23:49   ` Alexei Starovoitov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.