All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next v1 0/8] Dynptr convenience helpers
@ 2022-09-08  0:02 Joanne Koong
  2022-09-08  0:02 ` [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly Joanne Koong
                   ` (7 more replies)
  0 siblings, 8 replies; 33+ messages in thread
From: Joanne Koong @ 2022-09-08  0:02 UTC (permalink / raw)
  To: bpf; +Cc: daniel, martin.lau, andrii, ast, Kernel-team, Joanne Koong

This patchset is the 3rd in the dynptr series. The 1st can be found here [0]
and the 2nd can be found here [1].

In this patchset, the following convenience helpers are added for interacting
with bpf dynamic pointers:

    * bpf_dynptr_data_rdonly
    * bpf_dynptr_trim
    * bpf_dynptr_advance
    * bpf_dynptr_is_null
    * bpf_dynptr_is_rdonly
    * bpf_dynptr_get_size
    * bpf_dynptr_get_offset
    * bpf_dynptr_clone
    * bpf_dynptr_iterator

[0] https://lore.kernel.org/bpf/20220523210712.3641569-1-joannelkoong@gmail.com/
[1] https://lore.kernel.org/bpf/20220907183129.745846-1-joannelkoong@gmail.com/

Joanne Koong (8):
  bpf: Add bpf_dynptr_data_rdonly
  bpf: Add bpf_dynptr_trim and bpf_dynptr_advance
  bpf: Add bpf_dynptr_is_null and bpf_dynptr_is_rdonly
  bpf: Add bpf_dynptr_get_size and bpf_dynptr_get_offset
  bpf: Add bpf_dynptr_clone
  bpf: Add verifier support for custom callback return range
  bpf: Add bpf_dynptr_iterator
  selftests/bpf: Tests for dynptr convenience helpers

 include/linux/bpf_verifier.h                  |   1 +
 include/uapi/linux/bpf.h                      | 120 ++++
 kernel/bpf/helpers.c                          | 231 +++++++-
 kernel/bpf/verifier.c                         | 155 +++--
 scripts/bpf_doc.py                            |   3 +
 tools/include/uapi/linux/bpf.h                | 120 ++++
 .../testing/selftests/bpf/prog_tests/dynptr.c |  30 +
 .../testing/selftests/bpf/progs/dynptr_fail.c | 462 +++++++++++++++
 .../selftests/bpf/progs/dynptr_success.c      | 551 +++++++++++++++++-
 9 files changed, 1617 insertions(+), 56 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly
  2022-09-08  0:02 [PATCH bpf-next v1 0/8] Dynptr convenience helpers Joanne Koong
@ 2022-09-08  0:02 ` Joanne Koong
  2022-09-09 15:29   ` Song Liu
  2022-09-09 15:51   ` Shmulik Ladkani
  2022-09-08  0:02 ` [PATCH bpf-next v1 2/8] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance Joanne Koong
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 33+ messages in thread
From: Joanne Koong @ 2022-09-08  0:02 UTC (permalink / raw)
  To: bpf; +Cc: daniel, martin.lau, andrii, ast, Kernel-team, Joanne Koong

Add a new helper bpf_dynptr_data_rdonly

void *bpf_dynptr_data_rdonly(struct bpf_dynptr *ptr, u32 offset, u32 len);

which gets a read-only pointer to the underlying dynptr data.

This is equivalent to bpf_dynptr_data(), except the pointer returned is
read-only, which allows this to support both read-write and read-only
dynptrs.

One example where this will be useful is for skb dynptrs where the
program type only allows read-only access to packet data. This API will
provide a way to obtain a data slice that can be used for direct reads.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 include/uapi/linux/bpf.h       | 15 +++++++++++++++
 kernel/bpf/helpers.c           | 32 ++++++++++++++++++++++++++------
 kernel/bpf/verifier.c          |  7 +++++--
 tools/include/uapi/linux/bpf.h | 15 +++++++++++++++
 4 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c55c23f25c0f..cce3356765fc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5439,6 +5439,20 @@ union bpf_attr {
  *		*flags* is currently unused, it must be 0 for now.
  *	Return
  *		0 on success, -EINVAL if flags is not 0.
+ *
+ * void *bpf_dynptr_data_rdonly(struct bpf_dynptr *ptr, u32 offset, u32 len)
+ *	Description
+ *		Get a read-only pointer to the underlying dynptr data.
+ *
+ *		This is equivalent to **bpf_dynptr_data**\ () except the
+ *		pointer returned is read-only, which allows this to support
+ *		both read-write and read-only dynptrs. For more details on using
+ *		the API, please refer to **bpf_dynptr_data**\ ().
+ *	Return
+ *		Read-only pointer to the underlying dynptr data, NULL if the
+ *		dynptr is invalid or if the offset and length is out of bounds
+ *		or in a paged buffer for skb-type dynptrs or across fragments
+ *		for xdp-type dynptrs.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5652,6 +5666,7 @@ union bpf_attr {
 	FN(ktime_get_tai_ns),		\
 	FN(dynptr_from_skb),		\
 	FN(dynptr_from_xdp),		\
+	FN(dynptr_data_rdonly),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index befafae34a63..30a59c9e5df3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1572,7 +1572,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
+void *__bpf_dynptr_data(struct bpf_dynptr_kern *ptr, u32 offset, u32 len, bool writable)
 {
 	enum bpf_dynptr_type type;
 	void *data;
@@ -1585,7 +1585,7 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
 	if (err)
 		return 0;
 
-	if (bpf_dynptr_is_rdonly(ptr))
+	if (writable && bpf_dynptr_is_rdonly(ptr))
 		return 0;
 
 	type = bpf_dynptr_get_type(ptr);
@@ -1610,13 +1610,31 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
 		/* if the requested data in across fragments, then it cannot
 		 * be accessed directly - bpf_xdp_pointer will return NULL
 		 */
-		return (unsigned long)bpf_xdp_pointer(ptr->data,
-						      ptr->offset + offset, len);
+		return bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
 	default:
-		WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
+		WARN_ONCE(true, "__bpf_dynptr_data: unknown dynptr type %d\n", type);
 		return 0;
 	}
-	return (unsigned long)(data + ptr->offset + offset);
+	return data + ptr->offset + offset;
+}
+
+BPF_CALL_3(bpf_dynptr_data_rdonly, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
+{
+	return (unsigned long)__bpf_dynptr_data(ptr, offset, len, false);
+}
+
+static const struct bpf_func_proto bpf_dynptr_data_rdonly_proto = {
+	.func		= bpf_dynptr_data_rdonly,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_DYNPTR_MEM_OR_NULL | MEM_RDONLY,
+	.arg1_type	= ARG_PTR_TO_DYNPTR,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
+};
+
+BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
+{
+	return (unsigned long)__bpf_dynptr_data(ptr, offset, len, true);
 }
 
 static const struct bpf_func_proto bpf_dynptr_data_proto = {
@@ -1698,6 +1716,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_dynptr_write_proto;
 	case BPF_FUNC_dynptr_data:
 		return &bpf_dynptr_data_proto;
+	case BPF_FUNC_dynptr_data_rdonly:
+		return &bpf_dynptr_data_rdonly_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b1f66a1cc690..c312d931359d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -506,7 +506,8 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id)
 
 static bool is_dynptr_ref_function(enum bpf_func_id func_id)
 {
-	return func_id == BPF_FUNC_dynptr_data;
+	return func_id == BPF_FUNC_dynptr_data ||
+		func_id == BPF_FUNC_dynptr_data_rdonly;
 }
 
 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
@@ -7398,6 +7399,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		}
 		break;
 	case BPF_FUNC_dynptr_data:
+	case BPF_FUNC_dynptr_data_rdonly:
 	{
 		struct bpf_reg_state *reg;
 
@@ -7495,7 +7497,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
 		regs[BPF_REG_0].mem_size = meta.mem_size;
-		if (func_id == BPF_FUNC_dynptr_data) {
+		if (func_id == BPF_FUNC_dynptr_data ||
+		    func_id == BPF_FUNC_dynptr_data_rdonly) {
 			if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
 				regs[BPF_REG_0].type |= DYNPTR_TYPE_SKB;
 			else if (dynptr_type == BPF_DYNPTR_TYPE_XDP)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c55c23f25c0f..cce3356765fc 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5439,6 +5439,20 @@ union bpf_attr {
  *		*flags* is currently unused, it must be 0 for now.
  *	Return
  *		0 on success, -EINVAL if flags is not 0.
+ *
+ * void *bpf_dynptr_data_rdonly(struct bpf_dynptr *ptr, u32 offset, u32 len)
+ *	Description
+ *		Get a read-only pointer to the underlying dynptr data.
+ *
+ *		This is equivalent to **bpf_dynptr_data**\ () except the
+ *		pointer returned is read-only, which allows this to support
+ *		both read-write and read-only dynptrs. For more details on using
+ *		the API, please refer to **bpf_dynptr_data**\ ().
+ *	Return
+ *		Read-only pointer to the underlying dynptr data, NULL if the
+ *		dynptr is invalid or if the offset and length is out of bounds
+ *		or in a paged buffer for skb-type dynptrs or across fragments
+ *		for xdp-type dynptrs.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5652,6 +5666,7 @@ union bpf_attr {
 	FN(ktime_get_tai_ns),		\
 	FN(dynptr_from_skb),		\
 	FN(dynptr_from_xdp),		\
+	FN(dynptr_data_rdonly),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH bpf-next v1 2/8] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance
  2022-09-08  0:02 [PATCH bpf-next v1 0/8] Dynptr convenience helpers Joanne Koong
  2022-09-08  0:02 ` [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly Joanne Koong
@ 2022-09-08  0:02 ` Joanne Koong
  2022-09-09 15:32   ` Song Liu
                     ` (2 more replies)
  2022-09-08  0:02 ` [PATCH bpf-next v1 3/8] bpf: Add bpf_dynptr_is_null and bpf_dynptr_is_rdonly Joanne Koong
                   ` (5 subsequent siblings)
  7 siblings, 3 replies; 33+ messages in thread
From: Joanne Koong @ 2022-09-08  0:02 UTC (permalink / raw)
  To: bpf; +Cc: daniel, martin.lau, andrii, ast, Kernel-team, Joanne Koong

Add two new helper functions: bpf_dynptr_trim and bpf_dynptr_advance.

bpf_dynptr_trim decreases the size of a dynptr by the specified
number of bytes (offset remains the same). bpf_dynptr_advance advances
the offset of the dynptr by the specified number of bytes (size
decreases correspondingly).

One example where trimming / advancing the dynptr may be useful is for
hashing. If the dynptr points to a larger struct, it is possible to hash
an individual field within the struct through dynptrs by using
bpf_dynptr_advance+trim.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 include/uapi/linux/bpf.h       | 16 +++++++++
 kernel/bpf/helpers.c           | 63 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 16 +++++++++
 3 files changed, 95 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index cce3356765fc..3b054553be30 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5453,6 +5453,20 @@ union bpf_attr {
  *		dynptr is invalid or if the offset and length is out of bounds
  *		or in a paged buffer for skb-type dynptrs or across fragments
  *		for xdp-type dynptrs.
+ *
+ * long bpf_dynptr_advance(struct bpf_dynptr *ptr, u32 len)
+ *	Description
+ *		Advance a dynptr by *len*.
+ *	Return
+ *		0 on success, -EINVAL if the dynptr is invalid, -ERANGE if *len*
+ *		exceeds the bounds of the dynptr.
+ *
+ * long bpf_dynptr_trim(struct bpf_dynptr *ptr, u32 len)
+ *	Description
+ *		Trim a dynptr by *len*.
+ *	Return
+ *		0 on success, -EINVAL if the dynptr is invalid, -ERANGE if
+ *		trying to trim more bytes than the size of the dynptr.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5667,6 +5681,8 @@ union bpf_attr {
 	FN(dynptr_from_skb),		\
 	FN(dynptr_from_xdp),		\
 	FN(dynptr_data_rdonly),		\
+	FN(dynptr_advance),		\
+	FN(dynptr_trim),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 30a59c9e5df3..9f356105ab49 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1423,6 +1423,13 @@ static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
 	return ptr->size & DYNPTR_SIZE_MASK;
 }
 
+static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
+{
+	u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
+
+	ptr->size = new_size | metadata;
+}
+
 int bpf_dynptr_check_size(u32 size)
 {
 	return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
@@ -1646,6 +1653,58 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
 	.arg3_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
 };
 
+BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
+{
+	u32 size;
+
+	if (!ptr->data)
+		return -EINVAL;
+
+	size = bpf_dynptr_get_size(ptr);
+
+	if (len > size)
+		return -ERANGE;
+
+	ptr->offset += len;
+
+	bpf_dynptr_set_size(ptr, size - len);
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_dynptr_advance_proto = {
+	.func		= bpf_dynptr_advance,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_DYNPTR,
+	.arg2_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_dynptr_trim, struct bpf_dynptr_kern *, ptr, u32, len)
+{
+	u32 size;
+
+	if (!ptr->data)
+		return -EINVAL;
+
+	size = bpf_dynptr_get_size(ptr);
+
+	if (len > size)
+		return -ERANGE;
+
+	bpf_dynptr_set_size(ptr, size - len);
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_dynptr_trim_proto = {
+	.func		= bpf_dynptr_trim,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_DYNPTR,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1718,6 +1777,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_dynptr_data_proto;
 	case BPF_FUNC_dynptr_data_rdonly:
 		return &bpf_dynptr_data_rdonly_proto;
+	case BPF_FUNC_dynptr_advance:
+		return &bpf_dynptr_advance_proto;
+	case BPF_FUNC_dynptr_trim:
+		return &bpf_dynptr_trim_proto;
 	default:
 		break;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index cce3356765fc..3b054553be30 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5453,6 +5453,20 @@ union bpf_attr {
  *		dynptr is invalid or if the offset and length is out of bounds
  *		or in a paged buffer for skb-type dynptrs or across fragments
  *		for xdp-type dynptrs.
+ *
+ * long bpf_dynptr_advance(struct bpf_dynptr *ptr, u32 len)
+ *	Description
+ *		Advance a dynptr by *len*.
+ *	Return
+ *		0 on success, -EINVAL if the dynptr is invalid, -ERANGE if *len*
+ *		exceeds the bounds of the dynptr.
+ *
+ * long bpf_dynptr_trim(struct bpf_dynptr *ptr, u32 len)
+ *	Description
+ *		Trim a dynptr by *len*.
+ *	Return
+ *		0 on success, -EINVAL if the dynptr is invalid, -ERANGE if
+ *		trying to trim more bytes than the size of the dynptr.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5667,6 +5681,8 @@ union bpf_attr {
 	FN(dynptr_from_skb),		\
 	FN(dynptr_from_xdp),		\
 	FN(dynptr_data_rdonly),		\
+	FN(dynptr_advance),		\
+	FN(dynptr_trim),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH bpf-next v1 3/8] bpf: Add bpf_dynptr_is_null and bpf_dynptr_is_rdonly
  2022-09-08  0:02 [PATCH bpf-next v1 0/8] Dynptr convenience helpers Joanne Koong
  2022-09-08  0:02 ` [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly Joanne Koong
  2022-09-08  0:02 ` [PATCH bpf-next v1 2/8] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance Joanne Koong
@ 2022-09-08  0:02 ` Joanne Koong
  2022-09-09 15:46   ` Song Liu
  2022-09-08  0:02 ` [PATCH bpf-next v1 4/8] bpf: Add bpf_dynptr_get_size and bpf_dynptr_get_offset Joanne Koong
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 33+ messages in thread
From: Joanne Koong @ 2022-09-08  0:02 UTC (permalink / raw)
  To: bpf; +Cc: daniel, martin.lau, andrii, ast, Kernel-team, Joanne Koong

Add two new helper functions: bpf_dynptr_is_null and
bpf_dynptr_is_rdonly.

bpf_dynptr_is_null returns true if the dynptr is null / invalid
(determined by whether ptr->data is NULL), else false if the dynptr
is valid.

bpf_dynptr_is_rdonly returns true if the dynptr is read-only,
else false if the dynptr is read-writable.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 include/uapi/linux/bpf.h       | 20 ++++++++++++++++++
 kernel/bpf/helpers.c           | 37 +++++++++++++++++++++++++++++++---
 scripts/bpf_doc.py             |  3 +++
 tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++
 4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3b054553be30..90b6d0744df2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5467,6 +5467,24 @@ union bpf_attr {
  *	Return
  *		0 on success, -EINVAL if the dynptr is invalid, -ERANGE if
  *		trying to trim more bytes than the size of the dynptr.
+ *
+ * bool bpf_dynptr_is_null(struct bpf_dynptr *ptr)
+ *	Description
+ *		Determine whether a dynptr is null / invalid.
+ *
+ *		*ptr* must be an initialized dynptr.
+ *	Return
+ *		True if the dynptr is null, else false.
+ *
+ * bool bpf_dynptr_is_rdonly(struct bpf_dynptr *ptr)
+ *	Description
+ *		Determine whether a dynptr is read-only.
+ *
+ *		*ptr* must be an initialized dynptr. If *ptr*
+ *		is a null dynptr, this will return false.
+ *	Return
+ *		True if the dynptr is read-only and a valid dynptr,
+ *		else false.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5683,6 +5701,8 @@ union bpf_attr {
 	FN(dynptr_data_rdonly),		\
 	FN(dynptr_advance),		\
 	FN(dynptr_trim),		\
+	FN(dynptr_is_null),		\
+	FN(dynptr_is_rdonly),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9f356105ab49..8729383d0966 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1398,7 +1398,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
 #define DYNPTR_SIZE_MASK	0xFFFFFF
 #define DYNPTR_RDONLY_BIT	BIT(31)
 
-static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
+static bool __bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
 {
 	return ptr->size & DYNPTR_RDONLY_BIT;
 }
@@ -1539,7 +1539,7 @@ BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *,
 	enum bpf_dynptr_type type;
 	int err;
 
-	if (!dst->data || bpf_dynptr_is_rdonly(dst))
+	if (!dst->data || __bpf_dynptr_is_rdonly(dst))
 		return -EINVAL;
 
 	err = bpf_dynptr_check_off_len(dst, offset, len);
@@ -1592,7 +1592,7 @@ void *__bpf_dynptr_data(struct bpf_dynptr_kern *ptr, u32 offset, u32 len, bool w
 	if (err)
 		return 0;
 
-	if (writable && bpf_dynptr_is_rdonly(ptr))
+	if (writable && __bpf_dynptr_is_rdonly(ptr))
 		return 0;
 
 	type = bpf_dynptr_get_type(ptr);
@@ -1705,6 +1705,33 @@ static const struct bpf_func_proto bpf_dynptr_trim_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_dynptr_is_null, struct bpf_dynptr_kern *, ptr)
+{
+	return !ptr->data;
+}
+
+static const struct bpf_func_proto bpf_dynptr_is_null_proto = {
+	.func		= bpf_dynptr_is_null,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_DYNPTR,
+};
+
+BPF_CALL_1(bpf_dynptr_is_rdonly, struct bpf_dynptr_kern *, ptr)
+{
+	if (!ptr->data)
+		return 0;
+
+	return __bpf_dynptr_is_rdonly(ptr);
+}
+
+static const struct bpf_func_proto bpf_dynptr_is_rdonly_proto = {
+	.func		= bpf_dynptr_is_rdonly,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_DYNPTR,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1781,6 +1808,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_dynptr_advance_proto;
 	case BPF_FUNC_dynptr_trim:
 		return &bpf_dynptr_trim_proto;
+	case BPF_FUNC_dynptr_is_null:
+		return &bpf_dynptr_is_null_proto;
+	case BPF_FUNC_dynptr_is_rdonly:
+		return &bpf_dynptr_is_rdonly_proto;
 	default:
 		break;
 	}
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index d5c389df6045..ecd227c2ea34 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -691,6 +691,7 @@ class PrinterHelpers(Printer):
             'int',
             'long',
             'unsigned long',
+            'bool',
 
             '__be16',
             '__be32',
@@ -761,6 +762,8 @@ class PrinterHelpers(Printer):
         header = '''\
 /* This is auto-generated file. See bpf_doc.py for details. */
 
+#include <stdbool.h>
+
 /* Forward declarations of BPF structs */'''
 
         print(header)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3b054553be30..90b6d0744df2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5467,6 +5467,24 @@ union bpf_attr {
  *	Return
  *		0 on success, -EINVAL if the dynptr is invalid, -ERANGE if
  *		trying to trim more bytes than the size of the dynptr.
+ *
+ * bool bpf_dynptr_is_null(struct bpf_dynptr *ptr)
+ *	Description
+ *		Determine whether a dynptr is null / invalid.
+ *
+ *		*ptr* must be an initialized dynptr.
+ *	Return
+ *		True if the dynptr is null, else false.
+ *
+ * bool bpf_dynptr_is_rdonly(struct bpf_dynptr *ptr)
+ *	Description
+ *		Determine whether a dynptr is read-only.
+ *
+ *		*ptr* must be an initialized dynptr. If *ptr*
+ *		is a null dynptr, this will return false.
+ *	Return
+ *		True if the dynptr is read-only and a valid dynptr,
+ *		else false.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5683,6 +5701,8 @@ union bpf_attr {
 	FN(dynptr_data_rdonly),		\
 	FN(dynptr_advance),		\
 	FN(dynptr_trim),		\
+	FN(dynptr_is_null),		\
+	FN(dynptr_is_rdonly),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH bpf-next v1 4/8] bpf: Add bpf_dynptr_get_size and bpf_dynptr_get_offset
  2022-09-08  0:02 [PATCH bpf-next v1 0/8] Dynptr convenience helpers Joanne Koong
                   ` (2 preceding siblings ...)
  2022-09-08  0:02 ` [PATCH bpf-next v1 3/8] bpf: Add bpf_dynptr_is_null and bpf_dynptr_is_rdonly Joanne Koong
@ 2022-09-08  0:02 ` Joanne Koong
  2022-09-09 16:52   ` Shmulik Ladkani
  2022-09-08  0:02 ` [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone Joanne Koong
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 33+ messages in thread
From: Joanne Koong @ 2022-09-08  0:02 UTC (permalink / raw)
  To: bpf; +Cc: daniel, martin.lau, andrii, ast, Kernel-team, Joanne Koong

Add two new helper functions: bpf_dynptr_get_size and
bpf_dynptr_get_offset.

bpf_dynptr_get_size returns the number of usable bytes in a dynptr and
bpf_dynptr_get_offset returns the current offset into the dynptr.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 include/uapi/linux/bpf.h       | 25 ++++++++++++++++++++
 kernel/bpf/helpers.c           | 42 ++++++++++++++++++++++++++++++----
 tools/include/uapi/linux/bpf.h | 25 ++++++++++++++++++++
 3 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 90b6d0744df2..4ca07cf500d2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5485,6 +5485,29 @@ union bpf_attr {
  *	Return
  *		True if the dynptr is read-only and a valid dynptr,
  *		else false.
+ *
+ * long bpf_dynptr_get_size(struct bpf_dynptr *ptr)
+ *	Description
+ *		Get the size of *ptr*.
+ *
+ *		Size refers to the number of usable bytes. For example,
+ *		if *ptr* was initialized with 100 bytes and its
+ *		offset was advanced by 40 bytes, then the size will be
+ *		60 bytes.
+ *
+ *		*ptr* must be an initialized dynptr.
+ *	Return
+ *		The size of the dynptr on success, -EINVAL if the dynptr is
+ *		invalid.
+ *
+ * long bpf_dynptr_get_offset(struct bpf_dynptr *ptr)
+ *	Description
+ *		Get the offset of the dynptr.
+ *
+ *		*ptr* must be an initialized dynptr.
+ *	Return
+ *		The offset of the dynptr on success, -EINVAL if the dynptr is
+ *		invalid.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5703,6 +5726,8 @@ union bpf_attr {
 	FN(dynptr_trim),		\
 	FN(dynptr_is_null),		\
 	FN(dynptr_is_rdonly),		\
+	FN(dynptr_get_size),		\
+	FN(dynptr_get_offset),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 8729383d0966..62ed27444b73 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1418,7 +1418,7 @@ static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *pt
 	return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
 }
 
-static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
+static u32 __bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
 {
 	return ptr->size & DYNPTR_SIZE_MASK;
 }
@@ -1451,7 +1451,7 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
 
 static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
 {
-	u32 size = bpf_dynptr_get_size(ptr);
+	u32 size = __bpf_dynptr_get_size(ptr);
 
 	if (len > size || offset > size - len)
 		return -E2BIG;
@@ -1660,7 +1660,7 @@ BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
 	if (!ptr->data)
 		return -EINVAL;
 
-	size = bpf_dynptr_get_size(ptr);
+	size = __bpf_dynptr_get_size(ptr);
 
 	if (len > size)
 		return -ERANGE;
@@ -1687,7 +1687,7 @@ BPF_CALL_2(bpf_dynptr_trim, struct bpf_dynptr_kern *, ptr, u32, len)
 	if (!ptr->data)
 		return -EINVAL;
 
-	size = bpf_dynptr_get_size(ptr);
+	size = __bpf_dynptr_get_size(ptr);
 
 	if (len > size)
 		return -ERANGE;
@@ -1732,6 +1732,36 @@ static const struct bpf_func_proto bpf_dynptr_is_rdonly_proto = {
 	.arg1_type	= ARG_PTR_TO_DYNPTR,
 };
 
+BPF_CALL_1(bpf_dynptr_get_size, struct bpf_dynptr_kern *, ptr)
+{
+	if (!ptr->data)
+		return -EINVAL;
+
+	return __bpf_dynptr_get_size(ptr);
+}
+
+static const struct bpf_func_proto bpf_dynptr_get_size_proto = {
+	.func		= bpf_dynptr_get_size,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_DYNPTR,
+};
+
+BPF_CALL_1(bpf_dynptr_get_offset, struct bpf_dynptr_kern *, ptr)
+{
+	if (!ptr->data)
+		return -EINVAL;
+
+	return ptr->offset;
+}
+
+static const struct bpf_func_proto bpf_dynptr_get_offset_proto = {
+	.func		= bpf_dynptr_get_offset,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_DYNPTR,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1812,6 +1842,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_dynptr_is_null_proto;
 	case BPF_FUNC_dynptr_is_rdonly:
 		return &bpf_dynptr_is_rdonly_proto;
+	case BPF_FUNC_dynptr_get_size:
+		return &bpf_dynptr_get_size_proto;
+	case BPF_FUNC_dynptr_get_offset:
+		return &bpf_dynptr_get_offset_proto;
 	default:
 		break;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 90b6d0744df2..4ca07cf500d2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5485,6 +5485,29 @@ union bpf_attr {
  *	Return
  *		True if the dynptr is read-only and a valid dynptr,
  *		else false.
+ *
+ * long bpf_dynptr_get_size(struct bpf_dynptr *ptr)
+ *	Description
+ *		Get the size of *ptr*.
+ *
+ *		Size refers to the number of usable bytes. For example,
+ *		if *ptr* was initialized with 100 bytes and its
+ *		offset was advanced by 40 bytes, then the size will be
+ *		60 bytes.
+ *
+ *		*ptr* must be an initialized dynptr.
+ *	Return
+ *		The size of the dynptr on success, -EINVAL if the dynptr is
+ *		invalid.
+ *
+ * long bpf_dynptr_get_offset(struct bpf_dynptr *ptr)
+ *	Description
+ *		Get the offset of the dynptr.
+ *
+ *		*ptr* must be an initialized dynptr.
+ *	Return
+ *		The offset of the dynptr on success, -EINVAL if the dynptr is
+ *		invalid.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5703,6 +5726,8 @@ union bpf_attr {
 	FN(dynptr_trim),		\
 	FN(dynptr_is_null),		\
 	FN(dynptr_is_rdonly),		\
+	FN(dynptr_get_size),		\
+	FN(dynptr_get_offset),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone
  2022-09-08  0:02 [PATCH bpf-next v1 0/8] Dynptr convenience helpers Joanne Koong
                   ` (3 preceding siblings ...)
  2022-09-08  0:02 ` [PATCH bpf-next v1 4/8] bpf: Add bpf_dynptr_get_size and bpf_dynptr_get_offset Joanne Koong
@ 2022-09-08  0:02 ` Joanne Koong
  2022-09-09 16:41   ` Shmulik Ladkani
  2022-09-28 22:29   ` Andrii Nakryiko
  2022-09-08  0:02 ` [PATCH bpf-next v1 6/8] bpf: Add verifier support for custom callback return range Joanne Koong
                   ` (2 subsequent siblings)
  7 siblings, 2 replies; 33+ messages in thread
From: Joanne Koong @ 2022-09-08  0:02 UTC (permalink / raw)
  To: bpf; +Cc: daniel, martin.lau, andrii, ast, Kernel-team, Joanne Koong

Add a new helper, bpf_dynptr_clone, which clones a dynptr.

The cloned dynptr will point to the same data as its parent dynptr,
with the same type, offset, size and read-only properties.

Any writes to a dynptr will be reflected across all instances
(by 'instance', this means any dynptrs that point to the same
underlying data).

Please note that data slice and dynptr invalidations will affect all
instances as well. For example, if bpf_dynptr_write() is called on an
skb-type dynptr, all data slices of dynptr instances to that skb
will be invalidated as well (eg data slices of any clones, parents,
grandparents, ...). Another example is if a ringbuf dynptr is submitted,
any instance of that dynptr will be invalidated.

Changing the view of the dynptr (eg advancing the offset or
trimming the size) will only affect that dynptr and not affect any
other instances.

One example use case where cloning may be helpful is for hashing or
iterating through dynptr data. Cloning will allow the user to maintain
the original view of the dynptr for future use, while also allowing
views to smaller subsets of the data after the offset is advanced or the
size is trimmed.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 include/uapi/linux/bpf.h       |  24 +++++++
 kernel/bpf/helpers.c           |  23 +++++++
 kernel/bpf/verifier.c          | 116 +++++++++++++++++++++------------
 tools/include/uapi/linux/bpf.h |  24 +++++++
 4 files changed, 147 insertions(+), 40 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4ca07cf500d2..16973fa4d073 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5508,6 +5508,29 @@ union bpf_attr {
  *	Return
  *		The offset of the dynptr on success, -EINVAL if the dynptr is
  *		invalid.
+ *
+ * long bpf_dynptr_clone(struct bpf_dynptr *ptr, struct bpf_dynptr *clone)
+ *	Description
+ *		Clone an initialized dynptr *ptr*. After this call, both *ptr*
+ *		and *clone* will point to the same underlying data.
+ *
+ *		*clone* must be an uninitialized dynptr.
+ *
+ *		Any data slice or dynptr invalidations will apply equally for
+ *		both dynptrs after this call. For example, if ptr1 is a
+ *		ringbuf-type dynptr with multiple data slices that is cloned to
+ *		ptr2, if ptr2 discards the ringbuf sample, then ptr2, ptr2's
+ *		data slices, ptr1, and ptr1's data slices will all be
+ *		invalidated.
+ *
+ *		This is convenient for getting different "views" to the same
+ *		data. For instance, if one wishes to hash only a particular
+ *		section of data, one can clone the dynptr, advance it to a
+ *		specified offset and trim it to a specified size, pass it
+ *		to the hash function, and discard it after hashing, without
+ *		losing access to the original view of the dynptr.
+ *	Return
+ *		0 on success, -EINVAL if the dynptr to clone is invalid.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5728,6 +5751,7 @@ union bpf_attr {
 	FN(dynptr_is_rdonly),		\
 	FN(dynptr_get_size),		\
 	FN(dynptr_get_offset),		\
+	FN(dynptr_clone),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 62ed27444b73..667f1e213a61 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1762,6 +1762,27 @@ static const struct bpf_func_proto bpf_dynptr_get_offset_proto = {
 	.arg1_type	= ARG_PTR_TO_DYNPTR,
 };
 
+BPF_CALL_2(bpf_dynptr_clone, struct bpf_dynptr_kern *, ptr,
+	   struct bpf_dynptr_kern *, clone)
+{
+	if (!ptr->data) {
+		bpf_dynptr_set_null(clone);
+		return -EINVAL;
+	}
+
+	memcpy(clone, ptr, sizeof(*clone));
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_dynptr_clone_proto = {
+	.func		= bpf_dynptr_clone,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_DYNPTR,
+	.arg2_type	= ARG_PTR_TO_DYNPTR | MEM_UNINIT,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1846,6 +1867,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_dynptr_get_size_proto;
 	case BPF_FUNC_dynptr_get_offset:
 		return &bpf_dynptr_get_offset_proto;
+	case BPF_FUNC_dynptr_clone:
+		return &bpf_dynptr_clone_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c312d931359d..8c8c101513f5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -694,17 +694,38 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
 	}
 }
 
+static bool arg_type_is_dynptr(enum bpf_arg_type type)
+{
+	return base_type(type) == ARG_PTR_TO_DYNPTR;
+}
+
 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
 {
 	return type == BPF_DYNPTR_TYPE_RINGBUF;
 }
 
-static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
-				   enum bpf_arg_type arg_type, int insn_idx)
+static enum bpf_dynptr_type stack_slot_get_dynptr_info(struct bpf_verifier_env *env,
+						       struct bpf_reg_state *reg,
+						       int *ref_obj_id)
 {
 	struct bpf_func_state *state = func(env, reg);
-	enum bpf_dynptr_type type;
-	int spi, i, id;
+	int spi = get_spi(reg->off);
+
+	if (ref_obj_id)
+		*ref_obj_id = state->stack[spi].spilled_ptr.id;
+
+	return state->stack[spi].spilled_ptr.dynptr.type;
+}
+
+static int mark_stack_slots_dynptr(struct bpf_verifier_env *env,
+				   const struct bpf_func_proto *fn,
+				   struct bpf_reg_state *reg,
+				   enum bpf_arg_type arg_type,
+				   int insn_idx, int func_id)
+{
+	enum bpf_dynptr_type type = BPF_DYNPTR_TYPE_INVALID;
+	struct bpf_func_state *state = func(env, reg);
+	int spi, i, id = 0;
 
 	spi = get_spi(reg->off);
 
@@ -716,7 +737,24 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
 		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
 	}
 
-	type = arg_to_dynptr_type(arg_type);
+	if (func_id == BPF_FUNC_dynptr_clone) {
+		/* find the type and id of the dynptr we're cloning and
+		 * assign it to the clone
+		 */
+		for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
+			enum bpf_arg_type t = fn->arg_type[i];
+
+			if (arg_type_is_dynptr(t) && !(t & MEM_UNINIT)) {
+				type = stack_slot_get_dynptr_info(env,
+								  &state->regs[BPF_REG_1 + i],
+								  &id);
+				break;
+			}
+		}
+	} else {
+		type = arg_to_dynptr_type(arg_type);
+	}
+
 	if (type == BPF_DYNPTR_TYPE_INVALID)
 		return -EINVAL;
 
@@ -726,9 +764,11 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
 
 	if (dynptr_type_refcounted(type)) {
 		/* The id is used to track proper releasing */
-		id = acquire_reference_state(env, insn_idx);
-		if (id < 0)
-			return id;
+		if (!id) {
+			id = acquire_reference_state(env, insn_idx);
+			if (id < 0)
+				return id;
+		}
 
 		state->stack[spi].spilled_ptr.id = id;
 		state->stack[spi - 1].spilled_ptr.id = id;
@@ -737,6 +777,17 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
 	return 0;
 }
 
+static void invalidate_dynptr(struct bpf_func_state *state, int spi)
+{
+	int i;
+
+	state->stack[spi].spilled_ptr.id = 0;
+	for (i = 0; i < BPF_REG_SIZE; i++)
+		state->stack[spi].slot_type[i] = STACK_INVALID;
+	state->stack[spi].spilled_ptr.dynptr.first_slot = false;
+	state->stack[spi].spilled_ptr.dynptr.type = 0;
+}
+
 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
 	struct bpf_func_state *state = func(env, reg);
@@ -747,22 +798,25 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
 	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
 		return -EINVAL;
 
-	for (i = 0; i < BPF_REG_SIZE; i++) {
-		state->stack[spi].slot_type[i] = STACK_INVALID;
-		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
-	}
-
-	/* Invalidate any slices associated with this dynptr */
 	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
+		int id = state->stack[spi].spilled_ptr.id;
+
+		/* If the dynptr is refcounted, we need to invalidate two things:
+		 * 1) any dynptrs with a matching id
+		 * 2) any slices associated with the dynptr id
+		 */
+
 		release_reference(env, state->stack[spi].spilled_ptr.id);
-		state->stack[spi].spilled_ptr.id = 0;
-		state->stack[spi - 1].spilled_ptr.id = 0;
+		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+			if (state->stack[i].slot_type[0] == STACK_DYNPTR &&
+			    state->stack[i].spilled_ptr.id == id)
+				invalidate_dynptr(state, i);
+		}
+	} else {
+		invalidate_dynptr(state, spi);
+		invalidate_dynptr(state, spi - 1);
 	}
 
-	state->stack[spi].spilled_ptr.dynptr.first_slot = false;
-	state->stack[spi].spilled_ptr.dynptr.type = 0;
-	state->stack[spi - 1].spilled_ptr.dynptr.type = 0;
-
 	return 0;
 }
 
@@ -5578,11 +5632,6 @@ static bool arg_type_is_release(enum bpf_arg_type type)
 	return type & OBJ_RELEASE;
 }
 
-static bool arg_type_is_dynptr(enum bpf_arg_type type)
-{
-	return base_type(type) == ARG_PTR_TO_DYNPTR;
-}
-
 static int int_ptr_type_to_size(enum bpf_arg_type type)
 {
 	if (type == ARG_PTR_TO_INT)
@@ -5872,19 +5921,6 @@ static struct bpf_reg_state *get_dynptr_arg_reg(const struct bpf_func_proto *fn,
 	return NULL;
 }
 
-static enum bpf_dynptr_type stack_slot_get_dynptr_info(struct bpf_verifier_env *env,
-						       struct bpf_reg_state *reg,
-						       int *ref_obj_id)
-{
-	struct bpf_func_state *state = func(env, reg);
-	int spi = get_spi(reg->off);
-
-	if (ref_obj_id)
-		*ref_obj_id = state->stack[spi].spilled_ptr.id;
-
-	return state->stack[spi].spilled_ptr.dynptr.type;
-}
-
 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 			  struct bpf_call_arg_meta *meta,
 			  const struct bpf_func_proto *fn)
@@ -7317,9 +7353,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 				return err;
 		}
 
-		err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
+		err = mark_stack_slots_dynptr(env, fn, &regs[meta.uninit_dynptr_regno],
 					      fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
-					      insn_idx);
+					      insn_idx, func_id);
 		if (err)
 			return err;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4ca07cf500d2..16973fa4d073 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5508,6 +5508,29 @@ union bpf_attr {
  *	Return
  *		The offset of the dynptr on success, -EINVAL if the dynptr is
  *		invalid.
+ *
+ * long bpf_dynptr_clone(struct bpf_dynptr *ptr, struct bpf_dynptr *clone)
+ *	Description
+ *		Clone an initialized dynptr *ptr*. After this call, both *ptr*
+ *		and *clone* will point to the same underlying data.
+ *
+ *		*clone* must be an uninitialized dynptr.
+ *
+ *		Any data slice or dynptr invalidations will apply equally for
+ *		both dynptrs after this call. For example, if ptr1 is a
+ *		ringbuf-type dynptr with multiple data slices that is cloned to
+ *		ptr2, if ptr2 discards the ringbuf sample, then ptr2, ptr2's
+ *		data slices, ptr1, and ptr1's data slices will all be
+ *		invalidated.
+ *
+ *		This is convenient for getting different "views" to the same
+ *		data. For instance, if one wishes to hash only a particular
+ *		section of data, one can clone the dynptr, advance it to a
+ *		specified offset and trim it to a specified size, pass it
+ *		to the hash function, and discard it after hashing, without
+ *		losing access to the original view of the dynptr.
+ *	Return
+ *		0 on success, -EINVAL if the dynptr to clone is invalid.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5728,6 +5751,7 @@ union bpf_attr {
 	FN(dynptr_is_rdonly),		\
 	FN(dynptr_get_size),		\
 	FN(dynptr_get_offset),		\
+	FN(dynptr_clone),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH bpf-next v1 6/8] bpf: Add verifier support for custom callback return range
  2022-09-08  0:02 [PATCH bpf-next v1 0/8] Dynptr convenience helpers Joanne Koong
                   ` (4 preceding siblings ...)
  2022-09-08  0:02 ` [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone Joanne Koong
@ 2022-09-08  0:02 ` Joanne Koong
  2022-09-08  0:02 ` [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator Joanne Koong
  2022-09-08  0:02 ` [PATCH bpf-next v1 8/8] selftests/bpf: Tests for dynptr convenience helpers Joanne Koong
  7 siblings, 0 replies; 33+ messages in thread
From: Joanne Koong @ 2022-09-08  0:02 UTC (permalink / raw)
  To: bpf; +Cc: daniel, martin.lau, andrii, ast, Kernel-team, Joanne Koong

This is ported from Dave Marchevsky's patch [0]; the added verifier
support is needed so that bpf iterator callbacks can return values
beyond the [0, 1] range.

For comments, please refer to Dave's patch.

[0] https://lore.kernel.org/bpf/20220830172759.4069786-2-davemarchevsky@fb.com/

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 include/linux/bpf_verifier.h | 1 +
 kernel/bpf/verifier.c        | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 8fbc1d05281e..63cda732ee10 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -248,6 +248,7 @@ struct bpf_func_state {
 	 */
 	u32 async_entry_cnt;
 	bool in_callback_fn;
+	struct tnum callback_ret_range;
 	bool in_async_callback_fn;
 
 	/* The following fields should be last. See copy_func_state() */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8c8c101513f5..2eb2a4410344 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1814,6 +1814,7 @@ static void init_func_state(struct bpf_verifier_env *env,
 	state->callsite = callsite;
 	state->frameno = frameno;
 	state->subprogno = subprogno;
+	state->callback_ret_range = tnum_range(0, 1);
 	init_reg_state(env, state);
 	mark_verifier_state_scratched(env);
 }
@@ -6974,6 +6975,7 @@ static int set_find_vma_callback_state(struct bpf_verifier_env *env,
 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 	callee->in_callback_fn = true;
+	callee->callback_ret_range = tnum_range(0, 1);
 	return 0;
 }
 
@@ -7000,8 +7002,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 	state->curframe--;
 	caller = state->frame[state->curframe];
 	if (callee->in_callback_fn) {
-		/* enforce R0 return value range [0, 1]. */
-		struct tnum range = tnum_range(0, 1);
+		struct tnum range = callee->callback_ret_range;
 
 		if (r0->type != SCALAR_VALUE) {
 			verbose(env, "R0 not a scalar value\n");
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator
  2022-09-08  0:02 [PATCH bpf-next v1 0/8] Dynptr convenience helpers Joanne Koong
                   ` (5 preceding siblings ...)
  2022-09-08  0:02 ` [PATCH bpf-next v1 6/8] bpf: Add verifier support for custom callback return range Joanne Koong
@ 2022-09-08  0:02 ` Joanne Koong
  2022-09-19  0:07   ` Kumar Kartikeya Dwivedi
  2022-09-28 22:41   ` Andrii Nakryiko
  2022-09-08  0:02 ` [PATCH bpf-next v1 8/8] selftests/bpf: Tests for dynptr convenience helpers Joanne Koong
  7 siblings, 2 replies; 33+ messages in thread
From: Joanne Koong @ 2022-09-08  0:02 UTC (permalink / raw)
  To: bpf; +Cc: daniel, martin.lau, andrii, ast, Kernel-team, Joanne Koong

Add a new helper function, bpf_dynptr_iterator:

  long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn,
			   void *callback_ctx, u64 flags)

where callback_fn is defined as:

  long (*callback_fn)(struct bpf_dynptr *ptr, void *ctx)

and callback_fn returns the number of bytes to advance the
dynptr by (or an error code in the case of error). The iteration
will stop if the callback_fn returns 0 or an error or tries to
advance by more bytes than available.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 include/uapi/linux/bpf.h       | 20 ++++++++++++++
 kernel/bpf/helpers.c           | 48 +++++++++++++++++++++++++++++++---
 kernel/bpf/verifier.c          | 27 +++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 20 ++++++++++++++
 4 files changed, 111 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 16973fa4d073..ff78a94c262a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5531,6 +5531,25 @@ union bpf_attr {
  *		losing access to the original view of the dynptr.
  *	Return
  *		0 on success, -EINVAL if the dynptr to clone is invalid.
+ *
+ * long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		Iterate through the dynptr data, calling **callback_fn** on each
+ *		iteration with **callback_ctx** as the context parameter.
+ *		The **callback_fn** should be a static function and
+ *		the **callback_ctx** should be a pointer to the stack.
+ *		Currently **flags** is unused and must be 0.
+ *
+ *		long (\*callback_fn)(struct bpf_dynptr \*ptr, void \*ctx);
+ *
+ *		where **callback_fn** returns the number of bytes to advance
+ *		the dynptr by or an error. The iteration will stop if **callback_fn**
+ *		returns 0 or an error or tries to advance by more bytes than the
+ *		size of the dynptr.
+ *	Return
+ *		0 on success, -EINVAL if the dynptr is invalid or **flags** is not 0,
+ *		-ERANGE if attempting to iterate more bytes than available, or other
+ *		negative error if **callback_fn** returns an error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5752,6 +5771,7 @@ union bpf_attr {
 	FN(dynptr_get_size),		\
 	FN(dynptr_get_offset),		\
 	FN(dynptr_clone),		\
+	FN(dynptr_iterator),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 667f1e213a61..519b3da06d49 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1653,13 +1653,11 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
 	.arg3_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
 };
 
-BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
+/* *ptr* should always be a valid dynptr */
+static int __bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
 {
 	u32 size;
 
-	if (!ptr->data)
-		return -EINVAL;
-
 	size = __bpf_dynptr_get_size(ptr);
 
 	if (len > size)
@@ -1672,6 +1670,14 @@ BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
 	return 0;
 }
 
+BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
+{
+	if (!ptr->data)
+		return -EINVAL;
+
+	return __bpf_dynptr_advance(ptr, len);
+}
+
 static const struct bpf_func_proto bpf_dynptr_advance_proto = {
 	.func		= bpf_dynptr_advance,
 	.gpl_only	= false,
@@ -1783,6 +1789,38 @@ static const struct bpf_func_proto bpf_dynptr_clone_proto = {
 	.arg2_type	= ARG_PTR_TO_DYNPTR | MEM_UNINIT,
 };
 
+BPF_CALL_4(bpf_dynptr_iterator, struct bpf_dynptr_kern *, ptr, void *, callback_fn,
+	   void *, callback_ctx, u64, flags)
+{
+	bpf_callback_t callback = (bpf_callback_t)callback_fn;
+	int nr_bytes, err;
+
+	if (!ptr->data || flags)
+		return -EINVAL;
+
+	while (ptr->size > 0) {
+		nr_bytes = callback((u64)(long)ptr, (u64)(long)callback_ctx, 0, 0, 0);
+		if (nr_bytes <= 0)
+			return nr_bytes;
+
+		err = __bpf_dynptr_advance(ptr, nr_bytes);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_dynptr_iterator_proto = {
+	.func		= bpf_dynptr_iterator,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_DYNPTR,
+	.arg2_type	= ARG_PTR_TO_FUNC,
+	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1869,6 +1907,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_dynptr_get_offset_proto;
 	case BPF_FUNC_dynptr_clone:
 		return &bpf_dynptr_clone_proto;
+	case BPF_FUNC_dynptr_iterator:
+		return &bpf_dynptr_iterator_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2eb2a4410344..76108cd4ed85 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6901,6 +6901,29 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static int set_dynptr_iterator_callback_state(struct bpf_verifier_env *env,
+					      struct bpf_func_state *caller,
+					      struct bpf_func_state *callee,
+					      int insn_idx)
+{
+	/* bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn,
+	 * void *callback_ctx, u64 flags);
+	 *
+	 * callback_fn(struct bpf_dynptr *ptr, void *callback_ctx);
+	 */
+	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
+	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
+	callee->callback_ret_range = tnum_range(0, U64_MAX);
+
+	/* unused */
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+
+	callee->in_callback_fn = true;
+	return 0;
+}
+
 static int set_loop_callback_state(struct bpf_verifier_env *env,
 				   struct bpf_func_state *caller,
 				   struct bpf_func_state *callee,
@@ -7472,6 +7495,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 
 		break;
 	}
+	case BPF_FUNC_dynptr_iterator:
+		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+					set_dynptr_iterator_callback_state);
+		break;
 	}
 
 	if (err)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 16973fa4d073..ff78a94c262a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5531,6 +5531,25 @@ union bpf_attr {
  *		losing access to the original view of the dynptr.
  *	Return
  *		0 on success, -EINVAL if the dynptr to clone is invalid.
+ *
+ * long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		Iterate through the dynptr data, calling **callback_fn** on each
+ *		iteration with **callback_ctx** as the context parameter.
+ *		The **callback_fn** should be a static function and
+ *		the **callback_ctx** should be a pointer to the stack.
+ *		Currently **flags** is unused and must be 0.
+ *
+ *		long (\*callback_fn)(struct bpf_dynptr \*ptr, void \*ctx);
+ *
+ *		where **callback_fn** returns the number of bytes to advance
+ *		the dynptr by or an error. The iteration will stop if **callback_fn**
+ *		returns 0 or an error or tries to advance by more bytes than the
+ *		size of the dynptr.
+ *	Return
+ *		0 on success, -EINVAL if the dynptr is invalid or **flags** is not 0,
+ *		-ERANGE if attempting to iterate more bytes than available, or other
+ *		negative error if **callback_fn** returns an error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5752,6 +5771,7 @@ union bpf_attr {
 	FN(dynptr_get_size),		\
 	FN(dynptr_get_offset),		\
 	FN(dynptr_clone),		\
+	FN(dynptr_iterator),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH bpf-next v1 8/8] selftests/bpf: Tests for dynptr convenience helpers
  2022-09-08  0:02 [PATCH bpf-next v1 0/8] Dynptr convenience helpers Joanne Koong
                   ` (6 preceding siblings ...)
  2022-09-08  0:02 ` [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator Joanne Koong
@ 2022-09-08  0:02 ` Joanne Koong
  7 siblings, 0 replies; 33+ messages in thread
From: Joanne Koong @ 2022-09-08  0:02 UTC (permalink / raw)
  To: bpf; +Cc: daniel, martin.lau, andrii, ast, Kernel-team, Joanne Koong

Test dynptr convenience helpers in the following way:

1) bpf_dynptr_data_rdonly
    * "data_slice_rdonly" tests that writes into read-only slices
      are rejected.
    * "skb_invalid_data_slice_rdonly{1,2}" tests that read-only
      slices for skb-type dynptrs get invalidated whenever packet data
      may change.

2) bpf_dynptr_trim and bpf_dynptr_advance
    * "test_advance_trim" tests that dynptr offset and size get adjusted
      correctly.
    * "test_advance_trim_err" tests that advances beyond dynptr size and
      trims larger than dynptr size are rejected.
    * "test_zero_size_dynptr" tests that a zero-size dynptr (after
      advancing or trimming) can only read and write 0 bytes.

3) bpf_dynptr_is_null
    * "dynptr_is_null_invalid" tests that only initialized dynptrs can
      be passed in.
    * "test_dynptr_is_null" tests that null dynptrs return true and
      non-null dynptrs return false.

4) bpf_dynptr_is_rdonly
    * "dynptr_is_rdonly_invalid" tests that only initialized dynptrs can
      be passed in.
    * "test_dynptr_is_rdonly" tests that rdonly dynptrs return true and
      non-rdonly or invalid dynptrs return false.

5) bpf_dynptr_get_size
    * "dynptr_get_size_invalid" tests that only initialized dynptrs can
      be passed in.
    * Additional functionality is tested as a by-product in
      "test_advance_trim"

6) bpf_dynptr_get_offset
    * "dynptr_get_offset_invalid" tests that only initialized dynptrs can
      be passed in.
    * Additional functionality is tested as a by-product in
      "test_advance_trim"

7) bpf_dynptr_clone
    * "clone_invalidate_{1..6}" tests that invalidating a dynptr
      invalidates all instances and invalidating a dynptr's data slices
      invalidates all data slices for all instances.
    * "clone_skb_packet_data" tests that data slices of skb dynptr instances
      are invalidated when packet data changes.
    * "clone_xdp_packet_data" tests that data slices of xdp dynptr instances
      are invalidated when packet data changes.
    * "clone_invalid1" tests that only initialized dynptrs can be
      cloned.
    * "clone_invalid2" tests that only uninitialized dynptrs can be
      a clone.
    * "test_dynptr_clone" tests that the views from the same dynptr instances
      are independent (advancing or trimming a dynptr doesn't affect other
      instances), and that a clone will return a dynptr with the same
      type, offset, size, and rd-only property.

8) bpf_dynptr_iterator
    * "iterator_invalid1" tests that any dynptr requiring a release
      that gets acquired in an iterator callback must also be released
      within the callback
    * "iterator_invalid2" tests that bpf_dynptr_iterator can't be called
      on an uninitialized dynptr
    * "iterator_invalid3" tests that an already-initialized dynptr can't
      be initialized again in the iterator callback function
    * "iterator_invalid4" tests that the dynptr that's iterating
      can't be released in the callback function
    * "iterator_invalid5" tests that the dynptr can't be passed as a
      callback ctx and released within the callback
    * "test_dynptr_iterator" tests basic functionality of the iterator
    * "iterator_parse_strings" tests parsing strings as values

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 .../testing/selftests/bpf/prog_tests/dynptr.c |  30 +
 .../testing/selftests/bpf/progs/dynptr_fail.c | 462 +++++++++++++++
 .../selftests/bpf/progs/dynptr_success.c      | 551 +++++++++++++++++-
 3 files changed, 1041 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index f3bdec4a595c..de084ba980a9 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -53,15 +53,45 @@ static struct {
 		"Unsupported reg type fp for bpf_dynptr_from_mem data"},
 	{"skb_invalid_data_slice1", "invalid mem access 'scalar'"},
 	{"skb_invalid_data_slice2", "invalid mem access 'scalar'"},
+	{"skb_invalid_data_slice_rdonly1", "invalid mem access 'scalar'"},
+	{"skb_invalid_data_slice_rdonly2", "invalid mem access 'scalar'"},
 	{"xdp_invalid_data_slice", "invalid mem access 'scalar'"},
 	{"skb_invalid_ctx", "unknown func bpf_dynptr_from_skb"},
 	{"xdp_invalid_ctx", "unknown func bpf_dynptr_from_xdp"},
+	{"data_slice_rdonly", "cannot write into rdonly_mem"},
+	{"dynptr_is_null_invalid", "Expected an initialized dynptr as arg #1"},
+	{"dynptr_is_rdonly_invalid", "Expected an initialized dynptr as arg #1"},
+	{"dynptr_get_size_invalid", "Expected an initialized dynptr as arg #1"},
+	{"dynptr_get_offset_invalid", "Expected an initialized dynptr as arg #1"},
+	{"clone_invalid1", "Expected an initialized dynptr as arg #1"},
+	{"clone_invalid2", "Dynptr has to be an uninitialized dynptr"},
+	{"clone_invalidate1", "Expected an initialized dynptr"},
+	{"clone_invalidate2", "Expected an initialized dynptr"},
+	{"clone_invalidate3", "Expected an initialized dynptr"},
+	{"clone_invalidate4", "invalid mem access 'scalar'"},
+	{"clone_invalidate5", "invalid mem access 'scalar'"},
+	{"clone_invalidate6", "invalid mem access 'scalar'"},
+	{"clone_skb_packet_data", "invalid mem access 'scalar'"},
+	{"clone_xdp_packet_data", "invalid mem access 'scalar'"},
+	{"iterator_invalid1", "Unreleased reference id=1"},
+	{"iterator_invalid2", "Expected an initialized dynptr as arg #1"},
+	{"iterator_invalid3", "Dynptr has to be an uninitialized dynptr"},
+	{"iterator_invalid4", "Unreleased reference"},
+	{"iterator_invalid5", "Unreleased reference"},
 
 	/* these tests should be run and should succeed */
 	{"test_read_write", NULL, SETUP_SYSCALL_SLEEP},
 	{"test_data_slice", NULL, SETUP_SYSCALL_SLEEP},
 	{"test_ringbuf", NULL, SETUP_SYSCALL_SLEEP},
 	{"test_skb_readonly", NULL, SETUP_SKB_PROG},
+	{"test_advance_trim", NULL, SETUP_SYSCALL_SLEEP},
+	{"test_advance_trim_err", NULL, SETUP_SYSCALL_SLEEP},
+	{"test_zero_size_dynptr", NULL, SETUP_SYSCALL_SLEEP},
+	{"test_dynptr_is_null", NULL, SETUP_SYSCALL_SLEEP},
+	{"test_dynptr_is_rdonly", NULL, SETUP_SKB_PROG},
+	{"test_dynptr_clone", NULL, SETUP_SKB_PROG},
+	{"test_dynptr_iterator", NULL, SETUP_SKB_PROG},
+	{"iterator_parse_strings", NULL, SETUP_SYSCALL_SLEEP},
 };
 
 static void verify_fail(const char *prog_name, const char *expected_err_msg)
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 2da6703d0caf..5a63389613c7 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -668,6 +668,51 @@ int skb_invalid_data_slice2(struct __sk_buff *skb)
 	return SK_PASS;
 }
 
+/* The read-only data slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+int skb_invalid_data_slice_rdonly1(struct __sk_buff *skb)
+{
+	struct bpf_dynptr ptr;
+	struct ethhdr *hdr;
+
+	bpf_dynptr_from_skb(skb, 0, &ptr);
+	hdr = bpf_dynptr_data_rdonly(&ptr, 0, sizeof(*hdr));
+
+	if (!hdr)
+		return SK_DROP;
+
+	val = hdr->h_proto;
+
+	if (bpf_skb_pull_data(skb, skb->len))
+		return SK_DROP;
+
+	/* this should fail */
+	val = hdr->h_proto;
+
+	return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever bpf_dynptr_write() is called */
+SEC("?tc")
+int skb_invalid_data_slice_rdonly2(struct __sk_buff *skb)
+{
+	char write_data[64] = "hello there, world!!";
+	struct bpf_dynptr ptr;
+	struct ethhdr *hdr;
+
+	bpf_dynptr_from_skb(skb, 0, &ptr);
+	hdr = bpf_dynptr_data_rdonly(&ptr, 0, sizeof(*hdr));
+	if (!hdr)
+		return SK_DROP;
+
+	bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+	/* this should fail */
+	val = hdr->h_proto;
+
+	return SK_PASS;
+}
+
 /* The data slice is invalidated whenever a helper changes packet data */
 SEC("?xdp")
 int xdp_invalid_data_slice(struct xdp_md *xdp)
@@ -714,3 +759,420 @@ int xdp_invalid_ctx(void *ctx)
 
 	return 0;
 }
+
+/* Writing into a read-only data slice is not permitted */
+SEC("?raw_tp")
+int data_slice_rdonly(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	__u64 *data;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+	data = bpf_dynptr_data_rdonly(&ptr, 0, 64);
+	if (data)
+		/* this should fail */
+		*data = 123;
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* dynptr_is_null can only be called on initialized dynptrs */
+SEC("?raw_tp")
+int dynptr_is_null_invalid(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	/* this should fail */
+	bpf_dynptr_is_null(&ptr);
+
+	return 0;
+}
+
+/* dynptr_is_rdonly can only be called on initialized dynptrs */
+SEC("?raw_tp")
+int dynptr_is_rdonly_invalid(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	/* this should fail */
+	bpf_dynptr_is_rdonly(&ptr);
+
+	return 0;
+}
+
+/* dynptr_get_size can only be called on initialized dynptrs */
+SEC("?raw_tp")
+int dynptr_get_size_invalid(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	/* this should fail */
+	bpf_dynptr_get_size(&ptr);
+
+	return 0;
+}
+
+/* dynptr_get_offset can only be called on initialized dynptrs */
+SEC("?raw_tp")
+int dynptr_get_offset_invalid(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	/* this should fail */
+	bpf_dynptr_get_offset(&ptr);
+
+	return 0;
+}
+
+/* Only initialized dynptrs can be cloned */
+SEC("?raw_tp")
+int clone_invalid1(void *ctx)
+{
+	struct bpf_dynptr ptr1;
+	struct bpf_dynptr ptr2;
+
+	/* this should fail */
+	bpf_dynptr_clone(&ptr1, &ptr2);
+
+	return 0;
+}
+
+/* Only uninitialized dynptrs can be clones */
+SEC("?xdp")
+int clone_invalid2(struct xdp_md *xdp)
+{
+	struct bpf_dynptr ptr1;
+	struct bpf_dynptr clone;
+
+	bpf_dynptr_from_xdp(xdp, 0, &ptr1);
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &clone);
+
+	/* this should fail */
+	bpf_dynptr_clone(&ptr1, &clone);
+
+	bpf_ringbuf_submit_dynptr(&clone, 0);
+
+	return 0;
+}
+
+/* Invalidating a dynptr should invalidate its clones */
+SEC("?raw_tp")
+int clone_invalidate1(void *ctx)
+{
+	struct bpf_dynptr clone;
+	struct bpf_dynptr ptr;
+	char read_data[64];
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	bpf_dynptr_clone(&ptr, &clone);
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	/* this should fail */
+	bpf_dynptr_read(read_data, sizeof(read_data), &clone, 0, 0);
+
+	return 0;
+}
+
+/* Invalidating a dynptr should invalidate its parent */
+SEC("?raw_tp")
+int clone_invalidate2(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct bpf_dynptr clone;
+	char read_data[64];
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	bpf_dynptr_clone(&ptr, &clone);
+
+	bpf_ringbuf_submit_dynptr(&clone, 0);
+
+	/* this should fail */
+	bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+	return 0;
+}
+
+/* Invalidating a dynptr should invalidate its siblings */
+SEC("?raw_tp")
+int clone_invalidate3(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct bpf_dynptr clone1;
+	struct bpf_dynptr clone2;
+	char read_data[64];
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	bpf_dynptr_clone(&ptr, &clone1);
+
+	bpf_dynptr_clone(&ptr, &clone2);
+
+	bpf_ringbuf_submit_dynptr(&clone2, 0);
+
+	/* this should fail */
+	bpf_dynptr_read(read_data, sizeof(read_data), &clone1, 0, 0);
+
+	return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its clones
+ */
+SEC("?raw_tp")
+int clone_invalidate4(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct bpf_dynptr clone;
+	int *data;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	bpf_dynptr_clone(&ptr, &clone);
+	data = bpf_dynptr_data(&clone, 0, sizeof(val));
+	if (!data)
+		return 0;
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	/* this should fail */
+	*data = 123;
+
+	return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its parent
+ */
+SEC("?raw_tp")
+int clone_invalidate5(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct bpf_dynptr clone;
+	int *data;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+	data = bpf_dynptr_data(&ptr, 0, sizeof(val));
+	if (!data)
+		return 0;
+
+	bpf_dynptr_clone(&ptr, &clone);
+
+	bpf_ringbuf_submit_dynptr(&clone, 0);
+
+	/* this should fail */
+	*data = 123;
+
+	return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its sibling
+ */
+SEC("?raw_tp")
+int clone_invalidate6(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct bpf_dynptr clone1;
+	struct bpf_dynptr clone2;
+	int *data;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	bpf_dynptr_clone(&ptr, &clone1);
+
+	bpf_dynptr_clone(&ptr, &clone2);
+
+	data = bpf_dynptr_data(&clone1, 0, sizeof(val));
+	if (!data)
+		return 0;
+
+	bpf_ringbuf_submit_dynptr(&clone2, 0);
+
+	/* this should fail */
+	*data = 123;
+
+	return 0;
+}
+
+/* A skb clone's data slices should be invalid anytime packet data changes */
+SEC("?tc")
+int clone_skb_packet_data(struct __sk_buff *skb)
+{
+	struct bpf_dynptr ptr;
+	struct bpf_dynptr clone;
+	__u32 *data;
+
+	bpf_dynptr_from_skb(skb, 0, &ptr);
+
+	bpf_dynptr_clone(&ptr, &clone);
+	data = bpf_dynptr_data(&clone, 0, sizeof(*data));
+	if (!data)
+		return XDP_DROP;
+
+	if (bpf_skb_pull_data(skb, skb->len))
+		return SK_DROP;
+
+	/* this should fail */
+	*data = 123;
+
+	return 0;
+}
+
+/* A xdp clone's data slices should be invalid anytime packet data changes */
+SEC("?xdp")
+int clone_xdp_packet_data(struct xdp_md *xdp)
+{
+	struct bpf_dynptr ptr;
+	struct bpf_dynptr clone;
+	struct ethhdr *hdr;
+	__u32 *data;
+
+	bpf_dynptr_from_xdp(xdp, 0, &ptr);
+
+	bpf_dynptr_clone(&ptr, &clone);
+	data = bpf_dynptr_data(&clone, 0, sizeof(*data));
+	if (!data)
+		return XDP_DROP;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+		return XDP_DROP;
+
+	/* this should fail */
+	*data = 123;
+
+	return 0;
+}
+
+static int iterator_callback1(struct bpf_dynptr *ptr, void *ctx)
+{
+	struct bpf_dynptr local_ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &local_ptr);
+
+	/* missing a call to bpf_ringbuf_discard/submit_dynptr */
+
+	return 0;
+}
+
+/* If a dynptr requiring a release is initialized within the iterator callback
+ * function, then it must also be released within that function
+ */
+SEC("?xdp")
+int iterator_invalid1(struct xdp_md *xdp)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_dynptr_from_xdp(xdp, 0, &ptr);
+
+	bpf_dynptr_iterator(&ptr, iterator_callback1, NULL, 0);
+
+	return 0;
+}
+
+/* bpf_dynptr_iterator can't be called on an uninitialized dynptr */
+SEC("?xdp")
+int iterator_invalid2(struct xdp_md *xdp)
+{
+	struct bpf_dynptr ptr;
+
+	/* this should fail */
+	bpf_dynptr_iterator(&ptr, iterator_callback1, NULL, 0);
+
+	return 0;
+}
+
+static int iterator_callback2(struct bpf_dynptr *ptr, void *ctx)
+{
+	/* this should fail */
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, ptr);
+
+	bpf_ringbuf_submit_dynptr(ptr, 0);
+
+	return 0;
+}
+
+/* An already-initialized dynptr can't be initialized again
+ * within an iterator callback function
+ */
+SEC("?raw_tp")
+int iterator_invalid3(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	bpf_dynptr_iterator(&ptr, iterator_callback2, NULL, 0);
+
+	return 0;
+}
+
+static int iterator_callback3(struct bpf_dynptr *ptr, void *ctx)
+{
+	char write_data[64] = "hello there, world!!";
+
+	bpf_dynptr_write(ptr, 0, write_data, sizeof(write_data), 0);
+
+	bpf_ringbuf_submit_dynptr(ptr, 0);
+
+	return 0;
+}
+
+/* The dynptr that is being iterated on can't be released in an
+ * iterator callback.
+ *
+ * Currently, the verifier doesn't strictly check for this since
+ * it only runs the callback once when verifying. For now, we
+ * use the fact that the verifier doesn't mark the reference in
+ * the parent func state as released if it's released in the
+ * callback. This is what we currently lean on in bpf_loop() as
+ * well. This is a bit of a hack for now, and will need to be
+ * addressed more thoroughly in the future.
+ */
+SEC("?raw_tp")
+int iterator_invalid4(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	bpf_dynptr_iterator(&ptr, iterator_callback3, NULL, 0);
+
+	return 0;
+}
+
+static int iterator_callback4(struct bpf_dynptr *ptr, void *ctx)
+{
+	bpf_ringbuf_submit_dynptr(ctx, 0);
+
+	return 0;
+}
+
+/* If a dynptr is passed in as the callback ctx, the dynptr
+ * can't be released.
+ *
+ * Currently, the verifier doesn't strictly check for this since
+ * it only runs the callback once when verifying. For now, we
+ * use the fact that the verifier doesn't mark the reference in
+ * the parent func state as released if it's released in the
+ * callback. This is what we currently lean on in bpf_loop() as
+ * well. This is a bit of a hack for now, and will need to be
+ * addressed more thoroughly in the future.
+ */
+SEC("?raw_tp")
+int iterator_invalid5(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	bpf_dynptr_iterator(&ptr, iterator_callback4, &ptr, 0);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 8ed3ef201284..22164ad6df9d 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2022 Facebook */
 
+#include <errno.h>
 #include <string.h>
 #include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
 #include "bpf_misc.h"
-#include "errno.h"
+#include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -29,6 +29,13 @@ struct {
 	__type(value, __u32);
 } array_map SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__uint(value_size, 64);
+} array_map2 SEC(".maps");
+
 SEC("tp/syscalls/sys_enter_nanosleep")
 int test_read_write(void *ctx)
 {
@@ -194,3 +201,543 @@ int test_skb_readonly(struct __sk_buff *skb)
 
 	return 0;
 }
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_advance_trim(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	__u32 bytes = 64;
+	__u32 off = 10;
+	__u32 trim = 5;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	err = bpf_ringbuf_reserve_dynptr(&ringbuf, bytes, 0, &ptr);
+	if (err) {
+		err = 1;
+		goto done;
+	}
+
+	if (bpf_dynptr_get_size(&ptr) != bytes) {
+		err = 2;
+		goto done;
+	}
+
+	/* Advance the dynptr by off */
+	err = bpf_dynptr_advance(&ptr, off);
+	if (err) {
+		err = 3;
+		goto done;
+	}
+
+	/* Check that the dynptr off and size were adjusted correctly */
+	if (bpf_dynptr_get_offset(&ptr) != off) {
+		err = 4;
+		goto done;
+	}
+	if (bpf_dynptr_get_size(&ptr) != bytes - off) {
+		err = 5;
+		goto done;
+	}
+
+	/* Trim the dynptr */
+	err = bpf_dynptr_trim(&ptr, trim);
+	if (err) {
+		err = 6;
+		goto done;
+	}
+
+	/* Check that the off was unaffected */
+	if (bpf_dynptr_get_offset(&ptr) != off) {
+		err = 7;
+		goto done;
+	}
+	/* Check that the size was adjusted correctly */
+	if (bpf_dynptr_get_size(&ptr) != bytes - off - trim) {
+		err = 8;
+		goto done;
+	}
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_advance_trim_err(void *ctx)
+{
+	char write_data[45] = "hello there, world!!";
+	struct bpf_dynptr ptr;
+	__u32 trim_size = 10;
+	__u32 size = 64;
+	__u32 off = 10;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) {
+		err = 1;
+		goto done;
+	}
+
+	/* Check that you can't advance beyond size of dynptr data */
+	if (bpf_dynptr_advance(&ptr, size + 1) != -ERANGE) {
+		err = 2;
+		goto done;
+	}
+
+	if (bpf_dynptr_advance(&ptr, off)) {
+		err = 3;
+		goto done;
+	}
+
+	/* Check that you can't trim more than size of dynptr data */
+	if (bpf_dynptr_trim(&ptr, size - off + 1) != -ERANGE) {
+		err = 4;
+		goto done;
+	}
+
+	/* Check that you can't write more bytes than available into the dynptr
+	 * after you've trimmed it
+	 */
+	if (bpf_dynptr_trim(&ptr, trim_size)) {
+		err = 5;
+		goto done;
+	}
+
+	if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) {
+		err = 6;
+		goto done;
+	}
+
+	/* Check that even after advancing / trimming, submitting/discarding
+	 * a ringbuf dynptr works
+	 */
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+	return 0;
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_zero_size_dynptr(void *ctx)
+{
+	char write_data = 'x', read_data;
+	struct bpf_dynptr ptr;
+	__u32 size = 64;
+	__u32 off = 10;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	/* Check that you can make a dynamic-size reservation */
+	if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) {
+		err = 1;
+		goto done;
+	}
+
+	/* After this, the dynptr has a size of 0 */
+	if (bpf_dynptr_advance(&ptr, size)) {
+		err = 2;
+		goto done;
+	}
+
+	/* Test that reading + writing non-zero bytes is not ok */
+	if (bpf_dynptr_read(&read_data, sizeof(read_data), &ptr, 0, 0) != -E2BIG) {
+		err = 3;
+		goto done;
+	}
+
+	if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) {
+		err = 4;
+		goto done;
+	}
+
+	/* Test that reading + writing 0 bytes from a 0-size dynptr is ok */
+	if (bpf_dynptr_read(&read_data, 0, &ptr, 0, 0)) {
+		err = 5;
+		goto done;
+	}
+
+	if (bpf_dynptr_write(&ptr, 0, &write_data, 0, 0)) {
+		err = 6;
+		goto done;
+	}
+
+	err = 0;
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_is_null(void *ctx)
+{
+	struct bpf_dynptr ptr1;
+	struct bpf_dynptr ptr2;
+	__u64 size = 4;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	/* Pass in invalid flags, get back an invalid dynptr */
+	if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 123, &ptr1) != -EINVAL) {
+		err = 1;
+		goto exit_early;
+	}
+
+	/* Test that the invalid dynptr is null */
+	if (!bpf_dynptr_is_null(&ptr1)) {
+		err = 2;
+		goto exit_early;
+	}
+
+	/* Get a valid dynptr */
+	if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr2)) {
+		err = 3;
+		goto exit;
+	}
+
+	/* Test that the valid dynptr is not null */
+	if (bpf_dynptr_is_null(&ptr2)) {
+		err = 4;
+		goto exit;
+	}
+
+exit:
+	bpf_ringbuf_discard_dynptr(&ptr2, 0);
+exit_early:
+	bpf_ringbuf_discard_dynptr(&ptr1, 0);
+	return 0;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_is_rdonly(struct __sk_buff *skb)
+{
+	struct bpf_dynptr ptr1;
+	struct bpf_dynptr ptr2;
+	struct bpf_dynptr ptr3;
+
+	/* Pass in invalid flags, get back an invalid dynptr */
+	if (bpf_dynptr_from_skb(skb, 123, &ptr1) != -EINVAL) {
+		err = 1;
+		return 0;
+	}
+
+	/* Test that an invalid dynptr is_rdonly returns false */
+	if (bpf_dynptr_is_rdonly(&ptr1)) {
+		err = 2;
+		return 0;
+	}
+
+	/* Get a read-only dynptr */
+	if (bpf_dynptr_from_skb(skb, 0, &ptr2)) {
+		err = 3;
+		return 0;
+	}
+
+	/* Test that the dynptr is read-only */
+	if (!bpf_dynptr_is_rdonly(&ptr2)) {
+		err = 4;
+		return 0;
+	}
+
+	/* Get a read-writeable dynptr */
+	if (bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr3)) {
+		err = 5;
+		goto done;
+	}
+
+	/* Test that the dynptr is not read-only */
+	if (bpf_dynptr_is_rdonly(&ptr3)) {
+		err = 6;
+		goto done;
+	}
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr3, 0);
+	return 0;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_clone(struct __sk_buff *skb)
+{
+	struct bpf_dynptr ptr1;
+	struct bpf_dynptr ptr2;
+	__u32 off = 2, size;
+
+	/* Get a dynptr */
+	if (bpf_dynptr_from_skb(skb, 0, &ptr1)) {
+		err = 1;
+		return 0;
+	}
+
+	if (bpf_dynptr_advance(&ptr1, off)) {
+		err = 2;
+		return 0;
+	}
+
+	/* Clone the dynptr */
+	if (bpf_dynptr_clone(&ptr1, &ptr2)) {
+		err = 3;
+		return 0;
+	}
+
+	size = bpf_dynptr_get_size(&ptr1);
+
+	/* Check that the clone has the same offset, size, and rd-only */
+	if (bpf_dynptr_get_size(&ptr2) != size) {
+		err = 4;
+		return 0;
+	}
+
+	if (bpf_dynptr_get_offset(&ptr2) != off) {
+		err = 5;
+		return 0;
+	}
+
+	if (bpf_dynptr_is_rdonly(&ptr2) != bpf_dynptr_is_rdonly(&ptr1)) {
+		err = 6;
+		return 0;
+	}
+
+	/* Advance and trim the original dynptr */
+	bpf_dynptr_advance(&ptr1, 50);
+	bpf_dynptr_trim(&ptr1, 50);
+
+	/* Check that only original dynptr was affected, and the clone wasn't */
+	if (bpf_dynptr_get_offset(&ptr2) != off) {
+		err = 7;
+		return 0;
+	}
+
+	if (bpf_dynptr_get_size(&ptr2) != size) {
+		err = 8;
+		return 0;
+	}
+
+	return 0;
+}
+
+struct iter_ctx {
+	/* if set, this means try to iterate more bytes than available */
+	bool trigger_err_erange;
+	/* iteration index */
+	int index;
+	/* offset at the first iteration */
+	__u32 start_off;
+	/* if non-zero, this indicates the offset to stop at */
+	__u32 stop_off;
+	/* number of bytes to advance by each time */
+	__u32 iter_bytes;
+	/* total number of bytes iterated on */
+	__u32 total_bytes;
+};
+
+static int iter_callback(struct bpf_dynptr *ptr, struct iter_ctx *ctx)
+{
+	if (ctx->trigger_err_erange)
+		return bpf_dynptr_get_size(ptr) + 1;
+
+	if (ctx->index == 0)
+		ctx->start_off = bpf_dynptr_get_offset(ptr);
+	else if (ctx->stop_off == ctx->total_bytes)
+		return 0;
+
+	ctx->index++;
+	ctx->total_bytes += ctx->iter_bytes;
+
+	if (bpf_dynptr_get_size(ptr) == 0)
+		return 0;
+
+	return ctx->iter_bytes;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_iterator(struct __sk_buff *skb)
+{
+	struct iter_ctx ctx = { .iter_bytes = 1, };
+	struct bpf_dynptr ptr;
+	__u32 off = 1, size;
+
+	/* Get a dynptr */
+	if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+		err = 1;
+		return 0;
+	}
+
+	if (bpf_dynptr_advance(&ptr, off)) {
+		err = 2;
+		return 0;
+	}
+
+	size = bpf_dynptr_get_size(&ptr);
+
+	/* Test the case where the callback tries to advance by more
+	 * bytes than available
+	 */
+	ctx.trigger_err_erange = true;
+	if (bpf_dynptr_iterator(&ptr, iter_callback, &ctx, 0) != -ERANGE) {
+		err = 3;
+		return 0;
+	}
+	if (bpf_dynptr_get_size(&ptr) != size) {
+		err = 4;
+		return 0;
+	}
+	if (bpf_dynptr_get_offset(&ptr) != off) {
+		err = 5;
+		return 0;
+	}
+	ctx.trigger_err_erange = false;
+
+	/* Test the case where we stop after a certain number of bytes */
+	ctx.stop_off = 15;
+	if (bpf_dynptr_iterator(&ptr, iter_callback, &ctx, 0)) {
+		err = 6;
+		return 0;
+	}
+	if (bpf_dynptr_get_size(&ptr) != size - ctx.stop_off) {
+		err = 7;
+		return 0;
+	}
+	if (bpf_dynptr_get_offset(&ptr) != off + ctx.stop_off) {
+		err = 8;
+		return 0;
+	}
+
+	/* Test that the iteration started at the current offset of the dynptr */
+	if (ctx.start_off != off) {
+		err = 9;
+		return 0;
+	}
+
+	/* Test that we iterate to the end */
+	ctx.stop_off = 0;
+	if (bpf_dynptr_iterator(&ptr, iter_callback, &ctx, 0)) {
+		err = 10;
+		return 0;
+	}
+	if (bpf_dynptr_get_size(&ptr) != 0) {
+		err = 11;
+		return 0;
+	}
+	if (bpf_dynptr_get_offset(&ptr) != off + size) {
+		err = 12;
+		return 0;
+	}
+
+	return 0;
+}
+
+static char values[3][64] = {};
+
+#define MAX_STRINGS_LEN 10
+static int parse_strings_callback(struct bpf_dynptr *ptr, int *nr_values)
+{
+	__u32 size = bpf_dynptr_get_size(ptr);
+	char buf[MAX_STRINGS_LEN] = {};
+	char *data;
+	int i, j, k;
+	int err;
+
+	if (size < MAX_STRINGS_LEN) {
+		err = bpf_dynptr_read(buf, size, ptr, 0, 0);
+		if (err)
+			return err;
+		data = buf;
+	} else {
+		data = bpf_dynptr_data(ptr, 0, MAX_STRINGS_LEN);
+		if (!data)
+			return -ENOENT;
+		size = MAX_STRINGS_LEN;
+	}
+
+	for (i = 0; i < size; i++) {
+		if (data[i] != '=')
+			continue;
+
+		for (j = i; j < size - i; j++) {
+			int index = 0;
+
+			if (data[j] != '/')
+				continue;
+
+			for (k = i + 1; k < j; k++) {
+				values[*nr_values][index] = data[k];
+				index += 1;
+			}
+
+			*nr_values += 1;
+			return j;
+		}
+
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int iterator_parse_strings(void *ctx)
+{
+	char val[64] = "x=foo/y=bar/z=baz/";
+	struct bpf_dynptr ptr;
+	__u32 map_val_size;
+	int nr_values = 0;
+	__u32 key = 0;
+	char *map_val;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	map_val_size = sizeof(val);
+
+	if (bpf_map_update_elem(&array_map2, &key, &val, 0)) {
+		err = 1;
+		return 0;
+	}
+
+	map_val = bpf_map_lookup_elem(&array_map2, &key);
+	if (!map_val) {
+		err = 2;
+		return 0;
+	}
+
+	if (bpf_dynptr_from_mem(map_val, map_val_size, 0, &ptr)) {
+		err = 3;
+		return 0;
+	}
+
+	if (bpf_dynptr_iterator(&ptr, parse_strings_callback,
+				&nr_values, 0)) {
+		err = 4;
+		return 0;
+	}
+
+	if (nr_values != 3) {
+		err = 8;
+		return 0;
+	}
+
+	if (memcmp(values[0], "foo", sizeof("foo"))) {
+		err = 5;
+		return 0;
+	}
+
+	if (memcmp(values[1], "bar", sizeof("bar"))) {
+		err = 6;
+		return 0;
+	}
+
+	if (memcmp(values[2], "baz", sizeof("baz"))) {
+		err = 7;
+		return 0;
+	}
+
+	return 0;
+}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly
  2022-09-08  0:02 ` [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly Joanne Koong
@ 2022-09-09 15:29   ` Song Liu
  2022-09-09 15:32     ` Alexei Starovoitov
  2022-09-09 15:51   ` Shmulik Ladkani
  1 sibling, 1 reply; 33+ messages in thread
From: Song Liu @ 2022-09-09 15:29 UTC (permalink / raw)
  To: Joanne Koong
  Cc: bpf, Daniel Borkmann, martin.lau, Andrii Nakryiko,
	Alexei Starovoitov, Kernel Team

On Thu, Sep 8, 2022 at 1:10 AM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> Add a new helper bpf_dynptr_data_rdonly
>
> void *bpf_dynptr_data_rdonly(struct bpf_dynptr *ptr, u32 offset, u32 len);
>
> which gets a read-only pointer to the underlying dynptr data.
>
> This is equivalent to bpf_dynptr_data(), except the pointer returned is
> read-only, which allows this to support both read-write and read-only
> dynptrs.
>
> One example where this will be useful is for skb dynptrs where the
> program type only allows read-only access to packet data. This API will
> provide a way to obtain a data slice that can be used for direct reads.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
>  include/uapi/linux/bpf.h       | 15 +++++++++++++++
>  kernel/bpf/helpers.c           | 32 ++++++++++++++++++++++++++------
>  kernel/bpf/verifier.c          |  7 +++++--
>  tools/include/uapi/linux/bpf.h | 15 +++++++++++++++
>  4 files changed, 61 insertions(+), 8 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c55c23f25c0f..cce3356765fc 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5439,6 +5439,20 @@ union bpf_attr {
>   *             *flags* is currently unused, it must be 0 for now.
>   *     Return
>   *             0 on success, -EINVAL if flags is not 0.
> + *
> + * void *bpf_dynptr_data_rdonly(struct bpf_dynptr *ptr, u32 offset, u32 len)
> + *     Description
> + *             Get a read-only pointer to the underlying dynptr data.
> + *
> + *             This is equivalent to **bpf_dynptr_data**\ () except the
> + *             pointer returned is read-only, which allows this to support
> + *             both read-write and read-only dynptrs. For more details on using
> + *             the API, please refer to **bpf_dynptr_data**\ ().
> + *     Return
> + *             Read-only pointer to the underlying dynptr data, NULL if the
> + *             dynptr is invalid or if the offset and length is out of bounds
> + *             or in a paged buffer for skb-type dynptrs or across fragments
> + *             for xdp-type dynptrs.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5652,6 +5666,7 @@ union bpf_attr {
>         FN(ktime_get_tai_ns),           \
>         FN(dynptr_from_skb),            \
>         FN(dynptr_from_xdp),            \
> +       FN(dynptr_data_rdonly),         \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index befafae34a63..30a59c9e5df3 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1572,7 +1572,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
>         .arg5_type      = ARG_ANYTHING,
>  };
>
> -BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
> +void *__bpf_dynptr_data(struct bpf_dynptr_kern *ptr, u32 offset, u32 len, bool writable)
>  {
>         enum bpf_dynptr_type type;
>         void *data;
> @@ -1585,7 +1585,7 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
>         if (err)
>                 return 0;

Let's return NULL for void* type.

>
> -       if (bpf_dynptr_is_rdonly(ptr))
> +       if (writable && bpf_dynptr_is_rdonly(ptr))
>                 return 0;
ditto
>
>         type = bpf_dynptr_get_type(ptr);
> @@ -1610,13 +1610,31 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
> >                 /* if the requested data is across fragments, then it cannot
>                  * be accessed directly - bpf_xdp_pointer will return NULL
>                  */
> -               return (unsigned long)bpf_xdp_pointer(ptr->data,
> -                                                     ptr->offset + offset, len);
> +               return bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
>         default:
> -               WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
> +               WARN_ONCE(true, "__bpf_dynptr_data: unknown dynptr type %d\n", type);

Let's use __func__ so we don't have to change this again.

WARN_ONCE(true, "%s: unknown dynptr type %d\n", __func__, type);

Thanks,
Song

[...]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly
  2022-09-09 15:29   ` Song Liu
@ 2022-09-09 15:32     ` Alexei Starovoitov
  2022-09-09 15:59       ` Song Liu
  0 siblings, 1 reply; 33+ messages in thread
From: Alexei Starovoitov @ 2022-09-09 15:32 UTC (permalink / raw)
  To: Song Liu
  Cc: Joanne Koong, bpf, Daniel Borkmann, martin.lau, Andrii Nakryiko,
	Alexei Starovoitov, Kernel Team

On Fri, Sep 9, 2022 at 8:29 AM Song Liu <song@kernel.org> wrote:
>
> On Thu, Sep 8, 2022 at 1:10 AM Joanne Koong <joannelkoong@gmail.com> wrote:
> >
> > Add a new helper bpf_dynptr_data_rdonly
> >
> > void *bpf_dynptr_data_rdonly(struct bpf_dynptr *ptr, u32 offset, u32 len);
> >
> > which gets a read-only pointer to the underlying dynptr data.
> >
> > This is equivalent to bpf_dynptr_data(), except the pointer returned is
> > read-only, which allows this to support both read-write and read-only
> > dynptrs.
> >
> > One example where this will be useful is for skb dynptrs where the
> > program type only allows read-only access to packet data. This API will
> > provide a way to obtain a data slice that can be used for direct reads.
> >
> > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > ---
> >  include/uapi/linux/bpf.h       | 15 +++++++++++++++
> >  kernel/bpf/helpers.c           | 32 ++++++++++++++++++++++++++------
> >  kernel/bpf/verifier.c          |  7 +++++--
> >  tools/include/uapi/linux/bpf.h | 15 +++++++++++++++
> >  4 files changed, 61 insertions(+), 8 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index c55c23f25c0f..cce3356765fc 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -5439,6 +5439,20 @@ union bpf_attr {
> >   *             *flags* is currently unused, it must be 0 for now.
> >   *     Return
> >   *             0 on success, -EINVAL if flags is not 0.
> > + *
> > + * void *bpf_dynptr_data_rdonly(struct bpf_dynptr *ptr, u32 offset, u32 len)
> > + *     Description
> > + *             Get a read-only pointer to the underlying dynptr data.
> > + *
> > + *             This is equivalent to **bpf_dynptr_data**\ () except the
> > + *             pointer returned is read-only, which allows this to support
> > + *             both read-write and read-only dynptrs. For more details on using
> > + *             the API, please refer to **bpf_dynptr_data**\ ().
> > + *     Return
> > + *             Read-only pointer to the underlying dynptr data, NULL if the
> > + *             dynptr is invalid or if the offset and length is out of bounds
> > + *             or in a paged buffer for skb-type dynptrs or across fragments
> > + *             for xdp-type dynptrs.
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)          \
> >         FN(unspec),                     \
> > @@ -5652,6 +5666,7 @@ union bpf_attr {
> >         FN(ktime_get_tai_ns),           \
> >         FN(dynptr_from_skb),            \
> >         FN(dynptr_from_xdp),            \
> > +       FN(dynptr_data_rdonly),         \
> >         /* */
> >
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index befafae34a63..30a59c9e5df3 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -1572,7 +1572,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
> >         .arg5_type      = ARG_ANYTHING,
> >  };
> >
> > -BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
> > +void *__bpf_dynptr_data(struct bpf_dynptr_kern *ptr, u32 offset, u32 len, bool writable)
> >  {
> >         enum bpf_dynptr_type type;
> >         void *data;
> > @@ -1585,7 +1585,7 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
> >         if (err)
> >                 return 0;
>
> Let's return NULL for void* type.
>
> >
> > -       if (bpf_dynptr_is_rdonly(ptr))
> > +       if (writable && bpf_dynptr_is_rdonly(ptr))
> >                 return 0;
> ditto
> >
> >         type = bpf_dynptr_get_type(ptr);
> > @@ -1610,13 +1610,31 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
> >                 /* if the requested data is across fragments, then it cannot
> >                  * be accessed directly - bpf_xdp_pointer will return NULL
> >                  */
> > -               return (unsigned long)bpf_xdp_pointer(ptr->data,
> > -                                                     ptr->offset + offset, len);
> > +               return bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
> >         default:
> > -               WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
> > +               WARN_ONCE(true, "__bpf_dynptr_data: unknown dynptr type %d\n", type);
>
> Let's use __func__ so we don't have to change this again.
>
> WARN_ONCE(true, "%s: unknown dynptr type %d\n", __func__, type);

WARN includes file and line automatically.
Do we really need func here too?

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 2/8] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance
  2022-09-08  0:02 ` [PATCH bpf-next v1 2/8] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance Joanne Koong
@ 2022-09-09 15:32   ` Song Liu
  2022-09-09 16:16   ` Shmulik Ladkani
  2022-09-28 22:14   ` Andrii Nakryiko
  2 siblings, 0 replies; 33+ messages in thread
From: Song Liu @ 2022-09-09 15:32 UTC (permalink / raw)
  To: Joanne Koong
  Cc: bpf, Daniel Borkmann, martin.lau, Andrii Nakryiko,
	Alexei Starovoitov, Kernel Team

On Thu, Sep 8, 2022 at 1:10 AM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> Add two new helper functions: bpf_dynptr_trim and bpf_dynptr_advance.
>
> bpf_dynptr_trim decreases the size of a dynptr by the specified
> number of bytes (offset remains the same). bpf_dynptr_advance advances
> the offset of the dynptr by the specified number of bytes (size
> decreases correspondingly).
>
> One example where trimming / advancing the dynptr may be useful is for
> hashing. If the dynptr points to a larger struct, it is possible to hash
> an individual field within the struct through dynptrs by using
> bpf_dynptr_advance+trim.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>

Acked-by: Song Liu <song@kernel.org>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 3/8] bpf: Add bpf_dynptr_is_null and bpf_dynptr_is_rdonly
  2022-09-08  0:02 ` [PATCH bpf-next v1 3/8] bpf: Add bpf_dynptr_is_null and bpf_dynptr_is_rdonly Joanne Koong
@ 2022-09-09 15:46   ` Song Liu
  2022-09-09 21:28     ` Joanne Koong
  0 siblings, 1 reply; 33+ messages in thread
From: Song Liu @ 2022-09-09 15:46 UTC (permalink / raw)
  To: Joanne Koong
  Cc: bpf, Daniel Borkmann, martin.lau, Andrii Nakryiko,
	Alexei Starovoitov, Kernel Team

On Thu, Sep 8, 2022 at 1:07 AM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> Add two new helper functions: bpf_dynptr_is_null and
> bpf_dynptr_is_rdonly.
>
> bpf_dynptr_is_null returns true if the dynptr is null / invalid
> (determined by whether ptr->data is NULL), else false if
> the dynptr is a valid dynptr.
>
> bpf_dynptr_is_rdonly returns true if the dynptr is read-only,
> else false if the dynptr is read-writable.

Might be a dumb question.. Can we just let the bpf program
access struct bpf_dynptr directly? Using a helper for this feels like
overkill.

Thanks,
Song

>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
>  include/uapi/linux/bpf.h       | 20 ++++++++++++++++++
>  kernel/bpf/helpers.c           | 37 +++++++++++++++++++++++++++++++---
>  scripts/bpf_doc.py             |  3 +++
>  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++
>  4 files changed, 77 insertions(+), 3 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 3b054553be30..90b6d0744df2 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5467,6 +5467,24 @@ union bpf_attr {
>   *     Return
>   *             0 on success, -EINVAL if the dynptr is invalid, -ERANGE if
>   *             trying to trim more bytes than the size of the dynptr.
> + *
> + * bool bpf_dynptr_is_null(struct bpf_dynptr *ptr)
> + *     Description
> + *             Determine whether a dynptr is null / invalid.
> + *
> + *             *ptr* must be an initialized dynptr.
> + *     Return
> + *             True if the dynptr is null, else false.
> + *
> + * bool bpf_dynptr_is_rdonly(struct bpf_dynptr *ptr)
> + *     Description
> + *             Determine whether a dynptr is read-only.
> + *
> + *             *ptr* must be an initialized dynptr. If *ptr*
> + *             is a null dynptr, this will return false.
> + *     Return
> + *             True if the dynptr is read-only and a valid dynptr,
> + *             else false.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5683,6 +5701,8 @@ union bpf_attr {
>         FN(dynptr_data_rdonly),         \
>         FN(dynptr_advance),             \
>         FN(dynptr_trim),                \
> +       FN(dynptr_is_null),             \
> +       FN(dynptr_is_rdonly),           \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 9f356105ab49..8729383d0966 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1398,7 +1398,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
>  #define DYNPTR_SIZE_MASK       0xFFFFFF
>  #define DYNPTR_RDONLY_BIT      BIT(31)
>
> -static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
> +static bool __bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
>  {
>         return ptr->size & DYNPTR_RDONLY_BIT;
>  }
> @@ -1539,7 +1539,7 @@ BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *,
>         enum bpf_dynptr_type type;
>         int err;
>
> -       if (!dst->data || bpf_dynptr_is_rdonly(dst))
> +       if (!dst->data || __bpf_dynptr_is_rdonly(dst))
>                 return -EINVAL;
>
>         err = bpf_dynptr_check_off_len(dst, offset, len);
> @@ -1592,7 +1592,7 @@ void *__bpf_dynptr_data(struct bpf_dynptr_kern *ptr, u32 offset, u32 len, bool w
>         if (err)
>                 return 0;
>
> -       if (writable && bpf_dynptr_is_rdonly(ptr))
> +       if (writable && __bpf_dynptr_is_rdonly(ptr))
>                 return 0;
>
>         type = bpf_dynptr_get_type(ptr);
> @@ -1705,6 +1705,33 @@ static const struct bpf_func_proto bpf_dynptr_trim_proto = {
>         .arg2_type      = ARG_ANYTHING,
>  };
>
> +BPF_CALL_1(bpf_dynptr_is_null, struct bpf_dynptr_kern *, ptr)
> +{
> +       return !ptr->data;
> +}
> +
> +static const struct bpf_func_proto bpf_dynptr_is_null_proto = {
> +       .func           = bpf_dynptr_is_null,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_DYNPTR,
> +};
> +
> +BPF_CALL_1(bpf_dynptr_is_rdonly, struct bpf_dynptr_kern *, ptr)
> +{
> +       if (!ptr->data)
> +               return 0;
> +
> +       return __bpf_dynptr_is_rdonly(ptr);
> +}
> +
> +static const struct bpf_func_proto bpf_dynptr_is_rdonly_proto = {
> +       .func           = bpf_dynptr_is_rdonly,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_DYNPTR,
> +};
> +
>  const struct bpf_func_proto bpf_get_current_task_proto __weak;
>  const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
>  const struct bpf_func_proto bpf_probe_read_user_proto __weak;
> @@ -1781,6 +1808,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
>                 return &bpf_dynptr_advance_proto;
>         case BPF_FUNC_dynptr_trim:
>                 return &bpf_dynptr_trim_proto;
> +       case BPF_FUNC_dynptr_is_null:
> +               return &bpf_dynptr_is_null_proto;
> +       case BPF_FUNC_dynptr_is_rdonly:
> +               return &bpf_dynptr_is_rdonly_proto;
>         default:
>                 break;
>         }
> diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
> index d5c389df6045..ecd227c2ea34 100755
> --- a/scripts/bpf_doc.py
> +++ b/scripts/bpf_doc.py
> @@ -691,6 +691,7 @@ class PrinterHelpers(Printer):
>              'int',
>              'long',
>              'unsigned long',
> +            'bool',
>
>              '__be16',
>              '__be32',
> @@ -761,6 +762,8 @@ class PrinterHelpers(Printer):
>          header = '''\
>  /* This is auto-generated file. See bpf_doc.py for details. */
>
> +#include <stdbool.h>
> +
>  /* Forward declarations of BPF structs */'''
>
>          print(header)
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 3b054553be30..90b6d0744df2 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -5467,6 +5467,24 @@ union bpf_attr {
>   *     Return
>   *             0 on success, -EINVAL if the dynptr is invalid, -ERANGE if
>   *             trying to trim more bytes than the size of the dynptr.
> + *
> + * bool bpf_dynptr_is_null(struct bpf_dynptr *ptr)
> + *     Description
> + *             Determine whether a dynptr is null / invalid.
> + *
> + *             *ptr* must be an initialized dynptr.
> + *     Return
> + *             True if the dynptr is null, else false.
> + *
> + * bool bpf_dynptr_is_rdonly(struct bpf_dynptr *ptr)
> + *     Description
> + *             Determine whether a dynptr is read-only.
> + *
> + *             *ptr* must be an initialized dynptr. If *ptr*
> + *             is a null dynptr, this will return false.
> + *     Return
> + *             True if the dynptr is read-only and a valid dynptr,
> + *             else false.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5683,6 +5701,8 @@ union bpf_attr {
>         FN(dynptr_data_rdonly),         \
>         FN(dynptr_advance),             \
>         FN(dynptr_trim),                \
> +       FN(dynptr_is_null),             \
> +       FN(dynptr_is_rdonly),           \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly
  2022-09-08  0:02 ` [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly Joanne Koong
  2022-09-09 15:29   ` Song Liu
@ 2022-09-09 15:51   ` Shmulik Ladkani
  1 sibling, 0 replies; 33+ messages in thread
From: Shmulik Ladkani @ 2022-09-09 15:51 UTC (permalink / raw)
  To: Joanne Koong; +Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Wed,  7 Sep 2022 17:02:47 -0700 Joanne Koong <joannelkoong@gmail.com> wrote:

> Add a new helper bpf_dynptr_data_rdonly
> 
> void *bpf_dynptr_data_rdonly(struct bpf_dynptr *ptr, u32 offset, u32 len);
> 
> which gets a read-only pointer to the underlying dynptr data.
> 
> This is equivalent to bpf_dynptr_data(), except the pointer returned is
> read-only, which allows this to support both read-write and read-only
> dynptrs.
> 
> One example where this will be useful is for skb dynptrs where the
> program type only allows read-only access to packet data. This API will
> provide a way to obtain a data slice that can be used for direct reads.
> 
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>

Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly
  2022-09-09 15:32     ` Alexei Starovoitov
@ 2022-09-09 15:59       ` Song Liu
  0 siblings, 0 replies; 33+ messages in thread
From: Song Liu @ 2022-09-09 15:59 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Joanne Koong, bpf, Daniel Borkmann, martin.lau, Andrii Nakryiko,
	Alexei Starovoitov, Kernel Team

On Fri, Sep 9, 2022 at 4:32 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Sep 9, 2022 at 8:29 AM Song Liu <song@kernel.org> wrote:
> >
> > On Thu, Sep 8, 2022 at 1:10 AM Joanne Koong <joannelkoong@gmail.com> wrote:
> > >

[...]

> >
> > Let's return NULL for void* type.
> >
> > >
> > > -       if (bpf_dynptr_is_rdonly(ptr))
> > > +       if (writable && bpf_dynptr_is_rdonly(ptr))
> > >                 return 0;
> > ditto
> > >
> > >         type = bpf_dynptr_get_type(ptr);
> > > @@ -1610,13 +1610,31 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
> > >                 /* if the requested data in across fragments, then it cannot
> > >                  * be accessed directly - bpf_xdp_pointer will return NULL
> > >                  */
> > > -               return (unsigned long)bpf_xdp_pointer(ptr->data,
> > > -                                                     ptr->offset + offset, len);
> > > +               return bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
> > >         default:
> > > -               WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
> > > +               WARN_ONCE(true, "__bpf_dynptr_data: unknown dynptr type %d\n", type);
> >
> > Let's use __func__ so we don't have to change this again.
> >
> > WARN_ONCE(true, "%s: unknown dynptr type %d\n", __func__, type);
>
> WARN includes file and line automatically.
> Do we really need func here too?

I think func is helpful when I read a slightly different version of the
code and the line numbers change. (Well, maybe I shouldn't do this.)

I won't mind if we remove it. We just shouldn't hard code it manually.

Thanks,
Song

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 2/8] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance
  2022-09-08  0:02 ` [PATCH bpf-next v1 2/8] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance Joanne Koong
  2022-09-09 15:32   ` Song Liu
@ 2022-09-09 16:16   ` Shmulik Ladkani
  2022-09-28 22:14   ` Andrii Nakryiko
  2 siblings, 0 replies; 33+ messages in thread
From: Shmulik Ladkani @ 2022-09-09 16:16 UTC (permalink / raw)
  To: Joanne Koong; +Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Wed,  7 Sep 2022 17:02:48 -0700 Joanne Koong <joannelkoong@gmail.com> wrote:

> Add two new helper functions: bpf_dynptr_trim and bpf_dynptr_advance.
> 
> bpf_dynptr_trim decreases the size of a dynptr by the specified
> number of bytes (offset remains the same). bpf_dynptr_advance advances
> the offset of the dynptr by the specified number of bytes (size
> decreases correspondingly).
> 
> One example where trimming / advancing the dynptr may useful is for
> hashing. If the dynptr points to a larger struct, it is possible to hash
> an individual field within the struct through dynptrs by using
> bpf_dynptr_advance+trim.
> 
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>

Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone
  2022-09-08  0:02 ` [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone Joanne Koong
@ 2022-09-09 16:41   ` Shmulik Ladkani
  2022-09-09 22:18     ` Joanne Koong
  2022-09-28 22:29   ` Andrii Nakryiko
  1 sibling, 1 reply; 33+ messages in thread
From: Shmulik Ladkani @ 2022-09-09 16:41 UTC (permalink / raw)
  To: Joanne Koong
  Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team, Eyal Birger

On Wed,  7 Sep 2022 17:02:51 -0700 Joanne Koong <joannelkoong@gmail.com> wrote:

> Add a new helper, bpf_dynptr_clone, which clones a dynptr.
> 
> The cloned dynptr will point to the same data as its parent dynptr,
> with the same type, offset, size and read-only properties.

[...]

> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4ca07cf500d2..16973fa4d073 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5508,6 +5508,29 @@ union bpf_attr {
>   *	Return
>   *		The offset of the dynptr on success, -EINVAL if the dynptr is
>   *		invalid.
> + *
> + * long bpf_dynptr_clone(struct bpf_dynptr *ptr, struct bpf_dynptr *clone)
> + *	Description
> + *		Clone an initialized dynptr *ptr*. After this call, both *ptr*
> + *		and *clone* will point to the same underlying data.
> + *

How about adding 'off' and 'len' parameters so that a view ("slice") of
the dynptr can be created in a single call?

Otherwise, for a simple slice creation, ebpf user needs to:
  bpf_dynptr_clone(orig, clone)
  bpf_dynptr_advance(clone, off)
  trim_len = bpf_dynptr_get_size(clone) - len
  bpf_dynptr_trim(clone, trim_len)

This fits the usecase described here:
  https://lore.kernel.org/bpf/20220830231349.46c49c50@blondie/

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 4/8] bpf: Add bpf_dynptr_get_size and bpf_dynptr_get_offset
  2022-09-08  0:02 ` [PATCH bpf-next v1 4/8] bpf: Add bpf_dynptr_get_size and bpf_dynptr_get_offset Joanne Koong
@ 2022-09-09 16:52   ` Shmulik Ladkani
  2022-09-09 20:37     ` Joanne Koong
  0 siblings, 1 reply; 33+ messages in thread
From: Shmulik Ladkani @ 2022-09-09 16:52 UTC (permalink / raw)
  To: Joanne Koong; +Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Wed,  7 Sep 2022 17:02:50 -0700 Joanne Koong <joannelkoong@gmail.com> wrote:

> + * long bpf_dynptr_get_offset(struct bpf_dynptr *ptr)
> + *	Description
> + *		Get the offset of the dynptr.
> + *

The offset is an internal thing of the dynptr.

The user may initialize the dynptr and "advance" it using APIs.

Why do we need to expose the internal offset to the ebpf user?

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 4/8] bpf: Add bpf_dynptr_get_size and bpf_dynptr_get_offset
  2022-09-09 16:52   ` Shmulik Ladkani
@ 2022-09-09 20:37     ` Joanne Koong
  0 siblings, 0 replies; 33+ messages in thread
From: Joanne Koong @ 2022-09-09 20:37 UTC (permalink / raw)
  To: Shmulik Ladkani; +Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Fri, Sep 9, 2022 at 9:52 AM Shmulik Ladkani
<shmulik.ladkani@gmail.com> wrote:
>
> On Wed,  7 Sep 2022 17:02:50 -0700 Joanne Koong <joannelkoong@gmail.com> wrote:
>
> > + * long bpf_dynptr_get_offset(struct bpf_dynptr *ptr)
> > + *   Description
> > + *           Get the offset of the dynptr.
> > + *
>
> The offset is an internal thing of the dynptr.
>
> The user may initialize the dynptr and "advance" it using APIs.
>
> Why do we need to expose the internal offset to the ebpf user?

There might be use cases where knowing the offset is helpful (for
example, when parsing packets, a user may want to know how far they are
from the start of the packet).

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 3/8] bpf: Add bpf_dynptr_is_null and bpf_dynptr_is_rdonly
  2022-09-09 15:46   ` Song Liu
@ 2022-09-09 21:28     ` Joanne Koong
  2022-09-09 23:17       ` Song Liu
  0 siblings, 1 reply; 33+ messages in thread
From: Joanne Koong @ 2022-09-09 21:28 UTC (permalink / raw)
  To: Song Liu
  Cc: bpf, Daniel Borkmann, martin.lau, Andrii Nakryiko,
	Alexei Starovoitov, Kernel Team

On Fri, Sep 9, 2022 at 8:46 AM Song Liu <song@kernel.org> wrote:
>
> On Thu, Sep 8, 2022 at 1:07 AM Joanne Koong <joannelkoong@gmail.com> wrote:
> >
> > Add two new helper functions: bpf_dynptr_is_null and
> > bpf_dynptr_is_rdonly.
> >
> > bpf_dynptr_is_null returns true if the dynptr is null / invalid
> > (determined by whether ptr->data is NULL), else false if
> > the dynptr is a valid dynptr.
> >
> > bpf_dynptr_is_rdonly returns true if the dynptr is read-only,
> > else false if the dynptr is read-writable.
>
> Might be a dump question.. Can we just let the bpf program to
> access struct bpf_dynptr? Using a helper for this feel like an
> overkill.
>
> Thanks,
> Song
>

Not a dumb question at all, this is an interesting idea :) Right now
the struct bpf_dynptr is opaque from the bpf program side but if we
were to expose it (it'd still be read-only in the program), the
program could directly get offset and whether it's null, but would
need to do some manipulation to determine the size (since the last few
bits of 'size' stores the dynptr type and rd-only) and rd-only. I see
the advantages of either approach - personally I think it's cleaner if
the struct is completely opaque from the program side but I don't feel
strongly about it. If the worry is the overhead of needing a helper
for each, maybe another idea is to conflate them into 1 general
bpf_dynptr_get_info helper that returns offset, size, is_null, and
rd-only status?


> >
> > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > ---
> >  include/uapi/linux/bpf.h       | 20 ++++++++++++++++++
> >  kernel/bpf/helpers.c           | 37 +++++++++++++++++++++++++++++++---
> >  scripts/bpf_doc.py             |  3 +++
> >  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++
> >  4 files changed, 77 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 3b054553be30..90b6d0744df2 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -5467,6 +5467,24 @@ union bpf_attr {
> >   *     Return
> >   *             0 on success, -EINVAL if the dynptr is invalid, -ERANGE if
> >   *             trying to trim more bytes than the size of the dynptr.
> > + *
> > + * bool bpf_dynptr_is_null(struct bpf_dynptr *ptr)
> > + *     Description
> > + *             Determine whether a dynptr is null / invalid.
> > + *
> > + *             *ptr* must be an initialized dynptr.
> > + *     Return
> > + *             True if the dynptr is null, else false.
> > + *
> > + * bool bpf_dynptr_is_rdonly(struct bpf_dynptr *ptr)
> > + *     Description
> > + *             Determine whether a dynptr is read-only.
> > + *
> > + *             *ptr* must be an initialized dynptr. If *ptr*
> > + *             is a null dynptr, this will return false.
> > + *     Return
> > + *             True if the dynptr is read-only and a valid dynptr,
> > + *             else false.
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)          \
> >         FN(unspec),                     \
> > @@ -5683,6 +5701,8 @@ union bpf_attr {
> >         FN(dynptr_data_rdonly),         \
> >         FN(dynptr_advance),             \
> >         FN(dynptr_trim),                \
> > +       FN(dynptr_is_null),             \
> > +       FN(dynptr_is_rdonly),           \
> >         /* */
> >
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index 9f356105ab49..8729383d0966 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -1398,7 +1398,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
> >  #define DYNPTR_SIZE_MASK       0xFFFFFF
> >  #define DYNPTR_RDONLY_BIT      BIT(31)
> >
> > -static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
> > +static bool __bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
> >  {
> >         return ptr->size & DYNPTR_RDONLY_BIT;
> >  }
> > @@ -1539,7 +1539,7 @@ BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *,
> >         enum bpf_dynptr_type type;
> >         int err;
> >
> > -       if (!dst->data || bpf_dynptr_is_rdonly(dst))
> > +       if (!dst->data || __bpf_dynptr_is_rdonly(dst))
> >                 return -EINVAL;
> >
> >         err = bpf_dynptr_check_off_len(dst, offset, len);
> > @@ -1592,7 +1592,7 @@ void *__bpf_dynptr_data(struct bpf_dynptr_kern *ptr, u32 offset, u32 len, bool w
> >         if (err)
> >                 return 0;
> >
> > -       if (writable && bpf_dynptr_is_rdonly(ptr))
> > +       if (writable && __bpf_dynptr_is_rdonly(ptr))
> >                 return 0;
> >
> >         type = bpf_dynptr_get_type(ptr);
> > @@ -1705,6 +1705,33 @@ static const struct bpf_func_proto bpf_dynptr_trim_proto = {
> >         .arg2_type      = ARG_ANYTHING,
> >  };
> >
> > +BPF_CALL_1(bpf_dynptr_is_null, struct bpf_dynptr_kern *, ptr)
> > +{
> > +       return !ptr->data;
> > +}
> > +
> > +static const struct bpf_func_proto bpf_dynptr_is_null_proto = {
> > +       .func           = bpf_dynptr_is_null,
> > +       .gpl_only       = false,
> > +       .ret_type       = RET_INTEGER,
> > +       .arg1_type      = ARG_PTR_TO_DYNPTR,
> > +};
> > +
> > +BPF_CALL_1(bpf_dynptr_is_rdonly, struct bpf_dynptr_kern *, ptr)
> > +{
> > +       if (!ptr->data)
> > +               return 0;
> > +
> > +       return __bpf_dynptr_is_rdonly(ptr);
> > +}
> > +
> > +static const struct bpf_func_proto bpf_dynptr_is_rdonly_proto = {
> > +       .func           = bpf_dynptr_is_rdonly,
> > +       .gpl_only       = false,
> > +       .ret_type       = RET_INTEGER,
> > +       .arg1_type      = ARG_PTR_TO_DYNPTR,
> > +};
> > +
> >  const struct bpf_func_proto bpf_get_current_task_proto __weak;
> >  const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
> >  const struct bpf_func_proto bpf_probe_read_user_proto __weak;
> > @@ -1781,6 +1808,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
> >                 return &bpf_dynptr_advance_proto;
> >         case BPF_FUNC_dynptr_trim:
> >                 return &bpf_dynptr_trim_proto;
> > +       case BPF_FUNC_dynptr_is_null:
> > +               return &bpf_dynptr_is_null_proto;
> > +       case BPF_FUNC_dynptr_is_rdonly:
> > +               return &bpf_dynptr_is_rdonly_proto;
> >         default:
> >                 break;
> >         }
> > diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
> > index d5c389df6045..ecd227c2ea34 100755
> > --- a/scripts/bpf_doc.py
> > +++ b/scripts/bpf_doc.py
> > @@ -691,6 +691,7 @@ class PrinterHelpers(Printer):
> >              'int',
> >              'long',
> >              'unsigned long',
> > +            'bool',
> >
> >              '__be16',
> >              '__be32',
> > @@ -761,6 +762,8 @@ class PrinterHelpers(Printer):
> >          header = '''\
> >  /* This is auto-generated file. See bpf_doc.py for details. */
> >
> > +#include <stdbool.h>
> > +
> >  /* Forward declarations of BPF structs */'''
> >
> >          print(header)
> > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> > index 3b054553be30..90b6d0744df2 100644
> > --- a/tools/include/uapi/linux/bpf.h
> > +++ b/tools/include/uapi/linux/bpf.h
> > @@ -5467,6 +5467,24 @@ union bpf_attr {
> >   *     Return
> >   *             0 on success, -EINVAL if the dynptr is invalid, -ERANGE if
> >   *             trying to trim more bytes than the size of the dynptr.
> > + *
> > + * bool bpf_dynptr_is_null(struct bpf_dynptr *ptr)
> > + *     Description
> > + *             Determine whether a dynptr is null / invalid.
> > + *
> > + *             *ptr* must be an initialized dynptr.
> > + *     Return
> > + *             True if the dynptr is null, else false.
> > + *
> > + * bool bpf_dynptr_is_rdonly(struct bpf_dynptr *ptr)
> > + *     Description
> > + *             Determine whether a dynptr is read-only.
> > + *
> > + *             *ptr* must be an initialized dynptr. If *ptr*
> > + *             is a null dynptr, this will return false.
> > + *     Return
> > + *             True if the dynptr is read-only and a valid dynptr,
> > + *             else false.
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)          \
> >         FN(unspec),                     \
> > @@ -5683,6 +5701,8 @@ union bpf_attr {
> >         FN(dynptr_data_rdonly),         \
> >         FN(dynptr_advance),             \
> >         FN(dynptr_trim),                \
> > +       FN(dynptr_is_null),             \
> > +       FN(dynptr_is_rdonly),           \
> >         /* */
> >
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > --
> > 2.30.2
> >

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone
  2022-09-09 16:41   ` Shmulik Ladkani
@ 2022-09-09 22:18     ` Joanne Koong
  2022-09-10  5:31       ` Shmulik Ladkani
  0 siblings, 1 reply; 33+ messages in thread
From: Joanne Koong @ 2022-09-09 22:18 UTC (permalink / raw)
  To: Shmulik Ladkani
  Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team, Eyal Birger

On Fri, Sep 9, 2022 at 9:41 AM Shmulik Ladkani
<shmulik.ladkani@gmail.com> wrote:
>
> On Wed,  7 Sep 2022 17:02:51 -0700 Joanne Koong <joannelkoong@gmail.com> wrote:
>
> > Add a new helper, bpf_dynptr_clone, which clones a dynptr.
> >
> > The cloned dynptr will point to the same data as its parent dynptr,
> > with the same type, offset, size and read-only properties.
>
> [...]
>
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 4ca07cf500d2..16973fa4d073 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -5508,6 +5508,29 @@ union bpf_attr {
> >   *   Return
> >   *           The offset of the dynptr on success, -EINVAL if the dynptr is
> >   *           invalid.
> > + *
> > + * long bpf_dynptr_clone(struct bpf_dynptr *ptr, struct bpf_dynptr *clone)
> > + *   Description
> > + *           Clone an initialized dynptr *ptr*. After this call, both *ptr*
> > + *           and *clone* will point to the same underlying data.
> > + *
>
> How about adding 'off' and 'len' parameters so that a view ("slice") of
> the dynptr can be created in a single call?
>
> Otherwise, for a simple slice creation, ebpf user needs to:
>   bpf_dynptr_clone(orig, clone)
>   bpf_dynptr_advance(clone, off)
>   trim_len = bpf_dynptr_get_size(clone) - len
>   bpf_dynptr_trim(clone, trim_len)

I like the idea, where 'off' is an offset from ptr's offset and 'len'
is the number of bytes to trim.

Btw, I will be traveling for the next ~6 weeks and won't have access
to a computer, so v2 will be sometime after that.

>
> This fits the usecase described here:
>   https://lore.kernel.org/bpf/20220830231349.46c49c50@blondie/

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 3/8] bpf: Add bpf_dynptr_is_null and bpf_dynptr_is_rdonly
  2022-09-09 21:28     ` Joanne Koong
@ 2022-09-09 23:17       ` Song Liu
  0 siblings, 0 replies; 33+ messages in thread
From: Song Liu @ 2022-09-09 23:17 UTC (permalink / raw)
  To: Joanne Koong
  Cc: bpf, Daniel Borkmann, martin.lau, Andrii Nakryiko,
	Alexei Starovoitov, Kernel Team

On Fri, Sep 9, 2022 at 10:28 PM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> On Fri, Sep 9, 2022 at 8:46 AM Song Liu <song@kernel.org> wrote:
> >
> > On Thu, Sep 8, 2022 at 1:07 AM Joanne Koong <joannelkoong@gmail.com> wrote:
> > >
> > > Add two new helper functions: bpf_dynptr_is_null and
> > > bpf_dynptr_is_rdonly.
> > >
> > > bpf_dynptr_is_null returns true if the dynptr is null / invalid
> > > (determined by whether ptr->data is NULL), else false if
> > > the dynptr is a valid dynptr.
> > >
> > > bpf_dynptr_is_rdonly returns true if the dynptr is read-only,
> > > else false if the dynptr is read-writable.
> >
> > Might be a dump question.. Can we just let the bpf program to
> > access struct bpf_dynptr? Using a helper for this feel like an
> > overkill.
> >
> > Thanks,
> > Song
> >
>
> Not a dumb question at all, this is an interesting idea :) Right now
> the struct bpf_dynptr is opaque from the bpf program side but if we
> were to expose it (it'd still be read-only in the program), the
> program could directly get offset and whether it's null, but would
> need to do some manipulation to determine the size (since the last few
> bits of 'size' stores the dynptr type and rd-only) and rd-only. I see
> the advantages of either approach - personally I think it's cleaner if
> the struct is completely opaque from the program side but I don't feel
> strongly about it. If the worry is the overhead of needing a helper
> for each, maybe another idea is to conflate them into 1 general
> bpf_dynptr_get_info helper that returns offset, size, is_null, and
> rd-only status?

While all the helpers are fast direct calls, I think they are still slower
than simple memory accesses. It is probably a good idea to benchmark
the difference. While we can inline the helper functions in the verifier,
it is cleaner if we simply expose the data?

Thanks,
Song

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone
  2022-09-09 22:18     ` Joanne Koong
@ 2022-09-10  5:31       ` Shmulik Ladkani
  2022-09-28 22:34         ` Andrii Nakryiko
  0 siblings, 1 reply; 33+ messages in thread
From: Shmulik Ladkani @ 2022-09-10  5:31 UTC (permalink / raw)
  To: Joanne Koong
  Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team, Eyal Birger

On Fri, 9 Sep 2022 15:18:57 -0700 Joanne Koong <joannelkoong@gmail.com> wrote:

> I like the idea, where 'off' is an offset from ptr's offset and 'len'
> is the number of bytes to trim.

Actually, the parameters would be better as 'from' (inclusive) and 'to'
(exclusive), similar to slicing in most languages. Specifying a negative
'to' provides the trimming functionality.

> Btw, I will be traveling for the next ~6 weeks and won't have access
> to a computer, so v2 will be sometime after that.

Enjoy

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator
  2022-09-08  0:02 ` [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator Joanne Koong
@ 2022-09-19  0:07   ` Kumar Kartikeya Dwivedi
  2022-09-28 22:47     ` Andrii Nakryiko
  2022-09-28 22:41   ` Andrii Nakryiko
  1 sibling, 1 reply; 33+ messages in thread
From: Kumar Kartikeya Dwivedi @ 2022-09-19  0:07 UTC (permalink / raw)
  To: Joanne Koong; +Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Thu, 8 Sept 2022 at 02:16, Joanne Koong <joannelkoong@gmail.com> wrote:
>
> Add a new helper function, bpf_dynptr_iterator:
>
>   long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn,
>                            void *callback_ctx, u64 flags)
>
> where callback_fn is defined as:
>
>   long (*callback_fn)(struct bpf_dynptr *ptr, void *ctx)
>
> and callback_fn returns the number of bytes to advance the
> dynptr by (or an error code in the case of error). The iteration
> will stop if the callback_fn returns 0 or an error or tries to
> advance by more bytes than available.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---

This is buggy as is.

A user can just reinitialize the dynptr from inside the callback, and
then you will never stop advancing it inside your helper, therefore an
infinite loop can be constructed. The stack frame of the caller is
accessible using callback_ctx.

For example (modifying your selftest)

diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c
b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 22164ad6df9d..a9e78316c508 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -540,6 +540,19 @@ struct iter_ctx {

 static int iter_callback(struct bpf_dynptr *ptr, struct iter_ctx *ctx)
 {
+       struct map_value *map_val;
+       int key = 0;
+
+       map_val = bpf_map_lookup_elem(&array_map2, &key);
+       if (!map_val) {
+               return 0;
+       }
+
+       *(void **)ptr = 0;
+       *((void **)ptr + 1) = 0;
+       bpf_dynptr_from_mem(map_val, 2, 0, ptr);
+       return 1;
+
        if (ctx->trigger_err_erange)
                return bpf_dynptr_get_size(ptr) + 1;

... leads to a lockup.

It doesn't have to be ringbuf_reserver_dynptr, it can just be
dynptr_from_mem, which also gets around reference state restrictions
inside callbacks.

You cannot prevent overwriting dynptr stack slots in general. Some of
them don't have to be released. It would be prohibitive for stack
reuse.

So it seems like you need to temporarily mark both the slots as
immutable for the caller stack state during exploration of the
callback.
Setting some flag in r1 for callback is not enough (as it can reload
PTR_TO_STACK of caller stack frame pointing at dynptr from
callback_ctx). It needs to be set in spilled_ptr.

Then certain operations modifying the view of the dynptr do not accept
dynptr with that type flag set (e.g. trim, advance, init functions).
While for others which only operate on the underlying view, you fold
the flag (e.g. read/write/dynptr_data).

It is the difference between struct bpf_dynptr *, vs const struct
bpf_dynptr *, we need to give the callback access to the latter.
I.e. it should still allow accessing the dynptr's view, but not modifying it.

And at the risk of sounding like a broken record (and same suggestion
as Martin in skb/xdp v6 set), the view's mutability should ideally
also be part of the verifier's state. That doesn't preclude runtime
tracking later, but there seems to be no strong motivation for that
right now.

>  include/uapi/linux/bpf.h       | 20 ++++++++++++++
>  kernel/bpf/helpers.c           | 48 +++++++++++++++++++++++++++++++---
>  kernel/bpf/verifier.c          | 27 +++++++++++++++++++
>  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++
>  4 files changed, 111 insertions(+), 4 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 16973fa4d073..ff78a94c262a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5531,6 +5531,25 @@ union bpf_attr {
>   *             losing access to the original view of the dynptr.
>   *     Return
>   *             0 on success, -EINVAL if the dynptr to clone is invalid.
> + *
> + * long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn, void *callback_ctx, u64 flags)
> + *     Description
> + *             Iterate through the dynptr data, calling **callback_fn** on each
> + *             iteration with **callback_ctx** as the context parameter.
> + *             The **callback_fn** should be a static function and
> + *             the **callback_ctx** should be a pointer to the stack.
> + *             Currently **flags** is unused and must be 0.
> + *
> + *             long (\*callback_fn)(struct bpf_dynptr \*ptr, void \*ctx);
> + *
> + *             where **callback_fn** returns the number of bytes to advance
> + *             the dynptr by or an error. The iteration will stop if **callback_fn**
> + *             returns 0 or an error or tries to advance by more bytes than the
> + *             size of the dynptr.
> + *     Return
> + *             0 on success, -EINVAL if the dynptr is invalid or **flags** is not 0,
> + *             -ERANGE if attempting to iterate more bytes than available, or other
> + *             negative error if **callback_fn** returns an error.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5752,6 +5771,7 @@ union bpf_attr {
>         FN(dynptr_get_size),            \
>         FN(dynptr_get_offset),          \
>         FN(dynptr_clone),               \
> +       FN(dynptr_iterator),            \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 667f1e213a61..519b3da06d49 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1653,13 +1653,11 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
>         .arg3_type      = ARG_CONST_ALLOC_SIZE_OR_ZERO,
>  };
>
> -BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> +/* *ptr* should always be a valid dynptr */
> +static int __bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
>  {
>         u32 size;
>
> -       if (!ptr->data)
> -               return -EINVAL;
> -
>         size = __bpf_dynptr_get_size(ptr);
>
>         if (len > size)
> @@ -1672,6 +1670,14 @@ BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
>         return 0;
>  }
>
> +BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> +{
> +       if (!ptr->data)
> +               return -EINVAL;
> +
> +       return __bpf_dynptr_advance(ptr, len);
> +}
> +
>  static const struct bpf_func_proto bpf_dynptr_advance_proto = {
>         .func           = bpf_dynptr_advance,
>         .gpl_only       = false,
> @@ -1783,6 +1789,38 @@ static const struct bpf_func_proto bpf_dynptr_clone_proto = {
>         .arg2_type      = ARG_PTR_TO_DYNPTR | MEM_UNINIT,
>  };
>
> +BPF_CALL_4(bpf_dynptr_iterator, struct bpf_dynptr_kern *, ptr, void *, callback_fn,
> +          void *, callback_ctx, u64, flags)
> +{
> +       bpf_callback_t callback = (bpf_callback_t)callback_fn;
> +       int nr_bytes, err;
> +
> +       if (!ptr->data || flags)
> +               return -EINVAL;
> +
> +       while (ptr->size > 0) {
> +               nr_bytes = callback((u64)(long)ptr, (u64)(long)callback_ctx, 0, 0, 0);
> +               if (nr_bytes <= 0)
> +                       return nr_bytes;
> +
> +               err = __bpf_dynptr_advance(ptr, nr_bytes);
> +               if (err)
> +                       return err;
> +       }
> +
> +       return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_dynptr_iterator_proto = {
> +       .func           = bpf_dynptr_iterator,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_DYNPTR,
> +       .arg2_type      = ARG_PTR_TO_FUNC,
> +       .arg3_type      = ARG_PTR_TO_STACK_OR_NULL,
> +       .arg4_type      = ARG_ANYTHING,
> +};
> +
>  const struct bpf_func_proto bpf_get_current_task_proto __weak;
>  const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
>  const struct bpf_func_proto bpf_probe_read_user_proto __weak;
> @@ -1869,6 +1907,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
>                 return &bpf_dynptr_get_offset_proto;
>         case BPF_FUNC_dynptr_clone:
>                 return &bpf_dynptr_clone_proto;
> +       case BPF_FUNC_dynptr_iterator:
> +               return &bpf_dynptr_iterator_proto;
>         default:
>                 break;
>         }
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 2eb2a4410344..76108cd4ed85 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6901,6 +6901,29 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
>         return 0;
>  }
>
> +static int set_dynptr_iterator_callback_state(struct bpf_verifier_env *env,
> +                                             struct bpf_func_state *caller,
> +                                             struct bpf_func_state *callee,
> +                                             int insn_idx)
> +{
> +       /* bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn,
> +        * void *callback_ctx, u64 flags);
> +        *
> +        * callback_fn(struct bpf_dynptr *ptr, void *callback_ctx);
> +        */
> +       callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
> +       callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
> +       callee->callback_ret_range = tnum_range(0, U64_MAX);
> +
> +       /* unused */
> +       __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
> +       __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
> +       __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
> +
> +       callee->in_callback_fn = true;
> +       return 0;
> +}
> +
>  static int set_loop_callback_state(struct bpf_verifier_env *env,
>                                    struct bpf_func_state *caller,
>                                    struct bpf_func_state *callee,
> @@ -7472,6 +7495,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
>
>                 break;
>         }
> +       case BPF_FUNC_dynptr_iterator:
> +               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
> +                                       set_dynptr_iterator_callback_state);
> +               break;
>         }
>
>         if (err)
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 16973fa4d073..ff78a94c262a 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -5531,6 +5531,25 @@ union bpf_attr {
>   *             losing access to the original view of the dynptr.
>   *     Return
>   *             0 on success, -EINVAL if the dynptr to clone is invalid.
> + *
> + * long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn, void *callback_ctx, u64 flags)
> + *     Description
> + *             Iterate through the dynptr data, calling **callback_fn** on each
> + *             iteration with **callback_ctx** as the context parameter.
> + *             The **callback_fn** should be a static function and
> + *             the **callback_ctx** should be a pointer to the stack.
> + *             Currently **flags** is unused and must be 0.
> + *
> + *             long (\*callback_fn)(struct bpf_dynptr \*ptr, void \*ctx);
> + *
> + *             where **callback_fn** returns the number of bytes to advance
> + *             the dynptr by or an error. The iteration will stop if **callback_fn**
> + *             returns 0 or an error or tries to advance by more bytes than the
> + *             size of the dynptr.
> + *     Return
> + *             0 on success, -EINVAL if the dynptr is invalid or **flags** is not 0,
> + *             -ERANGE if attempting to iterate more bytes than available, or other
> + *             negative error if **callback_fn** returns an error.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5752,6 +5771,7 @@ union bpf_attr {
>         FN(dynptr_get_size),            \
>         FN(dynptr_get_offset),          \
>         FN(dynptr_clone),               \
> +       FN(dynptr_iterator),            \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> --
> 2.30.2
>

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 2/8] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance
  2022-09-08  0:02 ` [PATCH bpf-next v1 2/8] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance Joanne Koong
  2022-09-09 15:32   ` Song Liu
  2022-09-09 16:16   ` Shmulik Ladkani
@ 2022-09-28 22:14   ` Andrii Nakryiko
  2 siblings, 0 replies; 33+ messages in thread
From: Andrii Nakryiko @ 2022-09-28 22:14 UTC (permalink / raw)
  To: Joanne Koong; +Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Wed, Sep 7, 2022 at 5:10 PM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> Add two new helper functions: bpf_dynptr_trim and bpf_dynptr_advance.
>
> bpf_dynptr_trim decreases the size of a dynptr by the specified
> number of bytes (offset remains the same). bpf_dynptr_advance advances
> the offset of the dynptr by the specified number of bytes (size
> decreases correspondingly).
>
> One example where trimming / advancing the dynptr may be useful is for
> hashing. If the dynptr points to a larger struct, it is possible to hash
> an individual field within the struct through dynptrs by using
> bpf_dynptr_advance+trim.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
>  include/uapi/linux/bpf.h       | 16 +++++++++
>  kernel/bpf/helpers.c           | 63 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h | 16 +++++++++
>  3 files changed, 95 insertions(+)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index cce3356765fc..3b054553be30 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5453,6 +5453,20 @@ union bpf_attr {
>   *             dynptr is invalid or if the offset and length is out of bounds
>   *             or in a paged buffer for skb-type dynptrs or across fragments
>   *             for xdp-type dynptrs.
> + *
> + * long bpf_dynptr_advance(struct bpf_dynptr *ptr, u32 len)
> + *     Description
> + *             Advance a dynptr by *len*.

This might be a bit too succinct a description. "Advance dynptr's
internal offset by *len* bytes."?

> + *     Return
> + *             0 on success, -EINVAL if the dynptr is invalid, -ERANGE if *len*
> + *             exceeds the bounds of the dynptr.
> + *
> + * long bpf_dynptr_trim(struct bpf_dynptr *ptr, u32 len)
> + *     Description
> + *             Trim a dynptr by *len*.

Similar as above, something like "Reduce the size of memory pointed to
by dynptr by *len* without changing offset"?

> + *     Return
> + *             0 on success, -EINVAL if the dynptr is invalid, -ERANGE if
> + *             trying to trim more bytes than the size of the dynptr.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5667,6 +5681,8 @@ union bpf_attr {
>         FN(dynptr_from_skb),            \
>         FN(dynptr_from_xdp),            \
>         FN(dynptr_data_rdonly),         \
> +       FN(dynptr_advance),             \
> +       FN(dynptr_trim),                \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 30a59c9e5df3..9f356105ab49 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1423,6 +1423,13 @@ static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
>         return ptr->size & DYNPTR_SIZE_MASK;
>  }
>
> +static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
> +{
> +       u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
> +
> +       ptr->size = new_size | metadata;
> +}
> +
>  int bpf_dynptr_check_size(u32 size)
>  {
>         return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
> @@ -1646,6 +1653,58 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
>         .arg3_type      = ARG_CONST_ALLOC_SIZE_OR_ZERO,
>  };
>
> +BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> +{
> +       u32 size;
> +
> +       if (!ptr->data)
> +               return -EINVAL;
> +
> +       size = bpf_dynptr_get_size(ptr);
> +
> +       if (len > size)
> +               return -ERANGE;
> +
> +       ptr->offset += len;
> +
> +       bpf_dynptr_set_size(ptr, size - len);

Kind of weird that offset is adjusted directly through field, but size
is set through helper.

One idea is to have (internal) helper that *increments* offset and
*decrements* size as one step. You can move all the error checking
inside it, instead of duplicating. E.g., something like this signature

static int bpf_dynptr_adjust(struct btf_dynptr_kern *ptr, u32 off_inc,
u32 sz_dec);

This increment/decrement is a bit surprising, but we don't allow size
to grow and offset to decrement, so this makes sense from that point
of view.

With that, we'll just do

return bpf_dynptr_adjust(ptr, len, len);

> +
> +       return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_dynptr_advance_proto = {
> +       .func           = bpf_dynptr_advance,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_DYNPTR,
> +       .arg2_type      = ARG_ANYTHING,
> +};
> +
> +BPF_CALL_2(bpf_dynptr_trim, struct bpf_dynptr_kern *, ptr, u32, len)
> +{
> +       u32 size;
> +
> +       if (!ptr->data)
> +               return -EINVAL;
> +
> +       size = bpf_dynptr_get_size(ptr);
> +
> +       if (len > size)
> +               return -ERANGE;
> +
> +       bpf_dynptr_set_size(ptr, size - len);

and here we can just do

return bpf_dynptr_adjust(ptr, 0, len);

> +
> +       return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_dynptr_trim_proto = {
> +       .func           = bpf_dynptr_trim,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_DYNPTR,
> +       .arg2_type      = ARG_ANYTHING,
> +};

[...]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone
  2022-09-08  0:02 ` [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone Joanne Koong
  2022-09-09 16:41   ` Shmulik Ladkani
@ 2022-09-28 22:29   ` Andrii Nakryiko
  1 sibling, 0 replies; 33+ messages in thread
From: Andrii Nakryiko @ 2022-09-28 22:29 UTC (permalink / raw)
  To: Joanne Koong; +Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Wed, Sep 7, 2022 at 5:07 PM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> Add a new helper, bpf_dynptr_clone, which clones a dynptr.
>
> The cloned dynptr will point to the same data as its parent dynptr,
> with the same type, offset, size and read-only properties.
>
> Any writes to a dynptr will be reflected across all instances
> (by 'instance', this means any dynptrs that point to the same
> underlying data).
>
> Please note that data slice and dynptr invalidations will affect all
> instances as well. For example, if bpf_dynptr_write() is called on an
> skb-type dynptr, all data slices of dynptr instances to that skb
> will be invalidated as well (eg data slices of any clones, parents,
> grandparents, ...). Another example is if a ringbuf dynptr is submitted,
> any instance of that dynptr will be invalidated.
>
> Changing the view of the dynptr (eg advancing the offset or
> trimming the size) will only affect that dynptr and not affect any
> other instances.
>
> One example use case where cloning may be helpful is for hashing or
> iterating through dynptr data. Cloning will allow the user to maintain
> the original view of the dynptr for future use, while also allowing
> views to smaller subsets of the data after the offset is advanced or the
> size is trimmed.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
>  include/uapi/linux/bpf.h       |  24 +++++++
>  kernel/bpf/helpers.c           |  23 +++++++
>  kernel/bpf/verifier.c          | 116 +++++++++++++++++++++------------
>  tools/include/uapi/linux/bpf.h |  24 +++++++
>  4 files changed, 147 insertions(+), 40 deletions(-)
>

This is a very important helper in practice, looking forward to have
it as part of dynptr API family!

> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4ca07cf500d2..16973fa4d073 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5508,6 +5508,29 @@ union bpf_attr {
>   *     Return
>   *             The offset of the dynptr on success, -EINVAL if the dynptr is
>   *             invalid.
> + *
> + * long bpf_dynptr_clone(struct bpf_dynptr *ptr, struct bpf_dynptr *clone)

const struct bpf_dynptr *ptr to make it clear that ptr is not
modified. We can also use src and dst terminology here for less
ambiguity.

> + *     Description
> + *             Clone an initialized dynptr *ptr*. After this call, both *ptr*
> + *             and *clone* will point to the same underlying data.
> + *
> + *             *clone* must be an uninitialized dynptr.
> + *
> + *             Any data slice or dynptr invalidations will apply equally for
> + *             both dynptrs after this call. For example, if ptr1 is a
> + *             ringbuf-type dynptr with multiple data slices that is cloned to
> + *             ptr2, if ptr2 discards the ringbuf sample, then ptr2, ptr2's
> + *             data slices, ptr1, and ptr1's data slices will all be
> + *             invalidated.
> + *
> + *             This is convenient for getting different "views" to the same
> + *             data. For instance, if one wishes to hash only a particular
> + *             section of data, one can clone the dynptr, advance it to a
> + *             specified offset and trim it to a specified size, pass it
> + *             to the hash function, and discard it after hashing, without
> + *             losing access to the original view of the dynptr.
> + *     Return
> + *             0 on success, -EINVAL if the dynptr to clone is invalid.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5728,6 +5751,7 @@ union bpf_attr {
>         FN(dynptr_is_rdonly),           \
>         FN(dynptr_get_size),            \
>         FN(dynptr_get_offset),          \
> +       FN(dynptr_clone),               \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 62ed27444b73..667f1e213a61 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1762,6 +1762,27 @@ static const struct bpf_func_proto bpf_dynptr_get_offset_proto = {
>         .arg1_type      = ARG_PTR_TO_DYNPTR,
>  };
>
> +BPF_CALL_2(bpf_dynptr_clone, struct bpf_dynptr_kern *, ptr,
> +          struct bpf_dynptr_kern *, clone)
> +{
> +       if (!ptr->data) {
> +               bpf_dynptr_set_null(clone);
> +               return -EINVAL;
> +       }

once we have MALLOC dynptr this will break without appropriate refcnt
bump. Let's have an explicit switch over all types of DYNPTR and error
out on "unknown" (really, forgotten) types of dynptr. Better safe than
having to debug corruptions in production.


> +
> +       memcpy(clone, ptr, sizeof(*clone));

*clone = *ptr?

> +
> +       return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_dynptr_clone_proto = {
> +       .func           = bpf_dynptr_clone,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_DYNPTR,
> +       .arg2_type      = ARG_PTR_TO_DYNPTR | MEM_UNINIT,
> +};
> +
>  const struct bpf_func_proto bpf_get_current_task_proto __weak;
>  const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
>  const struct bpf_func_proto bpf_probe_read_user_proto __weak;
> @@ -1846,6 +1867,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
>                 return &bpf_dynptr_get_size_proto;
>         case BPF_FUNC_dynptr_get_offset:
>                 return &bpf_dynptr_get_offset_proto;
> +       case BPF_FUNC_dynptr_clone:
> +               return &bpf_dynptr_clone_proto;
>         default:
>                 break;
>         }
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index c312d931359d..8c8c101513f5 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -694,17 +694,38 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
>         }
>  }
>
> +static bool arg_type_is_dynptr(enum bpf_arg_type type)
> +{
> +       return base_type(type) == ARG_PTR_TO_DYNPTR;
> +}
> +
>  static bool dynptr_type_refcounted(enum bpf_dynptr_type type)

just noticed this name, I think it's quite confusing and will get even
more so with MALLOC dynptr. It's not really reference *counted*, more
like verifier *tracks references*, right? Consider renaming this to
avoid this confusion. MALLOC dynptr will be both reference tracked
(internally for verifier) and reference counted (at runtime to keep it
alive).

>  {
>         return type == BPF_DYNPTR_TYPE_RINGBUF;
>  }
>
> -static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
> -                                  enum bpf_arg_type arg_type, int insn_idx)
> +static enum bpf_dynptr_type stack_slot_get_dynptr_info(struct bpf_verifier_env *env,
> +                                                      struct bpf_reg_state *reg,
> +                                                      int *ref_obj_id)
>  {
>         struct bpf_func_state *state = func(env, reg);
> -       enum bpf_dynptr_type type;
> -       int spi, i, id;
> +       int spi = get_spi(reg->off);
> +
> +       if (ref_obj_id)
> +               *ref_obj_id = state->stack[spi].spilled_ptr.id;
> +
> +       return state->stack[spi].spilled_ptr.dynptr.type;
> +}
> +
> +static int mark_stack_slots_dynptr(struct bpf_verifier_env *env,
> +                                  const struct bpf_func_proto *fn,
> +                                  struct bpf_reg_state *reg,
> +                                  enum bpf_arg_type arg_type,
> +                                  int insn_idx, int func_id)
> +{
> +       enum bpf_dynptr_type type = BPF_DYNPTR_TYPE_INVALID;
> +       struct bpf_func_state *state = func(env, reg);
> +       int spi, i, id = 0;
>
>         spi = get_spi(reg->off);
>
> @@ -716,7 +737,24 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
>                 state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
>         }
>
> -       type = arg_to_dynptr_type(arg_type);
> +       if (func_id == BPF_FUNC_dynptr_clone) {
> +               /* find the type and id of the dynptr we're cloning and
> +                * assign it to the clone
> +                */
> +               for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
> +                       enum bpf_arg_type t = fn->arg_type[i];
> +
> +                       if (arg_type_is_dynptr(t) && !(t & MEM_UNINIT)) {
> +                               type = stack_slot_get_dynptr_info(env,
> +                                                                 &state->regs[BPF_REG_1 + i],

so given we hard-coded the BPF_FUNC_dynptr_clone func ID, we know that we
need the first argument, so no need for this loop?

> +                                                                 &id);
> +                               break;
> +                       }
> +               }
> +       } else {
> +               type = arg_to_dynptr_type(arg_type);
> +       }
> +
>         if (type == BPF_DYNPTR_TYPE_INVALID)
>                 return -EINVAL;
>

[...]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone
  2022-09-10  5:31       ` Shmulik Ladkani
@ 2022-09-28 22:34         ` Andrii Nakryiko
  0 siblings, 0 replies; 33+ messages in thread
From: Andrii Nakryiko @ 2022-09-28 22:34 UTC (permalink / raw)
  To: Shmulik Ladkani
  Cc: Joanne Koong, bpf, daniel, martin.lau, andrii, ast, Kernel-team,
	Eyal Birger

On Fri, Sep 9, 2022 at 10:31 PM Shmulik Ladkani
<shmulik.ladkani@gmail.com> wrote:
>
> On Fri, 9 Sep 2022 15:18:57 -0700 Joanne Koong <joannelkoong@gmail.com> wrote:
>
> > I like the idea, where 'off' is an offset from ptr's offset and 'len'
> > is the number of bytes to trim.
>
> Actually parameters better be 'from' (inclusive) and 'to' (exclusive),
> similar to slicing in most languages. Specifying a negative 'to' provides
> the trimming functionality.

If we do [from, to) then user will have to make sure that they always
provide correct to, which might be quite cumbersome (you'll be calling
bpf_dynptr_get_size() - 1). Ideally this adjustment is optional, so
that user can pass 0, 0 and create exact copy.

So in that regard off_inc and sz_dec (just like I proposed for
internal bpf_dynptr_adjust()) makes most sense, but to be honest it's
quite confusing as they have "opposite directions". So it feels like
just simple bpf_dynptr_clone() followed by bpf_dynptr_{trim/advance}
might be best in terms of principle of least surprise in UAPI design.

>
> > Btw, I will be traveling for the next ~6 weeks and won't have access
> > to a computer, so v2 will be sometime after that.
>
> Enjoy

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator
  2022-09-08  0:02 ` [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator Joanne Koong
  2022-09-19  0:07   ` Kumar Kartikeya Dwivedi
@ 2022-09-28 22:41   ` Andrii Nakryiko
  2022-09-29  0:31     ` Kumar Kartikeya Dwivedi
  1 sibling, 1 reply; 33+ messages in thread
From: Andrii Nakryiko @ 2022-09-28 22:41 UTC (permalink / raw)
  To: Joanne Koong; +Cc: bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Wed, Sep 7, 2022 at 5:07 PM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> Add a new helper function, bpf_dynptr_iterator:
>
>   long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn,
>                            void *callback_ctx, u64 flags)
>
> where callback_fn is defined as:
>
>   long (*callback_fn)(struct bpf_dynptr *ptr, void *ctx)
>
> and callback_fn returns the number of bytes to advance the
> dynptr by (or an error code in the case of error). The iteration
> will stop if the callback_fn returns 0 or an error or tries to
> advance by more bytes than available.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
>  include/uapi/linux/bpf.h       | 20 ++++++++++++++
>  kernel/bpf/helpers.c           | 48 +++++++++++++++++++++++++++++++---
>  kernel/bpf/verifier.c          | 27 +++++++++++++++++++
>  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++
>  4 files changed, 111 insertions(+), 4 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 16973fa4d073..ff78a94c262a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5531,6 +5531,25 @@ union bpf_attr {
>   *             losing access to the original view of the dynptr.
>   *     Return
>   *             0 on success, -EINVAL if the dynptr to clone is invalid.
> + *
> + * long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn, void *callback_ctx, u64 flags)
> + *     Description
> + *             Iterate through the dynptr data, calling **callback_fn** on each
> + *             iteration with **callback_ctx** as the context parameter.
> + *             The **callback_fn** should be a static function and
> + *             the **callback_ctx** should be a pointer to the stack.
> + *             Currently **flags** is unused and must be 0.
> + *
> + *             long (\*callback_fn)(struct bpf_dynptr \*ptr, void \*ctx);
> + *
> + *             where **callback_fn** returns the number of bytes to advance
> + *             the dynptr by or an error. The iteration will stop if **callback_fn**
> + *             returns 0 or an error or tries to advance by more bytes than the
> + *             size of the dynptr.
> + *     Return
> + *             0 on success, -EINVAL if the dynptr is invalid or **flags** is not 0,
> + *             -ERANGE if attempting to iterate more bytes than available, or other
> + *             negative error if **callback_fn** returns an error.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5752,6 +5771,7 @@ union bpf_attr {
>         FN(dynptr_get_size),            \
>         FN(dynptr_get_offset),          \
>         FN(dynptr_clone),               \
> +       FN(dynptr_iterator),            \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 667f1e213a61..519b3da06d49 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1653,13 +1653,11 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
>         .arg3_type      = ARG_CONST_ALLOC_SIZE_OR_ZERO,
>  };
>
> -BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> +/* *ptr* should always be a valid dynptr */
> +static int __bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
>  {
>         u32 size;
>
> -       if (!ptr->data)
> -               return -EINVAL;
> -
>         size = __bpf_dynptr_get_size(ptr);
>
>         if (len > size)
> @@ -1672,6 +1670,14 @@ BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
>         return 0;
>  }
>
> +BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> +{
> +       if (!ptr->data)
> +               return -EINVAL;
> +
> +       return __bpf_dynptr_advance(ptr, len);
> +}
> +
>  static const struct bpf_func_proto bpf_dynptr_advance_proto = {
>         .func           = bpf_dynptr_advance,
>         .gpl_only       = false,
> @@ -1783,6 +1789,38 @@ static const struct bpf_func_proto bpf_dynptr_clone_proto = {
>         .arg2_type      = ARG_PTR_TO_DYNPTR | MEM_UNINIT,
>  };
>
> +BPF_CALL_4(bpf_dynptr_iterator, struct bpf_dynptr_kern *, ptr, void *, callback_fn,
> +          void *, callback_ctx, u64, flags)
> +{
> +       bpf_callback_t callback = (bpf_callback_t)callback_fn;
> +       int nr_bytes, err;
> +
> +       if (!ptr->data || flags)
> +               return -EINVAL;
> +
> +       while (ptr->size > 0) {
> +               nr_bytes = callback((u64)(long)ptr, (u64)(long)callback_ctx, 0, 0, 0);
> +               if (nr_bytes <= 0)
> +                       return nr_bytes;

callback is defined as returning long but here you are silently
truncating it to int. Let's either stick to longs or to ints.

> +
> +               err = __bpf_dynptr_advance(ptr, nr_bytes);

as Kumar pointed out, you can't modify ptr in place, you have to
create a local copy and bpf_dynptr_clone() it (so that for MALLOC
you'll bump refcnt). Then advance and pass it to callback. David has
such local dynptr use case in bpf_user_ringbuf_drain() helper.

> +               if (err)
> +                       return err;
> +       }
> +
> +       return 0;
> +}
> +

[...]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator
  2022-09-19  0:07   ` Kumar Kartikeya Dwivedi
@ 2022-09-28 22:47     ` Andrii Nakryiko
  0 siblings, 0 replies; 33+ messages in thread
From: Andrii Nakryiko @ 2022-09-28 22:47 UTC (permalink / raw)
  To: Kumar Kartikeya Dwivedi
  Cc: Joanne Koong, bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Sun, Sep 18, 2022 at 5:08 PM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> On Thu, 8 Sept 2022 at 02:16, Joanne Koong <joannelkoong@gmail.com> wrote:
> >
> > Add a new helper function, bpf_dynptr_iterator:
> >
> >   long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn,
> >                            void *callback_ctx, u64 flags)
> >
> > where callback_fn is defined as:
> >
> >   long (*callback_fn)(struct bpf_dynptr *ptr, void *ctx)
> >
> > and callback_fn returns the number of bytes to advance the
> > dynptr by (or an error code in the case of error). The iteration
> > will stop if the callback_fn returns 0 or an error or tries to
> > advance by more bytes than available.
> >
> > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > ---
>
> This is buggy as is.
>
> A user can just reinitialize the dynptr from inside the callback, and
> then you will never stop advancing it inside your helper, therefore an
> infinite loop can be constructed. The stack frame of the caller is
> accessible using callback_ctx.
>
> For example (modifying your selftest)
>
> diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c
> b/tools/testing/selftests/bpf/progs/dynptr_success.c
> index 22164ad6df9d..a9e78316c508 100644
> --- a/tools/testing/selftests/bpf/progs/dynptr_success.c
> +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
> @@ -540,6 +540,19 @@ struct iter_ctx {
>
>  static int iter_callback(struct bpf_dynptr *ptr, struct iter_ctx *ctx)
>  {
> +       struct map_value *map_val;
> +       int key = 0;
> +
> +       map_val = bpf_map_lookup_elem(&array_map2, &key);
> +       if (!map_val) {
> +               return 0;
> +       }
> +
> +       *(void **)ptr = 0;
> +       *((void **)ptr + 1) = 0;
> +       bpf_dynptr_from_mem(map_val, 2, 0, ptr);
> +       return 1;
> +
>         if (ctx->trigger_err_erange)
>                 return bpf_dynptr_get_size(ptr) + 1;
>
> ... leads to a lockup.
>
> It doesn't have to be ringbuf_reserve_dynptr, it can just be
> dynptr_from_mem, which also gets around reference state restrictions
> inside callbacks.
>
> You cannot prevent overwriting dynptr stack slots in general. Some of
> them don't have to be released. It would be prohibitive for stack
> reuse.
>
> So it seems like you need to temporarily mark both the slots as
> immutable for the caller stack state during exploration of the
> callback.
> Setting some flag in r1 for callback is not enough (as it can reload
> PTR_TO_STACK of caller stack frame pointing at dynptr from
> callback_ctx). It needs to be set in spilled_ptr.

This sounds overcomplicated. We need a local copy of dynptr for the
duration of iteration and work with it. Basically internal
bpf_dynptr_clone(). See my other reply in this thread.

>
> Then certain operations modifying the view of the dynptr do not accept
> dynptr with that type flag set (e.g. trim, advance, init functions).
> While for others which only operate on the underlying view, you fold
> the flag (e.g. read/write/dynptr_data).
>
> It is the difference between struct bpf_dynptr *, vs const struct
> bpf_dynptr *, we need to give the callback access to the latter.
> I.e. it should still allow accessing the dynptr's view, but not modifying it.
>
> And at the risk of sounding like a broken record (and same suggestion
> as Martin in skb/xdp v6 set), the view's mutability should ideally
> also be part of the verifier's state. That doesn't preclude runtime
> tracking later, but there seems to be no strong motivation for that
> right now.

The unexpected NULL for bpf_dynptr_data() vs bpf_dynptr_data_rdonly()
argument from Martin is pretty convincing, I agree. So I guess I don't
mind tracking it statically at this point.

>
> >  include/uapi/linux/bpf.h       | 20 ++++++++++++++
> >  kernel/bpf/helpers.c           | 48 +++++++++++++++++++++++++++++++---
> >  kernel/bpf/verifier.c          | 27 +++++++++++++++++++
> >  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++
> >  4 files changed, 111 insertions(+), 4 deletions(-)
> >

please trim irrelevant parts

[...]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator
  2022-09-28 22:41   ` Andrii Nakryiko
@ 2022-09-29  0:31     ` Kumar Kartikeya Dwivedi
  2022-09-29  0:43       ` Andrii Nakryiko
  0 siblings, 1 reply; 33+ messages in thread
From: Kumar Kartikeya Dwivedi @ 2022-09-29  0:31 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Joanne Koong, bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Thu, 29 Sept 2022 at 01:04, Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Sep 7, 2022 at 5:07 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> >
> > Add a new helper function, bpf_dynptr_iterator:
> >
> >   long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn,
> >                            void *callback_ctx, u64 flags)
> >
> > where callback_fn is defined as:
> >
> >   long (*callback_fn)(struct bpf_dynptr *ptr, void *ctx)
> >
> > and callback_fn returns the number of bytes to advance the
> > dynptr by (or an error code in the case of error). The iteration
> > will stop if the callback_fn returns 0 or an error or tries to
> > advance by more bytes than available.
> >
> > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > ---
> >  include/uapi/linux/bpf.h       | 20 ++++++++++++++
> >  kernel/bpf/helpers.c           | 48 +++++++++++++++++++++++++++++++---
> >  kernel/bpf/verifier.c          | 27 +++++++++++++++++++
> >  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++
> >  4 files changed, 111 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 16973fa4d073..ff78a94c262a 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -5531,6 +5531,25 @@ union bpf_attr {
> >   *             losing access to the original view of the dynptr.
> >   *     Return
> >   *             0 on success, -EINVAL if the dynptr to clone is invalid.
> > + *
> > + * long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn, void *callback_ctx, u64 flags)
> > + *     Description
> > + *             Iterate through the dynptr data, calling **callback_fn** on each
> > + *             iteration with **callback_ctx** as the context parameter.
> > + *             The **callback_fn** should be a static function and
> > + *             the **callback_ctx** should be a pointer to the stack.
> > + *             Currently **flags** is unused and must be 0.
> > + *
> > + *             long (\*callback_fn)(struct bpf_dynptr \*ptr, void \*ctx);
> > + *
> > + *             where **callback_fn** returns the number of bytes to advance
> > + *             the dynptr by or an error. The iteration will stop if **callback_fn**
> > + *             returns 0 or an error or tries to advance by more bytes than the
> > + *             size of the dynptr.
> > + *     Return
> > + *             0 on success, -EINVAL if the dynptr is invalid or **flags** is not 0,
> > + *             -ERANGE if attempting to iterate more bytes than available, or other
> > + *             negative error if **callback_fn** returns an error.
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)          \
> >         FN(unspec),                     \
> > @@ -5752,6 +5771,7 @@ union bpf_attr {
> >         FN(dynptr_get_size),            \
> >         FN(dynptr_get_offset),          \
> >         FN(dynptr_clone),               \
> > +       FN(dynptr_iterator),            \
> >         /* */
> >
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index 667f1e213a61..519b3da06d49 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -1653,13 +1653,11 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
> >         .arg3_type      = ARG_CONST_ALLOC_SIZE_OR_ZERO,
> >  };
> >
> > -BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > +/* *ptr* should always be a valid dynptr */
> > +static int __bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
> >  {
> >         u32 size;
> >
> > -       if (!ptr->data)
> > -               return -EINVAL;
> > -
> >         size = __bpf_dynptr_get_size(ptr);
> >
> >         if (len > size)
> > @@ -1672,6 +1670,14 @@ BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> >         return 0;
> >  }
> >
> > +BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > +{
> > +       if (!ptr->data)
> > +               return -EINVAL;
> > +
> > +       return __bpf_dynptr_advance(ptr, len);
> > +}
> > +
> >  static const struct bpf_func_proto bpf_dynptr_advance_proto = {
> >         .func           = bpf_dynptr_advance,
> >         .gpl_only       = false,
> > @@ -1783,6 +1789,38 @@ static const struct bpf_func_proto bpf_dynptr_clone_proto = {
> >         .arg2_type      = ARG_PTR_TO_DYNPTR | MEM_UNINIT,
> >  };
> >
> > +BPF_CALL_4(bpf_dynptr_iterator, struct bpf_dynptr_kern *, ptr, void *, callback_fn,
> > +          void *, callback_ctx, u64, flags)
> > +{
> > +       bpf_callback_t callback = (bpf_callback_t)callback_fn;
> > +       int nr_bytes, err;
> > +
> > +       if (!ptr->data || flags)
> > +               return -EINVAL;
> > +
> > +       while (ptr->size > 0) {
> > +               nr_bytes = callback((u64)(long)ptr, (u64)(long)callback_ctx, 0, 0, 0);
> > +               if (nr_bytes <= 0)
> > +                       return nr_bytes;
>
> callback is defined as returning long but here you are silently
> truncating it to int. Let's either stick to longs or to ints.
>
> > +
> > +               err = __bpf_dynptr_advance(ptr, nr_bytes);
>
> as Kumar pointed out, you can't modify ptr in place, you have to
> create a local copy and bpf_dynptr_clone() it (so that for MALLOC
> you'll bump refcnt). Then advance and pass it to callback. David has
> such local dynptr use case in bpf_user_ringbuf_drain() helper.
>

My solution was a bit overcomplicated because just creating a local
copy doesn't fix this completely, there's still the hole of writing
through arg#0 (which now does not reflect runtime state, as writes at
runtime go to clone while verifier thinks you touched stack slots),
and still allows constructing the infinite loop (because well, you can
overwrite dynptr through it). The 'side channel' of writing to dynptr
slots through callback_ctx is still there as well.

Maybe the infinite loop _can_ be avoided if you clone inside each
iteration; that part isn't very clear.

My reasoning was that when you iterate, the container is always
immutable (to prevent iterator invalidation) while mutability of
elements remains unaffected from that. Setting it in spilled_ptr
covers all bases (PTR_TO_STACK arg#0, access to it through
callback_ctx, etc.).

But I totally agree with you that we should be working on a local copy
inside the helper and leave the original dynptr untouched.
I think then the first arg should not be PTR_TO_STACK but some other
pointer type. Maybe it should be its own register type, and
spilled_ptr should reflect the same register, which allows the dynptr
state that we track to be copied into the callback arg#0 easily.

For the case of writes to the original dynptr that is already broken
right now, we can't track the state of the stack across iterations
correctly anyway so I think that has to be left as is until your N
callbacks rework happens.

wdyt?

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator
  2022-09-29  0:31     ` Kumar Kartikeya Dwivedi
@ 2022-09-29  0:43       ` Andrii Nakryiko
  2022-10-02 16:45         ` Kumar Kartikeya Dwivedi
  0 siblings, 1 reply; 33+ messages in thread
From: Andrii Nakryiko @ 2022-09-29  0:43 UTC (permalink / raw)
  To: Kumar Kartikeya Dwivedi
  Cc: Joanne Koong, bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Wed, Sep 28, 2022 at 5:32 PM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> On Thu, 29 Sept 2022 at 01:04, Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Wed, Sep 7, 2022 at 5:07 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> > >
> > > Add a new helper function, bpf_dynptr_iterator:
> > >
> > >   long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn,
> > >                            void *callback_ctx, u64 flags)
> > >
> > > where callback_fn is defined as:
> > >
> > >   long (*callback_fn)(struct bpf_dynptr *ptr, void *ctx)
> > >
> > > and callback_fn returns the number of bytes to advance the
> > > dynptr by (or an error code in the case of error). The iteration
> > > will stop if the callback_fn returns 0 or an error or tries to
> > > advance by more bytes than available.
> > >
> > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > > ---
> > >  include/uapi/linux/bpf.h       | 20 ++++++++++++++
> > >  kernel/bpf/helpers.c           | 48 +++++++++++++++++++++++++++++++---
> > >  kernel/bpf/verifier.c          | 27 +++++++++++++++++++
> > >  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++
> > >  4 files changed, 111 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index 16973fa4d073..ff78a94c262a 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -5531,6 +5531,25 @@ union bpf_attr {
> > >   *             losing access to the original view of the dynptr.
> > >   *     Return
> > >   *             0 on success, -EINVAL if the dynptr to clone is invalid.
> > > + *
> > > + * long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn, void *callback_ctx, u64 flags)
> > > + *     Description
> > > + *             Iterate through the dynptr data, calling **callback_fn** on each
> > > + *             iteration with **callback_ctx** as the context parameter.
> > > + *             The **callback_fn** should be a static function and
> > > + *             the **callback_ctx** should be a pointer to the stack.
> > > + *             Currently **flags** is unused and must be 0.
> > > + *
> > > + *             long (\*callback_fn)(struct bpf_dynptr \*ptr, void \*ctx);
> > > + *
> > > + *             where **callback_fn** returns the number of bytes to advance
> > > + *             the dynptr by or an error. The iteration will stop if **callback_fn**
> > > + *             returns 0 or an error or tries to advance by more bytes than the
> > > + *             size of the dynptr.
> > > + *     Return
> > > + *             0 on success, -EINVAL if the dynptr is invalid or **flags** is not 0,
> > > + *             -ERANGE if attempting to iterate more bytes than available, or other
> > > + *             negative error if **callback_fn** returns an error.
> > >   */
> > >  #define __BPF_FUNC_MAPPER(FN)          \
> > >         FN(unspec),                     \
> > > @@ -5752,6 +5771,7 @@ union bpf_attr {
> > >         FN(dynptr_get_size),            \
> > >         FN(dynptr_get_offset),          \
> > >         FN(dynptr_clone),               \
> > > +       FN(dynptr_iterator),            \
> > >         /* */
> > >
> > >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > > index 667f1e213a61..519b3da06d49 100644
> > > --- a/kernel/bpf/helpers.c
> > > +++ b/kernel/bpf/helpers.c
> > > @@ -1653,13 +1653,11 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
> > >         .arg3_type      = ARG_CONST_ALLOC_SIZE_OR_ZERO,
> > >  };
> > >
> > > -BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > > +/* *ptr* should always be a valid dynptr */
> > > +static int __bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
> > >  {
> > >         u32 size;
> > >
> > > -       if (!ptr->data)
> > > -               return -EINVAL;
> > > -
> > >         size = __bpf_dynptr_get_size(ptr);
> > >
> > >         if (len > size)
> > > @@ -1672,6 +1670,14 @@ BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > >         return 0;
> > >  }
> > >
> > > +BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > > +{
> > > +       if (!ptr->data)
> > > +               return -EINVAL;
> > > +
> > > +       return __bpf_dynptr_advance(ptr, len);
> > > +}
> > > +
> > >  static const struct bpf_func_proto bpf_dynptr_advance_proto = {
> > >         .func           = bpf_dynptr_advance,
> > >         .gpl_only       = false,
> > > @@ -1783,6 +1789,38 @@ static const struct bpf_func_proto bpf_dynptr_clone_proto = {
> > >         .arg2_type      = ARG_PTR_TO_DYNPTR | MEM_UNINIT,
> > >  };
> > >
> > > +BPF_CALL_4(bpf_dynptr_iterator, struct bpf_dynptr_kern *, ptr, void *, callback_fn,
> > > +          void *, callback_ctx, u64, flags)
> > > +{
> > > +       bpf_callback_t callback = (bpf_callback_t)callback_fn;
> > > +       int nr_bytes, err;
> > > +
> > > +       if (!ptr->data || flags)
> > > +               return -EINVAL;
> > > +
> > > +       while (ptr->size > 0) {
> > > +               nr_bytes = callback((u64)(long)ptr, (u64)(long)callback_ctx, 0, 0, 0);
> > > +               if (nr_bytes <= 0)
> > > +                       return nr_bytes;
> >
> > callback is defined as returning long but here you are silently
> > truncating it to int. Let's either stick to longs or to ints.
> >
> > > +
> > > +               err = __bpf_dynptr_advance(ptr, nr_bytes);
> >
> > as Kumar pointed out, you can't modify ptr in place, you have to
> > create a local copy and bpf_dynptr_clone() it (so that for MALLOC
> > you'll bump refcnt). Then advance and pass it to callback. David has
> > such local dynptr use case in bpf_user_ringbuf_drain() helper.
> >
>
> My solution was a bit overcomplicated because just creating a local
> copy doesn't fix this completely, there's still the hole of writing
> through arg#0 (which now does not reflect runtime state, as writes at
> runtime go to clone while verifier thinks you touched stack slots),
> and still allows constructing the infinite loop (because well, you can
> overwrite dynptr through it). The 'side channel' of writing to dynptr
> slots through callback_ctx is still there as well.
>
> Maybe the infinite loop _can_ be avoided if you clone inside each
> iteration, that part isn't very clear.
>
> My reasoning was that when you iterate, the container is always
> immutable (to prevent iterator invalidation) while mutability of
> elements remains unaffected from that. Setting it in spilled_ptr
> covers all bases (PTR_TO_STACK arg#0, access to it through
> callback_ctx, etc.).
>
> But I totally agree with you that we should be working on a local copy
> inside the helper and leave the original dynptr untouched.
> I think then the first arg should not be PTR_TO_STACK but some other
> pointer type. Maybe it should be its own register type, and
> spilled_ptr should reflect the same register, which allows the dynptr
> state that we track to be copied into the callback arg#0 easily.

Right, something like what David Vernet did with his
bpf_user_ringbuf_drain() helper that passes kernel-only (not
PTR_TO_STACK) dynptr into callback. If that implementation has the
same hole (being able to be modified), we should fix it the same way
in both cases, by not allowing this for PTR_TO_DYNPTR (or whatever the
new reg type is called).

>
> For the case of writes to the original dynptr that is already broken
> right now, we can't track the state of the stack across iterations
> correctly anyway so I think that has to be left as is until your N
> callbacks rework happens.

Right, that's a separate issue.

>
> wdyt?

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator
  2022-09-29  0:43       ` Andrii Nakryiko
@ 2022-10-02 16:45         ` Kumar Kartikeya Dwivedi
  2022-10-03 18:39           ` Andrii Nakryiko
  0 siblings, 1 reply; 33+ messages in thread
From: Kumar Kartikeya Dwivedi @ 2022-10-02 16:45 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Joanne Koong, bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Thu, 29 Sept 2022 at 02:43, Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Sep 28, 2022 at 5:32 PM Kumar Kartikeya Dwivedi
> <memxor@gmail.com> wrote:
> >
> > On Thu, 29 Sept 2022 at 01:04, Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Wed, Sep 7, 2022 at 5:07 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> > > >
> > > > Add a new helper function, bpf_dynptr_iterator:
> > > >
> > > >   long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn,
> > > >                            void *callback_ctx, u64 flags)
> > > >
> > > > where callback_fn is defined as:
> > > >
> > > >   long (*callback_fn)(struct bpf_dynptr *ptr, void *ctx)
> > > >
> > > > and callback_fn returns the number of bytes to advance the
> > > > dynptr by (or an error code in the case of error). The iteration
> > > > will stop if the callback_fn returns 0 or an error or tries to
> > > > advance by more bytes than available.
> > > >
> > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > > > ---
> > > >  include/uapi/linux/bpf.h       | 20 ++++++++++++++
> > > >  kernel/bpf/helpers.c           | 48 +++++++++++++++++++++++++++++++---
> > > >  kernel/bpf/verifier.c          | 27 +++++++++++++++++++
> > > >  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++
> > > >  4 files changed, 111 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > index 16973fa4d073..ff78a94c262a 100644
> > > > --- a/include/uapi/linux/bpf.h
> > > > +++ b/include/uapi/linux/bpf.h
> > > > @@ -5531,6 +5531,25 @@ union bpf_attr {
> > > >   *             losing access to the original view of the dynptr.
> > > >   *     Return
> > > >   *             0 on success, -EINVAL if the dynptr to clone is invalid.
> > > > + *
> > > > + * long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn, void *callback_ctx, u64 flags)
> > > > + *     Description
> > > > + *             Iterate through the dynptr data, calling **callback_fn** on each
> > > > + *             iteration with **callback_ctx** as the context parameter.
> > > > + *             The **callback_fn** should be a static function and
> > > > + *             the **callback_ctx** should be a pointer to the stack.
> > > > + *             Currently **flags** is unused and must be 0.
> > > > + *
> > > > + *             long (\*callback_fn)(struct bpf_dynptr \*ptr, void \*ctx);
> > > > + *
> > > > + *             where **callback_fn** returns the number of bytes to advance
> > > > + *             the dynptr by or an error. The iteration will stop if **callback_fn**
> > > > + *             returns 0 or an error or tries to advance by more bytes than the
> > > > + *             size of the dynptr.
> > > > + *     Return
> > > > + *             0 on success, -EINVAL if the dynptr is invalid or **flags** is not 0,
> > > > + *             -ERANGE if attempting to iterate more bytes than available, or other
> > > > + *             negative error if **callback_fn** returns an error.
> > > >   */
> > > >  #define __BPF_FUNC_MAPPER(FN)          \
> > > >         FN(unspec),                     \
> > > > @@ -5752,6 +5771,7 @@ union bpf_attr {
> > > >         FN(dynptr_get_size),            \
> > > >         FN(dynptr_get_offset),          \
> > > >         FN(dynptr_clone),               \
> > > > +       FN(dynptr_iterator),            \
> > > >         /* */
> > > >
> > > >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > > > index 667f1e213a61..519b3da06d49 100644
> > > > --- a/kernel/bpf/helpers.c
> > > > +++ b/kernel/bpf/helpers.c
> > > > @@ -1653,13 +1653,11 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
> > > >         .arg3_type      = ARG_CONST_ALLOC_SIZE_OR_ZERO,
> > > >  };
> > > >
> > > > -BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > > > +/* *ptr* should always be a valid dynptr */
> > > > +static int __bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
> > > >  {
> > > >         u32 size;
> > > >
> > > > -       if (!ptr->data)
> > > > -               return -EINVAL;
> > > > -
> > > >         size = __bpf_dynptr_get_size(ptr);
> > > >
> > > >         if (len > size)
> > > > @@ -1672,6 +1670,14 @@ BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > > >         return 0;
> > > >  }
> > > >
> > > > +BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > > > +{
> > > > +       if (!ptr->data)
> > > > +               return -EINVAL;
> > > > +
> > > > +       return __bpf_dynptr_advance(ptr, len);
> > > > +}
> > > > +
> > > >  static const struct bpf_func_proto bpf_dynptr_advance_proto = {
> > > >         .func           = bpf_dynptr_advance,
> > > >         .gpl_only       = false,
> > > > @@ -1783,6 +1789,38 @@ static const struct bpf_func_proto bpf_dynptr_clone_proto = {
> > > >         .arg2_type      = ARG_PTR_TO_DYNPTR | MEM_UNINIT,
> > > >  };
> > > >
> > > > +BPF_CALL_4(bpf_dynptr_iterator, struct bpf_dynptr_kern *, ptr, void *, callback_fn,
> > > > +          void *, callback_ctx, u64, flags)
> > > > +{
> > > > +       bpf_callback_t callback = (bpf_callback_t)callback_fn;
> > > > +       int nr_bytes, err;
> > > > +
> > > > +       if (!ptr->data || flags)
> > > > +               return -EINVAL;
> > > > +
> > > > +       while (ptr->size > 0) {
> > > > +               nr_bytes = callback((u64)(long)ptr, (u64)(long)callback_ctx, 0, 0, 0);
> > > > +               if (nr_bytes <= 0)
> > > > +                       return nr_bytes;
> > >
> > > callback is defined as returning long but here you are silently
> > > truncating it to int. Let's either stick to longs or to ints.
> > >
> > > > +
> > > > +               err = __bpf_dynptr_advance(ptr, nr_bytes);
> > >
> > > as Kumar pointed out, you can't modify ptr in place, you have to
> > > create a local copy and bpf_dynptr_clone() it (so that for MALLOC
> > > you'll bump refcnt). Then advance and pass it to callback. David has
> > > such local dynptr use case in bpf_user_ringbuf_drain() helper.
> > >
> >
> > My solution was a bit overcomplicated because just creating a local
> > copy doesn't fix this completely, there's still the hole of writing
> > through arg#0 (which now does not reflect runtime state, as writes at
> > runtime go to clone while verifier thinks you touched stack slots),
> > and still allows constructing the infinite loop (because well, you can
> > overwrite dynptr through it). The 'side channel' of writing to dynptr
> > slots through callback_ctx is still there as well.
> >
> > Maybe the infinite loop _can_ be avoided if you clone inside each
> > iteration, that part isn't very clear.
> >
> > My reasoning was that when you iterate, the container is always
> > immutable (to prevent iterator invalidation) while mutability of
> > elements remains unaffected from that. Setting it in spilled_ptr
> > covers all bases (PTR_TO_STACK arg#0, access to it through
> > callback_ctx, etc.).
> >
> > But I totally agree with you that we should be working on a local copy
> > inside the helper and leave the original dynptr untouched.
> > I think then the first arg should not be PTR_TO_STACK but some other
> > pointer type. Maybe it should be its own register type, and
> > spilled_ptr should reflect the same register, which allows the dynptr
> > state that we track to be copied into the callback arg#0 easily.
>
> Right, something like what David Vernet did with his
> bpf_user_ringbuf_drain() helper that passes kernel-only (not
> PTR_TO_STACK) dynptr into callback. If that implementation has the
> same hole (being able to be modified), we should fix it the same way
> in both cases, by not allowing this for PTR_TO_DYNPTR (or whatever the
> new reg type is called).
>

Sorry for the late response.

Somehow I missed that series entirely. Yes, we should reuse that register type,
and it does seem like it needs to check for MEM_UNINIT to prevent
reinitialization. I'm rolling that fix into
my dynptr series that I'm sending next week, since it would lead to
lots of conflicts otherwise.
Then this set can use the same approach.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator
  2022-10-02 16:45         ` Kumar Kartikeya Dwivedi
@ 2022-10-03 18:39           ` Andrii Nakryiko
  0 siblings, 0 replies; 33+ messages in thread
From: Andrii Nakryiko @ 2022-10-03 18:39 UTC (permalink / raw)
  To: Kumar Kartikeya Dwivedi
  Cc: Joanne Koong, bpf, daniel, martin.lau, andrii, ast, Kernel-team

On Sun, Oct 2, 2022 at 9:46 AM Kumar Kartikeya Dwivedi <memxor@gmail.com> wrote:
>
> On Thu, 29 Sept 2022 at 02:43, Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Wed, Sep 28, 2022 at 5:32 PM Kumar Kartikeya Dwivedi
> > <memxor@gmail.com> wrote:
> > >
> > > On Thu, 29 Sept 2022 at 01:04, Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > On Wed, Sep 7, 2022 at 5:07 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> > > > >
> > > > > Add a new helper function, bpf_dynptr_iterator:
> > > > >
> > > > >   long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn,
> > > > >                            void *callback_ctx, u64 flags)
> > > > >
> > > > > where callback_fn is defined as:
> > > > >
> > > > >   long (*callback_fn)(struct bpf_dynptr *ptr, void *ctx)
> > > > >
> > > > > and callback_fn returns the number of bytes to advance the
> > > > > dynptr by (or an error code in the case of error). The iteration
> > > > > will stop if the callback_fn returns 0 or an error or tries to
> > > > > advance by more bytes than available.
> > > > >
> > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > > > > ---
> > > > >  include/uapi/linux/bpf.h       | 20 ++++++++++++++
> > > > >  kernel/bpf/helpers.c           | 48 +++++++++++++++++++++++++++++++---
> > > > >  kernel/bpf/verifier.c          | 27 +++++++++++++++++++
> > > > >  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++
> > > > >  4 files changed, 111 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > > index 16973fa4d073..ff78a94c262a 100644
> > > > > --- a/include/uapi/linux/bpf.h
> > > > > +++ b/include/uapi/linux/bpf.h
> > > > > @@ -5531,6 +5531,25 @@ union bpf_attr {
> > > > >   *             losing access to the original view of the dynptr.
> > > > >   *     Return
> > > > >   *             0 on success, -EINVAL if the dynptr to clone is invalid.
> > > > > + *
> > > > > + * long bpf_dynptr_iterator(struct bpf_dynptr *ptr, void *callback_fn, void *callback_ctx, u64 flags)
> > > > > + *     Description
> > > > > + *             Iterate through the dynptr data, calling **callback_fn** on each
> > > > > + *             iteration with **callback_ctx** as the context parameter.
> > > > > + *             The **callback_fn** should be a static function and
> > > > > + *             the **callback_ctx** should be a pointer to the stack.
> > > > > + *             Currently **flags** is unused and must be 0.
> > > > > + *
> > > > > + *             long (\*callback_fn)(struct bpf_dynptr \*ptr, void \*ctx);
> > > > > + *
> > > > > + *             where **callback_fn** returns the number of bytes to advance
> > > > > + *             the dynptr by or an error. The iteration will stop if **callback_fn**
> > > > > + *             returns 0 or an error or tries to advance by more bytes than the
> > > > > + *             size of the dynptr.
> > > > > + *     Return
> > > > > + *             0 on success, -EINVAL if the dynptr is invalid or **flags** is not 0,
> > > > > + *             -ERANGE if attempting to iterate more bytes than available, or other
> > > > > + *             negative error if **callback_fn** returns an error.
> > > > >   */
> > > > >  #define __BPF_FUNC_MAPPER(FN)          \
> > > > >         FN(unspec),                     \
> > > > > @@ -5752,6 +5771,7 @@ union bpf_attr {
> > > > >         FN(dynptr_get_size),            \
> > > > >         FN(dynptr_get_offset),          \
> > > > >         FN(dynptr_clone),               \
> > > > > +       FN(dynptr_iterator),            \
> > > > >         /* */
> > > > >
> > > > >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > > > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > > > > index 667f1e213a61..519b3da06d49 100644
> > > > > --- a/kernel/bpf/helpers.c
> > > > > +++ b/kernel/bpf/helpers.c
> > > > > @@ -1653,13 +1653,11 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
> > > > >         .arg3_type      = ARG_CONST_ALLOC_SIZE_OR_ZERO,
> > > > >  };
> > > > >
> > > > > -BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > > > > +/* *ptr* should always be a valid dynptr */
> > > > > +static int __bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
> > > > >  {
> > > > >         u32 size;
> > > > >
> > > > > -       if (!ptr->data)
> > > > > -               return -EINVAL;
> > > > > -
> > > > >         size = __bpf_dynptr_get_size(ptr);
> > > > >
> > > > >         if (len > size)
> > > > > @@ -1672,6 +1670,14 @@ BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > > > >         return 0;
> > > > >  }
> > > > >
> > > > > +BPF_CALL_2(bpf_dynptr_advance, struct bpf_dynptr_kern *, ptr, u32, len)
> > > > > +{
> > > > > +       if (!ptr->data)
> > > > > +               return -EINVAL;
> > > > > +
> > > > > +       return __bpf_dynptr_advance(ptr, len);
> > > > > +}
> > > > > +
> > > > >  static const struct bpf_func_proto bpf_dynptr_advance_proto = {
> > > > >         .func           = bpf_dynptr_advance,
> > > > >         .gpl_only       = false,
> > > > > @@ -1783,6 +1789,38 @@ static const struct bpf_func_proto bpf_dynptr_clone_proto = {
> > > > >         .arg2_type      = ARG_PTR_TO_DYNPTR | MEM_UNINIT,
> > > > >  };
> > > > >
> > > > > +BPF_CALL_4(bpf_dynptr_iterator, struct bpf_dynptr_kern *, ptr, void *, callback_fn,
> > > > > +          void *, callback_ctx, u64, flags)
> > > > > +{
> > > > > +       bpf_callback_t callback = (bpf_callback_t)callback_fn;
> > > > > +       int nr_bytes, err;
> > > > > +
> > > > > +       if (!ptr->data || flags)
> > > > > +               return -EINVAL;
> > > > > +
> > > > > +       while (ptr->size > 0) {
> > > > > +               nr_bytes = callback((u64)(long)ptr, (u64)(long)callback_ctx, 0, 0, 0);
> > > > > +               if (nr_bytes <= 0)
> > > > > +                       return nr_bytes;
> > > >
> > > > callback is defined as returning long but here you are silently
> > > > truncating it to int. Let's either stick to longs or to ints.
> > > >
> > > > > +
> > > > > +               err = __bpf_dynptr_advance(ptr, nr_bytes);
> > > >
> > > > as Kumar pointed out, you can't modify ptr in place, you have to
> > > > create a local copy and bpf_dynptr_clone() it (so that for MALLOC
> > > > you'll bump refcnt). Then advance and pass it to callback. David has
> > > > such local dynptr use case in bpf_user_ringbuf_drain() helper.
> > > >
> > >
> > > My solution was a bit overcomplicated because just creating a local
> > > copy doesn't fix this completely, there's still the hole of writing
> > > through arg#0 (which now does not reflect runtime state, as writes at
> > > runtime go to clone while verifier thinks you touched stack slots),
> > > and still allows constructing the infinite loop (because well, you can
> > > overwrite dynptr through it). The 'side channel' of writing to dynptr
> > > slots through callback_ctx is still there as well.
> > >
> > > Maybe the infinite loop _can_ be avoided if you clone inside each
> > > iteration; that part isn't very clear.
> > >
> > > My reasoning was that when you iterate, the container is always
> > > immutable (to prevent iterator invalidation) while mutability of
> > > elements remains unaffected from that. Setting it in spilled_ptr
> > > covers all bases (PTR_TO_STACK arg#0, access to it through
> > > callback_ctx, etc.).
> > >
> > > But I totally agree with you that we should be working on a local copy
> > > inside the helper and leave the original dynptr untouched.
> > > I think then the first arg should not be PTR_TO_STACK but some other
> > > pointer type. Maybe it should be its own register type, and
> > > spilled_ptr should reflect the same register, which allows the dynptr
> > > state that we track to be copied into the callback arg#0 easily.
> >
> > Right, something like what David Vernet did with his
> > bpf_user_ringbuf_drain() helper that passes kernel-only (not
> > PTR_TO_STACK) dynptr into callback. If that implementation has the
> > same hole (being able to be modified), we should fix it the same way
> > in both cases, by not allowing this for PTR_TO_DYNPTR (or whatever the
> > new reg type is called).
> >
>
> Sorry for the late response.
>
> Somehow I missed that series entirely. Yes, we should reuse that register type,
> and it does seem like it needs to check for MEM_UNINIT to prevent
> reinitialization. I'm rolling that fix into
> my dynptr series that I'm sending next week, since it would lead to
> lots of conflicts otherwise.
> Then this set can use the same approach.

Sounds good, thanks!

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2022-10-03 18:39 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-08  0:02 [PATCH bpf-next v1 0/8] Dynptr convenience helpers Joanne Koong
2022-09-08  0:02 ` [PATCH bpf-next v1 1/8] bpf: Add bpf_dynptr_data_rdonly Joanne Koong
2022-09-09 15:29   ` Song Liu
2022-09-09 15:32     ` Alexei Starovoitov
2022-09-09 15:59       ` Song Liu
2022-09-09 15:51   ` Shmulik Ladkani
2022-09-08  0:02 ` [PATCH bpf-next v1 2/8] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance Joanne Koong
2022-09-09 15:32   ` Song Liu
2022-09-09 16:16   ` Shmulik Ladkani
2022-09-28 22:14   ` Andrii Nakryiko
2022-09-08  0:02 ` [PATCH bpf-next v1 3/8] bpf: Add bpf_dynptr_is_null and bpf_dynptr_is_rdonly Joanne Koong
2022-09-09 15:46   ` Song Liu
2022-09-09 21:28     ` Joanne Koong
2022-09-09 23:17       ` Song Liu
2022-09-08  0:02 ` [PATCH bpf-next v1 4/8] bpf: Add bpf_dynptr_get_size and bpf_dynptr_get_offset Joanne Koong
2022-09-09 16:52   ` Shmulik Ladkani
2022-09-09 20:37     ` Joanne Koong
2022-09-08  0:02 ` [PATCH bpf-next v1 5/8] bpf: Add bpf_dynptr_clone Joanne Koong
2022-09-09 16:41   ` Shmulik Ladkani
2022-09-09 22:18     ` Joanne Koong
2022-09-10  5:31       ` Shmulik Ladkani
2022-09-28 22:34         ` Andrii Nakryiko
2022-09-28 22:29   ` Andrii Nakryiko
2022-09-08  0:02 ` [PATCH bpf-next v1 6/8] bpf: Add verifier support for custom callback return range Joanne Koong
2022-09-08  0:02 ` [PATCH bpf-next v1 7/8] bpf: Add bpf_dynptr_iterator Joanne Koong
2022-09-19  0:07   ` Kumar Kartikeya Dwivedi
2022-09-28 22:47     ` Andrii Nakryiko
2022-09-28 22:41   ` Andrii Nakryiko
2022-09-29  0:31     ` Kumar Kartikeya Dwivedi
2022-09-29  0:43       ` Andrii Nakryiko
2022-10-02 16:45         ` Kumar Kartikeya Dwivedi
2022-10-03 18:39           ` Andrii Nakryiko
2022-09-08  0:02 ` [PATCH bpf-next v1 8/8] selftests/bpf: Tests for dynptr convenience helpers Joanne Koong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.