bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dave Marchevsky <davemarchevsky@fb.com>
To: <bpf@vger.kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>,
	Kernel Team <kernel-team@fb.com>, Tejun Heo <tj@kernel.org>,
	Dave Marchevsky <davemarchevsky@fb.com>
Subject: [RFC PATCH bpf-next 08/11] bpf: Add OBJ_NON_OWNING_REF type flag
Date: Fri, 22 Jul 2022 11:34:35 -0700	[thread overview]
Message-ID: <20220722183438.3319790-9-davemarchevsky@fb.com> (raw)
In-Reply-To: <20220722183438.3319790-1-davemarchevsky@fb.com>

Consider a pointer to a type that would normally need acquire / release
semantics to be safely held. There may be scenarios where such a pointer
can be safely held without the need to acquire a reference.

For example, although a PTR_TO_BTF_ID for a rbtree_map node is released
via the bpf_rbtree_add helper, the helper doesn't change the address of
the node and must be called with the rbtree_map's spinlock held. Since
the only way to remove a node from the rbtree (the bpf_rbtree_remove
helper) requires the same lock, the newly-added node cannot be removed by
a concurrently-running program until the lock is released. Therefore it
is safe to hold a reference to this node until bpf_rbtree_unlock is called.

This patch introduces a new type flag and associated verifier logic to
handle such "non-owning" references.

Currently the only use case I have is the rbtree example above, so the
verifier logic is straightforward:
  * Tag return types of bpf_rbtree_{add,find} with OBJ_NON_OWNING_REF
    * These both require the rbtree lock to be held to return anything
    non-NULL
    * Since the ret type for both is PTR_TO_BTF_ID_OR_NULL, if the lock is
    not held and NULL is returned, the existing mark_ptr_or_null_reg logic
    will clear the reg type.
    * So if mark_ptr_or_null_reg logic turns the returned reg into a
    PTR_TO_BTF_ID | OBJ_NON_OWNING_REF, verifier knows lock is held.

  * When the lock is released, the verifier invalidates any regs holding
  non-owning refs, similarly to the existing release_reference logic.
  There is no need to clear ref_obj_id, though, as an 'owning' reference
  was never acquired.

[ TODO: Currently the invalidation logic in
clear_rbtree_node_non_owning_refs is not parametrized by map so
unlocking any rbtree lock will invalidate all non-owning refs ]

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
---
 include/linux/bpf.h   |  1 +
 kernel/bpf/rbtree.c   |  4 +--
 kernel/bpf/verifier.c | 63 +++++++++++++++++++++++++++++++++++++++----
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index eb8c550db0e2..c9c4b4fb019c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -412,6 +412,7 @@ enum bpf_type_flag {
 	/* Size is known at compile time. */
 	MEM_FIXED_SIZE		= BIT(10 + BPF_BASE_TYPE_BITS),
 
+	OBJ_NON_OWNING_REF	= BIT(11 + BPF_BASE_TYPE_BITS),
 	__BPF_TYPE_FLAG_MAX,
 	__BPF_TYPE_LAST_FLAG	= __BPF_TYPE_FLAG_MAX - 1,
 };
diff --git a/kernel/bpf/rbtree.c b/kernel/bpf/rbtree.c
index 5b1ab73e164f..34864fc83209 100644
--- a/kernel/bpf/rbtree.c
+++ b/kernel/bpf/rbtree.c
@@ -111,7 +111,7 @@ BPF_CALL_3(bpf_rbtree_add, struct bpf_map *, map, void *, value, void *, cb)
 const struct bpf_func_proto bpf_rbtree_add_proto = {
 	.func = bpf_rbtree_add,
 	.gpl_only = true,
-	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL | OBJ_NON_OWNING_REF,
 	.arg1_type = ARG_CONST_MAP_PTR,
 	.arg2_type = ARG_PTR_TO_BTF_ID | OBJ_RELEASE,
 	.arg2_btf_id = &bpf_rbtree_btf_ids[0],
@@ -133,7 +133,7 @@ BPF_CALL_3(bpf_rbtree_find, struct bpf_map *, map, void *, key, void *, cb)
 const struct bpf_func_proto bpf_rbtree_find_proto = {
 	.func = bpf_rbtree_find,
 	.gpl_only = true,
-	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL | OBJ_NON_OWNING_REF,
 	.ret_btf_id = &bpf_rbtree_btf_ids[0],
 	.arg1_type = ARG_CONST_MAP_PTR,
 	.arg2_type = ARG_ANYTHING,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 174a355d97fd..4f46b2dfbc4b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -467,6 +467,11 @@ static bool type_is_rdonly_mem(u32 type)
 	return type & MEM_RDONLY;
 }
 
+static bool type_is_non_owning_ref(u32 type)
+{
+	return type & OBJ_NON_OWNING_REF;
+}
+
 static bool type_may_be_null(u32 type)
 {
 	return type & PTR_MAYBE_NULL;
@@ -555,7 +560,9 @@ static bool function_returns_rbtree_node(enum bpf_func_id func_id)
 static const char *reg_type_str(struct bpf_verifier_env *env,
 				enum bpf_reg_type type)
 {
-	char postfix[16] = {0}, prefix[32] = {0};
+	char postfix[32] = {0}, prefix[32] = {0};
+	unsigned int postfix_idx = 0;
+
 	static const char * const str[] = {
 		[NOT_INIT]		= "?",
 		[SCALAR_VALUE]		= "scalar",
@@ -579,11 +586,18 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
 		[PTR_TO_MAP_KEY]	= "map_key",
 	};
 
-	if (type & PTR_MAYBE_NULL) {
+	if (type_may_be_null(type)) {
 		if (base_type(type) == PTR_TO_BTF_ID)
-			strncpy(postfix, "or_null_", 16);
+			postfix_idx += strlcpy(postfix + postfix_idx, "or_null_", 32 - postfix_idx);
 		else
-			strncpy(postfix, "_or_null", 16);
+			postfix_idx += strlcpy(postfix + postfix_idx, "_or_null", 32 - postfix_idx);
+	}
+
+	if (type_is_non_owning_ref(type)) {
+		if (base_type(type) == PTR_TO_BTF_ID)
+			postfix_idx += strlcpy(postfix + postfix_idx, "non_own_", 32 - postfix_idx);
+		else
+			postfix_idx += strlcpy(postfix + postfix_idx, "_non_own", 32 - postfix_idx);
 	}
 
 	if (type & MEM_RDONLY)
@@ -5684,12 +5698,18 @@ static const struct bpf_reg_types int_ptr_types = {
 	},
 };
 
+static const struct bpf_reg_types btf_ptr_types = {
+	.types = {
+		PTR_TO_BTF_ID,
+		PTR_TO_BTF_ID | OBJ_NON_OWNING_REF,
+	},
+};
+
 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
 static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
-static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
@@ -6635,6 +6655,33 @@ static int release_reference(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static void clear_non_owning_ref_regs(struct bpf_verifier_env *env,
+				      struct bpf_func_state *state)
+{
+	struct bpf_reg_state *regs = state->regs, *reg;
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++)
+		if (type_is_non_owning_ref(regs[i].type))
+			mark_reg_unknown(env, regs, i);
+
+	bpf_for_each_spilled_reg(i, state, reg) {
+		if (!reg)
+			continue;
+		if (type_is_non_owning_ref(reg->type))
+			__mark_reg_unknown(env, reg);
+	}
+}
+
+static void clear_rbtree_node_non_owning_refs(struct bpf_verifier_env *env)
+{
+	struct bpf_verifier_state *vstate = env->cur_state;
+	int i;
+
+	for (i = 0; i <= vstate->curframe; i++)
+		clear_non_owning_ref_regs(env, vstate->frame[i]);
+}
+
 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
 				    struct bpf_reg_state *regs)
 {
@@ -7436,6 +7483,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			}
 		}
 		break;
+	case BPF_FUNC_rbtree_unlock:
+		/* TODO clear_rbtree_node_non_owning_refs calls should be
+		 * parametrized by base_type or ideally by owning map
+		 */
+		clear_rbtree_node_non_owning_refs(env);
+		break;
 	}
 
 	if (err)
-- 
2.30.2


  parent reply	other threads:[~2022-07-22 18:35 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-22 18:34 [RFC PATCH bpf-next 00/11] bpf: Introduce rbtree map Dave Marchevsky
2022-07-22 18:34 ` [RFC PATCH bpf-next 01/11] bpf: Pull repeated reg access bounds check into helper fn Dave Marchevsky
2022-07-22 18:34 ` [RFC PATCH bpf-next 02/11] bpf: Add verifier support for custom callback return range Dave Marchevsky
2022-07-22 18:34 ` [RFC PATCH bpf-next 03/11] bpf: Add rb_node_off to bpf_map Dave Marchevsky
2022-08-01 22:19   ` Alexei Starovoitov
2022-07-22 18:34 ` [RFC PATCH bpf-next 04/11] bpf: Add rbtree map Dave Marchevsky
2022-08-01 21:49   ` Alexei Starovoitov
2022-07-22 18:34 ` [RFC PATCH bpf-next 05/11] bpf: Add bpf_spin_lock member to rbtree Dave Marchevsky
2022-08-01 22:17   ` Alexei Starovoitov
2022-08-02 13:59     ` Kumar Kartikeya Dwivedi
2022-08-02 15:30       ` Alexei Starovoitov
2022-08-10 21:46     ` Kumar Kartikeya Dwivedi
2022-08-10 22:06       ` Alexei Starovoitov
2022-08-10 23:16         ` Kumar Kartikeya Dwivedi
2022-08-15  5:33       ` Yonghong Song
2022-08-15  5:37         ` Kumar Kartikeya Dwivedi
2022-07-22 18:34 ` [RFC PATCH bpf-next 06/11] bpf: Add bpf_rbtree_{lock,unlock} helpers Dave Marchevsky
2022-08-01 21:58   ` Alexei Starovoitov
2022-07-22 18:34 ` [RFC PATCH bpf-next 07/11] bpf: Enforce spinlock hold for bpf_rbtree_{add,remove,find} Dave Marchevsky
2022-07-22 18:34 ` Dave Marchevsky [this message]
2022-08-01 22:41   ` [RFC PATCH bpf-next 08/11] bpf: Add OBJ_NON_OWNING_REF type flag Alexei Starovoitov
2022-07-22 18:34 ` [RFC PATCH bpf-next 09/11] bpf: Add CONDITIONAL_RELEASE " Dave Marchevsky
2022-08-01 22:23   ` Alexei Starovoitov
2022-07-22 18:34 ` [RFC PATCH bpf-next 10/11] bpf: Introduce PTR_ITER and PTR_ITER_END type flags Dave Marchevsky
2022-07-29 16:31   ` Tejun Heo
2022-08-01 22:44   ` Alexei Starovoitov
2022-08-02 13:05     ` Kumar Kartikeya Dwivedi
2022-08-02 15:10       ` Alexei Starovoitov
2022-08-10 17:56     ` Dave Marchevsky
2022-07-22 18:34 ` [RFC PATCH bpf-next 11/11] selftests/bpf: Add rbtree map tests Dave Marchevsky
2022-07-28  7:18   ` Yonghong Song
2022-08-10 17:48     ` Dave Marchevsky
2022-07-28  7:04 ` [RFC PATCH bpf-next 00/11] bpf: Introduce rbtree map Yonghong Song
2022-08-10 17:54   ` Dave Marchevsky
2022-08-01 21:27 ` Alexei Starovoitov
2022-08-10 18:11   ` Dave Marchevsky
2022-08-02 22:02 ` Andrii Nakryiko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220722183438.3319790-9-davemarchevsky@fb.com \
    --to=davemarchevsky@fb.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).