BPF Archive on lore.kernel.org
 help / color / Atom feed
From: Song Liu <songliubraving@fb.com>
To: <bpf@vger.kernel.org>, <netdev@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Cc: <ast@kernel.org>, <daniel@iogearbox.net>, <kernel-team@fb.com>,
	<peterz@infradead.org>, Song Liu <songliubraving@fb.com>
Subject: [PATCH v4 bpf-next 2/6] bpf: prevent deadlock from recursive bpf_task_storage_[get|delete]
Date: Mon, 22 Feb 2021 17:20:10 -0800
Message-ID: <20210223012014.2087583-3-songliubraving@fb.com> (raw)
In-Reply-To: <20210223012014.2087583-1-songliubraving@fb.com>

BPF helpers bpf_task_storage_[get|delete] could hold two locks:
bpf_local_storage_map_bucket->lock and bpf_local_storage->lock. Calling
these helpers from fentry/fexit programs on functions in bpf_*_storage.c
may cause deadlock on either locks.

Prevent such deadlock with a per cpu counter, bpf_task_storage_busy, which
is similar to bpf_prog_active. We need this counter to be global, because
the two locks here belong to two different objects: bpf_local_storage_map
and bpf_local_storage. If we pick one of them as the owner of the counter,
it is still possible to trigger deadlock on the other lock. For example,
if bpf_local_storage_map owns the counters, it cannot prevent deadlock
on bpf_local_storage->lock when two maps are used.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 kernel/bpf/bpf_task_storage.c | 57 ++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 2034019966d44..ed7d2e02b1c19 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -20,6 +20,31 @@
 
 DEFINE_BPF_STORAGE_CACHE(task_cache);
 
+DEFINE_PER_CPU(int, bpf_task_storage_busy);
+
+static void bpf_task_storage_lock(void)
+{
+	migrate_disable();
+	__this_cpu_inc(bpf_task_storage_busy);
+}
+
+static void bpf_task_storage_unlock(void)
+{
+	__this_cpu_dec(bpf_task_storage_busy);
+	migrate_enable();
+}
+
+static bool bpf_task_storage_trylock(void)
+{
+	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
+		__this_cpu_dec(bpf_task_storage_busy);
+		migrate_enable();
+		return false;
+	}
+	return true;
+}
+
 static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
 {
 	struct task_struct *task = owner;
@@ -67,6 +92,7 @@ void bpf_task_storage_free(struct task_struct *task)
 	 * when unlinking elem from the local_storage->list and
 	 * the map's bucket->list.
 	 */
+	bpf_task_storage_lock();
 	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
 		/* Always unlink from map before unlinking from
@@ -77,6 +103,7 @@ void bpf_task_storage_free(struct task_struct *task)
 			local_storage, selem, false);
 	}
 	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+	bpf_task_storage_unlock();
 	rcu_read_unlock();
 
 	/* free_task_storage should always be true as long as
@@ -109,7 +136,9 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
 		goto out;
 	}
 
+	bpf_task_storage_lock();
 	sdata = task_storage_lookup(task, map, true);
+	bpf_task_storage_unlock();
 	put_pid(pid);
 	return sdata ? sdata->data : NULL;
 out:
@@ -141,8 +170,10 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 		goto out;
 	}
 
+	bpf_task_storage_lock();
 	sdata = bpf_local_storage_update(
 		task, (struct bpf_local_storage_map *)map, value, map_flags);
+	bpf_task_storage_unlock();
 
 	err = PTR_ERR_OR_ZERO(sdata);
 out:
@@ -185,7 +216,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
 		goto out;
 	}
 
+	bpf_task_storage_lock();
 	err = task_storage_delete(task, map);
+	bpf_task_storage_unlock();
 out:
 	put_pid(pid);
 	return err;
@@ -207,34 +240,44 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	if (!task || !task_storage_ptr(task))
 		return (unsigned long)NULL;
 
+	if (!bpf_task_storage_trylock())
+		return (unsigned long)NULL;
+
 	sdata = task_storage_lookup(task, map, true);
 	if (sdata)
-		return (unsigned long)sdata->data;
+		goto unlock;
 
 	/* only allocate new storage, when the task is refcounted */
 	if (refcount_read(&task->usage) &&
-	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
+	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
 			BPF_NOEXIST);
-		return IS_ERR(sdata) ? (unsigned long)NULL :
-					     (unsigned long)sdata->data;
-	}
 
-	return (unsigned long)NULL;
+unlock:
+	bpf_task_storage_unlock();
+	return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
+		(unsigned long)sdata->data;
 }
 
 BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
 	   task)
 {
+	int ret;
+
 	if (!task)
 		return -EINVAL;
 
+	if (!bpf_task_storage_trylock())
+		return -EBUSY;
+
 	/* This helper must only be called from places where the lifetime of the task
 	 * is guaranteed. Either by being refcounted or by being protected
 	 * by an RCU read-side critical section.
 	 */
-	return task_storage_delete(task, map);
+	ret = task_storage_delete(task, map);
+	bpf_task_storage_unlock();
+	return ret;
 }
 
 static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
-- 
2.24.1


  parent reply index

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-23  1:20 [PATCH v4 bpf-next 0/6] bpf: enable task local storage for tracing programs Song Liu
2021-02-23  1:20 ` [PATCH v4 bpf-next 1/6] " Song Liu
2021-02-23  3:08   ` kernel test robot
2021-02-23  4:04   ` kernel test robot
2021-02-23 19:23   ` Martin KaFai Lau
2021-02-23 20:51     ` Song Liu
2021-02-23  1:20 ` Song Liu [this message]
2021-02-23  6:21   ` [PATCH v4 bpf-next 2/6] bpf: prevent deadlock from recursive bpf_task_storage_[get|delete] Andrii Nakryiko
2021-02-23  7:16     ` Song Liu
2021-02-23  7:19       ` Andrii Nakryiko
2021-02-23 16:44         ` Alexei Starovoitov
2021-02-23 11:06   ` Peter Zijlstra
2021-02-23 20:49     ` Song Liu
2021-02-23  1:20 ` [PATCH v4 bpf-next 3/6] selftests/bpf: add non-BPF_LSM test for task local storage Song Liu
2021-02-23  1:20 ` [PATCH v4 bpf-next 4/6] selftests/bpf: test deadlock from recursive bpf_task_storage_[get|delete] Song Liu
2021-02-23  1:20 ` [PATCH v4 bpf-next 5/6] bpf: runqslower: prefer using local vmlimux to generate vmlinux.h Song Liu
2021-02-23  6:26   ` Andrii Nakryiko
2021-02-23 21:24   ` Martin KaFai Lau
2021-02-23  1:20 ` [PATCH v4 bpf-next 6/6] bpf: runqslower: use task local storage Song Liu
2021-02-23 21:33   ` Martin KaFai Lau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210223012014.2087583-3-songliubraving@fb.com \
    --to=songliubraving@fb.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

BPF Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/bpf/0 bpf/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 bpf bpf/ https://lore.kernel.org/bpf \
		bpf@vger.kernel.org
	public-inbox-index bpf

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.bpf


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git