linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] firmware: selftest for request_firmware_into_buf
@ 2019-08-16  0:09 Scott Branden
  2019-08-16  0:09 ` [PATCH 1/3] test_firmware: add support " Scott Branden
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Scott Branden @ 2019-08-16  0:09 UTC (permalink / raw)
  To: Luis Chamberlain, Greg Kroah-Hartman, Andy Gross, David Brown,
	Alexander Viro, Shuah Khan, bjorn.andersson
  Cc: Rafael J . Wysocki, linux-kernel, linux-arm-msm, linux-fsdevel,
	BCM Kernel Feedback, Olof Johansson, Andrew Morton,
	Dan Carpenter, Colin Ian King, Kees Cook, Takashi Iwai,
	linux-kselftest, Scott Branden

This patch series adds a kernel selftest for request_firmware_into_buf.
The API was previously added to the kernel without any tests.

Also included in this patch series is a fix for a race condition
discovered while testing request_firmware_into_buf.  The mutex may
not be the correct final solution, but it demonstrates a fix for the
race condition that the new test exposes.

Scott Branden (3):
  test_firmware: add support for request_firmware_into_buf
  selftest: firmware: Add request_firmware_into_buf tests
  firmware: add mutex fw_lock_fallback for race condition

 drivers/base/firmware_loader/main.c           | 15 +++++
 lib/test_firmware.c                           | 50 +++++++++++++++-
 .../selftests/firmware/fw_filesystem.sh       | 57 ++++++++++++++++++-
 tools/testing/selftests/firmware/fw_lib.sh    | 11 ++++
 4 files changed, 129 insertions(+), 4 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/3] test_firmware: add support for request_firmware_into_buf
  2019-08-16  0:09 [PATCH 0/3] firmware: selftest for request_firmware_into_buf Scott Branden
@ 2019-08-16  0:09 ` Scott Branden
  2019-08-19  5:24   ` Luis Chamberlain
  2019-08-16  0:09 ` [PATCH 2/3] selftest: firmware: Add request_firmware_into_buf tests Scott Branden
  2019-08-16  0:09 ` [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition Scott Branden
  2 siblings, 1 reply; 16+ messages in thread
From: Scott Branden @ 2019-08-16  0:09 UTC (permalink / raw)
  To: Luis Chamberlain, Greg Kroah-Hartman, Andy Gross, David Brown,
	Alexander Viro, Shuah Khan, bjorn.andersson
  Cc: Rafael J . Wysocki, linux-kernel, linux-arm-msm, linux-fsdevel,
	BCM Kernel Feedback, Olof Johansson, Andrew Morton,
	Dan Carpenter, Colin Ian King, Kees Cook, Takashi Iwai,
	linux-kselftest, Scott Branden

Add test config into_buf to allow request_firmware_into_buf to be
called instead of request_firmware/request_firmware_direct.  The number
of parameters differs when calling request_firmware_into_buf, and support
for testing this API has not been added to test_firmware until now.

Signed-off-by: Scott Branden <scott.branden@broadcom.com>
---
 lib/test_firmware.c | 50 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 6ca97a63b3d6..1aa430228ae6 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -18,6 +18,7 @@
 #include <linux/device.h>
 #include <linux/fs.h>
 #include <linux/miscdevice.h>
+#include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/delay.h>
@@ -26,6 +27,7 @@
 
 #define TEST_FIRMWARE_NAME	"test-firmware.bin"
 #define TEST_FIRMWARE_NUM_REQS	4
+#define TEST_FIRMWARE_BUF_SIZE	SZ_1K
 
 static DEFINE_MUTEX(test_fw_mutex);
 static const struct firmware *test_firmware;
@@ -45,6 +47,8 @@ struct test_batched_req {
  * test_config - represents configuration for the test for different triggers
  *
  * @name: the name of the firmware file to look for
+ * @into_buf: when the into_buf is used if this is true
+ *	request_firmware_into_buf() will be used instead.
  * @sync_direct: when the sync trigger is used if this is true
  *	request_firmware_direct() will be used instead.
  * @send_uevent: whether or not to send a uevent for async requests
@@ -83,6 +87,7 @@ struct test_batched_req {
  */
 struct test_config {
 	char *name;
+	bool into_buf;
 	bool sync_direct;
 	bool send_uevent;
 	u8 num_requests;
@@ -176,6 +181,7 @@ static int __test_firmware_config_init(void)
 
 	test_fw_config->num_requests = TEST_FIRMWARE_NUM_REQS;
 	test_fw_config->send_uevent = true;
+	test_fw_config->into_buf = false;
 	test_fw_config->sync_direct = false;
 	test_fw_config->req_firmware = request_firmware;
 	test_fw_config->test_result = 0;
@@ -244,6 +250,9 @@ static ssize_t config_show(struct device *dev,
 			test_fw_config->send_uevent ?
 			"FW_ACTION_HOTPLUG" :
 			"FW_ACTION_NOHOTPLUG");
+	len += scnprintf(buf+len, PAGE_SIZE - len,
+			"into_buf:\t\t%s\n",
+			test_fw_config->into_buf ? "true" : "false");
 	len += scnprintf(buf+len, PAGE_SIZE - len,
 			"sync_direct:\t\t%s\n",
 			test_fw_config->sync_direct ? "true" : "false");
@@ -393,6 +402,23 @@ static ssize_t config_num_requests_show(struct device *dev,
 }
 static DEVICE_ATTR_RW(config_num_requests);
 
+static ssize_t config_into_buf_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	return test_dev_config_update_bool(buf,
+					   count,
+					   &test_fw_config->into_buf);
+}
+
+static ssize_t config_into_buf_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	return test_dev_config_show_bool(buf, test_fw_config->into_buf);
+}
+static DEVICE_ATTR_RW(config_into_buf);
+
 static ssize_t config_sync_direct_store(struct device *dev,
 					struct device_attribute *attr,
 					const char *buf, size_t count)
@@ -522,7 +548,7 @@ static ssize_t trigger_async_request_store(struct device *dev,
 		rc = count;
 	} else {
 		pr_err("failed to async load firmware\n");
-		rc = -ENODEV;
+		rc = -ENOMEM;
 	}
 
 out:
@@ -585,7 +611,26 @@ static int test_fw_run_batch_request(void *data)
 		return -EINVAL;
 	}
 
-	req->rc = test_fw_config->req_firmware(&req->fw, req->name, req->dev);
+	if (test_fw_config->into_buf) {
+		void *test_buf;
+
+		test_buf = kzalloc(TEST_FIRMWARE_BUF_SIZE, GFP_KERNEL);
+		if (!test_buf)
+			return -ENOSPC;
+
+		req->rc = request_firmware_into_buf(&req->fw,
+						    req->name,
+						    req->dev,
+						    test_buf,
+						    TEST_FIRMWARE_BUF_SIZE);
+		if (!req->fw)
+			kfree(test_buf);
+	} else {
+		req->rc = test_fw_config->req_firmware(&req->fw,
+						       req->name,
+						       req->dev);
+	}
+
 	if (req->rc) {
 		pr_info("#%u: batched sync load failed: %d\n",
 			req->idx, req->rc);
@@ -849,6 +894,7 @@ static struct attribute *test_dev_attrs[] = {
 	TEST_FW_DEV_ATTR(config),
 	TEST_FW_DEV_ATTR(config_name),
 	TEST_FW_DEV_ATTR(config_num_requests),
+	TEST_FW_DEV_ATTR(config_into_buf),
 	TEST_FW_DEV_ATTR(config_sync_direct),
 	TEST_FW_DEV_ATTR(config_send_uevent),
 	TEST_FW_DEV_ATTR(config_read_fw_idx),
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/3] selftest: firmware: Add request_firmware_into_buf tests
  2019-08-16  0:09 [PATCH 0/3] firmware: selftest for request_firmware_into_buf Scott Branden
  2019-08-16  0:09 ` [PATCH 1/3] test_firmware: add support " Scott Branden
@ 2019-08-16  0:09 ` Scott Branden
  2019-08-19  5:24   ` Luis Chamberlain
  2019-08-16  0:09 ` [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition Scott Branden
  2 siblings, 1 reply; 16+ messages in thread
From: Scott Branden @ 2019-08-16  0:09 UTC (permalink / raw)
  To: Luis Chamberlain, Greg Kroah-Hartman, Andy Gross, David Brown,
	Alexander Viro, Shuah Khan, bjorn.andersson
  Cc: Rafael J . Wysocki, linux-kernel, linux-arm-msm, linux-fsdevel,
	BCM Kernel Feedback, Olof Johansson, Andrew Morton,
	Dan Carpenter, Colin Ian King, Kees Cook, Takashi Iwai,
	linux-kselftest, Scott Branden

Add test cases for checking the request_firmware_into_buf API.
The API was previously introduced into the kernel with no tests.

Signed-off-by: Scott Branden <scott.branden@broadcom.com>
---
 .../selftests/firmware/fw_filesystem.sh       | 57 ++++++++++++++++++-
 tools/testing/selftests/firmware/fw_lib.sh    | 11 ++++
 2 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index f901076aa2ea..56894477c8bd 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -116,6 +116,16 @@ config_set_name()
 	echo -n $1 >  $DIR/config_name
 }
 
+config_set_into_buf()
+{
+	echo 1 >  $DIR/config_into_buf
+}
+
+config_unset_into_buf()
+{
+	echo 0 >  $DIR/config_into_buf
+}
+
 config_set_sync_direct()
 {
 	echo 1 >  $DIR/config_sync_direct
@@ -153,11 +163,14 @@ config_set_read_fw_idx()
 
 read_firmwares()
 {
-	if [ "$1" = "xzonly" ]; then
-		fwfile="${FW}-orig"
+	if [ "$(cat $DIR/config_into_buf)" == "1" ]; then
+		fwfile="$FW_INTO_BUF"
 	else
 		fwfile="$FW"
 	fi
+	if [ "$1" = "xzonly" ]; then
+		fwfile="${fwfile}-orig"
+	fi
 	for i in $(seq 0 3); do
 		config_set_read_fw_idx $i
 		# Verify the contents are what we expect.
@@ -194,6 +207,18 @@ test_batched_request_firmware_nofile()
 	echo "OK"
 }
 
+test_batched_request_firmware_into_buf_nofile()
+{
+	echo -n "Batched request_firmware_into_buf() nofile try #$1: "
+	config_reset
+	config_set_name nope-test-firmware.bin
+	config_set_into_buf
+	config_trigger_sync
+	read_firmwares_expect_nofile
+	release_all_firmware
+	echo "OK"
+}
+
 test_batched_request_firmware_direct_nofile()
 {
 	echo -n "Batched request_firmware_direct() nofile try #$1: "
@@ -259,6 +284,18 @@ test_batched_request_firmware()
 	echo "OK"
 }
 
+test_batched_request_firmware_into_buf()
+{
+	echo -n "Batched request_firmware_into_buf() $2 try #$1: "
+	config_reset
+	config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME
+	config_set_into_buf
+	config_trigger_sync
+	read_firmwares $2
+	release_all_firmware
+	echo "OK"
+}
+
 test_batched_request_firmware_direct()
 {
 	echo -n "Batched request_firmware_direct() $2 try #$1: "
@@ -307,6 +344,10 @@ for i in $(seq 1 5); do
 	test_batched_request_firmware $i normal
 done
 
+for i in $(seq 1 5); do
+	test_batched_request_firmware_into_buf $i normal
+done
+
 for i in $(seq 1 5); do
 	test_batched_request_firmware_direct $i normal
 done
@@ -327,6 +368,10 @@ for i in $(seq 1 5); do
 	test_batched_request_firmware_nofile $i
 done
 
+for i in $(seq 1 5); do
+	test_batched_request_firmware_into_buf_nofile $i
+done
+
 for i in $(seq 1 5); do
 	test_batched_request_firmware_direct_nofile $i
 done
@@ -350,6 +395,10 @@ for i in $(seq 1 5); do
 	test_batched_request_firmware $i both
 done
 
+for i in $(seq 1 5); do
+	test_batched_request_firmware_into_buf $i both
+done
+
 for i in $(seq 1 5); do
 	test_batched_request_firmware_direct $i both
 done
@@ -370,6 +419,10 @@ for i in $(seq 1 5); do
 	test_batched_request_firmware $i xzonly
 done
 
+for i in $(seq 1 5); do
+	test_batched_request_firmware_into_buf $i xzonly
+done
+
 for i in $(seq 1 5); do
 	test_batched_request_firmware_direct $i xzonly
 done
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index f236cc295450..b879305a766d 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -9,6 +9,12 @@ DIR=/sys/devices/virtual/misc/test_firmware
 PROC_CONFIG="/proc/config.gz"
 TEST_DIR=$(dirname $0)
 
+# We need to load a different file to test request_firmware_into_buf
+# I believe the issue is firmware loaded cached vs. non-cached
+# with same filename is bungled.
+# To reproduce rename this to test-firmware.bin
+TEST_FIRMWARE_INTO_BUF_FILENAME=test-firmware-into-buf.bin
+
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
@@ -108,6 +114,8 @@ setup_tmp_file()
 	FWPATH=$(mktemp -d)
 	FW="$FWPATH/test-firmware.bin"
 	echo "ABCD0123" >"$FW"
+	FW_INTO_BUF="$FWPATH/$TEST_FIRMWARE_INTO_BUF_FILENAME"
+	echo "EFGH4567" >"$FW_INTO_BUF"
 	NAME=$(basename "$FW")
 	if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
 		echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path
@@ -175,6 +183,9 @@ test_finish()
 	if [ -f $FW ]; then
 		rm -f "$FW"
 	fi
+	if [ -f $FW_INTO_BUF ]; then
+		rm -f "$FW_INTO_BUF"
+	fi
 	if [ -d $FWPATH ]; then
 		rm -rf "$FWPATH"
 	fi
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition
  2019-08-16  0:09 [PATCH 0/3] firmware: selftest for request_firmware_into_buf Scott Branden
  2019-08-16  0:09 ` [PATCH 1/3] test_firmware: add support " Scott Branden
  2019-08-16  0:09 ` [PATCH 2/3] selftest: firmware: Add request_firmware_into_buf tests Scott Branden
@ 2019-08-16  0:09 ` Scott Branden
  2019-08-19  5:39   ` Luis Chamberlain
  2 siblings, 1 reply; 16+ messages in thread
From: Scott Branden @ 2019-08-16  0:09 UTC (permalink / raw)
  To: Luis Chamberlain, Greg Kroah-Hartman, Andy Gross, David Brown,
	Alexander Viro, Shuah Khan, bjorn.andersson
  Cc: Rafael J . Wysocki, linux-kernel, linux-arm-msm, linux-fsdevel,
	BCM Kernel Feedback, Olof Johansson, Andrew Morton,
	Dan Carpenter, Colin Ian King, Kees Cook, Takashi Iwai,
	linux-kselftest, Scott Branden

A race condition exists between _request_firmware_prepare checking
if firmware is assigned and firmware_fallback_sysfs creating a sysfs
entry (kernel trace below).  To avoid this race, add a mutex,
fw_lock_fallback, that serializes the two.

misc test_firmware: Falling back to sysfs fallback for: nope-test-firmware.bin
sysfs: cannot create duplicate filename '/devices/virtual/misc/test_firmware/nope-test-firmware.bin'
CPU: 4 PID: 2059 Comm: test_firmware-3 Not tainted 5.3.0-rc4 #1
Hardware name: Dell Inc. OptiPlex 7010/0KRC95, BIOS A13 03/25/2013
Call Trace:
 dump_stack+0x67/0x90
 sysfs_warn_dup.cold+0x17/0x24
 sysfs_create_dir_ns+0xb3/0xd0
 kobject_add_internal+0xa6/0x2a0
 kobject_add+0x7e/0xb0
 ? _cond_resched+0x15/0x30
 device_add+0x121/0x670
 firmware_fallback_sysfs+0x15c/0x3c9
 _request_firmware+0x432/0x5a0
 ? devres_find+0x63/0xc0
 request_firmware_into_buf+0x63/0x80
 test_fw_run_batch_request+0x96/0xe0
 kthread+0xfb/0x130
 ? reset_store+0x30/0x30
 ? kthread_park+0x80/0x80
 ret_from_fork+0x3a/0x50
kobject_add_internal failed for nope-test-firmware.bin with -EEXIST, don't try to register things with the same name in the same directory.

Signed-off-by: Scott Branden <scott.branden@broadcom.com>
---
 drivers/base/firmware_loader/main.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index bf44c79beae9..ce9896e3b782 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -88,6 +88,7 @@ static inline struct fw_priv *to_fw_priv(struct kref *ref)
 /* fw_lock could be moved to 'struct fw_sysfs' but since it is just
  * guarding for corner cases a global lock should be OK */
 DEFINE_MUTEX(fw_lock);
+DEFINE_MUTEX(fw_lock_fallback);
 
 static struct firmware_cache fw_cache;
 
@@ -758,6 +759,17 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
 	if (!firmware_p)
 		return -EINVAL;
 
+	/*
+	 * There is a race condition between _request_firmware_prepare checking
+	 * if firmware is assigned and firmware_fallback_sysfs creating sysfs
+	 * entries with duplicate names.
+	 * Yet, with this lock the firmware_test locks up with cache enabled
+	 * and no event used during firmware test.
+	 * This points to some very racy code I don't know how to entirely fix.
+	 */
+	if (opt_flags & FW_OPT_NOCACHE)
+		mutex_lock(&fw_lock_fallback);
+
 	if (!name || name[0] == '\0') {
 		ret = -EINVAL;
 		goto out;
@@ -791,6 +803,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
 		fw = NULL;
 	}
 
+	if (opt_flags & FW_OPT_NOCACHE)
+		mutex_unlock(&fw_lock_fallback);
+
 	*firmware_p = fw;
 	return ret;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] test_firmware: add support for request_firmware_into_buf
  2019-08-16  0:09 ` [PATCH 1/3] test_firmware: add support " Scott Branden
@ 2019-08-19  5:24   ` Luis Chamberlain
  2019-08-19 20:27     ` shuah
  0 siblings, 1 reply; 16+ messages in thread
From: Luis Chamberlain @ 2019-08-19  5:24 UTC (permalink / raw)
  To: Scott Branden
  Cc: Greg Kroah-Hartman, Andy Gross, David Brown, Alexander Viro,
	Shuah Khan, bjorn.andersson, Rafael J . Wysocki, linux-kernel,
	linux-arm-msm, linux-fsdevel, BCM Kernel Feedback,
	Olof Johansson, Andrew Morton, Dan Carpenter, Colin Ian King,
	Kees Cook, Takashi Iwai, linux-kselftest

On Thu, Aug 15, 2019 at 05:09:43PM -0700, Scott Branden wrote:
> Add test config into_buf to allow request_firmware_into_buf to be
> called instead of request_firmware/request_firmware_direct.  The number
> of parameters differ calling request_firmware_into_buf and support
> has not been added to test such api in test_firmware until now.
> 
> Signed-off-by: Scott Branden <scott.branden@broadcom.com>

Thanks for the patch!

Acked-by: Luis Chamberlain <mcgrof@kernel.org>

  Luis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] selftest: firmware: Add request_firmware_into_buf tests
  2019-08-16  0:09 ` [PATCH 2/3] selftest: firmware: Add request_firmware_into_buf tests Scott Branden
@ 2019-08-19  5:24   ` Luis Chamberlain
  2019-08-19 20:27     ` shuah
  0 siblings, 1 reply; 16+ messages in thread
From: Luis Chamberlain @ 2019-08-19  5:24 UTC (permalink / raw)
  To: Scott Branden
  Cc: Greg Kroah-Hartman, Andy Gross, David Brown, Alexander Viro,
	Shuah Khan, bjorn.andersson, Rafael J . Wysocki, linux-kernel,
	linux-arm-msm, linux-fsdevel, BCM Kernel Feedback,
	Olof Johansson, Andrew Morton, Dan Carpenter, Colin Ian King,
	Kees Cook, Takashi Iwai, linux-kselftest

On Thu, Aug 15, 2019 at 05:09:44PM -0700, Scott Branden wrote:
> Add tests cases for checking request_firmware_into_buf api.
> API was introduced into kernel with no testing present previously.
> 
> Signed-off-by: Scott Branden <scott.branden@broadcom.com>

Acked-by: Luis Chamberlain <mcgrof@kernel.org>

  Luis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition
  2019-08-16  0:09 ` [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition Scott Branden
@ 2019-08-19  5:39   ` Luis Chamberlain
  2019-08-19 16:19     ` Scott Branden
  0 siblings, 1 reply; 16+ messages in thread
From: Luis Chamberlain @ 2019-08-19  5:39 UTC (permalink / raw)
  To: Scott Branden
  Cc: Greg Kroah-Hartman, Andy Gross, David Brown, Alexander Viro,
	Shuah Khan, bjorn.andersson, Rafael J . Wysocki, linux-kernel,
	linux-arm-msm, linux-fsdevel, BCM Kernel Feedback,
	Olof Johansson, Andrew Morton, Dan Carpenter, Colin Ian King,
	Kees Cook, Takashi Iwai, linux-kselftest

On Thu, Aug 15, 2019 at 05:09:45PM -0700, Scott Branden wrote:
> A race condition exists between _request_firmware_prepare checking
> if firmware is assigned and firmware_fallback_sysfs creating a sysfs
> entry (kernel trace below).  To avoid such condition add a mutex
> fw_lock_fallback to protect against such condition.

I am not buying this fix, and it seems sloppy. More below.

> misc test_firmware: Falling back to sysfs fallback for: nope-test-firmware.bin

So the fallback kicks in with the file that is not there.

> sysfs: cannot create duplicate filename '/devices/virtual/misc/test_firmware/nope-test-firmware.bin'

And we have a duplicate entry, for the *device* created to allow us to
create a file entry to allow us to copy the file. Your tests had a loop,
so there is actually a race between two entries being created while
one failed.

> CPU: 4 PID: 2059 Comm: test_firmware-3 Not tainted 5.3.0-rc4 #1
> Hardware name: Dell Inc. OptiPlex 7010/0KRC95, BIOS A13 03/25/2013
> Call Trace:
>  dump_stack+0x67/0x90
>  sysfs_warn_dup.cold+0x17/0x24
>  sysfs_create_dir_ns+0xb3/0xd0
>  kobject_add_internal+0xa6/0x2a0
>  kobject_add+0x7e/0xb0

Note: kobject_add().

>  ? _cond_resched+0x15/0x30
>  device_add+0x121/0x670
>  firmware_fallback_sysfs+0x15c/0x3c9
>  _request_firmware+0x432/0x5a0
>  ? devres_find+0x63/0xc0
>  request_firmware_into_buf+0x63/0x80
>  test_fw_run_batch_request+0x96/0xe0
>  kthread+0xfb/0x130
>  ? reset_store+0x30/0x30
>  ? kthread_park+0x80/0x80
>  ret_from_fork+0x3a/0x50
> kobject_add_internal failed for nope-test-firmware.bin with -EEXIST, don't try to register things with the same name in the same directory.

So the above makes it even clearer: two kobjects with the same name.

> Signed-off-by: Scott Branden <scott.branden@broadcom.com>
> ---
>  drivers/base/firmware_loader/main.c | 15 +++++++++++++++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
> index bf44c79beae9..ce9896e3b782 100644
> --- a/drivers/base/firmware_loader/main.c
> +++ b/drivers/base/firmware_loader/main.c
> @@ -88,6 +88,7 @@ static inline struct fw_priv *to_fw_priv(struct kref *ref)
>  /* fw_lock could be moved to 'struct fw_sysfs' but since it is just
>   * guarding for corner cases a global lock should be OK */
>  DEFINE_MUTEX(fw_lock);
> +DEFINE_MUTEX(fw_lock_fallback);

The reason I don't like this fix is that this mutex is named after the
fallback interface... but...

>  
>  static struct firmware_cache fw_cache;
>  
> @@ -758,6 +759,17 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
>  	if (!firmware_p)
>  		return -EINVAL;
>  
> +	/*
> +	 * There is a race condition between _request_firmware_prepare checking
> +	 * if firmware is assigned and firmware_fallback_sysfs creating sysfs
> +	 * entries with duplicate names.
> +	 * Yet, with this lock the firmware_test locks up with cache enabled
> +	 * and no event used during firmware test.
> +	 * This points to some very racy code I don't know how to entirely fix.
> +	 */
> +	if (opt_flags & FW_OPT_NOCACHE)
> +		mutex_lock(&fw_lock_fallback);

Whoa.. What does no-cache have anything to do with the fallback interface
other than the fact we enable this feature for the fallback interface?
We don't need to penalize non-fallback users who *also* may want to
enable the no-cache feature.

So, the fix should be within the boundaries of the creation / deletion
of the kobject, not this nocache feature. Can you please re-evaluate
this code and look for a more compartmentalized solution to the
fallback code only?

  Luis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition
  2019-08-19  5:39   ` Luis Chamberlain
@ 2019-08-19 16:19     ` Scott Branden
  2019-08-20  1:26       ` Luis Chamberlain
  0 siblings, 1 reply; 16+ messages in thread
From: Scott Branden @ 2019-08-19 16:19 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: Greg Kroah-Hartman, Andy Gross, David Brown, Alexander Viro,
	Shuah Khan, bjorn.andersson, Rafael J . Wysocki, linux-kernel,
	linux-arm-msm, linux-fsdevel, BCM Kernel Feedback,
	Olof Johansson, Andrew Morton, Dan Carpenter, Colin Ian King,
	Kees Cook, Takashi Iwai, linux-kselftest

Hi Luis,

Thanks for the review.

I did not think this patch would be the final solution either

as indicated in the original cover letter and code comment.

Some comments inline.

On 2019-08-18 10:39 p.m., Luis Chamberlain wrote:

> On Thu, Aug 15, 2019 at 05:09:45PM -0700, Scott Branden wrote:
>> A race condition exists between _request_firmware_prepare checking
>> if firmware is assigned and firmware_fallback_sysfs creating a sysfs
>> entry (kernel trace below).  To avoid such condition add a mutex
>> fw_lock_fallback to protect against such condition.
> I am not buying this fix, and it seems sloppy. More below.
>
>> misc test_firmware: Falling back to sysfs fallback for: nope-test-firmware.bin
> So the fallback kicks in with the file that is not there.
>
>> sysfs: cannot create duplicate filename '/devices/virtual/misc/test_firmware/nope-test-firmware.bin'
> And we have a duplicate entry, for the *device* created to allow us to
> create a file entry to allow us to copy the file. Your tests had a loop,
> so there is actually a race between two entries being created while
> one one failed.
>
>> CPU: 4 PID: 2059 Comm: test_firmware-3 Not tainted 5.3.0-rc4 #1
>> Hardware name: Dell Inc. OptiPlex 7010/0KRC95, BIOS A13 03/25/2013
>> Call Trace:
>>   dump_stack+0x67/0x90
>>   sysfs_warn_dup.cold+0x17/0x24
>>   sysfs_create_dir_ns+0xb3/0xd0
>>   kobject_add_internal+0xa6/0x2a0
>>   kobject_add+0x7e/0xb0
> Note: kobject_add().
>
>>   ? _cond_resched+0x15/0x30
>>   device_add+0x121/0x670
>>   firmware_fallback_sysfs+0x15c/0x3c9
>>   _request_firmware+0x432/0x5a0
>>   ? devres_find+0x63/0xc0
>>   request_firmware_into_buf+0x63/0x80
>>   test_fw_run_batch_request+0x96/0xe0
>>   kthread+0xfb/0x130
>>   ? reset_store+0x30/0x30
>>   ? kthread_park+0x80/0x80
>>   ret_from_fork+0x3a/0x50
>> kobject_add_internal failed for nope-test-firmware.bin with -EEXIST, don't try to register things with the same name in the same directory.
> So above it makes it even clearer, two kobjets with the same name.
>
>> Signed-off-by: Scott Branden <scott.branden@broadcom.com>
>> ---
>>   drivers/base/firmware_loader/main.c | 15 +++++++++++++++
>>   1 file changed, 15 insertions(+)
>>
>> diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
>> index bf44c79beae9..ce9896e3b782 100644
>> --- a/drivers/base/firmware_loader/main.c
>> +++ b/drivers/base/firmware_loader/main.c
>> @@ -88,6 +88,7 @@ static inline struct fw_priv *to_fw_priv(struct kref *ref)
>>   /* fw_lock could be moved to 'struct fw_sysfs' but since it is just
>>    * guarding for corner cases a global lock should be OK */
>>   DEFINE_MUTEX(fw_lock);
>> +DEFINE_MUTEX(fw_lock_fallback);
> The reason I don't like this fix is that this mutex is named after ther
> fallback interface... but...
>
>>   
>>   static struct firmware_cache fw_cache;
>>   
>> @@ -758,6 +759,17 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
>>   	if (!firmware_p)
>>   		return -EINVAL;
>>   
>> +	/*
>> +	 * There is a race condition between _request_firmware_prepare checking
>> +	 * if firmware is assigned and firmware_fallback_sysfs creating sysfs
>> +	 * entries with duplicate names.
>> +	 * Yet, with this lock the firmware_test locks up with cache enabled
>> +	 * and no event used during firmware test.
>> +	 * This points to some very racy code I don't know how to entirely fix.
>> +	 */
>> +	if (opt_flags & FW_OPT_NOCACHE)
>> +		mutex_lock(&fw_lock_fallback);
> Whoa.. What does no-cache have anything to do with the fallback interface
> other than the fact we enable this feature for the fallback interface?
> We don't need to penalize non-fallback users who *also* may want to
> enable the no-cache feature.
>
> So, the fix should be within the boundaries of the creation / deletion
> of the kobject, not this nocache feature. Can you please re-evaluate
> this code and look for a more compartamentalized solution to the
> fallback code only?

To be honest, I find the entire firmware code sloppy.  I don't think the 
cache/no-cache feature is

implemented or tested properly nor fallback to begin with.  I'm not 
claiming this patch is the final

solution and indicated such in the cover letter and the comment above.

I hope there is someone more familiar with this code to comment further 
and come up with a proper solution.


I have found numerous issues and race conditions with the firmware code 
(I simply added a test).

1) Try loading the same valid firmware using no-cache once it has 
already been loaded with cache.

It won't work, which is why I had to use a different filename in the 
test for request_firmware_into_buf.

2) Try removing the "if (opt_flags & FW_OPT_NOCACHE)" in my patch and 
always call the mutex.

The firmware test will lock up during a "no uevent" test.  I am not 
familiar with the code to

know why such is true and what issue this exposes in the code.

3) I have a driver that uses request_firmware_into_buf and have multiple 
instances of the driver

loading the same firmware in parallel.  Some of the data is not read 
correctly in each instance.

I have yet to reproduce this issue with the firmware test but 
currently have a mutex around the entire

call to request_firmware_into_buf in our driver.


Perhaps it is better at this point to add a mutex in 
request_firmware_into_buf to make it entirely safe?

(Perhaps even in every request_firmware function, as none seem to be 
tested properly.)

Or, add a new function called safe_request_firmware_into_buf with such 
mutex to protect the function.

The current racy request_firmware functions could then be left alone 
and those who want reliable

firmware loading can use the safe calls?

>
>    Luis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] test_firmware: add support for request_firmware_into_buf
  2019-08-19  5:24   ` Luis Chamberlain
@ 2019-08-19 20:27     ` shuah
  0 siblings, 0 replies; 16+ messages in thread
From: shuah @ 2019-08-19 20:27 UTC (permalink / raw)
  To: Luis Chamberlain, Scott Branden
  Cc: Greg Kroah-Hartman, Andy Gross, David Brown, Alexander Viro,
	bjorn.andersson, Rafael J . Wysocki, linux-kernel, linux-arm-msm,
	linux-fsdevel, BCM Kernel Feedback, Olof Johansson,
	Andrew Morton, Dan Carpenter, Colin Ian King, Kees Cook,
	Takashi Iwai, linux-kselftest, shuah

On 8/18/19 11:24 PM, Luis Chamberlain wrote:
> On Thu, Aug 15, 2019 at 05:09:43PM -0700, Scott Branden wrote:
>> Add test config into_buf to allow request_firmware_into_buf to be
>> called instead of request_firmware/request_firmware_direct.  The number
>> of parameters differ calling request_firmware_into_buf and support
>> has not been added to test such api in test_firmware until now.
>>
>> Signed-off-by: Scott Branden <scott.branden@broadcom.com>
> 
> Thanks for the patch!
> 
> Acked-by: Luis Chamberlain <mcgrof@kernel.org>
> 
>    Luis
> 

Greg! Please let me know if you would like me to take this
through my tree. If not,

Acked-by: Shuah Khan <skhan@linuxfoundation.org>


thanks,
-- Shuah

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] selftest: firmware: Add request_firmware_into_buf tests
  2019-08-19  5:24   ` Luis Chamberlain
@ 2019-08-19 20:27     ` shuah
  0 siblings, 0 replies; 16+ messages in thread
From: shuah @ 2019-08-19 20:27 UTC (permalink / raw)
  To: Luis Chamberlain, Scott Branden, Greg Kroah-Hartman
  Cc: Andy Gross, David Brown, Alexander Viro, bjorn.andersson,
	Rafael J . Wysocki, linux-kernel, linux-arm-msm, linux-fsdevel,
	BCM Kernel Feedback, Olof Johansson, Andrew Morton,
	Dan Carpenter, Colin Ian King, Kees Cook, Takashi Iwai,
	linux-kselftest, shuah

On 8/18/19 11:24 PM, Luis Chamberlain wrote:
> On Thu, Aug 15, 2019 at 05:09:44PM -0700, Scott Branden wrote:
>> Add tests cases for checking request_firmware_into_buf api.
>> API was introduced into kernel with no testing present previously.
>>
>> Signed-off-by: Scott Branden <scott.branden@broadcom.com>
> 
> Acked-by: Luis Chamberlain <mcgrof@kernel.org>
> 
>    Luis
> 

Greg! Please let me know if you would like me to take this
through my tree. If not,

Acked-by: Shuah Khan <skhan@linuxfoundation.org>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition
  2019-08-19 16:19     ` Scott Branden
@ 2019-08-20  1:26       ` Luis Chamberlain
  2019-08-20 15:54         ` Scott Branden
  2019-08-23 10:31         ` Takashi Iwai
  0 siblings, 2 replies; 16+ messages in thread
From: Luis Chamberlain @ 2019-08-20  1:26 UTC (permalink / raw)
  To: Scott Branden
  Cc: Greg Kroah-Hartman, Andy Gross, David Brown, Alexander Viro,
	Shuah Khan, bjorn.andersson, Rafael J . Wysocki, linux-kernel,
	linux-arm-msm, linux-fsdevel, BCM Kernel Feedback,
	Olof Johansson, Andrew Morton, Dan Carpenter, Colin Ian King,
	Kees Cook, Takashi Iwai, linux-kselftest

On Mon, Aug 19, 2019 at 09:19:51AM -0700, Scott Branden wrote:
> To be honest, I find the entire firmware code sloppy.

And that is after years of cleanup on my part. Try going back to v4.1
for instance, check the code out then for an incredible horrific sight :)

> I don't think the cache/no-cache feature is
> implemented or tested properly nor fallback to begin with.

I'm in total agreement! I *know* there must be holes in that code, and I
acknowledge a few possible gotchas on the commit logs. For instance, I
acknowledged that the firmware cache had a secondary purpose which was
not well documented or understood through commit e44565f62a720
("firmware: fix batched requests - wake all waiters"). The firmware
cache allows for batching requests and sharing the same original request
for multiple consecutive requests which *race against each other*.
That's when I started having my doubts about the architecture of the
firmware cache mechanism, it seemed too complex and perhaps overkill
and considered killing it.

As I noted in that commit, the firmware cache is used for:
    
1) Addressing races with file lookups during the suspend/resume cycle by
keeping firmware in memory during the suspend/resume cycle
	           
2) Batched requests for the same file rely only on work from the first
file lookup, which keeps the firmware in memory until the last
release_firmware() is called

Also worth quoting from that commit as well:

"Batched requests *only* take effect if secondary requests come in
prior to the first user calling release_firmware(). The devres name used
for the internal firmware cache is used as a hint other pending requests
are ongoing, the firmware buffer data is kept in memory until the last
user of the buffer calls release_firmware(), therefore serializing
requests and delaying the release until all requests are done."

Later we discovered that the firmware cache had a serious security issue
since its inception through commit 422b3db2a503 ("firmware: Fix security
issue with request_firmware_into_buf()"). Granted, exploiting this would
require the ability to load kernel code, so the vector of exploitation
is rather small.

The cache stuff cannot be removed as it *at least* resolves the fw
suspend stuff, but still, this can likely use a revisit in architecture
long term. The second implicit use case for batched requests however
seems complex and I'm not sure if it's worth maintaining. I'll note that
at least some drivers *do* their own firmware caching, iwlwifi, is one,
so there is an example there to allow drivers to say "I actually don't
need caching" for the future.

If you're volunteering to clean up / test the cache stuff I highly
welcome that. That and the fallback stuff have been needing testing for
years. Someone from Intel was working on patches for a cache test case
a while ago, but they disappeared.

> I'm not claiming this patch is the final
> solution and indicated such in the cover letter and the comment above.

I missed that sorry.

> I hope there is someone more familiar with this code to comment further and
> come up with a proper solution.

Alright, I'll dig in and take a look, and propose an alternative.

> I have found numerous issues and race conditions with the firmware code (I
> simply added a test).

That is nothing compared to the amount of fixes I have found and
actually fixed too, the code was a nightmare before I took on
maintenance.

> 1) Try loading the same valid firmware using no-cache once it has already
> been loaded with cache.

:) 

> It won't work, which is why I had to use a different filename in the test
> for request_firmware_into_buf.

Alright, I'll go try to fix this. Thanks for the report.

> 2) Try removing the "if (opt_flags & FW_OPT_NOCACHE)" in my patch and always
> call the mutex.
> 
> The firmware test will lock up during a "no uevent" test.  I am not familiar
> with the code to
> 
> know why such is true and what issue this exposes in the code.

I hinted in my review of the oops what the issue was.

> 3) I have a driver that uses request_firmware_into_buf and have multiple
> instances of the driver

Cool, is the driver upstream?

> loading the same firmware in parallel.  Some of the data is not read
> correctly in each instance.

Makes perfect sense considering the lack of testing I noted.

> I haven't yet to reproduce this issue with the firmware test 

That's because of batched firmware request mechanism.

> but currently
> have a mutex around the entire
> call to request_firmware_into_buf in our driver.

I will take a look at this now.

> Perhaps it is better at this point to add a mutex in
> request_firmware_into_buf to make it entirely safe?

No, that is not sufficient, although it would also solve the
issue.

> (Perhaps even with every request_firmware functions as none seems to be
> tested properly.)

No, you are incorrect. The other firmware API calls *have* been
elaborately tested. The firmware cache stuff *is a mess* however,
since we *use and support it*, I've done my best to salvage it and
document it.

I'll take a look at this and propose an alternative solution.

  Luis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition
  2019-08-20  1:26       ` Luis Chamberlain
@ 2019-08-20 15:54         ` Scott Branden
  2019-08-23 10:31         ` Takashi Iwai
  1 sibling, 0 replies; 16+ messages in thread
From: Scott Branden @ 2019-08-20 15:54 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: Greg Kroah-Hartman, Andy Gross, David Brown, Alexander Viro,
	Shuah Khan, bjorn.andersson, Rafael J . Wysocki, linux-kernel,
	linux-arm-msm, linux-fsdevel, BCM Kernel Feedback,
	Olof Johansson, Andrew Morton, Dan Carpenter, Colin Ian King,
	Kees Cook, Takashi Iwai, linux-kselftest

Hi Luis,

I'm glad you are a subject expert in this area.

Some more comments inline.


On 2019-08-19 6:26 p.m., Luis Chamberlain wrote:
> On Mon, Aug 19, 2019 at 09:19:51AM -0700, Scott Branden wrote:
>> To be honest, I find the entire firmware code sloppy.
> And that is after years of cleanup on my part. Try going back to v4.1
> for instance, check the code out then for an incredible horrific sight :)
>
>> I don't think the cache/no-cache feature is
>> implemented or tested properly nor fallback to begin with.
> I'm in total agreement! I *know* there must be holes in that code, and I
> acknowledge a few possible gotchas on the commit logs. For instance, I
> acknowledged that the firmware cache had a secondary purpose which was
> not well documented or understood through commit e44565f62a720
> ("firmware: fix batched requests - wake all waiters"). The firmware
> cache allows for batching requests and sharing the same original request
> for multiple consecutive requests which *race against each other*.
> That's when I started having my doubts about the architecture of the
> firmware cache mechanism, it seemed too complex and perhaps overkill
> and considered killing it.

Great (kill it!).  I have no need for cached or batched requests.

That would remove a lot of problems.

>
> As I noted in that commit, the firmware cache is used for:
>      
> 1) Addressing races with file lookups during the suspend/resume cycle by
> keeping firmware in memory during the suspend/resume cycle
> 	
> 2) Batched requests for the same file rely only on work from the first
> file lookup, which keeps the firmware in memory until the last
> release_firmware() is called
>
> Also worth quoting from that commit as well:
>
> "Batched requests *only* take effect if secondary requests come in
> prior to the first user calling release_firmware(). The devres name used
> for the internal firmware cache is used as a hint other pending requests
> are ongoing, the firmware buffer data is kept in memory until the last
> user of the buffer calls release_firmware(), therefore serializing
> requests and delaying the release until all requests are done."
>
> Later we discovered that the firmware cache had a serious security issue
> since its inception through commit 422b3db2a503 ("firmware: Fix security
> issue with request_firmware_into_buf()"). Granted, exploiting this would
> require the ability to load kernel code, so the vector of exploitation
> is rather small.
>
> The cache stuff cannot be removed as it *at least* resolves the fw
> suspend stuff, but still, this can likely use a revisit in architecture
> long term. The second implicit use case for batched requests however
> seems complex and not sure if its worth to maintain. I'll note that
> at least some drivers *do* their own firmware caching, iwlwifi, is one,
> so there is an example there to allow drivers to say "I actually don't
> need caching" for the future.
>
> If you're volunteering to cleaning / testing the cache stuff I highly
> welcome that.

I would only volunteer to remove it, not test or support it.

>   That and the fallback stuff has been needing testing for
> years. Someone was working on patches on the test case for cache stuff
> a while ago, from Intel, but they disappeared.
Again, I would only volunteer to remove the fallback mechanism to remove 
added race conditions.
>> I'm not claiming this patch is the final
>> solution and indicated such in the cover letter and the comment above.
> I missed that sorry.
>
>> I hope there is someone more familiar with this code to comment further and
>> come up with a proper solution.
> Alright, I'll dig in and take a look, and propose an alternative.
>
>> I have found numerous issues and race conditions with the firmware code (I
>> simply added a test).
> That is nothing compared to the amount of fixes I have found and
> actually fixed too, the code was a nightmare before I took on
> maintenance.
>
>> 1) Try loading the same valid firmware using no-cache once it has already
>> been loaded with cache.
> :)
>
>> It won't work, which is why I had to use a different filename in the test
>> for request_firmware_into_buf.
> Alright, I'll go try to fix this. Thanks for the report.

I think it's a minor issue compared to the race conditions present.

In reality I don't think anyone will load the same firmware using cache vs.

no-cache.

It's just something I stumbled upon when adding the test case and then 
had to avoid.

>
>> 2) Try removing the "if (opt_flags & FW_OPT_NOCACHE)" in my patch and always
>> call the mutex.
>>
>> The firmware test will lock up during a "no uevent" test.  I am not familiar
>> with the code to
>>
>> know why such is true and what issue this exposes in the code.
> I hinted in my review of the oops what the issue was.

I don't know if it's the same bug for the "no uevent" test case though?  
The test

just hangs and the kernel oops is not present.  It might be exposing another

underlying issue with the request_firmware code.

>
>> 3) I have a driver that uses request_firmware_into_buf and have multiple
>> instances of the driver
> Cool, is the driver upstream?

I'm working on cleaning up the driver right now to upstream.

First thing is I need the request_firmware_into_buf tests accepted upstream.

Then I can add my enhancement to request_firmware_into_buf to partially
read the file (previously sent out, but it needed a test case).

In order to do so Greg K-H required a test case for this but even the 
current API had no test.

In that patch series I can then add the new driver which requires my 
enhanced request_firmware_into_buf.

>
>> loading the same firmware in parallel.  Some of the data is not read
>> correctly in each instance.
> Makes perfect sense considering the lack of testing I noted.
>
>> I haven't yet to reproduce this issue with the firmware test
> That's because of batched firmware request mechanism.

Is there a way to not use the batch firmware request mechanism when 
calling request_firmware_into_buf

to see if the problem doesn't happen?

>
>> but currently
>> have a mutex around the entire
>> call to request_firmware_into_buf in our driver.
> I will take a look at this now.
>
>> Perhaps it is better at this point to add a mutex in
>> request_firmware_into_buf to make is entirely safe?
> No, that is not sufficient, although it would also solve the
> issue.

I don't have another solution with all the other mechanisms in

play in the current firmware code.  For now I'll leave the mutex

in the driver I'm upstreaming so it works reliably.

>
>> (Perhaps even with every request_firmware functions as none seems to be
>> tested properly.)
> No, you are incorrect. The other firmware API calls *have* been
> elaborately tested. The firmware cache stuff *is a mess* however,
> since we *use and support it*, I've done my best to salvage it and
> document it.

OK, I don't use any of the other mechanisms right now.

All I require is request_firmware_into_buf.

>
> I'll take a look at this and propose an alternative solution.
>
>    Luis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition
  2019-08-20  1:26       ` Luis Chamberlain
  2019-08-20 15:54         ` Scott Branden
@ 2019-08-23 10:31         ` Takashi Iwai
  2019-08-23 15:43           ` Luis Chamberlain
  2019-08-23 19:48           ` Scott Branden
  1 sibling, 2 replies; 16+ messages in thread
From: Takashi Iwai @ 2019-08-23 10:31 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: Scott Branden, Greg Kroah-Hartman, Andy Gross, David Brown,
	Alexander Viro, Shuah Khan, bjorn.andersson, Rafael J . Wysocki,
	linux-kernel, linux-arm-msm, linux-fsdevel, BCM Kernel Feedback,
	Olof Johansson, Andrew Morton, Dan Carpenter, Colin Ian King,
	Kees Cook, linux-kselftest

On Tue, 20 Aug 2019 03:26:55 +0200,
Luis Chamberlain wrote:
> 
> On Mon, Aug 19, 2019 at 09:19:51AM -0700, Scott Branden wrote:
> > To be honest, I find the entire firmware code sloppy.
> 
> And that is after years of cleanup on my part. Try going back to v4.1
> for instance, check the code out then for an incredible horrific sight :)
> 
> > I don't think the cache/no-cache feature is
> > implemented or tested properly nor fallback to begin with.
> 
> I'm in total agreement! I *know* there must be holes in that code, and I
> acknowledge a few possible gotchas on the commit logs. For instance, I
> acknowledged that the firmware cache had a secondary purpose which was
> not well documented or understood through commit e44565f62a720
> ("firmware: fix batched requests - wake all waiters"). The firmware
> cache allows for batching requests and sharing the same original request
> for multiple consecutive requests which *race against each other*.
> That's when I started having my doubts about the architecture of the
> firmware cache mechanism, it seemed too complex and perhaps overkill
> and considered killing it.
>
> As I noted in that commit, the firmware cache is used for:
>     
> 1) Addressing races with file lookups during the suspend/resume cycle by
> keeping firmware in memory during the suspend/resume cycle

Right, this one is the significant need.  And currently the fw loader
core takes a complicated approach as:

- Store firmware name string in devres for each firmware
- Upon suspend, loop over all devices and associated firmware names,
  create a list, then loop over the list for loading the firmware
  files before sleeping.
- Upon resume, release the firmware files that have been loaded at
  suspend in a delayed manner.

So we have different levels of lists there, which makes the code quite
hard to understand.

The reason of the above approach is because we didn't know which
device driver would need the firmware at resume, so basically we do
cache for all devices.  Maybe it'd be better to look for the exact
drivers that require the firmware at resume, and handle only such
ones instead of a catch-all approach.

OTOH, I find it's not bad to keep the loaded firmware file names per
device and expose e.g. via sysfs.  Currently we have no way to look at
which firmware files have been loaded afterwards; the only way to see
it is enabling some debug option and read through kernel messages.
(FWIW, I stumbled on this problem since I wanted to provide the split
 kernel-firmware package on SUSE distro, and let the installer decide
 which package to pick up.)

> 2) Batched requests for the same file rely only on work from the first
> file lookup, which keeps the firmware in memory until the last
> release_firmware() is called

IMO, this feature can be omitted if it makes things too complicated.
I guess it was added because we handle the fw caching anyway.
There isn't a big need for this due to performance.  If the
performance matters, such driver should re-use its own firmware by
itself.

(snip)
> > 3) I have a driver that uses request_firmware_into_buf and have multiple
> > instances of the driver
> 
> Cool, is the driver upstream?
> 
> > loading the same firmware in parallel.  Some of the data is not read
> > correctly in each instance.
> 
> Makes perfect sense considering the lack of testing I noted.
> 
> > I haven't yet to reproduce this issue with the firmware test 
> 
> That's because of batched firmware request mechanism.
> 
> > but currently
> > have a mutex around the entire
> > call to request_firmware_into_buf in our driver.
> 
> I will take a look at this now.
> 
> > Perhaps it is better at this point to add a mutex in
> > request_firmware_into_buf to make is entirely safe?
> 
> No, that is not sufficient, although it would also solve the
> issue.

The mutex for request_firmware_into_buf() doesn't sound like a good
approach.  Basically the direct fw loading should work in parallel
for the same firmware file.  We might have some bug wrt cache stuff,
but it can be fixed properly.

However, the fw loading in fallback mode can't run in parallel for
the same file, per design -- no matter whether cached or not.
So, if any, we'd need to put a mutex around the fallback loader code.
And, the mutex should be rather per device, not a global one.

Or we may trick it by appending the second parallel caller into the
same wait queue, but the code will be more complex, so I don't think
it's worth it.


thanks,

Takashi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition
  2019-08-23 10:31         ` Takashi Iwai
@ 2019-08-23 15:43           ` Luis Chamberlain
  2019-08-23 19:56             ` Scott Branden
  2019-08-23 19:48           ` Scott Branden
  1 sibling, 1 reply; 16+ messages in thread
From: Luis Chamberlain @ 2019-08-23 15:43 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Scott Branden, Greg Kroah-Hartman, Andy Gross, David Brown,
	Alexander Viro, Shuah Khan, bjorn.andersson, Rafael J . Wysocki,
	linux-kernel, linux-arm-msm, linux-fsdevel, BCM Kernel Feedback,
	Olof Johansson, Andrew Morton, Dan Carpenter, Colin Ian King,
	Kees Cook, linux-kselftest

On Fri, Aug 23, 2019 at 12:31:40PM +0200, Takashi Iwai wrote:
> So, if any, we'd need put a mutex around the fallback loader code.
> And, the mutex should be rather per device, not a global one.
> 
> Or we may trick it by appending the second parallel caller into the
> same wait queue, but the code will be more complex, so I don't think
> worth for it.

For now I'm thinking of a new API with a devname prefix to the driver.
I'll have to test if that works, but not sure if I'll get to it today
before my vacation starts (today).

  Luis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition
  2019-08-23 10:31         ` Takashi Iwai
  2019-08-23 15:43           ` Luis Chamberlain
@ 2019-08-23 19:48           ` Scott Branden
  1 sibling, 0 replies; 16+ messages in thread
From: Scott Branden @ 2019-08-23 19:48 UTC (permalink / raw)
  To: Takashi Iwai, Luis Chamberlain
  Cc: Greg Kroah-Hartman, Andy Gross, David Brown, Alexander Viro,
	Shuah Khan, bjorn.andersson, Rafael J . Wysocki, linux-kernel,
	linux-arm-msm, linux-fsdevel, BCM Kernel Feedback,
	Olof Johansson, Andrew Morton, Dan Carpenter, Colin Ian King,
	Kees Cook, linux-kselftest


On 2019-08-23 3:31 a.m., Takashi Iwai wrote:
> On Tue, 20 Aug 2019 03:26:55 +0200,
> Luis Chamberlain wrote:
>> On Mon, Aug 19, 2019 at 09:19:51AM -0700, Scott Branden wrote:
>>> To be honest, I find the entire firmware code sloppy.
>> And that is after years of cleanup on my part. Try going back to v4.1
>> for instance, check the code out then for an incredible horrific sight :)
>>
>>> I don't think the cache/no-cache feature is
>>> implemented or tested properly nor fallback to begin with.
>> I'm in total agreement! I *know* there must be holes in that code, and I
>> acknowledge a few possible gotchas on the commit logs. For instance, I
>> acknowledged that the firmware cache had a secondary purpose which was
>> not well documented or understood through commit e44565f62a720
>> ("firmware: fix batched requests - wake all waiters"). The firmware
>> cache allows for batching requests and sharing the same original request
>> for multiple consecutive requests which *race against each other*.
>> That's when I started having my doubts about the architecture of the
>> firmware cache mechanism, it seemed too complex and perhaps overkill
>> and considered killing it.
>>
>> As I noted in that commit, the firmware cache is used for:
>>      
>> 1) Addressing races with file lookups during the suspend/resume cycle by
>> keeping firmware in memory during the suspend/resume cycle
> Right, this one is the significant need.  And currently the fw loader
> core takes a complicated approach as:
>
> - Store firmware name string in devres for each firmware
> - Upon suspend, loop over all devices and associated firmware names,
>    create a list, then loop over the list for loading the firmware
>    files before sleeping.
> - Upon resume, release the firmware files that have been loaded at
>    suspend in a delayed manner.
>
> So we have different level of lists there, which make the code quite
> hard to understand.
>
> The reason of the above approach is because we didn't know which
> device driver would need the firmware at resume, so basically we do
> cache for all devices.  Maybe it'd better to look for the exact
> drivers that require the firmware at resume, and handle only such
> ones instead of catch-all approach.

Yes, that would be better.  Or remove this cache mechanism entirely

and provide some helper functions of some sort to the limited

drivers that actually require such mechanism.

>
> OTOH, I find it's not bad to keep the loaded firmware file names per
> device and expose e.g. via sysfs.  Currently we have no way to look at
> which firmware files have been loaded afterwards; the only way to see
> it is enabling some debug option and read through kernel messages.
> (FWIW, I stumbled on this problem since I wanted to provide the split
>   kernel-firmware package on SUSE distro, and let the installer decide
>   which package to pick up.)
>
>> 2) Batched requests for the same file rely only on work from the first
>> file lookup, which keeps the firmware in memory until the last
>> release_firmware() is called
> IMO, this feature can be omitted if it makes things too complicated.
> I guess it were added because we handle the fw caching in anyway.
> There isn't a big need for this due to performance.  If the
> performance matters, such driver should re-use its own firmware by
> itself.

Any simplifications would be appreciated.

I sure don't understand what the code is trying to do.

>
> (snip)
>>> 3) I have a driver that uses request_firmware_into_buf and have multiple
>>> instances of the driver
>> Cool, is the driver upstream?
>>
>>> loading the same firmware in parallel.  Some of the data is not read
>>> correctly in each instance.
>> Makes perfect sense considering the lack of testing I noted.
>>
>>> I haven't yet to reproduce this issue with the firmware test
>> That's because of batched firmware request mechanism.
>>
>>> but currently
>>> have a mutex around the entire
>>> call to request_firmware_into_buf in our driver.
>> I will take a look at this now.
>>
>>> Perhaps it is better at this point to add a mutex in
>>> request_firmware_into_buf to make is entirely safe?
>> No, that is not sufficient, although it would also solve the
>> issue.
> The mutex for request_firmware_into_buf() doesn't sound like a good
> approach.  Basically the direct fw loading should work in parallel
> for the same firmware file.  We might have some bug wrt cache stuff,
> but it can be fixed properly.
>
> However, the fw loading in fallback mode can't run in parallel for
> the same file, per design -- no matter whether cached or not.
> So, if any, we'd need put a mutex around the fallback loader code.
> And, the mutex should be rather per device, not a global one.

Sure, whatever solves the issue.  All I wish to do is read

part of file into a buffer specified.

>
> Or we may trick it by appending the second parallel caller into the
> same wait queue, but the code will be more complex, so I don't think
> worth for it.
>
>
> thanks,
>
> Takashi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition
  2019-08-23 15:43           ` Luis Chamberlain
@ 2019-08-23 19:56             ` Scott Branden
  0 siblings, 0 replies; 16+ messages in thread
From: Scott Branden @ 2019-08-23 19:56 UTC (permalink / raw)
  To: Luis Chamberlain, Takashi Iwai
  Cc: Greg Kroah-Hartman, Andy Gross, David Brown, Alexander Viro,
	Shuah Khan, bjorn.andersson, Rafael J . Wysocki, linux-kernel,
	linux-arm-msm, linux-fsdevel, BCM Kernel Feedback,
	Olof Johansson, Andrew Morton, Dan Carpenter, Colin Ian King,
	Kees Cook, linux-kselftest


On 2019-08-23 8:43 a.m., Luis Chamberlain wrote:
> On Fri, Aug 23, 2019 at 12:31:40PM +0200, Takashi Iwai wrote:
>> So, if any, we'd need put a mutex around the fallback loader code.
>> And, the mutex should be rather per device, not a global one.
>>
>> Or we may trick it by appending the second parallel caller into the
>> same wait queue, but the code will be more complex, so I don't think
>> worth for it.
> For now I'm thinking of a new API with a devname prefix to the driver.
> I'll have to test if that works, but not sure if I'll get to it today
> before my vacation starts (today).
Have a good vacation.
>
>    Luis

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2019-08-23 19:56 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-16  0:09 [PATCH 0/3] firmware: selftest for request_firmware_into_buf Scott Branden
2019-08-16  0:09 ` [PATCH 1/3] test_firmware: add support " Scott Branden
2019-08-19  5:24   ` Luis Chamberlain
2019-08-19 20:27     ` shuah
2019-08-16  0:09 ` [PATCH 2/3] selftest: firmware: Add request_firmware_into_buf tests Scott Branden
2019-08-19  5:24   ` Luis Chamberlain
2019-08-19 20:27     ` shuah
2019-08-16  0:09 ` [PATCH 3/3] firmware: add mutex fw_lock_fallback for race condition Scott Branden
2019-08-19  5:39   ` Luis Chamberlain
2019-08-19 16:19     ` Scott Branden
2019-08-20  1:26       ` Luis Chamberlain
2019-08-20 15:54         ` Scott Branden
2019-08-23 10:31         ` Takashi Iwai
2019-08-23 15:43           ` Luis Chamberlain
2019-08-23 19:56             ` Scott Branden
2019-08-23 19:48           ` Scott Branden

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).