* [PATCH 1/2] test_firmware: add suspend support to test buggy drivers
2021-04-16 23:58 [PATCH 0/2] fs: provide a stop gap fix for buggy resume firmware API calls Luis Chamberlain
@ 2021-04-16 23:58 ` Luis Chamberlain
2021-04-21 22:23 ` Lukas Middendorf
2021-04-16 23:58 ` [PATCH 2/2] fs/kernel_read_file: use usermodehelper_read_trylock() as a stop gap Luis Chamberlain
1 sibling, 1 reply; 6+ messages in thread
From: Luis Chamberlain @ 2021-04-16 23:58 UTC (permalink / raw)
To: rafael, gregkh, viro, jack, bvanassche, jeyu, ebiederm
Cc: mchehab, keescook, linux-fsdevel, kernel, kexec, linux-kernel,
Luis Chamberlain
Lukas Middendorf reported a situation where a driver's
request_firmware() call on resume caused a stall. Upon
inspection the issue was that the driver in question was
calling request_firmware() only on resume, and since we
currently do not have a generic kernel VFS freeze / thaw
solution in place we are allowing filesystems to race
against the disappearance of a block device, and
this is presently an issue which can lead to a stall.
It is difficult to reproduce this unless you have hardware
which mimics this setup. So to test this setup, let's just
implement support for doing these wacky things. This lets
us test that corner case easily as follows.
echo N > /sys/module/printk/parameters/console_suspend
modprobe test_firmware
echo 1 > /sys/devices/virtual/misc/test_firmware/config_enable_resume_test
systemctl suspend
Then call virsh dompmwakeup guest-id on the guest and you
can reproduce the issue easily. The issue is reproducible
with xfs and btrfs, using a new partition for /lib/firmware
with files created first as follows:
for n in {1..1000}; do
dd if=/dev/urandom of=/lib/firmware/file$( printf %03d "$n" ).bin bs=1 count=$((RANDOM + 1024 ))
done
Cc: rafael@kernel.org
Cc: jack@suse.cz
Cc: bvanassche@acm.org
Cc: kernel@tuxforce.de
Cc: mchehab@kernel.org
Cc: keescook@chromium.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
lib/test_firmware.c | 136 ++++++++++++++++--
tools/testing/selftests/firmware/fw_lib.sh | 8 +-
.../selftests/firmware/fw_test_resume.sh | 80 +++++++++++
3 files changed, 211 insertions(+), 13 deletions(-)
create mode 100755 tools/testing/selftests/firmware/fw_test_resume.sh
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index b6fe89add9fe..47ba6f4380ab 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -18,6 +18,7 @@
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
+#include <linux/platform_device.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -35,6 +36,8 @@ MODULE_IMPORT_NS(TEST_FIRMWARE);
static DEFINE_MUTEX(test_fw_mutex);
static const struct firmware *test_firmware;
+static struct platform_device *pdev;
+
struct test_batched_req {
u8 idx;
int rc;
@@ -58,6 +61,9 @@ struct test_batched_req {
* @sync_direct: when the sync trigger is used if this is true
* request_firmware_direct() will be used instead.
* @send_uevent: whether or not to send a uevent for async requests
+ * @enable_resume_test: if true this will enable a test to
+ * issue a request_firmware() upon resume. This is useful to test resume
+ * after suspend filesystem races.
* @num_requests: number of requests to try per test case. This is trigger
* specific.
* @reqs: stores all requests information
@@ -99,6 +105,7 @@ struct test_config {
bool partial;
bool sync_direct;
bool send_uevent;
+ bool enable_resume_test;
u8 num_requests;
u8 read_fw_idx;
@@ -195,6 +202,7 @@ static int __test_firmware_config_init(void)
test_fw_config->file_offset = 0;
test_fw_config->partial = false;
test_fw_config->sync_direct = false;
+ test_fw_config->enable_resume_test = false;
test_fw_config->req_firmware = request_firmware;
test_fw_config->test_result = 0;
test_fw_config->reqs = NULL;
@@ -275,6 +283,9 @@ static ssize_t config_show(struct device *dev,
len += scnprintf(buf + len, PAGE_SIZE - len,
"sync_direct:\t\t%s\n",
test_fw_config->sync_direct ? "true" : "false");
+ len += scnprintf(buf+len, PAGE_SIZE - len,
+ "enable_resume_test:\t\t%s\n",
+ test_fw_config->enable_resume_test ? "true" : "false");
len += scnprintf(buf + len, PAGE_SIZE - len,
"read_fw_idx:\t%u\n", test_fw_config->read_fw_idx);
@@ -538,6 +549,22 @@ static ssize_t config_sync_direct_show(struct device *dev,
}
static DEVICE_ATTR_RW(config_sync_direct);
+static ssize_t config_enable_resume_test_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ return test_dev_config_update_bool(buf, count,
+ &test_fw_config->enable_resume_test);
+}
+
+static ssize_t config_enable_resume_test_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return test_dev_config_show_bool(buf, test_fw_config->enable_resume_test);
+}
+static DEVICE_ATTR_RW(config_enable_resume_test);
+
static ssize_t config_send_uevent_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -1065,6 +1092,7 @@ static struct attribute *test_dev_attrs[] = {
TEST_FW_DEV_ATTR(config_partial),
TEST_FW_DEV_ATTR(config_sync_direct),
TEST_FW_DEV_ATTR(config_send_uevent),
+ TEST_FW_DEV_ATTR(config_enable_resume_test),
TEST_FW_DEV_ATTR(config_read_fw_idx),
/* These don't use the config at all - they could be ported! */
@@ -1094,6 +1122,81 @@ static struct miscdevice test_fw_misc_device = {
.groups = test_dev_groups,
};
+static int __maybe_unused test_firmware_suspend(struct device *dev)
+{
+ return 0;
+}
+
+
+static int __maybe_unused test_firmware_resume(struct device *dev)
+{
+ int rc;
+
+ if (!test_fw_config->enable_resume_test)
+ return 0;
+
+ pr_info("resume test, loading '%s'\n", test_fw_config->name);
+
+ mutex_lock(&test_fw_mutex);
+ release_firmware(test_firmware);
+ test_firmware = NULL;
+ rc = request_firmware(&test_firmware, test_fw_config->name, dev);
+ if (rc) {
+ mutex_unlock(&test_fw_mutex);
+ pr_info("load of '%s' failed: %d\n", test_fw_config->name, rc);
+ goto out;
+ }
+
+ pr_info("loaded: %zu\n", test_firmware->size);
+ mutex_unlock(&test_fw_mutex);
+ pr_info("resume test, completed successfully\n");
+out:
+ return rc;
+}
+
+static SIMPLE_DEV_PM_OPS(test_dev_pm_ops, test_firmware_suspend, test_firmware_resume);
+
+static int test_firmware_probe(struct platform_device *dev)
+{
+ int rc;
+
+ rc = misc_register(&test_fw_misc_device);
+ if (rc) {
+ kfree(test_fw_config);
+ pr_err("could not register misc device: %d\n", rc);
+ return rc;
+ }
+
+ pr_info("interface ready\n");
+
+ return 0;
+}
+
+static int test_firmware_remove(struct platform_device *dev)
+{
+ mutex_lock(&test_fw_mutex);
+ release_firmware(test_firmware);
+ misc_deregister(&test_fw_misc_device);
+ mutex_unlock(&test_fw_mutex);
+
+ return 0;
+}
+
+static void test_firmware_shutdown(struct platform_device *dev)
+{
+}
+
+static struct platform_driver test_firmware_driver = {
+ .driver = {
+ .name = "test_firmware",
+ .pm = &test_dev_pm_ops,
+ },
+ .probe = test_firmware_probe,
+ .remove = test_firmware_remove,
+ .shutdown = test_firmware_shutdown,
+};
+
+
static int __init test_firmware_init(void)
{
int rc;
@@ -1109,28 +1212,39 @@ static int __init test_firmware_init(void)
return rc;
}
- rc = misc_register(&test_fw_misc_device);
- if (rc) {
- kfree(test_fw_config);
- pr_err("could not register misc device: %d\n", rc);
- return rc;
- }
+ rc = platform_driver_register(&test_firmware_driver);
+ if (rc)
+ goto err_alloc;
+
+ pdev = platform_device_alloc("test_firmware", -1);
+ if (!pdev)
+ goto err_driver_unregister;
- pr_warn("interface ready\n");
+ rc = platform_device_add(pdev);
+ if (rc)
+ goto err_free_device;
return 0;
+
+ err_free_device:
+ platform_device_put(pdev);
+ err_driver_unregister:
+ platform_driver_unregister(&test_firmware_driver);
+ err_alloc:
+ __test_firmware_config_free();
+ kfree(test_fw_config);
+ return rc;
}
module_init(test_firmware_init);
static void __exit test_firmware_exit(void)
{
- mutex_lock(&test_fw_mutex);
- release_firmware(test_firmware);
- misc_deregister(&test_fw_misc_device);
+ platform_device_unregister(pdev);
+ platform_driver_unregister(&test_firmware_driver);
+
__test_firmware_config_free();
kfree(test_fw_config);
- mutex_unlock(&test_fw_mutex);
pr_warn("removed interface\n");
}
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index 5b8c0fedee76..adba33f78cb3 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -119,9 +119,13 @@ setup_tmp_file()
{
FWPATH=$(mktemp -d)
FW="$FWPATH/test-firmware.bin"
- echo "ABCD0123" >"$FW"
+ if [[ "$1" != "--skip-file-creation" ]]; then
+ echo "ABCD0123" >"$FW"
+ fi
FW_INTO_BUF="$FWPATH/$TEST_FIRMWARE_INTO_BUF_FILENAME"
- echo "EFGH4567" >"$FW_INTO_BUF"
+ if [[ "$1" != "--skip-file-creation" ]]; then
+ echo "EFGH4567" >"$FW_INTO_BUF"
+ fi
NAME=$(basename "$FW")
if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path
diff --git a/tools/testing/selftests/firmware/fw_test_resume.sh b/tools/testing/selftests/firmware/fw_test_resume.sh
new file mode 100755
index 000000000000..159483297166
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_test_resume.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This will enable the resume firmware request call. Since we have no
+# control over a guest / hypervisor, the caller is in charge of the
+# actual suspend / resume cycle. This only enables the test to be triggered
+# upon resume.
+set -e
+
+TEST_REQS_FW_SYSFS_FALLBACK="no"
+TEST_REQS_FW_SET_CUSTOM_PATH="yes"
+TEST_DIR=$(dirname $0)
+FWPATH=""
+source $TEST_DIR/fw_lib.sh
+
+check_mods
+check_setup
+verify_reqs
+#setup_tmp_file --skip-file-creation
+
+test_resume()
+{
+ if [ ! -f $DIR/reset ]; then
+ echo "Configuration triggers not present, ignoring test"
+ exit $ksft_skip
+ fi
+}
+
+release_all_firmware()
+{
+ echo 1 > $DIR/release_all_firmware
+}
+
+config_enable_resume_test()
+{
+ echo 1 > $DIR/config_enable_resume_test
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ # Turn down the timeout so failures don't take so long.
+ echo 1 >/sys/class/firmware/timeout
+ fi
+}
+
+config_disable_resume_test()
+{
+ echo 0 > $DIR/config_enable_resume_test
+}
+
+usage()
+{
+ echo "Usage: $0 [ -v ] | [ -h | --help]"
+ echo ""
+ echo " --check-resume-test Verify resume test"
+ echo " -h|--help Help"
+ echo
+ echo "Without any arguments this will enable the resume firmware test"
+ echo "after suspend. To verify that the test went well, run with -v".
+ echo
+ exit 1
+}
+
+verify_resume_test()
+{
+ trap "test_finish" EXIT
+}
+
+parse_args()
+{
+ if [ $# -eq 0 ]; then
+ config_enable_resume_test
+ else
+ if [[ "$1" = "--check-resume-test" ]]; then
+ config_disable_resume_test
+ verify_resume_test
+ else
+ usage
+ fi
+ fi
+}
+
+exit 0
--
2.29.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] fs/kernel_read_file: use usermodehelper_read_trylock() as a stop gap
2021-04-16 23:58 [PATCH 0/2] fs: provide a stop gap fix for buggy resume firmware API calls Luis Chamberlain
2021-04-16 23:58 ` [PATCH 1/2] test_firmware: add suspend support to test buggy drivers Luis Chamberlain
@ 2021-04-16 23:58 ` Luis Chamberlain
2021-04-19 18:15 ` Luis Chamberlain
2021-04-21 20:42 ` Lukas Middendorf
1 sibling, 2 replies; 6+ messages in thread
From: Luis Chamberlain @ 2021-04-16 23:58 UTC (permalink / raw)
To: rafael, gregkh, viro, jack, bvanassche, jeyu, ebiederm
Cc: mchehab, keescook, linux-fsdevel, kernel, kexec, linux-kernel,
Luis Chamberlain
The VFS lacks support to do automatic freeze / thaw of filesystems
on the suspend / resume cycle. This can cause some issues, namely
stalls when we have reads/writes during the suspend / resume cycle.
Although for module loading / kexec the probability of this happening
is extremely low, maybe even impossible, it's a known real issue with
the request_firmware() API, which does a direct fs read. For this reason
only be chatty about the issue on the call used by the firmware API.
Lukas Middendorf has reported an easy situation to reproduce, which can
be caused by questionably buggy drivers which call the request_firmware()
API on resume.
All you have to do is do the suspend / resume cycle using a driver
which will call request_firmware() on resume on a fresh XFS or
btrfs filesystem which has random files but the target file present:
for n in {1..1000}; do
dd if=/dev/urandom of=/lib/firmware/file$( printf %03d "$n" ).bin bs=1 count=$((RANDOM + 1024 ))
done
You can reproduce this either with a buggy driver or by using the
test_firmware driver with the new hooks added to test this case.
No other request_firmware() calls can be triggered for this hang to work.
The hang occurs because:
request_firmware() -->
kernel_read_file_from_path_initns() -->
file_open_root() -->
btrfs_lookup() --> ... -->
btrfs_search_slot() --> read_block_for_search() -->
--> read_tree_block() --> btree_read_extent_buffer_pages() -->
--> submit_one_bio() --> btrfs_submit_metadata_bio() -->
--> btrfsic_submit_bio() --> submit_bio()
--> this completes and then
--> wait_on_page_locked() on the first page
--> never returns
The endless wait for the read which the piece of hardware never got
stalls resume as sync calls are all asynchronous.
The VFS fs freeze work fixes this issue, however it requires a bit
more work, which may take a while to land upstream, and so for now
this provides a simple stop-gap solution.
We can remove this stop-gap solution once the kernel gets VFS
fs freeze / thaw support.
Cc: rafael@kernel.org
Cc: jack@suse.cz
Cc: bvanassche@acm.org
Cc: kernel@tuxforce.de
Cc: mchehab@kernel.org
Cc: keescook@chromium.org
Reported-by: Lukas Middendorf <kernel@tuxforce.de>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
fs/kernel_read_file.c | 37 +++++++++++++++++++++++++++++++++++--
kernel/kexec_file.c | 9 ++++++++-
kernel/module.c | 8 +++++++-
3 files changed, 50 insertions(+), 4 deletions(-)
diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c
index 90d255fbdd9b..f82d8534bf39 100644
--- a/fs/kernel_read_file.c
+++ b/fs/kernel_read_file.c
@@ -4,6 +4,7 @@
#include <linux/kernel_read_file.h>
#include <linux/security.h>
#include <linux/vmalloc.h>
+#include <linux/umh.h>
/**
* kernel_read_file() - read file contents into a kernel buffer
@@ -134,12 +135,20 @@ int kernel_read_file_from_path(const char *path, loff_t offset, void **buf,
if (!path || !*path)
return -EINVAL;
+ ret = usermodehelper_read_trylock();
+ if (ret)
+ return ret;
+
file = filp_open(path, O_RDONLY, 0);
- if (IS_ERR(file))
+ if (IS_ERR(file)) {
+ usermodehelper_read_unlock();
return PTR_ERR(file);
+ }
ret = kernel_read_file(file, offset, buf, buf_size, file_size, id);
fput(file);
+
+ usermodehelper_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(kernel_read_file_from_path);
@@ -160,13 +169,37 @@ int kernel_read_file_from_path_initns(const char *path, loff_t offset,
get_fs_root(init_task.fs, &root);
task_unlock(&init_task);
+ /*
+ * Note: we may be incorrectly called on a driver's resume callback.
+ *
+ * The kernel's power management may be busy bringing us to suspend
+ * or trying to get us back to resume. If we try to do a direct write
+ * during this time a block driver may never get that request, and the
+ * filesystem can wait forever. This requires proper VFS work, which
+ * is not yet ready.
+ *
+ * Likewise busy trying here is not possible as well as we'd be holding
+ * up the kernel's pm resume, and waiting will not allow us to thaw
+ * the filesystem, we'd just wait forever. Best we can do is
+ * communicate the problem so that drivers use the firmware cache or
+ * implement their own prior to resume.
+ */
+ ret = usermodehelper_read_trylock();
+ if (ret) {
+ pr_warn_once("Trying direct fs read while not allowed");
+ return ret;
+ }
+
file = file_open_root(root.dentry, root.mnt, path, O_RDONLY, 0);
path_put(&root);
- if (IS_ERR(file))
+ if (IS_ERR(file)) {
+ usermodehelper_read_unlock();
return PTR_ERR(file);
+ }
ret = kernel_read_file(file, offset, buf, buf_size, file_size, id);
fput(file);
+ usermodehelper_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(kernel_read_file_from_path_initns);
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 33400ff051a8..d19a01836128 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -226,10 +226,16 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
int ret;
void *ldata;
+ ret = usermodehelper_read_trylock();
+ if (ret)
+ return ret;
+
ret = kernel_read_file_from_fd(kernel_fd, 0, &image->kernel_buf,
INT_MAX, NULL, READING_KEXEC_IMAGE);
- if (ret < 0)
+ if (ret < 0) {
+ usermodehelper_read_unlock();
return ret;
+ }
image->kernel_buf_len = ret;
/* Call arch image probe handlers */
@@ -291,6 +297,7 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
/* In case of error, free up all allocated memory in this function */
if (ret)
kimage_file_post_load_cleanup(image);
+ usermodehelper_read_unlock();
return ret;
}
diff --git a/kernel/module.c b/kernel/module.c
index b5dd92e35b02..9058a104610d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -4140,13 +4140,19 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
|MODULE_INIT_IGNORE_VERMAGIC))
return -EINVAL;
+ err = usermodehelper_read_trylock();
+ if (err)
+ return err;
err = kernel_read_file_from_fd(fd, 0, &hdr, INT_MAX, NULL,
READING_MODULE);
- if (err < 0)
+ if (err < 0) {
+ usermodehelper_read_unlock();
return err;
+ }
info.hdr = hdr;
info.len = err;
+ usermodehelper_read_unlock();
return load_module(&info, uargs, flags);
}
--
2.29.2
^ permalink raw reply related [flat|nested] 6+ messages in thread