* [PATCH 1/7] test_firmware: move misc_device down
2017-01-23 16:11 ` [PATCH 0/7] firmware: expand test units for fallback mechanism Luis R. Rodriguez
@ 2017-01-23 16:11 ` Luis R. Rodriguez
2017-01-23 16:11 ` [PATCH 2/7] test_firmware: use device attribute groups Luis R. Rodriguez
` (5 subsequent siblings)
6 siblings, 0 replies; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-23 16:11 UTC (permalink / raw)
To: gregkh
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, Luis R. Rodriguez
This will make further changes easier to review.
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
---
lib/test_firmware.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index a3e8ec3fb1c5..1cb9bf9eb41f 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -42,12 +42,6 @@ static const struct file_operations test_fw_fops = {
.read = test_fw_misc_read,
};
-static struct miscdevice test_fw_misc_device = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "test_firmware",
- .fops = &test_fw_fops,
-};
-
static ssize_t trigger_request_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -132,6 +126,12 @@ static ssize_t trigger_async_request_store(struct device *dev,
}
static DEVICE_ATTR_WO(trigger_async_request);
+static struct miscdevice test_fw_misc_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "test_firmware",
+ .fops = &test_fw_fops,
+};
+
static int __init test_firmware_init(void)
{
int rc;
--
2.11.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 2/7] test_firmware: use device attribute groups
2017-01-23 16:11 ` [PATCH 0/7] firmware: expand test units for fallback mechanism Luis R. Rodriguez
2017-01-23 16:11 ` [PATCH 1/7] test_firmware: move misc_device down Luis R. Rodriguez
@ 2017-01-23 16:11 ` Luis R. Rodriguez
2017-01-23 16:11 ` [PATCH 3/7] tools: firmware: check for distro fallback udev cancel rule Luis R. Rodriguez
` (4 subsequent siblings)
6 siblings, 0 replies; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-23 16:11 UTC (permalink / raw)
To: gregkh
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, Luis R. Rodriguez
This simplifies init and exit.
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
---
lib/test_firmware.c | 35 +++++++++++------------------------
1 file changed, 11 insertions(+), 24 deletions(-)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 1cb9bf9eb41f..38cc188c4d3c 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -126,10 +126,21 @@ static ssize_t trigger_async_request_store(struct device *dev,
}
static DEVICE_ATTR_WO(trigger_async_request);
+#define TEST_FW_DEV_ATTR(name) &dev_attr_##name.attr
+
+static struct attribute *test_dev_attrs[] = {
+ TEST_FW_DEV_ATTR(trigger_request),
+ TEST_FW_DEV_ATTR(trigger_async_request),
+ NULL,
+};
+
+ATTRIBUTE_GROUPS(test_dev);
+
static struct miscdevice test_fw_misc_device = {
.minor = MISC_DYNAMIC_MINOR,
.name = "test_firmware",
.fops = &test_fw_fops,
+ .groups = test_dev_groups,
};
static int __init test_firmware_init(void)
@@ -141,30 +152,10 @@ static int __init test_firmware_init(void)
pr_err("could not register misc device: %d\n", rc);
return rc;
}
- rc = device_create_file(test_fw_misc_device.this_device,
- &dev_attr_trigger_request);
- if (rc) {
- pr_err("could not create sysfs interface: %d\n", rc);
- goto dereg;
- }
-
- rc = device_create_file(test_fw_misc_device.this_device,
- &dev_attr_trigger_async_request);
- if (rc) {
- pr_err("could not create async sysfs interface: %d\n", rc);
- goto remove_file;
- }
pr_warn("interface ready\n");
return 0;
-
-remove_file:
- device_remove_file(test_fw_misc_device.this_device,
- &dev_attr_trigger_async_request);
-dereg:
- misc_deregister(&test_fw_misc_device);
- return rc;
}
module_init(test_firmware_init);
@@ -172,10 +163,6 @@ module_init(test_firmware_init);
static void __exit test_firmware_exit(void)
{
release_firmware(test_firmware);
- device_remove_file(test_fw_misc_device.this_device,
- &dev_attr_trigger_async_request);
- device_remove_file(test_fw_misc_device.this_device,
- &dev_attr_trigger_request);
misc_deregister(&test_fw_misc_device);
pr_warn("removed interface\n");
}
--
2.11.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 3/7] tools: firmware: check for distro fallback udev cancel rule
2017-01-23 16:11 ` [PATCH 0/7] firmware: expand test units for fallback mechanism Luis R. Rodriguez
2017-01-23 16:11 ` [PATCH 1/7] test_firmware: move misc_device down Luis R. Rodriguez
2017-01-23 16:11 ` [PATCH 2/7] test_firmware: use device attribute groups Luis R. Rodriguez
@ 2017-01-23 16:11 ` Luis R. Rodriguez
2017-01-23 16:11 ` [PATCH 4/7] tools: firmware: rename fallback mechanism script Luis R. Rodriguez
` (3 subsequent siblings)
6 siblings, 0 replies; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-23 16:11 UTC (permalink / raw)
To: gregkh
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, Luis R. Rodriguez
Some distributions (Debian, OpenSUSE) have a udev rule in place to cancel
all fallback mechanism uevents immediately. This would obviously
make it hard to test against the fallback mechanism test interface,
so we need to check for this.
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
---
tools/testing/selftests/firmware/fw_userhelper.sh | 28 +++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/firmware/fw_userhelper.sh b/tools/testing/selftests/firmware/fw_userhelper.sh
index b9983f8e09f6..01c626a1f226 100755
--- a/tools/testing/selftests/firmware/fw_userhelper.sh
+++ b/tools/testing/selftests/firmware/fw_userhelper.sh
@@ -64,9 +64,33 @@ trap "test_finish" EXIT
echo "ABCD0123" >"$FW"
NAME=$(basename "$FW")
+DEVPATH="$DIR"/"nope-$NAME"/loading
+
# Test failure when doing nothing (timeout works).
-echo 1 >/sys/class/firmware/timeout
-echo -n "$NAME" >"$DIR"/trigger_request
+echo -n 2 >/sys/class/firmware/timeout
+echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null &
+
+# Give the kernel some time to load the loading file, must be less
+# than the timeout above.
+sleep 1
+if [ ! -f $DEVPATH ]; then
+ echo "$0: fallback mechanism immediately cancelled"
+ echo ""
+ echo "The file never appeared: $DEVPATH"
+ echo ""
+ echo "This might be a distribution udev rule setup by your distribution"
+ echo "to immediately cancel all fallback requests, this must be"
+ echo "removed before running these tests. To confirm look for"
+ echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules"
+ echo "and see if you have something like this:"
+ echo ""
+ echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\""
+ echo ""
+ echo "If you do remove this file or comment out this line before"
+ echo "proceeding with these tests."
+ exit 1
+fi
+
if diff -q "$FW" /dev/test_firmware >/dev/null ; then
echo "$0: firmware was not expected to match" >&2
exit 1
--
2.11.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 4/7] tools: firmware: rename fallback mechanism script
2017-01-23 16:11 ` [PATCH 0/7] firmware: expand test units for fallback mechanism Luis R. Rodriguez
` (2 preceding siblings ...)
2017-01-23 16:11 ` [PATCH 3/7] tools: firmware: check for distro fallback udev cancel rule Luis R. Rodriguez
@ 2017-01-23 16:11 ` Luis R. Rodriguez
2017-01-23 16:11 ` [PATCH 5/7] tools: firmware: add fallback cancelation testing Luis R. Rodriguez
` (2 subsequent siblings)
6 siblings, 0 replies; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-23 16:11 UTC (permalink / raw)
To: gregkh
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, Luis R. Rodriguez
Calling it user mode helper only creates confusion.
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
---
tools/testing/selftests/firmware/Makefile | 2 +-
.../testing/selftests/firmware/{fw_userhelper.sh => fw_fallback.sh} | 5 +++--
2 files changed, 4 insertions(+), 3 deletions(-)
rename tools/testing/selftests/firmware/{fw_userhelper.sh => fw_fallback.sh} (96%)
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 9bf82234855b..1894d625af2d 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -3,7 +3,7 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := fw_filesystem.sh fw_userhelper.sh
+TEST_PROGS := fw_filesystem.sh fw_fallback.sh
include ../lib.mk
diff --git a/tools/testing/selftests/firmware/fw_userhelper.sh b/tools/testing/selftests/firmware/fw_fallback.sh
similarity index 96%
rename from tools/testing/selftests/firmware/fw_userhelper.sh
rename to tools/testing/selftests/firmware/fw_fallback.sh
index 01c626a1f226..f694afb7d12d 100755
--- a/tools/testing/selftests/firmware/fw_userhelper.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# This validates that the kernel will fall back to using the user helper
+# This validates that the kernel will fall back to using the fallback mechanism
# to load firmware it can't find on disk itself. We must request a firmware
# that the kernel won't find, and any installed helper (e.g. udev) also
# won't find so that we can do the load ourself manually.
@@ -117,7 +117,8 @@ if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
echo "$0: firmware was not loaded" >&2
exit 1
else
- echo "$0: user helper firmware loading works"
+ echo "$0: fallback mechanism works"
+
fi
exit 0
--
2.11.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 5/7] tools: firmware: add fallback cancelation testing
2017-01-23 16:11 ` [PATCH 0/7] firmware: expand test units for fallback mechanism Luis R. Rodriguez
` (3 preceding siblings ...)
2017-01-23 16:11 ` [PATCH 4/7] tools: firmware: rename fallback mechanism script Luis R. Rodriguez
@ 2017-01-23 16:11 ` Luis R. Rodriguez
2017-01-23 16:11 ` [PATCH 6/7] test_firmware: add test custom fallback trigger Luis R. Rodriguez
2017-01-23 16:11 ` [PATCH 7/7] firmware: firmware: fix NULL pointer dereference in __fw_load_abort() Luis R. Rodriguez
6 siblings, 0 replies; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-23 16:11 UTC (permalink / raw)
To: gregkh
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, Luis R. Rodriguez
Add a test case for cancelling the sync fallback mechanism.
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
---
tools/testing/selftests/firmware/fw_fallback.sh | 32 +++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index f694afb7d12d..68e27e5f27a4 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -58,6 +58,31 @@ load_fw()
wait
}
+load_fw_cancel()
+{
+ local name="$1"
+ local file="$2"
+
+ # This will block until our load (below) has finished.
+ echo -n "$name" >"$DIR"/trigger_request 2>/dev/null &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
+ echo -1 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+}
+
trap "test_finish" EXIT
# This is an unlikely real-world firmware content. :)
@@ -118,7 +143,14 @@ if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
exit 1
else
echo "$0: fallback mechanism works"
+fi
+load_fw_cancel "nope-$NAME" "$FW"
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was expected to be cancelled" >&2
+ exit 1
+else
+ echo "$0: cancelling fallback mechanism works"
fi
exit 0
--
2.11.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 6/7] test_firmware: add test custom fallback trigger
2017-01-23 16:11 ` [PATCH 0/7] firmware: expand test units for fallback mechanism Luis R. Rodriguez
` (4 preceding siblings ...)
2017-01-23 16:11 ` [PATCH 5/7] tools: firmware: add fallback cancelation testing Luis R. Rodriguez
@ 2017-01-23 16:11 ` Luis R. Rodriguez
2017-01-23 16:11 ` [PATCH 7/7] firmware: firmware: fix NULL pointer dereference in __fw_load_abort() Luis R. Rodriguez
6 siblings, 0 replies; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-23 16:11 UTC (permalink / raw)
To: gregkh
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, Luis R. Rodriguez
We have no custom fallback mechanism test interface. Provide one.
This tests both the custom fallback mechanism and cancelling the
it.
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
---
lib/test_firmware.c | 45 ++++++++++++++++
tools/testing/selftests/firmware/fw_fallback.sh | 68 +++++++++++++++++++++++++
2 files changed, 113 insertions(+)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 38cc188c4d3c..09371b0a9baf 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -126,11 +126,56 @@ static ssize_t trigger_async_request_store(struct device *dev,
}
static DEVICE_ATTR_WO(trigger_async_request);
+static ssize_t trigger_custom_fallback_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int rc;
+ char *name;
+
+ name = kstrndup(buf, count, GFP_KERNEL);
+ if (!name)
+ return -ENOSPC;
+
+ pr_info("loading '%s' using custom fallback mechanism\n", name);
+
+ mutex_lock(&test_fw_mutex);
+ release_firmware(test_firmware);
+ test_firmware = NULL;
+ rc = request_firmware_nowait(THIS_MODULE, FW_ACTION_NOHOTPLUG, name,
+ dev, GFP_KERNEL, NULL,
+ trigger_async_request_cb);
+ if (rc) {
+ pr_info("async load of '%s' failed: %d\n", name, rc);
+ kfree(name);
+ goto out;
+ }
+ /* Free 'name' ASAP, to test for race conditions */
+ kfree(name);
+
+ wait_for_completion(&async_fw_done);
+
+ if (test_firmware) {
+ pr_info("loaded: %zu\n", test_firmware->size);
+ rc = count;
+ } else {
+ pr_err("failed to async load firmware\n");
+ rc = -ENODEV;
+ }
+
+out:
+ mutex_unlock(&test_fw_mutex);
+
+ return rc;
+}
+static DEVICE_ATTR_WO(trigger_custom_fallback);
+
#define TEST_FW_DEV_ATTR(name) &dev_attr_##name.attr
static struct attribute *test_dev_attrs[] = {
TEST_FW_DEV_ATTR(trigger_request),
TEST_FW_DEV_ATTR(trigger_async_request),
+ TEST_FW_DEV_ATTR(trigger_custom_fallback),
NULL,
};
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index 68e27e5f27a4..2e4c22d5abf7 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -83,6 +83,58 @@ load_fw_cancel()
wait
}
+load_fw_custom()
+{
+ local name="$1"
+ local file="$2"
+
+ echo -n "$name" >"$DIR"/trigger_custom_fallback 2>/dev/null &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
+ echo 1 >"$DIR"/"$name"/loading
+ cat "$file" >"$DIR"/"$name"/data
+ echo 0 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+}
+
+
+load_fw_custom_cancel()
+{
+ local name="$1"
+ local file="$2"
+
+ echo -n "$name" >"$DIR"/trigger_custom_fallback 2>/dev/null &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
+ echo -1 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+}
+
+
trap "test_finish" EXIT
# This is an unlikely real-world firmware content. :)
@@ -153,4 +205,20 @@ else
echo "$0: cancelling fallback mechanism works"
fi
+load_fw_custom "$NAME" "$FW"
+if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded" >&2
+ exit 1
+else
+ echo "$0: custom fallback loading mechanism works"
+fi
+
+load_fw_custom_cancel "nope-$NAME" "$FW"
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was expected to be cancelled" >&2
+ exit 1
+else
+ echo "$0: cancelling custom fallback mechanism works"
+fi
+
exit 0
--
2.11.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 7/7] firmware: firmware: fix NULL pointer dereference in __fw_load_abort()
2017-01-23 16:11 ` [PATCH 0/7] firmware: expand test units for fallback mechanism Luis R. Rodriguez
` (5 preceding siblings ...)
2017-01-23 16:11 ` [PATCH 6/7] test_firmware: add test custom fallback trigger Luis R. Rodriguez
@ 2017-01-23 16:11 ` Luis R. Rodriguez
2017-01-25 10:52 ` Greg KH
6 siblings, 1 reply; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-23 16:11 UTC (permalink / raw)
To: gregkh
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, Luis R. Rodriguez, [3.10+]
Since commit 5d47ec02c37ea632398cb251c884e3a488dff794
("firmware: Correct handling of fw_state_wait() return value")
fw_load_abort(fw_priv) could be called twice and lead us to a
kernel crash. This happens only when the firmware fallback mechanism
(regular or custom) is used. The fallback mechanism exposes a sysfs
interface for userspace to upload a file and notify the kernel when
the file is loaded and ready, or to cancel an upload by echo'ing -1
into on the loading file:
echo -n "-1" > /sys/$DEVPATH/loading
This will call fw_load_abort(). Some distributions actually have
a udev rule in place to *always* immediately cancel all firmware
fallback mechanism requests (Debian, OpenSUSE), they have:
$ cat /lib/udev/rules.d/50-firmware.rules
# stub for immediately telling the kernel that userspace firmware loading
# failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y
SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1
This was done since udev removed the firmware fallback mechanism a while ago
and a long standing misunderstood issues with the timeout (but now corrected).
Distributions with this udev rule would run into this crash only if the
fallback mechanism is used. Since most distributions disable by default
using the fallback mechanism (CONFIG_FW_LOADER_USER_HELPER_FALLBACK), this
would typicaly mean only 2 drivers which *require* the fallback mechanism
could typically incur a crash: drivers/firmware/dell_rbu.c and the
drivers/leds/leds-lp55xx-common.c driver.
Distributions enabling CONFIG_FW_LOADER_USER_HELPER_FALLBACK are clearly
more exposed as every file not found through a firmware request will
use the fallback mechanism.
The crash happens because after commit 5b029624948d ("firmware: do not
use fw_lock for fw_state protection") and subsequent fix commit
5d47ec02c37ea6 ("firmware: Correct handling of fw_state_wait() return
value") a race can happen between this cancelation and the firmware
fw_state_wait_timeout() being woken up after a state change with which
fw_load_abort() as that calls swake_up(). Upon error fw_state_wait_timeout()
will also again call fw_load_abort() and trigger a null reference.
At first glance we could just fix this with a !buf check on
fw_load_abort() before accessing buf->fw_st, however there is
a logical issue in having a state machine used for the fallback
mechanism and preventing access from it once we abort as its inside
the buf (buf->fw_st).
The firmware_class.c code is setting the buf to NULL to annotate an
abort has occurred. Replace this mechanism by simply using the state check
instead. All the other code in place already uses similar checks
for aborting as well so no further changes are needed.
An oops can be reproduced with the new fw_fallback.sh fallback
mechanism cancellation test. Either cancelling the fallback mechanism
or the custom fallback mechanism triggers a crash.
mcgrof@piggy ~/linux-next/tools/testing/selftests/firmware
(git::20170111-fw-fixes)$ sudo ./fw_fallback.sh
./fw_fallback.sh: timeout works
./fw_fallback.sh: firmware comparison works
./fw_fallback.sh: fallback mechanism works
[ this then sits here when it is trying the cancellation test ]
Kernel log:
[ 36.750521] test_firmware: loading 'nope-test-firmware.bin'
[ 36.751144] misc test_firmware: Direct firmware load for nope-test-firmware.bin failed with error -2
[ 36.752034] misc test_firmware: Falling back to user helper
[ 36.853324] BUG: unable to handle kernel NULL pointer dereference at 0000000000000038
[ 36.854221] IP: _request_firmware+0xa27/0xad0
[ 36.854671] PGD 0
[ 36.854672]
[ 36.855081] Oops: 0000 [#1] SMP
[ 36.855433] Modules linked in: test_firmware(E) ... etc ...
[ 36.857802] CPU: 1 PID: 1396 Comm: fw_fallback.sh Tainted: G W E 4.10.0-rc3-next-20170111+ #30
[ 36.857802] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014
[ 36.857802] task: ffff9740b27f4340 task.stack: ffffbb15c0bc8000
[ 36.857802] RIP: 0010:_request_firmware+0xa27/0xad0
[ 36.857802] RSP: 0018:ffffbb15c0bcbd10 EFLAGS: 00010246
[ 36.857802] RAX: 00000000fffffffe RBX: ffff9740afe5aa80 RCX: 0000000000000000
[ 36.857802] RDX: ffff9740b27f4340 RSI: 0000000000000283 RDI: 0000000000000000
[ 36.857802] RBP: ffffbb15c0bcbd90 R08: ffffbb15c0bcbcd8 R09: 0000000000000000
[ 36.857802] R10: 0000000894a0d4b1 R11: 000000000000008c R12: ffffffffc0312480
[ 36.857802] R13: 0000000000000005 R14: ffff9740b1c32400 R15: 00000000000003e8
[ 36.857802] FS: 00007f8604422700(0000) GS:ffff9740bfc80000(0000) knlGS:0000000000000000
[ 36.857802] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 36.857802] CR2: 0000000000000038 CR3: 000000012164c000 CR4: 00000000000006e0
[ 36.857802] Call Trace:
[ 36.857802] request_firmware+0x37/0x50
[ 36.857802] trigger_request_store+0x79/0xd0 [test_firmware]
[ 36.857802] dev_attr_store+0x18/0x30
[ 36.857802] sysfs_kf_write+0x37/0x40
[ 36.857802] kernfs_fop_write+0x110/0x1a0
[ 36.857802] __vfs_write+0x37/0x160
[ 36.857802] ? _cond_resched+0x1a/0x50
[ 36.857802] vfs_write+0xb5/0x1a0
[ 36.857802] SyS_write+0x55/0xc0
[ 36.857802] ? trace_do_page_fault+0x37/0xd0
[ 36.857802] entry_SYSCALL_64_fastpath+0x1e/0xad
[ 36.857802] RIP: 0033:0x7f8603f49620
[ 36.857802] RSP: 002b:00007fff6287b788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 36.857802] RAX: ffffffffffffffda RBX: 000055c307b110a0 RCX: 00007f8603f49620
[ 36.857802] RDX: 0000000000000016 RSI: 000055c3084d8a90 RDI: 0000000000000001
[ 36.857802] RBP: 0000000000000016 R08: 000000000000c0ff R09: 000055c3084d6336
[ 36.857802] R10: 000055c307b108b0 R11: 0000000000000246 R12: 000055c307b13c80
[ 36.857802] R13: 000055c3084d6320 R14: 0000000000000000 R15: 00007fff6287b950
[ 36.857802] Code: 9f 64 84 e8 9c 61 fe ff b8 f4 ff ff ff e9 6b f9 ff
ff 48 c7 c7 40 6b 8d 84 89 45 a8 e8 43 84 18 00 49 8b be 00 03 00 00 8b
45 a8 <83> 7f 38 02 74 08 e8 6e ec ff ff 8b 45 a8 49 c7 86 00 03 00 00
[ 36.857802] RIP: _request_firmware+0xa27/0xad0 RSP: ffffbb15c0bcbd10
[ 36.857802] CR2: 0000000000000038
[ 36.872685] ---[ end trace 6d94ac339c133e6f ]---
In above case the call hierarchy that causes the crash looks as follows:
lib/test_firmware.c request_firmware()
-> fw_load_from_user_helper()
-> _request_firmware_load()
-> call fw_state_wait_timeout()
Some time later firmware_loading_store() scans a control value of "-1"
-> switch(loading) case -1: will call
-> fw_load_abort(fw_priv) which calls
-> __fw_load_abort(fw_priv->buf)
-> and set fw_priv->buf = NULL;
Upon being woken up via swake_up(), back in _request_firmware_load()
fw_state_wait_timeout() returns -ENOENT
-> since mentioned commit
-> fw_load_abort(fw_priv) is called a second time
-> and this time it would call:
-> __fw_load_abort(NULL /* fw_priv->buf */)
-> and we get: NULL->fw_st.status
Fixes: 5d47ec02c37e ("firmware: Correct handling of fw_state_wait() return value")
Reported-and-Tested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reported-and-Tested-by: Patrick Bruenn <p.bruenn@beckhoff.com>
Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
CC: <stable@vger.kernel.org> [3.10+]
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
---
drivers/base/firmware_class.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 4497d263209f..ac350c518e0c 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -558,9 +558,6 @@ static void fw_load_abort(struct firmware_priv *fw_priv)
struct firmware_buf *buf = fw_priv->buf;
__fw_load_abort(buf);
-
- /* avoid user action after loading abort */
- fw_priv->buf = NULL;
}
static LIST_HEAD(pending_fw_head);
@@ -713,7 +710,7 @@ static ssize_t firmware_loading_store(struct device *dev,
mutex_lock(&fw_lock);
fw_buf = fw_priv->buf;
- if (!fw_buf)
+ if (fw_state_is_aborted(&fw_buf->fw_st))
goto out;
switch (loading) {
--
2.11.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH 7/7] firmware: firmware: fix NULL pointer dereference in __fw_load_abort()
2017-01-23 16:11 ` [PATCH 7/7] firmware: firmware: fix NULL pointer dereference in __fw_load_abort() Luis R. Rodriguez
@ 2017-01-25 10:52 ` Greg KH
2017-01-25 13:36 ` Luis R. Rodriguez
0 siblings, 1 reply; 26+ messages in thread
From: Greg KH @ 2017-01-25 10:52 UTC (permalink / raw)
To: Luis R. Rodriguez
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, [3.10+]
On Mon, Jan 23, 2017 at 08:11:11AM -0800, Luis R. Rodriguez wrote:
> Since commit 5d47ec02c37ea632398cb251c884e3a488dff794
> ("firmware: Correct handling of fw_state_wait() return value")
> fw_load_abort(fw_priv) could be called twice and lead us to a
> kernel crash. This happens only when the firmware fallback mechanism
> (regular or custom) is used. The fallback mechanism exposes a sysfs
> interface for userspace to upload a file and notify the kernel when
> the file is loaded and ready, or to cancel an upload by echo'ing -1
> into on the loading file:
>
> echo -n "-1" > /sys/$DEVPATH/loading
>
> This will call fw_load_abort(). Some distributions actually have
> a udev rule in place to *always* immediately cancel all firmware
> fallback mechanism requests (Debian, OpenSUSE), they have:
>
> $ cat /lib/udev/rules.d/50-firmware.rules
> # stub for immediately telling the kernel that userspace firmware loading
> # failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y
> SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1
>
> This was done since udev removed the firmware fallback mechanism a while ago
> and a long standing misunderstood issues with the timeout (but now corrected).
> Distributions with this udev rule would run into this crash only if the
> fallback mechanism is used. Since most distributions disable by default
> using the fallback mechanism (CONFIG_FW_LOADER_USER_HELPER_FALLBACK), this
> would typicaly mean only 2 drivers which *require* the fallback mechanism
> could typically incur a crash: drivers/firmware/dell_rbu.c and the
> drivers/leds/leds-lp55xx-common.c driver.
>
> Distributions enabling CONFIG_FW_LOADER_USER_HELPER_FALLBACK are clearly
> more exposed as every file not found through a firmware request will
> use the fallback mechanism.
>
> The crash happens because after commit 5b029624948d ("firmware: do not
> use fw_lock for fw_state protection") and subsequent fix commit
> 5d47ec02c37ea6 ("firmware: Correct handling of fw_state_wait() return
> value") a race can happen between this cancelation and the firmware
> fw_state_wait_timeout() being woken up after a state change with which
> fw_load_abort() as that calls swake_up(). Upon error fw_state_wait_timeout()
> will also again call fw_load_abort() and trigger a null reference.
>
> At first glance we could just fix this with a !buf check on
> fw_load_abort() before accessing buf->fw_st, however there is
> a logical issue in having a state machine used for the fallback
> mechanism and preventing access from it once we abort as its inside
> the buf (buf->fw_st).
>
> The firmware_class.c code is setting the buf to NULL to annotate an
> abort has occurred. Replace this mechanism by simply using the state check
> instead. All the other code in place already uses similar checks
> for aborting as well so no further changes are needed.
>
> An oops can be reproduced with the new fw_fallback.sh fallback
> mechanism cancellation test. Either cancelling the fallback mechanism
> or the custom fallback mechanism triggers a crash.
>
> mcgrof@piggy ~/linux-next/tools/testing/selftests/firmware
> (git::20170111-fw-fixes)$ sudo ./fw_fallback.sh
>
> ./fw_fallback.sh: timeout works
> ./fw_fallback.sh: firmware comparison works
> ./fw_fallback.sh: fallback mechanism works
>
> [ this then sits here when it is trying the cancellation test ]
>
> Kernel log:
>
> [ 36.750521] test_firmware: loading 'nope-test-firmware.bin'
> [ 36.751144] misc test_firmware: Direct firmware load for nope-test-firmware.bin failed with error -2
> [ 36.752034] misc test_firmware: Falling back to user helper
> [ 36.853324] BUG: unable to handle kernel NULL pointer dereference at 0000000000000038
> [ 36.854221] IP: _request_firmware+0xa27/0xad0
> [ 36.854671] PGD 0
> [ 36.854672]
> [ 36.855081] Oops: 0000 [#1] SMP
> [ 36.855433] Modules linked in: test_firmware(E) ... etc ...
> [ 36.857802] CPU: 1 PID: 1396 Comm: fw_fallback.sh Tainted: G W E 4.10.0-rc3-next-20170111+ #30
> [ 36.857802] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014
> [ 36.857802] task: ffff9740b27f4340 task.stack: ffffbb15c0bc8000
> [ 36.857802] RIP: 0010:_request_firmware+0xa27/0xad0
> [ 36.857802] RSP: 0018:ffffbb15c0bcbd10 EFLAGS: 00010246
> [ 36.857802] RAX: 00000000fffffffe RBX: ffff9740afe5aa80 RCX: 0000000000000000
> [ 36.857802] RDX: ffff9740b27f4340 RSI: 0000000000000283 RDI: 0000000000000000
> [ 36.857802] RBP: ffffbb15c0bcbd90 R08: ffffbb15c0bcbcd8 R09: 0000000000000000
> [ 36.857802] R10: 0000000894a0d4b1 R11: 000000000000008c R12: ffffffffc0312480
> [ 36.857802] R13: 0000000000000005 R14: ffff9740b1c32400 R15: 00000000000003e8
> [ 36.857802] FS: 00007f8604422700(0000) GS:ffff9740bfc80000(0000) knlGS:0000000000000000
> [ 36.857802] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 36.857802] CR2: 0000000000000038 CR3: 000000012164c000 CR4: 00000000000006e0
> [ 36.857802] Call Trace:
> [ 36.857802] request_firmware+0x37/0x50
> [ 36.857802] trigger_request_store+0x79/0xd0 [test_firmware]
> [ 36.857802] dev_attr_store+0x18/0x30
> [ 36.857802] sysfs_kf_write+0x37/0x40
> [ 36.857802] kernfs_fop_write+0x110/0x1a0
> [ 36.857802] __vfs_write+0x37/0x160
> [ 36.857802] ? _cond_resched+0x1a/0x50
> [ 36.857802] vfs_write+0xb5/0x1a0
> [ 36.857802] SyS_write+0x55/0xc0
> [ 36.857802] ? trace_do_page_fault+0x37/0xd0
> [ 36.857802] entry_SYSCALL_64_fastpath+0x1e/0xad
> [ 36.857802] RIP: 0033:0x7f8603f49620
> [ 36.857802] RSP: 002b:00007fff6287b788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> [ 36.857802] RAX: ffffffffffffffda RBX: 000055c307b110a0 RCX: 00007f8603f49620
> [ 36.857802] RDX: 0000000000000016 RSI: 000055c3084d8a90 RDI: 0000000000000001
> [ 36.857802] RBP: 0000000000000016 R08: 000000000000c0ff R09: 000055c3084d6336
> [ 36.857802] R10: 000055c307b108b0 R11: 0000000000000246 R12: 000055c307b13c80
> [ 36.857802] R13: 000055c3084d6320 R14: 0000000000000000 R15: 00007fff6287b950
> [ 36.857802] Code: 9f 64 84 e8 9c 61 fe ff b8 f4 ff ff ff e9 6b f9 ff
> ff 48 c7 c7 40 6b 8d 84 89 45 a8 e8 43 84 18 00 49 8b be 00 03 00 00 8b
> 45 a8 <83> 7f 38 02 74 08 e8 6e ec ff ff 8b 45 a8 49 c7 86 00 03 00 00
> [ 36.857802] RIP: _request_firmware+0xa27/0xad0 RSP: ffffbb15c0bcbd10
> [ 36.857802] CR2: 0000000000000038
> [ 36.872685] ---[ end trace 6d94ac339c133e6f ]---
>
> In above case the call hierarchy that causes the crash looks as follows:
>
> lib/test_firmware.c request_firmware()
> -> fw_load_from_user_helper()
> -> _request_firmware_load()
> -> call fw_state_wait_timeout()
>
> Some time later firmware_loading_store() scans a control value of "-1"
> -> switch(loading) case -1: will call
> -> fw_load_abort(fw_priv) which calls
> -> __fw_load_abort(fw_priv->buf)
> -> and set fw_priv->buf = NULL;
>
> Upon being woken up via swake_up(), back in _request_firmware_load()
> fw_state_wait_timeout() returns -ENOENT
> -> since mentioned commit
> -> fw_load_abort(fw_priv) is called a second time
> -> and this time it would call:
> -> __fw_load_abort(NULL /* fw_priv->buf */)
> -> and we get: NULL->fw_st.status
>
> Fixes: 5d47ec02c37e ("firmware: Correct handling of fw_state_wait() return value")
> Reported-and-Tested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Reported-and-Tested-by: Patrick Bruenn <p.bruenn@beckhoff.com>
> Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
> CC: <stable@vger.kernel.org> [3.10+]
> Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
> ---
> drivers/base/firmware_class.c | 5 +----
> 1 file changed, 1 insertion(+), 4 deletions(-)
Why is this patch 7/7? Shouldn't it go into 4.10-final now? Why wait
for 4.11-rc1?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 7/7] firmware: firmware: fix NULL pointer dereference in __fw_load_abort()
2017-01-25 10:52 ` Greg KH
@ 2017-01-25 13:36 ` Luis R. Rodriguez
2017-01-25 13:42 ` Luis R. Rodriguez
0 siblings, 1 reply; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-25 13:36 UTC (permalink / raw)
To: Greg KH
Cc: Luis R. Rodriguez, ming.lei, keescook, linux-kernel-dev,
jakub.kicinski, chris, oss-drivers, johannes, j, teg, kay,
jwboyer, dmitry.torokhov, seth.forshee, bjorn.andersson,
linux-kernel, wagi, stephen.boyd, zohar, tiwai, dwmw2,
fengguang.wu, dhowells, arend.vanspriel, kvalo, [3.10+]
On Wed, Jan 25, 2017 at 11:52:04AM +0100, Greg KH wrote:
> On Mon, Jan 23, 2017 at 08:11:11AM -0800, Luis R. Rodriguez wrote:
> > Since commit 5d47ec02c37ea632398cb251c884e3a488dff794
> > ("firmware: Correct handling of fw_state_wait() return value")
> > fw_load_abort(fw_priv) could be called twice and lead us to a
> > kernel crash. This happens only when the firmware fallback mechanism
> > (regular or custom) is used. The fallback mechanism exposes a sysfs
> > interface for userspace to upload a file and notify the kernel when
> > the file is loaded and ready, or to cancel an upload by echo'ing -1
> > into on the loading file:
> >
> > echo -n "-1" > /sys/$DEVPATH/loading
> >
> > This will call fw_load_abort(). Some distributions actually have
> > a udev rule in place to *always* immediately cancel all firmware
> > fallback mechanism requests (Debian, OpenSUSE), they have:
> >
> > $ cat /lib/udev/rules.d/50-firmware.rules
> > # stub for immediately telling the kernel that userspace firmware loading
> > # failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y
> > SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1
> >
> > This was done since udev removed the firmware fallback mechanism a while ago
> > and a long standing misunderstood issues with the timeout (but now corrected).
> > Distributions with this udev rule would run into this crash only if the
> > fallback mechanism is used. Since most distributions disable by default
> > using the fallback mechanism (CONFIG_FW_LOADER_USER_HELPER_FALLBACK), this
> > would typicaly mean only 2 drivers which *require* the fallback mechanism
> > could typically incur a crash: drivers/firmware/dell_rbu.c and the
> > drivers/leds/leds-lp55xx-common.c driver.
> >
> > Distributions enabling CONFIG_FW_LOADER_USER_HELPER_FALLBACK are clearly
> > more exposed as every file not found through a firmware request will
> > use the fallback mechanism.
> >
> > The crash happens because after commit 5b029624948d ("firmware: do not
> > use fw_lock for fw_state protection") and subsequent fix commit
> > 5d47ec02c37ea6 ("firmware: Correct handling of fw_state_wait() return
> > value") a race can happen between this cancelation and the firmware
> > fw_state_wait_timeout() being woken up after a state change with which
> > fw_load_abort() as that calls swake_up(). Upon error fw_state_wait_timeout()
> > will also again call fw_load_abort() and trigger a null reference.
> >
> > At first glance we could just fix this with a !buf check on
> > fw_load_abort() before accessing buf->fw_st, however there is
> > a logical issue in having a state machine used for the fallback
> > mechanism and preventing access from it once we abort as its inside
> > the buf (buf->fw_st).
> >
> > The firmware_class.c code is setting the buf to NULL to annotate an
> > abort has occurred. Replace this mechanism by simply using the state check
> > instead. All the other code in place already uses similar checks
> > for aborting as well so no further changes are needed.
> >
> > An oops can be reproduced with the new fw_fallback.sh fallback
> > mechanism cancellation test. Either cancelling the fallback mechanism
> > or the custom fallback mechanism triggers a crash.
> >
> > mcgrof@piggy ~/linux-next/tools/testing/selftests/firmware
> > (git::20170111-fw-fixes)$ sudo ./fw_fallback.sh
> >
> > ./fw_fallback.sh: timeout works
> > ./fw_fallback.sh: firmware comparison works
> > ./fw_fallback.sh: fallback mechanism works
> >
> > [ this then sits here when it is trying the cancellation test ]
> >
> > Kernel log:
> >
> > [ 36.750521] test_firmware: loading 'nope-test-firmware.bin'
> > [ 36.751144] misc test_firmware: Direct firmware load for nope-test-firmware.bin failed with error -2
> > [ 36.752034] misc test_firmware: Falling back to user helper
> > [ 36.853324] BUG: unable to handle kernel NULL pointer dereference at 0000000000000038
> > [ 36.854221] IP: _request_firmware+0xa27/0xad0
> > [ 36.854671] PGD 0
> > [ 36.854672]
> > [ 36.855081] Oops: 0000 [#1] SMP
> > [ 36.855433] Modules linked in: test_firmware(E) ... etc ...
> > [ 36.857802] CPU: 1 PID: 1396 Comm: fw_fallback.sh Tainted: G W E 4.10.0-rc3-next-20170111+ #30
> > [ 36.857802] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014
> > [ 36.857802] task: ffff9740b27f4340 task.stack: ffffbb15c0bc8000
> > [ 36.857802] RIP: 0010:_request_firmware+0xa27/0xad0
> > [ 36.857802] RSP: 0018:ffffbb15c0bcbd10 EFLAGS: 00010246
> > [ 36.857802] RAX: 00000000fffffffe RBX: ffff9740afe5aa80 RCX: 0000000000000000
> > [ 36.857802] RDX: ffff9740b27f4340 RSI: 0000000000000283 RDI: 0000000000000000
> > [ 36.857802] RBP: ffffbb15c0bcbd90 R08: ffffbb15c0bcbcd8 R09: 0000000000000000
> > [ 36.857802] R10: 0000000894a0d4b1 R11: 000000000000008c R12: ffffffffc0312480
> > [ 36.857802] R13: 0000000000000005 R14: ffff9740b1c32400 R15: 00000000000003e8
> > [ 36.857802] FS: 00007f8604422700(0000) GS:ffff9740bfc80000(0000) knlGS:0000000000000000
> > [ 36.857802] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 36.857802] CR2: 0000000000000038 CR3: 000000012164c000 CR4: 00000000000006e0
> > [ 36.857802] Call Trace:
> > [ 36.857802] request_firmware+0x37/0x50
> > [ 36.857802] trigger_request_store+0x79/0xd0 [test_firmware]
> > [ 36.857802] dev_attr_store+0x18/0x30
> > [ 36.857802] sysfs_kf_write+0x37/0x40
> > [ 36.857802] kernfs_fop_write+0x110/0x1a0
> > [ 36.857802] __vfs_write+0x37/0x160
> > [ 36.857802] ? _cond_resched+0x1a/0x50
> > [ 36.857802] vfs_write+0xb5/0x1a0
> > [ 36.857802] SyS_write+0x55/0xc0
> > [ 36.857802] ? trace_do_page_fault+0x37/0xd0
> > [ 36.857802] entry_SYSCALL_64_fastpath+0x1e/0xad
> > [ 36.857802] RIP: 0033:0x7f8603f49620
> > [ 36.857802] RSP: 002b:00007fff6287b788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> > [ 36.857802] RAX: ffffffffffffffda RBX: 000055c307b110a0 RCX: 00007f8603f49620
> > [ 36.857802] RDX: 0000000000000016 RSI: 000055c3084d8a90 RDI: 0000000000000001
> > [ 36.857802] RBP: 0000000000000016 R08: 000000000000c0ff R09: 000055c3084d6336
> > [ 36.857802] R10: 000055c307b108b0 R11: 0000000000000246 R12: 000055c307b13c80
> > [ 36.857802] R13: 000055c3084d6320 R14: 0000000000000000 R15: 00007fff6287b950
> > [ 36.857802] Code: 9f 64 84 e8 9c 61 fe ff b8 f4 ff ff ff e9 6b f9 ff
> > ff 48 c7 c7 40 6b 8d 84 89 45 a8 e8 43 84 18 00 49 8b be 00 03 00 00 8b
> > 45 a8 <83> 7f 38 02 74 08 e8 6e ec ff ff 8b 45 a8 49 c7 86 00 03 00 00
> > [ 36.857802] RIP: _request_firmware+0xa27/0xad0 RSP: ffffbb15c0bcbd10
> > [ 36.857802] CR2: 0000000000000038
> > [ 36.872685] ---[ end trace 6d94ac339c133e6f ]---
> >
> > In above case the call hierarchy that causes the crash looks as follows:
> >
> > lib/test_firmware.c request_firmware()
> > -> fw_load_from_user_helper()
> > -> _request_firmware_load()
> > -> call fw_state_wait_timeout()
> >
> > Some time later firmware_loading_store() scans a control value of "-1"
> > -> switch(loading) case -1: will call
> > -> fw_load_abort(fw_priv) which calls
> > -> __fw_load_abort(fw_priv->buf)
> > -> and set fw_priv->buf = NULL;
> >
> > Upon being woken up via swake_up(), back in _request_firmware_load()
> > fw_state_wait_timeout() returns -ENOENT
> > -> since mentioned commit
> > -> fw_load_abort(fw_priv) is called a second time
> > -> and this time it would call:
> > -> __fw_load_abort(NULL /* fw_priv->buf */)
> > -> and we get: NULL->fw_st.status
> >
> > Fixes: 5d47ec02c37e ("firmware: Correct handling of fw_state_wait() return value")
> > Reported-and-Tested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> > Reported-and-Tested-by: Patrick Bruenn <p.bruenn@beckhoff.com>
> > Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
> > CC: <stable@vger.kernel.org> [3.10+]
Note: 3.10+
> > Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
> > ---
> > drivers/base/firmware_class.c | 5 +----
> > 1 file changed, 1 insertion(+), 4 deletions(-)
>
> Why is this patch 7/7?
Without the tests available on a development tree one cannot easily
reproduce.
> Shouldn't it go into 4.10-final now? Why wait
> for 4.11-rc1?
Certainly, it should go into 4.10 now, sorry if it seemed otherwise.
Luis
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 7/7] firmware: firmware: fix NULL pointer dereference in __fw_load_abort()
2017-01-25 13:36 ` Luis R. Rodriguez
@ 2017-01-25 13:42 ` Luis R. Rodriguez
2017-01-25 14:41 ` Greg KH
0 siblings, 1 reply; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-25 13:42 UTC (permalink / raw)
To: Luis R. Rodriguez
Cc: Greg KH, ming.lei, keescook, linux-kernel-dev, jakub.kicinski,
chris, oss-drivers, johannes, j, teg, kay, jwboyer,
dmitry.torokhov, seth.forshee, bjorn.andersson, linux-kernel,
wagi, stephen.boyd, zohar, tiwai, dwmw2, fengguang.wu, dhowells,
arend.vanspriel, kvalo, [3.10+]
On Wed, Jan 25, 2017 at 02:36:31PM +0100, Luis R. Rodriguez wrote:
> On Wed, Jan 25, 2017 at 11:52:04AM +0100, Greg KH wrote:
> > On Mon, Jan 23, 2017 at 08:11:11AM -0800, Luis R. Rodriguez wrote:
> > > Since commit 5d47ec02c37ea632398cb251c884e3a488dff794
> > > ("firmware: Correct handling of fw_state_wait() return value")
> > > fw_load_abort(fw_priv) could be called twice and lead us to a
> > > kernel crash. This happens only when the firmware fallback mechanism
> > > (regular or custom) is used. The fallback mechanism exposes a sysfs
> > > interface for userspace to upload a file and notify the kernel when
> > > the file is loaded and ready, or to cancel an upload by echo'ing -1
> > > into on the loading file:
> > >
> > > echo -n "-1" > /sys/$DEVPATH/loading
> > >
> > > This will call fw_load_abort(). Some distributions actually have
> > > a udev rule in place to *always* immediately cancel all firmware
> > > fallback mechanism requests (Debian, OpenSUSE), they have:
I made a typo here, OpenSUSE in fact does not carry this. Its so far
only Debian I am aware of.
> > > $ cat /lib/udev/rules.d/50-firmware.rules
> > > # stub for immediately telling the kernel that userspace firmware loading
> > > # failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y
> > > SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1
Luis
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 7/7] firmware: firmware: fix NULL pointer dereference in __fw_load_abort()
2017-01-25 13:42 ` Luis R. Rodriguez
@ 2017-01-25 14:41 ` Greg KH
2017-01-25 15:21 ` [PATCH v2] " Luis R. Rodriguez
0 siblings, 1 reply; 26+ messages in thread
From: Greg KH @ 2017-01-25 14:41 UTC (permalink / raw)
To: Luis R. Rodriguez
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, [3.10+]
On Wed, Jan 25, 2017 at 02:42:50PM +0100, Luis R. Rodriguez wrote:
> On Wed, Jan 25, 2017 at 02:36:31PM +0100, Luis R. Rodriguez wrote:
> > On Wed, Jan 25, 2017 at 11:52:04AM +0100, Greg KH wrote:
> > > On Mon, Jan 23, 2017 at 08:11:11AM -0800, Luis R. Rodriguez wrote:
> > > > Since commit 5d47ec02c37ea632398cb251c884e3a488dff794
> > > > ("firmware: Correct handling of fw_state_wait() return value")
> > > > fw_load_abort(fw_priv) could be called twice and lead us to a
> > > > kernel crash. This happens only when the firmware fallback mechanism
> > > > (regular or custom) is used. The fallback mechanism exposes a sysfs
> > > > interface for userspace to upload a file and notify the kernel when
> > > > the file is loaded and ready, or to cancel an upload by echo'ing -1
> > > > into on the loading file:
> > > >
> > > > echo -n "-1" > /sys/$DEVPATH/loading
> > > >
> > > > This will call fw_load_abort(). Some distributions actually have
> > > > a udev rule in place to *always* immediately cancel all firmware
> > > > fallback mechanism requests (Debian, OpenSUSE), they have:
>
> I made a typo here, OpenSUSE in fact does not carry this. Its so far
> only Debian I am aware of.
>
> > > > $ cat /lib/udev/rules.d/50-firmware.rules
> > > > # stub for immediately telling the kernel that userspace firmware loading
> > > > # failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y
> > > > SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1
Please resend this as a single patch, to be applied to 4.10-final, with
the above fix, and I will be glad to take it. Never mix patches in a
series that are for -final and for -next.
thanks,
greg k-h
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v2] firmware: fix NULL pointer dereference in __fw_load_abort()
2017-01-25 14:41 ` Greg KH
@ 2017-01-25 15:21 ` Luis R. Rodriguez
2017-01-25 15:47 ` Greg KH
0 siblings, 1 reply; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-25 15:21 UTC (permalink / raw)
To: gregkh
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, kimran, Luis R. Rodriguez, [3.10+]
Since commit 5d47ec02c37ea632398cb251c884e3a488dff794
("firmware: Correct handling of fw_state_wait() return value")
fw_load_abort(fw_priv) could be called twice and lead us to a
kernel crash. This happens only when the firmware fallback mechanism
(regular or custom) is used. The fallback mechanism exposes a sysfs
interface for userspace to upload a file and notify the kernel when
the file is loaded and ready, or to cancel an upload by echo'ing -1
into on the loading file:
echo -n "-1" > /sys/$DEVPATH/loading
This will call fw_load_abort(). Some distributions actually have
a udev rule in place to *always* immediately cancel all firmware
fallback mechanism requests (Debian), they have:
$ cat /lib/udev/rules.d/50-firmware.rules
# stub for immediately telling the kernel that userspace firmware loading
# failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y
SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1
This was done since udev removed the firmware fallback mechanism a while ago
and a long standing misunderstood issues with the timeout (but now corrected).
Distributions with this udev rule would run into this crash only if the
fallback mechanism is used. Since most distributions disable by default
using the fallback mechanism (CONFIG_FW_LOADER_USER_HELPER_FALLBACK), this
would typicaly mean only 2 drivers which *require* the fallback mechanism
could typically incur a crash: drivers/firmware/dell_rbu.c and the
drivers/leds/leds-lp55xx-common.c driver.
The crash happens because after commit 5b029624948d ("firmware: do not
use fw_lock for fw_state protection") and subsequent fix commit
5d47ec02c37ea6 ("firmware: Correct handling of fw_state_wait() return
value") a race can happen between this cancelation and the firmware
fw_state_wait_timeout() being woken up after a state change with which
fw_load_abort() as that calls swake_up(). Upon error fw_state_wait_timeout()
will also again call fw_load_abort() and trigger a null reference.
At first glance we could just fix this with a !buf check on
fw_load_abort() before accessing buf->fw_st, however there is
a logical issue in having a state machine used for the fallback
mechanism and preventing access from it once we abort as its inside
the buf (buf->fw_st).
The firmware_class.c code is setting the buf to NULL to annotate an
abort has occurred. Replace this mechanism by simply using the state check
instead. All the other code in place already uses similar checks
for aborting as well so no further changes are needed.
An oops can be reproduced with the new fw_fallback.sh fallback
mechanism cancellation test. Either cancelling the fallback mechanism
or the custom fallback mechanism triggers a crash.
mcgrof@piggy ~/linux-next/tools/testing/selftests/firmware
(git::20170111-fw-fixes)$ sudo ./fw_fallback.sh
./fw_fallback.sh: timeout works
./fw_fallback.sh: firmware comparison works
./fw_fallback.sh: fallback mechanism works
[ this then sits here when it is trying the cancellation test ]
Kernel log:
test_firmware: loading 'nope-test-firmware.bin'
misc test_firmware: Direct firmware load for nope-test-firmware.bin failed with error -2
misc test_firmware: Falling back to user helper
BUG: unable to handle kernel NULL pointer dereference at 0000000000000038
IP: _request_firmware+0xa27/0xad0
PGD 0
Oops: 0000 [#1] SMP
Modules linked in: test_firmware(E) ... etc ...
CPU: 1 PID: 1396 Comm: fw_fallback.sh Tainted: G W E 4.10.0-rc3-next-20170111+ #30
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014
task: ffff9740b27f4340 task.stack: ffffbb15c0bc8000
RIP: 0010:_request_firmware+0xa27/0xad0
RSP: 0018:ffffbb15c0bcbd10 EFLAGS: 00010246
RAX: 00000000fffffffe RBX: ffff9740afe5aa80 RCX: 0000000000000000
RDX: ffff9740b27f4340 RSI: 0000000000000283 RDI: 0000000000000000
RBP: ffffbb15c0bcbd90 R08: ffffbb15c0bcbcd8 R09: 0000000000000000
R10: 0000000894a0d4b1 R11: 000000000000008c R12: ffffffffc0312480
R13: 0000000000000005 R14: ffff9740b1c32400 R15: 00000000000003e8
FS: 00007f8604422700(0000) GS:ffff9740bfc80000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000038 CR3: 000000012164c000 CR4: 00000000000006e0
Call Trace:
request_firmware+0x37/0x50
trigger_request_store+0x79/0xd0 [test_firmware]
dev_attr_store+0x18/0x30
sysfs_kf_write+0x37/0x40
kernfs_fop_write+0x110/0x1a0
__vfs_write+0x37/0x160
? _cond_resched+0x1a/0x50
vfs_write+0xb5/0x1a0
SyS_write+0x55/0xc0
? trace_do_page_fault+0x37/0xd0
entry_SYSCALL_64_fastpath+0x1e/0xad
RIP: 0033:0x7f8603f49620
RSP: 002b:00007fff6287b788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 000055c307b110a0 RCX: 00007f8603f49620
RDX: 0000000000000016 RSI: 000055c3084d8a90 RDI: 0000000000000001
RBP: 0000000000000016 R08: 000000000000c0ff R09: 000055c3084d6336
R10: 000055c307b108b0 R11: 0000000000000246 R12: 000055c307b13c80
R13: 000055c3084d6320 R14: 0000000000000000 R15: 00007fff6287b950
Code: 9f 64 84 e8 9c 61 fe ff b8 f4 ff ff ff e9 6b f9 ff
ff 48 c7 c7 40 6b 8d 84 89 45 a8 e8 43 84 18 00 49 8b be 00 03 00 00 8b
45 a8 <83> 7f 38 02 74 08 e8 6e ec ff ff 8b 45 a8 49 c7 86 00 03 00 00
RIP: _request_firmware+0xa27/0xad0 RSP: ffffbb15c0bcbd10
CR2: 0000000000000038
---[ end trace 6d94ac339c133e6f ]---
In above case the call hierarchy that causes the crash looks as follows:
lib/test_firmware.c request_firmware()
-> fw_load_from_user_helper()
-> _request_firmware_load()
-> call fw_state_wait_timeout()
Some time later firmware_loading_store() scans a control value of "-1"
-> switch(loading) case -1: will call
-> fw_load_abort(fw_priv) which calls
-> __fw_load_abort(fw_priv->buf)
-> and set fw_priv->buf = NULL;
Upon being woken up via swake_up(), back in _request_firmware_load()
fw_state_wait_timeout() returns -ENOENT
-> since mentioned commit
-> fw_load_abort(fw_priv) is called a second time
-> and this time it would call:
-> __fw_load_abort(NULL /* fw_priv->buf */)
-> and we get: NULL->fw_st.status
Fixes: 5d47ec02c37e ("firmware: Correct handling of fw_state_wait() return value")
Reported-and-Tested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reported-and-Tested-by: Patrick Bruenn <p.bruenn@beckhoff.com>
Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
CC: <stable@vger.kernel.org> [3.10+]
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
---
v2: adjust commit log to remove the OpenSUSE reference as it doesn't
carry the same udev rule which cancels all fallback requests.
drivers/base/firmware_class.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 4497d263209f..ac350c518e0c 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -558,9 +558,6 @@ static void fw_load_abort(struct firmware_priv *fw_priv)
struct firmware_buf *buf = fw_priv->buf;
__fw_load_abort(buf);
-
- /* avoid user action after loading abort */
- fw_priv->buf = NULL;
}
static LIST_HEAD(pending_fw_head);
@@ -713,7 +710,7 @@ static ssize_t firmware_loading_store(struct device *dev,
mutex_lock(&fw_lock);
fw_buf = fw_priv->buf;
- if (!fw_buf)
+ if (fw_state_is_aborted(&fw_buf->fw_st))
goto out;
switch (loading) {
--
2.11.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH v2] firmware: fix NULL pointer dereference in __fw_load_abort()
2017-01-25 15:21 ` [PATCH v2] " Luis R. Rodriguez
@ 2017-01-25 15:47 ` Greg KH
2017-01-25 18:31 ` Luis R. Rodriguez
2017-01-25 18:31 ` [PATCH v3] " Luis R. Rodriguez
0 siblings, 2 replies; 26+ messages in thread
From: Greg KH @ 2017-01-25 15:47 UTC (permalink / raw)
To: Luis R. Rodriguez
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, kimran, [3.10+]
On Wed, Jan 25, 2017 at 07:21:18AM -0800, Luis R. Rodriguez wrote:
> Since commit 5d47ec02c37ea632398cb251c884e3a488dff794
> ("firmware: Correct handling of fw_state_wait() return value")
> fw_load_abort(fw_priv) could be called twice and lead us to a
> kernel crash. This happens only when the firmware fallback mechanism
> (regular or custom) is used. The fallback mechanism exposes a sysfs
> interface for userspace to upload a file and notify the kernel when
> the file is loaded and ready, or to cancel an upload by echo'ing -1
> into on the loading file:
>
> echo -n "-1" > /sys/$DEVPATH/loading
>
> This will call fw_load_abort(). Some distributions actually have
> a udev rule in place to *always* immediately cancel all firmware
> fallback mechanism requests (Debian), they have:
>
> $ cat /lib/udev/rules.d/50-firmware.rules
> # stub for immediately telling the kernel that userspace firmware loading
> # failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y
> SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1
>
> This was done since udev removed the firmware fallback mechanism a while ago
> and a long standing misunderstood issues with the timeout (but now corrected).
> Distributions with this udev rule would run into this crash only if the
> fallback mechanism is used. Since most distributions disable by default
> using the fallback mechanism (CONFIG_FW_LOADER_USER_HELPER_FALLBACK), this
> would typicaly mean only 2 drivers which *require* the fallback mechanism
> could typically incur a crash: drivers/firmware/dell_rbu.c and the
> drivers/leds/leds-lp55xx-common.c driver.
>
> The crash happens because after commit 5b029624948d ("firmware: do not
> use fw_lock for fw_state protection") and subsequent fix commit
> 5d47ec02c37ea6 ("firmware: Correct handling of fw_state_wait() return
> value") a race can happen between this cancelation and the firmware
> fw_state_wait_timeout() being woken up after a state change with which
> fw_load_abort() as that calls swake_up(). Upon error fw_state_wait_timeout()
> will also again call fw_load_abort() and trigger a null reference.
>
> At first glance we could just fix this with a !buf check on
> fw_load_abort() before accessing buf->fw_st, however there is
> a logical issue in having a state machine used for the fallback
> mechanism and preventing access from it once we abort as its inside
> the buf (buf->fw_st).
>
> The firmware_class.c code is setting the buf to NULL to annotate an
> abort has occurred. Replace this mechanism by simply using the state check
> instead. All the other code in place already uses similar checks
> for aborting as well so no further changes are needed.
>
> An oops can be reproduced with the new fw_fallback.sh fallback
> mechanism cancellation test. Either cancelling the fallback mechanism
> or the custom fallback mechanism triggers a crash.
You are still writing books here.
With crazy margins, pick one line width (72 columns), and stick with it
please.
Can you reformat this and resend please?
thanks,
greg k-h-
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2] firmware: fix NULL pointer dereference in __fw_load_abort()
2017-01-25 15:47 ` Greg KH
@ 2017-01-25 18:31 ` Luis R. Rodriguez
2017-01-25 18:31 ` [PATCH v3] " Luis R. Rodriguez
1 sibling, 0 replies; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-25 18:31 UTC (permalink / raw)
To: Greg KH
Cc: Luis R. Rodriguez, ming.lei, keescook, linux-kernel-dev,
jakub.kicinski, chris, oss-drivers, johannes, j, teg, kay,
jwboyer, dmitry.torokhov, seth.forshee, bjorn.andersson,
linux-kernel, wagi, stephen.boyd, zohar, tiwai, dwmw2,
fengguang.wu, dhowells, arend.vanspriel, kvalo, kimran, [3.10+]
On Wed, Jan 25, 2017 at 04:47:25PM +0100, Greg KH wrote:
> On Wed, Jan 25, 2017 at 07:21:18AM -0800, Luis R. Rodriguez wrote:
> > Since commit 5d47ec02c37ea632398cb251c884e3a488dff794
> > ("firmware: Correct handling of fw_state_wait() return value")
> > fw_load_abort(fw_priv) could be called twice and lead us to a
> > kernel crash. This happens only when the firmware fallback mechanism
> > (regular or custom) is used. The fallback mechanism exposes a sysfs
> > interface for userspace to upload a file and notify the kernel when
> > the file is loaded and ready, or to cancel an upload by echo'ing -1
> > into on the loading file:
> >
> > echo -n "-1" > /sys/$DEVPATH/loading
> >
> > This will call fw_load_abort(). Some distributions actually have
> > a udev rule in place to *always* immediately cancel all firmware
> > fallback mechanism requests (Debian), they have:
> >
> > $ cat /lib/udev/rules.d/50-firmware.rules
> > # stub for immediately telling the kernel that userspace firmware loading
> > # failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y
> > SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1
> >
> > This was done since udev removed the firmware fallback mechanism a while ago
> > and a long standing misunderstood issues with the timeout (but now corrected).
> > Distributions with this udev rule would run into this crash only if the
> > fallback mechanism is used. Since most distributions disable by default
> > using the fallback mechanism (CONFIG_FW_LOADER_USER_HELPER_FALLBACK), this
> > would typicaly mean only 2 drivers which *require* the fallback mechanism
> > could typically incur a crash: drivers/firmware/dell_rbu.c and the
> > drivers/leds/leds-lp55xx-common.c driver.
> >
> > The crash happens because after commit 5b029624948d ("firmware: do not
> > use fw_lock for fw_state protection") and subsequent fix commit
> > 5d47ec02c37ea6 ("firmware: Correct handling of fw_state_wait() return
> > value") a race can happen between this cancelation and the firmware
> > fw_state_wait_timeout() being woken up after a state change with which
> > fw_load_abort() as that calls swake_up(). Upon error fw_state_wait_timeout()
> > will also again call fw_load_abort() and trigger a null reference.
> >
> > At first glance we could just fix this with a !buf check on
> > fw_load_abort() before accessing buf->fw_st, however there is
> > a logical issue in having a state machine used for the fallback
> > mechanism and preventing access from it once we abort as its inside
> > the buf (buf->fw_st).
> >
> > The firmware_class.c code is setting the buf to NULL to annotate an
> > abort has occurred. Replace this mechanism by simply using the state check
> > instead. All the other code in place already uses similar checks
> > for aborting as well so no further changes are needed.
> >
> > An oops can be reproduced with the new fw_fallback.sh fallback
> > mechanism cancellation test. Either cancelling the fallback mechanism
> > or the custom fallback mechanism triggers a crash.
>
> You are still writing books here.
Alright trimmed.
> With crazy margins, pick one line width (72 columns), and stick with it
> please.
>
> Can you reformat this and resend please?
Sure.
Luis
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v3] firmware: fix NULL pointer dereference in __fw_load_abort()
2017-01-25 15:47 ` Greg KH
2017-01-25 18:31 ` Luis R. Rodriguez
@ 2017-01-25 18:31 ` Luis R. Rodriguez
1 sibling, 0 replies; 26+ messages in thread
From: Luis R. Rodriguez @ 2017-01-25 18:31 UTC (permalink / raw)
To: gregkh
Cc: ming.lei, keescook, linux-kernel-dev, jakub.kicinski, chris,
oss-drivers, johannes, j, teg, kay, jwboyer, dmitry.torokhov,
seth.forshee, bjorn.andersson, linux-kernel, wagi, stephen.boyd,
zohar, tiwai, dwmw2, fengguang.wu, dhowells, arend.vanspriel,
kvalo, kimran, Luis R. Rodriguez, [3.10+]
Since commit 5d47ec02c37ea6 ("firmware: Correct handling of
fw_state_wait() return value") fw_load_abort() could be called twice and
lead us to a kernel crash. This happens only when the firmware fallback
mechanism (regular or custom) is used. The fallback mechanism exposes a
sysfs interface for userspace to upload a file and notify the kernel
when the file is loaded and ready, or to cancel an upload by echo'ing -1
into on the loading file:
echo -n "-1" > /sys/$DEVPATH/loading
This will call fw_load_abort(). Some distributions actually have a udev
rule in place to *always* immediately cancel all firmware fallback
mechanism requests (Debian), they have:
$ cat /lib/udev/rules.d/50-firmware.rules
# stub for immediately telling the kernel that userspace firmware loading
# failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y
SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1
Distributions with this udev rule would run into this crash only if the
fallback mechanism is used. Since most distributions disable by default
using the fallback mechanism (CONFIG_FW_LOADER_USER_HELPER_FALLBACK),
this would typicaly mean only 2 drivers which *require* the fallback
mechanism could typically incur a crash: drivers/firmware/dell_rbu.c and
the drivers/leds/leds-lp55xx-common.c driver. Distributions enabling
CONFIG_FW_LOADER_USER_HELPER_FALLBACK by default are obviously more
exposed to this crash.
The crash happens because after commit 5b029624948d ("firmware: do not
use fw_lock for fw_state protection") and subsequent fix commit
5d47ec02c37ea6 ("firmware: Correct handling of fw_state_wait() return
value") a race can happen between this cancelation and the firmware
fw_state_wait_timeout() being woken up after a state change with which
fw_load_abort() as that calls swake_up(). Upon error
fw_state_wait_timeout() will also again call fw_load_abort() and trigger
a null reference.
At first glance we could just fix this with a !buf check on
fw_load_abort() before accessing buf->fw_st, however there is a logical
issue in having a state machine used for the fallback mechanism and
preventing access from it once we abort as its inside the buf
(buf->fw_st).
The firmware_class.c code is setting the buf to NULL to annotate an
abort has occurred. Replace this mechanism by simply using the state
check instead. All the other code in place already uses similar checks
for aborting as well so no further changes are needed.
An oops can be reproduced with the new fw_fallback.sh fallback mechanism
cancellation test. Either cancelling the fallback mechanism or the
custom fallback mechanism triggers a crash.
mcgrof@piggy ~/linux-next/tools/testing/selftests/firmware
(git::20170111-fw-fixes)$ sudo ./fw_fallback.sh
./fw_fallback.sh: timeout works
./fw_fallback.sh: firmware comparison works
./fw_fallback.sh: fallback mechanism works
[ this then sits here when it is trying the cancellation test ]
Kernel log:
test_firmware: loading 'nope-test-firmware.bin'
misc test_firmware: Direct firmware load for nope-test-firmware.bin failed with error -2
misc test_firmware: Falling back to user helper
BUG: unable to handle kernel NULL pointer dereference at 0000000000000038
IP: _request_firmware+0xa27/0xad0
PGD 0
Oops: 0000 [#1] SMP
Modules linked in: test_firmware(E) ... etc ...
CPU: 1 PID: 1396 Comm: fw_fallback.sh Tainted: G W E 4.10.0-rc3-next-20170111+ #30
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014
task: ffff9740b27f4340 task.stack: ffffbb15c0bc8000
RIP: 0010:_request_firmware+0xa27/0xad0
RSP: 0018:ffffbb15c0bcbd10 EFLAGS: 00010246
RAX: 00000000fffffffe RBX: ffff9740afe5aa80 RCX: 0000000000000000
RDX: ffff9740b27f4340 RSI: 0000000000000283 RDI: 0000000000000000
RBP: ffffbb15c0bcbd90 R08: ffffbb15c0bcbcd8 R09: 0000000000000000
R10: 0000000894a0d4b1 R11: 000000000000008c R12: ffffffffc0312480
R13: 0000000000000005 R14: ffff9740b1c32400 R15: 00000000000003e8
FS: 00007f8604422700(0000) GS:ffff9740bfc80000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000038 CR3: 000000012164c000 CR4: 00000000000006e0
Call Trace:
request_firmware+0x37/0x50
trigger_request_store+0x79/0xd0 [test_firmware]
dev_attr_store+0x18/0x30
sysfs_kf_write+0x37/0x40
kernfs_fop_write+0x110/0x1a0
__vfs_write+0x37/0x160
? _cond_resched+0x1a/0x50
vfs_write+0xb5/0x1a0
SyS_write+0x55/0xc0
? trace_do_page_fault+0x37/0xd0
entry_SYSCALL_64_fastpath+0x1e/0xad
RIP: 0033:0x7f8603f49620
RSP: 002b:00007fff6287b788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 000055c307b110a0 RCX: 00007f8603f49620
RDX: 0000000000000016 RSI: 000055c3084d8a90 RDI: 0000000000000001
RBP: 0000000000000016 R08: 000000000000c0ff R09: 000055c3084d6336
R10: 000055c307b108b0 R11: 0000000000000246 R12: 000055c307b13c80
R13: 000055c3084d6320 R14: 0000000000000000 R15: 00007fff6287b950
Code: 9f 64 84 e8 9c 61 fe ff b8 f4 ff ff ff e9 6b f9 ff
ff 48 c7 c7 40 6b 8d 84 89 45 a8 e8 43 84 18 00 49 8b be 00 03 00 00 8b
45 a8 <83> 7f 38 02 74 08 e8 6e ec ff ff 8b 45 a8 49 c7 86 00 03 00 00
RIP: _request_firmware+0xa27/0xad0 RSP: ffffbb15c0bcbd10
CR2: 0000000000000038
---[ end trace 6d94ac339c133e6f ]---
Fixes: 5d47ec02c37e ("firmware: Correct handling of fw_state_wait() return value")
Reported-and-Tested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reported-and-Tested-by: Patrick Bruenn <p.bruenn@beckhoff.com>
Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
CC: <stable@vger.kernel.org> [3.10+]
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
---
v3: shorten commit log and stick to 72 characters for margins on
commit log
drivers/base/firmware_class.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 4497d263209f..ac350c518e0c 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -558,9 +558,6 @@ static void fw_load_abort(struct firmware_priv *fw_priv)
struct firmware_buf *buf = fw_priv->buf;
__fw_load_abort(buf);
-
- /* avoid user action after loading abort */
- fw_priv->buf = NULL;
}
static LIST_HEAD(pending_fw_head);
@@ -713,7 +710,7 @@ static ssize_t firmware_loading_store(struct device *dev,
mutex_lock(&fw_lock);
fw_buf = fw_priv->buf;
- if (!fw_buf)
+ if (fw_state_is_aborted(&fw_buf->fw_st))
goto out;
switch (loading) {
--
2.11.0
^ permalink raw reply related [flat|nested] 26+ messages in thread