* [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY()
@ 2019-06-09 15:24 Coly Li
2019-06-09 15:28 ` Coly Li
2019-06-09 18:28 ` Rolf Fokkens
0 siblings, 2 replies; 8+ messages in thread
From: Coly Li @ 2019-06-09 15:24 UTC (permalink / raw)
To: linux-bcache; +Cc: linux-block, Coly Li, Kent Overstreet, Nix
Recently people have reported that bcache code compiled with gcc9 is broken.
One of the buggy behaviors I observed is that two adjacent 4KB I/Os which
should merge into one do not. It finally turned out to be a stack corruption
caused by the macro PRECEDING_KEY().
See how PRECEDING_KEY() is defined in bset.h,
437 #define PRECEDING_KEY(_k) \
438 ({ \
439 struct bkey *_ret = NULL; \
440 \
441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
443 \
444 if (!_ret->low) \
445 _ret->high--; \
446 _ret->low--; \
447 } \
448 \
449 _ret; \
450 })
At line 442, _ret points to the address of an on-stack temporary constructed
by KEY(). The lifetime of this temporary only covers lines 442-446; once
_ret is returned to bch_btree_insert_key(), the returned address points to
an invalid stack location, and that location is overwritten by the
subsequent call to bch_btree_iter_init(). The 'search' argument of
bch_btree_iter_init() then points to some address inside the stack frame of
bch_btree_iter_init() itself; the exact address depends on how the compiler
allocates stack space. At this point the stack is corrupted.
Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: Rolf Fokkens <rolf@rolffokkens.nl>
Reviewed-by: Pierre JUHEN <pierre.juhen@orange.fr>
Tested-by: Shenghui Wang <shhuiw@foxmail.com>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Nix <nix@esperi.org.uk>
---
Changelog:
V2: Fix a pointer assignment problem in preceding_key(), which was
pointed out by Rolf Fokkens and Pierre JUHEN.
V1: Initial RFC patch for review and comment.
drivers/md/bcache/bset.c | 16 +++++++++++++---
drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
2 files changed, 33 insertions(+), 17 deletions(-)
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8f07fa6e1739..268f1b685084 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
struct bset *i = bset_tree_last(b)->data;
struct bkey *m, *prev = NULL;
struct btree_iter iter;
+ struct bkey preceding_key_on_stack = ZERO_KEY;
+ struct bkey *preceding_key_p = &preceding_key_on_stack;
BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
- m = bch_btree_iter_init(b, &iter, b->ops->is_extents
- ? PRECEDING_KEY(&START_KEY(k))
- : PRECEDING_KEY(k));
+ /*
+ * If k has preceding key, preceding_key_p will be set to address
+ * of k's preceding key; otherwise preceding_key_p will be set
+ * to NULL inside preceding_key().
+ */
+ if (b->ops->is_extents)
+ preceding_key(&START_KEY(k), &preceding_key_p);
+ else
+ preceding_key(k, &preceding_key_p);
+
+ m = bch_btree_iter_init(b, &iter, preceding_key_p);
if (b->ops->insert_fixup(b, k, &iter, replace_key))
return status;
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index bac76aabca6d..c71365e7c1fa 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
return __bch_cut_back(where, k);
}
-#define PRECEDING_KEY(_k) \
-({ \
- struct bkey *_ret = NULL; \
- \
- if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
- _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
- \
- if (!_ret->low) \
- _ret->high--; \
- _ret->low--; \
- } \
- \
- _ret; \
-})
+/*
+ * Pointer '*preceding_key_p' points to a memory object to store preceding
+ * key of k. If the preceding key does not exist, set '*preceding_key_p' to
+ * NULL. So the caller of preceding_key() needs to take care of memory
+ * which '*preceding_key_p' pointed to before calling preceding_key().
+ * Currently the only caller of preceding_key() is bch_btree_insert_key(),
+ * and it points to an on-stack variable, so the memory release is handled
+ * by stackframe itself.
+ */
+static inline void preceding_key(struct bkey *k, struct bkey **preceding_key_p)
+{
+ if (KEY_INODE(k) || KEY_OFFSET(k)) {
+ (**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
+ if (!(*preceding_key_p)->low)
+ (*preceding_key_p)->high--;
+ (*preceding_key_p)->low--;
+ } else {
+ (*preceding_key_p) = NULL;
+ }
+}
static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
{
--
2.16.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-09 15:24 [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY() Coly Li
@ 2019-06-09 15:28 ` Coly Li
2019-06-09 17:52 ` Pierre JUHEN
2019-06-09 18:28 ` Rolf Fokkens
1 sibling, 1 reply; 8+ messages in thread
From: Coly Li @ 2019-06-09 15:28 UTC (permalink / raw)
To: linux-bcache, Rolf Fokkens, Pierre JUHEN
Cc: linux-block, Kent Overstreet, Nix
On 2019/6/9 11:24 下午, Coly Li wrote:
> Recently people report bcache code compiled with gcc9 is broken, one of
> the buggy behavior I observe is that two adjacent 4KB I/Os should merge
> into one but they don't. Finally it turns out to be a stack corruption
> caused by macro PRECEDING_KEY().
>
> See how PRECEDING_KEY() is defined in bset.h,
> 437 #define PRECEDING_KEY(_k) \
> 438 ({ \
> 439 struct bkey *_ret = NULL; \
> 440 \
> 441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
> 442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
> 443 \
> 444 if (!_ret->low) \
> 445 _ret->high--; \
> 446 _ret->low--; \
> 447 } \
> 448 \
> 449 _ret; \
> 450 })
>
> At line 442, _ret points to address of a on-stack variable combined by
> KEY(), the life range of this on-stack variable is in line 442-446,
> once _ret is returned to bch_btree_insert_key(), the returned address
> points to an invalid stack address and this address is overwritten in
> the following called bch_btree_iter_init(). Then argument 'search' of
> bch_btree_iter_init() points to some address inside stackframe of
> bch_btree_iter_init(), exact address depends on how the compiler
> allocates stack space. Now the stack is corrupted.
>
> Signed-off-by: Coly Li <colyli@suse.de>
> Reviewed-by: Rolf Fokkens <rolf@rolffokkens.nl>
> Reviewed-by: Pierre JUHEN <pierre.juhen@orange.fr>
Hi Rolf and Pierre,
Oops, I was in a bit too much of a hurry, and just realized that you have
not offered a Reviewed-by: yet.
Could you please offer a Reviewed-by: for this patch, so that I can submit
it to Jens in this run ASAP?
Many thanks for your code review and help!
Coly Li
> Tested-by: Shenghui Wang <shhuiw@foxmail.com>
> Cc: Kent Overstreet <kent.overstreet@gmail.com>
> Cc: Nix <nix@esperi.org.uk>
> ---
> Changlog:
> V2: Fix a pointer assignment problem in preceding_key(), which is
> pointed by Rolf Fokkens and Pierre JUHEN.
> V1: Initial RFC patch for review and comment.
>
> drivers/md/bcache/bset.c | 16 +++++++++++++---
> drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
> 2 files changed, 33 insertions(+), 17 deletions(-)
[snipped]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-09 15:28 ` Coly Li
@ 2019-06-09 17:52 ` Pierre JUHEN
2019-06-09 22:15 ` Coly Li
0 siblings, 1 reply; 8+ messages in thread
From: Pierre JUHEN @ 2019-06-09 17:52 UTC (permalink / raw)
To: Coly Li, linux-bcache, Rolf Fokkens; +Cc: linux-block, Kent Overstreet, Nix
I tested a patched bcache module. OK for me.
Le 09/06/2019 à 17:28, Coly Li a écrit :
> On 2019/6/9 11:24 下午, Coly Li wrote:
>> Recently people report bcache code compiled with gcc9 is broken, one of
>> the buggy behavior I observe is that two adjacent 4KB I/Os should merge
>> into one but they don't. Finally it turns out to be a stack corruption
>> caused by macro PRECEDING_KEY().
>>
>> See how PRECEDING_KEY() is defined in bset.h,
>> 437 #define PRECEDING_KEY(_k) \
>> 438 ({ \
>> 439 struct bkey *_ret = NULL; \
>> 440 \
>> 441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
>> 442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
>> 443 \
>> 444 if (!_ret->low) \
>> 445 _ret->high--; \
>> 446 _ret->low--; \
>> 447 } \
>> 448 \
>> 449 _ret; \
>> 450 })
>>
>> At line 442, _ret points to address of a on-stack variable combined by
>> KEY(), the life range of this on-stack variable is in line 442-446,
>> once _ret is returned to bch_btree_insert_key(), the returned address
>> points to an invalid stack address and this address is overwritten in
>> the following called bch_btree_iter_init(). Then argument 'search' of
>> bch_btree_iter_init() points to some address inside stackframe of
>> bch_btree_iter_init(), exact address depends on how the compiler
>> allocates stack space. Now the stack is corrupted.
>>
>> Signed-off-by: Coly Li <colyli@suse.de>
>> Reviewed-by: Rolf Fokkens <rolf@rolffokkens.nl>
>> Reviewed-by: Pierre JUHEN <pierre.juhen@orange.fr>
> Hi Rolf and Pierre,
>
> Oops, I am a little bit too hurry, just realize you don't offer
> Reviewed-by: yet.
>
> Could you like to offer a Reviewed-by: to this patch, then I may submit
> to Jens in this run ASAP.
>
> Many thanks of your code review and help !
>
> Coly Li
>
>
>> Tested-by: Shenghui Wang <shhuiw@foxmail.com>
>> Cc: Kent Overstreet <kent.overstreet@gmail.com>
>> Cc: Nix <nix@esperi.org.uk>
>> ---
>> Changlog:
>> V2: Fix a pointer assignment problem in preceding_key(), which is
>> pointed by Rolf Fokkens and Pierre JUHEN.
>> V1: Initial RFC patch for review and comment.
>>
>> drivers/md/bcache/bset.c | 16 +++++++++++++---
>> drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
>> 2 files changed, 33 insertions(+), 17 deletions(-)
> [snipped]
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-09 15:24 [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY() Coly Li
2019-06-09 15:28 ` Coly Li
@ 2019-06-09 18:28 ` Rolf Fokkens
2019-06-09 22:17 ` Coly Li
2019-06-10 7:00 ` Rolf Fokkens
1 sibling, 2 replies; 8+ messages in thread
From: Rolf Fokkens @ 2019-06-09 18:28 UTC (permalink / raw)
To: Coly Li, linux-bcache; +Cc: linux-block, Kent Overstreet, Nix
I haven't tested the fix (yet), but just looking at the code I'm
perfectly fine with the proposed replacement of the macro PRECEDING_KEY
by the preceding_key function.
I have some minor concerns about the efficiency of the amount of
indirections, but the gcc optimizer may take care of this. This is for
later concern anyway.
On 6/9/19 5:24 PM, Coly Li wrote:
> Recently people report bcache code compiled with gcc9 is broken, one of
> the buggy behavior I observe is that two adjacent 4KB I/Os should merge
> into one but they don't. Finally it turns out to be a stack corruption
> caused by macro PRECEDING_KEY().
>
> See how PRECEDING_KEY() is defined in bset.h,
> 437 #define PRECEDING_KEY(_k) \
> 438 ({ \
> 439 struct bkey *_ret = NULL; \
> 440 \
> 441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
> 442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
> 443 \
> 444 if (!_ret->low) \
> 445 _ret->high--; \
> 446 _ret->low--; \
> 447 } \
> 448 \
> 449 _ret; \
> 450 })
>
> At line 442, _ret points to address of a on-stack variable combined by
> KEY(), the life range of this on-stack variable is in line 442-446,
> once _ret is returned to bch_btree_insert_key(), the returned address
> points to an invalid stack address and this address is overwritten in
> the following called bch_btree_iter_init(). Then argument 'search' of
> bch_btree_iter_init() points to some address inside stackframe of
> bch_btree_iter_init(), exact address depends on how the compiler
> allocates stack space. Now the stack is corrupted.
>
> Signed-off-by: Coly Li <colyli@suse.de>
> Reviewed-by: Rolf Fokkens <rolf@rolffokkens.nl>
> Reviewed-by: Pierre JUHEN <pierre.juhen@orange.fr>
> Tested-by: Shenghui Wang <shhuiw@foxmail.com>
> Cc: Kent Overstreet <kent.overstreet@gmail.com>
> Cc: Nix <nix@esperi.org.uk>
> ---
> Changlog:
> V2: Fix a pointer assignment problem in preceding_key(), which is
> pointed by Rolf Fokkens and Pierre JUHEN.
> V1: Initial RFC patch for review and comment.
>
> drivers/md/bcache/bset.c | 16 +++++++++++++---
> drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
> 2 files changed, 33 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
> index 8f07fa6e1739..268f1b685084 100644
> --- a/drivers/md/bcache/bset.c
> +++ b/drivers/md/bcache/bset.c
> @@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
> struct bset *i = bset_tree_last(b)->data;
> struct bkey *m, *prev = NULL;
> struct btree_iter iter;
> + struct bkey preceding_key_on_stack = ZERO_KEY;
> + struct bkey *preceding_key_p = &preceding_key_on_stack;
>
> BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
>
> - m = bch_btree_iter_init(b, &iter, b->ops->is_extents
> - ? PRECEDING_KEY(&START_KEY(k))
> - : PRECEDING_KEY(k));
> + /*
> + * If k has preceding key, preceding_key_p will be set to address
> + * of k's preceding key; otherwise preceding_key_p will be set
> + * to NULL inside preceding_key().
> + */
> + if (b->ops->is_extents)
> + preceding_key(&START_KEY(k), &preceding_key_p);
> + else
> + preceding_key(k, &preceding_key_p);
> +
> + m = bch_btree_iter_init(b, &iter, preceding_key_p);
>
> if (b->ops->insert_fixup(b, k, &iter, replace_key))
> return status;
> diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
> index bac76aabca6d..c71365e7c1fa 100644
> --- a/drivers/md/bcache/bset.h
> +++ b/drivers/md/bcache/bset.h
> @@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
> return __bch_cut_back(where, k);
> }
>
> -#define PRECEDING_KEY(_k) \
> -({ \
> - struct bkey *_ret = NULL; \
> - \
> - if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
> - _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
> - \
> - if (!_ret->low) \
> - _ret->high--; \
> - _ret->low--; \
> - } \
> - \
> - _ret; \
> -})
> +/*
> + * Pointer '*preceding_key_p' points to a memory object to store preceding
> + * key of k. If the preceding key does not exist, set '*preceding_key_p' to
> + * NULL. So the caller of preceding_key() needs to take care of memory
> + * which '*preceding_key_p' pointed to before calling preceding_key().
> + * Currently the only caller of preceding_key() is bch_btree_insert_key(),
> + * and it points to an on-stack variable, so the memory release is handled
> + * by stackframe itself.
> + */
> +static inline void preceding_key(struct bkey *k, struct bkey **preceding_key_p)
> +{
> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
> + (**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
> + if (!(*preceding_key_p)->low)
> + (*preceding_key_p)->high--;
> + (*preceding_key_p)->low--;
> + } else {
> + (*preceding_key_p) = NULL;
> + }
> +}
>
> static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
> {
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-09 17:52 ` Pierre JUHEN
@ 2019-06-09 22:15 ` Coly Li
0 siblings, 0 replies; 8+ messages in thread
From: Coly Li @ 2019-06-09 22:15 UTC (permalink / raw)
To: Pierre JUHEN
Cc: linux-bcache, Rolf Fokkens, linux-block, Kent Overstreet, Nix
On 2019/6/10 1:52 上午, Pierre JUHEN wrote:
> I tested a patched bcache module. OK for me.
hi Pierre,
Cool, thank you!
Coly Li
>
> Le 09/06/2019 à 17:28, Coly Li a écrit :
>> On 2019/6/9 11:24 下午, Coly Li wrote:
>>> Recently people report bcache code compiled with gcc9 is broken, one of
>>> the buggy behavior I observe is that two adjacent 4KB I/Os should merge
>>> into one but they don't. Finally it turns out to be a stack corruption
>>> caused by macro PRECEDING_KEY().
>>>
>>> See how PRECEDING_KEY() is defined in bset.h,
>>> 437 #define PRECEDING_KEY(_k) \
>>> 438 ({ \
>>> 439 struct bkey *_ret = NULL; \
>>> 440 \
>>> 441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
>>> 442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
>>> 443 \
>>> 444 if (!_ret->low) \
>>> 445 _ret->high--; \
>>> 446 _ret->low--; \
>>> 447 } \
>>> 448 \
>>> 449 _ret; \
>>> 450 })
>>>
>>> At line 442, _ret points to address of a on-stack variable combined by
>>> KEY(), the life range of this on-stack variable is in line 442-446,
>>> once _ret is returned to bch_btree_insert_key(), the returned address
>>> points to an invalid stack address and this address is overwritten in
>>> the following called bch_btree_iter_init(). Then argument 'search' of
>>> bch_btree_iter_init() points to some address inside stackframe of
>>> bch_btree_iter_init(), exact address depends on how the compiler
>>> allocates stack space. Now the stack is corrupted.
>>>
>>> Signed-off-by: Coly Li <colyli@suse.de>
>>> Reviewed-by: Rolf Fokkens <rolf@rolffokkens.nl>
>>> Reviewed-by: Pierre JUHEN <pierre.juhen@orange.fr>
>> Hi Rolf and Pierre,
>>
>> Oops, I am a little bit too hurry, just realize you don't offer
>> Reviewed-by: yet.
>>
>> Could you like to offer a Reviewed-by: to this patch, then I may submit
>> to Jens in this run ASAP.
>>
>> Many thanks of your code review and help !
>>
>> Coly Li
>>
>>
>>> Tested-by: Shenghui Wang <shhuiw@foxmail.com>
>>> Cc: Kent Overstreet <kent.overstreet@gmail.com>
>>> Cc: Nix <nix@esperi.org.uk>
>>> ---
>>> Changlog:
>>> V2: Fix a pointer assignment problem in preceding_key(), which is
>>> pointed by Rolf Fokkens and Pierre JUHEN.
>>> V1: Initial RFC patch for review and comment.
>>>
>>> drivers/md/bcache/bset.c | 16 +++++++++++++---
>>> drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
>>> 2 files changed, 33 insertions(+), 17 deletions(-)
>> [snipped]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-09 18:28 ` Rolf Fokkens
@ 2019-06-09 22:17 ` Coly Li
2019-06-10 7:00 ` Rolf Fokkens
1 sibling, 0 replies; 8+ messages in thread
From: Coly Li @ 2019-06-09 22:17 UTC (permalink / raw)
To: Rolf Fokkens; +Cc: linux-bcache, linux-block, Kent Overstreet, Nix
On 2019/6/10 2:28 上午, Rolf Fokkens wrote:
> I haven't tested the fix (yet), but just looking at the code I'm
> perfectly fine with the proposed replacement of the macro PRECEDING_KEY
> by the preceding_key function.
>
> I have some minor concerns about the efficiency of the amount of
> indirections, but the gcc optimizer may take care of this. This is for
> later concern anyway.
>
Hi Rolf,
I see the point. If the pointer indirections become a performance
bottleneck in the future, let's fix it then.
Thank you for the help!
Coly Li
> On 6/9/19 5:24 PM, Coly Li wrote:
>> Recently people report bcache code compiled with gcc9 is broken, one of
>> the buggy behavior I observe is that two adjacent 4KB I/Os should merge
>> into one but they don't. Finally it turns out to be a stack corruption
>> caused by macro PRECEDING_KEY().
>>
>> See how PRECEDING_KEY() is defined in bset.h,
>> 437 #define PRECEDING_KEY(_k) \
>> 438 ({ \
>> 439 struct bkey *_ret = NULL; \
>> 440 \
>> 441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
>> 442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
>> 443 \
>> 444 if (!_ret->low) \
>> 445 _ret->high--; \
>> 446 _ret->low--; \
>> 447 } \
>> 448 \
>> 449 _ret; \
>> 450 })
>>
>> At line 442, _ret points to address of a on-stack variable combined by
>> KEY(), the life range of this on-stack variable is in line 442-446,
>> once _ret is returned to bch_btree_insert_key(), the returned address
>> points to an invalid stack address and this address is overwritten in
>> the following called bch_btree_iter_init(). Then argument 'search' of
>> bch_btree_iter_init() points to some address inside stackframe of
>> bch_btree_iter_init(), exact address depends on how the compiler
>> allocates stack space. Now the stack is corrupted.
>>
>> Signed-off-by: Coly Li <colyli@suse.de>
>> Reviewed-by: Rolf Fokkens <rolf@rolffokkens.nl>
>> Reviewed-by: Pierre JUHEN <pierre.juhen@orange.fr>
>> Tested-by: Shenghui Wang <shhuiw@foxmail.com>
>> Cc: Kent Overstreet <kent.overstreet@gmail.com>
>> Cc: Nix <nix@esperi.org.uk>
>> ---
>> Changlog:
>> V2: Fix a pointer assignment problem in preceding_key(), which is
>> pointed by Rolf Fokkens and Pierre JUHEN.
>> V1: Initial RFC patch for review and comment.
>>
>> drivers/md/bcache/bset.c | 16 +++++++++++++---
>> drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
>> 2 files changed, 33 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
>> index 8f07fa6e1739..268f1b685084 100644
>> --- a/drivers/md/bcache/bset.c
>> +++ b/drivers/md/bcache/bset.c
>> @@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct
>> btree_keys *b, struct bkey *k,
>> struct bset *i = bset_tree_last(b)->data;
>> struct bkey *m, *prev = NULL;
>> struct btree_iter iter;
>> + struct bkey preceding_key_on_stack = ZERO_KEY;
>> + struct bkey *preceding_key_p = &preceding_key_on_stack;
>> BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
>> - m = bch_btree_iter_init(b, &iter, b->ops->is_extents
>> - ? PRECEDING_KEY(&START_KEY(k))
>> - : PRECEDING_KEY(k));
>> + /*
>> + * If k has preceding key, preceding_key_p will be set to address
>> + * of k's preceding key; otherwise preceding_key_p will be set
>> + * to NULL inside preceding_key().
>> + */
>> + if (b->ops->is_extents)
>> + preceding_key(&START_KEY(k), &preceding_key_p);
>> + else
>> + preceding_key(k, &preceding_key_p);
>> +
>> + m = bch_btree_iter_init(b, &iter, preceding_key_p);
>> if (b->ops->insert_fixup(b, k, &iter, replace_key))
>> return status;
>> diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
>> index bac76aabca6d..c71365e7c1fa 100644
>> --- a/drivers/md/bcache/bset.h
>> +++ b/drivers/md/bcache/bset.h
>> @@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct
>> bkey *where, struct bkey *k)
>> return __bch_cut_back(where, k);
>> }
>> -#define PRECEDING_KEY(_k) \
>> -({ \
>> - struct bkey *_ret = NULL; \
>> - \
>> - if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
>> - _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
>> - \
>> - if (!_ret->low) \
>> - _ret->high--; \
>> - _ret->low--; \
>> - } \
>> - \
>> - _ret; \
>> -})
>> +/*
>> + * Pointer '*preceding_key_p' points to a memory object to store
>> preceding
>> + * key of k. If the preceding key does not exist, set
>> '*preceding_key_p' to
>> + * NULL. So the caller of preceding_key() needs to take care of memory
>> + * which '*preceding_key_p' pointed to before calling preceding_key().
>> + * Currently the only caller of preceding_key() is
>> bch_btree_insert_key(),
>> + * and it points to an on-stack variable, so the memory release is
>> handled
>> + * by stackframe itself.
>> + */
>> +static inline void preceding_key(struct bkey *k, struct bkey
>> **preceding_key_p)
>> +{
>> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
>> + (**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
>> + if (!(*preceding_key_p)->low)
>> + (*preceding_key_p)->high--;
>> + (*preceding_key_p)->low--;
>> + } else {
>> + (*preceding_key_p) = NULL;
>> + }
>> +}
>> static inline bool bch_ptr_invalid(struct btree_keys *b, const
>> struct bkey *k)
>> {
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-09 18:28 ` Rolf Fokkens
2019-06-09 22:17 ` Coly Li
@ 2019-06-10 7:00 ` Rolf Fokkens
2019-06-10 8:11 ` Coly Li
1 sibling, 1 reply; 8+ messages in thread
From: Rolf Fokkens @ 2019-06-10 7:00 UTC (permalink / raw)
To: Coly Li, linux-bcache; +Cc: linux-block, Kent Overstreet, Nix
Did some testing, and I should not have underestimated the gcc
optimizer. The inline function seems like a fine alternative for the macro.
On 6/9/19 8:28 PM, Rolf Fokkens wrote:
> I haven't tested the fix (yet), but just looking at the code I'm
> perfectly fine with the proposed replacement of the macro
> PRECEDING_KEY by the preceding_key function.
>
> I have some minor concerns about the efficiency of the amount of
> indirections, but the gcc optimizer may take care of this. This is for
> later concern anyway.
>
> On 6/9/19 5:24 PM, Coly Li wrote:
>> Recently people report bcache code compiled with gcc9 is broken, one of
>> the buggy behavior I observe is that two adjacent 4KB I/Os should merge
>> into one but they don't. Finally it turns out to be a stack corruption
>> caused by macro PRECEDING_KEY().
>>
>> See how PRECEDING_KEY() is defined in bset.h,
>> 437 #define PRECEDING_KEY(_k) \
>> 438 ({ \
>> 439 struct bkey *_ret = NULL; \
>> 440 \
>> 441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
>> 442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
>> 443 \
>> 444 if (!_ret->low) \
>> 445 _ret->high--; \
>> 446 _ret->low--; \
>> 447 } \
>> 448 \
>> 449 _ret; \
>> 450 })
>>
>> At line 442, _ret points to address of a on-stack variable combined by
>> KEY(), the life range of this on-stack variable is in line 442-446,
>> once _ret is returned to bch_btree_insert_key(), the returned address
>> points to an invalid stack address and this address is overwritten in
>> the following called bch_btree_iter_init(). Then argument 'search' of
>> bch_btree_iter_init() points to some address inside stackframe of
>> bch_btree_iter_init(), exact address depends on how the compiler
>> allocates stack space. Now the stack is corrupted.
>>
>> Signed-off-by: Coly Li <colyli@suse.de>
>> Reviewed-by: Rolf Fokkens <rolf@rolffokkens.nl>
>> Reviewed-by: Pierre JUHEN <pierre.juhen@orange.fr>
>> Tested-by: Shenghui Wang <shhuiw@foxmail.com>
>> Cc: Kent Overstreet <kent.overstreet@gmail.com>
>> Cc: Nix <nix@esperi.org.uk>
>> ---
>> Changlog:
>> V2: Fix a pointer assignment problem in preceding_key(), which is
>> pointed by Rolf Fokkens and Pierre JUHEN.
>> V1: Initial RFC patch for review and comment.
>>
>> drivers/md/bcache/bset.c | 16 +++++++++++++---
>> drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
>> 2 files changed, 33 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
>> index 8f07fa6e1739..268f1b685084 100644
>> --- a/drivers/md/bcache/bset.c
>> +++ b/drivers/md/bcache/bset.c
>> @@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct
>> btree_keys *b, struct bkey *k,
>> struct bset *i = bset_tree_last(b)->data;
>> struct bkey *m, *prev = NULL;
>> struct btree_iter iter;
>> + struct bkey preceding_key_on_stack = ZERO_KEY;
>> + struct bkey *preceding_key_p = &preceding_key_on_stack;
>> BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
>> - m = bch_btree_iter_init(b, &iter, b->ops->is_extents
>> - ? PRECEDING_KEY(&START_KEY(k))
>> - : PRECEDING_KEY(k));
>> + /*
>> + * If k has preceding key, preceding_key_p will be set to address
>> + * of k's preceding key; otherwise preceding_key_p will be set
>> + * to NULL inside preceding_key().
>> + */
>> + if (b->ops->is_extents)
>> + preceding_key(&START_KEY(k), &preceding_key_p);
>> + else
>> + preceding_key(k, &preceding_key_p);
>> +
>> + m = bch_btree_iter_init(b, &iter, preceding_key_p);
>> if (b->ops->insert_fixup(b, k, &iter, replace_key))
>> return status;
>> diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
>> index bac76aabca6d..c71365e7c1fa 100644
>> --- a/drivers/md/bcache/bset.h
>> +++ b/drivers/md/bcache/bset.h
>> @@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct
>> bkey *where, struct bkey *k)
>> return __bch_cut_back(where, k);
>> }
>> -#define PRECEDING_KEY(_k) \
>> -({ \
>> - struct bkey *_ret = NULL; \
>> - \
>> - if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
>> - _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
>> - \
>> - if (!_ret->low) \
>> - _ret->high--; \
>> - _ret->low--; \
>> - } \
>> - \
>> - _ret; \
>> -})
>> +/*
>> + * Pointer '*preceding_key_p' points to a memory object to store
>> preceding
>> + * key of k. If the preceding key does not exist, set
>> '*preceding_key_p' to
>> + * NULL. So the caller of preceding_key() needs to take care of memory
>> + * which '*preceding_key_p' pointed to before calling preceding_key().
>> + * Currently the only caller of preceding_key() is
>> bch_btree_insert_key(),
>> + * and it points to an on-stack variable, so the memory release is
>> handled
>> + * by stackframe itself.
>> + */
>> +static inline void preceding_key(struct bkey *k, struct bkey
>> **preceding_key_p)
>> +{
>> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
>> + (**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
>> + if (!(*preceding_key_p)->low)
>> + (*preceding_key_p)->high--;
>> + (*preceding_key_p)->low--;
>> + } else {
>> + (*preceding_key_p) = NULL;
>> + }
>> +}
>> static inline bool bch_ptr_invalid(struct btree_keys *b, const
>> struct bkey *k)
>> {
>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-10 7:00 ` Rolf Fokkens
@ 2019-06-10 8:11 ` Coly Li
0 siblings, 0 replies; 8+ messages in thread
From: Coly Li @ 2019-06-10 8:11 UTC (permalink / raw)
To: Rolf Fokkens, linux-bcache; +Cc: linux-block, Kent Overstreet, Nix
On 2019/6/10 3:00 下午, Rolf Fokkens wrote:
> Did some testing, and I should not have underestimated the gcc
> optimizer. The inline function seems like a fine alternative for the macro.
>
Hi Rolf,
Thanks for the confirmation! I do appreciate all of your help, from the bug
report and information sharing to the code review and fix verification :-)
Coly Li
> On 6/9/19 8:28 PM, Rolf Fokkens wrote:
>> I haven't tested the fix (yet), but just looking at the code I'm
>> perfectly fine with the proposed replacement of the macro
>> PRECEDING_KEY by the preceding_key function.
>>
>> I have some minor concerns about the efficiency of the amount of
>> indirections, but the gcc optimizer may take care of this. This is for
>> later concern anyway.
>>
>> On 6/9/19 5:24 PM, Coly Li wrote:
>>> Recently people report bcache code compiled with gcc9 is broken, one of
>>> the buggy behavior I observe is that two adjacent 4KB I/Os should merge
>>> into one but they don't. Finally it turns out to be a stack corruption
>>> caused by macro PRECEDING_KEY().
>>>
>>> See how PRECEDING_KEY() is defined in bset.h,
>>> 437 #define PRECEDING_KEY(_k) \
>>> 438 ({ \
>>> 439 struct bkey *_ret = NULL; \
>>> 440 \
>>> 441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
>>> 442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
>>> 443 \
>>> 444 if (!_ret->low) \
>>> 445 _ret->high--; \
>>> 446 _ret->low--; \
>>> 447 } \
>>> 448 \
>>> 449 _ret; \
>>> 450 })
>>>
>>> At line 442, _ret points to address of a on-stack variable combined by
>>> KEY(), the life range of this on-stack variable is in line 442-446,
>>> once _ret is returned to bch_btree_insert_key(), the returned address
>>> points to an invalid stack address and this address is overwritten in
>>> the following called bch_btree_iter_init(). Then argument 'search' of
>>> bch_btree_iter_init() points to some address inside stackframe of
>>> bch_btree_iter_init(), exact address depends on how the compiler
>>> allocates stack space. Now the stack is corrupted.
>>>
>>> Signed-off-by: Coly Li <colyli@suse.de>
>>> Reviewed-by: Rolf Fokkens <rolf@rolffokkens.nl>
>>> Reviewed-by: Pierre JUHEN <pierre.juhen@orange.fr>
>>> Tested-by: Shenghui Wang <shhuiw@foxmail.com>
>>> Cc: Kent Overstreet <kent.overstreet@gmail.com>
>>> Cc: Nix <nix@esperi.org.uk>
>>> ---
>>> Changlog:
>>> V2: Fix a pointer assignment problem in preceding_key(), which is
>>> pointed by Rolf Fokkens and Pierre JUHEN.
>>> V1: Initial RFC patch for review and comment.
>>>
>>> drivers/md/bcache/bset.c | 16 +++++++++++++---
>>> drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
>>> 2 files changed, 33 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
>>> index 8f07fa6e1739..268f1b685084 100644
>>> --- a/drivers/md/bcache/bset.c
>>> +++ b/drivers/md/bcache/bset.c
>>> @@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct
>>> btree_keys *b, struct bkey *k,
>>> struct bset *i = bset_tree_last(b)->data;
>>> struct bkey *m, *prev = NULL;
>>> struct btree_iter iter;
>>> + struct bkey preceding_key_on_stack = ZERO_KEY;
>>> + struct bkey *preceding_key_p = &preceding_key_on_stack;
>>> BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
>>> - m = bch_btree_iter_init(b, &iter, b->ops->is_extents
>>> - ? PRECEDING_KEY(&START_KEY(k))
>>> - : PRECEDING_KEY(k));
>>> + /*
>>> + * If k has preceding key, preceding_key_p will be set to address
>>> + * of k's preceding key; otherwise preceding_key_p will be set
>>> + * to NULL inside preceding_key().
>>> + */
>>> + if (b->ops->is_extents)
>>> + preceding_key(&START_KEY(k), &preceding_key_p);
>>> + else
>>> + preceding_key(k, &preceding_key_p);
>>> +
>>> + m = bch_btree_iter_init(b, &iter, preceding_key_p);
>>> if (b->ops->insert_fixup(b, k, &iter, replace_key))
>>> return status;
>>> diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
>>> index bac76aabca6d..c71365e7c1fa 100644
>>> --- a/drivers/md/bcache/bset.h
>>> +++ b/drivers/md/bcache/bset.h
>>> @@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct
>>> bkey *where, struct bkey *k)
>>> return __bch_cut_back(where, k);
>>> }
>>> -#define PRECEDING_KEY(_k) \
>>> -({ \
>>> - struct bkey *_ret = NULL; \
>>> - \
>>> - if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
>>> - _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
>>> - \
>>> - if (!_ret->low) \
>>> - _ret->high--; \
>>> - _ret->low--; \
>>> - } \
>>> - \
>>> - _ret; \
>>> -})
>>> +/*
>>> + * Pointer '*preceding_key_p' points to a memory object to store
>>> preceding
>>> + * key of k. If the preceding key does not exist, set
>>> '*preceding_key_p' to
>>> + * NULL. So the caller of preceding_key() needs to take care of memory
>>> + * which '*preceding_key_p' pointed to before calling preceding_key().
>>> + * Currently the only caller of preceding_key() is
>>> bch_btree_insert_key(),
>>> + * and it points to an on-stack variable, so the memory release is
>>> handled
>>> + * by stackframe itself.
>>> + */
>>> +static inline void preceding_key(struct bkey *k, struct bkey
>>> **preceding_key_p)
>>> +{
>>> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
>>> + (**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
>>> + if (!(*preceding_key_p)->low)
>>> + (*preceding_key_p)->high--;
>>> + (*preceding_key_p)->low--;
>>> + } else {
>>> + (*preceding_key_p) = NULL;
>>> + }
>>> +}
>>> static inline bool bch_ptr_invalid(struct btree_keys *b, const
>>> struct bkey *k)
>>> {
>>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2019-06-10 8:11 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-09 15:24 [PATCH V2] bcache: fix stack corruption by PRECEDING_KEY() Coly Li
2019-06-09 15:28 ` Coly Li
2019-06-09 17:52 ` Pierre JUHEN
2019-06-09 22:15 ` Coly Li
2019-06-09 18:28 ` Rolf Fokkens
2019-06-09 22:17 ` Coly Li
2019-06-10 7:00 ` Rolf Fokkens
2019-06-10 8:11 ` Coly Li
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).