* [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY()
@ 2019-06-08 10:22 Coly Li
2019-06-08 18:50 ` Rolf Fokkens
2019-06-09 9:21 ` Coly Li
0 siblings, 2 replies; 9+ messages in thread
From: Coly Li @ 2019-06-08 10:22 UTC (permalink / raw)
To: linux-bcache
Cc: Coly Li, Kent Overstreet, Rolf Fokkens, Nix, Pierre JUHEN, linux-block
Recently people have reported that bcache code compiled with gcc9 is broken. One of
the buggy behaviors I observe is that two adjacent 4KB I/Os should merge
into one but they don't. It finally turns out to be a stack corruption
caused by the macro PRECEDING_KEY().
See how PRECEDING_KEY() is defined in bset.h,
437 #define PRECEDING_KEY(_k) \
438 ({ \
439 struct bkey *_ret = NULL; \
440 \
441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
443 \
444 if (!_ret->low) \
445 _ret->high--; \
446 _ret->low--; \
447 } \
448 \
449 _ret; \
450 })
At line 442, _ret points to the address of an on-stack variable constructed by
KEY(). The lifetime of this on-stack variable covers only lines 442-446;
once _ret is returned to bch_btree_insert_key(), the returned address
points to an invalid stack address, and this address is overwritten in
the subsequently called bch_btree_iter_init(). Then argument 'search' of
bch_btree_iter_init() points to some address inside the stack frame of
bch_btree_iter_init(); the exact address depends on how the compiler
allocates stack space. Now the stack is corrupted.
The fix is to avoid allocating and returning an on-stack variable that is only
valid within PRECEDING_KEY(). This patch changes the macro PRECEDING_KEY()
to an inline function, and allocates a separate on-stack variable in
function bch_btree_insert_key(), so that the allocated memory address
is always valid for the lifetime of bch_btree_insert_key().
NOTE: This is only an RFC patch for more people to test. During my
testing I find that bcache code no longer complains about out-of-order bkeys
in btree nodes, but the adjacent keys still don't fully merge as
expected (e.g. they should be merged into one single key). So I am
still checking what else needs to be fixed.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Rolf Fokkens <rolf@rolffokkens.nl>
Cc: Nix <nix@esperi.org.uk>
Cc: Pierre JUHEN <pierre.juhen@orange.fr>
Cc: linux-bcache@vger.kernel.org
Cc: linux-block@vger.kernel.org
---
drivers/md/bcache/bset.c | 16 +++++++++++++---
drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
2 files changed, 33 insertions(+), 17 deletions(-)
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8f07fa6e1739..9422f3f1c682 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
struct bset *i = bset_tree_last(b)->data;
struct bkey *m, *prev = NULL;
struct btree_iter iter;
+ struct bkey preceding_key_on_stack = ZERO_KEY;
+ struct bkey *preceding_key_p = &preceding_key_on_stack;
BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
- m = bch_btree_iter_init(b, &iter, b->ops->is_extents
- ? PRECEDING_KEY(&START_KEY(k))
- : PRECEDING_KEY(k));
+ /*
+ * If k has preceding key, preceding_key_p will be set to address
+ * of k's preceding key; otherwise preceding_key_p will be set
+ * to NULL inside preceding_key().
+ */
+ if (b->ops->is_extents)
+ preceding_key(&START_KEY(k), preceding_key_p);
+ else
+ preceding_key(k, preceding_key_p);
+
+ m = bch_btree_iter_init(b, &iter, preceding_key_p);
if (b->ops->insert_fixup(b, k, &iter, replace_key))
return status;
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index bac76aabca6d..6ab165dcb717 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
return __bch_cut_back(where, k);
}
-#define PRECEDING_KEY(_k) \
-({ \
- struct bkey *_ret = NULL; \
- \
- if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
- _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
- \
- if (!_ret->low) \
- _ret->high--; \
- _ret->low--; \
- } \
- \
- _ret; \
-})
+/*
+ * Pointer preceding_key_p points to a memory object to store preceding
+ * key of k. If the preceding key does not exist, set preceding_key_p to
+ * NULL. So the caller of preceding_key() needs to take care of memory
+ * which preceding_key_p pointed to before calling preceding_key().
+ * Currently the only caller of preceding_key() is bch_btree_insert_key(),
+ * and preceding_key_p points to an on-stack variable, so the memory
+ * release is handled by stackframe itself.
+ */
+static inline void preceding_key(struct bkey *k, struct bkey *preceding_key_p)
+{
+ if (KEY_INODE(k) || KEY_OFFSET(k)) {
+ *preceding_key_p = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
+ if (!preceding_key_p->low)
+ preceding_key_p->high--;
+ preceding_key_p->low--;
+ } else {
+ preceding_key_p = NULL;
+ }
+}
static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
{
--
2.16.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-08 10:22 [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY() Coly Li
@ 2019-06-08 18:50 ` Rolf Fokkens
2019-06-08 21:52 ` Pierre JUHEN
2019-06-09 0:59 ` Coly Li
2019-06-09 9:21 ` Coly Li
1 sibling, 2 replies; 9+ messages in thread
From: Rolf Fokkens @ 2019-06-08 18:50 UTC (permalink / raw)
To: Coly Li, linux-bcache; +Cc: Kent Overstreet, Nix, Pierre JUHEN, linux-block
On 6/8/19 12:22 PM, Coly Li wrote:
> +static inline void preceding_key(struct bkey *k, struct bkey *preceding_key_p)
> +{
> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
> + *preceding_key_p = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
> + if (!preceding_key_p->low)
> + preceding_key_p->high--;
> + preceding_key_p->low--;
> + } else {
> + preceding_key_p = NULL;
If I'm correct, the line above has no net effect, it just changes a
local variable (parameter) with no effect elsewhere. So the else part
may be left out, or do you mean this?
*preceding_key_p = ZERO_KEY;
> + }
> +}
>
> static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
> {
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-08 18:50 ` Rolf Fokkens
@ 2019-06-08 21:52 ` Pierre JUHEN
2019-06-09 0:59 ` Coly Li
1 sibling, 0 replies; 9+ messages in thread
From: Pierre JUHEN @ 2019-06-08 21:52 UTC (permalink / raw)
To: Rolf Fokkens, Coly Li; +Cc: Kent Overstreet, Nix, linux-block
Hi Coly, Rolf,
I think Rolf is right, with his second option (*preceding_key_p = ZERO_KEY;)
Regards,
Le 08/06/2019 à 20:50, Rolf Fokkens a écrit :
> On 6/8/19 12:22 PM, Coly Li wrote:
>> +static inline void preceding_key(struct bkey *k, struct bkey
>> *preceding_key_p)
>> +{
>> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
>> + *preceding_key_p = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
>> + if (!preceding_key_p->low)
>> + preceding_key_p->high--;
>> + preceding_key_p->low--;
>> + } else {
>> + preceding_key_p = NULL;
>
> If I'm correct, the line above has no net effect, it just changes a
> local variable (parameter) with no effect elsewhere. So the else part
> may be left out, or do you mean this?
>
> *preceding_key_p = ZERO_KEY;
>
>> + }
>> +}
>> static inline bool bch_ptr_invalid(struct btree_keys *b, const
>> struct bkey *k)
>> {
>
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-08 18:50 ` Rolf Fokkens
2019-06-08 21:52 ` Pierre JUHEN
@ 2019-06-09 0:59 ` Coly Li
2019-06-09 5:56 ` Pierre JUHEN
1 sibling, 1 reply; 9+ messages in thread
From: Coly Li @ 2019-06-09 0:59 UTC (permalink / raw)
To: Rolf Fokkens, linux-bcache
Cc: Kent Overstreet, Nix, Pierre JUHEN, linux-block
On 2019/6/9 2:50 上午, Rolf Fokkens wrote:
> On 6/8/19 12:22 PM, Coly Li wrote:
>> +static inline void preceding_key(struct bkey *k, struct bkey
>> *preceding_key_p)
>> +{
>> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
>> + *preceding_key_p = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
>> + if (!preceding_key_p->low)
>> + preceding_key_p->high--;
>> + preceding_key_p->low--;
>> + } else {
>> + preceding_key_p = NULL;
>
> If I'm correct, the line above has no net effect, it just changes a
> local variable (parameter) with no effect elsewhere. So the else part
> may be left out, or do you mean this?
>
> *preceding_key_p = ZERO_KEY;
>
Hi Rolf and Pierre,
Setting preceding_key_p to NULL is for the following
bch_btree_iter_init(). See the call chains
bch_btree_insert_key()->bch_btree_iter_init()->
__bch_btree_iter_init()->bch_bset_search()
preceding_key_p is parameter 'search' in bch_bset_search().
If it is NULL, t->data->start returns directly; if it is not NULL,
__bch_bset_search() is called.
Indeed *preceding_key_p = ZERO_KEY is unnecessary, just makes me
comfortable. The problem is PRECEDING_KEY() allocates an on-stack
variable, and this one is overlapped with stackframe of
bch_btree_iter_init(), and overwritten. Because this anonymous on-stack
variable is allocated inside PRECEDING_KEY(), not (and should not be)
protected by compiler.
So I add the new local variable preceding_key_on_stack (and make preceding_key_p
point to it) explicitly on the stack frame of bch_btree_insert_key(), which
will never overlap with the stack frame of bch_btree_iter_init().
Thanks.
--
Coly Li
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-09 0:59 ` Coly Li
@ 2019-06-09 5:56 ` Pierre JUHEN
2019-06-09 8:23 ` Coly Li
0 siblings, 1 reply; 9+ messages in thread
From: Pierre JUHEN @ 2019-06-09 5:56 UTC (permalink / raw)
To: Coly Li, Rolf Fokkens; +Cc: Nix, linux-block
Le 09/06/2019 à 02:59, Coly Li a écrit :
> On 2019/6/9 2:50 上午, Rolf Fokkens wrote:
>> On 6/8/19 12:22 PM, Coly Li wrote:
>>> +static inline void preceding_key(struct bkey *k, struct bkey
>>> *preceding_key_p)
>>> +{
>>> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
>>> + *preceding_key_p = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
>>> + if (!preceding_key_p->low)
>>> + preceding_key_p->high--;
>>> + preceding_key_p->low--;
>>> + } else {
>>> + preceding_key_p = NULL;
>> If I'm correct, the line above has no net effect, it just changes a
>> local variable (parameter) with no effect elsewhere. So the else part
>> may be left out, or do you mean this?
>>
>> *preceding_key_p = ZERO_KEY;
>>
> Hi Rolf and Pierre,
>
> Setting preceding_key_p to NULL is for the following
> bch_btree_iter_init(). See the call chains
>
> bch_btree_insert_key()->bch_btree_iter_init()->
> __bch_btree_iter_init()->bch_bset_search()
>
> preceding_key_p is parameter 'search' in bch_bset_search().
> If it is NULL, t->data->start returns directly; if it is not NULL,
> __bch_bset_search() is called.
>
> Indeed *preceding_key_p = ZERO_KEY is unnecessary, just makes me
> comfortable. The problem is PRECEDING_KEY() allocates an on-stack
> variable, and this one is overlapped with stackframe of
> bch_btree_iter_init(), and overwritten. Because this anonymous on-stack
> variable is allocated inside PRECEDING_KEY(), not (and should not be)
> protected by compiler.
>
> So I add the new local variable preceding_key (and make preceding_key_p
> points to it) explicitly on stack frame of bch_btree_insert_key(), which
> will never be overlapped with stackframe of bch_btree_iter_init().
>
> Thanks.
HI,
so the right line should be :
*preceding_key_p = NULL;
because Rolf is right
preceding_key_p = NULL;
does change only the value of the calling parameter and exits, not the
value of the preceding key in the stack.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-09 5:56 ` Pierre JUHEN
@ 2019-06-09 8:23 ` Coly Li
0 siblings, 0 replies; 9+ messages in thread
From: Coly Li @ 2019-06-09 8:23 UTC (permalink / raw)
To: Pierre JUHEN, Rolf Fokkens; +Cc: Nix, linux-block
On 2019/6/9 1:56 下午, Pierre JUHEN wrote:
> Le 09/06/2019 à 02:59, Coly Li a écrit :
>> On 2019/6/9 2:50 上午, Rolf Fokkens wrote:
>>> On 6/8/19 12:22 PM, Coly Li wrote:
>>>> +static inline void preceding_key(struct bkey *k, struct bkey
>>>> *preceding_key_p)
>>>> +{
>>>> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
>>>> + *preceding_key_p = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
>>>> + if (!preceding_key_p->low)
>>>> + preceding_key_p->high--;
>>>> + preceding_key_p->low--;
>>>> + } else {
>>>> + preceding_key_p = NULL;
>>> If I'm correct, the line above has no net effect, it just changes a
>>> local variable (parameter) with no effect elsewhere. So the else part
>>> may be left out, or do you mean this?
>>>
>>> *preceding_key_p = ZERO_KEY;
>>>
>> Hi Rolf and Pierre,
>>
>> Setting preceding_key_p to NULL is for the following
>> bch_btree_iter_init(). See the call chains
>>
>> bch_btree_insert_key()->bch_btree_iter_init()->
>> __bch_btree_iter_init()->bch_bset_search()
>>
>> preceding_key_p is parameter 'search' in bch_bset_search().
>> If it is NULL, t->data->start returns directly; if it is not NULL,
>> __bch_bset_search() is called.
>>
>> Indeed *preceding_key_p = ZERO_KEY is unnecessary, just makes me
>> comfortable. The problem is PRECEDING_KEY() allocates an on-stack
>> variable, and this one is overlapped with stackframe of
>> bch_btree_iter_init(), and overwritten. Because this anonymous on-stack
>> variable is allocated inside PRECEDING_KEY(), not (and should not be)
>> protected by compiler.
>>
>> So I add the new local variable preceding_key (and make preceding_key_p
>> points to it) explicitly on stack frame of bch_btree_insert_key(), which
>> will never be overlapped with stackframe of bch_btree_iter_init().
>>
>> Thanks.
>
> HI,
>
>
> so the right line should be :
>
> *preceding_key_p = NULL;
>
> because Rolf is right
>
> preceding_key_p = NULL;
>
> does change only the value of the calling parameter and exits, not the
> value of the preceding key in the stack.
Hmm, can you be more specific with reference to the code? I don't follow what
you mean... Thanks.
Coly Li
--
Coly Li
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-08 10:22 [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY() Coly Li
2019-06-08 18:50 ` Rolf Fokkens
@ 2019-06-09 9:21 ` Coly Li
2019-06-09 10:46 ` Pierre JUHEN
1 sibling, 1 reply; 9+ messages in thread
From: Coly Li @ 2019-06-09 9:21 UTC (permalink / raw)
To: linux-bcache
Cc: Kent Overstreet, Rolf Fokkens, Nix, Pierre JUHEN, linux-block
On 2019/6/8 6:22 下午, Coly Li wrote:
> Recently people report bcache code compiled with gcc9 is broken, one of
> the buggy behavior I observe is that two adjacent 4KB I/Os should merge
> into one but they don't. Finally it turns out to be a stack corruption
> caused by macro PRECEDING_KEY().
>
> See how PRECEDING_KEY() is defined in bset.h,
> 437 #define PRECEDING_KEY(_k) \
> 438 ({ \
> 439 struct bkey *_ret = NULL; \
> 440 \
> 441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
> 442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
> 443 \
> 444 if (!_ret->low) \
> 445 _ret->high--; \
> 446 _ret->low--; \
> 447 } \
> 448 \
> 449 _ret; \
> 450 })
>
> At line 442, _ret points to address of a on-stack variable combined by
> KEY(), the life range of this on-stack variable is in line 442-446,
> once _ret is returned to bch_btree_insert_key(), the returned address
> points to an invalid stack address and this adress is overwritten in
> the following called bch_btree_iter_init(). Then argument 'search' of
> bch_btree_iter_init() points to some address inside stackframe of
> bch_btree_iter_init(), exact address depends on how the compiler
> allocates stack space. Now the stack is corrupted.
>
> The fix is to avoid to allocate and return an on-stack variable only
> in range of PRECEDING_KEY(). This patch changes macro PRECEDING_KEY()
> to an inline function, and allocate another on-stack variable from
> function bch_btree_insert_key(), then the allocated memory address
> will be always valid in life range of bch_btree_insert_key().
>
> NOTE: This is only a RFC patch for more people to test. During my
> test I find bcache code does not complain out-of-order bkeys in btree
> node anymore, but the adjacent keys still don't totally merge as
> expected (e.g. they should be merged into one single key). So now I
> still continue to check what needs to be fixed more.
>
Hi folks,
After more testing, I realize the cached bkeys are not always merged,
this is the bkey dump information from
/sys/kernel/debug/bcache/bcache-87adfbc4-0b11-45b9-9a11-a11cfe5df2eb,
0:16 len 120 -> [0:377856 gen 1] dirty
0:136 len 8 -> [0:377976 gen 1] dirty
0:144 len 896 -> [0:721000 gen 1]
0:4112 len 8 -> [0:393136 gen 1]
So the patched bcache code is behaving correctly; IMHO no further fix is
necessary.
I see Shenghui tested and verified the fix, more testing or review
comments are welcome.
Thanks.
Coly Li
> Signed-off-by: Coly Li <colyli@suse.de>
> Cc: Kent Overstreet <kent.overstreet@gmail.com>
> Cc: Rolf Fokkens <rolf@rolffokkens.nl>
> Cc: Nix <nix@esperi.org.uk>
> Cc: Pierre JUHEN <pierre.juhen@orange.fr>
> Cc: linux-bcache@vger.kernel.org
> Cc: linux-block@vger.kernel.org
> ---
> drivers/md/bcache/bset.c | 16 +++++++++++++---
> drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
> 2 files changed, 33 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
> index 8f07fa6e1739..9422f3f1c682 100644
> --- a/drivers/md/bcache/bset.c
> +++ b/drivers/md/bcache/bset.c
> @@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
> struct bset *i = bset_tree_last(b)->data;
> struct bkey *m, *prev = NULL;
> struct btree_iter iter;
> + struct bkey preceding_key_on_stack = ZERO_KEY;
> + struct bkey *preceding_key_p = &preceding_key_on_stack;
>
> BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
>
> - m = bch_btree_iter_init(b, &iter, b->ops->is_extents
> - ? PRECEDING_KEY(&START_KEY(k))
> - : PRECEDING_KEY(k));
> + /*
> + * If k has preceding key, preceding_key_p will be set to address
> + * of k's preceding key; otherwise preceding_key_p will be set
> + * to NULL inside preceding_key().
> + */
> + if (b->ops->is_extents)
> + preceding_key(&START_KEY(k), preceding_key_p);
> + else
> + preceding_key(k, preceding_key_p);
> +
> + m = bch_btree_iter_init(b, &iter, preceding_key_p);
>
> if (b->ops->insert_fixup(b, k, &iter, replace_key))
> return status;
> diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
> index bac76aabca6d..6ab165dcb717 100644
> --- a/drivers/md/bcache/bset.h
> +++ b/drivers/md/bcache/bset.h
> @@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
> return __bch_cut_back(where, k);
> }
>
> -#define PRECEDING_KEY(_k) \
> -({ \
> - struct bkey *_ret = NULL; \
> - \
> - if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
> - _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
> - \
> - if (!_ret->low) \
> - _ret->high--; \
> - _ret->low--; \
> - } \
> - \
> - _ret; \
> -})
> +/*
> + * Pointer preceding_key_p points to a memory object to store preceding
> + * key of k. If the preceding key does not exist, set preceding_key_p to
> + * NULL. So the caller of preceding_key() needs to take care of memory
> + * which preceding_key_p pointed to before calling preceding_key().
> + * Currently the only caller of preceding_key() is bch_btree_insert_key(),
> + * and preceding_key_p points to an on-stack variable, so the memory
> + * release is handled by stackframe itself.
> + */
> +static inline void preceding_key(struct bkey *k, struct bkey *preceding_key_p)
> +{
> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
> + *preceding_key_p = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
> + if (!preceding_key_p->low)
> + preceding_key_p->high--;
> + preceding_key_p->low--;
> + } else {
> + preceding_key_p = NULL;
> + }
> +}
>
> static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
> {
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-09 9:21 ` Coly Li
@ 2019-06-09 10:46 ` Pierre JUHEN
2019-06-09 12:16 ` Coly Li
0 siblings, 1 reply; 9+ messages in thread
From: Pierre JUHEN @ 2019-06-09 10:46 UTC (permalink / raw)
To: Coly Li, linux-bcache; +Cc: Rolf Fokkens, Nix, linux-block
Hi Coly,
As Rolf and I said, your code cannot set the caller's preceding_key_p to NULL: the assignment inside preceding_key() only changes the function's own local copy of the pointer.
The modified patch below does what you expect (I think).
Regards,
drivers/md/bcache/bset.c | 16 +++++++++++++---
drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
2 files changed, 33 insertions(+), 17 deletions(-)
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8f07fa6e1739..9422f3f1c682 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
struct bset *i = bset_tree_last(b)->data;
struct bkey *m, *prev = NULL;
struct btree_iter iter;
+ struct bkey preceding_key_on_stack = ZERO_KEY;
+ struct bkey *preceding_key_p = &preceding_key_on_stack;
BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
- m = bch_btree_iter_init(b, &iter, b->ops->is_extents
- ? PRECEDING_KEY(&START_KEY(k))
- : PRECEDING_KEY(k));
+ /*
+ * If k has preceding key, preceding_key_p will be set to address
+ * of k's preceding key; otherwise preceding_key_p will be set
+ * to NULL inside preceding_key().
+ */
+ if (b->ops->is_extents)
+ preceding_key_p = preceding_key(&START_KEY(k), preceding_key_p);
+ else
+ preceding_key_p = preceding_key(k, preceding_key_p);
+
+ m = bch_btree_iter_init(b, &iter, preceding_key_p);
if (b->ops->insert_fixup(b, k, &iter, replace_key))
return status;
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index bac76aabca6d..6ab165dcb717 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
return __bch_cut_back(where, k);
}
-#define PRECEDING_KEY(_k) \
-({ \
- struct bkey *_ret = NULL; \
- \
- if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
- _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
- \
- if (!_ret->low) \
- _ret->high--; \
- _ret->low--; \
- } \
- \
- _ret; \
-})
+/*
+ * Pointer preceding_key_p points to a memory object to store preceding
+ * key of k. If the preceding key does not exist, set preceding_key_p to
+ * NULL. So the caller of preceding_key() needs to take care of memory
+ * which preceding_key_p pointed to before calling preceding_key().
+ * Currently the only caller of preceding_key() is bch_btree_insert_key(),
+ * and preceding_key_p points to an on-stack variable, so the memory
+ * release is handled by stackframe itself.
+ */
+static inline struct bkey *preceding_key(struct bkey *k, struct bkey *preceding_key_p)
+{
+ if (KEY_INODE(k) || KEY_OFFSET(k)) {
+ *preceding_key_p = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
+ if (!preceding_key_p->low)
+ preceding_key_p->high--;
+ preceding_key_p->low--;
+ return (preceding_key_p);
+ } else {
+ return(NULL);
+ }
+}
static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
{
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY()
2019-06-09 10:46 ` Pierre JUHEN
@ 2019-06-09 12:16 ` Coly Li
0 siblings, 0 replies; 9+ messages in thread
From: Coly Li @ 2019-06-09 12:16 UTC (permalink / raw)
To: Pierre JUHEN, linux-bcache; +Cc: Rolf Fokkens, Nix, linux-block
On 2019/6/9 6:46 下午, Pierre JUHEN wrote:
> Hi Coly,
>
> As Rolf and I said, the value of preceding_key_p in the stack cannot be
> set to NULL by your code.
>
> The modified patch hereafter does what you expect (I think).
>
Oh, I understand now. Yes, you are right, I made a mistake in the previous
patch. I will post an updated version which uses "struct bkey
**preceding_key_p" as the parameter of preceding_key().
And I will add Reviewed-by: tags for you (Pierre and Rolf) in the updated
version. Thanks for your review!
Coly Li
>
>
> drivers/md/bcache/bset.c | 16 +++++++++++++---
> drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
> 2 files changed, 33 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
> index 8f07fa6e1739..9422f3f1c682 100644
> --- a/drivers/md/bcache/bset.c
> +++ b/drivers/md/bcache/bset.c
> @@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct
> btree_keys *b, struct bkey *k,
> struct bset *i = bset_tree_last(b)->data;
> struct bkey *m, *prev = NULL;
> struct btree_iter iter;
> + struct bkey preceding_key_on_stack = ZERO_KEY;
> + struct bkey *preceding_key_p = &preceding_key_on_stack;
>
> BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
>
> - m = bch_btree_iter_init(b, &iter, b->ops->is_extents
> - ? PRECEDING_KEY(&START_KEY(k))
> - : PRECEDING_KEY(k));
> + /*
> + * If k has preceding key, preceding_key_p will be set to address
> + * of k's preceding key; otherwise preceding_key_p will be set
> + * to NULL inside preceding_key().
> + */
> + if (b->ops->is_extents)
> + preceding_key_p = preceding_key(&START_KEY(k), preceding_key_p);
> + else
> + preceding_key_p = preceding_key(k, preceding_key_p);
> +
> + m = bch_btree_iter_init(b, &iter, preceding_key_p);
>
> if (b->ops->insert_fixup(b, k, &iter, replace_key))
> return status;
> diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
> index bac76aabca6d..6ab165dcb717 100644
> --- a/drivers/md/bcache/bset.h
> +++ b/drivers/md/bcache/bset.h
> @@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct bkey
> *where, struct bkey *k)
> return __bch_cut_back(where, k);
> }
>
> -#define PRECEDING_KEY(_k) \
> -({ \
> - struct bkey *_ret = NULL; \
> - \
> - if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
> - _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
> - \
> - if (!_ret->low) \
> - _ret->high--; \
> - _ret->low--; \
> - } \
> - \
> - _ret; \
> -})
> +/*
> + * Pointer preceding_key_p points to a memory object to store preceding
> + * key of k. If the preceding key does not exist, set preceding_key_p to
> + * NULL. So the caller of preceding_key() needs to take care of memory
> + * which preceding_key_p pointed to before calling preceding_key().
> + * Currently the only caller of preceding_key() is bch_btree_insert_key(),
> + * and preceding_key_p points to an on-stack variable, so the memory
> + * release is handled by stackframe itself.
> + */
> +static inline struct bkey *preceding_key(struct bkey *k, struct bkey
> *preceding_key_p)
> +{
> + if (KEY_INODE(k) || KEY_OFFSET(k)) {
> + *preceding_key_p = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
> + if (!preceding_key_p->low)
> + preceding_key_p->high--;
> + preceding_key_p->low--;
> + return (preceding_key_p);
> + } else {
> + return(NULL);
> + }
> +}
>
> static inline bool bch_ptr_invalid(struct btree_keys *b, const struct
> bkey *k)
> {
>
>
--
Coly Li
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2019-06-09 12:16 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-08 10:22 [RFC PATCH] bcache: fix stack corruption by PRECEDING_KEY() Coly Li
2019-06-08 18:50 ` Rolf Fokkens
2019-06-08 21:52 ` Pierre JUHEN
2019-06-09 0:59 ` Coly Li
2019-06-09 5:56 ` Pierre JUHEN
2019-06-09 8:23 ` Coly Li
2019-06-09 9:21 ` Coly Li
2019-06-09 10:46 ` Pierre JUHEN
2019-06-09 12:16 ` Coly Li
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).