* [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer
@ 2023-05-05 12:32 Zhang Yi
2023-05-05 12:32 ` [PATCH v2 2/3] jbd2: remove t_checkpoint_io_list Zhang Yi
` (3 more replies)
0 siblings, 4 replies; 7+ messages in thread
From: Zhang Yi @ 2023-05-05 12:32 UTC (permalink / raw)
To: linux-ext4
Cc: tytso, adilger.kernel, jack, yi.zhang, yi.zhang, yukuai3, chengzhihao1
From: Zhang Yi <yi.zhang@huawei.com>
There is a long-standing metadata corruption issue that happens from
time to time, but it's very difficult to reproduce and analyse. Thanks
to the JBD2_CYCLE_RECORD option, we found out that the problem is that
the checkpointing process fails to write out some buffers which are
raced by another do_get_write_access(). See below for details.
jbd2_log_do_checkpoint() //transaction X
//buffer A is dirty and does not belong to any transaction
__buffer_relink_io() //move it to the IO list
__flush_batch()
write_dirty_buffer()
do_get_write_access()
clear_buffer_dirty
__jbd2_journal_file_buffer()
//add buffer A to a new transaction Y
lock_buffer(bh)
//doesn't write out
__jbd2_journal_remove_checkpoint()
//finish checkpoint except buffer A
//filesystem is corrupted if the new transaction Y isn't fully written out.
The t_checkpoint_list walking loop in jbd2_log_do_checkpoint() already
handles waiting for buffers under IO and for buffers re-added to a new
transaction to complete commit, and it also removes cleaned buffers,
which makes sure the list will eventually become empty. So it's fine to
leave buffers on the t_checkpoint_list while flushing them out and to
completely stop using the t_checkpoint_io_list.
Cc: stable@vger.kernel.org
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Tested-by: Zhihao Cheng <chengzhihao1@huawei.com>
---
v1->v2:
- Leave flushing checkpointing buffers on the t_checkpoint_list and
stop using t_checkpoint_io_list.
fs/jbd2/checkpoint.c | 94 +++++++++++---------------------------------
1 file changed, 23 insertions(+), 71 deletions(-)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 51bd38da21cd..ae1ebfb8bc86 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -57,28 +57,6 @@ static inline void __buffer_unlink(struct journal_head *jh)
}
}
-/*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
- transaction_t *transaction = jh->b_cp_transaction;
-
- __buffer_unlink_first(jh);
-
- if (!transaction->t_checkpoint_io_list) {
- jh->b_cpnext = jh->b_cpprev = jh;
- } else {
- jh->b_cpnext = transaction->t_checkpoint_io_list;
- jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
- jh->b_cpprev->b_cpnext = jh;
- jh->b_cpnext->b_cpprev = jh;
- }
- transaction->t_checkpoint_io_list = jh;
-}
-
/*
* Check a checkpoint buffer could be release or not.
*
@@ -183,6 +161,7 @@ __flush_batch(journal_t *journal, int *batch_count)
struct buffer_head *bh = journal->j_chkpt_bhs[i];
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
+ journal->j_chkpt_bhs[i] = NULL;
}
*batch_count = 0;
}
@@ -242,6 +221,11 @@ int jbd2_log_do_checkpoint(journal_t *journal)
jh = transaction->t_checkpoint_list;
bh = jh2bh(jh);
+ /*
+ * The buffer may be writing back, or flushing out in the
+ * last couple of cycles, or re-adding into a new transaction,
+ * need to check it again until it's unlocked.
+ */
if (buffer_locked(bh)) {
get_bh(bh);
spin_unlock(&journal->j_list_lock);
@@ -290,25 +274,25 @@ int jbd2_log_do_checkpoint(journal_t *journal)
if (__jbd2_journal_remove_checkpoint(jh))
/* The transaction was released; we're done */
goto out;
- continue;
+ } else {
+ /*
+ * We are about to write the buffer, it could be
+ * raced by some other transaction shrink or buffer
+ * re-log logic once we release the j_list_lock,
+ * leave it on the checkpoint list and check status
+ * again to make sure it's clean.
+ */
+ BUFFER_TRACE(bh, "queue");
+ get_bh(bh);
+ J_ASSERT_BH(bh, !buffer_jwrite(bh));
+ journal->j_chkpt_bhs[batch_count++] = bh;
+ transaction->t_chp_stats.cs_written++;
+ transaction->t_checkpoint_list = jh->b_cpnext;
}
- /*
- * Important: we are about to write the buffer, and
- * possibly block, while still holding the journal
- * lock. We cannot afford to let the transaction
- * logic start messing around with this buffer before
- * we write it to disk, as that would break
- * recoverability.
- */
- BUFFER_TRACE(bh, "queue");
- get_bh(bh);
- J_ASSERT_BH(bh, !buffer_jwrite(bh));
- journal->j_chkpt_bhs[batch_count++] = bh;
- __buffer_relink_io(jh);
- transaction->t_chp_stats.cs_written++;
+
if ((batch_count == JBD2_NR_BATCH) ||
- need_resched() ||
- spin_needbreak(&journal->j_list_lock))
+ need_resched() || spin_needbreak(&journal->j_list_lock) ||
+ jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
goto unlock_and_flush;
}
@@ -322,38 +306,6 @@ int jbd2_log_do_checkpoint(journal_t *journal)
goto restart;
}
- /*
- * Now we issued all of the transaction's buffers, let's deal
- * with the buffers that are out for I/O.
- */
-restart2:
- /* Did somebody clean up the transaction in the meanwhile? */
- if (journal->j_checkpoint_transactions != transaction ||
- transaction->t_tid != this_tid)
- goto out;
-
- while (transaction->t_checkpoint_io_list) {
- jh = transaction->t_checkpoint_io_list;
- bh = jh2bh(jh);
- if (buffer_locked(bh)) {
- get_bh(bh);
- spin_unlock(&journal->j_list_lock);
- wait_on_buffer(bh);
- /* the journal_head may have gone by now */
- BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
- spin_lock(&journal->j_list_lock);
- goto restart2;
- }
-
- /*
- * Now in whatever state the buffer currently is, we
- * know that it has been written out and so we can
- * drop it from the list
- */
- if (__jbd2_journal_remove_checkpoint(jh))
- break;
- }
out:
spin_unlock(&journal->j_list_lock);
result = jbd2_cleanup_journal_tail(journal);
--
2.31.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v2 2/3] jbd2: remove t_checkpoint_io_list
2023-05-05 12:32 [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer Zhang Yi
@ 2023-05-05 12:32 ` Zhang Yi
2023-05-05 13:13 ` Jan Kara
2023-05-05 12:32 ` [PATCH v2 3/3] jbd2: remove released parameter in journal_shrink_one_cp_list() Zhang Yi
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: Zhang Yi @ 2023-05-05 12:32 UTC (permalink / raw)
To: linux-ext4
Cc: tytso, adilger.kernel, jack, yi.zhang, yi.zhang, yukuai3, chengzhihao1
From: Zhang Yi <yi.zhang@huawei.com>
Since t_checkpoint_io_list is no longer used in jbd2_log_do_checkpoint(),
it's time to remove the whole t_checkpoint_io_list logic.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/jbd2/checkpoint.c | 42 ++----------------------------------------
fs/jbd2/commit.c | 3 +--
include/linux/jbd2.h | 6 ------
3 files changed, 3 insertions(+), 48 deletions(-)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ae1ebfb8bc86..2b62154e9f1e 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -27,7 +27,7 @@
*
* Called with j_list_lock held.
*/
-static inline void __buffer_unlink_first(struct journal_head *jh)
+static inline void __buffer_unlink(struct journal_head *jh)
{
transaction_t *transaction = jh->b_cp_transaction;
@@ -40,23 +40,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh)
}
}
-/*
- * Unlink a buffer from a transaction checkpoint(io) list.
- *
- * Called with j_list_lock held.
- */
-static inline void __buffer_unlink(struct journal_head *jh)
-{
- transaction_t *transaction = jh->b_cp_transaction;
-
- __buffer_unlink_first(jh);
- if (transaction->t_checkpoint_io_list == jh) {
- transaction->t_checkpoint_io_list = jh->b_cpnext;
- if (transaction->t_checkpoint_io_list == jh)
- transaction->t_checkpoint_io_list = NULL;
- }
-}
-
/*
* Check a checkpoint buffer could be release or not.
*
@@ -499,15 +482,6 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
break;
if (need_resched() || spin_needbreak(&journal->j_list_lock))
break;
- if (released)
- continue;
-
- nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
- nr_to_scan, &released);
- if (*nr_to_scan == 0)
- break;
- if (need_resched() || spin_needbreak(&journal->j_list_lock))
- break;
} while (transaction != last_transaction);
if (transaction != last_transaction) {
@@ -562,17 +536,6 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
*/
if (need_resched())
return;
- if (ret)
- continue;
- /*
- * It is essential that we are as careful as in the case of
- * t_checkpoint_list with removing the buffer from the list as
- * we can possibly see not yet submitted buffers on io_list
- */
- ret = journal_clean_one_cp_list(transaction->
- t_checkpoint_io_list, destroy);
- if (need_resched())
- return;
/*
* Stop scanning if we couldn't free the transaction. This
* avoids pointless scanning of transactions which still
@@ -657,7 +620,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
jbd2_journal_put_journal_head(jh);
/* Is this transaction empty? */
- if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
+ if (transaction->t_checkpoint_list)
return 0;
/*
@@ -749,7 +712,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
J_ASSERT(transaction->t_forget == NULL);
J_ASSERT(transaction->t_shadow_list == NULL);
J_ASSERT(transaction->t_checkpoint_list == NULL);
- J_ASSERT(transaction->t_checkpoint_io_list == NULL);
J_ASSERT(atomic_read(&transaction->t_updates) == 0);
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b33155dd7001..1073259902a6 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1141,8 +1141,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
spin_lock(&journal->j_list_lock);
commit_transaction->t_state = T_FINISHED;
/* Check if the transaction can be dropped now that we are finished */
- if (commit_transaction->t_checkpoint_list == NULL &&
- commit_transaction->t_checkpoint_io_list == NULL) {
+ if (commit_transaction->t_checkpoint_list == NULL) {
__jbd2_journal_drop_transaction(journal, commit_transaction);
jbd2_journal_free_transaction(commit_transaction);
}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f619bae1dcc5..91a2cf4bc575 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -622,12 +622,6 @@ struct transaction_s
*/
struct journal_head *t_checkpoint_list;
- /*
- * Doubly-linked circular list of all buffers submitted for IO while
- * checkpointing. [j_list_lock]
- */
- struct journal_head *t_checkpoint_io_list;
-
/*
* Doubly-linked circular list of metadata buffers being
* shadowed by log IO. The IO buffers on the iobuf list and
--
2.31.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v2 3/3] jbd2: remove released parameter in journal_shrink_one_cp_list()
2023-05-05 12:32 [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer Zhang Yi
2023-05-05 12:32 ` [PATCH v2 2/3] jbd2: remove t_checkpoint_io_list Zhang Yi
@ 2023-05-05 12:32 ` Zhang Yi
2023-05-05 13:13 ` Jan Kara
2023-05-05 13:12 ` [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer Jan Kara
2023-05-15 6:24 ` Zhang Yi
3 siblings, 1 reply; 7+ messages in thread
From: Zhang Yi @ 2023-05-05 12:32 UTC (permalink / raw)
To: linux-ext4
Cc: tytso, adilger.kernel, jack, yi.zhang, yi.zhang, yukuai3, chengzhihao1
From: Zhang Yi <yi.zhang@huawei.com>
Now that t_checkpoint_io_list is gone, the 'released' parameter in
journal_shrink_one_cp_list() is useless, so just remove it.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/jbd2/checkpoint.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 2b62154e9f1e..7b6320081f11 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -387,15 +387,13 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
* journal_shrink_one_cp_list
*
* Find 'nr_to_scan' written-back checkpoint buffers in the given list
- * and try to release them. If the whole transaction is released, set
- * the 'released' parameter. Return the number of released checkpointed
+ * and try to release them. Return the number of released checkpointed
* buffers.
*
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
- unsigned long *nr_to_scan,
- bool *released)
+ unsigned long *nr_to_scan)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
@@ -416,10 +414,8 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
nr_freed++;
ret = __jbd2_journal_remove_checkpoint(jh);
- if (ret) {
- *released = true;
+ if (ret)
break;
- }
if (need_resched())
break;
@@ -441,7 +437,6 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
unsigned long *nr_to_scan)
{
transaction_t *transaction, *last_transaction, *next_transaction;
- bool released;
tid_t first_tid = 0, last_tid = 0, next_tid = 0;
tid_t tid = 0;
unsigned long nr_freed = 0;
@@ -474,10 +469,9 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
tid = transaction->t_tid;
- released = false;
nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
- nr_to_scan, &released);
+ nr_to_scan);
if (*nr_to_scan == 0)
break;
if (need_resched() || spin_needbreak(&journal->j_list_lock))
--
2.31.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer
2023-05-05 12:32 [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer Zhang Yi
2023-05-05 12:32 ` [PATCH v2 2/3] jbd2: remove t_checkpoint_io_list Zhang Yi
2023-05-05 12:32 ` [PATCH v2 3/3] jbd2: remove released parameter in journal_shrink_one_cp_list() Zhang Yi
@ 2023-05-05 13:12 ` Jan Kara
2023-05-15 6:24 ` Zhang Yi
3 siblings, 0 replies; 7+ messages in thread
From: Jan Kara @ 2023-05-05 13:12 UTC (permalink / raw)
To: Zhang Yi
Cc: linux-ext4, tytso, adilger.kernel, jack, yi.zhang, yukuai3, chengzhihao1
On Fri 05-05-23 20:32:17, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
>
> There is a long-standing metadata corruption issue that happens from
> time to time, but it's very difficult to reproduce and analyse, benefit
> from the JBD2_CYCLE_RECORD option, we found out that the problem is the
> checkpointing process miss to write out some buffers which are raced by
> another do_get_write_access(). Looks below for detail.
>
> jbd2_log_do_checkpoint() //transaction X
> //buffer A is dirty and not belones to any transaction
> __buffer_relink_io() //move it to the IO list
> __flush_batch()
> write_dirty_buffer()
> do_get_write_access()
> clear_buffer_dirty
> __jbd2_journal_file_buffer()
> //add buffer A to a new transaction Y
> lock_buffer(bh)
> //doesn't write out
> __jbd2_journal_remove_checkpoint()
> //finish checkpoint except buffer A
> //filesystem corrupt if the new transaction Y isn't fully write out.
>
> Due to the t_checkpoint_list walking loop in jbd2_log_do_checkpoint()
> have already handles waiting for buffers under IO and re-added new
> transaction to complete commit, and it also removing cleaned buffers,
> this makes sure the list will eventually get empty. So it's fine to
> leave buffers on the t_checkpoint_list while flushing out and completely
> stop using the t_checkpoint_io_list.
>
> Cc: stable@vger.kernel.org
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> Tested-by: Zhihao Cheng <chengzhihao1@huawei.com>
> ---
> v1->v2:
> - Leave flushing checkpointing buffers on the t_checkpoint_list and
> stop using t_checkpoint_io_list.
>
> fs/jbd2/checkpoint.c | 94 +++++++++++---------------------------------
> 1 file changed, 23 insertions(+), 71 deletions(-)
Thanks for the fix! The patch looks good to me. Feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
>
> diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
> index 51bd38da21cd..ae1ebfb8bc86 100644
> --- a/fs/jbd2/checkpoint.c
> +++ b/fs/jbd2/checkpoint.c
> @@ -57,28 +57,6 @@ static inline void __buffer_unlink(struct journal_head *jh)
> }
> }
>
> -/*
> - * Move a buffer from the checkpoint list to the checkpoint io list
> - *
> - * Called with j_list_lock held
> - */
> -static inline void __buffer_relink_io(struct journal_head *jh)
> -{
> - transaction_t *transaction = jh->b_cp_transaction;
> -
> - __buffer_unlink_first(jh);
> -
> - if (!transaction->t_checkpoint_io_list) {
> - jh->b_cpnext = jh->b_cpprev = jh;
> - } else {
> - jh->b_cpnext = transaction->t_checkpoint_io_list;
> - jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
> - jh->b_cpprev->b_cpnext = jh;
> - jh->b_cpnext->b_cpprev = jh;
> - }
> - transaction->t_checkpoint_io_list = jh;
> -}
> -
> /*
> * Check a checkpoint buffer could be release or not.
> *
> @@ -183,6 +161,7 @@ __flush_batch(journal_t *journal, int *batch_count)
> struct buffer_head *bh = journal->j_chkpt_bhs[i];
> BUFFER_TRACE(bh, "brelse");
> __brelse(bh);
> + journal->j_chkpt_bhs[i] = NULL;
> }
> *batch_count = 0;
> }
> @@ -242,6 +221,11 @@ int jbd2_log_do_checkpoint(journal_t *journal)
> jh = transaction->t_checkpoint_list;
> bh = jh2bh(jh);
>
> + /*
> + * The buffer may be writing back, or flushing out in the
> + * last couple of cycles, or re-adding into a new transaction,
> + * need to check it again until it's unlocked.
> + */
> if (buffer_locked(bh)) {
> get_bh(bh);
> spin_unlock(&journal->j_list_lock);
> @@ -290,25 +274,25 @@ int jbd2_log_do_checkpoint(journal_t *journal)
> if (__jbd2_journal_remove_checkpoint(jh))
> /* The transaction was released; we're done */
> goto out;
> - continue;
> + } else {
> + /*
> + * We are about to write the buffer, it could be
> + * raced by some other transaction shrink or buffer
> + * re-log logic once we release the j_list_lock,
> + * leave it on the checkpoint list and check status
> + * again to make sure it's clean.
> + */
> + BUFFER_TRACE(bh, "queue");
> + get_bh(bh);
> + J_ASSERT_BH(bh, !buffer_jwrite(bh));
> + journal->j_chkpt_bhs[batch_count++] = bh;
> + transaction->t_chp_stats.cs_written++;
> + transaction->t_checkpoint_list = jh->b_cpnext;
> }
> - /*
> - * Important: we are about to write the buffer, and
> - * possibly block, while still holding the journal
> - * lock. We cannot afford to let the transaction
> - * logic start messing around with this buffer before
> - * we write it to disk, as that would break
> - * recoverability.
> - */
> - BUFFER_TRACE(bh, "queue");
> - get_bh(bh);
> - J_ASSERT_BH(bh, !buffer_jwrite(bh));
> - journal->j_chkpt_bhs[batch_count++] = bh;
> - __buffer_relink_io(jh);
> - transaction->t_chp_stats.cs_written++;
> +
> if ((batch_count == JBD2_NR_BATCH) ||
> - need_resched() ||
> - spin_needbreak(&journal->j_list_lock))
> + need_resched() || spin_needbreak(&journal->j_list_lock) ||
> + jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
> goto unlock_and_flush;
> }
>
> @@ -322,38 +306,6 @@ int jbd2_log_do_checkpoint(journal_t *journal)
> goto restart;
> }
>
> - /*
> - * Now we issued all of the transaction's buffers, let's deal
> - * with the buffers that are out for I/O.
> - */
> -restart2:
> - /* Did somebody clean up the transaction in the meanwhile? */
> - if (journal->j_checkpoint_transactions != transaction ||
> - transaction->t_tid != this_tid)
> - goto out;
> -
> - while (transaction->t_checkpoint_io_list) {
> - jh = transaction->t_checkpoint_io_list;
> - bh = jh2bh(jh);
> - if (buffer_locked(bh)) {
> - get_bh(bh);
> - spin_unlock(&journal->j_list_lock);
> - wait_on_buffer(bh);
> - /* the journal_head may have gone by now */
> - BUFFER_TRACE(bh, "brelse");
> - __brelse(bh);
> - spin_lock(&journal->j_list_lock);
> - goto restart2;
> - }
> -
> - /*
> - * Now in whatever state the buffer currently is, we
> - * know that it has been written out and so we can
> - * drop it from the list
> - */
> - if (__jbd2_journal_remove_checkpoint(jh))
> - break;
> - }
> out:
> spin_unlock(&journal->j_list_lock);
> result = jbd2_cleanup_journal_tail(journal);
> --
> 2.31.1
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2 2/3] jbd2: remove t_checkpoint_io_list
2023-05-05 12:32 ` [PATCH v2 2/3] jbd2: remove t_checkpoint_io_list Zhang Yi
@ 2023-05-05 13:13 ` Jan Kara
0 siblings, 0 replies; 7+ messages in thread
From: Jan Kara @ 2023-05-05 13:13 UTC (permalink / raw)
To: Zhang Yi
Cc: linux-ext4, tytso, adilger.kernel, jack, yi.zhang, yukuai3, chengzhihao1
On Fri 05-05-23 20:32:18, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
>
> Since t_checkpoint_io_list was stop using in jbd2_log_do_checkpoint()
> now, it's time to remove the whole t_checkpoint_io_list logic.
>
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Looks good. Feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> fs/jbd2/checkpoint.c | 42 ++----------------------------------------
> fs/jbd2/commit.c | 3 +--
> include/linux/jbd2.h | 6 ------
> 3 files changed, 3 insertions(+), 48 deletions(-)
>
> diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
> index ae1ebfb8bc86..2b62154e9f1e 100644
> --- a/fs/jbd2/checkpoint.c
> +++ b/fs/jbd2/checkpoint.c
> @@ -27,7 +27,7 @@
> *
> * Called with j_list_lock held.
> */
> -static inline void __buffer_unlink_first(struct journal_head *jh)
> +static inline void __buffer_unlink(struct journal_head *jh)
> {
> transaction_t *transaction = jh->b_cp_transaction;
>
> @@ -40,23 +40,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh)
> }
> }
>
> -/*
> - * Unlink a buffer from a transaction checkpoint(io) list.
> - *
> - * Called with j_list_lock held.
> - */
> -static inline void __buffer_unlink(struct journal_head *jh)
> -{
> - transaction_t *transaction = jh->b_cp_transaction;
> -
> - __buffer_unlink_first(jh);
> - if (transaction->t_checkpoint_io_list == jh) {
> - transaction->t_checkpoint_io_list = jh->b_cpnext;
> - if (transaction->t_checkpoint_io_list == jh)
> - transaction->t_checkpoint_io_list = NULL;
> - }
> -}
> -
> /*
> * Check a checkpoint buffer could be release or not.
> *
> @@ -499,15 +482,6 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
> break;
> if (need_resched() || spin_needbreak(&journal->j_list_lock))
> break;
> - if (released)
> - continue;
> -
> - nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
> - nr_to_scan, &released);
> - if (*nr_to_scan == 0)
> - break;
> - if (need_resched() || spin_needbreak(&journal->j_list_lock))
> - break;
> } while (transaction != last_transaction);
>
> if (transaction != last_transaction) {
> @@ -562,17 +536,6 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
> */
> if (need_resched())
> return;
> - if (ret)
> - continue;
> - /*
> - * It is essential that we are as careful as in the case of
> - * t_checkpoint_list with removing the buffer from the list as
> - * we can possibly see not yet submitted buffers on io_list
> - */
> - ret = journal_clean_one_cp_list(transaction->
> - t_checkpoint_io_list, destroy);
> - if (need_resched())
> - return;
> /*
> * Stop scanning if we couldn't free the transaction. This
> * avoids pointless scanning of transactions which still
> @@ -657,7 +620,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
> jbd2_journal_put_journal_head(jh);
>
> /* Is this transaction empty? */
> - if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
> + if (transaction->t_checkpoint_list)
> return 0;
>
> /*
> @@ -749,7 +712,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
> J_ASSERT(transaction->t_forget == NULL);
> J_ASSERT(transaction->t_shadow_list == NULL);
> J_ASSERT(transaction->t_checkpoint_list == NULL);
> - J_ASSERT(transaction->t_checkpoint_io_list == NULL);
> J_ASSERT(atomic_read(&transaction->t_updates) == 0);
> J_ASSERT(journal->j_committing_transaction != transaction);
> J_ASSERT(journal->j_running_transaction != transaction);
> diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
> index b33155dd7001..1073259902a6 100644
> --- a/fs/jbd2/commit.c
> +++ b/fs/jbd2/commit.c
> @@ -1141,8 +1141,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
> spin_lock(&journal->j_list_lock);
> commit_transaction->t_state = T_FINISHED;
> /* Check if the transaction can be dropped now that we are finished */
> - if (commit_transaction->t_checkpoint_list == NULL &&
> - commit_transaction->t_checkpoint_io_list == NULL) {
> + if (commit_transaction->t_checkpoint_list == NULL) {
> __jbd2_journal_drop_transaction(journal, commit_transaction);
> jbd2_journal_free_transaction(commit_transaction);
> }
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index f619bae1dcc5..91a2cf4bc575 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -622,12 +622,6 @@ struct transaction_s
> */
> struct journal_head *t_checkpoint_list;
>
> - /*
> - * Doubly-linked circular list of all buffers submitted for IO while
> - * checkpointing. [j_list_lock]
> - */
> - struct journal_head *t_checkpoint_io_list;
> -
> /*
> * Doubly-linked circular list of metadata buffers being
> * shadowed by log IO. The IO buffers on the iobuf list and
> --
> 2.31.1
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2 3/3] jbd2: remove released parameter in journal_shrink_one_cp_list()
2023-05-05 12:32 ` [PATCH v2 3/3] jbd2: remove released parameter in journal_shrink_one_cp_list() Zhang Yi
@ 2023-05-05 13:13 ` Jan Kara
0 siblings, 0 replies; 7+ messages in thread
From: Jan Kara @ 2023-05-05 13:13 UTC (permalink / raw)
To: Zhang Yi
Cc: linux-ext4, tytso, adilger.kernel, jack, yi.zhang, yukuai3, chengzhihao1
On Fri 05-05-23 20:32:19, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
>
> After t_checkpoint_io_list is gone, the 'released' parameter in
> journal_shrink_one_cp_list() becomes useless, just remove it.
>
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Looks good. Feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> fs/jbd2/checkpoint.c | 14 ++++----------
> 1 file changed, 4 insertions(+), 10 deletions(-)
>
> diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
> index 2b62154e9f1e..7b6320081f11 100644
> --- a/fs/jbd2/checkpoint.c
> +++ b/fs/jbd2/checkpoint.c
> @@ -387,15 +387,13 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
> * journal_shrink_one_cp_list
> *
> * Find 'nr_to_scan' written-back checkpoint buffers in the given list
> - * and try to release them. If the whole transaction is released, set
> - * the 'released' parameter. Return the number of released checkpointed
> + * and try to release them. Return the number of released checkpointed
> * buffers.
> *
> * Called with j_list_lock held.
> */
> static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
> - unsigned long *nr_to_scan,
> - bool *released)
> + unsigned long *nr_to_scan)
> {
> struct journal_head *last_jh;
> struct journal_head *next_jh = jh;
> @@ -416,10 +414,8 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
>
> nr_freed++;
> ret = __jbd2_journal_remove_checkpoint(jh);
> - if (ret) {
> - *released = true;
> + if (ret)
> break;
> - }
>
> if (need_resched())
> break;
> @@ -441,7 +437,6 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
> unsigned long *nr_to_scan)
> {
> transaction_t *transaction, *last_transaction, *next_transaction;
> - bool released;
> tid_t first_tid = 0, last_tid = 0, next_tid = 0;
> tid_t tid = 0;
> unsigned long nr_freed = 0;
> @@ -474,10 +469,9 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
> transaction = next_transaction;
> next_transaction = transaction->t_cpnext;
> tid = transaction->t_tid;
> - released = false;
>
> nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
> - nr_to_scan, &released);
> + nr_to_scan);
> if (*nr_to_scan == 0)
> break;
> if (need_resched() || spin_needbreak(&journal->j_list_lock))
> --
> 2.31.1
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer
2023-05-05 12:32 [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer Zhang Yi
` (2 preceding siblings ...)
2023-05-05 13:12 ` [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer Jan Kara
@ 2023-05-15 6:24 ` Zhang Yi
3 siblings, 0 replies; 7+ messages in thread
From: Zhang Yi @ 2023-05-15 6:24 UTC (permalink / raw)
To: linux-ext4; +Cc: tytso, adilger.kernel, jack, yi.zhang, yukuai3, chengzhihao1
On 2023/5/5 20:32, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
>
> There is a long-standing metadata corruption issue that happens from
> time to time, but it's very difficult to reproduce and analyse, benefit
> from the JBD2_CYCLE_RECORD option, we found out that the problem is the
> checkpointing process miss to write out some buffers which are raced by
> another do_get_write_access(). Looks below for detail.
>
> jbd2_log_do_checkpoint() //transaction X
> //buffer A is dirty and not belones to any transaction
> __buffer_relink_io() //move it to the IO list
> __flush_batch()
> write_dirty_buffer()
> do_get_write_access()
> clear_buffer_dirty
> __jbd2_journal_file_buffer()
> //add buffer A to a new transaction Y
> lock_buffer(bh)
> //doesn't write out
> __jbd2_journal_remove_checkpoint()
> //finish checkpoint except buffer A
> //filesystem corrupt if the new transaction Y isn't fully write out.
>
> Due to the t_checkpoint_list walking loop in jbd2_log_do_checkpoint()
> have already handles waiting for buffers under IO and re-added new
> transaction to complete commit, and it also removing cleaned buffers,
> this makes sure the list will eventually get empty. So it's fine to
> leave buffers on the t_checkpoint_list while flushing out and completely
> stop using the t_checkpoint_io_list.
>
> Cc: stable@vger.kernel.org
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> Tested-by: Zhihao Cheng <chengzhihao1@huawei.com>
> ---
> v1->v2:
> - Leave flushing checkpointing buffers on the t_checkpoint_list and
> stop using t_checkpoint_io_list.
>
> fs/jbd2/checkpoint.c | 94 +++++++++++---------------------------------
> 1 file changed, 23 insertions(+), 71 deletions(-)
>
> diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
> index 51bd38da21cd..ae1ebfb8bc86 100644
> --- a/fs/jbd2/checkpoint.c
> +++ b/fs/jbd2/checkpoint.c
> @@ -57,28 +57,6 @@ static inline void __buffer_unlink(struct journal_head *jh)
> }
> }
>
> -/*
> - * Move a buffer from the checkpoint list to the checkpoint io list
> - *
> - * Called with j_list_lock held
> - */
> -static inline void __buffer_relink_io(struct journal_head *jh)
> -{
> - transaction_t *transaction = jh->b_cp_transaction;
> -
> - __buffer_unlink_first(jh);
> -
> - if (!transaction->t_checkpoint_io_list) {
> - jh->b_cpnext = jh->b_cpprev = jh;
> - } else {
> - jh->b_cpnext = transaction->t_checkpoint_io_list;
> - jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
> - jh->b_cpprev->b_cpnext = jh;
> - jh->b_cpnext->b_cpprev = jh;
> - }
> - transaction->t_checkpoint_io_list = jh;
> -}
> -
> /*
> * Check a checkpoint buffer could be release or not.
> *
> @@ -183,6 +161,7 @@ __flush_batch(journal_t *journal, int *batch_count)
> struct buffer_head *bh = journal->j_chkpt_bhs[i];
> BUFFER_TRACE(bh, "brelse");
> __brelse(bh);
> + journal->j_chkpt_bhs[i] = NULL;
> }
> *batch_count = 0;
> }
> @@ -242,6 +221,11 @@ int jbd2_log_do_checkpoint(journal_t *journal)
> jh = transaction->t_checkpoint_list;
> bh = jh2bh(jh);
>
> + /*
> + * The buffer may be writing back, or flushing out in the
> + * last couple of cycles, or re-adding into a new transaction,
> + * need to check it again until it's unlocked.
> + */
> if (buffer_locked(bh)) {
> get_bh(bh);
> spin_unlock(&journal->j_list_lock);
> @@ -290,25 +274,25 @@ int jbd2_log_do_checkpoint(journal_t *journal)
> if (__jbd2_journal_remove_checkpoint(jh))
> /* The transaction was released; we're done */
> goto out;
In __jbd2_journal_remove_checkpoint(), if the transaction's state is not
T_FINISHED, it could leave an empty transaction and return 0, which
could lead to a NULL pointer dereference in the
'jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0]'
check below. So we also need to check whether the list is empty and
bail out if it is. Will fix this in v3.
Thanks,
Yi.
> - continue;
> + } else {
> + /*
> + * We are about to write the buffer, it could be
> + * raced by some other transaction shrink or buffer
> + * re-log logic once we release the j_list_lock,
> + * leave it on the checkpoint list and check status
> + * again to make sure it's clean.
> + */
> + BUFFER_TRACE(bh, "queue");
> + get_bh(bh);
> + J_ASSERT_BH(bh, !buffer_jwrite(bh));
> + journal->j_chkpt_bhs[batch_count++] = bh;
> + transaction->t_chp_stats.cs_written++;
> + transaction->t_checkpoint_list = jh->b_cpnext;
> }
> - /*
> - * Important: we are about to write the buffer, and
> - * possibly block, while still holding the journal
> - * lock. We cannot afford to let the transaction
> - * logic start messing around with this buffer before
> - * we write it to disk, as that would break
> - * recoverability.
> - */
> - BUFFER_TRACE(bh, "queue");
> - get_bh(bh);
> - J_ASSERT_BH(bh, !buffer_jwrite(bh));
> - journal->j_chkpt_bhs[batch_count++] = bh;
> - __buffer_relink_io(jh);
> - transaction->t_chp_stats.cs_written++;
> +
> if ((batch_count == JBD2_NR_BATCH) ||
> - need_resched() ||
> - spin_needbreak(&journal->j_list_lock))
> + need_resched() || spin_needbreak(&journal->j_list_lock) ||
> + jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
> goto unlock_and_flush;
> }
>
> @@ -322,38 +306,6 @@ int jbd2_log_do_checkpoint(journal_t *journal)
> goto restart;
> }
>
> - /*
> - * Now we issued all of the transaction's buffers, let's deal
> - * with the buffers that are out for I/O.
> - */
> -restart2:
> - /* Did somebody clean up the transaction in the meanwhile? */
> - if (journal->j_checkpoint_transactions != transaction ||
> - transaction->t_tid != this_tid)
> - goto out;
> -
> - while (transaction->t_checkpoint_io_list) {
> - jh = transaction->t_checkpoint_io_list;
> - bh = jh2bh(jh);
> - if (buffer_locked(bh)) {
> - get_bh(bh);
> - spin_unlock(&journal->j_list_lock);
> - wait_on_buffer(bh);
> - /* the journal_head may have gone by now */
> - BUFFER_TRACE(bh, "brelse");
> - __brelse(bh);
> - spin_lock(&journal->j_list_lock);
> - goto restart2;
> - }
> -
> - /*
> - * Now in whatever state the buffer currently is, we
> - * know that it has been written out and so we can
> - * drop it from the list
> - */
> - if (__jbd2_journal_remove_checkpoint(jh))
> - break;
> - }
> out:
> spin_unlock(&journal->j_list_lock);
> result = jbd2_cleanup_journal_tail(journal);
>
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2023-05-15 6:25 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-05 12:32 [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer Zhang Yi
2023-05-05 12:32 ` [PATCH v2 2/3] jbd2: remove t_checkpoint_io_list Zhang Yi
2023-05-05 13:13 ` Jan Kara
2023-05-05 12:32 ` [PATCH v2 3/3] jbd2: remove released parameter in journal_shrink_one_cp_list() Zhang Yi
2023-05-05 13:13 ` Jan Kara
2023-05-05 13:12 ` [PATCH v2 1/3] jbd2: recheck chechpointing non-dirty buffer Jan Kara
2023-05-15 6:24 ` Zhang Yi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.