* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
@ 2017-11-01 1:14 piaojun
2017-11-01 2:47 ` Changwei Ge
0 siblings, 1 reply; 11+ messages in thread
From: piaojun @ 2017-11-01 1:14 UTC (permalink / raw)
To: ocfs2-devel
wait for dlm recovery done when migrating all lockres in case of new
lockres to be left after leaving dlm domain.
NodeA NodeB NodeC
umount and migrate
all lockres
node down
do recovery for NodeB
and collect a new lockres
from other live nodes
leave domain but the
new lockres remains
mount and join domain
request for the owner
of the new lockres, but
all the other nodes said
'NO', so NodeC decides to be
the owner, and sends an
assert msg to other nodes.
other nodes receive the msg
and found two masters exist.
at last cause BUG in
dlm_assert_master_handler()
-->BUG();
Fixes: bc9838c4d44a ("dlm: allow dlm do recovery during shutdown")
Signed-off-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Alex Chen <alex.chen@huawei.com>
Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
---
fs/ocfs2/dlm/dlmcommon.h | 1 +
fs/ocfs2/dlm/dlmdomain.c | 14 ++++++++++++++
fs/ocfs2/dlm/dlmrecovery.c | 12 +++++++++---
3 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index e9f3705..999ab7d 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -140,6 +140,7 @@ struct dlm_ctxt
u8 node_num;
u32 key;
u8 joining_node;
+ u8 migrate_done; /* set to 1 means node has migrated all lockres */
wait_queue_head_t dlm_join_events;
unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index e1fea14..98a8f56 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -461,6 +461,18 @@ static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
cond_resched_lock(&dlm->spinlock);
num += n;
}
+
+ if (!num) {
+ if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
+ mlog(0, "%s: perhaps there are more lock resources need to "
+ "be migrated after dlm recovery\n", dlm->name);
+ ret = -EAGAIN;
+ } else {
+ mlog(0, "%s: we won't do dlm recovery after migrating all lockres",
+ dlm->name);
+ dlm->migrate_done = 1;
+ }
+ }
spin_unlock(&dlm->spinlock);
wake_up(&dlm->dlm_thread_wq);
@@ -2052,6 +2064,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
init_waitqueue_head(&dlm->dlm_join_events);
+ dlm->migrate_done = 0;
+
dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 74407c6..3106332 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
static void dlm_begin_recovery(struct dlm_ctxt *dlm)
{
- spin_lock(&dlm->spinlock);
+ assert_spin_locked(&dlm->spinlock);
BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
dlm->name, dlm->reco.dead_node);
dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
- spin_unlock(&dlm->spinlock);
}
static void dlm_end_recovery(struct dlm_ctxt *dlm)
@@ -456,6 +455,12 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
spin_lock(&dlm->spinlock);
+ if (dlm->migrate_done) {
+ mlog(0, "%s: no need do recovery after migrating all lockres\n",
+ dlm->name);
+ return 0;
+ }
+
/* check to see if the new master has died */
if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
test_bit(dlm->reco.new_master, dlm->recovery_map)) {
@@ -490,12 +495,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
dlm->reco.dead_node);
- spin_unlock(&dlm->spinlock);
/* take write barrier */
/* (stops the list reshuffling thread, proxy ast handling) */
dlm_begin_recovery(dlm);
+ spin_unlock(&dlm->spinlock);
+
if (dlm->reco.new_master == dlm->node_num)
goto master_here;
--
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
2017-11-01 1:14 [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres piaojun
@ 2017-11-01 2:47 ` Changwei Ge
2017-11-01 5:52 ` piaojun
0 siblings, 1 reply; 11+ messages in thread
From: Changwei Ge @ 2017-11-01 2:47 UTC (permalink / raw)
To: ocfs2-devel
Hi Jun,
Thanks for reporting.
I am very interested in this issue. But, first of all, I want to make
this issue clear, so that I might be able to provide some comments.
On 2017/11/1 9:16, piaojun wrote:
> wait for dlm recovery done when migrating all lockres in case of new
> lockres to be left after leaving dlm domain.
What do you mean by 'a new lock resource to be left after leaving
domain'? It means we leak a dlm lock resource if below situation happens.
>
> NodeA NodeB NodeC
>
> umount and migrate
> all lockres
>
> node down
>
> do recovery for NodeB
> and collect a new lockres
> form other live nodes
You mean a lock resource whose owner was NodeB is just migrated from
other cluster member nodes?
>
> leave domain but the
> new lockres remains
>
> mount and join domain
>
> request for the owner
> of the new lockres, but
> all the other nodes said
> 'NO', so NodeC decide to
> the owner, and send do
> assert msg to other nodes.
>
> other nodes receive the msg
> and found two masters exist.
> at last cause BUG in
> dlm_assert_master_handler()
> -->BUG();
If this issue truly exists, can we take some efforts in
dlm_exit_domain_handler? Or perhaps we should kick dlm's work queue
before migrating all lock resources.
>
> Fixes: bc9838c4d44a ("dlm: allow dlm do recovery during shutdown")
>
> Signed-off-by: Jun Piao <piaojun@huawei.com>
> Reviewed-by: Alex Chen <alex.chen@huawei.com>
> Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
> ---
> fs/ocfs2/dlm/dlmcommon.h | 1 +
> fs/ocfs2/dlm/dlmdomain.c | 14 ++++++++++++++
> fs/ocfs2/dlm/dlmrecovery.c | 12 +++++++++---
> 3 files changed, 24 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
> index e9f3705..999ab7d 100644
> --- a/fs/ocfs2/dlm/dlmcommon.h
> +++ b/fs/ocfs2/dlm/dlmcommon.h
> @@ -140,6 +140,7 @@ struct dlm_ctxt
> u8 node_num;
> u32 key;
> u8 joining_node;
> + u8 migrate_done; /* set to 1 means node has migrated all lockres */
> wait_queue_head_t dlm_join_events;
> unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
> unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
> index e1fea14..98a8f56 100644
> --- a/fs/ocfs2/dlm/dlmdomain.c
> +++ b/fs/ocfs2/dlm/dlmdomain.c
> @@ -461,6 +461,18 @@ static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
> cond_resched_lock(&dlm->spinlock);
> num += n;
> }
> +
> + if (!num) {
> + if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
> + mlog(0, "%s: perhaps there are more lock resources need to "
> + "be migrated after dlm recovery\n", dlm->name);
If dlm is mark with DLM_RECO_STATE_ACTIVE, then a lock resource must
already be marked with DLM_LOCK_RES_RECOVERING which can't be migrated.
So code will goto redo_bucket in function dlm_migrate_all_locks.
So I don't understand why a judgement is added here?
> + ret = -EAGAIN;
> + } else {
> + mlog(0, "%s: we won't do dlm recovery after migrating all lockres",
> + dlm->name);
> + dlm->migrate_done = 1;
> + }
> + }
> spin_unlock(&dlm->spinlock);
> wake_up(&dlm->dlm_thread_wq);
>
> @@ -2052,6 +2064,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
> dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
> init_waitqueue_head(&dlm->dlm_join_events);
>
> + dlm->migrate_done = 0;
> +
> dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
> dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
>
> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
> index 74407c6..3106332 100644
> --- a/fs/ocfs2/dlm/dlmrecovery.c
> +++ b/fs/ocfs2/dlm/dlmrecovery.c
> @@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
>
> static void dlm_begin_recovery(struct dlm_ctxt *dlm)
> {
> - spin_lock(&dlm->spinlock);
> + assert_spin_locked(&dlm->spinlock);
> BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
> printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
> dlm->name, dlm->reco.dead_node);
> dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
> - spin_unlock(&dlm->spinlock);
> }
>
> static void dlm_end_recovery(struct dlm_ctxt *dlm)
> @@ -456,6 +455,12 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
>
> spin_lock(&dlm->spinlock);
>
> + if (dlm->migrate_done) {
> + mlog(0, "%s: no need do recovery after migrating all lockres\n",
> + dlm->name);
Don't we need to unlock the above spin_lock before returning?
And if we just return here, how dlm lock resource can clear its
REDISCOVERING flag. I suppose this may cause cluster hang.
And I cc this to ocfs2 maintainers.
Thanks,
Changwei
> + return 0;
> + }
> +
> /* check to see if the new master has died */
> if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
> test_bit(dlm->reco.new_master, dlm->recovery_map)) {
> @@ -490,12 +495,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
> mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
> dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
> dlm->reco.dead_node);
> - spin_unlock(&dlm->spinlock);
>
> /* take write barrier */
> /* (stops the list reshuffling thread, proxy ast handling) */
> dlm_begin_recovery(dlm);
>
> + spin_unlock(&dlm->spinlock);
> +
> if (dlm->reco.new_master == dlm->node_num)
> goto master_here;
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
2017-11-01 2:47 ` Changwei Ge
@ 2017-11-01 5:52 ` piaojun
2017-11-01 7:13 ` Changwei Ge
0 siblings, 1 reply; 11+ messages in thread
From: piaojun @ 2017-11-01 5:52 UTC (permalink / raw)
To: ocfs2-devel
Hi Changwei,
On 2017/11/1 10:47, Changwei Ge wrote:
> Hi Jun,
>
> Thanks for reporting.
> I am very interesting in this issue. But, first of all, I want to make
> this issue clear, so that I might be able to provide some comments.
>
>
> On 2017/11/1 9:16, piaojun wrote:
>> wait for dlm recovery done when migrating all lockres in case of new
>> lockres to be left after leaving dlm domain.
>
> What do you mean by 'a new lock resource to be left after leaving
> domain'? It means we leak a dlm lock resource if below situation happens.
>
a new lockres is the one collected by NodeA during recovering for
NodeB. It leaks a lockres indeed.
>>
>> NodeA NodeB NodeC
>>
>> umount and migrate
>> all lockres
>>
>> node down
>>
>> do recovery for NodeB
>> and collect a new lockres
>> form other live nodes
>
> You mean a lock resource whose owner was NodeB is just migrated from
> other cluster member nodes?
>
that is it.
>>
>> leave domain but the
>> new lockres remains
>>
>> mount and join domain
>>
>> request for the owner
>> of the new lockres, but
>> all the other nodes said
>> 'NO', so NodeC decide to
>> the owner, and send do
>> assert msg to other nodes.
>>
>> other nodes receive the msg
>> and found two masters exist.
>> at last cause BUG in
>> dlm_assert_master_handler()
>> -->BUG();
>
> If this issue truly exists, can we take some efforts in
> dlm_exit_domain_handler? Or perhaps we should kick dlm's work queue
> before migrating all lock resources.
>
If NodeA has entered dlm_leave_domain(), we can hardly go back to
migrating the res. Perhaps more work will be needed in that way.
>>
>> Fixes: bc9838c4d44a ("dlm: allow dlm do recovery during shutdown")
>>
>> Signed-off-by: Jun Piao <piaojun@huawei.com>
>> Reviewed-by: Alex Chen <alex.chen@huawei.com>
>> Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
>> ---
>> fs/ocfs2/dlm/dlmcommon.h | 1 +
>> fs/ocfs2/dlm/dlmdomain.c | 14 ++++++++++++++
>> fs/ocfs2/dlm/dlmrecovery.c | 12 +++++++++---
>> 3 files changed, 24 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
>> index e9f3705..999ab7d 100644
>> --- a/fs/ocfs2/dlm/dlmcommon.h
>> +++ b/fs/ocfs2/dlm/dlmcommon.h
>> @@ -140,6 +140,7 @@ struct dlm_ctxt
>> u8 node_num;
>> u32 key;
>> u8 joining_node;
>> + u8 migrate_done; /* set to 1 means node has migrated all lockres */
>> wait_queue_head_t dlm_join_events;
>> unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
>> unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
>> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
>> index e1fea14..98a8f56 100644
>> --- a/fs/ocfs2/dlm/dlmdomain.c
>> +++ b/fs/ocfs2/dlm/dlmdomain.c
>> @@ -461,6 +461,18 @@ static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
>> cond_resched_lock(&dlm->spinlock);
>> num += n;
>> }
>> +
>> + if (!num) {
>> + if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
>> + mlog(0, "%s: perhaps there are more lock resources need to "
>> + "be migrated after dlm recovery\n", dlm->name);
>
> If dlm is mark with DLM_RECO_STATE_ACTIVE, then a lock resource must
> already be marked with DLM_LOCK_RES_RECOVERING which can't be migrated.
> So code will goto redo_bucket in function dlm_migrate_all_locks.
> So I don't understand why a judgement is added here?
>
>
>
because we have done the migration before recovery. The judgement here
is to avoid the subsequent potential recovery.
>> + ret = -EAGAIN;
>> + } else {
>> + mlog(0, "%s: we won't do dlm recovery after migrating all lockres",
>> + dlm->name);
>> + dlm->migrate_done = 1;
>> + }
>> + }
>> spin_unlock(&dlm->spinlock);
>> wake_up(&dlm->dlm_thread_wq);
>>
>> @@ -2052,6 +2064,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
>> dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
>> init_waitqueue_head(&dlm->dlm_join_events);
>>
>> + dlm->migrate_done = 0;
>> +
>> dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
>> dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
>>
>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>> index 74407c6..3106332 100644
>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>> @@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
>>
>> static void dlm_begin_recovery(struct dlm_ctxt *dlm)
>> {
>> - spin_lock(&dlm->spinlock);
>> + assert_spin_locked(&dlm->spinlock);
>> BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
>> printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
>> dlm->name, dlm->reco.dead_node);
>> dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
>> - spin_unlock(&dlm->spinlock);
>> }
>>
>> static void dlm_end_recovery(struct dlm_ctxt *dlm)
>> @@ -456,6 +455,12 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
>>
>> spin_lock(&dlm->spinlock);
>>
>> + if (dlm->migrate_done) {
>> + mlog(0, "%s: no need do recovery after migrating all lockres\n",
>> + dlm->name);
>
> Don't we need unlock above spin_lock before return?
>
> And if we just return here, how dlm lock resource can clear its
> REDISCOVERING flag. I suppose this may cause cluster hang.
>
> And I cc this to ocfs2 maintainers.
>
> Thanks,
> Changwei
>
oh, good catch, I missed spin_unlock(&dlm->spinlock);
>> + return 0;
>> + }
>> +
>> /* check to see if the new master has died */
>> if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
>> test_bit(dlm->reco.new_master, dlm->recovery_map)) {
>> @@ -490,12 +495,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
>> mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
>> dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
>> dlm->reco.dead_node);
>> - spin_unlock(&dlm->spinlock);
>>
>> /* take write barrier */
>> /* (stops the list reshuffling thread, proxy ast handling) */
>> dlm_begin_recovery(dlm);
>>
>> + spin_unlock(&dlm->spinlock);
>> +
>> if (dlm->reco.new_master == dlm->node_num)
>> goto master_here;
>>
>
> .
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
2017-11-01 5:52 ` piaojun
@ 2017-11-01 7:13 ` Changwei Ge
2017-11-01 7:56 ` piaojun
0 siblings, 1 reply; 11+ messages in thread
From: Changwei Ge @ 2017-11-01 7:13 UTC (permalink / raw)
To: ocfs2-devel
Hi Jun,
I probably get your point.
You mean that dlm finds no lock resource to be migrated and no more lock
resource is managed by its hash table. After that a node dies all of a
sudden and the dead node is put into dlm's recovery map, right?
Furthermore, a lock resource is migrated from other nodes and local node
has already asserted master to them.
If so, I want to suggest an easier way to solve it.
We don't have to add a new flag to dlm structure, just leverage existed
dlm status and bitmap.
It will bring a bonus - no lock resource will be marked with RECOVERING,
it's a safer way, I suppose.
Please take a review.
Thanks,
Changwei
Subject: [PATCH] ocfs2/dlm: a node can't be involved in recovery if it
is being shutdown
Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
---
fs/ocfs2/dlm/dlmdomain.c | 4 ++++
fs/ocfs2/dlm/dlmrecovery.c | 3 +++
2 files changed, 7 insertions(+)
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index a2b19fbdcf46..5e9283e509a4 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -707,11 +707,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
* want new domain joins to communicate with us at
* least until we've completed migration of our
* resources. */
+ spin_lock(&dlm->spinlock);
dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN;
+ spin_unlock(&dlm->spinlock);
leave = 1;
}
spin_unlock(&dlm_domain_lock);
+ dlm_wait_for_recovery(dlm);
+
if (leave) {
mlog(0, "shutting down domain %s\n", dlm->name);
dlm_begin_exit_domain(dlm);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 74407c6dd592..764c95b2b35c 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2441,6 +2441,9 @@ static void __dlm_hb_node_down(struct dlm_ctxt
*dlm, int idx)
{
assert_spin_locked(&dlm->spinlock);
+ if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
+ return;
+
if (dlm->reco.new_master == idx) {
mlog(0, "%s: recovery master %d just died\n",
dlm->name, idx);
--
2.11.0
On 2017/11/1 13:53, piaojun wrote:
> Hi Changwei,
>
> On 2017/11/1 10:47, Changwei Ge wrote:
>> Hi Jun,
>>
>> Thanks for reporting.
>> I am very interesting in this issue. But, first of all, I want to make
>> this issue clear, so that I might be able to provide some comments.
>>
>>
>> On 2017/11/1 9:16, piaojun wrote:
>>> wait for dlm recovery done when migrating all lockres in case of new
>>> lockres to be left after leaving dlm domain.
>>
>> What do you mean by 'a new lock resource to be left after leaving
>> domain'? It means we leak a dlm lock resource if below situation happens.
>>
> a new lockres is the one collected by NodeA during recoverying for
> NodeB. It leaks a lockres indeed.
>>>
>>> NodeA NodeB NodeC
>>>
>>> umount and migrate
>>> all lockres
>>>
>>> node down
>>>
>>> do recovery for NodeB
>>> and collect a new lockres
>>> form other live nodes
>>
>> You mean a lock resource whose owner was NodeB is just migrated from
>> other cluster member nodes?
>>
> that is it.
>>>
>>> leave domain but the
>>> new lockres remains
>>>
>>> mount and join domain
>>>
>>> request for the owner
>>> of the new lockres, but
>>> all the other nodes said
>>> 'NO', so NodeC decide to
>>> the owner, and send do
>>> assert msg to other nodes.
>>>
>>> other nodes receive the msg
>>> and found two masters exist.
>>> at last cause BUG in
>>> dlm_assert_master_handler()
>>> -->BUG();
>>
>> If this issue truly exists, can we take some efforts in
>> dlm_exit_domain_handler? Or perhaps we should kick dlm's work queue
>> before migrating all lock resources.
>>
> If NodeA has entered dlm_leave_domain(), we can hardly go back
> migrating res. Perhaps more work will be needed in that way.
>>>
>>> Fixes: bc9838c4d44a ("dlm: allow dlm do recovery during shutdown")
>>>
>>> Signed-off-by: Jun Piao <piaojun@huawei.com>
>>> Reviewed-by: Alex Chen <alex.chen@huawei.com>
>>> Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
>>> ---
>>> fs/ocfs2/dlm/dlmcommon.h | 1 +
>>> fs/ocfs2/dlm/dlmdomain.c | 14 ++++++++++++++
>>> fs/ocfs2/dlm/dlmrecovery.c | 12 +++++++++---
>>> 3 files changed, 24 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
>>> index e9f3705..999ab7d 100644
>>> --- a/fs/ocfs2/dlm/dlmcommon.h
>>> +++ b/fs/ocfs2/dlm/dlmcommon.h
>>> @@ -140,6 +140,7 @@ struct dlm_ctxt
>>> u8 node_num;
>>> u32 key;
>>> u8 joining_node;
>>> + u8 migrate_done; /* set to 1 means node has migrated all lockres */
>>> wait_queue_head_t dlm_join_events;
>>> unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
>>> unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
>>> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
>>> index e1fea14..98a8f56 100644
>>> --- a/fs/ocfs2/dlm/dlmdomain.c
>>> +++ b/fs/ocfs2/dlm/dlmdomain.c
>>> @@ -461,6 +461,18 @@ static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
>>> cond_resched_lock(&dlm->spinlock);
>>> num += n;
>>> }
>>> +
>>> + if (!num) {
>>> + if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
>>> + mlog(0, "%s: perhaps there are more lock resources need to "
>>> + "be migrated after dlm recovery\n", dlm->name);
>>
>> If dlm is mark with DLM_RECO_STATE_ACTIVE, then a lock resource must
>> already be marked with DLM_LOCK_RES_RECOVERING which can't be migrated.
>> So code will goto redo_bucket in function dlm_migrate_all_locks.
>> So I don't understand why a judgement is added here?
>>
>>
>>
> because we have done migrating before recoverying. the judgement here
> is to avoid the following potential recoverying.
>>> + ret = -EAGAIN;
>>> + } else {
>>> + mlog(0, "%s: we won't do dlm recovery after migrating all lockres",
>>> + dlm->name);
>>> + dlm->migrate_done = 1;
>>> + }
>>> + }
>>> spin_unlock(&dlm->spinlock);
>>> wake_up(&dlm->dlm_thread_wq);
>>>
>>> @@ -2052,6 +2064,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
>>> dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
>>> init_waitqueue_head(&dlm->dlm_join_events);
>>>
>>> + dlm->migrate_done = 0;
>>> +
>>> dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
>>> dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
>>>
>>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>>> index 74407c6..3106332 100644
>>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>>> @@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
>>>
>>> static void dlm_begin_recovery(struct dlm_ctxt *dlm)
>>> {
>>> - spin_lock(&dlm->spinlock);
>>> + assert_spin_locked(&dlm->spinlock);
>>> BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
>>> printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
>>> dlm->name, dlm->reco.dead_node);
>>> dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
>>> - spin_unlock(&dlm->spinlock);
>>> }
>>>
>>> static void dlm_end_recovery(struct dlm_ctxt *dlm)
>>> @@ -456,6 +455,12 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
>>>
>>> spin_lock(&dlm->spinlock);
>>>
>>> + if (dlm->migrate_done) {
>>> + mlog(0, "%s: no need do recovery after migrating all lockres\n",
>>> + dlm->name);
>>
>> Don't we need unlock above spin_lock before return?
>>
>> And if we just return here, how dlm lock resource can clear its
>> REDISCOVERING flag. I suppose this may cause cluster hang.
>>
>> And I cc this to ocfs2 maintainers.
>>
>> Thanks,
>> Changwei
>>
> oh, good catch, I missed spin_unlock(&dlm->spinlock);
>>> + return 0;
>>> + }
>>> +
>>> /* check to see if the new master has died */
>>> if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
>>> test_bit(dlm->reco.new_master, dlm->recovery_map)) {
>>> @@ -490,12 +495,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
>>> mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
>>> dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
>>> dlm->reco.dead_node);
>>> - spin_unlock(&dlm->spinlock);
>>>
>>> /* take write barrier */
>>> /* (stops the list reshuffling thread, proxy ast handling) */
>>> dlm_begin_recovery(dlm);
>>>
>>> + spin_unlock(&dlm->spinlock);
>>> +
>>> if (dlm->reco.new_master == dlm->node_num)
>>> goto master_here;
>>>
>>
>> .
>>
>
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
2017-11-01 7:13 ` Changwei Ge
@ 2017-11-01 7:56 ` piaojun
2017-11-01 8:11 ` Changwei Ge
0 siblings, 1 reply; 11+ messages in thread
From: piaojun @ 2017-11-01 7:56 UTC (permalink / raw)
To: ocfs2-devel
Hi Changwei,
thanks for reviewing, and I think waiting for recovery to be done before
migrating seems like another solution, but I wonder if new problems will
be introduced, as noted in the following comments.
thanks,
Jun
On 2017/11/1 15:13, Changwei Ge wrote:
> Hi Jun,
>
> I probably get your point.
>
> You mean that dlm finds no lock resource to be migrated and no more lock
> resource is managed by its hash table. After that a node dies all of a
> sudden and the dead node is put into dlm's recovery map, right?
that is it.
> Furthermore, a lock resource is migrated from other nodes and local node
> has already asserted master to them.
>
> If so, I want to suggest a easier way to solve it.
> We don't have to add a new flag to dlm structure, just leverage existed
> dlm status and bitmap.
> It will bring a bonus - no lock resource will be marked with RECOVERING,
> it's a safer way, I suppose.
>
> Please take a review.
>
> Thanks,
> Changwei
>
>
> Subject: [PATCH] ocfs2/dlm: a node can't be involved in recovery if it
> is being shutdown
>
> Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
> ---
> fs/ocfs2/dlm/dlmdomain.c | 4 ++++
> fs/ocfs2/dlm/dlmrecovery.c | 3 +++
> 2 files changed, 7 insertions(+)
>
> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
> index a2b19fbdcf46..5e9283e509a4 100644
> --- a/fs/ocfs2/dlm/dlmdomain.c
> +++ b/fs/ocfs2/dlm/dlmdomain.c
> @@ -707,11 +707,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
> * want new domain joins to communicate with us at
> * least until we've completed migration of our
> * resources. */
> + spin_lock(&dlm->spinlock);
> dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN;
> + spin_unlock(&dlm->spinlock);
I guess there will be misuse of 'dlm->spinlock' and dlm_domain_lock.
> leave = 1;
> }
> spin_unlock(&dlm_domain_lock);
>
> + dlm_wait_for_recovery(dlm);
> +
> if (leave) {
> mlog(0, "shutting down domain %s\n", dlm->name);
> dlm_begin_exit_domain(dlm);
> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
> index 74407c6dd592..764c95b2b35c 100644
> --- a/fs/ocfs2/dlm/dlmrecovery.c
> +++ b/fs/ocfs2/dlm/dlmrecovery.c
> @@ -2441,6 +2441,9 @@ static void __dlm_hb_node_down(struct dlm_ctxt
> *dlm, int idx)
> {
> assert_spin_locked(&dlm->spinlock);
>
> + if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
> + return;
> +
'dlm->dlm_state' probably needs to be protected by 'dlm_domain_lock'.
And I wonder if there is more work to be done when in
'DLM_CTXT_IN_SHUTDOWN'?
> if (dlm->reco.new_master == idx) {
> mlog(0, "%s: recovery master %d just died\n",
> dlm->name, idx);
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
2017-11-01 7:56 ` piaojun
@ 2017-11-01 8:11 ` Changwei Ge
2017-11-01 8:45 ` piaojun
0 siblings, 1 reply; 11+ messages in thread
From: Changwei Ge @ 2017-11-01 8:11 UTC (permalink / raw)
To: ocfs2-devel
Hi Jun,
Thanks for reviewing.
I don't think we have to worry about misusing *dlm_domain_lock* and
*dlm::spinlock*. I admit my change may look a little different from most
of other code snippets where using these two spin locks. But our purpose
is to close the race between __dlm_hb_node_down and
dlm_unregister_domain, right? And my change meets that. :-)
I suppose we can do it in a flexible way.
Thanks,
Changwei
On 2017/11/1 15:57, piaojun wrote:
> Hi Changwei,
>
> thanks for reviewing, and I think waiting for recoverying done before
> migrating seems another solution, but I wonder if new problems will be
> invoked as following comments.
>
> thanks,
> Jun
>
> On 2017/11/1 15:13, Changwei Ge wrote:
>> Hi Jun,
>>
>> I probably get your point.
>>
>> You mean that dlm finds no lock resource to be migrated and no more lock
>> resource is managed by its hash table. After that a node dies all of a
>> sudden and the dead node is put into dlm's recovery map, right?
> that is it.
>> Furthermore, a lock resource is migrated from other nodes and local node
>> has already asserted master to them.
>>
>> If so, I want to suggest a easier way to solve it.
>> We don't have to add a new flag to dlm structure, just leverage existed
>> dlm status and bitmap.
>> It will bring a bonus - no lock resource will be marked with RECOVERING,
>> it's a safer way, I suppose.
>>
>> Please take a review.
>>
>> Thanks,
>> Changwei
>>
>>
>> Subject: [PATCH] ocfs2/dlm: a node can't be involved in recovery if it
>> is being shutdown
>>
>> Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
>> ---
>> fs/ocfs2/dlm/dlmdomain.c | 4 ++++
>> fs/ocfs2/dlm/dlmrecovery.c | 3 +++
>> 2 files changed, 7 insertions(+)
>>
>> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
>> index a2b19fbdcf46..5e9283e509a4 100644
>> --- a/fs/ocfs2/dlm/dlmdomain.c
>> +++ b/fs/ocfs2/dlm/dlmdomain.c
>> @@ -707,11 +707,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
>> * want new domain joins to communicate with us at
>> * least until we've completed migration of our
>> * resources. */
>> + spin_lock(&dlm->spinlock);
>> dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN;
>> + spin_unlock(&dlm->spinlock);
> I guess there will be misuse of 'dlm->spinlock' and dlm_domain_lock.
>> leave = 1;
>> }
>> spin_unlock(&dlm_domain_lock);
>>
>> + dlm_wait_for_recovery(dlm);
>> +
>> if (leave) {
>> mlog(0, "shutting down domain %s\n", dlm->name);
>> dlm_begin_exit_domain(dlm);
>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>> index 74407c6dd592..764c95b2b35c 100644
>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>> @@ -2441,6 +2441,9 @@ static void __dlm_hb_node_down(struct dlm_ctxt
>> *dlm, int idx)
>> {
>> assert_spin_locked(&dlm->spinlock);
>>
>> + if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
>> + return;
>> +
> 'dlm->dlm_state' probably need be to protected by 'dlm_domain_lock'.
> and I wander if there is more work to be done when in
> 'DLM_CTXT_IN_SHUTDOWN'?
>> if (dlm->reco.new_master == idx) {
>> mlog(0, "%s: recovery master %d just died\n",
>> dlm->name, idx);
>>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
2017-11-01 8:11 ` Changwei Ge
@ 2017-11-01 8:45 ` piaojun
2017-11-01 9:00 ` Changwei Ge
0 siblings, 1 reply; 11+ messages in thread
From: piaojun @ 2017-11-01 8:45 UTC (permalink / raw)
To: ocfs2-devel
Hi Changwei,
I do think we need to follow the principle of using 'dlm_domain_lock' to
protect 'dlm_state', as the NOTE says in 'dlm_ctxt':
/* NOTE: Next three are protected by dlm_domain_lock */
The dead node won't be cleared from 'dlm->domain_map' if we return early
from __dlm_hb_node_down(), and NodeA will retry migrating to NodeB forever
if only NodeA and NodeB are in the domain. I suggest more testing is
needed for your solution.
thanks,
Jun
On 2017/11/1 16:11, Changwei Ge wrote:
> Hi Jun,
>
> Thanks for reviewing.
> I don't think we have to worry about misusing *dlm_domain_lock* and
> *dlm::spinlock*. I admit my change may look a little different from most
> of other code snippets where using these two spin locks. But our purpose
> is to close the race between __dlm_hb_node_down and
> dlm_unregister_domain, right? And my change meets that. :-)
>
> I suppose we can do it in a flexible way.
>
> Thanks,
> Changwei
>
>
> On 2017/11/1 15:57, piaojun wrote:
>> Hi Changwei,
>>
>> thanks for reviewing, and I think waiting for recoverying done before
>> migrating seems another solution, but I wonder if new problems will be
>> invoked as following comments.
>>
>> thanks,
>> Jun
>>
>> On 2017/11/1 15:13, Changwei Ge wrote:
>>> Hi Jun,
>>>
>>> I probably get your point.
>>>
>>> You mean that dlm finds no lock resource to be migrated and no more lock
>>> resource is managed by its hash table. After that a node dies all of a
>>> sudden and the dead node is put into dlm's recovery map, right?
>> that is it.
>>> Furthermore, a lock resource is migrated from other nodes and local node
>>> has already asserted master to them.
>>>
>>> If so, I want to suggest a easier way to solve it.
>>> We don't have to add a new flag to dlm structure, just leverage existed
>>> dlm status and bitmap.
>>> It will bring a bonus - no lock resource will be marked with RECOVERING,
>>> it's a safer way, I suppose.
>>>
>>> Please take a review.
>>>
>>> Thanks,
>>> Changwei
>>>
>>>
>>> Subject: [PATCH] ocfs2/dlm: a node can't be involved in recovery if it
>>> is being shutdown
>>>
>>> Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
>>> ---
>>> fs/ocfs2/dlm/dlmdomain.c | 4 ++++
>>> fs/ocfs2/dlm/dlmrecovery.c | 3 +++
>>> 2 files changed, 7 insertions(+)
>>>
>>> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
>>> index a2b19fbdcf46..5e9283e509a4 100644
>>> --- a/fs/ocfs2/dlm/dlmdomain.c
>>> +++ b/fs/ocfs2/dlm/dlmdomain.c
>>> @@ -707,11 +707,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
>>> * want new domain joins to communicate with us at
>>> * least until we've completed migration of our
>>> * resources. */
>>> + spin_lock(&dlm->spinlock);
>>> dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN;
>>> + spin_unlock(&dlm->spinlock);
>> I guess there will be misuse of 'dlm->spinlock' and dlm_domain_lock.
>>> leave = 1;
>>> }
>>> spin_unlock(&dlm_domain_lock);
>>>
>>> + dlm_wait_for_recovery(dlm);
>>> +
>>> if (leave) {
>>> mlog(0, "shutting down domain %s\n", dlm->name);
>>> dlm_begin_exit_domain(dlm);
>>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>>> index 74407c6dd592..764c95b2b35c 100644
>>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>>> @@ -2441,6 +2441,9 @@ static void __dlm_hb_node_down(struct dlm_ctxt
>>> *dlm, int idx)
>>> {
>>> assert_spin_locked(&dlm->spinlock);
>>>
>>> + if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
>>> + return;
>>> +
>> 'dlm->dlm_state' probably need be to protected by 'dlm_domain_lock'.
>> and I wander if there is more work to be done when in
>> 'DLM_CTXT_IN_SHUTDOWN'?
>>> if (dlm->reco.new_master == idx) {
>>> mlog(0, "%s: recovery master %d just died\n",
>>> dlm->name, idx);
>>>
>>
>
> .
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
2017-11-01 8:45 ` piaojun
@ 2017-11-01 9:00 ` Changwei Ge
2017-11-02 1:42 ` piaojun
0 siblings, 1 reply; 11+ messages in thread
From: Changwei Ge @ 2017-11-01 9:00 UTC (permalink / raw)
To: ocfs2-devel
Hi Jun,
On 2017/11/1 16:46, piaojun wrote:
> Hi Changwei,
>
> I do think we need follow the principle that use 'dlm_domain_lock' to
> protect 'dlm_state' as the NOTE says in 'dlm_ctxt':
> /* NOTE: Next three are protected by dlm_domain_lock */
>
> deadnode won't be cleared from 'dlm->domain_map' if return from
> __dlm_hb_node_down(), and NodeA will retry migrating to NodeB forever
> if only NodeA and NodeB in domain. I suggest more testing needed in
> your solution.
I agree; however, my patch is just a draft to illustrate my comments.
Perhaps we can figure out a better way to solve this, as your patch
can't clear the DLM_RECOVERING flag on lock resources. I am not sure
whether that is reasonable; I suppose it may violate the ocfs2/dlm
design philosophy.
Thanks,
Changwei
>
> thanks,
> Jun
>
> On 2017/11/1 16:11, Changwei Ge wrote:
>> Hi Jun,
>>
>> Thanks for reviewing.
>> I don't think we have to worry about misusing *dlm_domain_lock* and
>> *dlm::spinlock*. I admit my change may look a little different from most
>> of other code snippets where using these two spin locks. But our purpose
>> is to close the race between __dlm_hb_node_down and
>> dlm_unregister_domain, right? And my change meets that. :-)
>>
>> I suppose we can do it in a flexible way.
>>
>> Thanks,
>> Changwei
>>
>>
>> On 2017/11/1 15:57, piaojun wrote:
>>> Hi Changwei,
>>>
>>> thanks for reviewing, and I think waiting for recoverying done before
>>> migrating seems another solution, but I wonder if new problems will be
>>> invoked as following comments.
>>>
>>> thanks,
>>> Jun
>>>
>>> On 2017/11/1 15:13, Changwei Ge wrote:
>>>> Hi Jun,
>>>>
>>>> I probably get your point.
>>>>
>>>> You mean that dlm finds no lock resource to be migrated and no more lock
>>>> resource is managed by its hash table. After that a node dies all of a
>>>> sudden and the dead node is put into dlm's recovery map, right?
>>> that is it.
>>>> Furthermore, a lock resource is migrated from other nodes and local node
>>>> has already asserted master to them.
>>>>
>>>> If so, I want to suggest a easier way to solve it.
>>>> We don't have to add a new flag to dlm structure, just leverage existed
>>>> dlm status and bitmap.
>>>> It will bring a bonus - no lock resource will be marked with RECOVERING,
>>>> it's a safer way, I suppose.
>>>>
>>>> Please take a review.
>>>>
>>>> Thanks,
>>>> Changwei
>>>>
>>>>
>>>> Subject: [PATCH] ocfs2/dlm: a node can't be involved in recovery if it
>>>> is being shutdown
>>>>
>>>> Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
>>>> ---
>>>> fs/ocfs2/dlm/dlmdomain.c | 4 ++++
>>>> fs/ocfs2/dlm/dlmrecovery.c | 3 +++
>>>> 2 files changed, 7 insertions(+)
>>>>
>>>> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
>>>> index a2b19fbdcf46..5e9283e509a4 100644
>>>> --- a/fs/ocfs2/dlm/dlmdomain.c
>>>> +++ b/fs/ocfs2/dlm/dlmdomain.c
>>>> @@ -707,11 +707,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
>>>> * want new domain joins to communicate with us at
>>>> * least until we've completed migration of our
>>>> * resources. */
>>>> + spin_lock(&dlm->spinlock);
>>>> dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN;
>>>> + spin_unlock(&dlm->spinlock);
>>> I guess there will be misuse of 'dlm->spinlock' and dlm_domain_lock.
>>>> leave = 1;
>>>> }
>>>> spin_unlock(&dlm_domain_lock);
>>>>
>>>> + dlm_wait_for_recovery(dlm);
>>>> +
>>>> if (leave) {
>>>> mlog(0, "shutting down domain %s\n", dlm->name);
>>>> dlm_begin_exit_domain(dlm);
>>>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>>>> index 74407c6dd592..764c95b2b35c 100644
>>>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>>>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>>>> @@ -2441,6 +2441,9 @@ static void __dlm_hb_node_down(struct dlm_ctxt
>>>> *dlm, int idx)
>>>> {
>>>> assert_spin_locked(&dlm->spinlock);
>>>>
>>>> + if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
>>>> + return;
>>>> +
>>> 'dlm->dlm_state' probably need be to protected by 'dlm_domain_lock'.
>>> and I wander if there is more work to be done when in
>>> 'DLM_CTXT_IN_SHUTDOWN'?
>>>> if (dlm->reco.new_master == idx) {
>>>> mlog(0, "%s: recovery master %d just died\n",
>>>> dlm->name, idx);
>>>>
>>>
>>
>> .
>>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
2017-11-01 9:00 ` Changwei Ge
@ 2017-11-02 1:42 ` piaojun
2017-11-02 1:56 ` Changwei Ge
0 siblings, 1 reply; 11+ messages in thread
From: piaojun @ 2017-11-02 1:42 UTC (permalink / raw)
To: ocfs2-devel
Hi Changwei,
I had tried a solution like yours before, but failed to prevent the
race using just 'dlm_state' and the existing variables, as
'DLM_CTXT_IN_SHUTDOWN' covers many states. So I think we need to
introduce a 'migrate_done' flag to solve that problem.
thanks,
Jun
On 2017/11/1 17:00, Changwei Ge wrote:
> Hi Jun,
>
> On 2017/11/1 16:46, piaojun wrote:
>> Hi Changwei,
>>
>> I do think we need follow the principle that use 'dlm_domain_lock' to
>> protect 'dlm_state' as the NOTE says in 'dlm_ctxt':
>> /* NOTE: Next three are protected by dlm_domain_lock */
>>
>> deadnode won't be cleared from 'dlm->domain_map' if return from
>> __dlm_hb_node_down(), and NodeA will retry migrating to NodeB forever
>> if only NodeA and NodeB in domain. I suggest more testing needed in
>> your solution.
>
> I agree, however, my patch is just a draft to indicate my comments.
>
> Perhaps we can figure out a better way to solve this, as your patch
> can't clear DLM RECOVERING flag on lock resource. I am not sure if it is
> reasonable, I suppose this may violate ocfs2/dlm design philosophy.
>
> Thanks,
> Changwei
>
If a res is marked DLM_RECOVERING, the migrating process will wait for
recovery to finish, and DLM_RECOVERING will be cleared after recovery.
>>
>> thanks,
>> Jun
>>
>> On 2017/11/1 16:11, Changwei Ge wrote:
>>> Hi Jun,
>>>
>>> Thanks for reviewing.
>>> I don't think we have to worry about misusing *dlm_domain_lock* and
>>> *dlm::spinlock*. I admit my change may look a little different from most
>>> of other code snippets where using these two spin locks. But our purpose
>>> is to close the race between __dlm_hb_node_down and
>>> dlm_unregister_domain, right? And my change meets that. :-)
>>>
>>> I suppose we can do it in a flexible way.
>>>
>>> Thanks,
>>> Changwei
>>>
>>>
>>> On 2017/11/1 15:57, piaojun wrote:
>>>> Hi Changwei,
>>>>
>>>> thanks for reviewing, and I think waiting for recoverying done before
>>>> migrating seems another solution, but I wonder if new problems will be
>>>> invoked as following comments.
>>>>
>>>> thanks,
>>>> Jun
>>>>
>>>> On 2017/11/1 15:13, Changwei Ge wrote:
>>>>> Hi Jun,
>>>>>
>>>>> I probably get your point.
>>>>>
>>>>> You mean that dlm finds no lock resource to be migrated and no more lock
>>>>> resource is managed by its hash table. After that a node dies all of a
>>>>> sudden and the dead node is put into dlm's recovery map, right?
>>>> that is it.
>>>>> Furthermore, a lock resource is migrated from other nodes and local node
>>>>> has already asserted master to them.
>>>>>
>>>>> If so, I want to suggest a easier way to solve it.
>>>>> We don't have to add a new flag to dlm structure, just leverage existed
>>>>> dlm status and bitmap.
>>>>> It will bring a bonus - no lock resource will be marked with RECOVERING,
>>>>> it's a safer way, I suppose.
>>>>>
>>>>> Please take a review.
>>>>>
>>>>> Thanks,
>>>>> Changwei
>>>>>
>>>>>
>>>>> Subject: [PATCH] ocfs2/dlm: a node can't be involved in recovery if it
>>>>> is being shutdown
>>>>>
>>>>> Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
>>>>> ---
>>>>> fs/ocfs2/dlm/dlmdomain.c | 4 ++++
>>>>> fs/ocfs2/dlm/dlmrecovery.c | 3 +++
>>>>> 2 files changed, 7 insertions(+)
>>>>>
>>>>> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
>>>>> index a2b19fbdcf46..5e9283e509a4 100644
>>>>> --- a/fs/ocfs2/dlm/dlmdomain.c
>>>>> +++ b/fs/ocfs2/dlm/dlmdomain.c
>>>>> @@ -707,11 +707,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
>>>>> * want new domain joins to communicate with us at
>>>>> * least until we've completed migration of our
>>>>> * resources. */
>>>>> + spin_lock(&dlm->spinlock);
>>>>> dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN;
>>>>> + spin_unlock(&dlm->spinlock);
>>>> I guess there will be misuse of 'dlm->spinlock' and dlm_domain_lock.
>>>>> leave = 1;
>>>>> }
>>>>> spin_unlock(&dlm_domain_lock);
>>>>>
>>>>> + dlm_wait_for_recovery(dlm);
>>>>> +
>>>>> if (leave) {
>>>>> mlog(0, "shutting down domain %s\n", dlm->name);
>>>>> dlm_begin_exit_domain(dlm);
>>>>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>>>>> index 74407c6dd592..764c95b2b35c 100644
>>>>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>>>>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>>>>> @@ -2441,6 +2441,9 @@ static void __dlm_hb_node_down(struct dlm_ctxt
>>>>> *dlm, int idx)
>>>>> {
>>>>> assert_spin_locked(&dlm->spinlock);
>>>>>
>>>>> + if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
>>>>> + return;
>>>>> +
>>>> 'dlm->dlm_state' probably need be to protected by 'dlm_domain_lock'.
>>>> and I wander if there is more work to be done when in
>>>> 'DLM_CTXT_IN_SHUTDOWN'?
>>>>> if (dlm->reco.new_master == idx) {
>>>>> mlog(0, "%s: recovery master %d just died\n",
>>>>> dlm->name, idx);
>>>>>
>>>>
>>>
>>> .
>>>
>>
>
> .
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
2017-11-02 1:42 ` piaojun
@ 2017-11-02 1:56 ` Changwei Ge
2017-11-03 1:01 ` piaojun
0 siblings, 1 reply; 11+ messages in thread
From: Changwei Ge @ 2017-11-02 1:56 UTC (permalink / raw)
To: ocfs2-devel
On 2017/11/2 9:44, piaojun wrote:
> Hi Changwei,
>
> I had tried a solution like yours before, but failed to prevent the
> race just by 'dlm_state' and the existed variable as
> 'DLM_CTXT_IN_SHUTDOWN' contains many status. so I think we need
> introduce a 'migrate_done' to solve that problem.
Hi Jun,
Yes, adding a new flag might be a direction, but I still think we need
to clear other nodes' lock resource flag - DLM_LOCK_RES_RECOVERING -
which depends on NodeA's dlm recovery progress. Unfortunately, that
progress is interrupted by the newly added flag ::migrate_done in your
patch. :-(
So no DLM_FINALIZE_RECO_MSG message will be sent out to other nodes,
thus DLM_LOCK_RES_RECOVERING can't be cleared.
As far as I know, if DLM_LOCK_RES_RECOVERING is set, all lock and unlock
requests will *hang*.
Thanks,
Changwei
>
> thanks,
> Jun
>
> On 2017/11/1 17:00, Changwei Ge wrote:
>> Hi Jun,
>>
>> On 2017/11/1 16:46, piaojun wrote:
>>> Hi Changwei,
>>>
>>> I do think we need follow the principle that use 'dlm_domain_lock' to
>>> protect 'dlm_state' as the NOTE says in 'dlm_ctxt':
>>> /* NOTE: Next three are protected by dlm_domain_lock */
>>>
>>> deadnode won't be cleared from 'dlm->domain_map' if return from
>>> __dlm_hb_node_down(), and NodeA will retry migrating to NodeB forever
>>> if only NodeA and NodeB in domain. I suggest more testing needed in
>>> your solution.
>>
>> I agree, however, my patch is just a draft to indicate my comments.
>>
>> Perhaps we can figure out a better way to solve this, as your patch
>> can't clear DLM RECOVERING flag on lock resource. I am not sure if it is
>> reasonable, I suppose this may violate ocfs2/dlm design philosophy.
>>
>> Thanks,
>> Changwei
>>
>
> if res is marked as DLM RECOVERING, migrating process will wait for
> recoverying done. and DLM RECOVERING will be cleared after recoverying.
>
>>>
>>> thanks,
>>> Jun
>>>
>>> On 2017/11/1 16:11, Changwei Ge wrote:
>>>> Hi Jun,
>>>>
>>>> Thanks for reviewing.
>>>> I don't think we have to worry about misusing *dlm_domain_lock* and
>>>> *dlm::spinlock*. I admit my change may look a little different from most
>>>> of other code snippets where using these two spin locks. But our purpose
>>>> is to close the race between __dlm_hb_node_down and
>>>> dlm_unregister_domain, right? And my change meets that. :-)
>>>>
>>>> I suppose we can do it in a flexible way.
>>>>
>>>> Thanks,
>>>> Changwei
>>>>
>>>>
>>>> On 2017/11/1 15:57, piaojun wrote:
>>>>> Hi Changwei,
>>>>>
>>>>> thanks for reviewing, and I think waiting for recoverying done before
>>>>> migrating seems another solution, but I wonder if new problems will be
>>>>> invoked as following comments.
>>>>>
>>>>> thanks,
>>>>> Jun
>>>>>
>>>>> On 2017/11/1 15:13, Changwei Ge wrote:
>>>>>> Hi Jun,
>>>>>>
>>>>>> I probably get your point.
>>>>>>
>>>>>> You mean that dlm finds no lock resource to be migrated and no more lock
>>>>>> resource is managed by its hash table. After that a node dies all of a
>>>>>> sudden and the dead node is put into dlm's recovery map, right?
>>>>> that is it.
>>>>>> Furthermore, a lock resource is migrated from other nodes and local node
>>>>>> has already asserted master to them.
>>>>>>
>>>>>> If so, I want to suggest a easier way to solve it.
>>>>>> We don't have to add a new flag to dlm structure, just leverage existed
>>>>>> dlm status and bitmap.
>>>>>> It will bring a bonus - no lock resource will be marked with RECOVERING,
>>>>>> it's a safer way, I suppose.
>>>>>>
>>>>>> Please take a review.
>>>>>>
>>>>>> Thanks,
>>>>>> Changwei
>>>>>>
>>>>>>
>>>>>> Subject: [PATCH] ocfs2/dlm: a node can't be involved in recovery if it
>>>>>> is being shutdown
>>>>>>
>>>>>> Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
>>>>>> ---
>>>>>> fs/ocfs2/dlm/dlmdomain.c | 4 ++++
>>>>>> fs/ocfs2/dlm/dlmrecovery.c | 3 +++
>>>>>> 2 files changed, 7 insertions(+)
>>>>>>
>>>>>> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
>>>>>> index a2b19fbdcf46..5e9283e509a4 100644
>>>>>> --- a/fs/ocfs2/dlm/dlmdomain.c
>>>>>> +++ b/fs/ocfs2/dlm/dlmdomain.c
>>>>>> @@ -707,11 +707,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
>>>>>> * want new domain joins to communicate with us at
>>>>>> * least until we've completed migration of our
>>>>>> * resources. */
>>>>>> + spin_lock(&dlm->spinlock);
>>>>>> dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN;
>>>>>> + spin_unlock(&dlm->spinlock);
>>>>> I guess there will be misuse of 'dlm->spinlock' and dlm_domain_lock.
>>>>>> leave = 1;
>>>>>> }
>>>>>> spin_unlock(&dlm_domain_lock);
>>>>>>
>>>>>> + dlm_wait_for_recovery(dlm);
>>>>>> +
>>>>>> if (leave) {
>>>>>> mlog(0, "shutting down domain %s\n", dlm->name);
>>>>>> dlm_begin_exit_domain(dlm);
>>>>>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>>>>>> index 74407c6dd592..764c95b2b35c 100644
>>>>>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>>>>>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>>>>>> @@ -2441,6 +2441,9 @@ static void __dlm_hb_node_down(struct dlm_ctxt
>>>>>> *dlm, int idx)
>>>>>> {
>>>>>> assert_spin_locked(&dlm->spinlock);
>>>>>>
>>>>>> + if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
>>>>>> + return;
>>>>>> +
>>>>> 'dlm->dlm_state' probably need be to protected by 'dlm_domain_lock'.
>>>>> and I wander if there is more work to be done when in
>>>>> 'DLM_CTXT_IN_SHUTDOWN'?
>>>>>> if (dlm->reco.new_master == idx) {
>>>>>> mlog(0, "%s: recovery master %d just died\n",
>>>>>> dlm->name, idx);
>>>>>>
>>>>>
>>>>
>>>> .
>>>>
>>>
>>
>> .
>>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres
2017-11-02 1:56 ` Changwei Ge
@ 2017-11-03 1:01 ` piaojun
0 siblings, 0 replies; 11+ messages in thread
From: piaojun @ 2017-11-03 1:01 UTC (permalink / raw)
To: ocfs2-devel
Hi Changwei,
please see what my last mail said:
"
If a res is marked DLM_RECOVERING, the migrating process will wait for
recovery to finish, and DLM_RECOVERING will be cleared after recovery.
"
And if there are still lockres remaining, 'migrate_done' won't be set.
Moreover, if another node died after migrating, I just let another node
do the recovery.
thanks,
Jun
On 2017/11/2 9:56, Changwei Ge wrote:
> On 2017/11/2 9:44, piaojun wrote:
>> Hi Changwei,
>>
>> I had tried a solution like yours before, but failed to prevent the
>> race just by 'dlm_state' and the existed variable as
>> 'DLM_CTXT_IN_SHUTDOWN' contains many status. so I think we need
>> introduce a 'migrate_done' to solve that problem.
> Hi Jun,
>
> Yes, adding a new flag might be a direction, but I still think we need
> to clear other nodes' lock resources' flag - DLM_LOCK_RES_RECOVERING,
> which depends on NodeA's dlm recovering progress. Unfortunately, it is
> interrupted by the newly added flag ::migrate_done in your patch. :-(
>
> So no DLM_FINALIZE_RECO_MSG message will be sent out to other nodes,
> thus DLM_LOCK_RES_RECOVERING can't be cleared.
>
> As I know, if DLM_LOCK_RES_RECOVERING is set, all lock and unlock
> requests will be *hang*.
>
> Thanks,
> Changwei
>
>>
>> thanks,
>> Jun
>>
>> On 2017/11/1 17:00, Changwei Ge wrote:
>>> Hi Jun,
>>>
>>> On 2017/11/1 16:46, piaojun wrote:
>>>> Hi Changwei,
>>>>
>>>> I do think we need follow the principle that use 'dlm_domain_lock' to
>>>> protect 'dlm_state' as the NOTE says in 'dlm_ctxt':
>>>> /* NOTE: Next three are protected by dlm_domain_lock */
>>>>
>>>> deadnode won't be cleared from 'dlm->domain_map' if return from
>>>> __dlm_hb_node_down(), and NodeA will retry migrating to NodeB forever
>>>> if only NodeA and NodeB in domain. I suggest more testing needed in
>>>> your solution.
>>>
>>> I agree, however, my patch is just a draft to indicate my comments.
>>>
>>> Perhaps we can figure out a better way to solve this, as your patch
>>> can't clear DLM RECOVERING flag on lock resource. I am not sure if it is
>>> reasonable, I suppose this may violate ocfs2/dlm design philosophy.
>>>
>>> Thanks,
>>> Changwei
>>>
>>
>> if res is marked as DLM RECOVERING, migrating process will wait for
>> recoverying done. and DLM RECOVERING will be cleared after recoverying.
>>
>>>>
>>>> thanks,
>>>> Jun
>>>>
>>>> On 2017/11/1 16:11, Changwei Ge wrote:
>>>>> Hi Jun,
>>>>>
>>>>> Thanks for reviewing.
>>>>> I don't think we have to worry about misusing *dlm_domain_lock* and
>>>>> *dlm::spinlock*. I admit my change may look a little different from most
>>>>> of other code snippets where using these two spin locks. But our purpose
>>>>> is to close the race between __dlm_hb_node_down and
>>>>> dlm_unregister_domain, right? And my change meets that. :-)
>>>>>
>>>>> I suppose we can do it in a flexible way.
>>>>>
>>>>> Thanks,
>>>>> Changwei
>>>>>
>>>>>
>>>>> On 2017/11/1 15:57, piaojun wrote:
>>>>>> Hi Changwei,
>>>>>>
>>>>>> thanks for reviewing, and I think waiting for recoverying done before
>>>>>> migrating seems another solution, but I wonder if new problems will be
>>>>>> invoked as following comments.
>>>>>>
>>>>>> thanks,
>>>>>> Jun
>>>>>>
>>>>>> On 2017/11/1 15:13, Changwei Ge wrote:
>>>>>>> Hi Jun,
>>>>>>>
>>>>>>> I probably get your point.
>>>>>>>
>>>>>>> You mean that dlm finds no lock resource to be migrated and no more lock
>>>>>>> resource is managed by its hash table. After that a node dies all of a
>>>>>>> sudden and the dead node is put into dlm's recovery map, right?
>>>>>> that is it.
>>>>>>> Furthermore, a lock resource is migrated from other nodes and local node
>>>>>>> has already asserted master to them.
>>>>>>>
>>>>>>> If so, I want to suggest a easier way to solve it.
>>>>>>> We don't have to add a new flag to dlm structure, just leverage existed
>>>>>>> dlm status and bitmap.
>>>>>>> It will bring a bonus - no lock resource will be marked with RECOVERING,
>>>>>>> it's a safer way, I suppose.
>>>>>>>
>>>>>>> Please take a review.
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Changwei
>>>>>>>
>>>>>>>
>>>>>>> Subject: [PATCH] ocfs2/dlm: a node can't be involved in recovery if it
>>>>>>> is being shutdown
>>>>>>>
>>>>>>> Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
>>>>>>> ---
>>>>>>> fs/ocfs2/dlm/dlmdomain.c | 4 ++++
>>>>>>> fs/ocfs2/dlm/dlmrecovery.c | 3 +++
>>>>>>> 2 files changed, 7 insertions(+)
>>>>>>>
>>>>>>> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
>>>>>>> index a2b19fbdcf46..5e9283e509a4 100644
>>>>>>> --- a/fs/ocfs2/dlm/dlmdomain.c
>>>>>>> +++ b/fs/ocfs2/dlm/dlmdomain.c
>>>>>>> @@ -707,11 +707,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
>>>>>>> * want new domain joins to communicate with us at
>>>>>>> * least until we've completed migration of our
>>>>>>> * resources. */
>>>>>>> + spin_lock(&dlm->spinlock);
>>>>>>> dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN;
>>>>>>> + spin_unlock(&dlm->spinlock);
>>>>>> I guess there will be misuse of 'dlm->spinlock' and dlm_domain_lock.
>>>>>>> leave = 1;
>>>>>>> }
>>>>>>> spin_unlock(&dlm_domain_lock);
>>>>>>>
>>>>>>> + dlm_wait_for_recovery(dlm);
>>>>>>> +
>>>>>>> if (leave) {
>>>>>>> mlog(0, "shutting down domain %s\n", dlm->name);
>>>>>>> dlm_begin_exit_domain(dlm);
>>>>>>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>>>>>>> index 74407c6dd592..764c95b2b35c 100644
>>>>>>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>>>>>>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>>>>>>> @@ -2441,6 +2441,9 @@ static void __dlm_hb_node_down(struct dlm_ctxt
>>>>>>> *dlm, int idx)
>>>>>>> {
>>>>>>> assert_spin_locked(&dlm->spinlock);
>>>>>>>
>>>>>>> + if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
>>>>>>> + return;
>>>>>>> +
>>>>>> 'dlm->dlm_state' probably need be to protected by 'dlm_domain_lock'.
>>>>>> and I wander if there is more work to be done when in
>>>>>> 'DLM_CTXT_IN_SHUTDOWN'?
>>>>>>> if (dlm->reco.new_master == idx) {
>>>>>>> mlog(0, "%s: recovery master %d just died\n",
>>>>>>> dlm->name, idx);
>>>>>>>
>>>>>>
>>>>>
>>>>> .
>>>>>
>>>>
>>>
>>> .
>>>
>>
>
> .
>
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2017-11-03 1:01 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-01 1:14 [Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lockres piaojun
2017-11-01 2:47 ` Changwei Ge
2017-11-01 5:52 ` piaojun
2017-11-01 7:13 ` Changwei Ge
2017-11-01 7:56 ` piaojun
2017-11-01 8:11 ` Changwei Ge
2017-11-01 8:45 ` piaojun
2017-11-01 9:00 ` Changwei Ge
2017-11-02 1:42 ` piaojun
2017-11-02 1:56 ` Changwei Ge
2017-11-03 1:01 ` piaojun
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.