* [PATCH 0/2] Bug fix in aio ring page migration.
@ 2014-02-27 10:40 Tang Chen
  2014-02-27 10:40 ` [PATCH 1/2] aio, memory-hotplug: Fix conflict when migrating and accessing ring pages Tang Chen
  2014-02-27 10:40 ` [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration Tang Chen
  0 siblings, 2 replies; 13+ messages in thread
From: Tang Chen @ 2014-02-27 10:40 UTC (permalink / raw)
  To: viro, bcrl, jmoyer, kosaki.motohiro, kosaki.motohiro,
	isimatu.yasuaki, guz.fnst
  Cc: linux-fsdevel, linux-aio, linux-kernel

This patch-set fixes the following two problems:

1. We need to take ctx->completion_lock to protect the ring pages
   from being written concurrently while they are being migrated.

2. We need a memory barrier to ensure the memory copy is done
   before ctx->ring_pages[] is updated.

NOTE: AIO ring page migration has been present since Linux 3.12,
      so these two patches also need to be merged into the 3.12 stable tree.


Tang Chen (2):
  aio, memory-hotplug: Fix conflict when migrating and accessing ring
    pages.
  aio, mem-hotplug: Add memory barrier to aio ring page migration.

 fs/aio.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

-- 
1.8.3.1



* [PATCH 1/2] aio, memory-hotplug: Fix conflict when migrating and accessing ring pages.
  2014-02-27 10:40 [PATCH 0/2] Bug fix in aio ring page migration Tang Chen
@ 2014-02-27 10:40 ` Tang Chen
  2014-03-05 19:23   ` Jeff Moyer
  2014-02-27 10:40 ` [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration Tang Chen
  1 sibling, 1 reply; 13+ messages in thread
From: Tang Chen @ 2014-02-27 10:40 UTC (permalink / raw)
  To: viro, bcrl, jmoyer, kosaki.motohiro, kosaki.motohiro,
	isimatu.yasuaki, guz.fnst
  Cc: linux-fsdevel, linux-aio, linux-kernel

AIO ring page migration has been implemented by the following patch:

        https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/aio.c?id=36bc08cc01709b4a9bb563b35aa530241ddc63e3

In this patch, ctx->completion_lock is used to prevent other processes
from accessing the ring page being migrated.

But aio_setup_ring(), ioctx_add_table() and aio_read_events_ring()
write to the ring pages without taking ctx->completion_lock.

As a result, for example, we have the following problem:

            thread 1                      |              thread 2
                                          |
aio_migratepage()                         |
 |-> take ctx->completion_lock            |
 |-> migrate_page_copy(new, old)          |
 |   *NOW*, ctx->ring_pages[idx] == old   |
                                          |
                                          |    *NOW*, ctx->ring_pages[idx] == old
                                          |    aio_read_events_ring()
                                          |     |-> ring = kmap_atomic(ctx->ring_pages[0])
                                          |     |-> ring->head = head;          *HERE, write to the old ring page*
                                          |     |-> kunmap_atomic(ring);
                                          |
 |-> ctx->ring_pages[idx] = new           |
 |   *BUT NOW*, the content of            |
 |    ring_pages[idx] is old.             |
 |-> release ctx->completion_lock         |

As above, the new ring page will not be updated.

The solution is to take ctx->completion_lock in thread 2 as well, that is,
in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring() when
writing to the ring pages.

Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
---
 fs/aio.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/fs/aio.c b/fs/aio.c
index 062a5f6..50c089c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -366,6 +366,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 	int nr_pages;
 	int i;
 	struct file *file;
+	unsigned long flags;
 
 	/* Compensate for the ring buffer's head/tail overlap entry */
 	nr_events += 2;	/* 1 is required, 2 for good luck */
@@ -437,6 +438,14 @@ static int aio_setup_ring(struct kioctx *ctx)
 	ctx->user_id = ctx->mmap_base;
 	ctx->nr_events = nr_events; /* trusted copy */
 
+	/*
+	 * The aio ring pages are user space pages, so they can be migrated.
+	 * When writing to an aio ring page, we should ensure the page is not
+	 * being migrated. The aio page migration procedure is protected by
+	 * ctx->completion_lock, so take this lock here.
+	 */
+	spin_lock_irqsave(&ctx->completion_lock, flags);
+
 	ring = kmap_atomic(ctx->ring_pages[0]);
 	ring->nr = nr_events;	/* user copy */
 	ring->id = ~0U;
@@ -448,6 +457,8 @@ static int aio_setup_ring(struct kioctx *ctx)
 	kunmap_atomic(ring);
 	flush_dcache_page(ctx->ring_pages[0]);
 
+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
 	return 0;
 }
 
@@ -542,6 +553,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
 	unsigned i, new_nr;
 	struct kioctx_table *table, *old;
 	struct aio_ring *ring;
+	unsigned long flags;
 
 	spin_lock(&mm->ioctx_lock);
 	rcu_read_lock();
@@ -556,9 +568,19 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
 					rcu_read_unlock();
 					spin_unlock(&mm->ioctx_lock);
 
+					/*
+					 * Accessing ring pages must be done
+					 * holding ctx->completion_lock to
+					 * prevent the aio ring page migration
+					 * procedure from migrating ring pages.
+					 */
+					spin_lock_irqsave(&ctx->completion_lock,
+							  flags);
 					ring = kmap_atomic(ctx->ring_pages[0]);
 					ring->id = ctx->id;
 					kunmap_atomic(ring);
+					spin_unlock_irqrestore(
+						&ctx->completion_lock, flags);
 					return 0;
 				}
 
@@ -1021,6 +1043,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	unsigned head, tail, pos;
 	long ret = 0;
 	int copy_ret;
+	unsigned long flags;
 
 	mutex_lock(&ctx->ring_lock);
 
@@ -1066,11 +1089,21 @@ static long aio_read_events_ring(struct kioctx *ctx,
 		head %= ctx->nr_events;
 	}
 
+	/*
+	 * The aio ring pages are user space pages, so they can be migrated.
+	 * When writing to an aio ring page, we should ensure the page is not
+	 * being migrated. The aio page migration procedure is protected by
+	 * ctx->completion_lock, so take this lock here.
+	 */
+	spin_lock_irqsave(&ctx->completion_lock, flags);
+
 	ring = kmap_atomic(ctx->ring_pages[0]);
 	ring->head = head;
 	kunmap_atomic(ring);
 	flush_dcache_page(ctx->ring_pages[0]);
 
+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
 	pr_debug("%li  h%u t%u\n", ret, head, tail);
 
 	put_reqs_available(ctx, ret);
-- 
1.8.3.1



* [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration.
  2014-02-27 10:40 [PATCH 0/2] Bug fix in aio ring page migration Tang Chen
  2014-02-27 10:40 ` [PATCH 1/2] aio, memory-hotplug: Fix conflict when migrating and accessing ring pages Tang Chen
@ 2014-02-27 10:40 ` Tang Chen
  2014-02-27 12:06   ` Yasuaki Ishimatsu
  2014-02-27 14:57   ` [PATCH " Benjamin LaHaise
  1 sibling, 2 replies; 13+ messages in thread
From: Tang Chen @ 2014-02-27 10:40 UTC (permalink / raw)
  To: viro, bcrl, jmoyer, kosaki.motohiro, kosaki.motohiro,
	isimatu.yasuaki, guz.fnst
  Cc: linux-fsdevel, linux-aio, linux-kernel

When doing aio ring page migration, we migrate the page and then update
ctx->ring_pages[], like the following:

aio_migratepage()
 |-> migrate_page_copy(new, old)
 |   ......				/* Need barrier here */
 |-> ctx->ring_pages[idx] = new

We need a memory barrier between these two operations. Otherwise, if
the update of ctx->ring_pages[] is reordered before the memory copy by
the compiler or the CPU, other processes may have an opportunity to
access the new ring page before it is fully initialized.

So add a wmb to synchronize them.

Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
---
 fs/aio.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/aio.c b/fs/aio.c
index 50c089c..f0ed838 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 		pgoff_t idx;
 		spin_lock_irqsave(&ctx->completion_lock, flags);
 		migrate_page_copy(new, old);
+
+		/*
+		 * Ensure memory copy is finished before updating
+		 * ctx->ring_pages[]. Otherwise other processes may access
+		 * new ring pages which are not fully initialized.
+		 */
+		smp_wmb();
+
 		idx = old->index;
 		if (idx < (pgoff_t)ctx->nr_pages) {
 			/* And only do the move if things haven't changed */
-- 
1.8.3.1



* Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration.
  2014-02-27 10:40 ` [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration Tang Chen
@ 2014-02-27 12:06   ` Yasuaki Ishimatsu
  2014-02-27 12:44     ` [Update PATCH " Yasuaki Ishimatsu
  2014-02-27 14:57   ` [PATCH " Benjamin LaHaise
  1 sibling, 1 reply; 13+ messages in thread
From: Yasuaki Ishimatsu @ 2014-02-27 12:06 UTC (permalink / raw)
  To: Tang Chen
  Cc: viro, bcrl, jmoyer, kosaki.motohiro, kosaki.motohiro, guz.fnst,
	linux-fsdevel, linux-aio, linux-kernel

Hi Tang,

(2014/02/27 19:40), Tang Chen wrote:
> When doing aio ring page migration, we migrate the page and then update
> ctx->ring_pages[], like the following:
> 
> aio_migratepage()
>   |-> migrate_page_copy(new, old)
>   |   ......				/* Need barrier here */
>   |-> ctx->ring_pages[idx] = new
> 
> We need a memory barrier between these two operations. Otherwise, if
> the update of ctx->ring_pages[] is reordered before the memory copy by
> the compiler or the CPU, other processes may have an opportunity to
> access the new ring page before it is fully initialized.
> 
> So add a wmb to synchronize them.
> 
> Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
> ---
>   fs/aio.c | 8 ++++++++
>   1 file changed, 8 insertions(+)
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 50c089c..f0ed838 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
>   		pgoff_t idx;
>   		spin_lock_irqsave(&ctx->completion_lock, flags);
>   		migrate_page_copy(new, old);
> +
> +		/*
> +		 * Ensure memory copy is finished before updating
> +		 * ctx->ring_pages[]. Otherwise other processes may access
> +		 * new ring pages which are not fully initialized.
> +		 */
> +		smp_wmb();
> +

If you put smp_wmb() here, you should put smp_rmb() before kmap() in
aio_read_events_ring().
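
To illustrate the pairing being suggested here, a rough sketch (not the
actual patch; the exact reader-side placement is only an assumption):

	/* writer side: aio_migratepage() */
	migrate_page_copy(new, old);
	smp_wmb();			/* finish the copy before publishing */
	ctx->ring_pages[idx] = new;

	/* reader side: aio_read_events_ring() */
	page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
	smp_rmb();			/* pairs with the smp_wmb() above */
	ev = kmap(page);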

Thanks,
Yasuaki Ishimatsu

>   		idx = old->index;
>   		if (idx < (pgoff_t)ctx->nr_pages) {
>   			/* And only do the move if things haven't changed */
> 




* [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration.
  2014-02-27 12:06   ` Yasuaki Ishimatsu
@ 2014-02-27 12:44     ` Yasuaki Ishimatsu
  2014-03-04  5:35       ` Miao Xie
  2014-03-05  7:17       ` [Update v2 " Yasuaki Ishimatsu
  0 siblings, 2 replies; 13+ messages in thread
From: Yasuaki Ishimatsu @ 2014-02-27 12:44 UTC (permalink / raw)
  To: Tang Chen, bcrl
  Cc: viro, jmoyer, kosaki.motohiro, kosaki.motohiro, guz.fnst,
	linux-fsdevel, linux-aio, linux-kernel

When doing aio ring page migration, we migrate the page and then update
ctx->ring_pages[], like the following:

aio_migratepage()
 |-> migrate_page_copy(new, old)
 |   ......				/* Need barrier here */
 |-> ctx->ring_pages[idx] = new

We need a memory barrier between these two operations. Otherwise, if
the update of ctx->ring_pages[] is reordered before the memory copy by
the compiler or the CPU, other processes may have an opportunity to
access the new ring page before it is fully initialized.

So add a wmb and rmb to synchronize them.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 fs/aio.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/aio.c b/fs/aio.c
index 50c089c..8d9b82b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 		pgoff_t idx;
 		spin_lock_irqsave(&ctx->completion_lock, flags);
 		migrate_page_copy(new, old);
+
+		/*
+		 * Ensure memory copy is finished before updating
+		 * ctx->ring_pages[]. Otherwise other processes may access
+		 * new ring pages which are not fully initialized.
+		 */
+		smp_wmb();
+
 		idx = old->index;
 		if (idx < (pgoff_t)ctx->nr_pages) {
 			/* And only do the move if things haven't changed */
@@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
 		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
 		pos %= AIO_EVENTS_PER_PAGE;

+		/*
+		 * Ensure that the page's data was copied from the old one by
+		 * aio_migratepage().
+		 */
+		smp_rmb();
+
 		ev = kmap(page);
 		copy_ret = copy_to_user(event + ret, ev + pos,
 					sizeof(*ev) * avail);



* Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration.
  2014-02-27 10:40 ` [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration Tang Chen
  2014-02-27 12:06   ` Yasuaki Ishimatsu
@ 2014-02-27 14:57   ` Benjamin LaHaise
  2014-02-28  1:29     ` Gu Zheng
  2014-02-28  7:25     ` Yasuaki Ishimatsu
  1 sibling, 2 replies; 13+ messages in thread
From: Benjamin LaHaise @ 2014-02-27 14:57 UTC (permalink / raw)
  To: Tang Chen
  Cc: viro, jmoyer, kosaki.motohiro, kosaki.motohiro, isimatu.yasuaki,
	guz.fnst, linux-fsdevel, linux-aio, linux-kernel

On Thu, Feb 27, 2014 at 06:40:16PM +0800, Tang Chen wrote:
> When doing aio ring page migration, we migrate the page and then update
> ctx->ring_pages[], like the following:
> 
> aio_migratepage()
>  |-> migrate_page_copy(new, old)
>  |   ......				/* Need barrier here */
>  |-> ctx->ring_pages[idx] = new
> 
> We need a memory barrier between these two operations. Otherwise, if
> the update of ctx->ring_pages[] is reordered before the memory copy by
> the compiler or the CPU, other processes may have an opportunity to
> access the new ring page before it is fully initialized.
> 
> So add a wmb to synchronize them.

The smp_wmb() is not needed after you added the taking of the
ctx->completion_lock, since all accesses to ring_pages are then protected
by the spinlock.  Why are you adding this then?  Or have you missed adding
the lock somewhere?  Also, if you've changed the patch, it is customary to
add a "v2" somewhere in the patch title so that I have some idea which
version of the patch should be applied.

		-ben

> Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
> ---
>  fs/aio.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 50c089c..f0ed838 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
>  		pgoff_t idx;
>  		spin_lock_irqsave(&ctx->completion_lock, flags);
>  		migrate_page_copy(new, old);
> +
> +		/*
> +		 * Ensure memory copy is finished before updating
> +		 * ctx->ring_pages[]. Otherwise other processes may access
> +		 * new ring pages which are not fully initialized.
> +		 */
> +		smp_wmb();
> +
>  		idx = old->index;
>  		if (idx < (pgoff_t)ctx->nr_pages) {
>  			/* And only do the move if things haven't changed */
> -- 
> 1.8.3.1

-- 
"Thought is the essence of where you are now."


* Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration.
  2014-02-27 14:57   ` [PATCH " Benjamin LaHaise
@ 2014-02-28  1:29     ` Gu Zheng
  2014-02-28  7:25     ` Yasuaki Ishimatsu
  1 sibling, 0 replies; 13+ messages in thread
From: Gu Zheng @ 2014-02-28  1:29 UTC (permalink / raw)
  To: Benjamin LaHaise
  Cc: Tang Chen, viro, jmoyer, kosaki.motohiro, kosaki.motohiro,
	isimatu.yasuaki, linux-fsdevel, linux-aio, linux-kernel

Hi Ben,
On 02/27/2014 10:57 PM, Benjamin LaHaise wrote:

> On Thu, Feb 27, 2014 at 06:40:16PM +0800, Tang Chen wrote:
>> When doing aio ring page migration, we migrate the page and then update
>> ctx->ring_pages[], like the following:
>>
>> aio_migratepage()
>>  |-> migrate_page_copy(new, old)
>>  |   ......				/* Need barrier here */
>>  |-> ctx->ring_pages[idx] = new
>>
>> We need a memory barrier between these two operations. Otherwise, if
>> the update of ctx->ring_pages[] is reordered before the memory copy by
>> the compiler or the CPU, other processes may have an opportunity to
>> access the new ring page before it is fully initialized.
>>
>> So add a wmb to synchronize them.
> 
> The smp_wmb() is not needed after you added the taking of the
> ctx->completion_lock, since all accesses to ring_pages are then protected
> by the spinlock.  Why are you adding this then?  Or have you missed adding
> the lock somewhere?  Also, if you've changed the patch, it is customary to
> add a "v2" somewhere in the patch title so that I have some idea which
> version of the patch should be applied.

The completion_lock only protects the update of ring->head when reading
events, so the wmb is still needed here.
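
For reference, the event-copy loop in aio_read_events_ring() runs under
ctx->ring_lock, not ctx->completion_lock. Simplified from the fs/aio.c in
this series, with most of the loop elided:

	/* aio_read_events_ring(), simplified: these per-page reads are
	 * not covered by ctx->completion_lock */
	while (ret < nr) {
		...
		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
		pos %= AIO_EVENTS_PER_PAGE;
		ev = kmap(page);
		copy_ret = copy_to_user(event + ret, ev + pos,
					sizeof(*ev) * avail);
		kunmap(page);
		...
	}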

Regards,
Gu

> 
> 		-ben
> 
>> Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
>> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
>> ---
>>  fs/aio.c | 8 ++++++++
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/fs/aio.c b/fs/aio.c
>> index 50c089c..f0ed838 100644
>> --- a/fs/aio.c
>> +++ b/fs/aio.c
>> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
>>  		pgoff_t idx;
>>  		spin_lock_irqsave(&ctx->completion_lock, flags);
>>  		migrate_page_copy(new, old);
>> +
>> +		/*
>> +		 * Ensure memory copy is finished before updating
>> +		 * ctx->ring_pages[]. Otherwise other processes may access
>> +		 * new ring pages which are not fully initialized.
>> +		 */
>> +		smp_wmb();
>> +
>>  		idx = old->index;
>>  		if (idx < (pgoff_t)ctx->nr_pages) {
>>  			/* And only do the move if things haven't changed */
>> -- 
>> 1.8.3.1
> 




* Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration.
  2014-02-27 14:57   ` [PATCH " Benjamin LaHaise
  2014-02-28  1:29     ` Gu Zheng
@ 2014-02-28  7:25     ` Yasuaki Ishimatsu
  1 sibling, 0 replies; 13+ messages in thread
From: Yasuaki Ishimatsu @ 2014-02-28  7:25 UTC (permalink / raw)
  To: Benjamin LaHaise
  Cc: Tang Chen, viro, jmoyer, kosaki.motohiro, kosaki.motohiro,
	guz.fnst, linux-fsdevel, linux-aio, linux-kernel

(2014/02/27 23:57), Benjamin LaHaise wrote:
> On Thu, Feb 27, 2014 at 06:40:16PM +0800, Tang Chen wrote:
>> When doing aio ring page migration, we migrate the page and then update
>> ctx->ring_pages[], like the following:
>>
>> aio_migratepage()
>>   |-> migrate_page_copy(new, old)
>>   |   ......				/* Need barrier here */
>>   |-> ctx->ring_pages[idx] = new
>>
>> We need a memory barrier between these two operations. Otherwise, if
>> the update of ctx->ring_pages[] is reordered before the memory copy by
>> the compiler or the CPU, other processes may have an opportunity to
>> access the new ring page before it is fully initialized.
>>
>> So add a wmb to synchronize them.
>
> The smp_wmb() is not needed after you added the taking of the
> ctx->completion_lock, since all accesses to ring_pages are then protected
> by the spinlock.
> Why are you adding this then?  Or have you missed adding the lock somewhere?

Please see the following thread. It's an updated patch.

https://lkml.org/lkml/2014/2/27/237

aio_read_events_ring() accesses ring_pages without taking ctx->completion_lock.
If the copy of the old page's data to the new page is reordered to run after
the new page is set in ctx->ring_pages, aio_read_events_ring() may not work
correctly.

So smp_wmb() and smp_rmb() are needed.
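
Concretely, the reordering being guarded against looks like this (a
hypothetical interleaving; without barriers the plain stores may be
reordered by the compiler or the CPU):

	aio_migratepage()                    aio_read_events_ring()

	ctx->ring_pages[idx] = new;          /* store reordered early */
	                                     page = ctx->ring_pages[idx];
	                                     ev = kmap(page); /* new page,
	                                                         not yet copied */
	migrate_page_copy(new, old);         /* copy lands too late */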

Thanks,
Yasuaki Ishimatsu

> Also, if you've changed the patch, it is customary to add a "v2" somewhere in
> the patch title so that I have some idea which version of the patch should be
> applied.
>
> 		-ben



>
>> Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
>> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
>> ---
>>   fs/aio.c | 8 ++++++++
>>   1 file changed, 8 insertions(+)
>>
>> diff --git a/fs/aio.c b/fs/aio.c
>> index 50c089c..f0ed838 100644
>> --- a/fs/aio.c
>> +++ b/fs/aio.c
>> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
>>   		pgoff_t idx;
>>   		spin_lock_irqsave(&ctx->completion_lock, flags);
>>   		migrate_page_copy(new, old);
>> +
>> +		/*
>> +		 * Ensure memory copy is finished before updating
>> +		 * ctx->ring_pages[]. Otherwise other processes may access
>> +		 * new ring pages which are not fully initialized.
>> +		 */
>> +		smp_wmb();
>> +
>>   		idx = old->index;
>>   		if (idx < (pgoff_t)ctx->nr_pages) {
>>   			/* And only do the move if things haven't changed */
>> --
>> 1.8.3.1
>






* Re: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration.
  2014-02-27 12:44     ` [Update PATCH " Yasuaki Ishimatsu
@ 2014-03-04  5:35       ` Miao Xie
  2014-03-05  3:04         ` KOSAKI Motohiro
  2014-03-05  6:59         ` Yasuaki Ishimatsu
  2014-03-05  7:17       ` [Update v2 " Yasuaki Ishimatsu
  1 sibling, 2 replies; 13+ messages in thread
From: Miao Xie @ 2014-03-04  5:35 UTC (permalink / raw)
  To: Yasuaki Ishimatsu, Tang Chen, bcrl
  Cc: viro, jmoyer, kosaki.motohiro, kosaki.motohiro, guz.fnst,
	linux-fsdevel, linux-aio, linux-kernel

On Thu, 27 Feb 2014 21:44:23 +0900, Yasuaki Ishimatsu wrote:
> When doing aio ring page migration, we migrate the page and then update
> ctx->ring_pages[], like the following:
> 
> aio_migratepage()
>  |-> migrate_page_copy(new, old)
>  |   ......				/* Need barrier here */
>  |-> ctx->ring_pages[idx] = new
> 
> We need a memory barrier between these two operations. Otherwise, if
> the update of ctx->ring_pages[] is reordered before the memory copy by
> the compiler or the CPU, other processes may have an opportunity to
> access the new ring page before it is fully initialized.
> 
> So add a wmb and rmb to synchronize them.
> 
> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> 
> ---
>  fs/aio.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 50c089c..8d9b82b 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
>  		pgoff_t idx;
>  		spin_lock_irqsave(&ctx->completion_lock, flags);
>  		migrate_page_copy(new, old);
> +
> +		/*
> +		 * Ensure memory copy is finished before updating
> +		 * ctx->ring_pages[]. Otherwise other processes may access
> +		 * new ring pages which are not fully initialized.
> +		 */
> +		smp_wmb();
> +
>  		idx = old->index;
>  		if (idx < (pgoff_t)ctx->nr_pages) {
>  			/* And only do the move if things haven't changed */
> @@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
>  		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
>  		pos %= AIO_EVENTS_PER_PAGE;
> 
> +		/*
> +		 * Ensure that the page's data was copied from the old one by
> +		 * aio_migratepage().
> +		 */
> +		smp_rmb();
> +

smp_read_barrier_depends() is better.

"One could place an A smp_rmb() primitive between the pointer fetch and
 dereference. However, this imposes unneeded overhead on systems (such as
 i386, IA64, PPC, and SPARC) that respect data dependencies on the read side.
 A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel
 to eliminate overhead on these systems."
		-- From Chapter 7.1 of <Memory Barriers: a Hardware View for Software Hackers>
		   Written by Paul E. McKenney
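
Applied to this path, that would be (a sketch; the v2 patch below takes
exactly this shape):

	page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
	smp_read_barrier_depends();	/* order the dependent loads of the
					 * page contents; a no-op on all but
					 * a few architectures such as Alpha */
	ev = kmap(page);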

Thanks
Miao

>  		ev = kmap(page);
>  		copy_ret = copy_to_user(event + ret, ev + pos,
>  					sizeof(*ev) * avail);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 



* Re: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration.
  2014-03-04  5:35       ` Miao Xie
@ 2014-03-05  3:04         ` KOSAKI Motohiro
  2014-03-05  6:59         ` Yasuaki Ishimatsu
  1 sibling, 0 replies; 13+ messages in thread
From: KOSAKI Motohiro @ 2014-03-05  3:04 UTC (permalink / raw)
  To: Miao Xie
  Cc: Yasuaki Ishimatsu, Tang Chen, Benjamin LaHaise, Al Viro,
	Jeff Moyer, guz.fnst, linux-fsdevel, linux-aio, LKML

>> +             /*
>> +              * Ensure that the page's data was copied from the old one by
>> +              * aio_migratepage().
>> +              */
>> +             smp_rmb();
>> +
>
> smp_read_barrier_depends() is better.
>
> "One could place an A smp_rmb() primitive between the pointer fetch and
>  dereference. However, this imposes unneeded overhead on systems (such as
>  i386, IA64, PPC, and SPARC) that respect data dependencies on the read side.
>  A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel
>  to eliminate overhead on these systems."
>                 -- From Chapter 7.1 of <Memory Barriers: a Hardware View for Software Hackers>
>                    Written by Paul E. McKenney
>

Right.
The memory-barriers document describes this situation.

See https://www.kernel.org/doc/Documentation/memory-barriers.txt:

CPU 1                            CPU 2
===============      ===============
{ M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 }
M[1] = 4;
<write barrier>
ACCESS_ONCE(P) = 1
                                     Q = ACCESS_ONCE(P);
                                     <data dependency barrier>
                                     D = M[Q];
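
Informally mapping that example onto this code path (a correspondence
sketch, not literal kernel code):

	/* M[1] = 4              <->  migrate_page_copy(new, old)        */
	/* ACCESS_ONCE(P) = 1    <->  ctx->ring_pages[idx] = new         */
	/* Q = ACCESS_ONCE(P)    <->  page = ctx->ring_pages[pos / ...]  */
	/* <data dep. barrier>   <->  smp_read_barrier_depends()         */
	/* D = M[Q]              <->  ev = kmap(page); read ev[pos]      */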


* Re: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration.
  2014-03-04  5:35       ` Miao Xie
  2014-03-05  3:04         ` KOSAKI Motohiro
@ 2014-03-05  6:59         ` Yasuaki Ishimatsu
  1 sibling, 0 replies; 13+ messages in thread
From: Yasuaki Ishimatsu @ 2014-03-05  6:59 UTC (permalink / raw)
  To: miaox
  Cc: Tang Chen, bcrl, viro, jmoyer, kosaki.motohiro, kosaki.motohiro,
	guz.fnst, linux-fsdevel, linux-aio, linux-kernel

(2014/03/04 14:35), Miao Xie wrote:
> On Thu, 27 Feb 2014 21:44:23 +0900, Yasuaki Ishimatsu wrote:
>> When doing aio ring page migration, we migrate the page and then update
>> ctx->ring_pages[], like the following:
>>
>> aio_migratepage()
>>   |-> migrate_page_copy(new, old)
>>   |   ......				/* Need barrier here */
>>   |-> ctx->ring_pages[idx] = new
>>
>> We need a memory barrier between these two operations. Otherwise, if
>> the update of ctx->ring_pages[] is reordered before the memory copy by
>> the compiler or the CPU, other processes may have an opportunity to
>> access the new ring page before it is fully initialized.
>>
>> So add a wmb and rmb to synchronize them.
>>
>> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
>> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
>>
>> ---
>>   fs/aio.c | 14 ++++++++++++++
>>   1 file changed, 14 insertions(+)
>>
>> diff --git a/fs/aio.c b/fs/aio.c
>> index 50c089c..8d9b82b 100644
>> --- a/fs/aio.c
>> +++ b/fs/aio.c
>> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
>>   		pgoff_t idx;
>>   		spin_lock_irqsave(&ctx->completion_lock, flags);
>>   		migrate_page_copy(new, old);
>> +
>> +		/*
>> +		 * Ensure memory copy is finished before updating
>> +		 * ctx->ring_pages[]. Otherwise other processes may access
>> +		 * new ring pages which are not fully initialized.
>> +		 */
>> +		smp_wmb();
>> +
>>   		idx = old->index;
>>   		if (idx < (pgoff_t)ctx->nr_pages) {
>>   			/* And only do the move if things haven't changed */
>> @@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
>>   		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
>>   		pos %= AIO_EVENTS_PER_PAGE;
>>
>> +		/*
>> +		 * Ensure that the page's data was copied from the old one by
>> +		 * aio_migratepage().
>> +		 */
>> +		smp_rmb();
>> +
> 
> smp_read_barrier_depends() is better.
> 
> "One could place an A smp_rmb() primitive between the pointer fetch and
>   dereference. However, this imposes unneeded overhead on systems (such as
>   i386, IA64, PPC, and SPARC) that respect data dependencies on the read side.
>   A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel
>   to eliminate overhead on these systems."
> 		-- From Chapter 7.1 of <Memory Barriers: a Hardware View for Software Hackers>
> 		   Written by Paul E. McKenney
> 
> Thanks
> Miao

Thank you for your comment.
I'll update soon.

Thanks,
Yasuaki Ishimatsu


> 
>>   		ev = kmap(page);
>>   		copy_ret = copy_to_user(event + ret, ev + pos,
>>   					sizeof(*ev) * avail);
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>>
> 




* [Update v2 PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration.
  2014-02-27 12:44     ` [Update PATCH " Yasuaki Ishimatsu
  2014-03-04  5:35       ` Miao Xie
@ 2014-03-05  7:17       ` Yasuaki Ishimatsu
  1 sibling, 0 replies; 13+ messages in thread
From: Yasuaki Ishimatsu @ 2014-03-05  7:17 UTC (permalink / raw)
  To: Tang Chen, bcrl
  Cc: viro, jmoyer, kosaki.motohiro, kosaki.motohiro, guz.fnst,
	linux-fsdevel, linux-aio, linux-kernel, miaox

When doing aio ring page migration, we migrate the page and then update
ctx->ring_pages[], like the following:

aio_migratepage()
 |-> migrate_page_copy(new, old)
 |   ......				/* Need barrier here */
 |-> ctx->ring_pages[idx] = new

We need a memory barrier between these two operations. Otherwise, if
the update of ctx->ring_pages[] is reordered before the memory copy by
the compiler or the CPU, other processes may have an opportunity to
access the new ring page before it is fully initialized.

So add a wmb and rmb to synchronize them.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
v2: change smp_rmb() to smp_read_barrier_depends(). Thanks Miao.
---
 fs/aio.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/aio.c b/fs/aio.c
index 50c089c..98c7f2d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 		pgoff_t idx;
 		spin_lock_irqsave(&ctx->completion_lock, flags);
 		migrate_page_copy(new, old);
+
+		/*
+		 * Ensure memory copy is finished before updating
+		 * ctx->ring_pages[]. Otherwise other processes may access
+		 * new ring pages which are not fully initialized.
+		 */
+		smp_wmb();
+
 		idx = old->index;
 		if (idx < (pgoff_t)ctx->nr_pages) {
 			/* And only do the move if things haven't changed */
@@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
 		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
 		pos %= AIO_EVENTS_PER_PAGE;

+		/*
+		 * Ensure that the page's data was copied from the old one by
+		 * aio_migratepage().
+		 */
+		smp_read_barrier_depends();
+
 		ev = kmap(page);
 		copy_ret = copy_to_user(event + ret, ev + pos,
 					sizeof(*ev) * avail);



* Re: [PATCH 1/2] aio, memory-hotplug: Fix conflict when migrating and accessing ring pages.
  2014-02-27 10:40 ` [PATCH 1/2] aio, memory-hotplug: Fix conflict when migrating and accessing ring pages Tang Chen
@ 2014-03-05 19:23   ` Jeff Moyer
  0 siblings, 0 replies; 13+ messages in thread
From: Jeff Moyer @ 2014-03-05 19:23 UTC (permalink / raw)
  To: Tang Chen
  Cc: viro, bcrl, kosaki.motohiro, kosaki.motohiro, isimatu.yasuaki,
	guz.fnst, linux-fsdevel, linux-aio, linux-kernel

Tang Chen <tangchen@cn.fujitsu.com> writes:

> AIO ring page migration has been implemented by the following patch:
>
>         https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/aio.c?id=36bc08cc01709b4a9bb563b35aa530241ddc63e3
>
> In this patch, ctx->completion_lock is used to prevent other processes
> from accessing the ring page being migrated.
>
> But aio_setup_ring(), ioctx_add_table() and aio_read_events_ring()
> write to the ring pages without taking ctx->completion_lock.
>
> As a result, for example, we have the following problem:
>
>             thread 1                      |              thread 2
>                                           |
> aio_migratepage()                         |
>  |-> take ctx->completion_lock            |
>  |-> migrate_page_copy(new, old)          |
>  |   *NOW*, ctx->ring_pages[idx] == old   |
>                                           |
>                                           |    *NOW*, ctx->ring_pages[idx] == old
>                                           |    aio_read_events_ring()
>                                           |     |-> ring = kmap_atomic(ctx->ring_pages[0])
>                                           |     |-> ring->head = head;          *HERE, write to the old ring page*
>                                           |     |-> kunmap_atomic(ring);
>                                           |
>  |-> ctx->ring_pages[idx] = new           |
>  |   *BUT NOW*, the content of            |
>  |    ring_pages[idx] is old.             |
>  |-> release ctx->completion_lock         |
>
> As above, the new ring page will not be updated.
>
> The solution is to take ctx->completion_lock in thread 2 as well, that is,
> in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring() when
> writing to the ring pages.

Thanks for the good explanation and for adding comments to the code.
This looks right to me.  I guess the only issue I have with it is that
the changed code paths don't run in interrupt context, so I think you
could just use spin_lock_irq.  That's not a big deal, though.
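
That simplification would look roughly like this (a sketch; it assumes, as
noted, that these paths never run with interrupts already disabled):

	spin_lock_irq(&ctx->completion_lock);
	ring = kmap_atomic(ctx->ring_pages[0]);
	ring->head = head;
	kunmap_atomic(ring);
	flush_dcache_page(ctx->ring_pages[0]);
	spin_unlock_irq(&ctx->completion_lock);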

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>


> Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
> ---
>  fs/aio.c | 33 +++++++++++++++++++++++++++++++++
>  1 file changed, 33 insertions(+)
>
> diff --git a/fs/aio.c b/fs/aio.c
> index 062a5f6..50c089c 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -366,6 +366,7 @@ static int aio_setup_ring(struct kioctx *ctx)
>  	int nr_pages;
>  	int i;
>  	struct file *file;
> +	unsigned long flags;
>  
>  	/* Compensate for the ring buffer's head/tail overlap entry */
>  	nr_events += 2;	/* 1 is required, 2 for good luck */
> @@ -437,6 +438,14 @@ static int aio_setup_ring(struct kioctx *ctx)
>  	ctx->user_id = ctx->mmap_base;
>  	ctx->nr_events = nr_events; /* trusted copy */
>  
> +	/*
> +	 * The aio ring pages are user space pages, so they can be migrated.
> +	 * When writing to an aio ring page, we should ensure the page is not
> +	 * being migrated. The aio page migration procedure is protected by
> +	 * ctx->completion_lock, so take this lock here.
> +	 */
> +	spin_lock_irqsave(&ctx->completion_lock, flags);
> +
>  	ring = kmap_atomic(ctx->ring_pages[0]);
>  	ring->nr = nr_events;	/* user copy */
>  	ring->id = ~0U;
> @@ -448,6 +457,8 @@ static int aio_setup_ring(struct kioctx *ctx)
>  	kunmap_atomic(ring);
>  	flush_dcache_page(ctx->ring_pages[0]);
>  
> +	spin_unlock_irqrestore(&ctx->completion_lock, flags);
> +
>  	return 0;
>  }
>  
> @@ -542,6 +553,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
>  	unsigned i, new_nr;
>  	struct kioctx_table *table, *old;
>  	struct aio_ring *ring;
> +	unsigned long flags;
>  
>  	spin_lock(&mm->ioctx_lock);
>  	rcu_read_lock();
> @@ -556,9 +568,19 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
>  					rcu_read_unlock();
>  					spin_unlock(&mm->ioctx_lock);
>  
> +					/*
> +					 * Accessing ring pages must be done
> +					 * holding ctx->completion_lock to
> +					 * prevent the aio ring page migration
> +					 * procedure from migrating ring pages.
> +					 */
> +					spin_lock_irqsave(&ctx->completion_lock,
> +							  flags);
>  					ring = kmap_atomic(ctx->ring_pages[0]);
>  					ring->id = ctx->id;
>  					kunmap_atomic(ring);
> +					spin_unlock_irqrestore(
> +						&ctx->completion_lock, flags);
>  					return 0;
>  				}
>  
> @@ -1021,6 +1043,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
>  	unsigned head, tail, pos;
>  	long ret = 0;
>  	int copy_ret;
> +	unsigned long flags;
>  
>  	mutex_lock(&ctx->ring_lock);
>  
> @@ -1066,11 +1089,21 @@ static long aio_read_events_ring(struct kioctx *ctx,
>  		head %= ctx->nr_events;
>  	}
>  
> +	/*
> +	 * The aio ring pages are user space pages, so they can be migrated.
> +	 * When writing to an aio ring page, we should ensure the page is not
> +	 * being migrated. The aio page migration procedure is protected by
> +	 * ctx->completion_lock, so take this lock here.
> +	 */
> +	spin_lock_irqsave(&ctx->completion_lock, flags);
> +
>  	ring = kmap_atomic(ctx->ring_pages[0]);
>  	ring->head = head;
>  	kunmap_atomic(ring);
>  	flush_dcache_page(ctx->ring_pages[0]);
>  
> +	spin_unlock_irqrestore(&ctx->completion_lock, flags);
> +
>  	pr_debug("%li  h%u t%u\n", ret, head, tail);
>  
>  	put_reqs_available(ctx, ret);


Thread overview: 13+ messages
2014-02-27 10:40 [PATCH 0/2] Bug fix in aio ring page migration Tang Chen
2014-02-27 10:40 ` [PATCH 1/2] aio, memory-hotplug: Fix conflict when migrating and accessing ring pages Tang Chen
2014-03-05 19:23   ` Jeff Moyer
2014-02-27 10:40 ` [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration Tang Chen
2014-02-27 12:06   ` Yasuaki Ishimatsu
2014-02-27 12:44     ` [Update PATCH " Yasuaki Ishimatsu
2014-03-04  5:35       ` Miao Xie
2014-03-05  3:04         ` KOSAKI Motohiro
2014-03-05  6:59         ` Yasuaki Ishimatsu
2014-03-05  7:17       ` [Update v2 " Yasuaki Ishimatsu
2014-02-27 14:57   ` [PATCH " Benjamin LaHaise
2014-02-28  1:29     ` Gu Zheng
2014-02-28  7:25     ` Yasuaki Ishimatsu
