All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
       [not found] <001701d1ba44$b9c0d560$2d428020$@alibaba-inc.com>
@ 2016-05-30  8:07   ` Hillf Danton
  0 siblings, 0 replies; 12+ messages in thread
From: Hillf Danton @ 2016-05-30  8:07 UTC (permalink / raw)
  To: Aneesh Kumar; +Cc: linux-kernel, linux-mm

> diff --git a/mm/memory.c b/mm/memory.c
> index 15322b73636b..a01db5bc756b 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -292,23 +292,24 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
>   *	handling the additional races in SMP caused by other CPUs caching valid
>   *	mappings in their TLBs. Returns the number of free page slots left.
>   *	When out of page slots we must call tlb_flush_mmu().
> + *returns true if the caller should flush.
>   */
> -int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
> +bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
>  {
>  	struct mmu_gather_batch *batch;
> 
>  	VM_BUG_ON(!tlb->end);
> 
>  	batch = tlb->active;
> -	batch->pages[batch->nr++] = page;
>  	if (batch->nr == batch->max) {
>  		if (!tlb_next_batch(tlb))
> -			return 0;
> +			return true;
>  		batch = tlb->active;
>  	}
>  	VM_BUG_ON_PAGE(batch->nr > batch->max, page);

Still needed?
> 
> -	return batch->max - batch->nr;
> +	batch->pages[batch->nr++] = page;
> +	return false;
>  }
> 
>  #endif /* HAVE_GENERIC_MMU_GATHER */
> @@ -1109,6 +1110,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  	pte_t *start_pte;
>  	pte_t *pte;
>  	swp_entry_t entry;
> +	struct page *pending_page = NULL;
> 
>  again:
>  	init_rss_vec(rss);
> @@ -1160,8 +1162,9 @@ again:
>  			page_remove_rmap(page, false);
>  			if (unlikely(page_mapcount(page) < 0))
>  				print_bad_pte(vma, addr, ptent, page);
> -			if (unlikely(!__tlb_remove_page(tlb, page))) {
> +			if (unlikely(__tlb_remove_page(tlb, page))) {
>  				force_flush = 1;
> +				pending_page = page;
>  				addr += PAGE_SIZE;
>  				break;
>  			}
> @@ -1202,7 +1205,12 @@ again:
>  	if (force_flush) {
>  		force_flush = 0;
>  		tlb_flush_mmu_free(tlb);
> -
> +		if (pending_page) {
> +			/* remove the page with new size */
> +			__tlb_adjust_range(tlb, tlb->addr);

Would you please specify why tlb->addr is used here?

thanks
Hillf 
> +			__tlb_remove_page(tlb, pending_page);
> +			pending_page = NULL;
> +		}
>  		if (addr != end)
>  			goto again;
>  	}
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
@ 2016-05-30  8:07   ` Hillf Danton
  0 siblings, 0 replies; 12+ messages in thread
From: Hillf Danton @ 2016-05-30  8:07 UTC (permalink / raw)
  To: Aneesh Kumar; +Cc: linux-kernel, linux-mm

> diff --git a/mm/memory.c b/mm/memory.c
> index 15322b73636b..a01db5bc756b 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -292,23 +292,24 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
>   *	handling the additional races in SMP caused by other CPUs caching valid
>   *	mappings in their TLBs. Returns the number of free page slots left.
>   *	When out of page slots we must call tlb_flush_mmu().
> + *returns true if the caller should flush.
>   */
> -int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
> +bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
>  {
>  	struct mmu_gather_batch *batch;
> 
>  	VM_BUG_ON(!tlb->end);
> 
>  	batch = tlb->active;
> -	batch->pages[batch->nr++] = page;
>  	if (batch->nr == batch->max) {
>  		if (!tlb_next_batch(tlb))
> -			return 0;
> +			return true;
>  		batch = tlb->active;
>  	}
>  	VM_BUG_ON_PAGE(batch->nr > batch->max, page);

Still needed?
> 
> -	return batch->max - batch->nr;
> +	batch->pages[batch->nr++] = page;
> +	return false;
>  }
> 
>  #endif /* HAVE_GENERIC_MMU_GATHER */
> @@ -1109,6 +1110,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  	pte_t *start_pte;
>  	pte_t *pte;
>  	swp_entry_t entry;
> +	struct page *pending_page = NULL;
> 
>  again:
>  	init_rss_vec(rss);
> @@ -1160,8 +1162,9 @@ again:
>  			page_remove_rmap(page, false);
>  			if (unlikely(page_mapcount(page) < 0))
>  				print_bad_pte(vma, addr, ptent, page);
> -			if (unlikely(!__tlb_remove_page(tlb, page))) {
> +			if (unlikely(__tlb_remove_page(tlb, page))) {
>  				force_flush = 1;
> +				pending_page = page;
>  				addr += PAGE_SIZE;
>  				break;
>  			}
> @@ -1202,7 +1205,12 @@ again:
>  	if (force_flush) {
>  		force_flush = 0;
>  		tlb_flush_mmu_free(tlb);
> -
> +		if (pending_page) {
> +			/* remove the page with new size */
> +			__tlb_adjust_range(tlb, tlb->addr);

Would you please specify why tlb->addr is used here?

thanks
Hillf 
> +			__tlb_remove_page(tlb, pending_page);
> +			pending_page = NULL;
> +		}
>  		if (addr != end)
>  			goto again;
>  	}
> --
> 2.7.4


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
  2016-05-30  8:07   ` Hillf Danton
@ 2016-05-30 15:34     ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2016-05-30 15:34 UTC (permalink / raw)
  To: Hillf Danton; +Cc: linux-kernel, linux-mm

Hillf Danton <hillf.zj@alibaba-inc.com> writes:

>> diff --git a/mm/memory.c b/mm/memory.c
>> index 15322b73636b..a01db5bc756b 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -292,23 +292,24 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
>>   *	handling the additional races in SMP caused by other CPUs caching valid
>>   *	mappings in their TLBs. Returns the number of free page slots left.
>>   *	When out of page slots we must call tlb_flush_mmu().
>> + *returns true if the caller should flush.
>>   */
>> -int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
>> +bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
>>  {
>>  	struct mmu_gather_batch *batch;
>> 
>>  	VM_BUG_ON(!tlb->end);
>> 
>>  	batch = tlb->active;
>> -	batch->pages[batch->nr++] = page;
>>  	if (batch->nr == batch->max) {
>>  		if (!tlb_next_batch(tlb))
>> -			return 0;
>> +			return true;
>>  		batch = tlb->active;
>>  	}
>>  	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
>
> Still needed?

yes, we need to make sure the batch we picked doesn't have a wrong
batch->nr value.

>> 
>> -	return batch->max - batch->nr;
>> +	batch->pages[batch->nr++] = page;
>> +	return false;
>>  }
>> 
>>  #endif /* HAVE_GENERIC_MMU_GATHER */
>> @@ -1109,6 +1110,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>>  	pte_t *start_pte;
>>  	pte_t *pte;
>>  	swp_entry_t entry;
>> +	struct page *pending_page = NULL;
>> 
>>  again:
>>  	init_rss_vec(rss);
>> @@ -1160,8 +1162,9 @@ again:
>>  			page_remove_rmap(page, false);
>>  			if (unlikely(page_mapcount(page) < 0))
>>  				print_bad_pte(vma, addr, ptent, page);
>> -			if (unlikely(!__tlb_remove_page(tlb, page))) {
>> +			if (unlikely(__tlb_remove_page(tlb, page))) {
>>  				force_flush = 1;
>> +				pending_page = page;
>>  				addr += PAGE_SIZE;
>>  				break;
>>  			}
>> @@ -1202,7 +1205,12 @@ again:
>>  	if (force_flush) {
>>  		force_flush = 0;
>>  		tlb_flush_mmu_free(tlb);
>> -
>> +		if (pending_page) {
>> +			/* remove the page with new size */
>> +			__tlb_adjust_range(tlb, tlb->addr);
>
> Would you please specify why tlb->addr is used here?
>

That is needed because tlb_flush_mmu_tlbonly() does a __tlb_reset_range().


>> +			__tlb_remove_page(tlb, pending_page);
>> +			pending_page = NULL;
>> +		}
>>  		if (addr != end)
>>  			goto again;
>>  	}
>> --
>> 2.7.4

-aneesh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
@ 2016-05-30 15:34     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2016-05-30 15:34 UTC (permalink / raw)
  To: Hillf Danton; +Cc: linux-kernel, linux-mm

Hillf Danton <hillf.zj@alibaba-inc.com> writes:

>> diff --git a/mm/memory.c b/mm/memory.c
>> index 15322b73636b..a01db5bc756b 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -292,23 +292,24 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
>>   *	handling the additional races in SMP caused by other CPUs caching valid
>>   *	mappings in their TLBs. Returns the number of free page slots left.
>>   *	When out of page slots we must call tlb_flush_mmu().
>> + *returns true if the caller should flush.
>>   */
>> -int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
>> +bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
>>  {
>>  	struct mmu_gather_batch *batch;
>> 
>>  	VM_BUG_ON(!tlb->end);
>> 
>>  	batch = tlb->active;
>> -	batch->pages[batch->nr++] = page;
>>  	if (batch->nr == batch->max) {
>>  		if (!tlb_next_batch(tlb))
>> -			return 0;
>> +			return true;
>>  		batch = tlb->active;
>>  	}
>>  	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
>
> Still needed?

yes, we need to make sure the batch we picked doesn't have a wrong
batch->nr value.

>> 
>> -	return batch->max - batch->nr;
>> +	batch->pages[batch->nr++] = page;
>> +	return false;
>>  }
>> 
>>  #endif /* HAVE_GENERIC_MMU_GATHER */
>> @@ -1109,6 +1110,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>>  	pte_t *start_pte;
>>  	pte_t *pte;
>>  	swp_entry_t entry;
>> +	struct page *pending_page = NULL;
>> 
>>  again:
>>  	init_rss_vec(rss);
>> @@ -1160,8 +1162,9 @@ again:
>>  			page_remove_rmap(page, false);
>>  			if (unlikely(page_mapcount(page) < 0))
>>  				print_bad_pte(vma, addr, ptent, page);
>> -			if (unlikely(!__tlb_remove_page(tlb, page))) {
>> +			if (unlikely(__tlb_remove_page(tlb, page))) {
>>  				force_flush = 1;
>> +				pending_page = page;
>>  				addr += PAGE_SIZE;
>>  				break;
>>  			}
>> @@ -1202,7 +1205,12 @@ again:
>>  	if (force_flush) {
>>  		force_flush = 0;
>>  		tlb_flush_mmu_free(tlb);
>> -
>> +		if (pending_page) {
>> +			/* remove the page with new size */
>> +			__tlb_adjust_range(tlb, tlb->addr);
>
> Would you please specify why tlb->addr is used here?
>

That is needed because tlb_flush_mmu_tlbonly() does a __tlb_reset_range().


>> +			__tlb_remove_page(tlb, pending_page);
>> +			pending_page = NULL;
>> +		}
>>  		if (addr != end)
>>  			goto again;
>>  	}
>> --
>> 2.7.4

-aneesh

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
  2016-05-30 15:34     ` Aneesh Kumar K.V
@ 2016-05-31  3:52       ` Hillf Danton
  -1 siblings, 0 replies; 12+ messages in thread
From: Hillf Danton @ 2016-05-31  3:52 UTC (permalink / raw)
  To: 'Aneesh Kumar K.V'; +Cc: 'linux-kernel', linux-mm

> >> @@ -1202,7 +1205,12 @@ again:
> >>  	if (force_flush) {
> >>  		force_flush = 0;
> >>  		tlb_flush_mmu_free(tlb);
> >> -
> >> +		if (pending_page) {
> >> +			/* remove the page with new size */
> >> +			__tlb_adjust_range(tlb, tlb->addr);
> >
> > Would you please specify why tlb->addr is used here?
> >
> 
> That is needed because tlb_flush_mmu_tlbonly() does a __tlb_reset_range().
> 
If ->addr is updated in resetting, then it is a noop here to deliver tlb->addr to
__tlb_adjust_range().
On the other hand, if ->addr is not updated in resetting, then it is also a noop here.

Do you want to update ->addr here?

thanks
Hillf

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
@ 2016-05-31  3:52       ` Hillf Danton
  0 siblings, 0 replies; 12+ messages in thread
From: Hillf Danton @ 2016-05-31  3:52 UTC (permalink / raw)
  To: 'Aneesh Kumar K.V'; +Cc: 'linux-kernel', linux-mm

> >> @@ -1202,7 +1205,12 @@ again:
> >>  	if (force_flush) {
> >>  		force_flush = 0;
> >>  		tlb_flush_mmu_free(tlb);
> >> -
> >> +		if (pending_page) {
> >> +			/* remove the page with new size */
> >> +			__tlb_adjust_range(tlb, tlb->addr);
> >
> > Would you please specify why tlb->addr is used here?
> >
> 
> That is needed because tlb_flush_mmu_tlbonly() does a __tlb_reset_range().
> 
If ->addr is updated in resetting, then it is a noop here to deliver tlb->addr to
__tlb_adjust_range().
On the other hand, if ->addr is not updated in resetting, then it is also a noop here.

Do you want to update ->addr here?

thanks
Hillf

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
  2016-05-31  3:52       ` Hillf Danton
@ 2016-05-31  6:50         ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2016-05-31  6:50 UTC (permalink / raw)
  To: Hillf Danton; +Cc: 'linux-kernel', linux-mm

Hillf Danton <hillf.zj@alibaba-inc.com> writes:

>> >> @@ -1202,7 +1205,12 @@ again:
>> >>  	if (force_flush) {
>> >>  		force_flush = 0;
>> >>  		tlb_flush_mmu_free(tlb);
>> >> -
>> >> +		if (pending_page) {
>> >> +			/* remove the page with new size */
>> >> +			__tlb_adjust_range(tlb, tlb->addr);
>> >
>> > Would you please specify why tlb->addr is used here?
>> >
>> 
>> That is needed because tlb_flush_mmu_tlbonly() does a __tlb_reset_range().
>> 
> If ->addr is updated in resetting, then it is a noop here to deliver tlb->addr to
> __tlb_adjust_range().
> On the other hand, if ->addr is not updated in resetting, then it is also a noop here.
>
> Do you want to update ->addr here?
>

I don't get that question. We wanted to track the last adjusted addr in
tlb->addr because when we do a tlb_flush_mmu_tlbonly() it does a
__tlb_reset_range(), which clears tlb->start and tlb->end. Now we need
to update the range again with the last adjusted addr before we can call
__tlb_remove_page(). Look for VM_BUG_ON(!tlb->end); in
__tlb_remove_page().

-aneesh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
@ 2016-05-31  6:50         ` Aneesh Kumar K.V
  0 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2016-05-31  6:50 UTC (permalink / raw)
  To: Hillf Danton; +Cc: 'linux-kernel', linux-mm

Hillf Danton <hillf.zj@alibaba-inc.com> writes:

>> >> @@ -1202,7 +1205,12 @@ again:
>> >>  	if (force_flush) {
>> >>  		force_flush = 0;
>> >>  		tlb_flush_mmu_free(tlb);
>> >> -
>> >> +		if (pending_page) {
>> >> +			/* remove the page with new size */
>> >> +			__tlb_adjust_range(tlb, tlb->addr);
>> >
>> > Would you please specify why tlb->addr is used here?
>> >
>> 
>> That is needed because tlb_flush_mmu_tlbonly() does a __tlb_reset_range().
>> 
> If ->addr is updated in resetting, then it is a noop here to deliver tlb->addr to
> __tlb_adjust_range().
> On the other hand, if ->addr is not updated in resetting, then it is also a noop here.
>
> Do you want to update ->addr here?
>

I don't get that question. We wanted to track the last adjusted addr in
tlb->addr because when we do a tlb_flush_mmu_tlbonly() it does a
__tlb_reset_range(), which clears tlb->start and tlb->end. Now we need
to update the range again with the last adjusted addr before we can call
__tlb_remove_page(). Look for VM_BUG_ON(!tlb->end); in
__tlb_remove_page().

-aneesh


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
  2016-05-31  6:50         ` Aneesh Kumar K.V
@ 2016-05-31  7:26           ` Hillf Danton
  -1 siblings, 0 replies; 12+ messages in thread
From: Hillf Danton @ 2016-05-31  7:26 UTC (permalink / raw)
  To: 'Aneesh Kumar K.V'; +Cc: 'linux-kernel', linux-mm

> > Do you want to update ->addr here?
> >
> 
> I don't get that question. We wanted to track the last adjusted addr in
> tlb->addr because when we do a tlb_flush_mmu_tlbonly() it does a
> __tlb_reset_range(), which clears tlb->start and tlb->end. Now we need
> to update the range again with the last adjusted addr before we can call
> __tlb_remove_page(). Look for VM_BUG_ON(!tlb->end); in
> __tlb_remove_page().
> 
Got it, thanks.

Hillf

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
@ 2016-05-31  7:26           ` Hillf Danton
  0 siblings, 0 replies; 12+ messages in thread
From: Hillf Danton @ 2016-05-31  7:26 UTC (permalink / raw)
  To: 'Aneesh Kumar K.V'; +Cc: 'linux-kernel', linux-mm

> > Do you want to update ->addr here?
> >
> 
> I don't get that question. We wanted to track the last adjusted addr in
> tlb->addr because when we do a tlb_flush_mmu_tlbonly() it does a
> __tlb_reset_range(), which clears tlb->start and tlb->end. Now we need
> to update the range again with the last adjusted addr before we can call
> __tlb_remove_page(). Look for VM_BUG_ON(!tlb->end); in
> __tlb_remove_page().
> 
Got it, thanks.

Hillf


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
  2016-05-30  5:44 [RFC PATCH 1/4] mm/hugetlb: Simplify hugetlb unmap Aneesh Kumar K.V
@ 2016-05-30  5:44   ` Aneesh Kumar K.V
  0 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2016-05-30  5:44 UTC (permalink / raw)
  To: akpm, linux-arch; +Cc: linux-mm, linux-kernel, Aneesh Kumar K.V

This updates the generic and arch specific implementations to return true
if we need to do a tlb flush. That means if __tlb_remove_page indicates
a flush is needed, the page we tried to remove needs to be tracked and
added again after the flush. We need to track it because we have already
updated the pte to none and we can't just loop back.

This change is done to enable us to do a tlb_flush when we try to flush
a range that consists of different page sizes. For architectures like
ppc64, we can do a range based tlb flush and we need to track page size
for that. When we try to remove a huge page, we will force a tlb flush
and start a new mmu gather.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/arm/include/asm/tlb.h  | 11 +++++++----
 arch/ia64/include/asm/tlb.h | 13 ++++++++-----
 arch/s390/include/asm/tlb.h |  4 ++--
 arch/sh/include/asm/tlb.h   |  2 +-
 arch/um/include/asm/tlb.h   |  2 +-
 include/asm-generic/tlb.h   | 18 ++++++++++++++++--
 mm/memory.c                 | 20 ++++++++++++++------
 7 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 3cadb726ec88..45dea952b0e6 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -209,17 +209,20 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 		tlb_flush(tlb);
 }
 
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
+	if (tlb->nr == tlb->max)
+		return true;
 	tlb->pages[tlb->nr++] = page;
-	VM_BUG_ON(tlb->nr > tlb->max);
-	return tlb->max - tlb->nr;
+	return false;
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (__tlb_remove_page(tlb, page)) {
 		tlb_flush_mmu(tlb);
+		__tlb_remove_page(tlb, page);
+	}
 }
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 39d64e0df1de..85005ab513e9 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -205,17 +205,18 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  * must be delayed until after the TLB has been flushed (see comments at the beginning of
  * this file).
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
+	if (tlb->nr == tlb->max)
+		return true;
+
 	tlb->need_flush = 1;
 
 	if (!tlb->nr && tlb->pages == tlb->local)
 		__tlb_alloc_page(tlb);
 
 	tlb->pages[tlb->nr++] = page;
-	VM_BUG_ON(tlb->nr > tlb->max);
-
-	return tlb->max - tlb->nr;
+	return false;
 }
 
 static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
@@ -235,8 +236,10 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (__tlb_remove_page(tlb, page)) {
 		tlb_flush_mmu(tlb);
+		__tlb_remove_page(tlb, page);
+	}
 }
 
 /*
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 7a92e69c50bc..6b98cb3601d5 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -87,10 +87,10 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb,
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	free_page_and_swap_cache(page);
-	return 1; /* avoid calling tlb_flush_mmu */
+	return false; /* avoid calling tlb_flush_mmu */
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 62f80d2a9df9..3dec5e0734f5 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -101,7 +101,7 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	free_page_and_swap_cache(page);
-	return 1; /* avoid calling tlb_flush_mmu */
+	return false; /* avoid calling tlb_flush_mmu */
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 16eb63fac57d..c6638f8e5e90 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -102,7 +102,7 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	tlb->need_flush = 1;
 	free_page_and_swap_cache(page);
-	return 1; /* avoid calling tlb_flush_mmu */
+	return false; /* avoid calling tlb_flush_mmu */
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 9dbb739cafa0..2ac8fe202e9a 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -107,6 +107,11 @@ struct mmu_gather {
 	struct mmu_gather_batch	local;
 	struct page		*__pages[MMU_GATHER_BUNDLE];
 	unsigned int		batch_count;
+	/*
+	 * __tlb_adjust_range will track the new addr here,
+	 * so that we can adjust the range after the flush
+	 */
+	unsigned long addr;
 };
 
 #define HAVE_GENERIC_MMU_GATHER
@@ -115,7 +120,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
 							unsigned long end);
-int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
+bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
 
 /* tlb_remove_page
  *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
@@ -123,8 +128,11 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
  */
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (__tlb_remove_page(tlb, page)) {
 		tlb_flush_mmu(tlb);
+		__tlb_adjust_range(tlb, tlb->addr);
+		__tlb_remove_page(tlb, page);
+	}
 }
 
 static inline void __tlb_adjust_range(struct mmu_gather *tlb,
@@ -132,6 +140,12 @@ static inline void __tlb_adjust_range(struct mmu_gather *tlb,
 {
 	tlb->start = min(tlb->start, address);
 	tlb->end = max(tlb->end, address + PAGE_SIZE);
+	/*
+	 * Track the last address with which we adjusted the range. This
+	 * will be used later to adjust again after a mmu_flush due to
+	 * failed __tlb_remove_page
+	 */
+	tlb->addr = address;
 }
 
 static inline void __tlb_reset_range(struct mmu_gather *tlb)
diff --git a/mm/memory.c b/mm/memory.c
index 15322b73636b..a01db5bc756b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -292,23 +292,24 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
  *	handling the additional races in SMP caused by other CPUs caching valid
  *	mappings in their TLBs. Returns the number of free page slots left.
  *	When out of page slots we must call tlb_flush_mmu().
+ *returns true if the caller should flush.
  */
-int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	struct mmu_gather_batch *batch;
 
 	VM_BUG_ON(!tlb->end);
 
 	batch = tlb->active;
-	batch->pages[batch->nr++] = page;
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
-			return 0;
+			return true;
 		batch = tlb->active;
 	}
 	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
 
-	return batch->max - batch->nr;
+	batch->pages[batch->nr++] = page;
+	return false;
 }
 
 #endif /* HAVE_GENERIC_MMU_GATHER */
@@ -1109,6 +1110,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	pte_t *start_pte;
 	pte_t *pte;
 	swp_entry_t entry;
+	struct page *pending_page = NULL;
 
 again:
 	init_rss_vec(rss);
@@ -1160,8 +1162,9 @@ again:
 			page_remove_rmap(page, false);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			if (unlikely(!__tlb_remove_page(tlb, page))) {
+			if (unlikely(__tlb_remove_page(tlb, page))) {
 				force_flush = 1;
+				pending_page = page;
 				addr += PAGE_SIZE;
 				break;
 			}
@@ -1202,7 +1205,12 @@ again:
 	if (force_flush) {
 		force_flush = 0;
 		tlb_flush_mmu_free(tlb);
-
+		if (pending_page) {
+			/* remove the page with new size */
+			__tlb_adjust_range(tlb, tlb->addr);
+			__tlb_remove_page(tlb, pending_page);
+			pending_page = NULL;
+		}
 		if (addr != end)
 			goto again;
 	}
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page
@ 2016-05-30  5:44   ` Aneesh Kumar K.V
  0 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2016-05-30  5:44 UTC (permalink / raw)
  To: akpm, linux-arch; +Cc: linux-mm, linux-kernel, Aneesh Kumar K.V

This updates the generic and arch specific implementations to return true
if we need to do a tlb flush. That means if __tlb_remove_page indicates
a flush is needed, the page we tried to remove needs to be tracked and
added again after the flush. We need to track it because we have already
updated the pte to none and we can't just loop back.

This change is done to enable us to do a tlb_flush when we try to flush
a range that consists of different page sizes. For architectures like
ppc64, we can do a range based tlb flush and we need to track page size
for that. When we try to remove a huge page, we will force a tlb flush
and start a new mmu gather.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/arm/include/asm/tlb.h  | 11 +++++++----
 arch/ia64/include/asm/tlb.h | 13 ++++++++-----
 arch/s390/include/asm/tlb.h |  4 ++--
 arch/sh/include/asm/tlb.h   |  2 +-
 arch/um/include/asm/tlb.h   |  2 +-
 include/asm-generic/tlb.h   | 18 ++++++++++++++++--
 mm/memory.c                 | 20 ++++++++++++++------
 7 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 3cadb726ec88..45dea952b0e6 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -209,17 +209,20 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 		tlb_flush(tlb);
 }
 
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
+	if (tlb->nr == tlb->max)
+		return true;
 	tlb->pages[tlb->nr++] = page;
-	VM_BUG_ON(tlb->nr > tlb->max);
-	return tlb->max - tlb->nr;
+	return false;
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (__tlb_remove_page(tlb, page)) {
 		tlb_flush_mmu(tlb);
+		__tlb_remove_page(tlb, page);
+	}
 }
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 39d64e0df1de..85005ab513e9 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -205,17 +205,18 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  * must be delayed until after the TLB has been flushed (see comments at the beginning of
  * this file).
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
+	if (tlb->nr == tlb->max)
+		return true;
+
 	tlb->need_flush = 1;
 
 	if (!tlb->nr && tlb->pages == tlb->local)
 		__tlb_alloc_page(tlb);
 
 	tlb->pages[tlb->nr++] = page;
-	VM_BUG_ON(tlb->nr > tlb->max);
-
-	return tlb->max - tlb->nr;
+	return false;
 }
 
 static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
@@ -235,8 +236,10 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (__tlb_remove_page(tlb, page)) {
 		tlb_flush_mmu(tlb);
+		__tlb_remove_page(tlb, page);
+	}
 }
 
 /*
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 7a92e69c50bc..6b98cb3601d5 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -87,10 +87,10 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb,
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	free_page_and_swap_cache(page);
-	return 1; /* avoid calling tlb_flush_mmu */
+	return false; /* avoid calling tlb_flush_mmu */
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 62f80d2a9df9..3dec5e0734f5 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -101,7 +101,7 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	free_page_and_swap_cache(page);
-	return 1; /* avoid calling tlb_flush_mmu */
+	return false; /* avoid calling tlb_flush_mmu */
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 16eb63fac57d..c6638f8e5e90 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -102,7 +102,7 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	tlb->need_flush = 1;
 	free_page_and_swap_cache(page);
-	return 1; /* avoid calling tlb_flush_mmu */
+	return false; /* avoid calling tlb_flush_mmu */
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 9dbb739cafa0..2ac8fe202e9a 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -107,6 +107,11 @@ struct mmu_gather {
 	struct mmu_gather_batch	local;
 	struct page		*__pages[MMU_GATHER_BUNDLE];
 	unsigned int		batch_count;
+	/*
+	 * __tlb_adjust_range will track the new addr here,
+	 * so that we can adjust the range after the flush
+	 */
+	unsigned long addr;
 };
 
 #define HAVE_GENERIC_MMU_GATHER
@@ -115,7 +120,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
 							unsigned long end);
-int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
+bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
 
 /* tlb_remove_page
  *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
@@ -123,8 +128,11 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
  */
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (__tlb_remove_page(tlb, page)) {
 		tlb_flush_mmu(tlb);
+		__tlb_adjust_range(tlb, tlb->addr);
+		__tlb_remove_page(tlb, page);
+	}
 }
 
 static inline void __tlb_adjust_range(struct mmu_gather *tlb,
@@ -132,6 +140,12 @@ static inline void __tlb_adjust_range(struct mmu_gather *tlb,
 {
 	tlb->start = min(tlb->start, address);
 	tlb->end = max(tlb->end, address + PAGE_SIZE);
+	/*
+	 * Track the last address with which we adjusted the range. This
+	 * will be used later to adjust again after a tlb_flush_mmu() due to
+	 * a failed __tlb_remove_page()
+	 */
+	tlb->addr = address;
 }
 
 static inline void __tlb_reset_range(struct mmu_gather *tlb)
diff --git a/mm/memory.c b/mm/memory.c
index 15322b73636b..a01db5bc756b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -292,23 +292,24 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
  *	handling the additional races in SMP caused by other CPUs caching valid
  *	mappings in their TLBs. Returns the number of free page slots left.
  *	When out of page slots we must call tlb_flush_mmu().
+ *	Returns true if the caller should flush.
  */
-int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	struct mmu_gather_batch *batch;
 
 	VM_BUG_ON(!tlb->end);
 
 	batch = tlb->active;
-	batch->pages[batch->nr++] = page;
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
-			return 0;
+			return true;
 		batch = tlb->active;
 	}
 	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
 
-	return batch->max - batch->nr;
+	batch->pages[batch->nr++] = page;
+	return false;
 }
 
 #endif /* HAVE_GENERIC_MMU_GATHER */
@@ -1109,6 +1110,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	pte_t *start_pte;
 	pte_t *pte;
 	swp_entry_t entry;
+	struct page *pending_page = NULL;
 
 again:
 	init_rss_vec(rss);
@@ -1160,8 +1162,9 @@ again:
 			page_remove_rmap(page, false);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			if (unlikely(!__tlb_remove_page(tlb, page))) {
+			if (unlikely(__tlb_remove_page(tlb, page))) {
 				force_flush = 1;
+				pending_page = page;
 				addr += PAGE_SIZE;
 				break;
 			}
@@ -1202,7 +1205,12 @@ again:
 	if (force_flush) {
 		force_flush = 0;
 		tlb_flush_mmu_free(tlb);
-
+		if (pending_page) {
+			/* remove the page with new size */
+			__tlb_adjust_range(tlb, tlb->addr);
+			__tlb_remove_page(tlb, pending_page);
+			pending_page = NULL;
+		}
 		if (addr != end)
 			goto again;
 	}
-- 
2.7.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-05-31  7:32 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <001701d1ba44$b9c0d560$2d428020$@alibaba-inc.com>
2016-05-30  8:07 ` [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page Hillf Danton
2016-05-30  8:07   ` Hillf Danton
2016-05-30 15:34   ` Aneesh Kumar K.V
2016-05-30 15:34     ` Aneesh Kumar K.V
2016-05-31  3:52     ` Hillf Danton
2016-05-31  3:52       ` Hillf Danton
2016-05-31  6:50       ` Aneesh Kumar K.V
2016-05-31  6:50         ` Aneesh Kumar K.V
2016-05-31  7:26         ` Hillf Danton
2016-05-31  7:26           ` Hillf Danton
2016-05-30  5:44 [RFC PATCH 1/4] mm/hugetlb: Simplify hugetlb unmap Aneesh Kumar K.V
2016-05-30  5:44 ` [RFC PATCH 2/4] mm: Change the interface for __tlb_remove_page Aneesh Kumar K.V
2016-05-30  5:44   ` Aneesh Kumar K.V

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.