From: "Yin, Fengwei" <fengwei.yin@intel.com>
To: "Matthew Wilcox (Oracle)" <willy@infradead.org>,
	<linux-mm@kvack.org>, <linux-riscv@lists.infradead.org>,
	Alexandre Ghiti <alex@ghiti.fr>,
	"Paul Walmsley" <paul.walmsley@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Albert Ou <aou@eecs.berkeley.edu>, <linux-arch@vger.kernel.org>
Subject: Re: [PATCH 10/7] riscv: Implement the new page table range API
Date: Wed, 15 Feb 2023 20:27:27 +0800	[thread overview]
Message-ID: <27809f5a-17fc-abf6-7703-62a3c1b41ef3@intel.com> (raw)
In-Reply-To: <c02faf6e4af4babd24b3107d5fc2c6bff1d63100.camel@intel.com>



On 2/15/2023 4:35 PM, Yin Fengwei wrote:
> On Wed, 2023-02-15 at 00:04 +0000, Matthew Wilcox (Oracle) wrote:
>> Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
>>
>> The PG_dcache_clean flag changes from being a per-page bit to being a
>> per-folio bit.
>>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> ---
>>  arch/riscv/include/asm/cacheflush.h | 19 +++++++++----------
>>  arch/riscv/include/asm/pgtable.h    | 25 ++++++++++++++++++-------
>>  arch/riscv/mm/cacheflush.c          | 11 ++---------
>>  3 files changed, 29 insertions(+), 26 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
>> index 03e3b95ae6da..10e5e96f09b5 100644
>> --- a/arch/riscv/include/asm/cacheflush.h
>> +++ b/arch/riscv/include/asm/cacheflush.h
>> @@ -15,20 +15,19 @@ static inline void local_flush_icache_all(void)
>>  
>>  #define PG_dcache_clean PG_arch_1
>>  
>> -static inline void flush_dcache_page(struct page *page)
>> +static inline void flush_dcache_folio(struct folio *folio)
>>  {
>> -       /*
>> -        * HugeTLB pages are always fully mapped and only head page will be
>> -        * set PG_dcache_clean (see comments in flush_icache_pte()).
>> -        */
>> -       if (PageHuge(page))
>> -               page = compound_head(page);
>> -
>> -       if (test_bit(PG_dcache_clean, &page->flags))
>> -               clear_bit(PG_dcache_clean, &page->flags);
>> +       if (test_bit(PG_dcache_clean, &folio->flags))
>> +               clear_bit(PG_dcache_clean, &folio->flags);
>>  }
>> +#define flush_dcache_folio flush_dcache_folio
>>  #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
>>  
>> +static inline void flush_dcache_page(struct page *page)
>> +{
>> +       flush_dcache_folio(page_folio(page));
>> +}
>> +
>>  /*
>>   * RISC-V doesn't have an instruction to flush parts of the instruction cache,
>>   * so instead we just flush the whole thing.
>> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
>> index 13222fd5c4b4..03706c833e70 100644
>> --- a/arch/riscv/include/asm/pgtable.h
>> +++ b/arch/riscv/include/asm/pgtable.h
>> @@ -405,8 +405,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
>>  
>>  
>>  /* Commit new configuration to MMU hardware */
>> -static inline void update_mmu_cache(struct vm_area_struct *vma,
>> -       unsigned long address, pte_t *ptep)
>> +static inline void update_mmu_cache_range(struct vm_area_struct *vma,
>> +               unsigned long address, pte_t *ptep, unsigned int nr)
>>  {
>>         /*
>>          * The kernel assumes that TLBs don't cache invalid entries, but
>> @@ -415,8 +415,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>>          * Relying on flush_tlb_fix_spurious_fault would suffice, but
>>          * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
>>          */
>> -       flush_tlb_page(vma, address);
>> +       flush_tlb_range(vma, address, address + nr * PAGE_SIZE);
> 
> riscv's flush_tlb_range() is a little strange to me. It passes
> __sbi_tlb_flush_range() a stride of PAGE_SIZE, which means that if
> (end - start) is larger than the stride, it triggers flush_tlb_all().
>
> So this change could trigger flush_tlb_all(), while the original
> flush_tlb_page() call only flushed a single page.
>
> My understanding is that flush_tlb_page() should be better here, because
> flush_pmd_tlb_range() uses PMD_SIZE as the stride precisely to avoid
> flush_tlb_all(). I must be missing something.
A huge page is mapped by a single TLB entry, so using PMD_SIZE as the stride
makes sense there.
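
For reference, this is roughly how I read the local (non-broadcast) path of
__sbi_tlb_flush_range(). A simplified sketch only, not the actual kernel
code:

	/*
	 * Sketch of the stride decision, using the local_flush_tlb_*()
	 * helpers declared in asm/tlbflush.h.
	 */
	static void local_flush_sketch(unsigned long start, unsigned long size,
				       unsigned long stride)
	{
		if (size <= stride)
			/* Range fits in one entry of 'stride' bytes. */
			local_flush_tlb_page(start);
		else
			/* Anything larger falls back to a full flush. */
			local_flush_tlb_all();
	}

With flush_tlb_range() passing a stride of PAGE_SIZE, any nr > 1 from
update_mmu_cache_range() makes size > stride and takes the
local_flush_tlb_all() path, while flush_pmd_tlb_range() passes PMD_SIZE so a
single huge page mapping still gets a single-address flush.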

Regards
Yin, Fengwei 

> 
> Regards
> Yin, Fengwei
> 
>>  }
>> +#define update_mmu_cache(vma, addr, ptep) \
>> +       update_mmu_cache_range(vma, addr, ptep, 1)
>>  
>>  #define __HAVE_ARCH_UPDATE_MMU_TLB
>>  #define update_mmu_tlb update_mmu_cache
>> @@ -456,12 +458,21 @@ static inline void __set_pte_at(struct mm_struct *mm,
>>         set_pte(ptep, pteval);
>>  }
>>  
>> -static inline void set_pte_at(struct mm_struct *mm,
>> -       unsigned long addr, pte_t *ptep, pte_t pteval)
>> +static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
>> +               pte_t *ptep, pte_t pteval, unsigned int nr)
>>  {
>> -       page_table_check_ptes_set(mm, addr, ptep, pteval, 1);
>> -       __set_pte_at(mm, addr, ptep, pteval);
>> +       page_table_check_ptes_set(mm, addr, ptep, pteval, nr);
>> +
>> +       for (;;) {
>> +               __set_pte_at(mm, addr, ptep, pteval);
>> +               if (--nr == 0)
>> +                       break;
>> +               ptep++;
>> +               addr += PAGE_SIZE;
>> +               pte_val(pteval) += 1 << _PAGE_PFN_SHIFT;
>> +       }
>>  }
>> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
>>  
>>  static inline void pte_clear(struct mm_struct *mm,
>>         unsigned long addr, pte_t *ptep)
>> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
>> index 3cc07ed45aeb..b725c3f6f57f 100644
>> --- a/arch/riscv/mm/cacheflush.c
>> +++ b/arch/riscv/mm/cacheflush.c
>> @@ -81,16 +81,9 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
>>  #ifdef CONFIG_MMU
>>  void flush_icache_pte(pte_t pte)
>>  {
>> -       struct page *page = pte_page(pte);
>> +       struct folio *folio = page_folio(pte_page(pte));
>>  
>> -       /*
>> -        * HugeTLB pages are always fully mapped, so only setting head page's
>> -        * PG_dcache_clean flag is enough.
>> -        */
>> -       if (PageHuge(page))
>> -               page = compound_head(page);
>> -
>> -       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
>> +       if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
>>                 flush_icache_all();
>>  }
>>  #endif /* CONFIG_MMU */
> 

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

Thread overview: 47+ messages
2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
2023-02-11  3:39 ` [PATCH 1/7] mm: Convert page_table_check_pte_set() to page_table_check_ptes_set() Matthew Wilcox (Oracle)
2023-02-11  3:39 ` [PATCH 2/7] mm: Add generic flush_icache_pages() and documentation Matthew Wilcox (Oracle)
2023-02-11  3:39 ` [PATCH 3/7] mm: Add folio_flush_mapping() Matthew Wilcox (Oracle)
2023-02-11  3:39 ` [PATCH 4/7] mm: Remove ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO Matthew Wilcox (Oracle)
2023-02-12 15:51   ` Mike Rapoport
2023-02-12 23:59     ` Matthew Wilcox
2023-02-11  3:39 ` [PATCH 5/7] alpha: Implement the new page table range API Matthew Wilcox (Oracle)
2023-02-13  3:15   ` Yin, Fengwei
2023-02-11  3:39 ` [PATCH 6/7] arc: " Matthew Wilcox (Oracle)
2023-02-13  3:09   ` Yin, Fengwei
2023-02-13 15:16     ` Matthew Wilcox
2023-02-14  6:32       ` Yin, Fengwei
2023-02-11  3:39 ` [PATCH 7/7] x86: " Matthew Wilcox (Oracle)
2023-02-13 21:04 ` [PATCH 8/7] arm: " Matthew Wilcox (Oracle)
2023-02-13 21:04   ` Matthew Wilcox (Oracle)
2023-02-15  0:04 ` [PATCH 9/7] arm64: " Matthew Wilcox (Oracle)
2023-02-15  0:04   ` Matthew Wilcox (Oracle)
2023-02-15  0:04   ` [PATCH 10/7] riscv: " Matthew Wilcox (Oracle)
2023-02-15  0:04     ` Matthew Wilcox (Oracle)
2023-02-15  8:38     ` Yin, Fengwei
2023-02-15  8:38       ` Yin, Fengwei
2023-02-15 12:27       ` Yin, Fengwei [this message]
2023-02-15 12:27         ` Yin, Fengwei
2023-02-16  8:14       ` Alexandre Ghiti
2023-02-16  8:14         ` Alexandre Ghiti
2023-02-16 13:27         ` Yin, Fengwei
2023-02-16 13:27           ` Yin, Fengwei
2023-02-16  8:16     ` Alexandre Ghiti
2023-02-16  8:16       ` Alexandre Ghiti
2023-02-15  0:04   ` [PATCH 11/7] csky: " Matthew Wilcox (Oracle)
2023-02-15  0:04   ` [PATCH 12/7] hexagon: " Matthew Wilcox (Oracle)
2023-02-15 16:22     ` Brian Cain
2023-02-15  0:04   ` [PATCH 13/7] loongson: " Matthew Wilcox (Oracle)
2023-02-26  4:34     ` Matthew Wilcox
2023-02-26  6:56       ` WANG Xuerui
2023-02-15 13:26   ` [PATCH 9/7] arm64: " Catalin Marinas
2023-02-15 13:26     ` Catalin Marinas
2023-02-15 20:09   ` [PATCH 14/17] ia64: " Matthew Wilcox (Oracle)
2023-02-15 20:09     ` Matthew Wilcox (Oracle)
2023-02-15 20:09     ` [PATCH 15/17] m68k: " Matthew Wilcox (Oracle)
2023-02-16  0:59       ` Michael Schmitz
2023-02-16  4:26         ` Matthew Wilcox
2023-02-16  7:55           ` Geert Uytterhoeven
2023-02-16 22:03           ` Michael Schmitz
2023-02-15 20:09     ` [PATCH 16/17] microblaze: " Matthew Wilcox (Oracle)
2023-02-15 20:09     ` [PATCH 17/17] mips: " Matthew Wilcox (Oracle)
