kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Barret Rhoden <brho@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	David Hildenbrand <david@redhat.com>,
	Dave Jiang <dave.jiang@intel.com>,
	Alexander Duyck <alexander.h.duyck@linux.intel.com>,
	linux-nvdimm <linux-nvdimm@lists.01.org>, X86 ML <x86@kernel.org>,
	KVM list <kvm@vger.kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	"Zeng, Jason" <jason.zeng@intel.com>
Subject: Re: [PATCH v5 1/2] mm: make dev_pagemap_mapping_shift() externally visible
Date: Fri, 13 Dec 2019 10:13:13 -0800	[thread overview]
Message-ID: <CAPcyv4ia3mp9d24fBiSXh2m_T4sLHnJHeg6VLw2AZmk8DAD7qQ@mail.gmail.com> (raw)
In-Reply-To: <20191213174702.GB31552@linux.intel.com>

On Fri, Dec 13, 2019 at 9:47 AM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Thu, Dec 12, 2019 at 01:22:37PM -0500, Barret Rhoden wrote:
> > KVM has a use case for determining the size of a dax mapping.
> >
> > The KVM code has easy access to the address and the mm, and
> > dev_pagemap_mapping_shift() needs only those parameters.  It was
> > deriving them from page and vma.  This commit changes those parameters
> > from (page, vma) to (address, mm).
> >
> > Signed-off-by: Barret Rhoden <brho@google.com>
> > Reviewed-by: David Hildenbrand <david@redhat.com>
> > Acked-by: Dan Williams <dan.j.williams@intel.com>
> > ---
> >  include/linux/mm.h  |  3 +++
> >  mm/memory-failure.c | 38 +++-----------------------------------
> >  mm/util.c           | 34 ++++++++++++++++++++++++++++++++++
> >  3 files changed, 40 insertions(+), 35 deletions(-)
> >
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index a2adf95b3f9c..bfd1882dd5c6 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -1013,6 +1013,9 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
> >  #define page_ref_zero_or_close_to_overflow(page) \
> >       ((unsigned int) page_ref_count(page) + 127u <= 127u)
> >
> > +unsigned long dev_pagemap_mapping_shift(unsigned long address,
> > +                                     struct mm_struct *mm);
> > +
> >  static inline void get_page(struct page *page)
> >  {
> >       page = compound_head(page);
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index 3151c87dff73..bafa464c8290 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -261,40 +261,6 @@ void shake_page(struct page *p, int access)
> >  }
> >  EXPORT_SYMBOL_GPL(shake_page);
> >
> > -static unsigned long dev_pagemap_mapping_shift(struct page *page,
> > -             struct vm_area_struct *vma)
> > -{
> > -     unsigned long address = vma_address(page, vma);
> > -     pgd_t *pgd;
> > -     p4d_t *p4d;
> > -     pud_t *pud;
> > -     pmd_t *pmd;
> > -     pte_t *pte;
> > -
> > -     pgd = pgd_offset(vma->vm_mm, address);
> > -     if (!pgd_present(*pgd))
> > -             return 0;
> > -     p4d = p4d_offset(pgd, address);
> > -     if (!p4d_present(*p4d))
> > -             return 0;
> > -     pud = pud_offset(p4d, address);
> > -     if (!pud_present(*pud))
> > -             return 0;
> > -     if (pud_devmap(*pud))
> > -             return PUD_SHIFT;
> > -     pmd = pmd_offset(pud, address);
> > -     if (!pmd_present(*pmd))
> > -             return 0;
> > -     if (pmd_devmap(*pmd))
> > -             return PMD_SHIFT;
> > -     pte = pte_offset_map(pmd, address);
> > -     if (!pte_present(*pte))
> > -             return 0;
> > -     if (pte_devmap(*pte))
> > -             return PAGE_SHIFT;
> > -     return 0;
> > -}
> > -
> >  /*
> >   * Failure handling: if we can't find or can't kill a process there's
> >   * not much we can do.       We just print a message and ignore otherwise.
> > @@ -324,7 +290,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
> >       }
> >       tk->addr = page_address_in_vma(p, vma);
> >       if (is_zone_device_page(p))
> > -             tk->size_shift = dev_pagemap_mapping_shift(p, vma);
> > +             tk->size_shift =
> > +                     dev_pagemap_mapping_shift(vma_address(page, vma),
> > +                                               vma->vm_mm);
> >       else
> >               tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
> >
> > diff --git a/mm/util.c b/mm/util.c
> > index 3ad6db9a722e..59984e6b40ab 100644
> > --- a/mm/util.c
> > +++ b/mm/util.c
> > @@ -901,3 +901,37 @@ int memcmp_pages(struct page *page1, struct page *page2)
> >       kunmap_atomic(addr1);
> >       return ret;
> >  }
> > +
> > +unsigned long dev_pagemap_mapping_shift(unsigned long address,
> > +                                     struct mm_struct *mm)
> > +{
> > +     pgd_t *pgd;
> > +     p4d_t *p4d;
> > +     pud_t *pud;
> > +     pmd_t *pmd;
> > +     pte_t *pte;
> > +
> > +     pgd = pgd_offset(mm, address);
> > +     if (!pgd_present(*pgd))
> > +             return 0;
> > +     p4d = p4d_offset(pgd, address);
> > +     if (!p4d_present(*p4d))
> > +             return 0;
> > +     pud = pud_offset(p4d, address);
> > +     if (!pud_present(*pud))
> > +             return 0;
> > +     if (pud_devmap(*pud))
> > +             return PUD_SHIFT;
> > +     pmd = pmd_offset(pud, address);
> > +     if (!pmd_present(*pmd))
> > +             return 0;
> > +     if (pmd_devmap(*pmd))
> > +             return PMD_SHIFT;
> > +     pte = pte_offset_map(pmd, address);
> > +     if (!pte_present(*pte))
> > +             return 0;
> > +     if (pte_devmap(*pte))
> > +             return PAGE_SHIFT;
> > +     return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(dev_pagemap_mapping_shift);
>
> This is basically a rehash of lookup_address_in_pgd(), and doesn't provide
> exactly what KVM needs.  E.g. KVM works with levels instead of shifts, and
> it would be nice to provide the pte so that KVM can sanity check that the
> pfn from this walk matches the pfn it plans on mapping.
>
> Instead of exporting dev_pagemap_mapping_shift(), what about relacing it
> with a patch to introduce lookup_address_mm() and export that?
>
> dev_pagemap_mapping_shift() could then wrap the new helper (if you want),
> and KVM could do lookup_address_mm() for querying the size of ZONE_DEVICE
> pages.

All of the above sounds great to me. Should have looked that much
harder when implementing dev_pagemap_mapping_shift() originally.

  reply	other threads:[~2019-12-13 20:40 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-12 18:22 [PATCH v5 0/2] kvm: Use huge pages for DAX-backed files Barret Rhoden
2019-12-12 18:22 ` [PATCH v5 1/2] mm: make dev_pagemap_mapping_shift() externally visible Barret Rhoden
2019-12-13 17:47   ` Sean Christopherson
2019-12-13 18:13     ` Dan Williams [this message]
2019-12-16 17:59     ` Barret Rhoden
2019-12-18  0:18       ` Sean Christopherson
2020-01-15 18:33       ` Paolo Bonzini
2019-12-12 18:22 ` [PATCH v5 2/2] kvm: Use huge pages for DAX-backed files Barret Rhoden
2019-12-12 18:47   ` Liran Alon
2019-12-12 18:49     ` Liran Alon
2019-12-12 19:55       ` Barret Rhoden
2019-12-13  1:07         ` Liran Alon
2019-12-13 14:13           ` Barret Rhoden
2019-12-13 17:19           ` Sean Christopherson
2019-12-13 17:31             ` Liran Alon
2019-12-13 17:50               ` Sean Christopherson
2019-12-13 18:08                 ` Liran Alon
2019-12-16 16:05             ` Barret Rhoden
2020-01-07 19:05               ` Sean Christopherson
2020-01-07 19:19                 ` Barret Rhoden
2020-01-08  1:20                   ` Sean Christopherson
2020-01-08  1:39                     ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAPcyv4ia3mp9d24fBiSXh2m_T4sLHnJHeg6VLw2AZmk8DAD7qQ@mail.gmail.com \
    --to=dan.j.williams@intel.com \
    --cc=alexander.h.duyck@linux.intel.com \
    --cc=brho@google.com \
    --cc=dave.jiang@intel.com \
    --cc=david@redhat.com \
    --cc=jason.zeng@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=pbonzini@redhat.com \
    --cc=sean.j.christopherson@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).