* Re: [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap
@ 2012-01-16 17:18 Naoya Horiguchi
  0 siblings, 0 replies; 9+ messages in thread
From: Naoya Horiguchi @ 2012-01-16 17:18 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: Naoya Horiguchi, linux-mm, Andrew Morton, David Rientjes,
	Andi Kleen, Wu Fengguang, KOSAKI Motohiro, KAMEZAWA Hiroyuki,
	linux-kernel

On Sat, Jan 14, 2012 at 06:00:26PM +0100, Andrea Arcangeli wrote:
> On Thu, Jan 12, 2012 at 02:34:53PM -0500, Naoya Horiguchi wrote:
> > +		if (pmd_trans_splitting(*pmd)) {
> > +			spin_unlock(&walk->mm->page_table_lock);
> > +			wait_split_huge_page(vma->anon_vma, pmd);
> > +		} else {
> > +			for (; addr != end; addr += PAGE_SIZE) {
> > +				unsigned long offset = (addr & ~PAGEMAP_WALK_MASK)
> > +					>> PAGE_SHIFT;
> > +				pfn = thp_pte_to_pagemap_entry(*(pte_t *)pmd,
> > +							       offset);
> 
> What is this that then morphs into a pme (which still has a cast
> inside its creation)? thp_pte_to_pagemap_entry doesn't seem to be passed
> ptes either. The only case where it is valid to do a cast like that is
> when a function is used by both ptes and pmds and the code tends to
> work for both with minimal modification to differentiate the two
> cases. Considering the function that gets the cast is called thp_, this
> is hardly the case here.

Agreed.

> Why don't you pass the pmd and then do "if (pmd_present(pmd))
> page_to_pfn(pmd_page(pmd))"? What's the argument for the cast? I'm
> just reviewing this series and maybe it was covered in previous
> versions.

OK, I can do this by introducing pmd_pte as you commented in another email.

> I don't get this pme thing for something as trivial as above that
> shouldn't require any cast at all.
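
(Concretely, the change being agreed on here is to drop the cast at the
call site and let a helper take the pmd directly; a sketch, essentially
what the later v4/v5 posting adopts as thp_pmd_to_pagemap_entry():)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Build a pagemap entry straight from a pmd; no pte cast involved. */
static u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
{
	u64 pme = 0;

	/* A thp pmd is always present today; the check is future-proofing. */
	if (pmd_present(pmd))
		pme = PM_PFRAME(pmd_pfn(pmd) + offset)
			| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
	return pme;
}
#endif

(The call site then becomes "pfn = thp_pmd_to_pagemap_entry(*pmd, offset)"
instead of casting *pmd to a pte.)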


* [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap
  2012-02-08 15:51 [PATCH 0/6 v5] pagemap handles transparent hugepage Naoya Horiguchi
@ 2012-02-08 15:51 ` Naoya Horiguchi
  0 siblings, 0 replies; 9+ messages in thread
From: Naoya Horiguchi @ 2012-02-08 15:51 UTC (permalink / raw)
  To: linux-mm
  Cc: Andrew Morton, David Rientjes, Andi Kleen, Wu Fengguang,
	Andrea Arcangeli, KOSAKI Motohiro, KAMEZAWA Hiroyuki,
	linux-kernel, Naoya Horiguchi

Thp split is not necessary if we explicitly check whether pmds are
mapping thps or not. This patch introduces the check and adds code
to generate pagemap entries for pmds mapping thps, which reduces
the performance impact of pagemap on thp.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

ToDo:
  - Avoid thp split at the other two split_huge_page_pmd() call sites in mm/memcontrol.c

Changes since v3:
  - Generate pagemap entry directly from pmd to avoid messy casting

Changes since v2:
  - Add comment on if check in thp_pte_to_pagemap_entry()
  - Convert type of offset into unsigned long

Changes since v1:
  - Move pfn declaration to the beginning of pagemap_pte_range()
---
 fs/proc/task_mmu.c |   53 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 47 insertions(+), 6 deletions(-)

diff --git 3.3-rc2.orig/fs/proc/task_mmu.c 3.3-rc2/fs/proc/task_mmu.c
index 7dcd2a2..eb0a93e 100644
--- 3.3-rc2.orig/fs/proc/task_mmu.c
+++ 3.3-rc2/fs/proc/task_mmu.c
@@ -603,6 +603,9 @@ struct pagemapread {
 	u64 *buffer;
 };
 
+#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
+#define PAGEMAP_WALK_MASK	(PMD_MASK)
+
 #define PM_ENTRY_BYTES      sizeof(u64)
 #define PM_STATUS_BITS      3
 #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
@@ -661,6 +664,27 @@ static u64 pte_to_pagemap_entry(pte_t pte)
 	return pme;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
+{
+	u64 pme = 0;
+	/*
+	 * Currently pmd for thp is always present because thp can not be
+	 * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
+	 * This if-check is just to prepare for future implementation.
+	 */
+	if (pmd_present(pmd))
+		pme = PM_PFRAME(pmd_pfn(pmd) + offset)
+			| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+	return pme;
+}
+#else
+static inline u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
+{
+	return 0;
+}
+#endif
+
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			     struct mm_walk *walk)
 {
@@ -668,14 +692,33 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	struct pagemapread *pm = walk->private;
 	pte_t *pte;
 	int err = 0;
-
-	split_huge_page_pmd(walk->mm, pmd);
+	u64 pfn = PM_NOT_PRESENT;
 
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
-	for (; addr != end; addr += PAGE_SIZE) {
-		u64 pfn = PM_NOT_PRESENT;
 
+	spin_lock(&walk->mm->page_table_lock);
+	if (pmd_trans_huge(*pmd)) {
+		if (pmd_trans_splitting(*pmd)) {
+			spin_unlock(&walk->mm->page_table_lock);
+			wait_split_huge_page(vma->anon_vma, pmd);
+		} else {
+			for (; addr != end; addr += PAGE_SIZE) {
+				unsigned long offset = (addr & ~PAGEMAP_WALK_MASK)
+					>> PAGE_SHIFT;
+				pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
+				err = add_to_pagemap(addr, pfn, pm);
+				if (err)
+					break;
+			}
+			spin_unlock(&walk->mm->page_table_lock);
+			return err;
+		}
+	} else {
+		spin_unlock(&walk->mm->page_table_lock);
+	}
+
+	for (; addr != end; addr += PAGE_SIZE) {
 		/* check to see if we've left 'vma' behind
 		 * and need a new, higher one */
 		if (vma && (addr >= vma->vm_end))
@@ -757,8 +800,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
  * determine which areas of memory are actually mapped and llseek to
  * skip over unmapped regions.
  */
-#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
-#define PAGEMAP_WALK_MASK	(PMD_MASK)
 static ssize_t pagemap_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-- 
1.7.7.6
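
(For context, a minimal userspace sketch of how the entries produced
above might be consumed. The decoding assumes the layout implied by the
PM_* macros in this file: PFN in bits 0-54, page shift in bits 55-60,
present flag in bit 63; treat it as an illustration rather than an ABI
reference.)

/* pagemap_peek.c: print the pagemap entry for one virtual address (sketch) */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	uint64_t entry;
	unsigned long vaddr;
	long pagesize = sysconf(_SC_PAGESIZE);
	int fd;

	if (argc < 2)
		return 1;
	vaddr = strtoul(argv[1], NULL, 0);

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0)
		return 1;

	/* one u64 entry per virtual page of this process */
	if (pread(fd, &entry, sizeof(entry),
		  (vaddr / pagesize) * sizeof(entry)) != sizeof(entry)) {
		close(fd);
		return 1;
	}

	if (entry & (1ULL << 63))		/* PM_PRESENT */
		printf("pfn 0x%llx, page shift %llu\n",
		       (unsigned long long)(entry & ((1ULL << 55) - 1)),
		       (unsigned long long)((entry >> 55) & 0x3f));
	else
		printf("not present (or swapped)\n");

	close(fd);
	return 0;
}

(With this series applied, addresses backed by a thp report consecutive
pfns across the huge page region instead of forcing a split.)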



* Re: [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap
  2012-01-29 13:17   ` Hillf Danton
@ 2012-01-30 19:23     ` Naoya Horiguchi
  0 siblings, 0 replies; 9+ messages in thread
From: Naoya Horiguchi @ 2012-01-30 19:23 UTC (permalink / raw)
  To: Hillf Danton
  Cc: Naoya Horiguchi, linux-mm, Andrew Morton, David Rientjes,
	Andi Kleen, Wu Fengguang, Andrea Arcangeli, KOSAKI Motohiro,
	LKML

On Sun, Jan 29, 2012 at 09:17:32PM +0800, Hillf Danton wrote:
> Hi Naoya
> 
> On Sat, Jan 28, 2012 at 7:02 AM, Naoya Horiguchi
> <n-horiguchi@ah.jp.nec.com> wrote:
> > Thp split is not necessary if we explicitly check whether pmds are
> > mapping thps or not. This patch introduces the check and adds code
> > to generate pagemap entries for pmds mapping thps, which reduces
> > the performance impact of pagemap on thp.
> >
> 
> Could the method proposed here cover the two cases of split THP in mem cgroup?

Not for now, but yes once the "move charge" function supports THP.
I think that is a rather large step, so it is left for future work.

Thanks,
Naoya


* Re: [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap
  2012-01-27 23:02 ` [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap Naoya Horiguchi
@ 2012-01-29 13:17   ` Hillf Danton
  2012-01-30 19:23     ` Naoya Horiguchi
  0 siblings, 1 reply; 9+ messages in thread
From: Hillf Danton @ 2012-01-29 13:17 UTC (permalink / raw)
  To: Naoya Horiguchi
  Cc: linux-mm, Andrew Morton, David Rientjes, Andi Kleen,
	Wu Fengguang, Andrea Arcangeli, KOSAKI Motohiro, LKML,
	Hillf Danton

Hi Naoya

On Sat, Jan 28, 2012 at 7:02 AM, Naoya Horiguchi
<n-horiguchi@ah.jp.nec.com> wrote:
> Thp split is not necessary if we explicitly check whether pmds are
> mapping thps or not. This patch introduces the check and adds code
> to generate pagemap entries for pmds mapping thps, which reduces
> the performance impact of pagemap on thp.
>

Could the method proposed here cover the two cases of split THP in mem cgroup?

Thanks
Hillf


* [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap
  2012-01-27 23:02 [PATCH 0/6 v4] pagemap handles transparent hugepage Naoya Horiguchi
@ 2012-01-27 23:02 ` Naoya Horiguchi
  2012-01-29 13:17   ` Hillf Danton
  0 siblings, 1 reply; 9+ messages in thread
From: Naoya Horiguchi @ 2012-01-27 23:02 UTC (permalink / raw)
  To: linux-mm
  Cc: Andrew Morton, David Rientjes, Andi Kleen, Wu Fengguang,
	Andrea Arcangeli, KOSAKI Motohiro, KAMEZAWA Hiroyuki,
	linux-kernel, Naoya Horiguchi

Thp split is not necessary if we explicitly check whether pmds are
mapping thps or not. This patch introduces the check and adds code
to generate pagemap entries for pmds mapping thps, which reduces
the performance impact of pagemap on thp.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

Changes since v3:
  - Generate pagemap entry directly from pmd to avoid messy casting

Changes since v2:
  - Add comment on if check in thp_pte_to_pagemap_entry()
  - Convert type of offset into unsigned long

Changes since v1:
  - Move pfn declaration to the beginning of pagemap_pte_range()
---
 fs/proc/task_mmu.c |   53 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 47 insertions(+), 6 deletions(-)

diff --git 3.3-rc1.orig/fs/proc/task_mmu.c 3.3-rc1/fs/proc/task_mmu.c
index e418c5a..cfbba8d 100644
--- 3.3-rc1.orig/fs/proc/task_mmu.c
+++ 3.3-rc1/fs/proc/task_mmu.c
@@ -600,6 +600,9 @@ struct pagemapread {
 	u64 *buffer;
 };
 
+#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
+#define PAGEMAP_WALK_MASK	(PMD_MASK)
+
 #define PM_ENTRY_BYTES      sizeof(u64)
 #define PM_STATUS_BITS      3
 #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
@@ -658,6 +661,27 @@ static u64 pte_to_pagemap_entry(pte_t pte)
 	return pme;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
+{
+	u64 pme = 0;
+	/*
+	 * Currently pmd for thp is always present because thp can not be
+	 * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
+	 * This if-check is just to prepare for future implementation.
+	 */
+	if (pmd_present(pmd))
+		pme = PM_PFRAME(pmd_pfn(pmd) + offset)
+			| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+	return pme;
+}
+#else
+static inline u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
+{
+	return 0;
+}
+#endif
+
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			     struct mm_walk *walk)
 {
@@ -665,14 +689,33 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	struct pagemapread *pm = walk->private;
 	pte_t *pte;
 	int err = 0;
-
-	split_huge_page_pmd(walk->mm, pmd);
+	u64 pfn = PM_NOT_PRESENT;
 
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
-	for (; addr != end; addr += PAGE_SIZE) {
-		u64 pfn = PM_NOT_PRESENT;
 
+	spin_lock(&walk->mm->page_table_lock);
+	if (pmd_trans_huge(*pmd)) {
+		if (pmd_trans_splitting(*pmd)) {
+			spin_unlock(&walk->mm->page_table_lock);
+			wait_split_huge_page(vma->anon_vma, pmd);
+		} else {
+			for (; addr != end; addr += PAGE_SIZE) {
+				unsigned long offset = (addr & ~PAGEMAP_WALK_MASK)
+					>> PAGE_SHIFT;
+				pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
+				err = add_to_pagemap(addr, pfn, pm);
+				if (err)
+					break;
+			}
+			spin_unlock(&walk->mm->page_table_lock);
+			return err;
+		}
+	} else {
+		spin_unlock(&walk->mm->page_table_lock);
+	}
+
+	for (; addr != end; addr += PAGE_SIZE) {
 		/* check to see if we've left 'vma' behind
 		 * and need a new, higher one */
 		if (vma && (addr >= vma->vm_end))
@@ -754,8 +797,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
  * determine which areas of memory are actually mapped and llseek to
  * skip over unmapped regions.
  */
-#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
-#define PAGEMAP_WALK_MASK	(PMD_MASK)
 static ssize_t pagemap_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-- 
1.7.7.6



* Re: [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap
  2012-01-15 12:06     ` Andrea Arcangeli
@ 2012-01-16 17:18       ` Naoya Horiguchi
  0 siblings, 0 replies; 9+ messages in thread
From: Naoya Horiguchi @ 2012-01-16 17:18 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: Naoya Horiguchi, linux-mm, Andrew Morton, David Rientjes,
	Andi Kleen, Wu Fengguang, KOSAKI Motohiro, KAMEZAWA Hiroyuki,
	linux-kernel

On Sun, Jan 15, 2012 at 01:06:05PM +0100, Andrea Arcangeli wrote:
> On Sat, Jan 14, 2012 at 06:00:26PM +0100, Andrea Arcangeli wrote:
> > Why don't you pass the pmd and then do "if (pmd_present(pmd))
> > page_to_pfn(pmd_page(pmd))"? What's the argument for the cast? I'm
> 
> Of course I meant pmd_pfn above... in short as a replacement of the
> pte_pfn in your patch.
> 
> About the _stable function, I am now thinking maybe a _lock suffix is
> more appropriate than _stable, because that function's objective is
> effectively to take the page_table_lock in the most efficient way, and
> not much else beyond taking the lock. I would also add a comment to the
> inline version in the .h file (the one that would then call the __
> version in the .c file) saying that it is only safe to call with the
> mmap_sem held.

OK, I will use the _lock suffix and add the comment in the next post.
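
(A rough sketch of the shape being discussed, with illustrative names
and return convention rather than the final merged API: an inline
wrapper in the .h file that insists on mmap_sem and defers the locking
slow path to a __ helper in the .c file.)

/* In the .h file.  Safe only with mmap_sem held, which keeps the pmd
 * page table from being freed under us. */
static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
{
	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
	if (pmd_trans_huge(*pmd))
		return __pmd_trans_huge_lock(pmd, vma);
	return 0;
}

/* In the .c file.  Returns 1 with page_table_lock held if *pmd is a
 * stable trans huge pmd; returns 0 with the lock released otherwise,
 * waiting first for any split that is already in progress. */
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
{
	spin_lock(&vma->vm_mm->page_table_lock);
	if (likely(pmd_trans_huge(*pmd))) {
		if (unlikely(pmd_trans_splitting(*pmd))) {
			spin_unlock(&vma->vm_mm->page_table_lock);
			wait_split_huge_page(vma->anon_vma, pmd);
			return 0;
		}
		return 1;
	}
	spin_unlock(&vma->vm_mm->page_table_lock);
	return 0;
}

(With something like this, the open-coded lock/splitting dance in
pagemap_pte_range() could collapse into a single helper call.)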


* Re: [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap
  2012-01-14 17:00   ` Andrea Arcangeli
@ 2012-01-15 12:06     ` Andrea Arcangeli
  2012-01-16 17:18       ` Naoya Horiguchi
  0 siblings, 1 reply; 9+ messages in thread
From: Andrea Arcangeli @ 2012-01-15 12:06 UTC (permalink / raw)
  To: Naoya Horiguchi
  Cc: linux-mm, Andrew Morton, David Rientjes, Andi Kleen,
	Wu Fengguang, KOSAKI Motohiro, KAMEZAWA Hiroyuki, linux-kernel

On Sat, Jan 14, 2012 at 06:00:26PM +0100, Andrea Arcangeli wrote:
> Why don't you pass the pmd and then do "if (pmd_present(pmd))
> page_to_pfn(pmd_page(pmd))"? What's the argument for the cast? I'm

Of course I meant pmd_pfn above... in short as a replacement of the
pte_pfn in your patch.

About the _stable function, I am now thinking maybe a _lock suffix is
more appropriate than _stable, because that function's objective is
effectively to take the page_table_lock in the most efficient way, and
not much else beyond taking the lock. I would also add a comment to the
inline version in the .h file (the one that would then call the __
version in the .c file) saying that it is only safe to call with the
mmap_sem held.


* Re: [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap
  2012-01-12 19:34 ` [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap Naoya Horiguchi
@ 2012-01-14 17:00   ` Andrea Arcangeli
  2012-01-15 12:06     ` Andrea Arcangeli
  0 siblings, 1 reply; 9+ messages in thread
From: Andrea Arcangeli @ 2012-01-14 17:00 UTC (permalink / raw)
  To: Naoya Horiguchi
  Cc: linux-mm, Andrew Morton, David Rientjes, Andi Kleen,
	Wu Fengguang, KOSAKI Motohiro, KAMEZAWA Hiroyuki, linux-kernel

On Thu, Jan 12, 2012 at 02:34:53PM -0500, Naoya Horiguchi wrote:
> +		if (pmd_trans_splitting(*pmd)) {
> +			spin_unlock(&walk->mm->page_table_lock);
> +			wait_split_huge_page(vma->anon_vma, pmd);
> +		} else {
> +			for (; addr != end; addr += PAGE_SIZE) {
> +				unsigned long offset = (addr & ~PAGEMAP_WALK_MASK)
> +					>> PAGE_SHIFT;
> +				pfn = thp_pte_to_pagemap_entry(*(pte_t *)pmd,
> +							       offset);

What is this that then morphs into a pme (which still has a cast
inside its creation)? thp_pte_to_pagemap_entry doesn't seem to be passed
ptes either. The only case where it is valid to do a cast like that is
when a function is used by both ptes and pmds and the code tends to
work for both with minimal modification to differentiate the two
cases. Considering the function that gets the cast is called thp_, this
is hardly the case here.

Why don't you pass the pmd and then do "if (pmd_present(pmd))
page_to_pfn(pmd_page(pmd))"? What's the argument for the cast? I'm
just reviewing this series and maybe it was covered in previous
versions.

I don't get this pme thing for something as trivial as above that
shouldn't require any cast at all.


* [PATCH 1/6] pagemap: avoid splitting thp when reading /proc/pid/pagemap
  2012-01-12 19:34 [PATCH 0/6 v3] pagemap handles transparent hugepage Naoya Horiguchi
@ 2012-01-12 19:34 ` Naoya Horiguchi
  2012-01-14 17:00   ` Andrea Arcangeli
  0 siblings, 1 reply; 9+ messages in thread
From: Naoya Horiguchi @ 2012-01-12 19:34 UTC (permalink / raw)
  To: linux-mm
  Cc: Andrew Morton, David Rientjes, Andi Kleen, Wu Fengguang,
	Andrea Arcangeli, KOSAKI Motohiro, KAMEZAWA Hiroyuki,
	linux-kernel, Naoya Horiguchi

Thp split is not necessary if we explicitly check whether pmds are
mapping thps or not. This patch introduces the check and the code
to generate pagemap entries for pmds mapping thps, which reduces
the performance impact of pagemap on thp.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

Changes since v2:
  - Add comment on if check in thp_pte_to_pagemap_entry()
  - Convert type of offset into unsigned long

Changes since v1:
  - Move pfn declaration to the beginning of pagemap_pte_range()
---
 fs/proc/task_mmu.c |   54 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 48 insertions(+), 6 deletions(-)

diff --git 3.2-rc5.orig/fs/proc/task_mmu.c 3.2-rc5/fs/proc/task_mmu.c
index e418c5a..bd19177 100644
--- 3.2-rc5.orig/fs/proc/task_mmu.c
+++ 3.2-rc5/fs/proc/task_mmu.c
@@ -600,6 +600,9 @@ struct pagemapread {
 	u64 *buffer;
 };
 
+#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
+#define PAGEMAP_WALK_MASK	(PMD_MASK)
+
 #define PM_ENTRY_BYTES      sizeof(u64)
 #define PM_STATUS_BITS      3
 #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
@@ -658,6 +661,27 @@ static u64 pte_to_pagemap_entry(pte_t pte)
 	return pme;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static u64 thp_pte_to_pagemap_entry(pte_t pte, int offset)
+{
+	u64 pme = 0;
+	/*
+	 * Currently pte for thp is always present because thp can not be
+	 * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
+	 * This if-check is just to prepare for future implementation.
+	 */
+	if (pte_present(pte))
+		pme = PM_PFRAME(pte_pfn(pte) + offset)
+			| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+	return pme;
+}
+#else
+static inline u64 thp_pte_to_pagemap_entry(pte_t pte, int offset)
+{
+	return 0;
+}
+#endif
+
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			     struct mm_walk *walk)
 {
@@ -665,14 +689,34 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	struct pagemapread *pm = walk->private;
 	pte_t *pte;
 	int err = 0;
-
-	split_huge_page_pmd(walk->mm, pmd);
+	u64 pfn = PM_NOT_PRESENT;
 
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
-	for (; addr != end; addr += PAGE_SIZE) {
-		u64 pfn = PM_NOT_PRESENT;
 
+	spin_lock(&walk->mm->page_table_lock);
+	if (pmd_trans_huge(*pmd)) {
+		if (pmd_trans_splitting(*pmd)) {
+			spin_unlock(&walk->mm->page_table_lock);
+			wait_split_huge_page(vma->anon_vma, pmd);
+		} else {
+			for (; addr != end; addr += PAGE_SIZE) {
+				unsigned long offset = (addr & ~PAGEMAP_WALK_MASK)
+					>> PAGE_SHIFT;
+				pfn = thp_pte_to_pagemap_entry(*(pte_t *)pmd,
+							       offset);
+				err = add_to_pagemap(addr, pfn, pm);
+				if (err)
+					break;
+			}
+			spin_unlock(&walk->mm->page_table_lock);
+			return err;
+		}
+	} else {
+		spin_unlock(&walk->mm->page_table_lock);
+	}
+
+	for (; addr != end; addr += PAGE_SIZE) {
 		/* check to see if we've left 'vma' behind
 		 * and need a new, higher one */
 		if (vma && (addr >= vma->vm_end))
@@ -754,8 +798,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
  * determine which areas of memory are actually mapped and llseek to
  * skip over unmapped regions.
  */
-#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
-#define PAGEMAP_WALK_MASK	(PMD_MASK)
 static ssize_t pagemap_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-- 
1.7.6.5


