All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jérôme Glisse" <jglisse@redhat.com>
To: akpm@linux-foundation.org, <linux-kernel@vger.kernel.org>,
	linux-mm@kvack.org
Cc: "Linus Torvalds" <torvalds@linux-foundation.org>,
	joro@8bytes.org, "Mel Gorman" <mgorman@suse.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Andrea Arcangeli" <aarcange@redhat.com>,
	"Johannes Weiner" <jweiner@redhat.com>,
	"Larry Woodman" <lwoodman@redhat.com>,
	"Rik van Riel" <riel@redhat.com>,
	"Dave Airlie" <airlied@redhat.com>,
	"Brendan Conoboy" <blc@redhat.com>,
	"Joe Donohue" <jdonohue@redhat.com>,
	"Christophe Harle" <charle@nvidia.com>,
	"Duncan Poole" <dpoole@nvidia.com>,
	"Sherry Cheung" <SCheung@nvidia.com>,
	"Subhash Gutti" <sgutti@nvidia.com>,
	"John Hubbard" <jhubbard@nvidia.com>,
	"Mark Hairgrove" <mhairgrove@nvidia.com>,
	"Lucien Dunning" <ldunning@nvidia.com>,
	"Cameron Buschardt" <cabuschardt@nvidia.com>,
	"Arvind Gopalakrishnan" <arvindg@nvidia.com>,
	"Haggai Eran" <haggaie@mellanox.com>,
	"Shachar Raindel" <raindel@mellanox.com>,
	"Liran Liss" <liranl@mellanox.com>,
	"Roland Dreier" <roland@purestorage.com>,
	"Ben Sander" <ben.sander@amd.com>,
	"Greg Stoner" <Greg.Stoner@amd.com>,
	"John Bridgman" <John.Bridgman@amd.com>,
	"Michael Mantor" <Michael.Mantor@amd.com>,
	"Paul Blinzer" <Paul.Blinzer@amd.com>,
	"Leonid Shamis" <Leonid.Shamis@amd.com>,
	"Laurent Morichetti" <Laurent.Morichetti@amd.com>,
	"Alexander Deucher" <Alexander.Deucher@amd.com>,
	"Jérôme Glisse" <jglisse@redhat.com>
Subject: [PATCH v12 13/29] HMM: DMA map memory on behalf of device driver v2.
Date: Tue,  8 Mar 2016 15:43:06 -0500	[thread overview]
Message-ID: <1457469802-11850-14-git-send-email-jglisse@redhat.com> (raw)
In-Reply-To: <1457469802-11850-1-git-send-email-jglisse@redhat.com>

Do the DMA mapping on behalf of the device, as HMM is a good place
to perform this common task. Moreover, in the future we hope to
add new infrastructure that would make DMA mapping more efficient
(lower overhead per page) by leveraging the HMM data structures.

Changed since v1:
  - Adapt to HMM page table changes.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
---
 include/linux/hmm_pt.h |  11 +++
 mm/hmm.c               | 202 +++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 174 insertions(+), 39 deletions(-)

diff --git a/include/linux/hmm_pt.h b/include/linux/hmm_pt.h
index 4a8beb1..8a59a75 100644
--- a/include/linux/hmm_pt.h
+++ b/include/linux/hmm_pt.h
@@ -176,6 +176,17 @@ static inline dma_addr_t hmm_pte_from_pfn(dma_addr_t pfn)
 	return (pfn << PAGE_SHIFT) | (1 << HMM_PTE_VALID_PFN_BIT);
 }
 
+static inline dma_addr_t hmm_pte_from_dma_addr(dma_addr_t dma_addr)
+{
+	return (dma_addr & HMM_PTE_DMA_MASK) | (1 << HMM_PTE_VALID_DMA_BIT);
+}
+
+static inline dma_addr_t hmm_pte_dma_addr(dma_addr_t pte)
+{
+	/* FIXME Use max dma addr instead of 0 ? */
+	return hmm_pte_test_valid_dma(&pte) ? (pte & HMM_PTE_DMA_MASK) : 0;
+}
+
 static inline unsigned long hmm_pte_pfn(dma_addr_t pte)
 {
 	return hmm_pte_test_valid_pfn(&pte) ? pte >> PAGE_SHIFT : 0;
diff --git a/mm/hmm.c b/mm/hmm.c
index dc37e49..7cab6cb 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -41,6 +41,7 @@
 #include <linux/mman.h>
 #include <linux/delay.h>
 #include <linux/workqueue.h>
+#include <linux/dma-mapping.h>
 
 #include "internal.h"
 
@@ -577,6 +578,46 @@ static inline int hmm_mirror_update(struct hmm_mirror *mirror,
 	return ret;
 }
 
+static void hmm_mirror_update_pte(struct hmm_mirror *mirror,
+				  struct hmm_event *event,
+				  struct hmm_pt_iter *iter,
+				  struct mm_pt_iter *mm_iter,
+				  struct page *page,
+				  dma_addr_t *hmm_pte,
+				  unsigned long addr)
+{
+	bool dirty = hmm_pte_test_and_clear_dirty(hmm_pte);
+
+	if (hmm_pte_test_valid_pfn(hmm_pte)) {
+		*hmm_pte &= event->pte_mask;
+		if (!hmm_pte_test_valid_pfn(hmm_pte))
+			hmm_pt_iter_directory_unref(iter);
+		goto out;
+	}
+
+	if (!hmm_pte_test_valid_dma(hmm_pte))
+		return;
+
+	if (!hmm_pte_test_valid_dma(&event->pte_mask)) {
+		struct device *dev = mirror->device->dev;
+		dma_addr_t dma_addr;
+
+		dma_addr = hmm_pte_dma_addr(*hmm_pte);
+		dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	*hmm_pte &= event->pte_mask;
+	if (!hmm_pte_test_valid_dma(hmm_pte))
+		hmm_pt_iter_directory_unref(iter);
+
+out:
+	if (dirty) {
+		page = page ? : mm_pt_iter_page(mm_iter, addr);
+		if (page)
+			set_page_dirty(page);
+	}
+}
+
 static void hmm_mirror_update_pt(struct hmm_mirror *mirror,
 				 struct hmm_event *event,
 				 struct page *page)
@@ -603,19 +644,9 @@ static void hmm_mirror_update_pt(struct hmm_mirror *mirror,
 		 */
 		hmm_pt_iter_directory_lock(&iter);
 		do {
-			if (!hmm_pte_test_valid_pfn(hmm_pte))
-				continue;
-			if (hmm_pte_test_and_clear_dirty(hmm_pte) &&
-			    hmm_pte_test_write(hmm_pte)) {
-				page = page ? : mm_pt_iter_page(&mm_iter, addr);
-				if (page)
-					set_page_dirty(page);
-				page = NULL;
-			}
-			*hmm_pte &= event->pte_mask;
-			if (hmm_pte_test_valid_pfn(hmm_pte))
-				continue;
-			hmm_pt_iter_directory_unref(&iter);
+			hmm_mirror_update_pte(mirror, event, &iter, &mm_iter,
+					      page, hmm_pte, addr);
+			page = NULL;
 		} while (addr += PAGE_SIZE, hmm_pte++, addr != next);
 		hmm_pt_iter_directory_unlock(&iter);
 	}
@@ -687,6 +718,9 @@ static int hmm_mirror_fault_hpmd(struct hmm_mirror *mirror,
 		 */
 		hmm_pt_iter_directory_lock(iter);
 		do {
+			if (hmm_pte_test_valid_dma(&hmm_pte[i]))
+				continue;
+
 			if (!hmm_pte_test_valid_pfn(&hmm_pte[i])) {
 				hmm_pte[i] = hmm_pte_from_pfn(pfn);
 				hmm_pt_iter_directory_ref(iter);
@@ -760,6 +794,9 @@ static int hmm_mirror_fault_pmd(pmd_t *pmdp,
 				break;
 			}
 
+			if (hmm_pte_test_valid_dma(&hmm_pte[i]))
+				continue;
+
 			if (!hmm_pte_test_valid_pfn(&hmm_pte[i])) {
 				hmm_pte[i] = hmm_pte_from_pfn(pte_pfn(*ptep));
 				hmm_pt_iter_directory_ref(iter);
@@ -776,6 +813,80 @@ static int hmm_mirror_fault_pmd(pmd_t *pmdp,
 	return ret;
 }
 
+static int hmm_mirror_dma_map(struct hmm_mirror *mirror,
+			      struct hmm_pt_iter *iter,
+			      unsigned long start,
+			      unsigned long end)
+{
+	struct device *dev = mirror->device->dev;
+	unsigned long addr;
+	int ret;
+
+	for (ret = 0, addr = start; !ret && addr < end;) {
+		unsigned long i = 0, next = end;
+		dma_addr_t *hmm_pte;
+
+		hmm_pte = hmm_pt_iter_populate(iter, addr, &next);
+		if (!hmm_pte)
+			return -ENOENT;
+
+		do {
+			dma_addr_t dma_addr, pte;
+			struct page *page;
+
+again:
+			pte = ACCESS_ONCE(hmm_pte[i]);
+			if (!hmm_pte_test_valid_pfn(&pte)) {
+				if (!hmm_pte_test_valid_dma(&pte)) {
+					ret = -ENOENT;
+					break;
+				}
+				continue;
+			}
+
+			page = pfn_to_page(hmm_pte_pfn(pte));
+			VM_BUG_ON(!page);
+			dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE,
+						DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev, dma_addr)) {
+				ret = -ENOMEM;
+				break;
+			}
+
+			hmm_pt_iter_directory_lock(iter);
+			/*
+			 * Make sure we transfer the dirty bit. Note that there
+			 * might still be a window for another thread to set
+			 * the dirty bit before we check for pte equality. This
+			 * will just lead to a useless retry so it is not the
+			 * end of the world here.
+			 */
+			if (hmm_pte_test_dirty(&hmm_pte[i]))
+				hmm_pte_set_dirty(&pte);
+			if (ACCESS_ONCE(hmm_pte[i]) != pte) {
+				hmm_pt_iter_directory_unlock(iter);
+				dma_unmap_page(dev, dma_addr, PAGE_SIZE,
+					       DMA_BIDIRECTIONAL);
+				if (hmm_pte_test_valid_pfn(&pte))
+					goto again;
+				if (!hmm_pte_test_valid_dma(&pte)) {
+					ret = -ENOENT;
+					break;
+				}
+			} else {
+				hmm_pte[i] = hmm_pte_from_dma_addr(dma_addr);
+				if (hmm_pte_test_write(&pte))
+					hmm_pte_set_write(&hmm_pte[i]);
+				if (hmm_pte_test_dirty(&pte))
+					hmm_pte_set_dirty(&hmm_pte[i]);
+				hmm_pt_iter_directory_unlock(iter);
+			}
+		} while (addr += PAGE_SIZE, i++, addr != next && !ret);
+	}
+
+	return ret;
+}
+
 static int hmm_mirror_handle_fault(struct hmm_mirror *mirror,
 				   struct hmm_event *event,
 				   struct vm_area_struct *vma,
@@ -784,7 +895,7 @@ static int hmm_mirror_handle_fault(struct hmm_mirror *mirror,
 	struct hmm_mirror_fault mirror_fault;
 	unsigned long addr = event->start;
 	struct mm_walk walk = {0};
-	int ret = 0;
+	int ret;
 
 	if ((event->etype == HMM_DEVICE_WFAULT) && !(vma->vm_flags & VM_WRITE))
 		return -EACCES;
@@ -793,33 +904,45 @@ static int hmm_mirror_handle_fault(struct hmm_mirror *mirror,
 	if (ret)
 		return ret;
 
-again:
-	if (event->backoff) {
-		ret = -EAGAIN;
-		goto out;
-	}
-	if (addr >= event->end)
-		goto out;
+	do {
+		if (event->backoff) {
+			ret = -EAGAIN;
+			break;
+		}
+		if (addr >= event->end)
+			break;
+
+		mirror_fault.event = event;
+		mirror_fault.mirror = mirror;
+		mirror_fault.vma = vma;
+		mirror_fault.addr = addr;
+		mirror_fault.iter = iter;
+		walk.mm = mirror->hmm->mm;
+		walk.private = &mirror_fault;
+		walk.pmd_entry = hmm_mirror_fault_pmd;
+		walk.pte_hole = hmm_pte_hole;
+		ret = walk_page_range(addr, event->end, &walk);
+		if (ret)
+			break;
+
+		if (event->backoff) {
+			ret = -EAGAIN;
+			break;
+		}
 
-	mirror_fault.event = event;
-	mirror_fault.mirror = mirror;
-	mirror_fault.vma = vma;
-	mirror_fault.addr = addr;
-	mirror_fault.iter = iter;
-	walk.mm = mirror->hmm->mm;
-	walk.private = &mirror_fault;
-	walk.pmd_entry = hmm_mirror_fault_pmd;
-	walk.pte_hole = hmm_pte_hole;
-	ret = walk_page_range(addr, event->end, &walk);
-	if (!ret) {
-		ret = mirror->device->ops->update(mirror, event);
-		if (!ret) {
-			addr = mirror_fault.addr;
-			goto again;
+		if (mirror->device->dev) {
+			ret = hmm_mirror_dma_map(mirror, iter,
+						 addr, event->end);
+			if (ret)
+				break;
 		}
-	}
 
-out:
+		ret = mirror->device->ops->update(mirror, event);
+		if (ret)
+			break;
+		addr = mirror_fault.addr;
+	} while (1);
+
 	hmm_device_fault_end(mirror->hmm, event);
 	if (ret == -ENOENT) {
 		ret = hmm_mm_fault(mirror->hmm, event, vma, addr);
@@ -973,7 +1096,8 @@ void hmm_mirror_range_dirty(struct hmm_mirror *mirror,
 
 		hmm_pte = hmm_pt_iter_walk(&iter, &addr, &next);
 		for (; hmm_pte && addr != next; hmm_pte++, addr += PAGE_SIZE) {
-			if (!hmm_pte_test_valid_pfn(hmm_pte) ||
+			if ((!hmm_pte_test_valid_pfn(hmm_pte) &&
+			     !hmm_pte_test_valid_dma(hmm_pte)) ||
 			    !hmm_pte_test_write(hmm_pte))
 				continue;
 			hmm_pte_set_dirty(hmm_pte);
-- 
2.4.3

WARNING: multiple messages have this Message-ID (diff)
From: "Jérôme Glisse" <jglisse@redhat.com>
To: akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Cc: "Linus Torvalds" <torvalds@linux-foundation.org>,
	joro@8bytes.org, "Mel Gorman" <mgorman@suse.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Andrea Arcangeli" <aarcange@redhat.com>,
	"Johannes Weiner" <jweiner@redhat.com>,
	"Larry Woodman" <lwoodman@redhat.com>,
	"Rik van Riel" <riel@redhat.com>,
	"Dave Airlie" <airlied@redhat.com>,
	"Brendan Conoboy" <blc@redhat.com>,
	"Joe Donohue" <jdonohue@redhat.com>,
	"Christophe Harle" <charle@nvidia.com>,
	"Duncan Poole" <dpoole@nvidia.com>,
	"Sherry Cheung" <SCheung@nvidia.com>,
	"Subhash Gutti" <sgutti@nvidia.com>,
	"John Hubbard" <jhubbard@nvidia.com>,
	"Mark Hairgrove" <mhairgrove@nvidia.com>,
	"Lucien Dunning" <ldunning@nvidia.com>,
	"Cameron Buschardt" <cabuschardt@nvidia.com>,
	"Arvind Gopalakrishnan" <arvindg@nvidia.com>,
	"Haggai Eran" <haggaie@mellanox.com>,
	"Shachar Raindel" <raindel@mellanox.com>,
	"Liran Liss" <liranl@mellanox.com>,
	"Roland Dreier" <roland@purestorage.com>,
	"Ben Sander" <ben.sander@amd.com>,
	"Greg Stoner" <Greg.Stoner@amd.com>,
	"John Bridgman" <John.Bridgman@amd.com>,
	"Michael Mantor" <Michael.Mantor@amd.com>,
	"Paul Blinzer" <Paul.Blinzer@amd.com>,
	"Leonid Shamis" <Leonid.Shamis@amd.com>,
	"Laurent Morichetti" <Laurent.Morichetti@amd.com>,
	"Alexander Deucher" <Alexander.Deucher@amd.com>,
	"Jérôme Glisse" <jglisse@redhat.com>
Subject: [PATCH v12 13/29] HMM: DMA map memory on behalf of device driver v2.
Date: Tue,  8 Mar 2016 15:43:06 -0500	[thread overview]
Message-ID: <1457469802-11850-14-git-send-email-jglisse@redhat.com> (raw)
In-Reply-To: <1457469802-11850-1-git-send-email-jglisse@redhat.com>

Do the DMA mapping on behalf of the device, as HMM is a good place
to perform this common task. Moreover, in the future we hope to
add new infrastructure that would make DMA mapping more efficient
(lower overhead per page) by leveraging the HMM data structures.

Changed since v1:
  - Adapt to HMM page table changes.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
---
 include/linux/hmm_pt.h |  11 +++
 mm/hmm.c               | 202 +++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 174 insertions(+), 39 deletions(-)

diff --git a/include/linux/hmm_pt.h b/include/linux/hmm_pt.h
index 4a8beb1..8a59a75 100644
--- a/include/linux/hmm_pt.h
+++ b/include/linux/hmm_pt.h
@@ -176,6 +176,17 @@ static inline dma_addr_t hmm_pte_from_pfn(dma_addr_t pfn)
 	return (pfn << PAGE_SHIFT) | (1 << HMM_PTE_VALID_PFN_BIT);
 }
 
+static inline dma_addr_t hmm_pte_from_dma_addr(dma_addr_t dma_addr)
+{
+	return (dma_addr & HMM_PTE_DMA_MASK) | (1 << HMM_PTE_VALID_DMA_BIT);
+}
+
+static inline dma_addr_t hmm_pte_dma_addr(dma_addr_t pte)
+{
+	/* FIXME Use max dma addr instead of 0 ? */
+	return hmm_pte_test_valid_dma(&pte) ? (pte & HMM_PTE_DMA_MASK) : 0;
+}
+
 static inline unsigned long hmm_pte_pfn(dma_addr_t pte)
 {
 	return hmm_pte_test_valid_pfn(&pte) ? pte >> PAGE_SHIFT : 0;
diff --git a/mm/hmm.c b/mm/hmm.c
index dc37e49..7cab6cb 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -41,6 +41,7 @@
 #include <linux/mman.h>
 #include <linux/delay.h>
 #include <linux/workqueue.h>
+#include <linux/dma-mapping.h>
 
 #include "internal.h"
 
@@ -577,6 +578,46 @@ static inline int hmm_mirror_update(struct hmm_mirror *mirror,
 	return ret;
 }
 
+static void hmm_mirror_update_pte(struct hmm_mirror *mirror,
+				  struct hmm_event *event,
+				  struct hmm_pt_iter *iter,
+				  struct mm_pt_iter *mm_iter,
+				  struct page *page,
+				  dma_addr_t *hmm_pte,
+				  unsigned long addr)
+{
+	bool dirty = hmm_pte_test_and_clear_dirty(hmm_pte);
+
+	if (hmm_pte_test_valid_pfn(hmm_pte)) {
+		*hmm_pte &= event->pte_mask;
+		if (!hmm_pte_test_valid_pfn(hmm_pte))
+			hmm_pt_iter_directory_unref(iter);
+		goto out;
+	}
+
+	if (!hmm_pte_test_valid_dma(hmm_pte))
+		return;
+
+	if (!hmm_pte_test_valid_dma(&event->pte_mask)) {
+		struct device *dev = mirror->device->dev;
+		dma_addr_t dma_addr;
+
+		dma_addr = hmm_pte_dma_addr(*hmm_pte);
+		dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	*hmm_pte &= event->pte_mask;
+	if (!hmm_pte_test_valid_dma(hmm_pte))
+		hmm_pt_iter_directory_unref(iter);
+
+out:
+	if (dirty) {
+		page = page ? : mm_pt_iter_page(mm_iter, addr);
+		if (page)
+			set_page_dirty(page);
+	}
+}
+
 static void hmm_mirror_update_pt(struct hmm_mirror *mirror,
 				 struct hmm_event *event,
 				 struct page *page)
@@ -603,19 +644,9 @@ static void hmm_mirror_update_pt(struct hmm_mirror *mirror,
 		 */
 		hmm_pt_iter_directory_lock(&iter);
 		do {
-			if (!hmm_pte_test_valid_pfn(hmm_pte))
-				continue;
-			if (hmm_pte_test_and_clear_dirty(hmm_pte) &&
-			    hmm_pte_test_write(hmm_pte)) {
-				page = page ? : mm_pt_iter_page(&mm_iter, addr);
-				if (page)
-					set_page_dirty(page);
-				page = NULL;
-			}
-			*hmm_pte &= event->pte_mask;
-			if (hmm_pte_test_valid_pfn(hmm_pte))
-				continue;
-			hmm_pt_iter_directory_unref(&iter);
+			hmm_mirror_update_pte(mirror, event, &iter, &mm_iter,
+					      page, hmm_pte, addr);
+			page = NULL;
 		} while (addr += PAGE_SIZE, hmm_pte++, addr != next);
 		hmm_pt_iter_directory_unlock(&iter);
 	}
@@ -687,6 +718,9 @@ static int hmm_mirror_fault_hpmd(struct hmm_mirror *mirror,
 		 */
 		hmm_pt_iter_directory_lock(iter);
 		do {
+			if (hmm_pte_test_valid_dma(&hmm_pte[i]))
+				continue;
+
 			if (!hmm_pte_test_valid_pfn(&hmm_pte[i])) {
 				hmm_pte[i] = hmm_pte_from_pfn(pfn);
 				hmm_pt_iter_directory_ref(iter);
@@ -760,6 +794,9 @@ static int hmm_mirror_fault_pmd(pmd_t *pmdp,
 				break;
 			}
 
+			if (hmm_pte_test_valid_dma(&hmm_pte[i]))
+				continue;
+
 			if (!hmm_pte_test_valid_pfn(&hmm_pte[i])) {
 				hmm_pte[i] = hmm_pte_from_pfn(pte_pfn(*ptep));
 				hmm_pt_iter_directory_ref(iter);
@@ -776,6 +813,80 @@ static int hmm_mirror_fault_pmd(pmd_t *pmdp,
 	return ret;
 }
 
+static int hmm_mirror_dma_map(struct hmm_mirror *mirror,
+			      struct hmm_pt_iter *iter,
+			      unsigned long start,
+			      unsigned long end)
+{
+	struct device *dev = mirror->device->dev;
+	unsigned long addr;
+	int ret;
+
+	for (ret = 0, addr = start; !ret && addr < end;) {
+		unsigned long i = 0, next = end;
+		dma_addr_t *hmm_pte;
+
+		hmm_pte = hmm_pt_iter_populate(iter, addr, &next);
+		if (!hmm_pte)
+			return -ENOENT;
+
+		do {
+			dma_addr_t dma_addr, pte;
+			struct page *page;
+
+again:
+			pte = ACCESS_ONCE(hmm_pte[i]);
+			if (!hmm_pte_test_valid_pfn(&pte)) {
+				if (!hmm_pte_test_valid_dma(&pte)) {
+					ret = -ENOENT;
+					break;
+				}
+				continue;
+			}
+
+			page = pfn_to_page(hmm_pte_pfn(pte));
+			VM_BUG_ON(!page);
+			dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE,
+						DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev, dma_addr)) {
+				ret = -ENOMEM;
+				break;
+			}
+
+			hmm_pt_iter_directory_lock(iter);
+			/*
+			 * Make sure we transfer the dirty bit. Note that there
+			 * might still be a window for another thread to set
+			 * the dirty bit before we check for pte equality. This
+			 * will just lead to a useless retry so it is not the
+			 * end of the world here.
+			 */
+			if (hmm_pte_test_dirty(&hmm_pte[i]))
+				hmm_pte_set_dirty(&pte);
+			if (ACCESS_ONCE(hmm_pte[i]) != pte) {
+				hmm_pt_iter_directory_unlock(iter);
+				dma_unmap_page(dev, dma_addr, PAGE_SIZE,
+					       DMA_BIDIRECTIONAL);
+				if (hmm_pte_test_valid_pfn(&pte))
+					goto again;
+				if (!hmm_pte_test_valid_dma(&pte)) {
+					ret = -ENOENT;
+					break;
+				}
+			} else {
+				hmm_pte[i] = hmm_pte_from_dma_addr(dma_addr);
+				if (hmm_pte_test_write(&pte))
+					hmm_pte_set_write(&hmm_pte[i]);
+				if (hmm_pte_test_dirty(&pte))
+					hmm_pte_set_dirty(&hmm_pte[i]);
+				hmm_pt_iter_directory_unlock(iter);
+			}
+		} while (addr += PAGE_SIZE, i++, addr != next && !ret);
+	}
+
+	return ret;
+}
+
 static int hmm_mirror_handle_fault(struct hmm_mirror *mirror,
 				   struct hmm_event *event,
 				   struct vm_area_struct *vma,
@@ -784,7 +895,7 @@ static int hmm_mirror_handle_fault(struct hmm_mirror *mirror,
 	struct hmm_mirror_fault mirror_fault;
 	unsigned long addr = event->start;
 	struct mm_walk walk = {0};
-	int ret = 0;
+	int ret;
 
 	if ((event->etype == HMM_DEVICE_WFAULT) && !(vma->vm_flags & VM_WRITE))
 		return -EACCES;
@@ -793,33 +904,45 @@ static int hmm_mirror_handle_fault(struct hmm_mirror *mirror,
 	if (ret)
 		return ret;
 
-again:
-	if (event->backoff) {
-		ret = -EAGAIN;
-		goto out;
-	}
-	if (addr >= event->end)
-		goto out;
+	do {
+		if (event->backoff) {
+			ret = -EAGAIN;
+			break;
+		}
+		if (addr >= event->end)
+			break;
+
+		mirror_fault.event = event;
+		mirror_fault.mirror = mirror;
+		mirror_fault.vma = vma;
+		mirror_fault.addr = addr;
+		mirror_fault.iter = iter;
+		walk.mm = mirror->hmm->mm;
+		walk.private = &mirror_fault;
+		walk.pmd_entry = hmm_mirror_fault_pmd;
+		walk.pte_hole = hmm_pte_hole;
+		ret = walk_page_range(addr, event->end, &walk);
+		if (ret)
+			break;
+
+		if (event->backoff) {
+			ret = -EAGAIN;
+			break;
+		}
 
-	mirror_fault.event = event;
-	mirror_fault.mirror = mirror;
-	mirror_fault.vma = vma;
-	mirror_fault.addr = addr;
-	mirror_fault.iter = iter;
-	walk.mm = mirror->hmm->mm;
-	walk.private = &mirror_fault;
-	walk.pmd_entry = hmm_mirror_fault_pmd;
-	walk.pte_hole = hmm_pte_hole;
-	ret = walk_page_range(addr, event->end, &walk);
-	if (!ret) {
-		ret = mirror->device->ops->update(mirror, event);
-		if (!ret) {
-			addr = mirror_fault.addr;
-			goto again;
+		if (mirror->device->dev) {
+			ret = hmm_mirror_dma_map(mirror, iter,
+						 addr, event->end);
+			if (ret)
+				break;
 		}
-	}
 
-out:
+		ret = mirror->device->ops->update(mirror, event);
+		if (ret)
+			break;
+		addr = mirror_fault.addr;
+	} while (1);
+
 	hmm_device_fault_end(mirror->hmm, event);
 	if (ret == -ENOENT) {
 		ret = hmm_mm_fault(mirror->hmm, event, vma, addr);
@@ -973,7 +1096,8 @@ void hmm_mirror_range_dirty(struct hmm_mirror *mirror,
 
 		hmm_pte = hmm_pt_iter_walk(&iter, &addr, &next);
 		for (; hmm_pte && addr != next; hmm_pte++, addr += PAGE_SIZE) {
-			if (!hmm_pte_test_valid_pfn(hmm_pte) ||
+			if ((!hmm_pte_test_valid_pfn(hmm_pte) &&
+			     !hmm_pte_test_valid_dma(hmm_pte)) ||
 			    !hmm_pte_test_write(hmm_pte))
 				continue;
 			hmm_pte_set_dirty(hmm_pte);
-- 
2.4.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2016-03-08 19:53 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-08 20:42 HMM (Heterogeneous Memory Management) Jérôme Glisse
2016-03-08 20:42 ` Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 01/29] mmu_notifier: add event information to address invalidation v9 Jérôme Glisse
2016-03-08 20:42   ` Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 02/29] mmu_notifier: keep track of active invalidation ranges v5 Jérôme Glisse
2016-03-08 20:42   ` Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 03/29] mmu_notifier: pass page pointer to mmu_notifier_invalidate_page() v2 Jérôme Glisse
2016-03-08 20:42   ` Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 04/29] mmu_notifier: allow range invalidation to exclude a specific mmu_notifier Jérôme Glisse
2016-03-08 20:42   ` Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 05/29] HMM: introduce heterogeneous memory management v5 Jérôme Glisse
2016-03-08 20:42   ` Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 06/29] HMM: add HMM page table v4 Jérôme Glisse
2016-03-08 20:42   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 07/29] HMM: add per mirror " Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-29 22:58   ` John Hubbard
2016-03-29 22:58     ` John Hubbard
2016-03-08 20:43 ` [PATCH v12 08/29] HMM: add device page fault support v6 Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-23  6:52   ` Aneesh Kumar K.V
2016-03-23  6:52     ` Aneesh Kumar K.V
2016-03-23 10:09     ` Jerome Glisse
2016-03-23 10:09       ` Jerome Glisse
2016-03-23 10:29       ` Aneesh Kumar K.V
2016-03-23 10:29         ` Aneesh Kumar K.V
2016-03-23 11:25         ` Jerome Glisse
2016-03-23 11:25           ` Jerome Glisse
2016-03-08 20:43 ` [PATCH v12 09/29] HMM: add mm page table iterator helpers Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 10/29] HMM: use CPU page table during invalidation Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 11/29] HMM: add discard range helper (to clear and free resources for a range) Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 12/29] HMM: add dirty range helper (toggle dirty bit inside mirror page table) v2 Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` Jérôme Glisse [this message]
2016-03-08 20:43   ` [PATCH v12 13/29] HMM: DMA map memory on behalf of device driver v2 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 14/29] HMM: Add support for hugetlb Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 15/29] HMM: add documentation explaining HMM internals and how to use it Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 16/29] fork: pass the dst vma to copy_page_range() and its sub-functions Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 17/29] HMM: add special swap filetype for memory migrated to device v2 Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 18/29] HMM: add new HMM page table flag (valid device memory) Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 19/29] HMM: add new HMM page table flag (select flag) Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 20/29] HMM: handle HMM device page table entry on mirror page table fault and update Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 21/29] HMM: mm add helper to update page table when migrating memory back v2 Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-21 11:27   ` Aneesh Kumar K.V
2016-03-21 11:27     ` Aneesh Kumar K.V
2016-03-21 12:02     ` Jerome Glisse
2016-03-21 12:02       ` Jerome Glisse
2016-03-21 13:48       ` Aneesh Kumar K.V
2016-03-21 13:48         ` Aneesh Kumar K.V
2016-03-21 14:30         ` Jerome Glisse
2016-03-21 14:30           ` Jerome Glisse
2016-03-08 20:43 ` [PATCH v12 22/29] HMM: mm add helper to update page table when migrating memory v3 Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-21 14:24   ` Aneesh Kumar K.V
2016-03-21 14:24     ` Aneesh Kumar K.V
2016-03-08 20:43 ` [PATCH v12 23/29] HMM: new callback for copying memory from and to device memory v2 Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 24/29] HMM: allow to get pointer to spinlock protecting a directory Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 25/29] HMM: split DMA mapping function in two Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 26/29] HMM: add helpers for migration back to system memory v3 Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 27/29] HMM: fork copy migrated memory into system memory for child process Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 28/29] HMM: CPU page fault on migrated memory Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 29/29] HMM: add mirror fault support for system to device memory migration v3 Jérôme Glisse
2016-03-08 20:43   ` Jérôme Glisse
2016-03-08 22:02 ` HMM (Heterogeneous Memory Management) John Hubbard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1457469802-11850-14-git-send-email-jglisse@redhat.com \
    --to=jglisse@redhat.com \
    --cc=Alexander.Deucher@amd.com \
    --cc=Greg.Stoner@amd.com \
    --cc=John.Bridgman@amd.com \
    --cc=Laurent.Morichetti@amd.com \
    --cc=Leonid.Shamis@amd.com \
    --cc=Michael.Mantor@amd.com \
    --cc=Paul.Blinzer@amd.com \
    --cc=SCheung@nvidia.com \
    --cc=aarcange@redhat.com \
    --cc=airlied@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=arvindg@nvidia.com \
    --cc=ben.sander@amd.com \
    --cc=blc@redhat.com \
    --cc=cabuschardt@nvidia.com \
    --cc=charle@nvidia.com \
    --cc=dpoole@nvidia.com \
    --cc=haggaie@mellanox.com \
    --cc=hpa@zytor.com \
    --cc=jdonohue@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=jweiner@redhat.com \
    --cc=ldunning@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=liranl@mellanox.com \
    --cc=lwoodman@redhat.com \
    --cc=mgorman@suse.de \
    --cc=mhairgrove@nvidia.com \
    --cc=peterz@infradead.org \
    --cc=raindel@mellanox.com \
    --cc=riel@redhat.com \
    --cc=roland@purestorage.com \
    --cc=sgutti@nvidia.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.