From mboxrd@z Thu Jan  1 00:00:00 1970
From: Jérôme Glisse
To: akpm@linux-foundation.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Linus Torvalds, joro@8bytes.org, Mel Gorman, "H. Peter Anvin",
	Peter Zijlstra, Andrea Arcangeli, Johannes Weiner, Larry Woodman,
	Rik van Riel, Dave Airlie, Brendan Conoboy, Joe Donohue,
	Christophe Harle, Duncan Poole, Sherry Cheung, Subhash Gutti,
	John Hubbard, Mark Hairgrove, Lucien Dunning, Cameron Buschardt,
	Arvind Gopalakrishnan, Haggai Eran, Shachar Raindel, Liran Liss,
	Roland Dreier, Ben Sander, Greg Stoner, John Bridgman,
	Michael Mantor, Paul Blinzer, Leonid Shamis, Laurent Morichetti,
	Alexander Deucher, Jérôme Glisse, Jatin Kumar
Subject: [PATCH v12 26/29] HMM: add helpers for migration back to system memory v3.
Date: Tue, 8 Mar 2016 15:43:19 -0500
Message-Id: <1457469802-11850-27-git-send-email-jglisse@redhat.com>
In-Reply-To: <1457469802-11850-1-git-send-email-jglisse@redhat.com>
References: <1457469802-11850-1-git-send-email-jglisse@redhat.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
X-Mailing-List: linux-kernel@vger.kernel.org

This patch adds all the functions and helpers needed for migration from
device memory back to system memory. There are three different cases
that use this code:
  - CPU page fault
  - fork
  - device driver request

Note that this patch uses regular memory accounting, which means that
migration can fail as a result of memory cgroup resource exhaustion.
Later patches will modify memcg to allow remote memory to stay accounted
as regular memory, removing this point of failure.

Changed since v1:
  - Fixed logic in the dma unmap code path on migration error.

Changed since v2:
  - Adapted to HMM page table changes.
  - Fixed a bug in the migration failure code path.

Signed-off-by: Jérôme Glisse
Signed-off-by: Sherry Cheung
Signed-off-by: Subhash Gutti
Signed-off-by: Mark Hairgrove
Signed-off-by: John Hubbard
Signed-off-by: Jatin Kumar
---
 mm/hmm.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/mm/hmm.c b/mm/hmm.c
index 07f1ab6..435e376 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -47,6 +47,12 @@ static struct mmu_notifier_ops hmm_notifier_ops;
 
 static void hmm_mirror_kill(struct hmm_mirror *mirror);
+static int hmm_mirror_migrate_back(struct hmm_mirror *mirror,
+				   struct hmm_event *event,
+				   pte_t *new_pte,
+				   dma_addr_t *dst,
+				   unsigned long start,
+				   unsigned long end);
 static inline int hmm_mirror_update(struct hmm_mirror *mirror,
 				    struct hmm_event *event,
 				    struct page *page);
@@ -421,6 +427,46 @@ static struct mmu_notifier_ops hmm_notifier_ops = {
 };
 
+static int hmm_migrate_back(struct hmm *hmm,
+			    struct hmm_event *event,
+			    struct mm_struct *mm,
+			    struct vm_area_struct *vma,
+			    pte_t *new_pte,
+			    dma_addr_t *dst,
+			    unsigned long start,
+			    unsigned long end)
+{
+	struct hmm_mirror *mirror;
+	int r, ret;
+
+	/*
+	 * Do not return right away on error, as there might be valid pages we
+	 * can migrate.
+	 */
+	ret = mm_hmm_migrate_back(mm, vma, new_pte, start, end);
+
+again:
+	down_read(&hmm->rwsem);
+	hlist_for_each_entry(mirror, &hmm->mirrors, mlist) {
+		r = hmm_mirror_migrate_back(mirror, event, new_pte,
+					    dst, start, end);
+		if (r) {
+			ret = ret ? ret : r;
+			mirror = hmm_mirror_ref(mirror);
+			BUG_ON(!mirror);
+			up_read(&hmm->rwsem);
+			hmm_mirror_kill(mirror);
+			hmm_mirror_unref(&mirror);
+			goto again;
+		}
+	}
+	up_read(&hmm->rwsem);
+
+	mm_hmm_migrate_back_cleanup(mm, vma, new_pte, dst, start, end);
+
+	return ret;
+}
+
 int hmm_handle_cpu_fault(struct mm_struct *mm,
			 struct vm_area_struct *vma,
			 pmd_t *pmdp, unsigned long addr,
@@ -1153,6 +1199,111 @@ out:
 }
 EXPORT_SYMBOL(hmm_mirror_fault);
 
+static int hmm_mirror_migrate_back(struct hmm_mirror *mirror,
+				   struct hmm_event *event,
+				   pte_t *new_pte,
+				   dma_addr_t *dst,
+				   unsigned long start,
+				   unsigned long end)
+{
+	unsigned long addr, i, npages = (end - start) >> PAGE_SHIFT;
+	struct hmm_device *device = mirror->device;
+	struct device *dev = mirror->device->dev;
+	struct hmm_pt_iter iter;
+	int r, ret = 0;
+
+	hmm_pt_iter_init(&iter, &mirror->pt);
+	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
+		unsigned long next = end;
+		dma_addr_t *hmm_pte;
+
+		hmm_pte_clear_select(&dst[i]);
+
+		if (!pte_present(new_pte[i]))
+			continue;
+		hmm_pte = hmm_pt_iter_lookup(&iter, addr, &next);
+		if (!hmm_pte)
+			continue;
+
+		if (!hmm_pte_test_valid_dev(hmm_pte))
+			continue;
+
+		dst[i] = hmm_pte_from_pfn(pte_pfn(new_pte[i]));
+		hmm_pte_set_select(&dst[i]);
+		hmm_pte_set_write(&dst[i]);
+	}
+
+	if (dev) {
+		ret = hmm_mirror_dma_map_range(mirror, dst, NULL, npages);
+		if (ret) {
+			for (i = 0; i < npages; ++i) {
+				if (!hmm_pte_test_select(&dst[i]))
+					continue;
+				if (hmm_pte_test_valid_dma(&dst[i]))
+					continue;
+				dst[i] = 0;
+			}
+		}
+	}
+
+	r = device->ops->copy_from_device(mirror, event, dst, start, end);
+
+	/* Update mirror page table with successfully migrated entries. */
+	for (addr = start; addr < end;) {
+		unsigned long idx, next = end, npages;
+		dma_addr_t *hmm_pte;
+
+		hmm_pte = hmm_pt_iter_walk(&iter, &addr, &next);
+		if (!hmm_pte)
+			continue;
+		idx = (addr - event->start) >> PAGE_SHIFT;
+		npages = (next - addr) >> PAGE_SHIFT;
+		hmm_pt_iter_directory_lock(&iter);
+		for (i = 0; i < npages; i++, idx++) {
+			if (!hmm_pte_test_valid_pfn(&dst[idx]) &&
+			    !hmm_pte_test_valid_dma(&dst[idx])) {
+				if (hmm_pte_test_valid_dev(&hmm_pte[i])) {
+					hmm_pte[i] = 0;
+					hmm_pt_iter_directory_unref(&iter);
+				}
+				continue;
+			}
+
+			VM_BUG_ON(!hmm_pte_test_select(&dst[idx]));
+			VM_BUG_ON(!hmm_pte_test_valid_dev(&hmm_pte[i]));
+			hmm_pte[i] = dst[idx];
+		}
+		hmm_pt_iter_directory_unlock(&iter);
+
+		/* DMA unmap failed migrate entries. */
+		if (dev) {
+			idx = (addr - event->start) >> PAGE_SHIFT;
+			for (i = 0; i < npages; i++, idx++) {
+				dma_addr_t dma_addr;
+
+				/*
+				 * Failed entries have the valid bit clear but
+				 * the select bit remains set.
+				 */
+				if (!hmm_pte_test_select(&dst[idx]) ||
+				    hmm_pte_test_valid_dma(&dst[idx]))
+					continue;
+
+				hmm_pte_set_valid_dma(&dst[idx]);
+				dma_addr = hmm_pte_dma_addr(dst[idx]);
+				dma_unmap_page(dev, dma_addr, PAGE_SIZE,
+					       DMA_BIDIRECTIONAL);
+				dst[idx] = 0;
+			}
+		}
+
+		addr = next;
+	}
+	hmm_pt_iter_fini(&iter);
+
+	return ret ? ret : r;
+}
+
 /* hmm_mirror_range_discard() - discard a range of address.
  *
  * @mirror: The mirror struct.
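For reference, here is a rough sketch of how a driver might implement the
copy_from_device() callback that hmm_mirror_migrate_back() invokes above.
It is illustrative only, not part of the patch: my_dev_copy_page() is a
made-up driver helper, hmm_pte_clear_valid_dma() is assumed to exist
alongside the set/clear/test accessors the patch uses, the callback
signature is inferred from the call site, and the sketch covers only the
dev != NULL case where selected entries carry DMA addresses.

static int my_copy_from_device(struct hmm_mirror *mirror,
			       struct hmm_event *event,
			       dma_addr_t *dst,
			       unsigned long start,
			       unsigned long end)
{
	unsigned long addr, i;
	int ret = 0;

	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
		/* Only entries HMM selected for migration need a copy. */
		if (!hmm_pte_test_select(&dst[i]))
			continue;

		/* Copy the device page into the DMA-mapped system page. */
		if (my_dev_copy_page(mirror, addr, hmm_pte_dma_addr(dst[i]))) {
			/*
			 * Flag the failure for the core: valid bit clear,
			 * select bit still set, so the entry gets unmapped.
			 */
			hmm_pte_clear_valid_dma(&dst[i]);
			ret = -EIO;
		}
	}

	return ret;
}

Reporting failures per entry instead of returning early matches the loops
above: the core rescans dst after the copy, keeps the entries that are
still valid, and DMA unmaps only those whose valid bit was cleared.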
-- 
2.4.3