From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751915AbcCHTtd (ORCPT ); Tue, 8 Mar 2016 14:49:33 -0500 Received: from mx1.redhat.com ([209.132.183.28]:54069 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751562AbcCHTsA (ORCPT ); Tue, 8 Mar 2016 14:48:00 -0500 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= To: akpm@linux-foundation.org, , linux-mm@kvack.org Cc: Linus Torvalds , , Mel Gorman , "H. Peter Anvin" , Peter Zijlstra , Andrea Arcangeli , Johannes Weiner , Larry Woodman , Rik van Riel , Dave Airlie , Brendan Conoboy , Joe Donohue , Christophe Harle , Duncan Poole , Sherry Cheung , Subhash Gutti , John Hubbard , Mark Hairgrove , Lucien Dunning , Cameron Buschardt , Arvind Gopalakrishnan , Haggai Eran , Shachar Raindel , Liran Liss , Roland Dreier , Ben Sander , Greg Stoner , John Bridgman , Michael Mantor , Paul Blinzer , Leonid Shamis , Laurent Morichetti , Alexander Deucher , =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= , Jatin Kumar Subject: [PATCH v12 29/29] HMM: add mirror fault support for system to device memory migration v3. Date: Tue, 8 Mar 2016 15:43:22 -0500 Message-Id: <1457469802-11850-30-git-send-email-jglisse@redhat.com> In-Reply-To: <1457469802-11850-1-git-send-email-jglisse@redhat.com> References: <1457469802-11850-1-git-send-email-jglisse@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.38]); Tue, 08 Mar 2016 19:48:00 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Migration to device memory is done as a special kind of device mirror fault. Memory migration being initiated by device driver and never by HMM (unless it is a migration back to system memory). Changed since v1: - Adapt to HMM page table changes. 
Changed since v2: - Fix error code path for migration, calling mm_hmm_migrate_cleanup() is wrong. Signed-off-by: Jérôme Glisse Signed-off-by: Sherry Cheung Signed-off-by: Subhash Gutti Signed-off-by: Mark Hairgrove Signed-off-by: John Hubbard Signed-off-by: Jatin Kumar --- mm/hmm.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/mm/hmm.c b/mm/hmm.c index 38943a7..41637a3 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -53,6 +53,10 @@ static int hmm_mirror_migrate_back(struct hmm_mirror *mirror, dma_addr_t *dst, unsigned long start, unsigned long end); +static int hmm_mirror_migrate(struct hmm_mirror *mirror, + struct hmm_event *event, + struct vm_area_struct *vma, + struct hmm_pt_iter *iter); static inline int hmm_mirror_update(struct hmm_mirror *mirror, struct hmm_event *event, struct page *page); @@ -101,6 +105,12 @@ static inline int hmm_event_init(struct hmm_event *event, return 0; } +static inline unsigned long hmm_event_npages(const struct hmm_event *event) +{ + return (PAGE_ALIGN(event->end) - (event->start & PAGE_MASK)) >> + PAGE_SHIFT; +} + /* hmm - core HMM functions. * @@ -1255,6 +1265,9 @@ retry: } switch (event->etype) { + case HMM_COPY_TO_DEVICE: + ret = hmm_mirror_migrate(mirror, event, vma, &iter); + break; case HMM_DEVICE_WFAULT: if (!(vma->vm_flags & VM_WRITE)) { ret = -EFAULT; @@ -1392,6 +1405,163 @@ static int hmm_mirror_migrate_back(struct hmm_mirror *mirror, return ret ? ret : r; } +static int hmm_mirror_migrate(struct hmm_mirror *mirror, + struct hmm_event *event, + struct vm_area_struct *vma, + struct hmm_pt_iter *iter) +{ + struct hmm_device *device = mirror->device; + struct hmm *hmm = mirror->hmm; + struct hmm_event invalidate; + unsigned long addr, npages; + struct hmm_mirror *tmp; + dma_addr_t *dst; + pte_t *save_pte; + int r = 0, ret; + + /* Only allow migration of private anonymous memory. 
*/ + if (vma->vm_ops || unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) + return -EINVAL; + + /* + * TODO More advance loop for splitting migration into several chunk. + * For now limit the amount that can be migrated in one shot. Also we + * would need to see if we need rescheduling if this is happening as + * part of system call to the device driver. + */ + npages = hmm_event_npages(event); + if (npages * max(sizeof(*dst), sizeof(*save_pte)) > PAGE_SIZE) + return -EINVAL; + dst = kcalloc(npages, sizeof(*dst), GFP_KERNEL); + if (dst == NULL) + return -ENOMEM; + save_pte = kcalloc(npages, sizeof(*save_pte), GFP_KERNEL); + if (save_pte == NULL) { + ret = -ENOMEM; + goto out; + } + + ret = mm_hmm_migrate(hmm->mm, vma, save_pte, &event->backoff, + &hmm->mmu_notifier, event->start, event->end); + if (ret == -EAGAIN) + goto out; + if (ret) + goto out; + + /* + * Now invalidate for all other device, note that they can not race + * with us as the CPU page table is full of special entry. + */ + hmm_event_init(&invalidate, mirror->hmm, event->start, + event->end, HMM_MIGRATE); +again: + down_read(&hmm->rwsem); + hlist_for_each_entry(tmp, &hmm->mirrors, mlist) { + if (tmp == mirror) + continue; + if (hmm_mirror_update(tmp, &invalidate, NULL)) { + hmm_mirror_ref(tmp); + up_read(&hmm->rwsem); + hmm_mirror_kill(tmp); + hmm_mirror_unref(&tmp); + goto again; + } + } + up_read(&hmm->rwsem); + + /* + * Populate the mirror page table with saved entry and also mark entry + * that can be migrated. 
+ */ + for (addr = event->start; addr < event->end;) { + unsigned long i, idx, next = event->end, npages; + dma_addr_t *hmm_pte; + + hmm_pte = hmm_pt_iter_populate(iter, addr, &next); + if (!hmm_pte) { + ret = -ENOMEM; + goto out_cleanup; + } + + npages = (next - addr) >> PAGE_SHIFT; + idx = (addr - event->start) >> PAGE_SHIFT; + hmm_pt_iter_directory_lock(iter); + for (i = 0; i < npages; i++, idx++) { + hmm_pte_clear_select(&hmm_pte[i]); + if (!pte_present(save_pte[idx])) + continue; + hmm_pte_set_select(&hmm_pte[i]); + /* This can not be a valid device entry here. */ + VM_BUG_ON(hmm_pte_test_valid_dev(&hmm_pte[i])); + if (hmm_pte_test_valid_dma(&hmm_pte[i])) + continue; + + if (hmm_pte_test_valid_pfn(&hmm_pte[i])) + continue; + + hmm_pt_iter_directory_ref(iter); + hmm_pte[i] = hmm_pte_from_pfn(pte_pfn(save_pte[idx])); + if (pte_write(save_pte[idx])) + hmm_pte_set_write(&hmm_pte[i]); + hmm_pte_set_select(&hmm_pte[i]); + } + hmm_pt_iter_directory_unlock(iter); + + if (device->dev) { + spinlock_t *lock; + + lock = hmm_pt_iter_directory_lock_ptr(iter); + ret = hmm_mirror_dma_map_range(mirror, hmm_pte, + lock, npages); + /* Keep going only for entry that have been mapped. */ + if (ret) { + for (i = 0; i < npages; ++i) { + if (!hmm_pte_test_select(&dst[i])) + continue; + if (hmm_pte_test_valid_dma(&dst[i])) + continue; + hmm_pte_clear_select(&hmm_pte[i]); + } + } + } + addr = next; + } + + /* Now Waldo we can do the copy. */ + r = device->ops->copy_to_device(mirror, event, vma, dst, + event->start, event->end); + + /* Update mirror page table with successfully migrated entry. 
*/ + for (addr = event->start; addr < event->end;) { + unsigned long i, idx, next = event->end, npages; + dma_addr_t *hmm_pte; + + hmm_pte = hmm_pt_iter_walk(iter, &addr, &next); + if (!hmm_pte) + continue; + npages = (next - addr) >> PAGE_SHIFT; + idx = (addr - event->start) >> PAGE_SHIFT; + hmm_pt_iter_directory_lock(iter); + for (i = 0; i < npages; i++, idx++) { + if (!hmm_pte_test_valid_dev(&dst[idx])) + continue; + + VM_BUG_ON(!hmm_pte_test_select(&hmm_pte[i])); + hmm_pte[i] = dst[idx]; + } + hmm_pt_iter_directory_unlock(iter); + addr = next; + } + +out_cleanup: + mm_hmm_migrate_cleanup(hmm->mm, vma, save_pte, dst, + event->start, event->end); +out: + kfree(save_pte); + kfree(dst); + return ret ? ret : r; +} + /* hmm_mirror_range_discard() - discard a range of address. * * @mirror: The mirror struct. -- 2.4.3 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f49.google.com (mail-qg0-f49.google.com [209.85.192.49]) by kanga.kvack.org (Postfix) with ESMTP id 2D8D3828E6 for ; Tue, 8 Mar 2016 14:48:02 -0500 (EST) Received: by mail-qg0-f49.google.com with SMTP id w104so22061764qge.1 for ; Tue, 08 Mar 2016 11:48:02 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id 30si4541723qgt.32.2016.03.08.11.48.01 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Mar 2016 11:48:01 -0800 (PST) From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Subject: [PATCH v12 29/29] HMM: add mirror fault support for system to device memory migration v3. 
Date: Tue, 8 Mar 2016 15:43:22 -0500 Message-Id: <1457469802-11850-30-git-send-email-jglisse@redhat.com> In-Reply-To: <1457469802-11850-1-git-send-email-jglisse@redhat.com> References: <1457469802-11850-1-git-send-email-jglisse@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sender: owner-linux-mm@kvack.org List-ID: To: akpm@linux-foundation.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org Cc: Linus Torvalds , joro@8bytes.org, Mel Gorman , "H. Peter Anvin" , Peter Zijlstra , Andrea Arcangeli , Johannes Weiner , Larry Woodman , Rik van Riel , Dave Airlie , Brendan Conoboy , Joe Donohue , Christophe Harle , Duncan Poole , Sherry Cheung , Subhash Gutti , John Hubbard , Mark Hairgrove , Lucien Dunning , Cameron Buschardt , Arvind Gopalakrishnan , Haggai Eran , Shachar Raindel , Liran Liss , Roland Dreier , Ben Sander , Greg Stoner , John Bridgman , Michael Mantor , Paul Blinzer , Leonid Shamis , Laurent Morichetti , Alexander Deucher , =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= , Jatin Kumar Migration to device memory is done as a special kind of device mirror fault. Memory migration being initiated by device driver and never by HMM (unless it is a migration back to system memory). Changed since v1: - Adapt to HMM page table changes. Changed since v2: - Fix error code path for migration, calling mm_hmm_migrate_cleanup() is wrong. 
Signed-off-by: Jérôme Glisse Signed-off-by: Sherry Cheung Signed-off-by: Subhash Gutti Signed-off-by: Mark Hairgrove Signed-off-by: John Hubbard Signed-off-by: Jatin Kumar --- mm/hmm.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/mm/hmm.c b/mm/hmm.c index 38943a7..41637a3 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -53,6 +53,10 @@ static int hmm_mirror_migrate_back(struct hmm_mirror *mirror, dma_addr_t *dst, unsigned long start, unsigned long end); +static int hmm_mirror_migrate(struct hmm_mirror *mirror, + struct hmm_event *event, + struct vm_area_struct *vma, + struct hmm_pt_iter *iter); static inline int hmm_mirror_update(struct hmm_mirror *mirror, struct hmm_event *event, struct page *page); @@ -101,6 +105,12 @@ static inline int hmm_event_init(struct hmm_event *event, return 0; } +static inline unsigned long hmm_event_npages(const struct hmm_event *event) +{ + return (PAGE_ALIGN(event->end) - (event->start & PAGE_MASK)) >> + PAGE_SHIFT; +} + /* hmm - core HMM functions. * @@ -1255,6 +1265,9 @@ retry: } switch (event->etype) { + case HMM_COPY_TO_DEVICE: + ret = hmm_mirror_migrate(mirror, event, vma, &iter); + break; case HMM_DEVICE_WFAULT: if (!(vma->vm_flags & VM_WRITE)) { ret = -EFAULT; @@ -1392,6 +1405,163 @@ static int hmm_mirror_migrate_back(struct hmm_mirror *mirror, return ret ? ret : r; } +static int hmm_mirror_migrate(struct hmm_mirror *mirror, + struct hmm_event *event, + struct vm_area_struct *vma, + struct hmm_pt_iter *iter) +{ + struct hmm_device *device = mirror->device; + struct hmm *hmm = mirror->hmm; + struct hmm_event invalidate; + unsigned long addr, npages; + struct hmm_mirror *tmp; + dma_addr_t *dst; + pte_t *save_pte; + int r = 0, ret; + + /* Only allow migration of private anonymous memory. */ + if (vma->vm_ops || unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) + return -EINVAL; + + /* + * TODO More advance loop for splitting migration into several chunk. 
+ * For now limit the amount that can be migrated in one shot. Also we + * would need to see if we need rescheduling if this is happening as + * part of system call to the device driver. + */ + npages = hmm_event_npages(event); + if (npages * max(sizeof(*dst), sizeof(*save_pte)) > PAGE_SIZE) + return -EINVAL; + dst = kcalloc(npages, sizeof(*dst), GFP_KERNEL); + if (dst == NULL) + return -ENOMEM; + save_pte = kcalloc(npages, sizeof(*save_pte), GFP_KERNEL); + if (save_pte == NULL) { + ret = -ENOMEM; + goto out; + } + + ret = mm_hmm_migrate(hmm->mm, vma, save_pte, &event->backoff, + &hmm->mmu_notifier, event->start, event->end); + if (ret == -EAGAIN) + goto out; + if (ret) + goto out; + + /* + * Now invalidate for all other device, note that they can not race + * with us as the CPU page table is full of special entry. + */ + hmm_event_init(&invalidate, mirror->hmm, event->start, + event->end, HMM_MIGRATE); +again: + down_read(&hmm->rwsem); + hlist_for_each_entry(tmp, &hmm->mirrors, mlist) { + if (tmp == mirror) + continue; + if (hmm_mirror_update(tmp, &invalidate, NULL)) { + hmm_mirror_ref(tmp); + up_read(&hmm->rwsem); + hmm_mirror_kill(tmp); + hmm_mirror_unref(&tmp); + goto again; + } + } + up_read(&hmm->rwsem); + + /* + * Populate the mirror page table with saved entry and also mark entry + * that can be migrated. + */ + for (addr = event->start; addr < event->end;) { + unsigned long i, idx, next = event->end, npages; + dma_addr_t *hmm_pte; + + hmm_pte = hmm_pt_iter_populate(iter, addr, &next); + if (!hmm_pte) { + ret = -ENOMEM; + goto out_cleanup; + } + + npages = (next - addr) >> PAGE_SHIFT; + idx = (addr - event->start) >> PAGE_SHIFT; + hmm_pt_iter_directory_lock(iter); + for (i = 0; i < npages; i++, idx++) { + hmm_pte_clear_select(&hmm_pte[i]); + if (!pte_present(save_pte[idx])) + continue; + hmm_pte_set_select(&hmm_pte[i]); + /* This can not be a valid device entry here. 
*/ + VM_BUG_ON(hmm_pte_test_valid_dev(&hmm_pte[i])); + if (hmm_pte_test_valid_dma(&hmm_pte[i])) + continue; + + if (hmm_pte_test_valid_pfn(&hmm_pte[i])) + continue; + + hmm_pt_iter_directory_ref(iter); + hmm_pte[i] = hmm_pte_from_pfn(pte_pfn(save_pte[idx])); + if (pte_write(save_pte[idx])) + hmm_pte_set_write(&hmm_pte[i]); + hmm_pte_set_select(&hmm_pte[i]); + } + hmm_pt_iter_directory_unlock(iter); + + if (device->dev) { + spinlock_t *lock; + + lock = hmm_pt_iter_directory_lock_ptr(iter); + ret = hmm_mirror_dma_map_range(mirror, hmm_pte, + lock, npages); + /* Keep going only for entry that have been mapped. */ + if (ret) { + for (i = 0; i < npages; ++i) { + if (!hmm_pte_test_select(&dst[i])) + continue; + if (hmm_pte_test_valid_dma(&dst[i])) + continue; + hmm_pte_clear_select(&hmm_pte[i]); + } + } + } + addr = next; + } + + /* Now Waldo we can do the copy. */ + r = device->ops->copy_to_device(mirror, event, vma, dst, + event->start, event->end); + + /* Update mirror page table with successfully migrated entry. */ + for (addr = event->start; addr < event->end;) { + unsigned long i, idx, next = event->end, npages; + dma_addr_t *hmm_pte; + + hmm_pte = hmm_pt_iter_walk(iter, &addr, &next); + if (!hmm_pte) + continue; + npages = (next - addr) >> PAGE_SHIFT; + idx = (addr - event->start) >> PAGE_SHIFT; + hmm_pt_iter_directory_lock(iter); + for (i = 0; i < npages; i++, idx++) { + if (!hmm_pte_test_valid_dev(&dst[idx])) + continue; + + VM_BUG_ON(!hmm_pte_test_select(&hmm_pte[i])); + hmm_pte[i] = dst[idx]; + } + hmm_pt_iter_directory_unlock(iter); + addr = next; + } + +out_cleanup: + mm_hmm_migrate_cleanup(hmm->mm, vma, save_pte, dst, + event->start, event->end); +out: + kfree(save_pte); + kfree(dst); + return ret ? ret : r; +} + /* hmm_mirror_range_discard() - discard a range of address. * * @mirror: The mirror struct. -- 2.4.3 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. 
For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org