From: Jérôme Glisse
To: akpm@linux-foundation.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: John Hubbard, Jérôme Glisse, Jatin Kumar, Mark Hairgrove, Sherry Cheung,
 Subhash Gutti
Subject: [HMM v13 11/18] mm/hmm/mirror: add range monitor helper, to monitor CPU page table update
Date: Fri, 18 Nov 2016 13:18:20 -0500
Message-Id: <1479493107-982-12-git-send-email-jglisse@redhat.com>
In-Reply-To: <1479493107-982-1-git-send-email-jglisse@redhat.com>
References: <1479493107-982-1-git-send-email-jglisse@redhat.com>

Complement the hmm_vma_range_lock/unlock() mechanism with a range monitor
that does not block CPU page table invalidation and thus does not guarantee
forward progress. It is still useful, as in many situations concurrent CPU
page table updates and CPU snapshots take place in different regions of the
virtual address space.

Signed-off-by: Jérôme Glisse
Signed-off-by: Jatin Kumar
Signed-off-by: John Hubbard
Signed-off-by: Mark Hairgrove
Signed-off-by: Sherry Cheung
Signed-off-by: Subhash Gutti
---
 include/linux/hmm.h | 18 ++++++++++
 mm/hmm.c            | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index c0b1c07..6571647 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -254,6 +254,24 @@ int hmm_vma_range_lock(struct hmm_range *range,
 void hmm_vma_range_unlock(struct hmm_range *range);
 
+/*
+ * Monitoring a range allows tracking of any CPU page table modification
+ * that can affect the range. It complements the hmm_vma_range_lock/unlock()
+ * mechanism as a non-blocking method for synchronizing the device page
+ * table with the CPU page table. See function descriptions in mm/hmm.c for
+ * documentation.
+ *
+ * NOTE: AFTER A CALL TO hmm_vma_range_monitor_start() THAT RETURNED TRUE YOU
+ * MUST MAKE A CALL TO hmm_vma_range_monitor_end() BEFORE FREEING THE RANGE
+ * STRUCT OR BAD THINGS WILL HAPPEN !
+ */
+bool hmm_vma_range_monitor_start(struct hmm_range *range,
+				 struct vm_area_struct *vma,
+				 unsigned long start,
+				 unsigned long end,
+				 bool wait);
+bool hmm_vma_range_monitor_end(struct hmm_range *range);
+
+
 /* Below are for HMM internal use only ! Not to be used by device drivers !
  */
 void hmm_mm_destroy(struct mm_struct *mm);

diff --git a/mm/hmm.c b/mm/hmm.c
index ee05419..746eb96 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -40,6 +40,7 @@ struct hmm {
 	spinlock_t		lock;
 	struct list_head	ranges;
 	struct list_head	mirrors;
+	struct list_head	monitors;
 	atomic_t		sequence;
 	wait_queue_head_t	wait_queue;
 	struct mmu_notifier	mmu_notifier;
@@ -65,6 +66,7 @@ static struct hmm *hmm_register(struct mm_struct *mm)
 		return NULL;
 	init_waitqueue_head(&hmm->wait_queue);
 	atomic_set(&hmm->notifier_count, 0);
+	INIT_LIST_HEAD(&hmm->monitors);
 	INIT_LIST_HEAD(&hmm->mirrors);
 	atomic_set(&hmm->sequence, 0);
 	hmm->mmu_notifier.ops = NULL;
@@ -112,7 +114,7 @@ static void hmm_invalidate_range(struct hmm *hmm,
 				 unsigned long start,
 				 unsigned long end)
 {
-	struct hmm_range range, *tmp;
+	struct hmm_range range, *tmp, *next;
 	struct hmm_mirror *mirror;
 
 	/*
@@ -127,6 +129,13 @@ static void hmm_invalidate_range(struct hmm *hmm,
 	range.hmm = hmm;
 
 	spin_lock(&hmm->lock);
+	/* Remove any range monitors overlapping the invalidated range */
+	list_for_each_entry_safe (tmp, next, &hmm->monitors, list) {
+		if (range.start >= tmp->end || range.end <= tmp->start)
+			continue;
+		/* This range is no longer valid */
+		list_del_init(&tmp->list);
+	}
 	list_for_each_entry (tmp, &hmm->ranges, list) {
 		if (range.start >= tmp->end || range.end <= tmp->start)
 			continue;
@@ -361,3 +370,87 @@ void hmm_vma_range_unlock(struct hmm_range *range)
 	wake_up(&hmm->wait_queue);
 }
 EXPORT_SYMBOL(hmm_vma_range_unlock);
+
+
+/*
+ * hmm_vma_range_monitor_start() - start monitoring of a range
+ * @range: pointer to hmm_range struct used to monitor
+ * @vma: virtual memory area for the range
+ * @start: start address of the range to monitor (inclusive)
+ * @end: end address of the range to monitor (exclusive)
+ * @wait: wait for any pending CPU page table update to finish
+ * Returns: false if there are pending CPU page table updates, true otherwise
+ *
+ * The usage pattern of this function is:
+ * retry:
+ *	hmm_vma_range_monitor_start(range, vma, start, end, true);
+ *	// Do something that relies on stable CPU page table content, and
+ *	// prepare the device page table update transaction
+ *	...
+ *	// Take device driver lock that serializes device page table updates
+ *	driver_lock_device_page_table_update();
+ *	if (!hmm_vma_range_monitor_end(range)) {
+ *		driver_unlock_device_page_table_update();
+ *		// Abort the transaction you just built and clean up anything
+ *		// that needs it, taking care to avoid busy looping on retry
+ *		goto retry;
+ *	}
+ *	// Commit the device page table update
+ *	driver_unlock_device_page_table_update();
+ */
+bool hmm_vma_range_monitor_start(struct hmm_range *range,
+				 struct vm_area_struct *vma,
+				 unsigned long start,
+				 unsigned long end,
+				 bool wait)
+{
+	BUG_ON(!vma);
+	BUG_ON(!range);
+
+	INIT_LIST_HEAD(&range->list);
+	range->hmm = hmm_register(vma->vm_mm);
+	if (!range->hmm)
+		return false;
+
+again:
+	spin_lock(&range->hmm->lock);
+	if (atomic_read(&range->hmm->notifier_count)) {
+		spin_unlock(&range->hmm->lock);
+		if (!wait)
+			return false;
+		/*
+		 * FIXME: Wait for all active mmu_notifiers. This is because
+		 * we cannot keep an hmm_range struct around while waiting
+		 * for range invalidation to finish. Need to update the
+		 * mmu_notifier API to make this doable.
+		 */
+		wait_event(range->hmm->wait_queue,
+			   !atomic_read(&range->hmm->notifier_count));
+		goto again;
+	}
+	list_add_tail(&range->list, &range->hmm->monitors);
+	spin_unlock(&range->hmm->lock);
+	return true;
+}
+EXPORT_SYMBOL(hmm_vma_range_monitor_start);
+
+/*
+ * hmm_vma_range_monitor_end() - end monitoring of a range
+ * @range: range that was being monitored
+ * Returns: true if no invalidation since hmm_vma_range_monitor_start()
+ */
+bool hmm_vma_range_monitor_end(struct hmm_range *range)
+{
+	bool valid;
+
+	if (!range->hmm || list_empty(&range->list))
+		return false;
+
+	spin_lock(&range->hmm->lock);
+	valid = !list_empty(&range->list);
+	list_del_init(&range->list);
+	spin_unlock(&range->hmm->lock);
+
+	return valid;
+}
+EXPORT_SYMBOL(hmm_vma_range_monitor_end);
-- 
2.4.3
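
For illustration, below is a minimal sketch of the retry pattern that the
hmm_vma_range_monitor_start() documentation above describes, as a device
driver might implement it. The struct my_device and the my_device_*()
helpers are hypothetical placeholders (as is the -ENOMEM error choice);
only hmm_vma_range_monitor_start()/end() come from the API added by this
patch.

#include <linux/hmm.h>
#include <linux/mm.h>
#include <linux/mutex.h>

/* Hypothetical driver state; stands in for a real driver's bookkeeping. */
struct my_device {
	struct mutex pt_lock;	/* serializes device page table updates */
};

/* Hypothetical helpers: stage, commit, or discard a batch of device
 * page table entries for a virtual address range. */
static void my_device_build_transaction(struct my_device *mydev,
					unsigned long start,
					unsigned long end)
{
	/* Snapshot CPU PTEs and stage the device PTE updates. */
}

static void my_device_commit_transaction(struct my_device *mydev)
{
	/* Write the staged entries into the device page table. */
}

static void my_device_abort_transaction(struct my_device *mydev)
{
	/* Throw away the staged entries. */
}

static int my_device_mirror_range(struct my_device *mydev,
				  struct vm_area_struct *vma,
				  unsigned long start, unsigned long end)
{
	struct hmm_range range;

retry:
	/* With wait == true this blocks until no invalidation is pending,
	 * so it only fails if the mm could not be registered with HMM. */
	if (!hmm_vma_range_monitor_start(&range, vma, start, end, true))
		return -ENOMEM;

	/* Build, but do not yet commit, the device page table update
	 * for [start, end) based on the current CPU page table. */
	my_device_build_transaction(mydev, start, end);

	/* Serialize against other device page table updates, then check
	 * that no CPU invalidation touched the range in the meantime. */
	mutex_lock(&mydev->pt_lock);
	if (!hmm_vma_range_monitor_end(&range)) {
		mutex_unlock(&mydev->pt_lock);
		my_device_abort_transaction(mydev);
		/* An invalidation raced with us; rebuild from scratch. A
		 * real driver should back off here rather than busy loop
		 * against a stream of invalidations. */
		goto retry;
	}
	my_device_commit_transaction(mydev);
	mutex_unlock(&mydev->pt_lock);
	return 0;
}

Because the monitor never blocks CPU page table invalidation, the retry
loop is the price paid for not stalling the CPU side; when forward
progress must be guaranteed, the blocking hmm_vma_range_lock/unlock()
mechanism from the earlier patch is the alternative.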