All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ira Weiny <ira.weiny@intel.com>
To: jglisse@redhat.com
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	John Hubbard <jhubbard@nvidia.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Dan Williams <dan.j.williams@intel.com>
Subject: Re: [PATCH v2 02/11] mm/hmm: use reference counting for HMM struct v2
Date: Thu, 28 Mar 2019 04:07:20 -0700	[thread overview]
Message-ID: <20190328110719.GA31324@iweiny-DESK2.sc.intel.com> (raw)
In-Reply-To: <20190325144011.10560-3-jglisse@redhat.com>

On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote:
> From: Jérôme Glisse <jglisse@redhat.com>
> 
> Every time i read the code to check that the HMM structure does not
> vanish before it should thanks to the many lock protecting its removal
> i get a headache. Switch to reference counting instead it is much
> easier to follow and harder to break. This also remove some code that
> is no longer needed with refcounting.
> 
> Changes since v1:
>     - removed bunch of useless check (if API is use with bogus argument
>       better to fail loudly so user fix their code)
>     - s/hmm_get/mm_get_hmm/
> 
> Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
> Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Dan Williams <dan.j.williams@intel.com>
> ---
>  include/linux/hmm.h |   2 +
>  mm/hmm.c            | 170 ++++++++++++++++++++++++++++----------------
>  2 files changed, 112 insertions(+), 60 deletions(-)
> 
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index ad50b7b4f141..716fc61fa6d4 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -131,6 +131,7 @@ enum hmm_pfn_value_e {
>  /*
>   * struct hmm_range - track invalidation lock on virtual address range
>   *
> + * @hmm: the core HMM structure this range is active against
>   * @vma: the vm area struct for the range
>   * @list: all range lock are on a list
>   * @start: range virtual start address (inclusive)
> @@ -142,6 +143,7 @@ enum hmm_pfn_value_e {
>   * @valid: pfns array did not change since it has been fill by an HMM function
>   */
>  struct hmm_range {
> +	struct hmm		*hmm;
>  	struct vm_area_struct	*vma;
>  	struct list_head	list;
>  	unsigned long		start;
> diff --git a/mm/hmm.c b/mm/hmm.c
> index fe1cd87e49ac..306e57f7cded 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -50,6 +50,7 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
>   */
>  struct hmm {
>  	struct mm_struct	*mm;
> +	struct kref		kref;
>  	spinlock_t		lock;
>  	struct list_head	ranges;
>  	struct list_head	mirrors;
> @@ -57,6 +58,16 @@ struct hmm {
>  	struct rw_semaphore	mirrors_sem;
>  };
>  
> +static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
> +{
> +	struct hmm *hmm = READ_ONCE(mm->hmm);
> +
> +	if (hmm && kref_get_unless_zero(&hmm->kref))
> +		return hmm;
> +
> +	return NULL;
> +}
> +
>  /*
>   * hmm_register - register HMM against an mm (HMM internal)
>   *
> @@ -67,14 +78,9 @@ struct hmm {
>   */
>  static struct hmm *hmm_register(struct mm_struct *mm)
>  {
> -	struct hmm *hmm = READ_ONCE(mm->hmm);
> +	struct hmm *hmm = mm_get_hmm(mm);

FWIW: having hmm_register == "hmm get" is a bit confusing...

Ira

>  	bool cleanup = false;
>  
> -	/*
> -	 * The hmm struct can only be freed once the mm_struct goes away,
> -	 * hence we should always have pre-allocated an new hmm struct
> -	 * above.
> -	 */
>  	if (hmm)
>  		return hmm;
>  
> @@ -86,6 +92,7 @@ static struct hmm *hmm_register(struct mm_struct *mm)
>  	hmm->mmu_notifier.ops = NULL;
>  	INIT_LIST_HEAD(&hmm->ranges);
>  	spin_lock_init(&hmm->lock);
> +	kref_init(&hmm->kref);
>  	hmm->mm = mm;
>  
>  	spin_lock(&mm->page_table_lock);
> @@ -106,7 +113,7 @@ static struct hmm *hmm_register(struct mm_struct *mm)
>  	if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
>  		goto error_mm;
>  
> -	return mm->hmm;
> +	return hmm;
>  
>  error_mm:
>  	spin_lock(&mm->page_table_lock);
> @@ -118,9 +125,41 @@ static struct hmm *hmm_register(struct mm_struct *mm)
>  	return NULL;
>  }
>  
> +static void hmm_free(struct kref *kref)
> +{
> +	struct hmm *hmm = container_of(kref, struct hmm, kref);
> +	struct mm_struct *mm = hmm->mm;
> +
> +	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
> +
> +	spin_lock(&mm->page_table_lock);
> +	if (mm->hmm == hmm)
> +		mm->hmm = NULL;
> +	spin_unlock(&mm->page_table_lock);
> +
> +	kfree(hmm);
> +}
> +
> +static inline void hmm_put(struct hmm *hmm)
> +{
> +	kref_put(&hmm->kref, hmm_free);
> +}
> +
>  void hmm_mm_destroy(struct mm_struct *mm)
>  {
> -	kfree(mm->hmm);
> +	struct hmm *hmm;
> +
> +	spin_lock(&mm->page_table_lock);
> +	hmm = mm_get_hmm(mm);
> +	mm->hmm = NULL;
> +	if (hmm) {
> +		hmm->mm = NULL;
> +		spin_unlock(&mm->page_table_lock);
> +		hmm_put(hmm);
> +		return;
> +	}
> +
> +	spin_unlock(&mm->page_table_lock);
>  }
>  
>  static int hmm_invalidate_range(struct hmm *hmm, bool device,
> @@ -165,7 +204,7 @@ static int hmm_invalidate_range(struct hmm *hmm, bool device,
>  static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
>  {
>  	struct hmm_mirror *mirror;
> -	struct hmm *hmm = mm->hmm;
> +	struct hmm *hmm = mm_get_hmm(mm);
>  
>  	down_write(&hmm->mirrors_sem);
>  	mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror,
> @@ -186,13 +225,16 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
>  						  struct hmm_mirror, list);
>  	}
>  	up_write(&hmm->mirrors_sem);
> +
> +	hmm_put(hmm);
>  }
>  
>  static int hmm_invalidate_range_start(struct mmu_notifier *mn,
>  			const struct mmu_notifier_range *range)
>  {
> +	struct hmm *hmm = mm_get_hmm(range->mm);
>  	struct hmm_update update;
> -	struct hmm *hmm = range->mm->hmm;
> +	int ret;
>  
>  	VM_BUG_ON(!hmm);
>  
> @@ -200,14 +242,16 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
>  	update.end = range->end;
>  	update.event = HMM_UPDATE_INVALIDATE;
>  	update.blockable = range->blockable;
> -	return hmm_invalidate_range(hmm, true, &update);
> +	ret = hmm_invalidate_range(hmm, true, &update);
> +	hmm_put(hmm);
> +	return ret;
>  }
>  
>  static void hmm_invalidate_range_end(struct mmu_notifier *mn,
>  			const struct mmu_notifier_range *range)
>  {
> +	struct hmm *hmm = mm_get_hmm(range->mm);
>  	struct hmm_update update;
> -	struct hmm *hmm = range->mm->hmm;
>  
>  	VM_BUG_ON(!hmm);
>  
> @@ -216,6 +260,7 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn,
>  	update.event = HMM_UPDATE_INVALIDATE;
>  	update.blockable = true;
>  	hmm_invalidate_range(hmm, false, &update);
> +	hmm_put(hmm);
>  }
>  
>  static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
> @@ -241,24 +286,13 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
>  	if (!mm || !mirror || !mirror->ops)
>  		return -EINVAL;
>  
> -again:
>  	mirror->hmm = hmm_register(mm);
>  	if (!mirror->hmm)
>  		return -ENOMEM;
>  
>  	down_write(&mirror->hmm->mirrors_sem);
> -	if (mirror->hmm->mm == NULL) {
> -		/*
> -		 * A racing hmm_mirror_unregister() is about to destroy the hmm
> -		 * struct. Try again to allocate a new one.
> -		 */
> -		up_write(&mirror->hmm->mirrors_sem);
> -		mirror->hmm = NULL;
> -		goto again;
> -	} else {
> -		list_add(&mirror->list, &mirror->hmm->mirrors);
> -		up_write(&mirror->hmm->mirrors_sem);
> -	}
> +	list_add(&mirror->list, &mirror->hmm->mirrors);
> +	up_write(&mirror->hmm->mirrors_sem);
>  
>  	return 0;
>  }
> @@ -273,33 +307,18 @@ EXPORT_SYMBOL(hmm_mirror_register);
>   */
>  void hmm_mirror_unregister(struct hmm_mirror *mirror)
>  {
> -	bool should_unregister = false;
> -	struct mm_struct *mm;
> -	struct hmm *hmm;
> +	struct hmm *hmm = READ_ONCE(mirror->hmm);
>  
> -	if (mirror->hmm == NULL)
> +	if (hmm == NULL)
>  		return;
>  
> -	hmm = mirror->hmm;
>  	down_write(&hmm->mirrors_sem);
>  	list_del_init(&mirror->list);
> -	should_unregister = list_empty(&hmm->mirrors);
> +	/* To protect us against double unregister ... */
>  	mirror->hmm = NULL;
> -	mm = hmm->mm;
> -	hmm->mm = NULL;
>  	up_write(&hmm->mirrors_sem);
>  
> -	if (!should_unregister || mm == NULL)
> -		return;
> -
> -	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
> -
> -	spin_lock(&mm->page_table_lock);
> -	if (mm->hmm == hmm)
> -		mm->hmm = NULL;
> -	spin_unlock(&mm->page_table_lock);
> -
> -	kfree(hmm);
> +	hmm_put(hmm);
>  }
>  EXPORT_SYMBOL(hmm_mirror_unregister);
>  
> @@ -708,6 +727,8 @@ int hmm_vma_get_pfns(struct hmm_range *range)
>  	struct mm_walk mm_walk;
>  	struct hmm *hmm;
>  
> +	range->hmm = NULL;
> +
>  	/* Sanity check, this really should not happen ! */
>  	if (range->start < vma->vm_start || range->start >= vma->vm_end)
>  		return -EINVAL;
> @@ -717,14 +738,18 @@ int hmm_vma_get_pfns(struct hmm_range *range)
>  	hmm = hmm_register(vma->vm_mm);
>  	if (!hmm)
>  		return -ENOMEM;
> -	/* Caller must have registered a mirror, via hmm_mirror_register() ! */
> -	if (!hmm->mmu_notifier.ops)
> +
> +	/* Check if hmm_mm_destroy() was call. */
> +	if (hmm->mm == NULL) {
> +		hmm_put(hmm);
>  		return -EINVAL;
> +	}
>  
>  	/* FIXME support hugetlb fs */
>  	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
>  			vma_is_dax(vma)) {
>  		hmm_pfns_special(range);
> +		hmm_put(hmm);
>  		return -EINVAL;
>  	}
>  
> @@ -736,6 +761,7 @@ int hmm_vma_get_pfns(struct hmm_range *range)
>  		 * operations such has atomic access would not work.
>  		 */
>  		hmm_pfns_clear(range, range->pfns, range->start, range->end);
> +		hmm_put(hmm);
>  		return -EPERM;
>  	}
>  
> @@ -758,6 +784,12 @@ int hmm_vma_get_pfns(struct hmm_range *range)
>  	mm_walk.pte_hole = hmm_vma_walk_hole;
>  
>  	walk_page_range(range->start, range->end, &mm_walk);
> +	/*
> +	 * Transfer hmm reference to the range struct it will be drop inside
> +	 * the hmm_vma_range_done() function (which _must_ be call if this
> +	 * function return 0).
> +	 */
> +	range->hmm = hmm;
>  	return 0;
>  }
>  EXPORT_SYMBOL(hmm_vma_get_pfns);
> @@ -802,25 +834,27 @@ EXPORT_SYMBOL(hmm_vma_get_pfns);
>   */
>  bool hmm_vma_range_done(struct hmm_range *range)
>  {
> -	unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
> -	struct hmm *hmm;
> +	bool ret = false;
>  
> -	if (range->end <= range->start) {
> +	/* Sanity check this really should not happen. */
> +	if (range->hmm == NULL || range->end <= range->start) {
>  		BUG();
>  		return false;
>  	}
>  
> -	hmm = hmm_register(range->vma->vm_mm);
> -	if (!hmm) {
> -		memset(range->pfns, 0, sizeof(*range->pfns) * npages);
> -		return false;
> -	}
> -
> -	spin_lock(&hmm->lock);
> +	spin_lock(&range->hmm->lock);
>  	list_del_rcu(&range->list);
> -	spin_unlock(&hmm->lock);
> +	ret = range->valid;
> +	spin_unlock(&range->hmm->lock);
>  
> -	return range->valid;
> +	/* Is the mm still alive ? */
> +	if (range->hmm->mm == NULL)
> +		ret = false;
> +
> +	/* Drop reference taken by hmm_vma_fault() or hmm_vma_get_pfns() */
> +	hmm_put(range->hmm);
> +	range->hmm = NULL;
> +	return ret;
>  }
>  EXPORT_SYMBOL(hmm_vma_range_done);
>  
> @@ -880,6 +914,8 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
>  	struct hmm *hmm;
>  	int ret;
>  
> +	range->hmm = NULL;
> +
>  	/* Sanity check, this really should not happen ! */
>  	if (range->start < vma->vm_start || range->start >= vma->vm_end)
>  		return -EINVAL;
> @@ -891,14 +927,18 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
>  		hmm_pfns_clear(range, range->pfns, range->start, range->end);
>  		return -ENOMEM;
>  	}
> -	/* Caller must have registered a mirror using hmm_mirror_register() */
> -	if (!hmm->mmu_notifier.ops)
> +
> +	/* Check if hmm_mm_destroy() was call. */
> +	if (hmm->mm == NULL) {
> +		hmm_put(hmm);
>  		return -EINVAL;
> +	}
>  
>  	/* FIXME support hugetlb fs */
>  	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
>  			vma_is_dax(vma)) {
>  		hmm_pfns_special(range);
> +		hmm_put(hmm);
>  		return -EINVAL;
>  	}
>  
> @@ -910,6 +950,7 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
>  		 * operations such has atomic access would not work.
>  		 */
>  		hmm_pfns_clear(range, range->pfns, range->start, range->end);
> +		hmm_put(hmm);
>  		return -EPERM;
>  	}
>  
> @@ -945,7 +986,16 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
>  		hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last,
>  			       range->end);
>  		hmm_vma_range_done(range);
> +		hmm_put(hmm);
> +	} else {
> +		/*
> +		 * Transfer hmm reference to the range struct it will be drop
> +		 * inside the hmm_vma_range_done() function (which _must_ be
> +		 * call if this function return 0).
> +		 */
> +		range->hmm = hmm;
>  	}
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL(hmm_vma_fault);
> -- 
> 2.17.2
> 

  reply	other threads:[~2019-03-28 19:08 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-25 14:40 [PATCH v2 00/11] Improve HMM driver API v2 jglisse
2019-03-25 14:40 ` [PATCH v2 01/11] mm/hmm: select mmu notifier when selecting HMM jglisse
2019-03-28 20:33   ` John Hubbard
2019-03-29 21:15     ` Jerome Glisse
2019-03-29 21:42       ` John Hubbard
2019-03-25 14:40 ` [PATCH v2 02/11] mm/hmm: use reference counting for HMM struct v2 jglisse
2019-03-28 11:07   ` Ira Weiny [this message]
2019-03-28 19:11     ` Jerome Glisse
2019-03-28 20:43       ` John Hubbard
2019-03-28 21:21         ` Jerome Glisse
2019-03-29  0:39           ` John Hubbard
2019-03-28 16:57             ` Ira Weiny
2019-03-29  1:00               ` Jerome Glisse
2019-03-29  1:18                 ` John Hubbard
2019-03-29  1:50                   ` Jerome Glisse
2019-03-28 18:21                     ` Ira Weiny
2019-03-29  2:25                       ` Jerome Glisse
2019-03-29 20:07                         ` John Hubbard
2019-03-29  2:11                     ` John Hubbard
2019-03-29  2:22                       ` Jerome Glisse
2019-03-25 14:40 ` [PATCH v2 03/11] mm/hmm: do not erase snapshot when a range is invalidated jglisse
2019-03-25 14:40 ` [PATCH v2 04/11] mm/hmm: improve and rename hmm_vma_get_pfns() to hmm_range_snapshot() v2 jglisse
2019-03-28 13:30   ` Ira Weiny
2019-03-25 14:40 ` [PATCH v2 05/11] mm/hmm: improve and rename hmm_vma_fault() to hmm_range_fault() v2 jglisse
2019-03-28 13:43   ` Ira Weiny
2019-03-28 22:03     ` Jerome Glisse
2019-03-25 14:40 ` [PATCH v2 06/11] mm/hmm: improve driver API to work and wait over a range v2 jglisse
2019-03-28 13:11   ` Ira Weiny
2019-03-28 21:39     ` Jerome Glisse
2019-03-28 16:12   ` Ira Weiny
2019-03-29  0:56     ` Jerome Glisse
2019-03-28 18:49       ` Ira Weiny
2019-03-25 14:40 ` [PATCH v2 07/11] mm/hmm: add default fault flags to avoid the need to pre-fill pfns arrays jglisse
2019-03-28 21:59   ` John Hubbard
2019-03-28 22:12     ` Jerome Glisse
2019-03-28 22:19       ` John Hubbard
2019-03-28 22:31         ` Jerome Glisse
2019-03-28 22:40           ` John Hubbard
2019-03-28 23:21             ` Jerome Glisse
2019-03-28 23:28               ` John Hubbard
2019-03-28 16:42                 ` Ira Weiny
2019-03-29  1:17                   ` Jerome Glisse
2019-03-29  1:30                     ` John Hubbard
2019-03-29  1:42                       ` Jerome Glisse
2019-03-29  1:59                         ` Jerome Glisse
2019-03-29  2:05                           ` John Hubbard
2019-03-29  2:12                             ` Jerome Glisse
2019-03-28 23:43                 ` Jerome Glisse
2019-03-25 14:40 ` [PATCH v2 08/11] mm/hmm: mirror hugetlbfs (snapshoting, faulting and DMA mapping) v2 jglisse
2019-03-28 16:53   ` Ira Weiny
2019-03-25 14:40 ` [PATCH v2 09/11] mm/hmm: allow to mirror vma of a file on a DAX backed filesystem v2 jglisse
2019-03-28 18:04   ` Ira Weiny
2019-03-29  2:17     ` Jerome Glisse
2019-03-25 14:40 ` [PATCH v2 10/11] mm/hmm: add helpers for driver to safely take the mmap_sem v2 jglisse
2019-03-28 20:54   ` John Hubbard
2019-03-28 21:30     ` Jerome Glisse
2019-03-28 21:41       ` John Hubbard
2019-03-28 22:08         ` Jerome Glisse
2019-03-28 22:25           ` John Hubbard
2019-03-28 22:40             ` Jerome Glisse
2019-03-28 22:43               ` John Hubbard
2019-03-28 23:05                 ` Jerome Glisse
2019-03-28 23:20                   ` John Hubbard
2019-03-28 23:24                     ` Jerome Glisse
2019-03-28 23:34                       ` John Hubbard
2019-03-28 18:44                         ` Ira Weiny
2019-03-25 14:40 ` [PATCH v2 11/11] mm/hmm: add an helper function that fault pages and map them to a device v2 jglisse
2019-04-01 11:59   ` Souptick Joarder
2019-04-01 11:59     ` Souptick Joarder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190328110719.GA31324@iweiny-DESK2.sc.intel.com \
    --to=ira.weiny@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=jglisse@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.