Re: [PATCH v11 7/9] hugetlb: support file_region coalescing again

From: Mike Kravetz <mike.kravetz@oracle.com>
To: Mina Almasry <almasrymina@google.com>
Cc: shuah@kernel.org, rientjes@google.com, shakeelb@google.com,
	gthelen@google.com, akpm@linux-foundation.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-kselftest@vger.kernel.org, cgroups@vger.kernel.org
Subject: Re: [PATCH v11 7/9] hugetlb: support file_region coalescing again
Date: Thu, 6 Feb 2020 16:17:25 -0800	[thread overview]
Message-ID: <2541c294-9f61-083b-9a0d-0dfdc3dcac68@oracle.com> (raw)
In-Reply-To: <20200203232248.104733-7-almasrymina@google.com>

On 2/3/20 3:22 PM, Mina Almasry wrote:
> An earlier patch in this series disabled file_region coalescing in order
> to hang the hugetlb_cgroup uncharge info on the file_region entries.
> 
> This patch re-adds support for coalescing of file_region entries.
> Essentially everytime we add an entry, we check to see if the
> hugetlb_cgroup uncharge info is the same as any adjacent entries. If it
> is, instead of adding an entry we simply extend the appropriate entry.
> 
> This is an important performance optimization as private mappings add
> their entries page by page, and we could incur big performance costs for
> large mappings with lots of file_region entries in their resv_map.
> 
> Signed-off-by: Mina Almasry <almasrymina@google.com>
> 
> ---
>  mm/hugetlb.c | 62 +++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 52 insertions(+), 10 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index ec0b55ea1506e..058dd9c8269cf 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -272,6 +272,22 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
>  #endif
>  }
> 
> +static bool has_same_uncharge_info(struct file_region *rg,
> +				   struct hugetlb_cgroup *h_cg,
> +				   struct hstate *h)
> +{
> +#ifdef CONFIG_CGROUP_HUGETLB
> +	return rg &&
> +	       rg->reservation_counter ==
> +		       &h_cg->rsvd_hugepage[hstate_index(h)] &&
> +	       rg->pages_per_hpage == pages_per_huge_page(h) &&
> +	       rg->css == &h_cg->css;
> +
> +#else
> +	return true;
> +#endif
> +}
> +
>  /* Must be called with resv->lock held. Calling this with count_only == true
>   * will count the number of pages to be added but will not modify the linked
>   * list. If regions_needed != NULL and count_only == true, then regions_needed
> @@ -286,7 +302,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
>  	long add = 0;
>  	struct list_head *head = &resv->regions;
>  	long last_accounted_offset = f;
> -	struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
> +	struct file_region *rg = NULL, *trg = NULL, *nrg = NULL, *prg = NULL;
> 
>  	if (regions_needed)
>  		*regions_needed = 0;
> @@ -318,16 +334,34 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,

I seem to be missing something.  For context, here is the beginning of that
loop:

	/* In this loop, we essentially handle an entry for the range
	 * [last_accounted_offset, rg->from), at every iteration, with some
	 * bounds checking.
	 */
	list_for_each_entry_safe(rg, trg, head, link) {
		/* Skip irrelevant regions that start before our range. */
		if (rg->from < f) {
			/* If this region ends after the last accounted offset,
			 * then we need to update last_accounted_offset.
			 */
			if (rg->to > last_accounted_offset)
				last_accounted_offset = rg->to;
			continue;
		}

		/* When we find a region that starts beyond our range, we've
		 * finished.
		 */
		if (rg->from > t)
			break;

Suppose the resv_map contains one entry [0,2) and we are going to add
[2,4).  Will we not 'continue' after the first entry and then exit loop
without setting prg?  So, there is no chance for coalescing?

-- 
Mike Kravetz

>  		if (rg->from > last_accounted_offset) {
>  			add += rg->from - last_accounted_offset;
>  			if (!count_only) {
> -				nrg = get_file_region_entry_from_cache(
> -					resv, last_accounted_offset, rg->from);
> -				record_hugetlb_cgroup_uncharge_info(h_cg, nrg,
> -								    h);
> -				list_add(&nrg->link, rg->link.prev);
> +				/* Check if the last region can be extended. */
> +				if (prg && prg->to == last_accounted_offset &&
> +				    has_same_uncharge_info(prg, h_cg, h)) {
> +					prg->to = rg->from;
> +				/* Check if the next region can be extended. */
> +				} else if (has_same_uncharge_info(rg, h_cg,
> +								  h)) {
> +					rg->from = last_accounted_offset;
> +				/* If neither of the regions can be extended,
> +				 * add a region.
> +				 */
> +				} else {
> +					nrg = get_file_region_entry_from_cache(
> +						resv, last_accounted_offset,
> +						rg->from);
> +					record_hugetlb_cgroup_uncharge_info(
> +						h_cg, nrg, h);
> +					list_add(&nrg->link, rg->link.prev);
> +				}
>  			} else if (regions_needed)
>  				*regions_needed += 1;
>  		}
> 
>  		last_accounted_offset = rg->to;
> +		/* Record rg as the 'previous file region' incase we need it
> +		 * for the next iteration.
> +		 */
> +		prg = rg;
>  	}
> 
>  	/* Handle the case where our range extends beyond
> @@ -336,10 +370,18 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
>  	if (last_accounted_offset < t) {
>  		add += t - last_accounted_offset;
>  		if (!count_only) {
> -			nrg = get_file_region_entry_from_cache(
> -				resv, last_accounted_offset, t);
> -			record_hugetlb_cgroup_uncharge_info(h_cg, nrg, h);
> -			list_add(&nrg->link, rg->link.prev);
> +			/* Check if the last region can be extended. */
> +			if (prg && prg->to == last_accounted_offset &&
> +			    has_same_uncharge_info(prg, h_cg, h)) {
> +				prg->to = last_accounted_offset;
> +			} else {
> +				/* If not, just create a new region. */
> +				nrg = get_file_region_entry_from_cache(
> +					resv, last_accounted_offset, t);
> +				record_hugetlb_cgroup_uncharge_info(h_cg, nrg,
> +								    h);
> +				list_add(&nrg->link, rg->link.prev);
> +			}
>  		} else if (regions_needed)
>  			*regions_needed += 1;
>  	}
> --
> 2.25.0.341.g760bfbb309-goog
>