* [PATCH] hugetlb_cgroup: fix imbalanced css_get and css_put pair for shared mappings
@ 2021-02-10 9:16 Miaohe Lin
2021-02-10 15:32 ` kernel test robot
2021-02-10 15:52 ` kernel test robot
0 siblings, 2 replies; 4+ messages in thread
From: Miaohe Lin @ 2021-02-10 9:16 UTC (permalink / raw)
To: akpm, mike.kravetz
Cc: almasrymina, rientjes, linux-mm, linux-kernel, linmiaohe
The current implementation of hugetlb_cgroup for shared mappings could have
different behavior. Consider the following two scenarios:
1.Assume initial css reference count of hugetlb_cgroup is 1:
1.1 Call hugetlb_reserve_pages with from = 1, to = 2. So css reference
count is 2 associated with 1 file_region.
1.2 Call hugetlb_reserve_pages with from = 2, to = 3. So css reference
count is 3 associated with 2 file_region.
1.3 coalesce_file_region will coalesce these two file_regions into one.
So css reference count is 3 associated with 1 file_region now.
2.Assume initial css reference count of hugetlb_cgroup is 1 again:
2.1 Call hugetlb_reserve_pages with from = 1, to = 3. So css reference
count is 2 associated with 1 file_region.
Therefore, we might have one file_region while holding one or more css
reference counts. This inconsistency could lead to imbalanced css_get()
and css_put() pair. If we do css_put one by one (i.g. hole punch case),
scenario 2 would put one more css reference. If we do css_put all together
(i.g. truncate case), scenario 1 will leak one css reference.
The imbalanced css_get() and css_put() pair would result in a non-zero
reference when we try to destroy the hugetlb cgroup. The hugetlb cgroup
directory is removed __but__ associated resource is not freed. This might
result in OOM or can not create a new hugetlb cgroup in a busy workload
ultimately.
In order to fix this, we have to make sure that one file_region must hold
and only hold one css reference. So in coalesce_file_region case, we should
release one css reference before coalescence. Also only put css reference
when the entire file_region is removed.
The last thing to note is that the caller of region_add() will only hold
one reference to h_cg->css for the whole contiguous reservation region.
But this area might be scattered when there are already some file_regions
reside in it. As a result, many file_regions may share only one h_cg->css
reference. In order to ensure that one file_region must hold and only hold
one h_cg->css reference, we should do css_get() for each file_region and
release the reference held by caller when they are done.
Fixes: 075a61d07a8e ("hugetlb_cgroup: add accounting for shared mappings")
Signed-off-by: Shijie Luo <luoshijie1@huawei.com>
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: stable@kernel.org
---
The Spring Festival is coming. It's the year of ox. Happy New Year!
_______________
< Happy New Year! >
---------------
\ ^__^
\ (oo)\_______
(__)\ )\/\
||----w |
|| ||
---
include/linux/hugetlb_cgroup.h | 6 +++--
mm/hugetlb.c | 42 ++++++++++++++++++++++++++++++----
mm/hugetlb_cgroup.c | 11 +++++++--
3 files changed, 51 insertions(+), 8 deletions(-)
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 2ad6e92f124a..7610efcd96bd 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -138,7 +138,8 @@ extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
struct file_region *rg,
- unsigned long nr_pages);
+ unsigned long nr_pages,
+ bool region_del);
extern void hugetlb_cgroup_file_init(void) __init;
extern void hugetlb_cgroup_migrate(struct page *oldhpage,
@@ -147,7 +148,8 @@ extern void hugetlb_cgroup_migrate(struct page *oldhpage,
#else
static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
struct file_region *rg,
- unsigned long nr_pages)
+ unsigned long nr_pages,
+ bool region_del)
{
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9a4b4715faa1..3664cf8b0748 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -280,6 +280,18 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
nrg->reservation_counter =
&h_cg->rsvd_hugepage[hstate_index(h)];
nrg->css = &h_cg->css;
+ /*
+ * The caller (hugetlb_reserve_pages now) will only hold one
+ * h_cg->css reference for the whole contiguous reservation
+ * region. But this area might be scattered when there are
+ * already some file_regions reside in it. As a result, many
+ * file_regions may share only one h_cg->css reference. In
+ * order to ensure that one file_region must hold and only
+ * hold one h_cg->css reference, we should do css_get for
+ * each file_region and leave the reference held by caller
+ * untouched.
+ */
+ css_get(&h_cg->css);
if (!resv->pages_per_hpage)
resv->pages_per_hpage = pages_per_huge_page(h);
/* pages_per_hpage should be the same for all entries in
@@ -293,6 +305,14 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
#endif
}
+static void put_uncharge_info(struct file_region *rg)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+ if (rg->css)
+ css_put(rg->css);
+#endif
+}
+
static bool has_same_uncharge_info(struct file_region *rg,
struct file_region *org)
{
@@ -316,6 +336,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
prg->to = rg->to;
list_del(&rg->link);
+ put_uncharge_info(rg);
kfree(rg);
rg = prg;
@@ -327,6 +348,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
nrg->from = rg->from;
list_del(&rg->link);
+ put_uncharge_info(rg);
kfree(rg);
}
}
@@ -659,7 +681,7 @@ static long region_del(struct resv_map *resv, long f, long t)
del += t - f;
hugetlb_cgroup_uncharge_file_region(
- resv, rg, t - f);
+ resv, rg, t - f, false);
/* New entry for end of split region */
nrg->from = t;
@@ -680,7 +702,7 @@ static long region_del(struct resv_map *resv, long f, long t)
if (f <= rg->from && t >= rg->to) { /* Remove entire region */
del += rg->to - rg->from;
hugetlb_cgroup_uncharge_file_region(resv, rg,
- rg->to - rg->from);
+ rg->to - rg->from, true);
list_del(&rg->link);
kfree(rg);
continue;
@@ -688,13 +710,13 @@ static long region_del(struct resv_map *resv, long f, long t)
if (f <= rg->from) { /* Trim beginning of region */
hugetlb_cgroup_uncharge_file_region(resv, rg,
- t - rg->from);
+ t - rg->from, false);
del += t - rg->from;
rg->from = t;
} else { /* Trim end of region */
hugetlb_cgroup_uncharge_file_region(resv, rg,
- rg->to - f);
+ rg->to - f, false);
del += rg->to - f;
rg->to = f;
@@ -5139,6 +5161,10 @@ bool hugetlb_reserve_pages(struct inode *inode,
*/
long rsv_adjust;
+ /*
+ * hugetlb_cgroup_uncharge_cgroup_rsvd() will put the
+ * reference to h_cg->css. See comment below for detail.
+ */
hugetlb_cgroup_uncharge_cgroup_rsvd(
hstate_index(h),
(chg - add) * pages_per_huge_page(h), h_cg);
@@ -5146,6 +5172,14 @@ bool hugetlb_reserve_pages(struct inode *inode,
rsv_adjust = hugepage_subpool_put_pages(spool,
chg - add);
hugetlb_acct_memory(h, -rsv_adjust);
+ } else if (h_cg) {
+ /*
+ * The file_regions will hold their own reference to
+ * h_cg->css. So we should release the reference held
+ * via hugetlb_cgroup_charge_cgroup_rsvd() when we are
+ * done.
+ */
+ css_put(&h_cg->css);
}
}
return true;
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index f68b51fcda3d..8668ba87cfe4 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -391,7 +391,8 @@ void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
struct file_region *rg,
- unsigned long nr_pages)
+ unsigned long nr_pages,
+ bool region_del)
{
if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
return;
@@ -400,7 +401,13 @@ void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
!resv->reservation_counter) {
page_counter_uncharge(rg->reservation_counter,
nr_pages * resv->pages_per_hpage);
- css_put(rg->css);
+ /*
+ * Only do css_put(rg->css) when we delete the entire region
+ * because one file_region must hold and only hold one rg->css
+ * reference.
+ */
+ if (region_del)
+ css_put(rg->css);
}
}
--
2.19.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] hugetlb_cgroup: fix imbalanced css_get and css_put pair for shared mappings
2021-02-10 9:16 [PATCH] hugetlb_cgroup: fix imbalanced css_get and css_put pair for shared mappings Miaohe Lin
@ 2021-02-10 15:32 ` kernel test robot
2021-03-01 11:27 ` Miaohe Lin
2021-02-10 15:52 ` kernel test robot
1 sibling, 1 reply; 4+ messages in thread
From: kernel test robot @ 2021-02-10 15:32 UTC (permalink / raw)
To: Miaohe Lin, akpm, mike.kravetz
Cc: kbuild-all, clang-built-linux, almasrymina, rientjes, linux-mm,
linux-kernel, linmiaohe
[-- Attachment #1: Type: text/plain, Size: 7977 bytes --]
Hi Miaohe,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on next-20210125]
[also build test ERROR on v5.11-rc7]
[cannot apply to linux/master linus/master hnaz-linux-mm/master v5.11-rc7 v5.11-rc6 v5.11-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Miaohe-Lin/hugetlb_cgroup-fix-imbalanced-css_get-and-css_put-pair-for-shared-mappings/20210210-171736
base: 59fa6a163ffabc1bf25c5e0e33899e268a96d3cc
config: arm64-randconfig-r025-20210209 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project c9439ca36342fb6013187d0a69aef92736951476)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm64 cross compiling tool for clang build
# apt-get install binutils-aarch64-linux-gnu
# https://github.com/0day-ci/linux/commit/68f4ed1b80aa7c51a921c3ad913ee7e6e00618d0
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Miaohe-Lin/hugetlb_cgroup-fix-imbalanced-css_get-and-css_put-pair-for-shared-mappings/20210210-171736
git checkout 68f4ed1b80aa7c51a921c3ad913ee7e6e00618d0
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> mm/hugetlb.c:5222:17: error: incomplete definition of type 'struct hugetlb_cgroup'
css_put(&h_cg->css);
~~~~^
include/linux/hugetlb_cgroup.h:20:8: note: forward declaration of 'struct hugetlb_cgroup'
struct hugetlb_cgroup;
^
1 error generated.
vim +5222 mm/hugetlb.c
5091
5092 /* Return true if reservation was successful, false otherwise. */
5093 bool hugetlb_reserve_pages(struct inode *inode,
5094 long from, long to,
5095 struct vm_area_struct *vma,
5096 vm_flags_t vm_flags)
5097 {
5098 long chg, add = -1;
5099 struct hstate *h = hstate_inode(inode);
5100 struct hugepage_subpool *spool = subpool_inode(inode);
5101 struct resv_map *resv_map;
5102 struct hugetlb_cgroup *h_cg = NULL;
5103 long gbl_reserve, regions_needed = 0;
5104
5105 /* This should never happen */
5106 if (from > to) {
5107 VM_WARN(1, "%s called with a negative range\n", __func__);
5108 return false;
5109 }
5110
5111 /*
5112 * Only apply hugepage reservation if asked. At fault time, an
5113 * attempt will be made for VM_NORESERVE to allocate a page
5114 * without using reserves
5115 */
5116 if (vm_flags & VM_NORESERVE)
5117 return true;
5118
5119 /*
5120 * Shared mappings base their reservation on the number of pages that
5121 * are already allocated on behalf of the file. Private mappings need
5122 * to reserve the full area even if read-only as mprotect() may be
5123 * called to make the mapping read-write. Assume !vma is a shm mapping
5124 */
5125 if (!vma || vma->vm_flags & VM_MAYSHARE) {
5126 /*
5127 * resv_map can not be NULL as hugetlb_reserve_pages is only
5128 * called for inodes for which resv_maps were created (see
5129 * hugetlbfs_get_inode).
5130 */
5131 resv_map = inode_resv_map(inode);
5132
5133 chg = region_chg(resv_map, from, to, ®ions_needed);
5134
5135 } else {
5136 /* Private mapping. */
5137 resv_map = resv_map_alloc();
5138 if (!resv_map)
5139 return false;
5140
5141 chg = to - from;
5142
5143 set_vma_resv_map(vma, resv_map);
5144 set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
5145 }
5146
5147 if (chg < 0)
5148 goto out_err;
5149
5150 if (hugetlb_cgroup_charge_cgroup_rsvd(hstate_index(h),
5151 chg * pages_per_huge_page(h), &h_cg) < 0)
5152 goto out_err;
5153
5154 if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
5155 /* For private mappings, the hugetlb_cgroup uncharge info hangs
5156 * of the resv_map.
5157 */
5158 resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
5159 }
5160
5161 /*
5162 * There must be enough pages in the subpool for the mapping. If
5163 * the subpool has a minimum size, there may be some global
5164 * reservations already in place (gbl_reserve).
5165 */
5166 gbl_reserve = hugepage_subpool_get_pages(spool, chg);
5167 if (gbl_reserve < 0)
5168 goto out_uncharge_cgroup;
5169
5170 /*
5171 * Check enough hugepages are available for the reservation.
5172 * Hand the pages back to the subpool if there are not
5173 */
5174 if (hugetlb_acct_memory(h, gbl_reserve) < 0)
5175 goto out_put_pages;
5176
5177 /*
5178 * Account for the reservations made. Shared mappings record regions
5179 * that have reservations as they are shared by multiple VMAs.
5180 * When the last VMA disappears, the region map says how much
5181 * the reservation was and the page cache tells how much of
5182 * the reservation was consumed. Private mappings are per-VMA and
5183 * only the consumed reservations are tracked. When the VMA
5184 * disappears, the original reservation is the VMA size and the
5185 * consumed reservations are stored in the map. Hence, nothing
5186 * else has to be done for private mappings here
5187 */
5188 if (!vma || vma->vm_flags & VM_MAYSHARE) {
5189 add = region_add(resv_map, from, to, regions_needed, h, h_cg);
5190
5191 if (unlikely(add < 0)) {
5192 hugetlb_acct_memory(h, -gbl_reserve);
5193 goto out_put_pages;
5194 } else if (unlikely(chg > add)) {
5195 /*
5196 * pages in this range were added to the reserve
5197 * map between region_chg and region_add. This
5198 * indicates a race with alloc_huge_page. Adjust
5199 * the subpool and reserve counts modified above
5200 * based on the difference.
5201 */
5202 long rsv_adjust;
5203
5204 /*
5205 * hugetlb_cgroup_uncharge_cgroup_rsvd() will put the
5206 * reference to h_cg->css. See comment below for detail.
5207 */
5208 hugetlb_cgroup_uncharge_cgroup_rsvd(
5209 hstate_index(h),
5210 (chg - add) * pages_per_huge_page(h), h_cg);
5211
5212 rsv_adjust = hugepage_subpool_put_pages(spool,
5213 chg - add);
5214 hugetlb_acct_memory(h, -rsv_adjust);
5215 } else if (h_cg) {
5216 /*
5217 * The file_regions will hold their own reference to
5218 * h_cg->css. So we should release the reference held
5219 * via hugetlb_cgroup_charge_cgroup_rsvd() when we are
5220 * done.
5221 */
> 5222 css_put(&h_cg->css);
5223 }
5224 }
5225 return true;
5226
5227 out_put_pages:
5228 /* put back original number of pages, chg */
5229 (void)hugepage_subpool_put_pages(spool, chg);
5230 out_uncharge_cgroup:
5231 hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
5232 chg * pages_per_huge_page(h), h_cg);
5233 out_err:
5234 if (!vma || vma->vm_flags & VM_MAYSHARE)
5235 /* Only call region_abort if the region_chg succeeded but the
5236 * region_add failed or didn't run.
5237 */
5238 if (chg >= 0 && add < 0)
5239 region_abort(resv_map, from, to, regions_needed);
5240 if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
5241 kref_put(&resv_map->refs, resv_map_release);
5242 return false;
5243 }
5244
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28791 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] hugetlb_cgroup: fix imbalanced css_get and css_put pair for shared mappings
2021-02-10 15:32 ` kernel test robot
@ 2021-03-01 11:27 ` Miaohe Lin
0 siblings, 0 replies; 4+ messages in thread
From: Miaohe Lin @ 2021-03-01 11:27 UTC (permalink / raw)
To: kernel test robot, akpm, mike.kravetz
Cc: kbuild-all, clang-built-linux, almasrymina, rientjes, linux-mm,
linux-kernel
On 2021/2/10 23:32, kernel test robot wrote:
> Hi Miaohe,
>
> Thank you for the patch! Yet something to improve:
>
> [auto build test ERROR on next-20210125]
> [also build test ERROR on v5.11-rc7]
> [cannot apply to linux/master linus/master hnaz-linux-mm/master v5.11-rc7 v5.11-rc6 v5.11-rc5]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url: https://github.com/0day-ci/linux/commits/Miaohe-Lin/hugetlb_cgroup-fix-imbalanced-css_get-and-css_put-pair-for-shared-mappings/20210210-171736
> base: 59fa6a163ffabc1bf25c5e0e33899e268a96d3cc
> config: arm64-randconfig-r025-20210209 (attached as .config)
> compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project c9439ca36342fb6013187d0a69aef92736951476)
> reproduce (this is a W=1 build):
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # install arm64 cross compiling tool for clang build
> # apt-get install binutils-aarch64-linux-gnu
> # https://github.com/0day-ci/linux/commit/68f4ed1b80aa7c51a921c3ad913ee7e6e00618d0
> git remote add linux-review https://github.com/0day-ci/linux
> git fetch --no-tags linux-review Miaohe-Lin/hugetlb_cgroup-fix-imbalanced-css_get-and-css_put-pair-for-shared-mappings/20210210-171736
> git checkout 68f4ed1b80aa7c51a921c3ad913ee7e6e00618d0
> # save the attached .config to linux build tree
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
>>> mm/hugetlb.c:5222:17: error: incomplete definition of type 'struct hugetlb_cgroup'
> css_put(&h_cg->css);
> ~~~~^
> include/linux/hugetlb_cgroup.h:20:8: note: forward declaration of 'struct hugetlb_cgroup'
> struct hugetlb_cgroup;
> ^
> 1 error generated.
>
Sorry for late respond. Will fix this. Thanks!
>
> vim +5222 mm/hugetlb.c
>
> 5091
> 5092 /* Return true if reservation was successful, false otherwise. */
> 5093 bool hugetlb_reserve_pages(struct inode *inode,
> 5094 long from, long to,
> 5095 struct vm_area_struct *vma,
> 5096 vm_flags_t vm_flags)
> 5097 {
> 5098 long chg, add = -1;
> 5099 struct hstate *h = hstate_inode(inode);
> 5100 struct hugepage_subpool *spool = subpool_inode(inode);
> 5101 struct resv_map *resv_map;
> 5102 struct hugetlb_cgroup *h_cg = NULL;
> 5103 long gbl_reserve, regions_needed = 0;
> 5104
> 5105 /* This should never happen */
> 5106 if (from > to) {
> 5107 VM_WARN(1, "%s called with a negative range\n", __func__);
> 5108 return false;
> 5109 }
> 5110
> 5111 /*
> 5112 * Only apply hugepage reservation if asked. At fault time, an
> 5113 * attempt will be made for VM_NORESERVE to allocate a page
> 5114 * without using reserves
> 5115 */
> 5116 if (vm_flags & VM_NORESERVE)
> 5117 return true;
> 5118
> 5119 /*
> 5120 * Shared mappings base their reservation on the number of pages that
> 5121 * are already allocated on behalf of the file. Private mappings need
> 5122 * to reserve the full area even if read-only as mprotect() may be
> 5123 * called to make the mapping read-write. Assume !vma is a shm mapping
> 5124 */
> 5125 if (!vma || vma->vm_flags & VM_MAYSHARE) {
> 5126 /*
> 5127 * resv_map can not be NULL as hugetlb_reserve_pages is only
> 5128 * called for inodes for which resv_maps were created (see
> 5129 * hugetlbfs_get_inode).
> 5130 */
> 5131 resv_map = inode_resv_map(inode);
> 5132
> 5133 chg = region_chg(resv_map, from, to, ®ions_needed);
> 5134
> 5135 } else {
> 5136 /* Private mapping. */
> 5137 resv_map = resv_map_alloc();
> 5138 if (!resv_map)
> 5139 return false;
> 5140
> 5141 chg = to - from;
> 5142
> 5143 set_vma_resv_map(vma, resv_map);
> 5144 set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
> 5145 }
> 5146
> 5147 if (chg < 0)
> 5148 goto out_err;
> 5149
> 5150 if (hugetlb_cgroup_charge_cgroup_rsvd(hstate_index(h),
> 5151 chg * pages_per_huge_page(h), &h_cg) < 0)
> 5152 goto out_err;
> 5153
> 5154 if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
> 5155 /* For private mappings, the hugetlb_cgroup uncharge info hangs
> 5156 * of the resv_map.
> 5157 */
> 5158 resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
> 5159 }
> 5160
> 5161 /*
> 5162 * There must be enough pages in the subpool for the mapping. If
> 5163 * the subpool has a minimum size, there may be some global
> 5164 * reservations already in place (gbl_reserve).
> 5165 */
> 5166 gbl_reserve = hugepage_subpool_get_pages(spool, chg);
> 5167 if (gbl_reserve < 0)
> 5168 goto out_uncharge_cgroup;
> 5169
> 5170 /*
> 5171 * Check enough hugepages are available for the reservation.
> 5172 * Hand the pages back to the subpool if there are not
> 5173 */
> 5174 if (hugetlb_acct_memory(h, gbl_reserve) < 0)
> 5175 goto out_put_pages;
> 5176
> 5177 /*
> 5178 * Account for the reservations made. Shared mappings record regions
> 5179 * that have reservations as they are shared by multiple VMAs.
> 5180 * When the last VMA disappears, the region map says how much
> 5181 * the reservation was and the page cache tells how much of
> 5182 * the reservation was consumed. Private mappings are per-VMA and
> 5183 * only the consumed reservations are tracked. When the VMA
> 5184 * disappears, the original reservation is the VMA size and the
> 5185 * consumed reservations are stored in the map. Hence, nothing
> 5186 * else has to be done for private mappings here
> 5187 */
> 5188 if (!vma || vma->vm_flags & VM_MAYSHARE) {
> 5189 add = region_add(resv_map, from, to, regions_needed, h, h_cg);
> 5190
> 5191 if (unlikely(add < 0)) {
> 5192 hugetlb_acct_memory(h, -gbl_reserve);
> 5193 goto out_put_pages;
> 5194 } else if (unlikely(chg > add)) {
> 5195 /*
> 5196 * pages in this range were added to the reserve
> 5197 * map between region_chg and region_add. This
> 5198 * indicates a race with alloc_huge_page. Adjust
> 5199 * the subpool and reserve counts modified above
> 5200 * based on the difference.
> 5201 */
> 5202 long rsv_adjust;
> 5203
> 5204 /*
> 5205 * hugetlb_cgroup_uncharge_cgroup_rsvd() will put the
> 5206 * reference to h_cg->css. See comment below for detail.
> 5207 */
> 5208 hugetlb_cgroup_uncharge_cgroup_rsvd(
> 5209 hstate_index(h),
> 5210 (chg - add) * pages_per_huge_page(h), h_cg);
> 5211
> 5212 rsv_adjust = hugepage_subpool_put_pages(spool,
> 5213 chg - add);
> 5214 hugetlb_acct_memory(h, -rsv_adjust);
> 5215 } else if (h_cg) {
> 5216 /*
> 5217 * The file_regions will hold their own reference to
> 5218 * h_cg->css. So we should release the reference held
> 5219 * via hugetlb_cgroup_charge_cgroup_rsvd() when we are
> 5220 * done.
> 5221 */
>> 5222 css_put(&h_cg->css);
> 5223 }
> 5224 }
> 5225 return true;
> 5226
> 5227 out_put_pages:
> 5228 /* put back original number of pages, chg */
> 5229 (void)hugepage_subpool_put_pages(spool, chg);
> 5230 out_uncharge_cgroup:
> 5231 hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
> 5232 chg * pages_per_huge_page(h), h_cg);
> 5233 out_err:
> 5234 if (!vma || vma->vm_flags & VM_MAYSHARE)
> 5235 /* Only call region_abort if the region_chg succeeded but the
> 5236 * region_add failed or didn't run.
> 5237 */
> 5238 if (chg >= 0 && add < 0)
> 5239 region_abort(resv_map, from, to, regions_needed);
> 5240 if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
> 5241 kref_put(&resv_map->refs, resv_map_release);
> 5242 return false;
> 5243 }
> 5244
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] hugetlb_cgroup: fix imbalanced css_get and css_put pair for shared mappings
2021-02-10 9:16 [PATCH] hugetlb_cgroup: fix imbalanced css_get and css_put pair for shared mappings Miaohe Lin
2021-02-10 15:32 ` kernel test robot
@ 2021-02-10 15:52 ` kernel test robot
1 sibling, 0 replies; 4+ messages in thread
From: kernel test robot @ 2021-02-10 15:52 UTC (permalink / raw)
To: Miaohe Lin, akpm, mike.kravetz
Cc: kbuild-all, almasrymina, rientjes, linux-mm, linux-kernel, linmiaohe
[-- Attachment #1: Type: text/plain, Size: 7491 bytes --]
Hi Miaohe,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on next-20210125]
[also build test ERROR on v5.11-rc7]
[cannot apply to linux/master linus/master hnaz-linux-mm/master v5.11-rc7 v5.11-rc6 v5.11-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Miaohe-Lin/hugetlb_cgroup-fix-imbalanced-css_get-and-css_put-pair-for-shared-mappings/20210210-171736
base: 59fa6a163ffabc1bf25c5e0e33899e268a96d3cc
config: x86_64-randconfig-c002-20210209 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce (this is a W=1 build):
# https://github.com/0day-ci/linux/commit/68f4ed1b80aa7c51a921c3ad913ee7e6e00618d0
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Miaohe-Lin/hugetlb_cgroup-fix-imbalanced-css_get-and-css_put-pair-for-shared-mappings/20210210-171736
git checkout 68f4ed1b80aa7c51a921c3ad913ee7e6e00618d0
# save the attached .config to linux build tree
make W=1 ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
mm/hugetlb.c: In function 'hugetlb_reserve_pages':
>> mm/hugetlb.c:5222:17: error: dereferencing pointer to incomplete type 'struct hugetlb_cgroup'
5222 | css_put(&h_cg->css);
| ^~
vim +5222 mm/hugetlb.c
5091
5092 /* Return true if reservation was successful, false otherwise. */
5093 bool hugetlb_reserve_pages(struct inode *inode,
5094 long from, long to,
5095 struct vm_area_struct *vma,
5096 vm_flags_t vm_flags)
5097 {
5098 long chg, add = -1;
5099 struct hstate *h = hstate_inode(inode);
5100 struct hugepage_subpool *spool = subpool_inode(inode);
5101 struct resv_map *resv_map;
5102 struct hugetlb_cgroup *h_cg = NULL;
5103 long gbl_reserve, regions_needed = 0;
5104
5105 /* This should never happen */
5106 if (from > to) {
5107 VM_WARN(1, "%s called with a negative range\n", __func__);
5108 return false;
5109 }
5110
5111 /*
5112 * Only apply hugepage reservation if asked. At fault time, an
5113 * attempt will be made for VM_NORESERVE to allocate a page
5114 * without using reserves
5115 */
5116 if (vm_flags & VM_NORESERVE)
5117 return true;
5118
5119 /*
5120 * Shared mappings base their reservation on the number of pages that
5121 * are already allocated on behalf of the file. Private mappings need
5122 * to reserve the full area even if read-only as mprotect() may be
5123 * called to make the mapping read-write. Assume !vma is a shm mapping
5124 */
5125 if (!vma || vma->vm_flags & VM_MAYSHARE) {
5126 /*
5127 * resv_map can not be NULL as hugetlb_reserve_pages is only
5128 * called for inodes for which resv_maps were created (see
5129 * hugetlbfs_get_inode).
5130 */
5131 resv_map = inode_resv_map(inode);
5132
5133 chg = region_chg(resv_map, from, to, ®ions_needed);
5134
5135 } else {
5136 /* Private mapping. */
5137 resv_map = resv_map_alloc();
5138 if (!resv_map)
5139 return false;
5140
5141 chg = to - from;
5142
5143 set_vma_resv_map(vma, resv_map);
5144 set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
5145 }
5146
5147 if (chg < 0)
5148 goto out_err;
5149
5150 if (hugetlb_cgroup_charge_cgroup_rsvd(hstate_index(h),
5151 chg * pages_per_huge_page(h), &h_cg) < 0)
5152 goto out_err;
5153
5154 if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
5155 /* For private mappings, the hugetlb_cgroup uncharge info hangs
5156 * of the resv_map.
5157 */
5158 resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
5159 }
5160
5161 /*
5162 * There must be enough pages in the subpool for the mapping. If
5163 * the subpool has a minimum size, there may be some global
5164 * reservations already in place (gbl_reserve).
5165 */
5166 gbl_reserve = hugepage_subpool_get_pages(spool, chg);
5167 if (gbl_reserve < 0)
5168 goto out_uncharge_cgroup;
5169
5170 /*
5171 * Check enough hugepages are available for the reservation.
5172 * Hand the pages back to the subpool if there are not
5173 */
5174 if (hugetlb_acct_memory(h, gbl_reserve) < 0)
5175 goto out_put_pages;
5176
5177 /*
5178 * Account for the reservations made. Shared mappings record regions
5179 * that have reservations as they are shared by multiple VMAs.
5180 * When the last VMA disappears, the region map says how much
5181 * the reservation was and the page cache tells how much of
5182 * the reservation was consumed. Private mappings are per-VMA and
5183 * only the consumed reservations are tracked. When the VMA
5184 * disappears, the original reservation is the VMA size and the
5185 * consumed reservations are stored in the map. Hence, nothing
5186 * else has to be done for private mappings here
5187 */
5188 if (!vma || vma->vm_flags & VM_MAYSHARE) {
5189 add = region_add(resv_map, from, to, regions_needed, h, h_cg);
5190
5191 if (unlikely(add < 0)) {
5192 hugetlb_acct_memory(h, -gbl_reserve);
5193 goto out_put_pages;
5194 } else if (unlikely(chg > add)) {
5195 /*
5196 * pages in this range were added to the reserve
5197 * map between region_chg and region_add. This
5198 * indicates a race with alloc_huge_page. Adjust
5199 * the subpool and reserve counts modified above
5200 * based on the difference.
5201 */
5202 long rsv_adjust;
5203
5204 /*
5205 * hugetlb_cgroup_uncharge_cgroup_rsvd() will put the
5206 * reference to h_cg->css. See comment below for detail.
5207 */
5208 hugetlb_cgroup_uncharge_cgroup_rsvd(
5209 hstate_index(h),
5210 (chg - add) * pages_per_huge_page(h), h_cg);
5211
5212 rsv_adjust = hugepage_subpool_put_pages(spool,
5213 chg - add);
5214 hugetlb_acct_memory(h, -rsv_adjust);
5215 } else if (h_cg) {
5216 /*
5217 * The file_regions will hold their own reference to
5218 * h_cg->css. So we should release the reference held
5219 * via hugetlb_cgroup_charge_cgroup_rsvd() when we are
5220 * done.
5221 */
> 5222 css_put(&h_cg->css);
5223 }
5224 }
5225 return true;
5226
5227 out_put_pages:
5228 /* put back original number of pages, chg */
5229 (void)hugepage_subpool_put_pages(spool, chg);
5230 out_uncharge_cgroup:
5231 hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
5232 chg * pages_per_huge_page(h), h_cg);
5233 out_err:
5234 if (!vma || vma->vm_flags & VM_MAYSHARE)
5235 /* Only call region_abort if the region_chg succeeded but the
5236 * region_add failed or didn't run.
5237 */
5238 if (chg >= 0 && add < 0)
5239 region_abort(resv_map, from, to, regions_needed);
5240 if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
5241 kref_put(&resv_map->refs, resv_map_release);
5242 return false;
5243 }
5244
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 37166 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-03-01 11:27 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-10 9:16 [PATCH] hugetlb_cgroup: fix imbalanced css_get and css_put pair for shared mappings Miaohe Lin
2021-02-10 15:32 ` kernel test robot
2021-03-01 11:27 ` Miaohe Lin
2021-02-10 15:52 ` kernel test robot
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).