All of lore.kernel.org
 help / color / mirror / Atom feed
From: yulei.kernel@gmail.com
To: akpm@linux-foundation.org, naoya.horiguchi@nec.com,
	viro@zeniv.linux.org.uk, pbonzini@redhat.com
Cc: linux-fsdevel@vger.kernel.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, xiaoguangrong.eric@gmail.com,
	kernellwp@gmail.com, lihaiwei.kernel@gmail.com,
	Yulei Zhang <yuleixzhang@tencent.com>,
	Xiao Guangrong <gloryxiao@tencent.com>
Subject: [PATCH 03/35] dmem: implement dmem memory management
Date: Thu,  8 Oct 2020 15:53:53 +0800	[thread overview]
Message-ID: <57408f6bd8122d915e46deed96a20a8ac6d90d9f.1602093760.git.yuleixzhang@tencent.com> (raw)
In-Reply-To: <cover.1602093760.git.yuleixzhang@tencent.com>
In-Reply-To: <cover.1602093760.git.yuleixzhang@tencent.com>

From: Yulei Zhang <yuleixzhang@tencent.com>

This patch introduces the interfaces used to manage dmem pages:
  - dmem_region_register() registers reserved memory with the dmem
    management system so that it can later be allocated for dmemfs.

  - dmem_alloc_init() initializes the dmem allocator. Note that the page
    size used by the allocator is not the same thing as the alignment
    used to reserve dmem memory.

  - dmem_alloc_pages_vma() and dmem_free_pages() are the interfaces for
    allocating and freeing dmem memory. Multiple pages can be allocated
    at one time, but the number of pages should be a power of two.

Signed-off-by: Xiao Guangrong <gloryxiao@tencent.com>
Signed-off-by: Yulei Zhang <yuleixzhang@tencent.com>
---
 include/linux/dmem.h |   3 +
 mm/dmem.c            | 674 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 677 insertions(+)

diff --git a/include/linux/dmem.h b/include/linux/dmem.h
index 5049322d941c..476a82e8f252 100644
--- a/include/linux/dmem.h
+++ b/include/linux/dmem.h
@@ -7,6 +7,9 @@ int dmem_reserve_init(void);
 void dmem_init(void);
 int dmem_region_register(int node, phys_addr_t start, phys_addr_t end);
 
+int dmem_alloc_init(unsigned long dpage_shift);
+void dmem_alloc_uinit(void);
+
 #else
 static inline int dmem_reserve_init(void)
 {
diff --git a/mm/dmem.c b/mm/dmem.c
index b5fb4f1b92db..a77a064c8d59 100644
--- a/mm/dmem.c
+++ b/mm/dmem.c
@@ -91,11 +91,38 @@ static struct dmem_pool dmem_pool = {
 	.lock = __MUTEX_INITIALIZER(dmem_pool.lock),
 };
 
+/* Size in bytes of one dmem page (dpage), derived from the pool's shift */
+#define DMEM_PAGE_SIZE		(1UL << dmem_pool.dpage_shift)
+/* dpage index of physical address @x, rounded up / down to a dpage boundary */
+#define DMEM_PAGE_UP(x)		phys_to_dpage(((x) + DMEM_PAGE_SIZE - 1))
+#define DMEM_PAGE_DOWN(x)	phys_to_dpage(x)
+
+/* Convert between a dpage index and the physical address it starts at */
+#define dpage_to_phys(_dpage)						\
+	((_dpage) << dmem_pool.dpage_shift)
+#define phys_to_dpage(_addr)						\
+	((_addr) >> dmem_pool.dpage_shift)
+
+/* Convert between a dpage index and a kernel page frame number */
+#define dpage_to_pfn(_dpage)						\
+	(__phys_to_pfn(dpage_to_phys(_dpage)))
+#define pfn_to_dpage(_pfn)						\
+	(phys_to_dpage(__pfn_to_phys(_pfn)))
+
+/* Convert between a dmem_node pointer and its index in dmem_pool.nodes */
+#define dnode_to_nid(_dnode)						\
+	((_dnode) - dmem_pool.nodes)
+#define nid_to_dnode(nid)						\
+	(&dmem_pool.nodes[nid])
+
 #define for_each_dmem_node(_dnode)					\
 	for (_dnode = dmem_pool.nodes;					\
 		_dnode < dmem_pool.nodes + ARRAY_SIZE(dmem_pool.nodes);	\
 		_dnode++)
 
+/* Iterate over every dmem_region linked on @_dnode's regions list */
+#define for_each_dmem_region(_dnode, _dregion)				\
+	list_for_each_entry(_dregion, &(_dnode)->regions, node)
+
+/* Return the nodelist array of the dmem node with index @nid */
+static inline int *dmem_nodelist(int nid)
+{
+	struct dmem_node *dnode = nid_to_dnode(nid);
+
+	return dnode->nodelist;
+}
+
 void __init dmem_init(void)
 {
 	struct dmem_node *dnode;
@@ -135,3 +162,649 @@ int dmem_region_register(int node, phys_addr_t start, phys_addr_t end)
 	return 0;
 }
 
+/* Extra penalty for a node for which node_state(node, N_MEMORY) is set */
+#define PENALTY_FOR_DMEM_SHARED_NODE		(1)
+
+/*
+ * Per-node load added to the penalty in dmem_evaluate_node(); it is
+ * written by dmem_node_init() while the nodelists are being built.
+ */
+static int dmem_nodeload[MAX_NUMNODES] __initdata;
+
+/*
+ * Compute the penalty of using @node as a fallback node for @local.
+ * The candidate with the lowest penalty is chosen by the caller.
+ */
+static int __init dmem_evaluate_node(int local, int node)
+{
+	/* Start from the distance between the two nodes */
+	int score = node_distance(local, node);
+
+	/* Nodes numbered below @local cost one extra point */
+	if (node < local)
+		score++;
+
+	/* Nodes that have CPUs are less attractive than headless ones */
+	if (!cpumask_empty(cpumask_of_node(node)))
+		score += PENALTY_FOR_NODE_WITH_CPUS;
+
+	/* So are dmem nodes that are shared with the kernel */
+	if (node_state(node, N_MEMORY))
+		score += PENALTY_FOR_DMEM_SHARED_NODE;
+
+	/*
+	 * Scale the penalty so that the node load added below only gives
+	 * a slight preference to the less loaded node.
+	 */
+	score *= (nr_online_nodes * MAX_NUMNODES);
+
+	return score + dmem_nodeload[node];
+}
+
+/*
+ * Pick the next node for the fallback list of @local.
+ *
+ *   @local: the node the list is built for
+ *   @used_nodes: nodes already handed out; the returned node is added
+ *
+ * @local itself is returned first. After that, the dmem node with the
+ * lowest penalty that has regions and was not returned before is chosen.
+ *
+ * Return the node id, or NUMA_NO_NODE if @local is not a possible node
+ * or no candidate is left.
+ */
+static int __init find_next_dmem_node(int local, nodemask_t *used_nodes)
+{
+	struct dmem_node *cur;
+	int best = NUMA_NO_NODE;
+	int lowest = INT_MAX;
+
+	/* node_distance() must not be called for an invalid node */
+	if (!node_state(local, N_POSSIBLE))
+		return NUMA_NO_NODE;
+
+	/* The local node always comes first */
+	if (!node_isset(local, *used_nodes)) {
+		node_set(local, *used_nodes);
+		return local;
+	}
+
+	for_each_dmem_node(cur) {
+		int nid, cost;
+
+		/* Nodes without any dmem region are of no use */
+		if (list_empty(&cur->regions))
+			continue;
+
+		/* A node must not appear more than once */
+		nid = dnode_to_nid(cur);
+		if (node_isset(nid, *used_nodes))
+			continue;
+
+		cost = dmem_evaluate_node(local, nid);
+		if (cost < lowest) {
+			lowest = cost;
+			best = nid;
+		}
+	}
+
+	if (best < 0)
+		return best;
+
+	node_set(best, *used_nodes);
+	return best;
+}
+
+/*
+ * Build the fallback nodelist of @dnode: the entries are stored in the
+ * order in which find_next_dmem_node() returns them. Always returns 0.
+ */
+static int __init dmem_node_init(struct dmem_node *dnode)
+{
+	const int local = dnode_to_nid(dnode);
+	int *nodelist = dnode->nodelist;
+	nodemask_t used_nodes;
+	int load = nr_online_nodes;
+	int prev = local;
+	int node;
+	int i = 0;
+
+	nodes_clear(used_nodes);
+
+	for (;;) {
+		node = find_next_dmem_node(local, &used_nodes);
+		if (node < 0)
+			break;
+
+		/*
+		 * We don't want to pressure a particular node, so the
+		 * first node of each distance group gets a penalty to
+		 * make the selection round-robin.
+		 */
+		if (node_distance(local, node) != node_distance(local, prev))
+			dmem_nodeload[node] = load;
+
+		nodelist[i++] = node;
+		prev = node;
+		load--;
+	}
+
+	return 0;
+}
+
+/*
+ * Release the error bitmap of @dregion, if it has one.
+ *
+ * dmem_region_init() points error_bitmap either at the embedded
+ * static_error_bitmap or at a kzalloc()ed buffer, depending on how many
+ * pages the reserved range covers. The same size test is repeated here,
+ * against the same field, to decide whether the bitmap must be kfree()d.
+ */
+static void __init dmem_region_uinit(struct dmem_region *dregion)
+{
+	unsigned long nr_pages, size, *bitmap = dregion->error_bitmap;
+
+	/* never initialized, or already torn down */
+	if (!bitmap)
+		return;
+
+	nr_pages = __phys_to_pfn(dregion->reserved_end_addr)
+		- __phys_to_pfn(dregion->reserved_start_addr);
+
+	WARN_ON(!nr_pages);
+
+	size = BITS_TO_LONGS(nr_pages) * sizeof(long);
+	if (size > sizeof(dregion->static_error_bitmap))
+		kfree(bitmap);
+	dregion->error_bitmap = NULL;
+}
+
+/*
+ * We only stop the allocator from using the reserved pages; the pages
+ * are not returned back if anything goes wrong.
+ */
+static void __init dmem_uinit(void)
+{
+	struct dmem_node *dnode;
+
+	for_each_dmem_node(dnode) {
+		struct dmem_region *cur, *next;
+
+		dnode->nodelist[0] = NUMA_NO_NODE;
+
+		/* entries are unlinked while walking, hence the _safe variant */
+		list_for_each_entry_safe(cur, next, &dnode->regions, node) {
+			dmem_region_uinit(cur);
+			cur->reserved_end_addr = 0;
+			cur->reserved_start_addr = 0;
+			list_del(&cur->node);
+		}
+	}
+
+	dmem_pool.registered_pages = 0;
+	dmem_pool.region_num = 0;
+}
+
+/*
+ * Set up the error bitmap of @dregion with one bit per page of the
+ * reserved range. The embedded static_error_bitmap is used when it is
+ * large enough, otherwise a zeroed bitmap is allocated.
+ *
+ * Return 0 on success or -ENOMEM if the allocation fails.
+ */
+static int __init dmem_region_init(struct dmem_region *dregion)
+{
+	unsigned long first_pfn = __phys_to_pfn(dregion->reserved_start_addr);
+	unsigned long last_pfn = __phys_to_pfn(dregion->reserved_end_addr);
+	unsigned long bytes;
+	unsigned long *map = &dregion->static_error_bitmap;
+
+	bytes = BITS_TO_LONGS(last_pfn - first_pfn) * sizeof(long);
+
+	if (bytes > sizeof(dregion->static_error_bitmap)) {
+		map = kzalloc(bytes, GFP_KERNEL);
+		if (!map)
+			return -ENOMEM;
+	}
+
+	dregion->error_bitmap = map;
+	return 0;
+}
+
+/*
+ * dmem memory is not backed by 'struct page', i.e. the kernel treats
+ * it as invalid pfns. Verify that for every pfn of the reserved range.
+ *
+ * Return 0 if no pfn is valid, otherwise warn and return -EINVAL.
+ */
+static int __init dmem_check_region(struct dmem_region *dregion)
+{
+	unsigned long end_pfn = __phys_to_pfn(dregion->reserved_end_addr);
+	unsigned long pfn = __phys_to_pfn(dregion->reserved_start_addr);
+
+	while (pfn < end_pfn) {
+		if (WARN_ON(pfn_valid(pfn))) {
+			pr_err("dmem: check pfn %#lx failed, its memory was not properly reserved\n",
+				pfn);
+			return -EINVAL;
+		}
+		pfn++;
+	}
+
+	return 0;
+}
+
+/*
+ * Late initcall: build the fallback nodelist of every dmem node, set up
+ * the error bitmap of every registered region and check that the
+ * regions are not covered by valid pfns.
+ *
+ * Return 0 on success. On the first failure everything is torn down via
+ * dmem_uinit() and the error is returned.
+ */
+static int __init dmem_late_init(void)
+{
+	struct dmem_region *dregion;
+	struct dmem_node *dnode;
+	/* must start at 0: nothing below assigns it if no region exists */
+	int ret = 0;
+
+	for_each_dmem_node(dnode) {
+		dmem_node_init(dnode);
+
+		for_each_dmem_region(dnode, dregion) {
+			ret = dmem_region_init(dregion);
+			if (ret)
+				goto exit;
+			ret = dmem_check_region(dregion);
+			if (ret)
+				goto exit;
+		}
+	}
+	return 0;
+exit:
+	dmem_uinit();
+	return ret;
+}
+late_initcall(dmem_late_init);
+
+/*
+ * Prepare @dregion for allocation with the current dpage size.
+ *
+ *   @dregion: the region to set up
+ *   @dpages: filled with the number of whole dpages the region provides
+ *
+ * The reserved range is shrunk to dpage boundaries (start rounded up,
+ * end rounded down). If at least one dpage remains, an allocation bitmap
+ * with one bit per dpage is attached - the embedded static_bitmap when
+ * it is large enough, a zeroed allocation otherwise - and the pages cut
+ * off at both ends are added to dmem_pool.unaligned_pages.
+ *
+ * Return 0 on success (including the "no dpage fits" case, where
+ * @dpages is 0 and the region is left without a bitmap) or -ENOMEM.
+ */
+static int dmem_alloc_region_init(struct dmem_region *dregion,
+				  unsigned long *dpages)
+{
+	unsigned long start, end, *bitmap, size;
+
+	start = DMEM_PAGE_UP(dregion->reserved_start_addr);
+	end = DMEM_PAGE_DOWN(dregion->reserved_end_addr);
+
+	/*
+	 * A region smaller than one dpage that is not dpage-aligned rounds
+	 * to start > end; "end - start" would then wrap to a huge count.
+	 */
+	if (end <= start) {
+		*dpages = 0;
+		return 0;
+	}
+
+	*dpages = end - start;
+
+	size = BITS_TO_LONGS(*dpages) * sizeof(long);
+	if (size <= sizeof(dregion->static_bitmap))
+		bitmap = &dregion->static_bitmap;
+	else {
+		bitmap = kzalloc(size, GFP_KERNEL);
+		if (!bitmap)
+			return -ENOMEM;
+	}
+
+	dregion->bitmap = bitmap;
+	dregion->next_free_pos = 0;
+	dregion->dpage_start_pfn = start;
+	dregion->dpage_end_pfn = end;
+
+	/* account the pages lost in front of and behind the aligned part */
+	dmem_pool.unaligned_pages += __phys_to_pfn((dpage_to_phys(start)
+		- dregion->reserved_start_addr));
+	dmem_pool.unaligned_pages += __phys_to_pfn(dregion->reserved_end_addr
+		- dpage_to_phys(end));
+	return 0;
+}
+
+/*
+ * Tell whether @dpage of @dregion contains a page that is marked in the
+ * region's error bitmap. The error bitmap is indexed by the page offset
+ * from the start of the reserved range.
+ */
+static bool dmem_dpage_is_error(struct dmem_region *dregion, phys_addr_t dpage)
+{
+	phys_addr_t base_pfn = __phys_to_pfn(dregion->reserved_start_addr);
+	unsigned long nbits = dpage_to_pfn(dregion->dpage_end_pfn) - base_pfn;
+	unsigned long first = dpage_to_pfn(dpage) - base_pfn;
+	unsigned long span = DMEM_PAGE_SIZE >> PAGE_SHIFT;
+	unsigned long hit;
+
+	/* first marked page at or after the beginning of this dpage */
+	hit = find_next_bit(dregion->error_bitmap, nbits, first);
+
+	return hit < first + span;
+}
+
+/*
+ * Clear the allocation bits of @dpages_nr dpages starting at @dpage.
+ *
+ * The bit of a dpage that contains an erroneous page is left set, so
+ * that dpage is not handed out again. A warning is raised for every
+ * dpage whose bit was not set.
+ *
+ * Return the number of dpages that were kept because of an error.
+ */
+static unsigned long
+dmem_alloc_bitmap_clear(struct dmem_region *dregion, phys_addr_t dpage,
+			unsigned int dpages_nr)
+{
+	u64 base = dpage - dregion->dpage_start_pfn;
+	unsigned long kept = 0;
+	unsigned int n;
+
+	for (n = 0; n < dpages_nr; n++) {
+		if (!dmem_dpage_is_error(dregion, dpage + n)) {
+			WARN_ON(!__test_and_clear_bit(base + n,
+						      dregion->bitmap));
+			continue;
+		}
+
+		WARN_ON(!test_bit(base + n, dregion->bitmap));
+		kept++;
+	}
+
+	return kept;
+}
+
+/*
+ * Set or clear the allocation-bitmap bit of every dpage that contains a
+ * page marked in the error bitmap.
+ *
+ *   @dregion: the region to walk
+ *   @set: true to set the bits, false to clear them
+ *
+ * Only the dpage-aligned part of the region is walked. A warning is
+ * raised if a bit is already in the requested state.
+ *
+ * Return the number of dpages found to contain an erroneous page.
+ */
+static unsigned long dregion_alloc_bitmap_set_clear(struct dmem_region *dregion,
+						    bool set)
+{
+	phys_addr_t base_pfn = __phys_to_pfn(dregion->reserved_start_addr);
+	unsigned long nbits = dpage_to_pfn(dregion->dpage_end_pfn) - base_pfn;
+	unsigned long from = dpage_to_pfn(dregion->dpage_start_pfn) - base_pfn;
+	unsigned long count = 0;
+
+	for (;;) {
+		unsigned long hit;
+		phys_addr_t dpage, bit;
+
+		hit = find_next_bit(dregion->error_bitmap, nbits, from);
+		if (hit >= nbits)
+			break;
+
+		count++;
+		dpage = pfn_to_dpage(hit + base_pfn);
+		bit = dpage - dregion->dpage_start_pfn;
+
+		if (set)
+			WARN_ON(__test_and_set_bit(bit, dregion->bitmap));
+		else
+			WARN_ON(!__test_and_clear_bit(bit, dregion->bitmap));
+
+		/* resume the search at the first page of the next dpage */
+		from = dpage_to_pfn(dpage + 1) - base_pfn;
+	}
+
+	return count;
+}
+
+/*
+ * Clear the allocation bits of the dpages marked in the error bitmap and
+ * warn if any bit of the allocation bitmap is still set afterwards.
+ */
+static void dmem_uinit_check_alloc_bitmap(struct dmem_region *dregion)
+{
+	unsigned long nr_dpages, bytes;
+
+	dregion_alloc_bitmap_set_clear(dregion, false);
+
+	nr_dpages = dregion->dpage_end_pfn - dregion->dpage_start_pfn;
+	bytes = BITS_TO_LONGS(nr_dpages) * sizeof(long);
+
+	/* the whole bitmap, rounded up to full longs, is checked */
+	WARN_ON(!bitmap_empty(dregion->bitmap, bytes * BITS_PER_BYTE));
+}
+
+/*
+ * Detach the allocation bitmap from @dregion, after checking that no
+ * dpage is still allocated. Regions without a bitmap are skipped.
+ */
+static void dmem_alloc_region_uinit(struct dmem_region *dregion)
+{
+	unsigned long *map = dregion->bitmap;
+	unsigned long nr_dpages;
+
+	if (!map)
+		return;
+
+	nr_dpages = dregion->dpage_end_pfn - dregion->dpage_start_pfn;
+	WARN_ON(!nr_dpages);
+
+	dmem_uinit_check_alloc_bitmap(dregion);
+
+	/* only a bitmap too big for static_bitmap was kzalloc()ed */
+	if (BITS_TO_LONGS(nr_dpages) * sizeof(long) >
+	    sizeof(dregion->static_bitmap))
+		kfree(map);
+
+	dregion->bitmap = NULL;
+}
+
+/*
+ * Tear down the allocator state of all regions and reset the dpage
+ * counters. dpage_shift is cleared last because the per-region teardown
+ * still converts dpages with it.
+ */
+static void __dmem_alloc_uinit(void)
+{
+	struct dmem_region *dregion;
+	struct dmem_node *dnode;
+
+	/* nothing to do if the allocator is not initialized */
+	if (dmem_pool.dpage_shift == 0)
+		return;
+
+	dmem_pool.unaligned_pages = 0;
+
+	for_each_dmem_node(dnode) {
+		for_each_dmem_region(dnode, dregion)
+			dmem_alloc_region_uinit(dregion);
+
+		dnode->free_dpages = 0;
+		dnode->total_dpages = 0;
+	}
+
+	dmem_pool.dpage_shift = 0;
+	dmem_pool.free_dpages = 0;
+	dmem_pool.total_dpages = 0;
+}
+
+/*
+ * Adjust the free dpage counters of @dnode and of the whole pool by
+ * @dpages; callers pass a negative value when dpages are allocated.
+ */
+static void dnode_count_free_dpages(struct dmem_node *dnode, long dpages)
+{
+	dmem_pool.free_dpages += dpages;
+	dnode->free_dpages += dpages;
+}
+
+/*
+ * Uninitialize the dmem allocator.
+ *
+ * One user reference is dropped under the pool lock; the allocator is
+ * torn down when the user count reaches zero. All dpages should be
+ * freed before calling it.
+ */
+void dmem_alloc_uinit(void)
+{
+	mutex_lock(&dmem_pool.lock);
+
+	dmem_pool.user_count--;
+	if (dmem_pool.user_count == 0)
+		__dmem_alloc_uinit();
+
+	mutex_unlock(&dmem_pool.lock);
+}
+EXPORT_SYMBOL(dmem_alloc_uinit);
+
+/*
+ * Initialize the dmem allocator.
+ *   @dpage_shift: the shift (log2) of the dmem page size used to manage
+ *      dmem memory; it must be at least the CPU's natural page shift
+ *
+ * Every successful call takes one user reference that must be dropped
+ * with dmem_alloc_uinit().
+ *
+ * Return 0 on success, -EINVAL if @dpage_shift is smaller than
+ * PAGE_SHIFT, -EBUSY if the allocator is already initialized with a
+ * different page size, or the error of a failed region setup.
+ *
+ * Note: the page size used by the allocator is not the same thing as
+ *       the alignment used to reserve dmem memory.
+ */
+int dmem_alloc_init(unsigned long dpage_shift)
+{
+	struct dmem_node *dnode;
+	struct dmem_region *dregion;
+	unsigned long dpages;
+	int ret = 0;
+
+	if (dpage_shift < PAGE_SHIFT)
+		return -EINVAL;
+
+	mutex_lock(&dmem_pool.lock);
+
+	/* a non-zero dpage_shift means the allocator is already set up */
+	if (dmem_pool.dpage_shift) {
+		/*
+		 * double init on the same page size is okay
+		 * to make the unit tests happy
+		 */
+		if (dmem_pool.dpage_shift != dpage_shift)
+			ret = -EBUSY;
+
+		goto exit;
+	}
+
+	/* must be set first: the region setup converts addresses with it */
+	dmem_pool.dpage_shift = dpage_shift;
+
+	for_each_dmem_node(dnode) {
+		for_each_dmem_region(dnode, dregion) {
+			ret = dmem_alloc_region_init(dregion, &dpages);
+			if (ret < 0) {
+				/* undo the regions set up so far */
+				__dmem_alloc_uinit();
+				goto exit;
+			}
+
+			dnode_count_free_dpages(dnode, dpages);
+		}
+		/* nothing is allocated yet, so total equals free */
+		dnode->total_dpages = dnode->free_dpages;
+	}
+
+	dmem_pool.total_dpages = dmem_pool.free_dpages;
+
+	if (dmem_pool.unaligned_pages && !ret)
+		pr_warn("dmem: %llu pages are wasted due to alignment\n",
+			(unsigned long long)dmem_pool.unaligned_pages);
+exit:
+	/* count the caller as a user, also for a repeated init */
+	if (!ret)
+		dmem_pool.user_count++;
+
+	mutex_unlock(&dmem_pool.lock);
+	return ret;
+}
+EXPORT_SYMBOL(dmem_alloc_init);
+
+/*
+ * Allocate up to @try_max contiguous dpages from @dregion.
+ *
+ *   @dregion: the region to allocate from
+ *   @try_max: the number of dpages the caller would like to get
+ *   @result_nr: on success, the number of dpages actually allocated
+ *
+ * The first free dpage at or after next_free_pos is taken, then the run
+ * is extended over the directly following dpages for as long as they are
+ * free, up to @try_max dpages and never beyond the end of the region.
+ *
+ * Return the physical address of the first allocated dpage, or 0 if the
+ * region has no free dpage; @result_nr is left untouched in that case.
+ */
+static phys_addr_t
+dmem_alloc_region_page(struct dmem_region *dregion, unsigned int try_max,
+		       unsigned int *result_nr)
+{
+	unsigned long pos, dpages;
+	unsigned int i;
+
+	/* no dpage is available in this region */
+	if (!dregion->bitmap)
+		return 0;
+
+	dpages = dregion->dpage_end_pfn - dregion->dpage_start_pfn;
+
+	/* no free page in this region */
+	if (dregion->next_free_pos >= dpages)
+		return 0;
+
+	pos = find_next_zero_bit(dregion->bitmap, dpages,
+				 dregion->next_free_pos);
+	if (pos >= dpages) {
+		/* remember that the region is full to skip the next scan */
+		dregion->next_free_pos = pos;
+		return 0;
+	}
+
+	__set_bit(pos, dregion->bitmap);
+
+	/*
+	 * Do not go beyond the region: counting the dpage at @pos there are
+	 * "dpages - pos" dpages left, so the last dpage of the region can be
+	 * part of the run as well.
+	 */
+	try_max = min(try_max, (unsigned int)(dpages - pos));
+	for (i = 1; i < try_max; i++)
+		if (__test_and_set_bit(pos + i, dregion->bitmap))
+			break;
+
+	*result_nr = i;
+	dregion->next_free_pos = pos + *result_nr;
+	return dpage_to_phys(dregion->dpage_start_pfn + pos);
+}
+
+/*
+ * allocate dmem pages from the nodelist
+ *
+ *   @nodelist: dmem_node's nodelist
+ *   @nodemask: nodemask for filtering the dmem nodelist, may be NULL
+ *   @try_max: try to allocate @try_max dpages if possible
+ *   @result_nr: allocated dpage number returned to the caller; may be
+ *      NULL only if @try_max is not greater than 1
+ *
+ * The nodes are tried in nodelist order and, within a node, the regions
+ * in list order; the first region that can provide a dpage wins. The
+ * pool lock is taken and released once per node.
+ *
+ * return the physical address of the first dpage allocated from dmem
+ * pool, or 0 on failure. The allocated dpage number is filled into
+ * @result_nr
+ */
+static phys_addr_t
+dmem_alloc_pages_from_nodelist(int *nodelist, nodemask_t *nodemask,
+			       unsigned int try_max, unsigned int *result_nr)
+{
+	struct dmem_node *dnode;
+	struct dmem_region *dregion;
+	phys_addr_t addr = 0;
+	int node, i;
+	unsigned int local_result_nr;
+
+	WARN_ON(try_max > 1 && !result_nr);
+
+	if (!result_nr)
+		result_nr = &local_result_nr;
+
+	*result_nr = 0;
+
+	for (i = 0; !addr && i < ARRAY_SIZE(dnode->nodelist); i++) {
+		node = nodelist[i];
+
+		/*
+		 * dmem_uinit() stores NUMA_NO_NODE in a nodelist to stop the
+		 * allocator; such an entry must never be used as an index
+		 * into the nodemask or into dmem_pool.nodes.
+		 */
+		if (node < 0 || node >= (int)ARRAY_SIZE(dmem_pool.nodes))
+			break;
+
+		if (nodemask && !node_isset(node, *nodemask))
+			continue;
+
+		mutex_lock(&dmem_pool.lock);
+
+		WARN_ON(!dmem_pool.dpage_shift);
+
+		dnode = &dmem_pool.nodes[node];
+		for_each_dmem_region(dnode, dregion) {
+			addr = dmem_alloc_region_page(dregion, try_max,
+						      result_nr);
+			if (addr) {
+				dnode_count_free_dpages(dnode,
+							-(long)(*result_nr));
+				break;
+			}
+		}
+
+		mutex_unlock(&dmem_pool.lock);
+	}
+	return addr;
+}
+
+/*
+ * allocate a dmem page from the dmem pool and try to allocate more
+ * contiguous dpages if @try_max is greater than 1
+ *
+ *   @nid: the NUMA node whose nodelist is used for the allocation
+ *   @nodemask: nodemask for filtering the dmem nodelist
+ *   @try_max: try to allocate @try_max dpages if possible
+ *   @result_nr: allocated dpage number returned to the caller
+ *
+ * return the physical address of the first dpage allocated from dmem
+ * pool, or 0 on failure (including a @nid that is not a valid index
+ * into dmem_pool.nodes). The allocated dpage number is filled into
+ * @result_nr
+ */
+phys_addr_t
+dmem_alloc_pages_nodemask(int nid, nodemask_t *nodemask, unsigned int try_max,
+			  unsigned int *result_nr)
+{
+	int *nodelist;
+
+	/* compare against the number of nodes, not the size of that number */
+	if (nid < 0 || nid >= (int)ARRAY_SIZE(dmem_pool.nodes))
+		return 0;
+
+	nodelist = dmem_nodelist(nid);
+	return dmem_alloc_pages_from_nodelist(nodelist, nodemask,
+					      try_max, result_nr);
+}
+EXPORT_SYMBOL(dmem_alloc_pages_nodemask);
+
+/*
+ * dmem_alloc_pages_vma - Allocate pages for a VMA.
+ *
+ *   @vma:  Pointer to VMA or NULL if not available.
+ *   @addr: Virtual Address of the allocation. Must be inside the VMA.
+ *   @try_max: try to allocate @try_max dpages if possible
+ *   @result_nr: allocated dpage number returned to the caller
+ *
+ * @vma and @addr are not consulted by this implementation: the nodelist
+ * of the node the caller is running on is used, without a nodemask.
+ *
+ * Return the physical address of the first dpage allocated from dmem
+ * pool, or 0 on failure. The allocated dpage number is filled into
+ * @result_nr
+ */
+phys_addr_t
+dmem_alloc_pages_vma(struct vm_area_struct *vma, unsigned long addr,
+		     unsigned int try_max, unsigned int *result_nr)
+{
+	phys_addr_t phys_addr;
+	int *nl;
+	unsigned int cpuset_mems_cookie;
+
+retry_cpuset:
+	/*
+	 * Take the cookie before every attempt; read_mems_allowed_retry()
+	 * below compares against it and must not see an uninitialized value.
+	 */
+	cpuset_mems_cookie = read_mems_allowed_begin();
+	nl = dmem_nodelist(numa_node_id());
+
+	phys_addr = dmem_alloc_pages_from_nodelist(nl, NULL, try_max,
+						   result_nr);
+	/* retry only a failed allocation that raced with a mems update */
+	if (unlikely(!phys_addr && read_mems_allowed_retry(cpuset_mems_cookie)))
+		goto retry_cpuset;
+
+	return phys_addr;
+}
+EXPORT_SYMBOL(dmem_alloc_pages_vma);
+
+/*
+ * Look up the region whose reserved range [start, end) contains
+ * @phys_addr. On a match the owning node is stored in @pdnode and the
+ * region is returned; otherwise NULL is returned and @pdnode is not
+ * written.
+ *
+ * There is no need to call it under a lock: it only uses the reserved
+ * addresses, which are registered initially and are not modified at
+ * run time.
+ */
+static struct dmem_region *find_dmem_region(phys_addr_t phys_addr,
+					    struct dmem_node **pdnode)
+{
+	struct dmem_region *dregion;
+	struct dmem_node *dnode;
+
+	for_each_dmem_node(dnode) {
+		for_each_dmem_region(dnode, dregion) {
+			if (phys_addr >= dregion->reserved_start_addr &&
+			    phys_addr < dregion->reserved_end_addr) {
+				*pdnode = dnode;
+				return dregion;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * free dmem pages to the dmem pool
+ *   @addr: the physical address of the first dpage to be freed
+ *   @dpages_nr: the number of dpages to be freed
+ *
+ * The dpages must all belong to the dpage-aligned part of one region.
+ * A request that matches no initialized region, or that does not fit
+ * into the region, triggers a warning and is ignored. dpages that
+ * contain an erroneous page stay allocated and are not counted as free.
+ */
+void dmem_free_pages(phys_addr_t addr, unsigned int dpages_nr)
+{
+	struct dmem_region *dregion;
+	struct dmem_node *pdnode = NULL;
+	phys_addr_t dpage = phys_to_dpage(addr);
+	u64 pos;
+	unsigned long err_dpages;
+
+	mutex_lock(&dmem_pool.lock);
+
+	WARN_ON(!dmem_pool.dpage_shift);
+
+	/* warn and bail out instead of dereferencing a NULL region/bitmap */
+	dregion = find_dmem_region(addr, &pdnode);
+	if (WARN_ON(!dregion || !dregion->bitmap || !pdnode))
+		goto out;
+
+	/*
+	 * it is not possible to span multiple regions, and clearing bits
+	 * outside of the region's bitmap must be avoided
+	 */
+	if (WARN_ON(dpage < dregion->dpage_start_pfn ||
+		    dpage + dpages_nr > dregion->dpage_end_pfn))
+		goto out;
+
+	/* let the next allocation start its search at the freed dpages */
+	pos = dpage - dregion->dpage_start_pfn;
+	dregion->next_free_pos = min(dregion->next_free_pos, pos);
+
+	err_dpages = dmem_alloc_bitmap_clear(dregion, dpage, dpages_nr);
+
+	dnode_count_free_dpages(pdnode, dpages_nr - err_dpages);
+out:
+	mutex_unlock(&dmem_pool.lock);
+}
+EXPORT_SYMBOL(dmem_free_pages);
-- 
2.28.0


  parent reply	other threads:[~2020-10-08  7:53 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-08  7:53 [PATCH 00/35] Enhance memory utilization with DMEMFS yulei.kernel
2020-10-08  7:53 ` [PATCH 01/35] fs: introduce dmemfs module yulei.kernel
2020-11-10 20:04   ` Al Viro
2020-11-11  8:53     ` yulei zhang
2020-11-11 23:09       ` Al Viro
2020-11-12 10:03         ` yulei zhang
2020-10-08  7:53 ` [PATCH 02/35] mm: support direct memory reservation yulei.kernel
2020-10-08 20:27   ` Randy Dunlap
2020-10-08 20:34   ` Randy Dunlap
2020-10-08  7:53 ` yulei.kernel [this message]
2020-10-08  7:53 ` [PATCH 04/35] dmem: let pat recognize dmem yulei.kernel
2020-10-13  7:27   ` Paolo Bonzini
2020-10-13  9:53     ` yulei zhang
2020-10-08  7:53 ` [PATCH 05/35] dmemfs: support mmap yulei.kernel
2020-10-08  7:53 ` [PATCH 06/35] dmemfs: support truncating inode down yulei.kernel
2020-10-08  7:53 ` [PATCH 07/35] dmem: trace core functions yulei.kernel
2020-10-08  7:53 ` [PATCH 08/35] dmem: show some statistic in debugfs yulei.kernel
2020-10-08 20:23   ` Randy Dunlap
2020-10-09 11:49     ` yulei zhang
2020-10-08  7:53 ` [PATCH 09/35] dmemfs: support remote access yulei.kernel
2020-10-08  7:54 ` [PATCH 10/35] dmemfs: introduce max_alloc_try_dpages parameter yulei.kernel
2020-10-08  7:54 ` [PATCH 11/35] mm: export mempolicy interfaces to serve dmem allocator yulei.kernel
2020-10-08  7:54 ` [PATCH 12/35] dmem: introduce mempolicy support yulei.kernel
2020-10-08  7:54 ` [PATCH 13/35] mm, dmem: introduce PFN_DMEM and pfn_t_dmem yulei.kernel
2020-10-08  7:54 ` [PATCH 14/35] mm, dmem: dmem-pmd vs thp-pmd yulei.kernel
2020-10-08  7:54 ` [PATCH 15/35] mm: add pmd_special() check for pmd_trans_huge_lock() yulei.kernel
2020-10-08  7:54 ` [PATCH 16/35] dmemfs: introduce ->split() to dmemfs_vm_ops yulei.kernel
2020-10-08  7:54 ` [PATCH 17/35] mm, dmemfs: support unmap_page_range() for dmemfs pmd yulei.kernel
2020-10-08  7:54 ` [PATCH 18/35] mm: follow_pmd_mask() for dmem huge pmd yulei.kernel
2020-10-08  7:54 ` [PATCH 19/35] mm: gup_huge_pmd() " yulei.kernel
2020-10-08  7:54 ` [PATCH 20/35] mm: support dmem huge pmd for vmf_insert_pfn_pmd() yulei.kernel
2020-10-08  7:54 ` [PATCH 21/35] mm: support dmem huge pmd for follow_pfn() yulei.kernel
2020-10-08  7:54 ` [PATCH 22/35] kvm, x86: Distinguish dmemfs page from mmio page yulei.kernel
2020-10-09  0:58   ` Sean Christopherson
2020-10-09 10:28     ` Joao Martins
2020-10-09 11:42       ` yulei zhang
2020-10-08  7:54 ` [PATCH 23/35] kvm, x86: introduce VM_DMEM yulei.kernel
2020-10-08  7:54 ` [PATCH 24/35] dmemfs: support hugepage for dmemfs yulei.kernel
2020-10-08  7:54 ` [PATCH 25/35] mm, x86, dmem: fix estimation of reserved page for vaddr_get_pfn() yulei.kernel
2020-10-08  7:54 ` [PATCH 26/35] mm, dmem: introduce pud_special() yulei.kernel
2020-10-08  7:54 ` [PATCH 27/35] mm: add pud_special() to support dmem huge pud yulei.kernel
2020-10-08  7:54 ` [PATCH 28/35] mm, dmemfs: support huge_fault() for dmemfs yulei.kernel
2020-10-08  7:54 ` [PATCH 29/35] mm: add follow_pte_pud() yulei.kernel
2020-10-08  7:54 ` [PATCH 30/35] dmem: introduce dmem_bitmap_alloc() and dmem_bitmap_free() yulei.kernel
2020-10-08  7:54 ` [PATCH 31/35] dmem: introduce mce handler yulei.kernel
2020-10-08  7:54 ` [PATCH 32/35] mm, dmemfs: register and handle the dmem mce yulei.kernel
2020-10-08  7:54 ` [PATCH 33/35] kvm, x86: temporary disable record_steal_time for dmem yulei.kernel
2020-10-08  7:54 ` [PATCH 34/35] dmem: add dmem unit tests yulei.kernel
2020-10-08  7:54 ` [PATCH 35/35] Add documentation for dmemfs yulei.kernel
2020-10-09  1:26   ` Randy Dunlap
2020-10-08 19:01 ` [PATCH 00/35] Enhance memory utilization with DMEMFS Joao Martins
2020-10-09 11:39   ` yulei zhang
2020-10-09 11:53     ` Joao Martins
2020-10-10  8:15       ` yulei zhang
2020-10-12 10:59         ` Joao Martins
2020-10-14 22:25           ` Dan Williams
2020-10-19 13:37             ` Paolo Bonzini
2020-10-19 19:03               ` Joao Martins
2020-10-20 15:22                 ` yulei zhang
2020-10-12 11:57 ` Zengtao (B)
2020-10-13  2:45   ` yulei zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=57408f6bd8122d915e46deed96a20a8ac6d90d9f.1602093760.git.yuleixzhang@tencent.com \
    --to=yulei.kernel@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=gloryxiao@tencent.com \
    --cc=kernellwp@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=lihaiwei.kernel@gmail.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=naoya.horiguchi@nec.com \
    --cc=pbonzini@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=xiaoguangrong.eric@gmail.com \
    --cc=yuleixzhang@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.