* [RFC] iommu: Use put_pages_list
@ 2021-09-30 16:20 ` Matthew Wilcox (Oracle)
  0 siblings, 0 replies; 12+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-09-30 16:20 UTC (permalink / raw)
  To: Joerg Roedel, Suravee Suthikulpanit, Will Deacon,
	David Woodhouse, Lu Baolu, iommu, linux-kernel
  Cc: Matthew Wilcox (Oracle)

page->freelist is for the use of slab.  We already have the ability
to free a list of pages in the core mm, but it requires the use of a
list_head and for the pages to be chained together through page->lru.
Switch the iommu code over to using put_pages_list().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 drivers/iommu/amd/io_pgtable.c | 99 +++++++++++++++-------------------
 drivers/iommu/dma-iommu.c      | 11 +---
 drivers/iommu/intel/iommu.c    | 89 +++++++++++-------------------
 include/linux/iommu.h          |  3 +-
 4 files changed, 77 insertions(+), 125 deletions(-)
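
A rough before/after sketch of the two chaining styles being swapped
here (illustrative only, not part of the patch; the function names
below are invented):

	#include <linux/list.h>
	#include <linux/mm.h>	/* put_pages_list(), free_page() */

	/* Before: pages chained intrusively through page->freelist,
	 * walked and freed one at a time.
	 */
	static void old_style_free(struct page *freelist)
	{
		while (freelist != NULL) {
			struct page *next = freelist->freelist;

			free_page((unsigned long)page_address(freelist));
			freelist = next;
		}
	}

	/* After: pages collected on a caller-owned list_head through
	 * page->lru, then released by one call into the core mm.
	 */
	static void new_style_free(struct page **pages, int n)
	{
		LIST_HEAD(freelist);
		int i;

		for (i = 0; i < n; i++)
			list_add_tail(&pages[i]->lru, &freelist);

		put_pages_list(&freelist);
	}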

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 182c93a43efd..8dfa6ee58b76 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -74,49 +74,37 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
  *
  ****************************************************************************/
 
-static void free_page_list(struct page *freelist)
-{
-	while (freelist != NULL) {
-		unsigned long p = (unsigned long)page_address(freelist);
-
-		freelist = freelist->freelist;
-		free_page(p);
-	}
-}
-
-static struct page *free_pt_page(unsigned long pt, struct page *freelist)
+static void free_pt_page(unsigned long pt, struct list_head *list)
 {
 	struct page *p = virt_to_page((void *)pt);
 
-	p->freelist = freelist;
-
-	return p;
+	list_add_tail(&p->lru, list);
 }
 
 #define DEFINE_FREE_PT_FN(LVL, FN)						\
-static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist)	\
-{										\
-	unsigned long p;							\
-	u64 *pt;								\
-	int i;									\
-										\
-	pt = (u64 *)__pt;							\
-										\
-	for (i = 0; i < 512; ++i) {						\
-		/* PTE present? */						\
-		if (!IOMMU_PTE_PRESENT(pt[i]))					\
-			continue;						\
-										\
-		/* Large PTE? */						\
-		if (PM_PTE_LEVEL(pt[i]) == 0 ||					\
-		    PM_PTE_LEVEL(pt[i]) == 7)					\
-			continue;						\
-										\
-		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);			\
-		freelist = FN(p, freelist);					\
-	}									\
-										\
-	return free_pt_page((unsigned long)pt, freelist);			\
+static void free_pt_##LVL (unsigned long __pt, struct list_head *list)	\
+{									\
+	unsigned long p;						\
+	u64 *pt;							\
+	int i;								\
+									\
+	pt = (u64 *)__pt;						\
+									\
+	for (i = 0; i < 512; ++i) {					\
+		/* PTE present? */					\
+		if (!IOMMU_PTE_PRESENT(pt[i]))				\
+			continue;					\
+									\
+		/* Large PTE? */					\
+		if (PM_PTE_LEVEL(pt[i]) == 0 ||				\
+		    PM_PTE_LEVEL(pt[i]) == 7)				\
+			continue;					\
+									\
+		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);		\
+		FN(p, list);						\
+	}								\
+									\
+	free_pt_page((unsigned long)pt, list);				\
 }
 
 DEFINE_FREE_PT_FN(l2, free_pt_page)
@@ -125,36 +113,33 @@ DEFINE_FREE_PT_FN(l4, free_pt_l3)
 DEFINE_FREE_PT_FN(l5, free_pt_l4)
 DEFINE_FREE_PT_FN(l6, free_pt_l5)
 
-static struct page *free_sub_pt(unsigned long root, int mode,
-				struct page *freelist)
+static void free_sub_pt(unsigned long root, int mode, struct list_head *list)
 {
 	switch (mode) {
 	case PAGE_MODE_NONE:
 	case PAGE_MODE_7_LEVEL:
 		break;
 	case PAGE_MODE_1_LEVEL:
-		freelist = free_pt_page(root, freelist);
+		free_pt_page(root, list);
 		break;
 	case PAGE_MODE_2_LEVEL:
-		freelist = free_pt_l2(root, freelist);
+		free_pt_l2(root, list);
 		break;
 	case PAGE_MODE_3_LEVEL:
-		freelist = free_pt_l3(root, freelist);
+		free_pt_l3(root, list);
 		break;
 	case PAGE_MODE_4_LEVEL:
-		freelist = free_pt_l4(root, freelist);
+		free_pt_l4(root, list);
 		break;
 	case PAGE_MODE_5_LEVEL:
-		freelist = free_pt_l5(root, freelist);
+		free_pt_l5(root, list);
 		break;
 	case PAGE_MODE_6_LEVEL:
-		freelist = free_pt_l6(root, freelist);
+		free_pt_l6(root, list);
 		break;
 	default:
 		BUG();
 	}
-
-	return freelist;
 }
 
 void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
@@ -362,7 +347,7 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
 	return pte;
 }
 
-static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
+static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *list)
 {
 	unsigned long pt;
 	int mode;
@@ -373,12 +358,12 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
 	}
 
 	if (!IOMMU_PTE_PRESENT(pteval))
-		return freelist;
+		return;
 
 	pt   = (unsigned long)IOMMU_PTE_PAGE(pteval);
 	mode = IOMMU_PTE_MODE(pteval);
 
-	return free_sub_pt(pt, mode, freelist);
+	free_sub_pt(pt, mode, list);
 }
 
 /*
@@ -392,7 +377,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
 			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
 	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
-	struct page *freelist = NULL;
+	LIST_HEAD(freelist);
 	bool updated = false;
 	u64 __pte, *pte;
 	int ret, i, count;
@@ -412,9 +397,9 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
 		goto out;
 
 	for (i = 0; i < count; ++i)
-		freelist = free_clear_pte(&pte[i], pte[i], freelist);
+		free_clear_pte(&pte[i], pte[i], &freelist);
 
-	if (freelist != NULL)
+	if (!list_empty(&freelist))
 		updated = true;
 
 	if (count > 1) {
@@ -449,7 +434,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
 	}
 
 	/* Everything flushed out, free pages now */
-	free_page_list(freelist);
+	put_pages_list(&freelist);
 
 	return ret;
 }
@@ -511,7 +496,7 @@ static void v1_free_pgtable(struct io_pgtable *iop)
 {
 	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
 	struct protection_domain *dom;
-	struct page *freelist = NULL;
+	LIST_HEAD(freelist);
 	unsigned long root;
 
 	if (pgtable->mode == PAGE_MODE_NONE)
@@ -530,9 +515,9 @@ static void v1_free_pgtable(struct io_pgtable *iop)
 	       pgtable->mode > PAGE_MODE_6_LEVEL);
 
 	root = (unsigned long)pgtable->root;
-	freelist = free_sub_pt(root, pgtable->mode, freelist);
+	free_sub_pt(root, pgtable->mode, &freelist);
 
-	free_page_list(freelist);
+	put_pages_list(&freelist);
 }
 
 static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 896bea04c347..16742d9d8a1a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -66,14 +66,7 @@ early_param("iommu.forcedac", iommu_dma_forcedac_setup);
 
 static void iommu_dma_entry_dtor(unsigned long data)
 {
-	struct page *freelist = (struct page *)data;
-
-	while (freelist) {
-		unsigned long p = (unsigned long)page_address(freelist);
-
-		freelist = freelist->freelist;
-		free_page(p);
-	}
+	put_pages_list((struct list_head *)data);
 }
 
 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
@@ -481,7 +474,7 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 	else if (gather && gather->queued)
 		queue_iova(iovad, iova_pfn(iovad, iova),
 				size >> iova_shift(iovad),
-				(unsigned long)gather->freelist);
+				(unsigned long)&gather->freelist);
 	else
 		free_iova_fast(iovad, iova_pfn(iovad, iova),
 				size >> iova_shift(iovad));
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d75f59ae28e6..eaaff646e1b4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1186,35 +1186,30 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
    know the hardware page-walk will no longer touch them.
    The 'pte' argument is the *parent* PTE, pointing to the page that is to
    be freed. */
-static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
-					    int level, struct dma_pte *pte,
-					    struct page *freelist)
+static void dma_pte_list_pagetables(struct dmar_domain *domain,
+				    int level, struct dma_pte *pte,
+				    struct list_head *list)
 {
 	struct page *pg;
 
 	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
-	pg->freelist = freelist;
-	freelist = pg;
+	list_add_tail(&pg->lru, list);
 
 	if (level == 1)
-		return freelist;
+		return;
 
 	pte = page_address(pg);
 	do {
 		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
-			freelist = dma_pte_list_pagetables(domain, level - 1,
-							   pte, freelist);
+			dma_pte_list_pagetables(domain, level - 1, pte, list);
 		pte++;
 	} while (!first_pte_in_page(pte));
-
-	return freelist;
 }
 
-static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
-					struct dma_pte *pte, unsigned long pfn,
-					unsigned long start_pfn,
-					unsigned long last_pfn,
-					struct page *freelist)
+static void dma_pte_clear_level(struct dmar_domain *domain, int level,
+				struct dma_pte *pte, unsigned long pfn,
+				unsigned long start_pfn, unsigned long last_pfn,
+				struct list_head *list)
 {
 	struct dma_pte *first_pte = NULL, *last_pte = NULL;
 
@@ -1235,7 +1230,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
 			/* These subordinate page tables are going away entirely. Don't
 			   bother to clear them; we're just going to *free* them. */
 			if (level > 1 && !dma_pte_superpage(pte))
-				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
+				dma_pte_list_pagetables(domain, level - 1, pte, list);
 
 			dma_clear_pte(pte);
 			if (!first_pte)
@@ -1243,10 +1238,10 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
 			last_pte = pte;
 		} else if (level > 1) {
 			/* Recurse down into a level that isn't *entirely* obsolete */
-			freelist = dma_pte_clear_level(domain, level - 1,
-						       phys_to_virt(dma_pte_addr(pte)),
-						       level_pfn, start_pfn, last_pfn,
-						       freelist);
+			dma_pte_clear_level(domain, level - 1,
+					    phys_to_virt(dma_pte_addr(pte)),
+					    level_pfn, start_pfn, last_pfn,
+					    list);
 		}
 next:
 		pfn += level_size(level);
@@ -1255,47 +1250,28 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
 	if (first_pte)
 		domain_flush_cache(domain, first_pte,
 				   (void *)++last_pte - (void *)first_pte);
-
-	return freelist;
 }
 
 /* We can't just free the pages because the IOMMU may still be walking
    the page tables, and may have cached the intermediate levels. The
    pages can only be freed after the IOTLB flush has been done. */
-static struct page *domain_unmap(struct dmar_domain *domain,
-				 unsigned long start_pfn,
-				 unsigned long last_pfn,
-				 struct page *freelist)
+static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
+			 unsigned long last_pfn, struct list_head *list)
 {
 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
 	BUG_ON(start_pfn > last_pfn);
 
 	/* we don't need lock here; nobody else touches the iova range */
-	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
-				       domain->pgd, 0, start_pfn, last_pfn,
-				       freelist);
+	dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
+			    domain->pgd, 0, start_pfn, last_pfn, list);
 
 	/* free pgd */
 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 		struct page *pgd_page = virt_to_page(domain->pgd);
-		pgd_page->freelist = freelist;
-		freelist = pgd_page;
-
+		list_add_tail(&pgd_page->lru, list);
 		domain->pgd = NULL;
 	}
-
-	return freelist;
-}
-
-static void dma_free_pagelist(struct page *freelist)
-{
-	struct page *pg;
-
-	while ((pg = freelist)) {
-		freelist = pg->freelist;
-		free_pgtable_page(page_address(pg));
-	}
 }
 
 /* iommu handling */
@@ -1972,11 +1948,10 @@ static void domain_exit(struct dmar_domain *domain)
 	domain_remove_dev_info(domain);
 
 	if (domain->pgd) {
-		struct page *freelist;
+		LIST_HEAD(pages);
 
-		freelist = domain_unmap(domain, 0,
-					DOMAIN_MAX_PFN(domain->gaw), NULL);
-		dma_free_pagelist(freelist);
+		domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &pages);
+		put_pages_list(&pages);
 	}
 
 	free_domain_mem(domain);
@@ -4068,19 +4043,17 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
 		{
 			struct dmar_drhd_unit *drhd;
 			struct intel_iommu *iommu;
-			struct page *freelist;
+			LIST_HEAD(pages);
 
-			freelist = domain_unmap(si_domain,
-						start_vpfn, last_vpfn,
-						NULL);
+			domain_unmap(si_domain, start_vpfn, last_vpfn, &pages);
 
 			rcu_read_lock();
 			for_each_active_iommu(iommu, drhd)
 				iommu_flush_iotlb_psi(iommu, si_domain,
 					start_vpfn, mhp->nr_pages,
-					!freelist, 0);
+					list_empty(&pages), 0);
 			rcu_read_unlock();
-			dma_free_pagelist(freelist);
+			put_pages_list(&pages);
 		}
 		break;
 	}
@@ -5087,8 +5060,7 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 	start_pfn = iova >> VTD_PAGE_SHIFT;
 	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
 
-	gather->freelist = domain_unmap(dmar_domain, start_pfn,
-					last_pfn, gather->freelist);
+	domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
 
 	if (dmar_domain->max_addr == iova + size)
 		dmar_domain->max_addr = iova;
@@ -5124,9 +5096,10 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
 
 	for_each_domain_iommu(iommu_id, dmar_domain)
 		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
-				      start_pfn, nrpages, !gather->freelist, 0);
+				      start_pfn, nrpages,
+				      list_empty(&gather->freelist), 0);
 
-	dma_free_pagelist(gather->freelist);
+	put_pages_list(&gather->freelist);
 }
 
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d2f3435e7d17..de0c57a567c8 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -186,7 +186,7 @@ struct iommu_iotlb_gather {
 	unsigned long		start;
 	unsigned long		end;
 	size_t			pgsize;
-	struct page		*freelist;
+	struct list_head	freelist;
 	bool			queued;
 };
 
@@ -399,6 +399,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
 {
 	*gather = (struct iommu_iotlb_gather) {
 		.start	= ULONG_MAX,
+		.freelist = LIST_HEAD_INIT(gather->freelist),
 	};
 }
 
-- 
2.32.0
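
The ordering constraint called out in the Intel comment above ("pages
can only be freed after the IOTLB flush has been done") is what makes
the list_head form convenient: the unmap path gathers page-table pages,
the flush runs, and only then does put_pages_list() release them.  A
minimal sketch of that flow, with a hypothetical domain type and helper
names standing in for the driver-specific ones:

	#include <linux/list.h>
	#include <linux/mm.h>	/* put_pages_list() */

	struct mock_domain;	/* stand-in for dmar_domain and friends */

	/* Assumed helpers: clear the PTEs covering [start, end),
	 * adding any freed table pages to *list via page->lru, then
	 * invalidate the IOTLB for the same range.
	 */
	void mock_clear_ptes(struct mock_domain *dom, unsigned long start,
			     unsigned long end, struct list_head *list);
	void mock_flush_iotlb(struct mock_domain *dom, unsigned long start,
			      unsigned long end);

	static void mock_unmap_range(struct mock_domain *dom,
				     unsigned long start, unsigned long end)
	{
		LIST_HEAD(freelist);

		mock_clear_ptes(dom, start, end, &freelist);
		mock_flush_iotlb(dom, start, end); /* walk is now over */
		put_pages_list(&freelist);	   /* safe to free */
	}

Note also the iommu_iotlb_gather_init() hunk: the embedded list has to
be initialised with LIST_HEAD_INIT(gather->freelist) rather than left
zeroed, since an empty list_head must point at itself.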

* Re: [RFC] iommu: Use put_pages_list
  2021-09-30 16:20 ` Matthew Wilcox (Oracle)
@ 2021-10-07 18:16   ` Matthew Wilcox
  -1 siblings, 0 replies; 12+ messages in thread
From: Matthew Wilcox @ 2021-10-07 18:16 UTC (permalink / raw)
  To: Joerg Roedel, Suravee Suthikulpanit, Will Deacon,
	David Woodhouse, Lu Baolu, iommu, linux-kernel

ping?

On Thu, Sep 30, 2021 at 05:20:42PM +0100, Matthew Wilcox (Oracle) wrote:
> page->freelist is for the use of slab.  We already have the ability
> to free a list of pages in the core mm, but it requires the use of a
> list_head and for the pages to be chained together through page->lru.
> Switch the iommu code over to using put_pages_list().
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  drivers/iommu/amd/io_pgtable.c | 99 +++++++++++++++-------------------
>  drivers/iommu/dma-iommu.c      | 11 +---
>  drivers/iommu/intel/iommu.c    | 89 +++++++++++-------------------
>  include/linux/iommu.h          |  3 +-
>  4 files changed, 77 insertions(+), 125 deletions(-)
> 
> diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
> index 182c93a43efd..8dfa6ee58b76 100644
> --- a/drivers/iommu/amd/io_pgtable.c
> +++ b/drivers/iommu/amd/io_pgtable.c
> @@ -74,49 +74,37 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
>   *
>   ****************************************************************************/
>  
> -static void free_page_list(struct page *freelist)
> -{
> -	while (freelist != NULL) {
> -		unsigned long p = (unsigned long)page_address(freelist);
> -
> -		freelist = freelist->freelist;
> -		free_page(p);
> -	}
> -}
> -
> -static struct page *free_pt_page(unsigned long pt, struct page *freelist)
> +static void free_pt_page(unsigned long pt, struct list_head *list)
>  {
>  	struct page *p = virt_to_page((void *)pt);
>  
> -	p->freelist = freelist;
> -
> -	return p;
> +	list_add_tail(&p->lru, list);
>  }
>  
>  #define DEFINE_FREE_PT_FN(LVL, FN)						\
> -static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist)	\
> -{										\
> -	unsigned long p;							\
> -	u64 *pt;								\
> -	int i;									\
> -										\
> -	pt = (u64 *)__pt;							\
> -										\
> -	for (i = 0; i < 512; ++i) {						\
> -		/* PTE present? */						\
> -		if (!IOMMU_PTE_PRESENT(pt[i]))					\
> -			continue;						\
> -										\
> -		/* Large PTE? */						\
> -		if (PM_PTE_LEVEL(pt[i]) == 0 ||					\
> -		    PM_PTE_LEVEL(pt[i]) == 7)					\
> -			continue;						\
> -										\
> -		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);			\
> -		freelist = FN(p, freelist);					\
> -	}									\
> -										\
> -	return free_pt_page((unsigned long)pt, freelist);			\
> +static void free_pt_##LVL (unsigned long __pt, struct list_head *list)	\
> +{									\
> +	unsigned long p;						\
> +	u64 *pt;							\
> +	int i;								\
> +									\
> +	pt = (u64 *)__pt;						\
> +									\
> +	for (i = 0; i < 512; ++i) {					\
> +		/* PTE present? */					\
> +		if (!IOMMU_PTE_PRESENT(pt[i]))				\
> +			continue;					\
> +									\
> +		/* Large PTE? */					\
> +		if (PM_PTE_LEVEL(pt[i]) == 0 ||				\
> +		    PM_PTE_LEVEL(pt[i]) == 7)				\
> +			continue;					\
> +									\
> +		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);		\
> +		FN(p, list);						\
> +	}								\
> +									\
> +	free_pt_page((unsigned long)pt, list);				\
>  }
>  
>  DEFINE_FREE_PT_FN(l2, free_pt_page)
> @@ -125,36 +113,33 @@ DEFINE_FREE_PT_FN(l4, free_pt_l3)
>  DEFINE_FREE_PT_FN(l5, free_pt_l4)
>  DEFINE_FREE_PT_FN(l6, free_pt_l5)
>  
> -static struct page *free_sub_pt(unsigned long root, int mode,
> -				struct page *freelist)
> +static void free_sub_pt(unsigned long root, int mode, struct list_head *list)
>  {
>  	switch (mode) {
>  	case PAGE_MODE_NONE:
>  	case PAGE_MODE_7_LEVEL:
>  		break;
>  	case PAGE_MODE_1_LEVEL:
> -		freelist = free_pt_page(root, freelist);
> +		free_pt_page(root, list);
>  		break;
>  	case PAGE_MODE_2_LEVEL:
> -		freelist = free_pt_l2(root, freelist);
> +		free_pt_l2(root, list);
>  		break;
>  	case PAGE_MODE_3_LEVEL:
> -		freelist = free_pt_l3(root, freelist);
> +		free_pt_l3(root, list);
>  		break;
>  	case PAGE_MODE_4_LEVEL:
> -		freelist = free_pt_l4(root, freelist);
> +		free_pt_l4(root, list);
>  		break;
>  	case PAGE_MODE_5_LEVEL:
> -		freelist = free_pt_l5(root, freelist);
> +		free_pt_l5(root, list);
>  		break;
>  	case PAGE_MODE_6_LEVEL:
> -		freelist = free_pt_l6(root, freelist);
> +		free_pt_l6(root, list);
>  		break;
>  	default:
>  		BUG();
>  	}
> -
> -	return freelist;
>  }
>  
>  void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
> @@ -362,7 +347,7 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
>  	return pte;
>  }
>  
> -static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
> +static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *list)
>  {
>  	unsigned long pt;
>  	int mode;
> @@ -373,12 +358,12 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
>  	}
>  
>  	if (!IOMMU_PTE_PRESENT(pteval))
> -		return freelist;
> +		return;
>  
>  	pt   = (unsigned long)IOMMU_PTE_PAGE(pteval);
>  	mode = IOMMU_PTE_MODE(pteval);
>  
> -	return free_sub_pt(pt, mode, freelist);
> +	free_sub_pt(pt, mode, list);
>  }
>  
>  /*
> @@ -392,7 +377,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
>  			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
>  {
>  	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
> -	struct page *freelist = NULL;
> +	LIST_HEAD(freelist);
>  	bool updated = false;
>  	u64 __pte, *pte;
>  	int ret, i, count;
> @@ -412,9 +397,9 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
>  		goto out;
>  
>  	for (i = 0; i < count; ++i)
> -		freelist = free_clear_pte(&pte[i], pte[i], freelist);
> +		free_clear_pte(&pte[i], pte[i], &freelist);
>  
> -	if (freelist != NULL)
> +	if (!list_empty(&freelist))
>  		updated = true;
>  
>  	if (count > 1) {
> @@ -449,7 +434,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
>  	}
>  
>  	/* Everything flushed out, free pages now */
> -	free_page_list(freelist);
> +	put_pages_list(&freelist);
>  
>  	return ret;
>  }
> @@ -511,7 +496,7 @@ static void v1_free_pgtable(struct io_pgtable *iop)
>  {
>  	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
>  	struct protection_domain *dom;
> -	struct page *freelist = NULL;
> +	LIST_HEAD(freelist);
>  	unsigned long root;
>  
>  	if (pgtable->mode == PAGE_MODE_NONE)
> @@ -530,9 +515,9 @@ static void v1_free_pgtable(struct io_pgtable *iop)
>  	       pgtable->mode > PAGE_MODE_6_LEVEL);
>  
>  	root = (unsigned long)pgtable->root;
> -	freelist = free_sub_pt(root, pgtable->mode, freelist);
> +	free_sub_pt(root, pgtable->mode, &freelist);
>  
> -	free_page_list(freelist);
> +	put_pages_list(&freelist);
>  }
>  
>  static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 896bea04c347..16742d9d8a1a 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -66,14 +66,7 @@ early_param("iommu.forcedac", iommu_dma_forcedac_setup);
>  
>  static void iommu_dma_entry_dtor(unsigned long data)
>  {
> -	struct page *freelist = (struct page *)data;
> -
> -	while (freelist) {
> -		unsigned long p = (unsigned long)page_address(freelist);
> -
> -		freelist = freelist->freelist;
> -		free_page(p);
> -	}
> +	put_pages_list((struct list_head *)data);
>  }
>  
>  static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
> @@ -481,7 +474,7 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
>  	else if (gather && gather->queued)
>  		queue_iova(iovad, iova_pfn(iovad, iova),
>  				size >> iova_shift(iovad),
> -				(unsigned long)gather->freelist);
> +				(unsigned long)&gather->freelist);
>  	else
>  		free_iova_fast(iovad, iova_pfn(iovad, iova),
>  				size >> iova_shift(iovad));
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index d75f59ae28e6..eaaff646e1b4 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -1186,35 +1186,30 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
>     know the hardware page-walk will no longer touch them.
>     The 'pte' argument is the *parent* PTE, pointing to the page that is to
>     be freed. */
> -static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
> -					    int level, struct dma_pte *pte,
> -					    struct page *freelist)
> +static void dma_pte_list_pagetables(struct dmar_domain *domain,
> +				    int level, struct dma_pte *pte,
> +				    struct list_head *list)
>  {
>  	struct page *pg;
>  
>  	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
> -	pg->freelist = freelist;
> -	freelist = pg;
> +	list_add_tail(&pg->lru, list);
>  
>  	if (level == 1)
> -		return freelist;
> +		return;
>  
>  	pte = page_address(pg);
>  	do {
>  		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
> -			freelist = dma_pte_list_pagetables(domain, level - 1,
> -							   pte, freelist);
> +			dma_pte_list_pagetables(domain, level - 1, pte, list);
>  		pte++;
>  	} while (!first_pte_in_page(pte));
> -
> -	return freelist;
>  }
>  
> -static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
> -					struct dma_pte *pte, unsigned long pfn,
> -					unsigned long start_pfn,
> -					unsigned long last_pfn,
> -					struct page *freelist)
> +static void dma_pte_clear_level(struct dmar_domain *domain, int level,
> +				struct dma_pte *pte, unsigned long pfn,
> +				unsigned long start_pfn, unsigned long last_pfn,
> +				struct list_head *list)
>  {
>  	struct dma_pte *first_pte = NULL, *last_pte = NULL;
>  
> @@ -1235,7 +1230,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
>  			/* These subordinate page tables are going away entirely. Don't
>  			   bother to clear them; we're just going to *free* them. */
>  			if (level > 1 && !dma_pte_superpage(pte))
> -				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
> +				dma_pte_list_pagetables(domain, level - 1, pte, list);
>  
>  			dma_clear_pte(pte);
>  			if (!first_pte)
> @@ -1243,10 +1238,10 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
>  			last_pte = pte;
>  		} else if (level > 1) {
>  			/* Recurse down into a level that isn't *entirely* obsolete */
> -			freelist = dma_pte_clear_level(domain, level - 1,
> -						       phys_to_virt(dma_pte_addr(pte)),
> -						       level_pfn, start_pfn, last_pfn,
> -						       freelist);
> +			dma_pte_clear_level(domain, level - 1,
> +					    phys_to_virt(dma_pte_addr(pte)),
> +					    level_pfn, start_pfn, last_pfn,
> +					    list);
>  		}
>  next:
>  		pfn += level_size(level);
> @@ -1255,47 +1250,28 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
>  	if (first_pte)
>  		domain_flush_cache(domain, first_pte,
>  				   (void *)++last_pte - (void *)first_pte);
> -
> -	return freelist;
>  }
>  
>  /* We can't just free the pages because the IOMMU may still be walking
>     the page tables, and may have cached the intermediate levels. The
>     pages can only be freed after the IOTLB flush has been done. */
> -static struct page *domain_unmap(struct dmar_domain *domain,
> -				 unsigned long start_pfn,
> -				 unsigned long last_pfn,
> -				 struct page *freelist)
> +static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
> +			 unsigned long last_pfn, struct list_head *list)
>  {
>  	BUG_ON(!domain_pfn_supported(domain, start_pfn));
>  	BUG_ON(!domain_pfn_supported(domain, last_pfn));
>  	BUG_ON(start_pfn > last_pfn);
>  
>  	/* we don't need lock here; nobody else touches the iova range */
> -	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
> -				       domain->pgd, 0, start_pfn, last_pfn,
> -				       freelist);
> +	dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
> +			    domain->pgd, 0, start_pfn, last_pfn, list);
>  
>  	/* free pgd */
>  	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
>  		struct page *pgd_page = virt_to_page(domain->pgd);
> -		pgd_page->freelist = freelist;
> -		freelist = pgd_page;
> -
> +		list_add_tail(&pgd_page->lru, list);
>  		domain->pgd = NULL;
>  	}
> -
> -	return freelist;
> -}
> -
> -static void dma_free_pagelist(struct page *freelist)
> -{
> -	struct page *pg;
> -
> -	while ((pg = freelist)) {
> -		freelist = pg->freelist;
> -		free_pgtable_page(page_address(pg));
> -	}
>  }
>  
>  /* iommu handling */
> @@ -1972,11 +1948,10 @@ static void domain_exit(struct dmar_domain *domain)
>  	domain_remove_dev_info(domain);
>  
>  	if (domain->pgd) {
> -		struct page *freelist;
> +		LIST_HEAD(pages);
>  
> -		freelist = domain_unmap(domain, 0,
> -					DOMAIN_MAX_PFN(domain->gaw), NULL);
> -		dma_free_pagelist(freelist);
> +		domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &pages);
> +		put_pages_list(&pages);
>  	}
>  
>  	free_domain_mem(domain);
> @@ -4068,19 +4043,17 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
>  		{
>  			struct dmar_drhd_unit *drhd;
>  			struct intel_iommu *iommu;
> -			struct page *freelist;
> +			LIST_HEAD(pages);
>  
> -			freelist = domain_unmap(si_domain,
> -						start_vpfn, last_vpfn,
> -						NULL);
> +			domain_unmap(si_domain, start_vpfn, last_vpfn, &pages);
>  
>  			rcu_read_lock();
>  			for_each_active_iommu(iommu, drhd)
>  				iommu_flush_iotlb_psi(iommu, si_domain,
>  					start_vpfn, mhp->nr_pages,
> -					!freelist, 0);
> +					list_empty(&pages), 0);
>  			rcu_read_unlock();
> -			dma_free_pagelist(freelist);
> +			put_pages_list(&pages);
>  		}
>  		break;
>  	}
> @@ -5087,8 +5060,7 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
>  	start_pfn = iova >> VTD_PAGE_SHIFT;
>  	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
>  
> -	gather->freelist = domain_unmap(dmar_domain, start_pfn,
> -					last_pfn, gather->freelist);
> +	domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
>  
>  	if (dmar_domain->max_addr == iova + size)
>  		dmar_domain->max_addr = iova;
> @@ -5124,9 +5096,10 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
>  
>  	for_each_domain_iommu(iommu_id, dmar_domain)
>  		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
> -				      start_pfn, nrpages, !gather->freelist, 0);
> +				      start_pfn, nrpages,
> +				      list_empty(&gather->freelist), 0);
>  
> -	dma_free_pagelist(gather->freelist);
> +	put_pages_list(&gather->freelist);
>  }
>  
>  static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index d2f3435e7d17..de0c57a567c8 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -186,7 +186,7 @@ struct iommu_iotlb_gather {
>  	unsigned long		start;
>  	unsigned long		end;
>  	size_t			pgsize;
> -	struct page		*freelist;
> +	struct list_head	freelist;
>  	bool			queued;
>  };
>  
> @@ -399,6 +399,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
>  {
>  	*gather = (struct iommu_iotlb_gather) {
>  		.start	= ULONG_MAX,
> +		.freelist = LIST_HEAD_INIT(gather->freelist),
>  	};
>  }
>  
> -- 
> 2.32.0

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] iommu: Use put_pages_list
@ 2021-10-07 18:16   ` Matthew Wilcox
  0 siblings, 0 replies; 12+ messages in thread
From: Matthew Wilcox @ 2021-10-07 18:16 UTC (permalink / raw)
  To: Joerg Roedel, Suravee Suthikulpanit, Will Deacon,
	David Woodhouse, Lu Baolu, iommu, linux-kernel

ping?

On Thu, Sep 30, 2021 at 05:20:42PM +0100, Matthew Wilcox (Oracle) wrote:
> page->freelist is for the use of slab.  We already have the ability
> to free a list of pages in the core mm, but it requires the use of a
> list_head and for the pages to be chained together through page->lru.
> Switch the iommu code over to using free_pages_list().
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  drivers/iommu/amd/io_pgtable.c | 99 +++++++++++++++-------------------
>  drivers/iommu/dma-iommu.c      | 11 +---
>  drivers/iommu/intel/iommu.c    | 89 +++++++++++-------------------
>  include/linux/iommu.h          |  3 +-
>  4 files changed, 77 insertions(+), 125 deletions(-)
> 
> diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
> index 182c93a43efd..8dfa6ee58b76 100644
> --- a/drivers/iommu/amd/io_pgtable.c
> +++ b/drivers/iommu/amd/io_pgtable.c
> @@ -74,49 +74,37 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
>   *
>   ****************************************************************************/
>  
> -static void free_page_list(struct page *freelist)
> -{
> -	while (freelist != NULL) {
> -		unsigned long p = (unsigned long)page_address(freelist);
> -
> -		freelist = freelist->freelist;
> -		free_page(p);
> -	}
> -}
> -
> -static struct page *free_pt_page(unsigned long pt, struct page *freelist)
> +static void free_pt_page(unsigned long pt, struct list_head *list)
>  {
>  	struct page *p = virt_to_page((void *)pt);
>  
> -	p->freelist = freelist;
> -
> -	return p;
> +	list_add_tail(&p->lru, list);
>  }
>  
>  #define DEFINE_FREE_PT_FN(LVL, FN)						\
> -static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist)	\
> -{										\
> -	unsigned long p;							\
> -	u64 *pt;								\
> -	int i;									\
> -										\
> -	pt = (u64 *)__pt;							\
> -										\
> -	for (i = 0; i < 512; ++i) {						\
> -		/* PTE present? */						\
> -		if (!IOMMU_PTE_PRESENT(pt[i]))					\
> -			continue;						\
> -										\
> -		/* Large PTE? */						\
> -		if (PM_PTE_LEVEL(pt[i]) == 0 ||					\
> -		    PM_PTE_LEVEL(pt[i]) == 7)					\
> -			continue;						\
> -										\
> -		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);			\
> -		freelist = FN(p, freelist);					\
> -	}									\
> -										\
> -	return free_pt_page((unsigned long)pt, freelist);			\
> +static void free_pt_##LVL (unsigned long __pt, struct list_head *list)	\
> +{									\
> +	unsigned long p;						\
> +	u64 *pt;							\
> +	int i;								\
> +									\
> +	pt = (u64 *)__pt;						\
> +									\
> +	for (i = 0; i < 512; ++i) {					\
> +		/* PTE present? */					\
> +		if (!IOMMU_PTE_PRESENT(pt[i]))				\
> +			continue;					\
> +									\
> +		/* Large PTE? */					\
> +		if (PM_PTE_LEVEL(pt[i]) == 0 ||				\
> +		    PM_PTE_LEVEL(pt[i]) == 7)				\
> +			continue;					\
> +									\
> +		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);		\
> +		FN(p, list);						\
> +	}								\
> +									\
> +	free_pt_page((unsigned long)pt, list);				\
>  }
>  
>  DEFINE_FREE_PT_FN(l2, free_pt_page)
> @@ -125,36 +113,33 @@ DEFINE_FREE_PT_FN(l4, free_pt_l3)
>  DEFINE_FREE_PT_FN(l5, free_pt_l4)
>  DEFINE_FREE_PT_FN(l6, free_pt_l5)
>  
> -static struct page *free_sub_pt(unsigned long root, int mode,
> -				struct page *freelist)
> +static void free_sub_pt(unsigned long root, int mode, struct list_head *list)
>  {
>  	switch (mode) {
>  	case PAGE_MODE_NONE:
>  	case PAGE_MODE_7_LEVEL:
>  		break;
>  	case PAGE_MODE_1_LEVEL:
> -		freelist = free_pt_page(root, freelist);
> +		free_pt_page(root, list);
>  		break;
>  	case PAGE_MODE_2_LEVEL:
> -		freelist = free_pt_l2(root, freelist);
> +		free_pt_l2(root, list);
>  		break;
>  	case PAGE_MODE_3_LEVEL:
> -		freelist = free_pt_l3(root, freelist);
> +		free_pt_l3(root, list);
>  		break;
>  	case PAGE_MODE_4_LEVEL:
> -		freelist = free_pt_l4(root, freelist);
> +		free_pt_l4(root, list);
>  		break;
>  	case PAGE_MODE_5_LEVEL:
> -		freelist = free_pt_l5(root, freelist);
> +		free_pt_l5(root, list);
>  		break;
>  	case PAGE_MODE_6_LEVEL:
> -		freelist = free_pt_l6(root, freelist);
> +		free_pt_l6(root, list);
>  		break;
>  	default:
>  		BUG();
>  	}
> -
> -	return freelist;
>  }
>  
>  void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
> @@ -362,7 +347,7 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
>  	return pte;
>  }
>  
> -static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
> +static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *list)
>  {
>  	unsigned long pt;
>  	int mode;
> @@ -373,12 +358,12 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
>  	}
>  
>  	if (!IOMMU_PTE_PRESENT(pteval))
> -		return freelist;
> +		return;
>  
>  	pt   = (unsigned long)IOMMU_PTE_PAGE(pteval);
>  	mode = IOMMU_PTE_MODE(pteval);
>  
> -	return free_sub_pt(pt, mode, freelist);
> +	free_sub_pt(pt, mode, list);
>  }
>  
>  /*
> @@ -392,7 +377,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
>  			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
>  {
>  	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
> -	struct page *freelist = NULL;
> +	LIST_HEAD(freelist);
>  	bool updated = false;
>  	u64 __pte, *pte;
>  	int ret, i, count;
> @@ -412,9 +397,9 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
>  		goto out;
>  
>  	for (i = 0; i < count; ++i)
> -		freelist = free_clear_pte(&pte[i], pte[i], freelist);
> +		free_clear_pte(&pte[i], pte[i], &freelist);
>  
> -	if (freelist != NULL)
> +	if (!list_empty(&freelist))
>  		updated = true;
>  
>  	if (count > 1) {
> @@ -449,7 +434,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
>  	}
>  
>  	/* Everything flushed out, free pages now */
> -	free_page_list(freelist);
> +	put_pages_list(&freelist);
>  
>  	return ret;
>  }
> @@ -511,7 +496,7 @@ static void v1_free_pgtable(struct io_pgtable *iop)
>  {
>  	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
>  	struct protection_domain *dom;
> -	struct page *freelist = NULL;
> +	LIST_HEAD(freelist);
>  	unsigned long root;
>  
>  	if (pgtable->mode == PAGE_MODE_NONE)
> @@ -530,9 +515,9 @@ static void v1_free_pgtable(struct io_pgtable *iop)
>  	       pgtable->mode > PAGE_MODE_6_LEVEL);
>  
>  	root = (unsigned long)pgtable->root;
> -	freelist = free_sub_pt(root, pgtable->mode, freelist);
> +	free_sub_pt(root, pgtable->mode, &freelist);
>  
> -	free_page_list(freelist);
> +	put_pages_list(&freelist);
>  }
>  
>  static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 896bea04c347..16742d9d8a1a 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -66,14 +66,7 @@ early_param("iommu.forcedac", iommu_dma_forcedac_setup);
>  
>  static void iommu_dma_entry_dtor(unsigned long data)
>  {
> -	struct page *freelist = (struct page *)data;
> -
> -	while (freelist) {
> -		unsigned long p = (unsigned long)page_address(freelist);
> -
> -		freelist = freelist->freelist;
> -		free_page(p);
> -	}
> +	put_pages_list((struct list_head *)data);
>  }
>  
>  static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
> @@ -481,7 +474,7 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
>  	else if (gather && gather->queued)
>  		queue_iova(iovad, iova_pfn(iovad, iova),
>  				size >> iova_shift(iovad),
> -				(unsigned long)gather->freelist);
> +				(unsigned long)&gather->freelist);
>  	else
>  		free_iova_fast(iovad, iova_pfn(iovad, iova),
>  				size >> iova_shift(iovad));
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index d75f59ae28e6..eaaff646e1b4 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -1186,35 +1186,30 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
>     know the hardware page-walk will no longer touch them.
>     The 'pte' argument is the *parent* PTE, pointing to the page that is to
>     be freed. */
> -static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
> -					    int level, struct dma_pte *pte,
> -					    struct page *freelist)
> +static void dma_pte_list_pagetables(struct dmar_domain *domain,
> +				    int level, struct dma_pte *pte,
> +				    struct list_head *list)
>  {
>  	struct page *pg;
>  
>  	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
> -	pg->freelist = freelist;
> -	freelist = pg;
> +	list_add_tail(&pg->lru, list);
>  
>  	if (level == 1)
> -		return freelist;
> +		return;
>  
>  	pte = page_address(pg);
>  	do {
>  		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
> -			freelist = dma_pte_list_pagetables(domain, level - 1,
> -							   pte, freelist);
> +			dma_pte_list_pagetables(domain, level - 1, pte, list);
>  		pte++;
>  	} while (!first_pte_in_page(pte));
> -
> -	return freelist;
>  }
>  
> -static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
> -					struct dma_pte *pte, unsigned long pfn,
> -					unsigned long start_pfn,
> -					unsigned long last_pfn,
> -					struct page *freelist)
> +static void dma_pte_clear_level(struct dmar_domain *domain, int level,
> +				struct dma_pte *pte, unsigned long pfn,
> +				unsigned long start_pfn, unsigned long last_pfn,
> +				struct list_head *list)
>  {
>  	struct dma_pte *first_pte = NULL, *last_pte = NULL;
>  
> @@ -1235,7 +1230,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
>  			/* These suborbinate page tables are going away entirely. Don't
>  			   bother to clear them; we're just going to *free* them. */
>  			if (level > 1 && !dma_pte_superpage(pte))
> -				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
> +				dma_pte_list_pagetables(domain, level - 1, pte, list);
>  
>  			dma_clear_pte(pte);
>  			if (!first_pte)
> @@ -1243,10 +1238,10 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
>  			last_pte = pte;
>  		} else if (level > 1) {
>  			/* Recurse down into a level that isn't *entirely* obsolete */
> -			freelist = dma_pte_clear_level(domain, level - 1,
> -						       phys_to_virt(dma_pte_addr(pte)),
> -						       level_pfn, start_pfn, last_pfn,
> -						       freelist);
> +			dma_pte_clear_level(domain, level - 1,
> +					    phys_to_virt(dma_pte_addr(pte)),
> +					    level_pfn, start_pfn, last_pfn,
> +					    list);
>  		}
>  next:
>  		pfn += level_size(level);
> @@ -1255,47 +1250,28 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
>  	if (first_pte)
>  		domain_flush_cache(domain, first_pte,
>  				   (void *)++last_pte - (void *)first_pte);
> -
> -	return freelist;
>  }
>  
>  /* We can't just free the pages because the IOMMU may still be walking
>     the page tables, and may have cached the intermediate levels. The
>     pages can only be freed after the IOTLB flush has been done. */
> -static struct page *domain_unmap(struct dmar_domain *domain,
> -				 unsigned long start_pfn,
> -				 unsigned long last_pfn,
> -				 struct page *freelist)
> +static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
> +			 unsigned long last_pfn, struct list_head *list)
>  {
>  	BUG_ON(!domain_pfn_supported(domain, start_pfn));
>  	BUG_ON(!domain_pfn_supported(domain, last_pfn));
>  	BUG_ON(start_pfn > last_pfn);
>  
>  	/* we don't need lock here; nobody else touches the iova range */
> -	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
> -				       domain->pgd, 0, start_pfn, last_pfn,
> -				       freelist);
> +	dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
> +			    domain->pgd, 0, start_pfn, last_pfn, list);
>  
>  	/* free pgd */
>  	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
>  		struct page *pgd_page = virt_to_page(domain->pgd);
> -		pgd_page->freelist = freelist;
> -		freelist = pgd_page;
> -
> +		list_add_tail(&pgd_page->lru, list);
>  		domain->pgd = NULL;
>  	}
> -
> -	return freelist;
> -}
> -
> -static void dma_free_pagelist(struct page *freelist)
> -{
> -	struct page *pg;
> -
> -	while ((pg = freelist)) {
> -		freelist = pg->freelist;
> -		free_pgtable_page(page_address(pg));
> -	}
>  }
>  
>  /* iommu handling */
> @@ -1972,11 +1948,10 @@ static void domain_exit(struct dmar_domain *domain)
>  	domain_remove_dev_info(domain);
>  
>  	if (domain->pgd) {
> -		struct page *freelist;
> +		LIST_HEAD(pages);
>  
> -		freelist = domain_unmap(domain, 0,
> -					DOMAIN_MAX_PFN(domain->gaw), NULL);
> -		dma_free_pagelist(freelist);
> +		domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &pages);
> +		put_pages_list(&pages);
>  	}
>  
>  	free_domain_mem(domain);
> @@ -4068,19 +4043,17 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
>  		{
>  			struct dmar_drhd_unit *drhd;
>  			struct intel_iommu *iommu;
> -			struct page *freelist;
> +			LIST_HEAD(pages);
>  
> -			freelist = domain_unmap(si_domain,
> -						start_vpfn, last_vpfn,
> -						NULL);
> +			domain_unmap(si_domain, start_vpfn, last_vpfn, &pages);
>  
>  			rcu_read_lock();
>  			for_each_active_iommu(iommu, drhd)
>  				iommu_flush_iotlb_psi(iommu, si_domain,
>  					start_vpfn, mhp->nr_pages,
> -					!freelist, 0);
> +					list_empty(&pages), 0);
>  			rcu_read_unlock();
> -			dma_free_pagelist(freelist);
> +			put_pages_list(&pages);
>  		}
>  		break;
>  	}
> @@ -5087,8 +5060,7 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
>  	start_pfn = iova >> VTD_PAGE_SHIFT;
>  	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
>  
> -	gather->freelist = domain_unmap(dmar_domain, start_pfn,
> -					last_pfn, gather->freelist);
> +	domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
>  
>  	if (dmar_domain->max_addr == iova + size)
>  		dmar_domain->max_addr = iova;
> @@ -5124,9 +5096,10 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
>  
>  	for_each_domain_iommu(iommu_id, dmar_domain)
>  		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
> -				      start_pfn, nrpages, !gather->freelist, 0);
> +				      start_pfn, nrpages,
> +				      list_empty(&gather->freelist), 0);
>  
> -	dma_free_pagelist(gather->freelist);
> +	put_pages_list(&gather->freelist);
>  }
>  
>  static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index d2f3435e7d17..de0c57a567c8 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -186,7 +186,7 @@ struct iommu_iotlb_gather {
>  	unsigned long		start;
>  	unsigned long		end;
>  	size_t			pgsize;
> -	struct page		*freelist;
> +	struct list_head	freelist;
>  	bool			queued;
>  };
>  
> @@ -399,6 +399,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
>  {
>  	*gather = (struct iommu_iotlb_gather) {
>  		.start	= ULONG_MAX,
> +		.freelist = LIST_HEAD_INIT(gather->freelist),
>  	};
>  }
>  
> -- 
> 2.32.0
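
A note on the iommu_iotlb_gather hunk quoted just above: an empty list_head
must point at itself, so when the structure is initialized by assigning a
compound literal, the initializer has to reference the destination's own
field. A sketch of the idiom, with an explanatory comment added for
illustration (the code itself is as in the patch):

	*gather = (struct iommu_iotlb_gather) {
		.start	= ULONG_MAX,
		/* LIST_HEAD_INIT(gather->freelist) points the empty list's
		 * next/prev at the destination field itself, so the list is
		 * valid once the literal has been copied into *gather. */
		.freelist = LIST_HEAD_INIT(gather->freelist),
	};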

* Re: [RFC] iommu: Use put_pages_list
  2021-10-07 18:16   ` Matthew Wilcox
@ 2021-10-14 11:20     ` Matthew Wilcox
  -1 siblings, 0 replies; 12+ messages in thread
From: Matthew Wilcox @ 2021-10-14 11:20 UTC (permalink / raw)
  To: Joerg Roedel, Suravee Suthikulpanit, Will Deacon,
	David Woodhouse, Lu Baolu, iommu, linux-kernel

I'm going to keep pinging this patch weekly.

On Thu, Oct 07, 2021 at 07:17:02PM +0100, Matthew Wilcox wrote:
> ping?
> 
> On Thu, Sep 30, 2021 at 05:20:42PM +0100, Matthew Wilcox (Oracle) wrote:
> > page->freelist is for the use of slab.  We already have the ability
> > to free a list of pages in the core mm, but it requires the use of a
> > list_head and for the pages to be chained together through page->lru.
> > Switch the iommu code over to using put_pages_list().
> > 
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > ---
> >  drivers/iommu/amd/io_pgtable.c | 99 +++++++++++++++-------------------
> >  drivers/iommu/dma-iommu.c      | 11 +---
> >  drivers/iommu/intel/iommu.c    | 89 +++++++++++-------------------
> >  include/linux/iommu.h          |  3 +-
> >  4 files changed, 77 insertions(+), 125 deletions(-)
> > 
> > diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
> > index 182c93a43efd..8dfa6ee58b76 100644
> > --- a/drivers/iommu/amd/io_pgtable.c
> > +++ b/drivers/iommu/amd/io_pgtable.c
> > @@ -74,49 +74,37 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
> >   *
> >   ****************************************************************************/
> >  
> > -static void free_page_list(struct page *freelist)
> > -{
> > -	while (freelist != NULL) {
> > -		unsigned long p = (unsigned long)page_address(freelist);
> > -
> > -		freelist = freelist->freelist;
> > -		free_page(p);
> > -	}
> > -}
> > -
> > -static struct page *free_pt_page(unsigned long pt, struct page *freelist)
> > +static void free_pt_page(unsigned long pt, struct list_head *list)
> >  {
> >  	struct page *p = virt_to_page((void *)pt);
> >  
> > -	p->freelist = freelist;
> > -
> > -	return p;
> > +	list_add_tail(&p->lru, list);
> >  }
> >  
> >  #define DEFINE_FREE_PT_FN(LVL, FN)						\
> > -static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist)	\
> > -{										\
> > -	unsigned long p;							\
> > -	u64 *pt;								\
> > -	int i;									\
> > -										\
> > -	pt = (u64 *)__pt;							\
> > -										\
> > -	for (i = 0; i < 512; ++i) {						\
> > -		/* PTE present? */						\
> > -		if (!IOMMU_PTE_PRESENT(pt[i]))					\
> > -			continue;						\
> > -										\
> > -		/* Large PTE? */						\
> > -		if (PM_PTE_LEVEL(pt[i]) == 0 ||					\
> > -		    PM_PTE_LEVEL(pt[i]) == 7)					\
> > -			continue;						\
> > -										\
> > -		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);			\
> > -		freelist = FN(p, freelist);					\
> > -	}									\
> > -										\
> > -	return free_pt_page((unsigned long)pt, freelist);			\
> > +static void free_pt_##LVL (unsigned long __pt, struct list_head *list)	\
> > +{									\
> > +	unsigned long p;						\
> > +	u64 *pt;							\
> > +	int i;								\
> > +									\
> > +	pt = (u64 *)__pt;						\
> > +									\
> > +	for (i = 0; i < 512; ++i) {					\
> > +		/* PTE present? */					\
> > +		if (!IOMMU_PTE_PRESENT(pt[i]))				\
> > +			continue;					\
> > +									\
> > +		/* Large PTE? */					\
> > +		if (PM_PTE_LEVEL(pt[i]) == 0 ||				\
> > +		    PM_PTE_LEVEL(pt[i]) == 7)				\
> > +			continue;					\
> > +									\
> > +		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);		\
> > +		FN(p, list);						\
> > +	}								\
> > +									\
> > +	free_pt_page((unsigned long)pt, list);				\
> >  }
> >  
> >  DEFINE_FREE_PT_FN(l2, free_pt_page)
> > @@ -125,36 +113,33 @@ DEFINE_FREE_PT_FN(l4, free_pt_l3)
> >  DEFINE_FREE_PT_FN(l5, free_pt_l4)
> >  DEFINE_FREE_PT_FN(l6, free_pt_l5)
> >  
> > -static struct page *free_sub_pt(unsigned long root, int mode,
> > -				struct page *freelist)
> > +static void free_sub_pt(unsigned long root, int mode, struct list_head *list)
> >  {
> >  	switch (mode) {
> >  	case PAGE_MODE_NONE:
> >  	case PAGE_MODE_7_LEVEL:
> >  		break;
> >  	case PAGE_MODE_1_LEVEL:
> > -		freelist = free_pt_page(root, freelist);
> > +		free_pt_page(root, list);
> >  		break;
> >  	case PAGE_MODE_2_LEVEL:
> > -		freelist = free_pt_l2(root, freelist);
> > +		free_pt_l2(root, list);
> >  		break;
> >  	case PAGE_MODE_3_LEVEL:
> > -		freelist = free_pt_l3(root, freelist);
> > +		free_pt_l3(root, list);
> >  		break;
> >  	case PAGE_MODE_4_LEVEL:
> > -		freelist = free_pt_l4(root, freelist);
> > +		free_pt_l4(root, list);
> >  		break;
> >  	case PAGE_MODE_5_LEVEL:
> > -		freelist = free_pt_l5(root, freelist);
> > +		free_pt_l5(root, list);
> >  		break;
> >  	case PAGE_MODE_6_LEVEL:
> > -		freelist = free_pt_l6(root, freelist);
> > +		free_pt_l6(root, list);
> >  		break;
> >  	default:
> >  		BUG();
> >  	}
> > -
> > -	return freelist;
> >  }
> >  
> >  void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
> > @@ -362,7 +347,7 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
> >  	return pte;
> >  }
> >  
> > -static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
> > +static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *list)
> >  {
> >  	unsigned long pt;
> >  	int mode;
> > @@ -373,12 +358,12 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
> >  	}
> >  
> >  	if (!IOMMU_PTE_PRESENT(pteval))
> > -		return freelist;
> > +		return;
> >  
> >  	pt   = (unsigned long)IOMMU_PTE_PAGE(pteval);
> >  	mode = IOMMU_PTE_MODE(pteval);
> >  
> > -	return free_sub_pt(pt, mode, freelist);
> > +	free_sub_pt(pt, mode, list);
> >  }
> >  
> >  /*
> > @@ -392,7 +377,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
> >  			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
> >  {
> >  	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
> > -	struct page *freelist = NULL;
> > +	LIST_HEAD(freelist);
> >  	bool updated = false;
> >  	u64 __pte, *pte;
> >  	int ret, i, count;
> > @@ -412,9 +397,9 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
> >  		goto out;
> >  
> >  	for (i = 0; i < count; ++i)
> > -		freelist = free_clear_pte(&pte[i], pte[i], freelist);
> > +		free_clear_pte(&pte[i], pte[i], &freelist);
> >  
> > -	if (freelist != NULL)
> > +	if (!list_empty(&freelist))
> >  		updated = true;
> >  
> >  	if (count > 1) {
> > @@ -449,7 +434,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
> >  	}
> >  
> >  	/* Everything flushed out, free pages now */
> > -	free_page_list(freelist);
> > +	put_pages_list(&freelist);
> >  
> >  	return ret;
> >  }
> > @@ -511,7 +496,7 @@ static void v1_free_pgtable(struct io_pgtable *iop)
> >  {
> >  	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
> >  	struct protection_domain *dom;
> > -	struct page *freelist = NULL;
> > +	LIST_HEAD(freelist);
> >  	unsigned long root;
> >  
> >  	if (pgtable->mode == PAGE_MODE_NONE)
> > @@ -530,9 +515,9 @@ static void v1_free_pgtable(struct io_pgtable *iop)
> >  	       pgtable->mode > PAGE_MODE_6_LEVEL);
> >  
> >  	root = (unsigned long)pgtable->root;
> > -	freelist = free_sub_pt(root, pgtable->mode, freelist);
> > +	free_sub_pt(root, pgtable->mode, &freelist);
> >  
> > -	free_page_list(freelist);
> > +	put_pages_list(&freelist);
> >  }
> >  
> >  static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > index 896bea04c347..16742d9d8a1a 100644
> > --- a/drivers/iommu/dma-iommu.c
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -66,14 +66,7 @@ early_param("iommu.forcedac", iommu_dma_forcedac_setup);
> >  
> >  static void iommu_dma_entry_dtor(unsigned long data)
> >  {
> > -	struct page *freelist = (struct page *)data;
> > -
> > -	while (freelist) {
> > -		unsigned long p = (unsigned long)page_address(freelist);
> > -
> > -		freelist = freelist->freelist;
> > -		free_page(p);
> > -	}
> > +	put_pages_list((struct list_head *)data);
> >  }
> >  
> >  static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
> > @@ -481,7 +474,7 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
> >  	else if (gather && gather->queued)
> >  		queue_iova(iovad, iova_pfn(iovad, iova),
> >  				size >> iova_shift(iovad),
> > -				(unsigned long)gather->freelist);
> > +				(unsigned long)&gather->freelist);
> >  	else
> >  		free_iova_fast(iovad, iova_pfn(iovad, iova),
> >  				size >> iova_shift(iovad));
> > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> > index d75f59ae28e6..eaaff646e1b4 100644
> > --- a/drivers/iommu/intel/iommu.c
> > +++ b/drivers/iommu/intel/iommu.c
> > @@ -1186,35 +1186,30 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
> >     know the hardware page-walk will no longer touch them.
> >     The 'pte' argument is the *parent* PTE, pointing to the page that is to
> >     be freed. */
> > -static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
> > -					    int level, struct dma_pte *pte,
> > -					    struct page *freelist)
> > +static void dma_pte_list_pagetables(struct dmar_domain *domain,
> > +				    int level, struct dma_pte *pte,
> > +				    struct list_head *list)
> >  {
> >  	struct page *pg;
> >  
> >  	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
> > -	pg->freelist = freelist;
> > -	freelist = pg;
> > +	list_add_tail(&pg->lru, list);
> >  
> >  	if (level == 1)
> > -		return freelist;
> > +		return;
> >  
> >  	pte = page_address(pg);
> >  	do {
> >  		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
> > -			freelist = dma_pte_list_pagetables(domain, level - 1,
> > -							   pte, freelist);
> > +			dma_pte_list_pagetables(domain, level - 1, pte, list);
> >  		pte++;
> >  	} while (!first_pte_in_page(pte));
> > -
> > -	return freelist;
> >  }
> >  
> > -static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
> > -					struct dma_pte *pte, unsigned long pfn,
> > -					unsigned long start_pfn,
> > -					unsigned long last_pfn,
> > -					struct page *freelist)
> > +static void dma_pte_clear_level(struct dmar_domain *domain, int level,
> > +				struct dma_pte *pte, unsigned long pfn,
> > +				unsigned long start_pfn, unsigned long last_pfn,
> > +				struct list_head *list)
> >  {
> >  	struct dma_pte *first_pte = NULL, *last_pte = NULL;
> >  
> > @@ -1235,7 +1230,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
> >  			/* These suborbinate page tables are going away entirely. Don't
> >  			   bother to clear them; we're just going to *free* them. */
> >  			if (level > 1 && !dma_pte_superpage(pte))
> > -				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
> > +				dma_pte_list_pagetables(domain, level - 1, pte, list);
> >  
> >  			dma_clear_pte(pte);
> >  			if (!first_pte)
> > @@ -1243,10 +1238,10 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
> >  			last_pte = pte;
> >  		} else if (level > 1) {
> >  			/* Recurse down into a level that isn't *entirely* obsolete */
> > -			freelist = dma_pte_clear_level(domain, level - 1,
> > -						       phys_to_virt(dma_pte_addr(pte)),
> > -						       level_pfn, start_pfn, last_pfn,
> > -						       freelist);
> > +			dma_pte_clear_level(domain, level - 1,
> > +					    phys_to_virt(dma_pte_addr(pte)),
> > +					    level_pfn, start_pfn, last_pfn,
> > +					    list);
> >  		}
> >  next:
> >  		pfn += level_size(level);
> > @@ -1255,47 +1250,28 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
> >  	if (first_pte)
> >  		domain_flush_cache(domain, first_pte,
> >  				   (void *)++last_pte - (void *)first_pte);
> > -
> > -	return freelist;
> >  }
> >  
> >  /* We can't just free the pages because the IOMMU may still be walking
> >     the page tables, and may have cached the intermediate levels. The
> >     pages can only be freed after the IOTLB flush has been done. */
> > -static struct page *domain_unmap(struct dmar_domain *domain,
> > -				 unsigned long start_pfn,
> > -				 unsigned long last_pfn,
> > -				 struct page *freelist)
> > +static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
> > +			 unsigned long last_pfn, struct list_head *list)
> >  {
> >  	BUG_ON(!domain_pfn_supported(domain, start_pfn));
> >  	BUG_ON(!domain_pfn_supported(domain, last_pfn));
> >  	BUG_ON(start_pfn > last_pfn);
> >  
> >  	/* we don't need lock here; nobody else touches the iova range */
> > -	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
> > -				       domain->pgd, 0, start_pfn, last_pfn,
> > -				       freelist);
> > +	dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
> > +			    domain->pgd, 0, start_pfn, last_pfn, list);
> >  
> >  	/* free pgd */
> >  	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
> >  		struct page *pgd_page = virt_to_page(domain->pgd);
> > -		pgd_page->freelist = freelist;
> > -		freelist = pgd_page;
> > -
> > +		list_add_tail(&pgd_page->lru, list);
> >  		domain->pgd = NULL;
> >  	}
> > -
> > -	return freelist;
> > -}
> > -
> > -static void dma_free_pagelist(struct page *freelist)
> > -{
> > -	struct page *pg;
> > -
> > -	while ((pg = freelist)) {
> > -		freelist = pg->freelist;
> > -		free_pgtable_page(page_address(pg));
> > -	}
> >  }
> >  
> >  /* iommu handling */
> > @@ -1972,11 +1948,10 @@ static void domain_exit(struct dmar_domain *domain)
> >  	domain_remove_dev_info(domain);
> >  
> >  	if (domain->pgd) {
> > -		struct page *freelist;
> > +		LIST_HEAD(pages);
> >  
> > -		freelist = domain_unmap(domain, 0,
> > -					DOMAIN_MAX_PFN(domain->gaw), NULL);
> > -		dma_free_pagelist(freelist);
> > +		domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &pages);
> > +		put_pages_list(&pages);
> >  	}
> >  
> >  	free_domain_mem(domain);
> > @@ -4068,19 +4043,17 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
> >  		{
> >  			struct dmar_drhd_unit *drhd;
> >  			struct intel_iommu *iommu;
> > -			struct page *freelist;
> > +			LIST_HEAD(pages);
> >  
> > -			freelist = domain_unmap(si_domain,
> > -						start_vpfn, last_vpfn,
> > -						NULL);
> > +			domain_unmap(si_domain, start_vpfn, last_vpfn, &pages);
> >  
> >  			rcu_read_lock();
> >  			for_each_active_iommu(iommu, drhd)
> >  				iommu_flush_iotlb_psi(iommu, si_domain,
> >  					start_vpfn, mhp->nr_pages,
> > -					!freelist, 0);
> > +					list_empty(&pages), 0);
> >  			rcu_read_unlock();
> > -			dma_free_pagelist(freelist);
> > +			put_pages_list(&pages);
> >  		}
> >  		break;
> >  	}
> > @@ -5087,8 +5060,7 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
> >  	start_pfn = iova >> VTD_PAGE_SHIFT;
> >  	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
> >  
> > -	gather->freelist = domain_unmap(dmar_domain, start_pfn,
> > -					last_pfn, gather->freelist);
> > +	domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
> >  
> >  	if (dmar_domain->max_addr == iova + size)
> >  		dmar_domain->max_addr = iova;
> > @@ -5124,9 +5096,10 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
> >  
> >  	for_each_domain_iommu(iommu_id, dmar_domain)
> >  		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
> > -				      start_pfn, nrpages, !gather->freelist, 0);
> > +				      start_pfn, nrpages,
> > +				      list_empty(&gather->freelist), 0);
> >  
> > -	dma_free_pagelist(gather->freelist);
> > +	put_pages_list(&gather->freelist);
> >  }
> >  
> >  static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > index d2f3435e7d17..de0c57a567c8 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -186,7 +186,7 @@ struct iommu_iotlb_gather {
> >  	unsigned long		start;
> >  	unsigned long		end;
> >  	size_t			pgsize;
> > -	struct page		*freelist;
> > +	struct list_head	freelist;
> >  	bool			queued;
> >  };
> >  
> > @@ -399,6 +399,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
> >  {
> >  	*gather = (struct iommu_iotlb_gather) {
> >  		.start	= ULONG_MAX,
> > +		.freelist = LIST_HEAD_INIT(gather->freelist),
> >  	};
> >  }
> >  
> > -- 
> > 2.32.0

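Reduced to a minimal sketch, the pattern the patch converts everything to
looks like this (illustrative only; gather_pgtable_page() and
example_free_tables() are hypothetical names, not helpers from the patch):

	#include <linux/list.h>
	#include <linux/mm.h>

	/* Link a page-table page onto a caller-provided list through
	 * page->lru, rather than chaining pages through the slab-owned
	 * page->freelist field. */
	static void gather_pgtable_page(struct page *pg, struct list_head *list)
	{
		list_add_tail(&pg->lru, list);
	}

	static void example_free_tables(struct page *pg)
	{
		LIST_HEAD(freelist);

		gather_pgtable_page(pg, &freelist);
		/* ... gather any subordinate table pages the same way ... */

		put_pages_list(&freelist);	/* frees every page on the list */
	}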

* Re: [RFC] iommu: Use put_pages_list
  2021-10-14 11:20     ` Matthew Wilcox
@ 2021-10-14 11:52       ` John Garry
  -1 siblings, 0 replies; 12+ messages in thread
From: John Garry @ 2021-10-14 11:52 UTC (permalink / raw)
  To: Matthew Wilcox, Joerg Roedel, Suravee Suthikulpanit, Will Deacon,
	David Woodhouse, Lu Baolu, iommu, linux-kernel, Robin Murphy

On 14/10/2021 12:20, Matthew Wilcox wrote:
> I'm going to keep pinging this patch weekly.
> 
> On Thu, Oct 07, 2021 at 07:17:02PM +0100, Matthew Wilcox wrote:
>> ping?

Robin, were you checking this? You mentioned "I got
side-tracked trying to make io-pgtable use that freelist properly" in 
another thread, which seems related.

Thanks,
John

>>
>> On Thu, Sep 30, 2021 at 05:20:42PM +0100, Matthew Wilcox (Oracle) wrote:
>>> page->freelist is for the use of slab.  We already have the ability
>>> to free a list of pages in the core mm, but it requires the use of a
>>> list_head and for the pages to be chained together through page->lru.
>>> Switch the iommu code over to using put_pages_list().
>>>
>>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>>> ---
>>>   drivers/iommu/amd/io_pgtable.c | 99 +++++++++++++++-------------------
>>>   drivers/iommu/dma-iommu.c      | 11 +---
>>>   drivers/iommu/intel/iommu.c    | 89 +++++++++++-------------------
>>>   include/linux/iommu.h          |  3 +-
>>>   4 files changed, 77 insertions(+), 125 deletions(-)
>>>

* Re: [RFC] iommu: Use put_pages_list
  2021-10-14 11:52       ` John Garry
@ 2021-10-14 16:17         ` Robin Murphy
  -1 siblings, 0 replies; 12+ messages in thread
From: Robin Murphy @ 2021-10-14 16:17 UTC (permalink / raw)
  To: John Garry, Matthew Wilcox, Joerg Roedel, Suravee Suthikulpanit,
	Will Deacon, David Woodhouse, Lu Baolu, iommu, linux-kernel

On 2021-10-14 12:52, John Garry wrote:
> On 14/10/2021 12:20, Matthew Wilcox wrote:
>> I'm going to keep pinging this patch weekly.
>>
>> On Thu, Oct 07, 2021 at 07:17:02PM +0100, Matthew Wilcox wrote:
>>> ping?
> 
> Robin, were you checking this? You mentioned "I got
> side-tracked trying to make io-pgtable use that freelist properly" in 
> another thread, which seems related.

Ooh, thanks for the heads-up John - I'm still only just starting to 
catch up on my mailing list folders since I got back off holiday.

Indeed I already started untangling the freelist handling in the flush 
queue code (to make the move into iommu-dma smaller). Once I'd figured 
out how it worked I did wonder whether there was any more "standard" 
field to borrow, since page->freelist did seem very much in the 
minority. If page->lru is it then great! From a quick skim of the patch 
I think I'd only have a few trivial review comments to make - certainly 
no objection to the fundamental change itself (indeed I hit a point in 
io-pgtable-arm where adding to the pointer chain got rather awkward, so 
having proper lists to splice would be lovely).

Matthew - is this something getting in the way of mm development, or 
just a nice cleanup? I'd be happy either to pursue merging it on its 
own, or to pick it up and work it into a series with my stuff.

Cheers,
Robin.

> 
> Thanks,
> John
> 
>>>
>>> On Thu, Sep 30, 2021 at 05:20:42PM +0100, Matthew Wilcox (Oracle) wrote:
>>>> page->freelist is for the use of slab.  We already have the ability
>>>> to free a list of pages in the core mm, but it requires the use of a
>>>> list_head and for the pages to be chained together through page->lru.
>>>> Switch the iommu code over to using put_pages_list().
>>>>
>>>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>>>> ---
>>>>   drivers/iommu/amd/io_pgtable.c | 99 
>>>> +++++++++++++++-------------------
>>>>   drivers/iommu/dma-iommu.c      | 11 +---
>>>>   drivers/iommu/intel/iommu.c    | 89 +++++++++++-------------------
>>>>   include/linux/iommu.h          |  3 +-
>>>>   4 files changed, 77 insertions(+), 125 deletions(-)
>>>>

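The splicing Robin mentions is where the list_head representation helps
most: concatenating two page->freelist chains means walking one chain to
find its tail, while two list_heads merge in constant time. A minimal
sketch, with hypothetical variable names:

	LIST_HEAD(parent_freelist);
	LIST_HEAD(child_freelist);

	/* ... each table walker gathers its pages onto its own list ... */

	/* Move everything from the child list onto the tail of the parent
	 * list in O(1); child_freelist is left reinitialized and empty. */
	list_splice_tail_init(&child_freelist, &parent_freelist);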

* Re: [RFC] iommu: Use put_pages_list
  2021-10-14 16:17         ` Robin Murphy
@ 2021-10-14 16:32           ` Matthew Wilcox
  -1 siblings, 0 replies; 12+ messages in thread
From: Matthew Wilcox @ 2021-10-14 16:32 UTC (permalink / raw)
  To: Robin Murphy
  Cc: John Garry, Joerg Roedel, Suravee Suthikulpanit, Will Deacon,
	David Woodhouse, Lu Baolu, iommu, linux-kernel

On Thu, Oct 14, 2021 at 05:17:18PM +0100, Robin Murphy wrote:
> On 2021-10-14 12:52, John Garry wrote:
> > On 14/10/2021 12:20, Matthew Wilcox wrote:
> > > I'm going to keep pinging this patch weekly.
> > > 
> > > On Thu, Oct 07, 2021 at 07:17:02PM +0100, Matthew Wilcox wrote:
> > > > ping?
> > 
> > Robin, Were you checking this? You mentioned "I got
> > side-tracked trying to make io-pgtable use that freelist properly" in
> > another thread, which seems related.
> 
> Ooh, thanks for the heads-up John - I'm still only just starting to catch up
> on my mailing list folders since I got back off holiday.
> 
> Indeed I already started untangling the freelist handling in the flush queue
> code (to make the move into iommu-dma smaller). Once I'd figured out how it
> worked I did wonder whether there was any more "standard" field to borrow,
> since page->freelist did seem very much in the minority. If page->lru is it
> then great! From a quick skim of the patch I think I'd only have a few
> trivial review comments to make - certainly no objection to the fundamental
> change itself (indeed I hit a point in io-pgtable-arm where adding to the
> pointer chain got rather awkward, so having proper lists to splice would be
> lovely).

Great to hear!

> Matthew - is this something getting in the way of mm development, or just a
> nice cleanup? I'd be happy either to pursue merging it on its own, or to
> pick it up and work it into a series with my stuff.

This is probably going to get in the way of MM development in ~6 months'
time.  I'm happy for you to pick it up and put it in a series of your own!
BTW, the optimisation of the implementation of put_pages_list() is sitting
in akpm's tree, so if you see a performance problem, please give that
a try.

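For context, put_pages_list() before the batching work Matthew refers to
was roughly the following (paraphrased from mm/swap.c of that era, so
treat it as approximate):

	void put_pages_list(struct list_head *pages)
	{
		while (!list_empty(pages)) {
			struct page *victim;

			victim = lru_to_page(pages);
			list_del(&victim->lru);
			put_page(victim);	/* one refcount drop per page */
		}
	}

The version queued in akpm's tree batches the frees instead of dropping
pages one at a time, which is why Matthew suggests trying it if this
conversion ever shows up in profiles.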

end of thread

Thread overview: 6 messages
2021-09-30 16:20 [RFC] iommu: Use put_pages_list Matthew Wilcox (Oracle)
2021-10-07 18:16 ` Matthew Wilcox
2021-10-14 11:20   ` Matthew Wilcox
2021-10-14 11:52     ` John Garry
2021-10-14 16:17       ` Robin Murphy
2021-10-14 16:32         ` Matthew Wilcox
