linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Zi Yan <zi.yan@sent.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Dave Hansen <dave.hansen@linux.intel.com>,
	Michal Hocko <mhocko@kernel.org>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	Mel Gorman <mgorman@techsingularity.net>,
	John Hubbard <jhubbard@nvidia.com>,
	Mark Hairgrove <mhairgrove@nvidia.com>,
	Nitin Gupta <nigupta@nvidia.com>,
	David Nellans <dnellans@nvidia.com>, Zi Yan <ziy@nvidia.com>
Subject: [RFC PATCH 26/31] mm: thp: promote PTE-mapped THP to PMD-mapped THP.
Date: Fri, 15 Feb 2019 14:08:51 -0800	[thread overview]
Message-ID: <20190215220856.29749-27-zi.yan@sent.com> (raw)
In-Reply-To: <20190215220856.29749-1-zi.yan@sent.com>

From: Zi Yan <ziy@nvidia.com>

First promote 512 base pages to a PTE-mapped THP, then promote the
PTE-mapped THP to a PMD-mapped THP.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 include/linux/khugepaged.h |   1 +
 mm/filemap.c               |   8 +
 mm/huge_memory.c           | 419 +++++++++++++++++++++++++++++++++++++
 mm/internal.h              |   6 +
 mm/khugepaged.c            |   2 +-
 5 files changed, 435 insertions(+), 1 deletion(-)

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 082d1d2a5216..675c5ee99698 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -55,6 +55,7 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
 				return -ENOMEM;
 	return 0;
 }
+void release_pte_pages(pte_t *pte, pte_t *_pte);
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
diff --git a/mm/filemap.c b/mm/filemap.c
index 9f5e323e883e..54babad945ad 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1236,6 +1236,14 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem
 
 #endif
 
+void __unlock_page(struct page *page)
+{
+	BUILD_BUG_ON(PG_waiters != 7);
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
+		wake_up_page_bit(page, PG_locked);
+}
+
 /**
  * unlock_page - unlock a locked page
  * @page: the page
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fa3e12b17621..f856f7e39095 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4284,3 +4284,422 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 	update_mmu_cache_pmd(vma, address, pvmw->pmd);
 }
 #endif
+
+/* promote HPAGE_PMD_SIZE range into a PMD map.
+ * mmap_sem needs to be down_write.
+ */
+int promote_huge_pmd_address(struct vm_area_struct *vma, unsigned long haddr)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pmd_t *pmd, _pmd;
+	pte_t *pte, *_pte;
+	spinlock_t *pmd_ptl, *pte_ptl;
+	struct mmu_notifier_range range;
+	pgtable_t pgtable;
+	struct page *page, *head;
+	unsigned long address = haddr;
+	int ret = -EBUSY;
+
+	VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
+
+	if (haddr < vma->vm_start || (haddr + HPAGE_PMD_SIZE) > vma->vm_end)
+		return -EINVAL;
+
+	pmd = mm_find_pmd(mm, haddr);
+	if (!pmd || pmd_trans_huge(*pmd))
+		goto out;
+
+	anon_vma_lock_write(vma->anon_vma);
+
+	pte = pte_offset_map(pmd, haddr);
+	pte_ptl = pte_lockptr(mm, pmd);
+
+	head = page = vm_normal_page(vma, haddr, *pte);
+	if (!page || !PageTransCompound(page))
+		goto out_unlock;
+	VM_BUG_ON(page != compound_head(page));
+	lock_page(head);
+
+	mmu_notifier_range_init(&range, mm, haddr, haddr + HPAGE_PMD_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
+	pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
+	/*
+	 * After this gup_fast can't run anymore. This also removes
+	 * any huge TLB entry from the CPU so we won't allow
+	 * huge and small TLB entries for the same virtual address
+	 * to avoid the risk of CPU bugs in that area.
+	 */
+
+	_pmd = pmdp_collapse_flush(vma, haddr, pmd);
+	spin_unlock(pmd_ptl);
+	mmu_notifier_invalidate_range_end(&range);
+
+	/* remove ptes */
+	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
+				_pte++, page++, address += PAGE_SIZE) {
+		pte_t pteval = *_pte;
+
+		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
+			pr_err("pte none or zero pfn during pmd promotion\n");
+			if (is_zero_pfn(pte_pfn(pteval))) {
+				/*
+				 * ptl mostly unnecessary.
+				 */
+				spin_lock(pte_ptl);
+				/*
+				 * paravirt calls inside pte_clear here are
+				 * superfluous.
+				 */
+				pte_clear(vma->vm_mm, address, _pte);
+				spin_unlock(pte_ptl);
+			}
+		} else {
+			/*
+			 * ptl mostly unnecessary, but preempt has to
+			 * be disabled to update the per-cpu stats
+			 * inside page_remove_rmap().
+			 */
+			spin_lock(pte_ptl);
+			/*
+			 * paravirt calls inside pte_clear here are
+			 * superfluous.
+			 */
+			pte_clear(vma->vm_mm, address, _pte);
+			atomic_dec(&page->_mapcount);
+			/*page_remove_rmap(page, false, 0);*/
+			if (atomic_read(&page->_mapcount) > -1) {
+				SetPageDoubleMap(head);
+				pr_info("page double mapped");
+			}
+			spin_unlock(pte_ptl);
+		}
+	}
+	page_ref_sub(head, HPAGE_PMD_NR - 1);
+
+	pte_unmap(pte);
+	pgtable = pmd_pgtable(_pmd);
+
+	_pmd = mk_huge_pmd(head, vma->vm_page_prot);
+	_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
+
+	/*
+	 * spin_lock() below is not the equivalent of smp_wmb(), so
+	 * this is needed to avoid the copy_huge_page writes to become
+	 * visible after the set_pmd_at() write.
+	 */
+	smp_wmb();
+
+	spin_lock(pmd_ptl);
+	VM_BUG_ON(!pmd_none(*pmd));
+	atomic_inc(compound_mapcount_ptr(head));
+	__inc_node_page_state(head, NR_ANON_THPS);
+	pgtable_trans_huge_deposit(mm, pmd, pgtable);
+	set_pmd_at(mm, haddr, pmd, _pmd);
+	update_mmu_cache_pmd(vma, haddr, pmd);
+	spin_unlock(pmd_ptl);
+	unlock_page(head);
+	ret = 0;
+
+out_unlock:
+	anon_vma_unlock_write(vma->anon_vma);
+out:
+	return ret;
+}
+
+/* Racy check whether the huge page can be split */
+static bool can_promote_huge_page(struct page *page)
+{
+	int extra_pins;
+
+	/* Additional pins from radix tree */
+	if (PageAnon(page))
+		extra_pins = PageSwapCache(page) ? 1 : 0;
+	else
+		return false;
+	if (PageSwapCache(page))
+		return false;
+	if (PageWriteback(page))
+		return false;
+	return total_mapcount(page) == page_count(page) - extra_pins - 1;
+}
+
+/* write a __promote_huge_page_isolate(struct vm_area_struct *vma,
+ * unsigned long address, pte_t *pte) to isolate all subpages into a list,
+ * then call promote_list_to_huge_page() to promote in-place
+ */
+
+static int __promote_huge_page_isolate(struct vm_area_struct *vma,
+					unsigned long haddr, pte_t *pte,
+					struct page **head, struct list_head *subpage_list)
+{
+	struct page *page = NULL;
+	pte_t *_pte;
+	bool writable = false;
+	unsigned long address = haddr;
+
+	*head = NULL;
+	lru_add_drain();
+	for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
+	     _pte++, address += PAGE_SIZE) {
+		pte_t pteval = *_pte;
+
+		if (pte_none(pteval) || (pte_present(pteval) &&
+				is_zero_pfn(pte_pfn(pteval))))
+			goto out;
+		if (!pte_present(pteval))
+			goto out;
+		page = vm_normal_page(vma, address, pteval);
+		if (unlikely(!page))
+			goto out;
+
+		if (address == haddr) {
+			*head = page;
+			if (page_to_pfn(page) & ((1<<HPAGE_PMD_ORDER) - 1))
+				goto out;
+		}
+
+		if ((*head + (address - haddr)/PAGE_SIZE) != page)
+			goto out;
+
+		if (PageCompound(page))
+			goto out;
+
+		if (PageMlocked(page))
+			goto out;
+
+		VM_BUG_ON_PAGE(!PageAnon(page), page);
+
+		/*
+		 * We can do it before isolate_lru_page because the
+		 * page can't be freed from under us. NOTE: PG_lock
+		 * is needed to serialize against split_huge_page
+		 * when invoked from the VM.
+		 */
+		if (!trylock_page(page))
+			goto out;
+
+		/*
+		 * cannot use mapcount: can't collapse if there's a gup pin.
+		 * The page must only be referenced by the scanned process
+		 * and page swap cache.
+		 */
+		if (page_count(page) != page_mapcount(page) + PageSwapCache(page)) {
+			unlock_page(page);
+			goto out;
+		}
+		if (pte_write(pteval)) {
+			writable = true;
+		} else {
+			if (PageSwapCache(page) &&
+			    !reuse_swap_page(page, NULL)) {
+				unlock_page(page);
+				goto out;
+			}
+			/*
+			 * Page is not in the swap cache. It can be collapsed
+			 * into a THP.
+			 */
+		}
+
+		/*
+		 * Isolate the page to avoid collapsing an hugepage
+		 * currently in use by the VM.
+		 */
+		if (isolate_lru_page(page)) {
+			unlock_page(page);
+			goto out;
+		}
+
+		inc_node_page_state(page,
+				NR_ISOLATED_ANON + page_is_file_cache(page));
+		VM_BUG_ON_PAGE(!PageLocked(page), page);
+		VM_BUG_ON_PAGE(PageLRU(page), page);
+	}
+	if (likely(writable)) {
+		int i;
+
+		for (i = 0; i < HPAGE_PMD_NR; i++) {
+			struct page *p = *head + i;
+
+			list_add_tail(&p->lru, subpage_list);
+			VM_BUG_ON_PAGE(!PageLocked(p), p);
+		}
+		return 1;
+	} else {
+		/*result = SCAN_PAGE_RO;*/
+	}
+
+out:
+	release_pte_pages(pte, _pte);
+	return 0;
+}
+
+/*
+ * This function promotes normal pages into a huge page. @list point to all
+ * subpages of huge page to promote, @head point to the head page.
+ *
+ * Only caller must hold pin on the pages on @list, otherwise promotion
+ * fails with -EBUSY. All subpages must be locked.
+ *
+ * Both head page and tail pages will inherit mapping, flags, and so on from
+ * the hugepage.
+ *
+ * GUP pin and PG_locked transferred to @page. *
+ *
+ * Returns 0 if the hugepage is promoted successfully.
+ * Returns -EBUSY if any subpage is pinned or if anon_vma disappeared from
+ * under us.
+ */
+int promote_list_to_huge_page(struct page *head, struct list_head *list)
+{
+	struct anon_vma *anon_vma = NULL;
+	int ret = 0;
+	DECLARE_BITMAP(subpage_bitmap, HPAGE_PMD_NR);
+	struct page *subpage;
+	int i;
+
+	/* no file-backed page support yet */
+	if (PageAnon(head)) {
+		/*
+		 * The caller does not necessarily hold an mmap_sem that would
+		 * prevent the anon_vma disappearing so we first we take a
+		 * reference to it and then lock the anon_vma for write. This
+		 * is similar to page_lock_anon_vma_read except the write lock
+		 * is taken to serialise against parallel split or collapse
+		 * operations.
+		 */
+		anon_vma = page_get_anon_vma(head);
+		if (!anon_vma) {
+			ret = -EBUSY;
+			goto out;
+		}
+		anon_vma_lock_write(anon_vma);
+	} else
+		return -EBUSY;
+
+	/* Racy check each subpage to see if any has extra pin */
+	list_for_each_entry(subpage, list, lru) {
+		if (can_promote_huge_page(subpage))
+			bitmap_set(subpage_bitmap, subpage - head, 1);
+	}
+	/* Proceed only if none of subpages has extra pin.  */
+	if (!bitmap_full(subpage_bitmap, HPAGE_PMD_NR)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	list_for_each_entry(subpage, list, lru) {
+		enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+			TTU_RMAP_LOCKED;
+		bool unmap_success;
+
+		if (PageAnon(subpage))
+			ttu_flags |= TTU_SPLIT_FREEZE;
+
+		unmap_success = try_to_unmap(subpage, ttu_flags);
+		VM_BUG_ON_PAGE(!unmap_success, subpage);
+	}
+
+	/* Take care of migration wait list:
+	 * make compound page first, since it is impossible to move waiting
+	 * process from subpage queues to the head page queue.
+	 */
+	set_compound_page_dtor(head, COMPOUND_PAGE_DTOR);
+	set_compound_order(head, HPAGE_PMD_ORDER);
+	__SetPageHead(head);
+	for (i = 1; i < HPAGE_PMD_NR; i++) {
+		struct page *p = head + i;
+
+		p->index = 0;
+		p->mapping = TAIL_MAPPING;
+		p->mem_cgroup = NULL;
+		ClearPageActive(p);
+		/* move subpage refcount to head page */
+		page_ref_add(head, page_count(p) - 1);
+		set_page_count(p, 0);
+		set_compound_head(p, head);
+	}
+	atomic_set(compound_mapcount_ptr(head), -1);
+	prep_transhuge_page(head);
+
+	remap_page(head);
+
+	if (!mem_cgroup_disabled())
+		mod_memcg_state(head->mem_cgroup, MEMCG_RSS_HUGE, HPAGE_PMD_NR);
+
+	for (i = 1; i < HPAGE_PMD_NR; i++) {
+		struct page *subpage = head + i;
+		__unlock_page(subpage);
+	}
+
+	INIT_LIST_HEAD(&head->lru);
+	unlock_page(head);
+	putback_lru_page(head);
+
+	mod_node_page_state(page_pgdat(head),
+			NR_ISOLATED_ANON + page_is_file_cache(head), -HPAGE_PMD_NR);
+out_unlock:
+	if (anon_vma) {
+		anon_vma_unlock_write(anon_vma);
+		put_anon_vma(anon_vma);
+	}
+out:
+	return ret;
+}
+
+static int promote_huge_page_isolate(struct vm_area_struct *vma,
+					unsigned long haddr,
+					struct page **head, struct list_head *subpage_list)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pmd_t *pmd;
+	pte_t *pte;
+	spinlock_t *pte_ptl;
+	int ret = -EBUSY;
+
+	pmd = mm_find_pmd(mm, haddr);
+	if (!pmd || pmd_trans_huge(*pmd))
+		goto out;
+
+	anon_vma_lock_write(vma->anon_vma);
+
+	pte = pte_offset_map(pmd, haddr);
+	pte_ptl = pte_lockptr(mm, pmd);
+
+	spin_lock(pte_ptl);
+	ret = __promote_huge_page_isolate(vma, haddr, pte, head, subpage_list);
+	spin_unlock(pte_ptl);
+
+	if (unlikely(!ret)) {
+		pte_unmap(pte);
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+	ret = 0;
+	/*
+	 * All pages are isolated and locked so anon_vma rmap
+	 * can't run anymore.
+	 */
+out_unlock:
+	anon_vma_unlock_write(vma->anon_vma);
+out:
+	return ret;
+}
+
+/* assume mmap_sem is down_write, wrapper for madvise */
+int promote_huge_page_address(struct vm_area_struct *vma, unsigned long haddr)
+{
+	LIST_HEAD(subpage_list);
+	struct page *head;
+
+	if (haddr & (HPAGE_PMD_SIZE - 1))
+		return -EINVAL;
+
+	if (haddr < vma->vm_start || (haddr + HPAGE_PMD_SIZE) > vma->vm_end)
+		return -EINVAL;
+
+	if (promote_huge_page_isolate(vma, haddr, &head, &subpage_list))
+		return -EBUSY;
+
+	return promote_list_to_huge_page(head, &subpage_list);
+}
diff --git a/mm/internal.h b/mm/internal.h
index 70a6ef603e5b..c5e5a0f1cc58 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -581,4 +581,10 @@ int expand_free_page(struct zone *zone, struct page *buddy_head,
 void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
 							unsigned int alloc_flags);
 
+void __unlock_page(struct page *page);
+
+int promote_huge_pmd_address(struct vm_area_struct *vma, unsigned long haddr);
+
+int promote_huge_page_address(struct vm_area_struct *vma, unsigned long haddr);
+
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 3acfddcba714..ff059353ebc3 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -508,7 +508,7 @@ static void release_pte_page(struct page *page)
 	putback_lru_page(page);
 }
 
-static void release_pte_pages(pte_t *pte, pte_t *_pte)
+void release_pte_pages(pte_t *pte, pte_t *_pte)
 {
 	while (--_pte >= pte) {
 		pte_t pteval = *_pte;
-- 
2.20.1


  parent reply	other threads:[~2019-02-15 22:10 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-15 22:08 [RFC PATCH 00/31] Generating physically contiguous memory after page allocation Zi Yan
2019-02-15 22:08 ` [RFC PATCH 01/31] mm: migrate: Add exchange_pages to exchange two lists of pages Zi Yan
2019-02-17 11:29   ` Matthew Wilcox
2019-02-18 17:31     ` Zi Yan
2019-02-18 17:42       ` Vlastimil Babka
2019-02-18 17:51         ` Zi Yan
2019-02-18 17:52           ` Matthew Wilcox
2019-02-18 17:59             ` Zi Yan
2019-02-19  7:42               ` Anshuman Khandual
2019-02-19 12:56                 ` Matthew Wilcox
2019-02-20  4:38                   ` Anshuman Khandual
2019-03-14  2:39                     ` Zi Yan
2019-02-21 21:10   ` Jerome Glisse
2019-02-21 21:25     ` Zi Yan
2019-02-15 22:08 ` [RFC PATCH 02/31] mm: migrate: Add THP exchange support Zi Yan
2019-02-15 22:08 ` [RFC PATCH 03/31] mm: migrate: Add tmpfs " Zi Yan
2019-02-15 22:08 ` [RFC PATCH 04/31] mm: add mem_defrag functionality Zi Yan
2019-02-15 22:08 ` [RFC PATCH 05/31] mem_defrag: split a THP if either src or dst is THP only Zi Yan
2019-02-15 22:08 ` [RFC PATCH 06/31] mm: Make MAX_ORDER configurable in Kconfig for buddy allocator Zi Yan
2019-02-15 22:08 ` [RFC PATCH 07/31] mm: deallocate pages with order > MAX_ORDER Zi Yan
2019-02-15 22:08 ` [RFC PATCH 08/31] mm: add pagechain container for storing multiple pages Zi Yan
2019-02-15 22:08 ` [RFC PATCH 09/31] mm: thp: 1GB anonymous page implementation Zi Yan
2019-02-15 22:08 ` [RFC PATCH 10/31] mm: proc: add 1GB THP kpageflag Zi Yan
2019-02-15 22:08 ` [RFC PATCH 11/31] mm: debug: print compound page order in dump_page() Zi Yan
2019-02-15 22:08 ` [RFC PATCH 12/31] mm: stats: Separate PMD THP and PUD THP stats Zi Yan
2019-02-15 22:08 ` [RFC PATCH 13/31] mm: thp: 1GB THP copy on write implementation Zi Yan
2019-02-15 22:08 ` [RFC PATCH 14/31] mm: thp: handling 1GB THP reference bit Zi Yan
2019-02-15 22:08 ` [RFC PATCH 15/31] mm: thp: add 1GB THP split_huge_pud_page() function Zi Yan
2019-02-15 22:08 ` [RFC PATCH 16/31] mm: thp: check compound_mapcount of PMD-mapped PUD THPs at free time Zi Yan
2019-02-15 22:08 ` [RFC PATCH 17/31] mm: thp: split properly PMD-mapped PUD THP to PTE-mapped PUD THP Zi Yan
2019-02-15 22:08 ` [RFC PATCH 18/31] mm: page_vma_walk: teach it about PMD-mapped " Zi Yan
2019-02-15 22:08 ` [RFC PATCH 19/31] mm: thp: 1GB THP support in try_to_unmap() Zi Yan
2019-02-15 22:08 ` [RFC PATCH 20/31] mm: thp: split 1GB THPs at page reclaim Zi Yan
2019-02-15 22:08 ` [RFC PATCH 21/31] mm: thp: 1GB zero page shrinker Zi Yan
2019-02-15 22:08 ` [RFC PATCH 22/31] mm: thp: 1GB THP follow_p*d_page() support Zi Yan
2019-02-15 22:08 ` [RFC PATCH 23/31] mm: support 1GB THP pagemap support Zi Yan
2019-02-15 22:08 ` [RFC PATCH 24/31] sysctl: add an option to only print the head page virtual address Zi Yan
2019-02-15 22:08 ` [RFC PATCH 25/31] mm: thp: add a knob to enable/disable 1GB THPs Zi Yan
2019-02-15 22:08 ` Zi Yan [this message]
2019-02-15 22:08 ` [RFC PATCH 27/31] mm: thp: promote PMD-mapped PUD pages to PUD-mapped PUD pages Zi Yan
2019-02-15 22:08 ` [RFC PATCH 28/31] mm: vmstats: add page promotion stats Zi Yan
2019-02-15 22:08 ` [RFC PATCH 29/31] mm: madvise: add madvise options to split PMD and PUD THPs Zi Yan
2019-02-15 22:08 ` [RFC PATCH 30/31] mm: mem_defrag: thp: PMD THP and PUD THP in-place promotion support Zi Yan
2019-02-15 22:08 ` [RFC PATCH 31/31] sysctl: toggle to promote PUD-mapped 1GB THP or not Zi Yan
2019-02-20  1:42 ` [RFC PATCH 00/31] Generating physically contiguous memory after page allocation Mike Kravetz
2019-02-20  2:33   ` Zi Yan
2019-02-20  3:18     ` Mike Kravetz
2019-02-20  5:19       ` Zi Yan
2019-02-20  5:27         ` Mike Kravetz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190215220856.29749-27-zi.yan@sent.com \
    --to=zi.yan@sent.com \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=dnellans@nvidia.com \
    --cc=jhubbard@nvidia.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=mhairgrove@nvidia.com \
    --cc=mhocko@kernel.org \
    --cc=nigupta@nvidia.com \
    --cc=vbabka@suse.cz \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).