From: Hillf Danton <hdanton@sina.com>
To: Alexander Graf <graf@amazon.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>,
kvm@vger.kernel.org, mst@redhat.com,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
akpm@linux-foundation.org, mgorman@techsingularity.net,
Hillf Danton <hdanton@sina.com>, Minchan Kim <minchan@kernel.org>,
vbabka@suse.cz
Subject: Re: [PATCH v16.1 0/9] mm / virtio: Provide support for free page reporting
Date: Fri, 24 Jan 2020 21:23:52 +0800 [thread overview]
Message-ID: <20200124132352.12824-1-hdanton@sina.com> (raw)
In-Reply-To: <20200122173040.6142.39116.stgit@localhost.localdomain>
On Thu, 23 Jan 2020 11:20:07 +0100 Alexander Graf wrote:
>
> The big problem I see is that what I really want from a user's point of
> view is a tuneable that says "Automatically free clean page cache pages
> that were not accessed in the last X minutes".
Below is a diff on top of 1a4e58cce84e ("mm: introduce MADV_PAGEOUT"). It is
completely untested, and was written on the assumption that it is in line with
the tunable described above, except that "X minutes" is not taken into account.
[BTW, please take a look at these headers in your message
Content-Type: text/plain; charset="utf-8"; format="flowed"
Content-Transfer-Encoding: base64
and make sure your messages are sent as plain text.]
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -69,6 +69,7 @@
#define MADV_COLD 20 /* deactivate these pages */
#define MADV_PAGEOUT 21 /* reclaim these pages */
+#define MADV_CCPC 22 /* reclaim cold & clean page cache pages */
/* compatibility flags */
#define MAP_FILE 0
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -35,6 +35,7 @@
struct madvise_walk_private {
struct mmu_gather *tlb;
bool pageout;
+ int behavior;
};
/*
@@ -50,6 +51,7 @@ static int madvise_need_mmap_write(int b
case MADV_DONTNEED:
case MADV_COLD:
case MADV_PAGEOUT:
+ case MADV_CCPC:
case MADV_FREE:
return 0;
default:
@@ -304,6 +306,7 @@ static int madvise_cold_or_pageout_pte_r
struct madvise_walk_private *private = walk->private;
struct mmu_gather *tlb = private->tlb;
bool pageout = private->pageout;
+ bool ccpc = private->behavior == MADV_CCPC;
struct mm_struct *mm = tlb->mm;
struct vm_area_struct *vma = walk->vma;
pte_t *orig_pte, *pte, ptent;
@@ -429,6 +432,8 @@ regular_page:
VM_BUG_ON_PAGE(PageTransCompound(page), page);
if (pte_young(ptent)) {
+ if (ccpc)
+ continue;
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
ptent = pte_mkold(ptent);
@@ -436,6 +441,10 @@ regular_page:
tlb_remove_tlb_entry(tlb, pte, addr);
}
+ if (ccpc)
+ if (PageDirty(page))
+ continue;
+
/*
* We are deactivating a page for accelerating reclaiming.
* VM couldn't reclaim the page unless we clear PG_young.
@@ -502,12 +511,13 @@ static long madvise_cold(struct vm_area_
}
static void madvise_pageout_page_range(struct mmu_gather *tlb,
- struct vm_area_struct *vma,
+ struct vm_area_struct *vma, int behavior,
unsigned long addr, unsigned long end)
{
struct madvise_walk_private walk_private = {
.pageout = true,
.tlb = tlb,
+ .behavior = behavior,
};
tlb_start_vma(tlb, vma);
@@ -515,10 +525,10 @@ static void madvise_pageout_page_range(s
tlb_end_vma(tlb, vma);
}
-static inline bool can_do_pageout(struct vm_area_struct *vma)
+static inline bool can_do_pageout(struct vm_area_struct *vma, int behavior)
{
if (vma_is_anonymous(vma))
- return true;
+ return behavior != MADV_CCPC;
if (!vma->vm_file)
return false;
/*
@@ -531,7 +541,7 @@ static inline bool can_do_pageout(struct
inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
}
-static long madvise_pageout(struct vm_area_struct *vma,
+static long madvise_pageout(struct vm_area_struct *vma, int behavior,
struct vm_area_struct **prev,
unsigned long start_addr, unsigned long end_addr)
{
@@ -542,12 +552,12 @@ static long madvise_pageout(struct vm_ar
if (!can_madv_lru_vma(vma))
return -EINVAL;
- if (!can_do_pageout(vma))
+ if (!can_do_pageout(vma, behavior))
return 0;
lru_add_drain();
tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
- madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
+ madvise_pageout_page_range(&tlb, vma, behavior, start_addr, end_addr);
tlb_finish_mmu(&tlb, start_addr, end_addr);
return 0;
@@ -936,7 +946,8 @@ madvise_vma(struct vm_area_struct *vma,
case MADV_COLD:
return madvise_cold(vma, prev, start, end);
case MADV_PAGEOUT:
- return madvise_pageout(vma, prev, start, end);
+ case MADV_CCPC:
+ return madvise_pageout(vma, behavior, prev, start, end);
case MADV_FREE:
case MADV_DONTNEED:
return madvise_dontneed_free(vma, prev, start, end, behavior);
@@ -960,6 +971,7 @@ madvise_behavior_valid(int behavior)
case MADV_FREE:
case MADV_COLD:
case MADV_PAGEOUT:
+ case MADV_CCPC:
#ifdef CONFIG_KSM
case MADV_MERGEABLE:
case MADV_UNMERGEABLE:
--
next prev parent reply other threads:[~2020-01-24 13:24 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-22 17:43 [PATCH v16.1 0/9] mm / virtio: Provide support for free page reporting Alexander Duyck
2020-01-22 17:43 ` [PATCH v16.1 1/9] mm: Adjust shuffle code to allow for future coalescing Alexander Duyck
2020-01-22 17:43 ` [PATCH v16.1 2/9] mm: Use zone and order instead of free area in free_list manipulators Alexander Duyck
2020-01-22 17:43 ` [PATCH v16.1 3/9] mm: Add function __putback_isolated_page Alexander Duyck
2020-01-22 17:43 ` [PATCH v16.1 4/9] mm: Introduce Reported pages Alexander Duyck
2020-01-22 17:43 ` [PATCH v16.1 5/9] virtio-balloon: Pull page poisoning config out of free page hinting Alexander Duyck
2020-01-22 17:43 ` [PATCH v16.1 6/9] virtio-balloon: Add support for providing free page reports to host Alexander Duyck
2020-02-11 11:03 ` David Hildenbrand
2020-02-11 11:47 ` Michael S. Tsirkin
2020-02-11 12:19 ` David Hildenbrand
2020-02-11 14:07 ` Michael S. Tsirkin
2020-02-11 14:31 ` David Hildenbrand
2020-02-11 14:48 ` Michael S. Tsirkin
2020-02-11 15:13 ` David Hildenbrand
2020-02-11 16:33 ` Alexander Duyck
2020-02-11 17:04 ` David Hildenbrand
2020-01-22 17:43 ` [PATCH v16.1 7/9] mm/page_reporting: Rotate reported pages to the tail of the list Alexander Duyck
2020-01-22 17:43 ` [PATCH v16.1 8/9] mm/page_reporting: Add budget limit on how many pages can be reported per pass Alexander Duyck
2020-01-22 17:44 ` [PATCH v16.1 9/9] mm/page_reporting: Add free page reporting documentation Alexander Duyck
2020-01-23 10:20 ` [PATCH v16.1 0/9] mm / virtio: Provide support for free page reporting Alexander Graf
2020-01-23 14:05 ` David Hildenbrand
2020-01-23 14:52 ` Alexander Graf
2020-01-24 13:25 ` David Hildenbrand
2020-01-24 16:20 ` David Hildenbrand
2020-01-23 16:26 ` Alexander Duyck
2020-01-23 16:54 ` Alexander Graf
2020-01-23 18:33 ` Alexander Duyck
2020-01-23 18:47 ` Graf (AWS), Alexander
2020-01-23 22:05 ` Alexander Duyck
2020-01-23 17:20 ` Dave Hansen
2020-01-23 19:23 ` Konrad Rzeszutek Wilk
2020-01-23 19:17 ` Johannes Weiner
2020-01-23 22:29 ` Alexander Duyck
2020-01-23 23:24 ` Dave Hansen
2020-01-24 13:23 ` Hillf Danton [this message]
2020-01-24 16:40 ` Alexander Graf
2020-01-25 2:01 ` Hillf Danton
2020-02-03 22:05 ` Alexander Duyck
2020-02-10 19:18 ` Should I repost? (was: Re: [PATCH v16.1 0/9] mm / virtio: Provide support for free page reporting) Alexander Duyck
2020-02-11 10:40 ` Mel Gorman
2020-02-11 22:57 ` Alexander Duyck
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200124132352.12824-1-hdanton@sina.com \
--to=hdanton@sina.com \
--cc=akpm@linux-foundation.org \
--cc=alexander.duyck@gmail.com \
--cc=graf@amazon.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@techsingularity.net \
--cc=minchan@kernel.org \
--cc=mst@redhat.com \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).