linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: "Theodore Ts'o" <tytso@mit.edu>,
	Andreas Dilger <adilger.kernel@dilger.ca>,
	Jan Kara <jack@suse.com>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
	Hugh Dickins <hughd@google.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Matthew Wilcox <willy@infradead.org>,
	Ross Zwisler <ross.zwisler@linux.intel.com>,
	linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv4 27/43] truncate: make truncate_inode_pages_range() aware about huge pages
Date: Tue, 25 Oct 2016 03:13:26 +0300	[thread overview]
Message-ID: <20161025001342.76126-28-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20161025001342.76126-1-kirill.shutemov@linux.intel.com>

As with shmem_undo_range(), truncate_inode_pages_range() removes huge
pages if they are fully within the range.

A partial truncate of a huge page zeroes out the affected part of the THP.

Unlike with shmem, this doesn't prevent us from having holes in the middle
of a huge page: we can still skip writeback of untouched buffers.

With memory-mapped IO we would lose holes in some cases when we have
a THP in the page cache, since we cannot track access at the 4k level in
this case.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 fs/buffer.c   |  2 +-
 mm/truncate.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 88 insertions(+), 9 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 8dff5817e313..670290820325 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1533,7 +1533,7 @@ void block_invalidatepage(struct page *page, unsigned int offset,
 	/*
 	 * Check for overflow
 	 */
-	BUG_ON(stop > PAGE_SIZE || stop < length);
+	BUG_ON(stop > hpage_size(page) || stop < length);
 
 	head = page_buffers(page);
 	bh = head;
diff --git a/mm/truncate.c b/mm/truncate.c
index f88e2f1eb6f0..7bb9d197a9e8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -74,7 +74,7 @@ void do_invalidatepage(struct page *page, unsigned int offset,
 {
 	void (*invalidatepage)(struct page *, unsigned int, unsigned int);
 
-	invalidatepage = page->mapping->a_ops->invalidatepage;
+	invalidatepage = page_mapping(page)->a_ops->invalidatepage;
 #ifdef CONFIG_BLOCK
 	if (!invalidatepage)
 		invalidatepage = block_invalidatepage;
@@ -100,7 +100,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 		return -EIO;
 
 	if (page_has_private(page))
-		do_invalidatepage(page, 0, PAGE_SIZE);
+		do_invalidatepage(page, 0, hpage_size(page));
 
 	/*
 	 * Some filesystems seem to re-dirty the page even after
@@ -272,6 +272,36 @@ void truncate_inode_pages_range(struct address_space *mapping,
 				unlock_page(page);
 				continue;
 			}
+
+			if (PageTransTail(page)) {
+				/* Middle of THP: zero out the page */
+				clear_highpage(page);
+				if (page_has_private(page)) {
+					int off = page - compound_head(page);
+					do_invalidatepage(compound_head(page),
+							off * PAGE_SIZE,
+							PAGE_SIZE);
+				}
+				unlock_page(page);
+				continue;
+			} else if (PageTransHuge(page)) {
+				if (index == round_down(end, HPAGE_PMD_NR)) {
+					/*
+					 * Range ends in the middle of THP:
+					 * zero out the page
+					 */
+					clear_highpage(page);
+					if (page_has_private(page)) {
+						do_invalidatepage(page, 0,
+								PAGE_SIZE);
+					}
+					unlock_page(page);
+					continue;
+				}
+				index += HPAGE_PMD_NR - 1;
+				i += HPAGE_PMD_NR - 1;
+			}
+
 			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
@@ -293,9 +323,12 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			wait_on_page_writeback(page);
 			zero_user_segment(page, partial_start, top);
 			cleancache_invalidate_page(mapping, page);
-			if (page_has_private(page))
-				do_invalidatepage(page, partial_start,
-						  top - partial_start);
+			if (page_has_private(page)) {
+				int off = page - compound_head(page);
+				do_invalidatepage(compound_head(page),
+						off * PAGE_SIZE + partial_start,
+						top - partial_start);
+			}
 			unlock_page(page);
 			put_page(page);
 		}
@@ -306,9 +339,12 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			wait_on_page_writeback(page);
 			zero_user_segment(page, 0, partial_end);
 			cleancache_invalidate_page(mapping, page);
-			if (page_has_private(page))
-				do_invalidatepage(page, 0,
-						  partial_end);
+			if (page_has_private(page)) {
+				int off = page - compound_head(page);
+				do_invalidatepage(compound_head(page),
+						off * PAGE_SIZE,
+						partial_end);
+			}
 			unlock_page(page);
 			put_page(page);
 		}
@@ -357,6 +393,49 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			lock_page(page);
 			WARN_ON(page_to_pgoff(page) != index);
 			wait_on_page_writeback(page);
+
+			if (PageTransTail(page)) {
+				/* Middle of THP: zero out the page */
+				clear_highpage(page);
+				if (page_has_private(page)) {
+					int off = page - compound_head(page);
+					do_invalidatepage(compound_head(page),
+							off * PAGE_SIZE,
+							PAGE_SIZE);
+				}
+				unlock_page(page);
+				/*
+				 * Partial thp truncate due 'start' in middle
+				 * of THP: don't need to look on these pages
+				 * again on !pvec.nr restart.
+				 */
+				if (index != round_down(end, HPAGE_PMD_NR))
+					start++;
+				continue;
+			} else if (PageTransHuge(page)) {
+				if (index == round_down(end, HPAGE_PMD_NR)) {
+					/*
+					 * Range ends in the middle of THP:
+					 * zero out the page
+					 */
+					clear_highpage(page);
+					if (page_has_private(page)) {
+						do_invalidatepage(page, 0,
+								PAGE_SIZE);
+					}
+					unlock_page(page);
+					/*
+					 * Partial thp truncate due 'end' in
+					 * middle of THP: don't need to look on
+					 * these pages again restart.
+					 */
+					start++;
+					continue;
+				}
+				index += HPAGE_PMD_NR - 1;
+				i += HPAGE_PMD_NR - 1;
+			}
+
 			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
-- 
2.9.3

  parent reply	other threads:[~2016-10-25  0:19 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-25  0:12 [PATCHv4 00/43] ext4: support of huge pages Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 01/43] tools: Add WARN_ON_ONCE Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 02/43] radix tree test suite: Allow GFP_ATOMIC allocations to fail Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 03/43] radix-tree: Add radix_tree_join Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 04/43] radix-tree: Add radix_tree_split Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 05/43] radix-tree: Add radix_tree_split_preload() Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 06/43] mm, shmem: swich huge tmpfs to multi-order radix-tree entries Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 07/43] Revert "radix-tree: implement radix_tree_maybe_preload_order()" Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 08/43] page-flags: relax page flag policy for few flags Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 09/43] mm, rmap: account file thp pages Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 10/43] thp: try to free page's buffers before attempt split Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 11/43] thp: handle write-protection faults for file THP Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 12/43] truncate: make sure invalidate_mapping_pages() can discard huge pages Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 13/43] filemap: allocate huge page in page_cache_read(), if allowed Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 14/43] filemap: handle huge pages in do_generic_file_read() Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 15/43] filemap: allocate huge page in pagecache_get_page(), if allowed Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 16/43] filemap: handle huge pages in filemap_fdatawait_range() Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 17/43] HACK: readahead: alloc huge pages, if allowed Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 18/43] block: define BIO_MAX_PAGES to HPAGE_PMD_NR if huge page cache enabled Kirill A. Shutemov
2016-10-25  7:21   ` Christoph Hellwig
2016-10-25 12:54     ` Kirill A. Shutemov
2016-10-26  4:13       ` Andreas Dilger
2016-10-26  7:30         ` Ming Lei
2016-10-26  7:36           ` Christoph Hellwig
2016-10-26  7:36         ` Christoph Hellwig
2016-10-26  7:35       ` Christoph Hellwig
2016-10-25  0:13 ` [PATCHv4 19/43] brd: make it handle huge pages Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 20/43] mm: make write_cache_pages() work on " Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 21/43] thp: introduce hpage_size() and hpage_mask() Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 22/43] thp: do not threat slab pages as huge in hpage_{nr_pages,size,mask} Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 23/43] thp: make thp_get_unmapped_area() respect S_HUGE_MODE Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 24/43] fs: make block_read_full_page() be able to read huge page Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 25/43] fs: make block_write_{begin,end}() be able to handle huge pages Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 26/43] fs: make block_page_mkwrite() aware about " Kirill A. Shutemov
2016-10-25  0:13 ` Kirill A. Shutemov [this message]
2016-10-25  0:13 ` [PATCHv4 28/43] truncate: make invalidate_inode_pages2_range() " Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 29/43] mm, hugetlb: switch hugetlbfs to multi-order radix-tree entries Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 30/43] mm: account huge pages to dirty, writaback, reclaimable, etc Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 31/43] ext4: make ext4_mpage_readpages() hugepage-aware Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 32/43] ext4: make ext4_writepage() work on huge pages Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 33/43] ext4: handle huge pages in ext4_page_mkwrite() Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 34/43] ext4: handle huge pages in __ext4_block_zero_page_range() Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 35/43] ext4: make ext4_block_write_begin() aware about huge pages Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 36/43] ext4: handle huge pages in ext4_da_write_end() Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 37/43] ext4: make ext4_da_page_release_reservation() aware about huge pages Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 38/43] ext4: handle writeback with " Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 39/43] ext4: make EXT4_IOC_MOVE_EXT work " Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 40/43] ext4: fix SEEK_DATA/SEEK_HOLE for " Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 41/43] ext4: make fallocate() operations work with " Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 42/43] mm, fs, ext4: expand use of page_mapping() and page_to_pgoff() Kirill A. Shutemov
2016-10-25  0:13 ` [PATCHv4 43/43] ext4, vfs: add huge= mount option Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161025001342.76126-28-kirill.shutemov@linux.intel.com \
    --to=kirill.shutemov@linux.intel.com \
    --cc=aarcange@redhat.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@intel.com \
    --cc=hughd@google.com \
    --cc=jack@suse.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ross.zwisler@linux.intel.com \
    --cc=tytso@mit.edu \
    --cc=vbabka@suse.cz \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).