linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: "Theodore Ts'o" <tytso@mit.edu>,
	Andreas Dilger <adilger.kernel@dilger.ca>,
	Jan Kara <jack@suse.com>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
	Hugh Dickins <hughd@google.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Matthew Wilcox <willy@infradead.org>,
	Ross Zwisler <ross.zwisler@linux.intel.com>,
	linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv3 36/41] ext4: handle writeback with huge pages
Date: Thu, 15 Sep 2016 14:55:18 +0300	[thread overview]
Message-ID: <20160915115523.29737-37-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20160915115523.29737-1-kirill.shutemov@linux.intel.com>

Modify mpage_map_and_submit_buffers() and mpage_release_unused_pages()
to deal with huge pages.

Mostly result of try-and-error. Critical view would be appriciated.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 fs/ext4/inode.c | 61 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 18 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6a8da1a8409c..f2e34e340e65 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1652,18 +1652,30 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
 		if (nr_pages == 0)
 			break;
 		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
+			struct page *page = compound_head(pvec.pages[i]);
+
 			if (page->index > end)
 				break;
 			BUG_ON(!PageLocked(page));
 			BUG_ON(PageWriteback(page));
 			if (invalidate) {
-				block_invalidatepage(page, 0, PAGE_SIZE);
+				unsigned long offset, len;
+
+				offset = (index % hpage_nr_pages(page));
+				len = min_t(unsigned long, end - page->index,
+						hpage_nr_pages(page));
+
+				block_invalidatepage(page, offset << PAGE_SHIFT,
+						len << PAGE_SHIFT);
 				ClearPageUptodate(page);
 			}
 			unlock_page(page);
+			if (PageTransHuge(page))
+				break;
 		}
-		index = pvec.pages[nr_pages - 1]->index + 1;
+		index = page_to_pgoff(pvec.pages[nr_pages - 1]) + 1;
+		if (PageTransCompound(pvec.pages[nr_pages - 1]))
+			index = round_up(index, HPAGE_PMD_NR);
 		pagevec_release(&pvec);
 	}
 }
@@ -2097,16 +2109,16 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
 	loff_t size = i_size_read(mpd->inode);
 	int err;
 
-	BUG_ON(page->index != mpd->first_page);
-	if (page->index == size >> PAGE_SHIFT)
-		len = size & ~PAGE_MASK;
-	else
-		len = PAGE_SIZE;
+	page = compound_head(page);
+	len = hpage_size(page);
+	if (page->index + hpage_nr_pages(page) - 1 == size >> PAGE_SHIFT)
+		len = size & ~hpage_mask(page);
+
 	clear_page_dirty_for_io(page);
 	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
 	if (!err)
-		mpd->wbc->nr_to_write--;
-	mpd->first_page++;
+		mpd->wbc->nr_to_write -= hpage_nr_pages(page);
+	mpd->first_page = round_up(mpd->first_page + 1, hpage_nr_pages(page));
 
 	return err;
 }
@@ -2254,12 +2266,16 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
 			break;
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
+			unsigned long diff;
 
-			if (page->index > end)
+			if (page_to_pgoff(page) > end)
 				break;
 			/* Up to 'end' pages must be contiguous */
-			BUG_ON(page->index != start);
+			BUG_ON(page_to_pgoff(page) != start);
+			diff = (page - compound_head(page)) << bpp_bits;
 			bh = head = page_buffers(page);
+			while (diff--)
+				bh = bh->b_this_page;
 			do {
 				if (lblk < mpd->map.m_lblk)
 					continue;
@@ -2296,7 +2312,10 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
 			 * supports blocksize < pagesize as we will try to
 			 * convert potentially unmapped parts of inode.
 			 */
-			mpd->io_submit.io_end->size += PAGE_SIZE;
+			if (PageTransCompound(page))
+				mpd->io_submit.io_end->size += HPAGE_PMD_SIZE;
+			else
+				mpd->io_submit.io_end->size += PAGE_SIZE;
 			/* Page fully mapped - let IO run! */
 			err = mpage_submit_page(mpd, page);
 			if (err < 0) {
@@ -2304,6 +2323,10 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
 				return err;
 			}
 			start++;
+			if (PageTransCompound(page)) {
+				start = round_up(start, HPAGE_PMD_NR);
+				break;
+			}
 		}
 		pagevec_release(&pvec);
 	}
@@ -2543,7 +2566,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 			 * mapping. However, page->index will not change
 			 * because we have a reference on the page.
 			 */
-			if (page->index > end)
+			if (page_to_pgoff(page) > end)
 				goto out;
 
 			/*
@@ -2558,7 +2581,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 				goto out;
 
 			/* If we can't merge this page, we are done. */
-			if (mpd->map.m_len > 0 && mpd->next_page != page->index)
+			if (mpd->map.m_len > 0 && mpd->next_page != page_to_pgoff(page))
 				goto out;
 
 			lock_page(page);
@@ -2572,7 +2595,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 			if (!PageDirty(page) ||
 			    (PageWriteback(page) &&
 			     (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
-			    unlikely(page->mapping != mapping)) {
+			    unlikely(page_mapping(page) != mapping)) {
 				unlock_page(page);
 				continue;
 			}
@@ -2581,8 +2604,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 			BUG_ON(PageWriteback(page));
 
 			if (mpd->map.m_len == 0)
-				mpd->first_page = page->index;
-			mpd->next_page = page->index + 1;
+				mpd->first_page = page_to_pgoff(page);
+			page = compound_head(page);
+			mpd->next_page = round_up(page->index + 1,
+					hpage_nr_pages(page));
 			/* Add all dirty buffers to mpd */
 			lblk = ((ext4_lblk_t)page->index) <<
 				(PAGE_SHIFT - blkbits);
-- 
2.9.3

  parent reply	other threads:[~2016-09-15 12:04 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-15 11:54 [PATCHv3 00/41] ext4: support of huge pages Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 01/41] tools: Add WARN_ON_ONCE Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 02/41] radix tree test suite: Allow GFP_ATOMIC allocations to fail Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 03/41] radix-tree: Add radix_tree_join Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 04/41] radix-tree: Add radix_tree_split Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 05/41] radix-tree: Add radix_tree_split_preload() Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 06/41] radix-tree: Handle multiorder entries being deleted by replace_clear_tags Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 07/41] mm, shmem: swich huge tmpfs to multi-order radix-tree entries Kirill A. Shutemov
2016-09-16 12:07   ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 08/41] Revert "radix-tree: implement radix_tree_maybe_preload_order()" Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 09/41] page-flags: relax page flag policy for few flags Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 10/41] mm, rmap: account file thp pages Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 11/41] thp: try to free page's buffers before attempt split Kirill A. Shutemov
2016-10-11 15:40   ` Jan Kara
2016-10-11 21:43     ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 12/41] thp: handle write-protection faults for file THP Kirill A. Shutemov
2016-10-11 15:47   ` Jan Kara
2016-10-11 21:47     ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 13/41] truncate: make sure invalidate_mapping_pages() can discard huge pages Kirill A. Shutemov
2016-10-11 15:58   ` Jan Kara
2016-10-11 21:53     ` Kirill A. Shutemov
2016-10-12  6:43       ` Jan Kara
2016-10-24 10:41         ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 14/41] filemap: allocate huge page in page_cache_read(), if allowed Kirill A. Shutemov
2016-10-11 16:15   ` Jan Kara
2016-10-11 21:57     ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 15/41] filemap: handle huge pages in do_generic_file_read() Kirill A. Shutemov
2016-10-13  9:33   ` Jan Kara
2016-10-31 18:10     ` Kirill A. Shutemov
2016-11-01 16:39       ` Jan Kara
2016-11-02  8:32         ` Kirill A. Shutemov
2016-11-02 14:37           ` Christoph Hellwig
2016-11-03 20:40           ` Jan Kara
2016-11-07 11:07             ` Kirill A. Shutemov
2016-11-07 14:59               ` Christoph Hellwig
2016-11-02 14:36         ` Christoph Hellwig
2016-11-03 17:56           ` Jan Kara
2016-11-07 11:13           ` Kirill A. Shutemov
2016-11-07 15:01             ` Christoph Hellwig
2016-11-07 16:03               ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 16/41] filemap: allocate huge page in pagecache_get_page(), if allowed Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 17/41] filemap: handle huge pages in filemap_fdatawait_range() Kirill A. Shutemov
2016-10-13  9:44   ` Jan Kara
2016-10-13 12:08     ` Kirill A. Shutemov
2016-10-13 13:18       ` Jan Kara
2016-10-24 11:36         ` Kirill A. Shutemov
2016-10-30 17:31           ` Jan Kara
2016-09-15 11:55 ` [PATCHv3 18/41] HACK: readahead: alloc huge pages, if allowed Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 19/41] block: define BIO_MAX_PAGES to HPAGE_PMD_NR if huge page cache enabled Kirill A. Shutemov
2016-09-16 12:09   ` Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 20/41] mm: make write_cache_pages() work on huge pages Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 21/41] thp: introduce hpage_size() and hpage_mask() Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 22/41] thp: do not threat slab pages as huge in hpage_{nr_pages,size,mask} Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 23/41] fs: make block_read_full_page() be able to read huge page Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 24/41] fs: make block_write_{begin,end}() be able to handle huge pages Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 25/41] fs: make block_page_mkwrite() aware about " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 26/41] truncate: make truncate_inode_pages_range() " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 27/41] truncate: make invalidate_inode_pages2_range() " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 28/41] mm, hugetlb: switch hugetlbfs to multi-order radix-tree entries Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 29/41] ext4: make ext4_mpage_readpages() hugepage-aware Kirill A. Shutemov
2016-09-15 12:27   ` Andreas Dilger
2016-09-16 12:17     ` Kirill A. Shutemov
2016-09-16 12:10   ` Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 30/41] ext4: make ext4_writepage() work on huge pages Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 31/41] ext4: handle huge pages in ext4_page_mkwrite() Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 32/41] ext4: handle huge pages in __ext4_block_zero_page_range() Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 33/41] ext4: make ext4_block_write_begin() aware about huge pages Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 34/41] ext4: handle huge pages in ext4_da_write_end() Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 35/41] ext4: make ext4_da_page_release_reservation() aware about huge pages Kirill A. Shutemov
2016-09-15 11:55 ` Kirill A. Shutemov [this message]
2016-09-15 11:55 ` [PATCHv3 37/41] ext4: make EXT4_IOC_MOVE_EXT work with " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 38/41] ext4: fix SEEK_DATA/SEEK_HOLE for " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 39/41] ext4: make fallocate() operations work with " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 40/41] mm, fs, ext4: expand use of page_mapping() and page_to_pgoff() Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 41/41] ext4, vfs: add huge= mount option Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160915115523.29737-37-kirill.shutemov@linux.intel.com \
    --to=kirill.shutemov@linux.intel.com \
    --cc=aarcange@redhat.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@intel.com \
    --cc=hughd@google.com \
    --cc=jack@suse.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ross.zwisler@linux.intel.com \
    --cc=tytso@mit.edu \
    --cc=vbabka@suse.cz \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).