All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: "Theodore Ts'o" <tytso@mit.edu>,
	Andreas Dilger <adilger.kernel@dilger.ca>,
	Jan Kara <jack@suse.com>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
	Hugh Dickins <hughd@google.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Matthew Wilcox <willy@infradead.org>,
	Ross Zwisler <ross.zwisler@linux.intel.com>,
	linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv5 17/36] fs: make block_read_full_page() be able to read huge page
Date: Tue, 29 Nov 2016 14:22:45 +0300	[thread overview]
Message-ID: <20161129112304.90056-18-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20161129112304.90056-1-kirill.shutemov@linux.intel.com>

The approach is straight-forward: for compound pages we read out whole
huge page.

For huge page we cannot have array of buffer head pointers on stack --
it's 4096 pointers on x86-64 -- 'arr' is allocated with kmalloc() for
huge pages.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 fs/buffer.c                 | 22 +++++++++++++++++-----
 include/linux/buffer_head.h |  9 +++++----
 include/linux/page-flags.h  |  2 +-
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index d21771fcf7d3..090f7edfa6b7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -871,7 +871,7 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
 
 try_again:
 	head = NULL;
-	offset = PAGE_SIZE;
+	offset = hpage_size(page);
 	while ((offset -= size) >= 0) {
 		bh = alloc_buffer_head(GFP_NOFS);
 		if (!bh)
@@ -1466,7 +1466,7 @@ void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset)
 {
 	bh->b_page = page;
-	BUG_ON(offset >= PAGE_SIZE);
+	BUG_ON(offset >= hpage_size(page));
 	if (PageHighMem(page))
 		/*
 		 * This catches illegal uses and preserves the offset:
@@ -2280,11 +2280,13 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 {
 	struct inode *inode = page->mapping->host;
 	sector_t iblock, lblock;
-	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	struct buffer_head *arr_on_stack[MAX_BUF_PER_PAGE];
+	struct buffer_head *bh, *head, **arr = arr_on_stack;
 	unsigned int blocksize, bbits;
 	int nr, i;
 	int fully_mapped = 1;
 
+	VM_BUG_ON_PAGE(PageTail(page), page);
 	head = create_page_buffers(page, inode, 0);
 	blocksize = head->b_size;
 	bbits = block_size_bits(blocksize);
@@ -2295,6 +2297,11 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	nr = 0;
 	i = 0;
 
+	if (PageTransHuge(page)) {
+		arr = kmalloc(sizeof(struct buffer_head *) * HPAGE_PMD_NR *
+				MAX_BUF_PER_PAGE, GFP_NOFS);
+	}
+
 	do {
 		if (buffer_uptodate(bh))
 			continue;
@@ -2310,7 +2317,9 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 					SetPageError(page);
 			}
 			if (!buffer_mapped(bh)) {
-				zero_user(page, i * blocksize, blocksize);
+				zero_user(page + (i * blocksize / PAGE_SIZE),
+						i * blocksize % PAGE_SIZE,
+						blocksize);
 				if (!err)
 					set_buffer_uptodate(bh);
 				continue;
@@ -2336,7 +2345,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 		if (!PageError(page))
 			SetPageUptodate(page);
 		unlock_page(page);
-		return 0;
+		goto out;
 	}
 
 	/* Stage two: lock the buffers */
@@ -2358,6 +2367,9 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 		else
 			submit_bh(REQ_OP_READ, 0, bh);
 	}
+out:
+	if (arr != arr_on_stack)
+		kfree(arr);
 	return 0;
 }
 EXPORT_SYMBOL(block_read_full_page);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index fd4134ce9c54..f12f6293ed44 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -131,13 +131,14 @@ BUFFER_FNS(Meta, meta)
 BUFFER_FNS(Prio, prio)
 BUFFER_FNS(Defer_Completion, defer_completion)
 
-#define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
+#define bh_offset(bh)	((unsigned long)(bh)->b_data & ~hpage_mask(bh->b_page))
 
 /* If we *know* page->private refers to buffer_heads */
-#define page_buffers(page)					\
+#define page_buffers(__page)					\
 	({							\
-		BUG_ON(!PagePrivate(page));			\
-		((struct buffer_head *)page_private(page));	\
+		struct page *p = compound_head(__page);		\
+		BUG_ON(!PagePrivate(p));			\
+		((struct buffer_head *)page_private(p));	\
 	})
 #define page_has_buffers(page)	PagePrivate(page)
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a2bef9a41bcf..20b7684e9298 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -730,7 +730,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
  */
 static inline int page_has_private(struct page *page)
 {
-	return !!(page->flags & PAGE_FLAGS_PRIVATE);
+	return !!(compound_head(page)->flags & PAGE_FLAGS_PRIVATE);
 }
 
 #undef PF_ANY
-- 
2.10.2


WARNING: multiple messages have this Message-ID (diff)
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: "Theodore Ts'o" <tytso@mit.edu>,
	Andreas Dilger <adilger.kernel@dilger.ca>,
	Jan Kara <jack@suse.com>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
	Hugh Dickins <hughd@google.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Matthew Wilcox <willy@infradead.org>,
	Ross Zwisler <ross.zwisler@linux.intel.com>,
	linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv5 17/36] fs: make block_read_full_page() be able to read huge page
Date: Tue, 29 Nov 2016 14:22:45 +0300	[thread overview]
Message-ID: <20161129112304.90056-18-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20161129112304.90056-1-kirill.shutemov@linux.intel.com>

The approach is straight-forward: for compound pages we read out whole
huge page.

For huge page we cannot have array of buffer head pointers on stack --
it's 4096 pointers on x86-64 -- 'arr' is allocated with kmalloc() for
huge pages.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 fs/buffer.c                 | 22 +++++++++++++++++-----
 include/linux/buffer_head.h |  9 +++++----
 include/linux/page-flags.h  |  2 +-
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index d21771fcf7d3..090f7edfa6b7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -871,7 +871,7 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
 
 try_again:
 	head = NULL;
-	offset = PAGE_SIZE;
+	offset = hpage_size(page);
 	while ((offset -= size) >= 0) {
 		bh = alloc_buffer_head(GFP_NOFS);
 		if (!bh)
@@ -1466,7 +1466,7 @@ void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset)
 {
 	bh->b_page = page;
-	BUG_ON(offset >= PAGE_SIZE);
+	BUG_ON(offset >= hpage_size(page));
 	if (PageHighMem(page))
 		/*
 		 * This catches illegal uses and preserves the offset:
@@ -2280,11 +2280,13 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 {
 	struct inode *inode = page->mapping->host;
 	sector_t iblock, lblock;
-	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	struct buffer_head *arr_on_stack[MAX_BUF_PER_PAGE];
+	struct buffer_head *bh, *head, **arr = arr_on_stack;
 	unsigned int blocksize, bbits;
 	int nr, i;
 	int fully_mapped = 1;
 
+	VM_BUG_ON_PAGE(PageTail(page), page);
 	head = create_page_buffers(page, inode, 0);
 	blocksize = head->b_size;
 	bbits = block_size_bits(blocksize);
@@ -2295,6 +2297,11 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	nr = 0;
 	i = 0;
 
+	if (PageTransHuge(page)) {
+		arr = kmalloc(sizeof(struct buffer_head *) * HPAGE_PMD_NR *
+				MAX_BUF_PER_PAGE, GFP_NOFS);
+	}
+
 	do {
 		if (buffer_uptodate(bh))
 			continue;
@@ -2310,7 +2317,9 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 					SetPageError(page);
 			}
 			if (!buffer_mapped(bh)) {
-				zero_user(page, i * blocksize, blocksize);
+				zero_user(page + (i * blocksize / PAGE_SIZE),
+						i * blocksize % PAGE_SIZE,
+						blocksize);
 				if (!err)
 					set_buffer_uptodate(bh);
 				continue;
@@ -2336,7 +2345,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 		if (!PageError(page))
 			SetPageUptodate(page);
 		unlock_page(page);
-		return 0;
+		goto out;
 	}
 
 	/* Stage two: lock the buffers */
@@ -2358,6 +2367,9 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 		else
 			submit_bh(REQ_OP_READ, 0, bh);
 	}
+out:
+	if (arr != arr_on_stack)
+		kfree(arr);
 	return 0;
 }
 EXPORT_SYMBOL(block_read_full_page);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index fd4134ce9c54..f12f6293ed44 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -131,13 +131,14 @@ BUFFER_FNS(Meta, meta)
 BUFFER_FNS(Prio, prio)
 BUFFER_FNS(Defer_Completion, defer_completion)
 
-#define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
+#define bh_offset(bh)	((unsigned long)(bh)->b_data & ~hpage_mask(bh->b_page))
 
 /* If we *know* page->private refers to buffer_heads */
-#define page_buffers(page)					\
+#define page_buffers(__page)					\
 	({							\
-		BUG_ON(!PagePrivate(page));			\
-		((struct buffer_head *)page_private(page));	\
+		struct page *p = compound_head(__page);		\
+		BUG_ON(!PagePrivate(p));			\
+		((struct buffer_head *)page_private(p));	\
 	})
 #define page_has_buffers(page)	PagePrivate(page)
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a2bef9a41bcf..20b7684e9298 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -730,7 +730,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
  */
 static inline int page_has_private(struct page *page)
 {
-	return !!(page->flags & PAGE_FLAGS_PRIVATE);
+	return !!(compound_head(page)->flags & PAGE_FLAGS_PRIVATE);
 }
 
 #undef PF_ANY
-- 
2.10.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2016-11-29 11:23 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-29 11:22 [PATCHv5 00/36] ext4: support of huge pages Kirill A. Shutemov
2016-11-29 11:22 ` Kirill A. Shutemov
2016-11-29 11:22 ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 01/36] mm, shmem: swich huge tmpfs to multi-order radix-tree entries Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 02/36] Revert "radix-tree: implement radix_tree_maybe_preload_order()" Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 03/36] page-flags: relax page flag policy for few flags Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 04/36] mm, rmap: account file thp pages Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 05/36] thp: try to free page's buffers before attempt split Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 06/36] thp: handle write-protection faults for file THP Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 07/36] filemap: allocate huge page in page_cache_read(), if allowed Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 08/36] filemap: handle huge pages in do_generic_file_read() Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 09/36] filemap: allocate huge page in pagecache_get_page(), if allowed Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 10/36] filemap: handle huge pages in filemap_fdatawait_range() Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 11/36] HACK: readahead: alloc huge pages, if allowed Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 12/36] brd: make it handle huge pages Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 13/36] mm: make write_cache_pages() work on " Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 14/36] thp: introduce hpage_size() and hpage_mask() Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 15/36] thp: do not threat slab pages as huge in hpage_{nr_pages,size,mask} Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 16/36] thp: make thp_get_unmapped_area() respect S_HUGE_MODE Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` Kirill A. Shutemov [this message]
2016-11-29 11:22   ` [PATCHv5 17/36] fs: make block_read_full_page() be able to read huge page Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 18/36] fs: make block_write_{begin,end}() be able to handle huge pages Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 19/36] fs: make block_page_mkwrite() aware about " Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 20/36] truncate: make truncate_inode_pages_range() " Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 21/36] truncate: make invalidate_inode_pages2_range() " Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 22/36] mm, hugetlb: switch hugetlbfs to multi-order radix-tree entries Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-30  9:48   ` Hillf Danton
2016-11-30  9:48     ` Hillf Danton
2016-11-30  9:48     ` Hillf Danton
2016-11-30 13:15     ` Kirill A. Shutemov
2016-11-30 13:15       ` Kirill A. Shutemov
2016-12-01  3:10       ` Hillf Danton
2016-12-01  3:10         ` Hillf Danton
2016-12-01  3:10         ` Hillf Danton
2016-11-29 11:22 ` [PATCHv5 23/36] mm: account huge pages to dirty, writaback, reclaimable, etc Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 24/36] ext4: make ext4_mpage_readpages() hugepage-aware Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 25/36] ext4: make ext4_writepage() work on huge pages Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 26/36] ext4: handle huge pages in ext4_page_mkwrite() Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 27/36] ext4: handle huge pages in __ext4_block_zero_page_range() Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 28/36] ext4: make ext4_block_write_begin() aware about huge pages Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 29/36] ext4: handle huge pages in ext4_da_write_end() Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 30/36] ext4: make ext4_da_page_release_reservation() aware about huge pages Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:22 ` [PATCHv5 31/36] ext4: handle writeback with " Kirill A. Shutemov
2016-11-29 11:22   ` Kirill A. Shutemov
2016-11-29 11:23 ` [PATCHv5 32/36] ext4: make EXT4_IOC_MOVE_EXT work " Kirill A. Shutemov
2016-11-29 11:23   ` Kirill A. Shutemov
2016-11-29 11:23 ` [PATCHv5 33/36] ext4: fix SEEK_DATA/SEEK_HOLE for " Kirill A. Shutemov
2016-11-29 11:23   ` Kirill A. Shutemov
2016-11-29 11:23 ` [PATCHv5 34/36] ext4: make fallocate() operations work with " Kirill A. Shutemov
2016-11-29 11:23   ` Kirill A. Shutemov
2016-11-29 11:23 ` [PATCHv5 35/36] mm, fs, ext4: expand use of page_mapping() and page_to_pgoff() Kirill A. Shutemov
2016-11-29 11:23   ` Kirill A. Shutemov
2016-11-29 11:23 ` [PATCHv5 36/36] ext4, vfs: add huge= mount option Kirill A. Shutemov
2016-11-29 11:23   ` Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161129112304.90056-18-kirill.shutemov@linux.intel.com \
    --to=kirill.shutemov@linux.intel.com \
    --cc=aarcange@redhat.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@intel.com \
    --cc=hughd@google.com \
    --cc=jack@suse.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ross.zwisler@linux.intel.com \
    --cc=tytso@mit.edu \
    --cc=vbabka@suse.cz \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.