All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: sandeen@redhat.com, linux-nfs@vger.kernel.org,
	linux-cifs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>,
	linux-unionfs@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-mm@kvack.org, linux-btrfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, ocfs2-devel@oss.oracle.com
Subject: [PATCH 04/25] vfs: strengthen checking of file range inputs to generic_remap_checks
Date: Wed, 10 Oct 2018 21:12:46 -0700	[thread overview]
Message-ID: <153923116686.5546.8711942394464060950.stgit@magnolia> (raw)
In-Reply-To: <153923113649.5546.9840926895953408273.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

File range remapping, if allowed to run past the destination file's EOF,
is an optimization on a regular file write.  Regular file writes that
extend the file length are subject to various constraints which are not
checked by range cloning.

This is a correctness problem because we're never allowed to touch
ranges that the page cache can't support (s_maxbytes); we're not
supposed to deal with large offsets (MAX_NON_LFS) if O_LARGEFILE isn't
set; and we must obey resource limits (RLIMIT_FSIZE).

Therefore, add these checks to the new generic_remap_checks function so
that we curtail unexpected behavior.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
---
 mm/filemap.c |   91 ++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 59 insertions(+), 32 deletions(-)


diff --git a/mm/filemap.c b/mm/filemap.c
index 47e6bfd45a91..08ad210fee49 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2915,6 +2915,49 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
 }
 EXPORT_SYMBOL(read_cache_page_gfp);
 
+static int generic_access_check_limits(struct file *file, loff_t pos,
+				       loff_t *count)
+{
+	struct inode *inode = file->f_mapping->host;
+
+	/* Don't exceed the LFS limits. */
+	if (unlikely(pos + *count > MAX_NON_LFS &&
+				!(file->f_flags & O_LARGEFILE))) {
+		if (pos >= MAX_NON_LFS)
+			return -EFBIG;
+		*count = min(*count, (loff_t)MAX_NON_LFS - pos);
+	}
+
+	/*
+	 * Don't operate on ranges the page cache doesn't support.
+	 *
+	 * A range starting at or past the limit fails with EFBIG (no signal
+	 * is sent here); otherwise the count is trimmed and the operation
+	 * becomes a short one.  Linus' frestrict idea would clean these up.
+	 */
+	if (unlikely(pos >= inode->i_sb->s_maxbytes))
+		return -EFBIG;
+
+	*count = min(*count, inode->i_sb->s_maxbytes - pos);
+	return 0;
+}
+
+static int generic_write_check_limits(struct file *file, loff_t pos,
+				      loff_t *count)
+{
+	unsigned long limit = rlimit(RLIMIT_FSIZE);
+
+	if (limit != RLIM_INFINITY) {
+		if (pos >= limit) {
+			send_sig(SIGXFSZ, current, 0);
+			return -EFBIG;
+		}
+		*count = min(*count, (loff_t)limit - pos);
+	}
+
+	return generic_access_check_limits(file, pos, count);
+}
+
 /*
  * Performs necessary checks before doing a write
  *
@@ -2926,8 +2969,8 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	unsigned long limit = rlimit(RLIMIT_FSIZE);
-	loff_t pos;
+	loff_t count;
+	int ret;
 
 	if (!iov_iter_count(from))
 		return 0;
@@ -2936,40 +2979,15 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	if (iocb->ki_flags & IOCB_APPEND)
 		iocb->ki_pos = i_size_read(inode);
 
-	pos = iocb->ki_pos;
-
 	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
 		return -EINVAL;
 
-	if (limit != RLIM_INFINITY) {
-		if (iocb->ki_pos >= limit) {
-			send_sig(SIGXFSZ, current, 0);
-			return -EFBIG;
-		}
-		iov_iter_truncate(from, limit - (unsigned long)pos);
-	}
+	count = iov_iter_count(from);
+	ret = generic_write_check_limits(file, iocb->ki_pos, &count);
+	if (ret)
+		return ret;
 
-	/*
-	 * LFS rule
-	 */
-	if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS &&
-				!(file->f_flags & O_LARGEFILE))) {
-		if (pos >= MAX_NON_LFS)
-			return -EFBIG;
-		iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos);
-	}
-
-	/*
-	 * Are we about to exceed the fs block limit ?
-	 *
-	 * If we have written data it becomes a short write.  If we have
-	 * exceeded without writing data we send a signal and return EFBIG.
-	 * Linus frestrict idea will clean these up nicely..
-	 */
-	if (unlikely(pos >= inode->i_sb->s_maxbytes))
-		return -EFBIG;
-
-	iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos);
+	iov_iter_truncate(from, count);
 	return iov_iter_count(from);
 }
 EXPORT_SYMBOL(generic_write_checks);
@@ -2991,6 +3009,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 	uint64_t bcount;
 	loff_t size_in, size_out;
 	loff_t bs = inode_out->i_sb->s_blocksize;
+	int ret;
 
 	/* The start of both ranges must be aligned to an fs block. */
 	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
@@ -3014,6 +3033,14 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 		return -EINVAL;
 	count = min(count, size_in - (uint64_t)pos_in);
 
+	ret = generic_access_check_limits(file_in, pos_in, &count);
+	if (ret)
+		return ret;
+
+	ret = generic_write_check_limits(file_out, pos_out, &count);
+	if (ret)
+		return ret;
+
 	/*
 	 * If the user wanted us to link to the infile's EOF, round up to the
 	 * next block boundary for this check.

WARNING: multiple messages have this Message-ID (diff)
From: Darrick J. Wong <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: sandeen@redhat.com, linux-nfs@vger.kernel.org,
	linux-cifs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>,
	linux-unionfs@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-mm@kvack.org, linux-btrfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] [PATCH 04/25] vfs: strengthen checking of file range inputs to generic_remap_checks
Date: Wed, 10 Oct 2018 21:12:46 -0700	[thread overview]
Message-ID: <153923116686.5546.8711942394464060950.stgit@magnolia> (raw)
In-Reply-To: <153923113649.5546.9840926895953408273.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

File range remapping, if allowed to run past the destination file's EOF,
is an optimization on a regular file write.  Regular file writes that
extend the file length are subject to various constraints which are not
checked by range cloning.

This is a correctness problem because we're never allowed to touch
ranges that the page cache can't support (s_maxbytes); we're not
supposed to deal with large offsets (MAX_NON_LFS) if O_LARGEFILE isn't
set; and we must obey resource limits (RLIMIT_FSIZE).

Therefore, add these checks to the new generic_remap_checks function so
that we curtail unexpected behavior.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
---
 mm/filemap.c |   91 ++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 59 insertions(+), 32 deletions(-)


diff --git a/mm/filemap.c b/mm/filemap.c
index 47e6bfd45a91..08ad210fee49 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2915,6 +2915,49 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
 }
 EXPORT_SYMBOL(read_cache_page_gfp);
 
+static int generic_access_check_limits(struct file *file, loff_t pos,
+				       loff_t *count)
+{
+	struct inode *inode = file->f_mapping->host;
+
+	/* Don't exceed the LFS limits. */
+	if (unlikely(pos + *count > MAX_NON_LFS &&
+				!(file->f_flags & O_LARGEFILE))) {
+		if (pos >= MAX_NON_LFS)
+			return -EFBIG;
+		*count = min(*count, (loff_t)MAX_NON_LFS - pos);
+	}
+
+	/*
+	 * Don't operate on ranges the page cache doesn't support.
+	 *
+	 * A range starting at or past the limit fails with EFBIG (no signal
+	 * is sent here); otherwise the count is trimmed and the operation
+	 * becomes a short one.  Linus' frestrict idea would clean these up.
+	 */
+	if (unlikely(pos >= inode->i_sb->s_maxbytes))
+		return -EFBIG;
+
+	*count = min(*count, inode->i_sb->s_maxbytes - pos);
+	return 0;
+}
+
+static int generic_write_check_limits(struct file *file, loff_t pos,
+				      loff_t *count)
+{
+	unsigned long limit = rlimit(RLIMIT_FSIZE);
+
+	if (limit != RLIM_INFINITY) {
+		if (pos >= limit) {
+			send_sig(SIGXFSZ, current, 0);
+			return -EFBIG;
+		}
+		*count = min(*count, (loff_t)limit - pos);
+	}
+
+	return generic_access_check_limits(file, pos, count);
+}
+
 /*
  * Performs necessary checks before doing a write
  *
@@ -2926,8 +2969,8 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	unsigned long limit = rlimit(RLIMIT_FSIZE);
-	loff_t pos;
+	loff_t count;
+	int ret;
 
 	if (!iov_iter_count(from))
 		return 0;
@@ -2936,40 +2979,15 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	if (iocb->ki_flags & IOCB_APPEND)
 		iocb->ki_pos = i_size_read(inode);
 
-	pos = iocb->ki_pos;
-
 	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
 		return -EINVAL;
 
-	if (limit != RLIM_INFINITY) {
-		if (iocb->ki_pos >= limit) {
-			send_sig(SIGXFSZ, current, 0);
-			return -EFBIG;
-		}
-		iov_iter_truncate(from, limit - (unsigned long)pos);
-	}
+	count = iov_iter_count(from);
+	ret = generic_write_check_limits(file, iocb->ki_pos, &count);
+	if (ret)
+		return ret;
 
-	/*
-	 * LFS rule
-	 */
-	if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS &&
-				!(file->f_flags & O_LARGEFILE))) {
-		if (pos >= MAX_NON_LFS)
-			return -EFBIG;
-		iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos);
-	}
-
-	/*
-	 * Are we about to exceed the fs block limit ?
-	 *
-	 * If we have written data it becomes a short write.  If we have
-	 * exceeded without writing data we send a signal and return EFBIG.
-	 * Linus frestrict idea will clean these up nicely..
-	 */
-	if (unlikely(pos >= inode->i_sb->s_maxbytes))
-		return -EFBIG;
-
-	iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos);
+	iov_iter_truncate(from, count);
 	return iov_iter_count(from);
 }
 EXPORT_SYMBOL(generic_write_checks);
@@ -2991,6 +3009,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 	uint64_t bcount;
 	loff_t size_in, size_out;
 	loff_t bs = inode_out->i_sb->s_blocksize;
+	int ret;
 
 	/* The start of both ranges must be aligned to an fs block. */
 	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
@@ -3014,6 +3033,14 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 		return -EINVAL;
 	count = min(count, size_in - (uint64_t)pos_in);
 
+	ret = generic_access_check_limits(file_in, pos_in, &count);
+	if (ret)
+		return ret;
+
+	ret = generic_write_check_limits(file_out, pos_out, &count);
+	if (ret)
+		return ret;
+
 	/*
 	 * If the user wanted us to link to the infile's EOF, round up to the
 	 * next block boundary for this check.

  parent reply	other threads:[~2018-10-11  4:12 UTC|newest]

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-11  4:12 [PATCH v3 00/25] fs: fixes for serious clone/dedupe problems Darrick J. Wong
2018-10-11  4:12 ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:12 ` [PATCH 01/25] xfs: add a per-xfs trace_printk macro Darrick J. Wong
2018-10-11  4:12   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11 13:39   ` Christoph Hellwig
2018-10-11 13:39     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-11 23:34     ` Darrick J. Wong
2018-10-11 23:34       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:12 ` [PATCH 02/25] vfs: vfs_clone_file_prep_inodes should return EINVAL for a clone from beyond EOF Darrick J. Wong
2018-10-11  4:12   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11 13:40   ` Christoph Hellwig
2018-10-11 13:40     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-11  4:12 ` [PATCH 03/25] vfs: check file ranges before cloning files Darrick J. Wong
2018-10-11  4:12   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11 13:42   ` Christoph Hellwig
2018-10-11 13:42     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-11 14:13     ` Amir Goldstein
2018-10-11  4:12 ` Darrick J. Wong [this message]
2018-10-11  4:12   ` [Ocfs2-devel] [PATCH 04/25] vfs: strengthen checking of file range inputs to generic_remap_checks Darrick J. Wong
2018-10-11 13:43   ` Christoph Hellwig
2018-10-11 13:43     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-11  4:12 ` [PATCH 05/25] vfs: avoid problematic remapping requests into partial EOF block Darrick J. Wong
2018-10-11  4:12   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-12  0:16   ` Dave Chinner
2018-10-12  0:16     ` [Ocfs2-devel] " Dave Chinner
2018-10-12 16:07     ` Darrick J. Wong
2018-10-12 16:07       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-12 20:22   ` Filipe Manana
2018-10-12 20:22     ` Filipe Manana
2018-10-15  0:31     ` Dave Chinner
2018-10-15  0:31       ` [Ocfs2-devel] " Dave Chinner
2018-11-02 12:04       ` Filipe Manana
2018-11-02 12:04         ` Filipe Manana
2018-11-02 17:42         ` Darrick J. Wong
2018-11-02 17:42           ` Darrick J. Wong
2018-11-02 17:42           ` [Ocfs2-devel] " Darrick J. Wong
2018-11-02 18:18           ` Filipe Manana
2018-11-02 19:05             ` Filipe Manana
2018-10-11  4:13 ` [PATCH 06/25] vfs: skip zero-length dedupe requests Darrick J. Wong
2018-10-11  4:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:13 ` [PATCH 07/25] vfs: combine the clone and dedupe into a single remap_file_range Darrick J. Wong
2018-10-11  4:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:13 ` [PATCH 08/25] vfs: rename vfs_clone_file_prep to be more descriptive Darrick J. Wong
2018-10-11  4:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:13 ` [PATCH 09/25] vfs: rename clone_verify_area to remap_verify_area Darrick J. Wong
2018-10-11  4:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:13 ` [PATCH 10/25] vfs: create generic_remap_file_range_touch to update inode metadata Darrick J. Wong
2018-10-11  4:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:13 ` [PATCH 11/25] vfs: pass remap flags to generic_remap_file_range_prep Darrick J. Wong
2018-10-11  4:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:13 ` [PATCH 12/25] vfs: pass remap flags to generic_remap_checks Darrick J. Wong
2018-10-11  4:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:13 ` [PATCH 13/25] vfs: make remap_file_range functions take and return bytes completed Darrick J. Wong
2018-10-11  4:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:14 ` [PATCH 14/25] vfs: plumb RFR_* remap flags through the vfs clone functions Darrick J. Wong
2018-10-11  4:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:14 ` [PATCH 15/25] vfs: plumb RFR_* remap flags through the vfs dedupe functions Darrick J. Wong
2018-10-11  4:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:14 ` [PATCH 16/25] vfs: make remapping to source file eof more explicit Darrick J. Wong
2018-10-11  4:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:14 ` [PATCH 17/25] vfs: enable remap callers that can handle short operations Darrick J. Wong
2018-10-11  4:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  5:15   ` Amir Goldstein
2018-10-11 16:04     ` Darrick J. Wong
2018-10-11 16:04       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11 16:05   ` [PATCH v2 " Darrick J. Wong
2018-10-11 16:05     ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:14 ` [PATCH 18/25] vfs: hide file range comparison function Darrick J. Wong
2018-10-11  4:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:14 ` [PATCH 19/25] vfs: implement opportunistic short dedupe Darrick J. Wong
2018-10-11  4:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:14 ` [PATCH 20/25] ocfs2: truncate page cache for clone destination file before remapping Darrick J. Wong
2018-10-11  4:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:14 ` [PATCH 21/25] ocfs2: fix pagecache truncation prior to reflink Darrick J. Wong
2018-10-11  4:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:15 ` [PATCH 22/25] ocfs2: support partial clone range and dedupe range Darrick J. Wong
2018-10-11  4:15   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:15 ` [PATCH 23/25] xfs: fix pagecache truncation prior to reflink Darrick J. Wong
2018-10-11  4:15   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-12  1:15   ` Dave Chinner
2018-10-12  1:15     ` [Ocfs2-devel] " Dave Chinner
2018-10-11  4:15 ` [PATCH 24/25] xfs: support returning partial reflink results Darrick J. Wong
2018-10-11  4:15   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-12  1:22   ` Dave Chinner
2018-10-12  1:22     ` [Ocfs2-devel] " Dave Chinner
2018-10-12 16:06     ` Darrick J. Wong
2018-10-12 16:06       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-11  4:15 ` [PATCH 25/25] xfs: remove redundant remap partial EOF block checks Darrick J. Wong
2018-10-11  4:15   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-12  1:22   ` Dave Chinner
2018-10-12  1:22     ` [Ocfs2-devel] " Dave Chinner
2018-10-11  8:33 ` [PATCH v3 00/25] fs: fixes for serious clone/dedupe problems Amir Goldstein
2018-10-11 15:55   ` Darrick J. Wong
2018-10-11 15:55     ` [Ocfs2-devel] " Darrick J. Wong
2018-10-13  0:05 [PATCH v4 " Darrick J. Wong
2018-10-13  0:06 ` [PATCH 04/25] vfs: strengthen checking of file range inputs to generic_remap_checks Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=153923116686.5546.8711942394464060950.stgit@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=amir73il@gmail.com \
    --cc=david@fromorbit.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-unionfs@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=ocfs2-devel@oss.oracle.com \
    --cc=sandeen@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.