Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v2 0/2] vfs: make active swap files unwritable
@ 2019-06-28 18:35 Darrick J. Wong
  2019-06-28 18:35 ` [PATCH 1/2] mm: set S_SWAPFILE on blockdev swap devices Darrick J. Wong
  2019-06-28 18:35 ` [PATCH 2/2] vfs: don't allow writes to swap files Darrick J. Wong
  0 siblings, 2 replies; 5+ messages in thread
From: Darrick J. Wong @ 2019-06-28 18:35 UTC (permalink / raw)
  To: hch, akpm, tytso, viro, darrick.wong
  Cc: linux-xfs, linux-fsdevel, linux-kernel, linux-mm

Hi all,

I discovered that it's possible for userspace to write to active swap
files and swap devices.  While activated, the kernel effectively holds
an irrevocable (except by swapoff) longterm lease on the storage
associated with the swap device, so we need to shut down this vector for
memory corruption of userspace programs.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This has been lightly tested with fstests.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=immutable-swapfiles

fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=immutable-swapfiles

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] mm: set S_SWAPFILE on blockdev swap devices
  2019-06-28 18:35 [PATCH v2 0/2] vfs: make active swap files unwritable Darrick J. Wong
@ 2019-06-28 18:35 ` Darrick J. Wong
  2019-06-28 18:35 ` [PATCH 2/2] vfs: don't allow writes to swap files Darrick J. Wong
  1 sibling, 0 replies; 5+ messages in thread
From: Darrick J. Wong @ 2019-06-28 18:35 UTC (permalink / raw)
  To: hch, akpm, tytso, viro, darrick.wong
  Cc: linux-xfs, linux-fsdevel, linux-kernel, linux-mm

From: Darrick J. Wong <darrick.wong@oracle.com>

Set S_SWAPFILE on block device inodes so that they have the same
protections as a swap flie.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 mm/swapfile.c |   31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)


diff --git a/mm/swapfile.c b/mm/swapfile.c
index 596ac98051c5..fa4edd0cca3a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2284,9 +2284,8 @@ EXPORT_SYMBOL_GPL(add_swap_extent);
  * requirements, they are simply tossed out - we will never use those blocks
  * for swapping.
  *
- * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon.  This
- * prevents root from shooting her foot off by ftruncating an in-use swapfile,
- * which will scribble on the fs.
+ * For all swap devices we set S_SWAPFILE across the life of the swapon.  This
+ * prevents users from writing to the swap device, which will corrupt memory.
  *
  * The amount of disk space which a single swap extent represents varies.
  * Typically it is in the 1-4 megabyte range.  So we can have hundreds of
@@ -2551,13 +2550,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
+
 		set_blocksize(bdev, old_block_size);
 		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
-	} else {
-		inode_lock(inode);
-		inode->i_flags &= ~S_SWAPFILE;
-		inode_unlock(inode);
 	}
+
+	inode_lock(inode);
+	inode->i_flags &= ~S_SWAPFILE;
+	inode_unlock(inode);
 	filp_close(swap_file, NULL);
 
 	/*
@@ -2780,11 +2780,11 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
 		p->flags |= SWP_BLKDEV;
 	} else if (S_ISREG(inode->i_mode)) {
 		p->bdev = inode->i_sb->s_bdev;
-		inode_lock(inode);
-		if (IS_SWAPFILE(inode))
-			return -EBUSY;
-	} else
-		return -EINVAL;
+	}
+
+	inode_lock(inode);
+	if (IS_SWAPFILE(inode))
+		return -EBUSY;
 
 	return 0;
 }
@@ -3185,8 +3185,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	atomic_inc(&proc_poll_event);
 	wake_up_interruptible(&proc_poll_wait);
 
-	if (S_ISREG(inode->i_mode))
-		inode->i_flags |= S_SWAPFILE;
+	inode->i_flags |= S_SWAPFILE;
 	error = 0;
 	goto out;
 bad_swap:
@@ -3208,7 +3207,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (inced_nr_rotate_swap)
 		atomic_dec(&nr_rotate_swap);
 	if (swap_file) {
-		if (inode && S_ISREG(inode->i_mode)) {
+		if (inode) {
 			inode_unlock(inode);
 			inode = NULL;
 		}
@@ -3221,7 +3220,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	}
 	if (name)
 		putname(name);
-	if (inode && S_ISREG(inode->i_mode))
+	if (inode)
 		inode_unlock(inode);
 	if (!error)
 		enable_swap_slots_cache();


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2/2] vfs: don't allow writes to swap files
  2019-06-28 18:35 [PATCH v2 0/2] vfs: make active swap files unwritable Darrick J. Wong
  2019-06-28 18:35 ` [PATCH 1/2] mm: set S_SWAPFILE on blockdev swap devices Darrick J. Wong
@ 2019-06-28 18:35 ` Darrick J. Wong
  1 sibling, 0 replies; 5+ messages in thread
From: Darrick J. Wong @ 2019-06-28 18:35 UTC (permalink / raw)
  To: hch, akpm, tytso, viro, darrick.wong
  Cc: linux-xfs, linux-fsdevel, linux-kernel, linux-mm

From: Darrick J. Wong <darrick.wong@oracle.com>

Don't let userspace write to an active swap file because the kernel
effectively has a long term lease on the storage and things could get
seriously corrupted if we let this happen.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/attr.c      |   16 ++++++++--------
 fs/block_dev.c |    3 +++
 mm/filemap.c   |    3 +++
 mm/memory.c    |    3 ++-
 mm/mmap.c      |    2 ++
 mm/swapfile.c  |   12 +++++++++++-
 6 files changed, 29 insertions(+), 10 deletions(-)


diff --git a/fs/attr.c b/fs/attr.c
index 1fcfdcc5b367..7480d5dd22c0 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -134,6 +134,14 @@ EXPORT_SYMBOL(setattr_prepare);
  */
 int inode_newsize_ok(const struct inode *inode, loff_t offset)
 {
+	/*
+	 * Truncation of in-use swapfiles is disallowed - the kernel owns the
+	 * disk space now.  We must prevent subsequent swapout to scribble on
+	 * the now-freed blocks.
+	 */
+	if (IS_SWAPFILE(inode) && inode->i_size != offset)
+		return -ETXTBSY;
+
 	if (inode->i_size < offset) {
 		unsigned long limit;
 
@@ -142,14 +150,6 @@ int inode_newsize_ok(const struct inode *inode, loff_t offset)
 			goto out_sig;
 		if (offset > inode->i_sb->s_maxbytes)
 			goto out_big;
-	} else {
-		/*
-		 * truncation of in-use swapfiles is disallowed - it would
-		 * cause subsequent swapout to scribble on the now-freed
-		 * blocks.
-		 */
-		if (IS_SWAPFILE(inode))
-			return -ETXTBSY;
 	}
 
 	return 0;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 749f5984425d..f57d15e5338b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1948,6 +1948,9 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (bdev_read_only(I_BDEV(bd_inode)))
 		return -EPERM;
 
+	if (IS_SWAPFILE(bd_inode))
+		return -ETXTBSY;
+
 	if (!iov_iter_count(from))
 		return 0;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index dad85e10f5f8..fd80bc20e30a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2938,6 +2938,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	if (IS_IMMUTABLE(inode))
 		return -EPERM;
 
+	if (IS_SWAPFILE(inode))
+		return -ETXTBSY;
+
 	if (!iov_iter_count(from))
 		return 0;
 
diff --git a/mm/memory.c b/mm/memory.c
index abf795277f36..5acb5bb04e21 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2236,7 +2236,8 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
 	vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
 
 	if (vmf->vma->vm_file &&
-	    IS_IMMUTABLE(vmf->vma->vm_file->f_mapping->host))
+	    (IS_IMMUTABLE(vmf->vma->vm_file->f_mapping->host) ||
+	     IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)))
 		return VM_FAULT_SIGBUS;
 
 	ret = vmf->vma->vm_ops->page_mkwrite(vmf);
diff --git a/mm/mmap.c b/mm/mmap.c
index b3ebca2702bf..1abe55822324 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1488,6 +1488,8 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 					return -EACCES;
 				if (IS_IMMUTABLE(file->f_mapping->host))
 					return -EPERM;
+				if (IS_SWAPFILE(file->f_mapping->host))
+					return -ETXTBSY;
 			}
 
 			/*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fa4edd0cca3a..1fc820c71baf 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3165,6 +3165,17 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (error)
 		goto bad_swap;
 
+	/*
+	 * Flush any pending IO and dirty mappings before we start using this
+	 * swap device.
+	 */
+	inode->i_flags |= S_SWAPFILE;
+	error = inode_drain_writes(inode);
+	if (error) {
+		inode->i_flags &= ~S_SWAPFILE;
+		goto bad_swap;
+	}
+
 	mutex_lock(&swapon_mutex);
 	prio = -1;
 	if (swap_flags & SWAP_FLAG_PREFER)
@@ -3185,7 +3196,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	atomic_inc(&proc_poll_event);
 	wake_up_interruptible(&proc_poll_wait);
 
-	inode->i_flags |= S_SWAPFILE;
 	error = 0;
 	goto out;
 bad_swap:


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] mm: set S_SWAPFILE on blockdev swap devices
  2019-08-15 16:05 ` [PATCH 1/2] mm: set S_SWAPFILE on blockdev swap devices Darrick J. Wong
@ 2019-08-16  6:39   ` Christoph Hellwig
  0 siblings, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2019-08-16  6:39 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: hch, akpm, tytso, viro, linux-xfs, linux-fsdevel, linux-kernel, linux-mm

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] mm: set S_SWAPFILE on blockdev swap devices
  2019-08-15 16:05 [PATCH v3 0/2] vfs: make active swap files unwritable Darrick J. Wong
@ 2019-08-15 16:05 ` Darrick J. Wong
  2019-08-16  6:39   ` Christoph Hellwig
  0 siblings, 1 reply; 5+ messages in thread
From: Darrick J. Wong @ 2019-08-15 16:05 UTC (permalink / raw)
  To: hch, akpm, tytso, viro, darrick.wong
  Cc: linux-xfs, linux-fsdevel, linux-kernel, linux-mm

From: Darrick J. Wong <darrick.wong@oracle.com>

Set S_SWAPFILE on block device inodes so that they have the same
protections as a swap flie.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 mm/swapfile.c |   31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)


diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0789a762ce2f..a53b7c49b40e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2368,9 +2368,8 @@ EXPORT_SYMBOL_GPL(add_swap_extent);
  * requirements, they are simply tossed out - we will never use those blocks
  * for swapping.
  *
- * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon.  This
- * prevents root from shooting her foot off by ftruncating an in-use swapfile,
- * which will scribble on the fs.
+ * For all swap devices we set S_SWAPFILE across the life of the swapon.  This
+ * prevents users from writing to the swap device, which will corrupt memory.
  *
  * The amount of disk space which a single swap extent represents varies.
  * Typically it is in the 1-4 megabyte range.  So we can have hundreds of
@@ -2661,13 +2660,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
+
 		set_blocksize(bdev, old_block_size);
 		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
-	} else {
-		inode_lock(inode);
-		inode->i_flags &= ~S_SWAPFILE;
-		inode_unlock(inode);
 	}
+
+	inode_lock(inode);
+	inode->i_flags &= ~S_SWAPFILE;
+	inode_unlock(inode);
 	filp_close(swap_file, NULL);
 
 	/*
@@ -2890,11 +2890,11 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
 		p->flags |= SWP_BLKDEV;
 	} else if (S_ISREG(inode->i_mode)) {
 		p->bdev = inode->i_sb->s_bdev;
-		inode_lock(inode);
-		if (IS_SWAPFILE(inode))
-			return -EBUSY;
-	} else
-		return -EINVAL;
+	}
+
+	inode_lock(inode);
+	if (IS_SWAPFILE(inode))
+		return -EBUSY;
 
 	return 0;
 }
@@ -3295,8 +3295,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	atomic_inc(&proc_poll_event);
 	wake_up_interruptible(&proc_poll_wait);
 
-	if (S_ISREG(inode->i_mode))
-		inode->i_flags |= S_SWAPFILE;
+	inode->i_flags |= S_SWAPFILE;
 	error = 0;
 	goto out;
 bad_swap:
@@ -3318,7 +3317,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (inced_nr_rotate_swap)
 		atomic_dec(&nr_rotate_swap);
 	if (swap_file) {
-		if (inode && S_ISREG(inode->i_mode)) {
+		if (inode) {
 			inode_unlock(inode);
 			inode = NULL;
 		}
@@ -3331,7 +3330,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	}
 	if (name)
 		putname(name);
-	if (inode && S_ISREG(inode->i_mode))
+	if (inode)
 		inode_unlock(inode);
 	if (!error)
 		enable_swap_slots_cache();


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, back to index

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-28 18:35 [PATCH v2 0/2] vfs: make active swap files unwritable Darrick J. Wong
2019-06-28 18:35 ` [PATCH 1/2] mm: set S_SWAPFILE on blockdev swap devices Darrick J. Wong
2019-06-28 18:35 ` [PATCH 2/2] vfs: don't allow writes to swap files Darrick J. Wong
2019-08-15 16:05 [PATCH v3 0/2] vfs: make active swap files unwritable Darrick J. Wong
2019-08-15 16:05 ` [PATCH 1/2] mm: set S_SWAPFILE on blockdev swap devices Darrick J. Wong
2019-08-16  6:39   ` Christoph Hellwig

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git