stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 026/165] hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem
       [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
@ 2020-08-12  1:31 ` Andrew Morton
  2020-08-12  1:32 ` [patch 034/165] cma: don't quit at first error when activating reserved areas Andrew Morton
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2020-08-12  1:31 UTC (permalink / raw)
  To: aarcange, akpm, aneesh.kumar, dave, hughd, kirill.shutemov,
	linux-mm, mhocko, mike.kravetz, mm-commits, n-horiguchi,
	prakash.sangappa, stable, torvalds, willy

From: Mike Kravetz <mike.kravetz@oracle.com>
Subject: hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem

Commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing
synchronization") requires callers of huge_pte_alloc to hold i_mmap_rwsem
in at least read mode.  This is because the explicit locking in
huge_pmd_share (called by huge_pte_alloc) was removed.  When restructuring
the code, the call to huge_pte_alloc in the else block at the beginning of
hugetlb_fault was missed.

Unfortunately, that else clause is exercised when there is no page table
entry.  This will likely lead to a call to huge_pmd_share.  If
huge_pmd_share thinks pmd sharing is possible, it will traverse the
mapping tree (i_mmap) without holding i_mmap_rwsem.  If someone else is
modifying the tree, bad things such as addressing exceptions or worse
could happen.

Simply remove the else clause.  It should have been removed previously. 
The code following the else will call huge_pte_alloc with the appropriate
locking.

To prevent this type of issue in the future, add routines to assert that
i_mmap_rwsem is held, and call these routines in huge pmd sharing
routines.

Link: http://lkml.kernel.org/r/e670f327-5cf9-1959-96e4-6dc7cc30d3d5@oracle.com
Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A.Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Prakash Sangappa <prakash.sangappa@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 include/linux/fs.h      |   10 ++++++++++
 include/linux/hugetlb.h |    8 +++++---
 mm/hugetlb.c            |   15 +++++++--------
 mm/rmap.c               |    2 +-
 4 files changed, 23 insertions(+), 12 deletions(-)

--- a/include/linux/fs.h~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/include/linux/fs.h
@@ -518,6 +518,16 @@ static inline void i_mmap_unlock_read(st
 	up_read(&mapping->i_mmap_rwsem);
 }
 
+static inline void i_mmap_assert_locked(struct address_space *mapping)
+{
+	lockdep_assert_held(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_assert_write_locked(struct address_space *mapping)
+{
+	lockdep_assert_held_write(&mapping->i_mmap_rwsem);
+}
+
 /*
  * Might pages of this file be mapped into userspace?
  */
--- a/include/linux/hugetlb.h~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/include/linux/hugetlb.h
@@ -164,7 +164,8 @@ pte_t *huge_pte_alloc(struct mm_struct *
 			unsigned long addr, unsigned long sz);
 pte_t *huge_pte_offset(struct mm_struct *mm,
 		       unsigned long addr, unsigned long sz);
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long *addr, pte_t *ptep);
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
 				unsigned long *start, unsigned long *end);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -203,8 +204,9 @@ static inline struct address_space *huge
 	return NULL;
 }
 
-static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
-					pte_t *ptep)
+static inline int huge_pmd_unshare(struct mm_struct *mm,
+					struct vm_area_struct *vma,
+					unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
--- a/mm/hugetlb.c~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/mm/hugetlb.c
@@ -3967,7 +3967,7 @@ void __unmap_hugepage_range(struct mmu_g
 			continue;
 
 		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, &address, ptep)) {
+		if (huge_pmd_unshare(mm, vma, &address, ptep)) {
 			spin_unlock(ptl);
 			/*
 			 * We just unmapped a page of PMDs by clearing a PUD.
@@ -4554,10 +4554,6 @@ vm_fault_t hugetlb_fault(struct mm_struc
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
 				VM_FAULT_SET_HINDEX(hstate_index(h));
-	} else {
-		ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
-		if (!ptep)
-			return VM_FAULT_OOM;
 	}
 
 	/*
@@ -5034,7 +5030,7 @@ unsigned long hugetlb_change_protection(
 		if (!ptep)
 			continue;
 		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, &address, ptep)) {
+		if (huge_pmd_unshare(mm, vma, &address, ptep)) {
 			pages++;
 			spin_unlock(ptl);
 			shared_pmd = true;
@@ -5415,12 +5411,14 @@ out:
  * returns: 1 successfully unmapped a shared pte page
  *	    0 the underlying pte page is not shared, or it is the last user
  */
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+					unsigned long *addr, pte_t *ptep)
 {
 	pgd_t *pgd = pgd_offset(mm, *addr);
 	p4d_t *p4d = p4d_offset(pgd, *addr);
 	pud_t *pud = pud_offset(p4d, *addr);
 
+	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
 	BUG_ON(page_count(virt_to_page(ptep)) == 0);
 	if (page_count(virt_to_page(ptep)) == 1)
 		return 0;
@@ -5438,7 +5436,8 @@ pte_t *huge_pmd_share(struct mm_struct *
 	return NULL;
 }
 
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
--- a/mm/rmap.c~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/mm/rmap.c
@@ -1469,7 +1469,7 @@ static bool try_to_unmap_one(struct page
 			 * do this outside rmap routines.
 			 */
 			VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
-			if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+			if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
 				/*
 				 * huge_pmd_unshare unmapped an entire PMD
 				 * page.  There is no way of knowing exactly
_

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [patch 034/165] cma: don't quit at first error when activating reserved areas
       [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
  2020-08-12  1:31 ` [patch 026/165] hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem Andrew Morton
@ 2020-08-12  1:32 ` Andrew Morton
  2020-08-12  1:32 ` [patch 039/165] mm/memory_hotplug: fix unpaired mem_hotplug_begin/done Andrew Morton
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2020-08-12  1:32 UTC (permalink / raw)
  To: akpm, guro, iamjoonsoo.kim, kyungmin.park, linux-mm,
	m.szyprowski, mike.kravetz, mina86, mm-commits, song.bao.hua,
	stable, torvalds

From: Mike Kravetz <mike.kravetz@oracle.com>
Subject: cma: don't quit at first error when activating reserved areas

The routine cma_init_reserved_areas is designed to activate all
reserved cma areas.  It quits when it first encounters an error.
This can leave some areas in a state where they are reserved but
not activated.  There is no feedback to code which performed the
reservation.  Attempting to allocate memory from areas in such a
state will result in a BUG.

Modify cma_init_reserved_areas to always attempt to activate all
areas.  The called routine, cma_activate_area is responsible for
leaving the area in a valid state.  No one is making active use
of returned error codes, so change the routine to void.

How to reproduce:  This example uses kernelcore, hugetlb and cma
as an easy way to reproduce.  However, this is a more general cma
issue.

Two node x86 VM 16GB total, 8GB per node
Kernel command line parameters, kernelcore=4G hugetlb_cma=8G
Related boot time messages,
  hugetlb_cma: reserve 8192 MiB, up to 4096 MiB per node
  cma: Reserved 4096 MiB at 0x0000000100000000
  hugetlb_cma: reserved 4096 MiB on node 0
  cma: Reserved 4096 MiB at 0x0000000300000000
  hugetlb_cma: reserved 4096 MiB on node 1
  cma: CMA area hugetlb could not be activated

 # echo 8 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages

  BUG: kernel NULL pointer dereference, address: 0000000000000000
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 0 P4D 0
  Oops: 0000 [#1] SMP PTI
  ...
  Call Trace:
    bitmap_find_next_zero_area_off+0x51/0x90
    cma_alloc+0x1a5/0x310
    alloc_fresh_huge_page+0x78/0x1a0
    alloc_pool_huge_page+0x6f/0xf0
    set_max_huge_pages+0x10c/0x250
    nr_hugepages_store_common+0x92/0x120
    ? __kmalloc+0x171/0x270
    kernfs_fop_write+0xc1/0x1a0
    vfs_write+0xc7/0x1f0
    ksys_write+0x5f/0xe0
    do_syscall_64+0x4d/0x90
    entry_SYSCALL_64_after_hwframe+0x44/0xa9

Link: http://lkml.kernel.org/r/20200730163123.6451-1-mike.kravetz@oracle.com
Fixes: c64be2bb1c6e ("drivers: add Contiguous Memory Allocator")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Roman Gushchin <guro@fb.com>
Acked-by: Barry Song <song.bao.hua@hisilicon.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/cma.c |   23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

--- a/mm/cma.c~cma-dont-quit-at-first-error-when-activating-reserved-areas
+++ a/mm/cma.c
@@ -93,17 +93,15 @@ static void cma_clear_bitmap(struct cma
 	mutex_unlock(&cma->lock);
 }
 
-static int __init cma_activate_area(struct cma *cma)
+static void __init cma_activate_area(struct cma *cma)
 {
 	unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
 	unsigned i = cma->count >> pageblock_order;
 	struct zone *zone;
 
 	cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL);
-	if (!cma->bitmap) {
-		cma->count = 0;
-		return -ENOMEM;
-	}
+	if (!cma->bitmap)
+		goto out_error;
 
 	WARN_ON_ONCE(!pfn_valid(pfn));
 	zone = page_zone(pfn_to_page(pfn));
@@ -133,25 +131,22 @@ static int __init cma_activate_area(stru
 	spin_lock_init(&cma->mem_head_lock);
 #endif
 
-	return 0;
+	return;
 
 not_in_zone:
-	pr_err("CMA area %s could not be activated\n", cma->name);
 	bitmap_free(cma->bitmap);
+out_error:
 	cma->count = 0;
-	return -EINVAL;
+	pr_err("CMA area %s could not be activated\n", cma->name);
+	return;
 }
 
 static int __init cma_init_reserved_areas(void)
 {
 	int i;
 
-	for (i = 0; i < cma_area_count; i++) {
-		int ret = cma_activate_area(&cma_areas[i]);
-
-		if (ret)
-			return ret;
-	}
+	for (i = 0; i < cma_area_count; i++)
+		cma_activate_area(&cma_areas[i]);
 
 	return 0;
 }
_

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [patch 039/165] mm/memory_hotplug: fix unpaired mem_hotplug_begin/done
       [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
  2020-08-12  1:31 ` [patch 026/165] hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem Andrew Morton
  2020-08-12  1:32 ` [patch 034/165] cma: don't quit at first error when activating reserved areas Andrew Morton
@ 2020-08-12  1:32 ` Andrew Morton
  2020-08-12  1:35 ` [patch 098/165] fs/minix: check return value of sb_getblk() Andrew Morton
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2020-08-12  1:32 UTC (permalink / raw)
  To: akpm, bhe, bp, catalin.marinas, dalias, dan.j.williams,
	dave.hansen, dave.jiang, david, fenghua.yu, hpa, hslester96,
	Jonathan.Cameron, justin.he, Kaly.Xin, linux-mm, logang, luto,
	masahiroy, mhocko, mingo, mm-commits, peterz, rppt, stable, tglx,
	tony.luck, torvalds, vishal.l.verma, will, ysato

From: Jia He <justin.he@arm.com>
Subject: mm/memory_hotplug: fix unpaired mem_hotplug_begin/done

When check_memblock_offlined_cb() returns failed rc(e.g. the memblock is
online at that time), mem_hotplug_begin/done is unpaired in such case.

Therefore a warning:
 Call Trace:
  percpu_up_write+0x33/0x40
  try_remove_memory+0x66/0x120
  ? _cond_resched+0x19/0x30
  remove_memory+0x2b/0x40
  dev_dax_kmem_remove+0x36/0x72 [kmem]
  device_release_driver_internal+0xf0/0x1c0
  device_release_driver+0x12/0x20
  bus_remove_device+0xe1/0x150
  device_del+0x17b/0x3e0
  unregister_dev_dax+0x29/0x60
  devm_action_release+0x15/0x20
  release_nodes+0x19a/0x1e0
  devres_release_all+0x3f/0x50
  device_release_driver_internal+0x100/0x1c0
  driver_detach+0x4c/0x8f
  bus_remove_driver+0x5c/0xd0
  driver_unregister+0x31/0x50
  dax_pmem_exit+0x10/0xfe0 [dax_pmem]

Link: http://lkml.kernel.org/r/20200710031619.18762-3-justin.he@arm.com
Fixes: f1037ec0cc8a ("mm/memory_hotplug: fix remove_memory() lockdep splat")
Signed-off-by: Jia He <justin.he@arm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>	[5.6+]
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chuhong Yuan <hslester96@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
Cc: Kaly Xin <Kaly.Xin@arm.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rich Felker <dalias@libc.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/memory_hotplug.c |    5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

--- a/mm/memory_hotplug.c~mm-memory_hotplug-fix-unpaired-mem_hotplug_begin-done
+++ a/mm/memory_hotplug.c
@@ -1757,7 +1757,7 @@ static int __ref try_remove_memory(int n
 	 */
 	rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb);
 	if (rc)
-		goto done;
+		return rc;
 
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size, "System RAM");
@@ -1781,9 +1781,8 @@ static int __ref try_remove_memory(int n
 
 	try_offline_node(nid);
 
-done:
 	mem_hotplug_done();
-	return rc;
+	return 0;
 }
 
 /**
_

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [patch 098/165] fs/minix: check return value of sb_getblk()
       [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
                   ` (2 preceding siblings ...)
  2020-08-12  1:32 ` [patch 039/165] mm/memory_hotplug: fix unpaired mem_hotplug_begin/done Andrew Morton
@ 2020-08-12  1:35 ` Andrew Morton
  2020-08-12  1:35 ` [patch 099/165] fs/minix: don't allow getting deleted inodes Andrew Morton
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2020-08-12  1:35 UTC (permalink / raw)
  To: akpm, anenbupt, ebiggers, linux-mm, mm-commits, stable, torvalds, viro

From: Eric Biggers <ebiggers@google.com>
Subject: fs/minix: check return value of sb_getblk()

Patch series "fs/minix: fix syzbot bugs and set s_maxbytes".

This series fixes all syzbot bugs in the minix filesystem:

	KASAN: null-ptr-deref Write in get_block
	KASAN: use-after-free Write in get_block
	KASAN: use-after-free Read in get_block
	WARNING in inc_nlink
	KMSAN: uninit-value in get_block
	WARNING in drop_nlink

It also fixes the minix filesystem to set s_maxbytes correctly, so that
userspace sees the correct behavior when exceeding the max file size.


This patch (of 6):

sb_getblk() can fail, so check its return value.

This fixes a NULL pointer dereference.

Originally from Qiujun Huang.

Link: http://lkml.kernel.org/r/20200628060846.682158-1-ebiggers@kernel.org
Link: http://lkml.kernel.org/r/20200628060846.682158-2-ebiggers@kernel.org
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reported-by: syzbot+4a88b2b9dc280f47baf4@syzkaller.appspotmail.com
Cc: Qiujun Huang <anenbupt@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/minix/itree_common.c |    8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

--- a/fs/minix/itree_common.c~fs-minix-check-return-value-of-sb_getblk
+++ a/fs/minix/itree_common.c
@@ -75,6 +75,7 @@ static int alloc_branch(struct inode *in
 	int n = 0;
 	int i;
 	int parent = minix_new_block(inode);
+	int err = -ENOSPC;
 
 	branch[0].key = cpu_to_block(parent);
 	if (parent) for (n = 1; n < num; n++) {
@@ -85,6 +86,11 @@ static int alloc_branch(struct inode *in
 			break;
 		branch[n].key = cpu_to_block(nr);
 		bh = sb_getblk(inode->i_sb, parent);
+		if (!bh) {
+			minix_free_block(inode, nr);
+			err = -ENOMEM;
+			break;
+		}
 		lock_buffer(bh);
 		memset(bh->b_data, 0, bh->b_size);
 		branch[n].bh = bh;
@@ -103,7 +109,7 @@ static int alloc_branch(struct inode *in
 		bforget(branch[i].bh);
 	for (i = 0; i < n; i++)
 		minix_free_block(inode, block_to_cpu(branch[i].key));
-	return -ENOSPC;
+	return err;
 }
 
 static inline int splice_branch(struct inode *inode,
_

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [patch 099/165] fs/minix: don't allow getting deleted inodes
       [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
                   ` (3 preceding siblings ...)
  2020-08-12  1:35 ` [patch 098/165] fs/minix: check return value of sb_getblk() Andrew Morton
@ 2020-08-12  1:35 ` Andrew Morton
  2020-08-12  1:35 ` [patch 100/165] fs/minix: reject too-large maximum file size Andrew Morton
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2020-08-12  1:35 UTC (permalink / raw)
  To: akpm, anenbupt, ebiggers, linux-mm, mm-commits, stable, torvalds, viro

From: Eric Biggers <ebiggers@google.com>
Subject: fs/minix: don't allow getting deleted inodes

If an inode has no links, we need to mark it bad rather than allowing it
to be accessed.  This avoids WARNINGs in inc_nlink() and drop_nlink() when
doing directory operations on a fuzzed filesystem.

Link: http://lkml.kernel.org/r/20200628060846.682158-3-ebiggers@kernel.org
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+a9ac3de1b5de5fb10efc@syzkaller.appspotmail.com
Reported-by: syzbot+df958cf5688a96ad3287@syzkaller.appspotmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Qiujun Huang <anenbupt@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/minix/inode.c |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

--- a/fs/minix/inode.c~fs-minix-dont-allow-getting-deleted-inodes
+++ a/fs/minix/inode.c
@@ -468,6 +468,13 @@ static struct inode *V1_minix_iget(struc
 		iget_failed(inode);
 		return ERR_PTR(-EIO);
 	}
+	if (raw_inode->i_nlinks == 0) {
+		printk("MINIX-fs: deleted inode referenced: %lu\n",
+		       inode->i_ino);
+		brelse(bh);
+		iget_failed(inode);
+		return ERR_PTR(-ESTALE);
+	}
 	inode->i_mode = raw_inode->i_mode;
 	i_uid_write(inode, raw_inode->i_uid);
 	i_gid_write(inode, raw_inode->i_gid);
@@ -501,6 +508,13 @@ static struct inode *V2_minix_iget(struc
 		iget_failed(inode);
 		return ERR_PTR(-EIO);
 	}
+	if (raw_inode->i_nlinks == 0) {
+		printk("MINIX-fs: deleted inode referenced: %lu\n",
+		       inode->i_ino);
+		brelse(bh);
+		iget_failed(inode);
+		return ERR_PTR(-ESTALE);
+	}
 	inode->i_mode = raw_inode->i_mode;
 	i_uid_write(inode, raw_inode->i_uid);
 	i_gid_write(inode, raw_inode->i_gid);
_

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [patch 100/165] fs/minix: reject too-large maximum file size
       [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
                   ` (4 preceding siblings ...)
  2020-08-12  1:35 ` [patch 099/165] fs/minix: don't allow getting deleted inodes Andrew Morton
@ 2020-08-12  1:35 ` Andrew Morton
  2020-08-14  2:39 ` + bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch added to -mm tree Andrew Morton
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2020-08-12  1:35 UTC (permalink / raw)
  To: akpm, anenbupt, ebiggers, linux-mm, mm-commits, stable, torvalds, viro

From: Eric Biggers <ebiggers@google.com>
Subject: fs/minix: reject too-large maximum file size

If the minix filesystem tries to map a very large logical block number to
its on-disk location, block_to_path() can return offsets that are too
large, causing out-of-bounds memory accesses when accessing indirect index
blocks.  This should be prevented by the check against the maximum file
size, but this doesn't work because the maximum file size is read directly
from the on-disk superblock and isn't validated itself.

Fix this by validating the maximum file size at mount time.

Link: http://lkml.kernel.org/r/20200628060846.682158-4-ebiggers@kernel.org
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+c7d9ec7a1a7272dd71b3@syzkaller.appspotmail.com
Reported-by: syzbot+3b7b03a0c28948054fb5@syzkaller.appspotmail.com
Reported-by: syzbot+6e056ee473568865f3e6@syzkaller.appspotmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Qiujun Huang <anenbupt@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/minix/inode.c |   22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

--- a/fs/minix/inode.c~fs-minix-reject-too-large-maximum-file-size
+++ a/fs/minix/inode.c
@@ -150,6 +150,23 @@ static int minix_remount (struct super_b
 	return 0;
 }
 
+static bool minix_check_superblock(struct minix_sb_info *sbi)
+{
+	if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+		return false;
+
+	/*
+	 * s_max_size must not exceed the block mapping limitation.  This check
+	 * is only needed for V1 filesystems, since V2/V3 support an extra level
+	 * of indirect blocks which places the limit well above U32_MAX.
+	 */
+	if (sbi->s_version == MINIX_V1 &&
+	    sbi->s_max_size > (7 + 512 + 512*512) * BLOCK_SIZE)
+		return false;
+
+	return true;
+}
+
 static int minix_fill_super(struct super_block *s, void *data, int silent)
 {
 	struct buffer_head *bh;
@@ -228,11 +245,12 @@ static int minix_fill_super(struct super
 	} else
 		goto out_no_fs;
 
+	if (!minix_check_superblock(sbi))
+		goto out_illegal_sb;
+
 	/*
 	 * Allocate the buffer map to keep the superblock small.
 	 */
-	if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
-		goto out_illegal_sb;
 	i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
 	map = kzalloc(i, GFP_KERNEL);
 	if (!map)
_

^ permalink raw reply	[flat|nested] 10+ messages in thread

* + bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch added to -mm tree
       [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
                   ` (5 preceding siblings ...)
  2020-08-12  1:35 ` [patch 100/165] fs/minix: reject too-large maximum file size Andrew Morton
@ 2020-08-14  2:39 ` Andrew Morton
  2020-08-14  3:00 ` + mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch " Andrew Morton
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2020-08-14  2:39 UTC (permalink / raw)
  To: mhiramat, mm-commits, rostedt, stable


The patch titled
     Subject: bootconfig: fix off-by-one in xbc_node_compose_key_after()
has been added to the -mm tree.  Its filename is
     bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Steven Rostedt (VMware) <rostedt@goodmis.org>
Subject: bootconfig: fix off-by-one in xbc_node_compose_key_after()

While reviewing some patches for bootconfig, I noticed the following
code in xbc_node_compose_key_after():

	ret = snprintf(buf, size, "%s%s", xbc_node_get_data(node),
		       depth ? "." : "");
	if (ret < 0)
		return ret;
	if (ret > size) {
		size = 0;
	} else {
		size -= ret;
		buf += ret;
	}

But snprintf() returns the number of bytes that would be written, not
the number of bytes that are written (ignoring the nul terminator).
This means that if the number of non null bytes written were to equal
size, then the nul byte, which snprintf() always adds, will overwrite
that last byte.

	ret = snprintf(buf, 5, "hello");
	printf("buf = '%s'
", buf);
	printf("ret = %d
", ret);

produces:

	buf = 'hell'
	ret = 5

The string was truncated without ret being greater than 5.
Test (ret >= size) for overwrite.

Link: http://lkml.kernel.org/r/20200813183050.029a6003@oasis.local.home
Fixes: 76db5a27a827c ("bootconfig: Add Extra Boot Config support")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 lib/bootconfig.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/lib/bootconfig.c~bootconfig-fix-off-by-one-in-xbc_node_compose_key_after
+++ a/lib/bootconfig.c
@@ -248,7 +248,7 @@ int __init xbc_node_compose_key_after(st
 			       depth ? "." : "");
 		if (ret < 0)
 			return ret;
-		if (ret > size) {
+		if (ret >= size) {
 			size = 0;
 		} else {
 			size -= ret;
_

Patches currently in -mm which might be from rostedt@goodmis.org are

bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch


^ permalink raw reply	[flat|nested] 10+ messages in thread

* + mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch added to -mm tree
       [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
                   ` (6 preceding siblings ...)
  2020-08-14  2:39 ` + bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch added to -mm tree Andrew Morton
@ 2020-08-14  3:00 ` Andrew Morton
  2020-08-14  3:26 ` [withdrawn] bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch removed from " Andrew Morton
  2020-08-14 22:31 ` + khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch added to " Andrew Morton
  9 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2020-08-14  3:00 UTC (permalink / raw)
  To: charante, david, mhocko, mm-commits, rientjes, stable, vbabka, vinmenon


The patch titled
     Subject: mm, page_alloc: fix core hung in free_pcppages_bulk()
has been added to the -mm tree.  Its filename is
     mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Charan Teja Reddy <charante@codeaurora.org>
Subject: mm, page_alloc: fix core hung in free_pcppages_bulk()

The following race is observed with the repeated online, offline and a
delay between two successive online of memory blocks of movable zone.

P1						P2

Online the first memory block in
the movable zone. The pcp struct
values are initialized to default
values,i.e., pcp->high = 0 &
pcp->batch = 1.

					Allocate the pages from the
					movable zone.

Try to Online the second memory
block in the movable zone thus it
entered the online_pages() but yet
to call zone_pcp_update().
					This process is entered into
					the exit path thus it tries
					to release the order-0 pages
					to pcp lists through
					free_unref_page_commit().
					As pcp->high = 0, pcp->count = 1
					proceed to call the function
					free_pcppages_bulk().
Update the pcp values thus the
new pcp values are like, say,
pcp->high = 378, pcp->batch = 63.
					Read the pcp's batch value using
					READ_ONCE() and pass the same to
					free_pcppages_bulk(), pcp values
					passed here are, batch = 63,
					count = 1.

					Since num of pages in the pcp
					lists are less than ->batch,
					then it will stuck in
					while(list_empty(list)) loop
					with interrupts disabled thus
					a core hung.

Avoid this by ensuring free_pcppages_bulk() is called with proper count of
pcp list pages.

The mentioned race is some what easily reproducible without [1] because
pcp's are not updated for the first memory block online and thus there is
a enough race window for P2 between alloc+free and pcp struct values
update through onlining of second memory block.

With [1], the race still exists but it is very narrow as we update the pcp
struct values for the first memory block online itself.

This is not limited to the movable zone, it could also happen in cases
with the normal zone (e.g., hotplug to a node that only has DMA memory, or
no other memory yet).

[1]: https://patchwork.kernel.org/patch/11696389/

Link: http://lkml.kernel.org/r/1597150703-19003-1-git-send-email-charante@codeaurora.org
Fixes: 5f8dcc21211a ("page-allocator: split per-cpu list into one-list-per-migrate-type")
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Cc: <stable@vger.kernel.org> [2.6+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/page_alloc.c |    5 +++++
 1 file changed, 5 insertions(+)

--- a/mm/page_alloc.c~mm-page_alloc-fix-core-hung-in-free_pcppages_bulk
+++ a/mm/page_alloc.c
@@ -1301,6 +1301,11 @@ static void free_pcppages_bulk(struct zo
 	struct page *page, *tmp;
 	LIST_HEAD(head);
 
+	/*
+	 * Ensure proper count is passed which otherwise would stuck in the
+	 * below while (list_empty(list)) loop.
+	 */
+	count = min(pcp->count, count);
 	while (count) {
 		struct list_head *list;
 
_

Patches currently in -mm which might be from charante@codeaurora.org are

mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [withdrawn] bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch removed from -mm tree
       [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
                   ` (7 preceding siblings ...)
  2020-08-14  3:00 ` + mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch " Andrew Morton
@ 2020-08-14  3:26 ` Andrew Morton
  2020-08-14 22:31 ` + khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch added to " Andrew Morton
  9 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2020-08-14  3:26 UTC (permalink / raw)
  To: mhiramat, mm-commits, rostedt, stable


The patch titled
     Subject: bootconfig: fix off-by-one in xbc_node_compose_key_after()
has been removed from the -mm tree.  Its filename was
     bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch

This patch was dropped because it was withdrawn

------------------------------------------------------
From: Steven Rostedt (VMware) <rostedt@goodmis.org>
Subject: bootconfig: fix off-by-one in xbc_node_compose_key_after()

While reviewing some patches for bootconfig, I noticed the following
code in xbc_node_compose_key_after():

	ret = snprintf(buf, size, "%s%s", xbc_node_get_data(node),
		       depth ? "." : "");
	if (ret < 0)
		return ret;
	if (ret > size) {
		size = 0;
	} else {
		size -= ret;
		buf += ret;
	}

But snprintf() returns the number of bytes that would be written, not
the number of bytes that are written (ignoring the nul terminator).
This means that if the number of non null bytes written were to equal
size, then the nul byte, which snprintf() always adds, will overwrite
that last byte.

	ret = snprintf(buf, 5, "hello");
	printf("buf = '%s'
", buf);
	printf("ret = %d
", ret);

produces:

	buf = 'hell'
	ret = 5

The string was truncated without ret being greater than 5.
Test (ret >= size) for overwrite.

Link: http://lkml.kernel.org/r/20200813183050.029a6003@oasis.local.home
Fixes: 76db5a27a827c ("bootconfig: Add Extra Boot Config support")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 lib/bootconfig.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/lib/bootconfig.c~bootconfig-fix-off-by-one-in-xbc_node_compose_key_after
+++ a/lib/bootconfig.c
@@ -248,7 +248,7 @@ int __init xbc_node_compose_key_after(st
 			       depth ? "." : "");
 		if (ret < 0)
 			return ret;
-		if (ret > size) {
+		if (ret >= size) {
 			size = 0;
 		} else {
 			size -= ret;
_

Patches currently in -mm which might be from rostedt@goodmis.org are



^ permalink raw reply	[flat|nested] 10+ messages in thread

* + khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch added to -mm tree
       [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
                   ` (8 preceding siblings ...)
  2020-08-14  3:26 ` [withdrawn] bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch removed from " Andrew Morton
@ 2020-08-14 22:31 ` Andrew Morton
  9 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2020-08-14 22:31 UTC (permalink / raw)
  To: aarcange, edumazet, hughd, kirill.shutemov, mike.kravetz,
	mm-commits, songliubraving, stable, syzkaller


The patch titled
     Subject: khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter()
has been added to the -mm tree.  Its filename is
     khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Hugh Dickins <hughd@google.com>
Subject: khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter()

syzbot crashes on the VM_BUG_ON_MM(khugepaged_test_exit(mm), mm) in
__khugepaged_enter(): yes, when one thread is about to dump core, has set
core_state, and is waiting for others, another might do something calling
__khugepaged_enter(), which now crashes because I lumped the core_state
test (known as "mmget_still_valid") into khugepaged_test_exit().  I still
think it's best to lump them together, so just in this exceptional case,
check mm->mm_users directly instead of khugepaged_test_exit().

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008141503370.18085@eggly.anvils
Fixes: bbe98f9cadff ("khugepaged: khugepaged_test_exit() check mmget_still_valid()")
Signed-off-by: Hugh Dickins <hughd@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/khugepaged.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/mm/khugepaged.c~khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter
+++ a/mm/khugepaged.c
@@ -466,7 +466,7 @@ int __khugepaged_enter(struct mm_struct
 		return -ENOMEM;
 
 	/* __khugepaged_exit() must not run from under us */
-	VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
+	VM_BUG_ON_MM(atomic_read(&mm->mm_users) == 0, mm);
 	if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
 		free_mm_slot(mm_slot);
 		return 0;
_

Patches currently in -mm which might be from hughd@google.com are

dma-debug-fix-debug_dma_assert_idle-use-rcu_read_lock.patch
khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-08-14 22:31 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20200811182949.e12ae9a472e3b5e27e16ad6c@linux-foundation.org>
2020-08-12  1:31 ` [patch 026/165] hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem Andrew Morton
2020-08-12  1:32 ` [patch 034/165] cma: don't quit at first error when activating reserved areas Andrew Morton
2020-08-12  1:32 ` [patch 039/165] mm/memory_hotplug: fix unpaired mem_hotplug_begin/done Andrew Morton
2020-08-12  1:35 ` [patch 098/165] fs/minix: check return value of sb_getblk() Andrew Morton
2020-08-12  1:35 ` [patch 099/165] fs/minix: don't allow getting deleted inodes Andrew Morton
2020-08-12  1:35 ` [patch 100/165] fs/minix: reject too-large maximum file size Andrew Morton
2020-08-14  2:39 ` + bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch added to -mm tree Andrew Morton
2020-08-14  3:00 ` + mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch " Andrew Morton
2020-08-14  3:26 ` [withdrawn] bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch removed from " Andrew Morton
2020-08-14 22:31 ` + khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch added to " Andrew Morton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).