From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
To: Joel Fernandes <joel@joelfernandes.org>
Cc: "Andrew Morton" <akpm@linux-foundation.org>,
	"Todd Kjos" <tkjos@google.com>,
	syzbot+a76129f18c89f3e2ddd4@syzkaller.appspotmail.com,
	ak@linux.intel.com, "Johannes Weiner" <hannes@cmpxchg.org>,
	jack@suse.cz, jrdr.linux@gmail.com,
	LKML <linux-kernel@vger.kernel.org>,
	linux-mm@kvack.org, mawilcox@microsoft.com,
	mgorman@techsingularity.net, syzkaller-bugs@googlegroups.com,
	"Arve Hjønnevåg" <arve@android.com>,
	"Todd Kjos" <tkjos@android.com>,
	"Martijn Coenen" <maco@android.com>,
	"Greg Kroah-Hartman" <gregkh@linuxfoundation.org>
Subject: Re: possible deadlock in __do_page_fault
Date: Thu, 24 Jan 2019 10:52:30 +0900	[thread overview]
Message-ID: <201901240152.x0O1qUUU069046@www262.sakura.ne.jp> (raw)
In-Reply-To: <20190123155751.GA168927@google.com>

Joel Fernandes wrote:
> > Anyway, I need your checks regarding whether this approach is waiting for
> > completion at all locations which need to wait for completion.
> 
> I think you are waiting in unwanted locations. The only location you need to
> wait in is ashmem_pin_unpin.
> 
> So, to my eyes all that is needed to fix this bug is:
> 
> 1. Delete the range from the ashmem_lru_list
> 2. Release the ashmem_mutex
> 3. fallocate the range.
> 4. Do the completion so that any waiting pin/unpin can proceed.
> 
> Could you clarify why you feel you need to wait for completion at those other
> locations?

Because I don't know how ashmem works.
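
(Just to check that I understood your four steps: I guess the shrinker side
would become something like the sketch below, where "ashmem_shrink_inflight"
and "ashmem_shrink_wait" are hypothetical names I made up for the in-flight
counter and the wait queue. This is only my guess and is untested; please
correct me if it is not what you meant.)

/*
 * Rough sketch of steps 1-4 (untested): take the range off the LRU and
 * mark it purged while still holding ashmem_mutex, drop the mutex around
 * the fallocate() call, and wake up waiters when nothing is in flight.
 */
static atomic_t ashmem_shrink_inflight = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(ashmem_shrink_wait);

static unsigned long
ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	unsigned long freed = 0;

	/* We might recurse into filesystem code, so bail out if necessary */
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;

	if (!mutex_trylock(&ashmem_mutex))
		return -1;

	while (!list_empty(&ashmem_lru_list)) {
		struct ashmem_range *range =
			list_first_entry(&ashmem_lru_list, typeof(*range), lru);
		loff_t start = range->pgstart * PAGE_SIZE;
		loff_t end = (range->pgend + 1) * PAGE_SIZE;
		struct file *f = range->asma->file;

		get_file(f);
		atomic_inc(&ashmem_shrink_inflight);
		/* 1. remove the range from the LRU list and mark it purged */
		range->purged = ASHMEM_WAS_PURGED;
		lru_del(range);
		freed += range_size(range);
		/* 2. release ashmem_mutex so page faults can make progress */
		mutex_unlock(&ashmem_mutex);
		/* 3. punch the hole without holding ashmem_mutex */
		f->f_op->fallocate(f,
				   FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				   start, end - start);
		fput(f);
		/* 4. let any waiting pin/unpin continue */
		if (atomic_dec_and_test(&ashmem_shrink_inflight))
			wake_up_all(&ashmem_shrink_wait);
		mutex_lock(&ashmem_mutex);
		if (--sc->nr_to_scan <= 0)
			break;
	}
	mutex_unlock(&ashmem_mutex);
	return freed;
}

Then ashmem_pin_unpin() would do
wait_event(ashmem_shrink_wait, !atomic_read(&ashmem_shrink_inflight))
right after taking ashmem_mutex, like in my draft below.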

> 
> Note that once a range is unpinned, it is open sesame and userspace cannot
> really expect consistent data from such range till it is pinned again.
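
(So, if I read the UAPI header at drivers/staging/android/uapi/ashmem.h
correctly, the userspace protocol is roughly the sketch below: unpin a range
while its contents are expendable, and re-pin it before using it again,
checking whether it was purged in the meantime. The #include path "ashmem.h"
is an assumption, and the whole thing is untested; it is only for confirming
my understanding.)

/*
 * Minimal userspace sketch of the pin/unpin protocol (my understanding,
 * untested). Assumes the staging UAPI header is reachable as "ashmem.h".
 */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include "ashmem.h"	/* struct ashmem_pin, ASHMEM_* ioctls */

int example(void)
{
	const size_t size = 16 * 4096;
	struct ashmem_pin pin = { .offset = 0, .len = size };
	int fd = open("/dev/ashmem", O_RDWR);
	char *p;

	if (fd < 0)
		return -1;
	ioctl(fd, ASHMEM_SET_SIZE, size);
	p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		close(fd);
		return -1;
	}
	memset(p, 0x5a, size);	/* pages start out pinned */

	/* Unpinned pages may be purged at any time under memory pressure. */
	ioctl(fd, ASHMEM_UNPIN, &pin);

	/* Before touching the data again, pin it and check the result. */
	if (ioctl(fd, ASHMEM_PIN, &pin) == ASHMEM_WAS_PURGED)
		memset(p, 0x5a, size);	/* contents were dropped; regenerate */

	munmap(p, size);
	close(fd);
	return 0;
}

If that is the case, punching the hole already at range_alloc() time (as in
the draft below) should not change what userspace can expect, should it?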

Then I'm tempted to eliminate the shrinker and the LRU list entirely (see the
draft patch below). This is not equivalent to the current code, because purging
now happens only at range_alloc() time rather than under memory pressure, and I
don't know whether it is safe to temporarily release ashmem_mutex inside
range_alloc() at "Case #4" of ashmem_pin(). But can't we go in this direction?

By the way, why not check for range_alloc() failure before calling range_shrink()?
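I mean something like this (a sketch based on my reading of the current
"Case #4" code in ashmem_pin(); the call arguments and the -ENOMEM
propagation are from memory and untested):

		/* Case #4: shrink the range only if range_alloc() succeeded. */
		if (range_alloc(asma, range, range->purged,
				pgend + 1, range->pgend))
			return -ENOMEM;
		range_shrink(range, range->pgstart, pgstart - 1);
		break;

Otherwise, if kmem_cache_zalloc() fails, the second half of the unpinned
range silently disappears from the unpinned list, doesn't it?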

---
 drivers/staging/android/ashmem.c | 154 +++++--------------------------
 1 file changed, 21 insertions(+), 133 deletions(-)

diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 90a8a9f1ac7d..90668eebf35b 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -53,7 +53,6 @@ struct ashmem_area {
 
 /**
  * struct ashmem_range - A range of unpinned/evictable pages
- * @lru:	         The entry in the LRU list
  * @unpinned:	         The entry in its area's unpinned list
  * @asma:	         The associated anonymous shared memory area.
  * @pgstart:	         The starting page (inclusive)
@@ -64,7 +63,6 @@ struct ashmem_area {
  * It is protected by 'ashmem_mutex'
  */
 struct ashmem_range {
-	struct list_head lru;
 	struct list_head unpinned;
 	struct ashmem_area *asma;
 	size_t pgstart;
@@ -72,15 +70,8 @@ struct ashmem_range {
 	unsigned int purged;
 };
 
-/* LRU list of unpinned pages, protected by ashmem_mutex */
-static LIST_HEAD(ashmem_lru_list);
-
-/*
- * long lru_count - The count of pages on our LRU list.
- *
- * This is protected by ashmem_mutex.
- */
-static unsigned long lru_count;
+static atomic_t ashmem_purge_inflight = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(ashmem_purge_wait);
 
 /*
  * ashmem_mutex - protects the list of and each individual ashmem_area
@@ -97,7 +88,7 @@ static inline unsigned long range_size(struct ashmem_range *range)
 	return range->pgend - range->pgstart + 1;
 }
 
-static inline bool range_on_lru(struct ashmem_range *range)
+static inline bool range_not_purged(struct ashmem_range *range)
 {
 	return range->purged == ASHMEM_NOT_PURGED;
 }
@@ -133,32 +124,6 @@ static inline bool range_before_page(struct ashmem_range *range, size_t page)
 
 #define PROT_MASK		(PROT_EXEC | PROT_READ | PROT_WRITE)
 
-/**
- * lru_add() - Adds a range of memory to the LRU list
- * @range:     The memory range being added.
- *
- * The range is first added to the end (tail) of the LRU list.
- * After this, the size of the range is added to @lru_count
- */
-static inline void lru_add(struct ashmem_range *range)
-{
-	list_add_tail(&range->lru, &ashmem_lru_list);
-	lru_count += range_size(range);
-}
-
-/**
- * lru_del() - Removes a range of memory from the LRU list
- * @range:     The memory range being removed
- *
- * The range is first deleted from the LRU list.
- * After this, the size of the range is removed from @lru_count
- */
-static inline void lru_del(struct ashmem_range *range)
-{
-	list_del(&range->lru);
-	lru_count -= range_size(range);
-}
-
 /**
  * range_alloc() - Allocates and initializes a new ashmem_range structure
  * @asma:	   The associated ashmem_area
@@ -188,9 +153,23 @@ static int range_alloc(struct ashmem_area *asma,
 
 	list_add_tail(&range->unpinned, &prev_range->unpinned);
 
-	if (range_on_lru(range))
-		lru_add(range);
+	if (range_not_purged(range)) {
+		loff_t start = range->pgstart * PAGE_SIZE;
+		loff_t end = (range->pgend + 1) * PAGE_SIZE;
+		struct file *f = range->asma->file;
 
+		get_file(f);
+		atomic_inc(&ashmem_purge_inflight);
+		range->purged = ASHMEM_WAS_PURGED;
+		mutex_unlock(&ashmem_mutex);
+		f->f_op->fallocate(f,
+				   FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				   start, end - start);
+		fput(f);
+		if (atomic_dec_and_test(&ashmem_purge_inflight))
+			wake_up(&ashmem_purge_wait);
+		mutex_lock(&ashmem_mutex);
+	}
 	return 0;
 }
 
@@ -201,8 +180,6 @@ static int range_alloc(struct ashmem_area *asma,
 static void range_del(struct ashmem_range *range)
 {
 	list_del(&range->unpinned);
-	if (range_on_lru(range))
-		lru_del(range);
 	kmem_cache_free(ashmem_range_cachep, range);
 }
 
@@ -214,20 +191,12 @@ static void range_del(struct ashmem_range *range)
  *
  * This does not modify the data inside the existing range in any way - It
  * simply shrinks the boundaries of the range.
- *
- * Theoretically, with a little tweaking, this could eventually be changed
- * to range_resize, and expand the lru_count if the new range is larger.
  */
 static inline void range_shrink(struct ashmem_range *range,
 				size_t start, size_t end)
 {
-	size_t pre = range_size(range);
-
 	range->pgstart = start;
 	range->pgend = end;
-
-	if (range_on_lru(range))
-		lru_count -= pre - range_size(range);
 }
 
 /**
@@ -421,72 +390,6 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
 	return ret;
 }
 
-/*
- * ashmem_shrink - our cache shrinker, called from mm/vmscan.c
- *
- * 'nr_to_scan' is the number of objects to scan for freeing.
- *
- * 'gfp_mask' is the mask of the allocation that got us into this mess.
- *
- * Return value is the number of objects freed or -1 if we cannot
- * proceed without risk of deadlock (due to gfp_mask).
- *
- * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial
- * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan'
- * pages freed.
- */
-static unsigned long
-ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
-{
-	struct ashmem_range *range, *next;
-	unsigned long freed = 0;
-
-	/* We might recurse into filesystem code, so bail out if necessary */
-	if (!(sc->gfp_mask & __GFP_FS))
-		return SHRINK_STOP;
-
-	if (!mutex_trylock(&ashmem_mutex))
-		return -1;
-
-	list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) {
-		loff_t start = range->pgstart * PAGE_SIZE;
-		loff_t end = (range->pgend + 1) * PAGE_SIZE;
-
-		range->asma->file->f_op->fallocate(range->asma->file,
-				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-				start, end - start);
-		range->purged = ASHMEM_WAS_PURGED;
-		lru_del(range);
-
-		freed += range_size(range);
-		if (--sc->nr_to_scan <= 0)
-			break;
-	}
-	mutex_unlock(&ashmem_mutex);
-	return freed;
-}
-
-static unsigned long
-ashmem_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
-{
-	/*
-	 * note that lru_count is count of pages on the lru, not a count of
-	 * objects on the list. This means the scan function needs to return the
-	 * number of pages freed, not the number of objects scanned.
-	 */
-	return lru_count;
-}
-
-static struct shrinker ashmem_shrinker = {
-	.count_objects = ashmem_shrink_count,
-	.scan_objects = ashmem_shrink_scan,
-	/*
-	 * XXX (dchinner): I wish people would comment on why they need on
-	 * significant changes to the default value here
-	 */
-	.seeks = DEFAULT_SEEKS * 4,
-};
-
 static int set_prot_mask(struct ashmem_area *asma, unsigned long prot)
 {
 	int ret = 0;
@@ -713,6 +616,7 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd,
 		return -EFAULT;
 
 	mutex_lock(&ashmem_mutex);
+	wait_event(ashmem_purge_wait, !atomic_read(&ashmem_purge_inflight));
 
 	if (!asma->file)
 		goto out_unlock;
@@ -787,15 +691,7 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		ret = ashmem_pin_unpin(asma, cmd, (void __user *)arg);
 		break;
 	case ASHMEM_PURGE_ALL_CACHES:
-		ret = -EPERM;
-		if (capable(CAP_SYS_ADMIN)) {
-			struct shrink_control sc = {
-				.gfp_mask = GFP_KERNEL,
-				.nr_to_scan = LONG_MAX,
-			};
-			ret = ashmem_shrink_count(&ashmem_shrinker, &sc);
-			ashmem_shrink_scan(&ashmem_shrinker, &sc);
-		}
+		ret = capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
 		break;
 	}
 
@@ -883,18 +779,10 @@ static int __init ashmem_init(void)
 		goto out_free2;
 	}
 
-	ret = register_shrinker(&ashmem_shrinker);
-	if (ret) {
-		pr_err("failed to register shrinker!\n");
-		goto out_demisc;
-	}
-
 	pr_info("initialized\n");
 
 	return 0;
 
-out_demisc:
-	misc_deregister(&ashmem_misc);
 out_free2:
 	kmem_cache_destroy(ashmem_range_cachep);
 out_free1:
-- 
2.17.1
