* [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue
[not found] <20200318205337.16279-1-sashal@kernel.org>
@ 2020-03-18 20:52 ` Sasha Levin
2020-03-23 19:18 ` Jann Horn
0 siblings, 1 reply; 4+ messages in thread
From: Sasha Levin @ 2020-03-18 20:52 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Peter Zijlstra, Jann Horn, Linus Torvalds, Sasha Levin, linux-fsdevel
From: Peter Zijlstra <peterz@infradead.org>
[ Upstream commit 8019ad13ef7f64be44d4f892af9c840179009254 ]
As reported by Jann, ihold() does not in fact guarantee inode
persistence. And instead of making it so, replace the usage of inode
pointers with a per boot, machine wide, unique inode identifier.
This sequence number is global, but shared (file backed) futexes are
rare enough that this should not become a performance issue.
Reported-by: Jann Horn <jannh@google.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/inode.c | 1 +
include/linux/fs.h | 1 +
include/linux/futex.h | 17 +++++----
kernel/futex.c | 89 ++++++++++++++++++++++++++-----------------
4 files changed, 65 insertions(+), 43 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 96d62d97694ef..c5267a4db0f5e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -137,6 +137,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
+ atomic64_set(&inode->i_sequence, 0);
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
inode->i_fop = &no_open_fops;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0b4d8fc79e0f3..06668379109e3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -698,6 +698,7 @@ struct inode {
struct rcu_head i_rcu;
};
atomic64_t i_version;
+ atomic64_t i_sequence; /* see futex */
atomic_t i_count;
atomic_t i_dio_count;
atomic_t i_writecount;
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 5cc3fed27d4c2..b70df27d7e85c 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -31,23 +31,26 @@ struct task_struct;
union futex_key {
struct {
+ u64 i_seq;
unsigned long pgoff;
- struct inode *inode;
- int offset;
+ unsigned int offset;
} shared;
struct {
+ union {
+ struct mm_struct *mm;
+ u64 __tmp;
+ };
unsigned long address;
- struct mm_struct *mm;
- int offset;
+ unsigned int offset;
} private;
struct {
+ u64 ptr;
unsigned long word;
- void *ptr;
- int offset;
+ unsigned int offset;
} both;
};
-#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
+#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
#ifdef CONFIG_FUTEX
enum {
diff --git a/kernel/futex.c b/kernel/futex.c
index afbf928d6a6b0..07ab324885ac0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- ihold(key->shared.inode); /* implies smp_mb(); (B) */
+ smp_mb(); /* explicit smp_mb(); (B) */
break;
case FUT_OFF_MMSHARED:
futex_get_mm(key); /* implies smp_mb(); (B) */
@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- iput(key->shared.inode);
break;
case FUT_OFF_MMSHARED:
mmdrop(key->private.mm);
@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
return timeout;
}
+/*
+ * Generate a machine wide unique identifier for this inode.
+ *
+ * This relies on u64 not wrapping in the life-time of the machine; which with
+ * 1ns resolution means almost 585 years.
+ *
+ * This further relies on the fact that a well formed program will not unmap
+ * the file while it has a (shared) futex waiting on it. This mapping will have
+ * a file reference which pins the mount and inode.
+ *
+ * If for some reason an inode gets evicted and read back in again, it will get
+ * a new sequence number and will _NOT_ match, even though it is the exact same
+ * file.
+ *
+ * It is important that match_futex() will never have a false-positive, esp.
+ * for PI futexes that can mess up the state. The above argues that false-negatives
+ * are only possible for malformed programs.
+ */
+static u64 get_inode_sequence_number(struct inode *inode)
+{
+ static atomic64_t i_seq;
+ u64 old;
+
+ /* Does the inode already have a sequence number? */
+ old = atomic64_read(&inode->i_sequence);
+ if (likely(old))
+ return old;
+
+ for (;;) {
+ u64 new = atomic64_add_return(1, &i_seq);
+ if (WARN_ON_ONCE(!new))
+ continue;
+
+ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
+ if (old)
+ return old;
+ return new;
+ }
+}
+
/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
*
* The key words are stored in @key on success.
*
- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
- * offset_within_page). For private mappings, it's (uaddr, current->mm).
- * We can usually work out the index without swapping in the page.
+ * For shared mappings (when @fshared), the key is:
+ * ( inode->i_sequence, page->index, offset_within_page )
+ * [ also see get_inode_sequence_number() ]
+ *
+ * For private mappings (or when !@fshared), the key is:
+ * ( current->mm, address, 0 )
+ *
+ * This allows (cross process, where applicable) identification of the futex
+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
*
* lock_page() might sleep, the caller should not hold a spinlock.
*/
@@ -659,8 +704,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
key->private.mm = mm;
key->private.address = address;
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
-
} else {
struct inode *inode;
@@ -692,40 +735,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
goto again;
}
- /*
- * Take a reference unless it is about to be freed. Previously
- * this reference was taken by ihold under the page lock
- * pinning the inode in place so i_lock was unnecessary. The
- * only way for this check to fail is if the inode was
- * truncated in parallel which is almost certainly an
- * application bug. In such a case, just retry.
- *
- * We are not calling into get_futex_key_refs() in file-backed
- * cases, therefore a successful atomic_inc return below will
- * guarantee that get_futex_key() will still imply smp_mb(); (B).
- */
- if (!atomic_inc_not_zero(&inode->i_count)) {
- rcu_read_unlock();
- put_page(page);
-
- goto again;
- }
-
- /* Should be impossible but lets be paranoid for now */
- if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
- err = -EFAULT;
- rcu_read_unlock();
- iput(inode);
-
- goto out;
- }
-
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
- key->shared.inode = inode;
+ key->shared.i_seq = get_inode_sequence_number(inode);
key->shared.pgoff = basepage_index(tail);
rcu_read_unlock();
}
+ get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
out:
put_page(page);
return err;
--
2.20.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue Sasha Levin
@ 2020-03-23 19:18 ` Jann Horn
2020-03-24 8:06 ` Greg Kroah-Hartman
2020-04-08 9:48 ` backport request for 3.16 [was: Re: [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue] Jann Horn
0 siblings, 2 replies; 4+ messages in thread
From: Jann Horn @ 2020-03-23 19:18 UTC (permalink / raw)
To: Sasha Levin, Greg Kroah-Hartman, stable
Cc: kernel list, Peter Zijlstra, Linus Torvalds, linux-fsdevel
On Wed, Mar 18, 2020 at 9:54 PM Sasha Levin <sashal@kernel.org> wrote:
>
> From: Peter Zijlstra <peterz@infradead.org>
>
> [ Upstream commit 8019ad13ef7f64be44d4f892af9c840179009254 ]
>
> As reported by Jann, ihold() does not in fact guarantee inode
> persistence. And instead of making it so, replace the usage of inode
> pointers with a per boot, machine wide, unique inode identifier.
>
> This sequence number is global, but shared (file backed) futexes are
> rare enough that this should not become a performance issue.
Please also take this patch, together with
8d67743653dce5a0e7aa500fcccb237cde7ad88e "futex: Unbreak futex
hashing", into the older stable branches. This has to go all the way
back; as far as I can tell, the bug already existed at the beginning
of git history.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue
2020-03-23 19:18 ` Jann Horn
@ 2020-03-24 8:06 ` Greg Kroah-Hartman
2020-04-08 9:48 ` backport request for 3.16 [was: Re: [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue] Jann Horn
1 sibling, 0 replies; 4+ messages in thread
From: Greg Kroah-Hartman @ 2020-03-24 8:06 UTC (permalink / raw)
To: Jann Horn
Cc: Sasha Levin, stable, kernel list, Peter Zijlstra, Linus Torvalds,
linux-fsdevel
On Mon, Mar 23, 2020 at 08:18:04PM +0100, Jann Horn wrote:
> On Wed, Mar 18, 2020 at 9:54 PM Sasha Levin <sashal@kernel.org> wrote:
> >
> > From: Peter Zijlstra <peterz@infradead.org>
> >
> > [ Upstream commit 8019ad13ef7f64be44d4f892af9c840179009254 ]
> >
> > As reported by Jann, ihold() does not in fact guarantee inode
> > persistence. And instead of making it so, replace the usage of inode
> > pointers with a per boot, machine wide, unique inode identifier.
> >
> > This sequence number is global, but shared (file backed) futexes are
> > rare enough that this should not become a performance issue.
>
> Please also take this patch, together with
> 8d67743653dce5a0e7aa500fcccb237cde7ad88e "futex: Unbreak futex
> hashing", into the older stable branches. This has to go all the way
> back; as far as I can tell, the bug already existed at the beginning
> of git history.
I have queued these up now, thanks for the hint.
greg k-h
^ permalink raw reply [flat|nested] 4+ messages in thread
* backport request for 3.16 [was: Re: [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue]
2020-03-23 19:18 ` Jann Horn
2020-03-24 8:06 ` Greg Kroah-Hartman
@ 2020-04-08 9:48 ` Jann Horn
1 sibling, 0 replies; 4+ messages in thread
From: Jann Horn @ 2020-04-08 9:48 UTC (permalink / raw)
To: stable, Ben Hutchings
Cc: kernel list, Peter Zijlstra, Linus Torvalds, Sasha Levin,
linux-fsdevel, Greg Kroah-Hartman
@Ben: You'll probably also want to take these two into the next 3.16 release.
Sorry, I forgot that 3.16 has a different maintainer...
On Mon, Mar 23, 2020 at 8:18 PM Jann Horn <jannh@google.com> wrote:
>
> On Wed, Mar 18, 2020 at 9:54 PM Sasha Levin <sashal@kernel.org> wrote:
> >
> > From: Peter Zijlstra <peterz@infradead.org>
> >
> > [ Upstream commit 8019ad13ef7f64be44d4f892af9c840179009254 ]
> >
> > As reported by Jann, ihold() does not in fact guarantee inode
> > persistence. And instead of making it so, replace the usage of inode
> > pointers with a per boot, machine wide, unique inode identifier.
> >
> > This sequence number is global, but shared (file backed) futexes are
> > rare enough that this should not become a performance issue.
>
> Please also take this patch, together with
> 8d67743653dce5a0e7aa500fcccb237cde7ad88e "futex: Unbreak futex
> hashing", into the older stable branches. This has to go all the way
> back; as far as I can tell, the bug already existed at the beginning
> of git history.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-04-08 9:48 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20200318205337.16279-1-sashal@kernel.org>
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue Sasha Levin
2020-03-23 19:18 ` Jann Horn
2020-03-24 8:06 ` Greg Kroah-Hartman
2020-04-08 9:48 ` backport request for 3.16 [was: Re: [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue] Jann Horn
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).