All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>,
	Jann Horn <jannh@google.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Sasha Levin <sashal@kernel.org>,
	linux-fsdevel@vger.kernel.org
Subject: [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue
Date: Wed, 18 Mar 2020 16:52:54 -0400	[thread overview]
Message-ID: <20200318205337.16279-30-sashal@kernel.org> (raw)
In-Reply-To: <20200318205337.16279-1-sashal@kernel.org>

From: Peter Zijlstra <peterz@infradead.org>

[ Upstream commit 8019ad13ef7f64be44d4f892af9c840179009254 ]

As reported by Jann, ihold() does not in fact guarantee inode
persistence. And instead of making it so, replace the usage of inode
pointers with a per boot, machine wide, unique inode identifier.

This sequence number is global, but shared (file backed) futexes are
rare enough that this should not become a performance issue.

Reported-by: Jann Horn <jannh@google.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/inode.c            |  1 +
 include/linux/fs.h    |  1 +
 include/linux/futex.h | 17 +++++----
 kernel/futex.c        | 89 ++++++++++++++++++++++++++-----------------
 4 files changed, 65 insertions(+), 43 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 96d62d97694ef..c5267a4db0f5e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -137,6 +137,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_sb = sb;
 	inode->i_blkbits = sb->s_blocksize_bits;
 	inode->i_flags = 0;
+	atomic64_set(&inode->i_sequence, 0);
 	atomic_set(&inode->i_count, 1);
 	inode->i_op = &empty_iops;
 	inode->i_fop = &no_open_fops;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0b4d8fc79e0f3..06668379109e3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -698,6 +698,7 @@ struct inode {
 		struct rcu_head		i_rcu;
 	};
 	atomic64_t		i_version;
+	atomic64_t		i_sequence; /* see futex */
 	atomic_t		i_count;
 	atomic_t		i_dio_count;
 	atomic_t		i_writecount;
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 5cc3fed27d4c2..b70df27d7e85c 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -31,23 +31,26 @@ struct task_struct;
 
 union futex_key {
 	struct {
+		u64 i_seq;
 		unsigned long pgoff;
-		struct inode *inode;
-		int offset;
+		unsigned int offset;
 	} shared;
 	struct {
+		union {
+			struct mm_struct *mm;
+			u64 __tmp;
+		};
 		unsigned long address;
-		struct mm_struct *mm;
-		int offset;
+		unsigned int offset;
 	} private;
 	struct {
+		u64 ptr;
 		unsigned long word;
-		void *ptr;
-		int offset;
+		unsigned int offset;
 	} both;
 };
 
-#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
+#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
 
 #ifdef CONFIG_FUTEX
 enum {
diff --git a/kernel/futex.c b/kernel/futex.c
index afbf928d6a6b0..07ab324885ac0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)
 
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
-		ihold(key->shared.inode); /* implies smp_mb(); (B) */
+		smp_mb();		/* explicit smp_mb(); (B) */
 		break;
 	case FUT_OFF_MMSHARED:
 		futex_get_mm(key); /* implies smp_mb(); (B) */
@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)
 
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
-		iput(key->shared.inode);
 		break;
 	case FUT_OFF_MMSHARED:
 		mmdrop(key->private.mm);
@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
 	return timeout;
 }
 
+/*
+ * Generate a machine wide unique identifier for this inode.
+ *
+ * This relies on u64 not wrapping in the life-time of the machine; which with
+ * 1ns resolution means almost 585 years.
+ *
+ * This further relies on the fact that a well formed program will not unmap
+ * the file while it has a (shared) futex waiting on it. This mapping will have
+ * a file reference which pins the mount and inode.
+ *
+ * If for some reason an inode gets evicted and read back in again, it will get
+ * a new sequence number and will _NOT_ match, even though it is the exact same
+ * file.
+ *
+ * It is important that match_futex() will never have a false-positive, esp.
+ * for PI futexes that can mess up the state. The above argues that false-negatives
+ * are only possible for malformed programs.
+ */
+static u64 get_inode_sequence_number(struct inode *inode)
+{
+	static atomic64_t i_seq;
+	u64 old;
+
+	/* Does the inode already have a sequence number? */
+	old = atomic64_read(&inode->i_sequence);
+	if (likely(old))
+		return old;
+
+	for (;;) {
+		u64 new = atomic64_add_return(1, &i_seq);
+		if (WARN_ON_ONCE(!new))
+			continue;
+
+		old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
+		if (old)
+			return old;
+		return new;
+	}
+}
+
 /**
  * get_futex_key() - Get parameters which are the keys for a futex
  * @uaddr:	virtual address of the futex
@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
  *
  * The key words are stored in @key on success.
  *
- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
- * offset_within_page).  For private mappings, it's (uaddr, current->mm).
- * We can usually work out the index without swapping in the page.
+ * For shared mappings (when @fshared), the key is:
+ *   ( inode->i_sequence, page->index, offset_within_page )
+ * [ also see get_inode_sequence_number() ]
+ *
+ * For private mappings (or when !@fshared), the key is:
+ *   ( current->mm, address, 0 )
+ *
+ * This allows (cross process, where applicable) identification of the futex
+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
  *
  * lock_page() might sleep, the caller should not hold a spinlock.
  */
@@ -659,8 +704,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 		key->private.mm = mm;
 		key->private.address = address;
 
-		get_futex_key_refs(key); /* implies smp_mb(); (B) */
-
 	} else {
 		struct inode *inode;
 
@@ -692,40 +735,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 			goto again;
 		}
 
-		/*
-		 * Take a reference unless it is about to be freed. Previously
-		 * this reference was taken by ihold under the page lock
-		 * pinning the inode in place so i_lock was unnecessary. The
-		 * only way for this check to fail is if the inode was
-		 * truncated in parallel which is almost certainly an
-		 * application bug. In such a case, just retry.
-		 *
-		 * We are not calling into get_futex_key_refs() in file-backed
-		 * cases, therefore a successful atomic_inc return below will
-		 * guarantee that get_futex_key() will still imply smp_mb(); (B).
-		 */
-		if (!atomic_inc_not_zero(&inode->i_count)) {
-			rcu_read_unlock();
-			put_page(page);
-
-			goto again;
-		}
-
-		/* Should be impossible but lets be paranoid for now */
-		if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
-			err = -EFAULT;
-			rcu_read_unlock();
-			iput(inode);
-
-			goto out;
-		}
-
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-		key->shared.inode = inode;
+		key->shared.i_seq = get_inode_sequence_number(inode);
 		key->shared.pgoff = basepage_index(tail);
 		rcu_read_unlock();
 	}
 
+	get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
 out:
 	put_page(page);
 	return err;
-- 
2.20.1


  parent reply	other threads:[~2020-03-18 21:07 UTC|newest]

Thread overview: 103+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-18 20:52 [PATCH AUTOSEL 5.4 01/73] cgroup-v1: cgroup_pidlist_next should update position index Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 02/73] cgroup: Iterate tasks that did not finish do_exit() Sasha Levin
2020-03-18 20:52   ` Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 03/73] clk: imx8mn: Fix incorrect clock defines Sasha Levin
2020-03-18 20:52   ` Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 04/73] batman-adv: Don't schedule OGM for disabled interface Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 05/73] pinctrl: meson-gxl: fix GPIOX sdio pins Sasha Levin
2020-03-18 20:52   ` Sasha Levin
2020-03-18 20:52   ` Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 06/73] pinctrl: imx: scu: Align imx sc msg structs to 4 Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 07/73] nfs: add minor version to nfs_server_key for fscache Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 08/73] pinctrl: core: Remove extra kref_get which blocks hogs being freed Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 09/73] r8152: check disconnect status after long sleep Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 10/73] net: dsa: mv88e6xxx: fix lockup on warm boot Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 11/73] net: phy: avoid clearing PHY interrupts twice in irq handler Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 12/73] bnxt_en: reinitialize IRQs when MTU is modified Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 13/73] bnxt_en: fix error handling when flashing from file Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 14/73] cpupower: avoid multiple definition with gcc -fno-common Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 15/73] fib: add missing attribute validation for tun_id Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 16/73] can: add missing attribute validation for termination Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 17/73] macsec: add missing attribute validation for port Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 18/73] team: add missing attribute validation for port ifindex Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 19/73] team: add missing attribute validation for array index Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 20/73] netfilter: cthelper: add missing attribute validation for cthelper Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 21/73] netfilter: nft_payload: add missing attribute validation for payload csum flags Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 22/73] netfilter: nft_tunnel: add missing attribute validation for tunnels Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 23/73] net: phy: bcm63xx: fix OOPS due to missing driver name Sasha Levin
2020-03-18 20:52   ` Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 24/73] drivers/of/of_mdio.c:fix of_mdiobus_register() Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 25/73] cgroup1: don't call release_agent when it is "" Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 26/73] netfilter: nf_tables: dump NFTA_CHAIN_FLAGS attribute Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 27/73] netfilter: nf_tables: fix infinite loop when expr is not available Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 28/73] slip: make slhc_compress() more robust against malicious packets Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 29/73] net: hns3: fix a not link up issue when fibre port supports autoneg Sasha Levin
2020-03-18 20:52 ` Sasha Levin [this message]
2020-03-23 19:18   ` [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue Jann Horn
2020-03-24  8:06     ` Greg Kroah-Hartman
2020-04-08  9:48     ` backport request for 3.16 [was: Re: [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue] Jann Horn
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 31/73] netfilter: nft_chain_nat: inet family is missing module ownership Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 32/73] dt-bindings: net: FMan erratum A050385 Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 33/73] arm64: dts: ls1043a: " Sasha Levin
2020-03-18 20:52   ` Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 34/73] fsl/fman: detect " Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 35/73] bonding/alb: make sure arp header is pulled before accessing it Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 36/73] virtio_ring: Fix mem leak with vring_new_virtqueue() Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 37/73] virtio-blk: fix hw_queue stopped on arbitrary error Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 38/73] virtio_balloon: Adjust label in virtballoon_probe Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 39/73] ipvlan: do not add hardware address of master to its unicast filter list Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 40/73] net: stmmac: dwmac1000: Disable ACS if enhanced descs are not used Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 41/73] drm/amd/display: update soc bb for nv14 Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 42/73] drm/amdgpu: correct ROM_INDEX/DATA offset for VEGA20 Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 43/73] futex: Unbreak futex hashing Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 44/73] ipvlan: don't deref eth hdr before checking it's set Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 45/73] ipvlan: add cond_resched_rcu() while processing muticast backlog Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 46/73] macvlan: add cond_resched() during multicast processing Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 47/73] ipvlan: do not use cond_resched_rcu() in ipvlan_process_multicast() Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 48/73] drm/exynos: Fix cleanup of IOMMU related objects Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 49/73] i2c: i801: Do not add ICH_RES_IO_SMI for the iTCO_wdt device Sasha Levin
2020-03-19  7:30   ` Wolfram Sang
2020-03-29 20:07     ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 50/73] iommu/vt-d: Fix RCU-list bugs in intel_iommu_init() Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 51/73] iommu/vt-d: Silence RCU-list debugging warnings Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 52/73] i2c: gpio: suppress error on probe defer Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 53/73] s390/qeth: don't reset default_out_queue Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 54/73] s390/qeth: handle error when backing RX buffer Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 55/73] scsi: ipr: Fix softlockup when rescanning devices in petitboot Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 56/73] nl80211: add missing attribute validation for critical protocol indication Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 57/73] nl80211: add missing attribute validation for beacon report scanning Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 58/73] nl80211: add missing attribute validation for channel switch Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 59/73] mac80211: Do not send mesh HWMP PREQ if HWMP is disabled Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 60/73] driver code: clarify and fix platform device DMA mask allocation Sasha Levin
2020-03-19  6:49   ` Greg Kroah-Hartman
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 61/73] dpaa_eth: Remove unnecessary boolean expression in dpaa_get_headroom Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 62/73] net: fec: validate the new settings in fec_enet_set_coalesce() Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 63/73] sxgbe: Fix off by one in samsung driver strncpy size arg Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 64/73] net: mvmdio: avoid error message for optional IRQ Sasha Levin
2020-03-18 20:57   ` Chris Packham
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 65/73] net: hns3: fix "tc qdisc del" failed issue Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 66/73] net: systemport: fix index check to avoid an array out of bounds access Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 67/73] iommu/vt-d: quirk_ioat_snb_local_iommu: replace WARN_TAINT with pr_warn + add_taint Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 68/73] iommu/vt-d: Fix debugfs register reads Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 69/73] i2c: acpi: put device when verifying client fails Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 70/73] iommu/vt-d: Fix the wrong printing in RHSA parsing Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 71/73] iommu/vt-d: Ignore devices with out-of-spec domain number Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 72/73] iommu/amd: Fix IOMMU AVIC not properly update the is_run bit in IRTE Sasha Levin
2020-03-18 20:53   ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 73/73] iommu/vt-d: Populate debugfs if IOMMUs are detected Sasha Levin
2020-03-18 20:53   ` Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200318205337.16279-30-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=jannh@google.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.