All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] dm-zoned fixes
@ 2018-10-17  9:05 Damien Le Moal
  2018-10-17  9:05 ` [PATCH 1/2] dm-zoned: Fix metadata block ref counting Damien Le Moal
  2018-10-17  9:05 ` [PATCH 2/2] dm-zoned: Fix dmz_get_mblock() Damien Le Moal
  0 siblings, 2 replies; 3+ messages in thread
From: Damien Le Moal @ 2018-10-17  9:05 UTC (permalink / raw)
  To: dm-devel, Mike Snitzer

Mike,

As discussed, the first patch changes the atomic_t type of metadata block
reference count to a simple integer (unsigned). While making this change,
I found a couple of problems with the dmz_get_mblock() function which are
fixed in the second patch.

Both are Cc: stable so that the main fix (the second patch) applies
cleanly.

Damien Le Moal (2):
  dm-zoned: Fix metadata block ref counting
  dm-zoned: Fix dmz_get_mblock()

 drivers/md/dm-zoned-metadata.c | 80 +++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 30 deletions(-)

-- 
2.17.2

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] dm-zoned: Fix metadata block ref counting
  2018-10-17  9:05 [PATCH 0/2] dm-zoned fixes Damien Le Moal
@ 2018-10-17  9:05 ` Damien Le Moal
  2018-10-17  9:05 ` [PATCH 2/2] dm-zoned: Fix dmz_get_mblock() Damien Le Moal
  1 sibling, 0 replies; 3+ messages in thread
From: Damien Le Moal @ 2018-10-17  9:05 UTC (permalink / raw)
  To: dm-devel, Mike Snitzer

Since the ref field of struct dmz_mblock is always used with the
spinlock of struct dmz_metadata locked, there is no need to use an
atomic_t type. Change the type of the ref field to an unsigne
integer.

Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target")
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: stable@vger.kernel.org

---
 drivers/md/dm-zoned-metadata.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 969954915566..67b71f6e3bda 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -99,7 +99,7 @@ struct dmz_mblock {
 	struct rb_node		node;
 	struct list_head	link;
 	sector_t		no;
-	atomic_t		ref;
+	unsigned int		ref;
 	unsigned long		state;
 	struct page		*page;
 	void			*data;
@@ -296,7 +296,7 @@ static struct dmz_mblock *dmz_alloc_mblock(struct dmz_metadata *zmd,
 
 	RB_CLEAR_NODE(&mblk->node);
 	INIT_LIST_HEAD(&mblk->link);
-	atomic_set(&mblk->ref, 0);
+	mblk->ref = 0;
 	mblk->state = 0;
 	mblk->no = mblk_no;
 	mblk->data = page_address(mblk->page);
@@ -397,7 +397,7 @@ static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd,
 		return NULL;
 
 	spin_lock(&zmd->mblk_lock);
-	atomic_inc(&mblk->ref);
+	mblk->ref++;
 	set_bit(DMZ_META_READING, &mblk->state);
 	dmz_insert_mblock(zmd, mblk);
 	spin_unlock(&zmd->mblk_lock);
@@ -484,7 +484,8 @@ static void dmz_release_mblock(struct dmz_metadata *zmd,
 
 	spin_lock(&zmd->mblk_lock);
 
-	if (atomic_dec_and_test(&mblk->ref)) {
+	mblk->ref--;
+	if (mblk->ref == 0) {
 		if (test_bit(DMZ_META_ERROR, &mblk->state)) {
 			rb_erase(&mblk->node, &zmd->mblk_rbtree);
 			dmz_free_mblock(zmd, mblk);
@@ -511,7 +512,8 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
 	mblk = dmz_lookup_mblock(zmd, mblk_no);
 	if (mblk) {
 		/* Cache hit: remove block from LRU list */
-		if (atomic_inc_return(&mblk->ref) == 1 &&
+		mblk->ref++;
+		if (mblk->ref == 1 &&
 		    !test_bit(DMZ_META_DIRTY, &mblk->state))
 			list_del_init(&mblk->link);
 	}
@@ -753,7 +755,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
 
 		spin_lock(&zmd->mblk_lock);
 		clear_bit(DMZ_META_DIRTY, &mblk->state);
-		if (atomic_read(&mblk->ref) == 0)
+		if (mblk->ref == 0)
 			list_add_tail(&mblk->link, &zmd->mblk_lru_list);
 		spin_unlock(&zmd->mblk_lock);
 	}
@@ -2308,7 +2310,7 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd)
 		mblk = list_first_entry(&zmd->mblk_dirty_list,
 					struct dmz_mblock, link);
 		dmz_dev_warn(zmd->dev, "mblock %llu still in dirty list (ref %u)",
-			     (u64)mblk->no, atomic_read(&mblk->ref));
+			     (u64)mblk->no, mblk->ref);
 		list_del_init(&mblk->link);
 		rb_erase(&mblk->node, &zmd->mblk_rbtree);
 		dmz_free_mblock(zmd, mblk);
@@ -2326,8 +2328,8 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd)
 	root = &zmd->mblk_rbtree;
 	rbtree_postorder_for_each_entry_safe(mblk, next, root, node) {
 		dmz_dev_warn(zmd->dev, "mblock %llu ref %u still in rbtree",
-			     (u64)mblk->no, atomic_read(&mblk->ref));
-		atomic_set(&mblk->ref, 0);
+			     (u64)mblk->no, mblk->ref);
+		mblk->ref = 0;
 		dmz_free_mblock(zmd, mblk);
 	}
 
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] dm-zoned: Fix dmz_get_mblock()
  2018-10-17  9:05 [PATCH 0/2] dm-zoned fixes Damien Le Moal
  2018-10-17  9:05 ` [PATCH 1/2] dm-zoned: Fix metadata block ref counting Damien Le Moal
@ 2018-10-17  9:05 ` Damien Le Moal
  1 sibling, 0 replies; 3+ messages in thread
From: Damien Le Moal @ 2018-10-17  9:05 UTC (permalink / raw)
  To: dm-devel, Mike Snitzer

dmz_fetch_mblock() called from dmz_get_mblock() has a race since the
allocation of the new metadata block descriptor and its insertion in
the cache rbtree with the READING state is not atomic. Two different
contexts requesting the same block may end up each adding two different
descriptors of the same block to the cache.

Another problem for this function is that the BIO for processing the
block read is allocated after the metadata block descriptor is inserted
in the cache rbtree. If the BIO allocation fails, the metadata block
descriptor is freed without first being removed from the rbtree.

Fix the first problem by checking again if the requested block is not in
the cache right before inserting the newly allocated descriptor,
atomically under the mblk_lock spinlock. The second problem is fixed by
simply allocating the BIO before inserting the new block in the cache.

Finally, since dmz_fetch_mblock() also increments a block reference
counter, rename the function to dmz_get_mblock_slow(). To be symmetric
and clear, also rename dmz_lookup_mblock() to dmz_get_mblock_fast() and
increment the block reference counter directly in that function rather
than in dmz_get_mblock().

Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target")
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: stable@vger.kernel.org

---
 drivers/md/dm-zoned-metadata.c | 66 +++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 24 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 67b71f6e3bda..fa68336560c3 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -339,10 +339,11 @@ static void dmz_insert_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk)
 }
 
 /*
- * Lookup a metadata block in the rbtree.
+ * Lookup a metadata block in the rbtree. If the block is found, increment
+ * its reference count.
  */
-static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd,
-					    sector_t mblk_no)
+static struct dmz_mblock *dmz_get_mblock_fast(struct dmz_metadata *zmd,
+					      sector_t mblk_no)
 {
 	struct rb_root *root = &zmd->mblk_rbtree;
 	struct rb_node *node = root->rb_node;
@@ -350,8 +351,17 @@ static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd,
 
 	while (node) {
 		mblk = container_of(node, struct dmz_mblock, node);
-		if (mblk->no == mblk_no)
+		if (mblk->no == mblk_no) {
+			/*
+			 * If this is the first reference to the block,
+			 * remove it from the LRU list.
+			 */
+			mblk->ref++;
+			if (mblk->ref == 1 &&
+			    !test_bit(DMZ_META_DIRTY, &mblk->state))
+				list_del_init(&mblk->link);
 			return mblk;
+		}
 		node = (mblk->no < mblk_no) ? node->rb_left : node->rb_right;
 	}
 
@@ -382,32 +392,47 @@ static void dmz_mblock_bio_end_io(struct bio *bio)
 }
 
 /*
- * Read a metadata block from disk.
+ * Read an uncached metadata block from disk and add it to the cache.
  */
-static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd,
-					   sector_t mblk_no)
+static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
+					      sector_t mblk_no)
 {
-	struct dmz_mblock *mblk;
+	struct dmz_mblock *mblk, *m;
 	sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no;
 	struct bio *bio;
 
-	/* Get block and insert it */
+	/* Get a new block and a BIO to read it */
 	mblk = dmz_alloc_mblock(zmd, mblk_no);
 	if (!mblk)
 		return NULL;
 
-	spin_lock(&zmd->mblk_lock);
-	mblk->ref++;
-	set_bit(DMZ_META_READING, &mblk->state);
-	dmz_insert_mblock(zmd, mblk);
-	spin_unlock(&zmd->mblk_lock);
-
 	bio = bio_alloc(GFP_NOIO, 1);
 	if (!bio) {
 		dmz_free_mblock(zmd, mblk);
 		return NULL;
 	}
 
+	spin_lock(&zmd->mblk_lock);
+
+	/*
+	 * Make sure that another context did not start reading
+	 * the block already.
+	 */
+	m = dmz_get_mblock_fast(zmd, mblk_no);
+	if (m) {
+		spin_unlock(&zmd->mblk_lock);
+		dmz_free_mblock(zmd, mblk);
+		bio_put(bio);
+		return m;
+	}
+
+	mblk->ref++;
+	set_bit(DMZ_META_READING, &mblk->state);
+	dmz_insert_mblock(zmd, mblk);
+
+	spin_unlock(&zmd->mblk_lock);
+
+	/* Submit read BIO */
 	bio->bi_iter.bi_sector = dmz_blk2sect(block);
 	bio_set_dev(bio, zmd->dev->bdev);
 	bio->bi_private = mblk;
@@ -509,19 +534,12 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
 
 	/* Check rbtree */
 	spin_lock(&zmd->mblk_lock);
-	mblk = dmz_lookup_mblock(zmd, mblk_no);
-	if (mblk) {
-		/* Cache hit: remove block from LRU list */
-		mblk->ref++;
-		if (mblk->ref == 1 &&
-		    !test_bit(DMZ_META_DIRTY, &mblk->state))
-			list_del_init(&mblk->link);
-	}
+	mblk = dmz_get_mblock_fast(zmd, mblk_no);
 	spin_unlock(&zmd->mblk_lock);
 
 	if (!mblk) {
 		/* Cache miss: read the block from disk */
-		mblk = dmz_fetch_mblock(zmd, mblk_no);
+		mblk = dmz_get_mblock_slow(zmd, mblk_no);
 		if (!mblk)
 			return ERR_PTR(-ENOMEM);
 	}
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-10-17  9:05 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-17  9:05 [PATCH 0/2] dm-zoned fixes Damien Le Moal
2018-10-17  9:05 ` [PATCH 1/2] dm-zoned: Fix metadata block ref counting Damien Le Moal
2018-10-17  9:05 ` [PATCH 2/2] dm-zoned: Fix dmz_get_mblock() Damien Le Moal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.