* [PATCH 0/4] PPL fixes and improvements
@ 2017-04-04 11:13 Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page Artur Paszkiewicz
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-04 11:13 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, Artur Paszkiewicz

These are mostly fixes for possible deadlocks related to memory allocation,
plus one optimization.

Artur Paszkiewicz (4):
  raid5-ppl: use a single mempool for ppl_io_unit and header_page
  raid5-ppl: move no_mem_stripes to struct ppl_conf
  raid5-ppl: use resize_stripes() when enabling or disabling ppl
  raid5-ppl: partial parity calculation optimization

 drivers/md/raid5-log.h |   5 ++-
 drivers/md/raid5-ppl.c | 112 +++++++++++++++++++++++++++++++------------------
 drivers/md/raid5.c     |  94 ++++++++++++++++++-----------------------
 3 files changed, 115 insertions(+), 96 deletions(-)

-- 
2.11.0


* [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-04 11:13 [PATCH 0/4] PPL fixes and improvements Artur Paszkiewicz
@ 2017-04-04 11:13 ` Artur Paszkiewicz
  2017-04-10 19:09   ` Shaohua Li
  2017-04-04 11:13 ` [PATCH 2/4] raid5-ppl: move no_mem_stripes to struct ppl_conf Artur Paszkiewicz
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-04 11:13 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, Artur Paszkiewicz

Allocate both struct ppl_io_unit and its header_page from a shared
mempool to avoid a possible deadlock. Implement allocate and free
functions for the mempool, remove the second pool for allocating
header_page. The header_pages are now freed with their io_units, not
when the ppl bio completes. Also, use GFP_NOWAIT instead of GFP_ATOMIC
for allocating ppl_io_unit because we can handle failed allocations and
there is no reason to utilize emergency reserves.

Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5-ppl.c | 53 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 86ea9addb51a..42e43467d1e8 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -102,7 +102,6 @@ struct ppl_conf {
 	struct kmem_cache *io_kc;
 	mempool_t *io_pool;
 	struct bio_set *bs;
-	mempool_t *meta_pool;
 
 	/* used only for recovery */
 	int recovered_entries;
@@ -195,6 +194,33 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 	return tx;
 }
 
+static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
+{
+	struct kmem_cache *kc = pool_data;
+	struct ppl_io_unit *io;
+
+	io = kmem_cache_alloc(kc, gfp_mask);
+	if (!io)
+		return NULL;
+
+	io->header_page = alloc_page(gfp_mask);
+	if (!io->header_page) {
+		kmem_cache_free(kc, io);
+		return NULL;
+	}
+
+	return io;
+}
+
+static void ppl_io_pool_free(void *element, void *pool_data)
+{
+	struct kmem_cache *kc = pool_data;
+	struct ppl_io_unit *io = element;
+
+	__free_page(io->header_page);
+	kmem_cache_free(kc, io);
+}
+
 static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
 					  struct stripe_head *sh)
 {
@@ -202,18 +228,19 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
 	struct ppl_io_unit *io;
 	struct ppl_header *pplhdr;
 
-	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
+	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
 	if (!io)
 		return NULL;
 
-	memset(io, 0, sizeof(*io));
 	io->log = log;
+	io->entries_count = 0;
+	io->pp_size = 0;
+	io->submitted = false;
 	INIT_LIST_HEAD(&io->log_sibling);
 	INIT_LIST_HEAD(&io->stripe_list);
 	atomic_set(&io->pending_stripes, 0);
 	bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
 
-	io->header_page = mempool_alloc(ppl_conf->meta_pool, GFP_NOIO);
 	pplhdr = page_address(io->header_page);
 	clear_page(pplhdr);
 	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
@@ -369,8 +396,6 @@ static void ppl_log_endio(struct bio *bio)
 	if (bio->bi_error)
 		md_error(ppl_conf->mddev, log->rdev);
 
-	mempool_free(io->header_page, ppl_conf->meta_pool);
-
 	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
 		list_del_init(&sh->log_list);
 
@@ -998,7 +1023,6 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf)
 
 	kfree(ppl_conf->child_logs);
 
-	mempool_destroy(ppl_conf->meta_pool);
 	if (ppl_conf->bs)
 		bioset_free(ppl_conf->bs);
 	mempool_destroy(ppl_conf->io_pool);
@@ -1104,25 +1128,20 @@ int ppl_init_log(struct r5conf *conf)
 
 	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
 	if (!ppl_conf->io_kc) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
-	ppl_conf->io_pool = mempool_create_slab_pool(conf->raid_disks, ppl_conf->io_kc);
+	ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
+					   ppl_io_pool_free, ppl_conf->io_kc);
 	if (!ppl_conf->io_pool) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
 	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
 	if (!ppl_conf->bs) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	ppl_conf->meta_pool = mempool_create_page_pool(conf->raid_disks, 0);
-	if (!ppl_conf->meta_pool) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
-- 
2.11.0


* [PATCH 2/4] raid5-ppl: move no_mem_stripes to struct ppl_conf
  2017-04-04 11:13 [PATCH 0/4] PPL fixes and improvements Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page Artur Paszkiewicz
@ 2017-04-04 11:13 ` Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 3/4] raid5-ppl: use resize_stripes() when enabling or disabling ppl Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 4/4] raid5-ppl: partial parity calculation optimization Artur Paszkiewicz
  3 siblings, 0 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-04 11:13 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, Artur Paszkiewicz

Use a single no_mem_stripes list instead of per member device lists for
handling stripes that need retrying in case of failed io_unit
allocation. Because io_units are allocated from a memory pool shared
between all member disks, the no_mem_stripes list should be checked when
an io_unit for any member is freed. This fixes a deadlock that could
happen if there are stripes in more than one no_mem_stripes list.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5-ppl.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 42e43467d1e8..0a39a6bbcbde 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -106,6 +106,10 @@ struct ppl_conf {
 	/* used only for recovery */
 	int recovered_entries;
 	int mismatch_count;
+
+	/* stripes to retry if failed to allocate io_unit */
+	struct list_head no_mem_stripes;
+	spinlock_t no_mem_stripes_lock;
 };
 
 struct ppl_log {
@@ -118,8 +122,6 @@ struct ppl_log {
 					 * always at the end of io_list */
 	spinlock_t io_list_lock;
 	struct list_head io_list;	/* all io_units of this log */
-	struct list_head no_mem_stripes;/* stripes to retry if failed to
-					 * allocate io_unit */
 };
 
 #define PPL_IO_INLINE_BVECS 32
@@ -374,9 +376,9 @@ int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh)
 	atomic_inc(&sh->count);
 
 	if (ppl_log_stripe(log, sh)) {
-		spin_lock_irq(&log->io_list_lock);
-		list_add_tail(&sh->log_list, &log->no_mem_stripes);
-		spin_unlock_irq(&log->io_list_lock);
+		spin_lock_irq(&ppl_conf->no_mem_stripes_lock);
+		list_add_tail(&sh->log_list, &ppl_conf->no_mem_stripes);
+		spin_unlock_irq(&ppl_conf->no_mem_stripes_lock);
 	}
 
 	mutex_unlock(&log->io_mutex);
@@ -517,25 +519,32 @@ void ppl_write_stripe_run(struct r5conf *conf)
 static void ppl_io_unit_finished(struct ppl_io_unit *io)
 {
 	struct ppl_log *log = io->log;
+	struct ppl_conf *ppl_conf = log->ppl_conf;
 	unsigned long flags;
 
 	pr_debug("%s: seq: %llu\n", __func__, io->seq);
 
-	spin_lock_irqsave(&log->io_list_lock, flags);
+	local_irq_save(flags);
 
+	spin_lock(&log->io_list_lock);
 	list_del(&io->log_sibling);
-	mempool_free(io, log->ppl_conf->io_pool);
+	spin_unlock(&log->io_list_lock);
+
+	mempool_free(io, ppl_conf->io_pool);
+
+	spin_lock(&ppl_conf->no_mem_stripes_lock);
+	if (!list_empty(&ppl_conf->no_mem_stripes)) {
+		struct stripe_head *sh;
 
-	if (!list_empty(&log->no_mem_stripes)) {
-		struct stripe_head *sh = list_first_entry(&log->no_mem_stripes,
-							  struct stripe_head,
-							  log_list);
+		sh = list_first_entry(&ppl_conf->no_mem_stripes,
+				      struct stripe_head, log_list);
 		list_del_init(&sh->log_list);
 		set_bit(STRIPE_HANDLE, &sh->state);
 		raid5_release_stripe(sh);
 	}
+	spin_unlock(&ppl_conf->no_mem_stripes_lock);
 
-	spin_unlock_irqrestore(&log->io_list_lock, flags);
+	local_irq_restore(flags);
 }
 
 void ppl_stripe_write_finished(struct stripe_head *sh)
@@ -1154,6 +1163,8 @@ int ppl_init_log(struct r5conf *conf)
 	}
 
 	atomic64_set(&ppl_conf->seq, 0);
+	INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
+	spin_lock_init(&ppl_conf->no_mem_stripes_lock);
 
 	if (!mddev->external) {
 		ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
@@ -1169,7 +1180,6 @@ int ppl_init_log(struct r5conf *conf)
 		mutex_init(&log->io_mutex);
 		spin_lock_init(&log->io_list_lock);
 		INIT_LIST_HEAD(&log->io_list);
-		INIT_LIST_HEAD(&log->no_mem_stripes);
 
 		log->ppl_conf = ppl_conf;
 		log->rdev = rdev;
-- 
2.11.0


* [PATCH 3/4] raid5-ppl: use resize_stripes() when enabling or disabling ppl
  2017-04-04 11:13 [PATCH 0/4] PPL fixes and improvements Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 2/4] raid5-ppl: move no_mem_stripes to struct ppl_conf Artur Paszkiewicz
@ 2017-04-04 11:13 ` Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 4/4] raid5-ppl: partial parity calculation optimization Artur Paszkiewicz
  3 siblings, 0 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-04 11:13 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, Artur Paszkiewicz

Use resize_stripes() instead of raid5_reset_stripe_cache() to allocate
or free sh->ppl_page at runtime for all stripes in the stripe cache.
raid5_reset_stripe_cache() required suspending the mddev and could
deadlock because of GFP_KERNEL allocations.

Move the 'newsize' check to check_reshape() to allow reallocating the
stripes with the same number of disks. Allocate sh->ppl_page in
alloc_stripe() instead of grow_buffers(). Pass 'struct r5conf *conf' as
a parameter to alloc_stripe() because it is needed to check whether to
allocate ppl_page. Add free_stripe() and use it to free stripes rather
than directly call kmem_cache_free(). Also free sh->ppl_page in
free_stripe().

Set MD_HAS_PPL at the end of ppl_init_log() instead of explicitly
setting it in advance and add another parameter to log_init() to allow
calling ppl_init_log() without the bit set. Don't try to calculate
partial parity or add a stripe to the log if it does not have ppl_page set.

Enabling ppl can now be performed without suspending the mddev, because
the log won't be used until new stripes are allocated with ppl_page.
Calling mddev_suspend/resume is still necessary when disabling ppl,
because we want all stripes to finish before stopping the log, but
resize_stripes() can be called after mddev_resume() when ppl is no
longer active.

Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5-log.h |  5 +--
 drivers/md/raid5-ppl.c |  3 +-
 drivers/md/raid5.c     | 88 ++++++++++++++++++++++----------------------------
 3 files changed, 43 insertions(+), 53 deletions(-)

diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index 738930ff5d17..27097101ccca 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -93,11 +93,12 @@ static inline void log_exit(struct r5conf *conf)
 		ppl_exit_log(conf);
 }
 
-static inline int log_init(struct r5conf *conf, struct md_rdev *journal_dev)
+static inline int log_init(struct r5conf *conf, struct md_rdev *journal_dev,
+			   bool ppl)
 {
 	if (journal_dev)
 		return r5l_init_log(conf, journal_dev);
-	else if (raid5_has_ppl(conf))
+	else if (ppl)
 		return ppl_init_log(conf);
 
 	return 0;
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 0a39a6bbcbde..e938669810c4 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -355,7 +355,7 @@ int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh)
 	struct ppl_io_unit *io = sh->ppl_io;
 	struct ppl_log *log;
 
-	if (io || test_bit(STRIPE_SYNCING, &sh->state) ||
+	if (io || test_bit(STRIPE_SYNCING, &sh->state) || !sh->ppl_page ||
 	    !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) ||
 	    !test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) {
 		clear_bit(STRIPE_LOG_TRAPPED, &sh->state);
@@ -1223,6 +1223,7 @@ int ppl_init_log(struct r5conf *conf)
 	}
 
 	conf->log_private = ppl_conf;
+	set_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
 
 	return 0;
 err:
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6036d5e41ddd..9cab2fe078c2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -471,11 +471,6 @@ static void shrink_buffers(struct stripe_head *sh)
 		sh->dev[i].page = NULL;
 		put_page(p);
 	}
-
-	if (sh->ppl_page) {
-		put_page(sh->ppl_page);
-		sh->ppl_page = NULL;
-	}
 }
 
 static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
@@ -493,12 +488,6 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
 		sh->dev[i].orig_page = page;
 	}
 
-	if (raid5_has_ppl(sh->raid_conf)) {
-		sh->ppl_page = alloc_page(gfp);
-		if (!sh->ppl_page)
-			return 1;
-	}
-
 	return 0;
 }
 
@@ -2132,8 +2121,15 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 	put_cpu();
 }
 
+static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
+{
+	if (sh->ppl_page)
+		__free_page(sh->ppl_page);
+	kmem_cache_free(sc, sh);
+}
+
 static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
-	int disks)
+	int disks, struct r5conf *conf)
 {
 	struct stripe_head *sh;
 	int i;
@@ -2147,6 +2143,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 		INIT_LIST_HEAD(&sh->r5c);
 		INIT_LIST_HEAD(&sh->log_list);
 		atomic_set(&sh->count, 1);
+		sh->raid_conf = conf;
 		sh->log_start = MaxSector;
 		for (i = 0; i < disks; i++) {
 			struct r5dev *dev = &sh->dev[i];
@@ -2154,6 +2151,14 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 			bio_init(&dev->req, &dev->vec, 1);
 			bio_init(&dev->rreq, &dev->rvec, 1);
 		}
+
+		if (raid5_has_ppl(conf)) {
+			sh->ppl_page = alloc_page(gfp);
+			if (!sh->ppl_page) {
+				free_stripe(sc, sh);
+				sh = NULL;
+			}
+		}
 	}
 	return sh;
 }
@@ -2161,15 +2166,13 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 {
 	struct stripe_head *sh;
 
-	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
+	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf);
 	if (!sh)
 		return 0;
 
-	sh->raid_conf = conf;
-
 	if (grow_buffers(sh, gfp)) {
 		shrink_buffers(sh);
-		kmem_cache_free(conf->slab_cache, sh);
+		free_stripe(conf->slab_cache, sh);
 		return 0;
 	}
 	sh->hash_lock_index =
@@ -2314,9 +2317,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	int i;
 	int hash, cnt;
 
-	if (newsize <= conf->pool_size)
-		return 0; /* never bother to shrink */
-
 	err = md_allow_write(conf->mddev);
 	if (err)
 		return err;
@@ -2332,11 +2332,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	mutex_lock(&conf->cache_size_mutex);
 
 	for (i = conf->max_nr_stripes; i; i--) {
-		nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
+		nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf);
 		if (!nsh)
 			break;
 
-		nsh->raid_conf = conf;
 		list_add(&nsh->lru, &newstripes);
 	}
 	if (i) {
@@ -2344,7 +2343,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 		while (!list_empty(&newstripes)) {
 			nsh = list_entry(newstripes.next, struct stripe_head, lru);
 			list_del(&nsh->lru);
-			kmem_cache_free(sc, nsh);
+			free_stripe(sc, nsh);
 		}
 		kmem_cache_destroy(sc);
 		mutex_unlock(&conf->cache_size_mutex);
@@ -2370,7 +2369,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 			nsh->dev[i].orig_page = osh->dev[i].page;
 		}
 		nsh->hash_lock_index = hash;
-		kmem_cache_free(conf->slab_cache, osh);
+		free_stripe(conf->slab_cache, osh);
 		cnt++;
 		if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
 		    !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
@@ -2445,7 +2444,7 @@ static int drop_one_stripe(struct r5conf *conf)
 		return 0;
 	BUG_ON(atomic_read(&sh->count));
 	shrink_buffers(sh);
-	kmem_cache_free(conf->slab_cache, sh);
+	free_stripe(conf->slab_cache, sh);
 	atomic_dec(&conf->active_stripes);
 	conf->max_nr_stripes--;
 	return 1;
@@ -3168,7 +3167,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 		s->locked++;
 	}
 
-	if (raid5_has_ppl(sh->raid_conf) &&
+	if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page &&
 	    test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) &&
 	    !test_bit(STRIPE_FULL_WRITE, &sh->state) &&
 	    test_bit(R5_Insync, &sh->dev[pd_idx].flags))
@@ -7414,7 +7413,7 @@ static int raid5_run(struct mddev *mddev)
 		blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
 	}
 
-	if (log_init(conf, journal_dev))
+	if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
 		goto abort;
 
 	return 0;
@@ -7623,7 +7622,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		 * The array is in readonly mode if journal is missing, so no
 		 * write requests running. We should be safe
 		 */
-		log_init(conf, rdev);
+		log_init(conf, rdev, false);
 		return 0;
 	}
 	if (mddev->recovery_disabled == conf->recovery_disabled)
@@ -7773,6 +7772,9 @@ static int check_reshape(struct mddev *mddev)
 				      mddev->chunk_sectors)
 			    ) < 0)
 			return -ENOMEM;
+
+	if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size)
+		return 0; /* never bother to shrink */
 	return resize_stripes(conf, (conf->previous_raid_disks
 				     + mddev->delta_disks));
 }
@@ -8263,20 +8265,6 @@ static void *raid6_takeover(struct mddev *mddev)
 	return setup_conf(mddev);
 }
 
-static void raid5_reset_stripe_cache(struct mddev *mddev)
-{
-	struct r5conf *conf = mddev->private;
-
-	mutex_lock(&conf->cache_size_mutex);
-	while (conf->max_nr_stripes &&
-	       drop_one_stripe(conf))
-		;
-	while (conf->min_nr_stripes > conf->max_nr_stripes &&
-	       grow_one_stripe(conf, GFP_KERNEL))
-		;
-	mutex_unlock(&conf->cache_size_mutex);
-}
-
 static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 {
 	struct r5conf *conf;
@@ -8291,23 +8279,23 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 		return -ENODEV;
 	}
 
-	if (strncmp(buf, "ppl", 3) == 0 && !raid5_has_ppl(conf)) {
+	if (strncmp(buf, "ppl", 3) == 0) {
 		/* ppl only works with RAID 5 */
-		if (conf->level == 5) {
-			mddev_suspend(mddev);
-			set_bit(MD_HAS_PPL, &mddev->flags);
-			err = log_init(conf, NULL);
-			if (!err)
-				raid5_reset_stripe_cache(mddev);
-			mddev_resume(mddev);
+		if (!raid5_has_ppl(conf) && conf->level == 5) {
+			err = log_init(conf, NULL, true);
+			if (!err) {
+				err = resize_stripes(conf, conf->pool_size);
+				if (err)
+					log_exit(conf);
+			}
 		} else
 			err = -EINVAL;
 	} else if (strncmp(buf, "resync", 6) == 0) {
 		if (raid5_has_ppl(conf)) {
 			mddev_suspend(mddev);
 			log_exit(conf);
-			raid5_reset_stripe_cache(mddev);
 			mddev_resume(mddev);
+			err = resize_stripes(conf, conf->pool_size);
 		} else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) &&
 			   r5l_log_disk_error(conf)) {
 			bool journal_dev_exists = false;
-- 
2.11.0


* [PATCH 4/4] raid5-ppl: partial parity calculation optimization
  2017-04-04 11:13 [PATCH 0/4] PPL fixes and improvements Artur Paszkiewicz
                   ` (2 preceding siblings ...)
  2017-04-04 11:13 ` [PATCH 3/4] raid5-ppl: use resize_stripes() when enabling or disabling ppl Artur Paszkiewicz
@ 2017-04-04 11:13 ` Artur Paszkiewicz
  3 siblings, 0 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-04 11:13 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, Artur Paszkiewicz

In case of read-modify-write, partial parity is the same as the result
of ops_run_prexor5(), so we can just copy sh->dev[pd_idx].page into
sh->ppl_page instead of calculating it again.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5-ppl.c | 20 ++++++++++----------
 drivers/md/raid5.c     |  6 +++---
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index e938669810c4..6fef999f2150 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -152,7 +152,7 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 		       struct dma_async_tx_descriptor *tx)
 {
 	int disks = sh->disks;
-	struct page **xor_srcs = flex_array_get(percpu->scribble, 0);
+	struct page **srcs = flex_array_get(percpu->scribble, 0);
 	int count = 0, pd_idx = sh->pd_idx, i;
 	struct async_submit_ctl submit;
 
@@ -165,18 +165,18 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 	 * differently.
 	 */
 	if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
-		/* rmw: xor old data and parity from updated disks */
-		for (i = disks; i--;) {
-			struct r5dev *dev = &sh->dev[i];
-			if (test_bit(R5_Wantdrain, &dev->flags) || i == pd_idx)
-				xor_srcs[count++] = dev->page;
-		}
+		/*
+		 * rmw: xor old data and parity from updated disks
+		 * This is calculated earlier by ops_run_prexor5() so just copy
+		 * the parity dev page.
+		 */
+		srcs[count++] = sh->dev[pd_idx].page;
 	} else if (sh->reconstruct_state == reconstruct_state_drain_run) {
 		/* rcw: xor data from all not updated disks */
 		for (i = disks; i--;) {
 			struct r5dev *dev = &sh->dev[i];
 			if (test_bit(R5_UPTODATE, &dev->flags))
-				xor_srcs[count++] = dev->page;
+				srcs[count++] = dev->page;
 		}
 	} else {
 		return tx;
@@ -187,10 +187,10 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 			  + sizeof(struct page *) * (sh->disks + 2));
 
 	if (count == 1)
-		tx = async_memcpy(sh->ppl_page, xor_srcs[0], 0, 0, PAGE_SIZE,
+		tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE,
 				  &submit);
 	else
-		tx = async_xor(sh->ppl_page, xor_srcs, 0, count, PAGE_SIZE,
+		tx = async_xor(sh->ppl_page, srcs, 0, count, PAGE_SIZE,
 			       &submit);
 
 	return tx;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9cab2fe078c2..f99a12f50f9a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2079,9 +2079,6 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 			async_tx_ack(tx);
 	}
 
-	if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request))
-		tx = ops_run_partial_parity(sh, percpu, tx);
-
 	if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
 		if (level < 6)
 			tx = ops_run_prexor5(sh, percpu, tx);
@@ -2089,6 +2086,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 			tx = ops_run_prexor6(sh, percpu, tx);
 	}
 
+	if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request))
+		tx = ops_run_partial_parity(sh, percpu, tx);
+
 	if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
 		tx = ops_run_biodrain(sh, tx);
 		overlap_clear++;
-- 
2.11.0


* Re: [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-04 11:13 ` [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page Artur Paszkiewicz
@ 2017-04-10 19:09   ` Shaohua Li
  2017-04-11  8:28     ` Artur Paszkiewicz
  0 siblings, 1 reply; 11+ messages in thread
From: Shaohua Li @ 2017-04-10 19:09 UTC (permalink / raw)
  To: Artur Paszkiewicz; +Cc: linux-raid, songliubraving


On Tue, Apr 04, 2017 at 01:13:55PM +0200, Artur Paszkiewicz wrote:

Cc: Song, the raid5-cache needs a similar fix.

> Allocate both struct ppl_io_unit and its header_page from a shared
> mempool to avoid a possible deadlock. Implement allocate and free
> functions for the mempool, remove the second pool for allocating
> header_page. The header_pages are now freed with their io_units, not
> when the ppl bio completes. Also, use GFP_NOWAIT instead of GFP_ATOMIC
> for allocating ppl_io_unit because we can handle failed allocations and
> there is no reason to utilize emergency reserves.

I applied the last 3 patches, had some nitpicks for this one though

> Suggested-by: NeilBrown <neilb@suse.com>
> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
> ---
>  drivers/md/raid5-ppl.c | 53 ++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 36 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
> index 86ea9addb51a..42e43467d1e8 100644
> --- a/drivers/md/raid5-ppl.c
> +++ b/drivers/md/raid5-ppl.c
> @@ -102,7 +102,6 @@ struct ppl_conf {
>  	struct kmem_cache *io_kc;
>  	mempool_t *io_pool;
>  	struct bio_set *bs;
> -	mempool_t *meta_pool;
>  
>  	/* used only for recovery */
>  	int recovered_entries;
> @@ -195,6 +194,33 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
>  	return tx;
>  }
>  
> +static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
> +{
> +	struct kmem_cache *kc = pool_data;
> +	struct ppl_io_unit *io;
> +
> +	io = kmem_cache_alloc(kc, gfp_mask);
> +	if (!io)
> +		return NULL;
> +
> +	io->header_page = alloc_page(gfp_mask);
> +	if (!io->header_page) {
> +		kmem_cache_free(kc, io);
> +		return NULL;
> +	}
> +
> +	return io;

Maybe directly use GFP_NOWAIT here, we don't use other gfp

> +}
> +
> +static void ppl_io_pool_free(void *element, void *pool_data)
> +{
> +	struct kmem_cache *kc = pool_data;
> +	struct ppl_io_unit *io = element;
> +
> +	__free_page(io->header_page);
> +	kmem_cache_free(kc, io);
> +}
> +
>  static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>  					  struct stripe_head *sh)
>  {
> @@ -202,18 +228,19 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>  	struct ppl_io_unit *io;
>  	struct ppl_header *pplhdr;
>  
> -	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
> +	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
>  	if (!io)
>  		return NULL;
>  
> -	memset(io, 0, sizeof(*io));
>  	io->log = log;
> +	io->entries_count = 0;
> +	io->pp_size = 0;
> +	io->submitted = false;

I'd suggest moving the memset to ppl_io_pool_alloc. Don't think we need to
optimize to avoid setting header_page. And doing memset is less error prone,
for example adding new fields.

Otherwise looks quite good.

Thanks,
Shaohua
>  	INIT_LIST_HEAD(&io->log_sibling);
>  	INIT_LIST_HEAD(&io->stripe_list);
>  	atomic_set(&io->pending_stripes, 0);
>  	bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
>  
> -	io->header_page = mempool_alloc(ppl_conf->meta_pool, GFP_NOIO);
>  	pplhdr = page_address(io->header_page);
>  	clear_page(pplhdr);
>  	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
> @@ -369,8 +396,6 @@ static void ppl_log_endio(struct bio *bio)
>  	if (bio->bi_error)
>  		md_error(ppl_conf->mddev, log->rdev);
>  
> -	mempool_free(io->header_page, ppl_conf->meta_pool);
> -
>  	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
>  		list_del_init(&sh->log_list);
>  
> @@ -998,7 +1023,6 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf)
>  
>  	kfree(ppl_conf->child_logs);
>  
> -	mempool_destroy(ppl_conf->meta_pool);
>  	if (ppl_conf->bs)
>  		bioset_free(ppl_conf->bs);
>  	mempool_destroy(ppl_conf->io_pool);
> @@ -1104,25 +1128,20 @@ int ppl_init_log(struct r5conf *conf)
>  
>  	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
>  	if (!ppl_conf->io_kc) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
> -	ppl_conf->io_pool = mempool_create_slab_pool(conf->raid_disks, ppl_conf->io_kc);
> +	ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
> +					   ppl_io_pool_free, ppl_conf->io_kc);
>  	if (!ppl_conf->io_pool) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
>  	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
>  	if (!ppl_conf->bs) {
> -		ret = -EINVAL;
> -		goto err;
> -	}
> -
> -	ppl_conf->meta_pool = mempool_create_page_pool(conf->raid_disks, 0);
> -	if (!ppl_conf->meta_pool) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
> -- 
> 2.11.0
> 

* Re: [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-10 19:09   ` Shaohua Li
@ 2017-04-11  8:28     ` Artur Paszkiewicz
  2017-04-11  9:20       ` Artur Paszkiewicz
  0 siblings, 1 reply; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-11  8:28 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, songliubraving

On 04/10/2017 09:09 PM, Shaohua Li wrote:
>> +static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
>> +{
>> +	struct kmem_cache *kc = pool_data;
>> +	struct ppl_io_unit *io;
>> +
>> +	io = kmem_cache_alloc(kc, gfp_mask);
>> +	if (!io)
>> +		return NULL;
>> +
>> +	io->header_page = alloc_page(gfp_mask);
>> +	if (!io->header_page) {
>> +		kmem_cache_free(kc, io);
>> +		return NULL;
>> +	}
>> +
>> +	return io;
> 
> Maybe directly use GFP_NOWAIT here, we don't use other gfp

Do you mean ignore the gfp_mask parameter? I don't think we should do
that. It is provided by the mempool implementation. We only use
GFP_NOWAIT for explicit allocations, but GFP_KERNEL is used for
allocating initial items in mempool_create().
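
Roughly, the two gfp paths look like this (just a sketch for
illustration, not code from the patch):

	/* pool creation: the alloc callback sees GFP_KERNEL for the
	 * preallocated elements */
	pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
			      ppl_io_pool_free, ppl_conf->io_kc);

	/* runtime: the caller's gfp (here GFP_NOWAIT) is forwarded to
	 * the callback, with the preallocated elements as fallback */
	io = mempool_alloc(pool, GFP_NOWAIT);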

>> +}
>> +
>> +static void ppl_io_pool_free(void *element, void *pool_data)
>> +{
>> +	struct kmem_cache *kc = pool_data;
>> +	struct ppl_io_unit *io = element;
>> +
>> +	__free_page(io->header_page);
>> +	kmem_cache_free(kc, io);
>> +}
>> +
>>  static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>>  					  struct stripe_head *sh)
>>  {
>> @@ -202,18 +228,19 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>>  	struct ppl_io_unit *io;
>>  	struct ppl_header *pplhdr;
>>  
>> -	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
>> +	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
>>  	if (!io)
>>  		return NULL;
>>  
>> -	memset(io, 0, sizeof(*io));
>>  	io->log = log;
>> +	io->entries_count = 0;
>> +	io->pp_size = 0;
>> +	io->submitted = false;
> 
> I'd suggest moving the memset to ppl_io_pool_alloc. Don't think we need to
> optimize to avoid setting header_page. And doing memset is less error prone,
> for example adding new fields.

OK, I'll change this and resend.

Thanks,
Artur

* Re: [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-11  8:28     ` Artur Paszkiewicz
@ 2017-04-11  9:20       ` Artur Paszkiewicz
  2017-04-11 15:41         ` Shaohua Li
  0 siblings, 1 reply; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-11  9:20 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, songliubraving

On 04/11/2017 10:28 AM, Artur Paszkiewicz wrote:
> On 04/10/2017 09:09 PM, Shaohua Li wrote:
>>> +static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
>>> +{
>>> +	struct kmem_cache *kc = pool_data;
>>> +	struct ppl_io_unit *io;
>>> +
>>> +	io = kmem_cache_alloc(kc, gfp_mask);
>>> +	if (!io)
>>> +		return NULL;
>>> +
>>> +	io->header_page = alloc_page(gfp_mask);
>>> +	if (!io->header_page) {
>>> +		kmem_cache_free(kc, io);
>>> +		return NULL;
>>> +	}
>>> +
>>> +	return io;
>>
>> Maybe directly use GFP_NOWAIT here, we don't use other gfp
> 
> Do you mean ignore the gfp_mask parameter? I don't think we should do
> that. It is provided by the mempool implementation. We only use
> GFP_NOWAIT for explicit allocations, but GFP_KERNEL is used for
> allocating initial items in mempool_create().
> 
>>> +}
>>> +
>>> +static void ppl_io_pool_free(void *element, void *pool_data)
>>> +{
>>> +	struct kmem_cache *kc = pool_data;
>>> +	struct ppl_io_unit *io = element;
>>> +
>>> +	__free_page(io->header_page);
>>> +	kmem_cache_free(kc, io);
>>> +}
>>> +
>>>  static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>>>  					  struct stripe_head *sh)
>>>  {
>>> @@ -202,18 +228,19 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>>>  	struct ppl_io_unit *io;
>>>  	struct ppl_header *pplhdr;
>>>  
>>> -	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
>>> +	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
>>>  	if (!io)
>>>  		return NULL;
>>>  
>>> -	memset(io, 0, sizeof(*io));
>>>  	io->log = log;
>>> +	io->entries_count = 0;
>>> +	io->pp_size = 0;
>>> +	io->submitted = false;
>>
>> I'd suggest moving the memset to ppl_io_pool_alloc. Don't think we need to
>> optimize to avoid setting header_page. And doing memset is less error prone,
>> for example adding new fields.
> 
> OK, I'll change this and resend.

Well, on second thought, actually I can't move the memset to
ppl_io_pool_alloc. Mempool can use the preallocated elements, which are
then returned back to the pool after calling mempool_free() and can be
reused later. So a ppl_io_unit must be initialized after retrieving it
from mempool_alloc(). I can leave the memset in ppl_new_iounit() but
io->header_page pointer will have to be temporarily copied before
zeroing the iounit and then set back. Is this ok?
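
I.e. something along these lines (just a sketch of what I mean):

	header_page = io->header_page;
	memset(io, 0, sizeof(*io));
	io->header_page = header_page;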

Thanks,
Artur

* Re: [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-11  9:20       ` Artur Paszkiewicz
@ 2017-04-11 15:41         ` Shaohua Li
  2017-04-11 18:50           ` [PATCH v2] " Artur Paszkiewicz
  0 siblings, 1 reply; 11+ messages in thread
From: Shaohua Li @ 2017-04-11 15:41 UTC (permalink / raw)
  To: Artur Paszkiewicz; +Cc: linux-raid, songliubraving

On Tue, Apr 11, 2017 at 11:20:13AM +0200, Artur Paszkiewicz wrote:
> On 04/11/2017 10:28 AM, Artur Paszkiewicz wrote:
> > On 04/10/2017 09:09 PM, Shaohua Li wrote:
> >>> +static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
> >>> +{
> >>> +	struct kmem_cache *kc = pool_data;
> >>> +	struct ppl_io_unit *io;
> >>> +
> >>> +	io = kmem_cache_alloc(kc, gfp_mask);
> >>> +	if (!io)
> >>> +		return NULL;
> >>> +
> >>> +	io->header_page = alloc_page(gfp_mask);
> >>> +	if (!io->header_page) {
> >>> +		kmem_cache_free(kc, io);
> >>> +		return NULL;
> >>> +	}
> >>> +
> >>> +	return io;
> >>
> >> Maybe directly use GFP_NOWAIT here, we don't use other gfp
> > 
> > Do you mean ignore the gfp_mask parameter? I don't think we should do
> > that. It is provided by the mempool implementation. We only use
> > GFP_NOWAIT for explicit allocations, but GFP_KERNEL is used for
> > allocating initial items in mempool_create().
> > 
> >>> +}
> >>> +
> >>> +static void ppl_io_pool_free(void *element, void *pool_data)
> >>> +{
> >>> +	struct kmem_cache *kc = pool_data;
> >>> +	struct ppl_io_unit *io = element;
> >>> +
> >>> +	__free_page(io->header_page);
> >>> +	kmem_cache_free(kc, io);
> >>> +}
> >>> +
> >>>  static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
> >>>  					  struct stripe_head *sh)
> >>>  {
> >>> @@ -202,18 +228,19 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
> >>>  	struct ppl_io_unit *io;
> >>>  	struct ppl_header *pplhdr;
> >>>  
> >>> -	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
> >>> +	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
> >>>  	if (!io)
> >>>  		return NULL;
> >>>  
> >>> -	memset(io, 0, sizeof(*io));
> >>>  	io->log = log;
> >>> +	io->entries_count = 0;
> >>> +	io->pp_size = 0;
> >>> +	io->submitted = false;
> >>
> >> I'd suggest moving the memset to ppl_io_pool_alloc. Don't think we need to
> >> optimize to avoid setting header_page. And doing memset is less error prone,
> >> for example adding new fields.
> > 
> > OK, I'll change this and resend.
> 
> Well, on second thought, actually I can't move the memset to
> ppl_io_pool_alloc. Mempool can use the preallocated elements, which are
> then returned back to the pool after calling mempool_free() and can be
> reused later. So a ppl_io_unit must be initialized after retrieving it
> from mempool_alloc(). I can leave the memset in ppl_new_iounit() but
> io->header_page pointer will have to be temporarily copied before
> zeroing the iounit and then set back. Is this ok?

Yes, that's preferred.

Thanks,
Shaohua

* [PATCH v2] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-11 15:41         ` Shaohua Li
@ 2017-04-11 18:50           ` Artur Paszkiewicz
  2017-04-11 21:58             ` Shaohua Li
  0 siblings, 1 reply; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-11 18:50 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, songliubraving, Artur Paszkiewicz

Allocate both struct ppl_io_unit and its header_page from a shared
mempool to avoid a possible deadlock. Implement allocate and free
functions for the mempool, remove the second pool for allocating
header_page. The header_pages are now freed with their io_units, not
when the ppl bio completes. Also, use GFP_NOWAIT instead of GFP_ATOMIC
for allocating ppl_io_unit because we can handle failed allocations and
there is no reason to utilize emergency reserves.

Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5-ppl.c | 53 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 4eb0ebcf9c29..5d25bebf3328 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -102,7 +102,6 @@ struct ppl_conf {
 	struct kmem_cache *io_kc;
 	mempool_t *io_pool;
 	struct bio_set *bs;
-	mempool_t *meta_pool;
 
 	/* used only for recovery */
 	int recovered_entries;
@@ -197,25 +196,55 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 	return tx;
 }
 
+static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
+{
+	struct kmem_cache *kc = pool_data;
+	struct ppl_io_unit *io;
+
+	io = kmem_cache_alloc(kc, gfp_mask);
+	if (!io)
+		return NULL;
+
+	io->header_page = alloc_page(gfp_mask);
+	if (!io->header_page) {
+		kmem_cache_free(kc, io);
+		return NULL;
+	}
+
+	return io;
+}
+
+static void ppl_io_pool_free(void *element, void *pool_data)
+{
+	struct kmem_cache *kc = pool_data;
+	struct ppl_io_unit *io = element;
+
+	__free_page(io->header_page);
+	kmem_cache_free(kc, io);
+}
+
 static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
 					  struct stripe_head *sh)
 {
 	struct ppl_conf *ppl_conf = log->ppl_conf;
 	struct ppl_io_unit *io;
 	struct ppl_header *pplhdr;
+	struct page *header_page;
 
-	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
+	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
 	if (!io)
 		return NULL;
 
+	header_page = io->header_page;
 	memset(io, 0, sizeof(*io));
+	io->header_page = header_page;
+
 	io->log = log;
 	INIT_LIST_HEAD(&io->log_sibling);
 	INIT_LIST_HEAD(&io->stripe_list);
 	atomic_set(&io->pending_stripes, 0);
 	bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
 
-	io->header_page = mempool_alloc(ppl_conf->meta_pool, GFP_NOIO);
 	pplhdr = page_address(io->header_page);
 	clear_page(pplhdr);
 	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
@@ -371,8 +400,6 @@ static void ppl_log_endio(struct bio *bio)
 	if (bio->bi_error)
 		md_error(ppl_conf->mddev, log->rdev);
 
-	mempool_free(io->header_page, ppl_conf->meta_pool);
-
 	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
 		list_del_init(&sh->log_list);
 
@@ -1007,7 +1034,6 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf)
 
 	kfree(ppl_conf->child_logs);
 
-	mempool_destroy(ppl_conf->meta_pool);
 	if (ppl_conf->bs)
 		bioset_free(ppl_conf->bs);
 	mempool_destroy(ppl_conf->io_pool);
@@ -1113,25 +1139,20 @@ int ppl_init_log(struct r5conf *conf)
 
 	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
 	if (!ppl_conf->io_kc) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
-	ppl_conf->io_pool = mempool_create_slab_pool(conf->raid_disks, ppl_conf->io_kc);
+	ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
+					   ppl_io_pool_free, ppl_conf->io_kc);
 	if (!ppl_conf->io_pool) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
 	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
 	if (!ppl_conf->bs) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	ppl_conf->meta_pool = mempool_create_page_pool(conf->raid_disks, 0);
-	if (!ppl_conf->meta_pool) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
-- 
2.11.0


* Re: [PATCH v2] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-11 18:50           ` [PATCH v2] " Artur Paszkiewicz
@ 2017-04-11 21:58             ` Shaohua Li
  0 siblings, 0 replies; 11+ messages in thread
From: Shaohua Li @ 2017-04-11 21:58 UTC (permalink / raw)
  To: Artur Paszkiewicz; +Cc: linux-raid, songliubraving

On Tue, Apr 11, 2017 at 08:50:51PM +0200, Artur Paszkiewicz wrote:
> Allocate both struct ppl_io_unit and its header_page from a shared
> mempool to avoid a possible deadlock. Implement allocate and free
> functions for the mempool, remove the second pool for allocating
> header_page. The header_pages are now freed with their io_units, not
> when the ppl bio completes. Also, use GFP_NOWAIT instead of GFP_ATOMIC
> for allocating ppl_io_unit because we can handle failed allocations and
> there is no reason to utilize emergency reserves.
> 
> Suggested-by: NeilBrown <neilb@suse.com>
> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>

applied, thanks!
> ---
>  drivers/md/raid5-ppl.c | 53 +++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 37 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
> index 4eb0ebcf9c29..5d25bebf3328 100644
> --- a/drivers/md/raid5-ppl.c
> +++ b/drivers/md/raid5-ppl.c
> @@ -102,7 +102,6 @@ struct ppl_conf {
>  	struct kmem_cache *io_kc;
>  	mempool_t *io_pool;
>  	struct bio_set *bs;
> -	mempool_t *meta_pool;
>  
>  	/* used only for recovery */
>  	int recovered_entries;
> @@ -197,25 +196,55 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
>  	return tx;
>  }
>  
> +static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
> +{
> +	struct kmem_cache *kc = pool_data;
> +	struct ppl_io_unit *io;
> +
> +	io = kmem_cache_alloc(kc, gfp_mask);
> +	if (!io)
> +		return NULL;
> +
> +	io->header_page = alloc_page(gfp_mask);
> +	if (!io->header_page) {
> +		kmem_cache_free(kc, io);
> +		return NULL;
> +	}
> +
> +	return io;
> +}
> +
> +static void ppl_io_pool_free(void *element, void *pool_data)
> +{
> +	struct kmem_cache *kc = pool_data;
> +	struct ppl_io_unit *io = element;
> +
> +	__free_page(io->header_page);
> +	kmem_cache_free(kc, io);
> +}
> +
>  static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>  					  struct stripe_head *sh)
>  {
>  	struct ppl_conf *ppl_conf = log->ppl_conf;
>  	struct ppl_io_unit *io;
>  	struct ppl_header *pplhdr;
> +	struct page *header_page;
>  
> -	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
> +	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
>  	if (!io)
>  		return NULL;
>  
> +	header_page = io->header_page;
>  	memset(io, 0, sizeof(*io));
> +	io->header_page = header_page;
> +
>  	io->log = log;
>  	INIT_LIST_HEAD(&io->log_sibling);
>  	INIT_LIST_HEAD(&io->stripe_list);
>  	atomic_set(&io->pending_stripes, 0);
>  	bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
>  
> -	io->header_page = mempool_alloc(ppl_conf->meta_pool, GFP_NOIO);
>  	pplhdr = page_address(io->header_page);
>  	clear_page(pplhdr);
>  	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
> @@ -371,8 +400,6 @@ static void ppl_log_endio(struct bio *bio)
>  	if (bio->bi_error)
>  		md_error(ppl_conf->mddev, log->rdev);
>  
> -	mempool_free(io->header_page, ppl_conf->meta_pool);
> -
>  	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
>  		list_del_init(&sh->log_list);
>  
> @@ -1007,7 +1034,6 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf)
>  
>  	kfree(ppl_conf->child_logs);
>  
> -	mempool_destroy(ppl_conf->meta_pool);
>  	if (ppl_conf->bs)
>  		bioset_free(ppl_conf->bs);
>  	mempool_destroy(ppl_conf->io_pool);
> @@ -1113,25 +1139,20 @@ int ppl_init_log(struct r5conf *conf)
>  
>  	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
>  	if (!ppl_conf->io_kc) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
> -	ppl_conf->io_pool = mempool_create_slab_pool(conf->raid_disks, ppl_conf->io_kc);
> +	ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
> +					   ppl_io_pool_free, ppl_conf->io_kc);
>  	if (!ppl_conf->io_pool) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
>  	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
>  	if (!ppl_conf->bs) {
> -		ret = -EINVAL;
> -		goto err;
> -	}
> -
> -	ppl_conf->meta_pool = mempool_create_page_pool(conf->raid_disks, 0);
> -	if (!ppl_conf->meta_pool) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
> -- 
> 2.11.0
> 
