All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] MD: change journal disk role to disk 0
@ 2015-12-16 21:13 Shaohua Li
  2015-12-16 21:13 ` [PATCH 2/2] raid5-cache: add journal hot add/remove support Shaohua Li
  0 siblings, 1 reply; 3+ messages in thread
From: Shaohua Li @ 2015-12-16 21:13 UTC (permalink / raw)
  To: linux-raid; +Cc: Kernel-team, songliubraving, hch, neilb

Neil pointed out setting journal disk role to raid_disks will confuse
reshape if we support reshape eventually. Switching the role to 0 (we
should be fine as long as the value >=0) and skip sysfs file creation to
avoid error.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 2 +-
 drivers/md/md.h | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 807095f..874c843 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1652,7 +1652,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
 			if (mddev->recovery_cp == MaxSector)
 				set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
-			rdev->raid_disk = mddev->raid_disks;
+			rdev->raid_disk = 0;
 			break;
 		default:
 			rdev->saved_raid_disk = role;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 2bea51e..ca0b643 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -566,7 +566,9 @@ static inline char * mdname (struct mddev * mddev)
 static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char nm[20];
-	if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
+	if (!test_bit(Replacement, &rdev->flags) &&
+	    !test_bit(Journal, &rdev->flags) &&
+	    mddev->kobj.sd) {
 		sprintf(nm, "rd%d", rdev->raid_disk);
 		return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
 	} else
@@ -576,7 +578,9 @@ static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
 static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char nm[20];
-	if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
+	if (!test_bit(Replacement, &rdev->flags) &&
+	    !test_bit(Journal, &rdev->flags) &&
+	    mddev->kobj.sd) {
 		sprintf(nm, "rd%d", rdev->raid_disk);
 		sysfs_remove_link(&mddev->kobj, nm);
 	}
-- 
2.4.6


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] raid5-cache: add journal hot add/remove support
  2015-12-16 21:13 [PATCH 1/2] MD: change journal disk role to disk 0 Shaohua Li
@ 2015-12-16 21:13 ` Shaohua Li
  2015-12-16 22:50   ` NeilBrown
  0 siblings, 1 reply; 3+ messages in thread
From: Shaohua Li @ 2015-12-16 21:13 UTC (permalink / raw)
  To: linux-raid; +Cc: Kernel-team, songliubraving, hch, neilb

Add support for journal disk hot add/remove. Mostly trival checks in md
part. The raid5 part is a little tricky. For hot-remove, we can't wait
pending write as it's called from raid5d. The wait will cause deadlock.
We simplily fail the hot-remove. A hot-remove retry can success
eventually since if journal disk is faulty all pending write will be
failed and finish. For hot-add, since an array supporting journal but
without journal disk will be marked read-only, we are safe to hot add
journal without stopping IO (should be read IO, while journal only
handles write IO).

Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c          | 42 ++++++++++++++++++++++++++++++------------
 drivers/md/raid5-cache.c | 16 ++++++++++++----
 drivers/md/raid5.c       | 34 ++++++++++++++++++++++++++--------
 3 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 874c843..7e0a7c6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2052,8 +2052,9 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
 		return -EEXIST;
 
 	/* make sure rdev->sectors exceeds mddev->dev_sectors */
-	if (rdev->sectors && (mddev->dev_sectors == 0 ||
-			rdev->sectors < mddev->dev_sectors)) {
+	if (!test_bit(Journal, &rdev->flags) &&
+	    rdev->sectors &&
+	    (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
 		if (mddev->pers) {
 			/* Cannot change size, so fail
 			 * If mddev->level <= 0, then we don't care
@@ -2084,7 +2085,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
 		}
 	}
 	rcu_read_unlock();
-	if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
+	if (!test_bit(Journal, &rdev->flags) &&
+	    mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
 		printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
 		       mdname(mddev), mddev->max_disks);
 		return -EBUSY;
@@ -6031,8 +6033,23 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
 		else
 			clear_bit(WriteMostly, &rdev->flags);
 
-		if (info->state & (1<<MD_DISK_JOURNAL))
+		if (info->state & (1<<MD_DISK_JOURNAL)) {
+			struct md_rdev *rdev2;
+			bool has_journal = false;
+
+			/* make sure no existing journal disk */
+			rdev_for_each(rdev2, mddev) {
+				if (test_bit(Journal, &rdev2->flags)) {
+					has_journal = true;
+					break;
+				}
+			}
+			if (has_journal) {
+				export_rdev(rdev);
+				return -EBUSY;
+			}
 			set_bit(Journal, &rdev->flags);
+		}
 		/*
 		 * check whether the device shows up in other nodes
 		 */
@@ -8162,19 +8179,20 @@ static int remove_and_add_spares(struct mddev *mddev,
 			continue;
 		if (test_bit(Faulty, &rdev->flags))
 			continue;
-		if (test_bit(Journal, &rdev->flags))
-			continue;
-		if (mddev->ro &&
-		    ! (rdev->saved_raid_disk >= 0 &&
-		       !test_bit(Bitmap_sync, &rdev->flags)))
-			continue;
+		if (!test_bit(Journal, &rdev->flags)) {
+			if (mddev->ro &&
+			    ! (rdev->saved_raid_disk >= 0 &&
+			       !test_bit(Bitmap_sync, &rdev->flags)))
+				continue;
 
-		rdev->recovery_offset = 0;
+			rdev->recovery_offset = 0;
+		}
 		if (mddev->pers->
 		    hot_add_disk(mddev, rdev) == 0) {
 			if (sysfs_link_rdev(mddev, rdev))
 				/* failure here is OK */;
-			spares++;
+			if (!test_bit(Journal, &rdev->flags))
+				spares++;
 			md_new_event(mddev);
 			set_bit(MD_CHANGE_DEVS, &mddev->flags);
 		}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index b887e04..9cd32c2 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -806,10 +806,18 @@ void r5l_quiesce(struct r5l_log *log, int state)
 
 bool r5l_log_disk_error(struct r5conf *conf)
 {
+	struct r5l_log *log;
+	bool ret;
 	/* don't allow write if journal disk is missing */
-	if (!conf->log)
-		return test_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
-	return test_bit(Faulty, &conf->log->rdev->flags);
+	rcu_read_lock();
+	log = rcu_dereference(conf->log);
+
+	if (!log)
+		ret = test_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
+	else
+		ret = test_bit(Faulty, &log->rdev->flags);
+	rcu_read_unlock();
+	return ret;
 }
 
 struct r5l_recovery_ctx {
@@ -1172,7 +1180,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	if (r5l_load_log(log))
 		goto error;
 
-	conf->log = log;
+	rcu_assign_pointer(conf->log, log);
 	return 0;
 error:
 	md_unregister_thread(&log->reclaim_thread);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 704ef7f..783376b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7141,14 +7141,19 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 	struct disk_info *p = conf->disks + number;
 
 	print_raid5_conf(conf);
-	if (test_bit(Journal, &rdev->flags)) {
+	if (test_bit(Journal, &rdev->flags) && conf->log) {
+		struct r5l_log *log;
 		/*
-		 * journal disk is not removable, but we need give a chance to
-		 * update superblock of other disks. Otherwise journal disk
-		 * will be considered as 'fresh'
+		 * we can't wait pending write here, as this is called in
+		 * raid5d, wait will deadlock.
 		 */
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
-		return -EINVAL;
+		if (atomic_read(&mddev->writes_pending))
+			return -EBUSY;
+		log = conf->log;
+		conf->log = NULL;
+		synchronize_rcu();
+		r5l_exit_log(log);
+		return 0;
 	}
 	if (rdev == p->rdev)
 		rdevp = &p->rdev;
@@ -7212,8 +7217,21 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	int first = 0;
 	int last = conf->raid_disks - 1;
 
-	if (test_bit(Journal, &rdev->flags))
-		return -EINVAL;
+	if (test_bit(Journal, &rdev->flags)) {
+		char b[BDEVNAME_SIZE];
+		if (conf->log)
+			return -EBUSY;
+
+		rdev->raid_disk = 0;
+		/*
+		 * The array is in readonly mode if journal is missing, so no
+		 * write requests running. We should be safe
+		 */
+		r5l_init_log(conf, rdev);
+		printk(KERN_INFO"md/raid:%s: using device %s as journal\n",
+		       mdname(mddev), bdevname(rdev->bdev, b));
+		return 0;
+	}
 	if (mddev->recovery_disabled == conf->recovery_disabled)
 		return -EBUSY;
 
-- 
2.4.6


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 2/2] raid5-cache: add journal hot add/remove support
  2015-12-16 21:13 ` [PATCH 2/2] raid5-cache: add journal hot add/remove support Shaohua Li
@ 2015-12-16 22:50   ` NeilBrown
  0 siblings, 0 replies; 3+ messages in thread
From: NeilBrown @ 2015-12-16 22:50 UTC (permalink / raw)
  To: Shaohua Li, linux-raid; +Cc: Kernel-team, songliubraving, hch

[-- Attachment #1: Type: text/plain, Size: 750 bytes --]

On Thu, Dec 17 2015, Shaohua Li wrote:

> Add support for journal disk hot add/remove. Mostly trival checks in md
> part. The raid5 part is a little tricky. For hot-remove, we can't wait
> pending write as it's called from raid5d. The wait will cause deadlock.
> We simplily fail the hot-remove. A hot-remove retry can success
> eventually since if journal disk is faulty all pending write will be
> failed and finish. For hot-add, since an array supporting journal but
> without journal disk will be marked read-only, we are safe to hot add
> journal without stopping IO (should be read IO, while journal only
> handles write IO).

This and previous patch both applied - thanks.
(will push things out to for-next next later today I hope)

NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 818 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-12-16 22:50 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-16 21:13 [PATCH 1/2] MD: change journal disk role to disk 0 Shaohua Li
2015-12-16 21:13 ` [PATCH 2/2] raid5-cache: add journal hot add/remove support Shaohua Li
2015-12-16 22:50   ` NeilBrown

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.