All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] md: Fix unexpected behaviour in is_mddev_idle
@ 2021-12-01  3:27 Li Jinlin
  2021-12-01 13:50 ` Luis Chamberlain
  2021-12-09 17:39 ` Song Liu
  0 siblings, 2 replies; 4+ messages in thread
From: Li Jinlin @ 2021-12-01  3:27 UTC (permalink / raw)
  To: song, axboe, hare, jack, ming.lei, tj, mcgrof
  Cc: linux-raid, linux-kernel, linfeilong

The value of curr_events may be INT_MAX when mddev initializes the IO event
counters, in which case rdev->last_events is set to INT_MAX.
If this happens for all the rdevs of the mddev,
'curr_events - rdev->last_events > 64' will always be false, and
is_mddev_idle() will always return 1, which may make non-sync IO very
slow.

Fix by using atomic64_t type for sync_io, and using long type for
curr_events/last_events.

Signed-off-by: Li Jinlin <lijinlin3@huawei.com>
---
 drivers/md/md.c       | 6 +++---
 drivers/md/md.h       | 4 ++--
 include/linux/genhd.h | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5111ed966947..f47035838c43 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8429,14 +8429,14 @@ static int is_mddev_idle(struct mddev *mddev, int init)
 {
 	struct md_rdev *rdev;
 	int idle;
-	int curr_events;
+	long curr_events;
 
 	idle = 1;
 	rcu_read_lock();
 	rdev_for_each_rcu(rdev, mddev) {
 		struct gendisk *disk = rdev->bdev->bd_disk;
-		curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
-			      atomic_read(&disk->sync_io);
+		curr_events = (long)part_stat_read_accum(disk->part0, sectors) -
+			      atomic64_read(&disk->sync_io);
 		/* sync IO will cause sync_io to increase before the disk_stats
 		 * as sync_io is counted when a request starts, and
 		 * disk_stats is counted when it completes.
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 53ea7a6961de..3f8327c42b7b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -50,7 +50,7 @@ struct md_rdev {
 
 	sector_t sectors;		/* Device size (in 512bytes sectors) */
 	struct mddev *mddev;		/* RAID array if running */
-	int last_events;		/* IO event timestamp */
+	long last_events;		/* IO event timestamp */
 
 	/*
 	 * If meta_bdev is non-NULL, it means that a separate device is
@@ -551,7 +551,7 @@ extern void mddev_unlock(struct mddev *mddev);
 
 static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
 {
-	atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
+	atomic64_add(nr_sectors, &bdev->bd_disk->sync_io);
 }
 
 static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 74c410263113..efa7884de11b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -150,7 +150,7 @@ struct gendisk {
 	struct list_head slave_bdevs;
 #endif
 	struct timer_rand_state *random;
-	atomic_t sync_io;		/* RAID */
+	atomic64_t sync_io;		/* RAID */
 	struct disk_events *ev;
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct kobject integrity_kobj;
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] md: Fix unexpected behaviour in is_mddev_idle
  2021-12-01  3:27 [PATCH] md: Fix unexpected behaviour in is_mddev_idle Li Jinlin
@ 2021-12-01 13:50 ` Luis Chamberlain
  2021-12-02  5:07   ` Li Jinlin
  2021-12-09 17:39 ` Song Liu
  1 sibling, 1 reply; 4+ messages in thread
From: Luis Chamberlain @ 2021-12-01 13:50 UTC (permalink / raw)
  To: Li Jinlin
  Cc: song, axboe, hare, jack, ming.lei, tj, linux-raid, linux-kernel,
	linfeilong

On Wed, Dec 01, 2021 at 11:27:12AM +0800, Li Jinlin wrote:
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 5111ed966947..f47035838c43 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -8429,14 +8429,14 @@ static int is_mddev_idle(struct mddev *mddev, int init)
>  {
>  	struct md_rdev *rdev;
>  	int idle;
> -	int curr_events;
> +	long curr_events;

>  
>  	idle = 1;
>  	rcu_read_lock();
>  	rdev_for_each_rcu(rdev, mddev) {
>  		struct gendisk *disk = rdev->bdev->bd_disk;
> -		curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
> -			      atomic_read(&disk->sync_io);
> +		curr_events = (long)part_stat_read_accum(disk->part0, sectors) -
> +			      atomic64_read(&disk->sync_io);

And what makes you believe you might not have to go and change all other
drivers to address this as well?

>  static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index 74c410263113..efa7884de11b 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -150,7 +150,7 @@ struct gendisk {
>  	struct list_head slave_bdevs;
>  #endif
>  	struct timer_rand_state *random;
> -	atomic_t sync_io;		/* RAID */
> +	atomic64_t sync_io;		/* RAID */
>  	struct disk_events *ev;
>  #ifdef  CONFIG_BLK_DEV_INTEGRITY
>  	struct kobject integrity_kobj;
> -- 
> 2.31.1

  Luis

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] md: Fix unexpected behaviour in is_mddev_idle
  2021-12-01 13:50 ` Luis Chamberlain
@ 2021-12-02  5:07   ` Li Jinlin
  0 siblings, 0 replies; 4+ messages in thread
From: Li Jinlin @ 2021-12-02  5:07 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: song, axboe, hare, jack, ming.lei, tj, linux-raid, linux-kernel,
	linfeilong



On 12/1/2021 9:50 PM, Luis Chamberlain wrote:
> On Wed, Dec 01, 2021 at 11:27:12AM +0800, Li Jinlin wrote:
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 5111ed966947..f47035838c43 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -8429,14 +8429,14 @@ static int is_mddev_idle(struct mddev *mddev, int init)
>>  {
>>  	struct md_rdev *rdev;
>>  	int idle;
>> -	int curr_events;
>> +	long curr_events;
> 
>>  
>>  	idle = 1;
>>  	rcu_read_lock();
>>  	rdev_for_each_rcu(rdev, mddev) {
>>  		struct gendisk *disk = rdev->bdev->bd_disk;
>> -		curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
>> -			      atomic_read(&disk->sync_io);
>> +		curr_events = (long)part_stat_read_accum(disk->part0, sectors) -
>> +			      atomic64_read(&disk->sync_io);
> 
> And what makes you believe you might not have to go and change all other
> drivers to address this as well? 
The drbd driver also has the same problem. I will resend this patch together with
a fix patch for the drbd driver.

Thanks,
JinLin


> 
>>  static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
>> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
>> index 74c410263113..efa7884de11b 100644
>> --- a/include/linux/genhd.h
>> +++ b/include/linux/genhd.h
>> @@ -150,7 +150,7 @@ struct gendisk {
>>  	struct list_head slave_bdevs;
>>  #endif
>>  	struct timer_rand_state *random;
>> -	atomic_t sync_io;		/* RAID */
>> +	atomic64_t sync_io;		/* RAID */
>>  	struct disk_events *ev;
>>  #ifdef  CONFIG_BLK_DEV_INTEGRITY
>>  	struct kobject integrity_kobj;
>> -- 
>> 2.31.1
> 
>   Luis
> .
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] md: Fix unexpected behaviour in is_mddev_idle
  2021-12-01  3:27 [PATCH] md: Fix unexpected behaviour in is_mddev_idle Li Jinlin
  2021-12-01 13:50 ` Luis Chamberlain
@ 2021-12-09 17:39 ` Song Liu
  1 sibling, 0 replies; 4+ messages in thread
From: Song Liu @ 2021-12-09 17:39 UTC (permalink / raw)
  To: Li Jinlin
  Cc: Jens Axboe, Hannes Reinecke, Jan Kara, Ming Lei, Tejun Heo,
	Luis Chamberlain, linux-raid, open list, linfeilong

On Tue, Nov 30, 2021 at 6:56 PM Li Jinlin <lijinlin3@huawei.com> wrote:
>
> The value of curr_events may be INT_MAX when mddev initializes the IO event
> counters, in which case rdev->last_events is set to INT_MAX.
> If this happens for all the rdevs of the mddev,
> 'curr_events - rdev->last_events > 64' will always be false, and
> is_mddev_idle() will always return 1, which may make non-sync IO very
> slow.
>
> Fix by using atomic64_t type for sync_io, and using long type for
> curr_events/last_events.
>
> Signed-off-by: Li Jinlin <lijinlin3@huawei.com>
> ---
>  drivers/md/md.c       | 6 +++---
>  drivers/md/md.h       | 4 ++--
>  include/linux/genhd.h | 2 +-
>  3 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 5111ed966947..f47035838c43 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -8429,14 +8429,14 @@ static int is_mddev_idle(struct mddev *mddev, int init)
>  {
>         struct md_rdev *rdev;
>         int idle;
> -       int curr_events;
> +       long curr_events;
>
>         idle = 1;
>         rcu_read_lock();
>         rdev_for_each_rcu(rdev, mddev) {
>                 struct gendisk *disk = rdev->bdev->bd_disk;
> -               curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
> -                             atomic_read(&disk->sync_io);
> +               curr_events = (long)part_stat_read_accum(disk->part0, sectors) -
> +                             atomic64_read(&disk->sync_io);
>                 /* sync IO will cause sync_io to increase before the disk_stats
>                  * as sync_io is counted when a request starts, and
>                  * disk_stats is counted when it completes.
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 53ea7a6961de..3f8327c42b7b 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -50,7 +50,7 @@ struct md_rdev {
>
>         sector_t sectors;               /* Device size (in 512bytes sectors) */
>         struct mddev *mddev;            /* RAID array if running */
> -       int last_events;                /* IO event timestamp */
> +       long last_events;               /* IO event timestamp */

I think we need long long here to be safe on 32-bit systems.

Thanks,
Song

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-12-09 17:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-01  3:27 [PATCH] md: Fix unexpected behaviour in is_mddev_idle Li Jinlin
2021-12-01 13:50 ` Luis Chamberlain
2021-12-02  5:07   ` Li Jinlin
2021-12-09 17:39 ` Song Liu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.