All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] rbd: fix and simplify rbd_ioctl_set_ro()
@ 2017-11-06 15:49 Ilya Dryomov
  2017-11-06 15:49 ` [PATCH 2/2] rbd: get rid of rbd_mapping::read_only Ilya Dryomov
  0 siblings, 1 reply; 5+ messages in thread
From: Ilya Dryomov @ 2017-11-06 15:49 UTC (permalink / raw)
  To: ceph-devel

->open_count/-EBUSY check is bogus and wrong: when an open device is
set read-only, blkdev_write_iter() refuses further writes with -EPERM.
This is standard behaviour and all other block devices allow this.

set_disk_ro() call is also problematic: we affect the entire device
when called on a single partition.

All rbd_ioctl_set_ro() needs to do is refuse ro -> rw transition for
mapped snapshots.  Everything else can be handled by generic code.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 34 ++++++----------------------------
 1 file changed, 6 insertions(+), 28 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 6347ac194e90..832b235b2dc3 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -640,46 +640,24 @@ static void rbd_release(struct gendisk *disk, fmode_t mode)
 
 static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg)
 {
-	int ret = 0;
-	int val;
-	bool ro;
-	bool ro_changed = false;
+	int ro;
 
-	/* get_user() may sleep, so call it before taking rbd_dev->lock */
-	if (get_user(val, (int __user *)(arg)))
+	if (get_user(ro, (int __user *)arg))
 		return -EFAULT;
 
-	ro = val ? true : false;
-	/* Snapshot doesn't allow to write*/
+	/* Snapshots can't be marked read-write */
 	if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro)
 		return -EROFS;
 
-	spin_lock_irq(&rbd_dev->lock);
-	/* prevent others open this device */
-	if (rbd_dev->open_count > 1) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	if (rbd_dev->mapping.read_only != ro) {
-		rbd_dev->mapping.read_only = ro;
-		ro_changed = true;
-	}
-
-out:
-	spin_unlock_irq(&rbd_dev->lock);
-	/* set_disk_ro() may sleep, so call it after releasing rbd_dev->lock */
-	if (ret == 0 && ro_changed)
-		set_disk_ro(rbd_dev->disk, ro ? 1 : 0);
-
-	return ret;
+	/* Let blkdev_roset() handle it */
+	return -ENOTTY;
 }
 
 static int rbd_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long arg)
 {
 	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
-	int ret = 0;
+	int ret;
 
 	switch (cmd) {
 	case BLKROSET:
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] rbd: get rid of rbd_mapping::read_only
  2017-11-06 15:49 [PATCH 1/2] rbd: fix and simplify rbd_ioctl_set_ro() Ilya Dryomov
@ 2017-11-06 15:49 ` Ilya Dryomov
  2017-11-07 13:26   ` David Disseldorp
  0 siblings, 1 reply; 5+ messages in thread
From: Ilya Dryomov @ 2017-11-06 15:49 UTC (permalink / raw)
  To: ceph-devel

It is redundant -- rw/ro state is stored in hd_struct and managed by
the block layer.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 832b235b2dc3..8a39c399e4ae 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -348,7 +348,6 @@ struct rbd_client_id {
 struct rbd_mapping {
 	u64                     size;
 	u64                     features;
-	bool			read_only;
 };
 
 /*
@@ -608,9 +607,6 @@ static int rbd_open(struct block_device *bdev, fmode_t mode)
 	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
 	bool removing = false;
 
-	if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only)
-		return -EROFS;
-
 	spin_lock_irq(&rbd_dev->lock);
 	if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags))
 		removing = true;
@@ -4028,15 +4024,8 @@ static void rbd_queue_workfn(struct work_struct *work)
 		goto err_rq;
 	}
 
-	/* Only reads are allowed to a read-only device */
-
-	if (op_type != OBJ_OP_READ) {
-		if (rbd_dev->mapping.read_only) {
-			result = -EROFS;
-			goto err_rq;
-		}
-		rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
-	}
+	rbd_assert(op_type == OBJ_OP_READ ||
+		   rbd_dev->spec->snap_id == CEPH_NOSNAP);
 
 	/*
 	 * Quit early if the mapped snapshot no longer exists.  It's
@@ -5972,7 +5961,7 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
 		goto err_out_disk;
 
 	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
-	set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
+	set_disk_ro(rbd_dev->disk, rbd_dev->opts->read_only);
 
 	ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
 	if (ret)
@@ -6123,7 +6112,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	struct rbd_options *rbd_opts = NULL;
 	struct rbd_spec *spec = NULL;
 	struct rbd_client *rbdc;
-	bool read_only;
 	int rc;
 
 	if (!try_module_get(THIS_MODULE))
@@ -6172,11 +6160,8 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	}
 
 	/* If we are mapping a snapshot it must be marked read-only */
-
-	read_only = rbd_dev->opts->read_only;
 	if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
-		read_only = true;
-	rbd_dev->mapping.read_only = read_only;
+		rbd_dev->opts->read_only = true;
 
 	rc = rbd_dev_device_setup(rbd_dev);
 	if (rc)
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] rbd: get rid of rbd_mapping::read_only
  2017-11-06 15:49 ` [PATCH 2/2] rbd: get rid of rbd_mapping::read_only Ilya Dryomov
@ 2017-11-07 13:26   ` David Disseldorp
  2017-11-07 16:56     ` Ilya Dryomov
  0 siblings, 1 reply; 5+ messages in thread
From: David Disseldorp @ 2017-11-07 13:26 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel

Hi Ilya,

On Mon,  6 Nov 2017 16:49:30 +0100, Ilya Dryomov wrote:

> -	/* Only reads are allowed to a read-only device */
> -
> -	if (op_type != OBJ_OP_READ) {
> -		if (rbd_dev->mapping.read_only) {
> -			result = -EROFS;
> -			goto err_rq;
> -		}
> -		rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
> -	}
> +	rbd_assert(op_type == OBJ_OP_READ ||
> +		   rbd_dev->spec->snap_id == CEPH_NOSNAP);

AFAICT, the block layer can still queue non-read requests, despite the
block device read-only policy flag, so I think an -EROFS handler needs
to stay.
The following was run on mainline (e4880bc5dfb1f02b152e62a894b5c6f3e995)
with these two patches applied:

RBD mapped at: /dev/rbd/rbd/img@snap
rapido1:/# blockdev --getro /dev/rbd/rbd/img@snap
1
rapido1:/# mkfs.xfs /dev/rbd/rbd/img@snap
[   12.346216]
[   12.346216] Assertion failure in rbd_queue_workfn() at line 4028:
[   12.346216]
[   12.346216]  rbd_assert(op_type == OBJ_OP_READ || rbd_dev->spec->snap_id == CEPH_NOSNAP);
[   12.346216]
[   12.347529] ------------[ cut here ]------------
[   12.347910] kernel BUG at drivers/block/rbd.c:4028!
[   12.348479] invalid opcode: 0000 [#1] SMP
[   12.349044] Modules linked in:
[   12.349499] CPU: 1 PID: 17 Comm: kworker/1:0 Not tainted 4.14.0-rc8+ #405
[   12.350407] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
[   12.352001] Workqueue: rbd rbd_queue_workfn
[   12.352498] task: ffff88001d27b480 task.stack: ffffc90000090000
[   12.353058] RIP: 0010:rbd_queue_workfn+0x4f9/0x500
[   12.353527] RSP: 0018:ffffc90000093e08 EFLAGS: 00010282
[   12.354036] RAX: 0000000000000086 RBX: ffff88001a7c7980 RCX: ffffffff81829398
[   12.354890] RDX: 0000000000000001 RSI: 0000000000000086 RDI: ffffffff819f9f6c
[   12.355580] RBP: ffffc90000093e60 R08: 00000000fffffffe R09: 000000000000048f
[   12.356285] R10: 0000000000000005 R11: 000000000000048e R12: ffff88001ca6f000
[   12.356964] R13: 0000000000000000 R14: 0000000000000002 R15: ffff88001a7c7ac0
[   12.357650] FS:  0000000000000000(0000) GS:ffff88001e500000(0000) knlGS:0000000000000000
[   12.358443] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   12.359031] CR2: 00007fcd19a14000 CR3: 000000001f069000 CR4: 00000000000006a0
[   12.359806] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   12.360604] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   12.361397] Call Trace:
[   12.361686]  process_one_work+0x11e/0x280
[   12.362145]  worker_thread+0x45/0x3b0
[   12.362564]  kthread+0xff/0x140
[   12.362935]  ? process_one_work+0x280/0x280
[   12.363409]  ? kthread_create_on_node+0x40/0x40
[   12.363936]  ret_from_fork+0x22/0x30
[   12.364338] Code: 85 e1 fb ff ff e9 99 fb ff ff 48 c7 c1 e8 af 71 81 ba bc 0f 00 00 48 c7 c6 f0 bb 64 81 48 c7 c7 40 9d 71 81 31 c0 e8 65 df d5 ff <0f> 0b 0f 1f 44 00 00 55 48 89 e5 41 56 41 55 41 54 49 89 fc 53
[   12.366440] RIP: rbd_queue_workfn+0x4f9/0x500 RSP: ffffc90000093e08
[   12.367147] ---[ end trace 75c22000d1a0b248 ]---

Cheers, David

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] rbd: get rid of rbd_mapping::read_only
  2017-11-07 13:26   ` David Disseldorp
@ 2017-11-07 16:56     ` Ilya Dryomov
  2017-11-08 18:32       ` David Disseldorp
  0 siblings, 1 reply; 5+ messages in thread
From: Ilya Dryomov @ 2017-11-07 16:56 UTC (permalink / raw)
  To: David Disseldorp; +Cc: Ceph Development

On Tue, Nov 7, 2017 at 2:26 PM, David Disseldorp <ddiss@suse.de> wrote:
> Hi Ilya,
>
> On Mon,  6 Nov 2017 16:49:30 +0100, Ilya Dryomov wrote:
>
>> -     /* Only reads are allowed to a read-only device */
>> -
>> -     if (op_type != OBJ_OP_READ) {
>> -             if (rbd_dev->mapping.read_only) {
>> -                     result = -EROFS;
>> -                     goto err_rq;
>> -             }
>> -             rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
>> -     }
>> +     rbd_assert(op_type == OBJ_OP_READ ||
>> +                rbd_dev->spec->snap_id == CEPH_NOSNAP);
>
> AFAICT, the block layer can still queue non-read requests, despite the
> block device read-only policy flag, so I think an -EROFS handler needs
> to stay.
> The following was run on mainline (e4880bc5dfb1f02b152e62a894b5c6f3e995)
> with these two patches applied:
>
> RBD mapped at: /dev/rbd/rbd/img@snap
> rapido1:/# blockdev --getro /dev/rbd/rbd/img@snap
> 1
> rapido1:/# mkfs.xfs /dev/rbd/rbd/img@snap
> [   12.346216]
> [   12.346216] Assertion failure in rbd_queue_workfn() at line 4028:
> [   12.346216]
> [   12.346216]  rbd_assert(op_type == OBJ_OP_READ || rbd_dev->spec->snap_id == CEPH_NOSNAP);
> [   12.346216]
> [   12.347529] ------------[ cut here ]------------
> [   12.347910] kernel BUG at drivers/block/rbd.c:4028!
> [   12.348479] invalid opcode: 0000 [#1] SMP
> [   12.349044] Modules linked in:
> [   12.349499] CPU: 1 PID: 17 Comm: kworker/1:0 Not tainted 4.14.0-rc8+ #405
> [   12.350407] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
> [   12.352001] Workqueue: rbd rbd_queue_workfn
> [   12.352498] task: ffff88001d27b480 task.stack: ffffc90000090000
> [   12.353058] RIP: 0010:rbd_queue_workfn+0x4f9/0x500
> [   12.353527] RSP: 0018:ffffc90000093e08 EFLAGS: 00010282
> [   12.354036] RAX: 0000000000000086 RBX: ffff88001a7c7980 RCX: ffffffff81829398
> [   12.354890] RDX: 0000000000000001 RSI: 0000000000000086 RDI: ffffffff819f9f6c
> [   12.355580] RBP: ffffc90000093e60 R08: 00000000fffffffe R09: 000000000000048f
> [   12.356285] R10: 0000000000000005 R11: 000000000000048e R12: ffff88001ca6f000
> [   12.356964] R13: 0000000000000000 R14: 0000000000000002 R15: ffff88001a7c7ac0
> [   12.357650] FS:  0000000000000000(0000) GS:ffff88001e500000(0000) knlGS:0000000000000000
> [   12.358443] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   12.359031] CR2: 00007fcd19a14000 CR3: 000000001f069000 CR4: 00000000000006a0
> [   12.359806] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [   12.360604] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [   12.361397] Call Trace:
> [   12.361686]  process_one_work+0x11e/0x280
> [   12.362145]  worker_thread+0x45/0x3b0
> [   12.362564]  kthread+0xff/0x140
> [   12.362935]  ? process_one_work+0x280/0x280
> [   12.363409]  ? kthread_create_on_node+0x40/0x40
> [   12.363936]  ret_from_fork+0x22/0x30
> [   12.364338] Code: 85 e1 fb ff ff e9 99 fb ff ff 48 c7 c1 e8 af 71 81 ba bc 0f 00 00 48 c7 c6 f0 bb 64 81 48 c7 c7 40 9d 71 81 31 c0 e8 65 df d5 ff <0f> 0b 0f 1f 44 00 00 55 48 89 e5 41 56 41 55 41 54 49 89 fc 53
> [   12.366440] RIP: rbd_queue_workfn+0x4f9/0x500 RSP: ffffc90000093e08
> [   12.367147] ---[ end trace 75c22000d1a0b248 ]---

Hi David,

Heh, apparently I only tested writes and didn't do discards...  Writes
go through blkdev_write_iter() but zeroout/discards seem to rely solely
on FMODE_WRITE checks, which the block layer doesn't enforce.

I'd rather try to fix it in the block layer before working around it in
krbd.  I'll work up a patch tomorrow.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] rbd: get rid of rbd_mapping::read_only
  2017-11-07 16:56     ` Ilya Dryomov
@ 2017-11-08 18:32       ` David Disseldorp
  0 siblings, 0 replies; 5+ messages in thread
From: David Disseldorp @ 2017-11-08 18:32 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: Ceph Development

On Tue, 7 Nov 2017 17:56:51 +0100, Ilya Dryomov wrote:

> Heh, apparently I only tested writes and didn't do discards...  Writes
> go through blkdev_write_iter() but zeroout/discards seem to rely solely
> on FMODE_WRITE checks, which the block layer doesn't enforce.
> 
> I'd rather try to fix it in the block layer before working around it in
> krbd.  I'll work up a patch tomorrow.

FWIW I've pushed a blktests regression test for this to the
wip_ro_get_set_discard branch of my repo at:
https://github.com/ddiss/blktests.git

Cheers, David

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-11-08 18:32 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-06 15:49 [PATCH 1/2] rbd: fix and simplify rbd_ioctl_set_ro() Ilya Dryomov
2017-11-06 15:49 ` [PATCH 2/2] rbd: get rid of rbd_mapping::read_only Ilya Dryomov
2017-11-07 13:26   ` David Disseldorp
2017-11-07 16:56     ` Ilya Dryomov
2017-11-08 18:32       ` David Disseldorp

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.