* [PATCH] [RFC] target/file: add support of direct and async I/O
@ 2018-03-09 0:42 Andrei Vagin
2018-03-15 14:26 ` Bryant G. Ly
` (11 more replies)
0 siblings, 12 replies; 13+ messages in thread
From: Andrei Vagin @ 2018-03-09 0:42 UTC (permalink / raw)
To: target-devel
Direct I/O allows to not affect the write-back cache, this is
expected when a non-buffered mode is used.
Async I/O allows to handle a few commands concurrently, so a target shows a
better performance:
Mode: O_DSYNC Async: 1
$ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 --name=/dev/sda --runtime=20 --numjobs=2
WRITE: bw=45.9MiB/s (48.1MB/s), 21.9MiB/s-23.0MiB/s (22.0MB/s-25.2MB/s), io=919MiB (963MB), run=20002-20020msec
Mode: O_DSYNC Async: 0
$ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 --name=/dev/sdb --runtime=20 --numjobs=2
WRITE: bw=1607KiB/s (1645kB/s), 802KiB/s-805KiB/s (821kB/s-824kB/s), io=31.8MiB (33.4MB), run=20280-20295msec
Known issue:
DIF (PI) emulation doesn't work when a target uses async I/O, because
DIF metadata is saved in a separate file, and it is another non-trivial
task how to synchronize writing in two files, so that a following read
operation always returns a consistent metadata for a specified block.
Cc: "Nicholas A. Bellinger" <nab@linux-iscsi.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
drivers/target/target_core_file.c | 124 ++++++++++++++++++++++++++++++++++++--
drivers/target/target_core_file.h | 1 +
2 files changed, 120 insertions(+), 5 deletions(-)
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 9b2c0c773022..e8c07a0f7084 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -250,6 +250,96 @@ static void fd_destroy_device(struct se_device *dev)
}
}
+struct target_core_file_cmd {
+ atomic_t ref;
+ unsigned long len;
+ long ret;
+ struct se_cmd *cmd;
+ struct kiocb iocb;
+ struct bio_vec bvec[0];
+};
+
+static void cmd_rw_aio_do_completion(struct target_core_file_cmd *cmd)
+{
+ if (!atomic_dec_and_test(&cmd->ref))
+ return;
+
+ if (cmd->ret != cmd->len)
+ target_complete_cmd(cmd->cmd, SAM_STAT_CHECK_CONDITION);
+ else
+ target_complete_cmd(cmd->cmd, SAM_STAT_GOOD);
+
+ kfree(cmd);
+}
+
+static void cmd_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
+{
+ struct target_core_file_cmd *cmd;
+
+ cmd = container_of(iocb, struct target_core_file_cmd, iocb);
+
+ cmd->ret = ret;
+ cmd_rw_aio_do_completion(cmd);
+}
+
+static int fd_do_aio_rw(struct se_cmd *cmd, struct fd_dev *fd_dev,
+ u32 block_size, struct scatterlist *sgl,
+ u32 sgl_nents, u32 data_length, int is_write)
+{
+ struct file *file = fd_dev->fd_file;
+ struct target_core_file_cmd *aio_cmd;
+ struct scatterlist *sg;
+ struct iov_iter iter = {};
+ struct bio_vec *bvec;
+ ssize_t len = 0;
+ loff_t pos = (cmd->t_task_lba * block_size);
+ int ret = 0, i;
+
+ aio_cmd = kmalloc(sizeof(struct target_core_file_cmd) +
+ sgl_nents * sizeof(struct bio_vec),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!aio_cmd)
+ return -ENOMEM;
+
+ bvec = aio_cmd->bvec;
+
+ for_each_sg(sgl, sg, sgl_nents, i) {
+ bvec[i].bv_page = sg_page(sg);
+ bvec[i].bv_len = sg->length;
+ bvec[i].bv_offset = sg->offset;
+
+ len += sg->length;
+ }
+
+ iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len);
+
+ atomic_set(&aio_cmd->ref, 2);
+
+ aio_cmd->cmd = cmd;
+ aio_cmd->len = len;
+ aio_cmd->iocb.ki_pos = pos;
+ aio_cmd->iocb.ki_filp = file;
+ aio_cmd->iocb.ki_complete = cmd_rw_aio_complete;
+ aio_cmd->iocb.ki_flags = 0;
+
+ if (!(fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE))
+ aio_cmd->iocb.ki_flags |= IOCB_DIRECT;
+ if (is_write && (cmd->se_cmd_flags & SCF_FUA))
+ aio_cmd->iocb.ki_flags |= IOCB_DSYNC;
+
+ if (is_write)
+ ret = call_write_iter(file, &aio_cmd->iocb, &iter);
+ else
+ ret = call_read_iter(file, &aio_cmd->iocb, &iter);
+
+ cmd_rw_aio_do_completion(aio_cmd);
+
+ if (ret != -EIOCBQUEUED)
+ aio_cmd->iocb.ki_complete(&aio_cmd->iocb, ret, 0);
+
+ return 0;
+}
+
static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
u32 block_size, struct scatterlist *sgl,
u32 sgl_nents, u32 data_length, int is_write)
@@ -536,6 +626,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
struct file *pfile = fd_dev->fd_prot_file;
sense_reason_t rc;
int ret = 0;
+ int aio = fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO;
/*
* We are currently limited by the number of iovecs (2048) per
* single vfs_[writev,readv] call.
@@ -550,7 +641,11 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
* Call vectorized fileio functions to map struct scatterlist
* physical memory addresses to struct iovec virtual memory.
*/
- if (data_direction == DMA_FROM_DEVICE) {
+ if (aio) {
+ ret = fd_do_aio_rw(cmd, fd_dev, dev->dev_attrib.block_size,
+ sgl, sgl_nents, cmd->data_length,
+ !(data_direction == DMA_FROM_DEVICE));
+ } else if (data_direction == DMA_FROM_DEVICE) {
if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
ret = fd_do_rw(cmd, pfile, dev->prot_length,
cmd->t_prot_sg, cmd->t_prot_nents,
@@ -616,18 +711,21 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
if (ret < 0)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
- target_complete_cmd(cmd, SAM_STAT_GOOD);
+ if (!aio)
+ target_complete_cmd(cmd, SAM_STAT_GOOD);
return 0;
}
enum {
- Opt_fd_dev_name, Opt_fd_dev_size, Opt_fd_buffered_io, Opt_err
+ Opt_fd_dev_name, Opt_fd_dev_size, Opt_fd_buffered_io,
+ Opt_fd_async_io, Opt_err
};
static match_table_t tokens = {
{Opt_fd_dev_name, "fd_dev_name=%s"},
{Opt_fd_dev_size, "fd_dev_size=%s"},
{Opt_fd_buffered_io, "fd_buffered_io=%d"},
+ {Opt_fd_async_io, "fd_async_io=%d"},
{Opt_err, NULL}
};
@@ -693,6 +791,21 @@ static ssize_t fd_set_configfs_dev_params(struct se_device *dev,
fd_dev->fbd_flags |= FDBD_HAS_BUFFERED_IO_WCE;
break;
+ case Opt_fd_async_io:
+ ret = match_int(args, &arg);
+ if (ret)
+ goto out;
+ if (arg != 1) {
+ pr_err("bogus fd_async_io=%d value\n", arg);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ pr_debug("FILEIO: Using async I/O"
+ " operations for struct fd_dev\n");
+
+ fd_dev->fbd_flags |= FDBD_HAS_ASYNC_IO;
+ break;
default:
break;
}
@@ -709,10 +822,11 @@ static ssize_t fd_show_configfs_dev_params(struct se_device *dev, char *b)
ssize_t bl = 0;
bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id);
- bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s\n",
+ bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s Async: %d\n",
fd_dev->fd_dev_name, fd_dev->fd_dev_size,
(fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) ?
- "Buffered-WCE" : "O_DSYNC");
+ "Buffered-WCE" : "O_DSYNC",
+ !!(fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO));
return bl;
}
diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h
index 53be5ffd3261..929b1ecd544e 100644
--- a/drivers/target/target_core_file.h
+++ b/drivers/target/target_core_file.h
@@ -22,6 +22,7 @@
#define FBDF_HAS_PATH 0x01
#define FBDF_HAS_SIZE 0x02
#define FDBD_HAS_BUFFERED_IO_WCE 0x04
+#define FDBD_HAS_ASYNC_IO 0x08
#define FDBD_FORMAT_UNIT_SIZE 2048
struct fd_dev {
--
2.13.6
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
@ 2018-03-15 14:26 ` Bryant G. Ly
2018-03-15 17:04 ` Andrei Vagin
` (10 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Bryant G. Ly @ 2018-03-15 14:26 UTC (permalink / raw)
To: target-devel
On 3/8/18 6:42 PM, Andrei Vagin wrote:
> Direct I/O allows to not affect the write-back cache, this is
> expected when a non-buffered mode is used.
>
> Async I/O allows to handle a few commands concurrently, so a target shows a
> better performance:
>
> Mode: O_DSYNC Async: 1
> $ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 --name=/dev/sda --runtime=20 --numjobs=2
> WRITE: bw=45.9MiB/s (48.1MB/s), 21.9MiB/s-23.0MiB/s (22.0MB/s-25.2MB/s), io=919MiB (963MB), run=20002-20020msec
>
> Mode: O_DSYNC Async: 0
> $ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 --name=/dev/sdb --runtime=20 --numjobs=2
> WRITE: bw=1607KiB/s (1645kB/s), 802KiB/s-805KiB/s (821kB/s-824kB/s), io=31.8MiB (33.4MB), run=20280-20295msec
>
> Known issue:
>
> DIF (PI) emulation doesn't work when a target uses async I/O, because
> DIF metadata is saved in a separate file, and it is another non-trivial
> task how to synchronize writing in two files, so that a following read
> operation always returns a consistent metadata for a specified block.
>
> Cc: "Nicholas A. Bellinger" <nab@linux-iscsi.org>
> Signed-off-by: Andrei Vagin <avagin@openvz.org>
> ---
> drivers/target/target_core_file.c | 124 ++++++++++++++++++++++++++++++++++++--
> drivers/target/target_core_file.h | 1 +
> 2 files changed, 120 insertions(+), 5 deletions(-)
>
>
Tested-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Patch looks good to me - Thanks for the performance enhancement!
Btw I have been running I/O tests with HTX against this patch for 24 hrs and have no problems.
-Bryant
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
2018-03-15 14:26 ` Bryant G. Ly
@ 2018-03-15 17:04 ` Andrei Vagin
2018-03-16 7:50 ` Christoph Hellwig
` (9 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Andrei Vagin @ 2018-03-15 17:04 UTC (permalink / raw)
To: target-devel
On Thu, Mar 15, 2018 at 09:26:57AM -0500, Bryant G. Ly wrote:
> On 3/8/18 6:42 PM, Andrei Vagin wrote:
>
> > Direct I/O allows to not affect the write-back cache, this is
> > expected when a non-buffered mode is used.
> >
> > Async I/O allows to handle a few commands concurrently, so a target shows a
> > better performance:
> >
> > Mode: O_DSYNC Async: 1
> > $ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 --name=/dev/sda --runtime=20 --numjobs=2
> > WRITE: bw=45.9MiB/s (48.1MB/s), 21.9MiB/s-23.0MiB/s (22.0MB/s-25.2MB/s), io=919MiB (963MB), run=20002-20020msec
> >
> > Mode: O_DSYNC Async: 0
> > $ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 --name=/dev/sdb --runtime=20 --numjobs=2
> > WRITE: bw=1607KiB/s (1645kB/s), 802KiB/s-805KiB/s (821kB/s-824kB/s), io=31.8MiB (33.4MB), run=20280-20295msec
> >
> > Known issue:
> >
> > DIF (PI) emulation doesn't work when a target uses async I/O, because
> > DIF metadata is saved in a separate file, and it is another non-trivial
> > task how to synchronize writing in two files, so that a following read
> > operation always returns a consistent metadata for a specified block.
> >
> > Cc: "Nicholas A. Bellinger" <nab@linux-iscsi.org>
> > Signed-off-by: Andrei Vagin <avagin@openvz.org>
> > ---
> > drivers/target/target_core_file.c | 124 ++++++++++++++++++++++++++++++++++++--
> > drivers/target/target_core_file.h | 1 +
> > 2 files changed, 120 insertions(+), 5 deletions(-)
> >
> >
> Tested-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
>
> Patch looks good to me - Thanks for the performance enhancement!
>
> Btw I have been running I/O tests with HTX against this patch for 24 hrs and have no problems.
Bryant, thank you for the feedback.
>
> -Bryant
>
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
2018-03-15 14:26 ` Bryant G. Ly
2018-03-15 17:04 ` Andrei Vagin
@ 2018-03-16 7:50 ` Christoph Hellwig
2018-03-17 0:13 ` Andrei Vagin
` (8 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2018-03-16 7:50 UTC (permalink / raw)
To: target-devel
> DIF (PI) emulation doesn't work when a target uses async I/O, because
> DIF metadata is saved in a separate file, and it is another non-trivial
> task how to synchronize writing in two files, so that a following read
> operation always returns a consistent metadata for a specified block.
There literally is no way to do that, even without aio. The file
DIF implementation should probably regarded as an early bringup /
prototype tool, not something really usable.
> +static void cmd_rw_aio_do_completion(struct target_core_file_cmd *cmd)
> +{
> + if (!atomic_dec_and_test(&cmd->ref))
> + return;
There is no need for reference counting. If the read_iter/write iter
method returns -EIOCBQUEUED the completion callback needs to complete
the I/O and free the structure, else the method caller.
> + if (!(fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE))
> + aio_cmd->iocb.ki_flags |= IOCB_DIRECT;
aio without IOCB_DIRECT doesn't make any sense. But the WCE flag
really has nothing to do with buffers vs direct I/O anyway.
> + if (is_write)
> + ret = call_write_iter(file, &aio_cmd->iocb, &iter);
> + else
> + ret = call_read_iter(file, &aio_cmd->iocb, &iter);
Please call the methods directly instead of through the wrappers.
> +
> static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
> u32 block_size, struct scatterlist *sgl,
> u32 sgl_nents, u32 data_length, int is_write)
> @@ -536,6 +626,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
> struct file *pfile = fd_dev->fd_prot_file;
> sense_reason_t rc;
> int ret = 0;
> + int aio = fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO;
> /*
> * We are currently limited by the number of iovecs (2048) per
> * single vfs_[writev,readv] call.
> @@ -550,7 +641,11 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
> * Call vectorized fileio functions to map struct scatterlist
> * physical memory addresses to struct iovec virtual memory.
> */
> > - if (data_direction == DMA_FROM_DEVICE) {
> + if (aio) {
fd_execute_rw shares basically no code with the aio case. I'd rather
have a very high level wrapper here:
static sense_reason_t
fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
enum dma_data_direction data_direction)
{
if (FD_DEV(cmd->se_dev)->fbd_flags & FDBD_HAS_ASYNC_IO)
return fd_execute_rw_aio(cmd, sgl, sgl_nents, dma_direction);
return fd_execute_rw_buffered(cmd, sgl, sgl_nents, dma_direction);
}
and keep the code separate.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
` (2 preceding siblings ...)
2018-03-16 7:50 ` Christoph Hellwig
@ 2018-03-17 0:13 ` Andrei Vagin
2018-03-17 8:25 ` Christoph Hellwig
` (7 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Andrei Vagin @ 2018-03-17 0:13 UTC (permalink / raw)
To: target-devel
Hi Christoph,
Thank you for the review. All comments look reasonable. I will fix and
send a final version soon.
Pls, answer on one inline question.
On Fri, Mar 16, 2018 at 12:50:27AM -0700, Christoph Hellwig wrote:
> > DIF (PI) emulation doesn't work when a target uses async I/O, because
> > DIF metadata is saved in a separate file, and it is another non-trivial
> > task how to synchronize writing in two files, so that a following read
> > operation always returns a consistent metadata for a specified block.
>
> There literally is no way to do that, even without aio. The file
> DIF implementation should probably regarded as an early bringup /
> prototype tool, not something really usable.
>
> > +static void cmd_rw_aio_do_completion(struct target_core_file_cmd *cmd)
> > +{
> > + if (!atomic_dec_and_test(&cmd->ref))
> > + return;
>
> There is no need for reference counting. If the read_iter/write iter
> method returns -EIOCBQUEUED the completion callback needs to complete
> the I/O and free the structure, else the method caller.
>
> > + if (!(fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE))
> > + aio_cmd->iocb.ki_flags |= IOCB_DIRECT;
>
> aio without IOCB_DIRECT doesn't make any sense. But the WCE flag
> really has nothing to do with buffers vs direct I/O anyway.
>
> > + if (is_write)
> > + ret = call_write_iter(file, &aio_cmd->iocb, &iter);
> > + else
> > + ret = call_read_iter(file, &aio_cmd->iocb, &iter);
>
> Please call the methods directly instead of through the wrappers.
Do you mean to call file->f_op->write_iter(kio, iter) instead of
call_write_iter()? What is wrong with these wrappers?
Thanks,
Andrei
>
> > +
> > static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
> > u32 block_size, struct scatterlist *sgl,
> > u32 sgl_nents, u32 data_length, int is_write)
> > @@ -536,6 +626,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
> > struct file *pfile = fd_dev->fd_prot_file;
> > sense_reason_t rc;
> > int ret = 0;
> > + int aio = fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO;
> > /*
> > * We are currently limited by the number of iovecs (2048) per
> > * single vfs_[writev,readv] call.
> > @@ -550,7 +641,11 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
> > * Call vectorized fileio functions to map struct scatterlist
> > * physical memory addresses to struct iovec virtual memory.
> > */
> > > - if (data_direction == DMA_FROM_DEVICE) {
> > + if (aio) {
>
> fd_execute_rw shares basically no code with the aio case. I'd rather
> have a very high level wrapper here:
>
> static sense_reason_t
> fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
> enum dma_data_direction data_direction)
> {
> if (FD_DEV(cmd->se_dev)->fbd_flags & FDBD_HAS_ASYNC_IO)
> return fd_execute_rw_aio(cmd, sgl, sgl_nents, dma_direction);
> return fd_execute_rw_buffered(cmd, sgl, sgl_nents, dma_direction);
> }
>
> and keep the code separate.
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
` (3 preceding siblings ...)
2018-03-17 0:13 ` Andrei Vagin
@ 2018-03-17 8:25 ` Christoph Hellwig
2018-03-20 7:54 ` Andrei Vagin
` (6 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2018-03-17 8:25 UTC (permalink / raw)
To: target-devel
On Fri, Mar 16, 2018 at 05:13:25PM -0700, Andrei Vagin wrote:
> > Please call the methods directly instead of through the wrappers.
>
> Do you mean to call file->f_op->write_iter(kio, iter) instead of
> call_write_iter()? What is wrong with these wrappers?
Yes. They are completely pointless and just obfuscate the code.
I plan to remove them eventually.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
` (4 preceding siblings ...)
2018-03-17 8:25 ` Christoph Hellwig
@ 2018-03-20 7:54 ` Andrei Vagin
2018-03-20 8:47 ` Christoph Hellwig
` (5 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Andrei Vagin @ 2018-03-20 7:54 UTC (permalink / raw)
To: target-devel
On Fri, Mar 16, 2018 at 12:50:27AM -0700, Christoph Hellwig wrote:
> > DIF (PI) emulation doesn't work when a target uses async I/O, because
> > DIF metadata is saved in a separate file, and it is another non-trivial
> > task how to synchronize writing in two files, so that a following read
> > operation always returns a consisten metadata for a specified block.
>
> There literally is no way to do that, even without aio. The file
> DIF implementation should probably regarded as an early bringup /
> prototype tool, not something really usable.
>
> > +static void cmd_rw_aio_do_completion(struct target_core_file_cmd *cmd)
> > +{
> > + if (!atomic_dec_and_test(&cmd->ref))
> > + return;
>
> There is no need for reference counting. If the read_iter/write iter
> method returns -EIOCBQUEUED the completion callback needs to complete
> the I/O and free the structure, else the method caller.
I was near to send a final version, but I decided to investigate how a
reference counter was appeared in drivers/block/loop.c:
commit 92d773324b7edbd36bf0c28c1e0157763aeccc92
Author: Shaohua Li <shli@fb.com>
Date: Fri Sep 1 11:15:17 2017 -0700
block/loop: fix use after free
lo_rw_aio->call_read_iter->
1 aops->direct_IO
2 iov_iter_revert
lo_rw_aio_complete could happen between 1 and 2, the bio and bvec
could
be freed before 2, which accesses bvec.
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit looks reasonable, doesn't it? In our case, bvec-s are
freed from the callback too.
>
> > + if (!(fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE))
> > + aio_cmd->iocb.ki_flags |= IOCB_DIRECT;
>
> aio without IOCB_DIRECT doesn't make any sense. But the WCE flag
> really has nothing to do with buffers vs direct I/O anyway.
>
> > + if (is_write)
> > + ret = call_write_iter(file, &aio_cmd->iocb, &iter);
> > + else
> > + ret = call_read_iter(file, &aio_cmd->iocb, &iter);
>
> Please call the methods directly instead of through the wrappers.
>
> > +
> > static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
> > u32 block_size, struct scatterlist *sgl,
> > u32 sgl_nents, u32 data_length, int is_write)
> > @@ -536,6 +626,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
> > struct file *pfile = fd_dev->fd_prot_file;
> > sense_reason_t rc;
> > int ret = 0;
> > + int aio = fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO;
> > /*
> > * We are currently limited by the number of iovecs (2048) per
> > * single vfs_[writev,readv] call.
> > @@ -550,7 +641,11 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
> > * Call vectorized fileio functions to map struct scatterlist
> > * physical memory addresses to struct iovec virtual memory.
> > */
> > > - if (data_direction == DMA_FROM_DEVICE) {
> > + if (aio) {
>
> fd_execute_rw shares basically no code with the aio case. I'd rather
> have a very high level wrapper here:
>
> static sense_reason_t
> fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
> enum dma_data_direction data_direction)
> {
> if (FD_DEV(cmd->se_dev)->fbd_flags & FDBD_HAS_ASYNC_IO)
> return fd_execute_rw_aio(cmd, sgl, sgl_nents, dma_direction);
> return fd_execute_rw_buffered(cmd, sgl, sgl_nents, dma_direction);
> }
>
> and keep the code separate.
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
` (5 preceding siblings ...)
2018-03-20 7:54 ` Andrei Vagin
@ 2018-03-20 8:47 ` Christoph Hellwig
2018-03-20 20:41 ` Andrei Vagin
` (4 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2018-03-20 8:47 UTC (permalink / raw)
To: target-devel
On Tue, Mar 20, 2018 at 12:54:58AM -0700, Andrei Vagin wrote:
> commit 92d773324b7edbd36bf0c28c1e0157763aeccc92
> Author: Shaohua Li <shli@fb.com>
> Date: Fri Sep 1 11:15:17 2017 -0700
>
> block/loop: fix use after free
>
> lo_rw_aio->call_read_iter->
> 1 aops->direct_IO
> 2 iov_iter_revert
> lo_rw_aio_complete could happen between 1 and 2, the bio and bvec
> could
> be freed before 2, which accesses bvec.
>
> Signed-off-by: Shaohua Li <shli@fb.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>
> This commit looks reasonable, doesn't it? In our case, bvec-s are
> freed from the callback too.
It looks completely bogus, but it papers over a valid issue. The
iovec/bvec passed to ->read_iter/->write_iter is caller allocated
and freed, so we should always free it after the calls. The actual
I/O completion is separate from that. Below is the real fix for the
loop driver:
---
From 101f857c5b67492cfd75212d104699813f1bd345 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 20 Mar 2018 09:35:55 +0100
Subject: loop: remove loop_cmd refcounting
Instead of introducing an unneeded refcount free the bvec after calling
into ->read_iter and ->write_iter, which follows the the normal read/write
path conventions.
Fixes: 92d77332 ("block/loop: fix use after free")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/block/loop.c | 14 ++------------
drivers/block/loop.h | 1 -
2 files changed, 2 insertions(+), 13 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ee62d2d517bf..2578a39640b8 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -463,15 +463,6 @@ static void lo_complete_rq(struct request *rq)
blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
}
-static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
-{
- if (!atomic_dec_and_test(&cmd->ref))
- return;
- kfree(cmd->bvec);
- cmd->bvec = NULL;
- blk_mq_complete_request(cmd->rq);
-}
-
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
{
struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
@@ -479,7 +470,7 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
if (cmd->css)
css_put(cmd->css);
cmd->ret = ret;
- lo_rw_aio_do_completion(cmd);
+ blk_mq_complete_request(cmd->rq);
}
static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
@@ -527,7 +518,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
segments = bio_segments(bio);
}
- atomic_set(&cmd->ref, 2);
iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
segments, blk_rq_bytes(rq));
@@ -545,7 +535,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
else
ret = call_read_iter(file, &cmd->iocb, &iter);
- lo_rw_aio_do_completion(cmd);
+ kfree(cmd->bvec);
kthread_associate_blkcg(NULL);
if (ret != -EIOCBQUEUED)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 0f45416e4fcf..ba65848c7c33 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -68,7 +68,6 @@ struct loop_cmd {
struct kthread_work work;
struct request *rq;
bool use_aio; /* use AIO interface to handle I/O */
- atomic_t ref; /* only for aio */
long ret;
struct kiocb iocb;
struct bio_vec *bvec;
--
2.14.2
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
` (6 preceding siblings ...)
2018-03-20 8:47 ` Christoph Hellwig
@ 2018-03-20 20:41 ` Andrei Vagin
2018-03-21 7:23 ` Christoph Hellwig
` (3 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Andrei Vagin @ 2018-03-20 20:41 UTC (permalink / raw)
To: target-devel
On Tue, Mar 20, 2018 at 01:47:01AM -0700, Christoph Hellwig wrote:
> On Tue, Mar 20, 2018 at 12:54:58AM -0700, Andrei Vagin wrote:
> > commit 92d773324b7edbd36bf0c28c1e0157763aeccc92
> > Author: Shaohua Li <shli@fb.com>
> > Date: Fri Sep 1 11:15:17 2017 -0700
> >
> > block/loop: fix use after free
> >
> > lo_rw_aio->call_read_iter->
> > 1 aops->direct_IO
> > 2 iov_iter_revert
> > lo_rw_aio_complete could happen between 1 and 2, the bio and bvec
> > could
> > be freed before 2, which accesses bvec.
> >
> > Signed-off-by: Shaohua Li <shli@fb.com>
> > Signed-off-by: Jens Axboe <axboe@kernel.dk>
> >
> > This commit looks reasonable, doesn't it? In out case, bvec-s are
> > freed from the callback too.
>
> It looks completely bogus, but it papers over a valid issue. The
> iovec/bvec passed to ->read_iter/->write_iter is caller allocated
> and freed, so we should always free it after the calls. The actual
> I/O completion is separate from that. Below is the real fix for the
> loop driver:
I'm afraid this patch doesn't work, because we do not always allocate the bvec;
sometimes we get it from the bio. In that case, we have to call
blk_mq_complete_request after read_iter/write_iter.
[ 102.559165] =================================
[ 102.560396] BUG: KASAN: use-after-free in iov_iter_revert+0xa7/0x440
[ 102.561331] Read of size 4 at addr ffff880111704850 by task loop0/690
[ 102.562501] CPU: 1 PID: 690 Comm: loop0 Not tainted 4.16.0-rc6-00030-gcf309ac88b27-dirty #503
[ 102.563768] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
[ 102.564999] Call Trace:
[ 102.565370] dump_stack+0xda/0x16f
[ 102.565857] ? _atomic_dec_and_lock+0x101/0x101
[ 102.566508] ? del_timer_sync+0xac/0xd0
[ 102.567203] print_address_description+0x6a/0x270
[ 102.567963] kasan_report+0x258/0x380
[ 102.568572] ? iov_iter_revert+0xa7/0x440
[ 102.569243] iov_iter_revert+0xa7/0x440
[ 102.569879] generic_file_read_iter+0xfd0/0x12f0
[ 102.570646] ? match_held_lock+0x7f/0x340
[ 102.571257] ? __save_stack_trace+0x82/0x100
[ 102.571909] ? filemap_range_has_page+0x1b0/0x1b0
[ 102.572562] ? ret_from_fork+0x3a/0x50
[ 102.573102] ? fuse_simple_request+0x1a7/0x280
[ 102.573760] ? save_stack+0x89/0xb0
[ 102.574305] ? find_held_lock+0x6d/0xd0
[ 102.574985] ? fuse_change_attributes+0x243/0x350
[ 102.576072] ? lock_downgrade+0x320/0x320
[ 102.576855] ? map_id_range_down+0x19a/0x1c0
[ 102.577685] ? do_raw_spin_unlock+0x147/0x220
[ 102.578657] ? do_raw_spin_trylock+0x100/0x100
[ 102.579464] ? do_raw_write_trylock+0x100/0x100
[ 102.580304] ? _raw_spin_unlock+0x24/0x30
[ 102.581014] ? fuse_change_attributes+0x32e/0x350
[ 102.581854] ? fuse_change_attributes_common+0x300/0x300
[ 102.582810] ? fuse_simple_request+0x1a7/0x280
[ 102.583600] ? fuse_simple_request+0x1a7/0x280
[ 102.584415] ? fuse_do_getattr+0x27b/0x6e0
[ 102.585149] ? fuse_dentry_revalidate+0x640/0x640
[ 102.585913] ? find_held_lock+0x6d/0xd0
[ 102.586763] ? lock_release+0x4f0/0x4f0
[ 102.587634] ? match_held_lock+0x7f/0x340
[ 102.588854] lo_rw_aio+0x928/0xd20
[ 102.589517] ? save_trace+0x1e0/0x1e0
[ 102.590141] ? lo_compat_ioctl+0xe0/0xe0
[ 102.590888] ? clear_buddies+0x42/0x210
[ 102.591650] ? debug_check_no_locks_freed+0x1b0/0x1b0
[ 102.592611] ? finish_task_switch+0x181/0x500
[ 102.593496] ? do_raw_spin_unlock+0x147/0x220
[ 102.594387] loop_queue_work+0x10aa/0x1900
[ 102.595191] ? _raw_spin_unlock_irq+0x29/0x40
[ 102.596229] ? _raw_spin_unlock_irq+0x29/0x40
[ 102.597865] ? finish_task_switch+0x181/0x500
[ 102.599310] ? finish_task_switch+0x1d5/0x500
[ 102.600118] ? lo_rw_aio+0xd20/0xd20
[ 102.600760] ? set_load_weight+0x110/0x110
[ 102.601465] ? __switch_to_asm+0x34/0x70
[ 102.602165] ? __switch_to_asm+0x34/0x70
[ 102.602867] ? __switch_to_asm+0x40/0x70
[ 102.603571] ? __switch_to_asm+0x34/0x70
[ 102.604279] ? __switch_to_asm+0x40/0x70
[ 102.604982] ? __switch_to_asm+0x34/0x70
[ 102.605856] ? __switch_to_asm+0x40/0x70
[ 102.606698] ? __switch_to_asm+0x34/0x70
[ 102.607473] ? __switch_to_asm+0x34/0x70
[ 102.608206] ? __switch_to_asm+0x40/0x70
[ 102.608980] ? __switch_to_asm+0x34/0x70
[ 102.609877] ? __switch_to_asm+0x40/0x70
[ 102.610632] ? __switch_to_asm+0x34/0x70
[ 102.611368] ? __switch_to_asm+0x40/0x70
[ 102.612074] ? __schedule+0x50e/0x11b0
[ 102.612791] ? mark_held_locks+0x1c/0x90
[ 102.613503] ? _raw_spin_unlock_irq+0x29/0x40
[ 102.614282] ? match_held_lock+0x7f/0x340
[ 102.615001] ? save_trace+0x1e0/0x1e0
[ 102.615661] ? finish_task_switch+0x181/0x500
[ 102.616434] ? finish_task_switch+0x10d/0x500
[ 102.617203] ? __switch_to_asm+0x34/0x70
[ 102.617902] ? __switch_to_asm+0x34/0x70
[ 102.618609] ? set_load_weight+0x110/0x110
[ 102.619350] ? __switch_to_asm+0x34/0x70
[ 102.620136] ? __switch_to_asm+0x34/0x70
[ 102.620859] ? __switch_to_asm+0x40/0x70
[ 102.621584] ? find_held_lock+0x6d/0xd0
[ 102.622328] ? kthread_worker_fn+0x229/0x520
[ 102.623033] ? lock_downgrade+0x320/0x320
[ 102.623683] ? do_raw_spin_unlock+0x147/0x220
[ 102.624443] ? do_raw_spin_trylock+0x100/0x100
[ 102.625550] ? rcu_note_context_switch+0x4e0/0x4e0
[ 102.626597] ? match_held_lock+0x7f/0x340
[ 102.627390] ? mark_held_locks+0x1c/0x90
[ 102.628016] ? _raw_spin_unlock_irq+0x29/0x40
[ 102.628732] kthread_worker_fn+0x272/0x520
[ 102.629429] ? kthread+0x1e0/0x1e0
[ 102.630110] ? find_held_lock+0x6d/0xd0
[ 102.630847] ? lock_downgrade+0x320/0x320
[ 102.631554] ? __schedule+0x11b0/0x11b0
[ 102.632222] ? do_raw_spin_unlock+0x147/0x220
[ 102.633128] ? do_raw_spin_trylock+0x100/0x100
[ 102.634013] ? __lockdep_init_map+0x98/0x2a0
[ 102.634845] ? __init_waitqueue_head+0xbe/0xf0
[ 102.635703] ? mark_held_locks+0x1c/0x90
[ 102.636315] ? _raw_spin_unlock_irqrestore+0x32/0x60
[ 102.637063] ? loop_get_status64+0x100/0x100
[ 102.637695] kthread+0x1b7/0x1e0
[ 102.638243] ? kthread_create_worker_on_cpu+0xc0/0xc0
[ 102.639212] ret_from_fork+0x3a/0x50
[ 102.640354] Allocated by task 691:
[ 102.641024] kasan_kmalloc+0xa0/0xd0
[ 102.641718] kmem_cache_alloc+0xca/0x2d0
[ 102.642375] mempool_alloc+0x117/0x2f0
[ 102.642946] bio_alloc_bioset+0x34d/0x4a0
[ 102.643516] mpage_alloc.isra.8+0x45/0x110
[ 102.644082] do_mpage_readpage+0xa70/0xc60
[ 102.644664] mpage_readpages+0x2d1/0x400
[ 102.645209] __do_page_cache_readahead+0x528/0x740
[ 102.645873] force_page_cache_readahead+0x10b/0x170
[ 102.646560] generic_file_read_iter+0xeb2/0x12f0
[ 102.647198] __vfs_read+0x2a9/0x3c0
[ 102.647705] vfs_read+0xb7/0x1b0
[ 102.648161] SyS_read+0xb6/0x140
[ 102.648619] do_syscall_64+0x193/0x470
[ 102.649142] entry_SYSCALL_64_after_hwframe+0x42/0xb7
[ 102.650063] Freed by task 693:
[ 102.650502] __kasan_slab_free+0x136/0x180
[ 102.651074] kmem_cache_free+0xc6/0x310
[ 102.651711] bio_put+0xdb/0xf0
[ 102.652146] bio_endio+0x23b/0x400
[ 102.652672] blk_update_request+0x12a/0x660
[ 102.653252] blk_mq_end_request+0x26/0x80
[ 102.653826] flush_smp_call_function_queue+0x23d/0x350
[ 102.654544] smp_call_function_single_interrupt+0xdc/0x460
[ 102.655537] The buggy address belongs to the object at ffff8801117047c0
which belongs to the cache bio-0 of size 200
[ 102.657169] The buggy address is located 144 bytes inside of
200-byte region [ffff8801117047c0, ffff880111704888)
[ 102.658773] The buggy address belongs to the page:
[ 102.659459] page:ffffea000445c100 count:1 mapcount:0 mapping:0000000000000000 index:0x0 compound_mapcount: 0
[ 102.660947] flags: 0x2fffc000008100(slab|head)
[ 102.661724] raw: 002fffc000008100 0000000000000000 0000000000000000 0000000100150015
[ 102.662833] raw: dead000000000100 dead000000000200 ffff880119a58700 0000000000000000
[ 102.663908] page dumped because: kasan: bad access detected
[ 102.664948] Memory state around the buggy address:
[ 102.665626] ffff880111704700: fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 102.666814] ffff880111704780: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
[ 102.668278] >ffff880111704800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 102.669706] ^
[ 102.670707] ffff880111704880: fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 102.672119] ffff880111704900: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
[ 102.673514] =================================
>
> ---
> From 101f857c5b67492cfd75212d104699813f1bd345 Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Tue, 20 Mar 2018 09:35:55 +0100
> Subject: loop: remove loop_cmd refcounting
>
> Instead of introducing an unneeded refcount free the bvec after calling
> into ->read_iter and ->write_iter, which follows the the normal read/write
> path conventions.
>
> Fixes: 92d77332 ("block/loop: fix use after free")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> drivers/block/loop.c | 14 ++------------
> drivers/block/loop.h | 1 -
> 2 files changed, 2 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index ee62d2d517bf..2578a39640b8 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -463,15 +463,6 @@ static void lo_complete_rq(struct request *rq)
> blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
> }
>
> -static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
> -{
> - if (!atomic_dec_and_test(&cmd->ref))
> - return;
> - kfree(cmd->bvec);
> - cmd->bvec = NULL;
> - blk_mq_complete_request(cmd->rq);
> -}
> -
> static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
> {
> struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
> @@ -479,7 +470,7 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
> if (cmd->css)
> css_put(cmd->css);
> cmd->ret = ret;
> - lo_rw_aio_do_completion(cmd);
> + blk_mq_complete_request(cmd->rq);
> }
>
> static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> @@ -527,7 +518,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
> segments = bio_segments(bio);
> }
> - atomic_set(&cmd->ref, 2);
>
> iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
> segments, blk_rq_bytes(rq));
> @@ -545,7 +535,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> else
> ret = call_read_iter(file, &cmd->iocb, &iter);
>
> - lo_rw_aio_do_completion(cmd);
> + kfree(cmd->bvec);
> kthread_associate_blkcg(NULL);
>
> if (ret != -EIOCBQUEUED)
> diff --git a/drivers/block/loop.h b/drivers/block/loop.h
> index 0f45416e4fcf..ba65848c7c33 100644
> --- a/drivers/block/loop.h
> +++ b/drivers/block/loop.h
> @@ -68,7 +68,6 @@ struct loop_cmd {
> struct kthread_work work;
> struct request *rq;
> bool use_aio; /* use AIO interface to handle I/O */
> - atomic_t ref; /* only for aio */
> long ret;
> struct kiocb iocb;
> struct bio_vec *bvec;
> --
> 2.14.2
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
` (7 preceding siblings ...)
2018-03-20 20:41 ` Andrei Vagin
@ 2018-03-21 7:23 ` Christoph Hellwig
2018-03-21 18:16 ` Andrei Vagin
` (2 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2018-03-21 7:23 UTC (permalink / raw)
To: target-devel
> I'm afraid this patch doesn't work, because we are not always allocate bvec,
> sometimes we get it from bio. In this case, we have to call
> blk_mq_complete_request after read_iter/write_iter.
The issue there is that we really need to NULL ->bvec after freeing it.
Which normally is an anti-pattern but seems to be needed here (and warrants
a comment). Updated patch below:
From b8eed7302071023852c00f8296165c2e9bbc51f4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 20 Mar 2018 09:35:55 +0100
Subject: loop: remove loop_cmd refcounting
Instead of introducing an unneeded refcount free the bvec after calling
into ->read_iter and ->write_iter, which follows the normal read/write
path conventions.
Fixes: 92d77332 ("block/loop: fix use after free")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/block/loop.c | 19 +++++++------------
drivers/block/loop.h | 1 -
2 files changed, 7 insertions(+), 13 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ee62d2d517bf..9cb6078cd4f0 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -463,15 +463,6 @@ static void lo_complete_rq(struct request *rq)
blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
}
-static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
-{
- if (!atomic_dec_and_test(&cmd->ref))
- return;
- kfree(cmd->bvec);
- cmd->bvec = NULL;
- blk_mq_complete_request(cmd->rq);
-}
-
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
{
struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
@@ -479,7 +470,7 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
if (cmd->css)
css_put(cmd->css);
cmd->ret = ret;
- lo_rw_aio_do_completion(cmd);
+ blk_mq_complete_request(cmd->rq);
}
static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
@@ -527,7 +518,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
segments = bio_segments(bio);
}
- atomic_set(&cmd->ref, 2);
iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
segments, blk_rq_bytes(rq));
@@ -545,7 +535,12 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
else
ret = call_read_iter(file, &cmd->iocb, &iter);
- lo_rw_aio_do_completion(cmd);
+ /*
+ * Clear bvec to NULL as lo_rw_aio only allocates and sets it for
+ * the multiple bios per request case and we want a clean slate here.
+ */
+ kfree(cmd->bvec);
+ cmd->bvec = NULL;
kthread_associate_blkcg(NULL);
if (ret != -EIOCBQUEUED)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 0f45416e4fcf..ba65848c7c33 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -68,7 +68,6 @@ struct loop_cmd {
struct kthread_work work;
struct request *rq;
bool use_aio; /* use AIO interface to handle I/O */
- atomic_t ref; /* only for aio */
long ret;
struct kiocb iocb;
struct bio_vec *bvec;
--
2.14.2
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
` (8 preceding siblings ...)
2018-03-21 7:23 ` Christoph Hellwig
@ 2018-03-21 18:16 ` Andrei Vagin
2018-03-21 18:47 ` Christoph Hellwig
2018-03-21 20:25 ` Andrei Vagin
11 siblings, 0 replies; 13+ messages in thread
From: Andrei Vagin @ 2018-03-21 18:16 UTC (permalink / raw)
To: target-devel
On Wed, Mar 21, 2018 at 12:23:40AM -0700, Christoph Hellwig wrote:
> > I'm afraid this patch doesn't work, because we are not always allocate bvec,
> > sometimes we get it from bio. In this case, we have to call
> > blk_mq_complete_request after read_iter/write_iter.
>
> The issue there is that we really need to NULL ->bvec after freeing it.
> Which normally is an anti-pattern but seems to be needed here (and warrants
> a comment). Updated patch below:
At first, I thought so too, but if we look at the kasan output, we can
find that this bvec was allocated in bio_alloc_bioset().
[ 102.640354] Allocated by task 691:
[ 102.641024] kasan_kmalloc+0xa0/0xd0
[ 102.641718] kmem_cache_alloc+0xca/0x2d0
[ 102.642375] mempool_alloc+0x117/0x2f0
[ 102.642946] bio_alloc_bioset+0x34d/0x4a0
[ 102.643516] mpage_alloc.isra.8+0x45/0x110
[ 102.644082] do_mpage_readpage+0xa70/0xc60
[ 102.644664] mpage_readpages+0x2d1/0x400
[ 102.645209] __do_page_cache_readahead+0x528/0x740
[ 102.645873] force_page_cache_readahead+0x10b/0x170
[ 102.646560] generic_file_read_iter+0xeb2/0x12f0
[ 102.647198] __vfs_read+0x2a9/0x3c0
[ 102.647705] vfs_read+0xb7/0x1b0
[ 102.648161] SyS_read+0xb6/0x140
[ 102.648619] do_syscall_64+0x193/0x470
[ 102.649142] entry_SYSCALL_64_after_hwframe+0x42/0xb7
[ 102.650063] Freed by task 693:
[ 102.650502] __kasan_slab_free+0x136/0x180
[ 102.651074] kmem_cache_free+0xc6/0x310
[ 102.651711] bio_put+0xdb/0xf0
[ 102.652146] bio_endio+0x23b/0x400
[ 102.652672] blk_update_request+0x12a/0x660
[ 102.653252] blk_mq_end_request+0x26/0x80
[ 102.653826] flush_smp_call_function_queue+0x23d/0x350
[ 102.654544] smp_call_function_single_interrupt+0xdc/0x460
If we look at lo_rw_aio, we can find that bvec can either be allocated or
taken from the bio. I think the problem is with the second case.
static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
loff_t pos, bool rw)
{
...
if (rq->bio != rq->biotail) {
...
bvec = kmalloc(sizeof(struct bio_vec) * segments,
GFP_NOIO);
...
} else {
...
bvec = __bvec_iter_bvec(bio->bi_io_vec,
bio->bi_iter);
BTW: I tried your patch and I still get the same kasan warning.
>
>
> From b8eed7302071023852c00f8296165c2e9bbc51f4 Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Tue, 20 Mar 2018 09:35:55 +0100
> Subject: loop: remove loop_cmd refcounting
>
> Instead of introducing an unneeded refcount free the bvec after calling
> into ->read_iter and ->write_iter, which follows the the normal read/write
> path conventions.
>
> Fixes: 92d77332 ("block/loop: fix use after free")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> drivers/block/loop.c | 19 +++++++------------
> drivers/block/loop.h | 1 -
> 2 files changed, 7 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index ee62d2d517bf..9cb6078cd4f0 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -463,15 +463,6 @@ static void lo_complete_rq(struct request *rq)
> blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
> }
>
> -static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
> -{
> - if (!atomic_dec_and_test(&cmd->ref))
> - return;
> - kfree(cmd->bvec);
> - cmd->bvec = NULL;
You can remove bvec from loop_cmd; this was the only place outside of
lo_rw_aio where it is used.
> - blk_mq_complete_request(cmd->rq);
> -}
> -
> static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
> {
> struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
> @@ -479,7 +470,7 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
> if (cmd->css)
> css_put(cmd->css);
> cmd->ret = ret;
> - lo_rw_aio_do_completion(cmd);
> + blk_mq_complete_request(cmd->rq);
> }
>
> static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> @@ -527,7 +518,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
> segments = bio_segments(bio);
> }
> - atomic_set(&cmd->ref, 2);
>
> iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
> segments, blk_rq_bytes(rq));
> @@ -545,7 +535,12 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> else
> ret = call_read_iter(file, &cmd->iocb, &iter);
>
> - lo_rw_aio_do_completion(cmd);
> + /*
> + * Clear bvec to NULL as lo_rw_aio only allocates and sets it for
> + * the multiple bios per request case and we want a clean slate here.
> + */
> + kfree(cmd->bvec);
> + cmd->bvec = NULL;
> kthread_associate_blkcg(NULL);
>
> if (ret != -EIOCBQUEUED)
> diff --git a/drivers/block/loop.h b/drivers/block/loop.h
> index 0f45416e4fcf..ba65848c7c33 100644
> --- a/drivers/block/loop.h
> +++ b/drivers/block/loop.h
> @@ -68,7 +68,6 @@ struct loop_cmd {
> struct kthread_work work;
> struct request *rq;
> bool use_aio; /* use AIO interface to handle I/O */
> - atomic_t ref; /* only for aio */
> long ret;
> struct kiocb iocb;
> struct bio_vec *bvec;
> --
> 2.14.2
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
` (9 preceding siblings ...)
2018-03-21 18:16 ` Andrei Vagin
@ 2018-03-21 18:47 ` Christoph Hellwig
2018-03-21 20:25 ` Andrei Vagin
11 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2018-03-21 18:47 UTC (permalink / raw)
To: target-devel
On Wed, Mar 21, 2018 at 11:16:09AM -0700, Andrei Vagin wrote:
> If we look at lo_rw_aio, we can find that bvec can be allocated or got
> from bio. I think the problem with the second case.
>
> static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> loff_t pos, bool rw)
> {
> ...
> if (rq->bio != rq->biotail) {
> ...
> bvec = kmalloc(sizeof(struct bio_vec) * segments,
> GFP_NOIO);
> ...
> } else {
> ...
> bvec = __bvec_iter_bvec(bio->bi_io_vec,
> bio->bi_iter);
That is the local 'bvec' variable, not cmd->bvec, which is only
assigned the kmalloc return value.
> BTW: I tried your patch and I still get the same kasan warning.
Strange, as the trace can't really happen with the patch applied.
In fact the ->bvec field isn't even needed, e.g. we should do something
like this on top of the previous patch:
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 9cb6078cd4f0..0401fade3d60 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -477,7 +477,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
loff_t pos, bool rw)
{
struct iov_iter iter;
- struct bio_vec *bvec;
+ struct bio_vec *bvec, *alloc_bvec = NULL;
struct request *rq = cmd->rq;
struct bio *bio = rq->bio;
struct file *file = lo->lo_backing_file;
@@ -491,10 +491,10 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
__rq_for_each_bio(bio, rq)
segments += bio_segments(bio);
- bvec = kmalloc(sizeof(struct bio_vec) * segments, GFP_NOIO);
- if (!bvec)
+ alloc_bvec = kmalloc_array(segments,
+ sizeof(struct bio_vec), GFP_NOIO);
+ if (!alloc_bvec)
return -EIO;
- cmd->bvec = bvec;
/*
* The bios of the request may be started from the middle of
@@ -502,11 +502,12 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
* copy bio->bi_iov_vec to new bvec. The rq_for_each_segment
* API will take care of all details for us.
*/
+ bvec = alloc_bvec;
rq_for_each_segment(tmp, rq, iter) {
*bvec = tmp;
bvec++;
}
- bvec = cmd->bvec;
+ bvec = alloc_bvec;
offset = 0;
} else {
/*
@@ -535,12 +536,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
else
ret = call_read_iter(file, &cmd->iocb, &iter);
- /*
- * Clear bvec to NULL as lo_rw_aio only allocates and sets it for
- * the multiple bios per request case and we want a clean slate here.
- */
- kfree(cmd->bvec);
- cmd->bvec = NULL;
+ kfree(alloc_bvec);
kthread_associate_blkcg(NULL);
if (ret != -EIOCBQUEUED)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index ba65848c7c33..b8eef1977f64 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -70,7 +70,6 @@ struct loop_cmd {
bool use_aio; /* use AIO interface to handle I/O */
long ret;
struct kiocb iocb;
- struct bio_vec *bvec;
struct cgroup_subsys_state *css;
};
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] [RFC] target/file: add support of direct and async I/O
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
` (10 preceding siblings ...)
2018-03-21 18:47 ` Christoph Hellwig
@ 2018-03-21 20:25 ` Andrei Vagin
11 siblings, 0 replies; 13+ messages in thread
From: Andrei Vagin @ 2018-03-21 20:25 UTC (permalink / raw)
To: target-devel
On Wed, Mar 21, 2018 at 11:47:02AM -0700, Christoph Hellwig wrote:
> On Wed, Mar 21, 2018 at 11:16:09AM -0700, Andrei Vagin wrote:
> > If we look at lo_rw_aio, we can find that bvec can be allocated or got
> > from bio. I think the problem with the second case.
> >
> > static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> > loff_t pos, bool rw)
> > {
> > ...
> > if (rq->bio != rq->biotail) {
> > ...
> > bvec = kmalloc(sizeof(struct bio_vec) * segments,
> > GFP_NOIO);
> > ...
> > } else {
> > ...
> > bvec = __bvec_iter_bvec(bio->bi_io_vec,
> > bio->bi_iter);
>
> That is the local 'bvec' variable, not cmd->bvec which is only
> assigned to the kmalloc return value.
>
> > BTW: I tried your patch and I still get the same kasan warning.
>
> Strange, as the trace can't really happen with the patch applied.
In the trace, we see that bvec is accessed from iov_iter_revert(), but it
has already been released. This means that bvec was not allocated in
lo_rw_aio(). In the third trace, we can see that this bvec was released
in lo_rw_aio_complete()->bio_endio(). This means that we used the bvec from
the bio.
[ 101.436741] Freed by task 693:
[ 101.437187] __kasan_slab_free+0x136/0x180
[ 101.437790] kmem_cache_free+0xc6/0x310
[ 101.438360] bio_put+0xdb/0xf0
[ 101.438844] bio_endio+0x23b/0x400
[ 101.439385] blk_update_request+0x12a/0x660
[ 101.440139] blk_mq_end_request+0x26/0x80
[ 101.440724] __blk_mq_complete_request+0x30a/0x400
[ 101.441406] blk_mq_complete_request+0x110/0x160
[ 101.442071] lo_rw_aio_complete+0xbe/0x240
[ 101.442664] fuse_aio_complete+0x177/0x220
Below you can find traces from the kernel with the last two patches applied.
[ 101.335363] =================================
[ 101.339372] BUG: KASAN: use-after-free in iov_iter_revert+0xa7/0x440
[ 101.340612] Read of size 4 at addr ffff8800b95003d0 by task loop0/697
[ 101.342050] CPU: 0 PID: 697 Comm: loop0 Not tainted 4.16.0-rc6-00035-g5b7b2f4a8aba #510
[ 101.343481] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
[ 101.344762] Call Trace:
[ 101.345140] dump_stack+0xda/0x16f
[ 101.345669] ? _atomic_dec_and_lock+0x101/0x101
[ 101.346394] ? del_timer_sync+0xac/0xd0
[ 101.347157] print_address_description+0x6a/0x270
[ 101.348014] kasan_report+0x258/0x380
[ 101.348671] ? iov_iter_revert+0xa7/0x440
[ 101.349511] iov_iter_revert+0xa7/0x440
[ 101.350362] generic_file_read_iter+0xfd0/0x12f0
[ 101.351224] ? match_held_lock+0x7f/0x340
[ 101.351958] ? __save_stack_trace+0x82/0x100
[ 101.352883] ? filemap_range_has_page+0x1b0/0x1b0
[ 101.353788] ? ret_from_fork+0x3a/0x50
[ 101.354716] ? fuse_simple_request+0x1a7/0x280
[ 101.355966] ? save_stack+0x89/0xb0
[ 101.356918] ? find_held_lock+0x6d/0xd0
[ 101.357735] ? fuse_change_attributes+0x243/0x350
[ 101.358676] ? lock_downgrade+0x320/0x320
[ 101.360030] ? map_id_range_down+0x19a/0x1c0
[ 101.361880] ? do_raw_spin_unlock+0x147/0x220
[ 101.363089] ? do_raw_spin_trylock+0x100/0x100
[ 101.363960] ? do_raw_write_trylock+0x100/0x100
[ 101.364816] ? _raw_spin_unlock+0x24/0x30
[ 101.365644] ? fuse_change_attributes+0x32e/0x350
[ 101.366567] ? fuse_change_attributes_common+0x300/0x300
[ 101.367539] ? fuse_simple_request+0x1a7/0x280
[ 101.368339] ? fuse_simple_request+0x1a7/0x280
[ 101.369173] ? fuse_do_getattr+0x27b/0x6e0
[ 101.369950] ? fuse_dentry_revalidate+0x640/0x640
[ 101.370822] ? find_held_lock+0x6d/0xd0
[ 101.371638] ? lock_release+0x4f0/0x4f0
[ 101.372351] ? match_held_lock+0x7f/0x340
[ 101.373130] lo_rw_aio+0x942/0xd10
[ 101.373841] ? lo_rw_aio_complete+0x240/0x240
[ 101.374665] ? match_held_lock+0x7f/0x340
[ 101.375413] ? clear_buddies+0x42/0x210
[ 101.376330] ? debug_check_no_locks_freed+0x1b0/0x1b0
[ 101.377266] ? finish_task_switch+0x181/0x500
[ 101.378099] ? do_raw_spin_unlock+0x147/0x220
[ 101.378872] loop_queue_work+0x10aa/0x1900
[ 101.379635] ? _raw_spin_unlock_irq+0x29/0x40
[ 101.380268] ? _raw_spin_unlock_irq+0x29/0x40
[ 101.380902] ? finish_task_switch+0x181/0x500
[ 101.381534] ? finish_task_switch+0x10d/0x500
[ 101.382165] ? lo_compat_ioctl+0xe0/0xe0
[ 101.382754] ? set_load_weight+0x110/0x110
[ 101.383503] ? __switch_to_asm+0x34/0x70
[ 101.384211] ? __switch_to_asm+0x34/0x70
[ 101.384920] ? __switch_to_asm+0x40/0x70
[ 101.385628] ? __switch_to_asm+0x34/0x70
[ 101.386713] ? __switch_to_asm+0x40/0x70
[ 101.387564] ? __switch_to_asm+0x34/0x70
[ 101.389006] ? __switch_to_asm+0x40/0x70
[ 101.389927] ? __switch_to_asm+0x34/0x70
[ 101.390752] ? __switch_to_asm+0x40/0x70
[ 101.391522] ? __switch_to_asm+0x34/0x70
[ 101.392687] ? __switch_to_asm+0x40/0x70
[ 101.394136] ? __switch_to_asm+0x34/0x70
[ 101.394955] ? __switch_to_asm+0x40/0x70
[ 101.395532] ? __switch_to_asm+0x34/0x70
[ 101.396092] ? __switch_to_asm+0x40/0x70
[ 101.396673] ? __schedule+0x50e/0x11b0
[ 101.397241] ? mark_held_locks+0x1c/0x90
[ 101.397896] ? _raw_spin_unlock_irq+0x29/0x40
[ 101.398839] ? match_held_lock+0x7f/0x340
[ 101.399564] ? save_trace+0x1e0/0x1e0
[ 101.400233] ? finish_task_switch+0x181/0x500
[ 101.401026] ? finish_task_switch+0x10d/0x500
[ 101.401818] ? __switch_to_asm+0x34/0x70
[ 101.402536] ? __switch_to_asm+0x34/0x70
[ 101.403255] ? set_load_weight+0x110/0x110
[ 101.403996] ? __switch_to_asm+0x34/0x70
[ 101.404730] ? __switch_to_asm+0x34/0x70
[ 101.405424] ? __switch_to_asm+0x40/0x70
[ 101.406150] ? find_held_lock+0x6d/0xd0
[ 101.406869] ? kthread_worker_fn+0x229/0x520
[ 101.407635] ? lock_downgrade+0x320/0x320
[ 101.408356] ? do_raw_spin_unlock+0x147/0x220
[ 101.409316] ? do_raw_spin_trylock+0x100/0x100
[ 101.409969] ? rcu_note_context_switch+0x4e0/0x4e0
[ 101.410729] ? match_held_lock+0x7f/0x340
[ 101.411440] ? mark_held_locks+0x1c/0x90
[ 101.412150] ? _raw_spin_unlock_irq+0x29/0x40
[ 101.412959] kthread_worker_fn+0x272/0x520
[ 101.413648] ? kthread+0x1e0/0x1e0
[ 101.414482] ? find_held_lock+0x6d/0xd0
[ 101.415400] ? lock_downgrade+0x320/0x320
[ 101.416319] ? __schedule+0x11b0/0x11b0
[ 101.417130] ? do_raw_spin_unlock+0x147/0x220
[ 101.417847] ? do_raw_spin_trylock+0x100/0x100
[ 101.418560] ? __lockdep_init_map+0x98/0x2a0
[ 101.419446] ? __init_waitqueue_head+0xbe/0xf0
[ 101.420333] ? mark_held_locks+0x1c/0x90
[ 101.421444] ? _raw_spin_unlock_irqrestore+0x32/0x60
[ 101.422726] ? loop_get_status64+0x100/0x100
[ 101.423629] kthread+0x1b7/0x1e0
[ 101.424212] ? kthread_create_worker_on_cpu+0xc0/0xc0
[ 101.425137] ret_from_fork+0x3a/0x50
[ 101.426481] Allocated by task 699:
[ 101.427260] kasan_kmalloc+0xa0/0xd0
[ 101.427986] kmem_cache_alloc+0xca/0x2d0
[ 101.428659] mempool_alloc+0x117/0x2f0
[ 101.429330] bio_alloc_bioset+0x34d/0x4a0
[ 101.430147] mpage_alloc.isra.8+0x45/0x110
[ 101.430652] do_mpage_readpage+0xa70/0xc60
[ 101.431158] mpage_readpages+0x2d1/0x400
[ 101.431688] __do_page_cache_readahead+0x528/0x740
[ 101.432381] force_page_cache_readahead+0x10b/0x170
[ 101.433091] generic_file_read_iter+0xeb2/0x12f0
[ 101.433763] __vfs_read+0x2a9/0x3c0
[ 101.434288] vfs_read+0xb7/0x1b0
[ 101.434759] SyS_read+0xb6/0x140
[ 101.435231] do_syscall_64+0x193/0x470
[ 101.435786] entry_SYSCALL_64_after_hwframe+0x42/0xb7
[ 101.436741] Freed by task 693:
[ 101.437187] __kasan_slab_free+0x136/0x180
[ 101.437790] kmem_cache_free+0xc6/0x310
[ 101.438360] bio_put+0xdb/0xf0
[ 101.438844] bio_endio+0x23b/0x400
[ 101.439385] blk_update_request+0x12a/0x660
[ 101.440139] blk_mq_end_request+0x26/0x80
[ 101.440724] __blk_mq_complete_request+0x30a/0x400
[ 101.441406] blk_mq_complete_request+0x110/0x160
[ 101.442071] lo_rw_aio_complete+0xbe/0x240
[ 101.442664] fuse_aio_complete+0x177/0x220
[ 101.443241] request_end+0x237/0x470
[ 101.443814] fuse_dev_do_write+0x1325/0x1910
[ 101.444477] fuse_dev_write+0x11d/0x180
[ 101.445037] do_iter_readv_writev+0x2a5/0x370
[ 101.445663] do_iter_write+0xd1/0x280
[ 101.446187] vfs_writev+0x145/0x230
[ 101.446690] do_writev+0xc0/0x1b0
[ 101.447170] do_syscall_64+0x193/0x470
[ 101.447711] entry_SYSCALL_64_after_hwframe+0x42/0xb7
[ 101.448672] The buggy address belongs to the object at ffff8800b9500340
which belongs to the cache bio-0 of size 200
[ 101.450534] The buggy address is located 144 bytes inside of
200-byte region [ffff8800b9500340, ffff8800b9500408)
[ 101.452229] The buggy address belongs to the page:
[ 101.452939] page:ffffea0002e54000 count:1 mapcount:0 mapping:0000000000000000 index:0xffff8800b95004c0 compound_mapcount: 0
[ 101.454468] flags: 0x1fffc000008100(slab|head)
[ 101.455149] raw: 001fffc000008100 0000000000000000 ffff8800b95004c0 0000000100150006
[ 101.456525] raw: ffffea00045105a0 ffff8801198e2580 ffff880119a58700 0000000000000000
[ 101.457560] page dumped because: kasan: bad access detected
[ 101.458526] Memory state around the buggy address:
[ 101.459174] ffff8800b9500280: fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 101.460319] ffff8800b9500300: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
[ 101.461412] >ffff8800b9500380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 101.462647] ^
[ 101.463898] ffff8800b9500400: fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 101.465217] ffff8800b9500480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 101.466250] =================================
[ 101.467443] Disabling lock debugging due to kernel taint
> In fact the ->bvec field isn't even needed, e.g. we should do something
> like this on top of the previous patch:
I said this in a previous mail ;)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 9cb6078cd4f0..0401fade3d60 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -477,7 +477,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> loff_t pos, bool rw)
> {
> struct iov_iter iter;
> - struct bio_vec *bvec;
> + struct bio_vec *bvec, *alloc_bvec = NULL;
> struct request *rq = cmd->rq;
> struct bio *bio = rq->bio;
> struct file *file = lo->lo_backing_file;
> @@ -491,10 +491,10 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
>
> __rq_for_each_bio(bio, rq)
> segments += bio_segments(bio);
> - bvec = kmalloc(sizeof(struct bio_vec) * segments, GFP_NOIO);
> - if (!bvec)
> + alloc_bvec = kmalloc_array(segments,
> + sizeof(struct bio_vec), GFP_NOIO);
> + if (!alloc_bvec)
> return -EIO;
> - cmd->bvec = bvec;
>
> /*
> * The bios of the request may be started from the middle of
> @@ -502,11 +502,12 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment
> * API will take care of all details for us.
> */
> + bvec = alloc_bvec;
> rq_for_each_segment(tmp, rq, iter) {
> *bvec = tmp;
> bvec++;
> }
> - bvec = cmd->bvec;
> + bvec = alloc_bvec;
> offset = 0;
> } else {
> /*
> @@ -535,12 +536,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> else
> ret = call_read_iter(file, &cmd->iocb, &iter);
>
> - /*
> - * Clear bvec to NULL as lo_rw_aio only allocates and sets it for
> - * the multiple bios per request case and we want a clean slate here.
> - */
> - kfree(cmd->bvec);
> - cmd->bvec = NULL;
> + kfree(alloc_bvec);
> kthread_associate_blkcg(NULL);
>
> if (ret != -EIOCBQUEUED)
> diff --git a/drivers/block/loop.h b/drivers/block/loop.h
> index ba65848c7c33..b8eef1977f64 100644
> --- a/drivers/block/loop.h
> +++ b/drivers/block/loop.h
> @@ -70,7 +70,6 @@ struct loop_cmd {
> bool use_aio; /* use AIO interface to handle I/O */
> long ret;
> struct kiocb iocb;
> - struct bio_vec *bvec;
> struct cgroup_subsys_state *css;
> };
>
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2018-03-21 20:25 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-09 0:42 [PATCH] [RFC] target/file: add support of direct and async I/O Andrei Vagin
2018-03-15 14:26 ` Bryant G. Ly
2018-03-15 17:04 ` Andrei Vagin
2018-03-16 7:50 ` Christoph Hellwig
2018-03-17 0:13 ` Andrei Vagin
2018-03-17 8:25 ` Christoph Hellwig
2018-03-20 7:54 ` Andrei Vagin
2018-03-20 8:47 ` Christoph Hellwig
2018-03-20 20:41 ` Andrei Vagin
2018-03-21 7:23 ` Christoph Hellwig
2018-03-21 18:16 ` Andrei Vagin
2018-03-21 18:47 ` Christoph Hellwig
2018-03-21 20:25 ` Andrei Vagin
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.