* [PATCH v3] virtio_blk: add DISCARD and WRIET ZEROES command support
@ 2018-03-30 0:49 Changpeng Liu
2018-03-30 1:07 ` Michael S. Tsirkin
0 siblings, 1 reply; 3+ messages in thread
From: Changpeng Liu @ 2018-03-30 0:49 UTC (permalink / raw)
To: virtualization
Cc: james.r.harris, pawelx.wodkowski, mst, fabio.miranda.martins,
stefanha, pbonzini, cavery, changpeng.liu
Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES
command support, this will impact the performance when using SSD
backend over file systems.
The idea here is using 16 Bytes payload as one descriptor for
DISCARD/WRITE ZEROES command, users can put several ranges into
one command, for the purpose to support such feature, two feature
flags VIRTIO_BLK_F_DISCARD/VIRTIO_BLK_F_WRITE_ZEROES and two
commands VIRTIO_BLK_T_DISCARD/VIRTIO_BLK_T_WRITE_ZEROES are
introduced, and some parameters are added to the configuration
space to tell the OS the granularity of DISCARD/WRITE ZEROES
commands.
The specification change list here:
https://github.com/oasis-tcs/virtio-spec
CHANGELOG:
v3: finalized the specification change.
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
---
drivers/block/virtio_blk.c | 96 +++++++++++++++++++++++++++++++++++++++--
include/uapi/linux/virtio_blk.h | 39 +++++++++++++++++
2 files changed, 132 insertions(+), 3 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4a07593c..1943adb 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -172,10 +172,53 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}
+static inline int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
+{
+ unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
+ u32 block_size = queue_logical_block_size(req->q);
+ struct virtio_blk_discard_write_zeroes *range;
+ struct bio *bio;
+
+ if (block_size < 512 || !block_size)
+ return -1;
+
+ range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
+ if (!range)
+ return -1;
+
+ __rq_for_each_bio(bio, req) {
+ u64 sector = bio->bi_iter.bi_sector;
+ u32 num_sectors = bio->bi_iter.bi_size >> 9;
+
+ range[n].flags.unmap = cpu_to_le32(unmap);
+ range[n].flags.reserved = cpu_to_le32(0);
+ range[n].num_sectors = cpu_to_le32(num_sectors);
+ range[n].sector = cpu_to_le64(sector);
+ n++;
+ }
+
+ if (WARN_ON_ONCE(n != segments)) {
+ kfree(range);
+ return -1;
+ }
+
+ req->special_vec.bv_page = virt_to_page(range);
+ req->special_vec.bv_offset = offset_in_page(range);
+ req->special_vec.bv_len = sizeof(*range) * segments;
+ req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+ return 0;
+}
+
static inline void virtblk_request_done(struct request *req)
{
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ kfree(page_address(req->special_vec.bv_page) +
+ req->special_vec.bv_offset);
+ }
+
switch (req_op(req)) {
case REQ_OP_SCSI_IN:
case REQ_OP_SCSI_OUT:
@@ -225,6 +268,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
int qid = hctx->queue_num;
int err;
bool notify = false;
+ bool unmap = false;
u32 type;
BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
@@ -237,6 +281,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
case REQ_OP_FLUSH:
type = VIRTIO_BLK_T_FLUSH;
break;
+ case REQ_OP_DISCARD:
+ type = VIRTIO_BLK_T_DISCARD;
+ break;
+ case REQ_OP_WRITE_ZEROES:
+ type = VIRTIO_BLK_T_WRITE_ZEROES;
+ unmap = !(req->cmd_flags & REQ_NOUNMAP);
+ break;
case REQ_OP_SCSI_IN:
case REQ_OP_SCSI_OUT:
type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,9 +307,16 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(req);
+ if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
+ err = virtblk_setup_discard_write_zeroes(req, unmap);
+ if (err)
+ return BLK_STS_IOERR;
+ }
+
num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
if (num) {
- if (rq_data_dir(req) == WRITE)
+ if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD ||
+ type == VIRTIO_BLK_T_WRITE_ZEROES)
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
else
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
@@ -777,6 +835,38 @@ static int virtblk_probe(struct virtio_device *vdev)
if (!err && opt_io_size)
blk_queue_io_opt(q, blk_size * opt_io_size);
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+ q->limits.discard_granularity = blk_size;
+
+ virtio_cread(vdev, struct virtio_blk_config, discard_sector_alignment, &v);
+ if (v)
+ q->limits.discard_alignment = v << 9;
+ else
+ q->limits.discard_alignment = 0;
+
+ virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v);
+ if (v)
+ blk_queue_max_discard_sectors(q, v);
+ else
+ blk_queue_max_discard_sectors(q, UINT_MAX);
+
+ virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &v);
+ if (v)
+ blk_queue_max_discard_segments(q, v);
+ else
+ blk_queue_max_discard_segments(q, USHRT_MAX);
+
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ }
+
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
+ virtio_cread(vdev, struct virtio_blk_config, max_write_zeroes_sectors, &v);
+ if (v)
+ blk_queue_max_write_zeroes_sectors(q, v);
+ else
+ blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
+ }
+
virtblk_update_capacity(vblk, false);
virtio_device_ready(vdev);
@@ -885,14 +975,14 @@ static int virtblk_restore(struct virtio_device *vdev)
VIRTIO_BLK_F_SCSI,
#endif
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
- VIRTIO_BLK_F_MQ,
+ VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
}
;
static unsigned int features[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
- VIRTIO_BLK_F_MQ,
+ VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
};
static struct virtio_driver virtio_blk = {
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ebe4d9..78a60e6 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -38,6 +38,8 @@
#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */
#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD command is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES command is supported */
/* Legacy feature bits */
#ifndef VIRTIO_BLK_NO_LEGACY
@@ -86,6 +88,22 @@ struct virtio_blk_config {
/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
__u16 num_queues;
+ /* The maximum discard segment size (if VIRTIO_BLK_F_DISCARD) */
+ __u32 max_discard_sectors;
+ /* The maximum number of discard segments (if VIRTIO_BLK_F_DISCARD) */
+ __u32 max_discard_seg;
+ /* The sector alignment for discard (if VIRTIO_BLK_F_DISCARD) */
+ __u32 discard_sector_alignment;
+ /* The maximum number of write zeroes sectors (if VIRTIO_BLK_F_WRITE_ZEROES) */
+ __u32 max_write_zeroes_sectors;
+ /* The maximum number of write zeroes segments (if VIRTIO_BLK_F_WRITE_ZEROES) */
+ __u32 max_write_zeroes_seg;
+ /* Device clear this bit when write zeroes command cannot result in
+ * deallocating one or more sectors
+ * (if VIRTIO_BLK_F_WRITE_ZEROES with unmap bit)
+ */
+ __u8 write_zeroes_may_unmap;
+ __u8 unused1[3];
} __attribute__((packed));
/*
@@ -114,6 +132,12 @@ struct virtio_blk_config {
/* Get device ID command */
#define VIRTIO_BLK_T_GET_ID 8
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD 11
+
+/* Write zeroes command */
+#define VIRTIO_BLK_T_WRITE_ZEROES 13
+
#ifndef VIRTIO_BLK_NO_LEGACY
/* Barrier before this op. */
#define VIRTIO_BLK_T_BARRIER 0x80000000
@@ -133,6 +157,21 @@ struct virtio_blk_outhdr {
__virtio64 sector;
};
+/*
+ * discard/write zeroes range for each request.
+ */
+struct virtio_blk_discard_write_zeroes {
+ /* discard/write zeroes start sector */
+ __virtio64 sector;
+ /* number of discard/write zeroes sectors */
+ __virtio32 num_sectors;
+ /* valid for write zeroes command */
+ struct {
+ __virtio32 unmap:1;
+ __virtio32 reserved:31;
+ } flags;
+};
+
#ifndef VIRTIO_BLK_NO_LEGACY
struct virtio_scsi_inhdr {
__virtio32 errors;
--
1.9.3
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v3] virtio_blk: add DISCARD and WRIET ZEROES command support
2018-03-30 0:49 [PATCH v3] virtio_blk: add DISCARD and WRIET ZEROES command support Changpeng Liu
@ 2018-03-30 1:07 ` Michael S. Tsirkin
2018-05-23 20:37 ` Liu, Changpeng
0 siblings, 1 reply; 3+ messages in thread
From: Michael S. Tsirkin @ 2018-03-30 1:07 UTC (permalink / raw)
To: Changpeng Liu
Cc: james.r.harris, pawelx.wodkowski, fabio.miranda.martins,
virtualization, stefanha, pbonzini, cavery
On Fri, Mar 30, 2018 at 08:49:34AM +0800, Changpeng Liu wrote:
> Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES
> command support, this will impact the performance when using SSD
> backend over file systems.
>
> The idea here is using 16 Bytes payload as one descriptor for
> DISCARD/WRITE ZEROES command, users can put several ranges into
> one command, for the purpose to support such feature, two feature
> flags VIRTIO_BLK_F_DISCARD/VIRTIO_BLK_F_WRITE_ZEROES and two
> commands VIRTIO_BLK_T_DISCARD/VIRTIO_BLK_T_WRITE_ZEROES are
> introduced, and some parameters are added to the configuration
> space to tell the OS the granularity of DISCARD/WRITE ZEROES
> commands.
Pls fix grammar in this comment, I am not sure what are you
trying to say.
>
> The specification change list here:
> https://github.com/oasis-tcs/virtio-spec
Which commit?
> CHANGELOG:
> v3: finalized the specification change.
Changelog belongs after -- and should be complete
including changes v1 to v2.
> Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> ---
> drivers/block/virtio_blk.c | 96 +++++++++++++++++++++++++++++++++++++++--
> include/uapi/linux/virtio_blk.h | 39 +++++++++++++++++
> 2 files changed, 132 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 4a07593c..1943adb 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -172,10 +172,53 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
> return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
> }
>
> +static inline int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
> +{
> + unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
Split on two lines pls:
unsigned short n = 0;
> + u32 block_size = queue_logical_block_size(req->q);
Seems to be unused except for the sanity check below. Why?
> + struct virtio_blk_discard_write_zeroes *range;
> + struct bio *bio;
> +
> + if (block_size < 512 || !block_size)
Why 512? And when is it 0?
> + return -1;
-1 isn't a normal errno code.
> +
> + range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
This might be pretty large: with 64K segments it looks like you are
trying to allocate ~1Mbyte with GFP_ATOMIC which is unlikely to succeed.
Can we split this up in chunks?
> + if (!range)
> + return -1;
> +
> + __rq_for_each_bio(bio, req) {
> + u64 sector = bio->bi_iter.bi_sector;
> + u32 num_sectors = bio->bi_iter.bi_size >> 9;
why 9?
> +
> + range[n].flags.unmap = cpu_to_le32(unmap);
> + range[n].flags.reserved = cpu_to_le32(0);
> + range[n].num_sectors = cpu_to_le32(num_sectors);
> + range[n].sector = cpu_to_le64(sector);
Isn't this causing sparse warnings?
> + n++;
> + }
> +
> + if (WARN_ON_ONCE(n != segments)) {
and when does this happen?
> + kfree(range);
> + return -1;
> + }
> +
> + req->special_vec.bv_page = virt_to_page(range);
> + req->special_vec.bv_offset = offset_in_page(range);
> + req->special_vec.bv_len = sizeof(*range) * segments;
> + req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> + return 0;
> +}
> +
> static inline void virtblk_request_done(struct request *req)
> {
> struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>
> + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> + kfree(page_address(req->special_vec.bv_page) +
> + req->special_vec.bv_offset);
> + }
> +
> switch (req_op(req)) {
> case REQ_OP_SCSI_IN:
> case REQ_OP_SCSI_OUT:
> @@ -225,6 +268,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> int qid = hctx->queue_num;
> int err;
> bool notify = false;
> + bool unmap = false;
> u32 type;
>
> BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> @@ -237,6 +281,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> case REQ_OP_FLUSH:
> type = VIRTIO_BLK_T_FLUSH;
> break;
> + case REQ_OP_DISCARD:
> + type = VIRTIO_BLK_T_DISCARD;
> + break;
> + case REQ_OP_WRITE_ZEROES:
> + type = VIRTIO_BLK_T_WRITE_ZEROES;
> + unmap = !(req->cmd_flags & REQ_NOUNMAP);
> + break;
> case REQ_OP_SCSI_IN:
> case REQ_OP_SCSI_OUT:
> type = VIRTIO_BLK_T_SCSI_CMD;
> @@ -256,9 +307,16 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>
> blk_mq_start_request(req);
>
> + if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
> + err = virtblk_setup_discard_write_zeroes(req, unmap);
> + if (err)
> + return BLK_STS_IOERR;
Does a failure actually indicate an IO error?
> + }
> +
> num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> if (num) {
> - if (rq_data_dir(req) == WRITE)
> + if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD ||
> + type == VIRTIO_BLK_T_WRITE_ZEROES)
> vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
> else
> vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
> @@ -777,6 +835,38 @@ static int virtblk_probe(struct virtio_device *vdev)
> if (!err && opt_io_size)
> blk_queue_io_opt(q, blk_size * opt_io_size);
>
> + if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> + q->limits.discard_granularity = blk_size;
> +
> + virtio_cread(vdev, struct virtio_blk_config, discard_sector_alignment, &v);
> + if (v)
> + q->limits.discard_alignment = v << 9;
> + else
> + q->limits.discard_alignment = 0;
> +
> + virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v);
> + if (v)
> + blk_queue_max_discard_sectors(q, v);
> + else
> + blk_queue_max_discard_sectors(q, UINT_MAX);
> +
> + virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &v);
> + if (v)
> + blk_queue_max_discard_segments(q, v);
> + else
> + blk_queue_max_discard_segments(q, USHRT_MAX);
> +
> + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
> + }
> +
> + if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> + virtio_cread(vdev, struct virtio_blk_config, max_write_zeroes_sectors, &v);
> + if (v)
> + blk_queue_max_write_zeroes_sectors(q, v);
> + else
> + blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
> + }
> +
> virtblk_update_capacity(vblk, false);
> virtio_device_ready(vdev);
>
> @@ -885,14 +975,14 @@ static int virtblk_restore(struct virtio_device *vdev)
> VIRTIO_BLK_F_SCSI,
> #endif
> VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> - VIRTIO_BLK_F_MQ,
> + VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
> }
> ;
> static unsigned int features[] = {
> VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
> VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
> VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> - VIRTIO_BLK_F_MQ,
> + VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
> };
>
> static struct virtio_driver virtio_blk = {
> diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> index 9ebe4d9..78a60e6 100644
> --- a/include/uapi/linux/virtio_blk.h
> +++ b/include/uapi/linux/virtio_blk.h
> @@ -38,6 +38,8 @@
> #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
> #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */
> #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD command is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES command is supported */
>
> /* Legacy feature bits */
> #ifndef VIRTIO_BLK_NO_LEGACY
> @@ -86,6 +88,22 @@ struct virtio_blk_config {
>
> /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
> __u16 num_queues;
> + /* The maximum discard segment size (if VIRTIO_BLK_F_DISCARD) */
> + __u32 max_discard_sectors;
> + /* The maximum number of discard segments (if VIRTIO_BLK_F_DISCARD) */
> + __u32 max_discard_seg;
> + /* The sector alignment for discard (if VIRTIO_BLK_F_DISCARD) */
> + __u32 discard_sector_alignment;
> + /* The maximum number of write zeroes sectors (if VIRTIO_BLK_F_WRITE_ZEROES) */
> + __u32 max_write_zeroes_sectors;
> + /* The maximum number of write zeroes segments (if VIRTIO_BLK_F_WRITE_ZEROES) */
> + __u32 max_write_zeroes_seg;
> + /* Device clear this bit when write zeroes command cannot result in
> + * deallocating one or more sectors
> + * (if VIRTIO_BLK_F_WRITE_ZEROES with unmap bit)
Pls fix grammar in this comment, I am not sure what are you trying to
say.
> + */
> + __u8 write_zeroes_may_unmap;
> + __u8 unused1[3];
> } __attribute__((packed));
>
> /*
> @@ -114,6 +132,12 @@ struct virtio_blk_config {
> /* Get device ID command */
> #define VIRTIO_BLK_T_GET_ID 8
>
> +/* Discard command */
> +#define VIRTIO_BLK_T_DISCARD 11
> +
> +/* Write zeroes command */
> +#define VIRTIO_BLK_T_WRITE_ZEROES 13
> +
> #ifndef VIRTIO_BLK_NO_LEGACY
> /* Barrier before this op. */
> #define VIRTIO_BLK_T_BARRIER 0x80000000
> @@ -133,6 +157,21 @@ struct virtio_blk_outhdr {
> __virtio64 sector;
> };
>
> +/*
> + * discard/write zeroes range for each request.
> + */
> +struct virtio_blk_discard_write_zeroes {
> + /* discard/write zeroes start sector */
> + __virtio64 sector;
> + /* number of discard/write zeroes sectors */
> + __virtio32 num_sectors;
> + /* valid for write zeroes command */
> + struct {
> + __virtio32 unmap:1;
> + __virtio32 reserved:31;
> + } flags;
> +};
> +
You can't use bitmaps in portable code.
The format differs between architectures.
> #ifndef VIRTIO_BLK_NO_LEGACY
> struct virtio_scsi_inhdr {
> __virtio32 errors;
> --
> 1.9.3
^ permalink raw reply [flat|nested] 3+ messages in thread
* RE: [PATCH v3] virtio_blk: add DISCARD and WRIET ZEROES command support
2018-03-30 1:07 ` Michael S. Tsirkin
@ 2018-05-23 20:37 ` Liu, Changpeng
0 siblings, 0 replies; 3+ messages in thread
From: Liu, Changpeng @ 2018-05-23 20:37 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: virtualization, stefanha, pbonzini, cavery
> -----Original Message-----
> From: Michael S. Tsirkin [mailto:mst@redhat.com]
> Sent: Thursday, March 29, 2018 6:07 PM
> To: Liu, Changpeng <changpeng.liu@intel.com>
> Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> stefanha@redhat.com; jasowang@redhat.com; pbonzini@redhat.com;
> Wodkowski, PawelX <pawelx.wodkowski@intel.com>; Harris, James R
> <james.r.harris@intel.com>; fabio.miranda.martins@canonical.com
> Subject: Re: [PATCH v3] virtio_blk: add DISCARD and WRIET ZEROES command
> support
>
> On Fri, Mar 30, 2018 at 08:49:34AM +0800, Changpeng Liu wrote:
> > Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES
> > command support, this will impact the performance when using SSD
> > backend over file systems.
> >
> > The idea here is using 16 Bytes payload as one descriptor for
> > DISCARD/WRITE ZEROES command, users can put several ranges into
> > one command, for the purpose to support such feature, two feature
> > flags VIRTIO_BLK_F_DISCARD/VIRTIO_BLK_F_WRITE_ZEROES and two
> > commands VIRTIO_BLK_T_DISCARD/VIRTIO_BLK_T_WRITE_ZEROES are
> > introduced, and some parameters are added to the configuration
> > space to tell the OS the granularity of DISCARD/WRITE ZEROES
> > commands.
>
> Pls fix grammar in this comment, I am not sure what are you
> trying to say.
Okay, will add more description here.
>
> >
> > The specification change list here:
> > https://github.com/oasis-tcs/virtio-spec
>
> Which commit?
>
> > CHANGELOG:
> > v3: finalized the specification change.
>
> Changelog belongs after -- and should be complete
> including changes v1 to v2.
>
> > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > ---
> > drivers/block/virtio_blk.c | 96
> +++++++++++++++++++++++++++++++++++++++--
> > include/uapi/linux/virtio_blk.h | 39 +++++++++++++++++
> > 2 files changed, 132 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> > index 4a07593c..1943adb 100644
> > --- a/drivers/block/virtio_blk.c
> > +++ b/drivers/block/virtio_blk.c
> > @@ -172,10 +172,53 @@ static int virtblk_add_req(struct virtqueue *vq, struct
> virtblk_req *vbr,
> > return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
> > }
> >
> > +static inline int virtblk_setup_discard_write_zeroes(struct request *req, bool
> unmap)
> > +{
> > + unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
>
> Split on two lines pls:
> unsigned short n = 0;
>
> > + u32 block_size = queue_logical_block_size(req->q);
>
> Seems to be unused except for the sanity check below. Why?
>
> > + struct virtio_blk_discard_write_zeroes *range;
> > + struct bio *bio;
> > +
> > + if (block_size < 512 || !block_size)
>
> Why 512? And when is it 0?
Ok, actually this check is not necessary, will remove it.
>
> > + return -1;
>
> -1 isn't a normal errno code.
>
> > +
> > + range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
>
> This might be pretty large: with 64K segments it looks like you are
> trying to allocate ~1Mbyte with GFP_ATOMIC which is unlikely to succeed.
> Can we split this up in chunks?
This is already chunks now, if the backend can only support 1 segment, so the number
is always 1, I can change the GFP_ATOMIC with GFP_KERNEL.
>
> > + if (!range)
> > + return -1;
> > +
> > + __rq_for_each_bio(bio, req) {
> > + u64 sector = bio->bi_iter.bi_sector;
> > + u32 num_sectors = bio->bi_iter.bi_size >> 9;
>
> why 9?
The sectors in virtio-blk protocol and Linux is expressed by 512-bytes.
>
> > +
> > + range[n].flags.unmap = cpu_to_le32(unmap);
> > + range[n].flags.reserved = cpu_to_le32(0);
> > + range[n].num_sectors = cpu_to_le32(num_sectors);
> > + range[n].sector = cpu_to_le64(sector);
>
> Isn't this causing sparse warnings?
No warning when I complied the module.
>
>
> > + n++;
> > + }
> > +
> > + if (WARN_ON_ONCE(n != segments)) {
>
> and when does this happen?
This check shouldn't happen too, will remove it.
>
> > + kfree(range);
> > + return -1;
> > + }
> > +
> > + req->special_vec.bv_page = virt_to_page(range);
> > + req->special_vec.bv_offset = offset_in_page(range);
> > + req->special_vec.bv_len = sizeof(*range) * segments;
> > + req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> > +
> > + return 0;
> > +}
> > +
> > static inline void virtblk_request_done(struct request *req)
> > {
> > struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
> >
> > + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> > + kfree(page_address(req->special_vec.bv_page) +
> > + req->special_vec.bv_offset);
> > + }
> > +
> > switch (req_op(req)) {
> > case REQ_OP_SCSI_IN:
> > case REQ_OP_SCSI_OUT:
> > @@ -225,6 +268,7 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> > int qid = hctx->queue_num;
> > int err;
> > bool notify = false;
> > + bool unmap = false;
> > u32 type;
> >
> > BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> > @@ -237,6 +281,13 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> > case REQ_OP_FLUSH:
> > type = VIRTIO_BLK_T_FLUSH;
> > break;
> > + case REQ_OP_DISCARD:
> > + type = VIRTIO_BLK_T_DISCARD;
> > + break;
> > + case REQ_OP_WRITE_ZEROES:
> > + type = VIRTIO_BLK_T_WRITE_ZEROES;
> > + unmap = !(req->cmd_flags & REQ_NOUNMAP);
> > + break;
> > case REQ_OP_SCSI_IN:
> > case REQ_OP_SCSI_OUT:
> > type = VIRTIO_BLK_T_SCSI_CMD;
> > @@ -256,9 +307,16 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> >
> > blk_mq_start_request(req);
> >
> > + if (type == VIRTIO_BLK_T_DISCARD || type ==
> VIRTIO_BLK_T_WRITE_ZEROES) {
> > + err = virtblk_setup_discard_write_zeroes(req, unmap);
> > + if (err)
> > + return BLK_STS_IOERR;
>
> Does a failure actually indicate an IO error?
Good catch, BLK_STS_RESOURCE makes more sense.
>
>
> > + }
> > +
> > num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> > if (num) {
> > - if (rq_data_dir(req) == WRITE)
> > + if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD
> ||
> > + type == VIRTIO_BLK_T_WRITE_ZEROES)
> > vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
> VIRTIO_BLK_T_OUT);
> > else
> > vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
> VIRTIO_BLK_T_IN);
> > @@ -777,6 +835,38 @@ static int virtblk_probe(struct virtio_device *vdev)
> > if (!err && opt_io_size)
> > blk_queue_io_opt(q, blk_size * opt_io_size);
> >
> > + if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> > + q->limits.discard_granularity = blk_size;
> > +
> > + virtio_cread(vdev, struct virtio_blk_config,
> discard_sector_alignment, &v);
> > + if (v)
> > + q->limits.discard_alignment = v << 9;
> > + else
> > + q->limits.discard_alignment = 0;
> > +
> > + virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors,
> &v);
> > + if (v)
> > + blk_queue_max_discard_sectors(q, v);
> > + else
> > + blk_queue_max_discard_sectors(q, UINT_MAX);
> > +
> > + virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &v);
> > + if (v)
> > + blk_queue_max_discard_segments(q, v);
> > + else
> > + blk_queue_max_discard_segments(q, USHRT_MAX);
> > +
> > + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
> > + }
> > +
> > + if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> > + virtio_cread(vdev, struct virtio_blk_config,
> max_write_zeroes_sectors, &v);
> > + if (v)
> > + blk_queue_max_write_zeroes_sectors(q, v);
> > + else
> > + blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
> > + }
> > +
> > virtblk_update_capacity(vblk, false);
> > virtio_device_ready(vdev);
> >
> > @@ -885,14 +975,14 @@ static int virtblk_restore(struct virtio_device *vdev)
> > VIRTIO_BLK_F_SCSI,
> > #endif
> > VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY,
> VIRTIO_BLK_F_CONFIG_WCE,
> > - VIRTIO_BLK_F_MQ,
> > + VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD,
> VIRTIO_BLK_F_WRITE_ZEROES,
> > }
> > ;
> > static unsigned int features[] = {
> > VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
> VIRTIO_BLK_F_GEOMETRY,
> > VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
> > VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY,
> VIRTIO_BLK_F_CONFIG_WCE,
> > - VIRTIO_BLK_F_MQ,
> > + VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD,
> VIRTIO_BLK_F_WRITE_ZEROES,
> > };
> >
> > static struct virtio_driver virtio_blk = {
> > diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> > index 9ebe4d9..78a60e6 100644
> > --- a/include/uapi/linux/virtio_blk.h
> > +++ b/include/uapi/linux/virtio_blk.h
> > @@ -38,6 +38,8 @@
> > #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
> > #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is
> available */
> > #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
> > +#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD command is
> supported */
> > +#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES
> command is supported */
> >
> > /* Legacy feature bits */
> > #ifndef VIRTIO_BLK_NO_LEGACY
> > @@ -86,6 +88,22 @@ struct virtio_blk_config {
> >
> > /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
> > __u16 num_queues;
> > + /* The maximum discard segment size (if VIRTIO_BLK_F_DISCARD) */
> > + __u32 max_discard_sectors;
> > + /* The maximum number of discard segments (if VIRTIO_BLK_F_DISCARD)
> */
> > + __u32 max_discard_seg;
> > + /* The sector alignment for discard (if VIRTIO_BLK_F_DISCARD) */
> > + __u32 discard_sector_alignment;
> > + /* The maximum number of write zeroes sectors (if
> VIRTIO_BLK_F_WRITE_ZEROES) */
> > + __u32 max_write_zeroes_sectors;
> > + /* The maximum number of write zeroes segments (if
> VIRTIO_BLK_F_WRITE_ZEROES) */
> > + __u32 max_write_zeroes_seg;
> > + /* Device clear this bit when write zeroes command cannot result in
> > + * deallocating one or more sectors
> > + * (if VIRTIO_BLK_F_WRITE_ZEROES with unmap bit)
>
> Pls fix grammar in this comment, I am not sure what are you trying to
> say.
>
> > + */
> > + __u8 write_zeroes_may_unmap;
> > + __u8 unused1[3];
> > } __attribute__((packed));
> >
> > /*
> > @@ -114,6 +132,12 @@ struct virtio_blk_config {
> > /* Get device ID command */
> > #define VIRTIO_BLK_T_GET_ID 8
> >
> > +/* Discard command */
> > +#define VIRTIO_BLK_T_DISCARD 11
> > +
> > +/* Write zeroes command */
> > +#define VIRTIO_BLK_T_WRITE_ZEROES 13
> > +
> > #ifndef VIRTIO_BLK_NO_LEGACY
> > /* Barrier before this op. */
> > #define VIRTIO_BLK_T_BARRIER 0x80000000
> > @@ -133,6 +157,21 @@ struct virtio_blk_outhdr {
> > __virtio64 sector;
> > };
> >
> > +/*
> > + * discard/write zeroes range for each request.
> > + */
> > +struct virtio_blk_discard_write_zeroes {
> > + /* discard/write zeroes start sector */
> > + __virtio64 sector;
> > + /* number of discard/write zeroes sectors */
> > + __virtio32 num_sectors;
> > + /* valid for write zeroes command */
> > + struct {
> > + __virtio32 unmap:1;
> > + __virtio32 reserved:31;
> > + } flags;
> > +};
> > +
>
> You can't use bitmaps in portable code.
> The format differs between architectures.
>
> > #ifndef VIRTIO_BLK_NO_LEGACY
> > struct virtio_scsi_inhdr {
> > __virtio32 errors;
> > --
> > 1.9.3
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-05-23 20:37 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-30 0:49 [PATCH v3] virtio_blk: add DISCARD and WRIET ZEROES command support Changpeng Liu
2018-03-30 1:07 ` Michael S. Tsirkin
2018-05-23 20:37 ` Liu, Changpeng
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.