All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/5] add TRIM/UNMAP support
@ 2010-11-25 13:56 Christoph Hellwig
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
                   ` (5 more replies)
  0 siblings, 6 replies; 27+ messages in thread
From: Christoph Hellwig @ 2010-11-25 13:56 UTC (permalink / raw)
  To: qemu-devel

This patchset adds support for the ATA TRIM and SCSI WRITE SAME with
unmap commands, which allow reclaiming free space from a backing image.

The user facing implementation is pretty complete, but not really
efficient because the underlying bdrv_discard implementation doesn't
use the aio implementation yet.  The reason for that is that the SCSI
layer doesn't really allow any asynchronous commands except for
READ/WRITE by design, and implementing the ATA TRIM command with it's
multiple ranges is rather painful, and combined with the SCSI limitation
I didn't bother yet.  The only backend support so far is the XFS hole
punching ioctl, but others can be added easily when they become
available.  A virtio implementation for a discard command would also
be pretty easy, but until we actually support a better backend then
a plain sparse file it's not worth using for production enviroments
anyway, but more for playing with the thin provisioning infrastructure,
or observing guest behaviour when TRIM / unmap is supported.

If the support is enabled and the backend doesn't support hole punching
the TRIM / WRITE SAME commands become no-ops so that migration from
hosts supporting or not supporting it works.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [PATCH 1/5] block: add discard support
  2010-11-25 13:56 [Qemu-devel] [PATCH 0/5] add TRIM/UNMAP support Christoph Hellwig
@ 2010-11-25 13:57 ` Christoph Hellwig
  2010-11-25 14:09   ` malc
  2010-11-25 14:45   ` Stefan Hajnoczi
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 2/5] scsi-disk: support WRITE SAME (16) with unmap bit Christoph Hellwig
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 27+ messages in thread
From: Christoph Hellwig @ 2010-11-25 13:57 UTC (permalink / raw)
  To: qemu-devel

Add a new bdrv_discard method to free blocks in a mapping image, and a new
drive property to set the granularity for these discard.  If no discard
granularity support is set discard support is disabled.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/block.c
===================================================================
--- qemu.orig/block.c	2010-11-25 12:34:10.580256183 +0100
+++ qemu/block.c	2010-11-25 12:34:41.761254654 +0100
@@ -1499,6 +1499,13 @@ int bdrv_has_zero_init(BlockDriverState
     return 1;
 }
 
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+    if (!bs->drv || !bs->drv->bdrv_discard)
+        return 0;
+    return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
+}
+
 /*
  * Returns true iff the specified sector is present in the disk image. Drivers
  * not implementing the functionality are assumed to not support backing files,
Index: qemu/block.h
===================================================================
--- qemu.orig/block.h	2010-11-25 12:34:10.589253738 +0100
+++ qemu/block.h	2010-11-25 12:34:12.148012156 +0100
@@ -146,6 +146,7 @@ int bdrv_flush(BlockDriverState *bs);
 void bdrv_flush_all(void);
 void bdrv_close_all(void);
 
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
 int bdrv_has_zero_init(BlockDriverState *bs);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 	int *pnum);
Index: qemu/block_int.h
===================================================================
--- qemu.orig/block_int.h	2010-11-25 12:34:10.595254018 +0100
+++ qemu/block_int.h	2010-11-25 12:34:12.150013832 +0100
@@ -73,6 +73,8 @@ struct BlockDriver {
         BlockDriverCompletionFunc *cb, void *opaque);
     BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
         BlockDriverCompletionFunc *cb, void *opaque);
+    int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num,
+                        int nb_sectors);
 
     int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs,
         int num_reqs);
@@ -227,6 +229,7 @@ typedef struct BlockConf {
     uint16_t logical_block_size;
     uint16_t min_io_size;
     uint32_t opt_io_size;
+    uint32_t discard_granularity;
 } BlockConf;
 
 static inline unsigned int get_physical_block_exp(BlockConf *conf)
@@ -249,6 +252,8 @@ static inline unsigned int get_physical_
     DEFINE_PROP_UINT16("physical_block_size", _state,                   \
                        _conf.physical_block_size, 512),                 \
     DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),  \
-    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0)
+    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \
+    DEFINE_PROP_UINT32("discard_granularity", _state, \
+    			_conf.discard_granularity, 0)
 
 #endif /* BLOCK_INT_H */
Index: qemu/block/raw.c
===================================================================
--- qemu.orig/block/raw.c	2010-11-25 12:34:10.601259605 +0100
+++ qemu/block/raw.c	2010-11-25 12:34:12.155004124 +0100
@@ -65,6 +65,11 @@ static int raw_probe(const uint8_t *buf,
    return 1; /* everything can be opened as raw image */
 }
 
+static int raw_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+    return bdrv_discard(bs->file, sector_num, nb_sectors);
+}
+
 static int raw_is_inserted(BlockDriverState *bs)
 {
     return bdrv_is_inserted(bs->file);
@@ -130,6 +135,7 @@ static BlockDriver bdrv_raw = {
     .bdrv_aio_readv     = raw_aio_readv,
     .bdrv_aio_writev    = raw_aio_writev,
     .bdrv_aio_flush     = raw_aio_flush,
+    .bdrv_discard       = raw_discard,
 
     .bdrv_is_inserted   = raw_is_inserted,
     .bdrv_eject         = raw_eject,

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [PATCH 2/5] scsi-disk: support WRITE SAME (16) with unmap bit
  2010-11-25 13:56 [Qemu-devel] [PATCH 0/5] add TRIM/UNMAP support Christoph Hellwig
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
@ 2010-11-25 13:57 ` Christoph Hellwig
  2010-11-25 14:10   ` malc
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 3/5] make dma_bdrv_io available to drivers Christoph Hellwig
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2010-11-25 13:57 UTC (permalink / raw)
  To: qemu-devel

Support discards via the WRITE SAME command with the unmap bit set, and
tell the initiator about the support for it via the block limit and the
new thin provisioning EVPD pages.  Also fix the comment which incorrectly
describedthe block limits EVPD page.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/hw/scsi-disk.c
===================================================================
--- qemu.orig/hw/scsi-disk.c	2010-11-23 14:14:27.259253110 +0100
+++ qemu/hw/scsi-disk.c	2010-11-23 14:31:41.190253529 +0100
@@ -444,8 +444,10 @@ static int scsi_disk_emulate_inquiry(SCS
             buflen += id_len;
             break;
         }
-        case 0xb0: /* block device characteristics */
+        case 0xb0: /* block limits */
         {
+            unsigned int unmap_sectors =
+                    s->qdev.conf.discard_granularity / s->qdev.blocksize;
             unsigned int min_io_size =
                     s->qdev.conf.min_io_size / s->qdev.blocksize;
             unsigned int opt_io_size =
@@ -465,6 +467,21 @@ static int scsi_disk_emulate_inquiry(SCS
             outbuf[13] = (opt_io_size >> 16) & 0xff;
             outbuf[14] = (opt_io_size >> 8) & 0xff;
             outbuf[15] = opt_io_size & 0xff;
+
+            /* optimal unmap granularity */
+            outbuf[28] = (unmap_sectors >> 24) & 0xff;
+            outbuf[29] = (unmap_sectors >> 16) & 0xff;
+            outbuf[30] = (unmap_sectors >> 8) & 0xff;
+            outbuf[31] = unmap_sectors & 0xff;
+            break;
+        }
+        case 0xb2: /* thin provisioning */
+        {
+            outbuf[3] = buflen = 8;
+            outbuf[4] = 0;
+            outbuf[5] = 0x40;	/* write same with unmap supported */
+            outbuf[6] = 0;
+            outbuf[7] = 0;
             break;
         }
         default:
@@ -932,6 +949,11 @@ static int scsi_disk_emulate_command(SCS
             outbuf[11] = 0;
             outbuf[12] = 0;
             outbuf[13] = get_physical_block_exp(&s->qdev.conf);
+
+            /* set TPE bit if the format supports discard */
+            if (s->qdev.conf.discard_granularity)
+                outbuf[14] = 0x80;
+
             /* Protection, exponent and lowest lba field left blank. */
             buflen = req->cmd.xfer;
             break;
@@ -1128,6 +1150,30 @@ static int32_t scsi_send_command(SCSIDev
             goto illegal_lba;
         }
         break;
+    case WRITE_SAME_16:
+        DPRINTF("WRITE SAME(16) (sector %" PRId64 ", count %d)\n", lba, len);
+
+        if (lba > s->max_lba) {
+            goto illegal_lba;
+        }
+
+        /*
+         * We only support WRITE SAME with the unmap bit set for now.
+         */
+        if (!(buf[1] & 0x8)) {
+            goto fail;
+        }
+
+        rc = bdrv_discard(s->bs, lba * s->cluster_size, len * s->cluster_size);
+        if (rc < 0) {
+            /* XXX: better error code ?*/
+            goto fail;
+        }
+
+        scsi_req_set_status(&r->req, GOOD, NO_SENSE);
+        scsi_req_complete(&r->req);
+        scsi_remove_request(r);
+        return 0;
     default:
 	DPRINTF("Unknown SCSI command (%2.2x)\n", buf[0]);
     fail:
Index: qemu/hw/scsi-defs.h
===================================================================
--- qemu.orig/hw/scsi-defs.h	2010-11-23 14:14:27.267253040 +0100
+++ qemu/hw/scsi-defs.h	2010-11-23 14:23:37.995253808 +0100
@@ -84,6 +84,7 @@
 #define MODE_SENSE_10         0x5a
 #define PERSISTENT_RESERVE_IN 0x5e
 #define PERSISTENT_RESERVE_OUT 0x5f
+#define WRITE_SAME_16         0x93
 #define MAINTENANCE_IN        0xa3
 #define MAINTENANCE_OUT       0xa4
 #define MOVE_MEDIUM           0xa5

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [PATCH 3/5] make dma_bdrv_io available to drivers
  2010-11-25 13:56 [Qemu-devel] [PATCH 0/5] add TRIM/UNMAP support Christoph Hellwig
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 2/5] scsi-disk: support WRITE SAME (16) with unmap bit Christoph Hellwig
@ 2010-11-25 13:57 ` Christoph Hellwig
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 4/5] ide: add TRIM support Christoph Hellwig
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2010-11-25 13:57 UTC (permalink / raw)
  To: qemu-devel

Make dma_bdrv_io available for drivers, and pass an explicit I/O function
instead of hardcoding bdrv_aio_readv/bdrv_aio_writev.  This is required
to implement non-READ/WRITE dma commands in the ide driver, e.g. the
upcoming TRIM support.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/dma-helpers.c
===================================================================
--- qemu.orig/dma-helpers.c	2010-11-23 14:13:42.178253390 +0100
+++ qemu/dma-helpers.c	2010-11-23 14:14:45.186253110 +0100
@@ -47,6 +47,7 @@ typedef struct {
     target_phys_addr_t sg_cur_byte;
     QEMUIOVector iov;
     QEMUBH *bh;
+    DMAIOFunc *io_func;
 } DMAAIOCB;
 
 static void dma_bdrv_cb(void *opaque, int ret);
@@ -116,13 +117,8 @@ static void dma_bdrv_cb(void *opaque, in
         return;
     }
 
-    if (dbs->is_write) {
-        dbs->acb = bdrv_aio_writev(dbs->bs, dbs->sector_num, &dbs->iov,
-                                   dbs->iov.size / 512, dma_bdrv_cb, dbs);
-    } else {
-        dbs->acb = bdrv_aio_readv(dbs->bs, dbs->sector_num, &dbs->iov,
-                                  dbs->iov.size / 512, dma_bdrv_cb, dbs);
-    }
+    dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
+                            dbs->iov.size / 512, dma_bdrv_cb, dbs);
     if (!dbs->acb) {
         dma_bdrv_unmap(dbs);
         qemu_iovec_destroy(&dbs->iov);
@@ -144,12 +140,12 @@ static AIOPool dma_aio_pool = {
     .cancel             = dma_aio_cancel,
 };
 
-static BlockDriverAIOCB *dma_bdrv_io(
+BlockDriverAIOCB *dma_bdrv_io(
     BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
-    BlockDriverCompletionFunc *cb, void *opaque,
-    int is_write)
+    DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
+    void *opaque, int is_write)
 {
-    DMAAIOCB *dbs =  qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
+    DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
 
     dbs->acb = NULL;
     dbs->bs = bs;
@@ -158,6 +154,7 @@ static BlockDriverAIOCB *dma_bdrv_io(
     dbs->sg_cur_index = 0;
     dbs->sg_cur_byte = 0;
     dbs->is_write = is_write;
+    dbs->io_func = io_func;
     dbs->bh = NULL;
     qemu_iovec_init(&dbs->iov, sg->nsg);
     dma_bdrv_cb(dbs, 0);
@@ -173,12 +170,12 @@ BlockDriverAIOCB *dma_bdrv_read(BlockDri
                                 QEMUSGList *sg, uint64_t sector,
                                 void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, cb, opaque, 0);
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, 0);
 }
 
 BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
                                  QEMUSGList *sg, uint64_t sector,
                                  void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, cb, opaque, 1);
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, 1);
 }
Index: qemu/dma.h
===================================================================
--- qemu.orig/dma.h	2010-11-23 14:13:42.185253809 +0100
+++ qemu/dma.h	2010-11-23 14:14:45.186253110 +0100
@@ -32,6 +32,14 @@ void qemu_sglist_add(QEMUSGList *qsg, ta
                      target_phys_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 
+typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num,
+                                 QEMUIOVector *iov, int nb_sectors,
+                                 BlockDriverCompletionFunc *cb, void *opaque);
+
+BlockDriverAIOCB *dma_bdrv_io(BlockDriverState *bs,
+                              QEMUSGList *sg, uint64_t sector_num,
+                              DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
+                              void *opaque, int is_write);
 BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
                                 QEMUSGList *sg, uint64_t sector,
                                 BlockDriverCompletionFunc *cb, void *opaque);

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [PATCH 4/5] ide: add TRIM support
  2010-11-25 13:56 [Qemu-devel] [PATCH 0/5] add TRIM/UNMAP support Christoph Hellwig
                   ` (2 preceding siblings ...)
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 3/5] make dma_bdrv_io available to drivers Christoph Hellwig
@ 2010-11-25 13:57 ` Christoph Hellwig
  2010-11-25 14:10   ` malc
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 5/5] raw-posix: add discard support Christoph Hellwig
  2010-12-01 15:35 ` [Qemu-devel] PATCH 0/5] add TRIM/UNMAP support, v2 Christoph Hellwig
  5 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2010-11-25 13:57 UTC (permalink / raw)
  To: qemu-devel

Add support for the data set management command, and the TRIM sub function
in it.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/hw/ide/core.c
===================================================================
--- qemu.orig/hw/ide/core.c	2010-11-25 12:33:40.961255346 +0100
+++ qemu/hw/ide/core.c	2010-11-25 13:00:28.415005802 +0100
@@ -145,6 +145,8 @@ static void ide_identify(IDEState *s)
     put_le16(p + 66, 120);
     put_le16(p + 67, 120);
     put_le16(p + 68, 120);
+    if (dev && dev->conf.discard_granularity)
+        put_le16(p + 69, (1 << 14));	/* determinate TRIM behavior */
     put_le16(p + 80, 0xf0); /* ata3 -> ata6 supported */
     put_le16(p + 81, 0x16); /* conforms to ata5 */
     /* 14=NOP supported, 5=WCACHE supported, 0=SMART supported */
@@ -171,6 +173,8 @@ static void ide_identify(IDEState *s)
     dev = s->unit ? s->bus->slave : s->bus->master;
     if (dev && dev->conf.physical_block_size)
         put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf));
+    if (dev && dev->conf.discard_granularity)
+        put_le16(p + 169, 1);	/* TRIM support */
 
     memcpy(s->identify_data, p, sizeof(s->identify_data));
     s->identify_set = 1;
@@ -1787,6 +1791,128 @@ static void ide_clear_hob(IDEBus *bus)
     bus->ifs[1].select &= ~(1 << 7);
 }
 
+typedef struct TrimAIOCB {
+    BlockDriverAIOCB common;
+    QEMUBH *bh;
+    int ret;
+} TrimAIOCB;
+
+static void trim_aio_cancel(BlockDriverAIOCB *acb)
+{
+    TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common);
+
+    qemu_bh_delete(iocb->bh);
+    iocb->bh = NULL;
+    qemu_aio_release(iocb);
+}
+
+static AIOPool trim_aio_pool = {
+    .aiocb_size         = sizeof(TrimAIOCB),
+    .cancel             = trim_aio_cancel,
+};
+
+static void ide_trim_bh_cb(void *opaque)
+{
+    TrimAIOCB *iocb = opaque;
+
+    iocb->common.cb(iocb->common.opaque, iocb->ret);
+
+    qemu_bh_delete(iocb->bh);
+    iocb->bh = NULL;
+
+    qemu_aio_release(iocb);
+}
+
+static BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    TrimAIOCB *iocb;
+    int i, j, ret;
+
+    iocb = qemu_aio_get(&trim_aio_pool, bs, cb, opaque);
+    iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
+    iocb->ret = 0;
+
+    for (j = 0; j < qiov->niov; j++) {
+        uint64_t *buffer = qiov->iov[j].iov_base;
+
+ 	for (i = 0; i < qiov->iov[j].iov_len / 8; i++) {
+		/* 6-byte LBA + 2-byte range per entry */
+		uint64_t entry = le64_to_cpu(buffer[i]);
+		uint64_t sector = entry & 0x0000ffffffffffffULL;
+		uint16_t count = entry >> 48;
+
+		if (count == 0)
+			 break;
+
+	        ret = bdrv_discard(bs, sector * 512, count * 512);
+                if (!iocb->ret)
+                    iocb->ret = ret;
+	}
+    }
+
+    qemu_bh_schedule(iocb->bh);
+
+    return &iocb->common;
+}
+
+static void ide_trim_dma_cb(void *opaque, int ret)
+{
+    BMDMAState *bm = opaque;
+    IDEState *s = bmdma_active_if(bm);
+    int n;
+    int64_t sector_num;
+
+    if (ret < 0) {
+        if (ide_handle_rw_error(s, -ret,  BM_STATUS_DMA_RETRY))
+            return;
+    }
+
+    n = s->io_buffer_size >> 9;
+    sector_num = ide_get_sector(s);
+    if (n > 0) {
+        dma_buf_commit(s, 0);
+        sector_num += n;
+        ide_set_sector(s, sector_num);
+        s->nsector -= n;
+    }
+
+    /* end of transfer ? */
+    if (s->nsector == 0) {
+        s->status = READY_STAT | SEEK_STAT;
+        ide_set_irq(s->bus);
+    eot:
+        bm->status &= ~BM_STATUS_DMAING;
+        bm->status |= BM_STATUS_INT;
+        bm->dma_cb = NULL;
+        bm->unit = -1;
+        bm->aiocb = NULL;
+        return;
+    }
+
+    n = s->nsector;
+    s->io_buffer_size = n * 512;
+    /* launch next transfer */
+    if (dma_buf_prepare(bm, 0) == 0)
+        goto eot;
+#ifdef DEBUG_AIO
+    printf("aio_write: sector_num=%" PRId64 " n=%d\n", sector_num, n);
+#endif
+    bm->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num, ide_issue_trim,
+                    ide_trim_dma_cb, bm, 1);
+    ide_dma_submit_check(s, ide_trim_dma_cb, bm);
+}
+
+static void ide_trim(IDEState *s)
+{
+    s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT;
+    s->io_buffer_index = 0;
+    s->io_buffer_size = 0;
+    s->is_read = 0;
+    ide_dma_start(s, ide_trim_dma_cb);
+}
+
 void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     IDEBus *bus = opaque;
@@ -1866,6 +1992,17 @@ void ide_ioport_write(void *opaque, uint
             break;
 
         switch(val) {
+        case WIN_DSM:
+            switch (s->feature) {
+            case DSM_TRIM:
+                if (!s->bs)
+                   goto abort_cmd;
+                ide_trim(s);
+                break;
+            default:
+	        goto abort_cmd;
+            }
+            break;
         case WIN_IDENTIFY:
             if (s->bs && s->drive_kind != IDE_CD) {
                 if (s->drive_kind != IDE_CFATA)
Index: qemu/hw/ide/internal.h
===================================================================
--- qemu.orig/hw/ide/internal.h	2010-11-25 12:33:40.978254647 +0100
+++ qemu/hw/ide/internal.h	2010-11-25 12:39:16.668023539 +0100
@@ -59,7 +59,11 @@ typedef struct BMDMAState BMDMAState;
  */
 #define CFA_REQ_EXT_ERROR_CODE		0x03 /* CFA Request Extended Error Code */
 /*
- *	0x04->0x07 Reserved
+ *	0x04->0x05 Reserved
+ */
+#define WIN_DSM				0x06
+/*
+ *	0x07 Reserved
  */
 #define WIN_SRST			0x08 /* ATAPI soft reset command */
 #define WIN_DEVICE_RESET		0x08
@@ -187,6 +191,9 @@ typedef struct BMDMAState BMDMAState;
 
 #define IDE_DMA_BUF_SECTORS 256
 
+/* feature values for Data Set Management */
+#define DSM_TRIM			0x01
+
 #if (IDE_DMA_BUF_SECTORS < MAX_MULT_SECTORS)
 #error "IDE_DMA_BUF_SECTORS must be bigger or equal to MAX_MULT_SECTORS"
 #endif
Index: qemu/hw/ide/qdev.c
===================================================================
--- qemu.orig/hw/ide/qdev.c	2010-11-25 14:41:16.846004402 +0100
+++ qemu/hw/ide/qdev.c	2010-11-25 14:41:44.529062161 +0100
@@ -110,6 +110,11 @@ static int ide_drive_initfn(IDEDevice *d
     const char *serial;
     DriveInfo *dinfo;
 
+    if (dev->conf.discard_granularity && dev->conf.discard_granularity != 512) {
+        error_report("discard_granularity must be 512 for ide");
+        return -1;
+    }
+
     serial = dev->serial;
     if (!serial) {
         /* try to fall back to value set with legacy -drive serial=... */

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [PATCH 5/5] raw-posix: add discard support
  2010-11-25 13:56 [Qemu-devel] [PATCH 0/5] add TRIM/UNMAP support Christoph Hellwig
                   ` (3 preceding siblings ...)
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 4/5] ide: add TRIM support Christoph Hellwig
@ 2010-11-25 13:57 ` Christoph Hellwig
  2010-12-01 15:35 ` [Qemu-devel] PATCH 0/5] add TRIM/UNMAP support, v2 Christoph Hellwig
  5 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2010-11-25 13:57 UTC (permalink / raw)
  To: qemu-devel

Add support to discard blocks in a raw image residing on an XFS filesystem
by calling the XFS_IOC_UNRESVSP64 ioctl to punch holes.  Support for other
hole punching mechanisms can be added when they become available.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/block/raw-posix.c
===================================================================
--- qemu.orig/block/raw-posix.c	2010-11-25 12:51:18.474004263 +0100
+++ qemu/block/raw-posix.c	2010-11-25 13:00:42.220003844 +0100
@@ -69,6 +69,10 @@
 #include <sys/diskslice.h>
 #endif
 
+#ifdef CONFIG_XFS
+#include <xfs/xfs.h>
+#endif
+
 //#define DEBUG_FLOPPY
 
 //#define DEBUG_BLOCK
@@ -120,6 +124,9 @@ typedef struct BDRVRawState {
 #endif
     uint8_t *aligned_buf;
     unsigned aligned_buf_size;
+#ifdef CONFIG_XFS
+    int is_xfs : 1;
+#endif
 } BDRVRawState;
 
 static int fd_open(BlockDriverState *bs);
@@ -196,6 +203,12 @@ static int raw_open_common(BlockDriverSt
 #endif
     }
 
+#ifdef CONFIG_XFS
+    if (platform_test_xfs_fd(s->fd)) {
+        s->is_xfs = 1;
+    }
+#endif
+
     return 0;
 
 out_free_buf:
@@ -740,6 +753,36 @@ static int raw_flush(BlockDriverState *b
     return qemu_fdatasync(s->fd);
 }
 
+#ifdef CONFIG_XFS
+static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
+{
+    struct xfs_flock64 fl;
+
+    memset(&fl, 0, sizeof(fl));
+    fl.l_whence = SEEK_SET;
+    fl.l_start = sector_num << 9;
+    fl.l_len = (int64_t)nb_sectors << 9;
+
+    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
+        printf("cannot punch hole (%s)\n", strerror(errno));
+        return -errno;
+    }
+
+    return 0;
+}
+#endif
+
+static int raw_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+    BDRVRawState *s = bs->opaque;
+
+#ifdef CONFIG_XFS
+    if (s->is_xfs)
+        return xfs_discard(s, sector_num, nb_sectors);
+#endif
+
+    return 0;
+}
 
 static QEMUOptionParameter raw_create_options[] = {
     {
@@ -761,6 +804,7 @@ static BlockDriver bdrv_file = {
     .bdrv_close = raw_close,
     .bdrv_create = raw_create,
     .bdrv_flush = raw_flush,
+    .bdrv_discard = raw_discard,
 
     .bdrv_aio_readv = raw_aio_readv,
     .bdrv_aio_writev = raw_aio_writev,
Index: qemu/configure
===================================================================
--- qemu.orig/configure	2010-11-25 12:51:18.484004891 +0100
+++ qemu/configure	2010-11-25 13:00:42.222004263 +0100
@@ -288,6 +288,7 @@ xen=""
 linux_aio=""
 attr=""
 vhost_net=""
+xfs=""
 
 gprof="no"
 debug_tcg="no"
@@ -1393,6 +1394,27 @@ EOF
 fi
 
 ##########################################
+# xfsctl() probe, used for raw-posix
+if test "$xfs" != "no" ; then
+  cat > $TMPC << EOF
+#include <xfs/xfs.h>
+int main(void)
+{
+    xfsctl(NULL, 0, 0, NULL);
+    return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+    xfs="yes"
+  else
+    if test "$xfs" = "yes" ; then
+      feature_not_found "uuid"
+    fi
+    xfs=no
+  fi
+fi
+
+##########################################
 # vde libraries probe
 if test "$vde" != "no" ; then
   vde_libs="-lvdeplug"
@@ -2354,6 +2376,7 @@ echo "vhost-net support $vhost_net"
 echo "Trace backend     $trace_backend"
 echo "Trace output file $trace_file-<pid>"
 echo "spice support     $spice"
+echo "xfsctl support    $xfs"
 
 if test $sdl_too_old = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -2499,6 +2522,9 @@ fi
 if test "$uuid" = "yes" ; then
   echo "CONFIG_UUID=y" >> $config_host_mak
 fi
+if test "$xfs" = "yes" ; then
+  echo "CONFIG_XFS=y" >> $config_host_mak
+fi
 qemu_version=`head $source_path/VERSION`
 echo "VERSION=$qemu_version" >>$config_host_mak
 echo "PKGVERSION=$pkgversion" >>$config_host_mak

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] block: add discard support
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
@ 2010-11-25 14:09   ` malc
  2010-11-25 14:45   ` Stefan Hajnoczi
  1 sibling, 0 replies; 27+ messages in thread
From: malc @ 2010-11-25 14:09 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: qemu-devel

On Thu, 25 Nov 2010, Christoph Hellwig wrote:

> Add a new bdrv_discard method to free blocks in a mapping image, and a new
> drive property to set the granularity for these discard.  If no discard
> granularity support is set discard support is disabled.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: qemu/block.c
> ===================================================================
> --- qemu.orig/block.c	2010-11-25 12:34:10.580256183 +0100
> +++ qemu/block.c	2010-11-25 12:34:41.761254654 +0100
> @@ -1499,6 +1499,13 @@ int bdrv_has_zero_init(BlockDriverState
>      return 1;
>  }
>  
> +int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
> +{
> +    if (!bs->drv || !bs->drv->bdrv_discard)
> +        return 0;
> +    return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
> +}
> +
>  /*
>   * Returns true iff the specified sector is present in the disk image. Drivers
>   * not implementing the functionality are assumed to not support backing files,
> Index: qemu/block.h
> ===================================================================
> --- qemu.orig/block.h	2010-11-25 12:34:10.589253738 +0100
> +++ qemu/block.h	2010-11-25 12:34:12.148012156 +0100
> @@ -146,6 +146,7 @@ int bdrv_flush(BlockDriverState *bs);
>  void bdrv_flush_all(void);
>  void bdrv_close_all(void);
>  
> +int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
>  int bdrv_has_zero_init(BlockDriverState *bs);
>  int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
>  	int *pnum);
> Index: qemu/block_int.h
> ===================================================================
> --- qemu.orig/block_int.h	2010-11-25 12:34:10.595254018 +0100
> +++ qemu/block_int.h	2010-11-25 12:34:12.150013832 +0100
> @@ -73,6 +73,8 @@ struct BlockDriver {
>          BlockDriverCompletionFunc *cb, void *opaque);
>      BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
>          BlockDriverCompletionFunc *cb, void *opaque);
> +    int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num,
> +                        int nb_sectors);
>  
>      int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs,
>          int num_reqs);
> @@ -227,6 +229,7 @@ typedef struct BlockConf {
>      uint16_t logical_block_size;
>      uint16_t min_io_size;
>      uint32_t opt_io_size;
> +    uint32_t discard_granularity;
>  } BlockConf;
>  
>  static inline unsigned int get_physical_block_exp(BlockConf *conf)
> @@ -249,6 +252,8 @@ static inline unsigned int get_physical_
>      DEFINE_PROP_UINT16("physical_block_size", _state,                   \
>                         _conf.physical_block_size, 512),                 \
>      DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),  \
> -    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0)
> +    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \
> +    DEFINE_PROP_UINT32("discard_granularity", _state, \
> +    			_conf.discard_granularity, 0)

Tab here.

>  
>  #endif /* BLOCK_INT_H */
> Index: qemu/block/raw.c
> ===================================================================
> --- qemu.orig/block/raw.c	2010-11-25 12:34:10.601259605 +0100
> +++ qemu/block/raw.c	2010-11-25 12:34:12.155004124 +0100
> @@ -65,6 +65,11 @@ static int raw_probe(const uint8_t *buf,
>     return 1; /* everything can be opened as raw image */
>  }
>  
> +static int raw_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
> +{
> +    return bdrv_discard(bs->file, sector_num, nb_sectors);
> +}
> +
>  static int raw_is_inserted(BlockDriverState *bs)
>  {
>      return bdrv_is_inserted(bs->file);
> @@ -130,6 +135,7 @@ static BlockDriver bdrv_raw = {
>      .bdrv_aio_readv     = raw_aio_readv,
>      .bdrv_aio_writev    = raw_aio_writev,
>      .bdrv_aio_flush     = raw_aio_flush,
> +    .bdrv_discard       = raw_discard,
>  
>      .bdrv_is_inserted   = raw_is_inserted,
>      .bdrv_eject         = raw_eject,
> 

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 4/5] ide: add TRIM support
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 4/5] ide: add TRIM support Christoph Hellwig
@ 2010-11-25 14:10   ` malc
  0 siblings, 0 replies; 27+ messages in thread
From: malc @ 2010-11-25 14:10 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: qemu-devel

On Thu, 25 Nov 2010, Christoph Hellwig wrote:

> Add support for the data set management command, and the TRIM sub function
> in it.

Tabs.

[..snip..]

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] scsi-disk: support WRITE SAME (16) with unmap bit
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 2/5] scsi-disk: support WRITE SAME (16) with unmap bit Christoph Hellwig
@ 2010-11-25 14:10   ` malc
  0 siblings, 0 replies; 27+ messages in thread
From: malc @ 2010-11-25 14:10 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: qemu-devel

On Thu, 25 Nov 2010, Christoph Hellwig wrote:

> Support discards via the WRITE SAME command with the unmap bit set, and
> tell the initiator about the support for it via the block limit and the
> new thin provisioning EVPD pages.  Also fix the comment which incorrectly
> describedthe block limits EVPD page.
> 
Tabs again.

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] block: add discard support
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
  2010-11-25 14:09   ` malc
@ 2010-11-25 14:45   ` Stefan Hajnoczi
  1 sibling, 0 replies; 27+ messages in thread
From: Stefan Hajnoczi @ 2010-11-25 14:45 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: qemu-devel

On Thu, Nov 25, 2010 at 1:57 PM, Christoph Hellwig <hch@lst.de> wrote:
> +int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
> +{
> +    if (!bs->drv || !bs->drv->bdrv_discard)
> +        return 0;

!bs->drv is normally -ENOMEDIUM.  Perhaps we shouldn't lump it in with
!bs->drv->bdrv_discard.

Stefan

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] PATCH 0/5] add TRIM/UNMAP support, v2
  2010-11-25 13:56 [Qemu-devel] [PATCH 0/5] add TRIM/UNMAP support Christoph Hellwig
                   ` (4 preceding siblings ...)
  2010-11-25 13:57 ` [Qemu-devel] [PATCH 5/5] raw-posix: add discard support Christoph Hellwig
@ 2010-12-01 15:35 ` Christoph Hellwig
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
                     ` (4 more replies)
  5 siblings, 5 replies; 27+ messages in thread
From: Christoph Hellwig @ 2010-12-01 15:35 UTC (permalink / raw)
  To: qemu-devel

This patchset adds support for the ATA TRIM and SCSI WRITE SAME with
unmap commands, which allow reclaiming free space from a backing image.

The user facing implementation is pretty complete, but not really
efficient because the underlying bdrv_discard implementation doesn't
use the aio implementation yet.  The reason for that is that the SCSI
layer doesn't really allow any asynchronous commands except for
READ/WRITE by design, and implementing the ATA TRIM command with it's
multiple ranges is rather painful, and combined with the SCSI limitation
I didn't bother yet.  The only backend support so far is the XFS hole
punching ioctl, but others can be added easily when they become
available.  A virtio implementation for a discard command would also
be pretty easy, but until we actually support a better backend then
a plain sparse file it's not worth using for production enviroments
anyway, but more for playing with the thin provisioning infrastructure,
or observing guest behaviour when TRIM / unmap is supported.

If the support is enabled and the backend doesn't support hole punching
the TRIM / WRITE SAME commands become no-ops so that migration from
hosts supporting or not supporting it works.

Version 2:
	- replace tabs with spaces
	- return -ENOMEDIUM from bdrv_discard if there's no driver
	  assigned
	- actually list the TP EVPD page as supported when querying
	  for supported EVPD pages

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [PATCH 1/5] block: add discard support
  2010-12-01 15:35 ` [Qemu-devel] PATCH 0/5] add TRIM/UNMAP support, v2 Christoph Hellwig
@ 2010-12-01 15:35   ` Christoph Hellwig
  2010-12-02 12:12     ` Kevin Wolf
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 2/5] scsi-disk: support WRITE SAME (16) with unmap bit Christoph Hellwig
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2010-12-01 15:35 UTC (permalink / raw)
  To: qemu-devel

Add a new bdrv_discard method to free blocks in a mapping image, and a new
drive property to set the granularity for these discard.  If no discard
granularity support is set discard support is disabled.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/block.c
===================================================================
--- qemu.orig/block.c	2010-11-25 16:17:32.922003704 +0100
+++ qemu/block.c	2010-11-29 14:10:21.793255565 +0100
@@ -1499,6 +1499,15 @@ int bdrv_has_zero_init(BlockDriverState
     return 1;
 }
 
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+    if (!bs->drv)
+        return -ENOMEDIUM;
+    if (!bs->drv->bdrv_discard)
+        return 0;
+    return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
+}
+
 /*
  * Returns true iff the specified sector is present in the disk image. Drivers
  * not implementing the functionality are assumed to not support backing files,
Index: qemu/block.h
===================================================================
--- qemu.orig/block.h	2010-11-25 16:17:32.929004193 +0100
+++ qemu/block.h	2010-11-29 14:07:00.267003145 +0100
@@ -146,6 +146,7 @@ int bdrv_flush(BlockDriverState *bs);
 void bdrv_flush_all(void);
 void bdrv_close_all(void);
 
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
 int bdrv_has_zero_init(BlockDriverState *bs);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 	int *pnum);
Index: qemu/block_int.h
===================================================================
--- qemu.orig/block_int.h	2010-11-25 16:17:32.935003774 +0100
+++ qemu/block_int.h	2010-11-29 14:09:31.926264704 +0100
@@ -73,6 +73,8 @@ struct BlockDriver {
         BlockDriverCompletionFunc *cb, void *opaque);
     BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
         BlockDriverCompletionFunc *cb, void *opaque);
+    int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num,
+                        int nb_sectors);
 
     int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs,
         int num_reqs);
@@ -227,6 +229,7 @@ typedef struct BlockConf {
     uint16_t logical_block_size;
     uint16_t min_io_size;
     uint32_t opt_io_size;
+    uint32_t discard_granularity;
 } BlockConf;
 
 static inline unsigned int get_physical_block_exp(BlockConf *conf)
@@ -249,6 +252,8 @@ static inline unsigned int get_physical_
     DEFINE_PROP_UINT16("physical_block_size", _state,                   \
                        _conf.physical_block_size, 512),                 \
     DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),  \
-    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0)
+    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \
+    DEFINE_PROP_UINT32("discard_granularity", _state, \
+                       _conf.discard_granularity, 0)
 
 #endif /* BLOCK_INT_H */
Index: qemu/block/raw.c
===================================================================
--- qemu.orig/block/raw.c	2010-11-25 16:17:32.943003495 +0100
+++ qemu/block/raw.c	2010-11-29 14:07:00.278003495 +0100
@@ -65,6 +65,11 @@ static int raw_probe(const uint8_t *buf,
    return 1; /* everything can be opened as raw image */
 }
 
+static int raw_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+    return bdrv_discard(bs->file, sector_num, nb_sectors);
+}
+
 static int raw_is_inserted(BlockDriverState *bs)
 {
     return bdrv_is_inserted(bs->file);
@@ -130,6 +135,7 @@ static BlockDriver bdrv_raw = {
     .bdrv_aio_readv     = raw_aio_readv,
     .bdrv_aio_writev    = raw_aio_writev,
     .bdrv_aio_flush     = raw_aio_flush,
+    .bdrv_discard       = raw_discard,
 
     .bdrv_is_inserted   = raw_is_inserted,
     .bdrv_eject         = raw_eject,

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [PATCH 2/5] scsi-disk: support WRITE SAME (16) with unmap bit
  2010-12-01 15:35 ` [Qemu-devel] PATCH 0/5] add TRIM/UNMAP support, v2 Christoph Hellwig
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
@ 2010-12-01 15:35   ` Christoph Hellwig
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 3/5] make dma_bdrv_io available to drivers Christoph Hellwig
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2010-12-01 15:35 UTC (permalink / raw)
  To: qemu-devel

Support discards via the WRITE SAME command with the unmap bit set, and
tell the initiator about the support for it via the block limit and the
new thin provisioning EVPD pages.  Also fix the comment which incorrectly
describedthe block limits EVPD page.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/hw/scsi-disk.c
===================================================================
--- qemu.orig/hw/scsi-disk.c	2010-11-30 23:12:59.000000000 +0100
+++ qemu/hw/scsi-disk.c	2010-12-01 12:02:40.126005527 +0100
@@ -424,7 +424,8 @@ static int scsi_disk_emulate_inquiry(SCS
             outbuf[buflen++] = 0x80; // unit serial number
             outbuf[buflen++] = 0x83; // device identification
             if (bdrv_get_type_hint(s->bs) != BDRV_TYPE_CDROM) {
-                outbuf[buflen++] = 0xb0; // block device characteristics
+                outbuf[buflen++] = 0xb0; // block limits
+                outbuf[buflen++] = 0xb2; // thin provisioning
             }
             outbuf[pages] = buflen - pages - 1; // number of pages
             break;
@@ -466,8 +467,10 @@ static int scsi_disk_emulate_inquiry(SCS
             buflen += id_len;
             break;
         }
-        case 0xb0: /* block device characteristics */
+        case 0xb0: /* block limits */
         {
+            unsigned int unmap_sectors =
+                    s->qdev.conf.discard_granularity / s->qdev.blocksize;
             unsigned int min_io_size =
                     s->qdev.conf.min_io_size / s->qdev.blocksize;
             unsigned int opt_io_size =
@@ -492,6 +495,21 @@ static int scsi_disk_emulate_inquiry(SCS
             outbuf[13] = (opt_io_size >> 16) & 0xff;
             outbuf[14] = (opt_io_size >> 8) & 0xff;
             outbuf[15] = opt_io_size & 0xff;
+
+            /* optimal unmap granularity */
+            outbuf[28] = (unmap_sectors >> 24) & 0xff;
+            outbuf[29] = (unmap_sectors >> 16) & 0xff;
+            outbuf[30] = (unmap_sectors >> 8) & 0xff;
+            outbuf[31] = unmap_sectors & 0xff;
+            break;
+        }
+        case 0xb2: /* thin provisioning */
+        {
+            outbuf[3] = buflen = 8;
+            outbuf[4] = 0;
+            outbuf[5] = 0x40; /* write same with unmap supported */
+            outbuf[6] = 0;
+            outbuf[7] = 0;
             break;
         }
         default:
@@ -959,6 +977,11 @@ static int scsi_disk_emulate_command(SCS
             outbuf[11] = 0;
             outbuf[12] = 0;
             outbuf[13] = get_physical_block_exp(&s->qdev.conf);
+
+            /* set TPE bit if the format supports discard */
+            if (s->qdev.conf.discard_granularity)
+                outbuf[14] = 0x80;
+
             /* Protection, exponent and lowest lba field left blank. */
             buflen = req->cmd.xfer;
             break;
@@ -1123,6 +1146,30 @@ static int32_t scsi_send_command(SCSIDev
             goto illegal_lba;
         }
         break;
+    case WRITE_SAME_16:
+        DPRINTF("WRITE SAME(16) (sector %" PRId64 ", count %d)\n", lba, len);
+
+        if (lba > s->max_lba) {
+            goto illegal_lba;
+        }
+
+        /*
+         * We only support WRITE SAME with the unmap bit set for now.
+         */
+        if (!(buf[1] & 0x8)) {
+            goto fail;
+        }
+
+        rc = bdrv_discard(s->bs, lba * s->cluster_size, len * s->cluster_size);
+        if (rc < 0) {
+            /* XXX: better error code ?*/
+            goto fail;
+        }
+
+        scsi_req_set_status(&r->req, GOOD, NO_SENSE);
+        scsi_req_complete(&r->req);
+        scsi_remove_request(r);
+        return 0;
     default:
         DPRINTF("Unknown SCSI command (%2.2x)\n", buf[0]);
     fail:
Index: qemu/hw/scsi-defs.h
===================================================================
--- qemu.orig/hw/scsi-defs.h	2010-11-30 23:12:59.000000000 +0100
+++ qemu/hw/scsi-defs.h	2010-12-01 12:02:16.149254158 +0100
@@ -84,6 +84,7 @@
 #define MODE_SENSE_10         0x5a
 #define PERSISTENT_RESERVE_IN 0x5e
 #define PERSISTENT_RESERVE_OUT 0x5f
+#define WRITE_SAME_16         0x93
 #define MAINTENANCE_IN        0xa3
 #define MAINTENANCE_OUT       0xa4
 #define MOVE_MEDIUM           0xa5

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [PATCH 3/5] make dma_bdrv_io available to drivers
  2010-12-01 15:35 ` [Qemu-devel] PATCH 0/5] add TRIM/UNMAP support, v2 Christoph Hellwig
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 2/5] scsi-disk: support WRITE SAME (16) with unmap bit Christoph Hellwig
@ 2010-12-01 15:35   ` Christoph Hellwig
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 4/5] ide: add TRIM support Christoph Hellwig
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 5/5] raw-posix: add discard support Christoph Hellwig
  4 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2010-12-01 15:35 UTC (permalink / raw)
  To: qemu-devel

Make dma_bdrv_io available for drivers, and pass an explicit I/O function
instead of hardcoding bdrv_aio_readv/bdrv_aio_writev.  This is required
to implement non-READ/WRITE dma commands in the ide driver, e.g. the
upcoming TRIM support.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/dma-helpers.c
===================================================================
--- qemu.orig/dma-helpers.c	2010-11-23 14:13:42.178253390 +0100
+++ qemu/dma-helpers.c	2010-11-23 14:14:45.186253110 +0100
@@ -47,6 +47,7 @@ typedef struct {
     target_phys_addr_t sg_cur_byte;
     QEMUIOVector iov;
     QEMUBH *bh;
+    DMAIOFunc *io_func;
 } DMAAIOCB;
 
 static void dma_bdrv_cb(void *opaque, int ret);
@@ -116,13 +117,8 @@ static void dma_bdrv_cb(void *opaque, in
         return;
     }
 
-    if (dbs->is_write) {
-        dbs->acb = bdrv_aio_writev(dbs->bs, dbs->sector_num, &dbs->iov,
-                                   dbs->iov.size / 512, dma_bdrv_cb, dbs);
-    } else {
-        dbs->acb = bdrv_aio_readv(dbs->bs, dbs->sector_num, &dbs->iov,
-                                  dbs->iov.size / 512, dma_bdrv_cb, dbs);
-    }
+    dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
+                            dbs->iov.size / 512, dma_bdrv_cb, dbs);
     if (!dbs->acb) {
         dma_bdrv_unmap(dbs);
         qemu_iovec_destroy(&dbs->iov);
@@ -144,12 +140,12 @@ static AIOPool dma_aio_pool = {
     .cancel             = dma_aio_cancel,
 };
 
-static BlockDriverAIOCB *dma_bdrv_io(
+BlockDriverAIOCB *dma_bdrv_io(
     BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
-    BlockDriverCompletionFunc *cb, void *opaque,
-    int is_write)
+    DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
+    void *opaque, int is_write)
 {
-    DMAAIOCB *dbs =  qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
+    DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
 
     dbs->acb = NULL;
     dbs->bs = bs;
@@ -158,6 +154,7 @@ static BlockDriverAIOCB *dma_bdrv_io(
     dbs->sg_cur_index = 0;
     dbs->sg_cur_byte = 0;
     dbs->is_write = is_write;
+    dbs->io_func = io_func;
     dbs->bh = NULL;
     qemu_iovec_init(&dbs->iov, sg->nsg);
     dma_bdrv_cb(dbs, 0);
@@ -173,12 +170,12 @@ BlockDriverAIOCB *dma_bdrv_read(BlockDri
                                 QEMUSGList *sg, uint64_t sector,
                                 void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, cb, opaque, 0);
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, 0);
 }
 
 BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
                                  QEMUSGList *sg, uint64_t sector,
                                  void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, cb, opaque, 1);
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, 1);
 }
Index: qemu/dma.h
===================================================================
--- qemu.orig/dma.h	2010-11-23 14:13:42.185253809 +0100
+++ qemu/dma.h	2010-11-23 14:14:45.186253110 +0100
@@ -32,6 +32,14 @@ void qemu_sglist_add(QEMUSGList *qsg, ta
                      target_phys_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 
+typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num,
+                                 QEMUIOVector *iov, int nb_sectors,
+                                 BlockDriverCompletionFunc *cb, void *opaque);
+
+BlockDriverAIOCB *dma_bdrv_io(BlockDriverState *bs,
+                              QEMUSGList *sg, uint64_t sector_num,
+                              DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
+                              void *opaque, int is_write);
 BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
                                 QEMUSGList *sg, uint64_t sector,
                                 BlockDriverCompletionFunc *cb, void *opaque);

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [PATCH 4/5] ide: add TRIM support
  2010-12-01 15:35 ` [Qemu-devel] PATCH 0/5] add TRIM/UNMAP support, v2 Christoph Hellwig
                     ` (2 preceding siblings ...)
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 3/5] make dma_bdrv_io available to drivers Christoph Hellwig
@ 2010-12-01 15:35   ` Christoph Hellwig
  2010-12-02 14:07     ` Kevin Wolf
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 5/5] raw-posix: add discard support Christoph Hellwig
  4 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2010-12-01 15:35 UTC (permalink / raw)
  To: qemu-devel

Add support for the data set management command, and the TRIM sub function
in it.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/hw/ide/core.c
===================================================================
--- qemu.orig/hw/ide/core.c	2010-11-30 23:12:59.513132702 +0100
+++ qemu/hw/ide/core.c	2010-12-01 12:02:47.347023889 +0100
@@ -145,6 +145,8 @@ static void ide_identify(IDEState *s)
     put_le16(p + 66, 120);
     put_le16(p + 67, 120);
     put_le16(p + 68, 120);
+    if (dev && dev->conf.discard_granularity)
+        put_le16(p + 69, (1 << 14)); /* determinate TRIM behavior */
     put_le16(p + 80, 0xf0); /* ata3 -> ata6 supported */
     put_le16(p + 81, 0x16); /* conforms to ata5 */
     /* 14=NOP supported, 5=WCACHE supported, 0=SMART supported */
@@ -171,6 +173,8 @@ static void ide_identify(IDEState *s)
     dev = s->unit ? s->bus->slave : s->bus->master;
     if (dev && dev->conf.physical_block_size)
         put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf));
+    if (dev && dev->conf.discard_granularity)
+        put_le16(p + 169, 1); /* TRIM support */
 
     memcpy(s->identify_data, p, sizeof(s->identify_data));
     s->identify_set = 1;
@@ -1788,6 +1792,128 @@ static void ide_clear_hob(IDEBus *bus)
     bus->ifs[1].select &= ~(1 << 7);
 }
 
+typedef struct TrimAIOCB {
+    BlockDriverAIOCB common;
+    QEMUBH *bh;
+    int ret;
+} TrimAIOCB;
+
+static void trim_aio_cancel(BlockDriverAIOCB *acb)
+{
+    TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common);
+
+    qemu_bh_delete(iocb->bh);
+    iocb->bh = NULL;
+    qemu_aio_release(iocb);
+}
+
+static AIOPool trim_aio_pool = {
+    .aiocb_size         = sizeof(TrimAIOCB),
+    .cancel             = trim_aio_cancel,
+};
+
+static void ide_trim_bh_cb(void *opaque)
+{
+    TrimAIOCB *iocb = opaque;
+
+    iocb->common.cb(iocb->common.opaque, iocb->ret);
+
+    qemu_bh_delete(iocb->bh);
+    iocb->bh = NULL;
+
+    qemu_aio_release(iocb);
+}
+
+static BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    TrimAIOCB *iocb;
+    int i, j, ret;
+
+    iocb = qemu_aio_get(&trim_aio_pool, bs, cb, opaque);
+    iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
+    iocb->ret = 0;
+
+    for (j = 0; j < qiov->niov; j++) {
+        uint64_t *buffer = qiov->iov[j].iov_base;
+
+        for (i = 0; i < qiov->iov[j].iov_len / 8; i++) {
+            /* 6-byte LBA + 2-byte range per entry */
+            uint64_t entry = le64_to_cpu(buffer[i]);
+            uint64_t sector = entry & 0x0000ffffffffffffULL;
+            uint16_t count = entry >> 48;
+
+            if (count == 0)
+                break;
+
+            ret = bdrv_discard(bs, sector * 512, count * 512);
+            if (!iocb->ret)
+                iocb->ret = ret;
+        }
+    }
+
+    qemu_bh_schedule(iocb->bh);
+
+    return &iocb->common;
+}
+
+static void ide_trim_dma_cb(void *opaque, int ret)
+{
+    BMDMAState *bm = opaque;
+    IDEState *s = bmdma_active_if(bm);
+    int n;
+    int64_t sector_num;
+
+    if (ret < 0) {
+        if (ide_handle_rw_error(s, -ret,  BM_STATUS_DMA_RETRY))
+            return;
+    }
+
+    n = s->io_buffer_size >> 9;
+    sector_num = ide_get_sector(s);
+    if (n > 0) {
+        dma_buf_commit(s, 0);
+        sector_num += n;
+        ide_set_sector(s, sector_num);
+        s->nsector -= n;
+    }
+
+    /* end of transfer ? */
+    if (s->nsector == 0) {
+        s->status = READY_STAT | SEEK_STAT;
+        ide_set_irq(s->bus);
+    eot:
+        bm->status &= ~BM_STATUS_DMAING;
+        bm->status |= BM_STATUS_INT;
+        bm->dma_cb = NULL;
+        bm->unit = -1;
+        bm->aiocb = NULL;
+        return;
+    }
+
+    n = s->nsector;
+    s->io_buffer_size = n * 512;
+    /* launch next transfer */
+    if (dma_buf_prepare(bm, 0) == 0)
+        goto eot;
+#ifdef DEBUG_AIO
+    printf("aio_write: sector_num=%" PRId64 " n=%d\n", sector_num, n);
+#endif
+    bm->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num, ide_issue_trim,
+                    ide_trim_dma_cb, bm, 1);
+    ide_dma_submit_check(s, ide_trim_dma_cb, bm);
+}
+
+static void ide_trim(IDEState *s)
+{
+    s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT;
+    s->io_buffer_index = 0;
+    s->io_buffer_size = 0;
+    s->is_read = 0;
+    ide_dma_start(s, ide_trim_dma_cb);
+}
+
 void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     IDEBus *bus = opaque;
@@ -1867,6 +1993,17 @@ void ide_ioport_write(void *opaque, uint
             break;
 
         switch(val) {
+        case WIN_DSM:
+            switch (s->feature) {
+            case DSM_TRIM:
+                if (!s->bs)
+                   goto abort_cmd;
+                ide_trim(s);
+                break;
+            default:
+                goto abort_cmd;
+            }
+            break;
         case WIN_IDENTIFY:
             if (s->bs && s->drive_kind != IDE_CD) {
                 if (s->drive_kind != IDE_CFATA)
Index: qemu/hw/ide/internal.h
===================================================================
--- qemu.orig/hw/ide/internal.h	2010-11-30 23:12:59.515265122 +0100
+++ qemu/hw/ide/internal.h	2010-12-01 12:02:47.352634169 +0100
@@ -60,7 +60,11 @@ typedef struct BMDMAState BMDMAState;
  */
 #define CFA_REQ_EXT_ERROR_CODE		0x03 /* CFA Request Extended Error Code */
 /*
- *	0x04->0x07 Reserved
+ *      0x04->0x05 Reserved
+ */
+#define WIN_DSM                         0x06
+/*
+ *      0x07 Reserved
  */
 #define WIN_SRST			0x08 /* ATAPI soft reset command */
 #define WIN_DEVICE_RESET		0x08
@@ -188,6 +192,9 @@ typedef struct BMDMAState BMDMAState;
 
 #define IDE_DMA_BUF_SECTORS 256
 
+/* feature values for Data Set Management */
+#define DSM_TRIM                        0x01
+
 #if (IDE_DMA_BUF_SECTORS < MAX_MULT_SECTORS)
 #error "IDE_DMA_BUF_SECTORS must be bigger or equal to MAX_MULT_SECTORS"
 #endif
Index: qemu/hw/ide/qdev.c
===================================================================
--- qemu.orig/hw/ide/qdev.c	2010-11-30 23:12:56.601004402 +0100
+++ qemu/hw/ide/qdev.c	2010-12-01 12:02:47.354276228 +0100
@@ -110,6 +110,11 @@ static int ide_drive_initfn(IDEDevice *d
     const char *serial;
     DriveInfo *dinfo;
 
+    if (dev->conf.discard_granularity && dev->conf.discard_granularity != 512) {
+        error_report("discard_granularity must be 512 for ide");
+        return -1;
+    }
+
     serial = dev->serial;
     if (!serial) {
         /* try to fall back to value set with legacy -drive serial=... */

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [PATCH 5/5] raw-posix: add discard support
  2010-12-01 15:35 ` [Qemu-devel] PATCH 0/5] add TRIM/UNMAP support, v2 Christoph Hellwig
                     ` (3 preceding siblings ...)
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 4/5] ide: add TRIM support Christoph Hellwig
@ 2010-12-01 15:35   ` Christoph Hellwig
  2010-12-02 12:04     ` Kevin Wolf
  4 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2010-12-01 15:35 UTC (permalink / raw)
  To: qemu-devel

Add support to discard blocks in a raw image residing on an XFS filesystem
by calling the XFS_IOC_UNRESVSP64 ioctl to punch holes.  Support for other
hole punching mechanisms can be added when they become available.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/block/raw-posix.c
===================================================================
--- qemu.orig/block/raw-posix.c	2010-11-25 12:51:18.474004263 +0100
+++ qemu/block/raw-posix.c	2010-11-25 13:00:42.220003844 +0100
@@ -69,6 +69,10 @@
 #include <sys/diskslice.h>
 #endif
 
+#ifdef CONFIG_XFS
+#include <xfs/xfs.h>
+#endif
+
 //#define DEBUG_FLOPPY
 
 //#define DEBUG_BLOCK
@@ -120,6 +124,9 @@ typedef struct BDRVRawState {
 #endif
     uint8_t *aligned_buf;
     unsigned aligned_buf_size;
+#ifdef CONFIG_XFS
+    int is_xfs : 1;
+#endif
 } BDRVRawState;
 
 static int fd_open(BlockDriverState *bs);
@@ -196,6 +203,12 @@ static int raw_open_common(BlockDriverSt
 #endif
     }
 
+#ifdef CONFIG_XFS
+    if (platform_test_xfs_fd(s->fd)) {
+        s->is_xfs = 1;
+    }
+#endif
+
     return 0;
 
 out_free_buf:
@@ -740,6 +753,36 @@ static int raw_flush(BlockDriverState *b
     return qemu_fdatasync(s->fd);
 }
 
+#ifdef CONFIG_XFS
+static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
+{
+    struct xfs_flock64 fl;
+
+    memset(&fl, 0, sizeof(fl));
+    fl.l_whence = SEEK_SET;
+    fl.l_start = sector_num << 9;
+    fl.l_len = (int64_t)nb_sectors << 9;
+
+    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
+        printf("cannot punch hole (%s)\n", strerror(errno));
+        return -errno;
+    }
+
+    return 0;
+}
+#endif
+
+static int raw_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+    BDRVRawState *s = bs->opaque;
+
+#ifdef CONFIG_XFS
+    if (s->is_xfs)
+        return xfs_discard(s, sector_num, nb_sectors);
+#endif
+
+    return 0;
+}
 
 static QEMUOptionParameter raw_create_options[] = {
     {
@@ -761,6 +804,7 @@ static BlockDriver bdrv_file = {
     .bdrv_close = raw_close,
     .bdrv_create = raw_create,
     .bdrv_flush = raw_flush,
+    .bdrv_discard = raw_discard,
 
     .bdrv_aio_readv = raw_aio_readv,
     .bdrv_aio_writev = raw_aio_writev,
Index: qemu/configure
===================================================================
--- qemu.orig/configure	2010-11-25 12:51:18.484004891 +0100
+++ qemu/configure	2010-11-25 13:00:42.222004263 +0100
@@ -288,6 +288,7 @@ xen=""
 linux_aio=""
 attr=""
 vhost_net=""
+xfs=""
 
 gprof="no"
 debug_tcg="no"
@@ -1393,6 +1394,27 @@ EOF
 fi
 
 ##########################################
+# xfsctl() probe, used for raw-posix
+if test "$xfs" != "no" ; then
+  cat > $TMPC << EOF
+#include <xfs/xfs.h>
+int main(void)
+{
+    xfsctl(NULL, 0, 0, NULL);
+    return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+    xfs="yes"
+  else
+    if test "$xfs" = "yes" ; then
+      feature_not_found "uuid"
+    fi
+    xfs=no
+  fi
+fi
+
+##########################################
 # vde libraries probe
 if test "$vde" != "no" ; then
   vde_libs="-lvdeplug"
@@ -2354,6 +2376,7 @@ echo "vhost-net support $vhost_net"
 echo "Trace backend     $trace_backend"
 echo "Trace output file $trace_file-<pid>"
 echo "spice support     $spice"
+echo "xfsctl support    $xfs"
 
 if test $sdl_too_old = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -2499,6 +2522,9 @@ fi
 if test "$uuid" = "yes" ; then
   echo "CONFIG_UUID=y" >> $config_host_mak
 fi
+if test "$xfs" = "yes" ; then
+  echo "CONFIG_XFS=y" >> $config_host_mak
+fi
 qemu_version=`head $source_path/VERSION`
 echo "VERSION=$qemu_version" >>$config_host_mak
 echo "PKGVERSION=$pkgversion" >>$config_host_mak

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 5/5] raw-posix: add discard support
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 5/5] raw-posix: add discard support Christoph Hellwig
@ 2010-12-02 12:04     ` Kevin Wolf
  0 siblings, 0 replies; 27+ messages in thread
From: Kevin Wolf @ 2010-12-02 12:04 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: qemu-devel

Am 01.12.2010 16:35, schrieb Christoph Hellwig:
> Add support to discard blocks in a raw image residing on an XFS filesystem
> by calling the XFS_IOC_UNRESVSP64 ioctl to punch holes.  Support for other
> hole punching mechanisms can be added when they become available.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: qemu/block/raw-posix.c
> ===================================================================
> --- qemu.orig/block/raw-posix.c	2010-11-25 12:51:18.474004263 +0100
> +++ qemu/block/raw-posix.c	2010-11-25 13:00:42.220003844 +0100
> @@ -69,6 +69,10 @@
>  #include <sys/diskslice.h>
>  #endif
>  
> +#ifdef CONFIG_XFS
> +#include <xfs/xfs.h>
> +#endif
> +
>  //#define DEBUG_FLOPPY
>  
>  //#define DEBUG_BLOCK
> @@ -120,6 +124,9 @@ typedef struct BDRVRawState {
>  #endif
>      uint8_t *aligned_buf;
>      unsigned aligned_buf_size;
> +#ifdef CONFIG_XFS
> +    int is_xfs : 1;
> +#endif

Why not bool?

>  } BDRVRawState;
>  
>  static int fd_open(BlockDriverState *bs);
> @@ -196,6 +203,12 @@ static int raw_open_common(BlockDriverSt
>  #endif
>      }
>  
> +#ifdef CONFIG_XFS
> +    if (platform_test_xfs_fd(s->fd)) {
> +        s->is_xfs = 1;
> +    }
> +#endif
> +
>      return 0;
>  
>  out_free_buf:
> @@ -740,6 +753,36 @@ static int raw_flush(BlockDriverState *b
>      return qemu_fdatasync(s->fd);
>  }
>  
> +#ifdef CONFIG_XFS
> +static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
> +{
> +    struct xfs_flock64 fl;
> +
> +    memset(&fl, 0, sizeof(fl));
> +    fl.l_whence = SEEK_SET;
> +    fl.l_start = sector_num << 9;
> +    fl.l_len = (int64_t)nb_sectors << 9;
> +
> +    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
> +        printf("cannot punch hole (%s)\n", strerror(errno));
> +        return -errno;
> +    }
> +
> +    return 0;
> +}
> +#endif
> +
> +static int raw_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
> +{
> +    BDRVRawState *s = bs->opaque;
> +
> +#ifdef CONFIG_XFS
> +    if (s->is_xfs)
> +        return xfs_discard(s, sector_num, nb_sectors);

Braces

> +#endif
> +
> +    return 0;
> +}

This doesn't compile without XFS:

cc1: warnings being treated as errors
block/raw-posix.c: In function 'raw_discard':
block/raw-posix.c:777: error: unused variable 's'

>  
>  static QEMUOptionParameter raw_create_options[] = {
>      {
> @@ -761,6 +804,7 @@ static BlockDriver bdrv_file = {
>      .bdrv_close = raw_close,
>      .bdrv_create = raw_create,
>      .bdrv_flush = raw_flush,
> +    .bdrv_discard = raw_discard,
>  
>      .bdrv_aio_readv = raw_aio_readv,
>      .bdrv_aio_writev = raw_aio_writev,
> Index: qemu/configure
> ===================================================================
> --- qemu.orig/configure	2010-11-25 12:51:18.484004891 +0100
> +++ qemu/configure	2010-11-25 13:00:42.222004263 +0100
> @@ -288,6 +288,7 @@ xen=""
>  linux_aio=""
>  attr=""
>  vhost_net=""
> +xfs=""
>  
>  gprof="no"
>  debug_tcg="no"
> @@ -1393,6 +1394,27 @@ EOF
>  fi
>  
>  ##########################################
> +# xfsctl() probe, used for raw-posix
> +if test "$xfs" != "no" ; then
> +  cat > $TMPC << EOF
> +#include <xfs/xfs.h>
> +int main(void)
> +{
> +    xfsctl(NULL, 0, 0, NULL);
> +    return 0;
> +}
> +EOF
> +  if compile_prog "" "" ; then
> +    xfs="yes"
> +  else
> +    if test "$xfs" = "yes" ; then
> +      feature_not_found "uuid"

s/uuid/xfs/

Kevin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] block: add discard support
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
@ 2010-12-02 12:12     ` Kevin Wolf
  2010-12-10 13:38       ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Kevin Wolf @ 2010-12-02 12:12 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: qemu-devel

Am 01.12.2010 16:35, schrieb Christoph Hellwig:
> Add a new bdrv_discard method to free blocks in a mapping image, and a new
> drive property to set the granularity for these discard.  If no discard
> granularity support is set discard support is disabled.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: qemu/block.c
> ===================================================================
> --- qemu.orig/block.c	2010-11-25 16:17:32.922003704 +0100
> +++ qemu/block.c	2010-11-29 14:10:21.793255565 +0100
> @@ -1499,6 +1499,15 @@ int bdrv_has_zero_init(BlockDriverState
>      return 1;
>  }
>  
> +int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
> +{
> +    if (!bs->drv)
> +        return -ENOMEDIUM;
> +    if (!bs->drv->bdrv_discard)
> +        return 0;

Missing braces.

> +    return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
> +}
> +
>  /*
>   * Returns true iff the specified sector is present in the disk image. Drivers
>   * not implementing the functionality are assumed to not support backing files,
> Index: qemu/block.h
> ===================================================================
> --- qemu.orig/block.h	2010-11-25 16:17:32.929004193 +0100
> +++ qemu/block.h	2010-11-29 14:07:00.267003145 +0100
> @@ -146,6 +146,7 @@ int bdrv_flush(BlockDriverState *bs);
>  void bdrv_flush_all(void);
>  void bdrv_close_all(void);
>  
> +int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
>  int bdrv_has_zero_init(BlockDriverState *bs);
>  int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
>  	int *pnum);
> Index: qemu/block_int.h
> ===================================================================
> --- qemu.orig/block_int.h	2010-11-25 16:17:32.935003774 +0100
> +++ qemu/block_int.h	2010-11-29 14:09:31.926264704 +0100
> @@ -73,6 +73,8 @@ struct BlockDriver {
>          BlockDriverCompletionFunc *cb, void *opaque);
>      BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
>          BlockDriverCompletionFunc *cb, void *opaque);
> +    int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num,
> +                        int nb_sectors);
>  
>      int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs,
>          int num_reqs);
> @@ -227,6 +229,7 @@ typedef struct BlockConf {
>      uint16_t logical_block_size;
>      uint16_t min_io_size;
>      uint32_t opt_io_size;
> +    uint32_t discard_granularity;
>  } BlockConf;
>  
>  static inline unsigned int get_physical_block_exp(BlockConf *conf)
> @@ -249,6 +252,8 @@ static inline unsigned int get_physical_
>      DEFINE_PROP_UINT16("physical_block_size", _state,                   \
>                         _conf.physical_block_size, 512),                 \
>      DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),  \
> -    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0)
> +    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \
> +    DEFINE_PROP_UINT32("discard_granularity", _state, \
> +                       _conf.discard_granularity, 0)

Is there no way to get this value automatically?

At least for non-raw images (and it should be trivial to implement this
in qcow2) I guess we'll want to set it to the cluster size of the image
instead of requiring the user to set this value.

Kevin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 4/5] ide: add TRIM support
  2010-12-01 15:35   ` [Qemu-devel] [PATCH 4/5] ide: add TRIM support Christoph Hellwig
@ 2010-12-02 14:07     ` Kevin Wolf
  2010-12-10 13:39       ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Kevin Wolf @ 2010-12-02 14:07 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: qemu-devel

Am 01.12.2010 16:35, schrieb Christoph Hellwig:
> Add support for the data set management command, and the TRIM sub function
> in it.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: qemu/hw/ide/core.c
> ===================================================================
> --- qemu.orig/hw/ide/core.c	2010-11-30 23:12:59.513132702 +0100
> +++ qemu/hw/ide/core.c	2010-12-01 12:02:47.347023889 +0100
> @@ -145,6 +145,8 @@ static void ide_identify(IDEState *s)
>      put_le16(p + 66, 120);
>      put_le16(p + 67, 120);
>      put_le16(p + 68, 120);
> +    if (dev && dev->conf.discard_granularity)
> +        put_le16(p + 69, (1 << 14)); /* determinate TRIM behavior */

Braces

>      put_le16(p + 80, 0xf0); /* ata3 -> ata6 supported */
>      put_le16(p + 81, 0x16); /* conforms to ata5 */
>      /* 14=NOP supported, 5=WCACHE supported, 0=SMART supported */
> @@ -171,6 +173,8 @@ static void ide_identify(IDEState *s)
>      dev = s->unit ? s->bus->slave : s->bus->master;
>      if (dev && dev->conf.physical_block_size)
>          put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf));
> +    if (dev && dev->conf.discard_granularity)
> +        put_le16(p + 169, 1); /* TRIM support */
>  
>      memcpy(s->identify_data, p, sizeof(s->identify_data));
>      s->identify_set = 1;
> @@ -1788,6 +1792,128 @@ static void ide_clear_hob(IDEBus *bus)
>      bus->ifs[1].select &= ~(1 << 7);
>  }
>  
> +typedef struct TrimAIOCB {
> +    BlockDriverAIOCB common;
> +    QEMUBH *bh;
> +    int ret;
> +} TrimAIOCB;
> +
> +static void trim_aio_cancel(BlockDriverAIOCB *acb)
> +{
> +    TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common);
> +
> +    qemu_bh_delete(iocb->bh);
> +    iocb->bh = NULL;
> +    qemu_aio_release(iocb);
> +}
> +
> +static AIOPool trim_aio_pool = {
> +    .aiocb_size         = sizeof(TrimAIOCB),
> +    .cancel             = trim_aio_cancel,
> +};
> +
> +static void ide_trim_bh_cb(void *opaque)
> +{
> +    TrimAIOCB *iocb = opaque;
> +
> +    iocb->common.cb(iocb->common.opaque, iocb->ret);
> +
> +    qemu_bh_delete(iocb->bh);
> +    iocb->bh = NULL;
> +
> +    qemu_aio_release(iocb);
> +}
> +
> +static BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
> +        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> +        BlockDriverCompletionFunc *cb, void *opaque)
> +{
> +    TrimAIOCB *iocb;
> +    int i, j, ret;
> +
> +    iocb = qemu_aio_get(&trim_aio_pool, bs, cb, opaque);
> +    iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
> +    iocb->ret = 0;
> +
> +    for (j = 0; j < qiov->niov; j++) {
> +        uint64_t *buffer = qiov->iov[j].iov_base;
> +
> +        for (i = 0; i < qiov->iov[j].iov_len / 8; i++) {
> +            /* 6-byte LBA + 2-byte range per entry */
> +            uint64_t entry = le64_to_cpu(buffer[i]);
> +            uint64_t sector = entry & 0x0000ffffffffffffULL;
> +            uint16_t count = entry >> 48;
> +
> +            if (count == 0)
> +                break;
> +
> +            ret = bdrv_discard(bs, sector * 512, count * 512);
> +            if (!iocb->ret)
> +                iocb->ret = ret;
> +        }
> +    }
> +
> +    qemu_bh_schedule(iocb->bh);
> +
> +    return &iocb->common;
> +}
> +
> +static void ide_trim_dma_cb(void *opaque, int ret)
> +{
> +    BMDMAState *bm = opaque;
> +    IDEState *s = bmdma_active_if(bm);
> +    int n;
> +    int64_t sector_num;
> +
> +    if (ret < 0) {
> +        if (ide_handle_rw_error(s, -ret,  BM_STATUS_DMA_RETRY))
> +            return;

This looks wrong. Wouldn't werror=stop cause the request to be retried
as a write when the VM is resumed?

But having a copy&paste error gives just about right reason to mention
that after read and write this is the third almost unchanged copy of
this code. Eventually we'll want to refactor this.

> +    }
> +
> +    n = s->io_buffer_size >> 9;
> +    sector_num = ide_get_sector(s);
> +    if (n > 0) {
> +        dma_buf_commit(s, 0);
> +        sector_num += n;
> +        ide_set_sector(s, sector_num);
> +        s->nsector -= n;
> +    }
> +
> +    /* end of transfer ? */
> +    if (s->nsector == 0) {
> +        s->status = READY_STAT | SEEK_STAT;
> +        ide_set_irq(s->bus);
> +    eot:
> +        bm->status &= ~BM_STATUS_DMAING;
> +        bm->status |= BM_STATUS_INT;
> +        bm->dma_cb = NULL;
> +        bm->unit = -1;
> +        bm->aiocb = NULL;

You can use ide_dma_set_inactive() here.

While we're at it, do you know why in the eot: case we set
BM_STATUS_INT, but don't actually call ide_set_irq? From what I
understand, those two should always be coupled, but I might be wrong.

Kevin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] block: add discard support
  2010-12-02 12:12     ` Kevin Wolf
@ 2010-12-10 13:38       ` Christoph Hellwig
  2010-12-11 12:50         ` Paul Brook
  2010-12-13 16:07         ` [Qemu-devel] " Paolo Bonzini
  0 siblings, 2 replies; 27+ messages in thread
From: Christoph Hellwig @ 2010-12-10 13:38 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Christoph Hellwig, qemu-devel

On Thu, Dec 02, 2010 at 01:12:13PM +0100, Kevin Wolf wrote:
> >      DEFINE_PROP_UINT16("physical_block_size", _state,                   \
> >                         _conf.physical_block_size, 512),                 \
> >      DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),  \
> > -    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0)
> > +    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \
> > +    DEFINE_PROP_UINT32("discard_granularity", _state, \
> > +                       _conf.discard_granularity, 0)
> 
> Is there no way to get this value automatically?
> 
> At least for non-raw images (and it should be trivial to implement this
> in qcow2) I guess we'll want to set it to the cluster size of the image
> instead of requiring the user to set this value.

It's guest visible state, so it must not change due to migrations.  For
the current implementation all values for it work anyway - if it's
smaller than the block size we'll zero out the remainder of the block.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 4/5] ide: add TRIM support
  2010-12-02 14:07     ` Kevin Wolf
@ 2010-12-10 13:39       ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2010-12-10 13:39 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Christoph Hellwig, qemu-devel

On Thu, Dec 02, 2010 at 03:07:49PM +0100, Kevin Wolf wrote:
> This looks wrong. Wouldn't werror=stop cause the request to be retried
> as a write when the VM is resumed?

Indeed.

> But having a copy&paste error gives just about right reason to mention
> that after read and write this is the third almost unchanged copy of
> this code. Eventually we'll want to refactor this.

I've added a patch to refactor the DMA code to the next iteration
of the patch series.

> While we're at it, do you know why in the eot: case we set
> BM_STATUS_INT, but don't actually call ide_set_irq? From what I
> understand, those two should always be coupled, but I might be wrong.

No idea, sorry.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] block: add discard support
  2010-12-10 13:38       ` Christoph Hellwig
@ 2010-12-11 12:50         ` Paul Brook
  2010-12-13 15:43           ` Christoph Hellwig
  2010-12-13 16:07         ` [Qemu-devel] " Paolo Bonzini
  1 sibling, 1 reply; 27+ messages in thread
From: Paul Brook @ 2010-12-11 12:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Christoph Hellwig

> On Thu, Dec 02, 2010 at 01:12:13PM +0100, Kevin Wolf wrote:
> > >      DEFINE_PROP_UINT16("physical_block_size", _state,                 
> > >       \
> > >      
> > >                         _conf.physical_block_size, 512),               
> > >                          \
> > >      
> > >      DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0), 
> > >      \
> > > 
> > > -    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0)
> > > +    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \
> > > +    DEFINE_PROP_UINT32("discard_granularity", _state, \
> > > +                       _conf.discard_granularity, 0)
> > 
> > Is there no way to get this value automatically?
> > 
> > At least for non-raw images (and it should be trivial to implement this
> > in qcow2) I guess we'll want to set it to the cluster size of the image
> > instead of requiring the user to set this value.
> 
> It's guest visible state, so it must not change due to migrations.  For
> the current implementation all values for it work anyway - if it's
> smaller than the block size we'll zero out the remainder of the block.

That sounds wrong. Surely we should leave partial blocks untouched.

Paul

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] block: add discard support
  2010-12-11 12:50         ` Paul Brook
@ 2010-12-13 15:43           ` Christoph Hellwig
  2010-12-13 16:17             ` Paul Brook
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2010-12-13 15:43 UTC (permalink / raw)
  To: Paul Brook; +Cc: Kevin Wolf, qemu-devel, Christoph Hellwig

On Sat, Dec 11, 2010 at 12:50:20PM +0000, Paul Brook wrote:
> > It's guest visible state, so it must not change due to migrations.  For
> > the current implementation all values for it work anyway - if it's
> > smaller than the block size we'll zero out the remainder of the block.
> 
> That sounds wrong. Surely we should leave partial blocks untouched.

While zeroing them is not required for qemu, the general semantics of
the XFS ioctl require it.  It punches a hole, which means it's makes the
new area equivalent to a hole create by truncating a file to a larger
size and then only writing at the larger offset.  The semantics for a
hole in all Unix filesystems is that we read back zeroes from them.
If we write into a sparse file at a not block aligned offset the
zeroing of the partial block also happens.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] Re: [PATCH 1/5] block: add discard support
  2010-12-10 13:38       ` Christoph Hellwig
  2010-12-11 12:50         ` Paul Brook
@ 2010-12-13 16:07         ` Paolo Bonzini
  2010-12-13 16:15           ` Christoph Hellwig
  1 sibling, 1 reply; 27+ messages in thread
From: Paolo Bonzini @ 2010-12-13 16:07 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Kevin Wolf, qemu-devel

On 12/10/2010 02:38 PM, Christoph Hellwig wrote:
> if it's smaller than the block size we'll zero out the remainder of
> the block.

I think it should fail at VM startup time, or even better do nothing at all.

When you write in the middle of an absent block, and a partially-zero 
block is created, this is not visible: a read cannot see the difference 
between "all zeros because it's sparse" and "all zeros because it's zero".

If I ask you to (optionally) punch a 1kb hole but all you can do is 
punch a 2kb hole, I do care about the second kilobyte of data.  Since 
the hole punching of bdrv_discard is completely optional, it should not 
be done in this case.

Paolo

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] Re: [PATCH 1/5] block: add discard support
  2010-12-13 16:07         ` [Qemu-devel] " Paolo Bonzini
@ 2010-12-13 16:15           ` Christoph Hellwig
  2010-12-13 16:24             ` Paolo Bonzini
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2010-12-13 16:15 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Kevin Wolf, Christoph Hellwig, qemu-devel

On Mon, Dec 13, 2010 at 05:07:27PM +0100, Paolo Bonzini wrote:
> On 12/10/2010 02:38 PM, Christoph Hellwig wrote:
> >if it's smaller than the block size we'll zero out the remainder of
> >the block.
> 
> I think it should fail at VM startup time, or even better do nothing at all.

What should fail?

> When you write in the middle of an absent block, and a partially-zero 
> block is created, this is not visible: a read cannot see the difference 
> between "all zeros because it's sparse" and "all zeros because it's zero".

You can not see from a VM if a block is not allocated or zeroed.  Then
again we'll never create a fully zeroed block anyway unless we get
really stupid discard patterns from the guest OS.

> If I ask you to (optionally) punch a 1kb hole but all you can do is 
> punch a 2kb hole, I do care about the second kilobyte of data.  Since 
> the hole punching of bdrv_discard is completely optional, it should not 
> be done in this case.

Of course we do not discard the second KB in that case.  If you issue
a 1k UNRSVSP ioctl on a 2k block size XFS filesystem it will zero
exactly the 1k you specified, which is required for the semantics of the
ioctl.  Yes, it's not optimal, but qemu can't easily know what block
size the underlying filesystem has.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] block: add discard support
  2010-12-13 15:43           ` Christoph Hellwig
@ 2010-12-13 16:17             ` Paul Brook
  0 siblings, 0 replies; 27+ messages in thread
From: Paul Brook @ 2010-12-13 16:17 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Kevin Wolf, qemu-devel

> On Sat, Dec 11, 2010 at 12:50:20PM +0000, Paul Brook wrote:
> > > It's guest visible state, so it must not change due to migrations.  For
> > > the current implementation all values for it work anyway - if it's
> > > smaller than the block size we'll zero out the remainder of the block.
> > 
> > That sounds wrong. Surely we should leave partial blocks untouched.
> 
> While zeroing them is not required for qemu, the general semantics of
> the XFS ioctl require it.  It punches a hole, which means it's makes the
> new area equivalent to a hole create by truncating a file to a larger
> size and then only writing at the larger offset.  The semantics for a
> hole in all Unix filesystems is that we read back zeroes from them.
> If we write into a sparse file at a not block aligned offset the
> zeroing of the partial block also happens.

Ah, so it was just inconsistent use of the term "block".  When the erase 
region includes part of a block, we zero that part of the block and leave the 
rest of the block untouched.

Paul

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] Re: [PATCH 1/5] block: add discard support
  2010-12-13 16:15           ` Christoph Hellwig
@ 2010-12-13 16:24             ` Paolo Bonzini
  0 siblings, 0 replies; 27+ messages in thread
From: Paolo Bonzini @ 2010-12-13 16:24 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Kevin Wolf, qemu-devel

On 12/13/2010 05:15 PM, Christoph Hellwig wrote:
> On Mon, Dec 13, 2010 at 05:07:27PM +0100, Paolo Bonzini wrote:
>> On 12/10/2010 02:38 PM, Christoph Hellwig wrote:
>>> if it's smaller than the block size we'll zero out the remainder of
>>> the block.
>>
>> I think it should fail at VM startup time, or even better do nothing at all.
>
> What should fail?

Nothing -- you wrote "if it's smaller than the block size we'll zero out 
the remainder of the block" which I interpreted the wrong way, i.e. as 
"XFS will round up the size to the remainder of the block and zero that 
part out as well".

Thanks for the clarification.

Paolo

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2010-12-13 16:24 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-25 13:56 [Qemu-devel] [PATCH 0/5] add TRIM/UNMAP support Christoph Hellwig
2010-11-25 13:57 ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
2010-11-25 14:09   ` malc
2010-11-25 14:45   ` Stefan Hajnoczi
2010-11-25 13:57 ` [Qemu-devel] [PATCH 2/5] scsi-disk: support WRITE SAME (16) with unmap bit Christoph Hellwig
2010-11-25 14:10   ` malc
2010-11-25 13:57 ` [Qemu-devel] [PATCH 3/5] make dma_bdrv_io available to drivers Christoph Hellwig
2010-11-25 13:57 ` [Qemu-devel] [PATCH 4/5] ide: add TRIM support Christoph Hellwig
2010-11-25 14:10   ` malc
2010-11-25 13:57 ` [Qemu-devel] [PATCH 5/5] raw-posix: add discard support Christoph Hellwig
2010-12-01 15:35 ` [Qemu-devel] PATCH 0/5] add TRIM/UNMAP support, v2 Christoph Hellwig
2010-12-01 15:35   ` [Qemu-devel] [PATCH 1/5] block: add discard support Christoph Hellwig
2010-12-02 12:12     ` Kevin Wolf
2010-12-10 13:38       ` Christoph Hellwig
2010-12-11 12:50         ` Paul Brook
2010-12-13 15:43           ` Christoph Hellwig
2010-12-13 16:17             ` Paul Brook
2010-12-13 16:07         ` [Qemu-devel] " Paolo Bonzini
2010-12-13 16:15           ` Christoph Hellwig
2010-12-13 16:24             ` Paolo Bonzini
2010-12-01 15:35   ` [Qemu-devel] [PATCH 2/5] scsi-disk: support WRITE SAME (16) with unmap bit Christoph Hellwig
2010-12-01 15:35   ` [Qemu-devel] [PATCH 3/5] make dma_bdrv_io available to drivers Christoph Hellwig
2010-12-01 15:35   ` [Qemu-devel] [PATCH 4/5] ide: add TRIM support Christoph Hellwig
2010-12-02 14:07     ` Kevin Wolf
2010-12-10 13:39       ` Christoph Hellwig
2010-12-01 15:35   ` [Qemu-devel] [PATCH 5/5] raw-posix: add discard support Christoph Hellwig
2010-12-02 12:04     ` Kevin Wolf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.