* [RFC v2 PATCH] kvm tools, qcow: Add the support for copy-on-write clusters
From: Lan Tianyu @ 2011-11-18  8:47 UTC
  To: penberg; +Cc: Lan Tianyu, kvm, asias.hejun, levinsasha928, prasadjoshi124

When a write request targets a cluster without the copied flag,
allocate a new cluster and write the original data, with the
modification applied, to the new cluster. This also adds support
for writing to qcow2 compressed images. In testing, the resulting
image file passes "qemu-img check".

Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
---
 tools/kvm/disk/qcow.c        |  366 +++++++++++++++++++++++++++++-------------
 tools/kvm/include/kvm/qcow.h |    2 +
 2 files changed, 255 insertions(+), 113 deletions(-)

diff --git a/tools/kvm/disk/qcow.c b/tools/kvm/disk/qcow.c
index 680b37d..4d9125d 100644
--- a/tools/kvm/disk/qcow.c
+++ b/tools/kvm/disk/qcow.c
@@ -122,9 +122,6 @@ static int cache_table(struct qcow *q, struct qcow_l2_table *c)
 		 */
 		lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);
 
-		if (qcow_l2_cache_write(q, lru) < 0)
-			goto error;
-
 		/* Remove the node from the cache */
 		rb_erase(&lru->node, r);
 		list_del_init(&lru->list);
@@ -618,9 +615,6 @@ static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c)
 	if (rft->nr_cached == MAX_CACHE_NODES) {
 		lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list);
 
-		if (write_refcount_block(q, lru) < 0)
-			goto error;
-
 		rb_erase(&lru->node, r);
 		list_del_init(&lru->list);
 		rft->nr_cached--;
@@ -706,6 +700,11 @@ static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64
 
 	rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]);
 
+	if (!rfb_offset) {
+		pr_warning("Growing the refcount table is not supported");
+		return NULL;
+	}
+
 	rfb = refcount_block_search(q, rfb_offset);
 	if (rfb)
 		return rfb;
@@ -728,35 +727,121 @@ error_free_rfb:
 	return NULL;
 }
 
-/*
- * QCOW file might grow during a write operation. Not only data but metadata is
- * also written at the end of the file. Therefore it is necessary to ensure
- * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to
- * synchronize the in-core state of QCOW image to disk.
- *
- * We also try to restore the image to a consistent state if the metdata
- * operation fails. The two metadat operations are: level 1 and level 2 table
- * update. If either of them fails the image is truncated to a consistent state.
+static u16 qcow_get_refcount(struct qcow *q, u64 clust_idx)
+{
+	struct qcow_refcount_block *rfb = NULL;
+	struct qcow_header *header = q->header;
+	u64 rfb_idx;
+
+	rfb = qcow_read_refcount_block(q, clust_idx);
+	if (!rfb) {
+		pr_warning("Error while reading refcount table");
+		return -1;
+	}
+
+	rfb_idx = clust_idx & (((1ULL <<
+		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
+
+	if (rfb_idx >= rfb->size) {
+		pr_warning("L1: refcount block index out of bounds");
+		return -1;
+	}
+
+	return be16_to_cpu(rfb->entries[rfb_idx]);
+}
+
+static int update_cluster_refcount(struct qcow *q, u64 clust_idx, u16 append)
+{
+	struct qcow_refcount_block *rfb = NULL;
+	struct qcow_header *header = q->header;
+	u16 refcount;
+	u64 rfb_idx;
+
+	rfb = qcow_read_refcount_block(q, clust_idx);
+	if (!rfb) {
+		pr_warning("error while reading refcount table");
+		return -1;
+	}
+
+	rfb_idx = clust_idx & (((1ULL <<
+		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
+	if (rfb_idx >= rfb->size) {
+		pr_warning("refcount block index out of bounds");
+		return -1;
+	}
+
+	refcount = be16_to_cpu(rfb->entries[rfb_idx]) + append;
+	rfb->entries[rfb_idx] = cpu_to_be16(refcount);
+	rfb->dirty = 1;
+
+	/*write refcount block*/
+	write_refcount_block(q, rfb);
+
+	/*update free_clust_idx since refcount becomes zero*/
+	if (!refcount && clust_idx < q->free_clust_idx)
+		q->free_clust_idx = clust_idx;
+
+	return 0;
+}
+
+static void qcow_free_clusters(struct qcow *q, u64 clust_start, u64 size)
+{
+	struct qcow_header *header = q->header;
+	u64 start, end, offset;
+
+	start = clust_start & ~(q->cluster_size - 1);
+	end = (clust_start + size - 1) & ~(q->cluster_size - 1);
+	for (offset = start; offset <= end; offset += q->cluster_size)
+		update_cluster_refcount(q, offset >> header->cluster_bits, -1);
+}
+
+/* Allocate clusters for the given size: scan for a run of free
+ * clusters that can satisfy it. free_clust_idx is initialized to
+ * zero and records the last scanned position.
+ */
+static u64 qcow_alloc_clusters(struct qcow *q, u64 size)
+{
+	struct qcow_header *header = q->header;
+	u16 clust_refcount;
+	u32 clust_idx, i;
+	u64 clust_num;
+
+	clust_num = (size + (q->cluster_size - 1)) >> header->cluster_bits;
+
+again:
+	for (i = 0; i < clust_num; i++) {
+		clust_idx = q->free_clust_idx++;
+		clust_refcount = qcow_get_refcount(q, clust_idx);
+		if (clust_refcount < 0)
+			return -1;
+		else if (clust_refcount > 0)
+			goto again;
+	}
+
+	for (i = 0; i < clust_num; i++)
+		update_cluster_refcount(q,
+			q->free_clust_idx - clust_num + i, 1);
+
+	return (q->free_clust_idx - clust_num) << header->cluster_bits;
+}
+
+/* Get the l2 table. If the table has the copied flag set, read it
+ * directly. Otherwise, allocate a new cluster and copy the existing
+ * table (or zeroes) into it.
  */
-static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
+static int get_cluster_table(struct qcow *q, u64 offset,
+	struct qcow_l2_table **result_l2t, u64 *result_l2_index)
 {
 	struct qcow_header *header = q->header;
 	struct qcow_l1_table *l1t = &q->table;
 	struct qcow_l2_table *l2t;
-	u64 clust_start;
-	u64 clust_flags;
-	u64 l2t_offset;
-	u64 clust_off;
-	u64 l2t_size;
-	u64 clust_sz;
 	u64 l1t_idx;
+	u64 l2t_offset;
 	u64 l2t_idx;
-	u64 f_sz;
-	u64 len;
+	u64 l2t_size;
+	u64 l2t_new_offset;
 
-	l2t		= NULL;
-	l2t_size	= 1 << header->l2_bits;
-	clust_sz	= 1 << header->cluster_bits;
+	l2t_size = 1 << header->l2_bits;
 
 	l1t_idx = get_l1_index(q, offset);
 	if (l1t_idx >= l1t->table_size)
@@ -766,122 +851,166 @@ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src
 	if (l2t_idx >= l2t_size)
 		return -1;
 
-	clust_off = get_cluster_offset(q, offset);
-	if (clust_off >= clust_sz)
-		return -1;
-
-	len = clust_sz - clust_off;
-	if (len > src_len)
-		len = src_len;
-
-	mutex_lock(&q->mutex);
-
 	l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
-	if (l2t_offset & QCOW2_OFLAG_COMPRESSED) {
-		pr_warning("compressed clusters are not supported");
-		goto error;
-	}
-	if (!(l2t_offset & QCOW2_OFLAG_COPIED)) {
-		pr_warning("L2 copy-on-write clusters are not supported");
-		goto error;
-	}
-
-	l2t_offset &= QCOW2_OFFSET_MASK;
-	if (l2t_offset) {
-		/* read and cache l2 table */
+	if (l2t_offset & QCOW2_OFLAG_COPIED) {
+		l2t_offset &= ~QCOW2_OFLAG_COPIED;
 		l2t = qcow_read_l2_table(q, l2t_offset);
 		if (!l2t)
 			goto error;
 	} else {
-		l2t = new_cache_table(q, l2t_offset);
-		if (!l2t)
+		l2t_new_offset = qcow_alloc_clusters(q, l2t_size*sizeof(u64));
+		if (l2t_new_offset < 0)
 			goto error;
 
-		/* Capture the state of the consistent QCOW image */
-		f_sz = file_size(q->fd);
-		if (!f_sz)
-			goto free_cache;
+		l2t = new_cache_table(q, l2t_new_offset);
+		if (!l2t)
+			goto free_cluster;
+
+		if (l2t_offset)
+			qcow2_read_cluster(q, l2t_offset, l2t->table,
+				l2t_size*sizeof(u64));
+		else
+			memset(l2t->table, 0x00, l2t_size * sizeof(u64));
 
-		/* Write the l2 table of 0's at the end of the file */
-		l2t_offset = qcow_write_l2_table(q, l2t->table);
-		if (!l2t_offset)
+		/*write l2 table*/
+		l2t->dirty = 1;
+		if (qcow_l2_cache_write(q, l2t) < 0)
 			goto free_cache;
 
-		if (cache_table(q, l2t) < 0) {
-			if (ftruncate(q->fd, f_sz) < 0)
-				goto free_cache;
+		/* Update the l1 table */
+		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_new_offset
+			| QCOW2_OFLAG_COPIED);
 
-			goto free_cache;
-		}
+		if (pwrite_in_full(q->fd, l1t->l1_table,
+			l1t->table_size * sizeof(u64),
+			header->l1_table_offset) < 0)
+			goto error;
 
-		/* Update the in-core entry */
-		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
+		/*cache l2 table*/
+		cache_table(q, l2t);
+
+		/*free old cluster*/
+		qcow_free_clusters(q, l2t_offset, q->cluster_size);
 	}
 
-	/* Capture the state of the consistent QCOW image */
-	f_sz		= file_size(q->fd);
-	if (!f_sz)
-		goto error;
+	*result_l2t = l2t;
+	*result_l2_index = l2t_idx;
 
-	clust_start = be64_to_cpu(l2t->table[l2t_idx]);
+	return 0;
 
-	clust_flags = clust_start & QCOW2_OFLAGS_MASK;
-	if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
-		pr_warning("compressed clusters are not supported");
+free_cache:
+	free(l2t);
+
+free_cluster:
+	qcow_free_clusters(q, l2t_new_offset, q->cluster_size);
+
+error:
+	return -1;
+}
+
+/* If the cluster has the copied flag set, write the data directly.
+ * If not, read the original data and write it, with the modification
+ * applied, to a newly allocated cluster.
+ */
+static ssize_t qcow_write_cluster(struct qcow *q, u64 offset,
+		void *buf, u32 src_len)
+{
+	struct qcow_header *header = q->header;
+	struct qcow_l2_table *l2t;
+	u64 clust_new_idx;
+	u64 clust_new_start;
+	u64 clust_start;
+	u64 clust_flags;
+	u64 clust_off;
+	u64 l2t_idx;
+	u64 len;
+
+	l2t = NULL;
+
+	clust_off = get_cluster_offset(q, offset);
+	if (clust_off >= q->cluster_size)
+		return -1;
+
+	len = q->cluster_size - clust_off;
+	if (len > src_len)
+		len = src_len;
+
+	mutex_lock(&q->mutex);
+
+	if (get_cluster_table(q, offset, &l2t, &l2t_idx)) {
+		pr_warning("Get l2 table error");
 		goto error;
 	}
 
-	clust_start &= QCOW2_OFFSET_MASK;
-	if (!clust_start) {
-		clust_start		= ALIGN(f_sz, clust_sz);
-		l2t->table[l2t_idx]	= cpu_to_be64(clust_start | QCOW2_OFLAG_COPIED);
-		l2t->dirty		= 1;
-	}
+	clust_start = be64_to_cpu(l2t->table[l2t_idx]);
+	clust_flags = clust_start & QCOW2_OFLAGS_MASK;
 
+	clust_start &= QCOW2_OFFSET_MASK;
 	if (!(clust_flags & QCOW2_OFLAG_COPIED)) {
-		struct qcow_refcount_block *rfb = NULL;
-		u16 clust_refcount;
-		u64 clust_idx;
-		u64 rfb_idx;
-
-		clust_idx = (clust_start & QCOW2_OFFSET_MASK)
-			>> (header->cluster_bits);
 
-		rfb = qcow_read_refcount_block(q, clust_idx);
-		if (!rfb) {
-			pr_warning("L1: error while reading refcount table");
+		clust_new_start	= qcow_alloc_clusters(q, q->cluster_size);
+		if (clust_new_start < 0) {
+			pr_warning("Cluster alloc error!");
 			goto error;
 		}
 
-		rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
-		if (rfb_idx >= rfb->size) {
-			pr_warning("L1: refcount block index out of bounds");
-			goto error;
-		}
+		clust_new_idx = clust_new_start >> header->cluster_bits;
+		offset &= ~(q->cluster_size - 1);
+
+		/*if clust_start is not zero, read the original data*/
+		if (clust_start) {
+			mutex_unlock(&q->mutex);
+			if (qcow2_read_cluster(q, offset, q->copy_buff,
+				q->cluster_size) < 0) {
+				pr_warning("Read copy cluster error");
+				qcow_free_clusters(q, clust_new_start,
+					q->cluster_size);
+				return -1;
+			}
+			mutex_lock(&q->mutex);
+		} else
+			memset(q->copy_buff, 0x00, q->cluster_size);
+
+		memcpy(q->copy_buff + clust_off, buf, len);
+
+		 /* Write actual data */
+		if (pwrite_in_full(q->fd, q->copy_buff, q->cluster_size,
+			clust_new_start) < 0)
+			goto free_cluster;
+
+		/*update l2 table*/
+		l2t->table[l2t_idx] = cpu_to_be64(clust_new_start
+			| QCOW2_OFLAG_COPIED);
+		l2t->dirty = 1;
+
+		if (qcow_l2_cache_write(q, l2t))
+			goto free_cluster;
+
+		/*free old cluster*/
+		if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
+			int size;
+			size = ((clust_start >> q->csize_shift) &
+				q->csize_mask) + 1;
+			size *= 512;
+			clust_start &= q->cluster_offset_mask;
+			clust_start &= ~511;
+
+			qcow_free_clusters(q, clust_start, size);
+		} else if (clust_start)
+			qcow_free_clusters(q, clust_start, q->cluster_size);
 
-		clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]);
-		if (!clust_refcount) {
-			clust_refcount = 1;
-			rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount);
-			rfb->dirty = 1;
-		}
-
-		if (clust_refcount > 1) {
-			pr_warning("L1 copy-on-write clusters are not supported");
+	} else {
+		/* Write actual data */
+		if (pwrite_in_full(q->fd, buf, len,
+			clust_start + clust_off) < 0)
 			goto error;
-		}
 	}
-
 	mutex_unlock(&q->mutex);
-
-	/* Write actual data */
-	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
-		return -1;
-
 	return len;
 
-free_cache:
-	free(l2t);
+free_cluster:
+	qcow_free_clusters(q, clust_new_start, q->cluster_size);
+
 error:
 	mutex_unlock(&q->mutex);
 	return -1;
@@ -993,6 +1122,7 @@ static int qcow_disk_close(struct disk_image *disk)
 
 	refcount_table_free_cache(&q->refcount_table);
 	l1_table_free_cache(&q->table);
+	free(q->copy_buff);
 	free(q->cluster_data);
 	free(q->cluster_cache);
 	free(q->refcount_table.rf_table);
@@ -1117,10 +1247,16 @@ static struct disk_image *qcow2_probe(int fd, bool readonly)
 	q->cluster_offset_mask = (1LL << q->csize_shift) - 1;
 	q->cluster_size = 1 << q->header->cluster_bits;
 
+	q->copy_buff = malloc(q->cluster_size);
+	if (!q->copy_buff) {
+		pr_warning("copy buff malloc error!");
+		goto free_header;
+	}
+
 	q->cluster_data = malloc(q->cluster_size);
 	if (!q->cluster_data) {
 		pr_warning("cluster data malloc error!");
-		goto free_header;
+		goto free_copy_buff;
 	}
 
 	q->cluster_cache = malloc(q->cluster_size);
@@ -1163,6 +1299,9 @@ free_cluster_cache:
 free_cluster_data:
 	if (q->cluster_data)
 		free(q->cluster_data);
+free_copy_buff:
+	if (q->cluster_data)
+		free(q->cluster_data);
 free_header:
 	if (q->header)
 		free(q->header);
@@ -1252,6 +1391,7 @@ static struct disk_image *qcow1_probe(int fd, bool readonly)
 	q->version = QCOW1_VERSION;
 	q->cluster_size = 1 << q->header->cluster_bits;
 	q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1;
+	q->free_clust_idx = 0;
 
 	q->cluster_data = malloc(q->cluster_size);
 	if (!q->cluster_data) {
diff --git a/tools/kvm/include/kvm/qcow.h b/tools/kvm/include/kvm/qcow.h
index bbf7913..e032a1e 100644
--- a/tools/kvm/include/kvm/qcow.h
+++ b/tools/kvm/include/kvm/qcow.h
@@ -84,8 +84,10 @@ struct qcow {
 	u32				version;
 	u64				cluster_size;
 	u64				cluster_offset_mask;
+	u64				free_clust_idx;
 	void				*cluster_cache;
 	void				*cluster_data;
+	void				*copy_buff;
 };
 
 struct qcow1_header_disk {
-- 
1.7.6.rc2.8.g28eb



* Re: [RFC v2 PATCH] kvm tools, qcow: Add the support for copy-on-write clusters
From: Kevin Wolf @ 2011-11-18 10:10 UTC
  To: Lan Tianyu; +Cc: penberg, kvm, asias.hejun, levinsasha928, prasadjoshi124

On 18.11.2011 09:47, Lan Tianyu wrote:
> When a write request targets a cluster without the copied flag,
> allocate a new cluster and write the original data, with the
> modification applied, to the new cluster. This also adds support
> for writing to qcow2 compressed images. In testing, the resulting
> image file passes "qemu-img check".
> 
> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> ---
>  tools/kvm/disk/qcow.c        |  366 +++++++++++++++++++++++++++++-------------
>  tools/kvm/include/kvm/qcow.h |    2 +
>  2 files changed, 255 insertions(+), 113 deletions(-)
> 
> diff --git a/tools/kvm/disk/qcow.c b/tools/kvm/disk/qcow.c
> index 680b37d..4d9125d 100644
> --- a/tools/kvm/disk/qcow.c
> +++ b/tools/kvm/disk/qcow.c
> @@ -122,9 +122,6 @@ static int cache_table(struct qcow *q, struct qcow_l2_table *c)
>  		 */
>  		lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);
>  
> -		if (qcow_l2_cache_write(q, lru) < 0)
> -			goto error;
> -
>  		/* Remove the node from the cache */
>  		rb_erase(&lru->node, r);
>  		list_del_init(&lru->list);
> @@ -618,9 +615,6 @@ static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c)
>  	if (rft->nr_cached == MAX_CACHE_NODES) {
>  		lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list);
>  
> -		if (write_refcount_block(q, lru) < 0)
> -			goto error;
> -
>  		rb_erase(&lru->node, r);
>  		list_del_init(&lru->list);
>  		rft->nr_cached--;
> @@ -706,6 +700,11 @@ static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64
>  
>  	rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]);
>  
> +	if (!rfb_offset) {
> +		pr_warning("Growing the refcount table is not supported");
> +		return NULL;
> +	}
> +
>  	rfb = refcount_block_search(q, rfb_offset);
>  	if (rfb)
>  		return rfb;
> @@ -728,35 +727,121 @@ error_free_rfb:
>  	return NULL;
>  }
>  
> -/*
> - * QCOW file might grow during a write operation. Not only data but metadata is
> - * also written at the end of the file. Therefore it is necessary to ensure
> - * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to
> - * synchronize the in-core state of QCOW image to disk.
> - *
> - * We also try to restore the image to a consistent state if the metdata
> - * operation fails. The two metadat operations are: level 1 and level 2 table
> - * update. If either of them fails the image is truncated to a consistent state.
> +static u16 qcow_get_refcount(struct qcow *q, u64 clust_idx)
> +{
> +	struct qcow_refcount_block *rfb = NULL;
> +	struct qcow_header *header = q->header;
> +	u64 rfb_idx;
> +
> +	rfb = qcow_read_refcount_block(q, clust_idx);
> +	if (!rfb) {
> +		pr_warning("Error while reading refcount table");
> +		return -1;
> +	}
> +
> +	rfb_idx = clust_idx & (((1ULL <<
> +		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
> +
> +	if (rfb_idx >= rfb->size) {
> +		pr_warning("L1: refcount block index out of bounds");
> +		return -1;
> +	}
> +
> +	return be16_to_cpu(rfb->entries[rfb_idx]);
> +}
> +
> +static int update_cluster_refcount(struct qcow *q, u64 clust_idx, u16 append)
> +{
> +	struct qcow_refcount_block *rfb = NULL;
> +	struct qcow_header *header = q->header;
> +	u16 refcount;
> +	u64 rfb_idx;
> +
> +	rfb = qcow_read_refcount_block(q, clust_idx);
> +	if (!rfb) {
> +		pr_warning("error while reading refcount table");
> +		return -1;
> +	}
> +
> +	rfb_idx = clust_idx & (((1ULL <<
> +		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
> +	if (rfb_idx >= rfb->size) {
> +		pr_warning("refcount block index out of bounds");
> +		return -1;
> +	}
> +
> +	refcount = be16_to_cpu(rfb->entries[rfb_idx]) + append;
> +	rfb->entries[rfb_idx] = cpu_to_be16(refcount);
> +	rfb->dirty = 1;
> +
> +	/*write refcount block*/
> +	write_refcount_block(q, rfb);

Missing error handling.
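
Something along these lines would do (a sketch; write_refcount_block()
already returns a negative value on failure elsewhere in this file):

	/* Propagate the failure instead of silently continuing. */
	if (write_refcount_block(q, rfb) < 0) {
		pr_warning("refcount block write error");
		return -1;
	}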

> +
> +	/*update free_clust_idx since refcount becomes zero*/
> +	if (!refcount && clust_idx < q->free_clust_idx)
> +		q->free_clust_idx = clust_idx;
> +
> +	return 0;
> +}
> +
> +static void qcow_free_clusters(struct qcow *q, u64 clust_start, u64 size)
> +{
> +	struct qcow_header *header = q->header;
> +	u64 start, end, offset;
> +
> +	start = clust_start & ~(q->cluster_size - 1);
> +	end = (clust_start + size - 1) & ~(q->cluster_size - 1);
> +	for (offset = start; offset <= end; offset += q->cluster_size)
> +		update_cluster_refcount(q, offset >> header->cluster_bits, -1);
> +}
> +
> +/* Allocate clusters for the given size: scan for a run of free
> + * clusters that can satisfy it. free_clust_idx is initialized to
> + * zero and records the last scanned position.
> + */
> +static u64 qcow_alloc_clusters(struct qcow *q, u64 size)
> +{
> +	struct qcow_header *header = q->header;
> +	u16 clust_refcount;
> +	u32 clust_idx, i;
> +	u64 clust_num;
> +
> +	clust_num = (size + (q->cluster_size - 1)) >> header->cluster_bits;
> +
> +again:
> +	for (i = 0; i < clust_num; i++) {
> +		clust_idx = q->free_clust_idx++;
> +		clust_refcount = qcow_get_refcount(q, clust_idx);
> +		if (clust_refcount < 0)
> +			return -1;
> +		else if (clust_refcount > 0)
> +			goto again;
> +	}
> +
> +	for (i = 0; i < clust_num; i++)
> +		update_cluster_refcount(q,
> +			q->free_clust_idx - clust_num + i, 1);

Error handling again.

> +
> +	return (q->free_clust_idx - clust_num) << header->cluster_bits;
> +}
> +
> +/* Get the l2 table. If the table has the copied flag set, read it
> + * directly. Otherwise, allocate a new cluster and copy the existing
> + * table (or zeroes) into it.
>   */
> -static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
> +static int get_cluster_table(struct qcow *q, u64 offset,
> +	struct qcow_l2_table **result_l2t, u64 *result_l2_index)
>  {
>  	struct qcow_header *header = q->header;
>  	struct qcow_l1_table *l1t = &q->table;
>  	struct qcow_l2_table *l2t;
> -	u64 clust_start;
> -	u64 clust_flags;
> -	u64 l2t_offset;
> -	u64 clust_off;
> -	u64 l2t_size;
> -	u64 clust_sz;
>  	u64 l1t_idx;
> +	u64 l2t_offset;
>  	u64 l2t_idx;
> -	u64 f_sz;
> -	u64 len;
> +	u64 l2t_size;
> +	u64 l2t_new_offset;
>  
> -	l2t		= NULL;
> -	l2t_size	= 1 << header->l2_bits;
> -	clust_sz	= 1 << header->cluster_bits;
> +	l2t_size = 1 << header->l2_bits;
>  
>  	l1t_idx = get_l1_index(q, offset);
>  	if (l1t_idx >= l1t->table_size)
> @@ -766,122 +851,166 @@ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src
>  	if (l2t_idx >= l2t_size)
>  		return -1;
>  
> -	clust_off = get_cluster_offset(q, offset);
> -	if (clust_off >= clust_sz)
> -		return -1;
> -
> -	len = clust_sz - clust_off;
> -	if (len > src_len)
> -		len = src_len;
> -
> -	mutex_lock(&q->mutex);
> -
>  	l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
> -	if (l2t_offset & QCOW2_OFLAG_COMPRESSED) {
> -		pr_warning("compressed clusters are not supported");
> -		goto error;
> -	}
> -	if (!(l2t_offset & QCOW2_OFLAG_COPIED)) {
> -		pr_warning("L2 copy-on-write clusters are not supported");
> -		goto error;
> -	}
> -
> -	l2t_offset &= QCOW2_OFFSET_MASK;
> -	if (l2t_offset) {
> -		/* read and cache l2 table */
> +	if (l2t_offset & QCOW2_OFLAG_COPIED) {
> +		l2t_offset &= ~QCOW2_OFLAG_COPIED;
>  		l2t = qcow_read_l2_table(q, l2t_offset);
>  		if (!l2t)
>  			goto error;
>  	} else {
> -		l2t = new_cache_table(q, l2t_offset);
> -		if (!l2t)
> +		l2t_new_offset = qcow_alloc_clusters(q, l2t_size*sizeof(u64));
> +		if (l2t_new_offset < 0)
>  			goto error;
>  
> -		/* Capture the state of the consistent QCOW image */
> -		f_sz = file_size(q->fd);
> -		if (!f_sz)
> -			goto free_cache;
> +		l2t = new_cache_table(q, l2t_new_offset);
> +		if (!l2t)
> +			goto free_cluster;
> +
> +		if (l2t_offset)
> +			qcow2_read_cluster(q, l2t_offset, l2t->table,
> +				l2t_size*sizeof(u64));

There must be a system behind it. :-)

> +		else
> +			memset(l2t->table, 0x00, l2t_size * sizeof(u64));
>  
> -		/* Write the l2 table of 0's at the end of the file */
> -		l2t_offset = qcow_write_l2_table(q, l2t->table);
> -		if (!l2t_offset)
> +		/*write l2 table*/
> +		l2t->dirty = 1;
> +		if (qcow_l2_cache_write(q, l2t) < 0)
>  			goto free_cache;

You need to make sure that the refcount update is written first (e.g.
with fsync), otherwise you risk corruption when the host crashes in the
middle.
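
As a rough sketch of that ordering (the fsync placement is the point;
error paths trimmed, helper names mirror the patch):

	/* 1. Write the refcount update for the newly allocated cluster. */
	if (write_refcount_block(q, rfb) < 0)
		goto error;

	/* 2. Barrier: the refcount must reach the disk... */
	if (fsync(q->fd) < 0)
		goto error;

	/* 3. ...before the L2 table that references the cluster. */
	l2t->dirty = 1;
	if (qcow_l2_cache_write(q, l2t) < 0)
		goto free_cache;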

>  
> -		if (cache_table(q, l2t) < 0) {
> -			if (ftruncate(q->fd, f_sz) < 0)
> -				goto free_cache;
> +		/* Update the l1 table */
> +		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_new_offset
> +			| QCOW2_OFLAG_COPIED);
>  
> -			goto free_cache;
> -		}
> +		if (pwrite_in_full(q->fd, l1t->l1_table,
> +			l1t->table_size * sizeof(u64),
> +			header->l1_table_offset) < 0)
> +			goto error;

Likewise, the L1 table write must be ordered against the L2 write.

goto error is using the wrong label.

>  
> -		/* Update the in-core entry */
> -		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
> +		/*cache l2 table*/
> +		cache_table(q, l2t);

After so many explicit comments, you can probably guess what's wrong here...

> +
> +		/*free old cluster*/
> +		qcow_free_clusters(q, l2t_offset, q->cluster_size);
>  	}
>  
> -	/* Capture the state of the consistent QCOW image */
> -	f_sz		= file_size(q->fd);
> -	if (!f_sz)
> -		goto error;
> +	*result_l2t = l2t;
> +	*result_l2_index = l2t_idx;
>  
> -	clust_start = be64_to_cpu(l2t->table[l2t_idx]);
> +	return 0;
>  
> -	clust_flags = clust_start & QCOW2_OFLAGS_MASK;
> -	if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
> -		pr_warning("compressed clusters are not supported");
> +free_cache:
> +	free(l2t);
> +
> +free_cluster:
> +	qcow_free_clusters(q, l2t_new_offset, q->cluster_size);
> +
> +error:
> +	return -1;
> +}
> +
> +/* If the cluster has the copied flag set, write the data directly.
> + * If not, read the original data and write it, with the modification
> + * applied, to a newly allocated cluster.
> + */
> +static ssize_t qcow_write_cluster(struct qcow *q, u64 offset,
> +		void *buf, u32 src_len)
> +{
> +	struct qcow_header *header = q->header;
> +	struct qcow_l2_table *l2t;
> +	u64 clust_new_idx;
> +	u64 clust_new_start;
> +	u64 clust_start;
> +	u64 clust_flags;
> +	u64 clust_off;
> +	u64 l2t_idx;
> +	u64 len;
> +
> +	l2t = NULL;
> +
> +	clust_off = get_cluster_offset(q, offset);
> +	if (clust_off >= q->cluster_size)
> +		return -1;
> +
> +	len = q->cluster_size - clust_off;
> +	if (len > src_len)
> +		len = src_len;
> +
> +	mutex_lock(&q->mutex);
> +
> +	if (get_cluster_table(q, offset, &l2t, &l2t_idx)) {
> +		pr_warning("Get l2 table error");
>  		goto error;
>  	}
>  
> -	clust_start &= QCOW2_OFFSET_MASK;
> -	if (!clust_start) {
> -		clust_start		= ALIGN(f_sz, clust_sz);
> -		l2t->table[l2t_idx]	= cpu_to_be64(clust_start | QCOW2_OFLAG_COPIED);
> -		l2t->dirty		= 1;
> -	}
> +	clust_start = be64_to_cpu(l2t->table[l2t_idx]);
> +	clust_flags = clust_start & QCOW2_OFLAGS_MASK;
>  
> +	clust_start &= QCOW2_OFFSET_MASK;
>  	if (!(clust_flags & QCOW2_OFLAG_COPIED)) {
> -		struct qcow_refcount_block *rfb = NULL;
> -		u16 clust_refcount;
> -		u64 clust_idx;
> -		u64 rfb_idx;
> -
> -		clust_idx = (clust_start & QCOW2_OFFSET_MASK)
> -			>> (header->cluster_bits);
>  
> -		rfb = qcow_read_refcount_block(q, clust_idx);
> -		if (!rfb) {
> -			pr_warning("L1: error while reading refcount table");
> +		clust_new_start	= qcow_alloc_clusters(q, q->cluster_size);
> +		if (clust_new_start < 0) {
> +			pr_warning("Cluster alloc error!");
>  			goto error;
>  		}
>  
> -		rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
> -		if (rfb_idx >= rfb->size) {
> -			pr_warning("L1: refcount block index out of bounds");
> -			goto error;
> -		}
> +		clust_new_idx = clust_new_start >> header->cluster_bits;
> +		offset &= ~(q->cluster_size - 1);
> +
> +		/*if clust_start is not zero, read the original data*/
> +		if (clust_start) {
> +			mutex_unlock(&q->mutex);
> +			if (qcow2_read_cluster(q, offset, q->copy_buff,
> +				q->cluster_size) < 0) {
> +				pr_warning("Read copy cluster error");
> +				qcow_free_clusters(q, clust_new_start,
> +					q->cluster_size);
> +				return -1;
> +			}
> +			mutex_lock(&q->mutex);
> +		} else
> +			memset(q->copy_buff, 0x00, q->cluster_size);
> +
> +		memcpy(q->copy_buff + clust_off, buf, len);
> +
> +		 /* Write actual data */
> +		if (pwrite_in_full(q->fd, q->copy_buff, q->cluster_size,
> +			clust_new_start) < 0)
> +			goto free_cluster;
> +
> +		/*update l2 table*/
> +		l2t->table[l2t_idx] = cpu_to_be64(clust_new_start
> +			| QCOW2_OFLAG_COPIED);
> +		l2t->dirty = 1;
> +
> +		if (qcow_l2_cache_write(q, l2t))
> +			goto free_cluster;

Cluster allocation must be ordered against L2 update.

> +
> +		/*free old cluster*/
> +		if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
> +			int size;
> +			size = ((clust_start >> q->csize_shift) &
> +				q->csize_mask) + 1;
> +			size *= 512;
> +			clust_start &= q->cluster_offset_mask;
> +			clust_start &= ~511;
> +
> +			qcow_free_clusters(q, clust_start, size);
> +		} else if (clust_start)
> +			qcow_free_clusters(q, clust_start, q->cluster_size);
>  
> -		clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]);
> -		if (!clust_refcount) {
> -			clust_refcount = 1;
> -			rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount);
> -			rfb->dirty = 1;
> -		}
> -
> -		if (clust_refcount > 1) {
> -			pr_warning("L1 copy-on-write clusters are not supported");
> +	} else {
> +		/* Write actual data */
> +		if (pwrite_in_full(q->fd, buf, len,
> +			clust_start + clust_off) < 0)
>  			goto error;
> -		}
>  	}
> -
>  	mutex_unlock(&q->mutex);
> -
> -	/* Write actual data */
> -	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
> -		return -1;
> -
>  	return len;
>  
> -free_cache:
> -	free(l2t);
> +free_cluster:
> +	qcow_free_clusters(q, clust_new_start, q->cluster_size);
> +
>  error:
>  	mutex_unlock(&q->mutex);
>  	return -1;
> @@ -993,6 +1122,7 @@ static int qcow_disk_close(struct disk_image *disk)
>  
>  	refcount_table_free_cache(&q->refcount_table);
>  	l1_table_free_cache(&q->table);
> +	free(q->copy_buff);
>  	free(q->cluster_data);
>  	free(q->cluster_cache);
>  	free(q->refcount_table.rf_table);
> @@ -1117,10 +1247,16 @@ static struct disk_image *qcow2_probe(int fd, bool readonly)
>  	q->cluster_offset_mask = (1LL << q->csize_shift) - 1;
>  	q->cluster_size = 1 << q->header->cluster_bits;
>  
> +	q->copy_buff = malloc(q->cluster_size);
> +	if (!q->copy_buff) {
> +		pr_warning("copy buff malloc error!");
> +		goto free_header;
> +	}
> +
>  	q->cluster_data = malloc(q->cluster_size);
>  	if (!q->cluster_data) {
>  		pr_warning("cluster data malloc error!");
> -		goto free_header;
> +		goto free_copy_buff;
>  	}
>  
>  	q->cluster_cache = malloc(q->cluster_size);
> @@ -1163,6 +1299,9 @@ free_cluster_cache:
>  free_cluster_data:
>  	if (q->cluster_data)
>  		free(q->cluster_data);
> +free_copy_buff:
> +	if (q->cluster_data)
> +		free(q->cluster_data);

This looks like the wrong field.
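
Presumably it was meant to free the new buffer:

free_copy_buff:
	if (q->copy_buff)
		free(q->copy_buff);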

>  free_header:
>  	if (q->header)
>  		free(q->header);
> @@ -1252,6 +1391,7 @@ static struct disk_image *qcow1_probe(int fd, bool readonly)
>  	q->version = QCOW1_VERSION;
>  	q->cluster_size = 1 << q->header->cluster_bits;
>  	q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1;
> +	q->free_clust_idx = 0;
>  
>  	q->cluster_data = malloc(q->cluster_size);
>  	if (!q->cluster_data) {
> diff --git a/tools/kvm/include/kvm/qcow.h b/tools/kvm/include/kvm/qcow.h
> index bbf7913..e032a1e 100644
> --- a/tools/kvm/include/kvm/qcow.h
> +++ b/tools/kvm/include/kvm/qcow.h
> @@ -84,8 +84,10 @@ struct qcow {
>  	u32				version;
>  	u64				cluster_size;
>  	u64				cluster_offset_mask;
> +	u64				free_clust_idx;
>  	void				*cluster_cache;
>  	void				*cluster_data;
> +	void				*copy_buff;
>  };
>  
>  struct qcow1_header_disk {

Kevin


* RE: [RFC v2 PATCH] kvm tools, qcow: Add the support for copy-on-write clusters
From: Lan, Tianyu @ 2011-11-19  2:09 UTC
  To: Kevin Wolf; +Cc: penberg, kvm, asias.hejun, levinsasha928, prasadjoshi124

Hi Kevin,
	Thanks for your review. If I understand correctly, the comments below mean
that there should be an fsync after each metadata update (refcount block,
l1 table, and l2 table).

Thanks 
Tianyu Lan

-----Original Message-----
> +		/*write l2 table*/
> +		l2t->dirty = 1;
> +		if (qcow_l2_cache_write(q, l2t) < 0)
>  			goto free_cache;

You need to make sure that the refcount update is written first (e.g.
with fsync), otherwise you risk corruption when the host crashes in the middle.

>  
> -		if (cache_table(q, l2t) < 0) {
> -			if (ftruncate(q->fd, f_sz) < 0)
> -				goto free_cache;
> +		/* Update the l1 table */
> +		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_new_offset
> +			| QCOW2_OFLAG_COPIED);
>  
> -			goto free_cache;
> -		}
> +		if (pwrite_in_full(q->fd, l1t->l1_table,
> +			l1t->table_size * sizeof(u64),
> +			header->l1_table_offset) < 0)
> +			goto error;

Likewise, the L1 table write must be ordered against the L2 write.

goto error is using the wrong label.

>  
> -		/* Update the in-core entry */
> -		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
> +		/*cache l2 table*/
> +		cache_table(q, l2t);

After so many explicit comments, you can probably guess what's wrong here...


* RE: [RFC v2 PATCH] kvm tools, qcow: Add the support for copy-on-write clusters
From: Lan, Tianyu @ 2011-11-19 15:30 UTC
  To: Kevin Wolf; +Cc: penberg, kvm, asias.hejun, levinsasha928, prasadjoshi124

How about using sync_file_range() to sync the metadata?



* RE: [RFC v2 PATCH] kvm tools, qcow: Add the support for copy-on-write clusters
From: Sasha Levin @ 2011-11-19 16:26 UTC
  To: Lan, Tianyu; +Cc: Kevin Wolf, penberg, kvm, asias.hejun, prasadjoshi124

On Sat, 2011-11-19 at 23:30 +0800, Lan, Tianyu wrote:
> How about using sync_file_range() to sync the metadata?

sync_file_range() is only a hint, it doesn't actually assure anything.

-- 

Sasha.



* RE: [RFC v2 PATCH] kvm tools, qcow: Add the support for copy-on-write clusters
From: Lan, Tianyu @ 2011-11-20  6:14 UTC
  To: Sasha Levin; +Cc: Kevin Wolf, penberg, kvm, asias.hejun, prasadjoshi124

OK, thanks.
But fsync() is too slow, so I am trying to find a way to sync only a range of a file.
Are there any solutions that would meet this purpose?

Thanks
Tianyu Lan


* RE: [RFC v2 PATCH] kvm tools, qcow: Add the support for copy-on-write clusters
From: Sasha Levin @ 2011-11-20  6:23 UTC
  To: Lan, Tianyu; +Cc: Kevin Wolf, penberg, kvm, asias.hejun, prasadjoshi124

On Sun, 2011-11-20 at 14:14 +0800, Lan, Tianyu wrote:
> OK, thanks.
> But fsync() is too slow, so I am trying to find a way to sync only a range of a file.
> Are there any solutions that would meet this purpose?

fdatasync() is as good as it'll get.

tbh, maybe we should just consider opening QCOW images with O_SYNC and
just get it over with?
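
Either way, roughly (a sketch with error handling omitted; "filename"
stands in for however the image path reaches qcow2_probe()):

	/* explicit barrier after each metadata update */
	pwrite_in_full(q->fd, l1t->l1_table,
		l1t->table_size * sizeof(u64), header->l1_table_offset);
	fdatasync(q->fd);

	/* or: make every write synchronous up front */
	fd = open(filename, O_RDWR | O_SYNC);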


-- 

Sasha.



* RE: [RFC v2 PATCH] kvm tools, qcow: Add the support for copy-on-write clusters
From: Lan, Tianyu @ 2011-11-20  7:03 UTC
  To: Sasha Levin; +Cc: Kevin Wolf, penberg, kvm, asias.hejun, prasadjoshi124

Yeah, that would make the work quite simple.
In my testing, fdatasync() performs better than fsync().


* RE: [RFC v2 PATCH] kvm tools, qcow: Add the support for copy-on-write clusters
From: Pekka Enberg @ 2011-11-20 10:59 UTC
  To: Sasha Levin; +Cc: Lan, Tianyu, Kevin Wolf, kvm, asias.hejun, prasadjoshi124

On Sun, 2011-11-20 at 14:14 +0800, Lan, Tianyu wrote:
> > OK, thanks.
> > But fsync() is too slow, so I am trying to find a way to sync only a range of a file.
> > Are there any solutions that would meet this purpose?

On Sun, 2011-11-20 at 08:23 +0200, Sasha Levin wrote:
> fdatasync() is as good as it'll get.
> 
> tbh, maybe we should just consider opening QCOW images with O_SYNC and
> just get it over with?

No, let's not do that. It's easier to improve the performance of correct
code that doesn't use O_SYNC.

			Pekka



* Re: [RFC v2 PATCH] kvm tools, qcow: Add the support for copy-on-write clusters
From: Kevin Wolf @ 2011-11-21  8:30 UTC
  To: Pekka Enberg; +Cc: Sasha Levin, Lan, Tianyu, kvm, asias.hejun, prasadjoshi124

On 20.11.2011 11:59, Pekka Enberg wrote:
> On Sun, 2011-11-20 at 14:14 +0800, Lan, Tianyu wrote:
>>> OK, thanks.
>>> But fsync() is too slow, so I am trying to find a way to sync only a range of a file.
>>> Are there any solutions that would meet this purpose?
> 
> On Sun, 2011-11-20 at 08:23 +0200, Sasha Levin wrote:
>> fdatasync() is as good as it'll get.
>>
>> tbh, maybe we should just consider opening QCOW images with O_SYNC and
>> just get it over with?
> 
> No, let's not do that. It's easier to improve the performance of correct
> code that doesn't use O_SYNC.

Yes, O_SYNC gives you horrible performance. With explicit fsyncs, cluster
allocation is somewhat slow (you can try to speed it up by batching
updates like qemu does), but at least rewrites will perform reasonably
close to raw.
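
A very rough sketch of the batching idea (the flush_dirty_*() helpers
are hypothetical, not qemu's actual code): defer the table writes while
a request is processed, then flush all dirty metadata with one barrier
per ordering constraint instead of one per table:

	/* refcounts must be stable before the L2 entries that use them */
	if (flush_dirty_refcount_blocks(q) < 0 || fsync(q->fd) < 0)
		return -1;
	if (flush_dirty_l2_tables(q) < 0)
		return -1;
	return fsync(q->fd);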

Kevin
