All of lore.kernel.org
 help / color / mirror / Atom feed
* switch loop and target to use ITER_BVEC iov_iter
@ 2015-01-18 15:07 Christoph Hellwig
  2015-01-18 15:07 ` [PATCH 1/3] fs: add vfs_bvec_{read,write} helpers Christoph Hellwig
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-01-18 15:07 UTC (permalink / raw)
  To: Al Viro
  Cc: Jens Axboe, Nicholas Bellinger, Ming Lei, linux-fsdevel, target-devel

This series adds two new helpers to easily read from and write to bio_bvecs,
and switches the loop driver and target file backend to use it.

Using bio_vecs directly avoids the need to kmap individual elements in
the callers, which is especially important in the target driver, and also
gets rid of the horrible splice code abuse hack in the loop driver.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/3] fs: add vfs_bvec_{read,write} helpers
  2015-01-18 15:07 switch loop and target to use ITER_BVEC iov_iter Christoph Hellwig
@ 2015-01-18 15:07 ` Christoph Hellwig
  2015-01-23  6:00   ` Al Viro
  2015-01-18 15:07 ` [PATCH 2/3] loop: convert to vfs_bvec_write Christoph Hellwig
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2015-01-18 15:07 UTC (permalink / raw)
  To: Al Viro
  Cc: Jens Axboe, Nicholas Bellinger, Ming Lei, linux-fsdevel, target-devel

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/read_write.c    | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/splice.c        | 23 +++------------------
 include/linux/fs.h |  5 +++++
 3 files changed, 68 insertions(+), 20 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index c0805c9..299e262 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -333,6 +333,66 @@ out_putf:
 }
 #endif
 
+ssize_t vfs_bvec_read(struct file *file, struct bio_vec *vec,
+		unsigned long nr_segs, size_t count, loff_t *ppos)
+{
+	struct iov_iter iter;
+	struct kiocb kiocb;
+	ssize_t ret;
+
+	if (!file->f_op->read_iter)
+		return -EBADFD;
+
+	iter.type = ITER_BVEC | READ;
+	iter.bvec = vec;
+	iter.nr_segs = nr_segs;
+	iter.count = count;
+	iter.iov_offset = 0;
+
+	init_sync_kiocb(&kiocb, file);
+	kiocb.ki_pos = *ppos;
+	kiocb.ki_nbytes = count;
+
+	ret = file->f_op->read_iter(&kiocb, &iter);
+	if (ret == -EIOCBQUEUED)
+		ret = wait_on_sync_kiocb(&kiocb);
+
+	if (ret > 0)
+		*ppos = kiocb.ki_pos;
+	return ret;
+}
+EXPORT_SYMBOL(vfs_bvec_read);
+
+ssize_t vfs_bvec_write(struct file *file, struct bio_vec *vec,
+		unsigned long nr_segs, size_t count, loff_t *ppos)
+{
+	struct iov_iter iter;
+	struct kiocb kiocb;
+	ssize_t ret;
+
+	if (!file->f_op->write_iter)
+		return -EBADFD;
+
+	iter.type = ITER_BVEC | WRITE;
+	iter.bvec = vec;
+	iter.nr_segs = nr_segs;
+	iter.count = count;
+	iter.iov_offset = 0;
+
+	init_sync_kiocb(&kiocb, file);
+	kiocb.ki_pos = *ppos;
+	kiocb.ki_nbytes = count;
+
+	ret = file->f_op->write_iter(&kiocb, &iter);
+	if (ret == -EIOCBQUEUED)
+		ret = wait_on_sync_kiocb(&kiocb);
+
+	if (ret > 0)
+		*ppos = kiocb.ki_pos;
+	return ret;
+}
+EXPORT_SYMBOL(vfs_bvec_write);
+
 /*
  * rw_verify_area doesn't like huge counts. We limit
  * them to something that fits in "int" so that others
diff --git a/fs/splice.c b/fs/splice.c
index 75c6058..370e6c3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -960,8 +960,6 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
 	splice_from_pipe_begin(&sd);
 	while (sd.total_len) {
-		struct iov_iter from;
-		struct kiocb kiocb;
 		size_t left;
 		int n, idx;
 
@@ -1005,29 +1003,14 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 			left -= this_len;
 		}
 
-		/* ... iov_iter */
-		from.type = ITER_BVEC | WRITE;
-		from.bvec = array;
-		from.nr_segs = n;
-		from.count = sd.total_len - left;
-		from.iov_offset = 0;
-
-		/* ... and iocb */
-		init_sync_kiocb(&kiocb, out);
-		kiocb.ki_pos = sd.pos;
-		kiocb.ki_nbytes = sd.total_len - left;
-
-		/* now, send it */
-		ret = out->f_op->write_iter(&kiocb, &from);
-		if (-EIOCBQUEUED == ret)
-			ret = wait_on_sync_kiocb(&kiocb);
-
+		ret = vfs_bvec_write(out, array, n, sd.total_len - left,
+					&sd.pos);
 		if (ret <= 0)
 			break;
 
 		sd.num_spliced += ret;
 		sd.total_len -= ret;
-		*ppos = sd.pos = kiocb.ki_pos;
+		*ppos = sd.pos;
 
 		/* dismiss the fully eaten buffers, adjust the partial one */
 		while (ret) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 42efe13..6423bdc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2494,6 +2494,11 @@ extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t l
 extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 
+ssize_t vfs_bvec_read(struct file *file, struct bio_vec *vec,
+		unsigned long nr_segs, size_t count, loff_t *ppos);
+ssize_t vfs_bvec_write(struct file *file, struct bio_vec *vec,
+		unsigned long nr_segs, size_t count, loff_t *ppos);
+
 /* fs/block_dev.c */
 extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/3] loop: convert to vfs_bvec_write
  2015-01-18 15:07 switch loop and target to use ITER_BVEC iov_iter Christoph Hellwig
  2015-01-18 15:07 ` [PATCH 1/3] fs: add vfs_bvec_{read,write} helpers Christoph Hellwig
@ 2015-01-18 15:07 ` Christoph Hellwig
  2015-01-18 15:07 ` [PATCH 3/3] target: use vfs_bvec_read/write Christoph Hellwig
  2015-01-22  4:11 ` switch loop and target to use ITER_BVEC iov_iter Ming Lei
  3 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-01-18 15:07 UTC (permalink / raw)
  To: Al Viro
  Cc: Jens Axboe, Nicholas Bellinger, Ming Lei, linux-fsdevel, target-devel

---
 drivers/block/loop.c | 281 ++++++++++++++++++++-------------------------------
 1 file changed, 109 insertions(+), 172 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 6cb1beb..3e70116 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -85,28 +85,6 @@ static DEFINE_MUTEX(loop_index_mutex);
 static int max_part;
 static int part_shift;
 
-/*
- * Transfer functions
- */
-static int transfer_none(struct loop_device *lo, int cmd,
-			 struct page *raw_page, unsigned raw_off,
-			 struct page *loop_page, unsigned loop_off,
-			 int size, sector_t real_block)
-{
-	char *raw_buf = kmap_atomic(raw_page) + raw_off;
-	char *loop_buf = kmap_atomic(loop_page) + loop_off;
-
-	if (cmd == READ)
-		memcpy(loop_buf, raw_buf, size);
-	else
-		memcpy(raw_buf, loop_buf, size);
-
-	kunmap_atomic(loop_buf);
-	kunmap_atomic(raw_buf);
-	cond_resched();
-	return 0;
-}
-
 static int transfer_xor(struct loop_device *lo, int cmd,
 			struct page *raw_page, unsigned raw_off,
 			struct page *loop_page, unsigned loop_off,
@@ -145,7 +123,6 @@ static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
 
 static struct loop_func_table none_funcs = {
 	.number = LO_CRYPT_NONE,
-	.transfer = transfer_none,
 }; 	
 
 static struct loop_func_table xor_funcs = {
@@ -212,203 +189,156 @@ lo_do_transfer(struct loop_device *lo, int cmd,
 	       struct page *lpage, unsigned loffs,
 	       int size, sector_t rblock)
 {
-	if (unlikely(!lo->transfer))
+	int ret;
+
+	ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
+	if (likely(!ret))
 		return 0;
 
-	return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
+	printk_ratelimited(KERN_ERR
+		"loop: Transfer error at byte offset %llu, length %i.\n",
+		(unsigned long long)rblock << 9, size);
+	return ret;
 }
 
-/**
- * __do_lo_send_write - helper for writing data to a loop device
- *
- * This helper just factors out common code between do_lo_send_direct_write()
- * and do_lo_send_write().
- */
-static int __do_lo_send_write(struct file *file,
-		u8 *buf, const int len, loff_t pos)
+static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
 {
 	ssize_t bw;
-	mm_segment_t old_fs = get_fs();
 
 	file_start_write(file);
-	set_fs(get_ds());
-	bw = file->f_op->write(file, buf, len, &pos);
-	set_fs(old_fs);
+	bw = vfs_bvec_write(file, bvec, 1, bvec->bv_len, ppos);
 	file_end_write(file);
-	if (likely(bw == len))
+
+	if (likely(bw ==  bvec->bv_len))
 		return 0;
-	printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
-			(unsigned long long)pos, len);
+
+	printk_ratelimited(KERN_ERR
+		"loop: Write error at byte offset %llu, length %i.\n",
+		(unsigned long long)*ppos, bvec->bv_len);
 	if (bw >= 0)
 		bw = -EIO;
 	return bw;
 }
 
-/**
- * do_lo_send_direct_write - helper for writing data to a loop device
- *
- * This is the fast, non-transforming version that does not need double
- * buffering.
- */
-static int do_lo_send_direct_write(struct loop_device *lo,
-		struct bio_vec *bvec, loff_t pos, struct page *page)
+static int lo_write_simple(struct loop_device *lo, struct bio *bio, loff_t pos)
 {
-	ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
-			kmap(bvec->bv_page) + bvec->bv_offset,
-			bvec->bv_len, pos);
-	kunmap(bvec->bv_page);
-	cond_resched();
-	return bw;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+	int ret = 0;
+
+	bio_for_each_segment(bvec, bio, iter) {
+		ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos);
+		if (ret < 0)
+			break;
+		cond_resched();
+	}
+
+	return ret;
 }
 
-/**
- * do_lo_send_write - helper for writing data to a loop device
- *
+/*
  * This is the slow, transforming version that needs to double buffer the
  * data as it cannot do the transformations in place without having direct
  * access to the destination pages of the backing file.
  */
-static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
-		loff_t pos, struct page *page)
+static int lo_write_transfer(struct loop_device *lo, struct bio *bio,
+		loff_t pos)
 {
-	int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
-			bvec->bv_offset, bvec->bv_len, pos >> 9);
-	if (likely(!ret))
-		return __do_lo_send_write(lo->lo_backing_file,
-				page_address(page), bvec->bv_len,
-				pos);
-	printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, "
-			"length %i.\n", (unsigned long long)pos, bvec->bv_len);
-	if (ret > 0)
-		ret = -EIO;
-	return ret;
-}
-
-static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
-{
-	int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
-			struct page *page);
-	struct bio_vec bvec;
+	struct bio_vec bvec, b;
 	struct bvec_iter iter;
-	struct page *page = NULL;
+	struct page *page;
 	int ret = 0;
 
-	if (lo->transfer != transfer_none) {
-		page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
-		if (unlikely(!page))
-			goto fail;
-		kmap(page);
-		do_lo_send = do_lo_send_write;
-	} else {
-		do_lo_send = do_lo_send_direct_write;
-	}
+	page = alloc_page(GFP_NOIO);
+	if (unlikely(!page))
+		return -ENOMEM;
 
 	bio_for_each_segment(bvec, bio, iter) {
-		ret = do_lo_send(lo, &bvec, pos, page);
+		ret = lo_do_transfer(lo, WRITE, page, 0, bvec.bv_page,
+			bvec.bv_offset, bvec.bv_len, pos >> 9);
+		if (unlikely(ret))
+			break;
+
+		b.bv_page = page;
+		b.bv_offset = 0;
+		b.bv_len = bvec.bv_len;
+		ret = lo_write_bvec(lo->lo_backing_file, &b, &pos);
 		if (ret < 0)
 			break;
-		pos += bvec.bv_len;
 	}
-	if (page) {
-		kunmap(page);
-		__free_page(page);
-	}
-out:
+
+	__free_page(page);
 	return ret;
-fail:
-	printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
-	ret = -ENOMEM;
-	goto out;
 }
 
-struct lo_read_data {
-	struct loop_device *lo;
-	struct page *page;
-	unsigned offset;
-	int bsize;
-};
-
-static int
-lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
-		struct splice_desc *sd)
+static int lo_read_simple(struct loop_device *lo, struct bio *bio, loff_t pos)
 {
-	struct lo_read_data *p = sd->u.data;
-	struct loop_device *lo = p->lo;
-	struct page *page = buf->page;
-	sector_t IV;
-	int size;
-
-	IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
-							(buf->offset >> 9);
-	size = sd->len;
-	if (size > p->bsize)
-		size = p->bsize;
-
-	if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
-		printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n",
-		       page->index);
-		size = -EINVAL;
-	}
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+	ssize_t len;
 
-	flush_dcache_page(p->page);
+	bio_for_each_segment(bvec, bio, iter) {
+		len = vfs_bvec_read(lo->lo_backing_file, &bvec, 1, bvec.bv_len,
+				    &pos);
+		if (len < 0)
+			return len;
 
-	if (size > 0)
-		p->offset += size;
+		flush_dcache_page(bvec.bv_page);
 
-	return size;
-}
+		if (len != bvec.bv_len) {
+			zero_fill_bio(bio);
+			break;
+		}
+		cond_resched();
+	}
 
-static int
-lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
-{
-	return __splice_from_pipe(pipe, sd, lo_splice_actor);
+	return 0;
 }
 
-static ssize_t
-do_lo_receive(struct loop_device *lo,
-	      struct bio_vec *bvec, int bsize, loff_t pos)
+static int lo_read_transfer(struct loop_device *lo, struct bio *bio,
+		loff_t pos)
 {
-	struct lo_read_data cookie;
-	struct splice_desc sd;
-	struct file *file;
-	ssize_t retval;
-
-	cookie.lo = lo;
-	cookie.page = bvec->bv_page;
-	cookie.offset = bvec->bv_offset;
-	cookie.bsize = bsize;
+	struct bio_vec bvec, b;
+	struct bvec_iter iter;
+	struct page *page;
+	ssize_t len;
+	int ret = 0;
 
-	sd.len = 0;
-	sd.total_len = bvec->bv_len;
-	sd.flags = 0;
-	sd.pos = pos;
-	sd.u.data = &cookie;
+	page = alloc_page(GFP_NOIO);
+	if (unlikely(!page))
+		return -ENOMEM;
 
-	file = lo->lo_backing_file;
-	retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
+	bio_for_each_segment(bvec, bio, iter) {
+		loff_t offset = pos;
 
-	return retval;
-}
+		b.bv_page = page;
+		b.bv_offset = 0;
+		b.bv_len = bvec.bv_len;
 
-static int
-lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
-{
-	struct bio_vec bvec;
-	struct bvec_iter iter;
-	ssize_t s;
+		len = vfs_bvec_read(lo->lo_backing_file, &b, 1, b.bv_len,
+				    &pos);
+		if (len < 0) {
+			ret = len;
+			goto out_free_page;
+		}
 
-	bio_for_each_segment(bvec, bio, iter) {
-		s = do_lo_receive(lo, &bvec, bsize, pos);
-		if (s < 0)
-			return s;
+		ret = lo_do_transfer(lo, READ, page, 0, bvec.bv_page,
+			bvec.bv_offset, len, offset >> 9);
+		if (ret)
+			goto out_free_page;
+	
+		flush_dcache_page(bvec.bv_page);
 
-		if (s != bvec.bv_len) {
+		if (len != bvec.bv_len) {
 			zero_fill_bio(bio);
 			break;
 		}
-		pos += bvec.bv_len;
 	}
-	return 0;
+
+	ret = 0;
+out_free_page:
+	__free_page(page);
+	return ret;
 }
 
 static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
@@ -452,15 +382,22 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 			goto out;
 		}
 
-		ret = lo_send(lo, bio, pos);
+		if (lo->transfer)
+			ret = lo_write_transfer(lo, bio, pos);
+		else
+			ret = lo_write_simple(lo, bio, pos);
 
 		if ((bio->bi_rw & REQ_FUA) && !ret) {
 			ret = vfs_fsync(file, 0);
 			if (unlikely(ret && ret != -EINVAL))
 				ret = -EIO;
 		}
-	} else
-		ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
+	} else {
+		if (lo->transfer)
+			ret = lo_read_transfer(lo, bio, pos);
+		else
+			ret = lo_read_simple(lo, bio, pos);
+	}
 
 out:
 	return ret;
@@ -886,7 +823,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	lo->lo_device = bdev;
 	lo->lo_flags = lo_flags;
 	lo->lo_backing_file = file;
-	lo->transfer = transfer_none;
+	lo->transfer = NULL;
 	lo->ioctl = NULL;
 	lo->lo_sizelimit = 0;
 	lo->lo_bio_count = 0;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/3] target: use vfs_bvec_read/write
  2015-01-18 15:07 switch loop and target to use ITER_BVEC iov_iter Christoph Hellwig
  2015-01-18 15:07 ` [PATCH 1/3] fs: add vfs_bvec_{read,write} helpers Christoph Hellwig
  2015-01-18 15:07 ` [PATCH 2/3] loop: convert to vfs_bvec_write Christoph Hellwig
@ 2015-01-18 15:07 ` Christoph Hellwig
  2015-01-18 15:37   ` Sagi Grimberg
                     ` (2 more replies)
  2015-01-22  4:11 ` switch loop and target to use ITER_BVEC iov_iter Ming Lei
  3 siblings, 3 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-01-18 15:07 UTC (permalink / raw)
  To: Al Viro
  Cc: Jens Axboe, Nicholas Bellinger, Ming Lei, linux-fsdevel, target-devel

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/target/target_core_file.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index d836de2..63ca7d5 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -331,36 +331,31 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl,
 	struct fd_dev *dev = FD_DEV(se_dev);
 	struct file *fd = dev->fd_file;
 	struct scatterlist *sg;
-	struct iovec *iov;
-	mm_segment_t old_fs;
+	struct bio_vec *bvec;
+	ssize_t len = 0;
 	loff_t pos = (cmd->t_task_lba * se_dev->dev_attrib.block_size);
 	int ret = 0, i;
 
-	iov = kzalloc(sizeof(struct iovec) * sgl_nents, GFP_KERNEL);
-	if (!iov) {
+	bvec = kzalloc(sizeof(struct bio_vec) * sgl_nents, GFP_KERNEL);
+	if (!bvec) {
 		pr_err("Unable to allocate fd_do_readv iov[]\n");
 		return -ENOMEM;
 	}
 
 	for_each_sg(sgl, sg, sgl_nents, i) {
-		iov[i].iov_len = sg->length;
-		iov[i].iov_base = kmap(sg_page(sg)) + sg->offset;
-	}
+		bvec[i].bv_page = sg_page(sg);
+		bvec[i].bv_len = sg->length;
+		bvec[i].bv_offset = sg->offset;
 
-	old_fs = get_fs();
-	set_fs(get_ds());
+		len += sg->length;
+	}
 
 	if (is_write)
-		ret = vfs_writev(fd, &iov[0], sgl_nents, &pos);
+		ret = vfs_bvec_write(fd, bvec, sgl_nents, len, &pos);
 	else
-		ret = vfs_readv(fd, &iov[0], sgl_nents, &pos);
-
-	set_fs(old_fs);
-
-	for_each_sg(sgl, sg, sgl_nents, i)
-		kunmap(sg_page(sg));
+		ret = vfs_bvec_read(fd, bvec, sgl_nents, len, &pos);
 
-	kfree(iov);
+	kfree(bvec);
 
 	if (is_write) {
 		if (ret < 0 || ret != cmd->data_length) {
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] target: use vfs_bvec_read/write
  2015-01-18 15:07 ` [PATCH 3/3] target: use vfs_bvec_read/write Christoph Hellwig
@ 2015-01-18 15:37   ` Sagi Grimberg
  2015-01-20 23:32   ` Nicholas A. Bellinger
  2015-01-23 14:08   ` Ming Lei
  2 siblings, 0 replies; 14+ messages in thread
From: Sagi Grimberg @ 2015-01-18 15:37 UTC (permalink / raw)
  To: Christoph Hellwig, Al Viro
  Cc: Jens Axboe, Nicholas Bellinger, Ming Lei, linux-fsdevel, target-devel

On 1/18/2015 5:07 PM, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   drivers/target/target_core_file.c | 29 ++++++++++++-----------------
>   1 file changed, 12 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
> index d836de2..63ca7d5 100644
> --- a/drivers/target/target_core_file.c
> +++ b/drivers/target/target_core_file.c
> @@ -331,36 +331,31 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl,
>   	struct fd_dev *dev = FD_DEV(se_dev);
>   	struct file *fd = dev->fd_file;
>   	struct scatterlist *sg;
> -	struct iovec *iov;
> -	mm_segment_t old_fs;
> +	struct bio_vec *bvec;
> +	ssize_t len = 0;
>   	loff_t pos = (cmd->t_task_lba * se_dev->dev_attrib.block_size);
>   	int ret = 0, i;
>
> -	iov = kzalloc(sizeof(struct iovec) * sgl_nents, GFP_KERNEL);
> -	if (!iov) {
> +	bvec = kzalloc(sizeof(struct bio_vec) * sgl_nents, GFP_KERNEL);
> +	if (!bvec) {

Can you make that kcalloc so checkpatch would be happy?

Otherwise, Looks good to me.
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>

>   		pr_err("Unable to allocate fd_do_readv iov[]\n");
>   		return -ENOMEM;
>   	}
>
>   	for_each_sg(sgl, sg, sgl_nents, i) {
> -		iov[i].iov_len = sg->length;
> -		iov[i].iov_base = kmap(sg_page(sg)) + sg->offset;
> -	}
> +		bvec[i].bv_page = sg_page(sg);
> +		bvec[i].bv_len = sg->length;
> +		bvec[i].bv_offset = sg->offset;
>
> -	old_fs = get_fs();
> -	set_fs(get_ds());
> +		len += sg->length;
> +	}
>
>   	if (is_write)
> -		ret = vfs_writev(fd, &iov[0], sgl_nents, &pos);
> +		ret = vfs_bvec_write(fd, bvec, sgl_nents, len, &pos);
>   	else
> -		ret = vfs_readv(fd, &iov[0], sgl_nents, &pos);
> -
> -	set_fs(old_fs);
> -
> -	for_each_sg(sgl, sg, sgl_nents, i)
> -		kunmap(sg_page(sg));
> +		ret = vfs_bvec_read(fd, bvec, sgl_nents, len, &pos);
>
> -	kfree(iov);
> +	kfree(bvec);
>
>   	if (is_write) {
>   		if (ret < 0 || ret != cmd->data_length) {
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] target: use vfs_bvec_read/write
  2015-01-18 15:07 ` [PATCH 3/3] target: use vfs_bvec_read/write Christoph Hellwig
  2015-01-18 15:37   ` Sagi Grimberg
@ 2015-01-20 23:32   ` Nicholas A. Bellinger
  2015-01-23 14:08   ` Ming Lei
  2 siblings, 0 replies; 14+ messages in thread
From: Nicholas A. Bellinger @ 2015-01-20 23:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, Jens Axboe, Ming Lei, linux-fsdevel, target-devel

On Sun, 2015-01-18 at 16:07 +0100, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/target/target_core_file.c | 29 ++++++++++++-----------------
>  1 file changed, 12 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
> index d836de2..63ca7d5 100644
> --- a/drivers/target/target_core_file.c
> +++ b/drivers/target/target_core_file.c
> @@ -331,36 +331,31 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl,
>  	struct fd_dev *dev = FD_DEV(se_dev);
>  	struct file *fd = dev->fd_file;
>  	struct scatterlist *sg;
> -	struct iovec *iov;
> -	mm_segment_t old_fs;
> +	struct bio_vec *bvec;
> +	ssize_t len = 0;
>  	loff_t pos = (cmd->t_task_lba * se_dev->dev_attrib.block_size);
>  	int ret = 0, i;
>  
> -	iov = kzalloc(sizeof(struct iovec) * sgl_nents, GFP_KERNEL);
> -	if (!iov) {
> +	bvec = kzalloc(sizeof(struct bio_vec) * sgl_nents, GFP_KERNEL);
> +	if (!bvec) {
>  		pr_err("Unable to allocate fd_do_readv iov[]\n");
>  		return -ENOMEM;
>  	}
>  
>  	for_each_sg(sgl, sg, sgl_nents, i) {
> -		iov[i].iov_len = sg->length;
> -		iov[i].iov_base = kmap(sg_page(sg)) + sg->offset;
> -	}
> +		bvec[i].bv_page = sg_page(sg);
> +		bvec[i].bv_len = sg->length;
> +		bvec[i].bv_offset = sg->offset;
>  
> -	old_fs = get_fs();
> -	set_fs(get_ds());
> +		len += sg->length;
> +	}
>  
>  	if (is_write)
> -		ret = vfs_writev(fd, &iov[0], sgl_nents, &pos);
> +		ret = vfs_bvec_write(fd, bvec, sgl_nents, len, &pos);
>  	else
> -		ret = vfs_readv(fd, &iov[0], sgl_nents, &pos);
> -
> -	set_fs(old_fs);
> -
> -	for_each_sg(sgl, sg, sgl_nents, i)
> -		kunmap(sg_page(sg));
> +		ret = vfs_bvec_read(fd, bvec, sgl_nents, len, &pos);
>  
> -	kfree(iov);
> +	kfree(bvec);
>  
>  	if (is_write) {
>  		if (ret < 0 || ret != cmd->data_length) {

Looks fine.  How about doing the same for fd_execute_write_same()..?

--nab


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: switch loop and target to use ITER_BVEC iov_iter
  2015-01-18 15:07 switch loop and target to use ITER_BVEC iov_iter Christoph Hellwig
                   ` (2 preceding siblings ...)
  2015-01-18 15:07 ` [PATCH 3/3] target: use vfs_bvec_read/write Christoph Hellwig
@ 2015-01-22  4:11 ` Ming Lei
  2015-01-25 13:45   ` Christoph Hellwig
  3 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2015-01-22  4:11 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, Jens Axboe, Nicholas Bellinger, Linux FS Devel, target-devel

On Sun, Jan 18, 2015 at 11:07 PM, Christoph Hellwig <hch@lst.de> wrote:
> This series adds two new helpers to easily read from and write to bio_bvecs,
> and switches the loop driver and target file backend to use it.
>
> Using bio_vecs directly avoids the need to kmap individual elements in
> the callers, which is especially important in the target driver, and also
> gets rid of the horrible splice code abuse hack in the loop driver.

IMO, from a driver or kernel view, the submit()/complete() model
is very common, and is more efficient because unnecessary
context switches and process creation can be avoided when concurrent
read/write is needed for sake of performance, so I think kernel
aio based API is better. My test result about kernel aio based loop
shows CPU can be saved much with kernel AIO in [2].

Currently the kernel AIO based approach for loop(v2 patches, [1] [2])
only handles direct I/O with ITER_BVEC, but it is quite easy to
cover non-direct I/O by BVEC with supporting current submission.

For encrypt_type loop, it may be covered by kernel aio interface too.

I will try to cover the above two loop cases by kernel based AIO in
v3, then the current ->write() and splice code may be removed once
it is done.

[1] http://marc.info/?l=linux-kernel&m=142116394225654&w=2
[2] http://marc.info/?l=linux-kernel&m=142116397525668&w=2

Thanks,
Ming Lei

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] fs: add vfs_bvec_{read,write} helpers
  2015-01-18 15:07 ` [PATCH 1/3] fs: add vfs_bvec_{read,write} helpers Christoph Hellwig
@ 2015-01-23  6:00   ` Al Viro
  0 siblings, 0 replies; 14+ messages in thread
From: Al Viro @ 2015-01-23  6:00 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Nicholas Bellinger, Ming Lei, linux-fsdevel, target-devel

On Sun, Jan 18, 2015 at 04:07:02PM +0100, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/read_write.c    | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/splice.c        | 23 +++------------------
>  include/linux/fs.h |  5 +++++
>  3 files changed, 68 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/read_write.c b/fs/read_write.c
> index c0805c9..299e262 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -333,6 +333,66 @@ out_putf:
>  }
>  #endif
>  
> +ssize_t vfs_bvec_read(struct file *file, struct bio_vec *vec,
> +		unsigned long nr_segs, size_t count, loff_t *ppos)
> +{
> +	struct iov_iter iter;
> +	struct kiocb kiocb;
> +	ssize_t ret;
> +
> +	if (!file->f_op->read_iter)
> +		return -EBADFD;

#define EBADFD          77      /* File descriptor in bad state */

Looks really odd.  For crying out loud, what's a STREAMS-related error
doing here?  What's more, it's a bloody awful one - spelling is too
similar to EBADF and it's really asking for typos that end up as
uncaught rarely-triggered bugs.

> +	iter.type = ITER_BVEC | READ;
> +	iter.bvec = vec;
> +	iter.nr_segs = nr_segs;
> +	iter.count = count;
> +	iter.iov_offset = 0;

iov_iter_bvec(), please (see vfs.git#for-next).

> +ssize_t vfs_bvec_write(struct file *file, struct bio_vec *vec,
> +		unsigned long nr_segs, size_t count, loff_t *ppos)

Same comments here.  I can pick that, but EBADFD is awful and I would
very much prefer to avoid it from the very beginning.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] target: use vfs_bvec_read/write
  2015-01-18 15:07 ` [PATCH 3/3] target: use vfs_bvec_read/write Christoph Hellwig
  2015-01-18 15:37   ` Sagi Grimberg
  2015-01-20 23:32   ` Nicholas A. Bellinger
@ 2015-01-23 14:08   ` Ming Lei
  2015-01-25 13:43     ` Christoph Hellwig
  2 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2015-01-23 14:08 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, Jens Axboe, Nicholas Bellinger, linux-fsdevel, target-devel

Hello,

On 1/18/15, Christoph Hellwig <hch@lst.de> wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/target/target_core_file.c | 29 ++++++++++++-----------------
>  1 file changed, 12 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/target/target_core_file.c
> b/drivers/target/target_core_file.c
> index d836de2..63ca7d5 100644
> --- a/drivers/target/target_core_file.c
> +++ b/drivers/target/target_core_file.c
> @@ -331,36 +331,31 @@ static int fd_do_rw(struct se_cmd *cmd, struct
> scatterlist *sgl,
>  	struct fd_dev *dev = FD_DEV(se_dev);
>  	struct file *fd = dev->fd_file;
>  	struct scatterlist *sg;
> -	struct iovec *iov;
> -	mm_segment_t old_fs;
> +	struct bio_vec *bvec;
> +	ssize_t len = 0;
>  	loff_t pos = (cmd->t_task_lba * se_dev->dev_attrib.block_size);
>  	int ret = 0, i;
>
> -	iov = kzalloc(sizeof(struct iovec) * sgl_nents, GFP_KERNEL);
> -	if (!iov) {
> +	bvec = kzalloc(sizeof(struct bio_vec) * sgl_nents, GFP_KERNEL);
> +	if (!bvec) {
>  		pr_err("Unable to allocate fd_do_readv iov[]\n");
>  		return -ENOMEM;
>  	}
>
>  	for_each_sg(sgl, sg, sgl_nents, i) {
> -		iov[i].iov_len = sg->length;
> -		iov[i].iov_base = kmap(sg_page(sg)) + sg->offset;
> -	}
> +		bvec[i].bv_page = sg_page(sg);
> +		bvec[i].bv_len = sg->length;
> +		bvec[i].bv_offset = sg->offset;

Sorry, I have one question: I understand one bvec should only cover
one page, but
one sg may cover lots of pages, so could ITER_BVEC handle that correctly?

Thanks,
Ming Lei

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] target: use vfs_bvec_read/write
  2015-01-23 14:08   ` Ming Lei
@ 2015-01-25 13:43     ` Christoph Hellwig
  2015-01-26  2:02       ` Ming Lei
  0 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2015-01-25 13:43 UTC (permalink / raw)
  To: Ming Lei
  Cc: Christoph Hellwig, Al Viro, Jens Axboe, Nicholas Bellinger,
	linux-fsdevel, target-devel

On Fri, Jan 23, 2015 at 10:08:01PM +0800, Ming Lei wrote:
> >
> >  	for_each_sg(sgl, sg, sgl_nents, i) {
> > -		iov[i].iov_len = sg->length;
> > -		iov[i].iov_base = kmap(sg_page(sg)) + sg->offset;
> > -	}
> > +		bvec[i].bv_page = sg_page(sg);
> > +		bvec[i].bv_len = sg->length;
> > +		bvec[i].bv_offset = sg->offset;
> 
> Sorry, I have one question: I understand one bvec should only cover
> one page, but
> one sg may cover lots of pages, so could ITER_BVEC handle that correctly?

Each scatterlist entry only contains a single page, which is returned
by sg_page(sg).  The existing code already relies on it because it
kmaps that page.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: switch loop and target to use ITER_BVEC iov_iter
  2015-01-22  4:11 ` switch loop and target to use ITER_BVEC iov_iter Ming Lei
@ 2015-01-25 13:45   ` Christoph Hellwig
  0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-01-25 13:45 UTC (permalink / raw)
  To: Ming Lei
  Cc: Al Viro, Jens Axboe, Nicholas Bellinger, Linux FS Devel, target-devel

On Thu, Jan 22, 2015 at 12:11:18PM +0800, Ming Lei wrote:
> On Sun, Jan 18, 2015 at 11:07 PM, Christoph Hellwig <hch@lst.de> wrote:
> > This series adds two new helpers to easily read from and write to bio_bvecs,
> > and switches the loop driver and target file backend to use it.
> >
> > Using bio_vecs directly avoids the need to kmap individual elements in
> > the callers, which is especially important in the target driver, and also
> > gets rid of the horrible splice code abuse hack in the loop driver.
> 
> IMO, from driver or kernel view, submit()/complete() model
> is very very common, and is more efficient because unnecessary
> context switch and process' creation can be avoided when concurrent
> read/write is needed for sake of performance, so I think kernel
> aio based API is better. My test result about kernel aio based loop
> shows CPU can be saved much with kernel AIO in [2].
> 
> Currently the kernel AIO based approach for loop(v2 patches, [1] [2])
> only handles direct I/O with ITER_BVEC, but it is quite easy to
> cover non-direct I/O by BVEC with supporting current submission.
> 
> For encrypt_type loop, it may be covered by kernel aio interface too.
> 
> I will try to cover the above two loop cases by kernel based AIO in
> v3, then the current ->write() and splice code may be removed once
> it is done.

We'll still need to support the buffered I/O case.  Some people may
rely on it for their workload performance, and other cases like
a filesystem with a small sector size ontop of a loop device on a
filesystem or device with a larger sector size won't even work at all
using direct I/O.

So we'll need something like my series to get started, and if aio
proves to win over using a kernel direct I/O read/write (which I suspect it
will, but we'll need numbers) we can use aio for the direct I/O code
path.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] target: use vfs_bvec_read/write
  2015-01-25 13:43     ` Christoph Hellwig
@ 2015-01-26  2:02       ` Ming Lei
  2015-01-26 16:59         ` Christoph Hellwig
  0 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2015-01-26  2:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, Jens Axboe, Nicholas Bellinger, Linux FS Devel, target-devel

On Sun, Jan 25, 2015 at 9:43 PM, Christoph Hellwig <hch@lst.de> wrote:
> On Fri, Jan 23, 2015 at 10:08:01PM +0800, Ming Lei wrote:
>> >
>> >     for_each_sg(sgl, sg, sgl_nents, i) {
>> > -           iov[i].iov_len = sg->length;
>> > -           iov[i].iov_base = kmap(sg_page(sg)) + sg->offset;
>> > -   }
>> > +           bvec[i].bv_page = sg_page(sg);
>> > +           bvec[i].bv_len = sg->length;
>> > +           bvec[i].bv_offset = sg->offset;
>>
>> Sorry, I have one question: I understand one bvec should only cover
>> one page, but
>> one sg may cover lots of pages, so could ITER_BVEC handle that correctly?
>
> Each scatterlist entry only contains a single page, which is returned

I mean scatterlist does not guarantee that, and one sg entry often
contains lots of pages, which DMA/bus address is continuous.

> by sg_page(sg).  The existing code already relies on it because it
> kmaps that page.

If the existing target code path can guarantee that one sg entry
only contains one page, it should be better to use bio_vec explicitly
instead of scatterlist.

Thanks,
Ming Lei

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] target: use vfs_bvec_read/write
  2015-01-26  2:02       ` Ming Lei
@ 2015-01-26 16:59         ` Christoph Hellwig
  2015-01-27  5:14           ` Ming Lei
  0 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2015-01-26 16:59 UTC (permalink / raw)
  To: Ming Lei
  Cc: Christoph Hellwig, Al Viro, Jens Axboe, Nicholas Bellinger,
	Linux FS Devel, target-devel

On Mon, Jan 26, 2015 at 10:02:30AM +0800, Ming Lei wrote:
> >> Sorry, I have one question: I understand one bvec should only cover
> >> one page, but
> >> one sg may cover lots of pages, so could ITER_BVEC handle that correctly?
> >
> > Each scatterlist entry only contains a single page, which is returned
> 
> I mean scatterlist does not guarantee that, and one sg entry often
> contains lots of pages, which DMA/bus address is continuous.

But we still get away with using page_address(), so from the VM
point of view that matters here it's the same as a single high order
page.

> > by sg_page(sg).  The existing code already relies on it because it
> > kmaps that page.
> 
> If the existing target code path can guarantee that one sg entry
> only contains one page, it should be better to use bio_vec explicitly
> instead of scatterlist.

While it allocates a single page per SGL entry (see target_alloc_sgl),
it uses the scatterlist to allow drivers to dma map it for hardware
access, so a scatterlist seems useful here.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] target: use vfs_bvec_read/write
  2015-01-26 16:59         ` Christoph Hellwig
@ 2015-01-27  5:14           ` Ming Lei
  0 siblings, 0 replies; 14+ messages in thread
From: Ming Lei @ 2015-01-27  5:14 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, Jens Axboe, Nicholas Bellinger, Linux FS Devel, target-devel

On 1/27/15, Christoph Hellwig <hch@lst.de> wrote:
> On Mon, Jan 26, 2015 at 10:02:30AM +0800, Ming Lei wrote:
>> >> Sorry, I have one question: I understand one bvec should only cover
>> >> one page, but
>> >> one sg may cover lots of pages, so could ITER_BVEC handle that
>> >> correctly?
>> >
>> > Each scatterlist entry only contains a single page, which is returned
>>
>> I mean scatterlist does not guarantee that, and one sg entry often
>> contains lots of pages, which DMA/bus address is continuous.
>
> But we still get away with using page_address(), so from the VM
> point of view that matters here it's the same as a single high order
> page.

Yes, but simply passing the page, offset and length from sg to bvec
is misleading, so it may be better to add a comment about the
fact that each sg entry covers a single page.


Thanks
Ming Lei

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-01-27  5:14 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-18 15:07 switch loop and target to use ITER_BVEC iov_iter Christoph Hellwig
2015-01-18 15:07 ` [PATCH 1/3] fs: add vfs_bvec_{read,write} helpers Christoph Hellwig
2015-01-23  6:00   ` Al Viro
2015-01-18 15:07 ` [PATCH 2/3] loop: convert to vfs_bvec_write Christoph Hellwig
2015-01-18 15:07 ` [PATCH 3/3] target: use vfs_bvec_read/write Christoph Hellwig
2015-01-18 15:37   ` Sagi Grimberg
2015-01-20 23:32   ` Nicholas A. Bellinger
2015-01-23 14:08   ` Ming Lei
2015-01-25 13:43     ` Christoph Hellwig
2015-01-26  2:02       ` Ming Lei
2015-01-26 16:59         ` Christoph Hellwig
2015-01-27  5:14           ` Ming Lei
2015-01-22  4:11 ` switch loop and target to use ITER_BVEC iov_iter Ming Lei
2015-01-25 13:45   ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.