All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] f2fs: introduce a new direct_IO write path
@ 2013-12-17  1:49 ` Jaegeuk Kim
  0 siblings, 0 replies; 9+ messages in thread
From: Jaegeuk Kim @ 2013-12-17  1:49 UTC (permalink / raw)
  Cc: Jaegeuk Kim, linux-fsdevel, linux-kernel, linux-f2fs-devel

Previously, f2fs did not support high-performance direct IO: it threw every
write request to the buffered write path, resulting in severe performance
degradation due to memory operations like copy_from_user.

This patch introduces a new direct IO path in which every write request is
processed by the generic blockdev_direct_IO() with an enhanced get_block
function.

The get_data_block() in f2fs handles:
1. if original data blocks are allocated, then give them to blockdev.
2. otherwise,
  a. preallocate requested block addresses
  b. do not use extent cache for better performance
  c. give the block addresses to blockdev

This policy implies that:
- newly allocated data are sequentially written to the disk
- updated data are randomly written to the disk.
- f2fs gives consistency on its file metadata, not file data.

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/data.c    | 145 +++++++++++++++++++++++++++++++++++++++---------------
 fs/f2fs/f2fs.h    |   2 +
 fs/f2fs/segment.c |  23 ++++++---
 3 files changed, 123 insertions(+), 47 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 15956fa..9bdacc6 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -570,74 +570,146 @@ repeat:
 	return page;
 }
 
+static int __allocate_data_block(struct dnode_of_data *dn)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct f2fs_summary sum;
+	block_t new_blkaddr;
+	struct node_info ni;
+	int type;
+
+	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
+		return -EPERM;
+	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+		return -ENOSPC;
+
+	__set_data_blkaddr(dn, NEW_ADDR);
+	dn->data_blkaddr = NEW_ADDR;
+
+	get_node_info(sbi, dn->nid, &ni);
+	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+
+	type = CURSEG_WARM_DATA;
+
+	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
+
+	/* direct IO doesn't use extent cache to maximize the performance */
+	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
+	update_extent_cache(new_blkaddr, dn);
+	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
+
+	dn->data_blkaddr = new_blkaddr;
+	return 0;
+}
+
 /*
  * This function should be used by the data read flow only where it
  * does not check the "create" flag that indicates block allocation.
  * The reason for this special functionality is to exploit VFS readahead
  * mechanism.
  */
-static int get_data_block_ro(struct inode *inode, sector_t iblock,
+static int get_data_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
 	unsigned maxblocks = bh_result->b_size >> blkbits;
 	struct dnode_of_data dn;
-	pgoff_t pgofs;
-	int err;
+	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
+	pgoff_t pgofs, end_offset;
+	bool allocate = false;
+	int err = 0, ofs = 1;
 
 	/* Get the page offset from the block offset(iblock) */
 	pgofs =	(pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
 
-	if (check_extent_cache(inode, pgofs, bh_result)) {
-		trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
-		return 0;
-	}
+	if (check_extent_cache(inode, pgofs, bh_result))
+		goto out;
+
+	if (create)
+		f2fs_lock_op(sbi);
 
 	/* When reading holes, we need its node page */
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
-	err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
+	err = get_dnode_of_data(&dn, pgofs, mode);
 	if (err) {
-		trace_f2fs_get_data_block(inode, iblock, bh_result, err);
-		return (err == -ENOENT) ? 0 : err;
+		if (err == -ENOENT)
+			err = 0;
+		goto put_out;
 	}
+	if (dn.data_blkaddr == NEW_ADDR)
+		goto put_out;
 
-	/* It does not support data allocation */
-	f2fs_bug_on(create);
-
-	if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
-		int i;
-		unsigned int end_offset;
-
+	if (dn.data_blkaddr != NULL_ADDR) {
+		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+	} else if (create) {
+		allocate = true;
+		err = __allocate_data_block(&dn);
+		if (err)
+			goto put_out;
+		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+	} else {
+		goto put_out;
+	}
+
+	end_offset = IS_INODE(dn.node_page) ?
+			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	bh_result->b_size = (((size_t)1) << blkbits);
+	dn.ofs_in_node++;
+	pgofs++;
+
+get_next:
+	if (dn.ofs_in_node >= end_offset) {
+		f2fs_put_dnode(&dn);
+		set_new_dnode(&dn, inode, NULL, NULL, 0);
+		err = get_dnode_of_data(&dn, pgofs, mode);
+		if (err) {
+			if (err == -ENOENT)
+				err = 0;
+			goto put_out;
+		}
 		end_offset = IS_INODE(dn.node_page) ?
-				ADDRS_PER_INODE(F2FS_I(inode)) :
-				ADDRS_PER_BLOCK;
-
-		clear_buffer_new(bh_result);
+			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	}
 
+	if (maxblocks > (bh_result->b_size >> blkbits)) {
+		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+		if (blkaddr == NULL_ADDR && allocate) {
+			err = __allocate_data_block(&dn);
+			if (err)
+				goto put_out;
+			blkaddr = dn.data_blkaddr;
+		}
 		/* Give more consecutive addresses for the read ahead */
-		for (i = 0; i < end_offset - dn.ofs_in_node; i++)
-			if (((datablock_addr(dn.node_page,
-							dn.ofs_in_node + i))
-				!= (dn.data_blkaddr + i)) || maxblocks == i)
-				break;
-		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
-		bh_result->b_size = (((size_t)i) << blkbits);
+		if (blkaddr == (bh_result->b_blocknr + ofs)) {
+			ofs++;
+			dn.ofs_in_node++;
+			pgofs++;
+			bh_result->b_size += (((size_t)1) << blkbits);
+			goto get_next;
+		}
 	}
+put_out:
+	if (allocate && buffer_mapped(bh_result))
+		sync_inode_page(&dn);
 	f2fs_put_dnode(&dn);
-	trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
-	return 0;
+	if (create)
+		f2fs_unlock_op(sbi);
+out:
+	trace_f2fs_get_data_block(inode, iblock, bh_result, err);
+	return err;
 }
 
 static int f2fs_read_data_page(struct file *file, struct page *page)
 {
-	return mpage_readpage(page, get_data_block_ro);
+	return mpage_readpage(page, get_data_block);
 }
 
 static int f2fs_read_data_pages(struct file *file,
 			struct address_space *mapping,
 			struct list_head *pages, unsigned nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
+	return mpage_readpages(mapping, pages, nr_pages, get_data_block);
 }
 
 int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
@@ -883,13 +955,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-
-	if (rw == WRITE)
-		return 0;
-
-	/* Needs synchronization with the cleaner */
 	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-						  get_data_block_ro);
+							get_data_block);
 }
 
 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
@@ -928,7 +995,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
 
 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
 {
-	return generic_block_bmap(mapping, block, get_data_block_ro);
+	return generic_block_bmap(mapping, block, get_data_block);
 }
 
 const struct address_space_operations f2fs_dblock_aops = {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1b05a62..8cbc5a6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1115,6 +1115,8 @@ void recover_data_page(struct f2fs_sb_info *, struct page *,
 				struct f2fs_summary *, block_t, block_t);
 void rewrite_node_page(struct f2fs_sb_info *, struct page *,
 				struct f2fs_summary *, block_t, block_t);
+void allocate_data_block(struct f2fs_sb_info *, struct page *,
+		block_t, block_t *, struct f2fs_summary *, int);
 void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
 void write_data_summaries(struct f2fs_sb_info *, block_t);
 void write_node_summaries(struct f2fs_sb_info *, block_t);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5b890ce..9f8bdd0 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -854,16 +854,14 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
 	return __get_segment_type_6(page, p_type);
 }
 
-static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
-			block_t old_blkaddr, block_t *new_blkaddr,
-			struct f2fs_summary *sum, struct f2fs_io_info *fio)
+void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+		block_t old_blkaddr, block_t *new_blkaddr,
+		struct f2fs_summary *sum, int type)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	struct curseg_info *curseg;
 	unsigned int old_cursegno;
-	int type;
 
-	type = __get_segment_type(page, fio->type);
 	curseg = CURSEG_I(sbi, type);
 
 	mutex_lock(&curseg->curseg_mutex);
@@ -896,13 +894,22 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
 	mutex_unlock(&sit_i->sentry_lock);
 
-	if (fio->type == NODE)
+	if (page && IS_NODESEG(type))
 		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
 
+	mutex_unlock(&curseg->curseg_mutex);
+}
+
+static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
+			block_t old_blkaddr, block_t *new_blkaddr,
+			struct f2fs_summary *sum, struct f2fs_io_info *fio)
+{
+	int type = __get_segment_type(page, fio->type);
+
+	allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
+
 	/* writeout dirty page into bdev */
 	f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
-
-	mutex_unlock(&curseg->curseg_mutex);
 }
 
 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
-- 
1.8.4.474.g128a96c


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH] f2fs: introduce a new direct_IO write path
@ 2013-12-17  1:49 ` Jaegeuk Kim
  0 siblings, 0 replies; 9+ messages in thread
From: Jaegeuk Kim @ 2013-12-17  1:49 UTC (permalink / raw)
  Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

Previously, f2fs did not support high-performance direct IO: it threw every
write request to the buffered write path, resulting in severe performance
degradation due to memory operations like copy_from_user.

This patch introduces a new direct IO path in which every write request is
processed by the generic blockdev_direct_IO() with an enhanced get_block
function.

The get_data_block() in f2fs handles:
1. if original data blocks are allocated, then give them to blockdev.
2. otherwise,
  a. preallocate requested block addresses
  b. do not use extent cache for better performance
  c. give the block addresses to blockdev

This policy implies that:
- newly allocated data are sequentially written to the disk
- updated data are randomly written to the disk.
- f2fs gives consistency on its file metadata, not file data.

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/data.c    | 145 +++++++++++++++++++++++++++++++++++++++---------------
 fs/f2fs/f2fs.h    |   2 +
 fs/f2fs/segment.c |  23 ++++++---
 3 files changed, 123 insertions(+), 47 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 15956fa..9bdacc6 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -570,74 +570,146 @@ repeat:
 	return page;
 }
 
+static int __allocate_data_block(struct dnode_of_data *dn)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct f2fs_summary sum;
+	block_t new_blkaddr;
+	struct node_info ni;
+	int type;
+
+	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
+		return -EPERM;
+	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+		return -ENOSPC;
+
+	__set_data_blkaddr(dn, NEW_ADDR);
+	dn->data_blkaddr = NEW_ADDR;
+
+	get_node_info(sbi, dn->nid, &ni);
+	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+
+	type = CURSEG_WARM_DATA;
+
+	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
+
+	/* direct IO doesn't use extent cache to maximize the performance */
+	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
+	update_extent_cache(new_blkaddr, dn);
+	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
+
+	dn->data_blkaddr = new_blkaddr;
+	return 0;
+}
+
 /*
  * This function should be used by the data read flow only where it
  * does not check the "create" flag that indicates block allocation.
  * The reason for this special functionality is to exploit VFS readahead
  * mechanism.
  */
-static int get_data_block_ro(struct inode *inode, sector_t iblock,
+static int get_data_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
 	unsigned maxblocks = bh_result->b_size >> blkbits;
 	struct dnode_of_data dn;
-	pgoff_t pgofs;
-	int err;
+	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
+	pgoff_t pgofs, end_offset;
+	bool allocate = false;
+	int err = 0, ofs = 1;
 
 	/* Get the page offset from the block offset(iblock) */
 	pgofs =	(pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
 
-	if (check_extent_cache(inode, pgofs, bh_result)) {
-		trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
-		return 0;
-	}
+	if (check_extent_cache(inode, pgofs, bh_result))
+		goto out;
+
+	if (create)
+		f2fs_lock_op(sbi);
 
 	/* When reading holes, we need its node page */
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
-	err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
+	err = get_dnode_of_data(&dn, pgofs, mode);
 	if (err) {
-		trace_f2fs_get_data_block(inode, iblock, bh_result, err);
-		return (err == -ENOENT) ? 0 : err;
+		if (err == -ENOENT)
+			err = 0;
+		goto put_out;
 	}
+	if (dn.data_blkaddr == NEW_ADDR)
+		goto put_out;
 
-	/* It does not support data allocation */
-	f2fs_bug_on(create);
-
-	if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
-		int i;
-		unsigned int end_offset;
-
+	if (dn.data_blkaddr != NULL_ADDR) {
+		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+	} else if (create) {
+		allocate = true;
+		err = __allocate_data_block(&dn);
+		if (err)
+			goto put_out;
+		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+	} else {
+		goto put_out;
+	}
+
+	end_offset = IS_INODE(dn.node_page) ?
+			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	bh_result->b_size = (((size_t)1) << blkbits);
+	dn.ofs_in_node++;
+	pgofs++;
+
+get_next:
+	if (dn.ofs_in_node >= end_offset) {
+		f2fs_put_dnode(&dn);
+		set_new_dnode(&dn, inode, NULL, NULL, 0);
+		err = get_dnode_of_data(&dn, pgofs, mode);
+		if (err) {
+			if (err == -ENOENT)
+				err = 0;
+			goto put_out;
+		}
 		end_offset = IS_INODE(dn.node_page) ?
-				ADDRS_PER_INODE(F2FS_I(inode)) :
-				ADDRS_PER_BLOCK;
-
-		clear_buffer_new(bh_result);
+			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	}
 
+	if (maxblocks > (bh_result->b_size >> blkbits)) {
+		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+		if (blkaddr == NULL_ADDR && allocate) {
+			err = __allocate_data_block(&dn);
+			if (err)
+				goto put_out;
+			blkaddr = dn.data_blkaddr;
+		}
 		/* Give more consecutive addresses for the read ahead */
-		for (i = 0; i < end_offset - dn.ofs_in_node; i++)
-			if (((datablock_addr(dn.node_page,
-							dn.ofs_in_node + i))
-				!= (dn.data_blkaddr + i)) || maxblocks == i)
-				break;
-		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
-		bh_result->b_size = (((size_t)i) << blkbits);
+		if (blkaddr == (bh_result->b_blocknr + ofs)) {
+			ofs++;
+			dn.ofs_in_node++;
+			pgofs++;
+			bh_result->b_size += (((size_t)1) << blkbits);
+			goto get_next;
+		}
 	}
+put_out:
+	if (allocate && buffer_mapped(bh_result))
+		sync_inode_page(&dn);
 	f2fs_put_dnode(&dn);
-	trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
-	return 0;
+	if (create)
+		f2fs_unlock_op(sbi);
+out:
+	trace_f2fs_get_data_block(inode, iblock, bh_result, err);
+	return err;
 }
 
 static int f2fs_read_data_page(struct file *file, struct page *page)
 {
-	return mpage_readpage(page, get_data_block_ro);
+	return mpage_readpage(page, get_data_block);
 }
 
 static int f2fs_read_data_pages(struct file *file,
 			struct address_space *mapping,
 			struct list_head *pages, unsigned nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
+	return mpage_readpages(mapping, pages, nr_pages, get_data_block);
 }
 
 int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
@@ -883,13 +955,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-
-	if (rw == WRITE)
-		return 0;
-
-	/* Needs synchronization with the cleaner */
 	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-						  get_data_block_ro);
+							get_data_block);
 }
 
 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
@@ -928,7 +995,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
 
 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
 {
-	return generic_block_bmap(mapping, block, get_data_block_ro);
+	return generic_block_bmap(mapping, block, get_data_block);
 }
 
 const struct address_space_operations f2fs_dblock_aops = {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1b05a62..8cbc5a6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1115,6 +1115,8 @@ void recover_data_page(struct f2fs_sb_info *, struct page *,
 				struct f2fs_summary *, block_t, block_t);
 void rewrite_node_page(struct f2fs_sb_info *, struct page *,
 				struct f2fs_summary *, block_t, block_t);
+void allocate_data_block(struct f2fs_sb_info *, struct page *,
+		block_t, block_t *, struct f2fs_summary *, int);
 void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
 void write_data_summaries(struct f2fs_sb_info *, block_t);
 void write_node_summaries(struct f2fs_sb_info *, block_t);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5b890ce..9f8bdd0 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -854,16 +854,14 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
 	return __get_segment_type_6(page, p_type);
 }
 
-static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
-			block_t old_blkaddr, block_t *new_blkaddr,
-			struct f2fs_summary *sum, struct f2fs_io_info *fio)
+void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+		block_t old_blkaddr, block_t *new_blkaddr,
+		struct f2fs_summary *sum, int type)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	struct curseg_info *curseg;
 	unsigned int old_cursegno;
-	int type;
 
-	type = __get_segment_type(page, fio->type);
 	curseg = CURSEG_I(sbi, type);
 
 	mutex_lock(&curseg->curseg_mutex);
@@ -896,13 +894,22 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
 	mutex_unlock(&sit_i->sentry_lock);
 
-	if (fio->type == NODE)
+	if (page && IS_NODESEG(type))
 		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
 
+	mutex_unlock(&curseg->curseg_mutex);
+}
+
+static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
+			block_t old_blkaddr, block_t *new_blkaddr,
+			struct f2fs_summary *sum, struct f2fs_io_info *fio)
+{
+	int type = __get_segment_type(page, fio->type);
+
+	allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
+
 	/* writeout dirty page into bdev */
 	f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
-
-	mutex_unlock(&curseg->curseg_mutex);
 }
 
 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
-- 
1.8.4.474.g128a96c


------------------------------------------------------------------------------
Rapidly troubleshoot problems before they affect your business. Most IT 
organizations don't have a clear picture of how application performance 
affects their revenue. With AppDynamics, you get 100% visibility into your 
Java,.NET, & PHP application. Start your 15-day FREE TRIAL of AppDynamics Pro!
http://pubads.g.doubleclick.net/gampad/clk?id=84349831&iu=/4140/ostg.clktrk

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] f2fs: introduce a new direct_IO write path
  2013-12-17  1:49 ` Jaegeuk Kim
  (?)
@ 2013-12-18  0:12 ` Jaegeuk Kim
  2013-12-19  1:12     ` Chao Yu
  2013-12-20  2:14     ` Chao Yu
  -1 siblings, 2 replies; 9+ messages in thread
From: Jaegeuk Kim @ 2013-12-18  0:12 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, linux-f2fs-devel

Change log from v1:
 o fix ENOSPC error handling

>From b8511a74fe98b67247a9feeed58441e8f5ffd705 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Date: Mon, 16 Dec 2013 19:04:05 +0900
Subject: [PATCH] f2fs: introduce a new direct_IO write path
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-f2fs-devel@lists.sourceforge.net

Previously, f2fs did not support high-performance direct IO: it threw every
write request to the buffered write path, resulting in severe performance
degradation due to memory operations like copy_from_user.

This patch introduces a new direct IO path in which every write request is
processed by the generic blockdev_direct_IO() with an enhanced get_block
function.

The get_data_block() in f2fs handles:
1. if original data blocks are allocated, then give them to blockdev.
2. otherwise,
  a. preallocate requested block addresses
  b. do not use extent cache for better performance
  c. give the block addresses to blockdev

This policy implies that:
- newly allocated data are sequentially written to the disk
- updated data are randomly written to the disk.
- f2fs gives consistency on its file metadata, not file data.

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/data.c    | 152
++++++++++++++++++++++++++++++++++++++++--------------
 fs/f2fs/f2fs.h    |   2 +
 fs/f2fs/segment.c |  23 ++++++---
 3 files changed, 129 insertions(+), 48 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 15956fa..a0950bc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -570,74 +570,151 @@ repeat:
 	return page;
 }
 
+static int __allocate_data_block(struct dnode_of_data *dn)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct f2fs_summary sum;
+	block_t new_blkaddr;
+	struct node_info ni;
+	int type;
+
+	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
+		return -EPERM;
+	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+		return -ENOSPC;
+
+	__set_data_blkaddr(dn, NEW_ADDR);
+	dn->data_blkaddr = NEW_ADDR;
+
+	get_node_info(sbi, dn->nid, &ni);
+	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+
+	type = CURSEG_WARM_DATA;
+
+	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
+
+	/* direct IO doesn't use extent cache to maximize the performance */
+	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
+	update_extent_cache(new_blkaddr, dn);
+	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
+
+	dn->data_blkaddr = new_blkaddr;
+	return 0;
+}
+
 /*
  * This function should be used by the data read flow only where it
  * does not check the "create" flag that indicates block allocation.
  * The reason for this special functionality is to exploit VFS
readahead
  * mechanism.
  */
-static int get_data_block_ro(struct inode *inode, sector_t iblock,
+static int get_data_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
 	unsigned maxblocks = bh_result->b_size >> blkbits;
 	struct dnode_of_data dn;
-	pgoff_t pgofs;
-	int err;
+	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
+	pgoff_t pgofs, end_offset;
+	int err = 0, ofs = 1;
+	bool allocated = false;
 
 	/* Get the page offset from the block offset(iblock) */
 	pgofs =	(pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
 
-	if (check_extent_cache(inode, pgofs, bh_result)) {
-		trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
-		return 0;
-	}
+	if (check_extent_cache(inode, pgofs, bh_result))
+		goto out;
+
+	if (create)
+		f2fs_lock_op(sbi);
 
 	/* When reading holes, we need its node page */
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
-	err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
-	if (err) {
-		trace_f2fs_get_data_block(inode, iblock, bh_result, err);
-		return (err == -ENOENT) ? 0 : err;
+	err = get_dnode_of_data(&dn, pgofs, mode);
+	if (err || dn.data_blkaddr == NEW_ADDR) {
+		if (err == -ENOENT)
+			err = 0;
+		goto unlock_out;
 	}
 
-	/* It does not support data allocation */
-	f2fs_bug_on(create);
-
-	if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
-		int i;
-		unsigned int end_offset;
-
+	if (dn.data_blkaddr != NULL_ADDR) {
+		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+	} else if (create) {
+		err = __allocate_data_block(&dn);
+		if (err)
+			goto put_out;
+		allocated = true;
+		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+	} else {
+		goto put_out;
+	}
+
+	end_offset = IS_INODE(dn.node_page) ?
+			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	bh_result->b_size = (((size_t)1) << blkbits);
+	dn.ofs_in_node++;
+	pgofs++;
+
+get_next:
+	if (dn.ofs_in_node >= end_offset) {
+		if (allocated)
+			sync_inode_page(&dn);
+		allocated = false;
+		f2fs_put_dnode(&dn);
+
+		set_new_dnode(&dn, inode, NULL, NULL, 0);
+		err = get_dnode_of_data(&dn, pgofs, mode);
+		if (err || dn.data_blkaddr == NEW_ADDR) {
+			if (err == -ENOENT)
+				err = 0;
+			goto unlock_out;
+		}
 		end_offset = IS_INODE(dn.node_page) ?
-				ADDRS_PER_INODE(F2FS_I(inode)) :
-				ADDRS_PER_BLOCK;
-
-		clear_buffer_new(bh_result);
+			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	}
 
+	if (maxblocks > (bh_result->b_size >> blkbits)) {
+		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+		if (blkaddr == NULL_ADDR && create) {
+			err = __allocate_data_block(&dn);
+			if (err)
+				goto sync_out;
+			allocated = true;
+			blkaddr = dn.data_blkaddr;
+		}
 		/* Give more consecutive addresses for the read ahead */
-		for (i = 0; i < end_offset - dn.ofs_in_node; i++)
-			if (((datablock_addr(dn.node_page,
-							dn.ofs_in_node + i))
-				!= (dn.data_blkaddr + i)) || maxblocks == i)
-				break;
-		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
-		bh_result->b_size = (((size_t)i) << blkbits);
+		if (blkaddr == (bh_result->b_blocknr + ofs)) {
+			ofs++;
+			dn.ofs_in_node++;
+			pgofs++;
+			bh_result->b_size += (((size_t)1) << blkbits);
+			goto get_next;
+		}
 	}
+sync_out:
+	if (allocated)
+		sync_inode_page(&dn);
+put_out:
 	f2fs_put_dnode(&dn);
-	trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
-	return 0;
+unlock_out:
+	if (create)
+		f2fs_unlock_op(sbi);
+out:
+	trace_f2fs_get_data_block(inode, iblock, bh_result, err);
+	return err;
 }
 
 static int f2fs_read_data_page(struct file *file, struct page *page)
 {
-	return mpage_readpage(page, get_data_block_ro);
+	return mpage_readpage(page, get_data_block);
 }
 
 static int f2fs_read_data_pages(struct file *file,
 			struct address_space *mapping,
 			struct list_head *pages, unsigned nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
+	return mpage_readpages(mapping, pages, nr_pages, get_data_block);
 }
 
 int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
@@ -883,13 +960,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb
*iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-
-	if (rw == WRITE)
-		return 0;
-
-	/* Needs synchronization with the cleaner */
 	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-						  get_data_block_ro);
+							get_data_block);
 }
 
 static void f2fs_invalidate_data_page(struct page *page, unsigned int
offset,
@@ -928,7 +1000,7 @@ static int f2fs_set_data_page_dirty(struct page
*page)
 
 static sector_t f2fs_bmap(struct address_space *mapping, sector_t
block)
 {
-	return generic_block_bmap(mapping, block, get_data_block_ro);
+	return generic_block_bmap(mapping, block, get_data_block);
 }
 
 const struct address_space_operations f2fs_dblock_aops = {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1b05a62..8cbc5a6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1115,6 +1115,8 @@ void recover_data_page(struct f2fs_sb_info *,
struct page *,
 				struct f2fs_summary *, block_t, block_t);
 void rewrite_node_page(struct f2fs_sb_info *, struct page *,
 				struct f2fs_summary *, block_t, block_t);
+void allocate_data_block(struct f2fs_sb_info *, struct page *,
+		block_t, block_t *, struct f2fs_summary *, int);
 void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
 void write_data_summaries(struct f2fs_sb_info *, block_t);
 void write_node_summaries(struct f2fs_sb_info *, block_t);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5b890ce..9f8bdd0 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -854,16 +854,14 @@ static int __get_segment_type(struct page *page,
enum page_type p_type)
 	return __get_segment_type_6(page, p_type);
 }
 
-static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
-			block_t old_blkaddr, block_t *new_blkaddr,
-			struct f2fs_summary *sum, struct f2fs_io_info *fio)
+void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+		block_t old_blkaddr, block_t *new_blkaddr,
+		struct f2fs_summary *sum, int type)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	struct curseg_info *curseg;
 	unsigned int old_cursegno;
-	int type;
 
-	type = __get_segment_type(page, fio->type);
 	curseg = CURSEG_I(sbi, type);
 
 	mutex_lock(&curseg->curseg_mutex);
@@ -896,13 +894,22 @@ static void do_write_page(struct f2fs_sb_info
*sbi, struct page *page,
 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
 	mutex_unlock(&sit_i->sentry_lock);
 
-	if (fio->type == NODE)
+	if (page && IS_NODESEG(type))
 		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
 
+	mutex_unlock(&curseg->curseg_mutex);
+}
+
+static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
+			block_t old_blkaddr, block_t *new_blkaddr,
+			struct f2fs_summary *sum, struct f2fs_io_info *fio)
+{
+	int type = __get_segment_type(page, fio->type);
+
+	allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
+
 	/* writeout dirty page into bdev */
 	f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
-
-	mutex_unlock(&curseg->curseg_mutex);
 }
 
 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
-- 
1.8.4.474.g128a96c



-- 
Jaegeuk Kim
Samsung


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* RE: [f2fs-dev] [PATCH] f2fs: introduce a new direct_IO write path
  2013-12-18  0:12 ` Jaegeuk Kim
@ 2013-12-19  1:12     ` Chao Yu
  2013-12-20  2:14     ` Chao Yu
  1 sibling, 0 replies; 9+ messages in thread
From: Chao Yu @ 2013-12-19  1:12 UTC (permalink / raw)
  To: jaegeuk.kim; +Cc: linux-kernel, linux-f2fs-devel, linux-fsdevel

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk.kim@samsung.com]
> Sent: Wednesday, December 18, 2013 8:12 AM
> To: linux-fsdevel@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org; linux-f2fs-devel@lists.sourceforge.net
> Subject: Re: [f2fs-dev] [PATCH] f2fs: introduce a new direct_IO write path
> 
> Change log from v1:
>  o fix NOSPC error handling
> 
> >From b8511a74fe98b67247a9feeed58441e8f5ffd705 Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
> Date: Mon, 16 Dec 2013 19:04:05 +0900
> Subject: [PATCH] f2fs: introduce a new direct_IO write path
> Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
> linux-f2fs-devel@lists.sourceforge.net
> 
> Previously, f2fs did not support high-performance direct IO; it sent
> every write request through the buffered write path, resulting in severe
> performance degradation due to memory operations like copy_from_user.
> 
> This patch introduces a new direct IO path in which every write requests
> are
> processed by generic blockdev_direct_IO() with enhanced get_block
> function.
> 
> The get_data_block() in f2fs handles:
> 1. if the original data blocks are already allocated, give them to blockdev.
> 2. otherwise,
>   a. preallocate requested block addresses
>   b. do not use extent cache for better performance
>   c. give the block addresses to blockdev
> 
> This policy induces that:
> - new allocated data are sequentially written to the disk
> - updated data are randomly written to the disk.
> - f2fs gives consistency on its file meta, not file data.

Looks Nice!

> 
> Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>

Reviewed-by: Chao Yu <chao2.yu@samsung.com>

> +
> +	end_offset = IS_INODE(dn.node_page) ?
> +			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
> +	bh_result->b_size = (((size_t)1) << blkbits);
> +	dn.ofs_in_node++;
> +	pgofs++;
> +
> +get_next:
> +	if (dn.ofs_in_node >= end_offset) {
> +		if (allocated)
> +			sync_inode_page(&dn);
> +		allocated = false;

		if (allocated) {
			sync_inode_page(&dn);
			allocated = false;
		}
Certainly, it's really not a big deal. :)

Thanks


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] f2fs: introduce a new direct_IO write path
@ 2013-12-19  1:12     ` Chao Yu
  0 siblings, 0 replies; 9+ messages in thread
From: Chao Yu @ 2013-12-19  1:12 UTC (permalink / raw)
  To: jaegeuk.kim; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk.kim@samsung.com]
> Sent: Wednesday, December 18, 2013 8:12 AM
> To: linux-fsdevel@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org; linux-f2fs-devel@lists.sourceforge.net
> Subject: Re: [f2fs-dev] [PATCH] f2fs: introduce a new direct_IO write path
> 
> Change log from v1:
>  o fix NOSPC error handling
> 
> >From b8511a74fe98b67247a9feeed58441e8f5ffd705 Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
> Date: Mon, 16 Dec 2013 19:04:05 +0900
> Subject: [PATCH] f2fs: introduce a new direct_IO write path
> Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
> linux-f2fs-devel@lists.sourceforge.net
> 
> Previously, f2fs did not support high-performance direct IO; it sent
> every write request through the buffered write path, resulting in severe
> performance degradation due to memory operations like copy_from_user.
> 
> This patch introduces a new direct IO path in which every write requests
> are
> processed by generic blockdev_direct_IO() with enhanced get_block
> function.
> 
> The get_data_block() in f2fs handles:
> 1. if the original data blocks are already allocated, give them to blockdev.
> 2. otherwise,
>   a. preallocate requested block addresses
>   b. do not use extent cache for better performance
>   c. give the block addresses to blockdev
> 
> This policy induces that:
> - new allocated data are sequentially written to the disk
> - updated data are randomly written to the disk.
> - f2fs gives consistency on its file meta, not file data.

Looks Nice!

> 
> Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>

Reviewed-by: Chao Yu <chao2.yu@samsung.com>

> +
> +	end_offset = IS_INODE(dn.node_page) ?
> +			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
> +	bh_result->b_size = (((size_t)1) << blkbits);
> +	dn.ofs_in_node++;
> +	pgofs++;
> +
> +get_next:
> +	if (dn.ofs_in_node >= end_offset) {
> +		if (allocated)
> +			sync_inode_page(&dn);
> +		allocated = false;

		if (allocated) {
			sync_inode_page(&dn);
			allocated = false;
		}
Certainly, it's really not a big deal. :)

Thanks


------------------------------------------------------------------------------
Rapidly troubleshoot problems before they affect your business. Most IT 
organizations don't have a clear picture of how application performance 
affects their revenue. With AppDynamics, you get 100% visibility into your 
Java,.NET, & PHP application. Start your 15-day FREE TRIAL of AppDynamics Pro!
http://pubads.g.doubleclick.net/gampad/clk?id=84349831&iu=/4140/ostg.clktrk

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [f2fs-dev] [PATCH] f2fs: introduce a new direct_IO write path
  2013-12-18  0:12 ` Jaegeuk Kim
@ 2013-12-20  2:14     ` Chao Yu
  2013-12-20  2:14     ` Chao Yu
  1 sibling, 0 replies; 9+ messages in thread
From: Chao Yu @ 2013-12-20  2:14 UTC (permalink / raw)
  To: jaegeuk.kim; +Cc: linux-kernel, linux-f2fs-devel, linux-fsdevel

Hi Kim,

One comment, as follows:

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk.kim@samsung.com]
> Sent: Wednesday, December 18, 2013 8:12 AM
> To: linux-fsdevel@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org; linux-f2fs-devel@lists.sourceforge.net
> Subject: Re: [f2fs-dev] [PATCH] f2fs: introduce a new direct_IO write path
> 
> Change log from v1:
>  o fix NOSPC error handling
> 
> >From b8511a74fe98b67247a9feeed58441e8f5ffd705 Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
> Date: Mon, 16 Dec 2013 19:04:05 +0900
> Subject: [PATCH] f2fs: introduce a new direct_IO write path
> Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
> linux-f2fs-devel@lists.sourceforge.net
> 
> Previously, f2fs did not support high-performance direct IO; it sent
> every write request through the buffered write path, resulting in severe
> performance degradation due to memory operations like copy_from_user.
> 
> This patch introduces a new direct IO path in which every write requests
> are
> processed by generic blockdev_direct_IO() with enhanced get_block
> function.
> 
> The get_data_block() in f2fs handles:
> 1. if the original data blocks are already allocated, give them to blockdev.
> 2. otherwise,
>   a. preallocate requested block addresses
>   b. do not use extent cache for better performance
>   c. give the block addresses to blockdev
> 
> This policy induces that:
> - new allocated data are sequentially written to the disk
> - updated data are randomly written to the disk.
> - f2fs gives consistency on its file meta, not file data.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
> ---
>  fs/f2fs/data.c    | 152
> ++++++++++++++++++++++++++++++++++++++++--------------
>  fs/f2fs/f2fs.h    |   2 +
>  fs/f2fs/segment.c |  23 ++++++---
>  3 files changed, 129 insertions(+), 48 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 15956fa..a0950bc 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -570,74 +570,151 @@ repeat:
>  	return page;
>  }
> 
> +static int __allocate_data_block(struct dnode_of_data *dn)
> +{
> +	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
> +	struct f2fs_summary sum;
> +	block_t new_blkaddr;
> +	struct node_info ni;
> +	int type;
> +
> +	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
> +		return -EPERM;
> +	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
> +		return -ENOSPC;
> +
> +	__set_data_blkaddr(dn, NEW_ADDR);
> +	dn->data_blkaddr = NEW_ADDR;
> +
> +	get_node_info(sbi, dn->nid, &ni);
> +	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
> +
> +	type = CURSEG_WARM_DATA;

If so, our cold data will be written to the WARM_DATA segment.
How about checking the segment type here?

> +
> +	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
> +
> +	/* direct IO doesn't use extent cache to maximize the performance */
> +	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
> +	update_extent_cache(new_blkaddr, dn);
> +	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
> +
> +	dn->data_blkaddr = new_blkaddr;
> +	return 0;
> +}
> +

[snip]


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] f2fs: introduce a new direct_IO write path
@ 2013-12-20  2:14     ` Chao Yu
  0 siblings, 0 replies; 9+ messages in thread
From: Chao Yu @ 2013-12-20  2:14 UTC (permalink / raw)
  To: jaegeuk.kim; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

Hi Kim,

One comment, as follows:

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk.kim@samsung.com]
> Sent: Wednesday, December 18, 2013 8:12 AM
> To: linux-fsdevel@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org; linux-f2fs-devel@lists.sourceforge.net
> Subject: Re: [f2fs-dev] [PATCH] f2fs: introduce a new direct_IO write path
> 
> Change log from v1:
>  o fix NOSPC error handling
> 
> >From b8511a74fe98b67247a9feeed58441e8f5ffd705 Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
> Date: Mon, 16 Dec 2013 19:04:05 +0900
> Subject: [PATCH] f2fs: introduce a new direct_IO write path
> Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
> linux-f2fs-devel@lists.sourceforge.net
> 
> Previously, f2fs did not support high-performance direct IO; it sent
> every write request through the buffered write path, resulting in severe
> performance degradation due to memory operations like copy_from_user.
> 
> This patch introduces a new direct IO path in which every write requests
> are
> processed by generic blockdev_direct_IO() with enhanced get_block
> function.
> 
> The get_data_block() in f2fs handles:
> 1. if the original data blocks are already allocated, give them to blockdev.
> 2. otherwise,
>   a. preallocate requested block addresses
>   b. do not use extent cache for better performance
>   c. give the block addresses to blockdev
> 
> This policy induces that:
> - new allocated data are sequentially written to the disk
> - updated data are randomly written to the disk.
> - f2fs gives consistency on its file meta, not file data.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
> ---
>  fs/f2fs/data.c    | 152
> ++++++++++++++++++++++++++++++++++++++++--------------
>  fs/f2fs/f2fs.h    |   2 +
>  fs/f2fs/segment.c |  23 ++++++---
>  3 files changed, 129 insertions(+), 48 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 15956fa..a0950bc 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -570,74 +570,151 @@ repeat:
>  	return page;
>  }
> 
> +static int __allocate_data_block(struct dnode_of_data *dn)
> +{
> +	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
> +	struct f2fs_summary sum;
> +	block_t new_blkaddr;
> +	struct node_info ni;
> +	int type;
> +
> +	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
> +		return -EPERM;
> +	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
> +		return -ENOSPC;
> +
> +	__set_data_blkaddr(dn, NEW_ADDR);
> +	dn->data_blkaddr = NEW_ADDR;
> +
> +	get_node_info(sbi, dn->nid, &ni);
> +	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
> +
> +	type = CURSEG_WARM_DATA;

If so, our cold data will be written to the WARM_DATA segment.
How about checking the segment type here?

> +
> +	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
> +
> +	/* direct IO doesn't use extent cache to maximize the performance */
> +	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
> +	update_extent_cache(new_blkaddr, dn);
> +	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
> +
> +	dn->data_blkaddr = new_blkaddr;
> +	return 0;
> +}
> +

[snip]


------------------------------------------------------------------------------
Rapidly troubleshoot problems before they affect your business. Most IT 
organizations don't have a clear picture of how application performance 
affects their revenue. With AppDynamics, you get 100% visibility into your 
Java,.NET, & PHP application. Start your 15-day FREE TRIAL of AppDynamics Pro!
http://pubads.g.doubleclick.net/gampad/clk?id=84349831&iu=/4140/ostg.clktrk

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [f2fs-dev] [PATCH] f2fs: introduce a new direct_IO write path
  2013-12-20  2:14     ` Chao Yu
@ 2013-12-20  9:21       ` Jaegeuk Kim
  -1 siblings, 0 replies; 9+ messages in thread
From: Jaegeuk Kim @ 2013-12-20  9:21 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel, linux-fsdevel

Hi,

2013-12-20 (금), 10:14 +0800, Chao Yu:
> Hi Kim,
> 
> One comment as following:
> 
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:jaegeuk.kim@samsung.com]
> > Sent: Wednesday, December 18, 2013 8:12 AM
> > To: linux-fsdevel@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org; linux-f2fs-devel@lists.sourceforge.net
> > Subject: Re: [f2fs-dev] [PATCH] f2fs: introduce a new direct_IO write path
> > 
> > Change log from v1:
> >  o fix NOSPC error handling
> > 
> > >From b8511a74fe98b67247a9feeed58441e8f5ffd705 Mon Sep 17 00:00:00 2001
> > From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
> > Date: Mon, 16 Dec 2013 19:04:05 +0900
> > Subject: [PATCH] f2fs: introduce a new direct_IO write path
> > Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
> > linux-f2fs-devel@lists.sourceforge.net
> > 
> > Previously, f2fs did not support high-performance direct IO; it sent
> > every write request through the buffered write path, resulting in severe
> > performance degradation due to memory operations like copy_from_user.
> > 
> > This patch introduces a new direct IO path in which every write requests
> > are
> > processed by generic blockdev_direct_IO() with enhanced get_block
> > function.
> > 
> > The get_data_block() in f2fs handles:
> > 1. if the original data blocks are already allocated, give them to blockdev.
> > 2. otherwise,
> >   a. preallocate requested block addresses
> >   b. do not use extent cache for better performance
> >   c. give the block addresses to blockdev
> > 
> > This policy induces that:
> > - new allocated data are sequentially written to the disk
> > - updated data are randomly written to the disk.
> > - f2fs gives consistency on its file meta, not file data.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
> > ---
> >  fs/f2fs/data.c    | 152
> > ++++++++++++++++++++++++++++++++++++++++--------------
> >  fs/f2fs/f2fs.h    |   2 +
> >  fs/f2fs/segment.c |  23 ++++++---
> >  3 files changed, 129 insertions(+), 48 deletions(-)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 15956fa..a0950bc 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -570,74 +570,151 @@ repeat:
> >  	return page;
> >  }
> > 
> > +static int __allocate_data_block(struct dnode_of_data *dn)
> > +{
> > +	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
> > +	struct f2fs_summary sum;
> > +	block_t new_blkaddr;
> > +	struct node_info ni;
> > +	int type;
> > +
> > +	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
> > +		return -EPERM;
> > +	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
> > +		return -ENOSPC;
> > +
> > +	__set_data_blkaddr(dn, NEW_ADDR);
> > +	dn->data_blkaddr = NEW_ADDR;
> > +
> > +	get_node_info(sbi, dn->nid, &ni);
> > +	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
> > +
> > +	type = CURSEG_WARM_DATA;
> 
> If so, our cold data will be written to the WARM_DATA segment.
> How about checking the segment type here?

Actually, I'm not sure whether data written through direct IO is cold
or hot.
I just intended to gather all direct IO'ed data into one type of log
rather than spreading it across separate logs.
So I selected WARM_DATA, on the assumption that such data will also be
frequently updated through direct IO.
Thanks,

> 
> > +
> > +	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
> > +
> > +	/* direct IO doesn't use extent cache to maximize the performance */
> > +	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
> > +	update_extent_cache(new_blkaddr, dn);
> > +	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
> > +
> > +	dn->data_blkaddr = new_blkaddr;
> > +	return 0;
> > +}
> > +
> 
> [snip]
> 

-- 
Jaegeuk Kim
Samsung


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] f2fs: introduce a new direct_IO write path
@ 2013-12-20  9:21       ` Jaegeuk Kim
  0 siblings, 0 replies; 9+ messages in thread
From: Jaegeuk Kim @ 2013-12-20  9:21 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

Hi,

2013-12-20 (금), 10:14 +0800, Chao Yu:
> Hi Kim,
> 
> One comment as following:
> 
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:jaegeuk.kim@samsung.com]
> > Sent: Wednesday, December 18, 2013 8:12 AM
> > To: linux-fsdevel@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org; linux-f2fs-devel@lists.sourceforge.net
> > Subject: Re: [f2fs-dev] [PATCH] f2fs: introduce a new direct_IO write path
> > 
> > Change log from v1:
> >  o fix NOSPC error handling
> > 
> > >From b8511a74fe98b67247a9feeed58441e8f5ffd705 Mon Sep 17 00:00:00 2001
> > From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
> > Date: Mon, 16 Dec 2013 19:04:05 +0900
> > Subject: [PATCH] f2fs: introduce a new direct_IO write path
> > Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
> > linux-f2fs-devel@lists.sourceforge.net
> > 
> > Previously, f2fs did not support high-performance direct IO; it sent
> > every write request through the buffered write path, resulting in severe
> > performance degradation due to memory operations like copy_from_user.
> > 
> > This patch introduces a new direct IO path in which every write requests
> > are
> > processed by generic blockdev_direct_IO() with enhanced get_block
> > function.
> > 
> > The get_data_block() in f2fs handles:
> > 1. if the original data blocks are already allocated, give them to blockdev.
> > 2. otherwise,
> >   a. preallocate requested block addresses
> >   b. do not use extent cache for better performance
> >   c. give the block addresses to blockdev
> > 
> > This policy induces that:
> > - new allocated data are sequentially written to the disk
> > - updated data are randomly written to the disk.
> > - f2fs gives consistency on its file meta, not file data.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
> > ---
> >  fs/f2fs/data.c    | 152
> > ++++++++++++++++++++++++++++++++++++++++--------------
> >  fs/f2fs/f2fs.h    |   2 +
> >  fs/f2fs/segment.c |  23 ++++++---
> >  3 files changed, 129 insertions(+), 48 deletions(-)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 15956fa..a0950bc 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -570,74 +570,151 @@ repeat:
> >  	return page;
> >  }
> > 
> > +static int __allocate_data_block(struct dnode_of_data *dn)
> > +{
> > +	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
> > +	struct f2fs_summary sum;
> > +	block_t new_blkaddr;
> > +	struct node_info ni;
> > +	int type;
> > +
> > +	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
> > +		return -EPERM;
> > +	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
> > +		return -ENOSPC;
> > +
> > +	__set_data_blkaddr(dn, NEW_ADDR);
> > +	dn->data_blkaddr = NEW_ADDR;
> > +
> > +	get_node_info(sbi, dn->nid, &ni);
> > +	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
> > +
> > +	type = CURSEG_WARM_DATA;
> 
> If so, our cold data will be written to the WARM_DATA segment.
> How about checking the segment type here?

Actually, I'm not sure whether data written through direct IO is cold
or hot.
I just intended to gather all direct IO'ed data into one type of log
rather than spreading it across separate logs.
So I selected WARM_DATA, on the assumption that such data will also be
frequently updated through direct IO.
Thanks,

> 
> > +
> > +	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
> > +
> > +	/* direct IO doesn't use extent cache to maximize the performance */
> > +	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
> > +	update_extent_cache(new_blkaddr, dn);
> > +	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
> > +
> > +	dn->data_blkaddr = new_blkaddr;
> > +	return 0;
> > +}
> > +
> 
> [snip]
> 

-- 
Jaegeuk Kim
Samsung



------------------------------------------------------------------------------
Rapidly troubleshoot problems before they affect your business. Most IT 
organizations don't have a clear picture of how application performance 
affects their revenue. With AppDynamics, you get 100% visibility into your 
Java,.NET, & PHP application. Start your 15-day FREE TRIAL of AppDynamics Pro!
http://pubads.g.doubleclick.net/gampad/clk?id=84349831&iu=/4140/ostg.clktrk
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-12-20  9:23 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-17  1:49 [PATCH] f2fs: introduce a new direct_IO write path Jaegeuk Kim
2013-12-17  1:49 ` Jaegeuk Kim
2013-12-18  0:12 ` Jaegeuk Kim
2013-12-19  1:12   ` [f2fs-dev] " Chao Yu
2013-12-19  1:12     ` Chao Yu
2013-12-20  2:14   ` [f2fs-dev] " Chao Yu
2013-12-20  2:14     ` Chao Yu
2013-12-20  9:21     ` [f2fs-dev] " Jaegeuk Kim
2013-12-20  9:21       ` Jaegeuk Kim

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.