linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] UDF: Add support for O_DIRECT
@ 2012-07-24 12:17 Ian Abbott
  2012-07-31 10:40 ` [PATCH v2] " Ian Abbott
  0 siblings, 1 reply; 13+ messages in thread
From: Ian Abbott @ 2012-07-24 12:17 UTC (permalink / raw)
  To: lkml; +Cc: Jan Kara, Ian Abbott

Add support for the O_DIRECT flag.  There are two cases to deal with:

1. Small files stored in the ICB (inode control block?): just return 0
from the new udf_adinicb_direct_IO() handler to fall back to buffered
I/O.  For direct writes, there is a "gotcha" to deal with when
generic_file_direct_write() in mm/filemap.c invalidates the pages.  In
the udf_adinicb_writepage() handler, only part of the page data will be
valid and the rest will be zeroed out, so only copy the valid part into
the ICB.  (This is actually a bit inefficient as udf_adinicb_write_end()
will have already copied the data into the ICB once, but it's pretty
likely that the file will grow to the point where its data can no longer
be stored in the ICB and will be moved to a different area of the file
system.  At that point, a different direct_IO handler will be used - see
below.)

2. Larger files, not stored in the ICB: nothing special here.  Just call
blockdev_direct_IO() from our new udf_direct_IO() handler and tidy up
any blocks instantiated outside i_size on error.  This is pretty
standard.

Also change the whitespace in udf_aops and udf_adinicb_aops to make them
a bit neater.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
---
 fs/udf/file.c  |   29 +++++++++++++++++++++++++----
 fs/udf/inode.c |   31 +++++++++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7f3f7ba..f5f9ddd 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,6 +34,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include <linux/aio.h>
 
 #include "udf_i.h"
@@ -64,12 +65,23 @@ static int udf_adinicb_writepage(struct page *page,
 	struct inode *inode = page->mapping->host;
 	char *kaddr;
 	struct udf_inode_info *iinfo = UDF_I(inode);
+	loff_t start, end, page_start, page_offset;
 
 	BUG_ON(!PageLocked(page));
 
 	kaddr = kmap(page);
-	memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size);
-	mark_inode_dirty(inode);
+	/* The beginning and/or end of the page data is likely to be invalid
+	 * for O_DIRECT, so only copy the valid part. */
+	page_start = (loff_t)page->index << PAGE_CACHE_SHIFT;
+	start = max(page_start, wbc->range_start);
+	end = min3(page_start + (loff_t)PAGE_CACHE_SIZE - 1,
+		   wbc->range_end, inode->i_size - 1);
+	if (likely(start <= end)) {
+		page_offset = start - page_start;
+		memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + start,
+		       kaddr + page_offset, (end + 1 - start));
+		mark_inode_dirty(inode);
+	}
 	SetPageUptodate(page);
 	kunmap(page);
 	unlock_page(page);
@@ -95,11 +107,20 @@ static int udf_adinicb_write_end(struct file *file,
 	return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
 }
 
+static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
+				     const struct iovec *iov,
+				     loff_t offset, unsigned long nr_segs)
+{
+	/* Fallback to buffered I/O. */
+	return 0;
+}
+
 const struct address_space_operations udf_adinicb_aops = {
 	.readpage	= udf_adinicb_readpage,
 	.writepage	= udf_adinicb_writepage,
-	.write_begin = simple_write_begin,
-	.write_end = udf_adinicb_write_end,
+	.write_begin	= simple_write_begin,
+	.write_end	= udf_adinicb_write_end,
+	.direct_IO	= udf_adinicb_direct_IO,
 };
 
 static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fafaad7..5efad41 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -136,6 +136,32 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
 	return ret;
 }
 
+static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
+			     const struct iovec *iov,
+			     loff_t offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	ssize_t ret;
+
+	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+				  udf_get_block);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
+	return ret;
+}
+
 static sector_t udf_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping, block, udf_get_block);
@@ -145,8 +171,9 @@ const struct address_space_operations udf_aops = {
 	.readpage	= udf_readpage,
 	.readpages	= udf_readpages,
 	.writepage	= udf_writepage,
-	.write_begin		= udf_write_begin,
-	.write_end		= generic_write_end,
+	.write_begin	= udf_write_begin,
+	.write_end	= generic_write_end,
+	.direct_IO	= udf_direct_IO,
 	.bmap		= udf_bmap,
 };
 
-- 
1.7.8.6


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v2] UDF: Add support for O_DIRECT
  2012-07-24 12:17 [PATCH] UDF: Add support for O_DIRECT Ian Abbott
@ 2012-07-31 10:40 ` Ian Abbott
  2012-09-04  9:49   ` [PATCH v3] " Ian Abbott
  0 siblings, 1 reply; 13+ messages in thread
From: Ian Abbott @ 2012-07-31 10:40 UTC (permalink / raw)
  To: lkml; +Cc: Jan Kara, Ian Abbott

Add support for the O_DIRECT flag.  There are two cases to deal with:

1. Small files stored in the ICB (inode control block?): just return 0
from the new udf_adinicb_direct_IO() handler to fall back to buffered
I/O.  For direct writes, there is a "gotcha" to deal with when
generic_file_direct_write() in mm/filemap.c invalidates the pages.  In
the udf_adinicb_writepage() handler, only part of the page data will be
valid and the rest will be zeroed out, so only copy the valid part into
the ICB.  (This is actually a bit inefficient as udf_adinicb_write_end()
will have already copied the data into the ICB once, but it's pretty
likely that the file will grow to the point where its data can no longer
be stored in the ICB and will be moved to a different area of the file
system.  At that point, a different direct_IO handler will be used - see
below.)

2. Larger files, not stored in the ICB: nothing special here.  Just call
blockdev_direct_IO() from our new udf_direct_IO() handler and tidy up
any blocks instantiated outside i_size on error.  This is pretty
standard.  Factor error handling code out of udf_write_begin() into new
function udf_write_failed() so it can also be called by udf_direct_IO().

Also change the whitespace in udf_aops and udf_adinicb_aops to make them
a bit neater.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
---
v2: Rework error handling in udf_direct_IO to avoid calling deprecated
    vmtruncate().
---
 fs/udf/file.c  |   29 +++++++++++++++++++++++++----
 fs/udf/inode.c |   52 ++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7f3f7ba..f5f9ddd 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,6 +34,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include <linux/aio.h>
 
 #include "udf_i.h"
@@ -64,12 +65,23 @@ static int udf_adinicb_writepage(struct page *page,
 	struct inode *inode = page->mapping->host;
 	char *kaddr;
 	struct udf_inode_info *iinfo = UDF_I(inode);
+	loff_t start, end, page_start, page_offset;
 
 	BUG_ON(!PageLocked(page));
 
 	kaddr = kmap(page);
-	memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size);
-	mark_inode_dirty(inode);
+	/* The beginning and/or end of the page data is likely to be invalid
+	 * for O_DIRECT, so only copy the valid part. */
+	page_start = (loff_t)page->index << PAGE_CACHE_SHIFT;
+	start = max(page_start, wbc->range_start);
+	end = min3(page_start + (loff_t)PAGE_CACHE_SIZE - 1,
+		   wbc->range_end, inode->i_size - 1);
+	if (likely(start <= end)) {
+		page_offset = start - page_start;
+		memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + start,
+		       kaddr + page_offset, (end + 1 - start));
+		mark_inode_dirty(inode);
+	}
 	SetPageUptodate(page);
 	kunmap(page);
 	unlock_page(page);
@@ -95,11 +107,20 @@ static int udf_adinicb_write_end(struct file *file,
 	return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
 }
 
+static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
+				     const struct iovec *iov,
+				     loff_t offset, unsigned long nr_segs)
+{
+	/* Fallback to buffered I/O. */
+	return 0;
+}
+
 const struct address_space_operations udf_adinicb_aops = {
 	.readpage	= udf_adinicb_readpage,
 	.writepage	= udf_adinicb_writepage,
-	.write_begin = simple_write_begin,
-	.write_end = udf_adinicb_write_end,
+	.write_begin	= simple_write_begin,
+	.write_end	= udf_adinicb_write_end,
+	.direct_IO	= udf_adinicb_direct_IO,
 };
 
 static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fafaad7..165e48e 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -95,6 +95,22 @@ void udf_evict_inode(struct inode *inode)
 	}
 }
 
+static void udf_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+	struct udf_inode_info *iinfo = UDF_I(inode);
+	loff_t isize = inode->i_size;
+
+	if (to > isize) {
+		truncate_pagecache(inode, to, isize);
+		if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+			down_write(&iinfo->i_data_sem);
+			udf_truncate_extents(inode);
+			up_write(&iinfo->i_data_sem);
+		}
+	}
+}
+
 static int udf_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, udf_get_block, wbc);
@@ -118,21 +134,24 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
 	int ret;
 
 	ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
-	if (unlikely(ret)) {
-		struct inode *inode = mapping->host;
-		struct udf_inode_info *iinfo = UDF_I(inode);
-		loff_t isize = inode->i_size;
-
-		if (pos + len > isize) {
-			truncate_pagecache(inode, pos + len, isize);
-			if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
-				down_write(&iinfo->i_data_sem);
-				udf_truncate_extents(inode);
-				up_write(&iinfo->i_data_sem);
-			}
-		}
-	}
+	if (unlikely(ret))
+		udf_write_failed(mapping, pos + len);
+	return ret;
+}
 
+static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
+			     const struct iovec *iov,
+			     loff_t offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+
+	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+				  udf_get_block);
+	if (unlikely(ret < 0 && (rw && WRITE)))
+		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
 	return ret;
 }
 
@@ -145,8 +164,9 @@ const struct address_space_operations udf_aops = {
 	.readpage	= udf_readpage,
 	.readpages	= udf_readpages,
 	.writepage	= udf_writepage,
-	.write_begin		= udf_write_begin,
-	.write_end		= generic_write_end,
+	.write_begin	= udf_write_begin,
+	.write_end	= generic_write_end,
+	.direct_IO	= udf_direct_IO,
 	.bmap		= udf_bmap,
 };
 
-- 
1.7.8.6


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v3] UDF: Add support for O_DIRECT
  2012-07-31 10:40 ` [PATCH v2] " Ian Abbott
@ 2012-09-04  9:49   ` Ian Abbott
  2012-09-04 14:39     ` Jan Kara
  0 siblings, 1 reply; 13+ messages in thread
From: Ian Abbott @ 2012-09-04  9:49 UTC (permalink / raw)
  To: lkml; +Cc: Jan Kara, Ian Abbott

Add support for the O_DIRECT flag.  There are two cases to deal with:

1. Small files stored in the ICB (inode control block?): just return 0
from the new udf_adinicb_direct_IO() handler to fall back to buffered
I/O.  For direct writes, there is a "gotcha" to deal with when
generic_file_direct_write() in mm/filemap.c invalidates the pages.  In
the udf_adinicb_writepage() handler, only part of the page data will be
valid and the rest will be zeroed out, so only copy the valid part into
the ICB.  (This is actually a bit inefficient as udf_adinicb_write_end()
will have already copied the data into the ICB once, but it's pretty
likely that the file will grow to the point where its data can no longer
be stored in the ICB and will be moved to a different area of the file
system.  At that point, a different direct_IO handler will be used - see
below.)

2. Larger files, not stored in the ICB: nothing special here.  Just call
blockdev_direct_IO() from our new udf_direct_IO() handler and tidy up
any blocks instantiated outside i_size on error.  This is pretty
standard.  Factor error handling code out of udf_write_begin() into new
function udf_write_failed() so it can also be called by udf_direct_IO().

Also change the whitespace in udf_aops and udf_adinicb_aops to make them
a bit neater.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
---
v2: Rework error handling in udf_direct_IO to avoid calling deprecated
    vmtruncate().
v3: Rebased to next-20120904.
---
 fs/udf/file.c  | 29 +++++++++++++++++++++++++----
 fs/udf/inode.c | 52 ++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7f3f7ba..f5f9ddd 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,6 +34,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include <linux/aio.h>
 
 #include "udf_i.h"
@@ -64,12 +65,23 @@ static int udf_adinicb_writepage(struct page *page,
 	struct inode *inode = page->mapping->host;
 	char *kaddr;
 	struct udf_inode_info *iinfo = UDF_I(inode);
+	loff_t start, end, page_start, page_offset;
 
 	BUG_ON(!PageLocked(page));
 
 	kaddr = kmap(page);
-	memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size);
-	mark_inode_dirty(inode);
+	/* The beginning and/or end of the page data is likely to be invalid
+	 * for O_DIRECT, so only copy the valid part. */
+	page_start = (loff_t)page->index << PAGE_CACHE_SHIFT;
+	start = max(page_start, wbc->range_start);
+	end = min3(page_start + (loff_t)PAGE_CACHE_SIZE - 1,
+		   wbc->range_end, inode->i_size - 1);
+	if (likely(start <= end)) {
+		page_offset = start - page_start;
+		memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + start,
+		       kaddr + page_offset, (end + 1 - start));
+		mark_inode_dirty(inode);
+	}
 	SetPageUptodate(page);
 	kunmap(page);
 	unlock_page(page);
@@ -95,11 +107,20 @@ static int udf_adinicb_write_end(struct file *file,
 	return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
 }
 
+static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
+				     const struct iovec *iov,
+				     loff_t offset, unsigned long nr_segs)
+{
+	/* Fallback to buffered I/O. */
+	return 0;
+}
+
 const struct address_space_operations udf_adinicb_aops = {
 	.readpage	= udf_adinicb_readpage,
 	.writepage	= udf_adinicb_writepage,
-	.write_begin = simple_write_begin,
-	.write_end = udf_adinicb_write_end,
+	.write_begin	= simple_write_begin,
+	.write_end	= udf_adinicb_write_end,
+	.direct_IO	= udf_adinicb_direct_IO,
 };
 
 static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1a0588e..b905448 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -95,6 +95,22 @@ void udf_evict_inode(struct inode *inode)
 	}
 }
 
+static void udf_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+	struct udf_inode_info *iinfo = UDF_I(inode);
+	loff_t isize = inode->i_size;
+
+	if (to > isize) {
+		truncate_pagecache(inode, to, isize);
+		if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+			down_write(&iinfo->i_data_sem);
+			udf_truncate_extents(inode);
+			up_write(&iinfo->i_data_sem);
+		}
+	}
+}
+
 static int udf_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, udf_get_block, wbc);
@@ -124,21 +140,24 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
 	int ret;
 
 	ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
-	if (unlikely(ret)) {
-		struct inode *inode = mapping->host;
-		struct udf_inode_info *iinfo = UDF_I(inode);
-		loff_t isize = inode->i_size;
-
-		if (pos + len > isize) {
-			truncate_pagecache(inode, pos + len, isize);
-			if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
-				down_write(&iinfo->i_data_sem);
-				udf_truncate_extents(inode);
-				up_write(&iinfo->i_data_sem);
-			}
-		}
-	}
+	if (unlikely(ret))
+		udf_write_failed(mapping, pos + len);
+	return ret;
+}
 
+static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
+			     const struct iovec *iov,
+			     loff_t offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+
+	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+				  udf_get_block);
+	if (unlikely(ret < 0 && (rw && WRITE)))
+		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
 	return ret;
 }
 
@@ -152,8 +171,9 @@ const struct address_space_operations udf_aops = {
 	.readpages	= udf_readpages,
 	.writepage	= udf_writepage,
 	.writepages	= udf_writepages,
-	.write_begin		= udf_write_begin,
-	.write_end		= generic_write_end,
+	.write_begin	= udf_write_begin,
+	.write_end	= generic_write_end,
+	.direct_IO	= udf_direct_IO,
 	.bmap		= udf_bmap,
 };
 
-- 
1.7.12


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] UDF: Add support for O_DIRECT
  2012-09-04  9:49   ` [PATCH v3] " Ian Abbott
@ 2012-09-04 14:39     ` Jan Kara
  2012-09-04 15:11       ` Ian Abbott
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Kara @ 2012-09-04 14:39 UTC (permalink / raw)
  To: Ian Abbott; +Cc: lkml, Jan Kara, linux-fsdevel

  Hello,

  first, you have my address wrong (you had suze instead of suse) which is
why I wasn't getting your email and not replying (missed the patch in LKML
traffic). Second, it's good to CC also linux-fsdevel for UDF related
matters (I tend to use that for UDF announcements etc. so people caring
about UDF can watch there and don't have to read high-volume LKML).

On Tue 04-09-12 10:49:39, Ian Abbott wrote:
> Add support for the O_DIRECT flag.  There are two cases to deal with:
  Out of curiosity, do you have a use for this feature or is it mostly
academic interest?

> 1. Small files stored in the ICB (inode control block?): just return 0
> from the new udf_adinicb_direct_IO() handler to fall back to buffered
> I/O.  For direct writes, there is a "gotcha" to deal with when
> generic_file_direct_write() in mm/filemap.c invalidates the pages.  In
> the udf_adinicb_writepage() handler, only part of the page data will be
> valid and the rest will be zeroed out, so only copy the valid part into
> the ICB.  (This is actually a bit inefficient as udf_adinicb_write_end()
> will have already copied the data into the ICB once, but it's pretty
> likely that the file will grow to the point where its data can no longer
> be stored in the ICB and will be moved to a different area of the file
> system.  At that point, a different direct_IO handler will be used - see
> below.)
  Sorry, I didn't quite get this. What is the problem with copying all the
data to inode in udf_adinicb_writepage() as it is now?

								Honza

> 2. Larger files, not stored in the ICB: nothing special here.  Just call
> blockdev_direct_IO() from our new udf_direct_IO() handler and tidy up
> any blocks instantiated outside i_size on error.  This is pretty
> standard.  Factor error handling code out of udf_write_begin() into new
> function udf_write_failed() so it can also be called by udf_direct_IO().
> 
> Also change the whitespace in udf_aops and udf_adinicb_aops to make them
> a bit neater.
> 
> Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
> ---
> v2: Rework error handling in udf_direct_IO to avoid calling deprecated
>     vmtruncate().
> v3: Rebased to next-20120904.
> ---
>  fs/udf/file.c  | 29 +++++++++++++++++++++++++----
>  fs/udf/inode.c | 52 ++++++++++++++++++++++++++++++++++++----------------
>  2 files changed, 61 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/udf/file.c b/fs/udf/file.c
> index 7f3f7ba..f5f9ddd 100644
> --- a/fs/udf/file.c
> +++ b/fs/udf/file.c
> @@ -34,6 +34,7 @@
>  #include <linux/errno.h>
>  #include <linux/pagemap.h>
>  #include <linux/buffer_head.h>
> +#include <linux/writeback.h>
>  #include <linux/aio.h>
>  
>  #include "udf_i.h"
> @@ -64,12 +65,23 @@ static int udf_adinicb_writepage(struct page *page,
>  	struct inode *inode = page->mapping->host;
>  	char *kaddr;
>  	struct udf_inode_info *iinfo = UDF_I(inode);
> +	loff_t start, end, page_start, page_offset;
>  
>  	BUG_ON(!PageLocked(page));
>  
>  	kaddr = kmap(page);
> -	memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size);
> -	mark_inode_dirty(inode);
> +	/* The beginning and/or end of the page data is likely to be invalid
> +	 * for O_DIRECT, so only copy the valid part. */
> +	page_start = (loff_t)page->index << PAGE_CACHE_SHIFT;
> +	start = max(page_start, wbc->range_start);
> +	end = min3(page_start + (loff_t)PAGE_CACHE_SIZE - 1,
> +		   wbc->range_end, inode->i_size - 1);
> +	if (likely(start <= end)) {
> +		page_offset = start - page_start;
> +		memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + start,
> +		       kaddr + page_offset, (end + 1 - start));
> +		mark_inode_dirty(inode);
> +	}
>  	SetPageUptodate(page);
>  	kunmap(page);
>  	unlock_page(page);
> @@ -95,11 +107,20 @@ static int udf_adinicb_write_end(struct file *file,
>  	return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
>  }
>  
> +static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
> +				     const struct iovec *iov,
> +				     loff_t offset, unsigned long nr_segs)
> +{
> +	/* Fallback to buffered I/O. */
> +	return 0;
> +}
> +
>  const struct address_space_operations udf_adinicb_aops = {
>  	.readpage	= udf_adinicb_readpage,
>  	.writepage	= udf_adinicb_writepage,
> -	.write_begin = simple_write_begin,
> -	.write_end = udf_adinicb_write_end,
> +	.write_begin	= simple_write_begin,
> +	.write_end	= udf_adinicb_write_end,
> +	.direct_IO	= udf_adinicb_direct_IO,
>  };
>  
>  static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
> diff --git a/fs/udf/inode.c b/fs/udf/inode.c
> index 1a0588e..b905448 100644
> --- a/fs/udf/inode.c
> +++ b/fs/udf/inode.c
> @@ -95,6 +95,22 @@ void udf_evict_inode(struct inode *inode)
>  	}
>  }
>  
> +static void udf_write_failed(struct address_space *mapping, loff_t to)
> +{
> +	struct inode *inode = mapping->host;
> +	struct udf_inode_info *iinfo = UDF_I(inode);
> +	loff_t isize = inode->i_size;
> +
> +	if (to > isize) {
> +		truncate_pagecache(inode, to, isize);
> +		if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
> +			down_write(&iinfo->i_data_sem);
> +			udf_truncate_extents(inode);
> +			up_write(&iinfo->i_data_sem);
> +		}
> +	}
> +}
> +
>  static int udf_writepage(struct page *page, struct writeback_control *wbc)
>  {
>  	return block_write_full_page(page, udf_get_block, wbc);
> @@ -124,21 +140,24 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
>  	int ret;
>  
>  	ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
> -	if (unlikely(ret)) {
> -		struct inode *inode = mapping->host;
> -		struct udf_inode_info *iinfo = UDF_I(inode);
> -		loff_t isize = inode->i_size;
> -
> -		if (pos + len > isize) {
> -			truncate_pagecache(inode, pos + len, isize);
> -			if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
> -				down_write(&iinfo->i_data_sem);
> -				udf_truncate_extents(inode);
> -				up_write(&iinfo->i_data_sem);
> -			}
> -		}
> -	}
> +	if (unlikely(ret))
> +		udf_write_failed(mapping, pos + len);
> +	return ret;
> +}
>  
> +static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
> +			     const struct iovec *iov,
> +			     loff_t offset, unsigned long nr_segs)
> +{
> +	struct file *file = iocb->ki_filp;
> +	struct address_space *mapping = file->f_mapping;
> +	struct inode *inode = mapping->host;
> +	ssize_t ret;
> +
> +	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
> +				  udf_get_block);
> +	if (unlikely(ret < 0 && (rw && WRITE)))
> +		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
>  	return ret;
>  }
>  
> @@ -152,8 +171,9 @@ const struct address_space_operations udf_aops = {
>  	.readpages	= udf_readpages,
>  	.writepage	= udf_writepage,
>  	.writepages	= udf_writepages,
> -	.write_begin		= udf_write_begin,
> -	.write_end		= generic_write_end,
> +	.write_begin	= udf_write_begin,
> +	.write_end	= generic_write_end,
> +	.direct_IO	= udf_direct_IO,
>  	.bmap		= udf_bmap,
>  };
>  
> -- 
> 1.7.12
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] UDF: Add support for O_DIRECT
  2012-09-04 14:39     ` Jan Kara
@ 2012-09-04 15:11       ` Ian Abbott
  2012-09-05  9:03         ` Ian Abbott
  2012-09-05 12:05         ` Jan Kara
  0 siblings, 2 replies; 13+ messages in thread
From: Ian Abbott @ 2012-09-04 15:11 UTC (permalink / raw)
  To: Jan Kara; +Cc: Ian Abbott, lkml, linux-fsdevel

On 2012-09-04 15:39, Jan Kara wrote:
>    Hello,
>
>    first, you have my address wrong (you had suze instead of suse) which is
> why I wasn't getting your email and not replying (missed the patch in LKML
> traffic). Second, it's good to CC also linux-fsdevel for UDF related
> matters (I tend to use that for UDF announcements etc. so people caring
> about UDF can watch there and don't have to read high-volume LKML).

Oops, sorry about the misspelling.  Also, I've noted the linux-fsdevel 
for future (I was just following what it said in MAINTAINERS).

> On Tue 04-09-12 10:49:39, Ian Abbott wrote:
>> Add support for the O_DIRECT flag.  There are two cases to deal with:
>    Out of curiosity, do you have a use for this feature or is it mostly
> academic interest?

I'm planning to use it for an embedded project that needs to stream 
large files off a CompactFlash card, but the data doesn't need to be in 
the buffer cache as its only read once, and the system has very limited 
memory bandwidth so I can't afford the the extra copy.  The old version 
of this project only supported FAT, but that limited the file size to 
about 4GiB.  The filesystem needs to be something reasonably 
Windows-friendly, at least for adding the files to the CompactFlash card 
in the first place.

>> 1. Small files stored in the ICB (inode control block?): just return 0
>> from the new udf_adinicb_direct_IO() handler to fall back to buffered
>> I/O.  For direct writes, there is a "gotcha" to deal with when
>> generic_file_direct_write() in mm/filemap.c invalidates the pages.  In
>> the udf_adinicb_writepage() handler, only part of the page data will be
>> valid and the rest will be zeroed out, so only copy the valid part into
>> the ICB.  (This is actually a bit inefficient as udf_adinicb_write_end()
>> will have already copied the data into the ICB once, but it's pretty
>> likely that the file will grow to the point where its data can no longer
>> be stored in the ICB and will be moved to a different area of the file
>> system.  At that point, a different direct_IO handler will be used - see
>> below.)
>    Sorry, I didn't quite get this. What is the problem with copying all the
> data to inode in udf_adinicb_writepage() as it is now?

Part of the good data in the ICB outside the range being addressed would 
get overwritten by zeroes.  This can be tested by creating a UDF 
filesystem with 4KiB blocks and with small files stored in the ICB, 
backed by a block device with 512 byte sectors.  Create a 2KiB file with 
random (or non-zero) data on the file system so that its data gets 
stored in the ICB.  Then open the file for writing without truncation 
and with the O_DIRECT flag set, write 512 bytes at some 512 byte offset 
within the 2KiB file and close it.  If you then hexdump the file, you'll 
find some of the old random data has been zeroed out.

-- 
-=( Ian Abbott @ MEV Ltd.    E-mail: <abbotti@mev.co.uk>        )=-
-=( Tel: +44 (0)161 477 1898   FAX: +44 (0)161 718 3587         )=-

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] UDF: Add support for O_DIRECT
  2012-09-04 15:11       ` Ian Abbott
@ 2012-09-05  9:03         ` Ian Abbott
  2012-09-05 12:05         ` Jan Kara
  1 sibling, 0 replies; 13+ messages in thread
From: Ian Abbott @ 2012-09-05  9:03 UTC (permalink / raw)
  To: Jan Kara; +Cc: Ian Abbott, lkml, linux-fsdevel

On 2012-09-04 16:11, Ian Abbott wrote:
> On 2012-09-04 15:39, Jan Kara wrote:
>> On Tue 04-09-12 10:49:39, Ian Abbott wrote:
>>> Add support for the O_DIRECT flag.  There are two cases to deal with:
>>    Out of curiosity, do you have a use for this feature or is it mostly
>> academic interest?
>
> I'm planning to use it for an embedded project that needs to stream
> large files off a CompactFlash card, but the data doesn't need to be in
> the buffer cache as its only read once, and the system has very limited
> memory bandwidth so I can't afford the the extra copy.  The old version
> of this project only supported FAT, but that limited the file size to
> about 4GiB.  The filesystem needs to be something reasonably
> Windows-friendly, at least for adding the files to the CompactFlash card
> in the first place.

Actually, remembering back (the old project was about 3 years ago), the 
main reason for using O_DIRECT was it was causing too much memory 
fragmentation on my MMU-less embedded system.  That and the extra 
overhead of managing the buffer cache for data that was only read once.

-- 
-=( Ian Abbott @ MEV Ltd.    E-mail: <abbotti@mev.co.uk>        )=-
-=( Tel: +44 (0)161 477 1898   FAX: +44 (0)161 718 3587         )=-

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] UDF: Add support for O_DIRECT
  2012-09-04 15:11       ` Ian Abbott
  2012-09-05  9:03         ` Ian Abbott
@ 2012-09-05 12:05         ` Jan Kara
  2012-09-05 12:55           ` Jan Kara
  1 sibling, 1 reply; 13+ messages in thread
From: Jan Kara @ 2012-09-05 12:05 UTC (permalink / raw)
  To: Ian Abbott; +Cc: Jan Kara, Ian Abbott, lkml, linux-fsdevel

On Tue 04-09-12 16:11:32, Ian Abbott wrote:
> On 2012-09-04 15:39, Jan Kara wrote:
> >   Hello,
> >
> >   first, you have my address wrong (you had suze instead of suse) which is
> >why I wasn't getting your email and not replying (missed the patch in LKML
> >traffic). Second, it's good to CC also linux-fsdevel for UDF related
> >matters (I tend to use that for UDF announcements etc. so people caring
> >about UDF can watch there and don't have to read high-volume LKML).
> 
> Oops, sorry about the misspelling.  Also, I've noted the
> linux-fsdevel for future (I was just following what it said in
> MAINTAINERS).
  I see. Actually I thought linux-fsdevel is inherited from the default of fs/
directory. But now I tried get_maintainer.pl script and I can see that it's
not. I'll update MAINTAINERS.

> >On Tue 04-09-12 10:49:39, Ian Abbott wrote:
> >>Add support for the O_DIRECT flag.  There are two cases to deal with:
> >   Out of curiosity, do you have a use for this feature or is it mostly
> >academic interest?
> 
> I'm planning to use it for an embedded project that needs to stream
> large files off a CompactFlash card, but the data doesn't need to be
> in the buffer cache as its only read once, and the system has very
> limited memory bandwidth so I can't afford the the extra copy.  The
> old version of this project only supported FAT, but that limited the
> file size to about 4GiB.  The filesystem needs to be something
> reasonably Windows-friendly, at least for adding the files to the
> CompactFlash card in the first place.
  OK, that sounds reasonably.

> >>1. Small files stored in the ICB (inode control block?): just return 0
> >>from the new udf_adinicb_direct_IO() handler to fall back to buffered
> >>I/O.  For direct writes, there is a "gotcha" to deal with when
> >>generic_file_direct_write() in mm/filemap.c invalidates the pages.  In
> >>the udf_adinicb_writepage() handler, only part of the page data will be
> >>valid and the rest will be zeroed out, so only copy the valid part into
> >>the ICB.  (This is actually a bit inefficient as udf_adinicb_write_end()
> >>will have already copied the data into the ICB once, but it's pretty
> >>likely that the file will grow to the point where its data can no longer
> >>be stored in the ICB and will be moved to a different area of the file
> >>system.  At that point, a different direct_IO handler will be used - see
> >>below.)
> >   Sorry, I didn't quite get this. What is the problem with copying all the
> >data to inode in udf_adinicb_writepage() as it is now?
> 
> Part of the good data in the ICB outside the range being addressed
> would get overwritten by zeroes.  This can be tested by creating a
> UDF filesystem with 4KiB blocks and with small files stored in the
> ICB, backed by a block device with 512 byte sectors.  Create a 2KiB
> file with random (or non-zero) data on the file system so that its
> data gets stored in the ICB.  Then open the file for writing without
> truncation and with the O_DIRECT flag set, write 512 bytes at some
> 512 byte offset within the 2KiB file and close it.  If you then
> hexdump the file, you'll find some of the old random data has been
> zeroed out.
  But don't you fall back to buffered IO for files in ICB? So then no
zeroing should happen?

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] UDF: Add support for O_DIRECT
  2012-09-05 12:05         ` Jan Kara
@ 2012-09-05 12:55           ` Jan Kara
  2012-09-05 16:00             ` Jan Kara
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Kara @ 2012-09-05 12:55 UTC (permalink / raw)
  To: Ian Abbott; +Cc: Jan Kara, Ian Abbott, lkml, linux-fsdevel

On Wed 05-09-12 14:05:20, Jan Kara wrote:
> On Tue 04-09-12 16:11:32, Ian Abbott wrote:
> > >>1. Small files stored in the ICB (inode control block?): just return 0
> > >>from the new udf_adinicb_direct_IO() handler to fall back to buffered
> > >>I/O.  For direct writes, there is a "gotcha" to deal with when
> > >>generic_file_direct_write() in mm/filemap.c invalidates the pages.  In
> > >>the udf_adinicb_writepage() handler, only part of the page data will be
> > >>valid and the rest will be zeroed out, so only copy the valid part into
> > >>the ICB.  (This is actually a bit inefficient as udf_adinicb_write_end()
> > >>will have already copied the data into the ICB once, but it's pretty
> > >>likely that the file will grow to the point where its data can no longer
> > >>be stored in the ICB and will be moved to a different area of the file
> > >>system.  At that point, a different direct_IO handler will be used - see
> > >>below.)
> > >   Sorry, I didn't quite get this. What is the problem with copying all the
> > >data to inode in udf_adinicb_writepage() as it is now?
> > 
> > Part of the good data in the ICB outside the range being addressed
> > would get overwritten by zeroes.  This can be tested by creating a
> > UDF filesystem with 4KiB blocks and with small files stored in the
> > ICB, backed by a block device with 512 byte sectors.  Create a 2KiB
> > file with random (or non-zero) data on the file system so that its
> > data gets stored in the ICB.  Then open the file for writing without
> > truncation and with the O_DIRECT flag set, write 512 bytes at some
> > 512 byte offset within the 2KiB file and close it.  If you then
> > hexdump the file, you'll find some of the old random data has been
> > zeroed out.
>   But don't you fall back to buffered IO for files in ICB? So then no
> zeroing should happen?
  Oh, I've tested things now and the bug is in buffered write as well!
It has nothing to do with direct IO. We cannot use simple_write_begin() for
UDF when the file is in ICB. I'll write a proper fix.

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] UDF: Add support for O_DIRECT
  2012-09-05 12:55           ` Jan Kara
@ 2012-09-05 16:00             ` Jan Kara
  2012-09-05 16:44               ` [PATCH v4] " Ian Abbott
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Kara @ 2012-09-05 16:00 UTC (permalink / raw)
  To: Ian Abbott; +Cc: Jan Kara, Ian Abbott, lkml, linux-fsdevel

On Wed 05-09-12 14:55:33, Jan Kara wrote:
> On Wed 05-09-12 14:05:20, Jan Kara wrote:
> > On Tue 04-09-12 16:11:32, Ian Abbott wrote:
> > > >>1. Small files stored in the ICB (inode control block?): just return 0
> > > >>from the new udf_adinicb_direct_IO() handler to fall back to buffered
> > > >>I/O.  For direct writes, there is a "gotcha" to deal with when
> > > >>generic_file_direct_write() in mm/filemap.c invalidates the pages.  In
> > > >>the udf_adinicb_writepage() handler, only part of the page data will be
> > > >>valid and the rest will be zeroed out, so only copy the valid part into
> > > >>the ICB.  (This is actually a bit inefficient as udf_adinicb_write_end()
> > > >>will have already copied the data into the ICB once, but it's pretty
> > > >>likely that the file will grow to the point where its data can no longer
> > > >>be stored in the ICB and will be moved to a different area of the file
> > > >>system.  At that point, a different direct_IO handler will be used - see
> > > >>below.)
> > > >   Sorry, I didn't quite get this. What is the problem with copying all the
> > > >data to inode in udf_adinicb_writepage() as it is now?
> > > 
> > > Part of the good data in the ICB outside the range being addressed
> > > would get overwritten by zeroes.  This can be tested by creating a
> > > UDF filesystem with 4KiB blocks and with small files stored in the
> > > ICB, backed by a block device with 512 byte sectors.  Create a 2KiB
> > > file with random (or non-zero) data on the file system so that its
> > > data gets stored in the ICB.  Then open the file for writing without
> > > truncation and with the O_DIRECT flag set, write 512 bytes at some
> > > 512 byte offset within the 2KiB file and close it.  If you then
> > > hexdump the file, you'll find some of the old random data has been
> > > zeroed out.
> >   But don't you fall back to buffered IO for files in ICB? So then no
> > zeroing should happen?
>   Oh, I've tested things now and the bug is in buffered write as well!
> It has nothing to do with direct IO. We cannot use simple_write_begin() for
> UDF when the file is in ICB. I'll write a proper fix.
  Fix sent. Please resend your patch without that writepage() change which
shouldn't be needed now. Thanks.

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v4] UDF: Add support for O_DIRECT
  2012-09-05 16:00             ` Jan Kara
@ 2012-09-05 16:44               ` Ian Abbott
  2012-09-05 19:02                 ` Jan Kara
  0 siblings, 1 reply; 13+ messages in thread
From: Ian Abbott @ 2012-09-05 16:44 UTC (permalink / raw)
  To: lkml; +Cc: linux-fsdevel, Jan Kara, Ian Abbott

Add support for the O_DIRECT flag.  There are two cases to deal with:

1. Small files stored in the ICB (inode control block?): just return 0
from the new udf_adinicb_direct_IO() handler to fall back to buffered
I/O.

2. Larger files, not stored in the ICB: nothing special here.  Just call
blockdev_direct_IO() from our new udf_direct_IO() handler and tidy up
any blocks instantiated outside i_size on error.  This is pretty
standard.  Factor error handling code out of udf_write_begin() into new
function udf_write_failed() so it can also be called by udf_direct_IO().

Also change the whitespace in udf_aops to make it a bit neater.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
---
v2: Rework error handling in udf_direct_IO to avoid calling deprecated
    vmtruncate().
v3: Rebased to next-20120904.
v4: Removed special handling in udf_adinicb_writepage() as that turned
    out to be a bug in the handling of buffered writes for files in ICB,
    fixed by Jan Kara's patch "udf: Fix data corruption for files in
    ICB", dated 2012-09-05.  I've tested it with that patch and it seems
    to work without corrupting file data in the ICB.
---
 fs/udf/file.c  |  9 +++++++++
 fs/udf/inode.c | 52 ++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/fs/udf/file.c b/fs/udf/file.c
index d1c6093..77b5953 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -118,11 +118,20 @@ static int udf_adinicb_write_end(struct file *file,
 	return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
 }
 
+static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
+				     const struct iovec *iov,
+				     loff_t offset, unsigned long nr_segs)
+{
+	/* Fallback to buffered I/O. */
+	return 0;
+}
+
 const struct address_space_operations udf_adinicb_aops = {
 	.readpage	= udf_adinicb_readpage,
 	.writepage	= udf_adinicb_writepage,
 	.write_begin	= udf_adinicb_write_begin,
 	.write_end	= udf_adinicb_write_end,
+	.direct_IO	= udf_adinicb_direct_IO,
 };
 
 static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1a0588e..b905448 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -95,6 +95,22 @@ void udf_evict_inode(struct inode *inode)
 	}
 }
 
+static void udf_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+	struct udf_inode_info *iinfo = UDF_I(inode);
+	loff_t isize = inode->i_size;
+
+	if (to > isize) {
+		truncate_pagecache(inode, to, isize);
+		if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+			down_write(&iinfo->i_data_sem);
+			udf_truncate_extents(inode);
+			up_write(&iinfo->i_data_sem);
+		}
+	}
+}
+
 static int udf_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, udf_get_block, wbc);
@@ -124,21 +140,24 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
 	int ret;
 
 	ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
-	if (unlikely(ret)) {
-		struct inode *inode = mapping->host;
-		struct udf_inode_info *iinfo = UDF_I(inode);
-		loff_t isize = inode->i_size;
-
-		if (pos + len > isize) {
-			truncate_pagecache(inode, pos + len, isize);
-			if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
-				down_write(&iinfo->i_data_sem);
-				udf_truncate_extents(inode);
-				up_write(&iinfo->i_data_sem);
-			}
-		}
-	}
+	if (unlikely(ret))
+		udf_write_failed(mapping, pos + len);
+	return ret;
+}
 
+static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
+			     const struct iovec *iov,
+			     loff_t offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+
+	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+				  udf_get_block);
+	if (unlikely(ret < 0 && (rw && WRITE)))
+		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
 	return ret;
 }
 
@@ -152,8 +171,9 @@ const struct address_space_operations udf_aops = {
 	.readpages	= udf_readpages,
 	.writepage	= udf_writepage,
 	.writepages	= udf_writepages,
-	.write_begin		= udf_write_begin,
-	.write_end		= generic_write_end,
+	.write_begin	= udf_write_begin,
+	.write_end	= generic_write_end,
+	.direct_IO	= udf_direct_IO,
 	.bmap		= udf_bmap,
 };
 
-- 
1.7.12


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] UDF: Add support for O_DIRECT
  2012-09-05 16:44               ` [PATCH v4] " Ian Abbott
@ 2012-09-05 19:02                 ` Jan Kara
  2012-09-06 10:08                   ` [PATCH] UDF: Fix incorrect error handling in udf_direct_IO() Ian Abbott
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Kara @ 2012-09-05 19:02 UTC (permalink / raw)
  To: Ian Abbott; +Cc: lkml, linux-fsdevel, Jan Kara

On Wed 05-09-12 17:44:31, Ian Abbott wrote:
> Add support for the O_DIRECT flag.  There are two cases to deal with:
> 
> 1. Small files stored in the ICB (inode control block?): just return 0
> from the new udf_adinicb_direct_IO() handler to fall back to buffered
> I/O.
> 
> 2. Larger files, not stored in the ICB: nothing special here.  Just call
> blockdev_direct_IO() from our new udf_direct_IO() handler and tidy up
> any blocks instantiated outside i_size on error.  This is pretty
> standard.  Factor error handling code out of udf_write_begin() into new
> function udf_write_failed() so it can also be called by udf_direct_IO().
> 
> Also change the whitespace in udf_aops to make it a bit neater.
  Thanks! I've added the patch to my tree.

								Honza

> 
> Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
> ---
> v2: Rework error handling in udf_direct_IO to avoid calling deprecated
>     vmtruncate().
> v3: Rebased to next-20120904.
> v4: Removed special handling in udf_adinicb_writepage() as that turned
>     out to be a bug in the handling of buffered writes for files in ICB,
>     fixed by Jan Kara's patch "udf: Fix data corruption for files in
>     ICB", dated 2012-09-05.  I've tested it with that patch and it seems
>     to work without corrupting file data in the ICB.
> ---
>  fs/udf/file.c  |  9 +++++++++
>  fs/udf/inode.c | 52 ++++++++++++++++++++++++++++++++++++----------------
>  2 files changed, 45 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/udf/file.c b/fs/udf/file.c
> index d1c6093..77b5953 100644
> --- a/fs/udf/file.c
> +++ b/fs/udf/file.c
> @@ -118,11 +118,20 @@ static int udf_adinicb_write_end(struct file *file,
>  	return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
>  }
>  
> +static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
> +				     const struct iovec *iov,
> +				     loff_t offset, unsigned long nr_segs)
> +{
> +	/* Fallback to buffered I/O. */
> +	return 0;
> +}
> +
>  const struct address_space_operations udf_adinicb_aops = {
>  	.readpage	= udf_adinicb_readpage,
>  	.writepage	= udf_adinicb_writepage,
>  	.write_begin	= udf_adinicb_write_begin,
>  	.write_end	= udf_adinicb_write_end,
> +	.direct_IO	= udf_adinicb_direct_IO,
>  };
>  
>  static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
> diff --git a/fs/udf/inode.c b/fs/udf/inode.c
> index 1a0588e..b905448 100644
> --- a/fs/udf/inode.c
> +++ b/fs/udf/inode.c
> @@ -95,6 +95,22 @@ void udf_evict_inode(struct inode *inode)
>  	}
>  }
>  
> +static void udf_write_failed(struct address_space *mapping, loff_t to)
> +{
> +	struct inode *inode = mapping->host;
> +	struct udf_inode_info *iinfo = UDF_I(inode);
> +	loff_t isize = inode->i_size;
> +
> +	if (to > isize) {
> +		truncate_pagecache(inode, to, isize);
> +		if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
> +			down_write(&iinfo->i_data_sem);
> +			udf_truncate_extents(inode);
> +			up_write(&iinfo->i_data_sem);
> +		}
> +	}
> +}
> +
>  static int udf_writepage(struct page *page, struct writeback_control *wbc)
>  {
>  	return block_write_full_page(page, udf_get_block, wbc);
> @@ -124,21 +140,24 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
>  	int ret;
>  
>  	ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
> -	if (unlikely(ret)) {
> -		struct inode *inode = mapping->host;
> -		struct udf_inode_info *iinfo = UDF_I(inode);
> -		loff_t isize = inode->i_size;
> -
> -		if (pos + len > isize) {
> -			truncate_pagecache(inode, pos + len, isize);
> -			if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
> -				down_write(&iinfo->i_data_sem);
> -				udf_truncate_extents(inode);
> -				up_write(&iinfo->i_data_sem);
> -			}
> -		}
> -	}
> +	if (unlikely(ret))
> +		udf_write_failed(mapping, pos + len);
> +	return ret;
> +}
>  
> +static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
> +			     const struct iovec *iov,
> +			     loff_t offset, unsigned long nr_segs)
> +{
> +	struct file *file = iocb->ki_filp;
> +	struct address_space *mapping = file->f_mapping;
> +	struct inode *inode = mapping->host;
> +	ssize_t ret;
> +
> +	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
> +				  udf_get_block);
> +	if (unlikely(ret < 0 && (rw && WRITE)))
> +		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
>  	return ret;
>  }
>  
> @@ -152,8 +171,9 @@ const struct address_space_operations udf_aops = {
>  	.readpages	= udf_readpages,
>  	.writepage	= udf_writepage,
>  	.writepages	= udf_writepages,
> -	.write_begin		= udf_write_begin,
> -	.write_end		= generic_write_end,
> +	.write_begin	= udf_write_begin,
> +	.write_end	= generic_write_end,
> +	.direct_IO	= udf_direct_IO,
>  	.bmap		= udf_bmap,
>  };
>  
> -- 
> 1.7.12
> 
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] UDF: Fix incorrect error handling in udf_direct_IO()
  2012-09-05 19:02                 ` Jan Kara
@ 2012-09-06 10:08                   ` Ian Abbott
  2012-09-06 14:22                     ` Jan Kara
  0 siblings, 1 reply; 13+ messages in thread
From: Ian Abbott @ 2012-09-06 10:08 UTC (permalink / raw)
  To: lkml; +Cc: linux-fsdevel, Jan Kara, Ian Abbott

My recent patch to add DIRECT_IO support to the UDF filesystem handler
contains a mistake in the error recovery if blockdev_direct_IO() fails.
The test `rw && WRITE` should be `rw & WRITE`.  Fix it.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
---
 fs/udf/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index b905448..41d5830 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -156,7 +156,7 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
 
 	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
 				  udf_get_block);
-	if (unlikely(ret < 0 && (rw && WRITE)))
+	if (unlikely(ret < 0 && (rw & WRITE)))
 		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
 	return ret;
 }
-- 
1.7.12


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] UDF: Fix incorrect error handling in udf_direct_IO()
  2012-09-06 10:08                   ` [PATCH] UDF: Fix incorrect error handling in udf_direct_IO() Ian Abbott
@ 2012-09-06 14:22                     ` Jan Kara
  0 siblings, 0 replies; 13+ messages in thread
From: Jan Kara @ 2012-09-06 14:22 UTC (permalink / raw)
  To: Ian Abbott; +Cc: lkml, linux-fsdevel, Jan Kara

On Thu 06-09-12 11:08:11, Ian Abbott wrote:
> My recent patch to add DIRECT_IO support to the UDF filesystem handler
> contains a mistake in the error recovery if blockdev_direct_IO() fails.
> The test `rw && WRITE` should be `rw & WRITE`.  Fix it.
> 
> Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
  Thanks. I've folded the change into your patch.

								Honza

> ---
>  fs/udf/inode.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/udf/inode.c b/fs/udf/inode.c
> index b905448..41d5830 100644
> --- a/fs/udf/inode.c
> +++ b/fs/udf/inode.c
> @@ -156,7 +156,7 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
>  
>  	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
>  				  udf_get_block);
> -	if (unlikely(ret < 0 && (rw && WRITE)))
> +	if (unlikely(ret < 0 && (rw & WRITE)))
>  		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
>  	return ret;
>  }
> -- 
> 1.7.12
> 
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-09-06 14:22 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-24 12:17 [PATCH] UDF: Add support for O_DIRECT Ian Abbott
2012-07-31 10:40 ` [PATCH v2] " Ian Abbott
2012-09-04  9:49   ` [PATCH v3] " Ian Abbott
2012-09-04 14:39     ` Jan Kara
2012-09-04 15:11       ` Ian Abbott
2012-09-05  9:03         ` Ian Abbott
2012-09-05 12:05         ` Jan Kara
2012-09-05 12:55           ` Jan Kara
2012-09-05 16:00             ` Jan Kara
2012-09-05 16:44               ` [PATCH v4] " Ian Abbott
2012-09-05 19:02                 ` Jan Kara
2012-09-06 10:08                   ` [PATCH] UDF: Fix incorrect error handling in udf_direct_IO() Ian Abbott
2012-09-06 14:22                     ` Jan Kara

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).