linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ecryptfs: fix fsx data corruption problems
@ 2007-12-17  5:32 Eric Sandeen
  2007-12-17 15:58 ` Michael Halcrow
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Sandeen @ 2007-12-17  5:32 UTC (permalink / raw)
  To: linux-kernel Mailing List; +Cc: Andrew Morton, Michael Halcrow, mike

ecryptfs in 2.6.24-rc3 wasn't surviving fsx for me at all,
dying after 4 ops.  Generally, encountering problems with stale
data and improperly zeroed pages.  An extending truncate + write
for example would expose stale data.

With the changes below I got to a million ops and beyond with all
mmap ops disabled - mmap still needs work.  (A version of this
patch on a RHEL5 kernel ran for over 110 million fsx ops)

I added a few comments as well, to the best of my understanding
as I read through the code.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>

---


Index: linux-2.6.24-rc3/fs/ecryptfs/mmap.c
===================================================================
--- linux-2.6.24-rc3.orig/fs/ecryptfs/mmap.c
+++ linux-2.6.24-rc3/fs/ecryptfs/mmap.c
@@ -263,14 +263,13 @@ out:
 	return 0;
 }
 
+/* This function must zero any hole we create */
 static int ecryptfs_prepare_write(struct file *file, struct page *page,
 				  unsigned from, unsigned to)
 {
 	int rc = 0;
+	loff_t prev_page_end_size;
 
-	if (from == 0 && to == PAGE_CACHE_SIZE)
-		goto out;	/* If we are writing a full page, it will be
-				   up to date. */
 	if (!PageUptodate(page)) {
 		rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
 						      PAGE_CACHE_SIZE,
@@ -283,22 +282,32 @@ static int ecryptfs_prepare_write(struct
 		} else
 			SetPageUptodate(page);
 	}
-	if (page->index != 0) {
-		loff_t end_of_prev_pg_pos =
-			(((loff_t)page->index << PAGE_CACHE_SHIFT) - 1);
 
-		if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) {
+	prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT);
+
+	/*
+	 * If creating a page or more of holes, zero them out via truncate.
+	 * Note, this will increase i_size.
+	 */
+	if (page->index != 0) {
+		if (prev_page_end_size > i_size_read(page->mapping->host)) {
 			rc = ecryptfs_truncate(file->f_path.dentry,
-					       end_of_prev_pg_pos);
+					       prev_page_end_size);
 			if (rc) {
 				printk(KERN_ERR "Error on attempt to "
 				       "truncate to (higher) offset [%lld];"
-				       " rc = [%d]\n", end_of_prev_pg_pos, rc);
+				       " rc = [%d]\n", prev_page_end_size, rc);
 				goto out;
 			}
 		}
-		if (end_of_prev_pg_pos + 1 > i_size_read(page->mapping->host))
-			zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+	}
+	/*
+	 * Writing to a new page, and creating a small hole from start of page?
+	 * Zero it out.
+	 */
+	if ((i_size_read(page->mapping->host) == prev_page_end_size) &&
+	    (from != 0)) {
+		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
 	}
 out:
 	return rc;
Index: linux-2.6.24-rc3/fs/ecryptfs/read_write.c
===================================================================
--- linux-2.6.24-rc3.orig/fs/ecryptfs/read_write.c
+++ linux-2.6.24-rc3/fs/ecryptfs/read_write.c
@@ -124,6 +124,10 @@ int ecryptfs_write(struct file *ecryptfs
 	loff_t pos;
 	int rc = 0;
 
+	/*
+	 * if we are writing beyond current size, then start pos
+	 * at the current size - we'll fill in zeros from there.
+	 */
 	if (offset > ecryptfs_file_size)
 		pos = ecryptfs_file_size;
 	else
@@ -137,6 +141,7 @@ int ecryptfs_write(struct file *ecryptfs
 		if (num_bytes > total_remaining_bytes)
 			num_bytes = total_remaining_bytes;
 		if (pos < offset) {
+			/* remaining zeros to write, up to destination offset */
 			size_t total_remaining_zeros = (offset - pos);
 
 			if (num_bytes > total_remaining_zeros)
@@ -167,17 +172,27 @@ int ecryptfs_write(struct file *ecryptfs
 			}
 		}
 		ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
+
+		/*
+		 * pos: where we're now writing, offset: where the request was
+		 * If current pos is before request, we are filling zeros
+		 * If we are at or beyond request, we are writing the *data*
+		 * If we're in a fresh page beyond eof, zero it in either case
+		 */
+		if (pos < offset || !start_offset_in_page) {
+			/* We are extending past the previous end of the file.
+			 * Fill in zero values to the end of the page */
+			memset(((char *)ecryptfs_page_virt
+				+ start_offset_in_page), 0,
+				PAGE_CACHE_SIZE - start_offset_in_page);
+		}
+
+		/* pos >= offset, we are now writing the data request */
 		if (pos >= offset) {
 			memcpy(((char *)ecryptfs_page_virt
 				+ start_offset_in_page),
 			       (data + data_offset), num_bytes);
 			data_offset += num_bytes;
-		} else {
-			/* We are extending past the previous end of the file.
-			 * Fill in zero values up to the start of where we
-			 * will be writing data. */
-			memset(((char *)ecryptfs_page_virt
-				+ start_offset_in_page), 0, num_bytes);
 		}
 		kunmap_atomic(ecryptfs_page_virt, KM_USER0);
 		flush_dcache_page(ecryptfs_page);


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] ecryptfs: fix fsx data corruption problems
  2007-12-17  5:32 [PATCH] ecryptfs: fix fsx data corruption problems Eric Sandeen
@ 2007-12-17 15:58 ` Michael Halcrow
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Halcrow @ 2007-12-17 15:58 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: linux-kernel Mailing List, Andrew Morton, Michael Halcrow

On Sun, Dec 16, 2007 at 11:32:33PM -0600, Eric Sandeen wrote:
> ecryptfs in 2.6.24-rc3 wasn't surviving fsx for me at all,
> dying after 4 ops.  Generally, encountering problems with stale
> data and improperly zeroed pages.  An extending truncate + write
> for example would expose stale data.
> 
> With the changes below I got to a million ops and beyond with all
> mmap ops disabled - mmap still needs work.  (A version of this
> patch on a RHEL5 kernel ran for over 110 million fsx ops)
> 
> I added a few comments as well, to the best of my understanding
> as I read through the code.
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>

Acked-by: Michael Halcrow <mhalcrow@us.ibm.com>

I recommend this for the next -rc release.

> ---
> 
> 
> Index: linux-2.6.24-rc3/fs/ecryptfs/mmap.c
> ===================================================================
> --- linux-2.6.24-rc3.orig/fs/ecryptfs/mmap.c
> +++ linux-2.6.24-rc3/fs/ecryptfs/mmap.c
> @@ -263,14 +263,13 @@ out:
>  	return 0;
>  }
>  
> +/* This function must zero any hole we create */
>  static int ecryptfs_prepare_write(struct file *file, struct page *page,
>  				  unsigned from, unsigned to)
>  {
>  	int rc = 0;
> +	loff_t prev_page_end_size;
>  
> -	if (from == 0 && to == PAGE_CACHE_SIZE)
> -		goto out;	/* If we are writing a full page, it will be
> -				   up to date. */
>  	if (!PageUptodate(page)) {
>  		rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
>  						      PAGE_CACHE_SIZE,
> @@ -283,22 +282,32 @@ static int ecryptfs_prepare_write(struct
>  		} else
>  			SetPageUptodate(page);
>  	}
> -	if (page->index != 0) {
> -		loff_t end_of_prev_pg_pos =
> -			(((loff_t)page->index << PAGE_CACHE_SHIFT) - 1);
>  
> -		if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) {
> +	prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT);
> +
> +	/*
> +	 * If creating a page or more of holes, zero them out via truncate.
> +	 * Note, this will increase i_size.
> +	 */
> +	if (page->index != 0) {
> +		if (prev_page_end_size > i_size_read(page->mapping->host)) {
>  			rc = ecryptfs_truncate(file->f_path.dentry,
> -					       end_of_prev_pg_pos);
> +					       prev_page_end_size);
>  			if (rc) {
>  				printk(KERN_ERR "Error on attempt to "
>  				       "truncate to (higher) offset [%lld];"
> -				       " rc = [%d]\n", end_of_prev_pg_pos, rc);
> +				       " rc = [%d]\n", prev_page_end_size, rc);
>  				goto out;
>  			}
>  		}
> -		if (end_of_prev_pg_pos + 1 > i_size_read(page->mapping->host))
> -			zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
> +	}
> +	/*
> +	 * Writing to a new page, and creating a small hole from start of page?
> +	 * Zero it out.
> +	 */
> +	if ((i_size_read(page->mapping->host) == prev_page_end_size) &&
> +	    (from != 0)) {
> +		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
>  	}
>  out:
>  	return rc;
> Index: linux-2.6.24-rc3/fs/ecryptfs/read_write.c
> ===================================================================
> --- linux-2.6.24-rc3.orig/fs/ecryptfs/read_write.c
> +++ linux-2.6.24-rc3/fs/ecryptfs/read_write.c
> @@ -124,6 +124,10 @@ int ecryptfs_write(struct file *ecryptfs
>  	loff_t pos;
>  	int rc = 0;
>  
> +	/*
> +	 * if we are writing beyond current size, then start pos
> +	 * at the current size - we'll fill in zeros from there.
> +	 */
>  	if (offset > ecryptfs_file_size)
>  		pos = ecryptfs_file_size;
>  	else
> @@ -137,6 +141,7 @@ int ecryptfs_write(struct file *ecryptfs
>  		if (num_bytes > total_remaining_bytes)
>  			num_bytes = total_remaining_bytes;
>  		if (pos < offset) {
> +			/* remaining zeros to write, up to destination offset */
>  			size_t total_remaining_zeros = (offset - pos);
>  
>  			if (num_bytes > total_remaining_zeros)
> @@ -167,17 +172,27 @@ int ecryptfs_write(struct file *ecryptfs
>  			}
>  		}
>  		ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
> +
> +		/*
> +		 * pos: where we're now writing, offset: where the request was
> +		 * If current pos is before request, we are filling zeros
> +		 * If we are at or beyond request, we are writing the *data*
> +		 * If we're in a fresh page beyond eof, zero it in either case
> +		 */
> +		if (pos < offset || !start_offset_in_page) {
> +			/* We are extending past the previous end of the file.
> +			 * Fill in zero values to the end of the page */
> +			memset(((char *)ecryptfs_page_virt
> +				+ start_offset_in_page), 0,
> +				PAGE_CACHE_SIZE - start_offset_in_page);
> +		}
> +
> +		/* pos >= offset, we are now writing the data request */
>  		if (pos >= offset) {
>  			memcpy(((char *)ecryptfs_page_virt
>  				+ start_offset_in_page),
>  			       (data + data_offset), num_bytes);
>  			data_offset += num_bytes;
> -		} else {
> -			/* We are extending past the previous end of the file.
> -			 * Fill in zero values up to the start of where we
> -			 * will be writing data. */
> -			memset(((char *)ecryptfs_page_virt
> -				+ start_offset_in_page), 0, num_bytes);
>  		}
>  		kunmap_atomic(ecryptfs_page_virt, KM_USER0);
>  		flush_dcache_page(ecryptfs_page);
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2007-12-17 16:14 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-12-17  5:32 [PATCH] ecryptfs: fix fsx data corruption problems Eric Sandeen
2007-12-17 15:58 ` Michael Halcrow

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).