All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: David Howells <dhowells@redhat.com>
Cc: linux-btrfs@vger.kernel.org, linux-cachefs@redhat.com,
	linux-fsdevel@vger.kernel.org
Subject: [PATCH/RFC] fscache/cachefiles versus btrfs
Date: Thu, 9 Apr 2015 17:49:16 +1000	[thread overview]
Message-ID: <20150409174916.5a2efef5@notabene.brown> (raw)

[-- Attachment #1: Type: text/plain, Size: 7242 bytes --]


hi,
 fscache cannot currently be used with btrfs as the backing store for the
 cache (managed by cachefilesd).
 This is because cachefiles needs the ->bmap address_space_operation, and
 btrfs doesn't provide it.

 cachefiles only uses this to find out if a particular page is a 'hole' or
 not.  For btrfs, this can be done with 'SEEK_DATA'.

 Unfortunately it doesn't seem to be possible to query a filesystem or a file
 to see if SEEK_DATA is reliable or not, so we cannot simply use SEEK_DATA
 when reliable, else ->bmap if available.

 The following patch make fscache work for me on btrfs.  It explicitly checks
 for BTRFS_SUPER_MAGIC.  Not really a nice solution, but all I could think of.

 Is there a better way?  Could a better way be created?  Maybe
 SEEK_DATA_RELIABLE ??

 Comments, suggestions welcome.


Also, if you do try to use fscache on btrfs with 3.19, then nothing gets
cached (as expected) and with a heavy load you can lose a race and get an
asserting fail in fscache_enqueue_operation

	ASSERT(fscache_object_is_available(op->object));

It looks like the object is being killed before it is available...

[  859.700765] kernel BUG at ../fs/fscache/operation.c:38!
...
[  859.703124] Call Trace:
[  859.703193]  [<ffffffffa0448044>] fscache_run_op.isra.4+0x34/0x80 [fscache]
[  859.703260]  [<ffffffffa0448160>] fscache_start_operations+0xa0/0xf0 [fscache]
[  859.703388]  [<ffffffffa0446cd8>] fscache_kill_object+0x98/0xc0 [fscache]
[  859.703455]  [<ffffffffa04475c1>] fscache_object_work_func+0x151/0x210 [fscache]
[  859.703578]  [<ffffffff81078b07>] process_one_work+0x147/0x3c0
[  859.703642]  [<ffffffff8107929c>] worker_thread+0x20c/0x470

I haven't figured out the cause of that yet.


Thanks,
NeilBrown




diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 1e51714eb33e..1389d8483d5d 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -20,6 +20,7 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/slab.h>
+#include <linux/magic.h>
 #include "internal.h"
 
 #define CACHEFILES_KEYBUF_SIZE 512
@@ -647,7 +648,8 @@ lookup_again:
 
 			ret = -EPERM;
 			aops = object->dentry->d_inode->i_mapping->a_ops;
-			if (!aops->bmap)
+			if (!aops->bmap &&
+			    object->dentry->d_sb->s_magic != BTRFS_SUPER_MAGIC)
 				goto check_error;
 
 			object->backer = object->dentry;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c6cd8d7a4eef..49fb330c0ab8 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -410,11 +410,11 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 
 	inode = object->backer->d_inode;
 	ASSERT(S_ISREG(inode->i_mode));
-	ASSERT(inode->i_mapping->a_ops->bmap);
 	ASSERT(inode->i_mapping->a_ops->readpages);
 
 	/* calculate the shift required to use bmap */
-	if (inode->i_sb->s_blocksize > PAGE_SIZE)
+	if (inode->i_mapping->a_ops->bmap &&
+	    inode->i_sb->s_blocksize > PAGE_SIZE)
 		goto enobufs;
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
@@ -423,20 +423,36 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 	op->op.flags |= FSCACHE_OP_ASYNC;
 	op->op.processor = cachefiles_read_copier;
 
-	/* we assume the absence or presence of the first block is a good
-	 * enough indication for the page as a whole
-	 * - TODO: don't use bmap() for this as it is _not_ actually good
-	 *   enough for this as it doesn't indicate errors, but it's all we've
-	 *   got for the moment
-	 */
-	block0 = page->index;
-	block0 <<= shift;
-
-	block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
-	_debug("%llx -> %llx",
-	       (unsigned long long) block0,
-	       (unsigned long long) block);
+	if (inode->i_mapping->a_ops->bmap) {
+		/* we assume the absence or presence of the first block is a good
+		 * enough indication for the page as a whole
+		 * - TODO: don't use bmap() for this as it is _not_ actually good
+		 *   enough for this as it doesn't indicate errors, but it's all we've
+		 *   got for the moment
+		 */
+		block0 = page->index;
+		block0 <<= shift;
 
+		block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
+		_debug("%llx -> %llx",
+		       (unsigned long long) block0,
+		       (unsigned long long) block);
+	} else {
+		/* Use llseek */
+		struct path path;
+		struct file *file;
+		path.mnt = cache->mnt;
+		path.dentry = object->backer;
+		file = dentry_open(&path, O_RDONLY, cache->cache_cred);
+		if (IS_ERR(file))
+			goto enobufs;
+		block = vfs_llseek(file, page->index << PAGE_SHIFT, SEEK_DATA);
+		filp_close(file, NULL);
+		if (block != page->index << PAGE_SHIFT)
+			block = 0;
+		else
+			block = 1;
+	}
 	if (block) {
 		/* submit the apparently valid page to the backing fs to be
 		 * read from disk */
@@ -707,11 +723,11 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 
 	inode = object->backer->d_inode;
 	ASSERT(S_ISREG(inode->i_mode));
-	ASSERT(inode->i_mapping->a_ops->bmap);
 	ASSERT(inode->i_mapping->a_ops->readpages);
 
 	/* calculate the shift required to use bmap */
-	if (inode->i_sb->s_blocksize > PAGE_SIZE)
+	if (inode->i_mapping->a_ops->bmap &&
+	    inode->i_sb->s_blocksize > PAGE_SIZE)
 		goto all_enobufs;
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
@@ -729,21 +745,38 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	list_for_each_entry_safe(page, _n, pages, lru) {
 		sector_t block0, block;
 
-		/* we assume the absence or presence of the first block is a
-		 * good enough indication for the page as a whole
-		 * - TODO: don't use bmap() for this as it is _not_ actually
-		 *   good enough for this as it doesn't indicate errors, but
-		 *   it's all we've got for the moment
-		 */
-		block0 = page->index;
-		block0 <<= shift;
-
-		block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
-						      block0);
-		_debug("%llx -> %llx",
-		       (unsigned long long) block0,
-		       (unsigned long long) block);
+		if (inode->i_mapping->a_ops->bmap) {
+			/* we assume the absence or presence of the first block is a
+			 * good enough indication for the page as a whole
+			 * - TODO: don't use bmap() for this as it is _not_ actually
+			 *   good enough for this as it doesn't indicate errors, but
+			 *   it's all we've got for the moment
+			 */
+			block0 = page->index;
+			block0 <<= shift;
+
+			block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
+							      block0);
+			_debug("%llx -> %llx",
+			       (unsigned long long) block0,
+			       (unsigned long long) block);
 
+		} else {
+			/* Use llseek */
+			struct path path;
+			struct file *file;
+			path.mnt = cache->mnt;
+			path.dentry = object->backer;
+			file = dentry_open(&path, O_RDONLY, cache->cache_cred);
+			if (IS_ERR(file))
+				goto all_enobufs;
+			block = vfs_llseek(file, page->index << PAGE_SHIFT, SEEK_DATA);
+			filp_close(file, NULL);
+			if (block != page->index << PAGE_SHIFT)
+				block = 0;
+			else
+				block = 1;
+		}
 		if (block) {
 			/* we have data - add it to the list to give to the
 			 * backing fs */

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 811 bytes --]

             reply	other threads:[~2015-04-09  7:49 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-09  7:49 NeilBrown [this message]
2015-04-09  9:23 ` [PATCH/RFC] fscache/cachefiles versus btrfs David Howells
2015-04-09 23:52   ` Dave Chinner
2015-04-10  0:42     ` NeilBrown
2015-04-10  1:08       ` Dave Chinner
2015-04-10  1:24   ` NeilBrown
2015-04-20  4:49     ` NeilBrown
2015-04-20  9:27     ` David Howells
2015-04-10 13:28   ` David Howells
2015-04-13 17:30     ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150409174916.5a2efef5@notabene.brown \
    --to=neilb@suse.de \
    --cc=dhowells@redhat.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-cachefs@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.